parent
9a18da742c
commit
21be2ca462
@ -0,0 +1,59 @@
|
|||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
# Scrape the Autohome industry-news index, download the first few linked
# articles and append their title / author / time / paragraphs to a text file.

url = 'https://www.autohome.com.cn/hangye/news/'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 SLBrowser/9.0.3.5211 SLBChan/128'}

# Seconds to wait for a response; without a timeout requests.get can block forever.
REQUEST_TIMEOUT = 10
# Only the first MAX_ARTICLES links found on the index page are downloaded.
MAX_ARTICLES = 5
# Output file; opened in append mode, so repeated runs accumulate.
OUTPUT_PATH = 'cc.txt'

# (CSS selector relative to the article container, characters deleted from the
# matched text). Order here is the order in which lines are emitted per article.
_FIELDS = (
    ('h1', '\r\n '),                          # title
    ('div.article-info>div', '\r\n '),        # author / source block
    ('div.article-info>span.time', '\r '),    # publish time (newlines kept)
    ('p[data-paraid]', '\r \xa0\u3000'),      # body paragraphs
)


def strip_chars(text, chars):
    """Return *text* with every character listed in *chars* removed."""
    return text.translate(str.maketrans('', '', chars))


def absolute_url(href, base=url):
    """Resolve *href* against *base*.

    Protocol-relative links (``//host/path``) get the base scheme, which is
    what the page uses; absolute and root-relative links are handled too.
    """
    from urllib.parse import urljoin

    return urljoin(base, href)


def fetch_soup(page_url):
    """Download *page_url* and return it parsed with the lxml parser."""
    resp = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Use the encoding detected for THIS response to avoid garbled text.
    resp.encoding = resp.apparent_encoding
    return BeautifulSoup(resp.text, "lxml")


def article_links(index_soup):
    """Return absolute URLs of the article anchors found on the index page."""
    links = []
    for anchor in index_soup.select('div.u-list-item>a'):
        href = anchor.get('href')
        if href:  # anchors without an href cannot be followed
            links.append(absolute_url(href))
    return links


def article_lines(article_soup):
    """Return the cleaned text lines extracted from one article page."""
    lines = []
    for container in article_soup.select('div.article-details'):
        for selector, junk in _FIELDS:
            lines.extend(
                strip_chars(node.text, junk)
                for node in container.select(selector)
            )
    return lines


def main():
    """Scrape the index and the first articles, then append the text to disk."""
    texts = []
    for link in article_links(fetch_soup(url))[:MAX_ARTICLES]:
        texts.extend(article_lines(fetch_soup(link)))

    # Nothing scraped: leave the output file untouched (and uncreated).
    if not texts:
        return

    # Open once and let the context manager close the handle.
    with open(OUTPUT_PATH, 'a', encoding='utf-8') as out:
        out.writelines(line + '\n' for line in texts)


if __name__ == '__main__':
    main()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in new issue