remotes/1726335272834287033/master
p36049127 5 years ago
parent 78b2b35fce
commit cb28d808eb

@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
import ast
import os

from bs4 import BeautifulSoup
import requests
def get_html(url, timeout=10):
    """Fetch ``url`` and return the response body decoded as UTF-8.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without a timeout a stalled
            server would block the script indefinitely.

    Returns:
        The response text, or ``None`` when the request fails (an error
        message is printed in that case).
    """
    try:
        # ``headers`` is the module-level dict of request headers.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Only network/HTTP-level failures are treated as "request failed";
        # programming errors and Ctrl-C are allowed to propagate.
        print('请求网址出错')
        return None
    # Decode the body as UTF-8 regardless of what the response declares.
    response.encoding = 'utf-8'
    return response.text
def write(txt, txtname):
    """Write ``txt`` followed by a newline to ``<txtname>.txt``.

    Args:
        txt: Value to store; it is converted with ``str()`` before writing.
        txtname: Output path without the ``.txt`` extension.

    The file is opened in ``'w'`` mode, so existing content is replaced.
    It is encoded as UTF-8.
    """
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(f'{txtname}.txt', 'w', encoding='UTF-8') as f:
        f.write(f'{txt}\n')
# Request headers sent by get_html(); the User-Agent is a desktop Chrome string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
}


def _result_links(soup, page_no):
    """Return the ``href`` values of the result entries on page ``page_no``.

    Entries are looked up by element id: page ``page_no`` is scanned for
    ids ``10 * (page_no - 1) + 1`` through ``10 * page_no + 1`` inclusive.
    For each id that exists, the link is taken from the first ``<a>``
    inside the entry's first ``<h3>``.
    """
    links = []
    for result_id in range(10 * (page_no - 1) + 1, 10 * page_no + 2):
        entry = soup.find(id=str(result_id))
        if entry is None:
            continue
        heading = entry.find('h3')
        if heading is None:
            continue
        anchor = heading.find('a')
        if anchor is None:
            continue
        href = anchor.get('href')
        # An <a> without an href would otherwise put None in the URL list.
        if href is not None:
            links.append(href)
    return links


o_urls = []
keyword = input()

# The input file's first line holds the Python-literal list of page URLs.
page_urls_path = os.path.join('page_urls', f'{keyword}page_urls.txt')
with open(page_urls_path, 'r', encoding='UTF-8') as f:
    first_line = f.readline().strip()

# SECURITY: this used to be eval(), which executes arbitrary code found in
# the file. literal_eval() only accepts Python literals, which is all a
# stored list of URL strings needs. An empty file is treated as "no pages".
page_urls = ast.literal_eval(first_line) if first_line else []

for cnt, url in enumerate(page_urls, start=1):
    try:
        html = get_html(url)
        if html is None:
            # Download failed; report it the same way as a parse failure.
            print('false')
            continue
        for link in _result_links(BeautifulSoup(html, 'lxml'), cnt):
            o_urls.append(link)
            print(link)
    except Exception:
        # Best effort: one bad page must not abort the whole run, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        print('false')

# Create the output directory if needed so the collected URLs are not lost.
os.makedirs('urls', exist_ok=True)
with open(os.path.join('urls', f'{keyword}urls.txt'), 'w', encoding='UTF-8') as f:
    f.write(str(o_urls))
Loading…
Cancel
Save