remotes/1726335272834287033/master
p36049127 5 years ago
parent cb28d808eb
commit ab0cd734c4

@ -0,0 +1,39 @@
# -*- coding: utf-8 -*-
import ast
import os

import requests
from bs4 import BeautifulSoup
def get_html(url, timeout=10):
    """Fetch *url* and return the response body decoded as UTF-8.

    The request is sent with the module-level ``headers`` dict.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the whole run.

    Returns:
        The page text, or ``None`` when the request failed (an error
        message is printed in that case).
    """
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Only network/HTTP-level failures are handled here; anything else
        # (including Ctrl-C) propagates to the caller.
        print('请求网址出错')
        return None
    # Force UTF-8 decoding of the body instead of the encoding requests
    # would otherwise pick for ``response.text``.
    response.encoding = 'utf-8'
    return response.text
def write(txt, txtname):
    """Write ``str(txt)`` followed by a newline to ``<txtname>.txt``.

    The file is opened in ``'w'`` mode with UTF-8 encoding, so any existing
    content is replaced.

    Args:
        txt: Value to store; it is converted with ``str()`` before writing.
        txtname: Target path without the ``.txt`` extension.
    """
    # The ``with`` statement closes the file; no explicit close() is needed.
    with open(txtname + '.txt', 'w', encoding='UTF-8') as f:
        f.write(str(txt) + '\n')
# Browser-like User-Agent sent with every request made by get_html().
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',}

# The keyword read from stdin selects both the input URL list
# (urls/<keyword>urls.txt) and the output directory (信息/<keyword>/).
keyword = input()

with open(os.path.join('urls', str(keyword) + 'urls.txt'), 'r', encoding='UTF-8') as f:
    # NOTE(review): this used to be eval(), which executes arbitrary code
    # found in the file. literal_eval only accepts Python literals --
    # confirm the urls file always holds a plain list literal.
    urls = ast.literal_eval(f.read())

# Make sure the output directory exists; open(..., 'w') does not create it.
out_dir = os.path.join('信息', str(keyword))
os.makedirs(out_dir, exist_ok=True)

for i, entry in enumerate(urls):
    url = str(entry)
    # Output files are numbered from 1: <keyword>1.txt, <keyword>2.txt, ...
    out_path = os.path.join(out_dir, str(keyword) + str(i + 1) + '.txt')
    with open(out_path, 'w', encoding='UTF-8') as f:
        try:
            # get_html() returns None on a failed request; BeautifulSoup then
            # raises and the page is reported as failed below, leaving an
            # empty output file for this index.
            soup = BeautifulSoup(get_html(url), 'lxml')
            f.write(soup.text)
        except Exception:
            print('false')
Loading…
Cancel
Save