forked from p54670231/Idea
parent
cb28d808eb
commit
ab0cd734c4
@ -0,0 +1,39 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
|
||||
def get_html(url, timeout=10):
    """Download *url* and return its body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``. Without a timeout a stalled server would
            block the script indefinitely.

    Returns:
        The decoded page text, or ``None`` when the request fails (an error
        message is printed in that case).
    """
    try:
        # `headers` is the module-level User-Agent dict defined in this file.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Only network/HTTP-layer failures are handled here, so that
        # KeyboardInterrupt and programming errors are not hidden.
        print('请求网址出错')
        return None
    # The body is always decoded as UTF-8, regardless of the charset the
    # server declares.
    response.encoding = 'utf-8'
    return response.text
|
||||
|
||||
def write(txt, txtname):
    """Write ``str(txt)`` followed by a newline to ``<txtname>.txt``.

    The file is created, or overwritten if it already exists, and is encoded
    as UTF-8.

    Args:
        txt: Value to store; it is converted with ``str()`` before writing.
        txtname: Target path without the ``.txt`` extension.
    """
    # The `with` statement closes the file on exit, so no explicit close()
    # call is needed.
    with open(txtname + '.txt', 'w', encoding='UTF-8') as f:
        f.write(str(txt) + '\n')
|
||||
|
||||
# Browser-like User-Agent header sent with every request made by get_html().
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',}

# Keyword read from stdin; it selects the URL list to load and names the
# output directory and files.
keyword = input()

with open('urls\\' + str(keyword) + 'urls.txt', 'r', encoding='UTF-8') as f:
    # SECURITY(review): eval() executes whatever this file contains. It is
    # kept because the file's exact format is not visible here; if the file
    # always holds a plain list literal, ast.literal_eval is the safe
    # replacement.
    urls = eval(f.read())

# Save the visible text of each page to
# 信息\<keyword>\<keyword><n>.txt, numbering the files from 1.
for i, url in enumerate(urls, start=1):
    url = str(url)
    # The output file is opened before the download, so a failed fetch
    # leaves an empty file behind for that number.
    with open('信息\\' + str(keyword) + '\\' + str(keyword) + str(i) + '.txt', 'w', encoding='UTF-8') as f:
        try:
            soup = BeautifulSoup(get_html(url), 'lxml')
            f.write(soup.text)
        except Exception:
            # Best effort: report the failure and move on to the next URL.
            # `Exception` (not a bare except) lets Ctrl-C still stop the run.
            print('false')
|
||||
|
||||
Loading…
Reference in new issue