# Scraper for academic-conference listings on zhangqiaokeyan.com.
# (Git-hosting page residue from the original copy/paste removed so the
# file is valid Python.)
import requests
from bs4 import BeautifulSoup
from urllib.request import quote  # NOTE(review): unused; `quote` canonically lives in urllib.parse
import time

# Browser-like User-Agent so requests look like a normal visit and are less
# likely to be rejected by trivial anti-scraping checks.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'
}

# Number of listing pages to scrape (original looped 5 times).
PAGE_COUNT = 5


def scrape(outfile, page_count=PAGE_COUNT, delay=5):
    """Scrape conference listings and write tab-separated records to *outfile*.

    One line per conference: title, detail-page link, organizer, date.

    outfile    -- an open, writable text file object
    page_count -- how many listing pages to fetch
    delay      -- seconds to sleep between page requests (politeness)
    """
    for page in range(1, page_count + 1):
        # BUG FIX: the original fetched .../cn_1/ five times. The trailing
        # number in the URL is the page index, so iterate over it to fetch
        # distinct pages.
        url = f'https://www.zhangqiaokeyan.com/academic-conference-cn_{page}/'
        res = requests.get(url, headers=HEADERS)
        # Show the status so a block/ban is visible in the console output.
        print(res.status_code)
        if res.status_code != 200:
            # Skip failed pages instead of parsing an error document.
            continue
        soup = BeautifulSoup(res.text, 'html.parser')
        for item in soup.find_all('li', class_="list_item"):
            # Title ("itme_title" is the site's own misspelled class name —
            # do not "fix" it or the selector stops matching).
            title = item.find('div', class_="itme_title").text
            print(title)
            outfile.write("题目:" + title.strip() + '\t')
            # Detail-page link; href is protocol-relative ("//..."), so
            # prefix a scheme.
            half_link = item.find('div', class_="itme_title").find('a')['href']
            wholelink = 'http:' + str(half_link)
            print(wholelink)
            outfile.write("链接:" + wholelink.strip() + '\t')
            # Organizer.
            organizer = item.select_one('.item_mid > span').text
            print(organizer)
            outfile.write("举办单位:" + organizer.strip() + '\t')
            # Event date. BUG FIX: renamed from `time` so it no longer
            # shadows the imported `time` module.
            event_time = item.select_one('.item_right > span').text
            print(event_time)
            outfile.write("时间:" + event_time.strip())
            outfile.write('\n')
        # Rate-limit between pages (the original slept once before the loop,
        # which did not throttle the actual requests).
        time.sleep(delay)


if __name__ == "__main__":
    # `with` guarantees the output file is closed even if a request or a
    # parse step raises mid-scrape (the original leaked the handle on error).
    with open('huiyi_01.txt', 'w', encoding='utf-8') as f:
        scrape(f)