"""Scraper for notices on the XJTU Dean's Office site (dean.xjtu.edu.cn).

Author: @阳春布泽, date: 23-1-5.
Note (translated): only the crawler part is finished; front-end push
(QQ bot / WeChat mini-program) still needs to be implemented.

Flow: fetch the notice list, diff it against the dates recorded in
``old_data.txt``, write the new entries to ``jwc.txt``, then overwrite
``old_data.txt`` with the current dates.
"""
import requests
from bs4 import BeautifulSoup
import time

url = 'http://dean.xjtu.edu.cn/'
header = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/84.0.4147.105 Safari/537.36'
}
title_list = []
output_list = []
path = 'jwc.txt'            # output file with newly found notices
path_data = 'old_data.txt'  # persisted dates from the previous run


def get_title(url, header, title_list):
    """Fetch the notice page and append (date, title, href) tuples to title_list.

    The date is taken as the first 4 characters of each <li>'s text;
    links are the <a target="_blank"> anchors inside that <li>.
    Returns title_list (mutated in place).
    """
    r = requests.get(url, headers=header)
    # Let requests guess the real encoding (page is not plain latin-1).
    r.encoding = r.apparent_encoding
    soup = BeautifulSoup(r.text, 'html.parser')
    for li in soup.find_all(name='li'):
        date = li.get_text()[:4]
        # A missing 'title' or 'href' attribute raises KeyError; skip
        # that <li> but keep scraping the rest.
        try:
            for a in li.find_all(name='a', attrs={'target': "_blank"}):
                title_list.append((date, a['title'], a['href']))
        except KeyError:
            print('failed')
    return title_list


def compare_title(title_list, path_data, output_list):
    """Append to output_list every entry whose date is not yet in path_data.

    On the very first run path_data does not exist yet; treat that as
    "nothing seen before" instead of crashing.
    """
    try:
        with open(path_data, 'r', encoding='utf-8') as db:
            db_list = db.read().split('//')
    except FileNotFoundError:
        db_list = []
    for title in title_list:
        if str(title[0]) not in db_list:
            output_list.append(title)
    return output_list


def store_title(title_list):
    """Overwrite path_data with the dates of all current notices, '//'-separated."""
    with open(path_data, 'w', encoding='utf-8') as db:
        for title_tuple in title_list:
            db.write(title_tuple[0])
            db.write('//')


def show_msg(output_list, path):
    """Write the new notices to path, tab-separated fields, one notice per line."""
    with open(path, 'w', encoding='utf-8') as f:
        for entry in output_list:
            for field in entry:
                f.write(field)
                f.write('\t')
            f.write('\n')


def print_msg(output_list):
    """Print how many notices were found, then pause briefly."""
    print(f'已为您更新{len(output_list)}条通知')
    time.sleep(3)


def main(url, header, title_list, path_data, output_list, path):
    """Run one scrape-diff-store-report cycle."""
    title_list = get_title(url, header, title_list)
    output_list = compare_title(title_list, path_data, output_list)
    store_title(title_list)
    show_msg(output_list, path)
    print_msg(output_list)


# Guarded so importing this module does not trigger a network request.
if __name__ == '__main__':
    main(url, header, title_list, path_data, output_list, path)