|
|
|
# NOTE: stray diff hunk header ("@ -1,77 +1,76 @@") removed — it made the file unparseable.
|
|
|
|
|
'''
|
|
|
|
|
Author:@阳春布泽
|
|
|
|
|
date:23-1-5
|
|
|
|
|
列表 [(日期,title,网址)]
|
|
|
|
|
写入提取日期
|
|
|
|
|
'''
|
|
|
|
|
import requests
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
import time
|
|
|
|
|
|
|
|
|
|
# Dean's-office (教务处) homepage of Xi'an Jiaotong University — the page scraped.
url = 'http://dean.xjtu.edu.cn/'
# Desktop-browser user agent so the server returns the normal page markup.
header = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
}
# Accumulates (date, title, href) tuples scraped from the page.
title_list = []
# Subset of title_list whose dates were not seen in the previous run.
output_list = []
# Human-readable output file listing the new notices.
path = 'C:\\Users\\LZH\\Desktop\\jwc.txt'
# '//'-separated dates from the previous run (change-detection store).
path_data = 'C:\\Python(旧)\\程序\\爬取教务处通知\\old_data.txt'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_title(url, header, title_list):
    """Fetch the notice index page and collect notice entries.

    Args:
        url: Page URL to fetch.
        header: HTTP headers dict (supplies the user-agent).
        title_list: List to which (date, title, href) tuples are appended.

    Returns:
        The same ``title_list``, extended with one tuple per
        ``<a target="_blank">`` found inside each ``<li>``.
    """
    r = requests.get(url, headers=header)
    # Let requests sniff the encoding from the body; the site does not
    # always declare it correctly in the headers.
    r.encoding = r.apparent_encoding
    soup = BeautifulSoup(r.text, 'html.parser')
    for item in soup.find_all(name='li'):
        # First 4 characters of the <li> text carry the date fragment
        # (presumably "MM-DD"-style — TODO confirm against live markup).
        date = item.get_text()[:4]
        try:
            for link in item.find_all(name='a', attrs={'target': "_blank"}):
                title_list.append((date, link['title'], link['href']))
        except KeyError:
            # Was a bare `except:` — narrowed: the only expected failure here
            # is an <a> tag missing the 'title' attribute.
            print('failed')
    return title_list
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def compare_title(title_list, path_data, output_list):
    """Filter notices down to those whose date was not recorded previously.

    Args:
        title_list: Iterable of (date, title, href) tuples.
        path_data: Path of the '//'-separated date store written by store_title.
        output_list: List to which unseen tuples are appended.

    Returns:
        ``output_list``, extended with every tuple whose date string is
        absent from the stored history.
    """
    try:
        # Store format: dates joined by '//' (see store_title).
        # `with` guarantees the handle is closed even if read() fails.
        with open(path_data, 'r', encoding='utf-8') as db:
            seen = set(db.read().split('//'))
    except FileNotFoundError:
        # First run: no history file yet, so every notice counts as new.
        seen = set()
    for title in title_list:
        if str(title[0]) not in seen:
            output_list.append(title)
    return output_list
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def store_title(title_list, data_path=None):
    """Persist the dates of all fetched notices for the next run.

    Writes each tuple's date followed by '//' — the separator that
    compare_title splits on. Overwrites the previous store.

    Args:
        title_list: Iterable of (date, title, href) tuples.
        data_path: Optional explicit store path; defaults to the
            module-level ``path_data`` (backward compatible — the original
            read that global implicitly).
    """
    target = path_data if data_path is None else data_path
    # Text mode + utf-8 replaces the old manual bytes() round-trips;
    # `with` closes the handle even on write failure.
    with open(target, 'w', encoding='utf-8') as db:
        for title_tuple in title_list:
            db.write(title_tuple[0])
            db.write('//')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def show_msg(output_list, path):
    """Write the new notices to a tab-separated UTF-8 text file.

    Each tuple becomes one line: every field followed by a tab, then a
    newline. Overwrites any previous file at ``path``.

    Args:
        output_list: Iterable of (date, title, href) tuples.
        path: Output file path.
    """
    # newline='' keeps the byte output identical to the old binary writer
    # (no \n -> \r\n translation on Windows); `with` closes the handle
    # even if a write fails.
    with open(path, 'w', encoding='utf-8', newline='') as f:
        for entry in output_list:
            for field in entry:
                f.write(field)
                f.write('\t')
            f.write('\n')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def print_msg(output_list):
    """Report how many new notices were written, then pause briefly."""
    count = len(output_list)
    print(f'已为您更新{count}条通知')
    # Keep the console window readable for a moment before it closes.
    time.sleep(3)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main(url, header, title_list, path_data, output_list, path):
    """Run one scrape cycle: fetch, diff against history, persist, report."""
    fetched = get_title(url, header, title_list)
    fresh = compare_title(fetched, path_data, output_list)
    store_title(fetched)
    show_msg(fresh, path)
    print_msg(fresh)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Guard the scrape behind __main__ so importing this module has no side effects.
if __name__ == '__main__':
    main(url, header, title_list, path_data, output_list, path)
|
|
|
|
|
'''
|
|
|
|
|
Author:@阳春布泽
|
|
|
|
|
date:23-1-5
|
|
|
|
|
note:目前只完成了爬虫部分,需要解决前端信息推送问题,有会QQ机器人或者微信小程序等同学欢迎合作
|
|
|
|
|
'''
|
|
|
|
|
import requests
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
import time
|
|
|
|
|
|
|
|
|
|
# Dean's-office (教务处) homepage of Xi'an Jiaotong University — the page scraped.
url = 'http://dean.xjtu.edu.cn/'
# Desktop-browser user agent so the server returns the normal page markup.
header = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
}
# Accumulates (date, title, href) tuples scraped from the page.
title_list = []
# Subset of title_list whose dates were not seen in the previous run.
output_list = []
# Human-readable output file listing the new notices (relative to CWD).
path = 'jwc.txt'
# '//'-separated dates from the previous run (change-detection store).
path_data = 'old_data.txt'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_title(url, header, title_list):
    """Fetch the notice index page and collect notice entries.

    Args:
        url: Page URL to fetch.
        header: HTTP headers dict (supplies the user-agent).
        title_list: List to which (date, title, href) tuples are appended.

    Returns:
        The same ``title_list``, extended with one tuple per
        ``<a target="_blank">`` found inside each ``<li>``.
    """
    r = requests.get(url, headers=header)
    # Let requests sniff the encoding from the body; the site does not
    # always declare it correctly in the headers.
    r.encoding = r.apparent_encoding
    soup = BeautifulSoup(r.text, 'html.parser')
    for item in soup.find_all(name='li'):
        # First 4 characters of the <li> text carry the date fragment
        # (presumably "MM-DD"-style — TODO confirm against live markup).
        date = item.get_text()[:4]
        try:
            for link in item.find_all(name='a', attrs={'target': "_blank"}):
                title_list.append((date, link['title'], link['href']))
        except KeyError:
            # Was a bare `except:` — narrowed: the only expected failure here
            # is an <a> tag missing the 'title' attribute.
            print('failed')
    return title_list
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def compare_title(title_list, path_data, output_list):
    """Filter notices down to those whose date was not recorded previously.

    Args:
        title_list: Iterable of (date, title, href) tuples.
        path_data: Path of the '//'-separated date store written by store_title.
        output_list: List to which unseen tuples are appended.

    Returns:
        ``output_list``, extended with every tuple whose date string is
        absent from the stored history.
    """
    try:
        # Store format: dates joined by '//' (see store_title).
        # `with` guarantees the handle is closed even if read() fails.
        with open(path_data, 'r', encoding='utf-8') as db:
            seen = set(db.read().split('//'))
    except FileNotFoundError:
        # First run: no history file yet, so every notice counts as new.
        seen = set()
    for title in title_list:
        if str(title[0]) not in seen:
            output_list.append(title)
    return output_list
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def store_title(title_list, data_path=None):
    """Persist the dates of all fetched notices for the next run.

    Writes each tuple's date followed by '//' — the separator that
    compare_title splits on. Overwrites the previous store.

    Args:
        title_list: Iterable of (date, title, href) tuples.
        data_path: Optional explicit store path; defaults to the
            module-level ``path_data`` (backward compatible — the original
            read that global implicitly).
    """
    target = path_data if data_path is None else data_path
    # Text mode + utf-8 replaces the old manual bytes() round-trips;
    # `with` closes the handle even on write failure.
    with open(target, 'w', encoding='utf-8') as db:
        for title_tuple in title_list:
            db.write(title_tuple[0])
            db.write('//')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def show_msg(output_list, path):
    """Write the new notices to a tab-separated UTF-8 text file.

    Each tuple becomes one line: every field followed by a tab, then a
    newline. Overwrites any previous file at ``path``.

    Args:
        output_list: Iterable of (date, title, href) tuples.
        path: Output file path.
    """
    # newline='' keeps the byte output identical to the old binary writer
    # (no \n -> \r\n translation on Windows); `with` closes the handle
    # even if a write fails.
    with open(path, 'w', encoding='utf-8', newline='') as f:
        for entry in output_list:
            for field in entry:
                f.write(field)
                f.write('\t')
            f.write('\n')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def print_msg(output_list):
    """Report how many new notices were written, then pause briefly."""
    count = len(output_list)
    print(f'已为您更新{count}条通知')
    # Keep the console window readable for a moment before it closes.
    time.sleep(3)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main(url, header, title_list, path_data, output_list, path):
    """Run one scrape cycle: fetch, diff against history, persist, report."""
    fetched = get_title(url, header, title_list)
    fresh = compare_title(fetched, path_data, output_list)
    store_title(fetched)
    show_msg(fresh, path)
    print_msg(fresh)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Guard the scrape behind __main__ so importing this module has no side effects.
if __name__ == '__main__':
    main(url, header, title_list, path_data, output_list, path)
|
|
|
|
|