def write2csv(papers: list, file_name='./paper_author.csv'):
    """Write one CSV row per named author of each paper.

    The target file is opened in ``'w'`` mode, so existing content is
    overwritten. A header row ``name, college, major, paper`` is written
    first, followed by one row for every author whose ``name`` is truthy.

    Args:
        papers: Iterable of paper objects. Each one is read for ``title``
            and ``authors``; each author is read for ``name``, ``college``
            and ``major``.
        file_name: Path of the CSV file to create.
    """
    # The context manager guarantees the file is flushed and closed even if
    # reading a paper/author attribute raises midway through the export.
    # newline='' lets the csv module control line endings itself.
    with open(file_name, 'w', encoding='utf-8', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(["name", "college", "major", "paper"])  # header row

        for paper in papers:
            for author in paper.authors:
                # Authors without a name carry no usable identity; skip them.
                if author.name:
                    writer.writerow(
                        [author.name, author.college, author.major, paper.title]
                    )