import csv


def write2csv(papers: list, file_name='./paper_author.csv'):
    """Write one CSV row per named author of every paper.

    The output file is created (or truncated) and starts with the header
    row ``name, college, major, paper``. For each paper, every author whose
    ``name`` is truthy produces one row holding the author's name, college
    and major followed by the paper's title.

    Args:
        papers: Objects exposing ``title`` and an iterable ``authors``;
            each author exposes ``name``, ``college`` and ``major``.
        file_name: Path of the CSV file to write, encoded as UTF-8.

    Returns:
        None. The result is the file written at ``file_name``.
    """
    # The context manager closes the file even if a paper or author is
    # malformed and raises part-way through; newline='' leaves line-ending
    # handling to the csv module.
    with open(file_name, 'w', encoding='utf-8', newline='') as f_papers_authors:
        writer_p_a = csv.writer(f_papers_authors)
        writer_p_a.writerow(["name", "college", "major", "paper"])  # header row

        for paper in papers:
            for author in paper.authors:
                # Authors with an empty name are skipped.
                if author.name:
                    writer_p_a.writerow(
                        [author.name, author.college, author.major, paper.title]
                    )