|
|
@ -1,5 +1,5 @@
|
|
|
|
|
|
|
|
import csv
|
|
|
|
import threading
|
|
|
|
import threading
|
|
|
|
import time
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import dcs.tests.config
|
|
|
|
import dcs.tests.config
|
|
|
|
from msedge.selenium_tools import Edge
|
|
|
|
from msedge.selenium_tools import Edge
|
|
|
@ -45,6 +45,24 @@ def crawl_zhiwang(word, pages_start=1, pages_end=2):
|
|
|
|
# TODO 写入数据库
|
|
|
|
# TODO 写入数据库
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def write2csv(papers: list, file_name='./paper_author.csv'):
    """Write one CSV row per (author, paper) pair to *file_name*.

    The file is created or truncated, a header row
    ``name, college, major, paper`` is written, and then every author of
    every paper that has a truthy ``name`` contributes one row.

    Args:
        papers: Iterable of paper objects. Each paper must expose ``title``
            and ``authors``; each author must expose ``name``, ``college``
            and ``major``.
        file_name: Destination path of the CSV file (UTF-8 encoded).

    Raises:
        OSError: If the file cannot be opened or written.
        AttributeError: If a paper or author lacks one of the attributes
            listed above.
    """
    # The context manager guarantees the handle is flushed and closed even if
    # a malformed paper/author raises part-way through the loop.
    # newline='' is required by the csv module so it controls line endings.
    with open(file_name, 'w', encoding='utf-8', newline='') as f_papers_authors:
        writer_p_a = csv.writer(f_papers_authors)
        writer_p_a.writerow(["name", "college", "major", "paper"])  # header row

        for paper in papers:
            for author in paper.authors:
                # Skip authors without a usable name.
                if not author.name:
                    continue
                writer_p_a.writerow(
                    [author.name, author.college, author.major, paper.title]
                )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Spider(threading.Thread):
|
|
|
|
class Spider(threading.Thread):
|
|
|
|
def __init__(self, word: str, pages_start=1, pages_end=1):
|
|
|
|
def __init__(self, word: str, pages_start=1, pages_end=1):
|
|
|
|
super().__init__()
|
|
|
|
super().__init__()
|
|
|
|