|
|
|
@ -2,6 +2,7 @@ import json
|
|
|
|
|
import socket
|
|
|
|
|
import struct
|
|
|
|
|
import threading
|
|
|
|
|
from configparser import ConfigParser
|
|
|
|
|
from json import JSONDecoder
|
|
|
|
|
from time import sleep
|
|
|
|
|
|
|
|
|
@ -17,8 +18,16 @@ from dcs.tools.message_process import parse_request, generate_response
|
|
|
|
|
def crawl_zhiwang(word, pages_start, pages_end):
|
|
|
|
|
edge_options = EdgeOptions()
|
|
|
|
|
edge_options.use_chromium = True
|
|
|
|
|
No_Image_loading = {"profile.managed_default_content_settings.images": 2, 'permissions.default.stylesheet': 2}
|
|
|
|
|
edge_options.add_experimental_option("prefs", No_Image_loading)
|
|
|
|
|
edge_options.add_argument('--headless')
|
|
|
|
|
driver = Edge(options=edge_options, executable_path=r'G:\course\yykf\dcs\bin\msedgedriver.exe')
|
|
|
|
|
configFile = '../../conf/settings.ini'
|
|
|
|
|
con = ConfigParser()
|
|
|
|
|
con.read(configFile, encoding='utf-8')
|
|
|
|
|
items = con.items('crawler')
|
|
|
|
|
items = dict(items)['edge_driver_path']
|
|
|
|
|
print(items)
|
|
|
|
|
driver = Edge(options=edge_options, executable_path=items)
|
|
|
|
|
|
|
|
|
|
soup = driver_open(driver, word)
|
|
|
|
|
papers = [] # holds the papers that have been crawled
|
|
|
|
@ -85,19 +94,21 @@ class Crawl(threading.Thread):
|
|
|
|
|
|
|
|
|
|
crawl = Crawl()
|
|
|
|
|
crawl.start()
|
|
|
|
|
# res = crawl.crawl({'action': 'crawl zhiwang', 'word': 'science', 'pages_start': 1, 'pages_end': 2, 'cookie': '123'})
|
|
|
|
|
# logger.debug(res)
|
|
|
|
|
|
|
|
|
|
with socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP) as socket_to_server:
|
|
|
|
|
socket_to_server.bind(('127.0.0.1', 9999))
|
|
|
|
|
socket_to_server.connect(('127.0.0.1', 7777))
|
|
|
|
|
|
|
|
|
|
# request = {'action': 'register', 'user': 'wufayuan', 'password': '113818'}
|
|
|
|
|
# socket_to_server.sendall(mp.generate_request(request))
|
|
|
|
|
# responseJson = JSONDecoder().decode(
|
|
|
|
|
# mp.read_bytes(socket_to_server, struct.unpack('!Q', socket_to_server.recv(8))[0]).decode(
|
|
|
|
|
# "utf-8"))
|
|
|
|
|
# print(responseJson)
|
|
|
|
|
request = {'action': 'register', 'user': 'liuxiaoyu', 'password': '113818'}
|
|
|
|
|
socket_to_server.sendall(mp.generate_request(request))
|
|
|
|
|
responseJson = JSONDecoder().decode(
|
|
|
|
|
mp.read_bytes(socket_to_server, struct.unpack('!Q', socket_to_server.recv(8))[0]).decode(
|
|
|
|
|
"utf-8"))
|
|
|
|
|
print(responseJson)
|
|
|
|
|
|
|
|
|
|
request = {'action': 'login', 'user': 'wufayuan', 'password': '113818'}
|
|
|
|
|
request = {'action': 'login', 'user': 'liuxiaoyu', 'password': '113818'}
|
|
|
|
|
socket_to_server.sendall(mp.generate_request(request))
|
|
|
|
|
responseJson = JSONDecoder().decode(
|
|
|
|
|
mp.read_bytes(socket_to_server, struct.unpack('!Q', socket_to_server.recv(8))[0]).decode(
|
|
|
|
|