|
|
|
@ -17,7 +17,7 @@ from dcs.tools.message_process import parse_request, generate_response
|
|
|
|
|
def crawl_zhiwang(word, pages_start, pages_end):
|
|
|
|
|
edge_options = EdgeOptions()
|
|
|
|
|
edge_options.use_chromium = True
|
|
|
|
|
# edge_options.add_argument('--headless')
|
|
|
|
|
edge_options.add_argument('--headless')
|
|
|
|
|
driver = Edge(options=edge_options, executable_path=r'G:\course\yykf\dcs\bin\msedgedriver.exe')
|
|
|
|
|
|
|
|
|
|
soup = driver_open(driver, word)
|
|
|
|
@ -42,7 +42,7 @@ def crawl_zhiwang(word, pages_start, pages_end):
|
|
|
|
|
paper_id += 1
|
|
|
|
|
pages_start += 1
|
|
|
|
|
driver.close()
|
|
|
|
|
logger.debug("here")
|
|
|
|
|
# logger.debug("here")
|
|
|
|
|
return res
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -112,13 +112,13 @@ with socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP) as so
|
|
|
|
|
"utf-8"))
|
|
|
|
|
print(responseJson)
|
|
|
|
|
|
|
|
|
|
request = {'action': 'crawl zhiwang', 'word': 'science', 'pages_start': 1, 'pages_end': 3,
|
|
|
|
|
'cookie': cookie}
|
|
|
|
|
socket_to_server.sendall(mp.generate_request(request))
|
|
|
|
|
responseJson = JSONDecoder().decode(
|
|
|
|
|
mp.read_bytes(socket_to_server, struct.unpack('!Q', socket_to_server.recv(8))[0]).decode(
|
|
|
|
|
"utf-8"))
|
|
|
|
|
print(responseJson)
|
|
|
|
|
# request = {'action': 'crawl zhiwang', 'word': 'science', 'pages_start': 1, 'pages_end': 3,
|
|
|
|
|
# 'cookie': cookie}
|
|
|
|
|
# socket_to_server.sendall(mp.generate_request(request))
|
|
|
|
|
# responseJson = JSONDecoder().decode(
|
|
|
|
|
# mp.read_bytes(socket_to_server, struct.unpack('!Q', socket_to_server.recv(8))[0]).decode(
|
|
|
|
|
# "utf-8"))
|
|
|
|
|
# print(responseJson)
|
|
|
|
|
|
|
|
|
|
request = {'action': 'end'}
|
|
|
|
|
socket_to_server.sendall(mp.generate_request(request))
|
|
|
|
|