diff --git a/.idea/dataSources.xml b/.idea/dataSources.xml new file mode 100644 index 0000000..b58aeeb --- /dev/null +++ b/.idea/dataSources.xml @@ -0,0 +1,12 @@ + + + + + mysql.8 + true + com.mysql.cj.jdbc.Driver + jdbc:mysql://localhost:3306/test + $ProjectFileDir$ + + + \ No newline at end of file diff --git a/.idea/dcs.iml b/.idea/dcs.iml index 8388dbc..7770d8e 100644 --- a/.idea/dcs.iml +++ b/.idea/dcs.iml @@ -2,7 +2,7 @@ - + \ No newline at end of file diff --git a/.idea/jsLibraryMappings.xml b/.idea/jsLibraryMappings.xml new file mode 100644 index 0000000..d23208f --- /dev/null +++ b/.idea/jsLibraryMappings.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..d972ca4 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/sqldialects.xml b/.idea/sqldialects.xml new file mode 100644 index 0000000..d863b78 --- /dev/null +++ b/.idea/sqldialects.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/bin/msedgedriver.exe b/bin/msedgedriver.exe index 0ed1012..a62a987 100644 Binary files a/bin/msedgedriver.exe and b/bin/msedgedriver.exe differ diff --git a/dcs/client/client.py b/dcs/client/client.py index 009b1e4..55316de 100644 --- a/dcs/client/client.py +++ b/dcs/client/client.py @@ -17,7 +17,7 @@ class Crawl(threading.Thread): @staticmethod def crawl() -> dict: - result_map = {0: {'name': 'remote', 'college': 'remote', 'major': 'remote', 'paper': 'remote'}} + result_map = {0: {'name': 'remote', 'college': 'remote', 'major': 'remote', 'title': 'remote'}} return result_map def run(self) -> None: @@ -65,13 +65,13 @@ with socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP) as so "utf-8")) print(responseJson) - request = {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 1, 'pages_end': 10, - 'cookie': cookie} - socket_to_server.sendall(mp.generate_request(request)) - responseJson = JSONDecoder().decode( - mp.read_bytes(socket_to_server, struct.unpack('!Q', socket_to_server.recv(8))[0]).decode( - "utf-8")) - print(responseJson) + # request = {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 1, 'pages_end': 10, + # 'cookie': cookie} + # socket_to_server.sendall(mp.generate_request(request)) + # responseJson = JSONDecoder().decode( + # mp.read_bytes(socket_to_server, struct.unpack('!Q', socket_to_server.recv(8))[0]).decode( + # "utf-8")) + # print(responseJson) request = {'action': 'end'} socket_to_server.sendall(mp.generate_request(request)) diff --git a/dcs/communicate.py b/dcs/communicate.py index 6818eee..bdd5194 100644 --- a/dcs/communicate.py +++ b/dcs/communicate.py @@ -29,6 +29,14 @@ class Communicator(threading.Thread): with socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP) as socket_to_client: for info in self.info_list: + try: + logger.info(f'sending info to {info[0]}: {info[1]}') + socket_to_client.connect(info[0]) + socket_to_client.sendall(generate_request(info[1])) + self.info_list.remove(info) + except Exception as e: + logger.error(str(e)) + self.info_list.remove(info) socket_to_client.connect(info[0]) logger.info(f'sending response to {info[0]}: {info[1]}') socket_to_client.sendall(generate_request(info[1])) diff --git a/dcs/dcs.log b/dcs/dcs.log index 2f127a0..9458f52 100644 --- a/dcs/dcs.log +++ b/dcs/dcs.log @@ -6801,3 +6801,603 @@ 2022-04-21 20:42:59.230 | DEBUG | dcs.tests.spider_task:compose_result:192 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} 2022-04-21 20:42:59.476 | DEBUG | dcs.tests.spider_task:compose_result:192 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} 2022-04-21 20:42:59.497 | INFO | dcs.communicate:run:33 - sending response to ('127.0.0.1', 9000): {'0': {'name': 'remote', 'college': 'remote', 'major': 'remote', 'paper': 'remote'}, 1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 'type': 'response'} +2022-06-28 10:40:07.596 | DEBUG | __main__::14 - reading config args... +2022-06-28 10:40:07.611 | DEBUG | __main__::21 - starting the main server... +2022-06-28 10:40:07.611 | DEBUG | __main__::26 - starting the requester server... +2022-06-28 10:40:07.611 | DEBUG | __main__::31 - starting the spider server... +2022-06-28 10:40:07.627 | DEBUG | __main__::36 - starting the user server... +2022-06-28 10:40:07.658 | DEBUG | __main__::41 - starting the communicator server... +2022-06-28 10:40:47.217 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 9000) +2022-06-28 10:40:47.327 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 10:40:47.358 | INFO | dcs.tests.user_request_handler:register:37 - [REQUEST] register +2022-06-28 11:26:41.089 | DEBUG | __main__::14 - reading config args... +2022-06-28 11:26:41.089 | DEBUG | __main__::21 - starting the main server... +2022-06-28 11:26:41.089 | DEBUG | __main__::26 - starting the requester server... +2022-06-28 11:26:41.089 | DEBUG | __main__::31 - starting the spider server... +2022-06-28 11:26:41.105 | DEBUG | __main__::36 - starting the user server... +2022-06-28 11:26:41.152 | DEBUG | __main__::41 - starting the communicator server... +2022-06-28 11:26:44.118 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 9000) +2022-06-28 11:26:44.228 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 11:26:44.291 | INFO | dcs.tests.user_request_handler:register:37 - [REQUEST] register +2022-06-28 11:28:58.088 | DEBUG | __main__::14 - reading config args... +2022-06-28 11:28:58.088 | DEBUG | __main__::21 - starting the main server... +2022-06-28 11:28:58.104 | DEBUG | __main__::26 - starting the requester server... +2022-06-28 11:28:58.104 | DEBUG | __main__::31 - starting the spider server... +2022-06-28 11:28:58.120 | DEBUG | __main__::36 - starting the user server... +2022-06-28 11:28:58.151 | DEBUG | __main__::41 - starting the communicator server... +2022-06-28 11:29:02.389 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 9000) +2022-06-28 11:29:02.483 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 11:29:02.499 | INFO | dcs.tests.user_request_handler:register:37 - [REQUEST] register +2022-06-28 11:29:03.189 | INFO | dcs.tests.user_request_handler:register:45 - [RESPONSE] register: 注册成功 +2022-06-28 11:29:03.268 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 9000): {'register': '注册成功'} +2022-06-28 11:29:03.299 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 11:29:03.331 | INFO | dcs.tests.user_request_handler:login:26 - [REQUEST] login +2022-06-28 11:29:04.005 | INFO | dcs.tests.user_request_handler:login:34 - [RESPONSE] cookie: 30b7a709cb34e1eeb1bb8dfdade7ea458354a17f +2022-06-28 11:29:04.037 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 9000): {'cookie': '30b7a709cb34e1eeb1bb8dfdade7ea458354a17f'} +2022-06-28 11:29:04.068 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 11:29:04.084 | INFO | dcs.tests.user_request_handler:report_state:16 - [REQUEST] report free +2022-06-28 11:29:04.084 | INFO | dcs.tests.user_request_handler:report_state:23 - [RESPONSE] report free: success marked 30b7a709cb34e1eeb1bb8dfdade7ea458354a17f +2022-06-28 11:29:04.146 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 9000): {'report_free': 'success marked 30b7a709cb34e1eeb1bb8dfdade7ea458354a17f'} +2022-06-28 11:29:04.193 | INFO | dcs.tests.spider_task:distribute_task:154 - distributing task: (('127.0.0.1', 9000), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 1, 'pages_end': 10, 'cookie': '30b7a709cb34e1eeb1bb8dfdade7ea458354a17f'}) +2022-06-28 11:29:04.193 | DEBUG | dcs.tests.spider_task:distribute_task:167 - [, , ] +2022-06-28 11:29:04.193 | DEBUG | dcs.tests.spider_task:run:203 - [] +2022-06-28 11:29:04.193 | DEBUG | dcs.tests.spider_task:run:215 - generating remote task +2022-06-28 11:29:04.193 | INFO | dcs.requester:get:44 - sending crawl request to ('127.0.0.1', 9000) +2022-06-28 11:29:04.193 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 9000): {'crawling state': 'starting, please wait...'} +2022-06-28 11:29:04.225 | INFO | dcs.tests.requestHandler:run:20 - [REQUEST] end +2022-06-28 11:29:04.225 | DEBUG | dcs.tests.requestHandler:run:21 - communication over from ('127.0.0.1', 9000)! +2022-06-28 11:29:04.241 | DEBUG | dcs.tests.spider_task:run:223 - generating local task +2022-06-28 11:29:04.303 | DEBUG | dcs.tests.spider_task:run:223 - generating local task +2022-06-28 11:29:04.413 | DEBUG | dcs.requester:run:70 - receiving remote task result, saving... +2022-06-28 11:29:04.492 | DEBUG | dcs.requester:run:74 - result: {'0': {'name': 'remote', 'college': 'remote', 'major': 'remote', 'paper': 'remote'}} +2022-06-28 11:29:05.151 | DEBUG | dcs.tests.spider_task:test_simulation:102 - simulation crawling... +2022-06-28 11:29:05.151 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 11:29:05.496 | DEBUG | dcs.tests.spider_task:test_simulation:102 - simulation crawling... +2022-06-28 11:29:05.496 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 11:29:05.763 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 11:29:06.140 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 11:29:06.517 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 11:29:06.627 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 11:29:06.768 | INFO | dcs.tests.spider_task:run:118 - partial crawl task finished: (('127.0.0.1', 9000), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 4, 'pages_end': 7, 'cookie': '30b7a709cb34e1eeb1bb8dfdade7ea458354a17f'}) +2022-06-28 11:29:07.113 | INFO | dcs.tests.spider_task:run:118 - partial crawl task finished: (('127.0.0.1', 9000), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 7, 'pages_end': 10, 'cookie': '30b7a709cb34e1eeb1bb8dfdade7ea458354a17f'}) +2022-06-28 11:29:07.145 | DEBUG | dcs.tests.spider_task:compose_result:177 - composing task... +2022-06-28 11:29:07.396 | DEBUG | dcs.tests.spider_task:compose_result:192 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-28 11:29:07.600 | DEBUG | dcs.tests.spider_task:compose_result:192 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-28 11:29:07.647 | INFO | dcs.communicate:run:33 - sending response to ('127.0.0.1', 9000): {'0': {'name': 'remote', 'college': 'remote', 'major': 'remote', 'paper': 'remote'}, 1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 'type': 'response'} +2022-06-28 14:49:12.246 | DEBUG | __main__::14 - reading config args... +2022-06-28 14:49:12.246 | DEBUG | __main__::21 - starting the main server... +2022-06-28 14:49:12.246 | DEBUG | __main__::26 - starting the requester server... +2022-06-28 14:49:12.246 | DEBUG | __main__::31 - starting the spider server... +2022-06-28 14:49:12.261 | DEBUG | __main__::36 - starting the user server... +2022-06-28 14:49:12.371 | DEBUG | __main__::41 - starting the communicator server... +2022-06-28 14:49:17.048 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 9000) +2022-06-28 14:49:17.063 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 14:49:17.110 | INFO | dcs.tests.user_request_handler:register:37 - [REQUEST] register +2022-06-28 14:49:17.597 | INFO | dcs.tests.user_request_handler:register:45 - [RESPONSE] register: 用户名已存在,注册失败 +2022-06-28 14:49:17.659 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 9000): {'register': '用户名已存在,注册失败'} +2022-06-28 14:49:17.753 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 14:49:17.769 | INFO | dcs.tests.user_request_handler:login:26 - [REQUEST] login +2022-06-28 14:49:18.947 | INFO | dcs.tests.user_request_handler:login:34 - [RESPONSE] cookie: 9fef547cebc2f8673e826daa21337ce6f9b5ffcb +2022-06-28 14:49:18.962 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 9000): {'cookie': '9fef547cebc2f8673e826daa21337ce6f9b5ffcb'} +2022-06-28 14:49:18.978 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 14:49:19.010 | INFO | dcs.tests.user_request_handler:report_state:16 - [REQUEST] report free +2022-06-28 14:49:19.025 | INFO | dcs.tests.user_request_handler:report_state:23 - [RESPONSE] report free: success marked 9fef547cebc2f8673e826daa21337ce6f9b5ffcb +2022-06-28 14:49:19.056 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 9000): {'report_free': 'success marked 9fef547cebc2f8673e826daa21337ce6f9b5ffcb'} +2022-06-28 14:49:19.135 | INFO | dcs.tests.spider_task:distribute_task:154 - distributing task: (('127.0.0.1', 9000), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 1, 'pages_end': 10, 'cookie': '9fef547cebc2f8673e826daa21337ce6f9b5ffcb'}) +2022-06-28 14:49:19.135 | DEBUG | dcs.tests.spider_task:distribute_task:167 - [, , ] +2022-06-28 14:49:19.135 | DEBUG | dcs.tests.spider_task:run:203 - [] +2022-06-28 14:49:19.135 | DEBUG | dcs.tests.spider_task:run:215 - generating remote task +2022-06-28 14:49:19.135 | INFO | dcs.requester:get:44 - sending crawl request to ('127.0.0.1', 9000) +2022-06-28 14:49:19.166 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 9000): {'crawling state': 'starting, please wait...'} +2022-06-28 14:49:19.198 | INFO | dcs.tests.requestHandler:run:20 - [REQUEST] end +2022-06-28 14:49:19.198 | DEBUG | dcs.tests.requestHandler:run:21 - communication over from ('127.0.0.1', 9000)! +2022-06-28 14:49:19.198 | DEBUG | dcs.tests.spider_task:run:223 - generating local task +2022-06-28 14:49:19.370 | DEBUG | dcs.tests.spider_task:run:223 - generating local task +2022-06-28 14:49:19.386 | DEBUG | dcs.requester:run:70 - receiving remote task result, saving... +2022-06-28 14:49:19.700 | DEBUG | dcs.requester:run:74 - result: {'0': {'name': 'remote', 'college': 'remote', 'major': 'remote', 'paper': 'remote'}} +2022-06-28 14:49:20.453 | DEBUG | dcs.tests.spider_task:test_simulation:102 - simulation crawling... +2022-06-28 14:49:20.453 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 14:49:20.595 | DEBUG | dcs.tests.spider_task:test_simulation:102 - simulation crawling... +2022-06-28 14:49:20.595 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 14:49:20.846 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 14:49:20.956 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 14:49:21.364 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 14:49:21.505 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 14:49:21.725 | INFO | dcs.tests.spider_task:run:118 - partial crawl task finished: (('127.0.0.1', 9000), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 4, 'pages_end': 7, 'cookie': '9fef547cebc2f8673e826daa21337ce6f9b5ffcb'}) +2022-06-28 14:49:21.960 | INFO | dcs.tests.spider_task:run:118 - partial crawl task finished: (('127.0.0.1', 9000), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 7, 'pages_end': 10, 'cookie': '9fef547cebc2f8673e826daa21337ce6f9b5ffcb'}) +2022-06-28 14:49:22.023 | DEBUG | dcs.tests.spider_task:compose_result:177 - composing task... +2022-06-28 14:49:22.588 | DEBUG | dcs.tests.spider_task:compose_result:192 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-28 14:49:23.122 | DEBUG | dcs.tests.spider_task:compose_result:192 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-28 14:49:23.153 | INFO | dcs.communicate:run:33 - sending response to ('127.0.0.1', 9000): {'0': {'name': 'remote', 'college': 'remote', 'major': 'remote', 'paper': 'remote'}, 1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 'type': 'response'} +2022-06-28 14:54:28.848 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 52306) +2022-06-28 14:56:06.566 | DEBUG | __main__::14 - reading config args... +2022-06-28 14:56:06.566 | DEBUG | __main__::21 - starting the main server... +2022-06-28 14:56:06.581 | DEBUG | __main__::26 - starting the requester server... +2022-06-28 14:56:06.581 | DEBUG | __main__::31 - starting the spider server... +2022-06-28 14:56:06.597 | DEBUG | __main__::36 - starting the user server... +2022-06-28 14:56:06.660 | DEBUG | __main__::41 - starting the communicator server... +2022-06-28 14:57:35.539 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 52347) +2022-06-28 14:59:09.516 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 52356) +2022-06-28 15:01:29.819 | DEBUG | __main__::14 - reading config args... +2022-06-28 15:01:29.819 | DEBUG | __main__::21 - starting the main server... +2022-06-28 15:01:29.819 | DEBUG | __main__::26 - starting the requester server... +2022-06-28 15:01:29.819 | DEBUG | __main__::31 - starting the spider server... +2022-06-28 15:01:29.835 | DEBUG | __main__::36 - starting the user server... +2022-06-28 15:01:29.945 | DEBUG | __main__::41 - starting the communicator server... +2022-06-28 15:01:42.873 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 52363) +2022-06-28 15:01:57.533 | DEBUG | __main__::14 - reading config args... +2022-06-28 15:01:57.533 | DEBUG | __main__::21 - starting the main server... +2022-06-28 15:01:57.533 | DEBUG | __main__::26 - starting the requester server... +2022-06-28 15:01:57.533 | DEBUG | __main__::31 - starting the spider server... +2022-06-28 15:01:57.548 | DEBUG | __main__::36 - starting the user server... +2022-06-28 15:01:57.611 | DEBUG | __main__::41 - starting the communicator server... +2022-06-28 15:02:50.367 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 52367) +2022-06-28 15:02:50.414 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 15:02:50.446 | INFO | dcs.tests.user_request_handler:register:37 - [REQUEST] register +2022-06-28 15:02:50.979 | INFO | dcs.tests.user_request_handler:register:45 - [RESPONSE] register: 注册成功 +2022-06-28 15:02:50.995 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 52367): {'register': '注册成功'} +2022-06-28 15:02:59.519 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 52370) +2022-06-28 15:02:59.550 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 15:02:59.597 | INFO | dcs.tests.user_request_handler:login:26 - [REQUEST] login +2022-06-28 15:03:00.335 | INFO | dcs.tests.user_request_handler:login:34 - [RESPONSE] cookie: 9c8c82096cada9adb7f6fbec6a7d568567186932 +2022-06-28 15:03:00.351 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 52370): {'cookie': '9c8c82096cada9adb7f6fbec6a7d568567186932'} +2022-06-28 15:03:42.009 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 52374) +2022-06-28 15:03:42.056 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 15:03:42.072 | INFO | dcs.tests.user_request_handler:login:26 - [REQUEST] login +2022-06-28 15:03:43.044 | INFO | dcs.tests.user_request_handler:login:34 - [RESPONSE] cookie: d6c6088e46bdc7573af8e25de200f0cb88ca1e35 +2022-06-28 15:03:43.076 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 52374): {'cookie': 'd6c6088e46bdc7573af8e25de200f0cb88ca1e35'} +2022-06-28 15:04:21.446 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 52387) +2022-06-28 15:05:13.998 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 52389) +2022-06-28 15:05:14.077 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 15:05:14.092 | INFO | dcs.tests.user_request_handler:login:26 - [REQUEST] login +2022-06-28 15:05:15.081 | INFO | dcs.tests.user_request_handler:login:34 - [RESPONSE] cookie: 88ab5f7581216778aed56970f68532cca5c2a18a +2022-06-28 15:05:15.081 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 52389): {'cookie': '88ab5f7581216778aed56970f68532cca5c2a18a'} +2022-06-28 15:05:58.183 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 52393) +2022-06-28 15:05:58.324 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 15:05:58.340 | INFO | dcs.tests.user_request_handler:login:26 - [REQUEST] login +2022-06-28 15:05:58.936 | INFO | dcs.tests.user_request_handler:login:34 - [RESPONSE] cookie: 9b4349c2a1c3baffbf0dd923f92fc44b8aca505d +2022-06-28 15:05:58.983 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 52393): {'cookie': '9b4349c2a1c3baffbf0dd923f92fc44b8aca505d'} +2022-06-28 15:07:49.742 | DEBUG | __main__::14 - reading config args... +2022-06-28 15:07:49.742 | DEBUG | __main__::21 - starting the main server... +2022-06-28 15:07:49.742 | DEBUG | __main__::26 - starting the requester server... +2022-06-28 15:07:49.742 | DEBUG | __main__::31 - starting the spider server... +2022-06-28 15:07:49.757 | DEBUG | __main__::36 - starting the user server... +2022-06-28 15:07:49.835 | DEBUG | __main__::41 - starting the communicator server... +2022-06-28 15:07:52.488 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 52401) +2022-06-28 15:07:52.614 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 15:07:52.614 | INFO | dcs.tests.user_request_handler:login:26 - [REQUEST] login +2022-06-28 15:07:53.445 | INFO | dcs.tests.user_request_handler:login:34 - [RESPONSE] login: 86b46a0f4bbf9ccbe998ace76ac57c5e2f1d056a +2022-06-28 15:07:53.461 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 52401): {'cookie': '86b46a0f4bbf9ccbe998ace76ac57c5e2f1d056a'} +2022-06-28 15:09:06.121 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 52408) +2022-06-28 15:09:06.153 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 15:09:06.200 | INFO | dcs.tests.user_request_handler:login:26 - [REQUEST] login +2022-06-28 15:09:07.110 | INFO | dcs.tests.user_request_handler:login:34 - [RESPONSE] login: 762010ca505017652bd14ae5734779f67b3b2007 +2022-06-28 15:09:07.158 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 52408): {'cookie': '762010ca505017652bd14ae5734779f67b3b2007'} +2022-06-28 15:10:42.794 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 52414) +2022-06-28 15:10:42.872 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 15:10:42.872 | INFO | dcs.tests.user_request_handler:login:26 - [REQUEST] login +2022-06-28 15:10:43.815 | INFO | dcs.tests.user_request_handler:login:34 - [RESPONSE] login: d04b5b59c6bed3c9f87e2488ff13c6319c71a9f2 +2022-06-28 15:10:43.830 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 52414): {'cookie': 'd04b5b59c6bed3c9f87e2488ff13c6319c71a9f2'} +2022-06-28 15:11:22.808 | DEBUG | __main__::14 - reading config args... +2022-06-28 15:11:22.808 | DEBUG | __main__::21 - starting the main server... +2022-06-28 15:11:22.808 | DEBUG | __main__::26 - starting the requester server... +2022-06-28 15:11:22.808 | DEBUG | __main__::31 - starting the spider server... +2022-06-28 15:11:22.824 | DEBUG | __main__::36 - starting the user server... +2022-06-28 15:11:22.871 | DEBUG | __main__::41 - starting the communicator server... +2022-06-28 15:11:25.603 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 52426) +2022-06-28 15:11:25.634 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 15:11:25.677 | INFO | dcs.tests.user_request_handler:login:26 - [REQUEST] login +2022-06-28 15:11:26.273 | INFO | dcs.tests.user_request_handler:login:35 - [RESPONSE] login: 81f7d8aeadc116583a7602b11088ca49ef37291d +2022-06-28 15:11:26.305 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 52426): {'cookie': '81f7d8aeadc116583a7602b11088ca49ef37291d', 'login': '81f7d8aeadc116583a7602b11088ca49ef37291d'} +2022-06-28 15:11:36.207 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 52430) +2022-06-28 15:13:27.254 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 9000) +2022-06-28 15:13:27.286 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 15:13:27.301 | INFO | dcs.tests.user_request_handler:register:38 - [REQUEST] register +2022-06-28 15:13:27.897 | INFO | dcs.tests.user_request_handler:register:46 - [RESPONSE] register: 用户名已存在,注册失败 +2022-06-28 15:13:27.928 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 9000): {'register': '用户名已存在,注册失败'} +2022-06-28 15:13:27.944 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 15:13:27.976 | INFO | dcs.tests.user_request_handler:login:26 - [REQUEST] login +2022-06-28 15:13:29.012 | INFO | dcs.tests.user_request_handler:login:35 - [RESPONSE] login: 7c947cd7366e9fdb7320e5450bf336b0448865f2 +2022-06-28 15:13:29.059 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 9000): {'cookie': '7c947cd7366e9fdb7320e5450bf336b0448865f2', 'login': '7c947cd7366e9fdb7320e5450bf336b0448865f2'} +2022-06-28 15:13:29.184 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 15:13:29.231 | INFO | dcs.tests.user_request_handler:report_state:16 - [REQUEST] report free +2022-06-28 15:13:29.326 | INFO | dcs.tests.user_request_handler:report_state:23 - [RESPONSE] report free: success marked 7c947cd7366e9fdb7320e5450bf336b0448865f2 +2022-06-28 15:13:29.451 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 9000): {'report_free': 'success marked 7c947cd7366e9fdb7320e5450bf336b0448865f2'} +2022-06-28 15:13:29.624 | INFO | dcs.tests.spider_task:distribute_task:154 - distributing task: (('127.0.0.1', 9000), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 1, 'pages_end': 10, 'cookie': '7c947cd7366e9fdb7320e5450bf336b0448865f2'}) +2022-06-28 15:13:29.624 | DEBUG | dcs.tests.spider_task:distribute_task:167 - [, , ] +2022-06-28 15:13:29.624 | DEBUG | dcs.tests.spider_task:run:203 - [] +2022-06-28 15:13:29.624 | DEBUG | dcs.tests.spider_task:run:215 - generating remote task +2022-06-28 15:13:29.624 | INFO | dcs.requester:get:44 - sending crawl request to ('127.0.0.1', 9000) +2022-06-28 15:13:29.655 | DEBUG | dcs.tests.spider_task:run:223 - generating local task +2022-06-28 15:13:29.671 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 9000): {'crawling state': 'starting, please wait...'} +2022-06-28 15:13:29.859 | DEBUG | dcs.tests.spider_task:run:223 - generating local task +2022-06-28 15:13:29.875 | DEBUG | dcs.requester:run:70 - receiving remote task result, saving... +2022-06-28 15:13:29.875 | DEBUG | dcs.requester:run:74 - result: {'0': {'name': 'remote', 'college': 'remote', 'major': 'remote', 'paper': 'remote'}} +2022-06-28 15:13:29.906 | INFO | dcs.tests.requestHandler:run:20 - [REQUEST] end +2022-06-28 15:13:29.906 | DEBUG | dcs.tests.requestHandler:run:21 - communication over from ('127.0.0.1', 9000)! +2022-06-28 15:13:30.644 | DEBUG | dcs.tests.spider_task:test_simulation:102 - simulation crawling... +2022-06-28 15:13:30.644 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:13:30.958 | DEBUG | dcs.tests.spider_task:test_simulation:102 - simulation crawling... +2022-06-28 15:13:30.958 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:13:31.020 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:13:31.350 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:13:31.460 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:13:31.774 | INFO | dcs.tests.spider_task:run:118 - partial crawl task finished: (('127.0.0.1', 9000), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 4, 'pages_end': 7, 'cookie': '7c947cd7366e9fdb7320e5450bf336b0448865f2'}) +2022-06-28 15:13:31.978 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:13:32.449 | INFO | dcs.tests.spider_task:run:118 - partial crawl task finished: (('127.0.0.1', 9000), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 7, 'pages_end': 10, 'cookie': '7c947cd7366e9fdb7320e5450bf336b0448865f2'}) +2022-06-28 15:13:32.449 | DEBUG | dcs.tests.spider_task:compose_result:177 - composing task... +2022-06-28 15:13:32.967 | DEBUG | dcs.tests.spider_task:compose_result:192 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-28 15:13:33.453 | DEBUG | dcs.tests.spider_task:compose_result:192 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-28 15:13:33.501 | INFO | dcs.communicate:run:33 - sending response to ('127.0.0.1', 9000): {'0': {'name': 'remote', 'college': 'remote', 'major': 'remote', 'paper': 'remote'}, 1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 'type': 'response'} +2022-06-28 15:16:17.728 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 9000) +2022-06-28 15:16:17.806 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 15:16:17.822 | INFO | dcs.tests.user_request_handler:register:38 - [REQUEST] register +2022-06-28 15:16:18.419 | INFO | dcs.tests.user_request_handler:register:46 - [RESPONSE] register: 用户名已存在,注册失败 +2022-06-28 15:16:18.450 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 9000): {'register': '用户名已存在,注册失败'} +2022-06-28 15:16:18.513 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 15:16:18.529 | INFO | dcs.tests.user_request_handler:login:26 - [REQUEST] login +2022-06-28 15:16:19.470 | INFO | dcs.tests.user_request_handler:login:35 - [RESPONSE] login: e3939532880fb270dbec8a63ba01fda943df5e14 +2022-06-28 15:16:19.486 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 9000): {'cookie': 'e3939532880fb270dbec8a63ba01fda943df5e14', 'login': 'e3939532880fb270dbec8a63ba01fda943df5e14'} +2022-06-28 15:16:19.502 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 15:16:19.517 | INFO | dcs.tests.user_request_handler:report_state:16 - [REQUEST] report free +2022-06-28 15:16:19.517 | INFO | dcs.tests.user_request_handler:report_state:23 - [RESPONSE] report free: success marked e3939532880fb270dbec8a63ba01fda943df5e14 +2022-06-28 15:16:19.533 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 9000): {'report_free': 'success marked e3939532880fb270dbec8a63ba01fda943df5e14'} +2022-06-28 15:16:19.721 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 9000): {'crawling state': 'starting, please wait...'} +2022-06-28 15:16:19.737 | INFO | dcs.tests.requestHandler:run:20 - [REQUEST] end +2022-06-28 15:16:19.737 | INFO | dcs.tests.spider_task:distribute_task:154 - distributing task: (('127.0.0.1', 9000), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 1, 'pages_end': 10, 'cookie': 'e3939532880fb270dbec8a63ba01fda943df5e14'}) +2022-06-28 15:16:19.768 | DEBUG | dcs.tests.requestHandler:run:21 - communication over from ('127.0.0.1', 9000)! +2022-06-28 15:16:19.816 | DEBUG | dcs.tests.spider_task:distribute_task:167 - [, , ] +2022-06-28 15:16:19.816 | DEBUG | dcs.tests.spider_task:run:203 - [] +2022-06-28 15:16:19.816 | DEBUG | dcs.tests.spider_task:run:215 - generating remote task +2022-06-28 15:16:19.816 | INFO | dcs.requester:get:44 - sending crawl request to ('127.0.0.1', 9000) +2022-06-28 15:16:19.972 | DEBUG | dcs.tests.spider_task:run:223 - generating local task +2022-06-28 15:16:19.988 | DEBUG | dcs.tests.spider_task:run:223 - generating local task +2022-06-28 15:16:20.004 | DEBUG | dcs.requester:run:70 - receiving remote task result, saving... +2022-06-28 15:16:20.004 | DEBUG | dcs.requester:run:74 - result: {'0': {'name': 'remote', 'college': 'remote', 'major': 'remote', 'paper': 'remote'}} +2022-06-28 15:16:20.977 | DEBUG | dcs.tests.spider_task:test_simulation:102 - simulation crawling... +2022-06-28 15:16:20.977 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:16:20.993 | DEBUG | dcs.tests.spider_task:test_simulation:102 - simulation crawling... +2022-06-28 15:16:20.993 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:16:21.338 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:16:21.448 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:16:22.045 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:16:22.343 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:16:22.531 | INFO | dcs.tests.spider_task:run:118 - partial crawl task finished: (('127.0.0.1', 9000), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 4, 'pages_end': 7, 'cookie': 'e3939532880fb270dbec8a63ba01fda943df5e14'}) +2022-06-28 15:16:22.547 | DEBUG | dcs.tests.spider_task:run:223 - generating local task +2022-06-28 15:16:22.782 | INFO | dcs.tests.spider_task:run:118 - partial crawl task finished: (('127.0.0.1', 9000), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 7, 'pages_end': 10, 'cookie': 'e3939532880fb270dbec8a63ba01fda943df5e14'}) +2022-06-28 15:16:22.782 | DEBUG | dcs.tests.spider_task:run:223 - generating local task +2022-06-28 15:16:22.799 | DEBUG | dcs.tests.spider_task:compose_result:177 - composing task... +2022-06-28 15:16:23.050 | DEBUG | dcs.tests.spider_task:compose_result:192 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-28 15:16:23.332 | DEBUG | dcs.tests.spider_task:test_simulation:102 - simulation crawling... +2022-06-28 15:16:23.332 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:16:23.348 | DEBUG | dcs.tests.spider_task:compose_result:192 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-28 15:16:23.395 | INFO | dcs.communicate:run:33 - sending response to ('127.0.0.1', 9000): {'0': {'name': 'remote', 'college': 'remote', 'major': 'remote', 'paper': 'remote'}, 1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 'type': 'response'} +2022-06-28 15:16:23.599 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:16:23.740 | DEBUG | dcs.tests.spider_task:test_simulation:102 - simulation crawling... +2022-06-28 15:16:23.740 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:16:24.039 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:16:24.133 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:16:24.525 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:16:24.557 | INFO | dcs.tests.spider_task:run:118 - partial crawl task finished: (('127.0.0.1', 9000), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 4, 'pages_end': 7, 'cookie': 'e3939532880fb270dbec8a63ba01fda943df5e14'}) +2022-06-28 15:16:24.871 | INFO | dcs.tests.spider_task:run:118 - partial crawl task finished: (('127.0.0.1', 9000), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 7, 'pages_end': 10, 'cookie': 'e3939532880fb270dbec8a63ba01fda943df5e14'}) +2022-06-28 15:17:07.523 | DEBUG | __main__::14 - reading config args... +2022-06-28 15:17:07.523 | DEBUG | __main__::21 - starting the main server... +2022-06-28 15:17:07.523 | DEBUG | __main__::26 - starting the requester server... +2022-06-28 15:17:07.523 | DEBUG | __main__::31 - starting the spider server... +2022-06-28 15:17:07.532 | DEBUG | __main__::36 - starting the user server... +2022-06-28 15:17:07.579 | DEBUG | __main__::41 - starting the communicator server... +2022-06-28 15:17:39.064 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 52487) +2022-06-28 15:19:21.217 | DEBUG | __main__::14 - reading config args... +2022-06-28 15:19:21.217 | DEBUG | __main__::21 - starting the main server... +2022-06-28 15:19:21.217 | DEBUG | __main__::26 - starting the requester server... +2022-06-28 15:19:21.217 | DEBUG | __main__::31 - starting the spider server... +2022-06-28 15:19:21.232 | DEBUG | __main__::36 - starting the user server... +2022-06-28 15:19:21.327 | DEBUG | __main__::41 - starting the communicator server... +2022-06-28 15:19:24.922 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 52491) +2022-06-28 15:22:48.102 | DEBUG | __main__::14 - reading config args... +2022-06-28 15:22:48.117 | DEBUG | __main__::21 - starting the main server... +2022-06-28 15:22:48.117 | DEBUG | __main__::26 - starting the requester server... +2022-06-28 15:22:48.117 | DEBUG | __main__::31 - starting the spider server... +2022-06-28 15:22:48.133 | DEBUG | __main__::36 - starting the user server... +2022-06-28 15:22:48.227 | DEBUG | __main__::41 - starting the communicator server... +2022-06-28 15:22:52.372 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 52497) +2022-06-28 15:25:25.911 | DEBUG | __main__::14 - reading config args... +2022-06-28 15:25:25.911 | DEBUG | __main__::21 - starting the main server... +2022-06-28 15:25:25.911 | DEBUG | __main__::26 - starting the requester server... +2022-06-28 15:25:25.911 | DEBUG | __main__::31 - starting the spider server... +2022-06-28 15:25:25.920 | DEBUG | __main__::36 - starting the user server... +2022-06-28 15:25:25.951 | DEBUG | __main__::41 - starting the communicator server... +2022-06-28 15:25:30.566 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 52502) +2022-06-28 15:25:30.613 | INFO | dcs.spider:run:45 - processing spider request... +2022-06-28 15:25:30.629 | INFO | dcs.tests.spider_task:distribute_task:154 - distributing task: (('127.0.0.1', 52502), {'action': 'crawl zhiwang', 'word': 'computer'}) +2022-06-28 15:25:30.629 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 52502): {'crawling state': 'starting, please wait...'} +2022-06-28 15:28:17.011 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 52509) +2022-06-28 15:28:17.058 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 15:28:17.074 | INFO | dcs.tests.user_request_handler:login:26 - [REQUEST] login +2022-06-28 15:28:18.251 | INFO | dcs.tests.user_request_handler:login:35 - [RESPONSE] login: 89cd39c2e271951c496fb7ae7e6043814832741b +2022-06-28 15:28:18.299 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 52509): {'cookie': '89cd39c2e271951c496fb7ae7e6043814832741b', 'login': '89cd39c2e271951c496fb7ae7e6043814832741b'} +2022-06-28 15:28:59.537 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 9000) +2022-06-28 15:28:59.631 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 15:28:59.662 | INFO | dcs.tests.user_request_handler:register:38 - [REQUEST] register +2022-06-28 15:29:00.196 | INFO | dcs.tests.user_request_handler:register:46 - [RESPONSE] register: 用户名已存在,注册失败 +2022-06-28 15:29:00.212 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 9000): {'register': '用户名已存在,注册失败'} +2022-06-28 15:29:00.274 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 15:29:00.290 | INFO | dcs.tests.user_request_handler:login:26 - [REQUEST] login +2022-06-28 15:29:01.342 | INFO | dcs.tests.user_request_handler:login:35 - [RESPONSE] login: e81ec809257de9e5b0ac79eb4a134d3015add49d +2022-06-28 15:29:01.389 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 9000): {'cookie': 'e81ec809257de9e5b0ac79eb4a134d3015add49d', 'login': 'e81ec809257de9e5b0ac79eb4a134d3015add49d'} +2022-06-28 15:29:01.452 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-28 15:29:01.483 | INFO | dcs.tests.user_request_handler:report_state:16 - [REQUEST] report free +2022-06-28 15:29:01.483 | INFO | dcs.tests.user_request_handler:report_state:23 - [RESPONSE] report free: success marked e81ec809257de9e5b0ac79eb4a134d3015add49d +2022-06-28 15:29:01.483 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 9000): {'report_free': 'success marked e81ec809257de9e5b0ac79eb4a134d3015add49d'} +2022-06-28 15:29:01.515 | INFO | dcs.spider:run:45 - processing spider request... +2022-06-28 15:29:01.515 | INFO | dcs.tests.spider_task:distribute_task:154 - distributing task: (('127.0.0.1', 9000), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 1, 'pages_end': 10, 'cookie': 'e81ec809257de9e5b0ac79eb4a134d3015add49d'}) +2022-06-28 15:29:01.515 | DEBUG | dcs.tests.spider_task:distribute_task:167 - [, , ] +2022-06-28 15:29:01.531 | INFO | dcs.communicate:run:26 - sending response to ('127.0.0.1', 9000): {'crawling state': 'starting, please wait...'} +2022-06-28 15:29:01.546 | DEBUG | dcs.tests.spider_task:run:203 - [] +2022-06-28 15:29:01.625 | INFO | dcs.tests.requestHandler:run:20 - [REQUEST] end +2022-06-28 15:29:01.641 | DEBUG | dcs.tests.spider_task:run:215 - generating remote task +2022-06-28 15:29:01.672 | DEBUG | dcs.tests.requestHandler:run:21 - communication over from ('127.0.0.1', 9000)! +2022-06-28 15:29:01.719 | INFO | dcs.requester:get:44 - sending crawl request to ('127.0.0.1', 9000) +2022-06-28 15:29:01.845 | DEBUG | dcs.tests.spider_task:run:223 - generating local task +2022-06-28 15:29:01.923 | DEBUG | dcs.tests.spider_task:run:223 - generating local task +2022-06-28 15:29:01.923 | DEBUG | dcs.requester:run:70 - receiving remote task result, saving... +2022-06-28 15:29:01.923 | DEBUG | dcs.requester:run:74 - result: {'0': {'name': 'remote', 'college': 'remote', 'major': 'remote', 'paper': 'remote'}} +2022-06-28 15:29:02.803 | DEBUG | dcs.tests.spider_task:test_simulation:102 - simulation crawling... +2022-06-28 15:29:02.818 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:29:02.913 | DEBUG | dcs.tests.spider_task:test_simulation:102 - simulation crawling... +2022-06-28 15:29:02.913 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:29:03.195 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:29:03.383 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:29:03.446 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:29:03.964 | INFO | dcs.tests.spider_task:write2database:35 - writing to database: test +2022-06-28 15:29:04.027 | INFO | dcs.tests.spider_task:run:118 - partial crawl task finished: (('127.0.0.1', 9000), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 7, 'pages_end': 10, 'cookie': 'e81ec809257de9e5b0ac79eb4a134d3015add49d'}) +2022-06-28 15:29:04.828 | INFO | dcs.tests.spider_task:run:118 - partial crawl task finished: (('127.0.0.1', 9000), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 4, 'pages_end': 7, 'cookie': 'e81ec809257de9e5b0ac79eb4a134d3015add49d'}) +2022-06-28 15:29:04.874 | DEBUG | dcs.tests.spider_task:compose_result:177 - composing task... +2022-06-28 15:29:05.189 | DEBUG | dcs.tests.spider_task:compose_result:192 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-28 15:29:05.471 | DEBUG | dcs.tests.spider_task:compose_result:192 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-28 15:29:05.487 | INFO | dcs.communicate:run:33 - sending response to ('127.0.0.1', 9000): {'0': {'name': 'remote', 'college': 'remote', 'major': 'remote', 'paper': 'remote'}, 1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 'type': 'response'} +2022-06-29 19:47:45.641 | DEBUG | __main__::14 - reading config args... +2022-06-29 19:47:45.641 | DEBUG | __main__::21 - starting the main server... +2022-06-29 19:47:45.641 | DEBUG | __main__::26 - starting the requester server... +2022-06-29 19:47:45.641 | DEBUG | __main__::31 - starting the spider server... +2022-06-29 19:47:45.657 | DEBUG | __main__::36 - starting the user server... +2022-06-29 19:47:45.735 | DEBUG | __main__::41 - starting the communicator server... +2022-06-29 19:47:55.071 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 53943) +2022-06-29 19:47:55.318 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-29 19:47:55.333 | INFO | dcs.tests.user_request_handler:register:35 - [REQUEST] register +2022-06-29 19:47:55.881 | INFO | dcs.tests.user_request_handler:register:42 - [RESPONSE] register: 注册成功 +2022-06-29 19:47:55.992 | INFO | dcs.communicate:run:40 - sending response to ('127.0.0.1', 53943): {'register': '注册成功'} +2022-06-29 19:56:51.157 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 54028) +2022-06-29 19:56:51.188 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-29 19:56:51.188 | INFO | dcs.tests.user_request_handler:login:25 - [REQUEST] login +2022-06-29 19:56:52.391 | INFO | dcs.tests.user_request_handler:login:32 - [RESPONSE] login: 5e3651e55fc608b75fbbe505b896f046b4c6891a +2022-06-29 19:56:52.407 | INFO | dcs.communicate:run:40 - sending response to ('127.0.0.1', 54028): {'cookie': '5e3651e55fc608b75fbbe505b896f046b4c6891a'} +2022-06-29 19:57:51.720 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 54039) +2022-06-29 19:57:51.798 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-29 19:57:51.813 | INFO | dcs.tests.user_request_handler:login:25 - [REQUEST] login +2022-06-29 19:57:53.173 | INFO | dcs.tests.user_request_handler:login:32 - [RESPONSE] login: 9e717ab6d5a3862c7698c66c5546d3c9911e271a +2022-06-29 19:57:53.188 | INFO | dcs.communicate:run:40 - sending response to ('127.0.0.1', 54039): {'cookie': '9e717ab6d5a3862c7698c66c5546d3c9911e271a'} +2022-06-29 20:00:29.847 | DEBUG | __main__::14 - reading config args... +2022-06-29 20:00:29.847 | DEBUG | __main__::21 - starting the main server... +2022-06-29 20:00:29.847 | DEBUG | __main__::26 - starting the requester server... +2022-06-29 20:00:29.847 | DEBUG | __main__::31 - starting the spider server... +2022-06-29 20:00:29.862 | DEBUG | __main__::36 - starting the user server... +2022-06-29 20:00:29.956 | DEBUG | __main__::41 - starting the communicator server... +2022-06-29 20:00:32.644 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 54069) +2022-06-29 20:00:32.722 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-29 20:00:32.753 | INFO | dcs.tests.user_request_handler:login:25 - [REQUEST] login +2022-06-29 20:00:33.534 | INFO | dcs.tests.user_request_handler:login:32 - [RESPONSE] login: 6ec8d6d01310d5b9b96b987e19569dad310fb1cc +2022-06-29 20:00:33.565 | INFO | dcs.communicate:run:40 - sending response to ('127.0.0.1', 54069): {'login': '6ec8d6d01310d5b9b96b987e19569dad310fb1cc'} +2022-06-29 20:01:16.042 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 54084) +2022-06-29 20:01:16.073 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-29 20:01:16.073 | INFO | dcs.tests.user_request_handler:login:25 - [REQUEST] login +2022-06-29 20:01:17.385 | INFO | dcs.tests.user_request_handler:login:32 - [RESPONSE] login: c787a940860f59040b4d1e02921fa223cb46a869 +2022-06-29 20:01:17.401 | INFO | dcs.communicate:run:40 - sending response to ('127.0.0.1', 54084): {'login': 'c787a940860f59040b4d1e02921fa223cb46a869'} +2022-06-29 20:01:17.495 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 54088) +2022-06-29 20:01:17.604 | INFO | dcs.spider:run:22 - processing spider request... +2022-06-29 20:01:17.651 | INFO | dcs.tests.spider_task:distribute_task:136 - distributing task: (('127.0.0.1', 54088), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 1, 'pages_end': 10, 'cookie': 'c787a940860f59040b4d1e02921fa223cb46a869'}) +2022-06-29 20:01:17.651 | DEBUG | dcs.tests.spider_task:distribute_task:149 - [, , ] +2022-06-29 20:01:17.651 | DEBUG | dcs.tests.spider_task:run:185 - [] +2022-06-29 20:01:17.651 | DEBUG | dcs.tests.spider_task:run:205 - generating local task +2022-06-29 20:01:17.667 | INFO | dcs.communicate:run:40 - sending response to ('127.0.0.1', 54088): {'crawling state': 'starting, please wait...'} +2022-06-29 20:01:17.667 | DEBUG | dcs.tests.spider_task:run:205 - generating local task +2022-06-29 20:01:17.760 | DEBUG | dcs.tests.spider_task:run:205 - generating local task +2022-06-29 20:01:18.745 | DEBUG | dcs.tests.spider_task:test_simulation:84 - simulation crawling... +2022-06-29 20:01:18.776 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:01:19.104 | DEBUG | dcs.tests.spider_task:test_simulation:84 - simulation crawling... +2022-06-29 20:01:19.182 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:01:19.542 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:01:19.745 | DEBUG | dcs.tests.spider_task:test_simulation:84 - simulation crawling... +2022-06-29 20:01:19.745 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:01:20.135 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:01:20.245 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:01:20.635 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:01:20.995 | INFO | dcs.tests.spider_task:run:100 - partial crawl task finished: (('127.0.0.1', 54088), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 4, 'pages_end': 7, 'cookie': 'c787a940860f59040b4d1e02921fa223cb46a869'}) +2022-06-29 20:01:21.260 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:01:21.729 | INFO | dcs.tests.spider_task:run:100 - partial crawl task finished: (('127.0.0.1', 54088), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 1, 'pages_end': 4, 'cookie': 'c787a940860f59040b4d1e02921fa223cb46a869'}) +2022-06-29 20:01:21.917 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:01:22.917 | INFO | dcs.tests.spider_task:run:100 - partial crawl task finished: (('127.0.0.1', 54088), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 7, 'pages_end': 10, 'cookie': 'c787a940860f59040b4d1e02921fa223cb46a869'}) +2022-06-29 20:01:22.932 | DEBUG | dcs.tests.spider_task:compose_result:159 - composing task... +2022-06-29 20:01:23.323 | DEBUG | dcs.tests.spider_task:compose_result:174 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 7: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-29 20:01:23.713 | DEBUG | dcs.tests.spider_task:compose_result:174 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 7: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-29 20:01:24.292 | DEBUG | dcs.tests.spider_task:compose_result:174 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-29 20:04:41.308 | DEBUG | __main__::14 - reading config args... +2022-06-29 20:04:41.308 | DEBUG | __main__::21 - starting the main server... +2022-06-29 20:04:41.308 | DEBUG | __main__::26 - starting the requester server... +2022-06-29 20:04:41.308 | DEBUG | __main__::31 - starting the spider server... +2022-06-29 20:04:41.323 | DEBUG | __main__::36 - starting the user server... +2022-06-29 20:04:41.355 | DEBUG | __main__::41 - starting the communicator server... +2022-06-29 20:04:44.276 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 54136) +2022-06-29 20:04:44.339 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-29 20:04:44.355 | INFO | dcs.tests.user_request_handler:login:25 - [REQUEST] login +2022-06-29 20:04:45.417 | INFO | dcs.tests.user_request_handler:login:32 - [RESPONSE] login: e3b5b304e274e2cd45e284af54d653b75343bd4f +2022-06-29 20:04:45.511 | INFO | dcs.communicate:run:40 - sending response to ('127.0.0.1', 54136): {'login': 'e3b5b304e274e2cd45e284af54d653b75343bd4f'} +2022-06-29 20:04:45.526 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 54140) +2022-06-29 20:04:45.558 | INFO | dcs.spider:run:22 - processing spider request... +2022-06-29 20:04:45.573 | INFO | dcs.tests.spider_task:distribute_task:136 - distributing task: (('127.0.0.1', 54140), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 1, 'pages_end': 10, 'cookie': 'e3b5b304e274e2cd45e284af54d653b75343bd4f'}) +2022-06-29 20:04:45.573 | DEBUG | dcs.tests.spider_task:distribute_task:149 - [, , ] +2022-06-29 20:04:45.573 | DEBUG | dcs.tests.spider_task:run:185 - [] +2022-06-29 20:04:45.573 | DEBUG | dcs.tests.spider_task:run:205 - generating local task +2022-06-29 20:04:45.605 | INFO | dcs.communicate:run:40 - sending response to ('127.0.0.1', 54140): {'crawl zhiwang': 'starting, please wait...'} +2022-06-29 20:04:45.636 | DEBUG | dcs.tests.spider_task:run:205 - generating local task +2022-06-29 20:04:45.683 | DEBUG | dcs.tests.spider_task:run:205 - generating local task +2022-06-29 20:04:46.698 | DEBUG | dcs.tests.spider_task:test_simulation:84 - simulation crawling... +2022-06-29 20:04:46.698 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:04:46.745 | DEBUG | dcs.tests.spider_task:test_simulation:84 - simulation crawling... +2022-06-29 20:04:46.745 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:04:47.167 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:04:47.183 | DEBUG | dcs.tests.spider_task:test_simulation:84 - simulation crawling... +2022-06-29 20:04:47.183 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:04:47.245 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:04:47.448 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:04:47.917 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:04:47.964 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:04:48.011 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:04:48.198 | INFO | dcs.tests.spider_task:run:100 - partial crawl task finished: (('127.0.0.1', 54140), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 7, 'pages_end': 10, 'cookie': 'e3b5b304e274e2cd45e284af54d653b75343bd4f'}) +2022-06-29 20:04:48.511 | INFO | dcs.tests.spider_task:run:100 - partial crawl task finished: (('127.0.0.1', 54140), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 4, 'pages_end': 7, 'cookie': 'e3b5b304e274e2cd45e284af54d653b75343bd4f'}) +2022-06-29 20:04:48.589 | INFO | dcs.tests.spider_task:run:100 - partial crawl task finished: (('127.0.0.1', 54140), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 1, 'pages_end': 4, 'cookie': 'e3b5b304e274e2cd45e284af54d653b75343bd4f'}) +2022-06-29 20:04:48.589 | DEBUG | dcs.tests.spider_task:compose_result:159 - composing task... +2022-06-29 20:04:49.386 | DEBUG | dcs.tests.spider_task:compose_result:174 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 7: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 8: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 9: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-29 20:04:50.026 | DEBUG | dcs.tests.spider_task:compose_result:174 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 7: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 8: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 9: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-29 20:04:50.995 | DEBUG | dcs.tests.spider_task:compose_result:174 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 7: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 8: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 9: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-29 20:23:25.116 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 9000) +2022-06-29 20:23:25.225 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-29 20:23:25.241 | INFO | dcs.tests.user_request_handler:register:35 - [REQUEST] register +2022-06-29 20:23:25.616 | INFO | dcs.tests.user_request_handler:register:42 - [RESPONSE] register: 用户名已存在,注册失败 +2022-06-29 20:24:21.600 | DEBUG | __main__::14 - reading config args... +2022-06-29 20:24:21.600 | DEBUG | __main__::21 - starting the main server... +2022-06-29 20:24:21.600 | DEBUG | __main__::26 - starting the requester server... +2022-06-29 20:24:21.600 | DEBUG | __main__::31 - starting the spider server... +2022-06-29 20:24:21.632 | DEBUG | __main__::36 - starting the user server... +2022-06-29 20:24:21.725 | DEBUG | __main__::41 - starting the communicator server... +2022-06-29 20:24:34.241 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 9000) +2022-06-29 20:24:34.335 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-29 20:24:34.335 | INFO | dcs.tests.user_request_handler:register:35 - [REQUEST] register +2022-06-29 20:24:35.382 | INFO | dcs.tests.user_request_handler:register:42 - [RESPONSE] register: 用户名已存在,注册失败 +2022-06-29 20:24:35.444 | INFO | dcs.communicate:run:40 - sending response to ('127.0.0.1', 9000): {'register': '用户名已存在,注册失败'} +2022-06-29 20:24:35.569 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-29 20:24:35.569 | INFO | dcs.tests.user_request_handler:login:25 - [REQUEST] login +2022-06-29 20:24:37.116 | INFO | dcs.tests.user_request_handler:login:32 - [RESPONSE] login: e093dd66585292b073070fdcee8efeec9a135a30 +2022-06-29 20:24:37.319 | INFO | dcs.communicate:run:40 - sending response to ('127.0.0.1', 9000): {'login': 'e093dd66585292b073070fdcee8efeec9a135a30'} +2022-06-29 20:25:00.038 | DEBUG | __main__::14 - reading config args... +2022-06-29 20:25:00.038 | DEBUG | __main__::21 - starting the main server... +2022-06-29 20:25:00.038 | DEBUG | __main__::26 - starting the requester server... +2022-06-29 20:25:00.038 | DEBUG | __main__::31 - starting the spider server... +2022-06-29 20:25:00.054 | DEBUG | __main__::36 - starting the user server... +2022-06-29 20:25:00.148 | DEBUG | __main__::41 - starting the communicator server... +2022-06-29 20:25:02.132 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 9000) +2022-06-29 20:25:02.226 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-29 20:25:02.273 | INFO | dcs.tests.user_request_handler:register:35 - [REQUEST] register +2022-06-29 20:25:03.210 | INFO | dcs.tests.user_request_handler:register:42 - [RESPONSE] register: 用户名已存在,注册失败 +2022-06-29 20:25:03.398 | INFO | dcs.communicate:run:40 - sending response to ('127.0.0.1', 9000): {'register': '用户名已存在,注册失败'} +2022-06-29 20:25:03.523 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-29 20:25:03.538 | INFO | dcs.tests.user_request_handler:login:25 - [REQUEST] login +2022-06-29 20:25:04.351 | INFO | dcs.tests.user_request_handler:login:32 - [RESPONSE] login: db157a90920fc62a1a6001a64838144c8cb7c684 +2022-06-29 20:25:04.413 | INFO | dcs.communicate:run:40 - sending response to ('127.0.0.1', 9000): {'login': 'db157a90920fc62a1a6001a64838144c8cb7c684'} +2022-06-29 20:25:04.445 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-29 20:25:04.445 | INFO | dcs.tests.user_request_handler:report_state:16 - [REQUEST] report free +2022-06-29 20:25:04.445 | INFO | dcs.tests.user_request_handler:report_state:22 - [RESPONSE] report free: success marked db157a90920fc62a1a6001a64838144c8cb7c684 +2022-06-29 20:25:04.554 | INFO | dcs.communicate:run:40 - sending response to ('127.0.0.1', 9000): {'report_free': 'success marked db157a90920fc62a1a6001a64838144c8cb7c684'} +2022-06-29 20:25:04.585 | INFO | dcs.spider:run:22 - processing spider request... +2022-06-29 20:25:04.601 | INFO | dcs.tests.spider_task:distribute_task:136 - distributing task: (('127.0.0.1', 9000), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 1, 'pages_end': 10, 'cookie': 'db157a90920fc62a1a6001a64838144c8cb7c684'}) +2022-06-29 20:25:04.601 | DEBUG | dcs.tests.spider_task:distribute_task:149 - [, , ] +2022-06-29 20:25:04.601 | DEBUG | dcs.tests.spider_task:run:185 - [] +2022-06-29 20:25:04.601 | DEBUG | dcs.tests.spider_task:run:197 - generating remote task +2022-06-29 20:25:04.601 | INFO | dcs.requester:get:44 - sending crawl request to ('127.0.0.1', 9000) +2022-06-29 20:25:04.616 | INFO | dcs.communicate:run:40 - sending response to ('127.0.0.1', 9000): {'crawl zhiwang': 'starting, please wait...'} +2022-06-29 20:25:04.632 | DEBUG | dcs.tests.spider_task:run:205 - generating local task +2022-06-29 20:25:04.632 | INFO | dcs.tests.requestHandler:run:20 - [REQUEST] end +2022-06-29 20:25:04.632 | DEBUG | dcs.tests.requestHandler:run:21 - communication over from ('127.0.0.1', 9000)! +2022-06-29 20:25:04.632 | DEBUG | dcs.tests.spider_task:run:205 - generating local task +2022-06-29 20:25:04.695 | DEBUG | dcs.requester:run:68 - receiving remote task result, saving... +2022-06-29 20:25:04.835 | DEBUG | dcs.requester:run:72 - result: {'0': {'name': 'remote', 'college': 'remote', 'major': 'remote', 'paper': 'remote'}} +2022-06-29 20:25:05.507 | DEBUG | dcs.tests.spider_task:test_simulation:84 - simulation crawling... +2022-06-29 20:25:05.507 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:25:05.944 | DEBUG | dcs.tests.spider_task:test_simulation:84 - simulation crawling... +2022-06-29 20:25:05.944 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:25:06.148 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:25:06.351 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:25:06.882 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:25:07.038 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:25:07.335 | INFO | dcs.tests.spider_task:run:100 - partial crawl task finished: (('127.0.0.1', 9000), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 4, 'pages_end': 7, 'cookie': 'db157a90920fc62a1a6001a64838144c8cb7c684'}) +2022-06-29 20:25:08.194 | INFO | dcs.tests.spider_task:run:100 - partial crawl task finished: (('127.0.0.1', 9000), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 7, 'pages_end': 10, 'cookie': 'db157a90920fc62a1a6001a64838144c8cb7c684'}) +2022-06-29 20:25:08.257 | DEBUG | dcs.tests.spider_task:compose_result:159 - composing task... +2022-06-29 20:25:09.226 | DEBUG | dcs.tests.spider_task:compose_result:174 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-29 20:25:10.304 | DEBUG | dcs.tests.spider_task:compose_result:174 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-29 20:25:10.570 | INFO | dcs.communicate:run:47 - sending info to ('127.0.0.1', 9000): {'0': {'name': 'remote', 'college': 'remote', 'major': 'remote', 'paper': 'remote'}, 1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 'type': 'response'} +2022-06-29 20:25:20.007 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 54314) +2022-06-29 20:25:20.163 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-29 20:25:20.179 | INFO | dcs.tests.user_request_handler:login:25 - [REQUEST] login +2022-06-29 20:25:21.101 | INFO | dcs.tests.user_request_handler:login:32 - [RESPONSE] login: d49d977e3441b1f14823be39ebde5a56664f8c4b +2022-06-29 20:25:21.163 | INFO | dcs.communicate:run:40 - sending response to ('127.0.0.1', 54314): {'login': 'd49d977e3441b1f14823be39ebde5a56664f8c4b'} +2022-06-29 20:25:21.179 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 54318) +2022-06-29 20:25:21.304 | INFO | dcs.spider:run:22 - processing spider request... +2022-06-29 20:25:21.320 | INFO | dcs.communicate:run:40 - sending response to ('127.0.0.1', 54318): {'crawl zhiwang': 'starting, please wait...'} +2022-06-29 20:25:21.335 | INFO | dcs.tests.spider_task:distribute_task:136 - distributing task: (('127.0.0.1', 54318), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 1, 'pages_end': 10, 'cookie': 'd49d977e3441b1f14823be39ebde5a56664f8c4b'}) +2022-06-29 20:25:21.335 | DEBUG | dcs.tests.spider_task:distribute_task:149 - [, , ] +2022-06-29 20:25:21.335 | DEBUG | dcs.tests.spider_task:run:185 - [] +2022-06-29 20:25:21.335 | DEBUG | dcs.tests.spider_task:run:205 - generating local task +2022-06-29 20:25:21.507 | DEBUG | dcs.tests.spider_task:run:205 - generating local task +2022-06-29 20:25:21.601 | DEBUG | dcs.tests.spider_task:run:205 - generating local task +2022-06-29 20:25:22.507 | DEBUG | dcs.tests.spider_task:test_simulation:84 - simulation crawling... +2022-06-29 20:25:22.507 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:25:22.726 | DEBUG | dcs.tests.spider_task:test_simulation:84 - simulation crawling... +2022-06-29 20:25:22.726 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:25:22.726 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:25:22.757 | DEBUG | dcs.tests.spider_task:test_simulation:84 - simulation crawling... +2022-06-29 20:25:22.757 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:25:22.976 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:25:23.116 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:25:23.148 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:25:23.226 | INFO | dcs.tests.spider_task:run:100 - partial crawl task finished: (('127.0.0.1', 54318), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 4, 'pages_end': 7, 'cookie': 'd49d977e3441b1f14823be39ebde5a56664f8c4b'}) +2022-06-29 20:25:23.538 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:25:23.913 | INFO | dcs.tests.spider_task:run:100 - partial crawl task finished: (('127.0.0.1', 54318), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 7, 'pages_end': 10, 'cookie': 'd49d977e3441b1f14823be39ebde5a56664f8c4b'}) +2022-06-29 20:25:24.163 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:25:25.194 | INFO | dcs.tests.spider_task:run:100 - partial crawl task finished: (('127.0.0.1', 54318), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 1, 'pages_end': 4, 'cookie': 'd49d977e3441b1f14823be39ebde5a56664f8c4b'}) +2022-06-29 20:25:25.210 | DEBUG | dcs.tests.spider_task:compose_result:159 - composing task... +2022-06-29 20:25:26.351 | DEBUG | dcs.tests.spider_task:compose_result:174 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 7: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 8: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 9: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-29 20:25:26.929 | DEBUG | dcs.tests.spider_task:compose_result:174 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 7: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 8: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 9: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-29 20:25:27.413 | DEBUG | dcs.tests.spider_task:compose_result:174 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 6: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 7: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 8: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 9: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-29 20:27:37.836 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 54346) +2022-06-29 20:27:38.024 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-29 20:27:38.024 | INFO | dcs.tests.user_request_handler:login:25 - [REQUEST] login +2022-06-29 20:27:39.446 | INFO | dcs.tests.user_request_handler:login:32 - [RESPONSE] login: 3d05e7a36b5e59554c3dc905df5ee05af132a8fa +2022-06-29 20:27:53.321 | DEBUG | __main__::14 - reading config args... +2022-06-29 20:27:53.321 | DEBUG | __main__::21 - starting the main server... +2022-06-29 20:27:53.321 | DEBUG | __main__::26 - starting the requester server... +2022-06-29 20:27:53.321 | DEBUG | __main__::31 - starting the spider server... +2022-06-29 20:27:53.336 | DEBUG | __main__::36 - starting the user server... +2022-06-29 20:27:53.508 | DEBUG | __main__::41 - starting the communicator server... +2022-06-29 20:28:01.180 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 54352) +2022-06-29 20:28:01.227 | INFO | dcs.user_process:run:18 - processing user request... +2022-06-29 20:28:01.242 | INFO | dcs.tests.user_request_handler:login:25 - [REQUEST] login +2022-06-29 20:28:02.321 | INFO | dcs.tests.user_request_handler:login:32 - [RESPONSE] login: b0d50e48b0a3431deff0be2c27528254e1fe8041 +2022-06-29 20:28:02.337 | INFO | dcs.communicate:run:40 - sending response to ('127.0.0.1', 54352): {'login': 'b0d50e48b0a3431deff0be2c27528254e1fe8041'} +2022-06-29 20:28:02.384 | DEBUG | dcs.server:run:25 - connected to client ('127.0.0.1', 54356) +2022-06-29 20:28:02.493 | INFO | dcs.spider:run:22 - processing spider request... +2022-06-29 20:28:02.524 | INFO | dcs.tests.spider_task:distribute_task:136 - distributing task: (('127.0.0.1', 54356), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 1, 'pages_end': 10, 'cookie': 'b0d50e48b0a3431deff0be2c27528254e1fe8041'}) +2022-06-29 20:28:02.524 | DEBUG | dcs.tests.spider_task:distribute_task:149 - [, , ] +2022-06-29 20:28:02.571 | DEBUG | dcs.tests.spider_task:run:185 - [] +2022-06-29 20:28:02.571 | DEBUG | dcs.tests.spider_task:run:205 - generating local task +2022-06-29 20:28:02.587 | INFO | dcs.communicate:run:40 - sending response to ('127.0.0.1', 54356): {'crawl zhiwang': 'starting, please wait...'} +2022-06-29 20:28:02.602 | DEBUG | dcs.tests.spider_task:run:205 - generating local task +2022-06-29 20:28:02.680 | DEBUG | dcs.tests.spider_task:run:205 - generating local task +2022-06-29 20:28:03.196 | DEBUG | dcs.tests.spider_task:test_simulation:84 - simulation crawling... +2022-06-29 20:28:03.196 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:28:03.430 | DEBUG | dcs.tests.spider_task:test_simulation:84 - simulation crawling... +2022-06-29 20:28:03.430 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:28:03.618 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:28:03.962 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:28:04.087 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:28:04.149 | DEBUG | dcs.tests.spider_task:test_simulation:84 - simulation crawling... +2022-06-29 20:28:04.149 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:28:04.259 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:28:04.415 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:28:04.524 | INFO | dcs.tests.spider_task:run:100 - partial crawl task finished: (('127.0.0.1', 54356), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 4, 'pages_end': 7, 'cookie': 'b0d50e48b0a3431deff0be2c27528254e1fe8041'}) +2022-06-29 20:28:04.743 | INFO | dcs.tests.spider_task:run:100 - partial crawl task finished: (('127.0.0.1', 54356), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 7, 'pages_end': 10, 'cookie': 'b0d50e48b0a3431deff0be2c27528254e1fe8041'}) +2022-06-29 20:28:05.165 | INFO | dcs.tests.spider_task:write2database:17 - writing to database: test +2022-06-29 20:28:05.774 | INFO | dcs.tests.spider_task:run:100 - partial crawl task finished: (('127.0.0.1', 54356), {'action': 'crawl zhiwang', 'word': 'computer', 'pages_start': 1, 'pages_end': 4, 'cookie': 'b0d50e48b0a3431deff0be2c27528254e1fe8041'}) +2022-06-29 20:28:05.805 | DEBUG | dcs.tests.spider_task:compose_result:159 - composing task... +2022-06-29 20:28:06.493 | DEBUG | dcs.tests.spider_task:compose_result:174 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-29 20:28:06.790 | DEBUG | dcs.tests.spider_task:compose_result:174 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 7: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} +2022-06-29 20:28:07.477 | DEBUG | dcs.tests.spider_task:compose_result:174 - {1: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 2: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 3: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 4: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 5: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}, 7: {'name': 'test', 'college': 'test', 'major': 'test', 'paper': 'test'}} diff --git a/dcs/main.py b/dcs/main.py index 9143433..3cf8703 100644 --- a/dcs/main.py +++ b/dcs/main.py @@ -18,27 +18,23 @@ con.read(configFile, encoding='utf-8') items = con.items('server') items = dict(items) -logger.debug('starting the main server...') +logger.debug('starting the servers...') global_var.server = Server(int(items['port']), eval(items['buffer_size'])) global_var.server.daemon = items['daemon'] global_var.server.start() -logger.debug('starting the requester server...') global_var.requester = Requester() global_var.requester.daemon = True global_var.requester.start() -logger.debug('starting the spider server...') global_var.spider = Spider() global_var.spider.daemon = True global_var.spider.start() -logger.debug('starting the user server...') global_var.up = UP() global_var.up.daemon = True global_var.up.start() -logger.debug('starting the communicator server...') global_var.communicator = Communicator() global_var.communicator.daemon = True global_var.communicator.start() diff --git a/dcs/paper_author.csv b/dcs/paper_author.csv deleted file mode 100644 index 062d07e..0000000 --- a/dcs/paper_author.csv +++ /dev/null @@ -1,72 +0,0 @@ -name,college,major,paper -张颖颖,信阳职业技术学院,临床医学;生物医学工程;医学教育与医学边缘学科;,心理健康教育对大学生手机依赖及心理健康的干预效果 -史玉霞,,教育理论与教育管理;,心理健康教育对大学生手机依赖及心理健康的干预效果 -孟贝,信阳职业技术学院,临床医学;医药卫生方针政策与法律法规研究;教育理论与教育管理;,心理健康教育对大学生手机依赖及心理健康的干预效果 -张明明,河南省平舆县疾病预防控制中心,生物医学工程;临床医学;,心理健康教育对大学生手机依赖及心理健康的干预效果 -余晓齐,信阳职业技术学院,医学教育与医学边缘学科;临床医学;预防医学与卫生学;,心理健康教育对大学生手机依赖及心理健康的干预效果 -张建育,赣南师范大学,教育理论与教育管理;心理学;高等教育;,大学生手机依赖与孤独感的关系:有调节的中介 -朱月晶,赣南师范大学,教育理论与教育管理;,大学生手机依赖与孤独感的关系:有调节的中介 -廖子龙,赣南师范大学,互联网技术;高等教育;,大学生手机依赖与孤独感的关系:有调节的中介 -廖秀红,赣南师范大学,教育理论与教育管理;心理学;基础医学;,大学生手机依赖与孤独感的关系:有调节的中介 -Shili Yang,山东大学,电信技术;,Analysis on the PPP performance of Android smart-phone: a case study of Huawei P40 pro -Yan Xu,山东大学,电信技术;,Analysis on the PPP performance of Android smart-phone: a case study of Huawei P40 pro -Tianhe Xu,山东大学,电信技术;自然地理学和测绘学;海洋学;,Analysis on the PPP performance of Android smart-phone: a case study of Huawei P40 pro -Nan Jiang,山东大学,电信技术;,Analysis on the PPP performance of Android smart-phone: a case study of Huawei P40 pro -JIANG Keyu,吉林大学,电信技术;,Research on mobile phone swaying and receiving position in optical camera communication -CHI Xuefen,吉林大学,电信技术;自动化技术;,Research on mobile phone swaying and receiving position in optical camera communication -JI Fenglei,吉林大学,电信技术;,Research on mobile phone swaying and receiving position in optical camera communication -LI Shuai,吉林大学,电信技术;,Research on mobile phone swaying and receiving position in optical camera communication -肖爽,信阳师范学院,体育;中等教育;特种医学;,大学生手机成瘾倾向与其体育锻炼情况的相关性研究 -胡荣婷,湖南中医药大学,教育理论与教育管理;心理学;预防医学与卫生学;,述情障碍与手机使用模式对大学生手机依赖的影响 -周志涵,湖南食品药品职业学院,职业教育;基础医学;生物学;,述情障碍与手机使用模式对大学生手机依赖的影响 -张斌,湖南中医药大学,心理学;教育理论与教育管理;高等教育;,述情障碍与手机使用模式对大学生手机依赖的影响 -毛惠梨,长沙学院,教育理论与教育管理;中医学;心理学;,述情障碍与手机使用模式对大学生手机依赖的影响 -彭妤,湖南中医药大学,中医学;教育理论与教育管理;心理学;,述情障碍与手机使用模式对大学生手机依赖的影响 -熊思成,湖南中医药大学,教育理论与教育管理;精神病学;中医学;,述情障碍与手机使用模式对大学生手机依赖的影响 -李若瑜,安徽医科大学,教育理论与教育管理;基础医学;预防医学与卫生学;,医学生手机使用与童年期虐待经历及睡眠质量关联 -李书琴,安徽医科大学,教育理论与教育管理;预防医学与卫生学;基础医学;,医学生手机使用与童年期虐待经历及睡眠质量关联 -蒋志成,安徽医科大学,教育理论与教育管理;预防医学与卫生学;人口学与计划生育;,医学生手机使用与童年期虐待经历及睡眠质量关联 -金正格,安徽医科大学,教育理论与教育管理;基础医学;体育;,医学生手机使用与童年期虐待经历及睡眠质量关联 -汪瑞,安庆医药高等专科学校,教育理论与教育管理;计算机软件及计算机应用;肿瘤学;,医学生手机使用与童年期虐待经历及睡眠质量关联 -宋先兵,安徽医学高等专科学校,医学教育与医学边缘学科;教育理论与教育管理;中药学;,医学生手机使用与童年期虐待经历及睡眠质量关联 -张诗晨,安徽医科大学,预防医学与卫生学;教育理论与教育管理;医药卫生方针政策与法律法规研究;,医学生手机使用与童年期虐待经历及睡眠质量关联 -万宇辉,安徽医科大学,预防医学与卫生学;教育理论与教育管理;儿科学;,医学生手机使用与童年期虐待经历及睡眠质量关联 -舒纯,广东东软学院,电力工业;,《手机充电插座》 -周正红,扬州市职业大学,临床医学;教育理论与教育管理;感染性疾病及传染病;,基于学业倦怠为中介的高职大学生健康水平与手机成瘾的相关性研究 -刘恒旸,扬州市职业大学,临床医学;医药卫生方针政策与法律法规研究;医学教育与医学边缘学科;,基于学业倦怠为中介的高职大学生健康水平与手机成瘾的相关性研究 -张涛,扬州市职业大学,教育理论与教育管理;,基于学业倦怠为中介的高职大学生健康水平与手机成瘾的相关性研究 -张东,扬州市职业大学,教育理论与教育管理;,基于学业倦怠为中介的高职大学生健康水平与手机成瘾的相关性研究 -梁杰珍,惠州卫生职业技术学院,高等教育;职业教育;,青少年家庭亲密度和适应性与手机依赖的关系——以广东省惠州市中职校学生为例 -钟小川,惠州学院,教育理论与教育管理;心理学;高等教育;,青少年家庭亲密度和适应性与手机依赖的关系——以广东省惠州市中职校学生为例 -张建华,新乡医学院三全学院,教育理论与教育管理;心理学;一般化学工业;,大学生父母教养方式与手机依赖行为的关系:情绪智力的中介作用 -赵奕帆,新密市中医院,临床医学;精神病学;教育理论与教育管理;,大学生父母教养方式与手机依赖行为的关系:情绪智力的中介作用 -董锦锦,新乡医学院第二附属医院,临床医学;精神病学;教育理论与教育管理;,大学生父母教养方式与手机依赖行为的关系:情绪智力的中介作用 -杨世昌,新乡医学院第二附属医院,教育理论与教育管理;心理学;,大学生父母教养方式与手机依赖行为的关系:情绪智力的中介作用 -刘香华,福建技术师范学院,教育理论与教育管理;,核心自我评价对手机依赖的影响:无聊倾向、消极应对方式的并列中介作用 -林若汾,福建技术师范学院,教育理论与教育管理;,核心自我评价对手机依赖的影响:无聊倾向、消极应对方式的并列中介作用 -蒲敏,福建技术师范学院,教育理论与教育管理;,核心自我评价对手机依赖的影响:无聊倾向、消极应对方式的并列中介作用 -陈金萍,福建技术师范学院,教育理论与教育管理;,核心自我评价对手机依赖的影响:无聊倾向、消极应对方式的并列中介作用 -徐含笑,福建技术师范学院,教育理论与教育管理;,核心自我评价对手机依赖的影响:无聊倾向、消极应对方式的并列中介作用 -孟亚,黄河科技学院,生物医学工程;医学教育与医学边缘学科;教育理论与教育管理;,本科护生心理韧性在生活事件与手机成瘾间的中介作用 -张浩,郑州大学附属肿瘤医院,临床医学;医学教育与医学边缘学科;教育理论与教育管理;,本科护生心理韧性在生活事件与手机成瘾间的中介作用 -于晓静,新乡医学院,医学教育与医学边缘学科;医药卫生方针政策与法律法规研究;教育理论与教育管理;,本科护生心理韧性在生活事件与手机成瘾间的中介作用 -孟亚,,,本科护生心理韧性在生活事件与手机成瘾间的中介作用 -张浩,,,本科护生心理韧性在生活事件与手机成瘾间的中介作用 -于晓静,,,本科护生心理韧性在生活事件与手机成瘾间的中介作用 -曲杨,安徽医科大学,预防医学与卫生学;精神病学;眼科与耳鼻咽喉科;,大学生手机依赖与焦虑抑郁共病症状的关联 -伍晓艳,安徽医科大学,预防医学与卫生学;儿科学;教育理论与教育管理;,大学生手机依赖与焦虑抑郁共病症状的关联 -陶舒曼,安徽医科大学,教育理论与教育管理;预防医学与卫生学;精神病学;,大学生手机依赖与焦虑抑郁共病症状的关联 -杨娅娟,安徽医科大学,临床医学;医学教育与医学边缘学科;肿瘤学;,大学生手机依赖与焦虑抑郁共病症状的关联 -邹立巍,安徽医科大学第二附属医院,临床医学;特种医学;肿瘤学;,大学生手机依赖与焦虑抑郁共病症状的关联 -谢阳,安徽医科大学,教育理论与教育管理;精神病学;预防医学与卫生学;,大学生手机依赖与焦虑抑郁共病症状的关联 -李婷婷,安徽医科大学,预防医学与卫生学;教育理论与教育管理;精神病学;,大学生手机依赖与焦虑抑郁共病症状的关联 -张丹,安徽医科大学,预防医学与卫生学;精神病学;,大学生手机依赖与焦虑抑郁共病症状的关联 -翟爽,安徽医科大学,预防医学与卫生学;精神病学;,大学生手机依赖与焦虑抑郁共病症状的关联 -陶芳标,安徽医科大学,预防医学与卫生学;儿科学;教育理论与教育管理;,大学生手机依赖与焦虑抑郁共病症状的关联 -周正红,扬州市职业大学,临床医学;教育理论与教育管理;感染性疾病及传染病;,基于结构方程模型的高职大学生手机成瘾、自我控制和自我效能的相关研究 -刘恒旸,扬州市职业大学,临床医学;医药卫生方针政策与法律法规研究;医学教育与医学边缘学科;,基于结构方程模型的高职大学生手机成瘾、自我控制和自我效能的相关研究 -张涛,扬州市职业大学,教育理论与教育管理;,基于结构方程模型的高职大学生手机成瘾、自我控制和自我效能的相关研究 -张东,扬州市职业大学,教育理论与教育管理;,基于结构方程模型的高职大学生手机成瘾、自我控制和自我效能的相关研究 -丘彩霞,广州大学,教育理论与教育管理;心理学;儿科学;,儿童青少年手机依赖与抑郁的关系:父母婚姻关系的调节作用 -刘庆奇,广州大学,心理学;教育理论与教育管理;儿科学;,儿童青少年手机依赖与抑郁的关系:父母婚姻关系的调节作用 -聂衍刚,广州大学,心理学;教育理论与教育管理;社会学及统计学;,儿童青少年手机依赖与抑郁的关系:父母婚姻关系的调节作用 diff --git a/dcs/requester.py b/dcs/requester.py index b47e1cd..d80fce0 100644 --- a/dcs/requester.py +++ b/dcs/requester.py @@ -35,10 +35,6 @@ class Requester(threading.Thread): def is_remote_task_complete(self, client_address, request_map): pass - # for req in self.reqs: - # if req[0].client_address == client_address and req[0].request_map == request_map: - # print(req[1]) - # return req[1] def get(self, client_address, task: Spider_partial_task): logger.info(f'sending crawl request to {str(client_address)}') @@ -63,8 +59,6 @@ class Req(threading.Thread): self.responseJson = JSONDecoder().decode( read_bytes(socket_to_client, struct.unpack('!Q', socket_to_client.recv(8))[0]).decode( "utf-8")) - # res.append(self.responseJson) - # sleep(10) cookie = self.responseJson['cookie'] del self.responseJson['cookie'] logger.debug('receiving remote task result, saving...') diff --git a/dcs/server.py b/dcs/server.py index b2ce127..3202275 100644 --- a/dcs/server.py +++ b/dcs/server.py @@ -24,13 +24,6 @@ class Server(threading.Thread): # 将监听和处理分离, 以便同时响 client_socket, _ = self.server_socket.accept() logger.debug(f'connected to client {client_socket.getpeername()}') self.client_sockets.append(client_socket) - # request_header_size = struct.unpack("!Q", read_bytes(client_socket, 8))[0] - # request_map = json.JSONDecoder().decode(read_bytes(client_socket, request_header_size).decode("utf-8")) - # end请求要在主线程处理,不然退出就不会及时响应 - # if request_map['action'] == 'end': - # logger.info(f"[REQUEST] end") - # logger.warning("communication over!") - # break r = RequestHandler(client_socket) r.start() # self.server_socket.close() diff --git a/dcs/spider.py b/dcs/spider.py index e4f2df9..8537522 100644 --- a/dcs/spider.py +++ b/dcs/spider.py @@ -1,5 +1,8 @@ import socket import threading + +from loguru import logger + from dcs.tests.spider_task import Spider_task @@ -11,34 +14,11 @@ class Spider(threading.Thread): self.daemon = True def add_task(self, request_map: dict, client_socket: socket.socket): - # distribute tasks, 5 pages as a task - # [pages_start, pages_end), like [1,3) means 1,2 page - # pages_start = request_map['pages_start'] - # pages_end = request_map['pages_end'] - # while pages_start < pages_end: - # tmp = request_map.copy() - # tmp['pages_start'] = pages_start - # if pages_start + const_page <= pages_end: - # pages_start += const_page - # else: - # pages_start = pages_end - # tmp['pages_end'] = pages_start - # self.tasks.append((client_socket, tmp)) self.tasks.append(Spider_task(client_socket, request_map)) def run(self) -> None: while True: - # free_remote_nodes = get_free_sockets() - # for task in self.tasks: - # logger.info(f'processing crawl task...') - # for f_node in free_remote_nodes: - # address = f_node.getpeername() - # global_var.requester.get(address, task[1]) - # global_var.responser.add_response('crawling state', task[0], {'crawling state': 'starting, please wait...'}) - # spider_task = Spider_task(task[0], task[1]) - # spider_task.start() - # self.tasks.remove(task) for task in self.tasks: + logger.info(f'processing spider request...') task.start() self.tasks.remove(task) - pass diff --git a/dcs/tests/requestHandler.py b/dcs/tests/requestHandler.py index bfd0fcf..f6c6561 100644 --- a/dcs/tests/requestHandler.py +++ b/dcs/tests/requestHandler.py @@ -12,6 +12,7 @@ class RequestHandler(threading.Thread): self.client_socket = client_socket def run(self) -> None: + request_map = None try: while True: request_map = parse_request(self.client_socket) @@ -21,12 +22,17 @@ class RequestHandler(threading.Thread): logger.debug(f"communication over from {self.client_socket.getpeername()}!") break elif request_map['action'] == 'crawl zhiwang': - chk_res = check(request_map['cookie']) + chk_res = check(request_map) if chk_res is None: logger.warning("user info error!") break global_var.spider.add_task(request_map, self.client_socket) elif request_map['action'] in ['report_free', 'login', 'register']: global_var.up.add_request(request_map, self.client_socket) + else: + logger.error(f"no action {request_map['action']}!") + global_var.communicator.add_response('error', self.client_socket, + {request_map['action']: f"no action {request_map['action']}!"}) except Exception as e: - print(str(e)) + logger.error(str(e)) + global_var.communicator.add_response('error', self.client_socket, {request_map['action']: str(e)}) diff --git a/dcs/tests/spider_task.py b/dcs/tests/spider_task.py index 352f700..9c40819 100644 --- a/dcs/tests/spider_task.py +++ b/dcs/tests/spider_task.py @@ -13,24 +13,6 @@ from dcs.tools.database import get_last_crawl_id, create_crawl_result_table from dcs.tests.zhiwang import * -def translate(word): - url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule' - data = {'i': word, - 'from': 'AUTO', - 'to': 'AUTO', - 'smartresult': 'dict', - 'client': 'fanyideskweb', - 'doctype': 'json', - 'version': '2.1', - 'keyfrom': 'fanyi.web', - 'action': 'FY_BY_REALTIME', - 'typoResult': 'false'} - r = requests.post(url, data) - answer = r.json() - result = answer['translateResult'][0][0]['tgt'] - return result - - def write2database(paper: Paper, table_name: str, last_crawl_id: int): logger.info(f'writing to database: {paper.title}') for author in paper.authors: @@ -57,22 +39,23 @@ def write2csv(papers: list, file_name='./paper_author.csv'): class Crawler(threading.Thread): - def __init__(self, partial_task: 'Spider_partial_task'): + def __init__(self, partial_task: 'Spider_partial_task', last_crawl_id): super(Crawler, self).__init__() self.partial_task = partial_task + self.last_crawl_id = last_crawl_id def crawl_zhiwang(self, user_name=None): edge_options = EdgeOptions() edge_options.use_chromium = True # edge_options.add_argument('headless') - driver = Edge(options=edge_options, executable_path=r'G:\Users\god\PycharmProjects\dcs\bin\msedgedriver.exe') + driver = Edge(options=edge_options, executable_path=r'G:\course\yykf\dcs\bin\msedgedriver.exe') soup = driver_open(driver, self.partial_task.word) # 搜索word papers = [] # 用于保存爬取到的论文 table_name = f'{user_name}_crawl_result' create_crawl_result_table(table_name=table_name) - last_crawl_id = get_last_crawl_id(table_name=table_name) - self.partial_task.crawl_id = last_crawl_id + 1 + # last_crawl_id = get_last_crawl_id(table_name=table_name) + self.partial_task.crawl_id = self.last_crawl_id + 1 paper_id = 0 # 爬取第一篇 @@ -80,14 +63,14 @@ class Crawler(threading.Thread): spider(driver, soup, papers) self.partial_task.pages_start += 1 while paper_id < len(papers): - write2database(papers[paper_id], table_name=table_name, last_crawl_id=last_crawl_id) + write2database(papers[paper_id], table_name=table_name, last_crawl_id=self.last_crawl_id) paper_id += 1 while self.partial_task.pages_start < self.partial_task.pages_end: content = change_page(driver, self.partial_task.pages_start) spider(driver, content, papers) while paper_id < len(papers): - write2database(papers[paper_id], table_name=table_name, last_crawl_id=last_crawl_id) + write2database(papers[paper_id], table_name=table_name, last_crawl_id=self.last_crawl_id) paper_id += 1 self.partial_task.pages_start += 1 driver.close() @@ -142,11 +125,12 @@ class Spider_partial_task: class Spider_task(threading.Thread): def __init__(self, client_socket: socket.socket, request_map: dict): super().__init__() + self.table_name = f'{Spider_partial_task(self, request_map).cui.user_name}_crawl_result' + self.last_crawl_id = get_last_crawl_id(table_name=self.table_name) self.client_socket = client_socket self.request_map = request_map self.partial_tasks: list[Spider_partial_task] = [] self.const_page = 3 - pass def distribute_task(self): # distribute tasks, 3 pages as a task @@ -179,6 +163,9 @@ class Spider_task(threading.Thread): remote_result = get_crawl_result(self.request_map['cookie']) for result_map in remote_result: result.update(result_map) + create_crawl_result_table(table_name=self.table_name) + for id, data in result_map.items(): + write_result2database([data['name'], data['college'], data['major'], data['title']], self.table_name, self.last_crawl_id) for task in self.partial_tasks: # print(task.task_type) if task.task_type == 'local': @@ -221,7 +208,7 @@ class Spider_task(threading.Thread): break else: logger.debug('generating local task') - crawler = Crawler(task) + crawler = Crawler(task, self.last_crawl_id) task.thread = crawler task.task_type = 'local' crawler.start() diff --git a/dcs/tests/user_request_handler.py b/dcs/tests/user_request_handler.py index ae189a3..c2287bd 100644 --- a/dcs/tests/user_request_handler.py +++ b/dcs/tests/user_request_handler.py @@ -18,7 +18,6 @@ class Urh(threading.Thread): response = { 'report_free': 'success marked ' + str(self.request_map['cookie']) } - # self.client_socket.sendall(generate_response(response)) global_var.communicator.add_response('report_free', self.client_socket, response) logger.info(f"[RESPONSE] report free: {response['report_free']}") @@ -27,11 +26,10 @@ class Urh(threading.Thread): database.mysql_conn() response = database.login(user, password, st) response = { - 'cookie': response + 'login': response } - # self.client_socket.sendall(generate_response(response)) global_var.communicator.add_response('cookie', self.client_socket, response) - logger.info(f"[RESPONSE] cookie: {response['cookie']}") + logger.info(f"[RESPONSE] login: {response['login']}") def register(self, user, password): logger.info(f"[REQUEST] register") @@ -40,10 +38,12 @@ class Urh(threading.Thread): response = { 'register': response } - # self.client_socket.sendall(generate_response(response)) global_var.communicator.add_response('register', self.client_socket, response) logger.info(f"[RESPONSE] register: {response['register']}") + def get_task_process(self): + pass + def run(self) -> None: if self.request_map['action'] == 'report_free': self.report_state('free') @@ -51,5 +51,7 @@ class Urh(threading.Thread): self.login(self.request_map['user'], self.request_map['password'], self.client_socket) elif self.request_map['action'] == 'register': self.register(self.request_map['user'], self.request_map['password']) + elif self.request_map['action'] == 'get task process': + pass else: self.client_socket.close() diff --git a/dcs/tests/zhiwang.py b/dcs/tests/zhiwang.py index b78117e..d08dab4 100644 --- a/dcs/tests/zhiwang.py +++ b/dcs/tests/zhiwang.py @@ -1,10 +1,13 @@ # 知网论文数据爬取 +import csv from bs4 import BeautifulSoup import time import requests # 定义论文类 +from msedge.selenium_tools import webdriver +from msedge.selenium_tools.service import Service from selenium.webdriver.common.by import By @@ -27,6 +30,12 @@ class Author: # 进入知网首页并搜索关键词 def driver_open(driver, key_word): + from msedge.selenium_tools import EdgeOptions + url = "https://www.cnki.net/" + edge_options = EdgeOptions() + edge_options.use_chromium = True + # 设置无界面模式,也可以添加其它设置 + edge_options.add_argument('--headless') url = "https://www.cnki.net/" driver.get(url) time.sleep(2) @@ -67,11 +76,13 @@ def spider(driver, soup, papers): for author in a_author: skey, code = get_skey_code(author) # 获取作者详情页url的skey和code name = author.get_text().strip() # 获取学者的名字 + # print('name : ' + name) print('name : ' + name) college, major = get_author_info(skey, code) # 在作者详情页获取大学和专业, major是一个数组 au = Author(name, college, major) # 创建一个学者对象 authors.append(au) + # print('\n') print('\n') paper = Paper(title, authors) papers.append(paper) @@ -113,10 +124,10 @@ def get_author_info(skey, code): college = h3[0].get_text().strip() major = h3[1].get_text().strip() # major = major.split(';')[0: -1] - print('college:' + college) - print('major: ' + major) + # print('college:' + college) + # print('major: ' + major) return college, major - print("无详细信息") + # print("无详细信息") return None, None diff --git a/dcs/tools/database.py b/dcs/tools/database.py index a78ecf0..4d239c8 100644 --- a/dcs/tools/database.py +++ b/dcs/tools/database.py @@ -5,7 +5,7 @@ import dcs.tools.cookie as cookie # 获取数据库连接对象 -def mysql_conn(host='127.0.0.1', user='root', passwd='xwdjzwy5252', db='test'): +def mysql_conn(host='192.168.43.65', user='root', passwd='427318Aa', db='test'): conn = pymysql.connect(host=host, user=user, passwd=passwd, db=db) return conn diff --git a/dcs/tools/message_process.py b/dcs/tools/message_process.py index 8214792..b974fd5 100644 --- a/dcs/tools/message_process.py +++ b/dcs/tools/message_process.py @@ -30,11 +30,13 @@ def read_bytes(s: 'socket.socket', size: 'int') -> 'bytes': return data -def check(cookie: str): - if exists(cookie): - return cookie - return None # TODO: if user error, return None, else return cookie - pass +def check(cookie): + try: + if exists(cookie['cookie']): + return cookie + return None + except: + return None def generate_request(request) -> 'bytes': diff --git a/requirements.txt b/requirements.txt index 61a47dc..3cebf14 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ loguru~=0.6.0 requests~=2.27.1 -pandas~=1.3.4 bs4~=0.0.1 beautifulsoup4~=4.10.0 selenium~=3.141.0 diff --git a/ui/connect.py b/ui/connect.py index 414e7df..960372c 100644 --- a/ui/connect.py +++ b/ui/connect.py @@ -1,9 +1,15 @@ +import json import socket import struct import argparse from json import JSONEncoder, JSONDecoder +def parse_request(client_socket: socket.socket): + request_header_size = struct.unpack("!Q", read_bytes(client_socket, 8))[0] + request_map = json.JSONDecoder().decode(read_bytes(client_socket, request_header_size).decode("utf-8")) + return request_map + def generate_request(request_info) -> 'bytes': """ 根据传入的dict生成请求 @@ -33,6 +39,8 @@ def read_bytes(s: 'socket.socket', size: 'int') -> 'bytes': def send_request(ip, port, request_info): with socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP) as socket_to_server: + socket_to_server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + socket_to_server.bind(('', 9002)) socket_to_server.connect((ip, int(port))) full_request = generate_request(request_info) @@ -43,11 +51,24 @@ def send_request(ip, port, request_info): read_bytes(socket_to_server, struct.unpack('!Q', socket_to_server.recv(8))[0]).decode( "utf-8")) - return responseJson[request_info['action']] + return responseJson + + +def receive_response(): + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP) + server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server_socket.bind(('', 9002)) + server_socket.listen() + while True: + client_socket, _ = server_socket.accept() + request_map = parse_request(client_socket) + if request_map['type'] == 'response': + print("receiving response:\n" + json.dumps(request_map, ensure_ascii=False)) if __name__ == '__main__': - # 使用方法 python.exe .\connect.py --ip 127.0.0.1 --port 7777 login --user 1 --password 1 + # 使用方法 python .\connect.py --ip 127.0.0.1 --port 7777 + # crawling --word computer --cookie 95f94e1ab71bdf96b85fef6e8f746c58eeb5f9fa --pages_start 1 --pages_end 10 parser = argparse.ArgumentParser('connect-manager') parser.add_argument('--ip', type=str, required=True) parser.add_argument('--port', type=str, required=True) @@ -56,6 +77,9 @@ if __name__ == '__main__': parser_crawling = subparsers.add_parser('crawling') parser_crawling.add_argument('--word', type=str, required=True) + parser_crawling.add_argument('--pages_end', type=int, required=True) + parser_crawling.add_argument('--pages_start', type=int, required=True) + parser_crawling.add_argument('--cookie', type=str, required=True) parser_login = subparsers.add_parser('login') parser_login.add_argument('--user', type=str, required=True) @@ -70,8 +94,12 @@ if __name__ == '__main__': request = dict() if args.action == 'crawling': - request = {'action': 'crawl zhiwang', 'word': args.word} + request = {'action': 'crawl zhiwang', 'word': args.word, 'pages_start': args.pages_start, + 'pages_end': args.pages_end, 'cookie': args.cookie} elif args.action == 'login' or args.action == 'register': request = {'action': args.action, 'user': args.user, 'password': args.password} response = send_request(args.ip, args.port, request) print(response) + + if args.action == 'crawling': + receive_response() diff --git a/ui/login.bat b/ui/login.bat new file mode 100644 index 0000000..56f8ccd --- /dev/null +++ b/ui/login.bat @@ -0,0 +1 @@ +python .\connect.py --ip 127.0.0.1 --port 7777 login --user wufayuan --password 113818 diff --git a/ui/login.js b/ui/login.js index dbca53c..ed4c535 100644 --- a/ui/login.js +++ b/ui/login.js @@ -1,4 +1,4 @@ -var fs = require("fs"); +var fs = require("fs"); var bodyParser = require('body-parser'); // 这个模块是获取post请求传过来的数据。 var multer = require('multer'); //multer - node.js 中间件,用于处理 enctype="multipart/form-data"(设置表单的MIME编码)的表单数据。 var express=require('express'); @@ -23,7 +23,7 @@ app.use(multer({ dest: 'tmp/' }).array('file'));//multer中间件用于上传表 app.get('/',function (req,res) { res.sendfile(__dirname + "/public/" + "index.html" ); }) - + /** * 实现登录验证功能 */ @@ -60,11 +60,11 @@ app.get('/login', function (req, res) { res.sendfile(__dirname + "/public/" + "ok.html" ); }) })**/ - + app.get('/register.html',function (req,res) { res.sendfile(__dirname+"/public/"+"register.html"); }) - + /** * 实现注册功能 */ @@ -94,26 +94,26 @@ app.get('/register',function (req,res) { app.get('/ok.html',function (req,res) { res.redirect("/public/"+"ok.html"); }) - - - + + + var server=app.listen(3000,function () { console.log("start"); }) // 上传文件api app.post('/file_upload', function (req, res) { - + console.log(req.files[0]); // 上传的文件信息 - - var des_file = __dirname + "/0/" + req.files[0].originalname; - fs.readFile( req.files[0].path, function (err, data) { - fs.writeFile(des_file, data, function (err) { + + var des_file = __dirname + "/0/" + req.files[0].originalname; + fs.readFile( req.files[0].path, function (err, data) { + fs.writeFile(des_file, data, function (err) { if( err ){ console.log( err ); }else{ response = { - message:'File uploaded successfully', + message:'File uploaded successfully', filename:req.files[0].originalname }; } @@ -134,7 +134,7 @@ function execute(cmd) { //调用cmd命令 }) } app.post('/check', function (req, res) { - execute('python connect.py --ip 127.0.0.1 --port 7777 crawling --word computer'); + execute('python connect.py --ip 127.0.0.1 --port 7777 crawling --word computer --cookie 72651f56b02caaf78076ba8f05a8223606e58216 --pages_start 1 --pages_end 5'); fs.readFile('./result.txt', 'utf-8', function (err, data) { if (err) { console.error(err);