Rewrote the connect client and the server communication system; availability should be significantly improved, and it is running normally.

master
wufayuan 3 years ago
parent b681c1b92d
commit 2f4fa14b2b

@ -0,0 +1,136 @@
import json
import socket
import struct
import threading
from configparser import ConfigParser
from json import JSONDecoder

from msedge.selenium_tools import Edge
from msedge.selenium_tools import EdgeOptions

from dcs.tests.zhiwang import *  # provides driver_open, spider, change_page, Paper and logger
from dcs.tools import message_process as mp
from dcs.tools.message_process import parse_request, generate_response


def crawl_zhiwang(word, pages_start, pages_end):
    edge_options = EdgeOptions()
    edge_options.use_chromium = True
    # skip images and stylesheets to speed up page loads
    no_image_loading = {"profile.managed_default_content_settings.images": 2,
                        'permissions.default.stylesheet': 2}
    edge_options.add_experimental_option("prefs", no_image_loading)
    edge_options.add_argument('--headless')

    config_file = '../../conf/settings.ini'
    con = ConfigParser()
    con.read(config_file, encoding='utf-8')
    edge_driver_path = dict(con.items('crawler'))['edge_driver_path']
    print(edge_driver_path)
    driver = Edge(options=edge_options, executable_path=edge_driver_path)

    soup = driver_open(driver, word)
    papers = []  # papers crawled so far
    paper_id = 0
    res = {}  # crawl results for this node

    # crawl the first page
    if pages_start == 1:
        spider(driver, soup, papers)
        logger.debug(res)
        pages_start += 1
        while paper_id < len(papers):
            write2res(papers[paper_id], res)
            paper_id += 1

    # crawl the remaining pages
    while pages_start < pages_end:
        content = change_page(driver, pages_start)
        spider(driver, content, papers)
        while paper_id < len(papers):
            write2res(papers[paper_id], res)
            paper_id += 1
        pages_start += 1

    driver.close()
    # logger.debug("here")
    return res


def write2res(paper: Paper, res):
    # flatten every author of a paper into one numbered row of the result dict
    for author in paper.authors:
        if author.name:
            res.update(
                {len(res): {'name': author.name, 'college': author.college, 'major': author.major,
                            'title': paper.title}})


class Crawl(threading.Thread):
    def __init__(self):
        super(Crawl, self).__init__()
        self.server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP)
        self.server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.server_socket.bind(('', local_port))

    @staticmethod
    def crawl(request_map) -> dict:
        result_map = crawl_zhiwang(request_map['word'], request_map['pages_start'], request_map['pages_end'])
        # result_map = {0: {'name': 'remote', 'college': 'remote', 'major': 'remote', 'title': 'remote'},
        #               1: {'name': 'remote1', 'college': 'remote1', 'major': 'remote', 'title': 'remote'}}
        logger.debug(result_map)
        return result_map

    def run(self) -> None:
        self.server_socket.listen()
        while True:
            client_socket, _ = self.server_socket.accept()
            request_map = parse_request(client_socket)
            if request_map['type'] == 'request':
                print("receiving help request:\n" + json.dumps(request_map, ensure_ascii=False))
                response_map = self.crawl(request_map)
                response_map.update({'cookie': request_map['cookie']})
                client_socket.sendall(generate_response(response_map))
                # tell the central server this node is free again
                report_map = {'action': 'report_free', 'cookie': cookie}
                logger.debug(send_request(socket_to_server, report_map))
            if request_map['type'] == 'response':
                print("receiving response:\n" + json.dumps(request_map, ensure_ascii=False))
                # break


def send_request(socket2server, req):
    socket2server.sendall(mp.generate_request(req))
    responseJson = JSONDecoder().decode(
        mp.read_bytes(socket2server, struct.unpack('!Q', socket2server.recv(8))[0]).decode(
            "utf-8"))
    return responseJson


server_ip = '127.0.0.1'
server_port = 7777
local_port = 9999

crawl = Crawl()
crawl.start()
# res = crawl.crawl({'action': 'crawl zhiwang', 'word': 'science', 'pages_start': 1, 'pages_end': 2, 'cookie': '123'})
# logger.debug(res)

socket_to_server = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP)
socket_to_server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
socket_to_server.bind(('', local_port))  # bind the outgoing socket so the server learns this node's port
socket_to_server.connect((server_ip, server_port))

request = {'action': 'register', 'user': 'liuxiaoyu', 'password': '113818'}
logger.debug(send_request(socket_to_server, request))
request = {'action': 'login', 'user': 'liuxiaoyu', 'password': '113818'}
response = send_request(socket_to_server, request)
logger.debug(response)
cookie = response['cookie']
request = {'action': 'report_free', 'cookie': cookie}
logger.debug(send_request(socket_to_server, request))
# request = {'action': 'crawl zhiwang', 'word': 'science', 'pages_start': 1, 'pages_end': 3,
#            'cookie': cookie}
# logger.debug(send_request(socket_to_server, request))
request = {'action': 'end'}
socket_to_server.sendall(mp.generate_request(request))
crawl.join()
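For context on the wire format the helpers above rely on: each message appears to be a UTF-8 JSON body prefixed by its byte length packed as an unsigned 64-bit big-endian integer (struct format '!Q'), which is why replies are read as recv(8) followed by read_bytes. A minimal sketch of that framing, with hypothetical helper names (the real ones live in dcs.tools.message_process, whose code is not shown in this diff):

import json
import socket
import struct

def frame_message(payload: dict) -> bytes:
    # hypothetical equivalent of mp.generate_request: 8-byte big-endian
    # length prefix ('!Q'), then the UTF-8 encoded JSON body
    body = json.dumps(payload, ensure_ascii=False).encode('utf-8')
    return struct.pack('!Q', len(body)) + body

def unframe_message(sock: socket.socket) -> dict:
    # hypothetical receive side: read the length prefix, then exactly that
    # many bytes of JSON (assumes the 8-byte prefix arrives whole, as the
    # repo's own recv(8) call does)
    size = struct.unpack('!Q', sock.recv(8))[0]
    data = b''
    while len(data) < size:
        chunk = sock.recv(size - len(data))
        if not chunk:
            raise ConnectionError('peer closed mid-message')
        data += chunk
    return json.loads(data.decode('utf-8'))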

@ -13,6 +13,7 @@ class Communicator(threading.Thread):
        self.info_list: list[tuple[tuple, dict]] = []

    def add_response(self, response_type: str, client_socket: socket.socket, response_map: dict):
        response_map.update({'type': response_type})
        self.responser_list.append((response_type, client_socket, response_map))

    def add_info(self, info_type: str, address: tuple, info_map: dict):
@ -27,13 +28,13 @@ class Communicator(threading.Thread):
                client_socket.sendall(generate_response(response_map))
                self.responser_list.remove(responser)
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP) as socket_to_client:
                for info in self.info_list:
                    try:
                        logger.info(f'sending info to {info[0]}: {info[1]}')
                        socket_to_client.connect(info[0])
                        socket_to_client.sendall(generate_request(info[1]))
                        self.info_list.remove(info)
                    except Exception as e:
                        logger.error(str(e))
                        self.info_list.remove(info)
            # with socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP) as socket_to_client:
            #     for info in self.info_list:
            #         try:
            #             logger.info(f'sending info to {info[0]}: {info[1]}')
            #             socket_to_client.connect(info[0])
            #             socket_to_client.sendall(generate_request(info[1]))
            #             self.info_list.remove(info)
            #         except Exception as e:
            #             logger.error(str(e))
            #             self.info_list.remove(info)
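The change above moves outbound traffic off the connect-back path (the now commented-out socket_to_client block) and onto the response queue flushed over each client's already-open socket. A stripped-down illustration of that pattern, with names simplified from the class above (not the repo's exact code):

import json
import queue
import socket
import struct

def generate_response(response_map: dict) -> bytes:
    # stand-in for dcs.tools.message_process.generate_response (assumed framing)
    body = json.dumps(response_map, ensure_ascii=False).encode('utf-8')
    return struct.pack('!Q', len(body)) + body

pending_responses: queue.Queue = queue.Queue()

def add_response(response_type: str, client_socket: socket.socket, response_map: dict) -> None:
    # tag the payload so the client can dispatch on its 'type' field
    response_map.update({'type': response_type})
    pending_responses.put((client_socket, response_map))

def flush_responses() -> None:
    # drain the queue over each client's existing socket,
    # instead of dialing the client back on a fresh connection
    while not pending_responses.empty():
        client_socket, response_map = pending_responses.get()
        client_socket.sendall(generate_response(response_map))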

File diff suppressed because one or more lines are too long

@ -20,6 +20,9 @@ class RequestHandler(threading.Thread):
logger.info(f"[REQUEST] end")
logger.debug(f"communication over from {self.client_socket.getpeername()}!")
break
elif request_map['action'] == 'start':
logger.info(f"[REQUEST] start")
logger.debug(f"communication begin from {self.client_socket.getpeername()}!")
elif request_map['action'] == 'crawl zhiwang':
chk_res = check(request_map)
if chk_res is None:
@ -32,6 +35,7 @@ class RequestHandler(threading.Thread):
logger.error(f"no action {request_map['action']}!")
global_var.communicator.add_response('error', self.client_socket,
{request_map['action']: f"no action {request_map['action']}!"})
# logger.debug('request over!')
# break
except Exception as e:
logger.error(str(e))

@ -188,7 +188,8 @@ class Spider_task(threading.Thread):
            result.update({'crawl_id': self.last_crawl_id + 1, 'table_name': self.table_name})
            global_var.communicator.add_info('response', self.client_socket.getpeername(), result)
            # global_var.communicator.add_info('response', self.client_socket.getpeername(), result)
            global_var.communicator.add_response('response', self.client_socket, result)

    def run(self) -> None:
        global_var.communicator.add_response('crawling state', self.client_socket,

@ -7,7 +7,10 @@ import dcs.tools.cookie as cookie
# get a database connection object
def mysql_conn(host='192.168.43.64', user='root', passwd='427318Aa', db='test'):
# def mysql_conn(host='10.129.16.173', user='root', passwd='427318Aa', db='test'):
# def mysql_conn(host='10.129.16.155', user='root', passwd='427318Aa', db='test'):
# def mysql_conn(host='192.168.43.64', user='root', passwd='427318Aa', db='test'):
def mysql_conn(host='127.0.0.1', user='root', passwd='xwdjzwy5252', db='test'):
    try:
        # logger.debug('connecting to database...')
        conn = pymysql.connect(host=host, user=user, passwd=passwd, db=db)
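Assuming mysql_conn returns the pymysql connection on success (the rest of its try/except is not shown in this hunk), a typical call site would look something like this sketch; the query is illustrative only:

conn = mysql_conn()
try:
    with conn.cursor() as cur:
        cur.execute('SELECT 1')
        print(cur.fetchone())
finally:
    conn.close()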

@ -37,35 +37,29 @@ def read_bytes(s: 'socket.socket', size: 'int') -> 'bytes':
    return data


def send_request(ip, port, request_info):
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP) as socket_to_server:
        socket_to_server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        socket_to_server.bind(('', 9014))
        socket_to_server.connect((ip, int(port)))
        full_request = generate_request(request_info)
        socket_to_server.sendall(full_request)
        responseJson = JSONDecoder().decode(
            read_bytes(socket_to_server, struct.unpack('!Q', socket_to_server.recv(8))[0]).decode(
                "utf-8"))
        return responseJson
def send_request(request_info, socket_to_server):
    full_request = generate_request(request_info)
    socket_to_server.sendall(full_request)
    # 'end' and 'start' are fire-and-forget actions: the server sends no reply
    if request_info['action'] == 'end' or request_info['action'] == 'start':
        return
    responseJson = JSONDecoder().decode(
        read_bytes(socket_to_server, struct.unpack('!Q', socket_to_server.recv(8))[0]).decode(
            "utf-8"))
    return responseJson


def receive_response():
    server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP)
    server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    server_socket.bind(('', 9014))
    server_socket.listen()
    # while True:
    client_socket, _ = server_socket.accept()
    request_map = parse_request(client_socket)
    if request_map['type'] == 'response':
        print("receiving response:\n" + json.dumps(request_map, ensure_ascii=False))
        with open('result.txt', 'w', encoding='utf-8') as f:
            json.dump(request_map, f, ensure_ascii=False, indent=4)
def receive_response(server_socket):
    # client_socket, _ = server_socket.accept()
    request_map = parse_request(server_socket)
    # if request_map['type'] == 'response':
    print("receiving response:\n" + json.dumps(request_map, ensure_ascii=False))
    with open('result.json', 'w', encoding='utf-8') as f:
        json.dump(request_map, f, ensure_ascii=False, indent=4)
if __name__ == '__main__':
@ -93,15 +87,26 @@ if __name__ == '__main__':
    args = parser.parse_args()

    request = dict()
    local_port = 10004
    server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP)
    server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    # server_socket.bind(('', local_port))
    server_socket.connect((args.ip, int(args.port)))
    # server_socket.listen()
    request = {'action': 'start'}
    send_request(request, server_socket)
    if args.action == 'crawling':
        request = {'action': 'crawl zhiwang', 'word': args.word, 'pages_start': args.pages_start,
                   'pages_end': args.pages_end, 'cookie': args.cookie}
    elif args.action == 'login' or args.action == 'register':
        request = {'action': args.action, 'user': args.user, 'password': args.password}
    response = send_request(args.ip, args.port, request)
    response = send_request(request, server_socket)
    print(response)
    if args.action == 'crawling':
        receive_response()
        receive_response(server_socket)
    request = {'action': 'end'}
    send_request(request, server_socket)
    server_socket.close()
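One behavioral detail worth noting in the rewritten client: 'start' and 'end' are fire-and-forget, so send_request returns without reading a framed reply, while every other action blocks until the server's response arrives. A condensed sketch of the resulting session lifecycle, reusing the send_request defined in the diff above (the address and credentials are illustrative):

import socket

# hypothetical driver for one client session
with socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP) as s:
    s.connect(('127.0.0.1', 7777))
    send_request({'action': 'start'}, s)   # no reply is read
    print(send_request({'action': 'login', 'user': 'demo', 'password': 'demo'}, s))
    send_request({'action': 'end'}, s)     # no reply is read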

@ -1 +1 @@
python .\connect.py --ip 127.0.0.1 --port 7777 login --user yuu --password yuu
python .\connect.py --ip 127.0.0.1 --port 7777 login --user liuxiaoyu --password 113818

@ -10,9 +10,12 @@ var fs = require("fs");
 * Configure MySQL
 */
var connection = mysql.createConnection({
    host : '192.168.43.64',
    // host : '192.168.43.64',
    // host : '10.129.16.173',
    host : '127.0.0.1',
    user : 'root',
    password : '427318Aa',
    // password : '427318Aa',
    password : 'xwdjzwy5252',
    database : 'test',
    port : '3306'
});
@ -134,8 +137,8 @@ function execute(cmd) { // invoke a shell command
    })
}

app.post('/check', function (req, res) {
    execute('python connect.py --ip 127.0.0.1 --port 7777 crawling --word computer --cookie fb90de22c26723e4d7172fcf1db124f4db91fa30 --pages_start 1 --pages_end 3');
    fs.readFile('./result.txt', 'utf-8', function (err, data) {
    execute('python connect.py --ip 127.0.0.1 --port 7777 crawling --word computer --cookie 8f607bcea67d4f62475fcc710e2f5aff794a4833 --pages_start 1 --pages_end 3');
    fs.readFile('./result.json', 'utf-8', function (err, data) {
        if (err) {
            console.error(err);
        }
