实现web服务器向爬虫服务器通信,请求登录和注册,以及丰富了两者之间的通信类型

master
wufayuan 3 years ago
parent 888089ca40
commit 1a9e10313a

@ -152,3 +152,79 @@
2022-03-24 15:46:50.788 | INFO | __main__:<module>:15 - starting the server... 2022-03-24 15:46:50.788 | INFO | __main__:<module>:15 - starting the server...
2022-03-24 15:59:48.432 | INFO | dcs.tests.requestHandler:run:47 - [REQUEST] crawl zhiwang 2022-03-24 15:59:48.432 | INFO | dcs.tests.requestHandler:run:47 - [REQUEST] crawl zhiwang
2022-03-24 15:59:48.432 | INFO | dcs.tests.spider:run:63 - crawling... 2022-03-24 15:59:48.432 | INFO | dcs.tests.spider:run:63 - crawling...
2022-03-24 16:48:50.400 | INFO | __main__:<module>:8 - reading config args...
2022-03-24 16:48:50.401 | INFO | __main__:<module>:15 - starting the server...
2022-03-24 16:50:01.739 | INFO | dcs.tests.requestHandler:run:47 - [REQUEST] crawl zhiwang
2022-03-24 16:50:01.739 | INFO | dcs.tests.spider:run:63 - crawling...
2022-03-24 16:53:06.225 | INFO | __main__:<module>:8 - reading config args...
2022-03-24 16:53:06.226 | INFO | __main__:<module>:15 - starting the server...
2022-03-24 16:53:14.249 | INFO | dcs.tests.requestHandler:run:47 - [REQUEST] crawl zhiwang
2022-03-24 16:53:14.249 | INFO | dcs.tests.spider:run:65 - crawling...
2022-03-24 16:53:14.249 | INFO | dcs.tests.spider:run:67 - 1648111994.2494771
2022-03-24 16:53:14.252 | INFO | dcs.tests.requestHandler:run:59 - [RESPONSE] crawl zhiwang: success, header size: 28
2022-03-24 16:53:23.793 | INFO | dcs.tests.requestHandler:run:62 - [REQUEST] report free
2022-03-24 16:53:23.793 | INFO | dcs.tests.requestHandler:run:74 - [RESPONSE] report free: success marked ['127.0.0.1', 7777], header size: 53
2022-03-24 16:53:27.349 | INFO | dcs.tests.requestHandler:run:47 - [REQUEST] crawl zhiwang
2022-03-24 16:53:27.350 | INFO | dcs.tests.spider:run:65 - crawling...
2022-03-24 16:53:27.351 | INFO | dcs.tests.spider:run:67 - 1648112007.351529
2022-03-24 16:53:27.351 | INFO | dcs.tests.requestHandler:run:59 - [RESPONSE] crawl zhiwang: success, header size: 28
2022-03-24 16:53:27.353 | INFO | dcs.tests.requestHandler:run:62 - [REQUEST] report free
2022-03-24 16:53:27.354 | INFO | dcs.tests.requestHandler:run:74 - [RESPONSE] report free: success marked ['127.0.0.1', 7777], header size: 53
2022-03-24 16:53:27.355 | INFO | dcs.tests.server:run:36 - [REQUEST] end
2022-03-24 16:53:27.355 | WARNING | dcs.tests.server:run:37 - communication over!
2022-03-24 16:53:27.356 | WARNING | __main__:<module>:21 - Overing...
2022-03-26 09:50:18.770 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 09:50:18.770 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 09:51:32.660 | WARNING | __main__:<module>:21 - Overing...
2022-03-26 09:55:34.616 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 09:55:34.617 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 09:55:38.207 | WARNING | __main__:<module>:21 - Overing...
2022-03-26 09:57:10.319 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 09:57:10.320 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 09:58:54.150 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 09:58:54.151 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 09:59:51.615 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 09:59:51.616 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 09:59:54.161 | INFO | dcs.tests.requestHandler:login:67 - [REQUEST] login
2022-03-26 10:01:15.556 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:01:15.557 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:01:18.531 | INFO | dcs.tests.requestHandler:login:67 - [REQUEST] login
2022-03-26 10:01:18.546 | INFO | dcs.tests.requestHandler:login:74 - [RESPONSE] login: success
2022-03-26 10:03:25.146 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:03:25.146 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:04:08.583 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:04:08.584 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:04:11.472 | INFO | dcs.tests.requestHandler:login:67 - [REQUEST] login
2022-03-26 10:04:11.500 | INFO | dcs.tests.requestHandler:login:74 - [RESPONSE] login: None
2022-03-26 10:04:36.384 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:04:36.384 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:04:38.569 | INFO | dcs.tests.requestHandler:login:67 - [REQUEST] login
2022-03-26 10:04:38.573 | INFO | dcs.tests.requestHandler:login:74 - [RESPONSE] login: None
2022-03-26 10:05:58.156 | INFO | dcs.tests.requestHandler:login:67 - [REQUEST] login
2022-03-26 10:05:58.181 | INFO | dcs.tests.requestHandler:login:74 - [RESPONSE] login: None
2022-03-26 10:06:08.316 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:06:08.316 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:06:10.545 | INFO | dcs.tests.requestHandler:login:67 - [REQUEST] login
2022-03-26 10:06:10.575 | INFO | dcs.tests.requestHandler:login:74 - [RESPONSE] login: 用户名错误,登录失败
2022-03-26 10:06:50.711 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:06:50.712 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:06:59.310 | INFO | dcs.tests.requestHandler:login:67 - [REQUEST] login
2022-03-26 10:06:59.338 | INFO | dcs.tests.requestHandler:login:74 - [RESPONSE] login: 密码错误,登录失败
2022-03-26 10:09:58.400 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:09:58.401 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:11:43.925 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:11:43.926 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:12:14.824 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:12:14.824 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:12:17.060 | INFO | dcs.tests.requestHandler:login:67 - [REQUEST] login
2022-03-26 10:12:17.076 | INFO | dcs.tests.requestHandler:login:74 - [RESPONSE] login: 用户名错误,登录失败
2022-03-26 10:12:43.494 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:12:43.494 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:12:46.095 | INFO | dcs.tests.requestHandler:register:77 - [REQUEST] register
2022-03-26 10:12:46.100 | INFO | dcs.tests.requestHandler:register:84 - [RESPONSE] register: None
2022-03-26 10:13:58.556 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:13:58.556 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:14:01.105 | INFO | dcs.tests.requestHandler:register:77 - [REQUEST] register
2022-03-26 10:14:01.128 | INFO | dcs.tests.requestHandler:register:84 - [RESPONSE] register: 注册成功
2022-03-26 10:14:29.237 | INFO | dcs.tests.requestHandler:login:67 - [REQUEST] login
2022-03-26 10:14:29.252 | INFO | dcs.tests.requestHandler:login:74 - [RESPONSE] login: 1登录成功

@ -3,67 +3,73 @@ import pymysql
# 获取数据库连接对象 # 获取数据库连接对象
def mysql_conn(host='127.0.0.1', user='root', passwd='xwdjzwy5252', db='test'):
    """
    Open a new PyMySQL connection and return it.

    :param host: database server address
    :param user: database user name
    :param passwd: database password
    :param db: name of the database to select
    :return: the connection object returned by ``pymysql.connect``
    """
    # NOTE(review): the default credentials are hard-coded in source; consider
    # loading them from configuration or the environment instead.
    connect_kwargs = dict(host=host, user=user, passwd=passwd, db=db)
    return pymysql.connect(**connect_kwargs)
def register(u_name, u_pwd):
    """
    Register a new user and report the outcome as a message string.

    The password is stored as the hex SHA-1 digest of ``u_pwd.encode()``.

    :param u_name: user name to register
    :param u_pwd: plain-text password
    :return: '用户名已存在,注册失败' when the name is already present,
        '注册成功' after the row has been inserted and committed, or None
        when an exception occurred (the exception is printed)
    """
    # NOTE(review): unsalted SHA-1 is weak for password storage; it is kept
    # here because existing rows are presumably stored in this format.
    sha_pwd = sha1(u_pwd.encode()).hexdigest()
    conn = None
    try:
        conn = mysql_conn()
        with conn.cursor() as cur:
            # u_name is caller-supplied, so it is passed as a query parameter
            # rather than interpolated into the SQL text.
            select_sql = 'select user_password from userinfotest where user_name = %s'
            cur.execute(select_sql, [u_name])
            if cur.fetchone() is not None:
                # A row came back: the name is taken, nothing is written.
                return '用户名已存在,注册失败'
            insert_sql = 'insert into userinfotest (user_name, user_password) values (%s,%s)'
            cur.execute(insert_sql, [u_name, sha_pwd])
        # Inserts are not visible until committed explicitly.
        conn.commit()
        return '注册成功'
    except Exception as e:
        # Best effort, as before: report the error and signal failure with None.
        print(e)
        return None
    finally:
        # Close the connection on every path, including the error path.
        if conn is not None:
            conn.close()
def login(u_name, u_pwd):
    """
    Check a user's credentials and report the outcome as a message string.

    The stored value is compared with the hex SHA-1 digest of ``u_pwd.encode()``.

    :param u_name: user name to look up
    :param u_pwd: plain-text password
    :return: '用户名错误,登录失败' when no row matches the user name,
        '密码错误,登录失败' when the digests differ,
        '用户' + u_name + '登录成功' when they match, or None when an
        exception occurred (the exception is printed)
    """
    sha_pwd = sha1(u_pwd.encode()).hexdigest()
    conn = None
    try:
        conn = mysql_conn()
        with conn.cursor() as cur:
            # u_name is caller-supplied, so it is passed as a query parameter
            # rather than interpolated into the SQL text.
            select_sql = 'select user_password from userinfotest where user_name = %s'
            cur.execute(select_sql, [u_name])
            res = cur.fetchone()
        if res is None:
            # No password row for this user name.
            return '用户名错误,登录失败'
        if res[0] == sha_pwd:
            return '用户' + u_name + '登录成功'
        return '密码错误,登录失败'
    except Exception as e:
        # Best effort, as before: report the error and signal failure with None.
        print(e)
        return None
    finally:
        # Close the connection on every path, including the error path.
        if conn is not None:
            conn.close()
def cancel(): def cancel(u_name):
try: try:
conn = mysql_conn() conn = mysql_conn()
cur = conn.cursor() cur = conn.cursor()
@ -76,6 +82,8 @@ def cancel():
if __name__ == '__main__': if __name__ == '__main__':
pass
'''
u_name = input('请输入用户名') u_name = input('请输入用户名')
u_pwd = input('请输入密码') u_pwd = input('请输入密码')
@ -87,3 +95,4 @@ if __name__ == '__main__':
# register() # register()
login() login()
'''

@ -3,12 +3,22 @@ import threading
import json import json
import struct import struct
import dcs.tests.config import dcs.tests.config
import dcs.tests.database as database
from loguru import logger from loguru import logger
from dcs.tests.spider import Spider from dcs.tests.spider import Spider
def generate_response(response):
    """
    Serialize a response into a length-prefixed frame.

    The frame is an 8-byte unsigned big-endian length ("!Q") followed by the
    UTF-8 encoded JSON text of ``response``.

    :param response: JSON-serializable object (the handlers pass a dict)
    :return: bytes, header length followed by the header data
    """
    payload = json.dumps(response).encode("utf-8")
    return struct.pack("!Q", len(payload)) + payload
class RequestHandler(threading.Thread): class RequestHandler(threading.Thread):
def __init__(self, file_server: 'FileServer', client_socket: 'socket.socket', request_map: 'dict'): def __init__(self, file_server, client_socket: 'socket.socket', request_map: 'dict'):
super().__init__() super().__init__()
self.file_server = file_server self.file_server = file_server
self.client_socket = client_socket self.client_socket = client_socket
@ -16,62 +26,77 @@ class RequestHandler(threading.Thread):
self.daemon = True self.daemon = True
pass pass
def test(self):
    """Answer a 'test' request with a fixed greeting and log the exchange."""
    logger.info("[REQUEST] test")
    greeting = 'hello TEST'
    frame = generate_response({'test': greeting})
    self.client_socket.sendall(frame)
    logger.info(f"[RESPONSE] test: {greeting}")
def translate(self):
    """Run a Spider on the requested word and send back whatever it returns."""
    logger.info("[REQUEST] translate")
    word = self.request_map['word']
    result = Spider(word).run()
    frame = generate_response({'translate': result})
    self.client_socket.sendall(frame)
    logger.info(f"[RESPONSE] translate: {result}")
def crawl_zhiwang(self):
    """Run a Spider on the requested word and reply with a fixed status."""
    logger.info("[REQUEST] crawl zhiwang")
    word = self.request_map['word']
    # The spider's return value is not used; the reply is always the same.
    Spider(word).run()
    status = 'success'  # TODO
    frame = generate_response({'crawl zhiwang': status})
    self.client_socket.sendall(frame)
    logger.info(f"[RESPONSE] crawl zhiwang: {status}")
def report_state(self, state):
    """
    Record the reporting spider as free and acknowledge the report.

    :param state: not read by this method
        (NOTE(review): presumably reserved for states other than 'free' - confirm)
    """
    logger.info("[REQUEST] report free")
    spider_info = self.request_map['spider_info']
    # Only add the spider when it is not already listed as free.
    already_free = spider_info in dcs.tests.config.get_free_spiders()
    if not already_free:
        dcs.tests.config.add_free_spider(spider_info)
    message = 'success marked ' + str(spider_info)
    self.client_socket.sendall(generate_response({'report_free': message}))
    logger.info(f"[RESPONSE] report free: {message}")
def login(self, user, password):
    """
    Handle a 'login' request: check the credentials and send back the result.

    :param user: user name taken from the request
    :param password: plain-text password taken from the request
    """
    logger.info("[REQUEST] login")
    # No connection is opened here: the previous extra database.mysql_conn()
    # call created a connection that was never used or closed.
    result = database.login(user, password)
    frame = generate_response({'login': result})
    self.client_socket.sendall(frame)
    logger.info(f"[RESPONSE] login: {result}")
def register(self, user, password):
    """
    Handle a 'register' request: create the user and send back the result.

    :param user: user name taken from the request
    :param password: plain-text password taken from the request
    """
    logger.info("[REQUEST] register")
    # No connection is opened here: the previous extra database.mysql_conn()
    # call created a connection that was never used or closed.
    result = database.register(user, password)
    frame = generate_response({'register': result})
    self.client_socket.sendall(frame)
    logger.info(f"[RESPONSE] register: {result}")
def run(self) -> None:
    """
    Dispatch the request to the handler matching its 'action'.

    Requests with any other action are ignored. The client socket is closed
    when handling ends, whether or not the handler raised.
    """
    try:
        action = self.request_map['action']
        if action == 'test':
            self.test()
        elif action == 'translate':
            self.translate()
        elif action == 'crawl zhiwang':
            self.crawl_zhiwang()
        elif action == 'report_free':
            self.report_state('free')
        elif action == 'login':
            credentials = (self.request_map['user'], self.request_map['password'])
            self.login(*credentials)
        elif action == 'register':
            credentials = (self.request_map['user'], self.request_map['password'])
            self.register(*credentials)
    finally:
        self.client_socket.close()

@ -1,4 +1,6 @@
import threading import threading
import time
import dcs.tests.config import dcs.tests.config
from msedge.selenium_tools import Edge from msedge.selenium_tools import Edge
from msedge.selenium_tools import EdgeOptions from msedge.selenium_tools import EdgeOptions

@ -3,4 +3,5 @@ requests~=2.27.1
pandas~=1.3.4 pandas~=1.3.4
bs4~=0.0.1 bs4~=0.0.1
beautifulsoup4~=4.10.0 beautifulsoup4~=4.10.0
selenium~=4.1.3 selenium~=3.141.0
PyMySQL~=1.0.2

@ -3,16 +3,18 @@ import struct
import argparse import argparse
from json import JSONEncoder, JSONDecoder from json import JSONEncoder, JSONDecoder
def generate_request(request_info) -> 'bytes':
    """
    Build a request frame from the given dict.

    The frame is an 8-byte header length followed by the header data
    (the UTF-8 encoded JSON text of ``request_info``).

    :param request_info: dict
    :return: bytes, the request data
    """
    request_bytes = JSONEncoder().encode(request_info).encode("utf-8")
    # "!Q": unsigned 8-byte length in network (big-endian) byte order.
    return struct.pack("!Q", len(request_bytes)) + request_bytes
def read_bytes(s: 'socket.socket', size: 'int') -> 'bytes': def read_bytes(s: 'socket.socket', size: 'int') -> 'bytes':
""" """
从socket读取size个字节 从socket读取size个字节
@ -28,14 +30,12 @@ def read_bytes(s: 'socket.socket', size: 'int') -> 'bytes':
break break
return data return data
def crawling(ip, port, word: str):
def send_request(ip, port, request_info):
with socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP) as socket_to_server: with socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP) as socket_to_server:
socket_to_server.connect((ip, int(port))) socket_to_server.connect((ip, int(port)))
request = dict()
request['action'] = 'crawl zhiwang'
request['word'] = word
full_request = generate_request(request) full_request = generate_request(request_info)
socket_to_server.sendall(full_request) socket_to_server.sendall(full_request)
@ -43,15 +43,34 @@ def crawling(ip, port, word: str):
read_bytes(socket_to_server, struct.unpack('!Q', socket_to_server.recv(8))[0]).decode( read_bytes(socket_to_server, struct.unpack('!Q', socket_to_server.recv(8))[0]).decode(
"utf-8")) "utf-8"))
return responseJson['crawl zhiwang'] return responseJson[request_info['action']]
if __name__ == '__main__':
    # Command-line entry point: build one request from the chosen sub-command,
    # send it to the server at --ip/--port and print the server's answer.
    parser = argparse.ArgumentParser('connect-manager')
    parser.add_argument('--ip', type=str, required=True)
    parser.add_argument('--port', type=str, required=True)
    subparsers = parser.add_subparsers(help='provide actions including crawling, login, register',
                                       dest='action')  # create the sub-parsers

    parser_crawling = subparsers.add_parser('crawling')
    parser_crawling.add_argument('--word', type=str, required=True)

    parser_login = subparsers.add_parser('login')
    parser_login.add_argument('--user', type=str, required=True)
    parser_login.add_argument('--password', type=str, required=True)

    parser_register = subparsers.add_parser('register')
    parser_register.add_argument('--user', type=str, required=True)
    parser_register.add_argument('--password', type=str, required=True)

    args = parser.parse_args()
    if args.action is None:
        # Without a sub-command the request would be an empty dict with no
        # 'action' key; stop with a usage error instead of sending it.
        parser.error('an action is required: crawling, login or register')

    if args.action == 'crawling':
        # The command-line name differs from the action name sent on the wire.
        request = {'action': 'crawl zhiwang', 'word': args.word}
    else:
        # 'login' and 'register' carry the same fields.
        request = {'action': args.action, 'user': args.user, 'password': args.password}

    response = send_request(args.ip, args.port, request)
    print(response)

Loading…
Cancel
Save