实现web服务器向爬虫服务器通信,请求登录和注册,以及丰富了两者之间的通信类型

master
wufayuan 3 years ago
parent 888089ca40
commit 1a9e10313a

@ -152,3 +152,79 @@
2022-03-24 15:46:50.788 | INFO | __main__:<module>:15 - starting the server...
2022-03-24 15:59:48.432 | INFO | dcs.tests.requestHandler:run:47 - [REQUEST] crawl zhiwang
2022-03-24 15:59:48.432 | INFO | dcs.tests.spider:run:63 - crawling...
2022-03-24 16:48:50.400 | INFO | __main__:<module>:8 - reading config args...
2022-03-24 16:48:50.401 | INFO | __main__:<module>:15 - starting the server...
2022-03-24 16:50:01.739 | INFO | dcs.tests.requestHandler:run:47 - [REQUEST] crawl zhiwang
2022-03-24 16:50:01.739 | INFO | dcs.tests.spider:run:63 - crawling...
2022-03-24 16:53:06.225 | INFO | __main__:<module>:8 - reading config args...
2022-03-24 16:53:06.226 | INFO | __main__:<module>:15 - starting the server...
2022-03-24 16:53:14.249 | INFO | dcs.tests.requestHandler:run:47 - [REQUEST] crawl zhiwang
2022-03-24 16:53:14.249 | INFO | dcs.tests.spider:run:65 - crawling...
2022-03-24 16:53:14.249 | INFO | dcs.tests.spider:run:67 - 1648111994.2494771
2022-03-24 16:53:14.252 | INFO | dcs.tests.requestHandler:run:59 - [RESPONSE] crawl zhiwang: success, header size: 28
2022-03-24 16:53:23.793 | INFO | dcs.tests.requestHandler:run:62 - [REQUEST] report free
2022-03-24 16:53:23.793 | INFO | dcs.tests.requestHandler:run:74 - [RESPONSE] report free: success marked ['127.0.0.1', 7777], header size: 53
2022-03-24 16:53:27.349 | INFO | dcs.tests.requestHandler:run:47 - [REQUEST] crawl zhiwang
2022-03-24 16:53:27.350 | INFO | dcs.tests.spider:run:65 - crawling...
2022-03-24 16:53:27.351 | INFO | dcs.tests.spider:run:67 - 1648112007.351529
2022-03-24 16:53:27.351 | INFO | dcs.tests.requestHandler:run:59 - [RESPONSE] crawl zhiwang: success, header size: 28
2022-03-24 16:53:27.353 | INFO | dcs.tests.requestHandler:run:62 - [REQUEST] report free
2022-03-24 16:53:27.354 | INFO | dcs.tests.requestHandler:run:74 - [RESPONSE] report free: success marked ['127.0.0.1', 7777], header size: 53
2022-03-24 16:53:27.355 | INFO | dcs.tests.server:run:36 - [REQUEST] end
2022-03-24 16:53:27.355 | WARNING | dcs.tests.server:run:37 - communication over!
2022-03-24 16:53:27.356 | WARNING | __main__:<module>:21 - Overing...
2022-03-26 09:50:18.770 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 09:50:18.770 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 09:51:32.660 | WARNING | __main__:<module>:21 - Overing...
2022-03-26 09:55:34.616 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 09:55:34.617 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 09:55:38.207 | WARNING | __main__:<module>:21 - Overing...
2022-03-26 09:57:10.319 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 09:57:10.320 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 09:58:54.150 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 09:58:54.151 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 09:59:51.615 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 09:59:51.616 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 09:59:54.161 | INFO | dcs.tests.requestHandler:login:67 - [REQUEST] login
2022-03-26 10:01:15.556 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:01:15.557 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:01:18.531 | INFO | dcs.tests.requestHandler:login:67 - [REQUEST] login
2022-03-26 10:01:18.546 | INFO | dcs.tests.requestHandler:login:74 - [RESPONSE] login: success
2022-03-26 10:03:25.146 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:03:25.146 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:04:08.583 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:04:08.584 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:04:11.472 | INFO | dcs.tests.requestHandler:login:67 - [REQUEST] login
2022-03-26 10:04:11.500 | INFO | dcs.tests.requestHandler:login:74 - [RESPONSE] login: None
2022-03-26 10:04:36.384 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:04:36.384 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:04:38.569 | INFO | dcs.tests.requestHandler:login:67 - [REQUEST] login
2022-03-26 10:04:38.573 | INFO | dcs.tests.requestHandler:login:74 - [RESPONSE] login: None
2022-03-26 10:05:58.156 | INFO | dcs.tests.requestHandler:login:67 - [REQUEST] login
2022-03-26 10:05:58.181 | INFO | dcs.tests.requestHandler:login:74 - [RESPONSE] login: None
2022-03-26 10:06:08.316 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:06:08.316 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:06:10.545 | INFO | dcs.tests.requestHandler:login:67 - [REQUEST] login
2022-03-26 10:06:10.575 | INFO | dcs.tests.requestHandler:login:74 - [RESPONSE] login: 用户名错误,登录失败
2022-03-26 10:06:50.711 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:06:50.712 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:06:59.310 | INFO | dcs.tests.requestHandler:login:67 - [REQUEST] login
2022-03-26 10:06:59.338 | INFO | dcs.tests.requestHandler:login:74 - [RESPONSE] login: 密码错误,登录失败
2022-03-26 10:09:58.400 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:09:58.401 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:11:43.925 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:11:43.926 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:12:14.824 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:12:14.824 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:12:17.060 | INFO | dcs.tests.requestHandler:login:67 - [REQUEST] login
2022-03-26 10:12:17.076 | INFO | dcs.tests.requestHandler:login:74 - [RESPONSE] login: 用户名错误,登录失败
2022-03-26 10:12:43.494 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:12:43.494 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:12:46.095 | INFO | dcs.tests.requestHandler:register:77 - [REQUEST] register
2022-03-26 10:12:46.100 | INFO | dcs.tests.requestHandler:register:84 - [RESPONSE] register: None
2022-03-26 10:13:58.556 | INFO | __main__:<module>:8 - reading config args...
2022-03-26 10:13:58.556 | INFO | __main__:<module>:15 - starting the server...
2022-03-26 10:14:01.105 | INFO | dcs.tests.requestHandler:register:77 - [REQUEST] register
2022-03-26 10:14:01.128 | INFO | dcs.tests.requestHandler:register:84 - [RESPONSE] register: 注册成功
2022-03-26 10:14:29.237 | INFO | dcs.tests.requestHandler:login:67 - [REQUEST] login
2022-03-26 10:14:29.252 | INFO | dcs.tests.requestHandler:login:74 - [RESPONSE] login: 1登录成功

@ -3,67 +3,73 @@ import pymysql
# Obtain a database connection object
def mysql_conn(host='127.0.0.1', user='root', passwd='xwdjzwy5252', db='test'):
    """Open and return a new PyMySQL connection.

    Only the parameterised definition is kept; the stale zero-argument
    variant that pointed at the old ``qqq`` database has been dropped.

    :param host: MySQL server address
    :param user: MySQL user name
    :param passwd: password for ``user``
    :param db: name of the database to select
    :return: the connection object returned by ``pymysql.connect``; the caller
        is responsible for closing it
    """
    # NOTE(review): credentials are hard-coded as defaults; consider loading
    # them from configuration or the environment instead of source code.
    return pymysql.connect(host=host, user=user, passwd=passwd, db=db)
def register(u_name, u_pwd):
    """Register a new user and report the outcome as a message string.

    The password is stored as its SHA-1 hex digest. The user name is looked up
    first; a row is inserted and committed only when the name is not taken.

    :param u_name: requested user name (comes from the client request)
    :param u_pwd: plain-text password
    :return: '用户名已存在,注册失败' when the name is taken, '注册成功' after a
        successful insert, or ``None`` when a database error occurred (the
        error is printed)
    """
    # NOTE(review): unsalted SHA-1 is a weak password hash; changing it would
    # invalidate the digests already stored, so it is only flagged here.
    sha_pwd = sha1(u_pwd.encode()).hexdigest()
    conn = None
    try:
        # Obtain the connection and a cursor to run statements on
        conn = mysql_conn()
        cur = conn.cursor()
        try:
            # Parameterised query: u_name is untrusted input and must never be
            # formatted into the SQL text.
            cur.execute(
                'select user_password from userinfotest where user_name = %s',
                (u_name,),
            )
            # fetchone() returns None when no row matched, i.e. the name is free
            if cur.fetchone() is not None:
                return '用户名已存在,注册失败'
            print('该用户名可以使用')
            # Insert the new user; the insert must be committed explicitly
            cur.execute(
                'insert into userinfotest (user_name, user_password) values (%s,%s)',
                (u_name, sha_pwd),
            )
            conn.commit()
            return '注册成功'
        finally:
            cur.close()
    except Exception as e:
        # Best effort: callers forward the result to the client, so report the
        # failure as None instead of raising.
        print(e)
        return None
    finally:
        # Always release the connection, including on the error path
        if conn is not None:
            try:
                conn.close()
            except Exception as e:
                # The connection may already be closed after a network error
                print(e)
def login(u_name, u_pwd):
    """Check a user's credentials and report the outcome as a message string.

    The SHA-1 hex digest of ``u_pwd`` is compared with the digest stored for
    ``u_name``.

    :param u_name: user name (comes from the client request)
    :param u_pwd: plain-text password
    :return: '用户名错误,登录失败' when the user does not exist,
        '密码错误,登录失败' when the digest does not match,
        '用户' + u_name + '登录成功' on success, or ``None`` when a database
        error occurred (the error is printed)
    """
    # NOTE(review): unsalted SHA-1 is a weak password hash; changing it would
    # invalidate the digests already stored, so it is only flagged here.
    sha_pwd = sha1(u_pwd.encode()).hexdigest()
    conn = None
    try:
        conn = mysql_conn()
        cur = conn.cursor()
        try:
            # Parameterised query: u_name is untrusted input and must never be
            # formatted into the SQL text.
            cur.execute(
                'select user_password from userinfotest where user_name = %s',
                (u_name,),
            )
            res = cur.fetchone()
        finally:
            cur.close()
        if res is None:
            # No row for this user name
            return '用户名错误,登录失败'
        # The stored digest is deliberately not printed any more: echoing
        # password hashes to stdout leaks them into logs.
        if res[0] == sha_pwd:
            return '用户' + u_name + '登录成功'
        return '密码错误,登录失败'
    except Exception as e:
        # Best effort: callers forward the result to the client, so report the
        # failure as None instead of raising.
        print(e)
        return None
    finally:
        # Always release the connection, including on the error path
        if conn is not None:
            try:
                conn.close()
            except Exception as e:
                # The connection may already be closed after a network error
                print(e)
def cancel():
def cancel(u_name):
try:
conn = mysql_conn()
cur = conn.cursor()
@ -76,6 +82,8 @@ def cancel():
if __name__ == '__main__':
pass
'''
u_name = input('请输入用户名')
u_pwd = input('请输入密码')
@ -87,3 +95,4 @@ if __name__ == '__main__':
# register()
login()
'''

@ -3,12 +3,22 @@ import threading
import json
import struct
import dcs.tests.config
import dcs.tests.database as database
from loguru import logger
from dcs.tests.spider import Spider
def generate_response(response):
    """Serialize a response into a length-prefixed binary frame.

    The response is JSON-encoded as UTF-8 and preceded by the payload's byte
    length packed as an 8-byte unsigned big-endian integer (``!Q``).

    :param response: JSON-serializable object
    :return: bytes made of the 8-byte length header followed by the payload
    """
    payload = json.dumps(response).encode("utf-8")
    header = struct.pack("!Q", len(payload))
    return header + payload
class RequestHandler(threading.Thread):
def __init__(self, file_server: 'FileServer', client_socket: 'socket.socket', request_map: 'dict'):
def __init__(self, file_server, client_socket: 'socket.socket', request_map: 'dict'):
super().__init__()
self.file_server = file_server
self.client_socket = client_socket
@ -16,62 +26,77 @@ class RequestHandler(threading.Thread):
self.daemon = True
pass
def test(self):
    """Answer a 'test' request with a fixed greeting.

    Sends ``{'test': 'hello TEST'}`` to the client as a frame built by
    ``generate_response`` and logs both the request and the reply.
    """
    logger.info("[REQUEST] test")
    reply = {'test': 'hello TEST'}
    frame = generate_response(reply)
    self.client_socket.sendall(frame)
    logger.info(f"[RESPONSE] test: {reply['test']}")
def translate(self):
    """Handle a 'translate' request.

    Runs a ``Spider`` built from ``request_map['word']`` and sends whatever
    ``Spider.run()`` returns back to the client under the ``'translate'`` key.
    """
    logger.info("[REQUEST] translate")
    word = self.request_map['word']
    result = Spider(word).run()
    reply = {'translate': result}
    self.client_socket.sendall(generate_response(reply))
    logger.info(f"[RESPONSE] translate: {reply['translate']}")
def crawl_zhiwang(self):
    """Handle a 'crawl zhiwang' request.

    Runs a ``Spider`` built from ``request_map['word']`` and then replies with
    the fixed status ``'success'``; the value returned by the spider is not
    used.
    """
    logger.info("[REQUEST] crawl zhiwang")
    Spider(self.request_map['word']).run()
    reply = {'crawl zhiwang': 'success'}  # TODO
    frame = generate_response(reply)
    self.client_socket.sendall(frame)
    logger.info(f"[RESPONSE] crawl zhiwang: {reply['crawl zhiwang']}")
def report_state(self, state):
    """Record the reporting spider as free and acknowledge the report.

    ``request_map['spider_info']`` is added to the free-spider list kept by
    ``dcs.tests.config`` unless it is already present, and a confirmation is
    sent back under the ``'report_free'`` key.

    :param state: currently unused; the spider is always marked as free
    """
    logger.info("[REQUEST] report free")
    spider_info = self.request_map['spider_info']
    already_free = spider_info in dcs.tests.config.get_free_spiders()
    if not already_free:
        dcs.tests.config.add_free_spider(spider_info)
    reply = {'report_free': 'success marked ' + str(spider_info)}
    self.client_socket.sendall(generate_response(reply))
    logger.info(f"[RESPONSE] report free: {reply['report_free']}")
def login(self, user, password):
    """Handle a 'login' request and send the outcome back to the client.

    The credentials are passed to ``database.login``; its return value is sent
    to the client under the ``'login'`` key.

    The earlier version also called ``database.mysql_conn()`` and discarded
    the result. That connection was never used or closed, so every login
    request leaked one database connection; the call has been removed.

    :param user: user name taken from the request
    :param password: plain-text password taken from the request
    """
    logger.info("[REQUEST] login")
    result = database.login(user, password)
    response = {'login': result}
    self.client_socket.sendall(generate_response(response))
    logger.info(f"[RESPONSE] login: {response['login']}")
def register(self, user, password):
    """Handle a 'register' request and send the outcome back to the client.

    The credentials are passed to ``database.register``; its return value is
    sent to the client under the ``'register'`` key.

    The earlier version also called ``database.mysql_conn()`` and discarded
    the result. That connection was never used or closed, so every register
    request leaked one database connection; the call has been removed.

    :param user: requested user name taken from the request
    :param password: plain-text password taken from the request
    """
    logger.info("[REQUEST] register")
    result = database.register(user, password)
    response = {'register': result}
    self.client_socket.sendall(generate_response(response))
    logger.info(f"[RESPONSE] register: {response['register']}")
def run(self) -> None:
try:
if self.request_map['action'] == 'test':
logger.info(f"[REQUEST] test")
response = {
'test': 'hello TEST'
}
response_binary = json.JSONEncoder().encode(response).encode("utf-8")
response_binary_len = len(response_binary)
response_binary_len_binary = struct.pack("!Q", response_binary_len)
response_binary = response_binary_len_binary + response_binary
self.client_socket.sendall(response_binary)
logger.info(f"[RESPONSE] test: {response['test']}, header size: {response_binary_len}")
self.test()
elif self.request_map['action'] == 'translate':
logger.info(f"[REQUEST] translate")
spider = Spider(self.request_map['word'])
response = {
'translate': spider.run()
}
response_binary = json.JSONEncoder().encode(response).encode("utf-8")
response_binary_len = len(response_binary)
response_binary_len_binary = struct.pack("!Q", response_binary_len)
response_binary = response_binary_len_binary + response_binary
self.client_socket.sendall(response_binary)
logger.info(f"[RESPONSE] translate: {response['translate']}, header size: {response_binary_len}")
self.translate()
elif self.request_map['action'] == 'crawl zhiwang':
logger.info(f"[REQUEST] crawl zhiwang")
spider = Spider(self.request_map['word'])
spider.run()
response = {
'crawl zhiwang': 'success' # TODO
}
response_binary = json.JSONEncoder().encode(response).encode("utf-8")
response_binary_len = len(response_binary)
response_binary_len_binary = struct.pack("!Q", response_binary_len)
response_binary = response_binary_len_binary + response_binary
self.client_socket.sendall(response_binary)
logger.info(
f"[RESPONSE] crawl zhiwang: {response['crawl zhiwang']}, header size: {response_binary_len}")
self.crawl_zhiwang()
elif self.request_map['action'] == 'report_free':
logger.info(f"[REQUEST] report free")
if self.request_map['spider_info'] not in dcs.tests.config.get_free_spiders():
dcs.tests.config.add_free_spider(self.request_map['spider_info'])
response = {
'report_free': 'success marked ' + str(self.request_map['spider_info'])
}
response_binary = json.JSONEncoder().encode(response).encode("utf-8")
response_binary_len = len(response_binary)
response_binary_len_binary = struct.pack("!Q", response_binary_len)
response_binary = response_binary_len_binary + response_binary
self.client_socket.sendall(response_binary)
logger.info(
f"[RESPONSE] report free: {response['report_free']}, header size: {response_binary_len}")
self.report_state('free')
elif self.request_map['action'] == 'login':
self.login(self.request_map['user'], self.request_map['password'])
elif self.request_map['action'] == 'register':
self.register(self.request_map['user'], self.request_map['password'])
finally:
self.client_socket.close()

@ -1,4 +1,6 @@
import threading
import time
import dcs.tests.config
from msedge.selenium_tools import Edge
from msedge.selenium_tools import EdgeOptions

@ -3,4 +3,5 @@ requests~=2.27.1
pandas~=1.3.4
bs4~=0.0.1
beautifulsoup4~=4.10.0
selenium~=4.1.3
selenium~=3.141.0
PyMySQL~=1.0.2

@ -3,16 +3,18 @@ import struct
import argparse
from json import JSONEncoder, JSONDecoder
def generate_request(request_info) -> 'bytes':
    """
    Build a request frame from the given dict.

    The frame is an 8-byte header holding the payload length (unsigned,
    big-endian) followed by the JSON-encoded payload in UTF-8.

    Only the current definition is kept. The stale line that encoded the old
    name ``request`` is gone: that name is no longer a parameter here.

    :param request_info: dict describing the request
    :return: bytes request data
    """
    request_bytes = JSONEncoder().encode(request_info).encode("utf-8")
    return struct.pack("!Q", len(request_bytes)) + request_bytes
def read_bytes(s: 'socket.socket', size: 'int') -> 'bytes':
"""
从socket读取size个字节
@ -28,14 +30,12 @@ def read_bytes(s: 'socket.socket', size: 'int') -> 'bytes':
break
return data
def crawling(ip, port, word: str):
def send_request(ip, port, request_info):
with socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP) as socket_to_server:
socket_to_server.connect((ip, int(port)))
request = dict()
request['action'] = 'crawl zhiwang'
request['word'] = word
full_request = generate_request(request)
full_request = generate_request(request_info)
socket_to_server.sendall(full_request)
@ -43,15 +43,34 @@ def crawling(ip, port, word: str):
read_bytes(socket_to_server, struct.unpack('!Q', socket_to_server.recv(8))[0]).decode(
"utf-8"))
return responseJson['crawl zhiwang']
return responseJson[request_info['action']]
if __name__ == '__main__':
    # Command-line client: connect to the server at --ip/--port and perform one
    # action (crawling, login or register), then print the server's answer.
    parser = argparse.ArgumentParser('connect-manager')
    parser.add_argument('--ip', type=str, required=True)
    parser.add_argument('--port', type=str, required=True)

    # Create the sub-parsers; the chosen sub-command is stored in args.action
    subparsers = parser.add_subparsers(help='provide actions including crawling, login, register',
                                       dest='action')

    parser_crawling = subparsers.add_parser('crawling')
    parser_crawling.add_argument('--word', type=str, required=True)

    parser_login = subparsers.add_parser('login')
    parser_login.add_argument('--user', type=str, required=True)
    parser_login.add_argument('--password', type=str, required=True)

    parser_register = subparsers.add_parser('register')
    parser_register.add_argument('--user', type=str, required=True)
    parser_register.add_argument('--password', type=str, required=True)

    args = parser.parse_args()

    # Without a sub-command an empty request would be sent and the lookup of
    # request_info['action'] in send_request would fail; stop with a usage error.
    if args.action is None:
        parser.error('an action is required: crawling, login or register')

    if args.action == 'crawling':
        request = {'action': 'crawl zhiwang', 'word': args.word}
    else:
        # 'login' or 'register' -- argparse rejects any other sub-command
        request = {'action': args.action, 'user': args.user, 'password': args.password}

    response = send_request(args.ip, args.port, request)
    print(response)

Loading…
Cancel
Save