diff --git a/Common.py b/Common.py new file mode 100644 index 0000000..43de68c --- /dev/null +++ b/Common.py @@ -0,0 +1,102 @@ +from datetime import datetime +from bs4 import BeautifulSoup +import urllib.request, urllib.error +import ssl, xlwt, re +from Proxypool import Get_UA + +def base_url(): + return 'https://www.dyttcn.com' + +def current_time(): + time = '%a %b %d %H:%M:%S %Y' + return datetime.now().strftime(time) + +def time_diff(start, end): + time = '%a %b %d %H:%M:%S %Y' + return datetime.strptime(end, time) - datetime.strptime(start, time) + +def visitURL(url: str, proxy_ip): + """ + 请求指定URL,获取网页源代码 + :param url: + :return: 返回网页源代码 + """ + # 关闭ssl证书印证 + ssl._create_default_https_context = ssl._create_unverified_context + + head = Get_UA() + + # 安装代理 IP opener + proxy_support = urllib.request.ProxyHandler({'http': proxy_ip}) + opener = urllib.request.build_opener(proxy_support) + urllib.request.install_opener(opener) + + request = urllib.request.Request(url=url, headers=head, method="GET") + + try: + response = urllib.request.urlopen(request) + html = response.read().decode("gbk") + return html + except urllib.error.HTTPError as e: + if hasattr(e, "code"): + print(e.code) + if hasattr(e, "reason"): + print(e.reason) + return False + +def get_movie_info(data_queue: list, result_queue: list, proxy_ip): + """ + :param data_queue: + :param result_queue: + :return: None + """ + # 正则匹配 + findname = re.compile(r'