# Fetch one listing page of the target site.
def work(index, timeout=10):
    """Download listing page ``index`` and return its HTML as text.

    Args:
        index: Page number interpolated into the ``index_<n>.html`` URL.
        timeout: Seconds to wait for the server before giving up. Passed
            straight through to ``requests.get``.

    Returns:
        The response body as decoded by ``requests`` (``Response.text``).

    Raises:
        requests.exceptions.RequestException: Propagated unchanged from
            ``requests.get``; this includes ``Timeout`` when the server
            does not answer within ``timeout`` seconds.
    """
    url = f'https://pic.netbian.com/index_{index}.html'
    # requests applies no timeout by default, so without this argument a
    # stalled server would block the caller forever.
    response = requests.get(url, timeout=timeout)
    return response.text