diff --git a/爬取任意图片.py b/爬取任意图片.py new file mode 100644 index 0000000..467afbe --- /dev/null +++ b/爬取任意图片.py @@ -0,0 +1,39 @@ +import requests +from lxml import etree +import re +import os +def main(word): + # https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1702608985846_R&pv=&ic=&nc=1&z=&hd=&latest=©right=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&dyTabStr=MCwzLDIsMSw2LDQsNSw3LDgsOQ%3D%3D&ie=utf-8&sid=&word=%E8%8B%B1%E9%9B%84%E8%81%94%E7%9B%9F&f=3&oq=yingx&rsp=0 + url=f"https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1702608985846_R&pv=&ic=&nc=1&z=&hd=&latest=©right=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&dyTabStr=MCwzLDIsMSw2LDQsNSw3LDgsOQ%3D%3D&ie=utf-8&sid=&word={word}=3&oq=yingx&rsp=0" + headers={"User-Agent": +"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0", + # Cookie:身份认证 + "Cookie":"BDUSS=XQwRGQ4SVhZM0swWnV2SH5jRWJGdWRZRlNSQUpzREpHM091WWpheXkybi0yRFZsSVFBQUFBJCQAAAAAAQAAAAEAAADPlMaE0KG617n-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP5LDmX-Sw5lT; BDUSS_BFESS=XQwRGQ4SVhZM0swWnV2SH5jRWJGdWRZRlNSQUpzREpHM091WWpheXkybi0yRFZsSVFBQUFBJCQAAAAAAQAAAAEAAADPlMaE0KG617n-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP5LDmX-Sw5lT; BAIDUID=665ACA5EC25B8387B4645C955AE87D9B:FG=1; BIDUPSID=665ACA5EC25B8387B4645C955AE87D9B; PSTM=1698821074; ZFY=VrdBuHlrIqwZsMWZ5SeQ6qiM7ZxnarxJws23F:AlzmbQ:C; BAIDUID_BFESS=665ACA5EC25B8387B4645C955AE87D9B:FG=1; H_PS_PSSID=39713_39780_39791_39679_39817_39834_39841_39902_39909_39934_39937_39933_39945_39938_39931_39783_39999; BA_HECTOR=8184848l2g018hakag0h810m1inm3ti1q; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BDRCVFR[X_XKQks0S63]=mk3SLVN4HKm; BDRCVFR[Q5XHKaSBNfR]=mk3SLVN4HKm; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; PSINO=7; delPer=0; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; userFrom=null; ab_sr=1.0.1_OWE5OTA4MzhmNjhkNWUwYzEzZGRlZjMzYTRkNjExYzc0NWZlYjEzMWJhNzVhOTRjZmRiYTgyYjU0NDY3NTRlM2YyNTE1NzJmYzNjYzNkMzVkZTgyYTAwMmM3NDNhOTBiNDkwMWUwNTg5NDNhMTA4N2FkZWU0NDFlMjJhOGIzZTAzMGEyNjczNzczODYxNGMwMTAzMDQyNTc1YTAyYTIxMA==" + } + r=requests.get(url,headers=headers).text + + url_list=re.findall('"thumbURL":"(.*?)"',r) + + + n=1 + for url in url_list: + try: + print(url) + # 图片地址 + img_code=requests.get(url).content + # 写入图片 + # 在word文件去下载图片 + with open(f"{word}/{n}.jpg","wb") as f: + f.write(img_code) + n+=1 + except: + pass + + + + +if __name__=="__main__": + word=input("请输入你想要爬取的图片 ") + if not os.path.exists(word): + os.mkdir(word) + main(word) \ No newline at end of file