parent
611b505f6b
commit
748f755dc1
@ -0,0 +1,39 @@
|
||||
import requests
|
||||
from lxml import etree
|
||||
import re
|
||||
import os
|
||||
def main(word: str) -> None:
    """Download Baidu image-search thumbnails for ``word`` into ``./<word>/``.

    The Baidu image search page is fetched for the keyword, every
    ``"thumbURL"`` value embedded in the response text is extracted, and each
    thumbnail is saved as ``<word>/<n>.jpg`` with ``n`` counting successfully
    saved images from 1.

    Args:
        word: Search keyword. It is also used as the name of the output
            directory, which is created if it does not exist yet.

    Raises:
        requests.RequestException: If the search page itself cannot be
            fetched (connection error, timeout, or HTTP error status).
            Failures on individual thumbnails are reported and skipped.
    """
    search_url = "https://image.baidu.com/search/index"
    # Query parameters captured from a browser session. Passing them as a
    # dict lets requests URL-encode the keyword and keeps ``word`` and ``f``
    # as separate parameters.
    params = {
        "tn": "baiduimage",
        "ipn": "r",
        "ct": "201326592",
        "cl": "2",
        "lm": "-1",
        "st": "-1",
        "fm": "result",
        "fr": "",
        "sf": "1",
        "fmq": "1702608985846_R",
        "pv": "",
        "ic": "",
        "nc": "1",
        "z": "",
        "hd": "",
        "latest": "",
        "copyright": "",
        "se": "1",
        "showtab": "0",
        "fb": "0",
        "width": "",
        "height": "",
        "face": "0",
        "istype": "2",
        "dyTabStr": "MCwzLDIsMSw2LDQsNSw3LDgsOQ==",
        "ie": "utf-8",
        "sid": "",
        "word": word,
        "f": "3",
        "oq": "yingx",
        "rsp": "0",
    }
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
    }
    # Cookie: identity/authentication. A logged-in session cookie must not be
    # committed to source control, so it is supplied through the BAIDU_COOKIE
    # environment variable when one is needed.
    cookie = os.environ.get("BAIDU_COOKIE")
    if cookie:
        headers["Cookie"] = cookie

    # Seconds to wait on each HTTP request before giving up, so a stalled
    # connection cannot hang the script forever.
    timeout = 10

    response = requests.get(
        search_url, params=params, headers=headers, timeout=timeout
    )
    response.raise_for_status()
    thumb_urls = re.findall(r'"thumbURL":"(.*?)"', response.text)

    # Make the function usable on its own, without relying on the caller to
    # have created the output directory.
    os.makedirs(word, exist_ok=True)

    n = 1
    for thumb_url in thumb_urls:
        print(thumb_url)
        try:
            # Fetch the image bytes.
            image = requests.get(thumb_url, timeout=timeout)
            # Do not save an HTTP error page under a .jpg name.
            image.raise_for_status()
            # Write the image into the keyword's directory.
            with open(os.path.join(word, f"{n}.jpg"), "wb") as f:
                f.write(image.content)
        except (requests.RequestException, OSError) as exc:
            # Best effort: one bad thumbnail must not abort the whole run,
            # but the failure is reported instead of silently swallowed.
            print(f"skipped {thumb_url}: {exc}")
            continue
        n += 1
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Strip surrounding whitespace so stray spaces end up neither in the
    # search keyword nor in the output directory name.
    word = input("请输入你想要爬取的图片 ").strip()
    if not word:
        # An empty keyword would make the directory creation below fail
        # with FileNotFoundError; stop with a clear message instead.
        raise SystemExit("No search keyword entered; nothing to download.")
    # The keyword doubles as the output directory name. exist_ok avoids the
    # check-then-create race of os.path.exists() followed by os.mkdir().
    os.makedirs(word, exist_ok=True)
    main(word)
|
Loading…
Reference in new issue