You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
39 lines
2.8 KiB
39 lines
2.8 KiB
import requests
|
|
from lxml import etree
|
|
import re
|
|
import os
|
|
def main(word, timeout=10):
    """Download Baidu image-search thumbnails for ``word`` into ``<word>/``.

    Requests the Baidu image search URL for the keyword, pulls every
    ``"thumbURL"`` value out of the response text with a regular expression,
    and saves each image as ``<word>/<n>.jpg`` with ``n`` counting from 1.
    Images that fail to download or to be written are reported and skipped,
    so the numbering of saved files stays contiguous.

    Args:
        word: Search keyword; also used as the output directory name.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        The number of images written to disk.

    Raises:
        requests.RequestException: If the search request itself fails.
        OSError: If the output directory cannot be created.
    """
    from urllib.parse import quote

    # Sample search URL captured from the browser (keyword already encoded):
    # https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1702608985846_R&pv=&ic=&nc=1&z=&hd=&latest=&copyright=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&dyTabStr=MCwzLDIsMSw2LDQsNSw3LDgsOQ%3D%3D&ie=utf-8&sid=&word=%E8%8B%B1%E9%9B%84%E8%81%94%E7%9B%9F&f=3&oq=yingx&rsp=0
    #
    # The keyword is percent-encoded so that characters such as "&", "#" or
    # "=" cannot corrupt the query string, and the "&f=3" parameter that
    # follows it in the sample URL is kept as a separate parameter.
    search_url = (
        "https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592"
        "&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1702608985846_R&pv=&ic=&nc=1"
        "&z=&hd=&latest=&copyright=&se=1&showtab=0&fb=0&width=&height=&face=0"
        "&istype=2&dyTabStr=MCwzLDIsMSw2LDQsNSw3LDgsOQ%3D%3D&ie=utf-8&sid="
        f"&word={quote(word, safe='')}&f=3&oq=yingx&rsp=0"
    )

    # SECURITY NOTE(review): this is a personal Baidu session cookie committed
    # to source control. It is kept only as a fallback so existing behaviour
    # does not change; set the BAIDU_COOKIE environment variable to override
    # it, and consider removing the literal from the repository.
    fallback_cookie = "BDUSS=XQwRGQ4SVhZM0swWnV2SH5jRWJGdWRZRlNSQUpzREpHM091WWpheXkybi0yRFZsSVFBQUFBJCQAAAAAAQAAAAEAAADPlMaE0KG617n-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP5LDmX-Sw5lT; BDUSS_BFESS=XQwRGQ4SVhZM0swWnV2SH5jRWJGdWRZRlNSQUpzREpHM091WWpheXkybi0yRFZsSVFBQUFBJCQAAAAAAQAAAAEAAADPlMaE0KG617n-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP5LDmX-Sw5lT; BAIDUID=665ACA5EC25B8387B4645C955AE87D9B:FG=1; BIDUPSID=665ACA5EC25B8387B4645C955AE87D9B; PSTM=1698821074; ZFY=VrdBuHlrIqwZsMWZ5SeQ6qiM7ZxnarxJws23F:AlzmbQ:C; BAIDUID_BFESS=665ACA5EC25B8387B4645C955AE87D9B:FG=1; H_PS_PSSID=39713_39780_39791_39679_39817_39834_39841_39902_39909_39934_39937_39933_39945_39938_39931_39783_39999; BA_HECTOR=8184848l2g018hakag0h810m1inm3ti1q; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BDRCVFR[X_XKQks0S63]=mk3SLVN4HKm; BDRCVFR[Q5XHKaSBNfR]=mk3SLVN4HKm; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; PSINO=7; delPer=0; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; userFrom=null; ab_sr=1.0.1_OWE5OTA4MzhmNjhkNWUwYzEzZGRlZjMzYTRkNjExYzc0NWZlYjEzMWJhNzVhOTRjZmRiYTgyYjU0NDY3NTRlM2YyNTE1NzJmYzNjYzNkMzVkZTgyYTAwMmM3NDNhOTBiNDkwMWUwNTg5NDNhMTA4N2FkZWU0NDFlMjJhOGIzZTAzMGEyNjczNzczODYxNGMwMTAzMDQyNTc1YTAyYTIxMA=="
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
        # Cookie: identity/authentication for the search request.
        "Cookie": os.environ.get("BAIDU_COOKIE", fallback_cookie),
    }

    # A failure here is not swallowed: without the search response there is
    # nothing to download, so the caller should see the error.
    search_response = requests.get(search_url, headers=headers, timeout=timeout)
    search_response.raise_for_status()
    thumb_urls = re.findall(r'"thumbURL":"(.*?)"', search_response.text)

    # Make sure the target directory exists even when main() is called
    # directly rather than through the script entry point.
    os.makedirs(word, exist_ok=True)

    saved = 0
    for image_url in thumb_urls:
        print(image_url)
        try:
            # Fetch the image bytes; an HTTP error status is treated as a
            # failure so that error pages are never saved as .jpg files.
            image_response = requests.get(image_url, timeout=timeout)
            image_response.raise_for_status()
            # Write the image into the keyword's directory.
            target_path = os.path.join(word, f"{saved + 1}.jpg")
            with open(target_path, "wb") as image_file:
                image_file.write(image_response.content)
        except (requests.RequestException, OSError) as exc:
            # Best-effort download: report the problem and move on instead of
            # silently hiding it (and without trapping KeyboardInterrupt).
            print(f"下载失败,已跳过: {image_url} ({exc})")
            continue
        saved += 1
    return saved
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: ask for a search keyword, make sure a directory of
    # the same name exists, then download the images into it.
    # The prompt asks which images the user wants to crawl.
    word = input("请输入你想要爬取的图片 ").strip()
    if not word:
        # An empty keyword would make the directory creation fail on an empty
        # path and gives the search nothing to look for, so stop early.
        raise SystemExit("未输入关键词,已退出。")
    try:
        # exist_ok=True replaces the racy "check, then create" sequence.
        os.makedirs(word, exist_ok=True)
    except OSError as exc:
        # E.g. a regular file with the same name already exists, or the
        # keyword is not a valid directory name on this platform.
        raise SystemExit(f"无法创建目录 {word!r}: {exc}")
    main(word)