"""Download the images matched by ``.movie-hover-img`` on a Maoyan film list page.

The script fetches ``url`` with browser-like ``headers``, parses the HTML with
BeautifulSoup (lxml parser), and saves every matched ``<img>`` into
``OUTPUT_DIR`` as ``<alt text>.jpg``.
"""
import os
import sys
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup

url = 'https://www.maoyan.com/films?showType=1'

# NOTE(review): the 'cookie' value below embeds a live session token and user
# id. Secrets should not be committed to source control; consider loading the
# cookie from an environment variable or a local, untracked config file.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36 Edg/142.0.0.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'cookie': 'uuid_n_v=v1; uuid=E14D2110BA2511F084B07FE9CD57F5C1AE336B7C9C654F11BEFC4C0C2E3094E5; _lxsdk_cuid=19a533f0f66c8-035844f1a84ed9-4c657b58-26745c-19a533f0f66c8; _ga=GA1.1.1935872800.1762333234; WEBDFPID=70294y71u33y5xy70700506vuy901w6z80z46396zw197958177v28u9-1762421808487-1762335407008OEOGOMOfd79fef3d01d5e9aadc18ccd4d0c95074001; utm_source_rg=AM%25a63kvkv%25456; token=AgGyIWvs3GXQ333BPRu-rpr9jBG6sOa7sSDUR2ERThtF2t4NsgJh08xbYYEin3axYVykvNSneW4wOwAAAAB9LgAAkOszlMAnPdVfJE8YhKy2Dao2tKDk6zGFF8pWm30BNJUri18rQYe-v61-n-8ZLZjx; uid=2982575540; uid.sig=_z1u0Fjn6_PmoW3Ry9pUs4RHvZ4; _lxsdk=E14D2110BA2511F084B07FE9CD57F5C1AE336B7C9C654F11BEFC4C0C2E3094E5; _lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; _csrf=a73e2741f6bb93bfe81ca7677402ccc7046f56fff46222d912a5cdc537d59e92; Hm_lvt_e0bacf12e04a7bd88ddbd9c74ef2b533=1762333233,1762587928; Hm_lpvt_e0bacf12e04a7bd88ddbd9c74ef2b533=1762587928; HMACCOUNT=F87CC60DE409423A; _ga_WN80P4PSY7=GS2.1.s1762587927$o2$g0$t1762587927$j60$l0$h0; __mta=244176442.1762333234102.1762337013692.1762587927950.8; _lxsdk_s=19a626d6484-662-fb5-b7d%7C%7C2',
}

# Directory the downloaded images are written to (created on demand).
OUTPUT_DIR = '胡文浩'

# Seconds to wait for the listing page before giving up.
REQUEST_TIMEOUT = 30

# Characters that are path separators or are rejected by common file systems.
_UNSAFE_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})


def _safe_filename(name):
    """Return *name* with file-system-unsafe characters replaced by ``_``."""
    return name.translate(_UNSAFE_CHARS).strip()


def main():
    """Fetch the listing page and save every ``.movie-hover-img`` image.

    Images without a ``src`` are skipped. Images without usable ``alt`` text
    are saved as ``poster_<n>.jpg``. A failed download is reported on stderr
    and does not stop the remaining downloads.

    Raises:
        urllib.error.URLError: if the listing page itself cannot be fetched.
    """
    request = urllib.request.Request(url=url, headers=headers)
    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT) as response:
        content = response.read().decode('utf-8')

    soup = BeautifulSoup(content, 'lxml')

    # urlretrieve does not create missing parent directories.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    for index, img in enumerate(soup.select('.movie-hover-img'), start=1):
        pic_src = img.get('src')
        if not pic_src:
            continue

        # The alt text becomes the file name, so it must be sanitized and
        # must not be empty.
        pic_name = _safe_filename(img.get('alt') or '') or f'poster_{index}'
        target = os.path.join(OUTPUT_DIR, pic_name + '.jpg')

        # Resolve relative and protocol-relative ("//host/...") sources
        # against the page URL; absolute URLs pass through unchanged.
        absolute_src = urllib.parse.urljoin(url, pic_src)
        try:
            urllib.request.urlretrieve(url=absolute_src, filename=target)
        except OSError as err:  # URLError/HTTPError are OSError subclasses
            print(f'failed to download {absolute_src}: {err}', file=sys.stderr)


if __name__ == '__main__':
    main()