You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
66 lines
3.0 KiB
66 lines
3.0 KiB
# program2
import csv
from typing import Optional, Sequence

import parsel
import requests
# Column order of the output CSV. The Chinese strings are the literal header
# cells / row keys written to the file, so they must stay exactly as they are.
FIELDNAMES = [
    "店名",  # shop name
    "评论",  # review count
    "人均消费",  # average spend per person
    "口味",  # taste score
    "环境",  # environment score
    "服务",  # service score
    "地址",  # address
    "电话",  # phone number
    "详情页",  # detail-page URL
]

# Search-results page that lists the shops to scrape.
url = 'https://www.dianping.com/search/keyword/134/0_%E7%81%AB%E9%94%85'

headers = {
    # User session info; the site uses it to check whether an account is logged in.
    # NOTE(review): this is a hard-coded session cookie. It will expire and it is
    # a credential checked into source -- consider loading it from the environment.
    'Cookie':'fspop=test; _lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; _lxsdk_cuid=18fed6691b1c8-077cc1cd96e08b-4c657b58-1bcab9-18fed6691b1b9; _lxsdk=18fed6691b1c8-077cc1cd96e08b-4c657b58-1bcab9-18fed6691b1b9; _hc.v=b59c272a-cc68-9272-45bd-4a21d06575ba.1717674873; Hm_lvt_602b80cf8079ae6591966cc70a3940e7=1717674875; s_ViewType=10; WEBDFPID=85z47658533w5776z14z954wv8v68z3381uvw3106w3979580x97w975-2033035004378-1717675004378WSSGKIQfd79fef3d01d5e9aadc18ccd4d0c95072217; dper=02027f22d8308fa4765341f0ef3f07cf5ef6575ef1f8ee803e73f4c40eac496f73f16954b7b86ded9657469ec37d57f31ac0d1fb301d8ccc13bf00000000a62000006ba8738b6163aa0aea87ddc5058913f2397cc45edf51a4b6730c4e9e34392b6d17fd4c415110b9ecf131bb0b48dca78c; qruuid=c408efa4-163b-468c-84ac-355e90a7ad2d; ll=7fd06e815b796be3df069dec7836c3df; cy=344; cye=changsha; _lxsdk_s=18fed6691b2-ab3-62d-b80%7C%7C338; Hm_lpvt_602b80cf8079ae6591966cc70a3940e7=1717677461',
    # Domain name.
    'Host' : 'www.dianping.com',
    # Anti-hotlinking: tells the server which page the request was navigated from.
    'Referer':'https://www.dianping.com/',
    'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
}

# Seconds to wait for the server before giving up; without a timeout a stalled
# connection would hang the script forever.
REQUEST_TIMEOUT = 10


def extract_score(items: Sequence[str], position: int) -> Optional[str]:
    """Return the score text found at *position* in *items*.

    The entry is split on ``': '`` and the last piece is kept, so
    ``'label: 4.5'`` yields ``'4.5'``; an entry without that separator is
    returned unchanged.  Returns ``None`` when *position* is out of range,
    which happens when the page did not contain the expected score items.
    """
    try:
        return items[position].split(': ')[-1]
    except IndexError:
        return None


def last_or_none(values: Sequence[str]) -> Optional[str]:
    """Return the last element of *values*, or ``None`` when it is empty."""
    return values[-1] if values else None


def parse_shop(html_data: str, detail_url: str) -> dict:
    """Build one CSV row from the HTML of a shop detail page.

    Args:
        html_data: HTML text of the detail page.
        detail_url: URL the page was fetched from; stored in the row as-is.

    Returns:
        A dict keyed by ``FIELDNAMES``.  Any field whose element is missing
        from the page is ``None`` (written by ``csv`` as an empty cell).
    """
    page = parsel.Selector(html_data)
    item_list = page.css('#comment_score .item::text').getall()  # rating items
    return {
        "店名": page.css('.shop-name::text').get(),  # shop name
        "评论": page.css('#reviewCount::text').get(),  # review count
        "人均消费": page.css('#avgPriceTitle::text').get(),  # average spend
        "口味": extract_score(item_list, 0),  # taste score (first item)
        "环境": extract_score(item_list, 1),  # environment score (second item)
        "服务": extract_score(item_list, -1),  # service score (last item)
        "地址": page.css('#address::text').get(),  # address
        "电话": last_or_none(page.css('.tel::text').getall()),  # phone number
        "详情页": detail_url,  # detail-page URL
    }


def main() -> None:
    """Scrape every shop linked from the search page and append it to the CSV."""
    # Send the request for the search-results page and fail loudly on an HTTP error.
    response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    # Parse the listing and collect every detail-page URL via a CSS selector.
    selector = parsel.Selector(response.text)
    href = selector.css('.shop-list ul li .pic a::attr(href)').getall()

    # `with` guarantees the file is flushed and closed even if a later step raises.
    with open('火锅.csv', mode='a', encoding='utf-8', newline='') as f:
        csv_writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
        # The file is opened in append mode, so only write the header when the
        # file is still empty; otherwise every run would add another header row.
        if f.tell() == 0:
            csv_writer.writeheader()

        for detail_url in href:
            # Request the detail page; skip this shop if the request fails so
            # one bad page does not abort the whole run.
            try:
                detail_response = requests.get(
                    url=detail_url, headers=headers, timeout=REQUEST_TIMEOUT
                )
                detail_response.raise_for_status()
            except requests.RequestException as err:
                print(f'skipped {detail_url}: {err}')
                continue

            # Extract the shop information we want and store it.
            dit = parse_shop(detail_response.text, detail_url)
            csv_writer.writerow(dit)
            print(dit)


if __name__ == "__main__":
    main()