You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

66 lines
3.0 KiB

# program2
import requests
import parsel
import csv
# Output CSV, opened in append mode so rows written by earlier runs are kept.
# newline='' hands line-ending control to the csv module.
# The handle is deliberately left open here: code further down the script
# writes rows through csv_writer.
f = open('火锅.csv', mode='a', encoding='utf-8', newline='')
csv_writer = csv.DictWriter(
    f,
    fieldnames=[
        "店名",      # shop name
        "评论",      # review count
        "人均消费",  # average spend per person
        "口味",      # taste score
        "环境",      # environment score
        "服务",      # service score
        "地址",      # address
        "电话",      # phone
        "详情页",    # detail-page URL
    ],
)
# Write the header row only when the file is still empty. A stream opened in
# append mode is positioned at end-of-file, so tell() == 0 means no earlier
# run has written anything. Calling writeheader() unconditionally would add
# a duplicate header line in the middle of the data on every re-run.
if f.tell() == 0:
    csv_writer.writeheader()
# Search-results page to scrape (the keyword is carried percent-encoded in
# the path).
url = 'https://www.dianping.com/search/keyword/134/0_%E7%81%AB%E9%94%85'

# HTTP headers sent with every request.
headers = {
    # Browser cookie string, one cookie per source line below.
    # NOTE(review): this is a hard-coded logged-in session credential checked
    # into source; consider loading it from the environment instead.
    'Cookie': (
        'fspop=test; '
        '_lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; '
        '_lxsdk_cuid=18fed6691b1c8-077cc1cd96e08b-4c657b58-1bcab9-18fed6691b1b9; '
        '_lxsdk=18fed6691b1c8-077cc1cd96e08b-4c657b58-1bcab9-18fed6691b1b9; '
        '_hc.v=b59c272a-cc68-9272-45bd-4a21d06575ba.1717674873; '
        'Hm_lvt_602b80cf8079ae6591966cc70a3940e7=1717674875; '
        's_ViewType=10; '
        'WEBDFPID=85z47658533w5776z14z954wv8v68z3381uvw3106w3979580x97w975-2033035004378-1717675004378WSSGKIQfd79fef3d01d5e9aadc18ccd4d0c95072217; '
        'dper=02027f22d8308fa4765341f0ef3f07cf5ef6575ef1f8ee803e73f4c40eac496f73f16954b7b86ded9657469ec37d57f31ac0d1fb301d8ccc13bf00000000a62000006ba8738b6163aa0aea87ddc5058913f2397cc45edf51a4b6730c4e9e34392b6d17fd4c415110b9ecf131bb0b48dca78c; '
        'qruuid=c408efa4-163b-468c-84ac-355e90a7ad2d; '
        'll=7fd06e815b796be3df069dec7836c3df; '
        'cy=344; '
        'cye=changsha; '
        '_lxsdk_s=18fed6691b2-ab3-62d-b80%7C%7C338; '
        'Hm_lpvt_602b80cf8079ae6591966cc70a3940e7=1717677461'
    ),
    # Target host name.
    'Host': 'www.dianping.com',
    # Anti-hotlinking: tells the server which page the request came from.
    'Referer': 'https://www.dianping.com/',
    # Browser identification string.
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
    ),
}
# Fetch the search-results page.
# - timeout: requests waits indefinitely by default, so a stalled connection
#   would hang the script forever without it.
# - raise_for_status(): turns an HTTP error reply into an exception instead
#   of letting an error page be parsed as if it were the listing.
response = requests.get(url=url, headers=headers, timeout=10)
response.raise_for_status()
# Parse the returned HTML.
selector = parsel.Selector(response.text)
# Collect the detail-page URL of every shop on the listing via a CSS selector.
href = selector.css('.shop-list ul li .pic a::attr(href)').getall()
def _score(items, position):
    """Return the text after the last ': ' in items[position].

    Returns None when the list has no element at that position, so a page
    without score nodes yields an empty CSV cell instead of an IndexError
    that would abort the whole run.
    """
    try:
        return items[position].split(': ')[-1]
    except IndexError:
        return None


# Visit every detail page found on the listing and append one CSV row per shop.
try:
    for detail_url in href:
        # Request the detail page; bound the wait and fail loudly on an HTTP
        # error rather than parsing an error page.
        page = requests.get(url=detail_url, headers=headers, timeout=10)
        page.raise_for_status()
        # Parse the detail-page HTML.
        selector_1 = parsel.Selector(page.text)

        # .get() returns None when nothing matches the selector.
        title = selector_1.css('.shop-name::text').get()      # shop name
        count = selector_1.css('#reviewCount::text').get()    # review count
        price = selector_1.css('#avgPriceTitle::text').get()  # average spend
        address = selector_1.css('#address::text').get()      # address

        # Score entries: the first is read as taste, the second as
        # environment, and the last as service.
        item_list = selector_1.css('#comment_score .item::text').getall()
        taste = _score(item_list, 0)
        environment = _score(item_list, 1)
        service = _score(item_list, -1)

        # Phone: last text node under .tel, or None when the page has none.
        tel_texts = selector_1.css('.tel::text').getall()
        tel = tel_texts[-1] if tel_texts else None

        dit = {
            "店名": title,
            "评论": count,
            "人均消费": price,
            "口味": taste,
            "环境": environment,
            "服务": service,
            "地址": address,
            "电话": tel,
            "详情页": detail_url,
        }
        # The csv module writes None values as empty cells.
        csv_writer.writerow(dit)
        print(dit)
finally:
    # Close the output file so rows already written are flushed to disk even
    # if a request or parse step raised part-way through.
    f.close()