ADD file via upload

main
pfq2phx49 5 months ago
parent 33ecb395f5
commit dea0f5bb41

@ -0,0 +1,45 @@
import requests
from lxml import etree
import csv
# Scrape second-hand housing listings (Pudong, Shanghai) from Lianjia and
# save them to a CSV file.
# Fields collected per listing: title, location, layout, follower count,
# unit price, total price.
url = 'https://sh.lianjia.com/ershoufang/pudong/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
}

# Seconds to wait for the server; without a timeout requests.get may block
# forever on an unresponsive host.
REQUEST_TIMEOUT = 10

OUTPUT_PATH = 'lxw.csv'
CSV_HEADER = ["标题", "地址", "房屋格局", "关注人数", "单价", "总价"]

# Record keys, in the same order as the CSV_HEADER columns.
FIELD_ORDER = (
    'title',
    'address',
    'pattern',
    'followers_count',
    'unit_price',
    'total_price',
)

# One <li> per listing. Fields are looked up *relative to each <li>* so that a
# listing missing one field cannot shift the values of every later listing
# (which is what happens when six page-wide result lists are zipped together).
LISTING_XPATH = "//ul[@class='sellListContent']/li"
FIELD_XPATHS = {
    'title': ".//div[@class='title']/a/text()",
    'address_1': ".//div[@class='positionInfo']/a[1]/text()",
    'address_2': ".//div[@class='positionInfo']/a[2]/text()",
    'pattern': ".//div[@class='houseInfo']/text()",
    'followers_count': ".//div[@class='followInfo']/text()",
    'unit_price': ".//div[@class='unitPrice']/span/text()",
    'total_price': ".//div[@class='totalPrice totalPrice2']/span/text()",
}


def first_text(values):
    """Return the first non-blank string in *values*, stripped, else None."""
    for value in values:
        text = value.strip()
        if text:
            return text
    return None


def extract_layout(house_info):
    """Return the part of *house_info* before the first '|', or None."""
    if not house_info:
        return None
    return house_info.split('|')[0].strip() or None


def extract_follower_count(follow_info):
    """Return the text preceding '人' in *follow_info*, or None if empty.

    The original code called ``split('')``, which always raises
    ``ValueError: empty separator``.
    """
    if not follow_info:
        return None
    # NOTE(review): assumes the site renders this as "<N>人关注 / ..." --
    # confirm against the live page. Without a '人' the whole text is kept.
    return follow_info.partition('人')[0].strip() or None


def build_record(raw):
    """Turn the raw XPath results of one listing into a record dict.

    *raw* maps the keys of FIELD_XPATHS to lists of strings; missing keys are
    treated as empty. Every value in the returned dict is a string or None.
    """
    def pick(name):
        return first_text(raw.get(name, ()))

    address_parts = [part for part in (pick('address_1'), pick('address_2')) if part]
    return {
        'title': pick('title'),
        'address': '-'.join(address_parts) or None,
        'pattern': extract_layout(pick('pattern')),
        'followers_count': extract_follower_count(pick('followers_count')),
        'unit_price': pick('unit_price'),
        # NOTE(review): the original appended an empty string here, which is
        # a no-op; a unit suffix may have been lost. Value kept as scraped.
        'total_price': pick('total_price'),
    }


def parse_listings(page_source):
    """Parse the listing page HTML and return a list of record dicts."""
    document = etree.HTML(page_source)
    if document is None:
        # Guard for the case where the parser produced no root element.
        return []
    records = []
    for node in document.xpath(LISTING_XPATH):
        raw = {name: node.xpath(path) for name, path in FIELD_XPATHS.items()}
        record = build_record(raw)
        # Skip <li> elements that carry none of the fields we look for.
        if any(value is not None for value in record.values()):
            records.append(record)
    return records


def fetch_page(page_url, request_headers):
    """Download *page_url* and return its body decoded as UTF-8.

    Raises requests.HTTPError on a 4xx/5xx response instead of silently
    parsing an error page.
    """
    response = requests.get(page_url, headers=request_headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.content.decode('utf-8')


def write_csv(records, path=OUTPUT_PATH):
    """Write the header row and one row per record to *path*."""
    with open(path, "w", encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(CSV_HEADER)
        # Explicit column order instead of relying on dict insertion order.
        writer.writerows([record[name] for name in FIELD_ORDER] for record in records)


def main():
    """Fetch the listing page, print every record, and save them as CSV."""
    house_list = parse_listings(fetch_page(url, headers))
    for item in house_list:
        print(item)
    write_csv(house_list)


if __name__ == "__main__":
    main()
Loading…
Cancel
Save