You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
45 lines
2.5 KiB
45 lines
2.5 KiB
5 months ago
|
import requests
|
||
|
from lxml import etree
|
||
|
import csv
|
||
|
|
||
|
# Scrape the Lianjia second-hand housing listings for Pudong (Shanghai), print
# every listing and save them to lxw.csv.
# Columns: title, location, layout, follower count, unit price, total price.

url = 'https://sh.lianjia.com/ershoufang/pudong/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
}

# Dict keys in the same order as the CSV header columns written at the bottom.
_FIELDS = ('title', 'address', 'pattern', 'followers_count', 'unit_price', 'total_price')


def _first_text(node, path):
    """Return the first non-blank text matched by *path* under *node*, stripped, or None."""
    for text in node.xpath(path):
        text = text.strip()
        if text:
            return text
    return None


def _parse_listing(li):
    """Extract one listing's fields from its <li> element; absent fields become None."""
    # The location is made of up to two links; join whichever ones are present.
    area_parts = [
        part
        for part in (
            _first_text(li, ".//div[@class='positionInfo']/a[1]/text()"),
            _first_text(li, ".//div[@class='positionInfo']/a[2]/text()"),
        )
        if part
    ]
    house_info = _first_text(li, ".//div[@class='houseInfo']/text()")
    follow_info = _first_text(li, ".//div[@class='followInfo']/text()")
    total = _first_text(li, ".//div[@class='totalPrice totalPrice2']/span/text()")
    return {
        'title': _first_text(li, ".//div[@class='title']/a/text()"),
        'address': '-'.join(area_parts) or None,
        # The layout is the first '|'-separated segment of the house info text.
        'pattern': (house_info.split('|')[0].strip() or None) if house_info else None,
        # The follower count is whatever precedes the first '人' character.
        'followers_count': (follow_info.split('人')[0].strip() or None) if follow_info else None,
        'unit_price': _first_text(li, ".//div[@class='unitPrice']/span/text()"),
        'total_price': total + '万' if total else None,
    }


# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() stops before an error page silently overwrites lxw.csv
# with a header-only file.
reply = requests.get(url, headers=headers, timeout=10)
reply.raise_for_status()
response = reply.content.decode('utf-8')
html = etree.HTML(response)

# Extract the fields per <li> instead of zipping six page-wide lists: with the
# page-wide approach a single listing that lacks one field shifts every later
# row out of alignment and truncates the result.
house_list = []
for li in html.xpath("//ul[@class='sellListContent']/li"):
    house = _parse_listing(li)
    # Skip <li> elements that yield no data at all (they contribute no row).
    if any(house.values()):
        house_list.append(house)

for item in house_list:
    print(item)

with open('lxw.csv', "w", encoding='utf-8', newline='') as f:
    w = csv.writer(f)
    w.writerow(["标题", "地址", "房屋格局", "关注人数", "单价", "总价"])
    # Write columns in explicit header order rather than relying on dict order;
    # None values are written as empty cells by the csv module.
    w.writerows([item[key] for key in _FIELDS] for item in house_list)
|