import requests
from lxml import etree
import csv

# Scrape one page of second-hand housing listings from Lianjia and save it as CSV.
# Fields per listing: title, location, layout, follower count, unit price, total price.

url = 'https://sh.lianjia.com/ershoufang/pudong/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
}

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10
OUTPUT_PATH = 'lxw.csv'
CSV_HEADER = ["标题", "地址", "房屋格局", "关注人数", "单价", "总价"]
# Record keys, in the same order as the CSV_HEADER columns.
FIELD_NAMES = ('title', 'address', 'pattern', 'followers_count', 'unit_price', 'total_price')


def first_segment(text, separator):
    """Return the whitespace-stripped part of *text* before *separator*.

    Returns None when *text* is empty/None or when nothing but whitespace
    precedes the separator.
    """
    if not text:
        return None
    head = text.split(separator)[0].strip()
    return head or None


def node_text(node, xpath):
    """Return the joined, stripped text selected by *xpath* under *node*.

    *node* is an lxml element and *xpath* is expected to select text nodes.
    Returns None when nothing (or only whitespace) matches.
    """
    joined = ''.join(node.xpath(xpath)).strip()
    return joined or None


def parse_listing(item):
    """Build one record dict from a single listing ``<li>`` element.

    Every field is looked up relative to *item*, so a listing that lacks a
    field gets None for it instead of shifting values between listings.
    """
    position_parts = (
        node_text(item, ".//div[@class='positionInfo']/a[1]/text()"),
        node_text(item, ".//div[@class='positionInfo']/a[2]/text()"),
    )
    address = '-'.join(part for part in position_parts if part) or None
    total = node_text(item, ".//div[@class='totalPrice totalPrice2']/span/text()")
    return {
        'title': node_text(item, ".//div[@class='title']/a/text()"),
        'address': address,
        # Layout is the first '|'-separated segment of the house info line.
        'pattern': first_segment(node_text(item, ".//div[@class='houseInfo']/text()"), '|'),
        # Follower count is whatever precedes the '人' character.
        'followers_count': first_segment(node_text(item, ".//div[@class='followInfo']/text()"), '人'),
        'unit_price': node_text(item, ".//div[@class='unitPrice']/span/text()"),
        'total_price': total + '万' if total else None,
    }


def fetch_houses(page_url=url):
    """Download *page_url* and return a list of listing record dicts.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-success HTTP status.
    """
    response = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    html = etree.HTML(response.content.decode('utf-8'))
    if html is None:
        return []
    records = (parse_listing(item) for item in html.xpath("//ul[@class='sellListContent']/li"))
    # Drop <li> elements that yielded no data at all (e.g. non-listing cards).
    return [record for record in records if any(record.values())]


def save_csv(houses, path=OUTPUT_PATH):
    """Write *houses* (a list of record dicts) to *path* as UTF-8 CSV.

    Columns follow FIELD_NAMES explicitly; None values are written as
    empty cells by the csv module.
    """
    with open(path, "w", encoding='utf-8', newline='') as f:
        w = csv.writer(f)
        w.writerow(CSV_HEADER)
        w.writerows([house[name] for name in FIELD_NAMES] for house in houses)


def main():
    """Fetch the listings, print each record, and save them to OUTPUT_PATH."""
    house_list = fetch_houses()
    for item in house_list:
        print(item)
    save_csv(house_list)


if __name__ == '__main__':
    main()