"""Scrape one page of second-hand housing listings and save them to CSV.

The script downloads the page at ``url``, extracts one record per listing
item, prints each record, and writes all records to ``lxw.csv``.
"""
import csv

import requests
from lxml import etree

# Fields collected per listing: title, location, layout, follower count,
# unit price, total price.
url = 'https://sh.lianjia.com/ershoufang/pudong/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
}

# Record keys, in the same order as the CSV header row written below.
FIELDS = ('title', 'address', 'pattern', 'followers_count', 'unit_price', 'total_price')


def _first_text(node, path):
    """Return the first non-blank text result of XPath *path* under *node*.

    Returns None when the path matches nothing (or only whitespace), so a
    missing element yields an empty cell instead of shifting other columns.
    """
    for text in node.xpath(path):
        if text.strip():
            return text
    return None


# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() stops us from parsing an HTTP error page as listings.
page = requests.get(url, headers=headers, timeout=10)
page.raise_for_status()
response = page.content.decode('utf-8')
html = etree.HTML(response)

house_list = []
# Extract every field relative to its own <li>. Querying each field across
# the whole document and zipping the results misaligns rows as soon as one
# listing lacks an element.
for listing in html.xpath("//ul[@class='sellListContent']/li"):
    title = _first_text(listing, ".//div[@class='title']/a/text()")

    area_parts = [
        _first_text(listing, ".//div[@class='positionInfo']/a[1]/text()"),
        _first_text(listing, ".//div[@class='positionInfo']/a[2]/text()"),
    ]
    address = '-'.join(part for part in area_parts if part)

    # Only the first '|'-separated segment of the house info is kept.
    house_info = _first_text(listing, ".//div[@class='houseInfo']/text()")
    pattern = house_info.split('|')[0] if house_info else None

    # Keep the text that precedes '人' in the follow info.
    follow_info = _first_text(listing, ".//div[@class='followInfo']/text()")
    followers_count = follow_info.split('人')[0] if follow_info else None

    unit_price = _first_text(listing, ".//div[@class='unitPrice']/span/text()")

    total_price_raw = _first_text(
        listing, ".//div[@class='totalPrice totalPrice2']/span/text()"
    )
    total_price = total_price_raw + '万' if total_price_raw else None

    record = {
        'title': title or None,
        'address': address or None,
        'pattern': pattern or None,
        'followers_count': followers_count or None,
        'unit_price': unit_price or None,
        'total_price': total_price or None,
    }
    # Skip <li> items that carry no listing data at all.
    if all(value is None for value in record.values()):
        continue
    house_list.append(record)

for item in house_list:
    print(item)

with open('lxw.csv', "w", encoding='utf-8', newline='') as f:
    w = csv.writer(f)
    w.writerow(["标题", "地址", "房屋格局", "关注人数", "单价", "总价"])
    # Use the explicit field order so columns always line up with the header.
    w.writerows([item[key] for key in FIELDS] for item in house_list)