"""Scrape second-hand housing listings for Pudong from Lianjia into a CSV file.

For every listing on the first results page the script collects the title,
location, layout, follower count, unit price and total price, prints each
record, and writes all of them to ``lxw.csv``.
"""
import csv

import requests
from lxml import etree

# Fields scraped per listing: title, location, layout, follower count,
# unit price, total price.

URL = 'https://sh.lianjia.com/ershoufang/pudong/'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
}
# Seconds to wait for the server; without a timeout requests may block forever.
REQUEST_TIMEOUT = 10
OUTPUT_PATH = 'lxw.csv'

# Record keys, in the same order as the CSV columns below.
FIELDS = ('title', 'address', 'pattern', 'followers_count', 'unit_price', 'total_price')
CSV_HEADER = ["标题", "地址", "房屋格局", "关注人数", "单价", "总价"]

LISTING_XPATH = "//ul[@class='sellListContent']/li"
# Token match on the class attribute, so the element is still found when it
# carries several classes in any order (e.g. "totalPrice totalPrice2").
TOTAL_PRICE_XPATH = (
    ".//div[contains(concat(' ', normalize-space(@class), ' '), ' totalPrice ')]"
    "/span/text()"
)


def _first_text(node, path):
    """Return the first non-blank text matched by *path* under *node*.

    The text is stripped of surrounding whitespace. Returns ``None`` when the
    XPath matches nothing or only whitespace.
    """
    for text in node.xpath(path):
        text = text.strip()
        if text:
            return text
    return None


def fetch_page(url=URL):
    """Download *url* and return its body decoded as UTF-8.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
    """
    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of parsing an error page as if it held listings.
    response.raise_for_status()
    return response.content.decode('utf-8')


def parse_listings(page_source):
    """Extract one record per listing from the HTML text *page_source*.

    Every field is looked up relative to its own ``<li>`` element, so a
    listing that lacks a field gets ``None`` for it instead of shifting the
    values of all following listings.

    Returns:
        A list of dicts keyed by the names in ``FIELDS``.
    """
    tree = etree.HTML(page_source)
    if tree is None:
        return []

    records = []
    for item in tree.xpath(LISTING_XPATH):
        title = _first_text(item, ".//div[@class='title']/a/text()")
        if title is None:
            # Entries without a title link are not listings.
            continue

        community = _first_text(item, ".//div[@class='positionInfo']/a[1]/text()")
        district = _first_text(item, ".//div[@class='positionInfo']/a[2]/text()")
        address = '-'.join(part for part in (community, district) if part)

        # The layout is the first '|'-separated segment of the house info.
        house_info = _first_text(item, ".//div[@class='houseInfo']/text()")
        layout = house_info.split('|')[0].strip() if house_info else None

        # Keep only what precedes '人' in the follow-info text.
        follow_info = _first_text(item, ".//div[@class='followInfo']/text()")
        followers = follow_info.split('人')[0].strip() if follow_info else None

        unit_price = _first_text(item, ".//div[@class='unitPrice']/span/text()")
        total = _first_text(item, TOTAL_PRICE_XPATH)

        records.append({
            'title': title,
            'address': address or None,
            'pattern': layout or None,
            'followers_count': followers or None,
            'unit_price': unit_price,
            'total_price': total + '万' if total else None,
        })
    return records


def write_csv(records, path=OUTPUT_PATH):
    """Write *records* to *path* as UTF-8 CSV with a header row.

    Columns follow ``FIELDS`` explicitly rather than relying on dict
    ordering; ``None`` values are written as empty cells.
    """
    with open(path, "w", encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(CSV_HEADER)
        writer.writerows([record[key] for key in FIELDS] for record in records)


def main():
    """Fetch the listing page, print every record, and save them to CSV."""
    house_list = parse_listings(fetch_page())
    for item in house_list:
        print(item)
    write_csv(house_list)


if __name__ == "__main__":
    main()