You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

54 lines
2.0 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import requests
from lxml import html
etree = html.etree
import csv
# Scrape second-hand housing listings from nj.lianjia.com (pages 1-5) and
# write one CSV row per listing to '二手房信息1.csv'.

# HTTP headers sent with every request (a desktop-browser User-Agent string).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
}

# Pages to fetch: the index URL is page 1, pages 2-5 use the "pg<N>/" form.
url = "https://nj.lianjia.com/ershoufang/"
url_fore = "https://nj.lianjia.com/ershoufang/pg"
url_last = "/"
urls = [url] + [url_fore + str(i) + url_last for i in range(2, 6)]

# CSV header row: community name, total price (10k CNY), unit price, listing title.
title = ['小区名', '总价(万)', '单价', '小区介绍']

# Seconds to wait for the server before giving up on a page; without a
# timeout requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10


def _first_text(node, path):
    """Return the first result of XPath *path* under *node*, or '' if none match."""
    matches = node.xpath(path)
    return matches[0] if matches else ''


# Open the output file once, outside the page loop, so every page is
# appended to the same CSV under a single header row.
with open('二手房信息1.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(title)
    for url in urls:
        try:
            response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Connection errors/timeouts are treated like a bad status code:
            # report the page and move on instead of aborting the whole run.
            print(f"请求失败,异常为:{exc}URL为{url}")
            continue

        # Any status other than 200 means the request failed: report and skip.
        if response.status_code != 200:
            print(f"请求失败,状态码为:{response.status_code}URL为{url}")
            continue

        # Status 200: decode the body and parse it as HTML.
        # (Named page_source so the imported lxml `html` module is not shadowed.)
        page_source = response.content.decode(encoding='utf-8')
        soup = etree.HTML(page_source)
        # etree.HTML may yield None for an empty document; treat as no listings.
        div_list = soup.xpath('//div[@class="info clear"]') if soup is not None else []

        # One row per listing card, in the same column order as `title`.
        # Missing fields become '' rather than raising IndexError.
        ershoufang = [
            [
                _first_text(div, './/div[@class="positionInfo"]/a[1]/text()'),
                _first_text(div, './/div[@class="totalPrice totalPrice2"]/span/text()'),
                _first_text(div, './/div[@class="unitPrice"]/span/text()').replace('单价', ''),
                _first_text(div, './/div[@class="title"]/a/text()'),
            ]
            for div in div_list
        ]

        # Write this page's rows, then echo them once as progress output.
        writer.writerows(ershoufang)
        print(ershoufang)