You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

54 lines
2.0 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import requests
from lxml import html
etree = html.etree
import pandas as pd #用于数据处理和生成数据框 清洗的第三方模块
# Scrape the first five second-hand-housing listing pages and collect, for
# each listing card, the four text fields named in `title` into the
# DataFrame `df`.

# Browser-like User-Agent header sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
}

# The first page has no page suffix; pages 2-5 use ".../pg<N>/".
url = "https://nj.lianjia.com/ershoufang/"
url_fore = "https://nj.lianjia.com/ershoufang/pg"
url_last = "/"
urls = [url]
for i in range(2, 6):
    urls.append(url_fore + str(i) + url_last)

# Column names of the resulting DataFrame (one per extracted field).
title = ['小区名', '总价(万)', '单价', '小区介绍']

# XPath expressions, relative to one listing card, in the same order as
# the columns in `title`.
field_paths = (
    './/div[@class="positionInfo"]/a[1]/text()',
    './/div[@class="totalPrice totalPrice2"]/span/text()',
    './/div[@class="unitPrice"]/span/text()',
    './/div[@class="title"]/a/text()',
)

# Per-page frames are collected here and concatenated once at the end,
# instead of re-copying the growing DataFrame on every page.
page_frames = []

for url in urls:
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        # A network-level failure skips this page but keeps earlier results.
        print(f"请求失败,URL为{url},错误:{exc}")
        continue

    # Any status other than 200 is treated as a failed request: report it
    # and move on to the next page.
    if response.status_code != 200:
        print(f"请求失败,状态码为:{response.status_code}URL为{url}")
        continue

    # Named `page_html` so the imported `lxml.html` module is not shadowed.
    page_html = response.content.decode(encoding='utf-8')
    soup = etree.HTML(page_html)
    div_list = soup.xpath('//div[@class="info clear"]')

    ershoufang = []
    for div in div_list:
        matches = [div.xpath(path) for path in field_paths]
        # Skip cards missing any field rather than failing on `[0]`.
        if not all(matches):
            continue
        ershoufang.append([found[0] for found in matches])

    df_temp = pd.DataFrame(ershoufang, columns=title)
    print(df_temp)
    # Empty pages are left out so the final concat only sees real rows.
    if not df_temp.empty:
        page_frames.append(df_temp)

# Combine all pages with a fresh 0..n-1 index; fall back to an empty frame
# with the expected columns when nothing was scraped.
if page_frames:
    df = pd.concat(page_frames, ignore_index=True)
else:
    df = pd.DataFrame(columns=title)

# Uncomment to write the combined result to a CSV file:
# df.to_csv('二手房信息.csv', index=False)