ADD file via upload

main
piawfkj9b 8 months ago
parent b93e2c1b72
commit 0cee304270

@ -0,0 +1,53 @@
import requests
from lxml import html
etree = html.etree
import pandas as pd #用于数据处理和生成数据框 清洗的第三方模块
# HTTP headers sent with every request. The User-Agent is a desktop
# Chrome/Edge browser string (presumably so the site serves its regular
# HTML page instead of rejecting the default client UA -- confirm).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
}

# The first listing page has no page suffix; pages 2..5 follow the
# ".../pg<N>/" pattern.
url = "https://nj.lianjia.com/ershoufang/"
url_fore = "https://nj.lianjia.com/ershoufang/pg"
url_last = "/"

# Every page URL to fetch, in order: the first page, then pages 2 to 5.
urls = [url] + [f"{url_fore}{page}{url_last}" for page in range(2, 6)]
# Column names of the result table, in order: community name, total price
# (unit: 10,000), unit price, and community introduction (filled from the
# listing's title link text).
title = ['小区名', '总价(万)', '单价', '小区介绍']

# XPath expressions, relative to a single listing node, that yield the text
# for each column above (same order as `title`).
_FIELD_XPATHS = (
    './/div[@class="positionInfo"]/a[1]/text()',
    './/div[@class="totalPrice totalPrice2"]/span/text()',
    './/div[@class="unitPrice"]/span/text()',
    './/div[@class="title"]/a/text()',
)

# Seconds to wait for the server before giving up on a page; without a
# timeout a stalled connection would block the script forever.
_REQUEST_TIMEOUT = 10


def _first_text(node, path):
    """Return the first result of evaluating ``path`` on ``node``, or None.

    Returning None instead of indexing ``[0]`` blindly keeps a listing that
    lacks one field from aborting the whole run with an IndexError.
    """
    matches = node.xpath(path)
    return matches[0] if matches else None


# Rows collected from every successfully fetched page.
all_rows = []

for url in urls:
    try:
        response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # Network-level failure (timeout, DNS, connection reset, ...):
        # report it and move on, just like a non-200 response.
        print(f"请求异常:{exc},URL为{url}")
        continue

    # Anything other than 200 means the request failed: report and skip.
    if response.status_code != 200:
        print(f"请求失败,状态码为:{response.status_code}URL为{url}")
        continue

    # Decode the body explicitly as UTF-8 and parse it into an element tree.
    # (Named `page_source` so the imported lxml `html` module is not shadowed.)
    page_source = response.content.decode(encoding='utf-8')
    soup = etree.HTML(page_source)

    # Each listing is selected by its <div class="info clear"> container.
    div_list = soup.xpath('//div[@class="info clear"]')
    ershoufang = [
        [_first_text(div, path) for path in _FIELD_XPATHS]
        for div in div_list
    ]

    # Show this page's rows as a table.
    df_temp = pd.DataFrame(ershoufang, columns=title)
    print(df_temp)

    all_rows.extend(ershoufang)

# Build the combined table once, after the loop, rather than concatenating
# inside it: this avoids repeated copying and yields a continuous 0..n-1
# index. If every page failed, the result is an empty frame with the same
# columns.
df = pd.DataFrame(all_rows, columns=title)

# Writing the result to CSV is currently disabled; uncomment to enable.
# df.to_csv('二手房信息.csv', index=False)
Loading…
Cancel
Save