ADD file via upload

csv
piawfkj9b 9 months ago
parent f1a0626d70
commit 261e06ad59

@@ -0,0 +1,54 @@
import csv

import requests
from lxml import etree
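# Browser-style User-Agent so Lianjia serves the normal listing pages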
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
}
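# Listing index URLs: the first page plus /pg2/ through /pg5/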
urls = []
url = "https://nj.lianjia.com/ershoufang/"
urls.append(url)
url_fore = "https://nj.lianjia.com/ershoufang/pg"
url_last = "/"
for i in range(2, 6):
    urls.append(url_fore + str(i) + url_last)
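# CSV column headers: community name, total price (10k CNY), unit price, listing title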
title = ['小区名', '总价(万)', '单价', '小区介绍']
# Open the output file ("second-hand housing info") once, outside the page loop
with open('二手房信息1.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(title)
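    # Fetch and parse each listing page, writing its rows while the file is open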
    for url in urls:
        response = requests.get(url, headers=headers)
        # Check the response status code
        if response.status_code == 200:
            # A 200 status means the request succeeded; parse the HTML content
            page_text = response.content.decode(encoding='utf-8')
            # print(page_text)
            soup = etree.HTML(page_text)
            div_list = soup.xpath('//div[@class="info clear"]')
            ershoufang = []
            for div in div_list:
                house_title = div.xpath('.//div[@class="title"]/a/text()')[0]
                priceinfo = div.xpath('.//div[@class="totalPrice totalPrice2"]/span/text()')[0]
                # Strip the literal "单价" (unit price) prefix from the scraped text
                price = div.xpath('.//div[@class="unitPrice"]/span/text()')[0].replace('单价', '')
                house_name = div.xpath('.//div[@class="positionInfo"]/a[1]/text()')[0]
                ershoufang.append([house_name, priceinfo, price, house_title])
            # print(ershoufang)
            # Write this page's rows while the file is still open
            for row in ershoufang:
                writer.writerow(row)
            print(ershoufang)
        else:
            # A non-200 status means the request failed; report it and skip this page
            print(f"Request failed with status code {response.status_code} for URL {url}")
            continue