ADD file via upload

csv
piawfkj9b 9 months ago
parent f1a0626d70
commit 261e06ad59

@@ -0,0 +1,54 @@
import csv

import requests
from lxml import etree
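# Browser-style User-Agent so Lianjia serves the normal listing pages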
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
}
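# Listing index URLs: the first page plus /pg2/ through /pg5/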
urls = []
url = "https://nj.lianjia.com/ershoufang/"
urls.append(url)
url_fore = "https://nj.lianjia.com/ershoufang/pg"
url_last = "/"
for i in range(2, 6):
    urls.append(url_fore + str(i) + url_last)
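# CSV column headers: community name, total price (10k CNY), unit price, listing title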
title = ['小区名', '总价(万)', '单价', '小区介绍']
# Open the output file ("second-hand housing info") once, outside the page loop
with open('二手房信息1.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(title)
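    # Fetch and parse each listing page, writing its rows while the file is open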
    for url in urls:
        response = requests.get(url, headers=headers)
        # Check the response status code
        if response.status_code == 200:
            # A 200 status means the request succeeded; parse the HTML content
            page_text = response.content.decode(encoding='utf-8')
            # print(page_text)
            soup = etree.HTML(page_text)
            div_list = soup.xpath('//div[@class="info clear"]')
            ershoufang = []
            for div in div_list:
                house_title = div.xpath('.//div[@class="title"]/a/text()')[0]
                priceinfo = div.xpath('.//div[@class="totalPrice totalPrice2"]/span/text()')[0]
                # Strip the literal "单价" (unit price) prefix from the scraped text
                price = div.xpath('.//div[@class="unitPrice"]/span/text()')[0].replace('单价', '')
                house_name = div.xpath('.//div[@class="positionInfo"]/a[1]/text()')[0]
                ershoufang.append([house_name, priceinfo, price, house_title])
            # print(ershoufang)
            # Write this page's rows while the file is still open
            for row in ershoufang:
                writer.writerow(row)
            print(ershoufang)
        else:
            # A non-200 status means the request failed; report it and skip this page
            print(f"Request failed with status code {response.status_code} for URL {url}")
            continue