ADD file via upload

8 months ago · 0cee304270
parent b93e2c1b72
commit 0cee304270
1 changed files with 53 additions and 0 deletions
--- a/期末.py
+++ b/期末.py
@@ -0,0 +1,53 @@
+import requests
+from lxml import html
+etree = html.etree
+import pandas as pd #用于数据处理和生成数据框 清洗的第三方模块
+
+# HTTP headers sent with every request. The User-Agent is a desktop browser
+# string (Edge 125 / Chrome 125 on Windows 10).
+headers = {
+    'User-Agent': (
+        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
+        'AppleWebKit/537.36 (KHTML, like Gecko) '
+        'Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
+    ),
+}
+
+# Listing pages to scrape: page 1 is the bare listing URL, pages 2-5 are
+# addressed as "<listing URL>pg<N>/".
+url = "https://nj.lianjia.com/ershoufang/"
+url_fore = "https://nj.lianjia.com/ershoufang/pg"
+url_last = "/"
+urls = [url] + [f"{url_fore}{page}{url_last}" for page in range(2, 6)]
+
+# Column labels of the result table: community name, total price (in units
+# of 万, i.e. 10,000), unit price, and community description.
+title = [
+    '小区名',
+    '总价（万）',
+    '单价',
+    '小区介绍',
+]
+# Empty frame carrying only the column labels; scraped rows are concatenated
+# onto it later.
+df = pd.DataFrame(columns=title)
+
+# Scrape every listing page in `urls` and append its rows to the accumulated
+# frame `df`. A page that cannot be fetched is reported and skipped.
+for url in urls:
+    # The timeout keeps one stalled connection from hanging the whole run.
+    # Transport-level failures (DNS, connection reset, timeout, ...) are
+    # reported and skipped, the same way a non-200 status is.
+    try:
+        response = requests.get(url, headers=headers, timeout=10)
+    except requests.RequestException as exc:
+        print(f"请求异常：{exc}，URL为：{url}")
+        continue
+
+    # Any status other than 200 counts as a failed request: report it and
+    # move on to the next page.
+    if response.status_code != 200:
+        print(f"请求失败，状态码为：{response.status_code}，URL为：{url}")
+        continue
+
+    # Named `page_text` rather than `html` so the imported lxml `html` module
+    # is not shadowed by a string.
+    page_text = response.content.decode(encoding='utf-8')
+    tree = etree.HTML(page_text)
+
+    ershoufang = []
+    # Each listing card is a <div class="info clear">.
+    for div in tree.xpath('//div[@class="info clear"]'):
+        # One xpath result list per output column, in `title` order.
+        field_matches = [
+            div.xpath('.//div[@class="positionInfo"]/a[1]/text()'),
+            div.xpath('.//div[@class="totalPrice totalPrice2"]/span/text()'),
+            div.xpath('.//div[@class="unitPrice"]/span/text()'),
+            div.xpath('.//div[@class="title"]/a/text()'),
+        ]
+        # xpath() returns an empty list when nothing matches. A card missing
+        # any field is skipped so that indexing [0] cannot raise IndexError
+        # and abort the whole scrape.
+        if not all(field_matches):
+            continue
+        ershoufang.append([matches[0] for matches in field_matches])
+
+    df_temp = pd.DataFrame(ershoufang, columns=title)
+    # ignore_index renumbers the rows 0..n-1; without it every page would
+    # contribute its own 0-based labels and the result would have duplicates.
+    df = pd.concat([df, df_temp], ignore_index=True)
+    print(df_temp)
+
+# df.to_csv('二手房信息.csv', index=False)
+