You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
33 lines
1.3 KiB
33 lines
1.3 KiB
import csv
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
def getData():
    """Scrape the for-sale ranking table from bj.cityhouse.cn into a CSV file.

    Fetches the ranking page, locates the first ``<tbody>`` element and
    appends one CSV row per ``<tr>`` to ``北京市区房价.csv``. The first row is
    built from its ``<th>`` cells (the column headers); every later row is
    built from its ``<td>`` cells, limited to the number of header columns.
    Cell text is stripped of surrounding whitespace. A completion message is
    printed once all rows are written.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: if the page has no ``<tbody>`` or the table has no rows.
    """
    url = 'https://bj.cityhouse.cn/market/rankforsale.html'
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Content-Type': 'text/html; charset=utf-8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0',
    }

    # Without a timeout, requests may block forever on an unresponsive server.
    response = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    tbody = soup.find('tbody')

    # Query the row list once; re-running the search for every cell is
    # needlessly quadratic.
    rows = tbody.find_all('tr') if tbody is not None else []
    if not rows:
        raise ValueError('no table rows found at ' + url)

    # The first row carries the column headers in <th> cells; its width
    # bounds how many <td> cells are taken from each data row.
    header_cells = rows[0].find_all('th')
    column_count = len(header_cells)

    # NOTE(review): the file is opened in append mode, so every run adds
    # another header row plus data rows to an existing file -- confirm this
    # is intended rather than overwriting.
    with open('北京市区房价.csv', 'a', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([cell.text.strip() for cell in header_cells])
        for row in rows[1:]:
            # Slicing tolerates rows with fewer cells than the header
            # instead of raising IndexError.
            data_cells = row.find_all('td')[:column_count]
            writer.writerow([cell.text.strip() for cell in data_cells])

    print('shu数据获取完毕')
|
|
if __name__ == '__main__':
    # Run the scrape only when this file is executed as a script, not when
    # it is imported as a module.
    getData()