parent
fa152414f4
commit
19b0cef7c7
@ -0,0 +1,30 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Created on Sun May 15 20:46:07 2022
|
||||
|
||||
@author: 张景瑞
|
||||
"""
|
||||
import requests
|
||||
import re
|
||||
import csv

import pandas as pd

# Request headers: present a desktop browser user-agent to the site.
headers = {
    "user-agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36 Edg/101.0.1210.39'
}

# Number of listing pages to download.
PAGE_COUNT = 10
# File the scraped (name, fund) rows are written to and read back from.
OUTPUT_PATH = "content.csv"
# One explicit encoding for BOTH writing and reading the CSV, so the round
# trip does not depend on the platform's default codec. The BOM variant is
# chosen so spreadsheet tools can detect UTF-8.
CSV_ENCODING = "utf-8-sig"
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10

# Compiled once instead of on every page of the loop.
NAME_PATTERN = re.compile('<a.*?target="_blank">(.+?)</a></h3>')
FUND_PATTERN = re.compile('注册资本:</span>(.*?)</p>')


def page_url(page):
    """Return the listing URL for the zero-based index *page*.

    The first page has no numeric suffix; every later page uses the
    one-based page number as its suffix.
    """
    if page == 0:
        return "https://top.chinaz.com/gongsi/index_zhuce.html"
    return "https://top.chinaz.com/gongsi/index_zhuce_{}.html".format(page + 1)


def fetch_page(url):
    """Download *url* with the module's headers and return the body text.

    Raises whatever ``requests.get`` raises, including a timeout error when
    the server does not answer within ``REQUEST_TIMEOUT`` seconds.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    return response.text


def parse_companies(html):
    """Extract ``(name, fund)`` tuples from one page of listing HTML.

    Names and funds are matched independently and paired by position.
    NOTE(review): ``zip`` stops at the shorter list, so if an entry lacks one
    of the two fields the remaining pairs may be misaligned — confirm against
    the live markup.
    """
    names = NAME_PATTERN.findall(html)
    funds = FUND_PATTERN.findall(html)
    return list(zip(names, funds))


def write_csv(rows, path):
    """Write *rows* (an iterable of row sequences) to *path* as CSV."""
    # newline="" is required by the csv module; without it Windows output
    # gets an extra carriage return per row.
    with open(path, "w", newline="", encoding=CSV_ENCODING) as f:
        csv.writer(f).writerows(rows)


def main():
    """Scrape every listing page, save the rows, and print a preview."""
    message = []
    for page in range(PAGE_COUNT):
        html = fetch_page(page_url(page))
        # Append this page's rows to the overall list.
        message.extend(parse_companies(html))

    write_csv(message, OUTPUT_PATH)

    # Read back with the same encoding that was used for writing.
    df = pd.read_csv(OUTPUT_PATH, names=["name", "fund"], encoding=CSV_ENCODING)
    print(df.head(20))


if __name__ == "__main__":
    main()
|
Loading…
Reference in new issue