You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

31 lines
1001 B

# -*- coding: utf-8 -*-
"""
Created on Sun May 15 20:46:07 2022
@author: 张景瑞
"""
import requests
import re
# Request headers: send a desktop-browser User-Agent string with every request.
headers = {
    "user-agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36 Edg/101.0.1210.39'
}

# The patterns do not change between pages, so compile them once up front.
# _NAME_RE captures the link text of an <a ... target="_blank"> that closes an <h3>.
# _FUND_RE captures the text after the "注册资本:" (registered capital) label.
_NAME_RE = re.compile('<a.*?target="_blank">(.+?)</a></h3>')
_FUND_RE = re.compile('注册资本:</span>(.*?)</p>')

# Seconds to wait on the server; without a timeout requests.get() can block
# forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Accumulates one (name, fund) tuple per entry found, across all pages.
message = []
for page in range(10):
    # The first listing page has no numeric suffix; the following pages are
    # numbered starting at 2 (page index 1 -> "index_zhuce_2.html").
    if page == 0:
        url = "https://top.chinaz.com/gongsi/index_zhuce.html"
    else:
        url = "https://top.chinaz.com/gongsi/index_zhuce_{}.html".format(page + 1)

    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of running the regexes over an error
    # page and silently collecting nothing.
    response.raise_for_status()
    html = response.text

    name = _NAME_RE.findall(html)
    fund = _FUND_RE.findall(html)
    # NOTE: zip() stops at the shorter list, so if a page yields a different
    # number of names and capital values the surplus entries are dropped.
    message.extend(zip(name, fund))
import csv

import pandas as pd

# Use one explicit encoding for both the write and the read-back. Previously
# the file was written with the platform default encoding but read as
# 'gb2312', which fails (or garbles text) wherever the default is not a
# GB-family codec. GB18030 is a superset of GB2312 and can encode every
# Unicode character, so unusual characters in names cannot break the write.
CSV_ENCODING = "gb18030"

# newline="" is required by the csv module so that it controls line endings
# itself; without it, rows are terminated with "\r\r\n" on Windows.
with open("content.csv", "w", newline="", encoding=CSV_ENCODING) as f:
    writer = csv.writer(f)
    writer.writerows(message)

# The file has no header row, so the column names are supplied here.
df = pd.read_csv("content.csv", names=["name", "fund"], encoding=CSV_ENCODING)
print(df.head(20))