You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
31 lines
1001 B
31 lines
1001 B
# -*- coding: utf-8 -*-
"""
Created on Sun May 15 20:46:07 2022

@author: 张景瑞
"""
import csv
import re

import pandas as pd
import requests
# Request header information: a browser-like User-Agent sent with every request.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36 "
        "Edg/101.0.1210.39"
    )
}

FIRST_PAGE_URL = "https://top.chinaz.com/gongsi/index_zhuce.html"
PAGE_URL_TEMPLATE = "https://top.chinaz.com/gongsi/index_zhuce_{}.html"
PAGE_COUNT = 10
CSV_PATH = "content.csv"
# One explicit encoding is used for both writing and reading the CSV, so the
# round trip does not depend on the platform's default encoding. The BOM
# variant of UTF-8 is also recognised by spreadsheet applications.
CSV_ENCODING = "utf-8-sig"
# Seconds to wait for the server; without a timeout requests waits forever.
REQUEST_TIMEOUT = 10

# Compiled once instead of on every page of the loop.
NAME_PATTERN = re.compile(r'<a.*?target="_blank">(.+?)</a></h3>')
FUND_PATTERN = re.compile(r'注册资本:</span>(.*?)</p>')


def page_url(page: int) -> str:
    """Return the listing URL for the zero-based page index *page*.

    The first page has no numeric suffix; every later page uses the
    one-based page number (index 1 -> ``index_zhuce_2.html``).
    """
    if page == 0:
        return FIRST_PAGE_URL
    return PAGE_URL_TEMPLATE.format(page + 1)


def parse_companies(html: str) -> list:
    """Extract ``(name, fund)`` tuples from one listing page's HTML.

    Names and funds are matched independently and paired by position, so
    if the two patterns match a different number of times the extra
    entries are dropped by ``zip``.
    """
    names = NAME_PATTERN.findall(html)
    funds = FUND_PATTERN.findall(html)
    return list(zip(names, funds))


def fetch_page(url: str) -> str:
    """Download *url* with the module's headers and return the body text."""
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    return response.text


def scrape(page_count: int = PAGE_COUNT) -> list:
    """Collect ``(name, fund)`` tuples from the first *page_count* pages."""
    message = []
    for page in range(page_count):
        # Append this page's rows to the overall list.
        message.extend(parse_companies(fetch_page(page_url(page))))
    return message


def save_rows(rows, path: str = CSV_PATH) -> None:
    """Write *rows* to *path* as CSV.

    ``newline=""`` is what the csv module requires of the file object;
    without it, Windows output gets an extra blank line after every row.
    """
    with open(path, "w", newline="", encoding=CSV_ENCODING) as f:
        csv.writer(f).writerows(rows)


def load_rows(path: str = CSV_PATH) -> pd.DataFrame:
    """Read the CSV at *path* into a DataFrame with columns name and fund."""
    return pd.read_csv(path, names=["name", "fund"], encoding=CSV_ENCODING)


def main() -> None:
    """Scrape the listing, save it to content.csv and print the first 20 rows."""
    message = scrape()
    save_rows(message)
    df = load_rows()
    print(df.head(20))


if __name__ == "__main__":
    main()