Update README.md

master
hnu202109070126 3 years ago
parent 19b0cef7c7
commit 3ad94a7c8b

@ -1,2 +1,32 @@
# asdasd23 # asdasd23
# -*- coding: utf-8 -*-
"""
Created on Sun May 15 20:46:07 2022
@author: 张景瑞
"""
import requests
import re
import csv

import pandas as pd

# Browser-like request headers sent with every page fetch.
HEADERS = {
    "user-agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36 Edg/101.0.1210.39'
}

# Number of listing pages to walk (first page plus pages 2..10).
PAGE_COUNT = 10

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would block the script indefinitely.
REQUEST_TIMEOUT = 10

CSV_PATH = "content.csv"

# The same encoding is used for writing and reading the CSV. Writing with the
# platform default and reading back as gb2312 fails on any non-GBK locale and
# on characters that gb2312 cannot represent. The BOM variant is used so that
# spreadsheet tools can detect UTF-8.
CSV_ENCODING = "utf-8-sig"

# Text of the link that closes an <h3> heading (presumably the company name;
# verify against the live page markup).
NAME_PATTERN = re.compile('<a.*?target="_blank">(.+?)</a></h3>')

# Text that follows the "注册资本:" (registered capital) label up to </p>.
FUND_PATTERN = re.compile('注册资本:</span>(.*?)</p>')


def page_url(index):
    """Return the listing URL for the zero-based page *index*.

    Index 0 maps to the un-numbered first page; index ``i > 0`` maps to the
    page numbered ``i + 1``.
    """
    if index == 0:
        return "https://top.chinaz.com/gongsi/index_zhuce.html"
    return "https://top.chinaz.com/gongsi/index_zhuce_{}.html".format(index + 1)


def fetch_html(url):
    """Download *url* and return the response body as text.

    Raises:
        requests.RequestException: on timeout, connection failure, or an
            HTTP error status (so an error page is never parsed as data).
    """
    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def parse_companies(html):
    """Extract ``(name, fund)`` pairs from one listing page.

    Names and funds are matched independently and paired by position; if the
    two match lists differ in length, the extra entries are dropped by
    ``zip``.
    """
    names = NAME_PATTERN.findall(html)
    funds = FUND_PATTERN.findall(html)
    return list(zip(names, funds))


def save_rows(rows, path=CSV_PATH):
    """Write *rows* to *path* as CSV, replacing any existing file."""
    # newline="" lets the csv module control line endings; without it an
    # extra blank line appears after every row on Windows.
    with open(path, "w", newline="", encoding=CSV_ENCODING) as f:
        csv.writer(f).writerows(rows)


def load_frame(path=CSV_PATH):
    """Read the header-less CSV at *path* into a ``name``/``fund`` DataFrame."""
    return pd.read_csv(path, names=["name", "fund"], encoding=CSV_ENCODING)


def main():
    """Scrape every listing page, save the pairs to CSV, and print a preview."""
    message = []
    for page in range(PAGE_COUNT):
        # Accumulate this page's pairs into the overall list.
        message.extend(parse_companies(fetch_html(page_url(page))))
    save_rows(message)
    df = load_frame()
    print(df.head(20))


if __name__ == "__main__":
    main()

Loading…
Cancel
Save