parent
19b0cef7c7
commit
3ad94a7c8b
@ -1,2 +1,32 @@
|
|||||||
# asdasd23
|
# asdasd23
|
||||||
|
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Created on Sun May 15 20:46:07 2022
|
||||||
|
|
||||||
|
@author: 张景瑞
|
||||||
|
"""
|
||||||
|
import csv
import re

import pandas as pd
import requests
|
||||||
|
# Request headers sent with every page fetch.
HEADERS = {
    "user-agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36 Edg/101.0.1210.39'
}

PAGE_COUNT = 10            # number of listing pages to fetch
REQUEST_TIMEOUT = 10       # seconds; without a timeout requests.get can block forever
CSV_PATH = "content.csv"
# One encoding for both writing and reading the CSV. gb18030 is a superset of
# GB2312/GBK, so every scraped character can be encoded and read back.
CSV_ENCODING = "gb18030"

# Compiled once instead of being looked up on every page.
_NAME_RE = re.compile(r'<a.*?target="_blank">(.+?)</a></h3>')
_FUND_RE = re.compile(r'注册资本:</span>(.*?)</p>')


def page_url(page):
    """Return the listing URL for the zero-based index *page*.

    Index 0 maps to the un-numbered first page; index ``i > 0`` maps to the
    page numbered ``i + 1``.
    """
    if page == 0:
        return "https://top.chinaz.com/gongsi/index_zhuce.html"
    return "https://top.chinaz.com/gongsi/index_zhuce_{}.html".format(page + 1)


def parse_companies(html):
    """Extract ``(name, fund)`` pairs from the HTML text of one listing page.

    ``name`` is the link text directly inside an ``<h3>``; ``fund`` is the
    text that follows the "注册资本:" label up to the closing ``</p>``.

    NOTE(review): names and funds are matched independently and paired
    positionally; if a page yields different counts, ``zip`` silently drops
    the surplus entries -- confirm every entry on the page has both fields.
    """
    names = _NAME_RE.findall(html)
    funds = _FUND_RE.findall(html)
    return list(zip(names, funds))


def fetch_page(url):
    """Download *url* with the configured headers and return the body text."""
    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    return response.text


def scrape(pages=PAGE_COUNT):
    """Fetch the first *pages* listing pages and return all extracted pairs."""
    rows = []
    for page in range(pages):
        rows.extend(parse_companies(fetch_page(page_url(page))))
    return rows


def save_rows(rows, path=CSV_PATH):
    """Write *rows* to *path* as CSV using ``CSV_ENCODING``."""
    # newline="" is required by the csv module; otherwise blank lines appear
    # between rows on Windows.
    with open(path, "w", newline="", encoding=CSV_ENCODING) as f:
        csv.writer(f).writerows(rows)


def load_frame(path=CSV_PATH):
    """Read the CSV at *path* into a DataFrame with columns name and fund."""
    return pd.read_csv(path, names=["name", "fund"], encoding=CSV_ENCODING)


def main():
    """Scrape the listing pages, save them to CSV and print the first 20 rows."""
    save_rows(scrape())
    print(load_frame().head(20))


if __name__ == "__main__":
    main()
|
||||||
|
Loading…
Reference in new issue