You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

55 lines
2.1 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import requests
# URL to request.
url = "https://top.chinaz.com/gongsitop/index_500top.html"
# Request headers: send a desktop Chrome user-agent string.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
}
# Issue the GET request with the requests library. requests applies no
# timeout by default, so set one to avoid blocking forever on a stalled
# connection.
response = requests.get(url, headers=headers, timeout=10)
# Fail fast on a 4xx/5xx answer instead of carrying on with an error page.
response.raise_for_status()
# Response body decoded as text.
html = response.text
import re
# Patterns are compiled once, outside the page loop.
# Company name: text of an <a ... target="_blank"> link that is closed
# right before </h3>.
company_pattern = re.compile(r'<a.*?target="_blank">(.+?)</a></h3>')
# Registered capital: text between the "注册资本:" label span and </p>.
signfunds_pattern = re.compile(r'注册资本:</span>(.+?)</p>')

# Accumulates (company, registered capital) tuples across all pages.
message = []
# 16 pages of data in total.
for page in range(16):
    # Build the page URL: the first page has no numeric suffix, the
    # following pages are suffixed with their 1-based number (2..16).
    if page == 0:
        url = "https://top.chinaz.com/gongsitop/index_500top.html"
    else:
        url = "https://top.chinaz.com/gongsitop/index_500top_{}.html".format(page + 1)
    # Issue the GET request; the timeout keeps a stalled connection from
    # blocking the whole run, and raise_for_status stops on 4xx/5xx so an
    # error page is not silently parsed as "no companies".
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    html = response.text
    # Extract the data with findall.
    company = company_pattern.findall(html)
    signfunds = signfunds_pattern.findall(html)
    # NOTE(review): zip stops at the shorter list, so if one pattern
    # matches fewer times than the other on a page, pairs may be dropped
    # or misaligned without any error.
    pageOne = list(zip(company, signfunds))
    # Append this page's rows to the overall result.
    message.extend(pageOne)
import csv
# Write the collected rows in `message` to content.csv.
# newline="" hands line-ending control to the csv module (without it an
# extra blank line is written between rows on Windows); the explicit UTF-8
# encoding keeps the Chinese text independent of the platform's default
# encoding.
with open("content.csv", "w", newline="", encoding="utf-8") as f:
    w = csv.writer(f)
    w.writerows(message)
import pandas as pd
# Load the data; column names are supplied explicitly via `names`, so the
# first line of the file is read as data rather than as a header.
df = pd.read_csv("content.csv", names=["company", "signfunds"], encoding="utf-8")
# Keep only the rows whose registered capital contains '亿'.
# na=False treats missing values as non-matching, so the boolean mask never
# contains NA; .copy() yields an independent frame, so the column
# assignments below do not operate on a slice of df
# (SettingWithCopyWarning).
y_funds = df.loc[df['signfunds'].str.contains('亿', na=False)].copy()
# Strip the literal unit suffix so the remainder can be parsed as a number.
y_funds['signfunds'] = y_funds['signfunds'].str.replace('亿元', '', regex=False)
# NOTE(review): raises ValueError if a remaining value is not a plain
# number (e.g. a unit other than '亿元') — confirm against the scraped data.
y_funds['signfunds'] = y_funds['signfunds'].astype(float)
# Largest registered capital first.
y_funds_sorted = y_funds.sort_values(by=['signfunds'], ascending=False)
import matplotlib.pyplot as plt
# Enable inline figures when running under IPython/Jupyter. A bare
# "%matplotlib inline" line is IPython-only syntax and a SyntaxError in a
# plain Python file; get_ipython is only defined inside an IPython session,
# so outside of one the NameError is deliberately ignored.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass
# Draw a horizontal bar chart of the first 20 rows of the sorted frame.
funds_top_20 = y_funds_sorted.head(20)
# Use the SimHei font so the Chinese labels can be rendered, and disable
# the Unicode minus sign for axis tick labels.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
plt.barh(funds_top_20['company'], funds_top_20['signfunds'])
plt.xlabel('注册资金(亿元)')  # x-axis label
plt.ylabel('公司')  # y-axis label
plt.title('注册资金最多的公司 top20 ')  # chart title
plt.gca().invert_yaxis()  # flip the y-axis so the first (largest) bar is on top
plt.show()