From 91d59f044db43bee3d70549bff958248fd062d6b Mon Sep 17 00:00:00 2001
From: hnu202401010123 <916363713@qq.com>
Date: Wed, 25 Dec 2024 21:09:38 +0800
Subject: [PATCH] ADD file via upload
---
zhuceziben.py | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 85 insertions(+)
create mode 100644 zhuceziben.py
diff --git a/zhuceziben.py b/zhuceziben.py
new file mode 100644
index 0000000..d965fd1
--- /dev/null
+++ b/zhuceziben.py
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Dec 25 20:49:21 2024
+
+@author: 21470
+"""
+#### Code window: download the first page of the registered-capital ranking
+import requests
+url = "https://top.chinaz.com/gongsi/index_zhuce.html"
+headers = {
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
+}
+response = requests.get(url, headers=headers, timeout=10)  # timeout so a stalled server cannot hang the script
+html = response.text
+html  # notebook-style echo of the page source; has no effect when run as a plain script
+import re
+# Extract the fields with re.findall
+# Company name -- NOTE(review): the HTML tag context of both patterns was reconstructed; verify against the live page
+company = re.findall(r'<a.*?target="_blank">(.+?)</a></h3>', html)
+# Registered capital (the literal was previously split across two lines, which is a syntax error)
+capital = re.findall(r'注册资本:</span>(.*?)</p>', html)
+pageOne = list(zip(company, capital))
+pageOne  # notebook-style echo of the (company, capital) pairs from the first page
+# Accumulates the (company, capital) tuples collected from every page
+message = []
+# The ranking spans 16 pages in total
+for page in range(16):
+    # Build the URL: the first page has no numeric suffix, later pages use _2 .. _16
+    if page == 0:
+        url = "https://top.chinaz.com/gongsi/index_zhuce.html"
+    else:
+        url = "https://top.chinaz.com/gongsi/index_zhuce_{}.html".format(page + 1)
+    # Send a GET request with the requests module; the timeout keeps a stalled server from hanging the run
+    response = requests.get(url, headers=headers, timeout=10)
+    html = response.text
+    # Extract the fields with re.findall
+    # Company name -- NOTE(review): the HTML tag context of both patterns was reconstructed; verify against the live page
+    company = re.findall(r'<a.*?target="_blank">(.+?)</a></h3>', html)
+    # Registered capital
+    capital = re.findall(r'注册资本:</span>(.*?)</p>', html)
+    pageOne = list(zip(company, capital))
+    # Append this page's rows to the overall list
+    message.extend(pageOne)
+message
+# Save the scraped rows with the built-in csv module, then print the file back
+import csv
+with open("zhucecontent.csv", "w", encoding="utf-8", newline="") as f:  # UTF-8 matches pd.read_csv's default; newline="" as the csv docs require
+    csv.writer(f).writerows(message)
+with open("zhucecontent.csv", encoding="utf-8") as f:  # plain-Python replacement for the IPython-only `!cat`
+    print(f.read(), end="")
+import matplotlib.pyplot as plt
+import pandas as pd
+
+# Load the scraped rows; the CSV has no header row, so the column names are given here
+table = pd.read_csv("zhucecontent.csv", header=None, names=["company", "capital"])
+
+# Strip every character except digits and dots from the capital text, turn empty
+# leftovers into '0', then convert to numbers (unparseable values become NaN)
+capital_text = table['capital'].str.replace(r'[^\d.]', '', regex=True).replace('', '0')
+table['capital'] = pd.to_numeric(capital_text, errors='coerce')
+
+# Rank by registered capital, largest first, and keep the first 20 rows
+top_20 = table.sort_values(by='capital', ascending=False).head(20)
+
+# Configure matplotlib in one call: SimHei so Chinese text renders, and
+# axes.unicode_minus off so minus signs display correctly with that font
+plt.rcParams.update({'font.sans-serif': ['SimHei'], 'axes.unicode_minus': False})
+
+# Bar chart: company names on the x axis, registered capital on the y axis
+names, amounts = top_20['company'], top_20['capital']
+plt.bar(names, amounts)
+
+# Chart title and axis labels
+plt.title('中国五百强中注册资金最多的20家公司')
+plt.xlabel('公司名称')
+plt.ylabel('注册资金')
+
+# Rotate the x tick labels 45 degrees, right-aligned, so the names do not overlap
+plt.xticks(rotation=45, ha='right')
+
+# Let matplotlib adjust the layout so labels are not cut off
+plt.tight_layout()
+
+# Display the chart
+plt.show()