You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

68 lines
2.3 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
"""
Created on Thu Jun 6 14:36:59 2024
@author: Asus
"""
import re

import requests

# First page of the chinaz company ranking; the paging loop further down
# requests this same address for page 1 and builds the later page addresses.
url = "https://top.chinaz.com/gongsi/index_shizhi.html"

# Request headers sent with every page request (a desktop Chrome user agent).
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
}

# No page is fetched here: the paging loop below downloads and parses page 1
# itself on its first iteration, so a separate request at this point would
# only repeat that download and have its results overwritten before use.
# The two patterns never change, so they are compiled once instead of being
# rebuilt on every loop iteration.
# Company name: the text of the link that is closed right before </h3>.
company_pattern = re.compile(r'<a.*?target="_blank">(.+?)</a></h3>')
# Market value: the text of the <em> element immediately followed by "市值".
money_pattern = re.compile(r'<em>(.+?)</em>市值</div>')

# Collected (company, market value) pairs from every page.
message = []
# The ranking is spread over 16 pages.
for page in range(16):
    # Page 1 has no numeric suffix; later pages are index_shizhi_<n>.html,
    # where n is the 1-based page number.
    if page == 0:
        url = "https://top.chinaz.com/gongsi/index_shizhi.html"
    else:
        url = "https://top.chinaz.com/gongsi/index_shizhi_{}.html".format(page + 1)
    # requests has no default timeout; without one a stalled connection
    # would block the script indefinitely.
    response = requests.get(url, headers=headers, timeout=10)
    html = response.text
    company = company_pattern.findall(html)
    money = money_pattern.findall(html)
    # Pair names with values positionally. zip stops at the shorter list, so
    # a page where the two patterns match a different number of times
    # contributes only the pairs that line up.
    message.extend(zip(company, money))
import csv

# Write one CSV row per (company, market value) pair, without a header row.
# newline="" hands line-ending control to the csv module; without it the
# writer's "\r\n" terminator is translated again on Windows and every row
# is followed by an empty line.
with open("content.csv", "w", encoding="utf-8", newline="") as csv_file:
    writer = csv.writer(csv_file)
    writer.writerows(message)
import pandas as pd

# Multipliers that express a magnitude suffix in units of 亿 (1e8).
# NOTE(review): the exact text of the scraped values is not visible from this
# script; these suffixes are an assumption -- confirm against content.csv.
_UNIT_IN_YI = {"万亿": 1e4, "亿": 1.0, "万": 1e-4}


def _market_value_as_number(column):
    """Return *column* as numbers so it can be ranked and plotted.

    A column that pandas already parsed as numeric is returned unchanged.
    Otherwise each value is treated as text: thousands separators are
    removed, the first number is extracted and, when it is followed by one
    of the suffixes in ``_UNIT_IN_YI``, scaled to units of 亿.  Values with
    no number in them become NaN.
    """
    if pd.api.types.is_numeric_dtype(column):
        return column
    text = column.astype(str).str.replace(",", "", regex=False)
    # Group 0 is the number, group 1 the optional suffix; the longest suffix
    # is listed first so "万亿" is not read as "万".
    parts = text.str.extract(r"(-?\d+(?:\.\d+)?)\s*(万亿|亿|万)?")
    number = pd.to_numeric(parts[0], errors="coerce")
    scale = parts[1].map(_UNIT_IN_YI).fillna(1.0)
    return number * scale


# Load the scraped rows; the file has no header line, so names are given here.
df = pd.read_csv("content.csv", names=["company", "money"])
df.head()  # return value is not used; kept for interactive inspection
df.info()
# Number of companies per distinct market-value entry.
df1 = df.groupby("money").count()["company"]
# Rank on the numeric value: sorting the raw column would order text values
# character by character (e.g. "9..." ahead of "10...").  `df` itself is left
# untouched; only the ranked copy carries the converted column.
top20 = (
    df.assign(money=_market_value_as_number(df["money"]))
    .sort_values(by="money", ascending=False)
    .head(20)
)
import matplotlib.pyplot as plt

# Font configuration, applied in one call:
#   - use SimHei as the sans-serif font (the title and labels are Chinese);
#   - turn off the Unicode minus sign so negative tick labels display
#     correctly on the axes.
plt.rcParams.update(
    {
        "font.sans-serif": ["SimHei"],
        "axes.unicode_minus": False,
    }
)

# Bar chart of the 20 ranked companies: one bar per company.
company_names = top20["company"]
market_values = top20["money"]

plt.figure(figsize=(10, 6))  # figure size
plt.bar(company_names, market_values, color="skyblue")
plt.title("市值最多的公司TOP20")
plt.xlabel("公司名称")
plt.ylabel("市值")
plt.xticks(rotation=45)  # rotate the x tick labels so they are easier to read
plt.tight_layout()  # adjust the layout to avoid overlapping labels
plt.show()