You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 6 15:40:29 2024
@author: Asus
"""
import requests
import re
# --- Scrape company names and registered capital from the chinaz listing ---

# Number of listing pages to request.
_PAGE_COUNT = 10
# Per-request timeout in seconds, so a stalled server cannot hang the script.
_REQUEST_TIMEOUT = 10
# Common stem of every listing URL; the page suffix is appended per request.
_LIST_URL_STEM = "https://top.chinaz.com/gongsi/index_zhuce"
# Request headers: a desktop-browser user agent, built once for all requests.
_HEADERS = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
}
# The company name is the link text inside each <h3 class=""> heading.
# Whitespace between tokens is optional so minor markup spacing differences
# do not silently yield zero matches.
_NAME_RE = re.compile(
    r'<h3\s+class="\s*"\s*>\s*<a\s+href=.+?\s+target="\s*_blank\s*"\s*>(.+?)</a>\s*</h3>'
)
# The registered capital is the <em> text preceding the "注册资本" label.
_MONEY_RE = re.compile(
    r'<div\s+class="\s*CoDate\s*"\s*>\s*<em>(.+?)</em>\s*注册资本\s*</div>'
)


def _page_url(page_index):
    """Return the listing URL for a zero-based page index.

    Index 0 maps to ``index_zhuce.html``; any other index ``n`` maps to
    ``index_zhuce_<n>.html``.
    """
    # NOTE(review): this reproduces the original numbering (second request is
    # "_1"). Confirm against the site that "_1" is not a duplicate of page one.
    if page_index == 0:
        return _LIST_URL_STEM + ".html"
    return _LIST_URL_STEM + "_" + str(page_index) + ".html"


def _fetch_html(url):
    """Download ``url`` and return its decoded text, or None on a non-2xx reply.

    Network errors and timeouts raised by ``requests`` propagate to the caller.
    """
    response = requests.get(url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT)
    if not response.ok:
        print("Skipping", url, "- HTTP status", response.status_code)
        return None
    # Without a declared charset requests assumes ISO-8859-1 for text/*, which
    # would garble the Chinese text the patterns look for; use the detected
    # encoding in that case.
    if response.encoding is None or response.encoding.lower() == "iso-8859-1":
        response.encoding = response.apparent_encoding
    return response.text


company_name_list = []
company_money_list = []
for i in range(_PAGE_COUNT):
    html = _fetch_html(_page_url(i))
    if html is None:
        continue
    # Extend the flat result lists directly; surrounding whitespace is trimmed
    # so later unit parsing sees the bare text.
    company_name_list.extend(name.strip() for name in _NAME_RE.findall(html))
    company_money_list.extend(money.strip() for money in _MONEY_RE.findall(html))
print(company_name_list)
print(company_money_list)
import matplotlib . pyplot as plt
# Use a font that can render Chinese company names in tick labels.
plt.rcParams['font.sans-serif'] = ['SimHei']

# How many units of each suffix make up one 亿 (1e8), the unit of the y axis.
_UNITS_PER_YI = {'亿': 1, '万': 10000}
# A number (thousands separators allowed) followed by an optional unit suffix.
_CAPITAL_RE = re.compile(r'\s*([\d,]*\.?\d+)\s*(亿|万)?')


def _capital_in_yi(text):
    """Convert a scraped capital string to a float expressed in 亿 (1e8).

    A value suffixed with 亿 is returned as-is, a value suffixed with 万 is
    divided by 10000, and a value with no recognised suffix is returned
    unscaled. Assumes the text starts with the number -- TODO confirm against
    the live page markup.

    Raises:
        ValueError: if ``text`` does not start with a number.
    """
    match = _CAPITAL_RE.match(text)
    if match is None:
        raise ValueError("Unrecognised registered-capital value: %r" % (text,))
    digits, unit = match.groups()
    # float() replaces the original eval(): the text comes from a web page and
    # must never be executed as code.
    value = float(digits.replace(',', ''))
    return value / _UNITS_PER_YI.get(unit, 1)


# Pair each company name with its capital. The pairs keep the order in which
# they were scraped; no sorting is applied here.
combined = list(zip(company_name_list, company_money_list))
# Keep the first 20 entries.
top_20 = combined[:20]

if top_20:
    # Unpack into parallel tuples of names and raw capital strings.
    top_20_companies, top_20_capital = zip(*top_20)
    # Bar heights, normalised to 亿 so they match the y-axis label.
    a = [_capital_in_yi(capital) for capital in top_20_capital]

    # Draw the bar chart.
    plt.figure(figsize=(10, 6))
    plt.bar(top_20_companies, a)
    plt.title("Top 20 Companies by Registered Capital")
    plt.xlabel("Company Name")
    plt.ylabel("Registered Capital (a hundred million)")
    plt.xticks(rotation=90)
    plt.show()
else:
    # zip(*[]) cannot be unpacked into two names; report the situation instead
    # of failing with an opaque unpacking error.
    print("No company data was scraped; nothing to plot.")