# -*- coding: utf-8 -*-
"""
Scrape company names and registered capital from top.chinaz.com and plot the
20 companies with the largest registered capital as a bar chart.

Created on Thu Jun  6 15:40:29 2024

@author: Asus

Provenance: this script was recovered from the git patch "ADD file via upload"
(commit 41c050ac27072ba89a16eb1b7b0cc8e1a04e7d2d), which added it as
``python500.py``.
"""

import re
import sys

# Listing URL without the page suffix / extension.
BASE_URL = "https://top.chinaz.com/gongsi/index_zhuce"
# Number of listing pages requested.
PAGE_COUNT = 10
# Number of companies shown in the chart.
TOP_N = 20
# Seconds to wait for the server before giving up on a page.
REQUEST_TIMEOUT = 10

# Request headers: present the scraper as a desktop browser.
HEADERS = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
}

# NOTE(review): the HTML tags inside the two original regular expressions
# were stripped from the reviewed copy of this file; only "(.+?)" and
# "(.+?)注册资本" survived.  Both patterns below are reconstructions and
# have NOT been checked against the live page markup -- TODO confirm.
#
# Assumed shape for a company name: <h3><a ...>NAME</a></h3>
NAME_PATTERN = re.compile(r'<h3>\s*<a\b[^>]*>\s*([^<]+?)\s*</a>\s*</h3>')
# Captures the text node that directly precedes the "注册资本" label,
# tolerating any number of tags between the value and the label.
CAPITAL_PATTERN = re.compile(
    r'>\s*([^<>\s][^<>]*?)\s*(?:<[^>]*>\s*)*注册资本'
)

# Divisor that converts an amount in the given unit to 亿 (hundred million),
# the unit named on the chart's y axis.  1 亿 == 10000 万.
_UNIT_DIVISOR = {"亿": 1, "万": 10000}


def build_page_url(page_index):
    """Return the listing URL for the zero-based *page_index*.

    Index 0 maps to ``index_zhuce.html``; any other index ``i`` maps to
    ``index_zhuce_<i>.html``.
    """
    # NOTE(review): this keeps the original numbering, which requests
    # "_1" as the second page.  Whether the site numbers its second page
    # "_1" or "_2" could not be verified here -- TODO confirm.
    if page_index == 0:
        return BASE_URL + ".html"
    return BASE_URL + "_" + str(page_index) + ".html"


def fetch_html(url, timeout=REQUEST_TIMEOUT):
    """Download *url* with a GET request and return the response body text.

    Raises:
        requests.RequestException: on connection problems, timeouts or a
            non-success HTTP status.
    """
    # Imported here so the parsing helpers stay importable without the
    # third-party HTTP stack installed.
    import requests

    response = requests.get(url, headers=HEADERS, timeout=timeout)
    response.raise_for_status()
    return response.text


def extract_companies(html):
    """Return ``(name, capital_text)`` pairs found in one listing page.

    Names and capital strings are matched independently and paired by
    position, so a warning is printed when their counts differ (the pairs
    may then be misaligned for that page).
    """
    names = NAME_PATTERN.findall(html)
    capitals = CAPITAL_PATTERN.findall(html)
    if len(names) != len(capitals):
        print(
            f"Warning: found {len(names)} names but {len(capitals)} capital "
            "values on one page; pairs may be misaligned.",
            file=sys.stderr,
        )
    return list(zip(names, capitals))


def parse_capital(text):
    """Convert a capital string such as ``"3265.47亿"`` to a float in 亿.

    The last character is the unit (亿 or 万); an optional trailing 元 and
    thousands separators are ignored.

    Raises:
        ValueError: if the unit is not recognised or the numeric part is
            not a valid number.
    """
    cleaned = text.strip().replace(",", "")
    if cleaned.endswith("元"):
        cleaned = cleaned[:-1]
    if not cleaned:
        raise ValueError("empty capital value")

    number, unit = cleaned[:-1], cleaned[-1]
    if unit not in _UNIT_DIVISOR:
        raise ValueError(f"unrecognised capital unit in {text!r}")
    # float() instead of eval(): the text comes from a remote web page and
    # must never be executed as code.
    try:
        return float(number) / _UNIT_DIVISOR[unit]
    except ValueError as err:
        raise ValueError(f"invalid capital amount in {text!r}") from err


def top_companies(records, limit=TOP_N):
    """Return the *limit* largest ``(name, capital_in_yi)`` pairs.

    *records* is an iterable of ``(name, capital_text)`` pairs.  Entries
    whose capital cannot be parsed are skipped with a warning.  The result
    is sorted by capital, largest first; ties keep their input order.
    """
    parsed = []
    for name, capital_text in records:
        try:
            parsed.append((name, parse_capital(capital_text)))
        except ValueError as err:
            print(f"Skipping {name!r}: {err}", file=sys.stderr)
    parsed.sort(key=lambda pair: pair[1], reverse=True)
    return parsed[:limit]


def scrape_companies(page_count=PAGE_COUNT):
    """Fetch *page_count* listing pages and return all extracted pairs.

    A page that cannot be downloaded is reported on stderr and skipped so
    that one bad page does not discard the others.
    """
    import requests

    records = []
    for page_index in range(page_count):
        url = build_page_url(page_index)
        try:
            html = fetch_html(url)
        except requests.RequestException as err:
            print(f"Warning: could not fetch {url}: {err}", file=sys.stderr)
            continue
        records.extend(extract_companies(html))
    return records


def plot_top(ranked):
    """Draw a bar chart for *ranked*, a non-empty list of (name, value)."""
    import matplotlib.pyplot as plt

    # SimHei is needed so that Chinese company names render correctly.
    plt.rcParams['font.sans-serif'] = ['SimHei']

    names, values = zip(*ranked)
    plt.figure(figsize=(10, 6))
    plt.bar(names, values)
    plt.title("Top 20 Companies by Registered Capital")
    plt.xlabel("Company Name")
    plt.ylabel("Registered Capital (a hundred million)")
    plt.xticks(rotation=90)
    # Keep the rotated company names inside the figure area.
    plt.tight_layout()
    plt.show()


def main():
    """Scrape the listing, print the raw data and show the top-20 chart."""
    records = scrape_companies()
    print([name for name, _ in records])
    print([capital for _, capital in records])

    ranked = top_companies(records)
    if not ranked:
        raise SystemExit(
            "No company data could be extracted; the page markup may have "
            "changed - check NAME_PATTERN and CAPITAL_PATTERN."
        )
    plot_top(ranked)


if __name__ == "__main__":
    main()