"""Scrape the registered-capital company ranking from top.chinaz.com and chart it.

The script downloads every ranking page, extracts company names and their
registered capital, normalises the capital to a single unit, writes the
(company, capital) pairs to ``content.csv`` and finally draws a bar chart of
the first 20 rows of that file.
"""
import csv
import re

HEADERS = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0'}

FIRST_PAGE_URL = "https://top.chinaz.com/gongsi/index_zhuce.html"
PAGE_URL_TEMPLATE = "https://top.chinaz.com/gongsi/index_zhuce_{}.html"
LAST_PAGE = 17

# Seconds to wait for the server; requests waits indefinitely without this.
REQUEST_TIMEOUT = 10

CSV_PATH = 'content.csv'
# One explicit encoding for both writing and reading.  The BOM variant of
# UTF-8 is used so spreadsheet tools can detect the encoding of the Chinese
# text; the previous read-side 'ANSI' alias only exists on Windows.
CSV_ENCODING = 'utf-8-sig'
CSV_COLUMNS = ['company', 'registered captital']

# Multiplier applied to amounts whose text contains '美'.
# NOTE(review): presumably a USD -> CNY exchange rate; confirm and update.
USD_TO_CNY_RATE = 7.18
# 1 亿 == 10000 万; the chart is labelled in 亿.
WAN_PER_YI = 10000

TOP_N = 20

# NOTE(review): both HTML patterns look as if their surrounding tag markup was
# lost before this file was saved.  As written, COMPANY_PATTERN matches every
# single non-newline character of the page.  Restore the anchors from the live
# page markup before trusting the output.
COMPANY_PATTERN = re.compile('(.+?)')
CAPITAL_PATTERN = re.compile('\n\n注册资本:(.+?)\n\n')
NUMBER_PATTERN = re.compile(r'\d+\.?\d*')


def page_url(page):
    """Return the URL of ranking page *page* (1-based).

    The first page has no numeric suffix; every later page does.
    """
    if page == 1:
        return FIRST_PAGE_URL
    return PAGE_URL_TEMPLATE.format(page)


def parse_registered_capital(text):
    """Convert a registered-capital string to a float rounded to 2 decimals.

    The first number found in *text* is used.  If the text contains '美' the
    number is multiplied by ``USD_TO_CNY_RATE`` and divided by 10000; if it
    contains '万' it is divided by 10000; otherwise it is returned as is.

    NOTE(review): the '美' branch always divides by 10000, i.e. it assumes
    such amounts are quoted in 万 -- confirm against the site data.

    Raises:
        ValueError: if *text* contains no number.
    """
    match = NUMBER_PATTERN.search(text)
    if match is None:
        raise ValueError('no numeric amount in registered capital: {!r}'.format(text))
    amount = float(match.group())
    if '美' in text:
        return round(amount * USD_TO_CNY_RATE / WAN_PER_YI, 2)
    if '万' in text:
        return round(amount / WAN_PER_YI, 2)
    return round(amount, 2)


def extract_rows(html):
    """Return the list of (company, capital) tuples found in one page of HTML.

    Names and capitals are paired positionally within the page, so a page with
    a missing field cannot shift the pairing of the pages that follow it.
    """
    companies = COMPANY_PATTERN.findall(html)
    capitals = [parse_registered_capital(raw) for raw in CAPITAL_PATTERN.findall(html)]
    return list(zip(companies, capitals))


def fetch_page(url):
    """Download *url* with the browser-like headers and return the body text."""
    # Imported here so the parsing helpers stay importable without requests.
    import requests

    response = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    return response.text


def scrape_rankings():
    """Fetch pages 1..LAST_PAGE and return all (company, capital) rows in order."""
    rows = []
    for page in range(1, LAST_PAGE + 1):
        rows.extend(extract_rows(fetch_page(page_url(page))))
    return rows


def write_csv(rows, path=CSV_PATH):
    """Write *rows* to *path* as CSV, without a header line."""
    # newline='' is required by the csv module; without it Windows output
    # gets an extra blank line after every row.
    with open(path, 'w', newline='', encoding=CSV_ENCODING) as handle:
        csv.writer(handle).writerows(rows)


def plot_top_companies(path=CSV_PATH):
    """Read the CSV at *path* and show a bar chart of its first TOP_N rows."""
    # Imported here so the parsing helpers stay importable without the
    # plotting stack installed.
    import matplotlib.pyplot as plt
    import pandas

    df = pandas.read_csv(path, names=CSV_COLUMNS, encoding=CSV_ENCODING)
    # SimHei is set so the Chinese labels below can be rendered.
    plt.rcParams['font.sans-serif']=['SimHei']
    # .loc slicing is label-based and inclusive, hence TOP_N - 1.
    x = df.loc[:TOP_N - 1, 'company']
    height = df.loc[:TOP_N - 1, 'registered captital']
    plt.grid(axis='y',which='major')
    plt.xlabel('公司')
    plt.ylabel('注册资金')
    plt.title('注册资金20强公司')
    plt.bar(x,height,width=0.5,align='center',color='b',alpha=0.5)
    plt.legend(['单位:亿人民币'])
    plt.tick_params(axis='x',labelsize=9)
    plt.xticks(rotation=30)
    plt.show()


def main():
    """Scrape the ranking, save it to ``content.csv`` and display the chart."""
    write_csv(scrape_rankings())
    plot_top_companies()


if __name__ == "__main__":
    main()