# -*- coding: utf-8 -*-
"""
Created on Fri Jun  3 14:51:42 2022

@author: 86136

Scrape the chinaz.com company ranking ordered by registered capital, save the
(company, capital) pairs to ``content.csv`` and draw a horizontal bar chart of
the first 20 entries, with capital expressed in units of 亿 (10**8) yuan.
"""

# Code window

import csv
import re

# Third-party packages (requests, pandas, matplotlib) are imported inside the
# functions that need them so the pure helpers below stay importable, and
# testable, without the network/plotting stack installed.

FIRST_PAGE_URL = "https://top.chinaz.com/gongsi/index_zhuce.html"
# NOTE(review): the original called .format(page + 1) on a URL that had no
# placeholder, so every iteration re-fetched page 1. The "_N" suffix used here
# is assumed from the site's paging scheme -- confirm against the live site.
NEXT_PAGE_URL = "https://top.chinaz.com/gongsi/index_zhuce_{}.html"

HEADERS = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"}

PAGE_COUNT = 16
MAX_ROWS = 500
TOP_N = 20
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled server hangs forever

CSV_PATH = "content.csv"
# The file was originally written with the platform default encoding but read
# back as GBK, which only round-trips where the default happens to be GBK.
# Both directions now use the same explicit encoding.
CSV_ENCODING = "gbk"

# NOTE(review): both patterns appear to have lost their HTML markup when this
# file was copied: a bare "(.+?)" matches every single character, and the
# capital pattern ends in two raw newlines. They are reproduced exactly as
# found; restore the original tag context from the page source before relying
# on the scraped rows.
COMPANY_PATTERN = '(.+?)'
CAPITAL_PATTERN = '注册资本:(.*?)\n\n'

_DECIMAL_RE = re.compile(r'\d+\.\d+')
_INTEGER_RE = re.compile(r'\d+')


def page_url(page_index):
    """Return the ranking URL for the zero-based *page_index*."""
    if page_index == 0:
        return FIRST_PAGE_URL
    return NEXT_PAGE_URL.format(page_index + 1)


def extract_rows(html):
    """Return the (company, capital) pairs found in one page of HTML.

    The two pattern result lists are paired positionally; if they differ in
    length the surplus entries of the longer one are dropped by ``zip``.
    """
    companies = re.findall(COMPANY_PATTERN, html)
    capitals = re.findall(CAPITAL_PATTERN, html)
    return list(zip(companies, capitals))


def fetch_rankings(page_count=PAGE_COUNT, limit=MAX_ROWS):
    """Download up to *page_count* ranking pages and return at most *limit* rows.

    Raises:
        requests.RequestException: on a timeout, connection failure or HTTP
            error status, instead of silently parsing an error page.
    """
    import requests

    rows = []
    for page_index in range(page_count):
        response = requests.get(
            page_url(page_index), headers=HEADERS, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        rows.extend(extract_rows(response.text))
        if len(rows) >= limit:
            # Later pages could not contribute to the truncated result.
            break
    return rows[:limit]


def save_rows(rows, path=CSV_PATH):
    """Write *rows* to *path* as CSV, one (company, capital) pair per line."""
    # newline="" lets the csv module control line endings itself; without it
    # every row is followed by an extra blank line on Windows.
    with open(path, "w", newline="", encoding=CSV_ENCODING) as handle:
        csv.writer(handle).writerows(rows)


def load_top(path=CSV_PATH, count=TOP_N):
    """Read the CSV back and return ``(companies, capitals)`` for the first *count* rows.

    Missing cells are replaced with the string ``'0'``.
    """
    import pandas as pd

    frame = pd.read_csv(
        path, names=["company", "money"], encoding=CSV_ENCODING, dtype=str)
    frame = frame.fillna('0')
    return list(frame['company'])[:count], list(frame['money'])[:count]


def capital_to_yi(text):
    """Convert a registered-capital string to whole units of 亿 yuan.

    Decimal numbers are preferred; plain integers are used only when the text
    contains no decimal number. When several numbers are present the last one
    is used. A value marked 万 is divided by 10,000, a value marked 亿 is taken
    as is, and the result is rounded with the built-in ``round``.

    Returns:
        int: the rounded amount, or 0 when the text has no number or no
        recognised unit (the original left an empty string in that case,
        which cannot be plotted).
    """
    numbers = _DECIMAL_RE.findall(text) or _INTEGER_RE.findall(text)
    if not numbers:
        return 0
    value = float(numbers[-1])
    if '万' in text:
        return round(value / 10000)
    if '亿' in text:
        return round(value)
    return 0


def plot_top(companies, capitals):
    """Draw a horizontal bar chart of *capitals* (in 亿 yuan) per company.

    Both sequences are reversed before plotting, so the first entry is drawn
    last.
    """
    import matplotlib.pyplot as plt

    plt.rcParams['font.sans-serif'] = ['SimHei']  # font with CJK glyphs
    plt.rcParams['axes.unicode_minus'] = False

    names = list(reversed(companies))
    values = list(reversed(capitals))
    colors = ['red'] * 5 + ['blue'] * 5 + ['green'] * 5 + ['yellow'] * 5

    plt.barh(names, values, height=0.7, color=colors)
    plt.yticks(names, names)
    for name, value in zip(names, values):
        # Label each bar with its value at the bar's end.
        plt.text(value, name, str(value), fontsize=8)
    plt.title('注册资金最多的公司top20')
    # For barh the x axis carries the values and the y axis the categories;
    # the original had these two labels swapped.
    plt.xlabel('注册资金/亿元')
    plt.ylabel('公司')
    plt.show()


def main():
    """Scrape the ranking, persist it to CSV and plot the first 20 entries."""
    save_rows(fetch_rankings())
    companies, capitals = load_top()
    plot_top(companies, [capital_to_yi(text) for text in capitals])


if __name__ == "__main__":
    main()