"""Scrape company registered-capital data from top.chinaz.com and chart it.

The script walks the "index_zhuce" ranking pages, extracts company names and
their registered capital, normalises the capital to units of 100 million RMB
(the unit shown in the chart legend), stores the pairs in ``content.csv`` and
finally draws a bar chart of the first 20 rows.

Recovered from the git patch "ADD file via upload" (commit 2c7681d6), which
added this script as ``createsql.py``.
"""
import csv
import re

# Third-party packages (requests, pandas, matplotlib) are imported inside the
# functions that use them so the pure parsing helpers below can be imported
# and tested without the network / plotting stack being installed.

HEADERS = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0'}

FIRST_PAGE = 1
LAST_PAGE = 17
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled server hangs the run

CSV_PATH = 'content.csv'
# The same explicit encoding is used for writing and reading. The previous
# combination (locale default on write, 'ANSI' on read) only works on Windows.
CSV_ENCODING = 'utf-8'
CSV_COLUMNS = ['company', 'registered captital']

TOP_N = 20

# Multiplier applied to amounts whose text contains '美'.
# NOTE(review): presumably a USD -> CNY exchange rate; confirm and keep current.
USD_TO_CNY = 7.18

# NOTE(review): this pattern looks truncated in the copy this file was
# recovered from -- as written it matches every single character. The HTML
# markup that presumably surrounded the group must be restored from the
# original createsql.py / the page source before running.
COMPANY_RE = re.compile('(.+?)')

# NOTE(review): same caveat as COMPANY_RE. The recovered text had bare line
# breaks around the label where markup presumably used to be; they are kept
# here as explicit newlines until the real pattern is restored.
CAPITAL_RE = re.compile(r'\n\n注册资本:(.+?)\n\n')

NUMBER_RE = re.compile(r'\d+\.?\d*')


def page_url(page: int) -> str:
    """Return the ranking URL for *page*; page 1 has no numeric suffix."""
    if page == 1:
        return "https://top.chinaz.com/gongsi/index_zhuce.html"
    return "https://top.chinaz.com/gongsi/index_zhuce_{}.html".format(page)


def capital_to_yi(text: str) -> float:
    """Convert a scraped capital string to a float rounded to 2 decimals.

    The first number found in *text* is used. Conversion rules:

    * text containing '美': multiplied by ``USD_TO_CNY`` and divided by 10000
      (this applies whether or not '万' is also present);
    * otherwise, text containing '万': divided by 10000;
    * otherwise: the number is used unchanged.

    Raises:
        ValueError: if *text* contains no number.
    """
    match = NUMBER_RE.search(text)
    if match is None:
        raise ValueError('no numeric amount in {!r}'.format(text))
    amount = float(match.group())
    if '美' in text:
        return round(amount * USD_TO_CNY / 10000, 2)
    if '万' in text:
        return round(amount / 10000, 2)
    return round(amount, 2)


def scrape(first_page: int = FIRST_PAGE, last_page: int = LAST_PAGE):
    """Download the ranking pages and return ``(companies, capitals)``.

    ``companies`` is a list of strings and ``capitals`` a list of floats as
    produced by :func:`capital_to_yi`, both in page order.

    Raises:
        requests.HTTPError: if a page answers with an error status.
        ValueError: if a capital entry contains no number.
    """
    import requests

    companies = []
    capitals = []
    for page in range(first_page, last_page + 1):
        response = requests.get(
            url=page_url(page), headers=HEADERS, timeout=REQUEST_TIMEOUT
        )
        # Fail loudly instead of silently parsing an error page into nothing.
        response.raise_for_status()
        html = response.text
        companies.extend(COMPANY_RE.findall(html))
        capitals.extend(capital_to_yi(raw) for raw in CAPITAL_RE.findall(html))
    return companies, capitals


def write_csv(rows, path: str = CSV_PATH) -> None:
    """Write *rows* (an iterable of ``(company, capital)`` pairs) to *path*."""
    # newline='' is required by the csv module; without it Windows output
    # gets an extra blank line after every row.
    with open(path, 'w', newline='', encoding=CSV_ENCODING) as handle:
        csv.writer(handle).writerows(rows)


def plot_top(path: str = CSV_PATH, count: int = TOP_N) -> None:
    """Show a bar chart of the first *count* rows stored in *path*."""
    import matplotlib.pyplot as plt
    import pandas

    frame = pandas.read_csv(path, names=CSV_COLUMNS, encoding=CSV_ENCODING)
    top = frame.head(count)

    # SimHei is selected so the Chinese labels below render.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.grid(axis='y', which='major')
    plt.xlabel('公司')
    plt.ylabel('注册资金')
    plt.title('注册资金20强公司')
    plt.bar(
        top[CSV_COLUMNS[0]],
        top[CSV_COLUMNS[1]],
        width=0.5,
        align='center',
        color='b',
        alpha=0.5,
    )
    plt.legend(['单位:亿人民币'])
    plt.tick_params(axis='x', labelsize=9)
    plt.xticks(rotation=30)
    plt.show()


def main() -> None:
    """Scrape the ranking, persist it to ``content.csv`` and plot it."""
    companies, capitals = scrape()
    # zip() stops at the shorter list, so unmatched trailing entries are dropped.
    write_csv(zip(companies, capitals))
    plot_top()


if __name__ == "__main__":
    main()