"""Scrape the chinaz.com top-500 company listing, save it as CSV and chart it.

Reconstructed from the patch "ADD file via upload"
(commit 42a063e5eb00579204685bf5166089e229282e80), which adds this script
as ``500强(1).py``.

The third-party packages (``requests``, ``pandas``, ``matplotlib``) are
imported inside the functions that use them so that the pure parsing helpers
can be imported without those packages being installed.
"""

import csv
import re
import warnings

warnings.filterwarnings("ignore")

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
}
# Rows collected by getInfo(): (company, person, signDate, category).
message = []
# Never filled by this script; only passed along inside main()'s list.
message2 = []

# Exchange rates used to express foreign-currency capital in RMB.
HKD_TO_RMB = 0.9392
USD_TO_RMB = 7.1876

# Endings that mark an amount as RMB expressed in units of 10,000.
_RMB_SUFFIXES = ("万元人民币", "万人民币元", "万人民币", "万元", "万")

# First integer or decimal number found in a capital string.
_NUMBER_RE = re.compile(r'\d+\.?\d*')

# NOTE(review): these patterns look as if their HTML tag anchors were lost at
# some point (the company pattern is a bare capture group, and the other
# three end in two newlines where closing tags would be expected). As written
# the company pattern only ever captures empty strings. Confirm all four
# against the real page markup before trusting the scraped values.
_COMPANY_RE = re.compile('(.*?)')
_PERSON_RE = re.compile('法定代表人:(.*?)\n\n')
_SIGN_DATE_RE = re.compile('注册时间:(.*?)\n\n')
_CATEGORY_RE = re.compile('证券类别:(.*?)\n\n')


def transform(capitals):
    """Convert registered-capital strings to rounded numeric amounts.

    For each non-empty entry the first number in the string is extracted and:

    * RMB amounts marked as units of 10,000 are divided by 10000;
    * HKD / USD amounts marked as units of 10,000 are multiplied by the
      exchange rate and divided by 10000;
    * anything else is taken as the plain number.

    All results are rounded to two decimals. Falsy entries (``""``, ``None``)
    and entries that contain no number at all are passed through unchanged,
    so the result always has the same length and order as ``capitals``.

    Args:
        capitals: iterable of capital descriptions (strings, possibly empty).

    Returns:
        A list with one converted value (or the untouched entry) per input.
    """
    converted = []
    for capital in capitals:
        if capital:
            found = _NUMBER_RE.search(capital)
            # Entries without any digits cannot be converted; keep them as-is
            # instead of failing on a missing match.
            if found:
                amount = float(found.group())
                if capital.startswith("(人民币)") or capital.endswith(_RMB_SUFFIXES):
                    capital = round(amount / 10000, 2)
                elif capital.startswith("(港币)") or capital.endswith("万港币"):
                    capital = round(amount * HKD_TO_RMB / 10000, 2)
                elif capital.endswith("万美元"):
                    capital = round(amount * USD_TO_RMB / 10000, 2)
                else:
                    capital = round(amount, 2)
        converted.append(capital)
    return converted


def getInfo(html):
    """Extract company rows from one listing page and add them to ``message``.

    The four field lists are combined positionally with ``zip``, so a page
    contributes only as many rows as its shortest field list.

    Args:
        html: text of one listing page.

    Returns:
        The list of ``(company, person, signDate, category)`` tuples found on
        this page (the same rows that were appended to ``message``).
    """
    company = _COMPANY_RE.findall(html)
    person = _PERSON_RE.findall(html)
    signDate = _SIGN_DATE_RE.findall(html)
    category = _CATEGORY_RE.findall(html)
    pageOne = list(zip(company, person, signDate, category))
    message.extend(pageOne)
    return pageOne


def save_message(list_message):
    """Write the first list in ``list_message`` to ``content1.csv``.

    Args:
        list_message: sequence whose first element is the list of rows to
            write; any further elements are ignored.
    """
    rows = list_message[0]
    # newline='' lets the csv module control line endings; without it every
    # row is followed by a blank line on Windows.
    with open("content1.csv", "w", encoding='utf-8', newline='') as f:
        csv.writer(f).writerows(rows)


def draw_first():
    """Draw a pie chart of the number of companies per security category.

    Reads ``content1.csv`` (no header row) and draws into the first of three
    side-by-side subplot slots of a new figure. The figure is not shown or
    saved here; the caller decides what to do with it.
    """
    import matplotlib.pyplot as plt
    import pandas as pd

    df = pd.read_csv("content1.csv", names=["company", "person", "signDate", "category"])
    counts = df.groupby("category").count()["company"]
    # SimHei provides the glyphs for the Chinese labels and title.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    labels = counts.index
    sizes = counts.values
    colors = ["blue", "red", "yellow", "green"]
    # One offset per slice, pulling out only the last one. Sizing this from
    # the data keeps plt.pie from rejecting pages with != 4 categories.
    explode = [0] * len(sizes)
    if explode:
        explode[-1] = 0.1
    plt.figure(figsize=(20, 10), dpi=80)
    plt.subplot(131)
    plt.pie(sizes, labels=labels, explode=explode, autopct='%3.2f%%', colors=colors, radius=1.5,
            textprops={'fontsize': 15}, shadow=False, startangle=90)
    plt.axis("equal")
    plt.legend()
    plt.title('中国500强公司各证券类型占比', pad=15, fontsize='xx-large', fontweight='heavy')


def main():
    """Download all 16 listing pages, save the rows and display the chart."""
    import matplotlib.pyplot as plt
    import requests

    for page in range(16):
        # The first page has no numeric suffix; later pages are numbered from 2.
        if page == 0:
            url = "https://top.chinaz.com/gongsitop/index_500top.html"
        else:
            url = "https://top.chinaz.com/gongsitop/index_500top_{}.html".format(page + 1)

        # A timeout keeps a stalled connection from hanging the run, and a
        # failed page aborts instead of silently contributing no rows.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        getInfo(response.text)
    list_message = [message, message2]
    save_message(list_message)
    draw_first()
    # Without this the figure is built but never displayed.
    plt.show()


if __name__ == '__main__':
    main()