diff --git a/README.md b/README.md index a009b66..9f02597 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ url = "https://top.chinaz.com/gongsi/index_zhuce.html" headers = { "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36" } +#requests爬取网页内容 # 使用reqeusts模快发起 GET 请求 response = requests.get(url, headers=headers) # 获取请求的返回结果 @@ -30,7 +31,7 @@ for page in range(17): capital= re.findall('注册资本:(.*?)

', html) # 合并列表 pageOne = list(zip(company,debt )) - message.extend(pageOne)#将列表放入message里 + message.extend(pageOne) #将列表放入message里 import csv with open("content.csv", "w") as f: w = csv.writer(f) @@ -58,6 +59,7 @@ for i in range(len(df3)): a=str(a) df3.remove(df3[0]) df3.append(a) +# 绘制条形图(饼图字迹会重合,所以不用饼图) import matplotlib.pyplot as plt plt.rcParams['font.sans-serif'] = ['SimHei']# 用黑体显示中文 plt.rcParams['axes.unicode_minus'] = False # (解决坐标轴负数的负号显示问题)