# -*- coding: utf-8 -*-
"""Scrape company names and registered capital from top.chinaz.com into a CSV.

Recovered from a patch ("Update README.md") that pasted this script into
README.md. Original header: created on Sun May 15 20:46:07 2022,
author: 张景瑞.

The script downloads the first ten listing pages, extracts (name, fund)
pairs with regular expressions, writes them to ``content.csv`` and prints
the first 20 rows.
"""
import csv
import re

import pandas as pd

BASE_URL = "https://top.chinaz.com/gongsi/index_zhuce"
PAGE_COUNT = 10
CSV_PATH = "content.csv"
# GB18030 is byte-compatible with GB2312 for every character GB2312 can
# represent, but it can also encode any other character, so writing never
# fails and the file is read back with the same codec it was written with.
CSV_ENCODING = "gb18030"
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

# Request headers (browser-like user agent).
HEADERS = {
    "user-agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36 Edg/101.0.1210.39'
}

# NOTE(review): both patterns look truncated. The text this script was
# recovered from appears to have lost the HTML tags that delimited the
# capture groups (the second literal was even split across raw line breaks,
# which is a syntax error). As written, NAME_PATTERN matches every single
# character. Confirm both against the live page markup before relying on
# the output.
NAME_PATTERN = '(.+?)'
FUND_PATTERN = '注册资本:(.*?)\n\n'


def build_url(page: int) -> str:
    """Return the listing URL for zero-based *page*.

    The first page has no numeric suffix; later pages use a one-based
    ``_<n>`` suffix (page index 1 -> ``index_zhuce_2.html``).
    """
    if page == 0:
        return "{}.html".format(BASE_URL)
    return "{}_{}.html".format(BASE_URL, page + 1)


def fetch_page(url: str) -> str:
    """Download *url* and return the response body as text.

    Raises:
        requests.RequestException: on timeout, connection failure or an
            HTTP error status, so an error page is never parsed as data.
    """
    # Imported here so the parsing and CSV helpers stay importable on
    # machines that do not have the HTTP dependency installed.
    import requests

    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def parse_companies(html: str, name_pattern: str = NAME_PATTERN,
                    fund_pattern: str = FUND_PATTERN) -> list:
    """Extract ``(name, fund)`` tuples from one page of HTML.

    Names and funds are matched independently and paired positionally;
    if one pattern yields fewer matches, the surplus of the other is
    dropped (``zip`` semantics).
    """
    names = re.findall(name_pattern, html)
    funds = re.findall(fund_pattern, html)
    return list(zip(names, funds))


def scrape(page_count: int = PAGE_COUNT) -> list:
    """Fetch *page_count* listing pages and return all extracted rows."""
    rows = []
    for page in range(page_count):
        rows.extend(parse_companies(fetch_page(build_url(page))))
    return rows


def save_rows(rows, path: str = CSV_PATH) -> None:
    """Write *rows* to *path* as CSV (no header row)."""
    # newline="" is required by the csv module; without it Windows output
    # gets an empty line after every record.
    with open(path, "w", newline="", encoding=CSV_ENCODING) as f:
        csv.writer(f).writerows(rows)


def load_rows(path: str = CSV_PATH):
    """Read the CSV at *path* into a DataFrame with columns name/fund."""
    return pd.read_csv(path, names=["name", "fund"], encoding=CSV_ENCODING)


def main() -> None:
    """Scrape, persist to ``content.csv`` and print the first 20 rows."""
    save_rows(scrape())
    print(load_rows().head(20))


if __name__ == "__main__":
    main()