# -*- coding: utf-8 -*-
"""Scrape company names and registered capital from top.chinaz.com.

Fetches ten listing pages under ``/gongsi/index_zhuce``, extracts
(name, registered capital) pairs from each page with regular expressions,
writes all pairs to ``content.csv`` and prints the first 20 rows.

Created on Sun May 15 20:46:07 2022

@author: 张景瑞
"""

import csv
import re

import pandas as pd
import requests

# Request header information sent with every page fetch.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36 "
        "Edg/101.0.1210.39"
    ),
}

# The first page has no numeric suffix; later pages are numbered from 2.
FIRST_PAGE_URL = "https://top.chinaz.com/gongsi/index_zhuce.html"
PAGE_URL_TEMPLATE = "https://top.chinaz.com/gongsi/index_zhuce_{}.html"
PAGE_COUNT = 10

# Without a timeout, requests waits indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10  # seconds

CSV_PATH = "content.csv"
# One explicit encoding for both writing and reading, so the round trip does
# not depend on the platform's default encoding. GB18030 covers all of
# Unicode and is a superset of GB2312, so no scraped name can fail to encode.
CSV_ENCODING = "gb18030"

# NOTE(review): both patterns look like they lost their surrounding HTML tag
# delimiters at some point (as written, NAME_PATTERN matches every single
# character) -- confirm against the live page markup and restore the tags.
# Compiled once here because they are reused for every page.
NAME_PATTERN = re.compile('(.+?)')
FUND_PATTERN = re.compile('注册资本:(.*?)\n\n')

message = []
for page in range(PAGE_COUNT):
    if page == 0:
        url = FIRST_PAGE_URL
    else:
        url = PAGE_URL_TEMPLATE.format(page + 1)

    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    html = response.text

    name = NAME_PATTERN.findall(html)
    fund = FUND_PATTERN.findall(html)
    # zip() stops at the shorter list, so unequal match counts are silently
    # truncated -- presumably each entry yields exactly one of each; verify.
    message.extend(zip(name, fund))  # append this page's pairs to the list

# newline="" is required by the csv module; otherwise extra blank rows are
# written on platforms that translate "\n" to "\r\n".
with open(CSV_PATH, "w", newline="", encoding=CSV_ENCODING) as f:
    csv.writer(f).writerows(message)

df = pd.read_csv(CSV_PATH, names=["name", "fund"], encoding=CSV_ENCODING)
print(df.head(20))