You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
hnu202109070126
3ad94a7c8b
| | 3 years ago | |
|---|---|---|
| README.md | 3 years ago | |
| 大作业-爬虫.py | 3 years ago | |
README.md
asdasd23
# -*- coding: utf-8 -*-
"""Scrape company names and registered capital from top.chinaz.com.

Created on Sun May 15 20:46:07 2022

@author: 张景瑞

Downloads the first ten pages of the ``gongsi/index_zhuce`` listing, pulls
(name, registered capital) pairs out of the HTML with regular expressions,
stores them in ``content.csv`` and prints the first 20 rows with pandas.
"""
import csv
import re

# Request header information: identify as a desktop browser.
headers = {
    "user-agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36 Edg/101.0.1210.39'
}

PAGE_COUNT = 10
CSV_PATH = "content.csv"
# One encoding for both writing and reading the CSV.  gb18030 is a superset
# of gb2312/gbk, so it can encode any character while staying byte-compatible
# with gb2312 for the characters gb2312 covers.
CSV_ENCODING = "gb18030"
REQUEST_TIMEOUT = 10  # seconds; without it a stalled server blocks forever

# Each pattern needs a closing delimiter after its lazy group; a lazy group
# at the very end of a pattern captures at most a single character.
# NOTE(review): the closing tags (</a>, </p>) are assumed from typical page
# markup -- confirm against the live HTML before relying on the output.
NAME_PATTERN = re.compile(r'<a.*?target="_blank">(.+?)</a>')
FUND_PATTERN = re.compile(r'注册资本:(.*?)</p>')


def page_url(page):
    """Return the listing URL for the zero-based index *page*.

    The first page has no numeric suffix; later pages are numbered from 2.
    """
    if page == 0:
        return "https://top.chinaz.com/gongsi/index_zhuce.html"
    return "https://top.chinaz.com/gongsi/index_zhuce_{}.html".format(page + 1)


def fetch_page(url):
    """Download *url* and return the response body as text.

    Raises:
        requests.RequestException: on timeout, connection failure or a
            non-success HTTP status.
    """
    # Imported here so the parsing helpers stay importable without the
    # third-party HTTP client installed.
    import requests

    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of silently parsing an error page into zero rows.
    response.raise_for_status()
    return response.text


def parse_companies(html):
    """Extract ``(name, fund)`` tuples from one listing page.

    Names and funds are matched independently and paired by position, so the
    result is truncated to the shorter of the two match lists.
    """
    names = NAME_PATTERN.findall(html)
    funds = FUND_PATTERN.findall(html)
    return list(zip(names, funds))


def scrape(page_count=PAGE_COUNT):
    """Fetch *page_count* listing pages and return all extracted rows."""
    rows = []
    for page in range(page_count):
        rows.extend(parse_companies(fetch_page(page_url(page))))  # append to list
    return rows


def save_rows(rows, path=CSV_PATH):
    """Write *rows* to *path* as CSV using ``CSV_ENCODING``."""
    # newline="" lets the csv module control line endings; otherwise Windows
    # text mode turns "\r\n" into "\r\r\n" and produces blank rows.
    with open(path, "w", newline="", encoding=CSV_ENCODING) as f:
        csv.writer(f).writerows(rows)


def main():
    """Scrape the listing, save it to ``content.csv`` and preview it."""
    rows = scrape()
    save_rows(rows)
    if not rows:
        # read_csv cannot parse an empty file; report instead of crashing.
        print("No rows were scraped; nothing to display.")
        return

    import pandas as pd

    df = pd.read_csv(CSV_PATH, names=["name", "fund"], encoding=CSV_ENCODING)
    print(df.head(20))


if __name__ == "__main__":
    main()