You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

95 lines
3.0 KiB

# -*- coding: utf-8 -*-
"""
Created on Sun May 26 12:41:54 2024

@author: Panda
"""
import requests
import pandas as pd
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
# Prefix of every listing-page URL; crawl() builds the final address as
# url_head + <page suffix> + ".html".
# NOTE(review): the value is empty in this copy of the file -- it has to be
# set to the real site prefix before any page can be fetched.
url_head = ""

# Request headers sent with every page fetch (browser-like User-Agent).
Headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/104.0.5112.102",
}

# Module-level list of parsed rows; main() turns it into the result table.
# (A `global` statement is a no-op at module scope, so none is needed here.)
tl = []
def _field_value(field):
    """Return the part of a ``label:value`` token that follows the label."""
    # NOTE(review): the separator literal was empty in the original
    # (``t.find('')``), which always yields 0 and therefore only dropped the
    # first character. Labels are assumed to end with a colon (ASCII or
    # full-width) -- confirm against the real page markup.
    positions = [pos for pos in (field.find('：'), field.find(':')) if pos != -1]
    if not positions:
        return field
    return field[min(positions) + 1:]


def _parse_listing(text):
    """Split the text of one listing block into a six-field row.

    Returns ``[company, person, capital, date, stock_type, stock_code]``,
    or ``None`` when the text has too few tokens to hold the first four
    fields.
    """
    # Same tokenisation as before: newlines count as spaces, empty tokens
    # are discarded.
    tokens = [tok for tok in text.replace('\n', ' ').split(' ') if tok]

    if len(tokens) >= 8:
        # The capital field was split in two; re-join the halves and remove
        # the leftover fragment so the following fields keep their positions.
        # (Threshold kept from the original code.)
        tokens[2:4] = [tokens[2] + tokens[3]]

    if len(tokens) < 4:
        # Not enough tokens for company/person/capital/date: skip the block
        # instead of failing with IndexError.
        return None

    company = tokens[0]
    person = _field_value(tokens[1])
    capital = _field_value(tokens[2])
    date = _field_value(tokens[3])

    if len(tokens) >= 7:
        stock_type = _field_value(tokens[4])
        stock_code = _field_value(tokens[5])
    else:
        # No stock information in this block.
        stock_type = '暂无'
        stock_code = None

    return [company, person, capital, date, stock_type, stock_code]


def crawl(index):
    """Fetch one listing page and append its parsed rows to ``tl``.

    Args:
        index: Page suffix placed between ``url_head`` and ``".html"``.

    Returns:
        None. Pages that do not answer with HTTP 200 are ignored.
    """
    # A timeout keeps a stalled server from blocking the whole run.
    response = requests.get(url_head + index + ".html", headers=Headers, timeout=10)
    if response.status_code != 200:
        return

    # Parse the page's HTML source with BeautifulSoup 4.
    soup = BeautifulSoup(response.text, features='html.parser')
    for div in soup.find_all('div'):
        # .get() rather than div["class"]: a <div> without a class attribute
        # would otherwise raise KeyError.
        if div.get("class") != ["CoListTxt"]:
            continue
        row = _parse_listing(div.text)
        if row is not None:
            tl.append(row)
def main(pages=16):
    """Crawl all listing pages, save the rows to CSV and chart the top categories.

    Args:
        pages: Total number of listing pages on the site (default 16).

    Returns:
        None. Writes ``result.csv`` and shows a pie chart of the three most
        frequent values in the '证券' column.
    """
    # The first page has no suffix; the following ones are "_2", "_3", ...
    inds = [''] + ['_' + str(i) for i in range(2, pages + 1)]
    for ind in inds:
        crawl(ind)

    df = pd.DataFrame(tl, columns=['公司', '法人', '注册资本', '注册时间', '证券', '股票代码'])
    df.to_csv('result.csv', index=False, encoding='utf_8_sig')

    # Share of each value in percent, limited to the three most frequent.
    percentages = df['证券'].value_counts(normalize=True) * 100
    percentages = percentages.head(3)

    # SimHei so the Chinese labels render; keep the minus sign drawable too.
    plt.rcParams['font.sans-serif'] = 'SimHei'
    plt.rcParams['axes.unicode_minus'] = False
    percentages.plot.pie(autopct='%1.1f%%', startangle=90, shadow=False, labels=percentages.index)
    plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
    # Without this call the figure is built but never displayed.
    plt.show()
    return None
if __name__=='__main__':