ADD file via upload

main
hnu202401010102 8 months ago
parent f9a99f91b5
commit b5fd0b1ef8

@ -0,0 +1,65 @@
import re
from selenium import webdriver
from bs4 import BeautifulSoup
import requests
import matplotlib.pyplot as plt
import matplotlib
# Browser-like request headers sent with the plain ``requests`` fetches.
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"}

FIRST_PAGE_URL = "https://top.chinaz.com/gongsitop/index_500top.html"
PAGE_URL_TEMPLATE = "https://top.chinaz.com/gongsitop/index_500top_{}.html"

# Captures the raw text that follows the "registered capital" label.
CAPITAL_PATTERN = re.compile('注册资本:</span>(.*?)</p>')

# JavaScript that reports the current vertical scroll offset of the page.
SCROLL_TOP_JS = "return document.documentElement.scrollTop || window.pageYOffset || document.body.scrollTop;"

# Hard-coded multiplier applied to figures marked "美元" (US dollars).
USD_TO_CNY = 6.71

# Number of companies shown in the chart.
TOP_N = 20

# Seconds to wait for an HTTP response before giving up instead of hanging.
REQUEST_TIMEOUT = 10


def parse_registered_capital(text):
    """Convert a scraped registered-capital string into an integer amount.

    Every digit and ``.`` in *text* is concatenated and read as one number.
    The number is multiplied by 10000 when "亿" appears in *text* and by
    ``USD_TO_CNY`` when "美元" appears.  The base unit is presumably 万
    (ten thousand), which is what the ×10000 factor for 亿 implies.

    Args:
        text: Raw text captured after the registered-capital label.

    Returns:
        The amount truncated to ``int``; ``0`` when *text* contains no
        parseable number (empty, no digits, or malformed such as "1.2.3").
    """
    digits = "".join(ch for ch in text if ch.isdigit() or ch == ".")
    # float() replaces the original eval(): scraped text must never be
    # executed, and eval() also rejected inputs such as "0100" or "1.2.3".
    try:
        amount = float(digits)
    except ValueError:
        return 0
    if "亿" in text:
        amount *= 10000
    if "美元" in text:
        amount *= USD_TO_CNY
    return int(amount)


def scrape_companies(driver):
    """Walk the ranking pages and collect ``(company, capital)`` records.

    Company names are read from the page rendered by *driver*; the capital
    strings are read from a separate ``requests`` download of the same page
    number and matched to the names by position.

    Args:
        driver: A Selenium WebDriver already showing the first ranking page.

    Returns:
        A list of ``(name, capital)`` tuples in scrape order.  The same page
        may be visited more than once while scrolling, so the list can
        contain repeated companies.
    """
    records = []
    next_num = 2  # number of the page the driver will open next
    temp_height = 0  # scroll offset seen on the previous pass
    while True:
        driver.execute_script("window.scrollBy(0,10000)")
        # NOTE(review): the 'xml' parser is kept from the original script;
        # confirm it is the intended parser for an HTML page.
        soup = BeautifulSoup(driver.page_source, 'xml')
        title_nodes = soup.find_all("div", class_="CoListTxt")
        if not title_nodes:
            # A page without company entries marks the end of the ranking.
            break

        if next_num == 2:
            url = FIRST_PAGE_URL
        else:
            url = PAGE_URL_TEMPLATE.format(next_num - 1)
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        raw_capitals = CAPITAL_PATTERN.findall(response.text)

        for index, title_node in enumerate(title_nodes):
            link = title_node.find("a")
            if link is None:
                continue
            # The two page copies may disagree in length; a missing capital
            # field counts as 0 rather than raising IndexError.
            raw = raw_capitals[index] if index < len(raw_capitals) else ""
            records.append((link.get_text(), parse_registered_capital(raw)))

        check_height = driver.execute_script(SCROLL_TOP_JS)
        if check_height == temp_height:
            # The scroll offset stopped changing: move on to the next page.
            driver.get(PAGE_URL_TEMPLATE.format(next_num))
            next_num += 1
        temp_height = check_height
    return records


def rank_companies(records, limit=TOP_N):
    """Return the *limit* largest records, ordered by capital descending.

    Repeated company names are collapsed to their first occurrence so that a
    page scraped twice does not occupy several chart slots.

    Args:
        records: Iterable of ``(name, capital)`` pairs.
        limit: Maximum number of entries to return.

    Returns:
        A list of at most *limit* ``(name, capital)`` tuples; shorter when
        fewer distinct companies are available.
    """
    unique = {}
    for company, capital in records:
        unique.setdefault(company, capital)
    ranked = sorted(unique.items(), key=lambda item: item[1], reverse=True)
    return ranked[:limit]


def plot_ranking(ranking):
    """Draw a horizontal bar chart of the ranked companies.

    Args:
        ranking: List of ``(name, capital)`` tuples as returned by
            ``rank_companies``.
    """
    labels = [company for company, _ in ranking]
    # Dividing by 10000 turns the 万-based amounts into 亿, so the axis is
    # labelled 亿元 (the original label said 万元).
    values = [capital / 10000 for _, capital in ranking]
    matplotlib.rc('font', family='SimHei', weight='bold')
    plt.rcParams['axes.unicode_minus'] = False
    plt.barh(labels, values, color='skyblue')
    plt.xlabel('注册资本/亿元')
    plt.ylabel('公司')
    # The title reports the number of bars actually drawn (it was a fixed
    # "TOP10" while 20 companies were plotted).
    plt.title('TOP{}公司注册资本'.format(len(ranking)))
    plt.show()


def main():
    """Scrape the ranking with an Edge browser and plot the top companies."""
    driver = webdriver.Edge()
    try:
        driver.switch_to.default_content()
        driver.get(FIRST_PAGE_URL)
        records = scrape_companies(driver)
    finally:
        # Always release the browser, even when scraping fails midway.
        driver.quit()

    ranking = rank_companies(records)
    if not ranking:
        raise SystemExit("No company data was scraped; nothing to plot.")
    plot_ranking(ranking)


if __name__ == "__main__":
    main()
Loading…
Cancel
Save