parent
f9a99f91b5
commit
b5fd0b1ef8
@ -0,0 +1,65 @@
|
|||||||
|
import re
|
||||||
|
from selenium import webdriver
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import requests
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import matplotlib
|
||||||
|
# Scrape company names and registered capital ("注册资本") from the paginated
# top.chinaz.com company ranking, then plot the companies with the largest
# registered capital as a horizontal bar chart.

headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"}

# Multiplier applied to amounts whose text contains "美元" (US dollars).
USD_TO_CNY = 6.71
# Number of companies shown in the chart (also used in the chart title).
TOP_N = 20
# Seconds to wait for the plain HTTP fetch before giving up instead of hanging.
REQUEST_TIMEOUT = 10


def _page_url(page):
    """Return the ranking URL for 1-based *page* (page 1 has no numeric suffix)."""
    if page == 1:
        return "https://top.chinaz.com/gongsitop/index_500top.html"
    return "https://top.chinaz.com/gongsitop/index_500top_{}.html".format(page)


def _parse_capital(text):
    """Convert a scraped registered-capital string to an integer amount.

    Only digits and dots are kept to form the number. If the text contains
    "亿" the number is multiplied by 10000, and if it contains "美元" it is
    multiplied by USD_TO_CNY. Returns 0 when no valid number can be extracted.
    """
    digits = "".join(ch for ch in text if ch.isdigit() or ch == ".")
    try:
        # float() instead of eval(): the text comes from the network, and
        # eval() also fails on inputs such as "007" or "1.2.3".
        amount = float(digits)
    except ValueError:
        return 0
    if "亿" in text:
        amount *= 10000
    if "美元" in text:
        amount *= USD_TO_CNY
    return int(amount)


def _scroll_to_bottom(browser):
    """Scroll *browser* down until the scroll position stops changing."""
    previous = None
    while True:
        browser.execute_script("window.scrollBy(0,10000)")
        position = browser.execute_script(
            "return document.documentElement.scrollTop || window.pageYOffset || document.body.scrollTop;"
        )
        if position == previous:
            return
        previous = position


money = []
name = []

driver = webdriver.Edge()
try:
    driver.switch_to.default_content()
    page = 1
    while True:
        url = _page_url(page)
        driver.get(url)
        # Scroll first, parse once: parsing on every scroll pass would append
        # the same companies several times.
        _scroll_to_bottom(driver)

        # The page is HTML, so use an HTML parser rather than the XML one.
        soup = BeautifulSoup(driver.page_source, "html.parser")
        title_nodes = soup.find_all("div", class_="CoListTxt")
        if not title_nodes:
            # A page without any company entries marks the end of the ranking.
            break

        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        money_ = re.findall('注册资本:</span>(.*?)</p>', response.text)

        # Names come from the rendered DOM and capitals from the raw HTML; they
        # are paired by position. zip() stops at the shorter list, so a missing
        # capital entry no longer raises IndexError.
        for title_node, capital_text in zip(title_nodes, money_):
            link_1 = title_node.find("a")
            if link_1 is None:
                continue
            name.append(link_1.get_text())
            money.append(_parse_capital(capital_text))

        page += 1
finally:
    # Always release the browser process, even if scraping fails part-way.
    driver.quit()

list_ = [[company, capital] for company, capital in zip(name, money)]
list_.sort(key=lambda x: x[1], reverse=True)

# Slicing (instead of indexing 0..19) tolerates fewer than TOP_N results.
top_rows = list_[:TOP_N]
x = [row[0] for row in top_rows]
# NOTE(review): values are divided by 10000 before plotting while the axis
# label below says "万元" — confirm the intended unit against the site's data.
y = [row[1] / 10000 for row in top_rows]

matplotlib.rc('font', family='SimHei', weight='bold')
plt.rcParams['axes.unicode_minus'] = False
plt.barh(x, y, color='skyblue')
plt.xlabel('注册资本/万元')
plt.ylabel('公司')
# Title is derived from TOP_N so it cannot drift from the number of bars
# (the original said "TOP10" while plotting 20 entries).
plt.title('TOP{}公司注册资本'.format(TOP_N))
plt.show()
|
Loading…
Reference in new issue