ADD file via upload

main
pseyg6lzf 7 months ago
parent 2b59d05235
commit 05ad0a4c7d

@ -0,0 +1,130 @@
import requests
from bs4 import BeautifulSoup
from urllib.robotparser import RobotFileParser
from matplotlib import pyplot as plt
import numpy as np
def can_fetch(urlrobots, url):
    """Report whether the site's robots.txt lets a generic crawler fetch *url*.

    Args:
        urlrobots: A URL on the target site. Any path, query string or
            fragment is ignored when it carries a host, because robots.txt
            is only served from the root of a host.
        url: The page URL whose access permission is being checked.

    Returns:
        True when the rules for user agent ``*`` permit fetching *url*,
        False otherwise.
    """
    from urllib.parse import urlsplit

    parts = urlsplit(urlrobots)
    if parts.scheme and parts.netloc:
        # Callers pass full page URLs; appending "/robots.txt" to those would
        # request a non-existent resource, so reduce to scheme://host first.
        base = parts.scheme + "://" + parts.netloc
    else:
        # No host to normalise against: use the value exactly as given.
        base = urlrobots
    rp = RobotFileParser()
    rp.set_url(base + "/robots.txt")
    rp.read()
    return rp.can_fetch('*', url)
def check_robots(url):
    """Check that *url* may be crawled and that it currently answers with 200.

    The robots.txt rules are looked up at the root of the URL's host, then
    the page itself is requested once.

    Args:
        url: Absolute URL of the page that is about to be scraped.

    Returns:
        True only when robots.txt allows the page and the request returns
        HTTP 200; False in every other case (never None).
    """
    from urllib.parse import urlsplit

    parts = urlsplit(url)
    # robots.txt lives at the host root, not underneath the page path/query.
    if parts.scheme and parts.netloc:
        site_root = parts.scheme + "://" + parts.netloc
    else:
        site_root = url
    if not can_fetch(site_root, url):
        print("Robots协议不允许访问该网站")
        return False
    # A timeout keeps an unresponsive server from hanging the script forever.
    response = requests.get(url, timeout=10)
    if response.status_code == 200:
        print('Robots协议允许访问该网站')
        return True
    # Allowed by robots.txt but the page is not reachable right now.
    print('网站请求失败,状态码:', response.status_code)
    return False
def get_pictures(url,path):
    """Download the resource at *url* and save it as ``img/<path>``.

    The HTTP status code is printed. Nothing is written unless the status
    is 200, so an error page is never stored under an image file name.

    Args:
        url: Absolute URL of the image to download.
        path: File name (relative to the ``img`` directory) to write to.

    Returns:
        None.
    """
    import os

    headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0'}
    response=requests.get(url,headers=headers,timeout=10)
    print(response.status_code)  # 200 means the request succeeded
    if response.status_code != 200:
        return
    # The target directory is not guaranteed to exist on a fresh checkout.
    os.makedirs('img', exist_ok=True)
    with open('img/'+path, 'wb') as f:  # 'wb': image data is binary
        f.write(response.content)
def get_pictures_urls(text):
    """Extract up to nine image URLs from raw HTML text.

    Scans *text* left to right for ``img src="`` and takes everything up to
    the next double quote. A value is kept only when its characters at
    positions 20-24 are exactly ``image``. Scanning stops once nine values
    have been kept or the text is exhausted.

    Args:
        text: HTML source to scan.

    Returns:
        A list of at most nine attribute values, in order of appearance.
    """
    marker = 'img src="'
    found = []
    cursor = 0
    while len(found) < 9:
        start = text.find(marker, cursor)
        if start == -1:
            break
        value_start = start + len(marker)
        value_end = text.find('"', value_start)
        if value_end == -1:
            # Unterminated attribute: no later marker can exist either,
            # because the marker itself ends with a double quote.
            break
        candidate = text[value_start:value_end]
        if candidate[20:25] == 'image':
            found.append(candidate)
        # Resume right after the closing quote.
        cursor = value_end + 1
    return found
def get_pictures_names(text):
    """Extract up to nine ``title`` attribute values from raw HTML text.

    Scans *text* left to right for ``title="`` and takes everything up to
    the next double quote, with no filtering. Scanning stops once nine
    values have been collected or the text is exhausted.

    Args:
        text: HTML source to scan.

    Returns:
        A list of at most nine attribute values, in order of appearance.
    """
    opener = 'title="'
    skip = len(opener)
    titles = []
    pos = text.find(opener)
    while pos != -1 and len(titles) != 9:
        closing = text.find('"', pos + skip)
        if closing == -1:
            # Unterminated attribute: nothing further can be extracted.
            break
        titles.append(text[pos + skip:closing])
        # Continue searching just past the closing quote.
        pos = text.find(opener, closing + 1)
    return titles
import os

# Ranking page to scrape and the browser-like User-Agent sent with the request.
url='https://www.iqiyi.com/ranks1PCW/home?v=12.3.15170&deviceId=76f0b3e%E2%80%A6'
headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0'}
if check_robots(url):
    response=requests.get(url,headers=headers,timeout=10)
    response.encoding = "utf-8"
    urls=get_pictures_urls(response.text)  # image URLs found on the page
    soup=BeautifulSoup(response.text,'lxml')  # 'html.parser' is the stdlib alternative
    all_top=soup.find_all(attrs={'class':'rvi__index__num'})
    all_top1=str(soup.find_all(attrs={'class':'rvi__tit1'}))
    names=get_pictures_names(all_top1)

    # Both the downloads and the chart are written below img/.
    os.makedirs('img', exist_ok=True)

    # Download the images. zip() stops at the shorter list, so a page with
    # fewer titles than images no longer raises IndexError.
    for rank, (img_url, name) in enumerate(zip(urls, names), start=1):
        # Path separators inside a title would point into a missing directory.
        safe_name = name.replace('/', '_').replace('\\', '_')
        get_pictures('https:'+img_url, '榜'+str(rank)+safe_name+'.jpg')

    # Plot settings
    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly with this font
    plt.rcParams['figure.dpi'] = 200
    plt.rcParams['figure.figsize'] = (5,3)

    # Labels and values for the chart: at most nine values, then both lists
    # are trimmed to a common length because plt.bar needs matching sizes.
    hotdegree = [int(tag.string) for tag in all_top[:9]]
    count = min(len(names), len(hotdegree))
    rankname = names[:count]
    hotdegree = hotdegree[:count]

    # Bars are spaced 8 units apart and are 4 units wide.
    x = np.arange(len(rankname))*8
    width = 4
    rank_x = x

    plt.bar(rank_x,hotdegree,width=width,color="red",label="热度值")
    plt.xticks(x,labels=rankname,fontsize=4)
    # Print each bar's value just above it.
    for bar_x, value in zip(rank_x, hotdegree):
        plt.text(bar_x,value,str(value),va="bottom",ha="center",fontsize=8)
    plt.legend(loc="best")
    plt.savefig('img/'+'Top9.png')
Loading…
Cancel
Save