From 05ad0a4c7d021e587c4707ec3f139e63f22294c8 Mon Sep 17 00:00:00 2001
From: pseyg6lzf <1986224603@qq.com>
Date: Sat, 27 Apr 2024 19:27:29 +0800
Subject: [PATCH] ADD file via upload

---
 新建 文本文档.txt | 130 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 130 insertions(+)
 create mode 100644 新建 文本文档.txt

diff --git a/新建 文本文档.txt b/新建 文本文档.txt
new file mode 100644
index 0000000..ede0bcd
--- /dev/null
+++ b/新建 文本文档.txt
@@ -0,0 +1,130 @@
+import requests
+from bs4 import BeautifulSoup
+from urllib.robotparser import RobotFileParser
+from matplotlib import pyplot as plt
+import numpy as np
+
+def can_fetch(urlrobots, url):
+    # Parse the site's robots.txt and ask whether a generic crawler ('*') may fetch the URL
+    rp = RobotFileParser()
+    rp.set_url(urlrobots + "/robots.txt")
+    rp.read()
+    return rp.can_fetch('*', url)
+
+def check_robots(url):
+    if can_fetch(url, url):
+        response = requests.get(url)
+        if response.status_code == 200:
+            print('robots.txt allows crawling this site')
+            return True
+    else:
+        print('robots.txt does not allow crawling this site')
+    return False
+
+def get_pictures(url, path):
+    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0'}
+    resp = requests.get(url, headers=headers)
+    print(resp.status_code)  # check the request status; 200 means the download succeeded
+    with open('img/' + path, 'wb') as f:  # write the image bytes to disk; 'wb' means binary mode
+        f.write(resp.content)
+
+def get_pictures_urls(text):
+    # Scan the raw HTML for every 'img src="' marker and collect the URL that follows it
+    st = 'img src="'
+    m = len(st)
+    i = 0
+    n = len(text)
+    urls = []  # extracted image URLs
+    while i < n:
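
The hunk above breaks off inside get_pictures_urls; the rest of the 130-line file is not shown in this patch. As a rough sketch of how that string scan is commonly finished, and how it could be wired to the check_robots and get_pictures helpers defined in the patch, assuming a hypothetical target URL (https://example.com stands in for whatever page the full script actually crawls) and an img/ output directory:

import os
import requests
from urllib.parse import urljoin

def get_pictures_urls(text):
    # Sketch: return every URL that follows 'img src="' in the page source
    st = 'img src="'
    m = len(st)
    i = 0
    n = len(text)
    urls = []
    while i < n:
        pos = text.find(st, i)           # next occurrence of the marker
        if pos == -1:
            break
        end = text.find('"', pos + m)    # closing quote of the src attribute
        if end == -1:
            break
        urls.append(text[pos + m:end])
        i = end + 1
    return urls

if __name__ == '__main__':
    target = 'https://example.com'  # hypothetical page; the patch does not show the real target
    if check_robots(target):
        os.makedirs('img', exist_ok=True)  # get_pictures() writes into img/, so make sure it exists
        html = requests.get(target, headers={'User-Agent': 'Mozilla/5.0'}).text
        for k, src in enumerate(get_pictures_urls(html)):
            get_pictures(urljoin(target, src), f'{k}.jpg')  # urljoin resolves relative src values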
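
The file also imports BeautifulSoup, matplotlib and numpy, none of which appear in the visible lines, so the missing portion presumably uses them for parsing and plotting. If the image URLs were extracted with BeautifulSoup instead of a raw string scan, a minimal equivalent (same assumptions as the sketch above) would be:

from bs4 import BeautifulSoup

def get_pictures_urls_bs4(text):
    # Parse the HTML and pull the src attribute of every <img> tag
    soup = BeautifulSoup(text, 'html.parser')
    return [img['src'] for img in soup.find_all('img') if img.get('src')]

Unlike the manual scan, this also picks up single-quoted or unquoted src attributes and ignores 'img src=' text that does not sit inside a real tag.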