parent 2d6a6a1f85
commit 2deaaaf78c
@ -0,0 +1,2 @@
# python
Binary file not shown.
@ -0,0 +1,208 @@
import matplotlib.pyplot as plt
from matplotlib import font_manager
import pymysql


class show(object):
    info = {}
    TScore = []  # composite score
    name = []  # anime title
    bfl = []  # play count
    pls = []  # comment count
    scs = []  # favourite count
    def save(self):  # pull the rows out of the database and store them on this object
        db = pymysql.connect(host='47.106.183.36', port=3306,
                             user='fuchuang', password='fuchuang',
                             database='fuchuang', charset='utf8mb4')  # database connection
        use = 'use fuchuang;'
        show_tables = 'show tables;'
        query = 'select * from bangumi;'
        cursor = db.cursor()
        try:
            cursor.execute(show_tables)
            cursor.execute(query)  # select everything from the bangumi table
            desc = cursor.description
            data = cursor.fetchall()  # fetch all rows
            for row in data:  # keep only entries whose score is above the threshold
                if row[3] != '暂无评分':
                    if float(row[3]) > 9.5:
                        self.name.append(row[1])  # anime title

                        value = row[2]
                        if '万' in value:
                            value = float(value.replace('万', ''))
                        elif '亿' in value:
                            value = float(value.replace('亿', '')) * 10000
                        self.bfl.append(value)  # play count (in units of 10,000)

                        value = row[4]
                        if '万' in value:
                            value = float(value.replace('万', ''))
                        else:
                            value = float(value)
                        self.TScore.append(float(row[3]) * value)  # composite score
                        self.scs.append(value)  # favourite count (in units of 10,000)

                        value = row[5]
                        if '万' in value:
                            value = float(value.replace('万', ''))
                        else:
                            value = float(value)
                        self.pls.append(value)  # comment count (in units of 10,000)

        except Exception as e:
            print(e)
            db.rollback()
        finally:
            cursor.close()
            db.close()
        print(self.name)
        print(self.TScore)
        print(self.bfl)
        print(self.pls)
        print(self.scs)
        # info = {'动漫名': self.name, '播放量(万)': self.bfl, '评论数(万)': self.pls, '收藏数(万)': self.scs, '综合评分': self.TScore}
        # dm_file = pandas.DataFrame(info)
        # dm_file.to_excel('Dongman.xlsx', sheet_name="动漫数据分析")
        # return all of the lists
        return self.name, self.bfl, self.pls, self.scs, self.TScore
    def view(self):  # data visualisation
        # make Chinese labels render correctly on the axes
        plt.rcParams['font.sans-serif'] = ['SimHei']
        plt.rcParams['axes.unicode_minus'] = False

        my_font = font_manager.FontProperties(fname='STHeiti-Medium.ttc')
        dm_name = self.name  # series titles
        dm_play = self.bfl  # play counts
        dm_review = self.pls  # comment counts
        dm_favorite = self.scs  # favourite counts
        dm_com_score = self.TScore  # composite scores
        y_score = [9.6, 9.7, 9.8, 9.9, 10.0]

        # dm_all = self.TScore * self.scs

        # ********************************** composite score vs. play count
        fig, ax1 = plt.subplots()
        plt.bar(dm_name, dm_com_score, color='red')
        plt.title('综合评分和播放量数据分析', fontproperties=my_font)
        ax1.tick_params(labelsize=6)
        plt.xlabel('番剧名')
        plt.ylabel('综合评分')
        plt.xticks(rotation=90, color='green')

        ax2 = ax1.twinx()
        ax2.plot(dm_play, color='cyan')
        plt.ylabel('播放量')

        plt.plot(1, label='评分', color='red', linewidth=5.0)
        plt.plot(1, label='播放量', color="cyan", linewidth=1.0, linestyle="-")
        plt.legend()

        plt.savefig(r'E:1.png', dpi=1000, bbox_inches='tight')

        # ********************************** comment count vs. favourite count
        # ***** comment count bar chart
        fig, ax3 = plt.subplots()
        plt.bar(dm_name, dm_review, color='green')
        plt.title('番剧收藏数与评论数分析')
        plt.ylabel('评论数(万)')
        ax3.tick_params(labelsize=6)
        plt.xticks(rotation=90, color='green')

        # ***** favourite count line chart
        ax4 = ax3.twinx()  # required for a combined chart
        ax4.plot(dm_favorite, color='yellow')  # line style for the favourites series
        plt.ylabel('收藏数(万)')

        plt.plot(1, label='评论数', color="green", linewidth=5.0)
        plt.plot(1, label='收藏数', color="yellow", linewidth=1.0, linestyle="-")
        plt.legend()
        plt.savefig(r'E:2.png', dpi=1000, bbox_inches='tight')
        # ********************************** composite score vs. favourite count
        # ***** composite score bar chart
        fig, ax5 = plt.subplots()
        plt.bar(dm_name, dm_com_score, color='red')
        plt.title('综合评分和收藏数量数据分析')
        plt.ylabel('综合评分')
        ax5.tick_params(labelsize=6)
        plt.xticks(rotation=90, color='green')

        # ***** favourite count line chart
        ax6 = ax5.twinx()  # required for a combined chart
        ax6.plot(dm_favorite, color='yellow')  # line style for the favourites series
        plt.ylabel('收藏数(万)')
        plt.plot(1, label='综合评分', color="red", linewidth=5.0)
        plt.plot(1, label='收藏数', color="yellow", linewidth=1.0, linestyle="-")
        plt.legend()

        plt.savefig(r'E:3.png', dpi=1000, bbox_inches='tight')

        # ********************************** play count vs. favourite count
        # ***** play count bar chart
        fig, ax7 = plt.subplots()
        plt.bar(dm_name, dm_play, color='cyan')
        plt.title('播放量和收藏数 数据分析')
        plt.ylabel('播放量(万)')
        ax7.tick_params(labelsize=6)
        plt.xticks(rotation=90, color='green')

        # ***** favourite count line chart
        ax8 = ax7.twinx()  # required for a combined chart
        ax8.plot(dm_favorite, color='yellow')  # line style for the favourites series
        plt.ylabel('收藏数(万)')

        plt.plot(1, label='播放量', color="cyan", linewidth=5.0)
        plt.plot(1, label='收藏数', color="yellow", linewidth=1.0, linestyle="-")
        plt.legend()
        plt.savefig(r'E:4.png', dpi=1000, bbox_inches='tight')
        # ***** comment count line chart (currently disabled)
        # ax8 = ax7.twinx()  # required for a combined chart
        # ax8.plot(dm_review, color='green')  # line style for the comments series
        # plt.ylabel('评论数(万)')
        # plt.plot(1, label='播放量', color="cyan", linewidth=5.0)
        # plt.plot(1, label='评论数', color="green", linewidth=1.0, linestyle="-")
        # plt.legend()
        # plt.savefig(r'E:4.png', dpi=1000, bbox_inches='tight')
        # the comment-count series still does not display correctly
        plt.show()
    def print(self):  # report how many records each list holds
        print(len(self.name))
        print(len(self.bfl))
        print(len(self.pls))
        print(len(self.scs))
        print(len(self.TScore))

    def sort(self, i, j):  # swap the records at positions i and j across every list
        self.name[i], self.name[j] = self.name[j], self.name[i]
        self.bfl[i], self.bfl[j] = self.bfl[j], self.bfl[i]
        self.pls[i], self.pls[j] = self.pls[j], self.pls[i]
        self.scs[i], self.scs[j] = self.scs[j], self.scs[i]
        self.TScore[i], self.TScore[j] = self.TScore[j], self.TScore[i]


def main():
    a = show()  # create the object
    a.save()  # pull the data from the database
    a.print()  # report the size of each list
    a.view()  # visualise


if __name__ == '__main__':
    main()
Binary file not shown.
@ -0,0 +1,11 @@
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html

[settings]
default = spider.settings

[deploy]
#url = http://localhost:6800/
project = spider
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,20 @@
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from time import sleep
import json

s = Service("../chromedriver.exe")
bro = webdriver.Chrome(service=s)
# open bilibili
bro.get('https://www.bilibili.com')
bro.delete_all_cookies()  # clear any existing cookies first
# 60 seconds to log in manually
sleep(60)
dictcookies = bro.get_cookies()  # read the cookies from the browser after logging in
jsoncookies = json.dumps(dictcookies)  # serialise the cookie dicts to JSON so they can be saved

# write the cookies.txt file
with open('cookies.txt', 'w') as f:
    f.write(jsoncookies)
print('cookies is ok')
@ -0,0 +1 @@
[{"domain": ".bilibili.com", "httpOnly": false, "name": "innersign", "path": "/", "secure": false, "value": "0"}, {"domain": ".bilibili.com", "expiry": 1700969682, "httpOnly": false, "name": "i-wanna-go-back", "path": "/", "secure": false, "value": "-1"}, {"domain": ".bilibili.com", "expiry": 1684985681, "httpOnly": false, "name": "bili_jct", "path": "/", "secure": false, "value": "6c88a668c7442fa148fc9d06d6e40849"}, {"domain": ".bilibili.com", "expiry": 1700969680, "httpOnly": false, "name": "sid", "path": "/", "secure": false, "value": "qauykkrb"}, {"domain": ".bilibili.com", "expiry": 1684985681, "httpOnly": true, "name": "SESSDATA", "path": "/", "secure": true, "value": "8e595145%2C1684985681%2C785d7%2Ab1"}, {"domain": ".bilibili.com", "expiry": 1764041679, "httpOnly": false, "name": "buvid_fp", "path": "/", "secure": false, "value": "3fdd662d6b3f6d9fd8b2a5f1834013d4"}, {"domain": ".bilibili.com", "expiry": 1684985681, "httpOnly": false, "name": "DedeUserID", "path": "/", "secure": false, "value": "178665301"}, {"domain": ".bilibili.com", "expiry": 1764041638, "httpOnly": false, "name": "b_nut", "path": "/", "secure": false, "value": "1669433638"}, {"domain": ".bilibili.com", "expiry": 1700969682, "httpOnly": false, "name": "b_ut", "path": "/", "secure": false, "value": "5"}, {"domain": ".bilibili.com", "httpOnly": false, "name": "b_lsid", "path": "/", "secure": false, "value": "B3E3109EB_184B2000F25"}, {"domain": ".bilibili.com", "expiry": 1684985681, "httpOnly": false, "name": "DedeUserID__ckMd5", "path": "/", "secure": false, "value": "3f4304303449401f"}, {"domain": ".bilibili.com", "expiry": 1700969654, "httpOnly": false, "name": "_uuid", "path": "/", "secure": false, "value": "F0B6051B-46F1-CEFA-D47B-30415304BFA054324infoc"}, {"domain": ".bilibili.com", "expiry": 1764041638, "httpOnly": false, "name": "buvid3", "path": "/", "secure": false, "value": "69BFBC33-CEEC-E4A5-9E0A-7D9DB491BA0738742infoc"}, {"domain": ".bilibili.com", "expiry": 1764041638, "httpOnly": false, "name": "buvid4", "path": "/", "secure": false, "value": "7FF54BEA-5D27-6E79-874D-2092619183B623196-022112611-0kiN2FN18wB5k/cGcFypkA%3D%3D"}]
@ -0,0 +1,21 @@
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html

import scrapy


class VideoItem(scrapy.Item):
    title = scrapy.Field()  # video title
    view_counts = scrapy.Field()  # play count
    barrage = scrapy.Field()  # danmaku (bullet comment) count
    up = scrapy.Field()  # uploader


class BiliItem(scrapy.Item):
    title = scrapy.Field()  # series title
    view_counts = scrapy.Field()  # play count
    evaluate = scrapy.Field()  # score
    attention = scrapy.Field()  # follower count
    barrage = scrapy.Field()  # danmaku count
@ -0,0 +1,87 @@
import pymysql

from spider.items import VideoItem, BiliItem


# import openpyxl


# class ExcelPipeline:
#     def __init__(self):
#         self.wb = openpyxl.Workbook()
#         self.ws = self.wb.active
#         self.ws.title = 'Goods'
#         self.ws.append(('标题', '价格', '销量', '图片', '店铺', '位置'))
#
#     def close_spider(self, spider):
#         self.wb.save('商品数据.xlsx')
#
#     def process_item(self, item, spider):
#         title = item.get('title', '')  # fall back to an empty value when the field is missing
#         price = item.get('price', 0)
#         deal_count = item.get('deal_count', 0)
#         picture = item.get('picture', '')
#         location = item.get('location', '')
#         shop = item.get('shop', '')
#         self.ws.append((title, price, deal_count, picture, shop, location))
#         return item


class MysqlPipeline:
    def __init__(self):
        self.conn = pymysql.connect(host='47.106.183.36', port=3306,
                                    user='fuchuang', password='fuchuang',
                                    database='fuchuang', charset='utf8mb4')
        self.cursor = self.conn.cursor()
        self.data_bangumi = []
        self.data_video = []

    def close_spider(self, spider):
        if len(self.data_bangumi) > 0:
            self._write_to_mysql_bangumi()
            self.data_bangumi.clear()
        if len(self.data_video) > 0:
            self._write_to_mysql_video()
            self.data_video.clear()
        self.conn.commit()
        self.conn.close()

    def process_item(self, item, spider):
        if isinstance(item, VideoItem):
            title = item.get('title', '')  # fall back to an empty value when the field is missing
            view_counts = item.get('view_counts', '0')
            barrage = item.get('barrage', '0')
            up = item.get('up', '')
            self.data_video.append((title, view_counts, barrage, up))
        if isinstance(item, BiliItem):
            title = item.get('title', '')
            view_counts = item.get('view_counts', '0')
            evaluate = item.get('evaluate', '0')
            attention = item.get('attention', '0')
            barrage = item.get('barrage', '0')
            self.data_bangumi.append((title, view_counts, evaluate, attention, barrage))
        if len(self.data_bangumi) >= 20:  # flush to MySQL in batches of 20
            self._write_to_mysql_bangumi()
            self.data_bangumi.clear()
        if len(self.data_video) >= 20:
            self._write_to_mysql_video()
            self.data_video.clear()
        return item

    def _write_to_mysql_bangumi(self):
        for item in self.data_bangumi:
            self.cursor.execute(
                'insert into bangumi (title, view_counts, evaluate, attention, barrage) values (%s, %s, %s, %s, %s)',
                item
            )
        self.conn.commit()

    def _write_to_mysql_video(self):
        for item in self.data_video:
            self.cursor.execute(
                'insert into video (title, view_counts, barrage, up) values (%s, %s, %s, %s)',
                item
            )
        self.conn.commit()
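MysqlPipeline assumes the bangumi and video tables already exist in the fuchuang database; the DDL is not part of this commit. A minimal sketch of compatible tables, with column types that are assumptions for illustration only (the pipeline stores every field as a string):

# hypothetical DDL sketch; the real schema is not included in this commit,
# so the column types below are assumptions
import pymysql

conn = pymysql.connect(host='47.106.183.36', port=3306, user='fuchuang',
                       password='fuchuang', database='fuchuang', charset='utf8mb4')
with conn.cursor() as cursor:
    cursor.execute(
        'create table if not exists bangumi ('
        ' title varchar(255), view_counts varchar(64), evaluate varchar(64),'
        ' attention varchar(64), barrage varchar(64))'
    )
    cursor.execute(
        'create table if not exists video ('
        ' title varchar(255), view_counts varchar(64), barrage varchar(64), up varchar(255))'
    )
conn.commit()
conn.close()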
@ -0,0 +1,101 @@
BOT_NAME = 'spider'

SPIDER_MODULES = ['spider.spiders']
NEWSPIDER_MODULE = 'spider.spiders'

# Crawl responsibly by identifying yourself (and your website) on the user-agent
# USER_AGENT = 'spider (+http://www.yourdomain.com)'

# scrapy-redis
# redis-backed duplicate filter
DUPEFILTER_CLASS = "scrapy_redis.dupefilter.RFPDupeFilter"
# use the scrapy_redis scheduler
SCHEDULER = "scrapy_redis.scheduler.Scheduler"
# keep the request queue in redis so the crawl can be paused and resumed
SCHEDULER_PERSIST = True

# Obey robots.txt rules
ROBOTSTXT_OBEY = True

# Configure maximum concurrent requests performed by Scrapy (default: 16)
# CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs

DOWNLOAD_DELAY = 3  # delay between requests, in seconds
RANDOMIZE_DOWNLOAD_DELAY = True  # randomise the delay

# The download delay setting will honor only one of:
# CONCURRENT_REQUESTS_PER_DOMAIN = 16
# CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
# COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
# TELNETCONSOLE_ENABLED = False

# Override the default request headers:
# DEFAULT_REQUEST_HEADERS = {
#     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#     'Accept-Language': 'en',
# }

# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
# SPIDER_MIDDLEWARES = {
#     'spider.middlewares.SpiderSpiderMiddleware': 543,
# }

# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
DOWNLOADER_MIDDLEWARES = {
    'spider.middlewares.SpiderDownloaderMiddleware': 543,
    # 'spider.middlewares.ProxyMiddleware': 600,
}

# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
# EXTENSIONS = {
#     'scrapy.extensions.telnet.TelnetConsole': None,
# }

# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
    # 'spider.pipelines.ExcelPipeline': 300,
    'spider.pipelines.MysqlPipeline': 300,

    # the scrapy_redis pipeline also stores scraped items in redis
    'scrapy_redis.pipelines.RedisPipeline': 400,
}

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
# AUTOTHROTTLE_ENABLED = True
# The initial download delay
# AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
# AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
# AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
# HTTPCACHE_ENABLED = True
# HTTPCACHE_EXPIRATION_SECS = 0
# HTTPCACHE_DIR = 'httpcache'
# HTTPCACHE_IGNORE_HTTP_CODES = []
# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

# Set settings whose default value is deprecated to a future-proof value
REQUEST_FINGERPRINTER_IMPLEMENTATION = '2.7'
TWISTED_REACTOR = 'twisted.internet.asyncioreactor.AsyncioSelectorReactor'
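Because the spider in the next file is a scrapy_redis RedisCrawlSpider, it does not start from a hard-coded URL: it waits until a start URL is pushed onto its redis_key. A minimal seeding sketch, assuming redis-py and a Redis instance on localhost:6379 (the settings above do not set REDIS_HOST/REDIS_PORT, so scrapy_redis falls back to its defaults):

# hypothetical seeding snippet; host and port are assumptions based on scrapy_redis defaults
import redis

r = redis.Redis(host='localhost', port=6379)
r.lpush('Bili', 'https://www.bilibili.com')  # 'Bili' matches the spider's redis_key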
@ -0,0 +1,54 @@
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from scrapy_redis.spiders import RedisCrawlSpider
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import Rule
from scrapy import Request

from spider.items import VideoItem, BiliItem


class BiliSpider(RedisCrawlSpider):
    name = 'Bili'
    redis_key = 'Bili'

    rules = [
        Rule(LinkExtractor(allow=r"https://www.bilibili.com/bangumi/.*?"), callback='parse_Item', follow=True),
        Rule(LinkExtractor(allow=r"https://www.bilibili.com/video/BV.*?"), callback='parse_Videoitem', follow=True),
        Rule(LinkExtractor(allow=r"https://www.bilibili.com/bangumi/media/md.*?"), callback='parse_BiliItem',
             follow=True),
    ]

    def parse_Videoitem(self, response, **kwargs):
        # ordinary video pages: title, play count, danmaku count and uploader
        Video_item = VideoItem()
        Video_item['title'] = response.xpath('//*[@id="viewbox_report"]/h1/@title').extract()[0]
        Video_item['view_counts'] = str(
            response.xpath('//*[@id="viewbox_report"]/div/div/span[1]/@title').extract()[0]).replace("总播放数", "")
        Video_item['barrage'] = str(
            response.xpath('//*[@id="viewbox_report"]/div/div/span[2]/@title').extract()[0]).replace(
            "历史累计弹幕数", "")
        Video_item['up'] = str(response.xpath('//*[@id="v_upinfo"]/div[2]/div[1]/a[1]/text()').extract()[0]).replace(
            "\n",
            "").strip()
        yield Video_item

    def parse_BiliItem(self, response, **kwargs):
        # bangumi media pages: title, play count, follower count, danmaku count and score
        bangumi_item = BiliItem()
        bangumi_item['title'] = response.xpath(
            '//*[@id="app"]/div[1]/div[2]/div/div[2]/div[1]/span[1]/text()').extract()[0]
        bangumi_item['view_counts'] = response.xpath(
            '//*[@id="app"]/div[1]/div[2]/div/div[2]/div[2]/div[1]/span[1]/em/text()').extract()[0]
        bangumi_item['attention'] = response.xpath(
            '//*[@id="app"]/div[1]/div[2]/div/div[2]/div[2]/div[1]/span[2]/em/text()').extract()[0]
        bangumi_item['barrage'] = response.xpath(
            '//*[@id="app"]/div[1]/div[2]/div/div[2]/div[2]/div[1]/span[3]/em/text()').extract()[0]
        bangumi_item['evaluate'] = response.xpath(
            '//*[@id="app"]/div[1]/div[2]/div/div[2]/div[2]/div[2]/div/div[1]/text()').extract()[0]
        yield bangumi_item

    def parse_Item(self, response, **kwargs):
        # bangumi play pages link to the media detail page; follow that link
        url = 'https:' + response.xpath('//*[@id="media_module"]/div/a/@href').extract()[0]
        yield Request(url=url, callback=self.parse_BiliItem)
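The spider module imports CrawlerProcess and get_project_settings but never uses them. A minimal sketch of how the crawl could be launched programmatically with those imports, assuming it is run from the Scrapy project root so spider.settings resolves (running `scrapy crawl Bili` from the command line is equivalent):

# minimal runner sketch, assuming it is executed from the Scrapy project root
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

process = CrawlerProcess(get_project_settings())
process.crawl('Bili')  # the spider's name attribute
process.start()  # blocks until the crawl is stopped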
@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,24 @@
import json

from selenium import webdriver


def create_chrome_driver(*, headless=False):
    options = webdriver.ChromeOptions()
    if headless:
        options.add_argument('--headless')  # do not show a browser window
    options.add_experimental_option('excludeSwitches', ['enable-automation'])  # hide the automation banner
    options.add_experimental_option('useAutomationExtension', False)
    browser = webdriver.Chrome(options=options, executable_path=r'chromedriver.exe')
    browser.execute_cdp_cmd(
        'Page.addScriptToEvaluateOnNewDocument',
        {'source': 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'}  # mask the webdriver flag
    )
    return browser


def add_cookies(browser, cookie_file):
    with open(cookie_file, 'r') as file:
        cookies_list = json.loads(file.read())
    for cookie_dict in cookies_list:
        browser.add_cookie(cookie_dict)
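Selenium only accepts a cookie for a domain that is currently open, so the cookies.txt produced by the login script has to be injected after navigating to bilibili.com. A minimal usage sketch for the two helpers above; the module name `utils` is an assumption, since the file name is not shown in this diff:

# usage sketch; the module name 'utils' is a hypothetical stand-in for the file above
from utils import create_chrome_driver, add_cookies

browser = create_chrome_driver(headless=True)
browser.get('https://www.bilibili.com')  # the domain must be loaded before add_cookie will work
add_cookies(browser, 'cookies.txt')      # inject the cookies saved by the login script
browser.refresh()                        # reload so the logged-in session takes effect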