parent 2d6a6a1f85
commit 2deaaaf78c
@@ -0,0 +1,2 @@
# python
Binary file not shown.
@@ -0,0 +1,208 @@
import matplotlib.pyplot as plt
from matplotlib import font_manager
import pymysql


class show(object):
    info = {}
    TScore = []  # composite score
    name = []  # anime title
    bfl = []  # play count
    pls = []  # comment count
    scs = []  # favourite count

    def save(self):  # load the data from the database into the object
        db = pymysql.connect(host='47.106.183.36', port=3306,
                             user='fuchuang', password='fuchuang',
                             database='fuchuang', charset='utf8mb4')  # database connection
        use = 'use fuchuang;'
        show = 'show tables;'
        query = 'select * from bangumi;'
        cursor = db.cursor()
        try:
            cursor.execute(show)
            cursor.execute(query)  # select the table
            desc = cursor.description
            data = cursor.fetchall()  # fetch every row
            vals = []
            for row in data:  # keep only entries whose score is above 9.5
                if row[3] != '暂无评分':
                    if float(row[3]) > 9.5:
                        # print(row)
                        self.name.append(row[1])  # anime title

                        vals = [row[2]]
                        if '万' in vals[0]:
                            vals[0] = float(vals[0].replace('万', ''))
                        elif '亿' in vals[0]:
                            vals[0] = float(vals[0].replace('亿', '')) * 10000
                        self.bfl.append(vals[0])  # play count

                        vals = [row[4]]
                        if '万' in vals[0]:
                            vals[0] = float(vals[0].replace('万', ''))
                        else:
                            vals[0] = float(vals[0])
                        self.TScore.append(float(row[3]) * vals[0])  # composite score
                        self.scs.append(vals[0])  # favourite count

                        vals = [row[5]]
                        if '万' in vals[0]:
                            vals[0] = float(vals[0].replace('万', ''))
                        else:
                            vals[0] = float(vals[0])
                        self.pls.append(vals[0])  # comment count

        except Exception as e:
            print(e)
            db.rollback()
        finally:
            cursor.close()
            db.close()
        print(self.name)
        print(self.TScore)
        print(self.bfl)
        print(self.pls)
        print(self.scs)
        # info = {'动漫名': self.name, '播放量(万)': self.bfl, '评论数(万)': self.pls, '收藏数(万)': self.scs, '综合评分': self.TScore}
        # dm_file = pandas.DataFrame(info)
        # dm_file.to_excel('Dongman.xlsx', sheet_name="动漫数据分析")
        # return all of the lists
        return self.name, self.bfl, self.pls, self.scs, self.TScore

    def view(self):  # data visualisation
        # so that Chinese characters render on the axes
        plt.rcParams['font.sans-serif'] = ['SimHei']
        plt.rcParams['axes.unicode_minus'] = False

        my_font = font_manager.FontProperties(fname='STHeiti-Medium.ttc')
        dm_name = self.name  # bangumi title
        dm_play = self.bfl  # bangumi play count
        dm_review = self.pls  # bangumi comment count
        dm_favorite = self.scs  # bangumi favourite count
        dm_com_score = self.TScore  # bangumi composite score
        y_score = [9.6, 9.7, 9.8, 9.9, 10.0]

        # dm_all = self.TScore * self.scs

        # ********************* composite score vs. play count
        fig, ax1 = plt.subplots()
        plt.bar(dm_name, dm_com_score, color='red')
        plt.title('综合评分和播放量数据分析', fontproperties=my_font)
        ax1.tick_params(labelsize=6)
        plt.xlabel('番剧名')
        plt.ylabel('综合评分')
        plt.xticks(rotation=90, color='green')

        ax2 = ax1.twinx()
        ax2.plot(dm_play, color='cyan')
        plt.ylabel('播放量')

        plt.plot(1, label='评分', color='red', linewidth=5.0)
        plt.plot(1, label='播放量', color="cyan", linewidth=1.0, linestyle="-")
        plt.legend()

        plt.savefig(r'E:1.png', dpi=1000, bbox_inches='tight')

        # ********************* comment count vs. favourite count
        # ******** comment-count bar chart
        fig, ax3 = plt.subplots()
        plt.bar(dm_name, dm_play, color='green')
        plt.title('番剧收藏数与评论数分析')
        plt.ylabel('评论数(万)')
        ax3.tick_params(labelsize=6)
        plt.xticks(rotation=90, color='green')

        # ******* favourite-count line chart
        ax4 = ax3.twinx()  # required for the combined (twin-axis) chart
        ax4.plot(dm_favorite, color='yellow')  # line colour; width and marker style could be set here
        plt.ylabel('收藏数(万)')

        plt.plot(1, label='评论数', color="green", linewidth=5.0)
        plt.plot(1, label='收藏数', color="yellow", linewidth=1.0, linestyle="-")
        plt.legend()
        plt.savefig(r'E:2.png', dpi=1000, bbox_inches='tight')

        # ********************* composite score vs. favourite count
        # ******* composite-score bar chart
        fig, ax5 = plt.subplots()
        plt.bar(dm_name, dm_com_score, color='red')
        plt.title('综合评分和收藏数量数据分析')
        plt.ylabel('综合评分')
        ax5.tick_params(labelsize=6)
        plt.xticks(rotation=90, color='green')

        # ******* favourite-count line chart
        ax6 = ax5.twinx()  # required for the combined (twin-axis) chart
        ax6.plot(dm_favorite, color='yellow')  # line colour; width and marker style could be set here
        plt.ylabel('收藏数(万)')
        plt.plot(1, label='综合评分', color="red", linewidth=5.0)
        plt.plot(1, label='收藏数', color="yellow", linewidth=1.0, linestyle="-")
        plt.legend()

        plt.savefig(r'E:3.png', dpi=1000, bbox_inches='tight')

        # ********************* play count vs. comment count
        # ******* play-count bar chart
        fig, ax7 = plt.subplots()
        plt.bar(dm_name, dm_play, color='cyan')
        plt.title('播放量和收藏数 数据分析')
        plt.ylabel('播放量(万)')
        ax7.tick_params(labelsize=6)
        plt.xticks(rotation=90, color='green')

        # ******* favourite-count line chart
        ax8 = ax7.twinx()  # required for the combined (twin-axis) chart
        ax8.plot(dm_favorite, color='yellow')  # line colour; width and marker style could be set here
        plt.ylabel('收藏数(万)')

        plt.plot(1, label='评论数', color="green", linewidth=5.0)
        plt.plot(1, label='收藏数', color="yellow", linewidth=1.0, linestyle="-")
        plt.legend()
        plt.savefig(r'E:4.png', dpi=1000, bbox_inches='tight')

        # ******* comment-count line chart
        # ax8 = ax7.twinx()  # required for the combined (twin-axis) chart
        # ax8.plot(dm_review, color='green')  # line colour; width and marker style could be set here
        # plt.ylabel('评论数(万)')
        # plt.plot(1, label='播放量', color="cyan", linewidth=5.0)
        # plt.plot(1, label='评论数', color="green", linewidth=1.0, linestyle="-")
        # plt.legend()
        # plt.savefig(r'E:4.png', dpi=1000, bbox_inches='tight')
        # the comment-count series still displays incorrectly
        plt.show()

    def print(self):
        print(len(self.name))
        print(len(self.bfl))
        print(len(self.pls))
        print(len(self.scs))
        print(len(self.TScore))

    def sort(self, i, j):
        temp = self.name[i]
        self.name[i] = self.name[j]
        self.name[j] = temp

        temp = self.bfl[i]
        self.bfl[i] = self.bfl[j]
        self.bfl[j] = temp

        temp = self.pls[i]
        self.pls[i] = self.pls[j]
        self.pls[j] = temp

        temp = self.scs[i]
        self.scs[i] = self.scs[j]
        self.scs[j] = temp

        temp = self.TScore[i]
        self.TScore[i] = self.TScore[j]
        self.TScore[j] = temp


def main():
    a = show()  # create the object
    a.save()  # load the data from the database
    a.print()  # print the length of each list
    a.view()  # visualise


if __name__ == '__main__':
    main()
Binary file not shown.
@@ -0,0 +1,11 @@
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html

[settings]
default = spider.settings

[deploy]
#url = http://localhost:6800/
project = spider
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,20 @@
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from time import sleep
import json

s = Service("../chromedriver.exe")
bro = webdriver.Chrome(service=s)
# open Bilibili
bro.get('https://www.bilibili.com')
bro.delete_all_cookies()  # clear existing cookies first
# 60 seconds to log in manually
sleep(60)
dictcookies = bro.get_cookies()  # read the browser cookies after logging in
jsoncookies = json.dumps(dictcookies)  # convert the cookie dicts to JSON so they can be saved

# write the cookies.txt file
with open('cookies.txt', 'w') as f:  # save to a text file
    f.write(jsoncookies)
print('cookies is ok')
@@ -0,0 +1 @@
[{"domain": ".bilibili.com", "httpOnly": false, "name": "innersign", "path": "/", "secure": false, "value": "0"}, {"domain": ".bilibili.com", "expiry": 1700969682, "httpOnly": false, "name": "i-wanna-go-back", "path": "/", "secure": false, "value": "-1"}, {"domain": ".bilibili.com", "expiry": 1684985681, "httpOnly": false, "name": "bili_jct", "path": "/", "secure": false, "value": "6c88a668c7442fa148fc9d06d6e40849"}, {"domain": ".bilibili.com", "expiry": 1700969680, "httpOnly": false, "name": "sid", "path": "/", "secure": false, "value": "qauykkrb"}, {"domain": ".bilibili.com", "expiry": 1684985681, "httpOnly": true, "name": "SESSDATA", "path": "/", "secure": true, "value": "8e595145%2C1684985681%2C785d7%2Ab1"}, {"domain": ".bilibili.com", "expiry": 1764041679, "httpOnly": false, "name": "buvid_fp", "path": "/", "secure": false, "value": "3fdd662d6b3f6d9fd8b2a5f1834013d4"}, {"domain": ".bilibili.com", "expiry": 1684985681, "httpOnly": false, "name": "DedeUserID", "path": "/", "secure": false, "value": "178665301"}, {"domain": ".bilibili.com", "expiry": 1764041638, "httpOnly": false, "name": "b_nut", "path": "/", "secure": false, "value": "1669433638"}, {"domain": ".bilibili.com", "expiry": 1700969682, "httpOnly": false, "name": "b_ut", "path": "/", "secure": false, "value": "5"}, {"domain": ".bilibili.com", "httpOnly": false, "name": "b_lsid", "path": "/", "secure": false, "value": "B3E3109EB_184B2000F25"}, {"domain": ".bilibili.com", "expiry": 1684985681, "httpOnly": false, "name": "DedeUserID__ckMd5", "path": "/", "secure": false, "value": "3f4304303449401f"}, {"domain": ".bilibili.com", "expiry": 1700969654, "httpOnly": false, "name": "_uuid", "path": "/", "secure": false, "value": "F0B6051B-46F1-CEFA-D47B-30415304BFA054324infoc"}, {"domain": ".bilibili.com", "expiry": 1764041638, "httpOnly": false, "name": "buvid3", "path": "/", "secure": false, "value": "69BFBC33-CEEC-E4A5-9E0A-7D9DB491BA0738742infoc"}, {"domain": ".bilibili.com", "expiry": 1764041638, "httpOnly": false, "name": "buvid4", "path": "/", "secure": false, "value": "7FF54BEA-5D27-6E79-874D-2092619183B623196-022112611-0kiN2FN18wB5k/cGcFypkA%3D%3D"}]
@@ -0,0 +1,21 @@
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html

import scrapy


class VideoItem(scrapy.Item):
    title = scrapy.Field()  # video title
    view_counts = scrapy.Field()  # play count
    barrage = scrapy.Field()  # danmaku (bullet comment) count
    up = scrapy.Field()  # uploader


class BiliItem(scrapy.Item):
    title = scrapy.Field()  # video title
    view_counts = scrapy.Field()  # play count
    evaluate = scrapy.Field()  # score
    attention = scrapy.Field()  # series-follower count
    barrage = scrapy.Field()  # danmaku (bullet comment) count
@@ -0,0 +1,87 @@
import pymysql
from spider.items import VideoItem, BiliItem

# import openpyxl


# class ExcelPipeline:
#     def __int__(self):
#         self.wb = openpyxl.Workbook()
#         self.ws = self.wb.active
#         self.ws.title = 'Goods'
#         self.ws.append(('标题', '价格', '销量', '图片', '店铺', '位置'))
#
#     def close_spider(self, spider):
#         self.wb.save('商品数据.xlsx')
#
#     def process_item(self, item, spider):
#         title = item.get('title', '')  # fall back to an empty value when the field is missing
#         price = item.get('price', 0)
#         deal_count = item.get('deal_count', 0)
#         picture = item.get('picture', '')
#         location = item.get('location', '')
#         shop = item.get('shop', '')
#         self.ws.append((title, price, deal_count, picture, shop, location))
#         return item


class MysqlPipeline:
    def __init__(self):
        self.conn = pymysql.connect(host='47.106.183.36', port=3306,
                                    user='fuchuang', password='fuchuang',
                                    database='fuchuang', charset='utf8mb4')
        self.cursor = self.conn.cursor()
        self.data_bangumi = []
        self.data_video = []

    def close_spider(self, spider):
        if len(self.data_bangumi) > 0:
            self._write_to_mysql_bangumi()
            self.data_bangumi.clear()
        if len(self.data_video) > 0:
            self._write_to_mysql_video()
            self.data_video.clear()
        self.conn.commit()
        self.conn.close()

    def process_item(self, item, spider):
        if isinstance(item, VideoItem):
            title = item.get('title', '')  # fall back to an empty value when the field is missing
            view_counts = item.get('view_counts', '0')
            barrage = item.get('barrage', '0')
            up = item.get('up', '')
            self.data_video.append((title, view_counts, barrage, up))
        if isinstance(item, BiliItem):
            title = item.get('title', '')  # fall back to an empty value when the field is missing
            view_counts = item.get('view_counts', '0')
            evaluate = item.get('evaluate', '0')
            attention = item.get('attention', '0')
            barrage = item.get('barrage', '0')
            self.data_bangumi.append((title, view_counts, evaluate, attention, barrage))
        # flush to MySQL in batches of 20
        if len(self.data_bangumi) >= 20:
            self._write_to_mysql_bangumi()
            self.data_bangumi.clear()
        if len(self.data_video) >= 20:
            self._write_to_mysql_video()
            self.data_video.clear()
        return item

    def _write_to_mysql_bangumi(self):
        for item in self.data_bangumi:
            self.cursor.execute(
                'insert into bangumi (title, view_counts, evaluate, attention, barrage) values (%s, %s, %s, %s, %s)',
                item
            )
        self.conn.commit()

    def _write_to_mysql_video(self):
        for item in self.data_video:
            self.cursor.execute(
                'insert into video (title, view_counts, barrage, up) values (%s, %s, %s, %s)',
                item
            )
        self.conn.commit()
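The pipeline assumes the bangumi and video tables already exist. The diff does not include the DDL, so the following is only a sketch inferred from the two insert statements; the column types and lengths are assumptions:

import pymysql

conn = pymysql.connect(host='47.106.183.36', port=3306, user='fuchuang',
                       password='fuchuang', database='fuchuang', charset='utf8mb4')
with conn.cursor() as cur:
    # columns mirror the insert statements in MysqlPipeline; types are guesses
    cur.execute('create table if not exists bangumi ('
                'id int auto_increment primary key, title varchar(255), '
                'view_counts varchar(64), evaluate varchar(64), '
                'attention varchar(64), barrage varchar(64))')
    cur.execute('create table if not exists video ('
                'id int auto_increment primary key, title varchar(255), '
                'view_counts varchar(64), barrage varchar(64), up varchar(255))')
conn.commit()
conn.close()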
@@ -0,0 +1,101 @@
BOT_NAME = 'spider'

SPIDER_MODULES = ['spider.spiders']
NEWSPIDER_MODULE = 'spider.spiders'

# Crawl responsibly by identifying yourself (and your website) on the user-agent
# USER_AGENT = 'spider (+http://www.yourdomain.com)'

# scrapy-redis
# request de-duplication
DUPEFILTER_CLASS = "scrapy_redis.dupefilter.RFPDupeFilter"
# use the scrapy_redis scheduler
SCHEDULER = "scrapy_redis.scheduler.Scheduler"
# allow the crawl to be paused and resumed
SCHEDULER_PERSIST = True

# Obey robots.txt rules
ROBOTSTXT_OBEY = True

# Configure maximum concurrent requests performed by Scrapy (default: 16)
# CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs

DOWNLOAD_DELAY = 3  # delay between requests (seconds)
RANDOMIZE_DOWNLOAD_DELAY = True  # randomise the download delay

# The download delay setting will honor only one of:
# CONCURRENT_REQUESTS_PER_DOMAIN = 16
# CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
# COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
# TELNETCONSOLE_ENABLED = False

# Override the default request headers:
# DEFAULT_REQUEST_HEADERS = {
#     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#     'Accept-Language': 'en',
# }

# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
# SPIDER_MIDDLEWARES = {
#     'spider.middlewares.SpiderSpiderMiddleware': 543,
# }

# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
DOWNLOADER_MIDDLEWARES = {
    'spider.middlewares.SpiderDownloaderMiddleware': 543,
    # 'spider.middlewares.ProxyMiddleware': 600,
}

# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
# EXTENSIONS = {
#     'scrapy.extensions.telnet.TelnetConsole': None,
# }

# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
    # 'spider.pipelines.ExcelPipeline': 300,
    'spider.pipelines.MysqlPipeline': 300,
    # the scrapy_redis pipeline
    'scrapy_redis.pipelines.RedisPipeline': 400,
}

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
# AUTOTHROTTLE_ENABLED = True
# The initial download delay
# AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
# AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
# AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
# HTTPCACHE_ENABLED = True
# HTTPCACHE_EXPIRATION_SECS = 0
# HTTPCACHE_DIR = 'httpcache'
# HTTPCACHE_IGNORE_HTTP_CODES = []
# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

# Set settings whose default value is deprecated to a future-proof value
REQUEST_FINGERPRINTER_IMPLEMENTATION = '2.7'
TWISTED_REACTOR = 'twisted.internet.asyncioreactor.AsyncioSelectorReactor'
@@ -0,0 +1,54 @@
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from scrapy_redis.spiders import RedisCrawlSpider
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import Rule
from scrapy import Request

from spider.items import VideoItem, BiliItem


class BiliSpider(RedisCrawlSpider):
    name = 'Bili'
    redis_key = 'Bili'

    rules = [
        Rule(LinkExtractor(allow=r"https://www.bilibili.com/bangumi/.*?"), callback='parse_Item', follow=True),
        Rule(LinkExtractor(allow=r"https://www.bilibili.com/video/BV.*?"), callback='parse_Videoitem', follow=True),
        Rule(LinkExtractor(allow=r"https://www.bilibili.com/bangumi/media/md.*?"), callback='parse_BiliItem',
             follow=True),
    ]

    def parse_Videoitem(self, response, **kwargs):
        Video_item = VideoItem()
        Video_item['title'] = response.xpath('//*[@id="viewbox_report"]/h1/@title').extract()[0]
        Video_item['view_counts'] = str(
            response.xpath('//*[@id="viewbox_report"]/div/div/span[1]/@title').extract()[0]).replace("总播放数", "")
        Video_item['barrage'] = str(
            response.xpath('//*[@id="viewbox_report"]/div/div/span[2]/@title').extract()[0]).replace(
            "历史累计弹幕数", "")
        Video_item['up'] = str(response.xpath('//*[@id="v_upinfo"]/div[2]/div[1]/a[1]/text()').extract()[0]).replace(
            "\\n",
            "").strip()
        yield Video_item

    def parse_BiliItem(self, response, **kwargs):
        bangumi_item = BiliItem()
        bangumi_item['title'] = response.xpath(
            '//*[@id="app"]/div[1]/div[2]/div/div[2]/div[1]/span[1]/text()').extract()[0]
        bangumi_item['view_counts'] = response.xpath(
            '//*[@id="app"]/div[1]/div[2]/div/div[2]/div[2]/div[1]/span[1]/em/text()').extract()[0]
        bangumi_item['attention'] = response.xpath(
            '//*[@id="app"]/div[1]/div[2]/div/div[2]/div[2]/div[1]/span[2]/em/text()').extract()[0]
        bangumi_item['barrage'] = response.xpath(
            '//*[@id="app"]/div[1]/div[2]/div/div[2]/div[2]/div[1]/span[3]/em/text()').extract()[0]
        bangumi_item['evaluate'] = response.xpath(
            '//*[@id="app"]/div[1]/div[2]/div/div[2]/div[2]/div[2]/div/div[1]/text()').extract()[0]
        yield bangumi_item

    def parse_Item(self, response, **kwargs):
        url = 'https:' + response.xpath('//*[@id="media_module"]/div/a/@href').extract()[0]
        yield Request(url=url, callback=self.parse_BiliItem)
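Because BiliSpider is a scrapy_redis RedisCrawlSpider, it idles until a seed URL is pushed onto the Redis key named by redis_key ('Bili'). A minimal seeding sketch; the Redis host/port and the start URL are assumptions, since the diff does not show a REDIS_URL setting:

import redis

# connect to the Redis instance the scrapy_redis scheduler uses (host/port assumed)
r = redis.Redis(host='localhost', port=6379)
# the spider pops its start URLs from the key named by BiliSpider.redis_key
r.lpush('Bili', 'https://www.bilibili.com')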
@@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,24 @@
import json

from selenium import webdriver


def create_chrome_driver(*, headless=False):
    options = webdriver.ChromeOptions()
    if headless:
        options.add_argument('--headless')  # run without a visible browser window
    options.add_experimental_option('excludeSwitches', ['enable-automation'])  # hide the hint that the browser is driven by Selenium
    options.add_experimental_option('useAutomationExtension', False)
    browser = webdriver.Chrome(options=options, executable_path=r'chromedriver.exe')
    browser.execute_cdp_cmd(
        'Page.addScriptToEvaluateOnNewDocument',
        {'source': 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'}  # mask the navigator.webdriver flag
    )
    return browser


def add_cookies(browser, cookie_file):
    with open(cookie_file, 'r') as file:
        cookies_list = json.loads(file.read())
    for cookie_dict in cookies_list:
        browser.add_cookie(cookie_dict)
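A short usage sketch of these helpers together with the cookies.txt produced by the login script above. The module name 'utils' and the target URL are assumptions; note that Selenium only accepts a cookie after the browser is already on the matching domain:

from utils import create_chrome_driver, add_cookies  # module name assumed

browser = create_chrome_driver(headless=True)
browser.get('https://www.bilibili.com')  # visit the domain first so .bilibili.com cookies are accepted
add_cookies(browser, 'cookies.txt')
browser.get('https://www.bilibili.com')  # reload, now with the logged-in session
print(browser.title)
browser.quit()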