You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

88 lines
3.0 KiB

import pymysql
from spider.items import VideoItem, BiliItem
# import openpyxl
# class ExcelPipeline:
# def __init__(self):
# self.wb = openpyxl.Workbook()
# self.ws = self.wb.active
# self.ws.title = 'Goods'
# self.ws.append(('标题', '价格', '销量', '图片', '店铺', '位置'))
#
# def close_spider(self, spider):
# self.wb.save('商品数据.xlsx')
#
# def process_item(self, item, spider):
# title = item.get('title', '')  # fall back to an empty string when the key is missing
# price = item.get('price', 0)
# deal_count = item.get('deal_count', 0)
# picture = item.get('picture', '')
# location = item.get('location', '')
# shop = item.get('shop', '')
# self.ws.append((title, price, deal_count, picture, shop, location))
# return item
class MysqlPipeline:
    """Item pipeline that buffers scraped items and inserts them into MySQL in batches.

    ``VideoItem`` rows are written to the ``video`` table and ``BiliItem`` rows
    to the ``bangumi`` table. Rows accumulate in memory and are flushed once a
    buffer holds ``BATCH_SIZE`` rows; anything still buffered is flushed when
    the spider closes.
    """

    # Number of buffered rows that triggers a flush to the database.
    BATCH_SIZE = 20

    _INSERT_BANGUMI = (
        'insert into bangumi (title, view_counts, evaluate, attention, barrage) '
        'values (%s, %s, %s, %s, %s)'
    )
    _INSERT_VIDEO = (
        'insert into video (title, view_counts, barrage, up) '
        'values (%s, %s, %s, %s)'
    )

    def __init__(self, *, host='47.106.183.36', port=3306, user='fuchuang',
                 password='fuchuang', database='fuchuang', charset='utf8mb4'):
        """Open the MySQL connection and create the empty row buffers.

        All parameters are keyword-only and are passed straight through to
        ``pymysql.connect``. Calling the constructor with no arguments
        connects exactly as before.
        """
        # NOTE(review): the defaults are the credentials that were hard-coded
        # here originally; they are kept so existing deployments keep working,
        # but they should be moved out of the source tree.
        self.conn = pymysql.connect(host=host, port=port,
                                    user=user, password=password,
                                    database=database, charset=charset)
        self.cursor = self.conn.cursor()
        self.data_bangumi = []
        self.data_video = []

    def close_spider(self, spider):
        """Flush whatever is still buffered, then close the connection.

        The connection is closed even when the final flush raises.
        """
        try:
            self._write_to_mysql_bangumi()
            self._write_to_mysql_video()
        finally:
            self.conn.close()

    def process_item(self, item, spider):
        """Buffer *item* as a row for its table and flush any full buffer.

        Items whose type is neither ``VideoItem`` nor ``BiliItem`` are not
        stored. The item is always returned unchanged.
        """
        # Exact-type match on purpose: the item class hierarchy is not visible
        # from this module, so subclasses are not treated as their parents.
        item_type = type(item)
        if item_type is VideoItem:
            self.data_video.append((
                item.get('title', ''),  # missing fields fall back to a default
                item.get('view_counts', '0'),
                item.get('barrage', '0'),
                item.get('up', ''),
            ))
        if item_type is BiliItem:
            self.data_bangumi.append((
                item.get('title', ''),
                item.get('view_counts', '0'),
                item.get('evaluate', '0'),
                item.get('attention', '0'),
                item.get('barrage', '0'),
            ))
        # Both buffers are checked on every item so that a batch whose earlier
        # flush failed is retried as soon as the next item arrives.
        if len(self.data_bangumi) >= self.BATCH_SIZE:
            self._write_to_mysql_bangumi()
        if len(self.data_video) >= self.BATCH_SIZE:
            self._write_to_mysql_video()
        return item

    def _write_to_mysql_bangumi(self):
        """Insert the buffered ``bangumi`` rows and empty that buffer."""
        self._write_rows(self._INSERT_BANGUMI, self.data_bangumi)

    def _write_to_mysql_video(self):
        """Insert the buffered ``video`` rows and empty that buffer."""
        self._write_rows(self._INSERT_VIDEO, self.data_video)

    def _write_rows(self, sql, rows):
        """Insert *rows* with *sql* as a single transaction.

        On success the transaction is committed and *rows* is cleared in
        place. On failure the transaction is rolled back, *rows* is left
        untouched so the batch can be retried, and the error is re-raised.
        """
        if not rows:
            return
        try:
            self.cursor.executemany(sql, rows)
            self.conn.commit()
        except Exception:
            # Without the rollback, rows inserted before the failure would
            # stay pending and be committed again with the retried batch.
            self.conn.rollback()
            raise
        rows.clear()