You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
88 lines
3.0 KiB
88 lines
3.0 KiB
|
|
import pymysql
|
|
from spider.items import VideoItem, BiliItem
|
|
|
|
# import openpyxl
|
|
|
|
|
|
# class ExcelPipeline:
|
|
# def __init__(self):
|
|
# self.wb = openpyxl.Workbook()
|
|
# self.ws = self.wb.active
|
|
# self.ws.title = 'Goods'
|
|
# self.ws.append(('标题', '价格', '销量', '图片', '店铺', '位置'))
|
|
#
|
|
# def close_spider(self, spider):
|
|
# self.wb.save('商品数据.xlsx')
|
|
#
|
|
# def process_item(self, item, spider):
|
|
# title = item.get('title', '')  # fall back to an empty string when the field is missing
|
|
# price = item.get('price', 0)
|
|
# deal_count = item.get('deal_count', 0)
|
|
# picture = item.get('picture', '')
|
|
# location = item.get('location', '')
|
|
# shop = item.get('shop', '')
|
|
# self.ws.append((title, price, deal_count, picture, shop, location))
|
|
# return item
|
|
|
|
|
|
class MysqlPipeline:
    """Scrapy item pipeline that buffers scraped rows and bulk-inserts them into MySQL.

    ``VideoItem`` rows are buffered in ``data_video`` and written to the
    ``video`` table; ``BiliItem`` rows are buffered in ``data_bangumi`` and
    written to the ``bangumi`` table. A buffer is flushed once it holds
    ``BATCH_SIZE`` rows, and whatever is left is flushed when the spider closes.
    """

    # Number of buffered rows that triggers a flush to the database.
    BATCH_SIZE = 20

    _BANGUMI_SQL = 'insert into bangumi (title, view_counts, evaluate, attention, barrage) values (%s, %s, %s, %s, %s)'
    _VIDEO_SQL = 'insert into video (title, view_counts, barrage, up) values (%s, %s, %s, %s)'

    def __init__(self):
        """Open the MySQL connection and create the empty row buffers."""
        # NOTE(review): host and credentials are hard-coded in source; they
        # should be moved to project settings / environment variables.
        self.conn = pymysql.connect(host='47.106.183.36', port=3306,
                                    user='fuchuang', password='fuchuang',
                                    database='fuchuang', charset='utf8mb4')
        self.cursor = self.conn.cursor()
        self.data_bangumi = []
        self.data_video = []

    def close_spider(self, spider):
        """Flush any remaining buffered rows, then close the connection.

        The connection is closed even when a flush raises, so a failing final
        write does not leak it. The flush error still propagates.
        """
        try:
            self._write_to_mysql_bangumi()
            self._write_to_mysql_video()
        finally:
            self.conn.close()

    def process_item(self, item, spider):
        """Buffer ``item`` as a row tuple and flush any buffer that is full.

        Missing fields fall back to ``''`` (title, up) or ``'0'`` (counters).
        Items of any other type are passed through untouched.

        Returns:
            The unchanged ``item``, so later pipelines can keep processing it.
        """
        # Exact type match (not isinstance) is kept deliberately: the item
        # classes are defined elsewhere, and if one subclasses the other an
        # isinstance check would route items to a different table.
        item_type = type(item)
        if item_type is VideoItem:
            self.data_video.append((
                item.get('title', ''),
                item.get('view_counts', '0'),
                item.get('barrage', '0'),
                item.get('up', ''),
            ))
        elif item_type is BiliItem:
            self.data_bangumi.append((
                item.get('title', ''),
                item.get('view_counts', '0'),
                item.get('evaluate', '0'),
                item.get('attention', '0'),
                item.get('barrage', '0'),
            ))

        if len(self.data_bangumi) >= self.BATCH_SIZE:
            self._write_to_mysql_bangumi()
        if len(self.data_video) >= self.BATCH_SIZE:
            self._write_to_mysql_video()
        return item

    def _flush(self, sql, rows):
        """Insert ``rows`` with ``sql`` in one transaction and empty the buffer.

        Does nothing when ``rows`` is empty. On failure the transaction is
        rolled back and the exception re-raised; ``rows`` is left intact so a
        later flush can retry without the partially executed batch being
        committed twice.
        """
        if not rows:
            return
        try:
            self.cursor.executemany(sql, rows)
            self.conn.commit()
        except Exception:
            # Undo the partial batch before propagating the error.
            self.conn.rollback()
            raise
        rows.clear()

    def _write_to_mysql_bangumi(self):
        """Write all buffered bangumi rows to the ``bangumi`` table."""
        self._flush(self._BANGUMI_SQL, self.data_bangumi)

    def _write_to_mysql_video(self):
        """Write all buffered video rows to the ``video`` table."""
        self._flush(self._VIDEO_SQL, self.data_video)
|
|
|
|
|
|
|