You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
104 lines
3.3 KiB
104 lines
3.3 KiB
# Define your item pipelines here
|
|
#
|
|
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
|
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
|
|
|
|
|
# useful for handling different item types with a single interface
|
|
from itemadapter import ItemAdapter
|
|
import pymysql
|
|
import csv
|
|
from pymysql import cursors
|
|
from twisted.enterprise import adbapi
|
|
# import time
|
|
class JdPipeline(object):
    """Scrapy item pipeline that writes each item's name and price to a CSV file.

    The output file is ``./jdpc.csv`` (relative to the working directory).
    It is opened in write mode when the spider starts, so any file left by
    a previous run is overwritten, and it is closed when the spider ends.
    """

    # Output file handle: set by open_spider, released by close_spider.
    fp = None
    # csv.writer bound to ``fp``; built once per crawl rather than per item.
    _writer = None

    def open_spider(self, spider):
        """Open the CSV output file and prepare the row writer.

        Args:
            spider: The spider being opened (unused).
        """
        print('开始爬虫')
        # newline="" hands line-ending control to the csv module, which
        # avoids blank rows between records on Windows.
        self.fp = open('./jdpc.csv', 'w', encoding='utf-8', newline="")
        self._writer = csv.writer(self.fp)

    def process_item(self, item, spider):
        """Append one ``[name, price]`` row to the CSV file.

        Args:
            item: Mapping-like item providing the ``'price'`` and ``'name'`` keys.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, unchanged, so later pipelines can process it.

        Raises:
            KeyError: If ``item`` lacks ``'price'`` or ``'name'``.
        """
        price = item['price']
        name = item['name']
        if self._writer is None:
            # Covers the case where ``fp`` was assigned without open_spider.
            self._writer = csv.writer(self.fp)
        self._writer.writerow([name, price])
        return item

    def close_spider(self, spider):
        """Close the CSV file if it was opened.

        Args:
            spider: The spider being closed (unused).
        """
        print('结束爬虫')
        # ``fp`` is still None when open_spider never ran or open() failed;
        # closing must not raise in that case.
        if self.fp is not None:
            self.fp.close()
            self.fp = None
        self._writer = None
|
|
|
|
class mysqlPipeline(object):
    """Scrapy item pipeline that inserts each item's price and name into MySQL.

    Rows go into the ``jdpc`` table of the ``jd`` database through a single
    blocking pymysql connection that lives for the duration of the crawl.
    """

    # Connection opened in open_spider and closed in close_spider.
    conn = None
    # Cursor created lazily on the first item and reused for later items.
    cursor = None

    def open_spider(self, spider):
        """Open the MySQL connection used for the whole crawl.

        Args:
            spider: The spider being opened (unused).
        """
        # NOTE(review): credentials are hard-coded here; consider moving
        # them to the project settings or the environment.
        self.conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', password='msrmsr', db='jd')

    def process_item(self, item, spider):
        """Insert one ``(price, name)`` row and commit it.

        Any failure is printed and the transaction rolled back, so a bad
        item does not stop the crawl.

        Args:
            item: Mapping-like item providing the ``'price'`` and ``'name'`` keys.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, unchanged, so later pipelines can process it.
        """
        if self.cursor is None:
            # One cursor for the whole crawl instead of a new, never-closed
            # cursor per item.
            self.cursor = self.conn.cursor()
        try:
            # Values are bound by the driver rather than spliced into the
            # SQL text, so quotes or backslashes in scraped data can neither
            # break nor inject into the statement. str() keeps the original
            # behaviour of storing each value's text form.
            self.cursor.execute(
                'insert into jdpc values (%s, %s)',
                (str(item["price"]), str(item["name"])),
            )
            self.conn.commit()
        except Exception as e:
            # Deliberately best-effort: report the failure and keep crawling.
            print(e)
            self.conn.rollback()
        return item

    def close_spider(self, spider):
        """Release the cursor and the connection if they were created.

        Args:
            spider: The spider being closed (unused).
        """
        try:
            # The cursor is still None when no item was ever processed.
            if self.cursor is not None:
                self.cursor.close()
                self.cursor = None
        finally:
            # Close the connection even if closing the cursor failed; it is
            # still None when open_spider never ran or connect() raised.
            if self.conn is not None:
                self.conn.close()
                self.conn = None
|
|
|
|
# class mysqlPipeline(object):
|
|
# def __init__(self):
|
|
# dbparams = {
|
|
# 'host': '127.0.0.1',
|
|
# 'user': 'root',
|
|
# 'password': 'msrmsr',
|
|
# 'database': 'jdpc',
|
|
# 'cursorclass': cursors.DictCursor # 这里需要传多一个参数:游标的一个参数给它
|
|
# }
|
|
# self.adpool = adbapi.ConnectionPool('pymysql', **dbparams)
|
|
# self._sql = None
|
|
#
|
|
# # def open_spider(self, spider):
|
|
# # self.start = time.perf_counter()
|
|
#
|
|
#
|
|
# @property
|
|
# def sql(self):
|
|
#
|
|
# if not self._sql:
|
|
#
|
|
# self._sql = 'insert into jdpc(price,name) values(%s,%s)'
|
|
# #self._sql='insert into jdpc values ("%s","%s")' % (item["price"], item["name"])
|
|
#
|
|
#
|
|
# return self._sql
|
|
#
|
|
# def process_item(self, item, spider):
|
|
# defer = self.adpool.runInteraction(self.insert_item, item) # 需要传一个真正导入数据库操作的函数给它,不然跟同步下载一样
|
|
# defer.addErrback(self.handle_error, item, spider) # 添加一个接收错误信息的函数
|
|
#
|
|
# def insert_item(self, cursor, item):
|
|
# cursor.execute(self.sql, [item['price'], item['name']])
|
|
#
|
|
# def handle_error(self, error, item, spider):
|
|
# print('-' * 30)
|
|
# print('Error:', error)
|
|
# print('-' * 30)
|
|
#
|
|
# # def close_spider(self, spider):
|
|
# #
|
|
# # self.end = time.perf_counter()
|
|
# # print(self.end-self.start)
|
|
|