parent
984fee9154
commit
d895d09bd1
@ -1,103 +0,0 @@
|
||||
# Define your item pipelines here
|
||||
#
|
||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import ItemAdapter
|
||||
import pymysql
|
||||
import csv
|
||||
from pymysql import cursors
|
||||
from twisted.enterprise import adbapi
|
||||
# import time
|
||||
class JdPipeline(object):
|
||||
fp = None
|
||||
|
||||
def open_spider(self, spider):
|
||||
print('开始爬虫')
|
||||
self.fp =open('./jdpc.csv', 'w', encoding='utf-8',newline="")
|
||||
|
||||
# '''with open('goods.csv', mode='a', encoding='utf-8', newline="") as f:
|
||||
# csv_write = csv.writer(f)
|
||||
# csv_write.writerow([name, price, deals, Name])'''
|
||||
|
||||
def process_item(self, item, spider):
|
||||
price = item['price']
|
||||
name = item['name']
|
||||
#pj = item['pj']
|
||||
csv_write = csv.writer(self.fp)
|
||||
csv_write.writerow([name, price])
|
||||
#self.fp.write(price + ' ' + name + '\n')
|
||||
return item
|
||||
|
||||
def close_spider(self, spider):
|
||||
print('结束爬虫')
|
||||
self.fp.close()
|
||||
|
||||
class mysqlPipeline(object):
|
||||
conn = None
|
||||
cursor = None
|
||||
def open_spider(self, spider): #,charset='utf-8'
|
||||
self.conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', password='msrmsr', db='jd')
|
||||
# self.start = time.perf_counter()
|
||||
|
||||
def process_item(self, item, spider):
|
||||
self.cursor = self.conn.cursor()
|
||||
try:
|
||||
self.cursor.execute('insert into jdpc values ("%s","%s")'%(item["price"],item["name"]))
|
||||
self.conn.commit()
|
||||
except Exception as e:
|
||||
print(e)
|
||||
self.conn.rollback()
|
||||
return item
|
||||
def close_spider(self, spider):
|
||||
# self.end = time.perf_counter()
|
||||
# print(self.end-self.start)
|
||||
self.cursor.close()
|
||||
self.conn.close()
|
||||
|
||||
# class mysqlPipeline(object):
|
||||
# def __init__(self):
|
||||
# dbparams = {
|
||||
# 'host': '127.0.0.1',
|
||||
# 'user': 'root',
|
||||
# 'password': 'msrmsr',
|
||||
# 'database': 'jdpc',
|
||||
# 'cursorclass': cursors.DictCursor # 这里需要传多一个参数:游标的一个参数给它
|
||||
# }
|
||||
# self.adpool = adbapi.ConnectionPool('pymysql', **dbparams)
|
||||
# self._sql = None
|
||||
#
|
||||
# # def open_spider(self, spider):
|
||||
# # self.start = time.perf_counter()
|
||||
#
|
||||
#
|
||||
# @property
|
||||
# def sql(self):
|
||||
#
|
||||
# if not self._sql:
|
||||
#
|
||||
# self._sql = 'insert into jdpc(price,name) values(%s,%s)'
|
||||
# #self._sql='insert into jdpc values ("%s","%s")' % (item["price"], item["name"])
|
||||
#
|
||||
#
|
||||
# return self._sql
|
||||
#
|
||||
# def process_item(self, item, spider):
|
||||
# defer = self.adpool.runInteraction(self.insert_item, item) # 需要传一个真正导入数据库操作的函数给它,不然跟同步下载一样
|
||||
# defer.addErrback(self.handle_error, item, spider) # 添加一个接收错误信息的函数
|
||||
#
|
||||
# def insert_item(self, cursor, item):
|
||||
# cursor.execute(self.sql, [item['price'], item['name']])
|
||||
#
|
||||
# def handle_error(self, error, item, spider):
|
||||
# print('-' * 30)
|
||||
# print('Error:', error)
|
||||
# print('-' * 30)
|
||||
#
|
||||
# # def close_spider(self, spider):
|
||||
# #
|
||||
# # self.end = time.perf_counter()
|
||||
# # print(self.end-self.start)
|
||||
|
||||
Loading…
Reference in new issue