# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html


# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
import pymysql
import csv
from pymysql import cursors
from twisted.enterprise import adbapi


class JdPipeline(object):
    """Write scraped JD items (name, price) to a local CSV file.

    The file and the csv writer are created once in open_spider and
    reused for every item, then closed in close_spider.
    """

    fp = None        # open file handle for ./jdpc.csv (None until open_spider)
    writer = None    # csv.writer bound to fp, built once per crawl

    def open_spider(self, spider):
        """Open the output CSV when the spider starts."""
        print('开始爬虫')
        # newline="" keeps the csv module from emitting blank rows on Windows.
        self.fp = open('./jdpc.csv', 'w', encoding='utf-8', newline="")
        # Build the writer once here instead of once per item.
        self.writer = csv.writer(self.fp)

    def process_item(self, item, spider):
        """Append one (name, price) row; always pass the item on."""
        self.writer.writerow([item['name'], item['price']])
        return item

    def close_spider(self, spider):
        """Close the CSV file when the spider finishes."""
        print('结束爬虫')
        # Guard: open_spider may never have run if startup failed early.
        if self.fp is not None:
            self.fp.close()


class mysqlPipeline(object):
    """Persist scraped items into the MySQL table `jd.jdpc`.

    One connection and one cursor are created per crawl; each item is
    inserted with a parameterized query and committed, with rollback on
    failure.
    """

    conn = None      # pymysql connection (None until open_spider)
    cursor = None    # shared cursor, created once per crawl

    def open_spider(self, spider):
        """Connect to MySQL once when the spider starts."""
        # utf8mb4 so Chinese product names round-trip correctly
        # (the original code left a charset TODO in a comment).
        self.conn = pymysql.connect(host='127.0.0.1', port=3306, user='root',
                                    password='msrmsr', db='jd',
                                    charset='utf8mb4')
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        """Insert (price, name) into jdpc; roll back this item on error."""
        try:
            # Parameterized query: the driver escapes quotes/backslashes in
            # product names — the old '"%s"' string formatting broke on any
            # name containing a double quote and was SQL-injectable.
            self.cursor.execute(
                'insert into jdpc values (%s, %s)',
                (item["price"], item["name"]),
            )
            self.conn.commit()
        except Exception as e:
            # Best-effort: log and undo the failed insert, keep crawling.
            print(e)
            self.conn.rollback()
        return item

    def close_spider(self, spider):
        """Release the cursor and connection when the spider finishes."""
        # Guards: open_spider may have failed before either was created.
        if self.cursor is not None:
            self.cursor.close()
        if self.conn is not None:
            self.conn.close()