You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

71 lines
2.5 KiB

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
import pymysql
class PythonscrapyweatherPipeline(object):
    """Scrapy item pipeline that persists weather items into a MySQL table.

    The database connection is opened in ``open_spider`` (from the settings
    keys MYSQL_HOST, MYSQL_PORT, MYSQL_DBNAME, MYSQL_USER, MYSQL_PASSWD)
    and closed in ``close_spider``. Each item is inserted into the
    ``weathers`` table by ``process_item``.
    """

    # Parameterized statement: %s placeholders let pymysql quote/escape the
    # values itself. The original built SQL with str.format plus manual
    # escape_string, which is injection-prone and crashes on non-str values.
    _INSERT_SQL = (
        "INSERT INTO weathers"
        "(city_Name,date,temperature,weather_condition,air_quality) "
        "VALUES (%s, %s, %s, %s, %s)"
    )

    def __init__(self, settings):
        # Only store the settings here; the actual connection is created
        # later in open_spider (dead commented-out connection code removed).
        self.settings = settings
        print("连接成功")

    @classmethod
    def from_crawler(cls, crawler):
        # Scrapy entry point: build the pipeline from the crawler's settings.
        return cls(crawler.settings)

    def process_item(self, item, spider):
        """Insert one weather item into the database.

        Rolls back on failure; always returns the item unchanged so any
        later pipelines still receive it.
        """
        print("开始插入")
        try:
            self.cursor.execute(
                self._INSERT_SQL,
                (
                    item["city_Name"],
                    item["date"],
                    item["temperature"],
                    item["weather_condition"],
                    item["air_quality"],
                ),
            )
            self.connect.commit()
            print(self.cursor.rowcount, "记录插入成功。")
        except Exception as e:
            # Narrowed from BaseException so KeyboardInterrupt/SystemExit
            # still propagate; undo the failed insert.
            print("错误在这里>>>>>>>>>>>>>", e, "<<<<<<<<<<<<<错误在这里")
            self.connect.rollback()
        return item

    def open_spider(self, spider):
        # Connect to the database when the spider starts.
        self.connect = pymysql.connect(
            host=self.settings.get('MYSQL_HOST'),
            port=self.settings.get('MYSQL_PORT'),
            db=self.settings.get('MYSQL_DBNAME'),
            user=self.settings.get('MYSQL_USER'),
            passwd=self.settings.get('MYSQL_PASSWD'),
            charset='utf8',
            use_unicode=True)
        # Cursor used for all inserts.
        self.cursor = self.connect.cursor()
        self.connect.autocommit(True)

    def close_spider(self, spider):
        # Release the cursor and connection when the spider finishes.
        self.cursor.close()
        self.connect.close()