Compare commits

1 commit: master...scr

Author      SHA1        Message   Date
p8yivfznb   30d70c4c57  Add src   4 years ago

.idea/.gitignore (vendored)

@@ -1,3 +0,0 @@
# Default ignored files
/shelf/
/workspace.xml

@@ -1,8 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

@@ -1,12 +0,0 @@
<component name="InspectionProjectProfileManager">
  <profile version="1.0">
    <option name="myName" value="Project Default" />
    <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
      <option name="ignoredErrors">
        <list>
          <option value="N806" />
        </list>
      </option>
    </inspection_tool>
  </profile>
</component>

@@ -1,6 +0,0 @@
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>

@@ -1,4 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
</project>

@@ -1,8 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/PythonScrapyWeather.iml" filepath="$PROJECT_DIR$/.idea/PythonScrapyWeather.iml" />
    </modules>
  </component>
</project>

@@ -1,17 +0,0 @@
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy


class PythonscrapyweatherItem(scrapy.Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    province_Name = scrapy.Field()
    city_Name = scrapy.Field()
    date = scrapy.Field()
    temperature = scrapy.Field()
    weather_condition = scrapy.Field()
    air_quality = scrapy.Field()
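
For orientation only, here is a short, hypothetical illustration of how the item defined above can be populated. The field names come from the class; the values are invented and the snippet is not part of the repository.

    # Illustration only: populating PythonscrapyweatherItem (values are invented).
    from PythonScrapyWeather.items import PythonscrapyweatherItem

    item = PythonscrapyweatherItem()
    item["province_Name"] = "Guangdong"   # example value
    item["city_Name"] = "Guangzhou"       # example value
    item["date"] = "2021-06-01"           # example value
    print(dict(item))                     # scrapy.Item supports dict-style access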

@@ -1,103 +0,0 @@
# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals

# useful for handling different item types with a single interface
from itemadapter import is_item, ItemAdapter


class PythonscrapyweatherSpiderMiddleware:
    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the spider middleware does not modify the
    # passed objects.

    @classmethod
    def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_spider_input(self, response, spider):
        # Called for each response that goes through the spider
        # middleware and into the spider.
        # Should return None or raise an exception.
        return None

    def process_spider_output(self, response, result, spider):
        # Called with the results returned from the Spider, after
        # it has processed the response.
        # Must return an iterable of Request, or item objects.
        for i in result:
            yield i

    def process_spider_exception(self, response, exception, spider):
        # Called when a spider or process_spider_input() method
        # (from other spider middleware) raises an exception.
        # Should return either None or an iterable of Request or item objects.
        pass

    def process_start_requests(self, start_requests, spider):
        # Called with the start requests of the spider, and works
        # similarly to the process_spider_output() method, except
        # that it doesn't have a response associated.
        # Must return only requests (not items).
        for r in start_requests:
            yield r

    def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name)


class PythonscrapyweatherDownloaderMiddleware:
    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the downloader middleware does not modify the
    # passed objects.

    @classmethod
    def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_request(self, request, spider):
        # Called for each request that goes through the downloader
        # middleware.
        # Must either:
        # - return None: continue processing this request
        # - or return a Response object
        # - or return a Request object
        # - or raise IgnoreRequest: process_exception() methods of
        #   installed downloader middleware will be called
        return None

    def process_response(self, request, response, spider):
        # Called with the response returned from the downloader.
        # Must either:
        # - return a Response object
        # - return a Request object
        # - or raise IgnoreRequest
        return response

    def process_exception(self, request, exception, spider):
        # Called when a download handler or a process_request()
        # (from other downloader middleware) raises an exception.
        # Must either:
        # - return None: continue processing this exception
        # - return a Response object: stops process_exception() chain
        # - return a Request object: stops process_exception() chain
        pass

    def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name)

@@ -1,70 +0,0 @@
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html

# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
import pymysql


class PythonscrapyweatherPipeline(object):
    # Database connection: the settings are stored here; the live
    # connection is opened in open_spider().
    def __init__(self, settings):
        # self.connect = pymysql.connect(
        #     host='localhost',
        #     port=3306,
        #     db='datasave_sql',
        #     user='root',
        #     password='123456',
        #     charset="utf8",
        #     use_unicode=False)
        # # Use a cursor for inserts, deletes, queries and updates
        # self.cursor = self.connect.cursor()
        # self.cursor.execute("SELECT VERSION()")
        self.settings = settings
        print("Connected successfully")  # printed at init; the real connection happens in open_spider()

    def process_item(self, item, spider):
        print("Starting insert")
        # Insert into the database (a parameterized alternative is sketched after this file's diff)
        sql = '''INSERT INTO weathers(city_Name,date,temperature,weather_condition,air_quality)
                 VALUES("{}","{}","{}","{}","{}")'''
        try:
            self.cursor.execute(sql.format(
                # pymysql.converters.escape_string("1"),
                pymysql.converters.escape_string(item["city_Name"]),
                pymysql.converters.escape_string(item["date"]),
                pymysql.converters.escape_string(item["temperature"]),
                pymysql.converters.escape_string(item["weather_condition"]),
                pymysql.converters.escape_string(item["air_quality"])))
            self.connect.commit()
            print(self.cursor.rowcount, "record(s) inserted successfully.")
        except BaseException as e:
            print("Error here >>>>>>>>>>>>>", e, "<<<<<<<<<<<<< error here")
            self.connect.rollback()
        return item

    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler.settings)

    def open_spider(self, spider):
        # Connect to the database
        self.connect = pymysql.connect(
            host=self.settings.get('MYSQL_HOST'),
            port=self.settings.get('MYSQL_PORT'),
            db=self.settings.get('MYSQL_DBNAME'),
            user=self.settings.get('MYSQL_USER'),
            passwd=self.settings.get('MYSQL_PASSWD'),
            charset='utf8',
            use_unicode=True)
        # Use a cursor for inserts, deletes, queries and updates
        self.cursor = self.connect.cursor()
        self.connect.autocommit(True)

    # Close the database connection
    def close_spider(self, spider):
        self.cursor.close()
        self.connect.close()
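
The pipeline above interpolates values into the SQL string with str.format() plus escape_string(). As a hedged alternative (not what the project does), pymysql can bind the values itself through %s placeholders; the helper below is a hypothetical sketch that reuses the same table and column names.

    # Sketch only: parameterized insert with pymysql placeholders, assuming the
    # same `weathers` table and item fields used by the pipeline above.
    def insert_weather_row(cursor, item):
        sql = ("INSERT INTO weathers "
               "(city_Name, date, temperature, weather_condition, air_quality) "
               "VALUES (%s, %s, %s, %s, %s)")
        # pymysql escapes each bound value, so no manual escape_string() is needed
        cursor.execute(sql, (
            item["city_Name"],
            item["date"],
            item["temperature"],
            item["weather_condition"],
            item["air_quality"],
        ))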

@@ -1,110 +0,0 @@
# Scrapy settings for PythonScrapyWeather project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
BOT_NAME = 'PythonScrapyWeather'
SPIDER_MODULES = ['PythonScrapyWeather.spiders']
NEWSPIDER_MODULE = 'PythonScrapyWeather.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'PythonScrapyWeather (+http://www.yourdomain.com)'
# Obey robots.txt rules
ROBOTSTXT_OBEY = True
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
#COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False
# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
#}
# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
# 'PythonScrapyWeather.middlewares.PythonscrapyweatherSpiderMiddleware': 543,
#}
# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
# 'PythonScrapyWeather.middlewares.PythonscrapyweatherDownloaderMiddleware': 543,
#}
# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
#}
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
#ITEM_PIPELINES = {
# 'PythonScrapyWeather.pipelines.PythonscrapyweatherPipeline': 300,
#}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
# Configure the downloader middleware
DOWNLOADER_MIDDLEWARES = {
    'PythonScrapyWeather.middlewares.PythonscrapyweatherDownloaderMiddleware': 543,
}
# Set the default request headers
DEFAULT_REQUEST_HEADERS = {
    'User-Agent': "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36",
}
# Enable cookies
COOKIES_ENABLED = True
# Proxy pool (not configured)
# IP_PROXY
ITEM_PIPELINES = {
    'PythonScrapyWeather.pipelines.PythonscrapyweatherPipeline': 300,
}
MYSQL_HOST = 'localhost'
MYSQL_DBNAME = 'datasave_sql'
MYSQL_USER = 'root'
MYSQL_PASSWD = '123456'
MYSQL_PORT = 3306

@@ -1,85 +0,0 @@
import scrapy
import requests
from PythonScrapyWeather.items import PythonscrapyweatherItem

"""
There are two ways to crawl multiple pages:
1. Collect the URLs of sub-pages from one or more index pages, then have parse() crawl each sub-page in turn.
2. Crawl recursively. This is relatively simple in Scrapy: define the start pages and the crawl rules (rules), and the recursive crawl runs automatically (see the CrawlSpider sketch after this file's diff).
"""


class WeathersSpider(scrapy.Spider):
    name = 'Weathers'
    allowed_domains = ['tianqi.com']
    start_urls = ['http://tianqi.com/']

    def parse(self, response):
        url = "https://www.tianqi.com"
        allProvince_list = response.xpath('//div[@class="tqqgsf"]/p/a/text()').extract()
        allCity_list = response.xpath('//div[@class="tqqgsf"]/p/a/@href').extract()
        print("*************allCity_list*************", allCity_list)
        for city_name in allCity_list:
            city_url = city_name
            print("*************city_url*************", city_url)
            # Then request, for each province / municipality URL, the page listing all of its cities
            yield scrapy.Request(city_url, callback=self.subpage_content)

    # Handle the response of each province page, which lists all of its cities
    def subpage_content(self, response):
        print("response", response.status)
        try:
            # Instantiate the item
            item = PythonscrapyweatherItem()
            # Use XPath to pull the required elements out of the HTML
            province_Data = response.xpath('//div[@class="left"]/div[5]')
            # print("*************province_Data*************", province_Data)
            for province_name in province_Data:
                item["province_Name"] = province_name.xpath('//div[@class="left"]/div[5]/div/h2/text()').extract()[0]
                province_Name = item["province_Name"]
                print("*****************province_Name*******************", province_Name)
            # Get the links to the cities/districts inside each province
            province_url = response.xpath('/html/body/div[7]/div[1]/div[5]/ul/li/a[1]/@href').extract()
            print(province_url)
            for city_url1 in province_url:
                url_test = 'http://tianqi.com/' + city_url1
                print(url_test)
                yield scrapy.Request(url_test, callback=self.subpage_content_1)
            # return item
            # requests.get('http://tianqi.com/' + city_url)
            # weather_Detail_Data = response.xpath('//div[@class="left"]')
            # for weather_detail in weather_Detail_Data:
            #     # Fill in the item fields
            #     item["city_Name"] = weather_detail.xpath('dl/dd[@class ="name"]/h1/text()').extract()[0]
            #     city_Name1 = item["city_Name"]
            #     print("*************************************************111111", city_Name1)
            #     item["date"] = weather_detail.xpath('dl/dd[@class="week"]/text()').extract()[0]
            #     item["temperature"] = weather_detail.xpath('dl/dd[@class="weather"]/span/text()').extract()[0]
            #     item["weather_condition"] = weather_detail.xpath('dl/dd[@class="weather"]/span/b/text()').extract()[0]
            #     item["air_quality"] = weather_detail.xpath('dl/dd[@class="kongqi"]/h5/text()').extract()[0]
            # return item
        except Exception:
            print(response.status)
            pass

    def subpage_content_1(self, response):
        print("response2", response.status)
        try:
            # Instantiate the item
            item = PythonscrapyweatherItem()
            weather_Detail_Data = response.xpath('//div[@class="left"]')
            for weather_detail in weather_Detail_Data:
                # Fill in the item fields
                item["city_Name"] = weather_detail.xpath('dl/dd[@class ="name"]/h1/text()').extract()[0]
                # city_Name1 = item["city_Name"]
                # print("*************************************************111111", city_Name1)
                item["date"] = weather_detail.xpath('dl/dd[@class="week"]/text()').extract()[0]
                item["temperature"] = weather_detail.xpath('dl/dd[@class="weather"]/span/text()').extract()[0]
                item["weather_condition"] = weather_detail.xpath('dl/dd[@class="weather"]/span/b/text()').extract()[0]
                item["air_quality"] = weather_detail.xpath('dl/dd[@class="kongqi"]/h5/text()').extract()[0]
            return item
        except Exception:
            print(response.status)
            pass
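
The module docstring above mentions a second, rule-based way of crawling that this spider does not implement. The sketch below shows that style with CrawlSpider and Rule; the class name, link extractor pattern, and callback are illustrative assumptions rather than project code.

    # Sketch only: the "recursive" style from the docstring, using CrawlSpider
    # rules instead of hand-written Request chains.
    from scrapy.spiders import CrawlSpider, Rule
    from scrapy.linkextractors import LinkExtractor


    class WeathersRuleSpider(CrawlSpider):   # hypothetical name
        name = 'WeathersRules'
        allowed_domains = ['tianqi.com']
        start_urls = ['https://www.tianqi.com/']

        # Follow every in-domain link and hand each response to parse_city;
        # a real spider would narrow this with allow= patterns.
        rules = (
            Rule(LinkExtractor(allow_domains=['tianqi.com']),
                 callback='parse_city', follow=True),
        )

        def parse_city(self, response):
            # Same field-extraction idea as subpage_content_1 above
            for block in response.xpath('//div[@class="left"]'):
                yield {
                    'city_Name': block.xpath('dl/dd[@class="name"]/h1/text()').get(),
                    'date': block.xpath('dl/dd[@class="week"]/text()').get(),
                }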

@@ -1,4 +0,0 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.

@@ -1,11 +0,0 @@
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html
[settings]
default = PythonScrapyWeather.settings
[deploy]
#url = http://localhost:6800/
project = PythonScrapyWeather