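# NOTE(review): the next six lines look like repository web-page residue
# (topic-picker hints and file-size labels), not module content; confirm and
# delete them.
# You can not select more than 25 topics
# Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
# 77 lines
# 2.3 KiB
# 77 lines
# 2.3 KiB
# -*- coding: utf-8 -*-
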
import os
# Output CSV file name for each product category. Edit the values to change
# the generated file names; the ones below are the defaults. Every name must
# end with ".csv".
FILENAME_CSV = {
    "牛奶": "milk.csv",          # milk
    "苹果": "apple.csv",         # apple
    "橙子": "orange.csv",        # orange
    "芒果": "mango.csv",         # mango
    "冰淇淋": "iceCream.csv",    # ice cream
}

# Default listing pages to crawl, one JD.com category URL per product
# category. The trailing "# ok" markers are the original author's.
BASEURL = {
    '牛奶': 'https://list.jd.com/list.html?cat=1320,1585,9434',  # ok
    '苹果': 'https://list.jd.com/list.html?cat=12218,12221,13554',  # ok
    '橙子': 'https://list.jd.com/list.html?cat=12218,12221,13555',  # ok
    '芒果': 'https://list.jd.com/list.html?cat=12218,12221,13558',  # ok
    '冰淇淋': 'https://list.jd.com/list.html?cat=12218,13598,13603',  # ok
}

# Local path per product category, built under the current working directory
# at import time. Only the milk category has an entry. Both path components
# equal the `cat` value of the milk listing URL.
# NOTE(review): presumably a directory plus a file-name prefix inside it;
# confirm against the code that consumes FILEPATH.
# os.path.join replaces the former hard-coded '\\' separators: the result is
# the same on Windows and is a real nested path on other platforms.
FILEPATH = {
    '牛奶': os.path.join(os.getcwd(), '1320,1585,9434', '1320,1585,9434'),
}

# Redis-related settings.
# NOTE(review): the host is a bare name, presumably a hosts-file or DNS alias;
# confirm it resolves where this runs.
REDIS_HOST = 'tencentCloud'
# The port is kept as a string, exactly as before.
REDIS_PORT = '6379'
# NOTE(review): the password is hard-coded in source; consider loading it from
# the environment or a secrets store.
REDIS_PASSWORD = 'root'
# NOTE(review): presumably the Redis list key holding the queued URLs; verify
# against the code that reads it.
REDIS_LISTNAME = "urlList"

# Downloader-related settings: User-Agent header strings.
# NOTE(review): presumably the downloader picks one of these per request;
# verify against the downloader code.
USER_AGENT = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:97.0) Gecko/20100101 Firefox/97.0',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0',
]

# Price-history lookup site (vveby.com). The URL ends with "keyword=", so the
# search term is meant to be appended to it.
HISTORY_PRICE_URL = r"https://www.vveby.com/search?keyword="

# Font settings for the views (charts): a list of font family names.
# NOTE(review): presumably handed to Matplotlib so CJK labels render; confirm
# against the plotting code.
FONT = ['Microsoft YaHei']

# Banner (menu) texts, keyed by screen name.
# NOTE(review): each text line starts at column 0 because that is all the
# available copy of this file shows; if the original strings were indented,
# restore that indentation.
BANNER = {
    # Main menu. Entries read: 1 main screen, 2 introduction, 3 data
    # visualization, 4 fill Redis with data, 5 clear the Redis queue cache,
    # 6 run milkSpider, 7 quit.
    "main": '''
#================*main*=================#
# 1.主界面
# 2.介绍
# 3.数据可视化
# 4.向Redis中填充数据
# 5.清空 Redis 队列缓存
# 6.调用 milkSpider
# 7.退出
#========================================#
''',
    # Introduction screen. Lines read: 1 crawl with Selenium + requests
    # depending on the case, 2 use a thread pool to shorten the whole crawl,
    # 3 use Redis to schedule the crawl queue and make it distributed,
    # 4 visualize the data with Matplotlib; enter [r] to go back.
    "introduce": '''
#================*introduce*=================#
# 1.使用Selenium + requests分情况地爬取数据
# 2.使用线程池缩减爬取总流程
# 3.使用Redis调度爬取队列并实现分布式
# 4.使用Matplotlib将数据可视化
# 输入[r]返回上一级...
#=============================================#
''',
    # Visualization menu. Entries read: 1 list the top items by comment
    # count, 2 list the lowest-priced items, 3 return to the previous menu.
    "view": '''
#================*view*=================#
# 1.列出评论数最多的前几条商品信息
# 2.列出价格最低的前几条商品信息
# 3.返回上一层目录
#=============================================#
''',
}
