做完了捏

master
wkyuu 3 years ago
parent 0fea744a5b
commit 939095f29a

@ -12,7 +12,7 @@ selenium + redis + 分布式 + xpath + etree + 可视化
- [x] 数据可视化
- [ ] 预计两种模式:终端交互时,随机或以评价数为索引选取目标,给出取出的item的具体信息,例如价格趋势
- [ ] 选择目录,友好的选择交互体验
- [ ] 选择抽取item模式:热评就列出前五条,随机就随机取一条
- [ ] 选择主要参考方式(价格,评论)
- [ ] python打包exe需要图形化界面
## project
@ -198,6 +198,22 @@ def getFont(): # 列出可用的字体
plt.rcParams['font.family'] = ['Microsoft YaHei']
```
### Pandas
```python
import pandas as pd
df = pd.read_csv(filename, encoding = 'utf-8', header = 0, error_bad_lines = False)
df.columns # 查看所有列头的名字
df.xx # 获得xx那一列的信息
df['xx'] # 同上
df.sort_values(by = 'xx', ascending = True) # 按某一列排序
df.loc[index] # 取index行全部数据
df.loc[index][index2] # 取那行的某一数据
```
### Requests
经典老碟
@ -372,4 +388,16 @@ redisconn = redis.Redis(host = '127.0.0.1', port = '6379', password = 'x', db =
19[python matplotlib坐标轴设置的方法](https://www.csdn.net/tags/NtzaUgxsOTQ2NjgtYmxvZwO0O0OO0O0O.html)
20
20[史上最全用Pandas读取CSV看这篇就够了](https://cloud.tencent.com/developer/article/1856554)
21[pandas数据处理的常用操作](https://zhuanlan.zhihu.com/p/29535766)
22[★★pandas的数据输出显示设置](https://www.jianshu.com/p/5c0aa1fa19af)
23[解决pandasValueError: Cannot convert non-finite values (NA or inf) to integer](https://blog.csdn.net/zhongkeyuanchongqing/article/details/123599260)
24[pandas取dataframe特定行/列](https://www.cnblogs.com/nxf-rabbit75/p/10105271.html)
25[Pandas 获取DataFrame 的行索引和列索引](https://blog.csdn.net/YENTERTAINR/article/details/109254583)
26

@ -1,9 +1,12 @@
# -*- coding: utf-8 -*-
from lxml import etree
from concurrent.futures import ThreadPoolExecutor
import os
from concurrent.futures import ThreadPoolExecutor
from lxml import etree
import historyPrice
def gethtml(response, gethtml_mode = "url"): # 用etree格式化得到的对象
try:
if isElementTree(response):
@ -236,4 +239,4 @@ html = etree.HTML(item)
name = html.xpath(reg)[0]
'''
'''

@ -2,8 +2,6 @@
import os
from hyperlink import URL
# 修改要生成的文件名,下面的是默认,注意要用.csv结尾
FILENAME_CSV = {
"牛奶": "milk.csv",
@ -40,8 +38,6 @@ USER_AGENT = [
'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0'
]
COOKIES_FILENAME = "cookies.json"
# 历史价格查询网站 vveby.com
HISTORY_PRICE_URL = r"https://www.vveby.com/search?keyword="
@ -52,13 +48,13 @@ FONT = ['Microsoft YaHei']
BANNER = {
"main": '''
#================*main*=================#
# 1.主界面 [x]
# 2.介绍 [x]
# 1.主界面
# 2.介绍
# 3.数据可视化
# 4.向Redis中填充数据 [x]
# 5.清空 Redis 队列缓存 [x]
# 6.调用 milkSpider [x]
# 7.退出 [x]
# 4.向Redis中填充数据
# 5.清空 Redis 队列缓存
# 6.调用 milkSpider
# 7.退出
#========================================#
''',
"introduce": '''
@ -71,8 +67,10 @@ BANNER = {
#=============================================#
''',
"view": '''
#================*view*=================#
# 1.列出评论数最多的前几条商品信息
# 2.列出价格最低的前几条商品信息
# 3.返回上一层目录
#=============================================#
'''
}

@ -1,16 +1,23 @@
# -*- coding: utf-8 -*-
from matplotlib import pyplot as plt
import os
import re
import matplotlib
import pandas as pd
from matplotlib import pyplot as plt
import milkSpider
import settings
import re
plt.rcParams['font.family'] = settings.FONT
# pd.set_option('display.expand_frame_repr', True)
# pd.set_option('display.max_colwidth', 10)
class view:
def __init__(self, itemList):
self.id = itemList[0]
self.string = itemList[1]
self.name = itemList[1]
self.string = itemList[10]
def getFont(): # 列出可用的字体
font = sorted([f.name for f in matplotlib.font_manager.fontManager.ttflist])
@ -28,9 +35,10 @@ class view:
x = []
y = []
itemList.pop()
itemList.reverse()
while itemList:
temp = itemList.pop()
for temp in itemList:
date = temp[0] + "" + temp[1] + ""
price = "" + temp[2] + ""
@ -42,16 +50,21 @@ class view:
x.append(date)
y.append(price)
plt.title("价格趋势")
plt.title("商品 [{}] 价格趋势".format(self.name))
plt.bar(x, y, color = 'g', align = 'center')
plt.xticks(size = 10.0, rotation = 45)
plt.xlabel("日期")
plt.ylabel("价格")
plt.plot(x, y, color = 'red', linewidth = 5.0, linestyle = '--')
print("等待可视化界面结束。。。")
plt.show()
if self.string == 0:
print("该商品历史价格趋势数据尚未被收录!")
return
itemList = []
print("以下是商品 [{}] 的历史价格趋势:".format(self.name))
for astr in self.string.split(';'):
strList = str2data(astr)
try:
@ -61,5 +74,105 @@ class view:
break
show(itemList)
def getData():
pass
def listCatalogues():
    """Return ``(count, paths)`` for the entries of ``./Catalogues/``.

    Each path is the directory prefix ``./Catalogues/`` joined with the
    entry name. Entries are sorted by name so that menu numbers built from
    this list stay the same between runs (``os.listdir`` gives no ordering
    guarantee).

    Returns:
        tuple: ``(len(paths), paths)``; ``(0, [])`` when the directory does
        not exist.
    """
    path = r"./Catalogues/"
    try:
        names = sorted(os.listdir(path))
    except FileNotFoundError:
        # Nothing has been cached yet: report an empty catalogue list
        # instead of crashing with a traceback.
        return 0, []
    fileList = [path + filename for filename in names]
    return len(fileList), fileList
def _readCatalogue(filename):
    """Load a catalogue CSV, skipping malformed rows on old and new pandas."""
    try:
        # ``on_bad_lines`` is the pandas >= 1.3 spelling; the older
        # ``error_bad_lines`` keyword was removed in pandas 2.0.
        return pd.read_csv(filename, encoding = 'utf-8', header = 0, on_bad_lines = 'skip')
    except TypeError:
        # Older pandas rejects the unknown keyword; use the legacy one.
        return pd.read_csv(filename, encoding = 'utf-8', header = 0, error_bad_lines = False)


def getData(filename, catalogue):
    """Interactively browse one catalogue CSV sorted by a user-chosen column.

    The user first picks the sort column (review count, descending, or
    price, ascending) or returns with ``3``; then enters how many rows to
    list. The listed rows are printed, and the user can pick a row by its
    index label to print it and hand it to ``view(row).main()``, or type
    ``r`` to return to the caller.

    Args:
        filename: Path of the CSV file to load.
        catalogue: Display name of the catalogue, used only in prompts.

    Returns:
        None. All results are printed to the terminal.
    """
    sortColumns = {'1': '评论数量(条)', '2': '价格(人民币)'}

    # Step 1: choose the sort column, or leave.
    while True:
        print("# 当前选择的目录是 {} .".format(catalogue))
        milkSpider.showBanner(menu = "view")
        print("选择一项以查看or返回", end = '')
        choice = input()
        if choice in sortColumns:
            mode = sortColumns[choice]
            break
        if choice == '3':
            return
        print("无效选择!")

    # Step 2: choose how many rows to list.
    while True:
        print("当前选择的模式是以 [{}] 为基准的排序方式".format(mode))
        print("想要查看多少条数据(int)", end = '')
        try:
            # Parse with int() rather than eval(): typed text must never be
            # executed as Python code.
            flag = int(input().strip())
        except ValueError:
            print("无效输入!")
            continue
        if flag > 0:
            break
        # Zero or a negative count would yield an empty or truncated list.
        print("无效输入!")

    df = _readCatalogue(filename)
    # Prices are listed lowest first; review counts highest first.
    dfnew = df.sort_values(by = mode, ascending = (choice == '2'))
    try:
        dfnew = dfnew.fillna(0)
        dfnew[["评论数量(条)"]] = dfnew[["评论数量(条)"]].astype(int)
    except (KeyError, TypeError, ValueError, OverflowError):
        # Best effort only: if the review column is missing or cannot be
        # cast to int, keep the frame as it is and carry on.
        pass
    dfnew = dfnew[:flag]

    # Step 3: let the user inspect rows until they ask to return.
    while True:
        print(dfnew.iloc[:, [1, 2, 3]])
        indexList = list(dfnew.index)
        while True:
            print("选择一项id以查看详细信息(或者 [r] 返回上层目录)", end = '')
            answer = input().strip()
            if answer == 'r':
                return
            try:
                index = int(answer)
            except ValueError:
                print("无效输入!")
                continue
            if index in indexList:
                break
            print("无效输入!")

        toShow = dfnew.loc[index]
        print(toShow)
        aitem = view(toShow)
        aitem.main()

        while True:
            action = input("输入 [r]返回上一级菜单 [c]继续查看: ")
            if action == 'r':
                return
            if action == 'c':
                break
            print("无效选项!")
def _catalogueName(path):
    """Return the file name of ``path`` without directory or extension."""
    return os.path.splitext(os.path.basename(path))[0]


def main():
    """Show the catalogue menu and open the catalogue the user selects.

    Lists every path returned by ``listCatalogues()`` under a 1-based
    number, then reads a choice: a listed number opens that catalogue via
    ``getData``; ``r`` returns to the caller; anything else prints an error
    and shows the menu again.

    Returns:
        None.
    """
    length, fileList = listCatalogues()
    # Menu key ("1", "2", ...) -> catalogue path. The list does not change
    # while the menu is open, so the mapping is built once.
    case = {str(number): path for number, path in enumerate(fileList, start = 1)}
    while True:
        print("检测到当前缓存中共有{}个目录:".format(length))
        for key, path in case.items():
            print("# {}.{}".format(key, _catalogueName(path)))
        # Numbered from ``length`` rather than the loop variable so that an
        # empty catalogue list does not raise NameError.
        print("# {}.输入 [r] 返回上一级菜单".format(length + 1))
        print("选择一项以查看or返回", end = '')
        choice = input()
        if choice in case:
            getData(case[choice], _catalogueName(case[choice]))
        elif choice == 'r':
            return
        else:
            print("无效选择!")
# Script entry point: start the interactive catalogue browser only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()
'''
# 数据调试
import pandas as pd
filename = "./Catalogues/milk.csv"
df = pd.read_csv(filename, encoding = 'utf-8', header = 0, error_bad_lines = False)
'''

Loading…
Cancel
Save