|
|
|
import logging
|
|
|
|
import random
|
|
|
|
import re
|
|
|
|
import jieba
|
|
|
|
import pandas as pd
|
|
|
|
from flask import Flask, render_template, jsonify
|
|
|
|
from nltk.corpus import stopwords
|
|
|
|
import utils
|
|
|
|
|
|
|
|
# author: cxy
|
|
|
|
# The Flask application object; the hook and route decorators in this file
# register their handlers on it.
app = Flask(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
class RequestFilter(logging.Filter):
    """Logging filter that rejects records whose message contains ``GET /time``."""

    def filter(self, record):
        """Return True to keep *record*, False when it mentions ``GET /time``."""
        return 'GET /time' not in record.getMessage()


@app.before_request
def setup_logging():
    """Keep ``GET /time`` requests out of the ``werkzeug`` logger output.

    Registered as a ``before_request`` hook, so it runs ahead of every
    request. It makes sure the ``werkzeug`` logger has at least one handler
    and that its first handler carries a ``RequestFilter``.

    The filter is installed only once: attaching a fresh instance on every
    request would make the handler's filter list grow without bound.
    """
    werkzeug_logger = logging.getLogger('werkzeug')

    # Ensure the log handler has been properly initialized.
    if not werkzeug_logger.handlers:
        werkzeug_logger.addHandler(logging.StreamHandler())

    handler = werkzeug_logger.handlers[0]

    # addFilter() only de-duplicates the very same object, so check by type
    # to avoid stacking one new filter instance per request.
    if not any(isinstance(existing, RequestFilter) for existing in handler.filters):
        handler.addFilter(RequestFilter())
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/')
def hello_world():
    """Render the ``main.html`` template for the site root."""
    page = render_template("main.html")
    return page
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/time')
def get_time():
    """Respond to ``/time`` with whatever ``utils.get_time()`` returns."""
    current_time = utils.get_time()
    return current_time
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/data')
def get_data():
    """Return the rows of ``./static/csv/barrage_clustered.csv`` as JSON.

    Each CSV row becomes one object keyed by column name.
    """
    records = pd.read_csv('./static/csv/barrage_clustered.csv').to_dict(orient='records')
    return jsonify(records)
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/wordcloud')
def wordcloud_data():
    """Return word frequencies of the barrage file for the ``/wordcloud`` route.

    Reads ``./static/csv/barrage.csv`` as plain UTF-8 text, segments it with
    ``jieba.lcut``, keeps only tokens that consist entirely of characters in
    the range U+4E00..U+9FA5 and are not stop words, and counts them.

    Returns:
        A JSON response containing a list of ``{'name': word, 'value': count}``
        objects, one per distinct word, in order of first occurrence.
    """
    # Function-scope import: the counter is only needed by this view.
    from collections import Counter

    file_name = './static/csv/barrage.csv'
    with open(file_name, encoding='utf-8') as f:
        txt = f.read()

    # NLTK's Chinese stop words plus the extra words this view also excludes.
    stopwords_list = set(stopwords.words('chinese'))
    stopwords_list.update(['都', '不', '好', '哈哈哈', '说', '还', '很', '没'])

    # Compile once up front rather than resolving the pattern for every token.
    chinese_only = re.compile(r'^[\u4e00-\u9fa5]+$')

    word_freq = Counter(
        word
        for word in jieba.lcut(txt)
        if chinese_only.match(word) and word not in stopwords_list
    )

    word_freq_list = [{'name': word, 'value': freq} for word, freq in word_freq.items()]
    return jsonify(word_freq_list)
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/world_comment')
def get_world_comment():
    """Return the comments of ``./static/csv/world_comment.csv`` grouped by URL.

    Rows are grouped on the ``url`` column. Each group yields one object with
    the group's ``content`` values under ``items`` and a ``time`` value picked
    at random from a fixed list of duration strings.
    """
    comments = pd.read_csv('./static/csv/world_comment.csv')
    durations = ['20s', '30s', '40s', '50s', '60s']

    # The group key itself is not part of the response, only its rows are.
    payload = [
        {'time': random.choice(durations), 'items': rows['content'].tolist()}
        for _, rows in comments.groupby('url')
    ]
    return jsonify(payload)
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/barrage_sentiment')
def get_barrage_comment():
    """Return the rows of ``./static/csv/barrage_sentiment.csv`` as JSON.

    Each CSV row becomes one object keyed by column name.
    """
    records = pd.read_csv('./static/csv/barrage_sentiment.csv').to_dict(orient='records')
    return jsonify(records)
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/barrage_count')
def count_rows():
    """Return the length of the ``barrage`` column of ``./static/csv/barrage.csv``.

    The count is sent as ``{'row_count': <int>}``.
    """
    barrage_column = pd.read_csv('./static/csv/barrage.csv')['barrage']
    return jsonify({'row_count': len(barrage_column)})
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/average_sentiment')
def average_sentiment():
    """Return the mean of the ``sentiment`` column as JSON.

    Reads ``./static/csv/barrage_sentiment.csv`` and responds with
    ``{'average_sentiment': <float>}``. When the mean is undefined (the
    column is empty or holds only missing values) the value is ``null``.
    """
    df = pd.read_csv('./static/csv/barrage_sentiment.csv')
    avg_sentiment = df['sentiment'].mean()

    # An undefined mean comes back as NaN, which would be written as a bare
    # `NaN` token; that is not valid JSON, so send null instead.
    if pd.isna(avg_sentiment):
        return jsonify({'average_sentiment': None})

    # Convert to a built-in float so serialization does not depend on the
    # pandas/NumPy scalar type.
    return jsonify({'average_sentiment': float(avg_sentiment)})
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/count_keywords')
def count_keywords():
    """Count the barrage rows that match the keyword pattern.

    Reads ``./static/csv/barrage.csv``, tests every value of the ``barrage``
    column against the alternation pattern below, and responds with
    ``{'keyword_count': <int>}``.
    """
    pattern = 'AI技术|人工智能|科技|智能'
    barrage_column = pd.read_csv('./static/csv/barrage.csv')['barrage']
    matches = barrage_column.str.contains(pattern)

    # Convert the sum to a built-in int before handing it to jsonify.
    total = int(matches.sum())
    return jsonify({'keyword_count': total})
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Start the Flask server only when this file is executed directly.
    app.run()
|