diff --git a/app.py b/app.py
new file mode 100644
index 0000000..ff9c80a
--- /dev/null
+++ b/app.py
@@ -0,0 +1,114 @@
+import logging
+import random
+import re
+import jieba
+import pandas as pd
+from flask import Flask, render_template, jsonify
+from nltk.corpus import stopwords
+
+import utils
+
+app = Flask(__name__)
+
+
+@app.before_request
+def setup_logging():
+    # Ensure the werkzeug logging handler has been initialized
+    if not logging.getLogger('werkzeug').handlers:
+        logging.getLogger('werkzeug').addHandler(logging.StreamHandler())
+
+    class RequestFilter(logging.Filter):
+        def filter(self, record):
+            return 'GET /time' not in record.getMessage()  # hide /time polling requests
+
+    handler = logging.getLogger('werkzeug').handlers[0]
+    if not handler.filters:  # attach the filter only once
+        handler.addFilter(RequestFilter())
+
+
+@app.route('/')
+def hello_world():  # put application's code here
+    return render_template("main.html")
+
+
+@app.route('/time')
+def get_time():
+    return utils.get_time()
+
+
+@app.route('/data')
+def get_data():
+    df = pd.read_csv('./static/csv/barrage_clustered.csv')
+    data = df.to_dict(orient='records')
+    return jsonify(data)
+
+
+@app.route('/wordcloud')
+def wordcloud_data():
+    file_name = './static/csv/barrage.csv'
+    with open(file_name, encoding='utf-8') as f:
+        txt = f.read()
+
+    txt_list = jieba.lcut(txt)
+
+    stopwords_list = set(stopwords.words('chinese'))
+    stopwords_target = ['都', '不', '好', '哈哈哈', '说', '还', '很', '没']
+    for i in stopwords_target:
+        stopwords_list.add(i)
+
+    word_freq = {}
+    for word in txt_list:
+        if re.match(r'^[\u4e00-\u9fa5]+$', word) and word not in stopwords_list:
+            if word in word_freq:
+                word_freq[word] += 1
+            else:
+                word_freq[word] = 1
+    word_freq_list = [{'name': word, 'value': freq} for word, freq in word_freq.items()]
+    return jsonify(word_freq_list)
+
+
+@app.route('/world_comment')
+def get_world_comment():
+    file_path = './static/csv/world_comment.csv'
+    df = pd.read_csv(file_path)
+    data = []
+    grouped = df.groupby('url')
+    times = ['20s', '30s', '40s', '50s', '60s']
+    for url, group in grouped:
+        items = group['content'].tolist()
+        time = random.choice(times)
+        data.append({'time': time, 'items': items})
+    return jsonify(data)
+
+
+@app.route('/barrage_sentiment')
+def get_barrage_comment():
+    df = pd.read_csv('./static/csv/barrage_sentiment.csv')
+    data = df.to_dict(orient='records')
+    return jsonify(data)
+
+
+@app.route('/barrage_count')
+def count_rows():
+    df = pd.read_csv('./static/csv/barrage.csv')
+    row_count = len(df['barrage'])
+    return jsonify({'row_count': row_count})
+
+
+@app.route('/average_sentiment')
+def average_sentiment():
+    df = pd.read_csv('./static/csv/barrage_sentiment.csv')
+    avg_sentiment = df['sentiment'].mean()
+    return jsonify({'average_sentiment': avg_sentiment})
+
+
+@app.route('/count_keywords')
+def count_keywords():
+    df = pd.read_csv('./static/csv/barrage.csv')
+    keyword_count = df['barrage'].str.contains('AI技术|人工智能|科技|智能', na=False).sum()
+    keyword_count = int(keyword_count)
+    return jsonify({'keyword_count': keyword_count})
+
+
+if __name__ == '__main__':
+    app.run()
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..d3264e8
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,8 @@
+import time
+
+
+def get_time():
+    time_str = time.strftime("%Y{}%m{}%d{} %X")
+    return time_str.format("年", "月", "日")
+
+
diff --git a/wordcloud.png b/wordcloud.png
new file mode 100644
index 0000000..40b4788
Binary files /dev/null and b/wordcloud.png differ