"""
profiler.py - performance profiling

Runs the search -> fetch -> analyse -> word-cloud pipeline under cProfile and
saves the result both as a text report and as a binary stats dump.
"""
import cProfile
import io
import pstats
import time

from main import search_videos, get_cid, get_danmaku, parse_danmaku
from analyse import filter_danmakus, count_danmakus, get_top_danmakus
from visualization import (filter_relevant_danmakus, preprocess_danmakus,
                           extract_words, remove_stopwords,
                           calculate_word_frequency, generate_wordcloud)

# Stopwords used when the caller does not supply a list. Kept as a tuple so
# the shared default can never be mutated by accident.
DEFAULT_STOPWORDS = ('的', '是', '在', '有', '和', '这', '了', '与')


def _process_video(video_id, keyword, stopwords, request_delay):
    """Fetch the danmaku of one video and run both analysis pipelines on it."""
    cid = get_cid(video_id)
    danmakus = get_danmaku(video_id, cid)
    xml_content = parse_danmaku(danmakus)  # assumes get_danmaku returns XML

    # Pause between videos (presumably to go easy on the remote API -- TODO
    # confirm). The profiler is still enabled here, so this wait shows up in
    # the report as time spent in time.sleep.
    time.sleep(request_delay)

    # Danmaku statistics.
    filtered_danmakus = filter_danmakus(xml_content, keyword)
    counted_danmakus = count_danmakus(filtered_danmakus)
    get_top_danmakus(counted_danmakus)

    # Word-frequency analysis and word cloud.
    relevant_danmakus = filter_relevant_danmakus(xml_content, keyword)
    preprocessed_danmakus = preprocess_danmakus(relevant_danmakus)
    words = extract_words(preprocessed_danmakus)
    words_no_stopwords = remove_stopwords(words, stopwords)
    word_freq = calculate_word_frequency(words_no_stopwords)
    generate_wordcloud(word_freq)


def _write_report(profiler, report_path, stats_path):
    """Write the collected statistics as a text report and a stats dump."""
    # Render the statistics into an in-memory stream, sorted by cumulative
    # time, then persist that text as UTF-8.
    output_stream = io.StringIO()
    stats = pstats.Stats(profiler, stream=output_stream)
    stats.sort_stats(pstats.SortKey.CUMULATIVE)
    stats.print_stats()

    with open(report_path, "w", encoding='utf-8') as output_file:
        output_file.write(output_stream.getvalue())

    # Binary dump, e.g. for visualisation with SnakeViz.
    profiler.dump_stats(stats_path)


def run_performance_analysis(keyword="python", max_results=5, stopwords=None,
                             request_delay=2, report_path="profiler.txt",
                             stats_path="output.prof"):
    """Profile the whole pipeline and record the results.

    Called without arguments it uses the original fixed settings.

    Args:
        keyword: Search keyword; also passed to the danmaku filters.
        max_results: Passed to ``search_videos`` as its second argument.
        stopwords: Words handed to ``remove_stopwords``. ``None`` selects
            ``DEFAULT_STOPWORDS``.
        request_delay: Seconds to sleep after fetching each video.
        report_path: Destination of the text report.
        stats_path: Destination of the binary stats dump.

    Raises:
        Whatever the pipeline functions raise. The profiler is disabled
        before the exception propagates, and no report is written.
    """
    if stopwords is None:
        stopwords = list(DEFAULT_STOPWORDS)

    profiler = cProfile.Profile()
    profiler.enable()  # start profiling
    try:
        video_list = search_videos(keyword, max_results)
        # NOTE(review): indentation was lost in the reviewed source; the
        # per-video analysis is assumed to belong inside this loop -- confirm.
        for video_id in video_list:
            _process_video(video_id, keyword, stopwords, request_delay)
    finally:
        # Always stop profiling, even when a fetch or analysis step fails,
        # so the profiler is never left enabled after an exception.
        profiler.disable()

    _write_report(profiler, report_path, stats_path)


if __name__ == "__main__":
    run_performance_analysis()