You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

62 lines
2.2 KiB

"""
profiler.py - 性能分析
"""
import time
import cProfile
import pstats
import io
from main import search_videos, get_cid, get_danmaku, parse_danmaku
from analyse import filter_danmakus, count_danmakus, get_top_danmakus
from visualization import (filter_relevant_danmakus, preprocess_danmakus,
extract_words, remove_stopwords,
calculate_word_frequency, generate_wordcloud)
def run_performance_analysis(
    *,
    keyword: str = "python",
    max_results: int = 5,
    request_delay: float = 2,
    report_path: str = "profiler.txt",
    stats_path: str = "output.prof",
) -> None:
    """Profile the danmaku fetch/analysis pipeline and save the results.

    Runs the search -> fetch -> parse -> analyse -> word-cloud pipeline under
    ``cProfile``, then writes a text report sorted by cumulative time and
    dumps the raw profile data.

    Args:
        keyword: Search keyword; also passed to the danmaku filter steps.
        max_results: Passed to ``search_videos`` as its second argument.
        request_delay: Seconds to sleep after fetching each video's danmaku.
            The sleep happens while the profiler is enabled.
        report_path: Destination of the text report (written as UTF-8).
        stats_path: Destination of the raw stats dump (e.g. for SnakeViz).

    Any exception raised by the pipeline propagates to the caller, but only
    after the profiler has been disabled; no report is written in that case.
    """
    # NOTE(review): every entry is an empty string in the source as received;
    # the original stopwords may have been lost in transit -- confirm.
    stopwords = ['', '', '', '', '', '', '', '']

    profiler = cProfile.Profile()
    profiler.enable()  # start profiling
    try:
        # Fetch the list of videos to analyse.
        video_list = search_videos(keyword, max_results)

        # NOTE(review): the source's indentation was lost, so the extent of
        # this loop is an assumption. The whole per-video pipeline is kept
        # inside it so that an empty video list never reaches the processing
        # steps with `xml_content` undefined -- confirm against the original.
        for video_id in video_list:
            cid = get_cid(video_id)
            danmakus = get_danmaku(video_id, cid)
            # Assumes get_danmaku returns XML.
            xml_content = parse_danmaku(danmakus)
            time.sleep(request_delay)

            # Process the danmaku.
            filtered_danmakus = filter_danmakus(xml_content, keyword)
            counted_danmakus = count_danmakus(filtered_danmakus)
            get_top_danmakus(counted_danmakus)

            # Further analysis: word frequencies and word cloud.
            relevant_danmakus = filter_relevant_danmakus(xml_content, keyword)
            preprocessed_danmakus = preprocess_danmakus(relevant_danmakus)
            words = extract_words(preprocessed_danmakus)
            words_no_stopwords = remove_stopwords(words, stopwords)
            word_freq = calculate_word_frequency(words_no_stopwords)
            generate_wordcloud(word_freq)
    finally:
        # Always stop profiling, even when the pipeline raises.
        profiler.disable()

    # Write the report, sorted by cumulative time, straight to the file.
    with open(report_path, "w", encoding='utf-8') as output_file:
        stats = pstats.Stats(profiler, stream=output_file)
        stats.sort_stats(pstats.SortKey.CUMULATIVE)
        stats.print_stats()

    # Save the raw profile data so it can be visualised with SnakeViz.
    profiler.dump_stats(stats_path)
# Script entry point: run the profiling session only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    run_performance_analysis()