developed_code文件夹->用 cProfile 找出代码中的性能瓶颈并进行改进的版本 output文件夹->输出的内容main
parent
5baca231b8
commit
64d179c027
@ -0,0 +1,3 @@
|
|||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
@ -0,0 +1,5 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<profile version="1.0">
|
||||||
|
<option name="myName" value="Project Default" />
|
||||||
|
</profile>
|
||||||
|
</component>
|
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
@ -0,0 +1,49 @@
|
|||||||
|
"""
|
||||||
|
生成基于弹幕数据的词云图
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import wordcloud
|
||||||
|
from matplotlib.image import imread
|
||||||
|
import jieba
|
||||||
|
|
||||||
|
def blue_color_func(_random_state=None, **_kwargs):
    """Return a blue HSL color string with a randomly chosen lightness.

    The function is handed to ``wordcloud.WordCloud`` as ``color_func``.
    Hue (210) and saturation (100%) are fixed; only the lightness varies.

    Parameters:
        _random_state: Optional ``random.Random`` instance. Any other value
            placed in this slot (for example a word passed positionally by
            the caller) is ignored.
        **_kwargs: Extra keyword arguments supplied by the caller. A
            ``random_state`` entry holding a ``random.Random`` instance is
            used as the randomness source; every other entry is ignored.

    Returns:
        str: A string of the form ``"hsl(210, 100%, L%)"`` where ``L`` is an
        integer with ``50 <= L <= 89``.
    """
    import random

    # NOTE(review): wordcloud is understood to pass its generator as the
    # ``random_state`` keyword - confirm against the installed version.
    rng = _kwargs.get("random_state", _random_state)
    if isinstance(rng, random.Random):
        # Drawing from the caller's generator keeps seeded runs reproducible.
        # Random.randint is inclusive, so 89 matches the range used below.
        lightness = rng.randint(50, 89)
    else:
        # No usable generator supplied: fall back to NumPy's global RNG
        # (upper bound exclusive).
        lightness = np.random.randint(50, 90)
    return f"hsl(210, 100%, {lightness}%)"
|
||||||
|
|
||||||
|
def wordcloud_generation(
    danmu_data,
    mask_path="E:/Crawler/output/OIP.jpg",
    output_path="E:/Crawler/output/danmu_dwordcloud.png",
    stopwords=None,
):
    """Build a word cloud from the ``danmu`` column and save it as an image.

    Parameters:
        danmu_data: Table with a ``danmu`` column (``main`` passes the
            DataFrame returned by ``pd.read_excel``). Missing values are
            dropped and the remaining values are converted to ``str``.
        mask_path: Image file loaded with ``imread`` and passed to
            ``WordCloud`` as ``mask``. Defaults to the previously hard-coded
            location.
        output_path: Destination of the rendered image. Defaults to the
            previously hard-coded location.
        stopwords: Words to exclude. ``None`` selects the built-in set of
            common Chinese function words used by the original code.

    Raises:
        ValueError: If the ``danmu`` column contains no usable text.
    """
    dm_list = danmu_data['danmu'].dropna().astype(str).tolist()
    if not dm_list:
        # Fail before segmenting text or loading the mask image; there is
        # nothing to draw.
        raise ValueError("no danmu text available to build a word cloud")

    # Segment the whole corpus with jieba and re-join the tokens with
    # spaces before handing the text to WordCloud.
    dm_string = ' '.join(dm_list)
    dmreal_string = ' '.join(jieba.lcut(dm_string))

    img = imread(mask_path)
    if stopwords is None:
        stopwords = {'我', '你', '他', '这', '个', '是', '的', '了', '啊', '吗', '吧', '就', '都', '不', '也'}

    wc = wordcloud.WordCloud(
        stopwords=stopwords,
        width=1920,
        height=1200,
        background_color='white',
        font_path='msyhl.ttc',
        mask=img,
        max_words=100,
        color_func=blue_color_func,
    ).generate(dmreal_string)
    wc.to_file(output_path)
|
||||||
|
|
||||||
|
def main():
    """Read the danmu spreadsheet and render the word cloud from it."""
    danmu_frame = pd.read_excel(
        'E:/Crawler/output/Top8_Danmu.xlsx',
        sheet_name='Sheet1',
    )
    wordcloud_generation(danmu_frame)


# Run the pipeline only when the file is executed as a script.
if __name__ == '__main__':
    main()
|
@ -0,0 +1,22 @@
|
|||||||
|
"""
|
||||||
|
定义通用的HTTP请求头
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os

# Request headers shared by the crawler modules. The values imitate a
# desktop Microsoft Edge 127 browser on Windows (see the user-agent entry).
HEADERS = {
    'authority': 'api.bilibili.com',
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    # Session cookie. Read from the BILIBILI_COOKIE environment variable when
    # it is set so the real value never has to be committed; otherwise the
    # original '...' placeholder is kept.
    'cookie': os.environ.get('BILIBILI_COOKIE', '...'),
    'origin': 'https://www.bilibili.com',
    'referer': 'https://space.bilibili.com/1760559884?spm_id_from=333.788.0.0',
    'sec-ch-ua': '"Not)A;Brand";v="99", "Microsoft Edge";v="127", "Chromium";v="127"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'upgrade-insecure-requests': '1',
    # Adjacent string literals are concatenated into one user-agent value.
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0',
}
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,29 @@
|
|||||||
|
import cProfile
|
||||||
|
import pstats
|
||||||
|
import a_wordcloud
|
||||||
|
import bvid
|
||||||
|
import to_allexcel
|
||||||
|
import to_danmu
|
||||||
|
import to_excel
|
||||||
|
|
||||||
|
def run_all():
    """Run every pipeline module's ``main()`` once, word cloud last.

    ``a_wordcloud.main()`` reads an existing spreadsheet, so it is executed
    after the data-producing steps instead of before them.

    NOTE(review): the relative order of the other four steps is unchanged.
    The developed_code version of this script runs ``to_danmu`` before
    ``to_allexcel`` - confirm which order these modules actually require.
    """
    bvid.main()
    to_allexcel.main()
    to_danmu.main()
    to_excel.main()
    # Presumably consumes the spreadsheet written by the steps above;
    # verify against those modules.
    a_wordcloud.main()
|
||||||
|
|
||||||
|
# Profile the whole pipeline with cProfile and save both the raw statistics
# and a text report.
if __name__ == '__main__':
    profiler = cProfile.Profile()
    profiler.enable()
    try:
        run_all()
    finally:
        # Stop profiling and persist the data even when a pipeline step
        # raises, so the time spent before the failure is not lost.
        profiler.disable()
        profiler.dump_stats('performance_profile.prof')

        # Write the report, sorted by cumulative time. The encoding is
        # explicit so the output does not depend on the platform default.
        with open('performance_report.txt', 'w', encoding='utf-8') as f:
            ps = pstats.Stats(profiler, stream=f)
            ps.sort_stats('cumulative')
            ps.print_stats()
|
@ -0,0 +1,48 @@
|
|||||||
|
"""
|
||||||
|
生成基于弹幕数据的词云图
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import wordcloud
|
||||||
|
from matplotlib.image import imread
|
||||||
|
import jieba
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
|
def blue_color_func(_random_state=None, **_kwargs):
    """Return a blue HSL color string with a randomly chosen lightness.

    The function is handed to ``wordcloud.WordCloud`` as ``color_func``.
    Hue (210) and saturation (100%) are fixed; only the lightness varies.

    Parameters:
        _random_state: Optional ``random.Random`` instance. Any other value
            placed in this slot is ignored.
        **_kwargs: Extra keyword arguments supplied by the caller. A
            ``random_state`` entry holding a ``random.Random`` instance is
            used as the randomness source; every other entry is ignored.

    Returns:
        str: ``"hsl(210, 100%, L%)"`` where ``L`` is an integer with
        ``50 <= L <= 89``.
    """
    import random

    # NOTE(review): wordcloud is understood to pass its generator as the
    # ``random_state`` keyword - confirm against the installed version.
    rng = _kwargs.get("random_state", _random_state)
    if isinstance(rng, random.Random):
        # Using the caller's generator makes seeded runs reproducible.
        # Random.randint is inclusive, so 89 matches the range used below.
        lightness = rng.randint(50, 89)
    else:
        # No usable generator supplied: fall back to NumPy's global RNG
        # (upper bound exclusive).
        lightness = np.random.randint(50, 90)
    return f"hsl(210, 100%, {lightness}%)"
|
||||||
|
|
||||||
|
def process_text(danmu_list):
    """Segment each danmu with jieba and join all tokens with spaces.

    Every item is tokenised on its own with ``jieba.lcut``; the tokens of one
    item are joined with a space, and the per-item results are joined with a
    space again, in input order.

    The work is done serially on purpose: tokenisation is CPU-bound Python
    code, so a thread pool cannot run it in parallel under the GIL and only
    adds one scheduled task per danmu. The result is identical to the
    order-preserving ``executor.map`` used before.

    Parameters:
        danmu_list: Iterable of danmu strings.

    Returns:
        str: The space-separated tokens; ``""`` for empty input.
    """
    return ' '.join(' '.join(jieba.lcut(text)) for text in danmu_list)
|
||||||
|
|
||||||
|
def wordcloud_generation(danmu_data, stopwords, output_path, mask_path="/output/OIP.jpg"):
    """Build a word cloud from the ``danmu`` column and save it as an image.

    Parameters:
        danmu_data: Table with a ``danmu`` column (``main`` passes the
            DataFrame returned by ``pd.read_excel``). Missing values are
            dropped and the remaining values are converted to ``str``.
        stopwords: Collection of words passed to ``WordCloud`` as
            ``stopwords``.
        output_path: Destination of the rendered image.
        mask_path: Image file loaded with ``imread`` and passed to
            ``WordCloud`` as ``mask``. Defaults to the previously hard-coded
            location.

    Raises:
        ValueError: If the ``danmu`` column contains no usable text.
    """
    # NOTE(review): the default mask path is rooted at "/output", while
    # main() reads its spreadsheet from "E:/Crawler/output" - confirm which
    # directory is intended.
    dm_list = danmu_data['danmu'].dropna().astype(str).tolist()
    if not dm_list:
        # Fail before segmenting text or loading the mask image; there is
        # nothing to draw.
        raise ValueError("no danmu text available to build a word cloud")

    dmreal_string = process_text(dm_list)
    img = imread(mask_path)

    wc = wordcloud.WordCloud(
        stopwords=stopwords,
        width=1920,
        height=1200,
        background_color='white',
        font_path='msyhl.ttc',
        mask=img,
        max_words=100,
        color_func=blue_color_func,
    ).generate(dmreal_string)
    wc.to_file(output_path)
|
||||||
|
|
||||||
|
def main():
    """Read the danmu spreadsheet, render the word cloud, report completion."""
    # Common function words that should not appear in the cloud.
    excluded_words = {'我', '你', '他', '这', '个', '是', '的', '了', '啊', '吗', '吧', '就', '都', '不', '也'}
    danmu_frame = pd.read_excel(
        'E:/Crawler/output/Top8_Danmu.xlsx',
        sheet_name='Sheet1',
    )
    wordcloud_generation(
        danmu_frame,
        excluded_words,
        '/output/danmu_dwordcloud.png',
    )
    print("词云图生成完成!")


# Run the pipeline only when the file is executed as a script.
if __name__ == '__main__':
    main()
|
@ -0,0 +1,22 @@
|
|||||||
|
"""
|
||||||
|
定义通用的HTTP请求头
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os

# Request headers shared by the crawler modules. The values imitate a
# desktop Microsoft Edge 127 browser on Windows (see the user-agent entry).
HEADERS = {
    'authority': 'api.bilibili.com',
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    # Session cookie. Read from the BILIBILI_COOKIE environment variable when
    # it is set so the real value never has to be committed; otherwise the
    # original '...' placeholder is kept.
    'cookie': os.environ.get('BILIBILI_COOKIE', '...'),
    'origin': 'https://www.bilibili.com',
    'referer': 'https://space.bilibili.com/1760559884?spm_id_from=333.788.0.0',
    'sec-ch-ua': '"Not)A;Brand";v="99", "Microsoft Edge";v="127", "Chromium";v="127"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'upgrade-insecure-requests': '1',
    # Adjacent string literals are concatenated into one user-agent value.
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0',
}
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,29 @@
|
|||||||
|
import cProfile
|
||||||
|
import pstats
|
||||||
|
import a_wordcloud
|
||||||
|
import bvid
|
||||||
|
import to_allexcel
|
||||||
|
import to_danmu
|
||||||
|
import to_excel
|
||||||
|
|
||||||
|
def run_all():
    """Call ``main()`` of every pipeline module, one after another.

    The modules run in this fixed order: ``bvid``, ``to_danmu``,
    ``to_allexcel``, ``to_excel``, ``a_wordcloud``.
    """
    pipeline = (bvid, to_danmu, to_allexcel, to_excel, a_wordcloud)
    for stage in pipeline:
        stage.main()
|
||||||
|
|
||||||
|
# Profile the whole pipeline with cProfile and save both the raw statistics
# and a text report.
if __name__ == '__main__':
    profiler = cProfile.Profile()
    profiler.enable()
    try:
        run_all()
    finally:
        # Stop profiling and persist the data even when a pipeline step
        # raises, so the time spent before the failure is not lost.
        profiler.disable()
        profiler.dump_stats('performance_profile.prof')

        # Write the report, sorted by cumulative time. The encoding is
        # explicit so the output does not depend on the platform default.
        with open('performance_report.txt', 'w', encoding='utf-8') as f:
            ps = pstats.Stats(profiler, stream=f)
            ps.sort_stats('cumulative')
            ps.print_stats()
|
Binary file not shown.
After Width: | Height: | Size: 17 KiB |
Binary file not shown.
After Width: | Height: | Size: 22 KiB |
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
After Width: | Height: | Size: 13 KiB |
Binary file not shown.
Loading…
Reference in new issue