developed_code文件夹->用 cProfile 找出代码中的性能瓶颈并进行改进的版本 output文件夹->输出的内容main
parent
5baca231b8
commit
64d179c027
@ -0,0 +1,3 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
@ -0,0 +1,5 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<profile version="1.0">
|
||||
<option name="myName" value="Project Default" />
|
||||
</profile>
|
||||
</component>
|
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
@ -0,0 +1,49 @@
|
||||
"""
|
||||
生成基于弹幕数据的词云图
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import wordcloud
|
||||
from matplotlib.image import imread
|
||||
import jieba
|
||||
|
||||
def blue_color_func(_random_state=None, **_kwargs):
    """
    Return a fully saturated blue HSL color with a random lightness.

    Used as the ``color_func`` callback of the word cloud in this module.

    Parameters:
        _random_state: Accepted for callback compatibility but ignored; the
            lightness comes from NumPy's global random generator, so seed
            ``np.random`` if reproducible colors are needed.
        **_kwargs: Any additional keyword arguments (ignored).

    Returns:
        str: ``"hsl(210, 100%, L%)"`` where ``L`` is an integer in [50, 90).
    """
    lightness = np.random.randint(50, 90)
    return f"hsl(210, 100%, {lightness}%)"
|
||||
|
||||
def wordcloud_generation(
    danmu_data,
    stopwords=None,
    output_path='E:/Crawler/output/danmu_dwordcloud.png',
    mask_path="E:/Crawler/output/OIP.jpg",
    font_path='msyhl.ttc',
):
    """Build a word cloud from danmu comments and save it as an image.

    Parameters:
        danmu_data: pandas DataFrame with a ``'danmu'`` column; missing
            values are dropped and the rest are converted to ``str``.
        stopwords: Set of words to exclude. ``None`` (default) uses the
            built-in list of common Chinese function words.
        output_path: Destination file for the rendered image.
        mask_path: Image file passed to ``WordCloud`` as its ``mask``.
        font_path: Font file used for rendering the words.

    The defaults reproduce the previously hard-coded locations, so existing
    single-argument calls behave as before.
    """
    if stopwords is None:
        # Created per call so callers never share (or mutate) a default set.
        stopwords = {'我', '你', '他', '这', '个', '是', '的', '了', '啊', '吗', '吧', '就', '都', '不', '也'}

    comments = danmu_data['danmu'].dropna().astype(str).tolist()
    # Segment the whole corpus in one pass, then re-join the tokens with
    # spaces to form the text handed to WordCloud.
    segmented_text = ' '.join(jieba.lcut(' '.join(comments)))
    mask_image = imread(mask_path)

    cloud = wordcloud.WordCloud(
        stopwords=stopwords,
        width=1920,
        height=1200,
        background_color='white',
        font_path=font_path,
        mask=mask_image,
        max_words=100,
        color_func=blue_color_func,
    ).generate(segmented_text)
    cloud.to_file(output_path)
|
||||
|
||||
def main():
    """Load the danmu spreadsheet and render the word cloud from it."""
    danmu_frame = pd.read_excel(
        'E:/Crawler/output/Top8_Danmu.xlsx',
        sheet_name='Sheet1',
    )
    wordcloud_generation(danmu_frame)


# Only run when executed as a script, not when imported by the profiler.
if __name__ == '__main__':
    main()
|
@ -0,0 +1,22 @@
|
||||
"""
|
||||
定义通用的HTTP请求头
|
||||
"""
|
||||
|
||||
# Browser-like HTTP request headers shared by the crawler modules.
# NOTE(review): the 'cookie' value is a '...' placeholder here; presumably a
# real session cookie must be supplied before use -- confirm.
HEADERS = {
    "authority": "api.bilibili.com",
    "accept": "application/json, text/plain, */*",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "cookie": "...",
    "origin": "https://www.bilibili.com",
    "referer": "https://space.bilibili.com/1760559884?spm_id_from=333.788.0.0",
    "sec-ch-ua": '"Not)A;Brand";v="99", "Microsoft Edge";v="127", "Chromium";v="127"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "same-origin",
    "upgrade-insecure-requests": "1",
    # Adjacent literals are concatenated into one user-agent string.
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0"
    ),
}
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,29 @@
|
||||
import cProfile
|
||||
import pstats
|
||||
import a_wordcloud
|
||||
import bvid
|
||||
import to_allexcel
|
||||
import to_danmu
|
||||
import to_excel
|
||||
|
||||
def run_all():
    """Run every pipeline stage once, with the word cloud stage last.

    ``a_wordcloud.main()`` reads the exported danmu spreadsheet from disk,
    so it must not run before the stages that crawl and export the data;
    previously it ran first and could only work on output left over from an
    earlier run.
    """
    # NOTE(review): order assumes bvid -> to_danmu -> to_allexcel -> to_excel
    # each feed the next stage; confirm against those modules.
    bvid.main()
    to_danmu.main()
    to_allexcel.main()
    to_excel.main()
    a_wordcloud.main()
|
||||
|
||||
# Script entry point: profile the whole pipeline and write both the raw
# stats file and a human-readable report.
if __name__ == '__main__':
    profiler = cProfile.Profile()
    profiler.enable()
    try:
        run_all()
    finally:
        # Always stop profiling and persist what was collected, even when a
        # stage raises, so a failed run can still be analysed. The original
        # exception (if any) still propagates afterwards.
        profiler.disable()
        profiler.dump_stats('performance_profile.prof')

        # Text report sorted by cumulative time; explicit encoding keeps the
        # file readable regardless of the platform's default code page.
        with open('performance_report.txt', 'w', encoding='utf-8') as f:
            ps = pstats.Stats(profiler, stream=f)
            ps.sort_stats('cumulative')
            ps.print_stats()
|
@ -0,0 +1,48 @@
|
||||
"""
|
||||
生成基于弹幕数据的词云图
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import wordcloud
|
||||
from matplotlib.image import imread
|
||||
import jieba
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
def blue_color_func(_random_state=None, **_kwargs):
    """Return ``"hsl(210, 100%, L%)"`` with a random integer L in [50, 90).

    Both parameters are accepted only for callback compatibility and are
    ignored; the lightness is drawn from NumPy's global random generator.
    """
    lightness = np.random.randint(50, 90)
    return f"hsl(210, 100%, {lightness}%)"
|
||||
|
||||
def process_text(danmu_list):
    """Segment each danmu text with jieba and join all tokens with spaces.

    Parameters:
        danmu_list: Iterable of comment strings.

    Returns:
        str: The tokens of every comment, space-separated, in input order.
            An empty input yields an empty string.
    """
    # Word segmentation is CPU-bound Python work, so on standard (GIL)
    # CPython builds a thread pool cannot run it in parallel and only adds
    # scheduling overhead. Segmenting sequentially gives the same string.
    return ' '.join(' '.join(jieba.lcut(text)) for text in danmu_list)
|
||||
|
||||
def wordcloud_generation(
    danmu_data,
    stopwords,
    output_path,
    mask_path="/output/OIP.jpg",
    font_path='msyhl.ttc',
):
    """Build a word cloud from danmu comments and save it as an image.

    Parameters:
        danmu_data: pandas DataFrame with a ``'danmu'`` column; missing
            values are dropped and the rest are converted to ``str``.
        stopwords: Set of words to exclude from the cloud.
        output_path: Destination file for the rendered image.
        mask_path: Image file passed to ``WordCloud`` as its ``mask``.
        font_path: Font file used for rendering the words.

    The two optional parameters default to the previously hard-coded values,
    so existing three-argument calls behave as before.
    """
    # NOTE(review): the default mask path is anchored at the filesystem/drive
    # root, while the spreadsheet in this module is read from
    # 'E:/Crawler/output/'. Confirm the intended location and pass
    # ``mask_path`` explicitly if it differs.
    comments = danmu_data['danmu'].dropna().astype(str).tolist()
    segmented_text = process_text(comments)
    mask_image = imread(mask_path)

    cloud = wordcloud.WordCloud(
        stopwords=stopwords,
        width=1920,
        height=1200,
        background_color='white',
        font_path=font_path,
        mask=mask_image,
        max_words=100,
        color_func=blue_color_func,
    ).generate(segmented_text)
    cloud.to_file(output_path)
|
||||
|
||||
def main():
    """Read the danmu spreadsheet, render the word cloud, and report completion."""
    # Common single-character function words excluded from the cloud.
    excluded_words = {'我', '你', '他', '这', '个', '是', '的', '了', '啊', '吗', '吧', '就', '都', '不', '也'}
    danmu_frame = pd.read_excel(
        'E:/Crawler/output/Top8_Danmu.xlsx',
        sheet_name='Sheet1',
    )
    wordcloud_generation(
        danmu_frame,
        excluded_words,
        '/output/danmu_dwordcloud.png',
    )
    print("词云图生成完成!")


# Only run when executed as a script, not when imported by the profiler.
if __name__ == '__main__':
    main()
|
@ -0,0 +1,22 @@
|
||||
"""
|
||||
定义通用的HTTP请求头
|
||||
"""
|
||||
|
||||
# Browser-like HTTP request headers shared by the crawler modules.
# NOTE(review): the 'cookie' value is a '...' placeholder here; presumably a
# real session cookie must be supplied before use -- confirm.
HEADERS = {
    "authority": "api.bilibili.com",
    "accept": "application/json, text/plain, */*",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "cookie": "...",
    "origin": "https://www.bilibili.com",
    "referer": "https://space.bilibili.com/1760559884?spm_id_from=333.788.0.0",
    "sec-ch-ua": '"Not)A;Brand";v="99", "Microsoft Edge";v="127", "Chromium";v="127"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "same-origin",
    "upgrade-insecure-requests": "1",
    # Adjacent literals are concatenated into one user-agent string.
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0"
    ),
}
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,29 @@
|
||||
import cProfile
|
||||
import pstats
|
||||
import a_wordcloud
|
||||
import bvid
|
||||
import to_allexcel
|
||||
import to_danmu
|
||||
import to_excel
|
||||
|
||||
def run_all():
    """Call ``main()`` of each pipeline module in sequence.

    Order: bvid, to_danmu, to_allexcel, to_excel, a_wordcloud.
    """
    pipeline = (bvid, to_danmu, to_allexcel, to_excel, a_wordcloud)
    for stage in pipeline:
        stage.main()
|
||||
|
||||
# Script entry point: profile the whole pipeline and write both the raw
# stats file and a human-readable report.
if __name__ == '__main__':
    profiler = cProfile.Profile()
    profiler.enable()
    try:
        run_all()
    finally:
        # Always stop profiling and persist what was collected, even when a
        # stage raises, so a failed run can still be analysed. The original
        # exception (if any) still propagates afterwards.
        profiler.disable()
        profiler.dump_stats('performance_profile.prof')

        # Text report sorted by cumulative time; explicit encoding keeps the
        # file readable regardless of the platform's default code page.
        with open('performance_report.txt', 'w', encoding='utf-8') as f:
            ps = pstats.Stats(profiler, stream=f)
            ps.sort_stats('cumulative')
            ps.print_stats()
|
Binary file not shown.
After Width: | Height: | Size: 17 KiB |
Binary file not shown.
After Width: | Height: | Size: 22 KiB |
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
After Width: | Height: | Size: 13 KiB |
Binary file not shown.
Loading…
Reference in new issue