测试文件

main
QMZ 2 months ago
parent de37993285
commit 5eb71c03ff

Binary file not shown.

@ -0,0 +1,20 @@
import requests
from bs4 import BeautifulSoup
import re
import time
import jieba
import wordcloud
import matplotlib.pyplot as plt
import pandas as pd
from pandas import ExcelWriter
from collections import Counter
def chuli(etxt):
    """Render a word cloud from an iterable of tokens.

    Keeps only tokens longer than one character (drops single-character
    filler words such as interjections), builds the cloud image, and
    displays it with matplotlib.
    """
    # Filter out single-character tokens; they are mostly noise.
    words = [w for w in etxt if len(w) > 1]
    # Square white-background canvas; simsun.ttc supplies CJK glyphs.
    wc = wordcloud.WordCloud(background_color='white', height=1000,
                             width=1000, font_path='simsun.ttc')
    wc.generate(' '.join(words))  # build the cloud from the joined tokens
    print(wc)
    plt.imshow(wc)
    plt.show()

@ -0,0 +1,60 @@
import requests
from bs4 import BeautifulSoup
import re
# Browser-like User-Agent so Bilibili does not reject the scraping requests.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0' }
# Download the raw HTML of a search-results page.
def get_search_page(search_url):
    """Fetch *search_url* and return the response body as text.

    Raises requests.HTTPError on a non-2xx status instead of
    letting the caller parse an error page.
    """
    resp = requests.get(search_url, headers=headers)
    resp.raise_for_status()  # fail loudly on HTTP errors
    return resp.text
# Collect the video links from a search-results page.
def extract_video_links(page_content):
    """Parse *page_content* (HTML) and return the hrefs of all video cards.

    Anchors without an ``href`` attribute are skipped, so the result
    never contains None entries (the original appended them).
    """
    soup = BeautifulSoup(page_content, 'html.parser')
    video_links = []
    for a_tag in soup.select(".video-list.row div.bili-video-card > div > a"):
        link = a_tag.get('href')
        if link:  # a_tag.get() returns None when href is absent
            video_links.append(link)
    return video_links
# Pull the BV identifier out of a video URL.
def extract__BV(video_url):
    """Return the BV id embedded in *video_url*, or None when absent.

    The character class stops at '/', '?' and '#' so query strings and
    fragments (e.g. .../video/BV1xx?p=2) are not swallowed into the id,
    which the original pattern ([^/]+) did.
    """
    match = re.search(r'/video/([^/?#]+)', video_url)
    return match.group(1) if match else None
def get_cid_from_bv(bv_ids):
    """Resolve each BV id in *bv_ids* to its cid via the Bilibili view API.

    BV ids whose API reply carries a non-zero ``code`` are skipped, so
    the returned list may be shorter than the input.
    Raises requests.HTTPError on a non-2xx response.
    """
    collected = []
    for bv in bv_ids:
        # Video-detail endpoint for this BV id.
        api_url = f'https://api.bilibili.com/x/web-interface/view?bvid={bv}'
        reply = requests.get(api_url, headers=headers)
        reply.raise_for_status()
        payload = reply.json()
        # code == 0 marks a successful lookup; extract the cid then.
        if payload.get('code') == 0:
            collected.append(payload.get('data', {}).get('cid'))
    return collected
# Orchestrate: search page -> video links -> BV ids -> cids.
def main(search_url):
    """Return the cids of the videos listed on *search_url*.

    Pipeline: fetch the search page, scrape the video links, extract a
    BV id from each link, then resolve the ids to cids. The dead
    ``cids = []`` assignment from the original (immediately overwritten)
    is removed.
    """
    page_content = get_search_page(search_url)
    video_links = extract_video_links(page_content)
    # Keep only links from which a BV id could actually be extracted.
    bvs = [bv for bv in (extract__BV(link) for link in video_links) if bv]
    return get_cid_from_bv(bvs)
if __name__ == '__main__':
    # Guarded so that importing this module (the unit tests do
    # ``import getcidfrombv as cid``) does not fire live network
    # requests as a side effect of the import.
    search_url = 'https://search.bilibili.com/all?keyword=2024巴黎奥运会'
    aa = main(search_url)
    print(aa)

@ -0,0 +1,32 @@
import requests
from bs4 import BeautifulSoup
import re
import time
import jieba
import wordcloud
import matplotlib.pyplot as plt
import pandas as pd
from pandas import ExcelWriter
from collections import Counter
# Browser-like User-Agent so Bilibili does not reject the requests.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0',
}
# AI-related vocabulary used to classify comments.
# Fix: a comma was missing between '数据科学' and '自然语言处理', which
# implicitly concatenated them into one unmatched keyword
# ('数据科学自然语言处理') and dropped both intended entries.
keywords = [
    'AI', '人工智能', '机器学习', '深度学习', '神经网络', '自动化',
    '算法', '数据科学', '自然语言处理', '计算机视觉', '人工智能技术', 'AI技术', 'AI应用', 'AI模型',
    '大数据', '预测分析', '机器视觉', '自动驾驶',
    '智能推荐', '计算机科学', '人工智能应用',
    '数据分析', '智能化', '情感计算', 'ai'
]
def sort(txt, keywords):
    """Count the comments in *txt* that mention any keyword.

    Each comment line is segmented with jieba; a comment matches when
    any segmented word appears in *keywords*. Returns a Counter mapping
    the stripped comment text to its occurrence count.
    """
    comment_counter = Counter()
    for line in txt:
        # Fix: str.strip() returns a new string; the original discarded
        # the result, so surrounding whitespace was never removed and
        # visually-identical comments could be counted under different keys.
        line = line.strip()
        if any(word in keywords for word in jieba.cut(line)):
            comment_counter[line] += 1
    return comment_counter

File diff suppressed because one or more lines are too long

@ -0,0 +1,51 @@
import requests
from bs4 import BeautifulSoup
import re
import getcidfrombv as cid
import unittest
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0' }
class TestFunctions(unittest.TestCase):
    """Integration tests for BV-id extraction and cid resolution.

    NOTE(review): test_getcid performs live Bilibili API calls, so it
    requires network access and the published cids to stay stable.
    """

    def setUp(self):
        # Fixture: ten known video URLs, scheme-less as scraped.
        self.content_list = [
            'www.bilibili.com/video/BV1mE4m1R71o',
            'www.bilibili.com/video/BV1wH4y1F7KD',
            'www.bilibili.com/video/BV1M142187NQ',
            'www.bilibili.com/video/BV1im42137XE',
            'www.bilibili.com/video/BV1wS421d7Mj',
            'www.bilibili.com/video/BV1FE4m1d7tS',
            'www.bilibili.com/video/BV1PU411S7HC',
            'www.bilibili.com/video/BV1fx4y1x7eW',
            'www.bilibili.com/video/BV1rw4m1r7nF',
            'www.bilibili.com/video/BV19E4m1R7Kf'
        ]

    def test_getcid(self):
        # Expected cids, one per fixture URL, in order.
        expected_output = [
            1646607366,
            1663877514,
            1648215637,
            1627769596,
            1628417603,
            1640306641,
            1627951394,
            1650192254,
            1649785565,
            1629439509
        ]
        bvs = [cid.extract__BV(link) for link in self.content_list]
        actual_output = cid.get_cid_from_bv(bvs)
        self.assertEqual(expected_output, actual_output)


if __name__ == '__main__':
    unittest.main()

@ -0,0 +1,50 @@
import unittest
from io import StringIO
from unittest.mock import patch
import jieba
import wordcloud
import matplotlib.pyplot as plt
from collections import Counter
import handle
# 单元测试类
class TestFunctions(unittest.TestCase):
    """Unit tests for handle.sort keyword-based comment counting."""

    def setUp(self):
        # Keyword vocabulary the classifier should match against.
        self.keywords = [
            'AI', '人工智能', '机器学习', '深度学习', '神经网络', '自动',
            '算法', '数据科学', '自然语言', '计算机', '人工智能技术',
            '大数据', '预测分析', '机器视觉',
            '智能', '计算机', '人工智能应用',
            '数据分析', '情感计算', 'ai'
        ]
        # Fixture comments. Fix: the original was missing a comma before
        # the trailing '' so it silently concatenated into the previous
        # item; it is now a separate element and exercises the
        # blank-line case (which must not be counted).
        self.sample_comments = [
            '人工智能是未来的趋势。',
            '机器学习算法在自动化中扮演了重要角色。',
            '深度学习和神经网络的结合使得AI技术更加强大。',
            '我喜欢学习计算机视觉和数据科学。',
            '自动驾驶技术还需要进一步发展。',
            '智能推荐系统可以大大提升用户体验。',
            '情感计算是自然语言处理的一个重要方向。',
            '这只是一些无关的评论。',
            ''
        ]

    def test_sort(self):
        # Every keyword-bearing comment appears exactly once; the
        # unrelated comment and the empty line are excluded.
        expected_output = Counter({
            '人工智能是未来的趋势。': 1,
            '机器学习算法在自动化中扮演了重要角色。': 1,
            '深度学习和神经网络的结合使得AI技术更加强大。': 1,
            '我喜欢学习计算机视觉和数据科学。': 1,
            '自动驾驶技术还需要进一步发展。': 1,
            '智能推荐系统可以大大提升用户体验。': 1,
            '情感计算是自然语言处理的一个重要方向。': 1
        })
        actual_output = handle.sort(self.sample_comments, self.keywords)
        self.assertEqual(expected_output, actual_output)


if __name__ == '__main__':
    unittest.main()

@ -0,0 +1,39 @@
import unittest
from unittest.mock import patch, MagicMock
from cloud import chuli # 替换为实际模块名
class TestChuliFunction(unittest.TestCase):
    """Unit test for cloud.chuli with wordcloud/matplotlib fully mocked,
    so no image is rendered and no font file is needed."""

    # NOTE: @patch decorators inject mocks bottom-up — the bottom-most
    # patch (plt.show) becomes the first mock argument.
    @patch('wordcloud.WordCloud')
    @patch('matplotlib.pyplot.imshow')
    @patch('matplotlib.pyplot.show')
    def test_chuli(self, mock_plt_show, mock_plt_imshow, MockWordCloud):
        # Create a mocked WordCloud instance whose generate() is a no-op.
        mock_wc = MockWordCloud.return_value
        mock_wc.generate.return_value = None
        # Input test data: four tokens, each longer than one character,
        # so chuli's length filter keeps all of them.
        test_input = ['测试', '词云', '功能', '正常']
        # Call chuli under the mocks.
        chuli(test_input)
        # Verify the WordCloud constructor received the expected settings.
        MockWordCloud.assert_called_once_with(
            background_color='white',
            height=1000,
            width=1000,
            font_path='simsun.ttc'
        )
        # Verify generate() received the space-joined tokens.
        mock_wc.generate.assert_called_once_with('测试 词云 功能 正常')
        # Verify the image was rendered...
        mock_plt_imshow.assert_called_once_with(mock_wc)
        # ...and shown.
        mock_plt_show.assert_called_once()
if __name__ == '__main__':
    unittest.main()
Loading…
Cancel
Save