diff --git a/iat_ws_python3.py b/0604/realtime_transcriber.py similarity index 95% rename from iat_ws_python3.py rename to 0604/realtime_transcriber.py index 33a15cb..6c3b08b 100644 --- a/iat_ws_python3.py +++ b/0604/realtime_transcriber.py @@ -1,7 +1,4 @@ - -# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # import websocket -import datetime import hashlib import base64 import hmac @@ -15,6 +12,9 @@ from time import mktime import _thread as thread import pyaudio +class RealtimeTranscriber: + pass + STATUS_FIRST_FRAME = 0 # 第一帧的标识 STATUS_CONTINUE_FRAME = 1 # 中间帧标识 STATUS_LAST_FRAME = 2 # 最后一帧的标识 @@ -83,7 +83,7 @@ def on_message(ws, message): for i in data: for w in i["cw"]: result += w["w"] - if result != "。" or result == ".>" or result == ' .。' or result == ' 。': + if result == "。" or result == ".。" or result == ' .。' or result == ' 。': pass else: t.insert(END,result) @@ -184,5 +184,7 @@ root =Tk() t=Text(root) t.pack() -tkinter.Button(root,text='go',command=lambda :thread_it(run,)).pack() -root.mainloop() \ No newline at end of file +tkinter.Button(root,text='识别',command=lambda :thread_it(run,)).pack() +root.mainloop() + + diff --git a/yuyin.py b/0604/yuyin.py similarity index 73% rename from yuyin.py rename to 0604/yuyin.py index e6c801d..e6a4caf 100644 --- a/yuyin.py +++ b/0604/yuyin.py @@ -4,6 +4,7 @@ import speech_recognition as sr from tkinter import filedialog import threading + engine = pyttsx3.init() def center_window(window, width=200, height=150): @@ -100,10 +101,9 @@ def recognize_audio_from_file(): file_path = filedialog.askopenfilename(filetypes=[("Audio Files", "*.wav")]) if file_path: r = sr.Recognizer() - r.language = 'zh-CN' with sr.AudioFile(file_path) as source: audio_data = r.record(source) - text = r.recognize_sphinx(audio_data) + text = r.recognize_sphinx(audio_data,language='zh-CN') window.update_idletasks() # 更新窗口以避免同步问题 text_entry.delete(1.0, tk.END) # 清空文本框 text_entry.insert(tk.END, f"{text}") @@ -117,37 +117,6 @@ def recognize_audio_from_file(): -def recognize_audio_realtime(parent): - window = tk.Toplevel(parent) - window.title("实时转写") - - center_window(window) - # 初始化识别器 - r = sr.Recognizer() - # 使用麦克风作为源 - mic = sr.Microphone() - - # 调整能量阈值和监听时间以适应不同环境 - with mic as source: - r.adjust_for_ambient_noise(source) - print("请开始说话...") - - # 这里简化处理,实际实时转写可能需要循环监听并处理数据块 - audio = r.listen(source, timeout=5) # 例如,监听5秒 - - try: - # 尝试识别 - text = r.recognize_google(audio, language='zh-CN') - text_entry.delete(1.0, tk.END) # 清空文本框 - text_entry.insert(tk.END, text) # 插入识别的文本 - result_label.config(text="实时语音已转换为文本") - except sr.WaitTimeoutError: - result_label.config(text="未检测到语音输入") - except sr.UnknownValueError: - result_label.config(text="无法识别音频中的内容") - except sr.RequestError as e: - result_label.config(text=f"识别服务请求错误; {e}") - def main(): window = tk.Tk() window.title("选择界面") @@ -162,13 +131,11 @@ def main(): audio_recognition_button = tk.Button(window, text="音频文件识别", command=lambda: create_audio_recognition_window(window)) audio_recognition_button.pack(padx=10,pady=10) - # 创建并配置跳转到实时语音识别窗口的按钮 - audio_button = tk.Button(window, text="实时转写",command=lambda: recognize_audio_realtime(window)) - audio_button.pack(padx=10,pady=10) + center_window(window) window.mainloop() if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/1.py b/1.py deleted file mode 100644 index 8e36038..0000000 --- a/1.py +++ /dev/null @@ -1,92 +0,0 @@ -import tkinter as tk -from tkinter import ttk -import speech_recognition as sr -import pyttsx3 - -# 初始化文本转语音引擎 -engine = pyttsx3.init() - - -def text_to_speech(): - text = text_input.get("1.0", "end-1c") - engine.say(text) - engine.runAndWait() - - -def speech_to_text(): - # 初始化识别器 - r = sr.Recognizer() - - with sr.Microphone() as source: - print("请说话:") - audio = r.listen(source) - - try: - text = r.recognize_google(audio, language='zh-CN') - text_output.delete("1.0", tk.END) - text_output.insert(tk.END, text) - except sr.UnknownValueError: - print("Google Speech Recognition无法理解音频") - except sr.RequestError as e: - print(f"无法从Google Speech Recognition服务请求结果; {e}") - - -def real_time_transcription(): - # 这里简化处理,实际应用中需要一个循环监听并实时转写 - - print("实时转写功能尚未实现,请手动调用speech_to_text进行单次转写") - - -def switch_frame(frame): - frame.tkraise() - - -# 创建主窗口 -root = tk.Tk() -root.title("语音识别与转换系统") - -# 创建一个容器来放置不同的Frame -container = ttk.Frame(root) -container.pack(side="top", fill="both", expand=True) - -# 创建不同的Frame对应不同功能 -frame_text_to_speech = ttk.Frame(container) -frame_speech_to_text = ttk.Frame(container) -frame_real_time = ttk.Frame(container) - -# 在每个Frame中添加对应的功能组件 -# 文本转语音Frame -ttk.Label(frame_text_to_speech, text="请输入要转换为语音的文本:").pack() -text_input = tk.Text(frame_text_to_speech, height=20) -text_input.pack() -ttk.Button(frame_text_to_speech, text="转换", command=text_to_speech).pack() - -# 语音转文本Frame -ttk.Label(frame_speech_to_text, text="请点击下方按钮开始语音识别:").pack() -ttk.Button(frame_speech_to_text, text="开始识别", command=speech_to_text).pack() -text_output = tk.Text(frame_speech_to_text, height=20) -text_output.pack() - -# 实时转写Frame(简化示意) -ttk.Label(frame_real_time, text="实时转写功能界面(待实现)").pack() -ttk.Button(frame_real_time, text="返回", command=lambda: switch_frame(container)).pack() - -# 将所有Frame添加到容器中 -for frame in (frame_text_to_speech, frame_speech_to_text, frame_real_time): - frame.grid(row=0, column=0, sticky="nsew") - -switch_frame(frame_text_to_speech) # 默认显示文本转语音界面 - -# 创建顶部菜单进行功能切换 -menu = tk.Menu(root) - -root.config(menu=menu) - -sub_menu = tk.Menu(menu, tearoff=0) - -menu.add_cascade(label="功能选择", menu=sub_menu) -sub_menu.add_command(label="文本转语音", command=lambda: switch_frame(frame_text_to_speech)) -sub_menu.add_command(label="语音转文本", command=lambda: switch_frame(frame_speech_to_text)) -sub_menu.add_command(label="实时转写", command=lambda: switch_frame(frame_real_time)) - -root.mainloop() \ No newline at end of file diff --git a/README.md b/README.md deleted file mode 100644 index 7abb23b..0000000 --- a/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# yuyin - diff --git a/语音识别流程图.png b/语音识别流程图.png deleted file mode 100644 index 1610651..0000000 Binary files a/语音识别流程图.png and /dev/null differ diff --git a/转语音.py b/转语音.py deleted file mode 100644 index f8b5c9f..0000000 --- a/转语音.py +++ /dev/null @@ -1,45 +0,0 @@ -import tkinter as tk -from tkinter import ttk -import pyttsx3 - - -def text_to_speech(): - engine = pyttsx3.init() - text = text_entry.get("1.0", "end-1c") # 获取文本框中的文本 - engine.say(text) - speed = speed_slider.get() - engine.setProperty('rate', speed) - engine.runAndWait() - - -def main(): - global text_entry - - # 创建主窗口 - app = tk.Tk() - app.title("文本转语音") - - - - speed_slider = ttk.Scale(app, from_=50, to=200, orient=tk.HORIZONTAL) - speed_slider.set(120) # 默认语速 - speed_slider.pack() - - # 创建一个标签,用于提示输入文本 - label = tk.Label(app, text="请输入文字:") - label.pack(pady=10) - - # 创建一个文本框,用于用户输入 - text_entry = tk.Text(app, height=10) - text_entry.pack() - - # 创建一个按钮,点击时调用text_to_speech函数 - convert_button = tk.Button(app, text="识别", command=text_to_speech) - convert_button.pack(pady=10) - - # 运行主循环 - app.mainloop() - - -if __name__ == "__main__": - main() \ No newline at end of file