0604

1 year ago · a5f7c17d53
parent 8b6068dd52
commit a5f7c17d53
6 changed files with 12 additions and 182 deletions
--- a/0604/realtime_transcriber.py
+++ b/0604/realtime_transcriber.py
@ -1,7 +1,4 @@
 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
 import websocket
 import datetime
 import hashlib
 import base64
 import hmac
@ -15,6 +12,9 @@ from time import mktime
 import _thread as thread
 import pyaudio
 class RealtimeTranscriber:
    """Empty placeholder class; it declares no attributes or methods."""
 STATUS_FIRST_FRAME = 0  # marker for the first frame
 STATUS_CONTINUE_FRAME = 1  # marker for an intermediate frame
 STATUS_LAST_FRAME = 2  # marker for the last frame
@ -83,7 +83,7 @@ def on_message(ws, message):
            for i in data:
                for w in i["cw"]:
                    result += w["w"]
-            if result != "。" or result == ".>" or result == ' .。' or result == ' 。':
+            if result == "。" or result == ".。" or result == ' .。' or result == ' 。':
                pass
            else:
                t.insert(END,result)
@ -184,5 +184,7 @@ root =Tk()
 t=Text(root)
 t.pack()
-tkinter.Button(root,text='go',command=lambda :thread_it(run,)).pack()
+tkinter.Button(root,text='识别',command=lambda :thread_it(run,)).pack()
 root.mainloop()
--- a/0604/yuyin.py
+++ b/0604/yuyin.py
@ -4,6 +4,7 @@ import speech_recognition as sr
 from tkinter import filedialog
 import threading
 engine = pyttsx3.init()
 def center_window(window, width=200, height=150):
@ -100,10 +101,9 @@ def recognize_audio_from_file():
        file_path = filedialog.askopenfilename(filetypes=[("Audio Files", "*.wav")])
        if file_path:
            r = sr.Recognizer()
            r.language = 'zh-CN'
            with sr.AudioFile(file_path) as source:
                audio_data = r.record(source)
-                text = r.recognize_sphinx(audio_data)
+                text = r.recognize_sphinx(audio_data,language='zh-CN')
                window.update_idletasks()  # 更新窗口以避免同步问题
                text_entry.delete(1.0, tk.END)  # 清空文本框
                text_entry.insert(tk.END, f"{text}")
@ -117,37 +117,6 @@ def recognize_audio_from_file():
 def recognize_audio_realtime(parent):
    """Capture one phrase from the microphone and display its transcription.

    Opens a "实时转写" Toplevel window, calibrates the recognizer against
    ambient noise, listens for a single phrase and sends it to the Google
    recognizer with ``language='zh-CN'``. On success the recognized text
    replaces the contents of ``text_entry``; the outcome (success or the kind
    of failure) is shown on ``result_label``. Both widgets are defined
    outside this function.

    Args:
        parent: Tk widget used as the parent of the new Toplevel window.
    """
    window = tk.Toplevel(parent)
    window.title("实时转写")
    center_window(window)
    # Set up the recognizer and use the microphone as the audio source.
    r = sr.Recognizer()
    mic = sr.Microphone()
    try:
        with mic as source:
            # Calibrate the energy threshold against the current background noise.
            r.adjust_for_ambient_noise(source)
            print("请开始说话...")
            # Simplified: one listen call instead of a continuous loop.
            # ``timeout`` bounds how long to wait for speech to start; when it
            # expires listen() raises WaitTimeoutError, so the call must sit
            # inside this try block for the handler below to be reachable.
            audio = r.listen(source, timeout=5)
        text = r.recognize_google(audio, language='zh-CN')
    except sr.WaitTimeoutError:
        result_label.config(text="未检测到语音输入")
    except sr.UnknownValueError:
        result_label.config(text="无法识别音频中的内容")
    except sr.RequestError as e:
        result_label.config(text=f"识别服务请求错误; {e}")
    else:
        text_entry.delete(1.0, tk.END)  # clear the text box
        text_entry.insert(tk.END, text)  # insert the recognized text
        result_label.config(text="实时语音已转换为文本")
 def main():
    window = tk.Tk()
    window.title("选择界面")
@ -162,9 +131,7 @@ def main():
    audio_recognition_button = tk.Button(window, text="音频文件识别", command=lambda: create_audio_recognition_window(window))
    audio_recognition_button.pack(padx=10,pady=10)
-    # 创建并配置跳转到实时语音识别窗口的按钮
+
    audio_button = tk.Button(window, text="实时转写",command=lambda: recognize_audio_realtime(window))
    audio_button.pack(padx=10,pady=10)
    center_window(window)
--- a/1.py
+++ b/1.py
@ -1,92 +0,0 @@
 import tkinter as tk
 from tkinter import ttk
 import speech_recognition as sr
 import pyttsx3
 # Initialize the text-to-speech engine
 engine = pyttsx3.init()
 def text_to_speech():
    """Read aloud whatever is currently typed into ``text_input``."""
    # "end-1c" stops one character before the end of the widget's content.
    engine.say(text_input.get("1.0", "end-1c"))
    engine.runAndWait()
 def speech_to_text():
    """Record one phrase from the microphone and show its transcription.

    The captured audio is sent to the Google recognizer with
    ``language='zh-CN'`` and the result replaces the contents of
    ``text_output``. Recognition failures are only printed to stdout.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as mic:
        print("请说话:")
        captured = recognizer.listen(mic)
    try:
        transcript = recognizer.recognize_google(captured, language='zh-CN')
        # Replace whatever the output box showed before with the new text.
        text_output.delete("1.0", tk.END)
        text_output.insert(tk.END, transcript)
    except sr.UnknownValueError:
        print("Google Speech Recognition无法理解音频")
    except sr.RequestError as e:
        print(f"无法从Google Speech Recognition服务请求结果; {e}")
 def real_time_transcription():
    """Print a notice that real-time transcription is not implemented yet."""
    # Placeholder: a real implementation would listen in a loop and
    # transcribe continuously.
    notice = "实时转写功能尚未实现，请手动调用speech_to_text进行单次转写"
    print(notice)
 def switch_frame(frame):
    """Call ``frame.tkraise()`` to bring the given frame to the front."""
    frame.tkraise()
 # Create the main window
 root = tk.Tk()
 root.title("语音识别与转换系统")
 # Container that holds the stacked feature frames
 container = ttk.Frame(root)
 container.pack(side="top", fill="both", expand=True)
 # One frame per feature
 frame_text_to_speech = ttk.Frame(container)
 frame_speech_to_text = ttk.Frame(container)
 frame_real_time = ttk.Frame(container)
 # Populate each frame with its widgets
 # Text-to-speech frame
 ttk.Label(frame_text_to_speech, text="请输入要转换为语音的文本:").pack()
 text_input = tk.Text(frame_text_to_speech, height=20)
 text_input.pack()
 ttk.Button(frame_text_to_speech, text="转换", command=text_to_speech).pack()
 # Speech-to-text frame
 ttk.Label(frame_speech_to_text, text="请点击下方按钮开始语音识别:").pack()
 ttk.Button(frame_speech_to_text, text="开始识别", command=speech_to_text).pack()
 text_output = tk.Text(frame_speech_to_text, height=20)
 text_output.pack()
 # Real-time transcription frame (simplified placeholder)
 ttk.Label(frame_real_time, text="实时转写功能界面（待实现）").pack()
 # The "返回" button goes back to the default frame. Raising ``container``
 # here would not change which of the stacked frames is visible.
 ttk.Button(frame_real_time, text="返回", command=lambda: switch_frame(frame_text_to_speech)).pack()
 # Place every frame in the same grid cell so they overlap
 for frame in (frame_text_to_speech, frame_speech_to_text, frame_real_time):
    frame.grid(row=0, column=0, sticky="nsew")
 switch_frame(frame_text_to_speech)  # show the text-to-speech frame by default
 # Top menu used to switch between features
 menu = tk.Menu(root)
 root.config(menu=menu)
 sub_menu = tk.Menu(menu, tearoff=0)
 menu.add_cascade(label="功能选择", menu=sub_menu)
 sub_menu.add_command(label="文本转语音", command=lambda: switch_frame(frame_text_to_speech))
 sub_menu.add_command(label="语音转文本", command=lambda: switch_frame(frame_speech_to_text))
 sub_menu.add_command(label="实时转写", command=lambda: switch_frame(frame_real_time))
 root.mainloop()
--- a/README.md
+++ b/README.md
@ -1,2 +0,0 @@
 # yuyin
--- a/语音识别流程图.png
+++ b/语音识别流程图.png
--- a/转语音.py
+++ b/转语音.py
@ -1,45 +0,0 @@
 import tkinter as tk
 from tkinter import ttk
 import pyttsx3
 def text_to_speech():
    """Speak the text in ``text_entry`` at the rate chosen on ``speed_slider``.

    Both widgets are module-level globals created by ``main()``.
    """
    engine = pyttsx3.init()
    text = text_entry.get("1.0", "end-1c")  # read the text from the text box
    # Set the rate before queueing the utterance so it applies to this text.
    # The slider value may be fractional, so convert it to an integer rate.
    speed = int(float(speed_slider.get()))
    engine.setProperty('rate', speed)
    engine.say(text)
    engine.runAndWait()
 def main():
    """Build the text-to-speech window and run the Tk main loop."""
    # text_to_speech() reads both widgets, so both must be module-level names.
    global text_entry, speed_slider
    # Create the main window
    app = tk.Tk()
    app.title("文本转语音")
    speed_slider = ttk.Scale(app, from_=50, to=200, orient=tk.HORIZONTAL)
    speed_slider.set(120)  # default speech rate
    speed_slider.pack()
    # Label prompting the user to enter text
    label = tk.Label(app, text="请输入文字:")
    label.pack(pady=10)
    # Text box for user input
    text_entry = tk.Text(app, height=10)
    text_entry.pack()
    # Button that calls text_to_speech when clicked
    convert_button = tk.Button(app, text="识别", command=text_to_speech)
    convert_button.pack(pady=10)
    # Run the main loop
    app.mainloop()
 # Launch the GUI only when this file is executed as a script.
 if __name__ == "__main__":
    main()