From 7fcd005f06ad1f8a97e70084544a2ac47f6f08b6 Mon Sep 17 00:00:00 2001 From: jialin Date: Mon, 25 Nov 2024 13:31:42 +0800 Subject: [PATCH] chore: audio --- config/routes.ts | 16 +- src/locales/en-US/playground.ts | 9 +- src/locales/zh-CN/playground.ts | 7 +- .../llmodels/components/advance-config.tsx | 5 + src/pages/llmodels/components/table-list.tsx | 85 ++++++++--- src/pages/llmodels/config/index.ts | 3 +- src/pages/llmodels/config/types.ts | 2 + src/pages/playground/apis/index.ts | 29 ++++ .../playground/components/ground-images.tsx | 2 +- .../playground/components/ground-stt.tsx | 7 +- .../playground/components/ground-tts.tsx | 140 ++++-------------- src/pages/playground/speech.tsx | 4 +- src/utils/fetch-chunk-data.ts | 1 + 13 files changed, 159 insertions(+), 151 deletions(-) diff --git a/config/routes.ts b/config/routes.ts index 6e7f45a2..537e802f 100644 --- a/config/routes.ts +++ b/config/routes.ts @@ -25,14 +25,6 @@ export default [ icon: 'Comment', component: './playground/index' }, - { - name: 'speech', - title: 'Speech', - path: '/playground/speech', - key: 'speech', - icon: 'Comment', - component: './playground/speech' - }, { name: 'text2images', title: 'Text2Images', @@ -41,6 +33,14 @@ export default [ icon: 'Comment', component: './playground/images' }, + { + name: 'speech', + title: 'Speech', + path: '/playground/speech', + key: 'speech', + icon: 'Comment', + component: './playground/speech' + }, { name: 'embedding', title: 'embedding', diff --git a/src/locales/en-US/playground.ts b/src/locales/en-US/playground.ts index 2207ff7e..d82a84ef 100644 --- a/src/locales/en-US/playground.ts +++ b/src/locales/en-US/playground.ts @@ -75,5 +75,12 @@ export default { 'playground.rerank.rank': 'Rank', 'playground.rerank.score': 'Score', 'playground.rerank.query.holder': 'Input your query', - 'playground.image.prompt': 'Input Prompt' + 'playground.image.prompt': 'Input Prompt', + 'playground.audio.texttospeech': 'Text to Speech', + 'playground.audio.speechtotext': 'Speech to Text', + 'playground.audio.texttospeech.tips': 'Generated speech will appear here', + 'playground.audio.speechtotext.tips': + 'Upload an audio file or start recording', + 'playground.audio.enablemic': + "Enable microphone access in your browser's settings." }; diff --git a/src/locales/zh-CN/playground.ts b/src/locales/zh-CN/playground.ts index 789aff9b..e2b3e7cd 100644 --- a/src/locales/zh-CN/playground.ts +++ b/src/locales/zh-CN/playground.ts @@ -75,5 +75,10 @@ export default { 'playground.rerank.rank': '排序', 'playground.rerank.score': '分数', 'playground.rerank.query.holder': '输入查询', - 'playground.image.prompt': '输入提示' + 'playground.image.prompt': '输入提示', + 'playground.audio.texttospeech': '文本转语音', + 'playground.audio.speechtotext': '语音转文本', + 'playground.audio.texttospeech.tips': '生成的语音将出现在这里', + 'playground.audio.speechtotext.tips': '上传音频文件或开始录音', + 'playground.audio.enablemic': '请允许浏览器访问麦克风,以便开始录音' }; diff --git a/src/pages/llmodels/components/advance-config.tsx b/src/pages/llmodels/components/advance-config.tsx index 8a1954d2..797c33b8 100644 --- a/src/pages/llmodels/components/advance-config.tsx +++ b/src/pages/llmodels/components/advance-config.tsx @@ -264,6 +264,11 @@ const AdvanceConfig: React.FC = (props) => { value: backendOptionsMap.vllm, disabled: source === modelSourceMap.local_path_value ? false : isGGUF + }, + { + label: 'vox-box', + value: backendOptionsMap.voxBox, + disabled: false } ]} disabled={ diff --git a/src/pages/llmodels/components/table-list.tsx b/src/pages/llmodels/components/table-list.tsx index b3cfb3a3..6834e67d 100644 --- a/src/pages/llmodels/components/table-list.tsx +++ b/src/pages/llmodels/components/table-list.tsx @@ -423,6 +423,66 @@ const Models: React.FC = ({ [] ); + const renderModelTags = useCallback((record: ListItem) => { + if (record.reranker) { + return ( + + Reranker + + ); + } + + if (record.embedding_only && !record.reranker) { + return ( + + Embedding Only + + ); + } + if (record.text_to_speech) { + return ( + + {intl.formatMessage({ id: 'playground.audio.texttospeech' })} + + ); + } + if (record.speech_to_text) { + return ( + + {intl.formatMessage({ id: 'playground.audio.speechtotext' })} + + ); + } + return null; + }, []); const renderChildren = useCallback( (list: any, parent?: any) => { return ( @@ -548,30 +608,7 @@ const Models: React.FC = ({ {text} - {record.reranker && ( - - Reranker - - )} - {record.embedding_only && !record.reranker && ( - - Embedding Only - - )} + {renderModelTags(record)} ); }} diff --git a/src/pages/llmodels/config/index.ts b/src/pages/llmodels/config/index.ts index 03f82bf6..b2dd28ed 100644 --- a/src/pages/llmodels/config/index.ts +++ b/src/pages/llmodels/config/index.ts @@ -69,7 +69,8 @@ export const ollamaModelOptions = [ export const backendOptionsMap = { llamaBox: 'llama-box', - vllm: 'vllm' + vllm: 'vllm', + voxBox: 'vox-box' }; export const modelSourceMap: Record = { diff --git a/src/pages/llmodels/config/types.ts b/src/pages/llmodels/config/types.ts index 17f0468e..477f35b6 100644 --- a/src/pages/llmodels/config/types.ts +++ b/src/pages/llmodels/config/types.ts @@ -11,6 +11,8 @@ export interface ListItem { model_scope_model_id: string; embedding_only?: boolean; ready_replicas: number; + speech_to_text?: boolean; + text_to_speech?: boolean; replicas: number; s3Address: string; name: string; diff --git a/src/pages/playground/apis/index.ts b/src/pages/playground/apis/index.ts index 2ea35218..3bff3a4f 100644 --- a/src/pages/playground/apis/index.ts +++ b/src/pages/playground/apis/index.ts @@ -10,6 +10,10 @@ export const OPENAI_MODELS = '/v1-openai/models'; export const RERANKER_API = '/rerank'; +export const AUDIO_TEXT_TO_SPEECH_API = '/v1-openai/audio/speech'; + +export const AUDIO_SPEECH_TO_TEXT_API = '/v1-openai/audio/transcriptions'; + export async function execChatCompletions(params: any) { return request(`${CHAT_API}`, { method: 'POST', @@ -81,3 +85,28 @@ export const createImages = async ( } return res.json(); }; + +// ============ audio ============ +export const textToSpeech = async (params: any, options?: any) => { + const res = await fetch(AUDIO_TEXT_TO_SPEECH_API, { + method: 'POST', + body: JSON.stringify(params), + signal: params.signal + }); + if (!res.ok) { + throw new Error('Network response was not ok'); + } + return res.json(); +}; + +export const speechToText = async (params: any, options?: any) => { + const res = await fetch(AUDIO_SPEECH_TO_TEXT_API, { + method: 'POST', + body: JSON.stringify(params), + signal: params.signal + }); + if (!res.ok) { + throw new Error('Network response was not ok'); + } + return res.json(); +}; diff --git a/src/pages/playground/components/ground-images.tsx b/src/pages/playground/components/ground-images.tsx index 00582695..0b38550a 100644 --- a/src/pages/playground/components/ground-images.tsx +++ b/src/pages/playground/components/ground-images.tsx @@ -255,7 +255,7 @@ const GroundImages: React.FC = forwardRef((props, ref) => { const result: any = await fetchChunkedData({ data: params, - // url: 'http://192.168.50.27:40639/v1/images/generations', + // url: 'http://192.168.1.3:40487/v1/images/generations', url: CREAT_IMAGE_API, signal: requestToken.current.signal, headers: { diff --git a/src/pages/playground/components/ground-stt.tsx b/src/pages/playground/components/ground-stt.tsx index cd06e142..0c1b6e63 100644 --- a/src/pages/playground/components/ground-stt.tsx +++ b/src/pages/playground/components/ground-stt.tsx @@ -321,7 +321,9 @@ const GroundLeft: React.FC = forwardRef((props, ref) => { return (
- Upload an audio file or start recording + + {intl.formatMessage({ id: 'playground.audio.speechtotext.tips' })} +
); }; @@ -469,7 +471,7 @@ const GroundLeft: React.FC = forwardRef((props, ref) => { fontWeight: 500 }} > - Enable microphone access in your browser’s settings. + {intl.formatMessage({ id: 'playground.audio.enablemic' })} )} @@ -544,6 +546,7 @@ const GroundLeft: React.FC = forwardRef((props, ref) => { payLoad={{ messages: viewCodeMessage }} + api="audio/transcriptions" parameters={parameters} onCancel={handleCloseViewCode} title={intl.formatMessage({ id: 'playground.viewcode' })} diff --git a/src/pages/playground/components/ground-tts.tsx b/src/pages/playground/components/ground-tts.tsx index 5d3a2799..1ca6be05 100644 --- a/src/pages/playground/components/ground-tts.tsx +++ b/src/pages/playground/components/ground-tts.tsx @@ -1,24 +1,21 @@ import IconFont from '@/components/icon-font'; import SpeechContent from '@/components/speech-content'; import useOverlayScroller from '@/hooks/use-overlay-scroller'; -import { fetchChunkedData, readStreamData } from '@/utils/fetch-chunk-data'; +import { fetchChunkedData } from '@/utils/fetch-chunk-data'; import { ThunderboltOutlined } from '@ant-design/icons'; import { useIntl, useSearchParams } from '@umijs/max'; import { Spin } from 'antd'; import classNames from 'classnames'; -import _ from 'lodash'; import 'overlayscrollbars/overlayscrollbars.css'; import { forwardRef, memo, useEffect, useImperativeHandle, - useMemo, useRef, useState } from 'react'; import { CHAT_API } from '../apis'; -import { Roles, generateMessages } from '../config'; import { TTSParamsConfig as paramsConfig } from '../config/params-config'; import { MessageItem } from '../config/types'; import '../style/ground-left.less'; @@ -43,7 +40,16 @@ const initialValues = { const GroundLeft: React.FC = forwardRef((props, ref) => { const { modelList } = props; const messageId = useRef(0); - const [messageList, setMessageList] = useState([]); + const [messageList, setMessageList] = useState< + { + prompt: string; + voice: string; + format: string; + speed: number; + uid: number; + autoplay: boolean; + }[] + >([]); const intl = useIntl(); const [searchParams] = useSearchParams(); @@ -78,51 +84,10 @@ const GroundLeft: React.FC = forwardRef((props, ref) => { }; }); - const viewCodeMessage = useMemo(() => { - return generateMessages([ - { role: Roles.System, content: systemMessage }, - ...messageList - ]); - }, [messageList, systemMessage]); - const setMessageId = () => { messageId.current = messageId.current + 1; }; - const handleNewMessage = (message?: { role: string; content: string }) => { - const newMessage = message || { - role: - _.last(messageList)?.role === Roles.User ? Roles.Assistant : Roles.User, - content: '' - }; - messageList.push({ - ...newMessage, - uid: messageId.current + 1 - }); - setMessageId(); - setMessageList([...messageList]); - }; - - const joinMessage = (chunk: any) => { - setTokenResult({ - ...(chunk?.usage ?? {}) - }); - - if (!chunk || !_.get(chunk, 'choices', []).length) { - return; - } - contentRef.current = - contentRef.current + _.get(chunk, 'choices.0.delta.content', ''); - setMessageList([ - ...messageList, - ...currentMessageRef.current, - { - role: Roles.Assistant, - content: contentRef.current, - uid: messageId.current - } - ]); - }; const handleStopConversation = () => { controllerRef.current?.abort?.(); setLoading(false); @@ -134,39 +99,15 @@ const GroundLeft: React.FC = forwardRef((props, ref) => { setLoading(true); setMessageId(); setTokenResult(null); + setCurrentPrompt(current?.content || ''); controllerRef.current?.abort?.(); controllerRef.current = new AbortController(); const signal = controllerRef.current.signal; - currentMessageRef.current = current - ? [ - { - ...current, - uid: messageId.current - } - ] - : []; - - contentRef.current = ''; - setMessageList((pre) => { - return [...pre, ...currentMessageRef.current]; - }); - - const messageParams = [ - { role: Roles.System, content: systemMessage }, - ...messageList, - ...currentMessageRef.current - ]; - - const messages = generateMessages(messageParams); const chatParams = { - messages: messages, ...parameters, - stream: true, - stream_options: { - include_usage: true - } + prompt: current?.content || currentPrompt }; const result: any = await fetchChunkedData({ data: chatParams, @@ -174,26 +115,16 @@ const GroundLeft: React.FC = forwardRef((props, ref) => { signal }); - if (result?.error) { - setTokenResult({ - error: true, - errorMessage: - result?.data?.error?.message || result?.data?.message || '' - }); - return; - } - setMessageId(); - const { reader, decoder } = result; - await readStreamData(reader, decoder, (chunk: any) => { - if (chunk?.error) { - setTokenResult({ - error: true, - errorMessage: chunk?.error?.message || chunk?.message || '' - }); - return; + setMessageList([ + { + prompt: current?.content || currentPrompt, + voice: parameters.voice, + format: parameters.response_format, + speed: parameters.speed, + uid: messageId.current, + autoplay: checkvalueRef.current } - joinMessage(chunk); - }); + ]); } catch (error) { // console.log('error:', error); } finally { @@ -210,23 +141,7 @@ const GroundLeft: React.FC = forwardRef((props, ref) => { }; const handleSendMessage = (message: Omit) => { - // submitMessage(currentMessage); - setMessageId(); - setLoading(true); - - setTimeout(() => { - setMessageList([ - { - prompt: message.content, - voice: parameters.voice, - format: parameters.response_format, - speed: parameters.speed, - uid: messageId.current, - autoplay: checkvalueRef.current - } - ]); - setLoading(false); - }, 1000); + submitMessage(message); }; const handleCloseViewCode = () => { @@ -236,7 +151,6 @@ const GroundLeft: React.FC = forwardRef((props, ref) => { const handleSelectModel = () => {}; const handleOnCheckChange = (e: any) => { - console.log('handleOnCheckChange', e); checkvalueRef.current = e.target.checked; }; useEffect(() => { @@ -287,7 +201,11 @@ const GroundLeft: React.FC = forwardRef((props, ref) => { className="font-size-32 text-secondary" > - Generated speech will appear here + + {intl.formatMessage({ + id: 'playground.audio.texttospeech.tips' + })} + )} {loading && ( @@ -314,7 +232,6 @@ const GroundLeft: React.FC = forwardRef((props, ref) => { disabled={!parameters.model} isEmpty={true} handleSubmit={handleSendMessage} - addMessage={handleNewMessage} handleAbortFetch={handleStopConversation} clearAll={handleClear} setModelSelections={handleSelectModel} @@ -347,6 +264,7 @@ const GroundLeft: React.FC = forwardRef((props, ref) => { payLoad={{ prompt: currentPrompt }} + api="audio/speech" parameters={parameters} onCancel={handleCloseViewCode} title={intl.formatMessage({ id: 'playground.viewcode' })} diff --git a/src/pages/playground/speech.tsx b/src/pages/playground/speech.tsx index f18884c8..c3147ac6 100644 --- a/src/pages/playground/speech.tsx +++ b/src/pages/playground/speech.tsx @@ -30,12 +30,12 @@ const Playground: React.FC = () => { const [loaded, setLoaded] = useState(false); const optionsList = [ { - label: 'Text To Speech', + label: intl.formatMessage({ id: 'playground.audio.texttospeech' }), value: TabsValueMap.Tab1, icon: }, { - label: 'Speech To Text', + label: intl.formatMessage({ id: 'playground.audio.speechtotext' }), value: TabsValueMap.Tab2, icon: } diff --git a/src/utils/fetch-chunk-data.ts b/src/utils/fetch-chunk-data.ts index b70b6a24..8802bcaa 100644 --- a/src/utils/fetch-chunk-data.ts +++ b/src/utils/fetch-chunk-data.ts @@ -51,6 +51,7 @@ export const fetchChunkedData = async (params: { ...params.headers } }); + console.log('response====', response); if (!response.ok) { return { error: true,