|
|
|
|
@ -135,34 +135,45 @@ public class VoiceController {
|
|
|
|
|
private String recognizeWithXunFei(String audioBase64) throws Exception {
|
|
|
|
|
String wsUrl = buildAuthUrl(XunFeiConfig.STT_API_URL);
|
|
|
|
|
|
|
|
|
|
StringBuilder resultText = new StringBuilder();
|
|
|
|
|
// 解码音频数据
|
|
|
|
|
byte[] audioData = Base64.getDecoder().decode(audioBase64);
|
|
|
|
|
if (audioData.length < 1000) {
|
|
|
|
|
throw new Exception("音频数据太短,请说话时间长一些");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
CompletableFuture<String> future = new CompletableFuture<>();
|
|
|
|
|
List<String> sentenceResults = new java.util.concurrent.CopyOnWriteArrayList<>();
|
|
|
|
|
|
|
|
|
|
HttpClient client = HttpClient.newHttpClient();
|
|
|
|
|
WebSocket webSocket = client.newWebSocketBuilder()
|
|
|
|
|
client.newWebSocketBuilder()
|
|
|
|
|
.buildAsync(URI.create(wsUrl), new WebSocket.Listener() {
|
|
|
|
|
private StringBuilder fullResult = new StringBuilder();
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public void onOpen(WebSocket webSocket) {
|
|
|
|
|
// 发送第一帧
|
|
|
|
|
String firstFrame = buildFirstFrame(audioBase64);
|
|
|
|
|
webSocket.sendText(firstFrame, true);
|
|
|
|
|
WebSocket.Listener.super.onOpen(webSocket);
|
|
|
|
|
// 在新线程中分帧发送音频
|
|
|
|
|
new Thread(() -> {
|
|
|
|
|
try {
|
|
|
|
|
sendAudioFrames(webSocket, audioData);
|
|
|
|
|
} catch (Exception e) {
|
|
|
|
|
future.completeExceptionally(e);
|
|
|
|
|
}
|
|
|
|
|
}).start();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public CompletionStage<?> onText(WebSocket webSocket, CharSequence data, boolean last) {
|
|
|
|
|
String response = data.toString();
|
|
|
|
|
// 解析响应提取文本
|
|
|
|
|
String text = parseXunFeiResponse(response);
|
|
|
|
|
if (text != null) {
|
|
|
|
|
fullResult.append(text);
|
|
|
|
|
}
|
|
|
|
|
String json = data.toString();
|
|
|
|
|
parseAndUpdateResult(json, sentenceResults);
|
|
|
|
|
|
|
|
|
|
// 检查是否结束
|
|
|
|
|
if (response.contains("\"status\":2")) {
|
|
|
|
|
future.complete(fullResult.toString());
|
|
|
|
|
if (json.contains("\"status\":2") || json.contains("\"status\": 2")) {
|
|
|
|
|
StringBuilder finalBuilder = new StringBuilder();
|
|
|
|
|
for (String s : sentenceResults) {
|
|
|
|
|
if (s != null) finalBuilder.append(s);
|
|
|
|
|
}
|
|
|
|
|
String finalResult = finalBuilder.toString().trim();
|
|
|
|
|
future.complete(finalResult.isEmpty() ? "未识别到语音内容" : finalResult);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return WebSocket.Listener.super.onText(webSocket, data, last);
|
|
|
|
|
@ -172,18 +183,118 @@ public class VoiceController {
|
|
|
|
|
public void onError(WebSocket webSocket, Throwable error) {
|
|
|
|
|
future.completeExceptionally(error);
|
|
|
|
|
}
|
|
|
|
|
}).join();
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public CompletionStage<?> onClose(WebSocket webSocket, int statusCode, String reason) {
|
|
|
|
|
if (!future.isDone()) {
|
|
|
|
|
StringBuilder finalBuilder = new StringBuilder();
|
|
|
|
|
for (String s : sentenceResults) {
|
|
|
|
|
if (s != null) finalBuilder.append(s);
|
|
|
|
|
}
|
|
|
|
|
String finalResult = finalBuilder.toString().trim();
|
|
|
|
|
future.complete(finalResult.isEmpty() ? "未识别到语音内容" : finalResult);
|
|
|
|
|
}
|
|
|
|
|
return WebSocket.Listener.super.onClose(webSocket, statusCode, reason);
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
return future.get(30, TimeUnit.SECONDS);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 构建讯飞语音识别第一帧数据
|
|
|
|
|
* 分帧发送音频数据
|
|
|
|
|
*/
|
|
|
|
|
private void sendAudioFrames(WebSocket webSocket, byte[] audioData) throws Exception {
|
|
|
|
|
int frameSize = 1280; // 每帧40ms音频
|
|
|
|
|
int status = 0; // 0-首帧, 1-中间帧, 2-尾帧
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < audioData.length; i += frameSize) {
|
|
|
|
|
int end = Math.min(i + frameSize, audioData.length);
|
|
|
|
|
byte[] frame = Arrays.copyOfRange(audioData, i, end);
|
|
|
|
|
|
|
|
|
|
if (i + frameSize >= audioData.length) {
|
|
|
|
|
status = 2; // 尾帧
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
String frameJson = buildFrameJson(frame, status);
|
|
|
|
|
webSocket.sendText(frameJson, true);
|
|
|
|
|
|
|
|
|
|
if (status == 0) {
|
|
|
|
|
status = 1; // 后续都是中间帧
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// 模拟实时发送,每帧间隔40ms
|
|
|
|
|
Thread.sleep(40);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 构建帧数据JSON
|
|
|
|
|
*/
|
|
|
|
|
private String buildFrameJson(byte[] audioFrame, int status) {
|
|
|
|
|
String audioBase64 = Base64.getEncoder().encodeToString(audioFrame);
|
|
|
|
|
|
|
|
|
|
if (status == 0) {
|
|
|
|
|
// 首帧,包含common和business
|
|
|
|
|
return "{\"common\":{\"app_id\":\"" + config.getAppId() + "\"}," +
|
|
|
|
|
"\"business\":{\"language\":\"zh_cn\",\"domain\":\"iat\",\"accent\":\"mandarin\",\"vad_eos\":3000}," +
|
|
|
|
|
"\"data\":{\"status\":0,\"format\":\"audio/L16;rate=16000\",\"encoding\":\"raw\",\"audio\":\"" + audioBase64 + "\"}}";
|
|
|
|
|
} else {
|
|
|
|
|
// 中间帧或尾帧
|
|
|
|
|
return "{\"data\":{\"status\":" + status + ",\"format\":\"audio/L16;rate=16000\",\"encoding\":\"raw\",\"audio\":\"" + audioBase64 + "\"}}";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 解析响应并更新结果
|
|
|
|
|
*/
|
|
|
|
|
private String buildFirstFrame(String audioBase64) {
|
|
|
|
|
return "{\"common\":{\"app_id\":\"" + config.getAppId() + "\"}," +
|
|
|
|
|
"\"business\":{\"language\":\"zh_cn\",\"domain\":\"iat\",\"accent\":\"mandarin\",\"vad_eos\":3000}," +
|
|
|
|
|
"\"data\":{\"status\":2,\"format\":\"audio/L16;rate=16000\",\"encoding\":\"raw\",\"audio\":\"" + audioBase64 + "\"}}";
|
|
|
|
|
private void parseAndUpdateResult(String json, List<String> sentenceResults) {
|
|
|
|
|
try {
|
|
|
|
|
if (!json.contains("\"code\":0") && !json.contains("\"code\": 0")) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// 提取sn(句子序号)
|
|
|
|
|
int sn = 0;
|
|
|
|
|
int snStart = json.indexOf("\"sn\":");
|
|
|
|
|
if (snStart > 0) {
|
|
|
|
|
int snEnd = json.indexOf(",", snStart);
|
|
|
|
|
if (snEnd < 0) snEnd = json.indexOf("}", snStart);
|
|
|
|
|
if (snEnd > snStart) {
|
|
|
|
|
try {
|
|
|
|
|
sn = Integer.parseInt(json.substring(snStart + 5, snEnd).trim());
|
|
|
|
|
} catch (NumberFormatException ignored) {}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// 提取所有文字
|
|
|
|
|
StringBuilder text = new StringBuilder();
|
|
|
|
|
int pos = 0;
|
|
|
|
|
while (true) {
|
|
|
|
|
int wStart = json.indexOf("\"w\":", pos);
|
|
|
|
|
if (wStart < 0) break;
|
|
|
|
|
|
|
|
|
|
int valueStart = json.indexOf("\"", wStart + 4) + 1;
|
|
|
|
|
int valueEnd = json.indexOf("\"", valueStart);
|
|
|
|
|
if (valueEnd > valueStart) {
|
|
|
|
|
text.append(json.substring(valueStart, valueEnd));
|
|
|
|
|
pos = valueEnd;
|
|
|
|
|
} else {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// 确保List足够大
|
|
|
|
|
while (sentenceResults.size() <= sn) {
|
|
|
|
|
sentenceResults.add("");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// 替换该句子的结果
|
|
|
|
|
sentenceResults.set(sn, text.toString());
|
|
|
|
|
|
|
|
|
|
} catch (Exception ignored) {
|
|
|
|
|
// 静默处理解析异常
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|