2026-03-24 11:39:01 +08:00

226 lines
8.2 KiB
C#

using System;
using System.Collections;
using System.Collections.Generic;
using System.Net.WebSockets;
using System.Security.Cryptography;
using System.Text;
using System.Threading;
using UnityEngine;
using Microphone = FrostweepGames.MicrophonePro.Microphone;
/// <summary>
/// Speech-to-text component that streams microphone audio to the XunFei IAT
/// WebSocket endpoint (<c>wss://iat-api.xfyun.cn/v2/iat</c>) and reports the
/// concatenated recognition text through a callback.
/// </summary>
public class XunFeiSTT : STT
{
    // NOTE(review): these serialized defaults embed what look like real credentials
    // in source. Consider moving them to a non-versioned asset or configuration and
    // rotating the keys. The values are kept here so existing behaviour is unchanged.
    [SerializeField] private string APPID = "f2433640";
    [SerializeField] private string APIKey = "d46a0dc56145c074ae751940ca74caf4";
    [SerializeField] private string APISecret = "OTU4YTU4ZmY2OGYyYzQxYTA2ZTc5OWM5";

    /// <summary>Maximum length of one recording, in seconds.</summary>
    private const int RecordSeconds = 60;

    /// <summary>Recording sample rate in Hz; must match the rate declared in the data frames.</summary>
    private const int SampleRate = 16000;

    /// <summary>Size of the buffer used for each WebSocket receive call, in bytes.</summary>
    private const int ReceiveBufferSize = 4096;

    // Socket of the most recently started session; End() polls it.
    private ClientWebSocket webSocket;
    private AudioClip RecordedClip;
    private string microphoneDevice;
    // Receiver of the final transcript; assigned in StopSpeechToText.
    private Action<string> callBack;

    private void Awake()
    {
        m_SpeechRecognizeURL = "wss://iat-api.xfyun.cn/v2/iat";
    }

    /// <summary>
    /// Starts a recognition session: calls the base implementation, verifies that at
    /// least one microphone device is listed, then opens the connection and starts recording.
    /// </summary>
    /// <param name="_callback">Forwarded to the base implementation.</param>
    public override void StartSpeechToText(Action<string> _callback = null)
    {
        base.StartSpeechToText(_callback);

        // Only used to check that a device exists; recording itself uses the default device.
        microphoneDevice = Microphone.devices.Length > 0 ? Microphone.devices[0] : null;
        if (string.IsNullOrEmpty(microphoneDevice))
        {
            Debug.LogError("没找到麦克风!");
            return;
        }

        Begin();
    }

    /// <summary>
    /// Stops recording and stores the callback that receives the recognised text
    /// once the session finishes.
    /// </summary>
    /// <param name="_callback">Invoked with the transcript when it is non-empty.</param>
    public override void StopSpeechToText(Action<string> _callback = null)
    {
        base.StopSpeechToText(_callback);

        if (string.IsNullOrEmpty(microphoneDevice))
        {
            Debug.LogError("没找到麦克风!");
            return;
        }

        callBack = _callback;
        StartCoroutine(End());
    }

    /// <summary>
    /// Opens the WebSocket session (asynchronously) and starts a non-looping
    /// recording on the default microphone.
    /// </summary>
    public void Begin()
    {
        Connect();
        RecordedClip = Microphone.Start(null, false, RecordSeconds, SampleRate);
    }

    /// <summary>
    /// Stops the microphone and then waits until the current socket is no longer open.
    /// </summary>
    /// <returns>Coroutine enumerator.</returns>
    public IEnumerator End()
    {
        Microphone.End(null);

        // The socket is null when no session was ever started; there is nothing to wait for then.
        yield return new WaitUntil(() => webSocket == null || webSocket.State != WebSocketState.Open);
    }

    /// <summary>
    /// Runs one recognition session: connects, starts the audio sender coroutine,
    /// collects result messages until the server reports the final result or the
    /// connection ends, then invokes the callback with the collected text.
    /// </summary>
    async void Connect()
    {
        // Work on a local reference so a later Begin() that replaces the field
        // cannot redirect this session to a different socket.
        using (ClientWebSocket socket = new ClientWebSocket())
        {
            webSocket = socket;
            StringBuilder transcript = new StringBuilder();

            try
            {
                Uri url = new Uri(GetUrl(m_SpeechRecognizeURL));
                await socket.ConnectAsync(url, CancellationToken.None);
                StartCoroutine(Send(socket));

                byte[] buffer = new byte[ReceiveBufferSize];
                List<byte> message = new List<byte>();

                while (socket.State == WebSocketState.Open)
                {
                    WebSocketReceiveResult received =
                        await socket.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);

                    if (received.MessageType == WebSocketMessageType.Close)
                    {
                        // Server ended the session; deliver whatever was recognised so far.
                        break;
                    }

                    // Use the reported byte count instead of guessing from trailing zero
                    // bytes, and keep accumulating until the whole message has arrived.
                    for (int i = 0; i < received.Count; i++)
                    {
                        message.Add(buffer[i]);
                    }

                    if (!received.EndOfMessage)
                    {
                        continue;
                    }

                    string json = Encoding.UTF8.GetString(message.ToArray());
                    message.Clear();

                    if (string.IsNullOrEmpty(json))
                    {
                        continue;
                    }

                    STTData data = JsonUtility.FromJson<STTData>(json);
                    transcript.Append(Get(data));

                    // Status 2 marks the last result of the session.
                    if (data != null && data.data != null && data.data.status == 2)
                    {
                        socket.Abort();
                    }
                }
            }
            catch (Exception e)
            {
                // As before, a failed session delivers no text; the failure is now logged.
                Debug.LogError("XunFeiSTT: speech recognition session failed: " + e.Message);
                socket.Abort();
                return;
            }

            string s = transcript.ToString();
            if (!string.IsNullOrEmpty(s))
            {
                callBack?.Invoke(s);
                Debug.LogWarning("识别到声音:" + s);
            }
        }
    }

    /// <summary>
    /// JSON shape of one result message, limited to the fields this class reads.
    /// </summary>
    [Serializable]
    public class STTData
    {
        [Serializable]
        public class Data
        {
            [Serializable]
            public class Result
            {
                [Serializable]
                public class Ws
                {
                    [Serializable]
                    public class Cw
                    {
                        public string w;
                    }

                    public Cw[] cw;
                }

                public Ws[] ws;
            }

            public int status;
            public Result result;
        }

        public Data data;
    }

    /// <summary>
    /// Concatenates every <c>w</c> string found under <c>data.result.ws[].cw[]</c>.
    /// </summary>
    /// <param name="data">Parsed result message; may be null or partially populated.</param>
    /// <returns>The concatenated text, or an empty string when the message carries no result.</returns>
    string Get(STTData data)
    {
        STTData.Data.Result.Ws[] segments = data?.data?.result?.ws;
        if (segments == null)
        {
            return string.Empty;
        }

        StringBuilder text = new StringBuilder();
        foreach (var segment in segments)
        {
            if (segment == null || segment.cw == null)
            {
                continue;
            }

            foreach (var candidate in segment.cw)
            {
                text.Append(candidate?.w);
            }
        }

        return text.ToString();
    }

    /// <summary>
    /// Sends one JSON data frame with the given status and Base64-encoded audio.
    /// Does nothing when the socket is not open. The send is not awaited.
    /// </summary>
    /// <param name="audio">Raw PCM bytes, or null for a frame without audio.</param>
    /// <param name="status">Frame status: this class sends 0 first, 1 afterwards and 2 for the final frame.</param>
    /// <param name="socket">Socket of the session the frame belongs to.</param>
    void SendData(byte[] audio, int status, ClientWebSocket socket)
    {
        if (socket.State != WebSocketState.Open)
        {
            return;
        }

        string audioStr = audio == null ? "" : Convert.ToBase64String(audio);
        string message = "{\"common\":{\"app_id\":\"" + APPID + "\"},\"business\":{\"language\":\"zh_cn\",\"domain\":\"iat\",\"accent\":\"mandarin\",\"vad_eos\":2000}," +
            "\"data\":{\"status\":" + status + ",\"encoding\":\"raw\",\"format\":\"audio/L16;rate=16000\",\"audio\":\"" + audioStr + "\"}}";

        try
        {
            // NOTE(review): the payload is JSON text but is sent as a Binary frame, as in
            // the original code; confirm against the service documentation before changing it.
            socket.SendAsync(new ArraySegment<byte>(Encoding.UTF8.GetBytes(message)), WebSocketMessageType.Binary, true, CancellationToken.None);
        }
        catch (Exception e)
        {
            // An exception thrown here would otherwise stop the sender coroutine
            // before it can release the microphone.
            Debug.LogWarning("XunFeiSTT: failed to send audio frame: " + e.Message);
        }
    }

    /// <summary>
    /// Coroutine that streams the recorded audio in chunks of at most 640 samples
    /// every 40 ms, then sends the final frame and stops the microphone.
    /// </summary>
    /// <param name="socket">Socket of the session to stream to.</param>
    /// <returns>Coroutine enumerator.</returns>
    IEnumerator Send(ClientWebSocket socket)
    {
        // Wait for the first recorded samples, but give up if the connection goes away
        // so the coroutine cannot wait forever.
        yield return new WaitWhile(() => Microphone.GetPosition(null) <= 0 && socket.State == WebSocketState.Open);

        const float waitTime = 0.04f; // send audio every 40 ms
        const int Maxlength = 640;    // maximum number of samples per frame

        AudioClip clip = RecordedClip;
        int position = Microphone.GetPosition(null);
        int status = 0;
        int lastPosition = 0;

        while (clip != null && position < clip.samples && socket.State == WebSocketState.Open)
        {
            yield return new WaitForSecondsRealtime(waitTime);

            if (Microphone.IsRecording(null))
            {
                position = Microphone.GetPosition(null);
            }

            if (position <= lastPosition)
            {
                // Everything recorded so far has been sent.
                break;
            }

            int length = Math.Min(position - lastPosition, Maxlength);
            byte[] chunk = GetClip(lastPosition, length, clip);
            SendData(chunk, status, socket);
            lastPosition += length;
            status = 1;
        }

        SendData(null, 2, socket);
        Microphone.End(null);
    }

    /// <summary>
    /// Reads <paramref name="length"/> float samples from the clip, starting at sample
    /// offset <paramref name="star"/>, and converts them to 16-bit little-endian PCM.
    /// </summary>
    /// <param name="star">Offset of the first sample to read.</param>
    /// <param name="length">Number of samples to read.</param>
    /// <param name="recordedClip">Clip to read from.</param>
    /// <returns>Two bytes per sample; an empty array when <paramref name="length"/> is not positive.</returns>
    public static byte[] GetClip(int star, int length, AudioClip recordedClip)
    {
        if (length <= 0)
        {
            return new byte[0];
        }

        float[] samples = new float[length];
        recordedClip.GetData(samples, star);

        const int rescaleFactor = 32767;
        byte[] outData = new byte[samples.Length * 2];
        for (int i = 0; i < samples.Length; i++)
        {
            // Clamp first so an out-of-range sample saturates instead of wrapping around.
            short pcm = (short)(Mathf.Clamp(samples[i], -1f, 1f) * rescaleFactor);
            outData[i * 2] = (byte)(pcm & 0xFF);
            outData[i * 2 + 1] = (byte)((pcm >> 8) & 0xFF);
        }

        return outData;
    }

    /// <summary>
    /// Builds the authenticated connection URL: signs the host, date and request line
    /// with HMAC-SHA256 using the API secret and appends the Base64 authorization,
    /// date and host as query parameters.
    /// </summary>
    /// <param name="uriStr">Base WebSocket endpoint URL.</param>
    /// <returns>The endpoint URL with the authentication query string appended.</returns>
    string GetUrl(string uriStr)
    {
        Uri uri = new Uri(uriStr);

        // The "r" format always prints "GMT" but does not convert the value, so the
        // time must already be UTC for the printed date to be correct.
        string date = DateTime.UtcNow.ToString("r");
        string signatureOrigin = "host: " + uri.Host + "\ndate: " + date + "\nGET " + uri.AbsolutePath + " HTTP/1.1";

        string signature;
        using (HMACSHA256 mac = new HMACSHA256(Encoding.UTF8.GetBytes(APISecret)))
        {
            signature = Convert.ToBase64String(mac.ComputeHash(Encoding.UTF8.GetBytes(signatureOrigin)));
        }

        string authorizationOrigin = string.Format("api_key=\"{0}\",algorithm=\"hmac-sha256\",headers=\"host date request-line\",signature=\"{1}\"", APIKey, signature);
        string authorization = Convert.ToBase64String(Encoding.UTF8.GetBytes(authorizationOrigin));

        // Base64 may contain '+', '/' and '=', and the date contains spaces, commas and
        // colons; escape both so they survive as query values.
        return string.Format("{0}?authorization={1}&date={2}&host={3}", uri, Uri.EscapeDataString(authorization), Uri.EscapeDataString(date), uri.Host);
    }
}