using System;
using System.Collections;
using System.Collections.Generic;
using System.Net.WebSockets;
using System.Security.Cryptography;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using UnityEngine;
using Microphone = FrostweepGames.MicrophonePro.Microphone;

/// <summary>
/// Streaming speech-to-text client for the iFlytek (XunFei) IAT WebSocket API.
/// Records from the default microphone, streams 16 kHz 16-bit PCM to the
/// service in small frames, and reports the concatenated recognition text
/// through the callback supplied to <see cref="StopSpeechToText"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the callback type is assumed to be <c>Action&lt;string&gt;</c>
/// (the recognised text is passed to it); it must match the signature declared
/// by the <c>STT</c> base class - confirm.
/// </remarks>
public class XunFeiSTT : STT
{
    // NOTE(review): credentials are committed in source. They should be rotated
    // and supplied through the inspector or external configuration instead.
    [SerializeField] private string APPID = "f2433640";
    [SerializeField] private string APIKey = "d46a0dc56145c074ae751940ca74caf4";
    [SerializeField] private string APISecret = "OTU4YTU4ZmY2OGYyYzQxYTA2ZTc5OWM5";

    private ClientWebSocket webSocket;
    private AudioClip RecordedClip;
    private string microphoneDevice;

    // NOTE(review): only assigned in StopSpeechToText; a result that arrives
    // before Stop is called uses whatever value is stored here at that time.
    private Action<string> callBack;

    /// <summary>Sets the recognition endpoint used by <see cref="GetUrl"/>.</summary>
    private void Awake()
    {
        m_SpeechRecognizeURL = "wss://iat-api.xfyun.cn/v2/iat";
    }

    /// <summary>
    /// Starts a recognition session: forwards to the base implementation, then
    /// connects to the service and begins recording if a microphone exists.
    /// </summary>
    /// <param name="_callback">Callback forwarded to the base implementation.</param>
    public override void StartSpeechToText(Action<string> _callback = null)
    {
        base.StartSpeechToText(_callback);

        // Use the presence of a first device as the "microphone available" check.
        microphoneDevice = Microphone.devices.Length > 0 ? Microphone.devices[0] : null;
        if (string.IsNullOrEmpty(microphoneDevice))
        {
            Debug.LogError("没找到麦克风!");
            return;
        }

        Begin();
    }

    /// <summary>
    /// Stops recording and registers the callback that receives the recognised text.
    /// </summary>
    /// <param name="_callback">Invoked with the recognised text when the session ends.</param>
    public override void StopSpeechToText(Action<string> _callback = null)
    {
        base.StopSpeechToText(_callback);

        if (string.IsNullOrEmpty(microphoneDevice))
        {
            Debug.LogError("没找到麦克风!");
            return;
        }

        callBack = _callback;
        StartCoroutine(End());
    }

    /// <summary>Opens the WebSocket connection and starts a 60 s, 16 kHz recording.</summary>
    public void Begin()
    {
        Connect();
        RecordedClip = Microphone.Start(null, false, 60, 16000);
    }

    /// <summary>
    /// Stops the microphone and waits until the WebSocket is no longer open.
    /// </summary>
    public IEnumerator End()
    {
        Microphone.End(null);
        // Treat a missing socket as "already closed" instead of throwing.
        yield return new WaitUntil(() => webSocket == null || webSocket.State != WebSocketState.Open);
    }

    /// <summary>
    /// Connects to the service, starts the audio sender coroutine, collects the
    /// recognition results until the final frame (status 2) or the connection
    /// ends, then invokes the callback with the accumulated text.
    /// </summary>
    private async void Connect()
    {
        using (webSocket = new ClientWebSocket())
        {
            // Work on a local reference: the field may be replaced by a later
            // Begin() while this session is still running.
            ClientWebSocket socket = webSocket;
            CancellationToken ct = CancellationToken.None;

            try
            {
                await socket.ConnectAsync(new Uri(GetUrl(m_SpeechRecognizeURL)), ct);
            }
            catch (Exception e)
            {
                Debug.LogError("XunFeiSTT: failed to connect: " + e);
                return;
            }

            StartCoroutine(Send(socket));

            var recognized = new StringBuilder();
            var buffer = new byte[4096];
            var messageBytes = new List<byte>();

            while (socket.State == WebSocketState.Open)
            {
                try
                {
                    // Reassemble one complete message; a message may span
                    // several receive calls when it exceeds the buffer size.
                    messageBytes.Clear();
                    WebSocketReceiveResult received;
                    do
                    {
                        received = await socket.ReceiveAsync(new ArraySegment<byte>(buffer), ct);
                        messageBytes.AddRange(new ArraySegment<byte>(buffer, 0, received.Count));
                    }
                    while (!received.EndOfMessage);

                    if (received.MessageType == WebSocketMessageType.Close)
                    {
                        break;
                    }

                    string json = Encoding.UTF8.GetString(messageBytes.ToArray());
                    if (string.IsNullOrEmpty(json))
                    {
                        break;
                    }

                    STTData data = JsonUtility.FromJson<STTData>(json);
                    if (data == null)
                    {
                        break;
                    }

                    if (data.code != 0)
                    {
                        Debug.LogError("XunFeiSTT: server returned error " + data.code + ": " + data.message);
                        socket.Abort();
                        break;
                    }

                    recognized.Append(Get(data));

                    // Status 2 marks the final result frame of the session.
                    if (data.data != null && data.data.status == 2)
                    {
                        socket.Abort();
                    }
                }
                catch (Exception e)
                {
                    Debug.LogError("XunFeiSTT: receive failed: " + e);
                    socket.Abort();
                    return;
                }
            }

            string s = recognized.ToString();
            if (!string.IsNullOrEmpty(s))
            {
                callBack?.Invoke(s);
                Debug.LogWarning("识别到声音:" + s);
            }
        }
    }

    /// <summary>JSON shape of a recognition response message.</summary>
    [Serializable]
    public class STTData
    {
        [Serializable]
        public class Data
        {
            [Serializable]
            public class Result
            {
                [Serializable]
                public class Ws
                {
                    [Serializable]
                    public class Cw
                    {
                        public string w;
                    }

                    public Cw[] cw;
                }

                public Ws[] ws;
            }

            public int status;
            public Result result;
        }

        // Service-level result code and message; a non-zero code is an error.
        public int code;
        public string message;
        public Data data;
    }

    /// <summary>
    /// Concatenates every <c>w</c> entry of a response in order.
    /// </summary>
    /// <param name="data">Parsed response; any missing level yields an empty string.</param>
    /// <returns>The concatenated text, or an empty string when there is none.</returns>
    private string Get(STTData data)
    {
        var text = new StringBuilder();
        var segments = data?.data?.result?.ws;
        if (segments == null)
        {
            return text.ToString();
        }

        foreach (var segment in segments)
        {
            if (segment?.cw == null)
            {
                continue;
            }

            foreach (var candidate in segment.cw)
            {
                if (candidate != null)
                {
                    text.Append(candidate.w);
                }
            }
        }

        return text.ToString();
    }

    /// <summary>
    /// Sends one audio frame as a JSON message. Does nothing if the socket is
    /// not open. Send failures are logged rather than thrown.
    /// </summary>
    /// <param name="audio">Raw PCM bytes, or null to send an empty audio field.</param>
    /// <param name="status">Frame status written to the message (0, 1 or 2 as used by the sender).</param>
    /// <param name="socket">Socket to send on.</param>
    /// <returns>A task that completes when the send has finished or failed.</returns>
    private async Task SendData(byte[] audio, int status, ClientWebSocket socket)
    {
        if (socket.State != WebSocketState.Open)
        {
            return;
        }

        string audioStr = audio == null ? "" : Convert.ToBase64String(audio);
        string message = "{\"common\":{\"app_id\":\"" + APPID + "\"},\"business\":{\"language\":\"zh_cn\",\"domain\":\"iat\",\"accent\":\"mandarin\",\"vad_eos\":2000},"
            + "\"data\":{\"status\":" + status + ",\"encoding\":\"raw\",\"format\":\"audio/L16;rate=16000\",\"audio\":\"" + audioStr + "\"}}";

        try
        {
            await socket.SendAsync(
                new ArraySegment<byte>(Encoding.UTF8.GetBytes(message)),
                WebSocketMessageType.Binary,
                true,
                CancellationToken.None);
        }
        catch (Exception e)
        {
            Debug.LogWarning("XunFeiSTT: send failed: " + e.Message);
        }
    }

    /// <summary>
    /// Streams recorded samples to the service every 40 ms in frames of at most
    /// 640 samples, then sends the closing frame (status 2) and stops the microphone.
    /// </summary>
    /// <param name="socket">Socket of the session this sender belongs to.</param>
    private IEnumerator Send(ClientWebSocket socket)
    {
        // Wait for the first samples, but give up if the connection is already gone.
        yield return new WaitWhile(() => Microphone.GetPosition(null) <= 0 && socket.State == WebSocketState.Open);

        AudioClip clip = RecordedClip;
        if (clip == null)
        {
            Debug.LogError("XunFeiSTT: no recording clip available.");
            socket.Abort();
            Microphone.End(null);
            yield break;
        }

        const float waitTime = 0.04f; // send audio every 40 ms
        const int maxLength = 640;    // maximum samples per frame

        int position = Microphone.GetPosition(null);
        int lastPosition = 0;
        int status = 0; // 0 = first frame, 1 = continuation
        Task pending = null;

        while (position < clip.samples && socket.State == WebSocketState.Open)
        {
            yield return new WaitForSecondsRealtime(waitTime);

            // Once recording has stopped, keep the last known position and
            // drain the remaining samples.
            if (Microphone.IsRecording(null))
            {
                position = Microphone.GetPosition(null);
            }

            if (position <= lastPosition)
            {
                break;
            }

            // Only one send may be outstanding on a socket at a time.
            if (pending != null && !pending.IsCompleted)
            {
                Task inFlight = pending;
                yield return new WaitUntil(() => inFlight.IsCompleted);
            }

            int length = Math.Min(position - lastPosition, maxLength);
            byte[] frame = GetClip(lastPosition, length, clip);
            pending = SendData(frame, status, socket);
            lastPosition += length;
            status = 1;
        }

        if (pending != null && !pending.IsCompleted)
        {
            Task inFlight = pending;
            yield return new WaitUntil(() => inFlight.IsCompleted);
        }

        SendData(null, 2, socket);
        Microphone.End(null);
    }

    /// <summary>
    /// Converts a range of clip samples to 16-bit little-endian PCM bytes.
    /// </summary>
    /// <param name="star">Index of the first sample to read.</param>
    /// <param name="length">Number of samples to read.</param>
    /// <param name="recordedClip">Clip to read the samples from.</param>
    /// <returns>A byte array of <paramref name="length"/> * 2 bytes.</returns>
    public static byte[] GetClip(int star, int length, AudioClip recordedClip)
    {
        float[] samples = new float[length];
        recordedClip.GetData(samples, star);

        const int rescaleFactor = 32767;
        byte[] pcm = new byte[samples.Length * 2];
        for (int i = 0; i < samples.Length; i++)
        {
            // Clamp first so out-of-range samples cannot overflow the short cast.
            short value = (short)(Mathf.Clamp(samples[i], -1f, 1f) * rescaleFactor);
            pcm[i * 2] = (byte)(value & 0xFF);
            pcm[i * 2 + 1] = (byte)((value >> 8) & 0xFF);
        }

        return pcm;
    }

    /// <summary>
    /// Builds the signed connection URL: an HMAC-SHA256 signature over the host,
    /// date and request line, wrapped in a Base64 authorization parameter.
    /// </summary>
    /// <param name="uriStr">Base endpoint URL.</param>
    /// <returns>The endpoint URL with authorization, date and host query parameters.</returns>
    private string GetUrl(string uriStr)
    {
        Uri uri = new Uri(uriStr);

        // The "r" format prints the value as-is and labels it GMT, so the
        // value itself must already be UTC.
        string date = DateTime.UtcNow.ToString("r");
        string signatureOrigin = "host: " + uri.Host + "\ndate: " + date + "\nGET " + uri.AbsolutePath + " HTTP/1.1";

        string signature;
        using (var mac = new HMACSHA256(Encoding.UTF8.GetBytes(APISecret)))
        {
            signature = Convert.ToBase64String(mac.ComputeHash(Encoding.UTF8.GetBytes(signatureOrigin)));
        }

        string authorizationOrigin = string.Format(
            "api_key=\"{0}\",algorithm=\"hmac-sha256\",headers=\"host date request-line\",signature=\"{1}\"",
            APIKey,
            signature);
        string authorization = Convert.ToBase64String(Encoding.UTF8.GetBytes(authorizationOrigin));

        // Escape each value: Base64 may contain '+', '/' and '=', and the date
        // contains spaces, commas and colons.
        return string.Format(
            "{0}?authorization={1}&date={2}&host={3}",
            uri,
            Uri.EscapeDataString(authorization),
            Uri.EscapeDataString(date),
            Uri.EscapeDataString(uri.Host));
    }
}