using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using System;
using System.Text;
using System.Security.Cryptography;
using Microphone = FrostweepGames.MicrophonePro.Microphone;
using System.Net.WebSockets;
using System.Threading;
public class XunFeiSTTRealtime : STT
{
// appid and appkey of the application created in the iFlytek console.
// NOTE(review): these defaults look like real credentials committed to source —
// confirm whether they should live in a non-versioned config instead.
[SerializeField] private string appid = "f2433640";
[SerializeField] private string appkey = "6fde2501ca9c018c392bdffb19757d8d";
// Intermediate values of the request signature; each is (re)written by GetUri().
private string timeStamp;
private string baseString;
private string toMd5;
private string signa;
// Clip returned by Microphone.Start; the audio sender reads samples from it.
private AudioClip RecordedClip;
// WebSocket of the current recognition session (null when no session exists).
private ClientWebSocket ws;
// Cancellation token handed to the WebSocket connect/receive calls.
private CancellationToken ct;
// Maximum recording length; passed as the length argument of Microphone.Start
// (presumably seconds — confirm against the Microphone plugin's documentation).
private int MAX_RECORD_LENGTH = 3599;
/// <summary>
/// Speech recognition callback; invoked by AnalysisResult for final results.
/// </summary>
public Action asrCallback;
/// <summary>
/// Starts speech-to-text: forwards the callback to the base implementation,
/// stores it in <see cref="asrCallback"/> and then starts a recognition
/// session through <see cref="StartASR"/>.
/// </summary>
/// <param name="_callback">Callback stored as the recognition result handler.</param>
public override void StartSpeechToText(Action _callback)
{
base.StartSpeechToText(_callback);
asrCallback = _callback;
StartASR();
}
/// <summary>
/// Stops speech-to-text: forwards the callback to the base implementation,
/// clears <see cref="asrCallback"/> so no further results are delivered, and
/// then ends the recognition session through <see cref="StopASR"/>.
/// </summary>
/// <param name="_callback">Callback passed on to the base implementation only.</param>
public override void StopSpeechToText(Action _callback)
{
base.StopSpeechToText(_callback);
// Cleared before StopASR so results arriving during shutdown are dropped.
asrCallback = null;
StopASR();
}
/// <summary>
/// Begins a recognition session: starts microphone capture and opens the
/// WebSocket connection. Only logs and returns when a session is already
/// connecting or open, when no microphone is present, or when the microphone
/// cannot be started.
/// </summary>
public void StartASR()
{
    // A socket that is still performing its handshake is just as busy as an open
    // one; letting a second call through would create a second connection.
    if (ws != null && (ws.State == WebSocketState.Open || ws.State == WebSocketState.Connecting))
    {
        Debug.LogWarning("开始语音识别失败!,等待上次识别连接结束");
        return;
    }

    if (Microphone.devices.Length == 0)
    {
        Debug.LogError("未检测到可用的麦克风");
        return;
    }

    // Start capturing first so a microphone failure is detected before a
    // connection is opened. Audio is only sent once the connection is up, so
    // this ordering does not change what reaches the server.
    RecordedClip = Microphone.Start(null, false, MAX_RECORD_LENGTH, 16000);
    if (RecordedClip == null)
    {
        Debug.LogError("Microphone.Start returned no AudioClip; speech recognition not started");
        return;
    }

    ConnectASR_Aysnc();
}
// Handle of the running SendAudioClip coroutine. StopCoroutine only stops a
// coroutine when given the handle (or enumerator) it was started with, so the
// handle is kept here.
private Coroutine sendAudioCoroutine;

// Most recent audio-frame send that may still be in flight. ClientWebSocket
// supports only one outstanding send at a time, so the next send waits for it.
private System.Threading.Tasks.Task pendingAudioSend;

/// <summary>
/// Ends the current recognition session: stops the audio sender and the
/// microphone, sends the end-of-audio marker when the socket is still open,
/// closes the connection and releases the socket. Safe to call repeatedly and
/// when no session exists.
/// </summary>
public async void StopASR()
{
    // Claim the socket immediately. Re-entrant calls then see null and return,
    // and a session started while this one is closing cannot be aborted here.
    ClientWebSocket socket = ws;
    if (socket == null)
    {
        return;
    }
    ws = null;

    System.Threading.Tasks.Task inFlight = pendingAudioSend;
    pendingAudioSend = null;

    try
    {
        // Stop producing audio before touching the socket. This also runs when
        // the socket is already Closed/Aborted (e.g. the connection failed), so
        // the microphone is never left recording.
        if (sendAudioCoroutine != null)
        {
            StopCoroutine(sendAudioCoroutine);
            sendAudioCoroutine = null;
        }
        Microphone.End(null);

        // Let the last audio frame finish; a second concurrent send is not supported.
        if (inFlight != null)
        {
            await inFlight;
        }

        // The end marker is only meaningful while the connection is open.
        if (socket.State == WebSocketState.Open)
        {
            await socket.SendAsync(
                new ArraySegment<byte>(Encoding.UTF8.GetBytes("{\"end\": true}")),
                WebSocketMessageType.Text, // kept as a text frame, as in the original implementation
                true,
                CancellationToken.None
            );
        }

        // Wait for the close handshake to complete.
        if (socket.State == WebSocketState.Open || socket.State == WebSocketState.CloseSent)
        {
            await socket.CloseAsync(
                WebSocketCloseStatus.NormalClosure,
                "Closing normally",
                CancellationToken.None
            );
        }
    }
    catch (ObjectDisposedException)
    {
        // The socket is already being torn down; nothing left to do.
        Debug.Log("WebSocket already disposed");
    }
    catch (Exception ex)
    {
        Debug.LogError($"StopASR error: {ex.Message}");
    }
    finally
    {
        // Force termination if the graceful close did not finish, then release.
        if (socket.State != WebSocketState.Closed && socket.State != WebSocketState.Aborted)
        {
            socket.Abort();
        }
        socket.Dispose();
    }
}

/// <summary>
/// Opens the WebSocket connection for a new session, starts the audio sender
/// coroutine once connected, and then processes server messages until the
/// socket leaves the Open state. All failures are logged; none are thrown to
/// the caller (this is an <c>async void</c> method).
/// </summary>
async void ConnectASR_Aysnc()
{
    // Release whatever a previous session left behind before replacing it.
    if (sendAudioCoroutine != null)
    {
        StopCoroutine(sendAudioCoroutine);
        sendAudioCoroutine = null;
    }
    if (ws != null)
    {
        ws.Dispose();
    }
    pendingAudioSend = null;

    // Everything below works on this local reference because the ws field can
    // be cleared or replaced while this method is suspended at an await.
    ClientWebSocket socket = new ClientWebSocket();
    ws = socket;
    ct = new CancellationToken();

    try
    {
        Uri url = GetUri();
        await socket.ConnectAsync(url, ct);
    }
    catch (Exception ex)
    {
        Debug.LogError($"ConnectASR error: {ex.Message}");
        // Only stop the microphone when this is still the current session.
        // The socket stays in ws so a later StopASR can release it.
        if (ws == socket && Microphone.IsRecording(null))
        {
            Microphone.End(null);
        }
        return;
    }

    // The session may have been stopped or superseded while connecting.
    if (ws != socket || socket.State != WebSocketState.Open)
    {
        return;
    }

    sendAudioCoroutine = StartCoroutine(SendAudioClip());

    try
    {
        byte[] buffer = new byte[4096];
        using (System.IO.MemoryStream message = new System.IO.MemoryStream())
        {
            while (socket.State == WebSocketState.Open)
            {
                WebSocketReceiveResult received =
                    await socket.ReceiveAsync(new ArraySegment<byte>(buffer), ct);

                if (received.MessageType == WebSocketMessageType.Close)
                {
                    break;
                }

                // Use the reported byte count and reassemble fragmented messages;
                // a message may be larger than the buffer or arrive in several frames.
                message.Write(buffer, 0, received.Count);
                if (!received.EndOfMessage)
                {
                    continue;
                }

                string str = Encoding.UTF8.GetString(message.GetBuffer(), 0, (int)message.Length);
                message.SetLength(0);
                if (string.IsNullOrEmpty(str))
                {
                    continue;
                }

                try
                {
                    HandleServerMessage(str);
                }
                catch (Exception ex)
                {
                    // One bad message must not end the whole receive loop.
                    Debug.Log(ex.Message);
                }
            }
        }
    }
    catch (Exception ex)
    {
        Debug.Log(ex.Message);
    }
}

/// <summary>
/// Parses one server message and dispatches on its <c>action</c> field:
/// "started" is logged, "result" is passed to AnalysisResult, "error" logs
/// the description. Any other (or missing) action is ignored.
/// </summary>
/// <param name="json">Complete JSON text of one server message.</param>
private void HandleServerMessage(string json)
{
    ReceiveJsonData receiveJsonData = JsonUtility.FromJson<ReceiveJsonData>(json);
    switch (receiveJsonData.action)
    {
        case "started":
            Debug.Log("握手成功!");
            break;
        case "result":
            AnalysisResult(receiveJsonData.data);
            break;
        case "error":
            Debug.Log("Error: " + receiveJsonData.desc);
            break;
    }
}

/// <summary>
/// Coroutine that streams recorded audio to the server: every 40 ms it reads
/// the samples recorded since the previous tick (capped per tick), converts
/// them with GetAudioClip and sends them as one binary frame. Ends when the
/// socket leaves the Open state, the clip is full, or the time budget is spent.
/// </summary>
/// <returns>Enumerator driven by Unity's coroutine scheduler.</returns>
IEnumerator SendAudioClip()
{
    // Captured once: the ws field can be cleared while this coroutine is suspended.
    ClientWebSocket socket = ws;
    if (socket == null)
    {
        yield break;
    }

    // Wait until the microphone has actually produced samples.
    yield return new WaitWhile(() => Microphone.GetPosition(null) <= 0);

    AudioClip clip = RecordedClip;
    if (clip == null)
    {
        Debug.LogError("No recorded AudioClip available; audio will not be sent");
        yield break;
    }

    const float waitTime = 0.04f; // send interval: 40 ms
    const int Maxlength = 1280;   // max samples per tick (GetAudioClip emits 2 bytes per sample)
    float t = 0;
    int position = Microphone.GetPosition(null);
    int lastPosition = 0;

    while (position < clip.samples && socket.State == WebSocketState.Open)
    {
        t += waitTime;
        if (t >= MAX_RECORD_LENGTH)
        {
            Debug.Log("录音时长已用尽,结束语音识别!");
            break;
        }

        yield return new WaitForSecondsRealtime(waitTime);

        if (Microphone.IsRecording(null))
        {
            position = Microphone.GetPosition(null);
        }

        if (position <= lastPosition)
        {
            // No new samples since the last tick; a zero-length read from the
            // clip would fail, so wait for the next tick.
            continue;
        }

        System.Threading.Tasks.Task inFlight = pendingAudioSend;
        if (inFlight != null)
        {
            if (!inFlight.IsCompleted)
            {
                // Previous frame still being written; the unsent samples stay in
                // the clip and are picked up on a later tick.
                continue;
            }
            if (inFlight.IsFaulted || inFlight.IsCanceled)
            {
                Debug.Log(inFlight.Exception != null
                    ? inFlight.Exception.GetBaseException().Message
                    : "Audio send was canceled");
                break;
            }
        }

        // The state may have changed while this coroutine was waiting.
        if (socket.State != WebSocketState.Open)
        {
            break;
        }

        int length = Math.Min(position - lastPosition, Maxlength);
        byte[] data = GetAudioClip(lastPosition, length, clip);
        try
        {
            // Not awaited (coroutines cannot await); the task is tracked so the
            // next tick and StopASR can observe its completion or failure.
            pendingAudioSend = socket.SendAsync(new ArraySegment<byte>(data),
                WebSocketMessageType.Binary, true, CancellationToken.None);
        }
        catch (Exception ex)
        {
            Debug.Log(ex.Message);
        }

        lastPosition += length;
    }
}
//string endText = "";
/// <summary>
/// Unity application-quit message: ends the recognition session via StopASR.
/// </summary>
private void OnApplicationQuit() => StopASR();
/// <summary>
/// Parses one recognition result, joins the first candidate word (<c>w</c>) of
/// every entry in <c>cn.st.rt[0].ws</c> into a sentence and, when the result is
/// final (<c>type</c> equals "0"), passes the sentence to
/// <see cref="asrCallback"/>. Intermediate results (any other type) are ignored.
/// </summary>
/// <param name="data">JSON string of the recognition result.</param>
void AnalysisResult(string data)
{
    if (string.IsNullOrEmpty(data))
    {
        return;
    }

    var sentence = JsonUtility.FromJson<Data>(data).cn.st;

    // Payloads without any recognition entry carry nothing to report.
    if (sentence.rt == null || sentence.rt.Length == 0 || sentence.rt[0].ws == null)
    {
        return;
    }

    StringBuilder stringBuilder = new StringBuilder();
    foreach (var word in sentence.rt[0].ws)
    {
        // Only the w field of the first candidate is used; skip incomplete entries.
        if (word == null || word.cw == null || word.cw.Length == 0 || word.cw[0] == null)
        {
            continue;
        }
        stringBuilder.Append(word.cw[0].w);
    }

    // type: "0" = final result (sentence finished); "1" = intermediate result
    // that a later message may still revise.
    if (sentence.type == "0")
    {
        asrCallback?.Invoke(stringBuilder.ToString());
    }
}
/// <summary>
/// Reads a run of samples from an audio clip and converts them to 16-bit
/// signed little-endian PCM.
/// </summary>
/// <param name="start">Index of the first sample to read.</param>
/// <param name="length">Number of samples to read.</param>
/// <param name="recordedClip">Clip to read from.</param>
/// <returns>
/// Two bytes per sample (low byte first); an empty array when
/// <paramref name="length"/> is not positive.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="recordedClip"/> is null.</exception>
public static byte[] GetAudioClip(int start, int length, AudioClip recordedClip)
{
    if (recordedClip == null)
    {
        throw new ArgumentNullException(nameof(recordedClip));
    }
    if (length <= 0)
    {
        return new byte[0];
    }

    float[] samples = new float[length];
    recordedClip.GetData(samples, start);

    const float rescaleFactor = 32767f;
    byte[] outData = new byte[samples.Length * 2];
    for (int i = 0; i < samples.Length; i++)
    {
        // Clamp first: a float outside [-1, 1] would overflow the short conversion.
        float sample = float.IsNaN(samples[i]) ? 0f : Mathf.Clamp(samples[i], -1f, 1f);
        short pcm = (short)(sample * rescaleFactor);

        // Write the bytes explicitly so the order is little-endian on every platform.
        outData[i * 2] = (byte)(pcm & 0xFF);
        outData[i * 2 + 1] = (byte)((pcm >> 8) & 0xFF);
    }
    return outData;
}
/// <summary>
/// Builds the signed request URI: the base URL followed by the appid, the
/// current timestamp, the signature and the fixed <c>pd=tech</c> parameter.
/// Also stores the intermediate values in the timeStamp, baseString, toMd5
/// and signa fields.
/// </summary>
/// <returns>The request URI.</returns>
private Uri GetUri()
{
    // Timestamp with one-second resolution.
    timeStamp = GetTimeStamp();
    // baseString is the appid followed by the timestamp.
    baseString = appid + timeStamp;
    // MD5 of baseString ...
    toMd5 = ToMD5(baseString);
    // ... then HMAC-SHA1 keyed with appkey, Base64-encoded.
    signa = ToHmacSHA1(toMd5, appkey);

    // The base URL is inserted as a value, not used as part of a format string,
    // so braces in it cannot break the formatting.
    string requestUrl =
        $"{m_SpeechRecognizeURL}appid={appid}&ts={timeStamp}&signa={UrlEncode(signa)}&pd=tech";
    return new Uri(requestUrl);
}
#region 一些加密算法
/// <summary>
/// Percent-encodes a string: every byte of its UTF-8 representation is written
/// as <c>%</c> followed by exactly two lowercase hex digits.
/// </summary>
/// <param name="str">String to encode.</param>
/// <returns>The percent-encoded string; empty for an empty input.</returns>
/// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
public static string UrlEncode(string str)
{
    if (str == null)
    {
        throw new ArgumentNullException(nameof(str));
    }

    byte[] bytes = Encoding.UTF8.GetBytes(str);
    StringBuilder sb = new StringBuilder(bytes.Length * 3);
    foreach (byte b in bytes)
    {
        // "x2" zero-pads: a byte below 0x10 must become e.g. "%0a", not "%a".
        sb.Append('%').Append(b.ToString("x2"));
    }
    return sb.ToString();
}
/// <summary>
/// Returns the current UTC time as seconds elapsed since 1970-01-01 00:00:00,
/// converted to a whole number and formatted as a decimal string.
/// </summary>
/// <returns>Timestamp string with one-second resolution.</returns>
public static string GetTimeStamp()
{
    DateTime unixEpoch = new DateTime(1970, 1, 1, 0, 0, 0, 0);
    double elapsedSeconds = DateTime.UtcNow.Subtract(unixEpoch).TotalSeconds;
    long wholeSeconds = Convert.ToInt64(elapsedSeconds);
    return wholeSeconds.ToString();
}
/// <summary>
/// Computes the MD5 hash of a string's UTF-8 bytes.
/// </summary>
/// <param name="txt">String to hash.</param>
/// <returns>The hash as 32 lowercase hexadecimal characters.</returns>
public static string ToMD5(string txt)
{
    using (MD5 md5 = MD5.Create())
    {
        // UTF-8 rather than Encoding.Default: the default code page varies by
        // platform, and the other helpers in this file already use UTF-8.
        byte[] hash = md5.ComputeHash(Encoding.UTF8.GetBytes(txt));

        StringBuilder sb = new StringBuilder(hash.Length * 2);
        foreach (byte b in hash)
        {
            sb.Append(b.ToString("x2"));
        }
        return sb.ToString();
    }
}
/// <summary>
/// Computes the HMAC-SHA1 of a string and returns it Base64-encoded. Both the
/// text and the key are converted to bytes as UTF-8.
/// </summary>
/// <param name="text">Text to sign.</param>
/// <param name="key">Secret key.</param>
/// <returns>Base64 string of the 20-byte HMAC-SHA1 value.</returns>
public static string ToHmacSHA1(string text, string key)
{
    // HMACSHA1 is IDisposable; release it deterministically.
    using (HMACSHA1 hmacsha1 = new HMACSHA1(Encoding.UTF8.GetBytes(key)))
    {
        byte[] hashBytes = hmacsha1.ComputeHash(Encoding.UTF8.GetBytes(text));
        return Convert.ToBase64String(hashBytes);
    }
}
#endregion
}
/// <summary>
/// Envelope of one message received from the recognition server.
/// </summary>
[Serializable]
public struct ReceiveJsonData
{
/// <summary>
/// Message kind: "started" (handshake), "result" (recognition result), "error" (failure).
/// </summary>
public string action;
/// <summary>
/// Result code (see the service's error code list).
/// </summary>
public string code;
/// <summary>
/// Transcription result data; for "result" messages this is passed to AnalysisResult.
/// </summary>
public string data;
/// <summary>
/// Description text; logged for "error" messages.
/// </summary>
public string desc;
/// <summary>
/// Session ID. Mainly for debugging: it can be supplied when reporting a problem
/// to help identify the session.
/// </summary>
public string sid;
}
/// <summary>
/// Speech recognition result. Sample payload:
/// <code>
/// {
///   "seg_id":7,
///   "cn":{
///     "st":{
///       "rt":[
///         {"ws":[
///           {"cw":[{"w":"我们","wp":"n"}],"wb":23,"we":70},
///           {"cw":[{"w":"生活","wp":"n"}],"wb":71,"we":118},
///           {"cw":[{"w":"的","wp":"n"}],"wb":119,"we":130},
///           {"cw":[{"w":"世界","wp":"n"}],"wb":131,"we":172},
///           {"cw":[{"w":"里","wp":"n"}],"wb":173,"we":201},
///           {"cw":[{"w":"有","wp":"n"}],"wb":202,"we":226},
///           {"cw":[{"w":"两","wp":"n"}],"wb":227,"we":249},
///           {"cw":[{"w":"个","wp":"n"}],"wb":250,"we":263},
///           {"cw":[{"w":"世界","wp":"n"}],"wb":264,"we":320}
///         ]}
///       ],
///       "bg":"5120",
///       "type":"0",
///       "ed":"8520"
///     }
///   },
///   "ls":false
/// }
/// </code>
/// NOTE(review): in the sample, seg_id, wb and we are JSON numbers and ls is a
/// JSON boolean, while the matching fields below are declared as string —
/// confirm how the JSON deserializer in use populates them.
/// </summary>
[Serializable]
public struct Data
{
/// <summary>
/// Sequence number of the transcription result, starting from 0.
/// </summary>
public string seg_id;
[Serializable]
public struct CN
{
[Serializable]
public struct ST
{
[Serializable]
public struct RT
{
[Serializable]
public class WS
{
[Serializable]
public class CW
{
/// <summary>
/// Recognized word.
/// </summary>
public string w;
/// <summary>
/// Word kind: n = normal word; s = filler (modal) word; p = punctuation.
/// </summary>
public string wp;
}
// Candidate words for this position; AnalysisResult reads only the first one.
[SerializeField] public CW[] cw;
/// <summary>
/// Start time of the word within the sentence, in frames (1 frame = 10 ms), i.e. the
/// word starts at (bg + wb*10) ms within the whole audio.
/// For intermediate results wb is 0.
/// </summary>
public string wb;
/// <summary>
/// End time of the word within the sentence, in frames (1 frame = 10 ms), i.e. the
/// word ends at (bg + we*10) ms within the whole audio.
/// For intermediate results we is 0.
/// </summary>
public string we;
}
// Word entries of the sentence, in order.
[SerializeField] public WS[] ws;
}
// Recognition entries; AnalysisResult reads only the first element.
[SerializeField] public RT[] rt;
/// <summary>
/// Start time of the sentence within the whole audio, in milliseconds (ms).
/// For intermediate results bg is accurate.
/// </summary>
public string bg;
/// <summary>
/// Result type: 0 = final result; 1 = intermediate result.
/// </summary>
public string type;
/// <summary>
/// End time of the sentence within the whole audio, in milliseconds (ms).
/// For intermediate results ed is 0.
/// </summary>
public string ed;
}
[SerializeField] public ST st;
}
[SerializeField] public CN cn;
/// <summary>
/// Not documented in this file; the sample payload shows a boolean value (false).
/// NOTE(review): confirm its meaning against the service documentation.
/// </summary>
public string ls;
}