518 lines
15 KiB
C#
518 lines
15 KiB
C#
using System.Collections;
|
||
using System.Collections.Generic;
|
||
using UnityEngine;
|
||
using System;
|
||
using System.Text;
|
||
using System.Security.Cryptography;
|
||
using Microphone = FrostweepGames.MicrophonePro.Microphone;
|
||
using System.Net.WebSockets;
|
||
using System.Threading;
|
||
|
||
public class XunFeiSTTRealtime : STT
|
||
{
|
||
// appid / appkey of the application created in the iFlytek console.
// NOTE(review): credentials are hard-coded here as the serialized defaults. They are kept
// so existing behaviour does not change, but they should be rotated and supplied through
// the Inspector or external configuration instead of source control.
[SerializeField] private string appid = "f2433640";
[SerializeField] private string appkey = "6fde2501ca9c018c392bdffb19757d8d";

// Intermediate values of the request signature; all four are written by GetUri.
private string timeStamp;
private string baseString;
private string toMd5;
private string signa;

// Clip returned by Microphone.Start in StartASR; SendAudioClip reads samples from it.
private AudioClip RecordedClip;

// WebSocket of the current recognition session (null when no session exists).
private ClientWebSocket ws;

// Token passed to ConnectAsync / ReceiveAsync; assigned in ConnectASR_Aysnc.
private CancellationToken ct;

// Maximum recording length: passed to Microphone.Start as the clip length and compared
// with the elapsed time counter in SendAudioClip. Never reassigned, hence const.
private const int MAX_RECORD_LENGTH = 3599;

/// <summary>
/// Speech-recognition callback; invoked by AnalysisResult with the recognised text.
/// </summary>
public Action<string> asrCallback;
|
||
/// <summary>
/// Starts a recognition session: forwards the call to the base class, remembers
/// <paramref name="_callback"/> as the result callback and then calls <see cref="StartASR"/>.
/// </summary>
/// <param name="_callback">Stored in <see cref="asrCallback"/>.</param>
public override void StartSpeechToText(Action<string> _callback)
{
    base.StartSpeechToText(_callback);

    this.asrCallback = _callback;
    this.StartASR();
}
|
||
/// <summary>
/// Stops the recognition session: forwards the call to the base class, clears the stored
/// result callback and then calls <see cref="StopASR"/>.
/// </summary>
/// <param name="_callback">Only handed to the base implementation; it is not stored here.</param>
public override void StopSpeechToText(Action<string> _callback)
{
    base.StopSpeechToText(_callback);

    // The callback is dropped before the socket is closed.
    this.asrCallback = null;
    this.StopASR();
}
|
||
// Handle of the running SendAudioClip coroutine. StopCoroutine only works with the handle
// (or the very same enumerator) that StartCoroutine was given, so it has to be kept.
private Coroutine sendAudioCoroutine;

// Most recent audio-frame send, which may still be in flight. ClientWebSocket supports only
// one outstanding send at a time, so the sender and StopASR both consult this task.
private System.Threading.Tasks.Task pendingSend;

/// <summary>
/// Starts a recognition session: opens the WebSocket connection in the background and
/// starts recording from the default microphone at 16 kHz.
/// Does nothing (apart from logging) when a session is already open or still connecting,
/// or when no microphone is available.
/// </summary>
public void StartASR()
{
    // A socket that is still connecting is as busy as an open one; starting a second
    // connection here would orphan the first socket.
    if (ws != null && (ws.State == WebSocketState.Open || ws.State == WebSocketState.Connecting))
    {
        Debug.LogWarning("开始语音识别失败!,等待上次识别连接结束");
        return;
    }

    if (Microphone.devices.Length == 0)
    {
        Debug.LogError("未检测到可用的麦克风");
        return;
    }

    ConnectASR_Aysnc();
    RecordedClip = Microphone.Start(null, false, MAX_RECORD_LENGTH, 16000);
}

/// <summary>
/// Ends the current recognition session: stops the audio sender coroutine, releases the
/// microphone, sends the end marker, closes the WebSocket and disposes it.
/// Safe to call repeatedly and when no session exists.
/// </summary>
public async void StopASR()
{
    // Stop the sender through its stored handle. Passing a freshly created
    // SendAudioClip() enumerator to StopCoroutine does not stop the running instance.
    if (sendAudioCoroutine != null)
    {
        StopCoroutine(sendAudioCoroutine);
        sendAudioCoroutine = null;
    }

    // Release the microphone even if the socket never connected or is already closed.
    if (Microphone.IsRecording(null))
    {
        Microphone.End(null);
    }

    ClientWebSocket socket = ws;
    if (socket == null)
    {
        // Nothing to close (also covers repeated calls).
        return;
    }

    // Detach the socket right away: a second StopASR becomes a no-op and a StartASR issued
    // while this one is still closing cannot have its new socket disposed by the finally below.
    ws = null;

    var inFlight = pendingSend;
    pendingSend = null;

    try
    {
        // Let the last audio frame finish; two sends must not overlap on one socket.
        if (inFlight != null)
        {
            await inFlight;
        }

        // The end marker is only sent while the connection is open.
        if (socket.State == WebSocketState.Open)
        {
            await socket.SendAsync(
                new ArraySegment<byte>(Encoding.UTF8.GetBytes("{\"end\": true}")),
                WebSocketMessageType.Text,
                true,
                CancellationToken.None
            );
        }

        // Wait for the close handshake to complete.
        if (socket.State == WebSocketState.Open || socket.State == WebSocketState.CloseSent)
        {
            await socket.CloseAsync(
                WebSocketCloseStatus.NormalClosure,
                "Closing normally",
                CancellationToken.None
            );
        }
    }
    catch (ObjectDisposedException)
    {
        // The socket is already being cleaned up; nothing more to do.
        Debug.Log("WebSocket already disposed");
    }
    catch (Exception ex)
    {
        Debug.LogError($"StopASR error: {ex.Message}");
    }
    finally
    {
        // Force-terminate whatever is left and release the socket.
        if (socket.State != WebSocketState.Closed && socket.State != WebSocketState.Aborted)
        {
            socket.Abort();
        }

        socket.Dispose();
    }
}

/// <summary>
/// Connects to the recognition service, starts the audio sender coroutine and then reads
/// server messages until the socket leaves the Open state. Connection and receive errors
/// are logged instead of escaping from this async void method.
/// </summary>
async void ConnectASR_Aysnc()
{
    // A socket still stored here was never released through StopASR; do not leak it.
    if (ws != null)
    {
        ws.Dispose();
    }

    // Work on a local reference: the ws field may be cleared or replaced while awaiting.
    ClientWebSocket socket = new ClientWebSocket();
    ws = socket;
    ct = CancellationToken.None;

    try
    {
        await socket.ConnectAsync(GetUri(), ct);

        if (ws != socket)
        {
            // StopASR ran while the connection was being established.
            return;
        }

        sendAudioCoroutine = StartCoroutine(SendAudioClip());

        byte[] buffer = new byte[4096];
        List<byte> message = new List<byte>();

        while (socket.State == WebSocketState.Open)
        {
            WebSocketReceiveResult received =
                await socket.ReceiveAsync(new ArraySegment<byte>(buffer), ct);

            if (received.MessageType == WebSocketMessageType.Close)
            {
                break;
            }

            // Use the reported byte count, and collect fragments until the message is
            // complete so a multi-byte UTF-8 character split across frames decodes correctly.
            for (int i = 0; i < received.Count; i++)
            {
                message.Add(buffer[i]);
            }

            if (!received.EndOfMessage)
            {
                continue;
            }

            string str = Encoding.UTF8.GetString(message.ToArray());
            message.Clear();

            if (string.IsNullOrEmpty(str))
            {
                continue;
            }

            ReceiveJsonData receiveJsonData = JsonUtility.FromJson<ReceiveJsonData>(str);

            // switch tolerates a null action, unlike action.Equals(...).
            switch (receiveJsonData.action)
            {
                case "started":
                    Debug.Log("握手成功!");
                    break;
                case "result":
                    AnalysisResult(receiveJsonData.data);
                    break;
                case "error":
                    Debug.Log("Error: " + receiveJsonData.desc);
                    break;
            }
        }
    }
    catch (Exception ex)
    {
        Debug.Log(ex.Message);
    }
}

/// <summary>
/// Coroutine that streams the recorded audio to the server: roughly every 40 ms it
/// converts the newly recorded samples to 16-bit PCM and sends them as one binary message.
/// Ends when the clip is full, the time budget is used up, a send fails or the socket is
/// no longer open.
/// </summary>
/// <returns>Enumerator driven by StartCoroutine.</returns>
IEnumerator SendAudioClip()
{
    // Capture the session socket: the ws field is cleared by StopASR.
    ClientWebSocket socket = ws;

    yield return new WaitWhile(() => Microphone.GetPosition(null) <= 0);

    if (socket == null || RecordedClip == null)
    {
        yield break;
    }

    float t = 0;
    int position = Microphone.GetPosition(null);
    const float waitTime = 0.04f; // send interval: 40 ms
    const int Maxlength = 1280;   // upper bound in SAMPLES per frame (2 bytes each after conversion)
    int lastPosition = 0;

    while (position < RecordedClip.samples && socket.State == WebSocketState.Open)
    {
        t += waitTime;
        if (t >= MAX_RECORD_LENGTH)
        {
            Debug.Log("录音时长已用尽,结束语音识别!");
            break;
        }

        yield return new WaitForSecondsRealtime(waitTime);

        if (Microphone.IsRecording(null))
        {
            position = Microphone.GetPosition(null);
        }

        if (position <= lastPosition)
        {
            // No new samples; a zero-length AudioClip.GetData call would fail.
            continue;
        }

        if (pendingSend != null)
        {
            if (!pendingSend.IsCompleted)
            {
                // Previous frame is still being written. The samples stay buffered in the
                // clip and go out on a later tick.
                continue;
            }

            if (pendingSend.IsFaulted)
            {
                Debug.Log(pendingSend.Exception.GetBaseException().Message);
                pendingSend = null;
                break;
            }
        }

        // The state may have changed while this coroutine was waiting.
        if (socket.State != WebSocketState.Open)
        {
            break;
        }

        int length = Math.Min(position - lastPosition, Maxlength);
        byte[] data = GetAudioClip(lastPosition, length, RecordedClip);
        try
        {
            pendingSend = socket.SendAsync(new ArraySegment<byte>(data), WebSocketMessageType.Binary, true,
                CancellationToken.None);
        }
        catch (Exception ex)
        {
            Debug.Log(ex.Message);
        }

        lastPosition += length;
    }
}
|
||
|
||
//string endText = "";
|
||
|
||
/// <summary>
/// Calls <see cref="StopASR"/> so the recognition session is shut down when the
/// application quits.
/// </summary>
private void OnApplicationQuit() => StopASR();
|
||
|
||
/// <summary>
/// Parses one recognition result and, when it is a final result, passes the recognised
/// sentence to <see cref="asrCallback"/>. Intermediate results are ignored.
/// Results without any word entries are skipped instead of throwing.
/// </summary>
/// <param name="data">JSON string of the recognition result.</param>
void AnalysisResult(string data)
{
    if (string.IsNullOrEmpty(data))
    {
        return;
    }

    Data result = JsonUtility.FromJson<Data>(data);
    Data.CN.ST sentence = result.cn.st;

    // Only the first rt entry is read, as before; bail out when it is missing.
    if (sentence.rt == null || sentence.rt.Length == 0 || sentence.rt[0].ws == null)
    {
        return;
    }

    StringBuilder stringBuilder = new StringBuilder();
    foreach (Data.CN.ST.RT.WS word in sentence.rt[0].ws)
    {
        // Only the w field of the first candidate is used.
        if (word == null || word.cw == null || word.cw.Length == 0 || word.cw[0] == null)
        {
            continue;
        }

        stringBuilder.Append(word.cw[0].w);
    }

    // type: "0" = final result (the sentence is finished); "1" = intermediate result
    // (a later result may still overwrite this text).
    if (sentence.type == "0")
    {
        asrCallback?.Invoke(stringBuilder.ToString());
    }
}
|
||
|
||
/// <summary>
/// Reads a range of samples from a clip and converts it to 16-bit little-endian PCM.
/// </summary>
/// <param name="start">Index of the first sample to read.</param>
/// <param name="length">Number of samples to read.</param>
/// <param name="recordedClip">Clip to read from.</param>
/// <returns>
/// Two bytes per sample, low byte first; an empty array when <paramref name="recordedClip"/>
/// is null or <paramref name="length"/> is not positive.
/// </returns>
public static byte[] GetAudioClip(int start, int length, AudioClip recordedClip)
{
    if (recordedClip == null || length <= 0)
    {
        return new byte[0];
    }

    float[] soundata = new float[length];
    recordedClip.GetData(soundata, start);

    const int rescaleFactor = 32767;
    byte[] outData = new byte[soundata.Length * 2];
    for (int i = 0; i < soundata.Length; i++)
    {
        // Clamp first: a sample outside [-1, 1] would wrap around in the short cast
        // and come out as a loud click.
        float sample = soundata[i];
        if (sample > 1f)
        {
            sample = 1f;
        }
        else if (sample < -1f)
        {
            sample = -1f;
        }

        short temshort = (short)(sample * rescaleFactor);

        // Write the bytes explicitly: no per-sample array allocation, and the byte order
        // does not depend on the platform.
        outData[i * 2] = (byte)(temshort & 0xFF);
        outData[i * 2 + 1] = (byte)((temshort >> 8) & 0xFF);
    }

    return outData;
}
|
||
|
||
/// <summary>
/// Builds the signed request URI. The intermediate values (timestamp, base string, MD5
/// digest and signature) are stored in the corresponding fields.
/// </summary>
/// <returns>
/// m_SpeechRecognizeURL followed by the appid, ts, signa and pd=tech query parameters.
/// </returns>
private Uri GetUri()
{
    // Timestamp with one-second resolution.
    timeStamp = GetTimeStamp();

    // Signature chain: appid + ts -> MD5 hex -> HMAC-SHA1 keyed with appkey -> Base64.
    baseString = appid + timeStamp;
    toMd5 = ToMD5(baseString);
    signa = ToHmacSHA1(toMd5, appkey);

    // m_SpeechRecognizeURL is not declared in this file (presumably inherited from STT).
    string template = m_SpeechRecognizeURL + "appid={0}&ts={1}&signa={2}&pd=tech";
    string encodedSignature = UrlEncode(signa);
    string requestUrl = string.Format(template, appid, timeStamp, encodedSignature);

    return new Uri(requestUrl);
}
|
||
#region 一些加密算法
|
||
|
||
/// <summary>
/// Percent-encodes a string: every byte of its UTF-8 representation is written as
/// '%' followed by exactly two lowercase hex digits. No character is left unescaped.
/// </summary>
/// <param name="str">String to encode.</param>
/// <returns>The encoded string; an empty string for null or empty input.</returns>
public static string UrlEncode(string str)
{
    if (string.IsNullOrEmpty(str))
    {
        return string.Empty;
    }

    byte[] byStr = Encoding.UTF8.GetBytes(str);
    StringBuilder sb = new StringBuilder(byStr.Length * 3);
    foreach (byte b in byStr)
    {
        // "x2" pads to two digits. Convert.ToString(b, 16) produced a single digit for
        // bytes below 0x10 (e.g. "%a"), which is not a valid percent-escape.
        sb.Append('%').Append(b.ToString("x2"));
    }

    return sb.ToString();
}
|
||
|
||
/// <summary>
/// Returns the current Unix time (seconds elapsed since 1970-01-01 00:00:00 UTC).
/// </summary>
/// <returns>The number of seconds, rounded to a whole number, as a decimal string.</returns>
public static string GetTimeStamp()
{
    DateTime unixEpoch = new DateTime(1970, 1, 1, 0, 0, 0, 0);
    double elapsedSeconds = DateTime.UtcNow.Subtract(unixEpoch).TotalSeconds;

    // Convert.ToInt64 rounds to the nearest integer; it does not truncate.
    long wholeSeconds = Convert.ToInt64(elapsedSeconds);
    return wholeSeconds.ToString();
}
|
||
|
||
/// <summary>
/// Computes the MD5 digest of a string's UTF-8 bytes.
/// </summary>
/// <param name="txt">Text to hash.</param>
/// <returns>The digest as 32 lowercase hex characters.</returns>
public static string ToMD5(string txt)
{
    using (MD5 mi = MD5.Create())
    {
        // UTF-8 instead of Encoding.Default: the default encoding differs between
        // platforms/runtimes, which would change the digest for non-ASCII text.
        // The other helpers in this file already use UTF-8.
        byte[] buffer = Encoding.UTF8.GetBytes(txt);
        byte[] newBuffer = mi.ComputeHash(buffer);

        StringBuilder sb = new StringBuilder(newBuffer.Length * 2);
        foreach (byte b in newBuffer)
        {
            sb.Append(b.ToString("x2"));
        }

        return sb.ToString();
    }
}
|
||
|
||
/// <summary>
/// Computes the HMAC-SHA1 of a text and returns it Base64-encoded.
/// Both the text and the key are converted to bytes as UTF-8.
/// </summary>
/// <param name="text">Message to sign.</param>
/// <param name="key">Secret key.</param>
/// <returns>The Base64 string of the 20-byte HMAC.</returns>
public static string ToHmacSHA1(string text, string key)
{
    byte[] keyBytes = Encoding.UTF8.GetBytes(key);
    byte[] dataBuffer = Encoding.UTF8.GetBytes(text);

    // HMACSHA1 is IDisposable; release it deterministically instead of leaving it to the GC.
    using (HMACSHA1 hmacsha1 = new HMACSHA1(keyBytes))
    {
        byte[] hashBytes = hmacsha1.ComputeHash(dataBuffer);
        return Convert.ToBase64String(hashBytes);
    }
}
|
||
|
||
#endregion
|
||
}
|
||
/// <summary>
/// One message received from the recognition service.
/// </summary>
[Serializable]
public struct ReceiveJsonData
{
    /// <summary>Message kind: "started" (handshake), "result" (recognition result) or "error".</summary>
    public string action;

    /// <summary>Result code (see the service's error-code list).</summary>
    public string code;

    /// <summary>Transcription result data.</summary>
    public string data;

    /// <summary>Description.</summary>
    public string desc;

    /// <summary>
    /// Session id. Mainly useful for debugging: it can be supplied when reporting a problem.
    /// </summary>
    public string sid;
}
|
||
/// <summary>
/// Speech-recognition result. Example payload (word list abbreviated):
/// {
///   "seg_id": 7,
///   "cn": {
///     "st": {
///       "rt": [
///         { "ws": [
///             { "cw": [ { "w": "我们", "wp": "n" } ], "wb": 23, "we": 70 },
///             { "cw": [ { "w": "生活", "wp": "n" } ], "wb": 71, "we": 118 },
///             { "cw": [ { "w": "世界", "wp": "n" } ], "wb": 264, "we": 320 }
///         ] }
///       ],
///       "bg": "5120",
///       "type": "0",
///       "ed": "8520"
///     }
///   },
///   "ls": false
/// }
/// NOTE(review): seg_id, wb, we and ls are numbers / a boolean in this example but are
/// declared as string below; confirm that the JSON parser populates them as intended.
/// </summary>
[Serializable]
public struct Data
{
    /// <summary>Sequence number of the transcription result, starting from 0.</summary>
    public string seg_id;

    [SerializeField]
    public CN cn;

    /// <summary>
    /// Undocumented in the original source; the example payload carries a boolean here.
    /// </summary>
    public string ls;

    [Serializable]
    public struct CN
    {
        [SerializeField]
        public ST st;

        [Serializable]
        public struct ST
        {
            [SerializeField]
            public RT[] rt;

            /// <summary>
            /// Start time of the sentence within the whole audio, in milliseconds.
            /// The value is exact for intermediate results as well.
            /// </summary>
            public string bg;

            /// <summary>Result type: "0" = final result, "1" = intermediate result.</summary>
            public string type;

            /// <summary>
            /// End time of the sentence within the whole audio, in milliseconds.
            /// 0 for intermediate results.
            /// </summary>
            public string ed;

            [Serializable]
            public struct RT
            {
                [SerializeField]
                public WS[] ws;

                [Serializable]
                public class WS
                {
                    [SerializeField]
                    public CW[] cw;

                    /// <summary>
                    /// Start time of the word within the sentence, in frames (1 frame = 10 ms);
                    /// the start time within the whole audio is (bg + wb * 10) ms.
                    /// 0 for intermediate results.
                    /// </summary>
                    public string wb;

                    /// <summary>
                    /// End time of the word within the sentence, in frames (1 frame = 10 ms);
                    /// the end time within the whole audio is (bg + we * 10) ms.
                    /// 0 for intermediate results.
                    /// </summary>
                    public string we;

                    [Serializable]
                    public class CW
                    {
                        /// <summary>Recognised word.</summary>
                        public string w;

                        /// <summary>
                        /// Word kind: n = normal word, s = filler (smoothing) word, p = punctuation.
                        /// </summary>
                        public string wp;
                    }
                }
            }
        }
    }
}