2026-03-24 11:39:01 +08:00

289 lines
7.8 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using eToile;
using System;
using System.Collections;
using System.Collections.Concurrent;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using UnityEngine;
using UnityWebSocket;
using Microphone = FrostweepGames.MicrophonePro.Microphone;
/// <summary>
/// Speech-to-text component that sends microphone audio over a WebSocket and reports
/// the recognized text through a callback. Two modes are supported, selected by
/// <see cref="isRealTime"/>: one-shot (record a clip, then upload it as WAV) and
/// streaming (forward audio chunks while recording).
/// </summary>
public class FunASR : STT
{
// Inspector label: "use real-time streaming inference".
[Header("是否实时流式推理")]
public bool isRealTime = false;
// Capture helper used in streaming mode; added in Start() when isRealTime is set.
private OnlineAudio ola;
// Value sent as "chunk_interval" in the streaming handshake (its unit is not defined in this file).
private static int chunk_interval = 10;
// Three values sent as "chunk_size" in the streaming handshake.
private static int[] chunk_size = new int[] { 5, 10, 5 };
// Thread that sends queued real-time audio data to the server.
Thread SendAudioThread;
// Receives recognized text, or the error message when recognition fails.
private Action<string> callBack;
// Clip returned by Microphone.Start in one-shot mode.
private AudioClip recording;
// Device used for one-shot recording (first entry of Microphone.devices).
private string microphoneDevice;
// True between StartRecording and StopRecording.
private bool isRecording = false;
// Time.time captured when the current recording started.
private float recordingStartTime;
private const int maxRecordingTime = 15; // Maximum recording time; passed to Microphone.Start and compared against elapsed Time.time
// WebSocket connection to the recognition server, created in Start().
WebSocket socket;
// Set by OnError; checked and cleared by StopRecording.
bool error;
// Invoked when the WebSocket connection opens.
public Action OpenCallBack;
/// <summary>
/// Unity start hook: adds the streaming capture helper when streaming mode is enabled
/// and opens the WebSocket connection to the recognition server.
/// </summary>
private void Start()
{
    // Streaming mode relies on an OnlineAudio component living on this GameObject.
    if (isRealTime)
    {
        ola = gameObject.AddComponent<OnlineAudio>();
    }

    // Build the connection, hook up the event handlers, then start connecting.
    var connection = new WebSocket(m_SpeechRecognizeURL);
    connection.OnMessage += OnMessage;
    connection.OnError += OnError;
    connection.OnOpen += OnOpen;
    socket = connection;
    socket.ConnectAsync();
}
/// <summary>
/// WebSocket "open" handler: forwards the event to <c>OpenCallBack</c> when one is assigned.
/// </summary>
private void OnOpen(object sender, OpenEventArgs e) => OpenCallBack?.Invoke();
/// <summary>
/// WebSocket error handler: logs the failure together with the reported message and
/// raises the <c>error</c> flag, which StopRecording checks before uploading audio.
/// </summary>
/// <param name="sender">Socket that raised the event (unused).</param>
/// <param name="e">Error details supplied by the socket.</param>
private void OnError(object sender, ErrorEventArgs e)
{
    // Log at error level and keep the socket's own message so the cause is diagnosable.
    Debug.LogError("连接失败: " + e?.Message);
    error = true;
}
/// <summary>
/// Begins speech recognition. In streaming mode the callback is stored and the
/// streaming coroutine is started; otherwise a one-shot recording is started.
/// </summary>
/// <param name="_callback">Receiver for recognized text (stored only in streaming mode).</param>
public override void StartSpeechToText(Action<string> _callback)
{
    base.StartSpeechToText(_callback);

    // One-shot mode: just start recording; the callback is supplied on stop.
    if (!isRealTime)
    {
        StartRecording();
        return;
    }

    // Streaming mode: results arrive while recording, so remember the callback now.
    callBack = _callback;
    StartCoroutine(OnlineASR2());
}
/// <summary>
/// Ends speech recognition. Streaming mode stops the real-time session; one-shot mode
/// stores the callback and stops the recording, which uploads the captured audio.
/// </summary>
/// <param name="_callback">Receiver for recognized text (stored only in one-shot mode).</param>
public override void StopSpeechToText(Action<string> _callback)
{
    base.StopSpeechToText(_callback);

    if (!isRealTime)
    {
        // The result of the upload is delivered to this callback.
        callBack = _callback;
        StopRecording();
        return;
    }

    StopRealTimeASR();
}
// True while partial "2pass-online" results are being accumulated into receiveTxt.
bool isStream = false;
// Text most recently reported through callBack.
string receiveTxt;
/// <summary>
/// Handles a message returned by the server. Partial results (mode "2pass-online")
/// are appended to the running text; any other mode replaces the text with the
/// message's text minus one leading punctuation character. The callback is invoked
/// after every change.
/// </summary>
/// <param name="sender">Socket that raised the event (unused).</param>
/// <param name="e">Message whose <c>Data</c> is parsed as JSON into <c>RecData</c>.</param>
private void OnMessage(object sender, MessageEventArgs e)
{
    RecData data;
    try
    {
        data = JsonUtility.FromJson<RecData>(e.Data);
    }
    catch (ArgumentException ex)
    {
        // JsonUtility throws on malformed JSON; drop the message instead of
        // letting the exception escape the socket callback.
        Debug.LogWarning("FunASR: ignoring malformed server message: " + ex.Message);
        return;
    }
    // JsonUtility returns null for empty input. ReferenceEquals keeps this check
    // valid whether RecData is declared as a class or a struct.
    if (object.ReferenceEquals(data, null))
    {
        return;
    }
    if (!isStream)
    {
        // A new utterance starts: clear the text shown so far.
        receiveTxt = "";
        callBack?.Invoke(receiveTxt);
    }
    if (data.mode == "2pass-online")
    {
        // Partial result: extend the running text.
        receiveTxt += data.text;
        isStream = true;
        callBack?.Invoke(receiveTxt);
    }
    else
    {
        // Any other mode: replace the running text and end the accumulation.
        string result = RemoveLeadingPunctuationIfPresent(data.text);
        receiveTxt = result;
        callBack?.Invoke(receiveTxt);
        isStream = false;
    }
}
/// <summary>
/// Per-frame check that ends a one-shot recording once it has reached the maximum length.
/// </summary>
void Update()
{
    if (!isRecording)
    {
        return;
    }

    float elapsed = Time.time - recordingStartTime;
    if (elapsed >= maxRecordingTime)
    {
        StopRecording();
    }
}
/// <summary>
/// Starts a one-shot recording on the first available microphone.
/// Does nothing if a recording is already in progress; logs an error if no microphone exists.
/// </summary>
void StartRecording()
{
    if (!isRecording)
    {
        // Pick the first listed microphone, if there is one.
        if (Microphone.devices.Length > 0)
        {
            microphoneDevice = Microphone.devices[0];
        }
        else
        {
            microphoneDevice = null;
        }

        if (!string.IsNullOrEmpty(microphoneDevice))
        {
            // Non-looping clip of at most maxRecordingTime, sampled at 16000 Hz.
            recording = Microphone.Start(microphoneDevice, false, maxRecordingTime, 16000);
            recordingStartTime = Time.time;
            isRecording = true;
            Debug.Log("开始录音");
        }
        else
        {
            Debug.LogError("没找到麦克风!");
        }
    }
}
/// <summary>
/// Stops the one-shot recording and uploads it for offline recognition.
/// If a connection error was flagged, or there is no clip or socket to work with,
/// the error text is reported through the callback and nothing is sent.
/// Does nothing when no recording is in progress.
/// </summary>
void StopRecording()
{
    if (!isRecording)
    {
        return;
    }
    Microphone.End(microphoneDevice);
    isRecording = false;
    Debug.Log("停止录音");
    // A socket error was reported earlier: report it and clear the flag before
    // spending time on the WAV conversion.
    if (error)
    {
        callBack?.Invoke("<color=red>语音识别错误</color>");
        error = false;
        return;
    }
    // Without a recorded clip or a socket, the conversion or the send would
    // throw a NullReferenceException; report the failure instead.
    if (recording == null || socket == null)
    {
        Debug.LogError("FunASR: cannot upload audio (no recorded clip or no socket).");
        callBack?.Invoke("<color=red>语音识别错误</color>");
        return;
    }
    // Convert the AudioClip to a WAV byte array.
    byte[] wavData = OpenWavParser.AudioClipToByteArray(recording).ToArray();
    Debug.Log("Audio converted to WAV format. Byte length: " + wavData.Length);
    // Offline protocol: header message, then the audio bytes, then the end-of-speech message.
    socket.SendAsync("{\"mode\":\"offline\",\"wav_name\":\"test.wav\",\"is_speaking\":true,\"hotwords\":\"\",\"itn\":true}");
    socket.SendAsync(wavData);
    socket.SendAsync("{\"is_speaking\": false}");
}
// Queue of real-time audio chunks waiting to be sent by the sender thread.
public static readonly ConcurrentQueue<byte[]> RealTimeAudioSet = new ConcurrentQueue<byte[]>();
// Incremented on every OnlineASR2 call; a forwarding loop ends once it is no longer the latest one.
private int forwardLoopGeneration;
/// <summary>
/// Coroutine for streaming recognition: starts capture, sends the "2pass" handshake,
/// makes sure a sender thread is running, then moves captured buffers from
/// <c>OnlineAudio.voicebuff</c> into <see cref="RealTimeAudioSet"/> every frame.
/// Calling it again replaces the previous forwarding loop instead of stacking another one.
/// </summary>
IEnumerator OnlineASR2()
{
    // ola is only created in Start() when isRealTime is set, and socket only exists
    // after Start(); without them there is nothing to stream to or from.
    if (ola == null || socket == null)
    {
        Debug.LogError("FunASR: streaming recognition is not initialised (isRealTime must be set before Start runs).");
        yield break;
    }
    // Start capturing and announce the streaming session to the server.
    ola.StartRec();
    string firstbuff = string.Format("{{\"mode\": \"{0}\", \"chunk_size\": [{1},{2},{3}], \"chunk_interval\": {4}, \"wav_name\": \"microphone\", \"is_speaking\": true}}", "2pass", chunk_size[0], chunk_size[1], chunk_size[2], chunk_interval);
    socket.SendAsync(firstbuff);
    // Reuse a sender thread that is still alive rather than starting one per call.
    // It is a background thread so it cannot keep the process alive on its own.
    if (SendAudioThread == null || !SendAudioThread.IsAlive)
    {
        SendAudioThread = new Thread(SendAudioToSeverAsync) { IsBackground = true };
        SendAudioThread.Start();
    }
    // Only the most recently started loop keeps running; older ones see a newer
    // generation on their next frame and finish.
    int myGeneration = ++forwardLoopGeneration;
    while (myGeneration == forwardLoopGeneration)
    {
        // Forward everything captured since the last frame, not just one buffer,
        // so the backlog does not grow when capture outpaces the frame rate.
        byte[] buff;
        while (OnlineAudio.voicebuff.TryDequeue(out buff))
        {
            if (buff != null)
            {
                RealTimeAudioSet.Enqueue(buff);
            }
        }
        // Wait for the next frame so the loop never blocks the main thread.
        yield return null;
    }
}
/// <summary>
/// Ends the streaming session: stops capture and sends the end-of-speech message.
/// Safe to call when streaming was never set up (no capture helper or no socket).
/// </summary>
public void StopRealTimeASR()
{
    // ola exists only if isRealTime was set when Start() ran.
    if (ola != null)
    {
        ola.StopRec();
    }
    // No socket means Start() has not run; there is nobody to notify.
    if (socket == null)
    {
        return;
    }
    // Send the end-of-audio message from a pool thread.
    Task.Run(() => socket.SendAsync("{\"is_speaking\": false}"));
}
/// <summary>
/// Sender-thread body: loops forever, taking queued audio chunks from
/// <c>RealTimeAudioSet</c> and sending a copy of each over the socket.
/// Sleeps 10 ms whenever the queue is empty.
/// </summary>
private void SendAudioToSeverAsync()
{
    for (;;)
    {
        // Nothing queued: back off briefly instead of spinning.
        if (RealTimeAudioSet.Count <= 0)
        {
            Thread.Sleep(10);
            continue;
        }

        byte[] chunk;
        RealTimeAudioSet.TryDequeue(out chunk);
        if (chunk == null)
        {
            continue;
        }

        // Send a private copy of the dequeued chunk.
        byte[] payload = (byte[])chunk.Clone();
        socket.SendAsync(payload);
    }
}
/// <summary>
/// Returns <paramref name="input"/> without its first character when that character
/// is punctuation; otherwise returns it unchanged. Only one character is removed.
/// </summary>
/// <param name="input">Text to clean; may be null or empty.</param>
/// <returns>The text minus one leading punctuation character, or the input itself.</returns>
string RemoveLeadingPunctuationIfPresent(string input)
{
    // Null and empty strings have no first character to inspect.
    bool startsWithPunctuation = !string.IsNullOrEmpty(input) && char.IsPunctuation(input[0]);
    return startsWithPunctuation ? input.Substring(1) : input;
}
// Set once the socket has been detached and closed, so the quit-then-destroy
// sequence closes it only once.
private bool socketClosed;

/// <summary>
/// Unity destroy hook: ends the streaming session if one was set up, stops the
/// sender thread and closes the socket.
/// </summary>
private void OnDestroy()
{
    // ola only exists when streaming was set up in Start(); skip the session
    // shutdown otherwise so the remaining cleanup still runs.
    if (isRealTime && ola != null)
    {
        StopRealTimeASR();
    }
    StopSendThread();
    CloseSocket();
}

/// <summary>
/// Unity quit hook: stops the sender thread and closes the socket.
/// </summary>
private void OnApplicationQuit()
{
    StopSendThread();
    CloseSocket();
}

/// <summary>Aborts the audio sender thread if it is still running.</summary>
private void StopSendThread()
{
    // Checked regardless of isRealTime so a thread started earlier is not left
    // running when the flag is switched off afterwards.
    if (SendAudioThread != null && SendAudioThread.IsAlive)
    {
        SendAudioThread.Abort();
    }
}

/// <summary>Detaches the event handlers and closes the socket, at most once.</summary>
private void CloseSocket()
{
    // socket is null when Start() never ran.
    if (socket == null || socketClosed)
    {
        return;
    }
    socketClosed = true;
    // Detach first so no socket event reaches this component after teardown.
    socket.OnMessage -= OnMessage;
    socket.OnError -= OnError;
    socket.OnOpen -= OnOpen;
    socket.CloseAsync();
}
}