// Listing metadata: 300 lines, 10 KiB, C#.
using Newtonsoft.Json.Linq;
|
||
using System;
|
||
using System.Collections;
|
||
using System.Collections.Generic;
|
||
using System.Linq;
|
||
using System.Net.WebSockets;
|
||
using System.Security.Cryptography;
|
||
using System.Text;
|
||
using System.Threading;
|
||
using System.Threading.Tasks;
|
||
using UnityEngine;
|
||
|
||
public class XunFeiTTS : TTS
|
||
{
|
||
#region Parameters
// NOTE(review): the default values below are service credentials committed to source.
// They are [SerializeField] defaults; consider supplying them from configuration instead.

/// <summary>Voice name; sent to the service as the "vcn" business parameter.</summary>
[SerializeField] private string Role = "x4_yezi";

/// <summary>Application id; sent to the service as "common.app_id".</summary>
[SerializeField] private string APPID = "f2433640";

/// <summary>API key embedded in the authorization string.</summary>
[SerializeField] private string APIKey = "d46a0dc56145c074ae751940ca74caf4";

/// <summary>API secret used as the HMAC-SHA256 key when signing the request.</summary>
[SerializeField] private string APISecret = "OTU4YTU4ZmY2OGYyYzQxYTA2ZTc5OWM5";

/// <summary>WebSocket used for the current request; assigned by Connect.</summary>
private ClientWebSocket webSocket;
#endregion

/// <summary>Handle of the most recently started synthesis coroutine, used by StopSpeak.</summary>
private Coroutine m_Coroutine;
|
||
/// <summary>
/// Sets the service endpoint to the XunFei TTS WebSocket URL.
/// m_PostURL is not declared in this class (presumably inherited from TTS).
/// </summary>
private void Awake() => m_PostURL = "wss://tts-api.xfyun.cn/v2/tts";
|
||
/// <summary>
/// Starts synthesizing <paramref name="_msg"/> and reports the result through
/// <paramref name="_callback"/>.
/// </summary>
/// <param name="_msg">Text to synthesize.</param>
/// <param name="_callback">Receives the generated AudioClip, or null on failure.</param>
public override void StartSpeak(string _msg, Action<AudioClip> _callback)
{
    base.StartSpeak(_msg, _callback);

    // Request payload consumed by TextToSpeech: the text plus the voice to use.
    // The voice comes from the serialized Role field.
    JObject payload = new JObject();
    payload["text"] = _msg;
    payload["voice"] = Role;

    // Run the request in a coroutine and remember the handle so StopSpeak can cancel it.
    IEnumerator routine = SendTextToSpeechMsgCoroutine(payload, _callback);
    m_Coroutine = StartCoroutine(routine);
}
|
||
/// <summary>
/// Stops speaking and cancels the synthesis coroutine started by StartSpeak, if any.
/// </summary>
public override void StopSpeak()
{
    base.StopSpeak();

    // Nothing was ever started.
    if (m_Coroutine == null)
    {
        return;
    }

    StopCoroutine(m_Coroutine);
}
|
||
|
||
/// <summary>
/// Coroutine that sends a synthesis request to XunFei, waits for the reply and converts
/// it into an AudioClip.
/// </summary>
/// <param name="message">Request payload with "text" and "voice" entries.</param>
/// <param name="callback">
/// Invoked exactly once: with the generated clip on success, or with null on failure.
/// </param>
private IEnumerator SendTextToSpeechMsgCoroutine(JObject message, Action<AudioClip> callback)
{
    // Start the request and wait until the task finishes.
    Task<string> resultJson = TextToSpeech(message);
    yield return new WaitUntil(() => resultJson.IsCompleted);

    if (!resultJson.IsCompletedSuccessfully)
    {
        // Reading Task.Result on a faulted or cancelled task rethrows, which would skip
        // the failure callback; report the stored exception instead.
        Debug.Log($"讯飞文本转语音消息发送失败,错误信息:{resultJson.Exception?.GetBaseException().Message}");
        callback?.Invoke(null);
        yield break;
    }

    // The reply is a JSON object whose "data" entry holds the audio as Base64.
    JObject obj = JObject.Parse(resultJson.Result);
    string text = obj["data"].ToString();
    float[] audioData = BytesToFloat(Convert.FromBase64String(text));

    if (audioData.Length == 0)
    {
        Debug.Log($"讯飞文本转语音失败,可能由于输入文本为空或不正确,导致语音长度为0,错误信息:{resultJson.Result}");
        callback?.Invoke(null);
        // Stop here: creating a zero-length clip is pointless and would invoke the
        // callback a second time.
        yield break;
    }

    // Mono clip at 16000 Hz, non-streaming.
    AudioClip audioClip = AudioClip.Create("SynthesizedAudio", audioData.Length, 1, 16000, false);
    audioClip.SetData(audioData, 0);
    callback?.Invoke(audioClip);
}
|
||
/// <summary>
/// Performs one synthesis round trip: connect, send the request, collect the audio and
/// close the connection.
/// </summary>
/// <param name="request">JSON object with a "text" entry and a "voice" entry.</param>
/// <returns>A JSON string of the form {"data": "&lt;Base64 audio&gt;"}.</returns>
/// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
public async Task<string> TextToSpeech(JObject request)
{
    if (request == null)
    {
        throw new ArgumentNullException(nameof(request));
    }

    string text = request["text"].ToString();
    string voice = request["voice"].ToString();

    try
    {
        await Connect(m_PostURL);
        await TTSSendMessage(text, voice);
        string base64String = await TTSReceiveMessage();

        JObject response = new JObject();
        response["data"] = base64String;
        return response.ToString();
    }
    finally
    {
        // Always release the socket, including when connect/send/receive failed.
        ClientWebSocket socket = webSocket;
        if (socket != null)
        {
            try
            {
                // A close handshake is only valid while the connection is still up.
                if (socket.State == WebSocketState.Open || socket.State == WebSocketState.CloseReceived)
                {
                    await socket.CloseAsync(WebSocketCloseStatus.NormalClosure, string.Empty, CancellationToken.None);
                }
            }
            catch (WebSocketException e)
            {
                // Best effort: a failed close must not hide the result or the original error.
                Debug.LogWarning("XunFeiTTS: closing the WebSocket failed: " + e.Message);
            }
            finally
            {
                socket.Dispose();
            }
        }
    }
}
|
||
/// <summary>
/// Opens a fresh WebSocket connection to the XunFei API.
/// </summary>
/// <param name="url">Base endpoint; the signed query string is appended by GetUrl.</param>
private async Task Connect(string url)
{
    // Release the socket left over from a previous request before replacing it;
    // Dispose is safe to call on an already closed or disposed socket.
    webSocket?.Dispose();

    webSocket = new ClientWebSocket();
    await webSocket.ConnectAsync(new Uri(GetUrl(url)), CancellationToken.None);
}
|
||
/// <summary>
/// Sends the synthesis request for <paramref name="text"/> over the open WebSocket.
/// </summary>
/// <param name="text">Text to synthesize.</param>
/// <param name="voice">Voice name to request.</param>
private async Task TTSSendMessage(string text, string voice)
{
    // Serialize the request document and encode it as UTF-8.
    string payload = TTSCreateJsonData(text, voice).ToString();
    ArraySegment<byte> frame = new ArraySegment<byte>(Encoding.UTF8.GetBytes(payload));

    // Send it as one complete text message.
    await webSocket.SendAsync(frame, WebSocketMessageType.Text, true, CancellationToken.None);
}
|
||
#region Json处理
|
||
/// <summary>
/// Builds the JSON request document for a synthesis call.
/// </summary>
/// <param name="text">Text to synthesize; sent as Base64 of its UTF-8 bytes.</param>
/// <param name="voice">Voice name; sent as "vcn".</param>
/// <returns>An object with "common", "business" and "data" sections, in that order.</returns>
private JObject TTSCreateJsonData(string text, string voice)
{
    return new JObject
    {
        ["common"] = new JObject
        {
            ["app_id"] = APPID
        },
        // "aue" = "raw": the reply is later decoded as 16-bit PCM samples
        // (assumption based on how the audio is consumed — confirm against the API docs).
        ["business"] = new JObject
        {
            ["aue"] = "raw",
            ["vcn"] = voice,
            ["speed"] = 50,
            ["volume"] = 50,
            ["pitch"] = 50,
            ["tte"] = "UTF8"
        },
        ["data"] = new JObject
        {
            ["status"] = 2,
            ["text"] = Convert.ToBase64String(Encoding.UTF8.GetBytes(text))
        }
    };
}
|
||
/// <summary>
/// Receives synthesis replies until the service reports the final frame (status 2) and
/// returns all audio concatenated.
/// </summary>
/// <returns>Base64 encoding of the concatenated audio bytes of every reply.</returns>
/// <exception cref="InvalidOperationException">
/// The server closed the connection before a final frame arrived.
/// </exception>
private async Task<string> TTSReceiveMessage()
{
    int status = 0;
    List<byte> audioBytes = new List<byte>();
    List<byte> messageBytes = new List<byte>();

    // One receive buffer reused for every fragment.
    byte[] buffer = new byte[8 * 1024];
    ArraySegment<byte> receiveSegment = new ArraySegment<byte>(buffer);

    while (status != 2)
    {
        messageBytes.Clear();

        // A message may arrive in several fragments of any size; EndOfMessage, not the
        // fragment length, marks the end of a message.
        WebSocketReceiveResult fragment;
        do
        {
            fragment = await webSocket.ReceiveAsync(receiveSegment, CancellationToken.None);

            if (fragment.MessageType == WebSocketMessageType.Close)
            {
                throw new InvalidOperationException(
                    "XunFei TTS closed the connection before the final audio frame was received " +
                    $"(close status: {fragment.CloseStatus}, description: {fragment.CloseStatusDescription}).");
            }

            messageBytes.AddRange(new ArraySegment<byte>(buffer, 0, fragment.Count));
        }
        while (!fragment.EndOfMessage);

        // Decode once per complete message so a multi-byte UTF-8 sequence split across
        // fragments is not corrupted.
        string receivedMessage = Encoding.UTF8.GetString(messageBytes.ToArray());

        // Each reply carries its own Base64 chunk; decode it and append the raw bytes.
        audioBytes.AddRange(Convert.FromBase64String(TTSParseMessage(receivedMessage, out status)));
    }

    return Convert.ToBase64String(audioBytes.ToArray());
}
|
||
/// <summary>
/// Parses one JSON reply from the service and extracts its audio chunk.
/// </summary>
/// <param name="message">Raw JSON text of the reply.</param>
/// <param name="status">
/// 2 when the reply's data.status is 2, 1 for any other reply that carries audio,
/// 0 when the reply is an error or has no audio.
/// </param>
/// <returns>The Base64 audio string, or an empty string when status is 0.</returns>
private string TTSParseMessage(string message, out int status)
{
    status = 0;
    JObject jObject = JObject.Parse(message);

    // A reply without a "message" entry is treated like any other non-success reply.
    string serviceMessage = jObject["message"]?.ToString();
    if (serviceMessage != "success")
    {
        Debug.LogError("ERROR:TTSParseMessage失败,错误消息:" + serviceMessage);
        return string.Empty;
    }

    // "data" may be absent or JSON null; only a JSON object can be indexed for "audio".
    JObject data = jObject["data"] as JObject;
    JToken audio = data?["audio"];
    if (audio == null)
    {
        Debug.LogError("ERROR:TTSParseMessage失败,data为空");
        return string.Empty;
    }

    // A missing status counts as "not final" instead of throwing.
    status = (int?)data["status"] == 2 ? 2 : 1;
    return audio.ToString();
}
|
||
#endregion
|
||
#region byte[]转float[]
|
||
/// <summary>
/// Converts 16-bit little-endian PCM bytes into the float samples AudioClip expects.
/// </summary>
/// <param name="byteArray">PCM bytes, two per sample; a trailing odd byte is ignored.</param>
/// <returns>
/// One float per sample in the range [-1, 1); an empty array for null or empty input.
/// </returns>
private static float[] BytesToFloat(byte[] byteArray)
{
    if (byteArray == null)
    {
        return Array.Empty<float>();
    }

    float[] samples = new float[byteArray.Length / 2];
    for (int i = 0; i < samples.Length; i++)
    {
        samples[i] = bytesToFloat(byteArray[i * 2], byteArray[i * 2 + 1]);
    }

    return samples;
}

/// <summary>
/// Combines the two bytes of one little-endian 16-bit sample and scales it to [-1, 1).
/// </summary>
/// <param name="firstByte">Low-order byte (first in the stream).</param>
/// <param name="secondByte">High-order byte (second in the stream).</param>
private static float bytesToFloat(byte firstByte, byte secondByte)
{
    // Shifting and OR-ing works on values, not memory layout, so the result is the same
    // on every host; no BitConverter.IsLittleEndian switch is needed (swapping on
    // big-endian hosts would corrupt the samples).
    short sample = (short)((secondByte << 8) | firstByte);

    // Scale to the range from -1 to just below 1.
    return sample / 32768.0F;
}
|
||
#endregion
|
||
#region 生成URL
|
||
/// <summary>
/// Builds the signed connection URL: the endpoint plus the authorization, date and host
/// query parameters.
/// </summary>
/// <param name="url">Base endpoint URL.</param>
/// <returns>The endpoint URL with the authentication query string appended.</returns>
private string GetUrl(string url)
{
    Uri uri = new Uri(url);

    // The service requires the date in UTC/GMT, RFC1123 format
    // (e.g. "Thu, 01 Aug 2019 01:53:21 GMT"). The "r" format only labels the value as
    // GMT without converting it, so the time must already be UTC.
    string date = DateTime.UtcNow.ToString("r");

    // The signature is computed over the unescaped date.
    string authorization = ComposeAuthUrl(uri, date);

    // Escape the values: Base64 contains '+', '/' and '=', and the date contains spaces,
    // commas and colons, none of which are safe inside a query string.
    return $"{uri}?authorization={Uri.EscapeDataString(authorization)}" +
           $"&date={Uri.EscapeDataString(date)}" +
           $"&host={Uri.EscapeDataString(uri.Host)}";
}
|
||
/// <summary>
/// Builds the Base64-encoded authorization value for the connection URL.
/// </summary>
/// <param name="uri">Endpoint; its host and absolute path are part of the signed text.</param>
/// <param name="date">Request date string; signed as-is.</param>
/// <returns>Base64 of the authorization string containing the API key and signature.</returns>
private string ComposeAuthUrl(Uri uri, string date)
{
    // Text to sign: host, date and request line, separated by '\n'.
    // Built by interpolation rather than string.Format over a concatenated string, so
    // braces in any component cannot be misread as format placeholders.
    string signatureOrigin = $"host: {uri.Host}\ndate: {date}\nGET {uri.AbsolutePath} HTTP/1.1";

    // HMAC-SHA256 of the text, keyed with APISecret, Base64-encoded.
    string signature = HmacSHA256(signatureOrigin, APISecret);

    // The headers value names the three signed parts and is fixed.
    string authorizationOrigin =
        $"api_key=\"{APIKey}\", algorithm=\"hmac-sha256\", headers=\"host date request-line\", signature=\"{signature}\"";

    return ToBase64String(authorizationOrigin);
}
|
||
|
||
/// <summary>
/// Computes the HMAC-SHA256 of a text and returns it Base64-encoded.
/// </summary>
/// <param name="secret">The text to sign (despite its name, this is the message).</param>
/// <param name="signKey">The HMAC key.</param>
/// <returns>Base64 encoding of the 32-byte HMAC.</returns>
private static string HmacSHA256(string secret, string signKey)
{
    // Both strings are encoded as UTF-8 before hashing.
    byte[] keyBytes = Encoding.UTF8.GetBytes(signKey);
    using HMACSHA256 hmac = new HMACSHA256(keyBytes);

    byte[] digest = hmac.ComputeHash(Encoding.UTF8.GetBytes(secret));
    return Convert.ToBase64String(digest);
}
|
||
/// <summary>
/// Encodes a string's UTF-8 bytes as Base64.
/// </summary>
/// <param name="value">Text to encode.</param>
/// <returns>The Base64 text, or an empty string when the input is null or empty.</returns>
private static string ToBase64String(string value)
{
    return string.IsNullOrEmpty(value)
        ? ""
        : Convert.ToBase64String(Encoding.UTF8.GetBytes(value));
}
|
||
#endregion
|
||
}
|