当前位置：首页 > news >正文

徐州市徐州市城乡建设局网站首页英文网站营销

news 2025/11/14 23:19:23

徐州市徐州市城乡建设局网站首页,英文网站营销,wordpress 插件喜欢,电气工程师报考条件

Unity 工具之 Azure 微软SSML语音合成TTS流式获取音频数据的简单整理 目录 Unity 工具之 Azure 微软SSML语音合成TTS流式获取音频数据的简单整理 一、简单介绍 二、实现原理 三、实现步骤 四、关键代码 一、简单介绍 Unity 工具类，自己整理的一些游戏开发可…

Unity 工具之 Azure 微软SSML语音合成TTS流式获取音频数据的简单整理

目录
Unity 工具之 Azure 微软SSML语音合成TTS流式获取音频数据的简单整理
一、简单介绍
二、实现原理
三、实现步骤
四、关键代码

一、简单介绍
Unity 工具类，自己整理的一些游戏开发可能用到的模块，单独独立使用，方便游戏开发。
本节介绍，这里在使用微软的Azure 进行语音合成的两个方法做简单整理，这里简单说明，如果你有更好的方法，欢迎留言交流。
语音合成标记语言 (SSML) 是一种基于 XML 的标记语言，可用于微调文本转语音输出属性，例如音调、发音、语速、音量等。与纯文本输入相比，你拥有更大的控制权和灵活性。
可以使用 SSML 来执行以下操作：
- 定义输入文本结构，用于确定文本转语音输出的结构、内容和其他特征。例如，可以使用 SSML 来定义段落、句子、中断/暂停或静音。可以使用事件标记（例如书签或视素）来包装文本，这些标记可以稍后由应用程序处理。
- 选择语音、语言、名称、样式和角色。可以在单个 SSML 文档中使用多个语音。调整重音、语速、音调和音量。还可以使用 SSML 插入预先录制的音频，例如音效或音符。
- 控制输出音频的发音。例如，可以将 SSML 与音素和自定义词典配合使用来改进发音。还可以使用 SSML 定义单词或数学表达式的具体发音。

下面是 SSML 文档的基本结构和语法的子集：

<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="string">
    <mstts:backgroundaudio src="string" volume="string" fadein="string" fadeout="string"/>
    <voice name="string" effect="string">
        <audio src="string"></audio>
        <bookmark mark="string"/>
        <break strength="string" time="string" />
        <emphasis level="value"></emphasis>
        <lang xml:lang="string"></lang>
        <lexicon uri="string"/>
        <math xmlns="http://www.w3.org/1998/Math/MathML"></math>
        <mstts:audioduration value="string"/>
        <mstts:express-as style="string" styledegree="value" role="string"></mstts:express-as>
        <mstts:silence type="string" value="string"/>
        <mstts:viseme type="string"/>
        <p></p>
        <phoneme alphabet="string" ph="string"></phoneme>
        <prosody pitch="value" contour="value" range="value" rate="value" volume="value"></prosody>
        <s></s>
        <say-as interpret-as="string" format="string" detail="string"></say-as>
        <sub alias="string"></sub>
    </voice>
</speak>

SSML 语音和声音：语音合成标记语言 (SSML) 的语音和声音 - 语音服务 - Azure AI services | Microsoft Learn
官网注册：面向学生的 Azure - 免费帐户额度 | Microsoft Azure
官网技术文档网址：技术文档 | Microsoft Learn
官网的TTS：文本转语音快速入门 - 语音服务 - Azure Cognitive Services | Microsoft Learn
Azure Unity SDK 包官网：安装语音 SDK - Azure Cognitive Services | Microsoft Learn
SDK具体链接：https://aka.ms/csspeech/unitypackage

二、实现原理
1、官网申请得到语音合成对应的 SPEECH_KEY 和 SPEECH_REGION
2、然后对应设置语言和需要的声音配置
3、使用 SSML 
带有流式获取得到音频数据在声源中播放或者保存即可样例如下

// Minimal sample: synthesize an SSML file and save the audio to a WAV file.
public static async Task SynthesizeAudioAsync()
{
    var speechConfig = SpeechConfig.FromSubscription("YourSpeechKey", "YourSpeechRegion");
    using var speechSynthesizer = new SpeechSynthesizer(speechConfig, null);

    var ssml = File.ReadAllText("./ssml.xml");
    var result = await speechSynthesizer.SpeakSsmlAsync(ssml);

    using var stream = AudioDataStream.FromResult(result);
    await stream.SaveToWaveFileAsync("path/to/write/file.wav");
}

三、实现步骤
基础的环境搭建参照Unity 工具之 Azure 微软语音合成普通方式和流式获取音频数据的简单整理_unity 语音合成
1、脚本实现挂载对应脚本到场景中
2、运行场景会使用 SSML方式合成TTS并播放
四、关键代码
1、AzureTTSDataWithSSMLHandler

using Microsoft.CognitiveServices.Speech;
using System;
using System.Threading;
using System.Threading.Tasks;
using System.Xml;
using UnityEngine;

/// <summary>
/// Synthesizes speech with Azure TTS from an SSML document built at runtime and
/// hands the resulting <see cref="AudioDataStream"/> to a caller-supplied callback.
/// </summary>
public class AzureTTSDataWithSSMLHandler
{
    /// <summary>
    /// Data required by Azure TTS. The key and region are placeholders that must be
    /// replaced with real subscription values.
    /// </summary>
    private const string SPEECH_KEY = "YOUR_SPEECH_KEY";
    private const string SPEECH_REGION = "YOUR_SPEECH_REGION";
    private const string SPEECH_RECOGNITION_LANGUAGE = "zh-CN";
    private string SPEECH_VOICE_NAME = "zh-CN-XiaoxiaoNeural";

    /// <summary>
    /// Objects created for a TTS session.
    /// </summary>
    private CancellationTokenSource m_CancellationTokenSource;
    private AudioDataStream m_AudioDataStream;
    private Connection m_Connection;
    private SpeechConfig m_Config;
    private SpeechSynthesizer m_Synthesizer;

    /// <summary>
    /// Callback that receives the audio stream of the current request.
    /// </summary>
    private Action<AudioDataStream> m_AudioStream;

    /// <summary>
    /// Invoked when synthesis has started, just before the audio stream is handed out.
    /// </summary>
    private Action m_StartTTSPlayAction;

    /// <summary>
    /// Invoked at the beginning of <see cref="Stop"/>.
    /// </summary>
    private Action m_StartTTSStopAction;

    /// <summary>
    /// Creates the speech config and synthesizer, then opens the service connection.
    /// Must be called before <see cref="Start"/>.
    /// </summary>
    public void Initialized()
    {
        m_Config = SpeechConfig.FromSubscription(SPEECH_KEY, SPEECH_REGION);
        // A null audio config is passed so the audio is consumed through AudioDataStream.
        m_Synthesizer = new SpeechSynthesizer(m_Config, null);
        m_Connection = Connection.FromSpeechSynthesizer(m_Synthesizer);
        m_Connection.Open(true);
    }

    /// <summary>
    /// Starts synthesizing <paramref name="msg"/>.
    /// </summary>
    /// <param name="msg">Text to synthesize.</param>
    /// <param name="stream">Callback that receives the audio stream.</param>
    /// <param name="style">Speaking style written to the mstts:express-as element.</param>
    public async void Start(string msg, Action<AudioDataStream> stream, string style = "chat")
    {
        this.m_AudioStream = stream;

        // This is an async void entry point, so callers cannot observe a failure;
        // log it here instead of letting it escape.
        try
        {
            await SynthesizeAudioAsync(CreateSSML(msg, SPEECH_RECOGNITION_LANGUAGE, SPEECH_VOICE_NAME, style));
        }
        catch (Exception e)
        {
            Debug.LogError("[AzureTTSDataWithSSMLHandler] Start failed : " + e);
        }
    }

    /// <summary>
    /// Stops synthesis: raises the stop action, releases the current stream and
    /// cancellation source, then disposes the synthesizer and the connection.
    /// <see cref="Initialized"/> must be called again before the next <see cref="Start"/>.
    /// </summary>
    public void Stop()
    {
        m_StartTTSStopAction?.Invoke();

        Cancel();

        if (m_Synthesizer != null)
        {
            m_Synthesizer.Dispose();
            m_Synthesizer = null;
        }

        if (m_Connection != null)
        {
            m_Connection.Dispose();
            m_Connection = null;
        }
    }

    /// <summary>
    /// Sets the action raised when synthesis has started. A null argument is ignored.
    /// </summary>
    /// <param name="onStartAction">Action to invoke.</param>
    public void SetStartTTSPlayAction(Action onStartAction)
    {
        if (onStartAction != null)
        {
            m_StartTTSPlayAction = onStartAction;
        }
    }

    /// <summary>
    /// Sets the action raised when <see cref="Stop"/> is called. A null argument is ignored.
    /// </summary>
    /// <param name="onAudioStopAction">Action to invoke.</param>
    public void SetStartTTSStopAction(Action onAudioStopAction)
    {
        if (onAudioStopAction != null)
        {
            m_StartTTSStopAction = onAudioStopAction;
        }
    }

    /// <summary>
    /// Requests synthesis of an SSML document and passes the audio stream to the callback.
    /// </summary>
    /// <param name="speakMsg">SSML document to synthesize.</param>
    /// <exception cref="InvalidOperationException">No synthesizer exists.</exception>
    private async Task SynthesizeAudioAsync(string speakMsg)
    {
        if (m_Synthesizer == null)
        {
            throw new InvalidOperationException("Initialized() must be called before Start().");
        }

        // Release whatever the previous request left behind.
        Cancel();
        m_CancellationTokenSource = new CancellationTokenSource();

        // StartSpeaking* returns once synthesis has started; the audio is then read
        // incrementally from the AudioDataStream.
        var result = await m_Synthesizer.StartSpeakingSsmlAsync(speakMsg);

        if (result.Reason == ResultReason.Canceled)
        {
            // Report service-side failures (bad key, bad SSML, ...). The stream is still
            // handed out so the consumer's end-of-stream handling runs.
            var details = SpeechSynthesisCancellationDetails.FromResult(result);
            Debug.LogError("[AzureTTSDataWithSSMLHandler] Synthesis canceled : " + details.Reason
                + ", " + details.ErrorCode + ", " + details.ErrorDetails);
        }

        m_StartTTSPlayAction?.Invoke();
        m_AudioDataStream = AudioDataStream.FromResult(result);
        m_AudioStream?.Invoke(m_AudioDataStream);
    }

    /// <summary>
    /// Disposes the current audio stream and cancels and disposes the cancellation source.
    /// </summary>
    private void Cancel()
    {
        if (m_AudioDataStream != null)
        {
            m_AudioDataStream.Dispose();
            m_AudioDataStream = null;
        }

        if (m_CancellationTokenSource != null)
        {
            m_CancellationTokenSource.Cancel();
            m_CancellationTokenSource.Dispose();
            // Cleared so a disposed source is never cancelled again.
            m_CancellationTokenSource = null;
        }
    }

    /// <summary>
    /// Builds the SSML document for a request. The layout is not fixed; elements can be
    /// added or removed as needed.
    /// </summary>
    /// <param name="msg">Text to synthesize.</param>
    /// <param name="language">Value of the xml:lang attribute.</param>
    /// <param name="voiceName">Name of the voice to use.</param>
    /// <param name="style">Speaking style; underscores are replaced by hyphens.</param>
    /// <returns>The SSML document as a string.</returns>
    private string CreateSSML(string msg, string language, string voiceName, string style = "chat")
    {
        XmlDocument xmlDoc = new XmlDocument();

        // Root speak element.
        XmlElement speakElem = xmlDoc.CreateElement("speak");
        speakElem.SetAttribute("version", "1.0");
        speakElem.SetAttribute("xmlns", "http://www.w3.org/2001/10/synthesis");
        speakElem.SetAttribute("xmlns:mstts", "http://www.w3.org/2001/mstts");
        speakElem.SetAttribute("xml:lang", language);

        // voice element.
        XmlElement voiceElem = xmlDoc.CreateElement("voice");
        voiceElem.SetAttribute("name", voiceName);

        // mstts:viseme element.
        XmlElement visemeElem = xmlDoc.CreateElement("mstts", "viseme", "http://www.w3.org/2001/mstts");
        visemeElem.SetAttribute("type", "FacialExpression");

        // mstts:express-as element carrying the speaking style.
        XmlElement styleElem = xmlDoc.CreateElement("mstts", "express-as", "http://www.w3.org/2001/mstts");
        styleElem.SetAttribute("style", (style ?? "chat").Replace("_", "-"));

        // Text node; XmlDocument escapes the message when serializing.
        XmlNode textNode = xmlDoc.CreateTextNode(msg);

        // Assemble the tree.
        voiceElem.AppendChild(visemeElem);
        styleElem.AppendChild(textNode);
        voiceElem.AppendChild(styleElem);
        speakElem.AppendChild(voiceElem);
        xmlDoc.AppendChild(speakElem);

        string ssml = xmlDoc.OuterXml;
        Debug.Log("[SSML XML] Result : " + ssml);
        return ssml;
    }
}

2、AzureTTSMono

using Microsoft.CognitiveServices.Speech;
using System;
using System.Collections.Concurrent;
using System.IO;
using UnityEngine;

/// <summary>
/// Plays streamed Azure TTS audio through the AudioSource on the same GameObject.
/// </summary>
[RequireComponent(typeof(AudioSource))]
public class AzureTTSMono : MonoBehaviour
{
    private AzureTTSDataWithSSMLHandler m_AzureTTSDataWithSSMLHandler;

    /// <summary>
    /// Audio source and the clip that receives the streamed samples.
    /// </summary>
    private AudioSource m_AudioSource;
    private AudioClip m_AudioClip;

    /// <summary>
    /// Decoded sample chunks waiting to be written into the clip.
    /// </summary>
    private ConcurrentQueue<float[]> m_AudioDataQueue = new ConcurrentQueue<float[]>();
    private AudioDataStream m_AudioDataStream;

    /// <summary>
    /// Invoked when playback has finished.
    /// </summary>
    private Action m_AudioEndAction;

    /// <summary>
    /// Playback state flags.
    /// </summary>
    private bool m_NeedPlay = false;
    private bool m_StreamReadEnd = false;

    private const int m_SampleRate = 16000;
    // Clip capacity in samples: at most 60 s of audio.
    private const int m_BufferSize = m_SampleRate * 60;
    // Samples requested per read.
    private const int m_UpdateSize = m_SampleRate;

    // Total number of samples decoded from the stream so far.
    private int m_TotalCount = 0;
    // Clip offset at which the next chunk is written.
    private int m_DataIndex = 0;

    #region Lifecycle function

    private void Awake()
    {
        m_AudioSource = GetComponent<AudioSource>();
        m_AzureTTSDataWithSSMLHandler = new AzureTTSDataWithSSMLHandler();
        m_AzureTTSDataWithSSMLHandler.SetStartTTSPlayAction(() => { Debug.Log(" Play TTS "); });
        m_AzureTTSDataWithSSMLHandler.SetStartTTSStopAction(() => { Debug.Log(" Stop TTS "); AudioPlayEndEvent(); });
        m_AudioEndAction = () => { Debug.Log(" End TTS "); };
        m_AzureTTSDataWithSSMLHandler.Initialized();
    }

    // Start is called before the first frame update
    void Start()
    {
        m_AzureTTSDataWithSSMLHandler.Start("今朝有酒今朝醉人生几年百花春", OnGetAudioStream);
    }

    // Update is called once per frame
    private void Update()
    {
        UpdateAudio();
    }

    #endregion

    #region Audio handler

    /// <summary>
    /// Sets the action invoked when TTS playback has finished.
    /// </summary>
    /// <param name="act">Action to invoke.</param>
    public void SetAudioEndAction(Action act)
    {
        this.m_AudioEndAction = act;
    }

    /// <summary>
    /// Reads the TTS stream chunk by chunk, converts each chunk to float samples and
    /// queues it for <see cref="UpdateAudio"/>.
    /// </summary>
    /// <param name="stream">Audio stream of the current synthesis request.</param>
    public async void OnGetAudioStream(AudioDataStream stream)
    {
        m_StreamReadEnd = false;
        m_NeedPlay = true;
        m_AudioDataStream = stream;
        Debug.Log("[AzureTTSMono] OnGetAudioStream");

        // Two bytes per sample: the conversion below reads the data as 16-bit little-endian PCM.
        byte[] buffer = new byte[m_UpdateSize * 2];
        m_DataIndex = 0;
        m_TotalCount = 0;
        m_AudioDataQueue.Clear();

        // The clip is (re)created through the project's Loom main-thread dispatcher.
        Loom.QueueOnMainThread(() =>
        {
            m_AudioSource.Stop();
            m_AudioSource.clip = null;
            m_AudioClip = AudioClip.Create("SynthesizedAudio", m_BufferSize, 1, m_SampleRate, false);
            m_AudioSource.clip = m_AudioClip;
        });

        try
        {
            while (true)
            {
                // ReadData is a synchronous call, so it is run on a worker thread.
                uint bytesRead = await System.Threading.Tasks.Task.Run(() => m_AudioDataStream.ReadData(buffer));
                if (bytesRead <= 0)
                {
                    break;
                }

                // Convert straight from the read buffer; a trailing odd byte is ignored.
                var audioData = new float[bytesRead / 2];
                for (int i = 0; i < audioData.Length; i++)
                {
                    audioData[i] = (short)(buffer[i * 2 + 1] << 8 | buffer[i * 2]) / 32768.0F;
                }

                m_TotalCount += audioData.Length;
                m_AudioDataQueue.Enqueue(audioData);
            }
        }
        catch (Exception e)
        {
            Debug.LogError(e.ToString());
        }
        finally
        {
            // Always flag the end of the stream, otherwise a read failure would leave
            // UpdateAudio waiting forever.
            m_StreamReadEnd = true;
        }
    }

    /// <summary>
    /// Called every frame: feeds one queued chunk into the clip and pauses or resumes
    /// the audio source depending on whether playback has caught up with the data.
    /// </summary>
    private void UpdateAudio()
    {
        if (!m_NeedPlay) return;

        // Write one queued chunk per frame; nothing is dequeued until the clip exists.
        if (m_AudioClip != null && m_AudioDataQueue.TryDequeue(out float[] audioData))
        {
            m_AudioClip.SetData(audioData, m_DataIndex);
            m_DataIndex = (m_DataIndex + audioData.Length) % m_BufferSize;
        }

        // Finished: the stream is exhausted and playback has reached the last sample.
        if (m_StreamReadEnd && m_AudioSource.timeSamples >= m_TotalCount)
        {
            AudioPlayEndEvent();
        }

        if (!m_NeedPlay) return;

        // Data may arrive late over the network: pause when playback catches up with it.
        if (m_AudioSource.timeSamples >= m_DataIndex && m_AudioSource.isPlaying)
        {
            m_AudioSource.timeSamples = m_DataIndex;
            Debug.Log("[AzureTTSMono] Pause");
            m_AudioSource.Pause();
        }

        // Resume once new data is available ahead of the playback position.
        if (m_AudioSource.timeSamples < m_DataIndex && !m_AudioSource.isPlaying)
        {
            Debug.Log("[AzureTTSMono] Play");
            m_AudioSource.Play();
        }
    }

    /// <summary>
    /// Ends playback: resets the audio source and raises the end action.
    /// </summary>
    private void AudioPlayEndEvent()
    {
        Debug.Log("[AzureTTSMono] End");
        m_NeedPlay = false;
        m_AudioSource.timeSamples = 0;
        m_AudioSource.Stop();
        m_AudioEndAction?.Invoke();
    }

    #endregion
}

查看全文

http://www.zqtcl.cn/news/471602/