一、概述

本方案采用 Unity 引擎配合 BestHTTP 插件,通过 WebSocket 协议对接科大讯飞语音评测流式版 API(ISE),实现跨平台语音评测功能。


二、环境准备

1. 开发依赖

  • Unity 2019.3.6f1或更高版本(本文基于Unity 2022.3 LTS(2022.3.42f1c1)编写)

  • BestHTTP插件(另需在 Project Settings -> Player -> Other Settings 中将 Allow downloads over HTTP 设置为 Always allowed)

  • 科大讯飞开发者账号(申请地址:https://www.xfyun.cn)


三、核心实现解析

1. 申请账号(https://www.xfyun.cn/)

        (1)注册账号

        (2)进入控制台

        (3)创建应用并提交

        (4)找到语音评测,记录服务接口认证信息


2.语音评测逻辑

(1)参数

// Intermediate values produced while building the request signature.
        private string signature_origin = null;
        private string signature_sha = null;
        private string signature = null;

        // Intermediate values produced while building the authorization parameter.
        private string authorization_origin = null;
        private string authorization = null;

        // Endpoint of the ISE streaming service.
        private string url = "wss://ise-api.xfyun.cn/v2/open-ise";

        // Active WebSocket connection.
        public WebSocket webSocket;

        // Service credentials. Real values must not be committed to source or published:
        // assign them in the Inspector or load them from local configuration.
        public string APPID = "<YOUR_APPID>";
        public string APISecret = "<YOUR_API_SECRET>";
        public string APIKey = "<YOUR_API_KEY>";

        // Raised with the attributes parsed from the evaluation result.
        public UnityEvent<Dictionary<string, string>> messageSend = new();

(2)编码

#region 编码

        /// <summary>
        /// Replaces every space character in <paramref name="input"/> with <c>%20</c>.
        /// </summary>
        /// <param name="input">Text to process; may be null or empty.</param>
        /// <returns>The converted text, or <paramref name="input"/> itself when it is null or empty.</returns>
        private static string ReplaceSpacesWithPercent20(string input)
        {
            if (string.IsNullOrEmpty(input))
            {
                return input;
            }

            // Splitting on ' ' and re-joining with "%20" substitutes every single space.
            return string.Join("%20", input.Split(' '));
        }

        /// <summary>
        /// Computes an HMAC-SHA256 digest and returns it Base64-encoded.
        /// </summary>
        /// <param name="secret">The message to sign (UTF-8 encoded before hashing).</param>
        /// <param name="signKey">The HMAC key (UTF-8 encoded).</param>
        /// <returns>Base64 text of the 32-byte digest.</returns>
        private static string HmacSHA256(string secret, string signKey)
        {
            byte[] keyBytes = Encoding.UTF8.GetBytes(signKey);
            byte[] payload = Encoding.UTF8.GetBytes(secret);

            using (var hmac = new HMACSHA256(keyBytes))
            {
                return Convert.ToBase64String(hmac.ComputeHash(payload));
            }
        }

        /// <summary>
        /// Encodes the UTF-8 bytes of <paramref name="value"/> as Base64.
        /// </summary>
        /// <param name="value">Text to encode; may be null or empty.</param>
        /// <returns>The Base64 text, or an empty string when <paramref name="value"/> is null or empty.</returns>
        private static string ToBase64String(string value) =>
            string.IsNullOrEmpty(value)
                ? ""
                : Convert.ToBase64String(Encoding.UTF8.GetBytes(value));

        #endregion

(3)鉴权

#region 鉴权/URL

        /// <summary>
        /// Builds the authenticated WebSocket URL for the given endpoint.
        /// </summary>
        /// <param name="url">Base endpoint address.</param>
        /// <returns>
        /// The endpoint followed by the <c>authorization</c>, <c>date</c> and <c>host</c> query parameters.
        /// </returns>
        private string GetUrl(string url)
        {
            var uri = new Uri(url);

            // "r" formats the UTC time as RFC 1123, e.g. "Thu, 01 Aug 2019 01:53:21 GMT".
            var date = DateTime.UtcNow.ToString("r");

            // Fills the authorization field from the host, date and request path.
            ComposeAuthUrl(uri, date);

            // Percent-encode every query value: the date contains ',' and ':' in addition to
            // spaces, and the Base64 authorization may end with '=' padding.
            var uriStr = string.Format("{0}?authorization={1}&date={2}&host={3}",
                uri,
                Uri.EscapeDataString(authorization),
                Uri.EscapeDataString(date),
                Uri.EscapeDataString(uri.Host));

            Debug.Log(uriStr);
            Debug.Log(date);
            return uriStr;
        }

        /// <summary>
        /// Computes the request signature and stores the Base64 authorization value in the
        /// <c>authorization</c> field (intermediate values are kept in the other signature fields).
        /// </summary>
        /// <param name="uri">Endpoint whose host and absolute path are signed.</param>
        /// <param name="date">Timestamp text that is signed together with the host.</param>
        private void ComposeAuthUrl(Uri uri, string date)
        {
            // Text to sign: host line, date line and request line separated by '\n'.
            // Plain concatenation is used on purpose: passing runtime text to string.Format
            // as the format string would throw if it ever contained '{' or '}'.
            signature_origin = "host: " + uri.Host + "\ndate: " + date + "\nGET " + uri.AbsolutePath + " HTTP/1.1";

            // HMAC-SHA256 of the text above, keyed with APISecret, Base64-encoded.
            signature_sha = HmacSHA256(signature_origin, APISecret);
            signature = signature_sha;

            authorization_origin = string.Format(
                "api_key=\"{0}\", algorithm=\"{1}\", headers=\"{2}\", signature=\"{3}\"",
                APIKey, "hmac-sha256", "host date request-line", signature);

            authorization = ToBase64String(authorization_origin);
        }

        #endregion

(4)WebSocket(BestHTTP)

#region Web

        /// <summary>
        /// Creates a new WebSocket for the authenticated URL, registers the callbacks and starts
        /// connecting. The connection is established asynchronously; <c>OnOpen</c> is invoked
        /// once it is ready.
        /// </summary>
        public void Content()
        {
            // Release a socket left over from a previous call so it neither stays open
            // nor delivers late callbacks that would act on the new connection.
            if (webSocket != null)
            {
                webSocket.OnOpen -= OnOpen;
                webSocket.OnBinary -= OnBinaryReceived;
                webSocket.OnMessage -= OnMessage;
                webSocket.OnError -= OnError;
                webSocket.OnClosed -= OnClosed;

                if (webSocket.IsOpen)
                {
                    webSocket.Close();
                }
            }

            webSocket = new WebSocket(new Uri(GetUrl(url)));

            webSocket.OnOpen += OnOpen;
            webSocket.OnBinary += OnBinaryReceived;
            webSocket.OnMessage += OnMessage;
            webSocket.OnError += OnError;
            webSocket.OnClosed += OnClosed;

            webSocket.Open();

            // Open() only starts the handshake, so report "connecting" rather than success.
            Debug.Log("讯飞WebSocket开始连接...");
        }

        /// <summary>
        /// Logs that the WebSocket connection has been established.
        /// </summary>
        private void OnOpen(WebSocket ws) => Debug.Log("WebSocket 连接成功!");

        /// <summary>
        /// Callback for binary WebSocket messages. The body is empty, so binary data is ignored.
        /// </summary>
        private void OnBinaryReceived(WebSocket ws, byte[] data)
        {
        }

        /// <summary>
        /// Handles a text message from the server. Validates the response code and, when
        /// <c>data.status</c> is "2", decodes the Base64 payload to XML and passes it to
        /// <c>ProcessXml</c>. The socket is closed after the result or after any failure.
        /// </summary>
        /// <param name="ws">Socket that raised the event; this is the socket that gets closed.</param>
        /// <param name="message">Raw JSON text of the message.</param>
        private void OnMessage(WebSocket ws, string message)
        {
            try
            {
                JsonData js = JsonMapper.ToObject(message);

                // A non-zero code is a server-side failure.
                if (js["code"].ToString() != "0")
                {
                    Debug.LogError("联网错误:" + js["message"].ToString());

                    // Release the connection here as well, like the other failure paths.
                    if (ws.IsOpen)
                    {
                        ws.Close();
                    }

                    return;
                }

                if (js["message"].ToString() == "success" && js["data"] != null)
                {
                    var status = Convert.ToString(js["data"]["status"]);
                    if (status == "2" && js["data"]["data"] != null)
                    {
                        // The payload is Base64-encoded UTF-8 XML.
                        byte[] decodedBytes = Convert.FromBase64String(js["data"]["data"].ToString());
                        string xmlString = System.Text.Encoding.UTF8.GetString(decodedBytes);

                        ProcessXml(xmlString);

                        if (ws.IsOpen)
                        {
                            ws.Close();
                        }
                    }
                    else
                    {
                        Debug.LogWarning("返回的数据状态不符合预期,status: " + status);
                    }
                }
                else
                {
                    Debug.LogError("错误:未能获取到有效的数据");

                    if (ws.IsOpen)
                    {
                        ws.Close();
                    }
                }
            }
            catch (Exception ex)
            {
                Debug.LogError("处理消息时发生错误: " + ex.Message);

                // Close the socket that delivered the message, not whatever the field
                // currently references.
                if (ws != null && ws.IsOpen)
                {
                    ws.Close();
                }
            }
        }

        /// <summary>
        /// Parses the result XML, logs the content of every <c>read_chapter</c> element and
        /// raises <c>messageSend</c> with the collected attribute name/value pairs.
        /// </summary>
        /// <param name="xmlString">XML text of the evaluation result.</param>
        private void ProcessXml(string xmlString)
        {
            try
            {
                var document = new XmlDocument();
                document.LoadXml(xmlString);

                var chapters = document.GetElementsByTagName("read_chapter");
                if (chapters.Count == 0)
                {
                    Debug.Log("没有找到 'read_chapter' 标签节点。");
                    return;
                }

                var collected = new Dictionary<string, string>();

                foreach (XmlNode chapter in chapters)
                {
                    if (chapter == null)
                    {
                        continue;
                    }

                    // Element text
                    if (!string.IsNullOrEmpty(chapter.InnerText))
                    {
                        Debug.Log($"Node InnerText: {chapter.InnerText}");
                    }

                    // Attributes: a later element overwrites an earlier value with the same name.
                    var attributes = chapter.Attributes;
                    if (attributes != null)
                    {
                        foreach (XmlAttribute attr in attributes)
                        {
                            if (attr == null)
                            {
                                continue;
                            }

                            Debug.Log($"Attribute: {attr.Name} = {attr.Value}");
                            collected[attr.Name] = attr.Value;
                        }
                    }

                    // Direct children
                    var children = chapter.ChildNodes;
                    for (var k = 0; k < children.Count; k++)
                    {
                        var child = children[k];
                        if (child == null || string.IsNullOrEmpty(child.InnerText))
                        {
                            continue;
                        }

                        Debug.Log($"Child Node: {child.Name} = {child.InnerText}");
                    }
                }

                // Hand the collected attributes to the subscribers.
                messageSend.Invoke(collected);
            }
            catch (Exception ex)
            {
                Debug.LogError("处理 XML 数据时发生错误: " + ex.Message);
            }
        }

        /// <summary>
        /// Logs a WebSocket error. Uses the HTTP status code and message when a response is
        /// available, otherwise falls back to the reason text supplied by the callback.
        /// </summary>
        /// <param name="ws">Socket that raised the error.</param>
        /// <param name="ex">Reason text supplied with the error callback.</param>
        private void OnError(WebSocket ws, string ex)
        {
            // Guard every step of the chain: dereferencing a missing response would throw
            // inside the error handler itself.
            var response = ws?.InternalRequest?.Response;
            var sf = response != null
                ? $"编码{response.StatusCode},详细信息{response.Message}"
                : $"详细信息{ex}";
            Debug.Log("错误编码: " + sf);
        }

        /// <summary>
        /// Logs the close code and reason when the WebSocket connection is closed.
        /// </summary>
        private void OnClosed(WebSocket ws, ushort code, string message) =>
            Debug.Log($"WebSocket 连接关闭,关闭代码: {code}, 原因: {message}");

        #endregion

(5)分次发送

#region 发送

        /// <summary>
        /// Coroutine that waits for the connection, sends the parameter frame built by
        /// <c>CreateJson_1</c>, then streams the audio in 1280-byte frames built by
        /// <c>CreateJson_2</c>, one frame per rendered frame.
        /// </summary>
        /// <param name="text">Reference text to evaluate.</param>
        /// <param name="audioData">Audio bytes to upload.</param>
        /// <returns>The coroutine enumerator.</returns>
        private IEnumerator XinghuoISE(string text, byte[] audioData)
        {
            const float connectTimeoutSeconds = 10f;
            const int frameSize = 1280;

            // Work on the socket that was current when the coroutine started, so a later
            // Content() call cannot redirect the remaining frames to another connection.
            var socket = webSocket;
            if (socket == null)
            {
                Debug.LogError("WebSocket 尚未创建,无法发送数据");
                yield break;
            }

            // Bounded wait: without the deadline a failed connection would keep this
            // coroutine alive forever.
            var deadline = Time.realtimeSinceStartup + connectTimeoutSeconds;
            yield return new WaitUntil(() => socket.IsOpen || Time.realtimeSinceStartup > deadline);
            if (!socket.IsOpen)
            {
                Debug.LogError("WebSocket 连接超时,取消发送");
                yield break;
            }

            if (audioData == null || audioData.Length == 0)
            {
                Debug.LogError("音频数据为空,取消发送");
                socket.Close();
                yield break;
            }

            // Parameter frame
            socket.Send(JsonMapper.ToJson(CreateJson_1(text)));

            // Audio frames
            var offset = 0;
            while (offset < audioData.Length)
            {
                if (!socket.IsOpen)
                {
                    Debug.LogWarning("WebSocket 已关闭,停止发送音频");
                    yield break;
                }

                var remaining = audioData.Length - offset;
                var isLast = remaining <= frameSize;
                var count = isLast ? remaining : frameSize;

                int status;
                int aus;
                if (isLast)
                {
                    status = 2;
                    aus = 4;
                }
                else if (offset == 0)
                {
                    status = 0;
                    aus = 1;
                }
                else
                {
                    status = 1;
                    aus = 2;
                }

                var currBytes = new byte[count];
                Array.Copy(audioData, offset, currBytes, 0, count);
                offset += count;

                socket.Send(JsonMapper.ToJson(CreateJson_2(aus, status, currBytes)));

                yield return null;
            }

            Debug.Log("数据发送完成");
        }

        /// <summary>
        /// Builds the first request frame: the <c>common</c> section with the app id, the
        /// <c>business</c> section with the evaluation parameters and reference text, and a
        /// <c>data</c> section with status 0.
        /// </summary>
        /// <param name="text">Reference text; sent prefixed with a BOM character and "[content]".</param>
        /// <returns>The request object ready for serialization.</returns>
        private JsonData CreateJson_1(string text)
        {
            var request = new JsonData();

            // common
            var common = new JsonData();
            common["app_id"] = APPID;
            request["common"] = common;

            // business
            var business = new JsonData();
            business["sub"] = "ise";
            business["ent"] = "en_vip";
            business["category"] = "read_chapter";
            business["cmd"] = "ssb";
            business["text"] = "\uFEFF[content]" + text;
            business["tte"] = "utf-8";
            business["ttp_skip"] = true;
            business["aue"] = "raw";
            request["business"] = business;

            // data
            var data = new JsonData();
            data["status"] = 0;
            request["data"] = data;

            return request;
        }

        /// <summary>
        /// Builds an audio upload frame (<c>cmd = "auw"</c>) carrying one Base64-encoded chunk.
        /// </summary>
        /// <param name="aus">Value written to <c>business.aus</c>.</param>
        /// <param name="status">Value written to <c>data.status</c>.</param>
        /// <param name="value">Audio bytes of this chunk.</param>
        /// <returns>The request object ready for serialization.</returns>
        private JsonData CreateJson_2(int aus, int status,byte[] value)
        {
            var request = new JsonData();

            var business = new JsonData();
            business["cmd"] = "auw";
            business["aus"] = aus;
            request["business"] = business;

            var data = new JsonData();
            data["status"] = status;
            data["data"] = Convert.ToBase64String(value);
            request["data"] = data;

            return request;
        }

        #endregion

(6)音频处理(AudioClip -> byte[])

#region 音频处理

        /// <summary>
        /// Trims leading and trailing silence from the clip (threshold 0.01) and converts the
        /// result to 16-bit PCM bytes.
        /// </summary>
        /// <param name="audio">Clip to process.</param>
        /// <returns>The PCM bytes, or an empty array when there is nothing to convert.</returns>
        private static byte[] DealAudio(AudioClip audio)
        {
            if (audio == null)
            {
                Debug.LogError("音频为空,无法处理");
                return Array.Empty<byte>();
            }

            var trimmed = TrimSilence(audio, 0.01f);
            if (trimmed == null)
            {
                // TrimSilence returns null when no samples remain.
                return Array.Empty<byte>();
            }

            var bytes = AudioClipToBytes(trimmed);

            // The trimmed clip is a temporary object created for this conversion only;
            // destroy it so repeated evaluations do not accumulate clips.
            if (trimmed != audio)
            {
                UnityEngine.Object.Destroy(trimmed);
            }

            return bytes;
        }
        
        /// <summary>
        /// Converts the float samples of an AudioClip to 16-bit little-endian PCM bytes.
        /// </summary>
        /// <param name="audioClip">Clip to convert.</param>
        /// <returns>Two bytes per sample (low byte first); empty when the clip is null.</returns>
        private static byte[] AudioClipToBytes(AudioClip audioClip)
        {
            if (audioClip == null)
            {
                return Array.Empty<byte>();
            }

            // GetData fills as many values as the array holds; a clip stores
            // samples * channels interleaved values, so size the buffer accordingly.
            var data = new float[audioClip.samples * audioClip.channels];
            audioClip.GetData(data, 0);

            const int rescaleFactor = 32767; // scales [-1, 1] floats to the Int16 range
            var outData = new byte[data.Length * 2];
            for (var i = 0; i < data.Length; i++)
            {
                // Clamp first so an out-of-range sample cannot overflow the short cast.
                var sample = (short)(Mathf.Clamp(data[i], -1f, 1f) * rescaleFactor);

                // Write the low byte first explicitly instead of relying on platform byte order.
                outData[i * 2] = (byte)(sample & 0xFF);
                outData[i * 2 + 1] = (byte)((sample >> 8) & 0xFF);
            }

            return outData;
        }

        /// <summary>
        /// Returns a new clip with leading and trailing silence removed.
        /// </summary>
        /// <param name="clip">Source clip.</param>
        /// <param name="min">Absolute amplitude above which a sample counts as sound.</param>
        /// <returns>The trimmed clip, or null when <paramref name="clip"/> is null or nothing remains.</returns>
        private static AudioClip TrimSilence(AudioClip clip, float min)
        {
            if (clip == null)
            {
                return null;
            }

            // A clip holds samples * channels interleaved values.
            var samples = new float[clip.samples * clip.channels];
            clip.GetData(samples, 0);

            return TrimSilence(new List<float>(samples), min, clip.channels, clip.frequency);
        }

        /// <summary>
        /// Removes leading and trailing silence from interleaved samples, keeping about 0.1 s of
        /// padding on each side, and wraps the result in a new clip named "TempClip".
        /// </summary>
        /// <param name="samples">Interleaved samples; the list is modified in place.</param>
        /// <param name="min">Absolute amplitude above which a sample counts as sound.</param>
        /// <param name="channels">Number of interleaved channels.</param>
        /// <param name="hz">Sample rate in Hz.</param>
        /// <param name="_3D">Forwarded unchanged as the last argument of <c>AudioClip.Create</c>.</param>
        /// <returns>The trimmed clip, or null when no complete frame remains.</returns>
        private static AudioClip TrimSilence(List<float> samples, float min, int channels, int hz, bool _3D = false)
        {
            // NOTE(review): in current Unity versions the five-argument AudioClip.Create overload
            // names its last parameter "stream" - confirm that forwarding _3D is intended.
            if (samples == null || channels <= 0 || samples.Count < channels)
            {
                Debug.Log("剔除后的AudioClip长度为0");
                return null;
            }

            // 0.1 s of context kept before the first and after the last audible sample.
            var padding = (int)(hz * .1f) * channels;

            // Leading silence: cut up to (first audible sample - padding), aligned to a frame.
            var firstLoud = samples.FindIndex(s => Mathf.Abs(s) > min);
            if (firstLoud < 0)
            {
                // Nothing audible: keep only the trailing padding window.
                firstLoud = samples.Count;
            }

            var start = Mathf.Max(firstLoud - padding, 0);
            start -= start % channels;
            samples.RemoveRange(0, start);

            // Trailing silence: keep everything up to and including (last audible sample + padding).
            var lastLoud = Mathf.Max(samples.FindLastIndex(s => Mathf.Abs(s) > min), 0);
            var usable = samples.Count - samples.Count % channels;
            var end = Mathf.Min(lastLoud + padding + 1, usable);
            end = Mathf.Min((end + channels - 1) / channels * channels, usable);
            samples.RemoveRange(end, samples.Count - end);

            if (samples.Count == 0)
            {
                Debug.Log("剔除后的AudioClip长度为0");
                return null;
            }

            // The clip length is expressed in frames, i.e. values per channel.
            var clip = AudioClip.Create("TempClip", samples.Count / channels, channels, hz, _3D);
            clip.SetData(samples.ToArray(), 0);

            return clip;
        }

        #endregion

(7)使用

/// <summary>
        /// Runs one speech evaluation: converts the clip to PCM bytes, opens the connection and
        /// starts the coroutine that uploads the text and audio.
        /// </summary>
        /// <param name="text">Reference text to evaluate.</param>
        /// <param name="audio">Recorded audio.</param>
        public void XingHuo_ISE_SA(string text, AudioClip audio)
        {
            if (audio == null)
            {
                Debug.LogError("评测音频为空,已取消评测");
                return;
            }

            // Convert before connecting, so that a failure in audio processing does not
            // leave an opened but unused connection behind.
            var _bytes = DealAudio(audio);
            if (_bytes == null || _bytes.Length == 0)
            {
                Debug.LogError("音频处理结果为空,已取消评测");
                return;
            }

            // Open the connection
            Content();
            // Upload
            StartCoroutine(XinghuoISE(text, _bytes));
        }

(8)XingHuo_ISE 完整脚本

using System;
using System.Collections;
using System.Collections.Generic;
using System.Security.Cryptography;
using System.Text;
using UnityEngine;
using System.Xml;
using WebSocket = BestHTTP.WebSocket.WebSocket;
using LitJson;
using UnityEngine.Events;

namespace ISE
{
    public class XingHuo_ISE : MonoBehaviour
    {
        // Intermediate values produced while building the request signature.
        private string signature_origin = null;
        private string signature_sha = null;
        private string signature = null;

        // Intermediate values produced while building the authorization parameter.
        private string authorization_origin = null;
        private string authorization = null;

        // Endpoint of the ISE streaming service.
        private string url = "wss://ise-api.xfyun.cn/v2/open-ise";

        // Active WebSocket connection.
        public WebSocket webSocket;

        // Service credentials. Real values must not be committed to source or published:
        // assign them in the Inspector or load them from local configuration.
        public string APPID = "<YOUR_APPID>";
        public string APISecret = "<YOUR_API_SECRET>";
        public string APIKey = "<YOUR_API_KEY>";

        // Raised with the attributes parsed from the evaluation result.
        public UnityEvent<Dictionary<string, string>> messageSend = new();

        /// <summary>
        /// Runs one speech evaluation: converts the clip to PCM bytes, opens the connection and
        /// starts the coroutine that uploads the text and audio.
        /// </summary>
        /// <param name="text">Reference text to evaluate.</param>
        /// <param name="audio">Recorded audio.</param>
        public void XingHuo_ISE_SA(string text, AudioClip audio)
        {
            if (audio == null)
            {
                Debug.LogError("评测音频为空,已取消评测");
                return;
            }

            // Convert before connecting, so that a failure in audio processing does not
            // leave an opened but unused connection behind.
            var _bytes = DealAudio(audio);
            if (_bytes == null || _bytes.Length == 0)
            {
                Debug.LogError("音频处理结果为空,已取消评测");
                return;
            }

            // Open the connection
            Content();
            // Upload
            StartCoroutine(XinghuoISE(text, _bytes));
        }

        #region 发送

        /// <summary>
        /// Coroutine that waits for the connection, sends the parameter frame built by
        /// <c>CreateJson_1</c>, then streams the audio in 1280-byte frames built by
        /// <c>CreateJson_2</c>, one frame per rendered frame.
        /// </summary>
        /// <param name="text">Reference text to evaluate.</param>
        /// <param name="audioData">Audio bytes to upload.</param>
        /// <returns>The coroutine enumerator.</returns>
        private IEnumerator XinghuoISE(string text, byte[] audioData)
        {
            const float connectTimeoutSeconds = 10f;
            const int frameSize = 1280;

            // Work on the socket that was current when the coroutine started, so a later
            // Content() call cannot redirect the remaining frames to another connection.
            var socket = webSocket;
            if (socket == null)
            {
                Debug.LogError("WebSocket 尚未创建,无法发送数据");
                yield break;
            }

            // Bounded wait: without the deadline a failed connection would keep this
            // coroutine alive forever.
            var deadline = Time.realtimeSinceStartup + connectTimeoutSeconds;
            yield return new WaitUntil(() => socket.IsOpen || Time.realtimeSinceStartup > deadline);
            if (!socket.IsOpen)
            {
                Debug.LogError("WebSocket 连接超时,取消发送");
                yield break;
            }

            if (audioData == null || audioData.Length == 0)
            {
                Debug.LogError("音频数据为空,取消发送");
                socket.Close();
                yield break;
            }

            // Parameter frame
            socket.Send(JsonMapper.ToJson(CreateJson_1(text)));

            // Audio frames
            var offset = 0;
            while (offset < audioData.Length)
            {
                if (!socket.IsOpen)
                {
                    Debug.LogWarning("WebSocket 已关闭,停止发送音频");
                    yield break;
                }

                var remaining = audioData.Length - offset;
                var isLast = remaining <= frameSize;
                var count = isLast ? remaining : frameSize;

                int status;
                int aus;
                if (isLast)
                {
                    status = 2;
                    aus = 4;
                }
                else if (offset == 0)
                {
                    status = 0;
                    aus = 1;
                }
                else
                {
                    status = 1;
                    aus = 2;
                }

                var currBytes = new byte[count];
                Array.Copy(audioData, offset, currBytes, 0, count);
                offset += count;

                socket.Send(JsonMapper.ToJson(CreateJson_2(aus, status, currBytes)));

                yield return null;
            }

            Debug.Log("数据发送完成");
        }

        /// <summary>
        /// Builds the first request frame: the <c>common</c> section with the app id, the
        /// <c>business</c> section with the evaluation parameters and reference text, and a
        /// <c>data</c> section with status 0.
        /// </summary>
        /// <param name="text">Reference text; sent prefixed with a BOM character and "[content]".</param>
        /// <returns>The request object ready for serialization.</returns>
        private JsonData CreateJson_1(string text)
        {
            var request = new JsonData();

            // common
            var common = new JsonData();
            common["app_id"] = APPID;
            request["common"] = common;

            // business
            var business = new JsonData();
            business["sub"] = "ise";
            business["ent"] = "en_vip";
            business["category"] = "read_chapter";
            business["cmd"] = "ssb";
            business["text"] = "\uFEFF[content]" + text;
            business["tte"] = "utf-8";
            business["ttp_skip"] = true;
            business["aue"] = "raw";
            request["business"] = business;

            // data
            var data = new JsonData();
            data["status"] = 0;
            request["data"] = data;

            return request;
        }

        /// <summary>
        /// Builds an audio upload frame (<c>cmd = "auw"</c>) carrying one Base64-encoded chunk.
        /// </summary>
        /// <param name="aus">Value written to <c>business.aus</c>.</param>
        /// <param name="status">Value written to <c>data.status</c>.</param>
        /// <param name="value">Audio bytes of this chunk.</param>
        /// <returns>The request object ready for serialization.</returns>
        private JsonData CreateJson_2(int aus, int status,byte[] value)
        {
            var request = new JsonData();

            var business = new JsonData();
            business["cmd"] = "auw";
            business["aus"] = aus;
            request["business"] = business;

            var data = new JsonData();
            data["status"] = status;
            data["data"] = Convert.ToBase64String(value);
            request["data"] = data;

            return request;
        }

        #endregion

        #region 音频处理

        /// <summary>
        /// Trims leading and trailing silence from the clip (threshold 0.01) and converts the
        /// result to 16-bit PCM bytes.
        /// </summary>
        /// <param name="audio">Clip to process.</param>
        /// <returns>The PCM bytes, or an empty array when there is nothing to convert.</returns>
        private static byte[] DealAudio(AudioClip audio)
        {
            if (audio == null)
            {
                Debug.LogError("音频为空,无法处理");
                return Array.Empty<byte>();
            }

            var trimmed = TrimSilence(audio, 0.01f);
            if (trimmed == null)
            {
                // TrimSilence returns null when no samples remain.
                return Array.Empty<byte>();
            }

            var bytes = AudioClipToBytes(trimmed);

            // The trimmed clip is a temporary object created for this conversion only;
            // destroy it so repeated evaluations do not accumulate clips.
            if (trimmed != audio)
            {
                UnityEngine.Object.Destroy(trimmed);
            }

            return bytes;
        }
        
        /// <summary>
        /// Converts the float samples of an AudioClip to 16-bit little-endian PCM bytes.
        /// </summary>
        /// <param name="audioClip">Clip to convert.</param>
        /// <returns>Two bytes per sample (low byte first); empty when the clip is null.</returns>
        private static byte[] AudioClipToBytes(AudioClip audioClip)
        {
            if (audioClip == null)
            {
                return Array.Empty<byte>();
            }

            // GetData fills as many values as the array holds; a clip stores
            // samples * channels interleaved values, so size the buffer accordingly.
            var data = new float[audioClip.samples * audioClip.channels];
            audioClip.GetData(data, 0);

            const int rescaleFactor = 32767; // scales [-1, 1] floats to the Int16 range
            var outData = new byte[data.Length * 2];
            for (var i = 0; i < data.Length; i++)
            {
                // Clamp first so an out-of-range sample cannot overflow the short cast.
                var sample = (short)(Mathf.Clamp(data[i], -1f, 1f) * rescaleFactor);

                // Write the low byte first explicitly instead of relying on platform byte order.
                outData[i * 2] = (byte)(sample & 0xFF);
                outData[i * 2 + 1] = (byte)((sample >> 8) & 0xFF);
            }

            return outData;
        }

        /// <summary>
        /// Returns a new clip with leading and trailing silence removed.
        /// </summary>
        /// <param name="clip">Source clip.</param>
        /// <param name="min">Absolute amplitude above which a sample counts as sound.</param>
        /// <returns>The trimmed clip, or null when <paramref name="clip"/> is null or nothing remains.</returns>
        private static AudioClip TrimSilence(AudioClip clip, float min)
        {
            if (clip == null)
            {
                return null;
            }

            // A clip holds samples * channels interleaved values.
            var samples = new float[clip.samples * clip.channels];
            clip.GetData(samples, 0);

            return TrimSilence(new List<float>(samples), min, clip.channels, clip.frequency);
        }

        /// <summary>
        /// Removes leading and trailing silence from interleaved samples, keeping about 0.1 s of
        /// padding on each side, and wraps the result in a new clip named "TempClip".
        /// </summary>
        /// <param name="samples">Interleaved samples; the list is modified in place.</param>
        /// <param name="min">Absolute amplitude above which a sample counts as sound.</param>
        /// <param name="channels">Number of interleaved channels.</param>
        /// <param name="hz">Sample rate in Hz.</param>
        /// <param name="_3D">Forwarded unchanged as the last argument of <c>AudioClip.Create</c>.</param>
        /// <returns>The trimmed clip, or null when no complete frame remains.</returns>
        private static AudioClip TrimSilence(List<float> samples, float min, int channels, int hz, bool _3D = false)
        {
            // NOTE(review): in current Unity versions the five-argument AudioClip.Create overload
            // names its last parameter "stream" - confirm that forwarding _3D is intended.
            if (samples == null || channels <= 0 || samples.Count < channels)
            {
                Debug.Log("剔除后的AudioClip长度为0");
                return null;
            }

            // 0.1 s of context kept before the first and after the last audible sample.
            var padding = (int)(hz * .1f) * channels;

            // Leading silence: cut up to (first audible sample - padding), aligned to a frame.
            var firstLoud = samples.FindIndex(s => Mathf.Abs(s) > min);
            if (firstLoud < 0)
            {
                // Nothing audible: keep only the trailing padding window.
                firstLoud = samples.Count;
            }

            var start = Mathf.Max(firstLoud - padding, 0);
            start -= start % channels;
            samples.RemoveRange(0, start);

            // Trailing silence: keep everything up to and including (last audible sample + padding).
            var lastLoud = Mathf.Max(samples.FindLastIndex(s => Mathf.Abs(s) > min), 0);
            var usable = samples.Count - samples.Count % channels;
            var end = Mathf.Min(lastLoud + padding + 1, usable);
            end = Mathf.Min((end + channels - 1) / channels * channels, usable);
            samples.RemoveRange(end, samples.Count - end);

            if (samples.Count == 0)
            {
                Debug.Log("剔除后的AudioClip长度为0");
                return null;
            }

            // The clip length is expressed in frames, i.e. values per channel.
            var clip = AudioClip.Create("TempClip", samples.Count / channels, channels, hz, _3D);
            clip.SetData(samples.ToArray(), 0);

            return clip;
        }

        #endregion

        #region Web

        /// <summary>
        /// Creates a new WebSocket for the authenticated URL, registers the callbacks and starts
        /// connecting. The connection is established asynchronously; <c>OnOpen</c> is invoked
        /// once it is ready.
        /// </summary>
        public void Content()
        {
            // Release a socket left over from a previous call so it neither stays open
            // nor delivers late callbacks that would act on the new connection.
            if (webSocket != null)
            {
                webSocket.OnOpen -= OnOpen;
                webSocket.OnBinary -= OnBinaryReceived;
                webSocket.OnMessage -= OnMessage;
                webSocket.OnError -= OnError;
                webSocket.OnClosed -= OnClosed;

                if (webSocket.IsOpen)
                {
                    webSocket.Close();
                }
            }

            webSocket = new WebSocket(new Uri(GetUrl(url)));

            webSocket.OnOpen += OnOpen;
            webSocket.OnBinary += OnBinaryReceived;
            webSocket.OnMessage += OnMessage;
            webSocket.OnError += OnError;
            webSocket.OnClosed += OnClosed;

            webSocket.Open();

            // Open() only starts the handshake, so report "connecting" rather than success.
            Debug.Log("讯飞WebSocket开始连接...");
        }

        /// <summary>
        /// Logs that the WebSocket connection has been established.
        /// </summary>
        private void OnOpen(WebSocket ws) => Debug.Log("WebSocket 连接成功!");

        /// <summary>
        /// Callback for binary WebSocket messages. The body is empty, so binary data is ignored.
        /// </summary>
        private void OnBinaryReceived(WebSocket ws, byte[] data)
        {
        }

        /// <summary>
        /// Handles a text message from the server. Validates the response code and, when
        /// <c>data.status</c> is "2", decodes the Base64 payload to XML and passes it to
        /// <c>ProcessXml</c>. The socket is closed after the result or after any failure.
        /// </summary>
        /// <param name="ws">Socket that raised the event; this is the socket that gets closed.</param>
        /// <param name="message">Raw JSON text of the message.</param>
        private void OnMessage(WebSocket ws, string message)
        {
            try
            {
                JsonData js = JsonMapper.ToObject(message);

                // A non-zero code is a server-side failure.
                if (js["code"].ToString() != "0")
                {
                    Debug.LogError("联网错误:" + js["message"].ToString());

                    // Release the connection here as well, like the other failure paths.
                    if (ws.IsOpen)
                    {
                        ws.Close();
                    }

                    return;
                }

                if (js["message"].ToString() == "success" && js["data"] != null)
                {
                    var status = Convert.ToString(js["data"]["status"]);
                    if (status == "2" && js["data"]["data"] != null)
                    {
                        // The payload is Base64-encoded UTF-8 XML.
                        byte[] decodedBytes = Convert.FromBase64String(js["data"]["data"].ToString());
                        string xmlString = System.Text.Encoding.UTF8.GetString(decodedBytes);

                        ProcessXml(xmlString);

                        if (ws.IsOpen)
                        {
                            ws.Close();
                        }
                    }
                    else
                    {
                        Debug.LogWarning("返回的数据状态不符合预期,status: " + status);
                    }
                }
                else
                {
                    Debug.LogError("错误:未能获取到有效的数据");

                    if (ws.IsOpen)
                    {
                        ws.Close();
                    }
                }
            }
            catch (Exception ex)
            {
                Debug.LogError("处理消息时发生错误: " + ex.Message);

                // Close the socket that delivered the message, not whatever the field
                // currently references.
                if (ws != null && ws.IsOpen)
                {
                    ws.Close();
                }
            }
        }

        /// <summary>
        /// Parses the result XML, logs the content of every <c>read_chapter</c> element and
        /// raises <c>messageSend</c> with the collected attribute name/value pairs.
        /// </summary>
        /// <param name="xmlString">XML text of the evaluation result.</param>
        private void ProcessXml(string xmlString)
        {
            try
            {
                var document = new XmlDocument();
                document.LoadXml(xmlString);

                var chapters = document.GetElementsByTagName("read_chapter");
                if (chapters.Count == 0)
                {
                    Debug.Log("没有找到 'read_chapter' 标签节点。");
                    return;
                }

                var collected = new Dictionary<string, string>();

                foreach (XmlNode chapter in chapters)
                {
                    if (chapter == null)
                    {
                        continue;
                    }

                    // Element text
                    if (!string.IsNullOrEmpty(chapter.InnerText))
                    {
                        Debug.Log($"Node InnerText: {chapter.InnerText}");
                    }

                    // Attributes: a later element overwrites an earlier value with the same name.
                    var attributes = chapter.Attributes;
                    if (attributes != null)
                    {
                        foreach (XmlAttribute attr in attributes)
                        {
                            if (attr == null)
                            {
                                continue;
                            }

                            Debug.Log($"Attribute: {attr.Name} = {attr.Value}");
                            collected[attr.Name] = attr.Value;
                        }
                    }

                    // Direct children
                    var children = chapter.ChildNodes;
                    for (var k = 0; k < children.Count; k++)
                    {
                        var child = children[k];
                        if (child == null || string.IsNullOrEmpty(child.InnerText))
                        {
                            continue;
                        }

                        Debug.Log($"Child Node: {child.Name} = {child.InnerText}");
                    }
                }

                // Hand the collected attributes to the subscribers.
                messageSend.Invoke(collected);
            }
            catch (Exception ex)
            {
                Debug.LogError("处理 XML 数据时发生错误: " + ex.Message);
            }
        }

        /// <summary>
        /// Logs a WebSocket error. Uses the HTTP status code and message when a response is
        /// available, otherwise falls back to the reason text supplied by the callback.
        /// </summary>
        /// <param name="ws">Socket that raised the error.</param>
        /// <param name="ex">Reason text supplied with the error callback.</param>
        private void OnError(WebSocket ws, string ex)
        {
            // Guard every step of the chain: dereferencing a missing response would throw
            // inside the error handler itself.
            var response = ws?.InternalRequest?.Response;
            var sf = response != null
                ? $"编码{response.StatusCode},详细信息{response.Message}"
                : $"详细信息{ex}";
            Debug.Log("错误编码: " + sf);
        }

        /// <summary>
        /// Logs the close code and reason when the WebSocket connection is closed.
        /// </summary>
        private void OnClosed(WebSocket ws, ushort code, string message) =>
            Debug.Log($"WebSocket 连接关闭,关闭代码: {code}, 原因: {message}");

        #endregion

        #region 鉴权/URL

        /// <summary>
        /// Builds the authenticated WebSocket URL for the given endpoint.
        /// </summary>
        /// <param name="url">Base endpoint address.</param>
        /// <returns>
        /// The endpoint followed by the <c>authorization</c>, <c>date</c> and <c>host</c> query parameters.
        /// </returns>
        private string GetUrl(string url)
        {
            var uri = new Uri(url);

            // "r" formats the UTC time as RFC 1123, e.g. "Thu, 01 Aug 2019 01:53:21 GMT".
            var date = DateTime.UtcNow.ToString("r");

            // Fills the authorization field from the host, date and request path.
            ComposeAuthUrl(uri, date);

            // Percent-encode every query value: the date contains ',' and ':' in addition to
            // spaces, and the Base64 authorization may end with '=' padding.
            var uriStr = string.Format("{0}?authorization={1}&date={2}&host={3}",
                uri,
                Uri.EscapeDataString(authorization),
                Uri.EscapeDataString(date),
                Uri.EscapeDataString(uri.Host));

            Debug.Log(uriStr);
            Debug.Log(date);
            return uriStr;
        }

        /// <summary>
        /// Computes the request signature and stores the Base64 authorization value in the
        /// <c>authorization</c> field (intermediate values are kept in the other signature fields).
        /// </summary>
        /// <param name="uri">Endpoint whose host and absolute path are signed.</param>
        /// <param name="date">Timestamp text that is signed together with the host.</param>
        private void ComposeAuthUrl(Uri uri, string date)
        {
            // Text to sign: host line, date line and request line separated by '\n'.
            // Plain concatenation is used on purpose: passing runtime text to string.Format
            // as the format string would throw if it ever contained '{' or '}'.
            signature_origin = "host: " + uri.Host + "\ndate: " + date + "\nGET " + uri.AbsolutePath + " HTTP/1.1";

            // HMAC-SHA256 of the text above, keyed with APISecret, Base64-encoded.
            signature_sha = HmacSHA256(signature_origin, APISecret);
            signature = signature_sha;

            authorization_origin = string.Format(
                "api_key=\"{0}\", algorithm=\"{1}\", headers=\"{2}\", signature=\"{3}\"",
                APIKey, "hmac-sha256", "host date request-line", signature);

            authorization = ToBase64String(authorization_origin);
        }

        #endregion

        #region 编码

        /// <summary>
        /// Replaces every space character in <paramref name="input"/> with <c>%20</c>.
        /// </summary>
        /// <param name="input">Text to process; may be null or empty.</param>
        /// <returns>The converted text, or <paramref name="input"/> itself when it is null or empty.</returns>
        private static string ReplaceSpacesWithPercent20(string input)
        {
            if (string.IsNullOrEmpty(input))
            {
                return input;
            }

            // Splitting on ' ' and re-joining with "%20" substitutes every single space.
            return string.Join("%20", input.Split(' '));
        }

        /// <summary>
        /// Computes an HMAC-SHA256 digest and returns it Base64-encoded.
        /// </summary>
        /// <param name="secret">The message to sign (UTF-8 encoded before hashing).</param>
        /// <param name="signKey">The HMAC key (UTF-8 encoded).</param>
        /// <returns>Base64 text of the 32-byte digest.</returns>
        private static string HmacSHA256(string secret, string signKey)
        {
            byte[] keyBytes = Encoding.UTF8.GetBytes(signKey);
            byte[] payload = Encoding.UTF8.GetBytes(secret);

            using (var hmac = new HMACSHA256(keyBytes))
            {
                return Convert.ToBase64String(hmac.ComputeHash(payload));
            }
        }

        /// <summary>
        /// Encodes the UTF-8 bytes of <paramref name="value"/> as Base64.
        /// </summary>
        /// <param name="value">Text to encode; may be null or empty.</param>
        /// <returns>The Base64 text, or an empty string when <paramref name="value"/> is null or empty.</returns>
        private static string ToBase64String(string value) =>
            string.IsNullOrEmpty(value)
                ? ""
                : Convert.ToBase64String(Encoding.UTF8.GetBytes(value));

        #endregion
    }
}

(9)代码详解

该脚本主要封装了科大讯飞在线口语评测的完整接入流程:

XingHuo_ISE_SA 方法接收文本与 AudioClip,并将音频交由 DealAudio 处理;

DealAudio 依次调用 TrimSilence(及其重载)剔除前后静默段,再通过 AudioClipToBytes 将浮点 PCM 转为 16 位小端字节;

XinghuoISE 协程先用 CreateJson_1 构建首帧 JSON(包含 common、business、data.status=0),通过 WebSocket 发送文本及评测参数,然后循环按 1280 字节分片调用 CreateJson_2 拼装音频分片 JSON(status、aus 表示帧类型)并依次推送;

Content 方法负责调用 GetUrl(内部用 ComposeAuthUrl、ReplaceSpacesWithPercent20、HmacSHA256、ToBase64String 构造带签名的鉴权 URL)打开 BestHTTP 的 WebSocket,并注册 OnOpen、OnBinaryReceived、OnMessage、OnError、OnClosed 回调;

其中 OnMessage 检查 code、status,解 Base64 得到 XML 字符串并交给 ProcessXml 解析 <read_chapter> 节点属性和子节点内容;

最终通过 UnityEvent<Dictionary<string,string>> messageSend 将评测结果发回给上层。


3.外部调用逻辑示例

(1)参数

//iFlytek ISE component that receives the text and recording to evaluate
    public XingHuo_ISE XingHuoIse;
    //Score labels, filled by InterpretationParameters in this order:
    //accuracy, standard, fluency, integrity, total
    public Text textAsset_1;
    public Text textAsset_2;
    public Text textAsset_3;
    public Text textAsset_4;
    public Text textAsset_5;
    //Dropdown used to pick a microphone
    public Dropdown microphoneDropdown;
    //Name of the currently selected microphone device
    private string defaultMicrophone;
    //Input field holding the text to be evaluated
    public InputField inputField;
    //Button that starts recording
    public Button Button_AMic;
    //Button that stops recording
    public Button Button_SMic;
    //Button that starts the ISE evaluation
    public Button Button_ISE;
    //Most recent recording, kept so it can be submitted for evaluation
    public AudioClip audioClip_1;

(2)通过下拉菜单选择麦克风(UI-Dropdown)

/// <summary>
    /// Rebuilds the microphone dropdown from the devices reported by
    /// <c>Microphone.devices</c> and selects the first entry as the default.
    /// </summary>
    public void UpdateMicrophoneDropdown()
    {
        microphoneDropdown.ClearOptions();

        string[] deviceNames = Microphone.devices;
        microphoneDropdown.AddOptions(new List<string>(deviceNames));

        if (deviceNames.Length == 0)
        {
            // Nothing to select; keep a placeholder in the device-name field.
            defaultMicrophone = "No Microphone Found";
        }
        else
        {
            // First reported device becomes the initial selection.
            microphoneDropdown.value = 0;
            defaultMicrophone = deviceNames[0];
        }

        microphoneDropdown.RefreshShownValue();
    }

    /// <summary>
    /// Dropdown callback: switches the active microphone to the chosen entry.
    /// </summary>
    /// <param name="selectedIndex">Index of the option that was selected.</param>
    private void OnMicrophoneDropdownChanged(int selectedIndex)
    {
        string[] deviceNames = Microphone.devices;

        // The device list is re-read here, so the index may no longer be valid.
        if (selectedIndex >= deviceNames.Length)
        {
            return;
        }

        defaultMicrophone = deviceNames[selectedIndex];
        Debug.Log("麦克风已切换至: " + defaultMicrophone);
    }

(3)按钮监听

/// <summary>
    /// Starts recording from the selected microphone and stores the clip in
    /// <c>audioClip_1</c>.
    /// </summary>
    /// <remarks>
    /// Recording is requested as a looping 40-second buffer at 16000 Hz.
    /// Nothing is started when no microphone is available.
    /// </remarks>
    public void OnClick_UseMic()
    {
        // With no device connected, defaultMicrophone does not name a real
        // microphone, so starting a recording could only fail.
        if (Microphone.devices.Length == 0)
        {
            Debug.LogWarning("未检测到可用的麦克风,无法开始录音");
            return;
        }

        audioClip_1 = Microphone.Start(defaultMicrophone, true, 40, 16000);

        // Surface a failed start here instead of at evaluation time.
        if (audioClip_1 == null)
        {
            Debug.LogWarning("录音启动失败: " + defaultMicrophone);
        }
    }

    /// <summary>
    /// Ends recording on the currently selected microphone.
    /// </summary>
    public void OnClick_StopMic() => Microphone.End(defaultMicrophone);

    /// <summary>
    /// Submits the reference text and the cached recording for evaluation.
    /// </summary>
    /// <remarks>
    /// The request is skipped, with a warning, when there is no recording
    /// or no reference text to evaluate against.
    /// </remarks>
    public void OnClick_UseISE()
    {
        // No recording has been captured yet.
        if (audioClip_1 == null)
        {
            Debug.LogWarning("尚未录音,无法开始评测");
            return;
        }

        string referenceText = inputField.text;

        // An empty reference text gives the service nothing to score against.
        if (string.IsNullOrWhiteSpace(referenceText))
        {
            Debug.LogWarning("待评测文本为空,无法开始评测");
            return;
        }

        XingHuoIse.XingHuo_ISE_SA(referenceText, audioClip_1);
    }

(4)返回评分监听

/// <summary>
    /// Score callback: writes the evaluation scores into the five text labels.
    /// </summary>
    /// <param name="parameters">
    /// Result values keyed by score name. The keys read are
    /// <c>accuracy_score</c>, <c>standard_score</c>, <c>fluency_score</c>,
    /// <c>integrity_score</c> and <c>total_score</c>.
    /// </param>
    /// <remarks>
    /// A missing key is shown as "--" instead of throwing, so a partial
    /// result still updates the remaining labels.
    /// </remarks>
    public void InterpretationParameters(Dictionary<string,string> parameters)
    {
        if (parameters == null)
        {
            Debug.LogWarning("评分结果为空");
            return;
        }

        // Single lookup per key; falls back to a placeholder when absent.
        string ScoreOf(string key) =>
            parameters.TryGetValue(key, out string score) ? score : "--";

        textAsset_1.text ="准确度:"+ ScoreOf("accuracy_score");
        textAsset_2.text ="标准度:"+ ScoreOf("standard_score");
        textAsset_3.text ="流利度:"+ ScoreOf("fluency_score");
        textAsset_4.text ="完整度:"+ ScoreOf("integrity_score");
        textAsset_5.text ="总分:  "+ ScoreOf("total_score");
    }

(5)下拉菜单初始化

/// <summary>
    /// Populates the microphone dropdown, then subscribes to its selection changes.
    /// </summary>
private void Start()
    {
        // Fill the dropdown and pick the initial device.
        UpdateMicrophoneDropdown();

        // Subscribed only after the initial population, so the selection made
        // there is not reported to OnMicrophoneDropdownChanged.
        microphoneDropdown.onValueChanged.AddListener(OnMicrophoneDropdownChanged);
    }

(6)ISE_Test 完整脚本

using System;
using System.Collections;
using System.Collections.Generic;
using ISE;
using UnityEngine;
using UnityEngine.UI;

/// <summary>
/// Demo driver for the iFlytek ISE component: lets the user pick a microphone,
/// record a clip, submit it with a reference text, and shows the returned scores.
/// </summary>
public class ISE_Test : MonoBehaviour
{
    // Shown in a score label when the result does not contain that score.
    private const string MissingScore = "--";

    // iFlytek ISE component that receives the text and recording to evaluate
    public XingHuo_ISE XingHuoIse;
    // Score labels, in order: accuracy, standard, fluency, integrity, total
    public Text textAsset_1;
    public Text textAsset_2;
    public Text textAsset_3;
    public Text textAsset_4;
    public Text textAsset_5;
    // Dropdown used to pick a microphone
    public Dropdown microphoneDropdown;
    // Name of the selected microphone device; null when no device is available
    private string defaultMicrophone;
    // Input field holding the text to be evaluated
    public InputField inputField;
    // Button that starts recording
    public Button Button_AMic;
    // Button that stops recording
    public Button Button_SMic;
    // Button that starts the ISE evaluation
    public Button Button_ISE;
    // Most recent recording, kept so it can be submitted for evaluation
    public AudioClip audioClip_1;

    /// <summary>
    /// Populates the microphone dropdown, then subscribes to its selection changes.
    /// </summary>
    private void Start()
    {
        UpdateMicrophoneDropdown();

        // Subscribed only after the initial population, so the selection made
        // there is not reported to OnMicrophoneDropdownChanged.
        microphoneDropdown.onValueChanged.AddListener(OnMicrophoneDropdownChanged);
    }

    /// <summary>
    /// Removes the dropdown subscription added in <see cref="Start"/>.
    /// </summary>
    private void OnDestroy()
    {
        if (microphoneDropdown != null)
        {
            microphoneDropdown.onValueChanged.RemoveListener(OnMicrophoneDropdownChanged);
        }
    }

    /// <summary>
    /// Rebuilds the microphone dropdown from the devices reported by
    /// <c>Microphone.devices</c> and selects the first entry as the default.
    /// </summary>
    public void UpdateMicrophoneDropdown()
    {
        microphoneDropdown.ClearOptions();

        string[] deviceNames = Microphone.devices;
        microphoneDropdown.AddOptions(new List<string>(deviceNames));

        if (deviceNames.Length > 0)
        {
            microphoneDropdown.value = 0;
            defaultMicrophone = deviceNames[0];
        }
        else
        {
            // Do not store a human-readable message in the device-name field:
            // it would later be passed to the Microphone API as a device name.
            defaultMicrophone = null;
            Debug.LogWarning("No Microphone Found");
        }

        microphoneDropdown.RefreshShownValue();
    }

    /// <summary>
    /// Dropdown callback: switches the active microphone to the chosen entry.
    /// </summary>
    /// <param name="selectedIndex">Index of the option that was selected.</param>
    private void OnMicrophoneDropdownChanged(int selectedIndex)
    {
        string[] deviceNames = Microphone.devices;

        // The device list is re-read here, so the index may no longer be valid.
        if (selectedIndex < 0 || selectedIndex >= deviceNames.Length)
        {
            return;
        }

        defaultMicrophone = deviceNames[selectedIndex];
        Debug.Log("麦克风已切换至: " + defaultMicrophone);
    }

    /// <summary>
    /// Starts recording from the selected microphone and stores the clip in
    /// <see cref="audioClip_1"/>.
    /// </summary>
    /// <remarks>
    /// Recording is requested as a looping 40-second buffer at 16000 Hz.
    /// Nothing is started when no microphone is available.
    /// </remarks>
    public void OnClick_UseMic()
    {
        if (Microphone.devices.Length == 0)
        {
            Debug.LogWarning("未检测到可用的麦克风,无法开始录音");
            return;
        }

        audioClip_1 = Microphone.Start(defaultMicrophone, true, 40, 16000);

        // Surface a failed start here instead of at evaluation time.
        if (audioClip_1 == null)
        {
            Debug.LogWarning("录音启动失败: " + defaultMicrophone);
        }
    }

    /// <summary>
    /// Ends recording on the currently selected microphone.
    /// </summary>
    public void OnClick_StopMic()
    {
        Microphone.End(defaultMicrophone);
    }

    /// <summary>
    /// Submits the reference text and the cached recording for evaluation.
    /// </summary>
    /// <remarks>
    /// The request is skipped, with a warning, when there is no recording
    /// or no reference text to evaluate against.
    /// </remarks>
    public void OnClick_UseISE()
    {
        if (audioClip_1 == null)
        {
            Debug.LogWarning("尚未录音,无法开始评测");
            return;
        }

        string referenceText = inputField.text;
        if (string.IsNullOrWhiteSpace(referenceText))
        {
            Debug.LogWarning("待评测文本为空,无法开始评测");
            return;
        }

        XingHuoIse.XingHuo_ISE_SA(referenceText, audioClip_1);
    }

    /// <summary>
    /// Score callback: writes the evaluation scores into the five text labels.
    /// </summary>
    /// <param name="parameters">
    /// Result values keyed by score name. The keys read are
    /// <c>accuracy_score</c>, <c>standard_score</c>, <c>fluency_score</c>,
    /// <c>integrity_score</c> and <c>total_score</c>.
    /// </param>
    public void InterpretationParameters(Dictionary<string,string> parameters)
    {
        if (parameters == null)
        {
            Debug.LogWarning("评分结果为空");
            return;
        }

        textAsset_1.text ="准确度:"+ ScoreOrPlaceholder(parameters, "accuracy_score");
        textAsset_2.text ="标准度:"+ ScoreOrPlaceholder(parameters, "standard_score");
        textAsset_3.text ="流利度:"+ ScoreOrPlaceholder(parameters, "fluency_score");
        textAsset_4.text ="完整度:"+ ScoreOrPlaceholder(parameters, "integrity_score");
        textAsset_5.text ="总分:  "+ ScoreOrPlaceholder(parameters, "total_score");
    }

    /// <summary>
    /// Returns the value stored under <paramref name="key"/>, or a placeholder
    /// when the key is absent, so a partial result never throws.
    /// </summary>
    private static string ScoreOrPlaceholder(Dictionary<string, string> scores, string key)
    {
        return scores.TryGetValue(key, out string score) ? score : MissingScore;
    }
}

(7)编辑器中使用

1)Project Setting中设置HTTP请求

2)添加脚本

创建空物体,命名为ISEManager。

在ISEManager上添加XingHuo_ISE脚本与ISE_Test脚本,并在XingHuo_ISE脚本中填写对应的服务接口认证信息。

3)创建并挂载对应的UI元素

创建

挂载

4)返回评分监听

5)为三个按钮分别注册监听

6)运行测试

(8)代码详解

该脚本首先通过 Microphone.devices 获取系统中所有可用的麦克风设备并将其填充到 Dropdown 列表中以供用户选择;

然后通过调用 Microphone.Start(defaultMicrophone, true, 40, 16000) 开始录制 16 kHz 的音频并在需要时用 Microphone.End(defaultMicrophone) 停止录音;

接着,用户在 InputField 中输入待测文本,点击评测按钮时将文本和录音得到的 AudioClip 一并传给 XingHuo_ISE_SA 方法发起在线口语评测;

最后在 InterpretationParameters 回调中分别将准确度、标准度、流利度、完整度和总分显示到对应的 Text 组件上。


四、后记&下载地址

        在本篇 Unity 技术博客即将付梓之际,作为一名刚入行的新人,我既怀揣着与大家分享所学所感的激动,也难免对自己文字与技术深度的不足心存忐忑。由于技法尚嫌浅薄,阐述不周或示例不够完善之处在所难免,代码编写之中亦恐有不当之处,还望大佬们不吝赐教和批评。

BestHTTP2_v2.0.5下载地址(转载):GitCode - 全球开发者的开源社区,开源代码托管平台

Logo

技术共进,成长同行——讯飞AI开发者社区

更多推荐