语音合成就是把文字转成语音,可以用于商家宣传、堂食语音播报提醒、候车提醒、驾校语音提醒等场景。那么 Python 如何在合成音频文件的同时又输出音频流呢?这就需要用到 Python 的 pyaudio 库来实现实时播报。

# This is a sample Python script.
# 变量

from builtins import Exception, str, bytes

import pyaudio
import websocket
import datetime
import hashlib
import base64
import hmac
import json
from urllib.parse import urlencode
import time
import ssl
from wsgiref.handlers import format_date_time
from datetime import datetime
from time import mktime

# WebSocket endpoint of the TTS service; product_url() appends the auth query to it.
host_url = "wss://tts-api.xfyun.cn/v2/tts"
# Credentials, left blank here and to be filled in before running.
appid = ""  # obtained from the console
api_secret = ""
api_key = ""
# File that on_message() appends the decoded audio bytes to.
audio_file = "./2.pcm"


def product_url(api_secret, api_key, base_url=None):
    """Build the signed WebSocket URL used to open the TTS connection.

    The signature is an HMAC-SHA256 over the ``host``, ``date`` and request
    line, keyed with ``api_secret``. It is wrapped in an authorization string
    together with ``api_key`` and appended to the URL as query parameters.

    :param api_secret: secret used as the HMAC key.
    :param api_key: key embedded in the authorization string.
    :param base_url: endpoint to sign; defaults to the module-level
        ``host_url``. Host and path of the signature are taken from it so the
        signed values can never drift from the URL actually requested.
    :return: ``base_url`` followed by ``?authorization=...&date=...&host=...``.
    """
    # Local import: the file's import block only brings in ``urlencode``.
    from urllib.parse import urlparse

    if base_url is None:
        base_url = host_url
    target = urlparse(base_url)
    host = target.netloc
    path = target.path or "/"

    # Format the current epoch time directly as an RFC 1123 GMT date.
    # Round-tripping through a naive local datetime and mktime() can be off
    # by an hour in the ambiguous hour around a DST change.
    now_date = format_date_time(time.time())

    # Raw string that gets signed: host, date and request line.
    origin_base = f"host: {host}\ndate: {now_date}\nGET {path} HTTP/1.1"

    # HMAC-SHA256, then base64. The signature and the authorization value
    # are credentials, so they are deliberately not printed.
    digest = hmac.new(api_secret.encode('utf-8'), origin_base.encode('utf-8'),
                      digestmod=hashlib.sha256).digest()
    signature = base64.b64encode(digest).decode('utf-8')

    authorization_origin = (
        f'api_key="{api_key}", algorithm="hmac-sha256", '
        f'headers="host date request-line", signature="{signature}"'
    )
    authorization = base64.b64encode(authorization_origin.encode('utf-8')).decode('utf-8')

    # Auth parameters are passed in the query string of the WebSocket URL.
    query = {
        "authorization": authorization,
        "date": now_date,
        "host": host,
    }
    return base_url + '?' + urlencode(query)


def on_message(ws, message):
    """Handle one frame received from the TTS service.

    The frame is a JSON document. A non-zero ``code`` is reported as an error
    and the connection is closed. Otherwise the base64 ``data.audio`` payload
    is decoded, played through the module-level ``audio_stream`` and appended
    to ``audio_file``. The connection is closed when ``data.status`` is 2.

    :param ws: connection the frame arrived on; closed on error or last frame.
    :param message: raw JSON text of the frame.
    """
    print(f"Received message: {message}")
    payload = json.loads(message)
    code = payload.get("code")
    sid = payload.get("sid")

    # Check the result code before touching "data": an error frame is not
    # guaranteed to carry an audio payload at all.
    if code != 0:
        print("sid:%s call error:%s code is:%s" % (sid, payload.get("message"), code))
        ws.close()
        return

    data = payload.get("data") or {}
    audio = base64.b64decode(data.get("audio") or "")
    if audio:
        audio_stream.write(audio)  # play in real time
        with open(audio_file, 'ab') as f:
            f.write(audio)

    if data.get("status") == 2:
        ws.close()


def on_error(ws, error):
    """Print an error reported for the WebSocket connection.

    :param ws: connection the error belongs to (included in the output).
    :param error: the reported error.
    """
    report = "Error: {},{}".format(error, ws)
    print(report)


def on_close(ws, reason, res):
    """Print a notice that the WebSocket connection has been closed.

    :param ws: the closed connection (included in the output).
    :param reason: unused here; presumably the close status supplied by the
        WebSocket library -- confirm against the caller.
    :param res: unused here; presumably the close message -- confirm.
    """
    notice = "WebSocket connection closed,{}".format(ws)
    print(notice)


def on_open(ws):
    """Send the synthesis request as soon as the connection is established.

    The request is a single JSON frame made of three sections: ``common``
    (the module-level ``appid``), ``business`` (synthesis options) and
    ``data`` (status 2 plus the base64-encoded UTF-8 text to synthesize).

    :param ws: the freshly opened connection; the frame is sent through it.
    """
    print("WebSocket connection opened,{},ws连接建立成功...".format(ws))

    # The text travels base64-encoded inside the JSON frame.
    text_bytes = "欢迎来到科大讯飞开放平台".encode('utf-8')
    encoded_text = base64.b64encode(text_bytes).decode('utf-8')

    common = {"app_id": appid}
    business = {
        "aue": "raw",
        "sfl": 1,
        "tte": "UTF8",
        "ent": "intp65",
        "vcn": "x4_lingxiaolu_em_v2",
        "pitch": 50,
        "speed": 50,
    }
    data = {"status": 2, "text": encoded_text}

    # Send the first (and only) frame.
    request = {"common": common, "business": business, "data": data}
    ws.send(json.dumps(request))


def close_connection(ws):
    """Print a notice and then close the given WebSocket connection.

    :param ws: connection to close.
    """
    notice = "Closing WebSocket connection..."
    print(notice)
    ws.close()


# Script entry point: synthesize the text, play it live and save it to audio_file.
if __name__ == '__main__':
    start_time = datetime.now()
    websocket.enableTrace(False)

    # on_message() opens audio_file in append mode, so start from an empty
    # file; otherwise every run would be concatenated onto the previous one.
    with open(audio_file, 'wb'):
        pass

    # Playback stream that on_message() writes to as a module-level global.
    # NOTE(review): 16 kHz / 16-bit / mono assumes the service's default raw
    # PCM output for the request sent by on_open() -- confirm against the
    # service documentation if the request parameters change.
    player = pyaudio.PyAudio()
    audio_stream = player.open(format=pyaudio.paInt16, channels=1, rate=16000, output=True)
    try:
        ws_url = product_url(api_secret, api_key)
        ws_entity = websocket.WebSocketApp(ws_url, on_message=on_message, on_error=on_error, on_close=on_close,
                                           on_open=on_open)
        # NOTE(review): CERT_NONE disables TLS certificate verification.
        # Kept as in the original sample, but it should be removed for
        # anything beyond local experiments.
        ws_entity.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
    finally:
        # Release the audio device even if the connection fails.
        audio_stream.stop_stream()
        audio_stream.close()
        player.terminate()

    end_time = datetime.now()
    print(f"合成耗时: {end_time - start_time}")

Logo

技术共进,成长同行——讯飞AI开发者社区

更多推荐