FreeSWITCH 1.10 源码阅读(6)-unimrcp 模块原理
创始人
2025-05-29 18:07:10

文章目录

  • 1. 前言
  • 2. 源码分析
      • 2.1 unimrcp 模块的加载
      • 2.2 tts 功能的实现

1. 前言

MRCP(Media Resource Control Protocol, 媒体资源控制协议) 是一种通讯协议,用于语音服务器向客户端提供各种语音服务,例如 语音识别(ASR)和语音合成(TTS)。FreeSWITCH 中的 unimrcp模块 就是对接 MRCP 协议栈的中间层,提供了集成使用 ASR、TTS 的能力。下图是 FreeSWITCH 中 unimrcp模块 的源码时序,下文将对源码进行分析。

在这里插入图片描述

2. 源码分析

2.1 unimrcp 模块的加载

  1. 在 FreeSWITCH 1.10 源码阅读(1)-服务启动及 Event Socket 模块工作原理 中笔者分析了 FreeSWITCH 加载模块的主流程,unimrcp 模块被加载时将触发 mod_unimrcp.c#SWITCH_MODULE_LOAD_FUNCTION(mod_unimrcp_load) 执行。这个函数比较简练,大致有以下几个关键点:

    1. 调用 mod_unimrcp.c#mod_unimrcp_do_config() 函数获取 XML 配置中指定的 unimrcp.conf 名称下的配置内容,这部分不做赘述
    2. 调用 mod_unimrcp.c#mod_unimrcp_client_create() 函数创建 FreeSWITCH 本地的 MRCP 客户端,用于后续与 MRCP 服务器交互
    3. 调用 mod_unimrcp.c#synth_load() 函数加载创建 TTS 应用
    4. 调用 mod_unimrcp.c#recog_load() 函数加载创建 ASR 应用,与 TTS 应用加载类似,不做赘述
    5. 调用库函数 mrcp_client.c#mrcp_client_start() 新开线程启动 MRCP 客户端,涉及库函数不做赘述
    SWITCH_MODULE_LOAD_FUNCTION(mod_unimrcp_load)
    {if (switch_event_reserve_subclass(MY_EVENT_PROFILE_CREATE) != SWITCH_STATUS_SUCCESS) {switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't register subclass %s!\n", MY_EVENT_PROFILE_CREATE);return SWITCH_STATUS_TERM;}if (switch_event_reserve_subclass(MY_EVENT_PROFILE_CLOSE) != SWITCH_STATUS_SUCCESS) {switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't register subclass %s!\n", MY_EVENT_PROFILE_CLOSE);return SWITCH_STATUS_TERM;}if (switch_event_reserve_subclass(MY_EVENT_PROFILE_OPEN) != SWITCH_STATUS_SUCCESS) {switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't register subclass %s!\n", MY_EVENT_PROFILE_OPEN);return SWITCH_STATUS_TERM;}/* connect my internal structure to the blank pointer passed to me */*module_interface = switch_loadable_module_create_module_interface(pool, modname);memset(&globals, 0, sizeof(globals));switch_mutex_init(&globals.mutex, SWITCH_MUTEX_UNNESTED, pool);globals.speech_channel_number = 0;switch_core_hash_init_nocase(&globals.profiles);/* get MRCP module configuration */mod_unimrcp_do_config();if (zstr(globals.unimrcp_default_synth_profile)) {switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing default-tts-profile\n");return SWITCH_STATUS_FALSE;}if (zstr(globals.unimrcp_default_recog_profile)) {switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing default-asr-profile\n");return SWITCH_STATUS_FALSE;}/* link UniMRCP logs to FreeSWITCH */switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "UniMRCP log level = %s\n", globals.unimrcp_log_level);if (apt_log_instance_create(APT_LOG_OUTPUT_NONE, str_to_log_level(globals.unimrcp_log_level), pool) == FALSE) {/* already created */apt_log_priority_set(str_to_log_level(globals.unimrcp_log_level));}apt_log_ext_handler_set(unimrcp_log);/* Create the MRCP client */if ((globals.mrcp_client = mod_unimrcp_client_create(pool)) == NULL) {switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create mrcp client\n");return 
SWITCH_STATUS_FALSE;}/* Create the synthesizer interface */if (synth_load(*module_interface, pool) != SWITCH_STATUS_SUCCESS) {return SWITCH_STATUS_FALSE;}/* Create the recognizer interface */if (recog_load(*module_interface, pool) != SWITCH_STATUS_SUCCESS) {return SWITCH_STATUS_FALSE;}/* Start the client stack */mrcp_client_start(globals.mrcp_client);/* indicate that the module should continue to be loaded */return SWITCH_STATUS_SUCCESS;
    }
    
  2. mod_unimrcp.c#mod_unimrcp_client_create() 函数的关键点在于与底层 mrcp 库的交互,由于底层库已经不属于 FreeSWITCH 源码,本文不会再深入:

    1. 调用库函数 mrcp_client.c#mrcp_client_create() 创建 FreeSWITCH 作为客户端连接 MRCP 服务器的 mrcp_client_t 对象,并设置该对象上回调函数表中处理消息的函数为 mrcp_client.c#mrcp_client_msg_process()
    2. 调用库函数 mrcp_client_connection.c#mrcp_client_connection_agent_create() 创建 MRCP 连接端点对象 mrcp_connection_agent_t,用于管理底层 socket 数据读写
    3. 调用 mrcp_client.c#mrcp_client_connection_agent_register() 将 MRCP 连接端点注册到 FreeSWITCH 客户端对象中,并设置底层连接收到数据时的回调函数表为 mrcp_client.connection_method_vtable
    4. 解析 unimrcp 配置文件属性,创建对应的 profile,据此可以将多个 MRCP 服务器的连接信息隔离。如果是 v2 版本的 MRCP 协议,在 FreeSWITCH 和 MRCP 服务器之间还需要 SIP 信令交互,所以也会调用 mrcp_sofiasip_client_agent.c#mrcp_sofiasip_client_agent_create() 函数创建一个 SIP 交互的端点对象
     /**
      * Create and configure the MRCP client used by this module.
      *
      * Steps performed, in order:
      *  - create the client and fetch its memory pool;
      *  - load the "speechsynth" and "speechrecog" resources and register the
      *    resource factory and codec manager;
      *  - create and register one connection agent and one media engine, both
      *    shared by every profile;
      *  - open CONFIG_FILE and, for each <profile>, build the module-level
      *    profile, its default SPEAK / RECOGNIZE params, the RTP config, the
      *    signaling agent (version "1" or "2"), and register the resulting
      *    MRCP profile under the profile name.
      *
      * @param mod_pool module memory pool; used for the directory layout and
      *                 for the module-level profile objects
      * @return the configured client, or NULL if any step failed
      *
      * NOTE(review): the failure paths after mrcp_client_create() only drop the
      * pointer (client = NULL) and never destroy the client. Whether that
      * leaks depends on the UniMRCP library; confirm before changing it.
      */
     static mrcp_client_t *mod_unimrcp_client_create(switch_memory_pool_t *mod_pool)
    {
        switch_xml_t cfg = NULL, xml = NULL, profiles = NULL, profile = NULL;
        mrcp_client_t *client = NULL;
        apr_pool_t *pool = NULL;
        mrcp_resource_loader_t *resource_loader = NULL;
        mrcp_resource_factory_t *resource_factory = NULL;
        mpf_codec_manager_t *codec_manager = NULL;
        apr_size_t max_connection_count = 0;
        apt_bool_t offer_new_connection = FALSE;
        mrcp_connection_agent_t *connection_agent;
        mpf_engine_t *media_engine;
        apt_dir_layout_t *dir_layout;

        /* create the client */
        if ((dir_layout = apt_default_dir_layout_create("../", mod_pool)) == NULL) {
            goto done;
        }
        client = mrcp_client_create(dir_layout);
        if (!client) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MRCP client\n");
            goto done;
        }

        pool = mrcp_client_memory_pool_get(client);
        if (!pool) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to get MRCP client memory pool\n");
            client = NULL;
            goto done;
        }

        /* load the synthesizer and recognizer resources */
        resource_loader = mrcp_resource_loader_create(FALSE, pool);
        if (!resource_loader) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MRCP resource loader\n");
            client = NULL;
            goto done;
        } else {
            apt_str_t synth_resource;
            apt_str_t recog_resource;
            apt_string_set(&synth_resource, "speechsynth");
            mrcp_resource_load(resource_loader, &synth_resource);
            apt_string_set(&recog_resource, "speechrecog");
            mrcp_resource_load(resource_loader, &recog_resource);
            resource_factory = mrcp_resource_factory_get(resource_loader);
            mrcp_client_resource_factory_register(client, resource_factory);
        }

        codec_manager = mpf_engine_codec_manager_create(pool);
        if (!codec_manager) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MPF codec manager\n");
            client = NULL;
            goto done;
        }
        if (!mrcp_client_codec_manager_register(client, codec_manager)) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create register MRCP client codec manager\n");
            client = NULL;
            goto done;
        }

        /* set up MRCP connection agent that will be shared with all profiles */
        if (!zstr(globals.unimrcp_max_connection_count)) {
            /* Parse as signed first: a negative setting must not wrap into a huge unsigned count. */
            int configured_count = atoi(globals.unimrcp_max_connection_count);
            if (configured_count > 0) {
                max_connection_count = (apr_size_t) configured_count;
            }
        }
        if (max_connection_count == 0) {
            /* unset, zero, negative or unparsable: fall back to the default */
            max_connection_count = 100;
        }
        if (!zstr(globals.unimrcp_offer_new_connection)) {
            /* strcasecmp() returns 0 on a match, so the flag is set only when the setting is "true". */
            offer_new_connection = (strcasecmp("true", globals.unimrcp_offer_new_connection) == 0);
        }
        connection_agent = mrcp_client_connection_agent_create("MRCPv2ConnectionAgent", max_connection_count, offer_new_connection, pool);
        if (!connection_agent) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MRCP connection agent\n");
            client = NULL;
            goto done;
        }
        /* Optional tuning values: each is applied only when it parses to a positive number. */
        if (!zstr(globals.unimrcp_rx_buffer_size)) {
            long rx_buffer_size = atol(globals.unimrcp_rx_buffer_size);
            if (rx_buffer_size > 0) {
                mrcp_client_connection_rx_size_set(connection_agent, (apr_size_t) rx_buffer_size);
            }
        }
        if (!zstr(globals.unimrcp_tx_buffer_size)) {
            long tx_buffer_size = atol(globals.unimrcp_tx_buffer_size);
            if (tx_buffer_size > 0) {
                mrcp_client_connection_tx_size_set(connection_agent, (apr_size_t) tx_buffer_size);
            }
        }
        if (!zstr(globals.unimrcp_request_timeout)) {
            long request_timeout = atol(globals.unimrcp_request_timeout);
            if (request_timeout > 0) {
                mrcp_client_connection_timeout_set(connection_agent, (apr_size_t) request_timeout);
            }
        }
        if (!mrcp_client_connection_agent_register(client, connection_agent)) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create register MRCP connection agent\n");
            client = NULL;
            goto done;
        }

        /* Set up the media engine that will be shared with all profiles */
        media_engine = mpf_engine_create("MediaEngine", pool);
        if (!media_engine) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MPF media engine\n");
            client = NULL;
            goto done;
        }
        if (!mpf_engine_scheduler_rate_set(media_engine, 1)) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to set MPF engine scheduler rate\n");
            client = NULL;
            goto done;
        }
        if (!mrcp_client_media_engine_register(client, media_engine)) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to register MPF media engine\n");
            client = NULL;
            goto done;
        }

        /* configure the client profiles */
        if (!(xml = switch_xml_open_cfg(CONFIG_FILE, &cfg, NULL))) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Could not open %s\n", CONFIG_FILE);
            client = NULL;
            goto done;
        }

        if ((profiles = switch_xml_child(cfg, "profiles"))) {
            for (profile = switch_xml_child(profiles, "profile"); profile; profile = switch_xml_next(profile)) {
                /* a profile is a signaling agent + termination factory + media engine + connection agent (MRCPv2 only) */
                mrcp_sig_agent_t *agent = NULL;
                mpf_termination_factory_t *termination_factory = NULL;
                mrcp_profile_t *mprofile = NULL;
                mpf_rtp_config_t *rtp_config = NULL;
                mpf_rtp_settings_t *rtp_settings = mpf_rtp_settings_alloc(pool);
                mrcp_sig_settings_t *sig_settings = mrcp_signaling_settings_alloc(pool);
                profile_t *mod_profile = NULL;
                switch_xml_t default_params = NULL;
                mrcp_connection_agent_t *v2_profile_connection_agent = NULL;

                /* get profile attributes */
                const char *name = apr_pstrdup(pool, switch_xml_attr(profile, "name"));
                const char *version = switch_xml_attr(profile, "version");
                if (zstr(name) || zstr(version)) {
                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, " missing name or version attribute\n");
                    client = NULL;
                    goto done;
                }

                /* prepare mod_unimrcp's profile for configuration */
                profile_create(&mod_profile, name, mod_pool);
                if (mod_profile) {
                    switch_core_hash_insert(globals.profiles, mod_profile->name, mod_profile);
                } else {
                    client = NULL;
                    goto done;
                }

                /* pull in any default SPEAK params */
                default_params = switch_xml_child(profile, "synthparams");
                if (default_params) {
                    switch_xml_t param = NULL;
                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading SPEAK params\n");
                    for (param = switch_xml_child(default_params, "param"); param; param = switch_xml_next(param)) {
                        const char *param_name = switch_xml_attr(param, "name");
                        const char *param_value = switch_xml_attr(param, "value");
                        if (zstr(param_name)) {
                            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing SPEAK param name\n");
                            client = NULL;
                            goto done;
                        }
                        if (zstr(param_value)) {
                            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing SPEAK param value\n");
                            client = NULL;
                            goto done;
                        }
                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading SPEAK Param %s:%s\n", param_name, param_value);
                        switch_core_hash_insert(mod_profile->default_synth_params, switch_core_strdup(pool, param_name), switch_core_strdup(pool, param_value));
                    }
                }

                /* pull in any default RECOGNIZE params */
                default_params = switch_xml_child(profile, "recogparams");
                if (default_params) {
                    switch_xml_t param = NULL;
                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading RECOGNIZE params\n");
                    for (param = switch_xml_child(default_params, "param"); param; param = switch_xml_next(param)) {
                        const char *param_name = switch_xml_attr(param, "name");
                        const char *param_value = switch_xml_attr(param, "value");
                        if (zstr(param_name)) {
                            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing RECOGNIZE param name\n");
                            client = NULL;
                            goto done;
                        }
                        if (zstr(param_value)) {
                            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing RECOGNIZE param value\n");
                            client = NULL;
                            goto done;
                        }
                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading RECOGNIZE Param %s:%s\n", param_name, param_value);
                        switch_core_hash_insert(mod_profile->default_recog_params, switch_core_strdup(pool, param_name), switch_core_strdup(pool, param_value));
                    }
                }

                /* create RTP config, common to MRCPv1 and MRCPv2 */
                rtp_config = mpf_rtp_config_alloc(pool);
                rtp_config->rtp_port_min = DEFAULT_RTP_PORT_MIN;
                rtp_config->rtp_port_max = DEFAULT_RTP_PORT_MAX;
                apt_string_set(&rtp_config->ip, DEFAULT_LOCAL_IP_ADDRESS);

                if (strcmp("1", version) == 0) {
                    /* MRCPv1 configuration */
                    switch_xml_t param = NULL;
                    rtsp_client_config_t *config = mrcp_unirtsp_client_config_alloc(pool);
                    config->origin = DEFAULT_SDP_ORIGIN;
                    sig_settings->resource_location = DEFAULT_RESOURCE_LOCATION;
                    /* the shared connection agent is only handed to MRCPv2 profiles */
                    v2_profile_connection_agent = NULL;

                    if (!zstr(globals.unimrcp_request_timeout)) {
                        long request_timeout = atol(globals.unimrcp_request_timeout);
                        if (request_timeout > 0) {
                            config->request_timeout = (apr_size_t) request_timeout;
                        }
                    }

                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading MRCPv1 profile: %s\n", name);
                    for (param = switch_xml_child(profile, "param"); param; param = switch_xml_next(param)) {
                        const char *param_name = switch_xml_attr(param, "name");
                        const char *param_value = switch_xml_attr(param, "value");
                        if (zstr(param_name)) {
                            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing param name\n");
                            client = NULL;
                            goto done;
                        }
                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading Param %s:%s\n", param_name, param_value);
                        /* offer the param to each handler in turn; warn only if none accepts it */
                        if (!process_mrcpv1_config(config, sig_settings, param_name, param_value, pool) &&
                            !process_rtp_config(client, rtp_config, rtp_settings, param_name, param_value, pool) &&
                            !process_profile_config(mod_profile, param_name, param_value, mod_pool)) {
                            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Ignoring unknown param %s\n", param_name);
                        }
                    }
                    agent = mrcp_unirtsp_client_agent_create(name, config, pool);
                    if (!agent) {
                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create MRCP RTSP client agent\n");
                        client = NULL;
                        goto done;
                    }
                } else if (strcmp("2", version) == 0) {
                    /* MRCPv2 configuration */
                    mrcp_sofia_client_config_t *config = mrcp_sofiasip_client_config_alloc(pool);
                    switch_xml_t param = NULL;
                    config->local_ip = DEFAULT_LOCAL_IP_ADDRESS;
                    config->local_port = DEFAULT_SIP_LOCAL_PORT;
                    sig_settings->server_ip = DEFAULT_REMOTE_IP_ADDRESS;
                    sig_settings->server_port = DEFAULT_SIP_REMOTE_PORT;
                    config->ext_ip = NULL;
                    config->user_agent_name = DEFAULT_SOFIASIP_UA_NAME;
                    config->origin = DEFAULT_SDP_ORIGIN;
                    v2_profile_connection_agent = connection_agent;

                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading MRCPv2 profile: %s\n", name);
                    for (param = switch_xml_child(profile, "param"); param; param = switch_xml_next(param)) {
                        const char *param_name = switch_xml_attr(param, "name");
                        const char *param_value = switch_xml_attr(param, "value");
                        if (zstr(param_name)) {
                            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing param name\n");
                            client = NULL;
                            goto done;
                        }
                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading Param %s:%s\n", param_name, param_value);
                        /* offer the param to each handler in turn; warn only if none accepts it */
                        if (!process_mrcpv2_config(config, sig_settings, param_name, param_value, pool) &&
                            !process_rtp_config(client, rtp_config, rtp_settings, param_name, param_value, pool) &&
                            !process_profile_config(mod_profile, param_name, param_value, mod_pool)) {
                            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Ignoring unknown param %s\n", param_name);
                        }
                    }
                    agent = mrcp_sofiasip_client_agent_create(name, config, pool);
                    if (!agent) {
                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create MRCP SIP client agent\n");
                        client = NULL;
                        goto done;
                    }
                } else {
                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "version must be either \"1\" or \"2\"\n");
                    client = NULL;
                    goto done;
                }

                termination_factory = mpf_rtp_termination_factory_create(rtp_config, pool);
                if (!termination_factory) {
                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create RTP termination factory\n");
                    client = NULL;
                    goto done;
                }
                mrcp_client_rtp_factory_register(client, termination_factory, name);
                mrcp_client_rtp_settings_register(client, rtp_settings, "RTP-Settings");
                mrcp_client_signaling_settings_register(client, sig_settings, "Signaling-Settings");
                mrcp_client_signaling_agent_register(client, agent);

                /* create the profile and register it */
                mprofile = mrcp_client_profile_create(NULL, agent, v2_profile_connection_agent, media_engine, termination_factory, rtp_settings, sig_settings, pool);
                if (!mprofile) {
                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create MRCP client profile\n");
                    client = NULL;
                    goto done;
                }
                mrcp_client_profile_register(client, mprofile, name);
            }
        }

    done:

        /* the parsed configuration is only needed while building the profiles */
        if (xml) {
            switch_xml_free(xml);
        }

        return client;
    }
  3. mod_unimrcp.c#synth_load() 函数加载创建 TTS 功能应用的处理主要分为两个部分:

    1. 创建 SWITCH_SPEECH_INTERFACE 接口,将 TTS 相关功能封装到 FreeSWITCH 标准模块结构中,供上层使用
    2. 调用库函数 mrcp_application.c#mrcp_application_create() 创建 unimrcp 模块的 TTS 应用,这个部分主要是将 unimrcp 模块的处理逻辑嵌入到底层 MRCP 客户端,供底层回调通知上层
     /**
      * Register the synthesizer (TTS) side of the module.
      *
      * Creates a SWITCH_SPEECH_INTERFACE on the module interface and points its
      * callbacks at the synth_speech_* functions, creates the synthesizer MRCP
      * application and registers it with the MRCP client under the name
      * "synth", then fills the two lookup tables: FreeSWITCH param name ->
      * MRCP param name, and MRCP param name -> UniMRCP header id.
      *
      * @param module_interface module interface that receives the speech interface
      * @param pool             pool passed to the MRCP application and the id objects
      * @return SWITCH_STATUS_SUCCESS, or SWITCH_STATUS_FALSE when the speech
      *         interface or the MRCP application cannot be created
      */
     static switch_status_t synth_load(switch_loadable_module_interface_t *module_interface, switch_memory_pool_t *pool)
    {
        /* MRCP header name -> UniMRCP synthesizer header id, inserted in this order. */
        static const struct {
            const char *header_name;
            int header_id;
        } synth_headers[] = {
            { "jump-size", SYNTHESIZER_HEADER_JUMP_SIZE },
            { "kill-on-barge-in", SYNTHESIZER_HEADER_KILL_ON_BARGE_IN },
            { "speaker-profile", SYNTHESIZER_HEADER_SPEAKER_PROFILE },
            { "completion-cause", SYNTHESIZER_HEADER_COMPLETION_CAUSE },
            { "completion-reason", SYNTHESIZER_HEADER_COMPLETION_REASON },
            { "voice-gender", SYNTHESIZER_HEADER_VOICE_GENDER },
            { "voice-age", SYNTHESIZER_HEADER_VOICE_AGE },
            { "voice-variant", SYNTHESIZER_HEADER_VOICE_VARIANT },
            { "voice-name", SYNTHESIZER_HEADER_VOICE_NAME },
            { "prosody-volume", SYNTHESIZER_HEADER_PROSODY_VOLUME },
            { "prosody-rate", SYNTHESIZER_HEADER_PROSODY_RATE },
            { "speech-marker", SYNTHESIZER_HEADER_SPEECH_MARKER },
            { "speech-language", SYNTHESIZER_HEADER_SPEECH_LANGUAGE },
            { "fetch-hint", SYNTHESIZER_HEADER_FETCH_HINT },
            { "audio-fetch-hint", SYNTHESIZER_HEADER_AUDIO_FETCH_HINT },
            { "failed-uri", SYNTHESIZER_HEADER_FAILED_URI },
            { "failed-uri-cause", SYNTHESIZER_HEADER_FAILED_URI_CAUSE },
            { "speak-restart", SYNTHESIZER_HEADER_SPEAK_RESTART },
            { "speak-length", SYNTHESIZER_HEADER_SPEAK_LENGTH },
            { "load-lexicon", SYNTHESIZER_HEADER_LOAD_LEXICON },
            { "lexicon-search-order", SYNTHESIZER_HEADER_LEXICON_SEARCH_ORDER }
        };
        switch_speech_interface_t *iface = NULL;
        unsigned int i;

        /* Hook the TTS callbacks into a FreeSWITCH speech interface. */
        iface = (switch_speech_interface_t *) switch_loadable_module_create_interface(module_interface, SWITCH_SPEECH_INTERFACE);
        if (iface == NULL) {
            return SWITCH_STATUS_FALSE;
        }
        iface->interface_name = MOD_UNIMRCP;
        iface->speech_open = synth_speech_open;
        iface->speech_close = synth_speech_close;
        iface->speech_feed_tts = synth_speech_feed_tts;
        iface->speech_read_tts = synth_speech_read_tts;
        iface->speech_flush_tts = synth_speech_flush_tts;
        iface->speech_text_param_tts = synth_speech_text_param_tts;
        iface->speech_numeric_param_tts = synth_speech_numeric_param_tts;
        iface->speech_float_param_tts = synth_speech_float_param_tts;

        /* Create the synthesizer application; UniMRCP reports back through synth_message_handler. */
        globals.synth.app = mrcp_application_create(synth_message_handler, NULL, pool);
        if (globals.synth.app == NULL) {
            return SWITCH_STATUS_FALSE;
        }

        /* Message dispatcher: session updates are not handled. */
        globals.synth.dispatcher.on_session_update = NULL;
        globals.synth.dispatcher.on_session_terminate = speech_on_session_terminate;
        globals.synth.dispatcher.on_channel_add = speech_on_channel_add;
        globals.synth.dispatcher.on_channel_remove = speech_on_channel_remove;
        globals.synth.dispatcher.on_message_receive = synth_on_message_receive;

        /* Audio stream vtable: only write_frame is provided for the synthesizer. */
        globals.synth.audio_stream_vtable.destroy = NULL;
        globals.synth.audio_stream_vtable.open_rx = NULL;
        globals.synth.audio_stream_vtable.close_rx = NULL;
        globals.synth.audio_stream_vtable.read_frame = NULL;
        globals.synth.audio_stream_vtable.open_tx = NULL;
        globals.synth.audio_stream_vtable.close_tx = NULL;
        globals.synth.audio_stream_vtable.write_frame = synth_stream_write;

        mrcp_client_application_register(globals.mrcp_client, globals.synth.app, "synth");

        /* map FreeSWITCH params to MRCP param */
        switch_core_hash_init_nocase(&globals.synth.fs_param_map);
        switch_core_hash_insert(globals.synth.fs_param_map, "voice", "voice-name");

        /* map MRCP params to UniMRCP ID */
        switch_core_hash_init_nocase(&globals.synth.param_id_map);
        for (i = 0; i < sizeof(synth_headers) / sizeof(synth_headers[0]); i++) {
            switch_core_hash_insert(globals.synth.param_id_map, synth_headers[i].header_name,
                                    unimrcp_param_id_create(synth_headers[i].header_id, pool));
        }

        return SWITCH_STATUS_SUCCESS;
    }
    

2.2 tts 功能的实现

  1. 以 speak 放音 APP 为例,当上层执行这个 APP 时实际调用到 mod_dptools.c#speak_function() 函数,可以看到该函数主要处理是校验参数合法性,然后调用 switch_ivr_play_say.c#switch_ivr_speak_text() 函数

    SWITCH_STANDARD_APP(speak_function)
    {switch_channel_t *channel = switch_core_session_get_channel(session);char buf[10];char *argv[3] = { 0 };int argc;const char *engine = NULL;const char *voice = NULL;char *text = NULL;char *mydata = NULL;switch_input_args_t args = { 0 };if (zstr(data) || !(mydata = switch_core_session_strdup(session, data))) {switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Invalid Params!\n");return;}argc = switch_separate_string(mydata, '|', argv, sizeof(argv) / sizeof(argv[0]));if (argc == 0) {switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Invalid Params!\n");return;} else if (argc == 1) {text = switch_core_session_strdup(session, data); /* unstripped text */} else if (argc == 2) {voice = argv[0];text = switch_core_session_strdup(session, data + (argv[1] - argv[0])); /* unstripped text */} else {engine = argv[0];voice = argv[1];text = switch_core_session_strdup(session, data + (argv[2] - argv[0])); /* unstripped text */}if (!engine) {engine = switch_channel_get_variable(channel, "tts_engine");}if (!voice) {voice = switch_channel_get_variable(channel, "tts_voice");}if (!(engine && voice && text)) {if (!engine) {engine = "NULL";}if (!voice) {voice = "NULL";}if (!text) {text = "NULL";}switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Invalid Params! [%s][%s][%s]\n", engine, voice, text);switch_channel_hangup(channel, SWITCH_CAUSE_DESTINATION_OUT_OF_ORDER);}args.input_callback = on_dtmf;args.buf = buf;args.buflen = sizeof(buf);switch_channel_set_variable(channel, SWITCH_PLAYBACK_TERMINATOR_USED, "");switch_ivr_speak_text(session, engine, voice, text, &args);
    }
    
  2. switch_ivr_play_say.c#switch_ivr_speak_text() 函数核心处理为以下几步:

    1. 调用函数 switch_core_speech.c#switch_core_speech_open() 使用本地 MRCP 客户端请求 MRCP 服务器新建会话
    2. 调用函数 switch_ivr_play_say.c#switch_ivr_speak_text_handle() 处理语音合成
    SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text(switch_core_session_t *session,const char *tts_name, const char *voice_name, const char *text, switch_input_args_t *args)
    {switch_channel_t *channel = switch_core_session_get_channel(session);uint32_t rate = 0;int interval = 0;uint32_t channels;switch_frame_t write_frame = { 0 };switch_timer_t ltimer, *timer;switch_codec_t lcodec, *codec;switch_memory_pool_t *pool = switch_core_session_get_pool(session);char *codec_name;switch_status_t status = SWITCH_STATUS_SUCCESS;switch_speech_handle_t lsh, *sh;switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;const char *timer_name, *var;cached_speech_handle_t *cache_obj = NULL;int need_create = 1, need_alloc = 1;switch_codec_implementation_t read_impl = { 0 };switch_core_session_get_read_impl(session, &read_impl);if (switch_channel_pre_answer(channel) != SWITCH_STATUS_SUCCESS) {return SWITCH_STATUS_FALSE;}arg_recursion_check_start(args);sh = ↰codec = &lcodec;timer = <imer;if ((var = switch_channel_get_variable(channel, SWITCH_CACHE_SPEECH_HANDLES_VARIABLE)) && switch_true(var)) {switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "cache enabled");if ((cache_obj = (cached_speech_handle_t *) switch_channel_get_private(channel, SWITCH_CACHE_SPEECH_HANDLES_OBJ_NAME))) {need_create = 0;if (!strcasecmp(cache_obj->tts_name, tts_name)) {need_alloc = 0;} else {switch_ivr_clear_speech_cache(session);}}if (!cache_obj) {cache_obj = (cached_speech_handle_t *) switch_core_session_alloc(session, sizeof(*cache_obj));}if (need_alloc) {switch_copy_string(cache_obj->tts_name, tts_name, sizeof(cache_obj->tts_name));switch_copy_string(cache_obj->voice_name, voice_name, sizeof(cache_obj->voice_name));switch_channel_set_private(channel, SWITCH_CACHE_SPEECH_HANDLES_OBJ_NAME, cache_obj);}sh = &cache_obj->sh;codec = &cache_obj->codec;timer = &cache_obj->timer;}timer_name = switch_channel_get_variable(channel, "timer_name");switch_core_session_reset(session, SWITCH_FALSE, SWITCH_FALSE);rate = read_impl.actual_samples_per_second;interval = read_impl.microseconds_per_packet / 1000;channels = read_impl.number_of_channels;if (need_create) 
{memset(sh, 0, sizeof(*sh));if ((status = switch_core_speech_open(sh, tts_name, voice_name, (uint32_t) rate, interval, read_impl.number_of_channels, &flags, NULL)) != SWITCH_STATUS_SUCCESS) {switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Invalid TTS module %s[%s]!\n", tts_name, voice_name);switch_core_session_reset(session, SWITCH_TRUE, SWITCH_TRUE);switch_ivr_clear_speech_cache(session);arg_recursion_check_stop(args);return status;}} else if (cache_obj && strcasecmp(cache_obj->voice_name, voice_name)) {switch_copy_string(cache_obj->voice_name, voice_name, sizeof(cache_obj->voice_name));switch_core_speech_text_param_tts(sh, "voice", voice_name);}if (switch_channel_pre_answer(channel) != SWITCH_STATUS_SUCCESS) {flags = 0;switch_core_speech_close(sh, &flags);arg_recursion_check_stop(args);return SWITCH_STATUS_FALSE;}switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "OPEN TTS %s\n", tts_name);codec_name = "L16";if (need_create) {if (switch_core_codec_init(codec,codec_name,NULL,NULL, (int) rate, interval, channels, SWITCH_CODEC_FLAG_ENCODE | SWITCH_CODEC_FLAG_DECODE, NULL,pool) == SWITCH_STATUS_SUCCESS) {switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Raw Codec Activated\n");} else {switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Raw Codec Activation Failed %s@%uhz 1 channel %dms\n", codec_name,rate, interval);flags = 0;switch_core_speech_close(sh, &flags);switch_core_session_reset(session, SWITCH_TRUE, SWITCH_TRUE);switch_ivr_clear_speech_cache(session);arg_recursion_check_stop(args);return SWITCH_STATUS_GENERR;}}write_frame.codec = codec;if (timer_name) {if (need_create) {if (switch_core_timer_init(timer, timer_name, interval, (int) sh->samples, pool) != SWITCH_STATUS_SUCCESS) {switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Setup timer failed!\n");switch_core_codec_destroy(write_frame.codec);flags = 0;switch_core_speech_close(sh, 
&flags);switch_core_session_reset(session, SWITCH_TRUE, SWITCH_TRUE);switch_ivr_clear_speech_cache(session);arg_recursion_check_stop(args);return SWITCH_STATUS_GENERR;}switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Setup timer success %u bytes per %d ms!\n", sh->samples * 2,interval);}switch_core_timer_sync(timer); // Sync timer/* start a thread to absorb incoming audio */switch_core_service_session(session);}status = switch_ivr_speak_text_handle(session, sh, write_frame.codec, timer_name ? timer : NULL, text, args);flags = 0;if (!cache_obj) {switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "no cache_obj");switch_core_speech_close(sh, &flags);switch_core_codec_destroy(codec);}if (timer_name) {/* End the audio absorbing thread */switch_core_thread_session_end(session);if (!cache_obj) {switch_core_timer_destroy(timer);}}switch_core_session_reset(session, SWITCH_FALSE, SWITCH_TRUE);arg_recursion_check_stop(args);return status;
    }
  3. switch_core_speech.c#switch_core_speech_open() 函数实际只是通过核心注册的接口调用到 unimrcp 模块的 mod_unimrcp.c#synth_speech_open() 函数

    /*
     * Open a speech handle on a named speech module.
     *
     * module_name may carry a parameter after a ':' ("module:param"); the part
     * before the colon selects the speech interface and the part after it is
     * stored in sh->param. When no pool is supplied a new one is created and
     * the handle is flagged SWITCH_SPEECH_FLAG_FREE_POOL. After the handle
     * fields are filled in, the module's speech_open callback is invoked.
     *
     * Returns SWITCH_STATUS_FALSE for a missing handle, flags pointer or
     * module name; SWITCH_STATUS_GENERR when no such speech module exists;
     * the pool-creation status if that fails; otherwise the status returned
     * by the module's speech_open callback.
     */
    SWITCH_DECLARE(switch_status_t) switch_core_speech_open(switch_speech_handle_t *sh,
                                                            const char *module_name,
                                                            const char *voice_name,
                                                            unsigned int rate, unsigned int interval, unsigned int channels,
                                                            switch_speech_flag_t *flags, switch_memory_pool_t *pool)
    {
        switch_status_t rc;
        char name_buf[256] = "";
        char *engine_param = NULL;

        if (!sh || !flags || zstr(module_name)) {
            return SWITCH_STATUS_FALSE;
        }

        /* Split "module:param" inside a local copy so the caller's string stays untouched. */
        if (strchr(module_name, ':')) {
            switch_set_string(name_buf, module_name);
            engine_param = strchr(name_buf, ':');
            if (engine_param) {
                *engine_param = '\0';
                engine_param++;
                module_name = name_buf;
            }
        }

        sh->speech_interface = switch_loadable_module_get_speech_interface(module_name);
        if (sh->speech_interface == 0) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Invalid speech module [%s]!\n", module_name);
            return SWITCH_STATUS_GENERR;
        }

        sh->flags = *flags;

        if (!pool) {
            /* No pool supplied: create one and flag the handle accordingly. */
            rc = switch_core_new_memory_pool(&sh->memory_pool);
            if (rc != SWITCH_STATUS_SUCCESS) {
                UNPROTECT_INTERFACE(sh->speech_interface);
                return rc;
            }
            switch_set_flag(sh, SWITCH_SPEECH_FLAG_FREE_POOL);
        } else {
            sh->memory_pool = pool;
        }

        sh->engine = switch_core_strdup(sh->memory_pool, module_name);
        if (engine_param) {
            sh->param = switch_core_strdup(sh->memory_pool, engine_param);
        }

        sh->rate = rate;
        sh->name = switch_core_strdup(sh->memory_pool, module_name);
        sh->samples = switch_samples_per_packet(rate, interval);
        sh->samplerate = rate;
        sh->native_rate = rate;
        sh->channels = channels;
        sh->real_channels = 1;

        /* Hand over to the module-specific open callback. */
        rc = sh->speech_interface->speech_open(sh, voice_name, rate, channels, flags);
        if (rc != SWITCH_STATUS_SUCCESS) {
            UNPROTECT_INTERFACE(sh->speech_interface);
            return rc;
        }

        switch_set_flag(sh, SWITCH_SPEECH_FLAG_OPEN);
        return rc;
    }
    
  4. mod_unimrcp.c#synth_speech_open() 函数的核心处理是创建一个 FreeSWITCH 层面的 speech_channel_t 对象,并调用 mod_unimrcp.c#speech_channel_open() 函数通过底层 MRCP 客户端建立与远程 MRCP 服务端之间的连接

    /**
     * speech_open callback of the synthesizer interface.
     *
     * Takes the profile name from sh->param (optionally "profile:session-uuid"),
     * creates a speech channel named "TTS-<n>", opens it on the selected
     * profile (the default synth profile when none is given) and applies the
     * requested voice followed by the profile's default synth params.
     *
     * @param sh         speech handle being opened; receives the channel in private_info
     * @param voice_name voice to request; skipped when empty
     * @param rate       sample rate handed to the speech channel
     * @param channels   not used by this function
     * @param flags      not used by this function
     * @return SWITCH_STATUS_SUCCESS on success; SWITCH_STATUS_FALSE when the
     *         channel cannot be created or the profile is unknown; otherwise
     *         the failing status of speech_channel_open()
     */
    static switch_status_t synth_speech_open(switch_speech_handle_t *sh, const char *voice_name, int rate, int channels, switch_speech_flag_t *flags)
    {
        switch_status_t open_status = SWITCH_STATUS_SUCCESS;
        speech_channel_t *schannel = NULL;
        const char *profile_name = sh->param;
        profile_t *profile = NULL;
        int channel_number = get_next_speech_channel_number();
        char *channel_name = NULL;
        char *session_uuid = NULL;
        switch_hash_index_t *hi = NULL;

        /* Name the channel */
        if (profile_name && strchr(profile_name, ':')) {
            /* Profile has session name appended to it.  Split a private copy at the colon. */
            char *profile_copy = switch_core_strdup(sh->memory_pool, profile_name);
            char *separator = strchr(profile_copy, ':');

            *separator = '\0';
            profile_name = profile_copy;
            session_uuid = switch_core_strdup(sh->memory_pool, separator + 1);
        } else {
            /* check if session is associated w/ this memory pool */
            switch_core_session_t *session = switch_core_memory_pool_get_data(sh->memory_pool, "__session");

            if (session) {
                session_uuid = switch_core_session_get_uuid(session);
            }
        }
        channel_name = switch_core_sprintf(sh->memory_pool, "TTS-%d", channel_number);

        switch_log_printf(SWITCH_CHANNEL_UUID_LOG(session_uuid), SWITCH_LOG_INFO,
                          "speech_handle: name = %s, rate = %d, speed = %d, samples = %d, voice = %s, engine = %s, param = %s\n", sh->name, sh->rate,
                          sh->speed, sh->samples, sh->voice, sh->engine, sh->param);
        switch_log_printf(SWITCH_CHANNEL_UUID_LOG(session_uuid), SWITCH_LOG_INFO, "voice = %s, rate = %d\n", voice_name, rate);

        /* Allocate the channel */
        if (speech_channel_create(&schannel, channel_name, session_uuid, SPEECH_CHANNEL_SYNTHESIZER, &globals.synth, (uint16_t) rate, sh->memory_pool) != SWITCH_STATUS_SUCCESS) {
            return SWITCH_STATUS_FALSE;
        }
        sh->private_info = schannel;
        schannel->fsh = sh;

        /* Open the channel on the requested profile, or on the default synth profile */
        if (zstr(profile_name)) {
            profile_name = globals.unimrcp_default_synth_profile;
        }
        profile = (profile_t *) switch_core_hash_find(globals.profiles, profile_name);
        if (!profile) {
            switch_log_printf(SWITCH_CHANNEL_UUID_LOG(session_uuid), SWITCH_LOG_ERROR, "(%s) Can't find profile, %s\n", channel_name, profile_name);
            return SWITCH_STATUS_FALSE;
        }

        open_status = speech_channel_open(schannel, profile);
        if (open_status != SWITCH_STATUS_SUCCESS) {
            return open_status;
        }

        /* Set session TTS params */
        if (!zstr(voice_name)) {
            speech_channel_set_param(schannel, "Voice-Name", voice_name);
        }

        /* Set default TTS params */
        for (hi = switch_core_hash_first(profile->default_synth_params); hi; hi = switch_core_hash_next(&hi)) {
            const void *key;
            void *val;

            switch_core_hash_this(hi, &key, NULL, &val);
            speech_channel_set_param(schannel, (char *) key, (char *) val);
        }

        return open_status;
    }
    
  5. mod_unimrcp.c#speech_channel_open() 函数主要逻辑是调用底层库函数创建 MRCP 会话,并建立连接

    1. 调用库函数 mrcp_application.c#mrcp_application_session_create() 创建 MRCP 会话
    2. 调用库函数 mrcp_application.c#mrcp_application_channel_create() 创建 MRCP 会话下的 channel
    3. 调用库函数 mrcp_application.c#mrcp_application_channel_add() 请求远程 MRCP 服务器创建新会话
    /**
     * Open a speech channel against an MRCP profile.
     *
     * Creates the UniMRCP session, the audio termination and the MRCP channel,
     * adds the channel to the session, then waits on schannel->cond until the
     * channel state leaves SPEECH_CHANNEL_CLOSED.  schannel->mutex is held from
     * entry until the single unlock at `done`.
     *
     * @param schannel  channel to open; must be in SPEECH_CHANNEL_CLOSED
     * @param profile   profile to connect with (stored on the channel)
     * @return SWITCH_STATUS_SUCCESS when the channel is ready,
     *         SWITCH_STATUS_RESTART when the caller may retry (session could not
     *         be created, or an errored session was cleaned up),
     *         SWITCH_STATUS_FALSE otherwise.
     */
    static switch_status_t speech_channel_open(speech_channel_t *schannel, profile_t *profile)
    {
        switch_status_t status = SWITCH_STATUS_SUCCESS;
        mpf_termination_t *audio_termination = NULL;
        mrcp_resource_type_e resource_type;
        int timeout_logged = 0;

        switch_mutex_lock(schannel->mutex);

        /* only a closed channel can be opened */
        if (schannel->state != SPEECH_CHANNEL_CLOSED) {
            status = SWITCH_STATUS_FALSE;
            goto done;
        }

        schannel->profile = profile;

        /* MRCP session */
        schannel->unimrcp_session = mrcp_application_session_create(schannel->application->app, profile->name, schannel);
        if (schannel->unimrcp_session == NULL) {
            /* the profile may not exist */
            switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR,
                              "(%s) Unable to create session with %s\n", schannel->name, profile->name);
            status = SWITCH_STATUS_RESTART;
            goto done;
        }
        mrcp_application_session_name_set(schannel->unimrcp_session, schannel->name);

        /* audio termination for the channel */
        audio_termination = speech_channel_create_mpf_termination(schannel);
        if (audio_termination == NULL) {
            switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR,
                              "(%s) Unable to create termination with %s\n", schannel->name, profile->name);
            mrcp_application_session_destroy(schannel->unimrcp_session);
            status = SWITCH_STATUS_FALSE;
            goto done;
        }

        /* MRCP channel of the matching resource type */
        resource_type = (schannel->type == SPEECH_CHANNEL_SYNTHESIZER) ? MRCP_SYNTHESIZER_RESOURCE : MRCP_RECOGNIZER_RESOURCE;
        schannel->unimrcp_channel = mrcp_application_channel_create(schannel->unimrcp_session, resource_type, audio_termination, NULL, schannel);
        if (schannel->unimrcp_channel == NULL) {
            switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR,
                              "(%s) Unable to create channel with %s\n", schannel->name, profile->name);
            mrcp_application_session_destroy(schannel->unimrcp_session);
            status = SWITCH_STATUS_FALSE;
            goto done;
        }

        /* adding the channel to the session is what connects to the MRCP server */
        if (mrcp_application_channel_add(schannel->unimrcp_session, schannel->unimrcp_channel) != TRUE) {
            switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR,
                              "(%s) Unable to add channel to session with %s\n", schannel->name, profile->name);
            mrcp_application_session_destroy(schannel->unimrcp_session);
            status = SWITCH_STATUS_FALSE;
            goto done;
        }

        /* wait until the state leaves CLOSED; warn once on the first timeout */
        timeout_logged = 0;
        while (schannel->state == SPEECH_CHANNEL_CLOSED) {
            if (switch_thread_cond_timedwait(schannel->cond, schannel->mutex, SPEECH_CHANNEL_TIMEOUT_USEC) == SWITCH_STATUS_TIMEOUT
                && !timeout_logged) {
                timeout_logged = 1;
                switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_WARNING,
                                  "(%s) MRCP session has not opened after %d ms\n", schannel->name, SPEECH_CHANNEL_TIMEOUT_USEC / (1000));
            }
        }

        if (schannel->state == SPEECH_CHANNEL_READY) {
            switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_DEBUG,
                              "(%s) channel is ready\n", schannel->name);
        } else if (schannel->state == SPEECH_CHANNEL_CLOSED) {
            switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR,
                              "(%s) Timed out waiting for channel to be ready\n", schannel->name);
            /* no retry in this case */
            status = SWITCH_STATUS_FALSE;
        } else if (schannel->state == SPEECH_CHANNEL_ERROR) {
            switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_DEBUG,
                              "(%s) Terminating MRCP session\n", schannel->name);
            if (!mrcp_application_session_terminate(schannel->unimrcp_session)) {
                switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_WARNING,
                                  "(%s) Unable to terminate application session\n", schannel->name);
                status = SWITCH_STATUS_FALSE;
                goto done;
            }

            /* wait for the errored session to be cleaned up; warn once */
            timeout_logged = 0;
            while (schannel->state == SPEECH_CHANNEL_ERROR) {
                if (switch_thread_cond_timedwait(schannel->cond, schannel->mutex, SPEECH_CHANNEL_TIMEOUT_USEC) == SWITCH_STATUS_TIMEOUT
                    && !timeout_logged) {
                    timeout_logged = 1;
                    switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_WARNING,
                                      "(%s) MRCP session has not cleaned up after %d ms\n", schannel->name, SPEECH_CHANNEL_TIMEOUT_USEC / (1000));
                }
            }

            if (schannel->state != SPEECH_CHANNEL_CLOSED) {
                /* cleanup did not end in CLOSED: treat as fatal, no retry */
                status = SWITCH_STATUS_FALSE;
            } else {
                /* profile failed to open but the session is gone: retry allowed */
                status = SWITCH_STATUS_RESTART;
            }
        }

    done:
        switch_mutex_unlock(schannel->mutex);
        return status;
    }
  6. 此时回到本节步骤2第2步,switch_ivr_play_say.c#switch_ivr_speak_text_handle() 函数是 tts 处理的功能主体,关键处理如下:

    1. 通过核心函数 switch_core.c#switch_core_speech_feed_tts() 调用到 mod_unimrcp.c#synth_speech_feed_tts() 函数发起 MRCP 语音合成请求
    2. 在 for 空循环中不断执行核心函数 switch_core.c#switch_core_speech_read_tts() 调用到 mod_unimrcp.c#synth_speech_read_tts() 函数尝试获取合成的语音
    3. 通过核心函数 switch_core.c#switch_core_session_write_frame() 将 MRCP 服务器返回的语音流写到当前会话,通过 RTP 传输到 SIP 终端播放
    /**
     * Speak text on a session through an already-open speech handle.
     *
     * Every '*' and '#' in the text is first replaced by the channel variables
     * "star_replace" / "pound_replace" (defaults "star" / "pound").  The text is
     * then fed to the TTS engine, and audio is read from the engine and written
     * to the session frame by frame until the engine stops returning
     * SWITCH_STATUS_SUCCESS, the channel is no longer ready, CF_BREAK is set, or
     * a DTMF/event handler in args asks to stop.
     *
     * @param session  session to play audio to
     * @param sh       speech handle; SWITCH_STATUS_FALSE is returned when NULL
     * @param codec    codec attached to outgoing frames; must be ready
     * @param timer    optional timer used for pacing; when NULL the loop paces
     *                 itself by reading a frame from the session instead
     * @param text     text to speak
     * @param args     optional input handlers (DTMF buffer/callback, dmachine,
     *                 read_frame_callback)
     * @return SWITCH_STATUS_SUCCESS on normal completion (a SWITCH_STATUS_BREAK
     *         from the engine read is mapped to success), SWITCH_STATUS_MEMERR
     *         when the substitution buffer cannot be allocated, otherwise the
     *         status that ended the loop.
     */
    SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text_handle(switch_core_session_t *session,
                                                                 switch_speech_handle_t *sh,
                                                                 switch_codec_t *codec, switch_timer_t *timer, const char *text, switch_input_args_t *args)
    {
        switch_channel_t *channel = switch_core_session_get_channel(session);
        short abuf[SWITCH_RECOMMENDED_BUFFER_SIZE];
        switch_dtmf_t dtmf = { 0 };
        uint32_t len = 0;
        switch_size_t ilen = 0;
        switch_frame_t write_frame = { 0 };
        switch_status_t status = SWITCH_STATUS_SUCCESS;
        switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;
        switch_size_t extra = 0;
        char *tmp = NULL;
        const char *star, *pound, *p;
        switch_size_t starlen, poundlen;

        if (!sh) {
            return SWITCH_STATUS_FALSE;
        }

        if (switch_channel_pre_answer(channel) != SWITCH_STATUS_SUCCESS) {
            return SWITCH_STATUS_FALSE;
        }

        if (!switch_core_codec_ready(codec)) {
            return SWITCH_STATUS_FALSE;
        }

        arg_recursion_check_start(args);

        write_frame.data = abuf;
        write_frame.buflen = sizeof(abuf);

        /* bytes per frame: 2 bytes per sample per channel */
        len = sh->samples * 2 * sh->channels;

        flags = 0;

        if (!(star = switch_channel_get_variable(channel, "star_replace"))) {
            star = "star";
        }
        if (!(pound = switch_channel_get_variable(channel, "pound_replace"))) {
            pound = "pound";
        }
        starlen = strlen(star);
        poundlen = strlen(pound);

        /* count how much room the replacements need */
        for (p = text; p && *p; p++) {
            if (*p == '*') {
                extra += starlen;
            } else if (*p == '#') {
                extra += poundlen;
            }
        }

        if (extra) {
            char *tp;
            switch_size_t mylen = strlen(text) + extra + 1;

            tmp = malloc(mylen);
            if (!tmp) {
                arg_recursion_check_stop(args);
                return SWITCH_STATUS_MEMERR;
            }
            /* zero-fill so the result is NUL-terminated without extra work */
            memset(tmp, 0, mylen);
            tp = tmp;
            for (p = text; p && *p; p++) {
                if (*p == '*') {
                    /*
                     * Copy the full replacement.  The previous snprintf() bound
                     * was sizeof(tp), i.e. the size of a pointer, which
                     * truncated longer replacements and left an embedded NUL
                     * once tp advanced by starlen.  mylen already reserves
                     * starlen bytes for every '*'.
                     */
                    memcpy(tp, star, starlen);
                    tp += starlen;
                } else if (*p == '#') {
                    /* same reasoning as for '*' above */
                    memcpy(tp, pound, poundlen);
                    tp += poundlen;
                } else {
                    *tp++ = *p;
                }
            }

            text = tmp;
        }

        /* the return value of the feed is not checked here */
        switch_core_speech_feed_tts(sh, text, &flags);
        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Speaking text: %s\n", text);
        switch_safe_free(tmp);
        text = NULL;

        write_frame.rate = sh->rate;
        memset(write_frame.data, 0, len);
        write_frame.datalen = len;
        write_frame.samples = len / 2;
        write_frame.codec = codec;

        switch_assert(codec->implementation != NULL);

        switch_channel_audio_sync(channel);

        for (;;) {
            switch_event_t *event;

            ilen = len;

            if (!switch_channel_ready(channel)) {
                status = SWITCH_STATUS_FALSE;
                break;
            }

            if (switch_channel_test_flag(channel, CF_BREAK)) {
                switch_channel_clear_flag(channel, CF_BREAK);
                status = SWITCH_STATUS_BREAK;
                break;
            }

            switch_ivr_parse_all_events(session);

            if (args) {
                /*
                 * DTMF handling during playback: any handler result other than
                 * SWITCH_STATUS_SUCCESS stops the playback.
                 */
                if (switch_channel_has_dtmf(channel)) {
                    if (!args->input_callback && !args->buf && !args->dmachine) {
                        status = SWITCH_STATUS_BREAK;
                        break;
                    }
                    if (args->buf && !strcasecmp(args->buf, "_break_")) {
                        status = SWITCH_STATUS_BREAK;
                    } else {
                        switch_channel_dequeue_dtmf(channel, &dtmf);

                        if (args->dmachine) {
                            char ds[2] = {dtmf.digit, '\0'};
                            if ((status = switch_ivr_dmachine_feed(args->dmachine, ds, NULL)) != SWITCH_STATUS_SUCCESS) {
                                break;
                            }
                        }

                        if (args->input_callback) {
                            status = args->input_callback(session, (void *) &dtmf, SWITCH_INPUT_TYPE_DTMF, args->buf, args->buflen);
                        } else if (args->buf) {
                            *((char *) args->buf) = dtmf.digit;
                            status = SWITCH_STATUS_BREAK;
                        }
                    }
                }

                if (args->input_callback) {
                    if (switch_core_session_dequeue_event(session, &event, SWITCH_FALSE) == SWITCH_STATUS_SUCCESS) {
                        switch_status_t ostatus = args->input_callback(session, event, SWITCH_INPUT_TYPE_EVENT, args->buf, args->buflen);
                        if (ostatus != SWITCH_STATUS_SUCCESS) {
                            status = ostatus;
                        }
                        switch_event_destroy(&event);
                    }
                }

                if (status != SWITCH_STATUS_SUCCESS) {
                    break;
                }
            }

            /* while paused: keep pacing and servicing input, but read no audio */
            if (switch_test_flag(sh, SWITCH_SPEECH_FLAG_PAUSE)) {
                if (timer) {
                    if (switch_core_timer_next(timer) != SWITCH_STATUS_SUCCESS) {
                        break;
                    }
                } else {
                    switch_frame_t *read_frame;
                    switch_status_t tstatus = switch_core_session_read_frame(session, &read_frame, SWITCH_IO_FLAG_NONE, 0);

                    while (switch_channel_ready(channel) && switch_channel_test_flag(channel, CF_HOLD)) {
                        switch_ivr_parse_all_messages(session);
                        switch_yield(10000);
                    }

                    if (!SWITCH_READ_ACCEPTABLE(tstatus)) {
                        break;
                    }

                    if (args && args->dmachine) {
                        if ((status = switch_ivr_dmachine_ping(args->dmachine, NULL)) != SWITCH_STATUS_SUCCESS) {
                            goto done;
                        }
                    }

                    if (args && (args->read_frame_callback)) {
                        if ((status = args->read_frame_callback(session, read_frame, args->user_data)) != SWITCH_STATUS_SUCCESS) {
                            goto done;
                        }
                    }
                }
                continue;
            }

            /* pull the next chunk of synthesized audio */
            flags = SWITCH_SPEECH_FLAG_BLOCKING;
            status = switch_core_speech_read_tts(sh, abuf, &ilen, &flags);

            if (status != SWITCH_STATUS_SUCCESS) {
                /* BREAK from the read ends the loop and is reported as success */
                if (status == SWITCH_STATUS_BREAK) {
                    status = SWITCH_STATUS_SUCCESS;
                }
                break;
            }

            write_frame.datalen = (uint32_t) ilen;
            write_frame.samples = (uint32_t) (ilen / 2 / sh->channels);
            if (timer) {
                write_frame.timestamp = timer->samplecount;
            }
            if (switch_core_session_write_frame(session, &write_frame, SWITCH_IO_FLAG_NONE, 0) != SWITCH_STATUS_SUCCESS) {
                break;
            }

            if (timer) {
                if (switch_core_timer_next(timer) != SWITCH_STATUS_SUCCESS) {
                    break;
                }
            } else {
                /* no timer: pace the loop off the channel's incoming frames */
                switch_frame_t *read_frame;
                switch_status_t tstatus = switch_core_session_read_frame(session, &read_frame, SWITCH_IO_FLAG_NONE, 0);

                while (switch_channel_ready(channel) && switch_channel_test_flag(channel, CF_HOLD)) {
                    switch_ivr_parse_all_messages(session);
                    switch_yield(10000);
                }

                if (!SWITCH_READ_ACCEPTABLE(tstatus)) {
                    break;
                }

                if (args && args->dmachine) {
                    if ((status = switch_ivr_dmachine_ping(args->dmachine, NULL)) != SWITCH_STATUS_SUCCESS) {
                        goto done;
                    }
                }

                if (args && (args->read_frame_callback)) {
                    if ((status = args->read_frame_callback(session, read_frame, args->user_data)) != SWITCH_STATUS_SUCCESS) {
                        goto done;
                    }
                }
            }
        }

    done:
        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "done speaking text\n");
        flags = 0;
        switch_core_speech_flush_tts(sh);

        arg_recursion_check_stop(args);
        return status;
    }
  7. mod_unimrcp.c#synth_speech_feed_tts() 函数的核心其实是执行 mod_unimrcp.c#synth_channel_speak() 函数,mod_unimrcp.c#synth_channel_speak()函数的核心处理如下:

    1. 调用底层库函数 mrcp_application.c#mrcp_application_message_create() 创建 SYNTHESIZER_SPEAK tts 请求的消息结构
    2. 调用底层库函数 mrcp_application.c#mrcp_application_message_send() 触发执行向 MRCP 服务器发送语音合成请求
    3. 等待 MRCP 服务器返回,将当前 tts 的 channel 状态流转为 SPEECH_CHANNEL_PROCESSING。这个部分主要依靠 unimrcp 模块加载时嵌入到底层 MRCP 客户端的回调 mod_unimrcp.c#synth_on_message_receive() 函数完成
    /**
     * Speech-interface entry point: send text to the synthesizer.
     *
     * @param sh     speech handle whose private_info is the speech channel
     * @param text   text to speak; empty or NULL text is rejected
     * @param flags  unused here
     * @return SWITCH_STATUS_FALSE for empty text, otherwise the result of
     *         synth_channel_speak().
     */
    static switch_status_t synth_speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags)
    {
        speech_channel_t *schannel = (speech_channel_t *) sh->private_info;

        if (zstr(text)) {
            return SWITCH_STATUS_FALSE;
        }
        return synth_channel_speak(schannel, text);
    }

    /**
     * Build and send an MRCP SPEAK request, then wait for the channel state to
     * leave SPEECH_CHANNEL_READY.
     *
     * schannel->mutex is held from entry until the single unlock at `done`.
     *
     * @param schannel  synthesizer channel; must be in SPEECH_CHANNEL_READY
     * @param text      plain text or XML/SSML body
     * @return SWITCH_STATUS_SUCCESS when the channel reached
     *         SPEECH_CHANNEL_PROCESSING, SWITCH_STATUS_FALSE otherwise.
     */
    static switch_status_t synth_channel_speak(speech_channel_t *schannel, const char *text)
    {
        switch_status_t status = SWITCH_STATUS_SUCCESS;
        mrcp_message_t *speak_msg = NULL;
        mrcp_generic_header_t *generic_hdr = NULL;
        mrcp_synth_header_t *synth_hdr = NULL;
        int timeout_logged = 0;

        switch_mutex_lock(schannel->mutex);

        if (schannel->state != SPEECH_CHANNEL_READY) {
            status = SWITCH_STATUS_FALSE;
            goto done;
        }

        speak_msg = mrcp_application_message_create(schannel->unimrcp_session, schannel->unimrcp_channel, SYNTHESIZER_SPEAK);
        if (speak_msg == NULL) {
            switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR,
                              "(%s) Failed to create SPEAK message\n", schannel->name);
            status = SWITCH_STATUS_FALSE;
            goto done;
        }

        /* generic header: content type */
        generic_hdr = (mrcp_generic_header_t *) mrcp_generic_header_prepare(speak_msg);
        if (generic_hdr == NULL) {
            status = SWITCH_STATUS_FALSE;
            goto done;
        }

        /* a leading XML/SSML marker selects the profile's SSML MIME type */
        if (text_starts_with(text, XML_ID) || text_starts_with(text, SSML_ID)) {
            apt_string_assign(&generic_hdr->content_type, schannel->profile->ssml_mime_type, speak_msg->pool);
        } else {
            apt_string_assign(&generic_hdr->content_type, MIME_TYPE_PLAIN_TEXT, speak_msg->pool);
        }
        mrcp_generic_header_property_add(speak_msg, GENERIC_HEADER_CONTENT_TYPE);

        /* synthesizer header: voice, rate, ... */
        synth_hdr = (mrcp_synth_header_t *) mrcp_resource_header_prepare(speak_msg);
        if (synth_hdr == NULL) {
            status = SWITCH_STATUS_FALSE;
            goto done;
        }

        /* channel params go into the message headers */
        synth_channel_set_params(schannel, speak_msg, generic_hdr, synth_hdr);

        /* message body: the text itself */
        apt_string_assign(&speak_msg->body, text, schannel->memory_pool);

        /* drop any queued audio, then send SPEAK */
        audio_queue_clear(schannel->audio_queue);
        if (mrcp_application_message_send(schannel->unimrcp_session, schannel->unimrcp_channel, speak_msg) == FALSE) {
            status = SWITCH_STATUS_FALSE;
            goto done;
        }

        /* wait for IN-PROGRESS; warn once on the first timeout */
        while (schannel->state == SPEECH_CHANNEL_READY) {
            if (switch_thread_cond_timedwait(schannel->cond, schannel->mutex, SPEECH_CHANNEL_TIMEOUT_USEC) == SWITCH_STATUS_TIMEOUT
                && !timeout_logged) {
                timeout_logged = 1;
                switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_WARNING,
                                  "(%s) SPEAK IN-PROGRESS not received after %d ms\n", schannel->name, SPEECH_CHANNEL_TIMEOUT_USEC / (1000));
            }
        }

        /* any state other than PROCESSING means the request did not start */
        if (schannel->state != SPEECH_CHANNEL_PROCESSING) {
            status = SWITCH_STATUS_FALSE;
        }

    done:
        switch_mutex_unlock(schannel->mutex);
        return status;
    }
  8. mod_unimrcp.c#synth_speech_read_tts() 函数的核心是执行 mod_unimrcp.c#speech_channel_read() 函数,而 mod_unimrcp.c#speech_channel_read() 函数的关键则是检查 tts 的 channel 状态,当其状态符合要求的时候从 channel 的语音流缓冲队列中读取数据。此时回到本节步骤6第3步,switch_core.c#switch_core_session_write_frame() 函数会将从 MRCP 服务器传输过来的语音流数据写入到当前会话缓冲,经过编码转化,最终将通过 RTP 发送到终端播放,至此 tts 语音合成处理流程基本结束

    static switch_status_t synth_speech_read_tts(switch_speech_handle_t *sh, void *data, switch_size_t *datalen, switch_speech_flag_t *flags)
    {switch_status_t status = SWITCH_STATUS_SUCCESS;switch_size_t bytes_read;speech_channel_t *schannel = (speech_channel_t *) sh->private_info;bytes_read = *datalen;if (speech_channel_read(schannel, data, &bytes_read, (*flags & SWITCH_SPEECH_FLAG_BLOCKING)) == SWITCH_STATUS_SUCCESS) {/* pad data, if not enough read */if (bytes_read < *datalen) {
    #ifdef MOD_UNIMRCP_DEBUG_AUDIO_QUEUEswitch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_DEBUG, "(%s) adding %ld bytes of padding\n", schannel->name, *datalen - bytes_read);
    #endifmemset((uint8_t *) data + bytes_read, schannel->silence, *datalen - bytes_read);}} else {/* ready for next speak request */speech_channel_set_state(schannel, SPEECH_CHANNEL_READY);*datalen = 0;status = SWITCH_STATUS_BREAK;}/* report negotiated sample rate back to FreeSWITCH */sh->native_rate = schannel->rate;return status;
    }static switch_status_t speech_channel_read(speech_channel_t *schannel, void *data, switch_size_t *len, int block)
    {switch_status_t status = SWITCH_STATUS_SUCCESS;if (!schannel || !schannel->mutex || !schannel->audio_queue) {return SWITCH_STATUS_FALSE;}switch (schannel->state) {case SPEECH_CHANNEL_DONE:/* pull any remaining audio - never blocking */if (audio_queue_read(schannel->audio_queue, data, len, 0) == SWITCH_STATUS_FALSE) {/* all frames read */status = SWITCH_STATUS_BREAK;}break;case SPEECH_CHANNEL_PROCESSING:/* IN-PROGRESS */audio_queue_read(schannel->audio_queue, data, len, block);break;default:status = SWITCH_STATUS_BREAK;}return status;
    }
    

相关内容

热门资讯

管涛:直面中东风险外溢, 稳住... 要点 随着冲突升级、持续时间延长,风险因素明显偏向更加不利的情景,能源价格大幅上行风险依然较高。这将...
恒生银行:以约2272万港元回... 8月5日消息,恒生银行在港交所发布公告称,当日在香港斥资约2271.7万港元回购20万股,每股回购价...
蚂蚁消金成功发行20亿元金融债... 8月5日消息,重庆蚂蚁消费金融有限公司披露2025年第一期金融债券发行情况:实际发行总额20亿元人民...
福建:加强就业观念教育引导,鼓... 8月5日消息,《中共福建省委 福建省人民政府关于全方位促进高质量充分就业的实施意见》8月5日对外发布...
青岛市发布31条惠企助企新举措... 8月5日消息,青岛市政府办公厅日前印发《2025年青岛市跨境贸易便利化专项行动方案》,围绕强化政策供...