You need to enable JavaScript to run this app.
导航

【C】延迟检测

最近更新时间2023.03.16 11:40:13

首次发布时间2023.03.16 11:40:13

完整例子
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include "sami_core.h"

// Helper function: reads the whole file at `path` into memory.
//
// @param path  Path of the file to read.
// @return      The file's bytes, or an empty vector when the file cannot be
//              opened, is empty, or cannot be read completely.
std::vector<uint8_t> loadModelAsBinary(const std::string& path) {
    // Open positioned at the end (std::ios::ate) so tellg() reports the size.
    std::ifstream file(path, std::ios::binary | std::ios::ate);
    if (!file) { return {}; }

    // tellg() yields -1 on failure; using that as a vector size would request
    // an enormous allocation and throw, so bail out early instead.
    const std::streamsize size = file.tellg();
    if (size <= 0) { return {}; }
    file.seekg(0, std::ios::beg);

    std::vector<uint8_t> buffer(static_cast<std::size_t>(size));
    if (file.read(reinterpret_cast<char*>(buffer.data()), size)) { return buffer; }

    return {};
}

// step 0, load model
// Read the whole model file into memory. The buffer is named modelBuffer
// because step 1 passes modelBuffer.data() / modelBuffer.size() to the engine.
const std::string model_path = "/path/to/time_align.model";
std::vector<uint8_t> modelBuffer = loadModelAsBinary(model_path);
// loadModelAsBinary returns an empty vector when the file could not be read.
assert(!modelBuffer.empty());

// step 1, create handle
SAMICoreHandle executor;
SAMICoreExecutorContextCreateParameter engineCreateParameter;
engineCreateParameter.sampleRate = sample_rate;
engineCreateParameter.maxBlockSize = sample_rate / 10 // max block size;
engineCreateParameter.modelBuffer = (const char*)modelBuffer.data();
engineCreateParameter.modelLen = modelBuffer.size();
int ret = SAMICoreCreateHandleByIdentify(&executor, identify::SAMICoreIdentify_EngineExecutor_TimeAlign,
                                         &engineCreateParameter);
if(ret == SAMI_ENGINE_CREATE_ERROR) {
    std::cerr << "Cannot create Engine handle\n";
    return -1;
}

// step 2, create input and output audio block
// Buffer passed to the engine as timeAlignInput.mic. Only the array of
// channel pointers is allocated here (one pointer when interleaved, otherwise
// one per channel). The trailing () value-initializes it so every pointer
// starts as nullptr until real sample storage is attached.
SAMICoreAudioBuffer mic_audio_buffer;
mic_audio_buffer.isInterleave = isInterleave;
mic_audio_buffer.numberChannels = num_channels_mic;
mic_audio_buffer.numberSamples = max_block_size;
mic_audio_buffer.data = new float*[isInterleave ? 1 : num_channels_mic]();

// Buffer passed to the engine as timeAlignInput.ref; set up the same way.
SAMICoreAudioBuffer ref_audio_buffer;
ref_audio_buffer.isInterleave = isInterleave;
ref_audio_buffer.numberChannels = num_channels_ref;
ref_audio_buffer.numberSamples = max_block_size;
ref_audio_buffer.data = new float*[isInterleave ? 1 : num_channels_ref]();

// Pair the two buffers into the time-align input parameter.
SAMICoreTimeAlignParameter timeAlignInput;
timeAlignInput.mic = &mic_audio_buffer;
timeAlignInput.ref = &ref_audio_buffer;

// Zero the block first so fields not assigned below are zero.
SAMICoreBlock samiCoreBlock;
memset(&samiCoreBlock, 0, sizeof(SAMICoreBlock));
samiCoreBlock.dataType = SAMICoreDataType_TimeAlign;
samiCoreBlock.numberAudioData = 1;
samiCoreBlock.audioData = &timeAlignInput;

// step 3, process block by block
for(;hasAudioSamples();
{
    copySamplesToInputBuffer(in_audio_buffer); //拷贝数据或者修改数据指针in_audio_buffer的指向
    int ret = SAMICoreProcessAsync(executor, &samiCoreBlock);
    assert(ret == SAMI_OK);

    // do something after process
    doSomethingAfterProcess(out_block);  //业务从out_block拷贝处理后的数据
}

// step 4, get output
// Value-initialize so property.data is not indeterminate if the call fails.
SAMICoreProperty property{};
ret = SAMICoreGetPropertyById(executor, SAMICoreEngineExecutorOutPut, &property);
if (ret == SAMI_ENGINE_GETOUTPUT_NO_OUTPUT) {
    // No matching segment was found, so no delay can be reported.
    std::cout << "Can't detect." << std::endl;
} else if (ret != SAMI_OK) {
    // Any other failure: do not dereference property.data.
    std::cerr << "Cannot get delay output, error code " << ret << "\n";
} else {
    // On success property.data is read as a single float: the delay in milliseconds.
    float delay_ms = *reinterpret_cast<float*>(property.data);
    std::cout << "FINAL delay " << delay_ms << " ms" << std::endl;
}

// step 5, release resources
// Release the property that SAMICoreGetPropertyById filled in during step 4.
SAMICoreDestroyProperty(&property);
// Destroy the engine handle created in step 1.
SAMICoreDestroyHandle(executor);
// Free the channel-pointer arrays allocated with new[] in step 2.
delete[] mic_audio_buffer.data;
delete[] ref_audio_buffer.data;
使用步骤

〇、从文件中读取模型

即读取整个模型文件到内存,实现方法自由发挥,例子中loadModelAsBinary仅供参考。

一、创建算法句柄

传入模型内存地址、模型大小、采样率和 maxBlockSize,通过 SAMICoreCreateHandleByIdentify 创建 handle。

// Create the time-align engine handle from the in-memory model.
SAMICoreHandle executor;
// Value-initialize so that fields not assigned below are not left indeterminate.
SAMICoreExecutorContextCreateParameter engineCreateParameter{};
engineCreateParameter.sampleRate = sample_rate;
engineCreateParameter.maxBlockSize = max_block_size;
engineCreateParameter.modelBuffer = reinterpret_cast<const char*>(modelBuffer.data());
engineCreateParameter.modelLen = modelBuffer.size();
int ret = SAMICoreCreateHandleByIdentify(&executor, identify::SAMICoreIdentify_EngineExecutor_TimeAlign,
                                         &engineCreateParameter);
// Treat every non-success code as a failure, not only SAMI_ENGINE_CREATE_ERROR.
if (ret != SAMI_OK) {
    std::cerr << "Cannot create Engine handle\n";
    return -1;
}

算法支持的音频格式
采样率与模型相关,目前仅提供了16k和44.1k采样率的模型,支持交错/非交错音频,32位浮点数,单双声道均可
有几种情况会导致创建失败:

  1. 模型数据不正确,例如模型数据损坏或者大小不对。

  2. Block size 数据不正确。

  3. 采样率不匹配模型

二、创建 SAMICoreBlock 用于存放输入和输出

SAMICoreAudioBuffer,用于存放音频数据,它支持 Planar-Float 以及 Interleaved-Float 类型数据。更多关于音频数据格式请参看名词解释一节。SAMICoreBlock,用于存放需要处理的数据。

// Buffer passed to the engine as timeAlignInput.mic. Only the array of
// channel pointers is allocated here (one pointer when interleaved, otherwise
// one per channel). The trailing () value-initializes it so every pointer
// starts as nullptr until real sample storage is attached.
SAMICoreAudioBuffer mic_audio_buffer;
mic_audio_buffer.isInterleave = isInterleave;
mic_audio_buffer.numberChannels = num_channels_mic;
mic_audio_buffer.numberSamples = max_block_size;
mic_audio_buffer.data = new float*[isInterleave ? 1 : num_channels_mic]();

// Buffer passed to the engine as timeAlignInput.ref; set up the same way.
SAMICoreAudioBuffer ref_audio_buffer;
ref_audio_buffer.isInterleave = isInterleave;
ref_audio_buffer.numberChannels = num_channels_ref;
ref_audio_buffer.numberSamples = max_block_size;
ref_audio_buffer.data = new float*[isInterleave ? 1 : num_channels_ref]();

// Pair the two buffers into the time-align input parameter.
SAMICoreTimeAlignParameter timeAlignInput;
timeAlignInput.mic = &mic_audio_buffer;
timeAlignInput.ref = &ref_audio_buffer;

// Zero the block first so fields not assigned below are zero.
SAMICoreBlock samiCoreBlock;
memset(&samiCoreBlock, 0, sizeof(SAMICoreBlock));
samiCoreBlock.dataType = SAMICoreDataType_TimeAlign;
samiCoreBlock.numberAudioData = 1;
samiCoreBlock.audioData = &timeAlignInput;

三、处理音频

拷贝数据进行处理

将待处理的音频数据拷贝到 in_audio_buffer 中,经过 SAMICoreProcessAsync 处理后,结果将拷贝至 output 中。示例中采用这种方法。

for(;hasAudioSamples();
{
    copySamplesToInputBuffer(in_audio_buffer); //拷贝数据或者修改数据指针in_audio_buffer的指向
    int ret = SAMICoreProcessAsync(executor, &samiCoreBlock);
    assert(ret == SAMI_OK);

    // do something after process
    doSomethingAfterProcess(out_block);  //业务从out_block拷贝处理后的数据
}

有几种情况导致处理失败:

  1. 无效的 handle。handle 创建失败了,但仍然拿错误的 handle 进行 process

  2. SAMICoreBlock 和 SAMICoreTimeAlignParameter 设置错误。

需要注意的是:当输入总体数据量较小时,结果可能精准度略差。

四、输出结果

目前算法支持流式处理,所以支持任意 block_size,但总体数据量较小时误差可能较大,或者更容易无法计算延迟。当返回值为 SAMI_ENGINE_GETOUTPUT_NO_OUTPUT 时,即没有找到匹配的片段,无法计算延迟。

// Value-initialize so property.data is not indeterminate if the call fails.
SAMICoreProperty property{};
ret = SAMICoreGetPropertyById(executor, SAMICoreEngineExecutorOutPut, &property);
if (ret == SAMI_ENGINE_GETOUTPUT_NO_OUTPUT) {
    // No matching segment was found, so no delay can be reported.
    std::cout << "Can't detect." << std::endl;
} else if (ret != SAMI_OK) {
    // Any other failure: do not dereference property.data.
    std::cerr << "Cannot get delay output, error code " << ret << "\n";
} else {
    // On success property.data is read as a single float: the delay in milliseconds.
    float delay_ms = *reinterpret_cast<float*>(property.data);
    std::cout << "FINAL delay " << delay_ms << " ms" << std::endl;
}
// Release the property once the result has been consumed.
SAMICoreDestroyProperty(&property);

五、释放资源

释放 handle

// Generic form of the call. `handle` is not declared in this document; it is
// presumably the SAMICoreHandle created in step 1, which the full example names `executor`.
ret = SAMICoreDestroyHandle(handle);

此外,还要注意音频数据的内存释放(如果有)。例如:

// Destroy the engine handle created in step 1.
SAMICoreDestroyHandle(executor);
// Free the channel-pointer arrays that were allocated with new[] for the two audio buffers.
delete[] mic_audio_buffer.data;
delete[] ref_audio_buffer.data;