add files

2026-02-05 18:09:20 +08:00 · 2025-02-20 12:17:03 +08:00
parent a21dd4555c
commit edd008441b
667 changed files with 473123 additions and 0 deletions
--- a/funasr_local/runtime/onnxruntime/src/CMakeLists.txt
+++ b/funasr_local/runtime/onnxruntime/src/CMakeLists.txt
@@ -0,0 +1,31 @@
+
+# Gather every source file in this directory for the funasr library.
+file(GLOB files1 "*.cpp")
+file(GLOB files2 "*.cc")
+
+set(files ${files1} ${files2})
+# The command-line front ends built below sit in this directory too, so the
+# globs pick them up. Each one is the sole source of its own executable, so
+# keep those entry-point files out of the library.
+list(FILTER files EXCLUDE REGEX "/funasr-onnx-offline(-rtf)?\\.cpp$")
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
+
+add_library(funasr ${files})
+
+if(WIN32)
+    set(EXTRA_LIBS pthread yaml-cpp csrc glog)
+    # Prebuilt Windows dependencies are looked up under win/lib/<arch>.
+    if(CMAKE_CL_64)
+        target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x64)
+    else()
+        target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x86)
+    endif()
+    target_include_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include )
+
+    target_compile_definitions(funasr PUBLIC -D_FUNASR_API_EXPORT)
+else()
+    set(EXTRA_LIBS pthread yaml-cpp csrc glog )
+    # ONNXRUNTIME_DIR is expected to be set by the including project.
+    include_directories(${ONNXRUNTIME_DIR}/include)
+endif()
+
+include_directories(${CMAKE_SOURCE_DIR}/include)
+target_link_libraries(funasr PUBLIC onnxruntime ${EXTRA_LIBS})
+
+# Command-line tools; both link against the library defined above.
+add_executable(funasr-onnx-offline "funasr-onnx-offline.cpp")
+add_executable(funasr-onnx-offline-rtf "funasr-onnx-offline-rtf.cpp")
+target_link_libraries(funasr-onnx-offline PUBLIC funasr)
+target_link_libraries(funasr-onnx-offline-rtf PUBLIC funasr)
--- a/funasr_local/runtime/onnxruntime/src/alignedmem.cpp
+++ b/funasr_local/runtime/onnxruntime/src/alignedmem.cpp
@@ -0,0 +1,18 @@
+#include "precomp.h"
+// Allocates `required_bytes` bytes whose address is a multiple of `alignment`.
+//
+// The block is carved out of a larger malloc() allocation. The pointer that
+// malloc() returned is stored in the slot just below the aligned address so
+// that AlignedFree() can recover it. `alignment` must be a non-zero power of
+// two because the address is rounded with a bit mask.
+//
+// Returns NULL when `alignment` is invalid, when the padded size would
+// overflow size_t, or when malloc() fails. Release the result with
+// AlignedFree() only.
+void *AlignedMalloc(size_t alignment, size_t required_bytes)
+{
+    if (alignment == 0 || (alignment & (alignment - 1)) != 0) {
+        return NULL;
+    }
+    // Worst-case slack: alignment-1 bytes of padding plus the back-pointer.
+    const size_t offset = alignment - 1 + sizeof(void *);
+    if (required_bytes > (size_t)-1 - offset) {
+        return NULL;
+    }
+    void *p1 = malloc(required_bytes + offset);  // original block
+    if (p1 == NULL) {
+        return NULL;
+    }
+    // aligned block
+    void **p2 = (void **)(((size_t)(p1) + offset) & ~(alignment - 1));
+    p2[-1] = p1;
+    return p2;
+}
+
+// Releases a block obtained from AlignedMalloc(). Passing NULL is a no-op,
+// mirroring free(); any other pointer must have come from AlignedMalloc().
+void AlignedFree(void *p)
+{
+    if (p == NULL) {
+        return;
+    }
+    // The pointer originally returned by malloc() sits just below the block.
+    free(((void **)p)[-1]);
+}
--- a/funasr_local/runtime/onnxruntime/src/alignedmem.h
+++ b/funasr_local/runtime/onnxruntime/src/alignedmem.h
@@ -0,0 +1,8 @@
+
+#ifndef ALIGNEDMEM_H
+#define ALIGNEDMEM_H
+
+extern void *AlignedMalloc(size_t alignment, size_t required_bytes);  // Returns required_bytes of storage aligned to `alignment`, or NULL if malloc fails; release with AlignedFree().
+extern void AlignedFree(void *p);  // Frees a block returned by AlignedMalloc(); reads the original malloc pointer stored just below `p`.
+
+#endif
--- a/funasr_local/runtime/onnxruntime/src/audio.cpp
+++ b/funasr_local/runtime/onnxruntime/src/audio.cpp
@@ -0,0 +1,522 @@
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fstream>
+#include <assert.h>
+
+#include "audio.h"
+#include "precomp.h"
+
+using namespace std;
+
+// see http://soundfile.sapp.org/doc/WaveFormat/
+// Note: We assume little endian here
+// Fixed-layout RIFF/WAVE header (44 bytes of fields) that is read verbatim
+// from the start of a wav file. The magic numbers below are the little-endian
+// encodings of the four-character chunk tags.
+struct WaveHeader {
+  // Checks that the header describes mono, 16-bit PCM audio whose byte_rate
+  // and block_align agree with the other fields. Prints the first offending
+  // field to stdout and returns false; returns true when every check passes.
+  bool Validate() const {
+    //                 F F I R
+    if (chunk_id != 0x46464952) {
+      printf("Expected chunk_id RIFF. Given: 0x%08x\n", chunk_id);
+      return false;
+    }
+    //               E V A W
+    if (format != 0x45564157) {
+      printf("Expected format WAVE. Given: 0x%08x\n", format);
+      return false;
+    }
+
+    //                     ' ' t m f
+    if (subchunk1_id != 0x20746d66) {
+      printf("Expected subchunk1_id 0x20746d66. Given: 0x%08x\n",
+                       subchunk1_id);
+      return false;
+    }
+
+    if (subchunk1_size != 16) {  // 16 for PCM
+      printf("Expected subchunk1_size 16. Given: %d\n",
+                       subchunk1_size);
+      return false;
+    }
+
+    if (audio_format != 1) {  // 1 for PCM
+      printf("Expected audio_format 1. Given: %d\n", audio_format);
+      return false;
+    }
+
+    if (num_channels != 1) {  // we support only single channel for now
+      printf("Expected single channel. Given: %d\n", num_channels);
+      return false;
+    }
+    if (byte_rate != (sample_rate * num_channels * bits_per_sample / 8)) {
+      printf("Expected byte_rate %d. Given: %d\n",
+                       sample_rate * num_channels * bits_per_sample / 8,
+                       byte_rate);
+      return false;
+    }
+
+    if (block_align != (num_channels * bits_per_sample / 8)) {
+      printf("Expected block_align %d. Given: %d\n",
+                       num_channels * bits_per_sample / 8, block_align);
+      return false;
+    }
+
+    if (bits_per_sample != 16) {  // we support only 16 bits per sample
+      printf("Expected bits_per_sample 16. Given: %d\n",
+                       bits_per_sample);
+      return false;
+    }
+    return true;
+  }
+
+  // Advances `is` until subchunk2_id/subchunk2_size describe the "data"
+  // chunk, or until the stream fails. Each round skips the payload of the
+  // current chunk and reads the id and size of the one that follows.
+  //
+  // See https://en.wikipedia.org/wiki/WAV#Metadata and
+  // https://www.robotplanet.dk/audio/wav_meta_data/riff_mci.pdf
+  void SeekToDataChunk(std::istream &is) {
+    //                              a t a d
+    while (is && subchunk2_id != 0x61746164) {
+      // const char *p = reinterpret_cast<const char *>(&subchunk2_id);
+      // printf("Skip chunk (%x): %c%c%c%c of size: %d\n", subchunk2_id, p[0],
+      //        p[1], p[2], p[3], subchunk2_size);
+      is.seekg(subchunk2_size, std::istream::cur);
+      is.read(reinterpret_cast<char *>(&subchunk2_id), sizeof(int32_t));
+      is.read(reinterpret_cast<char *>(&subchunk2_size), sizeof(int32_t));
+    }
+  }
+
+  int32_t chunk_id;
+  int32_t chunk_size;
+  int32_t format;
+  int32_t subchunk1_id;
+  int32_t subchunk1_size;
+  int16_t audio_format;
+  int16_t num_channels;
+  int32_t sample_rate;
+  int32_t byte_rate;
+  int16_t block_align;
+  int16_t bits_per_sample;
+  int32_t subchunk2_id;    // a tag of this chunk
+  int32_t subchunk2_size;  // size of subchunk2
+};
+// The struct is read with a single binary read, so its size must match the
+// on-disk header size exactly (no padding).
+static_assert(sizeof(WaveHeader) == WAV_HEADER_SIZE, "");
+
+// Running sum over the most recent `window_size` values passed to put().
+//
+// Storage is a ring buffer with one spare slot: the slot at out_idx always
+// holds the value that is about to drop out of the window. The buffer is a
+// std::vector, so copies of the object own independent storage.
+class AudioWindow {
+  private:
+    std::vector<int> window;
+    int in_idx = 0;
+    int out_idx = 1;
+    int sum = 0;
+    int window_size = 0;
+
+  public:
+    // `window_size` is expected to be at least 1; all slots start at zero.
+    AudioWindow(int window_size)
+        : window(static_cast<size_t>(window_size + 1), 0),
+          window_size(window_size)
+    {
+    }
+
+    // Pushes `val` into the window and returns the updated running sum.
+    int put(int val)
+    {
+        sum = sum + val - window[out_idx];
+        window[in_idx] = val;
+        // Both cursors wrap after index window_size (the buffer has
+        // window_size + 1 slots).
+        in_idx = in_idx == window_size ? 0 : in_idx + 1;
+        out_idx = out_idx == window_size ? 0 : out_idx + 1;
+        return sum;
+    }
+};
+
+// Creates an empty frame: start, end and length are all zero.
+AudioFrame::AudioFrame()
+{
+    start = 0;
+    end = 0;
+    len = 0;
+}
+
+// Creates a frame that begins at sample 0 and spans `len` samples.
+AudioFrame::AudioFrame(int len) : len(len)
+{
+    start = 0;
+    end = len;
+}
+
+AudioFrame::~AudioFrame() {}
+
+// Sets the first sample index, clamping negative values to 0.
+// Returns the value actually stored.
+int AudioFrame::SetStart(int val)
+{
+    start = val < 0 ? 0 : val;
+    return start;
+}
+
+// Sets the end index and recomputes the length as end - start, so call
+// SetStart() first. Returns the stored end index.
+int AudioFrame::SetEnd(int val)
+{
+    end = val;
+    len = end - start;
+    return end;
+}
+
+// Returns the first sample index of the frame.
+int AudioFrame::GetStart()
+{
+    return start;
+}
+
+// Returns the number of samples in the frame.
+int AudioFrame::GetLen()
+{
+    return len;
+}
+
+// Not implemented: logs an error and returns 0.
+int AudioFrame::Disp()
+{
+    LOG(ERROR) << "Not imp!!!!";
+    return 0;
+}
+
+// Creates an empty audio container.
+// data_type == 1 makes the loaders scale 16-bit samples by 1/32768; any other
+// value keeps the raw sample values.
+Audio::Audio(int data_type) : data_type(data_type)
+{
+    speech_buff = NULL;
+    speech_data = NULL;
+    speech_len = 0;
+    offset = 0;
+    align_size = 1360;
+}
+
+// Same as above, with `size` stored as the alignment size instead of the
+// default of 1360.
+Audio::Audio(int data_type, int size) : data_type(data_type)
+{
+    speech_buff = NULL;
+    speech_data = NULL;
+    speech_len = 0;
+    offset = 0;
+    align_size = (float)size;
+}
+
+// Releases the sample buffers and any frames that were never fetched.
+Audio::~Audio()
+{
+    if (speech_buff != NULL) {
+        free(speech_buff);
+    }
+
+    if (speech_data != NULL) {
+        free(speech_data);
+    }
+
+    // Frames are heap-allocated by the loaders and normally deleted when they
+    // are fetched; delete whatever is still queued.
+    while (!frame_queue.empty()) {
+        delete frame_queue.front();
+        frame_queue.pop();
+    }
+}
+
+// Logs the duration (in seconds at MODEL_SAMPLE_RATE) and the sample count.
+void Audio::Disp()
+{
+    const auto seconds = (float)speech_len / MODEL_SAMPLE_RATE;
+    LOG(INFO) << "Audio time is " << seconds << " s. len is " << speech_len;
+}
+
+// Returns the duration of the loaded audio in seconds at MODEL_SAMPLE_RATE.
+float Audio::GetTimeLen()
+{
+    const auto seconds = (float)speech_len / MODEL_SAMPLE_RATE;
+    return seconds;
+}
+
+// Resamples `n` samples of `waveform` from `sampling_rate` to
+// MODEL_SAMPLE_RATE and stores the result in speech_data / speech_len.
+//
+// `waveform` may be speech_data itself: the old buffer is released only after
+// the resampled copy exists. If the new buffer cannot be allocated, an error
+// is logged and the object is left without audio (speech_data == NULL,
+// speech_len == 0).
+void Audio::WavResample(int32_t sampling_rate, const float *waveform,
+                          int32_t n)
+{
+    LOG(INFO) << "Creating a resampler:\n"
+              << "   in_sample_rate: "<< sampling_rate << "\n"
+              << "   output_sample_rate: " << static_cast<int32_t>(MODEL_SAMPLE_RATE);
+    float min_freq =
+        std::min<int32_t>(sampling_rate, MODEL_SAMPLE_RATE);
+    // Cut off slightly below the Nyquist frequency of the lower rate.
+    float lowpass_cutoff = 0.99 * 0.5 * min_freq;
+
+    int32_t lowpass_filter_width = 6;
+
+    auto resampler = std::make_unique<LinearResample>(
+          sampling_rate, MODEL_SAMPLE_RATE, lowpass_cutoff, lowpass_filter_width);
+    std::vector<float> samples;
+    resampler->Resample(waveform, n, true, &samples);
+
+    const size_t count = samples.size();
+    float *resampled = (float *)malloc(sizeof(float) * count);
+    if (resampled == NULL && count > 0) {
+        LOG(ERROR) << "Failed to allocate " << count << " resampled samples";
+        free(speech_data);
+        speech_data = NULL;
+        speech_len = 0;
+        return;
+    }
+    std::copy(samples.begin(), samples.end(), resampled);
+
+    // Swap in the new buffer; free(NULL) is a no-op.
+    free(speech_data);
+    speech_data = resampled;
+    speech_len = count;
+}
+
+// Loads a mono, 16-bit PCM wav file into speech_data as floats.
+//
+// filename:      path of the wav file.
+// sampling_rate: out-parameter that receives the sample rate stored in the
+//                header; must not be NULL.
+//
+// Samples are divided by 32768 when data_type == 1 and kept unscaled
+// otherwise, then resampled if the file does not use MODEL_SAMPLE_RATE. On
+// success one AudioFrame covering the whole signal is queued and true is
+// returned. On failure the buffers are released and false is returned.
+bool Audio::LoadWav(const char *filename, int32_t* sampling_rate)
+{
+    WaveHeader header;
+    // Release the previous audio and null the pointers immediately; otherwise
+    // an early return would leave them dangling and the destructor would free
+    // them a second time.
+    auto reset = [this]() {
+        free(speech_data);
+        speech_data = NULL;
+        free(speech_buff);
+        speech_buff = NULL;
+        speech_len = 0;
+        return false;
+    };
+    reset();
+    offset = 0;
+
+    std::ifstream is(filename, std::ifstream::binary);
+    is.read(reinterpret_cast<char *>(&header), sizeof(header));
+    if(!is){
+        LOG(ERROR) << "Failed to read " << filename;
+        return false;
+    }
+    // The conversion below assumes mono 16-bit PCM, so refuse anything else,
+    // then skip metadata chunks that may precede the samples.
+    if (!header.Validate()) {
+        LOG(ERROR) << "Unsupported wav header in " << filename;
+        return false;
+    }
+    header.SeekToDataChunk(is);
+    if (!is || header.subchunk2_size < 0) {
+        LOG(ERROR) << "Failed to locate the data chunk in " << filename;
+        return false;
+    }
+
+    *sampling_rate = header.sample_rate;
+    // header.subchunk2_size contains the number of bytes in the data.
+    // As we assume each sample contains two bytes, so it is divided by 2 here
+    speech_len = header.subchunk2_size / 2;
+    speech_buff = (int16_t *)malloc(sizeof(int16_t) * speech_len);
+    speech_data = (float *)malloc(sizeof(float) * speech_len);
+    if (speech_buff == NULL || speech_data == NULL) {
+        LOG(ERROR) << "Failed to allocate buffers for " << filename;
+        return reset();
+    }
+
+    // Read whole samples only, so an odd byte count cannot overrun the buffer.
+    is.read(reinterpret_cast<char *>(speech_buff), sizeof(int16_t) * speech_len);
+    if (!is) {
+        LOG(ERROR) << "Failed to read " << filename;
+        return reset();
+    }
+
+    float scale = 1;
+    if (data_type == 1) {
+        scale = 32768;
+    }
+    for (int32_t i = 0; i != speech_len; ++i) {
+        speech_data[i] = (float)speech_buff[i] / scale;
+    }
+
+    //resample
+    if(*sampling_rate != MODEL_SAMPLE_RATE){
+        WavResample(*sampling_rate, speech_data, speech_len);
+    }
+
+    AudioFrame* frame = new AudioFrame(speech_len);
+    frame_queue.push(frame);
+
+    return true;
+}
+
+// Loads a mono, 16-bit PCM wav image held in memory.
+//
+// buf:           start of the wav image (header followed by chunks).
+// n_file_len:    number of valid bytes at `buf`.
+// sampling_rate: out-parameter that receives the header's sample rate; must
+//                not be NULL.
+//
+// Nothing beyond n_file_len bytes is read: a data chunk that claims more
+// bytes than the buffer holds is truncated to what is available. Scaling,
+// resampling and frame queuing match the file-based overload. Returns false
+// (with the buffers released) on a malformed image or allocation failure.
+bool Audio::LoadWav(const char* buf, int n_file_len, int32_t* sampling_rate)
+{
+    WaveHeader header;
+    // Null the pointers while freeing so that an early return cannot leave
+    // them dangling for the destructor.
+    auto reset = [this]() {
+        free(speech_data);
+        speech_data = NULL;
+        free(speech_buff);
+        speech_buff = NULL;
+        speech_len = 0;
+        return false;
+    };
+    reset();
+    offset = 0;
+
+    if (buf == NULL || n_file_len < (int)sizeof(header)) {
+        LOG(ERROR) << "Wav buffer is shorter than a wav header";
+        return false;
+    }
+    std::memcpy(&header, buf, sizeof(header));
+    if (!header.Validate()) {
+        LOG(ERROR) << "Unsupported wav header in buffer";
+        return false;
+    }
+
+    // Skip chunks that precede the samples, the in-memory counterpart of
+    // WaveHeader::SeekToDataChunk. 0x61746164 is "data" in little endian.
+    const size_t total = (size_t)n_file_len;
+    const size_t tag_bytes = 2 * sizeof(int32_t);  // chunk id + chunk size
+    size_t data_pos = WAV_HEADER_SIZE;
+    while (header.subchunk2_id != 0x61746164) {
+        if (header.subchunk2_size < 0) {
+            LOG(ERROR) << "Corrupt chunk size in wav buffer";
+            return false;
+        }
+        const size_t skip = (size_t)header.subchunk2_size;
+        if (skip > total - data_pos || total - data_pos - skip < tag_bytes) {
+            LOG(ERROR) << "No data chunk found in wav buffer";
+            return false;
+        }
+        data_pos += skip;
+        std::memcpy(&header.subchunk2_id, buf + data_pos, sizeof(int32_t));
+        std::memcpy(&header.subchunk2_size, buf + data_pos + sizeof(int32_t), sizeof(int32_t));
+        data_pos += tag_bytes;
+    }
+    if (header.subchunk2_size < 0) {
+        LOG(ERROR) << "Corrupt data chunk size in wav buffer";
+        return false;
+    }
+
+    *sampling_rate = header.sample_rate;
+    // Never trust the header for more bytes than the caller handed over.
+    const size_t data_bytes = std::min((size_t)header.subchunk2_size, total - data_pos);
+    speech_len = data_bytes / 2;
+    speech_buff = (int16_t *)malloc(sizeof(int16_t) * speech_len);
+    speech_data = (float *)malloc(sizeof(float) * speech_len);
+    if (speech_buff == NULL || speech_data == NULL) {
+        LOG(ERROR) << "Failed to allocate buffers for wav data";
+        return reset();
+    }
+    memcpy((void*)speech_buff, (const void*)(buf + data_pos), speech_len * sizeof(int16_t));
+
+    float scale = 1;
+    if (data_type == 1) {
+        scale = 32768;
+    }
+
+    for (int32_t i = 0; i != speech_len; ++i) {
+        speech_data[i] = (float)speech_buff[i] / scale;
+    }
+
+    //resample
+    if(*sampling_rate != MODEL_SAMPLE_RATE){
+        WavResample(*sampling_rate, speech_data, speech_len);
+    }
+
+    AudioFrame* frame = new AudioFrame(speech_len);
+    frame_queue.push(frame);
+
+    return true;
+}
+
+// Loads headerless 16-bit PCM samples from memory.
+//
+// buf:           raw samples, two bytes each.
+// n_buf_len:     number of bytes at `buf`; a trailing odd byte is ignored.
+// sampling_rate: in-parameter giving the rate of the samples; it is only
+//                read, to decide whether resampling to MODEL_SAMPLE_RATE is
+//                needed. Must not be NULL.
+//
+// Samples are divided by 32768 when data_type == 1. On success one
+// AudioFrame covering the whole signal is queued and true is returned;
+// otherwise the buffers are released and false is returned.
+bool Audio::LoadPcmwav(const char* buf, int n_buf_len, int32_t* sampling_rate)
+{
+    // Null the pointers while freeing so that an early return cannot leave
+    // them dangling for the destructor.
+    auto reset = [this]() {
+        free(speech_data);
+        speech_data = NULL;
+        free(speech_buff);
+        speech_buff = NULL;
+        speech_len = 0;
+        return false;
+    };
+    reset();
+    offset = 0;
+
+    if (buf == NULL || n_buf_len < 0) {
+        LOG(ERROR) << "Invalid pcm buffer";
+        return false;
+    }
+
+    speech_len = n_buf_len / 2;
+    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
+    speech_data = (float*)malloc(sizeof(float) * speech_len);
+    if (speech_buff == NULL || speech_data == NULL) {
+        LOG(ERROR) << "Failed to allocate buffers for pcm data";
+        return reset();
+    }
+    memcpy((void*)speech_buff, (const void*)buf, speech_len * sizeof(int16_t));
+
+    float scale = 1;
+    if (data_type == 1) {
+        scale = 32768;
+    }
+
+    for (int32_t i = 0; i != speech_len; ++i) {
+        speech_data[i] = (float)speech_buff[i] / scale;
+    }
+
+    //resample
+    if(*sampling_rate != MODEL_SAMPLE_RATE){
+        WavResample(*sampling_rate, speech_data, speech_len);
+    }
+
+    AudioFrame* frame = new AudioFrame(speech_len);
+    frame_queue.push(frame);
+    return true;
+}
+
+// Loads a headerless 16-bit PCM file.
+//
+// filename:      path of the raw sample file.
+// sampling_rate: in-parameter giving the rate of the samples; it is only
+//                read, to decide whether resampling to MODEL_SAMPLE_RATE is
+//                needed. Must not be NULL.
+//
+// The sample count is derived from the file size. Samples that cannot be
+// read stay zero. Returns false, with the file closed and the buffers
+// released, when the file cannot be opened or measured or when allocation
+// fails.
+bool Audio::LoadPcmwav(const char* filename, int32_t* sampling_rate)
+{
+    // Null the pointers while freeing so that an early return cannot leave
+    // them dangling for the destructor.
+    auto reset = [this]() {
+        free(speech_data);
+        speech_data = NULL;
+        free(speech_buff);
+        speech_buff = NULL;
+        speech_len = 0;
+        return false;
+    };
+    reset();
+    offset = 0;
+
+    FILE* fp = fopen(filename, "rb");
+    if (fp == nullptr)
+    {
+        LOG(ERROR) << "Failed to read " << filename;
+        return false;
+    }
+
+    // Measure the file; ftell() reports failure as a negative value.
+    long n_file_len = -1;
+    if (fseek(fp, 0, SEEK_END) == 0) {
+        n_file_len = ftell(fp);
+    }
+    if (n_file_len < 0 || fseek(fp, 0, SEEK_SET) != 0) {
+        LOG(ERROR) << "Failed to read " << filename;
+        fclose(fp);
+        return false;
+    }
+
+    speech_len = (n_file_len) / 2;
+    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
+    speech_data = (float*)malloc(sizeof(float) * speech_len);
+    if (speech_buff == NULL || speech_data == NULL) {
+        LOG(ERROR) << "Failed to allocate buffers for " << filename;
+        fclose(fp);
+        return reset();
+    }
+
+    // Zero first so that a short read leaves silence rather than garbage.
+    memset(speech_buff, 0, sizeof(int16_t) * speech_len);
+    const size_t n_read = fread(speech_buff, sizeof(int16_t), speech_len, fp);
+    fclose(fp);
+    if (n_read != (size_t)speech_len) {
+        LOG(ERROR) << "Short read from " << filename << ": got " << n_read
+                   << " of " << speech_len << " samples";
+    }
+
+    float scale = 1;
+    if (data_type == 1) {
+        scale = 32768;
+    }
+    for (int32_t i = 0; i != speech_len; ++i) {
+        speech_data[i] = (float)speech_buff[i] / scale;
+    }
+
+    //resample
+    if(*sampling_rate != MODEL_SAMPLE_RATE){
+        WavResample(*sampling_rate, speech_data, speech_len);
+    }
+
+    AudioFrame* frame = new AudioFrame(speech_len);
+    frame_queue.push(frame);
+
+    return true;
+}
+
+// Hands out the next `len` samples of speech_data through `dout`.
+//
+// Returns S_ERR (with dout = NULL) once offset has reached speech_align_len,
+// S_END for the chunk that ends exactly at speech_align_len, and S_MIDDLE
+// for every other chunk. The S_END case also drops the frame at the front of
+// frame_queue, if there is one.
+int Audio::FetchChunck(float *&dout, int len)
+{
+    if (offset >= speech_align_len) {
+        dout = NULL;
+        return S_ERR;
+    } else if (offset == speech_align_len - len) {
+        dout = speech_data + offset;
+        offset = speech_align_len;
+        // Temporary workaround: retire the queued frame together with the
+        // last chunk. The queue may already be empty (e.g. after Fetch()).
+        if (!frame_queue.empty()) {
+            AudioFrame *frame = frame_queue.front();
+            frame_queue.pop();
+            delete frame;
+        }
+
+        return S_END;
+    } else {
+        dout = speech_data + offset;
+        offset += len;
+        return S_MIDDLE;
+    }
+}
+
+// Pops the next queued frame and exposes its samples.
+//
+// On success `dout` points at the frame's first sample inside speech_data,
+// `len` is the frame length, `flag` is set to S_END and 1 is returned.
+// Returns 0 and leaves the arguments untouched when no frame is queued.
+int Audio::Fetch(float *&dout, int &len, int &flag)
+{
+    if (frame_queue.empty()) {
+        return 0;
+    }
+
+    AudioFrame *head = frame_queue.front();
+    frame_queue.pop();
+
+    len = head->GetLen();
+    dout = speech_data + head->GetStart();
+    delete head;
+
+    flag = S_END;
+    return 1;
+}
+
+// Pads speech_data on both sides with mirrored samples so that its length
+// becomes (num_frames - 1) * 160 + 400, where num_frames is speech_len / 160
+// rounded to the nearest integer. The left pad is always 120 samples; the
+// right pad takes the remainder. The queued frame is replaced by one that
+// covers the padded signal.
+//
+// Does nothing when no audio is loaded or when the new buffer cannot be
+// allocated.
+void Audio::Padding()
+{
+    if (speech_data == NULL || speech_len <= 0) {
+        return;
+    }
+
+    // Sizes are kept in integers: float cannot represent sample counts above
+    // 2^24 exactly, which would make the buffer size and the copy lengths
+    // disagree for long recordings.
+    const int frame_length = 400;
+    const int frame_shift = 160;
+    const int num_samples = speech_len;
+    const int num_frames = (num_samples + frame_shift / 2) / frame_shift;
+    const int num_new_samples = (num_frames - 1) * frame_shift + frame_length;
+    const int num_left_padding = (frame_length - frame_shift) / 2;
+    const int num_right_padding = num_new_samples - num_samples - num_left_padding;
+
+    float *new_data = (float *)malloc(num_new_samples * sizeof(float));
+    if (new_data == NULL) {
+        LOG(ERROR) << "Failed to allocate padded audio buffer";
+        return;
+    }
+
+    // Left pad: the first samples in reverse order. The index is clamped so
+    // that signals shorter than the pad repeat their last sample instead of
+    // reading out of bounds.
+    for (int i = 0; i < num_left_padding; i++) {
+        const int ii = std::min(num_left_padding - i - 1, num_samples - 1);
+        new_data[i] = speech_data[ii];
+    }
+    memcpy(new_data + num_left_padding, speech_data, num_samples * sizeof(float));
+
+    // Right pad: the last samples in reverse order, clamped the same way.
+    const int tail = num_left_padding + num_samples;
+    for (int i = 0; i < num_right_padding; i++) {
+        const int ii = std::max(num_samples - i - 1, 0);
+        new_data[tail + i] = speech_data[ii];
+    }
+    free(speech_data);
+    speech_data = new_data;
+    speech_len = num_new_samples;
+
+    // Queue a frame for the padded signal, then drop the frame that was at
+    // the front of the queue.
+    AudioFrame *frame = new AudioFrame(num_new_samples);
+    frame_queue.push(frame);
+    frame = frame_queue.front();
+    frame_queue.pop();
+    delete frame;
+}
+
+// Replaces the queued whole-signal frame with one frame per speech segment
+// reported by recog_obj->VadSeg().
+//
+// Each segment is read as a {start, end} pair and multiplied by
+// MODEL_SAMPLE_RATE / 1000 to get sample indices (i.e. the pair is treated as
+// milliseconds). Logs an error and does nothing when `recog_obj` is NULL or
+// no frame is queued.
+void Audio::Split(Model* recog_obj)
+{
+    if (recog_obj == NULL || frame_queue.empty()) {
+        LOG(ERROR) << "Audio::Split called without a model or without loaded audio";
+        return;
+    }
+
+    AudioFrame *frame = frame_queue.front();
+    frame_queue.pop();
+    int sp_len = frame->GetLen();
+    delete frame;
+    frame = NULL;
+
+    std::vector<float> pcm_data(speech_data, speech_data+sp_len);
+    vector<std::vector<int>> vad_segments = recog_obj->VadSeg(pcm_data);
+    int seg_sample = MODEL_SAMPLE_RATE/1000;
+    for(const vector<int>& segment:vad_segments)
+    {
+        // A usable segment needs both a start and an end entry.
+        if (segment.size() < 2) {
+            continue;
+        }
+        frame = new AudioFrame();
+        int start = segment[0]*seg_sample;
+        int end = segment[1]*seg_sample;
+        frame->SetStart(start);
+        frame->SetEnd(end);
+        frame_queue.push(frame);
+        frame = NULL;
+    }
+}
--- a/funasr_local/runtime/onnxruntime/src/common-struct.h
+++ b/funasr_local/runtime/onnxruntime/src/common-struct.h
@@ -0,0 +1,6 @@
+
+#ifndef COMMONSTRUCT_H
+#define COMMONSTRUCT_H
+
+
+#endif
--- a/funasr_local/runtime/onnxruntime/src/commonfunc.h
+++ b/funasr_local/runtime/onnxruntime/src/commonfunc.h
@@ -0,0 +1,54 @@
+#pragma once 
+#include <algorithm>
+typedef struct  // Result record; NOTE(review): producers/consumers are outside this file, field meanings below are inferred from the names.
+{
+    std::string msg;  // presumably the recognised text -- confirm against the code that fills it
+    float  snippet_time;  // presumably the duration of the audio snippet in seconds -- confirm
+}FUNASR_RECOG_RESULT;
+
+
+#ifdef _WIN32
+#include <codecvt>
+
+// Converts the narrow string `str` to a wide string using the codecvt facet
+// of the named `locale`.
+inline std::wstring String2wstring(const std::string& str, const std::string& locale)
+{
+    using Facet = std::codecvt_byname<wchar_t, char, std::mbstate_t>;
+    // wstring_convert takes ownership of the facet and deletes it.
+    std::wstring_convert<Facet> converter(new Facet(locale));
+    return converter.from_bytes(str);
+}
+
+// Converts `str` to a wide string with the "zh-CN" locale; an empty input
+// yields an empty wide string without touching the locale machinery.
+inline std::wstring  StrToWstr(std::string str) {
+    if (str.empty()) {
+        return std::wstring();
+    }
+    return String2wstring(str, "zh-CN");
+}
+
+#endif
+
+// Copies the name of input `nIndex` of `session` into `inputName`.
+// Leaves `inputName` untouched when the session reports no inputs.
+inline void GetInputName(Ort::Session* session, string& inputName,int nIndex=0) {
+    if (session->GetInputCount() == 0) {
+        return;
+    }
+    Ort::AllocatorWithDefaultOptions allocator;
+    // The allocated name is released when `name` goes out of scope, so copy
+    // it into the caller's string first.
+    auto name = session->GetInputNameAllocated(nIndex, allocator);
+    inputName = name.get();
+}
+
+// Copies the name of output `nIndex` of `session` into `outputName`.
+// Leaves `outputName` untouched when the session reports no outputs.
+inline void GetOutputName(Ort::Session* session, string& outputName, int nIndex = 0) {
+    if (session->GetOutputCount() == 0) {
+        return;
+    }
+    Ort::AllocatorWithDefaultOptions allocator;
+    // The allocated name is released when `name` goes out of scope, so copy
+    // it into the caller's string first.
+    auto name = session->GetOutputNameAllocated(nIndex, allocator);
+    outputName = name.get();
+}
+
+// Returns the zero-based position of the largest element in [first, last).
+// The first one wins on ties; an empty range yields 0.
+template <class ForwardIterator>
+inline static size_t Argmax(ForwardIterator first, ForwardIterator last) {
+    const ForwardIterator best = std::max_element(first, last);
+    return std::distance(first, best);
+}
--- a/funasr_local/runtime/onnxruntime/src/ct-transformer.cpp
+++ b/funasr_local/runtime/onnxruntime/src/ct-transformer.cpp
@@ -0,0 +1,187 @@
+/**
+ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+ * MIT License  (https://opensource.org/licenses/MIT)
+*/
+
+#include "precomp.h"
+
+CTTransformer::CTTransformer()  // Builds an unloaded punctuator; call InitPunc() before Infer()/AddPunc().
+:env_(ORT_LOGGING_LEVEL_ERROR, ""),session_options{}  // ORT environment logs errors only; session options start value-initialized.
+{
+}
+
+// Loads the punctuation model and its tokenizer configuration.
+//
+// punc_model:  path of the ONNX model file.
+// punc_config: path of the yaml file handed to the tokenizer.
+// thread_num:  intra-op thread count for the ONNX Runtime session.
+//
+// The model's first two input names and first output name are cached for
+// Infer(). If the model cannot be loaded the error is logged and the process
+// exits with a failure status.
+void CTTransformer::InitPunc(const std::string &punc_model, const std::string &punc_config, int thread_num){
+    session_options.SetIntraOpNumThreads(thread_num);
+    session_options.SetGraphOptimizationLevel(ORT_ENABLE_ALL);
+    session_options.DisableCpuMemArena();
+
+    try{
+        m_session = std::make_unique<Ort::Session>(env_, punc_model.c_str(), session_options);
+    }
+    catch (std::exception const &e) {
+        LOG(ERROR) << "Error when load punc onnx model: " << e.what();
+        // Report the failure to the caller's shell; exit(0) would signal success.
+        exit(EXIT_FAILURE);
+    }
+
+    // Start from a clean slate so a second InitPunc() call does not append
+    // duplicate names or keep pointers into strings that have since moved.
+    m_strInputNames.clear();
+    m_strOutputNames.clear();
+    m_szInputNames.clear();
+    m_szOutputNames.clear();
+
+    // read inputnames outputnames
+    string strName;
+    GetInputName(m_session.get(), strName);
+    m_strInputNames.push_back(strName);
+    GetInputName(m_session.get(), strName, 1);
+    m_strInputNames.push_back(strName);
+
+    GetOutputName(m_session.get(), strName);
+    m_strOutputNames.push_back(strName);
+
+    // Collect the C-string views only after both string vectors are final;
+    // any later push_back could reallocate and invalidate the pointers.
+    for (auto& item : m_strInputNames)
+        m_szInputNames.push_back(item.c_str());
+    for (auto& item : m_strOutputNames)
+        m_szOutputNames.push_back(item.c_str());
+
+    m_tokenizer.OpenYaml(punc_config.c_str());
+}
+
+CTTransformer::~CTTransformer()  // Nothing to release by hand; the members clean up after themselves.
+{
+}
+
+// Restores punctuation in `sz_input` and returns the punctuated text.
+//
+// The input is tokenized and processed in windows of TOKEN_LEN tokens. In
+// every window but the last, the text is cut after the last predicted period
+// or question mark. If neither was predicted, more than
+// CACHE_POP_TRIGGER_LIMIT tokens are pending and a comma was predicted, the
+// last comma is promoted to a period and used as the cut. Tokens after the
+// cut are carried over and prepended to the next window. After the last
+// window the text is made to end in a sentence mark: a trailing comma or
+// enumeration comma becomes a period, and a period is appended when the text
+// ends in neither a period nor a question mark.
+string CTTransformer::AddPunc(const char* sz_input)
+{
+    string strResult;
+    vector<string> strOut;
+    vector<int> InputData;
+    m_tokenizer.Tokenize(sz_input, strOut, InputData);
+
+    int nTotalBatch = ceil((float)InputData.size() / TOKEN_LEN);
+    int nCurBatch = -1;
+    int nSentEnd = -1, nLastCommaIndex = -1;
+    vector<int64_t> RemainIDs;   // token ids carried over from the previous window
+    vector<string> RemainStr;    // token texts carried over from the previous window
+    vector<int> NewPunctuation;  // punctuation ids emitted so far
+    vector<string> NewString;    // words and marks emitted so far
+    vector<string> NewSentenceOut;
+    vector<int> NewPuncOut;
+    int nDiff = 0;
+    for (size_t i = 0; i < InputData.size(); i += TOKEN_LEN)
+    {
+        // nDiff trims the last window to the tokens that actually exist.
+        nDiff = (i + TOKEN_LEN) < InputData.size() ? (0) : (i + TOKEN_LEN - InputData.size());
+        vector<int64_t> InputIDs(InputData.begin() + i, InputData.begin() + i + TOKEN_LEN - nDiff);
+        vector<string> InputStr(strOut.begin() + i, strOut.begin() + i + TOKEN_LEN - nDiff);
+        InputIDs.insert(InputIDs.begin(), RemainIDs.begin(), RemainIDs.end()); // RemainIDs+InputIDs;
+        InputStr.insert(InputStr.begin(), RemainStr.begin(), RemainStr.end()); // RemainStr+InputStr;
+
+        auto Punction = Infer(InputIDs);
+        nCurBatch = i / TOKEN_LEN;
+        if (nCurBatch < nTotalBatch - 1) // not the last mini sentence
+        {
+            nSentEnd = -1;
+            nLastCommaIndex = -1;
+            // Scan backwards for the last sentence end, remembering the last
+            // comma on the way as a fallback cut point.
+            for (int nIndex = Punction.size() - 2; nIndex > 0; nIndex--)
+            {
+                if (m_tokenizer.Id2Punc(Punction[nIndex]) == m_tokenizer.Id2Punc(PERIOD_INDEX) || m_tokenizer.Id2Punc(Punction[nIndex]) == m_tokenizer.Id2Punc(QUESTION_INDEX))
+                {
+                    nSentEnd = nIndex;
+                    break;
+                }
+                if (nLastCommaIndex < 0 && m_tokenizer.Id2Punc(Punction[nIndex]) == m_tokenizer.Id2Punc(COMMA_INDEX))
+                {
+                    nLastCommaIndex = nIndex;
+                }
+            }
+            if (nSentEnd < 0 && InputStr.size() > CACHE_POP_TRIGGER_LIMIT && nLastCommaIndex > 0)
+            {
+                nSentEnd = nLastCommaIndex;
+                Punction[nSentEnd] = PERIOD_INDEX;
+            }
+            // Everything after the cut waits for the next window.
+            RemainStr.assign(InputStr.begin() + nSentEnd + 1, InputStr.end());
+            RemainIDs.assign(InputIDs.begin() + nSentEnd + 1, InputIDs.end());
+            InputStr.assign(InputStr.begin(), InputStr.begin() + nSentEnd + 1);  // mini sentence
+            Punction.assign(Punction.begin(), Punction.begin() + nSentEnd + 1);
+        }
+
+        NewPunctuation.insert(NewPunctuation.end(), Punction.begin(), Punction.end());
+        vector<string> WordWithPunc;
+        for (size_t j = 0; j < InputStr.size(); j++)
+        {
+            // A token whose first byte has the high bit clear is ASCII. Two
+            // such tokens in a row are separate words, so keep a space
+            // between them (never after the very first token).
+            if (j > 0 && !(InputStr[j][0] & 0x80) && (j + 1) < InputStr.size() && !(InputStr[j+1][0] & 0x80))
+            {
+                InputStr[j] = InputStr[j] + " ";
+            }
+            WordWithPunc.push_back(InputStr[j]);
+
+            // Emit the predicted mark unless the model chose "no punctuation".
+            if (j < Punction.size() && Punction[j] != NOTPUNC_INDEX)
+            {
+                WordWithPunc.push_back(m_tokenizer.Id2Punc(Punction[j]));
+            }
+        }
+
+        NewString.insert(NewString.end(), WordWithPunc.begin(), WordWithPunc.end()); // new_mini_sentence += "".join(words_with_punc)
+        NewSentenceOut = NewString;
+        NewPuncOut = NewPunctuation;
+        // last mini sentence
+        if(nCurBatch == nTotalBatch - 1 && !NewString.empty())
+        {
+            const string& strLast = NewString.back();
+            if (strLast == m_tokenizer.Id2Punc(COMMA_INDEX) || strLast == m_tokenizer.Id2Punc(DUN_INDEX))
+            {
+                // Replace a dangling comma with a period.
+                NewSentenceOut.assign(NewString.begin(), NewString.end() - 1);
+                NewSentenceOut.push_back(m_tokenizer.Id2Punc(PERIOD_INDEX));
+                NewPuncOut.assign(NewPunctuation.begin(), NewPunctuation.end() - 1);
+                NewPuncOut.push_back(PERIOD_INDEX);
+            }
+            else if (strLast != m_tokenizer.Id2Punc(PERIOD_INDEX) && strLast != m_tokenizer.Id2Punc(QUESTION_INDEX))
+            {
+                // No sentence mark at the end yet: close with a period.
+                NewSentenceOut = NewString;
+                NewSentenceOut.push_back(m_tokenizer.Id2Punc(PERIOD_INDEX));
+                NewPuncOut = NewPunctuation;
+                NewPuncOut.push_back(PERIOD_INDEX);
+            }
+        }
+    }
+    for (auto& item : NewSentenceOut)
+        strResult += item;
+    return strResult;
+}
+
+// Runs the punctuation model on one token-id sequence.
+//
+// input_data: token ids, fed to the model as a 1 x N int64 tensor together
+//             with a one-element int32 tensor holding N.
+//
+// Returns one punctuation id per group of CANDIDATE_NUM output scores, namely
+// the index of the highest score in that group. If the session throws, the
+// error is logged and the process exits with a failure status.
+vector<int> CTTransformer::Infer(vector<int64_t> input_data)
+{
+    Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
+    vector<int> punction;
+    std::array<int64_t, 2> input_shape_{ 1, (int64_t)input_data.size()};
+    // The tensors below wrap the local buffers without copying, so the
+    // buffers must stay alive until Run() returns.
+    Ort::Value onnx_input = Ort::Value::CreateTensor<int64_t>(m_memoryInfo,
+        input_data.data(),
+        input_data.size(),
+        input_shape_.data(),
+        input_shape_.size());
+
+    std::array<int32_t,1> text_lengths{ (int32_t)input_data.size() };
+    std::array<int64_t,1> text_lengths_dim{ 1 };
+    Ort::Value onnx_text_lengths = Ort::Value::CreateTensor(
+        m_memoryInfo,
+        text_lengths.data(),
+        text_lengths.size() * sizeof(int32_t),
+        text_lengths_dim.data(),
+        text_lengths_dim.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32);
+    std::vector<Ort::Value> input_onnx;
+    input_onnx.emplace_back(std::move(onnx_input));
+    input_onnx.emplace_back(std::move(onnx_text_lengths));
+
+    try {
+        auto outputTensor = m_session->Run(Ort::RunOptions{nullptr}, m_szInputNames.data(), input_onnx.data(), m_szInputNames.size(), m_szOutputNames.data(), m_szOutputNames.size());
+        std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
+
+        // Seed with an int64_t: std::accumulate accumulates in the type of
+        // its initial value, and a plain `1` would truncate the product to int.
+        int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), static_cast<int64_t>(1), std::multiplies<int64_t>());
+        float * floatData = outputTensor[0].GetTensorMutableData<float>();
+
+        for (int64_t i = 0; i < outputCount; i += CANDIDATE_NUM)
+        {
+            // NOTE(review): the end iterator stops at CANDIDATE_NUM-1, so the
+            // last candidate of each group can never be selected. Kept as is
+            // because it affects model output; confirm whether it is intended.
+            int index = Argmax(floatData + i, floatData + i + CANDIDATE_NUM-1);
+            punction.push_back(index);
+        }
+    }
+    catch (std::exception const &e)
+    {
+        LOG(ERROR) << "Error when run punc onnx forword: " << (e.what());
+        // Report the failure to the caller's shell; exit(0) would signal success.
+        exit(EXIT_FAILURE);
+    }
+    return punction;
+}
+
--- a/funasr_local/runtime/onnxruntime/src/ct-transformer.h
+++ b/funasr_local/runtime/onnxruntime/src/ct-transformer.h
@@ -0,0 +1,32 @@
+/**
+ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+ * MIT License  (https://opensource.org/licenses/MIT)
+*/
+
+#pragma once 
+
+// Punctuation restorer backed by an ONNX Runtime session.
+// Usage: construct, call InitPunc() once, then AddPunc() per text.
+class CTTransformer {
+/**
+ * Author: Speech Lab of DAMO Academy, Alibaba Group
+ * CT-Transformer: Controllable time-delay transformer for real-time punctuation prediction and disfluency detection
+ * https://arxiv.org/pdf/2003.01309.pdf
+*/
+
+private:
+
+    CTokenizer m_tokenizer;
+    // Input/output tensor names: owned strings plus the C-string views that
+    // are passed to Ort::Session::Run().
+    vector<string> m_strInputNames, m_strOutputNames;
+    vector<const char*> m_szInputNames;
+    vector<const char*> m_szOutputNames;
+
+    // env_ and session_options are declared before m_session on purpose:
+    // members are destroyed in reverse declaration order, so the session is
+    // released while the environment it was created from is still alive.
+    Ort::Env env_;
+    Ort::SessionOptions session_options;
+    std::shared_ptr<Ort::Session> m_session;
+public:
+
+    CTTransformer();
+    // Loads the ONNX model and tokenizer config; must precede Infer()/AddPunc().
+    void InitPunc(const std::string &punc_model, const std::string &punc_config, int thread_num);
+    ~CTTransformer();
+    // Returns one predicted punctuation id per input token id.
+    vector<int>  Infer(vector<int64_t> input_data);
+    // Returns `sz_input` with punctuation inserted.
+    string AddPunc(const char* sz_input);
+};
--- a/funasr_local/runtime/onnxruntime/src/e2e-vad.h
+++ b/funasr_local/runtime/onnxruntime/src/e2e-vad.h
@@ -0,0 +1,791 @@
+/**
+ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+ * MIT License  (https://opensource.org/licenses/MIT)
+ * Collaborators: zhuzizyf(China Telecom Shanghai)
+*/
+
+#include <utility>
+#include <vector>
+#include <string>
+#include <map>
+#include <cmath>
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <cassert>
+
+
+// Phase of the end-point detector within the current utterance.
+// Only the first enumerator is numbered explicitly; the rest follow on (2, 3).
+enum class VadStateMachine {
+    kVadInStateStartPointNotDetected = 1,
+    kVadInStateInSpeechSegment,
+    kVadInStateEndPointDetected
+};
+
+// Per-frame classification, listed in ascending numeric order (-1, 0, 1).
+enum class FrameState {
+    kFrameStateInvalid = -1,
+    kFrameStateSil,
+    kFrameStateSpeech
+};
+
+// Final voiced/unvoiced transition reported for each frame.
+// Enumerators take the implicit values 0..5 in declaration order.
+enum class AudioChangeState {
+    kChangeStateSpeech2Speech,
+    kChangeStateSpeech2Sil,
+    kChangeStateSil2Sil,
+    kChangeStateSil2Speech,
+    kChangeStateNoBegin,
+    kChangeStateInvalid
+};
+
+// Detection mode; enumerators take the implicit values 0 and 1.
+enum class VadDetectMode {
+    kVadSingleUtteranceDetectMode,
+    kVadMutipleUtteranceDetectMode
+};
+
+/**
+ * Bag of tunable parameters for the VAD post-processing.
+ *
+ * All fields are public. The constructor stores its arguments verbatim;
+ * every argument has a default, so `VADXOptions()` yields the stock
+ * configuration. Constructor parameters carry the same name as the field
+ * they initialise.
+ */
+class VADXOptions {
+public:
+    int sample_rate;
+    int detect_mode;
+    int snr_mode;
+    int max_end_silence_time;
+    int max_start_silence_time;
+    bool do_start_point_detection;
+    bool do_end_point_detection;
+    int window_size_ms;
+    int sil_to_speech_time_thres;
+    int speech_to_sil_time_thres;
+    float speech_2_noise_ratio;
+    int do_extend;
+    int lookback_time_start_point;
+    int lookahead_time_end_point;
+    int max_single_segment_time;
+    int nn_eval_block_size;
+    int dcd_block_size;
+    float snr_thres;
+    int noise_frame_num_used_for_snr;
+    float decibel_thres;
+    float speech_noise_thres;
+    float fe_prior_thres;
+    int silence_pdf_num;
+    std::vector<int> sil_pdf_ids;
+    float speech_noise_thresh_low;
+    float speech_noise_thresh_high;
+    bool output_frame_probs;
+    int frame_in_ms;
+    int frame_length_ms;
+
+    explicit VADXOptions(
+            int sample_rate = 16000,
+            int detect_mode = static_cast<int>(VadDetectMode::kVadMutipleUtteranceDetectMode),
+            int snr_mode = 0,
+            int max_end_silence_time = 800,
+            int max_start_silence_time = 3000,
+            bool do_start_point_detection = true,
+            bool do_end_point_detection = true,
+            int window_size_ms = 200,
+            int sil_to_speech_time_thres = 150,
+            int speech_to_sil_time_thres = 150,
+            float speech_2_noise_ratio = 1.0,
+            int do_extend = 1,
+            int lookback_time_start_point = 200,
+            int lookahead_time_end_point = 100,
+            int max_single_segment_time = 15000,
+            int nn_eval_block_size = 8,
+            int dcd_block_size = 4,
+            float snr_thres = -100.0,
+            int noise_frame_num_used_for_snr = 100,
+            float decibel_thres = -100.0,
+            float speech_noise_thres = 0.9,
+            float fe_prior_thres = 1e-4,
+            int silence_pdf_num = 1,
+            std::vector<int> sil_pdf_ids = {0},
+            float speech_noise_thresh_low = -0.1,
+            float speech_noise_thresh_high = 0.3,
+            bool output_frame_probs = false,
+            int frame_in_ms = 10,
+            int frame_length_ms = 25
+    ) :
+            // Inside each initialiser the parenthesised name is the parameter,
+            // the name in front of it is the member.
+            sample_rate(sample_rate), detect_mode(detect_mode), snr_mode(snr_mode),
+            max_end_silence_time(max_end_silence_time),
+            max_start_silence_time(max_start_silence_time),
+            do_start_point_detection(do_start_point_detection),
+            do_end_point_detection(do_end_point_detection),
+            window_size_ms(window_size_ms),
+            sil_to_speech_time_thres(sil_to_speech_time_thres),
+            speech_to_sil_time_thres(speech_to_sil_time_thres),
+            speech_2_noise_ratio(speech_2_noise_ratio),
+            do_extend(do_extend),
+            lookback_time_start_point(lookback_time_start_point),
+            lookahead_time_end_point(lookahead_time_end_point),
+            max_single_segment_time(max_single_segment_time),
+            nn_eval_block_size(nn_eval_block_size),
+            dcd_block_size(dcd_block_size),
+            snr_thres(snr_thres),
+            noise_frame_num_used_for_snr(noise_frame_num_used_for_snr),
+            decibel_thres(decibel_thres),
+            speech_noise_thres(speech_noise_thres),
+            fe_prior_thres(fe_prior_thres),
+            silence_pdf_num(silence_pdf_num),
+            sil_pdf_ids(std::move(sil_pdf_ids)),
+            speech_noise_thresh_low(speech_noise_thresh_low),
+            speech_noise_thresh_high(speech_noise_thresh_high),
+            output_frame_probs(output_frame_probs),
+            frame_in_ms(frame_in_ms),
+            frame_length_ms(frame_length_ms) {}
+};
+
+/**
+ * One output segment: its start/end time in milliseconds, a sample buffer,
+ * flags telling whether the segment's start and end points are included,
+ * and a `doa` value.
+ */
+class E2EVadSpeechBufWithDoa {
+public:
+    int start_ms = 0;
+    int end_ms = 0;
+    std::vector<float> buffer;
+    bool contain_seg_start_point = false;
+    bool contain_seg_end_point = false;
+    int doa = 0;
+
+    // Every field starts from its in-class default above.
+    E2EVadSpeechBufWithDoa() {}
+
+    // Puts the object back into its freshly-constructed state.
+    void Reset() {
+        buffer.clear();
+        start_ms = end_ms = 0;
+        contain_seg_start_point = contain_seg_end_point = false;
+        doa = 0;
+    }
+};
+
+/**
+ * Per-frame probability record: noise/speech log-probabilities, the score
+ * they were derived from, the frame index and a frame-state value.
+ */
+class E2EVadFrameProb {
+public:
+    double noise_prob = 0.0;
+    double speech_prob = 0.0;
+    double score = 0.0;
+    int frame_id = 0;
+    int frm_state = 0;
+
+    // All fields take the zero defaults declared above.
+    E2EVadFrameProb() {}
+};
+
+/**
+ * Sliding-window smoother over per-frame speech/silence decisions.
+ *
+ * The last `win_size_frame` decisions are kept in a ring buffer (`win_state`,
+ * 1 = speech, 0 = silence) together with their running sum (`win_sum`).
+ * A silence->speech transition is reported once the sum reaches
+ * `sil_to_speech_frmcnt_thres`; a speech->silence transition once it drops to
+ * `speech_to_sil_frmcnt_thres` or below.
+ */
+class WindowDetector {
+public:
+    int window_size_ms;
+    int sil_to_speech_time;
+    int speech_to_sil_time;
+    int frame_size_ms;
+    int win_size_frame;
+    int win_sum;
+    std::vector<int> win_state;
+    int cur_win_pos;
+    FrameState pre_frame_state;
+    FrameState cur_frame_state;
+    int sil_to_speech_frmcnt_thres;
+    int speech_to_sil_frmcnt_thres;
+    int voice_last_frame_count;
+    int noise_last_frame_count;
+    int hydre_frame_count;
+
+    /**
+     * @param window_size_ms      length of the smoothing window
+     * @param sil_to_speech_time  speech time inside the window needed to enter speech
+     * @param speech_to_sil_time  speech time inside the window at or below which speech ends
+     * @param frame_size_ms       duration of one frame; durations are converted to
+     *                            frame counts by integer division by this value
+     *
+     * A `frame_size_ms` of zero yields frame counts of zero instead of dividing
+     * by zero; the detector then reports every frame as invalid.
+     */
+    WindowDetector(int window_size_ms, int sil_to_speech_time, int speech_to_sil_time, int frame_size_ms) :
+            window_size_ms(window_size_ms),
+            sil_to_speech_time(sil_to_speech_time),
+            speech_to_sil_time(speech_to_sil_time),
+            frame_size_ms(frame_size_ms),
+            win_size_frame(ToFrames(window_size_ms, frame_size_ms)),
+            win_sum(0),
+            win_state(std::vector<int>(win_size_frame, 0)),
+            cur_win_pos(0),
+            pre_frame_state(FrameState::kFrameStateSil),
+            cur_frame_state(FrameState::kFrameStateSil),
+            sil_to_speech_frmcnt_thres(ToFrames(sil_to_speech_time, frame_size_ms)),
+            speech_to_sil_frmcnt_thres(ToFrames(speech_to_sil_time, frame_size_ms)),
+            voice_last_frame_count(0),
+            noise_last_frame_count(0),
+            hydre_frame_count(0) {}
+
+    // Clears the window and returns to the initial (silence) state.
+    void Reset() {
+        cur_win_pos = 0;
+        win_sum = 0;
+        win_state = std::vector<int>(win_size_frame, 0);
+        pre_frame_state = FrameState::kFrameStateSil;
+        cur_frame_state = FrameState::kFrameStateSil;
+        voice_last_frame_count = 0;
+        noise_last_frame_count = 0;
+        hydre_frame_count = 0;
+    }
+
+    // Window length in frames.
+    int GetWinSize() const {
+        return win_size_frame;
+    }
+
+    /**
+     * Feeds one frame decision into the window and reports the resulting
+     * transition.
+     *
+     * @param frameState   decision for the frame; anything other than speech or
+     *                     silence yields kChangeStateInvalid without touching the window
+     * @param frame_count  unused; kept so existing callers keep compiling
+     * @return the smoothed transition, or kChangeStateInvalid when the window
+     *         has no slots
+     */
+    AudioChangeState DetectOneFrame(FrameState frameState, int frame_count) {
+        (void) frame_count;
+        if (frameState != FrameState::kFrameStateSpeech && frameState != FrameState::kFrameStateSil) {
+            return AudioChangeState::kChangeStateInvalid;
+        }
+        // An empty window would otherwise index out of bounds and take a modulo by zero.
+        if (win_size_frame <= 0 || win_state.empty()) {
+            return AudioChangeState::kChangeStateInvalid;
+        }
+        // Named differently from the `cur_frame_state` member (a FrameState) to avoid shadowing it.
+        const int frame_flag = (frameState == FrameState::kFrameStateSpeech) ? 1 : 0;
+        // Replace the oldest slot and keep the running sum in step.
+        win_sum -= win_state[cur_win_pos];
+        win_sum += frame_flag;
+        win_state[cur_win_pos] = frame_flag;
+        cur_win_pos = (cur_win_pos + 1) % win_size_frame;
+        if (pre_frame_state == FrameState::kFrameStateSil && win_sum >= sil_to_speech_frmcnt_thres) {
+            pre_frame_state = FrameState::kFrameStateSpeech;
+            return AudioChangeState::kChangeStateSil2Speech;
+        }
+        if (pre_frame_state == FrameState::kFrameStateSpeech && win_sum <= speech_to_sil_frmcnt_thres) {
+            pre_frame_state = FrameState::kFrameStateSil;
+            return AudioChangeState::kChangeStateSpeech2Sil;
+        }
+        if (pre_frame_state == FrameState::kFrameStateSil) {
+            return AudioChangeState::kChangeStateSil2Sil;
+        }
+        if (pre_frame_state == FrameState::kFrameStateSpeech) {
+            return AudioChangeState::kChangeStateSpeech2Speech;
+        }
+        return AudioChangeState::kChangeStateInvalid;
+    }
+
+    // Frame duration in milliseconds, as passed to the constructor.
+    int FrameSizeMs() const {
+        return frame_size_ms;
+    }
+
+private:
+    // Integer division that maps a zero frame duration to zero frames.
+    static int ToFrames(int duration_ms, int frame_ms) {
+        return frame_ms != 0 ? duration_ms / frame_ms : 0;
+    }
+};
+
+class E2EVadModel {
+public:
+    /**
+     * Creates a detector with default VADXOptions and brings every piece of
+     * detection state to its initial value.
+     */
+    E2EVadModel() {
+        vad_opts = VADXOptions();
+
+        // Values derived from the options.
+        sil_pdf_ids = vad_opts.sil_pdf_ids;
+        speech_noise_thres = vad_opts.speech_noise_thres;
+        max_end_sil_frame_cnt_thresh = vad_opts.max_end_silence_time - vad_opts.speech_to_sil_time_thres;
+
+        // Stream-level bookkeeping.
+        is_final = false;
+        max_time_out = false;
+        pre_end_silence_detected = false;
+        next_seg = true;
+        data_buf_start_frame = 0;
+        frm_cnt = 0;
+        number_end_time_detected = 0;
+        output_data_buf_offset = 0;
+        noise_average_decibel = -100.0;
+
+        // Per-segment state; ResetDetection() below assigns the same values again.
+        latest_confirmed_speech_frame = 0;
+        lastest_confirmed_silence_frame = -1;
+        continous_silence_frame_count = 0;
+        vad_state_machine = VadStateMachine::kVadInStateStartPointNotDetected;
+        confirmed_start_frame = -1;
+        confirmed_end_frame = -1;
+        sil_frame = 0;
+
+        ResetDetection();
+    }
+
+    /**
+     * Processes one chunk and returns the segments that became reportable.
+     *
+     * @param score      per-frame score rows for this chunk (indexed by the ids in sil_pdf_ids)
+     * @param waveform   samples of this chunk; copied into the `waveform` member
+     * @param is_final   true for the last chunk; selects DetectLastFrames() and triggers
+     *                   AllResetDetection() before returning. This parameter shadows the
+     *                   member of the same name, which is not written here.
+     * @param online     selects the streaming reporting rules (see below)
+     * @param max_end_sil              used to recompute max_end_sil_frame_cnt_thresh
+     * @param max_single_segment_time  stored into vad_opts
+     * @param speech_noise_thres       stored into vad_opts only; the `speech_noise_thres`
+     *                   member compared in GetFrameState() is refreshed from vad_opts in the
+     *                   constructor and in AllResetDetection(), not here
+     * @param sample_rate              stored into vad_opts
+     * @return list of {start_ms, end_ms} pairs. In online mode -1 stands for
+     *         "start already reported" or "end not known yet".
+     */
+    std::vector<std::vector<int>>
+    operator()(const std::vector<std::vector<float>> &score, const std::vector<float> &waveform, bool is_final = false,
+               bool online = false, int max_end_sil = 800, int max_single_segment_time = 15000,
+               float speech_noise_thres = 0.8, int sample_rate = 16000) {
+        max_end_sil_frame_cnt_thresh = max_end_sil - vad_opts.speech_to_sil_time_thres;
+        this->waveform = waveform;
+        this->vad_opts.max_single_segment_time = max_single_segment_time;
+        this->vad_opts.speech_noise_thres = speech_noise_thres;
+        this->vad_opts.sample_rate = sample_rate;
+
+        ComputeDecibel();
+        ComputeScores(score);
+        if (!is_final) {
+            DetectCommonFrames();
+        } else {
+            DetectLastFrames();
+        }
+
+        std::vector<std::vector<int>> segment_batch;
+        if (output_data_buf.size() > 0) {
+            // Start at the first segment that has not been fully reported yet.
+            // output_data_buf_offset is advanced inside the loop; `i` was initialised
+            // from its value on entry and advances independently.
+            for (size_t i = output_data_buf_offset; i < output_data_buf.size(); i++) {
+              int start_ms;
+              int end_ms;
+              if (online) {
+
+                // Streaming: a segment is reported as soon as its start is known and
+                // again when its end is known.
+                if (!output_data_buf[i].contain_seg_start_point) {
+                  continue;
+                }
+                // Start was already reported and the end is still unknown: nothing new.
+                if (!next_seg && !output_data_buf[i].contain_seg_end_point) {
+                  continue;
+                }
+                start_ms = next_seg ? output_data_buf[i].start_ms : -1;
+
+                if (output_data_buf[i].contain_seg_end_point) {
+                  end_ms = output_data_buf[i].end_ms;
+                  next_seg = true;
+                  output_data_buf_offset += 1;
+                } else {
+                  end_ms = -1;
+                  next_seg = false;
+                }
+              } else {
+                // Offline: only complete segments are reported, except on the final
+                // chunk where whatever is buffered is reported.
+                if (!is_final &&
+                    (!output_data_buf[i].contain_seg_start_point || !output_data_buf[i].contain_seg_end_point)) {
+                  continue;
+                }
+                start_ms = output_data_buf[i].start_ms;
+                end_ms = output_data_buf[i].end_ms;
+                output_data_buf_offset += 1;
+              }
+                std::vector<int> segment = {start_ms, end_ms};
+                segment_batch.push_back(segment);
+            }
+        }
+
+        if (is_final) {
+            AllResetDetection();
+        }
+        return segment_batch;
+    }
+
+private:
+    // Options; operator() overwrites max_single_segment_time, speech_noise_thres and
+    // sample_rate on every call, ComputeScores() overwrites nn_eval_block_size.
+    VADXOptions vad_opts;
+    // Smoother with a fixed 200 ms window, 150 ms thresholds and 10 ms frames.
+    WindowDetector windows_detector = WindowDetector(200, 150, 150, 10);
+    bool is_final;
+    // Index of the first frame still accounted for in data_buf_size.
+    int data_buf_start_frame;
+    // Total number of score frames received so far (accumulated in ComputeScores()).
+    int frm_cnt;
+    int latest_confirmed_speech_frame;
+    int lastest_confirmed_silence_frame;
+    int continous_silence_frame_count;
+    VadStateMachine vad_state_machine;
+    // -1 means "not set" for both confirmed frames.
+    int confirmed_start_frame;
+    int confirmed_end_frame;
+    int number_end_time_detected;
+    int sil_frame;
+    // Column indices into a score row that are summed in GetFrameState().
+    std::vector<int> sil_pdf_ids;
+    // Running noise level in dB; values below -99.9 are treated as "not estimated yet".
+    float noise_average_decibel;
+    bool pre_end_silence_detected;
+    // Online mode: true when the next reported segment must carry its start time.
+    bool next_seg;
+    std::vector<E2EVadSpeechBufWithDoa> output_data_buf;
+    // Number of leading entries of output_data_buf already reported by operator().
+    int output_data_buf_offset;
+    std::vector<E2EVadFrameProb> frame_probs;
+    // Compared against silence duration in milliseconds in DetectOneFrame().
+    int max_end_sil_frame_cnt_thresh;
+    float speech_noise_thres;
+    // Score rows of the current chunk only.
+    std::vector<std::vector<float>> scores;
+    // Number of frames consumed by earlier chunks; subtracted from an absolute
+    // frame index to address `scores`.
+    int idx_pre_chunk = 0;
+    bool max_time_out;
+    // One value per frame, appended by ComputeDecibel().
+    std::vector<float> decibel;
+    // Sample counts maintained by ComputeDecibel() and PopDataBufTillFrame().
+    int data_buf_size = 0;
+    int data_buf_all_size = 0;
+    // Samples of the current chunk (copied in operator()).
+    std::vector<float> waveform;
+
+    /**
+     * Returns the whole model to its freshly-constructed state so the next
+     * call to operator() starts a new stream. vad_opts is kept as is.
+     */
+    void AllResetDetection() {
+        is_final = false;
+        data_buf_start_frame = 0;
+        frm_cnt = 0;
+        latest_confirmed_speech_frame = 0;
+        lastest_confirmed_silence_frame = -1;
+        continous_silence_frame_count = 0;
+        vad_state_machine = VadStateMachine::kVadInStateStartPointNotDetected;
+        confirmed_start_frame = -1;
+        confirmed_end_frame = -1;
+        number_end_time_detected = 0;
+        sil_frame = 0;
+        sil_pdf_ids = vad_opts.sil_pdf_ids;
+        noise_average_decibel = -100.0;
+        pre_end_silence_detected = false;
+        next_seg = true;
+        output_data_buf.clear();
+        output_data_buf_offset = 0;
+        frame_probs.clear();
+        max_end_sil_frame_cnt_thresh = vad_opts.max_end_silence_time - vad_opts.speech_to_sil_time_thres;
+        speech_noise_thres = vad_opts.speech_noise_thres;
+        scores.clear();
+        idx_pre_chunk = 0;
+        max_time_out = false;
+        decibel.clear();
+        // Assign the members: declaring `int data_buf_size = 0;` here would only create
+        // locals and leave the sample counters of the previous stream in place.
+        data_buf_size = 0;
+        data_buf_all_size = 0;
+        waveform.clear();
+        ResetDetection();
+    }
+
+    /**
+     * Resets the per-segment state (state machine, confirmed frames, window
+     * smoother, frame probabilities) so a new segment can be detected.
+     */
+    void ResetDetection() {
+        vad_state_machine = VadStateMachine::kVadInStateStartPointNotDetected;
+        windows_detector.Reset();
+        frame_probs.clear();
+
+        confirmed_start_frame = -1;
+        confirmed_end_frame = -1;
+        lastest_confirmed_silence_frame = -1;
+
+        latest_confirmed_speech_frame = 0;
+        continous_silence_frame_count = 0;
+        sil_frame = 0;
+    }
+
+    /**
+     * Updates the sample counters for the current chunk and appends one energy
+     * value (10 * log10 of the sum of squared samples, floored by 1e-6) per
+     * frame of `waveform` to `decibel`.
+     *
+     * Frames are `frame_length_ms` long and advance by `frame_in_ms`. A chunk
+     * shorter than one frame contributes no values.
+     */
+    void ComputeDecibel() {
+        const int frame_sample_length = int(vad_opts.frame_length_ms * vad_opts.sample_rate / 1000);
+        const int frame_shift_length = int(vad_opts.frame_in_ms * vad_opts.sample_rate / 1000);
+        const int waveform_size = static_cast<int>(waveform.size());
+        if (data_buf_all_size == 0) {
+            data_buf_all_size = waveform_size;
+            data_buf_size = data_buf_all_size;
+        } else {
+            data_buf_all_size += waveform_size;
+        }
+        // A non-positive shift would never advance the loop below.
+        if (frame_shift_length <= 0) {
+            return;
+        }
+        // Signed comparison on purpose: with the unsigned `waveform.size() - frame_sample_length + 1`
+        // a chunk shorter than one frame wraps around to a huge bound and the loop
+        // reads past the end of `waveform`.
+        for (int offset = 0; offset + frame_sample_length <= waveform_size; offset += frame_shift_length) {
+            float sum = 0.0;
+            for (int i = 0; i < frame_sample_length; i++) {
+                sum += waveform[offset + i] * waveform[offset + i];
+            }
+            this->decibel.push_back(10 * log10(sum + 0.000001));
+        }
+    }
+
+    /**
+     * Stores the score rows of the current chunk and accounts for their frames.
+     *
+     * @param scores  one row per frame; replaces the `scores` member, its row
+     *                count becomes the evaluation block size and is added to frm_cnt
+     */
+    void ComputeScores(const std::vector<std::vector<float>> &scores) {
+        const auto chunk_frames = scores.size();
+        vad_opts.nn_eval_block_size = chunk_frames;
+        frm_cnt += chunk_frames;
+        this->scores = scores;
+    }
+
+    /**
+     * Advances data_buf_start_frame towards `frame_idx`, one frame shift at a
+     * time, shrinking data_buf_size accordingly.
+     *
+     * Stops early when fewer samples than one frame shift remain; without that
+     * exit the loop would spin forever because nothing inside it could change.
+     */
+    void PopDataBufTillFrame(int frame_idx) {
+        const int frame_sample_length = int(vad_opts.frame_in_ms * vad_opts.sample_rate / 1000);
+        while (data_buf_start_frame < frame_idx) {
+            if (data_buf_size < frame_sample_length) {
+                break;
+            }
+            data_buf_start_frame += 1;
+            data_buf_size = data_buf_all_size - data_buf_start_frame * frame_sample_length;
+        }
+    }
+
+    /**
+     * Extends the current output segment (or opens a new one) by `frm_cnt`
+     * frames starting at `start_frm`.
+     *
+     * @param start_frm                 first frame to append
+     * @param frm_cnt                   number of frames to append; shadows the member of
+     *                                  the same name, so every `frm_cnt` below is this argument
+     * @param first_frm_is_start_point  open a new segment and mark it as containing its start
+     * @param last_frm_is_end_point     mark the segment as containing its end
+     * @param end_point_is_sent_end     take at least everything left in the data buffer
+     *
+     * Inconsistencies are only reported on std::cout; processing continues.
+     */
+    void PopDataToOutputBuf(int start_frm, int frm_cnt, bool first_frm_is_start_point, bool last_frm_is_end_point,
+                            bool end_point_is_sent_end) {
+        PopDataBufTillFrame(start_frm);
+        int expected_sample_number = int(frm_cnt * vad_opts.sample_rate * vad_opts.frame_in_ms / 1000);
+        if (last_frm_is_end_point) {
+            // The last frame is a full frame long rather than one shift long.
+            int extra_sample = std::max(0, int(vad_opts.frame_length_ms * vad_opts.sample_rate / 1000 -
+                                               vad_opts.sample_rate * vad_opts.frame_in_ms / 1000));
+            expected_sample_number += int(extra_sample);
+        }
+        if (end_point_is_sent_end) {
+            expected_sample_number = std::max(expected_sample_number, data_buf_size);
+        }
+        if (data_buf_size < expected_sample_number) {
+            std::cout << "error in calling pop data_buf\n";
+        }
+        if (output_data_buf.size() == 0 || first_frm_is_start_point) {
+            // Open a new, empty segment that starts (and for now ends) at start_frm.
+            output_data_buf.push_back(E2EVadSpeechBufWithDoa());
+            output_data_buf[output_data_buf.size() - 1].Reset();
+            output_data_buf[output_data_buf.size() - 1].start_ms = start_frm * vad_opts.frame_in_ms;
+            output_data_buf[output_data_buf.size() - 1].end_ms = output_data_buf[output_data_buf.size() - 1].start_ms;
+            output_data_buf[output_data_buf.size() - 1].doa = 0;
+        }
+        E2EVadSpeechBufWithDoa &cur_seg = output_data_buf.back();
+        // The appended frames are expected to be contiguous with the segment so far.
+        if (cur_seg.end_ms != start_frm * vad_opts.frame_in_ms) {
+            std::cout << "warning\n";
+        }
+        // out_pos is incremented below but never read.
+        int out_pos = (int) cur_seg.buffer.size();
+        int data_to_pop;
+        if (end_point_is_sent_end) {
+            data_to_pop = expected_sample_number;
+        } else {
+            data_to_pop = int(frm_cnt * vad_opts.frame_in_ms * vad_opts.sample_rate / 1000);
+        }
+        if (data_to_pop > data_buf_size) {
+            std::cout << "VAD data_to_pop is bigger than data_buf.size()!!!\n";
+            data_to_pop = data_buf_size;
+            expected_sample_number = data_buf_size;
+        }
+        cur_seg.doa = 0;
+        // NOTE(review): `data_buf` is not among the members declared in the visible part of
+        // this class (only data_buf_size / data_buf_all_size are) - confirm where it comes from.
+        // Both loops append `data_buf.back()` once per sample, i.e. the same trailing
+        // element each time rather than a range of samples.
+        for (int sample_cpy_out = 0; sample_cpy_out < data_to_pop; sample_cpy_out++) {
+            cur_seg.buffer.push_back(data_buf.back());
+            out_pos++;
+        }
+        for (int sample_cpy_out = data_to_pop; sample_cpy_out < expected_sample_number; sample_cpy_out++) {
+            cur_seg.buffer.push_back(data_buf.back());
+            out_pos++;
+        }
+        if (cur_seg.end_ms != start_frm * vad_opts.frame_in_ms) {
+            std::cout << "Something wrong with the VAD algorithm\n";
+        }
+        data_buf_start_frame += frm_cnt;
+        cur_seg.end_ms = (start_frm + frm_cnt) * vad_opts.frame_in_ms;
+        if (first_frm_is_start_point) {
+            cur_seg.contain_seg_start_point = true;
+        }
+        if (last_frm_is_end_point) {
+            cur_seg.contain_seg_end_point = true;
+        }
+    }
+
+    /**
+     * Records `valid_frame` as the latest confirmed silence frame and, while
+     * no start point has been found yet, discards buffered data up to it.
+     */
+    void OnSilenceDetected(int valid_frame) {
+        const bool start_point_pending =
+                (vad_state_machine == VadStateMachine::kVadInStateStartPointNotDetected);
+        lastest_confirmed_silence_frame = valid_frame;
+        if (start_point_pending) {
+            PopDataBufTillFrame(valid_frame);
+        }
+    }
+
+    /**
+     * Records `valid_frame` as the latest confirmed speech frame and appends
+     * that single frame to the current output segment (neither as a start nor
+     * as an end point).
+     */
+    void OnVoiceDetected(int valid_frame) {
+        const int single_frame = 1;
+        const bool is_start_point = false;
+        const bool is_end_point = false;
+        const bool is_sentence_end = false;
+        latest_confirmed_speech_frame = valid_frame;
+        PopDataToOutputBuf(valid_frame, single_frame, is_start_point, is_end_point, is_sentence_end);
+    }
+
+    /**
+     * Confirms the start frame of a segment.
+     *
+     * @param start_frame  frame to record, unless a start frame is already set
+     *                     (in which case a message is printed and the old value kept)
+     * @param fake_result  when true, nothing is appended to the output buffer
+     */
+    void OnVoiceStart(int start_frame, bool fake_result = false) {
+        if (confirmed_start_frame == -1) {
+            confirmed_start_frame = start_frame;
+        } else {
+            std::cout << "not reset vad properly\n";
+        }
+        if (fake_result) {
+            return;
+        }
+        if (vad_state_machine != VadStateMachine::kVadInStateStartPointNotDetected) {
+            return;
+        }
+        // Open a new output segment with the confirmed start frame as its first frame.
+        PopDataToOutputBuf(confirmed_start_frame, 1, true, false, false);
+    }
+
+
+    /**
+     * Confirms the end frame of a segment.
+     *
+     * Frames between the latest confirmed speech frame and `end_frame` are
+     * first appended as speech. The end frame is then recorded (unless one is
+     * already set, which only prints a message) and, for a real result,
+     * appended as the segment's end point.
+     *
+     * @param end_frame      frame at which the segment ends
+     * @param fake_result    when true, the end frame is not appended to the output buffer
+     * @param is_last_frame  forwarded as the "sentence end" flag of the final append
+     */
+    void OnVoiceEnd(int end_frame, bool fake_result, bool is_last_frame) {
+        int t = latest_confirmed_speech_frame + 1;
+        while (t < end_frame) {
+            OnVoiceDetected(t);
+            ++t;
+        }
+        if (confirmed_end_frame == -1) {
+            confirmed_end_frame = end_frame;
+        } else {
+            std::cout << "not reset vad properly\n";
+        }
+        if (!fake_result) {
+            sil_frame = 0;
+            PopDataToOutputBuf(confirmed_end_frame, 1, false, true, is_last_frame);
+        }
+        ++number_end_time_detected;
+    }
+
+    /**
+     * On the final frame, closes the current segment at `cur_frm_idx` and
+     * moves the state machine to "end point detected"; otherwise does nothing.
+     */
+    void MaybeOnVoiceEndIfLastFrame(bool is_final_frame, int cur_frm_idx) {
+        if (!is_final_frame) {
+            return;
+        }
+        OnVoiceEnd(cur_frm_idx, false, true);
+        vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+    }
+
+    // Start-point latency in milliseconds: latency in frames times the frame shift.
+    int GetLatency() {
+        const int latency_frames = LatencyFrmNumAtStartPoint();
+        return int(latency_frames * vad_opts.frame_in_ms);
+    }
+
+    /**
+     * Number of frames by which a detected start point is moved back: the
+     * smoothing window length, plus the configured look-back when
+     * `do_extend` is set.
+     */
+    int LatencyFrmNumAtStartPoint() {
+        const int window_frames = windows_detector.GetWinSize();
+        if (!vad_opts.do_extend) {
+            return window_frames;
+        }
+        return window_frames + int(vad_opts.lookback_time_start_point / vad_opts.frame_in_ms);
+    }
+
+    /**
+     * Classifies absolute frame `t` as speech or silence from its energy and
+     * its score row.
+     *
+     * @param t  absolute frame index; `decibel` is indexed with it directly,
+     *           `scores` with `t - idx_pre_chunk`
+     * @return kFrameStateSpeech or kFrameStateSil
+     *
+     * Side effects: may update noise_average_decibel, may append to
+     * frame_probs, and on the low-energy path calls DetectOneFrame() itself.
+     */
+    FrameState GetFrameState(int t) {
+        FrameState frame_state = FrameState::kFrameStateInvalid;
+        float cur_decibel = decibel[t];
+        float cur_snr = cur_decibel - noise_average_decibel;
+        if (cur_decibel < vad_opts.decibel_thres) {
+            // Too quiet to be speech: silence without consulting the scores.
+            // NOTE(review): the frame is fed to DetectOneFrame() here, and DetectCommonFrames() /
+            // DetectLastFrames() feed the returned state again - confirm this double call is intended.
+            frame_state = FrameState::kFrameStateSil;
+            DetectOneFrame(frame_state, t, false);
+            return frame_state;
+        }
+        float sum_score = 0.0;
+        float noise_prob = 0.0;
+        assert(sil_pdf_ids.size() == vad_opts.silence_pdf_num);
+        if (sil_pdf_ids.size() > 0) {
+            // Sum the silence columns; the remainder to 1.0 is taken as the speech score.
+            std::vector<float> sil_pdf_scores;
+            for (auto sil_pdf_id: sil_pdf_ids) {
+                sil_pdf_scores.push_back(scores[t - idx_pre_chunk][sil_pdf_id]);
+            }
+            sum_score = accumulate(sil_pdf_scores.begin(), sil_pdf_scores.end(), 0.0);
+            noise_prob = log(sum_score) * vad_opts.speech_2_noise_ratio;
+            float total_score = 1.0;
+            sum_score = total_score - sum_score;
+        }
+        // From here on sum_score is the speech score (or 0.0 when there are no silence ids).
+        float speech_prob = log(sum_score);
+        if (vad_opts.output_frame_probs) {
+            E2EVadFrameProb frame_prob;
+            frame_prob.noise_prob = noise_prob;
+            frame_prob.speech_prob = speech_prob;
+            frame_prob.score = sum_score;
+            frame_prob.frame_id = t;
+            frame_probs.push_back(frame_prob);
+        }
+        if (exp(speech_prob) >= exp(noise_prob) + speech_noise_thres) {
+            // Scores say speech; still require enough SNR and energy.
+            if (cur_snr >= vad_opts.snr_thres && cur_decibel >= vad_opts.decibel_thres) {
+                frame_state = FrameState::kFrameStateSpeech;
+            } else {
+                frame_state = FrameState::kFrameStateSil;
+            }
+        } else {
+            frame_state = FrameState::kFrameStateSil;
+            // Fold this frame into the noise estimate: first value is taken as is,
+            // later ones enter a running average over noise_frame_num_used_for_snr frames.
+            if (noise_average_decibel < -99.9) {
+                noise_average_decibel = cur_decibel;
+            } else {
+                noise_average_decibel =
+                        (cur_decibel + noise_average_decibel * (vad_opts.noise_frame_num_used_for_snr - 1)) /
+                        vad_opts.noise_frame_num_used_for_snr;
+            }
+        }
+        return frame_state;
+    }
+
+    /**
+     * Runs detection over the frames of the current (non-final) chunk, oldest
+     * first, then advances idx_pre_chunk by the chunk's row count.
+     *
+     * Does nothing when the state machine already sits in "end point
+     * detected". Always returns 0.
+     */
+    int DetectCommonFrames() {
+        if (vad_state_machine != VadStateMachine::kVadInStateEndPointDetected) {
+            // `remaining` counts how many frames of the chunk are still to be processed.
+            for (int remaining = vad_opts.nn_eval_block_size; remaining > 0; --remaining) {
+                const int frame_idx = frm_cnt - remaining;
+                const FrameState frame_state = GetFrameState(frame_idx);
+                DetectOneFrame(frame_state, frame_idx, false);
+            }
+            idx_pre_chunk += scores.size();
+        }
+        return 0;
+    }
+
+    /**
+     * Runs detection over the frames of the final chunk, oldest first; the
+     * very last frame is passed on with the "final frame" flag set.
+     *
+     * Does nothing when the state machine already sits in "end point
+     * detected". Always returns 0.
+     */
+    int DetectLastFrames() {
+        if (vad_state_machine != VadStateMachine::kVadInStateEndPointDetected) {
+            // `remaining` counts how many frames of the chunk are still to be processed.
+            for (int remaining = vad_opts.nn_eval_block_size; remaining > 0; --remaining) {
+                const int frame_idx = frm_cnt - remaining;
+                const FrameState frame_state = GetFrameState(frame_idx);
+                const bool is_final_frame = (remaining == 1);
+                DetectOneFrame(frame_state, frame_idx, is_final_frame);
+            }
+        }
+        return 0;
+    }
+
+    /**
+     * Advances the VAD state machine by one frame.
+     *
+     * The raw frame state is smoothed by windows_detector; the resulting
+     * transition, combined with the current vad_state_machine value, decides
+     * whether a segment starts, continues or ends.
+     *
+     * @param cur_frm_state   raw classification of the frame
+     * @param cur_frm_idx     absolute index of the frame
+     * @param is_final_frame  true for the last frame of the stream
+     */
+    void DetectOneFrame(FrameState cur_frm_state, int cur_frm_idx, bool is_final_frame) {
+        FrameState tmp_cur_frm_state = FrameState::kFrameStateInvalid;
+        if (cur_frm_state == FrameState::kFrameStateSpeech) {
+            // The left-hand side is the constant 1.0, so this only depends on fe_prior_thres.
+            if (std::fabs(1.0) > vad_opts.fe_prior_thres) {
+                tmp_cur_frm_state = FrameState::kFrameStateSpeech;
+            } else {
+                tmp_cur_frm_state = FrameState::kFrameStateSil;
+            }
+        } else if (cur_frm_state == FrameState::kFrameStateSil) {
+            tmp_cur_frm_state = FrameState::kFrameStateSil;
+        }
+        AudioChangeState state_change = windows_detector.DetectOneFrame(tmp_cur_frm_state, cur_frm_idx);
+        int frm_shift_in_ms = vad_opts.frame_in_ms;
+        if (AudioChangeState::kChangeStateSil2Speech == state_change) {
+            // silence_frame_count is captured but not used afterwards.
+            int silence_frame_count = continous_silence_frame_count;
+            continous_silence_frame_count = 0;
+            pre_end_silence_detected = false;
+            int start_frame = 0;
+            if (vad_state_machine == VadStateMachine::kVadInStateStartPointNotDetected) {
+                // Segment start: move the start back by the detection latency, but not
+                // before the first frame still held in the data buffer.
+                start_frame = std::max(data_buf_start_frame, cur_frm_idx - LatencyFrmNumAtStartPoint());
+                OnVoiceStart(start_frame);
+                vad_state_machine = VadStateMachine::kVadInStateInSpeechSegment;
+                for (int t = start_frame + 1; t <= cur_frm_idx; t++) {
+                    OnVoiceDetected(t);
+                }
+            } else if (vad_state_machine == VadStateMachine::kVadInStateInSpeechSegment) {
+                // Speech resumes inside a segment: fill the gap since the last confirmed speech frame.
+                for (int t = latest_confirmed_speech_frame + 1; t < cur_frm_idx; t++) {
+                    OnVoiceDetected(t);
+                }
+                // Segment longer than max_single_segment_time: force an end point.
+                if (cur_frm_idx - confirmed_start_frame + 1 > vad_opts.max_single_segment_time / frm_shift_in_ms) {
+                    OnVoiceEnd(cur_frm_idx, false, false);
+                    vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+                } else if (!is_final_frame) {
+                    OnVoiceDetected(cur_frm_idx);
+                } else {
+                    MaybeOnVoiceEndIfLastFrame(is_final_frame, cur_frm_idx);
+                }
+            }
+        } else if (AudioChangeState::kChangeStateSpeech2Sil == state_change) {
+            continous_silence_frame_count = 0;
+            if (vad_state_machine == VadStateMachine::kVadInStateStartPointNotDetected) {
+                // do nothing
+            } else if (vad_state_machine == VadStateMachine::kVadInStateInSpeechSegment) {
+                if (cur_frm_idx - confirmed_start_frame + 1 >
+                    vad_opts.max_single_segment_time / frm_shift_in_ms) {
+                    OnVoiceEnd(cur_frm_idx, false, false);
+                    vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+                } else if (!is_final_frame) {
+                    OnVoiceDetected(cur_frm_idx);
+                } else {
+                    MaybeOnVoiceEndIfLastFrame(is_final_frame, cur_frm_idx);
+                }
+            }
+        } else if (AudioChangeState::kChangeStateSpeech2Speech == state_change) {
+            continous_silence_frame_count = 0;
+            if (vad_state_machine == VadStateMachine::kVadInStateInSpeechSegment) {
+                if (cur_frm_idx - confirmed_start_frame + 1 >
+                    vad_opts.max_single_segment_time / frm_shift_in_ms) {
+                    // Only this branch records the time-out in max_time_out.
+                    max_time_out = true;
+                    OnVoiceEnd(cur_frm_idx, false, false);
+                    vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+                } else if (!is_final_frame) {
+                    OnVoiceDetected(cur_frm_idx);
+                } else {
+                    MaybeOnVoiceEndIfLastFrame(is_final_frame, cur_frm_idx);
+                }
+            }
+        } else if (AudioChangeState::kChangeStateSil2Sil == state_change) {
+            continous_silence_frame_count += 1;
+            if (vad_state_machine == VadStateMachine::kVadInStateStartPointNotDetected) {
+                // No speech yet. Give up (report a fake start/end) when the leading silence is
+                // too long in single-utterance mode, or when the stream ends without any segment.
+                if ((vad_opts.detect_mode == static_cast<int>(VadDetectMode::kVadSingleUtteranceDetectMode) &&
+                     (continous_silence_frame_count * frm_shift_in_ms > vad_opts.max_start_silence_time)) ||
+                    (is_final_frame && number_end_time_detected == 0)) {
+                    for (int t = lastest_confirmed_silence_frame + 1; t < cur_frm_idx; t++) {
+                        OnSilenceDetected(t);
+                    }
+                    OnVoiceStart(0, true);
+                    OnVoiceEnd(0, true, false);
+                    vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+                } else {
+                    // Confirm silence only for frames older than the start-point latency.
+                    if (cur_frm_idx >= LatencyFrmNumAtStartPoint()) {
+                        OnSilenceDetected(cur_frm_idx - LatencyFrmNumAtStartPoint());
+                    }
+                }
+            } else if (vad_state_machine == VadStateMachine::kVadInStateInSpeechSegment) {
+                // Trailing silence long enough: end the segment, placing the end point
+                // lookback_frame frames before the current one.
+                if (continous_silence_frame_count * frm_shift_in_ms >= max_end_sil_frame_cnt_thresh) {
+                    int lookback_frame = max_end_sil_frame_cnt_thresh / frm_shift_in_ms;
+                    if (vad_opts.do_extend) {
+                        lookback_frame -= vad_opts.lookahead_time_end_point / frm_shift_in_ms;
+                        lookback_frame -= 1;
+                        lookback_frame = std::max(0, lookback_frame);
+                    }
+                    OnVoiceEnd(cur_frm_idx - lookback_frame, false, false);
+                    vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+                } else if (cur_frm_idx - confirmed_start_frame + 1 >
+                           vad_opts.max_single_segment_time / frm_shift_in_ms) {
+                    OnVoiceEnd(cur_frm_idx, false, false);
+                    vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+                } else if (vad_opts.do_extend && !is_final_frame) {
+                    // Keep the first few silent frames (look-ahead) as part of the segment.
+                    if (continous_silence_frame_count <= vad_opts.lookahead_time_end_point / frm_shift_in_ms) {
+                        OnVoiceDetected(cur_frm_idx);
+                    }
+                } else {
+                    MaybeOnVoiceEndIfLastFrame(is_final_frame, cur_frm_idx);
+                }
+            }
+        }
+        // In multi-utterance mode a finished segment immediately re-arms the detector.
+        if (vad_state_machine == VadStateMachine::kVadInStateEndPointDetected &&
+            vad_opts.detect_mode == static_cast<int>(VadDetectMode::kVadMutipleUtteranceDetectMode)) {
+            ResetDetection();
+        }
+    }
+
+};
+
+
+
--- a/funasr_local/runtime/onnxruntime/src/fsmn-vad.cpp
+++ b/funasr_local/runtime/onnxruntime/src/fsmn-vad.cpp
@@ -0,0 +1,300 @@
+/**
+ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+ * MIT License  (https://opensource.org/licenses/MIT)
+*/
+
+#include <fstream>
+#include "precomp.h"
+
+/**
+ * Configure the ONNX Runtime session options, then load everything the VAD
+ * needs: the ONNX model, the CMVN statistics, the YAML configuration and the
+ * zero-initialised model caches (in that order).
+ *
+ * @param vad_model   path of the VAD ONNX model file
+ * @param vad_cmvn    path of the CMVN statistics file
+ * @param vad_config  path of the VAD YAML configuration file
+ */
+void FsmnVad::InitVad(const std::string &vad_model, const std::string &vad_cmvn, const std::string &vad_config) {
+    // The options have to be final before ReadModel() builds the session from them.
+    session_options_.SetIntraOpNumThreads(1)
+                    .SetGraphOptimizationLevel(ORT_ENABLE_ALL)
+                    .DisableCpuMemArena();
+
+    const char *model_file = vad_model.c_str();
+    const char *cmvn_file = vad_cmvn.c_str();
+    const char *config_file = vad_config.c_str();
+
+    ReadModel(model_file);
+    LoadCmvn(cmvn_file);
+    LoadConfigFromYaml(config_file);
+    InitCache();
+}
+
+/**
+ * Read the VAD settings from a YAML file.
+ *
+ * Values are taken from the "frontend_conf" and "vad_post_conf" maps and
+ * stored in the VAD thresholds and in fbank_opts. Any failure to load the
+ * file or to convert a value is logged and terminates the process with
+ * exit(-1).
+ *
+ * @param filename  path of the YAML configuration file
+ */
+void FsmnVad::LoadConfigFromYaml(const char* filename){
+
+    YAML::Node root;
+    try {
+        root = YAML::LoadFile(filename);
+    } catch (std::exception const &) {
+        LOG(ERROR) << "Error loading file, yaml file error or not exist.";
+        exit(-1);
+    }
+
+    try {
+        YAML::Node frontend = root["frontend_conf"];
+        YAML::Node post = root["vad_post_conf"];
+
+        // Post-processing thresholds.
+        vad_sample_rate_ = frontend["fs"].as<int>();
+        vad_silence_duration_ = post["max_end_silence_time"].as<int>();
+        vad_max_len_ = post["max_single_segment_time"].as<int>();
+        vad_speech_noise_thres_ = post["speech_noise_thres"].as<double>();
+
+        // Filter-bank front end; samp_freq reuses the sample rate read above.
+        auto &frame_opts = fbank_opts.frame_opts;
+        auto &mel_opts = fbank_opts.mel_opts;
+        frame_opts.dither = frontend["dither"].as<float>();
+        mel_opts.num_bins = frontend["n_mels"].as<int>();
+        frame_opts.samp_freq = (float)vad_sample_rate_;
+        frame_opts.window_type = frontend["window"].as<std::string>();
+        frame_opts.frame_shift_ms = frontend["frame_shift"].as<float>();
+        frame_opts.frame_length_ms = frontend["frame_length"].as<float>();
+        fbank_opts.energy_floor = 0;
+        mel_opts.debug_mel = false;
+    } catch (std::exception const &) {
+        LOG(ERROR) << "Error when load argument from vad config YAML.";
+        exit(-1);
+    }
+}
+
+/**
+ * Create the ONNX Runtime session for the VAD model and collect the names of
+ * its inputs and outputs.
+ *
+ * A failure to load the model is logged and terminates the process with a
+ * non-zero status (exit(-1), matching LoadConfigFromYaml) so that callers and
+ * scripts can detect the error.
+ *
+ * @param vad_model  path of the VAD ONNX model file
+ */
+void FsmnVad::ReadModel(const char* vad_model) {
+    try {
+        vad_session_ = std::make_shared<Ort::Session>(
+                env_, vad_model, session_options_);
+    } catch (std::exception const &e) {
+        LOG(ERROR) << "Error when load vad onnx model: " << e.what();
+        exit(-1);
+    }
+    GetInputOutputInfo(vad_session_, &vad_in_names_, &vad_out_names_);
+}
+
+/**
+ * Collect the input and output node names of an ONNX Runtime session.
+ *
+ * Ownership of every name string returned by the session is released on
+ * purpose: the raw pointers stored in the vectors must stay valid for as long
+ * as the vectors are used, and nothing in this function frees them.
+ *
+ * @param session    session to query
+ * @param in_names   receives one pointer per model input, in index order
+ * @param out_names  receives one pointer per model output, in index order
+ */
+void FsmnVad::GetInputOutputInfo(
+        const std::shared_ptr<Ort::Session> &session,
+        std::vector<const char *> *in_names, std::vector<const char *> *out_names) {
+    Ort::AllocatorWithDefaultOptions allocator;
+
+    // Input names
+    const size_t num_inputs = session->GetInputCount();
+    in_names->resize(num_inputs);
+    for (size_t i = 0; i < num_inputs; ++i) {
+        (*in_names)[i] = session->GetInputNameAllocated(i, allocator).release();
+    }
+
+    // Output names
+    const size_t num_outputs = session->GetOutputCount();
+    out_names->resize(num_outputs);
+    for (size_t i = 0; i < num_outputs; ++i) {
+        (*out_names)[i] = session->GetOutputNameAllocated(i, allocator).release();
+    }
+}
+
+
+/**
+ * Run one forward pass of the VAD ONNX model.
+ *
+ * The frames in chunk_feats are flattened into a {1, frames, feature_dim}
+ * tensor and fed to the model together with the cached state tensors in
+ * in_cache_. The first model output is copied into out_prob (one vector per
+ * output frame); the following outputs overwrite in_cache_ so that the next
+ * call continues from the updated state.
+ *
+ * @param chunk_feats  input feature frames; when empty, out_prob is cleared
+ *                     and the model is not run
+ * @param out_prob     receives the per-frame model scores
+ *
+ * Inference errors and malformed model outputs are logged and terminate the
+ * process with exit(-1).
+ */
+void FsmnVad::Forward(
+        const std::vector<std::vector<float>> &chunk_feats,
+        std::vector<std::vector<float>> *out_prob) {
+    // An empty batch has no first frame to read the feature dimension from.
+    if (chunk_feats.empty()) {
+        out_prob->clear();
+        return;
+    }
+
+    Ort::MemoryInfo memory_info =
+            Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
+
+    const int64_t num_frames = static_cast<int64_t>(chunk_feats.size());
+    const int64_t feature_dim = static_cast<int64_t>(chunk_feats[0].size());
+
+    // 1. Flatten the frames into one contiguous buffer.
+    // vad node { batch,frame number,feature dim }
+    const int64_t vad_feats_shape[3] = {1, num_frames, feature_dim};
+    std::vector<float> vad_feats;
+    vad_feats.reserve(chunk_feats.size() * chunk_feats[0].size());
+    for (const auto &chunk_feat: chunk_feats) {
+        vad_feats.insert(vad_feats.end(), chunk_feat.begin(), chunk_feat.end());
+    }
+
+    // 2. Build the input list: features first, then one tensor per cache.
+    std::vector<Ort::Value> vad_inputs;
+    vad_inputs.emplace_back(Ort::Value::CreateTensor<float>(
+            memory_info, vad_feats.data(), vad_feats.size(), vad_feats_shape, 3));
+    // cache node {batch,128,19,1}
+    const int64_t cache_feats_shape[4] = {1, 128, 19, 1};
+    for (auto &cache: in_cache_) {
+        vad_inputs.emplace_back(Ort::Value::CreateTensor<float>(
+                memory_info, cache.data(), cache.size(), cache_feats_shape, 4));
+    }
+
+    // 3. Onnx infer
+    std::vector<Ort::Value> vad_ort_outputs;
+    try {
+        vad_ort_outputs = vad_session_->Run(
+                Ort::RunOptions{nullptr}, vad_in_names_.data(), vad_inputs.data(),
+                vad_inputs.size(), vad_out_names_.data(), vad_out_names_.size());
+    } catch (std::exception const &e) {
+        LOG(ERROR) << "Error when run vad onnx forword: " << (e.what());
+        exit(-1);
+    }
+
+    // The scores plus one updated tensor per cache are required below.
+    if (vad_ort_outputs.size() < in_cache_.size() + 1) {
+        LOG(ERROR) << "Unexpected vad onnx output count: " << vad_ort_outputs.size();
+        exit(-1);
+    }
+
+    // 4. Copy the scores, one row per output frame.
+    const float *logp_data = vad_ort_outputs[0].GetTensorMutableData<float>();
+    const std::vector<int64_t> out_shape =
+            vad_ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape();
+    if (out_shape.size() < 3 || out_shape[1] < 0 || out_shape[2] < 0) {
+        LOG(ERROR) << "Unexpected vad onnx output shape.";
+        exit(-1);
+    }
+    const size_t num_outputs = static_cast<size_t>(out_shape[1]);
+    const size_t output_dim = static_cast<size_t>(out_shape[2]);
+    out_prob->resize(num_outputs);
+    for (size_t i = 0; i < num_outputs; i++) {
+        const float *row = logp_data + i * output_dim;
+        (*out_prob)[i].assign(row, row + output_dim);
+    }
+
+    // 5. Store the updated caches; never copy more than either side holds.
+    for (size_t i = 0; i < in_cache_.size(); i++) {
+        Ort::Value &cache_out = vad_ort_outputs[i + 1];
+        const size_t produced = cache_out.GetTensorTypeAndShapeInfo().GetElementCount();
+        const size_t count = produced < in_cache_[i].size() ? produced : in_cache_[i].size();
+        memcpy(in_cache_[i].data(), cache_out.GetTensorMutableData<float>(),
+               sizeof(float) * count);
+    }
+}
+
+/**
+ * Compute filter-bank features for a waveform and append them to vad_feats.
+ *
+ * @param sample_rate  sample rate passed to the fbank extractor
+ * @param vad_feats    receives one vector of fbank_opts.mel_opts.num_bins
+ *                     values per ready frame (appended, not replaced)
+ * @param waves        input samples; may be empty
+ */
+void FsmnVad::FbankKaldi(float sample_rate, std::vector<std::vector<float>> &vad_feats,
+                         const std::vector<float> &waves) {
+    knf::OnlineFbank fbank(fbank_opts);
+
+    // data() is valid for an empty vector, unlike &waves[0].
+    fbank.AcceptWaveform(sample_rate, waves.data(), waves.size());
+    const int32_t frames = fbank.NumFramesReady();
+    if (frames > 0) {
+        vad_feats.reserve(vad_feats.size() + frames);
+    }
+    for (int32_t i = 0; i < frames; ++i) {
+        const float *frame = fbank.GetFrame(i);
+        // Build the frame vector in place instead of copying a temporary.
+        vad_feats.emplace_back(frame, frame + fbank_opts.mel_opts.num_bins);
+    }
+}
+
+/**
+ * Load the CMVN statistics file.
+ *
+ * The file is scanned line by line. A line whose first token is "<AddShift>"
+ * or "<Rescale>" announces a value line that follows it; when that value line
+ * starts with "<LearnRateCoef>", its tokens from index 3 up to (but not
+ * including) the last one are parsed as floats and appended to means_list
+ * (for <AddShift>) or vars_list (for <Rescale>).
+ *
+ * Blank lines are skipped. A file that cannot be opened or a value that
+ * cannot be parsed is logged and terminates the process with exit(-1).
+ *
+ * @param filename  path of the CMVN file
+ */
+void FsmnVad::LoadCmvn(const char *filename)
+{
+    try{
+        using namespace std;
+        ifstream cmvn_stream(filename);
+        if (!cmvn_stream.is_open()) {
+            LOG(ERROR) << "Failed to open file: " << filename;
+            exit(-1);
+        }
+        string line;
+
+        while (getline(cmvn_stream, line)) {
+            istringstream iss(line);
+            vector<string> line_item{istream_iterator<string>{iss}, istream_iterator<string>{}};
+            // Blank lines have no first token to inspect.
+            if (line_item.empty()) {
+                continue;
+            }
+            const bool is_shift = line_item[0] == "<AddShift>";
+            const bool is_scale = line_item[0] == "<Rescale>";
+            if (!is_shift && !is_scale) {
+                continue;
+            }
+
+            // The values live on the line that follows the section header.
+            if (!getline(cmvn_stream, line)) {
+                break;
+            }
+            istringstream values_stream(line);
+            vector<string> tokens{istream_iterator<string>{values_stream}, istream_iterator<string>{}};
+            if (tokens.empty() || tokens[0] != "<LearnRateCoef>") {
+                continue;
+            }
+
+            vector<float> &target = is_shift ? means_list : vars_list;
+            // j + 1 < size() skips the last token without unsigned underflow.
+            for (size_t j = 3; j + 1 < tokens.size(); j++) {
+                target.push_back(stof(tokens[j]));
+            }
+        }
+    }catch(std::exception const &e) {
+        LOG(ERROR) << "Error when load vad cmvn : " << e.what();
+        exit(-1);
+    }
+}
+
+/**
+ * Apply low-frame-rate (LFR) stacking followed by CMVN, in place.
+ *
+ * The input is left-padded with (lfr_m - 1) / 2 copies of its first frame.
+ * Each output frame concatenates lfr_m consecutive input frames, with the
+ * window start advancing by lfr_n frames per output frame. Windows that run
+ * past the end of the input are completed with copies of the last frame.
+ * Finally every output value j covered by the CMVN statistics is mapped to
+ * (value + means_list[j]) * vars_list[j].
+ *
+ * @param vad_feats  input frames; replaced by the stacked, normalised frames.
+ *                   An empty input is returned unchanged.
+ * @return reference to vad_feats
+ */
+std::vector<std::vector<float>> &FsmnVad::LfrCmvn(std::vector<std::vector<float>> &vad_feats) {
+    // Nothing to pad or stack; also avoids indexing vad_feats[0] below.
+    if (vad_feats.empty()) {
+        return vad_feats;
+    }
+
+    std::vector<std::vector<float>> out_feats;
+    int T = vad_feats.size();
+    const int T_lrf = ceil(1.0 * T / lfr_n);
+
+    // Pad frames at start(copy first frame)
+    const int left_pad = (lfr_m - 1) / 2;
+    if (left_pad > 0) {
+        const std::vector<float> first_frame = vad_feats[0];
+        vad_feats.insert(vad_feats.begin(), left_pad, first_frame);
+    }
+    T = T + left_pad;
+
+    // Merge lfr_m frames as one,lfr_n frames per window
+    if (T_lrf > 0) {
+        out_feats.reserve(T_lrf);
+    }
+    const std::vector<float> &last_frame = vad_feats.back();
+    std::vector<float> p;
+    for (int i = 0; i < T_lrf; i++) {
+        const int start = i * lfr_n;
+        const int available = T - start;
+        const int copied = available < lfr_m ? available : lfr_m;
+
+        // Start every window from an empty buffer. Several tail windows can
+        // need padding, and each must hold exactly lfr_m frames.
+        p.clear();
+        for (int j = 0; j < copied; j++) {
+            p.insert(p.end(), vad_feats[start + j].begin(), vad_feats[start + j].end());
+        }
+        // Fill to lfr_m frames if fewer frames remain (copy last frame)
+        for (int j = copied; j < lfr_m; j++) {
+            p.insert(p.end(), last_frame.begin(), last_frame.end());
+        }
+        out_feats.emplace_back(p);
+    }
+
+    // Apply cmvn, limited to the indices present in both statistics vectors
+    // and in the frame itself.
+    const size_t cmvn_dim = means_list.size() < vars_list.size() ? means_list.size() : vars_list.size();
+    for (auto &out_feat: out_feats) {
+        const size_t dim = cmvn_dim < out_feat.size() ? cmvn_dim : out_feat.size();
+        for (size_t j = 0; j < dim; j++) {
+            out_feat[j] = (out_feat[j] + means_list[j]) * vars_list[j];
+        }
+    }
+    vad_feats.swap(out_feats);
+    return vad_feats;
+}
+
+/**
+ * Run the whole VAD pipeline on one waveform: fbank extraction, LFR + CMVN,
+ * model forward pass, and scoring by E2EVadModel.
+ *
+ * @param waves  input samples
+ * @return the segments produced by the E2EVadModel scorer
+ */
+std::vector<std::vector<int>>
+FsmnVad::Infer(const std::vector<float> &waves) {
+    // Feature extraction; LfrCmvn rewrites the frames in place.
+    std::vector<std::vector<float>> feats;
+    FbankKaldi(vad_sample_rate_, feats, waves);
+    LfrCmvn(feats);
+
+    // Model scores for every frame.
+    std::vector<std::vector<float>> probs;
+    Forward(feats, &probs);
+
+    // Turn the scores into segments.
+    E2EVadModel scorer = E2EVadModel();
+    std::vector<std::vector<int>> segments =
+            scorer(probs, waves, true, false, vad_silence_duration_, vad_max_len_,
+                   vad_speech_noise_thres_, vad_sample_rate_);
+    return segments;
+}
+
+/**
+ * Append four zero-filled cache buffers of 128 * 19 * 1 floats to in_cache_.
+ */
+void FsmnVad::InitCache(){
+  const std::vector<float> zero_cache(128 * 19 * 1, 0);
+  in_cache_.insert(in_cache_.end(), 4, zero_cache);
+}
+
+/**
+ * Drop the current model caches and recreate them zero-filled.
+ */
+void FsmnVad::Reset(){
+  // Empty the list first: InitCache() appends rather than replaces.
+  in_cache_.clear();
+  InitCache();
+}
+
+void FsmnVad::Test() {  // Empty body: no test logic is implemented here.
+}
+
+/**
+ * Create the ONNX Runtime environment (error-level logging, empty log id) and
+ * value-initialised session options. The model itself is loaded by InitVad().
+ */
+FsmnVad::FsmnVad()
+    : env_(ORT_LOGGING_LEVEL_ERROR, ""),
+      session_options_{} {
+}
--- a/funasr_local/runtime/onnxruntime/src/fsmn-vad.h
+++ b/funasr_local/runtime/onnxruntime/src/fsmn-vad.h
@@ -0,0 +1,67 @@
+/**
+ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+ * MIT License  (https://opensource.org/licenses/MIT)
+*/
+
+#ifndef VAD_SERVER_FSMNVAD_H
+#define VAD_SERVER_FSMNVAD_H
+
+#include "precomp.h"
+
+class FsmnVad {
+/**
+ * Author: Speech Lab of DAMO Academy, Alibaba Group
+ * Deep-FSMN for Large Vocabulary Continuous Speech Recognition
+ * https://arxiv.org/abs/1803.05030
+ *
+ * Voice activity detector backed by an ONNX Runtime session.
+ * Typical use: construct, call InitVad() once, then call Infer() per
+ * waveform; Reset() re-creates the zero-filled model caches.
+*/
+
+public:
+    FsmnVad();  // creates the ORT environment and default session options
+    void Test();  // empty placeholder
+    void InitVad(const std::string &vad_model, const std::string &vad_cmvn, const std::string &vad_config);  // loads model, cmvn, yaml config and caches
+
+    std::vector<std::vector<int>> Infer(const std::vector<float> &waves);  // fbank -> LFR/CMVN -> Forward -> E2EVadModel scoring
+    void Reset();  // clears in_cache_ and calls InitCache()
+
+private:
+
+    void ReadModel(const char* vad_model);  // builds vad_session_ and fills the name lists
+    void LoadConfigFromYaml(const char* filename);  // fills thresholds and fbank_opts
+
+    static void GetInputOutputInfo(
+            const std::shared_ptr<Ort::Session> &session,
+            std::vector<const char *> *in_names, std::vector<const char *> *out_names);  // collects node names of a session
+
+    void FbankKaldi(float sample_rate, std::vector<std::vector<float>> &vad_feats,
+                    const std::vector<float> &waves);  // appends one fbank frame vector per ready frame
+
+    std::vector<std::vector<float>> &LfrCmvn(std::vector<std::vector<float>> &vad_feats);  // in-place frame stacking + normalisation
+
+    void Forward(
+            const std::vector<std::vector<float>> &chunk_feats,
+            std::vector<std::vector<float>> *out_prob);  // runs the model and updates in_cache_
+
+    void LoadCmvn(const char *filename);  // fills means_list / vars_list
+    void InitCache();  // appends four zero-filled buffers to in_cache_
+
+    std::shared_ptr<Ort::Session> vad_session_ = nullptr;  // created by ReadModel()
+    Ort::Env env_;  // declared before session_options_; the constructor initialises both
+    Ort::SessionOptions session_options_;  // configured in InitVad()
+    std::vector<const char *> vad_in_names_;  // model input names, passed to Run()
+    std::vector<const char *> vad_out_names_;  // model output names, passed to Run()
+    std::vector<std::vector<float>> in_cache_;  // model state carried between Forward() calls
+    
+    knf::FbankOptions fbank_opts;  // filled by LoadConfigFromYaml()
+    std::vector<float> means_list;  // added to each feature in LfrCmvn()
+    std::vector<float> vars_list;  // multiplied into each feature in LfrCmvn()
+
+    int vad_sample_rate_ = MODEL_SAMPLE_RATE;  // overwritten by yaml "fs"
+    int vad_silence_duration_ = VAD_SILENCE_DURATION;  // overwritten by yaml "max_end_silence_time"
+    int vad_max_len_ = VAD_MAX_LEN;  // overwritten by yaml "max_single_segment_time"
+    double vad_speech_noise_thres_ = VAD_SPEECH_NOISE_THRES;  // overwritten by yaml "speech_noise_thres"
+    int lfr_m = VAD_LFR_M;  // frames concatenated per LFR output frame
+    int lfr_n = VAD_LFR_N;  // frame step between LFR windows
+};
+
+
+#endif //VAD_SERVER_FSMNVAD_H
--- a/funasr_local/runtime/onnxruntime/src/funasr-onnx-offline-rtf.cpp
+++ b/funasr_local/runtime/onnxruntime/src/funasr-onnx-offline-rtf.cpp
@@ -0,0 +1,188 @@
+/**
+ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+ * MIT License  (https://opensource.org/licenses/MIT)
+*/
+
+#ifndef _WIN32
+#include <sys/time.h>
+#else
+#include <win_func.h>
+#endif
+
+#include <glog/logging.h>
+#include "libfunasrapi.h"
+#include "tclap/CmdLine.h"
+#include "com-define.h"
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <vector>
+#include <atomic>
+#include <mutex>
+#include <thread>
+#include <map>
+
+using namespace std;
+
+std::atomic<int> wav_index(0);
+std::mutex mtx;
+
+/**
+ * Worker routine of the RTF benchmark.
+ *
+ * Performs one untimed warm-up recognition, then repeatedly claims the next
+ * unprocessed index from the shared atomic counter wav_index, recognises that
+ * file and accumulates the elapsed wall-clock time and the audio length.
+ * When the list is exhausted the per-thread totals are merged under mtx:
+ * audio lengths are summed, and *total_time keeps the largest per-thread time.
+ *
+ * @param asr_handle    recogniser handle shared by all workers
+ * @param wav_list      wav file paths; an empty list makes the worker return
+ *                      immediately
+ * @param total_length  shared sum of recognised audio length
+ * @param total_time    shared maximum of per-thread recognition time, in
+ *                      microseconds
+ * @param core_id       worker index (currently unused)
+ */
+void runReg(FUNASR_HANDLE asr_handle, vector<string> wav_list, 
+            float* total_length, long* total_time, int core_id) {
+    (void)core_id;
+
+    // Without any file there is nothing to warm up with or to measure.
+    if (wav_list.empty()) {
+        return;
+    }
+
+    struct timeval start, end;
+    float n_total_length = 0.0f;
+    long n_total_time = 0;
+
+    // warm up; the result is not needed, so release it right away
+    FunASRFreeResult(FunASRRecogFile(asr_handle, wav_list[0].c_str(), RASR_NONE, NULL));
+
+    const size_t wav_count = wav_list.size();
+    while (true) {
+        // Claim the next index atomically so each file is processed once.
+        const size_t i = static_cast<size_t>(wav_index.fetch_add(1));
+        if (i >= wav_count) {
+            break;
+        }
+
+        gettimeofday(&start, NULL);
+        FUNASR_RESULT result=FunASRRecogFile(asr_handle, wav_list[i].c_str(), RASR_NONE, NULL);
+        gettimeofday(&end, NULL);
+
+        const long seconds = (end.tv_sec - start.tv_sec);
+        const long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
+        n_total_time += taking_micros;
+
+        if(result){
+            string msg = FunASRGetResult(result, 0);
+            LOG(INFO) << "Thread: " << this_thread::get_id() <<" Result: " << msg.c_str();
+
+            float snippet_time = FunASRGetRetSnippetTime(result);
+            n_total_length += snippet_time;
+            FunASRFreeResult(result);
+        }else{
+            LOG(ERROR) << ("No return data!\n");
+        }
+    }
+    {
+        lock_guard<mutex> guard(mtx);
+        *total_length += n_total_length;
+        // Threads run in parallel, so the slowest one defines the wall time.
+        if(*total_time < n_total_time){
+            *total_time = n_total_time;
+        }
+    }
+}
+
+/**
+ * Copy a command-line argument into the option map when it was supplied.
+ *
+ * @param value_arg   parsed command-line argument
+ * @param key         map key to store the value under
+ * @param model_path  option map; an existing entry for key is left untouched
+ */
+void GetValue(TCLAP::ValueArg<std::string>& value_arg, string key, std::map<std::string, std::string>& model_path)
+{
+    // Arguments that were not given on the command line are not recorded.
+    if (!value_arg.isSet()) {
+        return;
+    }
+    model_path.insert({key, value_arg.getValue()});
+    LOG(INFO)<< key << " : " << value_arg.getValue();
+}
+
+/**
+ * Entry point of the multi-threaded real-time-factor (RTF) benchmark.
+ *
+ * Parses the model paths, the wav scp list and the thread count, initialises
+ * the recogniser, runs runReg() on the requested number of threads and logs
+ * the total audio length, the computation time, the RTF and the speedup.
+ *
+ * @return 0 on success, -1 when the thread count is invalid, the scp file
+ *         cannot be opened or it contains no usable entry; the process exits
+ *         with -1 when the recogniser cannot be initialised.
+ */
+int main(int argc, char *argv[])
+{
+    google::InitGoogleLogging(argv[0]);
+    FLAGS_logtostderr = true;
+
+    TCLAP::CmdLine cmd("funasr-onnx-offline-rtf", ' ', "1.0");
+    TCLAP::ValueArg<std::string> vad_model("", VAD_MODEL_PATH, "vad model path", false, "", "string");
+    TCLAP::ValueArg<std::string> vad_cmvn("", VAD_CMVN_PATH, "vad cmvn path", false, "", "string");
+    TCLAP::ValueArg<std::string> vad_config("", VAD_CONFIG_PATH, "vad config path", false, "", "string");
+
+    TCLAP::ValueArg<std::string> am_model("", AM_MODEL_PATH, "am model path", false, "", "string");
+    TCLAP::ValueArg<std::string> am_cmvn("", AM_CMVN_PATH, "am cmvn path", false, "", "string");
+    TCLAP::ValueArg<std::string> am_config("", AM_CONFIG_PATH, "am config path", false, "", "string");
+
+    TCLAP::ValueArg<std::string> punc_model("", PUNC_MODEL_PATH, "punc model path", false, "", "string");
+    TCLAP::ValueArg<std::string> punc_config("", PUNC_CONFIG_PATH, "punc config path", false, "", "string");
+
+    TCLAP::ValueArg<std::string> wav_scp("", WAV_SCP, "wave scp path", true, "", "string");
+    TCLAP::ValueArg<std::int32_t> thread_num("", THREAD_NUM, "multi-thread num for rtf", true, 0, "int32_t");
+
+    cmd.add(vad_model);
+    cmd.add(vad_cmvn);
+    cmd.add(vad_config);
+    cmd.add(am_model);
+    cmd.add(am_cmvn);
+    cmd.add(am_config);
+    cmd.add(punc_model);
+    cmd.add(punc_config);
+    cmd.add(wav_scp);
+    cmd.add(thread_num);
+    cmd.parse(argc, argv);
+
+    std::map<std::string, std::string> model_path;
+    GetValue(vad_model, VAD_MODEL_PATH, model_path);
+    GetValue(vad_cmvn, VAD_CMVN_PATH, model_path);
+    GetValue(vad_config, VAD_CONFIG_PATH, model_path);
+    GetValue(am_model, AM_MODEL_PATH, model_path);
+    GetValue(am_cmvn, AM_CMVN_PATH, model_path);
+    GetValue(am_config, AM_CONFIG_PATH, model_path);
+    GetValue(punc_model, PUNC_MODEL_PATH, model_path);
+    GetValue(punc_config, PUNC_CONFIG_PATH, model_path);
+    GetValue(wav_scp, WAV_SCP, model_path);
+
+    // Reject a useless thread count before paying for model loading.
+    const int rtf_threds = thread_num.getValue();
+    if (rtf_threds < 1) {
+        LOG(ERROR) << "Thread num must be at least 1, got " << rtf_threds;
+        return -1;
+    }
+
+    struct timeval start, end;
+    gettimeofday(&start, NULL);
+    FUNASR_HANDLE asr_handle=FunASRInit(model_path, 1);
+
+    if (!asr_handle)
+    {
+        LOG(ERROR) << "FunASR init failed";
+        exit(-1);
+    }
+
+    gettimeofday(&end, NULL);
+    long seconds = (end.tv_sec - start.tv_sec);
+    long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
+    LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
+
+    // read wav_scp: each line is "<id> <wav path>"
+    vector<string> wav_list;
+    if(model_path.find(WAV_SCP)!=model_path.end()){
+        ifstream in(model_path.at(WAV_SCP));
+        if (!in.is_open()) {
+            LOG(ERROR) << "Failed to open file: " << model_path.at(WAV_SCP);
+            FunASRUninit(asr_handle);
+            return -1;
+        }
+        string line;
+        while(getline(in, line))
+        {
+            istringstream iss(line);
+            string column1, column2;
+            iss >> column1 >> column2;
+            // Blank or one-column lines carry no path; skip them instead of
+            // queuing an empty file name.
+            if (column2.empty()) {
+                continue;
+            }
+            wav_list.emplace_back(column2); 
+        }
+        in.close();
+    }
+    if (wav_list.empty()) {
+        LOG(ERROR) << "No wav file found in wav scp";
+        FunASRUninit(asr_handle);
+        return -1;
+    }
+
+    // Multi-threaded benchmark run
+    float total_length = 0.0f;
+    long total_time = 0;
+    std::vector<std::thread> threads;
+    threads.reserve(rtf_threds);
+
+    for (int i = 0; i < rtf_threds; i++)
+    {
+        threads.emplace_back(runReg, asr_handle, wav_list, &total_length, &total_time, i);
+    }
+
+    for (auto& thread : threads)
+    {
+        thread.join();
+    }
+
+    LOG(INFO) << "total_time_wav " << (long)(total_length * 1000) << " ms";
+    LOG(INFO) << "total_time_comput " << total_time / 1000 << " ms";
+    // Both ratios are meaningless (division by zero) without audio or time.
+    if (total_length > 0 && total_time > 0) {
+        LOG(INFO) << "total_rtf " << (double)total_time/ (total_length*1000000);
+        LOG(INFO) << "speedup " << 1.0/((double)total_time/ (total_length*1000000));
+    } else {
+        LOG(ERROR) << "No audio was recognized, rtf is not available";
+    }
+
+    FunASRUninit(asr_handle);
+    return 0;
+}
--- a/funasr_local/runtime/onnxruntime/src/funasr-onnx-offline.cpp
+++ b/funasr_local/runtime/onnxruntime/src/funasr-onnx-offline.cpp
@@ -0,0 +1,144 @@
+/**
+ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+ * MIT License  (https://opensource.org/licenses/MIT)
+*/
+
+#ifndef _WIN32
+#include <sys/time.h>
+#else
+#include <win_func.h>
+#endif
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <map>
+#include <glog/logging.h>
+#include "libfunasrapi.h"
+#include "tclap/CmdLine.h"
+#include "com-define.h"
+
+using namespace std;
+
+/**
+ * Record a command-line argument in the option map, but only when the user
+ * actually supplied it.
+ *
+ * @param value_arg   parsed command-line argument
+ * @param key         map key to store the value under
+ * @param model_path  option map; an existing entry for key is left untouched
+ */
+void GetValue(TCLAP::ValueArg<std::string>& value_arg, string key, std::map<std::string, std::string>& model_path)
+{
+    const bool supplied = value_arg.isSet();
+    if (!supplied) {
+        return;
+    }
+    model_path.insert({key, value_arg.getValue()});
+    LOG(INFO)<< key << " : " << value_arg.getValue();
+}
+
+/**
+ * Entry point of the offline recognition tool.
+ *
+ * Parses the model paths and the input (a single wav path and/or a wav scp
+ * list), initialises the recogniser, recognises every file in turn, prints
+ * each result to stdout and finally logs the audio length, the inference time
+ * and the real-time factor.
+ *
+ * @return 0 on success, -1 when the scp file cannot be opened; the process
+ *         exits with -1 when the recogniser cannot be initialised.
+ */
+int main(int argc, char *argv[])
+{
+    google::InitGoogleLogging(argv[0]);
+    FLAGS_logtostderr = true;
+
+    TCLAP::CmdLine cmd("funasr-onnx-offline", ' ', "1.0");
+    TCLAP::ValueArg<std::string> vad_model("", VAD_MODEL_PATH, "vad model path", false, "", "string");
+    TCLAP::ValueArg<std::string> vad_cmvn("", VAD_CMVN_PATH, "vad cmvn path", false, "", "string");
+    TCLAP::ValueArg<std::string> vad_config("", VAD_CONFIG_PATH, "vad config path", false, "", "string");
+
+    TCLAP::ValueArg<std::string> am_model("", AM_MODEL_PATH, "am model path", true, "", "string");
+    TCLAP::ValueArg<std::string> am_cmvn("", AM_CMVN_PATH, "am cmvn path", true, "", "string");
+    TCLAP::ValueArg<std::string> am_config("", AM_CONFIG_PATH, "am config path", true, "", "string");
+
+    TCLAP::ValueArg<std::string> punc_model("", PUNC_MODEL_PATH, "punc model path", false, "", "string");
+    TCLAP::ValueArg<std::string> punc_config("", PUNC_CONFIG_PATH, "punc config path", false, "", "string");
+
+    TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "wave file path", false, "", "string");
+    TCLAP::ValueArg<std::string> wav_scp("", WAV_SCP, "wave scp path", false, "", "string");
+
+    cmd.add(vad_model);
+    cmd.add(vad_cmvn);
+    cmd.add(vad_config);
+    cmd.add(am_model);
+    cmd.add(am_cmvn);
+    cmd.add(am_config);
+    cmd.add(punc_model);
+    cmd.add(punc_config);
+    cmd.add(wav_path);
+    cmd.add(wav_scp);
+    cmd.parse(argc, argv);
+
+    std::map<std::string, std::string> model_path;
+    GetValue(vad_model, VAD_MODEL_PATH, model_path);
+    GetValue(vad_cmvn, VAD_CMVN_PATH, model_path);
+    GetValue(vad_config, VAD_CONFIG_PATH, model_path);
+    GetValue(am_model, AM_MODEL_PATH, model_path);
+    GetValue(am_cmvn, AM_CMVN_PATH, model_path);
+    GetValue(am_config, AM_CONFIG_PATH, model_path);
+    GetValue(punc_model, PUNC_MODEL_PATH, model_path);
+    GetValue(punc_config, PUNC_CONFIG_PATH, model_path);
+    GetValue(wav_path, WAV_PATH, model_path);
+    GetValue(wav_scp, WAV_SCP, model_path);
+
+
+    struct timeval start, end;
+    gettimeofday(&start, NULL);
+    int thread_num = 1;
+    FUNASR_HANDLE asr_handle=FunASRInit(model_path, thread_num);
+
+    if (!asr_handle)
+    {
+        LOG(ERROR) << "FunASR init failed";
+        exit(-1);
+    }
+
+    gettimeofday(&end, NULL);
+    long seconds = (end.tv_sec - start.tv_sec);
+    long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
+    LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
+
+    // read wav_path and wav_scp
+    vector<string> wav_list;
+
+    if(model_path.find(WAV_PATH)!=model_path.end()){
+        wav_list.emplace_back(model_path.at(WAV_PATH));
+    }
+    if(model_path.find(WAV_SCP)!=model_path.end()){
+        ifstream in(model_path.at(WAV_SCP));
+        if (!in.is_open()) {
+            LOG(ERROR) << "Failed to open file: " << model_path.at(WAV_SCP) ;
+            FunASRUninit(asr_handle);
+            return -1;
+        }
+        string line;
+        while(getline(in, line))
+        {
+            istringstream iss(line);
+            string column1, column2;
+            iss >> column1 >> column2;
+            // Blank or one-column lines carry no path; skip them instead of
+            // queuing an empty file name.
+            if (column2.empty()) {
+                continue;
+            }
+            wav_list.emplace_back(column2); 
+        }
+        in.close();
+    }
+    
+    // Make stdout unbuffered once, before anything is written to it;
+    // changing the buffer of a stream that is already in use is not allowed.
+    setbuf(stdout, NULL);
+
+    float snippet_time = 0.0f;
+    long taking_micros = 0;
+    for(auto& wav_file : wav_list){
+        gettimeofday(&start, NULL);
+        FUNASR_RESULT result=FunASRRecogFile(asr_handle, wav_file.c_str(), RASR_NONE, NULL);
+        gettimeofday(&end, NULL);
+        seconds = (end.tv_sec - start.tv_sec);
+        taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
+
+        if (result)
+        {
+            string msg = FunASRGetResult(result, 0);
+            printf("Result: %s \n", msg.c_str());
+            snippet_time += FunASRGetRetSnippetTime(result);
+            FunASRFreeResult(result);
+        }
+        else
+        {
+            LOG(ERROR) << ("No return data!\n");
+        }
+    }
+ 
+    LOG(INFO) << "Audio length: " << (double)snippet_time << " s";
+    LOG(INFO) << "Model inference takes: " << (double)taking_micros / 1000000 <<" s";
+    // The RTF is undefined (division by zero) when no audio was recognised.
+    if (snippet_time > 0) {
+        LOG(INFO) << "Model inference RTF: " << (double)taking_micros/ (snippet_time*1000000);
+    } else {
+        LOG(ERROR) << "No audio was recognized, RTF is not available";
+    }
+    FunASRUninit(asr_handle);
+    return 0;
+}
+
--- a/funasr_local/runtime/onnxruntime/src/libfunasrapi.cpp
+++ b/funasr_local/runtime/onnxruntime/src/libfunasrapi.cpp
@@ -0,0 +1,210 @@
+#include "precomp.h"
+#ifdef __cplusplus 
+
+extern "C" {
+#endif
+
+	// APIs for funasr
+	// Create a recogniser from the given option map; the returned handle is
+	// whatever CreateModel() produced.
+	_FUNASRAPI FUNASR_HANDLE  FunASRInit(std::map<std::string, std::string>& model_path, int thread_num)
+	{
+		return CreateModel(model_path, thread_num);
+	}
+
+	// Same construction path as FunASRInit: the handle is the object returned
+	// by CreateModel() for the given option map.
+	_FUNASRAPI FUNASR_HANDLE  FunVadInit(std::map<std::string, std::string>& model_path, int thread_num)
+	{
+		return CreateModel(model_path, thread_num);
+	}
+
+	// Recognise wav data held in memory.
+	// Returns nullptr when the handle is null or the buffer cannot be loaded;
+	// otherwise a heap-allocated result that the caller releases with
+	// FunASRFreeResult(). fn_callback, when set, is called after every
+	// recognised piece with (pieces done, pieces queued at start).
+	_FUNASRAPI FUNASR_RESULT FunASRRecogBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback)
+	{
+		Model* model = static_cast<Model*>(handle);
+		if (model == nullptr) {
+			return nullptr;
+		}
+
+		int32_t sampling_rate = -1;
+		Audio audio(1);
+		if (!audio.LoadWav(sz_buf, n_len, &sampling_rate)) {
+			return nullptr;
+		}
+		if (model->UseVad()) {
+			audio.Split(model);
+		}
+
+		FUNASR_RECOG_RESULT* recog = new FUNASR_RECOG_RESULT;
+		recog->snippet_time = audio.GetTimeLen();
+
+		const int pieces_total = audio.GetQueueSize();
+		int pieces_done = 0;
+		float* samples;
+		int sample_count;
+		int flag = 0;
+		// Drain the audio queue, appending the text of each piece.
+		for (; audio.Fetch(samples, sample_count, flag) > 0; ) {
+			string piece_text = model->Forward(samples, sample_count, flag);
+			recog->msg += piece_text;
+			++pieces_done;
+			if (fn_callback) {
+				fn_callback(pieces_done, pieces_total);
+			}
+		}
+
+		if (model->UsePunc()) {
+			string punctuated = model->AddPunc((recog->msg).c_str());
+			recog->msg = punctuated;
+		}
+
+		return recog;
+	}
+
+	// Recognise raw PCM data held in memory, sampled at sampling_rate.
+	// Returns nullptr when the handle is null or the buffer cannot be loaded;
+	// otherwise a heap-allocated result that the caller releases with
+	// FunASRFreeResult(). fn_callback, when set, is called after every
+	// recognised piece with (pieces done, pieces queued at start).
+	_FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback)
+	{
+		Model* model = static_cast<Model*>(handle);
+		if (model == nullptr) {
+			return nullptr;
+		}
+
+		Audio audio(1);
+		if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate)) {
+			return nullptr;
+		}
+		if (model->UseVad()) {
+			audio.Split(model);
+		}
+
+		FUNASR_RECOG_RESULT* recog = new FUNASR_RECOG_RESULT;
+		recog->snippet_time = audio.GetTimeLen();
+
+		const int pieces_total = audio.GetQueueSize();
+		int pieces_done = 0;
+		float* samples;
+		int sample_count;
+		int flag = 0;
+		// Drain the audio queue, appending the text of each piece.
+		for (; audio.Fetch(samples, sample_count, flag) > 0; ) {
+			string piece_text = model->Forward(samples, sample_count, flag);
+			recog->msg += piece_text;
+			++pieces_done;
+			if (fn_callback) {
+				fn_callback(pieces_done, pieces_total);
+			}
+		}
+
+		if (model->UsePunc()) {
+			string punctuated = model->AddPunc((recog->msg).c_str());
+			recog->msg = punctuated;
+		}
+
+		return recog;
+	}
+
+	// Recognise a raw PCM file sampled at sampling_rate.
+	// Returns nullptr when the handle is null or the file cannot be loaded;
+	// otherwise a heap-allocated result that the caller releases with
+	// FunASRFreeResult(). fn_callback, when set, is called after every
+	// recognised piece with (pieces done, pieces queued at start).
+	_FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* sz_filename, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback)
+	{
+		Model* model = static_cast<Model*>(handle);
+		if (model == nullptr) {
+			return nullptr;
+		}
+
+		Audio audio(1);
+		if (!audio.LoadPcmwav(sz_filename, &sampling_rate)) {
+			return nullptr;
+		}
+		if (model->UseVad()) {
+			audio.Split(model);
+		}
+
+		FUNASR_RECOG_RESULT* recog = new FUNASR_RECOG_RESULT;
+		recog->snippet_time = audio.GetTimeLen();
+
+		const int pieces_total = audio.GetQueueSize();
+		int pieces_done = 0;
+		float* samples;
+		int sample_count;
+		int flag = 0;
+		// Drain the audio queue, appending the text of each piece.
+		for (; audio.Fetch(samples, sample_count, flag) > 0; ) {
+			string piece_text = model->Forward(samples, sample_count, flag);
+			recog->msg += piece_text;
+			++pieces_done;
+			if (fn_callback) {
+				fn_callback(pieces_done, pieces_total);
+			}
+		}
+
+		if (model->UsePunc()) {
+			string punctuated = model->AddPunc((recog->msg).c_str());
+			recog->msg = punctuated;
+		}
+
+		return recog;
+	}
+
+	// Recognise a wav file.
+	// Returns nullptr when the handle is null or the file cannot be loaded;
+	// otherwise a heap-allocated result that the caller releases with
+	// FunASRFreeResult(). fn_callback, when set, is called after every
+	// recognised piece with (pieces done, pieces queued at start).
+	_FUNASRAPI FUNASR_RESULT FunASRRecogFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback)
+	{
+		Model* model = static_cast<Model*>(handle);
+		if (model == nullptr) {
+			return nullptr;
+		}
+
+		int32_t sampling_rate = -1;
+		Audio audio(1);
+		if (!audio.LoadWav(sz_wavfile, &sampling_rate)) {
+			return nullptr;
+		}
+		if (model->UseVad()) {
+			audio.Split(model);
+		}
+
+		const int pieces_total = audio.GetQueueSize();
+		FUNASR_RECOG_RESULT* recog = new FUNASR_RECOG_RESULT;
+		recog->snippet_time = audio.GetTimeLen();
+
+		int pieces_done = 0;
+		float* samples;
+		int sample_count;
+		int flag = 0;
+		// Drain the audio queue, appending the text of each piece.
+		for (; audio.Fetch(samples, sample_count, flag) > 0; ) {
+			string piece_text = model->Forward(samples, sample_count, flag);
+			recog->msg += piece_text;
+			++pieces_done;
+			if (fn_callback) {
+				fn_callback(pieces_done, pieces_total);
+			}
+		}
+
+		if (model->UsePunc()) {
+			string punctuated = model->AddPunc((recog->msg).c_str());
+			recog->msg = punctuated;
+		}
+
+		return recog;
+	}
+
+	// Number of results held by a result handle: 1 for any non-null handle,
+	// 0 for a null one.
+	_FUNASRAPI const int FunASRGetRetNumber(FUNASR_RESULT result)
+	{
+		return result ? 1 : 0;
+	}
+
+
+	// Audio length stored in the result (its snippet_time field), or 0.0f
+	// for a null handle.
+	_FUNASRAPI const float FunASRGetRetSnippetTime(FUNASR_RESULT result)
+	{
+		FUNASR_RECOG_RESULT* recog = static_cast<FUNASR_RECOG_RESULT*>(result);
+		if (recog == nullptr) {
+			return 0.0f;
+		}
+		return recog->snippet_time;
+	}
+
+	// Recognised text of a result, or nullptr for a null handle. The pointer
+	// refers to the string stored inside the result object. n_index is not
+	// used.
+	_FUNASRAPI const char* FunASRGetResult(FUNASR_RESULT result,int n_index)
+	{
+		if (!result) {
+			return nullptr;
+		}
+		FUNASR_RECOG_RESULT* recog = static_cast<FUNASR_RECOG_RESULT*>(result);
+		return recog->msg.c_str();
+	}
+
+	// Destroy a result returned by one of the FunASRRecog* functions.
+	// A null handle is accepted and ignored.
+	_FUNASRAPI void FunASRFreeResult(FUNASR_RESULT result)
+	{
+		if (!result) {
+			return;
+		}
+		delete static_cast<FUNASR_RECOG_RESULT*>(result);
+	}
+
+	// Destroy a recogniser created by FunASRInit()/FunVadInit().
+	// A null handle is accepted and ignored.
+	_FUNASRAPI void FunASRUninit(FUNASR_HANDLE handle)
+	{
+		Model* model = static_cast<Model*>(handle);
+		if (model != nullptr) {
+			delete model;
+		}
+	}
+
+#ifdef __cplusplus 
+
+}
+#endif
+
--- a/funasr_local/runtime/onnxruntime/src/model.cpp
+++ b/funasr_local/runtime/onnxruntime/src/model.cpp
@@ -0,0 +1,8 @@
+#include "precomp.h"
+
+// Factory for the recogniser: builds a paraformer::Paraformer from the option
+// map and thread count. The caller owns the returned object.
+Model *CreateModel(std::map<std::string, std::string>& model_path, int thread_num)
+{
+    return new paraformer::Paraformer(model_path, thread_num);
+}
--- a/funasr_local/runtime/onnxruntime/src/online-feature.cpp
+++ b/funasr_local/runtime/onnxruntime/src/online-feature.cpp
@@ -0,0 +1,133 @@
+/**
+ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+ * MIT License  (https://opensource.org/licenses/MIT)
+*/
+
+#include "online-feature.h"
+#include <utility>
+
+// Stores the configuration and derives the frame length and frame shift in samples:
+// (sample_rate / 1000) samples per millisecond, times 25 and 10 respectively.
+// Members are initialized in the order in which the class declares them.
+OnlineFeature::OnlineFeature(int sample_rate, knf::FbankOptions fbank_opts, int lfr_m, int lfr_n,
+                             std::vector<std::vector<float>> cmvns)
+  : fbank_opts_(std::move(fbank_opts)),
+    cmvns_(std::move(cmvns)),
+    sample_rate_(sample_rate),
+    frame_sample_length_(sample_rate / 1000 * 25),
+    frame_shift_sample_length_(sample_rate / 1000 * 10),
+    lfr_m_(lfr_m),
+    lfr_n_(lfr_n) {
+}
+
+// Streaming feature extraction for one chunk of audio.
+//
+// vad_feats:      output; receives the frames produced for this chunk. OnlineFbank()
+//                 appends to it, so it is presumably expected to be empty on entry
+//                 (TODO confirm against callers).
+// waves:          the new samples. Taken BY VALUE, so every edit made to it below stays
+//                 local to this call and is never seen by the caller.
+// input_finished: true when no more audio will follow; stored in input_finished_ and
+//                 used to flush the caches.
+void OnlineFeature::ExtractFeats(vector<std::vector<float>> &vad_feats,
+                                 vector<float> waves, bool input_finished) {
+  input_finished_ = input_finished;
+  // Prepends input_cache_ to waves, appends the computed fbank frames to vad_feats and
+  // trims waves to the samples those frames cover.
+  OnlineFbank(vad_feats, waves);
+  // Cache handling, then online LFR stacking and CMVN.
+  if (vad_feats.size() > 0) {
+    // Put the waveform kept from the previous call in front of the current samples.
+    if (!reserve_waveforms_.empty()) {
+      waves.insert(waves.begin(), reserve_waveforms_.begin(), reserve_waveforms_.end());
+    }
+    // Empty LFR cache: seed it with (lfr_m_ - 1) / 2 copies of the first frame,
+    // which acts as left padding for the first stacked frame.
+    if (lfr_splice_cache_.empty()) {
+      for (int i = 0; i < (lfr_m_ - 1) / 2; i++) {
+        lfr_splice_cache_.emplace_back(vad_feats[0]);
+      }
+    }
+    if (vad_feats.size() + lfr_splice_cache_.size() >= lfr_m_) {
+      // Enough frames for at least one LFR window: prepend the cached frames and stack.
+      vad_feats.insert(vad_feats.begin(), lfr_splice_cache_.begin(), lfr_splice_cache_.end());
+      int frame_from_waves = (waves.size() - frame_sample_length_) / frame_shift_sample_length_ + 1;
+      // Must be read before reserve_waveforms_ is cleared below: the padding frames
+      // seeded above have no waveform behind them, so they are subtracted only when
+      // no waveform had been reserved.
+      int minus_frame = reserve_waveforms_.empty() ? (lfr_m_ - 1) / 2 : 0;
+      // Replaces vad_feats with the stacked, normalized frames and returns the index of
+      // the first input frame that was kept in lfr_splice_cache_.
+      int lfr_splice_frame_idxs = OnlineLfrCmvn(vad_feats);
+      int reserve_frame_idx = lfr_splice_frame_idxs - minus_frame;
+      // Keep the waveform from reserve_frame_idx up to frame_from_waves frame shifts.
+      reserve_waveforms_.clear();
+      reserve_waveforms_.insert(reserve_waveforms_.begin(),
+                                waves.begin() + reserve_frame_idx * frame_shift_sample_length_,
+                                waves.begin() + frame_from_waves * frame_shift_sample_length_);
+      int sample_length = (frame_from_waves - 1) * frame_shift_sample_length_ + frame_sample_length_;
+      // Only affects the local copy of waves.
+      waves.erase(waves.begin() + sample_length, waves.end());
+    } else {
+      // Not enough frames yet: keep the waveform (from frame length minus frame shift
+      // onwards) and append the new frames to the LFR cache for a later call.
+      // NOTE(review): vad_feats is left holding the un-stacked frames here — confirm
+      // that callers expect this.
+      reserve_waveforms_.clear();
+      reserve_waveforms_.insert(reserve_waveforms_.begin(),
+                                waves.begin() + frame_sample_length_ - frame_shift_sample_length_, waves.end());
+      lfr_splice_cache_.insert(lfr_splice_cache_.end(), vad_feats.begin(), vad_feats.end());
+    }
+
+  } else {
+    // No new frame this call. At end of input, flush the cached frames and reset.
+    if (input_finished_) {
+      if (!reserve_waveforms_.empty()) {
+        // Only affects the local copy of waves.
+        waves = reserve_waveforms_;
+      }
+      vad_feats = lfr_splice_cache_;
+      OnlineLfrCmvn(vad_feats);
+      ResetCache();
+    }
+  }
+
+}
+
+// Applies low-frame-rate (LFR) stacking followed by CMVN to vad_feats, in place.
+//
+// Each output frame is the concatenation of lfr_m_ consecutive input frames, and
+// successive output frames start lfr_n_ input frames apart. When the last window is
+// incomplete it is padded by repeating the last input frame if input_finished_ is set;
+// otherwise stacking stops there. Input frames from the returned index onwards are
+// stored in lfr_splice_cache_ for the next call.
+//
+// vad_feats: in  - fbank frames (cached frames already prepended by the caller);
+//            out - stacked frames with (x + cmvns_[0][j]) * cmvns_[1][j] applied to the
+//                  first cmvns_[0].size() values of each frame.
+// Returns the index of the first input frame that was kept in lfr_splice_cache_
+// (0 when vad_feats is empty).
+int OnlineFeature::OnlineLfrCmvn(vector<vector<float>> &vad_feats) {
+  const int T = vad_feats.size();
+  if (T == 0) {
+    // Nothing to stack. Without this guard the splice index below would become -1 and
+    // the cache refill would start at begin() - 1.
+    lfr_splice_cache_.clear();
+    return 0;
+  }
+
+  vector<vector<float>> out_feats;
+  // Divide in floating point so that ceil() really rounds up. An integer division
+  // truncates first, which drops the trailing partial window and makes the padding
+  // branch below unreachable.
+  int T_lrf = ceil((T - (lfr_m_ - 1) / 2) / static_cast<float>(lfr_n_));
+  if (T_lrf < 0) {
+    // Fewer frames than the left context: no output, and keep the splice index valid.
+    T_lrf = 0;
+  }
+  int lfr_splice_frame_idxs = T_lrf;
+  vector<float> p;
+  for (int i = 0; i < T_lrf; i++) {
+    const int start = i * lfr_n_;
+    if (lfr_m_ <= T - start) {
+      // A complete window is available.
+      for (int j = 0; j < lfr_m_; j++) {
+        p.insert(p.end(), vad_feats[start + j].begin(), vad_feats[start + j].end());
+      }
+      out_feats.emplace_back(p);
+      p.clear();
+    } else if (input_finished_) {
+      // Last, incomplete window at end of input: take what is left and repeat the
+      // final frame until the window holds lfr_m_ frames.
+      const int num_padding = lfr_m_ - (T - start);
+      for (int j = start; j < T; j++) {
+        p.insert(p.end(), vad_feats[j].begin(), vad_feats[j].end());
+      }
+      for (int j = 0; j < num_padding; j++) {
+        p.insert(p.end(), vad_feats[T - 1].begin(), vad_feats[T - 1].end());
+      }
+      out_feats.emplace_back(p);
+      p.clear();
+    } else {
+      // More audio will follow: stop here and leave this window for the next call.
+      lfr_splice_frame_idxs = i;
+      break;
+    }
+  }
+  // Convert the window index into an input-frame index, always keeping at least the
+  // last frame in the cache.
+  lfr_splice_frame_idxs = std::min(T - 1, lfr_splice_frame_idxs * lfr_n_);
+  lfr_splice_cache_.clear();
+  lfr_splice_cache_.insert(lfr_splice_cache_.begin(), vad_feats.begin() + lfr_splice_frame_idxs, vad_feats.end());
+
+  // Apply cmvn: cmvns_[0] is added, then the sum is multiplied by cmvns_[1].
+  for (auto &out_feat: out_feats) {
+    for (size_t j = 0; j < cmvns_[0].size(); j++) {
+      out_feat[j] = (out_feat[j] + cmvns_[0][j]) * cmvns_[1][j];
+    }
+  }
+  vad_feats = std::move(out_feats);
+  return lfr_splice_frame_idxs;
+}
+
+// Computes fbank frames for the incoming samples and appends them to vad_feats.
+//
+// Samples carried over from the previous call (input_cache_) are prepended to waves.
+// Everything from the next frame-shift position onwards is stored back in
+// input_cache_. When at least one frame fits, waves is trimmed to exactly the samples
+// covered by the emitted frames; when none fits, waves keeps the merged samples and
+// vad_feats is left untouched.
+void OnlineFeature::OnlineFbank(vector<std::vector<float>> &vad_feats,
+                                vector<float> &waves) {
+  knf::OnlineFbank extractor(fbank_opts_);
+
+  // Merge the carried-over samples with the new ones.
+  waves.insert(waves.begin(), input_cache_.begin(), input_cache_.end());
+  const int num_frames = ComputeFrameNum(waves.size(), frame_sample_length_, frame_shift_sample_length_);
+
+  // Audio after the last frame-shift position is carried over to the next call.
+  input_cache_.assign(waves.begin() + num_frames * frame_shift_sample_length_, waves.end());
+  if (num_frames == 0) {
+    return;
+  }
+
+  // Drop the samples that are not covered by any complete frame.
+  const int covered_samples = (num_frames - 1) * frame_shift_sample_length_ + frame_sample_length_;
+  waves.erase(waves.begin() + covered_samples, waves.end());
+
+  extractor.AcceptWaveform(sample_rate_, waves.data(), waves.size());
+  const int32_t ready = extractor.NumFramesReady();
+  for (int32_t idx = 0; idx < ready; ++idx) {
+    const float *row = extractor.GetFrame(idx);
+    vad_feats.emplace_back(row, row + fbank_opts_.mel_opts.num_bins);
+  }
+}
--- a/funasr_local/runtime/onnxruntime/src/online-feature.h
+++ b/funasr_local/runtime/onnxruntime/src/online-feature.h
@@ -0,0 +1,55 @@
+/**
+ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+ * MIT License  (https://opensource.org/licenses/MIT)
+*/
+
+#include <vector>
+#include "precomp.h"
+
+using namespace std;
+
+// Streaming front end: turns successive chunks of waveform into LFR-stacked,
+// CMVN-normalized fbank frames, carrying leftover samples and frames between calls.
+class OnlineFeature {
+
+public:
+  // sample_rate: sampling rate used to derive the frame length and shift in samples.
+  // fbank_opts:  options handed to knf::OnlineFbank.
+  // lfr_m_:      number of frames stacked into one LFR frame.
+  // lfr_n_:      number of frames between the starts of successive LFR frames.
+  // cmvns_:      row 0 is added to the features, row 1 multiplies the result.
+  OnlineFeature(int sample_rate, knf::FbankOptions fbank_opts, int lfr_m_, int lfr_n_,
+                std::vector<std::vector<float>> cmvns_);
+
+  // Extracts features for one chunk; waves is taken by value, input_finished marks
+  // the last chunk.
+  void ExtractFeats(vector<vector<float>> &vad_feats, vector<float> waves, bool input_finished);
+
+private:
+  // Appends the fbank frames for waves (plus carried-over samples) to vad_feats.
+  void OnlineFbank(vector<vector<float>> &vad_feats, vector<float> &waves);
+  // LFR stacking and CMVN in place; returns the index of the first frame kept in the cache.
+  int OnlineLfrCmvn(vector<vector<float>> &vad_feats);
+
+  // Number of complete frames that fit into sample_length samples, or 0 when not even
+  // one frame fits.
+  static int ComputeFrameNum(int sample_length, int frame_sample_length, int frame_shift_sample_length) {
+    const int frame_num = (sample_length - frame_sample_length) / frame_shift_sample_length + 1;
+    const bool has_full_frame = sample_length >= frame_sample_length && frame_num >= 1;
+    return has_full_frame ? frame_num : 0;
+  }
+
+  // Drops all state carried between calls and clears the end-of-input flag.
+  void ResetCache() {
+    input_finished_ = false;
+    lfr_splice_cache_.clear();
+    input_cache_.clear();
+    reserve_waveforms_.clear();
+  }
+
+  knf::FbankOptions fbank_opts_;
+  // The reserved waveforms by fbank
+  std::vector<float> reserve_waveforms_;
+  // waveforms reserved after last shift position
+  std::vector<float> input_cache_;
+  // lfr reserved cache
+  std::vector<std::vector<float>> lfr_splice_cache_;
+  // cmvns_[0] is added to each value, cmvns_[1] multiplies the sum
+  std::vector<std::vector<float>> cmvns_;
+
+  int sample_rate_ = 16000;
+  // frame length in samples (25 ms)
+  int frame_sample_length_ = sample_rate_ / 1000 * 25;
+  // frame shift in samples (10 ms)
+  int frame_shift_sample_length_ = sample_rate_ / 1000 * 10;
+  int lfr_m_;
+  int lfr_n_;
+  bool input_finished_ = false;
+
+};
--- a/funasr_local/runtime/onnxruntime/src/paraformer.cpp
+++ b/funasr_local/runtime/onnxruntime/src/paraformer.cpp
@@ -0,0 +1,302 @@
+/**
+ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+ * MIT License  (https://opensource.org/licenses/MIT)
+*/
+
+#include "precomp.h"
+
+using namespace std;
+using namespace paraformer;
+
+// Builds the recognizer from a map of model-file paths.
+//
+// model_path: looked up with the *_PATH keys below. Each of the three components is
+//             optional and is set up only when its model key is present:
+//             - VAD:  enables use_vad and creates vad_handle;
+//             - AM:   calls InitAM(), which creates the ONNX session, vocab and CMVN;
+//             - PUNC: enables use_punc and creates punc_handle.
+// thread_num: forwarded to InitAM() and to the punctuation model.
+//
+// When a component's model key is present but one of its companion keys is missing,
+// the error is logged and the process exits with a failure status.
+Paraformer::Paraformer(std::map<std::string, std::string>& model_path,int thread_num)
+:vocab(nullptr),env_(ORT_LOGGING_LEVEL_ERROR, "paraformer"),session_options{}{
+    // vocab starts out null so that the destructor is safe when no AM model is given.
+
+    // VAD model
+    if(model_path.find(VAD_MODEL_PATH) != model_path.end()){
+        use_vad = true;
+        string vad_model_path;
+        string vad_cmvn_path;
+        string vad_config_path;
+
+        try{
+            vad_model_path = model_path.at(VAD_MODEL_PATH);
+            vad_cmvn_path = model_path.at(VAD_CMVN_PATH);
+            vad_config_path = model_path.at(VAD_CONFIG_PATH);
+        }catch(const out_of_range& e){
+            LOG(ERROR) << "Error when read "<< VAD_CMVN_PATH << " or " << VAD_CONFIG_PATH <<" :" << e.what();
+            // A missing path is fatal; report it to the caller's shell as a failure.
+            exit(EXIT_FAILURE);
+        }
+        vad_handle = make_unique<FsmnVad>();
+        vad_handle->InitVad(vad_model_path, vad_cmvn_path, vad_config_path);
+    }
+
+    // AM model
+    if(model_path.find(AM_MODEL_PATH) != model_path.end()){
+        string am_model_path;
+        string am_cmvn_path;
+        string am_config_path;
+
+        try{
+            am_model_path = model_path.at(AM_MODEL_PATH);
+            am_cmvn_path = model_path.at(AM_CMVN_PATH);
+            am_config_path = model_path.at(AM_CONFIG_PATH);
+        }catch(const out_of_range& e){
+            LOG(ERROR) << "Error when read "<< AM_CONFIG_PATH << " or " << AM_CMVN_PATH <<" :" << e.what();
+            exit(EXIT_FAILURE);
+        }
+        InitAM(am_model_path, am_cmvn_path, am_config_path, thread_num);
+    }
+
+    // PUNC model
+    if(model_path.find(PUNC_MODEL_PATH) != model_path.end()){
+        use_punc = true;
+        string punc_model_path;
+        string punc_config_path;
+
+        try{
+            punc_model_path = model_path.at(PUNC_MODEL_PATH);
+            punc_config_path = model_path.at(PUNC_CONFIG_PATH);
+        }catch(const out_of_range& e){
+            LOG(ERROR) << "Error when read "<< PUNC_CONFIG_PATH <<" :" << e.what();
+            exit(EXIT_FAILURE);
+        }
+
+        punc_handle = make_unique<CTTransformer>();
+        punc_handle->InitPunc(punc_model_path, punc_config_path, thread_num);
+    }
+}
+
+// Sets up the acoustic model: fbank options, the ONNX Runtime session, the cached
+// input/output tensor names, the vocabulary and the CMVN statistics.
+//
+// am_model:   path of the ONNX model file.
+// am_cmvn:    path of the CMVN file read by LoadCmvn().
+// am_config:  path handed to the Vocab constructor.
+// thread_num: intra-op thread count for the session.
+//
+// If the session cannot be created the error is logged and the process exits with a
+// failure status.
+void Paraformer::InitAM(const std::string &am_model, const std::string &am_cmvn, const std::string &am_config, int thread_num){
+    // knf options
+    fbank_opts.frame_opts.dither = 0;
+    fbank_opts.mel_opts.num_bins = 80;
+    fbank_opts.frame_opts.samp_freq = MODEL_SAMPLE_RATE;
+    fbank_opts.frame_opts.window_type = "hamming";
+    fbank_opts.frame_opts.frame_shift_ms = 10;
+    fbank_opts.frame_opts.frame_length_ms = 25;
+    fbank_opts.energy_floor = 0;
+    fbank_opts.mel_opts.debug_mel = false;
+
+    session_options.SetIntraOpNumThreads(thread_num);
+    session_options.SetGraphOptimizationLevel(ORT_ENABLE_ALL);
+    // DisableCpuMemArena can improve performance
+    session_options.DisableCpuMemArena();
+
+    try {
+        m_session = std::make_unique<Ort::Session>(env_, am_model.c_str(), session_options);
+    } catch (std::exception const &e) {
+        LOG(ERROR) << "Error when load am onnx model: " << e.what();
+        // Nothing can run without the model; exit with a failure status.
+        exit(EXIT_FAILURE);
+    }
+
+    // The model is expected to expose two inputs and two outputs; their names are
+    // stored as strings first so the const char* views taken below stay valid.
+    string strName;
+    GetInputName(m_session.get(), strName);
+    m_strInputNames.push_back(strName);
+    GetInputName(m_session.get(), strName,1);
+    m_strInputNames.push_back(strName);
+
+    GetOutputName(m_session.get(), strName);
+    m_strOutputNames.push_back(strName);
+    GetOutputName(m_session.get(), strName,1);
+    m_strOutputNames.push_back(strName);
+
+    // Take the C-string views only after all names have been stored, so that vector
+    // growth cannot invalidate them.
+    for (auto& item : m_strInputNames)
+        m_szInputNames.push_back(item.c_str());
+    for (auto& item : m_strOutputNames)
+        m_szOutputNames.push_back(item.c_str());
+    vocab = new Vocab(am_config.c_str());
+    LoadCmvn(am_cmvn.c_str());
+}
+
+// Releases the vocabulary created in InitAM(); deleting a null pointer is a no-op.
+Paraformer::~Paraformer()
+{
+    delete vocab;
+}
+
+// Currently a no-op: the body is empty, so no state is changed by this call.
+void Paraformer::Reset()
+{
+}
+
+// Runs voice activity detection on pcm_data and returns whatever FsmnVad::Infer
+// reports (presumably one [start, end] pair per speech segment — TODO confirm).
+// Returns an empty list, after logging an error, when no VAD model was loaded.
+vector<std::vector<int>> Paraformer::VadSeg(std::vector<float>& pcm_data){
+    if (!vad_handle) {
+        // vad_handle is only created when a VAD model path was supplied.
+        LOG(ERROR) << "VadSeg called but no VAD model was loaded";
+        return {};
+    }
+    return vad_handle->Infer(pcm_data);
+}
+
+// Passes sz_input to the punctuation model and returns its output.
+// When no punctuation model was loaded, logs an error and returns the input text
+// unchanged (an empty string for a null input).
+string Paraformer::AddPunc(const char* sz_input){
+    if (!punc_handle) {
+        // punc_handle is only created when a punctuation model path was supplied.
+        LOG(ERROR) << "AddPunc called but no punctuation model was loaded";
+        return sz_input ? string(sz_input) : string();
+    }
+    return punc_handle->AddPunc(sz_input);
+}
+
+// Computes fbank features for a buffer of samples with a fresh knf::OnlineFbank.
+//
+// sample_rate: rate passed on to AcceptWaveform.
+// waves/len:   the samples and their count.
+// Returns the ready frames laid out back to back, num_bins values per frame.
+vector<float> Paraformer::FbankKaldi(float sample_rate, const float* waves, int len) {
+    knf::OnlineFbank extractor(fbank_opts);
+    extractor.AcceptWaveform(sample_rate, waves, len);
+
+    const int32_t num_frames = extractor.NumFramesReady();
+    const int32_t dim = fbank_opts.mel_opts.num_bins;
+
+    vector<float> features(num_frames * dim);
+    for (int32_t idx = 0; idx < num_frames; ++idx) {
+        const float *row = extractor.GetFrame(idx);
+        std::copy(row, row + dim, features.data() + idx * dim);
+    }
+
+    return features;
+}
+
+// Reads the CMVN statistics from a text file into means_list and vars_list.
+//
+// The file is scanned line by line. A line whose first token is "<AddShift>" or
+// "<Rescale>" announces a data line; if that next line starts with "<LearnRateCoef>",
+// its tokens from index 3 up to (but not including) the last one are parsed as floats.
+// Shift values are appended to means_list; rescale values are multiplied by `scale`
+// and appended to vars_list. Blank lines are skipped.
+//
+// If the file cannot be opened the error is logged and the process exits with a
+// failure status.
+void Paraformer::LoadCmvn(const char *filename)
+{
+    ifstream cmvn_stream(filename);
+    if (!cmvn_stream.is_open()) {
+        LOG(ERROR) << "Failed to open file: " << filename;
+        exit(EXIT_FAILURE);
+    }
+    string line;
+
+    while (getline(cmvn_stream, line)) {
+        istringstream iss(line);
+        vector<string> line_item{istream_iterator<string>{iss}, istream_iterator<string>{}};
+        // A blank line has no tokens; indexing [0] on it would be out of bounds.
+        if (line_item.empty()) {
+            continue;
+        }
+
+        const bool is_shift = line_item[0] == "<AddShift>";
+        const bool is_rescale = line_item[0] == "<Rescale>";
+        if (!is_shift && !is_rescale) {
+            continue;
+        }
+
+        // The values live on the line that follows the tag.
+        if (!getline(cmvn_stream, line)) {
+            break;
+        }
+        istringstream values_stream(line);
+        vector<string> values{istream_iterator<string>{values_stream}, istream_iterator<string>{}};
+        if (values.empty() || values[0] != "<LearnRateCoef>") {
+            continue;
+        }
+
+        // Skip the three leading tokens and the closing token. `j + 1 < size()` avoids
+        // the unsigned wrap-around of `size() - 1`.
+        for (size_t j = 3; j + 1 < values.size(); j++) {
+            if (is_shift) {
+                means_list.push_back(stof(values[j]));
+            } else {
+                vars_list.push_back(stof(values[j])*scale);
+            }
+        }
+    }
+}
+
+// Greedy decoding over the model output.
+//
+// in:         scores laid out as n_len consecutive rows of token_nums values.
+// n_len:      number of rows to decode.
+// token_nums: number of values per row.
+// For every row, FindMax() supplies an index (by its name, that of the largest score);
+// the collected indices are turned into text by vocab->Vector2StringV2().
+string Paraformer::GreedySearch(float * in, int n_len,  int64_t token_nums)
+{
+    vector<int> best_tokens;
+    float *row_scores = in;
+    for (int row = 0; row < n_len; ++row, row_scores += token_nums) {
+        float top_score;
+        int top_token;
+        FindMax(row_scores, token_nums, top_score, top_token);
+        best_tokens.push_back(top_token);
+    }
+
+    return vocab->Vector2StringV2(best_tokens);
+}
+
+// Stacks fbank frames into low-frame-rate (LFR) frames.
+//
+// in: frames laid out back to back, num_bins values each.
+// Returns the output frames back to back. Each is the concatenation of
+// lfr_window_size consecutive input frames, and successive output frames start
+// lfr_window_shift input frames apart. The result is empty when the input holds fewer
+// than lfr_window_size frames.
+vector<float> Paraformer::ApplyLfr(const std::vector<float> &in) 
+{
+    int32_t in_feat_dim = fbank_opts.mel_opts.num_bins;
+    int32_t in_num_frames = in.size() / in_feat_dim;
+
+    // Not even one full window. The frame-count formula below truncates toward zero,
+    // so for a short but non-empty input it can still yield 1, and the copy would then
+    // read past the end of `in`.
+    if (in_num_frames < lfr_window_size) {
+        return {};
+    }
+
+    int32_t out_num_frames =
+        (in_num_frames - lfr_window_size) / lfr_window_shift + 1;
+    int32_t out_feat_dim = in_feat_dim * lfr_window_size;
+
+    std::vector<float> out(out_num_frames * out_feat_dim);
+
+    const float *p_in = in.data();
+    float *p_out = out.data();
+
+    for (int32_t i = 0; i != out_num_frames; ++i) {
+        // One window is lfr_window_size consecutive frames, i.e. out_feat_dim values.
+        std::copy(p_in, p_in + out_feat_dim, p_out);
+
+        p_out += out_feat_dim;
+        p_in += lfr_window_shift * in_feat_dim;
+    }
+
+    return out;
+}
+
+  // Normalizes features in place with the statistics read by LoadCmvn():
+  // every value becomes (x + means_list[k]) * vars_list[k], where k is its position
+  // inside its frame and a frame holds means_list.size() values.
+  //
+  // v: features laid out frame after frame; left untouched (with an error logged) when
+  //    it is null or when the statistics are missing or inconsistent.
+  void Paraformer::ApplyCmvn(std::vector<float> *v)
+  {
+    const size_t dim = means_list.size();
+    // dim == 0 would divide by zero below; a shorter vars_list would be read out of bounds.
+    if (v == nullptr || dim == 0 || vars_list.size() < dim) {
+      LOG(ERROR) << "Invalid cmvn statistics, skip cmvn";
+      return;
+    }
+
+    const size_t num_frames = v->size() / dim;
+    float *p = v->data();
+
+    for (size_t i = 0; i != num_frames; ++i) {
+      for (size_t k = 0; k != dim; ++k) {
+        p[k] = (p[k] + means_list[k]) * vars_list[k];
+      }
+
+      p += dim;
+    }
+  }
+
+// Runs the acoustic model on one buffer of samples and returns the decoded text.
+//
+// din/len: the samples and their count; they are processed as MODEL_SAMPLE_RATE audio.
+// flag:    accepted but not used.
+//
+// Pipeline: fbank -> LFR stacking -> CMVN -> ONNX session -> greedy search.
+// Returns an empty string when the buffer is too short to yield a feature frame, when
+// the model output does not have the expected rank, or when inference throws (the
+// error is logged).
+string Paraformer::Forward(float* din, int len, int flag)
+{
+
+    int32_t in_feat_dim = fbank_opts.mel_opts.num_bins;
+    std::vector<float> wav_feats = FbankKaldi(MODEL_SAMPLE_RATE, din, len);
+    wav_feats = ApplyLfr(wav_feats);
+    ApplyCmvn(&wav_feats);
+
+    int32_t feat_dim = lfr_window_size*in_feat_dim;
+    int32_t num_frames = wav_feats.size() / feat_dim;
+
+    string result;
+    if (num_frames <= 0) {
+        // Nothing to feed to the model.
+        return result;
+    }
+
+#ifdef _WIN_X86
+        Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
+#else
+        Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
+#endif
+
+    // First input: the features, shaped [1, num_frames, feat_dim].
+    const int64_t input_shape_[3] = {1, num_frames, feat_dim};
+    Ort::Value onnx_feats = Ort::Value::CreateTensor<float>(m_memoryInfo,
+        wav_feats.data(),
+        wav_feats.size(),
+        input_shape_,
+        3);
+
+    // Second input: the number of frames, shaped [1].
+    const int64_t paraformer_length_shape[1] = {1};
+    std::vector<int32_t> paraformer_length;
+    paraformer_length.emplace_back(num_frames);
+    Ort::Value onnx_feats_len = Ort::Value::CreateTensor<int32_t>(
+          m_memoryInfo, paraformer_length.data(), paraformer_length.size(), paraformer_length_shape, 1);
+
+    std::vector<Ort::Value> input_onnx;
+    input_onnx.emplace_back(std::move(onnx_feats));
+    input_onnx.emplace_back(std::move(onnx_feats_len));
+
+    try {
+        auto outputTensor = m_session->Run(Ort::RunOptions{nullptr}, m_szInputNames.data(), input_onnx.data(), input_onnx.size(), m_szOutputNames.data(), m_szOutputNames.size());
+        std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
+        if (outputShape.size() < 3) {
+            // The last dimension is read below as the number of tokens.
+            LOG(ERROR) << "Unexpected am output rank: " << outputShape.size();
+            return result;
+        }
+
+        float* floatData = outputTensor[0].GetTensorMutableData<float>();
+        // NOTE(review): the second output is read as int64 — confirm against the model.
+        auto encoder_out_lens = outputTensor[1].GetTensorMutableData<int64_t>();
+        result = GreedySearch(floatData, static_cast<int>(*encoder_out_lens), outputShape[2]);
+    }
+    catch (std::exception const &e)
+    {
+        // Never pass the exception text as a format string.
+        LOG(ERROR) << "Error when run am onnx model: " << e.what();
+    }
+
+    return result;
+}
+
+// Placeholder for chunk-wise decoding: prints a "not implemented" notice to stdout and
+// returns a fixed string. All three arguments are ignored.
+string Paraformer::ForwardChunk(float* din, int len, int flag)
+{
+    fputs("Not Imp!!!!!!\n", stdout);
+    return string("Hello");
+}
+
+// Placeholder for rescoring: prints a "not implemented" notice to stdout and returns a
+// fixed string.
+string Paraformer::Rescoring()
+{
+    fputs("Not Imp!!!!!!\n", stdout);
+    return string("Hello");
+}
--- a/funasr_local/runtime/onnxruntime/src/paraformer.h
+++ b/funasr_local/runtime/onnxruntime/src/paraformer.h
@@ -0,0 +1,68 @@
+/**
+ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+ * MIT License  (https://opensource.org/licenses/MIT)
+*/
+
+#pragma once
+
+
+#ifndef PARAFORMER_MODELIMP_H
+#define PARAFORMER_MODELIMP_H
+
+#include "precomp.h"
+
+namespace paraformer {
+
+    /**
+     * Author: Speech Lab of DAMO Academy, Alibaba Group
+     * Paraformer: Fast and Accurate Parallel Transformer for Non-autoregressive End-to-End Speech Recognition
+     * https://arxiv.org/pdf/2206.08317.pdf
+     *
+     * Model implementation that bundles the acoustic model with an optional VAD model
+     * and an optional punctuation model; which of them are set up depends on the keys
+     * found in the model-path map given to the constructor.
+    */
+    class Paraformer : public Model {
+    private:
+        // Options used for every knf::OnlineFbank created by FbankKaldi().
+        knf::FbankOptions fbank_opts;
+
+        // Null unless the corresponding model path was supplied.
+        std::unique_ptr<FsmnVad> vad_handle;
+        std::unique_ptr<CTTransformer> punc_handle;
+
+        // Created in InitAM() and deleted in the destructor. Starts out null so that
+        // the destructor is safe when InitAM() never ran.
+        Vocab* vocab = nullptr;
+        // CMVN statistics filled by LoadCmvn(): value added, then value multiplied.
+        vector<float> means_list;
+        vector<float> vars_list;
+        // Factor applied to every rescale value while loading the CMVN file.
+        const float scale = 22.6274169979695;
+        // LFR stacking: frames per window and frames between window starts.
+        int32_t lfr_window_size = 7;
+        int32_t lfr_window_shift = 6;
+
+        // Reads the CMVN file into means_list / vars_list.
+        void LoadCmvn(const char *filename);
+        // Stacks fbank frames into LFR frames.
+        vector<float> ApplyLfr(const vector<float> &in);
+        // Normalizes features in place with means_list / vars_list.
+        void ApplyCmvn(vector<float> *v);
+
+        // Turns n_len rows of token_nums scores into text.
+        string GreedySearch( float* in, int n_len, int64_t token_nums);
+
+        std::shared_ptr<Ort::Session> m_session;
+        Ort::Env env_;
+        Ort::SessionOptions session_options;
+
+        // Tensor names are kept as strings; the const char* vectors point into them.
+        vector<string> m_strInputNames, m_strOutputNames;
+        vector<const char*> m_szInputNames;
+        vector<const char*> m_szOutputNames;
+        bool use_vad=false;
+        bool use_punc=false;
+
+    public:
+        // model_path maps the *_PATH keys to file paths; thread_num is passed on to the models.
+        Paraformer(std::map<std::string, std::string>& model_path, int thread_num=0);
+        ~Paraformer();
+        // Sets up the acoustic model session, vocabulary and CMVN statistics.
+        void InitAM(const std::string &am_model, const std::string &am_cmvn, const std::string &am_config, int thread_num);
+        void Reset();
+        // Fbank features for a buffer of samples, frames laid out back to back.
+        vector<float> FbankKaldi(float sample_rate, const float* waves, int len);
+        string ForwardChunk(float* din, int len, int flag);
+        // Recognizes one buffer of samples and returns the text.
+        string Forward(float* din, int len, int flag);
+        string Rescoring();
+        // Delegates to the VAD model; requires that a VAD model was loaded.
+        std::vector<std::vector<int>> VadSeg(std::vector<float>& pcm_data);
+        // Delegates to the punctuation model; requires that one was loaded.
+        string AddPunc(const char* sz_input);
+        bool UseVad(){return use_vad;}
+        bool UsePunc(){return use_punc;}
+    };
+
+} // namespace paraformer
+#endif
--- a/funasr_local/runtime/onnxruntime/src/precomp.h
+++ b/funasr_local/runtime/onnxruntime/src/precomp.h
@@ -0,0 +1,47 @@
+#pragma once 
+// system 
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <deque>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <iterator>
+#include <list>
+#include <locale.h>
+#include <vector>
+#include <string>
+#include <math.h>
+#include <numeric>
+#include <cstring>
+
+using namespace std;
+// third party
+#include "onnxruntime_run_options_config_keys.h"
+#include "onnxruntime_cxx_api.h"
+#include "kaldi-native-fbank/csrc/feature-fbank.h"
+#include "kaldi-native-fbank/csrc/online-feature.h"
+
+// mine
+#include <glog/logging.h>
+#include "common-struct.h"
+#include "com-define.h"
+#include "commonfunc.h"
+#include "predefine-coe.h"
+#include "tokenizer.h"
+#include "ct-transformer.h"
+#include "fsmn-vad.h"
+#include "e2e-vad.h"
+#include "vocab.h"
+#include "audio.h"
+#include "tensor.h"
+#include "util.h"
+#include "resample.h"
+#include "model.h"
+//#include "vad-model.h"
+#include "paraformer.h"
+#include "libfunasrapi.h"
+
+using namespace paraformer;
--- a/funasr_local/runtime/onnxruntime/src/predefine-coe.h
+++ b/funasr_local/runtime/onnxruntime/src/predefine-coe.h
@@ -0,0 +1,592 @@
+#ifndef PREDEFINE_COE_H
+#define PREDEFINE_COE_H
+
+#include <stdint.h>
+
+const int32_t melcoe_hex[] = {  // Precomputed coefficient table stored as raw 32-bit words.
+    // NOTE(review): entries look like IEEE-754 float bit patterns (mel filter weights, judging by the name) -- confirm against the code that reads this table.
+    0x3f01050c, 0x3e0afb11, 0x3f5d413c, 0x3f547fd0, 0x3e2e00c1, 0x3f132970,
+    0x3ed9ad21, 0x3ebb8bb9, 0x3f223a24, 0x3e4de6f8, 0x3f4c8642, 0x3d9c0424,
+    0x3f6c7f7c, 0x3f7d295a, 0x3c35a961, 0x3f6fd497, 0x3d815b45, 0x3f6af197,
+    0x3da87344, 0x3f6dfce9, 0x3d9018b9, 0x3f787ebc, 0x3d2098fe, 0x3cf02873,
+    0x3f75f670, 0x3e08e423, 0x3f5dc6f7, 0x3e8161eb, 0x3f3f4f0b, 0x3eca38e2,
+    0x3f1ae38f, 0x3f0f2d23, 0x3ee1a5ba, 0x3f3e9a98, 0x3e82cad1, 0x3f7321ac,
+    0x3e321028, 0x3d4de548, 0x3f537bf6, 0x3ed50f76, 0x3f157845, 0x3f2cf6bc,
+    0x3ea61288, 0x3f739ea7, 0x3e794186, 0x3d461590, 0x3f41af9f, 0x3f0cdfd4,
+    0x3ee64058, 0x3f5f23aa, 0x3e53d467, 0x3e037156, 0x3f4b0ae6, 0x3f0e2fac,
+    0x3ee3a0a8, 0x3f6ab111, 0x3e94b1ed, 0x3daa7774, 0x3f35a70a, 0x3f2d08dc,
+    0x3d951fb4, 0x3ea5ee48, 0x3f6d5c09, 0x3ef61e1a, 0x3f04f0f3, 0x3f66305c,
+    0x3ea7def9, 0x3dce7d20, 0x3f2c1083, 0x3f44354b, 0x3e5baf49, 0x3e6f2ad2,
+    0x3f49142e, 0x3f2bfe35, 0x3e0d627b, 0x3ea80396, 0x3f5ca761, 0x3f1ce830,
+    0x3dc4d786, 0x3ec62fa0, 0x3f67650f, 0x3f165fc0, 0x3db1323f, 0x3ed34080,
+    0x3f69d9b8, 0x3f17def1, 0x3ddbd6b6, 0x3ed0421e, 0x3f648529, 0x3f20ebbd,
+    0x3e20901a, 0x3ebe2886, 0x3f57dbf9, 0x3f3116ac, 0x3e6edcc6, 0x3e9dd2a9,
+    0x3f4448ce, 0x3f47f9a3, 0x3eaba511, 0x3e601974, 0x3f2a2d77, 0x3f6536e2,
+    0x3eec3842, 0x3d0781f6, 0x3dd648ed, 0x3f09e3df, 0x3f7787e1, 0x3f1c411f,
+    0x3e45b702, 0x3ec77dc2, 0x3f4e9240, 0x3f47f500, 0x3ebf9c61, 0x3e602c00,
+    0x3f2031d0, 0x3f78f0f7, 0x3f135547, 0x3e3bcd78, 0x3ce1e12a, 0x3ed95573,
+    0x3f510ca2, 0x3f4bc3c2, 0x3ed37e77, 0x3d0ded37, 0x3e50f0f8, 0x3f1640c5,
+    0x3f77212d, 0x3f291bd1, 0x3e94df6c, 0x3eadc85e, 0x3f35904a, 0x3f6cd43b,
+    0x3f104351, 0x3e52dc63, 0x3d995e26, 0x3edf795f, 0x3f4b48e7, 0x3f5a29e7,
+    0x3f00963d, 0x3e1fdb2f, 0x3e175865, 0x3efed385, 0x3f580934, 0x3f50466d,
+    0x3ef30046, 0x3e0e7c6b, 0x3e3ee64e, 0x3f067fdd, 0x3f5c60e5, 0x3f4e9ea4,
+    0x3ef4f46a, 0x3e1cb596, 0x3e45856f, 0x3f0585cb, 0x3f58d29b, 0x3f54b3ef,
+    0x3f0309ad, 0x3e48aa5b, 0x3e2d3042, 0x3ef9eca6, 0x3f4dd569, 0x3f6212c4,
+    0x3f12be68, 0x3e8853a3, 0x3def69e0, 0x3eda8330, 0x3f3bd62e, 0x3f76516a,
+    0x3f2931b5, 0x3eb98e9b, 0x3d88773c, 0x3d1ae95c, 0x3ead9c96, 0x3f2338b2,
+    0x3f6ef119, 0x3f46054d, 0x3ef74eba, 0x3e47c83a, 0x3e67eace, 0x3f0458a3,
+    0x3f4e0df1, 0x3f68e26b, 0x3f207590, 0x3eb1515d, 0x3d8bc852, 0x3db8eca9,
+    0x3ebf14e0, 0x3f275751, 0x3f6e86f6, 0x3f4ae3f8, 0x3f04e6de, 0x3e7dfcce,
+    0x3e547020, 0x3ef63244, 0x3f4080cd, 0x3f7aaa80, 0x3f366659, 0x3ee560cb,
+    0x3e3e1967, 0x3caab00e, 0x3e93334e, 0x3f0d4f9a, 0x3f5079a6, 0x3f6ce5f8,
+    0x3f2acd10, 0x3ed272ff, 0x3e20a4c5, 0x3d98d042, 0x3eaa65e0, 0x3f16c680,
+    0x3f57d6cf, 0x3f679a1b, 0x3f278a40, 0x3ecfef5c, 0x3e2381fd, 0x3dc32f28,
+    0x3eb0eb80, 0x3f180852, 0x3f571f81, 0x3f6a42d8, 0x3f2c1ce8, 0x3edcd9d1,
+    0x3e44c475, 0x3dade93f, 0x3ea7c630, 0x3f119318, 0x3f4ecee3, 0x3f7467d4,
+    0x3f380f62, 0x3ef84c54, 0x3e815525, 0x3cb361d7, 0x3d3982c8, 0x3e8fe13b,
+    0x3f03d9d6, 0x3f3f556d, 0x3f7a64f1, 0x3f4af618, 0x3f10ba30, 0x3eadcbc5,
+    0x3debbe02, 0x3e5427a0, 0x3ede8b9f, 0x3f291a1d, 0x3f628840, 0x3f646e63,
+    0x3f2bc86b, 0x3ee70902, 0x3e6e854e, 0x3c83b300, 0x3ddc8cea, 0x3ea86f2a,
+    0x3f0c7b7f, 0x3f445eac, 0x3f7be268, 0x3f4cf80b, 0x3f162f6e, 0x3ebf8516,
+    0x3e26c0c2, 0x3e4c1fd5, 0x3ed3a124, 0x3f203d75, 0x3f564fd0, 0x3f73f733,
+    0x3f3e966d, 0x3f098cbf, 0x3ea9b21c, 0x3e01e917, 0x3d408cd1, 0x3e82d326,
+    0x3eece682, 0x3f2b26f2, 0x3f5f85ba, 0x3f6c6f56, 0x3f38b733, 0x3f0550d9,
+    0x3ea47689, 0x3dfbabd7, 0x3d9c8552, 0x3e8e919a, 0x3ef55e4f, 0x3f2dc4bb,
+    0x3f608a85, 0x3f6cfe84, 0x3f3ad56c, 0x3f08f945, 0x3eaed247, 0x3e189086,
+    0x3d980be2, 0x3e8a5528, 0x3eee0d76, 0x3f2896dc, 0x3f59dbde, 0x3f75295d,
+    0x3f4477f6, 0x3f140f14, 0x3ec7dbbd, 0x3e504e0f, 0x3c8fe67e, 0x3d2d6a38,
+    0x3e6e2028, 0x3ed7e1d9, 0x3f1c1221, 0x3f4bec7c, 0x3f7b80cc, 0x3f553023,
+    0x3f262589, 0x3eeebd40, 0x3e91b54d, 0x3dd4c6f8, 0x3e2b3f74, 0x3eb3b4ef,
+    0x3f08a160, 0x3f372559, 0x3f656721, 0x3f6c988d, 0x3f3ed8f9, 0x3f11596d,
+    0x3ec83270, 0x3e5c5ea4, 0x3d254149, 0x3d9b3b97, 0x3e824e0e, 0x3edd4d25,
+    0x3f1be6c8, 0x3f48e857, 0x3f75abeb, 0x3f5dcdd1, 0x3f318436, 0x3f0576a0,
+    0x3eb348db, 0x3e3833fb, 0x3c2bedc9, 0x3e08c8be, 0x3e9cf794, 0x3ef512c0,
+    0x3f265b92, 0x3f51f301, 0x3f7d5049, 0x3f578bfc, 0x3f2ca136, 0x3f01eecf,
+    0x3eaee867, 0x3e34c34c, 0x3c490794, 0x3e21d00f, 0x3ea6bd94, 0x3efc2262,
+    0x3f288bcc, 0x3f52cf2d, 0x3f7cdbe2, 0x3f594d89, 0x3f2fac87, 0x3f064092,
+    0x3eba1245, 0x3e5016cd, 0x3d335c27, 0x3e1ac9dd, 0x3ea0a6f1, 0x3ef37edc,
+    0x3f22f6de, 0x3f4bfa4d, 0x3f74ca3e, 0x3f6298cf, 0x3f3a2e5b, 0x3f11f5e8,
+    0x3ed3ddf9, 0x3e84323c, 0x3dd39eaa, 0x3deb3986, 0x3e8ba34a, 0x3edc142f,
+    0x3f161103, 0x3f3de6e2, 0x3f658c2b, 0x3f72feac, 0x3f4bb92e, 0x3f24a2e9,
+    0x3efb76d9, 0x3eae048f, 0x3e41dc34, 0x3d219509, 0x3d50153e, 0x3e511b46,
+    0x3eb6ba2d, 0x3f024494, 0x3f28fdb9, 0x3f4f88f3, 0x3f75e6af, 0x3f63e8a7,
+    0x3f3de4a8, 0x3f180cea, 0x3ee4c20e, 0x3e99c134, 0x3e1e2cfc, 0x3c1824f4,
+    0x3de0bac6, 0x3e8436b1, 0x3ecfe62d, 0x3f0d9ef9, 0x3f331f66, 0x3f5874c1,
+    0x3f7d9f6c, 0x3f5d6037, 0x3f3889c9, 0x3f13dcea, 0x3edeb27d, 0x3e95fcd3,
+    0x3e1b303e, 0x3c3075cb, 0x3e0a7f24, 0x3e8eec6e, 0x3ed8462b, 0x3f10a6c1,
+    0x3f350197, 0x3f5933f1, 0x3f7d3e29, 0x3f5edf68, 0x3f3b246a, 0x3f179088,
+    0x3ee846d8, 0x3ea1b983, 0x3e36f0d8, 0x3d2c1773, 0x3e048260, 0x3e89b72b,
+    0x3ed0def0, 0x3f0bdc94, 0x3f2f233e, 0x3f5243ca, 0x3f753e89, 0x3f67ec34,
+    0x3f453c1d, 0x3f22b0e2, 0x3f004a36, 0x3ebc0f98, 0x3e6fa55d, 0x3dcf7467,
+    0x3dc09e5f, 0x3e6b0f8d, 0x3eba9e3c, 0x3eff6b94, 0x3f21f834, 0x3f4416a9,
+    0x3f661173, 0x3f781723, 0x3f5662cf, 0x3f34d14a, 0x3f13624c, 0x3ee42b1a,
+    0x3ea1d591, 0x3e3f86e3, 0x3d6fa1a1, 0x3cfd1ba9, 0x3e2674c3, 0x3e965d6c,
+    0x3ed93b69, 0x3f0dea73, 0x3f2f1538, 0x3f501e47, 0x3f7105e6, 0x3f6e33a9,
+    0x3f4d8e22, 0x3f2d0944, 0x3f0ca4cd, 0x3ed8c0fd, 0x3e98782f, 0x3e30dd66,
+    0x3d452061, 0x3d8e62bc, 0x3e49c779, 0x3ea5ed78, 0x3ee6b665, 0x3f139f81,
+    0x3f33c3e8, 0x3f53c8a7, 0x3f73adfa, 0x3f6c8be0, 0x3f4ce4ab, 0x3f2d5c2a,
+    0x3f0df223, 0x3edd4cb5, 0x3e9ef12d, 0x3e41a276, 0x3d8bb1ba, 0x3d9ba0ff,
+    0x3e4c6d54, 0x3ea547ab, 0x3ee41bba, 0x3f1159a6, 0x3f30876a, 0x3f4f9762,
+    0x3f6e89c9, 0x3f72a12b, 0x3f53e942, 0x3f354e46, 0x3f16cffe, 0x3ef0dc6f,
+    0x3eb45177, 0x3e6ffd59, 0x3def8e9c
+
+};
+
+const int32_t window_hex[] = {  // 400-entry window table as raw 32-bit words; zero at both ends and mirror-symmetric. NOTE(review): presumably IEEE-754 float bit patterns -- confirm with the consumer.
+    0x00000000, 0x398b03f6, 0x3a61d1c5, 0x3ae0ee32, 0x3b37623a, 0x3b85f871,
+    0x3bb69d19, 0x3bed453b, 0x3c14d40b, 0x3c35c45b, 0x3c59595d, 0x3c7f7c1d,
+    0x3c940c13, 0x3ca98d81, 0x3cc039eb, 0x3cd8098d, 0x3cf0f52e, 0x3d057b06,
+    0x3d1302e6, 0x3d210f33, 0x3d2f9d0e, 0x3d3ea9ba, 0x3d4e3293, 0x3d5e3510,
+    0x3d6eaebd, 0x3d7f9d38, 0x3d887f19, 0x3d9167b5, 0x3d9a8756, 0x3da3dce9,
+    0x3dad675d, 0x3db725ab, 0x3dc116cc, 0x3dcb39bf, 0x3dd58d86, 0x3de01126,
+    0x3deac3a7, 0x3df5a413, 0x3e0058bb, 0x3e05f571, 0x3e0ba7b2, 0x3e116f08,
+    0x3e174afe, 0x3e1d3b1c, 0x3e233ef0, 0x3e295605, 0x3e2f7fe7, 0x3e35bc23,
+    0x3e3c0a46, 0x3e4269de, 0x3e48da79, 0x3e4f5ba5, 0x3e55ecf2, 0x3e5c8ded,
+    0x3e633e26, 0x3e69fd2c, 0x3e70ca8f, 0x3e77a5de, 0x3e7e8eaa, 0x3e82c241,
+    0x3e86437c, 0x3e89cacd, 0x3e8d57fc, 0x3e90ead3, 0x3e948319, 0x3e982097,
+    0x3e9bc316, 0x3e9f6a5d, 0x3ea31636, 0x3ea6c66a, 0x3eaa7ac0, 0x3eae3303,
+    0x3eb1eefa, 0x3eb5ae6f, 0x3eb9712a, 0x3ebd36f6, 0x3ec0ff9b, 0x3ec4cae2,
+    0x3ec89895, 0x3ecc687d, 0x3ed03a64, 0x3ed40e13, 0x3ed7e354, 0x3edbb9f2,
+    0x3edf91b5, 0x3ee36a69, 0x3ee743d7, 0x3eeb1dca, 0x3eeef80c, 0x3ef2d267,
+    0x3ef6aca8, 0x3efa8698, 0x3efe6002, 0x3f011c59, 0x3f03083a, 0x3f04f389,
+    0x3f06de2d, 0x3f08c80b, 0x3f0ab10a, 0x3f0c990f, 0x3f0e8001, 0x3f1065c6,
+    0x3f124a45, 0x3f142d65, 0x3f160f0c, 0x3f17ef21, 0x3f19cd8b, 0x3f1baa32,
+    0x3f1d84fb, 0x3f1f5dd0, 0x3f213498, 0x3f230939, 0x3f24db9d, 0x3f26abaa,
+    0x3f28794a, 0x3f2a4464, 0x3f2c0ce1, 0x3f2dd2a9, 0x3f2f95a6, 0x3f3155bf,
+    0x3f3312e0, 0x3f34ccef, 0x3f3683d8, 0x3f383784, 0x3f39e7dd, 0x3f3b94cc,
+    0x3f3d3e3c, 0x3f3ee418, 0x3f40864a, 0x3f4224bd, 0x3f43bf5c, 0x3f455613,
+    0x3f46e8cc, 0x3f487774, 0x3f4a01f6, 0x3f4b883f, 0x3f4d0a3b, 0x3f4e87d6,
+    0x3f5000fe, 0x3f5175a0, 0x3f52e5a9, 0x3f545106, 0x3f55b7a5, 0x3f571975,
+    0x3f587664, 0x3f59ce60, 0x3f5b2158, 0x3f5c6f3b, 0x3f5db7f9, 0x3f5efb80,
+    0x3f6039c2, 0x3f6172af, 0x3f62a636, 0x3f63d448, 0x3f64fcd6, 0x3f661fd3,
+    0x3f673d2e, 0x3f6854db, 0x3f6966ca, 0x3f6a72ef, 0x3f6b793d, 0x3f6c79a5,
+    0x3f6d741d, 0x3f6e6896, 0x3f6f5706, 0x3f703f5f, 0x3f712198, 0x3f71fda4,
+    0x3f72d379, 0x3f73a30c, 0x3f746c52, 0x3f752f43, 0x3f75ebd4, 0x3f76a1fc,
+    0x3f7751b2, 0x3f77faee, 0x3f789da6, 0x3f7939d4, 0x3f79cf6e, 0x3f7a5e6f,
+    0x3f7ae6cf, 0x3f7b6886, 0x3f7be38f, 0x3f7c57e4, 0x3f7cc57f, 0x3f7d2c5b,
+    0x3f7d8c72, 0x3f7de5bf, 0x3f7e3840, 0x3f7e83ee, 0x3f7ec8c7, 0x3f7f06c7,
+    0x3f7f3deb, 0x3f7f6e31, 0x3f7f9795, 0x3f7fba17, 0x3f7fd5b4, 0x3f7fea6b,
+    0x3f7ff83b, 0x3f7fff23, 0x3f7fff23, 0x3f7ff83b, 0x3f7fea6b, 0x3f7fd5b4,
+    0x3f7fba17, 0x3f7f9795, 0x3f7f6e31, 0x3f7f3deb, 0x3f7f06c7, 0x3f7ec8c7,
+    0x3f7e83ee, 0x3f7e3840, 0x3f7de5bf, 0x3f7d8c72, 0x3f7d2c5b, 0x3f7cc57f,
+    0x3f7c57e4, 0x3f7be38f, 0x3f7b6886, 0x3f7ae6cf, 0x3f7a5e6f, 0x3f79cf6e,
+    0x3f7939d4, 0x3f789da6, 0x3f77faee, 0x3f7751b2, 0x3f76a1fc, 0x3f75ebd4,
+    0x3f752f43, 0x3f746c52, 0x3f73a30c, 0x3f72d379, 0x3f71fda4, 0x3f712198,
+    0x3f703f5f, 0x3f6f5706, 0x3f6e6896, 0x3f6d741d, 0x3f6c79a5, 0x3f6b793d,
+    0x3f6a72ef, 0x3f6966ca, 0x3f6854db, 0x3f673d2e, 0x3f661fd3, 0x3f64fcd6,
+    0x3f63d448, 0x3f62a636, 0x3f6172af, 0x3f6039c2, 0x3f5efb80, 0x3f5db7f9,
+    0x3f5c6f3b, 0x3f5b2158, 0x3f59ce60, 0x3f587664, 0x3f571975, 0x3f55b7a5,
+    0x3f545106, 0x3f52e5a9, 0x3f5175a0, 0x3f5000fe, 0x3f4e87d6, 0x3f4d0a3b,
+    0x3f4b883f, 0x3f4a01f6, 0x3f487774, 0x3f46e8cc, 0x3f455613, 0x3f43bf5c,
+    0x3f4224bd, 0x3f40864a, 0x3f3ee418, 0x3f3d3e3c, 0x3f3b94cc, 0x3f39e7dd,
+    0x3f383784, 0x3f3683d8, 0x3f34ccef, 0x3f3312e0, 0x3f3155bf, 0x3f2f95a6,
+    0x3f2dd2a9, 0x3f2c0ce1, 0x3f2a4464, 0x3f28794a, 0x3f26abaa, 0x3f24db9d,
+    0x3f230939, 0x3f213498, 0x3f1f5dd0, 0x3f1d84fb, 0x3f1baa32, 0x3f19cd8b,
+    0x3f17ef21, 0x3f160f0c, 0x3f142d65, 0x3f124a45, 0x3f1065c6, 0x3f0e8001,
+    0x3f0c990f, 0x3f0ab10a, 0x3f08c80b, 0x3f06de2d, 0x3f04f389, 0x3f03083a,
+    0x3f011c59, 0x3efe6002, 0x3efa8698, 0x3ef6aca8, 0x3ef2d267, 0x3eeef80c,
+    0x3eeb1dca, 0x3ee743d7, 0x3ee36a69, 0x3edf91b5, 0x3edbb9f2, 0x3ed7e354,
+    0x3ed40e13, 0x3ed03a64, 0x3ecc687d, 0x3ec89895, 0x3ec4cae2, 0x3ec0ff9b,
+    0x3ebd36f6, 0x3eb9712a, 0x3eb5ae6f, 0x3eb1eefa, 0x3eae3303, 0x3eaa7ac0,
+    0x3ea6c66a, 0x3ea31636, 0x3e9f6a5d, 0x3e9bc316, 0x3e982097, 0x3e948319,
+    0x3e90ead3, 0x3e8d57fc, 0x3e89cacd, 0x3e86437c, 0x3e82c241, 0x3e7e8eaa,
+    0x3e77a5de, 0x3e70ca8f, 0x3e69fd2c, 0x3e633e26, 0x3e5c8ded, 0x3e55ecf2,
+    0x3e4f5ba5, 0x3e48da79, 0x3e4269de, 0x3e3c0a46, 0x3e35bc23, 0x3e2f7fe7,
+    0x3e295605, 0x3e233ef0, 0x3e1d3b1c, 0x3e174afe, 0x3e116f08, 0x3e0ba7b2,
+    0x3e05f571, 0x3e0058bb, 0x3df5a413, 0x3deac3a7, 0x3de01126, 0x3dd58d86,
+    0x3dcb39bf, 0x3dc116cc, 0x3db725ab, 0x3dad675d, 0x3da3dce9, 0x3d9a8756,
+    0x3d9167b5, 0x3d887f19, 0x3d7f9d38, 0x3d6eaebd, 0x3d5e3510, 0x3d4e3293,
+    0x3d3ea9ba, 0x3d2f9d0e, 0x3d210f33, 0x3d1302e6, 0x3d057b06, 0x3cf0f52e,
+    0x3cd8098d, 0x3cc039eb, 0x3ca98d81, 0x3c940c13, 0x3c7f7c1d, 0x3c59595d,
+    0x3c35c45b, 0x3c14d40b, 0x3bed453b, 0x3bb69d19, 0x3b85f871, 0x3b37623a,
+    0x3ae0ee32, 0x3a61d1c5, 0x398b03f6, 0x00000000
+
+};
+
+const int32_t window_hamm_hex[] = {  // 400-entry window table as raw 32-bit words; first and last entries are both 0x3da3d70a. NOTE(review): presumably a Hamming window as float bit patterns -- confirm with the consumer.
+    0x3da3d70a, 0x3da3f4f1, 0x3da44ea4, 0x3da4e41d, 0x3da5b554, 0x3da6c239,
+    0x3da80abd, 0x3da98ecb, 0x3dab4e4a, 0x3dad491d, 0x3daf7f25, 0x3db1f03d,
+    0x3db49c3e, 0x3db782fd, 0x3dbaa449, 0x3dbdfff1, 0x3dc195be, 0x3dc56575,
+    0x3dc96ed9, 0x3dcdb1a8, 0x3dd22d9d, 0x3dd6e26e, 0x3ddbcfd0, 0x3de0f572,
+    0x3de65301, 0x3debe825, 0x3df1b484, 0x3df7b7c0, 0x3dfdf176, 0x3e0230a1,
+    0x3e05835d, 0x3e08f0ba, 0x3e0c7880, 0x3e101a75, 0x3e13d65f, 0x3e17ac00,
+    0x3e1b9b1b, 0x3e1fa36f, 0x3e23c4bc, 0x3e27febd, 0x3e2c512e, 0x3e30bbc9,
+    0x3e353e46, 0x3e39d85c, 0x3e3e89c0, 0x3e435226, 0x3e483140, 0x3e4d26be,
+    0x3e523251, 0x3e5753a7, 0x3e5c8a6b, 0x3e61d64a, 0x3e6736ec, 0x3e6cabfc,
+    0x3e72351f, 0x3e77d1fd, 0x3e7d8239, 0x3e81a2bc, 0x3e848dae, 0x3e8781c3,
+    0x3e8a7eca, 0x3e8d8495, 0x3e9092f0, 0x3e93a9ab, 0x3e96c894, 0x3e99ef77,
+    0x3e9d1e22, 0x3ea05460, 0x3ea391ff, 0x3ea6d6c8, 0x3eaa2286, 0x3ead7505,
+    0x3eb0ce0f, 0x3eb42d6c, 0x3eb792e6, 0x3ebafe46, 0x3ebe6f54, 0x3ec1e5d9,
+    0x3ec5619c, 0x3ec8e264, 0x3ecc67f8, 0x3ecff220, 0x3ed380a2, 0x3ed71344,
+    0x3edaa9cb, 0x3ede43fe, 0x3ee1e1a3, 0x3ee5827d, 0x3ee92653, 0x3eeccce9,
+    0x3ef07604, 0x3ef42168, 0x3ef7ceda, 0x3efb7e1d, 0x3eff2ef7, 0x3f017096,
+    0x3f034a3f, 0x3f052459, 0x3f06fec5, 0x3f08d967, 0x3f0ab41f, 0x3f0c8ed0,
+    0x3f0e695b, 0x3f1043a2, 0x3f121d87, 0x3f13f6ec, 0x3f15cfb4, 0x3f17a7bf,
+    0x3f197ef0, 0x3f1b5529, 0x3f1d2a4d, 0x3f1efe3d, 0x3f20d0db, 0x3f22a20b,
+    0x3f2471ae, 0x3f263fa8, 0x3f280bda, 0x3f29d628, 0x3f2b9e74, 0x3f2d64a2,
+    0x3f2f2895, 0x3f30ea30, 0x3f32a956, 0x3f3465ec, 0x3f361fd4, 0x3f37d6f3,
+    0x3f398b2d, 0x3f3b3c66, 0x3f3cea83, 0x3f3e9569, 0x3f403cfb, 0x3f41e121,
+    0x3f4381be, 0x3f451eb8, 0x3f46b7f6, 0x3f484d5d, 0x3f49ded3, 0x3f4b6c3f,
+    0x3f4cf588, 0x3f4e7a94, 0x3f4ffb4c, 0x3f517796, 0x3f52ef5a, 0x3f546282,
+    0x3f55d0f4, 0x3f573a9a, 0x3f589f5d, 0x3f59ff26, 0x3f5b59df, 0x3f5caf72,
+    0x3f5dffc9, 0x3f5f4acf, 0x3f60906f, 0x3f61d093, 0x3f630b29, 0x3f64401b,
+    0x3f656f57, 0x3f6698c9, 0x3f67bc5d, 0x3f68da03, 0x3f69f1a6, 0x3f6b0337,
+    0x3f6c0ea3, 0x3f6d13d9, 0x3f6e12c9, 0x3f6f0b62, 0x3f6ffd95, 0x3f70e953,
+    0x3f71ce8c, 0x3f72ad32, 0x3f738537, 0x3f74568d, 0x3f752127, 0x3f75e4f8,
+    0x3f76a1f3, 0x3f77580d, 0x3f780739, 0x3f78af6e, 0x3f79509f, 0x3f79eac3,
+    0x3f7a7dd1, 0x3f7b09be, 0x3f7b8e83, 0x3f7c0c15, 0x3f7c826e, 0x3f7cf187,
+    0x3f7d5957, 0x3f7db9d8, 0x3f7e1305, 0x3f7e64d7, 0x3f7eaf4a, 0x3f7ef258,
+    0x3f7f2dfe, 0x3f7f6237, 0x3f7f8f00, 0x3f7fb457, 0x3f7fd239, 0x3f7fe8a4,
+    0x3f7ff797, 0x3f7fff11, 0x3f7fff11, 0x3f7ff797, 0x3f7fe8a4, 0x3f7fd239,
+    0x3f7fb457, 0x3f7f8f00, 0x3f7f6237, 0x3f7f2dfe, 0x3f7ef258, 0x3f7eaf4a,
+    0x3f7e64d7, 0x3f7e1305, 0x3f7db9d8, 0x3f7d5957, 0x3f7cf187, 0x3f7c826e,
+    0x3f7c0c15, 0x3f7b8e83, 0x3f7b09be, 0x3f7a7dd1, 0x3f79eac3, 0x3f79509f,
+    0x3f78af6e, 0x3f780739, 0x3f77580d, 0x3f76a1f3, 0x3f75e4f8, 0x3f752127,
+    0x3f74568d, 0x3f738537, 0x3f72ad32, 0x3f71ce8c, 0x3f70e953, 0x3f6ffd95,
+    0x3f6f0b62, 0x3f6e12c9, 0x3f6d13d9, 0x3f6c0ea3, 0x3f6b0337, 0x3f69f1a6,
+    0x3f68da03, 0x3f67bc5d, 0x3f6698c9, 0x3f656f57, 0x3f64401b, 0x3f630b29,
+    0x3f61d093, 0x3f60906f, 0x3f5f4acf, 0x3f5dffc9, 0x3f5caf72, 0x3f5b59df,
+    0x3f59ff26, 0x3f589f5d, 0x3f573a9a, 0x3f55d0f4, 0x3f546282, 0x3f52ef5a,
+    0x3f517796, 0x3f4ffb4c, 0x3f4e7a94, 0x3f4cf588, 0x3f4b6c3f, 0x3f49ded3,
+    0x3f484d5d, 0x3f46b7f6, 0x3f451eb8, 0x3f4381be, 0x3f41e121, 0x3f403cfb,
+    0x3f3e9569, 0x3f3cea83, 0x3f3b3c66, 0x3f398b2d, 0x3f37d6f3, 0x3f361fd4,
+    0x3f3465ec, 0x3f32a956, 0x3f30ea30, 0x3f2f2895, 0x3f2d64a2, 0x3f2b9e74,
+    0x3f29d628, 0x3f280bda, 0x3f263fa8, 0x3f2471ae, 0x3f22a20b, 0x3f20d0db,
+    0x3f1efe3d, 0x3f1d2a4d, 0x3f1b5529, 0x3f197ef0, 0x3f17a7bf, 0x3f15cfb4,
+    0x3f13f6ec, 0x3f121d87, 0x3f1043a2, 0x3f0e695b, 0x3f0c8ed0, 0x3f0ab41f,
+    0x3f08d967, 0x3f06fec5, 0x3f052459, 0x3f034a3f, 0x3f017096, 0x3eff2ef7,
+    0x3efb7e1d, 0x3ef7ceda, 0x3ef42168, 0x3ef07604, 0x3eeccce9, 0x3ee92653,
+    0x3ee5827d, 0x3ee1e1a3, 0x3ede43fe, 0x3edaa9cb, 0x3ed71344, 0x3ed380a2,
+    0x3ecff220, 0x3ecc67f8, 0x3ec8e264, 0x3ec5619c, 0x3ec1e5d9, 0x3ebe6f54,
+    0x3ebafe46, 0x3eb792e6, 0x3eb42d6c, 0x3eb0ce0f, 0x3ead7505, 0x3eaa2286,
+    0x3ea6d6c8, 0x3ea391ff, 0x3ea05460, 0x3e9d1e22, 0x3e99ef77, 0x3e96c894,
+    0x3e93a9ab, 0x3e9092f0, 0x3e8d8495, 0x3e8a7eca, 0x3e8781c3, 0x3e848dae,
+    0x3e81a2bc, 0x3e7d8239, 0x3e77d1fd, 0x3e72351f, 0x3e6cabfc, 0x3e6736ec,
+    0x3e61d64a, 0x3e5c8a6b, 0x3e5753a7, 0x3e523251, 0x3e4d26be, 0x3e483140,
+    0x3e435226, 0x3e3e89c0, 0x3e39d85c, 0x3e353e46, 0x3e30bbc9, 0x3e2c512e,
+    0x3e27febd, 0x3e23c4bc, 0x3e1fa36f, 0x3e1b9b1b, 0x3e17ac00, 0x3e13d65f,
+    0x3e101a75, 0x3e0c7880, 0x3e08f0ba, 0x3e05835d, 0x3e0230a1, 0x3dfdf176,
+    0x3df7b7c0, 0x3df1b484, 0x3debe825, 0x3de65301, 0x3de0f572, 0x3ddbcfd0,
+    0x3dd6e26e, 0x3dd22d9d, 0x3dcdb1a8, 0x3dc96ed9, 0x3dc56575, 0x3dc195be,
+    0x3dbdfff1, 0x3dbaa449, 0x3db782fd, 0x3db49c3e, 0x3db1f03d, 0x3daf7f25,
+    0x3dad491d, 0x3dab4e4a, 0x3da98ecb, 0x3da80abd, 0x3da6c239, 0x3da5b554,
+    0x3da4e41d, 0x3da44ea4, 0x3da3f4f1, 0x3da3d70a
+
+};
+
+const int global_cmvn_mean_hex[] = {  // 80 raw 32-bit words. NOTE(review): name suggests global CMVN means stored as float bit patterns -- confirm with the consumer.
+    0x413d6566, 0x4147923f, 0x4156ab15, 0x41613d12, 0x416b155b, 0x41722783,
+    0x4176cd05, 0x4178532a, 0x417aa3c3, 0x417aed19, 0x417d4d2c, 0x417e6abb,
+    0x41805848, 0x418122ab, 0x41812b23, 0x418161a8, 0x41810ef9, 0x4180863a,
+    0x41815d8f, 0x417ff8b2, 0x417de2aa, 0x4180a5f2, 0x417e8bd1, 0x418041ac,
+    0x417f2d60, 0x4180487f, 0x417eb835, 0x418018d8, 0x417ef8c1, 0x417ea302,
+    0x417f30cf, 0x417ea0bb, 0x417ebac2, 0x417faab6, 0x417fca4d, 0x41805e45,
+    0x4180e308, 0x4180ef3e, 0x418109fc, 0x4180afa3, 0x418113e2, 0x4180c915,
+    0x41819f86, 0x418190bf, 0x418220bd, 0x4182f2e5, 0x4183e1c7, 0x41843eec,
+    0x4184b066, 0x418574db, 0x41852611, 0x4184fc81, 0x41851b2a, 0x4185a1c7,
+    0x41861152, 0x41868c28, 0x41871930, 0x41871f83, 0x41868893, 0x4185d919,
+    0x4185664b, 0x418480a6, 0x41840e3a, 0x41836ace, 0x4182b217, 0x4181cb79,
+    0x4180fb13, 0x418098b9, 0x41805ded, 0x417ff69a, 0x417f49bd, 0x417ecef8,
+    0x417e286c, 0x417d9135, 0x417cfff4, 0x417ca8f7, 0x417b2e8f, 0x41773788,
+    0x4170b095, 0x4167417f};
+
+const int global_cmvn_std_hex[] = {  // 80 raw 32-bit words. NOTE(review): name suggests global CMVN standard deviations stored as float bit patterns -- confirm with the consumer.
+    0x4040335e, 0x405235d3, 0x40589be4, 0x4054261f, 0x40544ba2, 0x40575418,
+    0x405b6528, 0x40617999, 0x40605fcf, 0x405c9c6d, 0x40590796, 0x405899fc,
+    0x405810b8, 0x40587c40, 0x40592b5e, 0x4057fb12, 0x4057028b, 0x405515d7,
+    0x4053d714, 0x405418c7, 0x405536bc, 0x4052f54e, 0x4052d382, 0x4051201d,
+    0x4050a8d2, 0x4050857f, 0x404ffe85, 0x4050a0da, 0x40517a8a, 0x40508862,
+    0x40504f68, 0x404f3159, 0x404f0930, 0x404e8a2e, 0x404e7383, 0x404eb185,
+    0x404edaa9, 0x404efed2, 0x404ea8f4, 0x404f6d0d, 0x404ee9d9, 0x404f4cca,
+    0x404fb13f, 0x405051c5, 0x40503f5e, 0x4050df6e, 0x4052974e, 0x4053d421,
+    0x40544d48, 0x40544ec8, 0x40550e57, 0x40558287, 0x4055d122, 0x4056b22a,
+    0x4058ea5c, 0x405acbc3, 0x405a89e7, 0x405a88ed, 0x405afadb, 0x405a1c60,
+    0x405a6f46, 0x405b0a24, 0x405b5f44, 0x405cc0a9, 0x405d984b, 0x405ef9b8,
+    0x4061178a, 0x406262bf, 0x40644904, 0x40660b20, 0x4067f7f1, 0x406a35e5,
+    0x406c1e97, 0x406e16a9, 0x406eadb1, 0x406d0cba, 0x406d9ca0, 0x406f5a14,
+    0x406e84a7, 0x406cd985};
+
+const int global_cmvn_mean_online_hex[] = {  // 80 raw 32-bit words; separate table from global_cmvn_mean_hex with slightly different values.
+    // NOTE(review): name suggests CMVN means for an online/streaming path, stored as float bit patterns -- confirm with the consumer.
+    0x413d5d27, 0x414785ae, 0x4156986a, 0x41612a4e, 0x416b063e, 0x41721c9b,
+    0x4176c505, 0x41784b5b, 0x417a9575, 0x417adfb2, 0x417d4153, 0x417e611e,
+    0x41805288, 0x41811c27, 0x4181250c, 0x41815cd4, 0x41810b77, 0x4180817c,
+    0x41815881, 0x417feaf2, 0x417dd2bf, 0x41809f37, 0x417e7b47, 0x41803a6a,
+    0x417f1ff4, 0x41804382, 0x417ead10, 0x41801220, 0x417eeb28, 0x417e9801,
+    0x417f26b9, 0x417e95f9, 0x417eac06, 0x417f9aa5, 0x417fbb16, 0x41805651,
+    0x4180daaa, 0x4180e84c, 0x41810566, 0x4180ab2c, 0x418111b0, 0x4180c6cc,
+    0x41819e27, 0x418190cc, 0x4182205c, 0x4182f265, 0x4183e1a2, 0x41844012,
+    0x4184b0cd, 0x41857447, 0x418527f7, 0x4184fdc6, 0x41851ad2, 0x4185a148,
+    0x41860f8b, 0x41868888, 0x418712e4, 0x41871702, 0x41867ec3, 0x4185cc48,
+    0x418559b4, 0x41847855, 0x418408f4, 0x418368f4, 0x4182b718, 0x4181d76d,
+    0x41810e52, 0x4180b204, 0x418078a4, 0x41801179, 0x417f5579, 0x417e93b7,
+    0x417d6f2c, 0x417c1a0b, 0x417a6c7a, 0x41787d18, 0x4174eceb, 0x416e3ed3,
+    0x41644af8, 0x41566dd4
+
+};
+
+const int global_cmvn_std_online_hex[] = {  // 80 raw 32-bit words; separate table from global_cmvn_std_hex with slightly different values.
+    // NOTE(review): name suggests CMVN standard deviations for an online/streaming path, stored as float bit patterns -- confirm with the consumer.
+    0x40408fdd, 0x405293b6, 0x4058f2d2, 0x40546ddb, 0x4054984c, 0x4057971b,
+    0x405ba086, 0x4061afa7, 0x4060a24c, 0x405cbb7e, 0x405923f7, 0x4058c91f,
+    0x40585cf3, 0x4058c22a, 0x40594960, 0x405824a6, 0x405703f3, 0x40556377,
+    0x4053e02d, 0x40540a7e, 0x405553c7, 0x4052ead5, 0x4052d23d, 0x40510308,
+    0x4050a2f3, 0x40505b81, 0x404fed20, 0x4050a372, 0x40515196, 0x40504810,
+    0x40501fdd, 0x404f2225, 0x404f0931, 0x404e8a2b, 0x404e773b, 0x404ea782,
+    0x404ee17d, 0x404ef49c, 0x404e884d, 0x404f696b, 0x404edd0e, 0x404f23cc,
+    0x404f74d4, 0x40501e89, 0x405009f3, 0x4050c422, 0x4052902b, 0x4053987c,
+    0x40542997, 0x40543695, 0x4054cbef, 0x40553947, 0x4055ab7c, 0x4056887c,
+    0x4058b710, 0x405a8d28, 0x405a6a27, 0x405a6b3b, 0x405ac8d3, 0x405a031d,
+    0x405a2158, 0x405abb1b, 0x405b1350, 0x405c98c0, 0x405d5cf9, 0x405ead5b,
+    0x40609748, 0x4061dfb9, 0x4063aa9f, 0x40655831, 0x40671a35, 0x40694bf5,
+    0x406b1f59, 0x406cb49b, 0x406cf19e, 0x406b592b, 0x406b757c, 0x406c866d,
+    0x406ac24f, 0x406678d9
+
+};
+
+const unsigned int paraformer_cmvn_mean_hex[] = {  // 560 raw 32-bit words: the 80-value run from 0xc104fd75 to 0xc13b64ae, repeated 7 times back to back.
+    // Every entry has the top bit set (0xc1......), so the values need an unsigned 32-bit element type. NOTE(review): presumably negative float bit patterns (CMVN shift, by the name) -- confirm with the consumer.
+    0xc104fd75, 0xc1099d56, 0xc119dad7, 0xc126f9a7, 0xc133681f, 0xc13e221f,
+    0xc145cc83, 0xc14a3166, 0xc14e1bda, 0xc14d4a62, 0xc14e41a9, 0xc14f4e7b,
+    0xc153297e, 0xc1567ee5, 0xc157dbab, 0xc158dfa4, 0xc158e6f9, 0xc1584e70,
+    0xc15aecea, 0xc15886b8, 0xc156bcb4, 0xc15a7ba9, 0xc1581d34, 0xc15c0a48,
+    0xc15c463f, 0xc15dfc3b, 0xc15bb28b, 0xc15b4413, 0xc158f8c0, 0xc1588ede,
+    0xc158c880, 0xc158ff19, 0xc159815a, 0xc159ed72, 0xc15a458d, 0xc15a93d3,
+    0xc15a06ec, 0xc15953d8, 0xc1592e92, 0xc1579518, 0xc1587d76, 0xc157bc56,
+    0xc159c47c, 0xc15a5ac4, 0xc15b7286, 0xc15cab60, 0xc15e7f8d, 0xc1607ee5,
+    0xc162e9ad, 0xc165bdb0, 0xc167bf3e, 0xc169a0a5, 0xc16b4b68, 0xc16d5682,
+    0xc16ebd51, 0xc170197a, 0xc170d1cc, 0xc1707fc1, 0xc16fd830, 0xc16ec4b1,
+    0xc16de888, 0xc16d3b06, 0xc16cc155, 0xc16c4e31, 0xc16b6abe, 0xc169cde8,
+    0xc1684578, 0xc166c2a4, 0xc165d326, 0xc164df46, 0xc163b4ad, 0xc1632d19,
+    0xc162a94a, 0xc16280fc, 0xc161ae3e, 0xc15fec42, 0xc15cbadc, 0xc15664c3,
+    0xc14c6d5d, 0xc13b64ae, 0xc104fd75, 0xc1099d56, 0xc119dad7, 0xc126f9a7,
+    0xc133681f, 0xc13e221f, 0xc145cc83, 0xc14a3166, 0xc14e1bda, 0xc14d4a62,
+    0xc14e41a9, 0xc14f4e7b, 0xc153297e, 0xc1567ee5, 0xc157dbab, 0xc158dfa4,
+    0xc158e6f9, 0xc1584e70, 0xc15aecea, 0xc15886b8, 0xc156bcb4, 0xc15a7ba9,
+    0xc1581d34, 0xc15c0a48, 0xc15c463f, 0xc15dfc3b, 0xc15bb28b, 0xc15b4413,
+    0xc158f8c0, 0xc1588ede, 0xc158c880, 0xc158ff19, 0xc159815a, 0xc159ed72,
+    0xc15a458d, 0xc15a93d3, 0xc15a06ec, 0xc15953d8, 0xc1592e92, 0xc1579518,
+    0xc1587d76, 0xc157bc56, 0xc159c47c, 0xc15a5ac4, 0xc15b7286, 0xc15cab60,
+    0xc15e7f8d, 0xc1607ee5, 0xc162e9ad, 0xc165bdb0, 0xc167bf3e, 0xc169a0a5,
+    0xc16b4b68, 0xc16d5682, 0xc16ebd51, 0xc170197a, 0xc170d1cc, 0xc1707fc1,
+    0xc16fd830, 0xc16ec4b1, 0xc16de888, 0xc16d3b06, 0xc16cc155, 0xc16c4e31,
+    0xc16b6abe, 0xc169cde8, 0xc1684578, 0xc166c2a4, 0xc165d326, 0xc164df46,
+    0xc163b4ad, 0xc1632d19, 0xc162a94a, 0xc16280fc, 0xc161ae3e, 0xc15fec42,
+    0xc15cbadc, 0xc15664c3, 0xc14c6d5d, 0xc13b64ae, 0xc104fd75, 0xc1099d56,
+    0xc119dad7, 0xc126f9a7, 0xc133681f, 0xc13e221f, 0xc145cc83, 0xc14a3166,
+    0xc14e1bda, 0xc14d4a62, 0xc14e41a9, 0xc14f4e7b, 0xc153297e, 0xc1567ee5,
+    0xc157dbab, 0xc158dfa4, 0xc158e6f9, 0xc1584e70, 0xc15aecea, 0xc15886b8,
+    0xc156bcb4, 0xc15a7ba9, 0xc1581d34, 0xc15c0a48, 0xc15c463f, 0xc15dfc3b,
+    0xc15bb28b, 0xc15b4413, 0xc158f8c0, 0xc1588ede, 0xc158c880, 0xc158ff19,
+    0xc159815a, 0xc159ed72, 0xc15a458d, 0xc15a93d3, 0xc15a06ec, 0xc15953d8,
+    0xc1592e92, 0xc1579518, 0xc1587d76, 0xc157bc56, 0xc159c47c, 0xc15a5ac4,
+    0xc15b7286, 0xc15cab60, 0xc15e7f8d, 0xc1607ee5, 0xc162e9ad, 0xc165bdb0,
+    0xc167bf3e, 0xc169a0a5, 0xc16b4b68, 0xc16d5682, 0xc16ebd51, 0xc170197a,
+    0xc170d1cc, 0xc1707fc1, 0xc16fd830, 0xc16ec4b1, 0xc16de888, 0xc16d3b06,
+    0xc16cc155, 0xc16c4e31, 0xc16b6abe, 0xc169cde8, 0xc1684578, 0xc166c2a4,
+    0xc165d326, 0xc164df46, 0xc163b4ad, 0xc1632d19, 0xc162a94a, 0xc16280fc,
+    0xc161ae3e, 0xc15fec42, 0xc15cbadc, 0xc15664c3, 0xc14c6d5d, 0xc13b64ae,
+    0xc104fd75, 0xc1099d56, 0xc119dad7, 0xc126f9a7, 0xc133681f, 0xc13e221f,
+    0xc145cc83, 0xc14a3166, 0xc14e1bda, 0xc14d4a62, 0xc14e41a9, 0xc14f4e7b,
+    0xc153297e, 0xc1567ee5, 0xc157dbab, 0xc158dfa4, 0xc158e6f9, 0xc1584e70,
+    0xc15aecea, 0xc15886b8, 0xc156bcb4, 0xc15a7ba9, 0xc1581d34, 0xc15c0a48,
+    0xc15c463f, 0xc15dfc3b, 0xc15bb28b, 0xc15b4413, 0xc158f8c0, 0xc1588ede,
+    0xc158c880, 0xc158ff19, 0xc159815a, 0xc159ed72, 0xc15a458d, 0xc15a93d3,
+    0xc15a06ec, 0xc15953d8, 0xc1592e92, 0xc1579518, 0xc1587d76, 0xc157bc56,
+    0xc159c47c, 0xc15a5ac4, 0xc15b7286, 0xc15cab60, 0xc15e7f8d, 0xc1607ee5,
+    0xc162e9ad, 0xc165bdb0, 0xc167bf3e, 0xc169a0a5, 0xc16b4b68, 0xc16d5682,
+    0xc16ebd51, 0xc170197a, 0xc170d1cc, 0xc1707fc1, 0xc16fd830, 0xc16ec4b1,
+    0xc16de888, 0xc16d3b06, 0xc16cc155, 0xc16c4e31, 0xc16b6abe, 0xc169cde8,
+    0xc1684578, 0xc166c2a4, 0xc165d326, 0xc164df46, 0xc163b4ad, 0xc1632d19,
+    0xc162a94a, 0xc16280fc, 0xc161ae3e, 0xc15fec42, 0xc15cbadc, 0xc15664c3,
+    0xc14c6d5d, 0xc13b64ae, 0xc104fd75, 0xc1099d56, 0xc119dad7, 0xc126f9a7,
+    0xc133681f, 0xc13e221f, 0xc145cc83, 0xc14a3166, 0xc14e1bda, 0xc14d4a62,
+    0xc14e41a9, 0xc14f4e7b, 0xc153297e, 0xc1567ee5, 0xc157dbab, 0xc158dfa4,
+    0xc158e6f9, 0xc1584e70, 0xc15aecea, 0xc15886b8, 0xc156bcb4, 0xc15a7ba9,
+    0xc1581d34, 0xc15c0a48, 0xc15c463f, 0xc15dfc3b, 0xc15bb28b, 0xc15b4413,
+    0xc158f8c0, 0xc1588ede, 0xc158c880, 0xc158ff19, 0xc159815a, 0xc159ed72,
+    0xc15a458d, 0xc15a93d3, 0xc15a06ec, 0xc15953d8, 0xc1592e92, 0xc1579518,
+    0xc1587d76, 0xc157bc56, 0xc159c47c, 0xc15a5ac4, 0xc15b7286, 0xc15cab60,
+    0xc15e7f8d, 0xc1607ee5, 0xc162e9ad, 0xc165bdb0, 0xc167bf3e, 0xc169a0a5,
+    0xc16b4b68, 0xc16d5682, 0xc16ebd51, 0xc170197a, 0xc170d1cc, 0xc1707fc1,
+    0xc16fd830, 0xc16ec4b1, 0xc16de888, 0xc16d3b06, 0xc16cc155, 0xc16c4e31,
+    0xc16b6abe, 0xc169cde8, 0xc1684578, 0xc166c2a4, 0xc165d326, 0xc164df46,
+    0xc163b4ad, 0xc1632d19, 0xc162a94a, 0xc16280fc, 0xc161ae3e, 0xc15fec42,
+    0xc15cbadc, 0xc15664c3, 0xc14c6d5d, 0xc13b64ae, 0xc104fd75, 0xc1099d56,
+    0xc119dad7, 0xc126f9a7, 0xc133681f, 0xc13e221f, 0xc145cc83, 0xc14a3166,
+    0xc14e1bda, 0xc14d4a62, 0xc14e41a9, 0xc14f4e7b, 0xc153297e, 0xc1567ee5,
+    0xc157dbab, 0xc158dfa4, 0xc158e6f9, 0xc1584e70, 0xc15aecea, 0xc15886b8,
+    0xc156bcb4, 0xc15a7ba9, 0xc1581d34, 0xc15c0a48, 0xc15c463f, 0xc15dfc3b,
+    0xc15bb28b, 0xc15b4413, 0xc158f8c0, 0xc1588ede, 0xc158c880, 0xc158ff19,
+    0xc159815a, 0xc159ed72, 0xc15a458d, 0xc15a93d3, 0xc15a06ec, 0xc15953d8,
+    0xc1592e92, 0xc1579518, 0xc1587d76, 0xc157bc56, 0xc159c47c, 0xc15a5ac4,
+    0xc15b7286, 0xc15cab60, 0xc15e7f8d, 0xc1607ee5, 0xc162e9ad, 0xc165bdb0,
+    0xc167bf3e, 0xc169a0a5, 0xc16b4b68, 0xc16d5682, 0xc16ebd51, 0xc170197a,
+    0xc170d1cc, 0xc1707fc1, 0xc16fd830, 0xc16ec4b1, 0xc16de888, 0xc16d3b06,
+    0xc16cc155, 0xc16c4e31, 0xc16b6abe, 0xc169cde8, 0xc1684578, 0xc166c2a4,
+    0xc165d326, 0xc164df46, 0xc163b4ad, 0xc1632d19, 0xc162a94a, 0xc16280fc,
+    0xc161ae3e, 0xc15fec42, 0xc15cbadc, 0xc15664c3, 0xc14c6d5d, 0xc13b64ae,
+    0xc104fd75, 0xc1099d56, 0xc119dad7, 0xc126f9a7, 0xc133681f, 0xc13e221f,
+    0xc145cc83, 0xc14a3166, 0xc14e1bda, 0xc14d4a62, 0xc14e41a9, 0xc14f4e7b,
+    0xc153297e, 0xc1567ee5, 0xc157dbab, 0xc158dfa4, 0xc158e6f9, 0xc1584e70,
+    0xc15aecea, 0xc15886b8, 0xc156bcb4, 0xc15a7ba9, 0xc1581d34, 0xc15c0a48,
+    0xc15c463f, 0xc15dfc3b, 0xc15bb28b, 0xc15b4413, 0xc158f8c0, 0xc1588ede,
+    0xc158c880, 0xc158ff19, 0xc159815a, 0xc159ed72, 0xc15a458d, 0xc15a93d3,
+    0xc15a06ec, 0xc15953d8, 0xc1592e92, 0xc1579518, 0xc1587d76, 0xc157bc56,
+    0xc159c47c, 0xc15a5ac4, 0xc15b7286, 0xc15cab60, 0xc15e7f8d, 0xc1607ee5,
+    0xc162e9ad, 0xc165bdb0, 0xc167bf3e, 0xc169a0a5, 0xc16b4b68, 0xc16d5682,
+    0xc16ebd51, 0xc170197a, 0xc170d1cc, 0xc1707fc1, 0xc16fd830, 0xc16ec4b1,
+    0xc16de888, 0xc16d3b06, 0xc16cc155, 0xc16c4e31, 0xc16b6abe, 0xc169cde8,
+    0xc1684578, 0xc166c2a4, 0xc165d326, 0xc164df46, 0xc163b4ad, 0xc1632d19,
+    0xc162a94a, 0xc16280fc, 0xc161ae3e, 0xc15fec42, 0xc15cbadc, 0xc15664c3,
+    0xc14c6d5d, 0xc13b64ae};
+
+const unsigned int paraformer_cmvn_var_hex[] = {
+
+    0x40619618, 0x405fb77c, 0x405d3028, 0x405bef11, 0x405a189d, 0x4057aad5,
+    0x4054f9cc, 0x40518e8c, 0x404fffdd, 0x40510d0d, 0x4052400d, 0x4052bab0,
+    0x40526416, 0x40515cb8, 0x40506aee, 0x404fef8d, 0x404ff527, 0x40505b95,
+    0x4050d61c, 0x4051d0a5, 0x4052abd2, 0x4052f14b, 0x4053d196, 0x4054800d,
+    0x405545f2, 0x4055d71f, 0x40567588, 0x4056de4d, 0x40579b72, 0x40584d35,
+    0x4058cd2f, 0x40594731, 0x4059a53f, 0x405a00ed, 0x405a34c1, 0x405a406e,
+    0x405a1748, 0x405a0300, 0x405a1547, 0x405a66a7, 0x405a9be4, 0x405b04b2,
+    0x405b5754, 0x405b9189, 0x405b9016, 0x405b7a07, 0x405b63f9, 0x405b3f45,
+    0x405b0cb4, 0x405ac80b, 0x405ac1f7, 0x405abbd9, 0x405ac86a, 0x405ad72b,
+    0x405af2f0, 0x405ab465, 0x405a6364, 0x405a1350, 0x4059baa3, 0x4059911d,
+    0x40597921, 0x40595564, 0x40593b8d, 0x4059310f, 0x40594e46, 0x40599bae,
+    0x4059e703, 0x4059feec, 0x405a053a, 0x4059feaa, 0x4059d7a0, 0x40599386,
+    0x40592d0e, 0x4058ce4c, 0x40587335, 0x4058396a, 0x40584ee1, 0x4058925a,
+    0x40592f6d, 0x405a9f0a, 0x40619618, 0x405fb77c, 0x405d3028, 0x405bef11,
+    0x405a189d, 0x4057aad5, 0x4054f9cc, 0x40518e8c, 0x404fffdd, 0x40510d0d,
+    0x4052400d, 0x4052bab0, 0x40526416, 0x40515cb8, 0x40506aee, 0x404fef8d,
+    0x404ff527, 0x40505b95, 0x4050d61c, 0x4051d0a5, 0x4052abd2, 0x4052f14b,
+    0x4053d196, 0x4054800d, 0x405545f2, 0x4055d71f, 0x40567588, 0x4056de4d,
+    0x40579b72, 0x40584d35, 0x4058cd2f, 0x40594731, 0x4059a53f, 0x405a00ed,
+    0x405a34c1, 0x405a406e, 0x405a1748, 0x405a0300, 0x405a1547, 0x405a66a7,
+    0x405a9be4, 0x405b04b2, 0x405b5754, 0x405b9189, 0x405b9016, 0x405b7a07,
+    0x405b63f9, 0x405b3f45, 0x405b0cb4, 0x405ac80b, 0x405ac1f7, 0x405abbd9,
+    0x405ac86a, 0x405ad72b, 0x405af2f0, 0x405ab465, 0x405a6364, 0x405a1350,
+    0x4059baa3, 0x4059911d, 0x40597921, 0x40595564, 0x40593b8d, 0x4059310f,
+    0x40594e46, 0x40599bae, 0x4059e703, 0x4059feec, 0x405a053a, 0x4059feaa,
+    0x4059d7a0, 0x40599386, 0x40592d0e, 0x4058ce4c, 0x40587335, 0x4058396a,
+    0x40584ee1, 0x4058925a, 0x40592f6d, 0x405a9f0a, 0x40619618, 0x405fb77c,
+    0x405d3028, 0x405bef11, 0x405a189d, 0x4057aad5, 0x4054f9cc, 0x40518e8c,
+    0x404fffdd, 0x40510d0d, 0x4052400d, 0x4052bab0, 0x40526416, 0x40515cb8,
+    0x40506aee, 0x404fef8d, 0x404ff527, 0x40505b95, 0x4050d61c, 0x4051d0a5,
+    0x4052abd2, 0x4052f14b, 0x4053d196, 0x4054800d, 0x405545f2, 0x4055d71f,
+    0x40567588, 0x4056de4d, 0x40579b72, 0x40584d35, 0x4058cd2f, 0x40594731,
+    0x4059a53f, 0x405a00ed, 0x405a34c1, 0x405a406e, 0x405a1748, 0x405a0300,
+    0x405a1547, 0x405a66a7, 0x405a9be4, 0x405b04b2, 0x405b5754, 0x405b9189,
+    0x405b9016, 0x405b7a07, 0x405b63f9, 0x405b3f45, 0x405b0cb4, 0x405ac80b,
+    0x405ac1f7, 0x405abbd9, 0x405ac86a, 0x405ad72b, 0x405af2f0, 0x405ab465,
+    0x405a6364, 0x405a1350, 0x4059baa3, 0x4059911d, 0x40597921, 0x40595564,
+    0x40593b8d, 0x4059310f, 0x40594e46, 0x40599bae, 0x4059e703, 0x4059feec,
+    0x405a053a, 0x4059feaa, 0x4059d7a0, 0x40599386, 0x40592d0e, 0x4058ce4c,
+    0x40587335, 0x4058396a, 0x40584ee1, 0x4058925a, 0x40592f6d, 0x405a9f0a,
+    0x40619618, 0x405fb77c, 0x405d3028, 0x405bef11, 0x405a189d, 0x4057aad5,
+    0x4054f9cc, 0x40518e8c, 0x404fffdd, 0x40510d0d, 0x4052400d, 0x4052bab0,
+    0x40526416, 0x40515cb8, 0x40506aee, 0x404fef8d, 0x404ff527, 0x40505b95,
+    0x4050d61c, 0x4051d0a5, 0x4052abd2, 0x4052f14b, 0x4053d196, 0x4054800d,
+    0x405545f2, 0x4055d71f, 0x40567588, 0x4056de4d, 0x40579b72, 0x40584d35,
+    0x4058cd2f, 0x40594731, 0x4059a53f, 0x405a00ed, 0x405a34c1, 0x405a406e,
+    0x405a1748, 0x405a0300, 0x405a1547, 0x405a66a7, 0x405a9be4, 0x405b04b2,
+    0x405b5754, 0x405b9189, 0x405b9016, 0x405b7a07, 0x405b63f9, 0x405b3f45,
+    0x405b0cb4, 0x405ac80b, 0x405ac1f7, 0x405abbd9, 0x405ac86a, 0x405ad72b,
+    0x405af2f0, 0x405ab465, 0x405a6364, 0x405a1350, 0x4059baa3, 0x4059911d,
+    0x40597921, 0x40595564, 0x40593b8d, 0x4059310f, 0x40594e46, 0x40599bae,
+    0x4059e703, 0x4059feec, 0x405a053a, 0x4059feaa, 0x4059d7a0, 0x40599386,
+    0x40592d0e, 0x4058ce4c, 0x40587335, 0x4058396a, 0x40584ee1, 0x4058925a,
+    0x40592f6d, 0x405a9f0a, 0x40619618, 0x405fb77c, 0x405d3028, 0x405bef11,
+    0x405a189d, 0x4057aad5, 0x4054f9cc, 0x40518e8c, 0x404fffdd, 0x40510d0d,
+    0x4052400d, 0x4052bab0, 0x40526416, 0x40515cb8, 0x40506aee, 0x404fef8d,
+    0x404ff527, 0x40505b95, 0x4050d61c, 0x4051d0a5, 0x4052abd2, 0x4052f14b,
+    0x4053d196, 0x4054800d, 0x405545f2, 0x4055d71f, 0x40567588, 0x4056de4d,
+    0x40579b72, 0x40584d35, 0x4058cd2f, 0x40594731, 0x4059a53f, 0x405a00ed,
+    0x405a34c1, 0x405a406e, 0x405a1748, 0x405a0300, 0x405a1547, 0x405a66a7,
+    0x405a9be4, 0x405b04b2, 0x405b5754, 0x405b9189, 0x405b9016, 0x405b7a07,
+    0x405b63f9, 0x405b3f45, 0x405b0cb4, 0x405ac80b, 0x405ac1f7, 0x405abbd9,
+    0x405ac86a, 0x405ad72b, 0x405af2f0, 0x405ab465, 0x405a6364, 0x405a1350,
+    0x4059baa3, 0x4059911d, 0x40597921, 0x40595564, 0x40593b8d, 0x4059310f,
+    0x40594e46, 0x40599bae, 0x4059e703, 0x4059feec, 0x405a053a, 0x4059feaa,
+    0x4059d7a0, 0x40599386, 0x40592d0e, 0x4058ce4c, 0x40587335, 0x4058396a,
+    0x40584ee1, 0x4058925a, 0x40592f6d, 0x405a9f0a, 0x40619618, 0x405fb77c,
+    0x405d3028, 0x405bef11, 0x405a189d, 0x4057aad5, 0x4054f9cc, 0x40518e8c,
+    0x404fffdd, 0x40510d0d, 0x4052400d, 0x4052bab0, 0x40526416, 0x40515cb8,
+    0x40506aee, 0x404fef8d, 0x404ff527, 0x40505b95, 0x4050d61c, 0x4051d0a5,
+    0x4052abd2, 0x4052f14b, 0x4053d196, 0x4054800d, 0x405545f2, 0x4055d71f,
+    0x40567588, 0x4056de4d, 0x40579b72, 0x40584d35, 0x4058cd2f, 0x40594731,
+    0x4059a53f, 0x405a00ed, 0x405a34c1, 0x405a406e, 0x405a1748, 0x405a0300,
+    0x405a1547, 0x405a66a7, 0x405a9be4, 0x405b04b2, 0x405b5754, 0x405b9189,
+    0x405b9016, 0x405b7a07, 0x405b63f9, 0x405b3f45, 0x405b0cb4, 0x405ac80b,
+    0x405ac1f7, 0x405abbd9, 0x405ac86a, 0x405ad72b, 0x405af2f0, 0x405ab465,
+    0x405a6364, 0x405a1350, 0x4059baa3, 0x4059911d, 0x40597921, 0x40595564,
+    0x40593b8d, 0x4059310f, 0x40594e46, 0x40599bae, 0x4059e703, 0x4059feec,
+    0x405a053a, 0x4059feaa, 0x4059d7a0, 0x40599386, 0x40592d0e, 0x4058ce4c,
+    0x40587335, 0x4058396a, 0x40584ee1, 0x4058925a, 0x40592f6d, 0x405a9f0a,
+    0x40619618, 0x405fb77c, 0x405d3028, 0x405bef11, 0x405a189d, 0x4057aad5,
+    0x4054f9cc, 0x40518e8c, 0x404fffdd, 0x40510d0d, 0x4052400d, 0x4052bab0,
+    0x40526416, 0x40515cb8, 0x40506aee, 0x404fef8d, 0x404ff527, 0x40505b95,
+    0x4050d61c, 0x4051d0a5, 0x4052abd2, 0x4052f14b, 0x4053d196, 0x4054800d,
+    0x405545f2, 0x4055d71f, 0x40567588, 0x4056de4d, 0x40579b72, 0x40584d35,
+    0x4058cd2f, 0x40594731, 0x4059a53f, 0x405a00ed, 0x405a34c1, 0x405a406e,
+    0x405a1748, 0x405a0300, 0x405a1547, 0x405a66a7, 0x405a9be4, 0x405b04b2,
+    0x405b5754, 0x405b9189, 0x405b9016, 0x405b7a07, 0x405b63f9, 0x405b3f45,
+    0x405b0cb4, 0x405ac80b, 0x405ac1f7, 0x405abbd9, 0x405ac86a, 0x405ad72b,
+    0x405af2f0, 0x405ab465, 0x405a6364, 0x405a1350, 0x4059baa3, 0x4059911d,
+    0x40597921, 0x40595564, 0x40593b8d, 0x4059310f, 0x40594e46, 0x40599bae,
+    0x4059e703, 0x4059feec, 0x405a053a, 0x4059feaa, 0x4059d7a0, 0x40599386,
+    0x40592d0e, 0x4058ce4c, 0x40587335, 0x4058396a, 0x40584ee1, 0x4058925a,
+    0x40592f6d, 0x405a9f0a
+
+};
+
+const int pos_enc_coe_hex[] = {  // 256 raw 32-bit words that read as IEEE-754 float32 bit patterns (0x3f800000 == 1.0f).
+    0x3f800000, 0x3f84b063, 0x3f898cc0, 0x3f8e96b2, 0x3f93cfe5, 0x3f993a15,  // NOTE(review): values look like 10000^(i/256), i = 0..255 -- confirm with the code that reads this table.
+    0x3f9ed70c, 0x3fa4a8a8, 0x3faab0d5, 0x3fb0f193, 0x3fb76cf5, 0x3fbe2520,
+    0x3fc51c50, 0x3fcc54d2, 0x3fd3d10c, 0x3fdb9378, 0x3fe39ea9, 0x3febf549,
+    0x3ff49a1b, 0x3ffd8ffe, 0x40036cf4, 0x40083d78, 0x400d3b22, 0x40126799,
+    0x4017c496, 0x401d53df, 0x4023174b, 0x402910c4, 0x402f4244, 0x4035adda,
+    0x403c55a4, 0x40433bd9, 0x404a62c2, 0x4051ccbd, 0x40597c3f, 0x406173d4,
+    0x4069b621, 0x407245e2, 0x407b25ed, 0x40822c9a, 0x4086f161, 0x408be2e0,
+    0x409102bc, 0x409652a6, 0x409bd461, 0x40a189c1, 0x40a774aa, 0x40ad9711,
+    0x40b3f300, 0x40ba8a92, 0x40c15ff6, 0x40c8756f, 0x40cfcd58, 0x40d76a1e,
+    0x40df4e48, 0x40e77c73, 0x40eff755, 0x40f8c1be, 0x4100ef4c, 0x4105a873,
+    0x410a8de6, 0x410fa144, 0x4114e43b, 0x411a588a, 0x41200000, 0x4125dc7c,
+    0x412beff0, 0x41323c5f, 0x4138c3df, 0x413f889a, 0x41468cd0, 0x414dd2d2,
+    0x41555d0a, 0x415d2df7, 0x41654832, 0x416dae69, 0x41766364, 0x417f6a07,
+    0x418462a7, 0x41893c2b, 0x418e432a, 0x4193794e, 0x4198e051, 0x419e79ff,
+    0x41a44831, 0x41aa4cd6, 0x41b089ea, 0x41b70180, 0x41bdb5bc, 0x41c4a8d7,
+    0x41cbdd1e, 0x41d354f5, 0x41db12d6, 0x41e31950, 0x41eb6b0d, 0x41f40ad0,
+    0x41fcfb72, 0x42031ff6, 0x4207eda7, 0x420ce865, 0x421211d5, 0x42176bad,
+    0x421cf7b4, 0x4222b7c0, 0x4228adb9, 0x422edb98, 0x4235436b, 0x423be74f,
+    0x4242c979, 0x4249ec31, 0x425151d4, 0x4258fcd6, 0x4260efc0, 0x42692d37,
+    0x4271b7f3, 0x427a92cb, 0x4281e057, 0x4286a253, 0x428b90ed, 0x4290adc8,
+    0x4295fa95, 0x429b7917, 0x42a12b1f, 0x42a71290, 0x42ad3160, 0x42b38995,
+    0x42ba1d4a, 0x42c0eead, 0x42c80000, 0x42cf539b, 0x42d6ebec, 0x42decb76,
+    0x42e6f4d6, 0x42ef6ac1, 0x42f83003, 0x4300a3c3, 0x43055a26, 0x430a3cbb,
+    0x430f4d1f, 0x43148d01, 0x4319fe1e, 0x431fa244, 0x43257b51, 0x432b8b36,
+    0x4331d3f4, 0x433857a1, 0x433f1865, 0x4346187e, 0x434d5a3e, 0x4354e00b,
+    0x435cac64, 0x4364c1e0, 0x436d232b, 0x4375d30c, 0x437ed466, 0x43841519,
+    0x4388ebc5, 0x438defd2, 0x439322e8, 0x439886c2, 0x439e1d27, 0x43a3e7f3,
+    0x43a9e911, 0x43b0227e, 0x43b6964a, 0x43bd4698, 0x43c435a1, 0x43cb65b0,
+    0x43d2d927, 0x43da927e, 0x43e29445, 0x43eae123, 0x43f37bd8, 0x43fc673e,
+    0x4402d325, 0x44079e06, 0x440c95d8, 0x4411bc42, 0x441712f8, 0x441c9bbf,
+    0x4422586d, 0x44284ae8, 0x442e7528, 0x4434d93a, 0x443b793b, 0x4442575d,
+    0x444975e6, 0x4450d734, 0x44587db7, 0x44606bfa, 0x4468a49c, 0x44712a58,
+    0x447a0000, 0x44819441, 0x44865373, 0x448b3f2a, 0x44905906, 0x4495a2b9,
+    0x449b1e02, 0x44a0ccb4, 0x44a6b0b0, 0x44accbe9, 0x44b32067, 0x44b9b042,
+    0x44c07da6, 0x44c78ad5, 0x44ceda26, 0x44d66e03, 0x44de48f1, 0x44e66d89,
+    0x44eede7f, 0x44f79e9e, 0x45005867, 0x45050c07, 0x4509ebbf, 0x450ef92c,
+    0x451435fb, 0x4519a3e8, 0x451f44bf, 0x45251a60, 0x452b26b7, 0x45316bc7,
+    0x4537eba3, 0x453ea872, 0x4545a471, 0x454ce1f0, 0x45546355, 0x455c2b1d,
+    0x45643bdc, 0x456c983e, 0x45754309, 0x457e3f1c, 0x4583c7b8, 0x45889b8f,
+    0x458d9cab, 0x4592ccb6, 0x45982d67, 0x459dc087, 0x45a387ee, 0x45a98587,
+    0x45afbb4e, 0x45b62b53, 0x45bcd7b6, 0x45c3c2af, 0x45caee88, 0x45d25da1,
+    0x45da1272, 0x45e20f88, 0x45ea5789, 0x45f2ed34, 0x45fbd360, 0x46028680,
+    0x46074e93, 0x460c437c, 0x461166e2, 0x4616ba77};
+
+const int pos_enc_div_term_hex[] = {  // 256 raw 32-bit words that read as IEEE-754 float32 bit patterns (0x3f800000 == 1.0f).
+    0x3f800000, 0x3f76f410, 0x3f6e39f8, 0x3f65ced3, 0x3f5dafd7, 0x3f55da52,  // NOTE(review): values look like 10000^(-i/256), i = 0..255 -- confirm with the code that reads this table.
+    0x3f4e4bac, 0x3f470165, 0x3f3ff911, 0x3f39305c, 0x3f32a506, 0x3f2c54e5,
+    0x3f263de0, 0x3f205df3, 0x3f1ab32b, 0x3f153ba8, 0x3f0ff59a, 0x3f0adf41,
+    0x3f05f6ee, 0x3f013b01, 0x3ef953cf, 0x3ef0843c, 0x3ee80460, 0x3edfd167,
+    0x3ed7e89b, 0x3ed0475c, 0x3ec8eb24, 0x3ec1d181, 0x3ebaf81a, 0x3eb45caa,
+    0x3eadfcff, 0x3ea7d6fd, 0x3ea1e89b, 0x3e9c2fe1, 0x3e96aaea, 0x3e9157e1,
+    0x3e8c3504, 0x3e87409d, 0x3e827909, 0x3e7bb965, 0x3e72d424, 0x3e6a3f5c,
+    0x3e61f836, 0x3e59fbf3, 0x3e5247ed, 0x3e4ad998, 0x3e43ae7c, 0x3e3cc43a,
+    0x3e361887, 0x3e2fa92d, 0x3e29740a, 0x3e23770f, 0x3e1db040, 0x3e181db4,
+    0x3e12bd91, 0x3e0d8e0f, 0x3e088d77, 0x3e03ba20, 0x3dfe24e1, 0x3df529bb,
+    0x3dec7fd5, 0x3de42450, 0x3ddc1466, 0x3dd44d6c, 0x3dcccccd, 0x3dc5900d,
+    0x3dbe94c7, 0x3db7d8a9, 0x3db15978, 0x3dab150e, 0x3da50957, 0x3d9f3451,
+    0x3d99940e, 0x3d9426b0, 0x3d8eea6c, 0x3d89dd84, 0x3d84fe4d, 0x3d804b29,
+    0x3d778512, 0x3d6ec5da, 0x3d6655c3, 0x3d5e3202, 0x3d5657e4, 0x3d4ec4ce,
+    0x3d47763f, 0x3d4069ca, 0x3d399d19, 0x3d330dec, 0x3d2cba15, 0x3d269f7d,
+    0x3d20bc1d, 0x3d1b0e01, 0x3d159348, 0x3d104a21, 0x3d0b30cc, 0x3d064597,
+    0x3d0186e2, 0x3cf9e635, 0x3cf11176, 0x3ce88c9c, 0x3ce054d2, 0x3cd86761,
+    0x3cd0c1a8, 0x3cc9611d, 0x3cc24350, 0x3cbb65e3, 0x3cb4c691, 0x3cae6328,
+    0x3ca8398b, 0x3ca247ad, 0x3c9c8b97, 0x3c970362, 0x3c91ad39, 0x3c8c8757,
+    0x3c879008, 0x3c82c5a5, 0x3c7c4d33, 0x3c7362b9, 0x3c6ac8e7, 0x3c627ce5,
+    0x3c5a7bf1, 0x3c52c366, 0x3c4b50b4, 0x3c442163, 0x3c3d3311, 0x3c368373,
+    0x3c301052, 0x3c29d789, 0x3c23d70a, 0x3c1e0cd7, 0x3c187705, 0x3c1313ba,
+    0x3c0de12d, 0x3c08dda5, 0x3c040779, 0x3bfeba1b, 0x3bf5b9b0, 0x3bed0ab3,
+    0x3be4aa46, 0x3bdc95a0, 0x3bd4ca14, 0x3bcd450e, 0x3bc6040e, 0x3bbf04ae,
+    0x3bb8449c, 0x3bb1c19b, 0x3bab7983, 0x3ba56a3f, 0x3b9f91cc, 0x3b99ee3b,
+    0x3b947dae, 0x3b8f3e56, 0x3b8a2e77, 0x3b854c64, 0x3b80967d, 0x3b781668,
+    0x3b6f520d, 0x3b66dd02, 0x3b5eb47a, 0x3b56d5bf, 0x3b4f3e37, 0x3b47eb5e,
+    0x3b40dac5, 0x3b3a0a16, 0x3b33770f, 0x3b2d1f81, 0x3b270153, 0x3b211a7e,
+    0x3b1b690d, 0x3b15eb1c, 0x3b109edb, 0x3b0b8287, 0x3b06946f, 0x3b01d2f1,
+    0x3afa78f1, 0x3af19f03, 0x3ae91528, 0x3ae0d88b, 0x3ad8e673, 0x3ad13c3c,
+    0x3ac9d75c, 0x3ac2b561, 0x3abbd3ec, 0x3ab530b7, 0x3aaec98e, 0x3aa89c52,
+    0x3aa2a6f6, 0x3a9ce782, 0x3a975c0e, 0x3a9202c3, 0x3a8cd9db, 0x3a87dfa1,
+    0x3a83126f, 0x3a7ce158, 0x3a73f1a2, 0x3a6b52c4, 0x3a6301e2, 0x3a5afc3b,
+    0x3a533f27, 0x3a4bc816, 0x3a44948c, 0x3a3da229, 0x3a36ee9e, 0x3a3077b3,
+    0x3a2a3b43, 0x3a24373e, 0x3a1e69a5, 0x3a18d08b, 0x3a136a16, 0x3a0e347c,
+    0x3a092e02, 0x3a0454ff, 0x39ff4fad, 0x39f649f8, 0x39ed95e3, 0x39e5308a,
+    0x39dd1726, 0x39d54706, 0x39cdbd95, 0x39c67853, 0x39bf74d7, 0x39b8b0cf,
+    0x39b229fb, 0x39abde33, 0x39a5cb5f, 0x399fef7e, 0x399a489e, 0x3994d4df,
+    0x398f9272, 0x398a7f9b, 0x39859aa9, 0x3980e1fe, 0x3978a814, 0x396fde93,
+    0x39676491, 0x395f373e, 0x395753e5, 0x394fb7e7, 0x394860c1, 0x39414c02,
+    0x393a7753, 0x3933e06f, 0x392d8529, 0x39276363, 0x39217917, 0x391bc44d,
+    0x39164323, 0x3910f3c6, 0x390bd472, 0x3906e374, 0x39021f2b, 0x38fb0c03,
+    0x38f22ce3, 0x38e99e04, 0x38e15c92, 0x38d965ce};
+#endif
--- a/funasr_local/runtime/onnxruntime/src/resample.cc
+++ b/funasr_local/runtime/onnxruntime/src/resample.cc
@@ -0,0 +1,305 @@
+/**
+ * Copyright     2013  Pegah Ghahremani
+ *               2014  IMSL, PKU-HKUST (author: Wei Shi)
+ *               2014  Yanqing Sun, Junjie Wang
+ *               2014  Johns Hopkins University (author: Daniel Povey)
+ * Copyright     2023  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// this file is copied and modified from
+// kaldi/src/feat/resample.cc
+
+#include "resample.h"
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+
+#include <cstdlib>
+#include <type_traits>
+
+#ifndef M_2PI
+#define M_2PI 6.283185307179586476925286766559005
+#endif
+
+#ifndef M_PI
+#define M_PI 3.1415926535897932384626433832795
+#endif
+
+template <class I>
+I Gcd(I m, I n) {
+  // Adapted from kaldi/src/base/kaldi-math.h.
+  //
+  // Returns the non-negative greatest common divisor of m and n, computed
+  // with the Euclidean algorithm.  When exactly one argument is zero the
+  // absolute value of the other is returned.  When both are zero the GCD is
+  // undefined: a message is written to stderr and the process exits.
+  static_assert(std::is_integral<I>::value, "");
+
+  if (m == 0 && n == 0) {
+    fprintf(stderr, "Undefined GCD since m = 0, n = 0.\n");
+    exit(-1);
+  }
+  if (m == 0) return n > 0 ? n : -n;
+  if (n == 0) return m > 0 ? m : -m;
+
+  // Alternate the roles of m and n; whichever hits zero first leaves the
+  // divisor (up to sign) in the other variable.
+  for (;;) {
+    m %= n;
+    if (m == 0) return n > 0 ? n : -n;
+    n %= m;
+    if (n == 0) return m > 0 ? m : -m;
+  }
+}
+
+/// Computes the least common multiple of m and n.  Both arguments must be
+/// strictly positive, which is checked with an assert.
+template <class I>
+I Lcm(I m, I n) {
+  // Adapted from kaldi/src/base/kaldi-math.h.
+  assert(m > 0 && n > 0);
+  const I divisor = Gcd(m, n);
+  // Divide before multiplying so the intermediate products stay as small as
+  // the final result allows.
+  const I m_reduced = m / divisor;
+  const I n_reduced = n / divisor;
+  return divisor * m_reduced * n_reduced;
+}
+
+// Returns the inner product of the first n elements of a and b.  The sum is
+// accumulated in single precision, from index 0 upwards.
+static float DotProduct(const float *a, const float *b, int32_t n) {
+  float acc = 0;
+  int32_t k = 0;
+  while (k != n) {
+    acc += a[k] * b[k];
+    ++k;
+  }
+  return acc;
+}
+
+// Stores the user-supplied parameters, derives how many input and output
+// samples make up one repeating unit, precomputes the filter weights for
+// that unit and clears the streaming state.
+LinearResample::LinearResample(int32_t samp_rate_in_hz,
+                               int32_t samp_rate_out_hz, float filter_cutoff_hz,
+                               int32_t num_zeros)
+    : samp_rate_in_(samp_rate_in_hz),
+      samp_rate_out_(samp_rate_out_hz),
+      filter_cutoff_(filter_cutoff_hz),
+      num_zeros_(num_zeros) {
+  assert(samp_rate_in_hz > 0.0 && samp_rate_out_hz > 0.0 &&
+         filter_cutoff_hz > 0.0 && filter_cutoff_hz * 2 <= samp_rate_in_hz &&
+         filter_cutoff_hz * 2 <= samp_rate_out_hz && num_zeros > 0);
+
+  // The repeating unit runs at the greatest common divisor of the two
+  // sampling rates; dividing each rate by it gives the samples per unit.
+  const int32_t common_divisor = Gcd(samp_rate_in_, samp_rate_out_);
+  input_samples_in_unit_ = samp_rate_in_ / common_divisor;
+  output_samples_in_unit_ = samp_rate_out_ / common_divisor;
+
+  SetIndexesAndWeights();
+  Reset();
+}
+
+// Fills first_index_ and weights_ for every output sample of one repeating
+// unit: first_index_[i] is the first input sample that contributes to output
+// sample i, and weights_[i] holds the filter coefficients for the consecutive
+// input samples starting there.
+void LinearResample::SetIndexesAndWeights() {
+  first_index_.resize(output_samples_in_unit_);
+  weights_.resize(output_samples_in_unit_);
+
+  // Distance in seconds from the centre of the filter to its edge.
+  const double half_width = num_zeros_ / (2.0 * filter_cutoff_);
+
+  for (int32_t out = 0; out < output_samples_in_unit_; ++out) {
+    const double t_out = out / static_cast<double>(samp_rate_out_);
+    const double t_lo = t_out - half_width;
+    const double t_hi = t_out + half_width;
+    // ceil on the lower edge and floor on the upper edge keep only input
+    // samples that lie inside the window.  If an edge lands exactly on a
+    // sample, that sample is kept with a zero coefficient, which costs a
+    // little efficiency but is otherwise harmless.
+    const int32_t first = ceil(t_lo * samp_rate_in_);
+    const int32_t last = floor(t_hi * samp_rate_in_);
+    const int32_t count = last - first + 1;
+
+    first_index_[out] = first;
+    std::vector<float> &taps = weights_[out];
+    taps.resize(count);
+    for (int32_t k = 0; k < count; ++k) {
+      const int32_t in = first + k;
+      const double t_in = in / static_cast<double>(samp_rate_in_);
+      // Only the magnitude of the offset matters to the filter.
+      const double offset = t_in - t_out;
+      taps[k] = FilterFunc(offset) / samp_rate_in_;
+    }
+  }
+}
+
+/** Evaluates the windowed low-pass filter at time offset t.
+
+    t is measured in seconds from the centre of the filter.  The result is
+    the product of a sinc low-pass response with cutoff filter_cutoff_ and a
+    raised-cosine (Hanning) window that is zero for
+    |t| >= num_zeros_ / (2 * filter_cutoff_).
+*/
+float LinearResample::FilterFunc(float t) const {
+  // Window term: stays zero outside the support of the window.
+  float window = 0.0;
+  if (fabs(t) < num_zeros_ / (2.0 * filter_cutoff_)) {
+    window = 0.5 * (1 + cos(M_2PI * filter_cutoff_ / num_zeros_ * t));
+  }
+
+  // Sinc term: at t == 0 use the limit of sin(2*pi*fc*t) / (pi*t).
+  const float filter = (t != 0)
+                           ? sin(M_2PI * filter_cutoff_ * t) / (M_PI * t)
+                           : 2 * filter_cutoff_;
+  return filter * window;
+}
+
+// Drops all streaming state so the next call to Resample() starts a new
+// signal: no buffered input and both sample counters at zero.
+void LinearResample::Reset() {
+  input_remainder_.clear();
+  output_sample_offset_ = 0;
+  input_sample_offset_ = 0;
+}
+
+// Resamples the next input_dim samples of the current signal into *output.
+// With flush == false the trailing samples that still need future input are
+// held back and the tail of the input is remembered for the next call; with
+// flush == true everything is emitted (missing input counts as zero) and the
+// object is reset.
+void LinearResample::Resample(const float *input, int32_t input_dim, bool flush,
+                              std::vector<float> *output) {
+  const int64_t total_in = input_sample_offset_ + input_dim;
+  const int64_t tot_output_samp = GetNumOutputSamples(total_in, flush);
+
+  assert(tot_output_samp >= output_sample_offset_);
+
+  output->resize(tot_output_samp - output_sample_offset_);
+
+  // out_pos indexes the whole output signal, not just the part produced by
+  // this call.
+  for (int64_t out_pos = output_sample_offset_; out_pos < tot_output_samp;
+       ++out_pos) {
+    int64_t first_in_abs;
+    int32_t phase;
+    GetIndexes(out_pos, &first_in_abs, &phase);
+    const std::vector<float> &taps = weights_[phase];
+    const int32_t num_taps = static_cast<int32_t>(taps.size());
+    // Position inside `input` of the first weighted sample; negative when the
+    // filter reaches back into previously seen input.
+    const int32_t first_local =
+        static_cast<int32_t>(first_in_abs - input_sample_offset_);
+
+    float value = 0.0;
+    const bool fully_inside =
+        first_local >= 0 && first_local + num_taps <= input_dim;
+    if (fully_inside) {
+      value = DotProduct(input + first_local, taps.data(), taps.size());
+    } else {
+      // Edge case: part of the filter falls before or after `input`.
+      const int32_t remainder_len =
+          static_cast<int32_t>(input_remainder_.size());
+      for (int32_t k = 0; k < num_taps; ++k) {
+        const float weight = taps[k];
+        const int32_t local = first_local + k;
+        if (local < 0) {
+          // Before this chunk: take the sample from the stored remainder if
+          // it reaches back far enough, otherwise it contributes nothing.
+          if (remainder_len + local >= 0) {
+            value += weight * input_remainder_[remainder_len + local];
+          }
+        } else if (local < input_dim) {
+          value += weight * input[local];
+        } else {
+          // Past the end of the input, which adds zero.  This can only be
+          // reached when flushing; otherwise this output sample would not
+          // have been requested.
+          assert(flush);
+        }
+      }
+    }
+    (*output)[static_cast<int32_t>(out_pos - output_sample_offset_)] = value;
+  }
+
+  if (!flush) {
+    SetRemainder(input, input_dim);
+    input_sample_offset_ = total_in;
+    output_sample_offset_ = tot_output_samp;
+  } else {
+    Reset();  // The signal is finished; forget the internal state.
+  }
+}
+
+// Returns how many output samples exist in total for a signal of
+// input_num_samp input samples.  Without flush, the samples whose filter
+// window would reach past the end of the input are not counted.
+int64_t LinearResample::GetNumOutputSamples(int64_t input_num_samp,
+                                            bool flush) const {
+  // Time is measured exactly in "ticks" of 1 / ticks_per_sec seconds, where
+  // ticks_per_sec is the least common multiple of the two sampling rates.
+  const int32_t ticks_per_sec = Lcm(samp_rate_in_, samp_rate_out_);
+  const int32_t ticks_per_in = ticks_per_sec / samp_rate_in_;
+
+  // Length in ticks of the interval [ 0, input_num_samp / samp_rate_in_ ).
+  int64_t span_ticks = input_num_samp * ticks_per_in;
+  if (!flush) {
+    const float half_width_sec = num_zeros_ / (2.0 * filter_cutoff_);
+    // Taking the floor is enough: the interval is open on the right, so
+    // shortening it by a fraction of a tick can never change the largest
+    // integer it contains (e.g. [ 0, 2 ) and [ 0, 2 - 0.9 ) both end at 1).
+    const int32_t half_width_ticks = floor(half_width_sec * ticks_per_sec);
+    // When not flushing, the usable interval shrinks by the distance from
+    // the centre of the window to its edge.
+    span_ticks -= half_width_ticks;
+  }
+  if (span_ticks <= 0) return 0;
+
+  const int32_t ticks_per_out = ticks_per_sec / samp_rate_out_;
+  // Integer division rounds down, so `quotient` is the last output sample in
+  // the closed interval [ 0, span_ticks ].
+  const int64_t quotient = span_ticks / ticks_per_out;
+  // The interval is really half-open.  If the last sample sits exactly on
+  // the right edge it is excluded, leaving samples 0 .. quotient - 1;
+  // otherwise samples 0 .. quotient all count.
+  const bool lands_on_edge = (quotient * ticks_per_out == span_ticks);
+  return lands_on_edge ? quotient : quotient + 1;
+}
+
+// Maps the absolute output-sample index samp_out to the first input sample
+// that has a weight (*first_samp_in, may be negative) and to the row of
+// weights_ to use (*samp_out_wrapped).
+void LinearResample::GetIndexes(int64_t samp_out, int64_t *first_samp_in,
+                                int32_t *samp_out_wrapped) const {
+  // A unit is the shortest non-zero time span that is a whole number of both
+  // input and output sample periods; `unit` is the number of the unit that
+  // samp_out falls into.
+  const int64_t unit = samp_out / output_samples_in_unit_;
+  // Position of samp_out inside its unit, i.e. samp_out modulo
+  // output_samples_in_unit_.
+  const int32_t wrapped =
+      static_cast<int32_t>(samp_out - unit * output_samples_in_unit_);
+  *samp_out_wrapped = wrapped;
+  // first_index_ describes the first unit only; shift it by whole units.
+  *first_samp_in = first_index_[wrapped] + unit * input_samples_in_unit_;
+}
+
+// Rebuilds input_remainder_ so that it holds the most recent input samples:
+// the tail of `input`, preceded by the tail of the previous remainder when
+// `input` is shorter than the remainder.
+void LinearResample::SetRemainder(const float *input, int32_t input_dim) {
+  const std::vector<float> previous(input_remainder_);
+  // Keep the full side-to-side width of the filter, measured in input
+  // samples.  Half of that might look sufficient, but output samples can lie
+  // "in the past" relative to the start of the newest input, and keeping
+  // more than strictly needed does no harm.
+  const int32_t max_remainder_needed =
+      ceil(samp_rate_in_ * num_zeros_ / filter_cutoff_);
+  input_remainder_.resize(max_remainder_needed);
+
+  const int32_t keep = static_cast<int32_t>(input_remainder_.size());
+  const int32_t prev_len = static_cast<int32_t>(previous.size());
+  for (int32_t slot = 0; slot < keep; ++slot) {
+    // Slot `slot` lies (keep - slot) samples before the end of the data seen
+    // so far; `src` is the same position expressed as an index into `input`.
+    const int32_t src = slot - keep + input_dim;
+    if (src >= 0) {
+      input_remainder_[slot] = input[src];
+    } else if (src + prev_len >= 0) {
+      // Before the start of `input`: take it from the previous remainder.
+      input_remainder_[slot] = previous[src + prev_len];
+    }
+    // Otherwise the slot is left untouched.
+  }
+}
--- a/funasr_local/runtime/onnxruntime/src/resample.h
+++ b/funasr_local/runtime/onnxruntime/src/resample.h
@@ -0,0 +1,137 @@
+/**
+ * Copyright     2013  Pegah Ghahremani
+ *               2014  IMSL, PKU-HKUST (author: Wei Shi)
+ *               2014  Yanqing Sun, Junjie Wang
+ *               2014  Johns Hopkins University (author: Daniel Povey)
+ * Copyright     2023  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// this file is copied and modified from
+// kaldi/src/feat/resample.h
+
+#ifndef RESAMPLE_H
+#define RESAMPLE_H
+
+#include <cstdint>
+#include <vector>
+
+
+/*
+   We require that the input and output sampling rates be specified as
+   integers, as this is an easy way to specify that their ratio be rational.
+*/
+
+/// Streaming sample-rate converter based on a windowed low-pass filter
+/// (copied and modified from kaldi/src/feat/resample.h).
+class LinearResample {
+ public:
+  /// Constructor.  The input and output sample rates are integers because we
+  /// need to find their common divisor.  The constructor asserts that
+  /// 2 * filter_cutoff_hz is at most samp_rate_in_hz and at most
+  /// samp_rate_out_hz, and that all arguments are positive.  num_zeros
+  /// controls the sharpness of the filter, more == sharper but less efficient.
+  /// We suggest around 4 to 10 for normal use.
+  LinearResample(int32_t samp_rate_in_hz, int32_t samp_rate_out_hz,
+                 float filter_cutoff_hz, int32_t num_zeros);
+
+  /// Resets the state of the object prior to processing a new signal.  It is
+  /// only necessary if you have called Resample(x, x_size, false, y) for some
+  /// signal, leading to a remainder of the signal being kept, but then
+  /// abandon processing the signal before calling
+  /// Resample(x, x_size, true, y) for the last piece.  Calling it
+  /// unnecessarily between signals does no harm.
+  void Reset();
+
+  /// This function does the resampling.  If you call it with flush == true and
+  /// you have never called it with flush == false, it just resamples the input
+  /// signal (it resizes the output to a suitable number of samples).
+  ///
+  /// You can also use this function to process a signal a piece at a time.
+  /// Suppose you break it into piece1, piece2, ... pieceN.  You can call
+  /// \code{.cc}
+  /// Resample(piece1, piece1_size, false, &output1);
+  /// Resample(piece2, piece2_size, false, &output2);
+  /// Resample(piece3, piece3_size, true, &output3);
+  /// \endcode
+  /// If you call it with flush == false, it won't output the last few samples
+  /// but will remember them, so that if you later give it a second piece of
+  /// the input signal it can process it correctly.
+  /// If your most recent call to the object was with flush == false, it will
+  /// have internal state; you can remove this by calling Reset().
+  /// Empty input is acceptable.
+  void Resample(const float *input, int32_t input_dim, bool flush,
+                std::vector<float> *output);
+
+  /// Return the input and output sampling rates (for checks, for example).
+  int32_t GetInputSamplingRate() const { return samp_rate_in_; }
+  int32_t GetOutputSamplingRate() const { return samp_rate_out_; }
+
+ private:
+  /// Precomputes first_index_ and weights_ for one repeating unit.
+  void SetIndexesAndWeights();
+
+  /// Evaluates the windowed filter function at a time offset (in seconds)
+  /// from the centre of the filter.
+  float FilterFunc(float) const;
+
+  /// This function outputs the number of output samples we will output
+  /// for a signal with "input_num_samp" input samples.  If flush == true,
+  /// we return the largest n such that
+  /// (n/samp_rate_out_) is in the interval [ 0, input_num_samp/samp_rate_in_ ),
+  /// and note that the interval is half-open.  If flush == false,
+  /// define window_width as num_zeros_ / (2.0 * filter_cutoff_);
+  /// we return the largest n such that (n/samp_rate_out_) is in the interval
+  /// [ 0, input_num_samp/samp_rate_in_ - window_width ).
+  int64_t GetNumOutputSamples(int64_t input_num_samp, bool flush) const;
+
+  /// Given an output-sample index, this function outputs to *first_samp_in the
+  /// first input-sample index that we have a weight on (may be negative),
+  /// and to *samp_out_wrapped the index into weights_ where we can get the
+  /// corresponding weights on the input.
+  inline void GetIndexes(int64_t samp_out, int64_t *first_samp_in,
+                         int32_t *samp_out_wrapped) const;
+
+  /// Stores the tail of the input seen so far in input_remainder_ so that the
+  /// next call to Resample() can reach back into it.
+  void SetRemainder(const float *input, int32_t input_dim);
+
+ private:
+  // The following variables are provided by the user.
+  int32_t samp_rate_in_;
+  int32_t samp_rate_out_;
+  float filter_cutoff_;
+  int32_t num_zeros_;
+
+  int32_t input_samples_in_unit_;  ///< The number of input samples in the
+                                   ///< smallest repeating unit:
+                                   ///< samp_rate_in_ /
+                                   ///< Gcd(samp_rate_in_, samp_rate_out_)
+
+  int32_t output_samples_in_unit_;  ///< The number of output samples in the
+                                    ///< smallest repeating unit:
+                                    ///< samp_rate_out_ /
+                                    ///< Gcd(samp_rate_in_, samp_rate_out_)
+
+  /// The first input-sample index that we sum over, for this output-sample
+  /// index.  May be negative; any truncation at the beginning is handled
+  /// separately.  This is just for the first few output samples, but we can
+  /// extrapolate the correct input-sample index for arbitrary output samples.
+  std::vector<int32_t> first_index_;
+
+  /// Weights on the input samples, for this output-sample index.
+  std::vector<std::vector<float>> weights_;
+
+  // The following variables keep track of where we are in a particular signal,
+  // if it is being provided over multiple calls to Resample().
+
+  int64_t input_sample_offset_;   ///< The number of input samples we have
+                                  ///< already received for this signal
+                                  ///< (including anything in
+                                  ///< input_remainder_)
+  int64_t output_sample_offset_;  ///< The number of samples we have already
+                                  ///< output for this signal.
+  std::vector<float> input_remainder_;  ///< A small trailing part of the
+                                        ///< previously seen input signal.
+};
+
+#endif  // RESAMPLE_H
--- a/funasr_local/runtime/onnxruntime/src/tensor.h
+++ b/funasr_local/runtime/onnxruntime/src/tensor.h
@@ -0,0 +1,155 @@
+#ifndef TENSOR_H
+#define TENSOR_H
+
+#include "alignedmem.h"
+
+using namespace std;
+
+/// Minimal owning tensor with up to four dimensions, backed by one buffer
+/// obtained from AlignedMalloc(32, ...).
+///
+/// `size` holds the extent of each dimension (the shorter constructors set
+/// the leading ones to 1), `buff_size` is the number of elements in use and
+/// the private `mem_size` is the number of elements the allocation can hold.
+///
+/// NOTE(review): the destructor releases `buff`, but no copy constructor or
+/// copy assignment is declared, so copying a Tensor by value would free the
+/// same buffer twice.  Pass tensors by pointer or reference.
+template <typename T> class Tensor {
+  private:
+    void alloc_buff();
+    void free_buff();
+    bool grow_buff(int min_elems);
+    int mem_size;
+
+  public:
+    T *buff;
+    int size[4];
+    int buff_size;
+    Tensor(Tensor<T> *in);
+    Tensor(int a);
+    Tensor(int a, int b);
+    Tensor(int a, int b, int c);
+    Tensor(int a, int b, int c, int d);
+    ~Tensor();
+    void zeros();
+    void shape();
+    void disp();
+    void dump(const char *mode);
+    void concat(Tensor<T> *din, int dim);
+    void resize(int a, int b, int c, int d);
+    void add(float coe, Tensor<T> *in);
+    void add(Tensor<T> *in);
+    void add(Tensor<T> *in1, Tensor<T> *in2);
+    void reload(Tensor<T> *in);
+};
+
+/// Creates an uninitialised tensor of shape (1, 1, 1, a).
+template <typename T> Tensor<T>::Tensor(int a) : size{1, 1, 1, a}
+{
+    alloc_buff();
+}
+
+/// Creates an uninitialised tensor of shape (1, 1, a, b).
+template <typename T> Tensor<T>::Tensor(int a, int b) : size{1, 1, a, b}
+{
+    alloc_buff();
+}
+
+/// Creates an uninitialised tensor of shape (1, a, b, c).
+template <typename T> Tensor<T>::Tensor(int a, int b, int c) : size{1, a, b, c}
+{
+    alloc_buff();
+}
+
+/// Creates an uninitialised tensor of shape (a, b, c, d).
+template <typename T>
+Tensor<T>::Tensor(int a, int b, int c, int d) : size{a, b, c, d}
+{
+    alloc_buff();
+}
+
+/// Creates a tensor with the same shape as *in and copies its elements.
+template <typename T> Tensor<T>::Tensor(Tensor<T> *in)
+{
+    memcpy(size, in->size, 4 * sizeof(int));
+    alloc_buff();
+    memcpy(buff, in->buff, in->buff_size * sizeof(T));
+}
+
+template <typename T> Tensor<T>::~Tensor()
+{
+    free_buff();
+}
+
+/// Allocates room for size[0] * size[1] * size[2] * size[3] elements.
+/// NOTE(review): the result of AlignedMalloc is not checked for NULL.
+template <typename T> void Tensor<T>::alloc_buff()
+{
+    buff_size = size[0] * size[1] * size[2] * size[3];
+    mem_size = buff_size;
+    buff = static_cast<T *>(AlignedMalloc(32, buff_size * sizeof(T)));
+}
+
+/// Releases the buffer.  It was obtained from AlignedMalloc, so it has to be
+/// returned through the matching AlignedFree.
+template <typename T> void Tensor<T>::free_buff()
+{
+    AlignedFree(buff);
+}
+
+/// Makes sure the allocation can hold at least min_elems elements, keeping
+/// the buff_size elements currently in use.  Returns false, leaving the
+/// tensor untouched, if the larger buffer cannot be allocated.
+template <typename T> bool Tensor<T>::grow_buff(int min_elems)
+{
+    if (min_elems <= mem_size) {
+        return true;
+    }
+    T *grown = static_cast<T *>(AlignedMalloc(32, min_elems * sizeof(T)));
+    if (grown == nullptr) {
+        return false;
+    }
+    memcpy(grown, buff, buff_size * sizeof(T));
+    free_buff();
+    buff = grown;
+    mem_size = min_elems;
+    return true;
+}
+
+/// Sets every element in use to zero bytes.
+template <typename T> void Tensor<T>::zeros()
+{
+    memset(buff, 0, buff_size * sizeof(T));
+}
+
+/// Prints the four extents to stdout.
+template <typename T> void Tensor<T>::shape()
+{
+    printf("(%d,%d,%d,%d)\n", size[0], size[1], size[2], size[3]);
+}
+
+/// Appends the elements of *din after the current ones and adds
+/// din->size[dim] to size[dim].  The buffer is enlarged first when it is too
+/// small; previously the copy ran past the end of the allocation.
+///
+/// The data are appended flat, so (assuming row-major storage) the result is
+/// a true concatenation along `dim` only when every dimension before `dim`
+/// has extent 1.
+template <typename T> void Tensor<T>::concat(Tensor<T> *din, int dim)
+{
+    const int total = buff_size + din->buff_size;
+    if (!grow_buff(total)) {
+        fprintf(stderr, "Tensor::concat: out of memory\n");
+        return;
+    }
+    memcpy(buff + buff_size, din->buff, din->buff_size * sizeof(T));
+    buff_size = total;
+    size[dim] += din->size[dim];
+}
+
+/// Changes the shape to (a, b, c, d).  Existing elements are kept; the buffer
+/// is enlarged when the new shape needs more elements than are allocated, so
+/// buff_size never exceeds the allocation.
+template <typename T> void Tensor<T>::resize(int a, int b, int c, int d)
+{
+    const int wanted = a * b * c * d;
+    if (!grow_buff(wanted)) {
+        fprintf(stderr, "Tensor::resize: out of memory\n");
+        return;
+    }
+    size[0] = a;
+    size[1] = b;
+    size[2] = c;
+    size[3] = d;
+    buff_size = wanted;
+}
+
+/// Element-wise this += coe * in.  *in must hold at least buff_size elements.
+template <typename T> void Tensor<T>::add(float coe, Tensor<T> *in)
+{
+    for (int i = 0; i < buff_size; i++) {
+        buff[i] = buff[i] + coe * in->buff[i];
+    }
+}
+
+/// Element-wise this += in.  *in must hold at least buff_size elements.
+template <typename T> void Tensor<T>::add(Tensor<T> *in)
+{
+    for (int i = 0; i < buff_size; i++) {
+        buff[i] = buff[i] + in->buff[i];
+    }
+}
+
+/// Element-wise this += in1 + in2.  Both inputs must hold at least buff_size
+/// elements.
+template <typename T> void Tensor<T>::add(Tensor<T> *in1, Tensor<T> *in2)
+{
+    for (int i = 0; i < buff_size; i++) {
+        buff[i] = buff[i] + in1->buff[i] + in2->buff[i];
+    }
+}
+
+/// Overwrites the leading elements with the in->buff_size elements of *in.
+/// The shape is not changed; the caller must ensure *in is not larger than
+/// this tensor's allocation.
+template <typename T> void Tensor<T>::reload(Tensor<T> *in)
+{
+    memcpy(buff, in->buff, in->buff_size * sizeof(T));
+}
+
+/// Writes all elements to cout, separated by spaces and followed by a
+/// newline.
+template <typename T> void Tensor<T>::disp()
+{
+    for (int i = 0; i < buff_size; i++) {
+        cout << buff[i] << " ";
+    }
+    cout << endl;
+}
+
+/// Writes the raw element bytes to the file "tmp.bin", opened with the given
+/// fopen mode.  Nothing is written if the file cannot be opened.
+template <typename T> void Tensor<T>::dump(const char *mode)
+{
+    FILE *fp = fopen("tmp.bin", mode);
+    if (fp == nullptr) {
+        fprintf(stderr, "Tensor::dump: cannot open tmp.bin\n");
+        return;
+    }
+    fwrite(buff, 1, buff_size * sizeof(T), fp);
+    fclose(fp);
+}
+#endif
--- a/funasr_local/runtime/onnxruntime/src/tokenizer.cpp
+++ b/funasr_local/runtime/onnxruntime/src/tokenizer.cpp
@@ -0,0 +1,218 @@
+ /**
+ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+ * MIT License  (https://opensource.org/licenses/MIT)
+*/
+
+#include "precomp.h"
+
+// Construct a tokenizer and immediately try to load its tables from
+// sz_yamlfile. The boolean returned by OpenYaml() is not inspected here.
+CTokenizer::CTokenizer(const char* sz_yamlfile)
+	: m_ready(false)
+{
+	OpenYaml(sz_yamlfile);
+}
+
+// Construct an empty tokenizer; nothing is loaded until OpenYaml() is called.
+CTokenizer::CTokenizer()
+	: m_ready(false)
+{
+}
+
+// Recursively walk a YAML node and log every scalar found below it at INFO
+// level. Maps are descended through their values, sequences through their
+// elements.
+void CTokenizer::ReadYaml(const YAML::Node& node)
+{
+	if (node.IsMap())
+	{
+		// Map node: recurse into the value of each key/value pair.
+		for (const auto& entry : node)
+		{
+			ReadYaml(entry.second);
+		}
+	}
+	else if (node.IsSequence())
+	{
+		// Sequence node: recurse into each element.
+		const size_t count = node.size();
+		for (size_t idx = 0; idx < count; ++idx)
+		{
+			ReadYaml(node[idx]);
+		}
+	}
+	else if (node.IsScalar())
+	{
+		// Scalar node: log its text.
+		LOG(INFO) << node.as<string>();
+	}
+}
+
+// Load the token and punctuation tables from a YAML file.
+//
+// Every scalar entry of the "token_list" sequence is appended to m_id2token
+// and registered in m_token2id under its index in the YAML sequence; the
+// "punc_list" sequence fills m_id2punc / m_punc2id the same way. Entries are
+// appended to whatever the tables already contain.
+//
+// sz_yamlfile: path of the YAML file.
+// Returns true (and sets m_ready) when both lists were processed. If the file
+// itself cannot be loaded the process is terminated with exit(-1). If reading
+// the lists raises any yaml-cpp exception (bad conversion, bad subscript, ...)
+// the error is logged, m_ready is cleared and false is returned.
+bool CTokenizer::OpenYaml(const char* sz_yamlfile)
+{
+	YAML::Node config;
+	try{
+		config = YAML::LoadFile(sz_yamlfile);
+	}catch(exception const &e){
+        LOG(INFO) << "Error loading file, yaml file error or not exist.";
+        exit(-1);
+    }
+
+	try
+	{
+		auto Tokens = config["token_list"];
+		if (Tokens.IsSequence())
+		{
+			for (size_t i = 0; i < Tokens.size(); ++i)
+			{
+				if (!Tokens[i].IsScalar())
+				{
+					continue;
+				}
+				// Convert once and reuse for both tables.
+				const string token = Tokens[i].as<string>();
+				m_id2token.push_back(token);
+				m_token2id.insert(make_pair(token, static_cast<int>(i)));
+			}
+		}
+		auto Puncs = config["punc_list"];
+		if (Puncs.IsSequence())
+		{
+			for (size_t i = 0; i < Puncs.size(); ++i)
+			{
+				if (!Puncs[i].IsScalar())
+				{
+					continue;
+				}
+				const string punc = Puncs[i].as<string>();
+				m_id2punc.push_back(punc);
+				m_punc2id.insert(make_pair(punc, static_cast<int>(i)));
+			}
+		}
+	}
+	// YAML::Exception is the base of every yaml-cpp error (including
+	// BadFile), so conversion and subscript failures are handled as well.
+	catch (const YAML::Exception& e) {
+		LOG(ERROR) << "Read error!" << " " << e.what();
+		m_ready = false;
+		return  false;
+	}
+	m_ready = true;
+	return m_ready;
+}
+
+// Translate token ids into token strings by indexing m_id2token.
+// Ids are used as indices without any range check.
+vector<string> CTokenizer::Id2String(vector<int> input)
+{
+	vector<string> tokens;
+	tokens.reserve(input.size());
+	for (size_t idx = 0; idx < input.size(); ++idx)
+	{
+		tokens.push_back(m_id2token[input[idx]]);
+	}
+	return tokens;
+}
+
+// Look up the id of a single token (the text is used as given, without case
+// folding). Tokens that are not in m_token2id map to the id stored for
+// UNK_CHAR.
+int CTokenizer::String2Id(string input)
+{
+	auto hit = m_token2id.find(input);
+	if (hit != m_token2id.end())
+	{
+		return hit->second;
+	}
+	return m_token2id[UNK_CHAR];
+}
+
+// Translate token strings into ids.
+//
+// Each token is lower-cased (on the local copy only) and then looked up in
+// m_token2id; tokens that are not found map to the id stored for UNK_CHAR.
+vector<int> CTokenizer::String2Ids(vector<string> input)
+{
+	vector<int> result;
+	result.reserve(input.size());
+	for (auto& item : input)
+	{
+		// tolower() is only defined for values representable as unsigned
+		// char (or EOF); UTF-8 bytes stored in a signed char are negative,
+		// so route every byte through unsigned char first.
+		transform(item.begin(), item.end(), item.begin(),
+			[](unsigned char ch) { return static_cast<char>(::tolower(ch)); });
+
+		auto hit = m_token2id.find(item);
+		if (hit != m_token2id.end())
+			result.push_back(hit->second);
+		else
+			result.push_back(m_token2id[UNK_CHAR]);
+	}
+	return result;
+}
+
+// Translate punctuation ids into punctuation strings by indexing m_id2punc.
+// Ids are used as indices without any range check.
+vector<string> CTokenizer::Id2Punc(vector<int> input)
+{
+	vector<string> puncs;
+	puncs.reserve(input.size());
+	for (size_t idx = 0; idx < input.size(); ++idx)
+	{
+		puncs.push_back(m_id2punc[input[idx]]);
+	}
+	return puncs;
+}
+
+// Return the punctuation string stored at index n_punc_id of m_id2punc
+// (no range check).
+string CTokenizer::Id2Punc(int n_punc_id)
+{
+	const string& punc = m_id2punc[n_punc_id];
+	return punc;
+}
+
+// Translate punctuation strings into ids through m_punc2id.
+// The lookup uses map::operator[], so an unknown string is inserted into the
+// map and yields id 0.
+vector<int> CTokenizer::Punc2Ids(vector<string> input)
+{
+	vector<int> ids;
+	ids.reserve(input.size());
+	for (size_t idx = 0; idx < input.size(); ++idx)
+	{
+		ids.push_back(m_punc2id[input[idx]]);
+	}
+	return ids;
+}
+
+// Cut a byte string into consecutive pieces, one per lead byte.
+//
+// The length of each piece is the number of consecutive 1 bits at the top of
+// its first byte (at most 6), or 1 when the top bit is clear -- i.e. the
+// sequence length announced by a UTF-8 lead byte. The last piece is shorter
+// when the string ends early.
+vector<string> CTokenizer::SplitChineseString(const string & str_info)
+{
+	vector<string> pieces;
+	const int total = str_info.size();
+
+	for (int pos = 0; pos < total; )
+	{
+		const int lead = str_info[pos];
+
+		// Count the leading 1 bits of the first byte.
+		int ones = 0;
+		while (ones < 6 && (lead & (0x80 >> ones)))
+		{
+			++ones;
+		}
+
+		const int width = ones > 0 ? ones : 1;
+		pieces.push_back(str_info.substr(pos, width));
+		pos += width;
+	}
+	return pieces;
+}
+
+// Split `str` at every occurrence of `split` and append the fields to `res`.
+//
+// An empty input appends nothing. Otherwise one field is appended per
+// separator plus one for the text after the last separator, so consecutive
+// or trailing separators produce empty fields. Existing entries of `res` are
+// kept.
+//
+// The string is scanned once with a moving start offset instead of copying
+// the remaining text after every field.
+void CTokenizer::StrSplit(const string& str, const char split, vector<string>& res)
+{
+	if (str.empty())
+	{
+		return;
+	}
+
+	size_t start = 0;
+	for (;;)
+	{
+		const size_t pos = str.find(split, start);
+		if (pos == string::npos)
+		{
+			// Text after the last separator (possibly empty).
+			res.emplace_back(str.substr(start));
+			break;
+		}
+		res.emplace_back(str.substr(start, pos - start));
+		start = pos + 1;
+	}
+}
+
+ // Split a text into tokens and look up their ids.
+ //
+ // The text is first split at spaces. Inside every field, runs of bytes with
+ // the top bit clear (ASCII / English) become one token each, while runs of
+ // bytes with the top bit set are cut into single characters with
+ // SplitChineseString(). Tokens are appended to str_out; id_out is then
+ // replaced by String2Ids() of the complete str_out.
+ void CTokenizer::Tokenize(const char* str_info, vector<string> & str_out, vector<int> & id_out)
+{
+	vector<string> fields;
+	StrSplit(str_info, ' ', fields);
+
+	for (const string& field : fields)
+	{
+		string ascii_run;
+		string multibyte_run;
+
+		for (const char ch : field)
+		{
+			const bool is_ascii = (ch & 0x80) == 0;
+			if (is_ascii)
+			{
+				// English byte: a pending multi-byte run ends here.
+				if (!multibyte_run.empty())
+				{
+					// for utf-8 chinese
+					const vector<string> chars = SplitChineseString(multibyte_run);
+					str_out.insert(str_out.end(), chars.begin(), chars.end());
+					multibyte_run.clear();
+				}
+				ascii_run += ch;
+			}
+			else
+			{
+				// Multi-byte byte: a pending English run ends here.
+				if (!ascii_run.empty())
+				{
+					str_out.push_back(ascii_run);
+					ascii_run.clear();
+				}
+				multibyte_run += ch;
+			}
+		}
+
+		// Flush whatever run is still open at the end of the field.
+		if (!multibyte_run.empty())
+		{
+			const vector<string> chars = SplitChineseString(multibyte_run);
+			str_out.insert(str_out.end(), chars.begin(), chars.end());
+		}
+		if (!ascii_run.empty())
+		{
+			str_out.push_back(ascii_run);
+		}
+	}
+	id_out = String2Ids(str_out);
+}
--- a/funasr_local/runtime/onnxruntime/src/tokenizer.h
+++ b/funasr_local/runtime/onnxruntime/src/tokenizer.h
@@ -0,0 +1,32 @@
+/**
+ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+ * MIT License  (https://opensource.org/licenses/MIT)
+*/
+
+#pragma once
+#include <yaml-cpp/yaml.h>
+
+// Token / punctuation vocabulary loaded from a YAML file, with helpers to
+// convert between strings and ids and to split text into tokens.
+class CTokenizer {
+private:
+
+	// Set to true by OpenYaml() once both lists have been processed.
+	bool  m_ready = false;
+	// id -> string tables, filled from the YAML "token_list" / "punc_list".
+	vector<string>   m_id2token,m_id2punc;
+	// string -> id tables for the same two lists.
+	map<string, int>  m_token2id,m_punc2id;
+
+public:
+
+	// Constructs the tokenizer and calls OpenYaml(sz_yamlfile).
+	CTokenizer(const char* sz_yamlfile);
+	// Constructs an empty tokenizer; call OpenYaml() to load the tables.
+	CTokenizer();
+	// Loads "token_list" and "punc_list" from the YAML file. Returns true on
+	// success; calls exit(-1) when the file cannot be loaded.
+	bool OpenYaml(const char* sz_yamlfile);
+	// Recursively logs every scalar contained in `node`.
+	void ReadYaml(const YAML::Node& node);
+	// Maps token ids to token strings by indexing (no range check).
+	vector<string> Id2String(vector<int> input);
+	// Lower-cases each string and maps it to its token id; strings that are
+	// not in the table get the id of UNK_CHAR.
+	vector<int> String2Ids(vector<string> input);
+	// Maps one string to its token id (no lower-casing); strings that are
+	// not in the table get the id of UNK_CHAR.
+	int String2Id(string input);
+	// Maps punctuation ids to punctuation strings by indexing (no range check).
+	vector<string> Id2Punc(vector<int> input);
+	// Returns the punctuation string stored at index n_punc_id.
+	string Id2Punc(int n_punc_id);
+	// Maps punctuation strings to ids through the punctuation table.
+	vector<int> Punc2Ids(vector<string> input);
+	// Cuts a byte string into pieces whose length is given by the leading
+	// 1 bits of each piece's first byte (UTF-8 lead-byte convention).
+	vector<string> SplitChineseString(const string& str_info);
+	// Splits `str` at every `split` character and appends the fields to `res`.
+	void StrSplit(const string& str, const char split, vector<string>& res);
+	// Splits a text into tokens (appended to str_out) and stores the ids of
+	// all entries of str_out in id_out.
+	void Tokenize(const char* str_info, vector<string>& str_out, vector<int>& id_out);
+
+};
--- a/funasr_local/runtime/onnxruntime/src/util.cpp
+++ b/funasr_local/runtime/onnxruntime/src/util.cpp
@@ -0,0 +1,180 @@
+
+#include "precomp.h"
+
+/**
+ * Read a whole binary file into a freshly allocated, 32-byte aligned buffer.
+ *
+ * @param filename  path of the file to read
+ * @return pointer to the buffer obtained from AlignedMalloc(), or NULL when
+ *         the file cannot be opened, its size cannot be determined, or the
+ *         allocation fails. The caller owns the buffer.
+ *
+ * A short read is reported on stderr; the buffer is still returned in that
+ * case, as before.
+ */
+float *LoadParams(const char *filename)
+{
+    FILE *fp = fopen(filename, "rb");
+    if (fp == NULL) {
+        return NULL;
+    }
+
+    // Determine the file length by seeking to the end.
+    if (fseek(fp, 0, SEEK_END) != 0) {
+        fclose(fp);
+        return NULL;
+    }
+    const long file_len = ftell(fp);
+    if (file_len < 0 || fseek(fp, 0, SEEK_SET) != 0) {
+        fclose(fp);
+        return NULL;
+    }
+    const size_t byte_count = (size_t)file_len;
+
+    float *params_addr = (float *)AlignedMalloc(32, byte_count);
+    if (params_addr == NULL) {
+        fclose(fp);
+        return NULL;
+    }
+
+    const size_t bytes_read = fread(params_addr, 1, byte_count, fp);
+    fclose(fp);
+    if (bytes_read != byte_count) {
+        fprintf(stderr, "LoadParams: short read on parameter file\n");
+    }
+
+    return params_addr;
+}
+
+/**
+ * Round `val` up to the nearest multiple of `align`.
+ *
+ * The result is ceil(val / align) * align, computed with integer arithmetic
+ * so that it stays exact for values that a float cannot represent
+ * (above 2^24).
+ *
+ * @param val    value to round
+ * @param align  alignment step; when it is 0 there is no multiple to round
+ *               to and `val` is returned unchanged
+ * @return the smallest multiple of `align` that is not below `val`
+ *         (for a positive `align`)
+ */
+int ValAlign(int val, int align)
+{
+    if (align == 0) {
+        return val;
+    }
+    int quotient = val / align;  // truncates toward zero
+    const int remainder = val % align;
+    // Truncation already equals the ceiling when the exact quotient is
+    // negative; only a positive, inexact quotient has to be bumped.
+    if (remainder != 0 && ((remainder > 0) == (align > 0))) {
+        ++quotient;
+    }
+    return quotient * align;
+}
+
+/** Print `size` floats from `din` to stdout with "%f ", followed by a newline. */
+void DispParams(float *din, int size)
+{
+    for (int idx = 0; idx < size; ++idx) {
+        printf("%f ", din[idx]);
+    }
+    printf("\n");
+}
+/**
+ * Write `len` bytes starting at `data` to `filename`, replacing any existing
+ * file content (the file is opened with mode "wb+").
+ *
+ * If the file cannot be opened a message is written to stderr and nothing
+ * is saved.
+ */
+void SaveDataFile(const char *filename, void *data, uint32_t len)
+{
+    FILE *fp = fopen(filename, "wb+");
+    if (fp == NULL) {
+        fprintf(stderr, "SaveDataFile: cannot open output file\n");
+        return;
+    }
+    fwrite(data, 1, len, fp);
+    fclose(fp);
+}
+
+/**
+ * Normalise each row of 512 values in place.
+ *
+ * The number of rows is din->size[2]. Every row is divided by
+ * sqrt(sum_of_squares / 512 + norm).
+ */
+void BasicNorm(Tensor<float> *&din, float norm)
+{
+    const int rows = din->size[2];
+
+    for (int r = 0; r < rows; ++r) {
+        float *row = din->buff + r * 512;
+
+        float sum = 0;
+        for (int c = 0; c < 512; ++c) {
+            sum += row[c] * row[c];
+        }
+
+        float mean = sqrt(sum / 512 + norm);
+        for (int c = 0; c < 512; ++c) {
+            row[c] = row[c] / mean;
+        }
+    }
+}
+
+/**
+ * Find the largest value among the first `len` entries of `din`.
+ *
+ * @param max_val  receives the maximum, or -INFINITY when nothing is larger
+ * @param max_idx  receives the index of the first occurrence of the maximum,
+ *                 or -1 when no entry is larger than -INFINITY
+ */
+void FindMax(float *din, int len, float &max_val, int &max_idx)
+{
+    max_idx = -1;
+    max_val = -INFINITY;
+    for (int pos = 0; pos < len; ++pos) {
+        const float candidate = din[pos];
+        if (!(candidate > max_val)) {
+            continue;
+        }
+        max_val = candidate;
+        max_idx = pos;
+    }
+}
+
+/**
+ * Join two path fragments with the platform separator ('\\' when _WIN32 is
+ * defined, '/' otherwise).
+ *
+ * A separator is inserted only when `p1` does not already end with one.
+ * An empty `p1` yields `p2` unchanged, so a relative name is not turned into
+ * an absolute path.
+ */
+string PathAppend(const string &p1, const string &p2)
+{
+#ifdef _WIN32
+    const char sep = '\\';
+#else
+    const char sep = '/';
+#endif
+
+    if (p1.empty()) {
+        return p2;
+    }
+    if (p1[p1.length() - 1] == sep) {
+        return p1 + p2;
+    }
+    return p1 + sep + p2;
+}
+
+/** In-place ReLU: every negative element of din->buff becomes 0. */
+void Relu(Tensor<float> *din)
+{
+    for (int k = 0; k < din->buff_size; ++k) {
+        if (din->buff[k] < 0) {
+            din->buff[k] = 0;
+        }
+    }
+}
+
+/** In-place Swish: every element x of din->buff becomes x / (1 + exp(-x)). */
+void Swish(Tensor<float> *din)
+{
+    for (int k = 0; k < din->buff_size; ++k) {
+        const float x = din->buff[k];
+        din->buff[k] = x / (1 + exp(-x));
+    }
+}
+
+/** In-place logistic sigmoid: every element x of din->buff becomes 1 / (1 + exp(-x)). */
+void Sigmoid(Tensor<float> *din)
+{
+    for (int k = 0; k < din->buff_size; ++k) {
+        const float x = din->buff[k];
+        din->buff[k] = 1 / (1 + exp(-x));
+    }
+}
+
+/** In-place activation: every element x of din->buff becomes x / (1 + exp(-x + 1)). */
+void DoubleSwish(Tensor<float> *din)
+{
+    for (int k = 0; k < din->buff_size; ++k) {
+        const float x = din->buff[k];
+        din->buff[k] = x / (1 + exp(-x + 1));
+    }
+}
+
+/**
+ * In-place masked softmax.
+ *
+ * The first `mask` entries of `din` are replaced by their softmax (the
+ * maximum is subtracted before exponentiation for numerical stability);
+ * entries mask..len-1 are set to 0.
+ *
+ * The exponentials are stored directly in `din`, so no scratch buffer has
+ * to be allocated (and no allocation can fail).
+ *
+ * @param din   values to transform, at least max(mask, len) entries
+ * @param mask  number of leading entries that take part in the softmax
+ * @param len   total number of entries
+ */
+void Softmax(float *din, int mask, int len)
+{
+    int i;
+    float sum = 0;
+    float max = -INFINITY;
+
+    for (i = 0; i < mask; i++) {
+        max = max < din[i] ? din[i] : max;
+    }
+
+    for (i = 0; i < mask; i++) {
+        din[i] = exp(din[i] - max);
+        sum += din[i];
+    }
+    for (i = 0; i < mask; i++) {
+        din[i] = din[i] / sum;
+    }
+    // Everything behind the mask carries no probability.
+    for (i = mask; i < len; i++) {
+        din[i] = 0;
+    }
+}
+
+/**
+ * In-place log-softmax over the first `len` entries of `din`.
+ *
+ * Computed as x - max - log(sum(exp(x - max))). Subtracting the maximum
+ * first keeps exp() from overflowing for large inputs (which would turn the
+ * whole result into NaN), matching what Softmax() does. No scratch buffer
+ * is needed. Nothing happens when `len` is not positive.
+ */
+void LogSoftmax(float *din, int len)
+{
+    if (len <= 0) {
+        return;
+    }
+
+    int i;
+    float max = din[0];
+    for (i = 1; i < len; i++) {
+        max = max < din[i] ? din[i] : max;
+    }
+
+    float sum = 0;
+    for (i = 0; i < len; i++) {
+        sum += exp(din[i] - max);
+    }
+    const float log_sum = log(sum);
+
+    for (i = 0; i < len; i++) {
+        din[i] = din[i] - max - log_sum;
+    }
+}
+
+/**
+ * Gated linear unit over rows of 1024 input values.
+ *
+ * The input is treated as din->buff_size / 1024 rows. For each row the first
+ * 512 values `a` are gated by the second 512 values `b`:
+ * out = a / (1 + exp(-b)), written as a row of 512 values into dout->buff.
+ */
+void Glu(Tensor<float> *din, Tensor<float> *dout)
+{
+    const int rows = din->buff_size / 1024;
+
+    for (int r = 0; r < rows; ++r) {
+        const float *src = din->buff + r * 1024;
+        float *dst = dout->buff + r * 512;
+        for (int c = 0; c < 512; ++c) {
+            const float a = src[c];
+            const float b = src[c + 512];
+            dst[c] = a / (1 + exp(-b));
+        }
+    }
+}
--- a/funasr_local/runtime/onnxruntime/src/util.h
+++ b/funasr_local/runtime/onnxruntime/src/util.h
@@ -0,0 +1,30 @@
+
+
+#ifndef UTIL_H
+#define UTIL_H
+
+using namespace std;
+
+// Reads a whole file into a 32-byte aligned buffer and returns it.
+extern float *LoadParams(const char *filename);
+
+// Writes `len` bytes starting at `data` to `filename`.
+extern void SaveDataFile(const char *filename, void *data, uint32_t len);
+// In-place activations applied to every element of din->buff.
+extern void Relu(Tensor<float> *din);
+extern void Swish(Tensor<float> *din);
+extern void Sigmoid(Tensor<float> *din);
+extern void DoubleSwish(Tensor<float> *din);
+
+// In-place softmax over the first `mask` entries; entries mask..len-1 are set to 0.
+extern void Softmax(float *din, int mask, int len);
+
+// In-place log-softmax over `len` entries.
+extern void LogSoftmax(float *din, int len);
+// Rounds `val` up to a multiple of `align`.
+extern int ValAlign(int val, int align);
+// Prints `size` floats to stdout.
+extern void DispParams(float *din, int size);
+
+// Divides each row of 512 values by sqrt(mean of squares + norm); the row
+// count is din->size[2].
+extern void BasicNorm(Tensor<float> *&din, float norm);
+
+// Stores the largest of the first `len` values in max_val and its index in
+// max_idx (-INFINITY and -1 when no value is larger than -INFINITY).
+extern void FindMax(float *din, int len, float &max_val, int &max_idx);
+
+// Gated linear unit: rows of 1024 inputs produce rows of 512 outputs in dout.
+extern void Glu(Tensor<float> *din, Tensor<float> *dout);
+
+// Joins two path fragments, inserting the platform separator when p1 does
+// not already end with it.
+string PathAppend(const string &p1, const string &p2);
+
+#endif
--- a/funasr_local/runtime/onnxruntime/src/vocab.cpp
+++ b/funasr_local/runtime/onnxruntime/src/vocab.cpp
@@ -0,0 +1,153 @@
+#include "vocab.h"
+#include <yaml-cpp/yaml.h>
+#include <glog/logging.h>
+
+#include <fstream>
+#include <iostream>
+#include <list>
+#include <sstream>
+#include <string>
+
+using namespace std;
+
+// Build the vocabulary from the "token_list" of the given YAML file.
+// (LoadVocabFromYaml() opens the file itself, so no separate stream is
+// opened here.)
+Vocab::Vocab(const char *filename)
+{
+    LoadVocabFromYaml(filename);
+}
+// Nothing to release explicitly; the members clean up after themselves.
+Vocab::~Vocab() = default;
+
+// Append every entry of the YAML "token_list" to `vocab`, in file order.
+// The process is terminated with exit(-1) when the file cannot be loaded.
+void Vocab::LoadVocabFromYaml(const char* filename){
+    YAML::Node config;
+    try {
+        config = YAML::LoadFile(filename);
+    } catch (exception const &e) {
+        LOG(INFO) << "Error loading file, yaml file error or not exist.";
+        exit(-1);
+    }
+
+    YAML::Node token_list = config["token_list"];
+    for (const auto &entry : token_list) {
+        vocab.push_back(entry.as<string>());
+    }
+}
+
+// Concatenate the vocabulary entries selected by the given ids, in order and
+// without separators. Ids are used as indices without any range check.
+string Vocab::Vector2String(vector<int> in)
+{
+    stringstream joined;
+    for (const int id : in) {
+        joined << vocab[id];
+    }
+    return joined.str();
+}
+
+// Decode the first three bytes of `str` as one 3-byte UTF-8 sequence
+// (1110xxxx 10xxxxxx 10xxxxxx) and return its code point.
+// Returns 0 when the bytes do not have that form. The checks run in order and
+// stop at the first mismatch, so the terminating NUL of a shorter string ends
+// the decoding before any byte behind it is read.
+int Str2Int(string str)
+{
+    const char *bytes = str.c_str();
+
+    if ((bytes[0] & 0xf0) != 0xe0) {
+        return 0;
+    }
+    if ((bytes[1] & 0xc0) != 0x80) {
+        return 0;
+    }
+    if ((bytes[2] & 0xc0) != 0x80) {
+        return 0;
+    }
+
+    const int high = (bytes[0] & 0x0f) << 12;
+    const int mid = (bytes[1] & 0x3f) << 6;
+    const int low = bytes[2] & 0x3f;
+    return high | mid | low;
+}
+
+// True when `ch` is exactly three bytes long and decodes (via Str2Int) to a
+// code point in the range 19968..40959 (0x4E00..0x9FFF).
+bool Vocab::IsChinese(string ch)
+{
+    if (ch.size() != 3) {
+        return false;
+    }
+    const int code_point = Str2Int(ch);
+    return code_point >= 19968 && code_point <= 40959;
+}
+
+// Convert token ids into display text.
+//
+//  1. "<s>", "</s>" and "<unk>" are dropped.
+//  2. Tokens containing "@@" are word pieces: their last two characters are
+//     removed and they are glued to the following tokens until a token
+//     without "@@" completes the word.
+//  3. Chinese tokens (see IsChinese) are emitted as they are. For any other
+//     token the first letter is capitalised when it follows a Chinese token
+//     (or starts the text) or when the token is a single letter, and a space
+//     is inserted between two consecutive non-Chinese tokens unless both are
+//     a single character long.
+//
+// Capitalisation is applied only to ASCII lower-case letters; digits,
+// punctuation, already upper-case letters and non-ASCII bytes are left
+// untouched instead of being shifted by 32, and empty tokens are not written
+// to.
+//
+// NOTE(review): a word piece sequence that is still open when the input ends
+// is not emitted -- confirm whether that is intended.
+string Vocab::Vector2StringV2(vector<int> in)
+{
+    auto capitalize_first = [](string &text) {
+        if (!text.empty() && text[0] >= 'a' && text[0] <= 'z') {
+            text[0] = text[0] - 32;
+        }
+    };
+
+    string sentence;
+    bool is_pre_english = false;
+    size_t pre_english_len = 0;
+    bool is_combining = false;
+    string combine;
+
+    for (auto it = in.begin(); it != in.end(); ++it) {
+        string word = vocab[*it];
+
+        // step1: skip special tokens
+        if (word == "<s>" || word == "</s>" || word == "<unk>") {
+            continue;
+        }
+
+        // step2: combine word pieces into a full word
+        if (word.find("@@") != string::npos) {
+            // start or middle part of a word
+            combine += word.erase(word.length() - 2);
+            is_combining = true;
+            continue;
+        }
+        if (is_combining) {
+            // final part of a word
+            combine += word;
+            is_combining = false;
+            word = combine;
+            combine = "";
+        }
+
+        // step3: Chinese tokens need no further processing
+        if (IsChinese(word)) {
+            sentence += word;
+            is_pre_english = false;
+            continue;
+        }
+
+        // step4: English token -- handle case and spacing
+        if (!is_pre_english) {
+            // previous token was Chinese (or this is the first token)
+            capitalize_first(word);
+        } else {
+            // a single letter is treated as part of an abbreviation
+            if (word.size() == 1) {
+                capitalize_first(word);
+            }
+            // consecutive single characters are written without a space
+            if (pre_english_len > 1 || word.size() > 1) {
+                sentence += " ";
+            }
+        }
+        sentence += word;
+        pre_english_len = word.size();
+        is_pre_english = true;
+    }
+
+    return sentence;
+}
+
+// Number of entries currently held in the vocabulary.
+int Vocab::Size()
+{
+    const int entry_count = vocab.size();
+    return entry_count;
+}
--- a/funasr_local/runtime/onnxruntime/src/vocab.h
+++ b/funasr_local/runtime/onnxruntime/src/vocab.h
@@ -0,0 +1,25 @@
+
+#ifndef VOCAB_H
+#define VOCAB_H
+
+#include <stdint.h>
+#include <string>
+#include <vector>
+using namespace std;
+
+// Token vocabulary loaded from the "token_list" of a YAML file, with helpers
+// that turn sequences of token ids back into text.
+class Vocab {
+  private:
+    // Token strings, indexed by token id.
+    vector<string> vocab;
+    // True for a 3-byte string that decodes to a code point in 19968..40959.
+    bool IsChinese(string ch);
+    // NOTE(review): declared here, but vocab.cpp contains no definition --
+    // confirm whether it is still needed.
+    bool IsEnglish(string ch);
+    // Appends the "token_list" entries of the YAML file to `vocab`.
+    void LoadVocabFromYaml(const char* filename);
+
+  public:
+    // Loads the vocabulary from the given YAML file.
+    Vocab(const char *filename);
+    ~Vocab();
+    // Number of vocabulary entries.
+    int Size();
+    // Concatenates the tokens selected by the ids without separators.
+    string Vector2String(vector<int> in);
+    // Like Vector2String, but skips special tokens, merges "@@" word pieces
+    // and handles spacing and capitalisation of non-Chinese tokens.
+    string Vector2StringV2(vector<int> in);
+};
+
+#endif