thewh1teagle · thewh1teagle · Mar 23, 2025 · Mar 19, 2025 · Mar 19, 2025 · Mar 23, 2025
diff --git a/crates/sherpa-rs/Cargo.toml b/crates/sherpa-rs/Cargo.toml
@@ -102,3 +102,11 @@ path = "../../examples/whisper.rs"
 [[example]]
 name = "moonshine"
 path = "../../examples/moonshine.rs"
+
+[[example]]
+name = "sense_voice"
+path = "../../examples/sense_voice.rs"
+
+[[example]]
+name = "paraformer"
+path = "../../examples/paraformer.rs"
diff --git a/crates/sherpa-rs/src/lib.rs b/crates/sherpa-rs/src/lib.rs
@@ -4,7 +4,9 @@ pub mod embedding_manager;
 pub mod keyword_spot;
 pub mod language_id;
 pub mod moonshine;
+pub mod paraformer;
 pub mod punctuate;
+pub mod sense_voice;
 pub mod speaker_id;
 pub mod vad;
 pub mod whisper;

diff --git a/crates/sherpa-rs/src/paraformer.rs b/crates/sherpa-rs/src/paraformer.rs
@@ -0,0 +1,155 @@
+use crate::{get_default_provider, utils::cstring_from_str};
+use eyre::{bail, Result};
+use std::ptr::null;
+
+/// Offline speech recognizer that wraps a native sherpa-onnx recognizer handle
+/// configured with a Paraformer model.
+///
+/// The handle is created in `new` and destroyed in the `Drop` implementation.
+#[derive(Debug)]
+pub struct ParaformerRecognizer {
+    // Raw handle returned by `SherpaOnnxCreateOfflineRecognizer`; `new` bails out
+    // on a null pointer, so a constructed value always holds a non-null handle.
+    recognizer: *const sherpa_rs_sys::SherpaOnnxOfflineRecognizer,
+}
+
+/// Result type returned by `ParaformerRecognizer::transcribe`; an alias of the
+/// crate-wide offline recognizer result.
+pub type ParaformerRecognizerResult = super::OfflineRecognizerResult;
+
+/// Settings consumed by `ParaformerRecognizer::new`.
+#[derive(Debug, Clone)]
+pub struct ParaformerConfig {
+    /// Model location, forwarded as the Paraformer `model` string
+    /// (the bundled example passes a path to an `.onnx` file).
+    pub model: String,
+    /// Tokens location, forwarded as the model config's `tokens` string
+    /// (the bundled example passes a path to `tokens.txt`).
+    pub tokens: String,
+    /// Execution provider name; `None` falls back to `get_default_provider()`.
+    pub provider: Option<String>,
+    /// Thread count handed to the native model config; `None` is treated as `1`.
+    pub num_threads: Option<i32>,
+    /// Forwarded to the native model config's `debug` field.
+    pub debug: bool,
+}
+
+impl Default for ParaformerConfig {
+    /// Empty model and token strings, no explicit provider, one thread, debug off.
+    fn default() -> Self {
+        let single_thread = Some(1);
+        Self {
+            model: String::default(),
+            tokens: String::default(),
+            provider: None,
+            num_threads: single_thread,
+            debug: false,
+        }
+    }
+}
+
+impl ParaformerRecognizer {
+    /// Builds a Paraformer recognizer from `config`.
+    ///
+    /// A missing `provider` falls back to [`get_default_provider`] and a missing
+    /// `num_threads` to `1`. The recognizer is configured for `greedy_search`
+    /// decoding with a 16000 Hz sample rate and 80-dimensional features; every
+    /// model family other than Paraformer is left unset (null pointers).
+    ///
+    /// # Errors
+    ///
+    /// Fails when `SherpaOnnxCreateOfflineRecognizer` returns a null pointer.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `cstring_from_str` cannot convert one of the configured strings
+    /// into a C string.
+    pub fn new(config: ParaformerConfig) -> Result<Self> {
+        let debug = config.debug.into();
+        // Resolve the default lazily so it is only computed when no provider was given.
+        let provider = config.provider.unwrap_or_else(get_default_provider);
+
+        // Keep every CString bound to a local: the raw pointers stored in the
+        // config structs below must stay valid until the recognizer is created.
+        let provider_ptr = cstring_from_str(&provider);
+        let model_ptr = cstring_from_str(&config.model);
+        let tokens_ptr = cstring_from_str(&config.tokens);
+        let decoding_method_ptr = cstring_from_str("greedy_search");
+
+        // Paraformer model config
+        let paraformer_config = sherpa_rs_sys::SherpaOnnxOfflineParaformerModelConfig {
+            model: model_ptr.as_ptr(),
+        };
+
+        // Offline model config
+        let model_config = sherpa_rs_sys::SherpaOnnxOfflineModelConfig {
+            debug,
+            num_threads: config.num_threads.unwrap_or(1),
+            provider: provider_ptr.as_ptr(),
+            tokens: tokens_ptr.as_ptr(),
+            paraformer: paraformer_config,
+
+            // Null other model types
+            bpe_vocab: null(),
+            model_type: null(),
+            modeling_unit: null(),
+            nemo_ctc: sherpa_rs_sys::SherpaOnnxOfflineNemoEncDecCtcModelConfig { model: null() },
+            tdnn: sherpa_rs_sys::SherpaOnnxOfflineTdnnModelConfig { model: null() },
+            telespeech_ctc: null(),
+            fire_red_asr: sherpa_rs_sys::SherpaOnnxOfflineFireRedAsrModelConfig {
+                encoder: null(),
+                decoder: null(),
+            },
+            transducer: sherpa_rs_sys::SherpaOnnxOfflineTransducerModelConfig {
+                encoder: null(),
+                decoder: null(),
+                joiner: null(),
+            },
+            whisper: sherpa_rs_sys::SherpaOnnxOfflineWhisperModelConfig {
+                encoder: null(),
+                decoder: null(),
+                language: null(),
+                task: null(),
+                tail_paddings: 0,
+            },
+            sense_voice: sherpa_rs_sys::SherpaOnnxOfflineSenseVoiceModelConfig {
+                model: null(),
+                language: null(),
+                use_itn: 0,
+            },
+            moonshine: sherpa_rs_sys::SherpaOnnxOfflineMoonshineModelConfig {
+                preprocessor: null(),
+                encoder: null(),
+                uncached_decoder: null(),
+                cached_decoder: null(),
+            },
+        };
+
+        // Recognizer config
+        let recognizer_config = sherpa_rs_sys::SherpaOnnxOfflineRecognizerConfig {
+            decoding_method: decoding_method_ptr.as_ptr(),
+            feat_config: sherpa_rs_sys::SherpaOnnxFeatureConfig {
+                sample_rate: 16000,
+                feature_dim: 80,
+            },
+            model_config,
+            hotwords_file: null(),
+            hotwords_score: 0.0,
+            lm_config: sherpa_rs_sys::SherpaOnnxOfflineLMConfig {
+                model: null(),
+                scale: 0.0,
+            },
+            max_active_paths: 0,
+            rule_fars: null(),
+            rule_fsts: null(),
+            blank_penalty: 0.0,
+        };
+
+        let recognizer =
+            unsafe { sherpa_rs_sys::SherpaOnnxCreateOfflineRecognizer(&recognizer_config) };
+        // A null handle is the only failure signal the native constructor gives us.
+        if recognizer.is_null() {
+            bail!("Failed to create Paraformer recognizer");
+        }
+
+        Ok(Self { recognizer })
+    }
+
+    /// Decodes `samples` captured at `sample_rate` and returns the recognition result.
+    ///
+    /// A fresh native offline stream is created for the call and destroyed again
+    /// before returning, together with the native result it produced.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `samples.len()` does not fit in an `i32`, the width the native
+    /// API uses for the sample count.
+    pub fn transcribe(&mut self, sample_rate: u32, samples: &[f32]) -> ParaformerRecognizerResult {
+        // Checked conversion: an `as` cast would silently truncate or wrap the
+        // length and hand the native side a bogus sample count.
+        let sample_count =
+            i32::try_from(samples.len()).expect("number of samples exceeds i32::MAX");
+
+        unsafe {
+            let stream = sherpa_rs_sys::SherpaOnnxCreateOfflineStream(self.recognizer);
+            sherpa_rs_sys::SherpaOnnxAcceptWaveformOffline(
+                stream,
+                sample_rate as i32,
+                samples.as_ptr(),
+                sample_count,
+            );
+            sherpa_rs_sys::SherpaOnnxDecodeOfflineStream(self.recognizer, stream);
+            let result_ptr = sherpa_rs_sys::SherpaOnnxGetOfflineStreamResult(stream);
+            // Build the Rust-side result before the native result is destroyed below.
+            let raw_result = result_ptr.read();
+            let result = ParaformerRecognizerResult::new(&raw_result);
+
+            sherpa_rs_sys::SherpaOnnxDestroyOfflineRecognizerResult(result_ptr);
+            sherpa_rs_sys::SherpaOnnxDestroyOfflineStream(stream);
+
+            result
+        }
+    }
+}
+
+// NOTE(review): these impls assert that the raw recognizer handle may be moved to
+// and shared between threads; that presumably relies on thread-safety guarantees
+// of the native sherpa-onnx recognizer — confirm.
+unsafe impl Send for ParaformerRecognizer {}
+unsafe impl Sync for ParaformerRecognizer {}
+
+impl Drop for ParaformerRecognizer {
+    /// Releases the native recognizer handle held by this value.
+    fn drop(&mut self) {
+        unsafe {
+            // The handle was obtained from `SherpaOnnxCreateOfflineRecognizer` in `new`.
+            sherpa_rs_sys::SherpaOnnxDestroyOfflineRecognizer(self.recognizer);
+        }
+    }
+}
diff --git a/crates/sherpa-rs/src/sense_voice.rs b/crates/sherpa-rs/src/sense_voice.rs
@@ -0,0 +1,153 @@
+use crate::{get_default_provider, utils::cstring_from_str};
+use eyre::{bail, Result};
+use std::ptr::null;
+
+/// Offline speech recognizer that wraps a native sherpa-onnx recognizer handle
+/// configured with a SenseVoice model.
+///
+/// The handle is created in `new` and destroyed in the `Drop` implementation.
+#[derive(Debug)]
+pub struct SenseVoiceRecognizer {
+    // Raw handle returned by `SherpaOnnxCreateOfflineRecognizer`; `new` bails out
+    // on a null pointer, so a constructed value always holds a non-null handle.
+    recognizer: *const sherpa_rs_sys::SherpaOnnxOfflineRecognizer,
+}
+
+/// Result type returned by `SenseVoiceRecognizer::transcribe`; an alias of the
+/// crate-wide offline recognizer result.
+pub type SenseVoiceRecognizerResult = super::OfflineRecognizerResult;
+
+/// Settings consumed by `SenseVoiceRecognizer::new`.
+#[derive(Debug, Clone)]
+pub struct SenseVoiceConfig {
+    /// Model location, forwarded as the SenseVoice `model` string.
+    pub model: String,
+    /// Language selector forwarded to the native config; defaults to `"auto"`.
+    pub language: String,
+    /// Forwarded as the native `use_itn` flag (`1` when true, `0` otherwise).
+    /// NOTE(review): presumably toggles inverse text normalization — confirm.
+    pub use_itn: bool,
+    /// Execution provider name; `None` falls back to `get_default_provider()`.
+    pub provider: Option<String>,
+    /// Thread count handed to the native model config; `None` is treated as `1`.
+    pub num_threads: Option<i32>,
+    /// Forwarded to the native model config's `debug` field.
+    pub debug: bool,
+    /// Tokens location, forwarded as the model config's `tokens` string.
+    pub tokens: String,
+}
+
+impl Default for SenseVoiceConfig {
+    /// Empty model and token strings, `"auto"` language, ITN flag on, no explicit
+    /// provider, one thread, debug off.
+    fn default() -> Self {
+        let auto_language = String::from("auto");
+        Self {
+            model: String::default(),
+            tokens: String::default(),
+            language: auto_language,
+            use_itn: true,
+            debug: false,
+            num_threads: Some(1),
+            provider: None,
+        }
+    }
+}
+
+impl SenseVoiceRecognizer {
+    /// Builds a SenseVoice recognizer from `config`.
+    ///
+    /// A missing `provider` falls back to [`get_default_provider`] and a missing
+    /// `num_threads` to `1`. The decoding method is left unset (null), features
+    /// are configured for a 16000 Hz sample rate with 80 dimensions, and every
+    /// model family other than SenseVoice is left unset (null pointers).
+    ///
+    /// # Errors
+    ///
+    /// Fails when `SherpaOnnxCreateOfflineRecognizer` returns a null pointer.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `cstring_from_str` cannot convert one of the configured strings
+    /// into a C string.
+    pub fn new(config: SenseVoiceConfig) -> Result<Self> {
+        let debug = config.debug.into();
+        // Resolve the default lazily so it is only computed when no provider was given.
+        let provider = config.provider.unwrap_or_else(get_default_provider);
+        let num_threads = config.num_threads.unwrap_or(1);
+
+        // Keep every CString bound to a local: the raw pointers stored in the
+        // config structs below must stay valid until the recognizer is created.
+        let provider_ptr = cstring_from_str(&provider);
+        let model_ptr = cstring_from_str(&config.model);
+        let language_ptr = cstring_from_str(&config.language);
+        let tokens_ptr = cstring_from_str(&config.tokens);
+
+        // SenseVoice specific config; the bool maps to 1 (true) / 0 (false).
+        let use_itn = config.use_itn.into();
+        let sense_voice_config = sherpa_rs_sys::SherpaOnnxOfflineSenseVoiceModelConfig {
+            model: model_ptr.as_ptr(),
+            language: language_ptr.as_ptr(),
+            use_itn,
+        };
+
+        // General model config
+        let model_config = sherpa_rs_sys::SherpaOnnxOfflineModelConfig {
+            tokens: tokens_ptr.as_ptr(),
+            provider: provider_ptr.as_ptr(),
+            num_threads,
+            debug,
+            sense_voice: sense_voice_config,
+            // Other fields set to default/null
+            bpe_vocab: null(),
+            model_type: null(),
+            modeling_unit: null(),
+            nemo_ctc: sherpa_rs_sys::SherpaOnnxOfflineNemoEncDecCtcModelConfig { model: null() },
+            paraformer: sherpa_rs_sys::SherpaOnnxOfflineParaformerModelConfig { model: null() },
+            tdnn: sherpa_rs_sys::SherpaOnnxOfflineTdnnModelConfig { model: null() },
+            telespeech_ctc: null(),
+            fire_red_asr: sherpa_rs_sys::SherpaOnnxOfflineFireRedAsrModelConfig {
+                encoder: null(),
+                decoder: null(),
+            },
+            transducer: sherpa_rs_sys::SherpaOnnxOfflineTransducerModelConfig {
+                encoder: null(),
+                decoder: null(),
+                joiner: null(),
+            },
+            whisper: sherpa_rs_sys::SherpaOnnxOfflineWhisperModelConfig {
+                encoder: null(),
+                decoder: null(),
+                language: null(),
+                task: null(),
+                tail_paddings: 0,
+            },
+            moonshine: sherpa_rs_sys::SherpaOnnxOfflineMoonshineModelConfig {
+                preprocessor: null(),
+                encoder: null(),
+                uncached_decoder: null(),
+                cached_decoder: null(),
+            },
+        };
+
+        // Recognizer config (named distinctly so it does not shadow the input `config`).
+        let recognizer_config = sherpa_rs_sys::SherpaOnnxOfflineRecognizerConfig {
+            decoding_method: null(),
+            feat_config: sherpa_rs_sys::SherpaOnnxFeatureConfig {
+                sample_rate: 16000,
+                feature_dim: 80,
+            },
+            hotwords_file: null(),
+            hotwords_score: 0.0,
+            lm_config: sherpa_rs_sys::SherpaOnnxOfflineLMConfig {
+                model: null(),
+                scale: 0.0,
+            },
+            max_active_paths: 0,
+            model_config,
+            rule_fars: null(),
+            rule_fsts: null(),
+            blank_penalty: 0.0,
+        };
+
+        let recognizer =
+            unsafe { sherpa_rs_sys::SherpaOnnxCreateOfflineRecognizer(&recognizer_config) };
+        // A null handle is the only failure signal the native constructor gives us.
+        if recognizer.is_null() {
+            bail!("Failed to create recognizer");
+        }
+
+        Ok(Self { recognizer })
+    }
+
+    /// Decodes `samples` captured at `sample_rate` and returns the recognition result.
+    ///
+    /// A fresh native offline stream is created for the call and destroyed again
+    /// before returning, together with the native result it produced. Panics if
+    /// the sample count does not fit the integer width expected by the native API.
+    pub fn transcribe(&mut self, sample_rate: u32, samples: &[f32]) -> SenseVoiceRecognizerResult {
+        let handle = self.recognizer;
+        unsafe {
+            let offline_stream = sherpa_rs_sys::SherpaOnnxCreateOfflineStream(handle);
+
+            let rate = sample_rate as i32;
+            let data = samples.as_ptr();
+            let sample_count = i32::try_from(samples.len()).unwrap();
+            sherpa_rs_sys::SherpaOnnxAcceptWaveformOffline(offline_stream, rate, data, sample_count);
+            sherpa_rs_sys::SherpaOnnxDecodeOfflineStream(handle, offline_stream);
+
+            // Build the Rust-side result before the native result is destroyed below.
+            let native_result = sherpa_rs_sys::SherpaOnnxGetOfflineStreamResult(offline_stream);
+            let native_copy = native_result.read();
+            let transcription = SenseVoiceRecognizerResult::new(&native_copy);
+
+            // Release the native result first, then the stream that produced it.
+            sherpa_rs_sys::SherpaOnnxDestroyOfflineRecognizerResult(native_result);
+            sherpa_rs_sys::SherpaOnnxDestroyOfflineStream(offline_stream);
+
+            transcription
+        }
+    }
+}
+
+// NOTE(review): these impls assert that the raw recognizer handle may be moved to
+// and shared between threads; that presumably relies on thread-safety guarantees
+// of the native sherpa-onnx recognizer — confirm.
+unsafe impl Send for SenseVoiceRecognizer {}
+unsafe impl Sync for SenseVoiceRecognizer {}
+
+impl Drop for SenseVoiceRecognizer {
+    /// Releases the native recognizer handle held by this value.
+    fn drop(&mut self) {
+        unsafe {
+            // The handle was obtained from `SherpaOnnxCreateOfflineRecognizer` in `new`.
+            sherpa_rs_sys::SherpaOnnxDestroyOfflineRecognizer(self.recognizer);
+        }
+    }
+}
diff --git a/crates/sherpa-rs/src/utils.rs b/crates/sherpa-rs/src/utils.rs
@@ -1,4 +1,4 @@
-use std::ffi::{ c_char, CString };
+use std::ffi::{c_char, CString};
 
 pub fn cstring_from_str(s: &str) -> CString {
     CString::new(s).expect("CString::new failed")

diff --git a/examples/paraformer.rs b/examples/paraformer.rs
@@ -0,0 +1,35 @@
+/*
+Transcribe wav file using Paraformer
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2024-03-09.tar.bz2
+tar xvf sherpa-onnx-paraformer-zh-2024-03-09.tar.bz2
+wget https://github.com/thewh1teagle/sherpa-rs/releases/download/v0.1.0/motivation.wav -O motivation.wav
+cargo run --example paraformer motivation.wav
+*/
+
+use sherpa_rs::{
+    paraformer::{ParaformerConfig, ParaformerRecognizer},
+    read_audio_file,
+};
+
+/// Example entry point: transcribes the wav file given as the first argument with
+/// a Paraformer model, optionally using the provider named by the second argument.
+fn main() {
+    // Positional arguments: <wav path> [provider]; the provider defaults to "cpu".
+    let mut cli_args = std::env::args().skip(1);
+    let path = cli_args.next().expect("Missing file path argument");
+    let provider = cli_args.next().unwrap_or("cpu".into());
+
+    let (samples, sample_rate) = read_audio_file(&path).unwrap();
+    assert_eq!(sample_rate, 16000, "The sample rate must be 16000.");
+
+    let config = ParaformerConfig {
+        provider: Some(provider),
+        tokens: "sherpa-onnx-paraformer-zh-2024-03-09/tokens.txt".into(),
+        model: "sherpa-onnx-paraformer-zh-2024-03-09/model.int8.onnx".into(),
+        ..ParaformerConfig::default()
+    };
+    let mut recognizer = ParaformerRecognizer::new(config).unwrap();
+
+    // Time only the transcription call, not model loading.
+    let started = std::time::Instant::now();
+    let transcription = recognizer.transcribe(sample_rate, &samples);
+    println!("✅ Text: {}", transcription.text);
+    println!("⏱️ Time taken for transcription: {:?}", started.elapsed());
+}