diff --git a/README.md b/README.md
index 18d02c3bc..9f345533a 100644
--- a/README.md
+++ b/README.md
@@ -144,7 +144,7 @@ for i in range(total_chunk_num):
 ```
 Note: `chunk_size` is the configuration for streaming latency.` [0,10,5]` indicates that the real-time display granularity is `10*60=600ms`, and the lookahead information is `5*60=300ms`. Each inference input is `600ms` (sample points are `16000*0.6=960`), and the output is the corresponding text. For the last speech segment input, `is_final=True` needs to be set to force the output of the last word.
 
-### Voice Activity Detection (streaming)
+### Voice Activity Detection (Non-Streaming)
 ```python
 from funasr import AutoModel
 
@@ -153,7 +153,7 @@ wav_file = f"{model.model_path}/example/asr_example.wav"
 res = model.generate(input=wav_file)
 print(res)
 ```
-### Voice Activity Detection (Non-streaming)
+### Voice Activity Detection (Streaming)
 ```python
 
 from funasr import AutoModel
diff --git a/examples/industrial_data_pretraining/seaco_paraformer/demo.py b/examples/industrial_data_pretraining/seaco_paraformer/demo.py
index cc292c2db..065b698a3 100644
--- a/examples/industrial_data_pretraining/seaco_paraformer/demo.py
+++ b/examples/industrial_data_pretraining/seaco_paraformer/demo.py
@@ -17,6 +17,6 @@
 
 res = model.generate(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav",
                      hotword='达摩院 魔搭',
-                     # sentence_timestamp=True,
+                     # sentence_timestamp=True, # return sentence level information when spk_model is not given
                      )
 print(res)
\ No newline at end of file
diff --git a/funasr/models/campplus/utils.py b/funasr/models/campplus/utils.py
index c81cb7ed5..935c674f9 100644
--- a/funasr/models/campplus/utils.py
+++ b/funasr/models/campplus/utils.py
@@ -212,7 +212,7 @@ def distribute_spk(sentence_list, sd_time_list):
             if overlap > max_overlap:
                 max_overlap = overlap
                 sentence_spk = spk
-        d['spk'] = sentence_spk
+        d['spk'] = int(sentence_spk)
         sd_sentence_list.append(d)
     return sd_sentence_list
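
Note on the README hunks: the two Voice Activity Detection headings were swapped because the first example runs a single `model.generate(input=wav_file)` call over a whole file (non-streaming), while the second feeds audio to the model chunk by chunk with a rolling cache (streaming). A minimal sketch of that chunked pattern follows; the `fsmn-vad` model name, the 200 ms chunk size, and the `res[0]["value"]` result field are not shown in these hunks and should be treated as assumptions.

```python
from funasr import AutoModel
import soundfile

chunk_size_ms = 200  # assumed streaming chunk size in milliseconds
model = AutoModel(model="fsmn-vad")  # assumed VAD model name

# reuse the example wav path that appears in the README hunk above
speech, sample_rate = soundfile.read(f"{model.model_path}/example/asr_example.wav")
chunk_stride = int(chunk_size_ms * sample_rate / 1000)  # samples per chunk

cache = {}
total_chunk_num = int((len(speech) - 1) / chunk_stride) + 1
for i in range(total_chunk_num):
    speech_chunk = speech[i * chunk_stride:(i + 1) * chunk_stride]
    is_final = i == total_chunk_num - 1  # flush pending output on the last chunk
    res = model.generate(input=speech_chunk, cache=cache,
                         is_final=is_final, chunk_size=chunk_size_ms)
    if len(res[0]["value"]):  # assumed result layout: print only non-empty VAD events
        print(res)
```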
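
Note on the `funasr/models/campplus/utils.py` hunk: `distribute_spk` assigns each recognized sentence to the diarization speaker whose time segment overlaps it the most, and the patch wraps the chosen label in `int(...)`, presumably to normalize a label that may arrive as a NumPy integer into a plain Python `int` (e.g. so the result serializes to JSON). Below is a self-contained sketch of that maximum-overlap assignment; the sentence field names (`start`, `end`, `text`), the `(start, end, spk)` tuple layout of `sd_time_list`, and the toy data are invented for illustration.

```python
import json

def distribute_spk(sentence_list, sd_time_list):
    """Give each sentence the speaker whose diarization segment overlaps it most."""
    sd_sentence_list = []
    for d in sentence_list:
        sentence_start, sentence_end = d["start"], d["end"]  # assumed field names
        sentence_spk = 0
        max_overlap = 0
        for spk_st, spk_ed, spk in sd_time_list:  # assumed (start, end, speaker) tuples
            # length of the intersection of [sentence_start, sentence_end] and [spk_st, spk_ed]
            overlap = max(min(sentence_end, spk_ed) - max(sentence_start, spk_st), 0)
            if overlap > max_overlap:
                max_overlap = overlap
                sentence_spk = spk
        # cast to a built-in int so e.g. json.dumps works even if spk was a NumPy integer
        d["spk"] = int(sentence_spk)
        sd_sentence_list.append(d)
    return sd_sentence_list

# toy data: two sentences and two speaker segments, times in milliseconds
sentences = [{"text": "hello", "start": 0, "end": 900},
             {"text": "world", "start": 900, "end": 2000}]
segments = [(0, 1000, 0), (1000, 2000, 1)]
print(json.dumps(distribute_spk(sentences, segments), ensure_ascii=False))
```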