@@ -260,7 +260,7 @@ def inference(self, input, input_len=None, model=None, kwargs=None, key=None, **
             time_escape_total += time_escape
 
             if pbar:
-                pbar.update(1)
+                # pbar.update(1)
                 pbar.set_description(f"rtf_avg: {time_escape_total / time_speech_total:0.3f}")
         torch.cuda.empty_cache()
         return asr_result_list
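The change above stops the inner (blue) bar from advancing per batch; only its rtf description is refreshed. As a point of reference, the following stand-alone sketch (hypothetical names, not FunASR's actual API) shows the pattern the rest of this diff builds on: a tqdm bar that reports an average real-time factor (processing time divided by audio duration) and can be suppressed with a `disable_pbar` flag.

```python
# Minimal sketch, not the real FunASR code: a loop that reports an average
# real-time factor (processing time / audio time) on a tqdm progress bar,
# and can be silenced with a disable_pbar flag.
import time
from tqdm import tqdm

def fake_infer(batches, audio_seconds_per_batch=1.0, disable_pbar=False):
    pbar = None if disable_pbar else tqdm(total=len(batches), dynamic_ncols=True)
    time_speech_total = 1e-6   # epsilon guard against division by zero, as in the diff
    time_escape_total = 0.0
    results = []
    for batch in batches:
        beg = time.time()
        results.append(batch)                  # stand-in for the model call
        time_escape_total += time.time() - beg
        time_speech_total += audio_seconds_per_batch
        if pbar:
            pbar.update(1)
            pbar.set_description(f"rtf_avg: {time_escape_total / time_speech_total:0.3f}")
    return results
```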
@@ -285,10 +285,10 @@ def inference_with_vad(self, input, input_len=None, **cfg):
 
         key_list, data_list = prepare_data_iterator(input, input_len=input_len, data_type=kwargs.get("data_type", None))
         results_ret_list = []
-        time_speech_total_all_samples = 0.0
+        time_speech_total_all_samples = 1e-6
 
         beg_total = time.time()
-        pbar_total = tqdm(colour="red", total=len(res) + 1, dynamic_ncols=True)
+        pbar_total = tqdm(colour="red", total=len(res), dynamic_ncols=True)
         for i in range(len(res)):
             key = res[i]["key"]
             vadsegments = res[i]["value"]
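Seeding `time_speech_total_all_samples` with `1e-6` instead of `0.0` keeps the final `rtf_avg_all` computation from dividing by zero when no audio is processed, and lets the per-sample `+ 1e-6` in the next hunk be dropped. A tiny illustration with made-up numbers:

```python
# Hypothetical numbers, just to show why the epsilon seed matters.
time_speech_total_all_samples = 1e-6   # seed value from the diff
time_escape_total_all_samples = 0.0
# With no audio processed, this still evaluates instead of raising ZeroDivisionError:
print(f"rtf_avg_all: {time_escape_total_all_samples / time_speech_total_all_samples:0.3f}")
# After one 10 s file that took 0.8 s to decode:
time_speech_total_all_samples += 10.0
time_escape_total_all_samples += 0.8
print(f"rtf_avg_all: {time_escape_total_all_samples / time_speech_total_all_samples:0.3f}")  # ~0.080
```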
@@ -310,14 +310,14 @@ def inference_with_vad(self, input, input_len=None, **cfg):
             batch_size_ms_cum = 0
             beg_idx = 0
             beg_asr_total = time.time()
-            time_speech_total_per_sample = speech_lengths / 16000 + 1e-6
+            time_speech_total_per_sample = speech_lengths / 16000
             time_speech_total_all_samples += time_speech_total_per_sample
 
-            pbar_sample = tqdm(colour="blue", total=n + 1, dynamic_ncols=True)
+            # pbar_sample = tqdm(colour="blue", total=n, dynamic_ncols=True)
 
             all_segments = []
             for j, _ in enumerate(range(0, n)):
-                pbar_sample.update(1)
+                # pbar_sample.update(1)
                 batch_size_ms_cum += (sorted_data[j][0][1] - sorted_data[j][0][0])
                 if j < n - 1 and (
                         batch_size_ms_cum + sorted_data[j + 1][0][1] - sorted_data[j + 1][0][0]) < batch_size and (
@@ -336,19 +336,19 @@ def inference_with_vad(self, input, input_len=None, **cfg):
                         segments = sv_chunk(vad_segments)
                         all_segments.extend(segments)
                         speech_b = [i[2] for i in segments]
-                        spk_res = self.inference(speech_b, input_len=None, model=self.spk_model, kwargs=kwargs, **cfg)
+                        spk_res = self.inference(speech_b, input_len=None, model=self.spk_model, kwargs=kwargs, disable_pbar=True, **cfg)
                         results[_b]['spk_embedding'] = spk_res[0]['spk_embedding']
                 beg_idx = end_idx
                 if len(results) < 1:
                     continue
                 results_sorted.extend(results)
 
-            end_asr_total = time.time()
-            time_escape_total_per_sample = end_asr_total - beg_asr_total
-            pbar_sample.update(1)
-            pbar_sample.set_description(f"rtf_avg_per_sample: {time_escape_total_per_sample / time_speech_total_per_sample:0.3f}, "
-                                        f"time_speech_total_per_sample: {time_speech_total_per_sample: 0.3f}, "
-                                        f"time_escape_total_per_sample: {time_escape_total_per_sample:0.3f}")
+            # end_asr_total = time.time()
+            # time_escape_total_per_sample = end_asr_total - beg_asr_total
+            # pbar_sample.update(1)
+            # pbar_sample.set_description(f"rtf_avg_per_sample: {time_escape_total_per_sample / time_speech_total_per_sample:0.3f}, "
+            #                             f"time_speech_total_per_sample: {time_speech_total_per_sample: 0.3f}, "
+            #                             f"time_escape_total_per_sample: {time_escape_total_per_sample:0.3f}")
 
             restored_data = [0] * n
             for j in range(n):
@@ -386,7 +386,7 @@ def inference_with_vad(self, input, input_len=None, **cfg):
             # step.3 compute punc model
             if self.punc_model is not None:
                 self.punc_kwargs.update(cfg)
-                punc_res = self.inference(result["text"], model=self.punc_model, kwargs=self.punc_kwargs, **cfg)
+                punc_res = self.inference(result["text"], model=self.punc_model, kwargs=self.punc_kwargs, disable_pbar=True, **cfg)
                 import copy; raw_text = copy.copy(result["text"])
                 result["text"] = punc_res[0]["text"]
 
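Both this punctuation call and the earlier speaker-embedding call now pass `disable_pbar=True` into the nested `self.inference(...)`, so only the outer red `pbar_total` is drawn. This assumes the inner `inference` method reads the flag from its keyword arguments, as the first hunk suggests. A hedged sketch of that forwarding pattern, with hypothetical helper names:

```python
# Sketch of the flag-forwarding pattern (hypothetical helpers, not the actual
# FunASR signatures): nested calls receive disable_pbar=True so only the
# outermost progress bar is rendered.
from tqdm import tqdm

def inner_task(items, disable_pbar=False, **kwargs):
    pbar = None if disable_pbar else tqdm(total=len(items), colour="blue", dynamic_ncols=True)
    out = [x * 2 for x in items]        # stand-in for the sub-model
    if pbar:
        pbar.update(len(items))
        pbar.close()
    return out

def outer_task(samples, **kwargs):
    pbar_total = tqdm(total=len(samples), colour="red", dynamic_ncols=True)
    results = []
    for sample in samples:
        # silence the nested bar, as the diff does for the spk/punc models
        results.append(inner_task(sample, disable_pbar=True, **kwargs))
        pbar_total.update(1)
    pbar_total.close()
    return results

print(outer_task([[1, 2], [3, 4, 5]]))
```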
@@ -418,13 +418,18 @@ def inference_with_vad(self, input, input_len=None, **cfg):
 
             result["key"] = key
             results_ret_list.append(result)
+            end_asr_total = time.time()
+            time_escape_total_per_sample = end_asr_total - beg_asr_total
             pbar_total.update(1)
-
-        pbar_total.update(1)
+            pbar_total.set_description(f"rtf_avg: {time_escape_total_per_sample / time_speech_total_per_sample:0.3f}, "
+                                       f"time_speech: {time_speech_total_per_sample: 0.3f}, "
+                                       f"time_escape: {time_escape_total_per_sample:0.3f}")
+
+
         end_total = time.time()
         time_escape_total_all_samples = end_total - beg_total
-        pbar_total.set_description(f"rtf_avg_all_samples: {time_escape_total_all_samples / time_speech_total_all_samples:0.3f}, "
-                                   f"time_speech_total_all_samples: {time_speech_total_all_samples: 0.3f}, "
-                                   f"time_escape_total_all_samples: {time_escape_total_all_samples:0.3f}")
+        print(f"rtf_avg_all: {time_escape_total_all_samples / time_speech_total_all_samples:0.3f}, "
+              f"time_speech_all: {time_speech_total_all_samples: 0.3f}, "
+              f"time_escape_all: {time_escape_total_all_samples:0.3f}")
         return results_ret_list
 