Skip to content

Commit

Permalink
skip transcription if no speech is found
Browse files (browse the repository at this point in the history)
  • Loading branch information
MahmoudAshraf97 committed Aug 26, 2024
1 parent 54a5ed2 commit 64852b5
Showing 1 changed file with 13 additions and 9 deletions.
22 changes: 13 additions & 9 deletions faster_whisper/transcribe.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -385,9 +385,9 @@ def transcribe(
      all_language_probs,
  ) = self.get_language_and_tokenizer(audio, task, language)

- duration_after_vad = sum(
-     (segment["end"] - segment["start"]) / sampling_rate
-     for segment in clip_timestamps
+ duration_after_vad = (
+     sum((segment["end"] - segment["start"]) for segment in clip_timestamps)
+     / sampling_rate
  )

  # batched options: see the difference with default options in WhisperModel
Expand Down Expand Up @@ -438,13 +438,17 @@ def transcribe(
  to_cpu = (
      self.model.model.device == "cuda" and len(self.model.model.device_index) > 1
  )
- features = torch.stack(
-     [
-         self.model.feature_extractor(chunk, to_cpu=to_cpu)[
-             ..., : self.model.feature_extractor.nb_max_frames
+ features = (
+     torch.stack(
+         [
+             self.model.feature_extractor(chunk, to_cpu=to_cpu)[
+                 ..., : self.model.feature_extractor.nb_max_frames
+             ]
+             for chunk in audio_chunks
          ]
-         for chunk in audio_chunks
-     ]
+     )
+     if duration_after_vad
+     else []
  )

  segments = self._batched_segments_generator(
Expand Down

0 comments on commit 64852b5

Please sign in to comment.