diff --git a/requirements.txt b/requirements.txt index 75066bb96..00c97cd5e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,3 +14,4 @@ WeTextProcessing; sys_platform == 'linux' nemo_text_processing; sys_platform == 'linux' av pydub +torchvision diff --git a/tools/audio/av.py b/tools/audio/av.py index 333b423d6..cd3a7d66a 100644 --- a/tools/audio/av.py +++ b/tools/audio/av.py @@ -41,11 +41,11 @@ def wav2(i: BytesIO, o: BufferedWriter, format: str): def load_audio( - file: Union[str, BytesIO, Path], - sr: Optional[int] = None, - format: Optional[str] = None, - mono=True, - ) -> Union[np.ndarray, Tuple[np.ndarray, int]]: + file: Union[str, BytesIO, Path], + sr: Optional[int] = None, + format: Optional[str] = None, + mono=True, +) -> Union[np.ndarray, Tuple[np.ndarray, int]]: """ https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI/blob/412a9950a1e371a018c381d1bfb8579c4b0de329/infer/lib/audio.py#L39 """ @@ -113,7 +113,7 @@ def frame_iter(container): np.copyto(decoded_audio[..., offset:end_index], frame_data) offset += len(frame_data[0]) - + container.close() # Truncate the array to the actual size @@ -124,4 +124,4 @@ def frame_iter(container): if sr is not None: return decoded_audio - return decoded_audio, rate \ No newline at end of file + return decoded_audio, rate