Skip to content

Commit

Permalink
Merge pull request #182 from hbredin/chore/remove-librosa
Browse files Browse the repository at this point in the history
chore: remove librosa dependency in favor of torchaudio
  • Loading branch information
iver56 authored Jan 15, 2025
2 parents 2b39513 + 9a68af9 commit bb90cf9
Show file tree
Hide file tree
Showing 13 changed files with 33 additions and 104 deletions.
1 change: 0 additions & 1 deletion .github/workflows/test_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ torch==1.11.0
torchaudio==0.11.0
audioread>=2.1.8
julius>=0.2.3,<0.3
librosa==0.9.1
py-cpuinfo>=7.0.0
pytest==5.3.4
pytest-cov==2.8.1
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,10 @@ classification. It was successfully applied in the paper

* Add new transforms: `Mix`, `Padding`, `RandomCrop` and `SpliceOut`

### Changed

* Remove `librosa` dependency in favor of `torchaudio`

## [v0.11.2] - 2025-01-09

### Fixed
Expand Down
1 change: 0 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ dependencies:
- black==23.12.1
- coverage==5.3
- julius>=0.2.3,<0.3
- librosa==0.9.1
- pandas==1.1.4
- py-cpuinfo==7.0.0
- pytest==7.4.4
Expand Down
18 changes: 9 additions & 9 deletions scripts/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import time
from pathlib import Path

import librosa
import numpy as np
import torch
from scipy.io import wavfile
Expand All @@ -30,6 +29,7 @@
from torch_audiomentations.augmentations.splice_out import SpliceOut
from torch_audiomentations.core.transforms_interface import ModeNotSupportedException
from torch_audiomentations.utils.object_dict import ObjectDict
from torch_audiomentations.utils.io import Audio

SAMPLE_RATE = 44100

Expand Down Expand Up @@ -88,14 +88,14 @@ def __exit__(self, type, value, traceback):
random.seed(43)

filenames = ["perfect-alley1.ogg", "perfect-alley2.ogg"]
samples1, _ = librosa.load(
os.path.join(TEST_FIXTURES_DIR, filenames[0]), sr=SAMPLE_RATE, mono=False
)
samples2, _ = librosa.load(
os.path.join(TEST_FIXTURES_DIR, filenames[1]), sr=SAMPLE_RATE, mono=False
)
samples = np.stack((samples1, samples2), axis=0)
samples = torch.from_numpy(samples)
audio = Audio(SAMPLE_RATE, mono=True)
samples1 = audio(os.path.join(TEST_FIXTURES_DIR, filenames[0]))
_, num_samples1 = samples1.shape
samples2 = audio(os.path.join(TEST_FIXTURES_DIR, filenames[1]))
_, num_samples2 = samples2.shape
num_samples = min(num_samples1, num_samples2)
samples = torch.stack([samples1[:, :num_samples], samples2[:, :num_samples]], dim=0)


modes = ["per_batch", "per_example", "per_channel"]
for mode in modes:
Expand Down
9 changes: 4 additions & 5 deletions scripts/measure_convolve_execution_time.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import librosa
import numpy as np
import torch
from cpuinfo import get_cpu_info
Expand All @@ -8,14 +7,14 @@
from scripts.demo import TEST_FIXTURES_DIR, timer
from scripts.plot import show_horizontal_bar_chart
from torch_audiomentations.utils.convolution import convolve as torch_convolve
from torch_audiomentations.utils.io import Audio

if __name__ == "__main__":
file_path = TEST_FIXTURES_DIR / "acoustic_guitar_0.wav"
sample_rate = 48000
samples, _ = librosa.load(file_path, sr=sample_rate)
ir_samples, _ = librosa.load(
TEST_FIXTURES_DIR / "ir" / "impulse_response_0.wav", sr=sample_rate
)
audio = Audio(sample_rate, mono=True)
samples = audio(file_path).numpy()
ir_samples = audio(TEST_FIXTURES_DIR / "ir" / "impulse_response_0.wav").numpy()

is_cuda_available = torch.cuda.is_available()
print("Is torch CUDA available:", is_cuda_available)
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ def find_version(*file_paths):
),
install_requires=[
"julius>=0.2.3,<0.3",
"librosa>=0.6.0",
"torch>=1.7.0",
"torchaudio>=0.9.0",
"torch-pitch-shift>=1.2.2",
Expand Down
15 changes: 3 additions & 12 deletions tests/test_background_noise.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,27 +13,18 @@

from torch_audiomentations import AddBackgroundNoise
from torch_audiomentations.utils.dsp import calculate_rms
from torch_audiomentations.utils.file import load_audio
from .utils import TEST_FIXTURES_DIR

from torch_audiomentations.utils.io import Audio

class TestAddBackgroundNoise(unittest.TestCase):
def setUp(self):
self.sample_rate = 16000
self.batch_size = 16
self.empty_input_audio = torch.empty(0, 1, 16000)
# TODO: use utils.io.Audio
self.input_audio = (
torch.from_numpy(
load_audio(
TEST_FIXTURES_DIR / "acoustic_guitar_0.wav",
sample_rate=self.sample_rate,
)
)
.unsqueeze(0)
.unsqueeze(0)
)

audio = Audio(self.sample_rate, mono=True)
self.input_audio = audio(TEST_FIXTURES_DIR / "acoustic_guitar_0.wav")[None]
self.input_audios = torch.cat([self.input_audio] * self.batch_size, dim=0)

self.bg_path = TEST_FIXTURES_DIR / "bg"
Expand Down
11 changes: 5 additions & 6 deletions tests/test_convolution.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
import librosa
import torch
from numpy.testing import assert_almost_equal
from scipy.signal import convolve as scipy_convolve

from tests.utils import TEST_FIXTURES_DIR
from torch_audiomentations.utils.convolution import convolve as torch_convolve

from torch_audiomentations.utils.io import Audio

class TestConvolution:
def test_convolve(self):
sample_rate = 16000

file_path = TEST_FIXTURES_DIR / "acoustic_guitar_0.wav"
samples, _ = librosa.load(file_path, sr=sample_rate)
ir_samples, _ = librosa.load(
TEST_FIXTURES_DIR / "ir" / "impulse_response_0.wav", sr=sample_rate
)
audio = Audio(sample_rate, mono=True)
samples = audio(file_path).numpy()
ir_samples = audio(TEST_FIXTURES_DIR / "ir" / "impulse_response_0.wav").numpy()


expected_output = scipy_convolve(samples, ir_samples)
actual_output = torch_convolve(
Expand Down
14 changes: 3 additions & 11 deletions tests/test_impulse_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import torch

from torch_audiomentations import ApplyImpulseResponse
from torch_audiomentations.utils.file import load_audio
from torch_audiomentations.utils.io import Audio
from .utils import TEST_FIXTURES_DIR


Expand All @@ -15,16 +15,8 @@ def sample_rate():

@pytest.fixture
def input_audio(sample_rate):
return (
torch.from_numpy(
load_audio(
os.path.join(TEST_FIXTURES_DIR, "acoustic_guitar_0.wav"),
sample_rate=sample_rate,
)
)
.unsqueeze(0)
.unsqueeze(0)
)
audio = Audio(sample_rate, mono=True)
return audio(os.path.join(TEST_FIXTURES_DIR, "acoustic_guitar_0.wav"))[None]


@pytest.fixture
Expand Down
24 changes: 4 additions & 20 deletions tests/test_mix.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,16 @@

from torch_audiomentations.augmentations.mix import Mix
from torch_audiomentations.utils.dsp import calculate_rms
from torch_audiomentations.utils.file import load_audio
from torch_audiomentations.utils.io import Audio
from .utils import TEST_FIXTURES_DIR


class TestMix(unittest.TestCase):
def setUp(self):
self.sample_rate = 16000
self.guitar = (
torch.from_numpy(
load_audio(
TEST_FIXTURES_DIR / "acoustic_guitar_0.wav",
sample_rate=self.sample_rate,
)
)
.unsqueeze(0)
.unsqueeze(0)
)
self.noise = (
torch.from_numpy(
load_audio(
TEST_FIXTURES_DIR / "bg" / "bg.wav", sample_rate=self.sample_rate
)
)
.unsqueeze(0)
.unsqueeze(0)
)
audio = Audio(self.sample_rate, mono=True)
self.guitar = audio(TEST_FIXTURES_DIR / "acoustic_guitar_0.wav")[None]
self.noise = audio(TEST_FIXTURES_DIR / "bg" / "bg.wav")[None]

common_num_samples = min(self.guitar.shape[-1], self.noise.shape[-1])
self.guitar = self.guitar[:, :, :common_num_samples]
Expand Down
8 changes: 0 additions & 8 deletions torch_audiomentations/utils/dsp.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,6 @@ def calculate_desired_noise_rms(clean_rms, snr):
return noise_rms


def resample_audio(audio, orig_sr, target_sr):
    """Return ``audio`` converted from ``orig_sr`` to ``target_sr``.

    Thin wrapper that forwards its arguments to ``librosa.resample``.

    :param audio: Audio samples, passed unchanged to ``librosa.resample``.
    :param orig_sr: Sample rate of ``audio``.
    :param target_sr: Sample rate the result should have.
    :return: Whatever ``librosa.resample`` returns for these arguments.
    """
    # TODO: We can probably remove this function and call resample directly where needed
    # librosa is imported at call time rather than at module import time.
    import librosa

    resampled = librosa.resample(audio, orig_sr=orig_sr, target_sr=target_sr)
    return resampled


def convert_decibels_to_amplitude_ratio(decibels):
return 10 ** (decibels / 20)

Expand Down
16 changes: 0 additions & 16 deletions torch_audiomentations/utils/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@
from pathlib import Path
from typing import List, Union

import soundfile

from .dsp import resample_audio

SUPPORTED_EXTENSIONS = (".wav",)

Expand Down Expand Up @@ -64,16 +61,3 @@ def find_audio_files(
break

return file_paths


def load_audio(audio_file_path, sample_rate=None, start=0, stop=None):
    """Load the audio stored at ``audio_file_path``, optionally resampling it.

    The file is read with ``soundfile.read`` and ``start``/``stop`` are
    forwarded to it unchanged. Because reading happens before resampling,
    they select a region of the *original* audio (soundfile documents them as
    frame positions), not of the resampled audio.

    :param audio_file_path: Path of the audio file to read.
    :param sample_rate: Target sample rate. When falsy (``None`` or ``0``)
        the audio is returned at the sample rate of the file.
    :param start: Forwarded to ``soundfile.read`` as ``start``.
    :param stop: Forwarded to ``soundfile.read`` as ``stop``.
    :return: The samples in the layout produced by ``soundfile.read``
        (1-D for single-channel files, ``(frames, channels)`` otherwise),
        resampled to ``sample_rate`` when requested. The sample rate itself
        is not returned.
    """
    audio, source_sample_rate = soundfile.read(audio_file_path, start=start, stop=stop)

    if sample_rate and sample_rate != source_sample_rate:
        # soundfile returns multi-channel audio as (frames, channels), whereas
        # the resampler operates along the last axis. Transpose so that time is
        # the last axis, then restore the original layout. For 1-D (mono)
        # audio both transposes are no-ops.
        audio = resample_audio(audio.T, source_sample_rate, sample_rate).T

    # TODO: return sample rate as well
    return audio
15 changes: 1 addition & 14 deletions torch_audiomentations/utils/io.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import warnings
from pathlib import Path
from typing import Text, Union

import librosa
import torch
import torchaudio
from torch import Tensor
Expand Down Expand Up @@ -155,18 +153,7 @@ def downmix_and_resample(self, samples: Tensor, sample_rate: int) -> Tensor:

# resample
if self.sample_rate != sample_rate:
samples = samples.numpy()
if self.mono:
# librosa expects mono audio to be of shape (n,), but we have (1, n).
samples = librosa.core.resample(
samples[0], orig_sr=sample_rate, target_sr=self.sample_rate
)[None]
else:
samples = librosa.core.resample(
samples.T, orig_sr=sample_rate, target_sr=self.sample_rate
).T

samples = torch.tensor(samples)
samples = torchaudio.functional.resample(samples, sample_rate, self.sample_rate)

return samples

Expand Down

0 comments on commit bb90cf9

Please sign in to comment.