-
Notifications
You must be signed in to change notification settings - Fork 65
/
convert_desed.py
32 lines (28 loc) · 988 Bytes
/
convert_desed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Ke Chen
# HTS-AT: A HIERARCHICAL TOKEN-SEMANTIC AUDIO TRANSFORMER FOR SOUND CLASSIFICATION AND DETECTION
# Convert the DESED dataset: resample the public eval audio and pack the waveforms into eval.npy
import numpy as np
import os
import librosa
import config
from utils import float32_to_int16
import soundfile as sf
def main():
    """Resample the DESED public evaluation audio and pack it into ``eval.npy``.

    Every entry of ``<config.desed_folder>/audio/eval/public`` is loaded with
    librosa at ``config.sample_rate``, written under
    ``<config.desed_folder>/audio/eval/resample`` with the same file name, and
    collected as a record with the keys ``"audio_name"`` (the file name) and
    ``"waveform"`` (the result of ``float32_to_int16`` on the loaded signal).
    The list of records is finally saved to ``<config.desed_folder>/eval.npy``.

    Raises:
        OSError: if the input folder cannot be listed or the output folder
            cannot be created.
    """
    desed_folder = os.path.join(config.desed_folder, "audio", "eval", "public")
    output_dir = os.path.join(config.desed_folder, "audio", "eval", "resample")
    # The writer does not create missing parent folders, so make sure the
    # destination exists before the first file is written.
    os.makedirs(output_dir, exist_ok=True)

    output_dict = []
    # os.listdir returns names in arbitrary order; sorting keeps the record
    # order in eval.npy reproducible across runs and machines.
    for f in sorted(os.listdir(desed_folder)):
        y, sr = librosa.load(os.path.join(desed_folder, f), sr=config.sample_rate)
        sf.write(os.path.join(output_dir, f), y, sr)
        # Convert once and reuse the result for both the log line and the record.
        waveform = float32_to_int16(y)
        print(f, sr, waveform)
        output_dict.append(
            {
                "audio_name": f,
                "waveform": waveform,
            }
        )

    npy_file = os.path.join(config.desed_folder, "eval.npy")
    # A list of dicts is stored as a pickled object array, so readers have to
    # load it with allow_pickle=True.
    np.save(npy_file, output_dict)
# Run the conversion only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()