Skip to content

Commit 8e3f1f9

Browse files
authored
Merge pull request #14 from moises-ai/fix-file-conversion
Fix file conversion
2 parents e889221 + 6b2786f commit 8e3f1f9

File tree

4 files changed

+194
-52
lines changed

4 files changed

+194
-52
lines changed

maestro_worker_python/convert_files.py

Lines changed: 75 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
from __future__ import annotations
22

3-
import tempfile
3+
import concurrent.futures
44
import logging
55
import subprocess
6-
import concurrent.futures
7-
6+
import tempfile
7+
from contextlib import contextmanager
88
from dataclasses import dataclass
99
from typing import List
10+
1011
from .response import ValidationError
11-
from contextlib import contextmanager
1212

1313
logger = logging.getLogger(__name__)
1414

@@ -32,11 +32,17 @@ def convert_files(convert_files: List[FileToConvert]):
3232
futures = []
3333
with concurrent.futures.ThreadPoolExecutor() as executor:
3434
for convert_file in convert_files:
35-
target_function = _convert_to_m4a if convert_file.file_format == "m4a" else _convert_to_wav
35+
target_function = (
36+
_convert_to_m4a
37+
if convert_file.file_format == "m4a"
38+
else _convert_to_wav
39+
)
3640
futures.append(
3741
executor.submit(
38-
target_function, convert_file.input_file_path, convert_file.output_file_path,
39-
convert_file.max_duration
42+
target_function,
43+
convert_file.input_file_path,
44+
convert_file.output_file_path,
45+
convert_file.max_duration,
4046
)
4147
)
4248

@@ -55,10 +61,17 @@ def convert_files_manager(*convert_files: FileToConvert) -> None | str | list[st
5561
for convert_file in convert_files:
5662
file_format = ".m4a" if convert_file.file_format == "m4a" else ".wav"
5763
filename = tempfile.NamedTemporaryFile(suffix=file_format)
58-
target_function = _convert_to_m4a if convert_file.file_format == "m4a" else _convert_to_wav
64+
target_function = (
65+
_convert_to_m4a
66+
if convert_file.file_format == "m4a"
67+
else _convert_to_wav
68+
)
5969
thread_list.append(
6070
executor.submit(
61-
target_function, convert_file.input_file_path, filename.name, convert_file.max_duration
71+
target_function,
72+
convert_file.input_file_path,
73+
filename.name,
74+
convert_file.max_duration,
6275
)
6376
)
6477
list_objects.append(filename)
@@ -77,27 +90,68 @@ def convert_files_manager(*convert_files: FileToConvert) -> None | str | list[st
7790

7891

7992
def _convert_to_wav(input_file_path, output_file_path, max_duration):
80-
_run_subprocess(f"ffmpeg -y -hide_banner -loglevel error -t {max_duration} -i {input_file_path} -ar 44100 {output_file_path}")
93+
_run_subprocess(
94+
[
95+
"ffmpeg",
96+
"-y",
97+
"-hide_banner",
98+
"-loglevel",
99+
"error",
100+
"-t",
101+
str(max_duration),
102+
"-i",
103+
str(input_file_path),
104+
"-ar",
105+
"44100",
106+
str(output_file_path),
107+
]
108+
)
81109

82110

83111
def _convert_to_m4a(input_file_path, output_file_path, max_duration):
84-
_run_subprocess(f"ffmpeg -y -hide_banner -loglevel error -t {max_duration} -i {input_file_path} -c:a aac -b:a 192k -ar 44100 -movflags +faststart {output_file_path}")
112+
_run_subprocess(
113+
[
114+
"ffmpeg",
115+
"-y",
116+
"-hide_banner",
117+
"-loglevel",
118+
"error",
119+
"-t",
120+
str(max_duration),
121+
"-i",
122+
str(input_file_path),
123+
"-c:a",
124+
"aac",
125+
"-b:a",
126+
"192k",
127+
"-ar",
128+
"44100",
129+
"-movflags",
130+
"+faststart",
131+
str(output_file_path),
132+
]
133+
)
85134

86135

87136
def _run_subprocess(command):
88137
try:
89-
process = subprocess.run(command, shell=True, capture_output=True, check=True)
138+
process = subprocess.run(command, shell=False, capture_output=True, check=True)
90139
except subprocess.CalledProcessError as exc:
91140
invalid_file_errors = [
92141
"Invalid data found when processing input",
93142
"Output file #0 does not contain any stream",
94143
"Output file does not contain any stream",
95-
"Invalid argument"
144+
"Invalid argument",
96145
]
97146
if any(error in exc.stderr.decode() for error in invalid_file_errors):
98147
logger.warning(
99-
"Could not convert because the file is invalid",
100-
extra={"props": {"stderr": exc.stderr.decode(), "stdout": exc.stdout.decode()}}
148+
"Could not convert because the file is invalid",
149+
extra={
150+
"props": {
151+
"stderr": exc.stderr.decode(),
152+
"stdout": exc.stdout.decode(),
153+
}
154+
},
101155
)
102156
raise ValidationError(
103157
f"Could not convert because the file is invalid, ffmpeg stderr: {exc.stderr.decode()}"
@@ -110,6 +164,11 @@ def _run_subprocess(command):
110164
if process.stderr:
111165
logger.warning(
112166
"Non-falal error during conversion",
113-
extra={"props": {"stderr": process.stderr.decode(), "stdout": process.stdout.decode()}}
167+
extra={
168+
"props": {
169+
"stderr": process.stderr.decode(),
170+
"stdout": process.stdout.decode(),
171+
}
172+
},
114173
)
115174
logger.info(f"Conversion output: {process.stdout.decode()}")

tests/silent with space.ogg

4.02 KB
Binary file not shown.

tests/silent with space.wav

174 KB
Binary file not shown.

tests/test_convert_files.py

Lines changed: 119 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
1-
import os
2-
import pytest
31
import hashlib
2+
import os
43
import subprocess
54
from pathlib import Path
6-
from maestro_worker_python.convert_files import convert_files_manager, convert_files, FileToConvert, FileConversionError
7-
from maestro_worker_python.response import ValidationError
85

6+
import pytest
7+
8+
from maestro_worker_python.convert_files import (
9+
FileConversionError,
10+
FileToConvert,
11+
convert_files,
12+
convert_files_manager,
13+
)
14+
from maestro_worker_python.response import ValidationError
915

1016
TEST_PATH = Path(__file__).resolve().parent
1117

@@ -28,72 +34,126 @@ def corrupt_audio_file(tmp_path_factory):
2834
def test_should_re_raise_exceptions_in_thread(invalid_audio_file, file_format):
2935
with pytest.raises(FileConversionError) as exc:
3036
convert_files(
31-
[FileToConvert(
32-
input_file_path=TEST_PATH / "foobar.mp3",
33-
output_file_path=f"{invalid_audio_file}.wav",
34-
file_format=file_format,
35-
)]
37+
[
38+
FileToConvert(
39+
input_file_path=TEST_PATH / "foobar.mp3",
40+
output_file_path=f"{invalid_audio_file}.wav",
41+
file_format=file_format,
42+
)
43+
]
3644
)
3745

3846

3947
@pytest.mark.parametrize("file_format", ["m4a", "wav"])
40-
def test_should_raise_validation_error_if_audio_file_is_invalid(invalid_audio_file, file_format):
48+
def test_should_raise_validation_error_if_audio_file_is_invalid(
49+
invalid_audio_file, file_format
50+
):
4151
with pytest.raises(ValidationError) as exc:
4252
convert_files(
43-
[FileToConvert(
44-
input_file_path=invalid_audio_file,
45-
output_file_path=f"{invalid_audio_file}.wav",
46-
file_format=file_format,
47-
)]
53+
[
54+
FileToConvert(
55+
input_file_path=invalid_audio_file,
56+
output_file_path=f"{invalid_audio_file}.wav",
57+
file_format=file_format,
58+
)
59+
]
4860
)
4961

5062
assert "Invalid data" in str(exc.value)
5163

5264

5365
@pytest.mark.parametrize("file_format", ["m4a", "wav"])
54-
def test_should_raise_validation_error_if_audio_file_is_corrupt(corrupt_audio_file, file_format):
66+
def test_should_raise_validation_error_if_audio_file_is_corrupt(
67+
corrupt_audio_file, file_format
68+
):
5569
with pytest.raises(ValidationError) as exc:
5670
convert_files(
57-
[FileToConvert(
58-
input_file_path=corrupt_audio_file,
59-
output_file_path=f"{corrupt_audio_file}.wav",
60-
file_format=file_format
61-
)]
71+
[
72+
FileToConvert(
73+
input_file_path=corrupt_audio_file,
74+
output_file_path=f"{corrupt_audio_file}.wav",
75+
file_format=file_format,
76+
)
77+
]
6278
)
6379

6480
assert "Invalid argument" in str(exc.value)
6581

6682

6783
@pytest.mark.parametrize("file_format", ["m4a", "wav"])
6884
def test_should_raise_validation_error_if_source_has_no_audio(file_format, caplog):
69-
input_file_path, output_file_path = TEST_PATH / "video-no-audio.mp4", TEST_PATH / "output.wav"
85+
input_file_path, output_file_path = (
86+
TEST_PATH / "video-no-audio.mp4",
87+
TEST_PATH / "output.wav",
88+
)
7089
with pytest.raises(ValidationError) as exc:
7190
convert_files(
72-
[FileToConvert(
73-
input_file_path=input_file_path,
74-
output_file_path=output_file_path,
75-
file_format=file_format,
76-
)]
91+
[
92+
FileToConvert(
93+
input_file_path=input_file_path,
94+
output_file_path=output_file_path,
95+
file_format=file_format,
96+
)
97+
]
7798
)
7899

79100
assert "does not contain any strea" in str(exc.value)
80101

81102

82-
def test_should_convert_valid_audio_file():
83-
input_file_path, output_file_path = TEST_PATH / "silent.ogg", TEST_PATH / "silent.wav"
103+
@pytest.mark.parametrize(
104+
"input_name, output_name, format",
105+
[
106+
("silent.ogg", "converted.wav", "wav"),
107+
("silent with space.ogg", "converted.wav", "wav"),
108+
],
109+
)
110+
def test_should_convert_valid_wav_audio_file(input_name, output_name, format):
111+
input_file_path, output_file_path = (
112+
TEST_PATH / input_name,
113+
TEST_PATH / output_name,
114+
)
84115
convert_files(
85-
[FileToConvert(
86-
input_file_path=input_file_path,
87-
output_file_path=output_file_path,
88-
file_format="wav",
89-
)]
116+
[
117+
FileToConvert(
118+
input_file_path=input_file_path,
119+
output_file_path=output_file_path,
120+
file_format=format,
121+
)
122+
]
90123
)
91124
assert _get_hash(input_file_path) == _get_hash(output_file_path)
92125
Path(output_file_path).unlink(missing_ok=True)
93126

94127

128+
@pytest.mark.parametrize(
129+
"input_name, output_name, format",
130+
[
131+
("silent.ogg", "converted.m4a", "m4a"),
132+
("silent with space.wav", "converted.m4a", "m4a"),
133+
],
134+
)
135+
def test_should_convert_valid_m4a_audio_file(input_name, output_name, format):
136+
input_file_path, output_file_path = (
137+
TEST_PATH / input_name,
138+
TEST_PATH / output_name,
139+
)
140+
convert_files(
141+
[
142+
FileToConvert(
143+
input_file_path=input_file_path,
144+
output_file_path=output_file_path,
145+
file_format=format,
146+
)
147+
]
148+
)
149+
Path(output_file_path).unlink(missing_ok=True)
150+
151+
95152
def test_should_convert_multiple_valid_audio_files_and_delete_after_context():
96-
input_file_path, output_file_path = TEST_PATH / "silent.ogg", TEST_PATH / "silent.wav"
153+
input_file_path, output_file_path = (
154+
TEST_PATH / "silent.ogg",
155+
TEST_PATH / "silent.wav",
156+
)
97157
converted_files_list = []
98158
with convert_files_manager(
99159
FileToConvert(
@@ -115,6 +175,29 @@ def test_should_convert_multiple_valid_audio_files_and_delete_after_context():
115175
def _get_hash(file_name):
    """Run ffmpeg's ``hash`` output format on ``file_name`` and return the digest.

    The command is passed as an argument list with ``shell=False`` so that
    paths containing spaces or shell metacharacters reach ffmpeg as a single
    argument.

    Args:
        file_name: Path (``str`` or ``pathlib.Path``) of the media file.

    Returns:
        The bytes that follow the first ``=`` in ffmpeg's stdout, stripped of
        surrounding whitespace.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        IndexError: If ffmpeg's stdout contains no ``=``.
    """
    process = subprocess.run(
        [
            "ffmpeg",
            "-loglevel",
            "error",
            "-i",
            str(file_name),
            "-map",
            "0",
            "-f",
            "hash",
            "-",
        ],
        shell=False,
        capture_output=True,
        check=True,
    )
    # Keep only the part after the first "=" (the digest itself).
    return process.stdout.split(b"=")[1].strip()

0 commit comments

Comments
 (0)