-
Notifications
You must be signed in to change notification settings - Fork 0
/
to_text.py
executable file
·139 lines (125 loc) · 4.92 KB
/
to_text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/bin/env python3
from command_opts import opt, main_entry
from list_picker import list_picker
import gzip
import json
import mp3_splitter
import os
import pickle
import templater
ENGINES = {}
import engines.openai
import engines.transcribe
import engines.whisper
import engines.whisper_cpp
import engines.whisper_timestamped
import engines.whisperx
def setup_engines():
    """Validate the bundled engine modules and register them in ENGINES.

    Every module is first checked for the functions listed in ``expected``
    and is then stored in the module-level ENGINES dict under the ID returned
    by its ``get_id()``.

    Raises:
        Exception: If a module lacks one of the expected functions, or if two
            modules report the same ID.
    """
    to_setup = [
        engines.openai,
        engines.transcribe,
        engines.whisper,
        engines.whisper_cpp,
        engines.whisper_timestamped,
        engines.whisperx,
    ]
    # (function name, purpose) pairs that every engine module must implement
    expected = [
        ('get_id', 'Get an unique ID for this engine'),
        ('get_name', 'Describe the engine'),
        ('get_opts', 'Get all available options for the engine'),
        ('get_settings', 'Get model specific settings'),
        ('run_engine', 'Run the engine and transcribe audio'),
        ('parse_data', 'Parse the output of run_engine to a normalized format'),
    ]
    for module in to_setup:
        # Validate before calling get_id(), so a module that lacks it fails
        # with the descriptive error below rather than a bare AttributeError.
        for func, desc in expected:
            if not hasattr(module, func):
                raise Exception(
                    f"Helper '{module.__name__}' does not contain function {func}() for '{desc}'!"
                )
        engine_id = module.get_id()
        if engine_id in ENGINES:
            raise Exception(f"The engine ID '{engine_id}' was used more than once!")
        ENGINES[engine_id] = module


# Populate ENGINES at import time; the commands defined below look engines up in it.
setup_engines()
@opt("Show all available transcription engines")
def show_engines():
    """Print each registered engine's ID and name, followed by its options."""
    for engine_id in ENGINES:
        engine = ENGINES[engine_id]
        print(f"{engine_id}: '{engine.get_name()}'")
        # One line per (option name, description) pair reported by the engine
        for opt_name, opt_desc in engine.get_opts():
            print(f' "{opt_name}": "{opt_desc}",')
@opt("Interactively create a settings file example")
def create_settings():
    """Prompt for transcription settings and save them to a JSON file.

    Asks for the settings filename, the source MP3, an optional target HTML
    name, the engine to use, and one value for every option reported by that
    engine's ``get_opts()``. The collected settings are printed and then
    written to the chosen file as UTF-8 JSON.
    """
    fn = input("Please enter the filename to write the settings to: ")

    settings = {}
    settings["source_mp3"] = input("Please enter the filename of the source MP3 file: ")

    # A blank answer means "name after the MP3": the key is simply left out,
    # which is the case create_webpage handles by appending ".html".
    target_fn = input("Please enter the target output HTML name (blank to name after the MP3): ")
    if target_fn:
        settings["target_fn"] = target_fn

    engine_choices = [(value.get_name(), key) for key, value in ENGINES.items()]
    settings["engine"] = list_picker([("Select engine:",)] + engine_choices)

    # Ask for every engine-specific option, using its description as the prompt
    settings["engine_details"] = {}
    for setting, desc in ENGINES[settings["engine"]].get_opts():
        settings["engine_details"][setting] = input(desc + ": ")

    print("Target settings:")
    print(json.dumps(settings, indent=4))

    with open(fn, "wt", newline="", encoding="utf-8") as f:
        json.dump(settings, f, indent=4)
def enumerate_words(data):
    """Yield a (word, start, end, speaker) tuple for every frame in data.

    Frames with exactly three items carry no speaker and get speaker -1;
    any other frame is unpacked as four items.
    """
    for entry in data:
        if len(entry) != 3:
            word, start, end, speaker = entry
        else:
            (word, start, end), speaker = entry, -1
        yield word, start, end, speaker
@opt("Transcribe an MP3 file and create a webpage")
def create_webpage(settings_file):
    """Transcribe the MP3 named in a settings file and write an HTML page.

    The raw engine output is cached as ``<settings_file>.gz``; when that file
    already exists it is reused and the engine is not run. If the engine's
    settings contain 'limit_seconds' or 'limit_bytes', the MP3 is split into
    chunks, each chunk is transcribed separately, and the parsed word timings
    are shifted by the chunk's offset.

    Args:
        settings_file: Path to a JSON settings file. The keys read are
            "source_mp3", "engine", "engine_details" and, optionally,
            "target_fn" (defaults to the MP3 name plus ".html").
    """
    # Marker put in front of pickled per-chunk results in the cache file
    chunked_prefix = b'CHUNKED'

    with open(settings_file, "rt", encoding="utf-8") as f:
        settings = json.load(f)

    engine = ENGINES[settings["engine"]]
    engine_settings = engine.get_settings()

    data_fn = settings_file + ".gz"
    if os.path.isfile(data_fn):
        # Reuse the cached engine output instead of transcribing again
        with gzip.open(data_fn, "rb") as f:
            data = f.read()
    else:
        if 'limit_seconds' in engine_settings or 'limit_bytes' in engine_settings:
            temp = []
            print("Creating separate chunks...")
            chunks = mp3_splitter.chunk_mp3(
                settings["source_mp3"],
                duration_in_seconds=engine_settings.get('limit_seconds'),
                size_in_bytes=engine_settings.get('limit_bytes'),
            )
            for chunk in chunks:
                try:
                    temp.append({
                        "offset": chunk["offset"],
                        "duration": chunk["duration"],
                        "data": engine.run_engine(settings["engine_details"], chunk['fn']),
                    })
                finally:
                    # Remove the chunk file even when the engine fails
                    os.unlink(chunk['fn'])
            data = chunked_prefix + pickle.dumps(temp)
        else:
            data = engine.run_engine(settings["engine_details"], settings["source_mp3"])

        with gzip.open(data_fn, "wb") as f:
            f.write(data)

    if data.startswith(chunked_prefix):
        # For chunked data, parse each chunk in turn and offset the resulting data.
        # NOTE(security): pickle.loads can execute arbitrary code from a crafted
        # cache file; only use .gz caches that this tool created itself.
        temp = pickle.loads(data[len(chunked_prefix):])
        data = []
        for cur in temp:
            chunk = engine.parse_data(cur['data'])
            for word, start, end, speaker in enumerate_words(chunk):
                data.append((word, start + cur['offset'], end + cur['offset'], speaker))
    else:
        # Non-chunked data, just read and parse it as is
        data = engine.parse_data(data)

    data = templater.fill_out(data, settings['source_mp3'])

    if "target_fn" in settings:
        dest = settings["target_fn"]
    else:
        dest = settings['source_mp3'] + ".html"

    # Explicit UTF-8, matching the other text files this script reads and
    # writes, so the output does not depend on the platform's locale encoding.
    with open(dest, "wt", newline="", encoding="utf-8") as f:
        f.write(data)

    print(f"{dest} created!")
# Script entry point: hand control to main_entry from command_opts.
# NOTE(review): presumably this dispatches to the @opt-decorated commands above;
# the meaning of the 'func' argument is not visible here — confirm in command_opts.
if __name__ == "__main__":
    main_entry('func')