-
Notifications
You must be signed in to change notification settings - Fork 22
/
server.py
97 lines (74 loc) · 2.76 KB
/
server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import asyncio
from typing import Tuple
import numpy as np
import uvicorn
from fastapi import Depends, FastAPI, Request
from faster_whisper import WhisperModel
app = FastAPI()
# Whisper checkpoint to load; any faster-whisper model name should work here.
MODEL_TYPE = "large-v2"
# Selects which branch of create_whisper_model() runs.
RUN_TYPE = "gpu"  # "cpu" or "gpu"
# For CPU usage (https://github.com/SYSTRAN/faster-whisper/issues/100#issuecomment-1492141352)
NUM_WORKERS = 10
CPU_THREADS = 4
# For GPU usage: CUDA device indices to spread the model across.
GPU_DEVICE_INDICES = [0, 1, 2, 3]
# Enable faster-whisper's voice-activity-detection filter during transcription.
VAD_FILTER = True
def create_whisper_model() -> WhisperModel:
    """Instantiate a WhisperModel configured for the device named by RUN_TYPE.

    Returns:
        A loaded ``WhisperModel`` (fp16 on CUDA, int8 on CPU).

    Raises:
        ValueError: if RUN_TYPE is neither "gpu" nor "cpu" (case-insensitive).
    """
    run_type = RUN_TYPE.lower()
    if run_type == "gpu":
        whisper = WhisperModel(
            MODEL_TYPE,
            device="cuda",
            compute_type="float16",
            device_index=GPU_DEVICE_INDICES,
            download_root="./models",
        )
    elif run_type == "cpu":
        whisper = WhisperModel(
            MODEL_TYPE,
            device="cpu",
            compute_type="int8",
            num_workers=NUM_WORKERS,
            cpu_threads=CPU_THREADS,
            download_root="./models",
        )
    else:
        raise ValueError(f"Invalid model type: {RUN_TYPE}")
    print("Loaded model")
    return whisper
# Load the model once at import time so every request reuses the same instance.
# create_whisper_model() already prints "Loaded model" on success, so the
# second, redundant print that used to follow this line has been removed.
model = create_whisper_model()
async def parse_body(request: Request):
    """FastAPI dependency: return the raw HTTP request body as bytes."""
    return await request.body()
def execute_blocking_whisper_prediction(
        model: WhisperModel,
        audio_data_array: np.ndarray,
        language_code: str = "") -> Tuple[str, str, float]:
    """Run a synchronous transcription pass over one audio buffer.

    Args:
        model: loaded faster-whisper model.
        audio_data_array: float32 audio samples.
        language_code: optional language hint; empty string means auto-detect.

    Returns:
        Tuple of (transcribed text, detected language, language probability).
    """
    lang = language_code.lower().strip()
    # An empty hint falls through to None so the model auto-detects language.
    segments, info = model.transcribe(
        audio_data_array,
        language=lang or None,
        beam_size=5,
        vad_filter=VAD_FILTER,
        vad_parameters={"min_silence_duration_ms": 500})
    transcription = " ".join(segment.text for segment in segments).strip()
    return transcription, info.language, info.language_probability
@app.post("/predict")
async def predict(
        audio_data: bytes = Depends(parse_body), language_code: str = ""):
    """Transcribe raw 16-bit PCM audio posted as the request body.

    Query param ``language_code`` optionally pins the language; empty string
    lets the model auto-detect. Returns a JSON object with keys
    ``prediction``, ``language`` and ``language_probability``.
    """
    # Interpret the body as int16 PCM and normalise to [-1.0, 1.0].
    # BUG FIX: the previous divisor was 255.0 (the uint8 full scale); int16
    # full scale is 32768, so samples came out ~128x outside the expected
    # range for the model.
    audio_data_array: np.ndarray = np.frombuffer(audio_data, np.int16).astype(
        np.float32) / 32768.0
    try:
        # Run the blocking prediction in the default executor so the event
        # loop stays responsive.
        transcription, language, probability = (
            await asyncio.get_running_loop().run_in_executor(
                None, execute_blocking_whisper_prediction, model,
                audio_data_array, language_code))
    except Exception as e:
        # BUG FIX: the old code set result = "Error" and then indexed it,
        # responding with {"prediction": "E", "language": "r", ...}. Return a
        # well-formed error payload with the same keys instead.
        print(e)
        return {
            "prediction": "Error",
            "language": "",
            "language_probability": 0.0
        }
    return {
        "prediction": transcription,
        "language": language,
        "language_probability": probability
    }
if __name__ == "__main__":
    # Launch the API with uvicorn when the script is executed directly,
    # listening on all interfaces on port 8008.
    uvicorn.run(app, host="0.0.0.0", port=8008)