-
Notifications
You must be signed in to change notification settings - Fork 1
/
LLMinAbox.py
131 lines (109 loc) · 4.14 KB
/
LLMinAbox.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import os
import time
import requests
import speech_recognition as sr
from typing import IO
from io import BytesIO
from elevenlabs import VoiceSettings
from elevenlabs.client import ElevenLabs
from dotenv import load_dotenv
import pygame
import tkinter as tk
from tkinter import messagebox
# Populate the process environment from the .env file before reading settings.
load_dotenv()

# Credentials and endpoint for the external services; a value is None when
# the corresponding variable is not set.
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
LLMINABOX_API_URL = os.environ.get("LLMINABOX_API_URL")

# Single ElevenLabs client shared by every text-to-speech request.
eleven_labs_client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
# Function to convert text to speech and return as audio stream
def text_to_speech_stream(
    text: str,
    *,
    voice_id: str = "jBpfuIE2acCO8z3wKNLl",
    model_id: str = "eleven_turbo_v2_5",
    output_format: str = "mp3_22050_32",
) -> IO[bytes]:
    """Synthesize *text* with ElevenLabs and return the audio in memory.

    Args:
        text: The text to convert to speech.
        voice_id: ElevenLabs voice identifier. The default was labelled the
            "Adam" pre-made voice by the original author.
            NOTE(review): confirm this ID against the ElevenLabs voice list.
        model_id: ElevenLabs model used for synthesis.
        output_format: Audio encoding requested from the API.

    Returns:
        A ``BytesIO`` positioned at offset 0 that holds the complete audio
        received from the API.
    """
    response = eleven_labs_client.text_to_speech.convert_as_stream(
        voice_id=voice_id,
        output_format=output_format,
        optimize_streaming_latency="4",
        text=text,
        model_id=model_id,
        voice_settings=VoiceSettings(
            stability=0.0,
            similarity_boost=1.0,
            style=0.0,
            use_speaker_boost=True,
        ),
    )

    # Drain the chunk iterator (skipping empty chunks) into one buffer.
    # A BytesIO built from initial bytes already starts at position 0, so no
    # explicit seek is required before handing it to a reader.
    return BytesIO(b"".join(chunk for chunk in response if chunk))
# Function to recognize speech
def recognize_speech():
    """Record one phrase from the microphone and transcribe it.

    The audio is captured with ``speech_recognition`` and transcribed through
    its Google recognizer.

    Returns:
        The recognized text, or ``None`` when transcription fails for any
        reason (the reason is printed to stdout).
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        audio = recognizer.listen(source)

    try:
        text = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        # This exception carries no message, so printing str(e) would show a
        # blank "Error: " line; state the cause explicitly instead.
        print("Error: Could not understand the audio")
        return None
    except sr.RequestError as e:
        print(f"Error: Speech recognition request failed: {e}")
        return None
    except Exception as e:
        # Best-effort fallback kept from the original: never let a
        # recognition failure propagate to the caller.
        print(f"Error: {str(e)}")
        return None

    print(f"Recognized: {text}")
    return text
# Function to send text to LLMinaBox API and get the response
def send_to_LLMinBox(user_input, timeout=120):
    """Send *user_input* to the LLMinaBox API and return the reply text.

    Args:
        user_input: The question text; posted as ``{"question": user_input}``.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.post``.

    Returns:
        The value of the ``text`` field of the JSON reply, or the placeholder
        ``'No text field in JSON'`` when that field is absent. On any failure
        a string starting with ``"Error:"`` is returned instead of raising.
    """
    payload = {"question": user_input}
    try:
        # No stream=True: the body is read in full by .json() anyway, and a
        # non-streamed response releases its connection even when
        # raise_for_status() raises. The timeout stops a stalled server from
        # blocking the caller forever.
        response = requests.post(
            LLMINABOX_API_URL, json=payload, timeout=timeout
        )
        response.raise_for_status()  # Raise an exception for bad status codes
        print(f"Response status code: {response.status_code}")
        json_response = response.json()
    except requests.exceptions.RequestException as req_err:
        print(f"Request to LLMinaBox failed: {req_err}")
        return f"Error: Failed to connect to LLMinaBox. {str(req_err)}"
    except ValueError as json_err:
        # Older requests releases raise a plain ValueError (not a
        # RequestException) when the body is not valid JSON.
        print(f"LLMinaBox returned invalid JSON: {json_err}")
        return f"Error: Invalid JSON from LLMinaBox. {json_err}"

    if not isinstance(json_response, dict):
        # A JSON array/scalar has no .get(); report it instead of crashing.
        print(f"Unexpected LLMinaBox response: {json_response!r}")
        return "Error: Unexpected response format from LLMinaBox."

    return json_response.get('text', 'No text field in JSON')
# Function to play audio from a BytesIO stream
def play_audio(audio_stream):
    """Play *audio_stream* with pygame and block until playback ends.

    Args:
        audio_stream: Object handed to ``pygame.mixer.music.load``; callers in
            this file pass the ``BytesIO`` produced by text-to-speech.
    """
    pygame.mixer.init()

    music = pygame.mixer.music
    music.load(audio_stream)
    music.play()

    # Poll the mixer rather than returning immediately, so the caller only
    # regains control once the clip has finished.
    poll_interval = 0.1
    while music.get_busy():
        time.sleep(poll_interval)
# Function triggered by the Tkinter button to start the process
def start_recording():
    """Run one listen -> ask LLMinaBox -> speak cycle (Tkinter button handler).

    Does nothing further when no speech was recognized. When the LLMinaBox
    reply starts with "Error:", text-to-speech is skipped and an error dialog
    is shown instead.
    """
    spoken_text = recognize_speech()
    if not spoken_text:
        return

    reply = send_to_LLMinBox(spoken_text)
    print("LLMinaBox response:", reply)

    if reply.startswith("Error:"):
        print("Skipping text-to-speech due to error in LLMinaBox response")
        messagebox.showerror("Error", "LLMinaBox response error")
        return

    # The reply text goes to ElevenLabs unchanged; play the resulting audio.
    play_audio(text_to_speech_stream(reply))
# Create the Tkinter UI
def create_gui():
    """Build the one-button window and enter the Tkinter main loop."""
    root = tk.Tk()
    root.title("Speech Recognition App")

    # The only control: a button that kicks off a recording cycle.
    button_options = {
        "text": "Start Recording",
        "command": start_recording,
        "padx": 20,
        "pady": 10,
    }
    tk.Button(root, **button_options).pack(pady=20)

    root.mainloop()
# Start the GUI only when this file is run as a script, not when imported.
if __name__ == "__main__":
    create_gui()