From fc1d9c0167d068f6696e7bc1fa539453b234d4b8 Mon Sep 17 00:00:00 2001 From: Denis Shulyaka Date: Fri, 25 Aug 2023 18:21:43 +0300 Subject: [PATCH] Simultaneous tts and snd --- rhasspy3/pipeline.py | 66 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 53 insertions(+), 13 deletions(-) diff --git a/rhasspy3/pipeline.py b/rhasspy3/pipeline.py index 7a94223..84f9c1c 100644 --- a/rhasspy3/pipeline.py +++ b/rhasspy3/pipeline.py @@ -8,15 +8,16 @@ from .asr import DOMAIN as ASR_DOMAIN from .asr import Transcript, transcribe +from .audio import AudioChunk, AudioStop from .config import CommandConfig, PipelineConfig, PipelineProgramConfig from .core import Rhasspy -from .event import Event, Eventable, async_read_event +from .event import Event, Eventable, async_read_event, async_write_event from .handle import Handled, NotHandled, handle from .intent import Intent, NotRecognized, recognize from .mic import DOMAIN as MIC_DOMAIN from .program import create_process, run_command -from .snd import play -from .tts import synthesize +from .snd import play, Played, DOMAIN as SND_DOMAIN +from .tts import synthesize, Synthesize, DOMAIN as TTS_DOMAIN from .util.dataclasses_json import DataClassJsonMixin from .vad import segment from .wake import Detection, detect @@ -192,24 +193,63 @@ async def run( if (stop_after == StopAfterDomain.HANDLE) or (tts_program is None): return pipeline_result - # Text to speech if handle_result is not None: pipeline_result.handle_result = handle_result - if handle_result.text: + if not handle_result.text: + _LOGGER.debug("No text returned from handle") + + if (stop_after == StopAfterDomain.TTS) or (snd_program is None): + # Text to speech + if (handle_result is not None) and (handle_result.text): assert tts_program is not None, "Pipeline is missing tts" tts_wav_in = io.BytesIO() await synthesize(rhasspy, tts_program, handle_result.text, tts_wav_in) - else: - _LOGGER.debug("No text returned from handle") - if (stop_after == StopAfterDomain.TTS) 
or (snd_program is None): return pipeline_result - # Audio output - if tts_wav_in is not None: - tts_wav_in.seek(0) - assert snd_program is not None, "Pipeline is missing snd" - await play(rhasspy, snd_program, tts_wav_in, samples_per_chunk) + if (handle_result is None) or (not handle_result.text): + # Audio output + if tts_wav_in is not None: + tts_wav_in.seek(0) + assert snd_program is not None, "Pipeline is missing snd" + await play(rhasspy, snd_program, tts_wav_in, samples_per_chunk) + + return pipeline_result + + # Both Text to speech and Audio output + assert tts_program is not None, "Pipeline is missing tts" + assert snd_program is not None, "Pipeline is missing snd" + + async with (await create_process(rhasspy, TTS_DOMAIN, tts_program)) as tts_proc, ( + await create_process(rhasspy, SND_DOMAIN, snd_program) + ) as snd_proc: + assert tts_proc.stdin is not None + assert tts_proc.stdout is not None + assert snd_proc.stdin is not None + assert snd_proc.stdout is not None + + await async_write_event(Synthesize(text=handle_result.text).event(), tts_proc.stdin) + + while True: + event = await async_read_event(tts_proc.stdout) + if event is None: + break + + if AudioChunk.is_type(event.type): + await async_write_event(event, snd_proc.stdin) + + elif AudioStop.is_type(event.type): + await async_write_event(event, snd_proc.stdin) + break + + # Wait for confirmation + while True: + event = await async_read_event(snd_proc.stdout) + if event is None: + break + + if Played.is_type(event.type): + break return pipeline_result