Skip to content

Commit

Permalink
122: Added integration with audio transcription tool
Browse files Browse the repository at this point in the history
  • Loading branch information
jarmatys committed Dec 20, 2024
1 parent 85f945f commit cb539d6
Show file tree
Hide file tree
Showing 23 changed files with 211 additions and 47 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<!-- Project file for the audio (speech-to-text) infrastructure library. -->
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<!-- Exposes this assembly's internal types to the ASSISTENTE.Infrastructure assembly. -->
<ItemGroup>
<InternalsVisibleTo Include="ASSISTENTE.Infrastructure"/>
</ItemGroup>

<!-- Reference to the OpenAI LLM infrastructure project. -->
<ItemGroup>
<ProjectReference Include="..\ASSISTENTE.Infrastructure.LLM.OpenAi\ASSISTENTE.Infrastructure.LLM.OpenAi.csproj" />
</ItemGroup>

</Project>
35 changes: 35 additions & 0 deletions API/ASSISTENTE.Infrastructure.Audio/Contracts/AudioFile.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
using CSharpFunctionalExtensions;
using SOFTURE.Results;

namespace ASSISTENTE.Infrastructure.Audio.Contracts;

/// <summary>
/// Value object that pairs an audio stream with the file name under which it is submitted.
/// </summary>
/// <remarks>
/// Equality is based on <see cref="Name"/> only; <see cref="Stream"/> does not take part in comparisons.
/// This type never disposes the stream.
/// NOTE(review): stream ownership presumably stays with the caller - confirm.
/// </remarks>
public sealed class AudioFile : ValueObject
{
    private AudioFile(string name, Stream stream)
    {
        Name = name;
        Stream = stream;
    }

    /// <summary>File name of the audio content; never null, empty or whitespace-only.</summary>
    public string Name { get; }

    /// <summary>Stream with the audio content; never null.</summary>
    public Stream Stream { get; }

    /// <summary>
    /// Validates the inputs and creates an <see cref="AudioFile"/>.
    /// </summary>
    /// <param name="name">File name of the audio content.</param>
    /// <param name="stream">Stream with the audio content.</param>
    /// <returns>
    /// A successful result wrapping the new instance, or a failure result when
    /// <paramref name="name"/> is null, empty or whitespace-only, or when <paramref name="stream"/> is null.
    /// </returns>
    public static Result<AudioFile> Create(string name, Stream stream)
    {
        // A whitespace-only name is as unusable as an empty one, so reject it up front.
        if (string.IsNullOrWhiteSpace(name))
            return Result.Failure<AudioFile>(EmbeddingTextErrors.EmptyContent.Build());

        // The parameter is annotated as non-nullable, but callers compiled without nullable
        // checks (or using the null-forgiving operator) can still pass null at runtime.
        if (stream is null)
            return Result.Failure<AudioFile>(EmbeddingTextErrors.EmptyContent.Build());

        return new AudioFile(name, stream);
    }

    /// <summary>Yields the components used for equality: only <see cref="Name"/>.</summary>
    protected override IEnumerable<IComparable> GetEqualityComponents()
    {
        yield return Name;
    }
}

/// <summary>
/// Validation errors returned by <see cref="AudioFile.Create"/>.
/// </summary>
/// <remarks>
/// The type and member names are kept unchanged so existing references keep compiling;
/// only the error code and message were corrected, since they previously described a prompt
/// rather than an audio file.
/// </remarks>
public static class EmbeddingTextErrors
{
    /// <summary>Reported when an audio file is created without a usable name or content.</summary>
    public static readonly Error EmptyContent = new(
        "AudioFile.EmptyContent", "Audio file name and content cannot be empty.");
}
6 changes: 6 additions & 0 deletions API/ASSISTENTE.Infrastructure.Audio/Contracts/AudioType.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
namespace ASSISTENTE.Infrastructure.Audio.Contracts;

/// <summary>
/// Kinds of audio providers.
/// </summary>
/// <remarks>
/// NOTE(review): presumably intended for selecting the <c>IAudioClient</c> implementation,
/// but nothing in the visible changes references it - confirm.
/// </remarks>
public enum AudioType
{
/// <summary>OpenAI.</summary>
OpenAi
}
8 changes: 8 additions & 0 deletions API/ASSISTENTE.Infrastructure.Audio/Contracts/IAudioClient.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
using CSharpFunctionalExtensions;

namespace ASSISTENTE.Infrastructure.Audio.Contracts;

/// <summary>
/// Abstraction over a service that turns audio into text.
/// </summary>
public interface IAudioClient
{
/// <summary>
/// Generates a transcription for the given audio file.
/// </summary>
/// <param name="audioFile">Audio file (name and stream) to transcribe.</param>
/// <returns>A task producing a result that wraps the <see cref="Transcription"/>.</returns>
Task<Result<Transcription>> GenerateTranscription(AudioFile audioFile);
}
27 changes: 27 additions & 0 deletions API/ASSISTENTE.Infrastructure.Audio/Contracts/Transcription.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
using CSharpFunctionalExtensions;
using SOFTURE.Results;

namespace ASSISTENTE.Infrastructure.Audio.Contracts;

/// <summary>
/// Value object holding the text of a transcription.
/// </summary>
/// <remarks>
/// Instances can only be obtained through <see cref="Create"/>, so <see cref="Text"/>
/// is never null or empty. Equality is based on <see cref="Text"/>.
/// </remarks>
public sealed class Transcription : ValueObject
{
    private Transcription(string text) => Text = text;

    /// <summary>The transcribed text.</summary>
    public string Text { get; }

    /// <summary>
    /// Creates a <see cref="Transcription"/> from the given text.
    /// </summary>
    /// <param name="text">Transcribed text; may be null.</param>
    /// <returns>
    /// A failure result when <paramref name="text"/> is null or empty,
    /// otherwise a successful result wrapping the new instance.
    /// </returns>
    public static Result<Transcription> Create(string? text) =>
        string.IsNullOrEmpty(text)
            ? Result.Failure<Transcription>(CommonErrors.EmptyParameter.Build())
            : Result.Success(new Transcription(text));

    /// <summary>Returns the components used for equality: only <see cref="Text"/>.</summary>
    protected override IEnumerable<IComparable> GetEqualityComponents() =>
        new IComparable[] { Text };
}
20 changes: 20 additions & 0 deletions API/ASSISTENTE.Infrastructure.Audio/DependencyInjection.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
using ASSISTENTE.Infrastructure.Audio.Contracts;
using ASSISTENTE.Infrastructure.LLM.OpenAi;
using ASSISTENTE.Infrastructure.LLM.OpenAi.Settings;
using Microsoft.Extensions.DependencyInjection;

namespace ASSISTENTE.Infrastructure.Audio;

/// <summary>
/// Service-collection extensions that register the audio transcription services.
/// </summary>
internal static class DependencyInjection
{
    /// <summary>
    /// Calls <c>AddOpenAi</c> for <typeparamref name="TSettings"/> and registers
    /// <see cref="OpenAiClient"/> as the scoped <see cref="IAudioClient"/> implementation.
    /// </summary>
    /// <typeparam name="TSettings">Settings type passed on to <c>AddOpenAi</c>.</typeparam>
    /// <param name="services">Service collection to add the registrations to.</param>
    /// <returns>The same <paramref name="services"/> instance, to allow chaining.</returns>
    public static IServiceCollection AddOpenAiAudio<TSettings>(this IServiceCollection services)
        where TSettings : IOpenAiSettings
    {
        services.AddOpenAi<TSettings>();

        return services.AddScoped<IAudioClient, OpenAiClient>();
    }
}
29 changes: 29 additions & 0 deletions API/ASSISTENTE.Infrastructure.Audio/OpenAiClient.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
using ASSISTENTE.Infrastructure.Audio.Contracts;
using ASSISTENTE.Infrastructure.LLM.OpenAi.Errors;
using CSharpFunctionalExtensions;
using OpenAI;
using OpenAI.Audio;
using OpenAI.Models;

namespace ASSISTENTE.Infrastructure.Audio;

/// <summary>
/// <see cref="IAudioClient"/> implementation that transcribes audio through the OpenAI audio endpoint.
/// </summary>
internal sealed class OpenAiClient(OpenAIClient client) : IAudioClient
{
    /// <summary>
    /// Sends the audio file to the transcription endpoint and wraps the returned text.
    /// </summary>
    /// <param name="audioFile">Audio file whose stream and name are sent in the request.</param>
    /// <returns>
    /// A failure result when the endpoint returns null; otherwise the outcome of
    /// <see cref="Transcription.Create"/> for the returned text.
    /// </returns>
    /// <remarks>
    /// The request always uses the Whisper-1 model, the plain-text response format and the
    /// language code "pl". Exceptions thrown by the endpoint call are not caught here.
    /// </remarks>
    public async Task<Result<Transcription>> GenerateTranscription(AudioFile audioFile)
    {
        var request = new AudioTranscriptionRequest(
            audio: audioFile.Stream,
            audioName: audioFile.Name,
            model: Model.Whisper1,
            responseFormat: AudioResponseFormat.Text,
            language: "pl"
        );

        var text = await client.AudioEndpoint.CreateTranscriptionTextAsync(request);

        // A null response is reported with the client-level error; an empty one is
        // rejected by Transcription.Create itself.
        return text is not null
            ? Transcription.Create(text)
            : Result.Failure<Transcription>(ClientErrors.EmptyAnswer.Build());
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
<ItemGroup>
<InternalsVisibleTo Include="ASSISTENTE.Infrastructure.Embeddings"/>
<InternalsVisibleTo Include="ASSISTENTE.Infrastructure.LLM"/>
<InternalsVisibleTo Include="ASSISTENTE.Infrastructure.Audio"/>
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

<ItemGroup>
<ProjectReference Include="..\ASSISTENTE.Application.Abstractions\ASSISTENTE.Application.Abstractions.csproj" />
<ProjectReference Include="..\ASSISTENTE.Infrastructure.Audio\ASSISTENTE.Infrastructure.Audio.csproj" />
<ProjectReference Include="..\ASSISTENTE.Infrastructure.CodeParser\ASSISTENTE.Infrastructure.CodeParser.csproj"/>
<ProjectReference Include="..\ASSISTENTE.Infrastructure.Firecrawl\ASSISTENTE.Infrastructure.Firecrawl.csproj" />
<ProjectReference Include="..\ASSISTENTE.Infrastructure.Langfuse\ASSISTENTE.Infrastructure.Langfuse.csproj" />
Expand Down
3 changes: 2 additions & 1 deletion API/ASSISTENTE.Infrastructure/DependencyInjection.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using ASSISTENTE.Application.Abstractions.Interfaces;
using ASSISTENTE.Domain.Interfaces;
using ASSISTENTE.Infrastructure.Audio;
using ASSISTENTE.Infrastructure.CodeParser;
using ASSISTENTE.Infrastructure.Embeddings;
using ASSISTENTE.Infrastructure.Enums;
Expand All @@ -8,7 +9,6 @@
using ASSISTENTE.Infrastructure.Langfuse;
using ASSISTENTE.Infrastructure.Langfuse.Settings;
using ASSISTENTE.Infrastructure.LLM;
using ASSISTENTE.Infrastructure.LLM.Contracts;
using ASSISTENTE.Infrastructure.LLM.Ollama.Settings;
using ASSISTENTE.Infrastructure.LLM.OpenAi.Settings;
using ASSISTENTE.Infrastructure.MarkDownParser;
Expand Down Expand Up @@ -39,6 +39,7 @@ public static IServiceCollection AddInfrastructure<TSettings>(
{
services.AddOpenAiLlm<TSettings>();
services.AddOpenAiEmbeddings<TSettings>();
services.AddOpenAiAudio<TSettings>();
}

if (privacyMode == PrivacyMode.Local)
Expand Down
2 changes: 1 addition & 1 deletion API/ASSISTENTE.Module/DependencyInjection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public static IServiceCollection AddAssistenteModule<TUserResolver, TSettings>(t
where TUserResolver : class, IUserResolver
where TSettings : IModuleSettings
{
services.AddInfrastructure<TSettings>(privacyMode: PrivacyMode.Local);
services.AddInfrastructure<TSettings>(privacyMode: PrivacyMode.Cloud);
services.AddPersistence<TUserResolver, TSettings>();
services.AddApplication();
services.AddEvents();
Expand Down
18 changes: 18 additions & 0 deletions API/ASSISTENTE.Playground/ASSISTENTE.Playground.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,24 @@
<None Update="Data\data.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="Data\Audio\adam.m4a">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Data\Audio\agnieszka.m4a">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Data\Audio\ardian.m4a">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Data\Audio\michal.m4a">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Data\Audio\monika.m4a">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Data\Audio\rafal.m4a">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>

<Target Name="CreateDirectories" AfterTargets="Build">
Expand Down
Binary file added API/ASSISTENTE.Playground/Data/Audio/adam.m4a
Binary file not shown.
Binary file not shown.
Binary file added API/ASSISTENTE.Playground/Data/Audio/ardian.m4a
Binary file not shown.
Binary file added API/ASSISTENTE.Playground/Data/Audio/michal.m4a
Binary file not shown.
Binary file added API/ASSISTENTE.Playground/Data/Audio/monika.m4a
Binary file not shown.
Binary file added API/ASSISTENTE.Playground/Data/Audio/rafal.m4a
Binary file not shown.
2 changes: 1 addition & 1 deletion API/ASSISTENTE.Playground/Models/TaskRequestModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ public class TaskRequestModel
public required string ApiKey { get; set; }

[JsonPropertyName("answer")]
public required object Answer { get; set; }
public required object? Answer { get; set; }
}
8 changes: 0 additions & 8 deletions API/ASSISTENTE.Playground/Playground.cs
Original file line number Diff line number Diff line change
@@ -1,14 +1,6 @@
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using ASSISTENTE.Application.Abstractions.Interfaces;
using ASSISTENTE.Application.Handlers.Knowledge.Commands;
using ASSISTENTE.Infrastructure.Firecrawl.Contracts;
using ASSISTENTE.Infrastructure.Langfuse.Contracts;
using ASSISTENTE.Infrastructure.LLM.Contracts;
using ASSISTENTE.Playground.Models;
using ASSISTENTE.Playground.Tasks;
using CSharpFunctionalExtensions;
using MediatR;
using Microsoft.Extensions.Logging;
using SOFTURE.Common.Logging.Extensions;
Expand Down
1 change: 0 additions & 1 deletion API/ASSISTENTE.Playground/Tasks/WeekOne.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
using System.Text.Json;
using System.Text.Json.Serialization;
using ASSISTENTE.Infrastructure.Firecrawl.Contracts;
using ASSISTENTE.Infrastructure.Langfuse.Contracts;
using ASSISTENTE.Infrastructure.LLM.Contracts;
using ASSISTENTE.Playground.Models;
using CSharpFunctionalExtensions;
Expand Down
73 changes: 38 additions & 35 deletions API/ASSISTENTE.Playground/Tasks/WeekTwo.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using ASSISTENTE.Infrastructure.Firecrawl.Contracts;
using ASSISTENTE.Infrastructure.Langfuse.Contracts;
using ASSISTENTE.Infrastructure.Audio.Contracts;
using ASSISTENTE.Infrastructure.LLM.Contracts;
using ASSISTENTE.Playground.Models;
using CSharpFunctionalExtensions;
Expand All @@ -11,52 +9,57 @@ namespace ASSISTENTE.Playground.Tasks;

public class WeekTwo(
HttpClient httpClient,
ILlmClient llmClient)
ILlmClient llmClient,
IAudioClient audioClient)
{
public async Task<Result> Task_01()
{
const string url = "https://centrala.ag3nts.org/report";
const string taskName = "JSON";
const string taskName = "mp3";
const string apiKey = "<API-KEY>";
const string filePath = "Data/data.json";
const string audioFilePath = "Data/Audio";

var filePaths = Directory.GetFiles(audioFilePath);

var transcriptions = new StringBuilder();

var fileContent = await File.ReadAllTextAsync(filePath);
var parsedFile = JsonSerializer.Deserialize<DataModel>(fileContent);

if (parsedFile is null)
return Result.Failure("Failed to parse data file.");

foreach (var item in parsedFile.Data)
var tasks = new List<Task>();
foreach (var filePath in filePaths)
{
if (item.AdditionalInformation is not null)
{
var answer = await Prompt.Create($"{item.AdditionalInformation.Question}")
.Bind(async prompt => await llmClient.GenerateAnswer(prompt));

item.AdditionalInformation.Answer = answer.Value.Text;
}

var calculation = item.Question.Split(" + ").Select(int.Parse).Sum();
if (calculation != item.Answer)
{
item.Answer = calculation;
}
var fileName = Path.GetFileName(filePath);

await using var fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read);

var task = AudioFile.Create(fileName, fileStream)
.Bind(audioClient.GenerateTranscription)
.Tap(transcription =>
{
transcriptions.Append($"Zeznanie z pliku: {fileName}\n");
transcriptions.Append(transcription.Text);
transcriptions.Append("\n\n");
});

tasks.Add(task);
}
await Task.WhenAll(tasks);

var updatedFileContent = JsonSerializer.Serialize(parsedFile, new JsonSerializerOptions
{
WriteIndented = true,
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
Encoder = System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping
});
var context = transcriptions.ToString();

const string instuctions =
"1. Znajdź odpowiedź na pytanie, na jakiej ulicy znajduje się uczelnia, na której wykłada Andrzej Maj" +
"2. Pamiętaj, że zeznania świadków mogą być sprzeczne, niektórzy z nich mogą się mylić, a inni odpowiadać w dość dziwaczny sposób." +
"3. Nazwa ulicy nie pada w treści transkrypcji. Musisz użyć wiedzy wewnętrznej modelu, aby uzyskać odpowiedź.";

await File.WriteAllTextAsync(filePath, updatedFileContent);
var answer = await Prompt.Create($"<INSTRUKCJE>{instuctions}</INSTRUKCJE>\n\n <ZEZNANIA>{context}</ZEZNANIA>")
.Bind(async prompt => await llmClient.GenerateAnswer(prompt))
.Bind(answer => Prompt.Create($"Na podstawie otrzymanych informacji: \n <INFORMACJE>{answer.Text}</INFORMACJE>. \n\n Zwróć tylko i wyłącznie nazwę ulicy przy której znajduje się zidentifikowane miejsce."))
.Bind(async prompt => await llmClient.GenerateAnswer(prompt));

var request = new TaskRequestModel
{
Task = taskName,
ApiKey = apiKey,
Answer = parsedFile
Answer = answer.GetValueOrDefault(x => x.Text)
};

var response = await httpClient.PostAsync(
Expand All @@ -71,6 +74,6 @@ public async Task<Result> Task_01()

return response.IsSuccessStatusCode
? Result.Success(responseContent)
: Result.Failure("");
: Result.Failure(responseContent);
}
}
7 changes: 7 additions & 0 deletions API/ASSISTENTE.sln
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "LLM", "LLM", "{B478E50D-835
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ASSISTENTE.Infrastructure.LLM.Ollama", "ASSISTENTE.Infrastructure.LLM.Ollama\ASSISTENTE.Infrastructure.LLM.Ollama.csproj", "{1E060361-C372-4687-A5DE-586B9CF0603B}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ASSISTENTE.Infrastructure.Audio", "ASSISTENTE.Infrastructure.Audio\ASSISTENTE.Infrastructure.Audio.csproj", "{5C2F9A52-1D52-43AD-837F-F513A2A8CD56}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -240,6 +242,10 @@ Global
{1E060361-C372-4687-A5DE-586B9CF0603B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{1E060361-C372-4687-A5DE-586B9CF0603B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{1E060361-C372-4687-A5DE-586B9CF0603B}.Release|Any CPU.Build.0 = Release|Any CPU
{5C2F9A52-1D52-43AD-837F-F513A2A8CD56}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{5C2F9A52-1D52-43AD-837F-F513A2A8CD56}.Debug|Any CPU.Build.0 = Debug|Any CPU
{5C2F9A52-1D52-43AD-837F-F513A2A8CD56}.Release|Any CPU.ActiveCfg = Release|Any CPU
{5C2F9A52-1D52-43AD-837F-F513A2A8CD56}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{938FE15F-869E-4F71-B594-3EDA64C8FA74} = {2B12AFDA-6C6C-4A60-BBEE-EA9FDA0BD664}
Expand Down Expand Up @@ -281,5 +287,6 @@ Global
{B478E50D-8357-4B63-9367-B940E47EC993} = {450106FD-82CE-441D-93AD-10B7F1641A1A}
{E2668547-B17F-4182-AF4F-60AF5135163D} = {B478E50D-8357-4B63-9367-B940E47EC993}
{1E060361-C372-4687-A5DE-586B9CF0603B} = {B478E50D-8357-4B63-9367-B940E47EC993}
{5C2F9A52-1D52-43AD-837F-F513A2A8CD56} = {450106FD-82CE-441D-93AD-10B7F1641A1A}
EndGlobalSection
EndGlobal

0 comments on commit cb539d6

Please sign in to comment.