diff --git a/dotnet/samples/Concepts/Memory/VectorStore_GenericDataModel_Interop.cs b/dotnet/samples/Concepts/Memory/VectorStore_GenericDataModel_Interop.cs
new file mode 100644
index 000000000000..faa5d579d3b8
--- /dev/null
+++ b/dotnet/samples/Concepts/Memory/VectorStore_GenericDataModel_Interop.cs
@@ -0,0 +1,214 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System.Text.Json;
+using Azure.Identity;
+using Memory.VectorStoreFixtures;
+using Microsoft.SemanticKernel.Connectors.AzureOpenAI;
+using Microsoft.SemanticKernel.Connectors.Qdrant;
+using Microsoft.SemanticKernel.Data;
+using Microsoft.SemanticKernel.Embeddings;
+using Qdrant.Client;
+
+namespace Memory;
+
+/// <summary>
+/// Semantic Kernel provides a generic data model for vector stores that can be used with any
+/// schema. The schema still has to be provided in the form of a record definition, but no
+/// custom data model is required.
+///
+/// The sample shows how to
+/// 1. Upsert data using the generic data model and retrieve it from the vector store using a custom data model.
+/// 2. Upsert data using a custom data model and retrieve it from the vector store using the generic data model.
+/// </summary>
+public class VectorStore_GenericDataModel_Interop(ITestOutputHelper output, VectorStoreQdrantContainerFixture qdrantFixture) : BaseTest(output), IClassFixture<VectorStoreQdrantContainerFixture>
+{
+    /// <summary>Serializer options used to write the retrieved record to the console as indented JSON.</summary>
+    private static readonly JsonSerializerOptions s_indentedSerializerOptions = new() { WriteIndented = true };
+
+    /// <summary>
+    /// Record definition that supplies the schema when the generic data model is used.
+    /// It declares the same key, data and vector properties as the <see cref="Glossary"/> class, so both models can address the same collection.
+    /// </summary>
+    private static readonly VectorStoreRecordDefinition s_vectorStoreRecordDefinition = new()
+    {
+        Properties = new List<VectorStoreRecordProperty>
+        {
+            new VectorStoreRecordKeyProperty("Key", typeof(ulong)),
+            new VectorStoreRecordDataProperty("Term", typeof(string)),
+            new VectorStoreRecordDataProperty("Definition", typeof(string)),
+            new VectorStoreRecordVectorProperty("DefinitionEmbedding", typeof(ReadOnlyMemory<float>)) { Dimensions = 1536 }
+        }
+    };
+
+    /// <summary>
+    /// Upserts glossary entries using the generic data model and reads one of them back using the custom <see cref="Glossary"/> data model.
+    /// </summary>
+    [Fact]
+    public async Task UpsertWithGenericRetrieveWithCustomAsync()
+    {
+        // Create an embedding generation service.
+        var textEmbeddingGenerationService = new AzureOpenAITextEmbeddingGenerationService(
+            TestConfiguration.AzureOpenAIEmbeddings.DeploymentName,
+            TestConfiguration.AzureOpenAIEmbeddings.Endpoint,
+            new AzureCliCredential());
+
+        // Initiate the docker container and construct the vector store.
+        await qdrantFixture.ManualInitializeAsync();
+        var vectorStore = new QdrantVectorStore(new QdrantClient("localhost"));
+
+        // Get and create collection if it doesn't exist using the generic data model and record definition that defines the schema.
+        var genericDataModelCollection = vectorStore.GetCollection<ulong, VectorStoreGenericDataModel<ulong>>("skglossary", s_vectorStoreRecordDefinition);
+        await genericDataModelCollection.CreateCollectionIfNotExistsAsync();
+
+        // Create glossary entries and generate embeddings for them.
+        // The embedding call is awaited directly (no Task.Run wrapper); one task per entry is created and all are awaited together.
+        var glossaryEntries = CreateGenericGlossaryEntries().ToList();
+        var tasks = glossaryEntries.Select(async entry =>
+        {
+            entry.Vectors["DefinitionEmbedding"] = await textEmbeddingGenerationService.GenerateEmbeddingAsync((string)entry.Data["Definition"]!);
+        });
+        await Task.WhenAll(tasks);
+
+        // Upsert the glossary entries into the collection and return their keys.
+        var upsertedKeysTasks = glossaryEntries.Select(x => genericDataModelCollection.UpsertAsync(x));
+        var upsertedKeys = await Task.WhenAll(upsertedKeysTasks);
+
+        // Get the collection using the custom data model.
+        var customDataModelCollection = vectorStore.GetCollection<ulong, Glossary>("skglossary");
+
+        // Retrieve one of the upserted records from the collection.
+        var upsertedRecord = await customDataModelCollection.GetAsync(upsertedKeys.First(), new() { IncludeVectors = true });
+
+        // Write upserted keys and one of the upserted records to the console.
+        Console.WriteLine($"Upserted keys: {string.Join(", ", upsertedKeys)}");
+        Console.WriteLine($"Upserted record: {JsonSerializer.Serialize(upsertedRecord, s_indentedSerializerOptions)}");
+    }
+
+    /// <summary>
+    /// Upserts glossary entries using the custom <see cref="Glossary"/> data model and reads one of them back using the generic data model.
+    /// </summary>
+    [Fact]
+    public async Task UpsertWithCustomRetrieveWithGenericAsync()
+    {
+        // Create an embedding generation service.
+        var textEmbeddingGenerationService = new AzureOpenAITextEmbeddingGenerationService(
+            TestConfiguration.AzureOpenAIEmbeddings.DeploymentName,
+            TestConfiguration.AzureOpenAIEmbeddings.Endpoint,
+            new AzureCliCredential());
+
+        // Initiate the docker container and construct the vector store.
+        await qdrantFixture.ManualInitializeAsync();
+        var vectorStore = new QdrantVectorStore(new QdrantClient("localhost"));
+
+        // Get and create collection if it doesn't exist using the custom data model.
+        var customDataModelCollection = vectorStore.GetCollection<ulong, Glossary>("skglossary");
+        await customDataModelCollection.CreateCollectionIfNotExistsAsync();
+
+        // Create glossary entries and generate embeddings for them.
+        // The embedding call is awaited directly (no Task.Run wrapper); one task per entry is created and all are awaited together.
+        var glossaryEntries = CreateCustomGlossaryEntries().ToList();
+        var tasks = glossaryEntries.Select(async entry =>
+        {
+            entry.DefinitionEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(entry.Definition);
+        });
+        await Task.WhenAll(tasks);
+
+        // Upsert the glossary entries into the collection and return their keys.
+        var upsertedKeysTasks = glossaryEntries.Select(x => customDataModelCollection.UpsertAsync(x));
+        var upsertedKeys = await Task.WhenAll(upsertedKeysTasks);
+
+        // Get the collection using the generic data model.
+        var genericDataModelCollection = vectorStore.GetCollection<ulong, VectorStoreGenericDataModel<ulong>>("skglossary", s_vectorStoreRecordDefinition);
+
+        // Retrieve one of the upserted records from the collection.
+        var upsertedRecord = await genericDataModelCollection.GetAsync(upsertedKeys.First(), new() { IncludeVectors = true });
+
+        // Write upserted keys and one of the upserted records to the console.
+        Console.WriteLine($"Upserted keys: {string.Join(", ", upsertedKeys)}");
+        Console.WriteLine($"Upserted record: {JsonSerializer.Serialize(upsertedRecord, s_indentedSerializerOptions)}");
+    }
+
+    /// <summary>
+    /// Sample model class that represents a glossary entry.
+    /// </summary>
+    /// <remarks>
+    /// Note that each property is decorated with an attribute that specifies how the property should be treated by the vector store.
+    /// This allows us to create a collection in the vector store and upsert and retrieve instances of this class without any further configuration.
+    /// </remarks>
+    private sealed class Glossary
+    {
+        [VectorStoreRecordKey]
+        public ulong Key { get; set; }
+
+        [VectorStoreRecordData]
+        public string Term { get; set; }
+
+        [VectorStoreRecordData]
+        public string Definition { get; set; }
+
+        [VectorStoreRecordVector(1536)]
+        public ReadOnlyMemory<float> DefinitionEmbedding { get; set; }
+    }
+
+    /// <summary>
+    /// Create some sample glossary entries using the custom data model.
+    /// </summary>
+    /// <returns>A list of sample glossary entries.</returns>
+    private static IEnumerable<Glossary> CreateCustomGlossaryEntries()
+    {
+        yield return new Glossary
+        {
+            Key = 1,
+            Term = "API",
+            Definition = "Application Programming Interface. A set of rules and specifications that allow software components to communicate and exchange data.",
+        };
+
+        yield return new Glossary
+        {
+            Key = 2,
+            Term = "Connectors",
+            Definition = "Connectors allow you to integrate with various services provide AI capabilities, including LLM, AudioToText, TextToAudio, Embedding generation, etc.",
+        };
+
+        yield return new Glossary
+        {
+            Key = 3,
+            Term = "RAG",
+            Definition = "Retrieval Augmented Generation - a term that refers to the process of retrieving additional data to provide as context to an LLM to use when generating a response (completion) to a user’s question (prompt).",
+        };
+    }
+
+    /// <summary>
+    /// Create some sample glossary entries using the generic data model.
+    /// </summary>
+    /// <returns>A list of sample glossary entries.</returns>
+    private static IEnumerable<VectorStoreGenericDataModel<ulong>> CreateGenericGlossaryEntries()
+    {
+        yield return new VectorStoreGenericDataModel<ulong>(1)
+        {
+            Data = new Dictionary<string, object?>
+            {
+                ["Term"] = "API",
+                ["Definition"] = "Application Programming Interface. A set of rules and specifications that allow software components to communicate and exchange data.",
+            }
+        };
+
+        yield return new VectorStoreGenericDataModel<ulong>(2)
+        {
+            Data = new Dictionary<string, object?>
+            {
+                ["Term"] = "Connectors",
+                ["Definition"] = "Connectors allow you to integrate with various services provide AI capabilities, including LLM, AudioToText, TextToAudio, Embedding generation, etc.",
+            }
+        };
+
+        yield return new VectorStoreGenericDataModel<ulong>(3)
+        {
+            Data = new Dictionary<string, object?>
+            {
+                ["Term"] = "RAG",
+                ["Definition"] = "Retrieval Augmented Generation - a term that refers to the process of retrieving additional data to provide as context to an LLM to use when generating a response (completion) to a user’s question (prompt).",
+            }
+        };
+    }
+}
diff --git a/dotnet/samples/Concepts/README.md b/dotnet/samples/Concepts/README.md
index 08bfd46791c9..61845a53291c 100644
--- a/dotnet/samples/Concepts/README.md
+++ b/dotnet/samples/Concepts/README.md
@@ -133,6 +133,7 @@ dotnet test -l "console;verbosity=detailed" --filter "FullyQualifiedName=ChatCom
 - [VectorStore_DataIngestion_Simple: A simple example of how to do data ingestion into a vector store when getting started.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_Simple.cs)
 - [VectorStore_DataIngestion_MultiStore: An example of data ingestion that uses the same code to ingest into multiple vector stores types.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_MultiStore.cs)
 - [VectorStore_DataIngestion_CustomMapper: An example that shows how to use a custom mapper for when your data model and storage model doesn't match.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_CustomMapper.cs)
+- [VectorStore_GenericDataModel_Interop: An example that shows how you can use the built-in, generic data model from Semantic Kernel to read and write to a Vector Store.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_GenericDataModel_Interop.cs)
 
 ### Optimization - Examples of different cost and performance optimization techniques
 