From 31915be3068cbfce1f2194d1316b72618e78c203 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Thu, 16 May 2024 10:10:44 -0400 Subject: [PATCH] move the search and/or summarize to the main chat area so Ollama can do that now --- .../app/NonFunctionSearchOrSummarize.php | 52 ++++++++++++------ .../app/Responses/NonFunctionResponseDto.php | 17 ++++++ .../tests/Feature/ClaudeClientTest.php | 17 +++--- app/Domains/Prompts/SearchOrSummarize.php | 2 +- .../Controllers/WebPageOutputController.php | 12 ++--- app/Jobs/DocumentReferenceJob.php | 36 +++++++++++++ ...SimpleSearchAndSummarizeOrchestrateJob.php | 53 +++++++++++++++++-- .../Http/Controllers/ChatControllerTest.php | 11 +++- .../WebPageOutputControllerTest.php | 20 +++++-- .../NonFunctionSearchOrSummarizeTest.php | 15 +++--- ...leSearchAndSummarizeOrchestrateJobTest.php | 42 +++++++++++++++ .../fixtures/claude_client_get_functions.json | 16 ++++++ tests/fixtures/messages_llama3.json | 24 +++------ tests/fixtures/ollama_chat_results.json | 17 +++--- .../fixtures/openai_client_get_functions.json | 19 +++++++ 15 files changed, 275 insertions(+), 78 deletions(-) create mode 100644 Modules/LlmDriver/app/Responses/NonFunctionResponseDto.php create mode 100644 app/Jobs/DocumentReferenceJob.php create mode 100644 tests/Feature/SimpleSearchAndSummarizeOrchestrateJobTest.php diff --git a/Modules/LlmDriver/app/NonFunctionSearchOrSummarize.php b/Modules/LlmDriver/app/NonFunctionSearchOrSummarize.php index c373c277..012250f5 100644 --- a/Modules/LlmDriver/app/NonFunctionSearchOrSummarize.php +++ b/Modules/LlmDriver/app/NonFunctionSearchOrSummarize.php @@ -10,17 +10,21 @@ use App\Models\DocumentChunk; use Facades\LlmLaraHub\LlmDriver\DistanceQuery; use Illuminate\Support\Facades\Log; +use LlmLaraHub\LlmDriver\Responses\NonFunctionResponseDto; class NonFunctionSearchOrSummarize { - protected string $results = ""; - - public function handle(string $input, Collection $collection) : string + public function handle(string $input, HasDrivers $collection): NonFunctionResponseDto { + $collection = $collection->getChatable(); + + if (! 
get_class($collection) === Collection::class) { + throw new \Exception('Can only do Collection class right now'); + } - Log::info("[LaraChain] - Using the Non Function Search and Summarize Prompt", [ + Log::info('[LaraChain] - Using the Non Function Search and Summarize Prompt', [ 'collection' => $collection->id, - 'input' => $input + 'input' => $input, ]); $prompt = SearchOrSummarize::prompt($input); @@ -35,7 +39,7 @@ public function handle(string $input, Collection $collection) : string if (str($response->content)->contains('search')) { Log::info('[LaraChain] - LLM Thinks it is Search', [ - 'response' => $response->content] + 'response' => $response->content] ); $embedding = LlmDriverFacade::driver( @@ -78,10 +82,16 @@ public function handle(string $input, Collection $collection) : string $collection->getDriver() )->completion($contentFlattened); - $this->results = $response->content; + return NonFunctionResponseDto::from( + [ + 'response' => $response->content, + 'documentChunks' => $documentChunkResults, + 'prompt' => $contentFlattened, + ] + ); } elseif (str($response->content)->contains('summarize')) { Log::info('[LaraChain] - LLM Thinks it is summarize', [ - 'response' => $response->content] + 'response' => $response->content] ); $content = []; @@ -94,8 +104,8 @@ public function handle(string $input, Collection $collection) : string $contentFlattened = implode(' ', $content); Log::info('[LaraChain] - Documents Flattened', [ - 'collection' => $collection->id, - 'content' => $content] + 'collection' => $collection->id, + 'content' => $content] ); $prompt = SummarizeDocumentPrompt::prompt($contentFlattened); @@ -104,11 +114,16 @@ public function handle(string $input, Collection $collection) : string $collection->getDriver() )->completion($prompt); - - $this->results = $response->content; + return NonFunctionResponseDto::from( + [ + 'response' => $response->content, + 'documentChunks' => collect(), + 'prompt' => $prompt, + ] + ); } else { Log::info('[LaraChain] - LLM is not sure :(', [ - 'response' => $response->content] + 'response' => $response->content] ); $embedding = LlmDriverFacade::driver( @@ -146,11 +161,14 @@ public function handle(string $input, Collection $collection) : string $collection->getDriver() )->completion($contentFlattened); - - $this->results = $response->content; + return NonFunctionResponseDto::from( + [ + 'response' => $response->content, + 'documentChunks' => collect(), + 'prompt' => $contentFlattened, + ] + ); } - - return $this->results; } } diff --git a/Modules/LlmDriver/app/Responses/NonFunctionResponseDto.php b/Modules/LlmDriver/app/Responses/NonFunctionResponseDto.php new file mode 100644 index 00000000..2f18bdd0 --- /dev/null +++ b/Modules/LlmDriver/app/Responses/NonFunctionResponseDto.php @@ -0,0 +1,17 @@ +assertInstanceOf(CompletionResponse::class, $results); Http::assertSent(function ($request) { - $message1 = $request->data()['messages'][0]['role']; - $message2 = $request->data()['messages'][1]['role']; + $messageAssistant = $request->data()['messages'][0]['role']; + $messageUser = $request->data()['messages'][1]['role']; - return $message2 === 'assistant' && - $message1 === 'user'; + return $messageAssistant === 'assistant' && + $messageUser === 'user'; }); } @@ -109,12 +109,11 @@ public function test_chat_with_multiple_assistant_messages(): void $this->assertInstanceOf(CompletionResponse::class, $results); Http::assertSent(function ($request) { - $message0 = $request->data()['messages'][0]['role']; - $message1 = 
$request->data()['messages'][1]['role']; - $message2 = $request->data()['messages'][2]['role']; + $messageAssistant = $request->data()['messages'][1]['role']; + $messageUser = $request->data()['messages'][2]['role']; - return $message0 === 'assistant' && - $message1 === 'user' && $message2 === 'assistant'; + return $messageAssistant === 'assistant' && + $messageUser === 'user'; }); } diff --git a/app/Domains/Prompts/SearchOrSummarize.php b/app/Domains/Prompts/SearchOrSummarize.php index e0641c57..18773b0a 100644 --- a/app/Domains/Prompts/SearchOrSummarize.php +++ b/app/Domains/Prompts/SearchOrSummarize.php @@ -9,7 +9,7 @@ class SearchOrSummarize public static function prompt(string $originalPrompt): string { - Log::info('[LaraChain] - Search or SearchAndSummarize'); + Log::info('[LaraChain] - SearchOrSummarize Prompt'); return << $validated['input']] ); + /** @var NonFunctionResponseDto $results */ $results = NonFunctionSearchOrSummarize::handle($input, $output->collection); - $this->setChatMessages($results, 'assistant'); + $this->setChatMessages($results->response, 'assistant'); return back(); } diff --git a/app/Jobs/DocumentReferenceJob.php b/app/Jobs/DocumentReferenceJob.php new file mode 100644 index 00000000..254dfafc --- /dev/null +++ b/app/Jobs/DocumentReferenceJob.php @@ -0,0 +1,36 @@ +saveDocumentReference($this->message, $this->documentChunks); + } +} diff --git a/app/Jobs/SimpleSearchAndSummarizeOrchestrateJob.php b/app/Jobs/SimpleSearchAndSummarizeOrchestrateJob.php index fb9ff248..c1c3a9ea 100644 --- a/app/Jobs/SimpleSearchAndSummarizeOrchestrateJob.php +++ b/app/Jobs/SimpleSearchAndSummarizeOrchestrateJob.php @@ -2,22 +2,29 @@ namespace App\Jobs; -use App\Models\Chat; -use Facades\LlmLaraHub\LlmDriver\SimpleSearchAndSummarizeOrchestrate; +use App\Domains\Messages\RoleEnum; +use App\Models\Collection; +use App\Models\PromptHistory; +use Facades\LlmLaraHub\LlmDriver\NonFunctionSearchOrSummarize; use Illuminate\Bus\Queueable; use Illuminate\Contracts\Queue\ShouldQueue; use Illuminate\Foundation\Bus\Dispatchable; use Illuminate\Queue\InteractsWithQueue; use Illuminate\Queue\SerializesModels; +use Illuminate\Support\Facades\Log; +use LlmLaraHub\LlmDriver\HasDrivers; +use LlmLaraHub\LlmDriver\Helpers\CreateReferencesTrait; +use LlmLaraHub\LlmDriver\Responses\NonFunctionResponseDto; class SimpleSearchAndSummarizeOrchestrateJob implements ShouldQueue { + use CreateReferencesTrait; use Dispatchable, InteractsWithQueue, Queueable, SerializesModels; /** * Create a new job instance. 
*/ - public function __construct(public string $input, public Chat $chat) + public function __construct(public string $input, public HasDrivers $chat) { // } @@ -27,6 +34,44 @@ public function __construct(public string $input, public Chat $chat) */ public function handle(): void { - SimpleSearchAndSummarizeOrchestrate::handle($this->input, $this->chat); + Log::info('[LaraChain] Skipping over functions doing search and summarize'); + + notify_ui( + $this->chat->getChatable(), + 'Searching data now to summarize content' + ); + + $collection = $this->chat->getChatable(); + + if (get_class($collection) === Collection::class) { + /** @var NonFunctionResponseDto $results */ + $results = NonFunctionSearchOrSummarize::handle($this->input, $collection); + + $message = $this->chat->getChat()->addInput( + message: $results->response, + role: RoleEnum::Assistant, + show_in_thread: true + ); + + if ($results->prompt) { + PromptHistory::create([ + 'prompt' => $results->prompt, + 'chat_id' => $this->chat->getChat()->id, + 'message_id' => $message->id, + /** @phpstan-ignore-next-line */ + 'collection_id' => $this->chat->getChatable()?->id, + ]); + } + + if ($results->documentChunks->isNotEmpty()) { + $this->saveDocumentReference( + $message, + $results->documentChunks + ); + } + } else { + Log::info('Can only handle Collection model right now'); + } + } } diff --git a/tests/Feature/Http/Controllers/ChatControllerTest.php b/tests/Feature/Http/Controllers/ChatControllerTest.php index 74bc366b..b978006a 100644 --- a/tests/Feature/Http/Controllers/ChatControllerTest.php +++ b/tests/Feature/Http/Controllers/ChatControllerTest.php @@ -9,10 +9,11 @@ use App\Models\Message; use App\Models\User; use Facades\App\Domains\Agents\VerifyResponseAgent; +use Facades\LlmLaraHub\LlmDriver\NonFunctionSearchOrSummarize; use Facades\LlmLaraHub\LlmDriver\Orchestrate; -use Facades\LlmLaraHub\LlmDriver\SimpleSearchAndSummarizeOrchestrate; use LlmLaraHub\LlmDriver\LlmDriverFacade; use LlmLaraHub\LlmDriver\Responses\CompletionResponse; +use LlmLaraHub\LlmDriver\Responses\NonFunctionResponseDto; use Tests\TestCase; class ChatControllerTest extends TestCase @@ -133,7 +134,13 @@ public function test_no_functions() ]); LlmDriverFacade::shouldReceive('driver->hasFunctions')->once()->andReturn(false); - SimpleSearchAndSummarizeOrchestrate::shouldReceive('handle')->once()->andReturn('Yo'); + + NonFunctionSearchOrSummarize::shouldReceive('handle')->once()->andReturn( + NonFunctionResponseDto::from([ + 'response' => 'Foobar', + 'documentChunks' => collect(), + 'prompt' => 'Foobar', + ])); $this->actingAs($user)->post(route('chats.messages.create', [ 'chat' => $chat->id, diff --git a/tests/Feature/Http/Controllers/WebPageOutputControllerTest.php b/tests/Feature/Http/Controllers/WebPageOutputControllerTest.php index b757050c..ccec15d9 100644 --- a/tests/Feature/Http/Controllers/WebPageOutputControllerTest.php +++ b/tests/Feature/Http/Controllers/WebPageOutputControllerTest.php @@ -8,11 +8,10 @@ use App\Models\Output; use App\Models\User; use Facades\LlmLaraHub\LlmDriver\DistanceQuery; -use LlmLaraHub\LlmDriver\LlmDriverFacade; use Facades\LlmLaraHub\LlmDriver\NonFunctionSearchOrSummarize; +use LlmLaraHub\LlmDriver\LlmDriverFacade; use LlmLaraHub\LlmDriver\Responses\CompletionResponse; -use LlmLaraHub\LlmDriver\Responses\EmbeddingsResponseDto; -use Pgvector\Laravel\Vector; +use LlmLaraHub\LlmDriver\Responses\NonFunctionResponseDto; use Tests\TestCase; class WebPageOutputControllerTest extends TestCase @@ -71,7 +70,12 @@ public function 
test_chat_search() 'public' => true, ]); NonFunctionSearchOrSummarize::shouldReceive('handle') - ->once()->andReturn("Foo"); + ->once()->andReturn( + NonFunctionResponseDto::from([ + 'response' => 'Foobar', + 'documentChunks' => collect(), + 'prompt' => 'Foobar', + ])); $this->post(route( 'collections.outputs.web_page.chat', [ @@ -85,7 +89,13 @@ public function test_chat_search() public function test_no_search_no_summary() { NonFunctionSearchOrSummarize::shouldReceive('handle') - ->once()->andReturn("Foo"); + ->once()->andReturn( + NonFunctionResponseDto::from([ + 'response' => 'Foobar', + 'documentChunks' => collect(), + 'prompt' => 'Foobar', + ]) + ); $output = Output::factory()->create([ 'active' => true, diff --git a/tests/Feature/NonFunctionSearchOrSummarizeTest.php b/tests/Feature/NonFunctionSearchOrSummarizeTest.php index 7381627a..189910a3 100644 --- a/tests/Feature/NonFunctionSearchOrSummarizeTest.php +++ b/tests/Feature/NonFunctionSearchOrSummarizeTest.php @@ -4,8 +4,6 @@ use App\Models\DocumentChunk; use App\Models\Output; -use Illuminate\Foundation\Testing\RefreshDatabase; -use Illuminate\Foundation\Testing\WithFaker; use Facades\LlmLaraHub\LlmDriver\DistanceQuery; use LlmLaraHub\LlmDriver\LlmDriverFacade; use LlmLaraHub\LlmDriver\NonFunctionSearchOrSummarize; @@ -49,13 +47,14 @@ public function test_results(): void 'content' => 'search', ])); - $results = (new NonFunctionSearchOrSummarize())->handle("Search for foo", $output->collection); + $results = (new NonFunctionSearchOrSummarize())->handle('Search for foo', $output->collection); - $this->assertNotNull($results); + $this->assertNotNull($results->response); + $this->assertNotNull($results->documentChunks); } - - public function test_no_search_no_summary() { + public function test_no_search_no_summary() + { DocumentChunk::factory()->create(); @@ -85,9 +84,9 @@ public function test_no_search_no_summary() { 'content' => 'not sure :(', ])); - $results = (new NonFunctionSearchOrSummarize())->handle("Search for foo", $output->collection); + $results = (new NonFunctionSearchOrSummarize())->handle('Search for foo', $output->collection); - $this->assertNotNull($results); + $this->assertNotNull($results->response); } } diff --git a/tests/Feature/SimpleSearchAndSummarizeOrchestrateJobTest.php b/tests/Feature/SimpleSearchAndSummarizeOrchestrateJobTest.php new file mode 100644 index 00000000..ad62b92c --- /dev/null +++ b/tests/Feature/SimpleSearchAndSummarizeOrchestrateJobTest.php @@ -0,0 +1,42 @@ +create(); + + DocumentChunk::factory(3)->create(); + + NonFunctionSearchOrSummarize::shouldReceive('handle') + ->once()->andReturn( + NonFunctionResponseDto::from([ + 'response' => 'Foobar', + 'documentChunks' => DocumentChunk::all(), + 'prompt' => 'Foo bar', + ]) + ); + + (new SimpleSearchAndSummarizeOrchestrateJob( + $input, + $chat + ))->handle(); + + $this->assertDatabaseCount('messages', 1); + $this->assertDatabaseCount('prompt_histories', 1); + $this->assertDatabaseCount('message_document_references', 3); + } +} diff --git a/tests/fixtures/claude_client_get_functions.json b/tests/fixtures/claude_client_get_functions.json index fef82b83..c319adb5 100644 --- a/tests/fixtures/claude_client_get_functions.json +++ b/tests/fixtures/claude_client_get_functions.json @@ -16,5 +16,21 @@ "prompt" ] } + }, + { + "name": "search_and_summarize", + "description": "Used to embed users prompt, search database and return summarized results.", + "input_schema": { + "type": "object", + "properties": { + "prompt": { + "description": "This is the prompt the 
user is using to search the database and may or may not assist the results.", + "type": "string", + "enum": [], + "default": "" + } + }, + "required": [] + } } ] \ No newline at end of file diff --git a/tests/fixtures/messages_llama3.json b/tests/fixtures/messages_llama3.json index 923dbcc0..e0889a04 100644 --- a/tests/fixtures/messages_llama3.json +++ b/tests/fixtures/messages_llama3.json @@ -1,22 +1,14 @@ [ { - "content": "How can I have guardrails", - "role": "user" + "content": "test", + "role": "system", + "is_ai": false, + "show": true }, { - "content": "You are a helpful assistant in the RAG system: \nThis is data from the search results when entering the users prompt which is \n\n\n### START PROMPT \nHow can I have guardrails \n### END PROMPT\n\nPlease use this with the following context and only this, summarize it for the user and return as markdown so I can render it and strip out and formatting like extra spaces, tabs, periods etc: \n\n### START Context\nEach utterance is encoded into a semantic vector space, creating a semantic \"map\". That map\nhelps us line up semantic meaning and canonical forms.\nUser queries are encoded into the same vector space, allowing us to identify if they have a\nsemantic similarity to an existing cano\nWhen a new user query comes in, we encode it into the same semantic vector space as our\nprevious utterance examples. Using that map of utterances and the canonical forms they belong\nto, we can identify when a user query is semantically similar to those utterance examples. If the\nquery is similar, we trigger the canonica s semantically similar to those utterance examples. If the\nquery is similar, we trigger the canonical form that those utterances belong to.\nWith that, we have a decision-making process in milliseconds rather than several seconds as\nwith agents.\nUsing RAG with guardrails does have nuances that we should consider. First, we must define the\ncanonical forms. We could view the requirement of defining canonical forms in two ways: (1) we\n\"what is the llama 2 model?\"\n\"tell me about Meta's new LLM\"\n\"what are the differences between Falcon and Llama?\"\n1\n2\n3 \nThis website utilizes technologies\nsuch a these issues taken care of automatically. Google Docs can create PDFs but\nthey are not tagged. Converting a non-tagged PDF to a tagged one is a non-\ntrivial process that you should try to avoid. Creating PDFs with tags in the\nfirst place should be a preferable choice.\nGrackle by Product - GrackleDocs\nEdit description\nwww.grackledocs.com\nWritten by Joey Yang-Li\n2 Followers\nExpert in PDF Technology and Structured Documents\nFollow\nPdf Document Processing Retrieval Augmented Accessibility\nSign up to discover human stories that deepen your understanding of the\nworld.\nFree\nDistraction-free reading. discover human stories that deepen your understanding of the\nworld.\nFree\nDistraction-free reading. No ads.\nOrganize your knowledge with lists and\nhighlights.\nTell your story. Find your audience.\nMembership\nAccess the best member-only stories.\nSupport independent authors.\nListen to audio narrations.\nRead offline.\nJoin the Partner Program and earn for\nyour writing. ferences and\nsubsequently deriving an answer. In this framework, the pivotal component\nis the accurate retrieval of pertinent information, which is critical for the\nefficacy of the RAG model.\nFigure 1. 
The workflow of Retrieval-Augmented Generation (RAG).\nHowever, the process of retrieval from PDF files is fraught with challenges.\nCommon issues include inaccuracies in text extraction and disarray in the\nrow-column relationships of tables inside PDF files. Thus, before RAG, we\nneed to convert large documents into retrievable content. The conversion\ninvolves several steps, as shown in Figure 2:\n ge documents into retrievable content. The conversion\ninvolves several steps, as shown in Figure 2:\nSign up to discover human stories that deepen your understanding of the\nworld.\nFree\nDistraction-free reading. No ads.\nOrganize your knowledge with lists and\nhighlights.\nTell your story. Find your audience.\nMembership\nAccess the best member-only stories.\nSupport independent authors.\nListen to audio narrations.\nRead offline.\nJoin the Partner Program and earn for\nyour writing. resses the question of whether the quality of PDF\nparsing and chunking affects the outcomes of RAG. We will explore the\nchallenges, methodologies, and real-world case studies pertaining to this\nissue. It will include an examination of two types of methods in this field,\nnamely rule-based and deep learning-based methods, followed by empirical\nevaluations of their efficacy through practical examples.\n2 PDF Parsing & Chunking\n2.1 Challenges and Methods Overview\nSign up to discover human stories that deepen your understanding of the\nworld.\nFree\nDistraction-free reading. No ads.\nOrganize your knowl t deepen your understanding of the\nworld.\nFree\nDistraction-free reading. No ads.\nOrganize your knowledge with lists and\nhighlights.\nTell your story. Find your audience.\nMembership\nAccess the best member-only stories.\nSupport independent authors.\nListen to audio narrations.\nRead offline.\nJoin the Partner Program and earn for\nyour writing. hitectures. For language generation\ntasks, we find that RAG models generate more specific, diverse and factual language than a state-of-the-\nart parametric-only seq2seq baseline.\nThis model was contributed by ola13.\nRetrieval-augmented generation (RAG) models combine the powers of pretrained dense retrieval\n(DPR) and Seq2Seq models. RAG models retrieve docs, pass them to a seq2seq model, then marginalize\nto generate outputs. The retriever and seq2seq modules are initialized from pretrained models, and\nfine-tuned jointly, allowing both retrieval and generation to adapt to downstream tasks models, and\nfine-tuned jointly, allowing both retrieval and generation to adapt to downstream tasks. \nclass transformers.RagConfig\t\n( vocab_size , is_encoder_decoder , prefix , bos_token_id ,\npad_token_id , eos_token_id , decoder_start_token_id , title_sep\n, doc_sep , n_docs , max_combined_length ,\nUsage tips\nRagConfig = None = True = None = None = None = None = None = ' \/ ' = ' \/\/ ' = 5 = 300 can be more deterministic about what should trigger a particular action, or (2) we lose out on the\ninnate decision-making ability of an LLM.\nRAG with guardrails allows us to insert a user query directly into an action but it cannot\nrephrase the query, specify metadata filters, or decide how many results to return. On the other\nhand, using an agent makes those tasks easy. However, if we can infer these parameters\ndeterministically with code, we can include them without using an LLM.\nThat gives us an idea of how RAG with guardrails works and its pros and cons. Let's jump into the\nimplementat s us an idea of how RAG with guardrails works and its pros and cons. 
Let's jump into the\nimplementation itself.\nImplementing RAG with Guardrails\nTo implement RAG with guardrails, we will rely on the NVIDIA NeMo Guardrails library. The library\nprimarily focuses on AI safety by implementing \"guardrails\" as protective measures against\nunwanted interactions. However, we can also use these guardrails to trigger things like RAG.\nBuilding the Knowledge Base\nAs with every RAG use case, we must first create our knowledge base. For that, we will use a\nsmall dataset of Llama 2 related ArXiv papers stored ate our knowledge base. For that, we will use a\nsmall dataset of Llama 2 related ArXiv papers stored in Hugging Face we download the\ndataset like so:\nIn[7]:\n!pip install qU \\\n nemoguardrails \\\n pineconeclient \\\n datasets \\\n openai\n1\n2\n3\n4\n5\n datasets load_dataset\ndata load_dataset\n split\n1\n2\n3\n4\n5\n6\n-\n==0.4.0\n-==2.2.2\n==2.14.3\n==0.27.8 \nfrom import\n=\t(\n\"jamescalam\/llama-2-arxiv-papers-chunked\",\n=\"train\" \nThis website utilizes technologies\nsuch as cookies to enable essential\nsite functionality, as well as for\nanalytics, personalization, and\ntargeted advertising purposes. To\n MAY 1, 2024\nUpcoming research at Stack Overflow\nAPRIL 30, 2024\nCollaborating smarter, not harder\nAPRIL 25, 2024\nWhat language should beginning programmers choose?\nAPRIL 15, 2024\nHow to succeed as a data engineer without the burnout\nLATEST PODCAST\nMAY 7, 2024\nReshaping the future of API platforms\nLogin with your stackoverflow.com account to take part in the discussion.\nLightDarkAuto\nStack Overflow for Teams\nPricing\tUse cases coding model, preferably TFDPRQuestionEncoder, and\nthe generator can be any seq2seq model, preferably TFBartForConditionalGeneration.\nThe model can be initialized with a RagRetriever for end-to-end generation or used in\ncombination with the outputs of a retriever in multiple steps---see examples for more details.\nThe model is compatible any autoencoding model as the question_encoder and any seq2seq\nmodel with language model head as the generator. It has been tested with\nTFDPRQuestionEncoder as the question_encoder and TFBartForConditionalGeneration as the\ngenerator.\nThis model inherits from TF he question_encoder and TFBartForConditionalGeneration as the\ngenerator.\nThis model inherits from TFPreTrainedModel. Check the superclass documentation for the\ngeneric methods the library implements for all its model (such as downloading or saving,\nresizing the input embeddings, pruning heads etc.)\nThis model is also a Tensorflow keras.Model subclass. 
Use it as a regular TF 2.0 Keras Model and\nrefer to the TF 2.0 documentation for all matter related to general usage and behavior.\nThe model is in a developing state as it is now fully supports in eager-mode only, and may not\nbe exported in Saved a developing state as it is now fully supports in eager-mode only, and may not\nbe exported in SavedModel format.\ncall\t\n( input_ids\t, attention_mask\n, decoder_input_ids\n, decoder_attention_mask\t,\nencoder_outputs\t, past_key_values\nAlthough the recipe for forward pass needs to be defined within this function, one should call\nthe Module instance afterwards instead of this since the former takes care of running the pre\nand post processing steps while the latter silently ignores them.: TFModelInputType | None = None : np.ndarray |\ntf.Tensor | None = None : np.ndarray | tf.Tensor | None =\nNo ensor of shape (1,), optional, returned when labels is provided) Language\nmodeling loss.\n\nlogits (torch.FloatTensor of shape (batch_size, sequence_length, config.vocab_size))\n Prediction scores of the language modeling head. The score is possibly marginalized over all\ndocuments for each vocabulary token.\n\ndoc_scores (torch.FloatTensor of shape (batch_size, config.n_docs)) Score between\neach retrieved document embeddings (see retrieved_doc_embeds) and\nquestion_encoder_last_hidden_state.\n\npast_key_values (List[torch.FloatTensor], optional, returned when use_cache=True is\npass _state.\n\npast_key_values (List[torch.FloatTensor], optional, returned when use_cache=True is\npassed or when config.use_cache=True) List of torch.FloatTensor of length\nconfig.n_layers, with each tensor of shape (2, batch_size, num_heads,\nsequence_length, embed_size_per_head)).\n\nExpand 17 parameters nk is retrieved for RAG, LLM is unable to perceive\nany meaningful information from it. Similar situation for Chunk 2.\nMoreover, the headers of the table only exist in Chunk 1, so the lower\npart of the table in Chunk 2 becomes meaningless.\n3. It cannot recognize the reading order of the content. The last line of\nChunk 5, Management Discussion and Analysis is actually located at\nthe top of the page, but is parsed as the last sentence in the result. This is\nbecause PyPDF parses the document by the storage order of the\ncharacters, instead of their reading order. This may cause chaotic res t by the storage order of the\ncharacters, instead of their reading order. This may cause chaotic results\nwhen faced with complex layouts.\nThe result on another case Case 2 features with a complex and cross-page\ntable is shown in Figure 15 in the Appendix.\n2.3 Deep learning-based method: ChatDOC PDF Parser\nNext, we turn our attention to the method of deep learning-based parsing,\nexemplified by our ChatDOC PDF Parser. The ChatDOC PDF Parser\n(https:\/\/pdfparser.io\/) has been trained on a corpus of over ten million\nSign up to discover human stories that deepen your understanding of the\nworld.\nFree\n ver ten million\nSign up to discover human stories that deepen your understanding of the\nworld.\nFree\nDistraction-free reading. No ads.\nOrganize your knowledge with lists and\nhighlights.\nTell your story. Find your audience.\nMembership\nAccess the best member-only stories.\nSupport independent authors.\nListen to audio narrations.\nRead offline.\nJoin the Partner Program and earn for\nyour writing. 
\n### END Context", - "role": "assistant" - }, - { - "content": "When choosing a language for beginning programmers, Python stands out as an ideal option due to its intuitive syntax that emphasizes readability and simplicity, which is crucial for beginners. Its extensive range of libraries and frameworks makes it suitable for both web development with Django or Flask, data analysis using pandas and NumPy, artificial intelligence applications through scikit-learn, and more. The vast community and resources available also ensure that learning Python will equip you well for a broad spectrum of programming tasks in the future.\n\n### Answer\nPython", - "role": "assistant" - }, - { - "content": "How can I have guardrails", - "role": "user" - }, - { - "content": "You are a helpful assistant in the RAG system: \nThis is data from the search results when entering the users prompt which is \n\n\n### START PROMPT \nHow can I have guardrails \n### END PROMPT\n\nPlease use this with the following context and only this, summarize it for the user and return as markdown so I can render it and strip out and formatting like extra spaces, tabs, periods etc: \n\n### START Context\nEach utterance is encoded into a semantic vector space, creating a semantic \"map\". That map\nhelps us line up semantic meaning and canonical forms.\nUser queries are encoded into the same vector space, allowing us to identify if they have a\nsemantic similarity to an existing cano\nWhen a new user query comes in, we encode it into the same semantic vector space as our\nprevious utterance examples. Using that map of utterances and the canonical forms they belong\nto, we can identify when a user query is semantically similar to those utterance examples. If the\nquery is similar, we trigger the canonica s semantically similar to those utterance examples. If the\nquery is similar, we trigger the canonical form that those utterances belong to.\nWith that, we have a decision-making process in milliseconds rather than several seconds as\nwith agents.\nUsing RAG with guardrails does have nuances that we should consider. First, we must define the\ncanonical forms. We could view the requirement of defining canonical forms in two ways: (1) we\n\"what is the llama 2 model?\"\n\"tell me about Meta's new LLM\"\n\"what are the differences between Falcon and Llama?\"\n1\n2\n3 \nThis website utilizes technologies\nsuch a these issues taken care of automatically. Google Docs can create PDFs but\nthey are not tagged. Converting a non-tagged PDF to a tagged one is a non-\ntrivial process that you should try to avoid. Creating PDFs with tags in the\nfirst place should be a preferable choice.\nGrackle by Product - GrackleDocs\nEdit description\nwww.grackledocs.com\nWritten by Joey Yang-Li\n2 Followers\nExpert in PDF Technology and Structured Documents\nFollow\nPdf Document Processing Retrieval Augmented Accessibility\nSign up to discover human stories that deepen your understanding of the\nworld.\nFree\nDistraction-free reading. discover human stories that deepen your understanding of the\nworld.\nFree\nDistraction-free reading. No ads.\nOrganize your knowledge with lists and\nhighlights.\nTell your story. Find your audience.\nMembership\nAccess the best member-only stories.\nSupport independent authors.\nListen to audio narrations.\nRead offline.\nJoin the Partner Program and earn for\nyour writing. ferences and\nsubsequently deriving an answer. 
In this framework, the pivotal component\nis the accurate retrieval of pertinent information, which is critical for the\nefficacy of the RAG model.\nFigure 1. The workflow of Retrieval-Augmented Generation (RAG).\nHowever, the process of retrieval from PDF files is fraught with challenges.\nCommon issues include inaccuracies in text extraction and disarray in the\nrow-column relationships of tables inside PDF files. Thus, before RAG, we\nneed to convert large documents into retrievable content. The conversion\ninvolves several steps, as shown in Figure 2:\n ge documents into retrievable content. The conversion\ninvolves several steps, as shown in Figure 2:\nSign up to discover human stories that deepen your understanding of the\nworld.\nFree\nDistraction-free reading. No ads.\nOrganize your knowledge with lists and\nhighlights.\nTell your story. Find your audience.\nMembership\nAccess the best member-only stories.\nSupport independent authors.\nListen to audio narrations.\nRead offline.\nJoin the Partner Program and earn for\nyour writing. resses the question of whether the quality of PDF\nparsing and chunking affects the outcomes of RAG. We will explore the\nchallenges, methodologies, and real-world case studies pertaining to this\nissue. It will include an examination of two types of methods in this field,\nnamely rule-based and deep learning-based methods, followed by empirical\nevaluations of their efficacy through practical examples.\n2 PDF Parsing & Chunking\n2.1 Challenges and Methods Overview\nSign up to discover human stories that deepen your understanding of the\nworld.\nFree\nDistraction-free reading. No ads.\nOrganize your knowl t deepen your understanding of the\nworld.\nFree\nDistraction-free reading. No ads.\nOrganize your knowledge with lists and\nhighlights.\nTell your story. Find your audience.\nMembership\nAccess the best member-only stories.\nSupport independent authors.\nListen to audio narrations.\nRead offline.\nJoin the Partner Program and earn for\nyour writing. hitectures. For language generation\ntasks, we find that RAG models generate more specific, diverse and factual language than a state-of-the-\nart parametric-only seq2seq baseline.\nThis model was contributed by ola13.\nRetrieval-augmented generation (RAG) models combine the powers of pretrained dense retrieval\n(DPR) and Seq2Seq models. RAG models retrieve docs, pass them to a seq2seq model, then marginalize\nto generate outputs. The retriever and seq2seq modules are initialized from pretrained models, and\nfine-tuned jointly, allowing both retrieval and generation to adapt to downstream tasks models, and\nfine-tuned jointly, allowing both retrieval and generation to adapt to downstream tasks. \nclass transformers.RagConfig\t\n( vocab_size , is_encoder_decoder , prefix , bos_token_id ,\npad_token_id , eos_token_id , decoder_start_token_id , title_sep\n, doc_sep , n_docs , max_combined_length ,\nUsage tips\nRagConfig = None = True = None = None = None = None = None = ' \/ ' = ' \/\/ ' = 5 = 300 can be more deterministic about what should trigger a particular action, or (2) we lose out on the\ninnate decision-making ability of an LLM.\nRAG with guardrails allows us to insert a user query directly into an action but it cannot\nrephrase the query, specify metadata filters, or decide how many results to return. On the other\nhand, using an agent makes those tasks easy. 
However, if we can infer these parameters\ndeterministically with code, we can include them without using an LLM.\nThat gives us an idea of how RAG with guardrails works and its pros and cons. Let's jump into the\nimplementat s us an idea of how RAG with guardrails works and its pros and cons. Let's jump into the\nimplementation itself.\nImplementing RAG with Guardrails\nTo implement RAG with guardrails, we will rely on the NVIDIA NeMo Guardrails library. The library\nprimarily focuses on AI safety by implementing \"guardrails\" as protective measures against\nunwanted interactions. However, we can also use these guardrails to trigger things like RAG.\nBuilding the Knowledge Base\nAs with every RAG use case, we must first create our knowledge base. For that, we will use a\nsmall dataset of Llama 2 related ArXiv papers stored ate our knowledge base. For that, we will use a\nsmall dataset of Llama 2 related ArXiv papers stored in Hugging Face we download the\ndataset like so:\nIn[7]:\n!pip install qU \\\n nemoguardrails \\\n pineconeclient \\\n datasets \\\n openai\n1\n2\n3\n4\n5\n datasets load_dataset\ndata load_dataset\n split\n1\n2\n3\n4\n5\n6\n-\n==0.4.0\n-==2.2.2\n==2.14.3\n==0.27.8 \nfrom import\n=\t(\n\"jamescalam\/llama-2-arxiv-papers-chunked\",\n=\"train\" \nThis website utilizes technologies\nsuch as cookies to enable essential\nsite functionality, as well as for\nanalytics, personalization, and\ntargeted advertising purposes. To\n MAY 1, 2024\nUpcoming research at Stack Overflow\nAPRIL 30, 2024\nCollaborating smarter, not harder\nAPRIL 25, 2024\nWhat language should beginning programmers choose?\nAPRIL 15, 2024\nHow to succeed as a data engineer without the burnout\nLATEST PODCAST\nMAY 7, 2024\nReshaping the future of API platforms\nLogin with your stackoverflow.com account to take part in the discussion.\nLightDarkAuto\nStack Overflow for Teams\nPricing\tUse cases coding model, preferably TFDPRQuestionEncoder, and\nthe generator can be any seq2seq model, preferably TFBartForConditionalGeneration.\nThe model can be initialized with a RagRetriever for end-to-end generation or used in\ncombination with the outputs of a retriever in multiple steps---see examples for more details.\nThe model is compatible any autoencoding model as the question_encoder and any seq2seq\nmodel with language model head as the generator. It has been tested with\nTFDPRQuestionEncoder as the question_encoder and TFBartForConditionalGeneration as the\ngenerator.\nThis model inherits from TF he question_encoder and TFBartForConditionalGeneration as the\ngenerator.\nThis model inherits from TFPreTrainedModel. Check the superclass documentation for the\ngeneric methods the library implements for all its model (such as downloading or saving,\nresizing the input embeddings, pruning heads etc.)\nThis model is also a Tensorflow keras.Model subclass. 
Use it as a regular TF 2.0 Keras Model and\nrefer to the TF 2.0 documentation for all matter related to general usage and behavior.\nThe model is in a developing state as it is now fully supports in eager-mode only, and may not\nbe exported in Saved a developing state as it is now fully supports in eager-mode only, and may not\nbe exported in SavedModel format.\ncall\t\n( input_ids\t, attention_mask\n, decoder_input_ids\n, decoder_attention_mask\t,\nencoder_outputs\t, past_key_values\nAlthough the recipe for forward pass needs to be defined within this function, one should call\nthe Module instance afterwards instead of this since the former takes care of running the pre\nand post processing steps while the latter silently ignores them.: TFModelInputType | None = None : np.ndarray |\ntf.Tensor | None = None : np.ndarray | tf.Tensor | None =\nNo ensor of shape (1,), optional, returned when labels is provided) Language\nmodeling loss.\n\nlogits (torch.FloatTensor of shape (batch_size, sequence_length, config.vocab_size))\n Prediction scores of the language modeling head. The score is possibly marginalized over all\ndocuments for each vocabulary token.\n\ndoc_scores (torch.FloatTensor of shape (batch_size, config.n_docs)) Score between\neach retrieved document embeddings (see retrieved_doc_embeds) and\nquestion_encoder_last_hidden_state.\n\npast_key_values (List[torch.FloatTensor], optional, returned when use_cache=True is\npass _state.\n\npast_key_values (List[torch.FloatTensor], optional, returned when use_cache=True is\npassed or when config.use_cache=True) List of torch.FloatTensor of length\nconfig.n_layers, with each tensor of shape (2, batch_size, num_heads,\nsequence_length, embed_size_per_head)).\n\nExpand 17 parameters nk is retrieved for RAG, LLM is unable to perceive\nany meaningful information from it. Similar situation for Chunk 2.\nMoreover, the headers of the table only exist in Chunk 1, so the lower\npart of the table in Chunk 2 becomes meaningless.\n3. It cannot recognize the reading order of the content. The last line of\nChunk 5, Management Discussion and Analysis is actually located at\nthe top of the page, but is parsed as the last sentence in the result. This is\nbecause PyPDF parses the document by the storage order of the\ncharacters, instead of their reading order. This may cause chaotic res t by the storage order of the\ncharacters, instead of their reading order. This may cause chaotic results\nwhen faced with complex layouts.\nThe result on another case Case 2 features with a complex and cross-page\ntable is shown in Figure 15 in the Appendix.\n2.3 Deep learning-based method: ChatDOC PDF Parser\nNext, we turn our attention to the method of deep learning-based parsing,\nexemplified by our ChatDOC PDF Parser. The ChatDOC PDF Parser\n(https:\/\/pdfparser.io\/) has been trained on a corpus of over ten million\nSign up to discover human stories that deepen your understanding of the\nworld.\nFree\n ver ten million\nSign up to discover human stories that deepen your understanding of the\nworld.\nFree\nDistraction-free reading. No ads.\nOrganize your knowledge with lists and\nhighlights.\nTell your story. Find your audience.\nMembership\nAccess the best member-only stories.\nSupport independent authors.\nListen to audio narrations.\nRead offline.\nJoin the Partner Program and earn for\nyour writing. 
\n### END Context", - "role": "assistant" + "content": "test", + "role": "user", + "is_ai": false, + "show": true } ] \ No newline at end of file diff --git a/tests/fixtures/ollama_chat_results.json b/tests/fixtures/ollama_chat_results.json index c1672e85..f0bc98b1 100644 --- a/tests/fixtures/ollama_chat_results.json +++ b/tests/fixtures/ollama_chat_results.json @@ -1,15 +1,16 @@ { "model": "phi3", - "created_at": "2024-05-08T14:12:21.292865Z", + "created_at": "2024-05-16T14:04:33.497924Z", "message": { "role": "assistant", - "content": " ###\nGenerate a summary that explains how to generate text from an image using deep learning models like CLIP, DALL-E, or similar technologies. Keep it concise, within 100 words.\n" + "content": "As requested, here's a simple \"test\" response to the given input:\n\n\n```\n\nTest Command Executed Successfully!\n\n```\n\nThis output indicates that a test command was run and executed without any issues. For a real-world scenario, replace this with an appropriate response based on what is being tested (e.g., running a software application or checking system functionality)." }, + "done_reason": "stop", "done": true, - "total_duration": 4732339708, - "load_duration": 610705416, - "prompt_eval_count": 1253, - "prompt_eval_duration": 2915384000, - "eval_count": 47, - "eval_duration": 1188473000 + "total_duration": 1784789208, + "load_duration": 2727458, + "prompt_eval_count": 12, + "prompt_eval_duration": 87498000, + "eval_count": 84, + "eval_duration": 1692434000 } \ No newline at end of file diff --git a/tests/fixtures/openai_client_get_functions.json b/tests/fixtures/openai_client_get_functions.json index 490df227..a9324c28 100644 --- a/tests/fixtures/openai_client_get_functions.json +++ b/tests/fixtures/openai_client_get_functions.json @@ -19,5 +19,24 @@ "prompt" ] } + }, + { + "type": "function", + "function": { + "name": "search_and_summarize", + "description": "Used to embed users prompt, search database and return summarized results.", + "parameters": { + "type": "object", + "properties": { + "prompt": { + "description": "This is the prompt the user is using to search the database and may or may not assist the results.", + "type": "string", + "enum": [], + "default": "" + } + } + }, + "required": [] + } } ] \ No newline at end of file