switching computers going to leave it for the day gotta get these prompts right then I can verify the chunking is good
alnutile committed May 8, 2024
1 parent f36ed97 commit 8c70f0f
Showing 17 changed files with 162 additions and 110 deletions.
21 changes: 6 additions & 15 deletions Modules/LlmDriver/app/Functions/SearchAndSummarize.php
@@ -4,6 +4,7 @@

use App\Domains\Agents\VerifyPromptInputDto;
use App\Domains\Messages\RoleEnum;
use App\Domains\Prompts\SummarizePrompt;
use Facades\App\Domains\Agents\VerifyResponseAgent;
use Facades\LlmLaraHub\LlmDriver\DistanceQuery;
use Illuminate\Support\Facades\Log;
@@ -80,20 +81,10 @@ public function handle(

$context = implode(' ', $content);

$contentFlattened = <<<PROMPT
You are a helpful assistant in the Retrieval augmented generation system (RAG - an architectural approach that can improve the efficacy of large language model (LLM) applications by leveraging custom data):
This is data from the search results for the user's prompt, which is
### START PROMPT
{$originalPrompt}
### END PROMPT
Please use only the following context, summarize it for the user, and return it as markdown so I can render it; strip out any formatting like extra spaces, tabs, periods, etc.:
### START Context
$context
### END Context
PROMPT;
$contentFlattened = SummarizePrompt::prompt(
originalPrompt: $originalPrompt,
context: $context
);

$model->getChat()->addInput(
message: $contentFlattened,
@@ -102,7 +93,7 @@ public function handle(
show_in_thread: false
);

Log::info('[LaraChain] Getting the Summary from the search results', [
Log::info('[LaraChain] Getting the Search and Summary results', [
'input' => $contentFlattened,
'driver' => $model->getChat()->chatable->getDriver(),
]);
4 changes: 4 additions & 0 deletions Modules/TagFunction/app/TagManager.php
@@ -18,6 +18,10 @@ class TagManager

public function handle(Document $document): void
{
if (! $document->summary) {
return;
}

Log::info('[LaraChain] TagManager Tagging document');
$summary = $document->summary;
$prompt = <<<EOT
26 changes: 2 additions & 24 deletions app/Domains/Agents/VerifyResponseAgent.php
@@ -2,6 +2,7 @@

namespace App\Domains\Agents;

use App\Domains\Prompts\VerificationPrompt;
use Illuminate\Support\Facades\Log;
use LlmLaraHub\LlmDriver\LlmDriverFacade;
use LlmLaraHub\LlmDriver\Responses\CompletionResponse;
@@ -17,30 +18,7 @@ public function verify(VerifyPromptInputDto $input): VerifyPromptOutputDto
$llmResponse = $input->llmResponse;
$verifyPrompt = $input->verifyPrompt;

$prompt = <<<EOT
As a Data Integrity Officer, please review the following and return only what remains after you clean it up.
DO NOT include text like "Here is the cleaned-up response"; the user should not even know this step happened in the process.
DO NOT use any information outside of this context.
Just return the text as if answering the initial user's prompt "ORIGINAL PROMPT".
Using the CONTEXT, make sure the LLM RESPONSE is accurate, and just clean it up if not.
$verifyPrompt
### START ORIGINAL PROMPT
$originalPrompt
### END ORIGINAL PROMPT
### START CONTEXT
$context
### END CONTEXT
### START LLM RESPONSE
$llmResponse
### END LLM RESPONSE
EOT;
$prompt = VerificationPrompt::prompt($llmResponse, $context);

Log::info('[LaraChain] VerifyResponseAgent::verify', [
'prompt' => $prompt,
1 change: 0 additions & 1 deletion app/Domains/Documents/Transformers/PdfTransformer.php
@@ -52,7 +52,6 @@ public function handle(Document $document): Document

$chunks[] = [
new VectorlizeDataJob($DocumentChunk),
new SummarizeDataJob($DocumentChunk),
];

}
@@ -55,7 +55,6 @@ public function handle(Document $document): array

$chunks[] = [
new VectorlizeDataJob($DocumentChunk),
new SummarizeDataJob($DocumentChunk),
];
}

23 changes: 9 additions & 14 deletions app/Domains/Messages/SearchAndSummarizeChatRepo.php
@@ -4,6 +4,7 @@

use App\Domains\Agents\VerifyPromptInputDto;
use App\Domains\Agents\VerifyPromptOutputDto;
use App\Domains\Prompts\SummarizePrompt;
use App\Models\Chat;
use App\Models\DocumentChunk;
use Facades\App\Domains\Agents\VerifyResponseAgent;
@@ -58,21 +59,11 @@ public function search(Chat $chat, string $input): string

$context = implode(' ', $content);

$contentFlattened = <<<PROMPT
You are a helpful assistant in the Retrieval augmented generation system (RAG - an architectural approach that can improve the efficacy of large language model (LLM) applications by leveraging custom data):
This is data from the search results for the user's prompt, which is
### START PROMPT
$originalPrompt
### END PROMPT
Please use only the following context, summarize it for the user, and return it as markdown so I can render it; strip out any formatting like extra spaces, tabs, periods, etc.:
### START Context
$context
### END Context
PROMPT;
$contentFlattened = SummarizePrompt::prompt(
originalPrompt: $originalPrompt,
context: $context
);

$chat->addInput(
message: $contentFlattened,
@@ -95,6 +86,10 @@ public function search(Chat $chat, string $input): string
$chat->chatable->getDriver()
)->completion($contentFlattened);

Log::info('[LaraChain] Summary Results before verification', [
'response' => $response->content,
]);

/**
* Let's Verify
*/
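Note: the rest of this hunk is collapsed. For orientation only, here is a minimal sketch of how the verification step after the "Let's Verify" comment likely proceeds, mirroring the VerifyPromptInputDto / VerifyResponseAgent pattern that appears in SummarizeDataJob later in this commit; the DTO keys, variables, and verify-prompt wording below are assumptions, not lines from this file.

<?php

// Hypothetical sketch only - not part of this diff.
use App\Domains\Agents\VerifyPromptInputDto;
use App\Domains\Agents\VerifyPromptOutputDto;
use Facades\App\Domains\Agents\VerifyResponseAgent;

// $input, $context and $response are assumed to come from earlier in search().
$dto = VerifyPromptInputDto::from([
    'chattable' => $chat->chatable, // assumed target model
    'originalPrompt' => $input,
    'context' => $context,
    'llmResponse' => $response->content,
    'verifyPrompt' => 'Verify the summary is faithful to the context.', // illustrative wording
]);

/** @var VerifyPromptOutputDto $verified */
$verified = VerifyResponseAgent::verify($dto);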
42 changes: 42 additions & 0 deletions app/Domains/Prompts/SummarizePrompt.php
@@ -0,0 +1,42 @@
<?php

namespace App\Domains\Prompts;


class SummarizePrompt {

public static function prompt(string $originalPrompt, string $context) : string {
return <<<PROMPT
# **Role, Task, Format (R.T.F)**
**Role**: You are the core Agent of the Retrieval Augmented Generation system (RAG). Your primary role is to respond to user queries accurately by interpreting and synthesizing relevant information from provided documents.
**Task**: Prioritize the user’s query to guide your response, using the context from the documents to support and inform your answer.
**Format**: Provide responses in Markdown format that directly address the user’s question, supplemented by relevant information extracted from the context.
# **Context, Action, Result, Example (C.A.R.E)**
**Context**: The text provided is a scientific article discussing Lyme borreliosis in Europe.
**Action**: Identify the user’s specific query and use key points from the article to construct a response that directly addresses this query, providing additional insights where relevant.
**Result**: A tailored response that directly answers the user's question, supported by accurate and pertinent information from the context.
**Example**: If the user asks about the effectiveness of the vaccine, focus your response on vaccine outcomes and supporting data mentioned in the article.
# **Before, After, Bridge (B.A.B)**
**Before**: The user has a question that may require background information or specific details from a larger document.
**After**: The user receives a concise, informative answer that directly addresses their question, using the context to enhance the response.
**Bridge**: By analyzing the user's query and linking it with relevant information from the document, you bridge the gap between the user's need for specific information and the comprehensive details available in the context.
# **Task, Action, Goal (T.A.G)**
**Task**: Directly respond to the user’s query.
**Action**: Use the document's context to inform and support your response, ensuring it is relevant and comprehensive.
**Goal**: Deliver an answer that satisfies the user's inquiry and provides them with a deeper understanding of the topic based on the provided document.
---
**The User's Query**:
$originalPrompt
**Context from the database search of documents for the response**:
$context
PROMPT;
}
}
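For reference, a minimal usage sketch of the new SummarizePrompt class as it is called from SearchAndSummarize and SearchAndSummarizeChatRepo above; the sample question and chunk text are placeholders, not data from the repository.

<?php

use App\Domains\Prompts\SummarizePrompt;

// Placeholder inputs: $originalPrompt is the user's question and $content is
// the array of matching document-chunk text gathered by the distance query.
$originalPrompt = 'What does the article say about vaccine effectiveness?';
$content = ['Chunk one text...', 'Chunk two text...'];

$contentFlattened = SummarizePrompt::prompt(
    originalPrompt: $originalPrompt,
    context: implode(' ', $content)
);

// $contentFlattened now holds the full R.T.F / C.A.R.E / B.A.B / T.A.G prompt
// with the query and context interpolated, ready for the LLM driver.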
42 changes: 42 additions & 0 deletions app/Domains/Prompts/VerificationPrompt.php
@@ -0,0 +1,42 @@
<?php

namespace App\Domains\Prompts;


class VerificationPrompt {

public static function prompt(string $llmResponse, string $context) : string {
return <<<PROMPT
# **Role, Task, Format (R.T.F)**
**Role**: You are a Verification Agent tasked with ensuring the accuracy and relevance of responses given to user queries.
**Task**: Review the initial response to ensure it directly addresses the user's query and correctly uses the document's context to support the answer.
**Format**: Provide feedback in a concise format, noting any discrepancies or areas for enhancement.
# **Context, Action, Result, Example (C.A.R.E)**
**Context**: The initial response was crafted to answer a specific query using the context from a scientific article.
**Action**: Verify that the response adequately addresses the user's question and that the information from the context is accurately and effectively integrated.
**Result**: A confirmation that the response is accurate and fulfills the user's informational needs, or a correction if discrepancies are found.
**Example**: Ensure that the facts used to support the response are correctly interpreted from the article and that the user’s query is the central focus.
# **Before, After, Bridge (B.A.B)**
**Before**: There may be concerns about the accuracy or relevance of the initial response.
**After**: The user receives a verified answer that is both accurate and highly relevant to their query.
**Bridge**: By critically reviewing the initial response and making necessary corrections, you ensure the integrity and usefulness of the information provided to the user.
# **Task, Action, Goal (T.A.G)**
**Task**: Confirm the relevance and accuracy of the initial response.
**Action**: Scrutinize the response against the user's query and the document's context.
**Goal**: Provide assurance or necessary corrections to ensure the response adequately addresses the user's query with accurate support from the context.
---
**Initial Response for Verification**:
$llmResponse
**Context of data used in the above response**:
$context
PROMPT;
}
}
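Similarly, a small sketch of how VerifyResponseAgent feeds this template, per the VerificationPrompt::prompt($llmResponse, $context) call shown earlier in this diff; the sample values are placeholders.

<?php

use App\Domains\Prompts\VerificationPrompt;

// Placeholder values; in VerifyResponseAgent::verify() these come from the
// VerifyPromptInputDto ($input->llmResponse and $input->context).
$llmResponse = 'Markdown summary produced by the completion call...';
$context = 'Flattened text of the document chunks used to build the summary.';

$prompt = VerificationPrompt::prompt($llmResponse, $context);

// The agent presumably sends $prompt back through the chattable's LLM driver
// (as SummarizeDataJob does) to get the cleaned-up, verified response.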
8 changes: 1 addition & 7 deletions app/Http/Controllers/TextDocumentController.php
@@ -54,15 +54,9 @@ public function store(Collection $collection, Request $request)
'sort_order' => $page_number,
]
);
/**
* Soon taggings
* And Summary
*/

$jobs[] = [
new VectorlizeDataJob($DocumentChunk),
new SummarizeDataJob($DocumentChunk),
//new TagDataJob($this->document),
//then mark it all as done and notify the ui
];

CollectionStatusEvent::dispatch($document->collection, CollectionStatusEnum::PROCESSING);
1 change: 0 additions & 1 deletion app/Jobs/GetWebContentJob.php
@@ -122,7 +122,6 @@ public function handle(): void

$this->batch()->add([
new VectorlizeDataJob($DocumentChunk),
new SummarizeDataJob($DocumentChunk),
]);

$page_number++;
91 changes: 50 additions & 41 deletions app/Jobs/SummarizeDataJob.php
@@ -13,6 +13,7 @@
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\Log;
use LlmLaraHub\LlmDriver\Helpers\JobMiddlewareTrait;
use LlmLaraHub\LlmDriver\LlmDriverFacade;
use LlmLaraHub\LlmDriver\Responses\CompletionResponse;
@@ -42,50 +43,58 @@ public function middleware(): array
*/
public function handle(): void
{
        if (optional($this->batch())->cancelled()) {
            // Determine if the batch has been cancelled...
            $this->documentChunk->update([
                'status_summary' => StatusEnum::Cancelled,
            ]);

            return;
        }

        $content = $this->documentChunk->content;
        $prompt = <<<EOD
The following content is part of a larger document. I would like you to summarize it so
I can show a summary view of all the other pages and how this one relates to the same document.
Just return the summary, 1-2 lines if possible and no extra surrounding text.
The content to summarize follows:
$content
EOD;

        /** @var CompletionResponse $results */
        $results = LlmDriverFacade::driver(
            $this->documentChunk->getDriver()
        )->completion($prompt);

        $verifyPrompt = <<<'PROMPT'
This is the content from a chunk of data in a document.
Can you verify the summary is correct?
PROMPT;

        $dto = VerifyPromptInputDto::from(
            [
                'chattable' => $this->documentChunk,
                'originalPrompt' => $prompt,
                'context' => $content,
                'llmResponse' => $results->content,
                'verifyPrompt' => $verifyPrompt,
            ]
        );

        /** @var VerifyPromptOutputDto $response */
        $response = VerifyResponseAgent::verify($dto);

        $this->documentChunk->update([
            'summary' => $response->response,
            'status_summary' => StatusEnum::Complete,
        ]);
        try {
            if (optional($this->batch())->cancelled()) {
                // Determine if the batch has been cancelled...
                $this->documentChunk->update([
                    'status_summary' => StatusEnum::Cancelled,
                ]);

                return;
            }

            $content = $this->documentChunk->content;
            $prompt = <<<EOD
The following content is part of a larger document. I would like you to summarize it so
I can show a summary view of all the other pages and how this one relates to the same document.
Just return the summary, 1-2 lines if possible and no extra surrounding text.
The content to summarize follows:
{$content}
EOD;

            /** @var CompletionResponse $results */
            $results = LlmDriverFacade::driver(
                $this->documentChunk->getDriver()
            )->completion($prompt);

            $verifyPrompt = <<<'PROMPT'
This is the content from a chunk of data in a document.
Can you verify the summary is correct?
PROMPT;

            $dto = VerifyPromptInputDto::from(
                [
                    'chattable' => $this->documentChunk,
                    'originalPrompt' => $prompt,
                    'context' => $content,
                    'llmResponse' => $results->content,
                    'verifyPrompt' => $verifyPrompt,
                ]
            );

            /** @var VerifyPromptOutputDto $response */
            $response = VerifyResponseAgent::verify($dto);

            $this->documentChunk->update([
                'summary' => $response->response,
                'status_summary' => StatusEnum::Complete,
            ]);
        } catch (\Exception $e) {
            Log::error('SummarizeDataJob Error', [
                'message' => $e->getMessage(),
            ]);
        }
}
}
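Since this commit removes SummarizeDataJob from the transformer job chains above, a hedged sketch of how the job could still be dispatched on its own for a document's chunks; the Bus::batch wiring and the document_chunks relation name are assumptions, not taken from this commit.

<?php

use App\Jobs\SummarizeDataJob;
use Illuminate\Support\Facades\Bus;

// Illustrative only: queue a summary job per chunk in a single batch so the
// batch-cancelled guard inside handle() still applies.
Bus::batch(
    $document->document_chunks->map(
        fn ($chunk) => new SummarizeDataJob($chunk)
    )->all()
)->name('Summarize document '.$document->id)
    ->dispatch();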
2 changes: 1 addition & 1 deletion resources/js/Components/TextArea.vue
@@ -15,7 +15,7 @@ const input = ref(null);
<template>
<textarea
ref="input"
class="border-gray-300 focus:border-indigo-500 focus:ring-indigo-500 rounded-md shadow-sm text-gray-800 dark:text-gray-200"
class="border-gray-300 focus:border-indigo-500 focus:ring-indigo-500 rounded-md shadow-sm text-gray-800 dark:text-gray-400"
:value="modelValue"
@input="$emit('update:modelValue', $event.target.value)"
>
2 changes: 1 addition & 1 deletion resources/js/Components/TextInput.vue
@@ -21,7 +21,7 @@ defineExpose({ focus: () => input.value.focus() });
<template>
<input
ref="input"
class="border-gray-300 focus:border-indigo-500 focus:ring-indigo-500 rounded-md shadow-sm text-gray-800 dark:text-gray-200"
class="border-gray-300 focus:border-indigo-500 focus:ring-indigo-500 rounded-md shadow-sm text-gray-800 dark:text-gray-400"
:value="modelValue"
@input="$emit('update:modelValue', $event.target.value)"
>
2 changes: 1 addition & 1 deletion resources/js/Pages/Collection/Components/ResourceForm.vue
@@ -1,5 +1,5 @@
<template>
<div class="grid grid-cols-6 gap-4 text-gray-900 dark:text-gray-200">
<div class="grid grid-cols-6 gap-4 text-gray-900 dark:text-gray-400">

<div class="col-span-6 sm:col-span-6">
<InputLabel value="Name" />