From 161a616c4e21ecadcba0b1e4bd04f335f4981780 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Thu, 16 May 2024 09:09:26 -0400 Subject: [PATCH] move to shared query ability --- .../app/NonFunctionSearchOrSummarize.php | 156 ++++++++++++++++++ .../Controllers/WebPageOutputController.php | 128 +------------- .../WebPageOutputControllerTest.php | 52 +----- .../NonFunctionSearchOrSummarizeTest.php | 93 +++++++++++ 4 files changed, 257 insertions(+), 172 deletions(-) create mode 100644 Modules/LlmDriver/app/NonFunctionSearchOrSummarize.php create mode 100644 tests/Feature/NonFunctionSearchOrSummarizeTest.php diff --git a/Modules/LlmDriver/app/NonFunctionSearchOrSummarize.php b/Modules/LlmDriver/app/NonFunctionSearchOrSummarize.php new file mode 100644 index 00000000..c373c277 --- /dev/null +++ b/Modules/LlmDriver/app/NonFunctionSearchOrSummarize.php @@ -0,0 +1,156 @@ + $collection->id, + 'input' => $input + ]); + + $prompt = SearchOrSummarize::prompt($input); + + $response = LlmDriverFacade::driver( + $collection->getDriver() + )->completion($prompt); + + Log::info('[LaraChain] - Results from search or summarize', [ + 'results' => $response->content, + ]); + + if (str($response->content)->contains('search')) { + Log::info('[LaraChain] - LLM Thinks it is Search', [ + 'response' => $response->content] + ); + + $embedding = LlmDriverFacade::driver( + $collection->getEmbeddingDriver() + )->embedData($input); + + $embeddingSize = get_embedding_size($collection->getEmbeddingDriver()); + + //put_fixture("anonymous_embedding_result.json", $embedding); + $documentChunkResults = DistanceQuery::distance( + $embeddingSize, + $collection->id, + $embedding->embedding + ); + + $content = []; + + /** @var DocumentChunk $result */ + foreach ($documentChunkResults as $result) { + $contentString = remove_ascii($result->content); + $content[] = $contentString; //reduce_text_size seem to mess up Claude? + } + + $context = implode(' ', $content); + + Log::info('[LaraChain] - Content Found', [ + 'content' => $content, + ]); + + $contentFlattened = SummarizePrompt::prompt( + originalPrompt: $input, + context: $context + ); + + Log::info('[LaraChain] - Prompt with Context', [ + 'prompt' => $contentFlattened, + ]); + + $response = LlmDriverFacade::driver( + $collection->getDriver() + )->completion($contentFlattened); + + $this->results = $response->content; + } elseif (str($response->content)->contains('summarize')) { + Log::info('[LaraChain] - LLM Thinks it is summarize', [ + 'response' => $response->content] + ); + + $content = []; + + foreach ($collection->documents as $result) { + $contentString = remove_ascii($result->summary); + $content[] = $contentString; //reduce_text_size seem to mess up Claude? + } + + $contentFlattened = implode(' ', $content); + + Log::info('[LaraChain] - Documents Flattened', [ + 'collection' => $collection->id, + 'content' => $content] + ); + + $prompt = SummarizeDocumentPrompt::prompt($contentFlattened); + + $response = LlmDriverFacade::driver( + $collection->getDriver() + )->completion($prompt); + + + $this->results = $response->content; + } else { + Log::info('[LaraChain] - LLM is not sure :(', [ + 'response' => $response->content] + ); + + $embedding = LlmDriverFacade::driver( + $collection->getEmbeddingDriver() + )->embedData($input); + + $embeddingSize = get_embedding_size($collection->getEmbeddingDriver()); + + $documentChunkResults = DistanceQuery::distance( + $embeddingSize, + $collection->id, + $embedding->embedding + ); + + $content = []; + + /** @var DocumentChunk $result */ + foreach ($documentChunkResults as $result) { + $contentString = remove_ascii($result->content); + $content[] = $contentString; //reduce_text_size seem to mess up Claude? + } + + $context = implode(' ', $content); + + Log::info('[LaraChain] - Content Found', [ + 'content' => $content, + ]); + + $contentFlattened = DefaultPrompt::prompt( + originalPrompt: $input, + context: $context + ); + + $response = LlmDriverFacade::driver( + $collection->getDriver() + )->completion($contentFlattened); + + + $this->results = $response->content; + + } + + return $this->results; + } +} diff --git a/app/Http/Controllers/WebPageOutputController.php b/app/Http/Controllers/WebPageOutputController.php index d0c9850f..5f2d2fa3 100644 --- a/app/Http/Controllers/WebPageOutputController.php +++ b/app/Http/Controllers/WebPageOutputController.php @@ -20,6 +20,7 @@ use Illuminate\Support\Facades\Log; use LlmLaraHub\LlmDriver\Helpers\TrimText; use LlmLaraHub\LlmDriver\LlmDriverFacade; +use Facades\LlmLaraHub\LlmDriver\NonFunctionSearchOrSummarize; use LlmLaraHub\LlmDriver\Requests\MessageInDto; class WebPageOutputController extends Controller @@ -85,132 +86,9 @@ public function chat(Output $output) 'message' => $validated['input']] ); - $prompt = SearchOrSummarize::prompt($input); + $results = NonFunctionSearchOrSummarize::handle($input, $output->collection); - $response = LlmDriverFacade::driver( - $output->collection->getDriver() - )->completion($prompt); - - Log::info('[LaraChain] - Results from search or summarize', [ - 'results' => $response->content, - ]); - - if (str($response->content)->contains('search')) { - Log::info('[LaraChain] - LLM Thinks it is Search', [ - 'response' => $response->content] - ); - - $embedding = LlmDriverFacade::driver( - $output->collection->getEmbeddingDriver() - )->embedData($input); - - $embeddingSize = get_embedding_size($output->collection->getEmbeddingDriver()); - - //put_fixture("anonymous_embedding_result.json", $embedding); - $documentChunkResults = DistanceQuery::distance( - $embeddingSize, - $output->collection->id, - $embedding->embedding - ); - - $content = []; - - /** @var DocumentChunk $result */ - foreach ($documentChunkResults as $result) { - $contentString = remove_ascii($result->content); - $content[] = $contentString; //reduce_text_size seem to mess up Claude? - } - - $context = implode(' ', $content); - - Log::info('[LaraChain] - Content Found', [ - 'content' => $content, - ]); - - $contentFlattened = SummarizePrompt::prompt( - originalPrompt: $input, - context: $context - ); - - Log::info('[LaraChain] - Prompt with Context', [ - 'prompt' => $contentFlattened, - ]); - - $response = LlmDriverFacade::driver( - $output->collection->getDriver() - )->completion($contentFlattened); - - $this->setChatMessages($response->content, 'assistant'); - - } elseif (str($response->content)->contains('summarize')) { - Log::info('[LaraChain] - LLM Thinks it is summarize', [ - 'response' => $response->content] - ); - - $content = []; - - foreach ($output->collection->documents as $result) { - $contentString = remove_ascii($result->summary); - $content[] = $contentString; //reduce_text_size seem to mess up Claude? - } - - $contentFlattened = implode(' ', $content); - - Log::info('[LaraChain] - Documents Flattened', [ - 'collection' => $output->collection_id, - 'content' => $content] - ); - - $prompt = SummarizeDocumentPrompt::prompt($contentFlattened); - - $response = LlmDriverFacade::driver( - $output->collection->getDriver() - )->completion($prompt); - - $this->setChatMessages($response->content, 'assistant'); - } else { - Log::info('[LaraChain] - LLM is not sure :(', [ - 'response' => $response->content] - ); - - $embedding = LlmDriverFacade::driver( - $output->collection->getEmbeddingDriver() - )->embedData($input); - - $embeddingSize = get_embedding_size($output->collection->getEmbeddingDriver()); - - $documentChunkResults = DistanceQuery::distance( - $embeddingSize, - $output->collection->id, - $embedding->embedding - ); - - $content = []; - - /** @var DocumentChunk $result */ - foreach ($documentChunkResults as $result) { - $contentString = remove_ascii($result->content); - $content[] = $contentString; //reduce_text_size seem to mess up Claude? - } - - $context = implode(' ', $content); - - Log::info('[LaraChain] - Content Found', [ - 'content' => $content, - ]); - - $contentFlattened = DefaultPrompt::prompt( - originalPrompt: $input, - context: $context - ); - - $response = LlmDriverFacade::driver( - $output->collection->getDriver() - )->completion($contentFlattened); - - $this->setChatMessages($response->content, 'assistant'); - - } + $this->setChatMessages($results, 'assistant'); return back(); } diff --git a/tests/Feature/Http/Controllers/WebPageOutputControllerTest.php b/tests/Feature/Http/Controllers/WebPageOutputControllerTest.php index 0f3950b1..b757050c 100644 --- a/tests/Feature/Http/Controllers/WebPageOutputControllerTest.php +++ b/tests/Feature/Http/Controllers/WebPageOutputControllerTest.php @@ -9,6 +9,7 @@ use App\Models\User; use Facades\LlmLaraHub\LlmDriver\DistanceQuery; use LlmLaraHub\LlmDriver\LlmDriverFacade; +use Facades\LlmLaraHub\LlmDriver\NonFunctionSearchOrSummarize; use LlmLaraHub\LlmDriver\Responses\CompletionResponse; use LlmLaraHub\LlmDriver\Responses\EmbeddingsResponseDto; use Pgvector\Laravel\Vector; @@ -65,34 +66,12 @@ public function test_chat_summarize() public function test_chat_search() { - - $documentChunk = DocumentChunk::factory()->create(); - - DistanceQuery::shouldReceive('distance')->once()->andReturn(DocumentChunk::all()); - $output = Output::factory()->create([ 'active' => true, 'public' => true, ]); - - $question = get_fixture('embedding_question_distance.json'); - - $vector = new Vector($question); - - LlmDriverFacade::shouldReceive('driver->embedData') - ->once() - ->andReturn(EmbeddingsResponseDto::from( - [ - 'embedding' => $vector, - 'token_count' => 2, - ] - )); - - LlmDriverFacade::shouldReceive('driver->completion') - ->twice() - ->andReturn(CompletionResponse::from([ - 'content' => 'search', - ])); + NonFunctionSearchOrSummarize::shouldReceive('handle') + ->once()->andReturn("Foo"); $this->post(route( 'collections.outputs.web_page.chat', [ @@ -105,35 +84,14 @@ public function test_chat_search() public function test_no_search_no_summary() { - - DocumentChunk::factory()->create(); - - DistanceQuery::shouldReceive('distance')->once()->andReturn(DocumentChunk::all()); + NonFunctionSearchOrSummarize::shouldReceive('handle') + ->once()->andReturn("Foo"); $output = Output::factory()->create([ 'active' => true, 'public' => true, ]); - $question = get_fixture('embedding_question_distance.json'); - - $vector = new Vector($question); - - LlmDriverFacade::shouldReceive('driver->embedData') - ->once() - ->andReturn(EmbeddingsResponseDto::from( - [ - 'embedding' => $vector, - 'token_count' => 2, - ] - )); - - LlmDriverFacade::shouldReceive('driver->completion') - ->twice() - ->andReturn(CompletionResponse::from([ - 'content' => 'not sure :(', - ])); - $this->post(route( 'collections.outputs.web_page.chat', [ 'output' => $output->id, diff --git a/tests/Feature/NonFunctionSearchOrSummarizeTest.php b/tests/Feature/NonFunctionSearchOrSummarizeTest.php new file mode 100644 index 00000000..7381627a --- /dev/null +++ b/tests/Feature/NonFunctionSearchOrSummarizeTest.php @@ -0,0 +1,93 @@ +create(); + + DistanceQuery::shouldReceive('distance')->once()->andReturn(DocumentChunk::all()); + + $output = Output::factory()->create([ + 'active' => true, + 'public' => true, + ]); + + $question = get_fixture('embedding_question_distance.json'); + + $vector = new Vector($question); + + LlmDriverFacade::shouldReceive('driver->embedData') + ->once() + ->andReturn(EmbeddingsResponseDto::from( + [ + 'embedding' => $vector, + 'token_count' => 2, + ] + )); + + LlmDriverFacade::shouldReceive('driver->completion') + ->twice() + ->andReturn(CompletionResponse::from([ + 'content' => 'search', + ])); + + $results = (new NonFunctionSearchOrSummarize())->handle("Search for foo", $output->collection); + + $this->assertNotNull($results); + } + + + public function test_no_search_no_summary() { + + DocumentChunk::factory()->create(); + + DistanceQuery::shouldReceive('distance')->once()->andReturn(DocumentChunk::all()); + + $output = Output::factory()->create([ + 'active' => true, + 'public' => true, + ]); + + $question = get_fixture('embedding_question_distance.json'); + + $vector = new Vector($question); + + LlmDriverFacade::shouldReceive('driver->embedData') + ->once() + ->andReturn(EmbeddingsResponseDto::from( + [ + 'embedding' => $vector, + 'token_count' => 2, + ] + )); + + LlmDriverFacade::shouldReceive('driver->completion') + ->twice() + ->andReturn(CompletionResponse::from([ + 'content' => 'not sure :(', + ])); + + $results = (new NonFunctionSearchOrSummarize())->handle("Search for foo", $output->collection); + + $this->assertNotNull($results); + + } +}