From 4651e0ebfa68424662c3ed894260b841fbd18ce2 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Fri, 10 May 2024 22:01:29 -0400 Subject: [PATCH] Trying to simplify the query --- Modules/LlmDriver/app/DistanceQuery.php | 37 ++----------------- .../Pages/Chat/Components/ReferenceTable.vue | 6 +-- tests/Feature/GenericSiteSpiderTest.php | 1 + tests/Feature/WebSearchSourceTest.php | 4 +- 4 files changed, 9 insertions(+), 39 deletions(-) diff --git a/Modules/LlmDriver/app/DistanceQuery.php b/Modules/LlmDriver/app/DistanceQuery.php index 81674cb4..1ffacb60 100644 --- a/Modules/LlmDriver/app/DistanceQuery.php +++ b/Modules/LlmDriver/app/DistanceQuery.php @@ -34,42 +34,15 @@ public function distance( $documentIds = Document::query() ->select('id') ->where('documents.collection_id', $collectionId) + ->orderBy('id') ->pluck('id'); $commonQuery = DocumentChunk::query() + ->orderBy('sort_order') + ->orderBy('section_number') ->whereIn('document_id', $documentIds); - // Find nearest neighbors using L2 distance - $documentChunkResults = $commonQuery - ->nearestNeighbors($embeddingSize, $embedding, Distance::L2) - ->take(5) - ->get(); - - // Get IDs of the nearest neighbors found 5 - $nearestNeighborIds = $documentChunkResults->pluck('id')->toArray(); - Log::info('[LaraChain] Nearest Neighbor IDs', [ - 'count' => count($nearestNeighborIds), - 'ids' => $nearestNeighborIds, - ]); - // Find nearest neighbors using InnerProduct distance - $neighborsInnerProduct = $commonQuery - ->whereNotIn('document_chunks.id', $nearestNeighborIds) - ->nearestNeighbors($embeddingSize, $embedding, Distance::InnerProduct) - ->get(); - - // Find nearest neighbors using Cosine distance found 0 - $neighborsInnerProductIds = $neighborsInnerProduct->pluck('id')->toArray(); - - Log::info('[LaraChain] Nearest Neighbor Inner Product IDs', [ - 'count' => count($neighborsInnerProductIds), - 'ids' => $neighborsInnerProductIds, - ]); - $neighborsCosine = $commonQuery - ->whereNotIn('id', $nearestNeighborIds) - ->when(! empty($neighborsInnerProductIds), function ($query) use ($neighborsInnerProductIds) { - return $query->whereNotIn('id', $neighborsInnerProductIds); - }) ->nearestNeighbors($embeddingSize, $embedding, Distance::Cosine) ->get(); @@ -79,10 +52,8 @@ public function distance( ]); $results = collect($neighborsCosine) - ->merge($neighborsInnerProduct) - ->merge($documentChunkResults) ->unique('id') - ->take(10); + ->take(5); $siblingsIncluded = collect(); diff --git a/resources/js/Pages/Chat/Components/ReferenceTable.vue b/resources/js/Pages/Chat/Components/ReferenceTable.vue index 4f0ca30f..be055e9c 100644 --- a/resources/js/Pages/Chat/Components/ReferenceTable.vue +++ b/resources/js/Pages/Chat/Components/ReferenceTable.vue @@ -6,8 +6,7 @@ Document Name Page - Section
Record ID - Distance + Section/
Record ID Summary @@ -23,7 +22,6 @@ {{ reference.page }} {{ reference.section_number }}/{{ reference.document_chunk_id }} - {{ reference.distance }} @@ -35,4 +33,4 @@ const props = defineProps({ message: Object }) - \ No newline at end of file + diff --git a/tests/Feature/GenericSiteSpiderTest.php b/tests/Feature/GenericSiteSpiderTest.php index 0cfd78b4..3e1fef42 100644 --- a/tests/Feature/GenericSiteSpiderTest.php +++ b/tests/Feature/GenericSiteSpiderTest.php @@ -15,6 +15,7 @@ class GenericSiteSpiderTest extends TestCase */ public function test_spider(): void { + $this->markTestSkipped('@NOTE not sure I will keep this'); $urls = [ 'https://alfrednutile.info/ssh-config', 'https://alnutile.medium.com/multiple-openai-functions-php-laravel-466cb72eefb8', diff --git a/tests/Feature/WebSearchSourceTest.php b/tests/Feature/WebSearchSourceTest.php index ab87ca5d..4d442347 100644 --- a/tests/Feature/WebSearchSourceTest.php +++ b/tests/Feature/WebSearchSourceTest.php @@ -21,8 +21,8 @@ public function test_searches(): void LlmDriverFacade::shouldReceive('driver->completion') ->once()->andReturn(CompletionResponse::from([ - 'content' => 'updated query', - ])); + 'content' => 'updated query', + ])); LlmDriverFacade::shouldReceive('driver->onQueue') ->once()->andReturn('ollama');