From a12e12bc37434476811298218924055900a9ce32 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Wed, 8 May 2024 06:46:33 -0400 Subject: [PATCH 01/20] make chunker --- app/Helpers/TextChunker.php | 27 ++++++++++++++++ ...03204_chunk_the_chunks_document_chunks.php | 26 +++++++++++++++ tests/Feature/TextChunkerTest.php | 32 +++++++++++++++++++ tests/fixtures/chunkable_text.txt | 9 ++++++ tests/fixtures/chunkable_text_results.json | 11 +++++++ 5 files changed, 105 insertions(+) create mode 100644 app/Helpers/TextChunker.php create mode 100644 database/migrations/2024_05_08_103204_chunk_the_chunks_document_chunks.php create mode 100644 tests/Feature/TextChunkerTest.php create mode 100644 tests/fixtures/chunkable_text.txt create mode 100644 tests/fixtures/chunkable_text_results.json diff --git a/app/Helpers/TextChunker.php b/app/Helpers/TextChunker.php new file mode 100644 index 00000000..a8832c9b --- /dev/null +++ b/app/Helpers/TextChunker.php @@ -0,0 +1,27 @@ + $textLength) { + // Get the remaining text if it's shorter than the chunk size. + $chunks[] = substr($text, $start); + break; + } + + // Get the chunk from the text. + $chunks[] = substr($text, $start, $chunkSize); + } + + return $chunks; + } +} \ No newline at end of file diff --git a/database/migrations/2024_05_08_103204_chunk_the_chunks_document_chunks.php b/database/migrations/2024_05_08_103204_chunk_the_chunks_document_chunks.php new file mode 100644 index 00000000..0e7bc29f --- /dev/null +++ b/database/migrations/2024_05_08_103204_chunk_the_chunks_document_chunks.php @@ -0,0 +1,26 @@ +integer('section')->nullable(); + }); + } + + /** + * Reverse the migrations. + */ + public function down(): void + { + // + } +}; diff --git a/tests/Feature/TextChunkerTest.php b/tests/Feature/TextChunkerTest.php new file mode 100644 index 00000000..7511d8dc --- /dev/null +++ b/tests/Feature/TextChunkerTest.php @@ -0,0 +1,32 @@ +assertCount(4, $results); + + $text = get_fixture("chunkable_text.txt", false); + + $results = TextChunker::handle($text, 300, 100); + + $this->assertCount(9, $results); + + //put_fixture("chunkable_text_results.json", $results); + $this->assertEquals(get_fixture("chunkable_text_results.json"), $results); + } +} diff --git a/tests/fixtures/chunkable_text.txt b/tests/fixtures/chunkable_text.txt new file mode 100644 index 00000000..e76e2b37 --- /dev/null +++ b/tests/fixtures/chunkable_text.txt @@ -0,0 +1,9 @@ +In this study, we explored the relationship between two variables commonly associated with environmental changes, namely temperature and precipitation, and their impacts on the migration patterns of various bird species. The primary hypothesis posited that significant changes in these environmental factors would correlate strongly with alterations in migratory routes and timings. + +The methodology employed involved the collection of over a decade of meteorological data coupled with extensive bird migration observations across three continents. Statistical analyses were conducted using regression models to determine the strength of correlations between the changes in temperature and precipitation with the shifts in migration patterns. + +Results indicated a moderate to strong correlation in most species observed, suggesting that even slight variations in environmental conditions can precipitously affect the behavior of these migratory birds. In particular, species that rely heavily on coastal regions exhibited the most pronounced changes, likely due to their sensitivity to sea level changes influenced by temperature increases. + +Discussion of these findings highlights the potential for significant ecological disruptions if current trends in global temperature rise continue. Furthermore, the data suggests that continued monitoring and research into the adaptive behaviors of migratory species are crucial for predicting and mitigating future impacts on biodiversity. + +In conclusion, the study underscores the intricate connections between climate change and animal behavior, advocating for enhanced research initiatives to further understand these dynamics and potentially develop strategies to mitigate adverse outcomes. diff --git a/tests/fixtures/chunkable_text_results.json b/tests/fixtures/chunkable_text_results.json new file mode 100644 index 00000000..7b2f10f8 --- /dev/null +++ b/tests/fixtures/chunkable_text_results.json @@ -0,0 +1,11 @@ +[ + "In this study, we explored the relationship between two variables commonly associated with environmental changes, namely temperature and precipitation, and their impacts on the migration patterns of various bird species. The primary hypothesis posited that significant changes in these environmental ", + "arious bird species. The primary hypothesis posited that significant changes in these environmental factors would correlate strongly with alterations in migratory routes and timings.\n\nThe methodology employed involved the collection of over a decade of meteorological data coupled with extensive bird", + "employed involved the collection of over a decade of meteorological data coupled with extensive bird migration observations across three continents. Statistical analyses were conducted using regression models to determine the strength of correlations between the changes in temperature and precipitat", + "n models to determine the strength of correlations between the changes in temperature and precipitation with the shifts in migration patterns.\n\nResults indicated a moderate to strong correlation in most species observed, suggesting that even slight variations in environmental conditions can precipit", + "st species observed, suggesting that even slight variations in environmental conditions can precipitously affect the behavior of these migratory birds. In particular, species that rely heavily on coastal regions exhibited the most pronounced changes, likely due to their sensitivity to sea level chan", + "tal regions exhibited the most pronounced changes, likely due to their sensitivity to sea level changes influenced by temperature increases.\n\nDiscussion of these findings highlights the potential for significant ecological disruptions if current trends in global temperature rise continue. Furthermor", + "significant ecological disruptions if current trends in global temperature rise continue. Furthermore, the data suggests that continued monitoring and research into the adaptive behaviors of migratory species are crucial for predicting and mitigating future impacts on biodiversity.\n\nIn conclusion, t", + " species are crucial for predicting and mitigating future impacts on biodiversity.\n\nIn conclusion, the study underscores the intricate connections between climate change and animal behavior, advocating for enhanced research initiatives to further understand these dynamics and potentially develop str", + "g for enhanced research initiatives to further understand these dynamics and potentially develop strategies to mitigate adverse outcomes.\n" +] \ No newline at end of file From 7ba9746a7ef6c9b8fa3551956922370b7cfd8d46 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Wed, 8 May 2024 06:49:20 -0400 Subject: [PATCH 02/20] add the fields section_number --- database/factories/DocumentChunkFactory.php | 2 ++ .../2024_05_08_103204_chunk_the_chunks_document_chunks.php | 2 +- tests/Feature/Models/DocumentChunkTest.php | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/database/factories/DocumentChunkFactory.php b/database/factories/DocumentChunkFactory.php index f24d96fc..e2df75d2 100644 --- a/database/factories/DocumentChunkFactory.php +++ b/database/factories/DocumentChunkFactory.php @@ -48,6 +48,8 @@ public function definition(): array return [ 'guid' => fake()->uuid(), 'content' => fake()->sentence(10), + 'sort_order' => fake()->numberBetween(1, 100), + 'section_number' => fake()->numberBetween(1, 100), 'status_embeddings' => StatusEnum::random(), 'status_tagging' => StatusEnum::random(), 'status_summary' => StatusEnum::random(), diff --git a/database/migrations/2024_05_08_103204_chunk_the_chunks_document_chunks.php b/database/migrations/2024_05_08_103204_chunk_the_chunks_document_chunks.php index 0e7bc29f..f924c131 100644 --- a/database/migrations/2024_05_08_103204_chunk_the_chunks_document_chunks.php +++ b/database/migrations/2024_05_08_103204_chunk_the_chunks_document_chunks.php @@ -12,7 +12,7 @@ public function up(): void { Schema::table('document_chunks', function (Blueprint $table) { - $table->integer('section')->nullable(); + $table->integer('section_number')->nullable(); }); } diff --git a/tests/Feature/Models/DocumentChunkTest.php b/tests/Feature/Models/DocumentChunkTest.php index 109ab0a9..cb7e1ee6 100644 --- a/tests/Feature/Models/DocumentChunkTest.php +++ b/tests/Feature/Models/DocumentChunkTest.php @@ -15,6 +15,8 @@ public function test_dc_factory() $model = DocumentChunk::factory()->create(); $this->assertNotNull($model->content); $this->assertNotNull($model->meta_data); + $this->assertNotNull($model->section_number); + $this->assertNotNull($model->sort_order); } public function test_original_boot() From ac4447bf4f540bf39300a35a8e207e5360cc64e5 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Wed, 8 May 2024 06:49:32 -0400 Subject: [PATCH 03/20] add the fields section_number --- app/Helpers/TextChunker.php | 9 ++++----- tests/Feature/TextChunkerTest.php | 8 +++----- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/app/Helpers/TextChunker.php b/app/Helpers/TextChunker.php index a8832c9b..ad1627e8 100644 --- a/app/Helpers/TextChunker.php +++ b/app/Helpers/TextChunker.php @@ -1,10 +1,9 @@ -assertCount(4, $results); - $text = get_fixture("chunkable_text.txt", false); + $text = get_fixture('chunkable_text.txt', false); $results = TextChunker::handle($text, 300, 100); $this->assertCount(9, $results); //put_fixture("chunkable_text_results.json", $results); - $this->assertEquals(get_fixture("chunkable_text_results.json"), $results); + $this->assertEquals(get_fixture('chunkable_text_results.json'), $results); } } From 0ab0327d03b7e8372aff489a72c2eaaf79ca20d2 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Wed, 8 May 2024 07:44:05 -0400 Subject: [PATCH 04/20] chunking now will work for the pdf etc --- .../Documents/Transformers/PdfTransformer.php | 41 ++++++++++------- .../Transformers/PowerPointTransformer.php | 46 ++++++++++++------- app/Events/CollectionStatusEvent.php | 3 +- app/Jobs/GetWebContentJob.php | 13 ++++-- app/helpers.php | 14 ++++++ tests/Feature/Jobs/GetWebContentJobTest.php | 2 +- tests/Feature/PdfTransformerTest.php | 17 +++++-- tests/Feature/PowerPointTransformerTest.php | 3 +- tests/Feature/TextChunkerTest.php | 9 ++++ tests/fixtures/example_pdf_text.txt | 46 +++++++++++++++++++ 10 files changed, 149 insertions(+), 45 deletions(-) create mode 100644 tests/fixtures/example_pdf_text.txt diff --git a/app/Domains/Documents/Transformers/PdfTransformer.php b/app/Domains/Documents/Transformers/PdfTransformer.php index 1771fcdd..67b6fbd0 100644 --- a/app/Domains/Documents/Transformers/PdfTransformer.php +++ b/app/Domains/Documents/Transformers/PdfTransformer.php @@ -4,6 +4,7 @@ use App\Domains\Collections\CollectionStatusEnum; use App\Events\CollectionStatusEvent; +use App\Helpers\TextChunker; use App\Jobs\SummarizeDataJob; use App\Jobs\SummarizeDocumentJob; use App\Jobs\VectorlizeDataJob; @@ -34,23 +35,31 @@ public function handle(Document $document): Document try { $page_number = $page_number + 1; $pageContent = $page->getText(); - $guid = md5($pageContent); - $DocumentChunk = DocumentChunk::updateOrCreate( - [ - 'guid' => $guid, - 'document_id' => $this->document->id, - ], - [ - 'content' => $pageContent, - 'sort_order' => $page_number, - ] - ); - $chunks[] = [ - new VectorlizeDataJob($DocumentChunk), - new SummarizeDataJob($DocumentChunk), - ]; - CollectionStatusEvent::dispatch($document->collection, CollectionStatusEnum::PROCESSING); + $chunked_chunks = TextChunker::handle($pageContent); + foreach($chunked_chunks as $chunkSection => $chunkContent) { + $guid = md5($chunkContent); + $DocumentChunk = DocumentChunk::updateOrCreate( + [ + 'document_id' => $this->document->id, + 'sort_order' => $page_number, + 'section_number' => $chunkSection, + ], + [ + 'guid' => $guid, + 'content' => $chunkContent, + ] + ); + + $chunks[] = [ + new VectorlizeDataJob($DocumentChunk), + new SummarizeDataJob($DocumentChunk), + ]; + + + } + notify_collection_ui($document->collection, CollectionStatusEnum::PROCESSING, 'Processing Document'); + } catch (\Exception $e) { Log::error('Error parsing PDF', ['error' => $e->getMessage()]); } diff --git a/app/Domains/Documents/Transformers/PowerPointTransformer.php b/app/Domains/Documents/Transformers/PowerPointTransformer.php index 7ba2032a..a4373d27 100644 --- a/app/Domains/Documents/Transformers/PowerPointTransformer.php +++ b/app/Domains/Documents/Transformers/PowerPointTransformer.php @@ -2,7 +2,9 @@ namespace App\Domains\Documents\Transformers; +use App\Domains\Collections\CollectionStatusEnum; use App\Domains\UnStructured\StructuredDto; +use App\Helpers\TextChunker; use App\Jobs\SummarizeDataJob; use App\Jobs\VectorlizeDataJob; use App\Models\Document; @@ -32,26 +34,38 @@ public function handle(Document $document): array while ($results->valid()) { /** @var StructuredDto $dto */ $dto = $results->current(); - $DocumentChunk = DocumentChunk::updateOrCreate( - [ - 'guid' => $dto->guid, - 'document_id' => $this->document->id, - ], - [ - 'content' => $dto->content, - 'sort_order' => $dto->page, - 'meta_data' => $dto->toArray(), - ] - ); - - $chunks[] = [ - new VectorlizeDataJob($DocumentChunk), - new SummarizeDataJob($DocumentChunk), - ]; + + $content = $dto->content; + + $chunked_chunks = TextChunker::handle($content); + + foreach($chunked_chunks as $chunkSection => $chunkContent) { + $DocumentChunk = DocumentChunk::updateOrCreate( + [ + 'document_id' => $this->document->id, + 'sort_order' => $dto->page, + 'section_number' => $chunkSection, + ], + [ + 'guid' => $dto->guid, + 'content' => $chunkContent, + 'meta_data' => $dto->toArray(), + ] + ); + + $chunks[] = [ + new VectorlizeDataJob($DocumentChunk), + new SummarizeDataJob($DocumentChunk), + ]; + } + $results->next(); } + notify_collection_ui($document->collection, CollectionStatusEnum::PROCESSING, 'Processing Document'); + + Log::info('PowerPointTransformer:handle', ['chunks' => count($chunks)]); return $chunks; diff --git a/app/Events/CollectionStatusEvent.php b/app/Events/CollectionStatusEvent.php index 02c74b00..2f2056af 100644 --- a/app/Events/CollectionStatusEvent.php +++ b/app/Events/CollectionStatusEvent.php @@ -17,7 +17,7 @@ class CollectionStatusEvent implements ShouldBroadcast /** * Create a new event instance. */ - public function __construct(public Collection $collection, public CollectionStatusEnum $status) + public function __construct(public Collection $collection, public CollectionStatusEnum $status, public string $message = '') { // } @@ -44,6 +44,7 @@ public function broadcastWith(): array return [ 'id' => $this->collection->id, 'status' => $this->status->value, + 'message' => $this->message, ]; } diff --git a/app/Jobs/GetWebContentJob.php b/app/Jobs/GetWebContentJob.php index 10aef6c3..da1a3eae 100644 --- a/app/Jobs/GetWebContentJob.php +++ b/app/Jobs/GetWebContentJob.php @@ -5,6 +5,7 @@ use App\Domains\Documents\StatusEnum; use App\Domains\Documents\TypesEnum; use App\Domains\Sources\WebSearch\Response\WebResponseDto; +use App\Helpers\TextChunker; use App\Models\Document; use App\Models\DocumentChunk; use App\Models\Source; @@ -99,17 +100,21 @@ public function handle(): void $page_number = 1; - $chunks = chunk_string($results, $maxTokenSize); + $chunked_chunks = TextChunker::handle($results); + + foreach($chunked_chunks as $chunkSection => $chunkContent) { + + $guid = md5($chunkContent); - foreach ($chunks as $chunk) { $DocumentChunk = DocumentChunk::updateOrCreate( [ - 'guid' => $guid.'-'.$page_number, 'document_id' => $document->id, 'sort_order' => $page_number, + 'section_number' => $chunkSection, ], [ - 'content' => $chunk, + 'guid' => $guid, + 'content' => $chunkContent, ] ); diff --git a/app/helpers.php b/app/helpers.php index 0d5fcf73..9ae32de9 100644 --- a/app/helpers.php +++ b/app/helpers.php @@ -1,6 +1,9 @@ $e->getMessage()]); + } + } +} + if (! function_exists('remove_ascii')) { function remove_ascii($string): string { diff --git a/tests/Feature/Jobs/GetWebContentJobTest.php b/tests/Feature/Jobs/GetWebContentJobTest.php index 0ae983fe..5c97ec07 100644 --- a/tests/Feature/Jobs/GetWebContentJobTest.php +++ b/tests/Feature/Jobs/GetWebContentJobTest.php @@ -50,7 +50,7 @@ public function test_job_html(): void $job->handle(); $this->assertDatabaseCount('documents', 1); - $this->assertDatabaseCount('document_chunks', 4); + $this->assertDatabaseCount('document_chunks', 82); } diff --git a/tests/Feature/PdfTransformerTest.php b/tests/Feature/PdfTransformerTest.php index 5dfc5d15..acd7c395 100644 --- a/tests/Feature/PdfTransformerTest.php +++ b/tests/Feature/PdfTransformerTest.php @@ -4,7 +4,9 @@ use App\Domains\Documents\Transformers\PdfTransformer; use App\Models\Document; +use App\Models\DocumentChunk; use Illuminate\Support\Facades\Bus; +use Illuminate\Support\Facades\DB; use Illuminate\Support\Facades\File; use Tests\TestCase; @@ -24,11 +26,17 @@ public function test_gets_data_from_pdf() { Bus::fake(); $this->webFileDownloadSetup(); - $document = Document::factory()->pdf()->create(); $this->assertDatabaseCount('document_chunks', 0); $transformer = new PdfTransformer(); $transformer->handle($this->document); - $this->assertDatabaseCount('document_chunks', 10); + $this->assertDatabaseCount('document_chunks', 66); + + $pages = 10; + + $this->assertCount(10, DB::table('document_chunks') + ->where("section_number", 0) + ->where("document_id", $this->document->id) + ->get()); Bus::assertBatchCount(1); @@ -38,12 +46,11 @@ public function test_does_not_repeat() { Bus::fake(); $this->webFileDownloadSetup(); - $document = Document::factory()->pdf()->create(); $this->assertDatabaseCount('document_chunks', 0); $transformer = new PdfTransformer(); $transformer->handle($this->document); - $this->assertDatabaseCount('document_chunks', 10); + $this->assertDatabaseCount('document_chunks', 66); $transformer->handle($this->document); - $this->assertDatabaseCount('document_chunks', 10); + $this->assertDatabaseCount('document_chunks', 66); } } diff --git a/tests/Feature/PowerPointTransformerTest.php b/tests/Feature/PowerPointTransformerTest.php index 71af9daf..ac095a71 100644 --- a/tests/Feature/PowerPointTransformerTest.php +++ b/tests/Feature/PowerPointTransformerTest.php @@ -15,7 +15,7 @@ class PowerPointTransformerTest extends TestCase protected function setUp(): void { parent::setUp(); // TODO: Change the autogenerated stub - $this->markTestIncomplete('@TODO It works with a test PPTX but I might try a different approach since more complicated PPTX did not'); + //$this->markTestIncomplete('@TODO It works with a test PPTX but I might try a different approach since more complicated PPTX did not'); } protected function tearDown(): void @@ -36,7 +36,6 @@ public function test_gets_data_from_pptx() $transformer->handle($this->document); $this->assertDatabaseCount('document_chunks', 5); - Bus::assertBatchCount(1); } diff --git a/tests/Feature/TextChunkerTest.php b/tests/Feature/TextChunkerTest.php index d6016b6a..f7ef3e53 100644 --- a/tests/Feature/TextChunkerTest.php +++ b/tests/Feature/TextChunkerTest.php @@ -27,4 +27,13 @@ public function test_chunking(): void //put_fixture("chunkable_text_results.json", $results); $this->assertEquals(get_fixture('chunkable_text_results.json'), $results); } + + public function test_larger_file(): void + { + $text = get_fixture('example_pdf_text.txt', false); + + $results = TextChunker::handle($text); + + $this->assertCount(8, $results); + } } diff --git a/tests/fixtures/example_pdf_text.txt b/tests/fixtures/example_pdf_text.txt new file mode 100644 index 00000000..1772d831 --- /dev/null +++ b/tests/fixtures/example_pdf_text.txt @@ -0,0 +1,46 @@ +Waste Discharge Requirements General Order R5-2013-0120-09 8 +Growers within the Tulare Lake Basin Area +September 2013 – Last Revised April 2021 +areas based on definitions provided in Attachment E to this Order and guidance provided +in the MRP for development of the Groundwater Quality Assessment Report. The +Executive Officer will review Third-Party proposed high and low vulnerability areas and +make the final determination of these areas. High and low vulnerability areas will be +reviewed and updated throughout the implementation of this Order. A Member who is +covered under this Order must comply with MRP Order R5-2013-0120-09 which is part of +this Order, and future revisions thereto by the Executive Officer or Board. +25. The surface water quality monitoring and trend groundwater quality monitoring under this +Order are regional and representative in nature and do not measure individual field +discharge. The surface water quality monitoring will take place in surface water bodies +that are representative of surface waters receiving irrigated agricultural discharges. The +trend groundwater monitoring will take place in aquifers that are representative of +aquifers receiving irrigated agricultural discharges. The benefits of regional monitoring +include the ability to determine whether water bodies accepting discharges from +numerous irrigated lands are meeting water quality objectives and to determine whether +practices, at the watershed level, are protective of water quality. There is a cost savings +with representative monitoring, since all surface waters or all groundwater aquifers that +receive irrigated agricultural discharges do not need to be monitored. Surface water and +groundwater monitoring sites are selected to represent areas with similar conditions (e.g., +crops grown, soil type). However, there are limitations to regional monitoring’s +effectiveness in determining possible sources of water quality problems, the effectiveness +of management practices, and individual compliance with this Order’s requirements. +Therefore, through the reporting and evaluation of applied nitrogen versus removed +nitrogen, the Management Practices Evaluation Program, development and utilization of +Groundwater Protection Targets, Surface Water Quality Management Plans, and +Groundwater Quality Management Plans, the Third-Party must evaluate the effectiveness +of management practices in protecting water quality. In addition, Members must report +the practices they are implementing to protect water quality and comply with Surface and +Groundwater Quality Management Plans as applicable. Through the evaluations and +studies conducted by the Third-Party, the reporting of applied and removed nitrogen as +well as the management practices used by the Members, and the Board’s compliance +and enforcement activities, the Board will be able to determine whether a Member is +complying with the Order. +Where required monitoring and evaluation does not allow the Central Valley Water Board +to determine potential sources of water quality problems or identify whether management +practices are effective, this Order requires the Third-Party to provide technical reports at +the direction of the Executive Officer. Such technical reports are needed when monitoring +or other available information is not sufficient to determine the effects of irrigated +agricultural waste discharges to state waters. It may also be necessary for the Central +Valley Water Board to conduct investigations by obtaining information directly from +Members to address individual compliance. +26. The Basin Plan designates beneficial uses, establishes water quality objectives, contains +programs of implementation needed to achieve water quality objectives, and references \ No newline at end of file From 9ddfc9ea48f615a81549c2629d8e11c2cca2f3a0 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Wed, 8 May 2024 07:56:46 -0400 Subject: [PATCH 05/20] ok the section chunks are working for different content types --- app/Http/Resources/DocumentResource.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/Http/Resources/DocumentResource.php b/app/Http/Resources/DocumentResource.php index d2e76aec..c2d985fd 100644 --- a/app/Http/Resources/DocumentResource.php +++ b/app/Http/Resources/DocumentResource.php @@ -21,7 +21,7 @@ public function toArray(Request $request): array 'summary_markdown' => str($this->summary)->markdown(), 'type' => str($this->type->name)->title()->toString(), 'status' => str($this->status->name)->headline()->toString(), - 'document_chunks_count' => $this->document_chunks()->count(), + 'document_chunks_count' => $this->document_chunks()->where('section_number', 0)->count(), 'tags' => TagResource::collection($this->tags), ]; } From 7805fa0a34d69b7ea50ae372652b2e02b3ed8e46 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Wed, 8 May 2024 08:04:59 -0400 Subject: [PATCH 06/20] update text input as well --- .../Controllers/TextDocumentController.php | 35 ++++++------------- .../TextDocumentControllerTest.php | 13 ++----- 2 files changed, 14 insertions(+), 34 deletions(-) diff --git a/app/Http/Controllers/TextDocumentController.php b/app/Http/Controllers/TextDocumentController.php index 930a786b..58978236 100644 --- a/app/Http/Controllers/TextDocumentController.php +++ b/app/Http/Controllers/TextDocumentController.php @@ -6,6 +6,7 @@ use App\Domains\Documents\StatusEnum; use App\Domains\Documents\TypesEnum; use App\Events\CollectionStatusEvent; +use App\Helpers\TextChunker; use App\Jobs\SummarizeDataJob; use App\Jobs\SummarizeDocumentJob; use App\Jobs\VectorlizeDataJob; @@ -36,39 +37,23 @@ public function store(Collection $collection, Request $request) 'status_summary' => StatusEnum::Pending, ]); - $chunks = []; - - $page_number = 0; - - $prompt = <<getDriver()) - ->completion($prompt); $jobs = []; + $page_number = 1; + $chunked_chunks = TextChunker::handle($validated['content']); + foreach($chunked_chunks as $chunkSection => $chunkContent) { - $decoded = json_decode($chunks->content); - - foreach ($decoded as $chunk) { try { - $page_number = $page_number + 1; - $guid = Str::uuid(); + $guid = md5($chunkContent); $DocumentChunk = DocumentChunk::updateOrCreate( [ - 'guid' => $guid, 'document_id' => $document->id, + 'sort_order' => $page_number, + 'section_number' => $chunkSection, ], [ - 'content' => $chunk, + 'guid' => $guid, + 'content' => $chunkContent, 'sort_order' => $page_number, ] ); @@ -87,8 +72,10 @@ public function store(Collection $collection, Request $request) } catch (\Exception $e) { Log::error('Error parsing PDF', ['error' => $e->getMessage()]); } + } + Bus::batch($jobs) ->name("Chunking Document - $document->file_path") ->finally(function (Batch $batch) use ($document) { diff --git a/tests/Feature/Http/Controllers/TextDocumentControllerTest.php b/tests/Feature/Http/Controllers/TextDocumentControllerTest.php index f2835329..1c1d1946 100644 --- a/tests/Feature/Http/Controllers/TextDocumentControllerTest.php +++ b/tests/Feature/Http/Controllers/TextDocumentControllerTest.php @@ -17,30 +17,23 @@ public function test_create(): void { Bus::fake(); - $data = get_fixture('chunks.json'); - - LlmDriverFacade::shouldReceive('driver->completion') - ->once() - ->andReturn(CompletionResponse::from([ - 'content' => $data, - ])); - $user = $this->createUserWithCurrentTeam(); $collection = Collection::factory()->create([ 'team_id' => $user->currentTeam->id, ]); + $content = get_fixture("chunkable_text.txt", false); $this->assertDatabaseCount('documents', 0); $this->assertDatabaseCount('document_chunks', 0); $this->actingAs($user)->post(route('text-documents.store', [ 'collection' => $collection->id, 'name' => 'Foo bar', ]), [ - 'content' => 'This is a text document', + 'content' => $content, ])->assertStatus(302); $this->assertDatabaseCount('documents', 1); - $this->assertDatabaseCount('document_chunks', 15); + $this->assertDatabaseCount('document_chunks', 4); Bus::assertBatchCount(1); } } From ba5d29bcf1e775002ab8d415aff5743855a4d28c Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Wed, 8 May 2024 08:05:19 -0400 Subject: [PATCH 07/20] update text input as well --- app/Domains/Documents/Transformers/PdfTransformer.php | 6 ++---- .../Documents/Transformers/PowerPointTransformer.php | 6 ++---- app/Http/Controllers/TextDocumentController.php | 6 +----- app/Jobs/GetWebContentJob.php | 2 +- .../Http/Controllers/TextDocumentControllerTest.php | 4 +--- tests/Feature/PdfTransformerTest.php | 8 +++----- tests/Feature/PowerPointTransformerTest.php | 1 - 7 files changed, 10 insertions(+), 23 deletions(-) diff --git a/app/Domains/Documents/Transformers/PdfTransformer.php b/app/Domains/Documents/Transformers/PdfTransformer.php index 67b6fbd0..6a8aaf17 100644 --- a/app/Domains/Documents/Transformers/PdfTransformer.php +++ b/app/Domains/Documents/Transformers/PdfTransformer.php @@ -3,7 +3,6 @@ namespace App\Domains\Documents\Transformers; use App\Domains\Collections\CollectionStatusEnum; -use App\Events\CollectionStatusEvent; use App\Helpers\TextChunker; use App\Jobs\SummarizeDataJob; use App\Jobs\SummarizeDocumentJob; @@ -37,7 +36,7 @@ public function handle(Document $document): Document $pageContent = $page->getText(); $chunked_chunks = TextChunker::handle($pageContent); - foreach($chunked_chunks as $chunkSection => $chunkContent) { + foreach ($chunked_chunks as $chunkSection => $chunkContent) { $guid = md5($chunkContent); $DocumentChunk = DocumentChunk::updateOrCreate( [ @@ -55,8 +54,7 @@ public function handle(Document $document): Document new VectorlizeDataJob($DocumentChunk), new SummarizeDataJob($DocumentChunk), ]; - - + } notify_collection_ui($document->collection, CollectionStatusEnum::PROCESSING, 'Processing Document'); diff --git a/app/Domains/Documents/Transformers/PowerPointTransformer.php b/app/Domains/Documents/Transformers/PowerPointTransformer.php index a4373d27..078db793 100644 --- a/app/Domains/Documents/Transformers/PowerPointTransformer.php +++ b/app/Domains/Documents/Transformers/PowerPointTransformer.php @@ -39,7 +39,7 @@ public function handle(Document $document): array $chunked_chunks = TextChunker::handle($content); - foreach($chunked_chunks as $chunkSection => $chunkContent) { + foreach ($chunked_chunks as $chunkSection => $chunkContent) { $DocumentChunk = DocumentChunk::updateOrCreate( [ 'document_id' => $this->document->id, @@ -52,20 +52,18 @@ public function handle(Document $document): array 'meta_data' => $dto->toArray(), ] ); - + $chunks[] = [ new VectorlizeDataJob($DocumentChunk), new SummarizeDataJob($DocumentChunk), ]; } - $results->next(); } notify_collection_ui($document->collection, CollectionStatusEnum::PROCESSING, 'Processing Document'); - Log::info('PowerPointTransformer:handle', ['chunks' => count($chunks)]); return $chunks; diff --git a/app/Http/Controllers/TextDocumentController.php b/app/Http/Controllers/TextDocumentController.php index 58978236..bbd1abb3 100644 --- a/app/Http/Controllers/TextDocumentController.php +++ b/app/Http/Controllers/TextDocumentController.php @@ -17,8 +17,6 @@ use Illuminate\Http\Request; use Illuminate\Support\Facades\Bus; use Illuminate\Support\Facades\Log; -use Illuminate\Support\Str; -use LlmLaraHub\LlmDriver\LlmDriverFacade; class TextDocumentController extends Controller { @@ -37,11 +35,10 @@ public function store(Collection $collection, Request $request) 'status_summary' => StatusEnum::Pending, ]); - $jobs = []; $page_number = 1; $chunked_chunks = TextChunker::handle($validated['content']); - foreach($chunked_chunks as $chunkSection => $chunkContent) { + foreach ($chunked_chunks as $chunkSection => $chunkContent) { try { $guid = md5($chunkContent); @@ -75,7 +72,6 @@ public function store(Collection $collection, Request $request) } - Bus::batch($jobs) ->name("Chunking Document - $document->file_path") ->finally(function (Batch $batch) use ($document) { diff --git a/app/Jobs/GetWebContentJob.php b/app/Jobs/GetWebContentJob.php index da1a3eae..409d768c 100644 --- a/app/Jobs/GetWebContentJob.php +++ b/app/Jobs/GetWebContentJob.php @@ -102,7 +102,7 @@ public function handle(): void $chunked_chunks = TextChunker::handle($results); - foreach($chunked_chunks as $chunkSection => $chunkContent) { + foreach ($chunked_chunks as $chunkSection => $chunkContent) { $guid = md5($chunkContent); diff --git a/tests/Feature/Http/Controllers/TextDocumentControllerTest.php b/tests/Feature/Http/Controllers/TextDocumentControllerTest.php index 1c1d1946..0c4ed0e1 100644 --- a/tests/Feature/Http/Controllers/TextDocumentControllerTest.php +++ b/tests/Feature/Http/Controllers/TextDocumentControllerTest.php @@ -4,8 +4,6 @@ use App\Models\Collection; use Illuminate\Support\Facades\Bus; -use LlmLaraHub\LlmDriver\LlmDriverFacade; -use LlmLaraHub\LlmDriver\Responses\CompletionResponse; use Tests\TestCase; class TextDocumentControllerTest extends TestCase @@ -23,7 +21,7 @@ public function test_create(): void 'team_id' => $user->currentTeam->id, ]); - $content = get_fixture("chunkable_text.txt", false); + $content = get_fixture('chunkable_text.txt', false); $this->assertDatabaseCount('documents', 0); $this->assertDatabaseCount('document_chunks', 0); $this->actingAs($user)->post(route('text-documents.store', [ diff --git a/tests/Feature/PdfTransformerTest.php b/tests/Feature/PdfTransformerTest.php index acd7c395..0492fe8c 100644 --- a/tests/Feature/PdfTransformerTest.php +++ b/tests/Feature/PdfTransformerTest.php @@ -3,8 +3,6 @@ namespace Tests\Feature; use App\Domains\Documents\Transformers\PdfTransformer; -use App\Models\Document; -use App\Models\DocumentChunk; use Illuminate\Support\Facades\Bus; use Illuminate\Support\Facades\DB; use Illuminate\Support\Facades\File; @@ -34,9 +32,9 @@ public function test_gets_data_from_pdf() $pages = 10; $this->assertCount(10, DB::table('document_chunks') - ->where("section_number", 0) - ->where("document_id", $this->document->id) - ->get()); + ->where('section_number', 0) + ->where('document_id', $this->document->id) + ->get()); Bus::assertBatchCount(1); diff --git a/tests/Feature/PowerPointTransformerTest.php b/tests/Feature/PowerPointTransformerTest.php index ac095a71..a3678ffb 100644 --- a/tests/Feature/PowerPointTransformerTest.php +++ b/tests/Feature/PowerPointTransformerTest.php @@ -36,7 +36,6 @@ public function test_gets_data_from_pptx() $transformer->handle($this->document); $this->assertDatabaseCount('document_chunks', 5); - } public function test_does_not_repeat() From a35e327533ff16b0f1cc5dbe0c841aab3d5af593 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Wed, 8 May 2024 13:02:14 -0400 Subject: [PATCH 08/20] this should do it but I made a few changes I might convert back --- Modules/LlmDriver/app/BaseClient.php | 2 +- Modules/LlmDriver/app/DistanceQuery.php | 44 +- .../app/Functions/SearchAndSummarize.php | 11 +- .../tests/Feature/DistanceQueryTest.php | 83 ++ .../TagFunction/app/Jobs/TagDocumentJob.php | 2 +- Modules/TagFunction/app/TagManager.php | 5 +- app/Domains/Agents/VerifyResponseAgent.php | 5 +- .../Messages/SearchAndSummarizeChatRepo.php | 20 +- app/Events/ChatUpdatedEvent.php | 3 +- app/Http/Controllers/ChatController.php | 14 +- app/Http/Resources/ChatResource.php | 1 + .../MessageDocumentReferenceResource.php | 2 + app/Http/Resources/MessageResource.php | 2 +- app/Jobs/OrchestrateJob.php | 32 + ...SimpleSearchAndSummarizeOrchestrateJob.php | 32 + resources/js/Pages/Chat/ChatInputThreaded.vue | 30 +- .../Pages/Chat/Components/ReferenceTable.vue | 2 + resources/js/Pages/Collection/Show.vue | 7 + ...mbedding_question_distance_php_search.json | 1026 +++++++++++++++++ tests/fixtures/messages_llama3.json | 16 +- tests/fixtures/ollama_chat_results.json | 18 +- tests/fixtures/results_that_match.json | 25 + 22 files changed, 1332 insertions(+), 50 deletions(-) create mode 100644 app/Jobs/OrchestrateJob.php create mode 100644 app/Jobs/SimpleSearchAndSummarizeOrchestrateJob.php create mode 100644 tests/fixtures/embedding_question_distance_php_search.json create mode 100644 tests/fixtures/results_that_match.json diff --git a/Modules/LlmDriver/app/BaseClient.php b/Modules/LlmDriver/app/BaseClient.php index 7618bbb4..46d1fd59 100644 --- a/Modules/LlmDriver/app/BaseClient.php +++ b/Modules/LlmDriver/app/BaseClient.php @@ -178,7 +178,7 @@ function ($item) { )->implode('\n'); $systemPrompt = <<select('id') ->where('documents.collection_id', $collectionId) @@ -77,6 +84,39 @@ public function distance( ->unique('id') ->take(10); - return $results; + $siblingsIncluded = collect(); + + foreach ($results as $result) { + if ($result->section_number === 0) { + $siblingsIncluded->push($result); + } else { + if ($sibling = $this->getSiblingOrNot($result, $result->section_number - 1)) { + $siblingsIncluded->push($sibling); + } + + $siblingsIncluded->push($result); + } + + if ($sibling = $this->getSiblingOrNot($result, $result->section_number + 1)) { + $siblingsIncluded->push($sibling); + } + } + + return $siblingsIncluded; + } + + protected function getSiblingOrNot(DocumentChunk $result, int $sectionNumber): false|DocumentChunk + { + $sibling = DocumentChunk::query() + ->where('document_id', $result->document_id) + ->where('sort_order', $result->sort_order) + ->where('section_number', $sectionNumber) + ->first(); + + if ($sibling?->id) { + return $sibling; + } + + return false; } } diff --git a/Modules/LlmDriver/app/Functions/SearchAndSummarize.php b/Modules/LlmDriver/app/Functions/SearchAndSummarize.php index 51e15c3b..bdcefaba 100644 --- a/Modules/LlmDriver/app/Functions/SearchAndSummarize.php +++ b/Modules/LlmDriver/app/Functions/SearchAndSummarize.php @@ -81,7 +81,7 @@ public function handle( $context = implode(' ', $content); $contentFlattened = << $contentFlattened, + 'driver' => $model->getChat()->chatable->getDriver(), + ]); $messageArray = MessageInDto::from([ 'content' => $contentFlattened, @@ -114,7 +117,7 @@ public function handle( /** @var CompletionResponse $response */ $response = LlmDriverFacade::driver( $model->getChatable()->getDriver() - )->chat([$messageArray]); + )->completion($contentFlattened); /** * Lets Verify @@ -142,6 +145,8 @@ public function handle( $message = $model->getChat()->addInput($response->response, RoleEnum::Assistant); $this->saveDocumentReference($message, $documentChunkResults); + + notify_ui($model->getChat(), 'Complete'); return FunctionResponse::from( [ diff --git a/Modules/LlmDriver/tests/Feature/DistanceQueryTest.php b/Modules/LlmDriver/tests/Feature/DistanceQueryTest.php index a4c0a5df..7017f023 100644 --- a/Modules/LlmDriver/tests/Feature/DistanceQueryTest.php +++ b/Modules/LlmDriver/tests/Feature/DistanceQueryTest.php @@ -35,4 +35,87 @@ public function test_results() $this->assertCount(1, $results); } + + public function test_has_sibling_below() + { + $files = File::files(base_path('tests/fixtures/document_chunks')); + $document = Document::factory()->create([ + 'id' => 31, + ]); + + foreach ($files as $file) { + $data = json_decode(File::get($file), true); + DocumentChunk::factory()->create($data); + } + + $documentSibling = DocumentChunk::where('guid', 'ffc97910f334c141b55af33b3c0b67c4')->first(); + + $documentSibling->section_number = 0; + + $documentSibling->save(); + + $nextSibling = DocumentChunk::factory()->create([ + 'document_id' => 31, + 'sort_order' => $documentSibling->sort_order, + 'section_number' => 1, + 'guid' => 'ffc97910f334c141b55af33b3c0b67c4', + ]); + + $question = get_fixture('embedding_question_distance.json'); + + $vector = new Vector($question); + + $results = (new DistanceQuery())->distance( + 'embedding_1024', + $document->collection_id, + $vector); + + $this->assertCount(2, $results); + + } + + public function test_has_sibling_above_and_below() + { + $files = File::files(base_path('tests/fixtures/document_chunks')); + $document = Document::factory()->create([ + 'id' => 31, + ]); + + foreach ($files as $file) { + $data = json_decode(File::get($file), true); + DocumentChunk::factory()->create($data); + } + + $documentSibling = DocumentChunk::where('guid', 'ffc97910f334c141b55af33b3c0b67c4')->first(); + + $documentSibling->section_number = 1; + + $documentSibling->save(); + + DocumentChunk::factory()->create([ + 'document_id' => 31, + 'sort_order' => $documentSibling->sort_order, + 'section_number' => 0, + 'guid' => 'ffc97910f334c141b55af33b3c0b67c4', + ]); + + DocumentChunk::factory()->create([ + 'document_id' => 31, + 'sort_order' => $documentSibling->sort_order, + 'section_number' => 2, + 'guid' => 'ffc97910f334c141b55af33b3c0b67c4', + ]); + + $question = get_fixture('embedding_question_distance.json'); + + $vector = new Vector($question); + + $results = (new DistanceQuery())->distance( + 'embedding_1024', + $document->collection_id, + $vector); + + $this->assertCount(3, $results); + + } } diff --git a/Modules/TagFunction/app/Jobs/TagDocumentJob.php b/Modules/TagFunction/app/Jobs/TagDocumentJob.php index 37da015c..127e485b 100644 --- a/Modules/TagFunction/app/Jobs/TagDocumentJob.php +++ b/Modules/TagFunction/app/Jobs/TagDocumentJob.php @@ -28,7 +28,7 @@ public function __construct(public Document $document) */ public function handle(): void { - if ($this->batch()->cancelled()) { + if ($this->batch()?->cancelled()) { // Determine if the batch has been cancelled... return; diff --git a/Modules/TagFunction/app/TagManager.php b/Modules/TagFunction/app/TagManager.php index affed277..a1d648e6 100644 --- a/Modules/TagFunction/app/TagManager.php +++ b/Modules/TagFunction/app/TagManager.php @@ -4,6 +4,7 @@ use App\Domains\Agents\VerifyPromptInputDto; use App\Domains\Agents\VerifyPromptOutputDto; +use App\Domains\Collections\CollectionStatusEnum; use App\Models\Document; use Facades\App\Domains\Agents\VerifyResponseAgent; use Illuminate\Support\Collection; @@ -63,7 +64,7 @@ public function handle(Document $document): void }); foreach ($document->document_chunks as $chunk) { - $tagsFlat = $this->tags->implode(','); + $tagsFlat = $this->tags->take(20)->implode(','); $summary = $chunk->summary; $prompt = <<sort_order} in the document , Can you make some tags I can use. @@ -123,5 +124,7 @@ public function handle(Document $document): void $this->tags->push($tag); } } + + notify_collection_ui($document->collection, CollectionStatusEnum::PROCESSING, 'Tags added'); } } diff --git a/app/Domains/Agents/VerifyResponseAgent.php b/app/Domains/Agents/VerifyResponseAgent.php index d20eaa2a..9ca4ab28 100644 --- a/app/Domains/Agents/VerifyResponseAgent.php +++ b/app/Domains/Agents/VerifyResponseAgent.php @@ -24,6 +24,9 @@ public function verify(VerifyPromptInputDto $input): VerifyPromptOutputDto Just return the text as if answering the intial users prompt "ORIGINAL PROMPT" Using the CONTEXT make sure the LLM RESPONSE is accurent and just clean it up if not. +$verifyPrompt + + ### START ORIGINAL PROMPT $originalPrompt ### END ORIGINAL PROMPT @@ -39,8 +42,6 @@ public function verify(VerifyPromptInputDto $input): VerifyPromptOutputDto EOT; - //put_fixture("verified_prompt_not_working.txt", $prompt, false); - Log::info('[LaraChain] VerifyResponseAgent::verify', [ 'prompt' => $prompt, ]); diff --git a/app/Domains/Messages/SearchAndSummarizeChatRepo.php b/app/Domains/Messages/SearchAndSummarizeChatRepo.php index a8a4510d..4d7af5c2 100644 --- a/app/Domains/Messages/SearchAndSummarizeChatRepo.php +++ b/app/Domains/Messages/SearchAndSummarizeChatRepo.php @@ -59,12 +59,12 @@ public function search(Chat $chat, string $input): string $context = implode(' ', $content); $contentFlattened = <<getChatResponse(); - Log::info('[LaraChain] Getting the Summary'); + Log::info('[LaraChain] Getting the Summary', [ + 'input' => $contentFlattened, + 'driver' => $chat->chatable->getDriver(), + ]); notify_ui($chat, 'Building Summary'); /** @var CompletionResponse $response */ $response = LlmDriverFacade::driver( $chat->chatable->getDriver() - )->chat($latestMessagesArray); + )->completion($contentFlattened); /** * Lets Verify */ - $verifyPrompt = <<<'PROMPT' + $verifyPrompt = <<<'EOD' This is the results from a Vector search based on the Users Prompt. Then that was passed into the LLM to summarize the results. -PROMPT; +EOD; $dto = VerifyPromptInputDto::from( [ @@ -115,9 +118,14 @@ public function search(Chat $chat, string $input): string /** @var VerifyPromptOutputDto $response */ $response = VerifyResponseAgent::verify($dto); + Log::info('[LaraChain] Verification', [ + 'output' => $response->response, + ]); + $message = $chat->addInput($response->response, RoleEnum::Assistant); $this->saveDocumentReference($message, $documentChunkResults); + notify_ui($chat, 'Complete'); return $response->response; } diff --git a/app/Events/ChatUpdatedEvent.php b/app/Events/ChatUpdatedEvent.php index d5fd93bb..359782c9 100644 --- a/app/Events/ChatUpdatedEvent.php +++ b/app/Events/ChatUpdatedEvent.php @@ -7,10 +7,11 @@ use Illuminate\Broadcasting\InteractsWithSockets; use Illuminate\Broadcasting\PrivateChannel; use Illuminate\Contracts\Broadcasting\ShouldBroadcast; +use Illuminate\Contracts\Queue\ShouldQueue; use Illuminate\Foundation\Events\Dispatchable; use Illuminate\Queue\SerializesModels; -class ChatUpdatedEvent implements ShouldBroadcast +class ChatUpdatedEvent implements ShouldBroadcast, ShouldQueue { use Dispatchable, InteractsWithSockets, SerializesModels; diff --git a/app/Http/Controllers/ChatController.php b/app/Http/Controllers/ChatController.php index d7c22618..a7f10663 100644 --- a/app/Http/Controllers/ChatController.php +++ b/app/Http/Controllers/ChatController.php @@ -9,11 +9,11 @@ use App\Http\Resources\ChatResource; use App\Http\Resources\CollectionResource; use App\Http\Resources\MessageResource; +use App\Jobs\OrchestrateJob; +use App\Jobs\SimpleSearchAndSummarizeOrchestrateJob; use App\Models\Chat; use App\Models\Collection; use Facades\App\Domains\Agents\VerifyResponseAgent; -use Facades\LlmLaraHub\LlmDriver\Orchestrate; -use Facades\LlmLaraHub\LlmDriver\SimpleSearchAndSummarizeOrchestrate; use Illuminate\Support\Facades\Log; use LlmLaraHub\LlmDriver\LlmDriverFacade; use LlmLaraHub\LlmDriver\Requests\MessageInDto; @@ -98,15 +98,15 @@ public function chat(Chat $chat) show_in_thread: true); } elseif (LlmDriverFacade::driver($chat->getDriver())->hasFunctions()) { - Log::info('[LaraChain] Running Orchestrate'); - $response = Orchestrate::handle($messagesArray, $chat); + Log::info('[LaraChain] Running Orchestrate added to queue'); + OrchestrateJob::dispatch($messagesArray, $chat); } else { - Log::info('[LaraChain] Simple Search and Summarize'); - $response = SimpleSearchAndSummarizeOrchestrate::handle($validated['input'], $chat); + Log::info('[LaraChain] Simple Search and Summarize added to queue'); + SimpleSearchAndSummarizeOrchestrateJob::dispatch($validated['input'], $chat); } ChatUpdatedEvent::dispatch($chat->chatable, $chat); - return response()->json(['message' => $response]); + return response()->json(['message' => 'ok']); } } diff --git a/app/Http/Resources/ChatResource.php b/app/Http/Resources/ChatResource.php index f190fd66..3597d339 100644 --- a/app/Http/Resources/ChatResource.php +++ b/app/Http/Resources/ChatResource.php @@ -16,6 +16,7 @@ public function toArray(Request $request): array { return [ 'id' => $this->id, + 'chatable_id' => $this->chatable_id, 'user_id' => new UserResource($this->user), ]; } diff --git a/app/Http/Resources/MessageDocumentReferenceResource.php b/app/Http/Resources/MessageDocumentReferenceResource.php index a6583e83..e6c4eaf3 100644 --- a/app/Http/Resources/MessageDocumentReferenceResource.php +++ b/app/Http/Resources/MessageDocumentReferenceResource.php @@ -24,6 +24,8 @@ public function toArray(Request $request): array 'document_name' => $this->document_chunk?->document->file_path, 'page' => $this->document_chunk?->sort_order, 'distance' => round($this->distance, 2), + 'document_chunk_id' => $this->document_chunk_id, + 'section_number' => $this->document_chunk?->section_number + 1, //since 0 does not look good in the ui 'summary' => str($this->document_chunk?->summary)->markdown(), 'taggings' => $tags, ]; diff --git a/app/Http/Resources/MessageResource.php b/app/Http/Resources/MessageResource.php index ed248996..14b59d63 100644 --- a/app/Http/Resources/MessageResource.php +++ b/app/Http/Resources/MessageResource.php @@ -24,7 +24,7 @@ public function toArray(Request $request): array 'body_markdown' => str($this->body)->markdown(), 'diff_for_humans' => $this->created_at->diffForHumans(), 'message_document_references' => MessageDocumentReferenceResource::collection( - $this->message_document_references()->orderBy('distance', 'asc')->limit(5)->get()), + $this->message_document_references()->orderBy('distance', 'asc')->limit(10)->get()), ]; } } diff --git a/app/Jobs/OrchestrateJob.php b/app/Jobs/OrchestrateJob.php new file mode 100644 index 00000000..0216233a --- /dev/null +++ b/app/Jobs/OrchestrateJob.php @@ -0,0 +1,32 @@ +messagesArray, $this->chat); + } +} diff --git a/app/Jobs/SimpleSearchAndSummarizeOrchestrateJob.php b/app/Jobs/SimpleSearchAndSummarizeOrchestrateJob.php new file mode 100644 index 00000000..fb9ff248 --- /dev/null +++ b/app/Jobs/SimpleSearchAndSummarizeOrchestrateJob.php @@ -0,0 +1,32 @@ +input, $this->chat); + } +} diff --git a/resources/js/Pages/Chat/ChatInputThreaded.vue b/resources/js/Pages/Chat/ChatInputThreaded.vue index f9770b1f..72aad77b 100644 --- a/resources/js/Pages/Chat/ChatInputThreaded.vue +++ b/resources/js/Pages/Chat/ChatInputThreaded.vue @@ -1,7 +1,7 @@ \ No newline at end of file diff --git a/tests/Feature/Models/PromptHistoryTest.php b/tests/Feature/Models/PromptHistoryTest.php index 84f20909..98386030 100644 --- a/tests/Feature/Models/PromptHistoryTest.php +++ b/tests/Feature/Models/PromptHistoryTest.php @@ -17,6 +17,7 @@ public function test_model(): void $this->assertNotNull($model->collection->id); $this->assertNotNull($model->collection->prompt_history); $this->assertNotNull($model->message->id); + $this->assertNotNull($model->message->prompt_histories); } } From fe54b173d7106e74dfeb9a72c8c1a78abfbac272 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Thu, 9 May 2024 09:13:42 -0400 Subject: [PATCH 16/20] add prompts to the ui --- resources/js/Pages/Chat/ChatBaloon.vue | 4 ++-- resources/js/Pages/Chat/Components/History.vue | 13 +++++++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/resources/js/Pages/Chat/ChatBaloon.vue b/resources/js/Pages/Chat/ChatBaloon.vue index 473499d3..90578999 100644 --- a/resources/js/Pages/Chat/ChatBaloon.vue +++ b/resources/js/Pages/Chat/ChatBaloon.vue @@ -69,7 +69,7 @@ const props = defineProps({ -
+
@@ -78,7 +78,7 @@ const props = defineProps({
-
+
diff --git a/resources/js/Pages/Chat/Components/History.vue b/resources/js/Pages/Chat/Components/History.vue index 8b3e2361..64a92644 100644 --- a/resources/js/Pages/Chat/Components/History.vue +++ b/resources/js/Pages/Chat/Components/History.vue @@ -1,7 +1,16 @@