From ba5d29bcf1e775002ab8d415aff5743855a4d28c Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Wed, 8 May 2024 08:05:19 -0400 Subject: [PATCH] update text input as well --- app/Domains/Documents/Transformers/PdfTransformer.php | 6 ++---- .../Documents/Transformers/PowerPointTransformer.php | 6 ++---- app/Http/Controllers/TextDocumentController.php | 6 +----- app/Jobs/GetWebContentJob.php | 2 +- .../Http/Controllers/TextDocumentControllerTest.php | 4 +--- tests/Feature/PdfTransformerTest.php | 8 +++----- tests/Feature/PowerPointTransformerTest.php | 1 - 7 files changed, 10 insertions(+), 23 deletions(-) diff --git a/app/Domains/Documents/Transformers/PdfTransformer.php b/app/Domains/Documents/Transformers/PdfTransformer.php index 67b6fbd0..6a8aaf17 100644 --- a/app/Domains/Documents/Transformers/PdfTransformer.php +++ b/app/Domains/Documents/Transformers/PdfTransformer.php @@ -3,7 +3,6 @@ namespace App\Domains\Documents\Transformers; use App\Domains\Collections\CollectionStatusEnum; -use App\Events\CollectionStatusEvent; use App\Helpers\TextChunker; use App\Jobs\SummarizeDataJob; use App\Jobs\SummarizeDocumentJob; @@ -37,7 +36,7 @@ public function handle(Document $document): Document $pageContent = $page->getText(); $chunked_chunks = TextChunker::handle($pageContent); - foreach($chunked_chunks as $chunkSection => $chunkContent) { + foreach ($chunked_chunks as $chunkSection => $chunkContent) { $guid = md5($chunkContent); $DocumentChunk = DocumentChunk::updateOrCreate( [ @@ -55,8 +54,7 @@ public function handle(Document $document): Document new VectorlizeDataJob($DocumentChunk), new SummarizeDataJob($DocumentChunk), ]; - - + } notify_collection_ui($document->collection, CollectionStatusEnum::PROCESSING, 'Processing Document'); diff --git a/app/Domains/Documents/Transformers/PowerPointTransformer.php b/app/Domains/Documents/Transformers/PowerPointTransformer.php index a4373d27..078db793 100644 --- a/app/Domains/Documents/Transformers/PowerPointTransformer.php +++ b/app/Domains/Documents/Transformers/PowerPointTransformer.php @@ -39,7 +39,7 @@ public function handle(Document $document): array $chunked_chunks = TextChunker::handle($content); - foreach($chunked_chunks as $chunkSection => $chunkContent) { + foreach ($chunked_chunks as $chunkSection => $chunkContent) { $DocumentChunk = DocumentChunk::updateOrCreate( [ 'document_id' => $this->document->id, @@ -52,20 +52,18 @@ public function handle(Document $document): array 'meta_data' => $dto->toArray(), ] ); - + $chunks[] = [ new VectorlizeDataJob($DocumentChunk), new SummarizeDataJob($DocumentChunk), ]; } - $results->next(); } notify_collection_ui($document->collection, CollectionStatusEnum::PROCESSING, 'Processing Document'); - Log::info('PowerPointTransformer:handle', ['chunks' => count($chunks)]); return $chunks; diff --git a/app/Http/Controllers/TextDocumentController.php b/app/Http/Controllers/TextDocumentController.php index 58978236..bbd1abb3 100644 --- a/app/Http/Controllers/TextDocumentController.php +++ b/app/Http/Controllers/TextDocumentController.php @@ -17,8 +17,6 @@ use Illuminate\Http\Request; use Illuminate\Support\Facades\Bus; use Illuminate\Support\Facades\Log; -use Illuminate\Support\Str; -use LlmLaraHub\LlmDriver\LlmDriverFacade; class TextDocumentController extends Controller { @@ -37,11 +35,10 @@ public function store(Collection $collection, Request $request) 'status_summary' => StatusEnum::Pending, ]); - $jobs = []; $page_number = 1; $chunked_chunks = TextChunker::handle($validated['content']); - foreach($chunked_chunks as $chunkSection => $chunkContent) { + foreach ($chunked_chunks as $chunkSection => $chunkContent) { try { $guid = md5($chunkContent); @@ -75,7 +72,6 @@ public function store(Collection $collection, Request $request) } - Bus::batch($jobs) ->name("Chunking Document - $document->file_path") ->finally(function (Batch $batch) use ($document) { diff --git a/app/Jobs/GetWebContentJob.php b/app/Jobs/GetWebContentJob.php index da1a3eae..409d768c 100644 --- a/app/Jobs/GetWebContentJob.php +++ b/app/Jobs/GetWebContentJob.php @@ -102,7 +102,7 @@ public function handle(): void $chunked_chunks = TextChunker::handle($results); - foreach($chunked_chunks as $chunkSection => $chunkContent) { + foreach ($chunked_chunks as $chunkSection => $chunkContent) { $guid = md5($chunkContent); diff --git a/tests/Feature/Http/Controllers/TextDocumentControllerTest.php b/tests/Feature/Http/Controllers/TextDocumentControllerTest.php index 1c1d1946..0c4ed0e1 100644 --- a/tests/Feature/Http/Controllers/TextDocumentControllerTest.php +++ b/tests/Feature/Http/Controllers/TextDocumentControllerTest.php @@ -4,8 +4,6 @@ use App\Models\Collection; use Illuminate\Support\Facades\Bus; -use LlmLaraHub\LlmDriver\LlmDriverFacade; -use LlmLaraHub\LlmDriver\Responses\CompletionResponse; use Tests\TestCase; class TextDocumentControllerTest extends TestCase @@ -23,7 +21,7 @@ public function test_create(): void 'team_id' => $user->currentTeam->id, ]); - $content = get_fixture("chunkable_text.txt", false); + $content = get_fixture('chunkable_text.txt', false); $this->assertDatabaseCount('documents', 0); $this->assertDatabaseCount('document_chunks', 0); $this->actingAs($user)->post(route('text-documents.store', [ diff --git a/tests/Feature/PdfTransformerTest.php b/tests/Feature/PdfTransformerTest.php index acd7c395..0492fe8c 100644 --- a/tests/Feature/PdfTransformerTest.php +++ b/tests/Feature/PdfTransformerTest.php @@ -3,8 +3,6 @@ namespace Tests\Feature; use App\Domains\Documents\Transformers\PdfTransformer; -use App\Models\Document; -use App\Models\DocumentChunk; use Illuminate\Support\Facades\Bus; use Illuminate\Support\Facades\DB; use Illuminate\Support\Facades\File; @@ -34,9 +32,9 @@ public function test_gets_data_from_pdf() $pages = 10; $this->assertCount(10, DB::table('document_chunks') - ->where("section_number", 0) - ->where("document_id", $this->document->id) - ->get()); + ->where('section_number', 0) + ->where('document_id', $this->document->id) + ->get()); Bus::assertBatchCount(1); diff --git a/tests/Feature/PowerPointTransformerTest.php b/tests/Feature/PowerPointTransformerTest.php index ac095a71..a3678ffb 100644 --- a/tests/Feature/PowerPointTransformerTest.php +++ b/tests/Feature/PowerPointTransformerTest.php @@ -36,7 +36,6 @@ public function test_gets_data_from_pptx() $transformer->handle($this->document); $this->assertDatabaseCount('document_chunks', 5); - } public function test_does_not_repeat()