Skip to content

Commit

Permalink
update text input as well
Browse files Browse the repository at this point in the history
  • Loading branch information
alnutile committed May 8, 2024
1 parent 7805fa0 commit ba5d29b
Show file tree
Hide file tree
Showing 7 changed files with 10 additions and 23 deletions.
6 changes: 2 additions & 4 deletions app/Domains/Documents/Transformers/PdfTransformer.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
namespace App\Domains\Documents\Transformers;

use App\Domains\Collections\CollectionStatusEnum;
use App\Events\CollectionStatusEvent;
use App\Helpers\TextChunker;
use App\Jobs\SummarizeDataJob;
use App\Jobs\SummarizeDocumentJob;
Expand Down Expand Up @@ -37,7 +36,7 @@ public function handle(Document $document): Document
$pageContent = $page->getText();

$chunked_chunks = TextChunker::handle($pageContent);
foreach($chunked_chunks as $chunkSection => $chunkContent) {
foreach ($chunked_chunks as $chunkSection => $chunkContent) {
$guid = md5($chunkContent);
$DocumentChunk = DocumentChunk::updateOrCreate(
[
Expand All @@ -55,8 +54,7 @@ public function handle(Document $document): Document
new VectorlizeDataJob($DocumentChunk),
new SummarizeDataJob($DocumentChunk),
];



}
notify_collection_ui($document->collection, CollectionStatusEnum::PROCESSING, 'Processing Document');

Expand Down
6 changes: 2 additions & 4 deletions app/Domains/Documents/Transformers/PowerPointTransformer.php
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public function handle(Document $document): array

$chunked_chunks = TextChunker::handle($content);

foreach($chunked_chunks as $chunkSection => $chunkContent) {
foreach ($chunked_chunks as $chunkSection => $chunkContent) {
$DocumentChunk = DocumentChunk::updateOrCreate(
[
'document_id' => $this->document->id,
Expand All @@ -52,20 +52,18 @@ public function handle(Document $document): array
'meta_data' => $dto->toArray(),
]
);

$chunks[] = [
new VectorlizeDataJob($DocumentChunk),
new SummarizeDataJob($DocumentChunk),
];
}


$results->next();
}

notify_collection_ui($document->collection, CollectionStatusEnum::PROCESSING, 'Processing Document');


Log::info('PowerPointTransformer:handle', ['chunks' => count($chunks)]);

return $chunks;
Expand Down
6 changes: 1 addition & 5 deletions app/Http/Controllers/TextDocumentController.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
use Illuminate\Http\Request;
use Illuminate\Support\Facades\Bus;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Str;
use LlmLaraHub\LlmDriver\LlmDriverFacade;

class TextDocumentController extends Controller
{
Expand All @@ -37,11 +35,10 @@ public function store(Collection $collection, Request $request)
'status_summary' => StatusEnum::Pending,
]);


$jobs = [];
$page_number = 1;
$chunked_chunks = TextChunker::handle($validated['content']);
foreach($chunked_chunks as $chunkSection => $chunkContent) {
foreach ($chunked_chunks as $chunkSection => $chunkContent) {

try {
$guid = md5($chunkContent);
Expand Down Expand Up @@ -75,7 +72,6 @@ public function store(Collection $collection, Request $request)

}


Bus::batch($jobs)
->name("Chunking Document - $document->file_path")
->finally(function (Batch $batch) use ($document) {
Expand Down
2 changes: 1 addition & 1 deletion app/Jobs/GetWebContentJob.php
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ public function handle(): void

$chunked_chunks = TextChunker::handle($results);

foreach($chunked_chunks as $chunkSection => $chunkContent) {
foreach ($chunked_chunks as $chunkSection => $chunkContent) {

$guid = md5($chunkContent);

Expand Down
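4 changes: 1 addition & 3 deletions TextDocumentControllerTest.php (directory path not shown in this capture)
Original file line number Diff line number Diff line change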
Expand Up @@ -4,8 +4,6 @@

use App\Models\Collection;
use Illuminate\Support\Facades\Bus;
use LlmLaraHub\LlmDriver\LlmDriverFacade;
use LlmLaraHub\LlmDriver\Responses\CompletionResponse;
use Tests\TestCase;

class TextDocumentControllerTest extends TestCase
Expand All @@ -23,7 +21,7 @@ public function test_create(): void
'team_id' => $user->currentTeam->id,
]);

$content = get_fixture("chunkable_text.txt", false);
$content = get_fixture('chunkable_text.txt', false);
$this->assertDatabaseCount('documents', 0);
$this->assertDatabaseCount('document_chunks', 0);
$this->actingAs($user)->post(route('text-documents.store', [
Expand Down
8 changes: 3 additions & 5 deletions tests/Feature/PdfTransformerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
namespace Tests\Feature;

use App\Domains\Documents\Transformers\PdfTransformer;
use App\Models\Document;
use App\Models\DocumentChunk;
use Illuminate\Support\Facades\Bus;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\File;
Expand Down Expand Up @@ -34,9 +32,9 @@ public function test_gets_data_from_pdf()
$pages = 10;

$this->assertCount(10, DB::table('document_chunks')
->where("section_number", 0)
->where("document_id", $this->document->id)
->get());
->where('section_number', 0)
->where('document_id', $this->document->id)
->get());

Bus::assertBatchCount(1);

Expand Down
1 change: 0 additions & 1 deletion tests/Feature/PowerPointTransformerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ public function test_gets_data_from_pptx()
$transformer->handle($this->document);
$this->assertDatabaseCount('document_chunks', 5);


}

public function test_does_not_repeat()
Expand Down

0 comments on commit ba5d29b

Please sign in to comment.