Skip to content

Commit

Permalink
try catch on a parse job
Browse files Browse the repository at this point in the history
  • Loading branch information
alnutile committed Mar 28, 2024
1 parent eee37a7 commit b296b4f
Showing 1 changed file with 28 additions and 23 deletions.
51 changes: 28 additions & 23 deletions app/Domains/Documents/Transformers/PdfTransformer.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
use App\Models\DocumentChunk;
use Illuminate\Bus\Batch;
use Illuminate\Support\Facades\Bus;
use Illuminate\Support\Facades\Log;
use Smalot\PdfParser\Parser;

class PdfTransformer
Expand All @@ -28,30 +29,34 @@ public function handle(Document $document): Document
$pages = $pdf->getPages();
$chunks = [];
foreach ($pages as $page_number => $page) {
$page_number = $page_number + 1;
$pageContent = $page->getText();
$guid = md5($pageContent);
$DocumentChunk = DocumentChunk::updateOrCreate(
[
'guid' => $guid,
'document_id' => $this->document->id,
],
[
'content' => $pageContent,
'sort_order' => $page_number,
]
);
/**
* Soon taggings
* And Summary
*/
$chunks[] = [
new VectorlizeDataJob($DocumentChunk),
new SummarizeDataJob($DocumentChunk),
//Tagging
];
try {
$page_number = $page_number + 1;
$pageContent = $page->getText();
$guid = md5($pageContent);
$DocumentChunk = DocumentChunk::updateOrCreate(
[
'guid' => $guid,
'document_id' => $this->document->id,
],
[
'content' => $pageContent,
'sort_order' => $page_number,
]
);
/**
 * Each chunk is queued for vectorization and summarization;
 * tagging will be added later.
 */
$chunks[] = [
new VectorlizeDataJob($DocumentChunk),
new SummarizeDataJob($DocumentChunk),
//Tagging
];

CollectionStatusEvent::dispatch($document->collection, CollectionStatusEnum::PROCESSING);
CollectionStatusEvent::dispatch($document->collection, CollectionStatusEnum::PROCESSING);
} catch (\Exception $e) {
Log::error('Error parsing PDF', ['error' => $e->getMessage()]);
}
}

$batch = Bus::batch($chunks)
Expand Down

0 comments on commit b296b4f

Please sign in to comment.