-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
22 changed files
with
506 additions
and
34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
<?php | ||
|
||
namespace App\Domains\Documents\Transformers; | ||
|
||
use App\Domains\Collections\CollectionStatusEnum; | ||
use App\Domains\Documents\StatusEnum; | ||
use App\Domains\Documents\TypesEnum; | ||
use App\Helpers\TextChunker; | ||
use App\Imports\DocumentsImport; | ||
use App\Models\Document; | ||
use App\Models\DocumentChunk; | ||
use Illuminate\Support\Facades\Log; | ||
use Illuminate\Support\Facades\Storage; | ||
|
||
/**
 * Splits an uploaded CSV document into one Document per row and chunks each
 * newly created row document for later processing.
 */
class CSVTransformer
{
    /** The CSV source document handed to handle(). */
    protected Document $document;

    /** Type stored on every per-row document created from the CSV. */
    protected TypesEnum $mimeType = TypesEnum::CSV;

    /**
     * Import the CSV behind $document, persist each row as its own Document
     * (and as a JSON file on the "collections" disk), chunk the rows that were
     * newly created, then delete the source document.
     *
     * Rows whose Document already existed are not re-chunked; they are only
     * marked as complete.
     *
     * @param  Document  $document  The uploaded CSV document; it is deleted at the end.
     * @return array<int, DocumentChunk> Chunks created for the newly imported rows.
     */
    public function handle(Document $document): array
    {
        $this->document = $document;

        // toCollection() arguments: $filePath, $disk, $readerType
        $imported = (new DocumentsImport())
            ->toCollection($document->pathToFile(), null, \Maatwebsite\Excel\Excel::CSV);

        // Only the first entry of the import result is used as the row set.
        // Fall back to an empty list so an empty import does not iterate null.
        $rows = $imported->first() ?? [];

        // Loop-invariant, so it is read once instead of once per row.
        $chunkSize = config('llmdriver.chunking.default_size');

        $chunks = [];

        /**
         * Going to turn into a document then chunks
         */
        foreach ($rows as $rowNumber => $row) {
            $fileName = 'row_'.$rowNumber.'_'.$document->file_path;

            $encoded = json_encode($row);

            Storage::disk('collections')
                ->put((string) $document->collection->id.'/'.$fileName, $encoded);

            $documentRow = Document::updateOrCreate([
                'collection_id' => $document->collection_id,
                'file_path' => $fileName,
                'type' => $this->mimeType,
            ], [
                'status' => StatusEnum::Pending,
                'summary' => $encoded,
                'meta_data' => $row,
                'original_content' => $encoded,
                'subject' => "Row $rowNumber import from ".$document->file_path,
            ]);

            if (! $documentRow->wasRecentlyCreated) {
                // The row was imported before: skip chunking and flag it as done.
                $documentRow->updateQuietly([
                    'status' => StatusEnum::Complete,
                ]);

                continue;
            }

            // Chunking only happens for new rows, so the work is not wasted
            // on rows whose chunks would be discarded.
            foreach (TextChunker::handle($encoded, $chunkSize) as $chunkSection => $chunkContent) {
                $chunks[] = DocumentChunk::updateOrCreate(
                    [
                        'document_id' => $documentRow->id,
                        'sort_order' => $rowNumber,
                        'section_number' => $chunkSection,
                    ],
                    [
                        'guid' => md5($chunkContent),
                        'content' => $chunkContent,
                        'meta_data' => $row,
                        'original_content' => $encoded,
                    ]
                );
            }
        }

        notify_collection_ui($document->collection, CollectionStatusEnum::PROCESSING, 'Processing Documents');

        Log::info('CSVTransformer:handle', ['chunks' => count($chunks)]);

        // The source CSV document is removed once its rows have been split out.
        $document->delete();

        return $chunks;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
<?php | ||
|
||
namespace App\Imports; | ||
|
||
use Illuminate\Support\Collection; | ||
use Maatwebsite\Excel\Concerns\Importable; | ||
use Maatwebsite\Excel\Concerns\ToCollection; | ||
use Maatwebsite\Excel\Concerns\WithHeadingRow; | ||
|
||
/**
 * Import definition that performs no processing of its own: it exists so the
 * Importable trait can be used to read a file into a collection.
 *
 * Declares WithHeadingRow alongside ToCollection.
 */
class DocumentsImport implements ToCollection, WithHeadingRow
{
    use Importable;

    /**
     * Satisfies the ToCollection contract by returning the given collection
     * unchanged.
     *
     * @param  Collection  $collection  The collection handed to the import.
     * @return Collection The same collection instance.
     */
    public function collection(Collection $collection)
    {
        return $collection;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
<?php | ||
|
||
namespace App\Jobs; | ||
|
||
use App\Models\Document; | ||
use Facades\App\Domains\Documents\Transformers\CSVTransformer; | ||
use Illuminate\Bus\Batch; | ||
use Illuminate\Bus\Batchable; | ||
use Illuminate\Bus\Queueable; | ||
use Illuminate\Contracts\Queue\ShouldQueue; | ||
use Illuminate\Foundation\Bus\Dispatchable; | ||
use Illuminate\Queue\InteractsWithQueue; | ||
use Illuminate\Queue\SerializesModels; | ||
use Illuminate\Support\Facades\Bus; | ||
use LlmLaraHub\LlmDriver\LlmDriverFacade; | ||
use LlmLaraHub\TagFunction\Jobs\TagDocumentJob; | ||
|
||
/**
 * Queued job that runs the CSV transformer on a document and dispatches the
 * follow-up processing batches for the chunks it produced.
 */
class ProcessCSVJob implements ShouldQueue
{
    use Batchable;
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /**
     * Create a new job instance.
     *
     * @param  Document  $document  The CSV document to transform.
     */
    public function __construct(public Document $document)
    {
        //
    }

    /**
     * Execute the job.
     *
     * For every chunk returned by the transformer a batch containing a
     * VectorlizeDataJob is dispatched; when that batch finishes, a second
     * batch (summarize, tag, processing-complete) is dispatched for the
     * chunk's document.
     */
    public function handle(): void
    {
        // batch() is null when the job is dispatched outside of a batch, so
        // the nullsafe call avoids a fatal error in that case.
        if ($this->batch()?->cancelled()) {
            // Determine if the batch has been cancelled...

            return;
        }

        $chunks = CSVTransformer::handle($this->document);

        // NOTE(review): one batch is dispatched per chunk, so a document that
        // produced several chunks schedules the "Part 2" batch once per
        // chunk — confirm that is intended.
        foreach ($chunks as $chunk) {
            $document = $chunk->document;

            Bus::batch([
                new VectorlizeDataJob($chunk),
            ])
                ->name(sprintf('Process %s Document Chunks - %d', $document->type->value, $document->id))
                ->finally(function (Batch $batch) use ($document) {
                    Bus::batch([
                        new SummarizeDocumentJob($document),
                        new TagDocumentJob($document),
                        new DocumentProcessingCompleteJob($document),
                    ])->name(sprintf('Part 2 of Process for %s Document - %d',
                        $document->type->value, $document->id))
                        ->allowFailures()
                        ->onQueue(LlmDriverFacade::driver($document->getDriver())->onQueue())
                        ->dispatch();
                })
                ->allowFailures()
                ->onQueue(LlmDriverFacade::driver($document->getDriver())->onQueue())
                ->dispatch();
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.