diff --git a/app/Domains/Documents/Transformers/PdfTransformer.php b/app/Domains/Documents/Transformers/PdfTransformer.php index 0a306d5b..9975fd65 100644 --- a/app/Domains/Documents/Transformers/PdfTransformer.php +++ b/app/Domains/Documents/Transformers/PdfTransformer.php @@ -4,6 +4,7 @@ use App\Domains\Collections\CollectionStatusEnum; use App\Events\CollectionStatusEvent; +use App\Jobs\SummarizeDataJob; use App\Jobs\VectorlizeDataJob; use App\Models\Collection; use App\Models\Document; @@ -45,7 +46,8 @@ public function handle(Document $document): Document * And Summary */ $chunks[] = [ - new VectorlizeDataJob($DocumentChunk) + new VectorlizeDataJob($DocumentChunk), + new SummarizeDataJob($DocumentChunk) ]; } diff --git a/app/Jobs/ProcessFileJob.php b/app/Jobs/ProcessFileJob.php index 089dd341..63eb708c 100644 --- a/app/Jobs/ProcessFileJob.php +++ b/app/Jobs/ProcessFileJob.php @@ -48,7 +48,7 @@ public function handle(): void //new TagDataJob($this->document), //then mark it all as done and notify the ui ]) - ->name('OptOutRequests') + ->name('Process PDF Document - ' . $document->id) ->finally(function (Batch $batch) use ($document) { /** * @TODO diff --git a/app/Jobs/SummarizeDataJob.php b/app/Jobs/SummarizeDataJob.php new file mode 100644 index 00000000..c4301741 --- /dev/null +++ b/app/Jobs/SummarizeDataJob.php @@ -0,0 +1,50 @@ +batch())->cancelled()) { + // Determine if the batch has been cancelled... + $this->documentChunk->update([ + 'status_summary' => StatusEnum::Cancelled, + ]); + return; + } + $content = $this->documentChunk->content; + + /** @var CompletionResponse $results */ + $results = LlmDriverFacade::completion($content); + + $this->documentChunk->update([ + 'summary' => $results->content, + 'status_summary' => StatusEnum::Complete, + ]); + } +} diff --git a/app/Jobs/VectorlizeDataJob.php b/app/Jobs/VectorlizeDataJob.php index 94cd0dcf..fde29168 100644 --- a/app/Jobs/VectorlizeDataJob.php +++ b/app/Jobs/VectorlizeDataJob.php @@ -2,6 +2,7 @@ namespace App\Jobs; +use App\Domains\Documents\StatusEnum; use App\LlmDriver\LlmDriverClient; use App\LlmDriver\LlmDriverFacade; use App\Models\DocumentChunk; @@ -11,10 +12,11 @@ use Illuminate\Queue\InteractsWithQueue; use Illuminate\Queue\SerializesModels; use App\LlmDriver\Responses\EmbeddingsResponseDto; +use Illuminate\Bus\Batchable; class VectorlizeDataJob implements ShouldQueue { - use Dispatchable, InteractsWithQueue, Queueable, SerializesModels; + use Batchable, Dispatchable, InteractsWithQueue, Queueable, SerializesModels; /** * Create a new job instance. @@ -29,6 +31,15 @@ public function __construct(public DocumentChunk $documentChunk) */ public function handle(): void { + + if ($this->batch()->cancelled()) { + // Determine if the batch has been cancelled... + $this->documentChunk->update([ + 'status_embeddings' => StatusEnum::Cancelled, + ]); + return; + } + $content = $this->documentChunk->content; /** @var EmbeddingsResponseDto $results */ @@ -36,6 +47,7 @@ public function handle(): void $this->documentChunk->update([ 'embedding' => $results->embedding, + 'status_embeddings' => StatusEnum::Complete, ]); } } diff --git a/app/LlmDriver/BaseClient.php b/app/LlmDriver/BaseClient.php index f474f569..fa814f40 100644 --- a/app/LlmDriver/BaseClient.php +++ b/app/LlmDriver/BaseClient.php @@ -2,6 +2,22 @@ namespace App\LlmDriver; +use App\LlmDriver\Responses\EmbeddingsResponseDto; +use Illuminate\Support\Facades\Log; +use OpenAI\Resources\Embeddings; + abstract class BaseClient { + public function embedData(string $data) : EmbeddingsResponseDto { + + Log::info("LlmDriver::MockClient::embedData"); + + $data = get_fixture('embedding_response.json'); + + return new EmbeddingsResponseDto( + data_get($data, 'data.0.embedding'), + 1000, + ); + } + } \ No newline at end of file diff --git a/app/LlmDriver/MockClient.php b/app/LlmDriver/MockClient.php index 2139145b..154170c4 100644 --- a/app/LlmDriver/MockClient.php +++ b/app/LlmDriver/MockClient.php @@ -2,22 +2,24 @@ namespace App\LlmDriver; +use App\LlmDriver\Responses\CompletionResponse; use App\LlmDriver\Responses\EmbeddingsResponseDto; use Illuminate\Support\Facades\Log; use OpenAI\Resources\Embeddings; class MockClient extends BaseClient { - - public function embedData(string $data) : EmbeddingsResponseDto { - - Log::info("LlmDriver::MockClient::embedData"); + + public function completion(string $prompt) : CompletionResponse { + Log::info("LlmDriver::MockClient::completion"); - $data = get_fixture('embedding_response.json'); + $data = << StatusEnum::random(), 'status_summary' => StatusEnum::random(), 'original_content' => fake()->sentence(10), + 'summary' => fake()->sentence(5), 'document_id' => Document::factory(), 'embedding' => data_get($embeddings, 'data.0.embedding'), ]; diff --git a/database/migrations/2024_03_26_005306_add_summary_to_document_chunks.php b/database/migrations/2024_03_26_005306_add_summary_to_document_chunks.php new file mode 100644 index 00000000..da5f5518 --- /dev/null +++ b/database/migrations/2024_03_26_005306_add_summary_to_document_chunks.php @@ -0,0 +1,28 @@ +longText('summary')->nullable(); + }); + } + + /** + * Reverse the migrations. + */ + public function down(): void + { + Schema::table('document_chunks', function (Blueprint $table) { + // + }); + } +}; diff --git a/tests/Feature/Jobs/VectorlizeDataJobTest.php b/tests/Feature/Jobs/VectorlizeDataJobTest.php index 2af7035e..c17e504b 100644 --- a/tests/Feature/Jobs/VectorlizeDataJobTest.php +++ b/tests/Feature/Jobs/VectorlizeDataJobTest.php @@ -3,6 +3,7 @@ namespace Tests\Feature\Jobs; use App\Jobs\VectorlizeDataJob; +use App\LlmDriver\LlmDriverFacade; use App\Models\DocumentChunk; use Illuminate\Foundation\Testing\RefreshDatabase; use Illuminate\Foundation\Testing\WithFaker; @@ -15,6 +16,17 @@ class VectorlizeDataJobTest extends TestCase */ public function test_gets_data(): void { + $embedding = get_fixture('embedding_response.json'); + + $dto = new \App\LlmDriver\Responses\EmbeddingsResponseDto( + data_get($embedding, 'data.0.embedding'), + 1000 + ); + + LlmDriverFacade::shouldReceive('embedData') + ->once() + ->andReturn($dto); + $documentChunk = DocumentChunk::factory()->create([ 'embedding' => null ]); diff --git a/tests/Feature/MockClientTest.php b/tests/Feature/MockClientTest.php index 23c8525a..3106b578 100644 --- a/tests/Feature/MockClientTest.php +++ b/tests/Feature/MockClientTest.php @@ -4,6 +4,7 @@ use App\LlmDriver\Responses\EmbeddingsResponseDto; use App\LlmDriver\MockClient; +use App\LlmDriver\Responses\CompletionResponse; use Illuminate\Foundation\Testing\RefreshDatabase; use Illuminate\Foundation\Testing\WithFaker; use OpenAI\Resources\Embeddings; @@ -24,4 +25,15 @@ public function test_embeddings(): void $this->assertInstanceOf(EmbeddingsResponseDto::class, $results); } + + public function test_completion(): void + { + + $client = new MockClient(); + + $results = $client->completion('test'); + + $this->assertInstanceOf(CompletionResponse::class, $results); + + } } diff --git a/tests/Feature/SummarizeDataJobTest.php b/tests/Feature/SummarizeDataJobTest.php new file mode 100644 index 00000000..14f1161e --- /dev/null +++ b/tests/Feature/SummarizeDataJobTest.php @@ -0,0 +1,36 @@ +once() + ->andReturn($dto); + + $documentChunk = DocumentChunk::factory()->create([ + 'summary' => null + ]); + + $job = new SummarizeDataJob($documentChunk); + $job->handle(); + + $this->assertEquals("Foo bar", $documentChunk->refresh()->summary); + } +}