Skip to content

Commit

Permalink
mock driver working with summary and vector
Browse files Browse the repository at this point in the history
  • Loading branch information
alnutile committed Mar 26, 2024
1 parent ceac4ba commit c0a0cad
Show file tree
Hide file tree
Showing 12 changed files with 194 additions and 12 deletions.
4 changes: 3 additions & 1 deletion app/Domains/Documents/Transformers/PdfTransformer.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use App\Domains\Collections\CollectionStatusEnum;
use App\Events\CollectionStatusEvent;
use App\Jobs\SummarizeDataJob;
use App\Jobs\VectorlizeDataJob;
use App\Models\Collection;
use App\Models\Document;
Expand Down Expand Up @@ -45,7 +46,8 @@ public function handle(Document $document): Document
* And Summary
*/
$chunks[] = [
new VectorlizeDataJob($DocumentChunk)
new VectorlizeDataJob($DocumentChunk),
new SummarizeDataJob($DocumentChunk)
];
}

Expand Down
2 changes: 1 addition & 1 deletion app/Jobs/ProcessFileJob.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public function handle(): void
//new TagDataJob($this->document),
//then mark it all as done and notify the ui
])
->name('OptOutRequests')
->name('Process PDF Document - ' . $document->id)
->finally(function (Batch $batch) use ($document) {
/**
* @TODO
Expand Down
50 changes: 50 additions & 0 deletions app/Jobs/SummarizeDataJob.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
<?php

namespace App\Jobs;

use App\Domains\Documents\StatusEnum;
use App\LlmDriver\LlmDriverFacade;
use App\Models\DocumentChunk;
use Illuminate\Bus\Batchable;
use App\LlmDriver\Responses\CompletionResponse;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;

/**
 * Queued job that requests a completion from the LLM driver for one document
 * chunk and stores the returned text as that chunk's summary.
 */
class SummarizeDataJob implements ShouldQueue
{
    use Batchable;
    use Dispatchable;
    use InteractsWithQueue;
    use Queueable;
    use SerializesModels;

    /**
     * @param  DocumentChunk  $documentChunk  Chunk whose content is sent to the driver.
     */
    public function __construct(public DocumentChunk $documentChunk)
    {
    }

    /**
     * Summarize the chunk, or flag it as cancelled when its batch was cancelled.
     */
    public function handle(): void
    {
        // batch() yields null when the job is not part of a batch; the
        // null-safe call then evaluates to null and the guard is skipped.
        $batch = $this->batch();

        if ($batch?->cancelled()) {
            $this->documentChunk->update(['status_summary' => StatusEnum::Cancelled]);

            return;
        }

        /** @var CompletionResponse $completion */
        $completion = LlmDriverFacade::completion($this->documentChunk->content);

        $this->documentChunk->update([
            'summary' => $completion->content,
            'status_summary' => StatusEnum::Complete,
        ]);
    }
}
14 changes: 13 additions & 1 deletion app/Jobs/VectorlizeDataJob.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

namespace App\Jobs;

use App\Domains\Documents\StatusEnum;
use App\LlmDriver\LlmDriverClient;
use App\LlmDriver\LlmDriverFacade;
use App\Models\DocumentChunk;
Expand All @@ -11,10 +12,11 @@
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use App\LlmDriver\Responses\EmbeddingsResponseDto;
use Illuminate\Bus\Batchable;

class VectorlizeDataJob implements ShouldQueue
{
use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;
use Batchable, Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

/**
* Create a new job instance.
Expand All @@ -29,13 +31,23 @@ public function __construct(public DocumentChunk $documentChunk)
*/
public function handle(): void
{
    // batch() returns null when the job is run outside of a batch (for
    // example when handle() is invoked directly, as the feature test does).
    // The null-safe call keeps that case from failing with
    // "Call to a member function cancelled() on null".
    if ($this->batch()?->cancelled()) {
        // The surrounding batch was cancelled: record that on the chunk and
        // skip the embedding request entirely.
        $this->documentChunk->update([
            'status_embeddings' => StatusEnum::Cancelled,
        ]);

        return;
    }

    $content = $this->documentChunk->content;

    /** @var EmbeddingsResponseDto $results */
    $results = LlmDriverFacade::embedData($content);

    // Persist the embedding returned by the driver and mark this step done.
    $this->documentChunk->update([
        'embedding' => $results->embedding,
        'status_embeddings' => StatusEnum::Complete,
    ]);
}
}
16 changes: 16 additions & 0 deletions app/LlmDriver/BaseClient.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,22 @@

namespace App\LlmDriver;

use App\LlmDriver\Responses\EmbeddingsResponseDto;
use Illuminate\Support\Facades\Log;
use OpenAI\Resources\Embeddings;

/**
 * Shared base class for LLM driver clients.
 */
abstract class BaseClient
{
    /**
     * Build an embeddings response from the bundled fixture file.
     *
     * The $data argument is not consulted: the response is always derived
     * from `embedding_response.json`.
     *
     * @param  string  $data  Text to embed (unused by this implementation).
     */
    public function embedData(string $data): EmbeddingsResponseDto
    {
        Log::info("LlmDriver::MockClient::embedData");

        $fixture = get_fixture('embedding_response.json');
        $embedding = data_get($fixture, 'data.0.embedding');

        // NOTE(review): meaning of the second constructor argument (1000) is
        // not visible here — confirm against EmbeddingsResponseDto.
        return new EmbeddingsResponseDto($embedding, 1000);
    }
}
20 changes: 11 additions & 9 deletions app/LlmDriver/MockClient.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,24 @@

namespace App\LlmDriver;

use App\LlmDriver\Responses\CompletionResponse;
use App\LlmDriver\Responses\EmbeddingsResponseDto;
use Illuminate\Support\Facades\Log;
use OpenAI\Resources\Embeddings;

/**
 * Mock LLM driver client built on BaseClient.
 *
 * NOTE(review): this view is a rendered diff, so removed embedData() lines
 * and added completion() lines appear interleaved below.
 */
class MockClient extends BaseClient {


public function embedData(string $data) : EmbeddingsResponseDto {

Log::info("LlmDriver::MockClient::embedData");

/**
 * Log the call and return a CompletionResponse wrapping fixed placeholder
 * text; the $prompt argument is not referenced in the lines shown.
 */
public function completion(string $prompt) : CompletionResponse {
Log::info("LlmDriver::MockClient::completion");

$data = get_fixture('embedding_response.json');
$data = <<<EOD
Voluptate irure cillum dolor anim officia reprehenderit dolor. Eiusmod veniam nostrud consectetur incididunt proident id. Anim adipisicing pariatur amet duis Lorem sunt veniam veniam est. Deserunt ea aliquip cillum pariatur consectetur. Dolor in reprehenderit adipisicing consectetur cupidatat ad cupidatat reprehenderit. Nostrud mollit voluptate aliqua anim pariatur excepteur eiusmod velit quis exercitation tempor quis excepteur.
EOD;

return new EmbeddingsResponseDto(
data_get($data, 'data.0.embedding'),
1000,
);
return new CompletionResponse($data);
}



}
11 changes: 11 additions & 0 deletions app/LlmDriver/Responses/CompletionResponse.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?php

namespace App\LlmDriver\Responses;

/**
 * Data object holding the text content of a completion result.
 */
class CompletionResponse extends \Spatie\LaravelData\Data
{
    /**
     * @param  string  $content  The completion text.
     */
    public function __construct(public string $content)
    {
    }
}
1 change: 1 addition & 0 deletions database/factories/DocumentChunkFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ public function definition(): array
'status_tagging' => StatusEnum::random(),
'status_summary' => StatusEnum::random(),
'original_content' => fake()->sentence(10),
'summary' => fake()->sentence(5),
'document_id' => Document::factory(),
'embedding' => data_get($embeddings, 'data.0.embedding'),
];
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<?php

use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;

return new class extends Migration
{
    /**
     * Run the migrations: add a nullable long-text `summary` column to the
     * `document_chunks` table.
     */
    public function up(): void
    {
        Schema::table('document_chunks', function (Blueprint $table) {
            $table->longText('summary')->nullable();
        });
    }

    /**
     * Reverse the migrations: drop the `summary` column added by up() so a
     * rollback restores the previous schema.
     */
    public function down(): void
    {
        Schema::table('document_chunks', function (Blueprint $table) {
            $table->dropColumn('summary');
        });
    }
};
12 changes: 12 additions & 0 deletions tests/Feature/Jobs/VectorlizeDataJobTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace Tests\Feature\Jobs;

use App\Jobs\VectorlizeDataJob;
use App\LlmDriver\LlmDriverFacade;
use App\Models\DocumentChunk;
use Illuminate\Foundation\Testing\RefreshDatabase;
use Illuminate\Foundation\Testing\WithFaker;
Expand All @@ -15,6 +16,17 @@ class VectorlizeDataJobTest extends TestCase
*/
public function test_gets_data(): void
{
$embedding = get_fixture('embedding_response.json');

$dto = new \App\LlmDriver\Responses\EmbeddingsResponseDto(
data_get($embedding, 'data.0.embedding'),
1000
);

LlmDriverFacade::shouldReceive('embedData')
->once()
->andReturn($dto);

$documentChunk = DocumentChunk::factory()->create([
'embedding' => null
]);
Expand Down
12 changes: 12 additions & 0 deletions tests/Feature/MockClientTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use App\LlmDriver\Responses\EmbeddingsResponseDto;
use App\LlmDriver\MockClient;
use App\LlmDriver\Responses\CompletionResponse;
use Illuminate\Foundation\Testing\RefreshDatabase;
use Illuminate\Foundation\Testing\WithFaker;
use OpenAI\Resources\Embeddings;
Expand All @@ -24,4 +25,15 @@ public function test_embeddings(): void
$this->assertInstanceOf(EmbeddingsResponseDto::class, $results);

}

/**
 * The mock client's completion() returns a CompletionResponse instance.
 */
public function test_completion(): void
{
    $response = (new MockClient())->completion('test');

    $this->assertInstanceOf(CompletionResponse::class, $response);
}
}
36 changes: 36 additions & 0 deletions tests/Feature/SummarizeDataJobTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
<?php

namespace Tests\Feature;

use App\Jobs\SummarizeDataJob;
use Illuminate\Foundation\Testing\RefreshDatabase;
use Illuminate\Foundation\Testing\WithFaker;
use Tests\TestCase;
use App\LlmDriver\LlmDriverFacade;
use App\Models\DocumentChunk;

class SummarizeDataJobTest extends TestCase
{
    /**
     * Running the job stores the driver's completion text as the chunk summary.
     */
    public function test_gets_data(): void
    {
        // Stub the driver facade so exactly one completion call returns a
        // known response.
        LlmDriverFacade::shouldReceive('completion')
            ->once()
            ->andReturn(new \App\LlmDriver\Responses\CompletionResponse("Foo bar"));

        $chunk = DocumentChunk::factory()->create(['summary' => null]);

        // The job is run directly, outside of any batch.
        (new SummarizeDataJob($chunk))->handle();

        $this->assertEquals("Foo bar", $chunk->refresh()->summary);
    }
}

0 comments on commit c0a0cad

Please sign in to comment.