Skip to content

Commit

Permalink
add document references
Browse files Browse the repository at this point in the history
  • Loading branch information
alnutile committed Apr 28, 2024
1 parent db21d08 commit f29aea7
Show file tree
Hide file tree
Showing 12 changed files with 223 additions and 86 deletions.
51 changes: 13 additions & 38 deletions Modules/LlmDriver/app/Functions/SearchAndSummarize.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@
namespace LlmLaraHub\LlmDriver\Functions;

use App\Domains\Messages\RoleEnum;
use App\Models\DocumentChunk;
use App\Models\Message;
use Illuminate\Database\Eloquent\Collection;
use Illuminate\Support\Facades\Log;
use Laravel\Pennant\Feature;
use LlmLaraHub\LlmDriver\HasDrivers;
use LlmLaraHub\LlmDriver\Helpers\CreateReferencesTrait;
use LlmLaraHub\LlmDriver\Helpers\DistanceQueryTrait;
use LlmLaraHub\LlmDriver\LlmDriverFacade;
use LlmLaraHub\LlmDriver\Requests\MessageInDto;
use LlmLaraHub\LlmDriver\Responses\CompletionResponse;
Expand All @@ -17,6 +16,8 @@

class SearchAndSummarize extends FunctionContract
{
use CreateReferencesTrait, DistanceQueryTrait;

protected string $name = 'search_and_summarize';

protected string $description = 'Used to embed users prompt, search database and return summarized results.';
Expand All @@ -29,6 +30,8 @@ public function handle(
HasDrivers $model,
FunctionCallDto $functionCallDto): FunctionResponse
{
Log::info('[LaraChain] Using Function: SearchAndSummarize');

/**
* @TODO
*
Expand All @@ -50,22 +53,11 @@ public function handle(

$embeddingSize = get_embedding_size($model->getEmbeddingDriver());

/**
* @TODO
* Track the document page for reference
*
* @see https://github.com/orgs/LlmLaraHub/projects/1?pane=issue&itemId=60394288
*/
$results = DocumentChunk::query()
->join('documents', 'documents.id', '=', 'document_chunks.document_id')
->selectRaw(
"document_chunks.{$embeddingSize} <-> ? as distance, document_chunks.content, document_chunks.{$embeddingSize} as embedding, document_chunks.id as id, document_chunks.summary as summary, document_chunks.document_id as document_id",
[$embedding->embedding]
)
->where('documents.collection_id', $model->getChatable()->id)
->limit(10)
->orderByRaw('distance')
->get();
$documentChunkResults = $this->distance(
$embeddingSize,
$model->getChatable()->id,
$embedding->embedding
);

$content = [];

Expand All @@ -74,7 +66,7 @@ public function handle(
* Yes this is a lot like the SearchAndSummarizeChatRepo
* But just getting a sense of things
*/
foreach ($results as $result) {
foreach ($documentChunkResults as $result) {
$contentString = remove_ascii($result->content);
if (Feature::active('reduce_text')) {
$result = reduce_text_size($contentString);
Expand Down Expand Up @@ -107,10 +99,7 @@ public function handle(

$message = $model->getChat()->addInput($response->content, RoleEnum::Assistant);

/**
* We want to trigger the job to build up document reference history
*/
$this->saveDocumentReference($message, $results);
$this->saveDocumentReference($message, $documentChunkResults);

return FunctionResponse::from(
[
Expand All @@ -120,20 +109,6 @@ public function handle(
);
}

protected function saveDocumentReference(
Message $model,
Collection $documentChunks
): void {
//add each one to a batch job or do the work here.
foreach ($documentChunks as $documentChunk) {
$model->message_document_references()->create([
'document_chunk_id' => $documentChunk->id,
'distance' => $documentChunk->distance,
'reference' => $documentChunk->summary,
]);
}
}

/**
* @return PropertyDto[]
*/
Expand Down
22 changes: 22 additions & 0 deletions Modules/LlmDriver/app/Helpers/CreateReferencesTrait.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?php

namespace LlmLaraHub\LlmDriver\Helpers;

use App\Models\Message;
use Illuminate\Database\Eloquent\Collection;

trait CreateReferencesTrait
{
    /**
     * Record which document chunks were used for a message.
     *
     * For every entry in $documentChunks one row is created through the
     * message's `message_document_references()` relation, storing the
     * chunk's `id` and its `distance` value.
     *
     * @param  Message  $model  message the references are attached to
     * @param  Collection  $documentChunks  rows exposing `id` and `distance`
     */
    protected function saveDocumentReference(
        Message $model,
        Collection $documentChunks
    ): void {
        // Work is done inline for now; it could be moved to a batch job later.
        $documentChunks->each(function ($chunk) use ($model): void {
            $model->message_document_references()->create([
                'document_chunk_id' => $chunk->id,
                'distance' => $chunk->distance,
            ]);
        });
    }
}
37 changes: 37 additions & 0 deletions Modules/LlmDriver/app/Helpers/DistanceQueryTrait.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<?php

namespace LlmLaraHub\LlmDriver\Helpers;

use App\Models\DocumentChunk;
use Illuminate\Database\Eloquent\Collection;
use Pgvector\Laravel\Vector;

trait DistanceQueryTrait
{
    /**
     * Fetch the chunks of a collection that are closest to an embedding.
     *
     * Joins `document_chunks` to `documents`, keeps only chunks whose document
     * belongs to $collectionId, and orders the rows ascending by the result of
     * the `<->` operator applied to the chunk's embedding column and $embedding.
     *
     * Every returned row exposes: distance, content, embedding, id, summary
     * and document_id.
     *
     * NOTE(review): $embeddingSize is interpolated into the raw SQL as a column
     * name (only $embedding is bound), so callers must pass a trusted value.
     *
     * @TODO
     * Track the document page for reference
     *
     * @see https://github.com/orgs/LlmLaraHub/projects/1?pane=issue&itemId=60394288
     *
     * @param  string  $embeddingSize  name of the embedding column on document_chunks
     * @param  int  $collectionId  value matched against documents.collection_id
     * @param  Vector  $embedding  vector the stored embeddings are compared with
     * @param  int  $limit  maximum number of rows to return; defaults to 10,
     *                      the value that used to be hard-coded
     */
    protected function distance(
        string $embeddingSize,
        int $collectionId,
        Vector $embedding,
        int $limit = 10
    ): Collection {
        return DocumentChunk::query()
            ->join('documents', 'documents.id', '=', 'document_chunks.document_id')
            ->selectRaw(
                "document_chunks.{$embeddingSize} <-> ? as distance, document_chunks.content as content, document_chunks.{$embeddingSize} as embedding, document_chunks.id as id, document_chunks.summary as summary, document_chunks.document_id as document_id",
                [$embedding]
            )
            /** @phpstan-ignore-next-line */
            ->where('documents.collection_id', $collectionId)
            ->limit($limit)
            ->orderByRaw('distance')
            ->get();
    }
}
36 changes: 17 additions & 19 deletions app/Domains/Messages/SearchAndSummarizeChatRepo.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,16 @@
use App\Models\DocumentChunk;
use Illuminate\Support\Facades\Log;
use Laravel\Pennant\Feature;
use LlmLaraHub\LlmDriver\Helpers\CreateReferencesTrait;
use LlmLaraHub\LlmDriver\Helpers\DistanceQueryTrait;
use LlmLaraHub\LlmDriver\LlmDriverFacade;
use LlmLaraHub\LlmDriver\Responses\CompletionResponse;
use LlmLaraHub\LlmDriver\Responses\EmbeddingsResponseDto;

class SearchAndSummarizeChatRepo
{
use CreateReferencesTrait, DistanceQueryTrait;

public function search(Chat $chat, string $input): string
{
/**
Expand All @@ -29,26 +33,16 @@ public function search(Chat $chat, string $input): string

$embeddingSize = get_embedding_size($chat->chatable->getEmbeddingDriver());

/**
* @TODO
* Track the document page for reference
*
* @see https://github.com/orgs/LlmLaraHub/projects/1?pane=issue&itemId=60394288
*/
$results = DocumentChunk::query()
->join('documents', 'documents.id', '=', 'document_chunks.document_id')
->selectRaw(
"document_chunks.{$embeddingSize} <-> ? as distance, document_chunks.content, document_chunks.{$embeddingSize} as embedding, document_chunks.id as id",
[$embedding->embedding]
)
->where('documents.collection_id', $chat->chatable->id)
->limit(10)
->orderByRaw('distance')
->get();

$documentChunkResults = $this->distance(
$embeddingSize,
/** @phpstan-ignore-next-line */
$chat->getChatable()->id,
$embedding->embedding
);
$content = [];

foreach ($results as $result) {
/** @var DocumentChunk $result */
foreach ($documentChunkResults as $result) {
$contentString = remove_ascii($result->content);
if (Feature::active('reduce_text')) {
$result = reduce_text_size($contentString);
Expand All @@ -68,13 +62,17 @@ public function search(Chat $chat, string $input): string
);

$latestMessagesArray = $chat->getChatResponse();

Log::info('[LaraChain] Getting the Summary');

/** @var CompletionResponse $response */
$response = LlmDriverFacade::driver(
$chat->chatable->getDriver()
)->chat($latestMessagesArray);

$chat->addInput($response->content, RoleEnum::Assistant);
$message = $chat->addInput($response->content, RoleEnum::Assistant);

$this->saveDocumentReference($message, $documentChunkResults);

return $response->content;
}
Expand Down
26 changes: 26 additions & 0 deletions app/Http/Resources/MessageDocumentReferenceResource.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?php

namespace App\Http\Resources;

use Illuminate\Http\Request;
use Illuminate\Http\Resources\Json\JsonResource;

class MessageDocumentReferenceResource extends JsonResource
{
    /**
     * Build the array payload for a single message document reference.
     *
     * The payload combines the reference's own `id` and `distance` with
     * values read from its related document chunk: the owning document's
     * file path, the chunk's sort order (exposed as `page`), its summary
     * rendered through `markdown()`, and its tags.
     *
     * @return array<string, mixed>
     */
    public function toArray(Request $request): array
    {
        // Read the relation once and reuse it for every chunk-derived field.
        $chunk = $this->document_chunk;

        // NOTE(review): distance is passed to round() as stored; confirm it
        // can never be null for rows reaching this resource.
        $roundedDistance = round($this->distance, 2);

        return [
            'id' => $this->id,
            'document_name' => $chunk->document->file_path,
            'page' => $chunk->sort_order,
            'distance' => $roundedDistance,
            'summary' => str($chunk->summary)->markdown(),
            'taggings' => TagResource::collection($chunk->tags),
        ];
    }
}
2 changes: 2 additions & 0 deletions app/Http/Resources/MessageResource.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ public function toArray(Request $request): array
'body' => $this->body,
'body_markdown' => str($this->body)->markdown(),
'diff_for_humans' => $this->created_at->diffForHumans(),
'message_document_references' => MessageDocumentReferenceResource::collection(
$this->message_document_references()->orderBy('distance', 'asc')->limit(5)->get()),
];
}
}
5 changes: 4 additions & 1 deletion app/Http/Resources/TagResource.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ class TagResource extends JsonResource
*/
public function toArray(Request $request): array
{
return parent::toArray($request);
return [
'id' => $this->id,
'name' => str($this->name)->headline()->toString(),
];
}
}
1 change: 0 additions & 1 deletion database/factories/MessageDocumentReferenceFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ public function definition(): array
return [
'message_id' => Message::factory(),
'document_chunk_id' => DocumentChunk::factory(),
'reference' => fake()->text(),
'distance' => 11.321851037400464,
];
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ public function up(): void
$table->id();
$table->foreignIdFor(Message::class);
$table->foreignIdFor(DocumentChunk::class);
$table->string('reference')->nullable();
$table->decimal('distance', 18, 15)->nullable();
$table->timestamps();
});
Expand Down
Loading

0 comments on commit f29aea7

Please sign in to comment.