Skip to content

Commit

Permalink
this should do it but I made a few changes I might convert back
Browse files Browse the repository at this point in the history
  • Loading branch information
alnutile committed May 8, 2024
1 parent ba5d29b commit a35e327
Show file tree
Hide file tree
Showing 22 changed files with 1,332 additions and 50 deletions.
2 changes: 1 addition & 1 deletion Modules/LlmDriver/app/BaseClient.php
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ function ($item) {
)->implode('\n');

$systemPrompt = <<<EOD
You are a helpful assistant in a RAG system with tools and functions to help perform tasks.
You are a helpful assistant in a Retrieval augmented generation system (RAG - an architectural approach that can improve the efficacy of large language model (LLM) applications by leveraging custom data) system with tools and functions to help perform tasks.
When you find the right function make sure to return just the JSON that represents the requirements of that function.
If no function is found just return {} empty json
Expand Down
44 changes: 42 additions & 2 deletions Modules/LlmDriver/app/DistanceQuery.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,14 @@ class DistanceQuery
protected int $distanceThreshold = 0;

/**
* @NOTES
* Some of the reasoning:
* Cosine Similarity: Cosine similarity is often considered one of the most effective metrics for measuring similarity between documents, especially when dealing with high-dimensional data like text documents. It's robust to differences in document length and is effective at capturing semantic similarity.
* Inner Product: Inner product similarity is another metric that can be effective, particularly for certain types of data. It measures the alignment between vectors, which can be useful in contexts where the direction of the vectors is important.
* L2 (Euclidean) Distance: L2 distance is a straightforward metric that measures the straight-line distance between vectors. While it's commonly used and easy to understand, it may not always be the most effective for capturing complex relationships between documents, especially in high-dimensional spaces.
*
* @TODO
* Track the document page for reference
* I save distance; should I also save cosine and inner_product?
*
* @see https://github.com/orgs/LlmLaraHub/projects/1?pane=issue&itemId=60394288
*/
Expand All @@ -24,6 +30,7 @@ public function distance(
int $collectionId,
Vector $embedding
): Collection {

$documentIds = Document::query()
->select('id')
->where('documents.collection_id', $collectionId)
Expand Down Expand Up @@ -77,6 +84,39 @@ public function distance(
->unique('id')
->take(10);

return $results;
$siblingsIncluded = collect();

foreach ($results as $result) {
if ($result->section_number === 0) {
$siblingsIncluded->push($result);
} else {
if ($sibling = $this->getSiblingOrNot($result, $result->section_number - 1)) {
$siblingsIncluded->push($sibling);
}

$siblingsIncluded->push($result);
}

if ($sibling = $this->getSiblingOrNot($result, $result->section_number + 1)) {
$siblingsIncluded->push($sibling);
}
}

return $siblingsIncluded;
}

/**
 * Find the chunk stored at a given section number next to an existing chunk.
 *
 * The lookup is scoped to the same document_id and the same sort_order as
 * $result, and only the section_number differs.
 *
 * @param  DocumentChunk  $result  chunk whose document_id and sort_order scope the lookup
 * @param  int  $sectionNumber  section_number of the chunk to fetch
 * @return false|DocumentChunk the first matching chunk, or false when the query
 *                             yields nothing (or a row without a truthy id)
 */
protected function getSiblingOrNot(DocumentChunk $result, int $sectionNumber): false|DocumentChunk
{
    $lookup = DocumentChunk::query()
        ->where('document_id', $result->document_id)
        ->where('sort_order', $result->sort_order)
        ->where('section_number', $sectionNumber);

    $candidate = $lookup->first();

    // Callers test the result in an `if`, so a miss is reported as false rather than null.
    return $candidate?->id ? $candidate : false;
}
}
11 changes: 8 additions & 3 deletions Modules/LlmDriver/app/Functions/SearchAndSummarize.php
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ public function handle(
$context = implode(' ', $content);

$contentFlattened = <<<PROMPT
You are a helpful assistant in the RAG system:
You are a helpful assistant in the Retrieval augmented generation system (RAG - an architectural approach that can improve the efficacy of large language model (LLM) applications by leveraging custom data) system:
This is data from the search results when entering the users prompt which is
### START PROMPT
Expand All @@ -102,7 +102,10 @@ public function handle(
show_in_thread: false
);

Log::info('[LaraChain] Getting the Summary from the search results');
Log::info('[LaraChain] Getting the Summary from the search results', [
'input' => $contentFlattened,
'driver' => $model->getChat()->chatable->getDriver(),
]);

$messageArray = MessageInDto::from([
'content' => $contentFlattened,
Expand All @@ -114,7 +117,7 @@ public function handle(
/** @var CompletionResponse $response */
$response = LlmDriverFacade::driver(
$model->getChatable()->getDriver()
)->chat([$messageArray]);
)->completion($contentFlattened);

/**
* Lets Verify
Expand Down Expand Up @@ -142,6 +145,8 @@ public function handle(
$message = $model->getChat()->addInput($response->response, RoleEnum::Assistant);

$this->saveDocumentReference($message, $documentChunkResults);

notify_ui($model->getChat(), 'Complete');

return FunctionResponse::from(
[
Expand Down
83 changes: 83 additions & 0 deletions Modules/LlmDriver/tests/Feature/DistanceQueryTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,87 @@ public function test_results()
$this->assertCount(1, $results);

}

/**
 * When the chunk with the known guid sits at section 0 and a section 1 chunk
 * exists for the same document and sort_order, the distance query is
 * expected to return two rows.
 *
 * NOTE(review): presumably those two rows are the matched chunk and the
 * section that follows it — the assertion only checks the count.
 */
public function test_has_sibling_below()
{
    $files = File::files(base_path('tests/fixtures/document_chunks'));
    // NOTE(review): the fixed id presumably matches the document_id stored in
    // the fixture chunks — confirm against tests/fixtures/document_chunks.
    $document = Document::factory()->create([
        'id' => 31,
    ]);

    // Seed one chunk per fixture file.
    foreach ($files as $file) {
        $data = json_decode(File::get($file), true);
        DocumentChunk::factory()->create($data);
    }

    $documentSibling = DocumentChunk::where('guid', 'ffc97910f334c141b55af33b3c0b67c4')->first();

    // Make the seeded chunk the first section so only a "below" sibling can exist.
    $documentSibling->section_number = 0;

    $documentSibling->save();

    // The following section on the same document and sort_order; created for
    // its database side effect only, so the returned model is not kept.
    DocumentChunk::factory()->create([
        'document_id' => 31,
        'sort_order' => $documentSibling->sort_order,
        'section_number' => 1,
        'guid' => 'ffc97910f334c141b55af33b3c0b67c4',
    ]);

    $question = get_fixture('embedding_question_distance.json');

    $vector = new Vector($question);

    $results = (new DistanceQuery())->distance(
        'embedding_1024',
        $document->collection_id,
        $vector);

    $this->assertCount(2, $results);
}

/**
 * When the chunk with the known guid sits at section 1 and chunks exist at
 * sections 0 and 2 for the same document and sort_order, the distance query
 * is expected to return three rows.
 */
public function test_has_sibling_above_and_below()
{
    $fixturePaths = File::files(base_path('tests/fixtures/document_chunks'));
    $document = Document::factory()->create(['id' => 31]);

    // Seed one chunk per fixture file.
    foreach ($fixturePaths as $fixturePath) {
        DocumentChunk::factory()->create(
            json_decode(File::get($fixturePath), true)
        );
    }

    $guid = 'ffc97910f334c141b55af33b3c0b67c4';

    // Move the seeded chunk into the middle so it can have a neighbour on each side.
    $middleChunk = DocumentChunk::where('guid', $guid)->first();
    $middleChunk->section_number = 1;
    $middleChunk->save();

    // Neighbouring sections, created in the same order as before: 0 first, then 2.
    foreach ([0, 2] as $neighbourSection) {
        DocumentChunk::factory()->create([
            'document_id' => 31,
            'sort_order' => $middleChunk->sort_order,
            'section_number' => $neighbourSection,
            'guid' => $guid,
        ]);
    }

    $embedding = new Vector(get_fixture('embedding_question_distance.json'));

    $found = (new DistanceQuery())->distance(
        'embedding_1024',
        $document->collection_id,
        $embedding
    );

    $this->assertCount(3, $found);
}
}
2 changes: 1 addition & 1 deletion Modules/TagFunction/app/Jobs/TagDocumentJob.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public function __construct(public Document $document)
*/
public function handle(): void
{
if ($this->batch()->cancelled()) {
if ($this->batch()?->cancelled()) {
// Determine if the batch has been cancelled...

return;
Expand Down
5 changes: 4 additions & 1 deletion Modules/TagFunction/app/TagManager.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use App\Domains\Agents\VerifyPromptInputDto;
use App\Domains\Agents\VerifyPromptOutputDto;
use App\Domains\Collections\CollectionStatusEnum;
use App\Models\Document;
use Facades\App\Domains\Agents\VerifyResponseAgent;
use Illuminate\Support\Collection;
Expand Down Expand Up @@ -63,7 +64,7 @@ public function handle(Document $document): void
});

foreach ($document->document_chunks as $chunk) {
$tagsFlat = $this->tags->implode(',');
$tagsFlat = $this->tags->take(20)->implode(',');
$summary = $chunk->summary;
$prompt = <<<EOT
This is one chunk or page number {$chunk->sort_order} in the document , Can you make some tags I can use.
Expand Down Expand Up @@ -123,5 +124,7 @@ public function handle(Document $document): void
$this->tags->push($tag);
}
}

notify_collection_ui($document->collection, CollectionStatusEnum::PROCESSING, 'Tags added');
}
}
5 changes: 3 additions & 2 deletions app/Domains/Agents/VerifyResponseAgent.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ public function verify(VerifyPromptInputDto $input): VerifyPromptOutputDto
Just return the text as if answering the intial users prompt "ORIGINAL PROMPT"
Using the CONTEXT make sure the LLM RESPONSE is accurent and just clean it up if not.
$verifyPrompt
### START ORIGINAL PROMPT
$originalPrompt
### END ORIGINAL PROMPT
Expand All @@ -39,8 +42,6 @@ public function verify(VerifyPromptInputDto $input): VerifyPromptOutputDto
EOT;

//put_fixture("verified_prompt_not_working.txt", $prompt, false);

Log::info('[LaraChain] VerifyResponseAgent::verify', [
'prompt' => $prompt,
]);
Expand Down
20 changes: 14 additions & 6 deletions app/Domains/Messages/SearchAndSummarizeChatRepo.php
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,12 @@ public function search(Chat $chat, string $input): string
$context = implode(' ', $content);

$contentFlattened = <<<PROMPT
You are a helpful assistant in the RAG system:
You are a helpful assistant in the Retrieval augmented generation system (RAG - an architectural approach that can improve the efficacy of large language model (LLM) applications by leveraging custom data):
This is data from the search results when entering the users prompt which is
### START PROMPT
{$originalPrompt}
$originalPrompt
### END PROMPT
Please use this with the following context and only this, summarize it for the user and return as markdown so I can render it and strip out and formatting like extra spaces, tabs, periods etc:
Expand All @@ -83,22 +83,25 @@ public function search(Chat $chat, string $input): string

$latestMessagesArray = $chat->getChatResponse();

Log::info('[LaraChain] Getting the Summary');
Log::info('[LaraChain] Getting the Summary', [
'input' => $contentFlattened,
'driver' => $chat->chatable->getDriver(),
]);

notify_ui($chat, 'Building Summary');

/** @var CompletionResponse $response */
$response = LlmDriverFacade::driver(
$chat->chatable->getDriver()
)->chat($latestMessagesArray);
)->completion($contentFlattened);

/**
* Lets Verify
*/
$verifyPrompt = <<<'PROMPT'
$verifyPrompt = <<<'EOD'
This is the results from a Vector search based on the Users Prompt.
Then that was passed into the LLM to summarize the results.
PROMPT;
EOD;

$dto = VerifyPromptInputDto::from(
[
Expand All @@ -115,9 +118,14 @@ public function search(Chat $chat, string $input): string
/** @var VerifyPromptOutputDto $response */
$response = VerifyResponseAgent::verify($dto);

Log::info('[LaraChain] Verification', [
'output' => $response->response,
]);

$message = $chat->addInput($response->response, RoleEnum::Assistant);

$this->saveDocumentReference($message, $documentChunkResults);
notify_ui($chat, 'Complete');

return $response->response;
}
Expand Down
3 changes: 2 additions & 1 deletion app/Events/ChatUpdatedEvent.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@
use Illuminate\Broadcasting\InteractsWithSockets;
use Illuminate\Broadcasting\PrivateChannel;
use Illuminate\Contracts\Broadcasting\ShouldBroadcast;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Events\Dispatchable;
use Illuminate\Queue\SerializesModels;

class ChatUpdatedEvent implements ShouldBroadcast
class ChatUpdatedEvent implements ShouldBroadcast, ShouldQueue
{
use Dispatchable, InteractsWithSockets, SerializesModels;

Expand Down
14 changes: 7 additions & 7 deletions app/Http/Controllers/ChatController.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
use App\Http\Resources\ChatResource;
use App\Http\Resources\CollectionResource;
use App\Http\Resources\MessageResource;
use App\Jobs\OrchestrateJob;
use App\Jobs\SimpleSearchAndSummarizeOrchestrateJob;
use App\Models\Chat;
use App\Models\Collection;
use Facades\App\Domains\Agents\VerifyResponseAgent;
use Facades\LlmLaraHub\LlmDriver\Orchestrate;
use Facades\LlmLaraHub\LlmDriver\SimpleSearchAndSummarizeOrchestrate;
use Illuminate\Support\Facades\Log;
use LlmLaraHub\LlmDriver\LlmDriverFacade;
use LlmLaraHub\LlmDriver\Requests\MessageInDto;
Expand Down Expand Up @@ -98,15 +98,15 @@ public function chat(Chat $chat)
show_in_thread: true);

} elseif (LlmDriverFacade::driver($chat->getDriver())->hasFunctions()) {
Log::info('[LaraChain] Running Orchestrate');
$response = Orchestrate::handle($messagesArray, $chat);
Log::info('[LaraChain] Running Orchestrate added to queue');
OrchestrateJob::dispatch($messagesArray, $chat);
} else {
Log::info('[LaraChain] Simple Search and Summarize');
$response = SimpleSearchAndSummarizeOrchestrate::handle($validated['input'], $chat);
Log::info('[LaraChain] Simple Search and Summarize added to queue');
SimpleSearchAndSummarizeOrchestrateJob::dispatch($validated['input'], $chat);
}

ChatUpdatedEvent::dispatch($chat->chatable, $chat);

return response()->json(['message' => $response]);
return response()->json(['message' => 'ok']);
}
}
1 change: 1 addition & 0 deletions app/Http/Resources/ChatResource.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ public function toArray(Request $request): array
{
return [
'id' => $this->id,
'chatable_id' => $this->chatable_id,
'user_id' => new UserResource($this->user),
];
}
Expand Down
2 changes: 2 additions & 0 deletions app/Http/Resources/MessageDocumentReferenceResource.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ public function toArray(Request $request): array
'document_name' => $this->document_chunk?->document->file_path,
'page' => $this->document_chunk?->sort_order,
'distance' => round($this->distance, 2),
'document_chunk_id' => $this->document_chunk_id,
'section_number' => $this->document_chunk?->section_number + 1, //since 0 does not look good in the ui
'summary' => str($this->document_chunk?->summary)->markdown(),
'taggings' => $tags,
];
Expand Down
2 changes: 1 addition & 1 deletion app/Http/Resources/MessageResource.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public function toArray(Request $request): array
'body_markdown' => str($this->body)->markdown(),
'diff_for_humans' => $this->created_at->diffForHumans(),
'message_document_references' => MessageDocumentReferenceResource::collection(
$this->message_document_references()->orderBy('distance', 'asc')->limit(5)->get()),
$this->message_document_references()->orderBy('distance', 'asc')->limit(10)->get()),
];
}
}
Loading

0 comments on commit a35e327

Please sign in to comment.