Skip to content

Commit

Permalink
Lots of updates when using ollama to batch the jobs so they can wait …
Browse files Browse the repository at this point in the history
…else the jobs stack up and timeout

Signed-off-by: alnutile <[email protected]>
  • Loading branch information
alnutile committed Apr 19, 2024
1 parent 363a63a commit 1c044da
Show file tree
Hide file tree
Showing 30 changed files with 1,041 additions and 43 deletions.
Binary file modified .DS_Store
Binary file not shown.
10 changes: 10 additions & 0 deletions Modules/LlmDriver/app/BaseClient.php
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,11 @@ public function isAsync(): bool
return true;
}

/**
 * Tell whether this client has any functions to offer.
 *
 * @return bool true when getFunctions() yields a non-empty array
 */
public function hasFunctions(): bool
{
    $available = $this->getFunctions();

    return $available !== [];
}

public function getFunctions(): array
{
$functions = LlmDriverFacade::getFunctions();
Expand Down Expand Up @@ -157,4 +162,9 @@ protected function remapMessages(array $messages): array
{
return $messages;
}

/**
 * Name of the queue used for work handled by this client.
 *
 * The base client uses the 'default' queue.
 *
 * @return string queue name
 */
public function onQueue(): string
{
    return 'default';
}
}
9 changes: 9 additions & 0 deletions Modules/LlmDriver/app/OllamaClient.php
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,10 @@ public function getFunctions(): array
{
$functions = LlmDriverFacade::getFunctions();

// Check the flag with Feature::active(). Feature::activate() would instead
// switch the feature on and returns nothing, which made this guard always
// true and silently enabled 'ollama-functions' on every call.
if (! Feature::active('ollama-functions')) {
    return [];
}

return collect($functions)->map(function ($function) {
$function = $function->toArray();
$properties = [];
Expand Down Expand Up @@ -205,4 +209,9 @@ public function isAsync(): bool
{
return false;
}

/**
 * Name of the queue used for work handled by the Ollama client.
 *
 * Returns the dedicated 'ollama' queue.
 *
 * @return string queue name
 */
public function onQueue(): string
{
    return 'ollama';
}
}
5 changes: 5 additions & 0 deletions Modules/LlmDriver/app/Orchestrate.php
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ public function handle(array $messagesArray, Chat $chat): ?string

return $this->response;
} else {
Log::info('[LaraChain] Orchestration No Fucntions SearchAnd Summarize');
/**
* @NOTE
* this assumes way too much
Expand All @@ -135,6 +136,10 @@ function ($message) {
}
)->content;

Log::info('[LaraChain] Orchestration No Fucntions SearchAnd Summarize', [
'message' => $message,
]);

return SearchOrSummarizeChatRepo::search($chat, $message);
}
}
Expand Down
35 changes: 35 additions & 0 deletions Modules/LlmDriver/app/SimpleSearchAndSummarizeOrchestrate.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
<?php

namespace LlmLaraHub\LlmDriver;

use App\Events\ChatUiUpdateEvent;
use App\Models\Chat;
use Facades\App\Domains\Messages\SearchOrSummarizeChatRepo;
use Illuminate\Support\Facades\Log;

/**
 * Minimal orchestrator that bypasses function calling: it notifies the chat UI
 * and hands the user's message straight to the search-and-summarize repository.
 */
class SimpleSearchAndSummarizeOrchestrate
{
    // Neither property is read or written by the methods of this class.
    protected string $response = '';

    protected bool $requiresFollowup = false;

    /**
     * Run a search-and-summarize pass for a single user message.
     *
     * @param  string  $message  the raw user input to search with
     * @param  Chat  $chat  the chat the message belongs to
     * @return string|null whatever SearchOrSummarizeChatRepo::search() returns
     */
    public function handle(string $message, Chat $chat): ?string
    {
        Log::info('[LaraChain] Skipping over functions doing search and summarize');

        // Let the UI know work has started before the search call is made.
        ChatUiUpdateEvent::dispatch($chat->chatable, $chat, 'Searching data now to summarize content');

        return SearchOrSummarizeChatRepo::search($chat, $message);
    }

    /**
     * Report whether the given list of functions is non-empty.
     *
     * @param  array  $functions  candidate functions
     */
    protected function hasFunctions(array $functions): bool
    {
        // The parameter is already typed as array, so only emptiness matters.
        return $functions !== [];
    }
}
6 changes: 5 additions & 1 deletion app/Domains/Documents/Transformers/PdfTransformer.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
use Illuminate\Bus\Batch;
use Illuminate\Support\Facades\Bus;
use Illuminate\Support\Facades\Log;
use LlmLaraHub\LlmDriver\LlmDriverFacade;
use LlmLaraHub\TagFunction\Database\Seeders\TagFunctionDatabaseSeeder;
use LlmLaraHub\TagFunction\Functions\TaggingFunction;
use Smalot\PdfParser\Parser;

class PdfTransformer
Expand Down Expand Up @@ -50,7 +53,7 @@ public function handle(Document $document): Document
$chunks[] = [
new VectorlizeDataJob($DocumentChunk),
new SummarizeDataJob($DocumentChunk),
//new TagDataJob($this->document),
//new TaggingFunction($this->document),
//then mark it all as done and notify the ui
];

Expand All @@ -66,6 +69,7 @@ public function handle(Document $document): Document
SummarizeDocumentJob::dispatch($document);
})
->allowFailures()
->onQueue(LlmDriverFacade::driver($document->getDriver())->onQueue())
->dispatch();

return $this->document;
Expand Down
71 changes: 71 additions & 0 deletions app/Domains/Documents/Transformers/PowerPointTransformer.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
<?php

namespace App\Domains\Documents\Transformers;

use App\Domains\Collections\CollectionStatusEnum;
use App\Events\CollectionStatusEvent;
use App\Jobs\SummarizeDataJob;
use App\Jobs\VectorlizeDataJob;
use App\Models\Document;
use App\Models\DocumentChunk;
use Illuminate\Support\Facades\Log;
use PhpOffice\PhpPresentation\IOFactory;
use PhpOffice\PhpPresentation\Shape\RichText;

/**
 * Turns a PowerPoint (.pptx) document into DocumentChunk records, one per
 * slide that contains text, and builds the jobs that process each chunk.
 */
class PowerPointTransformer
{
    protected Document $document;

    /**
     * Parse the document's file and create or update one chunk per slide.
     *
     * The text of every RichText shape on a slide is combined into a single
     * chunk keyed by "<file path>_<slide number>". Previously the slide number
     * was incremented once per text shape, so slides with several text boxes
     * produced drifting numbers and guids that collided with later slides,
     * letting updateOrCreate() overwrite earlier content.
     *
     * Slides without any text produce no chunk and no jobs.
     *
     * @param  Document  $document  the document whose file should be parsed
     * @return array<int, array<int, object>> one [VectorlizeDataJob, SummarizeDataJob] pair per chunk
     *
     * @throws \Exception when the reader reports that it cannot read the file
     */
    public function handle(Document $document): array
    {
        $this->document = $document;

        $filePath = $this->document->pathToFile();

        $parser = IOFactory::createReader('PowerPoint2007');
        if (! $parser->canRead($filePath)) {
            throw new \Exception('Can not read the document '.$filePath);
        }

        $oPHPPresentation = $parser->load($filePath);

        $chunks = [];
        foreach ($oPHPPresentation->getAllSlides() as $slideIndex => $slide) {
            // Slides are keyed from 0; chunks are numbered from 1. Computed once
            // per slide so every shape on the slide shares the same number.
            $slideNumber = $slideIndex + 1;

            try {
                $pageContent = $this->slideText($slide);

                if ($pageContent !== '') {
                    $DocumentChunk = DocumentChunk::updateOrCreate(
                        [
                            'guid' => $filePath.'_'.$slideNumber,
                            'document_id' => $this->document->id,
                        ],
                        [
                            'content' => $pageContent,
                            'sort_order' => $slideNumber,
                        ]
                    );

                    $chunks[] = [
                        new VectorlizeDataJob($DocumentChunk),
                        new SummarizeDataJob($DocumentChunk),
                    ];
                }

                CollectionStatusEvent::dispatch($document->collection, CollectionStatusEnum::PROCESSING);
            } catch (\Exception $e) {
                // Best effort: a slide that fails to parse is logged and skipped
                // so the remaining slides are still processed.
                Log::error('Error parsing PowerPoint', [
                    'error' => $e->getMessage(),
                    'slide' => $slideNumber,
                ]);
            }
        }

        Log::info('PowerPointTransformer:handle', ['chunks' => count($chunks)]);

        return $chunks;
    }

    /**
     * Collect the plain text of every RichText shape on a slide.
     *
     * @param  object  $slide  a slide as returned by getAllSlides()
     * @return string the non-empty shape texts joined by newlines, or '' when the slide has no text
     */
    private function slideText($slide): string
    {
        $texts = [];

        foreach ($slide->getShapeCollection() as $shape) {
            // Only rich-text shapes carry text; images, charts etc. are ignored.
            if (! $shape instanceof RichText) {
                continue;
            }

            $text = trim($shape->getPlainText());
            if ($text !== '') {
                $texts[] = $text;
            }
        }

        return implode("\n", $texts);
    }
}
13 changes: 9 additions & 4 deletions app/Domains/Messages/SearchOrSummarizeChatRepo.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use App\Models\Chat;
use App\Models\DocumentChunk;
use Illuminate\Support\Facades\Log;
use Laravel\Pennant\Feature;
use LlmLaraHub\LlmDriver\LlmDriverFacade;
use LlmLaraHub\LlmDriver\Responses\CompletionResponse;
use LlmLaraHub\LlmDriver\Responses\EmbeddingsResponseDto;
Expand All @@ -19,7 +20,7 @@ public function search(Chat $chat, string $input): string
* by searching the data or a summary of the data.
* For now we will search.
*/
Log::info('ChatController:chat getting embedding', ['input' => $input]);
Log::info('[LaraChain] Embedding and Searching');

/** @var EmbeddingsResponseDto $embedding */
$embedding = LlmDriverFacade::driver(
Expand All @@ -42,12 +43,16 @@ public function search(Chat $chat, string $input): string
$content = [];

foreach ($results as $result) {
$content[] = remove_ascii(reduce_text_size($result->content)); //reduce_text_size seem to mess up Claude?
$contentString = remove_ascii($result->content);
if (Feature::active('reduce_text')) {
    // Store the reduced text in $contentString, the variable that is appended
    // to $content below. Assigning it to $result discarded the reduction.
    $contentString = reduce_text_size($contentString);
}
$content[] = $contentString; //reduce_text_size seem to mess up Claude?
}

$content = implode(' ', $content);

$content = "This is data from the search results when entering the users prompt which is ### START PROMPT ### {$input} ### END PROMPT ### please use this with the following context and only this and return as markdown so I can render it: ".$content;
$content = "This is data from the search results when entering the users prompt which is ### START PROMPT ### {$input} ### END PROMPT ### please use this with the following context and only this, summarize it for the user and return as markdown so I can render it and strip out and formatting like extra spaces, tabs, periods etc: ".$content;

$chat->addInput(
message: $content,
Expand All @@ -57,7 +62,7 @@ public function search(Chat $chat, string $input): string
);

$latestMessagesArray = $chat->getChatResponse();

Log::info('[LaraChain] Getting the Summary');
/** @var CompletionResponse $response */
$response = LlmDriverFacade::driver(
$chat->chatable->getDriver()
Expand Down
21 changes: 20 additions & 1 deletion app/Http/Controllers/ChatController.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
use App\Models\Chat;
use App\Models\Collection;
use Facades\LlmLaraHub\LlmDriver\Orchestrate;
use Facades\LlmLaraHub\LlmDriver\SimpleSearchAndSummarizeOrchestrate;
use Illuminate\Support\Facades\Log;
use LlmLaraHub\LlmDriver\LlmDriverFacade;
use LlmLaraHub\LlmDriver\Requests\MessageInDto;

class ChatController extends Controller
Expand Down Expand Up @@ -44,6 +47,7 @@ public function chat(Chat $chat)
{
$validated = request()->validate([
'input' => 'required|string',
'completion' => 'boolean'
]);

$chat->addInput(
Expand All @@ -58,7 +62,22 @@ public function chat(Chat $chat)
'role' => 'user',
]);

$response = Orchestrate::handle($messagesArray, $chat);
if(data_get($validated, 'completion', false)) {
Log::info('[LaraChain] Running Simple Completion');
$prompt = $validated['input'];
$response = LlmDriverFacade::driver($chat->getDriver())->completion($prompt);
$response = $response->content;
$chat->addInput(
message: $response,
role: RoleEnum::Assistant,
show_in_thread: true);
} elseif (LlmDriverFacade::driver($chat->getDriver())->hasFunctions()) {
Log::info('[LaraChain] Running Orchestrate');
$response = Orchestrate::handle($messagesArray, $chat);
} else {
Log::info('[LaraChain] Simple Search and Summarize');
$response = SimpleSearchAndSummarizeOrchestrate::handle($validated['input'], $chat);
}

ChatUpdatedEvent::dispatch($chat->chatable, $chat);

Expand Down
24 changes: 19 additions & 5 deletions app/Http/Controllers/CollectionController.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
use App\Jobs\ProcessFileJob;
use App\Models\Collection;
use App\Models\Document;
use Illuminate\Support\Facades\Log;

class CollectionController extends Controller
{
Expand Down Expand Up @@ -92,11 +93,24 @@ public function filesUpload(Collection $collection)
]);

foreach ($validated['files'] as $file) {
$document = Document::create([
'collection_id' => $collection->id,
'file_path' => $file->getClientOriginalName(),
'type' => TypesEnum::PDF,
]);
$mimetype = $file->getMimeType();

//if pptx
Log::info($mimetype);

if ($mimetype === 'application/vnd.openxmlformats-officedocument.presentationml.presentation') {
$document = Document::create([
'collection_id' => $collection->id,
'file_path' => $file->getClientOriginalName(),
'type' => TypesEnum::Pptx,
]);
} else {
$document = Document::create([
'collection_id' => $collection->id,
'file_path' => $file->getClientOriginalName(),
'type' => TypesEnum::PDF,
]);
}

$file->storeAs(
path: $collection->id,
Expand Down
44 changes: 44 additions & 0 deletions app/Jobs/ParsePowerPointJob.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
<?php

namespace App\Jobs;

use App\Models\Document;
use Facades\App\Domains\Documents\Transformers\PowerPointTransformer;
use Illuminate\Bus\Batchable;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;

/**
 * Batched queue job that parses a PowerPoint document and adds the resulting
 * per-chunk jobs to the batch it is running in.
 */
class ParsePowerPointJob implements ShouldQueue
{
    use Batchable, Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /**
     * Create a new job instance.
     *
     * @param  Document  $document  the document to parse
     */
    public function __construct(public Document $document)
    {
        //
    }

    /**
     * Execute the job.
     *
     * Does nothing when the batch has been cancelled; otherwise every chunk
     * returned by the transformer is added to the current batch.
     *
     * @throws \LogicException when the job was not dispatched as part of a batch
     */
    public function handle(): void
    {
        // Resolve the batch once and reuse it rather than calling batch() again
        // for the cancellation check and for every chunk.
        $batch = $this->batch();

        if ($batch === null) {
            // Without a batch there is nowhere to add the chunk jobs; fail with a
            // clear message instead of a "member function on null" error.
            throw new \LogicException('ParsePowerPointJob must be dispatched as part of a batch.');
        }

        if ($batch->cancelled()) {
            return;
        }

        // NOTE(review): each $chunk is an array of jobs added on its own, as
        // before; confirm whether the jobs of a chunk are meant to run as a chain.
        foreach (PowerPointTransformer::handle($this->document) as $chunk) {
            $batch->add($chunk);
        }
    }
}
Loading

0 comments on commit 1c044da

Please sign in to comment.