Skip to content

Commit

Permalink
need to make ui for sources and websources
Browse files Browse the repository at this point in the history
  • Loading branch information
alnutile committed May 6, 2024
1 parent 2cb7e62 commit 30836ee
Show file tree
Hide file tree
Showing 24 changed files with 246 additions and 135 deletions.
Binary file modified .DS_Store
Binary file not shown.
3 changes: 2 additions & 1 deletion Modules/LlmDriver/app/BaseClient.php
Original file line number Diff line number Diff line change
Expand Up @@ -240,9 +240,10 @@ public function onQueue(): string
return 'api_request';
}

public function getMaxTokenSize(string $driver) : int
public function getMaxTokenSize(string $driver): int
{
$driver = config("llmdriver.drivers.$driver");

return data_get($driver, 'max_tokens', 8192);
}
}
3 changes: 2 additions & 1 deletion Modules/LlmDriver/tests/Feature/MockClientTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ public function test_Chat(): void

}

public function test_max_token() : void {
public function test_max_token(): void
{
$client = new MockClient();

$results = $client->getMaxTokenSize('claude');
Expand Down
6 changes: 3 additions & 3 deletions Modules/TagFunction/app/TagManager.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public function handle(Document $document): void
And nothing else. Here is the summary:
### START SUMMARY
{$summary}
$summary
### END SUMMARY
EOT;
Expand Down Expand Up @@ -75,11 +75,11 @@ public function handle(Document $document): void
And nothing else. Here is the summary:
### START SUMMARY
{$summary}
$summary
### END SUMMARY
### AND HERE ARE EXISTING TAGS
{$tagsFlat}
$tagsFlat
### END EXISTING TAGS
EOT;
Expand Down
2 changes: 1 addition & 1 deletion app/Console/Commands/ClearAllHorizonQueues.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class ClearAllHorizonQueues extends Command
*
* @var string
*/
protected $signature = 'app:clear-all-horizon-queues';
protected $signature = 'horizon:clear-all-horizon-queues';

/**
* The console command description.
Expand Down
5 changes: 4 additions & 1 deletion app/Domains/Sources/WebSearch/GetPage.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ public function handle(string $url): string
->dismissDialogs()
->fullPage();

$name = str($url)->afterLast('/')->toString().'.pdf';
/**
* @TODO this file name can repeat: the same URL always hashes to the same name
*/
$name = md5($url).'.pdf';

Storage::disk('collections')->put($this->collection->id.'/'.$name, $results->pdf());

Expand Down
19 changes: 7 additions & 12 deletions app/Domains/Sources/WebSearchSource.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ public function handle(Source $source): void
$limit = data_get($meta_data, 'limit', 5);
$driver = data_get($meta_data, 'driver', 'mock');



$prompt = <<<PROMPT
The user is asking to search the web but I want you to review the query and clean it up so I can pass
it to an api to get results: Just return their content but with your rework so I can pass it right to the
Expand All @@ -34,18 +32,17 @@ public function handle(Source $source): void
### END USER QUERY
PROMPT;

Log::info('[LaraChain] Asking LLM to optimize search query');

$response = LlmDriverFacade::driver($source->getDriver())
->completion($prompt);
->completion($prompt);

$search = $response->content;

Log::info('[LaraChain] Starting web search ', [
'content reworked' => $search,
]);


/** @var SearchResponseDto $response */
$response = WebSearchFacade::driver($driver)->search(
Expand All @@ -68,21 +65,19 @@ public function handle(Source $source): void
]);

Bus::batch($jobs)
->name("Getting Web Content for Source - {$source->title}")
->onQueue(LlmDriverFacade::driver($source->getDriver())->onQueue())
->allowFailures()
->dispatch();
->name("Getting Web Content for Source - {$source->title}")
->onQueue(LlmDriverFacade::driver($source->getDriver())->onQueue())
->allowFailures()
->dispatch();

$source->last_run = now();
$source->save();


} catch(\Exception $e) {
} catch (\Exception $e) {
Log::error('[LaraChain] - Error running WebSearchSource', [
'error' => $e->getMessage(),
]);
}


}
}
17 changes: 17 additions & 0 deletions app/Http/Controllers/WebSourceController.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?php

namespace App\Http\Controllers;

use App\Http\Resources\SourceResource;
use App\Models\Collection;

class WebSourceController extends Controller
{
    /**
     * Render the web source index page for a collection.
     *
     * Hands the collection and its sources (ten per page, wrapped in
     * SourceResource) to the 'Sources/WebSource/Index' page component.
     */
    public function index(Collection $collection)
    {
        $pagedSources = $collection->sources()->paginate(10);

        $props = [
            'collection' => $collection,
            'sources' => SourceResource::collection($pagedSources),
        ];

        return inertia('Sources/WebSource/Index', $props);
    }
}
19 changes: 19 additions & 0 deletions app/Http/Resources/SourceResource.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?php

namespace App\Http\Resources;

use Illuminate\Http\Request;
use Illuminate\Http\Resources\Json\JsonResource;

class SourceResource extends JsonResource
{
    /**
     * Convert the wrapped resource to its array form.
     *
     * Nothing is added or removed here; the result is whatever the parent
     * JsonResource produces for the given request.
     *
     * @return array<string, mixed>
     */
    public function toArray(Request $request): array
    {
        $payload = parent::toArray($request);

        return $payload;
    }
}
107 changes: 63 additions & 44 deletions app/Jobs/GetWebContentJob.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@
use App\Models\DocumentChunk;
use App\Models\Source;
use Facades\App\Domains\Sources\WebSearch\GetPage;
use Illuminate\Bus\Batch;
use Illuminate\Bus\Batchable;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\Bus;
use Illuminate\Support\Facades\Log;
use Laravel\Pennant\Feature;
use LlmLaraHub\LlmDriver\LlmDriverFacade;

class GetWebContentJob implements ShouldQueue
Expand Down Expand Up @@ -43,67 +46,83 @@ public function handle(): void
return;
}

Log::info("[Larachain] GetWebContentJob - {$this->source->title} - URL: {$this->webResponseDto->url}");
$html = GetPage::make($this->source->collection)->handle($this->webResponseDto->url);

$results = GetPage::make($this->source->collection)->parseHtml($html);

$page_number = 1;
$guid = md5($this->webResponseDto->url);

/**
* Document can reference a source
*/
$document = Document::updateOrCreate(
[
'source_id' => $this->source->id,
'type' => TypesEnum::HTML
'type' => TypesEnum::HTML,
'file_path' => $this->webResponseDto->url,
'collection_id' => $this->source->collection_id,
],
[
'status' => StatusEnum::Pending,
'status_summary' => StatusEnum::Pending,
'file_path' => $this->webResponseDto->url,
'collection_id' => $this->source->collection_id,
'meta_data' => $this->webResponseDto->toArray(),
]
);

Log::info("[Larachain] GetWebContentJob - {$this->source->title} - URL: {$this->webResponseDto->url}");
$html = GetPage::make($this->source->collection)->handle($this->webResponseDto->url);

/**
* @TODO
* I need to use the token_counter and then break up the string until
* all of it fits into that limit
* In the meantime just doing below
*/

$maxTokenSize = LlmDriverFacade::driver($this->source->getDriver())
->getMaxTokenSize($this->source->getDriver());

$page_number = 1;

$chunks = chunk_string($results, $maxTokenSize);

foreach ($chunks as $chunk) {
$DocumentChunk = DocumentChunk::updateOrCreate(
[
'guid' => $guid . '-' . $page_number,
'document_id' => $document->id,
],
[
'content' => $chunk,
'sort_order' => $page_number,
]
);

Log::info('[Larachain] adding to new batch');

$this->batch()->add([
new VectorlizeDataJob($DocumentChunk),
new SummarizeDataJob($DocumentChunk),
if (! Feature::active('html_to_text')) {
$document->update([
'type' => TypesEnum::PDF,
'file_path' => md5($this->webResponseDto->url).'.pdf',
]);

$page_number++;
$batch = Bus::batch([
new ParsePdfFileJob($document),
])
->name('Process PDF Document - '.$document->id)
->finally(function (Batch $batch) {
//this is triggered in the PdfTransformer class
})
->allowFailures()
->onQueue(LlmDriverFacade::driver($this->source->getDriver())->onQueue())
->dispatch();
} else {
$results = GetPage::make($this->source->collection)->parseHtml($html);

$page_number = 1;
$guid = md5($this->webResponseDto->url);

/**
* @TODO
* I need to use the token_counter and then break up the string until
* all of it fits into that limit
* In the meantime just doing below
*/
$maxTokenSize = LlmDriverFacade::driver($this->source->getDriver())
->getMaxTokenSize($this->source->getDriver());

$page_number = 1;

$chunks = chunk_string($results, $maxTokenSize);

foreach ($chunks as $chunk) {
$DocumentChunk = DocumentChunk::updateOrCreate(
[
'guid' => $guid.'-'.$page_number,
'document_id' => $document->id,
'sort_order' => $page_number,
],
[
'content' => $chunk,
]
);

Log::info('[Larachain] adding to new batch');

$this->batch()->add([
new VectorlizeDataJob($DocumentChunk),
new SummarizeDataJob($DocumentChunk),
]);

$page_number++;
}
}
return;

}
}
2 changes: 1 addition & 1 deletion app/Models/Document.php
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ public function getEmbeddingDriver(): string
return $this->collection->embedding_driver->value;
}

public function source() : BelongsTo
public function source(): BelongsTo
{
return $this->belongsTo(Source::class);
}
Expand Down
3 changes: 1 addition & 2 deletions app/Models/Source.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ public function getId(): int
return $this->collection->getId();
}


public function getDriver(): string
{
return $this->collection->getDriver();
Expand Down Expand Up @@ -78,7 +77,7 @@ public function collection(): BelongsTo
return $this->belongsTo(Collection::class);
}

public function documents() : HasMany
public function documents(): HasMany
{
return $this->hasMany(Document::class);
}
Expand Down
9 changes: 6 additions & 3 deletions app/helpers.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
use Illuminate\Support\Facades\File;
use Illuminate\Support\Facades\Log;
use LlmLaraHub\LlmDriver\HasDrivers;
use Illuminate\Support\Str;
use LlmLaraHub\LlmDriver\Helpers\TrimText;
use SundanceSolutions\LarachainTokenCount\Facades\LarachainTokenCount;

Expand All @@ -28,12 +27,16 @@ function put_fixture($file_name, $content = [], $json = true)
}

if (! function_exists('chunk_string')) {
function chunk_string(string $string, int $maxTokenSize) : array
{
function chunk_string(string $string, int $maxTokenSize): array
{
$tokenCountWithBuffer = token_counter($string) * 1.25; // buffer for the response of the llm

$chunksToMake = ceil($tokenCountWithBuffer / $maxTokenSize) + 2; //still needs a ton of work

/**
* @TODO remove this ignore and fix
*/
/** @phpstan-ignore-next-line */
$chunks = str_split($string, round(strlen($string) / $chunksToMake));

return $chunks;
Expand Down
7 changes: 6 additions & 1 deletion resources/js/Pages/Collection/Show.vue
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,12 @@ const reset = () => {
</td>
<td
class="whitespace-nowrap py-4 pl-4 pr-3 text-sm font-medium text-gray-900 sm:pl-3">
{{ document.file_path }}

<a class="underline" target="_blank" :href="route('download.document', {
collection: collection.data.id,
document_name: document.file_path
})">{{ document.file_path }}</a>

</td>
<td
class="whitespace-nowrap py-4 pl-4 pr-3 text-sm font-medium text-gray-900 sm:pl-3">
Expand Down
Loading

0 comments on commit 30836ee

Please sign in to comment.