From 0bfed835b762c101fb2b9e84b016b20f722c136f Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Sat, 20 Jul 2024 09:25:54 -0400 Subject: [PATCH 01/19] ok fix up some things thanksto stan --- app/Domains/Chat/MetaDataDto.php | 1 + .../Documents/Transformers/CSVTransformer.php | 15 +- app/Domains/EmailParser/EmailClient.php | 3 +- app/Domains/EmailParser/MailDto.php | 4 +- app/Domains/Sources/BaseSource.php | 16 ++ app/Domains/Sources/EmailSource.php | 173 ++++++++---------- app/Helpers/ChatHelperTrait.php | 36 ++++ .../AssistantEmailBoxSourceController.php | 1 + app/Http/Controllers/BaseSourceController.php | 4 + app/Http/Controllers/ChatController.php | 2 +- .../Sources/EmailBoxSourceController.php | 1 + .../Sources/FeedSourceController.php | 1 + .../Sources/GoogleSheetSourceController.php | 1 + .../Sources/JsonSourceController.php | 1 + .../Sources/SiteMapSourceController.php | 1 + .../Sources/WebPageSourceController.php | 1 + .../Sources/WebhookSourceController.php | 1 + app/Jobs/ChunkDocumentJob.php | 38 ++++ app/Models/Source.php | 10 + app/Providers/AppServiceProvider.php | 1 - database/factories/SourceFactory.php | 3 + .../2024_07_20_130256_add_chat_to_soruces.php | 30 +++ tests/Feature/EmailSourceTest.php | 47 ++--- .../WebhookSourceControllerTest.php | 2 + tests/Feature/Jobs/ChunkDocumentJobTest.php | 18 ++ tests/fixtures/.DS_Store | Bin 14340 -> 14340 bytes 26 files changed, 280 insertions(+), 131 deletions(-) create mode 100644 app/Helpers/ChatHelperTrait.php create mode 100644 app/Jobs/ChunkDocumentJob.php create mode 100644 database/migrations/2024_07_20_130256_add_chat_to_soruces.php create mode 100644 tests/Feature/Jobs/ChunkDocumentJobTest.php diff --git a/app/Domains/Chat/MetaDataDto.php b/app/Domains/Chat/MetaDataDto.php index da07d549..58bb6cd2 100644 --- a/app/Domains/Chat/MetaDataDto.php +++ b/app/Domains/Chat/MetaDataDto.php @@ -15,6 +15,7 @@ public function __construct( public mixed $date_range = '', public mixed $input = '', public mixed 
$driver = '', + public mixed $source = '', public mixed $reference_collection_id = '', ) { diff --git a/app/Domains/Documents/Transformers/CSVTransformer.php b/app/Domains/Documents/Transformers/CSVTransformer.php index 85c5c604..12934481 100644 --- a/app/Domains/Documents/Transformers/CSVTransformer.php +++ b/app/Domains/Documents/Transformers/CSVTransformer.php @@ -26,7 +26,6 @@ public function handle(Document $document): array $filePath = $this->document->pathToFile(); - //$filePath = null, string $disk = null, string $readerType = null $collection = (new DocumentsImport()) ->toCollection($filePath, null, $this->readerType); @@ -47,6 +46,20 @@ public function handle(Document $document): array return remove_ascii($key.': '.$item); })->implode("\n"); + /** + * @TODO + * We have the text but what does the user want to do with the text + * 1) Here we should have a source with a chat_id or make the chat id + * 2) this becomes a message (that is a lot of them?) + * 3) then the LLM gets the sources prompt and sees what the user wants to do with the data. 
+ * 4) Example "Take these dates and save them to the document start and end data then save the content to the document as an event" + * Then tag the document by the Region seen in the data (or hard coded in the prompt) + * 5) The Prompt using OrchestrateV2 should take the Chat and Message and start building out the results + * this will update or create a document + * this will find start_date and end_date new fields in a document + * this will tag the document Region: Foobar + * NOTE: We already have date_range so bummer it is created_at + */ $file_name = 'row_'.$rowNumber.'_'.str($document->file_path)->beforeLast('.')->toString().'.txt'; Storage::disk('collections') diff --git a/app/Domains/EmailParser/EmailClient.php b/app/Domains/EmailParser/EmailClient.php index 8fc9b57f..3604487e 100644 --- a/app/Domains/EmailParser/EmailClient.php +++ b/app/Domains/EmailParser/EmailClient.php @@ -35,7 +35,7 @@ public function setConfig(array $config): self * @throws \Webklex\PHPIMAP\Exceptions\MessageNotFoundException * @throws \Webklex\PHPIMAP\Exceptions\RuntimeException */ - public function handle(CredentialsDto $credentials, bool $delete = true): array + public function handle(CredentialsDto $credentials, bool $delete = false): array { $mail = []; @@ -107,6 +107,7 @@ public function handle(CredentialsDto $credentials, bool $delete = true): array 'subject' => $message->getSubject(), 'date' => $message->getDate()->toString(), 'header' => $message->getHeader()->raw, + 'email_message' => $message, ]); $mail[] = $messageDto; diff --git a/app/Domains/EmailParser/MailDto.php b/app/Domains/EmailParser/MailDto.php index db3b86b1..307f5634 100644 --- a/app/Domains/EmailParser/MailDto.php +++ b/app/Domains/EmailParser/MailDto.php @@ -3,6 +3,7 @@ namespace App\Domains\EmailParser; use Spatie\LaravelData\Data; +use Webklex\PHPIMAP\Message; class MailDto extends Data { @@ -12,7 +13,8 @@ public function __construct( public ?string $to, public ?string $body, public ?string $header, - 
public ?string $date + public ?string $date, + public ?Message $email_message = null ) { } diff --git a/app/Domains/Sources/BaseSource.php b/app/Domains/Sources/BaseSource.php index 0892bf1b..ad1d044a 100644 --- a/app/Domains/Sources/BaseSource.php +++ b/app/Domains/Sources/BaseSource.php @@ -4,6 +4,7 @@ use App\Domains\Prompts\PromptMerge; use App\Domains\UnStructured\StructuredTypeEnum; +use App\Helpers\ChatHelperTrait; use App\Jobs\DocumentProcessingCompleteJob; use App\Jobs\SummarizeDocumentJob; use App\Jobs\VectorlizeDataJob; @@ -21,6 +22,8 @@ abstract class BaseSource { + use ChatHelperTrait; + public string $batchTitle = 'Chunking Source'; public static string $description = 'Sources are ways we get data into the system. They are the core of the system.'; @@ -215,4 +218,17 @@ protected function getEmailSummary(Document $document): string return $content; } + + public function getSourceFromSlug(string $slug): ?Source + { + $source = Source::where('type', $this->sourceTypeEnum) + ->slug($slug) + ->first(); + + if ($source) { + return $source; + } + + return null; + } } diff --git a/app/Domains/Sources/EmailSource.php b/app/Domains/Sources/EmailSource.php index e6de92ca..1d5091ba 100644 --- a/app/Domains/Sources/EmailSource.php +++ b/app/Domains/Sources/EmailSource.php @@ -2,13 +2,20 @@ namespace App\Domains\Sources; +use App\Domains\Chat\MetaDataDto; +use App\Domains\Documents\StatusEnum; +use App\Domains\Documents\TypesEnum; use App\Domains\EmailParser\MailDto; +use App\Domains\Messages\RoleEnum; +use App\Domains\Prompts\PromptMerge; +use App\Jobs\ChunkDocumentJob; use App\Models\Document; use App\Models\Source; -use App\Models\Transformer; use Facades\App\Domains\EmailParser\Client; -use Facades\App\Domains\Transformers\EmailTransformer; +use Illuminate\Support\Facades\Bus; use Illuminate\Support\Facades\Log; +use Laravel\Prompts\Prompt; +use LlmLaraHub\LlmDriver\LlmDriverFacade; class EmailSource extends BaseSource { @@ -38,7 +45,7 @@ public function 
handle(Source $source): void return; } - $this->source = $source; + $this->source = $this->checkForChat($source); $this->content = $this->mailDto->getContent(); @@ -46,102 +53,72 @@ public function handle(Source $source): void $this->meta_data = $this->mailDto->toArray(); - $this->transformers = $source->transformers; - Log::info('[LaraChain] - Running Email Source'); - /** - * @TODO - * I missed the point here. I just need to keep making tools work - * and the prompt the user gives in the Source - * let it do the work. - */ - try { - /** - * @TODO - * This turns the email into a document but what if the user wants to do something with the - * data in the email. Like Parse URLs or Recipe ideas etc - * The Prompt of the Source should drive all of this - */ - $baseSource = EmailTransformer::transform(baseSource: $this); - /** - * @NOTE - * Examples - * Example One: Maybe there is 1 transformer to make a reply to the email - * Transformer 1 of 1 ReplyTo Email - * Take the email - * Use Collection as voice - * Make reply to email - * The Transformer as an Output attached to it and the reply is sent. 
- * - * Example Two: CRM Transformer - * Take the email and make document (Type Email) and chunks from the email - * After that take the content and make who is it to, who is it from - * and make Documents for each for those of type Contact - * Relate those to the document (Type Email) - * and now there are relations for later use - * - * @TODO - * some transformers assume they are never 0 in the chain - * like CRM assumes the one before was EmailTransformer - * and the document is set - */ - Log::info("[LaraChain] - Source has Transformers let's figure out which one to run"); - - foreach ($source->transformers as $transformerChainLink) { - $class = '\\App\\Domains\\Transformers\\'.$transformerChainLink->type->name; - if (class_exists($class)) { - $facade = '\\Facades\\App\\Domains\\Transformers\\'.$transformerChainLink->type->name; - $baseSource = $facade::transform($this); - } else { - Log::info('[LaraChain] - No Class found ', [ - 'class' => $class, - ]); - } - } - $this->batchTransformedSource($baseSource, $source); - - } catch (\Exception $e) { - Log::error('[LaraChain] - Error running Email Source', [ - 'error' => $e->getMessage(), - ]); - } - - } - - public function getSourceFromSlug(string $slug): ?Source - { - $source = Source::where('type', $this->sourceTypeEnum) - ->slug($slug) - ->first(); - - if ($source) { - return $source; - } - - return null; - } - - protected function getSummarizeDocumentPrompt(): string - { - if (str($this->source->details)->contains('[CONTEXT]')) { - return $this->source->details; - } - - return <<<'PROMPT' - -The following content is from an email. I would like you to summarize it with the following format. - -To: **TO HERE** -From: **From Here** -Subject: **Subject Here** -Body: -**Summary Here** - - -** CONTEXT IS BELOW THIS LINE** -[CONTEXT] -PROMPT; - + //use the users source prompt to create this next step + //track the results in a chat_id thread + //and as a message? 
+ //then using the source the results will do something with the content + // and then delete the email + // but we can leave it if it returns false? + // still too much coding there how do I let the tool do it. + + $prompt = PromptMerge::merge( + ['[CONTEXT]'], + [$this->content], + $source->getPrompt() + ); + + $chat = $source->chat; + + $chat->addInput( + message: $prompt, + role: RoleEnum::User, + show_in_thread: true, + meta_data: MetaDataDto::from([ + 'driver' => $source->getDriver(), + 'source' => $source->title, + ]), + ); + + $results = LlmDriverFacade::driver( + $source->getDriver() + )->completion($prompt); + + $chat->addInput( + message: $results->content, + role: RoleEnum::Assistant, + show_in_thread: true, + meta_data: MetaDataDto::from([ + 'driver' => $source->getDriver(), + 'source' => $source->title, + ]), + ); + + //@TODO how to look for false + // surface this "power" into the UI. + // tag or store the fact we checked this emails + + $document = Document::updateOrCreate([ + 'source_id' => $source->id, + 'type' => TypesEnum::Email, + 'subject' => $this->mailDto->subject, + 'collection_id' => $source->collection_id, + ], [ + 'summary' => $results->content, + 'meta_data' => $this->mailDto->toArray(), + 'original_content' => $this->mailDto->body, + 'status_summary' => StatusEnum::Pending, + 'status' => StatusEnum::Pending, + ]); + + Bus::batch([new ChunkDocumentJob($document)]) + ->name("Processing Email {$this->mailDto->subject}") + ->allowFailures() + ->dispatch(); + + //should we delete the email? + // right now it gets set to seen + // on the MailDto we have options } } diff --git a/app/Helpers/ChatHelperTrait.php b/app/Helpers/ChatHelperTrait.php new file mode 100644 index 00000000..1ac3c767 --- /dev/null +++ b/app/Helpers/ChatHelperTrait.php @@ -0,0 +1,36 @@ +chat_id) { + //@NOTE should I go to Source as the chatable? 
+ $chat = Chat::create([ + 'chatable_id' => $source->collection_id, + 'chatable_type' => Collection::class, + 'user_id' => $source->collection->team?->user_id, + ]); + $source->update([ + 'chat_id' => $chat->id, + ]); + } + + return $source->refresh(); + } + + public function getUserId(Collection $collection): ?int + { + if (auth()->check()) { + return auth()->user()->id; + } + + return $collection->team?->user_id; + } +} diff --git a/app/Http/Controllers/AssistantEmailBoxSourceController.php b/app/Http/Controllers/AssistantEmailBoxSourceController.php index ed03ea87..ef32e200 100644 --- a/app/Http/Controllers/AssistantEmailBoxSourceController.php +++ b/app/Http/Controllers/AssistantEmailBoxSourceController.php @@ -31,6 +31,7 @@ protected function makeSource(array $validated, Collection $collection): void 'recurring' => $validated['recurring'], 'active' => $validated['active'], 'collection_id' => $collection->id, + 'user_id' => $this->getUserId($collection), 'type' => $this->sourceTypeEnum, 'slug' => str(Str::random(12))->remove('+')->toString(), 'meta_data' => [], diff --git a/app/Http/Controllers/BaseSourceController.php b/app/Http/Controllers/BaseSourceController.php index 3f644c2d..7cf64545 100644 --- a/app/Http/Controllers/BaseSourceController.php +++ b/app/Http/Controllers/BaseSourceController.php @@ -4,6 +4,7 @@ use App\Domains\Recurring\RecurringTypeEnum; use App\Domains\Sources\SourceTypeEnum; +use App\Helpers\ChatHelperTrait; use App\Http\Resources\CollectionResource; use App\Http\Resources\DocumentResource; use App\Http\Resources\FilterResource; @@ -16,6 +17,8 @@ class BaseSourceController extends Controller { + use ChatHelperTrait; + protected SourceTypeEnum $sourceTypeEnum = SourceTypeEnum::WebSearchSource; protected string $edit_path = 'Sources/WebSource/Edit'; @@ -60,6 +63,7 @@ protected function makeSource(array $validated, Collection $collection): void 'active' => $validated['active'], 'collection_id' => $collection->id, 'type' => 
$this->sourceTypeEnum, + 'user_id' => $this->getUserId($collection), 'meta_data' => [ 'driver' => 'brave', 'limit' => 5, diff --git a/app/Http/Controllers/ChatController.php b/app/Http/Controllers/ChatController.php index d297022f..bb2ad09e 100644 --- a/app/Http/Controllers/ChatController.php +++ b/app/Http/Controllers/ChatController.php @@ -58,7 +58,7 @@ function ($item) { 'audiences' => AudienceResource::collection(Audience::all()), 'system_prompt' => $collection->systemPrompt(), 'settings' => [ - 'supports_functions' => LlmDriverFacade::driver($chat->getDriver())->hasFunctions(), + 'supports_functions' => LlmDriverFacade::driver($chat->getDriver())->hasFunctions(), ], 'messages' => MessageResource::collection($chat->latest_messages), ]); diff --git a/app/Http/Controllers/Sources/EmailBoxSourceController.php b/app/Http/Controllers/Sources/EmailBoxSourceController.php index bab4cdad..b39936aa 100644 --- a/app/Http/Controllers/Sources/EmailBoxSourceController.php +++ b/app/Http/Controllers/Sources/EmailBoxSourceController.php @@ -35,6 +35,7 @@ protected function makeSource(array $validated, Collection $collection): void 'details' => $validated['details'], 'recurring' => $validated['recurring'], 'active' => $validated['active'], + 'user_id' => $this->getUserId($collection), 'collection_id' => $collection->id, 'type' => $this->sourceTypeEnum, 'slug' => str(Str::random(12))->remove('+')->toString(), diff --git a/app/Http/Controllers/Sources/FeedSourceController.php b/app/Http/Controllers/Sources/FeedSourceController.php index 9dcdc4de..a19ba614 100644 --- a/app/Http/Controllers/Sources/FeedSourceController.php +++ b/app/Http/Controllers/Sources/FeedSourceController.php @@ -49,6 +49,7 @@ protected function makeSource(array $validated, Collection $collection): void 'title' => $validated['title'], 'details' => $validated['details'], 'recurring' => $validated['recurring'], + 'user_id' => $this->getUserId($collection), 'active' => $validated['active'], 'collection_id' => 
$collection->id, 'type' => $this->sourceTypeEnum, diff --git a/app/Http/Controllers/Sources/GoogleSheetSourceController.php b/app/Http/Controllers/Sources/GoogleSheetSourceController.php index 13f4b48a..0cec5ef4 100644 --- a/app/Http/Controllers/Sources/GoogleSheetSourceController.php +++ b/app/Http/Controllers/Sources/GoogleSheetSourceController.php @@ -53,6 +53,7 @@ protected function makeSource(array $validated, Collection $collection): void 'recurring' => $validated['recurring'], 'active' => $validated['active'], 'collection_id' => $collection->id, + 'user_id' => $this->getUserId($collection), 'type' => $this->sourceTypeEnum, 'meta_data' => $validated['meta_data'], ]); diff --git a/app/Http/Controllers/Sources/JsonSourceController.php b/app/Http/Controllers/Sources/JsonSourceController.php index 0399d3bb..6c81950e 100644 --- a/app/Http/Controllers/Sources/JsonSourceController.php +++ b/app/Http/Controllers/Sources/JsonSourceController.php @@ -44,6 +44,7 @@ protected function makeSource(array $validated, Collection $collection): void 'recurring' => $validated['recurring'], 'active' => $validated['active'], 'collection_id' => $collection->id, + 'user_id' => $this->getUserId($collection), 'slug' => str(Str::random(16))->toString(), 'type' => $this->sourceTypeEnum, 'meta_data' => json_decode($validated['meta_data'], true, 512), diff --git a/app/Http/Controllers/Sources/SiteMapSourceController.php b/app/Http/Controllers/Sources/SiteMapSourceController.php index 638462ce..85f70d3d 100644 --- a/app/Http/Controllers/Sources/SiteMapSourceController.php +++ b/app/Http/Controllers/Sources/SiteMapSourceController.php @@ -48,6 +48,7 @@ protected function makeSource(array $validated, Collection $collection): void Source::create([ 'title' => $validated['title'], 'details' => $validated['details'], + 'user_id' => $this->getUserId($collection), 'recurring' => $validated['recurring'], 'active' => $validated['active'], 'collection_id' => $collection->id, diff --git 
a/app/Http/Controllers/Sources/WebPageSourceController.php b/app/Http/Controllers/Sources/WebPageSourceController.php index 65781918..cf2d8abf 100644 --- a/app/Http/Controllers/Sources/WebPageSourceController.php +++ b/app/Http/Controllers/Sources/WebPageSourceController.php @@ -42,6 +42,7 @@ protected function makeSource(array $validated, Collection $collection): void 'active' => $validated['active'], 'collection_id' => $collection->id, 'type' => $this->sourceTypeEnum, + 'user_id' => $this->getUserId($collection), 'meta_data' => $validated['meta_data'], ]); } diff --git a/app/Http/Controllers/Sources/WebhookSourceController.php b/app/Http/Controllers/Sources/WebhookSourceController.php index 6f91a623..1773911c 100644 --- a/app/Http/Controllers/Sources/WebhookSourceController.php +++ b/app/Http/Controllers/Sources/WebhookSourceController.php @@ -34,6 +34,7 @@ protected function makeSource(array $validated, Collection $collection): void 'details' => $validated['details'], 'recurring' => $validated['recurring'], 'active' => $validated['active'], + 'user_id' => $this->getUserId($collection), 'collection_id' => $collection->id, 'slug' => str(Str::random(16))->toString(), 'type' => $this->sourceTypeEnum, diff --git a/app/Jobs/ChunkDocumentJob.php b/app/Jobs/ChunkDocumentJob.php new file mode 100644 index 00000000..643d4528 --- /dev/null +++ b/app/Jobs/ChunkDocumentJob.php @@ -0,0 +1,38 @@ +batch()->cancelled()) { + // Determine if the batch has been cancelled... 
+ return; + } + + } +} diff --git a/app/Models/Source.php b/app/Models/Source.php index 18de9813..400ccf39 100644 --- a/app/Models/Source.php +++ b/app/Models/Source.php @@ -43,6 +43,11 @@ protected static function booted(): void }); } + public function getPrompt(): string + { + return $this->details; + } + public function getChatable(): HasDrivers { return $this->collection->getChatable(); @@ -77,6 +82,11 @@ public function getDriver(): string return $this->collection->getDriver(); } + public function chat(): BelongsTo + { + return $this->belongsTo(Chat::class); + } + public function getEmbeddingDriver(): string { return $this->collection->getEmbeddingDriver(); diff --git a/app/Providers/AppServiceProvider.php b/app/Providers/AppServiceProvider.php index 9603aae5..9b0aa424 100644 --- a/app/Providers/AppServiceProvider.php +++ b/app/Providers/AppServiceProvider.php @@ -65,7 +65,6 @@ public function boot(): void return false; }); - Feature::define('verification_prompt', function (User $user) { return false; }); diff --git a/database/factories/SourceFactory.php b/database/factories/SourceFactory.php index a0883505..91da122b 100644 --- a/database/factories/SourceFactory.php +++ b/database/factories/SourceFactory.php @@ -4,6 +4,7 @@ use App\Domains\Recurring\RecurringTypeEnum; use App\Domains\Sources\SourceTypeEnum; +use App\Models\Chat; use App\Models\Collection; use Illuminate\Database\Eloquent\Factories\Factory; @@ -23,6 +24,8 @@ public function definition(): array 'title' => $this->faker->name, 'slug' => fake()->word, 'collection_id' => Collection::factory(), + 'user_id' => null, + 'chat_id' => Chat::factory(), 'details' => $this->faker->sentence, // 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
'recurring' => RecurringTypeEnum::Daily, 'active' => true, diff --git a/database/migrations/2024_07_20_130256_add_chat_to_soruces.php b/database/migrations/2024_07_20_130256_add_chat_to_soruces.php new file mode 100644 index 00000000..e45050ea --- /dev/null +++ b/database/migrations/2024_07_20_130256_add_chat_to_soruces.php @@ -0,0 +1,30 @@ +foreignIdFor(\App\Models\Chat::class)->nullable(); + $table->foreignIdFor(\App\Models\User::class)->nullable(); + }); + } + + /** + * Reverse the migrations. + */ + public function down(): void + { + Schema::table('sources', function (Blueprint $table) { + $table->dropColumn('chat_id'); + $table->dropColumn('user_id'); + }); + } +}; diff --git a/tests/Feature/EmailSourceTest.php b/tests/Feature/EmailSourceTest.php index 81b13dab..d23c7940 100644 --- a/tests/Feature/EmailSourceTest.php +++ b/tests/Feature/EmailSourceTest.php @@ -2,17 +2,14 @@ namespace Tests\Feature; -use App\Domains\Documents\TypesEnum; use App\Domains\EmailParser\MailDto; use App\Domains\Sources\SourceTypeEnum; -use App\Domains\Transformers\TypeEnum; -use App\Domains\UnStructured\StructuredTypeEnum; -use App\Models\Document; use App\Models\Source; -use App\Models\Transformer; use Facades\App\Domains\EmailParser\Client; use Facades\App\Domains\Sources\EmailSource; use Illuminate\Support\Facades\Bus; +use LlmLaraHub\LlmDriver\LlmDriverFacade; +use LlmLaraHub\LlmDriver\Responses\CompletionResponse; use Tests\TestCase; class EmailSourceTest extends TestCase @@ -46,6 +43,12 @@ public function test_batches() 'type' => SourceTypeEnum::EmailSource, ]); + LlmDriverFacade::shouldReceive('driver->completion')->once()->andReturn( + CompletionResponse::from([ + 'content' => 'foo bar', + ]) + ); + $body = <<<'BODY' Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. 
Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut.Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. @@ -69,8 +72,6 @@ public function test_batches() $this->assertDatabaseCount('documents', 1); - $this->assertDatabaseCount('document_chunks', 8); - Bus::assertBatchCount(1); } @@ -87,7 +88,7 @@ public function test_run() $source->run(); } - public function test_related_tranformers() + public function tests_creates_chat_and_message() { Bus::fake(); $source = Source::factory()->create([ @@ -95,6 +96,12 @@ public function test_related_tranformers() 'type' => SourceTypeEnum::EmailSource, ]); + LlmDriverFacade::shouldReceive('driver->completion')->once()->andReturn( + CompletionResponse::from([ + 'content' => 'foo bar', + ]) + ); + $body = <<<'BODY' Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut.Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. 
Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. @@ -105,17 +112,6 @@ public function test_related_tranformers() BODY; - $transformer = Transformer::factory()->create( - [ - 'transformable_id' => $source->id, - 'transformable_type' => Source::class, - 'parent_id' => null, - 'last_run' => null, - 'active' => true, - 'type' => TypeEnum::CrmTransformer, - ] - ); - $dto = MailDto::from([ 'to' => 'info+12345@llmassistant.io', 'from' => 'foo@var.com', @@ -127,17 +123,12 @@ public function test_related_tranformers() $emailSource = new \App\Domains\Sources\EmailSource(); $emailSource->setMailDto($dto)->handle($source); - $this->assertDatabaseCount('documents', 3); + $this->assertDatabaseCount('documents', 1); + $this->assertDatabaseCount('chats', 1); + $this->assertDatabaseCount('messages', 2); - $this->assertDatabaseCount('document_chunks', 10); + $this->assertNotNull($source->chat_id); Bus::assertBatchCount(1); - - $documentTo = Document::whereType(TypesEnum::Contact)->exists(); - $this->assertTrue($documentTo); - $documentTo = Document::where('child_type', StructuredTypeEnum::EmailFrom)->exists(); - $this->assertTrue($documentTo); - $documentTo = Document::where('child_type', StructuredTypeEnum::EmailTo)->exists(); - $this->assertTrue($documentTo); } } diff --git a/tests/Feature/Http/Controllers/WebhookSourceControllerTest.php b/tests/Feature/Http/Controllers/WebhookSourceControllerTest.php index 954fa5b9..c4d0baae 100644 --- a/tests/Feature/Http/Controllers/WebhookSourceControllerTest.php +++ b/tests/Feature/Http/Controllers/WebhookSourceControllerTest.php @@ -39,6 +39,8 @@ public function test_store(): void $this->assertEquals(SourceTypeEnum::WebhookSource, $source->type); $this->assertEquals('foobar', 
$source->secrets['token']); + + $this->assertNotNull($source->user_id); } public function test_update() diff --git a/tests/Feature/Jobs/ChunkDocumentJobTest.php b/tests/Feature/Jobs/ChunkDocumentJobTest.php new file mode 100644 index 00000000..a5f716d7 --- /dev/null +++ b/tests/Feature/Jobs/ChunkDocumentJobTest.php @@ -0,0 +1,18 @@ +get('/'); + + $response->assertStatus(200); + } +} diff --git a/tests/fixtures/.DS_Store b/tests/fixtures/.DS_Store index 45af69f5d6bf2bebe08cdf94ff12fb1e21466fc4..5fb14f776abfccb74c3a7b1507e559338be85783 100644 GIT binary patch delta 1449 zcmds$OKjXk7{}-Dq}lPBHk0RUQtdWvN(H2kZ7DQ`ywf6*Hce>~@@Q%B#?xfLYcFf> zCMk!#1l0D(BJn5!L_xfyQV}6QoRARWz(Yb2QZGmd2@r4r0Vkl0?WG_D2gD5@#{RyU z??3a+|L2w5O77Cr4O_IIGdWv&emZ>_T2=MBni$pN#x>lCPsHz>l5?(8zS6um%?h44 zCT!D}MRCCNg*WAkC4Wqw7Bilk_Xl0OkacFvrSVF(yqhV&?y?*D7 zo!tYs4If$CP|ND-8w_JcTEg+=DJk+3i_-EJrcG~7IF@wgCyUaP8C#4yW>(Zyb?f?C zT`k+d6TFqOb~;ta$omS?UkvU$)aZmLSWjc%Wx~^I68EQ{6$A};fStcvwG4c$#Kwc$pk$1`aRG%fhX}4 zF5-2(fj99EKEQ|g2%q6=e1mWC6Rv3WTBFvaHEUc;Xvyf{8Y&G;UTq}&&~}!4>RY$l zUNM)k-3%dG;ctmf8S&fWL;sEFq@!?GI%Tjb#j76H?(I+^DnaLd!(i2l zTz#!|DAB@=9-~hW{uoBW#RG#%D&Ua*8z9QBB$A~Dv?!Z9umfrA#vWyLH+s<*+I?7= zJ&L24#&Ko%NoDy0?tzUg94ulPr$g5t3|&8mG9JbwcodK00-nS3cmXfsCA^GFe{pyC m&+b+eg0};ESLa)vttq#man)$+rg!|_?f<)3wi-+S68jC_3QHOQ delta 1476 zcmds#Uu+ab9LMLog?9Hw=^X9#?s}`#6cE!=&uS1-DlPqk2&DwB+#kVauRFG5w|nR9 z_FAfVFZEA+F>p~s;zJSCH$!MhNQe^UMG~I$66Hk`GSj;!4ZrhDg3>3CB$)Z$W8Tx4yAS)k5?E{a7d+qB&zR#TVV`owlbc z8_R+(j2B%|u$^PP!$CTloT+%|@+RRcqm9wqa9;PLG@NGGhhP zJI!R5Rn4z#?$Gt;y~( z%+!KLPw%99x*WU{J5W#VZlQW+sxUVvwK(1|!RDoUdcRChvSz>F?HVzKTkwnWJ;tPttasj^Y5FSS(O6hNqNh~$wOTzFX{o5K zfeZIX_^+uEl_bbkvXcyu6cMCEPLVgrS@Iz{PcD(GKLdlMbnjgIIhKXLNde`RgY)Z zRP$MWgm$WuA+|mUx6*B@q=z*IVOL_iDuHE+AJSx^SCdq-h9K-p?9fz(AlOCwG|4Ah zS8E@Q#iRW?rHn{i{VsbG!>WXgss9Fr>e<*sl?W2pf-Y&bXLK+6up7@{4+ilZ4qyz2 zr0H4Ya2zKvi5Y47EG*bqz#>lK6)fX*>HAxFTe|)(-oyJ?!8x4A1>gTG_-u`%``0*% f&pk_5kH#-FuF{Tvb^0IG{~yJ#G_E!!e{cE|EWKK@ From 345972af0faa4e18ce03082b466b0753cbdc6d94 Mon Sep 17 
00:00:00 2001 From: Alfred Nutile Date: Sun, 21 Jul 2024 08:35:47 -0400 Subject: [PATCH 02/19] email source working with false option that is great --- app/Domains/Sources/BaseSource.php | 3 +- app/Domains/Sources/EmailSource.php | 107 +++++++++--------- app/Helpers/ChatHelperTrait.php | 15 +++ app/Jobs/ChunkDocumentJob.php | 68 +++++++++++ .../EmailSource/Components/Resources.vue | 4 +- tests/Feature/EmailSourceTest.php | 45 ++++++++ tests/Feature/Jobs/ChunkDocumentJobTest.php | 21 +++- 7 files changed, 205 insertions(+), 58 deletions(-) diff --git a/app/Domains/Sources/BaseSource.php b/app/Domains/Sources/BaseSource.php index ad1d044a..98de2a8f 100644 --- a/app/Domains/Sources/BaseSource.php +++ b/app/Domains/Sources/BaseSource.php @@ -18,11 +18,12 @@ use Illuminate\Support\Facades\Bus; use Illuminate\Support\Facades\Log; use LlmLaraHub\LlmDriver\LlmDriverFacade; +use LlmLaraHub\LlmDriver\ToolsHelper; use LlmLaraHub\TagFunction\Jobs\TagDocumentJob; abstract class BaseSource { - use ChatHelperTrait; + use ChatHelperTrait, ToolsHelper; public string $batchTitle = 'Chunking Source'; diff --git a/app/Domains/Sources/EmailSource.php b/app/Domains/Sources/EmailSource.php index 1d5091ba..ee4406b7 100644 --- a/app/Domains/Sources/EmailSource.php +++ b/app/Domains/Sources/EmailSource.php @@ -53,72 +53,73 @@ public function handle(Source $source): void $this->meta_data = $this->mailDto->toArray(); - Log::info('[LaraChain] - Running Email Source'); - - //use the users source prompt to create this next step - //track the results in a chat_id thread - //and as a message? - //then using the source the results will do something with the content - // and then delete the email - // but we can leave it if it returns false? - // still too much coding there how do I let the tool do it. 
- $prompt = PromptMerge::merge( ['[CONTEXT]'], [$this->content], $source->getPrompt() ); + Log::info('[LaraChain] - Running Email Source', [ + 'prompt' => $prompt, + ]); + + $chat = $source->chat; - $chat->addInput( - message: $prompt, - role: RoleEnum::User, - show_in_thread: true, - meta_data: MetaDataDto::from([ - 'driver' => $source->getDriver(), - 'source' => $source->title, - ]), - ); + $results = LlmDriverFacade::driver( $source->getDriver() )->completion($prompt); - $chat->addInput( - message: $results->content, - role: RoleEnum::Assistant, - show_in_thread: true, - meta_data: MetaDataDto::from([ - 'driver' => $source->getDriver(), - 'source' => $source->title, - ]), - ); - - //@TODO how to look for false - // surface this "power" into the UI. - // tag or store the fact we checked this emails - - $document = Document::updateOrCreate([ - 'source_id' => $source->id, - 'type' => TypesEnum::Email, - 'subject' => $this->mailDto->subject, - 'collection_id' => $source->collection_id, - ], [ - 'summary' => $results->content, - 'meta_data' => $this->mailDto->toArray(), - 'original_content' => $this->mailDto->body, - 'status_summary' => StatusEnum::Pending, - 'status' => StatusEnum::Pending, - ]); - - Bus::batch([new ChunkDocumentJob($document)]) - ->name("Processing Email {$this->mailDto->subject}") - ->allowFailures() - ->dispatch(); + if($this->ifNotActionRequired($results->content)) { + Log::info('[LaraChain] - Email Source Skipping', [ + 'prompt' => $prompt, + ]); + } else { + + $userMessage = $chat->addInput( + message: $prompt, + role: RoleEnum::User, + show_in_thread: true, + meta_data: MetaDataDto::from([ + 'driver' => $source->getDriver(), + 'source' => $source->title, + ]), + ); + + $document = Document::updateOrCreate([ + 'source_id' => $source->id, + 'type' => TypesEnum::Email, + 'subject' => $this->mailDto->subject, + 'collection_id' => $source->collection_id, + ], [ + 'summary' => $results->content, + 'meta_data' => $this->mailDto->toArray(), + 
'original_content' => $this->mailDto->body, + 'status_summary' => StatusEnum::Pending, + 'status' => StatusEnum::Pending, + ]); + + Bus::batch([new ChunkDocumentJob($document)]) + ->name("Processing Email {$this->mailDto->subject}") + ->allowFailures() + ->dispatch(); + + $assistantMessage = $chat->addInput( + message: $results->content, + role: RoleEnum::Assistant, + show_in_thread: true, + meta_data: MetaDataDto::from([ + 'driver' => $source->getDriver(), + 'source' => $source->title, + ]), + ); + + $this->savePromptHistory( + message: $assistantMessage, + prompt: $prompt); + } - //should we delete the email? - // right now it gets set to seen - // on the MailDto we have options } } diff --git a/app/Helpers/ChatHelperTrait.php b/app/Helpers/ChatHelperTrait.php index 1ac3c767..344e0b78 100644 --- a/app/Helpers/ChatHelperTrait.php +++ b/app/Helpers/ChatHelperTrait.php @@ -33,4 +33,19 @@ public function getUserId(Collection $collection): ?int return $collection->team?->user_id; } + + public function ifNotActionRequired(string $results): bool { + // @NOTE llms sometimes do not return the right + // string for example. + // false becomes false, "false" or "False" etc. 
+ + $results = str($results) + ->trim() + ->lower() + ->remove('"') + ->remove("'") + ->toString(); + + return $results == "false"; + } } diff --git a/app/Jobs/ChunkDocumentJob.php b/app/Jobs/ChunkDocumentJob.php index 643d4528..336a512f 100644 --- a/app/Jobs/ChunkDocumentJob.php +++ b/app/Jobs/ChunkDocumentJob.php @@ -2,7 +2,9 @@ namespace App\Jobs; +use App\Helpers\TextChunker; use App\Models\Document; +use App\Models\DocumentChunk; use Illuminate\Bus\Batch; use Illuminate\Bus\Batchable; use Illuminate\Bus\Queueable; @@ -10,7 +12,16 @@ use Illuminate\Foundation\Bus\Dispatchable; use Illuminate\Queue\InteractsWithQueue; use Illuminate\Queue\SerializesModels; +use Illuminate\Support\Facades\Bus; +use Illuminate\Support\Facades\Log; +use LlmLaraHub\LlmDriver\LlmDriverFacade; +use LlmLaraHub\TagFunction\Jobs\TagDocumentJob; +/** + * @NOTE + * Only really good for chunking content that can fit in the Original Content + * of a Document model. + */ class ChunkDocumentJob implements ShouldQueue { use Batchable; @@ -34,5 +45,62 @@ public function handle(): void return; } + + Log::info('[LaraChain] - Chunking Document', [ + 'document' => $this->document->id, + ]); + + $document = $this->document; + + $page_number = 0; + + $pageContent = $this->document->original_content; + + $size = config('llmdriver.chunking.default_size'); + + $chunked_chunks = TextChunker::handle($pageContent, $size); + + foreach ($chunked_chunks as $chunkSection => $chunkContent) { + $guid = md5($chunkContent); + + $DocumentChunk = DocumentChunk::updateOrCreate( + [ + 'document_id' => $document->id, + 'sort_order' => $page_number, + 'section_number' => $chunkSection, + ], + [ + 'guid' => $guid, + 'content' => to_utf8($chunkContent), + ] + ); + + $chunks[] = [ + new VectorlizeDataJob($DocumentChunk), + ]; + } + + $name = sprintf($document->type->name, $document->id); + + + + Bus::batch($chunks) + ->name($name) + ->allowFailures() + ->finally(function (Batch $batch) use ($document) { + + Bus::batch([ + [ 
+ new SummarizeDocumentJob($document), + new TagDocumentJob($document), + new DocumentProcessingCompleteJob($document), + ] + ]) + ->name(sprintf("Final Document Steps Document %s id %d", $document->type->name, $document->id)) + ->allowFailures() + ->dispatch(); + }) + ->onQueue(LlmDriverFacade::driver($document->getDriver())->onQueue()) + ->dispatch(); } } diff --git a/resources/js/Pages/Sources/EmailSource/Components/Resources.vue b/resources/js/Pages/Sources/EmailSource/Components/Resources.vue index 3195660b..6c3555c3 100644 --- a/resources/js/Pages/Sources/EmailSource/Components/Resources.vue +++ b/resources/js/Pages/Sources/EmailSource/Components/Resources.vue @@ -9,7 +9,9 @@
-
diff --git a/tests/Feature/EmailSourceTest.php b/tests/Feature/EmailSourceTest.php index d23c7940..64154f4b 100644 --- a/tests/Feature/EmailSourceTest.php +++ b/tests/Feature/EmailSourceTest.php @@ -131,4 +131,49 @@ public function tests_creates_chat_and_message() Bus::assertBatchCount(1); } + + + public function test_no_action_required() + { + Bus::fake(); + $source = Source::factory()->create([ + 'slug' => 'test', + 'type' => SourceTypeEnum::EmailSource, + ]); + + LlmDriverFacade::shouldReceive('driver->completion')->once()->andReturn( + CompletionResponse::from([ + 'content' => 'False', + ]) + ); + + $body = <<<'BODY' +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut.Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. 
Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. + +BODY; + + $dto = MailDto::from([ + 'to' => 'info+12345@llmassistant.io', + 'from' => 'foo@var.com', + 'subject' => 'This is it', + 'header' => 'This is header', + 'body' => $body, + ]); + + $emailSource = new \App\Domains\Sources\EmailSource(); + $emailSource->setMailDto($dto)->handle($source); + + $this->assertDatabaseCount('documents', 0); + $this->assertDatabaseCount('chats', 1); + $this->assertDatabaseCount('messages', 1); + + $this->assertNotNull($source->chat_id); + + Bus::assertBatchCount(0); + } } diff --git a/tests/Feature/Jobs/ChunkDocumentJobTest.php b/tests/Feature/Jobs/ChunkDocumentJobTest.php index a5f716d7..983fa1fc 100644 --- a/tests/Feature/Jobs/ChunkDocumentJobTest.php +++ b/tests/Feature/Jobs/ChunkDocumentJobTest.php @@ -2,6 +2,9 @@ namespace Tests\Feature\Jobs; +use App\Jobs\ChunkDocumentJob; +use App\Models\Document; +use Illuminate\Support\Facades\Bus; use Tests\TestCase; class ChunkDocumentJobTest extends TestCase @@ -9,10 +12,22 @@ class ChunkDocumentJobTest extends TestCase /** * A basic feature test example. 
*/ - public function test_example(): void + public function test_chunking(): void { - $response = $this->get('/'); + Bus::fake(); - $response->assertStatus(200); + $document = Document::factory()->create(); + + $this->assertDatabaseCount('document_chunks', 0); + + + [$job, $batch] = (new ChunkDocumentJob($document))->withFakeBatch(); + + + $job->handle(); + + $this->assertDatabaseCount('document_chunks', 1); + + Bus::assertBatchCount(1); } } From 685e59ca026f4176528f5d40b75eb63b6c79ea75 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Sun, 21 Jul 2024 08:49:17 -0400 Subject: [PATCH 03/19] Ok Source works for EmailSource and Prompting now to move it to all other Soruces --- app/Domains/Sources/EmailSource.php | 6 +----- app/Helpers/ChatHelperTrait.php | 5 +++-- app/Jobs/ChunkDocumentJob.php | 11 +++++------ tests/Feature/EmailSourceTest.php | 3 +-- tests/Feature/Jobs/ChunkDocumentJobTest.php | 2 -- 5 files changed, 10 insertions(+), 17 deletions(-) diff --git a/app/Domains/Sources/EmailSource.php b/app/Domains/Sources/EmailSource.php index ee4406b7..d346e9d1 100644 --- a/app/Domains/Sources/EmailSource.php +++ b/app/Domains/Sources/EmailSource.php @@ -14,7 +14,6 @@ use Facades\App\Domains\EmailParser\Client; use Illuminate\Support\Facades\Bus; use Illuminate\Support\Facades\Log; -use Laravel\Prompts\Prompt; use LlmLaraHub\LlmDriver\LlmDriverFacade; class EmailSource extends BaseSource @@ -63,16 +62,13 @@ public function handle(Source $source): void 'prompt' => $prompt, ]); - $chat = $source->chat; - - $results = LlmDriverFacade::driver( $source->getDriver() )->completion($prompt); - if($this->ifNotActionRequired($results->content)) { + if ($this->ifNotActionRequired($results->content)) { Log::info('[LaraChain] - Email Source Skipping', [ 'prompt' => $prompt, ]); diff --git a/app/Helpers/ChatHelperTrait.php b/app/Helpers/ChatHelperTrait.php index 344e0b78..ff4f0bed 100644 --- a/app/Helpers/ChatHelperTrait.php +++ b/app/Helpers/ChatHelperTrait.php @@ -34,7 +34,8 @@ 
public function getUserId(Collection $collection): ?int return $collection->team?->user_id; } - public function ifNotActionRequired(string $results): bool { + public function ifNotActionRequired(string $results): bool + { // @NOTE llms sometimes do not return the right // string for example. // false becomes false, "false" or "False" etc. @@ -46,6 +47,6 @@ public function ifNotActionRequired(string $results): bool { ->remove("'") ->toString(); - return $results == "false"; + return $results == 'false'; } } diff --git a/app/Jobs/ChunkDocumentJob.php b/app/Jobs/ChunkDocumentJob.php index 336a512f..a817ee52 100644 --- a/app/Jobs/ChunkDocumentJob.php +++ b/app/Jobs/ChunkDocumentJob.php @@ -45,13 +45,14 @@ public function handle(): void return; } - Log::info('[LaraChain] - Chunking Document', [ 'document' => $this->document->id, ]); $document = $this->document; + $chunks = []; + $page_number = 0; $pageContent = $this->document->original_content; @@ -80,9 +81,7 @@ public function handle(): void ]; } - $name = sprintf($document->type->name, $document->id); - - + $name = sprintf("Chunking Document Type %s id %d ", $document->type->name, $document->id); Bus::batch($chunks) ->name($name) @@ -94,9 +93,9 @@ public function handle(): void new SummarizeDocumentJob($document), new TagDocumentJob($document), new DocumentProcessingCompleteJob($document), - ] + ], ]) - ->name(sprintf("Final Document Steps Document %s id %d", $document->type->name, $document->id)) + ->name(sprintf('Final Document Steps Document %s id %d', $document->type->name, $document->id)) ->allowFailures() ->dispatch(); }) diff --git a/tests/Feature/EmailSourceTest.php b/tests/Feature/EmailSourceTest.php index 64154f4b..ff3c69b6 100644 --- a/tests/Feature/EmailSourceTest.php +++ b/tests/Feature/EmailSourceTest.php @@ -132,7 +132,6 @@ public function tests_creates_chat_and_message() Bus::assertBatchCount(1); } - public function test_no_action_required() { Bus::fake(); @@ -170,7 +169,7 @@ public function 
test_no_action_required() $this->assertDatabaseCount('documents', 0); $this->assertDatabaseCount('chats', 1); - $this->assertDatabaseCount('messages', 1); + $this->assertDatabaseCount('messages', 0); $this->assertNotNull($source->chat_id); diff --git a/tests/Feature/Jobs/ChunkDocumentJobTest.php b/tests/Feature/Jobs/ChunkDocumentJobTest.php index 983fa1fc..0260f93b 100644 --- a/tests/Feature/Jobs/ChunkDocumentJobTest.php +++ b/tests/Feature/Jobs/ChunkDocumentJobTest.php @@ -20,10 +20,8 @@ public function test_chunking(): void $this->assertDatabaseCount('document_chunks', 0); - [$job, $batch] = (new ChunkDocumentJob($document))->withFakeBatch(); - $job->handle(); $this->assertDatabaseCount('document_chunks', 1); From ad99c93169517c1102df11877768f489c57bf537 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Sun, 21 Jul 2024 13:11:42 -0400 Subject: [PATCH 04/19] ok now to track so all sources know not to repeat --- app/Domains/EmailParser/Client.php | 55 ++++++++++++++----------- app/Domains/EmailParser/EmailClient.php | 6 +-- app/Domains/Prompts/SpecificTopic.php | 28 +++++++++++++ 3 files changed, 59 insertions(+), 30 deletions(-) create mode 100644 app/Domains/Prompts/SpecificTopic.php diff --git a/app/Domains/EmailParser/Client.php b/app/Domains/EmailParser/Client.php index 8c1be3fd..7ea39f07 100644 --- a/app/Domains/EmailParser/Client.php +++ b/app/Domains/EmailParser/Client.php @@ -40,35 +40,40 @@ public function handle(): void /** @var Message $message */ foreach ($messages as $message) { - $messageDto = MailDto::from([ - 'to' => $message->getTo()->toString(), - 'from' => $message->getFrom()->toString(), - 'body' => $message->getTextBody(), - 'subject' => $message->getSubject(), - 'date' => $message->getDate()->toString(), - 'header' => $message->getHeader()->raw, - ]); - - \Illuminate\Support\Facades\Log::info('Checking To', [ - 'to' => $message->getTo()->toString(), - ]); - - /** - * Just check if it is for this system - */ - $slug = 
slug_from_email($message->getTo()->toString()); - if (EmailSource::getSourceFromSlug($slug)) { - \Illuminate\Support\Facades\Log::info('Found Source with Slug To', [ + $flags = $message->getFlags(); + + if (! $flags->contains('Seen')) { + + $messageDto = MailDto::from([ 'to' => $message->getTo()->toString(), - 'slug' => $slug, + 'from' => $message->getFrom()->toString(), + 'body' => $message->getTextBody(), + 'subject' => $message->getSubject(), + 'date' => $message->getDate()->toString(), + 'header' => $message->getHeader()->raw, ]); - $mail[] = new MailBoxParserJob($messageDto); - $message->delete(expunge: true); - } else { - \Illuminate\Support\Facades\Log::info('Did not find Source with Slug To', [ + + \Illuminate\Support\Facades\Log::info('Checking To', [ 'to' => $message->getTo()->toString(), - 'slug' => $slug, ]); + + /** + * Just check if it is for this system + */ + $slug = slug_from_email($message->getTo()->toString()); + if (EmailSource::getSourceFromSlug($slug)) { + \Illuminate\Support\Facades\Log::info('Found Source with Slug To', [ + 'to' => $message->getTo()->toString(), + 'slug' => $slug, + ]); + $mail[] = new MailBoxParserJob($messageDto); + $message->addFlag('Seen'); + } else { + \Illuminate\Support\Facades\Log::info('Did not find Source with Slug To', [ + 'to' => $message->getTo()->toString(), + 'slug' => $slug, + ]); + } } } diff --git a/app/Domains/EmailParser/EmailClient.php b/app/Domains/EmailParser/EmailClient.php index 3604487e..19338d42 100644 --- a/app/Domains/EmailParser/EmailClient.php +++ b/app/Domains/EmailParser/EmailClient.php @@ -112,11 +112,7 @@ public function handle(CredentialsDto $credentials, bool $delete = false): array $mail[] = $messageDto; - if ($delete) { - $message->delete(expunge: true); - } else { - $message->addFlag('Seen'); - } + $message->addFlag('Seen'); } else { Log::info('[LaraChain] - Flag Seen', [ 'flags' => $flags->toArray(), diff --git a/app/Domains/Prompts/SpecificTopic.php 
b/app/Domains/Prompts/SpecificTopic.php new file mode 100644 index 00000000..b01a0fe4 --- /dev/null +++ b/app/Domains/Prompts/SpecificTopic.php @@ -0,0 +1,28 @@ + Date: Sun, 21 Jul 2024 13:33:30 -0400 Subject: [PATCH 05/19] ok we track the source task in the model now we can repeat check a source with different source getters eg an email box might have a source for marketing a source for customer feedback --- app/Domains/EmailParser/Client.php | 65 ++++++++++--------- app/Domains/EmailParser/EmailClient.php | 46 ++++++------- app/Domains/Prompts/SpecificTopic.php | 27 +++++--- app/Domains/Sources/EmailSource.php | 12 ++++ .../AssistantEmailBoxSourceController.php | 3 +- app/Models/SourceTask.php | 19 ++++++ database/factories/SourceTaskFactory.php | 23 +++++++ ...07_21_171214_create_source_tasks_table.php | 33 ++++++++++ tests/Feature/EmailSourceTest.php | 48 ++++++++++++++ tests/Feature/Models/SourceTaskTest.php | 20 ++++++ 10 files changed, 231 insertions(+), 65 deletions(-) create mode 100644 app/Models/SourceTask.php create mode 100644 database/factories/SourceTaskFactory.php create mode 100644 database/migrations/2024_07_21_171214_create_source_tasks_table.php create mode 100644 tests/Feature/Models/SourceTaskTest.php diff --git a/app/Domains/EmailParser/Client.php b/app/Domains/EmailParser/Client.php index 7ea39f07..212047aa 100644 --- a/app/Domains/EmailParser/Client.php +++ b/app/Domains/EmailParser/Client.php @@ -18,7 +18,7 @@ class Client 'Drafts', ]; - public function handle(): void + public function handle(int $limit = 10): void { $mail = []; @@ -31,7 +31,7 @@ public function handle(): void $full_name = data_get($folder, 'full_name'); if (! 
in_array($full_name, $this->ignore)) { - $messages = $folder->messages()->all()->limit(10, 0)->get(); + $messages = $folder->messages()->all()->limit($limit, 0)->get(); logger('[LaraChain] - Email Count', [ 'count' => $messages->count(), @@ -40,40 +40,41 @@ public function handle(): void /** @var Message $message */ foreach ($messages as $message) { - $flags = $message->getFlags(); - - if (! $flags->contains('Seen')) { - - $messageDto = MailDto::from([ + //@NOTE the Seen flag made it too hard to + // then have different sources + // check the same email box. + // the Source will track repeats + //$flags = $message->getFlags(); + + $messageDto = MailDto::from([ + 'to' => $message->getTo()->toString(), + 'from' => $message->getFrom()->toString(), + 'body' => $message->getTextBody(), + 'subject' => $message->getSubject(), + 'date' => $message->getDate()->toString(), + 'header' => $message->getHeader()->raw, + ]); + + \Illuminate\Support\Facades\Log::info('Checking To', [ + 'to' => $message->getTo()->toString(), + ]); + + /** + * Just check if it is for this system + */ + $slug = slug_from_email($message->getTo()->toString()); + if (EmailSource::getSourceFromSlug($slug)) { + \Illuminate\Support\Facades\Log::info('Found Source with Slug To', [ 'to' => $message->getTo()->toString(), - 'from' => $message->getFrom()->toString(), - 'body' => $message->getTextBody(), - 'subject' => $message->getSubject(), - 'date' => $message->getDate()->toString(), - 'header' => $message->getHeader()->raw, + 'slug' => $slug, ]); - - \Illuminate\Support\Facades\Log::info('Checking To', [ + $mail[] = new MailBoxParserJob($messageDto); + $message->addFlag('Seen'); + } else { + \Illuminate\Support\Facades\Log::info('Did not find Source with Slug To', [ 'to' => $message->getTo()->toString(), + 'slug' => $slug, ]); - - /** - * Just check if it is for this system - */ - $slug = slug_from_email($message->getTo()->toString()); - if (EmailSource::getSourceFromSlug($slug)) { - 
\Illuminate\Support\Facades\Log::info('Found Source with Slug To', [ - 'to' => $message->getTo()->toString(), - 'slug' => $slug, - ]); - $mail[] = new MailBoxParserJob($messageDto); - $message->addFlag('Seen'); - } else { - \Illuminate\Support\Facades\Log::info('Did not find Source with Slug To', [ - 'to' => $message->getTo()->toString(), - 'slug' => $slug, - ]); - } } } diff --git a/app/Domains/EmailParser/EmailClient.php b/app/Domains/EmailParser/EmailClient.php index 19338d42..4d612d73 100644 --- a/app/Domains/EmailParser/EmailClient.php +++ b/app/Domains/EmailParser/EmailClient.php @@ -35,7 +35,9 @@ public function setConfig(array $config): self * @throws \Webklex\PHPIMAP\Exceptions\MessageNotFoundException * @throws \Webklex\PHPIMAP\Exceptions\RuntimeException */ - public function handle(CredentialsDto $credentials, bool $delete = false): array + public function handle(CredentialsDto $credentials, + bool $delete = false, + int $limit = 10): array { $mail = []; @@ -88,7 +90,7 @@ public function handle(CredentialsDto $credentials, bool $delete = false): array 'folders_to_check' => $foldersToCheck, ]); - $messages = $folder->messages()->all()->get(); + $messages = $folder->messages()->all()->limit($limit, 0)->get(); Log::info('[LaraChain] - Email Box Count', [ 'count' => $messages->count(), @@ -97,27 +99,25 @@ public function handle(CredentialsDto $credentials, bool $delete = false): array /** @var Message $message */ foreach ($messages as $message) { - $flags = $message->getFlags(); - - if (! 
$flags->contains('Seen')) { - $messageDto = MailDto::from([ - 'to' => $message->getTo()->toString(), - 'from' => $message->getFrom()->toString(), - 'body' => $message->getTextBody(), - 'subject' => $message->getSubject(), - 'date' => $message->getDate()->toString(), - 'header' => $message->getHeader()->raw, - 'email_message' => $message, - ]); - - $mail[] = $messageDto; - - $message->addFlag('Seen'); - } else { - Log::info('[LaraChain] - Flag Seen', [ - 'flags' => $flags->toArray(), - ]); - } + //@NOTE the Seen flag made it too hard to + // then have different sources + // check the same email box. + // the Source will track repeats + //$flags = $message->getFlags(); + + $messageDto = MailDto::from([ + 'to' => $message->getTo()->toString(), + 'from' => $message->getFrom()->toString(), + 'body' => $message->getTextBody(), + 'subject' => $message->getSubject(), + 'date' => $message->getDate()->toString(), + 'header' => $message->getHeader()->raw, + 'email_message' => $message, + ]); + + $mail[] = $messageDto; + + $message->addFlag('Seen'); } } diff --git a/app/Domains/Prompts/SpecificTopic.php b/app/Domains/Prompts/SpecificTopic.php index b01a0fe4..9b1b579c 100644 --- a/app/Domains/Prompts/SpecificTopic.php +++ b/app/Domains/Prompts/SpecificTopic.php @@ -4,24 +4,33 @@ use Illuminate\Support\Facades\Log; -class EmailToDocumentSummary +class SpecificTopic { public static function prompt(string $context): string { - Log::info('[LaraChain] - EmailToDocumentSummary'); + Log::info('[LaraChain] - SpecificTopic'); return <<<'PROMPT' -The following content is from an email. I would like you to summarize it with the following format. + +You are an email reading assistant who will follow the prompts to help parse my email box. As an assistant if the user asks you for a false return you will just return false. 
NOTHING MORE -To: **TO HERE** -From: **From Here** -Subject: **Subject Here** -Body: -**Summary Here** + +If the email content passed in is about Web Application work the frame work then keep and and summarize it. Else if it is about anything else just return the word false and only the word false. Please IGNORE Spam emails or Subjects that are about web applications but then the body is SPAM + +On a non false response, Summary and original message as Markdown. +On a false response just the word false, -** CONTEXT IS BELOW THIS LINE** + +I would like to hire you to build an awesome application for me with DailyAi +"You have an email from Teddy asking you to use DailAi to automate his business. + +I would like to sell you property in Alaska +False + + + [CONTEXT] PROMPT; } diff --git a/app/Domains/Sources/EmailSource.php b/app/Domains/Sources/EmailSource.php index d346e9d1..86a924ef 100644 --- a/app/Domains/Sources/EmailSource.php +++ b/app/Domains/Sources/EmailSource.php @@ -11,6 +11,7 @@ use App\Jobs\ChunkDocumentJob; use App\Models\Document; use App\Models\Source; +use App\Models\SourceTask; use Facades\App\Domains\EmailParser\Client; use Illuminate\Support\Facades\Bus; use Illuminate\Support\Facades\Log; @@ -46,6 +47,17 @@ public function handle(Source $source): void $this->source = $this->checkForChat($source); + $key = md5($this->mailDto->date.$this->mailDto->from.$source->id); + + if(SourceTask::where('source_id', $source->id)->where('task_key', $key)->exists()) { + return; + } + + SourceTask::create([ + 'source_id' => $source->id, + 'task_key' => $key, + ]); + $this->content = $this->mailDto->getContent(); $this->documentSubject = $this->mailDto->subject; diff --git a/app/Http/Controllers/AssistantEmailBoxSourceController.php b/app/Http/Controllers/AssistantEmailBoxSourceController.php index ef32e200..53588faa 100644 --- a/app/Http/Controllers/AssistantEmailBoxSourceController.php +++ b/app/Http/Controllers/AssistantEmailBoxSourceController.php @@ -4,6 +4,7 
@@ use App\Domains\Prompts\EmailToDocumentSummary; use App\Domains\Prompts\EmailToWebContent; +use App\Domains\Prompts\SpecificTopic; use App\Domains\Sources\SourceTypeEnum; use App\Models\Collection; use App\Models\Source; @@ -41,8 +42,8 @@ protected function makeSource(array $validated, Collection $collection): void public function getPrompts(): array { return [ + 'skip_emails_based_on_content' => SpecificTopic::prompt('[CONTEXT]'), 'summarize_email' => EmailToDocumentSummary::prompt('[CONTEXT]'), - 'get_web_page' => EmailToWebContent::prompt('[CONTEXT]'), ]; } diff --git a/app/Models/SourceTask.php b/app/Models/SourceTask.php new file mode 100644 index 00000000..006e09a4 --- /dev/null +++ b/app/Models/SourceTask.php @@ -0,0 +1,19 @@ +belongsTo(Source::class); + } +} diff --git a/database/factories/SourceTaskFactory.php b/database/factories/SourceTaskFactory.php new file mode 100644 index 00000000..23056713 --- /dev/null +++ b/database/factories/SourceTaskFactory.php @@ -0,0 +1,23 @@ + + */ +class SourceTaskFactory extends Factory +{ + /** + * Define the model's default state. + * + * @return array + */ + public function definition(): array + { + return [ + // + ]; + } +} diff --git a/database/migrations/2024_07_21_171214_create_source_tasks_table.php b/database/migrations/2024_07_21_171214_create_source_tasks_table.php new file mode 100644 index 00000000..6ee16595 --- /dev/null +++ b/database/migrations/2024_07_21_171214_create_source_tasks_table.php @@ -0,0 +1,33 @@ +id(); + $table->foreignIdFor(\App\Models\Source::class); + $table->text("task_key"); + $table->timestamps(); + }); + + Schema::table('source_tasks', function(Blueprint $table) { + $table->index(['source_id', 'task_key']); + }); + } + + /** + * Reverse the migrations. 
+ */ + public function down(): void + { + Schema::dropIfExists('source_tasks'); + } +}; diff --git a/tests/Feature/EmailSourceTest.php b/tests/Feature/EmailSourceTest.php index ff3c69b6..2cdb0417 100644 --- a/tests/Feature/EmailSourceTest.php +++ b/tests/Feature/EmailSourceTest.php @@ -5,6 +5,7 @@ use App\Domains\EmailParser\MailDto; use App\Domains\Sources\SourceTypeEnum; use App\Models\Source; +use App\Models\SourceTask; use Facades\App\Domains\EmailParser\Client; use Facades\App\Domains\Sources\EmailSource; use Illuminate\Support\Facades\Bus; @@ -126,12 +127,59 @@ public function tests_creates_chat_and_message() $this->assertDatabaseCount('documents', 1); $this->assertDatabaseCount('chats', 1); $this->assertDatabaseCount('messages', 2); + $this->assertDatabaseCount('source_tasks', 1); $this->assertNotNull($source->chat_id); Bus::assertBatchCount(1); } + public function test_repeat_tasks() + { + Bus::fake(); + $source = Source::factory()->create([ + 'slug' => 'test', + 'type' => SourceTypeEnum::EmailSource, + ]); + + + LlmDriverFacade::shouldReceive('driver->completion')->never(); + + $body = <<<'BODY' +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut.Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. 
Dolore tempor ea non ut. +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. 
+ +BODY; + + $dto = MailDto::from([ + 'to' => 'info+12345@llmassistant.io', + 'from' => 'foo@var.com', + 'subject' => 'This is it', + 'header' => 'This is header', + 'body' => $body, + ]); + + SourceTask::factory()->create([ + 'source_id' => $source->id, + 'task_key' => md5($dto->date.$dto->from.$source->id), + ]); + + $emailSource = new \App\Domains\Sources\EmailSource(); + $emailSource->setMailDto($dto)->handle($source); + + $this->assertDatabaseCount('documents', 0); + $this->assertDatabaseCount('chats', 1); + $this->assertDatabaseCount('messages', 0); + + $this->assertNotNull($source->chat_id); + + Bus::assertBatchCount(0); + } + public function test_no_action_required() { Bus::fake(); diff --git a/tests/Feature/Models/SourceTaskTest.php b/tests/Feature/Models/SourceTaskTest.php new file mode 100644 index 00000000..8e63f086 --- /dev/null +++ b/tests/Feature/Models/SourceTaskTest.php @@ -0,0 +1,20 @@ +get('/'); + + $response->assertStatus(200); + } +} From 126699dfa6700d0f7bd8db915f065e7c35dfc182 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Sun, 21 Jul 2024 13:34:00 -0400 Subject: [PATCH 06/19] ok we track the source task in the model now we can repeat check a source with different source getters eg an email box might have a source for marketing a source for customer feedback --- app/Domains/EmailParser/EmailClient.php | 4 ++-- app/Domains/Sources/EmailSource.php | 2 +- app/Jobs/ChunkDocumentJob.php | 2 +- .../2024_07_21_171214_create_source_tasks_table.php | 4 ++-- tests/Feature/EmailSourceTest.php | 1 - tests/Feature/Models/SourceTaskTest.php | 2 -- 6 files changed, 6 insertions(+), 9 deletions(-) diff --git a/app/Domains/EmailParser/EmailClient.php b/app/Domains/EmailParser/EmailClient.php index 4d612d73..48cfd57e 100644 --- a/app/Domains/EmailParser/EmailClient.php +++ b/app/Domains/EmailParser/EmailClient.php @@ -36,8 +36,8 @@ public function setConfig(array $config): self * @throws \Webklex\PHPIMAP\Exceptions\RuntimeException */ public function 
handle(CredentialsDto $credentials, - bool $delete = false, - int $limit = 10): array + bool $delete = false, + int $limit = 10): array { $mail = []; diff --git a/app/Domains/Sources/EmailSource.php b/app/Domains/Sources/EmailSource.php index 86a924ef..48c3d476 100644 --- a/app/Domains/Sources/EmailSource.php +++ b/app/Domains/Sources/EmailSource.php @@ -49,7 +49,7 @@ public function handle(Source $source): void $key = md5($this->mailDto->date.$this->mailDto->from.$source->id); - if(SourceTask::where('source_id', $source->id)->where('task_key', $key)->exists()) { + if (SourceTask::where('source_id', $source->id)->where('task_key', $key)->exists()) { return; } diff --git a/app/Jobs/ChunkDocumentJob.php b/app/Jobs/ChunkDocumentJob.php index a817ee52..5c130ae6 100644 --- a/app/Jobs/ChunkDocumentJob.php +++ b/app/Jobs/ChunkDocumentJob.php @@ -81,7 +81,7 @@ public function handle(): void ]; } - $name = sprintf("Chunking Document Type %s id %d ", $document->type->name, $document->id); + $name = sprintf('Chunking Document Type %s id %d ', $document->type->name, $document->id); Bus::batch($chunks) ->name($name) diff --git a/database/migrations/2024_07_21_171214_create_source_tasks_table.php b/database/migrations/2024_07_21_171214_create_source_tasks_table.php index 6ee16595..ce10be85 100644 --- a/database/migrations/2024_07_21_171214_create_source_tasks_table.php +++ b/database/migrations/2024_07_21_171214_create_source_tasks_table.php @@ -14,11 +14,11 @@ public function up(): void Schema::create('source_tasks', function (Blueprint $table) { $table->id(); $table->foreignIdFor(\App\Models\Source::class); - $table->text("task_key"); + $table->text('task_key'); $table->timestamps(); }); - Schema::table('source_tasks', function(Blueprint $table) { + Schema::table('source_tasks', function (Blueprint $table) { $table->index(['source_id', 'task_key']); }); } diff --git a/tests/Feature/EmailSourceTest.php b/tests/Feature/EmailSourceTest.php index 2cdb0417..9933a046 100644 --- 
a/tests/Feature/EmailSourceTest.php +++ b/tests/Feature/EmailSourceTest.php @@ -142,7 +142,6 @@ public function test_repeat_tasks() 'type' => SourceTypeEnum::EmailSource, ]); - LlmDriverFacade::shouldReceive('driver->completion')->never(); $body = <<<'BODY' diff --git a/tests/Feature/Models/SourceTaskTest.php b/tests/Feature/Models/SourceTaskTest.php index 8e63f086..94beb274 100644 --- a/tests/Feature/Models/SourceTaskTest.php +++ b/tests/Feature/Models/SourceTaskTest.php @@ -2,8 +2,6 @@ namespace Tests\Feature\Models; -use Illuminate\Foundation\Testing\RefreshDatabase; -use Illuminate\Foundation\Testing\WithFaker; use Tests\TestCase; class SourceTaskTest extends TestCase From 3ce0da582cca265401f836a0f512dafd521db1bc Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Sun, 21 Jul 2024 13:58:23 -0400 Subject: [PATCH 07/19] The Email Assistant and Email Box are both working --- app/Domains/EmailParser/Client.php | 2 + app/Domains/EmailParser/EmailClient.php | 2 + app/Domains/Sources/EmailBoxSource.php | 39 +------------------ .../Sources/EmailBoxSourceController.php | 2 + 4 files changed, 8 insertions(+), 37 deletions(-) diff --git a/app/Domains/EmailParser/Client.php b/app/Domains/EmailParser/Client.php index 212047aa..7c4db593 100644 --- a/app/Domains/EmailParser/Client.php +++ b/app/Domains/EmailParser/Client.php @@ -5,6 +5,7 @@ use App\Jobs\MailBoxParserJob; use Facades\App\Domains\Sources\EmailSource; use Illuminate\Support\Facades\Bus; +use Illuminate\Support\Facades\Log; use Illuminate\Support\Str; use Webklex\IMAP\Facades\Client as ClientFacade; use Webklex\PHPIMAP\Message; @@ -25,6 +26,7 @@ public function handle(int $limit = 10): void $client = ClientFacade::account('default'); $client->connect(); + $folders = $client->getFolders(false); foreach ($folders as $folder) { diff --git a/app/Domains/EmailParser/EmailClient.php b/app/Domains/EmailParser/EmailClient.php index 48cfd57e..20e58c90 100644 --- a/app/Domains/EmailParser/EmailClient.php +++ 
b/app/Domains/EmailParser/EmailClient.php @@ -65,6 +65,8 @@ public function handle(CredentialsDto $credentials, $client = EmailClientFacade::setConfig($config); try { + + $client->connect(); Log::info('Connected to email box', [ diff --git a/app/Domains/Sources/EmailBoxSource.php b/app/Domains/Sources/EmailBoxSource.php index 2511a6f3..e8ea202c 100644 --- a/app/Domains/Sources/EmailBoxSource.php +++ b/app/Domains/Sources/EmailBoxSource.php @@ -7,6 +7,7 @@ use Facades\App\Domains\EmailParser\EmailClient; use Facades\App\Domains\Transformers\EmailTransformer; use Illuminate\Support\Facades\Log; +use Facades\App\Domains\Sources\EmailSource as EmailSourceFacade; class EmailBoxSource extends EmailSource { @@ -22,43 +23,7 @@ public function handle(Source $source): void $this->source = $source; foreach ($mails as $mailDto) { - $this->mailDto = $mailDto; - - $this->content = $this->mailDto->getContent(); - - $this->documentSubject = $this->mailDto->subject; - - $this->meta_data = $this->mailDto->toArray(); - - $this->transformers = $source->transformers; - - Log::info('[LaraChain] - Running Email Source'); - - try { - Log::info('Do something!'); - $baseSource = EmailTransformer::transform(baseSource: $this); - foreach ($source->transformers as $transformerChainLink) { - Log::info("[LaraChain] - Source has Transformers let's figure out which one to run", [ - 'type' => $transformerChainLink->type->name, - ]); - - $class = '\\App\\Domains\\Transformers\\'.$transformerChainLink->type->name; - if (class_exists($class)) { - $facade = '\\Facades\\App\\Domains\\Transformers\\'.$transformerChainLink->type->name; - $baseSource = $facade::transform($this); - } else { - Log::info('[LaraChain] - No Class found ', [ - 'class' => $class, - ]); - } - } - - $this->batchTransformedSource($baseSource, $source); - } catch (\Exception $e) { - Log::error('[LaraChain] - Error running Email Source', [ - 'error' => $e->getMessage(), - ]); - } + 
EmailSourceFacade::setMailDto($mailDto)->handle($source); } } diff --git a/app/Http/Controllers/Sources/EmailBoxSourceController.php b/app/Http/Controllers/Sources/EmailBoxSourceController.php index b39936aa..b2c1212d 100644 --- a/app/Http/Controllers/Sources/EmailBoxSourceController.php +++ b/app/Http/Controllers/Sources/EmailBoxSourceController.php @@ -4,6 +4,7 @@ use App\Domains\Prompts\EmailToDocumentSummary; use App\Domains\Prompts\EmailToWebContent; +use App\Domains\Prompts\SpecificTopic; use App\Domains\Sources\SourceTypeEnum; use App\Http\Controllers\BaseSourceController; use App\Models\Collection; @@ -84,6 +85,7 @@ protected function updateSource(Source $source, array $validated): void public function getPrompts(): array { return [ + 'skip_emails_based_on_content' => SpecificTopic::prompt('[CONTEXT]'), 'summarize_email' => EmailToDocumentSummary::prompt('[CONTEXT]'), 'get_web_page' => EmailToWebContent::prompt('[CONTEXT]'), ]; From 3f4e3778357e5143191801c2b40fbd92fcee67de Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Sun, 21 Jul 2024 16:18:19 -0400 Subject: [PATCH 08/19] adds new source pattern to Get Web Feed --- app/Domains/EmailParser/Client.php | 2 - app/Domains/EmailParser/EmailClient.php | 1 - app/Domains/Sources/EmailBoxSource.php | 2 - app/Domains/Sources/FeedSource.php | 3 + app/Domains/Sources/WebSearch/GetPage.php | 2 +- app/Jobs/GetWebContentJob.php | 150 ++++++++++++-------- tests/Feature/Jobs/GetWebContentJobTest.php | 44 ++---- tests/fixtures/test_block_of_text.txt | 39 +++++ 8 files changed, 144 insertions(+), 99 deletions(-) create mode 100644 tests/fixtures/test_block_of_text.txt diff --git a/app/Domains/EmailParser/Client.php b/app/Domains/EmailParser/Client.php index 7c4db593..212047aa 100644 --- a/app/Domains/EmailParser/Client.php +++ b/app/Domains/EmailParser/Client.php @@ -5,7 +5,6 @@ use App\Jobs\MailBoxParserJob; use Facades\App\Domains\Sources\EmailSource; use Illuminate\Support\Facades\Bus; -use 
Illuminate\Support\Facades\Log; use Illuminate\Support\Str; use Webklex\IMAP\Facades\Client as ClientFacade; use Webklex\PHPIMAP\Message; @@ -26,7 +25,6 @@ public function handle(int $limit = 10): void $client = ClientFacade::account('default'); $client->connect(); - $folders = $client->getFolders(false); foreach ($folders as $folder) { diff --git a/app/Domains/EmailParser/EmailClient.php b/app/Domains/EmailParser/EmailClient.php index 20e58c90..450c2ed3 100644 --- a/app/Domains/EmailParser/EmailClient.php +++ b/app/Domains/EmailParser/EmailClient.php @@ -66,7 +66,6 @@ public function handle(CredentialsDto $credentials, try { - $client->connect(); Log::info('Connected to email box', [ diff --git a/app/Domains/Sources/EmailBoxSource.php b/app/Domains/Sources/EmailBoxSource.php index e8ea202c..02ac8478 100644 --- a/app/Domains/Sources/EmailBoxSource.php +++ b/app/Domains/Sources/EmailBoxSource.php @@ -5,8 +5,6 @@ use App\Domains\EmailParser\CredentialsDto; use App\Models\Source; use Facades\App\Domains\EmailParser\EmailClient; -use Facades\App\Domains\Transformers\EmailTransformer; -use Illuminate\Support\Facades\Log; use Facades\App\Domains\Sources\EmailSource as EmailSourceFacade; class EmailBoxSource extends EmailSource diff --git a/app/Domains/Sources/FeedSource.php b/app/Domains/Sources/FeedSource.php index 98b54148..a9fe1636 100644 --- a/app/Domains/Sources/FeedSource.php +++ b/app/Domains/Sources/FeedSource.php @@ -37,6 +37,7 @@ public function handle(Source $source): void $jobs = []; foreach ($feedItems as $feedItem) { + $webResponseDto = WebResponseDto::from([ 'url' => $feedItem['link'], 'title' => $feedItem['title'], @@ -44,7 +45,9 @@ public function handle(Source $source): void 'meta_data' => $feedItem, 'profile' => [], ]); + $jobs[] = new GetWebContentJob($source, $webResponseDto); + } Bus::batch($jobs) diff --git a/app/Domains/Sources/WebSearch/GetPage.php b/app/Domains/Sources/WebSearch/GetPage.php index 6b56c4d2..ba4b16b3 100644 --- 
a/app/Domains/Sources/WebSearch/GetPage.php +++ b/app/Domains/Sources/WebSearch/GetPage.php @@ -24,7 +24,7 @@ public static function make(Collection $collection): self return new static($collection); } - public function handle(string $url): string + public function handle(string $url, bool $parseHtml = true): string { $results = Browsershot::url($url) ->dismissDialogs() diff --git a/app/Jobs/GetWebContentJob.php b/app/Jobs/GetWebContentJob.php index e9c6d817..24cccd33 100644 --- a/app/Jobs/GetWebContentJob.php +++ b/app/Jobs/GetWebContentJob.php @@ -2,13 +2,18 @@ namespace App\Jobs; +use App\Domains\Chat\MetaDataDto; use App\Domains\Documents\StatusEnum; use App\Domains\Documents\TypesEnum; +use App\Domains\Messages\RoleEnum; +use App\Domains\Prompts\PromptMerge; use App\Domains\Sources\WebSearch\Response\WebResponseDto; +use App\Helpers\ChatHelperTrait; use App\Helpers\TextChunker; use App\Models\Document; use App\Models\DocumentChunk; use App\Models\Source; +use App\Models\SourceTask; use Facades\App\Domains\Sources\WebSearch\GetPage; use Illuminate\Bus\Batch; use Illuminate\Bus\Batchable; @@ -21,12 +26,15 @@ use Illuminate\Support\Facades\Log; use Laravel\Pennant\Feature; use LlmLaraHub\LlmDriver\LlmDriverFacade; +use LlmLaraHub\LlmDriver\ToolsHelper; use LlmLaraHub\TagFunction\Jobs\TagDocumentJob; class GetWebContentJob implements ShouldQueue { use Batchable, Dispatchable, InteractsWithQueue, Queueable, SerializesModels; + use ChatHelperTrait, ToolsHelper; + /** * Create a new job instance. 
*/ @@ -48,95 +56,119 @@ public function handle(): void return; } - /** - * Document can reference a source - */ - $document = Document::updateOrCreate( - [ - 'source_id' => $this->source->id, - 'type' => TypesEnum::HTML, - 'subject' => $this->webResponseDto->title, - 'link' => $this->webResponseDto->url, - 'collection_id' => $this->source->collection_id, - ], - [ - 'status' => StatusEnum::Pending, - 'file_path' => $this->webResponseDto->url, - 'status_summary' => StatusEnum::Pending, - 'meta_data' => $this->webResponseDto->toArray(), - ] - ); + $this->source = $this->checkForChat($this->source); + + $key = md5($this->webResponseDto->url.$this->source->id); + + if (SourceTask::where('source_id', $this->source->id)->where('task_key', $key)->exists()) { + return; + } + + SourceTask::create([ + 'source_id' => $this->source->id, + 'task_key' => $key, + ]); Log::info("[LaraChain] GetWebContentJob - {$this->source->title} - URL: {$this->webResponseDto->url}"); - $html = GetPage::make($this->source->collection)->handle($this->webResponseDto->url); /** * @NOTE - * making them PDF for now - * I ran into "noise" issues - * of just a lot of script tags and stuff - * there is some code in the getPage for html - * that might be worth it later + * Sometimes the HTML is too big */ - if (Feature::active('html_to_pdf')) { - $document->update([ - 'type' => TypesEnum::PDF, - 'file_path' => md5($this->webResponseDto->url).'.pdf', - ]); + $htmlResults = GetPage::make($this->source->collection) + ->handle($this->webResponseDto->url, true); - Bus::batch([ - new ParsePdfFileJob($document), - ]) - ->name('Process PDF Document - '.$document->id) - ->finally(function (Batch $batch) { - //this is triggered in the PdfTransformer class - }) - ->allowFailures() - ->onQueue(LlmDriverFacade::driver($this->source->getDriver())->onQueue()) - ->dispatch(); - } else { - $results = GetPage::parseHtml($html); + $prompt = PromptMerge::merge( + ['[CONTEXT]'], + [$htmlResults], + $this->source->getPrompt() 
+ ); - $results = to_utf8($results); + $results = LlmDriverFacade::driver( + $this->source->getDriver() + )->completion($prompt); - $document->update([ - 'type' => TypesEnum::HTML, - 'file_path' => $this->webResponseDto->url, - 'original_content' => $results, + if ($this->ifNotActionRequired($results->content)) { + Log::info('[LaraChain] - Web Source Skipping', [ + 'prompt' => $prompt, ]); + } else { + $promptResults = $results->content; + $chat = $this->source->chat; + $userMessage = $chat->addInput( + message: $prompt, + role: RoleEnum::User, + show_in_thread: true, + meta_data: MetaDataDto::from([ + 'driver' => $this->source->getDriver(), + 'source' => $this->source->title, + ]), + ); + + /** + * Document can reference a source + */ + $document = Document::updateOrCreate( + [ + 'source_id' => $this->source->id, + 'type' => TypesEnum::HTML, + 'subject' => $this->webResponseDto->title, + 'link' => $this->webResponseDto->url, + 'collection_id' => $this->source->collection_id, + ], + [ + 'status' => StatusEnum::Pending, + 'file_path' => $this->webResponseDto->url, + 'status_summary' => StatusEnum::Pending, + 'meta_data' => $this->webResponseDto->toArray(), + 'original_content' => $htmlResults, + ] + ); $page_number = 1; - $chunked_chunks = TextChunker::handle($results); + $chunked_chunks = TextChunker::handle($promptResults); - foreach ($chunked_chunks as $chunkSection => $chunkContent) { + $chunks = []; + foreach ($chunked_chunks as $chunkSection => $chunkContent) { $guid = md5($chunkContent); $DocumentChunk = DocumentChunk::updateOrCreate( [ 'document_id' => $document->id, - 'sort_order' => $page_number, - 'section_number' => $chunkSection, + 'guid' => $guid, ], [ - 'guid' => $guid, - 'content' => $chunkContent, + 'sort_order' => $page_number, + 'section_number' => $chunkSection, + 'content' => to_utf8($chunkContent), ] ); Log::info('[LaraChain] adding to new batch'); - $this->batch()->add([ - new VectorlizeDataJob($DocumentChunk), - new 
SummarizeDocumentJob($document), - new TagDocumentJob($document), - new DocumentProcessingCompleteJob($document), - ]); + $chunks[] = new VectorlizeDataJob($DocumentChunk); $page_number++; } - } + Bus::batch($chunks) + ->name("Chunking Document from Web - {$this->webResponseDto->url}") + ->allowFailures() + ->finally(function (Batch $batch) use ($document) { + Bus::batch([ + new SummarizeDocumentJob($document), + new TagDocumentJob($document), + new DocumentProcessingCompleteJob($document), + ]) + ->name(sprintf('Final Document Steps Document %s id %d', $document->type->name, $document->id)) + ->allowFailures() + ->onQueue(LlmDriverFacade::driver($document->getDriver())->onQueue()) + ->dispatch(); + }) + ->onQueue(LlmDriverFacade::driver($this->source->getDriver())->onQueue()) + ->dispatch(); + } } } diff --git a/tests/Feature/Jobs/GetWebContentJobTest.php b/tests/Feature/Jobs/GetWebContentJobTest.php index a81d0c67..2a9e9120 100644 --- a/tests/Feature/Jobs/GetWebContentJobTest.php +++ b/tests/Feature/Jobs/GetWebContentJobTest.php @@ -9,6 +9,8 @@ use Facades\App\Domains\Sources\WebSearch\GetPage; use Illuminate\Support\Facades\Bus; use Laravel\Pennant\Feature; +use LlmLaraHub\LlmDriver\LlmDriverFacade; +use LlmLaraHub\LlmDriver\Responses\CompletionResponse; use Tests\TestCase; class GetWebContentJobTest extends TestCase @@ -37,7 +39,13 @@ public function test_job_html(): void GetPage::shouldReceive('make->handle')->once()->andReturn($html); - GetPage::makePartial(); + LlmDriverFacade::shouldReceive('driver->onQueue')->andReturn('default'); + + LlmDriverFacade::shouldReceive('driver->completion') + ->once() + ->andReturn(CompletionResponse::from([ + 'content' => get_fixture('test_block_of_text.txt', false), + ])); $this->assertDatabaseCount('documents', 0); $this->assertDatabaseCount('document_chunks', 0); @@ -45,42 +53,10 @@ public function test_job_html(): void $job->handle(); $this->assertDatabaseCount('documents', 1); - 
$this->assertDatabaseCount('document_chunks', 33); + $this->assertDatabaseCount('document_chunks', 17); $document = Document::first(); $this->assertEquals('Example', $document->subject); } - public function test_job_pdf(): void - { - Feature::define('html_to_pdf', function () { - return true; - }); - Bus::fake(); - - $source = Source::factory()->create(); - - $webResponseDto = WebResponseDto::from([ - 'url' => 'https://example.com', - 'title' => 'Example', - 'age' => '1 day', - 'description' => 'Example description', - 'meta_data' => ['key' => 'value'], - 'thumbnail' => 'https://example.com/thumbnail.jpg', - 'profile' => ['key' => 'value'], - ]); - - $content = fake()->sentences(900, true); - - GetPage::shouldReceive('make->handle') - ->once() - ->andReturn('foobar'); - - [$job, $batch] = (new GetWebContentJob($source, $webResponseDto))->withFakeBatch(); - - $job->handle(); - - Bus::assertBatchCount(1); - - } } diff --git a/tests/fixtures/test_block_of_text.txt b/tests/fixtures/test_block_of_text.txt new file mode 100644 index 00000000..d0a724aa --- /dev/null +++ b/tests/fixtures/test_block_of_text.txt @@ -0,0 +1,39 @@ +Sint aliqua voluptate ex fugiat quis tempor sit cupidatat labore in consequat. Dolore magna non esse sit. In deserunt occaecat incididunt non ea exercitation ad occaecat magna. Cillum in do eu et ad id elit reprehenderit aliquip elit anim amet voluptate. Consequat et aute aute aliqua voluptate consectetur adipisicing enim id est consectetur nisi occaecat. Sint duis esse cillum labore aute dolore minim consectetur. Elit anim id dolore qui elit ipsum dolore veniam dolor. Elit mollit deserunt cillum aute magna cillum nulla reprehenderit nulla cupidatat. + +Laboris ipsum id enim. Nisi fugiat et dolor commodo anim deserunt. Non officia ullamco enim velit fugiat do dolor adipisicing eiusmod do esse anim ea ut irure. Duis tempor eu ad velit eu commodo. 
Fugiat est magna consectetur nisi pariatur esse amet cupidatat nulla consequat reprehenderit aliquip ipsum mollit. Culpa commodo fugiat exercitation eiusmod nulla ipsum sunt enim esse labore. Ut proident non excepteur consequat. Occaecat veniam sint aliqua ad id non ad incididunt est eu sunt aute ipsum. + +Dolore aute sunt ipsum amet id et aute veniam amet anim mollit fugiat veniam. In ut anim amet culpa duis qui id aliquip magna sit tempor ullamco irure in. Est non eiusmod Lorem labore elit in proident esse est fugiat voluptate fugiat. Dolor nulla aliqua eu quis do adipisicing excepteur nisi magna laborum nostrud proident sint irure. Ex proident non commodo aliquip excepteur ad minim excepteur. + +Laborum sint irure in pariatur adipisicing nulla voluptate labore ipsum dolore sunt laborum id mollit eu. Ea reprehenderit ipsum irure Lorem fugiat aute fugiat consequat incididunt. Cupidatat elit ea tempor elit duis aliqua commodo irure est ex sint adipisicing. Eiusmod Lorem excepteur Lorem ad ullamco. Amet est excepteur minim aliquip do est irure qui in ut aliqua laboris excepteur consectetur. Mollit deserunt nisi aliquip velit incididunt dolor nostrud. + +Nulla id laborum ex. Ipsum elit quis duis officia irure aliqua nisi cupidatat ex pariatur sunt. Elit labore laboris voluptate eiusmod consectetur aute adipisicing elit voluptate irure qui commodo non. Ea dolor ea aute tempor quis occaecat ut est minim anim cupidatat nostrud enim aliqua ut. Aliqua adipisicing nulla tempor proident sit officia cupidatat. + +Velit sit non ea Lorem laboris fugiat dolor dolor incididunt ad occaecat ex. Eiusmod nisi ex reprehenderit id. Commodo officia irure commodo. Ipsum veniam voluptate dolore. Consectetur enim tempor fugiat adipisicing. Cillum dolor ea aliquip enim cupidatat nostrud. Ut enim eiusmod elit dolor do Lorem magna ex do. + +Labore et nisi officia excepteur mollit. Deserunt sint qui qui. Nostrud ut et fugiat esse eu ex deserunt minim dolore aliqua laboris. 
Laboris adipisicing sit incididunt eiusmod. Minim reprehenderit duis enim cillum occaecat anim officia ex occaecat officia irure officia laboris proident nisi. Nisi aute sit aute sint in magna amet cillum officia excepteur nisi. + +Et occaecat labore laborum velit eu voluptate eu. Laboris ea culpa mollit amet elit non laboris. Tempor sit proident laboris commodo exercitation labore est esse culpa laboris fugiat reprehenderit excepteur veniam. Et voluptate ullamco voluptate consectetur anim elit aute. Proident aliqua nisi esse aute est. In amet ullamco minim duis reprehenderit ullamco sunt sit sit id. Ut consequat enim veniam mollit excepteur velit irure dolore. + +Ut sint elit duis. Pariatur consectetur cupidatat labore amet tempor irure do consequat minim reprehenderit laboris aliqua reprehenderit occaecat. Dolore consequat in pariatur exercitation dolor. Ad veniam nostrud Lorem qui in proident consectetur excepteur magna ea commodo do sunt proident. Voluptate quis ut labore excepteur amet nisi sunt ad voluptate eiusmod occaecat aute. Nisi magna deserunt elit ullamco. Amet in sint reprehenderit ad eu pariatur. Officia voluptate enim ipsum id incididunt. + +Adipisicing aute eiusmod sint dolor Lorem elit. Cillum culpa cupidatat sunt aliqua sunt. Nostrud mollit commodo adipisicing ea aute duis Lorem aute eu. Incididunt minim labore in labore ex nostrud elit aliquip labore dolor aute duis. Quis cillum fugiat dolore irure est incididunt velit amet enim laborum adipisicing. Voluptate sunt nulla sit amet proident quis qui aliquip occaecat ex. Laborum excepteur veniam cupidatat. Mollit qui commodo nulla deserunt sit laboris laboris. + +Aliquip eu sunt esse incididunt non eiusmod reprehenderit commodo duis commodo elit dolor occaecat. Sint aliquip duis fugiat mollit mollit ad officia nostrud Lorem eu consequat excepteur do esse. Cupidatat cillum duis minim anim ullamco quis ex ex. Aute fugiat mollit veniam. + +Ut proident eiusmod aliquip amet. Sunt sunt esse id non. 
Ipsum dolore aliquip duis excepteur Lorem laboris ex et aliqua officia exercitation sunt id. Qui sint laborum eu non irure fugiat eiusmod ea id aliqua magna. Fugiat ut elit dolor sint anim veniam Lorem elit aliquip proident consequat proident officia. + +In nisi eu officia cupidatat et duis. Consectetur do aliqua sit occaecat esse ullamco est sit fugiat. Adipisicing quis nostrud mollit qui anim culpa. Aliqua ea in duis eiusmod. Laborum incididunt id proident nisi eiusmod aute magna do Lorem non ad. Eu Lorem excepteur cupidatat est irure occaecat elit veniam. Reprehenderit nostrud id anim ad nulla veniam mollit. Minim consequat enim aute pariatur. + +Cupidatat esse minim mollit veniam anim aliquip sit ex. Nisi laboris tempor est ut reprehenderit commodo mollit qui. Sunt labore duis occaecat reprehenderit commodo. Labore aliquip eiusmod id commodo ipsum non enim officia magna nostrud in consequat eiusmod. Dolor proident sunt esse consectetur duis qui anim ea velit mollit labore sint qui. Nulla culpa occaecat cupidatat. Sint ut commodo dolore dolore consequat consectetur occaecat commodo amet pariatur adipisicing ipsum pariatur id. Tempor ad sunt non est sunt. + +Dolore ipsum nostrud fugiat consectetur. Sunt et voluptate enim incididunt in consequat enim in irure id ullamco et tempor. Est sunt sit in. Labore non id eu laborum amet non exercitation dolore proident. + +Anim laborum exercitation veniam non adipisicing culpa do et. Esse cillum dolore et. Qui consectetur irure occaecat enim amet. Ullamco occaecat amet laboris adipisicing excepteur voluptate minim sint. Aliquip reprehenderit ex et sint. Cupidatat adipisicing anim fugiat aliquip. Voluptate id ut eu ut. Laborum sit quis enim aliquip labore nisi. + +Nulla deserunt pariatur enim ullamco voluptate eu do anim nisi voluptate Lorem. Aliqua aute aliqua eu Lorem pariatur elit tempor exercitation do exercitation nostrud officia cillum. Minim commodo consectetur dolor. Qui ea nulla aliquip id ex id duis tempor. 
Qui exercitation dolore commodo culpa deserunt ad officia fugiat culpa nulla minim consequat. Magna ullamco id deserunt. Est proident ut amet laborum consectetur est. + +Fugiat veniam ut proident do nulla incididunt laborum irure ad deserunt pariatur. Exercitation proident minim eiusmod excepteur tempor sint eiusmod aute. Velit incididunt cupidatat nostrud ut id velit ipsum cillum consequat ea. Labore eiusmod amet adipisicing aute laborum Lorem. Magna officia esse culpa mollit proident mollit dolore irure tempor ea consequat dolore cillum est. Proident laboris culpa reprehenderit officia nisi magna magna incididunt excepteur velit duis cupidatat. Consequat ex pariatur aliquip eiusmod cupidatat cupidatat laborum exercitation fugiat tempor. + +Culpa elit quis aliqua ea ut ad Lorem ea. Aliqua consequat commodo est quis exercitation aliqua. Tempor consectetur consequat dolore duis magna. Incididunt aliquip tempor pariatur mollit nisi ullamco elit nisi irure consequat nulla esse ea aute eiusmod. Sit officia eu enim ut cillum irure cupidatat in amet pariatur eiusmod est incididunt aliqua velit. Tempor ipsum ea do nostrud laborum esse velit incididunt. Deserunt et sint consectetur incididunt aliqua id elit eiusmod ea anim. + +Do ex veniam mollit occaecat reprehenderit mollit dolore amet velit nostrud in aute exercitation. Laborum occaecat veniam incididunt tempor exercitation id quis eu fugiat magna anim dolor est in. Veniam sunt fugiat sit irure. Dolor sit consequat culpa Lorem. 
From e7596c316e4d9b344f7a664cf42726fd98e80f62 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Mon, 22 Jul 2024 06:04:46 -0400 Subject: [PATCH 09/19] move Webpage source to the new pattern --- app/Domains/Sources/WebPageSource.php | 1 - app/Jobs/GetWebContentJob.php | 19 ++++++++- app/Jobs/WebPageSourceJob.php | 53 ++++++++++---------------- tests/Feature/WebPageSourceJobTest.php | 12 +----- 4 files changed, 39 insertions(+), 46 deletions(-) diff --git a/app/Domains/Sources/WebPageSource.php b/app/Domains/Sources/WebPageSource.php index 0c994575..835b8321 100644 --- a/app/Domains/Sources/WebPageSource.php +++ b/app/Domains/Sources/WebPageSource.php @@ -33,7 +33,6 @@ public function handle(Source $source): void foreach ($urls as $url) { $jobs[] = new WebPageSourceJob($source, $url); - } Bus::batch($jobs) diff --git a/app/Jobs/GetWebContentJob.php b/app/Jobs/GetWebContentJob.php index 24cccd33..d535226d 100644 --- a/app/Jobs/GetWebContentJob.php +++ b/app/Jobs/GetWebContentJob.php @@ -95,7 +95,7 @@ public function handle(): void } else { $promptResults = $results->content; $chat = $this->source->chat; - $userMessage = $chat->addInput( + $chat->addInput( message: $prompt, role: RoleEnum::User, show_in_thread: true, @@ -112,7 +112,7 @@ public function handle(): void [ 'source_id' => $this->source->id, 'type' => TypesEnum::HTML, - 'subject' => $this->webResponseDto->title, + 'subject' => to_utf8($this->webResponseDto->title), 'link' => $this->webResponseDto->url, 'collection_id' => $this->source->collection_id, ], @@ -169,6 +169,21 @@ public function handle(): void }) ->onQueue(LlmDriverFacade::driver($this->source->getDriver())->onQueue()) ->dispatch(); + + + $assistantMessage = $chat->addInput( + message: $promptResults, + role: RoleEnum::Assistant, + show_in_thread: true, + meta_data: MetaDataDto::from([ + 'driver' => $this->source->getDriver(), + 'source' => $this->source->title, + ]), + ); + + $this->savePromptHistory( + message: $assistantMessage, + prompt: 
$prompt); } } } diff --git a/app/Jobs/WebPageSourceJob.php b/app/Jobs/WebPageSourceJob.php index 67d18f19..8ad498b1 100644 --- a/app/Jobs/WebPageSourceJob.php +++ b/app/Jobs/WebPageSourceJob.php @@ -4,6 +4,7 @@ use App\Domains\Documents\StatusEnum; use App\Domains\Documents\TypesEnum; +use App\Domains\Sources\WebSearch\Response\WebResponseDto; use App\Models\Document; use App\Models\Source; use Facades\App\Domains\Sources\WebSearch\GetPage; @@ -14,6 +15,8 @@ use Illuminate\Foundation\Bus\Dispatchable; use Illuminate\Queue\InteractsWithQueue; use Illuminate\Queue\SerializesModels; +use Illuminate\Support\Facades\Bus; +use LlmLaraHub\LlmDriver\LlmDriverFacade; class WebPageSourceJob implements ShouldQueue { @@ -42,41 +45,25 @@ public function handle(): void return; } - $jobs = []; - - $html = GetPage::make($this->source->collection)->handle($this->url); - - $html = GetPage::parseHtml($html); - - $html = to_utf8($html); - $title = sprintf('WebPageSource - source: %s', $this->url); - $parseTitle = str($html)->limit(50)->toString(); - - if (! 
empty($parseTitle)) { - $title = $parseTitle; - } - - $document = Document::updateOrCreate( - [ - 'source_id' => $this->source->id, - 'link' => $this->url, - 'collection_id' => $this->source->collection_id, - ], - [ - 'status' => StatusEnum::Pending, - 'type' => TypesEnum::HTML, - 'subject' => to_utf8($title), - 'file_path' => $this->url, - 'summary' => str($html)->limit(254)->toString(), - 'status_summary' => StatusEnum::Pending, - 'original_content' => $html, - 'meta_data' => $this->source->meta_data, - ] - ); - - $this->processDocument($document); + $webResponseDto = WebResponseDto::from([ + 'url' => $this->url, + 'title' => $title, + 'age' => now()->toString(), + 'description' => sprintf("From Source %s" ,$this->source->title), + 'meta_data' => [], + 'thumbnail' => null, + 'profile' => [], + ]); + + Bus::batch([ + new GetWebContentJob($this->source, $webResponseDto), + ]) + ->name("Getting Web content for Source - {$this->url}") + ->onQueue(LlmDriverFacade::driver($this->source->getDriver())->onQueue()) + ->allowFailures() + ->dispatch(); } } diff --git a/tests/Feature/WebPageSourceJobTest.php b/tests/Feature/WebPageSourceJobTest.php index 4c11dd95..21f04e52 100644 --- a/tests/Feature/WebPageSourceJobTest.php +++ b/tests/Feature/WebPageSourceJobTest.php @@ -7,6 +7,7 @@ use App\Models\Source; use Facades\App\Domains\Sources\WebSearch\GetPage; use Illuminate\Support\Facades\Bus; +use LlmLaraHub\LlmDriver\LlmDriverFacade; use Tests\TestCase; class WebPageSourceJobTest extends TestCase @@ -18,11 +19,7 @@ public function test_makes_documents_triggers_jobs(): void { Bus::fake(); - $html = get_fixture('test_medium_2.html', false); - - GetPage::shouldReceive('make->handle')->once()->andReturn($html); - - GetPage::makePartial(); + LlmDriverFacade::shouldReceive('driver->onQueue')->andReturn('default'); $source = Source::factory()->create([ 'slug' => 'test', @@ -36,11 +33,6 @@ public function test_makes_documents_triggers_jobs(): void [$job, $batch] = (new 
WebPageSourceJob($source, 'https://larallama.io/posts/numerous-ui-updates-prompt-template-improvements-and-more'))->withFakeBatch(); $job->handle(); - $this->assertDatabaseCount('documents', 1); - - $this->assertNotEmpty($source->documents->first()->summary); - $this->assertNotEmpty($source->documents->first()->original_content); - Bus::assertBatchCount(1); } } From 23726d61e88997e3e01bbb7777b46ae88eda71a2 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Mon, 22 Jul 2024 06:05:07 -0400 Subject: [PATCH 10/19] move Webpage source to the new pattern --- app/Jobs/GetWebContentJob.php | 11 ++++------- app/Jobs/WebPageSourceJob.php | 6 +----- tests/Feature/Jobs/GetWebContentJobTest.php | 1 - tests/Feature/WebPageSourceJobTest.php | 1 - 4 files changed, 5 insertions(+), 14 deletions(-) diff --git a/app/Jobs/GetWebContentJob.php b/app/Jobs/GetWebContentJob.php index d535226d..bfe38da8 100644 --- a/app/Jobs/GetWebContentJob.php +++ b/app/Jobs/GetWebContentJob.php @@ -24,7 +24,6 @@ use Illuminate\Queue\SerializesModels; use Illuminate\Support\Facades\Bus; use Illuminate\Support\Facades\Log; -use Laravel\Pennant\Feature; use LlmLaraHub\LlmDriver\LlmDriverFacade; use LlmLaraHub\LlmDriver\ToolsHelper; use LlmLaraHub\TagFunction\Jobs\TagDocumentJob; @@ -32,7 +31,6 @@ class GetWebContentJob implements ShouldQueue { use Batchable, Dispatchable, InteractsWithQueue, Queueable, SerializesModels; - use ChatHelperTrait, ToolsHelper; /** @@ -96,10 +94,10 @@ public function handle(): void $promptResults = $results->content; $chat = $this->source->chat; $chat->addInput( - message: $prompt, - role: RoleEnum::User, - show_in_thread: true, - meta_data: MetaDataDto::from([ + message: $prompt, + role: RoleEnum::User, + show_in_thread: true, + meta_data: MetaDataDto::from([ 'driver' => $this->source->getDriver(), 'source' => $this->source->title, ]), @@ -170,7 +168,6 @@ public function handle(): void ->onQueue(LlmDriverFacade::driver($this->source->getDriver())->onQueue()) ->dispatch(); - 
$assistantMessage = $chat->addInput( message: $promptResults, role: RoleEnum::Assistant, diff --git a/app/Jobs/WebPageSourceJob.php b/app/Jobs/WebPageSourceJob.php index 8ad498b1..0116d3ce 100644 --- a/app/Jobs/WebPageSourceJob.php +++ b/app/Jobs/WebPageSourceJob.php @@ -2,12 +2,8 @@ namespace App\Jobs; -use App\Domains\Documents\StatusEnum; -use App\Domains\Documents\TypesEnum; use App\Domains\Sources\WebSearch\Response\WebResponseDto; -use App\Models\Document; use App\Models\Source; -use Facades\App\Domains\Sources\WebSearch\GetPage; use Illuminate\Bus\Batch; use Illuminate\Bus\Batchable; use Illuminate\Bus\Queueable; @@ -51,7 +47,7 @@ public function handle(): void 'url' => $this->url, 'title' => $title, 'age' => now()->toString(), - 'description' => sprintf("From Source %s" ,$this->source->title), + 'description' => sprintf('From Source %s', $this->source->title), 'meta_data' => [], 'thumbnail' => null, 'profile' => [], diff --git a/tests/Feature/Jobs/GetWebContentJobTest.php b/tests/Feature/Jobs/GetWebContentJobTest.php index 2a9e9120..331a288c 100644 --- a/tests/Feature/Jobs/GetWebContentJobTest.php +++ b/tests/Feature/Jobs/GetWebContentJobTest.php @@ -58,5 +58,4 @@ public function test_job_html(): void $this->assertEquals('Example', $document->subject); } - } diff --git a/tests/Feature/WebPageSourceJobTest.php b/tests/Feature/WebPageSourceJobTest.php index 21f04e52..534c646a 100644 --- a/tests/Feature/WebPageSourceJobTest.php +++ b/tests/Feature/WebPageSourceJobTest.php @@ -5,7 +5,6 @@ use App\Domains\Sources\SourceTypeEnum; use App\Jobs\WebPageSourceJob; use App\Models\Source; -use Facades\App\Domains\Sources\WebSearch\GetPage; use Illuminate\Support\Facades\Bus; use LlmLaraHub\LlmDriver\LlmDriverFacade; use Tests\TestCase; From b86da759f744f72d1bc0bf42d3cbdb3057eef7c9 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Mon, 22 Jul 2024 07:39:24 -0400 Subject: [PATCH 11/19] ok the list feature is working now too from array --- 
app/Domains/Prompts/EventPagePrompt.php | 38 +++++ .../Sources/WebPageSourceController.php | 4 + app/Http/Resources/SourceEditResource.php | 1 + app/Http/Resources/SourceResource.php | 1 + app/Jobs/GetWebContentJob.php | 143 +++++++++++------- database/factories/SourceFactory.php | 1 + ...2024_07_22_112043_add_force_to_sources.php | 28 ++++ resources/js/Components/Templates.vue | 34 +++-- .../WebPageSource/Components/Resources.vue | 15 +- .../js/Pages/Sources/WebPageSource/Create.vue | 1 + .../js/Pages/Sources/WebPageSource/Edit.vue | 1 + tests/Feature/Jobs/GetWebContentJobTest.php | 43 +++++- 12 files changed, 244 insertions(+), 66 deletions(-) create mode 100644 app/Domains/Prompts/EventPagePrompt.php create mode 100644 database/migrations/2024_07_22_112043_add_force_to_sources.php diff --git a/app/Domains/Prompts/EventPagePrompt.php b/app/Domains/Prompts/EventPagePrompt.php new file mode 100644 index 00000000..183a1e50 --- /dev/null +++ b/app/Domains/Prompts/EventPagePrompt.php @@ -0,0 +1,38 @@ + +You are an assistant helping to get event data from a website. + + + +If the site has no data about events just return false. Else +return the Event Title, then Start Date, End Date, Location, Description, and any other relevant data. 
+ + +On a non false response you will return the following: +Title: Event Title +Start Date: Start Date +End Date: End Date +Location: Location +Description: Description + + +** WEBSITE HTML IS BELOW THIS LINE ** + + +$context + +PROMPT; + } +} diff --git a/app/Http/Controllers/Sources/WebPageSourceController.php b/app/Http/Controllers/Sources/WebPageSourceController.php index cf2d8abf..62200315 100644 --- a/app/Http/Controllers/Sources/WebPageSourceController.php +++ b/app/Http/Controllers/Sources/WebPageSourceController.php @@ -2,6 +2,7 @@ namespace App\Http\Controllers\Sources; +use App\Domains\Prompts\EventPagePrompt; use App\Domains\Prompts\WebPagePrompt; use App\Domains\Sources\SourceTypeEnum; use App\Http\Controllers\BaseSourceController; @@ -28,6 +29,7 @@ protected function getValidationRules(): array 'title' => 'required|string', 'details' => 'required|string', 'active' => ['boolean', 'required'], + 'force' => ['boolean', 'nullable'], 'recurring' => ['string', 'required'], 'meta_data.urls' => ['required', 'string'], ]; @@ -40,6 +42,7 @@ protected function makeSource(array $validated, Collection $collection): void 'details' => $validated['details'], 'recurring' => $validated['recurring'], 'active' => $validated['active'], + 'force' => $validated['force'], 'collection_id' => $collection->id, 'type' => $this->sourceTypeEnum, 'user_id' => $this->getUserId($collection), @@ -51,6 +54,7 @@ public function getPrompts(): array { return [ 'web_page' => WebPagePrompt::prompt('[CONTEXT]'), + 'event_data' => EventPagePrompt::prompt('[CONTEXT]'), ]; } } diff --git a/app/Http/Resources/SourceEditResource.php b/app/Http/Resources/SourceEditResource.php index 57f23bb6..289ec59f 100644 --- a/app/Http/Resources/SourceEditResource.php +++ b/app/Http/Resources/SourceEditResource.php @@ -24,6 +24,7 @@ public function toArray(Request $request): array 'collection_id' => $this->collection_id, 'details' => $this->details, 'active' => $this->active, + 'force' => $this->force, 
'recurring' => $this->recurring->value, 'description' => $this->description, 'slug' => $this->slug, diff --git a/app/Http/Resources/SourceResource.php b/app/Http/Resources/SourceResource.php index d25699c6..bfe18d33 100644 --- a/app/Http/Resources/SourceResource.php +++ b/app/Http/Resources/SourceResource.php @@ -23,6 +23,7 @@ public function toArray(Request $request): array return [ 'id' => $this->id, 'title' => $this->title, + 'force' => $this->force ? 'Yes' : 'No', 'collection_id' => $this->collection_id, 'details' => $this->details, 'active' => $this->active ? 'Yes' : 'No', diff --git a/app/Jobs/GetWebContentJob.php b/app/Jobs/GetWebContentJob.php index bfe38da8..83f0b0c9 100644 --- a/app/Jobs/GetWebContentJob.php +++ b/app/Jobs/GetWebContentJob.php @@ -22,6 +22,7 @@ use Illuminate\Foundation\Bus\Dispatchable; use Illuminate\Queue\InteractsWithQueue; use Illuminate\Queue\SerializesModels; +use Illuminate\Support\Arr; use Illuminate\Support\Facades\Bus; use Illuminate\Support\Facades\Log; use LlmLaraHub\LlmDriver\LlmDriverFacade; @@ -58,7 +59,10 @@ public function handle(): void $key = md5($this->webResponseDto->url.$this->source->id); - if (SourceTask::where('source_id', $this->source->id)->where('task_key', $key)->exists()) { + if ( + ! 
$this->source->force && + SourceTask::where('source_id', $this->source->id)->where('task_key', $key)->exists()) { + Log::info('[LaraChain] GetWebContentJob - Skipping - already ran'); return; } @@ -91,8 +95,10 @@ public function handle(): void 'prompt' => $prompt, ]); } else { - $promptResults = $results->content; + $promptResultsOriginal = $results->content; + $chat = $this->source->chat; + $chat->addInput( message: $prompt, role: RoleEnum::User, @@ -103,73 +109,100 @@ public function handle(): void ]), ); - /** - * Document can reference a source - */ - $document = Document::updateOrCreate( - [ - 'source_id' => $this->source->id, - 'type' => TypesEnum::HTML, - 'subject' => to_utf8($this->webResponseDto->title), - 'link' => $this->webResponseDto->url, - 'collection_id' => $this->source->collection_id, - ], - [ - 'status' => StatusEnum::Pending, - 'file_path' => $this->webResponseDto->url, - 'status_summary' => StatusEnum::Pending, - 'meta_data' => $this->webResponseDto->toArray(), - 'original_content' => $htmlResults, - ] - ); + $promptResults = json_decode($promptResultsOriginal, true); - $page_number = 1; + if(is_null($promptResults)) { + $promptResults = Arr::wrap($promptResultsOriginal); + } - $chunked_chunks = TextChunker::handle($promptResults); + /** + * @NOTE all the user to build array results + * Like Events from a webpage + */ + foreach($promptResults as $promptResultIndex => $promptResult) { - $chunks = []; + $promptResult = json_encode($promptResult); - foreach ($chunked_chunks as $chunkSection => $chunkContent) { - $guid = md5($chunkContent); + $title = sprintf('WebPageSource - item #%d source: %s', + $promptResultIndex + 1, + $this->webResponseDto->url); - $DocumentChunk = DocumentChunk::updateOrCreate( + /** + * Document can reference a source + */ + $document = Document::updateOrCreate( [ - 'document_id' => $document->id, - 'guid' => $guid, + 'source_id' => $this->source->id, + 'type' => TypesEnum::HTML, + 'subject' => to_utf8($title), + 'link' => 
$this->webResponseDto->url, + 'collection_id' => $this->source->collection_id, ], [ - 'sort_order' => $page_number, - 'section_number' => $chunkSection, - 'content' => to_utf8($chunkContent), + 'status' => StatusEnum::Pending, + 'file_path' => $this->webResponseDto->url, + 'status_summary' => StatusEnum::Pending, + 'meta_data' => $this->webResponseDto->toArray(), + 'original_content' => $promptResult, ] ); - Log::info('[LaraChain] adding to new batch'); - - $chunks[] = new VectorlizeDataJob($DocumentChunk); - - $page_number++; + $page_number = 1; + + $chunked_chunks = TextChunker::handle($promptResult); + + $chunks = []; + + foreach ($chunked_chunks as $chunkSection => $chunkContent) { + $guid = md5($chunkContent); + + $DocumentChunk = DocumentChunk::updateOrCreate( + [ + 'document_id' => $document->id, + 'guid' => $guid, + ], + [ + 'sort_order' => $page_number, + 'section_number' => $chunkSection, + 'content' => to_utf8($chunkContent), + ] + ); + + Log::info('[LaraChain] adding to new batch'); + + $chunks[] = new VectorlizeDataJob($DocumentChunk); + + $page_number++; + } + + Bus::batch($chunks) + ->name("Chunking Document from Web - {$this->webResponseDto->url}") + ->allowFailures() + ->finally(function (Batch $batch) use ($document) { + Bus::batch([ + [ + new SummarizeDocumentJob($document), + new TagDocumentJob($document), + new DocumentProcessingCompleteJob($document), + ] + ]) + ->name(sprintf('Final Document Steps Document %s id %d', $document->type->name, $document->id)) + ->allowFailures() + ->onQueue(LlmDriverFacade::driver($document->getDriver())->onQueue()) + ->dispatch(); + }) + ->onQueue(LlmDriverFacade::driver($this->source->getDriver())->onQueue()) + ->dispatch(); } - Bus::batch($chunks) - ->name("Chunking Document from Web - {$this->webResponseDto->url}") - ->allowFailures() - ->finally(function (Batch $batch) use ($document) { - Bus::batch([ - new SummarizeDocumentJob($document), - new TagDocumentJob($document), - new 
DocumentProcessingCompleteJob($document), - ]) - ->name(sprintf('Final Document Steps Document %s id %d', $document->type->name, $document->id)) - ->allowFailures() - ->onQueue(LlmDriverFacade::driver($document->getDriver())->onQueue()) - ->dispatch(); - }) - ->onQueue(LlmDriverFacade::driver($this->source->getDriver())->onQueue()) - ->dispatch(); + /** + * @NOTE + * I could move this into the loop if it is not + * enough here + */ $assistantMessage = $chat->addInput( - message: $promptResults, + message: json_encode($promptResults), role: RoleEnum::Assistant, show_in_thread: true, meta_data: MetaDataDto::from([ @@ -181,6 +214,8 @@ public function handle(): void $this->savePromptHistory( message: $assistantMessage, prompt: $prompt); + + } } } diff --git a/database/factories/SourceFactory.php b/database/factories/SourceFactory.php index 91da122b..96b05b41 100644 --- a/database/factories/SourceFactory.php +++ b/database/factories/SourceFactory.php @@ -23,6 +23,7 @@ public function definition(): array return [ 'title' => $this->faker->name, 'slug' => fake()->word, + 'force' => false, 'collection_id' => Collection::factory(), 'user_id' => null, 'chat_id' => Chat::factory(), diff --git a/database/migrations/2024_07_22_112043_add_force_to_sources.php b/database/migrations/2024_07_22_112043_add_force_to_sources.php new file mode 100644 index 00000000..cc71ae3c --- /dev/null +++ b/database/migrations/2024_07_22_112043_add_force_to_sources.php @@ -0,0 +1,28 @@ +boolean('force')->default(false); + }); + } + + /** + * Reverse the migrations. 
+ */ + public function down(): void + { + Schema::table('sources', function (Blueprint $table) { + // + }); + } +}; diff --git a/resources/js/Components/Templates.vue b/resources/js/Components/Templates.vue index 96c98491..96332b5c 100644 --- a/resources/js/Components/Templates.vue +++ b/resources/js/Components/Templates.vue @@ -1,6 +1,7 @@ diff --git a/resources/js/Pages/Sources/WebPageSource/Components/Resources.vue b/resources/js/Pages/Sources/WebPageSource/Components/Resources.vue index c31b221f..11b1f58d 100644 --- a/resources/js/Pages/Sources/WebPageSource/Components/Resources.vue +++ b/resources/js/Pages/Sources/WebPageSource/Components/Resources.vue @@ -10,7 +10,7 @@
@@ -32,7 +32,18 @@ https://docs.larallama.io/developing.html"
- +
+ + + +
+ By default the system will only run the first time for a url or an email. + But if you want to try again just check this box. + This can be good if you are checking a home page for updates. + Or a feed for updates. But NOT if you are checking an email box for emails and + do not want to repeat check the same email.
+
diff --git a/resources/js/Pages/Sources/WebPageSource/Create.vue b/resources/js/Pages/Sources/WebPageSource/Create.vue index 7f2a417c..0baaab78 100644 --- a/resources/js/Pages/Sources/WebPageSource/Create.vue +++ b/resources/js/Pages/Sources/WebPageSource/Create.vue @@ -31,6 +31,7 @@ const form = useForm({ title: '', details: '', recurring: 'not', + force: true, meta_data: { example: "bob@bobsburgers.com", }, diff --git a/resources/js/Pages/Sources/WebPageSource/Edit.vue b/resources/js/Pages/Sources/WebPageSource/Edit.vue index 56662621..2a28daa0 100644 --- a/resources/js/Pages/Sources/WebPageSource/Edit.vue +++ b/resources/js/Pages/Sources/WebPageSource/Edit.vue @@ -32,6 +32,7 @@ const form = useForm({ title: props.source.data.title, details: props.source.data.details, active: props.source.data.active, + force: props.source.data.force, recurring: props.source.data.recurring, meta_data: { urls: props.source.data.meta_data.urls diff --git a/tests/Feature/Jobs/GetWebContentJobTest.php b/tests/Feature/Jobs/GetWebContentJobTest.php index 331a288c..a04fa688 100644 --- a/tests/Feature/Jobs/GetWebContentJobTest.php +++ b/tests/Feature/Jobs/GetWebContentJobTest.php @@ -55,7 +55,48 @@ public function test_job_html(): void $this->assertDatabaseCount('documents', 1); $this->assertDatabaseCount('document_chunks', 17); $document = Document::first(); - $this->assertEquals('Example', $document->subject); + $this->assertStringContainsString('WebPageSource - item #1 source', $document->subject); + + } + + public function test_array(): void + { + + Bus::fake(); + + $source = Source::factory()->create(); + + $webResponseDto = WebResponseDto::from([ + 'url' => 'https://example.com', + 'title' => 'Example', + 'age' => '1 day', + 'description' => 'Example description', + 'meta_data' => ['key' => 'value'], + 'thumbnail' => 'https://example.com/thumbnail.jpg', + 'profile' => ['key' => 'value'], + ]); + + $html = get_fixture('test_medium_2.html', false); + + 
GetPage::shouldReceive('make->handle')->once()->andReturn($html); + + LlmDriverFacade::shouldReceive('driver->onQueue')->andReturn('default'); + + LlmDriverFacade::shouldReceive('driver->completion') + ->once() + ->andReturn(CompletionResponse::from([ + 'content' => "[{\"content\":\"Test 1\"},{\"content\":\"Test 2\"}]", + ])); + + $this->assertDatabaseCount('documents', 0); + $this->assertDatabaseCount('document_chunks', 0); + [$job, $batch] = (new GetWebContentJob($source, $webResponseDto))->withFakeBatch(); + + $job->handle(); + $this->assertDatabaseCount('documents', 2); + $this->assertDatabaseCount('document_chunks', 2); + $document = Document::first(); + $this->assertStringContainsString('WebPageSource - item #1 source', $document->subject); } } From 51d554a97b761e3c9ba08071a14afee9661c9279 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Mon, 22 Jul 2024 07:41:43 -0400 Subject: [PATCH 12/19] add new prompt template for event data --- app/Domains/Prompts/EventPagePrompt.php | 58 +++++++++++++++++-------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/app/Domains/Prompts/EventPagePrompt.php b/app/Domains/Prompts/EventPagePrompt.php index 183a1e50..37e14024 100644 --- a/app/Domains/Prompts/EventPagePrompt.php +++ b/app/Domains/Prompts/EventPagePrompt.php @@ -12,26 +12,50 @@ public static function prompt(string $context): string return << -You are an assistant helping to get event data from a website. +You are an AI assistant tasked with extracting event data from website content. + + +1. Analyze the provided website HTML content below the tag. +2. Look for information about events within the content. +3. If no event data is found, respond with a single word: false +4. If event data is found, extract the following information for each event: + - Event Title + - Start Date + - End Date + - Location + - Description + - Any other relevant data +5. Format the extracted data as a JSON array according to the specifications below. 
+ + +If events are found, return a JSON array with the following structure: + +[ + { + "title": "Event Title", + "startDate": "Start Date", + "endDate": "End Date", + "location": "Location", + "description": "Description", + "additionalInfo": "Any other relevant data" + }, + { + "title": "Event Title", + "startDate": "Start Date", + "endDate": "End Date", + "location": "Location", + "description": "Description", + "additionalInfo": "Any other relevant data" + } +] + +If no events are found, return an empty JSON array: [] - - -If the site has no data about events just return false. Else -return the Event Title, then Start Date, End Date, Location, Description, and any other relevant data. - - -On a non false response you will return the following: -Title: Event Title -Start Date: Start Date -End Date: End Date -Location: Location -Description: Description - - -** WEBSITE HTML IS BELOW THIS LINE ** - $context + + +Respond only with the JSON array or 'false' if no events are found. Do not include any explanations or additional text in your response. PROMPT; } From f31c435e303ec6ccded5adfa569304d0106d7839 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Mon, 22 Jul 2024 07:42:11 -0400 Subject: [PATCH 13/19] add new prompt template for event data --- app/Jobs/GetWebContentJob.php | 11 +++++------ tests/Feature/Jobs/GetWebContentJobTest.php | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/app/Jobs/GetWebContentJob.php b/app/Jobs/GetWebContentJob.php index 83f0b0c9..62f08c3a 100644 --- a/app/Jobs/GetWebContentJob.php +++ b/app/Jobs/GetWebContentJob.php @@ -63,6 +63,7 @@ public function handle(): void ! 
$this->source->force && SourceTask::where('source_id', $this->source->id)->where('task_key', $key)->exists()) { Log::info('[LaraChain] GetWebContentJob - Skipping - already ran'); + return; } @@ -111,15 +112,15 @@ public function handle(): void $promptResults = json_decode($promptResultsOriginal, true); - if(is_null($promptResults)) { - $promptResults = Arr::wrap($promptResultsOriginal); + if (is_null($promptResults)) { + $promptResults = Arr::wrap($promptResultsOriginal); } /** * @NOTE all the user to build array results * Like Events from a webpage */ - foreach($promptResults as $promptResultIndex => $promptResult) { + foreach ($promptResults as $promptResultIndex => $promptResult) { $promptResult = json_encode($promptResult); @@ -184,7 +185,7 @@ public function handle(): void new SummarizeDocumentJob($document), new TagDocumentJob($document), new DocumentProcessingCompleteJob($document), - ] + ], ]) ->name(sprintf('Final Document Steps Document %s id %d', $document->type->name, $document->id)) ->allowFailures() @@ -195,7 +196,6 @@ public function handle(): void ->dispatch(); } - /** * @NOTE * I could move this into the loop if it is not @@ -215,7 +215,6 @@ public function handle(): void message: $assistantMessage, prompt: $prompt); - } } } diff --git a/tests/Feature/Jobs/GetWebContentJobTest.php b/tests/Feature/Jobs/GetWebContentJobTest.php index a04fa688..f65d88c0 100644 --- a/tests/Feature/Jobs/GetWebContentJobTest.php +++ b/tests/Feature/Jobs/GetWebContentJobTest.php @@ -85,7 +85,7 @@ public function test_array(): void LlmDriverFacade::shouldReceive('driver->completion') ->once() ->andReturn(CompletionResponse::from([ - 'content' => "[{\"content\":\"Test 1\"},{\"content\":\"Test 2\"}]", + 'content' => '[{"content":"Test 1"},{"content":"Test 2"}]', ])); $this->assertDatabaseCount('documents', 0); From 92b99f77500a02b8ab874e588a65c0a2f624a0c4 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Mon, 22 Jul 2024 08:03:28 -0400 Subject: [PATCH 14/19] Ok web source 
now has array --- app/Domains/Sources/WebSearch/GetPage.php | 8 +- tests/fixtures/web_page_source_events.html | 6848 +++++++++++++++++ .../web_page_source_events_parsed.html | 1 + 3 files changed, 6856 insertions(+), 1 deletion(-) create mode 100644 tests/fixtures/web_page_source_events.html create mode 100644 tests/fixtures/web_page_source_events_parsed.html diff --git a/app/Domains/Sources/WebSearch/GetPage.php b/app/Domains/Sources/WebSearch/GetPage.php index ba4b16b3..372d3471 100644 --- a/app/Domains/Sources/WebSearch/GetPage.php +++ b/app/Domains/Sources/WebSearch/GetPage.php @@ -37,7 +37,13 @@ public function handle(string $url, bool $parseHtml = true): string Storage::disk('collections')->put($this->collection->id.'/'.$name, $results->pdf()); - return $results->bodyHtml(); + $body = $results->bodyHtml(); + + if($parseHtml) { + $body = $this->parseHtml($body); + } + + return $body; } public function parseHtml(string $html): string diff --git a/tests/fixtures/web_page_source_events.html b/tests/fixtures/web_page_source_events.html new file mode 100644 index 00000000..d5c0a795 --- /dev/null +++ b/tests/fixtures/web_page_source_events.html @@ -0,0 +1,6848 @@ + + + + + + + + + + + + + +Dallas Cowboys | Official Site of the Dallas Cowboys + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Skip to main content + + + + +
+ + + + + + + +
+ + + + + + + + + + +
+ + + + + + + + + + + +
+ +
+
+
+ + +
+ Advertising +
+ +
+
+ +
+ + + + +

Dallas Cowboys Schedule

+
+
+ + +
+
+ +

+ + Dallas Cowboys Schedule + +

+ + +
+ + Presented by + + +
+
+
+
+ + +
+
+ + + + + +
+
+ + + + +
+ +
+
+
+ + + + +
+
+ + + + +
+ + + + +
+ + + + + +
+ + +
+ ScheduleHeader_2460x1440 +
+ +
+ +
+ +
+
+ + + + + +
+
+
+ +
+
+
+
+ +
+
+ +
+
+ + +
+ + + + + + +
+
+
+ + + + +
+
+ + + + +
+ + + +
+ + + + +
+
+
+
+ +
+
+ + + +
+

Calendar event(s) copied!

+

1. Access your calendar

+

2. Add url to calendar and subscribe

+

3. Ensure that newly added Cowboys's calendar is synced to your account

+
+
+
+
+ + +
+
+
+
+

Pick your method to subscribe to the calendar

+ +
+
+ + + +
+
+
+ 1. Automatically Download to System Default Mail App (Recommended) +
+
+ + + + +
+
+ 2. Connect to Calendar Provider of Choice (Manual) +
+
+
+ +
+
+
+ + + +
+

Calendar event(s) copied!

+

1. Access your calendar

+

2. Add url to calendar and subscribe

+

3. Ensure that newly added Cowboys's calendar is synced to your account

+
+
+
+
+
+
+ + + + + +
+
+
+ + + + +
+ +
+ + + + +
+
+ + + + +
+ + + + + + +
+
+ + +
+
+ +

+ + PRESEASON + +

+ + +
+
+ +
+ + + +
+
+

+ + + WEEK 1 + + · Sun 08/11 + · 3:30 PM CDT +

+
+
+
+

+ AT +

+ + + + +
+

+ +

+

+ Los Angeles Rams +

+
+
+
+
+ + +
+ + + + + + + + + + + + CBS 11 + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + SoFi Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 2 + + · Sat 08/17 + · 9:00 PM CDT +

+
+
+
+

+ AT +

+ + + + +
+

+ +

+

+ Las Vegas Raiders +

+
+
+
+
+ + +
+ + + + + + + + + + + + CBS 11 + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + Allegiant Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 3 + + · Sat 08/24 + · 3:00 PM CDT +

+
+
+
+ + + + +
+

+ +

+

+ Los Angeles Chargers +

+
+
+
+
+ + +
+ + + + + + + + + + + + CBS 11 + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + AT&T Stadium + +
+
+ + + + + +
+ + + + + +
+ + +
+
+ +

+ + REGULAR SEASON + +

+ + +
+
+ +
+ + + +
+
+

+ + + WEEK 1 + + · Sun 09/08 + · 3:25 PM CDT +

+
+
+
+

+ AT +

+ + + + +
+

+ +

+

+ Cleveland Browns +

+
+
+
+
+ + +
+ + + + + + + + + + + + FOX + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + Cleveland Browns Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 2 + + · Sun 09/15 + · 12:00 PM CDT +

+
+
+
+ + + + +
+

+ +

+

+ New Orleans Saints +

+
+
+
+
+ + +
+ + + + + + + + + + + + FOX + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + AT&T Stadium + +
+
+ + + + + +
+ + + + + +
+
+
+
+
+ + + +
+
+

+ + + WEEK 3 + + · Sun 09/22 + · 3:25 PM CDT +

+
+
+
+ + + + +
+

+ +

+

+ Baltimore Ravens +

+
+
+
+
+ + +
+ + + + + + + + + + + + FOX + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + AT&T Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 4 + + · Thu 09/26 + · 7:15 PM CDT +

+
+
+
+

+ AT +

+ + + + +
+

+ +

+

+ New York Giants +

+
+
+
+
+ + +
+ + + + + + + + + + + + Prime Video + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + MetLife Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 5 + + · Sun 10/06 + · 7:20 PM CDT +

+
+
+
+

+ AT +

+ + + + +
+

+ +

+

+ Pittsburgh Steelers +

+
+
+
+
+ + +
+ + + + + + + + + + + + NBC + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + Acrisure Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 6 + + · Sun 10/13 + · 3:25 PM CDT +

+
+
+
+ + + + +
+

+ +

+

+ Detroit Lions +

+
+
+
+
+ + +
+ + + + + + + + + + + + FOX + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + AT&T Stadium + +
+
+ + + + + +
+ + + + + +
+
+
+
+
+ + +
+
+

+ + WEEK 7 + +

+
+
+

+ BYE +

+
+
+
+ + + +
+
+

+ + + WEEK 8 + + · Sun 10/27 + · 7:20 PM CDT +

+
+
+
+

+ AT +

+ + + + +
+

+ +

+

+ San Francisco 49ers +

+
+
+
+
+ + +
+ + + + + + + + + + + + NBC + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + Levi's® Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 9 + + · Sun 11/03 + · 12:00 PM CST +

+
+
+
+

+ AT +

+ + + + +
+

+ +

+

+ Atlanta Falcons +

+
+
+
+
+ + +
+ + + + + + + + + + + + FOX + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + Mercedes-Benz Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 10 + + · Sun 11/10 + · 3:25 PM CST +

+
+
+
+ + + + +
+

+ +

+

+ Philadelphia Eagles +

+
+
+
+
+ + +
+ + + + + + + + + + + + CBS + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + AT&T Stadium + +
+
+ + + + + +
+ + + + + +
+
+
+
+
+ + + +
+
+

+ + + WEEK 11 + + · Mon 11/18 + · 7:15 PM CST +

+
+
+
+ + + + +
+

+ +

+

+ Houston Texans +

+
+
+
+
+ + +
+ + + + + + + + + + + + ESPN + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + AT&T Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 12 + + · Sun 11/24 + · 12:00 PM CST +

+
+
+
+

+ AT +

+ + + + +
+

+ +

+

+ Washington Commanders +

+
+
+
+
+ + +
+ + + + + + + + + + + + FOX + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + Commanders Field + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 13 + + · Thu 11/28 + · 3:30 PM CST +

+
+
+
+ + + + +
+

+ +

+

+ New York Giants +

+
+
+
+
+ + +
+ + + + + + + + + + + + FOX + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + AT&T Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 14 + + · Mon 12/09 + · 7:15 PM CST +

+
+
+
+ + + + +
+

+ +

+

+ Cincinnati Bengals +

+
+
+
+
+ + +
+ + + + + + + + + + + + ESPN • ABC + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + AT&T Stadium + +
+
+ + + + + +
+ + + + + +
+
+
+
+
+ + + +
+
+

+ + + WEEK 15 + + · Sun 12/15 + · 12:00 PM CST +

+
+
+
+

+ AT +

+ + + + +
+

+ +

+

+ Carolina Panthers +

+
+
+
+
+ + +
+ + + + + + + + + + + + FOX + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + Bank of America Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 16 + + · Sun 12/22 + · 7:20 PM CST +

+
+
+
+ + + + +
+

+ +

+

+ Tampa Bay Buccaneers +

+
+
+
+
+ + +
+ + + + + + + + + + + + NBC + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + AT&T Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 17 + + · Sun 12/29 + · 3:25 PM CST +

+
+
+
+

+ AT +

+ + + + +
+

+ +

+

+ Philadelphia Eagles +

+
+
+
+
+ + +
+ + + + + + + + + + + + FOX + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + Lincoln Financial Field + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 18 + + · TBD +

+
+
+
+ + + + +
+

+ +

+

+ Washington Commanders +

+
+
+
+
+ + +
+ + + + + + + + + + + + TBD + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + AT&T Stadium + +
+
+ + + + + +
+ + + + + +
+
+
+
+
+ + + +
+ + + + + + + + +
+
+
+
+ + +
+

Please be aware that there are certain games that are subject to flexible scheduling and the date and time of those games may be changed from what is currently reflected on the schedule and what may appear on the ticket. For more detailed information about NFL flexible scheduling procedures for the 2024 NFL Season, please visit https://www.nfl.com/schedules/flexible-scheduling-procedures.

+ +
+
+
+
+ +
+ +
+
+
+ + + + +
+
+ + +
+
+ +

+ + Network Information + +

+ + +
+
+ + + +
+
+ + + + + + + +
+ Advertising +
+
+
+
+
+ + + + + + + + + + + + + +
+ +
+ + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/fixtures/web_page_source_events_parsed.html b/tests/fixtures/web_page_source_events_parsed.html new file mode 100644 index 00000000..4385e5fe --- /dev/null +++ b/tests/fixtures/web_page_source_events_parsed.html @@ -0,0 +1 @@ +Dallas Cowboys | Official Site of the Dallas Cowboys OneTrust Cookies Consent Notice start OneTrust Cookies Consent Notice end IAB CCPA script start IAB CCPA script end Skip to main content Interstitial OOP SLOT Background-Skin SLOT [if IE 9]> Date: Mon, 22 Jul 2024 08:32:30 -0400 Subject: [PATCH 15/19] fix test --- app/Domains/Sources/WebSearch/GetPage.php | 2 +- app/Http/Controllers/Sources/WebPageSourceController.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/Domains/Sources/WebSearch/GetPage.php b/app/Domains/Sources/WebSearch/GetPage.php index 372d3471..4ab9c374 100644 --- a/app/Domains/Sources/WebSearch/GetPage.php +++ b/app/Domains/Sources/WebSearch/GetPage.php @@ -39,7 +39,7 @@ public function handle(string $url, bool $parseHtml = true): string $body = $results->bodyHtml(); - if($parseHtml) { + if ($parseHtml) { $body = $this->parseHtml($body); } diff --git a/app/Http/Controllers/Sources/WebPageSourceController.php b/app/Http/Controllers/Sources/WebPageSourceController.php index 62200315..546c2ed4 100644 --- a/app/Http/Controllers/Sources/WebPageSourceController.php +++ b/app/Http/Controllers/Sources/WebPageSourceController.php @@ -42,7 +42,7 @@ protected function makeSource(array $validated, Collection $collection): void 'details' => $validated['details'], 'recurring' => $validated['recurring'], 'active' => $validated['active'], - 'force' => $validated['force'], + 'force' => data_get($validated, 'force', false), 'collection_id' => $collection->id, 'type' => $this->sourceTypeEnum, 'user_id' => $this->getUserId($collection), From 5ffd95560f4c26817cc207847993690643990c6b Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Tue, 23 Jul 
2024 14:14:57 -0400 Subject: [PATCH 16/19] working on the webhooks interface --- .../Documents/Transformers/CSVTransformer.php | 1 - app/Domains/Sources/WebhookSource.php | 123 ++++++++++-------- app/Helpers/ChatHelperTrait.php | 47 +++++++ app/Jobs/GetWebContentJob.php | 27 +--- tests/Feature/WebhookSourceTest.php | 12 +- 5 files changed, 128 insertions(+), 82 deletions(-) diff --git a/app/Domains/Documents/Transformers/CSVTransformer.php b/app/Domains/Documents/Transformers/CSVTransformer.php index e9841fa9..c3a30e53 100644 --- a/app/Domains/Documents/Transformers/CSVTransformer.php +++ b/app/Domains/Documents/Transformers/CSVTransformer.php @@ -35,7 +35,6 @@ public function handle(Document $document): array $chunks = []; - /** * Going to turn into a document then chunks */ diff --git a/app/Domains/Sources/WebhookSource.php b/app/Domains/Sources/WebhookSource.php index 677ec136..8c687194 100644 --- a/app/Domains/Sources/WebhookSource.php +++ b/app/Domains/Sources/WebhookSource.php @@ -2,23 +2,30 @@ namespace App\Domains\Sources; +use App\Domains\Chat\MetaDataDto; use App\Domains\Documents\StatusEnum; use App\Domains\Documents\TypesEnum; +use App\Domains\Messages\RoleEnum; use App\Domains\Prompts\PromptMerge; +use App\Helpers\ChatHelperTrait; use App\Helpers\TextChunker; use App\Jobs\DocumentProcessingCompleteJob; +use App\Jobs\SummarizeDocumentJob; use App\Jobs\VectorlizeDataJob; use App\Models\Document; use App\Models\DocumentChunk; use App\Models\Source; +use App\Models\SourceTask; use Illuminate\Bus\Batch; use Illuminate\Support\Arr; use Illuminate\Support\Facades\Bus; use Illuminate\Support\Facades\Log; use LlmLaraHub\LlmDriver\LlmDriverFacade; +use LlmLaraHub\TagFunction\Jobs\TagDocumentJob; class WebhookSource extends BaseSource { + public SourceTypeEnum $sourceTypeEnum = SourceTypeEnum::WebhookSource; protected array $payload = []; @@ -46,68 +53,72 @@ public function handle(Source $source): void 'payload' => $this->payload, ]); - $chunks = []; + 
$this->source = $this->checkForChat($source); + $payloadMd5 = md5(json_encode($this->payload, 128)); + $key = md5($payloadMd5.$this->source->id); + + if($this->skip($this->source, $key)) { + return; + } + $this->createSourceTask($this->source, $key); $encoded = json_encode($this->payload, 128); - $prompt = PromptMerge::merge([ - '[CONTEXT]', - ], [ - $encoded, - ], $source->details); + $prompt = PromptMerge::merge( + ['[CONTEXT]'], + [$encoded], + $this->source->getPrompt() + ); $results = LlmDriverFacade::driver( $source->getDriver() )->completion($prompt); - Log::info('[LaraChain] - WebhookSource Transformation Results', [ - 'results' => $results, - ]); - - $content = $results->content; + if ($this->ifNotActionRequired($results->content)) { + Log::info('[LaraChain] - Webhook Skipping', [ + 'prompt' => $prompt, + ]); + } else { + Log::info('[LaraChain] - WebhookSource Transformation Results', [ + 'results' => $results, + ]); - /** - * @TODO - * There is too big of an assumption here - * The user might just make this TEXT it is their - * prompt to do what they want - */ - $content = str($content) - ->replace('```json', '') - ->replaceLast('```', '') - ->toString(); + $promptResultsOriginal = $results->content; - try { + $chat = $source->chat; - $results = $this->checkIfJsonOrJustText($results, $content); + $this->addUserMessage($chat, $promptResultsOriginal); - $page_number = 0; + $promptResults = $this->arrifyPromptResults($promptResultsOriginal); - foreach ($results as $index => $result) { - if (is_array($result)) { - $result = json_encode($result); - } + foreach ($promptResults as $promptResultIndex => $promptResult) { + $promptResult = json_encode($promptResult); - $id = $this->getIdFromPayload($result); + /** + * Could even do ONE more look at the data + * with the Source Prompt and LLM + */ + $title = sprintf('WebhookSource - item #%d source: %s', + $promptResultIndex + 1, md5($promptResult)); $document = Document::updateOrCreate([ 'type' => 
TypesEnum::WebHook, 'source_id' => $source->id, - 'subject' => 'Webhook: '.$id, + 'subject' => $title, + 'collection_id' => $source->collection_id, ], [ 'status' => StatusEnum::Pending, 'meta_data' => $this->payload, - 'collection_id' => $source->collection_id, 'status_summary' => StatusEnum::Pending, - 'summary' => $result, + 'summary' => $promptResult, + 'original_content' => $promptResult, ]); - $this->document = $document; + $page_number = 1; - $page_number = $page_number + 1; - $pageContent = $result; - $size = config('llmdriver.chunking.default_size'); - $chunked_chunks = TextChunker::handle($pageContent, $size); + $chunked_chunks = TextChunker::handle($promptResult); + + $chunks = []; foreach ($chunked_chunks as $chunkSection => $chunkContent) { $guid = md5($chunkContent); @@ -115,35 +126,43 @@ public function handle(Source $source): void $DocumentChunk = DocumentChunk::updateOrCreate( [ 'document_id' => $document->id, - 'sort_order' => $page_number, - 'section_number' => $chunkSection, + 'guid' => $guid, ], [ - 'guid' => $guid, + 'sort_order' => $page_number, + 'section_number' => $chunkSection, 'content' => to_utf8($chunkContent), + 'original_content' => to_utf8($chunkContent), ] ); - $chunks[] = [ - new VectorlizeDataJob($DocumentChunk), - ]; + Log::info('[LaraLlama] WebhookSource adding to new batch'); + + $chunks[] = new VectorlizeDataJob($DocumentChunk); + + $page_number++; } Bus::batch($chunks) - ->name("Chunking Document from Webhook - {$this->document->id} {$this->document->file_path}") + ->name("Chunking Document from WebhookSource - {$this->source->id}") ->allowFailures() ->finally(function (Batch $batch) use ($document) { - DocumentProcessingCompleteJob::dispatch($document); + Bus::batch([ + [ + new SummarizeDocumentJob($document), + new TagDocumentJob($document), + new DocumentProcessingCompleteJob($document), + ], + ]) + ->name(sprintf('Final Document Steps Document %s id %d', $document->type->name, $document->id)) + ->allowFailures() + 
->onQueue(LlmDriverFacade::driver($document->getDriver())->onQueue()) + ->dispatch(); }) - ->onQueue(LlmDriverFacade::driver($document->getDriver())->onQueue()) + ->onQueue(LlmDriverFacade::driver($this->source->getDriver())->onQueue()) ->dispatch(); - } - } catch (\Exception $e) { - Log::error('[LaraChain] - Error running WebhookSource Job Level', [ - 'error' => $e->getMessage(), - 'results' => $results, - ]); + } } diff --git a/app/Helpers/ChatHelperTrait.php b/app/Helpers/ChatHelperTrait.php index ff4f0bed..a2c840b9 100644 --- a/app/Helpers/ChatHelperTrait.php +++ b/app/Helpers/ChatHelperTrait.php @@ -2,9 +2,14 @@ namespace App\Helpers; +use App\Domains\Chat\MetaDataDto; +use App\Domains\Messages\RoleEnum; use App\Models\Chat; use App\Models\Collection; use App\Models\Source; +use App\Models\SourceTask; +use Illuminate\Support\Arr; +use Illuminate\Support\Facades\Log; trait ChatHelperTrait { @@ -49,4 +54,46 @@ public function ifNotActionRequired(string $results): bool return $results == 'false'; } + + public function skip(Source $source, string $key): bool + { + if(! 
$source->force && + SourceTask::where('source_id', $source->id)->where('task_key', $key)->exists()) { + Log::info('[LaraChain] GetWebContentJob - Skipping - already ran'); + return true; + } else { + return false; + } + } + + public function createSourceTask(Source $source, string $key): SourceTask + { + return SourceTask::create([ + 'source_id' => $this->source->id, + 'task_key' => $key, + ]); + } + + public function addUserMessage(Chat $chat, string $message): void + { + $chat->addInput( + message: $message, + role: RoleEnum::User, + show_in_thread: true, + meta_data: MetaDataDto::from([ + 'driver' => $this->source->getDriver(), + 'source' => $this->source->title, + ]), + ); + } + + public function arrifyPromptResults(string $original) : array { + $promptResults = json_decode($original, true); + + if (is_null($promptResults)) { + $promptResults = Arr::wrap($original); + } + + return $promptResults; + } } diff --git a/app/Jobs/GetWebContentJob.php b/app/Jobs/GetWebContentJob.php index 62f08c3a..9cb89e3c 100644 --- a/app/Jobs/GetWebContentJob.php +++ b/app/Jobs/GetWebContentJob.php @@ -59,18 +59,11 @@ public function handle(): void $key = md5($this->webResponseDto->url.$this->source->id); - if ( - ! 
$this->source->force && - SourceTask::where('source_id', $this->source->id)->where('task_key', $key)->exists()) { - Log::info('[LaraChain] GetWebContentJob - Skipping - already ran'); - + if($this->skip($this->source, $key)) { return; } - SourceTask::create([ - 'source_id' => $this->source->id, - 'task_key' => $key, - ]); + $this->createSourceTask($this->source, $key); Log::info("[LaraChain] GetWebContentJob - {$this->source->title} - URL: {$this->webResponseDto->url}"); @@ -100,21 +93,9 @@ public function handle(): void $chat = $this->source->chat; - $chat->addInput( - message: $prompt, - role: RoleEnum::User, - show_in_thread: true, - meta_data: MetaDataDto::from([ - 'driver' => $this->source->getDriver(), - 'source' => $this->source->title, - ]), - ); + $this->addUserMessage($chat, $promptResultsOriginal); - $promptResults = json_decode($promptResultsOriginal, true); - - if (is_null($promptResults)) { - $promptResults = Arr::wrap($promptResultsOriginal); - } + $promptResults = $this->arrifyPromptResults($promptResultsOriginal); /** * @NOTE all the user to build array results diff --git a/tests/Feature/WebhookSourceTest.php b/tests/Feature/WebhookSourceTest.php index 0f51b394..e23a2092 100644 --- a/tests/Feature/WebhookSourceTest.php +++ b/tests/Feature/WebhookSourceTest.php @@ -75,10 +75,10 @@ public function test_prevent_duplicates_github() $payload = get_fixture('example_github.json'); LlmDriverFacade::shouldReceive('driver->onQueue') - ->times(4)->andReturn('default'); + ->times(2)->andReturn('default'); LlmDriverFacade::shouldReceive('driver->completion') - ->twice()->andReturn( + ->once()->andReturn( CompletionResponse::from([ 'content' => get_fixture('github_transformed.json', false), ]) @@ -97,7 +97,7 @@ public function test_prevent_duplicates_github() $this->assertDatabaseCount('documents', 2); $this->assertDatabaseCount('document_chunks', 2); - Bus::assertBatchCount(4); + Bus::assertBatchCount(2); } @@ -111,10 +111,10 @@ public function 
test_prevent_duplicates_statamic() $payload['content'] = $payload; LlmDriverFacade::shouldReceive('driver->onQueue') - ->times(2)->andReturn('default'); + ->once()->andReturn('default'); LlmDriverFacade::shouldReceive('driver->completion') - ->times(2)->andReturn( + ->times(1)->andReturn( CompletionResponse::from([ 'content' => 'Foo Bar', ]) @@ -133,7 +133,7 @@ public function test_prevent_duplicates_statamic() $this->assertDatabaseCount('documents', 1); $this->assertDatabaseCount('document_chunks', 1); - Bus::assertBatchCount(2); + Bus::assertBatchCount(1); } } From b859925a7ab4ee354f698872bfb29afeb689750c Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Tue, 23 Jul 2024 14:25:26 -0400 Subject: [PATCH 17/19] webhook working --- app/Domains/Sources/WebhookSource.php | 11 ++--------- app/Helpers/ChatHelperTrait.php | 16 +++++++++------- app/Jobs/GetWebContentJob.php | 10 +++------- app/Models/Source.php | 6 +----- 4 files changed, 15 insertions(+), 28 deletions(-) diff --git a/app/Domains/Sources/WebhookSource.php b/app/Domains/Sources/WebhookSource.php index 8c687194..680d9d49 100644 --- a/app/Domains/Sources/WebhookSource.php +++ b/app/Domains/Sources/WebhookSource.php @@ -2,12 +2,9 @@ namespace App\Domains\Sources; -use App\Domains\Chat\MetaDataDto; use App\Domains\Documents\StatusEnum; use App\Domains\Documents\TypesEnum; -use App\Domains\Messages\RoleEnum; use App\Domains\Prompts\PromptMerge; -use App\Helpers\ChatHelperTrait; use App\Helpers\TextChunker; use App\Jobs\DocumentProcessingCompleteJob; use App\Jobs\SummarizeDocumentJob; @@ -15,7 +12,6 @@ use App\Models\Document; use App\Models\DocumentChunk; use App\Models\Source; -use App\Models\SourceTask; use Illuminate\Bus\Batch; use Illuminate\Support\Arr; use Illuminate\Support\Facades\Bus; @@ -25,7 +21,6 @@ class WebhookSource extends BaseSource { - public SourceTypeEnum $sourceTypeEnum = SourceTypeEnum::WebhookSource; protected array $payload = []; @@ -57,7 +52,7 @@ public function handle(Source $source): 
void $payloadMd5 = md5(json_encode($this->payload, 128)); $key = md5($payloadMd5.$this->source->id); - if($this->skip($this->source, $key)) { + if ($this->skip($this->source, $key)) { return; } @@ -85,9 +80,7 @@ public function handle(Source $source): void $promptResultsOriginal = $results->content; - $chat = $source->chat; - - $this->addUserMessage($chat, $promptResultsOriginal); + $this->addUserMessage($source, $promptResultsOriginal); $promptResults = $this->arrifyPromptResults($promptResultsOriginal); diff --git a/app/Helpers/ChatHelperTrait.php b/app/Helpers/ChatHelperTrait.php index a2c840b9..aae45796 100644 --- a/app/Helpers/ChatHelperTrait.php +++ b/app/Helpers/ChatHelperTrait.php @@ -57,9 +57,10 @@ public function ifNotActionRequired(string $results): bool public function skip(Source $source, string $key): bool { - if(! $source->force && + if (! $source->force && SourceTask::where('source_id', $source->id)->where('task_key', $key)->exists()) { Log::info('[LaraChain] GetWebContentJob - Skipping - already ran'); + return true; } else { return false; @@ -69,25 +70,26 @@ public function skip(Source $source, string $key): bool public function createSourceTask(Source $source, string $key): SourceTask { return SourceTask::create([ - 'source_id' => $this->source->id, + 'source_id' => $source->id, 'task_key' => $key, ]); } - public function addUserMessage(Chat $chat, string $message): void + public function addUserMessage(Source $source, string $message): void { - $chat->addInput( + $source->refresh()->getChat()->addInput( message: $message, role: RoleEnum::User, show_in_thread: true, meta_data: MetaDataDto::from([ - 'driver' => $this->source->getDriver(), - 'source' => $this->source->title, + 'driver' => $source->getDriver(), + 'source' => $source->title, ]), ); } - public function arrifyPromptResults(string $original) : array { + public function arrifyPromptResults(string $original): array + { $promptResults = json_decode($original, true); if 
(is_null($promptResults)) { diff --git a/app/Jobs/GetWebContentJob.php b/app/Jobs/GetWebContentJob.php index 9cb89e3c..28d98de6 100644 --- a/app/Jobs/GetWebContentJob.php +++ b/app/Jobs/GetWebContentJob.php @@ -13,7 +13,6 @@ use App\Models\Document; use App\Models\DocumentChunk; use App\Models\Source; -use App\Models\SourceTask; use Facades\App\Domains\Sources\WebSearch\GetPage; use Illuminate\Bus\Batch; use Illuminate\Bus\Batchable; @@ -22,7 +21,6 @@ use Illuminate\Foundation\Bus\Dispatchable; use Illuminate\Queue\InteractsWithQueue; use Illuminate\Queue\SerializesModels; -use Illuminate\Support\Arr; use Illuminate\Support\Facades\Bus; use Illuminate\Support\Facades\Log; use LlmLaraHub\LlmDriver\LlmDriverFacade; @@ -59,7 +57,7 @@ public function handle(): void $key = md5($this->webResponseDto->url.$this->source->id); - if($this->skip($this->source, $key)) { + if ($this->skip($this->source, $key)) { return; } @@ -91,9 +89,7 @@ public function handle(): void } else { $promptResultsOriginal = $results->content; - $chat = $this->source->chat; - - $this->addUserMessage($chat, $promptResultsOriginal); + $this->addUserMessage($this->source, $promptResultsOriginal); $promptResults = $this->arrifyPromptResults($promptResultsOriginal); @@ -182,7 +178,7 @@ public function handle(): void * I could move this into the loop if it is not * enough here */ - $assistantMessage = $chat->addInput( + $assistantMessage = $this->source->getChat()->addInput( message: json_encode($promptResults), role: RoleEnum::Assistant, show_in_thread: true, diff --git a/app/Models/Source.php b/app/Models/Source.php index 400ccf39..996c464e 100644 --- a/app/Models/Source.php +++ b/app/Models/Source.php @@ -55,11 +55,7 @@ public function getChatable(): HasDrivers public function getChat(): ?Chat { - /** - * @TODO - * I need to come back to this - */ - return $this->collection->chats()->first(); + return $this->chat ?: $this->collection->chats()->first(); } public function getSummary(): string From 
3c3f73a6c4eab31a4d07611d04e32aece4eaa231 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Tue, 23 Jul 2024 21:07:53 -0400 Subject: [PATCH 18/19] Wow! All sources now have the new feature to Prompt values out of the data. Also to return false if the data does not have what you want Lastly you can prompt out arrays to make more than one document out of that data. --- app/Domains/Sources/JsonSource.php | 28 ----- .../Sources/JsonSource/Components/Card.vue | 51 --------- .../JsonSource/Components/Resources.vue | 75 ------------- .../js/Pages/Sources/JsonSource/Create.vue | 96 ----------------- .../js/Pages/Sources/JsonSource/Edit.vue | 101 ------------------ tests/Feature/JsonSourceTest.php | 21 ---- 6 files changed, 372 deletions(-) delete mode 100644 app/Domains/Sources/JsonSource.php delete mode 100644 resources/js/Pages/Sources/JsonSource/Components/Card.vue delete mode 100644 resources/js/Pages/Sources/JsonSource/Components/Resources.vue delete mode 100644 resources/js/Pages/Sources/JsonSource/Create.vue delete mode 100644 resources/js/Pages/Sources/JsonSource/Edit.vue delete mode 100644 tests/Feature/JsonSourceTest.php diff --git a/app/Domains/Sources/JsonSource.php b/app/Domains/Sources/JsonSource.php deleted file mode 100644 index c62272f1..00000000 --- a/app/Domains/Sources/JsonSource.php +++ /dev/null @@ -1,28 +0,0 @@ - - -import {Link, useForm} from "@inertiajs/vue3"; -import {useToast} from "vue-toastification"; -import Settings from "@/Pages/Sources/Cards/Settings.vue"; -import Clipboard from "@/Components/Clipboard.vue"; -import {computed} from "vue"; -const toast = useToast(); - -const props = defineProps({ - source: Object -}) - -const form = useForm({}) - - - -const run = (source) => { - form.post(route('collections.sources.run', { - source: source.id - }), { - onStart: params => { - toast("Running"); - }, - preserveScroll: true, - onSuccess: () => { - toast.success('Source is running'); - } - }); -} - - - - - - diff --git 
a/resources/js/Pages/Sources/JsonSource/Components/Resources.vue b/resources/js/Pages/Sources/JsonSource/Components/Resources.vue deleted file mode 100644 index cc8c2509..00000000 --- a/resources/js/Pages/Sources/JsonSource/Components/Resources.vue +++ /dev/null @@ -1,75 +0,0 @@ - - - diff --git a/resources/js/Pages/Sources/JsonSource/Create.vue b/resources/js/Pages/Sources/JsonSource/Create.vue deleted file mode 100644 index 9ed0abda..00000000 --- a/resources/js/Pages/Sources/JsonSource/Create.vue +++ /dev/null @@ -1,96 +0,0 @@ - - - diff --git a/resources/js/Pages/Sources/JsonSource/Edit.vue b/resources/js/Pages/Sources/JsonSource/Edit.vue deleted file mode 100644 index c9358fa1..00000000 --- a/resources/js/Pages/Sources/JsonSource/Edit.vue +++ /dev/null @@ -1,101 +0,0 @@ - - - diff --git a/tests/Feature/JsonSourceTest.php b/tests/Feature/JsonSourceTest.php deleted file mode 100644 index 148b1c1d..00000000 --- a/tests/Feature/JsonSourceTest.php +++ /dev/null @@ -1,21 +0,0 @@ -markTestSkipped('@TODO not sure this class is needed yet or at all'); - $source = Source::factory()->create([ - 'slug' => 'test', - 'type' => SourceTypeEnum::JsonSource, - ]); - - $source->run(); - } -} From e75649fb8d95ade8df92cecc79f96e4b87e34963 Mon Sep 17 00:00:00 2001 From: Alfred Nutile Date: Tue, 23 Jul 2024 21:12:40 -0400 Subject: [PATCH 19/19] fix the npm fail --- app/Domains/Sources/BaseSource.php | 2 + app/Domains/Sources/EmailSource.php | 98 +++++++++---------- app/Domains/Sources/FeedSource.php | 17 ++-- app/Domains/Sources/GoogleSheetSource.php | 2 + app/Domains/Sources/SiteMapSource.php | 17 ++-- app/Http/Controllers/BaseSourceController.php | 2 + .../Sources/FeedSourceController.php | 2 + .../Sources/SiteMapSourceController.php | 2 + .../Sources/WebhookSourceController.php | 1 + .../FeedSource/Components/Resources.vue | 13 +++ .../js/Pages/Sources/FeedSource/Create.vue | 1 + .../js/Pages/Sources/FeedSource/Edit.vue | 1 + resources/js/Pages/Sources/Index.vue | 5 - 
.../SiteMapSource/Components/Resources.vue | 14 +++ .../js/Pages/Sources/SiteMapSource/Create.vue | 1 + .../js/Pages/Sources/SiteMapSource/Edit.vue | 1 + .../WebSource/Components/Resources.vue | 14 +++ .../js/Pages/Sources/WebSource/Create.vue | 1 + resources/js/Pages/Sources/WebSource/Edit.vue | 1 + .../WebhookSource/Components/Resources.vue | 14 +++ .../js/Pages/Sources/WebhookSource/Create.vue | 1 + .../js/Pages/Sources/WebhookSource/Edit.vue | 1 + tests/Feature/FeedSourceTest.php | 2 +- 23 files changed, 139 insertions(+), 74 deletions(-) diff --git a/app/Domains/Sources/BaseSource.php b/app/Domains/Sources/BaseSource.php index 98de2a8f..5beeb601 100644 --- a/app/Domains/Sources/BaseSource.php +++ b/app/Domains/Sources/BaseSource.php @@ -27,6 +27,8 @@ abstract class BaseSource public string $batchTitle = 'Chunking Source'; + public bool $promptPower = true; + public static string $description = 'Sources are ways we get data into the system. They are the core of the system.'; public ?Document $document = null; diff --git a/app/Domains/Sources/EmailSource.php b/app/Domains/Sources/EmailSource.php index 48c3d476..12bcf1fa 100644 --- a/app/Domains/Sources/EmailSource.php +++ b/app/Domains/Sources/EmailSource.php @@ -11,7 +11,6 @@ use App\Jobs\ChunkDocumentJob; use App\Models\Document; use App\Models\Source; -use App\Models\SourceTask; use Facades\App\Domains\EmailParser\Client; use Illuminate\Support\Facades\Bus; use Illuminate\Support\Facades\Log; @@ -45,18 +44,17 @@ public function handle(Source $source): void return; } + $assistantMessage = null; + $this->source = $this->checkForChat($source); $key = md5($this->mailDto->date.$this->mailDto->from.$source->id); - if (SourceTask::where('source_id', $source->id)->where('task_key', $key)->exists()) { + if ($this->skip($this->source, $key)) { return; } - SourceTask::create([ - 'source_id' => $source->id, - 'task_key' => $key, - ]); + $this->createSourceTask($this->source, $key); $this->content = 
$this->mailDto->getContent(); @@ -74,8 +72,6 @@ public function handle(Source $source): void 'prompt' => $prompt, ]); - $chat = $source->chat; - $results = LlmDriverFacade::driver( $source->getDriver() )->completion($prompt); @@ -85,48 +81,52 @@ public function handle(Source $source): void 'prompt' => $prompt, ]); } else { + $this->addUserMessage($source, $prompt); + + $promptResultsOriginal = $results->content; + $promptResults = $this->arrifyPromptResults($promptResultsOriginal); + foreach ($promptResults as $promptResultIndex => $promptResult) { + $promptResult = json_encode($promptResult); + + $title = sprintf('Email Subject - item #%d -%s', + $promptResultIndex + 1, + $this->mailDto->subject); + + $document = Document::updateOrCreate([ + 'source_id' => $source->id, + 'type' => TypesEnum::Email, + 'subject' => $title, + 'collection_id' => $source->collection_id, + ], [ + 'summary' => $promptResult, + 'meta_data' => $this->mailDto->toArray(), + 'original_content' => $this->mailDto->body, + 'status_summary' => StatusEnum::Pending, + 'status' => StatusEnum::Pending, + ]); + + Bus::batch([new ChunkDocumentJob($document)]) + ->name("Processing Email {$this->mailDto->subject}") + ->allowFailures() + ->dispatch(); + + $assistantMessage = $source->getChat()->addInput( + message: $results->content, + role: RoleEnum::Assistant, + show_in_thread: true, + meta_data: MetaDataDto::from([ + 'driver' => $source->getDriver(), + 'source' => $source->title, + ]), + ); + } + + if ($assistantMessage?->id) { + $this->savePromptHistory( + message: $assistantMessage, + prompt: $prompt); + } - $userMessage = $chat->addInput( - message: $prompt, - role: RoleEnum::User, - show_in_thread: true, - meta_data: MetaDataDto::from([ - 'driver' => $source->getDriver(), - 'source' => $source->title, - ]), - ); - - $document = Document::updateOrCreate([ - 'source_id' => $source->id, - 'type' => TypesEnum::Email, - 'subject' => $this->mailDto->subject, - 'collection_id' => $source->collection_id, - 
], [ - 'summary' => $results->content, - 'meta_data' => $this->mailDto->toArray(), - 'original_content' => $this->mailDto->body, - 'status_summary' => StatusEnum::Pending, - 'status' => StatusEnum::Pending, - ]); - - Bus::batch([new ChunkDocumentJob($document)]) - ->name("Processing Email {$this->mailDto->subject}") - ->allowFailures() - ->dispatch(); - - $assistantMessage = $chat->addInput( - message: $results->content, - role: RoleEnum::Assistant, - show_in_thread: true, - meta_data: MetaDataDto::from([ - 'driver' => $source->getDriver(), - 'source' => $source->title, - ]), - ); - - $this->savePromptHistory( - message: $assistantMessage, - prompt: $prompt); } } diff --git a/app/Domains/Sources/FeedSource.php b/app/Domains/Sources/FeedSource.php index a9fe1636..48b24233 100644 --- a/app/Domains/Sources/FeedSource.php +++ b/app/Domains/Sources/FeedSource.php @@ -34,8 +34,6 @@ public function handle(Source $source): void $feedItems = $this->getFeedFromUrl($source->meta_data['feed_url']); - $jobs = []; - foreach ($feedItems as $feedItem) { $webResponseDto = WebResponseDto::from([ @@ -46,16 +44,15 @@ public function handle(Source $source): void 'profile' => [], ]); - $jobs[] = new GetWebContentJob($source, $webResponseDto); - + Bus::batch([ + new GetWebContentJob($source, $webResponseDto), + ]) + ->name("Getting Feed Data - {$source->title}") + ->onQueue(LlmDriverFacade::driver($source->getDriver())->onQueue()) + ->allowFailures() + ->dispatch(); } - Bus::batch($jobs) - ->name("Getting Feed Data - {$source->title}") - ->onQueue(LlmDriverFacade::driver($source->getDriver())->onQueue()) - ->allowFailures() - ->dispatch(); - $source->last_run = now(); $source->save(); diff --git a/app/Domains/Sources/GoogleSheetSource.php b/app/Domains/Sources/GoogleSheetSource.php index 3fdc331e..e14c3c95 100644 --- a/app/Domains/Sources/GoogleSheetSource.php +++ b/app/Domains/Sources/GoogleSheetSource.php @@ -17,6 +17,8 @@ class GoogleSheetSource extends BaseSource public static string 
$description = 'Add an URL that is Public Viewable and the system will keep an eye on it'; + public bool $promptPower = false; + /** * Here you can add content coming in from an API, * Email etc to documents. or you can React to the data coming in and for example diff --git a/app/Domains/Sources/SiteMapSource.php b/app/Domains/Sources/SiteMapSource.php index 6a76b7e6..1a0a0880 100644 --- a/app/Domains/Sources/SiteMapSource.php +++ b/app/Domains/Sources/SiteMapSource.php @@ -35,8 +35,6 @@ public function handle(Source $source): void */ $feedItems = SiteMapParserWrapper::handle($source->meta_data['feed_url'])->take(10); - $jobs = []; - foreach ($feedItems as $feedItem) { $webResponseDto = WebResponseDto::from([ 'url' => $feedItem->link, @@ -45,14 +43,15 @@ public function handle(Source $source): void 'meta_data' => $feedItem->toArray(), 'profile' => [], ]); - $jobs[] = new GetWebContentJob($source, $webResponseDto); - } - Bus::batch($jobs) - ->name("Getting Feed Data - {$source->title}") - ->onQueue(LlmDriverFacade::driver($source->getDriver())->onQueue()) - ->allowFailures() - ->dispatch(); + Bus::batch([ + new GetWebContentJob($source, $webResponseDto), + ]) + ->name("Getting Sitemap site for Source - {$webResponseDto->url}") + ->onQueue(LlmDriverFacade::driver($source->getDriver())->onQueue()) + ->allowFailures() + ->dispatch(); + } $source->last_run = now(); $source->save(); diff --git a/app/Http/Controllers/BaseSourceController.php b/app/Http/Controllers/BaseSourceController.php index 7cf64545..ba2b1953 100644 --- a/app/Http/Controllers/BaseSourceController.php +++ b/app/Http/Controllers/BaseSourceController.php @@ -63,6 +63,7 @@ protected function makeSource(array $validated, Collection $collection): void 'active' => $validated['active'], 'collection_id' => $collection->id, 'type' => $this->sourceTypeEnum, + 'force' => data_get($validated, 'force', false), 'user_id' => $this->getUserId($collection), 'meta_data' => [ 'driver' => 'brave', @@ -150,6 +151,7 @@ 
protected function getValidationRules(): array 'title' => 'required|string', 'details' => 'required|string', 'active' => ['boolean', 'required'], + 'force' => ['nullable', 'boolean'], 'recurring' => ['string', 'required'], 'meta_data' => ['nullable', 'array'], 'secrets' => ['nullable', 'array'], diff --git a/app/Http/Controllers/Sources/FeedSourceController.php b/app/Http/Controllers/Sources/FeedSourceController.php index a19ba614..f0d8ccda 100644 --- a/app/Http/Controllers/Sources/FeedSourceController.php +++ b/app/Http/Controllers/Sources/FeedSourceController.php @@ -38,6 +38,7 @@ protected function getValidationRules(): array 'active' => ['boolean', 'required'], 'recurring' => ['string', 'required'], 'meta_data' => ['required', 'array'], + 'force' => ['nullable', 'boolean'], 'meta_data.feed_url' => ['required', 'string'], 'secrets' => ['nullable', 'array'], ]; @@ -52,6 +53,7 @@ protected function makeSource(array $validated, Collection $collection): void 'user_id' => $this->getUserId($collection), 'active' => $validated['active'], 'collection_id' => $collection->id, + 'force' => data_get($validated, 'force', false), 'type' => $this->sourceTypeEnum, 'meta_data' => $validated['meta_data'], ]); diff --git a/app/Http/Controllers/Sources/SiteMapSourceController.php b/app/Http/Controllers/Sources/SiteMapSourceController.php index 85f70d3d..f2acd29b 100644 --- a/app/Http/Controllers/Sources/SiteMapSourceController.php +++ b/app/Http/Controllers/Sources/SiteMapSourceController.php @@ -40,6 +40,7 @@ protected function getValidationRules(): array 'meta_data' => ['required', 'array'], 'meta_data.feed_url' => ['required', 'string'], 'secrets' => ['nullable', 'array'], + 'force' => ['nullable', 'boolean'], ]; } @@ -51,6 +52,7 @@ protected function makeSource(array $validated, Collection $collection): void 'user_id' => $this->getUserId($collection), 'recurring' => $validated['recurring'], 'active' => $validated['active'], + 'force' => data_get($validated, 'force', false), 
'collection_id' => $collection->id, 'type' => $this->sourceTypeEnum, 'meta_data' => $validated['meta_data'], diff --git a/app/Http/Controllers/Sources/WebhookSourceController.php b/app/Http/Controllers/Sources/WebhookSourceController.php index 1773911c..ce19e88f 100644 --- a/app/Http/Controllers/Sources/WebhookSourceController.php +++ b/app/Http/Controllers/Sources/WebhookSourceController.php @@ -34,6 +34,7 @@ protected function makeSource(array $validated, Collection $collection): void 'details' => $validated['details'], 'recurring' => $validated['recurring'], 'active' => $validated['active'], + 'force' => data_get($validated, 'force', false), 'user_id' => $this->getUserId($collection), 'collection_id' => $collection->id, 'slug' => str(Str::random(16))->toString(), diff --git a/resources/js/Pages/Sources/FeedSource/Components/Resources.vue b/resources/js/Pages/Sources/FeedSource/Components/Resources.vue index 41f9131e..2349f461 100644 --- a/resources/js/Pages/Sources/FeedSource/Components/Resources.vue +++ b/resources/js/Pages/Sources/FeedSource/Components/Resources.vue @@ -16,6 +16,19 @@
+
+ + + +
+ By default the system will only run the first time for a url or an email. + But if you want to try again just check this box. + This can be good if you are checking a home page for updates. + Or a feed for updates. But NOT if you are checking an email box for emails and + do not want to repeat check the same email. +
+
+

This is meta data

diff --git a/resources/js/Pages/Sources/FeedSource/Create.vue b/resources/js/Pages/Sources/FeedSource/Create.vue index 86d70d00..68b521a5 100644 --- a/resources/js/Pages/Sources/FeedSource/Create.vue +++ b/resources/js/Pages/Sources/FeedSource/Create.vue @@ -31,6 +31,7 @@ const form = useForm({ title: '', details: '', recurring: 'not', + force: false, meta_data: { feed_url: "https://larallama.io/feed", }, diff --git a/resources/js/Pages/Sources/FeedSource/Edit.vue b/resources/js/Pages/Sources/FeedSource/Edit.vue index db85e805..07b2324d 100644 --- a/resources/js/Pages/Sources/FeedSource/Edit.vue +++ b/resources/js/Pages/Sources/FeedSource/Edit.vue @@ -31,6 +31,7 @@ const props = defineProps({ const form = useForm({ title: props.source.data.title, details: props.source.data.details, + force: props.source.data.force, active: props.source.data.active, recurring: props.source.data.recurring, meta_data: { diff --git a/resources/js/Pages/Sources/Index.vue b/resources/js/Pages/Sources/Index.vue index e33c1a8b..7bdded7e 100644 --- a/resources/js/Pages/Sources/Index.vue +++ b/resources/js/Pages/Sources/Index.vue @@ -13,7 +13,6 @@ import Card from "@/Pages/Sources/Cards/Card.vue"; import EmailCard from "@/Pages/Sources/EmailSource/Components/Card.vue"; import EmailBoxCard from "@/Pages/Sources/EmailBoxSource/Components/Card.vue"; import WebhookSource from "@/Pages/Sources/WebhookSource/Components/Card.vue"; -import JsonSource from "@/Pages/Sources/JsonSource/Components/Card.vue"; import FeedSource from "@/Pages/Sources/FeedSource/Components/Card.vue"; import WebPageSource from "@/Pages/Sources/WebPageSource/Components/Card.vue"; import SiteMapSource from "@/Pages/Sources/SiteMapSource/Components/Card.vue"; @@ -63,10 +62,7 @@ const props = defineProps({ Sources are ways you can add data to your collection beyond uploading documents. You can add via a websearch, and soon email and calendar. - - -
@@ -79,7 +75,6 @@ const props = defineProps({ - diff --git a/resources/js/Pages/Sources/SiteMapSource/Components/Resources.vue b/resources/js/Pages/Sources/SiteMapSource/Components/Resources.vue index 56afa4e5..6a445628 100644 --- a/resources/js/Pages/Sources/SiteMapSource/Components/Resources.vue +++ b/resources/js/Pages/Sources/SiteMapSource/Components/Resources.vue @@ -16,6 +16,20 @@
+ +
+ + + +
+ By default the system will only run the first time for a url or an email. + But if you want to try again just check this box. + This can be good if you are checking a home page for updates. + Or a feed for updates. But NOT if you are checking an email box for emails and + do not want to repeat check the same email. +
+
+

This is meta data

diff --git a/resources/js/Pages/Sources/SiteMapSource/Create.vue b/resources/js/Pages/Sources/SiteMapSource/Create.vue index f60a7675..d4e42ffb 100644 --- a/resources/js/Pages/Sources/SiteMapSource/Create.vue +++ b/resources/js/Pages/Sources/SiteMapSource/Create.vue @@ -30,6 +30,7 @@ const props = defineProps({ const form = useForm({ title: '', details: '', + force: false, recurring: 'not', meta_data: { feed_url: "https://larallama.io/sitemap.xml", diff --git a/resources/js/Pages/Sources/SiteMapSource/Edit.vue b/resources/js/Pages/Sources/SiteMapSource/Edit.vue index 56c0d1f5..f1e4431c 100644 --- a/resources/js/Pages/Sources/SiteMapSource/Edit.vue +++ b/resources/js/Pages/Sources/SiteMapSource/Edit.vue @@ -32,6 +32,7 @@ const form = useForm({ title: props.source.data.title, details: props.source.data.details, active: props.source.data.active, + force: props.source.data.force, recurring: props.source.data.recurring, meta_data: { feed_url: props.source.data.meta_data.feed_url diff --git a/resources/js/Pages/Sources/WebSource/Components/Resources.vue b/resources/js/Pages/Sources/WebSource/Components/Resources.vue index 834e8daf..a64f64f8 100644 --- a/resources/js/Pages/Sources/WebSource/Components/Resources.vue +++ b/resources/js/Pages/Sources/WebSource/Components/Resources.vue @@ -14,6 +14,20 @@
+ +
+ + + +
+ By default the system will only run the first time for a url or an email. + But if you want to try again just check this box. + This can be good if you are checking a home page for updates. + Or a feed for updates. But NOT if you are checking an email box for emails and + do not want to repeat check the same email. +
+
+
diff --git a/resources/js/Pages/Sources/WebSource/Create.vue b/resources/js/Pages/Sources/WebSource/Create.vue index 308c2953..bd391ead 100644 --- a/resources/js/Pages/Sources/WebSource/Create.vue +++ b/resources/js/Pages/Sources/WebSource/Create.vue @@ -28,6 +28,7 @@ const props = defineProps({ const form = useForm({ title: '', details: '', + force: false, recurring: 'not', active: true }); diff --git a/resources/js/Pages/Sources/WebSource/Edit.vue b/resources/js/Pages/Sources/WebSource/Edit.vue index 981e827b..544cc750 100644 --- a/resources/js/Pages/Sources/WebSource/Edit.vue +++ b/resources/js/Pages/Sources/WebSource/Edit.vue @@ -35,6 +35,7 @@ const form = useForm({ title: props.source.data.title, details: props.source.data.details, active: props.source.data.active, + force: props.source.data.force, recurring: props.source.data.recurring }); diff --git a/resources/js/Pages/Sources/WebhookSource/Components/Resources.vue b/resources/js/Pages/Sources/WebhookSource/Components/Resources.vue index 80463b79..4662c934 100644 --- a/resources/js/Pages/Sources/WebhookSource/Components/Resources.vue +++ b/resources/js/Pages/Sources/WebhookSource/Components/Resources.vue @@ -17,6 +17,20 @@
+ +
+ + + +
+ By default the system will only run the first time for a url or an email. + But if you want to try again just check this box. + This can be good if you are checking a home page for updates. + Or a feed for updates. But NOT if you are checking an email box for emails and + do not want to repeat check the same email. +
+
+
run(); - Bus::assertBatchCount(1); + Bus::assertBatchCount(17); } }