diff --git a/app/Domains/Chat/MetaDataDto.php b/app/Domains/Chat/MetaDataDto.php index da07d549..58bb6cd2 100644 --- a/app/Domains/Chat/MetaDataDto.php +++ b/app/Domains/Chat/MetaDataDto.php @@ -15,6 +15,7 @@ public function __construct( public mixed $date_range = '', public mixed $input = '', public mixed $driver = '', + public mixed $source = '', public mixed $reference_collection_id = '', ) { diff --git a/app/Domains/Documents/Transformers/CSVTransformer.php b/app/Domains/Documents/Transformers/CSVTransformer.php index c4fc10cf..c3a30e53 100644 --- a/app/Domains/Documents/Transformers/CSVTransformer.php +++ b/app/Domains/Documents/Transformers/CSVTransformer.php @@ -28,7 +28,6 @@ public function handle(Document $document): array $filePath = $this->document->pathToFile(); - //$filePath = null, string $disk = null, string $readerType = null $collection = (new DocumentsImport()) ->toCollection($filePath, null, $this->readerType); diff --git a/app/Domains/EmailParser/Client.php b/app/Domains/EmailParser/Client.php index 8c1be3fd..212047aa 100644 --- a/app/Domains/EmailParser/Client.php +++ b/app/Domains/EmailParser/Client.php @@ -18,7 +18,7 @@ class Client 'Drafts', ]; - public function handle(): void + public function handle(int $limit = 10): void { $mail = []; @@ -31,7 +31,7 @@ public function handle(): void $full_name = data_get($folder, 'full_name'); if (! in_array($full_name, $this->ignore)) { - $messages = $folder->messages()->all()->limit(10, 0)->get(); + $messages = $folder->messages()->all()->limit($limit, 0)->get(); logger('[LaraChain] - Email Count', [ 'count' => $messages->count(), @@ -40,6 +40,12 @@ public function handle(): void /** @var Message $message */ foreach ($messages as $message) { + //@NOTE the Seen flag made it too hard to + // then have different sources + // check the same email box. 
+ // the Source will track repeats + //$flags = $message->getFlags(); + $messageDto = MailDto::from([ 'to' => $message->getTo()->toString(), 'from' => $message->getFrom()->toString(), @@ -63,7 +69,7 @@ public function handle(): void 'slug' => $slug, ]); $mail[] = new MailBoxParserJob($messageDto); - $message->delete(expunge: true); + $message->addFlag('Seen'); } else { \Illuminate\Support\Facades\Log::info('Did not find Source with Slug To', [ 'to' => $message->getTo()->toString(), diff --git a/app/Domains/EmailParser/EmailClient.php b/app/Domains/EmailParser/EmailClient.php index 8fc9b57f..450c2ed3 100644 --- a/app/Domains/EmailParser/EmailClient.php +++ b/app/Domains/EmailParser/EmailClient.php @@ -35,7 +35,9 @@ public function setConfig(array $config): self * @throws \Webklex\PHPIMAP\Exceptions\MessageNotFoundException * @throws \Webklex\PHPIMAP\Exceptions\RuntimeException */ - public function handle(CredentialsDto $credentials, bool $delete = true): array + public function handle(CredentialsDto $credentials, + bool $delete = false, + int $limit = 10): array { $mail = []; @@ -63,6 +65,7 @@ public function handle(CredentialsDto $credentials, bool $delete = true): array $client = EmailClientFacade::setConfig($config); try { + $client->connect(); Log::info('Connected to email box', [ @@ -88,7 +91,7 @@ public function handle(CredentialsDto $credentials, bool $delete = true): array 'folders_to_check' => $foldersToCheck, ]); - $messages = $folder->messages()->all()->get(); + $messages = $folder->messages()->all()->limit($limit, 0)->get(); Log::info('[LaraChain] - Email Box Count', [ 'count' => $messages->count(), @@ -97,30 +100,25 @@ public function handle(CredentialsDto $credentials, bool $delete = true): array /** @var Message $message */ foreach ($messages as $message) { - $flags = $message->getFlags(); - - if (! 
$flags->contains('Seen')) { - $messageDto = MailDto::from([ - 'to' => $message->getTo()->toString(), - 'from' => $message->getFrom()->toString(), - 'body' => $message->getTextBody(), - 'subject' => $message->getSubject(), - 'date' => $message->getDate()->toString(), - 'header' => $message->getHeader()->raw, - ]); - - $mail[] = $messageDto; - - if ($delete) { - $message->delete(expunge: true); - } else { - $message->addFlag('Seen'); - } - } else { - Log::info('[LaraChain] - Flag Seen', [ - 'flags' => $flags->toArray(), - ]); - } + //@NOTE the Seen flag made it too hard to + // then have different sources + // check the same email box. + // the Source will track repeats + //$flags = $message->getFlags(); + + $messageDto = MailDto::from([ + 'to' => $message->getTo()->toString(), + 'from' => $message->getFrom()->toString(), + 'body' => $message->getTextBody(), + 'subject' => $message->getSubject(), + 'date' => $message->getDate()->toString(), + 'header' => $message->getHeader()->raw, + 'email_message' => $message, + ]); + + $mail[] = $messageDto; + + $message->addFlag('Seen'); } } diff --git a/app/Domains/EmailParser/MailDto.php b/app/Domains/EmailParser/MailDto.php index db3b86b1..307f5634 100644 --- a/app/Domains/EmailParser/MailDto.php +++ b/app/Domains/EmailParser/MailDto.php @@ -3,6 +3,7 @@ namespace App\Domains\EmailParser; use Spatie\LaravelData\Data; +use Webklex\PHPIMAP\Message; class MailDto extends Data { @@ -12,7 +13,8 @@ public function __construct( public ?string $to, public ?string $body, public ?string $header, - public ?string $date + public ?string $date, + public ?Message $email_message = null ) { } diff --git a/app/Domains/Prompts/EventPagePrompt.php b/app/Domains/Prompts/EventPagePrompt.php new file mode 100644 index 00000000..37e14024 --- /dev/null +++ b/app/Domains/Prompts/EventPagePrompt.php @@ -0,0 +1,62 @@ + +You are an AI assistant tasked with extracting event data from website content. + + +1. 
Analyze the provided website HTML content below the tag. +2. Look for information about events within the content. +3. If no event data is found, respond with a single word: false +4. If event data is found, extract the following information for each event: + - Event Title + - Start Date + - End Date + - Location + - Description + - Any other relevant data +5. Format the extracted data as a JSON array according to the specifications below. + + +If events are found, return a JSON array with the following structure: + +[ + { + "title": "Event Title", + "startDate": "Start Date", + "endDate": "End Date", + "location": "Location", + "description": "Description", + "additionalInfo": "Any other relevant data" + }, + { + "title": "Event Title", + "startDate": "Start Date", + "endDate": "End Date", + "location": "Location", + "description": "Description", + "additionalInfo": "Any other relevant data" + } +] + +If no events are found, return an empty JSON array: [] + + +$context + + +Respond only with the JSON array or 'false' if no events are found. Do not include any explanations or additional text in your response. + +PROMPT; + } +} diff --git a/app/Domains/Prompts/SpecificTopic.php b/app/Domains/Prompts/SpecificTopic.php new file mode 100644 index 00000000..9b1b579c --- /dev/null +++ b/app/Domains/Prompts/SpecificTopic.php @@ -0,0 +1,37 @@ + +You are an email reading assistant who will follow the prompts to help parse my email box. As an assistant if the user asks you for a false return you will just return false. NOTHING MORE + + +If the email content passed in is about Web Application work the frame work then keep and and summarize it. Else if it is about anything else just return the word false and only the word false. Please IGNORE Spam emails or Subjects that are about web applications but then the body is SPAM + + +On a non false response, Summary and original message as Markdown. 
+On a false response just the word false, + + +I would like to hire you to build an awesome application for me with DailyAi +"You have an email from Teddy asking you to use DailAi to automate his business. + +I would like to sell you property in Alaska +False + + + +[CONTEXT] +PROMPT; + } +} diff --git a/app/Domains/Sources/BaseSource.php b/app/Domains/Sources/BaseSource.php index 0892bf1b..5beeb601 100644 --- a/app/Domains/Sources/BaseSource.php +++ b/app/Domains/Sources/BaseSource.php @@ -4,6 +4,7 @@ use App\Domains\Prompts\PromptMerge; use App\Domains\UnStructured\StructuredTypeEnum; +use App\Helpers\ChatHelperTrait; use App\Jobs\DocumentProcessingCompleteJob; use App\Jobs\SummarizeDocumentJob; use App\Jobs\VectorlizeDataJob; @@ -17,12 +18,17 @@ use Illuminate\Support\Facades\Bus; use Illuminate\Support\Facades\Log; use LlmLaraHub\LlmDriver\LlmDriverFacade; +use LlmLaraHub\LlmDriver\ToolsHelper; use LlmLaraHub\TagFunction\Jobs\TagDocumentJob; abstract class BaseSource { + use ChatHelperTrait, ToolsHelper; + public string $batchTitle = 'Chunking Source'; + public bool $promptPower = true; + public static string $description = 'Sources are ways we get data into the system. 
They are the core of the system.'; public ?Document $document = null; @@ -215,4 +221,17 @@ protected function getEmailSummary(Document $document): string return $content; } + + public function getSourceFromSlug(string $slug): ?Source + { + $source = Source::where('type', $this->sourceTypeEnum) + ->slug($slug) + ->first(); + + if ($source) { + return $source; + } + + return null; + } } diff --git a/app/Domains/Sources/EmailBoxSource.php b/app/Domains/Sources/EmailBoxSource.php index 2511a6f3..02ac8478 100644 --- a/app/Domains/Sources/EmailBoxSource.php +++ b/app/Domains/Sources/EmailBoxSource.php @@ -5,8 +5,7 @@ use App\Domains\EmailParser\CredentialsDto; use App\Models\Source; use Facades\App\Domains\EmailParser\EmailClient; -use Facades\App\Domains\Transformers\EmailTransformer; -use Illuminate\Support\Facades\Log; +use Facades\App\Domains\Sources\EmailSource as EmailSourceFacade; class EmailBoxSource extends EmailSource { @@ -22,43 +21,7 @@ public function handle(Source $source): void $this->source = $source; foreach ($mails as $mailDto) { - $this->mailDto = $mailDto; - - $this->content = $this->mailDto->getContent(); - - $this->documentSubject = $this->mailDto->subject; - - $this->meta_data = $this->mailDto->toArray(); - - $this->transformers = $source->transformers; - - Log::info('[LaraChain] - Running Email Source'); - - try { - Log::info('Do something!'); - $baseSource = EmailTransformer::transform(baseSource: $this); - foreach ($source->transformers as $transformerChainLink) { - Log::info("[LaraChain] - Source has Transformers let's figure out which one to run", [ - 'type' => $transformerChainLink->type->name, - ]); - - $class = '\\App\\Domains\\Transformers\\'.$transformerChainLink->type->name; - if (class_exists($class)) { - $facade = '\\Facades\\App\\Domains\\Transformers\\'.$transformerChainLink->type->name; - $baseSource = $facade::transform($this); - } else { - Log::info('[LaraChain] - No Class found ', [ - 'class' => $class, - ]); - } - } - - 
$this->batchTransformedSource($baseSource, $source); - } catch (\Exception $e) { - Log::error('[LaraChain] - Error running Email Source', [ - 'error' => $e->getMessage(), - ]); - } + EmailSourceFacade::setMailDto($mailDto)->handle($source); } } diff --git a/app/Domains/Sources/EmailSource.php b/app/Domains/Sources/EmailSource.php index e6de92ca..12bcf1fa 100644 --- a/app/Domains/Sources/EmailSource.php +++ b/app/Domains/Sources/EmailSource.php @@ -2,13 +2,19 @@ namespace App\Domains\Sources; +use App\Domains\Chat\MetaDataDto; +use App\Domains\Documents\StatusEnum; +use App\Domains\Documents\TypesEnum; use App\Domains\EmailParser\MailDto; +use App\Domains\Messages\RoleEnum; +use App\Domains\Prompts\PromptMerge; +use App\Jobs\ChunkDocumentJob; use App\Models\Document; use App\Models\Source; -use App\Models\Transformer; use Facades\App\Domains\EmailParser\Client; -use Facades\App\Domains\Transformers\EmailTransformer; +use Illuminate\Support\Facades\Bus; use Illuminate\Support\Facades\Log; +use LlmLaraHub\LlmDriver\LlmDriverFacade; class EmailSource extends BaseSource { @@ -38,110 +44,90 @@ public function handle(Source $source): void return; } - $this->source = $source; + $assistantMessage = null; - $this->content = $this->mailDto->getContent(); - - $this->documentSubject = $this->mailDto->subject; + $this->source = $this->checkForChat($source); - $this->meta_data = $this->mailDto->toArray(); - - $this->transformers = $source->transformers; - - Log::info('[LaraChain] - Running Email Source'); - - /** - * @TODO - * I missed the point here. I just need to keep making tools work - * and the prompt the user gives in the Source - * let it do the work. - */ - try { - /** - * @TODO - * This turns the email into a document but what if the user wants to do something with the - * data in the email. 
Like Parse URLs or Recipe ideas etc - * The Prompt of the Source should drive all of this - */ - $baseSource = EmailTransformer::transform(baseSource: $this); - /** - * @NOTE - * Examples - * Example One: Maybe there is 1 transformer to make a reply to the email - * Transformer 1 of 1 ReplyTo Email - * Take the email - * Use Collection as voice - * Make reply to email - * The Transformer as an Output attached to it and the reply is sent. - * - * Example Two: CRM Transformer - * Take the email and make document (Type Email) and chunks from the email - * After that take the content and make who is it to, who is it from - * and make Documents for each for those of type Contact - * Relate those to the document (Type Email) - * and now there are relations for later use - * - * @TODO - * some transformers assume they are never 0 in the chain - * like CRM assumes the one before was EmailTransformer - * and the document is set - */ - Log::info("[LaraChain] - Source has Transformers let's figure out which one to run"); - - foreach ($source->transformers as $transformerChainLink) { - $class = '\\App\\Domains\\Transformers\\'.$transformerChainLink->type->name; - if (class_exists($class)) { - $facade = '\\Facades\\App\\Domains\\Transformers\\'.$transformerChainLink->type->name; - $baseSource = $facade::transform($this); - } else { - Log::info('[LaraChain] - No Class found ', [ - 'class' => $class, - ]); - } - } - $this->batchTransformedSource($baseSource, $source); + $key = md5($this->mailDto->date.$this->mailDto->from.$source->id); - } catch (\Exception $e) { - Log::error('[LaraChain] - Error running Email Source', [ - 'error' => $e->getMessage(), - ]); + if ($this->skip($this->source, $key)) { + return; } - } + $this->createSourceTask($this->source, $key); - public function getSourceFromSlug(string $slug): ?Source - { - $source = Source::where('type', $this->sourceTypeEnum) - ->slug($slug) - ->first(); + $this->content = $this->mailDto->getContent(); - if ($source) { - 
return $source; - } + $this->documentSubject = $this->mailDto->subject; - return null; - } + $this->meta_data = $this->mailDto->toArray(); - protected function getSummarizeDocumentPrompt(): string - { - if (str($this->source->details)->contains('[CONTEXT]')) { - return $this->source->details; - } + $prompt = PromptMerge::merge( + ['[CONTEXT]'], + [$this->content], + $source->getPrompt() + ); - return <<<'PROMPT' + Log::info('[LaraChain] - Running Email Source', [ + 'prompt' => $prompt, + ]); -The following content is from an email. I would like you to summarize it with the following format. + $results = LlmDriverFacade::driver( + $source->getDriver() + )->completion($prompt); -To: **TO HERE** -From: **From Here** -Subject: **Subject Here** -Body: -**Summary Here** + if ($this->ifNotActionRequired($results->content)) { + Log::info('[LaraChain] - Email Source Skipping', [ + 'prompt' => $prompt, + ]); + } else { + $this->addUserMessage($source, $prompt); + + $promptResultsOriginal = $results->content; + $promptResults = $this->arrifyPromptResults($promptResultsOriginal); + foreach ($promptResults as $promptResultIndex => $promptResult) { + $promptResult = json_encode($promptResult); + + $title = sprintf('Email Subject - item #%d -%s', + $promptResultIndex + 1, + $this->mailDto->subject); + + $document = Document::updateOrCreate([ + 'source_id' => $source->id, + 'type' => TypesEnum::Email, + 'subject' => $title, + 'collection_id' => $source->collection_id, + ], [ + 'summary' => $promptResult, + 'meta_data' => $this->mailDto->toArray(), + 'original_content' => $this->mailDto->body, + 'status_summary' => StatusEnum::Pending, + 'status' => StatusEnum::Pending, + ]); + + Bus::batch([new ChunkDocumentJob($document)]) + ->name("Processing Email {$this->mailDto->subject}") + ->allowFailures() + ->dispatch(); + + $assistantMessage = $source->getChat()->addInput( + message: $results->content, + role: RoleEnum::Assistant, + show_in_thread: true, + meta_data: MetaDataDto::from([ 
+ 'driver' => $source->getDriver(), + 'source' => $source->title, + ]), + ); + } + if ($assistantMessage?->id) { + $this->savePromptHistory( + message: $assistantMessage, + prompt: $prompt); + } -** CONTEXT IS BELOW THIS LINE** -[CONTEXT] -PROMPT; + } } } diff --git a/app/Domains/Sources/FeedSource.php b/app/Domains/Sources/FeedSource.php index 98b54148..48b24233 100644 --- a/app/Domains/Sources/FeedSource.php +++ b/app/Domains/Sources/FeedSource.php @@ -34,9 +34,8 @@ public function handle(Source $source): void $feedItems = $this->getFeedFromUrl($source->meta_data['feed_url']); - $jobs = []; - foreach ($feedItems as $feedItem) { + $webResponseDto = WebResponseDto::from([ 'url' => $feedItem['link'], 'title' => $feedItem['title'], @@ -44,14 +43,15 @@ public function handle(Source $source): void 'meta_data' => $feedItem, 'profile' => [], ]); - $jobs[] = new GetWebContentJob($source, $webResponseDto); - } - Bus::batch($jobs) - ->name("Getting Feed Data - {$source->title}") - ->onQueue(LlmDriverFacade::driver($source->getDriver())->onQueue()) - ->allowFailures() - ->dispatch(); + Bus::batch([ + new GetWebContentJob($source, $webResponseDto), + ]) + ->name("Getting Feed Data - {$source->title}") + ->onQueue(LlmDriverFacade::driver($source->getDriver())->onQueue()) + ->allowFailures() + ->dispatch(); + } $source->last_run = now(); $source->save(); diff --git a/app/Domains/Sources/GoogleSheetSource.php b/app/Domains/Sources/GoogleSheetSource.php index 3fdc331e..e14c3c95 100644 --- a/app/Domains/Sources/GoogleSheetSource.php +++ b/app/Domains/Sources/GoogleSheetSource.php @@ -17,6 +17,8 @@ class GoogleSheetSource extends BaseSource public static string $description = 'Add an URL that is Public Viewable and the system will keep an eye on it'; + public bool $promptPower = false; + /** * Here you can add content coming in from an API, * Email etc to documents. 
or you can React to the data coming in and for example diff --git a/app/Domains/Sources/JsonSource.php b/app/Domains/Sources/JsonSource.php deleted file mode 100644 index c62272f1..00000000 --- a/app/Domains/Sources/JsonSource.php +++ /dev/null @@ -1,28 +0,0 @@ -meta_data['feed_url'])->take(10); - $jobs = []; - foreach ($feedItems as $feedItem) { $webResponseDto = WebResponseDto::from([ 'url' => $feedItem->link, @@ -45,14 +43,15 @@ public function handle(Source $source): void 'meta_data' => $feedItem->toArray(), 'profile' => [], ]); - $jobs[] = new GetWebContentJob($source, $webResponseDto); - } - Bus::batch($jobs) - ->name("Getting Feed Data - {$source->title}") - ->onQueue(LlmDriverFacade::driver($source->getDriver())->onQueue()) - ->allowFailures() - ->dispatch(); + Bus::batch([ + new GetWebContentJob($source, $webResponseDto), + ]) + ->name("Getting Sitemap site for Source - {$webResponseDto->url}") + ->onQueue(LlmDriverFacade::driver($source->getDriver())->onQueue()) + ->allowFailures() + ->dispatch(); + } $source->last_run = now(); $source->save(); diff --git a/app/Domains/Sources/WebPageSource.php b/app/Domains/Sources/WebPageSource.php index 0c994575..835b8321 100644 --- a/app/Domains/Sources/WebPageSource.php +++ b/app/Domains/Sources/WebPageSource.php @@ -33,7 +33,6 @@ public function handle(Source $source): void foreach ($urls as $url) { $jobs[] = new WebPageSourceJob($source, $url); - } Bus::batch($jobs) diff --git a/app/Domains/Sources/WebSearch/GetPage.php b/app/Domains/Sources/WebSearch/GetPage.php index 6b56c4d2..4ab9c374 100644 --- a/app/Domains/Sources/WebSearch/GetPage.php +++ b/app/Domains/Sources/WebSearch/GetPage.php @@ -24,7 +24,7 @@ public static function make(Collection $collection): self return new static($collection); } - public function handle(string $url): string + public function handle(string $url, bool $parseHtml = true): string { $results = Browsershot::url($url) ->dismissDialogs() @@ -37,7 +37,13 @@ public function handle(string 
$url): string Storage::disk('collections')->put($this->collection->id.'/'.$name, $results->pdf()); - return $results->bodyHtml(); + $body = $results->bodyHtml(); + + if ($parseHtml) { + $body = $this->parseHtml($body); + } + + return $body; } public function parseHtml(string $html): string diff --git a/app/Domains/Sources/WebhookSource.php b/app/Domains/Sources/WebhookSource.php index 677ec136..680d9d49 100644 --- a/app/Domains/Sources/WebhookSource.php +++ b/app/Domains/Sources/WebhookSource.php @@ -7,6 +7,7 @@ use App\Domains\Prompts\PromptMerge; use App\Helpers\TextChunker; use App\Jobs\DocumentProcessingCompleteJob; +use App\Jobs\SummarizeDocumentJob; use App\Jobs\VectorlizeDataJob; use App\Models\Document; use App\Models\DocumentChunk; @@ -16,6 +17,7 @@ use Illuminate\Support\Facades\Bus; use Illuminate\Support\Facades\Log; use LlmLaraHub\LlmDriver\LlmDriverFacade; +use LlmLaraHub\TagFunction\Jobs\TagDocumentJob; class WebhookSource extends BaseSource { @@ -46,68 +48,70 @@ public function handle(Source $source): void 'payload' => $this->payload, ]); - $chunks = []; + $this->source = $this->checkForChat($source); + $payloadMd5 = md5(json_encode($this->payload, 128)); + $key = md5($payloadMd5.$this->source->id); + if ($this->skip($this->source, $key)) { + return; + } + + $this->createSourceTask($this->source, $key); $encoded = json_encode($this->payload, 128); - $prompt = PromptMerge::merge([ - '[CONTEXT]', - ], [ - $encoded, - ], $source->details); + $prompt = PromptMerge::merge( + ['[CONTEXT]'], + [$encoded], + $this->source->getPrompt() + ); $results = LlmDriverFacade::driver( $source->getDriver() )->completion($prompt); - Log::info('[LaraChain] - WebhookSource Transformation Results', [ - 'results' => $results, - ]); - - $content = $results->content; - - /** - * @TODO - * There is too big of an assumption here - * The user might just make this TEXT it is their - * prompt to do what they want - */ - $content = str($content) - ->replace('```json', '') - 
->replaceLast('```', '') - ->toString(); + if ($this->ifNotActionRequired($results->content)) { + Log::info('[LaraChain] - Webhook Skipping', [ + 'prompt' => $prompt, + ]); + } else { + Log::info('[LaraChain] - WebhookSource Transformation Results', [ + 'results' => $results, + ]); - try { + $promptResultsOriginal = $results->content; - $results = $this->checkIfJsonOrJustText($results, $content); + $this->addUserMessage($source, $promptResultsOriginal); - $page_number = 0; + $promptResults = $this->arrifyPromptResults($promptResultsOriginal); - foreach ($results as $index => $result) { - if (is_array($result)) { - $result = json_encode($result); - } + foreach ($promptResults as $promptResultIndex => $promptResult) { + $promptResult = json_encode($promptResult); - $id = $this->getIdFromPayload($result); + /** + * Could even do ONE more look at the data + * with the Source Prompt and LLM + */ + $title = sprintf('WebhookSource - item #%d source: %s', + $promptResultIndex + 1, md5($promptResult)); $document = Document::updateOrCreate([ 'type' => TypesEnum::WebHook, 'source_id' => $source->id, - 'subject' => 'Webhook: '.$id, + 'subject' => $title, + 'collection_id' => $source->collection_id, ], [ 'status' => StatusEnum::Pending, 'meta_data' => $this->payload, - 'collection_id' => $source->collection_id, 'status_summary' => StatusEnum::Pending, - 'summary' => $result, + 'summary' => $promptResult, + 'original_content' => $promptResult, ]); - $this->document = $document; + $page_number = 1; - $page_number = $page_number + 1; - $pageContent = $result; - $size = config('llmdriver.chunking.default_size'); - $chunked_chunks = TextChunker::handle($pageContent, $size); + $chunked_chunks = TextChunker::handle($promptResult); + + $chunks = []; foreach ($chunked_chunks as $chunkSection => $chunkContent) { $guid = md5($chunkContent); @@ -115,35 +119,43 @@ public function handle(Source $source): void $DocumentChunk = DocumentChunk::updateOrCreate( [ 'document_id' => $document->id, - 
'sort_order' => $page_number, - 'section_number' => $chunkSection, + 'guid' => $guid, ], [ - 'guid' => $guid, + 'sort_order' => $page_number, + 'section_number' => $chunkSection, 'content' => to_utf8($chunkContent), + 'original_content' => to_utf8($chunkContent), ] ); - $chunks[] = [ - new VectorlizeDataJob($DocumentChunk), - ]; + Log::info('[LaraLlama] WebhookSource adding to new batch'); + + $chunks[] = new VectorlizeDataJob($DocumentChunk); + + $page_number++; } Bus::batch($chunks) - ->name("Chunking Document from Webhook - {$this->document->id} {$this->document->file_path}") + ->name("Chunking Document from WebhookSource - {$this->source->id}") ->allowFailures() ->finally(function (Batch $batch) use ($document) { - DocumentProcessingCompleteJob::dispatch($document); + Bus::batch([ + [ + new SummarizeDocumentJob($document), + new TagDocumentJob($document), + new DocumentProcessingCompleteJob($document), + ], + ]) + ->name(sprintf('Final Document Steps Document %s id %d', $document->type->name, $document->id)) + ->allowFailures() + ->onQueue(LlmDriverFacade::driver($document->getDriver())->onQueue()) + ->dispatch(); }) - ->onQueue(LlmDriverFacade::driver($document->getDriver())->onQueue()) + ->onQueue(LlmDriverFacade::driver($this->source->getDriver())->onQueue()) ->dispatch(); - } - } catch (\Exception $e) { - Log::error('[LaraChain] - Error running WebhookSource Job Level', [ - 'error' => $e->getMessage(), - 'results' => $results, - ]); + } } diff --git a/app/Helpers/ChatHelperTrait.php b/app/Helpers/ChatHelperTrait.php new file mode 100644 index 00000000..aae45796 --- /dev/null +++ b/app/Helpers/ChatHelperTrait.php @@ -0,0 +1,101 @@ +chat_id) { + //@NOTE should I go to Source as the chatable? 
+ $chat = Chat::create([ + 'chatable_id' => $source->collection_id, + 'chatable_type' => Collection::class, + 'user_id' => $source->collection->team?->user_id, + ]); + $source->update([ + 'chat_id' => $chat->id, + ]); + } + + return $source->refresh(); + } + + public function getUserId(Collection $collection): ?int + { + if (auth()->check()) { + return auth()->user()->id; + } + + return $collection->team?->user_id; + } + + public function ifNotActionRequired(string $results): bool + { + // @NOTE llms sometimes do not return the right + // string for example. + // false becomes false, "false" or "False" etc. + + $results = str($results) + ->trim() + ->lower() + ->remove('"') + ->remove("'") + ->toString(); + + return $results == 'false'; + } + + public function skip(Source $source, string $key): bool + { + if (! $source->force && + SourceTask::where('source_id', $source->id)->where('task_key', $key)->exists()) { + Log::info('[LaraChain] GetWebContentJob - Skipping - already ran'); + + return true; + } else { + return false; + } + } + + public function createSourceTask(Source $source, string $key): SourceTask + { + return SourceTask::create([ + 'source_id' => $source->id, + 'task_key' => $key, + ]); + } + + public function addUserMessage(Source $source, string $message): void + { + $source->refresh()->getChat()->addInput( + message: $message, + role: RoleEnum::User, + show_in_thread: true, + meta_data: MetaDataDto::from([ + 'driver' => $source->getDriver(), + 'source' => $source->title, + ]), + ); + } + + public function arrifyPromptResults(string $original): array + { + $promptResults = json_decode($original, true); + + if (is_null($promptResults)) { + $promptResults = Arr::wrap($original); + } + + return $promptResults; + } +} diff --git a/app/Http/Controllers/AssistantEmailBoxSourceController.php b/app/Http/Controllers/AssistantEmailBoxSourceController.php index ed03ea87..53588faa 100644 --- a/app/Http/Controllers/AssistantEmailBoxSourceController.php +++ 
b/app/Http/Controllers/AssistantEmailBoxSourceController.php @@ -4,6 +4,7 @@ use App\Domains\Prompts\EmailToDocumentSummary; use App\Domains\Prompts\EmailToWebContent; +use App\Domains\Prompts\SpecificTopic; use App\Domains\Sources\SourceTypeEnum; use App\Models\Collection; use App\Models\Source; @@ -31,6 +32,7 @@ protected function makeSource(array $validated, Collection $collection): void 'recurring' => $validated['recurring'], 'active' => $validated['active'], 'collection_id' => $collection->id, + 'user_id' => $this->getUserId($collection), 'type' => $this->sourceTypeEnum, 'slug' => str(Str::random(12))->remove('+')->toString(), 'meta_data' => [], @@ -40,8 +42,8 @@ protected function makeSource(array $validated, Collection $collection): void public function getPrompts(): array { return [ + 'skip_emails_based_on_content' => SpecificTopic::prompt('[CONTEXT]'), 'summarize_email' => EmailToDocumentSummary::prompt('[CONTEXT]'), - 'get_web_page' => EmailToWebContent::prompt('[CONTEXT]'), ]; } diff --git a/app/Http/Controllers/BaseSourceController.php b/app/Http/Controllers/BaseSourceController.php index 84584537..dea44064 100644 --- a/app/Http/Controllers/BaseSourceController.php +++ b/app/Http/Controllers/BaseSourceController.php @@ -4,6 +4,7 @@ use App\Domains\Recurring\RecurringTypeEnum; use App\Domains\Sources\SourceTypeEnum; +use App\Helpers\ChatHelperTrait; use App\Http\Resources\CollectionResource; use App\Http\Resources\DocumentResource; use App\Http\Resources\FilterResource; @@ -16,6 +17,8 @@ class BaseSourceController extends Controller { + use ChatHelperTrait; + protected SourceTypeEnum $sourceTypeEnum = SourceTypeEnum::WebSearchSource; protected string $edit_path = 'Sources/WebSource/Edit'; @@ -60,6 +63,8 @@ protected function makeSource(array $validated, Collection $collection): void 'active' => $validated['active'], 'collection_id' => $collection->id, 'type' => $this->sourceTypeEnum, + 'force' => data_get($validated, 'force', false), + 'user_id' => 
$this->getUserId($collection), 'meta_data' => [ 'driver' => 'brave', 'limit' => 5, @@ -146,6 +151,7 @@ protected function getValidationRules(): array 'title' => 'required|string', 'details' => 'required|string', 'active' => ['boolean', 'required'], + 'force' => ['nullable', 'boolean'], 'recurring' => ['string', 'required'], 'meta_data' => ['nullable', 'array'], 'secrets' => ['nullable', 'array'], diff --git a/app/Http/Controllers/Sources/EmailBoxSourceController.php b/app/Http/Controllers/Sources/EmailBoxSourceController.php index bab4cdad..b2c1212d 100644 --- a/app/Http/Controllers/Sources/EmailBoxSourceController.php +++ b/app/Http/Controllers/Sources/EmailBoxSourceController.php @@ -4,6 +4,7 @@ use App\Domains\Prompts\EmailToDocumentSummary; use App\Domains\Prompts\EmailToWebContent; +use App\Domains\Prompts\SpecificTopic; use App\Domains\Sources\SourceTypeEnum; use App\Http\Controllers\BaseSourceController; use App\Models\Collection; @@ -35,6 +36,7 @@ protected function makeSource(array $validated, Collection $collection): void 'details' => $validated['details'], 'recurring' => $validated['recurring'], 'active' => $validated['active'], + 'user_id' => $this->getUserId($collection), 'collection_id' => $collection->id, 'type' => $this->sourceTypeEnum, 'slug' => str(Str::random(12))->remove('+')->toString(), @@ -83,6 +85,7 @@ protected function updateSource(Source $source, array $validated): void public function getPrompts(): array { return [ + 'skip_emails_based_on_content' => SpecificTopic::prompt('[CONTEXT]'), 'summarize_email' => EmailToDocumentSummary::prompt('[CONTEXT]'), 'get_web_page' => EmailToWebContent::prompt('[CONTEXT]'), ]; diff --git a/app/Http/Controllers/Sources/FeedSourceController.php b/app/Http/Controllers/Sources/FeedSourceController.php index 9dcdc4de..f0d8ccda 100644 --- a/app/Http/Controllers/Sources/FeedSourceController.php +++ b/app/Http/Controllers/Sources/FeedSourceController.php @@ -38,6 +38,7 @@ protected function getValidationRules(): 
array 'active' => ['boolean', 'required'], 'recurring' => ['string', 'required'], 'meta_data' => ['required', 'array'], + 'force' => ['nullable', 'boolean'], 'meta_data.feed_url' => ['required', 'string'], 'secrets' => ['nullable', 'array'], ]; @@ -49,8 +50,10 @@ protected function makeSource(array $validated, Collection $collection): void 'title' => $validated['title'], 'details' => $validated['details'], 'recurring' => $validated['recurring'], + 'user_id' => $this->getUserId($collection), 'active' => $validated['active'], 'collection_id' => $collection->id, + 'force' => data_get($validated, 'force', false), 'type' => $this->sourceTypeEnum, 'meta_data' => $validated['meta_data'], ]); diff --git a/app/Http/Controllers/Sources/GoogleSheetSourceController.php b/app/Http/Controllers/Sources/GoogleSheetSourceController.php index 13f4b48a..0cec5ef4 100644 --- a/app/Http/Controllers/Sources/GoogleSheetSourceController.php +++ b/app/Http/Controllers/Sources/GoogleSheetSourceController.php @@ -53,6 +53,7 @@ protected function makeSource(array $validated, Collection $collection): void 'recurring' => $validated['recurring'], 'active' => $validated['active'], 'collection_id' => $collection->id, + 'user_id' => $this->getUserId($collection), 'type' => $this->sourceTypeEnum, 'meta_data' => $validated['meta_data'], ]); diff --git a/app/Http/Controllers/Sources/JsonSourceController.php b/app/Http/Controllers/Sources/JsonSourceController.php index 0399d3bb..6c81950e 100644 --- a/app/Http/Controllers/Sources/JsonSourceController.php +++ b/app/Http/Controllers/Sources/JsonSourceController.php @@ -44,6 +44,7 @@ protected function makeSource(array $validated, Collection $collection): void 'recurring' => $validated['recurring'], 'active' => $validated['active'], 'collection_id' => $collection->id, + 'user_id' => $this->getUserId($collection), 'slug' => str(Str::random(16))->toString(), 'type' => $this->sourceTypeEnum, 'meta_data' => json_decode($validated['meta_data'], true, 512), diff 
--git a/app/Http/Controllers/Sources/SiteMapSourceController.php b/app/Http/Controllers/Sources/SiteMapSourceController.php index 638462ce..f2acd29b 100644 --- a/app/Http/Controllers/Sources/SiteMapSourceController.php +++ b/app/Http/Controllers/Sources/SiteMapSourceController.php @@ -40,6 +40,7 @@ protected function getValidationRules(): array 'meta_data' => ['required', 'array'], 'meta_data.feed_url' => ['required', 'string'], 'secrets' => ['nullable', 'array'], + 'force' => ['nullable', 'boolean'], ]; } @@ -48,8 +49,10 @@ protected function makeSource(array $validated, Collection $collection): void Source::create([ 'title' => $validated['title'], 'details' => $validated['details'], + 'user_id' => $this->getUserId($collection), 'recurring' => $validated['recurring'], 'active' => $validated['active'], + 'force' => data_get($validated, 'force', false), 'collection_id' => $collection->id, 'type' => $this->sourceTypeEnum, 'meta_data' => $validated['meta_data'], diff --git a/app/Http/Controllers/Sources/WebPageSourceController.php b/app/Http/Controllers/Sources/WebPageSourceController.php index ccbb61a7..1866213c 100644 --- a/app/Http/Controllers/Sources/WebPageSourceController.php +++ b/app/Http/Controllers/Sources/WebPageSourceController.php @@ -2,6 +2,7 @@ namespace App\Http\Controllers\Sources; +use App\Domains\Prompts\EventPagePrompt; use App\Domains\Prompts\WebPagePrompt; use App\Domains\Sources\SourceTypeEnum; use App\Http\Controllers\BaseSourceController; @@ -40,10 +41,11 @@ protected function makeSource(array $validated, Collection $collection): void 'title' => $validated['title'], 'details' => $validated['details'], 'recurring' => $validated['recurring'], - 'force' => data_get($validated, 'force', false), 'active' => $validated['active'], + 'force' => data_get($validated, 'force', false), 'collection_id' => $collection->id, 'type' => $this->sourceTypeEnum, + 'user_id' => $this->getUserId($collection), 'meta_data' => $validated['meta_data'], ]); } @@ -52,6 
+54,7 @@ public function getPrompts(): array { return [ 'web_page' => WebPagePrompt::prompt('[CONTEXT]'), + 'event_data' => EventPagePrompt::prompt('[CONTEXT]'), ]; } } diff --git a/app/Http/Controllers/Sources/WebhookSourceController.php b/app/Http/Controllers/Sources/WebhookSourceController.php index 6f91a623..ce19e88f 100644 --- a/app/Http/Controllers/Sources/WebhookSourceController.php +++ b/app/Http/Controllers/Sources/WebhookSourceController.php @@ -34,6 +34,8 @@ protected function makeSource(array $validated, Collection $collection): void 'details' => $validated['details'], 'recurring' => $validated['recurring'], 'active' => $validated['active'], + 'force' => data_get($validated, 'force', false), + 'user_id' => $this->getUserId($collection), 'collection_id' => $collection->id, 'slug' => str(Str::random(16))->toString(), 'type' => $this->sourceTypeEnum, diff --git a/app/Http/Resources/SourceResource.php b/app/Http/Resources/SourceResource.php index d25699c6..bfe18d33 100644 --- a/app/Http/Resources/SourceResource.php +++ b/app/Http/Resources/SourceResource.php @@ -23,6 +23,7 @@ public function toArray(Request $request): array return [ 'id' => $this->id, 'title' => $this->title, + 'force' => $this->force ? 'Yes' : 'No', 'collection_id' => $this->collection_id, 'details' => $this->details, 'active' => $this->active ? 'Yes' : 'No', diff --git a/app/Jobs/ChunkDocumentJob.php b/app/Jobs/ChunkDocumentJob.php new file mode 100644 index 00000000..5c130ae6 --- /dev/null +++ b/app/Jobs/ChunkDocumentJob.php @@ -0,0 +1,105 @@ +batch()->cancelled()) { + // Determine if the batch has been cancelled... 
+ return; + } + + Log::info('[LaraChain] - Chunking Document', [ + 'document' => $this->document->id, + ]); + + $document = $this->document; + + $chunks = []; + + $page_number = 0; + + $pageContent = $this->document->original_content; + + $size = config('llmdriver.chunking.default_size'); + + $chunked_chunks = TextChunker::handle($pageContent, $size); + + foreach ($chunked_chunks as $chunkSection => $chunkContent) { + $guid = md5($chunkContent); + + $DocumentChunk = DocumentChunk::updateOrCreate( + [ + 'document_id' => $document->id, + 'sort_order' => $page_number, + 'section_number' => $chunkSection, + ], + [ + 'guid' => $guid, + 'content' => to_utf8($chunkContent), + ] + ); + + $chunks[] = [ + new VectorlizeDataJob($DocumentChunk), + ]; + } + + $name = sprintf('Chunking Document Type %s id %d ', $document->type->name, $document->id); + + Bus::batch($chunks) + ->name($name) + ->allowFailures() + ->finally(function (Batch $batch) use ($document) { + + Bus::batch([ + [ + new SummarizeDocumentJob($document), + new TagDocumentJob($document), + new DocumentProcessingCompleteJob($document), + ], + ]) + ->name(sprintf('Final Document Steps Document %s id %d', $document->type->name, $document->id)) + ->allowFailures() + ->dispatch(); + }) + ->onQueue(LlmDriverFacade::driver($document->getDriver())->onQueue()) + ->dispatch(); + } +} diff --git a/app/Jobs/GetWebContentJob.php b/app/Jobs/GetWebContentJob.php index e9c6d817..28d98de6 100644 --- a/app/Jobs/GetWebContentJob.php +++ b/app/Jobs/GetWebContentJob.php @@ -2,9 +2,13 @@ namespace App\Jobs; +use App\Domains\Chat\MetaDataDto; use App\Domains\Documents\StatusEnum; use App\Domains\Documents\TypesEnum; +use App\Domains\Messages\RoleEnum; +use App\Domains\Prompts\PromptMerge; use App\Domains\Sources\WebSearch\Response\WebResponseDto; +use App\Helpers\ChatHelperTrait; use App\Helpers\TextChunker; use App\Models\Document; use App\Models\DocumentChunk; @@ -19,13 +23,14 @@ use Illuminate\Queue\SerializesModels; use 
Illuminate\Support\Facades\Bus; use Illuminate\Support\Facades\Log; -use Laravel\Pennant\Feature; use LlmLaraHub\LlmDriver\LlmDriverFacade; +use LlmLaraHub\LlmDriver\ToolsHelper; use LlmLaraHub\TagFunction\Jobs\TagDocumentJob; class GetWebContentJob implements ShouldQueue { use Batchable, Dispatchable, InteractsWithQueue, Queueable, SerializesModels; + use ChatHelperTrait, ToolsHelper; /** * Create a new job instance. @@ -48,95 +53,145 @@ public function handle(): void return; } - /** - * Document can reference a source - */ - $document = Document::updateOrCreate( - [ - 'source_id' => $this->source->id, - 'type' => TypesEnum::HTML, - 'subject' => $this->webResponseDto->title, - 'link' => $this->webResponseDto->url, - 'collection_id' => $this->source->collection_id, - ], - [ - 'status' => StatusEnum::Pending, - 'file_path' => $this->webResponseDto->url, - 'status_summary' => StatusEnum::Pending, - 'meta_data' => $this->webResponseDto->toArray(), - ] - ); + $this->source = $this->checkForChat($this->source); + + $key = md5($this->webResponseDto->url.$this->source->id); + + if ($this->skip($this->source, $key)) { + return; + } + + $this->createSourceTask($this->source, $key); Log::info("[LaraChain] GetWebContentJob - {$this->source->title} - URL: {$this->webResponseDto->url}"); - $html = GetPage::make($this->source->collection)->handle($this->webResponseDto->url); /** * @NOTE - * making them PDF for now - * I ran into "noise" issues - * of just a lot of script tags and stuff - * there is some code in the getPage for html - * that might be worth it later + * Sometimes the HTML is too big */ - if (Feature::active('html_to_pdf')) { - $document->update([ - 'type' => TypesEnum::PDF, - 'file_path' => md5($this->webResponseDto->url).'.pdf', - ]); + $htmlResults = GetPage::make($this->source->collection) + ->handle($this->webResponseDto->url, true); - Bus::batch([ - new ParsePdfFileJob($document), - ]) - ->name('Process PDF Document - '.$document->id) - ->finally(function 
(Batch $batch) { - //this is triggered in the PdfTransformer class - }) - ->allowFailures() - ->onQueue(LlmDriverFacade::driver($this->source->getDriver())->onQueue()) - ->dispatch(); - } else { - $results = GetPage::parseHtml($html); + $prompt = PromptMerge::merge( + ['[CONTEXT]'], + [$htmlResults], + $this->source->getPrompt() + ); - $results = to_utf8($results); + $results = LlmDriverFacade::driver( + $this->source->getDriver() + )->completion($prompt); - $document->update([ - 'type' => TypesEnum::HTML, - 'file_path' => $this->webResponseDto->url, - 'original_content' => $results, + if ($this->ifNotActionRequired($results->content)) { + Log::info('[LaraChain] - Web Source Skipping', [ + 'prompt' => $prompt, ]); + } else { + $promptResultsOriginal = $results->content; - $page_number = 1; + $this->addUserMessage($this->source, $promptResultsOriginal); - $chunked_chunks = TextChunker::handle($results); + $promptResults = $this->arrifyPromptResults($promptResultsOriginal); - foreach ($chunked_chunks as $chunkSection => $chunkContent) { + /** + * @NOTE all the user to build array results + * Like Events from a webpage + */ + foreach ($promptResults as $promptResultIndex => $promptResult) { - $guid = md5($chunkContent); + $promptResult = json_encode($promptResult); - $DocumentChunk = DocumentChunk::updateOrCreate( + $title = sprintf('WebPageSource - item #%d source: %s', + $promptResultIndex + 1, + $this->webResponseDto->url); + + /** + * Document can reference a source + */ + $document = Document::updateOrCreate( [ - 'document_id' => $document->id, - 'sort_order' => $page_number, - 'section_number' => $chunkSection, + 'source_id' => $this->source->id, + 'type' => TypesEnum::HTML, + 'subject' => to_utf8($title), + 'link' => $this->webResponseDto->url, + 'collection_id' => $this->source->collection_id, ], [ - 'guid' => $guid, - 'content' => $chunkContent, + 'status' => StatusEnum::Pending, + 'file_path' => $this->webResponseDto->url, + 'status_summary' => 
StatusEnum::Pending, + 'meta_data' => $this->webResponseDto->toArray(), + 'original_content' => $promptResult, ] ); - Log::info('[LaraChain] adding to new batch'); + $page_number = 1; + + $chunked_chunks = TextChunker::handle($promptResult); + + $chunks = []; + + foreach ($chunked_chunks as $chunkSection => $chunkContent) { + $guid = md5($chunkContent); + + $DocumentChunk = DocumentChunk::updateOrCreate( + [ + 'document_id' => $document->id, + 'guid' => $guid, + ], + [ + 'sort_order' => $page_number, + 'section_number' => $chunkSection, + 'content' => to_utf8($chunkContent), + ] + ); + + Log::info('[LaraChain] adding to new batch'); + + $chunks[] = new VectorlizeDataJob($DocumentChunk); + + $page_number++; + } + + Bus::batch($chunks) + ->name("Chunking Document from Web - {$this->webResponseDto->url}") + ->allowFailures() + ->finally(function (Batch $batch) use ($document) { + Bus::batch([ + [ + new SummarizeDocumentJob($document), + new TagDocumentJob($document), + new DocumentProcessingCompleteJob($document), + ], + ]) + ->name(sprintf('Final Document Steps Document %s id %d', $document->type->name, $document->id)) + ->allowFailures() + ->onQueue(LlmDriverFacade::driver($document->getDriver())->onQueue()) + ->dispatch(); + }) + ->onQueue(LlmDriverFacade::driver($this->source->getDriver())->onQueue()) + ->dispatch(); + } - $this->batch()->add([ - new VectorlizeDataJob($DocumentChunk), - new SummarizeDocumentJob($document), - new TagDocumentJob($document), - new DocumentProcessingCompleteJob($document), - ]); + /** + * @NOTE + * I could move this into the loop if it is not + * enough here + */ + $assistantMessage = $this->source->getChat()->addInput( + message: json_encode($promptResults), + role: RoleEnum::Assistant, + show_in_thread: true, + meta_data: MetaDataDto::from([ + 'driver' => $this->source->getDriver(), + 'source' => $this->source->title, + ]), + ); + + $this->savePromptHistory( + message: $assistantMessage, + prompt: $prompt); - $page_number++; - } } - 
} } diff --git a/app/Jobs/WebPageSourceJob.php b/app/Jobs/WebPageSourceJob.php index 67d18f19..0116d3ce 100644 --- a/app/Jobs/WebPageSourceJob.php +++ b/app/Jobs/WebPageSourceJob.php @@ -2,11 +2,8 @@ namespace App\Jobs; -use App\Domains\Documents\StatusEnum; -use App\Domains\Documents\TypesEnum; -use App\Models\Document; +use App\Domains\Sources\WebSearch\Response\WebResponseDto; use App\Models\Source; -use Facades\App\Domains\Sources\WebSearch\GetPage; use Illuminate\Bus\Batch; use Illuminate\Bus\Batchable; use Illuminate\Bus\Queueable; @@ -14,6 +11,8 @@ use Illuminate\Foundation\Bus\Dispatchable; use Illuminate\Queue\InteractsWithQueue; use Illuminate\Queue\SerializesModels; +use Illuminate\Support\Facades\Bus; +use LlmLaraHub\LlmDriver\LlmDriverFacade; class WebPageSourceJob implements ShouldQueue { @@ -42,41 +41,25 @@ public function handle(): void return; } - $jobs = []; - - $html = GetPage::make($this->source->collection)->handle($this->url); - - $html = GetPage::parseHtml($html); - - $html = to_utf8($html); - $title = sprintf('WebPageSource - source: %s', $this->url); - $parseTitle = str($html)->limit(50)->toString(); - - if (! 
empty($parseTitle)) { - $title = $parseTitle; - } - - $document = Document::updateOrCreate( - [ - 'source_id' => $this->source->id, - 'link' => $this->url, - 'collection_id' => $this->source->collection_id, - ], - [ - 'status' => StatusEnum::Pending, - 'type' => TypesEnum::HTML, - 'subject' => to_utf8($title), - 'file_path' => $this->url, - 'summary' => str($html)->limit(254)->toString(), - 'status_summary' => StatusEnum::Pending, - 'original_content' => $html, - 'meta_data' => $this->source->meta_data, - ] - ); - - $this->processDocument($document); + $webResponseDto = WebResponseDto::from([ + 'url' => $this->url, + 'title' => $title, + 'age' => now()->toString(), + 'description' => sprintf('From Source %s', $this->source->title), + 'meta_data' => [], + 'thumbnail' => null, + 'profile' => [], + ]); + + Bus::batch([ + new GetWebContentJob($this->source, $webResponseDto), + ]) + ->name("Getting Web content for Source - {$this->url}") + ->onQueue(LlmDriverFacade::driver($this->source->getDriver())->onQueue()) + ->allowFailures() + ->dispatch(); } } diff --git a/app/Models/Source.php b/app/Models/Source.php index 18de9813..996c464e 100644 --- a/app/Models/Source.php +++ b/app/Models/Source.php @@ -43,6 +43,11 @@ protected static function booted(): void }); } + public function getPrompt(): string + { + return $this->details; + } + public function getChatable(): HasDrivers { return $this->collection->getChatable(); @@ -50,11 +55,7 @@ public function getChatable(): HasDrivers public function getChat(): ?Chat { - /** - * @TODO - * I need to come back to this - */ - return $this->collection->chats()->first(); + return $this->chat ?: $this->collection->chats()->first(); } public function getSummary(): string @@ -77,6 +78,11 @@ public function getDriver(): string return $this->collection->getDriver(); } + public function chat(): BelongsTo + { + return $this->belongsTo(Chat::class); + } + public function getEmbeddingDriver(): string { return 
$this->collection->getEmbeddingDriver(); diff --git a/app/Models/SourceTask.php b/app/Models/SourceTask.php new file mode 100644 index 00000000..006e09a4 --- /dev/null +++ b/app/Models/SourceTask.php @@ -0,0 +1,19 @@ +belongsTo(Source::class); + } +} diff --git a/database/factories/SourceFactory.php b/database/factories/SourceFactory.php index a0883505..96b05b41 100644 --- a/database/factories/SourceFactory.php +++ b/database/factories/SourceFactory.php @@ -4,6 +4,7 @@ use App\Domains\Recurring\RecurringTypeEnum; use App\Domains\Sources\SourceTypeEnum; +use App\Models\Chat; use App\Models\Collection; use Illuminate\Database\Eloquent\Factories\Factory; @@ -22,7 +23,10 @@ public function definition(): array return [ 'title' => $this->faker->name, 'slug' => fake()->word, + 'force' => false, 'collection_id' => Collection::factory(), + 'user_id' => null, + 'chat_id' => Chat::factory(), 'details' => $this->faker->sentence, // 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. 'recurring' => RecurringTypeEnum::Daily, 'active' => true, diff --git a/database/factories/SourceTaskFactory.php b/database/factories/SourceTaskFactory.php new file mode 100644 index 00000000..23056713 --- /dev/null +++ b/database/factories/SourceTaskFactory.php @@ -0,0 +1,23 @@ + + */ +class SourceTaskFactory extends Factory +{ + /** + * Define the model's default state. + * + * @return array + */ + public function definition(): array + { + return [ + // + ]; + } +} diff --git a/database/migrations/2024_07_20_130256_add_chat_to_soruces.php b/database/migrations/2024_07_20_130256_add_chat_to_soruces.php new file mode 100644 index 00000000..e45050ea --- /dev/null +++ b/database/migrations/2024_07_20_130256_add_chat_to_soruces.php @@ -0,0 +1,30 @@ +foreignIdFor(\App\Models\Chat::class)->nullable(); + $table->foreignIdFor(\App\Models\User::class)->nullable(); + }); + } + + /** + * Reverse the migrations. 
+ */ + public function down(): void + { + Schema::table('sources', function (Blueprint $table) { + $table->dropColumn('chat_id'); + $table->dropColumn('user_id'); + }); + } +}; diff --git a/database/migrations/2024_07_21_171214_create_source_tasks_table.php b/database/migrations/2024_07_21_171214_create_source_tasks_table.php new file mode 100644 index 00000000..ce10be85 --- /dev/null +++ b/database/migrations/2024_07_21_171214_create_source_tasks_table.php @@ -0,0 +1,33 @@ +id(); + $table->foreignIdFor(\App\Models\Source::class); + $table->text('task_key'); + $table->timestamps(); + }); + + Schema::table('source_tasks', function (Blueprint $table) { + $table->index(['source_id', 'task_key']); + }); + } + + /** + * Reverse the migrations. + */ + public function down(): void + { + Schema::dropIfExists('source_tasks'); + } +}; diff --git a/database/migrations/2024_07_22_112043_add_force_to_sources.php b/database/migrations/2024_07_22_112043_add_force_to_sources.php new file mode 100644 index 00000000..cc71ae3c --- /dev/null +++ b/database/migrations/2024_07_22_112043_add_force_to_sources.php @@ -0,0 +1,28 @@ +boolean('force')->default(false); + }); + } + + /** + * Reverse the migrations. + */ + public function down(): void + { + Schema::table('sources', function (Blueprint $table) { + // + }); + } +}; diff --git a/resources/js/Components/Templates.vue b/resources/js/Components/Templates.vue index 96c98491..96332b5c 100644 --- a/resources/js/Components/Templates.vue +++ b/resources/js/Components/Templates.vue @@ -1,6 +1,7 @@ diff --git a/resources/js/Pages/Sources/EmailSource/Components/Resources.vue b/resources/js/Pages/Sources/EmailSource/Components/Resources.vue index 3195660b..6c3555c3 100644 --- a/resources/js/Pages/Sources/EmailSource/Components/Resources.vue +++ b/resources/js/Pages/Sources/EmailSource/Components/Resources.vue @@ -9,7 +9,9 @@
-
diff --git a/resources/js/Pages/Sources/FeedSource/Components/Resources.vue b/resources/js/Pages/Sources/FeedSource/Components/Resources.vue index 41f9131e..2349f461 100644 --- a/resources/js/Pages/Sources/FeedSource/Components/Resources.vue +++ b/resources/js/Pages/Sources/FeedSource/Components/Resources.vue @@ -16,6 +16,19 @@ +
+ + + +
+ By default the system will only run the first time for a URL or an email. + But if you want to try again just check this box. + This can be good if you are checking a home page for updates. + Or a feed for updates. But NOT if you are checking an email box for emails and + do not want to re-check the same email.
+
+

This is meta data

diff --git a/resources/js/Pages/Sources/FeedSource/Create.vue b/resources/js/Pages/Sources/FeedSource/Create.vue index 86d70d00..68b521a5 100644 --- a/resources/js/Pages/Sources/FeedSource/Create.vue +++ b/resources/js/Pages/Sources/FeedSource/Create.vue @@ -31,6 +31,7 @@ const form = useForm({ title: '', details: '', recurring: 'not', + force: false, meta_data: { feed_url: "https://larallama.io/feed", }, diff --git a/resources/js/Pages/Sources/FeedSource/Edit.vue b/resources/js/Pages/Sources/FeedSource/Edit.vue index db85e805..07b2324d 100644 --- a/resources/js/Pages/Sources/FeedSource/Edit.vue +++ b/resources/js/Pages/Sources/FeedSource/Edit.vue @@ -31,6 +31,7 @@ const props = defineProps({ const form = useForm({ title: props.source.data.title, details: props.source.data.details, + force: props.source.data.force, active: props.source.data.active, recurring: props.source.data.recurring, meta_data: { diff --git a/resources/js/Pages/Sources/Index.vue b/resources/js/Pages/Sources/Index.vue index e33c1a8b..7bdded7e 100644 --- a/resources/js/Pages/Sources/Index.vue +++ b/resources/js/Pages/Sources/Index.vue @@ -13,7 +13,6 @@ import Card from "@/Pages/Sources/Cards/Card.vue"; import EmailCard from "@/Pages/Sources/EmailSource/Components/Card.vue"; import EmailBoxCard from "@/Pages/Sources/EmailBoxSource/Components/Card.vue"; import WebhookSource from "@/Pages/Sources/WebhookSource/Components/Card.vue"; -import JsonSource from "@/Pages/Sources/JsonSource/Components/Card.vue"; import FeedSource from "@/Pages/Sources/FeedSource/Components/Card.vue"; import WebPageSource from "@/Pages/Sources/WebPageSource/Components/Card.vue"; import SiteMapSource from "@/Pages/Sources/SiteMapSource/Components/Card.vue"; @@ -63,10 +62,7 @@ const props = defineProps({ Sources are ways you can add data to your collection beyond uploading documents. You can add via a websearch, and soon email and calendar. - - -
@@ -79,7 +75,6 @@ const props = defineProps({ - diff --git a/resources/js/Pages/Sources/JsonSource/Components/Card.vue b/resources/js/Pages/Sources/JsonSource/Components/Card.vue deleted file mode 100644 index 57c38665..00000000 --- a/resources/js/Pages/Sources/JsonSource/Components/Card.vue +++ /dev/null @@ -1,51 +0,0 @@ - - -
-
- -
- - Edit -
-
-
- - - diff --git a/resources/js/Pages/Sources/JsonSource/Components/Resources.vue b/resources/js/Pages/Sources/JsonSource/Components/Resources.vue deleted file mode 100644 index cc8c2509..00000000 --- a/resources/js/Pages/Sources/JsonSource/Components/Resources.vue +++ /dev/null @@ -1,75 +0,0 @@ - - - diff --git a/resources/js/Pages/Sources/JsonSource/Create.vue b/resources/js/Pages/Sources/JsonSource/Create.vue deleted file mode 100644 index 9ed0abda..00000000 --- a/resources/js/Pages/Sources/JsonSource/Create.vue +++ /dev/null @@ -1,96 +0,0 @@ - - - diff --git a/resources/js/Pages/Sources/JsonSource/Edit.vue b/resources/js/Pages/Sources/JsonSource/Edit.vue deleted file mode 100644 index c9358fa1..00000000 --- a/resources/js/Pages/Sources/JsonSource/Edit.vue +++ /dev/null @@ -1,101 +0,0 @@ - - - diff --git a/resources/js/Pages/Sources/SiteMapSource/Components/Resources.vue b/resources/js/Pages/Sources/SiteMapSource/Components/Resources.vue index 56afa4e5..6a445628 100644 --- a/resources/js/Pages/Sources/SiteMapSource/Components/Resources.vue +++ b/resources/js/Pages/Sources/SiteMapSource/Components/Resources.vue @@ -16,6 +16,20 @@
+ +
+ + + +
+ By default the system will only run the first time for a URL or an email. + But if you want to try again just check this box. + This can be good if you are checking a home page for updates. + Or a feed for updates. But NOT if you are checking an email box for emails and + do not want to re-check the same email.
+
+

This is meta data

diff --git a/resources/js/Pages/Sources/SiteMapSource/Create.vue b/resources/js/Pages/Sources/SiteMapSource/Create.vue index f60a7675..d4e42ffb 100644 --- a/resources/js/Pages/Sources/SiteMapSource/Create.vue +++ b/resources/js/Pages/Sources/SiteMapSource/Create.vue @@ -30,6 +30,7 @@ const props = defineProps({ const form = useForm({ title: '', details: '', + force: false, recurring: 'not', meta_data: { feed_url: "https://larallama.io/sitemap.xml", diff --git a/resources/js/Pages/Sources/SiteMapSource/Edit.vue b/resources/js/Pages/Sources/SiteMapSource/Edit.vue index 56c0d1f5..f1e4431c 100644 --- a/resources/js/Pages/Sources/SiteMapSource/Edit.vue +++ b/resources/js/Pages/Sources/SiteMapSource/Edit.vue @@ -32,6 +32,7 @@ const form = useForm({ title: props.source.data.title, details: props.source.data.details, active: props.source.data.active, + force: props.source.data.force, recurring: props.source.data.recurring, meta_data: { feed_url: props.source.data.meta_data.feed_url diff --git a/resources/js/Pages/Sources/WebPageSource/Components/Resources.vue b/resources/js/Pages/Sources/WebPageSource/Components/Resources.vue index d30025f1..90606d7e 100644 --- a/resources/js/Pages/Sources/WebPageSource/Components/Resources.vue +++ b/resources/js/Pages/Sources/WebPageSource/Components/Resources.vue @@ -10,7 +10,7 @@
@@ -32,7 +32,18 @@ https://docs.larallama.io/developing.html"
- +
+ + + +
+ By default the system will only run the first time for a URL or an email. + But if you want to try again, just check this box. + This can be good if you are checking a home page for updates, + or a feed for updates, but NOT if you are checking an email box for emails and + do not want to re-check the same email. +
+
diff --git a/resources/js/Pages/Sources/WebPageSource/Create.vue b/resources/js/Pages/Sources/WebPageSource/Create.vue index 8f27f557..340b18ac 100644 --- a/resources/js/Pages/Sources/WebPageSource/Create.vue +++ b/resources/js/Pages/Sources/WebPageSource/Create.vue @@ -32,6 +32,7 @@ const form = useForm({ details: '', force: false, recurring: 'not', + force: true, meta_data: { example: "bob@bobsburgers.com", }, diff --git a/resources/js/Pages/Sources/WebSource/Components/Resources.vue b/resources/js/Pages/Sources/WebSource/Components/Resources.vue index 834e8daf..a64f64f8 100644 --- a/resources/js/Pages/Sources/WebSource/Components/Resources.vue +++ b/resources/js/Pages/Sources/WebSource/Components/Resources.vue @@ -14,6 +14,20 @@
+ +
+ + + +
+ By default the system will only run the first time for a URL or an email. + But if you want to try again, just check this box. + This can be good if you are checking a home page for updates, + or a feed for updates, but NOT if you are checking an email box for emails and + do not want to re-check the same email. +
+
+
diff --git a/resources/js/Pages/Sources/WebSource/Create.vue b/resources/js/Pages/Sources/WebSource/Create.vue index 308c2953..bd391ead 100644 --- a/resources/js/Pages/Sources/WebSource/Create.vue +++ b/resources/js/Pages/Sources/WebSource/Create.vue @@ -28,6 +28,7 @@ const props = defineProps({ const form = useForm({ title: '', details: '', + force: false, recurring: 'not', active: true }); diff --git a/resources/js/Pages/Sources/WebSource/Edit.vue b/resources/js/Pages/Sources/WebSource/Edit.vue index 981e827b..544cc750 100644 --- a/resources/js/Pages/Sources/WebSource/Edit.vue +++ b/resources/js/Pages/Sources/WebSource/Edit.vue @@ -35,6 +35,7 @@ const form = useForm({ title: props.source.data.title, details: props.source.data.details, active: props.source.data.active, + force: props.source.data.force, recurring: props.source.data.recurring }); diff --git a/resources/js/Pages/Sources/WebhookSource/Components/Resources.vue b/resources/js/Pages/Sources/WebhookSource/Components/Resources.vue index 80463b79..4662c934 100644 --- a/resources/js/Pages/Sources/WebhookSource/Components/Resources.vue +++ b/resources/js/Pages/Sources/WebhookSource/Components/Resources.vue @@ -17,6 +17,20 @@
+ +
+ + + +
+ By default the system will only run the first time for a URL or an email. + But if you want to try again, just check this box. + This can be good if you are checking a home page for updates, + or a feed for updates, but NOT if you are checking an email box for emails and + do not want to re-check the same email. +
+
+
SourceTypeEnum::EmailSource, ]); + LlmDriverFacade::shouldReceive('driver->completion')->once()->andReturn( + CompletionResponse::from([ + 'content' => 'foo bar', + ]) + ); + $body = <<<'BODY' Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut.Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. @@ -69,8 +73,6 @@ public function test_batches() $this->assertDatabaseCount('documents', 1); - $this->assertDatabaseCount('document_chunks', 8); - Bus::assertBatchCount(1); } @@ -87,7 +89,7 @@ public function test_run() $source->run(); } - public function test_related_tranformers() + public function tests_creates_chat_and_message() { Bus::fake(); $source = Source::factory()->create([ @@ -95,6 +97,12 @@ public function test_related_tranformers() 'type' => SourceTypeEnum::EmailSource, ]); + LlmDriverFacade::shouldReceive('driver->completion')->once()->andReturn( + CompletionResponse::from([ + 'content' => 'foo bar', + ]) + ); + $body = <<<'BODY' Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. 
Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut.Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. @@ -105,17 +113,6 @@ public function test_related_tranformers() BODY; - $transformer = Transformer::factory()->create( - [ - 'transformable_id' => $source->id, - 'transformable_type' => Source::class, - 'parent_id' => null, - 'last_run' => null, - 'active' => true, - 'type' => TypeEnum::CrmTransformer, - ] - ); - $dto = MailDto::from([ 'to' => 'info+12345@llmassistant.io', 'from' => 'foo@var.com', @@ -127,17 +124,102 @@ public function test_related_tranformers() $emailSource = new \App\Domains\Sources\EmailSource(); $emailSource->setMailDto($dto)->handle($source); - $this->assertDatabaseCount('documents', 3); + $this->assertDatabaseCount('documents', 1); + $this->assertDatabaseCount('chats', 1); + $this->assertDatabaseCount('messages', 2); + $this->assertDatabaseCount('source_tasks', 1); - $this->assertDatabaseCount('document_chunks', 10); + $this->assertNotNull($source->chat_id); Bus::assertBatchCount(1); + } + + public function test_repeat_tasks() + { + Bus::fake(); + $source = Source::factory()->create([ + 'slug' => 'test', + 'type' => SourceTypeEnum::EmailSource, + ]); + + LlmDriverFacade::shouldReceive('driver->completion')->never(); + + $body = <<<'BODY' +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. 
Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut.Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. 
Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. + +BODY; + + $dto = MailDto::from([ + 'to' => 'info+12345@llmassistant.io', + 'from' => 'foo@var.com', + 'subject' => 'This is it', + 'header' => 'This is header', + 'body' => $body, + ]); + + SourceTask::factory()->create([ + 'source_id' => $source->id, + 'task_key' => md5($dto->date.$dto->from.$source->id), + ]); + + $emailSource = new \App\Domains\Sources\EmailSource(); + $emailSource->setMailDto($dto)->handle($source); + + $this->assertDatabaseCount('documents', 0); + $this->assertDatabaseCount('chats', 1); + $this->assertDatabaseCount('messages', 0); + + $this->assertNotNull($source->chat_id); + + Bus::assertBatchCount(0); + } + + public function test_no_action_required() + { + Bus::fake(); + $source = Source::factory()->create([ + 'slug' => 'test', + 'type' => SourceTypeEnum::EmailSource, + ]); + + LlmDriverFacade::shouldReceive('driver->completion')->once()->andReturn( + CompletionResponse::from([ + 'content' => 'False', + ]) + ); + + $body = <<<'BODY' +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut.Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. 
+Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. +Quis ea esse velit id id eu consectetur deserunt exercitation exercitation. Nisi aliqua ipsum fugiat laborum aliquip nostrud eu tempor non cillum Lorem non dolor proident sunt. Irure commodo aliqua reprehenderit deserunt sint irure in excepteur quis eiusmod ullamco aliquip. Dolore tempor ea non ut. 
+ +BODY; + + $dto = MailDto::from([ + 'to' => 'info+12345@llmassistant.io', + 'from' => 'foo@var.com', + 'subject' => 'This is it', + 'header' => 'This is header', + 'body' => $body, + ]); + + $emailSource = new \App\Domains\Sources\EmailSource(); + $emailSource->setMailDto($dto)->handle($source); + + $this->assertDatabaseCount('documents', 0); + $this->assertDatabaseCount('chats', 1); + $this->assertDatabaseCount('messages', 0); + + $this->assertNotNull($source->chat_id); - $documentTo = Document::whereType(TypesEnum::Contact)->exists(); - $this->assertTrue($documentTo); - $documentTo = Document::where('child_type', StructuredTypeEnum::EmailFrom)->exists(); - $this->assertTrue($documentTo); - $documentTo = Document::where('child_type', StructuredTypeEnum::EmailTo)->exists(); - $this->assertTrue($documentTo); + Bus::assertBatchCount(0); } } diff --git a/tests/Feature/FeedSourceTest.php b/tests/Feature/FeedSourceTest.php index afeb3a30..bdd56bba 100644 --- a/tests/Feature/FeedSourceTest.php +++ b/tests/Feature/FeedSourceTest.php @@ -24,6 +24,6 @@ public function test_run() $source->run(); - Bus::assertBatchCount(1); + Bus::assertBatchCount(17); } } diff --git a/tests/Feature/Http/Controllers/WebhookSourceControllerTest.php b/tests/Feature/Http/Controllers/WebhookSourceControllerTest.php index 954fa5b9..c4d0baae 100644 --- a/tests/Feature/Http/Controllers/WebhookSourceControllerTest.php +++ b/tests/Feature/Http/Controllers/WebhookSourceControllerTest.php @@ -39,6 +39,8 @@ public function test_store(): void $this->assertEquals(SourceTypeEnum::WebhookSource, $source->type); $this->assertEquals('foobar', $source->secrets['token']); + + $this->assertNotNull($source->user_id); } public function test_update() diff --git a/tests/Feature/Jobs/ChunkDocumentJobTest.php b/tests/Feature/Jobs/ChunkDocumentJobTest.php new file mode 100644 index 00000000..0260f93b --- /dev/null +++ b/tests/Feature/Jobs/ChunkDocumentJobTest.php @@ -0,0 +1,31 @@ +create(); + + 
$this->assertDatabaseCount('document_chunks', 0); + + [$job, $batch] = (new ChunkDocumentJob($document))->withFakeBatch(); + + $job->handle(); + + $this->assertDatabaseCount('document_chunks', 1); + + Bus::assertBatchCount(1); + } +} diff --git a/tests/Feature/Jobs/GetWebContentJobTest.php b/tests/Feature/Jobs/GetWebContentJobTest.php index a81d0c67..f65d88c0 100644 --- a/tests/Feature/Jobs/GetWebContentJobTest.php +++ b/tests/Feature/Jobs/GetWebContentJobTest.php @@ -9,6 +9,8 @@ use Facades\App\Domains\Sources\WebSearch\GetPage; use Illuminate\Support\Facades\Bus; use Laravel\Pennant\Feature; +use LlmLaraHub\LlmDriver\LlmDriverFacade; +use LlmLaraHub\LlmDriver\Responses\CompletionResponse; use Tests\TestCase; class GetWebContentJobTest extends TestCase @@ -37,7 +39,13 @@ public function test_job_html(): void GetPage::shouldReceive('make->handle')->once()->andReturn($html); - GetPage::makePartial(); + LlmDriverFacade::shouldReceive('driver->onQueue')->andReturn('default'); + + LlmDriverFacade::shouldReceive('driver->completion') + ->once() + ->andReturn(CompletionResponse::from([ + 'content' => get_fixture('test_block_of_text.txt', false), + ])); $this->assertDatabaseCount('documents', 0); $this->assertDatabaseCount('document_chunks', 0); @@ -45,17 +53,15 @@ public function test_job_html(): void $job->handle(); $this->assertDatabaseCount('documents', 1); - $this->assertDatabaseCount('document_chunks', 33); + $this->assertDatabaseCount('document_chunks', 17); $document = Document::first(); - $this->assertEquals('Example', $document->subject); + $this->assertStringContainsString('WebPageSource - item #1 source', $document->subject); } - public function test_job_pdf(): void + public function test_array(): void { - Feature::define('html_to_pdf', function () { - return true; - }); + Bus::fake(); $source = Source::factory()->create(); @@ -70,17 +76,27 @@ public function test_job_pdf(): void 'profile' => ['key' => 'value'], ]); - $content = fake()->sentences(900, true); + 
$html = get_fixture('test_medium_2.html', false); + + GetPage::shouldReceive('make->handle')->once()->andReturn($html); - GetPage::shouldReceive('make->handle') + LlmDriverFacade::shouldReceive('driver->onQueue')->andReturn('default'); + + LlmDriverFacade::shouldReceive('driver->completion') ->once() - ->andReturn('foobar'); + ->andReturn(CompletionResponse::from([ + 'content' => '[{"content":"Test 1"},{"content":"Test 2"}]', + ])); + $this->assertDatabaseCount('documents', 0); + $this->assertDatabaseCount('document_chunks', 0); [$job, $batch] = (new GetWebContentJob($source, $webResponseDto))->withFakeBatch(); $job->handle(); - - Bus::assertBatchCount(1); + $this->assertDatabaseCount('documents', 2); + $this->assertDatabaseCount('document_chunks', 2); + $document = Document::first(); + $this->assertStringContainsString('WebPageSource - item #1 source', $document->subject); } } diff --git a/tests/Feature/JsonSourceTest.php b/tests/Feature/JsonSourceTest.php deleted file mode 100644 index 148b1c1d..00000000 --- a/tests/Feature/JsonSourceTest.php +++ /dev/null @@ -1,21 +0,0 @@ -markTestSkipped('@TODO not sure this class is needed yet or at all'); - $source = Source::factory()->create([ - 'slug' => 'test', - 'type' => SourceTypeEnum::JsonSource, - ]); - - $source->run(); - } -} diff --git a/tests/Feature/Models/SourceTaskTest.php b/tests/Feature/Models/SourceTaskTest.php new file mode 100644 index 00000000..94beb274 --- /dev/null +++ b/tests/Feature/Models/SourceTaskTest.php @@ -0,0 +1,18 @@ +get('/'); + + $response->assertStatus(200); + } +} diff --git a/tests/Feature/WebPageSourceJobTest.php b/tests/Feature/WebPageSourceJobTest.php index 4c11dd95..534c646a 100644 --- a/tests/Feature/WebPageSourceJobTest.php +++ b/tests/Feature/WebPageSourceJobTest.php @@ -5,8 +5,8 @@ use App\Domains\Sources\SourceTypeEnum; use App\Jobs\WebPageSourceJob; use App\Models\Source; -use Facades\App\Domains\Sources\WebSearch\GetPage; use Illuminate\Support\Facades\Bus; +use 
LlmLaraHub\LlmDriver\LlmDriverFacade; use Tests\TestCase; class WebPageSourceJobTest extends TestCase @@ -18,11 +18,7 @@ public function test_makes_documents_triggers_jobs(): void { Bus::fake(); - $html = get_fixture('test_medium_2.html', false); - - GetPage::shouldReceive('make->handle')->once()->andReturn($html); - - GetPage::makePartial(); + LlmDriverFacade::shouldReceive('driver->onQueue')->andReturn('default'); $source = Source::factory()->create([ 'slug' => 'test', @@ -36,11 +32,6 @@ public function test_makes_documents_triggers_jobs(): void [$job, $batch] = (new WebPageSourceJob($source, 'https://larallama.io/posts/numerous-ui-updates-prompt-template-improvements-and-more'))->withFakeBatch(); $job->handle(); - $this->assertDatabaseCount('documents', 1); - - $this->assertNotEmpty($source->documents->first()->summary); - $this->assertNotEmpty($source->documents->first()->original_content); - Bus::assertBatchCount(1); } } diff --git a/tests/Feature/WebhookSourceTest.php b/tests/Feature/WebhookSourceTest.php index 0f51b394..e23a2092 100644 --- a/tests/Feature/WebhookSourceTest.php +++ b/tests/Feature/WebhookSourceTest.php @@ -75,10 +75,10 @@ public function test_prevent_duplicates_github() $payload = get_fixture('example_github.json'); LlmDriverFacade::shouldReceive('driver->onQueue') - ->times(4)->andReturn('default'); + ->times(2)->andReturn('default'); LlmDriverFacade::shouldReceive('driver->completion') - ->twice()->andReturn( + ->once()->andReturn( CompletionResponse::from([ 'content' => get_fixture('github_transformed.json', false), ]) @@ -97,7 +97,7 @@ public function test_prevent_duplicates_github() $this->assertDatabaseCount('documents', 2); $this->assertDatabaseCount('document_chunks', 2); - Bus::assertBatchCount(4); + Bus::assertBatchCount(2); } @@ -111,10 +111,10 @@ public function test_prevent_duplicates_statamic() $payload['content'] = $payload; LlmDriverFacade::shouldReceive('driver->onQueue') - ->times(2)->andReturn('default'); + 
->once()->andReturn('default'); LlmDriverFacade::shouldReceive('driver->completion') - ->times(2)->andReturn( + ->times(1)->andReturn( CompletionResponse::from([ 'content' => 'Foo Bar', ]) @@ -133,7 +133,7 @@ public function test_prevent_duplicates_statamic() $this->assertDatabaseCount('documents', 1); $this->assertDatabaseCount('document_chunks', 1); - Bus::assertBatchCount(2); + Bus::assertBatchCount(1); } } diff --git a/tests/fixtures/.DS_Store b/tests/fixtures/.DS_Store index 45af69f5..5fb14f77 100644 Binary files a/tests/fixtures/.DS_Store and b/tests/fixtures/.DS_Store differ diff --git a/tests/fixtures/test_block_of_text.txt b/tests/fixtures/test_block_of_text.txt new file mode 100644 index 00000000..d0a724aa --- /dev/null +++ b/tests/fixtures/test_block_of_text.txt @@ -0,0 +1,39 @@ +Sint aliqua voluptate ex fugiat quis tempor sit cupidatat labore in consequat. Dolore magna non esse sit. In deserunt occaecat incididunt non ea exercitation ad occaecat magna. Cillum in do eu et ad id elit reprehenderit aliquip elit anim amet voluptate. Consequat et aute aute aliqua voluptate consectetur adipisicing enim id est consectetur nisi occaecat. Sint duis esse cillum labore aute dolore minim consectetur. Elit anim id dolore qui elit ipsum dolore veniam dolor. Elit mollit deserunt cillum aute magna cillum nulla reprehenderit nulla cupidatat. + +Laboris ipsum id enim. Nisi fugiat et dolor commodo anim deserunt. Non officia ullamco enim velit fugiat do dolor adipisicing eiusmod do esse anim ea ut irure. Duis tempor eu ad velit eu commodo. Fugiat est magna consectetur nisi pariatur esse amet cupidatat nulla consequat reprehenderit aliquip ipsum mollit. Culpa commodo fugiat exercitation eiusmod nulla ipsum sunt enim esse labore. Ut proident non excepteur consequat. Occaecat veniam sint aliqua ad id non ad incididunt est eu sunt aute ipsum. + +Dolore aute sunt ipsum amet id et aute veniam amet anim mollit fugiat veniam. 
In ut anim amet culpa duis qui id aliquip magna sit tempor ullamco irure in. Est non eiusmod Lorem labore elit in proident esse est fugiat voluptate fugiat. Dolor nulla aliqua eu quis do adipisicing excepteur nisi magna laborum nostrud proident sint irure. Ex proident non commodo aliquip excepteur ad minim excepteur. + +Laborum sint irure in pariatur adipisicing nulla voluptate labore ipsum dolore sunt laborum id mollit eu. Ea reprehenderit ipsum irure Lorem fugiat aute fugiat consequat incididunt. Cupidatat elit ea tempor elit duis aliqua commodo irure est ex sint adipisicing. Eiusmod Lorem excepteur Lorem ad ullamco. Amet est excepteur minim aliquip do est irure qui in ut aliqua laboris excepteur consectetur. Mollit deserunt nisi aliquip velit incididunt dolor nostrud. + +Nulla id laborum ex. Ipsum elit quis duis officia irure aliqua nisi cupidatat ex pariatur sunt. Elit labore laboris voluptate eiusmod consectetur aute adipisicing elit voluptate irure qui commodo non. Ea dolor ea aute tempor quis occaecat ut est minim anim cupidatat nostrud enim aliqua ut. Aliqua adipisicing nulla tempor proident sit officia cupidatat. + +Velit sit non ea Lorem laboris fugiat dolor dolor incididunt ad occaecat ex. Eiusmod nisi ex reprehenderit id. Commodo officia irure commodo. Ipsum veniam voluptate dolore. Consectetur enim tempor fugiat adipisicing. Cillum dolor ea aliquip enim cupidatat nostrud. Ut enim eiusmod elit dolor do Lorem magna ex do. + +Labore et nisi officia excepteur mollit. Deserunt sint qui qui. Nostrud ut et fugiat esse eu ex deserunt minim dolore aliqua laboris. Laboris adipisicing sit incididunt eiusmod. Minim reprehenderit duis enim cillum occaecat anim officia ex occaecat officia irure officia laboris proident nisi. Nisi aute sit aute sint in magna amet cillum officia excepteur nisi. + +Et occaecat labore laborum velit eu voluptate eu. Laboris ea culpa mollit amet elit non laboris. 
Tempor sit proident laboris commodo exercitation labore est esse culpa laboris fugiat reprehenderit excepteur veniam. Et voluptate ullamco voluptate consectetur anim elit aute. Proident aliqua nisi esse aute est. In amet ullamco minim duis reprehenderit ullamco sunt sit sit id. Ut consequat enim veniam mollit excepteur velit irure dolore. + +Ut sint elit duis. Pariatur consectetur cupidatat labore amet tempor irure do consequat minim reprehenderit laboris aliqua reprehenderit occaecat. Dolore consequat in pariatur exercitation dolor. Ad veniam nostrud Lorem qui in proident consectetur excepteur magna ea commodo do sunt proident. Voluptate quis ut labore excepteur amet nisi sunt ad voluptate eiusmod occaecat aute. Nisi magna deserunt elit ullamco. Amet in sint reprehenderit ad eu pariatur. Officia voluptate enim ipsum id incididunt. + +Adipisicing aute eiusmod sint dolor Lorem elit. Cillum culpa cupidatat sunt aliqua sunt. Nostrud mollit commodo adipisicing ea aute duis Lorem aute eu. Incididunt minim labore in labore ex nostrud elit aliquip labore dolor aute duis. Quis cillum fugiat dolore irure est incididunt velit amet enim laborum adipisicing. Voluptate sunt nulla sit amet proident quis qui aliquip occaecat ex. Laborum excepteur veniam cupidatat. Mollit qui commodo nulla deserunt sit laboris laboris. + +Aliquip eu sunt esse incididunt non eiusmod reprehenderit commodo duis commodo elit dolor occaecat. Sint aliquip duis fugiat mollit mollit ad officia nostrud Lorem eu consequat excepteur do esse. Cupidatat cillum duis minim anim ullamco quis ex ex. Aute fugiat mollit veniam. + +Ut proident eiusmod aliquip amet. Sunt sunt esse id non. Ipsum dolore aliquip duis excepteur Lorem laboris ex et aliqua officia exercitation sunt id. Qui sint laborum eu non irure fugiat eiusmod ea id aliqua magna. Fugiat ut elit dolor sint anim veniam Lorem elit aliquip proident consequat proident officia. + +In nisi eu officia cupidatat et duis. 
Consectetur do aliqua sit occaecat esse ullamco est sit fugiat. Adipisicing quis nostrud mollit qui anim culpa. Aliqua ea in duis eiusmod. Laborum incididunt id proident nisi eiusmod aute magna do Lorem non ad. Eu Lorem excepteur cupidatat est irure occaecat elit veniam. Reprehenderit nostrud id anim ad nulla veniam mollit. Minim consequat enim aute pariatur. + +Cupidatat esse minim mollit veniam anim aliquip sit ex. Nisi laboris tempor est ut reprehenderit commodo mollit qui. Sunt labore duis occaecat reprehenderit commodo. Labore aliquip eiusmod id commodo ipsum non enim officia magna nostrud in consequat eiusmod. Dolor proident sunt esse consectetur duis qui anim ea velit mollit labore sint qui. Nulla culpa occaecat cupidatat. Sint ut commodo dolore dolore consequat consectetur occaecat commodo amet pariatur adipisicing ipsum pariatur id. Tempor ad sunt non est sunt. + +Dolore ipsum nostrud fugiat consectetur. Sunt et voluptate enim incididunt in consequat enim in irure id ullamco et tempor. Est sunt sit in. Labore non id eu laborum amet non exercitation dolore proident. + +Anim laborum exercitation veniam non adipisicing culpa do et. Esse cillum dolore et. Qui consectetur irure occaecat enim amet. Ullamco occaecat amet laboris adipisicing excepteur voluptate minim sint. Aliquip reprehenderit ex et sint. Cupidatat adipisicing anim fugiat aliquip. Voluptate id ut eu ut. Laborum sit quis enim aliquip labore nisi. + +Nulla deserunt pariatur enim ullamco voluptate eu do anim nisi voluptate Lorem. Aliqua aute aliqua eu Lorem pariatur elit tempor exercitation do exercitation nostrud officia cillum. Minim commodo consectetur dolor. Qui ea nulla aliquip id ex id duis tempor. Qui exercitation dolore commodo culpa deserunt ad officia fugiat culpa nulla minim consequat. Magna ullamco id deserunt. Est proident ut amet laborum consectetur est. + +Fugiat veniam ut proident do nulla incididunt laborum irure ad deserunt pariatur. 
Exercitation proident minim eiusmod excepteur tempor sint eiusmod aute. Velit incididunt cupidatat nostrud ut id velit ipsum cillum consequat ea. Labore eiusmod amet adipisicing aute laborum Lorem. Magna officia esse culpa mollit proident mollit dolore irure tempor ea consequat dolore cillum est. Proident laboris culpa reprehenderit officia nisi magna magna incididunt excepteur velit duis cupidatat. Consequat ex pariatur aliquip eiusmod cupidatat cupidatat laborum exercitation fugiat tempor. + +Culpa elit quis aliqua ea ut ad Lorem ea. Aliqua consequat commodo est quis exercitation aliqua. Tempor consectetur consequat dolore duis magna. Incididunt aliquip tempor pariatur mollit nisi ullamco elit nisi irure consequat nulla esse ea aute eiusmod. Sit officia eu enim ut cillum irure cupidatat in amet pariatur eiusmod est incididunt aliqua velit. Tempor ipsum ea do nostrud laborum esse velit incididunt. Deserunt et sint consectetur incididunt aliqua id elit eiusmod ea anim. + +Do ex veniam mollit occaecat reprehenderit mollit dolore amet velit nostrud in aute exercitation. Laborum occaecat veniam incididunt tempor exercitation id quis eu fugiat magna anim dolor est in. Veniam sunt fugiat sit irure. Dolor sit consequat culpa Lorem. diff --git a/tests/fixtures/web_page_source_events.html b/tests/fixtures/web_page_source_events.html new file mode 100644 index 00000000..d5c0a795 --- /dev/null +++ b/tests/fixtures/web_page_source_events.html @@ -0,0 +1,6848 @@ + + + + + + + + + + + + + +Dallas Cowboys | Official Site of the Dallas Cowboys + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Skip to main content + + + + +
+ + + + + + + +
+ + + + + + + + + + +
+ + + + + + + + + + + +
+ +
+
+
+ + +
+ Advertising +
+ +
+
+ +
+ + + + +

Dallas Cowboys Schedule

+
+
+ + +
+
+ +

+ + Dallas Cowboys Schedule + +

+ + +
+ + Presented by + + +
+
+
+
+ + +
+
+ + + + + +
+
+ + + + +
+ +
+
+
+ + + + +
+
+ + + + +
+ + + + +
+ + + + + +
+ + +
+ ScheduleHeader_2460x1440 +
+ +
+ +
+ +
+
+ + + + + +
+
+
+ +
+
+
+
+ +
+
+ +
+
+ + +
+ + + + + + +
+
+
+ + + + +
+
+ + + + +
+ + + +
+ + + + +
+
+
+
+ +
+
+ + + +
+

Calendar event(s) copied!

+

1. Access your calendar

+

2. Add url to calendar and subscribe

+

3. Ensure that newly added Cowboys's calendar is synced to your account

+
+
+
+
+ + +
+
+
+
+

Pick your method to subscribe to the calendar

+ +
+
+ + + +
+
+
+ 1. Automatically Download to System Default Mail App (Recommended) +
+
+ + + + +
+
+ 2. Connect to Calendar Provider of Choice (Manual) +
+
+
+ +
+
+
+ + + +
+

Calendar event(s) copied!

+

1. Access your calendar

+

2. Add url to calendar and subscribe

+

3. Ensure that newly added Cowboys's calendar is synced to your account

+
+
+
+
+
+
+ + + + + +
+
+
+ + + + +
+ +
+ + + + +
+
+ + + + +
+ + + + + + +
+
+ + +
+
+ +

+ + PRESEASON + +

+ + +
+
+ +
+ + + +
+
+

+ + + WEEK 1 + + · Sun 08/11 + · 3:30 PM CDT +

+
+
+
+

+ AT +

+ + + + +
+

+ +

+

+ Los Angeles Rams +

+
+
+
+
+ + +
+ + + + + + + + + + + + CBS 11 + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + SoFi Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 2 + + · Sat 08/17 + · 9:00 PM CDT +

+
+
+
+

+ AT +

+ + + + +
+

+ +

+

+ Las Vegas Raiders +

+
+
+
+
+ + +
+ + + + + + + + + + + + CBS 11 + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + Allegiant Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 3 + + · Sat 08/24 + · 3:00 PM CDT +

+
+
+
+ + + + +
+

+ +

+

+ Los Angeles Chargers +

+
+
+
+
+ + +
+ + + + + + + + + + + + CBS 11 + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + AT&T Stadium + +
+
+ + + + + +
+ + + + + +
+ + +
+
+ +

+ + REGULAR SEASON + +

+ + +
+
+ +
+ + + +
+
+

+ + + WEEK 1 + + · Sun 09/08 + · 3:25 PM CDT +

+
+
+
+

+ AT +

+ + + + +
+

+ +

+

+ Cleveland Browns +

+
+
+
+
+ + +
+ + + + + + + + + + + + FOX + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + Cleveland Browns Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 2 + + · Sun 09/15 + · 12:00 PM CDT +

+
+
+
+ + + + +
+

+ +

+

+ New Orleans Saints +

+
+
+
+
+ + +
+ + + + + + + + + + + + FOX + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + AT&T Stadium + +
+
+ + + + + +
+ + + + + +
+
+
+
+
+ + + +
+
+

+ + + WEEK 3 + + · Sun 09/22 + · 3:25 PM CDT +

+
+
+
+ + + + +
+

+ +

+

+ Baltimore Ravens +

+
+
+
+
+ + +
+ + + + + + + + + + + + FOX + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + AT&T Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 4 + + · Thu 09/26 + · 7:15 PM CDT +

+
+
+
+

+ AT +

+ + + + +
+

+ +

+

+ New York Giants +

+
+
+
+
+ + +
+ + + + + + + + + + + + Prime Video + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + MetLife Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 5 + + · Sun 10/06 + · 7:20 PM CDT +

+
+
+
+

+ AT +

+ + + + +
+

+ +

+

+ Pittsburgh Steelers +

+
+
+
+
+ + +
+ + + + + + + + + + + + NBC + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + Acrisure Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 6 + + · Sun 10/13 + · 3:25 PM CDT +

+
+
+
+ + + + +
+

+ +

+

+ Detroit Lions +

+
+
+
+
+ + +
+ + + + + + + + + + + + FOX + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + AT&T Stadium + +
+
+ + + + + +
+ + + + + +
+
+
+
+
+ + +
+
+

+ + WEEK 7 + +

+
+
+

+ BYE +

+
+
+
+ + + +
+
+

+ + + WEEK 8 + + · Sun 10/27 + · 7:20 PM CDT +

+
+
+
+

+ AT +

+ + + + +
+

+ +

+

+ San Francisco 49ers +

+
+
+
+
+ + +
+ + + + + + + + + + + + NBC + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + Levi's® Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 9 + + · Sun 11/03 + · 12:00 PM CST +

+
+
+
+

+ AT +

+ + + + +
+

+ +

+

+ Atlanta Falcons +

+
+
+
+
+ + +
+ + + + + + + + + + + + FOX + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + Mercedes-Benz Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 10 + + · Sun 11/10 + · 3:25 PM CST +

+
+
+
+ + + + +
+

+ +

+

+ Philadelphia Eagles +

+
+
+
+
+ + +
+ + + + + + + + + + + + CBS + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + AT&T Stadium + +
+
+ + + + + +
+ + + + + +
+
+
+
+
+ + + +
+
+

+ + + WEEK 11 + + · Mon 11/18 + · 7:15 PM CST +

+
+
+
+ + + + +
+

+ +

+

+ Houston Texans +

+
+
+
+
+ + +
+ + + + + + + + + + + + ESPN + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + AT&T Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 12 + + · Sun 11/24 + · 12:00 PM CST +

+
+
+
+

+ AT +

+ + + + +
+

+ +

+

+ Washington Commanders +

+
+
+
+
+ + +
+ + + + + + + + + + + + FOX + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + Commanders Field + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 13 + + · Thu 11/28 + · 3:30 PM CST +

+
+
+
+ + + + +
+

+ +

+

+ New York Giants +

+
+
+
+
+ + +
+ + + + + + + + + + + + FOX + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + AT&T Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 14 + + · Mon 12/09 + · 7:15 PM CST +

+
+
+
+ + + + +
+

+ +

+

+ Cincinnati Bengals +

+
+
+
+
+ + +
+ + + + + + + + + + + + ESPN • ABC + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + AT&T Stadium + +
+
+ + + + + +
+ + + + + +
+
+
+
+
+ + + +
+
+

+ + + WEEK 15 + + · Sun 12/15 + · 12:00 PM CST +

+
+
+
+

+ AT +

+ + + + +
+

+ +

+

+ Carolina Panthers +

+
+
+
+
+ + +
+ + + + + + + + + + + + FOX + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + Bank of America Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 16 + + · Sun 12/22 + · 7:20 PM CST +

+
+
+
+ + + + +
+

+ +

+

+ Tampa Bay Buccaneers +

+
+
+
+
+ + +
+ + + + + + + + + + + + NBC + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + AT&T Stadium + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 17 + + · Sun 12/29 + · 3:25 PM CST +

+
+
+
+

+ AT +

+ + + + +
+

+ +

+

+ Philadelphia Eagles +

+
+
+
+
+ + +
+ + + + + + + + + + + + FOX + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + Lincoln Financial Field + +
+
+ + + + + +
+ + + + + +
+
+ + + +
+
+

+ + + WEEK 18 + + · TBD +

+
+
+
+ + + + +
+

+ +

+

+ Washington Commanders +

+
+
+
+
+ + +
+ + + + + + + + + + + + TBD + +
+
+ + + + + + + + + + + + 105.3 The FAN + +
+ + + + + + + + + + + + Add to Calendar + + +
+ + +
+ + + + + + + + + + + + AT&T Stadium + +
+
+ + + + + +
+ + + + + +
+
+
+
+
+ + + +
+ + + + + + + + +
+
+
+
+ + +
+

Please be aware that there are certain games that are subject to flexible scheduling and the date and time of those games may be changed from what is currently reflected on the schedule and what may appear on the ticket. For more detailed information about NFL flexible scheduling procedures for the 2024 NFL Season, please visit https://www.nfl.com/schedules/flexible-scheduling-procedures.

+ +
+
+
+
+ +
+ +
+
+
+ + + + +
+
+ + +
+
+ +

+ + Network Information + +

+ + +
+
+ + + +
+
+ + + + + + + +
+ Advertising +
+
+
+
+
+ + + + + + + + + + + + + +
+ +
+ + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/fixtures/web_page_source_events_parsed.html b/tests/fixtures/web_page_source_events_parsed.html new file mode 100644 index 00000000..4385e5fe --- /dev/null +++ b/tests/fixtures/web_page_source_events_parsed.html @@ -0,0 +1 @@ +Dallas Cowboys | Official Site of the Dallas Cowboys OneTrust Cookies Consent Notice start OneTrust Cookies Consent Notice end IAB CCPA script start IAB CCPA script end Skip to main content Interstitial OOP SLOT Background-Skin SLOT [if IE 9]>