Skip to content

Commit

Permalink
Ok FireCrawl added
Browse files: browse the repository at this point in the history
  • Loading branch information
alnutile committed Aug 6, 2024
1 parent 4b0f100 commit a06f2dc
Show file tree
Hide file tree
Showing 12 changed files with 1,067 additions and 47 deletions.
9 changes: 3 additions & 6 deletions app/Domains/Prompts/EventPagePrompt.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ public static function prompt(string $context): string
<INSTRUCTIONS>
1. Analyze the provided website HTML content below the <CONTENT> tag.
2. Look for information about events within the content.
3. If no event data is found, respond with a single word: false
2. Look for information about sporting events within the content.
3. If no event data is found summarize what is on the page
4. If event data is found, extract the following information for each event:
- Event Title
- Start Date
Expand All @@ -37,15 +37,12 @@ public static function prompt(string $context): string
"additionalInfo": "Any other relevant data"
If no events are found, return the word false
If no events are found, return the words "No Content Found" and summarize what was on the page
<CONTENT>
$context
</CONTENT>
Respond only with Markdown or 'false' if no events are found. Do not include any explanations or additional text in your response.
PROMPT;
}
}
10 changes: 4 additions & 6 deletions app/Domains/Sources/WebSearch/GetPage.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

namespace App\Domains\Sources\WebSearch;

use Facades\App\Domains\WebParser\DefaultClient;
use Facades\App\Domains\WebParser\FireCrawlClient;
use App\Domains\WebParser\WebContentResultsDto;
use App\Models\Collection;
use App\Models\Setting;
use Facades\App\Domains\WebParser\DefaultClient;
use Facades\App\Domains\WebParser\FireCrawlClient;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Storage;
use League\HTMLToMarkdown\Converter\CodeConverter;
Expand All @@ -15,7 +15,6 @@
use League\HTMLToMarkdown\Converter\TextConverter;
use League\HTMLToMarkdown\Environment;
use League\HTMLToMarkdown\HtmlConverter;
use Spatie\Browsershot\Browsershot;

class GetPage
{
Expand All @@ -36,15 +35,14 @@ public function handle(string $url, bool $parseHtml = true): WebContentResultsDt
* @TODO
* Make this a driver like the rest of the system
*/
if(Setting::getSecret('fire_crawl', 'api_token')) {
if (Setting::getSecret('fire_crawl', 'api_key')) {
Log::info('Using FireCrawl');
$results = FireCrawlClient::scrape($url);
} else {
Log::info('Using Default Browsershot');
/** @var WebContentResultsDto $results */
$results = DefaultClient::scrape($url);
/** @phpstan-ignore-next-line */
Storage::disk('collections')->put($this->collection->id.'/'.$name, $results->pdf());
Storage::disk('collections')->put($this->collection->id.'/'.$name, $results->browserShot->pdf());
}

return $results;
Expand Down
6 changes: 2 additions & 4 deletions app/Domains/WebParser/DefaultClient.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

namespace App\Domains\WebParser;

use App\Domains\Sources\WebSearch\GetPage;
use League\HTMLToMarkdown\Converter\CodeConverter;
use League\HTMLToMarkdown\Converter\PreformattedConverter;
use League\HTMLToMarkdown\Converter\TableConverter;
Expand All @@ -14,14 +13,13 @@

class DefaultClient extends BaseClient
{

public function scrape(string $url): WebContentResultsDto {
public function scrape(string $url): WebContentResultsDto
{
$results = Browsershot::url($url)
->userAgent('DailyAI Studio Browser 1.0, helping users automate workflows')
->dismissDialogs()
->fullPage();


$plainResults = $this->parseHtml($results->bodyHtml());

return WebContentResultsDto::from([
Expand Down
2 changes: 1 addition & 1 deletion app/Domains/WebParser/FireCrawlClient.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public function scrape(string $url): WebContentResultsDto
protected function getClient(): PendingRequest
{
$url = Setting::getSecret('fire_crawl', 'api_url');
$token = Setting::getSecret('fire_crawl', 'api_token');
$token = Setting::getSecret('fire_crawl', 'api_key');

return Http::baseUrl($url)->withHeaders([
'Authorization' => 'Bearer '.$token,
Expand Down
4 changes: 2 additions & 2 deletions app/Domains/WebParser/Results/FireCrawResultsDto.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ class FireCrawResultsDto extends WebContentResultsDto
public function __construct(
#[MapInputName('data.metadata.title')]
public string $title,
#[MapInputName('data.metadata.description')]
public string $description,
#[MapInputName('data.markdown')]
public string $content,
#[MapInputName('data.content')]
public string $content_raw,
#[MapInputName('data.metadata.sourceURL')]
public string $url,
#[MapInputName('data.metadata.description')]
public string $description = '',
) {

}
Expand Down
2 changes: 1 addition & 1 deletion app/Domains/WebParser/WebContentResultsDto.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ class WebContentResultsDto extends Data
{
public function __construct(
public string $title,
public string $description,
public string $content,
public string $url,
public string $description = '',
public ?Browsershot $browserShot = null,
) {

Expand Down
2 changes: 2 additions & 0 deletions app/Jobs/GetWebContentJob.php
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ public function handle(): void
$prompt = Templatizer::appendContext(true)
->handle($this->source->getPrompt(), $htmlResults->content);

put_fixture('web_page_prompt.txt', $prompt, false);

$results = LlmDriverFacade::driver(
$this->source->getDriver()
)->completion($prompt);
Expand Down
4 changes: 2 additions & 2 deletions resources/js/Components/Templates.vue
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ const checkTemplate = (label) => {
</div>

<template v-for="(prompt, label) in prompts" :key="label">
<div class="collapse bg-base-200">
<div class="collapse collapse-arrow bg-base-200">
<input type="radio" name="templates" :checked="checkedTemplate === label"/>
<button type="button"
@click="checkTemplate(label)"
class="collapse-title text-xl font-medium">Type: <span class="uppercase">{{label}}</span>
class="collapse-title text-md font-medium">Type: <span class="uppercase">{{label}}</span>
</button>
<div class="collapse-content">
<div class="overflow-y-scroll prose">
Expand Down
2 changes: 1 addition & 1 deletion tests/Feature/FireCrawlClientTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public function test_parse(): void
'secrets' => [
'fire_crawl' => [
'api_url' => 'https://api.firecrawl.dev',
'api_token' => 'foo',
'api_key' => 'foo',
],
],
]);
Expand Down
5 changes: 0 additions & 5 deletions tests/Feature/GetPageTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,10 @@

use App\Models\Collection;
use Facades\App\Domains\Sources\WebSearch\GetPage;
use Illuminate\Support\Facades\Storage;
use League\HTMLToMarkdown\HtmlConverter;
use Tests\TestCase;

class GetPageTest extends TestCase
{


public function test_iterator()
{
$html = get_fixture('test_blog.html', false);
Expand All @@ -22,5 +18,4 @@ public function test_iterator()

$this->assertNotEmpty($results);
}

}
22 changes: 3 additions & 19 deletions tests/fixtures/claude_payload_chat.json

Large diffs are not rendered by default.

Loading

0 comments on commit a06f2dc

Please sign in to comment.