-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
21 changed files
with
397 additions
and
64 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
<?php | ||
|
||
namespace App\Domains\WebParser; | ||
|
||
/**
 * Common parent type for web-parser clients in this namespace.
 *
 * The class declares no members of its own; it only provides a shared type
 * for concrete clients to extend.
 */
abstract class BaseWebParserClient | ||
{ | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
<?php | ||
|
||
namespace App\Domains\WebParser; | ||
|
||
use App\Domains\Sources\WebSearch\GetPage; | ||
use League\HTMLToMarkdown\Converter\CodeConverter; | ||
use League\HTMLToMarkdown\Converter\PreformattedConverter; | ||
use League\HTMLToMarkdown\Converter\TableConverter; | ||
use League\HTMLToMarkdown\Converter\TextConverter; | ||
use League\HTMLToMarkdown\Environment; | ||
use League\HTMLToMarkdown\HtmlConverter; | ||
use LlmLaraHub\LlmDriver\BaseClient; | ||
use Spatie\Browsershot\Browsershot; | ||
|
||
/**
 * Built-in scraper: loads a page through Browsershot and converts the HTML
 * body into Markdown.
 *
 * NOTE(review): this class extends the LLM driver's BaseClient rather than
 * BaseWebParserClient from this namespace — confirm which parent is intended.
 */
class DefaultClient extends BaseClient
{
    /**
     * Fetch the given URL and return its content as a DTO.
     *
     * The title and description are derived from the first 128 / 256
     * characters of the converted Markdown, title-cased.
     *
     * @param  string  $url  Address of the page to scrape.
     * @return WebContentResultsDto Converted content, raw HTML, the URL and the Browsershot instance.
     */
    public function scrape(string $url): WebContentResultsDto
    {
        $results = Browsershot::url($url)
            ->userAgent('DailyAI Studio Browser 1.0, helping users automate workflows')
            ->dismissDialogs()
            ->fullPage();

        // bodyHtml() runs the browser on every call, so fetch the HTML once and
        // reuse it; this also guarantees 'content' and 'content_raw' describe
        // the same page load.
        $rawHtml = $results->bodyHtml();

        $plainResults = $this->parseHtml($rawHtml);

        return WebContentResultsDto::from([
            'title' => str($plainResults)->limit(128)->title()->toString(),
            'description' => str($plainResults)->limit(256)->title()->toString(),
            'content' => $plainResults,
            'content_raw' => $rawHtml,
            'url' => $url,
            'browserShot' => $results,
        ]);
    }

    /**
     * Convert an HTML string to trimmed Markdown.
     *
     * Tags without a converter are stripped, and nav, footer, header, script,
     * style and meta nodes are removed entirely. Only the table, code,
     * preformatted and text converters are registered.
     *
     * @param  string  $html  HTML markup to convert.
     * @return string Markdown with surrounding whitespace trimmed.
     */
    public function parseHtml(string $html): string
    {
        $environment = new Environment([
            'strip_tags' => true,
            'suppress_errors' => true,
            'hard_break' => true,
            'strip_placeholder_links' => true,
            'remove_nodes' => 'nav footer header script style meta',
        ]);
        $environment->addConverter(new TableConverter());
        $environment->addConverter(new CodeConverter());
        $environment->addConverter(new PreformattedConverter());
        $environment->addConverter(new TextConverter());

        $converter = new HtmlConverter($environment);

        $markdown = $converter->convert($html);

        return str($markdown)->trim()->toString();
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
<?php | ||
|
||
namespace App\Domains\WebParser; | ||
|
||
use App\Domains\WebParser\Results\FireCrawResultsDto; | ||
use App\Models\Setting; | ||
use Illuminate\Http\Client\PendingRequest; | ||
use Illuminate\Support\Facades\Http; | ||
|
||
/**
 * Web-parser client that delegates page scraping to the FireCrawl HTTP API.
 */
class FireCrawlClient extends BaseWebParserClient
{
    /**
     * Ask FireCrawl to scrape a URL and map the JSON response onto a DTO.
     *
     * @param  string  $url  Address of the page to scrape.
     * @return WebContentResultsDto A FireCrawResultsDto built from the response payload.
     *
     * @throws \Exception When the HTTP response reports a failure.
     */
    public function scrape(string $url): WebContentResultsDto
    {
        $results = $this->getClient()->post('/scrape', [
            'url' => $url,
        ]);

        if ($results->failed()) {
            // json() yields an array (or null), which cannot be concatenated
            // into a string; report the raw response body instead.
            throw new \Exception('FireCrawl API Error '.$results->body());
        }

        $data = $results->json();

        return FireCrawResultsDto::from($data);
    }

    /**
     * Build an HTTP client pointed at the configured FireCrawl endpoint with
     * a bearer token attached.
     *
     * NOTE(review): the token is read from the 'api_token' secret, while the
     * settings form submits a field named 'api_key' — confirm the stored key.
     */
    protected function getClient(): PendingRequest
    {
        $url = Setting::getSecret('fire_crawl', 'api_url');
        $token = Setting::getSecret('fire_crawl', 'api_token');

        return Http::baseUrl($url)->withHeaders([
            'Authorization' => 'Bearer '.$token,
        ]);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
<?php | ||
|
||
namespace App\Domains\WebParser\Results; | ||
|
||
use App\Domains\WebParser\WebContentResultsDto; | ||
use Spatie\LaravelData\Attributes\MapInputName; | ||
|
||
/**
 * Result DTO populated from a FireCrawl scrape response.
 *
 * Each property is mapped from a dotted path inside the response payload
 * (for example `data.metadata.title`).
 */
class FireCrawResultsDto extends WebContentResultsDto
{
    public function __construct(
        #[MapInputName('data.metadata.title')]
        public string $title,
        #[MapInputName('data.metadata.description')]
        public string $description,
        #[MapInputName('data.markdown')]
        public string $content,
        #[MapInputName('data.content')]
        public string $content_raw,
        #[MapInputName('data.metadata.sourceURL')]
        public string $url,
    ) {
        // Run the parent constructor so inherited properties that are not
        // redeclared here (the nullable $browserShot) are initialized; without
        // this, reading them throws an "accessed before initialization" error.
        parent::__construct($title, $description, $content, $url);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
<?php | ||
|
||
namespace App\Domains\WebParser; | ||
|
||
use Spatie\Browsershot\Browsershot; | ||
use Spatie\LaravelData\Data; | ||
|
||
/**
 * Data object describing one scraped web page.
 *
 * Carries the page title, description, content and URL, plus an optional
 * Browsershot instance that defaults to null.
 */
class WebContentResultsDto extends Data
{
    public function __construct(
        public string $title,
        public string $description,
        public string $content,
        public string $url,
        public ?Browsershot $browserShot = null,
    ) {}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
30 changes: 30 additions & 0 deletions
30
database/migrations/2024_08_06_002122_add_fields_to_settings.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
<?php | ||
|
||
use Illuminate\Database\Migrations\Migration; | ||
use Illuminate\Database\Schema\Blueprint; | ||
use Illuminate\Support\Facades\Schema; | ||
|
||
return new class extends Migration
{
    /**
     * Run the migrations.
     *
     * Adds three nullable long-text prompt columns to the settings table.
     */
    public function up(): void
    {
        Schema::table('settings', function (Blueprint $table) {
            $table->longText('main_prompt')->nullable();
            $table->longText('source_prompt')->nullable();
            $table->longText('output_prompt')->nullable();
        });
    }

    /**
     * Reverse the migrations.
     *
     * Drops the columns added by up() so a rollback restores the previous schema.
     */
    public function down(): void
    {
        Schema::table('settings', function (Blueprint $table) {
            $table->dropColumn(['main_prompt', 'source_prompt', 'output_prompt']);
        });
    }
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
<script setup>
import { ref } from 'vue';
import { Link, router, useForm } from '@inertiajs/vue3';
import ActionMessage from '@/Components/ActionMessage.vue';
import FormSection from '@/Components/FormSection.vue';
import InputError from '@/Components/InputError.vue';
import InputLabel from '@/Components/InputLabel.vue';
import PrimaryButton from '@/Components/PrimaryButton.vue';
import SecondaryButton from '@/Components/SecondaryButton.vue';
import TextInput from '@/Components/TextInput.vue';
import SecretInput from "@/Components/SecretInput.vue";

const props = defineProps({
    setting: Object,
});

// Seed the form from the stored FireCrawl secrets; the URL falls back to the
// public FireCrawl endpoint when nothing has been saved yet.
const form = useForm({
    _method: 'PUT',
    api_key: props.setting.secrets?.fire_crawl?.api_key ,
    api_url: props.setting.secrets?.fire_crawl?.api_url ?? "https://api.firecrawl.dev/v0",
});

// Submit the form to the FireCrawl settings route for this setting record.
const updateSecrets = () => {
    form.put(route('settings.update.fire_crawl', {
        setting: props.setting.id,
    }), {
        errorBag: 'updateFireCrawlInformation',
        preserveScroll: true,
    });
};
</script>

<template>
    <FormSection @submitted="updateSecrets">
        <template #title>
            Add FireCrawl Token and Url
        </template>

        <template #description>
            This service can boost the web scraping quality over the
            default scraper built in.
            Read more
            <a
                class="underline"
                href="https://docs.firecrawl.dev/features/scrape" target="_blank">here</a>
        </template>

        <template #form>
            <!-- Api Token: each label targets its own input id so clicking it focuses the right field -->
            <div class="col-span-6 sm:col-span-4">
                <InputLabel for="api_key" value="Api Token" />
                <SecretInput id="api_key" v-model="form.api_key" class="mt-1 block w-full" />
                <InputError :message="form.errors.api_key" class="mt-2" />
            </div>

            <!-- Api Url -->
            <div class="col-span-6 sm:col-span-4">
                <InputLabel for="api_url" value="Api Url" />
                <TextInput
                    id="api_url"
                    v-model="form.api_url"
                    type="text"
                    class="mt-1 block w-full"
                />
                <InputError :message="form.errors.api_url" class="mt-2" />
            </div>
        </template>

        <template #actions>
            <ActionMessage :on="form.recentlySuccessful" class="me-3">
                Saved.
            </ActionMessage>

            <PrimaryButton :class="{ 'opacity-25': form.processing }" :disabled="form.processing">
                Save
            </PrimaryButton>
        </template>
    </FormSection>
</template>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.