Skip to content

Commit

Permalink
docs: choosing an embedding model (#396)
Browse files Browse the repository at this point in the history
  • Loading branch information
giladgd authored Dec 8, 2024
1 parent 28c7984 commit 6a54163
Show file tree
Hide file tree
Showing 43 changed files with 222 additions and 20 deletions.
73 changes: 55 additions & 18 deletions .vitepress/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import {Resvg, initWasm as initResvgWasm, type ResvgRenderOptions} from "@resvg/
import {BlogPageInfoPlugin} from "./config/BlogPageInfoPlugin.js";
import {getApiReferenceSidebar} from "./config/apiReferenceSidebar.js";
import {ensureLocalImage} from "./utils/ensureLocalImage.js";
import {getExcerptFromMarkdownFile} from "./utils/getExcerptFromMarkdownFile.js";
import type {Element as HastElement, Parent} from "hast";

import type {Node as UnistNode} from "unist";
Expand All @@ -28,6 +29,7 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
const packageJson: typeof import("../package.json") = fs.readJsonSync(path.join(__dirname, "..", "package.json"));
const env = envVar.from(process.env);

const docsDir = path.join(__dirname, "..", "docs");
const urlBase = env.get("DOCS_URL_BASE")
.asString();
const packageVersion = env.get("DOCS_PACKAGE_VERSION")
Expand Down Expand Up @@ -66,9 +68,9 @@ function resolveHref(href: string, withDomain: boolean = false): string {
}

const defaultImageMetaTags: HeadConfig[] = [
["meta", {name: "og:image", content: socialPosterLink}],
["meta", {name: "og:image:width", content: "4096"}],
["meta", {name: "og:image:height", content: "2048"}],
["meta", {property: "og:image", content: socialPosterLink}],
["meta", {property: "og:image:width", content: "4096"}],
["meta", {property: "og:image:height", content: "2048"}],
["meta", {name: "twitter:image", content: socialPosterLink}],
["meta", {name: "twitter:card", content: "summary_large_image"}]
];
Expand Down Expand Up @@ -185,9 +187,9 @@ export default defineConfig({
["link", {rel: "alternate", title: "Blog", type: "application/atom+xml", href: resolveHref("/blog/feed.atom", true)}],
["meta", {name: "theme-color", content: "#cd8156"}],
["meta", {name: "theme-color", content: "#dd773e", media: "(prefers-color-scheme: dark)"}],
["meta", {name: "og:type", content: "website"}],
["meta", {name: "og:locale", content: "en"}],
["meta", {name: "og:site_name", content: "node-llama-cpp"}],
["meta", {property: "og:type", content: "website"}],
["meta", {property: "og:locale", content: "en"}],
["meta", {property: "og:site_name", content: "node-llama-cpp"}],
["script", {async: "", src: "https://www.googletagmanager.com/gtag/js?id=G-Q2SWE5Z1ST"}],
[
"script",
Expand All @@ -198,8 +200,10 @@ export default defineConfig({
["style", {}]
],
async transformHead({pageData, head}) {
let description = pageData.description;
if (pageData.filePath === "index.md") {
head.push(...defaultImageMetaTags);
description ||= defaultPageDescription;
} else if (pageData.relativePath === "404.md")
head.push(...defaultImageMetaTags);

Expand All @@ -209,7 +213,6 @@ export default defineConfig({
]
.filter(Boolean)
.join(" - ") || defaultPageTitle;
const description = pageData.description || defaultPageDescription;

if (pageData.filePath.startsWith("blog/") && pageData.frontmatter.image != null) {
let imageDir = pageData.filePath;
Expand All @@ -220,7 +223,7 @@ export default defineConfig({
const coverImage = await ensureLocalImage(pageData.frontmatter.image, "cover", {
baseDestLocation: imageDir.split("/")
});
head.push(["meta", {name: "og:image", content: resolveHref(coverImage.urlPath.absolute, true)}]);
head.push(["meta", {property: "og:image", content: resolveHref(coverImage.urlPath.absolute, true)}]);
} else if (typeof pageData.frontmatter.image === "object") {
const coverImage = typeof pageData.frontmatter.image.url === "string"
? await ensureLocalImage(pageData.frontmatter.image.url, "cover", {
Expand All @@ -230,28 +233,53 @@ export default defineConfig({

if (typeof pageData.frontmatter.image.url === "string")
head.push(["meta", {
name: "og:image",
property: "og:image",
content: resolveHref(coverImage?.urlPath.absolute ?? pageData.frontmatter.image.url, true)
}]);

if (pageData.frontmatter.image.width != null)
head.push(["meta", {
name: "og:image:width",
property: "og:image:width",
content: String(coverImage?.width ?? pageData.frontmatter.image.width)
}]);

if (pageData.frontmatter.image.height != null)
head.push(["meta", {
name: "og:image:height",
property: "og:image:height",
content: String(coverImage?.height ?? pageData.frontmatter.image.height)
}]);
}
}

head.push(["meta", {name: "og:title", content: title}]);
head.push(["meta", {name: "og:description", content: description}]);
const markdownFilePath = path.join(docsDir, pageData.filePath);
if ((description == null || description === "") && await fs.pathExists(markdownFilePath) && !pageData.filePath.startsWith("api/")) {
const excerpt = await getExcerptFromMarkdownFile(await fs.readFile(markdownFilePath, "utf8"));
if (excerpt != null && excerpt !== "")
description = excerpt.replaceAll('"', "'").replaceAll("\n", " ");
}

pageData.description = description;

if (description != null && description !== "" &&
(pageData.frontmatter.description == null || pageData.frontmatter.description === "")
) {
pageData.frontmatter.description = description;
for (let i = 0; i < head.length; i++) {
const header = head[i]!;
if (header[0] === "meta" && header[1]?.name === "description") {
head[i] = ["meta", {name: "description", content: description}];
break;
}
}
}

head.push(["meta", {property: "og:title", content: title}]);
if (description != null && description !== "")
head.push(["meta", {property: "og:description", content: description}]);

head.push(["meta", {name: "twitter:title", content: title}]);
head.push(["meta", {name: "twitter:description", content: description}]);
if (description != null && description !== "")
head.push(["meta", {name: "twitter:description", content: description}]);
},
transformPageData(pageData) {
if (pageData.filePath.startsWith("api/")) {
Expand Down Expand Up @@ -307,7 +335,7 @@ export default defineConfig({
plugins: [
GitChangelog({
repoURL: () => "https://github.com/withcatai/node-llama-cpp",
cwd: path.join(__dirname, "..", "docs")
cwd: docsDir
}) as VitepressPlugin,
GitChangelogMarkdownSection({
exclude: (id) => (
Expand Down Expand Up @@ -703,19 +731,28 @@ export default defineConfig({
return bDate.getTime() - aDate.getTime();
});

for (const {url, excerpt, frontmatter, html} of blogPosts) {
const ogImageElement = findElementInHtml(html, (element) => element.tagName === "meta" && element.properties?.name === "og:imag");
for (const {url, frontmatter, html, src, excerpt: originalExcerpt} of blogPosts) {
const ogImageElement = findElementInHtml(html, (element) => (
element.tagName === "meta" && (element.properties?.name === "og:image" || element.properties?.property === "og:image")
));
const date = new Date(frontmatter.date);
if (Number.isNaN(date.getTime()))
throw new Error(`Invalid date for blog post: ${url}`);
else if (frontmatter.title == null || frontmatter.title === "")
throw new Error(`Invalid title for blog post: ${url}`);

let description: string | undefined = frontmatter.description;
if ((description == null || description == "") && src != null)
description = await getExcerptFromMarkdownFile(src);

if ((description == null || description === "") && originalExcerpt != null && originalExcerpt !== "")
description = originalExcerpt;

feed.addItem({
title: frontmatter.title,
id: resolveHref(url, true),
link: resolveHref(url, true),
description: excerpt || frontmatter.description || undefined,
description,
content: html,
author: [{
name: frontmatter.author?.name,
Expand Down
83 changes: 83 additions & 0 deletions .vitepress/utils/getExcerptFromMarkdownFile.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import {getMarkdownRenderer} from "./getMarkdownRenderer.js";

/**
 * Produces a short plain-text excerpt from raw markdown content.
 *
 * A leading front matter block (delimited by `---` lines at the very start of the content) is dropped,
 * and - when `removeTitle` is set - so is a leading `# ` heading line.
 * The remaining markdown is flattened to plain text and, when it's longer than `maxLength` characters,
 * cut at a word boundary if a space is available (otherwise it's cut hard at `maxLength`).
 *
 * @param markdownContent - the raw markdown text (the content itself, not a file path)
 * @param removeTitle - whether to drop a leading level-1 heading line before extracting the text
 * @param maxLength - the maximum number of characters in the returned excerpt
 * @returns the excerpt, or an empty string when there's no text left to extract
 */
export async function getExcerptFromMarkdownFile(
    markdownContent: string,
    removeTitle: boolean = true,
    maxLength: number = 80
): Promise<string> {
    const renderer = await getMarkdownRenderer();
    let content = markdownContent.trim().replaceAll("\r\n", "\n");

    if (content.startsWith("---")) {
        const frontMatterEndIndex = content.indexOf("\n---", "---".length);
        if (frontMatterEndIndex >= 0) {
            const nextNewLine = content.indexOf("\n", frontMatterEndIndex + "\n---".length);

            // no newline after the closing fence means the front matter is the entire document,
            // so there's no body left - without this, the front matter itself would be rendered as the excerpt
            content = nextNewLine >= 0
                ? content.slice(nextNewLine + 1).trim()
                : "";
        }
    }

    if (removeTitle && content.startsWith("# ")) {
        const nextNewLine = content.indexOf("\n");

        // a title line without a following newline is a title-only document,
        // so removing the title leaves nothing to extract
        content = nextNewLine >= 0
            ? content.slice(nextNewLine + "\n".length).trim()
            : "";
    }

    const renderedText = markdownToPlainText(renderer, content).trim();
    if (renderedText.length <= maxLength)
        return renderedText;

    // the character right after the limit is a space, so the limit already falls on a word boundary
    if (renderedText[maxLength] === " ")
        return renderedText.slice(0, maxLength).trimEnd();

    // otherwise, back up to the last space within the limit to avoid cutting a word in half
    const lastSpaceIndex = renderedText.lastIndexOf(" ", maxLength);
    if (lastSpaceIndex >= 0)
        return renderedText.slice(0, lastSpaceIndex).trimEnd();

    // a single word longer than the limit - there's no boundary to cut on
    return renderedText.slice(0, maxLength);
}

/**
 * Flattens markdown into a single plain-text string, using the tokens returned by the renderer's `parse`.
 *
 * Only the content of text-like tokens (`text`, `code_block`, `code_inline`, `emoji`, and `fence` when
 * `includeCode` is set) is kept. Every `*_close` token contributes a single separating space,
 * and a leading space of the text that follows such a separator is dropped to avoid doubling it.
 *
 * @param markdownIt - the renderer whose `parse` method is used to tokenize the markdown
 * @param markdown - the markdown text to flatten
 * @param includeNotes - when `false`, `inline` tokens at nesting level 2 are skipped
 * (presumably text nested inside a container such as a blockquote - confirm against the renderer's output)
 * @param includeCode - whether to include the content of `fence` tokens
 * @returns the concatenated plain text (not trimmed)
 */
function markdownToPlainText(
    markdownIt: Awaited<ReturnType<typeof getMarkdownRenderer>>,
    markdown: string,
    includeNotes: boolean = false,
    includeCode: boolean = false
) {
    const textTokenTypes = new Set(["text", "code_block", "code_inline", "emoji"]);
    if (includeCode)
        textTokenTypes.add("fence");

    const rootTokens = markdownIt.parse(markdown, {});

    function collect(tokens: typeof rootTokens): string {
        let result = "";
        let afterSeparator = false;

        // appends a piece of text, dropping its leading space when a separator space was just added
        const append = (piece: string) => {
            result += (afterSeparator && piece.startsWith(" "))
                ? piece.slice(" ".length)
                : piece;
        };

        for (const token of tokens) {
            const isSkippedNote = !includeNotes && token.type === "inline" && token.level === 2;
            if (isSkippedNote)
                continue;

            if (token.children != null) {
                // the separator state is intentionally left as-is after nested content
                append(collect(token.children));
                continue;
            }

            if (textTokenTypes.has(token.type)) {
                append(token.content);
                afterSeparator = false;
                continue;
            }

            if (token.type.endsWith("_close")) {
                result += " ";
                afterSeparator = true;
            }
        }

        return result;
    }

    return collect(rootTokens);
}
1 change: 1 addition & 0 deletions docs/cli/chat.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
outline: deep
description: "'chat' command reference"
---
# `chat` command

Expand Down
1 change: 1 addition & 0 deletions docs/cli/complete.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
outline: deep
description: "'complete' command reference"
---
# `complete` command

Expand Down
1 change: 1 addition & 0 deletions docs/cli/index.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
outline: deep
description: CLI commands reference
---
# CLI

Expand Down
1 change: 1 addition & 0 deletions docs/cli/infill.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
outline: deep
description: "'infill' command reference"
---
# `infill` command

Expand Down
1 change: 1 addition & 0 deletions docs/cli/init.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
outline: deep
description: "'init' command reference"
---
# `init` command

Expand Down
1 change: 1 addition & 0 deletions docs/cli/inspect.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
outline: deep
description: "'inspect' command reference"
---
# `inspect` command

Expand Down
1 change: 1 addition & 0 deletions docs/cli/inspect/estimate.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
outline: deep
description: "'inspect estimate' command reference"
---
# `inspect estimate` command

Expand Down
1 change: 1 addition & 0 deletions docs/cli/inspect/gguf.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
outline: deep
description: "'inspect gguf' command reference"
---
# `inspect gguf` command

Expand Down
1 change: 1 addition & 0 deletions docs/cli/inspect/gpu.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
outline: deep
description: "'inspect gpu' command reference"
---
# `inspect gpu` command

Expand Down
1 change: 1 addition & 0 deletions docs/cli/inspect/measure.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
outline: deep
description: "'inspect measure' command reference"
---
# `inspect measure` command

Expand Down
1 change: 1 addition & 0 deletions docs/cli/pull.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
outline: deep
description: "'pull' command reference"
---
# `pull` command

Expand Down
1 change: 1 addition & 0 deletions docs/cli/source.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
outline: deep
description: "'source' command reference"
---
# `source` command

Expand Down
1 change: 1 addition & 0 deletions docs/cli/source/build.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
outline: deep
description: "'source build' command reference"
---
# `source build` command

Expand Down
1 change: 1 addition & 0 deletions docs/cli/source/clear.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
outline: deep
description: "'source clear' command reference"
---
# `source clear` command

Expand Down
1 change: 1 addition & 0 deletions docs/cli/source/download.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
outline: deep
description: "'source download' command reference"
---
# `source download` command

Expand Down
1 change: 1 addition & 0 deletions docs/guide/CUDA.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
outline: [2, 3]
description: CUDA support in node-llama-cpp
---
# CUDA Support
> CUDA is a parallel computing platform and API created by NVIDIA for NVIDIA GPUs
Expand Down
3 changes: 3 additions & 0 deletions docs/guide/Metal.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
---
description: Metal support in node-llama-cpp
---
# Metal Support
> Metal is a low-level 3D graphics and compute API created by Apple for Apple platforms
Expand Down
1 change: 1 addition & 0 deletions docs/guide/Vulkan.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
outline: [2, 3]
description: Vulkan support in node-llama-cpp
---
# Using Vulkan
> Vulkan is a low-overhead, cross-platform 3D graphics and computing API
Expand Down
3 changes: 3 additions & 0 deletions docs/guide/awesome.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
---
description: Awesome projects that use node-llama-cpp
---
# Awesome `node-llama-cpp`
Awesome projects that use `node-llama-cpp`.

Expand Down
3 changes: 3 additions & 0 deletions docs/guide/batching.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
---
description: Using batching in node-llama-cpp
---
# Using Batching
> Batching is the process of grouping multiple input sequences together to be processed simultaneously,
> which improves computational efficiency and reduces overall inference times.
Expand Down
Loading

0 comments on commit 6a54163

Please sign in to comment.