-
-
Notifications
You must be signed in to change notification settings - Fork 112
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
docs: choosing an embedding model (#396)
- Loading branch information
Showing
43 changed files
with
222 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
import {getMarkdownRenderer} from "./getMarkdownRenderer.js"; | ||
|
||
/**
 * Builds a short plain-text excerpt from the raw content of a markdown file.
 *
 * The content is normalized to `\n` line endings, a leading `---` front matter
 * block is dropped, the leading `# ` title line is optionally dropped, and the
 * remaining markdown is flattened to plain text and truncated to at most
 * `maxLength` characters, preferring to cut at a space so a word is not split.
 *
 * @param markdownContent - Raw markdown file content, front matter included.
 * @param removeTitle - When `true`, a first line starting with `# ` is removed.
 * @param maxLength - Maximum excerpt length in characters; negative values are treated as `0`.
 * @returns The excerpt; an empty string when nothing is left after stripping.
 */
export async function getExcerptFromMarkdownFile(
    markdownContent: string,
    removeTitle: boolean = true,
    maxLength: number = 80
): Promise<string> {
    const renderer = await getMarkdownRenderer();

    let content = markdownContent.trim().replaceAll("\r\n", "\n");
    content = stripFrontMatter(content);

    if (removeTitle)
        content = stripLeadingTitle(content);

    const renderedText = markdownToPlainText(renderer, content).trim();

    // A negative limit would make `slice(0, maxLength)` count from the end of the text.
    return truncateAtWordBoundary(renderedText, Math.max(0, maxLength));
}

/** Removes a leading `---` … `---` front matter block; unterminated front matter is left as-is. */
function stripFrontMatter(content: string): string {
    if (!content.startsWith("---"))
        return content;

    const closingFenceIndex = content.indexOf("\n---", "---".length);
    if (closingFenceIndex < 0)
        return content;

    const nextNewLine = content.indexOf("\n", closingFenceIndex + "\n---".length);

    // The closing fence is the last line, so the document holds nothing but front matter.
    if (nextNewLine < 0)
        return "";

    return content.slice(nextNewLine + "\n".length).trim();
}

/** Removes a first line that starts with `# `, including when it is the only line. */
function stripLeadingTitle(content: string): string {
    if (!content.startsWith("# "))
        return content;

    const nextNewLine = content.indexOf("\n");
    if (nextNewLine < 0)
        return "";

    return content.slice(nextNewLine + "\n".length).trim();
}

/** Cuts `text` to at most `maxLength` characters, backing up to the previous space when possible. */
function truncateAtWordBoundary(text: string, maxLength: number): string {
    if (text.length <= maxLength)
        return text;

    // The cut falls exactly between two words.
    if (text.charAt(maxLength) === " ")
        return text.slice(0, maxLength).trimEnd();

    const lastSpaceIndex = text.lastIndexOf(" ", maxLength);
    if (lastSpaceIndex > 0)
        return text.slice(0, lastSpaceIndex).trimEnd();

    // No space to back up to, so a hard cut is the only option.
    return text.slice(0, maxLength);
}
|
||
/**
 * Flattens markdown into a single line of plain text using the renderer's parse tokens.
 *
 * Text, inline code, indented code blocks and emoji tokens contribute their content.
 * Line breaks inside a paragraph and the end of a block-level element each become
 * a single space; inline closing tokens (bold, links, …) add nothing, so no space
 * appears before following punctuation.
 *
 * @param markdownIt - Renderer whose `parse(markdown, env)` returns the token list to flatten.
 * @param markdown - Markdown source to flatten.
 * @param includeNotes - When `false`, `inline` tokens at nesting level 2 are skipped
 * (presumably the body of notes / containers / blockquotes — confirm against the renderer setup).
 * @param includeCode - When `true`, the content of fenced code blocks is included as well.
 * @returns The flattened text; it may carry a trailing separator space, so callers should trim it.
 */
function markdownToPlainText(
    markdownIt: Awaited<ReturnType<typeof getMarkdownRenderer>>,
    markdown: string,
    includeNotes: boolean = false,
    includeCode: boolean = false
) {
    const env = {};
    const pageTokens = markdownIt.parse(markdown, env);
    const textTokenTypes = ["text", "code_block", "code_inline", "emoji"];

    function toText(tokens: typeof pageTokens) {
        let text = "";
        let pendingSeparator = false;

        function appendText(piece: string) {
            // A separator was just emitted, so don't let the next piece double the space.
            const addition = (pendingSeparator && piece.startsWith(" "))
                ? piece.slice(" ".length)
                : piece;

            if (addition === "")
                return;

            text += addition;
            pendingSeparator = false;
        }

        function appendSeparator() {
            // Consecutive closing tokens (paragraph, list item, list) must yield one space, not several.
            if (!text.endsWith(" "))
                text += " ";

            pendingSeparator = true;
        }

        for (const token of tokens) {
            if (!includeNotes && token.type === "inline" && token.level === 2)
                continue;

            if (token.children != null)
                appendText(toText(token.children));
            else if (textTokenTypes.includes(token.type) || (includeCode && token.type === "fence"))
                appendText(token.content);
            else if (token.type === "softbreak" || token.type === "hardbreak")
                // A line break inside a paragraph separates words just like a space.
                appendSeparator();
            else if (token.type.endsWith("_close") && token.block !== false)
                // Only block-level closes separate content; inline closes would put a space before punctuation.
                appendSeparator();
        }

        return text;
    }

    return toText(pageTokens);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
--- | ||
outline: deep | ||
description: "'chat' command reference" | ||
--- | ||
# `chat` command | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
--- | ||
outline: deep | ||
description: "'complete' command reference" | ||
--- | ||
# `complete` command | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
--- | ||
outline: deep | ||
description: CLI commands reference | ||
--- | ||
# CLI | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
--- | ||
outline: deep | ||
description: "'infill' command reference" | ||
--- | ||
# `infill` command | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
--- | ||
outline: deep | ||
description: "'init' command reference" | ||
--- | ||
# `init` command | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
--- | ||
outline: deep | ||
description: "'inspect' command reference" | ||
--- | ||
# `inspect` command | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
--- | ||
outline: deep | ||
description: "'inspect estimate' command reference" | ||
--- | ||
# `inspect estimate` command | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
--- | ||
outline: deep | ||
description: "'inspect gguf' command reference" | ||
--- | ||
# `inspect gguf` command | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
--- | ||
outline: deep | ||
description: "'inspect gpu' command reference" | ||
--- | ||
# `inspect gpu` command | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
--- | ||
outline: deep | ||
description: "'inspect measure' command reference" | ||
--- | ||
# `inspect measure` command | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
--- | ||
outline: deep | ||
description: "'pull' command reference" | ||
--- | ||
# `pull` command | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
--- | ||
outline: deep | ||
description: "'source' command reference" | ||
--- | ||
# `source` command | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
--- | ||
outline: deep | ||
description: "'source build' command reference" | ||
--- | ||
# `source build` command | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
--- | ||
outline: deep | ||
description: "'source clear' command reference" | ||
--- | ||
# `source clear` command | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
--- | ||
outline: deep | ||
description: "'source download' command reference" | ||
--- | ||
# `source download` command | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.