Skip to content

Commit 73454d9

Browse files
authored
feat: shorter model URIs (#421)
* feat: shorter model URIs * feat: Hugging Face URL shortcuts * feat: `did you mean "<URI>"?` help log * fix: show a loader when resolving a URI from the CLI
1 parent 6e4bf3d commit 73454d9

21 files changed

+1192
-209
lines changed

docs/cli/pull.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ If a file already exists and its size matches the expected size, it will not be
2020

2121
The supported URI schemes are:
2222
- **HTTP:** `https://`, `http://`
23+
- **Hugging Face:** `hf:<user>/<model>:<quant>` (`:<quant>` is optional, [but recommended](../guide/downloading-models.md#hf-scheme-specify-quant))
2324
- **Hugging Face:** `hf:<user>/<model>/<file-path>#<branch>` (`#<branch>` is optional)
2425

2526
Learn more about using model URIs in the [Downloading Models guide](../guide/downloading-models.md#model-uris).

docs/guide/downloading-models.md

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,14 +75,19 @@ You can reference models using a URI instead of their full download URL when usi
7575

7676
When downloading a model from a URI, the model files will be prefixed with a corresponding adaptation of the URI.
7777

78-
To reference a model from Hugging Face, you can use the scheme
79-
<br/>
80-
`hf:<user>/<model>/<file-path>#<branch>` (`#<branch>` is optional).
78+
To reference a model from Hugging Face, you can use one of these schemes:
79+
* `hf:<user>/<model>:<quant>` (`:<quant>` is optional, [but recommended](#hf-scheme-specify-quant))
80+
* `hf:<user>/<model>/<file-path>#<branch>` (`#<branch>` is optional)
8181

82-
Here's an example usage of the Hugging Face URI scheme:
82+
Here are example usages of the Hugging Face URI scheme:
83+
::: code-group
84+
```[With quant]
85+
hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M
8386
```
87+
```[Specific file]
8488
hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
8589
```
90+
:::
8691

8792
When using a URI to reference a model,
8893
it's recommended [to add it to your `package.json` file](#cli) to ensure it's downloaded when running `npm install`,
@@ -98,7 +103,7 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
98103
const modelsDirectory = path.join(__dirname, "models");
99104

100105
const modelPath = await resolveModelFile(
101-
"hf:user/model/model-file.gguf",
106+
"hf:user/model:quant",
102107
modelsDirectory
103108
);
104109

@@ -114,6 +119,21 @@ When a file is being downloaded, the download progress is shown in the console b
114119
Set the [`cli`](../api/type-aliases/ResolveModelFileOptions#cli) option to `false` to disable this behavior.
115120
:::
116121

122+
::: tip TIP {#hf-scheme-specify-quant}
123+
When using the `hf:<user>/<model>:<quant>` scheme, always specify the quantization level in the URI (`:<quant>`).
124+
<br/>
125+
Doing this allows the resolver to resolve to a local model file without checking the model metadata on Hugging Face first,
126+
so it will be resolved offline and faster.
127+
:::
128+
129+
::: tip Shortcuts for quick experimentation {#uri-shortcuts}
130+
You can copy the page URLs of models and files on Hugging Face
131+
and use them with any of the [CLI commands](../cli/index.md).
132+
133+
**Important:** do not use these page URL shortcuts in production code, and do not commit them to your codebase.
134+
Resolving such page URL shortcuts is inefficient and unreliable for production use.
135+
:::
136+
117137
## Downloading Gated Models From Hugging Face {#hf-token}
118138
Some models on Hugging Face are "gated", meaning they require manual consent from you before you can download them.
119139

src/cli/commands/PullCommand.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,8 @@ export const PullCommand: CommandModule<object, PullCommand> = {
117117
deleteTempFileOnCancel: noTempFile,
118118
skipExisting: !override,
119119
fileName: filename || undefined,
120-
parallelDownloads: parallel
120+
parallelDownloads: parallel,
121+
_showUriResolvingProgress: !noProgress
121122
});
122123

123124
if (!override && downloader.totalFiles === 1 && await fs.pathExists(downloader.entrypointFilePath)) {

src/cli/commands/inspect/commands/InspectEstimateCommand.ts

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
import process from "process";
12
import {CommandModule} from "yargs";
23
import chalk from "chalk";
4+
import fs from "fs-extra";
35
import {readGgufFileInfo} from "../../../../gguf/readGgufFileInfo.js";
46
import {resolveHeaderFlag} from "../../../utils/resolveHeaderFlag.js";
57
import {withCliCommandDescriptionDocsUrl} from "../../../utils/withCliCommandDescriptionDocsUrl.js";
@@ -17,9 +19,11 @@ import {Llama} from "../../../../bindings/Llama.js";
1719
import {getGgufFileTypeName} from "../../../../gguf/utils/getGgufFileTypeName.js";
1820
import {getPrettyBuildGpuName} from "../../../../bindings/consts.js";
1921
import withOra from "../../../../utils/withOra.js";
20-
import {resolveModelDestination} from "../../../../utils/resolveModelDestination.js";
22+
import {resolveModelArgToFilePathOrUrl} from "../../../../utils/resolveModelDestination.js";
2123
import {printModelDestination} from "../../../utils/printModelDestination.js";
2224
import {toBytes} from "../../../utils/toBytes.js";
25+
import {printDidYouMeanUri} from "../../../utils/resolveCommandGgufPath.js";
26+
import {isModelUri} from "../../../../utils/parseModelUri.js";
2327

2428
type InspectEstimateCommand = {
2529
modelPath: string,
@@ -121,13 +125,23 @@ export const InspectEstimateCommand: CommandModule<object, InspectEstimateComman
121125
if (contextSizeArg === -1) contextSizeArg = undefined;
122126
if (contextSizeArg === -2) contextSizeArg = "train";
123127

124-
const resolvedModelDestination = resolveModelDestination(ggufPath);
125-
const resolvedGgufPath = resolvedModelDestination.type == "file"
126-
? resolvedModelDestination.path
127-
: resolvedModelDestination.url;
128-
129128
const headers = resolveHeaderFlag(headerArg);
130129

130+
const [resolvedModelDestination, resolvedGgufPath] = isModelUri(ggufPath)
131+
? await withOra({
132+
loading: chalk.blue("Resolving model URI"),
133+
success: chalk.blue("Resolved model URI"),
134+
fail: chalk.blue("Failed to resolve model URI"),
135+
noSuccessLiveStatus: true
136+
}, () => resolveModelArgToFilePathOrUrl(ggufPath, headers))
137+
: await resolveModelArgToFilePathOrUrl(ggufPath, headers);
138+
139+
if (resolvedModelDestination.type === "file" && !await fs.pathExists(resolvedGgufPath)) {
140+
console.error(`${chalk.red("File does not exist:")} ${resolvedGgufPath}`);
141+
printDidYouMeanUri(ggufPath);
142+
process.exit(1);
143+
}
144+
131145
const llama = gpu == null
132146
? await getLlama("lastBuild", {
133147
logLevel: LlamaLogLevel.error

src/cli/commands/inspect/commands/InspectGgufCommand.ts

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,13 @@ import {resolveHeaderFlag} from "../../../utils/resolveHeaderFlag.js";
1010
import {withCliCommandDescriptionDocsUrl} from "../../../utils/withCliCommandDescriptionDocsUrl.js";
1111
import {documentationPageUrls} from "../../../../config.js";
1212
import withOra from "../../../../utils/withOra.js";
13-
import {resolveModelDestination} from "../../../../utils/resolveModelDestination.js";
13+
import {resolveModelArgToFilePathOrUrl} from "../../../../utils/resolveModelDestination.js";
1414
import {printModelDestination} from "../../../utils/printModelDestination.js";
1515
import {getGgufMetadataKeyValue} from "../../../../gguf/utils/getGgufMetadataKeyValue.js";
1616
import {GgufTensorInfo} from "../../../../gguf/types/GgufTensorInfoTypes.js";
1717
import {toBytes} from "../../../utils/toBytes.js";
18+
import {printDidYouMeanUri} from "../../../utils/resolveCommandGgufPath.js";
19+
import {isModelUri} from "../../../../utils/parseModelUri.js";
1820

1921
type InspectGgufCommand = {
2022
modelPath: string,
@@ -91,13 +93,23 @@ export const InspectGgufCommand: CommandModule<object, InspectGgufCommand> = {
9193
async handler({
9294
modelPath: ggufPath, header: headerArg, key, noSplice, fullTensorInfo, fullMetadataArrays, plainJson, outputToJsonFile
9395
}: InspectGgufCommand) {
94-
const resolvedModelDestination = resolveModelDestination(ggufPath);
95-
const resolvedGgufPath = resolvedModelDestination.type == "file"
96-
? resolvedModelDestination.path
97-
: resolvedModelDestination.url;
98-
9996
const headers = resolveHeaderFlag(headerArg);
10097

98+
const [resolvedModelDestination, resolvedGgufPath] = (!plainJson && isModelUri(ggufPath))
99+
? await withOra({
100+
loading: chalk.blue("Resolving model URI"),
101+
success: chalk.blue("Resolved model URI"),
102+
fail: chalk.blue("Failed to resolve model URI"),
103+
noSuccessLiveStatus: true
104+
}, () => resolveModelArgToFilePathOrUrl(ggufPath, headers))
105+
: await resolveModelArgToFilePathOrUrl(ggufPath, headers);
106+
107+
if (resolvedModelDestination.type === "file" && !await fs.pathExists(resolvedGgufPath)) {
108+
console.error(`${chalk.red("File does not exist:")} ${resolvedGgufPath}`);
109+
printDidYouMeanUri(ggufPath);
110+
process.exit(1);
111+
}
112+
101113
if (!plainJson)
102114
printModelDestination(resolvedModelDestination);
103115

0 commit comments

Comments
 (0)