withcatai
diff --git a/‎docs/cli/pull.md
Lines changed: 1 addition & 0 deletions b/‎docs/cli/pull.md
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/guide/downloading-models.md
Lines changed: 25 additions & 5 deletions b/‎docs/guide/downloading-models.md
Lines changed: 25 additions & 5 deletions
diff --git a/‎src/cli/commands/PullCommand.ts
Lines changed: 2 additions & 1 deletion b/‎src/cli/commands/PullCommand.ts
Lines changed: 2 additions & 1 deletion
diff --git a/‎src/cli/commands/inspect/commands/InspectEstimateCommand.ts
Lines changed: 20 additions & 6 deletions b/‎src/cli/commands/inspect/commands/InspectEstimateCommand.ts
Lines changed: 20 additions & 6 deletions
diff --git a/‎src/cli/commands/inspect/commands/InspectGgufCommand.ts
Lines changed: 18 additions & 6 deletions b/‎src/cli/commands/inspect/commands/InspectGgufCommand.ts
Lines changed: 18 additions & 6 deletions
@@ -20,6 +20,7 @@ If a file already exists and its size matches the expected size, it will not be
 
 The supported URI schemes are:
 - **HTTP:** `https://`, `http://`
+- **Hugging Face:** `hf:<user>/<model>:<quant>` (`:<quant>` is optional, [but recommended](../guide/downloading-models.md#hf-scheme-specify-quant))
 - **Hugging Face:** `hf:<user>/<model>/<file-path>#<branch>` (`#<branch>` is optional)
 
 Learn more about using model URIs in the [Downloading Models guide](../guide/downloading-models.md#model-uris).
 
@@ -75,14 +75,19 @@ You can reference models using a URI instead of their full download URL when usi
 
 When downloading a model from a URI, the model files will be prefixed with a corresponding adaptation of the URI.
 
-To reference a model from Hugging Face, you can use the scheme
-<br/>
-`hf:<user>/<model>/<file-path>#<branch>` (`#<branch>` is optional).
+To reference a model from Hugging Face, you can use one of these schemes:
+* `hf:<user>/<model>:<quant>` (`:<quant>` is optional, [but recommended](#hf-scheme-specify-quant))
+* `hf:<user>/<model>/<file-path>#<branch>` (`#<branch>` is optional)
 
-Here's an example usage of the Hugging Face URI scheme:
+Here are example usages of the Hugging Face URI scheme:
+::: code-group
+```[With quant]
+hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M
 ```
+```[Specific file]
 hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
 ```
+:::
 
 When using a URI to reference a model,
 it's recommended [to add it to your `package.json` file](#cli) to ensure it's downloaded when running `npm install`,
@@ -98,7 +103,7 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
 const modelsDirectory = path.join(__dirname, "models");
 
 const modelPath = await resolveModelFile(
-    "hf:user/model/model-file.gguf",
+    "hf:user/model:quant",
     modelsDirectory
 );
 
@@ -114,6 +119,21 @@ When a file is being downloaded, the download progress is shown in the console b
 Set the [`cli`](../api/type-aliases/ResolveModelFileOptions#cli) option to `false` to disable this behavior.
 :::
 
+::: tip TIP {#hf-scheme-specify-quant}
+When using the `hf:<user>/<model>:<quant>` scheme, always specify the quantization level in the URI (`:<quant>`).
+<br/>
+Doing this allows the resolver to resolve to a local model file without checking the model metadata on Hugging Face first,
+so it will be resolved offline and faster.
+:::
+
+::: tip Shortcuts for quick experimentation {#uri-shortcuts}
+You can copy the page URLs of models and files on Hugging Face
+and use them with any of the [CLI commands](../cli/index.md).
+
+**Important:** do not use these page URL shortcuts in production code, and do not commit them to your codebase.
+Resolving such page URL shortcuts is inefficient and unreliable for production use.
+:::
+
 ## Downloading Gated Models From Hugging Face {#hf-token}
 Some models on Hugging Face are "gated", meaning they require a manual consent from you before you can download them.
 
 
@@ -117,7 +117,8 @@ export const PullCommand: CommandModule<object, PullCommand> = {
                 deleteTempFileOnCancel: noTempFile,
                 skipExisting: !override,
                 fileName: filename || undefined,
-                parallelDownloads: parallel
+                parallelDownloads: parallel,
+                _showUriResolvingProgress: !noProgress
             });
 
             if (!override && downloader.totalFiles === 1 && await fs.pathExists(downloader.entrypointFilePath)) {
 
@@ -1,5 +1,7 @@
+import process from "process";
 import {CommandModule} from "yargs";
 import chalk from "chalk";
+import fs from "fs-extra";
 import {readGgufFileInfo} from "../../../../gguf/readGgufFileInfo.js";
 import {resolveHeaderFlag} from "../../../utils/resolveHeaderFlag.js";
 import {withCliCommandDescriptionDocsUrl} from "../../../utils/withCliCommandDescriptionDocsUrl.js";
@@ -17,9 +19,11 @@ import {Llama} from "../../../../bindings/Llama.js";
 import {getGgufFileTypeName} from "../../../../gguf/utils/getGgufFileTypeName.js";
 import {getPrettyBuildGpuName} from "../../../../bindings/consts.js";
 import withOra from "../../../../utils/withOra.js";
-import {resolveModelDestination} from "../../../../utils/resolveModelDestination.js";
+import {resolveModelArgToFilePathOrUrl} from "../../../../utils/resolveModelDestination.js";
 import {printModelDestination} from "../../../utils/printModelDestination.js";
 import {toBytes} from "../../../utils/toBytes.js";
+import {printDidYouMeanUri} from "../../../utils/resolveCommandGgufPath.js";
+import {isModelUri} from "../../../../utils/parseModelUri.js";
 
 type InspectEstimateCommand = {
     modelPath: string,
@@ -121,13 +125,23 @@ export const InspectEstimateCommand: CommandModule<object, InspectEstimateComman
         if (contextSizeArg === -1) contextSizeArg = undefined;
         if (contextSizeArg === -2) contextSizeArg = "train";
 
-        const resolvedModelDestination = resolveModelDestination(ggufPath);
-        const resolvedGgufPath = resolvedModelDestination.type == "file"
-            ? resolvedModelDestination.path
-            : resolvedModelDestination.url;
-
         const headers = resolveHeaderFlag(headerArg);
 
+        const [resolvedModelDestination, resolvedGgufPath] = isModelUri(ggufPath)
+            ? await withOra({
+                loading: chalk.blue("Resolving model URI"),
+                success: chalk.blue("Resolved model URI"),
+                fail: chalk.blue("Failed to resolve model URI"),
+                noSuccessLiveStatus: true
+            }, () => resolveModelArgToFilePathOrUrl(ggufPath, headers))
+            : await resolveModelArgToFilePathOrUrl(ggufPath, headers);
+
+        if (resolvedModelDestination.type === "file" && !await fs.pathExists(resolvedGgufPath)) {
+            console.error(`${chalk.red("File does not exist:")} ${resolvedGgufPath}`);
+            printDidYouMeanUri(ggufPath);
+            process.exit(1);
+        }
+
         const llama = gpu == null
             ? await getLlama("lastBuild", {
                 logLevel: LlamaLogLevel.error
 
@@ -10,11 +10,13 @@ import {resolveHeaderFlag} from "../../../utils/resolveHeaderFlag.js";
 import {withCliCommandDescriptionDocsUrl} from "../../../utils/withCliCommandDescriptionDocsUrl.js";
 import {documentationPageUrls} from "../../../../config.js";
 import withOra from "../../../../utils/withOra.js";
-import {resolveModelDestination} from "../../../../utils/resolveModelDestination.js";
+import {resolveModelArgToFilePathOrUrl} from "../../../../utils/resolveModelDestination.js";
 import {printModelDestination} from "../../../utils/printModelDestination.js";
 import {getGgufMetadataKeyValue} from "../../../../gguf/utils/getGgufMetadataKeyValue.js";
 import {GgufTensorInfo} from "../../../../gguf/types/GgufTensorInfoTypes.js";
 import {toBytes} from "../../../utils/toBytes.js";
+import {printDidYouMeanUri} from "../../../utils/resolveCommandGgufPath.js";
+import {isModelUri} from "../../../../utils/parseModelUri.js";
 
 type InspectGgufCommand = {
     modelPath: string,
@@ -91,13 +93,23 @@ export const InspectGgufCommand: CommandModule<object, InspectGgufCommand> = {
     async handler({
         modelPath: ggufPath, header: headerArg, key, noSplice, fullTensorInfo, fullMetadataArrays, plainJson, outputToJsonFile
     }: InspectGgufCommand) {
-        const resolvedModelDestination = resolveModelDestination(ggufPath);
-        const resolvedGgufPath = resolvedModelDestination.type == "file"
-            ? resolvedModelDestination.path
-            : resolvedModelDestination.url;
-
         const headers = resolveHeaderFlag(headerArg);
 
+        const [resolvedModelDestination, resolvedGgufPath] = (!plainJson && isModelUri(ggufPath))
+            ? await withOra({
+                loading: chalk.blue("Resolving model URI"),
+                success: chalk.blue("Resolved model URI"),
+                fail: chalk.blue("Failed to resolve model URI"),
+                noSuccessLiveStatus: true
+            }, () => resolveModelArgToFilePathOrUrl(ggufPath, headers))
+            : await resolveModelArgToFilePathOrUrl(ggufPath, headers);
+
+        if (resolvedModelDestination.type === "file" && !await fs.pathExists(resolvedGgufPath)) {
+            console.error(`${chalk.red("File does not exist:")} ${resolvedGgufPath}`);
+            printDidYouMeanUri(ggufPath);
+            process.exit(1);
+        }
+
         if (!plainJson)
             printModelDestination(resolvedModelDestination);