diff --git a/.github/workflows/ollama.yml b/.github/workflows/ollama.yml
index ca46ebd4c7..38b73f67cc 100644
--- a/.github/workflows/ollama.yml
+++ b/.github/workflows/ollama.yml
@@ -32,5 +32,6 @@ jobs:
run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
- name: run summarize-ollama-phi3
run: yarn test:summarize --model ollama:phi3.5 --out ./temp/summarize-ollama-phi3
-# - name: run vector-search
-# run: yarn run:script vector-search --model ollama:phi3 --out ./temp/rag
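+ # point GenAIScript at the Ollama container started above (published on localhost:11434)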
+ env:
+ OLLAMA_HOST: "http://localhost:11434"
+
diff --git a/docs/src/content/docs/getting-started/configuration.mdx b/docs/src/content/docs/getting-started/configuration.mdx
index 1da01e221a..6e368bd475 100644
--- a/docs/src/content/docs/getting-started/configuration.mdx
+++ b/docs/src/content/docs/getting-started/configuration.mdx
@@ -720,12 +720,7 @@ script({
-## Local Models
-
-There are many projects that allow you to run models locally on your machine,
-or in a container.
-
-### LocalAI
+## LocalAI
[LocalAI](https://localai.io/) acts as a drop-in replacement REST API that’s compatible
with OpenAI API specifications for local inferencing. It uses free Open Source models
@@ -758,7 +753,7 @@ OPENAI_API_TYPE=localai
-### Ollama
+## Ollama
[Ollama](https://ollama.ai/) is a desktop application that lets you download and run models locally.
@@ -766,6 +761,12 @@ Running tools locally may require additional GPU resources depending on the mode
Use the `ollama` provider to access Ollama models.
+:::note
+
+GenAIScript currently uses Ollama's OpenAI API compatibility layer.
+
+:::
+
@@ -795,6 +796,18 @@ GenAIScript will automatically pull the model, which may take some time dependin
+
+If Ollama runs on a server, on a different computer, or on a different port,
+set the `OLLAMA_HOST` environment variable so GenAIScript can reach that Ollama instance.
+A bare host or IP address is assumed to use `http://` and the default Ollama port `11434`;
+the OpenAI-compatible `/v1` suffix is appended automatically.
+
+```txt title=".env"
+OLLAMA_HOST=https://<hostname>:<port> # server url
+OLLAMA_HOST=0.0.0.0:12345 # different port
+```
+
+
+
@@ -817,21 +830,14 @@ script({
})
```
-If Ollama runs on a server or a different computer, you have to configure the `OLLAMA_API_BASE` environment variable.
-
-```txt OLLAMA_API_BASE
-OLLAMA_API_BASE=http://<hostname>:<port>/v1
-```
-As GenAIScript uses OpenAI style api, you must use the `/v1` endpoints and not `/api`.
-
-### Llamafile
+## Llamafile
[https://llamafile.ai/](https://llamafile.ai/) is a single file desktop application
that allows you to run an LLM locally.
The provider is `llamafile` and the model name is ignored.
-### Jan, LMStudio, LLaMA.cpp
+## Jan, LMStudio, LLaMA.cpp
[Jan](https://jan.ai/), [LMStudio](https://lmstudio.ai/),
[LLaMA.cpp](https://github.com/ggerganov/llama.cpp/tree/master/examples/server)
@@ -855,7 +861,7 @@ OPENAI_API_BASE=http://localhost:...
-### Model specific environment variables
+## Model specific environment variables
You can provide different environment variables
for each named model by using the `PROVIDER_MODEL_API_...` prefix or `PROVIDER_API_...` prefix.
diff --git a/package.json b/package.json
index cc423a2283..d6402ea3c4 100644
--- a/package.json
+++ b/package.json
@@ -71,7 +71,9 @@
"prd": "node packages/cli/built/genaiscript.cjs run prd -prd",
"genai": "node packages/cli/built/genaiscript.cjs run",
"upgrade:deps": "zx scripts/upgrade-deps.mjs",
- "cli": "node packages/cli/built/genaiscript.cjs"
+ "cli": "node packages/cli/built/genaiscript.cjs",
+ "ollama": "docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama",
+ "ollama:stop": "docker stop ollama && docker rm ollama"
},
"release-it": {
"github": {
diff --git a/packages/core/src/connection.ts b/packages/core/src/connection.ts
index af4a549a54..0588a64335 100644
--- a/packages/core/src/connection.ts
+++ b/packages/core/src/connection.ts
@@ -30,6 +30,7 @@ import {
AzureCredentialsType,
} from "./host"
import { parseModelIdentifier } from "./models"
+import { parseHostVariable } from "./ollama"
import { normalizeFloat, trimTrailingSlash } from "./util"
export async function parseDefaultsFromEnv(env: Record<string, string>) {
@@ -276,6 +277,19 @@ export async function parseTokenFromEnv(
}
}
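+ // Ollama: resolve the base URL from OLLAMA_HOST / OLLAMA_API_BASE and use the OpenAI-compatible endpoints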
+ if (provider === MODEL_PROVIDER_OLLAMA) {
+ const host = parseHostVariable(env)
+ const base = cleanApiBase(host)
+ return {
+ provider,
+ model,
+ base,
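+ // Ollama requires no API key; a placeholder token satisfies the OpenAI-style client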
+ token: "ollama",
+ type: "openai",
+ source: "env: OLLAMA_HOST",
+ }
+ }
+
const prefixes = [
tag ? `${provider}_${model}_${tag}` : undefined,
provider ? `${provider}_${model}` : undefined,
@@ -307,17 +321,6 @@ export async function parseTokenFromEnv(
}
}
- if (provider === MODEL_PROVIDER_OLLAMA) {
- return {
- provider,
- model,
- base: OLLAMA_API_BASE,
- token: "ollama",
- type: "openai",
- source: "default",
- }
- }
-
if (provider === MODEL_PROVIDER_LLAMAFILE) {
return {
provider,
@@ -358,6 +361,13 @@ export async function parseTokenFromEnv(
`/openai/deployments`
return b
}
+
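+ // normalize the resolved host: trim trailing slashes and append /v1 when missing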
+ function cleanApiBase(b: string) {
+ if (!b) return b
+ b = trimTrailingSlash(b)
+ if (!/\/v1$/.test(b)) b += "/v1"
+ return b
+ }
}
export async function updateConnectionConfiguration(
diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts
index 510206423d..a001fdfc35 100644
--- a/packages/core/src/constants.ts
+++ b/packages/core/src/constants.ts
@@ -124,6 +124,7 @@ export const PROMPT_FENCE = "```"
export const MARKDOWN_PROMPT_FENCE = "`````"
export const OPENAI_API_BASE = "https://api.openai.com/v1"
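+// default port of a local Ollama server, used when OLLAMA_HOST omits one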
+export const OLLAMA_DEFAUT_PORT = 11434
export const OLLAMA_API_BASE = "http://localhost:11434/v1"
export const LLAMAFILE_API_BASE = "http://localhost:8080/v1"
export const LOCALAI_API_BASE = "http://localhost:8080/v1"
diff --git a/packages/core/src/ollama.test.ts b/packages/core/src/ollama.test.ts
new file mode 100644
index 0000000000..dcf8cd645e
--- /dev/null
+++ b/packages/core/src/ollama.test.ts
@@ -0,0 +1,54 @@
+import { describe, test } from "node:test"
+import assert from "node:assert/strict"
+import { parseHostVariable } from "./ollama"
+import { OLLAMA_API_BASE, OLLAMA_DEFAUT_PORT } from "./constants"
+
+describe("parseHostVariable", () => {
+ test("parses OLLAMA_HOST environment variable correctly", () => {
+ const env = { OLLAMA_HOST: "http://localhost:3000" }
+ const result = parseHostVariable(env)
+ assert.strictEqual(result, "http://localhost:3000/")
+ })
+
+ test("parses OLLAMA_API_BASE environment variable correctly", () => {
+ const env = { OLLAMA_API_BASE: "http://api.ollama.com" }
+ const result = parseHostVariable(env)
+ assert.strictEqual(result, "http://api.ollama.com/")
+ })
+
+ test("falls back to OLLAMA_API_BASE constant if no environment variable is set", () => {
+ const env = {}
+ const result = parseHostVariable(env)
+ assert.strictEqual(result, OLLAMA_API_BASE)
+ })
+
+ test("parses IP address with port correctly", () => {
+ const env = { OLLAMA_HOST: "192.168.1.1:8080" }
+ const result = parseHostVariable(env)
+ assert.strictEqual(result, "http://192.168.1.1:8080")
+ })
+
+ test("parses IP address without port correctly", () => {
+ const env = { OLLAMA_HOST: "192.168.1.1" }
+ const result = parseHostVariable(env)
+ assert.strictEqual(result, `http://192.168.1.1:${OLLAMA_DEFAUT_PORT}`)
+ })
+
+ test("parses 0.0.0.0 with port correctly", () => {
+ const env = { OLLAMA_HOST: "0.0.0.0:4000" }
+ const result = parseHostVariable(env)
+ assert.strictEqual(result, "http://0.0.0.0:4000")
+ })
+
+ test("parses localhost with port correctly", () => {
+ const env = { OLLAMA_HOST: "localhost:4000" }
+ const result = parseHostVariable(env)
+ assert.strictEqual(result, "http://localhost:4000")
+ })
+
+ test("parses 0.0.0.0 without port correctly", () => {
+ const env = { OLLAMA_HOST: "0.0.0.0" }
+ const result = parseHostVariable(env)
+ assert.strictEqual(result, `http://0.0.0.0:${OLLAMA_DEFAUT_PORT}`)
+ })
+})
diff --git a/packages/core/src/ollama.ts b/packages/core/src/ollama.ts
index 2eac4a940f..dc986f39b9 100644
--- a/packages/core/src/ollama.ts
+++ b/packages/core/src/ollama.ts
@@ -1,11 +1,16 @@
// Import necessary modules and types for handling chat completions and model management
import { ChatCompletionHandler, LanguageModel, LanguageModelInfo } from "./chat"
-import { MODEL_PROVIDER_OLLAMA } from "./constants"
+import {
+ MODEL_PROVIDER_OLLAMA,
+ OLLAMA_API_BASE,
+ OLLAMA_DEFAUT_PORT,
+} from "./constants"
import { isRequestError } from "./error"
import { createFetch } from "./fetch"
import { parseModelIdentifier } from "./models"
import { OpenAIChatCompletion } from "./openai"
import { LanguageModelConfiguration, host } from "./host"
+import { URL } from "url"
/**
* Handles chat completion requests using the Ollama model.
@@ -105,3 +110,17 @@ export const OllamaModel = Object.freeze({
id: MODEL_PROVIDER_OLLAMA,
listModels,
})
+
+export function parseHostVariable(env: Record<string, string>) {
+ const s = (
+ env.OLLAMA_HOST ||
+ env.OLLAMA_API_BASE ||
+ OLLAMA_API_BASE
+ )?.trim()
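+ // bare host or IPv4 address, optionally with a port: add the http scheme and default port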
+ const ipm =
+ /^(?<address>(localhost|\d+\.\d+\.\d+\.\d+))(:(?<port>\d+))?$/i.exec(s)
+ if (ipm)
+ return `http://${ipm.groups.address}:${ipm.groups.port || OLLAMA_DEFAUT_PORT}`
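+ // otherwise the value must already be a full URL; normalize it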
+ const url = new URL(s)
+ return url.href
+}