diff --git a/.github/workflows/ollama.yml b/.github/workflows/ollama.yml
index ca46ebd4c7..38b73f67cc 100644
--- a/.github/workflows/ollama.yml
+++ b/.github/workflows/ollama.yml
@@ -32,5 +32,6 @@ jobs:
         run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
       - name: run summarize-ollama-phi3
         run: yarn test:summarize --model ollama:phi3.5 --out ./temp/summarize-ollama-phi3
-#      - name: run vector-search
-#        run: yarn run:script vector-search --model ollama:phi3 --out ./temp/rag
+        env:
+          OLLAMA_HOST: "http://localhost:11434"
+
diff --git a/docs/src/content/docs/getting-started/configuration.mdx b/docs/src/content/docs/getting-started/configuration.mdx
index 1da01e221a..6e368bd475 100644
--- a/docs/src/content/docs/getting-started/configuration.mdx
+++ b/docs/src/content/docs/getting-started/configuration.mdx
@@ -720,12 +720,7 @@ script({
 
-## Local Models
-
-There are many projects that allow you to run models locally on your machine,
-or in a container.
-
-### LocalAI
+## LocalAI
 
 [LocalAI](https://localai.io/) act as a drop-in replacement REST API that’s
 compatible with OpenAI API specifications for local inferencing. It uses free Open Source models
@@ -758,7 +753,7 @@ OPENAI_API_TYPE=localai
 
-### Ollama
+## Ollama
 
 [Ollama](https://ollama.ai/) is a desktop application that let you download and run model locally.
@@ -766,6 +761,12 @@ Running tools locally may require additional GPU resources depending on the mode
 
 Use the `ollama` provider to access Ollama models.
 
+:::note
+
+GenAIScript is currently using the OpenAI API compatibility layer of Ollama.
+
+:::
+
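The workflow change pins `OLLAMA_HOST` to the default local endpoint, and the new docs note states that GenAIScript talks to Ollama through its OpenAI API compatibility layer. As a rough, illustrative sketch (not part of this diff), that layer can be exercised directly against a local instance; it assumes Ollama is listening on `localhost:11434` and that `phi3.5` has already been pulled:

```ts
// Minimal check of Ollama's OpenAI-compatible endpoint (the layer the note above refers to).
// Assumptions: Ollama runs on localhost:11434 and `ollama pull phi3.5` has been executed.
const res = await fetch("http://localhost:11434/v1/chat/completions", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
        model: "phi3.5",
        messages: [{ role: "user", content: "Say hello in one word." }],
    }),
})
const data = await res.json()
console.log(data.choices[0].message.content)
```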
@@ -795,6 +796,18 @@ GenAIScript will automatically pull the model, which may take some time depending
+
 1.
+
+If Ollama runs on a server, a different computer, or a different port,
+you have to configure the `OLLAMA_HOST` environment variable to connect to the remote Ollama server.
+
+```txt title=".env"
+OLLAMA_HOST=https://<IP or domain>:<port>/ # server url
+OLLAMA_HOST=0.0.0.0:12345 # different port
+```
+
+
 2.
+
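When `OLLAMA_HOST` points at a remote machine, it can help to confirm the server is reachable before running a script. A minimal, illustrative check (not part of this diff), assuming `OLLAMA_HOST` holds a full URL like the first form above:

```ts
// Illustrative only: verify the Ollama server from .env is reachable before running scripts.
// Assumes OLLAMA_HOST is a full URL; bare host:port values would first need an http:// prefix,
// which is what the parseHostVariable helper added in this PR takes care of.
const host = process.env.OLLAMA_HOST ?? "http://localhost:11434"
const res = await fetch(new URL("/api/tags", host))
if (!res.ok) throw new Error(`Ollama not reachable at ${host}: ${res.status}`)
const { models } = await res.json()
console.log(models.map((m: { name: string }) => m.name))
```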
@@ -817,21 +830,14 @@ script({
 })
 ```
 
-If Ollama runs on a server or a different computer, you have to configure the `OLLAMA_API_BASE` environment variable.
-
-```txt OLLAMA_API_BASE
-OLLAMA_API_BASE=http://<IP or domain>:<port>/v1
-```
-As GenAIScript uses OpenAI style api, you must use the `/v1` endpoints and not `/api`.
-
-### Llamafile
+## Llamafile
 
 [https://llamafile.ai/](https://llamafile.ai/) is a single file desktop application that allows you to run an LLM locally.
 
 The provider is `llamafile` and the model name is ignored.
 
-### Jan, LMStudio, LLaMA.cpp
+## Jan, LMStudio, LLaMA.cpp
 
 [Jan](https://jan.ai/), [LMStudio](https://lmstudio.ai/),
 [LLaMA.cpp](https://github.com/ggerganov/llama.cpp/tree/master/examples/server)
@@ -855,7 +861,7 @@ OPENAI_API_BASE=http://localhost:...
 
-### Model specific environment variables
+## Model specific environment variables
 
 You can provide different environment variables for each named model
 by using the `PROVIDER_MODEL_API_...` prefix or `PROVIDER_API_...` prefix.
diff --git a/package.json b/package.json
index cc423a2283..d6402ea3c4 100644
--- a/package.json
+++ b/package.json
@@ -71,7 +71,9 @@
         "prd": "node packages/cli/built/genaiscript.cjs run prd -prd",
         "genai": "node packages/cli/built/genaiscript.cjs run",
         "upgrade:deps": "zx scripts/upgrade-deps.mjs",
-        "cli": "node packages/cli/built/genaiscript.cjs"
+        "cli": "node packages/cli/built/genaiscript.cjs",
+        "ollama": "docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama",
+        "ollama:stop": "docker stop ollama && docker rm ollama"
     },
     "release-it": {
         "github": {
diff --git a/packages/core/src/connection.ts b/packages/core/src/connection.ts
index af4a549a54..0588a64335 100644
--- a/packages/core/src/connection.ts
+++ b/packages/core/src/connection.ts
@@ -30,6 +30,7 @@ import {
     AzureCredentialsType,
 } from "./host"
 import { parseModelIdentifier } from "./models"
+import { parseHostVariable } from "./ollama"
 import { normalizeFloat, trimTrailingSlash } from "./util"
 
 export async function parseDefaultsFromEnv(env: Record<string, string>) {
@@ -276,6 +277,19 @@ export async function parseTokenFromEnv(
         }
     }
 
+    if (provider === MODEL_PROVIDER_OLLAMA) {
+        const host = parseHostVariable(env)
+        const base = cleanApiBase(host)
+        return {
+            provider,
+            model,
+            base,
+            token: "ollama",
+            type: "openai",
+            source: "env: OLLAMA_HOST",
+        }
+    }
+
     const prefixes = [
         tag ? `${provider}_${model}_${tag}` : undefined,
         provider ? `${provider}_${model}` : undefined,
@@ -307,17 +321,6 @@ export async function parseTokenFromEnv(
         }
     }
 
-    if (provider === MODEL_PROVIDER_OLLAMA) {
-        return {
-            provider,
-            model,
-            base: OLLAMA_API_BASE,
-            token: "ollama",
-            type: "openai",
-            source: "default",
-        }
-    }
-
     if (provider === MODEL_PROVIDER_LLAMAFILE) {
         return {
             provider,
@@ -358,6 +361,13 @@ export async function parseTokenFromEnv(
             `/openai/deployments`
         return b
     }
+
+    function cleanApiBase(b: string) {
+        if (!b) return b
+        b = trimTrailingSlash(b)
+        if (!/\/v1$/.test(b)) b += "/v1"
+        return b
+    }
 }
 
 export async function updateConnectionConfiguration(
diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts
index 510206423d..a001fdfc35 100644
--- a/packages/core/src/constants.ts
+++ b/packages/core/src/constants.ts
@@ -124,6 +124,7 @@ export const PROMPT_FENCE = "```"
 export const MARKDOWN_PROMPT_FENCE = "`````"
 
 export const OPENAI_API_BASE = "https://api.openai.com/v1"
+export const OLLAMA_DEFAUT_PORT = 11434
 export const OLLAMA_API_BASE = "http://localhost:11434/v1"
 export const LLAMAFILE_API_BASE = "http://localhost:8080/v1"
 export const LOCALAI_API_BASE = "http://localhost:8080/v1"
diff --git a/packages/core/src/ollama.test.ts b/packages/core/src/ollama.test.ts
new file mode 100644
index 0000000000..dcf8cd645e
--- /dev/null
+++ b/packages/core/src/ollama.test.ts
@@ -0,0 +1,54 @@
+import { describe, test } from "node:test"
+import assert from "node:assert/strict"
+import { parseHostVariable } from "./ollama"
+import { OLLAMA_API_BASE, OLLAMA_DEFAUT_PORT } from "./constants"
+
+describe("parseHostVariable", () => {
+    test("parses OLLAMA_HOST environment variable correctly", () => {
+        const env = { OLLAMA_HOST: "http://localhost:3000" }
+        const result = parseHostVariable(env)
+        assert.strictEqual(result, "http://localhost:3000/")
+    })
+
+    test("parses OLLAMA_API_BASE environment variable correctly", () => {
+        const env = { OLLAMA_API_BASE: "http://api.ollama.com" }
+        const result = parseHostVariable(env)
+        assert.strictEqual(result, "http://api.ollama.com/")
+    })
+
+    test("falls back to OLLAMA_API_BASE constant if no environment variable is set", () => {
+        const env = {}
+        const result = parseHostVariable(env)
+        assert.strictEqual(result, OLLAMA_API_BASE)
+    })
+
+    test("parses IP address with port correctly", () => {
+        const env = { OLLAMA_HOST: "192.168.1.1:8080" }
+        const result = parseHostVariable(env)
+        assert.strictEqual(result, "http://192.168.1.1:8080")
+    })
+
+    test("parses IP address without port correctly", () => {
+        const env = { OLLAMA_HOST: "192.168.1.1" }
+        const result = parseHostVariable(env)
+        assert.strictEqual(result, `http://192.168.1.1:${OLLAMA_DEFAUT_PORT}`)
+    })
+
+    test("parses 0.0.0.0 with port correctly", () => {
+        const env = { OLLAMA_HOST: "0.0.0.0:4000" }
+        const result = parseHostVariable(env)
+        assert.strictEqual(result, "http://0.0.0.0:4000")
+    })
+
+    test("parses localhost with port correctly", () => {
+        const env = { OLLAMA_HOST: "localhost:4000" }
+        const result = parseHostVariable(env)
+        assert.strictEqual(result, "http://localhost:4000")
+    })
+
+    test("parses 0.0.0.0 without port correctly", () => {
+        const env = { OLLAMA_HOST: "0.0.0.0" }
+        const result = parseHostVariable(env)
+        assert.strictEqual(result, `http://0.0.0.0:${OLLAMA_DEFAUT_PORT}`)
+    })
+})
diff --git a/packages/core/src/ollama.ts b/packages/core/src/ollama.ts
index 2eac4a940f..dc986f39b9 100644
--- a/packages/core/src/ollama.ts
+++ b/packages/core/src/ollama.ts
@@ -1,11 +1,16 @@
 // Import necessary modules and types for handling chat completions and model management
 import { ChatCompletionHandler, LanguageModel, LanguageModelInfo } from "./chat"
-import { MODEL_PROVIDER_OLLAMA } from "./constants"
+import {
+    MODEL_PROVIDER_OLLAMA,
+    OLLAMA_API_BASE,
+    OLLAMA_DEFAUT_PORT,
+} from "./constants"
 import { isRequestError } from "./error"
 import { createFetch } from "./fetch"
 import { parseModelIdentifier } from "./models"
 import { OpenAIChatCompletion } from "./openai"
 import { LanguageModelConfiguration, host } from "./host"
+import { URL } from "url"
 
 /**
  * Handles chat completion requests using the Ollama model.
@@ -105,3 +110,17 @@ export const OllamaModel = Object.freeze({
     id: MODEL_PROVIDER_OLLAMA,
     listModels,
 })
+
+export function parseHostVariable(env: Record<string, string>) {
+    const s = (
+        env.OLLAMA_HOST ||
+        env.OLLAMA_API_BASE ||
+        OLLAMA_API_BASE
+    )?.trim()
+    const ipm =
+        /^(?<address>(localhost|\d+\.\d+\.\d+\.\d+))(:(?<port>\d+))?$/i.exec(s)
+    if (ipm)
+        return `http://${ipm.groups.address}:${ipm.groups.port || OLLAMA_DEFAUT_PORT}`
+    const url = new URL(s)
+    return url.href
+}
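For reference, a small usage sketch of the new resolution path (not part of the diff; `ollama.example.com` is a placeholder host, and the expected values follow from the implementation and tests above):

```ts
import { parseHostVariable } from "./ollama"

// Bare host[:port] values are prefixed with http:// and given the default port when missing.
parseHostVariable({ OLLAMA_HOST: "0.0.0.0:12345" }) // "http://0.0.0.0:12345"
parseHostVariable({ OLLAMA_HOST: "192.168.1.1" }) // "http://192.168.1.1:11434" (OLLAMA_DEFAUT_PORT)
// Full URLs are normalized through the URL constructor.
parseHostVariable({ OLLAMA_HOST: "https://ollama.example.com:11434" }) // "https://ollama.example.com:11434/"
// With nothing set, the OLLAMA_API_BASE constant is used.
parseHostVariable({}) // "http://localhost:11434/v1"

// parseTokenFromEnv then runs the result through cleanApiBase, which trims a trailing slash
// and appends "/v1" when missing, e.g. "http://0.0.0.0:12345" -> "http://0.0.0.0:12345/v1".
```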