From b33ecca509b2476be27d86beeebac3e0cfe39d82 Mon Sep 17 00:00:00 2001 From: Camilo Velez Date: Thu, 10 Oct 2024 23:57:59 -0400 Subject: [PATCH 1/8] feat: improve error logging, management in aithena-services api --- services/aithena-services/.bumpversion.cfg | 12 +- services/aithena-services/.dockerignore | 186 +++++++++++++ services/aithena-services/README.md | 11 +- .../aithena-services/{docker => }/VERSION | 0 services/aithena-services/api/main.py | 204 -------------- services/aithena-services/pyproject.toml | 2 +- .../src/aithena_services/__init__.py | 2 + .../aithena_services}/api/__init__.py | 0 .../src/aithena_services/api/main.py | 262 ++++++++++++++++++ .../{envvars.py => config.py} | 31 ++- .../embeddings/azure_openai.py | 6 +- .../src/aithena_services/embeddings/ollama.py | 11 +- .../src/aithena_services/llms/azure_openai.py | 9 +- .../src/aithena_services/llms/ollama.py | 18 +- .../src/aithena_services/llms/openai.py | 9 +- .../aithena_services/test_azure_aithena.py | 2 +- .../tests/aithena_services/test_fapi.py | 2 +- .../aithena_services/test_ollama_aithena.py | 2 +- 18 files changed, 518 insertions(+), 251 deletions(-) create mode 100644 services/aithena-services/.dockerignore rename services/aithena-services/{docker => }/VERSION (100%) delete mode 100644 services/aithena-services/api/main.py rename services/aithena-services/{ => src/aithena_services}/api/__init__.py (100%) create mode 100644 services/aithena-services/src/aithena_services/api/main.py rename services/aithena-services/src/aithena_services/{envvars.py => config.py} (63%) diff --git a/services/aithena-services/.bumpversion.cfg b/services/aithena-services/.bumpversion.cfg index c312e30..f0cc614 100644 --- a/services/aithena-services/.bumpversion.cfg +++ b/services/aithena-services/.bumpversion.cfg @@ -1,16 +1,16 @@ [bumpversion] -current_version = 0.1.0-dev3 +current_version = 0.1.0-dev2 commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? -serialize = +serialize = {major}.{minor}.{patch}-{release}{dev} {major}.{minor}.{patch} [bumpversion:part:release] optional_value = _ first_value = dev -values = +values = dev _ @@ -20,8 +20,8 @@ values = search = version = "{current_version}" replace = version = "{new_version}" -[bumpversion:file:docker/VERSION] -search = {current_version} -replace = {new_version} +[bumpversion:file:VERSION] [bumpversion:file:README.md] + +[bumpversion:file:src/polus/aithena/aithena_services/__init__.py] \ No newline at end of file diff --git a/services/aithena-services/.dockerignore b/services/aithena-services/.dockerignore new file mode 100644 index 0000000..a36bb5c --- /dev/null +++ b/services/aithena-services/.dockerignore @@ -0,0 +1,186 @@ +.vscode +**/.venv/ +.mypy_cache +.env + +#logs +*.out +*.log + +# poetry +poetry.lock + +# Byte-compiled / optimized / DLL files +__pycache__ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ + +# vscode +.vscode + + +#macOS +*.DS_Store + + +#husky +node_modules + +# secrets +**/**-secret.yaml diff --git a/services/aithena-services/README.md b/services/aithena-services/README.md index 3f912e1..39b7674 100644 --- a/services/aithena-services/README.md +++ b/services/aithena-services/README.md @@ -1,14 +1,13 @@ # aithena-services 0.1.0-dev3 -Aithena-services provide a unified way to interact with many llms. +Aithena-services provides a unified way to interact with many LLMs. -It uses llama-index to interact with several existing llm backends: +It uses llama-index to interact with several existing LLM backends: - ollama - openai - azure_openai -The package can be used directly as a python client or can be deployed -as a rest service. 
+The package can be used directly as a python client or can be deployed as a REST service. ## Configuration @@ -108,6 +107,4 @@ Currently the image needs to be build from the top-level directory: `cd services/aithena-services` `./docker/build-docker.sh` -Make sure no .env file is present is `services/aithena-services/src/aithena_services` -or this file will be committed with the image leaking your secrets and -it will also prevent any later configuration attempt. +Make sure no .env file is present is `services/aithena-services/src/aithena_services` or this file will be committed with the image leaking your secrets and it will also prevent any later configuration attempt. diff --git a/services/aithena-services/docker/VERSION b/services/aithena-services/VERSION similarity index 100% rename from services/aithena-services/docker/VERSION rename to services/aithena-services/VERSION diff --git a/services/aithena-services/api/main.py b/services/aithena-services/api/main.py deleted file mode 100644 index f7d6c86..0000000 --- a/services/aithena-services/api/main.py +++ /dev/null @@ -1,204 +0,0 @@ -# mypy: disable-error-code="import-untyped" -"""Aithena-Services FastAPI REST Endpoints. """ - -# pylint: disable=W1203, C0412, C0103, W0212 - -import json -from logging import getLogger -from typing import Optional - -import httpx -import requests -from fastapi import FastAPI, HTTPException -from fastapi.responses import StreamingResponse - -from aithena_services.embeddings.azure_openai import AzureOpenAIEmbedding -from aithena_services.embeddings.ollama import OllamaEmbedding -from aithena_services.envvars import OLLAMA_HOST -from aithena_services.llms.azure_openai import AzureOpenAI -from aithena_services.llms.ollama import Ollama - -logger = getLogger(__name__) - - -app = FastAPI() - - -def check_platform(platform: str): - """Check if the platform is valid.""" - if platform not in ["ollama", "azure"]: - raise HTTPException( - status_code=400, - # detail="Invalid platform, must be 'ollama', 'openai' or 'azure'", - detail="Invalid platform, must be 'ollama' or 'azure'", - ) - - -@app.get("/test") -def test(): - """Test FastAPI deployment.""" - return {"status": "success"} - - -@app.get("/chat/list") -def list_chat_models(): - """List all available chat models.""" - try: - az = AzureOpenAI.list_models() - ol = Ollama.list_models() - except Exception as exc: - raise HTTPException(status_code=400, detail=str(exc)) from exc - return [*az, *ol] - - -@app.get("/chat/list/{platform}") -def list_chat_models_by_platform(platform: str): - """List all available chat models by platform.""" - check_platform(platform) - if platform == "azure": - try: - return AzureOpenAI.list_models() - except Exception as exc: - raise HTTPException(status_code=400, detail=str(exc)) from exc - try: - return Ollama.list_models() - except Exception as exc: - raise HTTPException(status_code=400, detail=str(exc)) from exc - - -@app.get("/embed/list") -def list_embed_models(): - """List all available embed models.""" - az = AzureOpenAIEmbedding.list_models() - ol = OllamaEmbedding.list_models() - return [*az, *ol] - - -@app.get("/embed/list/{platform}") -def list_embed_models_by_platform(platform: str): - """List all available embed models by platform.""" - check_platform(platform) - if platform == "azure": - return AzureOpenAIEmbedding.list_models() - return OllamaEmbedding.list_models() - - -def resolve_client_chat(model: str, num_ctx: Optional[int]): - """Resolve client for chat models.""" - if model in AzureOpenAI.list_models(): - 
return AzureOpenAI(deployment=model) - if f"{model}:latest" in Ollama.list_models(): - if num_ctx: - return Ollama( - model=f"{model}:latest", context_window=num_ctx, request_timeout=500 - ) - return Ollama(model=f"{model}:latest") - if model in Ollama.list_models(): - if num_ctx: - return Ollama(model=model, context_window=num_ctx, request_timeout=500) - return Ollama(model=model) - raise HTTPException(status_code=400, detail="Invalid model.") - - -def resolve_client_embed(model: str): - """Resolve client for embed models.""" - if model in AzureOpenAIEmbedding.list_models(): - return AzureOpenAIEmbedding(deployment=model) - if f"{model}:latest" in OllamaEmbedding.list_models(): - return OllamaEmbedding(model=f"{model}:latest") - if model in OllamaEmbedding.list_models(): - return OllamaEmbedding(model=model) - raise HTTPException(status_code=400, detail="Invalid model.") - - -@app.post("/chat/{model}/generate") -async def generate_from_msgs( - model: str, - messages: list[dict] | str, - stream: bool = False, - num_ctx: Optional[int] = None, -): - """Generate a chat completion from a list of messages.""" - - print( - f"For {model} chat, received {messages}, stream: {stream}, num_ctx: {num_ctx}" - ) - client = resolve_client_chat(model, num_ctx) - - if isinstance(messages, str): - messages = [{"role": "user", "content": messages}] - - if stream: - - async def stream_response(messages): - async for chunk in await client.astream_chat(messages): - response = chunk.__dict__ - response["message"] = chunk.message.as_json() - yield json.dumps(response, default=lambda x: x.model_dump_json()) + "\n" - - return StreamingResponse( - stream_response(messages), media_type="application/json" - ) - try: - res = await client.achat(messages) - except Exception as exc: - logger.error(f"Error in chat generation: {exc}") - raise HTTPException(status_code=400, detail=str(exc)) from exc - return res.as_json() - - -@app.post("/embed/{model}/generate") -async def text_embeddings( - model: str, - text: str | list[str], -): - """Get text embeddings.""" - client = resolve_client_embed(model) - try: - print(f"Embedding with client: {client}") - if isinstance(text, str): - res = await client._aget_text_embedding(text) - if isinstance(text, list): - res = await client._aget_text_embeddings(text) - except Exception as exc: - raise HTTPException(status_code=400, detail=str(exc)) from exc - return res - - -@app.post("/ollama/pull/{model}") -async def pull_ollama_model(model: str): - """Pull Ollama model.""" - try: - - async def stream_response(): - async with httpx.AsyncClient() as client: - async with client.stream( - "POST", f"{OLLAMA_HOST}/api/pull", json={"name": model} - ) as response: - async for line in response.aiter_lines(): - yield line + "\n" - - return StreamingResponse(stream_response(), media_type="application/json") - except Exception as exc: - raise HTTPException(status_code=400, detail=str(exc)) from exc - - -@app.get("/ollama/ps") -async def ollama_ps(): - """List Ollama models running.""" - try: - res = httpx.get(f"{OLLAMA_HOST}/api/ps").json() - except Exception as exc: - raise HTTPException(status_code=400, detail=str(exc)) from exc - return res - - -@app.delete("/ollama/delete/{model}") -async def ollama_delete(model: str): - """Delete Ollama model.""" - try: - res = requests.delete(f"{OLLAMA_HOST}/api/delete", json={"name": model}) - res.raise_for_status() - except Exception as exc: - raise HTTPException(status_code=400, detail=str(exc)) from exc - return {"status": "success"} diff --git 
a/services/aithena-services/pyproject.toml b/services/aithena-services/pyproject.toml index 074b058..e764b17 100644 --- a/services/aithena-services/pyproject.toml +++ b/services/aithena-services/pyproject.toml @@ -13,6 +13,7 @@ openai = "^1.35.13" requests = "^2.32.3" ollama = "^0.3.1" fastapi = {extras = ["standard"], version = ">=0.112.0"} +polus-aithena-common = {path= "../../common", develop=true} pytest-asyncio = "^0.23.8" python-dotenv = "^1.0.1" llama-index = "^0.11.0" @@ -22,7 +23,6 @@ llama-index-llms-openai = "^0.2.0" llama-index-llms-ollama = "^0.3.0" llama-index-llms-azure-openai = "^0.2.0" typing-extensions = "^4.12.2" -solara = "^1.39.0" uvicorn = "^0.30.6" httpx = "^0.27.2" diff --git a/services/aithena-services/src/aithena_services/__init__.py b/services/aithena-services/src/aithena_services/__init__.py index f2f5fdd..7daba62 100644 --- a/services/aithena-services/src/aithena_services/__init__.py +++ b/services/aithena-services/src/aithena_services/__init__.py @@ -1 +1,3 @@ """Aithena Services.""" + +__version__ = "0.1.0-dev2" diff --git a/services/aithena-services/api/__init__.py b/services/aithena-services/src/aithena_services/api/__init__.py similarity index 100% rename from services/aithena-services/api/__init__.py rename to services/aithena-services/src/aithena_services/api/__init__.py diff --git a/services/aithena-services/src/aithena_services/api/main.py b/services/aithena-services/src/aithena_services/api/main.py new file mode 100644 index 0000000..8dbe6ab --- /dev/null +++ b/services/aithena-services/src/aithena_services/api/main.py @@ -0,0 +1,262 @@ +# mypy: disable-error-code="import-untyped" +"""Aithena-Services FastAPI REST Endpoints. """ + +# pylint: disable=W1203, C0412, C0103, W0212 + +import json +from typing import Optional + +import httpx +import requests +from fastapi import FastAPI, HTTPException +from fastapi.responses import StreamingResponse + +from aithena_services.embeddings.azure_openai import AzureOpenAIEmbedding +from aithena_services.embeddings.ollama import OllamaEmbedding +from aithena_services.config import OLLAMA_HOST, TIMEOUT +from aithena_services.llms.azure_openai import AzureOpenAI +from aithena_services.llms.ollama import Ollama +from polus.aithena.common.logger import get_logger + +logger = get_logger("aithena_services.api") + + +app = FastAPI() + + +def check_platform(platform: str): + """Check if the platform is valid.""" + if platform not in ["ollama", "azure"]: + logger.error(f"Invalid platform: {platform}") + raise HTTPException( + status_code=404, + # detail="Invalid platform, must be 'ollama', 'openai' or 'azure'", + detail="Invalid platform, must be 'ollama' or 'azure'", + ) + + +@app.get("/test") +def test(): + """Test FastAPI deployment.""" + logger.debug("Testing FastAPI deployment") + return {"status": "success"} + + +@app.get("/chat/list") +def list_chat_models(): + """List all available chat models.""" + try: + az = AzureOpenAI.list_models() + ol = Ollama.list_models() + except Exception as exc: + logger.error(f"Error in listing chat models: {exc}") + raise HTTPException(status_code=400, detail=str(exc)) + return [*az, *ol] + + +@app.get("/chat/list/{platform}") +def list_chat_models_by_platform(platform: str): + """List all available chat models by platform.""" + check_platform(platform) + if platform == "azure": + try: + return AzureOpenAI.list_models() + except Exception as exc: + logger.error(f"Error in listing chat models in Azure: {exc}") + raise HTTPException(status_code=400, detail=f"There was a problem listing 
chat models in Azure: {str(exc)}") + try: + return Ollama.list_models() + except Exception as exc: + logger.error(f"Error in listing chat models in Ollama: {exc}") + raise HTTPException(status_code=400, detail=f"There was a problem listing chat models in Ollama: {str(exc)}") + + +@app.get("/embed/list") +def list_embed_models(): + """List all available embed models.""" + az = AzureOpenAIEmbedding.list_models() + ol = OllamaEmbedding.list_models() + return [*az, *ol] + + +@app.get("/embed/list/{platform}") +def list_embed_models_by_platform(platform: str): + """List all available embed models by platform.""" + check_platform(platform) + if platform == "azure": + try: + return AzureOpenAIEmbedding.list_models() + except Exception as exc: + raise HTTPException(status_code=400, detail=f"There was a problem listing embed models in Azure: {str(exc)}") + try: + return OllamaEmbedding.list_models() + except Exception as exc: + raise HTTPException(status_code=400, detail=f"There was a problem listing embed models in Ollama: {str(exc)}") + +def resolve_client_chat(model: str, num_ctx: Optional[int]): + """Resolve client for chat models.""" + if model in AzureOpenAI.list_models(): + try: + return AzureOpenAI(deployment=model) + except Exception as exc: + logger.error(f"Error in resolving Azure client for model: {model}") + raise HTTPException(status_code=400, detail=f"Error in resolving Azure chat client for model: {model}, {str(exc)}") + if f"{model}:latest" in Ollama.list_models(): + return resolve_client_chat(f"{model}:latest", num_ctx) + if model in Ollama.list_models(): + try: + if num_ctx: + return Ollama(model=model, context_window=num_ctx, request_timeout=500) + except Exception as exc: + logger.error(f"Error in resolving Ollama client for model: {model}") + raise HTTPException(status_code=400, detail=f"Error in resolving Ollama chat client for model: {model}, {str(exc)}") + try: + return Ollama(model=model) + except Exception as exc: + logger.error(f"Error in resolving Ollama client for model: {model}") + raise HTTPException(status_code=400, detail=f"Error in resolving Ollama chat client for model: {model}, {str(exc)}") + logger.error(f"Chat model not found: {model}") + raise HTTPException(status_code=404, detail="Chat model not found.") + + +def resolve_client_embed(model: str): + """Resolve client for embed models.""" + if model in AzureOpenAIEmbedding.list_models(): + try: + return AzureOpenAIEmbedding(deployment=model) + except Exception as exc: + logger.error(f"Error in resolving Azure embed client for model: {model}") + raise HTTPException(status_code=400, detail=f"Error in resolving Azure embed client for model: {model}, {str(exc)}") + if f"{model}:latest" in OllamaEmbedding.list_models(): + try: + return OllamaEmbedding(model=f"{model}:latest") + except Exception as exc: + logger.error(f"Error in resolving Ollama embed client for model: {model}") + raise HTTPException(status_code=400, detail=f"Error in resolving Ollama embed client for model: {model}, {str(exc)}") + if model in OllamaEmbedding.list_models(): + try: + return OllamaEmbedding(model=model) + except Exception as exc: + logger.error(f"Error in resolving Ollama embed client for model: {model}") + raise HTTPException(status_code=400, detail=f"Error in resolving Ollama embed client for model: {model}, {str(exc)}") + logger.error(f"Embed model not found: {model}") + raise HTTPException(status_code=404, detail="Embed model not found.") + + +@app.post("/chat/{model}/generate") +async def generate_from_msgs( + model: str, + 
messages: list[dict] | str, + stream: bool = False, + num_ctx: Optional[int] = None, +): + """Generate a chat completion from a list of messages.""" + + logger.debug( + f"For {model} chat, received {messages}, stream: {stream}, num_ctx: {num_ctx}" + ) + client = resolve_client_chat(model, num_ctx) + + if isinstance(messages, str): + messages = [{"role": "user", "content": messages}] + + if stream: + + async def stream_response(messages): + try: + async for chunk in await client.astream_chat(messages): + response = chunk.__dict__ + response["message"] = chunk.message.as_json() + yield json.dumps(response, default=lambda x: x.model_dump_json()) + "\n" + except httpx.ReadTimeout as exc: + logger.error(f"Timeout error in chat stream response") + yield json.dumps({"error": "Timeout error in chat stream response"}) + "\n" + # raise HTTPException(status_code=408, detail="Timeout error in chat stream response") + except Exception as exc: + logger.error(f"Error in chat stream response: {str(exc)}") + yield json.dumps({"error": f"Error in chat stream response: {str(exc)}"}) + "\n" + + return StreamingResponse( + stream_response(messages), media_type="application/json" + ) + try: + res = await client.achat(messages) + return res.as_json() + except httpx.ReadTimeout as exc: + logger.error(f"Timeout error in chat response") + raise HTTPException(status_code=408, detail="Timeout error in chat response") + except Exception as exc: + logger.error(f"Error in chat generation: {str(exc)}") + raise HTTPException(status_code=400, detail=str(exc)) + +@app.post("/embed/{model}/generate") +async def text_embeddings( + model: str, + text: str | list[str], +): + """Get text embeddings.""" + client = resolve_client_embed(model) + try: + logger.info(f"Embedding with client: {client}") + if isinstance(text, str): + res = await client._aget_text_embedding(text) + if isinstance(text, list): + res = await client._aget_text_embeddings(text) + except httpx.ReadTimeout as exc: + logger.error(f"Timeout error in embedding generation") + raise HTTPException(status_code=408, detail="Timeout error in embedding generation") + except Exception as exc: + logger.error(f"Error in text embeddings: {str(exc)}") + raise HTTPException(status_code=400, detail=str(exc)) + return res + + +@app.post("/ollama/pull/{model}") +async def pull_ollama_model(model: str): + """Pull Ollama model.""" + logger.debug(f"Pulling Ollama model: {model}") + + async def stream_response(): + try: + async with httpx.AsyncClient(timeout=TIMEOUT) as client: + async with client.stream( + "POST", f"{OLLAMA_HOST}/api/pull", json={"name": model} + ) as response: + async for line in response.aiter_lines(): + yield line + "\n" + except httpx.ReadTimeout as exc: + logger.error(f"Timeout error in Ollama model pull") + yield json.dumps({"error": "Timeout error in Ollama model pull"}) + "\n" + except Exception as exc: + logger.error(f"Error in chat Ollama model pull: {str(exc)}") + yield json.dumps({"error": f"Error in Ollama model pull: {str(exc)}"}) + "\n" + + return StreamingResponse(stream_response(), media_type="application/json") + + +@app.get("/ollama/ps") +async def ollama_ps(): + """List Ollama models running.""" + logger.debug(f"Listing Ollama models running") + try: + res = httpx.get(f"{OLLAMA_HOST}/api/ps", timeout=TIMEOUT).json() + except httpx.ReadTimeout as exc: + logger.error(f"Timeout error in Ollama ps") + raise HTTPException(status_code=408, detail="Timeout error in Ollama ps") + except Exception as exc: + logger.error(f"Error in Ollama ps: {exc}") + 
raise HTTPException(status_code=400, detail=str(exc)) + return res + + +@app.delete("/ollama/delete/{model}") +async def ollama_delete(model: str): + """Delete Ollama model.""" + logger.debug(f"Deleting Ollama model: {model}") + try: + res = requests.delete(f"{OLLAMA_HOST}/api/delete", json={"name": model}, timeout=TIMEOUT) + res.raise_for_status() + except Exception as exc: + logger.error(f"Error in deleting Ollama model: {exc}") + raise HTTPException(status_code=400, detail=str(exc)) + return {"status": "success"} diff --git a/services/aithena-services/src/aithena_services/envvars.py b/services/aithena-services/src/aithena_services/config.py similarity index 63% rename from services/aithena-services/src/aithena_services/envvars.py rename to services/aithena-services/src/aithena_services/config.py index 2e11e31..c0e4633 100644 --- a/services/aithena-services/src/aithena_services/envvars.py +++ b/services/aithena-services/src/aithena_services/config.py @@ -1,12 +1,15 @@ -"""Environment Variable Configuration for Aithena Services.""" - -import logging import os from dotenv import find_dotenv, load_dotenv +from polus.aithena.common.logger import get_logger +from polus.aithena.common.utils import time_logger load_dotenv(find_dotenv(), override=True) +TIMEOUT = int(os.environ.get("TIMEOUT") or "30") +RETRY_AFTER = int(os.environ.get("RETRY_AFTER") or "5") +RETRY_ATTEMPTS = int(os.environ.get("RETRY_ATTEMPTS") or "3") + env = os.environ OPENAI_KEY = os.getenv("OPENAI_API_KEY", None) @@ -34,18 +37,22 @@ AZURE_OPENAI_EMBED_MODELS_DICT[k] = value +logger = get_logger(__file__) + +logger.info(f""" +Aithena-Services started with TIMEOUT: {TIMEOUT}, RETRY_AFTER: {RETRY_AFTER}, RETRY_ATTEMPTS: {RETRY_ATTEMPTS}, +OPENAI_KEY: {OPENAI_KEY}, OLLAMA_HOST: {OLLAMA_HOST}, AZURE_OPENAI_ENV: {AZURE_OPENAI_ENV_DICT}, +AZURE_OPENAI_CHAT_MODELS: {list(AZURE_OPENAI_CHAT_MODELS_DICT.keys())}, AZURE_OPENAI_EMBED_MODELS: {list(AZURE_OPENAI_EMBED_MODELS_DICT.keys())} +""") + + __all__ = [ + "TIMEOUT", + "RETRY_AFTER", + "RETRY_ATTEMPTS", "OPENAI_KEY", "OLLAMA_HOST", "AZURE_OPENAI_ENV_DICT", "AZURE_OPENAI_CHAT_MODELS_DICT", "AZURE_OPENAI_EMBED_MODELS_DICT", -] - -logger = logging.getLogger(__name__) -logger.info("Environment variables loaded.") -logger.info(f"OPENAI_KEY: {OPENAI_KEY}") -logger.info(f"OLLAMA_HOST: {OLLAMA_HOST}") -logger.info(f"AZURE_OPENAI_ENV_DICT: {AZURE_OPENAI_ENV_DICT}") -logger.info(f"AZURE_OPENAI_CHAT_MODELS_DICT: {AZURE_OPENAI_CHAT_MODELS_DICT}") -logger.info(f"AZURE_OPENAI_EMBED_MODELS_DICT: {AZURE_OPENAI_EMBED_MODELS_DICT}") +] \ No newline at end of file diff --git a/services/aithena-services/src/aithena_services/embeddings/azure_openai.py b/services/aithena-services/src/aithena_services/embeddings/azure_openai.py index 8d76312..c49824e 100644 --- a/services/aithena-services/src/aithena_services/embeddings/azure_openai.py +++ b/services/aithena-services/src/aithena_services/embeddings/azure_openai.py @@ -9,11 +9,13 @@ ) from aithena_services.common.azure import resolve_azure_deployment -from aithena_services.envvars import ( +from aithena_services.config import ( AZURE_OPENAI_EMBED_MODELS_DICT, AZURE_OPENAI_ENV_DICT, ) +from polus.aithena.common.logger import get_logger +logger = get_logger("aithena_services.embeddings.azure_openai") class AzureOpenAIEmbedding(LlamaIndexAzureOpenAI): """Azure OpenAI embeddings.""" @@ -26,6 +28,7 @@ def list_deployments() -> list[str]: that are listed as environment variables in the correct format. The format is `AZURE_OPENAI_DEPLOYMENT_EMBED_{name}={value}`. 
""" + logger.debug(f"Listing Azure OpenAI embedding deployments") return list(AZURE_OPENAI_EMBED_MODELS_DICT.keys()) list_models = list_deployments # Alias @@ -40,6 +43,7 @@ def __init__( kwargs["azure_deployment"] = resolve_azure_deployment( deployment, AZURE_OPENAI_EMBED_MODELS_DICT ) + logger.debug(f"Initializing Azure OpenAI embeddings with kwargs: {kwargs}") super().__init__(**kwargs) def aget_text_embeddings(self, texts: list[str]) -> list[list[float]]: diff --git a/services/aithena-services/src/aithena_services/embeddings/ollama.py b/services/aithena-services/src/aithena_services/embeddings/ollama.py index 36865e2..f153777 100644 --- a/services/aithena-services/src/aithena_services/embeddings/ollama.py +++ b/services/aithena-services/src/aithena_services/embeddings/ollama.py @@ -7,12 +7,10 @@ import requests # type: ignore from llama_index.embeddings.ollama import OllamaEmbedding as LlamaIndexOllama -from aithena_services.envvars import OLLAMA_HOST - -from logging import getLogger - -logger = getLogger(__name__) +from aithena_services.config import OLLAMA_HOST +from polus.aithena.common.logger import get_logger +logger = get_logger("aithena_services.embeddings.ollama") class OllamaEmbedding(LlamaIndexOllama): """Ollama embeddings.""" @@ -24,12 +22,13 @@ def __init__(self, **kwargs: Any): kwargs["model_name"] = kwargs["model"] if "base_url" not in kwargs or kwargs["base_url"] is None: kwargs["base_url"] = OLLAMA_HOST + logger.debug(f"Initalizing Ollama embedding with kwargs: {kwargs}") super().__init__(**kwargs) @staticmethod def list_models(url: str = OLLAMA_HOST) -> list[str]: # type: ignore """List available Ollama models.""" - logger.debug(f"Listing Ollama models at {url}") + logger.debug(f"Listing Ollama embedding models at {url}") r = [ x["name"] for x in requests.get(url + "/api/tags", timeout=40).json()["models"] diff --git a/services/aithena-services/src/aithena_services/llms/azure_openai.py b/services/aithena-services/src/aithena_services/llms/azure_openai.py index f2a31d9..7494ff2 100644 --- a/services/aithena-services/src/aithena_services/llms/azure_openai.py +++ b/services/aithena-services/src/aithena_services/llms/azure_openai.py @@ -7,9 +7,10 @@ from llama_index.llms.azure_openai import AzureOpenAI as LlamaIndexAzureOpenAI from aithena_services.common.azure import resolve_azure_deployment -from aithena_services.envvars import ( +from aithena_services.config import ( AZURE_OPENAI_CHAT_MODELS_DICT, AZURE_OPENAI_ENV_DICT, + TIMEOUT, ) from aithena_services.llms.types import Message from aithena_services.llms.types.base import AithenaLLM, chataithena, streamchataithena @@ -19,7 +20,9 @@ ChatResponseGen, ) from aithena_services.llms.utils import check_and_cast_messages +from polus.aithena.common.logger import get_logger +logger = get_logger("aithena_services.llms.azure_openai") class AzureOpenAI(LlamaIndexAzureOpenAI, AithenaLLM): """Azure OpenAI LLMs. @@ -51,11 +54,12 @@ def list_deployments() -> list[str]: that are listed as environment variables in the correct format. The format is `AZURE_OPENAI_DEPLOYMENT_CHAT_{name}={value}`. 
""" + logger.debug(f"Listing Azure OpenAI chat deployments") return list(AZURE_OPENAI_CHAT_MODELS_DICT.keys()) list_models = list_deployments # Alias - def __init__(self, **kwargs: Any): + def __init__(self, timeout=TIMEOUT, **kwargs: Any): for arg in ["api_key", "azure_endpoint", "api_version"]: if arg not in kwargs or kwargs[arg] is None: kwargs[arg] = AZURE_OPENAI_ENV_DICT[arg] @@ -66,6 +70,7 @@ def __init__(self, **kwargs: Any): kwargs["engine"] = resolve_azure_deployment( kwargs["engine"], AZURE_OPENAI_CHAT_MODELS_DICT ) + logger.debug(f"Initializing Azure OpenAI with kwargs: {kwargs}") super().__init__(**kwargs) @chataithena diff --git a/services/aithena-services/src/aithena_services/llms/ollama.py b/services/aithena-services/src/aithena_services/llms/ollama.py index bd1c77f..67e4d01 100644 --- a/services/aithena-services/src/aithena_services/llms/ollama.py +++ b/services/aithena-services/src/aithena_services/llms/ollama.py @@ -8,7 +8,7 @@ import requests # type: ignore from llama_index.llms.ollama import Ollama as LlamaIndexOllama # type: ignore -from aithena_services.envvars import OLLAMA_HOST +from aithena_services.config import OLLAMA_HOST, TIMEOUT from aithena_services.llms.types import Message from aithena_services.llms.types.base import AithenaLLM, chataithena, streamchataithena from aithena_services.llms.types.response import ( @@ -18,7 +18,9 @@ ) from aithena_services.llms.utils import check_and_cast_messages -logger = logging.getLogger("aithena_services.llms.ollama") +from polus.aithena.common.logger import get_logger + +logger = get_logger("aithena_services.llms.ollama") # TODO: check how to set multiple stop sequences, because Ollama supports it @@ -47,19 +49,21 @@ class Ollama(LlamaIndexOllama, AithenaLLM): """ - def __init__(self, **kwargs: Any): + def __init__(self, timeout=TIMEOUT, **kwargs: Any): if "base_url" not in kwargs or kwargs["base_url"] is None: kwargs["base_url"] = OLLAMA_HOST - logger.debug(f"Initalizing Ollama with kwargs: {kwargs}") - super().__init__(**kwargs) + if "request_timeout" in kwargs: + kwargs.pop("request_timeout") + logger.debug(f"Initalizing Ollama chat with kwargs: {kwargs}") + super().__init__(request_timeout=timeout, **kwargs) @staticmethod def list_models(url: str = OLLAMA_HOST) -> list[str]: # type: ignore """List available Ollama models.""" - logger.debug(f"Listing Ollama models at {url}") + logger.debug(f"Listing Ollama chat models at {url}") r = [ x["name"] - for x in requests.get(url + "/api/tags", timeout=40).json()["models"] + for x in requests.get(url + "/api/tags", timeout=TIMEOUT).json()["models"] ] return [x for x in r if "embed" not in x] diff --git a/services/aithena-services/src/aithena_services/llms/openai.py b/services/aithena-services/src/aithena_services/llms/openai.py index 4335d3f..66448da 100644 --- a/services/aithena-services/src/aithena_services/llms/openai.py +++ b/services/aithena-services/src/aithena_services/llms/openai.py @@ -15,7 +15,10 @@ ChatResponseGen, ) from aithena_services.llms.utils import check_and_cast_messages +from polus.aithena.common.logger import get_logger +from aithena_services.config import TIMEOUT +logger = get_logger("aithena_services.llms.openai") def custom_sort_for_openai_models(name: str) -> tuple[int, str]: """Custom sort function for OpenAI models.""" @@ -41,18 +44,20 @@ def list_openai_models() -> list[str]: class OpenAI(LlamaIndexOpenAI, AithenaLLM): """OpenAI models.""" - def __init__(self, **kwargs: Any): + def __init__(self, timeout=TIMEOUT, **kwargs: Any): if "model" not in 
kwargs: raise ValueError(f"Model not specified. Available models: {OPENAI_MODELS}") if kwargs["model"] not in OPENAI_MODELS: raise ValueError( f"Model {kwargs['model']} not available. Available models: {OPENAI_MODELS}" ) - super().__init__(**kwargs) + logger.debug(f"Initializing OpenAI chat with kwargs: {kwargs}") + super().__init__(timeout=timeout, **kwargs) @staticmethod def list_models() -> list[str]: """List available OpenAI chat models.""" + logger.debug(f"Listing OpenAI chat models") return list_openai_models() @chataithena diff --git a/services/aithena-services/tests/aithena_services/test_azure_aithena.py b/services/aithena-services/tests/aithena_services/test_azure_aithena.py index 4ff35e3..803fd16 100644 --- a/services/aithena-services/tests/aithena_services/test_azure_aithena.py +++ b/services/aithena-services/tests/aithena_services/test_azure_aithena.py @@ -14,7 +14,7 @@ # this is after dotenv in case .env for tests # defines different values for these variables -from aithena_services.envvars import AZURE_OPENAI_AVAILABLE, AZURE_OPENAI_ENV_DICT +from aithena_services.config import AZURE_OPENAI_AVAILABLE, AZURE_OPENAI_ENV_DICT from aithena_services.llms.types import ChatResponse, Message DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT") diff --git a/services/aithena-services/tests/aithena_services/test_fapi.py b/services/aithena-services/tests/aithena_services/test_fapi.py index a48ca1d..1c3f13a 100644 --- a/services/aithena-services/tests/aithena_services/test_fapi.py +++ b/services/aithena-services/tests/aithena_services/test_fapi.py @@ -5,7 +5,7 @@ import random # pylint: disable=C0415, W0621, C0413, C0103 -from aithena_services.envvars import ( +from aithena_services.config import ( AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_ENV_DICT, OLLAMA_HOST, diff --git a/services/aithena-services/tests/aithena_services/test_ollama_aithena.py b/services/aithena-services/tests/aithena_services/test_ollama_aithena.py index 2a1c9b8..f763cbf 100644 --- a/services/aithena-services/tests/aithena_services/test_ollama_aithena.py +++ b/services/aithena-services/tests/aithena_services/test_ollama_aithena.py @@ -12,7 +12,7 @@ # this is after dotenv in case .env for tests # defines different values for these variables -from aithena_services.envvars import OLLAMA_HOST +from aithena_services.config import OLLAMA_HOST from aithena_services.llms.types import ChatResponse, Message From 8ffcbb8606e831e7d4bdd7f570ab226b6c6286ef Mon Sep 17 00:00:00 2001 From: Camilo Velez Date: Thu, 10 Oct 2024 23:58:57 -0400 Subject: [PATCH 2/8] fix: fix bumpversion.cfg incorrect path --- services/aithena-services/.bumpversion.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/aithena-services/.bumpversion.cfg b/services/aithena-services/.bumpversion.cfg index f0cc614..20f469d 100644 --- a/services/aithena-services/.bumpversion.cfg +++ b/services/aithena-services/.bumpversion.cfg @@ -24,4 +24,4 @@ replace = version = "{new_version}" [bumpversion:file:README.md] -[bumpversion:file:src/polus/aithena/aithena_services/__init__.py] \ No newline at end of file +[bumpversion:file:src/aithena_services/__init__.py] \ No newline at end of file From 8aa6292fa50c9d1a365f36fe30f40e484454c80f Mon Sep 17 00:00:00 2001 From: Camilo Velez Date: Fri, 11 Oct 2024 00:00:06 -0400 Subject: [PATCH 3/8] =?UTF-8?q?Bump=20version:=200.1.0-dev2=20=E2=86=92=20?= =?UTF-8?q?0.1.1-dev0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- services/aithena-services/.bumpversion.cfg | 8 
++++---- services/aithena-services/README.md | 2 +- services/aithena-services/VERSION | 2 +- services/aithena-services/pyproject.toml | 2 +- .../aithena-services/src/aithena_services/__init__.py | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/services/aithena-services/.bumpversion.cfg b/services/aithena-services/.bumpversion.cfg index 20f469d..7f21225 100644 --- a/services/aithena-services/.bumpversion.cfg +++ b/services/aithena-services/.bumpversion.cfg @@ -1,16 +1,16 @@ [bumpversion] -current_version = 0.1.0-dev2 +current_version = 0.1.1-dev0 commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? -serialize = +serialize = {major}.{minor}.{patch}-{release}{dev} {major}.{minor}.{patch} [bumpversion:part:release] optional_value = _ first_value = dev -values = +values = dev _ @@ -24,4 +24,4 @@ replace = version = "{new_version}" [bumpversion:file:README.md] -[bumpversion:file:src/aithena_services/__init__.py] \ No newline at end of file +[bumpversion:file:src/aithena_services/__init__.py] diff --git a/services/aithena-services/README.md b/services/aithena-services/README.md index 39b7674..d1933d0 100644 --- a/services/aithena-services/README.md +++ b/services/aithena-services/README.md @@ -1,4 +1,4 @@ -# aithena-services 0.1.0-dev3 +# aithena-services 0.1.1-dev0 Aithena-services provides a unified way to interact with many LLMs. diff --git a/services/aithena-services/VERSION b/services/aithena-services/VERSION index b17f82d..44bf4db 100644 --- a/services/aithena-services/VERSION +++ b/services/aithena-services/VERSION @@ -1 +1 @@ -0.1.0-dev3 +0.1.1-dev0 diff --git a/services/aithena-services/pyproject.toml b/services/aithena-services/pyproject.toml index e764b17..f483c98 100644 --- a/services/aithena-services/pyproject.toml +++ b/services/aithena-services/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "aithena-services" -version = "0.1.0-dev3" +version = "0.1.1-dev0" description = "" authors = ["Camilo Velez , Antoine Gerardin "] readme = "README.md" diff --git a/services/aithena-services/src/aithena_services/__init__.py b/services/aithena-services/src/aithena_services/__init__.py index 7daba62..d61976c 100644 --- a/services/aithena-services/src/aithena_services/__init__.py +++ b/services/aithena-services/src/aithena_services/__init__.py @@ -1,3 +1,3 @@ """Aithena Services.""" -__version__ = "0.1.0-dev2" +__version__ = "0.1.1-dev0" From 6221be643fdf6fda6c1928d1ad63987de7fd3178 Mon Sep 17 00:00:00 2001 From: Camilo Velez Date: Wed, 16 Oct 2024 12:32:05 -0400 Subject: [PATCH 4/8] fix: fix azure_openai docstring --- .../src/aithena_services/llms/azure_openai.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/services/aithena-services/src/aithena_services/llms/azure_openai.py b/services/aithena-services/src/aithena_services/llms/azure_openai.py index 7494ff2..3136c09 100644 --- a/services/aithena-services/src/aithena_services/llms/azure_openai.py +++ b/services/aithena-services/src/aithena_services/llms/azure_openai.py @@ -1,11 +1,9 @@ # mypy: disable-error-code="import-untyped" -"""Ollama implementation based on LlamaIndex.""" +"""AzureOpenAI implementation based on LlamaIndex.""" # pylint: disable=too-many-ancestors from typing import Any, Sequence -from llama_index.llms.azure_openai import AzureOpenAI as LlamaIndexAzureOpenAI - from aithena_services.common.azure import resolve_azure_deployment from aithena_services.config import ( AZURE_OPENAI_CHAT_MODELS_DICT, @@ -20,10 +18,12 @@ ChatResponseGen, ) from 
aithena_services.llms.utils import check_and_cast_messages +from llama_index.llms.azure_openai import AzureOpenAI as LlamaIndexAzureOpenAI from polus.aithena.common.logger import get_logger logger = get_logger("aithena_services.llms.azure_openai") + class AzureOpenAI(LlamaIndexAzureOpenAI, AithenaLLM): """Azure OpenAI LLMs. @@ -65,7 +65,8 @@ def __init__(self, timeout=TIMEOUT, **kwargs: Any): kwargs[arg] = AZURE_OPENAI_ENV_DICT[arg] if "deployment" in kwargs: if "engine" in kwargs: - raise ValueError("Cannot specify both `deployment` and `engine`.") + raise ValueError( + "Cannot specify both `deployment` and `engine`.") kwargs["engine"] = kwargs.pop("deployment") kwargs["engine"] = resolve_azure_deployment( kwargs["engine"], AZURE_OPENAI_CHAT_MODELS_DICT From 5a956f409228d0fbe29f1c0f44dcba6fef52e4ee Mon Sep 17 00:00:00 2001 From: Camilo Velez Date: Tue, 5 Nov 2024 11:18:13 -0500 Subject: [PATCH 5/8] feat: save list of models api -aithenaservices --- .../src/aithena_services/api/main.py | 50 +++++++++++++++---- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/services/aithena-services/src/aithena_services/api/main.py b/services/aithena-services/src/aithena_services/api/main.py index 8dbe6ab..d29daa5 100644 --- a/services/aithena-services/src/aithena_services/api/main.py +++ b/services/aithena-services/src/aithena_services/api/main.py @@ -22,6 +22,8 @@ app = FastAPI() +OLLAMA_MODELS = {"EMBED": OllamaEmbedding.list_models(), "CHAT": Ollama.list_models()} +AZURE_MODELS = {"EMBED": AzureOpenAIEmbedding.list_models(), "CHAT": AzureOpenAI.list_models()} def check_platform(platform: str): @@ -41,6 +43,22 @@ def test(): logger.debug("Testing FastAPI deployment") return {"status": "success"} +@app.put("/update") +def update_model_lists(): + """Update chat/embed model lists.""" + try: + az = AzureOpenAI.list_models() + ol = Ollama.list_models() + OLLAMA_MODELS["CHAT"] = ol + AZURE_MODELS["CHAT"] = az + az = AzureOpenAIEmbedding.list_models() + ol = OllamaEmbedding.list_models() + OLLAMA_MODELS["EMBED"] = ol + AZURE_MODELS["EMBED"] = az + except Exception as exc: + logger.error(f"Error in updating model lists: {exc}") + raise HTTPException(status_code=400, detail=str(exc)) + return {"status": "success"} @app.get("/chat/list") def list_chat_models(): @@ -48,6 +66,8 @@ def list_chat_models(): try: az = AzureOpenAI.list_models() ol = Ollama.list_models() + OLLAMA_MODELS["CHAT"] = ol + AZURE_MODELS["CHAT"] = az except Exception as exc: logger.error(f"Error in listing chat models: {exc}") raise HTTPException(status_code=400, detail=str(exc)) @@ -60,12 +80,16 @@ def list_chat_models_by_platform(platform: str): check_platform(platform) if platform == "azure": try: - return AzureOpenAI.list_models() + r = AzureOpenAI.list_models() + AZURE_MODELS["CHAT"] = r + return r except Exception as exc: logger.error(f"Error in listing chat models in Azure: {exc}") raise HTTPException(status_code=400, detail=f"There was a problem listing chat models in Azure: {str(exc)}") try: - return Ollama.list_models() + r = Ollama.list_models() + OLLAMA_MODELS["CHAT"] = r + return r except Exception as exc: logger.error(f"Error in listing chat models in Ollama: {exc}") raise HTTPException(status_code=400, detail=f"There was a problem listing chat models in Ollama: {str(exc)}") @@ -76,6 +100,8 @@ def list_embed_models(): """List all available embed models.""" az = AzureOpenAIEmbedding.list_models() ol = OllamaEmbedding.list_models() + OLLAMA_MODELS["EMBED"] = ol + AZURE_MODELS["EMBED"] = az return [*az, *ol] @@ -85,25 
+111,29 @@ def list_embed_models_by_platform(platform: str): check_platform(platform) if platform == "azure": try: - return AzureOpenAIEmbedding.list_models() + r = AzureOpenAIEmbedding.list_models() + AZURE_MODELS["EMBED"] = r + return r except Exception as exc: raise HTTPException(status_code=400, detail=f"There was a problem listing embed models in Azure: {str(exc)}") try: - return OllamaEmbedding.list_models() + r = OllamaEmbedding.list_models() + OLLAMA_MODELS["EMBED"] = r + return r except Exception as exc: raise HTTPException(status_code=400, detail=f"There was a problem listing embed models in Ollama: {str(exc)}") def resolve_client_chat(model: str, num_ctx: Optional[int]): """Resolve client for chat models.""" - if model in AzureOpenAI.list_models(): + if model in AZURE_MODELS["CHAT"]: try: return AzureOpenAI(deployment=model) except Exception as exc: logger.error(f"Error in resolving Azure client for model: {model}") raise HTTPException(status_code=400, detail=f"Error in resolving Azure chat client for model: {model}, {str(exc)}") - if f"{model}:latest" in Ollama.list_models(): + if f"{model}:latest" in OLLAMA_MODELS["CHAT"]: return resolve_client_chat(f"{model}:latest", num_ctx) - if model in Ollama.list_models(): + if model in OLLAMA_MODELS["CHAT"]: try: if num_ctx: return Ollama(model=model, context_window=num_ctx, request_timeout=500) @@ -121,19 +151,19 @@ def resolve_client_chat(model: str, num_ctx: Optional[int]): def resolve_client_embed(model: str): """Resolve client for embed models.""" - if model in AzureOpenAIEmbedding.list_models(): + if model in AZURE_MODELS["EMBED"]: try: return AzureOpenAIEmbedding(deployment=model) except Exception as exc: logger.error(f"Error in resolving Azure embed client for model: {model}") raise HTTPException(status_code=400, detail=f"Error in resolving Azure embed client for model: {model}, {str(exc)}") - if f"{model}:latest" in OllamaEmbedding.list_models(): + if f"{model}:latest" in OLLAMA_MODELS["EMBED"]: try: return OllamaEmbedding(model=f"{model}:latest") except Exception as exc: logger.error(f"Error in resolving Ollama embed client for model: {model}") raise HTTPException(status_code=400, detail=f"Error in resolving Ollama embed client for model: {model}, {str(exc)}") - if model in OllamaEmbedding.list_models(): + if model in OLLAMA_MODELS["EMBED"]: try: return OllamaEmbedding(model=model) except Exception as exc: From 6a6d0712944722693d4af72701b02c2d77667718 Mon Sep 17 00:00:00 2001 From: Camilo Velez Date: Thu, 14 Nov 2024 15:32:14 +0000 Subject: [PATCH 6/8] chore: update docker files --- services/aithena-services/Dockerfile | 22 ++++++++++ services/aithena-services/build-docker.sh | 40 +++++++++++++++++++ services/aithena-services/docker/Dockerfile | 13 ------ .../aithena-services/docker/build-docker.sh | 5 --- 4 files changed, 62 insertions(+), 18 deletions(-) create mode 100644 services/aithena-services/Dockerfile create mode 100755 services/aithena-services/build-docker.sh delete mode 100644 services/aithena-services/docker/Dockerfile delete mode 100755 services/aithena-services/docker/build-docker.sh diff --git a/services/aithena-services/Dockerfile b/services/aithena-services/Dockerfile new file mode 100644 index 0000000..e24648d --- /dev/null +++ b/services/aithena-services/Dockerfile @@ -0,0 +1,22 @@ +FROM python:3.11-slim + +# environment variables +ENV AITHENA_LOG_LEVEL="DEBUG" +ENV EXEC_DIR="/opt/executables" +# Work directory defined in the base container +WORKDIR ${EXEC_DIR} + +# TOOL_DIR Argument to the Dockerfile +ARG 
TOOL_DIR="." + +# Copy the repository into the container +RUN mkdir aithena +COPY $TOOL_DIR ${EXEC_DIR}/aithena/${TOOL_DIR} +COPY common ${EXEC_DIR}/aithena/common + +# Install the tool +RUN pip3 install "${EXEC_DIR}/aithena/${TOOL_DIR}" + +EXPOSE 80 + +CMD ["bash", "-c", "uvicorn aithena_services.api.main:app --host 0.0.0.0 --port 80"] diff --git a/services/aithena-services/build-docker.sh b/services/aithena-services/build-docker.sh new file mode 100755 index 0000000..5f276ca --- /dev/null +++ b/services/aithena-services/build-docker.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# Derive the tool name from the folder name +script_dir=$(dirname "$(realpath "$0")") +parent_folder=$(basename "$script_dir") +tool_name=$parent_folder + +# The root of the repo +repo_root=$(git rev-parse --show-toplevel) + +# Get the path to this tool from the repository root +tool_dir=$(python3 -c "import os.path; print(os.path.relpath('$script_dir', '$repo_root'))") + +echo "Building docker image for tool: ${tool_name}" +echo "Tool path from root: ${tool_dir}" + +# The version is read from the VERSION file +version=$( Date: Tue, 26 Nov 2024 03:17:25 +0000 Subject: [PATCH 7/8] chore: update helm charts - ollama-aithser --- backends/ollama-backend/helm/Chart.yaml | 4 ++-- services/aithena-services/helm/Chart.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/backends/ollama-backend/helm/Chart.yaml b/backends/ollama-backend/helm/Chart.yaml index 27f718e..794d201 100644 --- a/backends/ollama-backend/helm/Chart.yaml +++ b/backends/ollama-backend/helm/Chart.yaml @@ -15,13 +15,13 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.0 +version: 0.1.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.3.10" +appVersion: "0.4.4" # My common code in my library chart dependencies: diff --git a/services/aithena-services/helm/Chart.yaml b/services/aithena-services/helm/Chart.yaml index 4231520..019ec29 100644 --- a/services/aithena-services/helm/Chart.yaml +++ b/services/aithena-services/helm/Chart.yaml @@ -15,13 +15,13 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.0 +version: 0.1.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. 
-appVersion: "0.1.0-dev3" +appVersion: "0.1.1-dev0" # My common code in my library chart dependencies: From a9286364cf3aa33b6aa89862121e4c1706a833f5 Mon Sep 17 00:00:00 2001 From: Camilo Velez Date: Wed, 4 Dec 2024 00:27:24 +0000 Subject: [PATCH 8/8] chore: update helm templates no manual storageclass --- deployments/helm/templates/job-lib-chart/templates/_pv.yaml | 1 - deployments/helm/templates/job-lib-chart/templates/_pvc.yaml | 1 - .../helm/templates/service-lib-chart/templates/_depl.yaml | 3 +++ .../helm/templates/service-lib-chart/templates/_pv.yaml | 1 - .../helm/templates/service-lib-chart/templates/_pvc.yaml | 1 - 5 files changed, 3 insertions(+), 4 deletions(-) diff --git a/deployments/helm/templates/job-lib-chart/templates/_pv.yaml b/deployments/helm/templates/job-lib-chart/templates/_pv.yaml index 20f4931..f8553ad 100644 --- a/deployments/helm/templates/job-lib-chart/templates/_pv.yaml +++ b/deployments/helm/templates/job-lib-chart/templates/_pv.yaml @@ -13,7 +13,6 @@ spec: accessModes: - {{ .Values.persistentVolume.accessMode | default "ReadWriteOnce" | quote }} persistentVolumeReclaimPolicy: {{ .Values.persistentVolume.reclaimPolicy | default "Retain" | quote }} - storageClassName: {{ .Values.persistentVolume.storageClass | default "manual" | quote }} hostPath: path: {{required "Provide the path to storage on local filesystem." .Values.persistentVolume.hostPath | quote}} {{- end -}} diff --git a/deployments/helm/templates/job-lib-chart/templates/_pvc.yaml b/deployments/helm/templates/job-lib-chart/templates/_pvc.yaml index d636db2..d6a4d06 100644 --- a/deployments/helm/templates/job-lib-chart/templates/_pvc.yaml +++ b/deployments/helm/templates/job-lib-chart/templates/_pvc.yaml @@ -13,7 +13,6 @@ spec: resources: requests: storage: {{required "Provide requested storage size." .Values.persistentVolumeClaim.storage | quote}} - storageClassName: {{ .Values.persistentVolumeClaim.storageClass | default "manual" | quote }} {{- end -}} {{- define "job-lib-chart.pvc" -}} diff --git a/deployments/helm/templates/service-lib-chart/templates/_depl.yaml b/deployments/helm/templates/service-lib-chart/templates/_depl.yaml index 435a887..d5f3964 100644 --- a/deployments/helm/templates/service-lib-chart/templates/_depl.yaml +++ b/deployments/helm/templates/service-lib-chart/templates/_depl.yaml @@ -33,6 +33,9 @@ spec: {{- if .Values.service.port }} ports: - containerPort: {{ .Values.service.port }} + {{- if .Values.service.hostPort }} + hostPort: {{ .Values.service.hostPort }} + {{- end }} {{- end }} # Mount all declared secrets diff --git a/deployments/helm/templates/service-lib-chart/templates/_pv.yaml b/deployments/helm/templates/service-lib-chart/templates/_pv.yaml index 727fb4d..6cf68fc 100644 --- a/deployments/helm/templates/service-lib-chart/templates/_pv.yaml +++ b/deployments/helm/templates/service-lib-chart/templates/_pv.yaml @@ -11,7 +11,6 @@ spec: accessModes: - {{ .Values.persistentVolume.accessMode | default "ReadWriteOnce" | quote }} persistentVolumeReclaimPolicy: {{ .Values.persistentVolume.reclaimPolicy | default "Retain" | quote }} - storageClassName: {{ .Values.persistentVolume.storageClass | default "manual" | quote }} hostPath: path: {{required "Provide the path to storage on local filesystem." 
.Values.persistentVolume.hostPath | quote}} {{- end -}} diff --git a/deployments/helm/templates/service-lib-chart/templates/_pvc.yaml b/deployments/helm/templates/service-lib-chart/templates/_pvc.yaml index 7d26e5f..c93b921 100644 --- a/deployments/helm/templates/service-lib-chart/templates/_pvc.yaml +++ b/deployments/helm/templates/service-lib-chart/templates/_pvc.yaml @@ -11,7 +11,6 @@ spec: resources: requests: storage: {{required "Provide requested storage size." .Values.persistentVolumeClaim.storage | quote}} - storageClassName: {{ .Values.persistentVolumeClaim.storageClass | default "manual" | quote }} {{- end -}} {{- define "aithena-lib-chart.pvc" -}}
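
For reference, a minimal client sketch exercising the REST routes touched by this series (the /update, /chat/{model}/generate and /embed/{model}/generate endpoints from patches 1 and 5). The base URL, port and model names below are illustrative assumptions, not part of the patches; they depend on how the service is deployed (the Dockerfile above runs uvicorn on port 80).

# Minimal client sketch for the aithena-services REST API (illustrative only).
# Assumptions: the service is reachable at http://localhost:80, an Ollama chat
# model named "llama3.1" and an embed model named "nomic-embed-text" are
# available; adjust these to your deployment.
import httpx

BASE_URL = "http://localhost:80"

# Refresh the cached model lists (PUT /update, added in patch 5).
httpx.put(f"{BASE_URL}/update", timeout=30)

# List every chat model known to the service.
chat_models = httpx.get(f"{BASE_URL}/chat/list", timeout=30).json()
print("chat models:", chat_models)

# Non-streaming chat completion: the request body is either a plain string or a
# list of {"role": ..., "content": ...} messages; num_ctx is an optional query
# parameter forwarded to Ollama as the context window.
resp = httpx.post(
    f"{BASE_URL}/chat/llama3.1/generate",
    json=[{"role": "user", "content": "Say hello in one sentence."}],
    params={"stream": False, "num_ctx": 4096},
    timeout=120,
)
resp.raise_for_status()
print(resp.json())

# Streaming variant: each line of the response is a JSON-encoded chunk, and
# errors (e.g. timeouts) arrive as {"error": ...} lines rather than HTTP errors.
with httpx.stream(
    "POST",
    f"{BASE_URL}/chat/llama3.1/generate",
    json="Tell me a short joke.",
    params={"stream": True},
    timeout=120,
) as stream:
    for line in stream.iter_lines():
        if line:
            print(line)

# Text embeddings for a single string or a list of strings.
emb = httpx.post(
    f"{BASE_URL}/embed/nomic-embed-text/generate",
    json=["first document", "second document"],
    timeout=120,
)
print(len(emb.json()), "embedding vectors")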