Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/5599.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add DB tables for model deployment (`model_deployments`, `model_revisions`, `deployment_routes`, `deployment_states`, `model_deployment_tokens`, `model_deployment_auto_scaling_rules`)
33 changes: 33 additions & 0 deletions src/ai/backend/common/data/model_deployment/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from enum import StrEnum


class ReadinessStatus(StrEnum):
NOT_CHECKED = "NOT_CHECKED"
HEALTHY = "HEALTHY"
UNHEALTHY = "UNHEALTHY"


class LivenessStatus(StrEnum):
NOT_CHECKED = "NOT_CHECKED"
HEALTHY = "HEALTHY"
UNHEALTHY = "UNHEALTHY"
DEGRADED = "DEGRADED"


class ActivenessStatus(StrEnum):
ACTIVE = "ACTIVE"
INACTIVE = "INACTIVE"


class ModelDeploymentStatus(StrEnum):
PENDING = "PENDING"
SCALING = "SCALING"
DEPLOYING = "DEPLOYING"
READY = "READY"
STOPPING = "STOPPING"
STOPPED = "STOPPED"


class DeploymentStrategy(StrEnum):
ROLLING = "ROLLING"
BLUE_GREEN = "BLUE_GREEN"
18 changes: 18 additions & 0 deletions src/ai/backend/manager/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from . import association_container_registries_groups as _association_container_registries_groups
from . import audit_log as _auditlog
from . import container_registry as _container_registry
from . import deployment_route as _deployment_route
from . import deployment_state as _deployment_state
from . import domain as _domain
from . import dotfile as _dotfile
from . import endpoint as _endpoint
Expand All @@ -18,6 +20,10 @@
from . import image as _image
from . import kernel as _kernel
from . import keypair as _keypair
from . import model_deployment as _model_deployment
from . import model_deployment_auto_scaling_rules as _model_deployment_auto_scaling_rules
from . import model_deployment_tokens as _model_deployment_tokens
from . import model_revision as _model_revision
from . import network as _network
from . import object_storage as _object_storage
from . import rbac as _rbac
Expand Down Expand Up @@ -61,6 +67,12 @@
*_object_storage.__all__,
*_user.__all__,
*_vfolder.__all__,
*_model_deployment.__all__,
*_model_revision.__all__,
*_deployment_route.__all__,
*_model_deployment_auto_scaling_rules.__all__,
*_model_deployment_tokens.__all__,
*_deployment_state.__all__,
*_dotfile.__all__,
*_rbac.__all__,
*_rbac_models.__all__,
Expand Down Expand Up @@ -117,3 +129,9 @@
from .gql_models.kernel import * # noqa
from .gql_models.session import * # noqa
from .reservoir_registry import * # noqa
from .model_deployment import * # noqa
from .model_revision import * # noqa
from .deployment_route import * # noqa
from .model_deployment_auto_scaling_rules import * # noqa
from .model_deployment_tokens import * # noqa
from .deployment_state import * # noqa
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
"""Add Model Deployment related tables

Revision ID: 96ab6ecb5f9d
Revises: 5b171528a6f5
Create Date: 2025-09-04 11:49:56.488145

"""

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

from ai.backend.common.data.model_deployment.types import (
ActivenessStatus,
DeploymentStrategy,
LivenessStatus,
ModelDeploymentStatus,
ReadinessStatus,
)
from ai.backend.common.types import RuntimeVariant, VFolderMount
from ai.backend.manager.models.base import (
GUID,
DecimalType,
ResourceSlotColumn,
StrEnumType,
StructuredJSONObjectListColumn,
)

# revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the parent it
# revises; both match the "Revision ID" / "Revises" lines in the module docstring.
revision = "96ab6ecb5f9d"
down_revision = "5b171528a6f5"
# No branch label and no extra dependency is declared for this revision.
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create the model-deployment tables.

    Tables created, in order: ``deployment_routes``, ``deployment_states``,
    ``model_deployment_auto_scaling_rules``, ``model_deployment_tokens``,
    ``model_deployments`` (plus two non-unique indexes) and ``model_revisions``.

    Every table has a ``GUID`` primary key filled by ``uuid_generate_v4()``.
    The ``*_id`` reference columns are plain ``GUID`` columns; this revision
    declares no foreign-key constraints.
    """
    op.create_table(
        "deployment_routes",
        sa.Column("id", GUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False),
        sa.Column("deployment_id", GUID(), nullable=False),
        sa.Column("session_id", GUID(), nullable=False),
        sa.Column("revision_id", GUID(), nullable=False),
        sa.Column("weight", sa.Integer(), nullable=False),
        sa.Column("readiness_status", StrEnumType(ReadinessStatus, length=64), nullable=False),
        sa.Column("liveness_status", StrEnumType(LivenessStatus, length=64), nullable=False),
        sa.Column("activeness_status", StrEnumType(ActivenessStatus, length=64), nullable=False),
        sa.Column("detail", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_deployment_routes")),
    )
    op.create_table(
        "deployment_states",
        sa.Column("id", GUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False),
        sa.Column("deployment_id", GUID(), nullable=False),
        sa.Column("prev_revision_id", GUID(), nullable=False),
        sa.Column("next_revision_id", GUID(), nullable=True),
        sa.Column("strategy", StrEnumType(DeploymentStrategy, length=64), nullable=False),
        sa.Column("status", StrEnumType(ModelDeploymentStatus, length=64), nullable=False),
        # Timezone-aware with a server-side default, matching the timestamp
        # columns of every other table in this revision. A naive TIMESTAMP here
        # would be the only one in the set and would force every writer to
        # supply both values explicitly.
        # NOTE(review): the ORM model for this table must declare the same
        # column types/defaults -- confirm against the model module.
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_deployment_states")),
    )
    op.create_table(
        "model_deployment_auto_scaling_rules",
        sa.Column("id", GUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False),
        sa.Column("model_deployment_id", GUID(), nullable=False),
        sa.Column("metric_source", sa.String(), nullable=False),
        sa.Column("metric_name", sa.String(), nullable=False),
        sa.Column("min_threshold", DecimalType(), nullable=True),
        sa.Column("max_threshold", DecimalType(), nullable=True),
        sa.Column("step_size", sa.Integer(), nullable=False),
        sa.Column("time_window", sa.Integer(), nullable=False),
        sa.Column("min_replicas", sa.Integer(), nullable=True),
        sa.Column("max_replicas", sa.Integer(), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        # NOTE(review): NOT NULL without a default means a rule must be given a
        # trigger time at creation, before it has ever fired -- confirm this is
        # intended rather than nullable.
        sa.Column("last_triggered_at", sa.DateTime(timezone=True), nullable=False),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_model_deployment_auto_scaling_rules")),
    )
    op.create_table(
        "model_deployment_tokens",
        sa.Column("id", GUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False),
        sa.Column("token", sa.String(), nullable=False),
        sa.Column("model_deployment_id", GUID(), nullable=False),
        # NOTE(review): nullable here, while created_at is NOT NULL in
        # deployment_routes / model_revisions -- confirm the difference is wanted.
        sa.Column(
            "created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_model_deployment_tokens")),
    )
    op.create_table(
        "model_deployments",
        sa.Column("id", GUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("status", StrEnumType(ModelDeploymentStatus, length=64), nullable=False),
        sa.Column("tags", sa.String(), nullable=False),
        sa.Column("endpoint_url", sa.String(), nullable=True),
        sa.Column("preferred_domain_name", sa.String(), nullable=True),
        sa.Column("open_to_public", sa.Boolean(), nullable=False),
        sa.Column("desired_replica_count", sa.Integer(), nullable=False),
        sa.Column("created_user_id", GUID(), nullable=False),
        sa.Column("current_revision_id", GUID(), nullable=False),
        sa.Column(
            "default_deployment_strategy_type",
            StrEnumType(DeploymentStrategy, length=64),
            nullable=False,
        ),
        sa.Column("deployment_state_id", GUID(), nullable=True),
        sa.Column(
            "created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True
        ),
        sa.Column(
            "updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_model_deployments")),
    )
    # Non-unique lookup indexes on model_deployments.
    op.create_index(
        op.f("ix_model_deployments_created_user_id"),
        "model_deployments",
        ["created_user_id"],
        unique=False,
    )
    op.create_index(op.f("ix_model_deployments_name"), "model_deployments", ["name"], unique=False)
    op.create_table(
        "model_revisions",
        sa.Column("id", GUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("model_deployment_id", GUID(), nullable=False),
        sa.Column("tags", sa.TEXT(), nullable=False),
        sa.Column("runtime_variant", StrEnumType(RuntimeVariant, length=64), nullable=False),
        sa.Column(
            "inference_runtime_config", postgresql.JSONB(astext_type=sa.Text()), nullable=False
        ),
        sa.Column("environment_variables", postgresql.JSONB(astext_type=sa.Text()), nullable=False),
        sa.Column("model_vfolder_id", GUID(), nullable=False),
        sa.Column("model_mount_destination", sa.String(), server_default="/models", nullable=False),
        sa.Column("model_definition_path", sa.String(), nullable=False),
        sa.Column("image_id", GUID(), nullable=False),
        sa.Column("cluster_mode", sa.String(), server_default="single-node", nullable=False),
        sa.Column("cluster_size", sa.Integer(), server_default="1", nullable=False),
        sa.Column("resource_group", sa.String(), nullable=False),
        sa.Column("resource_slots", ResourceSlotColumn(), nullable=False),
        sa.Column("resource_opts", postgresql.JSONB(astext_type=sa.Text()), nullable=False),
        sa.Column(
            "extra_mount",
            StructuredJSONObjectListColumn(VFolderMount),
            server_default="[]",
            nullable=False,
        ),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_model_revisions")),
    )


def downgrade() -> None:
    """Undo this revision by dropping the indexes and tables that ``upgrade()`` creates."""
    op.drop_table("model_revisions")

    # The two model_deployments indexes are dropped before the table itself.
    for index_key in (
        "ix_model_deployments_name",
        "ix_model_deployments_created_user_id",
    ):
        op.drop_index(op.f(index_key), table_name="model_deployments")

    # Remaining tables, in the same order as before.
    for table_name in (
        "model_deployments",
        "model_deployment_tokens",
        "model_deployment_auto_scaling_rules",
        "deployment_states",
        "deployment_routes",
    ):
        op.drop_table(table_name)
Loading
Loading