Skip to content

Commit b129034

Browse files
committed
feat: Add DB tables related to Model Deployment
1 parent 9cf4cf3 commit b129034

File tree

10 files changed

+990
-0
lines changed

10 files changed

+990
-0
lines changed

changes/5599.feature.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add DB Tables for Model Deployment (`model_deployments`, `model_revisions`, `deployment_routes`, `deployment_states`, `model_deployment_tokens`, `model_deployment_auto_scaling_rules`)
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from enum import StrEnum
2+
3+
4+
class ReadinessStatus(StrEnum):
5+
NOT_CHECKED = "NOT_CHECKED"
6+
HEALTHY = "HEALTHY"
7+
UNHEALTHY = "UNHEALTHY"
8+
9+
10+
class LivenessStatus(StrEnum):
11+
NOT_CHECKED = "NOT_CHECKED"
12+
HEALTHY = "HEALTHY"
13+
UNHEALTHY = "UNHEALTHY"
14+
DEGRADED = "DEGRADED"
15+
16+
17+
class ActivenessStatus(StrEnum):
18+
ACTIVE = "ACTIVE"
19+
INACTIVE = "INACTIVE"
20+
21+
22+
class ModelDeploymentStatus(StrEnum):
23+
PENDING = "PENDING"
24+
SCALING = "SCALING"
25+
DEPLOYING = "DEPLOYING"
26+
READY = "READY"
27+
STOPPING = "STOPPING"
28+
STOPPED = "STOPPED"
29+
30+
31+
class DeploymentStrategy(StrEnum):
32+
ROLLING = "ROLLING"
33+
BLUE_GREEN = "BLUE_GREEN"

src/ai/backend/manager/models/__init__.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from . import association_container_registries_groups as _association_container_registries_groups
88
from . import audit_log as _auditlog
99
from . import container_registry as _container_registry
10+
from . import deployment_route as _deployment_route
11+
from . import deployment_state as _deployment_state
1012
from . import domain as _domain
1113
from . import dotfile as _dotfile
1214
from . import endpoint as _endpoint
@@ -18,6 +20,10 @@
1820
from . import image as _image
1921
from . import kernel as _kernel
2022
from . import keypair as _keypair
23+
from . import model_deployment as _model_deployment
24+
from . import model_deployment_auto_scaling_rules as _model_deployment_auto_scaling_rules
25+
from . import model_deployment_tokens as _model_deployment_tokens
26+
from . import model_revision as _model_revision
2127
from . import network as _network
2228
from . import object_storage as _object_storage
2329
from . import rbac as _rbac
@@ -61,6 +67,12 @@
6167
*_object_storage.__all__,
6268
*_user.__all__,
6369
*_vfolder.__all__,
70+
*_model_deployment.__all__,
71+
*_model_revision.__all__,
72+
*_deployment_route.__all__,
73+
*_model_deployment_auto_scaling_rules.__all__,
74+
*_model_deployment_tokens.__all__,
75+
*_deployment_state.__all__,
6476
*_dotfile.__all__,
6577
*_rbac.__all__,
6678
*_rbac_models.__all__,
@@ -117,3 +129,9 @@
117129
from .gql_models.kernel import * # noqa
118130
from .gql_models.session import * # noqa
119131
from .reservoir_registry import * # noqa
132+
from .model_deployment import * # noqa
133+
from .model_revision import * # noqa
134+
from .deployment_route import * # noqa
135+
from .model_deployment_auto_scaling_rules import * # noqa
136+
from .model_deployment_tokens import * # noqa
137+
from .deployment_state import * # noqa
Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
"""Add Model Deployment related tables
2+
3+
Revision ID: 96ab6ecb5f9d
4+
Revises: 5b171528a6f5
5+
Create Date: 2025-09-04 11:49:56.488145
6+
7+
"""
8+
9+
import sqlalchemy as sa
10+
from alembic import op
11+
from sqlalchemy.dialects import postgresql
12+
13+
from ai.backend.common.data.model_deployment.types import (
14+
ActivenessStatus,
15+
DeploymentStrategy,
16+
LivenessStatus,
17+
ModelDeploymentStatus,
18+
ReadinessStatus,
19+
)
20+
from ai.backend.common.types import RuntimeVariant, VFolderMount
21+
from ai.backend.manager.models.base import (
22+
GUID,
23+
DecimalType,
24+
ResourceSlotColumn,
25+
StrEnumType,
26+
StructuredJSONObjectListColumn,
27+
)
28+
29+
# Alembic revision identifiers: this migration's id and the id of its parent.
revision: str = "96ab6ecb5f9d"
down_revision: str = "5b171528a6f5"
# This migration declares no branch label and no dependency on another revision.
branch_labels = None
depends_on = None
34+
35+
36+
def _uuid_pk_column() -> sa.Column:
    """Build a fresh non-null ``id`` column defaulting to ``uuid_generate_v4()``."""
    return sa.Column(
        "id",
        GUID(),
        server_default=sa.text("uuid_generate_v4()"),
        nullable=False,
    )


def _now_timestamp_column(name: str, *, nullable: bool) -> sa.Column:
    """Build a fresh timezone-aware timestamp column whose server default is ``now()``."""
    return sa.Column(
        name,
        sa.DateTime(timezone=True),
        server_default=sa.text("now()"),
        nullable=nullable,
    )


def _jsonb_column(name: str, *, nullable: bool) -> sa.Column:
    """Build a fresh PostgreSQL JSONB column."""
    return sa.Column(name, postgresql.JSONB(astext_type=sa.Text()), nullable=nullable)


def upgrade() -> None:
    """Create the six Model Deployment tables and the two ``model_deployments`` indexes.

    Tables are created in this order: ``deployment_routes``,
    ``deployment_states``, ``model_deployment_auto_scaling_rules``,
    ``model_deployment_tokens``, ``model_deployments``, ``model_revisions``.

    None of the tables declares a foreign-key constraint; the ``*_id`` columns
    are plain GUID columns.
    NOTE(review): confirm that leaving referential integrity to the
    application layer is intentional.
    """
    # --- deployment_routes ---------------------------------------------------
    op.create_table(
        "deployment_routes",
        _uuid_pk_column(),
        sa.Column("deployment_id", GUID(), nullable=False),
        sa.Column("session_id", GUID(), nullable=False),
        sa.Column("revision_id", GUID(), nullable=False),
        sa.Column("weight", sa.Integer(), nullable=False),
        sa.Column("readiness_status", StrEnumType(ReadinessStatus, length=64), nullable=False),
        sa.Column("liveness_status", StrEnumType(LivenessStatus, length=64), nullable=False),
        sa.Column("activeness_status", StrEnumType(ActivenessStatus, length=64), nullable=False),
        _jsonb_column("detail", nullable=True),
        _now_timestamp_column("created_at", nullable=False),
        _now_timestamp_column("updated_at", nullable=False),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_deployment_routes")),
    )

    # --- deployment_states ---------------------------------------------------
    op.create_table(
        "deployment_states",
        _uuid_pk_column(),
        sa.Column("deployment_id", GUID(), nullable=False),
        sa.Column("prev_revision_id", GUID(), nullable=False),
        sa.Column("next_revision_id", GUID(), nullable=True),
        sa.Column("strategy", StrEnumType(DeploymentStrategy, length=64), nullable=False),
        sa.Column("status", StrEnumType(ModelDeploymentStatus, length=64), nullable=False),
        # Unlike every other table in this migration, these two timestamps are
        # timezone-naive and have no server default, so writers must supply them.
        # NOTE(review): confirm this difference is intentional.
        sa.Column("created_at", sa.DateTime(), nullable=False),
        sa.Column("updated_at", sa.DateTime(), nullable=False),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_deployment_states")),
    )

    # --- model_deployment_auto_scaling_rules ---------------------------------
    op.create_table(
        "model_deployment_auto_scaling_rules",
        _uuid_pk_column(),
        sa.Column("model_deployment_id", GUID(), nullable=False),
        sa.Column("metric_source", sa.String(), nullable=False),
        sa.Column("metric_name", sa.String(), nullable=False),
        sa.Column("min_threshold", DecimalType(), nullable=True),
        sa.Column("max_threshold", DecimalType(), nullable=True),
        sa.Column("step_size", sa.Integer(), nullable=False),
        sa.Column("time_window", sa.Integer(), nullable=False),
        sa.Column("min_replicas", sa.Integer(), nullable=True),
        sa.Column("max_replicas", sa.Integer(), nullable=True),
        _now_timestamp_column("created_at", nullable=False),
        # Non-null with no server default: an insert must provide a value.
        # NOTE(review): confirm how a rule that has never fired is represented.
        sa.Column("last_triggered_at", sa.DateTime(timezone=True), nullable=False),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_model_deployment_auto_scaling_rules")),
    )

    # --- model_deployment_tokens ---------------------------------------------
    op.create_table(
        "model_deployment_tokens",
        _uuid_pk_column(),
        sa.Column("token", sa.String(), nullable=False),
        sa.Column("model_deployment_id", GUID(), nullable=False),
        _now_timestamp_column("created_at", nullable=True),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_model_deployment_tokens")),
    )

    # --- model_deployments (plus its two non-unique indexes) -----------------
    op.create_table(
        "model_deployments",
        _uuid_pk_column(),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("status", StrEnumType(ModelDeploymentStatus, length=64), nullable=False),
        # NOTE(review): ``tags`` is String here but TEXT in ``model_revisions``.
        sa.Column("tags", sa.String(), nullable=False),
        sa.Column("endpoint_url", sa.String(), nullable=True),
        sa.Column("preferred_domain_name", sa.String(), nullable=True),
        sa.Column("open_to_public", sa.Boolean(), nullable=False),
        sa.Column("desired_replica_count", sa.Integer(), nullable=False),
        sa.Column("created_user_id", GUID(), nullable=False),
        sa.Column("current_revision_id", GUID(), nullable=False),
        sa.Column(
            "default_deployment_strategy_type",
            StrEnumType(DeploymentStrategy, length=64),
            nullable=False,
        ),
        sa.Column("deployment_state_id", GUID(), nullable=True),
        _now_timestamp_column("created_at", nullable=True),
        _now_timestamp_column("updated_at", nullable=True),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_model_deployments")),
    )
    op.create_index(
        op.f("ix_model_deployments_created_user_id"),
        "model_deployments",
        ["created_user_id"],
        unique=False,
    )
    op.create_index(
        op.f("ix_model_deployments_name"),
        "model_deployments",
        ["name"],
        unique=False,
    )

    # --- model_revisions -----------------------------------------------------
    op.create_table(
        "model_revisions",
        _uuid_pk_column(),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("model_deployment_id", GUID(), nullable=False),
        sa.Column("tags", sa.TEXT(), nullable=False),
        sa.Column("runtime_variant", StrEnumType(RuntimeVariant, length=64), nullable=False),
        _jsonb_column("inference_runtime_config", nullable=False),
        _jsonb_column("environment_variables", nullable=False),
        sa.Column("model_vfolder_id", GUID(), nullable=False),
        sa.Column("model_mount_destination", sa.String(), server_default="/models", nullable=False),
        sa.Column("model_definition_path", sa.String(), nullable=False),
        sa.Column("image_id", GUID(), nullable=False),
        sa.Column("cluster_mode", sa.String(), server_default="single-node", nullable=False),
        sa.Column("cluster_size", sa.Integer(), server_default="1", nullable=False),
        sa.Column("resource_group", sa.String(), nullable=False),
        sa.Column("resource_slots", ResourceSlotColumn(), nullable=False),
        _jsonb_column("resource_opts", nullable=False),
        sa.Column(
            "extra_mount",
            StructuredJSONObjectListColumn(VFolderMount),
            server_default="[]",
            nullable=False,
        ),
        _now_timestamp_column("created_at", nullable=False),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_model_revisions")),
    )
174+
175+
176+
def downgrade() -> None:
    """Drop everything ``upgrade()`` created, in the reverse of creation order.

    The two ``model_deployments`` indexes are dropped explicitly before the
    table they belong to.
    """
    op.drop_table("model_revisions")

    for index_name in (
        "ix_model_deployments_name",
        "ix_model_deployments_created_user_id",
    ):
        op.drop_index(op.f(index_name), table_name="model_deployments")

    for table_name in (
        "model_deployments",
        "model_deployment_tokens",
        "model_deployment_auto_scaling_rules",
        "deployment_states",
        "deployment_routes",
    ):
        op.drop_table(table_name)

0 commit comments

Comments
 (0)