Skip to content

Commit 1fed460

Browse files
committed
feat: Add db tables for model deployment
1 parent 8896eb4 commit 1fed460

File tree

11 files changed

+972
-25
lines changed

11 files changed

+972
-25
lines changed

changes/5599.feature.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add DB tables for model deployment (`model_deployments`, `model_revisions`, `deployment_session_bindings`, `blue_green_strategies`, `model_deployment_tokens`, `model_deployment_auto_scaling_rules`)

python.lock

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -6295,19 +6295,19 @@
62956295
"artifacts": [
62966296
{
62976297
"algorithm": "sha256",
6298-
"hash": "657c83f876047ffc242b34bfcd9167f201d1b02e914ee854f16e589aa95c0d45",
6299-
"url": "https://files.pythonhosted.org/packages/28/78/0d8ffa40e9ec6cbbabe4d93675092fea1cadc4c280495375fc1f2fa42793/types_aiofiles-24.1.0.20250809-py3-none-any.whl"
6298+
"hash": "0ec8f8909e1a85a5a79aed0573af7901f53120dd2a29771dd0b3ef48e12328b0",
6299+
"url": "https://files.pythonhosted.org/packages/bc/8e/5e6d2215e1d8f7c2a94c6e9d0059ae8109ce0f5681956d11bb0a228cef04/types_aiofiles-24.1.0.20250822-py3-none-any.whl"
63006300
},
63016301
{
63026302
"algorithm": "sha256",
6303-
"hash": "4dc9734330b1324d9251f92edfc94fd6827fbb829c593313f034a77ac33ae327",
6304-
"url": "https://files.pythonhosted.org/packages/03/b8/34a4f9da445a104d240bb26365a10ef68953bebdc812859ea46847c7fdcb/types_aiofiles-24.1.0.20250809.tar.gz"
6303+
"hash": "9ab90d8e0c307fe97a7cf09338301e3f01a163e39f3b529ace82466355c84a7b",
6304+
"url": "https://files.pythonhosted.org/packages/19/48/c64471adac9206cc844afb33ed311ac5a65d2f59df3d861e0f2d0cad7414/types_aiofiles-24.1.0.20250822.tar.gz"
63056305
}
63066306
],
63076307
"project_name": "types-aiofiles",
63086308
"requires_dists": [],
63096309
"requires_python": ">=3.9",
6310-
"version": "24.1.0.20250809"
6310+
"version": "24.1.0.20250822"
63116311
},
63126312
{
63136313
"artifacts": [
@@ -6331,21 +6331,21 @@
63316331
"artifacts": [
63326332
{
63336333
"algorithm": "sha256",
6334-
"hash": "d25a61025306cf2276a3010e11dc694e48da64d21fda694f3b323a9faddc3ea2",
6335-
"url": "https://files.pythonhosted.org/packages/e6/00/a2eb7cef00e4d82f1ed3741a698a66691e37e30b1a1fd2513055a5a08e9f/types_cffi-1.17.0.20250809-py3-none-any.whl"
6334+
"hash": "183dd76c1871a48936d7b931488e41f0f25a7463abe10b5816be275fc11506d5",
6335+
"url": "https://files.pythonhosted.org/packages/21/f7/68029931e7539e3246b33386a19c475f234c71d2a878411847b20bb31960/types_cffi-1.17.0.20250822-py3-none-any.whl"
63366336
},
63376337
{
63386338
"algorithm": "sha256",
6339-
"hash": "426099ada3e54a525795e04259edf10d96c8ea245b4efff29dcd086bdf242a64",
6340-
"url": "https://files.pythonhosted.org/packages/c2/69/11aed81db94bd2ecd37cf52bef20053f82024cb015ec6cc6bb6bf135d4a0/types_cffi-1.17.0.20250809.tar.gz"
6339+
"hash": "bf6f5a381ea49da7ff895fae69711271e6192c434470ce6139bf2b2e0d0fa08d",
6340+
"url": "https://files.pythonhosted.org/packages/da/0c/76a48cb6e742cac4d61a4ec632dd30635b6d302f5acdc2c0a27572ac7ae3/types_cffi-1.17.0.20250822.tar.gz"
63416341
}
63426342
],
63436343
"project_name": "types-cffi",
63446344
"requires_dists": [
63456345
"types-setuptools"
63466346
],
63476347
"requires_python": ">=3.9",
6348-
"version": "1.17.0.20250809"
6348+
"version": "1.17.0.20250822"
63496349
},
63506350
{
63516351
"artifacts": [
@@ -6410,37 +6410,37 @@
64106410
"artifacts": [
64116411
{
64126412
"algorithm": "sha256",
6413-
"hash": "768890cac4f2d7fd9e0feb6f3217fce2abbfdfc0cadd38d11fba325a815e4b9f",
6414-
"url": "https://files.pythonhosted.org/packages/43/5e/67312e679f612218d07fcdbd14017e6d571ce240a5ba1ad734f15a8523cc/types_python_dateutil-2.9.0.20250809-py3-none-any.whl"
6413+
"hash": "849d52b737e10a6dc6621d2bd7940ec7c65fcb69e6aa2882acf4e56b2b508ddc",
6414+
"url": "https://files.pythonhosted.org/packages/ab/d9/a29dfa84363e88b053bf85a8b7f212a04f0d7343a4d24933baa45c06e08b/types_python_dateutil-2.9.0.20250822-py3-none-any.whl"
64156415
},
64166416
{
64176417
"algorithm": "sha256",
6418-
"hash": "69cbf8d15ef7a75c3801d65d63466e46ac25a0baa678d89d0a137fc31a608cc1",
6419-
"url": "https://files.pythonhosted.org/packages/a3/53/07dac71db45fb6b3c71c2fd29a87cada2239eac7ecfb318e6ebc7da00a3b/types_python_dateutil-2.9.0.20250809.tar.gz"
6418+
"hash": "84c92c34bd8e68b117bff742bc00b692a1e8531262d4507b33afcc9f7716cd53",
6419+
"url": "https://files.pythonhosted.org/packages/0c/0a/775f8551665992204c756be326f3575abba58c4a3a52eef9909ef4536428/types_python_dateutil-2.9.0.20250822.tar.gz"
64206420
}
64216421
],
64226422
"project_name": "types-python-dateutil",
64236423
"requires_dists": [],
64246424
"requires_python": ">=3.9",
6425-
"version": "2.9.0.20250809"
6425+
"version": "2.9.0.20250822"
64266426
},
64276427
{
64286428
"artifacts": [
64296429
{
64306430
"algorithm": "sha256",
6431-
"hash": "032b6003b798e7de1a1ddfeefee32fac6486bdfe4845e0ae0e7fb3ee4512b52f",
6432-
"url": "https://files.pythonhosted.org/packages/35/3e/0346d09d6e338401ebf406f12eaf9d0b54b315b86f1ec29e34f1a0aedae9/types_pyyaml-6.0.12.20250809-py3-none-any.whl"
6431+
"hash": "1fe1a5e146aa315483592d292b72a172b65b946a6d98aa6ddd8e4aa838ab7098",
6432+
"url": "https://files.pythonhosted.org/packages/32/8e/8f0aca667c97c0d76024b37cffa39e76e2ce39ca54a38f285a64e6ae33ba/types_pyyaml-6.0.12.20250822-py3-none-any.whl"
64336433
},
64346434
{
64356435
"algorithm": "sha256",
6436-
"hash": "af4a1aca028f18e75297da2ee0da465f799627370d74073e96fee876524f61b5",
6437-
"url": "https://files.pythonhosted.org/packages/36/21/52ffdbddea3c826bc2758d811ccd7f766912de009c5cf096bd5ebba44680/types_pyyaml-6.0.12.20250809.tar.gz"
6436+
"hash": "259f1d93079d335730a9db7cff2bcaf65d7e04b4a56b5927d49a612199b59413",
6437+
"url": "https://files.pythonhosted.org/packages/49/85/90a442e538359ab5c9e30de415006fb22567aa4301c908c09f19e42975c2/types_pyyaml-6.0.12.20250822.tar.gz"
64386438
}
64396439
],
64406440
"project_name": "types-pyyaml",
64416441
"requires_dists": [],
64426442
"requires_python": ">=3.9",
6443-
"version": "6.0.12.20250809"
6443+
"version": "6.0.12.20250822"
64446444
},
64456445
{
64466446
"artifacts": [
@@ -6467,19 +6467,19 @@
64676467
"artifacts": [
64686468
{
64696469
"algorithm": "sha256",
6470-
"hash": "7c6539b4c7ac7b4ab4db2be66d8a58fb1e28affa3ee3834be48acafd94f5976a",
6471-
"url": "https://files.pythonhosted.org/packages/ca/1d/ad4fd409b377904324cbd2dc3a11e29ba13e2cf603c5a14cd88a35da5be0/types_setuptools-80.9.0.20250809-py3-none-any.whl"
6470+
"hash": "53bf881cb9d7e46ed12c76ef76c0aaf28cfe6211d3fab12e0b83620b1a8642c3",
6471+
"url": "https://files.pythonhosted.org/packages/b6/2d/475bf15c1cdc172e7a0d665b6e373ebfb1e9bf734d3f2f543d668b07a142/types_setuptools-80.9.0.20250822-py3-none-any.whl"
64726472
},
64736473
{
64746474
"algorithm": "sha256",
6475-
"hash": "e986ba37ffde364073d76189e1d79d9928fb6f5278c7d07589cde353d0218864",
6476-
"url": "https://files.pythonhosted.org/packages/43/4f/d78a04083ee3cc0a7c14406afb2f1e7b63e70da95b777571d665d89b1765/types_setuptools-80.9.0.20250809.tar.gz"
6475+
"hash": "070ea7716968ec67a84c7f7768d9952ff24d28b65b6594797a464f1b3066f965",
6476+
"url": "https://files.pythonhosted.org/packages/19/bd/1e5f949b7cb740c9f0feaac430e301b8f1c5f11a81e26324299ea671a237/types_setuptools-80.9.0.20250822.tar.gz"
64776477
}
64786478
],
64796479
"project_name": "types-setuptools",
64806480
"requires_dists": [],
64816481
"requires_python": ">=3.9",
6482-
"version": "80.9.0.20250809"
6482+
"version": "80.9.0.20250822"
64836483
},
64846484
{
64856485
"artifacts": [
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from enum import StrEnum
2+
3+
4+
class ReadinessStatus(StrEnum):
5+
PENDING = "PENDING"
6+
PREPARING = "PREPARING"
7+
READY = "READY"
8+
UNHEALTHY = "UNHEALTHY"
9+
TERMINATING = "TERMINATING"
10+
TERMINATED = "TERMINATED"
11+
12+
13+
class ModelDeploymentStatus(StrEnum):
14+
ACTIVE = "ACTIVE"
15+
INACTIVE = "INACTIVE"
16+
CREATED = "CREATED"
17+
DEPLOYING = "DEPLOYING"
18+
READY = "READY"
19+
STOPPING = "STOPPING"
20+
STOPPED = "STOPPED"
21+
22+
23+
class DeploymentStrategy(StrEnum):
24+
ROLLING = "ROLLING"
25+
BLUE_GREEN = "BLUE_GREEN"

src/ai/backend/manager/models/__init__.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
from . import association_artifacts_storages as _association_artifacts_storages
66
from . import association_container_registries_groups as _association_container_registries_groups
77
from . import audit_log as _auditlog
8+
from . import blue_green_strategies as _blue_green_strategies
89
from . import container_registry as _container_registry
10+
from . import deployment_session_bindings as _deployment_session_bindings
911
from . import domain as _domain
1012
from . import dotfile as _dotfile
1113
from . import endpoint as _endpoint
@@ -17,6 +19,10 @@
1719
from . import image as _image
1820
from . import kernel as _kernel
1921
from . import keypair as _keypair
22+
from . import model_deployment as _model_deployment
23+
from . import model_deployment_auto_scaling_rules as _model_deployment_auto_scaling_rules
24+
from . import model_deployment_tokens as _model_deployment_tokens
25+
from . import model_revision as _model_revision
2026
from . import network as _network
2127
from . import object_storage as _object_storage
2228
from . import rbac as _rbac
@@ -58,6 +64,12 @@
5864
*_object_storage.__all__,
5965
*_user.__all__,
6066
*_vfolder.__all__,
67+
*_model_deployment.__all__,
68+
*_model_revision.__all__,
69+
*_deployment_session_bindings.__all__,
70+
*_model_deployment_auto_scaling_rules.__all__,
71+
*_model_deployment_tokens.__all__,
72+
*_blue_green_strategies.__all__,
6173
*_dotfile.__all__,
6274
*_rbac.__all__,
6375
*_rbac_models.__all__,
@@ -111,3 +123,9 @@
111123
from .gql_models.agent import * # noqa
112124
from .gql_models.kernel import * # noqa
113125
from .gql_models.session import * # noqa
126+
from .model_deployment import * # noqa
127+
from .model_revision import * # noqa
128+
from .deployment_session_bindings import * # noqa
129+
from .model_deployment_auto_scaling_rules import * # noqa
130+
from .model_deployment_tokens import * # noqa
131+
from .blue_green_strategies import * # noqa
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
"""Add Model Deployment related tables
2+
3+
Revision ID: 0a3480997c98
4+
Revises: 345551145801
5+
Create Date: 2025-08-25 10:02:33.684728
6+
7+
"""
8+
9+
import sqlalchemy as sa
10+
from alembic import op
11+
from sqlalchemy.dialects import postgresql
12+
13+
from ai.backend.common.data.model_deployment.types import (
14+
DeploymentStrategy,
15+
ModelDeploymentStatus,
16+
ReadinessStatus,
17+
)
18+
from ai.backend.common.types import RuntimeVariant, VFolderMount
19+
from ai.backend.manager.models.base import (
20+
GUID,
21+
DecimalType,
22+
ResourceSlotColumn,
23+
StrEnumType,
24+
StructuredJSONObjectListColumn,
25+
)
26+
27+
# revision identifiers, used by Alembic.
28+
revision = "0a3480997c98"
29+
down_revision = "345551145801"
30+
branch_labels = None
31+
depends_on = None
32+
33+
34+
def upgrade() -> None:
    """Create the model-deployment tables and the two ``model_deployments`` indexes.

    Tables are created in this order: ``blue_green_strategies``,
    ``deployment_session_bindings``, ``model_deployment_auto_scaling_rules``,
    ``model_deployment_tokens``, ``model_deployments`` (followed by its two
    indexes) and ``model_revisions``.  No foreign-key constraints are declared;
    every ``*_id`` reference column is a plain ``GUID``.
    """

    # Small column factories.  Each call builds a fresh Column because a
    # Column object cannot be attached to more than one table.
    def uuid_pk() -> sa.Column:
        return sa.Column(
            "id", GUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False
        )

    def required(name: str, type_) -> sa.Column:
        return sa.Column(name, type_, nullable=False)

    def optional(name: str, type_) -> sa.Column:
        return sa.Column(name, type_, nullable=True)

    def now_stamp(name: str, *, nullable: bool) -> sa.Column:
        # Timezone-aware timestamp that the server fills in with now().
        return sa.Column(
            name,
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=nullable,
        )

    def jsonb(name: str) -> sa.Column:
        return sa.Column(name, postgresql.JSONB(astext_type=sa.Text()), nullable=False)

    # --- blue_green_strategies -------------------------------------------
    # NOTE(review): unlike every other table below, these two timestamps are
    # timezone-naive and have no server default -- confirm this is intended.
    op.create_table(
        "blue_green_strategies",
        uuid_pk(),
        required("pre_switch_wait_time", sa.Integer()),
        required("switch_timeout", sa.Integer()),
        required("is_active", sa.Boolean()),
        required("created_at", sa.DateTime()),
        required("updated_at", sa.DateTime()),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_blue_green_strategies")),
    )

    # --- deployment_session_bindings -------------------------------------
    op.create_table(
        "deployment_session_bindings",
        uuid_pk(),
        required("deployment_id", GUID()),
        required("session_id", GUID()),
        required("weight", DecimalType()),
        required("readiness_status", StrEnumType(ReadinessStatus, length=64)),
        now_stamp("created_at", nullable=False),
        now_stamp("updated_at", nullable=False),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_deployment_session_bindings")),
    )

    # --- model_deployment_auto_scaling_rules -----------------------------
    # The check constraint requires exactly one of min_threshold/max_threshold
    # to be set on each row.
    # NOTE(review): last_triggered_at is NOT NULL without a default, so every
    # insert must supply it -- confirm that is intended.
    op.create_table(
        "model_deployment_auto_scaling_rules",
        uuid_pk(),
        required("model_deployment_id", GUID()),
        required("metric_source", sa.String()),
        required("metric_name", sa.String()),
        optional("min_threshold", DecimalType()),
        optional("max_threshold", DecimalType()),
        required("step_size", sa.Integer()),
        required("time_window", sa.Integer()),
        optional("min_replicas", sa.Integer()),
        optional("max_replicas", sa.Integer()),
        now_stamp("created_at", nullable=False),
        required("last_triggered_at", sa.DateTime(timezone=True)),
        sa.CheckConstraint(
            "(min_threshold IS NOT NULL AND max_threshold IS NULL) OR (min_threshold IS NULL AND max_threshold IS NOT NULL)",
            name=op.f("ck_model_deployment_auto_scaling_rules_check_single_threshold"),
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_model_deployment_auto_scaling_rules")),
        sa.UniqueConstraint(
            "model_deployment_id", "metric_name", name="uq_model_deployment_metric"
        ),
    )

    # --- model_deployment_tokens -----------------------------------------
    # NOTE(review): created_at is nullable here but NOT NULL on the binding,
    # auto-scaling-rule and revision tables -- confirm.
    op.create_table(
        "model_deployment_tokens",
        uuid_pk(),
        required("token", sa.String()),
        required("model_deployment_id", GUID()),
        now_stamp("created_at", nullable=True),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_model_deployment_tokens")),
    )

    # --- model_deployments (+ indexes) -----------------------------------
    # NOTE(review): created_at/updated_at are nullable here as well -- confirm.
    op.create_table(
        "model_deployments",
        uuid_pk(),
        required("name", sa.String()),
        required("status", StrEnumType(ModelDeploymentStatus, length=64)),
        required("tags", sa.String()),
        optional("endpoint_url", sa.String()),
        optional("preferred_domain_name", sa.String()),
        required("open_to_public", sa.Boolean()),
        required("desired_replica_count", sa.Integer()),
        required("created_user_id", GUID()),
        required("current_revision_id", GUID()),
        required("deployment_strategy_type", StrEnumType(DeploymentStrategy, length=64)),
        required("deployment_strategy_id", GUID()),
        now_stamp("created_at", nullable=True),
        now_stamp("updated_at", nullable=True),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_model_deployments")),
    )
    deployment_indexes = (
        ("ix_model_deployments_created_user_id", "created_user_id"),
        ("ix_model_deployments_name", "name"),
    )
    for index_name, column_name in deployment_indexes:
        op.create_index(op.f(index_name), "model_deployments", [column_name], unique=False)

    # --- model_revisions -------------------------------------------------
    op.create_table(
        "model_revisions",
        uuid_pk(),
        required("name", sa.String()),
        required("model_deployment_id", GUID()),
        required("tags", sa.TEXT()),
        required("runtime_variant", StrEnumType(RuntimeVariant, length=64)),
        jsonb("inference_runtime_config"),
        jsonb("environment_variables"),
        required("model_vfolder_id", GUID()),
        sa.Column("model_mount_destination", sa.String(), server_default="/models", nullable=False),
        required("model_definition_path", sa.String()),
        required("image_id", GUID()),
        sa.Column("cluster_mode", sa.String(), server_default="single-node", nullable=False),
        sa.Column("cluster_size", sa.Integer(), server_default="1", nullable=False),
        required("resource_group", sa.String()),
        required("resource_slots", ResourceSlotColumn()),
        jsonb("resource_opts"),
        sa.Column(
            "extra_mount",
            StructuredJSONObjectListColumn(VFolderMount),
            server_default="[]",
            nullable=False,
        ),
        now_stamp("created_at", nullable=False),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_model_revisions")),
    )
171+
172+
173+
def downgrade() -> None:
    """Drop everything ``upgrade()`` created, in reverse creation order."""
    op.drop_table("model_revisions")

    # The two model_deployments indexes are dropped before the table itself.
    for index_name in (
        "ix_model_deployments_name",
        "ix_model_deployments_created_user_id",
    ):
        op.drop_index(op.f(index_name), table_name="model_deployments")

    for table_name in (
        "model_deployments",
        "model_deployment_tokens",
        "model_deployment_auto_scaling_rules",
        "deployment_session_bindings",
        "blue_green_strategies",
    ):
        op.drop_table(table_name)

0 commit comments

Comments
 (0)