Skip to content

Commit b0c2f74

Browse files
committed
feat: Add db tables for model deployment
1 parent c8189ef commit b0c2f74

File tree

11 files changed

+913
-25
lines changed

11 files changed

+913
-25
lines changed

changes/5599.feature.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add DB tables for Model Deployment (`model_deployments`, `model_revisions`, `deployment_session_bindings`, `blue_green_strategies`, `model_deployment_tokens`, `model_deployment_auto_scaling_rules`)

python.lock

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -6295,19 +6295,19 @@
62956295
"artifacts": [
62966296
{
62976297
"algorithm": "sha256",
6298-
"hash": "657c83f876047ffc242b34bfcd9167f201d1b02e914ee854f16e589aa95c0d45",
6299-
"url": "https://files.pythonhosted.org/packages/28/78/0d8ffa40e9ec6cbbabe4d93675092fea1cadc4c280495375fc1f2fa42793/types_aiofiles-24.1.0.20250809-py3-none-any.whl"
6298+
"hash": "0ec8f8909e1a85a5a79aed0573af7901f53120dd2a29771dd0b3ef48e12328b0",
6299+
"url": "https://files.pythonhosted.org/packages/bc/8e/5e6d2215e1d8f7c2a94c6e9d0059ae8109ce0f5681956d11bb0a228cef04/types_aiofiles-24.1.0.20250822-py3-none-any.whl"
63006300
},
63016301
{
63026302
"algorithm": "sha256",
6303-
"hash": "4dc9734330b1324d9251f92edfc94fd6827fbb829c593313f034a77ac33ae327",
6304-
"url": "https://files.pythonhosted.org/packages/03/b8/34a4f9da445a104d240bb26365a10ef68953bebdc812859ea46847c7fdcb/types_aiofiles-24.1.0.20250809.tar.gz"
6303+
"hash": "9ab90d8e0c307fe97a7cf09338301e3f01a163e39f3b529ace82466355c84a7b",
6304+
"url": "https://files.pythonhosted.org/packages/19/48/c64471adac9206cc844afb33ed311ac5a65d2f59df3d861e0f2d0cad7414/types_aiofiles-24.1.0.20250822.tar.gz"
63056305
}
63066306
],
63076307
"project_name": "types-aiofiles",
63086308
"requires_dists": [],
63096309
"requires_python": ">=3.9",
6310-
"version": "24.1.0.20250809"
6310+
"version": "24.1.0.20250822"
63116311
},
63126312
{
63136313
"artifacts": [
@@ -6331,21 +6331,21 @@
63316331
"artifacts": [
63326332
{
63336333
"algorithm": "sha256",
6334-
"hash": "d25a61025306cf2276a3010e11dc694e48da64d21fda694f3b323a9faddc3ea2",
6335-
"url": "https://files.pythonhosted.org/packages/e6/00/a2eb7cef00e4d82f1ed3741a698a66691e37e30b1a1fd2513055a5a08e9f/types_cffi-1.17.0.20250809-py3-none-any.whl"
6334+
"hash": "183dd76c1871a48936d7b931488e41f0f25a7463abe10b5816be275fc11506d5",
6335+
"url": "https://files.pythonhosted.org/packages/21/f7/68029931e7539e3246b33386a19c475f234c71d2a878411847b20bb31960/types_cffi-1.17.0.20250822-py3-none-any.whl"
63366336
},
63376337
{
63386338
"algorithm": "sha256",
6339-
"hash": "426099ada3e54a525795e04259edf10d96c8ea245b4efff29dcd086bdf242a64",
6340-
"url": "https://files.pythonhosted.org/packages/c2/69/11aed81db94bd2ecd37cf52bef20053f82024cb015ec6cc6bb6bf135d4a0/types_cffi-1.17.0.20250809.tar.gz"
6339+
"hash": "bf6f5a381ea49da7ff895fae69711271e6192c434470ce6139bf2b2e0d0fa08d",
6340+
"url": "https://files.pythonhosted.org/packages/da/0c/76a48cb6e742cac4d61a4ec632dd30635b6d302f5acdc2c0a27572ac7ae3/types_cffi-1.17.0.20250822.tar.gz"
63416341
}
63426342
],
63436343
"project_name": "types-cffi",
63446344
"requires_dists": [
63456345
"types-setuptools"
63466346
],
63476347
"requires_python": ">=3.9",
6348-
"version": "1.17.0.20250809"
6348+
"version": "1.17.0.20250822"
63496349
},
63506350
{
63516351
"artifacts": [
@@ -6410,37 +6410,37 @@
64106410
"artifacts": [
64116411
{
64126412
"algorithm": "sha256",
6413-
"hash": "768890cac4f2d7fd9e0feb6f3217fce2abbfdfc0cadd38d11fba325a815e4b9f",
6414-
"url": "https://files.pythonhosted.org/packages/43/5e/67312e679f612218d07fcdbd14017e6d571ce240a5ba1ad734f15a8523cc/types_python_dateutil-2.9.0.20250809-py3-none-any.whl"
6413+
"hash": "849d52b737e10a6dc6621d2bd7940ec7c65fcb69e6aa2882acf4e56b2b508ddc",
6414+
"url": "https://files.pythonhosted.org/packages/ab/d9/a29dfa84363e88b053bf85a8b7f212a04f0d7343a4d24933baa45c06e08b/types_python_dateutil-2.9.0.20250822-py3-none-any.whl"
64156415
},
64166416
{
64176417
"algorithm": "sha256",
6418-
"hash": "69cbf8d15ef7a75c3801d65d63466e46ac25a0baa678d89d0a137fc31a608cc1",
6419-
"url": "https://files.pythonhosted.org/packages/a3/53/07dac71db45fb6b3c71c2fd29a87cada2239eac7ecfb318e6ebc7da00a3b/types_python_dateutil-2.9.0.20250809.tar.gz"
6418+
"hash": "84c92c34bd8e68b117bff742bc00b692a1e8531262d4507b33afcc9f7716cd53",
6419+
"url": "https://files.pythonhosted.org/packages/0c/0a/775f8551665992204c756be326f3575abba58c4a3a52eef9909ef4536428/types_python_dateutil-2.9.0.20250822.tar.gz"
64206420
}
64216421
],
64226422
"project_name": "types-python-dateutil",
64236423
"requires_dists": [],
64246424
"requires_python": ">=3.9",
6425-
"version": "2.9.0.20250809"
6425+
"version": "2.9.0.20250822"
64266426
},
64276427
{
64286428
"artifacts": [
64296429
{
64306430
"algorithm": "sha256",
6431-
"hash": "032b6003b798e7de1a1ddfeefee32fac6486bdfe4845e0ae0e7fb3ee4512b52f",
6432-
"url": "https://files.pythonhosted.org/packages/35/3e/0346d09d6e338401ebf406f12eaf9d0b54b315b86f1ec29e34f1a0aedae9/types_pyyaml-6.0.12.20250809-py3-none-any.whl"
6431+
"hash": "1fe1a5e146aa315483592d292b72a172b65b946a6d98aa6ddd8e4aa838ab7098",
6432+
"url": "https://files.pythonhosted.org/packages/32/8e/8f0aca667c97c0d76024b37cffa39e76e2ce39ca54a38f285a64e6ae33ba/types_pyyaml-6.0.12.20250822-py3-none-any.whl"
64336433
},
64346434
{
64356435
"algorithm": "sha256",
6436-
"hash": "af4a1aca028f18e75297da2ee0da465f799627370d74073e96fee876524f61b5",
6437-
"url": "https://files.pythonhosted.org/packages/36/21/52ffdbddea3c826bc2758d811ccd7f766912de009c5cf096bd5ebba44680/types_pyyaml-6.0.12.20250809.tar.gz"
6436+
"hash": "259f1d93079d335730a9db7cff2bcaf65d7e04b4a56b5927d49a612199b59413",
6437+
"url": "https://files.pythonhosted.org/packages/49/85/90a442e538359ab5c9e30de415006fb22567aa4301c908c09f19e42975c2/types_pyyaml-6.0.12.20250822.tar.gz"
64386438
}
64396439
],
64406440
"project_name": "types-pyyaml",
64416441
"requires_dists": [],
64426442
"requires_python": ">=3.9",
6443-
"version": "6.0.12.20250809"
6443+
"version": "6.0.12.20250822"
64446444
},
64456445
{
64466446
"artifacts": [
@@ -6467,19 +6467,19 @@
64676467
"artifacts": [
64686468
{
64696469
"algorithm": "sha256",
6470-
"hash": "7c6539b4c7ac7b4ab4db2be66d8a58fb1e28affa3ee3834be48acafd94f5976a",
6471-
"url": "https://files.pythonhosted.org/packages/ca/1d/ad4fd409b377904324cbd2dc3a11e29ba13e2cf603c5a14cd88a35da5be0/types_setuptools-80.9.0.20250809-py3-none-any.whl"
6470+
"hash": "53bf881cb9d7e46ed12c76ef76c0aaf28cfe6211d3fab12e0b83620b1a8642c3",
6471+
"url": "https://files.pythonhosted.org/packages/b6/2d/475bf15c1cdc172e7a0d665b6e373ebfb1e9bf734d3f2f543d668b07a142/types_setuptools-80.9.0.20250822-py3-none-any.whl"
64726472
},
64736473
{
64746474
"algorithm": "sha256",
6475-
"hash": "e986ba37ffde364073d76189e1d79d9928fb6f5278c7d07589cde353d0218864",
6476-
"url": "https://files.pythonhosted.org/packages/43/4f/d78a04083ee3cc0a7c14406afb2f1e7b63e70da95b777571d665d89b1765/types_setuptools-80.9.0.20250809.tar.gz"
6475+
"hash": "070ea7716968ec67a84c7f7768d9952ff24d28b65b6594797a464f1b3066f965",
6476+
"url": "https://files.pythonhosted.org/packages/19/bd/1e5f949b7cb740c9f0feaac430e301b8f1c5f11a81e26324299ea671a237/types_setuptools-80.9.0.20250822.tar.gz"
64776477
}
64786478
],
64796479
"project_name": "types-setuptools",
64806480
"requires_dists": [],
64816481
"requires_python": ">=3.9",
6482-
"version": "80.9.0.20250809"
6482+
"version": "80.9.0.20250822"
64836483
},
64846484
{
64856485
"artifacts": [
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from enum import StrEnum
2+
3+
4+
class ReadinessStatus(StrEnum):
5+
PENDING = "PENDING"
6+
PREPARING = "PREPARING"
7+
READY = "READY"
8+
UNHEALTHY = "UNHEALTHY"
9+
TERMINATING = "TERMINATING"
10+
TERMINATED = "TERMINATED"
11+
12+
13+
class ModelDeploymentStatus(StrEnum):
14+
ACTIVE = "ACTIVE"
15+
INACTIVE = "INACTIVE"
16+
CREATED = "CREATED"
17+
DEPLOYING = "DEPLOYING"
18+
READY = "READY"
19+
STOPPING = "STOPPING"
20+
STOPPED = "STOPPED"
21+
22+
23+
class DeploymentStrategy(StrEnum):
24+
ROLLING = "ROLLING"
25+
BLUE_GREEN = "BLUE_GREEN"

src/ai/backend/manager/models/__init__.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
from . import association_artifacts_storages as _association_artifacts_storages
66
from . import association_container_registries_groups as _association_container_registries_groups
77
from . import audit_log as _auditlog
8+
from . import blue_green_strategies as _blue_green_strategies
89
from . import container_registry as _container_registry
10+
from . import deployment_session_bindings as _deployment_session_bindings
911
from . import domain as _domain
1012
from . import dotfile as _dotfile
1113
from . import endpoint as _endpoint
@@ -17,6 +19,10 @@
1719
from . import image as _image
1820
from . import kernel as _kernel
1921
from . import keypair as _keypair
22+
from . import model_deployment as _model_deployment
23+
from . import model_deployment_auto_scaling_rules as _model_deployment_auto_scaling_rules
24+
from . import model_deployment_tokens as _model_deployment_tokens
25+
from . import model_revision as _model_revision
2026
from . import network as _network
2127
from . import object_storage as _object_storage
2228
from . import rbac as _rbac
@@ -58,6 +64,12 @@
5864
*_object_storage.__all__,
5965
*_user.__all__,
6066
*_vfolder.__all__,
67+
*_model_deployment.__all__,
68+
*_model_revision.__all__,
69+
*_deployment_session_bindings.__all__,
70+
*_model_deployment_auto_scaling_rules.__all__,
71+
*_model_deployment_tokens.__all__,
72+
*_blue_green_strategies.__all__,
6173
*_dotfile.__all__,
6274
*_rbac.__all__,
6375
*_rbac_models.__all__,
@@ -111,3 +123,9 @@
111123
from .gql_models.agent import * # noqa
112124
from .gql_models.kernel import * # noqa
113125
from .gql_models.session import * # noqa
126+
from .model_deployment import * # noqa
127+
from .model_revision import * # noqa
128+
from .deployment_session_bindings import * # noqa
129+
from .model_deployment_auto_scaling_rules import * # noqa
130+
from .model_deployment_tokens import * # noqa
131+
from .blue_green_strategies import * # noqa
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
"""Add Model Deployment related tables
2+
3+
Revision ID: 0368087c787d
4+
Revises: 345551145801
5+
Create Date: 2025-08-25 00:54:30.288511
6+
7+
"""
8+
import sqlalchemy as sa
9+
from alembic import op
10+
from sqlalchemy.dialects import postgresql
11+
# revision identifiers, used by Alembic.
12+
revision = '0368087c787d'
13+
down_revision = '345551145801'
14+
branch_labels = None
15+
depends_on = None
16+
17+
18+
def upgrade() -> None:
    """Create the model-deployment tables.

    Tables are created so that every foreign-key target exists before the
    table that references it:

    1. ``blue_green_strategies``
    2. ``model_deployments`` (references ``users`` and ``blue_green_strategies``)
       plus its two non-unique indexes
    3. ``model_deployment_auto_scaling_rules``, ``model_deployment_tokens`` and
       ``model_revisions`` (each references ``model_deployments``)
    4. ``deployment_session_bindings`` (references ``model_deployments`` and
       ``sessions``)
    """
    # Imported here because the auto-generated script spelled these types as
    # ``ai.backend.manager.models.base.<Type>`` without ever importing ``ai``,
    # which raised NameError as soon as the migration ran.
    from ai.backend.manager.models.base import (
        GUID,
        DecimalType,
        ResourceSlotColumn,
        StrEnumType,
        StructuredJSONObjectListColumn,
    )

    op.create_table(
        "blue_green_strategies",
        sa.Column("id", GUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False),
        sa.Column("pre_switch_wait_time", sa.Integer(), nullable=False),
        sa.Column("switch_timeout", sa.Integer(), nullable=False),
        sa.Column("is_active", sa.Boolean(), nullable=False),
        # NOTE(review): unlike the other tables, these two timestamps are
        # timezone-naive and have no server default -- confirm this is intended.
        sa.Column("created_at", sa.DateTime(), nullable=False),
        sa.Column("updated_at", sa.DateTime(), nullable=False),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_blue_green_strategies")),
    )

    op.create_table(
        "model_deployments",
        sa.Column("id", GUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("status", StrEnumType(length=64), nullable=False),
        sa.Column("tags", sa.String(), nullable=False),
        sa.Column("endpoint_url", sa.String(), nullable=True),
        sa.Column("preferred_domain_name", sa.String(), nullable=True),
        sa.Column("open_to_public", sa.Boolean(), nullable=False),
        sa.Column("desired_replica_count", sa.Integer(), nullable=False),
        sa.Column("created_user_id", GUID(), nullable=False),
        # No foreign key is declared for this column in this migration.
        sa.Column("current_revision_id", GUID(), nullable=False),
        sa.Column("deployment_strategy_type", StrEnumType(length=64), nullable=False),
        sa.Column("deployment_strategy_id", GUID(), nullable=False),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=True,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=True,
        ),
        sa.ForeignKeyConstraint(
            ["created_user_id"],
            ["users.uuid"],
            name=op.f("fk_model_deployments_created_user_id_users"),
        ),
        sa.ForeignKeyConstraint(
            ["deployment_strategy_id"],
            ["blue_green_strategies.id"],
            name=op.f("fk_model_deployments_deployment_strategy_id_blue_green_strategies"),
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_model_deployments")),
    )
    op.create_index(
        op.f("ix_model_deployments_created_user_id"),
        "model_deployments",
        ["created_user_id"],
        unique=False,
    )
    op.create_index(
        op.f("ix_model_deployments_name"),
        "model_deployments",
        ["name"],
        unique=False,
    )

    op.create_table(
        "model_deployment_auto_scaling_rules",
        sa.Column("id", GUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False),
        sa.Column("model_deployment_id", GUID(), nullable=False),
        sa.Column("metric_source", sa.String(), nullable=False),
        sa.Column("metric_name", sa.String(), nullable=False),
        sa.Column("min_threshold", DecimalType(), nullable=True),
        sa.Column("max_threshold", DecimalType(), nullable=True),
        sa.Column("step_size", sa.Integer(), nullable=False),
        sa.Column("time_window", sa.Integer(), nullable=False),
        sa.Column("min_replicas", sa.Integer(), nullable=True),
        sa.Column("max_replicas", sa.Integer(), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("last_triggered_at", sa.DateTime(timezone=True), nullable=False),
        # Exactly one of the two thresholds must be set on each rule.
        sa.CheckConstraint(
            "(min_threshold IS NOT NULL AND max_threshold IS NULL) OR (min_threshold IS NULL AND max_threshold IS NOT NULL)",
            name=op.f("ck_model_deployment_auto_scaling_rules_check_single_threshold"),
        ),
        sa.ForeignKeyConstraint(
            ["model_deployment_id"],
            ["model_deployments.id"],
            name=op.f(
                "fk_model_deployment_auto_scaling_rules_model_deployment_id_model_deployments"
            ),
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_model_deployment_auto_scaling_rules")),
        # At most one rule per (deployment, metric name) pair.
        sa.UniqueConstraint(
            "model_deployment_id", "metric_name", name="uq_model_deployment_metric"
        ),
    )

    op.create_table(
        "model_deployment_tokens",
        sa.Column("id", GUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False),
        sa.Column("token", sa.String(), nullable=False),
        sa.Column("model_deployment_id", GUID(), nullable=False),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=True,
        ),
        sa.ForeignKeyConstraint(
            ["model_deployment_id"],
            ["model_deployments.id"],
            name=op.f("fk_model_deployment_tokens_model_deployment_id_model_deployments"),
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_model_deployment_tokens")),
    )

    op.create_table(
        "model_revisions",
        sa.Column("id", GUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("model_deployment_id", GUID(), nullable=False),
        sa.Column("tags", sa.TEXT(), nullable=False),
        sa.Column("runtime_variant", StrEnumType(length=64), nullable=False),
        sa.Column(
            "inference_runtime_config",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
        ),
        sa.Column(
            "environment_variables",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
        ),
        sa.Column("model_vfolder_id", GUID(), nullable=False),
        sa.Column(
            "model_mount_destination", sa.String(), server_default="/models", nullable=False
        ),
        sa.Column("model_definition_path", sa.String(), nullable=False),
        sa.Column("image_id", GUID(), nullable=False),
        sa.Column("cluster_mode", sa.String(), server_default="single-node", nullable=False),
        sa.Column("cluster_size", sa.Integer(), server_default="1", nullable=False),
        sa.Column("resource_group", sa.String(), nullable=False),
        # The generated code passed a bare ``Text()`` here, which is not defined
        # in this module; it is now qualified as ``sa.Text()``.
        # NOTE(review): the ``astext_type`` keyword was emitted by autogenerate
        # for these two custom column types -- confirm their constructors
        # accept it.
        sa.Column(
            "resource_slots",
            ResourceSlotColumn(astext_type=sa.Text()),
            nullable=False,
        ),
        sa.Column(
            "resource_opts",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
        ),
        sa.Column(
            "extra_mount",
            StructuredJSONObjectListColumn(astext_type=sa.Text()),
            server_default="[]",
            nullable=False,
        ),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["image_id"],
            ["images.id"],
            name=op.f("fk_model_revisions_image_id_images"),
        ),
        sa.ForeignKeyConstraint(
            ["model_deployment_id"],
            ["model_deployments.id"],
            name=op.f("fk_model_revisions_model_deployment_id_model_deployments"),
        ),
        sa.ForeignKeyConstraint(
            ["model_vfolder_id"],
            ["vfolders.id"],
            name=op.f("fk_model_revisions_model_vfolder_id_vfolders"),
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_model_revisions")),
    )

    op.create_table(
        "deployment_session_bindings",
        sa.Column("id", GUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False),
        sa.Column("deployment_id", GUID(), nullable=False),
        sa.Column("session_id", GUID(), nullable=False),
        sa.Column("weight", DecimalType(), nullable=False),
        sa.Column("readiness_status", StrEnumType(length=64), nullable=False),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["deployment_id"],
            ["model_deployments.id"],
            name=op.f("fk_deployment_session_bindings_deployment_id_model_deployments"),
        ),
        sa.ForeignKeyConstraint(
            ["session_id"],
            ["sessions.id"],
            name=op.f("fk_deployment_session_bindings_session_id_sessions"),
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_deployment_session_bindings")),
    )
113+
114+
115+
def downgrade() -> None:
    """Drop the model-deployment tables and indexes.

    Tables that reference ``model_deployments`` are dropped first, then the
    two indexes on ``model_deployments``, then ``model_deployments`` itself,
    and finally ``blue_green_strategies``, which ``model_deployments``
    references.
    """
    # Tables holding a foreign key to ``model_deployments`` go first.
    referencing_tables = (
        "deployment_session_bindings",
        "model_revisions",
        "model_deployment_tokens",
        "model_deployment_auto_scaling_rules",
    )
    for table_name in referencing_tables:
        op.drop_table(table_name)

    # Indexes on ``model_deployments`` are removed before the table itself.
    for index_name in (
        "ix_model_deployments_name",
        "ix_model_deployments_created_user_id",
    ):
        op.drop_index(op.f(index_name), table_name="model_deployments")

    op.drop_table("model_deployments")
    op.drop_table("blue_green_strategies")

0 commit comments

Comments
 (0)