Skip to content

Commit 1086e6d

Browse files
authored
fix(ray): update high scale model config (#264)
Because: the current config cannot handle more than 100 concurrent requests. This commit: enlarges the max queue size.
1 parent 0bc15de commit 1086e6d

File tree

2 files changed

+19
-2
lines changed

2 files changed

+19
-2
lines changed

instill/helpers/const.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,8 @@ class ImageToImageInput:
125125
"PYTHONPATH": os.getcwd(),
126126
},
127127
}
128-
DEFAULT_MAX_ONGOING_REQUESTS = 6
129-
DEFAULT_MAX_QUEUED_REQUESTS = 100
128+
DEFAULT_MAX_ONGOING_REQUESTS = 4
129+
DEFAULT_MAX_QUEUED_REQUESTS = 1000
130130

131131
RAM_MINIMUM_RESERVE = 1 # GB
132132
RAM_UPSCALE_FACTOR = 1.25

instill/helpers/ray_config.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@ def __init__(self, deployable: Deployment) -> None:
8484
if is_high_scale_model is not None and is_high_scale_model.lower() == "true":
8585
self._update_upscale_delay(120)
8686
self._update_downscale_delay(600)
87+
self._update_target_ongoing_requests(4)
88+
self._update_max_concurrent_requests(6)
8789

8890
def _determine_vram_usage(self, model_path: str, total_vram: str):
8991
warn(
@@ -189,6 +191,21 @@ def _update_max_replicas(self, num_replicas: int):
189191

190192
return self
191193

194+
def _update_target_ongoing_requests(self, target_ongoing_requests: int):
195+
self._autoscaling_config["target_num_ongoing_requests_per_replica"] = (
196+
target_ongoing_requests
197+
)
198+
self._autoscaling_config["target_ongoing_requests"] = target_ongoing_requests
199+
self._deployment = self._deployment.options(
200+
autoscaling_config=self._autoscaling_config
201+
)
202+
203+
def _update_max_concurrent_requests(self, max_concurrent_requests: int):
204+
self._deployment = self._deployment.options(
205+
max_concurrent_queries=max_concurrent_requests,
206+
max_ongoing_requests=max_concurrent_requests,
207+
)
208+
192209
def _update_upscale_delay(self, upscale_delay_s: int):
193210
self._autoscaling_config["upscale_delay_s"] = upscale_delay_s
194211
self._deployment = self._deployment.options(

0 commit comments

Comments
 (0)