Skip to content

Commit 647eaf7

Browse files
committed
try and fix status returns
1 parent ed29235 commit 647eaf7

File tree

1 file changed

+20
-1
lines changed

1 file changed

+20
-1
lines changed

src/kairos/scheduler.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,15 @@ def run_job(self, gpu_slot: GPUSlot, job: Job, job_index: int) -> int:
132132
return return_code
133133

134134
def run_jobs(self, jobs: List[Job]) -> List[int]:
135-
"""Run multiple jobs across available GPU slots."""
135+
"""Run multiple jobs across available GPU slots.
136+
137+
Args:
138+
jobs: List of Job objects to execute
139+
140+
Returns:
141+
List[int]: Status code for each job
142+
(0 for success, non-zero for failure)
143+
"""
136144
total_slots = len(self.gpu_slots)
137145
self.logger.info(
138146
f"Starting {len(jobs)} jobs across {self.n_gpus} "
@@ -147,6 +155,7 @@ def run_jobs(self, jobs: List[Job]) -> List[int]:
147155
for slot in self.gpu_slots:
148156
slot_queue.put(slot)
149157

158+
# Initialize results list with None values
150159
results = [None] * len(jobs)
151160
active_jobs = set()
152161
job_lock = threading.Lock()
@@ -184,12 +193,22 @@ def worker():
184193
) as executor:
185194
futures = []
186195

196+
# Queue up all jobs
187197
for i, job in enumerate(jobs):
188198
job_queue.put((i, job))
189199

200+
# Start workers
190201
for _ in range(total_slots):
191202
futures.append(executor.submit(worker))
192203

204+
# Wait for all workers to complete
193205
concurrent.futures.wait(futures)
194206

195207
self.logger.info("\nAll jobs completed!")
208+
209+
# Ensure all jobs have a result
210+
assert all(
211+
result is not None for result in results
212+
), "Some jobs did not complete"
213+
214+
return results

0 commit comments

Comments
 (0)