3
3
import importlib
4
4
import os
5
5
import sys
6
+ from tkinter import ALL
6
7
from typing import Any , Callable
7
8
from uuid import UUID
8
9
21
22
22
23
23
24
from .helpers .executor import get_executor
24
- from .helpers .trigger import get_trigger
25
+ from .helpers .trigger import get_trigger , ALL_TRIGGER_KWARGS
25
26
26
27
27
28
class PipelineManager :
@@ -136,36 +137,26 @@ def _get_driver(
136
137
self ._load_module (name )
137
138
138
139
if with_tracker :
139
- project_id = kwargs .pop ("project_id" , None ) or self .cfg .tracker .pipeline [
140
- name
141
- ].get ("project_id" , None )
142
- username = kwargs .pop ("username" , None ) or self .cfg .tracker .get (
143
- "username" , None
144
- )
145
- dag_name = kwargs .pop ("dag_name" , None ) or self .cfg .tracker .pipeline [
146
- name
147
- ].get ("dag_name" , None )
148
- tags = kwargs .pop ("tags" , None ) or self .cfg .tracker .pipeline [name ].get (
149
- "tags" , None
150
- )
151
- api_url = kwargs .pop ("api_url" , None ) or self .cfg .tracker .get (
152
- "api_url" , None
153
- )
154
- ui_url = kwargs .pop ("ui_url" , None ) or self .cfg .tracker .get ("ui_url" , None )
140
+ tracker_cfg = self .cfg .tracker .pipeline .get (name , {})
141
+ tracker_kwargs = {
142
+ key : kwargs .pop (key , None ) or tracker_cfg .get (key , None )
143
+ for key in [
144
+ "project_id" ,
145
+ "username" ,
146
+ "dag_name" ,
147
+ "tags" ,
148
+ "api_url" ,
149
+ "ui_url" ,
150
+ ]
151
+ }
152
+ project_id = tracker_kwargs .get ("project_id" , None )
155
153
156
154
if project_id is None :
157
155
raise ValueError (
158
156
"Please provide a project_id if you want to use the tracker"
159
157
)
160
158
161
- tracker = adapters .HamiltonTracker (
162
- project_id = project_id ,
163
- username = username ,
164
- dag_name = dag_name ,
165
- tags = tags ,
166
- hamilton_api_url = api_url ,
167
- hamilton_ui_url = ui_url ,
168
- )
159
+ tracker = adapters .HamiltonTracker (project_id = project_id , ** tracker_kwargs )
169
160
170
161
dr = (
171
162
driver .Builder ()
@@ -190,9 +181,9 @@ def _run(
190
181
self ,
191
182
name : str ,
192
183
environment : str = "dev" ,
193
- executor : str | None = None ,
194
184
inputs : dict | None = None ,
195
185
final_vars : list | None = None ,
186
+ executor : str | None = None ,
196
187
with_tracker : bool | None = None ,
197
188
reload : bool = False ,
198
189
** kwargs ,
@@ -215,18 +206,23 @@ def _run(
215
206
"""
216
207
logger .info (f"Starting pipeline { name } in environment { environment } " )
217
208
218
- pipeline_cfg = self .cfg .pipeline
219
- run_params = pipeline_cfg .run .get (name )[environment ]
209
+ run_params = self .cfg .pipeline .run .get (name )[environment ]
220
210
221
211
final_vars = final_vars or run_params .get ("final_vars" , [])
222
- inputs = {** (run_params .get ("inputs" , {}) or {}), ** (inputs or {})}
223
- with_tracker = with_tracker or run_params .get ("with_tracker" , False )
212
+ inputs = {
213
+ ** (run_params .get ("inputs" , {}) or {}),
214
+ ** (inputs or {}),
215
+ } # <-- inputs override and adds to run_params
216
+
217
+ kwargs .update (
218
+ {
219
+ arg : eval (arg ) or run_params .get (arg , None )
220
+ for arg in ["executor" , "with_tracker" , "reload" ]
221
+ }
222
+ )
224
223
225
224
dr , shutdown = self ._get_driver (
226
225
name = name ,
227
- executor = executor ,
228
- with_tracker = with_tracker ,
229
- reload = reload ,
230
226
** kwargs ,
231
227
)
232
228
@@ -362,8 +358,6 @@ def add_job(
362
358
with SchedulerManager (
363
359
name = name , base_dir = self ._base_dir , role = "scheduler"
364
360
) as sm :
365
- # if not any([task.id == "run-pipeline" for task in sm.get_tasks()]):
366
- # sm.configure_task(func_or_task_id="run-pipeline", func=self._run)
367
361
return sm .add_job (
368
362
self ._run ,
369
363
args = (
@@ -385,11 +379,11 @@ def add_job(
385
379
def schedule (
386
380
self ,
387
381
name : str ,
382
+ inputs : dict | None = None ,
383
+ final_vars : list | None = None ,
388
384
environment : str = "dev" ,
389
385
executor : str | None = None ,
390
386
trigger_type : str | None = None ,
391
- inputs : dict | None = None ,
392
- final_vars : list | None = None ,
393
387
with_tracker : bool | None = None ,
394
388
paused : bool = False ,
395
389
coalesce : str = "latest" ,
@@ -431,49 +425,43 @@ def schedule(
431
425
if SchedulerManager is None :
432
426
raise ValueError ("APScheduler4 not installed. Please install it first." )
433
427
434
- trigger_kwargs = {}
435
428
if "pipeline" in self .cfg .scheduler :
436
- scheduler_cfg = self .cfg .scheduler .pipeline .get (name , None ).copy ()
429
+ scheduler_cfg = self .cfg .scheduler .pipeline .get (name , None ) # .copy()
437
430
else :
438
- scheduler_cfg = None
439
-
440
- if scheduler_cfg is not None :
441
- trigger_type = trigger_type or scheduler_cfg .pop ("trigger_type" , None )
442
- for key in [
443
- "crontab" ,
444
- "year" ,
445
- "month" ,
446
- "week" ,
447
- "day" ,
448
- "days_of_week" ,
449
- "hour" ,
450
- "minute" ,
451
- "second" ,
452
- "timezone" ,
453
- ]:
454
- trigger_kwargs [key ] = scheduler_cfg .pop (key , None )
431
+ scheduler_cfg = {}
432
+
433
+ trigger_type = trigger_type or scheduler_cfg .get ("trigger_type" , None )
434
+
435
+ trigger_kwargs = {
436
+ key : kwargs .pop (key , None ) or scheduler_cfg .get (key , None )
437
+ for key in ALL_TRIGGER_KWARGS .get (trigger_type , [])
438
+ if key in kwargs or key in scheduler_cfg
439
+ }
440
+
441
+ schedule_kwargs = {
442
+ arg : eval (arg ) or scheduler_cfg .get (arg , None )
443
+ for arg in [
444
+ "executor" ,
445
+ "paused" ,
446
+ "coalesce" ,
447
+ "misfire_grace_time" ,
448
+ "max_jitter" ,
449
+ "max_running_jobs" ,
450
+ "conflict_policy" ,
451
+ ]
452
+ }
455
453
456
454
with SchedulerManager (
457
455
name = name , base_dir = self ._base_dir , role = "scheduler"
458
456
) as sm :
459
- # if not any([task.id == "run-pipeline" for task in sm.get_tasks()]):
460
- # sm.configure_task(func_or_task_id="run-pipeline", func=self._run)
461
- trigger , kwargs = get_trigger (trigger_type , ** kwargs )
457
+ trigger = get_trigger (trigger_type , ** trigger_kwargs )
462
458
463
459
id_ = sm .add_schedule (
464
460
self ._run ,
465
461
trigger = trigger ,
466
462
args = (name , environment , executor , inputs , final_vars , with_tracker ),
467
463
kwargs = kwargs ,
468
- job_executor = executor
469
- if executor in ["async" , "threadpool" , "processpool" ]
470
- else "async" ,
471
- paused = paused ,
472
- coalesce = coalesce ,
473
- misfire_grace_time = misfire_grace_time ,
474
- max_jitter = max_jitter ,
475
- max_running_jobs = max_running_jobs ,
476
- conflict_policy = conflict_policy ,
464
+ ** schedule_kwargs ,
477
465
)
478
466
logger .success (
479
467
f"Added scheduler for { name } in environment { environment } with id { id_ } "
0 commit comments