Skip to content

Commit 86a6c45

Browse files
angel-core authored and Orbax Authors committed
Add DeletionOptions to Orbax v1 Context.
PiperOrigin-RevId: 886843908
1 parent 2fbebca commit 86a6c45

File tree

5 files changed

+63
-2
lines changed

5 files changed

+63
-2
lines changed

checkpoint/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1111

1212
- #v1 Add `use_load_and_broadcast` option.
1313
- Add PyTorch DCP (Distributed Checkpoint) to the benchmark suite.
14+
- #v1 Add `DeletionOptions` to configure V1 Checkpointer's checkpoint deletion
15+
behavior.
1416

1517
### Removed
1618

checkpoint/orbax/checkpoint/experimental/v1/_src/context/context.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ class Context(epy.ContextManager):
101101
checkpointables_options: Options for controlling checkpointables behavior.
102102
pathways_options: Options for Pathways checkpointing.
103103
checkpoint_layout: The layout of the checkpoint. Defaults to ORBAX.
104+
deletion_options: Options for controlling deletion behavior.
104105
"""
105106

106107
def __init__(
@@ -115,6 +116,7 @@ def __init__(
115116
checkpointables_options: options_lib.CheckpointablesOptions | None = None,
116117
pathways_options: options_lib.PathwaysOptions | None = None,
117118
checkpoint_layout: options_lib.CheckpointLayout | None = None,
119+
deletion_options: options_lib.DeletionOptions | None = None,
118120
):
119121
self._pytree_options = pytree_options or (
120122
context.pytree_options if context else options_lib.PyTreeOptions()
@@ -146,6 +148,9 @@ def __init__(
146148
if context
147149
else options_lib.CheckpointLayout.ORBAX
148150
)
151+
self._deletion_options = deletion_options or (
152+
context.deletion_options if context else options_lib.DeletionOptions()
153+
)
149154

150155
@property
151156
def pytree_options(self) -> options_lib.PyTreeOptions:
@@ -179,6 +184,10 @@ def pathways_options(self) -> options_lib.PathwaysOptions:
179184
def checkpoint_layout(self) -> options_lib.CheckpointLayout:
180185
return self._checkpoint_layout
181186

187+
@property
188+
def deletion_options(self) -> options_lib.DeletionOptions:
189+
return self._deletion_options
190+
182191
def operation_id(self) -> str:
183192
return synchronization.OperationIdGenerator.get_current_operation_id()
184193

checkpoint/orbax/checkpoint/experimental/v1/_src/context/options.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,44 @@ class PathwaysOptions:
494494
checkpointing_impl: pathways_types.CheckpointingImpl | None = None
495495

496496

497+
@dataclasses.dataclass(frozen=True, kw_only=True)
498+
class DeletionOptions:
499+
"""Options used to configure checkpoint deletion behavior.
500+
501+
Attributes:
502+
gcs_deletion_options: Deletion options specific to GCS.
503+
"""
504+
505+
@dataclasses.dataclass(frozen=True, kw_only=True)
506+
class GcsDeletionOptions:
507+
"""Deletion options specific to GCS.
508+
509+
Attributes:
510+
todelete_full_path: A path relative to the bucket root for "soft-deleting"
511+
checkpoints on Google Cloud Storage (GCS). Instead of being permanently
512+
removed, checkpoints are moved to this new location within the same
513+
bucket. This is useful if direct deletion on GCS is time-consuming, as
514+
it allows an external component to manage the actual removal.
515+
516+
This option gathers all "deleted" items in a centralized path at the
517+
bucket level for future cleanup.
518+
519+
For instance, if a checkpoint is in
520+
gs://my-bucket/experiments/run1/, providing the value 'trash' will move
521+
a deleted step to gs://my-bucket/trash/<step_id>. Useful when direct
522+
deletion is time consuming. It gathers all deleted items in a
523+
centralized path for future cleanup.
524+
"""
525+
526+
todelete_full_path: str | None = None
527+
528+
529+
gcs_deletion_options: GcsDeletionOptions = dataclasses.field(
530+
default_factory=GcsDeletionOptions
531+
)
532+
533+
534+
497535
class CheckpointLayout(enum.Enum):
498536
"""The layout of the checkpoint.
499537

checkpoint/orbax/checkpoint/experimental/v1/_src/training/checkpointer.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,8 +170,7 @@ def __init__(
170170
preservation_policy=preservation_policy,
171171
step_name_format=step_name_format,
172172
max_to_keep=None, # Unlimited.
173-
# TODO(b/401541834) Configure todelete_subdir.
174-
# TODO(b/401541834) Enable background deletion.
173+
todelete_full_path=context.deletion_options.gcs_deletion_options.todelete_full_path,
175174
async_options=context.async_options.v0(),
176175
file_options=context.file_options.v0(),
177176
multiprocessing_options=context.multiprocessing_options.v0(),

checkpoint/orbax/checkpoint/experimental/v1/_src/training/checkpointer_test_base.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -669,3 +669,16 @@ def test_preservation_metrics(self, policy, expected_steps):
669669
[all_metrics[step] for step in expected_steps],
670670
)
671671
checkpointer.close()
672+
673+
def test_gcs_deletion_options(self):
674+
deletion_options = ocp.options.DeletionOptions(
675+
gcs_deletion_options=ocp.options.DeletionOptions.GcsDeletionOptions(
676+
todelete_full_path='trash'
677+
)
678+
)
679+
with ocp.Context(deletion_options=deletion_options):
680+
checkpointer = Checkpointer(self.directory)
681+
self.assertEqual(
682+
checkpointer._manager._options.todelete_full_path, 'trash'
683+
)
684+

0 commit comments

Comments
 (0)