Skip to content

Commit

Permalink
Refactor state formats handling
Browse files Browse the repository at this point in the history
  • Loading branch information
tolik0 committed Jan 30, 2025
1 parent cd74ed2 commit 25b6980
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,11 @@ def close_partition(self, partition: Partition) -> None:
partition_key in self._finished_partitions
and self._semaphore_per_partition[partition_key]._value == 0
):
if self._new_global_cursor is None or self._extract_cursor_value_from_state(
self._new_global_cursor
) < self._extract_cursor_value_from_state(cursor.state):
if (
self._new_global_cursor is None
or self._new_global_cursor[self.cursor_field.cursor_field_key]
< cursor.state[self.cursor_field.cursor_field_key]
):
self._new_global_cursor = copy.deepcopy(cursor.state)
if not self._use_global_cursor:
self._emit_state_message()
Expand Down Expand Up @@ -304,8 +306,7 @@ def _set_initial_state(self, stream_state: StreamState) -> None:
):
# We assume that `stream_state` is in a global format that can be applied to all partitions.
# Example: {"global_state_format_key": "global_state_format_value"}
self._global_cursor = deepcopy(stream_state)
self._new_global_cursor = deepcopy(stream_state)
self._set_global_state(stream_state)

else:
self._use_global_cursor = stream_state.get("use_global_cursor", False)
Expand All @@ -322,8 +323,7 @@ def _set_initial_state(self, stream_state: StreamState) -> None:

# set default state for missing partitions if it is per partition with fallback to global
if self._GLOBAL_STATE_KEY in stream_state:
self._global_cursor = deepcopy(stream_state[self._GLOBAL_STATE_KEY])
self._new_global_cursor = deepcopy(stream_state[self._GLOBAL_STATE_KEY])
self._set_global_state(stream_state[self._GLOBAL_STATE_KEY])

# Set initial parent state
if stream_state.get("parent_state"):
Expand All @@ -332,6 +332,27 @@ def _set_initial_state(self, stream_state: StreamState) -> None:
# Set parent state for partition routers based on parent streams
self._partition_router.set_initial_state(stream_state)

def _set_global_state(self, stream_state: Mapping[str, Any]) -> None:
    """
    Seed the global cursor from ``stream_state``.

    Nothing is changed when the cursor field key is missing from
    ``stream_state``. Otherwise the stored value is passed through the state
    converter's ``parse_value`` and then ``output_format``, and a single-entry
    mapping ``{cursor_field_key: converted_value}`` is stored in both
    ``_global_cursor`` and ``_new_global_cursor``.
    """
    cursor_key = self.cursor_field.cursor_field_key
    if cursor_key not in stream_state:
        return

    converter = self._connector_state_converter
    parsed_value = converter.parse_value(stream_state[cursor_key])
    normalized_state = {cursor_key: converter.output_format(parsed_value)}

    # Each attribute gets its own copy so later mutation of one does not
    # leak into the other.
    self._global_cursor = deepcopy(normalized_state)
    self._new_global_cursor = deepcopy(normalized_state)

def observe(self, record: Record) -> None:
if not self._use_global_cursor and self.limit_reached():
self._use_global_cursor = True
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,9 @@ def _run_read(
PARENT_POSTS_CURSOR = "2024-01-05T00:00:00Z" # Parent posts cursor (expected in state)

INITIAL_STATE_PARTITION_10_CURSOR = "2024-01-02T00:00:01Z"
INITIAL_STATE_PARTITION_10_CURSOR_TIMESTAMP = 1704153601000
INITIAL_STATE_PARTITION_11_CURSOR = "2024-01-03T00:00:02Z"
INITIAL_STATE_PARTITION_11_CURSOR_TIMESTAMP = 1704240002000
INITIAL_GLOBAL_CURSOR = INITIAL_STATE_PARTITION_11_CURSOR
INITIAL_GLOBAL_CURSOR_DATE = datetime.fromisoformat(
INITIAL_STATE_PARTITION_11_CURSOR.replace("Z", "")
Expand Down Expand Up @@ -663,7 +665,7 @@ def _run_read(
"id": 10,
"parent_slice": {"id": 1, "parent_slice": {}},
},
"cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR},
"cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR_TIMESTAMP},
},
{
"partition": {
Expand All @@ -673,7 +675,7 @@ def _run_read(
"cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR},
},
],
"state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR},
"state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR_TIMESTAMP},
"lookback_window": 86400,
},
# Expected state
Expand Down

0 comments on commit 25b6980

Please sign in to comment.