Skip to content

Commit 1fb40b4

Browse files
htahir1 and claude authored
Fix set serialization in metadata system (#4249)
Sets were documented as a supported MetadataType but failed during serialization because json.dumps() cannot serialize Python sets. This change uses the standard pydantic_encoder to convert sets (and tuples) to lists before JSON serialization, making them compatible with JSON while preserving type information via MetadataTypeEnum.

The fix uses pydantic_encoder directly (via json.dumps(value, default=pydantic_encoder)) at all serialization points, following the same pattern used throughout the ZenML codebase. This ensures consistency and proper handling of all supported types, including nested sets/tuples, UUIDs, datetimes, etc.

Changes:
- Update validate_metadata() in metadata_types.py to use pydantic_encoder
- Update Client.create_run_metadata() to use pydantic_encoder
- Update SQLZenStore.create_run_metadata() to use pydantic_encoder
- Add unit tests for set/tuple validation
- Document supported metadata types in user guide

Fixes #4248

Co-authored-by: Claude <[email protected]>
1 parent d8bd68a commit 1fb40b4

File tree

5 files changed

+87
-3
lines changed

5 files changed

+87
-3
lines changed

docs/book/how-to/metadata/metadata.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ ZenML makes it easy to log and retrieve this information through a simple interf
2222

2323
The primary way to log metadata in ZenML is through the `log_metadata` function, which allows you to attach JSON-serializable key-value pairs to various entities.
2424

25+
{% hint style="info" %}
26+
Metadata supports primitive types (`str`, `int`, `float`, `bool`), collections (`list`, `dict`, `set`, `tuple`), and special ZenML types (`Uri`, `Path`, `DType`, `StorageSize`). Sets and tuples are automatically converted to lists during storage.
27+
{% endhint %}
28+
2529
```python
2630
from zenml import log_metadata
2731

src/zenml/client.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5498,12 +5498,16 @@ def create_run_metadata(
54985498
this metadata automatically.
54995499
"""
55005500
from zenml.metadata.metadata_types import get_metadata_type
5501+
from zenml.utils.json_utils import pydantic_encoder
55015502

55025503
values: Dict[str, "MetadataType"] = {}
55035504
types: Dict[str, "MetadataTypeEnum"] = {}
55045505
for key, value in metadata.items():
55055506
# Skip metadata that is too large to be stored in the database.
5506-
if len(json.dumps(value)) > TEXT_FIELD_MAX_LENGTH:
5507+
if (
5508+
len(json.dumps(value, default=pydantic_encoder))
5509+
> TEXT_FIELD_MAX_LENGTH
5510+
):
55075511
logger.warning(
55085512
f"Metadata value for key '{key}' is too large to be "
55095513
"stored in the database. Skipping."

src/zenml/metadata/metadata_types.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from zenml.constants import STR_FIELD_MAX_LENGTH, TEXT_FIELD_MAX_LENGTH
2323
from zenml.logger import get_logger
2424
from zenml.utils.enum_utils import StrEnum
25+
from zenml.utils.json_utils import pydantic_encoder
2526

2627
logger = get_logger(__name__)
2728

@@ -234,7 +235,10 @@ def validate_metadata(
234235
)
235236
continue
236237

237-
if len(json.dumps(value)) > TEXT_FIELD_MAX_LENGTH:
238+
if (
239+
len(json.dumps(value, default=pydantic_encoder))
240+
> TEXT_FIELD_MAX_LENGTH
241+
):
238242
logger.warning(
239243
f"Metadata value for key '{key}' is too large to be "
240244
"stored in the database. Skipping."

src/zenml/zen_stores/sql_zen_store.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7183,6 +7183,8 @@ def create_run_metadata(self, run_metadata: RunMetadataRequest) -> None:
71837183
)
71847184

71857185
if run_metadata.resources:
7186+
from zenml.utils.json_utils import pydantic_encoder
7187+
71867188
for key, value in run_metadata.values.items():
71877189
type_ = run_metadata.types[key]
71887190

@@ -7191,7 +7193,7 @@ def create_run_metadata(self, run_metadata: RunMetadataRequest) -> None:
71917193
user_id=run_metadata.user,
71927194
stack_component_id=run_metadata.stack_component_id,
71937195
key=key,
7194-
value=json.dumps(value),
7196+
value=json.dumps(value, default=pydantic_encoder),
71957197
type=type_,
71967198
publisher_step_id=run_metadata.publisher_step_id,
71977199
)
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Copyright (c) ZenML GmbH 2025. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at:
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12+
# or implied. See the License for the specific language governing
13+
# permissions and limitations under the License.
14+
"""Unit tests for metadata_types module."""
15+
16+
from zenml.metadata.metadata_types import (
17+
MetadataTypeEnum,
18+
get_metadata_type,
19+
validate_metadata,
20+
)
21+
22+
23+
class TestValidateMetadata:
    """Checks that `validate_metadata` keeps set and tuple values."""

    def test_validate_metadata_with_set(self):
        """A set entry and a string entry both come back from validation."""
        result = validate_metadata(
            {"my_set": {1, 2, 3}, "my_string": "hello"}
        )
        expected_entries = (("my_set", {1, 2, 3}), ("my_string", "hello"))
        # First make sure no entry was dropped, then compare the values.
        for name, _ in expected_entries:
            assert name in result
        for name, expected in expected_entries:
            assert result[name] == expected

    def test_validate_metadata_with_tuple(self):
        """A tuple entry comes back from validation unchanged."""
        result = validate_metadata({"my_tuple": (1, 2, 3)})
        assert "my_tuple" in result
        assert result["my_tuple"] == (1, 2, 3)

    def test_validate_metadata_with_nested_sets(self):
        """A dict entry holding a set and a tuple comes back unchanged."""
        inner = {"my_set": {1, 2, 3}, "my_tuple": (4, 5, 6)}
        result = validate_metadata({"nested": inner})
        assert "nested" in result
        nested_result = result["nested"]
        assert nested_result["my_set"] == {1, 2, 3}
        assert nested_result["my_tuple"] == (4, 5, 6)
55+
56+
57+
class TestGetMetadataType:
58+
"""Tests for get_metadata_type function."""
59+
60+
def test_get_metadata_type_for_set(self):
61+
"""Test that the correct type enum is returned for sets."""
62+
test_set = {1, 2, 3}
63+
result = get_metadata_type(test_set)
64+
assert result == MetadataTypeEnum.SET
65+
66+
def test_get_metadata_type_for_tuple(self):
67+
"""Test that the correct type enum is returned for tuples."""
68+
test_tuple = (1, 2, 3)
69+
result = get_metadata_type(test_tuple)
70+
assert result == MetadataTypeEnum.TUPLE

0 commit comments

Comments
 (0)