Skip to content

Commit 04e40e1

Browse files
committed
Fix set serialization in metadata system
Sets were documented as a supported MetadataType but failed during serialization because json.dumps() cannot serialize Python sets. This change uses the standard pydantic_encoder to convert sets (and tuples) to lists before JSON serialization, making them compatible with JSON while preserving type information via MetadataTypeEnum.

The fix uses pydantic_encoder directly (via json.dumps(value, default=pydantic_encoder)) at all serialization points, following the same pattern used throughout the ZenML codebase. This ensures consistency and proper handling of all supported types, including nested sets/tuples, UUIDs, datetimes, etc.

Changes:
- Update validate_metadata() in metadata_types.py to use pydantic_encoder
- Update Client.create_run_metadata() to use pydantic_encoder
- Update SQLZenStore.create_run_metadata() to use pydantic_encoder
- Add unit tests for set/tuple validation
- Document supported metadata types in user guide

Fixes #4248
1 parent b5dfe3f commit 04e40e1

File tree

5 files changed

+116
-4
lines changed

5 files changed

+116
-4
lines changed

docs/book/how-to/metadata/metadata.md

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,39 @@ client.create_run_metadata(
323323
)
324324
~~~
325325

326+
## Supported Metadata Value Types
327+
328+
ZenML metadata values support the following Python types:
329+
330+
* **Primitive types**: `str`, `int`, `float`, `bool`
331+
* **Collection types**: `list`, `dict`, `set`, `tuple`
332+
* **Special ZenML types**: `Uri`, `Path`, `DType`, `StorageSize` (see below)
333+
334+
{% hint style="info" %}
335+
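Metadata is stored as JSON, which has no set or tuple type, so sets and tuples are automatically converted to lists during serialization. The original type is recorded separately, so you can still identify it when retrieving metadata. Because Python sets are unordered, do not rely on the order of elements in a list that was stored from a set.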
336+
{% endhint %}
337+
338+
```python
339+
from zenml import log_metadata
340+
341+
# All of these are valid metadata values
342+
log_metadata(
343+
metadata={
344+
"accuracy": 0.95, # float
345+
"epochs": 100, # int
346+
"model_name": "bert-base", # str
347+
"is_production": True, # bool
348+
"hyperparameters": { # dict
349+
"learning_rate": 0.001,
350+
"batch_size": 32
351+
},
352+
"loss_history": [0.5, 0.3, 0.2], # list
353+
"tags": {"ml", "nlp", "transformer"}, # set (stored as list)
354+
"dimensions": (128, 128, 3), # tuple (stored as list)
355+
}
356+
)
357+
```
358+
326359
## Special Metadata Types
327360

328361
ZenML includes several special metadata types that provide standardized ways to represent common metadata:

src/zenml/client.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
"""Client implementation."""
1515

1616
import functools
17-
import json
1817
import os
1918
from abc import ABCMeta
2019
from datetime import datetime
@@ -5498,12 +5497,16 @@ def create_run_metadata(
54985497
this metadata automatically.
54995498
"""
55005499
from zenml.metadata.metadata_types import get_metadata_type
5500+
from zenml.utils.json_utils import pydantic_encoder
55015501

55025502
values: Dict[str, "MetadataType"] = {}
55035503
types: Dict[str, "MetadataTypeEnum"] = {}
55045504
for key, value in metadata.items():
55055505
# Skip metadata that is too large to be stored in the database.
5506-
if len(json.dumps(value)) > TEXT_FIELD_MAX_LENGTH:
5506+
if (
5507+
len(json.dumps(value, default=pydantic_encoder))
5508+
> TEXT_FIELD_MAX_LENGTH
5509+
):
55075510
logger.warning(
55085511
f"Metadata value for key '{key}' is too large to be "
55095512
"stored in the database. Skipping."

src/zenml/metadata/metadata_types.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from zenml.constants import STR_FIELD_MAX_LENGTH, TEXT_FIELD_MAX_LENGTH
2323
from zenml.logger import get_logger
2424
from zenml.utils.enum_utils import StrEnum
25+
from zenml.utils.json_utils import pydantic_encoder
2526

2627
logger = get_logger(__name__)
2728

@@ -234,7 +235,10 @@ def validate_metadata(
234235
)
235236
continue
236237

237-
if len(json.dumps(value)) > TEXT_FIELD_MAX_LENGTH:
238+
if (
239+
len(json.dumps(value, default=pydantic_encoder))
240+
> TEXT_FIELD_MAX_LENGTH
241+
):
238242
logger.warning(
239243
f"Metadata value for key '{key}' is too large to be "
240244
"stored in the database. Skipping."

src/zenml/zen_stores/sql_zen_store.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7172,6 +7172,8 @@ def create_run_metadata(self, run_metadata: RunMetadataRequest) -> None:
71727172
)
71737173

71747174
if run_metadata.resources:
7175+
from zenml.utils.json_utils import pydantic_encoder
7176+
71757177
for key, value in run_metadata.values.items():
71767178
type_ = run_metadata.types[key]
71777179

@@ -7180,7 +7182,7 @@ def create_run_metadata(self, run_metadata: RunMetadataRequest) -> None:
71807182
user_id=run_metadata.user,
71817183
stack_component_id=run_metadata.stack_component_id,
71827184
key=key,
7183-
value=json.dumps(value),
7185+
value=json.dumps(value, default=pydantic_encoder),
71847186
type=type_,
71857187
publisher_step_id=run_metadata.publisher_step_id,
71867188
)
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Copyright (c) ZenML GmbH 2025. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at:
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12+
# or implied. See the License for the specific language governing
13+
# permissions and limitations under the License.
14+
"""Unit tests for metadata_types module."""
15+
16+
from zenml.metadata.metadata_types import (
17+
MetadataTypeEnum,
18+
get_metadata_type,
19+
validate_metadata,
20+
)
21+
22+
23+
class TestValidateMetadata:
    """Checks that `validate_metadata` accepts set and tuple values."""

    def test_validate_metadata_with_set(self):
        """A set entry and a string entry both pass validation."""
        result = validate_metadata({"my_set": {1, 2, 3}, "my_string": "hello"})

        # Neither entry may be dropped by validation.
        assert "my_set" in result
        assert "my_string" in result
        assert result["my_set"] == {1, 2, 3}
        assert result["my_string"] == "hello"

    def test_validate_metadata_with_tuple(self):
        """A tuple entry passes validation and compares equal afterwards."""
        result = validate_metadata({"my_tuple": (1, 2, 3)})

        assert "my_tuple" in result
        assert result["my_tuple"] == (1, 2, 3)

    def test_validate_metadata_with_nested_sets(self):
        """A dict holding a set and a tuple passes validation."""
        inner = {
            "my_set": {1, 2, 3},
            "my_tuple": (4, 5, 6),
        }
        result = validate_metadata({"nested": inner})

        assert "nested" in result
        assert result["nested"]["my_set"] == {1, 2, 3}
        assert result["nested"]["my_tuple"] == (4, 5, 6)
55+
56+
57+
class TestGetMetadataType:
    """Checks the enum member `get_metadata_type` reports for collections."""

    def test_get_metadata_type_for_set(self):
        """A set value is reported as `MetadataTypeEnum.SET`."""
        assert get_metadata_type({1, 2, 3}) == MetadataTypeEnum.SET

    def test_get_metadata_type_for_tuple(self):
        """A tuple value is reported as `MetadataTypeEnum.TUPLE`."""
        assert get_metadata_type((1, 2, 3)) == MetadataTypeEnum.TUPLE

0 commit comments

Comments
 (0)