Skip to content

Commit fcda9c8

Browse files
committed
Fix set serialization in metadata system
Sets were documented as a supported MetadataType but failed during serialization because json.dumps() cannot serialize Python sets. This change adds a helper function to convert sets (and tuples) to lists before JSON serialization, making them compatible with JSON while preserving type information via MetadataTypeEnum.

Changes:
- Add serialize_metadata_value() helper in metadata_types.py
- Update validate_metadata() to use the new helper
- Update Client.create_run_metadata() to use the helper
- Update SQLZenStore.create_run_metadata() to use the helper
- Add comprehensive unit tests for set/tuple serialization

Fixes #4248
1 parent b5dfe3f commit fcda9c8

File tree

5 files changed

+193
-5
lines changed

5 files changed

+193
-5
lines changed

docs/book/how-to/metadata/metadata.md

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,39 @@ client.create_run_metadata(
323323
)
324324
~~~
325325

326+
## Supported Metadata Value Types
327+
328+
ZenML metadata values support the following Python types:
329+
330+
* **Primitive types**: `str`, `int`, `float`, `bool`
331+
* **Collection types**: `list`, `dict`, `set`, `tuple`
332+
* **Special ZenML types**: `Uri`, `Path`, `DType`, `StorageSize` (see below)
333+
334+
{% hint style="info" %}
335+
Since metadata is stored as JSON, sets and tuples are automatically converted to lists during serialization. The type information is preserved separately, so you can still identify the original type when retrieving metadata.
336+
{% endhint %}
337+
338+
```python
339+
from zenml import log_metadata
340+
341+
# All of these are valid metadata values
342+
log_metadata(
343+
metadata={
344+
"accuracy": 0.95, # float
345+
"epochs": 100, # int
346+
"model_name": "bert-base", # str
347+
"is_production": True, # bool
348+
"hyperparameters": { # dict
349+
"learning_rate": 0.001,
350+
"batch_size": 32
351+
},
352+
"loss_history": [0.5, 0.3, 0.2], # list
353+
"tags": {"ml", "nlp", "transformer"}, # set (stored as list)
354+
"dimensions": (128, 128, 3), # tuple (stored as list)
355+
}
356+
)
357+
```
358+
326359
## Special Metadata Types
327360

328361
ZenML includes several special metadata types that provide standardized ways to represent common metadata:

src/zenml/client.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
"""Client implementation."""
1515

1616
import functools
17-
import json
1817
import os
1918
from abc import ABCMeta
2019
from datetime import datetime
@@ -5497,13 +5496,16 @@ def create_run_metadata(
54975496
publisher_step_id: The ID of the step execution that publishes
54985497
this metadata automatically.
54995498
"""
5500-
from zenml.metadata.metadata_types import get_metadata_type
5499+
from zenml.metadata.metadata_types import (
5500+
get_metadata_type,
5501+
serialize_metadata_value,
5502+
)
55015503

55025504
values: Dict[str, "MetadataType"] = {}
55035505
types: Dict[str, "MetadataTypeEnum"] = {}
55045506
for key, value in metadata.items():
55055507
# Skip metadata that is too large to be stored in the database.
5506-
if len(json.dumps(value)) > TEXT_FIELD_MAX_LENGTH:
5508+
if len(serialize_metadata_value(value)) > TEXT_FIELD_MAX_LENGTH:
55075509
logger.warning(
55085510
f"Metadata value for key '{key}' is too large to be "
55095511
"stored in the database. Skipping."

src/zenml/metadata/metadata_types.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,24 @@ def cast_to_metadata_type(
210210
return typed_value # type: ignore[no-any-return]
211211

212212

213+
def serialize_metadata_value(value: MetadataType) -> str:
    """Serialize a metadata value to JSON.

    JSON has no set type, so sets (and frozensets) are emitted as JSON
    arrays. This applies at any nesting depth, e.g. to a set stored inside
    a dict or a list, not only to a top-level set. Tuples are already
    emitted as arrays by the `json` module itself. The type information of
    the value is preserved separately via MetadataTypeEnum.

    Args:
        value: The metadata value to serialize.

    Returns:
        The JSON-serialized value as a string.

    Raises:
        TypeError: If the value contains an object that is neither natively
            JSON serializable nor a set.
    """

    def _sets_to_lists(obj: object) -> object:
        # `json.dumps` calls this hook only for objects it cannot encode
        # natively, so regular metadata values never pass through here.
        if isinstance(obj, (set, frozenset)):
            return list(obj)
        raise TypeError(
            f"Object of type {type(obj).__name__} is not JSON serializable"
        )

    return json.dumps(value, default=_sets_to_lists)
229+
230+
213231
def validate_metadata(
214232
metadata: Dict[str, MetadataType],
215233
) -> Dict[str, MetadataType]:
@@ -234,7 +252,7 @@ def validate_metadata(
234252
)
235253
continue
236254

237-
if len(json.dumps(value)) > TEXT_FIELD_MAX_LENGTH:
255+
if len(serialize_metadata_value(value)) > TEXT_FIELD_MAX_LENGTH:
238256
logger.warning(
239257
f"Metadata value for key '{key}' is too large to be "
240258
"stored in the database. Skipping."

src/zenml/zen_stores/sql_zen_store.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7172,6 +7172,10 @@ def create_run_metadata(self, run_metadata: RunMetadataRequest) -> None:
71727172
)
71737173

71747174
if run_metadata.resources:
7175+
from zenml.metadata.metadata_types import (
7176+
serialize_metadata_value,
7177+
)
7178+
71757179
for key, value in run_metadata.values.items():
71767180
type_ = run_metadata.types[key]
71777181

@@ -7180,7 +7184,7 @@ def create_run_metadata(self, run_metadata: RunMetadataRequest) -> None:
71807184
user_id=run_metadata.user,
71817185
stack_component_id=run_metadata.stack_component_id,
71827186
key=key,
7183-
value=json.dumps(value),
7187+
value=serialize_metadata_value(value),
71847188
type=type_,
71857189
publisher_step_id=run_metadata.publisher_step_id,
71867190
)
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
# Copyright (c) ZenML GmbH 2025. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at:
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12+
# or implied. See the License for the specific language governing
13+
# permissions and limitations under the License.
14+
"""Unit tests for metadata_types module."""
15+
16+
import json
17+
18+
from zenml.metadata.metadata_types import (
19+
MetadataTypeEnum,
20+
get_metadata_type,
21+
serialize_metadata_value,
22+
validate_metadata,
23+
)
24+
25+
26+
class TestSerializeMetadataValue:
    """Tests for serialize_metadata_value function."""

    def test_serialize_set_to_list(self):
        """Test that sets are converted to lists before JSON serialization."""
        test_set = {1, 2, 3}
        result = serialize_metadata_value(test_set)
        # The result should be valid JSON
        deserialized = json.loads(result)
        # Sets are unordered, so check that all elements are present
        assert isinstance(deserialized, list)
        assert sorted(deserialized) == [1, 2, 3]

    def test_serialize_tuple_to_list(self):
        """Test that tuples are converted to lists before JSON serialization."""
        test_tuple = (1, 2, 3)
        result = serialize_metadata_value(test_tuple)
        deserialized = json.loads(result)
        assert deserialized == [1, 2, 3]

    def test_serialize_nested_set(self):
        """Test that nested sets are properly converted."""
        test_dict = {"my_set": {1, 2, 3}}
        result = serialize_metadata_value(test_dict)
        deserialized = json.loads(result)
        assert isinstance(deserialized, dict)
        # The nested set must come back as a JSON array. Sets are unordered,
        # so compare the sorted elements rather than the raw list.
        assert isinstance(deserialized["my_set"], list)
        assert sorted(deserialized["my_set"]) == [1, 2, 3]

    def test_serialize_string(self):
        """Test that strings are serialized normally."""
        test_string = "hello world"
        result = serialize_metadata_value(test_string)
        deserialized = json.loads(result)
        assert deserialized == "hello world"

    def test_serialize_int(self):
        """Test that integers are serialized normally."""
        test_int = 42
        result = serialize_metadata_value(test_int)
        deserialized = json.loads(result)
        assert deserialized == 42

    def test_serialize_float(self):
        """Test that floats are serialized normally."""
        test_float = 3.14
        result = serialize_metadata_value(test_float)
        deserialized = json.loads(result)
        assert deserialized == 3.14

    def test_serialize_bool(self):
        """Test that booleans are serialized normally."""
        result = serialize_metadata_value(True)
        deserialized = json.loads(result)
        assert deserialized is True

    def test_serialize_dict(self):
        """Test that dictionaries are serialized normally."""
        test_dict = {"key": "value", "number": 42}
        result = serialize_metadata_value(test_dict)
        deserialized = json.loads(result)
        assert deserialized == test_dict

    def test_serialize_list(self):
        """Test that lists are serialized normally."""
        test_list = [1, 2, 3, "four"]
        result = serialize_metadata_value(test_list)
        deserialized = json.loads(result)
        assert deserialized == test_list
95+
96+
97+
class TestValidateMetadata:
    """Checks that validate_metadata accepts set and tuple values."""

    def test_validate_metadata_with_set(self):
        """A set entry and a string entry should both pass validation."""
        result = validate_metadata(
            {"my_set": {1, 2, 3}, "my_string": "hello"}
        )
        # Neither entry may be dropped by validation.
        assert "my_set" in result
        assert "my_string" in result
        # The returned values are compared against the values passed in.
        assert result["my_set"] == {1, 2, 3}
        assert result["my_string"] == "hello"

    def test_validate_metadata_with_tuple(self):
        """A tuple entry should pass validation."""
        result = validate_metadata({"my_tuple": (1, 2, 3)})
        assert "my_tuple" in result
        assert result["my_tuple"] == (1, 2, 3)
116+
117+
118+
class TestGetMetadataType:
    """Checks the type enum that get_metadata_type reports for collections."""

    def test_get_metadata_type_for_set(self):
        """A set value should map to MetadataTypeEnum.SET."""
        detected_type = get_metadata_type({1, 2, 3})
        assert detected_type == MetadataTypeEnum.SET

    def test_get_metadata_type_for_tuple(self):
        """A tuple value should map to MetadataTypeEnum.TUPLE."""
        detected_type = get_metadata_type((1, 2, 3))
        assert detected_type == MetadataTypeEnum.TUPLE

0 commit comments

Comments
 (0)