Skip to content

Commit 084095b

Browse files
committed
Fix set serialization in metadata system
Sets were documented as a supported MetadataType but failed during serialization because json.dumps() cannot serialize Python sets. This change adds a helper function that converts sets (and tuples) to lists before JSON serialization, making them compatible with JSON while preserving type information via MetadataTypeEnum.

Changes:
- Add serialize_metadata_value() helper in metadata_types.py
- Update validate_metadata() to use the new helper
- Update Client.create_run_metadata() to use the helper
- Update SQLZenStore.create_run_metadata() to use the helper
- Add comprehensive unit tests for set/tuple serialization

Fixes #4248
1 parent b5dfe3f commit 084095b

File tree

4 files changed

+160
-5
lines changed

4 files changed

+160
-5
lines changed

src/zenml/client.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
"""Client implementation."""
1515

1616
import functools
17-
import json
1817
import os
1918
from abc import ABCMeta
2019
from datetime import datetime
@@ -5497,13 +5496,16 @@ def create_run_metadata(
54975496
publisher_step_id: The ID of the step execution that publishes
54985497
this metadata automatically.
54995498
"""
5500-
from zenml.metadata.metadata_types import get_metadata_type
5499+
from zenml.metadata.metadata_types import (
5500+
get_metadata_type,
5501+
serialize_metadata_value,
5502+
)
55015503

55025504
values: Dict[str, "MetadataType"] = {}
55035505
types: Dict[str, "MetadataTypeEnum"] = {}
55045506
for key, value in metadata.items():
55055507
# Skip metadata that is too large to be stored in the database.
5506-
if len(json.dumps(value)) > TEXT_FIELD_MAX_LENGTH:
5508+
if len(serialize_metadata_value(value)) > TEXT_FIELD_MAX_LENGTH:
55075509
logger.warning(
55085510
f"Metadata value for key '{key}' is too large to be "
55095511
"stored in the database. Skipping."

src/zenml/metadata/metadata_types.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,24 @@ def cast_to_metadata_type(
210210
return typed_value # type: ignore[no-any-return]
211211

212212

213+
def serialize_metadata_value(value: "MetadataType") -> str:
    """Serialize a metadata value to a JSON string.

    ``json.dumps`` cannot encode sets, so sets (and frozensets) are converted
    to lists first; tuples are converted as well so that every sequence-like
    value goes through the same path. The conversion is applied recursively,
    so a set or tuple nested inside a dict or list (for example
    ``{"tags": {"a", "b"}}``) is handled too instead of raising ``TypeError``.
    The original container type is not recorded in the JSON output; it is
    preserved separately via ``MetadataTypeEnum``.

    Args:
        value: The metadata value to serialize.

    Returns:
        The JSON-serialized value as a string.
    """

    def _to_jsonable(obj: object) -> object:
        """Return a copy of ``obj`` with sets and tuples replaced by lists."""
        if isinstance(obj, dict):
            # Keys are left untouched; only the values can hold nested
            # containers that need converting.
            return {key: _to_jsonable(item) for key, item in obj.items()}
        if isinstance(obj, (list, tuple, set, frozenset)):
            return [_to_jsonable(item) for item in obj]
        return obj

    return json.dumps(_to_jsonable(value))
229+
230+
213231
def validate_metadata(
214232
metadata: Dict[str, MetadataType],
215233
) -> Dict[str, MetadataType]:
@@ -234,7 +252,7 @@ def validate_metadata(
234252
)
235253
continue
236254

237-
if len(json.dumps(value)) > TEXT_FIELD_MAX_LENGTH:
255+
if len(serialize_metadata_value(value)) > TEXT_FIELD_MAX_LENGTH:
238256
logger.warning(
239257
f"Metadata value for key '{key}' is too large to be "
240258
"stored in the database. Skipping."

src/zenml/zen_stores/sql_zen_store.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7172,6 +7172,10 @@ def create_run_metadata(self, run_metadata: RunMetadataRequest) -> None:
71727172
)
71737173

71747174
if run_metadata.resources:
7175+
from zenml.metadata.metadata_types import (
7176+
serialize_metadata_value,
7177+
)
7178+
71757179
for key, value in run_metadata.values.items():
71767180
type_ = run_metadata.types[key]
71777181

@@ -7180,7 +7184,7 @@ def create_run_metadata(self, run_metadata: RunMetadataRequest) -> None:
71807184
user_id=run_metadata.user,
71817185
stack_component_id=run_metadata.stack_component_id,
71827186
key=key,
7183-
value=json.dumps(value),
7187+
value=serialize_metadata_value(value),
71847188
type=type_,
71857189
publisher_step_id=run_metadata.publisher_step_id,
71867190
)
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
# Copyright (c) ZenML GmbH 2025. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at:
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12+
# or implied. See the License for the specific language governing
13+
# permissions and limitations under the License.
14+
"""Unit tests for metadata_types module."""
15+
16+
import json
17+
18+
from zenml.metadata.metadata_types import (
19+
MetadataTypeEnum,
20+
get_metadata_type,
21+
serialize_metadata_value,
22+
validate_metadata,
23+
)
24+
25+
26+
class TestSerializeMetadataValue:
    """Round-trip checks for ``serialize_metadata_value``."""

    def test_serialize_set_to_list(self):
        """A set is emitted as a JSON array holding the same elements."""
        decoded = json.loads(serialize_metadata_value({1, 2, 3}))
        assert isinstance(decoded, list)
        # Set iteration order is unspecified, so compare after sorting.
        assert sorted(decoded) == [1, 2, 3]

    def test_serialize_tuple_to_list(self):
        """A tuple is emitted as a JSON array in the same order."""
        decoded = json.loads(serialize_metadata_value((1, 2, 3)))
        assert decoded == [1, 2, 3]

    def test_serialize_nested_set(self):
        """A dict that holds a set decodes back to a dict."""
        payload = {"my_set": {1, 2, 3}}
        decoded = json.loads(serialize_metadata_value(payload))
        # NOTE(review): only the outer container type is asserted here.
        # json.dumps cannot encode a set, so this test depends on the helper
        # converting values nested inside the dict - confirm that it does.
        assert isinstance(decoded, dict)

    def test_serialize_string(self):
        """A string survives the round trip unchanged."""
        decoded = json.loads(serialize_metadata_value("hello world"))
        assert decoded == "hello world"

    def test_serialize_int(self):
        """An integer survives the round trip unchanged."""
        decoded = json.loads(serialize_metadata_value(42))
        assert decoded == 42

    def test_serialize_float(self):
        """A float survives the round trip unchanged."""
        decoded = json.loads(serialize_metadata_value(3.14))
        assert decoded == 3.14

    def test_serialize_bool(self):
        """A boolean decodes back to the ``True`` singleton."""
        decoded = json.loads(serialize_metadata_value(True))
        assert decoded is True

    def test_serialize_dict(self):
        """A dictionary of JSON-native values survives the round trip."""
        original = {"key": "value", "number": 42}
        decoded = json.loads(serialize_metadata_value(original))
        assert decoded == original

    def test_serialize_list(self):
        """A list of mixed JSON-native values survives the round trip."""
        original = [1, 2, 3, "four"]
        decoded = json.loads(serialize_metadata_value(original))
        assert decoded == original
95+
96+
97+
class TestValidateMetadata:
    """Checks that ``validate_metadata`` keeps set and tuple entries."""

    def test_validate_metadata_with_set(self):
        """A set entry and a string entry are both returned unchanged."""
        entries = {"my_set": {1, 2, 3}, "my_string": "hello"}
        outcome = validate_metadata(entries)
        # Neither key may be dropped during validation.
        assert "my_set" in outcome
        assert "my_string" in outcome
        # The returned values still compare equal to what was passed in.
        assert outcome["my_set"] == {1, 2, 3}
        assert outcome["my_string"] == "hello"

    def test_validate_metadata_with_tuple(self):
        """A tuple entry is returned unchanged."""
        outcome = validate_metadata({"my_tuple": (1, 2, 3)})
        assert "my_tuple" in outcome
        assert outcome["my_tuple"] == (1, 2, 3)
116+
117+
118+
class TestGetMetadataType:
    """Checks the enum member ``get_metadata_type`` reports per container."""

    def test_get_metadata_type_for_set(self):
        """A set is reported as ``MetadataTypeEnum.SET``."""
        assert get_metadata_type({1, 2, 3}) == MetadataTypeEnum.SET

    def test_get_metadata_type_for_tuple(self):
        """A tuple is reported as ``MetadataTypeEnum.TUPLE``."""
        assert get_metadata_type((1, 2, 3)) == MetadataTypeEnum.TUPLE

0 commit comments

Comments
 (0)