Fix set serialization in metadata system (#4249)

htahir1 · claude · web-flow · commit 1fb40b4f8ec6 · 2025-11-26T16:33:06.000+01:00
Sets were documented as a supported MetadataType but failed during serialization because json.dumps() cannot serialize Python sets. This change uses the standard pydantic_encoder to convert sets (and tuples) to lists before JSON serialization, making them compatible with JSON while preserving type information via MetadataTypeEnum.

The fix uses pydantic_encoder directly (via json.dumps(value, default=pydantic_encoder)) at all serialization points, following the same pattern used throughout the ZenML codebase. This ensures consistency and proper handling of all supported types, including nested sets/tuples, UUIDs, datetimes, etc.

Changes:
- Update validate_metadata() in metadata_types.py to use pydantic_encoder
- Update Client.create_run_metadata() to use pydantic_encoder
- Update SQLZenStore.create_run_metadata() to use pydantic_encoder
- Add unit tests for set/tuple validation
- Document supported metadata types in user guide

Fixes #4248

Co-authored-by: Claude <noreply@anthropic.com>
diff --git a/docs/book/how-to/metadata/metadata.md b/docs/book/how-to/metadata/metadata.md
@@ -22,6 +22,10 @@ ZenML makes it easy to log and retrieve this information through a simple interf
 
 The primary way to log metadata in ZenML is through the `log_metadata` function, which allows you to attach JSON-serializable key-value pairs to various entities.
 
+{% hint style="info" %}
+Metadata supports primitive types (`str`, `int`, `float`, `bool`), collections (`list`, `dict`, `set`, `tuple`), and special ZenML types (`Uri`, `Path`, `DType`, `StorageSize`). Sets and tuples are automatically converted to lists during storage.
+{% endhint %}
+
 ```python
 from zenml import log_metadata
 
diff --git a/src/zenml/client.py b/src/zenml/client.py
@@ -5498,12 +5498,16 @@ def create_run_metadata(
                 this metadata automatically.
         """
         from zenml.metadata.metadata_types import get_metadata_type
+        from zenml.utils.json_utils import pydantic_encoder
 
         values: Dict[str, "MetadataType"] = {}
         types: Dict[str, "MetadataTypeEnum"] = {}
         for key, value in metadata.items():
             # Skip metadata that is too large to be stored in the database.
-            if len(json.dumps(value)) > TEXT_FIELD_MAX_LENGTH:
+            if (
+                len(json.dumps(value, default=pydantic_encoder))
+                > TEXT_FIELD_MAX_LENGTH
+            ):
                 logger.warning(
                     f"Metadata value for key '{key}' is too large to be "
                     "stored in the database. Skipping."
diff --git a/src/zenml/metadata/metadata_types.py b/src/zenml/metadata/metadata_types.py
@@ -22,6 +22,7 @@
 from zenml.constants import STR_FIELD_MAX_LENGTH, TEXT_FIELD_MAX_LENGTH
 from zenml.logger import get_logger
 from zenml.utils.enum_utils import StrEnum
+from zenml.utils.json_utils import pydantic_encoder
 
 logger = get_logger(__name__)
 
@@ -234,7 +235,10 @@ def validate_metadata(
             )
             continue
 
-        if len(json.dumps(value)) > TEXT_FIELD_MAX_LENGTH:
+        if (
+            len(json.dumps(value, default=pydantic_encoder))
+            > TEXT_FIELD_MAX_LENGTH
+        ):
             logger.warning(
                 f"Metadata value for key '{key}' is too large to be "
                 "stored in the database. Skipping."
diff --git a/src/zenml/zen_stores/sql_zen_store.py b/src/zenml/zen_stores/sql_zen_store.py
@@ -7183,6 +7183,8 @@ def create_run_metadata(self, run_metadata: RunMetadataRequest) -> None:
                 )
 
             if run_metadata.resources:
+                from zenml.utils.json_utils import pydantic_encoder
+
                 for key, value in run_metadata.values.items():
                     type_ = run_metadata.types[key]
 
@@ -7191,7 +7193,7 @@ def create_run_metadata(self, run_metadata: RunMetadataRequest) -> None:
                         user_id=run_metadata.user,
                         stack_component_id=run_metadata.stack_component_id,
                         key=key,
-                        value=json.dumps(value),
+                        value=json.dumps(value, default=pydantic_encoder),
                         type=type_,
                         publisher_step_id=run_metadata.publisher_step_id,
                     )
diff --git a/tests/unit/metadata/test_metadata_types.py b/tests/unit/metadata/test_metadata_types.py
@@ -0,0 +1,70 @@
+#  Copyright (c) ZenML GmbH 2025. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at:
+#
+#       https://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+#  or implied. See the License for the specific language governing
+#  permissions and limitations under the License.
+"""Unit tests for metadata_types module."""
+
+from zenml.metadata.metadata_types import (
+    MetadataTypeEnum,
+    get_metadata_type,
+    validate_metadata,
+)
+
+
+class TestValidateMetadata:
+    """Tests that validate_metadata keeps set, tuple and nested values."""
+
+    def test_validate_metadata_with_set(self):
+        """Test that a set value is validated and returned unchanged."""
+        metadata = {"my_set": {1, 2, 3}, "my_string": "hello"}
+        validated = validate_metadata(metadata)
+        # Both keys must survive validation; neither may be skipped.
+        assert "my_set" in validated
+        assert "my_string" in validated
+        assert validated["my_set"] == {1, 2, 3}
+        assert validated["my_string"] == "hello"
+
+    def test_validate_metadata_with_tuple(self):
+        """Test that a tuple value is validated and returned unchanged."""
+        metadata = {"my_tuple": (1, 2, 3)}
+        validated = validate_metadata(metadata)
+        assert "my_tuple" in validated
+        assert validated["my_tuple"] == (1, 2, 3)
+
+    def test_validate_metadata_with_nested_sets(self):
+        """Test that sets and tuples nested in a dict pass validation."""
+        metadata = {
+            "nested": {
+                "my_set": {1, 2, 3},
+                "my_tuple": (4, 5, 6),
+            }
+        }
+        validated = validate_metadata(metadata)
+        assert "nested" in validated
+        assert validated["nested"]["my_set"] == {1, 2, 3}
+        assert validated["nested"]["my_tuple"] == (4, 5, 6)
+
+
+class TestGetMetadataType:
+    """Tests for get_metadata_type on set and tuple values."""
+
+    def test_get_metadata_type_for_set(self):
+        """Test that a set is mapped to MetadataTypeEnum.SET."""
+        test_set = {1, 2, 3}
+        result = get_metadata_type(test_set)
+        assert result == MetadataTypeEnum.SET
+
+    def test_get_metadata_type_for_tuple(self):
+        """Test that a tuple is mapped to MetadataTypeEnum.TUPLE."""
+        test_tuple = (1, 2, 3)
+        result = get_metadata_type(test_tuple)
+        assert result == MetadataTypeEnum.TUPLE