Commit 5fa7d2d

Merge branch 'develop' into claude/zenml-issue-4248-01FHgBjs7uLxfNF5oP7incqW

2 parents 6b41e90 + d8bd68a

36 files changed: +1334 −238 lines

.github/workflows/linting.yml

Lines changed: 1 addition & 0 deletions

```diff
@@ -75,6 +75,7 @@ jobs:
           remove-android: 'true'
           remove-haskell: 'true'
           build-mount-path: /var/lib/docker/
+        if: inputs.os == 'ubuntu-latest'
       - name: Checkout code
         uses: actions/[email protected]
         with:
```

.github/workflows/unit-test.yml

Lines changed: 1 addition & 0 deletions

```diff
@@ -86,6 +86,7 @@ jobs:
           remove-android: 'true'
           remove-haskell: 'true'
           build-mount-path: /var/lib/docker/
+        if: inputs.os == 'ubuntu-latest'
       - name: Checkout code
         uses: actions/[email protected]
         with:
```

docs/book/how-to/steps-pipelines/dynamic_pipelines.md

Lines changed: 89 additions & 0 deletions

@@ -96,6 +96,90 @@ Use `runtime="inline"` when you need:

- Shared resources with the orchestrator
- Sequential execution

### Map/Reduce over collections

Dynamic pipelines support a high-level map/reduce pattern over sequence-like step outputs. This lets you fan a step out across the items of a collection and then reduce the results, without writing manual loops or loading data in the orchestration environment.

```python
from zenml import pipeline, step

@step
def producer() -> list[int]:
    return [1, 2, 3]

@step
def worker(value: int) -> int:
    return value * 2

@step
def reducer(values: list[int]) -> int:
    return sum(values)

@pipeline(dynamic=True, enable_cache=False)
def map_reduce():
    values = producer()
    results = worker.map(values)  # fan out over the collection
    reducer(results)  # pass the list of artifacts directly
```

Key points:

- `step.map(...)` fans a step out over sequence-like inputs.
- Steps can accept lists of artifacts directly as inputs (useful for reducers).
- You can pass a mapped output directly to a downstream step without loading it in the orchestration environment.

#### Mapping semantics: map vs product

- `step.map(...)`: If multiple sequence-like inputs are provided, they must all have the same length `n`. ZenML creates `n` mapped steps, where the i-th step receives the i-th element of each input (see the sketch below).
- `step.product(...)`: Creates a mapped step for each combination of elements across all input sequences (cartesian product).
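
For `step.map(...)` with multiple same-length inputs, here is a minimal sketch; the step names (`xs`, `ys`, `add`) are illustrative and not from the original docs:

```python
from zenml import pipeline, step

@step
def xs() -> list[int]:
    return [1, 2, 3]

@step
def ys() -> list[int]:
    return [10, 20, 30]

@step
def add(a: int, b: int) -> int:
    return a + b

@pipeline(dynamic=True)
def same_length_map():
    # Both sequences have length 3, so three mapped steps are created:
    # add(1, 10), add(2, 20), add(3, 30).
    add.map(a=xs(), b=ys())
```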

Example (cartesian product):

```python
from zenml import pipeline, step

@step
def int_values() -> list[int]:
    return [1, 2]

@step
def str_values() -> list[str]:
    return ["a", "b", "c"]

@step
def do_something(a: int, b: str) -> int:
    ...

@pipeline(dynamic=True)
def cartesian_example():
    a = int_values()
    b = str_values()
    # Produces 2 * 3 = 6 mapped steps
    do_something.product(a, b)
```

#### Broadcasting inputs with unmapped(...)

If you want to pass a sequence-like artifact as a whole to each mapped invocation (i.e., avoid splitting it), wrap it with `unmapped(...)`:

```python
from zenml import pipeline, step, unmapped

@step
def producer(length: int) -> list[int]:
    return [1] * length

@step
def consumer(a: int, b: list[int]) -> None:
    # `b` is the full list for every mapped call
    ...

@pipeline(dynamic=True)
def unmapped_example():
    a = producer(length=3)  # list of 3 ints
    b = producer(length=4)  # list of 4 ints
    consumer.map(a=a, b=unmapped(b))
```

### Parallel Step Execution

Dynamic pipelines support true parallel execution using `step.submit()`. This method returns a `StepRunFuture` that you can use to wait for results or pass to downstream steps:
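
A minimal, hypothetical sketch that assumes only what the sentence above states — `step.submit()` returns a `StepRunFuture` that can be passed to downstream steps; the `worker` and `combine` steps are illustrative:

```python
from zenml import pipeline, step

@step
def worker(value: int) -> int:
    return value * 2

@step
def combine(a: int, b: int) -> int:
    return a + b

@pipeline(dynamic=True)
def parallel_example():
    # Each submit() call returns a StepRunFuture instead of blocking,
    # so both workers can run concurrently.
    future_a = worker.submit(value=1)
    future_b = worker.submit(value=2)
    # Futures can be handed directly to a downstream step.
    combine(a=future_a, b=future_b)
```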
@@ -205,6 +289,11 @@ def dynamic_pipeline():

When you call `.load()` on an artifact in a dynamic pipeline, it synchronously loads the data. For large artifacts, or when you want to maintain parallelism, consider passing the step outputs (future or artifact) directly to downstream steps instead of loading them.
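For instance, a hypothetical sketch contrasting the two approaches; the step names are illustrative, and the `.submit()`/`.load()` calls follow the descriptions above rather than a confirmed API surface:

```python
from zenml import pipeline, step

@step
def producer() -> list[int]:
    return [1, 2, 3]

@step
def consumer(values: list[int]) -> int:
    return sum(values)

@pipeline(dynamic=True)
def load_vs_pass_through():
    future = producer.submit()

    # Synchronous: blocks and materializes the data in the
    # orchestration environment.
    values = future.load()

    # Keeps parallelism: the downstream step receives the output
    # and loads it where it actually runs.
    consumer(future)
```
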
### Mapping Limitations

- Mapping is currently supported only over artifacts produced within the same pipeline run; mapping over raw data or external artifacts is not supported.
- The chunk size for mapped collection loading defaults to 1 and is not yet configurable.

## Best Practices

1. **Use `runtime="isolated"` for parallel steps**: This ensures better resource isolation and prevents interference between concurrent step executions.

src/zenml/__init__.py

Lines changed: 2 additions & 1 deletion

```diff
@@ -61,7 +61,7 @@ def __getattr__(name: str) -> Any:
 from zenml.steps.utils import log_step_metadata
 from zenml.utils.metadata_utils import log_metadata, bulk_log_metadata
 from zenml.utils.tag_utils import Tag, add_tags, remove_tags
-
+from zenml.execution.pipeline.dynamic.utils import unmapped

 __all__ = [
     "add_tags",
@@ -84,4 +84,5 @@ def __getattr__(name: str) -> Any:
     "register_artifact",
     "show",
     "step",
+    "unmapped",
 ]
```

src/zenml/artifacts/utils.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -208,6 +208,7 @@ def _store_artifact_data_and_prepare_request(
         visualizations=visualizations,
         has_custom_name=has_custom_name,
         save_type=save_type,
+        item_count=materializer.get_item_count(data),
         metadata=validate_metadata(combined_metadata)
         if combined_metadata
         else None,
```

src/zenml/config/compiler.py

Lines changed: 10 additions & 5 deletions

```diff
@@ -468,11 +468,16 @@ def _get_step_spec(
             The step spec.
         """
         inputs = {
-            key: InputSpec(
-                step_name=artifact.invocation_id,
-                output_name=artifact.output_name,
-            )
-            for key, artifact in invocation.input_artifacts.items()
+            key: [
+                InputSpec(
+                    step_name=artifact.invocation_id,
+                    output_name=artifact.output_name,
+                    chunk_index=artifact.chunk_index,
+                    chunk_size=artifact.chunk_size,
+                )
+                for artifact in artifact_list
+            ]
+            for key, artifact_list in invocation.input_artifacts.items()
         }
         return StepSpec(
             source=invocation.step.resolve(),
```

src/zenml/config/step_configurations.py

Lines changed: 31 additions & 3 deletions

```diff
@@ -407,25 +407,53 @@ class InputSpec(FrozenBaseModel):

     step_name: str
     output_name: str
+    chunk_index: Optional[int] = None
+    chunk_size: Optional[int] = None


 class StepSpec(FrozenBaseModel):
     """Specification of a pipeline."""

     source: SourceWithValidator
     upstream_steps: List[str]
-    inputs: Dict[str, InputSpec] = {}
+    # TODO: This should be `Dict[str, List[InputSpec]]`, but that would break
+    # client-server compatibility. In the next major release, change this and
+    # uncomment the code that migrates legacy specs.
+    inputs: Dict[str, Union[List[InputSpec], InputSpec]] = {}
     invocation_id: str
     enable_heartbeat: bool = False

     @model_validator(mode="before")
     @classmethod
     @before_validator_handler
-    def _migrate_invocation_id(cls, data: Dict[str, Any]) -> Dict[str, Any]:
+    def _migrate_legacy_fields(cls, data: Dict[str, Any]) -> Dict[str, Any]:
         if "invocation_id" not in data:
             data["invocation_id"] = data.pop("pipeline_parameter_name", "")
+
+        # converted_inputs = {}
+        # for key, value in data.get("inputs", {}).items():
+        #     if isinstance(value, (InputSpec, dict)):
+        #         converted_inputs[key] = [value]
+        #     else:
+        #         converted_inputs[key] = value
+        # data["inputs"] = converted_inputs
+
         return data

+    # TODO: Remove this and use the `inputs` property once we change the type
+    # of the `inputs` field.
+    @property
+    def inputs_v2(self) -> Dict[str, List[InputSpec]]:
+        """Inputs of the step spec in v2 format.
+
+        Returns:
+            The inputs of the step spec in v2 format.
+        """
+        return {
+            key: [value] if isinstance(value, InputSpec) else value
+            for key, value in self.inputs.items()
+        }
+
     def __eq__(self, other: Any) -> bool:
         """Returns whether the other object is referring to the same step.

@@ -445,7 +473,7 @@ def __eq__(self, other: Any) -> bool:
         if self.upstream_steps != other.upstream_steps:
             return False

-        if self.inputs != other.inputs:
+        if self.inputs_v2 != other.inputs_v2:
             return False

         if self.invocation_id != other.invocation_id:
```

src/zenml/execution/pipeline/dynamic/outputs.py

Lines changed: 36 additions & 24 deletions

```diff
@@ -14,12 +14,10 @@
 """Dynamic pipeline execution outputs."""

 from concurrent.futures import Future
-from typing import Any, List, Tuple, Union
+from typing import Any, List, Optional, Tuple, Union, overload

 from zenml.logger import get_logger
-from zenml.models import (
-    ArtifactVersionResponse,
-)
+from zenml.models import ArtifactVersionResponse

 logger = get_logger(__name__)

@@ -29,6 +27,8 @@ class OutputArtifact(ArtifactVersionResponse):

     output_name: str
     step_name: str
+    chunk_index: Optional[int] = None
+    chunk_size: Optional[int] = None


 StepRunOutputs = Union[None, OutputArtifact, Tuple[OutputArtifact, ...]]
@@ -191,34 +191,46 @@ def load(self, disable_cache: bool = False) -> Any:
         else:
             raise ValueError(f"Invalid step run output: {result}")

-    def __getitem__(self, key: Any) -> ArtifactFuture:
-        """Get an artifact future by key or index.
+    @overload
+    def __getitem__(self, key: int) -> ArtifactFuture: ...
+
+    @overload
+    def __getitem__(self, key: slice) -> Tuple[ArtifactFuture, ...]: ...
+
+    def __getitem__(
+        self, key: Union[int, slice]
+    ) -> Union[ArtifactFuture, Tuple[ArtifactFuture, ...]]:
+        """Get an artifact future.

         Args:
-            key: The key or index of the artifact future.
+            key: The index or slice of the artifact futures.

         Raises:
-            TypeError: If the key is not an integer.
-            IndexError: If the index is out of range.
+            TypeError: If the key is not an integer or slice.

         Returns:
-            The artifact future.
+            The artifact futures.
         """
-        if not isinstance(key, int):
-            raise TypeError(f"Invalid key type: {type(key)}")
+        if isinstance(key, int):
+            output_key = self._output_keys[key]

-        # Convert to positive index if necessary
-        if key < 0:
-            key += len(self._output_keys)
-
-        if key > len(self._output_keys):
-            raise IndexError(f"Index out of range: {key}")
-
-        return ArtifactFuture(
-            wrapped=self._wrapped,
-            invocation_id=self._invocation_id,
-            index=key,
-        )
+            return ArtifactFuture(
+                wrapped=self._wrapped,
+                invocation_id=self._invocation_id,
+                index=self._output_keys.index(output_key),
+            )
+        elif isinstance(key, slice):
+            output_keys = self._output_keys[key]
+            return tuple(
+                ArtifactFuture(
+                    wrapped=self._wrapped,
+                    invocation_id=self._invocation_id,
+                    index=self._output_keys.index(output_key),
+                )
+                for output_key in output_keys
+            )
+        else:
+            raise TypeError(f"Invalid key type: {type(key)}")

     def __iter__(self) -> Any:
         """Iterate over the artifact futures.
```

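A short, hypothetical usage sketch of the indexing behavior added above; `my_step` and its outputs are illustrative, not from the diff:

```python
# Inside a dynamic pipeline: submit a step that has several outputs.
outputs = my_step.submit()  # hypothetical StepRunFuture

first = outputs[0]     # int index  -> a single ArtifactFuture
rest = outputs[1:]     # slice      -> Tuple[ArtifactFuture, ...]
outputs["name"]        # any other key type raises TypeError
```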