diff --git a/docs/concepts/transforms.rst b/docs/concepts/transforms.rst index ab095caae..0a74f698b 100644 --- a/docs/concepts/transforms.rst +++ b/docs/concepts/transforms.rst @@ -120,44 +120,6 @@ about the state of the tasks at given points. Here is an example: In the above example, we can be sure that every task dict has a string field called ``foo``, and may or may not have a boolean field called ``bar``. -Keyed By -........ - -Fields in the input tasks can be "keyed by" another value in the task. -For example, a task's ``max-runtime`` may be keyed by ``platform``. -In the task, this looks like: - -.. code-block:: yaml - - max-runtime: - by-platform: - android: 7200 - windows: 3600 - default: 1800 - -This is a simple but powerful way to encode business rules in the tasks -provided as input to the transforms, rather than expressing those rules in the -transforms themselves. The structure is easily resolved to a single value -using the :func:`~taskgraph.util.schema.resolve_keyed_by` utility function: - -.. code-block:: python - - from taskgraph.util.schema import resolve_keyed_by - - @transforms.add - def resolve_max_runtime(config, tasks): - for task in tasks: - # Note that task["label"] is not a standard key, use whatever best - # identifies your task at this stage of the transformation. - resolve_keyed_by(task, "max-runtime", task["label"]) - yield task - -Exact matches are used immediately. If no exact matches are found, each -alternative is treated as a regular expression, matched against the whole -value. Thus ``android.*`` would match ``android-arm/debug``. If nothing -matches as a regular expression, but there is a ``default`` alternative, it is -used. Otherwise, an exception is raised and graph generation stops. - Organization ------------- diff --git a/docs/howto/index.rst b/docs/howto/index.rst index c7bb3f18b..c1cb317e5 100644 --- a/docs/howto/index.rst +++ b/docs/howto/index.rst @@ -10,6 +10,7 @@ A collection of how-to guides. run-locally debugging bootstrap-taskgraph + resolve-keyed-by use-fetches docker create-actions diff --git a/docs/howto/resolve-keyed-by.rst b/docs/howto/resolve-keyed-by.rst new file mode 100644 index 000000000..f3a75382d --- /dev/null +++ b/docs/howto/resolve-keyed-by.rst @@ -0,0 +1,251 @@ +Use Keyed By +============ + +Often fields in a task can depend on other values in the task. For example, a +task's ``max-runtime`` may depend on the ``platform``. To handle this, you +could re-define ``max-runtime`` in each task's definition like so: + +.. code-block:: yaml + + tasks: + taskA: + platform: android + worker: + max-runtime: 7200 + + taskB: + platform: ios + worker: + max-runtime: 7200 + + taskC: + platform: windows + worker: + max-runtime: 3600 + + taskD: + platform: mac + worker: + max-runtime: 1800 + + ... + +This is simple, but if you have lots of tasks it's also tedious and makes +updating the configuration a pain. To avoid this duplication you could use a +:doc:`transform `: + +.. code-block:: python + + @transforms.add + def set_max_runtime(config, tasks): + for task in tasks: + if task["platform"] in ("android", "ios"): + task["worker"]["max-runtime"] = 7200 + elif task["platform"] == "windows": + task["worker"]["max-runtime"] = 3600 + else: + task["worker"]["max-runtime"] = 1800 + + yield task + +This works but now we've hardcoded constants into our code logic far away from +the task's original definition! Besides this is pretty verbose and it can get +complicated if you want to be able to change these constants per task. + +An Alternative Approach +----------------------- + +Another way to accomplish the same thing is to use Taskgraph's "keyed by" +feature. This can be used in combination with the ``task-defaults`` key to +express the same logic directly in the ``kind.yml`` file: + +.. code-block:: yaml + + task-defaults: + worker: + max-runtime: + by-platform: + (ios|android): 7200 + windows: 3600 + default: 1800 + + tasks: + taskA: + platform: android + + taskB: + platform: windows + + taskC: + platform: mac + + ... + + +The structure under the ``by-platform`` key is resolved to a single value using +the :func:`~taskgraph.util.schema.resolve_keyed_by` utility function. When +"keying by" another attribute in the task, you must call this utility later on +in a transform: + +.. code-block:: python + + from taskgraph.util.schema import resolve_keyed_by + + @transforms.add + def resolve_max_runtime(config, tasks): + for task in tasks: + resolve_keyed_by(task, "worker.max-runtime", f"Task {task['label']") + yield task + +In this example, :func:`~taskgraph.util.schema.resolve_keyed_by` takes the root +container object (aka, the task), the subkey to operate on, and a descriptor +that will be used in any exceptions that get raised. + +Exact matches are used immediately. If no exact matches are found, each +alternative is treated as a regular expression, matched against the whole +value. Thus ``android.*`` would match ``android-arm/debug``. If nothing +matches as a regular expression, but there is a ``default`` alternative, it is +used. Otherwise, an exception is raised and graph generation stops. + +Passing Additional Context +-------------------------- + +By default when you use the pattern ``by-`` and then feed it into +:func:`~taskgraph.util.schema.resolve_keyed_by`, ```` is assumed to be a +valid top-level key in the task definition. However, sometimes you want to key +by some other value that is either nested deeper in the task definition, or not +even known ahead of time! + +For this reason you can specify additional context via ``**kwargs``. Typically +it will make the most sense to use this following a prior transform that sets +some value that's not known statically. This comes up frequently when splitting +a task from one definition into several. For example: + +.. code-block:: yaml + + tasks: + task: + platforms: [android, windows, mac] + worker: + max-runtime: + by-platform: + (ios|android): 7200 + windows: 3600 + default: 1800 + +.. code-block:: python + + @transforms.add + def split_platforms(config, tasks): + for task in tasks: + for platform in task.pop("platforms"): + new_task = deepcopy(task) + # ... + resolve_keyed_by( + new_task, + "worker.max-runtime", + task["label"], + platform=platform, + ) + yield task + +Here we did not know the value of "platform" ahead of time, but it was still +possible to use it in a "keyed by" statement thanks to the ability to pass in +extra context. + +.. note:: + A good rule of thumb is to only consider using "keyed by" in + ``task-defaults`` or in a task definition that will be split into many + tasks down the line. + +Specifying the Subkey +--------------------- + +The subkey in :func:`~taskgraph.util.schema.resolve_keyed_by` is expressed in +dot path notation with each part of the path representing a nested dictionary. +If any part of the subkey is a list, each item in the list will be operated on. +For example, consider this excerpt of a task definition: + +.. code-block:: yaml + + worker: + artifacts: + - name: foo + path: + by-platform: + windows: foo.zip + default: foo.tar.gz + - name: bar + path: + by-platform: + windows: bar.zip + default: bar.tar.gz + +With the associated transform: + +.. code-block:: python + + @transforms.add + def resolve_artifact_paths(config, tasks): + for task in tasks: + resolve_keyed_by(task, "worker.artifacts.path", task["label"]) + yield task + +In this example, Taskgraph resolves ``by-platform`` in both the *foo* and *bar* +artifacts. + +.. note:: + Calling ``resolve_keyed_by`` on a subkey that doesn't contain a ``by-*`` + field is a no-op. + +Creating Schemas with Keyed By +------------------------------ + +Having fields of a task that may or may not be keyed by another field, can cause +problems for any schemas your transforms define. For that reason Taskgraph provides +the :func:`~taskgraph.util.schema.optionally_keyed_by` utility function. + +It can be used to generate a valid schema that allows a field to either use +"keyed by" or not. For example: + +.. code-block:: python + + from taskgraph.util.schema import Schema, optionally_keyed_by + + + schema = Schema({ + # ... + Optional("worker"): { + Optional("max-run-time"): optionally_keyed_by("platform", int), + }, + }) + + transforms.add_validate(schema) + +The example above allows both of the following task definitions: + +.. code-block:: yaml + + taskA: + worker: + max-run-time: 3600 + + taskB: + worker: + max-run-time: + by-platform: + windows: 7200 + default: 3600 + +If there are more than one fields that another field may be keyed by, it +can be specified like this: + +.. code-block:: python + + Optional("max-run-time"): optionally_keyed_by("platform", "build-type", int) + + +In this example either ``by-platform`` or ``by-build-type`` may be used. You +may specify fields as you like this way, as long as the last argument to +:func:`~taskgraph.util.schema.optionally_keyed_by` is the type of the field +after resolving is finished (or if keyed by is unused). diff --git a/src/taskgraph/util/keyed_by.py b/src/taskgraph/util/keyed_by.py index 00c84ba98..5afb9fe63 100644 --- a/src/taskgraph/util/keyed_by.py +++ b/src/taskgraph/util/keyed_by.py @@ -2,8 +2,40 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. +from typing import Any, Dict, Generator, Tuple -from .attributes import keymatch +from taskgraph.util.attributes import keymatch + + +def iter_dot_path( + container: Dict[str, Any], subfield: str +) -> Generator[Tuple[Dict[str, Any], str], None, None]: + """Given a container and a subfield in dot path notation, yield the parent + container of the dotpath's leaf node, along with the leaf node name that it + contains. + + If the dot path contains a list object, each item in the list will be + yielded. + + Args: + container (dict): The container to search for the dot path. + subfield (str): The dot path to search for. + """ + while "." in subfield: + f, subfield = subfield.split(".", 1) + + if f not in container: + return + + if isinstance(container[f], list): + for item in container[f]: + yield from iter_dot_path(item, subfield) + return + + container = container[f] + + if isinstance(container, dict) and subfield in container: + yield container, subfield def evaluate_keyed_by( diff --git a/src/taskgraph/util/schema.py b/src/taskgraph/util/schema.py index 8b2b59051..590273676 100644 --- a/src/taskgraph/util/schema.py +++ b/src/taskgraph/util/schema.py @@ -10,8 +10,7 @@ import voluptuous import taskgraph - -from .keyed_by import evaluate_keyed_by +from taskgraph.util.keyed_by import evaluate_keyed_by, iter_dot_path def validate_schema(schema, obj, msg_prefix): @@ -125,26 +124,14 @@ def resolve_keyed_by( Returns: dict: item which has also been modified in-place. """ - # find the field, returning the item unchanged if anything goes wrong - container, subfield = item, field - while "." in subfield: - f, subfield = subfield.split(".", 1) - if f not in container: - return item - container = container[f] - if not isinstance(container, dict): - return item - - if subfield not in container: - return item - - container[subfield] = evaluate_keyed_by( - value=container[subfield], - item_name=f"`{field}` in `{item_name}`", - defer=defer, - enforce_single_match=enforce_single_match, - attributes=dict(item, **extra_values), - ) + for container, subfield in iter_dot_path(item, field): + container[subfield] = evaluate_keyed_by( + value=container[subfield], + item_name=f"`{field}` in `{item_name}`", + defer=defer, + enforce_single_match=enforce_single_match, + attributes=dict(item, **extra_values), + ) return item diff --git a/test/test_util_keyed_by.py b/test/test_util_keyed_by.py new file mode 100644 index 000000000..1a8ae5e79 --- /dev/null +++ b/test/test_util_keyed_by.py @@ -0,0 +1,29 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from pprint import pprint + +import pytest + +from taskgraph.util.keyed_by import iter_dot_path + + +@pytest.mark.parametrize( + "container,subfield,expected", + ( + pytest.param( + {"a": {"b": {"c": 1}}, "d": 2}, "a.b.c", [({"c": 1}, "c")], id="simple case" + ), + pytest.param( + {"a": [{"b": 1}, {"b": 2}, {"b": 3}], "d": 2}, + "a.b", + [({"b": 1}, "b"), ({"b": 2}, "b"), ({"b": 3}, "b")], + id="list case", + ), + ), +) +def test_iter_dot_paths(container, subfield, expected): + result = list(iter_dot_path(container, subfield)) + pprint(result, indent=2) + assert result == expected diff --git a/test/test_util_schema.py b/test/test_util_schema.py index c56f818ac..5a8d6df4f 100644 --- a/test/test_util_schema.py +++ b/test/test_util_schema.py @@ -106,6 +106,20 @@ def test_nested(self): {"x": {"by-bar": {"B1": 11, "B2": 12}}}, ) + def test_list(self): + item = { + "y": { + "by-foo": { + "F1": 10, + "F2": 20, + }, + } + } + self.assertEqual( + resolve_keyed_by({"x": [item, item]}, "x.y", "name", foo="F1"), + {"x": [{"y": 10}, {"y": 10}]}, + ) + def test_no_by_empty_dict(self): self.assertEqual(resolve_keyed_by({"x": {}}, "x", "n"), {"x": {}})