From 8851e481c67f86b36795add0aef967068298cd79 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Thu, 23 Jan 2025 08:23:28 +0100 Subject: [PATCH 01/27] Move dumping test fixtures to `conftest.py` --- tests/conftest.py | 84 +++++++++++++++++++++++++++ tests/tools/dumping/test_processes.py | 78 ------------------------- 2 files changed, 84 insertions(+), 78 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 89b0a1bad7..5aa0ef3b89 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -951,3 +951,87 @@ def cat_path() -> Path: run_process = subprocess.run(['which', 'cat'], capture_output=True, check=True) path = run_process.stdout.decode('utf-8').strip() return Path(path) + + +@pytest.fixture +def generate_calculation_node_io(generate_calculation_node, tmp_path): + def _generate_calculation_node_io(entry_point: str | None = None, attach_outputs: bool = True): + import io + + import numpy as np + + from aiida.orm import ArrayData, FolderData, SinglefileData + + filename = 'file.txt' + filecontent = 'a' + singlefiledata_linklabel = 'singlefile' + folderdata_linklabel = 'folderdata' + folderdata_relpath = Path('relative_path') + arraydata_linklabel = 'arraydata' + + singlefiledata_input = SinglefileData.from_string(content=filecontent, filename=filename) + # ? Use instance for folderdata + folderdata = FolderData() + folderdata.put_object_from_filelike(handle=io.StringIO(filecontent), path=str(folderdata_relpath / filename)) # type: ignore[arg-type] + arraydata_input = ArrayData(arrays=np.ones(3)) + + # Create calculation inputs, outputs + calculation_node_inputs = { + singlefiledata_linklabel: singlefiledata_input, + folderdata_linklabel: folderdata, + arraydata_linklabel: arraydata_input, + } + + singlefiledata_output = singlefiledata_input.clone() + folderdata_output = folderdata.clone() + + if attach_outputs: + calculation_outputs = { + folderdata_linklabel: folderdata_output, + singlefiledata_linklabel: singlefiledata_output, + } + else: + calculation_outputs = None + + # Actually write repository file and then read it in when generating calculation_node + (tmp_path / filename).write_text(filecontent) + + calculation_node = generate_calculation_node( + repository=tmp_path, + inputs=calculation_node_inputs, + outputs=calculation_outputs, + entry_point=entry_point, + ) + return calculation_node + + return _generate_calculation_node_io + + +@pytest.fixture +def generate_workchain_node_io(): + def _generate_workchain_node_io(cj_nodes, store_all: bool = True): + """Generate an instance of a `WorkChain` that contains a sub-`WorkChain` and a `Calculation` with file io.""" + from aiida.orm import WorkflowNode + + wc_node = WorkflowNode() + wc_node_sub = WorkflowNode() + + # Add sub-workchain that calls a calculation + wc_node_sub.base.links.add_incoming(wc_node, link_type=LinkType.CALL_WORK, link_label='sub_workflow') + for cj_node in cj_nodes: + cj_node.base.links.add_incoming(wc_node_sub, link_type=LinkType.CALL_CALC, link_label='calculation') + + # Set process_state so that tests don't throw exception for build_call_graph of README generation + [cj_node.set_process_state('finished') for cj_node in cj_nodes] + wc_node.set_process_state('finished') + wc_node_sub.set_process_state('finished') + + # Need to store so that outputs are being dumped + if store_all: + wc_node.store() + wc_node_sub.store() + [cj_node.store() for cj_node in cj_nodes] + + return wc_node + + return _generate_workchain_node_io diff --git a/tests/tools/dumping/test_processes.py 
b/tests/tools/dumping/test_processes.py index accfbd17d2..88dad0323e 100644 --- a/tests/tools/dumping/test_processes.py +++ b/tests/tools/dumping/test_processes.py @@ -10,13 +10,11 @@ from __future__ import annotations -import io import shutil from pathlib import Path import pytest -from aiida.common.links import LinkType from aiida.tools.dumping.processes import ProcessDumper # Non-AiiDA variables @@ -38,82 +36,6 @@ node_metadata_file = '.aiida_node_metadata.yaml' -# Helper functions to generate the actual `WorkflowNode`s and `CalculationNode`s used for testing -@pytest.fixture -def generate_calculation_node_io(generate_calculation_node, tmp_path): - def _generate_calculation_node_io(entry_point: str | None = None, attach_outputs: bool = True): - import numpy as np - - from aiida.orm import ArrayData, FolderData, SinglefileData - - singlefiledata_input = SinglefileData.from_string(content=filecontent, filename=filename) - # ? Use instance for folderdata - folderdata = FolderData() - folderdata.put_object_from_filelike(handle=io.StringIO(filecontent), path=str(folderdata_relpath / filename)) # type: ignore[arg-type] - arraydata_input = ArrayData(arrays=np.ones(3)) - - # Create calculation inputs, outputs - calculation_node_inputs = { - singlefiledata_linklabel: singlefiledata_input, - folderdata_linklabel: folderdata, - arraydata_linklabel: arraydata_input, - } - - singlefiledata_output = singlefiledata_input.clone() - folderdata_output = folderdata.clone() - - if attach_outputs: - calculation_outputs = { - folderdata_linklabel: folderdata_output, - singlefiledata_linklabel: singlefiledata_output, - } - else: - calculation_outputs = None - - # Actually write repository file and then read it in when generating calculation_node - (tmp_path / filename).write_text(filecontent) - - calculation_node = generate_calculation_node( - repository=tmp_path, - inputs=calculation_node_inputs, - outputs=calculation_outputs, - entry_point=entry_point, - ) - return calculation_node - - return _generate_calculation_node_io - - -@pytest.fixture -def generate_workchain_node_io(): - def _generate_workchain_node_io(cj_nodes, store_all: bool = True): - """Generate an instance of a `WorkChain` that contains a sub-`WorkChain` and a `Calculation` with file io.""" - from aiida.orm import WorkflowNode - - wc_node = WorkflowNode() - wc_node_sub = WorkflowNode() - - # Add sub-workchain that calls a calculation - wc_node_sub.base.links.add_incoming(wc_node, link_type=LinkType.CALL_WORK, link_label='sub_workflow') - for cj_node in cj_nodes: - cj_node.base.links.add_incoming(wc_node_sub, link_type=LinkType.CALL_CALC, link_label='calculation') - - # Set process_state so that tests don't throw exception for build_call_graph of README generation - [cj_node.set_process_state('finished') for cj_node in cj_nodes] - wc_node.set_process_state('finished') - wc_node_sub.set_process_state('finished') - - # Need to store so that outputs are being dumped - if store_all: - wc_node.store() - wc_node_sub.store() - [cj_node.store() for cj_node in cj_nodes] - - return wc_node - - return _generate_workchain_node_io - - # Only test top-level actions, like path and README creation # Other things tested via `_dump_workflow` and `_dump_calculation` def test_dump(generate_calculation_node_io, generate_workchain_node_io, tmp_path): From 98ea050a929a42df265a67db8857d860c976a391 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Thu, 23 Jan 2025 09:52:56 +0100 Subject: [PATCH 02/27] First working version with `DataDumper` and `CollectionDumper` Take 
it from here --- src/aiida/cmdline/commands/cmd_process.py | 124 ++++--- src/aiida/cmdline/commands/cmd_profile.py | 282 +++++++++++++++ src/aiida/cmdline/params/options/main.py | 277 ++++++++++++++ src/aiida/tools/dumping/__init__.py | 6 +- src/aiida/tools/dumping/collection.py | 338 ++++++++++++++++++ src/aiida/tools/dumping/data.py | 292 +++++++++++++++ src/aiida/tools/dumping/parser.py | 56 +++ src/aiida/tools/dumping/processes.py | 280 ++++++++++++--- src/aiida/tools/dumping/rich.py | 86 +++++ src/aiida/tools/dumping/test-config-file.yaml | 23 ++ src/aiida/tools/dumping/utils.py | 94 +++++ tests/tools/dumping/test_processes.py | 1 + 12 files changed, 1756 insertions(+), 103 deletions(-) create mode 100644 src/aiida/tools/dumping/collection.py create mode 100644 src/aiida/tools/dumping/data.py create mode 100644 src/aiida/tools/dumping/parser.py create mode 100644 src/aiida/tools/dumping/rich.py create mode 100644 src/aiida/tools/dumping/test-config-file.yaml diff --git a/src/aiida/cmdline/commands/cmd_process.py b/src/aiida/cmdline/commands/cmd_process.py index 5ad7c5d53c..ed0b3ccdd0 100644 --- a/src/aiida/cmdline/commands/cmd_process.py +++ b/src/aiida/cmdline/commands/cmd_process.py @@ -558,42 +558,19 @@ def process_repair(manager, broker, dry_run): echo.echo_report(f'Revived process `{pid}`') -@verdi_process.command('dump') +@verdi_process.command("dump") @arguments.PROCESS() @options.PATH() @options.OVERWRITE() -@click.option( - '--include-inputs/--exclude-inputs', - default=True, - show_default=True, - help='Include the linked input nodes of the `CalculationNode`(s).', -) -@click.option( - '--include-outputs/--exclude-outputs', - default=False, - show_default=True, - help='Include the linked output nodes of the `CalculationNode`(s).', -) -@click.option( - '--include-attributes/--exclude-attributes', - default=True, - show_default=True, - help='Include attributes in the `.aiida_node_metadata.yaml` written for every `ProcessNode`.', -) -@click.option( - '--include-extras/--exclude-extras', - default=True, - show_default=True, - help='Include extras in the `.aiida_node_metadata.yaml` written for every `ProcessNode`.', -) -@click.option( - '-f', - '--flat', - is_flag=True, - default=False, - show_default=True, - help='Dump files in a flat directory for every step of the workflow.', -) +@options.FLAT() +@options.INCLUDE_INPUTS() +@options.INCLUDE_OUTPUTS() +@options.INCLUDE_ATTRIBUTES() +@options.INCLUDE_EXTRAS() +@options.ALSO_RAW() +@options.ALSO_RICH() +@options.RICH_SPEC() +@options.RICH_DUMP_ALL() @click.option( '--dump-unsealed', is_flag=True, @@ -602,17 +579,23 @@ def process_repair(manager, broker, dry_run): help='Also allow the dumping of unsealed process nodes.', ) @options.INCREMENTAL() +# TODO: Also add CONFIG_FILE option here +# TODO: Currently, setting rich options is not supported here directly def process_dump( process, path, overwrite, + flat, include_inputs, include_outputs, include_attributes, include_extras, - flat, dump_unsealed, incremental, + also_raw, + also_rich, + rich_spec, + rich_dump_all, ) -> None: """Dump process input and output files to disk. @@ -630,29 +613,74 @@ def process_dump( node data for further inspection. 
""" - from aiida.tools.archive.exceptions import ExportValidationError + from aiida.tools.dumping.data import DataDumper from aiida.tools.dumping.processes import ProcessDumper + from aiida.tools.archive.exceptions import ExportValidationError + + # from aiida.tools.dumping.utils import validate_rich_options + from aiida.tools.dumping.rich import rich_from_cli + + processdumper_kwargs = { + "include_inputs": include_inputs, + "include_outputs": include_outputs, + "include_attributes": include_attributes, + "include_extras": include_extras, + "flat": flat, + "dump_unsealed": dump_unsealed, + "incremental": incremental, + } + + rich_kwargs = { + "rich_dump_all": rich_dump_all, + } + + datadumper_kwargs = { + "also_raw": also_raw, + "also_rich": also_rich, + } + + # if also_rich: + # try: + # validate_rich_options( + # rich_options=rich_options, rich_config_file=rich_config_file + # ) + # except ValueError as exc: + # echo.echo_critical(f"{exc!s}") + + if rich_spec is not None: + rich_spec_dict = rich_from_cli(rich_spec=rich_spec, **rich_kwargs) + else: + rich_spec_dict = {} + + data_dumper = DataDumper( + overwrite=overwrite, + rich_spec_dict=rich_spec_dict, + **datadumper_kwargs, + **rich_kwargs, + ) process_dumper = ProcessDumper( - include_inputs=include_inputs, - include_outputs=include_outputs, - include_attributes=include_attributes, - include_extras=include_extras, overwrite=overwrite, - flat=flat, - dump_unsealed=dump_unsealed, - incremental=incremental, + **processdumper_kwargs, + **rich_kwargs, + data_dumper=data_dumper, ) try: - dump_path = process_dumper.dump(process_node=process, output_path=path) + dump_path = process_dumper.dump( + process_node=process, + output_path=path, + ) + echo.echo_success( + f"Raw files for {process.__class__.__name__} <{process.pk}> dumped into folder `{dump_path}`." + ) except FileExistsError: echo.echo_critical( - 'Dumping directory exists and overwrite is False. Set overwrite to True, or delete directory manually.' + "Dumping directory exists and overwrite is False. Set overwrite to True, or delete directory manually." ) except ExportValidationError as e: - echo.echo_critical(f'{e!s}') + echo.echo_critical(f"{e!s}") except Exception as e: - echo.echo_critical(f'Unexpected error while dumping {process.__class__.__name__} <{process.pk}>:\n ({e!s}).') - - echo.echo_success(f'Raw files for {process.__class__.__name__} <{process.pk}> dumped into folder `{dump_path}`.') + echo.echo_critical( + f"Unexpected error while dumping {process.__class__.__name__} <{process.pk}>:\n ({e!s})." + ) diff --git a/src/aiida/cmdline/commands/cmd_profile.py b/src/aiida/cmdline/commands/cmd_profile.py index 7cb0e018ae..f035977f2c 100644 --- a/src/aiida/cmdline/commands/cmd_profile.py +++ b/src/aiida/cmdline/commands/cmd_profile.py @@ -19,6 +19,7 @@ from aiida.cmdline.utils import defaults, echo from aiida.common import exceptions from aiida.manage.configuration import Profile, create_profile, get_config +from aiida.tools.dumping import CollectionDumper, DataDumper, ProcessDumper @verdi.group('profile') @@ -269,3 +270,284 @@ def profile_delete(force, delete_data, profiles): get_config().delete_profile(profile.name, delete_storage=delete_data) echo.echo_success(f'Profile `{profile.name}` was deleted.') + + +# ? Specify groups via giving the groups, or just enabling "groups" and then all are dumped? +# ? Provide some mechanism to allow for both, e.g. 
if no argument is provided, all groups are dumped +@verdi_profile.command('mirror') +@options.PATH() +@options.OVERWRITE() +@options.INCREMENTAL() +@options.ORGANIZE_BY_GROUPS() +@options.DRY_RUN() +@options.DUMP_PROCESSES() +@options.ONLY_TOP_LEVEL_WORKFLOWS() +@options.DUMP_DATA() +@options.DEDUPLICATE() +@options.DATA_HIDDEN() +@options.ALSO_RAW() +@options.ALSO_RICH() +@options.INCLUDE_INPUTS() +@options.INCLUDE_OUTPUTS() +@options.INCLUDE_ATTRIBUTES() +@options.INCLUDE_EXTRAS() +@options.FLAT() +@options.RICH_SPEC() +@options.RICH_DUMP_ALL() +@options.DUMP_CONFIG_FILE() +@options.NODES() +@options.GROUPS() +@click.pass_context +def storage_mirror( + ctx, + path, + overwrite, + incremental, + organize_by_groups, + dry_run, + dump_processes, + only_top_level_workflows, + dump_data, + deduplicate, + data_hidden, + also_raw, + also_rich, + include_inputs, + include_outputs, + include_attributes, + include_extras, + flat, + rich_spec, + rich_dump_all, + dump_config_file, + nodes, + groups, +): + """Dump all data in an AiiDA profile's storage to disk.""" + + + from aiida import orm + from aiida.tools.dumping.parser import DumpConfigParser + from aiida.tools.dumping.rich import ( + DEFAULT_CORE_EXPORT_MAPPING, + rich_from_cli, + rich_from_config, + ) + from aiida.tools.dumping.utils import prepare_dump_path + + profile = ctx.obj['profile'] + + # from aiida.manage.manager import get_manager + + # manager = get_manager() + # storage = manager.get_profile_storage() + + # with spinner(): + # data = storage.get_info(detailed=True) + + # echo.echo_dictionary(data, sort_keys=False, fmt='yaml') + + # print(f"Profile name: {profile_name}") + + # # TODO: export computers alone, and groups + # t1 = time.time() + # qb = orm.QueryBuilder().append(orm.Node, tag='node', project=['uuid']) + # all_uuids = qb.all(flat=True) + # print(f"All UUIDs retrieved in {time.time() - t1:6.3f} s.") + + # t1 = time.time() + # with open('all-source-uuids.json', 'w') as fhandle: + # json.dump({'profile_name': profile_name, 'uuids': all_uuids}, fhandle) + # print(f"{len(all_uuids)} UUIDs written in {time.time() - t1:6.3f} s.") + + if nodes and groups: + echo.echo_critical('`nodes` and `groups` specified. Set only one.') + # if all_entries and groups: + # echo.echo_critical('`all_entries` and `groups` specified. Set only one.') + + if dump_config_file is None: + general_kwargs = { + 'path': path, + 'overwrite': overwrite, + 'incremental': incremental, + 'dry_run': dry_run, + } + + processdumper_kwargs = { + 'include_inputs': include_inputs, + 'include_outputs': include_outputs, + 'include_attributes': include_attributes, + 'include_extras': include_extras, + 'flat': flat, + # "calculations_hidden": calculations_hidden + } + + datadumper_kwargs = { + 'also_raw': also_raw, + 'also_rich': also_rich, + 'data_hidden': data_hidden, + } + + collection_kwargs = { + 'should_dump_processes': dump_processes, + 'should_dump_data': dump_data, + 'only_top_level_workflows': only_top_level_workflows, + } + + rich_kwargs = { + 'rich_dump_all': rich_dump_all, + } + + if rich_spec is not None: + rich_spec_dict = rich_from_cli(rich_spec=rich_spec, **rich_kwargs) + else: + rich_spec_dict = DEFAULT_CORE_EXPORT_MAPPING + + # TODO: Also allow for mixing. 
Currently one can _only_ specify either the config file, or the arguments on the + # TODO: command line + else: + kwarg_dicts_from_config = DumpConfigParser.parse_config_file(dump_config_file) + + general_kwargs = kwarg_dicts_from_config['general_kwargs'] + processdumper_kwargs = kwarg_dicts_from_config['processdumper_kwargs'] + datadumper_kwargs = kwarg_dicts_from_config['datadumper_kwargs'] + collection_kwargs = kwarg_dicts_from_config['collection_kwargs'] + rich_kwargs = kwarg_dicts_from_config['rich_kwargs'] + + rich_spec_dict = rich_from_config(kwarg_dicts_from_config['rich_spec'], **rich_kwargs) + + # Obtain these specifically for easy access and modifications + path = general_kwargs['path'] + overwrite = general_kwargs['overwrite'] + dry_run = general_kwargs['dry_run'] + incremental = general_kwargs['incremental'] + + if not overwrite and incremental: + echo.echo_report('Overwrite set to false, but incremental dumping selected. Will keep existing directories.') + + if not str(path).endswith(profile.name): + path /= profile.name + + # TODO: Implement proper dry-run feature + dry_run_message = f"Dry run for dumping of profile `{profile.name}`'s data at path: `{path}`.\n" + dry_run_message += 'Only directories will be created.' + + if dry_run or (not collection_kwargs['should_dump_processes'] and not collection_kwargs['should_dump_data']): + echo.echo_report(dry_run_message) + return + + else: + echo.echo_report(f"Dumping of profile `{profile.name}`'s data at path: `{path}`.") + + SAFEGUARD_FILE = '.verdi_storage_dump' # noqa: N806 + + try: + prepare_dump_path( + path_to_validate=path, + overwrite=overwrite, + incremental=incremental, + safeguard_file=SAFEGUARD_FILE, + ) + except FileExistsError as exc: + echo.echo_critical(str(exc)) + + (path / SAFEGUARD_FILE).touch() + + data_dumper = DataDumper( + dump_parent_path=path, + overwrite=overwrite, + incremental=incremental, + rich_spec_dict=rich_spec_dict, + **datadumper_kwargs, + ) + # dumper_pretty_print(data_dumper) + + process_dumper = ProcessDumper( + dump_parent_path=path, + overwrite=overwrite, + incremental=incremental, + data_dumper=data_dumper, + **processdumper_kwargs, + ) + # dumper_pretty_print(process_dumper) + + from aiida.tools.dumping.incremental import DumpNodeCollector + + dumpnodecollector = DumpNodeCollector(dump_parent_path=path) + + dumpnodecollector.update_uuids_before_dump() + dumpnodecollector.create_organized_uuid_dicts() + # dumpnodecollector.populate_uuid_dict() + + # raise SystemExit() + + # TODO: Possibly implement specifying specific computers + # TODO: Although, users could just specify the relevant nodes + # TODO: Also add option to specify node types via entry points + + # === Dump the data that is not associated with any group === + if not groups: + collection_dumper = CollectionDumper( + dump_parent_path=path, + output_path=path, + overwrite=overwrite, + incremental=incremental, + nodes=nodes, + **collection_kwargs, + **rich_kwargs, + data_dumper=data_dumper, + process_dumper=process_dumper, + deduplicate=deduplicate, + ) + collection_dumper.create_entity_counter() + # dumper_pretty_print(collection_dumper, include_private_and_dunder=False) + + if dump_processes and collection_dumper._should_dump_processes(): + echo.echo_report(f'Dumping processes not in any group for profile `{profile.name}`...') + collection_dumper.dump_processes() + if dump_data: + if not also_rich and not also_raw: + echo.echo_critical('`--dump-data was given, but neither --also-raw or --also-rich specified.') + 
echo.echo_report(f'Dumping data not in any group for profile {profile.name}...') + + collection_dumper.dump_data_rich() + # collection_dumper.dump_plugin_data() + + # === Dump data per-group if Groups exist in profile or are selected === + # TODO: Invert default behavior here, as I typically want to dump all entries + # TODO: Possibly define a new click option instead + # all_entries = not all_entries + if not groups: # and all_entries: + groups = orm.QueryBuilder().append(orm.Group).all(flat=True) + + if groups is not None and not nodes: + for group in groups: + if organize_by_groups: + group_subdir = Path(*group.type_string.split('.')) + group_path = path / 'groups' / group_subdir / group.label + else: + group_path = path + + collection_dumper = CollectionDumper( + dump_parent_path=path, + output_path=group_path, + overwrite=overwrite, + incremental=incremental, + group=group, + **collection_kwargs, + **rich_kwargs, + process_dumper=process_dumper, + data_dumper=data_dumper, + ) + collection_dumper.create_entity_counter() + if dump_processes: + # The additional `_should_dump_processes` check here ensures that no reporting like + # "Dumping processes for group `SSSP/1.3/PBE/efficiency`" is printed for groups that + # don't contain processes + if collection_dumper._should_dump_processes(): + echo.echo_report(f'Dumping processes for group `{group.label}`...') + collection_dumper.dump_processes() + if dump_data: + echo.echo_report(f'Dumping data for group `{group.label}`...') + collection_dumper.dump_data_rich() + # collection_dumper.dump_plugin_data() \ No newline at end of file diff --git a/src/aiida/cmdline/params/options/main.py b/src/aiida/cmdline/params/options/main.py index c2ce719375..6f19a3c465 100644 --- a/src/aiida/cmdline/params/options/main.py +++ b/src/aiida/cmdline/params/options/main.py @@ -27,6 +27,8 @@ 'ALL', 'ALL_STATES', 'ALL_USERS', + 'ALSO_RAW', + 'ALSO_RICH', 'APPEND_TEXT', 'ARCHIVE_FORMAT', 'BROKER_HOST', @@ -44,6 +46,7 @@ 'COMPUTERS', 'CONFIG_FILE', 'DATA', + 'DATA_HIDDEN', 'DATUM', 'DB_BACKEND', 'DB_ENGINE', @@ -53,13 +56,18 @@ 'DB_PORT', 'DB_USERNAME', 'DEBUG', + 'DEDUPLICATE', 'DESCRIPTION', 'DICT_FORMAT', 'DICT_KEYS', 'DRY_RUN', + 'DUMP_CONFIG_FILE', + 'DUMP_DATA', + 'DUMP_PROCESSES', 'EXIT_STATUS', 'EXPORT_FORMAT', 'FAILED', + 'FLAT', 'FORCE', 'FORMULA_MODE', 'FREQUENCY', @@ -68,6 +76,10 @@ 'GROUP_CLEAR', 'HOSTNAME', 'IDENTIFIER', + 'INCLUDE_ATTRIBUTES', + 'INCLUDE_EXTRAS', + 'INCLUDE_INPUTS', + 'INCLUDE_OUTPUTS', 'INCREMENTAL', 'INPUT_FORMAT', 'INPUT_PLUGIN', @@ -78,8 +90,10 @@ 'NODES', 'NON_INTERACTIVE', 'OLDER_THAN', + 'ONLY_TOP_LEVEL_WORKFLOWS', 'ORDER_BY', 'ORDER_DIRECTION', + 'ORGANIZE_BY_GROUPS', 'OVERWRITE', 'PAST_DAYS', 'PATH', @@ -95,6 +109,8 @@ 'PROJECT', 'RAW', 'REPOSITORY_PATH', + 'RICH_DUMP_ALL', + 'RICH_SPEC', 'SCHEDULER', 'SILENT', 'SORT', @@ -783,6 +799,182 @@ def set_log_level(ctx, _param, value): show_default=True, ) +DEDUPLICATE = OverridableOption( + '--deduplicate/--no-deduplicate', + is_flag=True, + default=False, + show_default=True, + help='', +) + +DUMP_PROCESSES = OverridableOption( + '--dump-processes/--no-dump-processes', + is_flag=True, + default=True, + show_default=True, + help='Dump process data.', +) + +DUMP_DATA = OverridableOption( + '--dump-data/--no-dump-data', + is_flag=True, + default=True, + type=bool, + show_default=True, + help='Dump data nodes in a dedicated directory.', +) + +DATA_HIDDEN = OverridableOption( + '--data-hidden/--data-non-hidden', + is_flag=True, + default=True, + show_default=True, + help='Dump all 
`orm.Data` in the hidden directory and link to there.', +) + +ALSO_RAW = OverridableOption( + '--also-raw/--not-also-raw', + is_flag=True, + default=False, + show_default=True, + help='Dump the `attributes` of all nodes related to the Process.', +) + +ALSO_RICH = OverridableOption( + '--also-rich/--not-also-rich', + is_flag=True, + default=True, + show_default=True, + help='Dump also nicely prepared outputs, e.g. CIF for structures or PDF image for bands.', +) + +RICH_SPEC = OverridableOption( + '--rich-spec', + default=None, + type=str, + help='Specifications for rich data dumping.', +) + +DUMP_CONFIG_FILE = OverridableOption( + '--dump-config-file', + default=None, + type=types.FileOrUrl(), + help='Provide dumping options via a config file in YAML format.', +) + +RICH_DUMP_ALL = OverridableOption( + '--rich-dump-all/--no-rich-dump-all', + default=True, + is_flag=True, + type=bool, + show_default=True, + help='If a rich specification is provided, this triggers if all other Data nodes should also be dumped or not.', +) + +ORGANIZE_BY_GROUPS = OverridableOption( + '--organize-by-groups/--no-organize-by-groups', + default=True, + is_flag=True, + type=bool, + show_default=True, + help='If the collection of nodes to be dumped is organized in groups, reproduce its hierarchy.', +) + +INCLUDE_INPUTS = OverridableOption( + '--include-inputs/--exclude-inputs', + default=True, + show_default=True, + help='Include the linked input nodes of the `CalculationNode`(s).', +) + +INCLUDE_OUTPUTS = OverridableOption( + '--include-outputs/--exclude-outputs', + default=False, + show_default=True, + help='Include the linked output nodes of the `CalculationNode`(s).', +) + +INCLUDE_ATTRIBUTES = OverridableOption( + '--include-attributes/--exclude-attributes', + default=True, + show_default=True, + help='Include attributes in the `.aiida_node_metadata.yaml` written for every `ProcessNode`.', +) + +INCLUDE_EXTRAS = OverridableOption( + '--include-extras/--exclude-extras', + default=True, + show_default=True, + help='Include extras in the `.aiida_node_metadata.yaml` written for every `ProcessNode`.', +) + +FLAT = OverridableOption( + '-f', + '--flat', + is_flag=True, + default=False, + help='Dump files in a flat directory for every step of a workflow.', +) + +ONLY_TOP_LEVEL_WORKFLOWS = OverridableOption( + '--only-top-level-workflows/--not-only-top-level-workflows', + is_flag=True, + default=True, + type=bool, + show_default=True, + help='Dump only the top-level workflows in their own dedicated directories.', +) + +DUMP_PROCESSES = OverridableOption( + '--dump-processes/--no-dump-processes', + is_flag=True, + default=True, + show_default=True, + help='Dump process data.', +) + +DUMP_DATA = OverridableOption( + '--dump-data/--no-dump-data', + is_flag=True, + default=True, + type=bool, + show_default=True, + help='Dump data nodes in a dedicated directory.', +) + +CALCULATIONS_HIDDEN = OverridableOption( + '--calculations-hidden/--calculations-non-hidden', + is_flag=True, + default=True, + type=bool, + show_default=True, + help='Dump all `orm.CalculationNode`s in the hidden directory and link to there.', +) + +DATA_HIDDEN = OverridableOption( + '--data-hidden/--data-non-hidden', + is_flag=True, + default=True, + show_default=True, + help='Dump all `orm.Data` in the hidden directory and link to there.', +) + +ALSO_RAW = OverridableOption( + '--also-raw/--not-also-raw', + is_flag=True, + default=False, + show_default=True, + help='Dump the `attributes` of all nodes related to the Process.', +) + +ALSO_RICH = 
OverridableOption( + '--also-rich/--not-also-rich', + is_flag=True, + default=True, + show_default=True, + help='Dump also nicely prepared outputs, e.g. CIF for structures or PDF image for bands.', +) + INCREMENTAL = OverridableOption( '--incremental/--no-incremental', is_flag=True, @@ -790,3 +982,88 @@ def set_log_level(ctx, _param, value): show_default=True, help="Incremental dumping of data to disk. Doesn't require using overwrite to clean previous directories.", ) + +RICH_OPTIONS = OverridableOption( + '--rich-options', + default=None, + type=str, + help='Specifications for rich data dumping.', +) + +DUMP_CONFIG_FILE = OverridableOption( + '--dump-config-file', + default=None, + type=types.FileOrUrl(), + help='Provide dumping options via a config file in YAML format.', +) + +RICH_DUMP_ALL = OverridableOption( + '--rich-dump-all/--no-rich-dump-all', + default=True, + is_flag=True, + type=bool, + show_default=True, + help='If a rich specification is provided, this triggers if all other Data nodes should also be dumped or not.', +) + +ORGANIZE_BY_GROUPS = OverridableOption( + '--organize-by-groups/--no-organize-by-groups', + default=True, + is_flag=True, + type=bool, + show_default=True, + help='If the collection of nodes to be dumped is organized in groups, reproduce its hierarchy.', +) + +INCLUDE_INPUTS = OverridableOption( + '--include-inputs/--exclude-inputs', + default=True, + show_default=True, + help='Include the linked input nodes of the `CalculationNode`(s).', +) + +INCLUDE_OUTPUTS = OverridableOption( + '--include-outputs/--exclude-outputs', + default=False, + show_default=True, + help='Include the linked output nodes of the `CalculationNode`(s).', +) + +INCLUDE_ATTRIBUTES = OverridableOption( + '--include-attributes/--exclude-attributes', + default=True, + show_default=True, + help='Include attributes in the `.aiida_node_metadata.yaml` written for every `ProcessNode`.', +) + +INCLUDE_EXTRAS = OverridableOption( + '--include-extras/--exclude-extras', + default=True, + show_default=True, + help='Include extras in the `.aiida_node_metadata.yaml` written for every `ProcessNode`.', +) + +FLAT = OverridableOption( + '-f', + '--flat', + is_flag=True, + default=False, + help='Dump files in a flat directory for every step of a workflow.', +) + +ONLY_TOP_LEVEL_WORKFLOWS = OverridableOption( + '--only-top-level-workflows/--not-only-top-level-workflows', + is_flag=True, + default=True, + type=bool, + show_default=True, + help='Dump only the top-level workflows in their own dedicated directories.', +) + +INCREMENTAL = OverridableOption( + '--incremental/--non-incremental', + is_flag=True, + default=True, + show_default=True, + help='Dump files incrementally when dumping collections of data to disk.', +) diff --git a/src/aiida/tools/dumping/__init__.py b/src/aiida/tools/dumping/__init__.py index a746fa171e..49713c9b8a 100644 --- a/src/aiida/tools/dumping/__init__.py +++ b/src/aiida/tools/dumping/__init__.py @@ -8,4 +8,8 @@ ########################################################################### """Modules related to the dumping of AiiDA data.""" -__all__ = ('processes',) +from .collection import CollectionDumper +from .data import DataDumper +from .processes import ProcessDumper + +__all__ = ('CollectionDumper', 'DataDumper', 'ProcessDumper') diff --git a/src/aiida/tools/dumping/collection.py b/src/aiida/tools/dumping/collection.py new file mode 100644 index 0000000000..169f5b3862 --- /dev/null +++ b/src/aiida/tools/dumping/collection.py @@ -0,0 +1,338 @@ 
+########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +"""Functionality for dumping of a Collection of AiiDA ORM entities.""" + +from __future__ import annotations + +import contextlib +import itertools as it +import logging +import os +from collections import Counter +from pathlib import Path + +from aiida import orm +from aiida.tools.dumping.data import DataDumper +from aiida.tools.dumping.processes import ProcessDumper +from aiida.tools.dumping.utils import sanitize_file_extension + +logger = logging.getLogger(__name__) + +DEFAULT_PROCESSES_TO_DUMP = [orm.CalculationNode, orm.WorkflowNode] +DEFAULT_DATA_TO_DUMP = [orm.StructureData, orm.Code, orm.Computer, orm.BandsData, orm.UpfData] +# DEFAULT_COLLECTIONS_TO_DUMP ?? +DEFAULT_ENTITIES_TO_DUMP = DEFAULT_PROCESSES_TO_DUMP + DEFAULT_DATA_TO_DUMP + + +# ! This class is instantiated once for every group, or once for the full profile +class CollectionDumper: + def __init__( + self, + *args, + dump_parent_path: Path = Path().cwd(), + output_path: Path = Path().cwd(), + overwrite: bool = False, + incremental: bool = True, + should_dump_processes: bool = False, + should_dump_data: bool = False, + only_top_level_workflows: bool = True, + group: orm.Group | None = None, + nodes: set = {}, + process_dumper: ProcessDumper | None = None, + data_dumper: DataDumper | None = None, + **kwargs, + ): + self.args = args + self.dump_parent_path = dump_parent_path + self.output_path = output_path + self.overwrite = overwrite + self.incremental = incremental + self.should_dump_processes = should_dump_processes + self.should_dump_data = should_dump_data + self.only_top_level_workflows = only_top_level_workflows + self.nodes = nodes + self.process_dumper = process_dumper + self.data_dumper = data_dumper + self.kwargs = kwargs + + self.hidden_aiida_path = dump_parent_path / '.aiida-raw-data' + + # Allow passing of group via label + if isinstance(group, str): + group = orm.Group.get(self.group) + self.group = group + + self.output_path = output_path + + if not hasattr(self, 'entity_counter'): + self.create_entity_counter() + + def create_entity_counter(self) -> Counter: + entity_counter = Counter() + if self.group is not None: + # If the group only has one WorkChain assigned to it, this will only return a count of 1 for the + # WorkChainNode, nothing more, that is, it doesn't work recursively. 
+ nodes = self.group.nodes + elif self.nodes is not None: + nodes = self.nodes + else: + nodes = orm.QueryBuilder().append(orm.Node).all(flat=True) + + # Iterate over all the entities in the group + for node in nodes: + # Count the type string of each entity + entity_counter[node.__class__] += 1 + + # Convert the Counter to a dictionary (optional) + self.entity_counter = entity_counter + + return entity_counter + + def get_collection_nodes(self): + if self.nodes: + self.collection_nodes = self.nodes + + # if hasattr(self, 'collection_nodes'): + # return self.collection_nodes + + # Get all nodes that are in the group + if self.group is not None: + nodes = list(self.group.nodes) + + # Get all nodes that are _not_ in any group + else: + groups = orm.QueryBuilder().append(orm.Group).all(flat=True) + nodes_in_groups = [node.pk for group in groups for node in group.nodes] + # Need to expand here also with the called_descendants of `WorkflowNodes`, otherwise the called + # `CalculationNode`s for `WorkflowNode`s that are part of a group are dumped twice + sub_nodes_in_groups = list( + it.chain( + *[ + orm.load_node(node).called_descendants + for node in nodes_in_groups + if isinstance(orm.load_node(node), orm.WorkflowNode) + ] + ) + ) + sub_nodes_in_groups = [node.pk for node in sub_nodes_in_groups] + nodes_in_groups = nodes_in_groups + sub_nodes_in_groups + + profile_nodes = orm.QueryBuilder().append(orm.Node, project=['pk']).all(flat=True) + nodes = [profile_node for profile_node in profile_nodes if profile_node not in nodes_in_groups] + nodes = [orm.load_node(node) for node in nodes] + + self.collection_nodes = nodes + + return nodes + + def _should_dump_processes(self) -> bool: + if not self.nodes: + return ( + sum( + self.entity_counter.get(orm_process_class, 0) + for orm_process_class in [ + orm.CalcJobNode, + orm.CalcFunctionNode, + orm.WorkChainNode, + orm.WorkFunctionNode, + orm.ProcessNode, + ] + ) + > 0 + ) + else: + return len([node for node in self.nodes if isinstance(node, orm.ProcessNode)]) > 0 + + def _dump_calculations_hidden(self, calculations): + # ? 
Dump only top-level workchains, as that includes sub-workchains already + + for calculation in calculations: + calculation_dumper = self.process_dumper + + calculation_dump_path = self.hidden_aiida_path / 'calculations' / calculation.uuid + + # if not self.dry_run: + # with contextlib.suppress(FileExistsError): + try: + calculation_dumper._dump_calculation(calculation_node=calculation, output_path=calculation_dump_path) + except: + raise + + # # To make development quicker + # if iworkflow_ > 1: + # break + + def _dump_link_workflows(self, workflows, link_calculations: bool = True): + # workflow_nodes = get_nodes_from_db(aiida_node_type=orm.WorkflowNode, with_group=self.group, flat=True) + for workflow in workflows: + workflow_dumper = self.process_dumper + + link_calculations_dir = self.hidden_aiida_path / 'calculations' + # TODO: If the GroupDumper is called from somewhere else outside, prefix the path with `groups/core` etc + workflow_dump_path = ( + self.output_path + / 'workflows' + / workflow_dumper._generate_default_dump_path(process_node=workflow, prefix=None) + ) + # logger.report(f'WORKFLOW_DUMP_PATH: {workflow_dump_path}') + + workflow_dumper._dump_workflow( + workflow_node=workflow, + output_path=workflow_dump_path, + link_calculations=link_calculations, + link_calculations_dir=link_calculations_dir, + ) + + def _link_calculations_hidden(self, calculations): + # calculation_nodes = get_nodes_from_db(aiida_node_type=orm.CalculationNode, with_group=self.group, flat=True) + for calculation_node in calculations: + calculation_dumper = self.process_dumper + + link_calculations_dir = self.hidden_aiida_path / 'calculations' + + calculation_dump_path = self.output_path / 'calculations' + calculation_dump_path.mkdir(parents=True, exist_ok=True) + calculation_dump_path = calculation_dump_path / calculation_dumper._generate_default_dump_path( + process_node=calculation_node + ) + + with contextlib.suppress(FileExistsError): + os.symlink(link_calculations_dir / calculation_node.uuid, calculation_dump_path) + + def dump_processes(self): + # ? 
Here, these could be all kinds of entities that could be grouped in AiiDA + # if len(self.entities_to_dump) > 0: + # pass + # # nodes = self.entities_to_dump + # else: + nodes = self.get_collection_nodes() + workflows = [node for node in nodes if isinstance(node, orm.WorkflowNode)] + + if self.only_top_level_workflows: + workflows = [workflow for workflow in workflows if workflow.caller is None] + + # Also need to obtain sub-calculations that were called by workflows of the group + # These are not contained in the group.nodes directly + called_calculations = [] + for workflow in workflows: + called_calculations += [ + node for node in workflow.called_descendants if isinstance(node, orm.CalculationNode) + ] + + calculations = set([node for node in nodes if isinstance(node, orm.CalculationNode)] + called_calculations) + + if len(workflows) + len(calculations) == 0: + return + + self.output_path.mkdir(exist_ok=True, parents=True) + + print(f'self.process_dumper.calculations_hidden: {self.process_dumper.calculations_hidden}') + print(f'self.output_path: {self.output_path}') + if self.process_dumper.calculations_hidden: + print('dump hidden') + self._dump_calculations_hidden(calculations=calculations) + self._dump_link_workflows(workflows=workflows) + self._link_calculations_hidden(calculations=calculations) + else: + print('dump non-hidden') + for workflow in workflows: + workflow_path = ( + self.output_path + / 'workflows' + / self.process_dumper._generate_default_dump_path(process_node=workflow) + ) + self.process_dumper.dump(process_node=workflow, output_path=workflow_path) + + # TODO: Add `dump_data_raw` here, as well + def dump_data_rich(self): + nodes = self.get_collection_nodes() + nodes = [node for node in nodes if isinstance(node, (orm.Data, orm.Computer))] + # Here, when providing logic to set the exporters and fileformat via the rich-options, don't have to filter + # anymore for `core` + nodes = [node for node in nodes if node.entry_point.name.startswith('core')] + if len(nodes) == 0: + return + + self.output_path.mkdir(exist_ok=True, parents=True) + data_dumper = self.data_dumper + + for data_node in nodes: + node_entry_point_name = data_node.entry_point.name + + # Get the fileformat and exporter for the data node + try: + fileformat = data_dumper.rich_spec_dict[node_entry_point_name]['export_format'] + exporter = data_dumper.rich_spec_dict[node_entry_point_name]['exporter'] + + # If options for the rich dumping are specified and not all the other defaults are being used + # Some entry_points might not be inside the `rich_spec_dict` + except KeyError: + continue + + except: + # Raise all exceptions here during development + raise + + # Don't go further if no importer implemented for a data type anyway + if exporter is None: + continue + + try: + # Generate a nice filename and sanitize it + nice_output_path = self.output_path / 'data' / data_node.__class__.__name__.lower() + nice_fname = data_dumper.generate_output_fname_rich(data_node=data_node, fileformat=fileformat).replace( + '__', '_' + ) + nice_fname = sanitize_file_extension(nice_fname) + + if data_dumper.data_hidden: + # Define paths for hidden dump and linking + hidden_output_path = self.hidden_aiida_path / 'data' / data_node.__class__.__name__.lower() + uuid_fname = sanitize_file_extension(f'{data_node.uuid}.{fileformat}') + + # Dump the data in the hidden directory + data_dumper.dump_core_data_node_rich(data_node, hidden_output_path, uuid_fname) + + # Link the hidden file to the expected output path + (nice_output_path / 
nice_fname).parent.mkdir(exist_ok=True, parents=True) + os.symlink(hidden_output_path / uuid_fname, nice_output_path / nice_fname) + + else: + # Dump the data in the non-hidden directory + data_dumper.dump_core_data_node_rich(data_node, nice_output_path, nice_fname) + + except TypeError: + # Handle case when no exporter is implemented for a given data_node type + raise + except OSError: + # A Data node, e.g. a Code might already be existent, so don't worry about this exception + continue + except Exception: + raise + + def dump_plugin_data(self): + return + # from importlib.metadata import entry_points + + # plugin_data_entry_points = [entry_point.name for entry_point in entry_points(group='aiida.data')] + # # print(plugin_data_entry_points) + # # print(self.entity_counter) + # from aiida.manage.manager import get_manager + + # manager = get_manager() + # storage = manager.get_profile_storage() + # orm_entities = storage.get_orm_entities(detailed=True)['Nodes']['node_types'] + # non_core_data_entities = [ + # orm_entity + # for orm_entity in orm_entities + # if orm_entity.startswith('data') and not orm_entity.startswith('data.core') + # ] + # # TODO: Implement dumping here. Stashed for now, as both `HubbardStructureData` and `UpfData` I wanted to use + # # TODO: for testing don't implement `export` either way + # # print(non_core_data_entities) diff --git a/src/aiida/tools/dumping/data.py b/src/aiida/tools/dumping/data.py new file mode 100644 index 0000000000..3a75d8d743 --- /dev/null +++ b/src/aiida/tools/dumping/data.py @@ -0,0 +1,292 @@ +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +"""Functionality for dumping of Data nodes.""" + +from __future__ import annotations + +import logging +from functools import singledispatchmethod +from pathlib import Path + +import yaml + +from aiida import orm + +logger = logging.getLogger(__name__) + + +class DataDumper: + def __init__( + self, + *args, + dump_parent_path: Path = Path.cwd(), + overwrite: bool = False, + incremental: bool = True, + data_hidden: bool = False, + also_raw: bool = False, + also_rich: bool = False, + rich_spec_dict: dict | None = None, + **kwargs, + ) -> None: + self.args = args + self.dump_parent_path = dump_parent_path + self.overwrite = overwrite + self.incremental = incremental + self.data_hidden = data_hidden + self.also_raw = also_raw + self.also_rich = also_rich + self.kwargs = kwargs + + self.rich_spec_dict = rich_spec_dict + + self.hidden_aiida_path = dump_parent_path / '.aiida-raw-data' + + @singledispatchmethod + def dump_core_data_node_rich(self, data_node, output_path, output_fname): + # raise NotImplementedError(f'Dumping not implemented for type {type(data_node)}') + # print(f'No specific handler found for type <{type(data_node)}> <{data_node}>, doing nothing.') + # output_path /= 'general' + # This is effectively the `rich` dumping + data_node_entry_point_name = data_node.entry_point.name + export_settings = self.rich_spec_dict[data_node_entry_point_name] + exporter = export_settings['exporter'] + fileformat = export_settings['export_format'] + if exporter is not None: + 
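+            # An exporter is registered for this entry point: ensure the target directory exists,
+            # then delegate the export with the file format configured in `rich_spec_dict`.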
output_path.mkdir(exist_ok=True, parents=True) + exporter( + node=data_node, + output_fname=output_path / output_fname, + fileformat=fileformat, + overwrite=self.overwrite, + ) + # This is for orm.Data types for which no default dumping is implemented, e.g. Bool or Float + # except ValueError: + # pass + # This is for orm.Data types for whose entry_point names no entry exists in the DEFAULT_CORE_EXPORT_MAPPING + # This is now captured outside in the `CollectionDumper`, so should not be relevant anymore + # except TypeError: + # raise + + @dump_core_data_node_rich.register + def _( + self, + data_node: orm.StructureData, + output_path: str | Path | None = None, + output_fname: str | None = None, + ): + if type(data_node) is orm.StructureData: + self._dump_structuredata(data_node, output_path=output_path, output_fname=output_fname) + else: + # Handle the case where data_node is a subclass of orm.StructureData + # Just use the default dispatch function implementation + self.dump_core_data_node_rich.dispatch(object)(self, data_node, output_path, output_fname) + + @dump_core_data_node_rich.register + def _( + self, + data_node: orm.Code, + output_path: str | Path | None = None, + output_fname: str | None = None, + ): + self._dump_code(data_node=data_node, output_path=output_path, output_fname=output_fname) + + @dump_core_data_node_rich.register + def _( + self, + data_node: orm.Computer, + output_path: str | Path | None = None, + output_fname: str | None = None, + ): + self._dump_computer_setup(data_node=data_node, output_path=output_path, output_fname=output_fname) + self._dump_computer_config(data_node=data_node, output_path=output_path, output_fname=output_fname) + + @dump_core_data_node_rich.register + def _( + self, + data_node: orm.BandsData, + output_path: str | Path | None = None, + output_fname: str | None = None, + ): + self._dump_bandsdata(data_node=data_node, output_path=output_path, output_fname=output_fname) + + # These are the rich dumping implementations that actually differ from the default dispatch + def _dump_structuredata( + self, + data_node: orm.StructureData, + output_path: Path | None = None, + output_fname: str | None = None, + ): + from aiida.common.exceptions import UnsupportedSpeciesError + + node_entry_point_name = data_node.entry_point.name + exporter = self.rich_spec_dict[node_entry_point_name]['exporter'] + fileformat = self.rich_spec_dict[node_entry_point_name]['export_format'] + + if output_fname is None: + output_fname = DataDumper.generate_output_fname_rich(data_node=data_node, fileformat=fileformat) + + # ? There also exists a CifData file type + # output_path /= 'structures' + output_path.mkdir(exist_ok=True, parents=True) + try: + exporter( + node=data_node, + output_fname=output_path / output_fname, + fileformat=fileformat, + overwrite=self.overwrite, + ) + except UnsupportedSpeciesError: + # This is the case for, e.g. HubbardStructureData that has species like `Mn0` + # Not sure how to resolve this. Wouldn't add a singledispatch for data types defined in plugins. Currently, + # do strict type check. HubbardStructureData doesn't implement an export method itself, though. 
+ pass + + def _dump_code( + self, + data_node: orm.Code, + output_path: Path | None = None, + output_fname: str | None = None, + ): + # output_path /= 'codes' + + node_entry_point_name = data_node.entry_point.name + exporter = self.rich_spec_dict[node_entry_point_name]['exporter'] + fileformat = self.rich_spec_dict[node_entry_point_name]['export_format'] + + if fileformat != 'yaml': + raise NotImplementedError('No other fileformats supported so far apart from YAML.') + output_path.mkdir(exist_ok=True, parents=True) + if output_fname is None: + output_fname = DataDumper.generate_output_fname_rich(data_node=data_node, fileformat=fileformat) + + exporter( + node=data_node, + output_fname=output_path / output_fname, + fileformat=fileformat, + overwrite=self.overwrite, + ) + + def _dump_computer_setup( + self, + data_node: orm.Computer, + output_path: Path | None = None, + output_fname: str | None = None, + ): + node_entry_point_name = data_node.entry_point.name + # TODO: Don't use the `exporter` here, as `Computer` doesn't derive from Data, so custom implementation + fileformat = self.rich_spec_dict[node_entry_point_name]['export_format'] + + if fileformat != 'yaml': + raise NotImplementedError('No other fileformats supported so far apart from YAML.') + + output_path.mkdir(exist_ok=True, parents=True) + + # This is a bit of a hack. Should split this up into two different functions. + if output_fname is None: + output_fname = output_path / f'{data_node.full_label}-setup-{data_node.pk}.{fileformat}' + + # ? Copied over from `cmd_computer` as importing `computer_export_setup` led to click Context error: + # TypeError: Context.__init__() got an unexpected keyword argument 'computer' + computer_setup = { + 'label': data_node.label, + 'hostname': data_node.hostname, + 'description': data_node.description, + 'transport': data_node.transport_type, + 'scheduler': data_node.scheduler_type, + 'shebang': data_node.get_shebang(), + 'work_dir': data_node.get_workdir(), + 'mpirun_command': ' '.join(data_node.get_mpirun_command()), + 'mpiprocs_per_machine': data_node.get_default_mpiprocs_per_machine(), + 'default_memory_per_machine': data_node.get_default_memory_per_machine(), + 'use_double_quotes': data_node.get_use_double_quotes(), + 'prepend_text': data_node.get_prepend_text(), + 'append_text': data_node.get_append_text(), + } + + if not output_fname.is_file(): + output_fname.write_text(yaml.dump(computer_setup, sort_keys=False), 'utf-8') + + def _dump_computer_config( + self, + data_node: orm.Computer, + output_path: Path | None = None, + output_fname: str | None = None, + ): + from aiida.orm import User + + node_entry_point_name = data_node.entry_point.name + # TODO: Don't use the `exporter` here, as `Computer` doesn't derive from Data, so custom implementation + fileformat = self.rich_spec_dict[node_entry_point_name]['export_format'] + + # output_path /= 'computers' + if fileformat != 'yaml': + raise NotImplementedError('No other fileformats supported so far apart from YAML.') + + output_path.mkdir(exist_ok=True, parents=True) + + # This is a bit of a hack. Should split this up into two different functions. 
+ if output_fname is None: + output_fname = output_path / f'{data_node.full_label}-config-{data_node.pk}.{fileformat}' + + users = User.collection.all() + for user in users: + computer_configuration = data_node.get_configuration(user) + if not output_fname.is_file(): + output_fname.write_text(yaml.dump(computer_configuration, sort_keys=False), 'utf-8') + + def _dump_bandsdata( + self, + data_node: orm.BandsData, + output_path: Path | None = None, + output_fname: str | None = None, + ): + node_entry_point_name = data_node.entry_point.name + exporter = self.rich_spec_dict[node_entry_point_name]['exporter'] + fileformat = self.rich_spec_dict[node_entry_point_name]['export_format'] + + from aiida.tools.dumping.utils import sanitize_file_extension + + output_path.mkdir(exist_ok=True, parents=True) + + if output_fname is None: + output_fname = DataDumper.generate_output_fname_rich(data_node=data_node, fileformat=fileformat) + + output_fname = sanitize_file_extension(output_fname) + + exporter( + node=data_node, + output_fname=output_path / output_fname, + fileformat=fileformat, + overwrite=self.overwrite, + ) + + def _dump_user_info(self): ... + + def dump_core_data_node_raw(self, data_node: orm.Data, output_path: Path, output_fname: str | None = None): + output_path.mkdir(exist_ok=True, parents=True) + + if output_fname is None: + output_fname = DataDumper.generate_output_fname_raw(data_node=data_node) + + with open(output_path.resolve() / output_fname, 'w') as handle: + yaml.dump(data_node.attributes, handle) + + @staticmethod + def generate_output_fname_raw(data_node, prefix: str | None = None): + if prefix is None: + return f'{data_node.__class__.__name__}-{data_node.pk}_attrs.yaml' + else: + return f'{prefix}-{data_node.__class__.__name__}-{data_node.pk}_attrs.yaml' + + @staticmethod + def generate_output_fname_rich(data_node, fileformat, prefix: str | None = None): + if prefix is None: + return f'{data_node.__class__.__name__}-{data_node.pk}.{fileformat}' + else: + return f'{prefix}-{data_node.__class__.__name__}-{data_node.pk}.{fileformat}' diff --git a/src/aiida/tools/dumping/parser.py b/src/aiida/tools/dumping/parser.py new file mode 100644 index 0000000000..cc19b0f141 --- /dev/null +++ b/src/aiida/tools/dumping/parser.py @@ -0,0 +1,56 @@ +from pathlib import Path + +import yaml + + +class DumpConfigParser: + @staticmethod + def parse_config_file(config_file: str | Path | None) -> dict: + if isinstance(config_file, (str, Path)): + with open(config_file, 'r') as file: + config = yaml.safe_load(file) + else: + config = yaml.safe_load(config_file) + + general_kwargs = { + 'path': Path(config.get('path', Path.cwd())), + 'overwrite': config.get('overwrite', False), + 'incremental': config.get('incremental', True), + 'dry_run': config.get('dry_run', False), + } + + processdumper_kwargs = { + 'include_inputs': config.get('include_inputs', True), + 'include_outputs': config.get('include_outputs', True), + 'include_attributes': config.get('include_attributes', True), + 'include_extras': config.get('include_extras', False), + 'flat': config.get('flat', False), + 'calculations_hidden': config.get('calculations_hidden', True), + } + + datadumper_kwargs = { + 'also_raw': config.get('also_raw', False), + 'also_rich': config.get('also_rich', True), + 'data_hidden': config.get('data_hidden', True), + } + + collection_kwargs = { + 'should_dump_processes': config.get('dump_processes', True), + 'should_dump_data': config.get('dump_data', True), + 'only_top_level_workflows': 
config.get('only_top_level_workflows', True), + } + + rich_kwargs = { + 'rich_dump_all': config.get('rich_dump_all', True), + } + + rich_spec = config.get('rich_spec', None) + + return { + 'general_kwargs': general_kwargs, + 'processdumper_kwargs': processdumper_kwargs, + 'datadumper_kwargs': datadumper_kwargs, + 'collection_kwargs': collection_kwargs, + 'rich_kwargs': rich_kwargs, + 'rich_spec': rich_spec, + } diff --git a/src/aiida/tools/dumping/processes.py b/src/aiida/tools/dumping/processes.py index 794b1fcab2..29fbef07c9 100644 --- a/src/aiida/tools/dumping/processes.py +++ b/src/aiida/tools/dumping/processes.py @@ -8,9 +8,23 @@ ########################################################################### """Functionality for dumping of ProcessNodes.""" +# ? Possibly add dry_run option here +# TODO: Add symlinking feature +# -> This would be for calculations which are subprocesses of the workflow +# -> But also PPs +# -> Could define a symlink-mapping based on a dict in the form: +# { +# CalculationNode: , +# PPs: +# } +# Based on this, I could check the linked directory for the entity based on its UUID +# TODO: Or, could add a `programmatic` option that doesn't create the README.md, and does a few other things, as well + from __future__ import annotations +import contextlib import logging +import os from pathlib import Path from types import SimpleNamespace from typing import List @@ -23,7 +37,6 @@ CalcFunctionNode, CalcJobNode, CalculationNode, - LinkManager, ProcessNode, WorkChainNode, WorkflowNode, @@ -31,6 +44,7 @@ ) from aiida.orm.utils import LinkTriple from aiida.tools.archive.exceptions import ExportValidationError +from aiida.tools.dumping.data import DataDumper from aiida.tools.dumping.utils import prepare_dump_path LOGGER = logging.getLogger(__name__) @@ -39,26 +53,43 @@ class ProcessDumper: def __init__( self, + *args, + dump_parent_path: Path = Path.cwd(), + overwrite: bool = False, + incremental: bool = True, + flat: bool = False, + calculations_hidden: bool = True, include_inputs: bool = True, include_outputs: bool = False, include_attributes: bool = True, include_extras: bool = True, - overwrite: bool = False, - flat: bool = False, + rich_options: str = '', + rich_config_file: Path | None = None, + rich_dump_all: bool = True, + data_dumper: DataDumper | None = DataDumper(), dump_unsealed: bool = False, - incremental: bool = True, + **kwargs, ) -> None: + self.args = args + self.dump_parent_path = dump_parent_path + self.overwrite = overwrite + self.incremental = incremental + self.flat = flat self.include_inputs = include_inputs self.include_outputs = include_outputs self.include_attributes = include_attributes self.include_extras = include_extras - self.overwrite = overwrite - self.flat = flat + self.rich_options = rich_options + self.rich_config_file = rich_config_file + self.rich_dump_all = rich_dump_all + self.data_dumper = data_dumper + self.kwargs = kwargs self.dump_unsealed = dump_unsealed - self.incremental = incremental + + self.hidden_aiida_path = dump_parent_path / '.aiida-raw-data' @staticmethod - def _generate_default_dump_path(process_node: ProcessNode) -> Path: + def _generate_default_dump_path(process_node: ProcessNode, prefix: str = 'dump') -> Path: """Simple helper function to generate the default parent-dumping directory if none given. 
This function is not called for the recursive sub-calls of `_dump_calculation` as it just creates the default @@ -69,11 +100,12 @@ def _generate_default_dump_path(process_node: ProcessNode) -> Path: """ pk = process_node.pk + # TODO: Use UUID[:8] here try: - return Path(f'dump-{process_node.process_label}-{pk}') + return Path(f'{prefix}-{process_node.process_label}-{pk}') except AttributeError: # This case came up during testing, not sure how relevant it actually is - return Path(f'dump-{process_node.process_type}-{pk}') + return Path(f'{prefix}-{process_node.process_type}-{pk}') @staticmethod def _generate_readme(process_node: ProcessNode, output_path: Path) -> None: @@ -165,15 +197,15 @@ def _generate_child_node_label(index: int, link_triple: LinkTriple) -> str: node_label = '-'.join(label_list) # `CALL-` as part of the link labels also for MultiplyAddWorkChain -> Seems general enough, so remove node_label = node_label.replace('CALL-', '') - node_label = node_label.replace('None-', '') - - return node_label + return node_label.replace('None-', '') def dump( self, process_node: ProcessNode, output_path: Path | None, io_dump_paths: List[str | Path] | None = None, + *args, + **kwargs, ) -> Path: """Dumps all data involved in a `ProcessNode`, including its outgoing links. @@ -192,6 +224,13 @@ def dump( f'Process `{process_node.pk} must be sealed before it can be dumped, or `dump_unsealed` set to True.' ) + # This here is mainly for `include_attributes` and `include_extras`. + # I don't want to include them in the general class `__init__`, as they don't really fit there. + # But the `_dump_node_yaml` function is private, so it's never called outside by the user. + # Setting the class attributes here dynamically is probably not a good solution, but it works for now. + for key, value in kwargs.items(): + setattr(self, key, value) + if output_path is None: output_path = self._generate_default_dump_path(process_node=process_node) @@ -216,7 +255,12 @@ def dump( return output_path def _dump_workflow( - self, workflow_node: WorkflowNode, output_path: Path, io_dump_paths: List[str | Path] | None = None + self, + workflow_node: WorkflowNode, + output_path: Path, + io_dump_paths: List[str | Path] | None = None, + link_calculations: bool = False, + link_calculations_dir: str | None = None, ) -> None: """Recursive function to traverse a `WorkflowNode` and dump its `CalculationNode` s. @@ -242,15 +286,25 @@ def _dump_workflow( workflow_node=child_node, output_path=child_output_path, io_dump_paths=io_dump_paths, + # TODO: Always need to pass this stuff through due to the recursive nature of the function call... 
+ # TODO: Maybe one can make a separate method that only does the linking + link_calculations=link_calculations, + link_calculations_dir=link_calculations_dir, ) # Once a `CalculationNode` as child reached, dump it elif isinstance(child_node, CalculationNode): - self._dump_calculation( - calculation_node=child_node, - output_path=child_output_path, - io_dump_paths=io_dump_paths, - ) + if not link_calculations: + self._dump_calculation( + calculation_node=child_node, + output_path=child_output_path, + io_dump_paths=io_dump_paths, + ) + else: + try: + os.symlink(link_calculations_dir / child_node.uuid, child_output_path) + except FileExistsError: + pass def _dump_calculation( self, @@ -275,30 +329,75 @@ def _dump_calculation( calculation_node.base.repository.copy_tree(output_path.resolve() / io_dump_mapping.repository) # Dump the repository contents of `outputs.retrieved` - try: + with contextlib.suppress(NotExistentAttributeError): calculation_node.outputs.retrieved.base.repository.copy_tree( output_path.resolve() / io_dump_mapping.retrieved ) - except NotExistentAttributeError: - pass + + if self.data_dumper.also_raw: + # TODO: Replace with attached self.data_dumper attribute + self.data_dumper.dump_core_data_node_raw(data_node=calculation_node, output_path=output_path) # Dump the node_inputs if self.include_inputs: input_links = calculation_node.base.links.get_incoming(link_type=LinkType.INPUT_CALC) - self._dump_calculation_io(parent_path=output_path / io_dump_mapping.inputs, link_triples=input_links) + # Need to create the path before, otherwise getting Exception + input_path = output_path / io_dump_mapping.inputs + input_path.mkdir(parents=True, exist_ok=True) + + self._dump_calculation_io_files(parent_path=output_path / io_dump_mapping.inputs, link_triples=input_links) + + if self.data_dumper.also_raw: + # Always dump the `raw` data inside the calculation directories + # I don't see a reason why one would want all the node attribute files in a centralized location + self._dump_calculation_io_files_raw( + output_path=output_path / io_dump_mapping.inputs, link_triples=input_links + ) + + if self.data_dumper.also_rich: + rich_data_output_path = output_path / io_dump_mapping.inputs + # if not self.data_dumper.data_hidden: + # rich_data_output_path = output_path / io_dump_mapping.inputs + # else: + # # TODO: Currently, when dumping only one selected workflow, if rich dumping is activated, but + # # TODO: `data-hidden` is set, no data nodes were actually being dumped + # # TODO: With the current implementation below, they are dumped, but not in the same structure as for the + # # TODO: `dump_rich_core` function. Quick fix for now + # pass + + # Only dump the rich data output files in the process directories if data_hidden is False + self._dump_calculation_io_files_rich( + output_path=rich_data_output_path, link_triples=input_links + ) # Dump the node_outputs apart from `retrieved` if self.include_outputs: output_links = list(calculation_node.base.links.get_outgoing(link_type=LinkType.CREATE)) output_links = [output_link for output_link in output_links if output_link.link_label != 'retrieved'] - self._dump_calculation_io( + self._dump_calculation_io_files( parent_path=output_path / io_dump_mapping.outputs, link_triples=output_links, ) - def _dump_calculation_io(self, parent_path: Path, link_triples: LinkManager | List[LinkTriple]): - """Small helper function to dump linked input/output nodes of a `CalculationNode`. 
+ if self.data_dumper.also_raw: + self._dump_calculation_io_files_raw( + output_path=output_path / io_dump_mapping.outputs, + link_triples=output_links, + ) + + if self.data_dumper.also_rich: + self._dump_calculation_io_files_rich( + output_path=output_path / io_dump_mapping.outputs, + link_triples=output_links, + ) + + def _dump_calculation_io_files( + self, + parent_path: Path, + link_triples: orm.LinkManager | List[orm.LinkTriple], + ): + """Small helper function to dump linked input/output nodes of a `orm.CalculationNode`. :param parent_path: Parent directory for dumping the linked node contents. :param link_triples: List of link triples. @@ -315,6 +414,92 @@ def _dump_calculation_io(self, parent_path: Path, link_triples: LinkManager | Li link_triple.node.base.repository.copy_tree(linked_node_path.resolve()) + def _dump_calculation_io_files_raw( + self, + output_path: Path, + link_triples: orm.LinkManager | List[orm.LinkTriple], + ): + """Small helper function to dump linked input/output nodes of a `orm.CalculationNode`. + + :param parent_path: Parent directory for dumping the linked node contents. + :param link_triples: List of link triples. + """ + + output_path /= 'raw' + + for link_triple in link_triples: + link_label = link_triple.link_label + data_node = link_triple.node + + # linked_node_path.parent.mkdir(parents=True, exist_ok=True) + output_path.mkdir(parents=True, exist_ok=True) + + # Then dump the node attributes for each node + output_fname = DataDumper.generate_output_fname_raw(prefix=link_label, data_node=data_node) + output_fname = output_fname.replace('__', '_') + + if self.data_dumper.data_hidden: + self.data_dumper.dump_core_data_node_raw( + data_node=data_node, output_path=output_path, output_fname=output_fname + ) + self.data_dumper.dump_core_data_node_raw( + data_node=data_node, output_path=output_path, output_fname=output_fname + ) + + def _dump_calculation_io_files_rich( + self, + output_path: Path, + link_triples: orm.LinkManager | List[orm.LinkTriple], + ): + """Small helper function to dump linked input/output nodes of a `orm.CalculationNode`. + + :param parent_path: Parent directory for dumping the linked node contents. + :param link_triples: List of link triples. 
+ """ + + # Set up the rich parsing functions + + # Extend (at least the keys) by the dynamic entry points + rich_spec_dict = self.data_dumper.rich_spec_dict + + for link_triple in link_triples: + link_label = link_triple.link_label + data_node = link_triple.node + + node = link_triple.node + node_entry_point = node.entry_point + node_entry_point_name = node_entry_point.name + + # TODO: Somehow obtain sensible filenames -> Should this be done here, or by the export function that is + # TODO: possibly written by the plugin developer + if node_entry_point_name.startswith('core'): + # Obtain settings from the export dict + # TODO: -> This might break when plugin is missing + try: + exporter = rich_spec_dict[node_entry_point_name]['exporter'] + fileformat = rich_spec_dict[node_entry_point_name]['export_format'] + output_fname = self.data_dumper.generate_output_fname_rich( + prefix=link_label, data_node=data_node, fileformat=fileformat + ) + output_fname = output_fname.replace('__', '_') + except KeyError: + continue + + # No exporter set + if exporter is None: + continue + + # Only create subdirectory if `Data` node has an exporter + rich_output_path = output_path / 'rich' / node.__class__.__name__.lower() + rich_output_path.mkdir(parents=True, exist_ok=True) + + # TODO: Here, if data_hidden is True, dump in hidden directory, else in output_path + self.data_dumper.dump_core_data_node_rich( + node, + output_path=rich_output_path, + output_fname=output_fname, + ) + def _generate_calculation_io_mapping(self, io_dump_paths: List[str | Path] | None = None) -> SimpleNamespace: """Helper function to generate mapping for entities dumped for each `CalculationNode`. @@ -328,12 +513,12 @@ def _generate_calculation_io_mapping(self, io_dump_paths: List[str | Path] | Non aiida_entities_to_dump = ['repository', 'retrieved', 'inputs', 'outputs'] default_calculation_io_dump_paths = ['inputs', 'outputs', 'node_inputs', 'node_outputs'] - empty_calculation_io_dump_paths = [''] * 4 - if self.flat and io_dump_paths is None: LOGGER.info( 'Flat set to True and no `io_dump_paths`. Dumping in a flat directory, files might be overwritten.' ) + empty_calculation_io_dump_paths = [''] * 4 + return SimpleNamespace(**dict(zip(aiida_entities_to_dump, empty_calculation_io_dump_paths))) elif not self.flat and io_dump_paths is None: @@ -343,7 +528,7 @@ def _generate_calculation_io_mapping(self, io_dump_paths: List[str | Path] | Non ) return SimpleNamespace(**dict(zip(aiida_entities_to_dump, default_calculation_io_dump_paths))) - elif self.flat and io_dump_paths is not None: + elif self.flat: LOGGER.info('Flat set to True but `io_dump_paths` provided. 
These will be used, but `inputs` not nested.') return SimpleNamespace(**dict(zip(aiida_entities_to_dump, io_dump_paths))) else: @@ -381,44 +566,31 @@ def _dump_node_yaml( computer_properties = ('label', 'hostname', 'scheduler_type', 'transport_type') - node_dict = {} - metadata_dict = {} - - # Add actual node `@property`s to dictionary - for metadata_property in node_properties: - metadata_dict[metadata_property] = getattr(process_node, metadata_property) - - node_dict['Node data'] = metadata_dict - + metadata_dict = { + metadata_property: getattr(process_node, metadata_property) for metadata_property in node_properties + } + node_dict = {'Node data': metadata_dict} # Add user data - try: + with contextlib.suppress(AttributeError): node_dbuser = process_node.user - user_dict = {} - for user_property in user_properties: - user_dict[user_property] = getattr(node_dbuser, user_property) + user_dict = {user_property: getattr(node_dbuser, user_property) for user_property in user_properties} node_dict['User data'] = user_dict - except AttributeError: - pass # Add computer data - try: + with contextlib.suppress(AttributeError): node_dbcomputer = process_node.computer - computer_dict = {} - for computer_property in computer_properties: - computer_dict[computer_property] = getattr(node_dbcomputer, computer_property) + computer_dict = { + computer_property: getattr(node_dbcomputer, computer_property) + for computer_property in computer_properties + } node_dict['Computer data'] = computer_dict - except AttributeError: - pass - # Add node attributes if self.include_attributes: node_attributes = process_node.base.attributes.all node_dict['Node attributes'] = node_attributes - # Add node extras if self.include_extras: - node_extras = process_node.base.extras.all - if node_extras: + if node_extras := process_node.base.extras.all: node_dict['Node extras'] = node_extras output_file = output_path.resolve() / output_filename diff --git a/src/aiida/tools/dumping/rich.py b/src/aiida/tools/dumping/rich.py new file mode 100644 index 0000000000..bbc89ef072 --- /dev/null +++ b/src/aiida/tools/dumping/rich.py @@ -0,0 +1,86 @@ +from aiida.cmdline.commands.cmd_data.cmd_export import data_export + +__all__ = ('DEFAULT_CORE_EXPORT_MAPPING', 'rich_from_cli', 'rich_from_config') + +DEFAULT_CORE_EXPORT_MAPPING = { + 'core.array': {'exporter': data_export, 'export_format': 'json'}, + 'core.array.bands': {'exporter': data_export, 'export_format': 'mpl_pdf'}, + 'core.array.kpoints': {'exporter': data_export, 'export_format': 'json'}, + 'core.array.projection': {'exporter': data_export, 'export_format': 'json'}, + 'core.array.trajectory': {'exporter': data_export, 'export_format': 'cif'}, + 'core.array.xy': {'exporter': data_export, 'export_format': 'json'}, + 'core.base': {'exporter': None, 'export_format': None}, + 'core.bool': {'exporter': None, 'export_format': None}, + 'core.cif': {'exporter': data_export, 'export_format': 'cif'}, + 'core.code': {'exporter': data_export, 'export_format': 'yaml'}, + 'core.code.containerized': {'exporter': data_export, 'export_format': 'yaml'}, + 'core.code.installed': {'exporter': data_export, 'export_format': 'yaml'}, + 'core.code.portable': {'exporter': data_export, 'export_format': 'yaml'}, + 'core.dict': {'exporter': None, 'export_format': None}, + 'core.enum': {'exporter': None, 'export_format': None}, + 'core.float': {'exporter': None, 'export_format': None}, + # TODO: Just use copy-tree + 'core.folder': {'exporter': None, 'export_format': None}, + 'core.int': {'exporter': None, 
'export_format': None}, + 'core.jsonable': { + 'exporter': data_export, + 'export_format': 'json', # duh + }, + 'core.list': {'exporter': None, 'export_format': None}, + 'core.numeric': {'exporter': None, 'export_format': None}, + 'core.orbital': {'exporter': None, 'export_format': None}, + # TODO: Here, try-except existance on remote and if so, dump it here locally + 'core.remote': {'exporter': None, 'export_format': None}, + 'core.remote.stash': {'exporter': None, 'export_format': None}, + 'core.remote.stash.folder': {'exporter': None, 'export_format': None}, + 'core.singlefile': {'exporter': None, 'export_format': None}, + 'core.str': {'exporter': None, 'export_format': None}, + 'core.structure': {'exporter': data_export, 'export_format': 'cif'}, + 'core.upf': {'exporter': data_export, 'export_format': 'upf'}, +} + + +def rich_from_cli(rich_spec, rich_dump_all): + # If all, also non-specified data types should be exported, then set the default exporter dict here + if rich_dump_all: + options_dict = DEFAULT_CORE_EXPORT_MAPPING + else: + options_dict = {} + + if rich_spec: + entries = rich_spec.split(',') + # print(f'ENTRIES: {entries}') + for entry in entries: + entry_list = entry.split(':') + entry_point = entry_list[0] + + # This is the case if no exporter explicitly provided, then we set it to the default exporter + exporter = entry_list[1] or DEFAULT_CORE_EXPORT_MAPPING[entry_point]['exporter'] + + # This is the case if no fileformat explicitly provided, then we set it to the default fileformat + export_format = entry_list[2] or DEFAULT_CORE_EXPORT_MAPPING[entry_point]['export_format'] + + # If it is provided, then the assignment is done with an equal sign and we resolve it + if '=' in export_format: + export_format = export_format.split('=')[1] + + # print(f'ENTRY_LIST: {entry_list}') + + options_dict[entry_point] = {'exporter': exporter, 'export_format': export_format} + + return options_dict + + +def rich_from_config(rich_spec, rich_dump_all): + if rich_dump_all: + options_dict = DEFAULT_CORE_EXPORT_MAPPING + else: + options_dict = {} + + for entry_point, spec in rich_spec.items(): + export_format = spec.get('format') or DEFAULT_CORE_EXPORT_MAPPING[entry_point]['export_format'] + exporter = spec.get('exporter') or DEFAULT_CORE_EXPORT_MAPPING[entry_point]['exporter'] + + options_dict[entry_point] = {'exporter': exporter, 'export_format': export_format} + + return options_dict diff --git a/src/aiida/tools/dumping/test-config-file.yaml b/src/aiida/tools/dumping/test-config-file.yaml new file mode 100644 index 0000000000..b868f8afcb --- /dev/null +++ b/src/aiida/tools/dumping/test-config-file.yaml @@ -0,0 +1,23 @@ +path: /home/geiger_j/aiida_projects/verdi-profile-dump/dev-dumps/storage-mirror +overwrite: true +incremental: true +dry_run: false +organize_by_groups: true +dump_processes: true +only_top_level_workflows: true +dump_data: true +calculations_hidden: true +data_hidden: true +also_raw: false +also_rich: true +include_inputs: true +include_outputs: true +include_attributes: true +include_extras: false +flat: false +rich_spec: + core.array.bands: + format: mpl_png + core.structure: + format: xsf +rich_dump_all: false diff --git a/src/aiida/tools/dumping/utils.py b/src/aiida/tools/dumping/utils.py index a631ac25e5..c4c1ac0fc1 100644 --- a/src/aiida/tools/dumping/utils.py +++ b/src/aiida/tools/dumping/utils.py @@ -14,6 +14,9 @@ import shutil from pathlib import Path +from rich.console import Console +from rich.table import Table + __all__ = ['prepare_dump_path'] logger = 
logging.getLogger(__name__) @@ -73,3 +76,93 @@ def prepare_dump_path( # Create directory if it doesn't exist or was removed path_to_validate.mkdir(exist_ok=True, parents=True) (path_to_validate / safeguard_file).touch() + + +def get_nodes_from_db(qb_instance, qb_filters: t.List | None = None, flat=False): + # Computers cannot be associated via `with_group` + # for qb_filter in qb_filters: + # qb.add_filter(**qb_filter) + + return_iterable = qb_instance.iterall() if qb_instance.count() > 10**3 else qb_instance.all() + + # Manual flattening as `iterall` doesn't have `flat` option unlike `all` + if flat: + return_iterable = [_[0] for _ in return_iterable] + + return return_iterable + + +# def validate_rich_options(rich_options, rich_config_file): +# if rich_options is not None and rich_config_file is not None: +# raise ValueError('Specify rich options either via CLI or config file, not both.') + +# else: +# logger.report('Neither `--rich-options` nor `--rich-config` set, using defaults.') + + +def dumper_pretty_print(dumper_instance, include_private_and_dunder: bool = False): + console = Console() + table = Table(title=f'Attributes and Methods of {dumper_instance.__class__.__name__}') + + # Adding columns to the table + table.add_column('Name', justify='left') + table.add_column('Type', justify='left') + table.add_column('Value', justify='left') + + # Lists to store attributes and methods + entries = [] + + # Iterate over the class attributes and methods + for attr_name in dir(dumper_instance): + # Exclude private attributes and dunder methods + attr_value = getattr(dumper_instance, attr_name) + entry_type = 'Attribute' if not callable(attr_value) else 'Method' + + if attr_name.startswith('_'): + if include_private_and_dunder: + entries.append((attr_name, entry_type, str(attr_value))) + else: + pass + else: + entries.append((attr_name, entry_type, str(attr_value))) + + # Sort entries: attributes first, then methods + entries.sort(key=lambda x: (x[1] == 'Method', x[0])) + + # Add sorted entries to the table + for name, entry_type, value in entries: + table.add_row(name, entry_type, value) + + # Print the formatted table + console.print(table) + + +# def check_storage_size_user(): +# from aiida.manage.manager import get_manager + +# manager = get_manager() +# storage = manager.get_profile_storage() + +# data = storage.get_info(detailed=True) +# repository_data = data['repository']['Size (MB)'] +# total_size_gb = sum(repository_data.values()) / 1024 +# if total_size_gb > 10: +# user_input = ( +# input('Repository size larger than 10gb. Do you still want to dump the profile data?
(y/N): ') +# .strip() +# .lower() +# ) + +# if user_input != 'y': +# sys.exit() + + +def sanitize_file_extension(filename: str | Path): + if isinstance(filename, Path): + filename = str(filename) + if filename.endswith('.mpl_pdf'): + filename = filename.replace('.mpl_pdf', '.pdf') + if filename.endswith('.mpl_png'): + filename = filename.replace('.mpl_png', '.png') + + return Path(filename) diff --git a/tests/tools/dumping/test_processes.py b/tests/tools/dumping/test_processes.py index 88dad0323e..c409a438dc 100644 --- a/tests/tools/dumping/test_processes.py +++ b/tests/tools/dumping/test_processes.py @@ -404,6 +404,7 @@ def test_dump_node_yaml(generate_calculation_node_io, tmp_path, generate_workcha process_dumper = ProcessDumper(include_attributes=False, include_extras=False) + (tmp_path / node_metadata_file).unlink() process_dumper._dump_node_yaml(process_node=wc_node, output_path=tmp_path) # Open the dumped YAML file and read its contents From 65214e8e323f19a5a6c36af72c460f98fca82e2e Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Thu, 23 Jan 2025 17:06:44 +0100 Subject: [PATCH 03/27] Mirroring of workflows and calculations works Either in groups, or not associated with any group. Either sorted by groups, or in a top-level flat hierarchy. "De-duplication" works by symlinking calculations if they are part of a workflow. Next, check what happens if a workflow is part of two groups -> Here, de-deplucation should actually make more sense. --- docs/source/reference/command_line.rst | 1 + src/aiida/cmdline/commands/cmd_profile.py | 132 +++------ src/aiida/cmdline/params/options/main.py | 142 +--------- src/aiida/tools/dumping/collection.py | 253 +++++++----------- src/aiida/tools/dumping/parser.py | 2 - src/aiida/tools/dumping/processes.py | 54 ++-- src/aiida/tools/dumping/test-config-file.yaml | 1 - 7 files changed, 179 insertions(+), 406 deletions(-) diff --git a/docs/source/reference/command_line.rst b/docs/source/reference/command_line.rst index 1e8370e5e8..283993fac9 100644 --- a/docs/source/reference/command_line.rst +++ b/docs/source/reference/command_line.rst @@ -398,6 +398,7 @@ Below is a list with all available subcommands. configure-rabbitmq Configure RabbitMQ for a profile. delete Delete one or more profiles. list Display a list of all available profiles. + mirror Dump all data in an AiiDA profile's storage to disk. set-default Set a profile as the default profile. setdefault (Deprecated) Set a profile as the default profile. setup Set up a new profile. 
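As an illustration of the `--rich-spec` option used by the `verdi profile mirror` command below, here is a minimal sketch of how `rich_from_cli` (added in `src/aiida/tools/dumping/rich.py` above) resolves a spec string. The spec values are hypothetical examples, and the ':'-separated syntax is inferred from the parsing code; empty fields fall back to `DEFAULT_CORE_EXPORT_MAPPING`:

    from aiida.tools.dumping.rich import rich_from_cli

    # Hypothetical spec string; each comma-separated entry is '<entry_point>:<exporter>:<export_format>'.
    # Empty fields fall back to the defaults, and the format may also be given as a '<key>=<value>' assignment.
    spec_dict = rich_from_cli(rich_spec='core.structure::cif,core.array.bands::export_format=mpl_png', rich_dump_all=False)
    assert spec_dict['core.structure']['export_format'] == 'cif'
    assert spec_dict['core.array.bands']['export_format'] == 'mpl_png'

With `rich_dump_all=True`, the remaining `core.*` entry points would keep their default exporters and file formats from `DEFAULT_CORE_EXPORT_MAPPING`.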
diff --git a/src/aiida/cmdline/commands/cmd_profile.py b/src/aiida/cmdline/commands/cmd_profile.py index f035977f2c..48fc6e98a3 100644 --- a/src/aiida/cmdline/commands/cmd_profile.py +++ b/src/aiida/cmdline/commands/cmd_profile.py @@ -277,39 +277,36 @@ def profile_delete(force, delete_data, profiles): @verdi_profile.command('mirror') @options.PATH() @options.OVERWRITE() -@options.INCREMENTAL() -@options.ORGANIZE_BY_GROUPS() -@options.DRY_RUN() +# @options.INCREMENTAL() @options.DUMP_PROCESSES() -@options.ONLY_TOP_LEVEL_WORKFLOWS() @options.DUMP_DATA() @options.DEDUPLICATE() -@options.DATA_HIDDEN() -@options.ALSO_RAW() -@options.ALSO_RICH() @options.INCLUDE_INPUTS() @options.INCLUDE_OUTPUTS() @options.INCLUDE_ATTRIBUTES() @options.INCLUDE_EXTRAS() @options.FLAT() +@options.ALSO_RAW() +@options.ALSO_RICH() @options.RICH_SPEC() @options.RICH_DUMP_ALL() @options.DUMP_CONFIG_FILE() @options.NODES() @options.GROUPS() +@options.ORGANIZE_BY_GROUPS() +@options.ONLY_TOP_LEVEL_WORKFLOWS() +@options.DRY_RUN() @click.pass_context -def storage_mirror( +def profile_mirror( ctx, path, overwrite, - incremental, organize_by_groups, dry_run, dump_processes, only_top_level_workflows, dump_data, deduplicate, - data_hidden, also_raw, also_rich, include_inputs, @@ -325,6 +322,7 @@ def storage_mirror( ): """Dump all data in an AiiDA profile's storage to disk.""" + from pathlib import Path from aiida import orm from aiida.tools.dumping.parser import DumpConfigParser @@ -337,39 +335,14 @@ def storage_mirror( profile = ctx.obj['profile'] - # from aiida.manage.manager import get_manager - - # manager = get_manager() - # storage = manager.get_profile_storage() - - # with spinner(): - # data = storage.get_info(detailed=True) - - # echo.echo_dictionary(data, sort_keys=False, fmt='yaml') - - # print(f"Profile name: {profile_name}") - - # # TODO: export computers alone, and groups - # t1 = time.time() - # qb = orm.QueryBuilder().append(orm.Node, tag='node', project=['uuid']) - # all_uuids = qb.all(flat=True) - # print(f"All UUIDs retrieved in {time.time() - t1:6.3f} s.") - - # t1 = time.time() - # with open('all-source-uuids.json', 'w') as fhandle: - # json.dump({'profile_name': profile_name, 'uuids': all_uuids}, fhandle) - # print(f"{len(all_uuids)} UUIDs written in {time.time() - t1:6.3f} s.") - if nodes and groups: echo.echo_critical('`nodes` and `groups` specified. Set only one.') - # if all_entries and groups: - # echo.echo_critical('`all_entries` and `groups` specified. Set only one.') if dump_config_file is None: general_kwargs = { 'path': path, 'overwrite': overwrite, - 'incremental': incremental, + # 'incremental': incremental, 'dry_run': dry_run, } @@ -379,19 +352,18 @@ def storage_mirror( 'include_attributes': include_attributes, 'include_extras': include_extras, 'flat': flat, - # "calculations_hidden": calculations_hidden } datadumper_kwargs = { 'also_raw': also_raw, 'also_rich': also_rich, - 'data_hidden': data_hidden, } collection_kwargs = { 'should_dump_processes': dump_processes, 'should_dump_data': dump_data, 'only_top_level_workflows': only_top_level_workflows, + 'organize_by_groups': organize_by_groups, } rich_kwargs = { @@ -420,13 +392,10 @@ def storage_mirror( path = general_kwargs['path'] overwrite = general_kwargs['overwrite'] dry_run = general_kwargs['dry_run'] - incremental = general_kwargs['incremental'] - - if not overwrite and incremental: - echo.echo_report('Overwrite set to false, but incremental dumping selected. 
Will keep existing directories.') + incremental = not overwrite - if not str(path).endswith(profile.name): - path /= profile.name + if path is None: + path = Path.cwd() / f'{profile.name}-mirror' # TODO: Implement proper dry-run feature dry_run_message = f"Dry run for dumping of profile `{profile.name}`'s data at path: `{path}`.\n" @@ -439,7 +408,7 @@ def storage_mirror( else: echo.echo_report(f"Dumping of profile `{profile.name}`'s data at path: `{path}`.") - SAFEGUARD_FILE = '.verdi_storage_dump' # noqa: N806 + SAFEGUARD_FILE = '.verdi_profile_mirror' # noqa: N806 try: prepare_dump_path( @@ -471,25 +440,15 @@ def storage_mirror( ) # dumper_pretty_print(process_dumper) - from aiida.tools.dumping.incremental import DumpNodeCollector - - dumpnodecollector = DumpNodeCollector(dump_parent_path=path) - - dumpnodecollector.update_uuids_before_dump() - dumpnodecollector.create_organized_uuid_dicts() - # dumpnodecollector.populate_uuid_dict() - - # raise SystemExit() - # TODO: Possibly implement specifying specific computers # TODO: Although, users could just specify the relevant nodes # TODO: Also add option to specify node types via entry points + # TODO: Use `batch_iter` from aiida.tools.archive.common # === Dump the data that is not associated with any group === if not groups: collection_dumper = CollectionDumper( dump_parent_path=path, - output_path=path, overwrite=overwrite, incremental=incremental, nodes=nodes, @@ -505,13 +464,13 @@ def storage_mirror( if dump_processes and collection_dumper._should_dump_processes(): echo.echo_report(f'Dumping processes not in any group for profile `{profile.name}`...') collection_dumper.dump_processes() + if dump_data: if not also_rich and not also_raw: echo.echo_critical('`--dump-data was given, but neither --also-raw or --also-rich specified.') echo.echo_report(f'Dumping data not in any group for profile {profile.name}...') - collection_dumper.dump_data_rich() - # collection_dumper.dump_plugin_data() + # collection_dumper.dump_data_rich() # === Dump data per-group if Groups exist in profile or are selected === # TODO: Invert default behavior here, as I typically want to dump all entries @@ -520,34 +479,29 @@ def storage_mirror( if not groups: # and all_entries: groups = orm.QueryBuilder().append(orm.Group).all(flat=True) - if groups is not None and not nodes: - for group in groups: - if organize_by_groups: - group_subdir = Path(*group.type_string.split('.')) - group_path = path / 'groups' / group_subdir / group.label - else: - group_path = path - - collection_dumper = CollectionDumper( - dump_parent_path=path, - output_path=group_path, - overwrite=overwrite, - incremental=incremental, - group=group, - **collection_kwargs, - **rich_kwargs, - process_dumper=process_dumper, - data_dumper=data_dumper, - ) - collection_dumper.create_entity_counter() - if dump_processes: - # The additional `_should_dump_processes` check here ensures that no reporting like - # "Dumping processes for group `SSSP/1.3/PBE/efficiency`" is printed for groups that - # don't contain processes - if collection_dumper._should_dump_processes(): - echo.echo_report(f'Dumping processes for group `{group.label}`...') - collection_dumper.dump_processes() - if dump_data: - echo.echo_report(f'Dumping data for group `{group.label}`...') - collection_dumper.dump_data_rich() - # collection_dumper.dump_plugin_data() \ No newline at end of file + if groups: + if not nodes: + for group in groups: + collection_dumper = CollectionDumper( + dump_parent_path=path, + overwrite=overwrite, + 
incremental=incremental, + group=group, + **collection_kwargs, + **rich_kwargs, + process_dumper=process_dumper, + data_dumper=data_dumper, + deduplicate=deduplicate, + ) + + collection_dumper.create_entity_counter() + if dump_processes: + # The additional `_should_dump_processes` check here ensures that no reporting like + # "Dumping processes for group `SSSP/1.3/PBE/efficiency`" is printed for groups that + # don't contain processes + if collection_dumper._should_dump_processes(): + echo.echo_report(f'Dumping processes for group `{group.label}`...') + collection_dumper.dump_processes() + if dump_data: + echo.echo_report(f'Dumping data for group `{group.label}`...') + collection_dumper.dump_data_rich() diff --git a/src/aiida/cmdline/params/options/main.py b/src/aiida/cmdline/params/options/main.py index 6f19a3c465..4d1c308c43 100644 --- a/src/aiida/cmdline/params/options/main.py +++ b/src/aiida/cmdline/params/options/main.py @@ -818,7 +818,7 @@ def set_log_level(ctx, _param, value): DUMP_DATA = OverridableOption( '--dump-data/--no-dump-data', is_flag=True, - default=True, + default=False, type=bool, show_default=True, help='Dump data nodes in a dedicated directory.', @@ -833,7 +833,7 @@ def set_log_level(ctx, _param, value): ) ALSO_RAW = OverridableOption( - '--also-raw/--not-also-raw', + '--also-raw/--no-also-raw', is_flag=True, default=False, show_default=True, @@ -841,9 +841,9 @@ def set_log_level(ctx, _param, value): ) ALSO_RICH = OverridableOption( - '--also-rich/--not-also-rich', + '--also-rich/--no-also-rich', is_flag=True, - default=True, + default=False, show_default=True, help='Dump also nicely prepared outputs, e.g. CIF for structures or PDF image for bands.', ) @@ -884,14 +884,14 @@ def set_log_level(ctx, _param, value): '--include-inputs/--exclude-inputs', default=True, show_default=True, - help='Include the linked input nodes of the `CalculationNode`(s).', + help='Include linked input nodes of `CalculationNode`(s).', ) INCLUDE_OUTPUTS = OverridableOption( '--include-outputs/--exclude-outputs', default=False, show_default=True, - help='Include the linked output nodes of the `CalculationNode`(s).', + help='Include linked output nodes of `CalculationNode`(s).', ) INCLUDE_ATTRIBUTES = OverridableOption( @@ -917,7 +917,7 @@ def set_log_level(ctx, _param, value): ) ONLY_TOP_LEVEL_WORKFLOWS = OverridableOption( - '--only-top-level-workflows/--not-only-top-level-workflows', + '--only-top-level-workflows/--no-only-top-level-workflows', is_flag=True, default=True, type=bool, @@ -925,56 +925,6 @@ def set_log_level(ctx, _param, value): help='Dump only the top-level workflows in their own dedicated directories.', ) -DUMP_PROCESSES = OverridableOption( - '--dump-processes/--no-dump-processes', - is_flag=True, - default=True, - show_default=True, - help='Dump process data.', -) - -DUMP_DATA = OverridableOption( - '--dump-data/--no-dump-data', - is_flag=True, - default=True, - type=bool, - show_default=True, - help='Dump data nodes in a dedicated directory.', -) - -CALCULATIONS_HIDDEN = OverridableOption( - '--calculations-hidden/--calculations-non-hidden', - is_flag=True, - default=True, - type=bool, - show_default=True, - help='Dump all `orm.CalculationNode`s in the hidden directory and link to there.', -) - -DATA_HIDDEN = OverridableOption( - '--data-hidden/--data-non-hidden', - is_flag=True, - default=True, - show_default=True, - help='Dump all `orm.Data` in the hidden directory and link to there.', -) - -ALSO_RAW = OverridableOption( - '--also-raw/--not-also-raw', - is_flag=True, - 
default=False, - show_default=True, - help='Dump the `attributes` of all nodes related to the Process.', -) - -ALSO_RICH = OverridableOption( - '--also-rich/--not-also-rich', - is_flag=True, - default=True, - show_default=True, - help='Dump also nicely prepared outputs, e.g. CIF for structures or PDF image for bands.', -) - INCREMENTAL = OverridableOption( '--incremental/--no-incremental', is_flag=True, @@ -989,81 +939,3 @@ def set_log_level(ctx, _param, value): type=str, help='Specifications for rich data dumping.', ) - -DUMP_CONFIG_FILE = OverridableOption( - '--dump-config-file', - default=None, - type=types.FileOrUrl(), - help='Provide dumping options via a config file in YAML format.', -) - -RICH_DUMP_ALL = OverridableOption( - '--rich-dump-all/--no-rich-dump-all', - default=True, - is_flag=True, - type=bool, - show_default=True, - help='If a rich specification is provided, this triggers if all other Data nodes should also be dumped or not.', -) - -ORGANIZE_BY_GROUPS = OverridableOption( - '--organize-by-groups/--no-organize-by-groups', - default=True, - is_flag=True, - type=bool, - show_default=True, - help='If the collection of nodes to be dumped is organized in groups, reproduce its hierarchy.', -) - -INCLUDE_INPUTS = OverridableOption( - '--include-inputs/--exclude-inputs', - default=True, - show_default=True, - help='Include the linked input nodes of the `CalculationNode`(s).', -) - -INCLUDE_OUTPUTS = OverridableOption( - '--include-outputs/--exclude-outputs', - default=False, - show_default=True, - help='Include the linked output nodes of the `CalculationNode`(s).', -) - -INCLUDE_ATTRIBUTES = OverridableOption( - '--include-attributes/--exclude-attributes', - default=True, - show_default=True, - help='Include attributes in the `.aiida_node_metadata.yaml` written for every `ProcessNode`.', -) - -INCLUDE_EXTRAS = OverridableOption( - '--include-extras/--exclude-extras', - default=True, - show_default=True, - help='Include extras in the `.aiida_node_metadata.yaml` written for every `ProcessNode`.', -) - -FLAT = OverridableOption( - '-f', - '--flat', - is_flag=True, - default=False, - help='Dump files in a flat directory for every step of a workflow.', -) - -ONLY_TOP_LEVEL_WORKFLOWS = OverridableOption( - '--only-top-level-workflows/--not-only-top-level-workflows', - is_flag=True, - default=True, - type=bool, - show_default=True, - help='Dump only the top-level workflows in their own dedicated directories.', -) - -INCREMENTAL = OverridableOption( - '--incremental/--non-incremental', - is_flag=True, - default=True, - show_default=True, - help='Dump files incrementally when dumping collections of data to disk.', -) diff --git a/src/aiida/tools/dumping/collection.py b/src/aiida/tools/dumping/collection.py index 169f5b3862..9cf0dccde6 100644 --- a/src/aiida/tools/dumping/collection.py +++ b/src/aiida/tools/dumping/collection.py @@ -10,17 +10,14 @@ from __future__ import annotations -import contextlib import itertools as it import logging -import os from collections import Counter from pathlib import Path from aiida import orm from aiida.tools.dumping.data import DataDumper from aiida.tools.dumping.processes import ProcessDumper -from aiida.tools.dumping.utils import sanitize_file_extension logger = logging.getLogger(__name__) @@ -34,41 +31,46 @@ class CollectionDumper: def __init__( self, - *args, dump_parent_path: Path = Path().cwd(), - output_path: Path = Path().cwd(), + nodes: set = {}, + group: orm.Group | str | None = None, overwrite: bool = False, incremental: bool = True, 
should_dump_processes: bool = False, should_dump_data: bool = False, only_top_level_workflows: bool = True, - group: orm.Group | None = None, - nodes: set = {}, + rich_dump_all: bool = True, + deduplicate: bool = True, + organize_by_groups: bool = True, process_dumper: ProcessDumper | None = None, data_dumper: DataDumper | None = None, - **kwargs, ): - self.args = args self.dump_parent_path = dump_parent_path - self.output_path = output_path self.overwrite = overwrite self.incremental = incremental self.should_dump_processes = should_dump_processes self.should_dump_data = should_dump_data self.only_top_level_workflows = only_top_level_workflows self.nodes = nodes + self.deduplicate = deduplicate + self.process_dumper = process_dumper self.data_dumper = data_dumper - self.kwargs = kwargs - - self.hidden_aiida_path = dump_parent_path / '.aiida-raw-data' # Allow passing of group via label if isinstance(group, str): - group = orm.Group.get(self.group) + group = orm.Group.get(group) + self.group = group - self.output_path = output_path + if organize_by_groups: + if group is not None: + group_subdir = Path(*group.type_string.split('.')) + self.output_path = self.dump_parent_path / 'groups' / group_subdir / self.group.label + else: + self.output_path = self.dump_parent_path / 'no-group' + else: + self.output_path = self.dump_parent_path if not hasattr(self, 'entity_counter'): self.create_entity_counter() @@ -149,31 +151,27 @@ def _should_dump_processes(self) -> bool: else: return len([node for node in self.nodes if isinstance(node, orm.ProcessNode)]) > 0 - def _dump_calculations_hidden(self, calculations): - # ? Dump only top-level workchains, as that includes sub-workchains already - + def _dump_calculations(self, calculations): for calculation in calculations: calculation_dumper = self.process_dumper - calculation_dump_path = self.hidden_aiida_path / 'calculations' / calculation.uuid + calculation_dump_path = ( + self.output_path + / 'calculations' + / calculation_dumper._generate_default_dump_path(process_node=calculation, prefix='') + ) - # if not self.dry_run: - # with contextlib.suppress(FileExistsError): - try: + if calculation.caller is None or (calculation.caller is not None and self.deduplicate): calculation_dumper._dump_calculation(calculation_node=calculation, output_path=calculation_dump_path) - except: - raise - - # # To make development quicker - # if iworkflow_ > 1: - # break - def _dump_link_workflows(self, workflows, link_calculations: bool = True): + def _dump_workflows(self, workflows): # workflow_nodes = get_nodes_from_db(aiida_node_type=orm.WorkflowNode, with_group=self.group, flat=True) for workflow in workflows: + # if workflow.pk == 47: + # breakpoint() + workflow_dumper = self.process_dumper - link_calculations_dir = self.hidden_aiida_path / 'calculations' # TODO: If the GroupDumper is called from somewhere else outside, prefix the path with `groups/core` etc workflow_dump_path = ( self.output_path @@ -181,36 +179,14 @@ def _dump_link_workflows(self, workflows, link_calculations: bool = True): / workflow_dumper._generate_default_dump_path(process_node=workflow, prefix=None) ) # logger.report(f'WORKFLOW_DUMP_PATH: {workflow_dump_path}') - workflow_dumper._dump_workflow( workflow_node=workflow, output_path=workflow_dump_path, - link_calculations=link_calculations, - link_calculations_dir=link_calculations_dir, + link_calculations=self.deduplicate, + link_calculations_dir=self.output_path / 'calculations', ) - def _link_calculations_hidden(self, calculations): - # 
calculation_nodes = get_nodes_from_db(aiida_node_type=orm.CalculationNode, with_group=self.group, flat=True) - for calculation_node in calculations: - calculation_dumper = self.process_dumper - - link_calculations_dir = self.hidden_aiida_path / 'calculations' - - calculation_dump_path = self.output_path / 'calculations' - calculation_dump_path.mkdir(parents=True, exist_ok=True) - calculation_dump_path = calculation_dump_path / calculation_dumper._generate_default_dump_path( - process_node=calculation_node - ) - - with contextlib.suppress(FileExistsError): - os.symlink(link_calculations_dir / calculation_node.uuid, calculation_dump_path) - def dump_processes(self): - # ? Here, these could be all kinds of entities that could be grouped in AiiDA - # if len(self.entities_to_dump) > 0: - # pass - # # nodes = self.entities_to_dump - # else: nodes = self.get_collection_nodes() workflows = [node for node in nodes if isinstance(node, orm.WorkflowNode)] @@ -232,107 +208,72 @@ def dump_processes(self): self.output_path.mkdir(exist_ok=True, parents=True) - print(f'self.process_dumper.calculations_hidden: {self.process_dumper.calculations_hidden}') - print(f'self.output_path: {self.output_path}') - if self.process_dumper.calculations_hidden: - print('dump hidden') - self._dump_calculations_hidden(calculations=calculations) - self._dump_link_workflows(workflows=workflows) - self._link_calculations_hidden(calculations=calculations) - else: - print('dump non-hidden') - for workflow in workflows: - workflow_path = ( - self.output_path - / 'workflows' - / self.process_dumper._generate_default_dump_path(process_node=workflow) - ) - self.process_dumper.dump(process_node=workflow, output_path=workflow_path) + self._dump_calculations(calculations=calculations) + self._dump_workflows(workflows=workflows) # TODO: Add `dump_data_raw` here, as well - def dump_data_rich(self): - nodes = self.get_collection_nodes() - nodes = [node for node in nodes if isinstance(node, (orm.Data, orm.Computer))] - # Here, when providing logic to set the exporters and fileformat via the rich-options, don't have to filter - # anymore for `core` - nodes = [node for node in nodes if node.entry_point.name.startswith('core')] - if len(nodes) == 0: - return - - self.output_path.mkdir(exist_ok=True, parents=True) - data_dumper = self.data_dumper - - for data_node in nodes: - node_entry_point_name = data_node.entry_point.name - - # Get the fileformat and exporter for the data node - try: - fileformat = data_dumper.rich_spec_dict[node_entry_point_name]['export_format'] - exporter = data_dumper.rich_spec_dict[node_entry_point_name]['exporter'] - - # If options for the rich dumping are specified and not all the other defaults are being used - # Some entry_points might not be inside the `rich_spec_dict` - except KeyError: - continue - - except: - # Raise all exceptions here during development - raise - - # Don't go further if no importer implemented for a data type anyway - if exporter is None: - continue - - try: - # Generate a nice filename and sanitize it - nice_output_path = self.output_path / 'data' / data_node.__class__.__name__.lower() - nice_fname = data_dumper.generate_output_fname_rich(data_node=data_node, fileformat=fileformat).replace( - '__', '_' - ) - nice_fname = sanitize_file_extension(nice_fname) - - if data_dumper.data_hidden: - # Define paths for hidden dump and linking - hidden_output_path = self.hidden_aiida_path / 'data' / data_node.__class__.__name__.lower() - uuid_fname = 
sanitize_file_extension(f'{data_node.uuid}.{fileformat}') - - # Dump the data in the hidden directory - data_dumper.dump_core_data_node_rich(data_node, hidden_output_path, uuid_fname) - - # Link the hidden file to the expected output path - (nice_output_path / nice_fname).parent.mkdir(exist_ok=True, parents=True) - os.symlink(hidden_output_path / uuid_fname, nice_output_path / nice_fname) - - else: - # Dump the data in the non-hidden directory - data_dumper.dump_core_data_node_rich(data_node, nice_output_path, nice_fname) - - except TypeError: - # Handle case when no exporter is implemented for a given data_node type - raise - except OSError: - # A Data node, e.g. a Code might already be existent, so don't worry about this exception - continue - except Exception: - raise - - def dump_plugin_data(self): - return - # from importlib.metadata import entry_points - - # plugin_data_entry_points = [entry_point.name for entry_point in entry_points(group='aiida.data')] - # # print(plugin_data_entry_points) - # # print(self.entity_counter) - # from aiida.manage.manager import get_manager - - # manager = get_manager() - # storage = manager.get_profile_storage() - # orm_entities = storage.get_orm_entities(detailed=True)['Nodes']['node_types'] - # non_core_data_entities = [ - # orm_entity - # for orm_entity in orm_entities - # if orm_entity.startswith('data') and not orm_entity.startswith('data.core') - # ] - # # TODO: Implement dumping here. Stashed for now, as both `HubbardStructureData` and `UpfData` I wanted to use - # # TODO: for testing don't implement `export` either way - # # print(non_core_data_entities) + # def dump_data_rich(self): + # nodes = self.get_collection_nodes() + # nodes = [node for node in nodes if isinstance(node, (orm.Data, orm.Computer))] + # # Here, when providing logic to set the exporters and fileformat via the rich-options, don't have to filter + # # anymore for `core` + # nodes = [node for node in nodes if node.entry_point.name.startswith('core')] + # if len(nodes) == 0: + # return + + # self.output_path.mkdir(exist_ok=True, parents=True) + # data_dumper = self.data_dumper + + # for data_node in nodes: + # node_entry_point_name = data_node.entry_point.name + + # # Get the fileformat and exporter for the data node + # try: + # fileformat = data_dumper.rich_spec_dict[node_entry_point_name]['export_format'] + # exporter = data_dumper.rich_spec_dict[node_entry_point_name]['exporter'] + + # # If options for the rich dumping are specified and not all the other defaults are being used + # # Some entry_points might not be inside the `rich_spec_dict` + # except KeyError: + # continue + + # except: + # # Raise all exceptions here during development + # raise + + # # Don't go further if no importer implemented for a data type anyway + # if exporter is None: + # continue + + # try: + # # Generate a nice filename and sanitize it + # nice_output_path = self.output_path / 'data' / data_node.__class__.__name__.lower() + # nice_fname = data_dumper.generate_output_fname_rich(data_node=data_node, fileformat=fileformat).replace( + # '__', '_' + # ) + # nice_fname = sanitize_file_extension(nice_fname) + + # if data_dumper.data_hidden: + # # Define paths for hidden dump and linking + # hidden_output_path = self.hidden_aiida_path / 'data' / data_node.__class__.__name__.lower() + # uuid_fname = sanitize_file_extension(f'{data_node.uuid}.{fileformat}') + + # # Dump the data in the hidden directory + # data_dumper.dump_core_data_node_rich(data_node, hidden_output_path, uuid_fname) + + # # Link the 
hidden file to the expected output path + # (nice_output_path / nice_fname).parent.mkdir(exist_ok=True, parents=True) + # os.symlink(hidden_output_path / uuid_fname, nice_output_path / nice_fname) + + # else: + # # Dump the data in the non-hidden directory + # data_dumper.dump_core_data_node_rich(data_node, nice_output_path, nice_fname) + + # except TypeError: + # # Handle case when no exporter is implemented for a given data_node type + # raise + # except OSError: + # # A Data node, e.g. a Code might already be existent, so don't worry about this exception + # continue + # except Exception: + # raise diff --git a/src/aiida/tools/dumping/parser.py b/src/aiida/tools/dumping/parser.py index cc19b0f141..39288929e6 100644 --- a/src/aiida/tools/dumping/parser.py +++ b/src/aiida/tools/dumping/parser.py @@ -25,13 +25,11 @@ def parse_config_file(config_file: str | Path | None) -> dict: 'include_attributes': config.get('include_attributes', True), 'include_extras': config.get('include_extras', False), 'flat': config.get('flat', False), - 'calculations_hidden': config.get('calculations_hidden', True), } datadumper_kwargs = { 'also_raw': config.get('also_raw', False), 'also_rich': config.get('also_rich', True), - 'data_hidden': config.get('data_hidden', True), } collection_kwargs = { diff --git a/src/aiida/tools/dumping/processes.py b/src/aiida/tools/dumping/processes.py index 29fbef07c9..fcc8671ff6 100644 --- a/src/aiida/tools/dumping/processes.py +++ b/src/aiida/tools/dumping/processes.py @@ -58,7 +58,6 @@ def __init__( overwrite: bool = False, incremental: bool = True, flat: bool = False, - calculations_hidden: bool = True, include_inputs: bool = True, include_outputs: bool = False, include_attributes: bool = True, @@ -66,7 +65,7 @@ def __init__( rich_options: str = '', rich_config_file: Path | None = None, rich_dump_all: bool = True, - data_dumper: DataDumper | None = DataDumper(), + data_dumper: DataDumper = DataDumper(), dump_unsealed: bool = False, **kwargs, ) -> None: @@ -86,8 +85,6 @@ def __init__( self.kwargs = kwargs self.dump_unsealed = dump_unsealed - self.hidden_aiida_path = dump_parent_path / '.aiida-raw-data' - @staticmethod def _generate_default_dump_path(process_node: ProcessNode, prefix: str = 'dump') -> Path: """Simple helper function to generate the default parent-dumping directory if none given. @@ -99,13 +96,21 @@ def _generate_default_dump_path(process_node: ProcessNode, prefix: str = 'dump') :return: The absolute default parent dump path. 
""" - pk = process_node.pk - # TODO: Use UUID[:8] here + entities_to_dump = [] + + if prefix: + # No '' and None + entities_to_dump += [prefix] + try: - return Path(f'{prefix}-{process_node.process_label}-{pk}') + entities_to_dump += [process_node.process_label] except AttributeError: # This case came up during testing, not sure how relevant it actually is - return Path(f'{prefix}-{process_node.process_type}-{pk}') + entities_to_dump += [process_node.process_type] + + entities_to_dump += [str(process_node.pk)] + + return Path('-'.join(entities_to_dump)) @staticmethod def _generate_readme(process_node: ProcessNode, output_path: Path) -> None: @@ -171,7 +176,7 @@ def _generate_readme(process_node: ProcessNode, output_path: Path) -> None: (output_path / 'README.md').write_text(_readme_string) @staticmethod - def _generate_child_node_label(index: int, link_triple: LinkTriple) -> str: + def _generate_child_node_label(index: int, link_triple: LinkTriple, append_pk: bool = True) -> str: """Small helper function to generate and clean directory label for child nodes during recursion. :param index: Index assigned to step at current level of recursion. @@ -194,6 +199,9 @@ def _generate_child_node_label(index: int, link_triple: LinkTriple) -> str: if process_type is not None and process_type != link_label: label_list += [process_type] + if append_pk: + label_list += [str(node.pk)] + node_label = '-'.join(label_list) # `CALL-` as part of the link labels also for MultiplyAddWorkChain -> Seems general enough, so remove node_label = node_label.replace('CALL-', '') @@ -260,7 +268,7 @@ def _dump_workflow( output_path: Path, io_dump_paths: List[str | Path] | None = None, link_calculations: bool = False, - link_calculations_dir: str | None = None, + link_calculations_dir: Path | None = None, ) -> None: """Recursive function to traverse a `WorkflowNode` and dump its `CalculationNode` s. @@ -302,7 +310,10 @@ def _dump_workflow( ) else: try: - os.symlink(link_calculations_dir / child_node.uuid, child_output_path) + calculation_dump_path = link_calculations_dir / ProcessDumper._generate_default_dump_path( + process_node=child_node, prefix='' + ) + os.symlink(calculation_dump_path, child_output_path) except FileExistsError: pass @@ -354,22 +365,19 @@ def _dump_calculation( output_path=output_path / io_dump_mapping.inputs, link_triples=input_links ) - if self.data_dumper.also_rich: rich_data_output_path = output_path / io_dump_mapping.inputs - # if not self.data_dumper.data_hidden: - # rich_data_output_path = output_path / io_dump_mapping.inputs - # else: - # # TODO: Currently, when dumping only one selected workflow, if rich dumping is activated, but - # # TODO: `data-hidden` is set, no data nodes were actually being dumped - # # TODO: With the current implementation below, they are dumped, but not in the same structure as for the - # # TODO: `dump_rich_core` function. Quick fix for now - # pass + # if not self.data_dumper.data_hidden: + # rich_data_output_path = output_path / io_dump_mapping.inputs + # else: + # # TODO: Currently, when dumping only one selected workflow, if rich dumping is activated, but + # # TODO: `data-hidden` is set, no data nodes were actually being dumped + # # TODO: With the current implementation below, they are dumped, but not in the same structure as for the + # # TODO: `dump_rich_core` function. 
Quick fix for now + # pass # Only dump the rich data output files in the process directories if data_hidden is False - self._dump_calculation_io_files_rich( - output_path=rich_data_output_path, link_triples=input_links - ) + self._dump_calculation_io_files_rich(output_path=rich_data_output_path, link_triples=input_links) # Dump the node_outputs apart from `retrieved` if self.include_outputs: output_links = list(calculation_node.base.links.get_outgoing(link_type=LinkType.CREATE)) diff --git a/src/aiida/tools/dumping/test-config-file.yaml b/src/aiida/tools/dumping/test-config-file.yaml index b868f8afcb..63bbe38180 100644 --- a/src/aiida/tools/dumping/test-config-file.yaml +++ b/src/aiida/tools/dumping/test-config-file.yaml @@ -6,7 +6,6 @@ organize_by_groups: true dump_processes: true only_top_level_workflows: true dump_data: true -calculations_hidden: true data_hidden: true also_raw: false also_rich: true From 17f2730ec87030896f3bf3fcb12708f68381a2d7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 23 Jan 2025 16:08:52 +0000 Subject: [PATCH 04/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/aiida/cmdline/commands/cmd_process.py | 34 +++++++++++------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/src/aiida/cmdline/commands/cmd_process.py b/src/aiida/cmdline/commands/cmd_process.py index ed0b3ccdd0..842232fc8e 100644 --- a/src/aiida/cmdline/commands/cmd_process.py +++ b/src/aiida/cmdline/commands/cmd_process.py @@ -558,7 +558,7 @@ def process_repair(manager, broker, dry_run): echo.echo_report(f'Revived process `{pid}`') -@verdi_process.command("dump") +@verdi_process.command('dump') @arguments.PROCESS() @options.PATH() @options.OVERWRITE() @@ -613,30 +613,30 @@ def process_dump( node data for further inspection. """ + from aiida.tools.archive.exceptions import ExportValidationError from aiida.tools.dumping.data import DataDumper from aiida.tools.dumping.processes import ProcessDumper - from aiida.tools.archive.exceptions import ExportValidationError # from aiida.tools.dumping.utils import validate_rich_options from aiida.tools.dumping.rich import rich_from_cli processdumper_kwargs = { - "include_inputs": include_inputs, - "include_outputs": include_outputs, - "include_attributes": include_attributes, - "include_extras": include_extras, - "flat": flat, - "dump_unsealed": dump_unsealed, - "incremental": incremental, + 'include_inputs': include_inputs, + 'include_outputs': include_outputs, + 'include_attributes': include_attributes, + 'include_extras': include_extras, + 'flat': flat, + 'dump_unsealed': dump_unsealed, + 'incremental': incremental, } rich_kwargs = { - "rich_dump_all": rich_dump_all, + 'rich_dump_all': rich_dump_all, } datadumper_kwargs = { - "also_raw": also_raw, - "also_rich": also_rich, + 'also_raw': also_raw, + 'also_rich': also_rich, } # if also_rich: @@ -672,15 +672,13 @@ def process_dump( output_path=path, ) echo.echo_success( - f"Raw files for {process.__class__.__name__} <{process.pk}> dumped into folder `{dump_path}`." + f'Raw files for {process.__class__.__name__} <{process.pk}> dumped into folder `{dump_path}`.' ) except FileExistsError: echo.echo_critical( - "Dumping directory exists and overwrite is False. Set overwrite to True, or delete directory manually." + 'Dumping directory exists and overwrite is False. Set overwrite to True, or delete directory manually.' 
) except ExportValidationError as e: - echo.echo_critical(f"{e!s}") + echo.echo_critical(f'{e!s}') except Exception as e: - echo.echo_critical( - f"Unexpected error while dumping {process.__class__.__name__} <{process.pk}>:\n ({e!s})." - ) + echo.echo_critical(f'Unexpected error while dumping {process.__class__.__name__} <{process.pk}>:\n ({e!s}).') From 0d37e59ec4d0b68890d5a0215de1fd82ddcce85d Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Mon, 27 Jan 2025 13:31:28 +0100 Subject: [PATCH 05/27] Major code refactor Add `BaseDumper`, `ProfileDumper` and `CollecionDumper` -> `GroupDumper` Remove code related to data and rich dumping --- src/aiida/cmdline/commands/cmd_process.py | 37 +-- src/aiida/cmdline/commands/cmd_profile.py | 196 +++--------- src/aiida/cmdline/params/options/main.py | 10 - src/aiida/tools/dumping/__init__.py | 10 +- src/aiida/tools/dumping/base.py | 25 ++ src/aiida/tools/dumping/data.py | 292 ------------------ .../tools/dumping/{collection.py => group.py} | 159 ++-------- src/aiida/tools/dumping/parser.py | 1 - .../dumping/{processes.py => process.py} | 202 ++---------- src/aiida/tools/dumping/profile.py | 102 ++++++ src/aiida/tools/dumping/test-config-file.yaml | 1 - tests/tools/dumping/test_processes.py | 2 +- 12 files changed, 240 insertions(+), 797 deletions(-) create mode 100644 src/aiida/tools/dumping/base.py delete mode 100644 src/aiida/tools/dumping/data.py rename src/aiida/tools/dumping/{collection.py => group.py} (52%) rename src/aiida/tools/dumping/{processes.py => process.py} (71%) create mode 100644 src/aiida/tools/dumping/profile.py diff --git a/src/aiida/cmdline/commands/cmd_process.py b/src/aiida/cmdline/commands/cmd_process.py index 842232fc8e..30e75a4295 100644 --- a/src/aiida/cmdline/commands/cmd_process.py +++ b/src/aiida/cmdline/commands/cmd_process.py @@ -614,11 +614,7 @@ def process_dump( """ from aiida.tools.archive.exceptions import ExportValidationError - from aiida.tools.dumping.data import DataDumper - from aiida.tools.dumping.processes import ProcessDumper - - # from aiida.tools.dumping.utils import validate_rich_options - from aiida.tools.dumping.rich import rich_from_cli + from aiida.tools.dumping.process import ProcessDumper processdumper_kwargs = { 'include_inputs': include_inputs, @@ -630,40 +626,9 @@ def process_dump( 'incremental': incremental, } - rich_kwargs = { - 'rich_dump_all': rich_dump_all, - } - - datadumper_kwargs = { - 'also_raw': also_raw, - 'also_rich': also_rich, - } - - # if also_rich: - # try: - # validate_rich_options( - # rich_options=rich_options, rich_config_file=rich_config_file - # ) - # except ValueError as exc: - # echo.echo_critical(f"{exc!s}") - - if rich_spec is not None: - rich_spec_dict = rich_from_cli(rich_spec=rich_spec, **rich_kwargs) - else: - rich_spec_dict = {} - - data_dumper = DataDumper( - overwrite=overwrite, - rich_spec_dict=rich_spec_dict, - **datadumper_kwargs, - **rich_kwargs, - ) - process_dumper = ProcessDumper( overwrite=overwrite, **processdumper_kwargs, - **rich_kwargs, - data_dumper=data_dumper, ) try: diff --git a/src/aiida/cmdline/commands/cmd_profile.py b/src/aiida/cmdline/commands/cmd_profile.py index 48fc6e98a3..2c16566672 100644 --- a/src/aiida/cmdline/commands/cmd_profile.py +++ b/src/aiida/cmdline/commands/cmd_profile.py @@ -19,7 +19,7 @@ from aiida.cmdline.utils import defaults, echo from aiida.common import exceptions from aiida.manage.configuration import Profile, create_profile, get_config -from aiida.tools.dumping import CollectionDumper, DataDumper, ProcessDumper +from 
aiida.tools.dumping import GroupDumper, ProfileDumper, ProcessDumper @verdi.group('profile') @@ -272,29 +272,20 @@ def profile_delete(force, delete_data, profiles): echo.echo_success(f'Profile `{profile.name}` was deleted.') -# ? Specify groups via giving the groups, or just enabling "groups" and then all are dumped? -# ? Provide some mechanism to allow for both, e.g. if no argument is provided, all groups are dumped @verdi_profile.command('mirror') @options.PATH() @options.OVERWRITE() # @options.INCREMENTAL() @options.DUMP_PROCESSES() -@options.DUMP_DATA() @options.DEDUPLICATE() @options.INCLUDE_INPUTS() @options.INCLUDE_OUTPUTS() @options.INCLUDE_ATTRIBUTES() @options.INCLUDE_EXTRAS() @options.FLAT() -@options.ALSO_RAW() -@options.ALSO_RICH() -@options.RICH_SPEC() -@options.RICH_DUMP_ALL() @options.DUMP_CONFIG_FILE() -@options.NODES() @options.GROUPS() @options.ORGANIZE_BY_GROUPS() -@options.ONLY_TOP_LEVEL_WORKFLOWS() @options.DRY_RUN() @click.pass_context def profile_mirror( @@ -304,94 +295,41 @@ def profile_mirror( organize_by_groups, dry_run, dump_processes, - only_top_level_workflows, - dump_data, deduplicate, - also_raw, - also_rich, include_inputs, include_outputs, include_attributes, include_extras, flat, - rich_spec, - rich_dump_all, dump_config_file, - nodes, groups, ): """Dump all data in an AiiDA profile's storage to disk.""" from pathlib import Path + from datetime import datetime from aiida import orm from aiida.tools.dumping.parser import DumpConfigParser - from aiida.tools.dumping.rich import ( - DEFAULT_CORE_EXPORT_MAPPING, - rich_from_cli, - rich_from_config, - ) from aiida.tools.dumping.utils import prepare_dump_path + from aiida.tools.dumping.base import BaseDumper profile = ctx.obj['profile'] - if nodes and groups: - echo.echo_critical('`nodes` and `groups` specified. Set only one.') - - if dump_config_file is None: - general_kwargs = { - 'path': path, - 'overwrite': overwrite, - # 'incremental': incremental, - 'dry_run': dry_run, - } - - processdumper_kwargs = { - 'include_inputs': include_inputs, - 'include_outputs': include_outputs, - 'include_attributes': include_attributes, - 'include_extras': include_extras, - 'flat': flat, - } - - datadumper_kwargs = { - 'also_raw': also_raw, - 'also_rich': also_rich, - } - - collection_kwargs = { - 'should_dump_processes': dump_processes, - 'should_dump_data': dump_data, - 'only_top_level_workflows': only_top_level_workflows, - 'organize_by_groups': organize_by_groups, - } - - rich_kwargs = { - 'rich_dump_all': rich_dump_all, - } - - if rich_spec is not None: - rich_spec_dict = rich_from_cli(rich_spec=rich_spec, **rich_kwargs) - else: - rich_spec_dict = DEFAULT_CORE_EXPORT_MAPPING + # if nodes and groups: + # echo.echo_critical('`nodes` and `groups` specified. Set only one.') - # TODO: Also allow for mixing. Currently one can _only_ specify either the config file, or the arguments on the - # TODO: command line - else: - kwarg_dicts_from_config = DumpConfigParser.parse_config_file(dump_config_file) + # if dump_config_file is None: - general_kwargs = kwarg_dicts_from_config['general_kwargs'] - processdumper_kwargs = kwarg_dicts_from_config['processdumper_kwargs'] - datadumper_kwargs = kwarg_dicts_from_config['datadumper_kwargs'] - collection_kwargs = kwarg_dicts_from_config['collection_kwargs'] - rich_kwargs = kwarg_dicts_from_config['rich_kwargs'] + # # TODO: Also allow for mixing. 
Currently one can _only_ specify either the config file, or the arguments on the + # # TODO: command line + # else: + # kwarg_dicts_from_config = DumpConfigParser.parse_config_file(dump_config_file) - rich_spec_dict = rich_from_config(kwarg_dicts_from_config['rich_spec'], **rich_kwargs) + # general_kwargs = kwarg_dicts_from_config['general_kwargs'] + # processdumper_kwargs = kwarg_dicts_from_config['processdumper_kwargs'] + # datadumper_kwargs = kwarg_dicts_from_config['datadumper_kwargs'] - # Obtain these specifically for easy access and modifications - path = general_kwargs['path'] - overwrite = general_kwargs['overwrite'] - dry_run = general_kwargs['dry_run'] incremental = not overwrite if path is None: @@ -401,14 +339,15 @@ def profile_mirror( dry_run_message = f"Dry run for dumping of profile `{profile.name}`'s data at path: `{path}`.\n" dry_run_message += 'Only directories will be created.' - if dry_run or (not collection_kwargs['should_dump_processes'] and not collection_kwargs['should_dump_data']): + if dry_run: echo.echo_report(dry_run_message) return else: echo.echo_report(f"Dumping of profile `{profile.name}`'s data at path: `{path}`.") - SAFEGUARD_FILE = '.verdi_profile_mirror' # noqa: N806 + SAFEGUARD_FILE: str = '.verdi_profile_mirror' + safeguard_file_path: Path = path / SAFEGUARD_FILE try: prepare_dump_path( @@ -420,88 +359,41 @@ def profile_mirror( except FileExistsError as exc: echo.echo_critical(str(exc)) - (path / SAFEGUARD_FILE).touch() + try: + with safeguard_file_path.open("r") as fhandle: + last_dump_time = datetime.fromisoformat(fhandle.readlines()[-1].strip().split()[-1]).astimezone() + except IndexError: + last_dump_time = None - data_dumper = DataDumper( + base_dumper = BaseDumper( dump_parent_path=path, overwrite=overwrite, incremental=incremental, - rich_spec_dict=rich_spec_dict, - **datadumper_kwargs, + last_dump_time=last_dump_time, ) - # dumper_pretty_print(data_dumper) process_dumper = ProcessDumper( - dump_parent_path=path, - overwrite=overwrite, - incremental=incremental, - data_dumper=data_dumper, - **processdumper_kwargs, + base=base_dumper, + include_inputs= include_inputs, + include_outputs= include_outputs, + include_attributes= include_attributes, + include_extras=include_extras, + flat=flat, ) - # dumper_pretty_print(process_dumper) - # TODO: Possibly implement specifying specific computers - # TODO: Although, users could just specify the relevant nodes - # TODO: Also add option to specify node types via entry points - # TODO: Use `batch_iter` from aiida.tools.archive.common + profile_dumper = ProfileDumper( + base_dumper=base_dumper, + process_dumper=process_dumper, + groups=groups, + organize_by_groups=organize_by_groups, + deduplicate=deduplicate, + profile=profile, + dump_processes=dump_processes, + ) - # === Dump the data that is not associated with any group === - if not groups: - collection_dumper = CollectionDumper( - dump_parent_path=path, - overwrite=overwrite, - incremental=incremental, - nodes=nodes, - **collection_kwargs, - **rich_kwargs, - data_dumper=data_dumper, - process_dumper=process_dumper, - deduplicate=deduplicate, - ) - collection_dumper.create_entity_counter() - # dumper_pretty_print(collection_dumper, include_private_and_dunder=False) - - if dump_processes and collection_dumper._should_dump_processes(): - echo.echo_report(f'Dumping processes not in any group for profile `{profile.name}`...') - collection_dumper.dump_processes() - - if dump_data: - if not also_rich and not also_raw: - echo.echo_critical('`--dump-data was 
given, but neither --also-raw or --also-rich specified.') - echo.echo_report(f'Dumping data not in any group for profile {profile.name}...') - - # collection_dumper.dump_data_rich() - - # === Dump data per-group if Groups exist in profile or are selected === - # TODO: Invert default behavior here, as I typically want to dump all entries - # TODO: Possibly define a new click option instead - # all_entries = not all_entries - if not groups: # and all_entries: - groups = orm.QueryBuilder().append(orm.Group).all(flat=True) - - if groups: - if not nodes: - for group in groups: - collection_dumper = CollectionDumper( - dump_parent_path=path, - overwrite=overwrite, - incremental=incremental, - group=group, - **collection_kwargs, - **rich_kwargs, - process_dumper=process_dumper, - data_dumper=data_dumper, - deduplicate=deduplicate, - ) - - collection_dumper.create_entity_counter() - if dump_processes: - # The additional `_should_dump_processes` check here ensures that no reporting like - # "Dumping processes for group `SSSP/1.3/PBE/efficiency`" is printed for groups that - # don't contain processes - if collection_dumper._should_dump_processes(): - echo.echo_report(f'Dumping processes for group `{group.label}`...') - collection_dumper.dump_processes() - if dump_data: - echo.echo_report(f'Dumping data for group `{group.label}`...') - collection_dumper.dump_data_rich() + profile_dumper.dump() + + # Append the current time to the file + last_dump_time = datetime.now().astimezone().isoformat() + with safeguard_file_path.open("a") as fhandle: + fhandle.write(f"Last profile mirror time: {last_dump_time}\n") diff --git a/src/aiida/cmdline/params/options/main.py b/src/aiida/cmdline/params/options/main.py index 4d1c308c43..e7a18eedc1 100644 --- a/src/aiida/cmdline/params/options/main.py +++ b/src/aiida/cmdline/params/options/main.py @@ -90,7 +90,6 @@ 'NODES', 'NON_INTERACTIVE', 'OLDER_THAN', - 'ONLY_TOP_LEVEL_WORKFLOWS', 'ORDER_BY', 'ORDER_DIRECTION', 'ORGANIZE_BY_GROUPS', @@ -916,15 +915,6 @@ def set_log_level(ctx, _param, value): help='Dump files in a flat directory for every step of a workflow.', ) -ONLY_TOP_LEVEL_WORKFLOWS = OverridableOption( - '--only-top-level-workflows/--no-only-top-level-workflows', - is_flag=True, - default=True, - type=bool, - show_default=True, - help='Dump only the top-level workflows in their own dedicated directories.', -) - INCREMENTAL = OverridableOption( '--incremental/--no-incremental', is_flag=True, diff --git a/src/aiida/tools/dumping/__init__.py b/src/aiida/tools/dumping/__init__.py index 49713c9b8a..c6031fc35a 100644 --- a/src/aiida/tools/dumping/__init__.py +++ b/src/aiida/tools/dumping/__init__.py @@ -8,8 +8,10 @@ ########################################################################### """Modules related to the dumping of AiiDA data.""" -from .collection import CollectionDumper -from .data import DataDumper -from .processes import ProcessDumper +from .base import BaseDumper +from .profile import ProfileDumper +from .group import GroupDumper +from .process import ProcessDumper +# from .collection import CollectionDumper -__all__ = ('CollectionDumper', 'DataDumper', 'ProcessDumper') +__all__ = ('BaseDumper', 'ProfileDumper', 'GroupDumper', 'ProcessDumper') #, 'CollectionDumper') diff --git a/src/aiida/tools/dumping/base.py b/src/aiida/tools/dumping/base.py new file mode 100644 index 0000000000..03d72c6f72 --- /dev/null +++ b/src/aiida/tools/dumping/base.py @@ -0,0 +1,25 @@ +########################################################################### +# Copyright 
(c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### + +from pathlib import Path +from datetime import datetime + + +class BaseDumper: + def __init__( + self, + dump_parent_path: Path = Path.cwd(), + overwrite: bool = False, + incremental: bool = True, + last_dump_time: datetime | None = None, + ): + self.dump_parent_path = dump_parent_path + self.overwrite = overwrite + self.incremental = incremental + self.last_dump_time = last_dump_time \ No newline at end of file diff --git a/src/aiida/tools/dumping/data.py b/src/aiida/tools/dumping/data.py deleted file mode 100644 index 3a75d8d743..0000000000 --- a/src/aiida/tools/dumping/data.py +++ /dev/null @@ -1,292 +0,0 @@ -########################################################################### -# Copyright (c), The AiiDA team. All rights reserved. # -# This file is part of the AiiDA code. # -# # -# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # -# For further information on the license, see the LICENSE.txt file # -# For further information please visit http://www.aiida.net # -########################################################################### -"""Functionality for dumping of Data nodes.""" - -from __future__ import annotations - -import logging -from functools import singledispatchmethod -from pathlib import Path - -import yaml - -from aiida import orm - -logger = logging.getLogger(__name__) - - -class DataDumper: - def __init__( - self, - *args, - dump_parent_path: Path = Path.cwd(), - overwrite: bool = False, - incremental: bool = True, - data_hidden: bool = False, - also_raw: bool = False, - also_rich: bool = False, - rich_spec_dict: dict | None = None, - **kwargs, - ) -> None: - self.args = args - self.dump_parent_path = dump_parent_path - self.overwrite = overwrite - self.incremental = incremental - self.data_hidden = data_hidden - self.also_raw = also_raw - self.also_rich = also_rich - self.kwargs = kwargs - - self.rich_spec_dict = rich_spec_dict - - self.hidden_aiida_path = dump_parent_path / '.aiida-raw-data' - - @singledispatchmethod - def dump_core_data_node_rich(self, data_node, output_path, output_fname): - # raise NotImplementedError(f'Dumping not implemented for type {type(data_node)}') - # print(f'No specific handler found for type <{type(data_node)}> <{data_node}>, doing nothing.') - # output_path /= 'general' - # This is effectively the `rich` dumping - data_node_entry_point_name = data_node.entry_point.name - export_settings = self.rich_spec_dict[data_node_entry_point_name] - exporter = export_settings['exporter'] - fileformat = export_settings['export_format'] - if exporter is not None: - output_path.mkdir(exist_ok=True, parents=True) - exporter( - node=data_node, - output_fname=output_path / output_fname, - fileformat=fileformat, - overwrite=self.overwrite, - ) - # This is for orm.Data types for which no default dumping is implemented, e.g. 
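The `verdi profile mirror` changes above persist the time of the last dump in a `.verdi_profile_mirror` safeguard file and read it back into `BaseDumper.last_dump_time` on the next invocation, so that only nodes modified since then need to be considered. A minimal stdlib-only sketch of that round trip (file name and line format taken from the diff; the `BaseDumper` call is only indicated as a comment):

    from datetime import datetime
    from pathlib import Path

    safeguard_file_path = Path('profile-mirror') / '.verdi_profile_mirror'
    safeguard_file_path.parent.mkdir(parents=True, exist_ok=True)
    safeguard_file_path.touch()

    # Read the most recent timestamp, if a previous run appended one
    try:
        with safeguard_file_path.open('r') as fhandle:
            last_dump_time = datetime.fromisoformat(fhandle.readlines()[-1].strip().split()[-1]).astimezone()
    except IndexError:
        last_dump_time = None

    # ... construct BaseDumper(dump_parent_path=..., incremental=True, last_dump_time=last_dump_time) and dump ...

    # Append the current time so the next run only picks up nodes modified afterwards
    with safeguard_file_path.open('a') as fhandle:
        fhandle.write(f'Last profile mirror time: {datetime.now().astimezone().isoformat()}\n')
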
Bool or Float - # except ValueError: - # pass - # This is for orm.Data types for whose entry_point names no entry exists in the DEFAULT_CORE_EXPORT_MAPPING - # This is now captured outside in the `CollectionDumper`, so should not be relevant anymore - # except TypeError: - # raise - - @dump_core_data_node_rich.register - def _( - self, - data_node: orm.StructureData, - output_path: str | Path | None = None, - output_fname: str | None = None, - ): - if type(data_node) is orm.StructureData: - self._dump_structuredata(data_node, output_path=output_path, output_fname=output_fname) - else: - # Handle the case where data_node is a subclass of orm.StructureData - # Just use the default dispatch function implementation - self.dump_core_data_node_rich.dispatch(object)(self, data_node, output_path, output_fname) - - @dump_core_data_node_rich.register - def _( - self, - data_node: orm.Code, - output_path: str | Path | None = None, - output_fname: str | None = None, - ): - self._dump_code(data_node=data_node, output_path=output_path, output_fname=output_fname) - - @dump_core_data_node_rich.register - def _( - self, - data_node: orm.Computer, - output_path: str | Path | None = None, - output_fname: str | None = None, - ): - self._dump_computer_setup(data_node=data_node, output_path=output_path, output_fname=output_fname) - self._dump_computer_config(data_node=data_node, output_path=output_path, output_fname=output_fname) - - @dump_core_data_node_rich.register - def _( - self, - data_node: orm.BandsData, - output_path: str | Path | None = None, - output_fname: str | None = None, - ): - self._dump_bandsdata(data_node=data_node, output_path=output_path, output_fname=output_fname) - - # These are the rich dumping implementations that actually differ from the default dispatch - def _dump_structuredata( - self, - data_node: orm.StructureData, - output_path: Path | None = None, - output_fname: str | None = None, - ): - from aiida.common.exceptions import UnsupportedSpeciesError - - node_entry_point_name = data_node.entry_point.name - exporter = self.rich_spec_dict[node_entry_point_name]['exporter'] - fileformat = self.rich_spec_dict[node_entry_point_name]['export_format'] - - if output_fname is None: - output_fname = DataDumper.generate_output_fname_rich(data_node=data_node, fileformat=fileformat) - - # ? There also exists a CifData file type - # output_path /= 'structures' - output_path.mkdir(exist_ok=True, parents=True) - try: - exporter( - node=data_node, - output_fname=output_path / output_fname, - fileformat=fileformat, - overwrite=self.overwrite, - ) - except UnsupportedSpeciesError: - # This is the case for, e.g. HubbardStructureData that has species like `Mn0` - # Not sure how to resolve this. Wouldn't add a singledispatch for data types defined in plugins. Currently, - # do strict type check. HubbardStructureData doesn't implement an export method itself, though. 
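For readers unfamiliar with the dispatch mechanism of the `DataDumper` being removed here: `functools.singledispatchmethod` selects the registered overload based on the type of the first argument after `self`, with the undecorated method acting as the fallback. A minimal standalone sketch (hypothetical `Exporter` class, unrelated to AiiDA):

    from functools import singledispatchmethod


    class Exporter:
        @singledispatchmethod
        def export(self, node):
            # Fallback when no specific handler is registered for this type
            return f'generic export of {type(node).__name__}'

        @export.register
        def _(self, node: int):
            return f'integer export of {node}'

        @export.register
        def _(self, node: str):
            return f'string export of {node!r}'


    exporter = Exporter()
    print(exporter.export(3))      # integer export of 3
    print(exporter.export('a'))    # string export of 'a'
    print(exporter.export(3.14))   # generic export of float
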
- pass - - def _dump_code( - self, - data_node: orm.Code, - output_path: Path | None = None, - output_fname: str | None = None, - ): - # output_path /= 'codes' - - node_entry_point_name = data_node.entry_point.name - exporter = self.rich_spec_dict[node_entry_point_name]['exporter'] - fileformat = self.rich_spec_dict[node_entry_point_name]['export_format'] - - if fileformat != 'yaml': - raise NotImplementedError('No other fileformats supported so far apart from YAML.') - output_path.mkdir(exist_ok=True, parents=True) - if output_fname is None: - output_fname = DataDumper.generate_output_fname_rich(data_node=data_node, fileformat=fileformat) - - exporter( - node=data_node, - output_fname=output_path / output_fname, - fileformat=fileformat, - overwrite=self.overwrite, - ) - - def _dump_computer_setup( - self, - data_node: orm.Computer, - output_path: Path | None = None, - output_fname: str | None = None, - ): - node_entry_point_name = data_node.entry_point.name - # TODO: Don't use the `exporter` here, as `Computer` doesn't derive from Data, so custom implementation - fileformat = self.rich_spec_dict[node_entry_point_name]['export_format'] - - if fileformat != 'yaml': - raise NotImplementedError('No other fileformats supported so far apart from YAML.') - - output_path.mkdir(exist_ok=True, parents=True) - - # This is a bit of a hack. Should split this up into two different functions. - if output_fname is None: - output_fname = output_path / f'{data_node.full_label}-setup-{data_node.pk}.{fileformat}' - - # ? Copied over from `cmd_computer` as importing `computer_export_setup` led to click Context error: - # TypeError: Context.__init__() got an unexpected keyword argument 'computer' - computer_setup = { - 'label': data_node.label, - 'hostname': data_node.hostname, - 'description': data_node.description, - 'transport': data_node.transport_type, - 'scheduler': data_node.scheduler_type, - 'shebang': data_node.get_shebang(), - 'work_dir': data_node.get_workdir(), - 'mpirun_command': ' '.join(data_node.get_mpirun_command()), - 'mpiprocs_per_machine': data_node.get_default_mpiprocs_per_machine(), - 'default_memory_per_machine': data_node.get_default_memory_per_machine(), - 'use_double_quotes': data_node.get_use_double_quotes(), - 'prepend_text': data_node.get_prepend_text(), - 'append_text': data_node.get_append_text(), - } - - if not output_fname.is_file(): - output_fname.write_text(yaml.dump(computer_setup, sort_keys=False), 'utf-8') - - def _dump_computer_config( - self, - data_node: orm.Computer, - output_path: Path | None = None, - output_fname: str | None = None, - ): - from aiida.orm import User - - node_entry_point_name = data_node.entry_point.name - # TODO: Don't use the `exporter` here, as `Computer` doesn't derive from Data, so custom implementation - fileformat = self.rich_spec_dict[node_entry_point_name]['export_format'] - - # output_path /= 'computers' - if fileformat != 'yaml': - raise NotImplementedError('No other fileformats supported so far apart from YAML.') - - output_path.mkdir(exist_ok=True, parents=True) - - # This is a bit of a hack. Should split this up into two different functions. 
- if output_fname is None: - output_fname = output_path / f'{data_node.full_label}-config-{data_node.pk}.{fileformat}' - - users = User.collection.all() - for user in users: - computer_configuration = data_node.get_configuration(user) - if not output_fname.is_file(): - output_fname.write_text(yaml.dump(computer_configuration, sort_keys=False), 'utf-8') - - def _dump_bandsdata( - self, - data_node: orm.BandsData, - output_path: Path | None = None, - output_fname: str | None = None, - ): - node_entry_point_name = data_node.entry_point.name - exporter = self.rich_spec_dict[node_entry_point_name]['exporter'] - fileformat = self.rich_spec_dict[node_entry_point_name]['export_format'] - - from aiida.tools.dumping.utils import sanitize_file_extension - - output_path.mkdir(exist_ok=True, parents=True) - - if output_fname is None: - output_fname = DataDumper.generate_output_fname_rich(data_node=data_node, fileformat=fileformat) - - output_fname = sanitize_file_extension(output_fname) - - exporter( - node=data_node, - output_fname=output_path / output_fname, - fileformat=fileformat, - overwrite=self.overwrite, - ) - - def _dump_user_info(self): ... - - def dump_core_data_node_raw(self, data_node: orm.Data, output_path: Path, output_fname: str | None = None): - output_path.mkdir(exist_ok=True, parents=True) - - if output_fname is None: - output_fname = DataDumper.generate_output_fname_raw(data_node=data_node) - - with open(output_path.resolve() / output_fname, 'w') as handle: - yaml.dump(data_node.attributes, handle) - - @staticmethod - def generate_output_fname_raw(data_node, prefix: str | None = None): - if prefix is None: - return f'{data_node.__class__.__name__}-{data_node.pk}_attrs.yaml' - else: - return f'{prefix}-{data_node.__class__.__name__}-{data_node.pk}_attrs.yaml' - - @staticmethod - def generate_output_fname_rich(data_node, fileformat, prefix: str | None = None): - if prefix is None: - return f'{data_node.__class__.__name__}-{data_node.pk}.{fileformat}' - else: - return f'{prefix}-{data_node.__class__.__name__}-{data_node.pk}.{fileformat}' diff --git a/src/aiida/tools/dumping/collection.py b/src/aiida/tools/dumping/group.py similarity index 52% rename from src/aiida/tools/dumping/collection.py rename to src/aiida/tools/dumping/group.py index 9cf0dccde6..6f6cb7c214 100644 --- a/src/aiida/tools/dumping/collection.py +++ b/src/aiida/tools/dumping/group.py @@ -14,63 +14,46 @@ import logging from collections import Counter from pathlib import Path +from datetime import datetime from aiida import orm -from aiida.tools.dumping.data import DataDumper -from aiida.tools.dumping.processes import ProcessDumper +from aiida.tools.dumping.process import ProcessDumper +from aiida.tools.dumping.base import BaseDumper logger = logging.getLogger(__name__) DEFAULT_PROCESSES_TO_DUMP = [orm.CalculationNode, orm.WorkflowNode] -DEFAULT_DATA_TO_DUMP = [orm.StructureData, orm.Code, orm.Computer, orm.BandsData, orm.UpfData] +# DEFAULT_DATA_TO_DUMP = [orm.StructureData, orm.Code, orm.Computer, orm.BandsData, orm.UpfData] # DEFAULT_COLLECTIONS_TO_DUMP ?? -DEFAULT_ENTITIES_TO_DUMP = DEFAULT_PROCESSES_TO_DUMP + DEFAULT_DATA_TO_DUMP +DEFAULT_ENTITIES_TO_DUMP = DEFAULT_PROCESSES_TO_DUMP # + DEFAULT_DATA_TO_DUMP # ! 
This class is instantiated once for every group, or once for the full profile -class CollectionDumper: +class GroupDumper: def __init__( self, - dump_parent_path: Path = Path().cwd(), - nodes: set = {}, + base_dumper: BaseDumper | None = None, + process_dumper: ProcessDumper | None = None, group: orm.Group | str | None = None, - overwrite: bool = False, - incremental: bool = True, - should_dump_processes: bool = False, - should_dump_data: bool = False, - only_top_level_workflows: bool = True, - rich_dump_all: bool = True, deduplicate: bool = True, - organize_by_groups: bool = True, - process_dumper: ProcessDumper | None = None, - data_dumper: DataDumper | None = None, + output_path: str | Path | None = None ): - self.dump_parent_path = dump_parent_path - self.overwrite = overwrite - self.incremental = incremental - self.should_dump_processes = should_dump_processes - self.should_dump_data = should_dump_data - self.only_top_level_workflows = only_top_level_workflows - self.nodes = nodes self.deduplicate = deduplicate - self.process_dumper = process_dumper - self.data_dumper = data_dumper - # Allow passing of group via label if isinstance(group, str): group = orm.Group.get(group) self.group = group + self.output_path = output_path - if organize_by_groups: - if group is not None: - group_subdir = Path(*group.type_string.split('.')) - self.output_path = self.dump_parent_path / 'groups' / group_subdir / self.group.label - else: - self.output_path = self.dump_parent_path / 'no-group' - else: - self.output_path = self.dump_parent_path + if base_dumper is None: + base_dumper = BaseDumper() + self.base_dumper: BaseDumper = base_dumper + + if process_dumper is None: + process_dumper = ProcessDumper() + self.process_dumper: ProcessDumper = process_dumper if not hasattr(self, 'entity_counter'): self.create_entity_counter() @@ -81,8 +64,8 @@ def create_entity_counter(self) -> Counter: # If the group only has one WorkChain assigned to it, this will only return a count of 1 for the # WorkChainNode, nothing more, that is, it doesn't work recursively. 
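The entity counter referenced above is simply a tally of how many nodes of each ORM class the group (or the whole profile) contains; `_should_dump_processes` then checks whether any process classes appear in it. A standalone sketch with `collections.Counter`, using class-name strings as stand-ins for the real ORM classes:

    from collections import Counter

    # Stand-ins for the classes of the nodes found in a group
    node_classes = ['CalcJobNode', 'CalcJobNode', 'WorkChainNode', 'StructureData']
    entity_counter = Counter(node_classes)

    process_classes = {'CalcJobNode', 'CalcFunctionNode', 'WorkChainNode', 'WorkFunctionNode', 'ProcessNode'}
    should_dump_processes = sum(entity_counter.get(cls, 0) for cls in process_classes) > 0

    print(entity_counter)          # Counter({'CalcJobNode': 2, 'WorkChainNode': 1, 'StructureData': 1})
    print(should_dump_processes)   # True
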
nodes = self.group.nodes - elif self.nodes is not None: - nodes = self.nodes + # elif self.nodes is not None: + # nodes = self.nodes else: nodes = orm.QueryBuilder().append(orm.Node).all(flat=True) @@ -96,9 +79,9 @@ def create_entity_counter(self) -> Counter: return entity_counter - def get_collection_nodes(self): - if self.nodes: - self.collection_nodes = self.nodes + def get_group_nodes(self): + # if self.nodes: + # self.collection_nodes = self.nodes # if hasattr(self, 'collection_nodes'): # return self.collection_nodes @@ -129,27 +112,20 @@ def get_collection_nodes(self): nodes = [profile_node for profile_node in profile_nodes if profile_node not in nodes_in_groups] nodes = [orm.load_node(node) for node in nodes] + if self.base_dumper.last_dump_time is not None: + # breakpoint() + nodes = [node for node in nodes if node.mtime > self.base_dumper.last_dump_time] + self.collection_nodes = nodes return nodes def _should_dump_processes(self) -> bool: - if not self.nodes: - return ( - sum( - self.entity_counter.get(orm_process_class, 0) - for orm_process_class in [ - orm.CalcJobNode, - orm.CalcFunctionNode, - orm.WorkChainNode, - orm.WorkFunctionNode, - orm.ProcessNode, - ] - ) - > 0 - ) - else: - return len([node for node in self.nodes if isinstance(node, orm.ProcessNode)]) > 0 + + if not hasattr(self, 'group_nodes'): + self.get_group_nodes() + + return len([node for node in self.collection_nodes if isinstance(node, orm.ProcessNode)]) > 0 def _dump_calculations(self, calculations): for calculation in calculations: @@ -186,11 +162,11 @@ def _dump_workflows(self, workflows): link_calculations_dir=self.output_path / 'calculations', ) - def dump_processes(self): - nodes = self.get_collection_nodes() + def _dump_processes(self): + nodes = self.get_group_nodes() workflows = [node for node in nodes if isinstance(node, orm.WorkflowNode)] - if self.only_top_level_workflows: + if self.deduplicate: workflows = [workflow for workflow in workflows if workflow.caller is None] # Also need to obtain sub-calculations that were called by workflows of the group @@ -210,70 +186,3 @@ def dump_processes(self): self._dump_calculations(calculations=calculations) self._dump_workflows(workflows=workflows) - - # TODO: Add `dump_data_raw` here, as well - # def dump_data_rich(self): - # nodes = self.get_collection_nodes() - # nodes = [node for node in nodes if isinstance(node, (orm.Data, orm.Computer))] - # # Here, when providing logic to set the exporters and fileformat via the rich-options, don't have to filter - # # anymore for `core` - # nodes = [node for node in nodes if node.entry_point.name.startswith('core')] - # if len(nodes) == 0: - # return - - # self.output_path.mkdir(exist_ok=True, parents=True) - # data_dumper = self.data_dumper - - # for data_node in nodes: - # node_entry_point_name = data_node.entry_point.name - - # # Get the fileformat and exporter for the data node - # try: - # fileformat = data_dumper.rich_spec_dict[node_entry_point_name]['export_format'] - # exporter = data_dumper.rich_spec_dict[node_entry_point_name]['exporter'] - - # # If options for the rich dumping are specified and not all the other defaults are being used - # # Some entry_points might not be inside the `rich_spec_dict` - # except KeyError: - # continue - - # except: - # # Raise all exceptions here during development - # raise - - # # Don't go further if no importer implemented for a data type anyway - # if exporter is None: - # continue - - # try: - # # Generate a nice filename and sanitize it - # nice_output_path = 
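Two filters in the group dumper above decide what actually gets dumped on a given run: an incremental filter that keeps only nodes modified after `BaseDumper.last_dump_time`, and, with `deduplicate=True`, a restriction to top-level workflows (those without a caller), so nested workflows are dumped only inside their parent's directory. A small standalone sketch of both filters, with hypothetical stand-in objects instead of real ORM nodes:

    from dataclasses import dataclass
    from datetime import datetime, timedelta, timezone
    from typing import Optional


    @dataclass
    class FakeWorkflow:  # stand-in for an orm.WorkflowNode in this sketch
        pk: int
        mtime: datetime
        caller: Optional['FakeWorkflow'] = None


    now = datetime.now(timezone.utc)
    parent = FakeWorkflow(pk=10, mtime=now - timedelta(minutes=5))
    child = FakeWorkflow(pk=11, mtime=now - timedelta(minutes=5), caller=parent)
    stale = FakeWorkflow(pk=12, mtime=now - timedelta(days=2))
    workflows = [parent, child, stale]

    last_dump_time = now - timedelta(days=1)  # as read back from the safeguard file
    if last_dump_time is not None:
        workflows = [node for node in workflows if node.mtime > last_dump_time]

    deduplicate = True
    if deduplicate:
        workflows = [node for node in workflows if node.caller is None]

    print([node.pk for node in workflows])  # [10]: recently modified and top-level
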
self.output_path / 'data' / data_node.__class__.__name__.lower() - # nice_fname = data_dumper.generate_output_fname_rich(data_node=data_node, fileformat=fileformat).replace( - # '__', '_' - # ) - # nice_fname = sanitize_file_extension(nice_fname) - - # if data_dumper.data_hidden: - # # Define paths for hidden dump and linking - # hidden_output_path = self.hidden_aiida_path / 'data' / data_node.__class__.__name__.lower() - # uuid_fname = sanitize_file_extension(f'{data_node.uuid}.{fileformat}') - - # # Dump the data in the hidden directory - # data_dumper.dump_core_data_node_rich(data_node, hidden_output_path, uuid_fname) - - # # Link the hidden file to the expected output path - # (nice_output_path / nice_fname).parent.mkdir(exist_ok=True, parents=True) - # os.symlink(hidden_output_path / uuid_fname, nice_output_path / nice_fname) - - # else: - # # Dump the data in the non-hidden directory - # data_dumper.dump_core_data_node_rich(data_node, nice_output_path, nice_fname) - - # except TypeError: - # # Handle case when no exporter is implemented for a given data_node type - # raise - # except OSError: - # # A Data node, e.g. a Code might already be existent, so don't worry about this exception - # continue - # except Exception: - # raise diff --git a/src/aiida/tools/dumping/parser.py b/src/aiida/tools/dumping/parser.py index 39288929e6..96412eb421 100644 --- a/src/aiida/tools/dumping/parser.py +++ b/src/aiida/tools/dumping/parser.py @@ -35,7 +35,6 @@ def parse_config_file(config_file: str | Path | None) -> dict: collection_kwargs = { 'should_dump_processes': config.get('dump_processes', True), 'should_dump_data': config.get('dump_data', True), - 'only_top_level_workflows': config.get('only_top_level_workflows', True), } rich_kwargs = { diff --git a/src/aiida/tools/dumping/processes.py b/src/aiida/tools/dumping/process.py similarity index 71% rename from src/aiida/tools/dumping/processes.py rename to src/aiida/tools/dumping/process.py index fcc8671ff6..f45c0692e4 100644 --- a/src/aiida/tools/dumping/processes.py +++ b/src/aiida/tools/dumping/process.py @@ -33,60 +33,36 @@ from aiida.common import LinkType from aiida.common.exceptions import NotExistentAttributeError -from aiida.orm import ( - CalcFunctionNode, - CalcJobNode, - CalculationNode, - ProcessNode, - WorkChainNode, - WorkflowNode, - WorkFunctionNode, -) +from aiida import orm from aiida.orm.utils import LinkTriple from aiida.tools.archive.exceptions import ExportValidationError -from aiida.tools.dumping.data import DataDumper +from aiida.tools.dumping.base import BaseDumper from aiida.tools.dumping.utils import prepare_dump_path -LOGGER = logging.getLogger(__name__) +logger = logging.getLogger(__name__) class ProcessDumper: def __init__( self, - *args, - dump_parent_path: Path = Path.cwd(), - overwrite: bool = False, - incremental: bool = True, + base: BaseDumper = BaseDumper(), flat: bool = False, include_inputs: bool = True, include_outputs: bool = False, include_attributes: bool = True, include_extras: bool = True, - rich_options: str = '', - rich_config_file: Path | None = None, - rich_dump_all: bool = True, - data_dumper: DataDumper = DataDumper(), dump_unsealed: bool = False, - **kwargs, ) -> None: - self.args = args - self.dump_parent_path = dump_parent_path - self.overwrite = overwrite - self.incremental = incremental self.flat = flat + self.base = base self.include_inputs = include_inputs self.include_outputs = include_outputs self.include_attributes = include_attributes self.include_extras = include_extras - 
self.rich_options = rich_options - self.rich_config_file = rich_config_file - self.rich_dump_all = rich_dump_all - self.data_dumper = data_dumper - self.kwargs = kwargs self.dump_unsealed = dump_unsealed @staticmethod - def _generate_default_dump_path(process_node: ProcessNode, prefix: str = 'dump') -> Path: + def _generate_default_dump_path(process_node: orm.ProcessNode, prefix: str = 'dump') -> Path: """Simple helper function to generate the default parent-dumping directory if none given. This function is not called for the recursive sub-calls of `_dump_calculation` as it just creates the default @@ -113,7 +89,7 @@ def _generate_default_dump_path(process_node: ProcessNode, prefix: str = 'dump') return Path('-'.join(entities_to_dump)) @staticmethod - def _generate_readme(process_node: ProcessNode, output_path: Path) -> None: + def _generate_readme(process_node: orm.ProcessNode, output_path: Path) -> None: """Generate README.md file in main dumping directory. :param process_node: `CalculationNode` or `WorkflowNode`. @@ -158,11 +134,11 @@ def _generate_readme(process_node: ProcessNode, output_path: Path) -> None: # `verdi process report` # Copied over from `cmd_process` - if isinstance(process_node, CalcJobNode): + if isinstance(process_node, orm.CalcJobNode): process_report = get_calcjob_report(process_node) - elif isinstance(process_node, WorkChainNode): + elif isinstance(process_node, orm.WorkChainNode): process_report = get_workchain_report(process_node, levelname='REPORT', indent_size=2, max_depth=None) - elif isinstance(process_node, (CalcFunctionNode, WorkFunctionNode)): + elif isinstance(process_node, (orm.CalcFunctionNode, orm.WorkFunctionNode)): process_report = get_process_function_report(process_node) else: process_report = f'Nothing to show for node type {process_node.__class__}' @@ -209,11 +185,9 @@ def _generate_child_node_label(index: int, link_triple: LinkTriple, append_pk: b def dump( self, - process_node: ProcessNode, + process_node: orm.ProcessNode, output_path: Path | None, io_dump_paths: List[str | Path] | None = None, - *args, - **kwargs, ) -> Path: """Dumps all data involved in a `ProcessNode`, including its outgoing links. @@ -236,22 +210,22 @@ def dump( # I don't want to include them in the general class `__init__`, as they don't really fit there. # But the `_dump_node_yaml` function is private, so it's never called outside by the user. # Setting the class attributes here dynamically is probably not a good solution, but it works for now. 
- for key, value in kwargs.items(): - setattr(self, key, value) + # for key, value in kwargs.items(): + # setattr(self, key, value) if output_path is None: output_path = self._generate_default_dump_path(process_node=process_node) - prepare_dump_path(path_to_validate=output_path, overwrite=self.overwrite, incremental=self.incremental) + prepare_dump_path(path_to_validate=output_path, overwrite=self.base.overwrite, incremental=self.base.incremental) - if isinstance(process_node, CalculationNode): + if isinstance(process_node, orm.CalculationNode): self._dump_calculation( calculation_node=process_node, output_path=output_path, io_dump_paths=io_dump_paths, ) - elif isinstance(process_node, WorkflowNode): + elif isinstance(process_node, orm.WorkflowNode): self._dump_workflow( workflow_node=process_node, output_path=output_path, @@ -264,7 +238,7 @@ def dump( def _dump_workflow( self, - workflow_node: WorkflowNode, + workflow_node: orm.WorkflowNode, output_path: Path, io_dump_paths: List[str | Path] | None = None, link_calculations: bool = False, @@ -277,7 +251,7 @@ def _dump_workflow( :param io_dump_paths: Custom subdirectories for `CalculationNode` s, defaults to None """ - prepare_dump_path(path_to_validate=output_path, overwrite=self.overwrite, incremental=self.incremental) + prepare_dump_path(path_to_validate=output_path, overwrite=self.base.overwrite, incremental=self.base.incremental) self._dump_node_yaml(process_node=workflow_node, output_path=output_path) called_links = workflow_node.base.links.get_outgoing(link_type=(LinkType.CALL_CALC, LinkType.CALL_WORK)).all() @@ -289,7 +263,7 @@ def _dump_workflow( child_output_path = output_path.resolve() / child_label # Recursive function call for `WorkFlowNode` - if isinstance(child_node, WorkflowNode): + if isinstance(child_node, orm.WorkflowNode): self._dump_workflow( workflow_node=child_node, output_path=child_output_path, @@ -301,7 +275,7 @@ def _dump_workflow( ) # Once a `CalculationNode` as child reached, dump it - elif isinstance(child_node, CalculationNode): + elif isinstance(child_node, orm.CalculationNode): if not link_calculations: self._dump_calculation( calculation_node=child_node, @@ -319,7 +293,7 @@ def _dump_workflow( def _dump_calculation( self, - calculation_node: CalculationNode, + calculation_node: orm.CalculationNode, output_path: Path, io_dump_paths: List[str | Path] | None = None, ) -> None: @@ -331,7 +305,7 @@ def _dump_calculation( Default: ['inputs', 'outputs', 'node_inputs', 'node_outputs'] """ - prepare_dump_path(path_to_validate=output_path, overwrite=self.overwrite, incremental=self.incremental) + prepare_dump_path(path_to_validate=output_path, overwrite=self.base.overwrite, incremental=self.base.incremental) self._dump_node_yaml(process_node=calculation_node, output_path=output_path) io_dump_mapping = self._generate_calculation_io_mapping(io_dump_paths=io_dump_paths) @@ -345,10 +319,6 @@ def _dump_calculation( output_path.resolve() / io_dump_mapping.retrieved ) - if self.data_dumper.also_raw: - # TODO: Replace with attached self.data_dumper attribute - self.data_dumper.dump_core_data_node_raw(data_node=calculation_node, output_path=output_path) - # Dump the node_inputs if self.include_inputs: input_links = calculation_node.base.links.get_incoming(link_type=LinkType.INPUT_CALC) @@ -358,26 +328,6 @@ def _dump_calculation( self._dump_calculation_io_files(parent_path=output_path / io_dump_mapping.inputs, link_triples=input_links) - if self.data_dumper.also_raw: - # Always dump the `raw` data inside the calculation 
directories - # I don't see a reason why one would want all the node attribute files in a centralized location - self._dump_calculation_io_files_raw( - output_path=output_path / io_dump_mapping.inputs, link_triples=input_links - ) - - if self.data_dumper.also_rich: - rich_data_output_path = output_path / io_dump_mapping.inputs - # if not self.data_dumper.data_hidden: - # rich_data_output_path = output_path / io_dump_mapping.inputs - # else: - # # TODO: Currently, when dumping only one selected workflow, if rich dumping is activated, but - # # TODO: `data-hidden` is set, no data nodes were actually being dumped - # # TODO: With the current implementation below, they are dumped, but not in the same structure as for the - # # TODO: `dump_rich_core` function. Quick fix for now - # pass - - # Only dump the rich data output files in the process directories if data_hidden is False - self._dump_calculation_io_files_rich(output_path=rich_data_output_path, link_triples=input_links) # Dump the node_outputs apart from `retrieved` if self.include_outputs: output_links = list(calculation_node.base.links.get_outgoing(link_type=LinkType.CREATE)) @@ -388,18 +338,6 @@ def _dump_calculation( link_triples=output_links, ) - if self.data_dumper.also_raw: - self._dump_calculation_io_files_raw( - output_path=output_path / io_dump_mapping.outputs, - link_triples=output_links, - ) - - if self.data_dumper.also_rich: - self._dump_calculation_io_files_rich( - output_path=output_path / io_dump_mapping.outputs, - link_triples=output_links, - ) - def _dump_calculation_io_files( self, parent_path: Path, @@ -422,92 +360,6 @@ def _dump_calculation_io_files( link_triple.node.base.repository.copy_tree(linked_node_path.resolve()) - def _dump_calculation_io_files_raw( - self, - output_path: Path, - link_triples: orm.LinkManager | List[orm.LinkTriple], - ): - """Small helper function to dump linked input/output nodes of a `orm.CalculationNode`. - - :param parent_path: Parent directory for dumping the linked node contents. - :param link_triples: List of link triples. - """ - - output_path /= 'raw' - - for link_triple in link_triples: - link_label = link_triple.link_label - data_node = link_triple.node - - # linked_node_path.parent.mkdir(parents=True, exist_ok=True) - output_path.mkdir(parents=True, exist_ok=True) - - # Then dump the node attributes for each node - output_fname = DataDumper.generate_output_fname_raw(prefix=link_label, data_node=data_node) - output_fname = output_fname.replace('__', '_') - - if self.data_dumper.data_hidden: - self.data_dumper.dump_core_data_node_raw( - data_node=data_node, output_path=output_path, output_fname=output_fname - ) - self.data_dumper.dump_core_data_node_raw( - data_node=data_node, output_path=output_path, output_fname=output_fname - ) - - def _dump_calculation_io_files_rich( - self, - output_path: Path, - link_triples: orm.LinkManager | List[orm.LinkTriple], - ): - """Small helper function to dump linked input/output nodes of a `orm.CalculationNode`. - - :param parent_path: Parent directory for dumping the linked node contents. - :param link_triples: List of link triples. 
- """ - - # Set up the rich parsing functions - - # Extend (at least the keys) by the dynamic entry points - rich_spec_dict = self.data_dumper.rich_spec_dict - - for link_triple in link_triples: - link_label = link_triple.link_label - data_node = link_triple.node - - node = link_triple.node - node_entry_point = node.entry_point - node_entry_point_name = node_entry_point.name - - # TODO: Somehow obtain sensible filenames -> Should this be done here, or by the export function that is - # TODO: possibly written by the plugin developer - if node_entry_point_name.startswith('core'): - # Obtain settings from the export dict - # TODO: -> This might break when plugin is missing - try: - exporter = rich_spec_dict[node_entry_point_name]['exporter'] - fileformat = rich_spec_dict[node_entry_point_name]['export_format'] - output_fname = self.data_dumper.generate_output_fname_rich( - prefix=link_label, data_node=data_node, fileformat=fileformat - ) - output_fname = output_fname.replace('__', '_') - except KeyError: - continue - - # No exporter set - if exporter is None: - continue - - # Only create subdirectory if `Data` node has an exporter - rich_output_path = output_path / 'rich' / node.__class__.__name__.lower() - rich_output_path.mkdir(parents=True, exist_ok=True) - - # TODO: Here, if data_hidden is True, dump in hidden directory, else in output_path - self.data_dumper.dump_core_data_node_rich( - node, - output_path=rich_output_path, - output_fname=output_fname, - ) - def _generate_calculation_io_mapping(self, io_dump_paths: List[str | Path] | None = None) -> SimpleNamespace: """Helper function to generate mapping for entities dumped for each `CalculationNode`. @@ -522,7 +374,7 @@ def _generate_calculation_io_mapping(self, io_dump_paths: List[str | Path] | Non aiida_entities_to_dump = ['repository', 'retrieved', 'inputs', 'outputs'] default_calculation_io_dump_paths = ['inputs', 'outputs', 'node_inputs', 'node_outputs'] if self.flat and io_dump_paths is None: - LOGGER.info( + logger.info( 'Flat set to True and no `io_dump_paths`. Dumping in a flat directory, files might be overwritten.' ) empty_calculation_io_dump_paths = [''] * 4 @@ -530,24 +382,24 @@ def _generate_calculation_io_mapping(self, io_dump_paths: List[str | Path] | Non return SimpleNamespace(**dict(zip(aiida_entities_to_dump, empty_calculation_io_dump_paths))) elif not self.flat and io_dump_paths is None: - LOGGER.info( + logger.info( 'Flat set to False but no `io_dump_paths` provided. ' + f'Will use the defaults {default_calculation_io_dump_paths}.' ) return SimpleNamespace(**dict(zip(aiida_entities_to_dump, default_calculation_io_dump_paths))) elif self.flat: - LOGGER.info('Flat set to True but `io_dump_paths` provided. These will be used, but `inputs` not nested.') + logger.info('Flat set to True but `io_dump_paths` provided. These will be used, but `inputs` not nested.') return SimpleNamespace(**dict(zip(aiida_entities_to_dump, io_dump_paths))) else: - LOGGER.info( + logger.info( 'Flat set to False but no `io_dump_paths` provided. These will be used, but `node_inputs` flattened.' 
) return SimpleNamespace(**dict(zip(aiida_entities_to_dump, io_dump_paths))) # type: ignore[arg-type] def _dump_node_yaml( self, - process_node: ProcessNode, + process_node: orm.ProcessNode, output_path: Path, output_filename: str = '.aiida_node_metadata.yaml', ) -> None: diff --git a/src/aiida/tools/dumping/profile.py b/src/aiida/tools/dumping/profile.py new file mode 100644 index 0000000000..5b88fe8d55 --- /dev/null +++ b/src/aiida/tools/dumping/profile.py @@ -0,0 +1,102 @@ +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### + +# TODO: Use `batch_iter` from aiida.tools.archive.common + +from __future__ import annotations +from pathlib import Path +import logging +from aiida import orm +from aiida.cmdline.params.options.main import ORGANIZE_BY_GROUPS +from aiida.tools.dumping.base import BaseDumper +from aiida.tools.dumping.process import ProcessDumper +from aiida.tools.dumping.group import GroupDumper +from aiida.manage.configuration.profile import Profile + +logger = logging.getLogger(__name__) + +class ProfileDumper: + def __init__( + self, + profile: str | Profile, + base_dumper: BaseDumper | None = None, + process_dumper: ProcessDumper | None = None, + organize_by_groups: bool = True, + deduplicate: bool = True, + groups: list[str | orm.Group] | None = None, + dump_processes: bool = True, + ): + self.organize_by_groups = organize_by_groups + self.deduplicate = deduplicate + self.profile = profile + self.dump_processes = dump_processes + + if base_dumper is None: + base_dumper = BaseDumper() + self.base_dumper: BaseDumper = base_dumper + + if process_dumper is None: + process_dumper = ProcessDumper() + self.process_dumper: ProcessDumper = process_dumper + + if not groups: + groups = orm.QueryBuilder().append(orm.Group).all(flat=True) + self.groups = groups + + + def dump(self): + + self._dump_processes_not_in_any_group() + self._dump_processes_per_group() + + + def _dump_processes_not_in_any_group(self): + + # === Dump the data that is not associated with any group === + if self.organize_by_groups: + output_path = self.base_dumper.dump_parent_path / 'no-group' + else: + output_path = self.base_dumper.dump_parent_path + + no_group_dumper = GroupDumper( + base_dumper=self.base_dumper, + process_dumper=self.process_dumper, + group=None, + deduplicate=self.deduplicate, + output_path=output_path, + ) + + if self.dump_processes and no_group_dumper._should_dump_processes(): + + logger.report(f'Dumping processes not in any group for profile `{self.profile.name}`...') + + no_group_dumper._dump_processes() + + def _dump_processes_per_group(self): + # === Dump data per-group if Groups exist in profile or are selected === + + for group in self.groups: + + if self.organize_by_groups: + output_path = self.base_dumper.dump_parent_path / group.label + else: + output_path = self.base_dumper.dump_parent_path + + group_dumper = GroupDumper( + base_dumper=self.base_dumper, + process_dumper=self.process_dumper, + group=group, + deduplicate=self.deduplicate, + output_path=output_path, + ) + + if self.dump_processes and group_dumper._should_dump_processes(): + logger.report(f'Dumping processes in 
group {group.label} for profile `{self.profile.name}`...') + + group_dumper._dump_processes() diff --git a/src/aiida/tools/dumping/test-config-file.yaml b/src/aiida/tools/dumping/test-config-file.yaml index 63bbe38180..6d1db5e967 100644 --- a/src/aiida/tools/dumping/test-config-file.yaml +++ b/src/aiida/tools/dumping/test-config-file.yaml @@ -4,7 +4,6 @@ incremental: true dry_run: false organize_by_groups: true dump_processes: true -only_top_level_workflows: true dump_data: true data_hidden: true also_raw: false diff --git a/tests/tools/dumping/test_processes.py b/tests/tools/dumping/test_processes.py index c409a438dc..e2f5633939 100644 --- a/tests/tools/dumping/test_processes.py +++ b/tests/tools/dumping/test_processes.py @@ -15,7 +15,7 @@ import pytest -from aiida.tools.dumping.processes import ProcessDumper +from aiida.tools.dumping.process import ProcessDumper # Non-AiiDA variables filename = 'file.txt' From 0105c08a9efed85e0950030d3c0a9b779afe4889 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 27 Jan 2025 12:32:03 +0000 Subject: [PATCH 06/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/aiida/cmdline/commands/cmd_profile.py | 20 +++++++++----------- src/aiida/tools/dumping/__init__.py | 5 +++-- src/aiida/tools/dumping/base.py | 4 ++-- src/aiida/tools/dumping/group.py | 10 ++++------ src/aiida/tools/dumping/process.py | 14 ++++++++++---- src/aiida/tools/dumping/profile.py | 23 +++++++++-------------- 6 files changed, 37 insertions(+), 39 deletions(-) diff --git a/src/aiida/cmdline/commands/cmd_profile.py b/src/aiida/cmdline/commands/cmd_profile.py index 2c16566672..4c4926399b 100644 --- a/src/aiida/cmdline/commands/cmd_profile.py +++ b/src/aiida/cmdline/commands/cmd_profile.py @@ -19,7 +19,7 @@ from aiida.cmdline.utils import defaults, echo from aiida.common import exceptions from aiida.manage.configuration import Profile, create_profile, get_config -from aiida.tools.dumping import GroupDumper, ProfileDumper, ProcessDumper +from aiida.tools.dumping import ProcessDumper, ProfileDumper @verdi.group('profile') @@ -306,13 +306,11 @@ def profile_mirror( ): """Dump all data in an AiiDA profile's storage to disk.""" - from pathlib import Path from datetime import datetime + from pathlib import Path - from aiida import orm - from aiida.tools.dumping.parser import DumpConfigParser - from aiida.tools.dumping.utils import prepare_dump_path from aiida.tools.dumping.base import BaseDumper + from aiida.tools.dumping.utils import prepare_dump_path profile = ctx.obj['profile'] @@ -360,7 +358,7 @@ def profile_mirror( echo.echo_critical(str(exc)) try: - with safeguard_file_path.open("r") as fhandle: + with safeguard_file_path.open('r') as fhandle: last_dump_time = datetime.fromisoformat(fhandle.readlines()[-1].strip().split()[-1]).astimezone() except IndexError: last_dump_time = None @@ -374,9 +372,9 @@ def profile_mirror( process_dumper = ProcessDumper( base=base_dumper, - include_inputs= include_inputs, - include_outputs= include_outputs, - include_attributes= include_attributes, + include_inputs=include_inputs, + include_outputs=include_outputs, + include_attributes=include_attributes, include_extras=include_extras, flat=flat, ) @@ -395,5 +393,5 @@ def profile_mirror( # Append the current time to the file last_dump_time = datetime.now().astimezone().isoformat() - with safeguard_file_path.open("a") as fhandle: - fhandle.write(f"Last profile mirror time: 
{last_dump_time}\n") + with safeguard_file_path.open('a') as fhandle: + fhandle.write(f'Last profile mirror time: {last_dump_time}\n') diff --git a/src/aiida/tools/dumping/__init__.py b/src/aiida/tools/dumping/__init__.py index c6031fc35a..48b73eee65 100644 --- a/src/aiida/tools/dumping/__init__.py +++ b/src/aiida/tools/dumping/__init__.py @@ -9,9 +9,10 @@ """Modules related to the dumping of AiiDA data.""" from .base import BaseDumper -from .profile import ProfileDumper from .group import GroupDumper from .process import ProcessDumper +from .profile import ProfileDumper + # from .collection import CollectionDumper -__all__ = ('BaseDumper', 'ProfileDumper', 'GroupDumper', 'ProcessDumper') #, 'CollectionDumper') +__all__ = ('BaseDumper', 'GroupDumper', 'ProcessDumper', 'ProfileDumper') # , 'CollectionDumper') diff --git a/src/aiida/tools/dumping/base.py b/src/aiida/tools/dumping/base.py index 03d72c6f72..8a89e464d2 100644 --- a/src/aiida/tools/dumping/base.py +++ b/src/aiida/tools/dumping/base.py @@ -7,8 +7,8 @@ # For further information please visit http://www.aiida.net # ########################################################################### -from pathlib import Path from datetime import datetime +from pathlib import Path class BaseDumper: @@ -22,4 +22,4 @@ def __init__( self.dump_parent_path = dump_parent_path self.overwrite = overwrite self.incremental = incremental - self.last_dump_time = last_dump_time \ No newline at end of file + self.last_dump_time = last_dump_time diff --git a/src/aiida/tools/dumping/group.py b/src/aiida/tools/dumping/group.py index 6f6cb7c214..6ea4c960d3 100644 --- a/src/aiida/tools/dumping/group.py +++ b/src/aiida/tools/dumping/group.py @@ -14,18 +14,17 @@ import logging from collections import Counter from pathlib import Path -from datetime import datetime from aiida import orm -from aiida.tools.dumping.process import ProcessDumper from aiida.tools.dumping.base import BaseDumper +from aiida.tools.dumping.process import ProcessDumper logger = logging.getLogger(__name__) DEFAULT_PROCESSES_TO_DUMP = [orm.CalculationNode, orm.WorkflowNode] # DEFAULT_DATA_TO_DUMP = [orm.StructureData, orm.Code, orm.Computer, orm.BandsData, orm.UpfData] # DEFAULT_COLLECTIONS_TO_DUMP ?? -DEFAULT_ENTITIES_TO_DUMP = DEFAULT_PROCESSES_TO_DUMP # + DEFAULT_DATA_TO_DUMP +DEFAULT_ENTITIES_TO_DUMP = DEFAULT_PROCESSES_TO_DUMP # + DEFAULT_DATA_TO_DUMP # ! 
This class is instantiated once for every group, or once for the full profile @@ -36,7 +35,7 @@ def __init__( process_dumper: ProcessDumper | None = None, group: orm.Group | str | None = None, deduplicate: bool = True, - output_path: str | Path | None = None + output_path: str | Path | None = None, ): self.deduplicate = deduplicate @@ -53,7 +52,7 @@ def __init__( if process_dumper is None: process_dumper = ProcessDumper() - self.process_dumper: ProcessDumper = process_dumper + self.process_dumper: ProcessDumper = process_dumper if not hasattr(self, 'entity_counter'): self.create_entity_counter() @@ -121,7 +120,6 @@ def get_group_nodes(self): return nodes def _should_dump_processes(self) -> bool: - if not hasattr(self, 'group_nodes'): self.get_group_nodes() diff --git a/src/aiida/tools/dumping/process.py b/src/aiida/tools/dumping/process.py index f45c0692e4..00f8173d4b 100644 --- a/src/aiida/tools/dumping/process.py +++ b/src/aiida/tools/dumping/process.py @@ -31,9 +31,9 @@ import yaml +from aiida import orm from aiida.common import LinkType from aiida.common.exceptions import NotExistentAttributeError -from aiida import orm from aiida.orm.utils import LinkTriple from aiida.tools.archive.exceptions import ExportValidationError from aiida.tools.dumping.base import BaseDumper @@ -216,7 +216,9 @@ def dump( if output_path is None: output_path = self._generate_default_dump_path(process_node=process_node) - prepare_dump_path(path_to_validate=output_path, overwrite=self.base.overwrite, incremental=self.base.incremental) + prepare_dump_path( + path_to_validate=output_path, overwrite=self.base.overwrite, incremental=self.base.incremental + ) if isinstance(process_node, orm.CalculationNode): self._dump_calculation( @@ -251,7 +253,9 @@ def _dump_workflow( :param io_dump_paths: Custom subdirectories for `CalculationNode` s, defaults to None """ - prepare_dump_path(path_to_validate=output_path, overwrite=self.base.overwrite, incremental=self.base.incremental) + prepare_dump_path( + path_to_validate=output_path, overwrite=self.base.overwrite, incremental=self.base.incremental + ) self._dump_node_yaml(process_node=workflow_node, output_path=output_path) called_links = workflow_node.base.links.get_outgoing(link_type=(LinkType.CALL_CALC, LinkType.CALL_WORK)).all() @@ -305,7 +309,9 @@ def _dump_calculation( Default: ['inputs', 'outputs', 'node_inputs', 'node_outputs'] """ - prepare_dump_path(path_to_validate=output_path, overwrite=self.base.overwrite, incremental=self.base.incremental) + prepare_dump_path( + path_to_validate=output_path, overwrite=self.base.overwrite, incremental=self.base.incremental + ) self._dump_node_yaml(process_node=calculation_node, output_path=output_path) io_dump_mapping = self._generate_calculation_io_mapping(io_dump_paths=io_dump_paths) diff --git a/src/aiida/tools/dumping/profile.py b/src/aiida/tools/dumping/profile.py index 5b88fe8d55..c343e1c617 100644 --- a/src/aiida/tools/dumping/profile.py +++ b/src/aiida/tools/dumping/profile.py @@ -10,17 +10,18 @@ # TODO: Use `batch_iter` from aiida.tools.archive.common from __future__ import annotations -from pathlib import Path + import logging + from aiida import orm -from aiida.cmdline.params.options.main import ORGANIZE_BY_GROUPS +from aiida.manage.configuration.profile import Profile from aiida.tools.dumping.base import BaseDumper -from aiida.tools.dumping.process import ProcessDumper from aiida.tools.dumping.group import GroupDumper -from aiida.manage.configuration.profile import Profile +from aiida.tools.dumping.process import 
ProcessDumper logger = logging.getLogger(__name__) + class ProfileDumper: def __init__( self, @@ -29,7 +30,7 @@ def __init__( process_dumper: ProcessDumper | None = None, organize_by_groups: bool = True, deduplicate: bool = True, - groups: list[str | orm.Group] | None = None, + groups: list[str | orm.Group] | None = None, dump_processes: bool = True, ): self.organize_by_groups = organize_by_groups @@ -43,27 +44,23 @@ def __init__( if process_dumper is None: process_dumper = ProcessDumper() - self.process_dumper: ProcessDumper = process_dumper + self.process_dumper: ProcessDumper = process_dumper if not groups: groups = orm.QueryBuilder().append(orm.Group).all(flat=True) self.groups = groups - def dump(self): - self._dump_processes_not_in_any_group() self._dump_processes_per_group() - def _dump_processes_not_in_any_group(self): - # === Dump the data that is not associated with any group === if self.organize_by_groups: output_path = self.base_dumper.dump_parent_path / 'no-group' else: output_path = self.base_dumper.dump_parent_path - + no_group_dumper = GroupDumper( base_dumper=self.base_dumper, process_dumper=self.process_dumper, @@ -71,9 +68,8 @@ def _dump_processes_not_in_any_group(self): deduplicate=self.deduplicate, output_path=output_path, ) - + if self.dump_processes and no_group_dumper._should_dump_processes(): - logger.report(f'Dumping processes not in any group for profile `{self.profile.name}`...') no_group_dumper._dump_processes() @@ -82,7 +78,6 @@ def _dump_processes_per_group(self): # === Dump data per-group if Groups exist in profile or are selected === for group in self.groups: - if self.organize_by_groups: output_path = self.base_dumper.dump_parent_path / group.label else: From 64b715eb9ba13c5496a53fd1140acd3a8989453e Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Mon, 27 Jan 2025 17:52:14 +0100 Subject: [PATCH 07/27] Symlinking of workflows between groups works. --- src/aiida/cmdline/commands/cmd_process.py | 8 -- src/aiida/cmdline/commands/cmd_profile.py | 14 -- src/aiida/cmdline/params/options/main.py | 57 +------- src/aiida/tools/dumping/group.py | 157 +++++++++++----------- src/aiida/tools/dumping/parser.py | 8 +- src/aiida/tools/dumping/process.py | 5 +- src/aiida/tools/dumping/profile.py | 25 +++- 7 files changed, 107 insertions(+), 167 deletions(-) diff --git a/src/aiida/cmdline/commands/cmd_process.py b/src/aiida/cmdline/commands/cmd_process.py index 30e75a4295..9984b9ee7b 100644 --- a/src/aiida/cmdline/commands/cmd_process.py +++ b/src/aiida/cmdline/commands/cmd_process.py @@ -567,10 +567,6 @@ def process_repair(manager, broker, dry_run): @options.INCLUDE_OUTPUTS() @options.INCLUDE_ATTRIBUTES() @options.INCLUDE_EXTRAS() -@options.ALSO_RAW() -@options.ALSO_RICH() -@options.RICH_SPEC() -@options.RICH_DUMP_ALL() @click.option( '--dump-unsealed', is_flag=True, @@ -592,10 +588,6 @@ def process_dump( include_extras, dump_unsealed, incremental, - also_raw, - also_rich, - rich_spec, - rich_dump_all, ) -> None: """Dump process input and output files to disk. diff --git a/src/aiida/cmdline/commands/cmd_profile.py b/src/aiida/cmdline/commands/cmd_profile.py index 4c4926399b..9008ccabe9 100644 --- a/src/aiida/cmdline/commands/cmd_profile.py +++ b/src/aiida/cmdline/commands/cmd_profile.py @@ -314,20 +314,6 @@ def profile_mirror( profile = ctx.obj['profile'] - # if nodes and groups: - # echo.echo_critical('`nodes` and `groups` specified. Set only one.') - - # if dump_config_file is None: - - # # TODO: Also allow for mixing. 
Currently one can _only_ specify either the config file, or the arguments on the - # # TODO: command line - # else: - # kwarg_dicts_from_config = DumpConfigParser.parse_config_file(dump_config_file) - - # general_kwargs = kwarg_dicts_from_config['general_kwargs'] - # processdumper_kwargs = kwarg_dicts_from_config['processdumper_kwargs'] - # datadumper_kwargs = kwarg_dicts_from_config['datadumper_kwargs'] - incremental = not overwrite if path is None: diff --git a/src/aiida/cmdline/params/options/main.py b/src/aiida/cmdline/params/options/main.py index e7a18eedc1..8ee982ad1f 100644 --- a/src/aiida/cmdline/params/options/main.py +++ b/src/aiida/cmdline/params/options/main.py @@ -27,8 +27,6 @@ 'ALL', 'ALL_STATES', 'ALL_USERS', - 'ALSO_RAW', - 'ALSO_RICH', 'APPEND_TEXT', 'ARCHIVE_FORMAT', 'BROKER_HOST', @@ -46,7 +44,6 @@ 'COMPUTERS', 'CONFIG_FILE', 'DATA', - 'DATA_HIDDEN', 'DATUM', 'DB_BACKEND', 'DB_ENGINE', @@ -62,7 +59,6 @@ 'DICT_KEYS', 'DRY_RUN', 'DUMP_CONFIG_FILE', - 'DUMP_DATA', 'DUMP_PROCESSES', 'EXIT_STATUS', 'EXPORT_FORMAT', @@ -108,8 +104,6 @@ 'PROJECT', 'RAW', 'REPOSITORY_PATH', - 'RICH_DUMP_ALL', - 'RICH_SPEC', 'SCHEDULER', 'SILENT', 'SORT', @@ -801,7 +795,7 @@ def set_log_level(ctx, _param, value): DEDUPLICATE = OverridableOption( '--deduplicate/--no-deduplicate', is_flag=True, - default=False, + default=True, show_default=True, help='', ) @@ -814,46 +808,6 @@ def set_log_level(ctx, _param, value): help='Dump process data.', ) -DUMP_DATA = OverridableOption( - '--dump-data/--no-dump-data', - is_flag=True, - default=False, - type=bool, - show_default=True, - help='Dump data nodes in a dedicated directory.', -) - -DATA_HIDDEN = OverridableOption( - '--data-hidden/--data-non-hidden', - is_flag=True, - default=True, - show_default=True, - help='Dump all `orm.Data` in the hidden directory and link to there.', -) - -ALSO_RAW = OverridableOption( - '--also-raw/--no-also-raw', - is_flag=True, - default=False, - show_default=True, - help='Dump the `attributes` of all nodes related to the Process.', -) - -ALSO_RICH = OverridableOption( - '--also-rich/--no-also-rich', - is_flag=True, - default=False, - show_default=True, - help='Dump also nicely prepared outputs, e.g. CIF for structures or PDF image for bands.', -) - -RICH_SPEC = OverridableOption( - '--rich-spec', - default=None, - type=str, - help='Specifications for rich data dumping.', -) - DUMP_CONFIG_FILE = OverridableOption( '--dump-config-file', default=None, @@ -861,15 +815,6 @@ def set_log_level(ctx, _param, value): help='Provide dumping options via a config file in YAML format.', ) -RICH_DUMP_ALL = OverridableOption( - '--rich-dump-all/--no-rich-dump-all', - default=True, - is_flag=True, - type=bool, - show_default=True, - help='If a rich specification is provided, this triggers if all other Data nodes should also be dumped or not.', -) - ORGANIZE_BY_GROUPS = OverridableOption( '--organize-by-groups/--no-organize-by-groups', default=True, diff --git a/src/aiida/tools/dumping/group.py b/src/aiida/tools/dumping/group.py index 6ea4c960d3..e7032ef823 100644 --- a/src/aiida/tools/dumping/group.py +++ b/src/aiida/tools/dumping/group.py @@ -10,9 +10,10 @@ from __future__ import annotations +import os +from collections import defaultdict import itertools as it import logging -from collections import Counter from pathlib import Path from aiida import orm @@ -27,7 +28,6 @@ DEFAULT_ENTITIES_TO_DUMP = DEFAULT_PROCESSES_TO_DUMP # + DEFAULT_DATA_TO_DUMP -# ! 
This class is instantiated once for every group, or once for the full profile class GroupDumper: def __init__( self, @@ -36,6 +36,7 @@ def __init__( group: orm.Group | str | None = None, deduplicate: bool = True, output_path: str | Path | None = None, + global_log_dict: dict[str, Path] | None = None ): self.deduplicate = deduplicate @@ -45,6 +46,7 @@ def __init__( self.group = group self.output_path = output_path + self.global_log_dict = global_log_dict if base_dumper is None: base_dumper = BaseDumper() @@ -54,36 +56,14 @@ def __init__( process_dumper = ProcessDumper() self.process_dumper: ProcessDumper = process_dumper - if not hasattr(self, 'entity_counter'): - self.create_entity_counter() + self.nodes = self._get_nodes() + self.log_dict = {} - def create_entity_counter(self) -> Counter: - entity_counter = Counter() - if self.group is not None: - # If the group only has one WorkChain assigned to it, this will only return a count of 1 for the - # WorkChainNode, nothing more, that is, it doesn't work recursively. - nodes = self.group.nodes - # elif self.nodes is not None: - # nodes = self.nodes - else: - nodes = orm.QueryBuilder().append(orm.Node).all(flat=True) - - # Iterate over all the entities in the group - for node in nodes: - # Count the type string of each entity - entity_counter[node.__class__] += 1 - - # Convert the Counter to a dictionary (optional) - self.entity_counter = entity_counter - - return entity_counter + def _should_dump_processes(self) -> bool: - def get_group_nodes(self): - # if self.nodes: - # self.collection_nodes = self.nodes + return len([node for node in self.nodes if isinstance(node, orm.ProcessNode)]) > 0 - # if hasattr(self, 'collection_nodes'): - # return self.collection_nodes + def _get_nodes(self): # Get all nodes that are in the group if self.group is not None: @@ -92,7 +72,7 @@ def get_group_nodes(self): # Get all nodes that are _not_ in any group else: groups = orm.QueryBuilder().append(orm.Group).all(flat=True) - nodes_in_groups = [node.pk for group in groups for node in group.nodes] + nodes_in_groups = [node.uuid for group in groups for node in group.nodes] # Need to expand here also with the called_descendants of `WorkflowNodes`, otherwise the called # `CalculationNode`s for `WorkflowNode`s that are part of a group are dumped twice sub_nodes_in_groups = list( @@ -104,66 +84,24 @@ def get_group_nodes(self): ] ) ) - sub_nodes_in_groups = [node.pk for node in sub_nodes_in_groups] + sub_nodes_in_groups = [node.uuid for node in sub_nodes_in_groups] nodes_in_groups = nodes_in_groups + sub_nodes_in_groups - profile_nodes = orm.QueryBuilder().append(orm.Node, project=['pk']).all(flat=True) + profile_nodes = orm.QueryBuilder().append(orm.Node, project=['uuid']).all(flat=True) nodes = [profile_node for profile_node in profile_nodes if profile_node not in nodes_in_groups] nodes = [orm.load_node(node) for node in nodes] if self.base_dumper.last_dump_time is not None: - # breakpoint() nodes = [node for node in nodes if node.mtime > self.base_dumper.last_dump_time] - self.collection_nodes = nodes - return nodes - def _should_dump_processes(self) -> bool: - if not hasattr(self, 'group_nodes'): - self.get_group_nodes() - - return len([node for node in self.collection_nodes if isinstance(node, orm.ProcessNode)]) > 0 - - def _dump_calculations(self, calculations): - for calculation in calculations: - calculation_dumper = self.process_dumper - - calculation_dump_path = ( - self.output_path - / 'calculations' - / 
calculation_dumper._generate_default_dump_path(process_node=calculation, prefix='') - ) + def _get_processes(self): - if calculation.caller is None or (calculation.caller is not None and self.deduplicate): - calculation_dumper._dump_calculation(calculation_node=calculation, output_path=calculation_dump_path) - - def _dump_workflows(self, workflows): - # workflow_nodes = get_nodes_from_db(aiida_node_type=orm.WorkflowNode, with_group=self.group, flat=True) - for workflow in workflows: - # if workflow.pk == 47: - # breakpoint() - - workflow_dumper = self.process_dumper - - # TODO: If the GroupDumper is called from somewhere else outside, prefix the path with `groups/core` etc - workflow_dump_path = ( - self.output_path - / 'workflows' - / workflow_dumper._generate_default_dump_path(process_node=workflow, prefix=None) - ) - # logger.report(f'WORKFLOW_DUMP_PATH: {workflow_dump_path}') - workflow_dumper._dump_workflow( - workflow_node=workflow, - output_path=workflow_dump_path, - link_calculations=self.deduplicate, - link_calculations_dir=self.output_path / 'calculations', - ) - - def _dump_processes(self): - nodes = self.get_group_nodes() + nodes = self.nodes workflows = [node for node in nodes if isinstance(node, orm.WorkflowNode)] + # Make sure that only top-level workflows are dumped in their own directories when de-duplcation is enabled if self.deduplicate: workflows = [workflow for workflow in workflows if workflow.caller is None] @@ -177,10 +115,69 @@ def _dump_processes(self): calculations = set([node for node in nodes if isinstance(node, orm.CalculationNode)] + called_calculations) - if len(workflows) + len(calculations) == 0: + self.calculations = calculations + self.workflows = workflows + + self.log_dict = { + 'calculations': {}, + # dict.fromkeys([c.uuid for c in self.calculations], None), + 'workflows': dict.fromkeys([w.uuid for w in workflows], None) + } + + def _dump_processes(self): + + self._get_processes() + + if len(self.workflows) + len(self.calculations) == 0: + logger.report("No workflows or calculations to dump in group.") return self.output_path.mkdir(exist_ok=True, parents=True) - self._dump_calculations(calculations=calculations) - self._dump_workflows(workflows=workflows) + self._dump_calculations() + self._dump_workflows() + + def _dump_calculations(self): + + calculations_path = self.output_path / 'calculations' + + for calculation in self.calculations: + calculation_dumper = self.process_dumper + + calculation_dump_path = ( + calculations_path / calculation_dumper._generate_default_dump_path(process_node=calculation, prefix='') + ) + + if calculation.caller is None: + # or (calculation.caller is not None and not self.deduplicate): + calculation_dumper._dump_calculation(calculation_node=calculation, output_path=calculation_dump_path) + + self.log_dict['calculations'][calculation.uuid] = calculation_dump_path + + def _dump_workflows(self): + # workflow_nodes = get_nodes_from_db(aiida_node_type=orm.WorkflowNode, with_group=self.group, flat=True) + workflow_path = self.output_path / 'workflows' + workflow_path.mkdir(exist_ok=True, parents=True) + + for workflow in self.workflows: + + workflow_dumper = self.process_dumper + + workflow_dump_path = ( + workflow_path / workflow_dumper._generate_default_dump_path(process_node=workflow, prefix=None) + ) + + if self.deduplicate and workflow.uuid in self.global_log_dict["workflows"].keys(): + os.symlink( + src=self.global_log_dict["workflows"][workflow.uuid], + dst=workflow_dump_path, + ) + else: + 
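+                # Workflow not yet dumped for any other group: dump it in full under this group's 'workflows' directory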
workflow_dumper._dump_workflow( + workflow_node=workflow, + output_path=workflow_dump_path, + # link_calculations=not self.deduplicate, + # link_calculations_dir=self.output_path / 'calculations', + ) + + self.log_dict['workflows'][workflow.uuid] = workflow_dump_path diff --git a/src/aiida/tools/dumping/parser.py b/src/aiida/tools/dumping/parser.py index 96412eb421..c895d2717d 100644 --- a/src/aiida/tools/dumping/parser.py +++ b/src/aiida/tools/dumping/parser.py @@ -27,10 +27,10 @@ def parse_config_file(config_file: str | Path | None) -> dict: 'flat': config.get('flat', False), } - datadumper_kwargs = { - 'also_raw': config.get('also_raw', False), - 'also_rich': config.get('also_rich', True), - } + # datadumper_kwargs = { + # 'also_raw': config.get('also_raw', False), + # 'also_rich': config.get('also_rich', True), + # } collection_kwargs = { 'should_dump_processes': config.get('dump_processes', True), diff --git a/src/aiida/tools/dumping/process.py b/src/aiida/tools/dumping/process.py index 00f8173d4b..92102332fd 100644 --- a/src/aiida/tools/dumping/process.py +++ b/src/aiida/tools/dumping/process.py @@ -245,6 +245,7 @@ def _dump_workflow( io_dump_paths: List[str | Path] | None = None, link_calculations: bool = False, link_calculations_dir: Path | None = None, + workflow_symlink: Path | None = None, ) -> None: """Recursive function to traverse a `WorkflowNode` and dump its `CalculationNode` s. @@ -254,7 +255,9 @@ def _dump_workflow( """ prepare_dump_path( - path_to_validate=output_path, overwrite=self.base.overwrite, incremental=self.base.incremental + path_to_validate=output_path, + overwrite=self.base.overwrite, + incremental=self.base.incremental, ) self._dump_node_yaml(process_node=workflow_node, output_path=output_path) diff --git a/src/aiida/tools/dumping/profile.py b/src/aiida/tools/dumping/profile.py index c343e1c617..5093178297 100644 --- a/src/aiida/tools/dumping/profile.py +++ b/src/aiida/tools/dumping/profile.py @@ -12,7 +12,11 @@ from __future__ import annotations import logging +import itertools as it +from rich.pretty import pprint +from pathlib import Path +from collections import Counter from aiida import orm from aiida.manage.configuration.profile import Profile from aiida.tools.dumping.base import BaseDumper @@ -37,6 +41,7 @@ def __init__( self.deduplicate = deduplicate self.profile = profile self.dump_processes = dump_processes + self.groups = groups if base_dumper is None: base_dumper = BaseDumper() @@ -46,12 +51,14 @@ def __init__( process_dumper = ProcessDumper() self.process_dumper: ProcessDumper = process_dumper - if not groups: - groups = orm.QueryBuilder().append(orm.Group).all(flat=True) - self.groups = groups + # self.log_dict: dict[dict[str, Path]] = {} + self.log_dict= {'calculations': {}, 'workflows': {}} def dump(self): - self._dump_processes_not_in_any_group() + if not self.groups: + self._dump_processes_not_in_any_group() + self.groups = orm.QueryBuilder().append(orm.Group).all(flat=True) + self._dump_processes_per_group() def _dump_processes_not_in_any_group(self): @@ -67,6 +74,7 @@ def _dump_processes_not_in_any_group(self): group=None, deduplicate=self.deduplicate, output_path=output_path, + global_log_dict=self.log_dict, ) if self.dump_processes and no_group_dumper._should_dump_processes(): @@ -74,6 +82,8 @@ def _dump_processes_not_in_any_group(self): no_group_dumper._dump_processes() + self.log_dict.update(no_group_dumper.log_dict) + def _dump_processes_per_group(self): # === Dump data per-group if Groups exist in profile or are selected === @@ 
-89,9 +99,16 @@ def _dump_processes_per_group(self): group=group, deduplicate=self.deduplicate, output_path=output_path, + global_log_dict=self.log_dict, ) if self.dump_processes and group_dumper._should_dump_processes(): logger.report(f'Dumping processes in group {group.label} for profile `{self.profile.name}`...') group_dumper._dump_processes() + for entity in ['calculations', 'workflows']: + self.log_dict[entity].update(group_dumper.log_dict[entity]) + + pprint(group_dumper.log_dict) + pprint(self.log_dict) + From a8c5aacd4095bf83eb218c23da6086d502532585 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 27 Jan 2025 16:52:34 +0000 Subject: [PATCH 08/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/aiida/tools/dumping/group.py | 27 ++++++++++----------------- src/aiida/tools/dumping/profile.py | 7 ++----- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/src/aiida/tools/dumping/group.py b/src/aiida/tools/dumping/group.py index e7032ef823..ee7c64f136 100644 --- a/src/aiida/tools/dumping/group.py +++ b/src/aiida/tools/dumping/group.py @@ -10,10 +10,9 @@ from __future__ import annotations -import os -from collections import defaultdict import itertools as it import logging +import os from pathlib import Path from aiida import orm @@ -36,7 +35,7 @@ def __init__( group: orm.Group | str | None = None, deduplicate: bool = True, output_path: str | Path | None = None, - global_log_dict: dict[str, Path] | None = None + global_log_dict: dict[str, Path] | None = None, ): self.deduplicate = deduplicate @@ -60,11 +59,9 @@ def __init__( self.log_dict = {} def _should_dump_processes(self) -> bool: - return len([node for node in self.nodes if isinstance(node, orm.ProcessNode)]) > 0 def _get_nodes(self): - # Get all nodes that are in the group if self.group is not None: nodes = list(self.group.nodes) @@ -97,7 +94,6 @@ def _get_nodes(self): return nodes def _get_processes(self): - nodes = self.nodes workflows = [node for node in nodes if isinstance(node, orm.WorkflowNode)] @@ -121,15 +117,14 @@ def _get_processes(self): self.log_dict = { 'calculations': {}, # dict.fromkeys([c.uuid for c in self.calculations], None), - 'workflows': dict.fromkeys([w.uuid for w in workflows], None) + 'workflows': dict.fromkeys([w.uuid for w in workflows], None), } def _dump_processes(self): - self._get_processes() if len(self.workflows) + len(self.calculations) == 0: - logger.report("No workflows or calculations to dump in group.") + logger.report('No workflows or calculations to dump in group.') return self.output_path.mkdir(exist_ok=True, parents=True) @@ -138,14 +133,13 @@ def _dump_processes(self): self._dump_workflows() def _dump_calculations(self): - calculations_path = self.output_path / 'calculations' for calculation in self.calculations: calculation_dumper = self.process_dumper - calculation_dump_path = ( - calculations_path / calculation_dumper._generate_default_dump_path(process_node=calculation, prefix='') + calculation_dump_path = calculations_path / calculation_dumper._generate_default_dump_path( + process_node=calculation, prefix='' ) if calculation.caller is None: @@ -160,16 +154,15 @@ def _dump_workflows(self): workflow_path.mkdir(exist_ok=True, parents=True) for workflow in self.workflows: - workflow_dumper = self.process_dumper - workflow_dump_path = ( - workflow_path / workflow_dumper._generate_default_dump_path(process_node=workflow, prefix=None) + 
workflow_dump_path = workflow_path / workflow_dumper._generate_default_dump_path( + process_node=workflow, prefix=None ) - if self.deduplicate and workflow.uuid in self.global_log_dict["workflows"].keys(): + if self.deduplicate and workflow.uuid in self.global_log_dict['workflows'].keys(): os.symlink( - src=self.global_log_dict["workflows"][workflow.uuid], + src=self.global_log_dict['workflows'][workflow.uuid], dst=workflow_dump_path, ) else: diff --git a/src/aiida/tools/dumping/profile.py b/src/aiida/tools/dumping/profile.py index 5093178297..282bad1372 100644 --- a/src/aiida/tools/dumping/profile.py +++ b/src/aiida/tools/dumping/profile.py @@ -12,11 +12,9 @@ from __future__ import annotations import logging -import itertools as it + from rich.pretty import pprint -from pathlib import Path -from collections import Counter from aiida import orm from aiida.manage.configuration.profile import Profile from aiida.tools.dumping.base import BaseDumper @@ -52,7 +50,7 @@ def __init__( self.process_dumper: ProcessDumper = process_dumper # self.log_dict: dict[dict[str, Path]] = {} - self.log_dict= {'calculations': {}, 'workflows': {}} + self.log_dict = {'calculations': {}, 'workflows': {}} def dump(self): if not self.groups: @@ -111,4 +109,3 @@ def _dump_processes_per_group(self): pprint(group_dumper.log_dict) pprint(self.log_dict) - From fbdf478aa813cd39b5254f4e746a85860a584f72 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Tue, 28 Jan 2025 11:14:22 +0100 Subject: [PATCH 09/27] Fix `verdi process dump` tests - Use the `BaseDumper` instead of passing arguments to the `ProcessDumper` - Append PKs to the test output paths and use `aiida_profile_clean` fixture for reproducible results --- src/aiida/cmdline/commands/cmd_process.py | 24 ++++--- src/aiida/cmdline/commands/cmd_profile.py | 8 +-- src/aiida/tools/dumping/process.py | 70 ++++++++++-------- .../{test_processes.py => test_process.py} | 71 ++++++++++++------- 4 files changed, 100 insertions(+), 73 deletions(-) rename tests/tools/dumping/{test_processes.py => test_process.py} (88%) diff --git a/src/aiida/cmdline/commands/cmd_process.py b/src/aiida/cmdline/commands/cmd_process.py index 9984b9ee7b..395c74de5e 100644 --- a/src/aiida/cmdline/commands/cmd_process.py +++ b/src/aiida/cmdline/commands/cmd_process.py @@ -606,21 +606,23 @@ def process_dump( """ from aiida.tools.archive.exceptions import ExportValidationError + from aiida.tools.dumping.base import BaseDumper from aiida.tools.dumping.process import ProcessDumper - processdumper_kwargs = { - 'include_inputs': include_inputs, - 'include_outputs': include_outputs, - 'include_attributes': include_attributes, - 'include_extras': include_extras, - 'flat': flat, - 'dump_unsealed': dump_unsealed, - 'incremental': incremental, - } + base_dumper = BaseDumper( + dump_parent_path=path, + overwrite=overwrite, + incremental=incremental, + ) process_dumper = ProcessDumper( - overwrite=overwrite, - **processdumper_kwargs, + base_dumper=base_dumper, + include_inputs=include_inputs, + include_outputs=include_outputs, + include_attributes=include_attributes, + include_extras=include_extras, + flat=flat, + dump_unsealed=dump_unsealed, ) try: diff --git a/src/aiida/cmdline/commands/cmd_profile.py b/src/aiida/cmdline/commands/cmd_profile.py index 9008ccabe9..4f6fc99b60 100644 --- a/src/aiida/cmdline/commands/cmd_profile.py +++ b/src/aiida/cmdline/commands/cmd_profile.py @@ -330,7 +330,7 @@ def profile_mirror( else: echo.echo_report(f"Dumping of profile `{profile.name}`'s data at path: `{path}`.") - 
SAFEGUARD_FILE: str = '.verdi_profile_mirror' + SAFEGUARD_FILE: str = '.verdi_profile_mirror' # noqa: N806 safeguard_file_path: Path = path / SAFEGUARD_FILE try: @@ -357,7 +357,7 @@ def profile_mirror( ) process_dumper = ProcessDumper( - base=base_dumper, + base_dumper=base_dumper, include_inputs=include_inputs, include_outputs=include_outputs, include_attributes=include_attributes, @@ -378,6 +378,6 @@ def profile_mirror( profile_dumper.dump() # Append the current time to the file - last_dump_time = datetime.now().astimezone().isoformat() + last_dump_time = datetime.now().astimezone() with safeguard_file_path.open('a') as fhandle: - fhandle.write(f'Last profile mirror time: {last_dump_time}\n') + fhandle.write(f'Last profile mirror time: {last_dump_time.isoformat()}\n') diff --git a/src/aiida/tools/dumping/process.py b/src/aiida/tools/dumping/process.py index 92102332fd..2ed2aa894b 100644 --- a/src/aiida/tools/dumping/process.py +++ b/src/aiida/tools/dumping/process.py @@ -27,7 +27,6 @@ import os from pathlib import Path from types import SimpleNamespace -from typing import List import yaml @@ -45,24 +44,29 @@ class ProcessDumper: def __init__( self, - base: BaseDumper = BaseDumper(), - flat: bool = False, + base_dumper: BaseDumper | None = None, include_inputs: bool = True, include_outputs: bool = False, include_attributes: bool = True, include_extras: bool = True, + flat: bool = False, dump_unsealed: bool = False, ) -> None: - self.flat = flat - self.base = base self.include_inputs = include_inputs self.include_outputs = include_outputs self.include_attributes = include_attributes self.include_extras = include_extras + self.flat = flat self.dump_unsealed = dump_unsealed + if base_dumper is None: + base_dumper = BaseDumper() + self.base_dumper: BaseDumper = base_dumper + @staticmethod - def _generate_default_dump_path(process_node: orm.ProcessNode, prefix: str = 'dump') -> Path: + def _generate_default_dump_path( + process_node: orm.ProcessNode, prefix: str | None = 'dump', append_pk: bool = True + ) -> Path: """Simple helper function to generate the default parent-dumping directory if none given. This function is not called for the recursive sub-calls of `_dump_calculation` as it just creates the default @@ -74,17 +78,20 @@ def _generate_default_dump_path(process_node: orm.ProcessNode, prefix: str = 'du entities_to_dump = [] - if prefix: - # No '' and None + # No '' and None + if prefix is not None: entities_to_dump += [prefix] try: - entities_to_dump += [process_node.process_label] + if process_node.process_label is not None: + entities_to_dump.append(process_node.process_label) except AttributeError: # This case came up during testing, not sure how relevant it actually is - entities_to_dump += [process_node.process_type] + if process_node.process_type is not None: + entities_to_dump.append(process_node.process_type) - entities_to_dump += [str(process_node.pk)] + if append_pk: + entities_to_dump += [str(process_node.pk)] return Path('-'.join(entities_to_dump)) @@ -187,7 +194,7 @@ def dump( self, process_node: orm.ProcessNode, output_path: Path | None, - io_dump_paths: List[str | Path] | None = None, + io_dump_paths: list[str | Path] | None = None, ) -> Path: """Dumps all data involved in a `ProcessNode`, including its outgoing links. 
@@ -217,7 +224,7 @@ def dump( output_path = self._generate_default_dump_path(process_node=process_node) prepare_dump_path( - path_to_validate=output_path, overwrite=self.base.overwrite, incremental=self.base.incremental + path_to_validate=output_path, overwrite=self.base_dumper.overwrite, incremental=self.base_dumper.incremental ) if isinstance(process_node, orm.CalculationNode): @@ -242,7 +249,7 @@ def _dump_workflow( self, workflow_node: orm.WorkflowNode, output_path: Path, - io_dump_paths: List[str | Path] | None = None, + io_dump_paths: list[str | Path] | None = None, link_calculations: bool = False, link_calculations_dir: Path | None = None, workflow_symlink: Path | None = None, @@ -256,8 +263,8 @@ def _dump_workflow( prepare_dump_path( path_to_validate=output_path, - overwrite=self.base.overwrite, - incremental=self.base.incremental, + overwrite=self.base_dumper.overwrite, + incremental=self.base_dumper.incremental, ) self._dump_node_yaml(process_node=workflow_node, output_path=output_path) @@ -289,11 +296,11 @@ def _dump_workflow( output_path=child_output_path, io_dump_paths=io_dump_paths, ) - else: + elif link_calculations_dir is not None: + calculation_dump_path = link_calculations_dir / ProcessDumper._generate_default_dump_path( + process_node=child_node, prefix='' + ) try: - calculation_dump_path = link_calculations_dir / ProcessDumper._generate_default_dump_path( - process_node=child_node, prefix='' - ) os.symlink(calculation_dump_path, child_output_path) except FileExistsError: pass @@ -302,7 +309,7 @@ def _dump_calculation( self, calculation_node: orm.CalculationNode, output_path: Path, - io_dump_paths: List[str | Path] | None = None, + io_dump_paths: list[str | Path] | None = None, ) -> None: """Dump the contents of a `CalculationNode` to a specified output path. @@ -313,7 +320,7 @@ def _dump_calculation( """ prepare_dump_path( - path_to_validate=output_path, overwrite=self.base.overwrite, incremental=self.base.incremental + path_to_validate=output_path, overwrite=self.base_dumper.overwrite, incremental=self.base_dumper.incremental ) self._dump_node_yaml(process_node=calculation_node, output_path=output_path) @@ -350,12 +357,12 @@ def _dump_calculation( def _dump_calculation_io_files( self, parent_path: Path, - link_triples: orm.LinkManager | List[orm.LinkTriple], + link_triples: orm.LinkManager | list[orm.LinkTriple], ): """Small helper function to dump linked input/output nodes of a `orm.CalculationNode`. :param parent_path: Parent directory for dumping the linked node contents. - :param link_triples: List of link triples. + :param link_triples: list of link triples. """ for link_triple in link_triples: @@ -369,7 +376,7 @@ def _dump_calculation_io_files( link_triple.node.base.repository.copy_tree(linked_node_path.resolve()) - def _generate_calculation_io_mapping(self, io_dump_paths: List[str | Path] | None = None) -> SimpleNamespace: + def _generate_calculation_io_mapping(self, io_dump_paths: list[str | Path] | None = None) -> SimpleNamespace: """Helper function to generate mapping for entities dumped for each `CalculationNode`. This is to avoid exposing AiiDA terminology, like `repository` to the user, while keeping track of which @@ -380,8 +387,8 @@ def _generate_calculation_io_mapping(self, io_dump_paths: List[str | Path] | Non :return: SimpleNamespace mapping. 
""" - aiida_entities_to_dump = ['repository', 'retrieved', 'inputs', 'outputs'] - default_calculation_io_dump_paths = ['inputs', 'outputs', 'node_inputs', 'node_outputs'] + aiida_entities_to_dump: list[str] = ['repository', 'retrieved', 'inputs', 'outputs'] + default_calculation_io_dump_paths: list[str | Path] = ['inputs', 'outputs', 'node_inputs', 'node_outputs'] if self.flat and io_dump_paths is None: logger.info( 'Flat set to True and no `io_dump_paths`. Dumping in a flat directory, files might be overwritten.' @@ -390,21 +397,22 @@ def _generate_calculation_io_mapping(self, io_dump_paths: List[str | Path] | Non return SimpleNamespace(**dict(zip(aiida_entities_to_dump, empty_calculation_io_dump_paths))) - elif not self.flat and io_dump_paths is None: + if not self.flat and io_dump_paths is None: logger.info( 'Flat set to False but no `io_dump_paths` provided. ' + f'Will use the defaults {default_calculation_io_dump_paths}.' ) - return SimpleNamespace(**dict(zip(aiida_entities_to_dump, default_calculation_io_dump_paths))) + io_dump_paths = default_calculation_io_dump_paths elif self.flat: logger.info('Flat set to True but `io_dump_paths` provided. These will be used, but `inputs` not nested.') - return SimpleNamespace(**dict(zip(aiida_entities_to_dump, io_dump_paths))) else: logger.info( 'Flat set to False but no `io_dump_paths` provided. These will be used, but `node_inputs` flattened.' ) - return SimpleNamespace(**dict(zip(aiida_entities_to_dump, io_dump_paths))) # type: ignore[arg-type] + + assert io_dump_paths is not None + return SimpleNamespace(**dict(zip(aiida_entities_to_dump, io_dump_paths))) def _dump_node_yaml( self, diff --git a/tests/tools/dumping/test_processes.py b/tests/tools/dumping/test_process.py similarity index 88% rename from tests/tools/dumping/test_processes.py rename to tests/tools/dumping/test_process.py index e2f5633939..47d39ba75e 100644 --- a/tests/tools/dumping/test_processes.py +++ b/tests/tools/dumping/test_process.py @@ -15,6 +15,7 @@ import pytest +from aiida.tools.dumping.base import BaseDumper from aiida.tools.dumping.process import ProcessDumper # Non-AiiDA variables @@ -38,6 +39,7 @@ # Only test top-level actions, like path and README creation # Other things tested via `_dump_workflow` and `_dump_calculation` +@pytest.mark.usefixtures('aiida_profile_clean') def test_dump(generate_calculation_node_io, generate_workchain_node_io, tmp_path): from aiida.tools.archive.exceptions import ExportValidationError @@ -59,6 +61,7 @@ def test_dump(generate_calculation_node_io, generate_workchain_node_io, tmp_path assert return_path == dump_parent_path +@pytest.mark.usefixtures('aiida_profile_clean') def test_dump_workflow(generate_calculation_node_io, generate_workchain_node_io, tmp_path): # Need to generate parent path for dumping, as I don't want the sub-workchains to be dumped directly into `tmp_path` dump_parent_path = tmp_path / 'wc-workflow_dump-test-io' @@ -68,15 +71,16 @@ def test_dump_workflow(generate_calculation_node_io, generate_workchain_node_io, wc_node = generate_workchain_node_io(cj_nodes=cj_nodes) process_dumper._dump_workflow(workflow_node=wc_node, output_path=dump_parent_path) - input_path = '01-sub_workflow/01-calculation/inputs/file.txt' - singlefiledata_path = '01-sub_workflow/01-calculation/node_inputs/singlefile/file.txt' - folderdata_path = '01-sub_workflow/01-calculation/node_inputs/folderdata/relative_path/file.txt' - arraydata_path = '01-sub_workflow/01-calculation/node_inputs/arraydata/default.npy' + base_path = 
Path('01-sub_workflow-8/01-calculation-9') + input_path = base_path / 'inputs/file.txt' + singlefiledata_path = base_path / 'node_inputs/singlefile/file.txt' + folderdata_path = base_path / 'node_inputs/folderdata/relative_path/file.txt' + arraydata_path = base_path / 'node_inputs/arraydata/default.npy' node_metadata_paths = [ node_metadata_file, - f'01-sub_workflow/{node_metadata_file}', - f'01-sub_workflow/01-calculation/{node_metadata_file}', - f'01-sub_workflow/02-calculation/{node_metadata_file}', + f'01-sub_workflow-8/{node_metadata_file}', + f'{base_path}/{node_metadata_file}', + f'01-sub_workflow-8/02-calculation-10/{node_metadata_file}', ] expected_files = [input_path, singlefiledata_path, folderdata_path, arraydata_path, *node_metadata_paths] @@ -89,14 +93,14 @@ def test_dump_workflow(generate_calculation_node_io, generate_workchain_node_io, process_dumper = ProcessDumper(flat=True) process_dumper._dump_workflow(workflow_node=wc_node, output_path=dump_parent_path) - input_path = '01-sub_workflow/01-calculation/file.txt' - arraydata_path = '01-sub_workflow/01-calculation/default.npy' - folderdata_path = '01-sub_workflow/01-calculation/relative_path/file.txt' + input_path = base_path / 'file.txt' + arraydata_path = base_path / 'default.npy' + folderdata_path = base_path / 'relative_path/file.txt' node_metadata_paths = [ node_metadata_file, - f'01-sub_workflow/{node_metadata_file}', - f'01-sub_workflow/01-calculation/{node_metadata_file}', - f'01-sub_workflow/02-calculation/{node_metadata_file}', + f'01-sub_workflow-8/{node_metadata_file}', + f'{base_path}/{node_metadata_file}', + f'01-sub_workflow-8/02-calculation-10/{node_metadata_file}', ] expected_files = [input_path, folderdata_path, arraydata_path, *node_metadata_paths] @@ -105,21 +109,27 @@ def test_dump_workflow(generate_calculation_node_io, generate_workchain_node_io, assert all([expected_file.is_file() for expected_file in expected_files]) +@pytest.mark.usefixtures('aiida_profile_clean') def test_dump_multiply_add(tmp_path, generate_workchain_multiply_add): dump_parent_path = tmp_path / 'wc-dump-test-multiply-add' process_dumper = ProcessDumper() wc_node = generate_workchain_multiply_add() process_dumper.dump(process_node=wc_node, output_path=dump_parent_path) - input_files = ['_aiidasubmit.sh', 'aiida.in', '.aiida/job_tmpl.json', '.aiida/calcinfo.json'] - output_files = ['_scheduler-stderr.txt', '_scheduler-stdout.txt', 'aiida.out'] + arithmetic_add_path = dump_parent_path / '02-ArithmeticAddCalculation-8' + multiply_path = dump_parent_path / '01-multiply-6' + input_files = [ - dump_parent_path / '02-ArithmeticAddCalculation' / inputs_relpath / input_file for input_file in input_files - ] - input_files += [dump_parent_path / '01-multiply' / inputs_relpath / 'source_file'] - output_files = [ - dump_parent_path / '02-ArithmeticAddCalculation' / outputs_relpath / output_file for output_file in output_files + '_aiidasubmit.sh', + 'aiida.in', + '.aiida/job_tmpl.json', + '.aiida/calcinfo.json', ] + output_files = ['_scheduler-stderr.txt', '_scheduler-stdout.txt', 'aiida.out'] + + input_files = [arithmetic_add_path / inputs_relpath / input_file for input_file in input_files] + input_files += [multiply_path / inputs_relpath / 'source_file'] + output_files = [arithmetic_add_path / outputs_relpath / output_file for output_file in output_files] # No node_inputs contained in MultiplyAddWorkChain assert all([input_file.is_file() for input_file in input_files]) @@ -130,7 +140,7 @@ def test_dump_multiply_add(tmp_path, 
generate_workchain_multiply_add): process_dumper = ProcessDumper(flat=True) process_dumper.dump(process_node=wc_node, output_path=dump_parent_path) - multiply_file = dump_parent_path / '01-multiply' / 'source_file' + multiply_file = dump_parent_path / '01-multiply-6' / 'source_file' arithmetic_add_files = [ '_aiidasubmit.sh', 'aiida.in', @@ -141,7 +151,7 @@ def test_dump_multiply_add(tmp_path, generate_workchain_multiply_add): 'aiida.out', ] arithmetic_add_files = [ - dump_parent_path / '02-ArithmeticAddCalculation' / arithmetic_add_file + dump_parent_path / '02-ArithmeticAddCalculation-8' / arithmetic_add_file for arithmetic_add_file in arithmetic_add_files ] @@ -202,7 +212,8 @@ def test_dump_calculation_flat(tmp_path, generate_calculation_node_io): def test_dump_calculation_overwr_incr(tmp_path, generate_calculation_node_io): """Tests the ProcessDumper for the overwrite and incremental option.""" dump_parent_path = tmp_path / 'cj-dump-test-overwrite' - process_dumper = ProcessDumper(overwrite=False, incremental=False) + base_dumper = BaseDumper(overwrite=False, incremental=False) + process_dumper = ProcessDumper(base_dumper=base_dumper) calculation_node = generate_calculation_node_io() calculation_node.seal() # Create safeguard file to mock existing dump directory @@ -212,7 +223,8 @@ def test_dump_calculation_overwr_incr(tmp_path, generate_calculation_node_io): with pytest.raises(FileExistsError): process_dumper._dump_calculation(calculation_node=calculation_node, output_path=dump_parent_path) # With overwrite option true no error is raised and the dumping can run through. - process_dumper = ProcessDumper(overwrite=True, incremental=False) + base_dumper = BaseDumper(overwrite=True, incremental=False) + process_dumper = ProcessDumper(base_dumper=base_dumper) process_dumper._dump_calculation(calculation_node=calculation_node, output_path=dump_parent_path) assert (dump_parent_path / inputs_relpath / filename).is_file() @@ -221,7 +233,8 @@ def test_dump_calculation_overwr_incr(tmp_path, generate_calculation_node_io): # Incremental also does work dump_parent_path.mkdir() (dump_parent_path / '.aiida_node_metadata.yaml').touch() - process_dumper = ProcessDumper(overwrite=False, incremental=True) + base_dumper = BaseDumper(overwrite=False, incremental=True) + process_dumper = ProcessDumper(base_dumper=base_dumper) process_dumper._dump_calculation(calculation_node=calculation_node, output_path=dump_parent_path) assert (dump_parent_path / inputs_relpath / filename).is_file() @@ -235,6 +248,7 @@ def test_dump_calculation_no_inputs(tmp_path, generate_calculation_node_io): assert not (dump_parent_path / node_inputs_relpath).is_dir() +@pytest.mark.usefixtures('aiida_profile_clean') def test_dump_calculation_add(tmp_path, generate_calculation_node_add): dump_parent_path = tmp_path / 'cj-dump-test-add' @@ -314,6 +328,7 @@ def test_prepare_dump_path(tmp_path): assert test_file.is_file() +@pytest.mark.usefixtures('aiida_profile_clean') def test_generate_default_dump_path( generate_calculation_node_add, generate_workchain_multiply_add, @@ -343,6 +358,7 @@ def test_generate_calculation_io_mapping(): assert calculation_io_mapping.outputs == 'node_outputs_' +@pytest.mark.usefixtures('aiida_profile_clean') def test_generate_child_node_label( generate_workchain_multiply_add, generate_calculation_node_io, generate_workchain_node_io ): @@ -364,7 +380,7 @@ def test_generate_child_node_label( for index, output_node in enumerate(output_triples) ] ) - assert output_paths == ['00-sub_workflow', '01-calculation'] + 
assert output_paths == ['00-sub_workflow-5', '01-calculation-6'] # Check with multiply_add workchain node multiply_add_node = generate_workchain_multiply_add() @@ -374,7 +390,8 @@ def test_generate_child_node_label( output_paths = sorted( [process_dumper._generate_child_node_label(_, output_node) for _, output_node in enumerate(output_triples)] ) - assert output_paths == ['00-multiply', '01-ArithmeticAddCalculation', '02-result'] + print(output_paths) + assert output_paths == ['00-multiply-12', '01-ArithmeticAddCalculation-14', '02-result-17'] def test_dump_node_yaml(generate_calculation_node_io, tmp_path, generate_workchain_multiply_add): From b98c61adbb66618aedae1a6d7446b7b278ed73c2 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Tue, 28 Jan 2025 12:58:22 +0100 Subject: [PATCH 10/27] Fix mypy complaints --- src/aiida/cmdline/params/options/main.py | 7 -- src/aiida/tools/dumping/base.py | 4 +- src/aiida/tools/dumping/group.py | 74 +++++++-------- src/aiida/tools/dumping/logger.py | 18 ++++ src/aiida/tools/dumping/process.py | 4 +- src/aiida/tools/dumping/profile.py | 63 ++++++------- src/aiida/tools/dumping/utils.py | 110 ++++++++++------------- tests/tools/dumping/test_process.py | 2 +- 8 files changed, 139 insertions(+), 143 deletions(-) create mode 100644 src/aiida/tools/dumping/logger.py diff --git a/src/aiida/cmdline/params/options/main.py b/src/aiida/cmdline/params/options/main.py index 8ee982ad1f..82d4fda8d8 100644 --- a/src/aiida/cmdline/params/options/main.py +++ b/src/aiida/cmdline/params/options/main.py @@ -867,10 +867,3 @@ def set_log_level(ctx, _param, value): show_default=True, help="Incremental dumping of data to disk. Doesn't require using overwrite to clean previous directories.", ) - -RICH_OPTIONS = OverridableOption( - '--rich-options', - default=None, - type=str, - help='Specifications for rich data dumping.', -) diff --git a/src/aiida/tools/dumping/base.py b/src/aiida/tools/dumping/base.py index 8a89e464d2..a2e2c379e8 100644 --- a/src/aiida/tools/dumping/base.py +++ b/src/aiida/tools/dumping/base.py @@ -14,12 +14,12 @@ class BaseDumper: def __init__( self, - dump_parent_path: Path = Path.cwd(), + dump_parent_path: Path | None = None, overwrite: bool = False, incremental: bool = True, last_dump_time: datetime | None = None, ): - self.dump_parent_path = dump_parent_path + self.dump_parent_path = dump_parent_path or Path.cwd() self.overwrite = overwrite self.incremental = incremental self.last_dump_time = last_dump_time diff --git a/src/aiida/tools/dumping/group.py b/src/aiida/tools/dumping/group.py index ee7c64f136..38bf25c380 100644 --- a/src/aiida/tools/dumping/group.py +++ b/src/aiida/tools/dumping/group.py @@ -11,15 +11,16 @@ from __future__ import annotations import itertools as it -import logging import os from pathlib import Path from aiida import orm +from aiida.common.log import AIIDA_LOGGER from aiida.tools.dumping.base import BaseDumper +from aiida.tools.dumping.logger import DumpLogger from aiida.tools.dumping.process import ProcessDumper -logger = logging.getLogger(__name__) +logger = AIIDA_LOGGER.getChild('tools.dumping') DEFAULT_PROCESSES_TO_DUMP = [orm.CalculationNode, orm.WorkflowNode] # DEFAULT_DATA_TO_DUMP = [orm.StructureData, orm.Code, orm.Computer, orm.BandsData, orm.UpfData] @@ -32,31 +33,28 @@ def __init__( self, base_dumper: BaseDumper | None = None, process_dumper: ProcessDumper | None = None, + dump_logger: DumpLogger | None = None, group: orm.Group | str | None = None, deduplicate: bool = True, - output_path: str | Path | None = None, - 
global_log_dict: dict[str, Path] | None = None, + output_path: Path | str | None = None, ): self.deduplicate = deduplicate # Allow passing of group via label if isinstance(group, str): - group = orm.Group.get(group) + group = orm.load_group(group) self.group = group - self.output_path = output_path - self.global_log_dict = global_log_dict - if base_dumper is None: - base_dumper = BaseDumper() - self.base_dumper: BaseDumper = base_dumper + self.base_dumper = base_dumper or BaseDumper() + self.process_dumper = process_dumper or ProcessDumper() + self.dump_logger = dump_logger or DumpLogger() - if process_dumper is None: - process_dumper = ProcessDumper() - self.process_dumper: ProcessDumper = process_dumper + # Properly set the `output_path` attribute + + self.output_path = Path(output_path or self.base_dumper.dump_parent_path) self.nodes = self._get_nodes() - self.log_dict = {} def _should_dump_processes(self) -> bool: return len([node for node in self.nodes if isinstance(node, orm.ProcessNode)]) > 0 @@ -68,21 +66,23 @@ def _get_nodes(self): # Get all nodes that are _not_ in any group else: - groups = orm.QueryBuilder().append(orm.Group).all(flat=True) + groups: list[orm.Group] = orm.QueryBuilder().append(orm.Group).all(flat=True) # type: ignore[assignment] nodes_in_groups = [node.uuid for group in groups for node in group.nodes] + # Need to expand here also with the called_descendants of `WorkflowNodes`, otherwise the called # `CalculationNode`s for `WorkflowNode`s that are part of a group are dumped twice - sub_nodes_in_groups = list( - it.chain( - *[ - orm.load_node(node).called_descendants - for node in nodes_in_groups - if isinstance(orm.load_node(node), orm.WorkflowNode) - ] - ) + # Get the called descendants of WorkflowNodes within the nodes_in_groups list + called_descendants_generator = ( + orm.load_node(node).called_descendants + for node in nodes_in_groups + if isinstance(orm.load_node(node), orm.WorkflowNode) ) + + # Flatten the list of called descendants + sub_nodes_in_groups = list(it.chain(*called_descendants_generator)) + sub_nodes_in_groups = [node.uuid for node in sub_nodes_in_groups] - nodes_in_groups = nodes_in_groups + sub_nodes_in_groups + nodes_in_groups += sub_nodes_in_groups profile_nodes = orm.QueryBuilder().append(orm.Node, project=['uuid']).all(flat=True) nodes = [profile_node for profile_node in profile_nodes if profile_node not in nodes_in_groups] @@ -114,11 +114,9 @@ def _get_processes(self): self.calculations = calculations self.workflows = workflows - self.log_dict = { - 'calculations': {}, - # dict.fromkeys([c.uuid for c in self.calculations], None), - 'workflows': dict.fromkeys([w.uuid for w in workflows], None), - } + def dump(self): + self.output_path.mkdir(exist_ok=True, parents=True) + self._dump_processes() def _dump_processes(self): self._get_processes() @@ -127,13 +125,12 @@ def _dump_processes(self): logger.report('No workflows or calculations to dump in group.') return - self.output_path.mkdir(exist_ok=True, parents=True) - self._dump_calculations() self._dump_workflows() def _dump_calculations(self): calculations_path = self.output_path / 'calculations' + dumped_calculations = {} for calculation in self.calculations: calculation_dumper = self.process_dumper @@ -146,12 +143,15 @@ def _dump_calculations(self): # or (calculation.caller is not None and not self.deduplicate): calculation_dumper._dump_calculation(calculation_node=calculation, output_path=calculation_dump_path) - self.log_dict['calculations'][calculation.uuid] = calculation_dump_path + 
dumped_calculations[calculation.uuid] = calculation_dump_path + + self.dump_logger.update_calculations(dumped_calculations) def _dump_workflows(self): # workflow_nodes = get_nodes_from_db(aiida_node_type=orm.WorkflowNode, with_group=self.group, flat=True) workflow_path = self.output_path / 'workflows' workflow_path.mkdir(exist_ok=True, parents=True) + dumped_workflows = {} for workflow in self.workflows: workflow_dumper = self.process_dumper @@ -160,9 +160,11 @@ def _dump_workflows(self): process_node=workflow, prefix=None ) - if self.deduplicate and workflow.uuid in self.global_log_dict['workflows'].keys(): + logged_workflows = self.dump_logger.get_logs()['workflows'] + + if self.deduplicate and workflow.uuid in logged_workflows.keys(): os.symlink( - src=self.global_log_dict['workflows'][workflow.uuid], + src=logged_workflows[workflow.uuid], dst=workflow_dump_path, ) else: @@ -173,4 +175,6 @@ def _dump_workflows(self): # link_calculations_dir=self.output_path / 'calculations', ) - self.log_dict['workflows'][workflow.uuid] = workflow_dump_path + dumped_workflows[workflow.uuid] = workflow_dump_path + + self.dump_logger.update_workflows(dumped_workflows) diff --git a/src/aiida/tools/dumping/logger.py b/src/aiida/tools/dumping/logger.py new file mode 100644 index 0000000000..eecf611911 --- /dev/null +++ b/src/aiida/tools/dumping/logger.py @@ -0,0 +1,18 @@ +from pathlib import Path + + +class DumpLogger: + def __init__(self): + self.log_dict: dict[str, dict[str, Path]] = {'calculations': {}, 'workflows': {}} + + def update_calculations(self, new_calculations: dict[str, Path]): + """Update the log with new calculations.""" + self.log_dict['calculations'].update(new_calculations) + + def update_workflows(self, new_workflows: dict[str, Path]): + """Update the log with new workflows.""" + self.log_dict['workflows'].update(new_workflows) + + def get_logs(self): + """Retrieve the current state of the log.""" + return self.log_dict diff --git a/src/aiida/tools/dumping/process.py b/src/aiida/tools/dumping/process.py index 2ed2aa894b..f65da5a15e 100644 --- a/src/aiida/tools/dumping/process.py +++ b/src/aiida/tools/dumping/process.py @@ -59,9 +59,7 @@ def __init__( self.flat = flat self.dump_unsealed = dump_unsealed - if base_dumper is None: - base_dumper = BaseDumper() - self.base_dumper: BaseDumper = base_dumper + self.base_dumper = base_dumper or BaseDumper() @staticmethod def _generate_default_dump_path( diff --git a/src/aiida/tools/dumping/profile.py b/src/aiida/tools/dumping/profile.py index 282bad1372..2b2d5294c1 100644 --- a/src/aiida/tools/dumping/profile.py +++ b/src/aiida/tools/dumping/profile.py @@ -11,25 +11,25 @@ from __future__ import annotations -import logging - -from rich.pretty import pprint - from aiida import orm +from aiida.common.log import AIIDA_LOGGER +from aiida.manage import get_manager, load_profile from aiida.manage.configuration.profile import Profile from aiida.tools.dumping.base import BaseDumper from aiida.tools.dumping.group import GroupDumper +from aiida.tools.dumping.logger import DumpLogger from aiida.tools.dumping.process import ProcessDumper -logger = logging.getLogger(__name__) +logger = AIIDA_LOGGER.getChild('tools.dumping') class ProfileDumper: def __init__( self, - profile: str | Profile, + profile: str | Profile | None = None, base_dumper: BaseDumper | None = None, process_dumper: ProcessDumper | None = None, + dump_logger: DumpLogger | None = None, organize_by_groups: bool = True, deduplicate: bool = True, groups: list[str | orm.Group] | None = None, @@ 
-37,27 +37,37 @@ def __init__( ): self.organize_by_groups = organize_by_groups self.deduplicate = deduplicate - self.profile = profile self.dump_processes = dump_processes self.groups = groups - if base_dumper is None: - base_dumper = BaseDumper() - self.base_dumper: BaseDumper = base_dumper + self.base_dumper = base_dumper or BaseDumper() + self.process_dumper = process_dumper or ProcessDumper() + self.dump_logger = dump_logger or DumpLogger() - if process_dumper is None: - process_dumper = ProcessDumper() - self.process_dumper: ProcessDumper = process_dumper + # Load the profile + if isinstance(profile, str): + profile = load_profile(profile) - # self.log_dict: dict[dict[str, Path]] = {} - self.log_dict = {'calculations': {}, 'workflows': {}} + if profile is None: + manager = get_manager() + profile = manager.get_profile() + + assert profile is not None + self.profile = profile def dump(self): + # No groups selected, dump data which is not part of any group + # If groups selected, however, this data should not also be dumped automatically if not self.groups: self._dump_processes_not_in_any_group() - self.groups = orm.QueryBuilder().append(orm.Group).all(flat=True) - self._dump_processes_per_group() + # Still, even without selecting groups, by default, all profile data should be dumped + # Thus, we obtain all groups in the profile here + profile_groups = orm.QueryBuilder().append(orm.Group).all(flat=True) + self._dump_processes_per_group(groups=profile_groups) + + else: + self._dump_processes_per_group(groups=self.groups) def _dump_processes_not_in_any_group(self): # === Dump the data that is not associated with any group === @@ -71,21 +81,19 @@ def _dump_processes_not_in_any_group(self): process_dumper=self.process_dumper, group=None, deduplicate=self.deduplicate, + dump_logger=self.dump_logger, output_path=output_path, - global_log_dict=self.log_dict, ) if self.dump_processes and no_group_dumper._should_dump_processes(): logger.report(f'Dumping processes not in any group for profile `{self.profile.name}`...') - no_group_dumper._dump_processes() + no_group_dumper.dump() - self.log_dict.update(no_group_dumper.log_dict) - - def _dump_processes_per_group(self): + def _dump_processes_per_group(self, groups): # === Dump data per-group if Groups exist in profile or are selected === - for group in self.groups: + for group in groups: if self.organize_by_groups: output_path = self.base_dumper.dump_parent_path / group.label else: @@ -94,18 +102,13 @@ def _dump_processes_per_group(self): group_dumper = GroupDumper( base_dumper=self.base_dumper, process_dumper=self.process_dumper, + dump_logger=self.dump_logger, group=group, deduplicate=self.deduplicate, output_path=output_path, - global_log_dict=self.log_dict, ) if self.dump_processes and group_dumper._should_dump_processes(): logger.report(f'Dumping processes in group {group.label} for profile `{self.profile.name}`...') - group_dumper._dump_processes() - for entity in ['calculations', 'workflows']: - self.log_dict[entity].update(group_dumper.log_dict[entity]) - - pprint(group_dumper.log_dict) - pprint(self.log_dict) + group_dumper.dump() diff --git a/src/aiida/tools/dumping/utils.py b/src/aiida/tools/dumping/utils.py index c4c1ac0fc1..438c8a7c6b 100644 --- a/src/aiida/tools/dumping/utils.py +++ b/src/aiida/tools/dumping/utils.py @@ -10,16 +10,14 @@ from __future__ import annotations -import logging import shutil from pathlib import Path -from rich.console import Console -from rich.table import Table +from aiida.common.log import AIIDA_LOGGER 
__all__ = ['prepare_dump_path'] -logger = logging.getLogger(__name__) +logger = AIIDA_LOGGER.getChild('tools.dumping') def prepare_dump_path( @@ -41,10 +39,12 @@ def prepare_dump_path( :raises FileNotFoundError: If no `safeguard_file` is found.""" if overwrite and incremental: - raise ValueError('Both overwrite and incremental set to True. Only specify one.') + msg = 'Both overwrite and incremental set to True. Only specify one.' + raise ValueError(msg) if path_to_validate.is_file(): - raise FileExistsError(f'A file at the given path `{path_to_validate}` already exists.') + msg = f'A file at the given path `{path_to_validate}` already exists.' + raise FileExistsError(msg) # Handle existing directory if path_to_validate.is_dir(): @@ -53,89 +53,69 @@ def prepare_dump_path( # Case 1: Non-empty directory and overwrite is False if not is_empty and not overwrite: if incremental: - logger.info('Incremental dumping selected. Will keep directory.') + msg = f'Incremental dumping selected. Will update directory `{path_to_validate}` with new data.' + logger.report(msg) else: - raise FileExistsError( - f'Path `{path_to_validate}` already exists, and neither overwrite nor incremental is enabled.' - ) + msg = f'Path `{path_to_validate}` already exists, and neither overwrite nor incremental is enabled.' + raise FileExistsError(msg) # Case 2: Non-empty directory, overwrite is True if not is_empty and overwrite: safeguard_exists = (path_to_validate / safeguard_file).is_file() if safeguard_exists: - logger.info(f'Overwriting directory `{path_to_validate}`.') + msg = f'Overwriting directory `{path_to_validate}`.' + logger.report(msg) shutil.rmtree(path_to_validate) else: - raise FileNotFoundError( - f'Path `{path_to_validate}` exists without safeguard file ' - f'`{safeguard_file}`. Not removing because path might be a directory not created by AiiDA.' + msg = ( + f'Path `{path_to_validate}` exists without safeguard file `{safeguard_file}`. ' + f'Not removing because path might be a directory not created by AiiDA.' 
) + raise FileNotFoundError(msg) # Create directory if it doesn't exist or was removed path_to_validate.mkdir(exist_ok=True, parents=True) (path_to_validate / safeguard_file).touch() -def get_nodes_from_db(qb_instance, qb_filters: t.List | None = None, flat=False): - # Computers cannot be associated via `with_group` - # for qb_filter in qb_filters: - # qb.add_filter(**qb_filter) - - return_iterable = qb_instance.iterall() if qb_instance.count() > 10 ^ 3 else qb_instance.all() - - # Manual flattening as `iterall` doesn't have `flat` option unlike `all` - if flat: - return_iterable = [_[0] for _ in return_iterable] - - return return_iterable - +# @staticmethod +# def dumper_pretty_print(dumper_instance, include_private_and_dunder: bool = False): +# console = Console() +# table = Table(title=f'Attributes and Methods of {dumper_instance.__class__.__name__}') -# def validate_rich_options(rich_options, rich_config_file): -# if rich_options is not None and rich_config_file is not None: -# raise ValueError('Specify rich options either via CLI or config file, not both.') +# # Adding columns to the table +# table.add_column('Name', justify='left') +# table.add_column('Type', justify='left') +# table.add_column('Value', justify='left') -# else: -# logger.report('Neither `--rich-options` nor `--rich-config` set, using defaults.') +# # Lists to store attributes and methods +# entries = [] +# # Iterate over the class attributes and methods +# for attr_name in dir(dumper_instance): +# # Exclude private attributes and dunder methods +# attr_value = getattr(dumper_instance, attr_name) +# entry_type = 'Attribute' if not callable(attr_value) else 'Method' -@staticmethod -def dumper_pretty_print(dumper_instance, include_private_and_dunder: bool = False): - console = Console() - table = Table(title=f'Attributes and Methods of {dumper_instance.__class__.__name__}') - - # Adding columns to the table - table.add_column('Name', justify='left') - table.add_column('Type', justify='left') - table.add_column('Value', justify='left') - - # Lists to store attributes and methods - entries = [] - - # Iterate over the class attributes and methods - for attr_name in dir(dumper_instance): - # Exclude private attributes and dunder methods - attr_value = getattr(dumper_instance, attr_name) - entry_type = 'Attribute' if not callable(attr_value) else 'Method' - - if attr_name.startswith('_'): - if include_private_and_dunder: - entries.append((attr_name, entry_type, str(attr_value))) - else: - pass - else: - entries.append((attr_name, entry_type, str(attr_value))) +# if attr_name.startswith('_'): +# if include_private_and_dunder: +# entries.append((attr_name, entry_type, str(attr_value))) +# else: +# pass +# else: +# entries.append((attr_name, entry_type, str(attr_value))) - # Sort entries: attributes first, then methods - entries.sort(key=lambda x: (x[1] == 'Method', x[0])) +# # Sort entries: attributes first, then methods +# entries.sort(key=lambda x: (x[1] == 'Method', x[0])) - # Add sorted entries to the table - for name, entry_type, value in entries: - table.add_row(name, entry_type, value) +# # Add sorted entries to the table +# for name, entry_type, value in entries: +# table.add_row(name, entry_type, value) - # Print the formatted table - console.print(table) +# # Print the formatted table +# console.print(table) # def check_storage_size_user(): diff --git a/tests/tools/dumping/test_process.py b/tests/tools/dumping/test_process.py index 47d39ba75e..683e3c4707 100644 --- a/tests/tools/dumping/test_process.py +++ 
b/tests/tools/dumping/test_process.py @@ -6,7 +6,7 @@ # For further information on the license, see the LICENSE.txt file # # For further information please visit http://www.aiida.net # ########################################################################### -"""Tests for the dumping of ProcessNode data to disk.""" +"""Tests for the dumping of process data to disk.""" from __future__ import annotations From 2dfe2ca6930cf5a1e5b478be7c60de306c5181b7 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Tue, 28 Jan 2025 17:24:45 +0100 Subject: [PATCH 11/27] Start to work on group testing --- src/aiida/tools/dumping/group.py | 14 ++---- tests/tools/dumping/__init__.py | 0 tests/tools/dumping/test_group.py | 75 +++++++++++++++++++++++++++++ tests/tools/dumping/test_profile.py | 9 ++++ 4 files changed, 88 insertions(+), 10 deletions(-) create mode 100644 tests/tools/dumping/__init__.py create mode 100644 tests/tools/dumping/test_group.py create mode 100644 tests/tools/dumping/test_profile.py diff --git a/src/aiida/tools/dumping/group.py b/src/aiida/tools/dumping/group.py index 38bf25c380..86d63e69b0 100644 --- a/src/aiida/tools/dumping/group.py +++ b/src/aiida/tools/dumping/group.py @@ -22,12 +22,6 @@ logger = AIIDA_LOGGER.getChild('tools.dumping') -DEFAULT_PROCESSES_TO_DUMP = [orm.CalculationNode, orm.WorkflowNode] -# DEFAULT_DATA_TO_DUMP = [orm.StructureData, orm.Code, orm.Computer, orm.BandsData, orm.UpfData] -# DEFAULT_COLLECTIONS_TO_DUMP ?? -DEFAULT_ENTITIES_TO_DUMP = DEFAULT_PROCESSES_TO_DUMP # + DEFAULT_DATA_TO_DUMP - - class GroupDumper: def __init__( self, @@ -114,10 +108,6 @@ def _get_processes(self): self.calculations = calculations self.workflows = workflows - def dump(self): - self.output_path.mkdir(exist_ok=True, parents=True) - self._dump_processes() - def _dump_processes(self): self._get_processes() @@ -178,3 +168,7 @@ def _dump_workflows(self): dumped_workflows[workflow.uuid] = workflow_dump_path self.dump_logger.update_workflows(dumped_workflows) + + def dump(self): + self.output_path.mkdir(exist_ok=True, parents=True) + self._dump_processes() diff --git a/tests/tools/dumping/__init__.py b/tests/tools/dumping/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/tools/dumping/test_group.py b/tests/tools/dumping/test_group.py new file mode 100644 index 0000000000..b008fc2293 --- /dev/null +++ b/tests/tools/dumping/test_group.py @@ -0,0 +1,75 @@ +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. 
# +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +"""Tests for the dumping of group data to disk.""" + +# TODO: Test that de-duplication also works for calculations + +import pytest +from pathlib import Path +from aiida import orm + + +@pytest.mark.usefixtures('aiida_profile_clean') +@pytest.fixture(scope='session', autouse=True) +def setup_profile_groups(generate_calculation_node_add, generate_workchain_multiply_add): + # Create nodes for profile storage + int_node = orm.Int(1).store() + _ = generate_calculation_node_add() + _ = generate_workchain_multiply_add() + cj_node = generate_calculation_node_add() + wc_node = generate_workchain_multiply_add() + + # Create the various groups + add_group = orm.Group.collection.get_or_create(label='add')[0] + multiply_add_group = orm.Group.collection.get_or_create(label='multiply-add')[0] + cj_dupl_group = orm.Group.collection.get_or_create(label='cj-dupl')[0] + wc_dupl_group = orm.Group.collection.get_or_create(label='wc-dupl')[0] + no_process_group = orm.Group.collection.get_or_create(label='add')[0] + + # Populate groups + add_group.add_nodes([cj_node]) + multiply_add_group.add_nodes([wc_node]) + cj_dupl_group.add_nodes([cj_node]) + wc_dupl_group.add_nodes([wc_node]) + no_process_group.add_nodes([int_node]) + + # Not sure if this is actually needed? + return { + 'add_group': add_group, + 'multiply_add_group': multiply_add_group, + 'cj_dupl_group': cj_dupl_group, + 'wc_dupl_group': wc_dupl_group, + 'no_process_group': no_process_group, + } + + +class TestGroupDumper: + + def test_should_dump_processes(self): + print(orm.QueryBuilder().append(orm.Group).all(flat=True)) + assert False + # pass + + def test_get_nodes(self): + pass + + def test_get_processes(self): + pass + + def test_dump_processes(self): + pass + + def test_dump_calculations(self): + pass + + def test_dump_workflows(self): + pass + + def test_dump(self): + pass diff --git a/tests/tools/dumping/test_profile.py b/tests/tools/dumping/test_profile.py new file mode 100644 index 0000000000..574688d8ae --- /dev/null +++ b/tests/tools/dumping/test_profile.py @@ -0,0 +1,9 @@ +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. 
# +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +"""Tests for the dumping of profile data to disk.""" From 492f87f7ba1b1ec057395001033ba33148d21576 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 28 Jan 2025 16:27:12 +0000 Subject: [PATCH 12/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/aiida/tools/dumping/group.py | 1 + tests/tools/dumping/test_group.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/aiida/tools/dumping/group.py b/src/aiida/tools/dumping/group.py index 86d63e69b0..6350d413ae 100644 --- a/src/aiida/tools/dumping/group.py +++ b/src/aiida/tools/dumping/group.py @@ -22,6 +22,7 @@ logger = AIIDA_LOGGER.getChild('tools.dumping') + class GroupDumper: def __init__( self, diff --git a/tests/tools/dumping/test_group.py b/tests/tools/dumping/test_group.py index b008fc2293..fa07a879eb 100644 --- a/tests/tools/dumping/test_group.py +++ b/tests/tools/dumping/test_group.py @@ -10,8 +10,9 @@ # TODO: Test that de-duplication also works for calculations + import pytest -from pathlib import Path + from aiida import orm @@ -50,7 +51,6 @@ def setup_profile_groups(generate_calculation_node_add, generate_workchain_multi class TestGroupDumper: - def test_should_dump_processes(self): print(orm.QueryBuilder().append(orm.Group).all(flat=True)) assert False From 3887130a2631d5de2d4c399e589339e24c4c04e6 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Tue, 28 Jan 2025 16:39:53 +0100 Subject: [PATCH 13/27] Add ArithmeticAdd CJ Node fixture without `run` --- tests/conftest.py | 153 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 5aa0ef3b89..251b354462 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -852,6 +852,159 @@ def _generate_calculation_node_add(): return _generate_calculation_node_add +@pytest.fixture(scope='class') +def construct_calculation_node_add(tmp_path_factory): + def _construct_calculation_node_add(x: int = 1, y: int = 2): + import textwrap + from aiida.orm import InstalledCode, Int, CalcJobNode, Computer, FolderData + from aiida.common import LinkType + import json + from _pytest.tmpdir import tmp_path_factory + + # Create a minimal computer + # Not using any of the `aiida_localhost` or `aiida_computer_local` fixtures as they are function-scoped + created, computer = Computer.collection.get_or_create( + label='mock_computer', + hostname='localhost', + transport_type='core.local', + scheduler_type='core.direct' + ) + if created: + computer.store() + + # Create the calculation node + calc_node = CalcJobNode(computer=computer) + + # Create input nodes + x_node = Int(x) + y_node = Int(y) + code_node = InstalledCode(computer=computer, filepath_executable='/bin/bash') + + # Store input nodes + x_node.store() + y_node.store() + code_node.store() + + # Input files + input_content = f'echo $(({x} + {y}))\n' + calc_node.base.repository.put_object_from_bytes(input_content.encode(), 'aiida.in') + + # .aiida folder content + calcinfo_dict = { + "codes_info": [ + { + "stdin_name": "aiida.in", + "stdout_name": "aiida.out", + "code_uuid": code_node.uuid + } + ], + "retrieve_list": [ + "aiida.out", + 
"_scheduler-stdout.txt", + "_scheduler-stderr.txt" + ], + "uuid": calc_node.uuid, + "file_copy_operation_order": [2, 0, 1] + } + + job_tmpl_dict = { + "submit_as_hold": False, + "rerunnable": False, + "job_name": "aiida-42", + "sched_output_path": "_scheduler-stdout.txt", + "shebang": "#!/bin/bash", + "sched_error_path": "_scheduler-stderr.txt", + "sched_join_files": False, + "prepend_text": "", + "append_text": "", + "job_resource": { + "num_machines": 1, + "num_mpiprocs_per_machine": 1, + "num_cores_per_machine": None, + "num_cores_per_mpiproc": None, + "tot_num_mpiprocs": 1 + }, + "codes_info": [ + { + "prepend_cmdline_params": [], + "cmdline_params": ["/usr/bin/bash"], + "use_double_quotes": [False, False], + "wrap_cmdline_params": False, + "stdin_name": "aiida.in", + "stdout_name": "aiida.out", + "stderr_name": None, + "join_files": False + } + ], + "codes_run_mode": 0, + "import_sys_environment": True, + "job_environment": {}, + "environment_variables_double_quotes": False, + "max_memory_kb": None, + 'max_wallclock_seconds': 3600, + } + + calc_node.base.repository.put_object_from_bytes( + json.dumps(calcinfo_dict, indent=4).encode(), + '.aiida/calcinfo.json' + ) + calc_node.base.repository.put_object_from_bytes( + json.dumps(job_tmpl_dict, indent=4).encode(), + '.aiida/job_tmpl.json' + ) + + # Submit script + submit_script = textwrap.dedent("""\ + #!/bin/bash + exec > _scheduler-stdout.txt + exec 2> _scheduler-stderr.txt + + '/usr/bin/bash' < 'aiida.in' > 'aiida.out' + """) + + calc_node.base.repository.put_object_from_bytes(submit_script.encode(), '_aiidasubmit.sh') + + # Store CalcInfo in node attributes + calc_node.base.attributes.set('input_filename', 'aiida.in') + calc_node.base.attributes.set('output_filename', 'aiida.out') + + # Add input links + calc_node.base.links.add_incoming(x_node, link_type=LinkType.INPUT_CALC, link_label='x') + calc_node.base.links.add_incoming(y_node, link_type=LinkType.INPUT_CALC, link_label='y') + calc_node.base.links.add_incoming(code_node, link_type=LinkType.INPUT_CALC, link_label='code') + + # Must store CalcjobNode before I can add output files + calc_node.store() + + + # Create FolderData node for retrieved + retrieved_folder = FolderData() + output_content = f'{x+y}\n'.encode() + retrieved_folder.put_object_from_bytes(output_content, 'aiida.out') + + scheduler_stdout = '\n'.encode() + scheduler_stderr = '\n'.encode() + retrieved_folder.base.repository.put_object_from_bytes(scheduler_stdout, '_scheduler-stdout.txt') + retrieved_folder.base.repository.put_object_from_bytes(scheduler_stderr, '_scheduler-stderr.txt') + retrieved_folder.store() + + retrieved_folder.base.links.add_incoming(calc_node, link_type=LinkType.CREATE, link_label='retrieved') + + # Create and link output node (sum) + output_node = Int(x+y) + output_node.store() + output_node.base.links.add_incoming(calc_node, link_type=LinkType.CREATE, link_label='sum') + + # Set process properties + calc_node.set_process_state('finished') + calc_node.set_process_label('ArithmeticAddCalculation') + calc_node.set_process_type('aiida.calculations:core.arithmetic.add') + calc_node.set_exit_status(0) + + return calc_node + + return _construct_calculation_node_add + @pytest.fixture def generate_workchain_multiply_add(aiida_localhost): def _generate_workchain_multiply_add(): From f452ab2b70f92c17af8c6f4e04d23f95853bb0ef Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 28 Jan 2025 16:28:18 +0000 Subject: [PATCH 14/27] 
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/conftest.py | 97 +++++++++++++------------------ tests/tools/dumping/test_group.py | 1 - 2 files changed, 41 insertions(+), 57 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 251b354462..2dfad75dfe 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -855,19 +855,16 @@ def _generate_calculation_node_add(): @pytest.fixture(scope='class') def construct_calculation_node_add(tmp_path_factory): def _construct_calculation_node_add(x: int = 1, y: int = 2): + import json import textwrap - from aiida.orm import InstalledCode, Int, CalcJobNode, Computer, FolderData + from aiida.common import LinkType - import json - from _pytest.tmpdir import tmp_path_factory + from aiida.orm import CalcJobNode, Computer, FolderData, InstalledCode, Int # Create a minimal computer # Not using any of the `aiida_localhost` or `aiida_computer_local` fixtures as they are function-scoped created, computer = Computer.collection.get_or_create( - label='mock_computer', - hostname='localhost', - transport_type='core.local', - scheduler_type='core.direct' + label='mock_computer', hostname='localhost', transport_type='core.local', scheduler_type='core.direct' ) if created: computer.store() @@ -891,66 +888,54 @@ def _construct_calculation_node_add(x: int = 1, y: int = 2): # .aiida folder content calcinfo_dict = { - "codes_info": [ - { - "stdin_name": "aiida.in", - "stdout_name": "aiida.out", - "code_uuid": code_node.uuid - } - ], - "retrieve_list": [ - "aiida.out", - "_scheduler-stdout.txt", - "_scheduler-stderr.txt" - ], - "uuid": calc_node.uuid, - "file_copy_operation_order": [2, 0, 1] + 'codes_info': [{'stdin_name': 'aiida.in', 'stdout_name': 'aiida.out', 'code_uuid': code_node.uuid}], + 'retrieve_list': ['aiida.out', '_scheduler-stdout.txt', '_scheduler-stderr.txt'], + 'uuid': calc_node.uuid, + 'file_copy_operation_order': [2, 0, 1], } job_tmpl_dict = { - "submit_as_hold": False, - "rerunnable": False, - "job_name": "aiida-42", - "sched_output_path": "_scheduler-stdout.txt", - "shebang": "#!/bin/bash", - "sched_error_path": "_scheduler-stderr.txt", - "sched_join_files": False, - "prepend_text": "", - "append_text": "", - "job_resource": { - "num_machines": 1, - "num_mpiprocs_per_machine": 1, - "num_cores_per_machine": None, - "num_cores_per_mpiproc": None, - "tot_num_mpiprocs": 1 + 'submit_as_hold': False, + 'rerunnable': False, + 'job_name': 'aiida-42', + 'sched_output_path': '_scheduler-stdout.txt', + 'shebang': '#!/bin/bash', + 'sched_error_path': '_scheduler-stderr.txt', + 'sched_join_files': False, + 'prepend_text': '', + 'append_text': '', + 'job_resource': { + 'num_machines': 1, + 'num_mpiprocs_per_machine': 1, + 'num_cores_per_machine': None, + 'num_cores_per_mpiproc': None, + 'tot_num_mpiprocs': 1, }, - "codes_info": [ + 'codes_info': [ { - "prepend_cmdline_params": [], - "cmdline_params": ["/usr/bin/bash"], - "use_double_quotes": [False, False], - "wrap_cmdline_params": False, - "stdin_name": "aiida.in", - "stdout_name": "aiida.out", - "stderr_name": None, - "join_files": False + 'prepend_cmdline_params': [], + 'cmdline_params': ['/usr/bin/bash'], + 'use_double_quotes': [False, False], + 'wrap_cmdline_params': False, + 'stdin_name': 'aiida.in', + 'stdout_name': 'aiida.out', + 'stderr_name': None, + 'join_files': False, } ], - "codes_run_mode": 0, - "import_sys_environment": True, - "job_environment": {}, - "environment_variables_double_quotes": False, - "max_memory_kb": None, + 
'codes_run_mode': 0, + 'import_sys_environment': True, + 'job_environment': {}, + 'environment_variables_double_quotes': False, + 'max_memory_kb': None, 'max_wallclock_seconds': 3600, } calc_node.base.repository.put_object_from_bytes( - json.dumps(calcinfo_dict, indent=4).encode(), - '.aiida/calcinfo.json' + json.dumps(calcinfo_dict, indent=4).encode(), '.aiida/calcinfo.json' ) calc_node.base.repository.put_object_from_bytes( - json.dumps(job_tmpl_dict, indent=4).encode(), - '.aiida/job_tmpl.json' + json.dumps(job_tmpl_dict, indent=4).encode(), '.aiida/job_tmpl.json' ) # Submit script @@ -976,7 +961,6 @@ def _construct_calculation_node_add(x: int = 1, y: int = 2): # Must store CalcjobNode before I can add output files calc_node.store() - # Create FolderData node for retrieved retrieved_folder = FolderData() output_content = f'{x+y}\n'.encode() @@ -991,7 +975,7 @@ def _construct_calculation_node_add(x: int = 1, y: int = 2): retrieved_folder.base.links.add_incoming(calc_node, link_type=LinkType.CREATE, link_label='retrieved') # Create and link output node (sum) - output_node = Int(x+y) + output_node = Int(x + y) output_node.store() output_node.base.links.add_incoming(calc_node, link_type=LinkType.CREATE, link_label='sum') @@ -1005,6 +989,7 @@ def _construct_calculation_node_add(x: int = 1, y: int = 2): return _construct_calculation_node_add + @pytest.fixture def generate_workchain_multiply_add(aiida_localhost): def _generate_workchain_multiply_add(): diff --git a/tests/tools/dumping/test_group.py b/tests/tools/dumping/test_group.py index fa07a879eb..ed13151f34 100644 --- a/tests/tools/dumping/test_group.py +++ b/tests/tools/dumping/test_group.py @@ -10,7 +10,6 @@ # TODO: Test that de-duplication also works for calculations - import pytest from aiida import orm From abbfaff06c6a0f86aab1877817f799992f80c820 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Fri, 31 Jan 2025 15:56:10 +0100 Subject: [PATCH 15/27] First tests for node collection dumping And back to `CollectionDumper` --- src/aiida/tools/dumping/__init__.py | 4 +- .../tools/dumping/{group.py => collection.py} | 61 +++----- src/aiida/tools/dumping/profile.py | 43 +++++- src/aiida/tools/dumping/utils.py | 68 ++------- tests/tools/dumping/test_collection.py | 138 ++++++++++++++++++ tests/tools/dumping/test_group.py | 74 ---------- 6 files changed, 208 insertions(+), 180 deletions(-) rename src/aiida/tools/dumping/{group.py => collection.py} (71%) create mode 100644 tests/tools/dumping/test_collection.py delete mode 100644 tests/tools/dumping/test_group.py diff --git a/src/aiida/tools/dumping/__init__.py b/src/aiida/tools/dumping/__init__.py index 48b73eee65..6bc7b9c2c0 100644 --- a/src/aiida/tools/dumping/__init__.py +++ b/src/aiida/tools/dumping/__init__.py @@ -9,10 +9,10 @@ """Modules related to the dumping of AiiDA data.""" from .base import BaseDumper -from .group import GroupDumper +from .collection import CollectionDumper from .process import ProcessDumper from .profile import ProfileDumper # from .collection import CollectionDumper -__all__ = ('BaseDumper', 'GroupDumper', 'ProcessDumper', 'ProfileDumper') # , 'CollectionDumper') +__all__ = ('BaseDumper', 'CollectionDumper', 'ProcessDumper', 'ProfileDumper') # , 'CollectionDumper') diff --git a/src/aiida/tools/dumping/group.py b/src/aiida/tools/dumping/collection.py similarity index 71% rename from src/aiida/tools/dumping/group.py rename to src/aiida/tools/dumping/collection.py index 6350d413ae..89428b4e18 100644 --- a/src/aiida/tools/dumping/group.py +++ 
b/src/aiida/tools/dumping/collection.py @@ -10,8 +10,8 @@ from __future__ import annotations -import itertools as it import os +from functools import cached_property from pathlib import Path from aiida import orm @@ -19,27 +19,31 @@ from aiida.tools.dumping.base import BaseDumper from aiida.tools.dumping.logger import DumpLogger from aiida.tools.dumping.process import ProcessDumper +from aiida.tools.dumping.utils import filter_by_last_dump_time logger = AIIDA_LOGGER.getChild('tools.dumping') -class GroupDumper: +class CollectionDumper: def __init__( self, base_dumper: BaseDumper | None = None, process_dumper: ProcessDumper | None = None, dump_logger: DumpLogger | None = None, - group: orm.Group | str | None = None, + collection: orm.Group | str | list[str] | None = None, deduplicate: bool = True, output_path: Path | str | None = None, ): self.deduplicate = deduplicate - # Allow passing of group via label - if isinstance(group, str): - group = orm.load_group(group) + # Pass the collection type. Could be Group or just list of nodes + if isinstance(collection, str): + try: + collection = orm.load_group(collection) + except: + raise - self.group = group + self.collection = collection self.base_dumper = base_dumper or BaseDumper() self.process_dumper = process_dumper or ProcessDumper() @@ -49,44 +53,19 @@ def __init__( self.output_path = Path(output_path or self.base_dumper.dump_parent_path) - self.nodes = self._get_nodes() - - def _should_dump_processes(self) -> bool: - return len([node for node in self.nodes if isinstance(node, orm.ProcessNode)]) > 0 + @cached_property + def nodes(self): + return self._get_nodes() def _get_nodes(self): - # Get all nodes that are in the group - if self.group is not None: - nodes = list(self.group.nodes) - - # Get all nodes that are _not_ in any group - else: - groups: list[orm.Group] = orm.QueryBuilder().append(orm.Group).all(flat=True) # type: ignore[assignment] - nodes_in_groups = [node.uuid for group in groups for node in group.nodes] - - # Need to expand here also with the called_descendants of `WorkflowNodes`, otherwise the called - # `CalculationNode`s for `WorkflowNode`s that are part of a group are dumped twice - # Get the called descendants of WorkflowNodes within the nodes_in_groups list - called_descendants_generator = ( - orm.load_node(node).called_descendants - for node in nodes_in_groups - if isinstance(orm.load_node(node), orm.WorkflowNode) - ) - - # Flatten the list of called descendants - sub_nodes_in_groups = list(it.chain(*called_descendants_generator)) - - sub_nodes_in_groups = [node.uuid for node in sub_nodes_in_groups] - nodes_in_groups += sub_nodes_in_groups - - profile_nodes = orm.QueryBuilder().append(orm.Node, project=['uuid']).all(flat=True) - nodes = [profile_node for profile_node in profile_nodes if profile_node not in nodes_in_groups] - nodes = [orm.load_node(node) for node in nodes] + if isinstance(self.collection, orm.Group): + nodes: list[str] = list(self.collection.nodes) - if self.base_dumper.last_dump_time is not None: - nodes = [node for node in nodes if node.mtime > self.base_dumper.last_dump_time] + return filter_by_last_dump_time(nodes=nodes, last_dump_time=self.base_dumper.last_dump_time) - return nodes + def _should_dump_processes(self, nodes: list[orm.Node] | None = None) -> bool: + test_nodes = nodes or self.nodes + return len([node for node in test_nodes if isinstance(node, orm.ProcessNode)]) > 0 def _get_processes(self): nodes = self.nodes diff --git a/src/aiida/tools/dumping/profile.py 
b/src/aiida/tools/dumping/profile.py index 2b2d5294c1..17b88ecefc 100644 --- a/src/aiida/tools/dumping/profile.py +++ b/src/aiida/tools/dumping/profile.py @@ -11,14 +11,17 @@ from __future__ import annotations +from typing import cast + from aiida import orm from aiida.common.log import AIIDA_LOGGER from aiida.manage import get_manager, load_profile from aiida.manage.configuration.profile import Profile from aiida.tools.dumping.base import BaseDumper -from aiida.tools.dumping.group import GroupDumper +from aiida.tools.dumping.collection import CollectionDumper from aiida.tools.dumping.logger import DumpLogger from aiida.tools.dumping.process import ProcessDumper +from aiida.tools.dumping.utils import filter_by_last_dump_time logger = AIIDA_LOGGER.getChild('tools.dumping') @@ -76,10 +79,12 @@ def _dump_processes_not_in_any_group(self): else: output_path = self.base_dumper.dump_parent_path - no_group_dumper = GroupDumper( + no_group_nodes = self._get_no_group_nodes() + + no_group_dumper = CollectionDumper( base_dumper=self.base_dumper, process_dumper=self.process_dumper, - group=None, + collection=no_group_nodes, deduplicate=self.deduplicate, dump_logger=self.dump_logger, output_path=output_path, @@ -99,11 +104,11 @@ def _dump_processes_per_group(self, groups): else: output_path = self.base_dumper.dump_parent_path - group_dumper = GroupDumper( + group_dumper = CollectionDumper( base_dumper=self.base_dumper, process_dumper=self.process_dumper, dump_logger=self.dump_logger, - group=group, + collection=group, deduplicate=self.deduplicate, output_path=output_path, ) @@ -112,3 +117,31 @@ def _dump_processes_per_group(self, groups): logger.report(f'Dumping processes in group {group.label} for profile `{self.profile.name}`...') group_dumper.dump() + + def _get_no_group_nodes(self) -> list[str]: + # Get all nodes that are _not_ in any group + group_qb = orm.QueryBuilder().append(orm.Group) + profile_groups = cast(list[orm.Group], group_qb.all(flat=True)) + node_qb = orm.QueryBuilder().append(orm.Node, project=['uuid']) + profile_nodes = cast(list[str], node_qb.all(flat=True)) + + nodes_in_groups: list[str] = [node.uuid for group in profile_groups for node in group.nodes] + + # Need to expand here also with the called_descendants of `WorkflowNodes`, otherwise the called + # `CalculationNode`s for `WorkflowNode`s that are part of a group are dumped twice + # Get the called descendants of WorkflowNodes within the nodes_in_groups list + + sub_nodes_in_groups: list[str] = [ + node.uuid + for n in nodes_in_groups + if isinstance((workflow_node := orm.load_node(n)), orm.WorkflowNode) + for node in workflow_node.called_descendants + ] + + # sub_nodes_in_groups: list[str] = [node.uuid for node in sub_nodes_in_groups] + nodes_in_groups += sub_nodes_in_groups + + nodes: list[str] = [profile_node for profile_node in profile_nodes if profile_node not in nodes_in_groups] + nodes = filter_by_last_dump_time(nodes=nodes, last_dump_time=self.base_dumper.last_dump_time) + + return nodes diff --git a/src/aiida/tools/dumping/utils.py b/src/aiida/tools/dumping/utils.py index 438c8a7c6b..0758e81770 100644 --- a/src/aiida/tools/dumping/utils.py +++ b/src/aiida/tools/dumping/utils.py @@ -11,8 +11,10 @@ from __future__ import annotations import shutil +from datetime import datetime from pathlib import Path +from aiida import orm from aiida.common.log import AIIDA_LOGGER __all__ = ['prepare_dump_path'] @@ -80,64 +82,6 @@ def prepare_dump_path( (path_to_validate / safeguard_file).touch() -# @staticmethod -# def 
dumper_pretty_print(dumper_instance, include_private_and_dunder: bool = False): -# console = Console() -# table = Table(title=f'Attributes and Methods of {dumper_instance.__class__.__name__}') - -# # Adding columns to the table -# table.add_column('Name', justify='left') -# table.add_column('Type', justify='left') -# table.add_column('Value', justify='left') - -# # Lists to store attributes and methods -# entries = [] - -# # Iterate over the class attributes and methods -# for attr_name in dir(dumper_instance): -# # Exclude private attributes and dunder methods -# attr_value = getattr(dumper_instance, attr_name) -# entry_type = 'Attribute' if not callable(attr_value) else 'Method' - -# if attr_name.startswith('_'): -# if include_private_and_dunder: -# entries.append((attr_name, entry_type, str(attr_value))) -# else: -# pass -# else: -# entries.append((attr_name, entry_type, str(attr_value))) - -# # Sort entries: attributes first, then methods -# entries.sort(key=lambda x: (x[1] == 'Method', x[0])) - -# # Add sorted entries to the table -# for name, entry_type, value in entries: -# table.add_row(name, entry_type, value) - -# # Print the formatted table -# console.print(table) - - -# def check_storage_size_user(): -# from aiida.manage.manager import get_manager - -# manager = get_manager() -# storage = manager.get_profile_storage() - -# data = storage.get_info(detailed=True) -# repository_data = data['repository']['Size (MB)'] -# total_size_gb = sum(repository_data.values()) / 1024 -# if total_size_gb > 10: -# user_input = ( -# input('Repository size larger than 10gb. Do you still want to dump the profile data? (y/N): ') -# .strip() -# .lower() -# ) - -# if user_input != 'y': -# sys.exit() - - def sanitize_file_extension(filename: str | Path): if isinstance(filename, Path): filename = str(filename) @@ -147,3 +91,11 @@ def sanitize_file_extension(filename: str | Path): filename = filename.replace('.mpl_png', '.png') return Path(filename) + + +def filter_by_last_dump_time(nodes: list[str], last_dump_time: datetime | None = None) -> list[str]: + if last_dump_time is not None: + orm_nodes = [orm.load_node(node) for node in nodes] + return [node.uuid for node in orm_nodes if node.mtime > last_dump_time] + else: + return nodes diff --git a/tests/tools/dumping/test_collection.py b/tests/tools/dumping/test_collection.py new file mode 100644 index 0000000000..50a2f357ef --- /dev/null +++ b/tests/tools/dumping/test_collection.py @@ -0,0 +1,138 @@ +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. 
# +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +"""Tests for the dumping of group data to disk.""" + +# TODO: Test that de-duplication also works for calculations +# TODO: Test incremental dumping + +import pytest + +from aiida import orm +from aiida.tools.dumping import BaseDumper, CollectionDumper, ProcessDumper + +# Fixture that depends on generate_calculation_node_add_class +# @pytest.fixture(scope="class") +# def setup_calculation_node_add_class(generate_calculation_node_add_class): +# # This will make sure the fixture runs and is available for setup_class +# generate_calculation_node_add_class() # You can also do any additional setup here + + +@pytest.fixture() +def setup_no_process_group(): + no_process_group, _ = orm.Group.collection.get_or_create(label='no-process') + if no_process_group.is_empty: + int_node = orm.Int(1).store() + no_process_group.add_nodes([int_node]) + return no_process_group + + +@pytest.fixture() +def setup_add_group(generate_calculation_node_add): + add_group, _ = orm.Group.collection.get_or_create(label='add') + if add_group.is_empty: + add_node = generate_calculation_node_add() + add_group.add_nodes([add_node]) + return add_group + + +@pytest.fixture() +def setup_multiply_add_group(generate_workchain_multiply_add): + multiply_add_group, _ = orm.Group.collection.get_or_create(label='multiply-add') + if multiply_add_group.is_empty: + multiply_add_node = generate_workchain_multiply_add() + multiply_add_group.add_nodes([multiply_add_node]) + return multiply_add_group + + +@pytest.fixture() +def multiply_process_groups(): ... + + +@pytest.mark.usefixtures('aiida_profile_clean_class') +class TestCollectionDumper: + def test_should_dump_processes(self, setup_no_process_group, setup_add_group): + """""" + no_process_group: orm.Group = setup_no_process_group + add_group: orm.Group = setup_add_group + + base_dumper = BaseDumper() + process_dumper = ProcessDumper() + + group_dumper = CollectionDumper(base_dumper=base_dumper, process_dumper=process_dumper, collection=no_process_group) + + assert group_dumper._should_dump_processes() is False + + group_dumper = CollectionDumper(base_dumper=base_dumper, process_dumper=process_dumper, collection=add_group) + + assert group_dumper._should_dump_processes() is True + + # def test_get_nodes(self): + # pass + + # def test_get_processes(self): + # pass + + # def test_dump_processes(self): + # pass + + # def test_dump_calculations(self): + # pass + + # def test_dump_workflows(self): + # pass + + # def test_dump(self): + # pass + + +#######3 + +# def test_setup_profile( +# self, +# generate_calculation_node_add, +# generate_workchain_multiply_add, +# generate_calculation_node_io, +# generate_workchain_node_io, +# ): +# # TODO: This is a hack... 
and not actually a real test +# # TODO: I'm using the `aiida_profile_clean_class` fixture to make sure I have a clean profile for this class +# # TODO: However, this method is not an actual test, but sets up the profile data how I want it for testing +# # TODO: Ideally, I'd create a class-scoped fixture that does the setup +# # TODO: Or define a `setup_class` method +# # TODO: However, as most of AiiDA's fixtures are function-scoped, I didn't manage to get any of these approaches +# # TODO: To work, due to pytest's ScopeMismatch exceptions + +# # Create nodes for profile storage +# ## Not in any group +# int_node = orm.Int(1).store() +# _ = generate_calculation_node_add() +# _ = generate_workchain_multiply_add() +# ## For putting into groups +# add_node = generate_calculation_node_add() +# multiply_add_node = generate_workchain_multiply_add() + +# # Create the various groups +# add_group, _ = orm.Group.collection.get_or_create(label='add') +# multiply_add_group, _ = orm.Group.collection.get_or_create(label='multiply-add') +# cj_dupl_group, _ = orm.Group.collection.get_or_create(label='cj-dupl') +# wc_dupl_group, _ = orm.Group.collection.get_or_create(label='wc-dupl') +# no_process_group, _ = orm.Group.collection.get_or_create(label='no-process') + +# # Populate groups +# add_group.add_nodes([add_node]) +# multiply_add_group.add_nodes([multiply_add_node]) +# cj_dupl_group.add_nodes([add_node]) +# wc_dupl_group.add_nodes([multiply_add_node]) +# no_process_group.add_nodes([int_node]) + +# self.add_group = add_group +# self.multiply_add_group = multiply_add_group +# self.cj_dupl_group = cj_dupl_group +# self.wc_dupl_group = wc_dupl_group +# self.no_process_group = no_process_group diff --git a/tests/tools/dumping/test_group.py b/tests/tools/dumping/test_group.py deleted file mode 100644 index ed13151f34..0000000000 --- a/tests/tools/dumping/test_group.py +++ /dev/null @@ -1,74 +0,0 @@ -########################################################################### -# Copyright (c), The AiiDA team. All rights reserved. # -# This file is part of the AiiDA code.
# -# # -# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # -# For further information on the license, see the LICENSE.txt file # -# For further information please visit http://www.aiida.net # -########################################################################### -"""Tests for the dumping of group data to disk.""" - -# TODO: Test that de-duplication also works for calculations - -import pytest - -from aiida import orm - - -@pytest.mark.usefixtures('aiida_profile_clean') -@pytest.fixture(scope='session', autouse=True) -def setup_profile_groups(generate_calculation_node_add, generate_workchain_multiply_add): - # Create nodes for profile storage - int_node = orm.Int(1).store() - _ = generate_calculation_node_add() - _ = generate_workchain_multiply_add() - cj_node = generate_calculation_node_add() - wc_node = generate_workchain_multiply_add() - - # Create the various groups - add_group = orm.Group.collection.get_or_create(label='add')[0] - multiply_add_group = orm.Group.collection.get_or_create(label='multiply-add')[0] - cj_dupl_group = orm.Group.collection.get_or_create(label='cj-dupl')[0] - wc_dupl_group = orm.Group.collection.get_or_create(label='wc-dupl')[0] - no_process_group = orm.Group.collection.get_or_create(label='add')[0] - - # Populate groups - add_group.add_nodes([cj_node]) - multiply_add_group.add_nodes([wc_node]) - cj_dupl_group.add_nodes([cj_node]) - wc_dupl_group.add_nodes([wc_node]) - no_process_group.add_nodes([int_node]) - - # Not sure if this is actually needed? - return { - 'add_group': add_group, - 'multiply_add_group': multiply_add_group, - 'cj_dupl_group': cj_dupl_group, - 'wc_dupl_group': wc_dupl_group, - 'no_process_group': no_process_group, - } - - -class TestGroupDumper: - def test_should_dump_processes(self): - print(orm.QueryBuilder().append(orm.Group).all(flat=True)) - assert False - # pass - - def test_get_nodes(self): - pass - - def test_get_processes(self): - pass - - def test_dump_processes(self): - pass - - def test_dump_calculations(self): - pass - - def test_dump_workflows(self): - pass - - def test_dump(self): - pass From e09e0789fdf55af383d1ac871b5c2271ae2a8a32 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 31 Jan 2025 14:56:57 +0000 Subject: [PATCH 16/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/tools/dumping/test_collection.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/tools/dumping/test_collection.py b/tests/tools/dumping/test_collection.py index 50a2f357ef..36a570ee62 100644 --- a/tests/tools/dumping/test_collection.py +++ b/tests/tools/dumping/test_collection.py @@ -64,7 +64,9 @@ def test_should_dump_processes(self, setup_no_process_group, setup_add_group): base_dumper = BaseDumper() process_dumper = ProcessDumper() - group_dumper = CollectionDumper(base_dumper=base_dumper, process_dumper=process_dumper, collection=no_process_group) + group_dumper = CollectionDumper( + base_dumper=base_dumper, process_dumper=process_dumper, collection=no_process_group + ) assert group_dumper._should_dump_processes() is False From 48acce7a66b9834b9f416dc89f412c563a4760b0 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Wed, 5 Feb 2025 13:20:35 +0100 Subject: [PATCH 17/27] Improve logging and add dry-run feature. 
--- src/aiida/cmdline/commands/cmd_profile.py | 36 ++++++--- src/aiida/tools/dumping/collection.py | 55 ++++++++----- src/aiida/tools/dumping/logger.py | 98 ++++++++++++++++++++--- src/aiida/tools/dumping/profile.py | 12 ++- src/aiida/tools/dumping/utils.py | 5 +- 5 files changed, 161 insertions(+), 45 deletions(-) diff --git a/src/aiida/cmdline/commands/cmd_profile.py b/src/aiida/cmdline/commands/cmd_profile.py index 4f6fc99b60..a69acbba3c 100644 --- a/src/aiida/cmdline/commands/cmd_profile.py +++ b/src/aiida/cmdline/commands/cmd_profile.py @@ -20,6 +20,7 @@ from aiida.common import exceptions from aiida.manage.configuration import Profile, create_profile, get_config from aiida.tools.dumping import ProcessDumper, ProfileDumper +from aiida.tools.dumping.logger import DumpLogger @verdi.group('profile') @@ -306,6 +307,7 @@ def profile_mirror( ): """Dump all data in an AiiDA profile's storage to disk.""" + import json from datetime import datetime from pathlib import Path @@ -319,17 +321,6 @@ def profile_mirror( if path is None: path = Path.cwd() / f'{profile.name}-mirror' - # TODO: Implement proper dry-run feature - dry_run_message = f"Dry run for dumping of profile `{profile.name}`'s data at path: `{path}`.\n" - dry_run_message += 'Only directories will be created.' - - if dry_run: - echo.echo_report(dry_run_message) - return - - else: - echo.echo_report(f"Dumping of profile `{profile.name}`'s data at path: `{path}`.") - SAFEGUARD_FILE: str = '.verdi_profile_mirror' # noqa: N806 safeguard_file_path: Path = path / SAFEGUARD_FILE @@ -349,6 +340,24 @@ def profile_mirror( except IndexError: last_dump_time = None + if dry_run: + node_counts = ProfileDumper._get_number_of_nodes_to_dump(last_dump_time) + node_counts_str = ' & '.join(f'{count} {node_type}' for node_type, count in node_counts.items()) + dry_run_message = f'Dry run for mirroring of profile `{profile.name}`: {node_counts_str} to dump.\n' + echo.echo_report(dry_run_message) + return + + echo.echo_report(f"Dumping of profile `{profile.name}`'s data at path: `{path}`.") + + if incremental: + msg = 'Incremental dumping selected. Will update directory.' 
+ echo.echo_report(msg) + + try: + dump_logger = DumpLogger.from_file(dump_parent_path=path) + except (json.JSONDecodeError, OSError): + dump_logger = DumpLogger(dump_parent_path=path) + base_dumper = BaseDumper( dump_parent_path=path, overwrite=overwrite, @@ -368,6 +377,7 @@ def profile_mirror( profile_dumper = ProfileDumper( base_dumper=base_dumper, process_dumper=process_dumper, + dump_logger=dump_logger, groups=groups, organize_by_groups=organize_by_groups, deduplicate=deduplicate, @@ -381,3 +391,7 @@ def profile_mirror( last_dump_time = datetime.now().astimezone() with safeguard_file_path.open('a') as fhandle: fhandle.write(f'Last profile mirror time: {last_dump_time.isoformat()}\n') + + dump_logger.save_log() + + echo.echo_report(f'Dumped {dump_logger.counter} new nodes.') diff --git a/src/aiida/tools/dumping/collection.py b/src/aiida/tools/dumping/collection.py index 89428b4e18..1852bc2245 100644 --- a/src/aiida/tools/dumping/collection.py +++ b/src/aiida/tools/dumping/collection.py @@ -11,13 +11,14 @@ from __future__ import annotations import os +from datetime import datetime from functools import cached_property from pathlib import Path from aiida import orm from aiida.common.log import AIIDA_LOGGER from aiida.tools.dumping.base import BaseDumper -from aiida.tools.dumping.logger import DumpLogger +from aiida.tools.dumping.logger import DumpLog, DumpLogger from aiida.tools.dumping.process import ProcessDumper from aiida.tools.dumping.utils import filter_by_last_dump_time @@ -47,28 +48,38 @@ def __init__( self.base_dumper = base_dumper or BaseDumper() self.process_dumper = process_dumper or ProcessDumper() - self.dump_logger = dump_logger or DumpLogger() + self.dump_logger = dump_logger or DumpLogger(dump_parent_path=self.base_dumper.dump_parent_path) # Properly set the `output_path` attribute self.output_path = Path(output_path or self.base_dumper.dump_parent_path) @cached_property - def nodes(self): + def nodes(self) -> list[str]: return self._get_nodes() - def _get_nodes(self): + def _get_nodes(self) -> list[str]: + nodes: list[str] | None = None if isinstance(self.collection, orm.Group): - nodes: list[str] = list(self.collection.nodes) + nodes = [n.uuid for n in list(self.collection.nodes)] + elif isinstance(self.collection, list) and len(self.collection) > 0: + if all(isinstance(n, str) for n in self.collection): + nodes = self.collection + else: + msg = 'A collection of nodes must be passed via their UUIDs.' 
+ raise TypeError(msg) + else: + nodes = [] - return filter_by_last_dump_time(nodes=nodes, last_dump_time=self.base_dumper.last_dump_time) + filtered_nodes = filter_by_last_dump_time(nodes=nodes, last_dump_time=self.base_dumper.last_dump_time) + return filtered_nodes - def _should_dump_processes(self, nodes: list[orm.Node] | None = None) -> bool: + def _should_dump_processes(self, nodes: list[str] | None = None) -> bool: test_nodes = nodes or self.nodes - return len([node for node in test_nodes if isinstance(node, orm.ProcessNode)]) > 0 + return len([node for node in test_nodes if isinstance(orm.load_node(node), orm.ProcessNode)]) > 0 def _get_processes(self): - nodes = self.nodes + nodes = [orm.load_node(n) for n in self.nodes] workflows = [node for node in nodes if isinstance(node, orm.WorkflowNode)] # Make sure that only top-level workflows are dumped in their own directories when de-duplcation is enabled @@ -99,6 +110,8 @@ def _dump_processes(self): self._dump_workflows() def _dump_calculations(self): + if len(self.calculations) == 0: + return calculations_path = self.output_path / 'calculations' dumped_calculations = {} @@ -106,19 +119,22 @@ def _dump_calculations(self): calculation_dumper = self.process_dumper calculation_dump_path = calculations_path / calculation_dumper._generate_default_dump_path( - process_node=calculation, prefix='' + process_node=calculation, prefix=None ) if calculation.caller is None: - # or (calculation.caller is not None and not self.deduplicate): calculation_dumper._dump_calculation(calculation_node=calculation, output_path=calculation_dump_path) - dumped_calculations[calculation.uuid] = calculation_dump_path + dumped_calculations[calculation.uuid] = DumpLog( + path=calculation_dump_path, + time=datetime.now().astimezone(), + ) self.dump_logger.update_calculations(dumped_calculations) - def _dump_workflows(self): - # workflow_nodes = get_nodes_from_db(aiida_node_type=orm.WorkflowNode, with_group=self.group, flat=True) + def _dump_workflows(self) -> None: + if len(self.workflows) == 0: + return workflow_path = self.output_path / 'workflows' workflow_path.mkdir(exist_ok=True, parents=True) dumped_workflows = {} @@ -130,22 +146,23 @@ def _dump_workflows(self): process_node=workflow, prefix=None ) - logged_workflows = self.dump_logger.get_logs()['workflows'] + logged_workflows = self.dump_logger.get_log()['workflows'] if self.deduplicate and workflow.uuid in logged_workflows.keys(): os.symlink( - src=logged_workflows[workflow.uuid], + src=logged_workflows[workflow.uuid].path, dst=workflow_dump_path, ) else: workflow_dumper._dump_workflow( workflow_node=workflow, output_path=workflow_dump_path, - # link_calculations=not self.deduplicate, - # link_calculations_dir=self.output_path / 'calculations', ) - dumped_workflows[workflow.uuid] = workflow_dump_path + dumped_workflows[workflow.uuid] = DumpLog( + path=workflow_dump_path, + time=datetime.now().astimezone(), + ) self.dump_logger.update_workflows(dumped_workflows) diff --git a/src/aiida/tools/dumping/logger.py b/src/aiida/tools/dumping/logger.py index eecf611911..34fd2170e8 100644 --- a/src/aiida/tools/dumping/logger.py +++ b/src/aiida/tools/dumping/logger.py @@ -1,18 +1,96 @@ +import json +from dataclasses import dataclass +from datetime import datetime from pathlib import Path +from typing import TypeAlias + + +@dataclass +class DumpLog: + """Represents a single dump log entry.""" + + path: Path + time: datetime + + +DumpDict: TypeAlias = dict[str, DumpLog] class DumpLogger: - def __init__(self): - 
self.log_dict: dict[str, dict[str, Path]] = {'calculations': {}, 'workflows': {}} + """Main logger class using dataclasses for better structure.""" - def update_calculations(self, new_calculations: dict[str, Path]): - """Update the log with new calculations.""" - self.log_dict['calculations'].update(new_calculations) + DUMP_FILE: str = '.dump_log.json' - def update_workflows(self, new_workflows: dict[str, Path]): - """Update the log with new workflows.""" - self.log_dict['workflows'].update(new_workflows) + def __init__( + self, + dump_parent_path: Path, + calculations: DumpDict | None = None, + workflows: DumpDict | None = None, + counter: int = 0, + ) -> None: + self.dump_parent_path = dump_parent_path + self.calculations = calculations or {} + self.workflows = workflows or {} + self.counter = 0 - def get_logs(self): + @property + def dump_file(self) -> Path: + """Get the path to the dump file.""" + return self.dump_parent_path / self.DUMP_FILE + + def update_calculations(self, new_calculations: DumpDict) -> None: + """Update the calculations log.""" + self.calculations.update(new_calculations) + self.counter += len(new_calculations) + + def update_workflows(self, new_workflows: DumpDict) -> None: + """Update the workflows log.""" + self.workflows.update(new_workflows) + self.counter += len(new_workflows) + + def get_log(self) -> dict[str, DumpDict]: """Retrieve the current state of the log.""" - return self.log_dict + return {'calculations': self.calculations, 'workflows': self.workflows} + + def save_log(self) -> None: + """Save the log to a JSON file.""" + + def serialize_logs(logs: DumpDict) -> dict: + serialized = {} + for uuid, entry in logs.items(): + serialized[uuid] = {'path': str(entry.path), 'time': entry.time.isoformat()} + return serialized + + log_dict = { + 'calculations': serialize_logs(self.calculations), + 'workflows': serialize_logs(self.workflows), + } + + with self.dump_file.open('w', encoding='utf-8') as f: + json.dump(log_dict, f, indent=4) + + @classmethod + def from_file(cls, dump_parent_path: Path) -> 'DumpLogger': + """Alternative constructor to load from an existing JSON file.""" + instance = cls(dump_parent_path=dump_parent_path) + + if not instance.dump_file.exists(): + return instance + + try: + with instance.dump_file.open('r', encoding='utf-8') as f: + data = json.load(f) + + def deserialize_logs(category_data: dict) -> DumpDict: + deserialized = {} + for uuid, entry in category_data.items(): + deserialized[uuid] = DumpLog(path=Path(entry['path']), time=datetime.fromisoformat(entry['time'])) + return deserialized + + instance.calculations = deserialize_logs(data['calculations']) + instance.workflows = deserialize_logs(data['workflows']) + + except (json.JSONDecodeError, OSError): + raise + + return instance diff --git a/src/aiida/tools/dumping/profile.py b/src/aiida/tools/dumping/profile.py index 17b88ecefc..a591d50269 100644 --- a/src/aiida/tools/dumping/profile.py +++ b/src/aiida/tools/dumping/profile.py @@ -45,7 +45,7 @@ def __init__( self.base_dumper = base_dumper or BaseDumper() self.process_dumper = process_dumper or ProcessDumper() - self.dump_logger = dump_logger or DumpLogger() + self.dump_logger = dump_logger or DumpLogger(dump_parent_path=self.base_dumper.dump_parent_path) # Load the profile if isinstance(profile, str): @@ -145,3 +145,13 @@ def _get_no_group_nodes(self) -> list[str]: nodes = filter_by_last_dump_time(nodes=nodes, last_dump_time=self.base_dumper.last_dump_time) return nodes + + @staticmethod + def 
_get_number_of_nodes_to_dump(last_dump_time) -> dict[str, int]: + result = {} + for node_type in (orm.CalculationNode, orm.WorkflowNode): + qb = orm.QueryBuilder().append(node_type, project=['uuid']) + nodes = cast(list[str], qb.all(flat=True)) + nodes = filter_by_last_dump_time(nodes=nodes, last_dump_time=last_dump_time) + result[node_type.class_node_type.split('.')[-2] + 's'] = len(nodes) + return result diff --git a/src/aiida/tools/dumping/utils.py b/src/aiida/tools/dumping/utils.py index 0758e81770..fc6813e676 100644 --- a/src/aiida/tools/dumping/utils.py +++ b/src/aiida/tools/dumping/utils.py @@ -54,10 +54,7 @@ def prepare_dump_path( # Case 1: Non-empty directory and overwrite is False if not is_empty and not overwrite: - if incremental: - msg = f'Incremental dumping selected. Will update directory `{path_to_validate}` with new data.' - logger.report(msg) - else: + if not incremental: msg = f'Path `{path_to_validate}` already exists, and neither overwrite nor incremental is enabled.' raise FileExistsError(msg) From b2aba2f0d554c8547f0bda50cf53e98b60db79e7 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Thu, 6 Feb 2025 10:20:12 +0100 Subject: [PATCH 18/27] BaseDumper dataclass. get_processes return dict. Extend tests. --- src/aiida/cmdline/commands/cmd_profile.py | 13 +-- src/aiida/tools/dumping/base.py | 21 ++-- src/aiida/tools/dumping/collection.py | 67 ++++++------ src/aiida/tools/dumping/logger.py | 4 +- src/aiida/tools/dumping/utils.py | 2 +- tests/tools/dumping/test_collection.py | 120 ++++++++++++++++++---- tests/tools/dumping/test_process.py | 63 ------------ tests/tools/dumping/test_utils.py | 78 ++++++++++++++ 8 files changed, 233 insertions(+), 135 deletions(-) create mode 100644 tests/tools/dumping/test_utils.py diff --git a/src/aiida/cmdline/commands/cmd_profile.py b/src/aiida/cmdline/commands/cmd_profile.py index a69acbba3c..78c7e62686 100644 --- a/src/aiida/cmdline/commands/cmd_profile.py +++ b/src/aiida/cmdline/commands/cmd_profile.py @@ -19,8 +19,6 @@ from aiida.cmdline.utils import defaults, echo from aiida.common import exceptions from aiida.manage.configuration import Profile, create_profile, get_config -from aiida.tools.dumping import ProcessDumper, ProfileDumper -from aiida.tools.dumping.logger import DumpLogger @verdi.group('profile') @@ -311,7 +309,9 @@ def profile_mirror( from datetime import datetime from pathlib import Path + from aiida.tools.dumping import ProcessDumper, ProfileDumper from aiida.tools.dumping.base import BaseDumper + from aiida.tools.dumping.logger import DumpLogger from aiida.tools.dumping.utils import prepare_dump_path profile = ctx.obj['profile'] @@ -321,6 +321,8 @@ def profile_mirror( if path is None: path = Path.cwd() / f'{profile.name}-mirror' + echo.echo_report(f'Mirroring data of profile `{profile.name}` at path: `{path}`.') + SAFEGUARD_FILE: str = '.verdi_profile_mirror' # noqa: N806 safeguard_file_path: Path = path / SAFEGUARD_FILE @@ -347,10 +349,8 @@ echo.echo_report(dry_run_message) return - echo.echo_report(f"Dumping of profile `{profile.name}`'s data at path: `{path}`.") - if incremental: - msg = 'Incremental dumping selected. Will update directory.' + msg = 'Incremental mirroring selected. Will update directory.'
echo.echo_report(msg) try: @@ -392,6 +392,7 @@ def profile_mirror( with safeguard_file_path.open('a') as fhandle: fhandle.write(f'Last profile mirror time: {last_dump_time.isoformat()}\n') + # Write the logging json file to disk dump_logger.save_log() - echo.echo_report(f'Dumped {dump_logger.counter} new nodes.') + echo.echo_success(f'Dumped {dump_logger.counter} new nodes.') diff --git a/src/aiida/tools/dumping/base.py b/src/aiida/tools/dumping/base.py index a2e2c379e8..bbe63c9301 100644 --- a/src/aiida/tools/dumping/base.py +++ b/src/aiida/tools/dumping/base.py @@ -7,19 +7,18 @@ # For further information please visit http://www.aiida.net # ########################################################################### +from dataclasses import dataclass from datetime import datetime from pathlib import Path +@dataclass class BaseDumper: - def __init__( - self, - dump_parent_path: Path | None = None, - overwrite: bool = False, - incremental: bool = True, - last_dump_time: datetime | None = None, - ): - self.dump_parent_path = dump_parent_path or Path.cwd() - self.overwrite = overwrite - self.incremental = incremental - self.last_dump_time = last_dump_time + dump_parent_path: Path | None = None + overwrite: bool = False + incremental: bool = True + last_dump_time: datetime | None = None + + def __post_init__(self): + if self.dump_parent_path is None: + self.dump_parent_path = Path.cwd() diff --git a/src/aiida/tools/dumping/collection.py b/src/aiida/tools/dumping/collection.py index 1852bc2245..8ea026382c 100644 --- a/src/aiida/tools/dumping/collection.py +++ b/src/aiida/tools/dumping/collection.py @@ -14,6 +14,7 @@ from datetime import datetime from functools import cached_property from pathlib import Path +from typing import TYPE_CHECKING, TypeVar, cast from aiida import orm from aiida.common.log import AIIDA_LOGGER @@ -22,6 +23,12 @@ from aiida.tools.dumping.process import ProcessDumper from aiida.tools.dumping.utils import filter_by_last_dump_time +if TYPE_CHECKING: + from collections.abc import Sequence + +T = TypeVar('T', bound='orm.ProcessNode') + + logger = AIIDA_LOGGER.getChild('tools.dumping') @@ -33,11 +40,11 @@ def __init__( dump_logger: DumpLogger | None = None, collection: orm.Group | str | list[str] | None = None, deduplicate: bool = True, - output_path: Path | str | None = None, + output_path: Path | None = None, ): self.deduplicate = deduplicate - # Pass the collection type. 
Could be Group or just list of nodes + # Collection could be a Group or a list of nodes if isinstance(collection, str): try: collection = orm.load_group(collection) @@ -51,8 +58,10 @@ def __init__( self.dump_logger = dump_logger or DumpLogger(dump_parent_path=self.base_dumper.dump_parent_path) # Properly set the `output_path` attribute - - self.output_path = Path(output_path or self.base_dumper.dump_parent_path) + if output_path is not None: + self.output_path = output_path + else: + self.output_path = Path.cwd() @cached_property def nodes(self) -> list[str]: @@ -61,7 +70,7 @@ def nodes(self) -> list[str]: def _get_nodes(self) -> list[str]: nodes: list[str] | None = None if isinstance(self.collection, orm.Group): - nodes = [n.uuid for n in list(self.collection.nodes)] + nodes = [n.uuid for n in self.collection.nodes] elif isinstance(self.collection, list) and len(self.collection) > 0: if all(isinstance(n, str) for n in self.collection): nodes = self.collection @@ -78,7 +87,7 @@ def _should_dump_processes(self, nodes: list[str] | None = None) -> bool: test_nodes = nodes or self.nodes return len([node for node in test_nodes if isinstance(orm.load_node(node), orm.ProcessNode)]) > 0 - def _get_processes(self): + def _get_processes(self) -> dict[str, Sequence[orm.ProcessNode]]: nodes = [orm.load_node(n) for n in self.nodes] workflows = [node for node in nodes if isinstance(node, orm.WorkflowNode)] @@ -94,28 +103,20 @@ def _get_processes(self): node for node in workflow.called_descendants if isinstance(node, orm.CalculationNode) ] - calculations = set([node for node in nodes if isinstance(node, orm.CalculationNode)] + called_calculations) - - self.calculations = calculations - self.workflows = workflows + calculations = [node for node in nodes if isinstance(node, orm.CalculationNode)] + called_calculations + return { + 'calculations': cast(Sequence[orm.ProcessNode], calculations), + 'workflows': cast(Sequence[orm.ProcessNode], workflows), + } + # return {'calculations': calculations, 'workflows': workflows} - def _dump_processes(self): - self._get_processes() - - if len(self.workflows) + len(self.calculations) == 0: - logger.report('No workflows or calculations to dump in group.') - return - - self._dump_calculations() - self._dump_workflows() - - def _dump_calculations(self): - if len(self.calculations) == 0: + def _dump_calculations(self, calculations: Sequence[orm.CalculationNode]) -> None: + if len(calculations) == 0: return calculations_path = self.output_path / 'calculations' dumped_calculations = {} - for calculation in self.calculations: + for calculation in calculations: calculation_dumper = self.process_dumper calculation_dump_path = calculations_path / calculation_dumper._generate_default_dump_path( @@ -123,7 +124,9 @@ def _dump_calculations(self): ) if calculation.caller is None: - calculation_dumper._dump_calculation(calculation_node=calculation, output_path=calculation_dump_path) + calculation_dumper._dump_calculation( + calculation_node=cast(orm.CalculationNode, calculation), output_path=calculation_dump_path + ) dumped_calculations[calculation.uuid] = DumpLog( path=calculation_dump_path, @@ -132,14 +135,14 @@ def _dump_calculations(self): self.dump_logger.update_calculations(dumped_calculations) - def _dump_workflows(self) -> None: - if len(self.workflows) == 0: + def _dump_workflows(self, workflows: Sequence[orm.WorkflowNode]) -> None: + if len(workflows) == 0: return workflow_path = self.output_path / 'workflows' workflow_path.mkdir(exist_ok=True, parents=True) dumped_workflows 
= {} - for workflow in self.workflows: + for workflow in workflows: workflow_dumper = self.process_dumper workflow_dump_path = workflow_path / workflow_dumper._generate_default_dump_path( @@ -148,14 +151,14 @@ def _dump_workflows(self) -> None: logged_workflows = self.dump_logger.get_log()['workflows'] - if self.deduplicate and workflow.uuid in logged_workflows.keys(): + if self.deduplicate and workflow in logged_workflows.keys(): os.symlink( src=logged_workflows[workflow.uuid].path, dst=workflow_dump_path, ) else: workflow_dumper._dump_workflow( - workflow_node=workflow, + workflow_node=cast(orm.WorkflowNode, workflow), output_path=workflow_dump_path, ) @@ -166,6 +169,8 @@ def _dump_workflows(self) -> None: self.dump_logger.update_workflows(dumped_workflows) - def dump(self): + def dump(self) -> None: self.output_path.mkdir(exist_ok=True, parents=True) - self._dump_processes() + collection_processes = self._get_processes() + self._dump_calculations(calculations=collection_processes['calculations']) + self._dump_workflows(workflows=collection_processes['workflows']) diff --git a/src/aiida/tools/dumping/logger.py b/src/aiida/tools/dumping/logger.py index 34fd2170e8..7489df0bbd 100644 --- a/src/aiida/tools/dumping/logger.py +++ b/src/aiida/tools/dumping/logger.py @@ -23,12 +23,12 @@ class DumpLogger: def __init__( self, - dump_parent_path: Path, + dump_parent_path: Path | None = None, calculations: DumpDict | None = None, workflows: DumpDict | None = None, counter: int = 0, ) -> None: - self.dump_parent_path = dump_parent_path + self.dump_parent_path = dump_parent_path or Path.cwd() self.calculations = calculations or {} self.workflows = workflows or {} self.counter = 0 diff --git a/src/aiida/tools/dumping/utils.py b/src/aiida/tools/dumping/utils.py index fc6813e676..0573fede09 100644 --- a/src/aiida/tools/dumping/utils.py +++ b/src/aiida/tools/dumping/utils.py @@ -63,7 +63,7 @@ def prepare_dump_path( safeguard_exists = (path_to_validate / safeguard_file).is_file() if safeguard_exists: - msg = f'Overwriting directory `{path_to_validate}`.' + msg = '`--overwrite` option selected. Will recreate directory.' 
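The symlink-based de-duplication above relies on the `DumpLogger` knowing which nodes were already written to disk. A usage sketch based on the definitions in this patch series, assuming `DumpLog` carries the `path` and `time` fields used in `collection.py` (the UUID and paths below are placeholders):

    from datetime import datetime
    from pathlib import Path

    from aiida.tools.dumping.logger import DumpLog, DumpLogger

    dump_parent_path = Path('profile-mirror')
    dump_parent_path.mkdir(exist_ok=True)

    dump_logger = DumpLogger(dump_parent_path=dump_parent_path)
    dump_logger.update_calculations(
        {'<calculation-uuid>': DumpLog(path=dump_parent_path / 'calculations' / 'Add-1',
                                       time=datetime.now().astimezone())}
    )
    dump_logger.save_log()  # writes `.dump_log.json` under `dump_parent_path`

    # A later run can restore the log and decide to skip or symlink an already dumped node
    restored = DumpLogger.from_file(dump_parent_path=dump_parent_path)
    assert '<calculation-uuid>' in restored.get_log()['calculations']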
logger.report(msg) shutil.rmtree(path_to_validate) diff --git a/tests/tools/dumping/test_collection.py b/tests/tools/dumping/test_collection.py index 36a570ee62..0e3f11c130 100644 --- a/tests/tools/dumping/test_collection.py +++ b/tests/tools/dumping/test_collection.py @@ -11,10 +11,12 @@ # TODO: Test that de-duplication also works for calculations # TODO: Test incremental dumping +from datetime import datetime + import pytest from aiida import orm -from aiida.tools.dumping import BaseDumper, CollectionDumper, ProcessDumper +from aiida.tools.dumping import CollectionDumper # Fixture that depends on generate_calculation_node_add_class # @pytest.fixture(scope="class") @@ -24,7 +26,7 @@ @pytest.fixture() -def setup_no_process_group(): +def setup_no_process_group() -> orm.Group: no_process_group, _ = orm.Group.collection.get_or_create(label='no-process') if no_process_group.is_empty: int_node = orm.Int(1).store() @@ -33,7 +35,7 @@ def setup_no_process_group(): @pytest.fixture() -def setup_add_group(generate_calculation_node_add): +def setup_add_group(generate_calculation_node_add) -> orm.Group: add_group, _ = orm.Group.collection.get_or_create(label='add') if add_group.is_empty: add_node = generate_calculation_node_add() @@ -42,7 +44,7 @@ def setup_add_group(generate_calculation_node_add): @pytest.fixture() -def setup_multiply_add_group(generate_workchain_multiply_add): +def setup_multiply_add_group(generate_workchain_multiply_add) -> orm.Group: multiply_add_group, _ = orm.Group.collection.get_or_create(label='multiply-add') if multiply_add_group.is_empty: multiply_add_node = generate_workchain_multiply_add() @@ -51,7 +53,13 @@ def setup_multiply_add_group(generate_workchain_multiply_add): @pytest.fixture() -def multiply_process_groups(): ... +def duplicate_group(): + def _duplicate_group(source_group: orm.Group, dest_group_label: str): + dupl_group, created = orm.Group.collection.get_or_create(label=dest_group_label) + dupl_group.add_nodes(list(source_group.nodes)) + return dupl_group + + return _duplicate_group @pytest.mark.usefixtures('aiida_profile_clean_class') @@ -61,30 +69,100 @@ def test_should_dump_processes(self, setup_no_process_group, setup_add_group): no_process_group: orm.Group = setup_no_process_group add_group: orm.Group = setup_add_group - base_dumper = BaseDumper() - process_dumper = ProcessDumper() + collection_dumper = CollectionDumper(collection=no_process_group) - group_dumper = CollectionDumper( - base_dumper=base_dumper, process_dumper=process_dumper, collection=no_process_group - ) + assert collection_dumper._should_dump_processes() is False - assert group_dumper._should_dump_processes() is False + collection_dumper = CollectionDumper(collection=add_group) - group_dumper = CollectionDumper(base_dumper=base_dumper, process_dumper=process_dumper, collection=add_group) + assert collection_dumper._should_dump_processes() is True - assert group_dumper._should_dump_processes() is True + @pytest.mark.usefixtures('aiida_profile_clean') + def test_get_nodes( + self, setup_no_process_group, setup_add_group, setup_multiply_add_group, generate_calculation_node_add + ): + add_group: orm.Group = setup_add_group - # def test_get_nodes(self): - # pass + collection_dumper = CollectionDumper(collection=add_group) + nodes = collection_dumper._get_nodes() + group_node = orm.load_node(nodes[0]) + group_node_uuid = nodes[0] - # def test_get_processes(self): - # pass + assert len(nodes) == 1 + assert isinstance(nodes[0], str) + assert isinstance(group_node, orm.CalcJobNode) + assert nodes[0] 
== group_node_uuid - # def test_dump_processes(self): - # pass + # Now, add another CalcJobNode to the profile + # As not part of the group, should not be returned + cj_node1 = generate_calculation_node_add() + nodes = collection_dumper._get_nodes() + assert len(nodes) == 1 - # def test_dump_calculations(self): - # pass + # Now, add the node to the group, should be captured by get_nodes + add_group.add_nodes([cj_node1]) + nodes = collection_dumper._get_nodes() + assert len(nodes) == 2 + + # Filtering by time should work + collection_dumper.base_dumper.last_dump_time = datetime.now().astimezone() + + cj_node2 = generate_calculation_node_add() + add_group.add_nodes([cj_node2]) + + nodes = collection_dumper._get_nodes() + assert len(nodes) == 1 + assert nodes[0] == cj_node2.uuid + + with pytest.raises(TypeError): + collection_dumper = CollectionDumper(collection=[1]) + collection_dumper._get_nodes() + + def test_get_processes(self, setup_add_group, setup_multiply_add_group, duplicate_group): + add_group: orm.Group = setup_add_group + multiply_add_group: orm.Group = setup_multiply_add_group + + add_nodes = list(add_group.nodes) + multiply_add_nodes = list(multiply_add_group.nodes) + + add_dumper = CollectionDumper(collection=add_group) + multiply_add_dumper = CollectionDumper(collection=multiply_add_group) + + add_process_dict = add_dumper._get_processes() + assert len(add_process_dict['calculations']) == 1 + assert add_process_dict['calculations'][0].uuid == add_nodes[0].uuid + assert len(add_process_dict['workflows']) == 0 + + multiply_add_process_dict = multiply_add_dumper._get_processes() + + assert len(multiply_add_process_dict['calculations']) == 2 + assert set(multiply_add_process_dict['calculations']) == set(multiply_add_nodes[0].called_descendants) + assert len(multiply_add_process_dict['workflows']) == 1 + assert multiply_add_process_dict['calculations'][0].uuid == multiply_add_nodes[0].uuid + + # TODO: Test here also de-duplication with a Workflow with a sub-workflow + + def test_dump_calculations(self, setup_add_group, setup_multiply_add_group, tmp_path): + add_group: orm.Group = setup_add_group + multiply_add_group: orm.Group = setup_multiply_add_group + + add_group_path = tmp_path / 'add_group' + multiply_add_group_path = tmp_path / 'multiply_add_group' + + add_dumper = CollectionDumper(collection=add_group, output_path=add_group_path) + multiply_add_dumper = CollectionDumper(collection=multiply_add_group, output_path=multiply_add_group_path) + + add_process_dict = add_dumper._get_processes() + + add_dumper._dump_calculations(add_process_dict['calculations']) + + assert (add_group_path / 'calculations' / 'ArithmeticAddCalculation-4' / 'inputs' / 'aiida.in').exists() + + multiply_add_process_dict = multiply_add_dumper._get_processes() + + multiply_add_dumper._dump_calculations(multiply_add_process_dict['calculations']) + + pytest.set_trace() # def test_dump_workflows(self): # pass diff --git a/tests/tools/dumping/test_process.py b/tests/tools/dumping/test_process.py index 683e3c4707..56fb356054 100644 --- a/tests/tools/dumping/test_process.py +++ b/tests/tools/dumping/test_process.py @@ -265,69 +265,6 @@ def test_dump_calculation_add(tmp_path, generate_calculation_node_add): assert all([output_file.is_file() for output_file in output_files]) -# Tests for helper methods -@pytest.mark.usefixtures('chdir_tmp_path') -def test_prepare_dump_path(tmp_path): - from aiida.tools.dumping.utils import prepare_dump_path - - test_dir = tmp_path / Path('test-dir') - test_file = test_dir / 
filename - safeguard_file = node_metadata_file - safeguard_file_path = test_dir / safeguard_file - - # Cannot set both overwrite and incremental to True - with pytest.raises(ValueError): - prepare_dump_path(path_to_validate=test_dir, overwrite=True, incremental=True) - - # Check that fails if file with same name as output dir - test_dir.touch() - with pytest.raises(FileExistsError): - prepare_dump_path(path_to_validate=test_dir) - test_dir.unlink() - - # Check if path created if non-existent - prepare_dump_path(path_to_validate=test_dir) - assert test_dir.exists() - assert safeguard_file_path.is_file() - - # Directory exists, but empty -> is fine - safeguard_file_path.unlink() - prepare_dump_path(path_to_validate=test_dir) - assert test_dir.exists() - assert safeguard_file_path.is_file() - - # Fails if directory not empty, safeguard file existent, and overwrite set to False - test_file.touch() - safeguard_file_path.touch() - with pytest.raises(FileExistsError): - prepare_dump_path(path_to_validate=test_dir, overwrite=False, incremental=False) - - # Fails if directory not empty, overwrite set to True, but safeguard_file not found (for safety reasons) - safeguard_file_path.unlink() - test_file.touch() - with pytest.raises(FileNotFoundError): - prepare_dump_path(path_to_validate=test_dir, overwrite=True, incremental=False) - - # Works if directory not empty, overwrite set to True and safeguard_file contained - # -> After function call, test_file is deleted, and safeguard_file again created - safeguard_file_path.touch() - prepare_dump_path( - path_to_validate=test_dir, - safeguard_file=safeguard_file, - overwrite=True, - incremental=False, - ) - assert not test_file.is_file() - assert safeguard_file_path.is_file() - - # Works if directory not empty, but incremental=True and safeguard_file (e.g. `.aiida_node_metadata.yaml`) contained - # -> After function call, test file and safeguard_file still there - test_file.touch() - prepare_dump_path(path_to_validate=test_dir, safeguard_file=safeguard_file, incremental=True) - assert safeguard_file_path.is_file() - assert test_file.is_file() - - @pytest.mark.usefixtures('aiida_profile_clean') def test_generate_default_dump_path( generate_calculation_node_add, diff --git a/tests/tools/dumping/test_utils.py b/tests/tools/dumping/test_utils.py new file mode 100644 index 0000000000..108a8c612a --- /dev/null +++ b/tests/tools/dumping/test_utils.py @@ -0,0 +1,78 @@ +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. 
# +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +"""Tests for utility functions for the dumping data to disk.""" + +from pathlib import Path + +import pytest + +filename = 'file.txt' +node_metadata_file = '.aiida_node_metadata.yaml' + + +@pytest.mark.usefixtures('chdir_tmp_path') +def test_prepare_dump_path(tmp_path): + from aiida.tools.dumping.utils import prepare_dump_path + + test_dir = tmp_path / Path('test-dir') + test_file = test_dir / filename + safeguard_file = node_metadata_file + safeguard_file_path = test_dir / safeguard_file + + # Cannot set both overwrite and incremental to True + with pytest.raises(ValueError): + prepare_dump_path(path_to_validate=test_dir, overwrite=True, incremental=True) + + # Check that fails if file with same name as output dir + test_dir.touch() + with pytest.raises(FileExistsError): + prepare_dump_path(path_to_validate=test_dir) + test_dir.unlink() + + # Check if path created if non-existent + prepare_dump_path(path_to_validate=test_dir) + assert test_dir.exists() + assert safeguard_file_path.is_file() + + # Directory exists, but empty -> is fine + safeguard_file_path.unlink() + prepare_dump_path(path_to_validate=test_dir) + assert test_dir.exists() + assert safeguard_file_path.is_file() + + # Fails if directory not empty, safeguard file existent, and overwrite set to False + test_file.touch() + safeguard_file_path.touch() + with pytest.raises(FileExistsError): + prepare_dump_path(path_to_validate=test_dir, overwrite=False, incremental=False) + + # Fails if directory not empty, overwrite set to True, but safeguard_file not found (for safety reasons) + safeguard_file_path.unlink() + test_file.touch() + with pytest.raises(FileNotFoundError): + prepare_dump_path(path_to_validate=test_dir, overwrite=True, incremental=False) + + # Works if directory not empty, overwrite set to True and safeguard_file contained + # -> After function call, test_file is deleted, and safeguard_file again created + safeguard_file_path.touch() + prepare_dump_path( + path_to_validate=test_dir, + safeguard_file=safeguard_file, + overwrite=True, + incremental=False, + ) + assert not test_file.is_file() + assert safeguard_file_path.is_file() + + # Works if directory not empty, but incremental=True and safeguard_file (e.g. 
`.aiida_node_metadata.yaml`) contained + # -> After function call, test file and safeguard_file still there + test_file.touch() + prepare_dump_path(path_to_validate=test_dir, safeguard_file=safeguard_file, incremental=True) + assert safeguard_file_path.is_file() + assert test_file.is_file() From 7c905e6c1dba2ea9d87fb657472171ffea6230e0 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Thu, 6 Feb 2025 12:04:12 +0100 Subject: [PATCH 19/27] Add `ProcessesToDump` NamedTuple --- src/aiida/tools/dumping/collection.py | 98 +++++++++++++++----------- src/aiida/tools/dumping/profile.py | 7 +- tests/tools/dumping/test_collection.py | 32 ++++----- 3 files changed, 77 insertions(+), 60 deletions(-) diff --git a/src/aiida/tools/dumping/collection.py b/src/aiida/tools/dumping/collection.py index 8ea026382c..c1de2dc1b5 100644 --- a/src/aiida/tools/dumping/collection.py +++ b/src/aiida/tools/dumping/collection.py @@ -14,7 +14,7 @@ from datetime import datetime from functools import cached_property from pathlib import Path -from typing import TYPE_CHECKING, TypeVar, cast +from typing import TYPE_CHECKING, NamedTuple, TypeVar from aiida import orm from aiida.common.log import AIIDA_LOGGER @@ -26,12 +26,19 @@ if TYPE_CHECKING: from collections.abc import Sequence + from aiida.tools.dumping.logger import DumpDict + T = TypeVar('T', bound='orm.ProcessNode') logger = AIIDA_LOGGER.getChild('tools.dumping') +class ProcessesToDump(NamedTuple): + calculations: Sequence[orm.CalculationNode] + workflows: Sequence[orm.WorkflowNode] + + class CollectionDumper: def __init__( self, @@ -41,6 +48,7 @@ def __init__( collection: orm.Group | str | list[str] | None = None, deduplicate: bool = True, output_path: Path | None = None, + processes_to_dump: ProcessesToDump | None = None, ): self.deduplicate = deduplicate @@ -83,36 +91,41 @@ def _get_nodes(self) -> list[str]: filtered_nodes = filter_by_last_dump_time(nodes=nodes, last_dump_time=self.base_dumper.last_dump_time) return filtered_nodes - def _should_dump_processes(self, nodes: list[str] | None = None) -> bool: - test_nodes = nodes or self.nodes - return len([node for node in test_nodes if isinstance(orm.load_node(node), orm.ProcessNode)]) > 0 + @cached_property + def processes_to_dump(self) -> ProcessesToDump: + return self._get_processes_to_dump() - def _get_processes(self) -> dict[str, Sequence[orm.ProcessNode]]: + def _get_processes_to_dump(self) -> ProcessesToDump: nodes = [orm.load_node(n) for n in self.nodes] workflows = [node for node in nodes if isinstance(node, orm.WorkflowNode)] + calculations = [node for node in nodes if isinstance(node, orm.CalculationNode)] # Make sure that only top-level workflows are dumped in their own directories when de-duplcation is enabled if self.deduplicate: workflows = [workflow for workflow in workflows if workflow.caller is None] - # Also need to obtain sub-calculations that were called by workflows of the group - # These are not contained in the group.nodes directly - called_calculations = [] - for workflow in workflows: - called_calculations += [ - node for node in workflow.called_descendants if isinstance(node, orm.CalculationNode) - ] - - calculations = [node for node in nodes if isinstance(node, orm.CalculationNode)] + called_calculations - return { - 'calculations': cast(Sequence[orm.ProcessNode], calculations), - 'workflows': cast(Sequence[orm.ProcessNode], workflows), - } - # return {'calculations': calculations, 'workflows': workflows} + else: + # If no deduplication, also sub-calculations that were called by workflows 
of the group, and which are not + # contained in the group.nodes directly are being dumped explicitly + called_calculations = [] + for workflow in workflows: + called_calculations += [ + node for node in workflow.called_descendants if isinstance(node, orm.CalculationNode) + ] + + calculations += called_calculations + + return ProcessesToDump( + calculations=calculations, + workflows=workflows, + ) + + def should_dump_processes(self) -> bool: + # if self.processes_to_dump is None: + # self._get_processes_to_dump() + return (len(self.processes_to_dump.calculations) + len(self.processes_to_dump.workflows)) > 0 def _dump_calculations(self, calculations: Sequence[orm.CalculationNode]) -> None: - if len(calculations) == 0: - return calculations_path = self.output_path / 'calculations' dumped_calculations = {} @@ -123,34 +136,32 @@ def _dump_calculations(self, calculations: Sequence[orm.CalculationNode]) -> Non process_node=calculation, prefix=None ) - if calculation.caller is None: - calculation_dumper._dump_calculation( - calculation_node=cast(orm.CalculationNode, calculation), output_path=calculation_dump_path - ) + # This is handled in the get_processes method: `if calculation.caller is None:` + calculation_dumper._dump_calculation(calculation_node=calculation, output_path=calculation_dump_path) - dumped_calculations[calculation.uuid] = DumpLog( - path=calculation_dump_path, - time=datetime.now().astimezone(), - ) + dumped_calculations[calculation.uuid] = DumpLog( + path=calculation_dump_path, + time=datetime.now().astimezone(), + ) - self.dump_logger.update_calculations(dumped_calculations) + self.dump_logger.update_calculations(new_calculations=dumped_calculations) def _dump_workflows(self, workflows: Sequence[orm.WorkflowNode]) -> None: - if len(workflows) == 0: - return - workflow_path = self.output_path / 'workflows' + workflow_path: Path = self.output_path / 'workflows' + dumped_workflows: dict[str, DumpLog] = {} + workflow_path.mkdir(exist_ok=True, parents=True) - dumped_workflows = {} for workflow in workflows: - workflow_dumper = self.process_dumper + workflow_dumper: ProcessDumper = self.process_dumper - workflow_dump_path = workflow_path / workflow_dumper._generate_default_dump_path( + workflow_dump_path: Path = workflow_path / workflow_dumper._generate_default_dump_path( process_node=workflow, prefix=None ) - logged_workflows = self.dump_logger.get_log()['workflows'] + logged_workflows: DumpDict = self.dump_logger.get_log()['workflows'] + # Symlink here, if deduplication enabled and workflow was already dumped if self.deduplicate and workflow in logged_workflows.keys(): os.symlink( src=logged_workflows[workflow.uuid].path, @@ -158,7 +169,7 @@ def _dump_workflows(self, workflows: Sequence[orm.WorkflowNode]) -> None: ) else: workflow_dumper._dump_workflow( - workflow_node=cast(orm.WorkflowNode, workflow), + workflow_node=workflow, output_path=workflow_dump_path, ) @@ -167,10 +178,13 @@ def _dump_workflows(self, workflows: Sequence[orm.WorkflowNode]) -> None: time=datetime.now().astimezone(), ) - self.dump_logger.update_workflows(dumped_workflows) + self.dump_logger.update_workflows(new_workflows=dumped_workflows) def dump(self) -> None: self.output_path.mkdir(exist_ok=True, parents=True) - collection_processes = self._get_processes() - self._dump_calculations(calculations=collection_processes['calculations']) - self._dump_workflows(workflows=collection_processes['workflows']) + collection_processes: ProcessesToDump = self._get_processes_to_dump() + + if 
len(collection_processes.calculations) > 1: + self._dump_calculations(calculations=collection_processes.calculations) + if len(collection_processes.workflows) > 1: + self._dump_workflows(workflows=collection_processes.workflows) diff --git a/src/aiida/tools/dumping/profile.py b/src/aiida/tools/dumping/profile.py index a591d50269..03f3643b18 100644 --- a/src/aiida/tools/dumping/profile.py +++ b/src/aiida/tools/dumping/profile.py @@ -74,6 +74,9 @@ def dump(self): def _dump_processes_not_in_any_group(self): # === Dump the data that is not associated with any group === + + # `dump_parent_path` is set in the `post_init` method of the `BaseDumper` dataclass + assert self.base_dumper.dump_parent_path is not None if self.organize_by_groups: output_path = self.base_dumper.dump_parent_path / 'no-group' else: @@ -90,7 +93,7 @@ def _dump_processes_not_in_any_group(self): output_path=output_path, ) - if self.dump_processes and no_group_dumper._should_dump_processes(): + if self.dump_processes and no_group_dumper.should_dump_processes(): logger.report(f'Dumping processes not in any group for profile `{self.profile.name}`...') no_group_dumper.dump() @@ -113,7 +116,7 @@ def _dump_processes_per_group(self, groups): output_path=output_path, ) - if self.dump_processes and group_dumper._should_dump_processes(): + if self.dump_processes and group_dumper.should_dump_processes(): logger.report(f'Dumping processes in group {group.label} for profile `{self.profile.name}`...') group_dumper.dump() diff --git a/tests/tools/dumping/test_collection.py b/tests/tools/dumping/test_collection.py index 0e3f11c130..d6c7ed067e 100644 --- a/tests/tools/dumping/test_collection.py +++ b/tests/tools/dumping/test_collection.py @@ -71,11 +71,11 @@ def test_should_dump_processes(self, setup_no_process_group, setup_add_group): collection_dumper = CollectionDumper(collection=no_process_group) - assert collection_dumper._should_dump_processes() is False + assert collection_dumper.should_dump_processes() is False collection_dumper = CollectionDumper(collection=add_group) - assert collection_dumper._should_dump_processes() is True + assert collection_dumper.should_dump_processes() is True @pytest.mark.usefixtures('aiida_profile_clean') def test_get_nodes( @@ -118,7 +118,7 @@ def test_get_nodes( collection_dumper = CollectionDumper(collection=[1]) collection_dumper._get_nodes() - def test_get_processes(self, setup_add_group, setup_multiply_add_group, duplicate_group): + def test_get_processes_to_dump(self, setup_add_group, setup_multiply_add_group, duplicate_group): add_group: orm.Group = setup_add_group multiply_add_group: orm.Group = setup_multiply_add_group @@ -128,17 +128,17 @@ def test_get_processes(self, setup_add_group, setup_multiply_add_group, duplicat add_dumper = CollectionDumper(collection=add_group) multiply_add_dumper = CollectionDumper(collection=multiply_add_group) - add_process_dict = add_dumper._get_processes() - assert len(add_process_dict['calculations']) == 1 - assert add_process_dict['calculations'][0].uuid == add_nodes[0].uuid - assert len(add_process_dict['workflows']) == 0 + add_process_to_dump = add_dumper._get_processes_to_dump() + assert len(add_process_to_dump.calculations) == 1 + assert add_process_to_dump.calculations[0].uuid == add_nodes[0].uuid + assert len(add_process_to_dump.workflows) == 0 - multiply_add_process_dict = multiply_add_dumper._get_processes() + multiply_add_processes_to_dump = multiply_add_dumper._get_processes_to_dump() - assert len(multiply_add_process_dict['calculations']) == 2 - assert 
set(multiply_add_process_dict['calculations']) == set(multiply_add_nodes[0].called_descendants) - assert len(multiply_add_process_dict['workflows']) == 1 - assert multiply_add_process_dict['calculations'][0].uuid == multiply_add_nodes[0].uuid + assert len(multiply_add_processes_to_dump.calculations) == 2 + assert set(multiply_add_processes_to_dump.calculations) == set(multiply_add_nodes[0].called_descendants) + assert len(multiply_add_processes_to_dump.workflows) == 1 + assert multiply_add_processes_to_dump.calculations[0].uuid == multiply_add_nodes[0].uuid # TODO: Test here also de-duplication with a Workflow with a sub-workflow @@ -152,15 +152,15 @@ def test_dump_calculations(self, setup_add_group, setup_multiply_add_group, tmp_ add_dumper = CollectionDumper(collection=add_group, output_path=add_group_path) multiply_add_dumper = CollectionDumper(collection=multiply_add_group, output_path=multiply_add_group_path) - add_process_dict = add_dumper._get_processes() + add_processes_to_dump = add_dumper._get_processes_to_dump() - add_dumper._dump_calculations(add_process_dict['calculations']) + add_dumper._dump_calculations(add_processes_to_dump.calculations) assert (add_group_path / 'calculations' / 'ArithmeticAddCalculation-4' / 'inputs' / 'aiida.in').exists() - multiply_add_process_dict = multiply_add_dumper._get_processes() + multiply_add_processes_to_dump = multiply_add_dumper._get_processes_to_dump() - multiply_add_dumper._dump_calculations(multiply_add_process_dict['calculations']) + multiply_add_dumper._dump_calculations(multiply_add_processes_to_dump.calculations) pytest.set_trace() From 7dba485fb8bc2012de2e6aa1dc6ef7a9c03e8a50 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Thu, 6 Feb 2025 13:57:05 +0100 Subject: [PATCH 20/27] Use `compare_tree` utility function for dumping tests --- src/aiida/cmdline/commands/cmd_profile.py | 2 + src/aiida/tools/dumping/collection.py | 4 +- src/aiida/tools/dumping/profile.py | 4 +- tests/tools/dumping/test_collection.py | 100 +++++++++++----------- tests/tools/dumping/test_utils.py | 29 +++++++ 5 files changed, 84 insertions(+), 55 deletions(-) diff --git a/src/aiida/cmdline/commands/cmd_profile.py b/src/aiida/cmdline/commands/cmd_profile.py index 78c7e62686..215af6c2b6 100644 --- a/src/aiida/cmdline/commands/cmd_profile.py +++ b/src/aiida/cmdline/commands/cmd_profile.py @@ -336,6 +336,8 @@ def profile_mirror( except FileExistsError as exc: echo.echo_critical(str(exc)) + breakpoint() + try: with safeguard_file_path.open('r') as fhandle: last_dump_time = datetime.fromisoformat(fhandle.readlines()[-1].strip().split()[-1]).astimezone() diff --git a/src/aiida/tools/dumping/collection.py b/src/aiida/tools/dumping/collection.py index c1de2dc1b5..c1c1674442 100644 --- a/src/aiida/tools/dumping/collection.py +++ b/src/aiida/tools/dumping/collection.py @@ -184,7 +184,7 @@ def dump(self) -> None: self.output_path.mkdir(exist_ok=True, parents=True) collection_processes: ProcessesToDump = self._get_processes_to_dump() - if len(collection_processes.calculations) > 1: + if len(collection_processes.calculations) > 0: self._dump_calculations(calculations=collection_processes.calculations) - if len(collection_processes.workflows) > 1: + if len(collection_processes.workflows) > 0: self._dump_workflows(workflows=collection_processes.workflows) diff --git a/src/aiida/tools/dumping/profile.py b/src/aiida/tools/dumping/profile.py index 03f3643b18..bc9f45fa80 100644 --- a/src/aiida/tools/dumping/profile.py +++ b/src/aiida/tools/dumping/profile.py @@ -101,9 +101,11 @@ 
def _dump_processes_not_in_any_group(self): def _dump_processes_per_group(self, groups): # === Dump data per-group if Groups exist in profile or are selected === + assert self.base_dumper.dump_parent_path is not None + for group in groups: if self.organize_by_groups: - output_path = self.base_dumper.dump_parent_path / group.label + output_path = self.base_dumper.dump_parent_path / f"group-{group.label}" else: output_path = self.base_dumper.dump_parent_path diff --git a/tests/tools/dumping/test_collection.py b/tests/tools/dumping/test_collection.py index d6c7ed067e..82491a7ff4 100644 --- a/tests/tools/dumping/test_collection.py +++ b/tests/tools/dumping/test_collection.py @@ -12,12 +12,15 @@ # TODO: Test incremental dumping from datetime import datetime +from pathlib import Path import pytest from aiida import orm from aiida.tools.dumping import CollectionDumper +from .test_utils import compare_tree + # Fixture that depends on generate_calculation_node_add_class # @pytest.fixture(scope="class") # def setup_calculation_node_add_class(generate_calculation_node_add_class): @@ -146,21 +149,61 @@ def test_dump_calculations(self, setup_add_group, setup_multiply_add_group, tmp_ add_group: orm.Group = setup_add_group multiply_add_group: orm.Group = setup_multiply_add_group - add_group_path = tmp_path / 'add_group' - multiply_add_group_path = tmp_path / 'multiply_add_group' + add_group_path = Path('add_group') + multiply_add_group_path = Path('multiply_add_group') - add_dumper = CollectionDumper(collection=add_group, output_path=add_group_path) - multiply_add_dumper = CollectionDumper(collection=multiply_add_group, output_path=multiply_add_group_path) + add_dumper = CollectionDumper(collection=add_group, output_path=tmp_path / add_group_path) + multiply_add_dumper = CollectionDumper( + collection=multiply_add_group, output_path=tmp_path / multiply_add_group_path + ) add_processes_to_dump = add_dumper._get_processes_to_dump() add_dumper._dump_calculations(add_processes_to_dump.calculations) - assert (add_group_path / 'calculations' / 'ArithmeticAddCalculation-4' / 'inputs' / 'aiida.in').exists() + expected_tree = { + 'calculations': { + 'ArithmeticAddCalculation-4': { + 'inputs': ['_aiidasubmit.sh', 'aiida.in'], + 'node_inputs': [], + 'outputs': ['_scheduler-stderr.txt', '_scheduler-stdout.txt', 'aiida.out'], + } + } + } + + compare_tree(expected=expected_tree, base_path=tmp_path, relative_path=add_group_path) multiply_add_processes_to_dump = multiply_add_dumper._get_processes_to_dump() + # No calculations to dump when deduplication is enabled multiply_add_dumper._dump_calculations(multiply_add_processes_to_dump.calculations) + multiply_add_test_path: Path = multiply_add_group_path / 'calculations' + + assert not multiply_add_test_path.exists() + + multiply_add_dumper_no_dedup = CollectionDumper( + collection=multiply_add_group, output_path=multiply_add_group_path, deduplicate=False + ) + multiply_add_processes_to_dump = multiply_add_dumper_no_dedup._get_processes_to_dump() + + # calculations to dump when deduplication is enabled + multiply_add_dumper_no_dedup._dump_calculations(multiply_add_processes_to_dump.calculations) + + expected_tree_no_dedup = { + 'calculations': { + 'ArithmeticAddCalculation-15': { + 'inputs': ['_aiidasubmit.sh', 'aiida.in'], + 'node_inputs': [], + 'outputs': ['_scheduler-stderr.txt', '_scheduler-stdout.txt', 'aiida.out'], + }, + 'multiply-13': { + 'inputs': ['source_file'], + 'node_inputs': [], + }, + } + } + + compare_tree(expected=expected_tree_no_dedup, 
base_path=tmp_path, relative_path=multiply_add_group_path) pytest.set_trace() @@ -169,50 +212,3 @@ def test_dump_calculations(self, setup_add_group, setup_multiply_add_group, tmp_ # def test_dump(self): # pass - - -#######3 - -# def test_setup_profile( -# self, -# generate_calculation_node_add, -# generate_workchain_multiply_add, -# generate_calculation_node_io, -# generate_workchain_node_io, -# ): -# # TODO: This is a hack... and not actually a real test -# # TODO: I'm using the `aiida_profile_clean_class` fiture to make sure I have a clean profile for this class -# # TODO: However, this method is not an actual test, but sets up the profile data how I want it for testing -# # TODO: Ideally, I'd create a class-scoped fixture that does the setup -# # TODO: Or define a `setup_class` method -# # TODO: However, as most of AiiDA's fixtures are function-scoped, I didn't manage to get any of these approaches -# # TODO: To work, due to pytest's ScopeMismatch exceptions - -# # Create nodes for profile storage -# ## Not in any group -# int_node = orm.Int(1).store() -# _ = generate_calculation_node_add() -# _ = generate_workchain_multiply_add() -# ## For putting into groups -# add_node = generate_calculation_node_add() -# multiply_add_node = generate_workchain_multiply_add() - -# # Create the various groups -# add_group, _ = orm.Group.collection.get_or_create(label='add') -# multiply_add_group, _ = orm.Group.collection.get_or_create(label='multiply-add') -# cj_dupl_group, _ = orm.Group.collection.get_or_create(label='cj-dupl') -# wc_dupl_group, _ = orm.Group.collection.get_or_create(label='wc-dupl') -# no_process_group, _ = orm.Group.collection.get_or_create(label='no-process') - -# # Populate groups -# add_group.add_nodes([add_node]) -# multiply_add_group.add_nodes([multiply_add_node]) -# cj_dupl_group.add_nodes([add_node]) -# wc_dupl_group.add_nodes([multiply_add_node]) -# no_process_group.add_nodes([int_node]) - -# self.add_group = add_group -# self.multiply_add_group = multiply_add_group -# self.cj_dupl_group = cj_dupl_group -# self.wc_dupl_group = wc_dupl_group -# self.no_process_group = no_process_group diff --git a/tests/tools/dumping/test_utils.py b/tests/tools/dumping/test_utils.py index 108a8c612a..b30e49c146 100644 --- a/tests/tools/dumping/test_utils.py +++ b/tests/tools/dumping/test_utils.py @@ -76,3 +76,32 @@ def test_prepare_dump_path(tmp_path): prepare_dump_path(path_to_validate=test_dir, safeguard_file=safeguard_file, incremental=True) assert safeguard_file_path.is_file() assert test_file.is_file() + + +def compare_tree(expected: dict, base_path: Path, relative_path: Path = Path()): + """Recursively compares an expected directory structure with an actual path. + + Args: + expected (dict): The expected directory structure. + base_path (Path): The root directory where the actual structure is located. + relative_path (Path): The relative path inside the base directory (used internally for recursion). 
+ """ + actual_path = base_path / relative_path + + assert actual_path.exists(), f'Path does not exist: {actual_path}' + assert actual_path.is_dir(), f'Path is not a directory: {actual_path}' + + for name, content in expected.items(): + item_path = actual_path / name + assert item_path.exists(), f'Missing: {item_path}' + + if isinstance(content, list): # It's a directory with files (list of filenames) + assert item_path.is_dir(), f'Expected a directory: {item_path}' + # Check that all files exist inside the directory + for filename in content: + file_path = item_path / filename + assert file_path.exists(), f'Missing file: {file_path}' + assert file_path.is_file(), f'Expected a file: {file_path}' + elif isinstance(content, dict): # It's a subdirectory + assert item_path.is_dir(), f'Expected a directory: {item_path}' + compare_tree(content, base_path, relative_path / name) From 8ddaa159c576882111ecf910584482deff8e70c3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 6 Feb 2025 12:57:29 +0000 Subject: [PATCH 21/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/aiida/tools/dumping/profile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiida/tools/dumping/profile.py b/src/aiida/tools/dumping/profile.py index bc9f45fa80..6b9f33a58e 100644 --- a/src/aiida/tools/dumping/profile.py +++ b/src/aiida/tools/dumping/profile.py @@ -105,7 +105,7 @@ def _dump_processes_per_group(self, groups): for group in groups: if self.organize_by_groups: - output_path = self.base_dumper.dump_parent_path / f"group-{group.label}" + output_path = self.base_dumper.dump_parent_path / f'group-{group.label}' else: output_path = self.base_dumper.dump_parent_path From 42e76ceb8bbd9c6bd861243216478f3fb26c09d1 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Thu, 6 Feb 2025 15:55:18 +0100 Subject: [PATCH 22/27] Start making test methods smaller --- src/aiida/tools/dumping/collection.py | 6 +- tests/tools/dumping/test_collection.py | 110 ++++++++++++++++++------- 2 files changed, 84 insertions(+), 32 deletions(-) diff --git a/src/aiida/tools/dumping/collection.py b/src/aiida/tools/dumping/collection.py index c1c1674442..edc8219b98 100644 --- a/src/aiida/tools/dumping/collection.py +++ b/src/aiida/tools/dumping/collection.py @@ -88,8 +88,10 @@ def _get_nodes(self) -> list[str]: else: nodes = [] - filtered_nodes = filter_by_last_dump_time(nodes=nodes, last_dump_time=self.base_dumper.last_dump_time) - return filtered_nodes + # TODO: Possibly have `last_dump_time` as attribute of CollectionDumper instead + # nodes = filter_by_last_dump_time(nodes=nodes, last_dump_time=self.last_dump_time) + nodes = filter_by_last_dump_time(nodes=nodes, last_dump_time=self.base_dumper.last_dump_time) + return nodes @cached_property def processes_to_dump(self) -> ProcessesToDump: diff --git a/tests/tools/dumping/test_collection.py b/tests/tools/dumping/test_collection.py index 82491a7ff4..5ad3ddd01b 100644 --- a/tests/tools/dumping/test_collection.py +++ b/tests/tools/dumping/test_collection.py @@ -17,7 +17,7 @@ import pytest from aiida import orm -from aiida.tools.dumping import CollectionDumper +from aiida.tools.dumping import CollectionDumper, collection from .test_utils import compare_tree @@ -28,6 +28,7 @@ # generate_calculation_node_add_class() # You can also do any additional setup here +@pytest.mark.usefixtures('aiida_profile_clean') @pytest.fixture() def setup_no_process_group() -> 
orm.Group: no_process_group, _ = orm.Group.collection.get_or_create(label='no-process') @@ -37,6 +38,7 @@ def setup_no_process_group() -> orm.Group: return no_process_group +@pytest.mark.usefixtures('aiida_profile_clean') @pytest.fixture() def setup_add_group(generate_calculation_node_add) -> orm.Group: add_group, _ = orm.Group.collection.get_or_create(label='add') @@ -46,6 +48,7 @@ def setup_add_group(generate_calculation_node_add) -> orm.Group: return add_group +@pytest.mark.usefixtures('aiida_profile_clean') @pytest.fixture() def setup_multiply_add_group(generate_workchain_multiply_add) -> orm.Group: multiply_add_group, _ = orm.Group.collection.get_or_create(label='multiply-add') @@ -55,6 +58,7 @@ def setup_multiply_add_group(generate_workchain_multiply_add) -> orm.Group: return multiply_add_group +@pytest.mark.usefixtures('aiida_profile_clean') @pytest.fixture() def duplicate_group(): def _duplicate_group(source_group: orm.Group, dest_group_label: str): @@ -80,46 +84,51 @@ def test_should_dump_processes(self, setup_no_process_group, setup_add_group): assert collection_dumper.should_dump_processes() is True - @pytest.mark.usefixtures('aiida_profile_clean') - def test_get_nodes( - self, setup_no_process_group, setup_add_group, setup_multiply_add_group, generate_calculation_node_add - ): + + def test_get_nodes_add_group(self, setup_add_group): + add_group: orm.Group = setup_add_group collection_dumper = CollectionDumper(collection=add_group) - nodes = collection_dumper._get_nodes() - group_node = orm.load_node(nodes[0]) - group_node_uuid = nodes[0] - assert len(nodes) == 1 - assert isinstance(nodes[0], str) - assert isinstance(group_node, orm.CalcJobNode) - assert nodes[0] == group_node_uuid - - # Now, add another CalcJobNode to the profile - # As not part of the group, should not be returned - cj_node1 = generate_calculation_node_add() nodes = collection_dumper._get_nodes() assert len(nodes) == 1 + # add_group: orm.Group = setup_add_group - # Now, add the node to the group, should be captured by get_nodes - add_group.add_nodes([cj_node1]) - nodes = collection_dumper._get_nodes() - assert len(nodes) == 2 + # collection_dumper = CollectionDumper(collection=add_group) + # nodes = collection_dumper._get_nodes() + # group_node = orm.load_node(nodes[0]) + # group_node_uuid = nodes[0] - # Filtering by time should work - collection_dumper.base_dumper.last_dump_time = datetime.now().astimezone() + # assert len(nodes) == 1 + # assert isinstance(nodes[0], str) + # assert isinstance(group_node, orm.CalcJobNode) + # assert nodes[0] == group_node_uuid - cj_node2 = generate_calculation_node_add() - add_group.add_nodes([cj_node2]) + # # Now, add another CalcJobNode to the profile + # # As not part of the group, should not be returned + # cj_node1 = generate_calculation_node_add() + # nodes = collection_dumper._get_nodes() + # assert len(nodes) == 1 - nodes = collection_dumper._get_nodes() - assert len(nodes) == 1 - assert nodes[0] == cj_node2.uuid + # # Now, add the node to the group, should be captured by get_nodes + # add_group.add_nodes([cj_node1]) + # nodes = collection_dumper._get_nodes() + # assert len(nodes) == 2 + + # # Filtering by time should work + # collection_dumper.base_dumper.last_dump_time = datetime.now().astimezone() - with pytest.raises(TypeError): - collection_dumper = CollectionDumper(collection=[1]) - collection_dumper._get_nodes() + # cj_node2 = generate_calculation_node_add() + # add_group.add_nodes([cj_node2]) + + # nodes = collection_dumper._get_nodes() + # assert len(nodes) == 
1 + # assert nodes[0] == cj_node2.uuid + + # with pytest.raises(TypeError): + # collection_dumper = CollectionDumper(collection=[1]) + # collection_dumper._get_nodes() def test_get_processes_to_dump(self, setup_add_group, setup_multiply_add_group, duplicate_group): add_group: orm.Group = setup_add_group @@ -212,3 +221,44 @@ def test_dump_calculations(self, setup_add_group, setup_multiply_add_group, tmp_ # def test_dump(self): # pass + + # @pytest.mark.usefixtures('aiida_profile_clean') + # def test_get_nodes( + # self, setup_no_process_group, setup_add_group, setup_multiply_add_group, generate_calculation_node_add + # ): + # add_group: orm.Group = setup_add_group + + # collection_dumper = CollectionDumper(collection=add_group) + # nodes = collection_dumper._get_nodes() + # group_node = orm.load_node(nodes[0]) + # group_node_uuid = nodes[0] + + # assert len(nodes) == 1 + # assert isinstance(nodes[0], str) + # assert isinstance(group_node, orm.CalcJobNode) + # assert nodes[0] == group_node_uuid + + # # Now, add another CalcJobNode to the profile + # # As not part of the group, should not be returned + # cj_node1 = generate_calculation_node_add() + # nodes = collection_dumper._get_nodes() + # assert len(nodes) == 1 + + # # Now, add the node to the group, should be captured by get_nodes + # add_group.add_nodes([cj_node1]) + # nodes = collection_dumper._get_nodes() + # assert len(nodes) == 2 + + # # Filtering by time should work + # collection_dumper.base_dumper.last_dump_time = datetime.now().astimezone() + + # cj_node2 = generate_calculation_node_add() + # add_group.add_nodes([cj_node2]) + + # nodes = collection_dumper._get_nodes() + # assert len(nodes) == 1 + # assert nodes[0] == cj_node2.uuid + + # with pytest.raises(TypeError): + # collection_dumper = CollectionDumper(collection=[1]) + # collection_dumper._get_nodes() \ No newline at end of file From de8f92f9318f5f3241161f3131c4ddbe6c14fec2 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Mon, 10 Feb 2025 18:56:41 +0100 Subject: [PATCH 23/27] Commit changes to continue. --- src/aiida/cmdline/commands/cmd_profile.py | 71 +++-- src/aiida/cmdline/params/options/main.py | 23 +- src/aiida/common/utils.py | 2 + src/aiida/repository/repository.py | 1 + src/aiida/tools/dumping/base.py | 4 + src/aiida/tools/dumping/collection.py | 318 +++++++++++++++++----- src/aiida/tools/dumping/config.py | 11 + src/aiida/tools/dumping/process.py | 9 +- src/aiida/tools/dumping/profile.py | 131 +++++---- src/aiida/tools/dumping/utils.py | 88 ++++-- tests/tools/dumping/test_collection.py | 151 +++++----- 11 files changed, 553 insertions(+), 256 deletions(-) create mode 100644 src/aiida/tools/dumping/config.py diff --git a/src/aiida/cmdline/commands/cmd_profile.py b/src/aiida/cmdline/commands/cmd_profile.py index 215af6c2b6..e616debd06 100644 --- a/src/aiida/cmdline/commands/cmd_profile.py +++ b/src/aiida/cmdline/commands/cmd_profile.py @@ -273,37 +273,57 @@ def profile_delete(force, delete_data, profiles): @verdi_profile.command('mirror') @options.PATH() +@options.DRY_RUN() @options.OVERWRITE() -# @options.INCREMENTAL() @options.DUMP_PROCESSES() -@options.DEDUPLICATE() +@options.GROUPS() +@options.ORGANIZE_BY_GROUPS() +# @options.DEDUPLICATE() +# @click.option( +# '--check-dirs/--no-check-dirs', +# default=False, +# show_default=True, +# help='Check for existence of dump directories. 
Otherwise, incremental mirroring is only evaluated from the log.') +@click.option( + '--symlink-duplicates/--no-symlink-duplicates', + default=True, + show_default=True, + help='Symlink data if the same node is contained in multiple groups.') +@click.option( + '--delete-missing/--no-delete-missing', + default=False, + show_default=True, + help="If a previously dumped node is deleted from AiiDA's DB, also delete the corresponding dump directory.") +@click.option( + '--extra-calc-dirs/--no-extra-calc-dirs', + default=False, + show_default=True, + help='If a top-level process calls sub-processes, create a designated directory only for the top-level process.') @options.INCLUDE_INPUTS() @options.INCLUDE_OUTPUTS() @options.INCLUDE_ATTRIBUTES() @options.INCLUDE_EXTRAS() @options.FLAT() -@options.DUMP_CONFIG_FILE() -@options.GROUPS() -@options.ORGANIZE_BY_GROUPS() -@options.DRY_RUN() @click.pass_context def profile_mirror( ctx, path, - overwrite, - organize_by_groups, dry_run, + overwrite, dump_processes, - deduplicate, + groups, + organize_by_groups, + symlink_duplicates, + delete_missing, + extra_calc_dirs, + # check_dirs, include_inputs, include_outputs, include_attributes, include_extras, flat, - dump_config_file, - groups, ): - """Dump all data in an AiiDA profile's storage to disk.""" + """Dump all data in an AiiDA profile's storage to disk in a human-readable directory tree.""" import json from datetime import datetime @@ -313,6 +333,7 @@ def profile_mirror( from aiida.tools.dumping.base import BaseDumper from aiida.tools.dumping.logger import DumpLogger from aiida.tools.dumping.utils import prepare_dump_path + from aiida.tools.dumping.config import ProfileDumpConfig profile = ctx.obj['profile'] @@ -321,7 +342,7 @@ def profile_mirror( if path is None: path = Path.cwd() / f'{profile.name}-mirror' - echo.echo_report(f'Mirroring data of profile `{profile.name}`at path: `{path}`.') + echo.echo_report(f'Mirroring data of profile `{profile.name}` at path: `{path}`.') SAFEGUARD_FILE: str = '.verdi_profile_mirror' # noqa: N806 safeguard_file_path: Path = path / SAFEGUARD_FILE @@ -336,8 +357,6 @@ def profile_mirror( except FileExistsError as exc: echo.echo_critical(str(exc)) - breakpoint() - try: with safeguard_file_path.open('r') as fhandle: last_dump_time = datetime.fromisoformat(fhandle.readlines()[-1].strip().split()[-1]).astimezone() @@ -346,9 +365,10 @@ def profile_mirror( if dry_run: node_counts = ProfileDumper._get_number_of_nodes_to_dump(last_dump_time) - node_counts_str = ' & '.join(f'{count} {node_type}' for node_type, count in node_counts.items()) - dry_run_message = f'Dry run for mirroring of profile `{profile.name}`: {node_counts_str} to dump.\n' + dry_run_message = f'Dry run for mirroring of profile `{profile.name}`. 
Would dump:' echo.echo_report(dry_run_message) + for count, node_type in node_counts.items(): + echo.echo_report(f'{count}: {node_type}') return if incremental: @@ -376,18 +396,25 @@ def profile_mirror( flat=flat, ) + # breakpoint() + profile_dump_config = ProfileDumpConfig( + dump_processes=dump_processes, + symlink_duplicates=symlink_duplicates, + delete_missing=delete_missing, + extra_calc_dirs=extra_calc_dirs, + organize_by_groups=organize_by_groups, + ) + profile_dumper = ProfileDumper( + profile=profile, + profile_dump_config=profile_dump_config, base_dumper=base_dumper, process_dumper=process_dumper, dump_logger=dump_logger, groups=groups, - organize_by_groups=organize_by_groups, - deduplicate=deduplicate, - profile=profile, - dump_processes=dump_processes, ) - profile_dumper.dump() + profile_dumper.dump_processes() # Append the current time to the file last_dump_time = datetime.now().astimezone() diff --git a/src/aiida/cmdline/params/options/main.py b/src/aiida/cmdline/params/options/main.py index 82d4fda8d8..a806d1b1a2 100644 --- a/src/aiida/cmdline/params/options/main.py +++ b/src/aiida/cmdline/params/options/main.py @@ -53,12 +53,10 @@ 'DB_PORT', 'DB_USERNAME', 'DEBUG', - 'DEDUPLICATE', 'DESCRIPTION', 'DICT_FORMAT', 'DICT_KEYS', 'DRY_RUN', - 'DUMP_CONFIG_FILE', 'DUMP_PROCESSES', 'EXIT_STATUS', 'EXPORT_FORMAT', @@ -792,13 +790,13 @@ def set_log_level(ctx, _param, value): show_default=True, ) -DEDUPLICATE = OverridableOption( - '--deduplicate/--no-deduplicate', - is_flag=True, - default=True, - show_default=True, - help='', -) +# DEDUPLICATE = OverridableOption( +# '--deduplicate/--no-deduplicate', +# is_flag=True, +# default=True, +# show_default=True, +# help='', +# ) DUMP_PROCESSES = OverridableOption( '--dump-processes/--no-dump-processes', @@ -808,13 +806,6 @@ def set_log_level(ctx, _param, value): help='Dump process data.', ) -DUMP_CONFIG_FILE = OverridableOption( - '--dump-config-file', - default=None, - type=types.FileOrUrl(), - help='Provide dumping options via a config file in YAML format.', -) - ORGANIZE_BY_GROUPS = OverridableOption( '--organize-by-groups/--no-organize-by-groups', default=True, diff --git a/src/aiida/common/utils.py b/src/aiida/common/utils.py index 1b2f2b14ce..8cd1046dfb 100644 --- a/src/aiida/common/utils.py +++ b/src/aiida/common/utils.py @@ -17,6 +17,8 @@ from datetime import datetime from typing import Any, Dict from uuid import UUID +from aiida.manage import get_manager, load_profile +from aiida.manage.configuration.profile import Profile from .lang import classproperty diff --git a/src/aiida/repository/repository.py b/src/aiida/repository/repository.py index 992a96447d..32351ddeef 100644 --- a/src/aiida/repository/repository.py +++ b/src/aiida/repository/repository.py @@ -519,6 +519,7 @@ def copy_tree(self, target: Union[str, pathlib.Path], path: Optional[FilePath] = dirpath.mkdir(parents=True, exist_ok=True) with self.open(root / filename) as handle: + # TODO: Possibly skip filepath.write_bytes(handle.read()) # these methods are not actually used in aiida-core, but are here for completeness diff --git a/src/aiida/tools/dumping/base.py b/src/aiida/tools/dumping/base.py index bbe63c9301..6bbd5b505e 100644 --- a/src/aiida/tools/dumping/base.py +++ b/src/aiida/tools/dumping/base.py @@ -14,9 +14,13 @@ @dataclass class BaseDumper: + """Container for shared arguments of all Dumper classes.""" + dump_parent_path: Path | None = None overwrite: bool = False incremental: bool = True + check_dirs: bool = False + # TODO: Make this a per-class attribute? 
last_dump_time: datetime | None = None

     def __post_init__(self):
diff --git a/src/aiida/tools/dumping/collection.py b/src/aiida/tools/dumping/collection.py
index edc8219b98..56352b4574 100644
--- a/src/aiida/tools/dumping/collection.py
+++ b/src/aiida/tools/dumping/collection.py
@@ -10,26 +10,27 @@
 from __future__ import annotations

+from dataclasses import dataclass
 import os
 from datetime import datetime
-from functools import cached_property
 from pathlib import Path
-from typing import TYPE_CHECKING, NamedTuple, TypeVar
+from typing import TYPE_CHECKING, NamedTuple

 from aiida import orm
+from aiida.common.exceptions import NotExistent
 from aiida.common.log import AIIDA_LOGGER
 from aiida.tools.dumping.base import BaseDumper
 from aiida.tools.dumping.logger import DumpLog, DumpLogger
 from aiida.tools.dumping.process import ProcessDumper
 from aiida.tools.dumping.utils import filter_by_last_dump_time
+from aiida.tools.dumping.config import ProfileDumpConfig
+from typing import Literal

 if TYPE_CHECKING:
     from collections.abc import Sequence

     from aiida.tools.dumping.logger import DumpDict

-T = TypeVar('T', bound='orm.ProcessNode')
-
 logger = AIIDA_LOGGER.getChild('tools.dumping')


@@ -38,98 +39,175 @@ class ProcessesToDump(NamedTuple):
     calculations: Sequence[orm.CalculationNode]
     workflows: Sequence[orm.WorkflowNode]

+    @property
+    def is_empty(self) -> bool:
+        """Check if there are any processes to dump."""
+        return len(self.calculations) == 0 and len(self.workflows) == 0
+
+
+# @dataclass
+# class CollectionDumpConfig:
+#     dump_processes: bool = True
+#     symlink_duplicates: bool = True
+#     delete_missing: bool = False
+#     extra_calc_dirs: bool = False
+#     organize_by_groups: bool = True

 class CollectionDumper:
+    """Class to handle dumping of a collection of AiiDA ORM entities."""
+
     def __init__(
         self,
+        collection: orm.Group | str | Sequence[str] | Sequence[int],
+        profile_dump_config: ProfileDumpConfig | None = None,
         base_dumper: BaseDumper | None = None,
         process_dumper: ProcessDumper | None = None,
         dump_logger: DumpLogger | None = None,
-        collection: orm.Group | str | list[str] | None = None,
-        deduplicate: bool = True,
         output_path: Path | None = None,
-        processes_to_dump: ProcessesToDump | None = None,
     ):
-        self.deduplicate = deduplicate
+        """Initialize the CollectionDumper.

-        # Collection could be a Group or a list of nodes
-        if isinstance(collection, str):
-            try:
-                collection = orm.load_group(collection)
-            except:
-                raise
+        :param collection: The collection of AiiDA ORM entities to be dumped, either a group, a group label, or a
+            list of node UUIDs or PKs.
+        :param base_dumper: Base dumper instance or None (gets instantiated).
+        :param process_dumper: Process dumper instance or None (gets instantiated).
+        :param dump_logger: Logger for the dumping (gets instantiated).
+        :param output_path: The parent output path for dumping the collection nodes.
+ """ - self.collection = collection + self.collection = self._validate_collection(collection) self.base_dumper = base_dumper or BaseDumper() self.process_dumper = process_dumper or ProcessDumper() self.dump_logger = dump_logger or DumpLogger(dump_parent_path=self.base_dumper.dump_parent_path) - # Properly set the `output_path` attribute - if output_path is not None: - self.output_path = output_path - else: - self.output_path = Path.cwd() - - @cached_property - def nodes(self) -> list[str]: - return self._get_nodes() - - def _get_nodes(self) -> list[str]: - nodes: list[str] | None = None - if isinstance(self.collection, orm.Group): - nodes = [n.uuid for n in self.collection.nodes] - elif isinstance(self.collection, list) and len(self.collection) > 0: - if all(isinstance(n, str) for n in self.collection): - nodes = self.collection - else: - msg = 'A collection of nodes must be passed via their UUIDs.' - raise TypeError(msg) + self.output_path = output_path or Path.cwd() + + self.profile_dump_config = profile_dump_config or ProfileDumpConfig() + + self._collection_nodes: Sequence[str] | Sequence[int] | None = None + self._processes_to_dump: ProcessesToDump | None = None + + def _validate_collection( + self, collection: orm.Group | str | Sequence[str] | Sequence[int] + ) -> orm.Group | Sequence[str] | Sequence[int]: + """Validate the given collection identifier. + + :param collection: The input collection to validate. + :return: The validated collection. + :raises NotExistent: If no ``orm.Group`` can be loaded for a given label. + :raises ValueError: If no list of integers or strings to identify nodes is passed. + """ + + if isinstance(collection, str): + try: + return orm.load_group(collection) + except Exception as exc: + msg = f'Could not load group: {collection}.' + raise NotExistent(msg) from exc + if (isinstance(collection, list) and all(isinstance(n, (str, int)) for n in collection)) or isinstance( + collection, orm.Group + ): + return collection + else: - nodes = [] + msg = f'{collection} is an invalid collection.' + raise ValueError(msg) + + @property + def collection_nodes(self) -> Sequence[str] | Sequence[int]: + """Return collection nodes. + + :return: List of collection node identifiers. + """ + if not self._collection_nodes: + self._collection_nodes = self._get_collection_nodes() + return self._collection_nodes + + def _get_collection_nodes(self) -> Sequence[str] | Sequence[int]: + """Retrieve the node ``PK``s/``UUID``s from the collection, filtered by the last dump time, if incremental + dumping is selected. + + :return: List of node identifiers. + """ + if not self.collection: + return [] + + nodes = [n.uuid for n in self.collection.nodes] if isinstance(self.collection, orm.Group) else self.collection + + if self.base_dumper.incremental and self.base_dumper.last_dump_time: + nodes = filter_by_last_dump_time(nodes, last_dump_time=self.base_dumper.last_dump_time) - # TODO: Possibly have `last_dump_time` as attribute of CollectionDumper instead - # nodes = filter_by_last_dump_time(nodes=nodes, last_dump_time=self.last_dump_time) - nodes = filter_by_last_dump_time(nodes=nodes, last_dump_time=self.base_dumper.last_dump_time) return nodes - @cached_property + @property def processes_to_dump(self) -> ProcessesToDump: - return self._get_processes_to_dump() + """Get the processes to dump from the collection of nodes. + + :return: Instance of the ``ProcessesToDump`` class containing the selected calculations and workflows. 
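        A minimal illustrative use (``dumper`` stands for any ``CollectionDumper`` instance; the name is a
        placeholder)::

            to_dump = dumper.processes_to_dump
            if not to_dump.is_empty:
                print(len(to_dump.calculations), len(to_dump.workflows))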
+        """
+        if not self._processes_to_dump:
+            self._processes_to_dump = self._get_processes_to_dump()
+        return self._processes_to_dump

     def _get_processes_to_dump(self) -> ProcessesToDump:
-        nodes = [orm.load_node(n) for n in self.nodes]
-        workflows = [node for node in nodes if isinstance(node, orm.WorkflowNode)]
-        calculations = [node for node in nodes if isinstance(node, orm.CalculationNode)]
+        """Retrieve the processes from the collection nodes.

-        # Make sure that only top-level workflows are dumped in their own directories when de-duplcation is enabled
-        if self.deduplicate:
-            workflows = [workflow for workflow in workflows if workflow.caller is None]
+        If deduplication is selected, this method takes care of only dumping top-level workflows, and of only dumping
+        calculations in their own designated directories if they are not part of a workflow.

-        else:
-            # If no deduplication, also sub-calculations that were called by workflows of the group, and which are not
-            # contained in the group.nodes directly are being dumped explicitly
+        :return: Instance of the ``ProcessesToDump`` class containing the selected calculations and workflows.
+        """
+
+        if not self.collection_nodes:
+            return ProcessesToDump(calculations=[], workflows=[])
+
+        # Better than: `nodes = [orm.load_node(n) for n in self.collection_nodes]`
+        # As the list comprehension fetches each node from the DB individually
+        nodes_orm = orm.QueryBuilder().append(orm.Node, filters={'uuid': {'in': self.collection_nodes}}).all(flat=True)
+
+        workflows = [node for node in nodes_orm if isinstance(node, orm.WorkflowNode)]
+        calculations = [node for node in nodes_orm if isinstance(node, orm.CalculationNode)]
+
+        # Make sure that only top-level workflows and calculations are dumped
+        workflows = [workflow for workflow in workflows if workflow.caller is None]
+
+        # If `extra_calc_dirs` is enabled, also dump sub-calculations that were called by workflows of the group, but
+        # which are not contained in the group.nodes directly
+        # breakpoint()
+        if self.profile_dump_config.extra_calc_dirs:
             called_calculations = []
             for workflow in workflows:
                 called_calculations += [
                     node for node in workflow.called_descendants if isinstance(node, orm.CalculationNode)
                 ]
-            calculations += called_calculations
+            # Convert to set to avoid duplicates
+            calculations = list(set(calculations + called_calculations))
+        else:
+            calculations = [calculation for calculation in calculations if calculation.caller is None]

         return ProcessesToDump(
             calculations=calculations,
             workflows=workflows,
         )

-    def should_dump_processes(self) -> bool:
-        # if self.processes_to_dump is None:
-        #     self._get_processes_to_dump()
-        return (len(self.processes_to_dump.calculations) + len(self.processes_to_dump.workflows)) > 0
-
     def _dump_calculations(self, calculations: Sequence[orm.CalculationNode]) -> None:
+
+        """Dump a collection of calculations.
+
+        Deduplication is already handled in the ``_get_processes_to_dump`` method, where PKs/UUIDs are used, rather
+        than AiiDA ORM entities as here. Specifically, a calculation that is part of a workflow is not dumped again in
+        its own dedicated directory.
+ + :param calculations: Sequence of ``orm.CalculationNode``s + :return: None + """ + calculations_path = self.output_path / 'calculations' - dumped_calculations = {} + dumped_calculations: dict[str, DumpLog] = {} + + logged_calculations: DumpDict = self.dump_logger.get_log()['calculations'] for calculation in calculations: calculation_dumper = self.process_dumper @@ -138,6 +216,13 @@ def _dump_calculations(self, calculations: Sequence[orm.CalculationNode]) -> Non process_node=calculation, prefix=None ) + if self.profile_dump_config.symlink_duplicates and calculation.uuid in logged_calculations.keys(): + calculation_dump_path.parent.mkdir(exist_ok=True, parents=True) + os.symlink( + src=logged_calculations[calculation.uuid].path, + dst=calculation_dump_path, + ) + # This is handled in the get_processes method: `if calculation.caller is None:` calculation_dumper._dump_calculation(calculation_node=calculation, output_path=calculation_dump_path) @@ -149,11 +234,16 @@ def _dump_calculations(self, calculations: Sequence[orm.CalculationNode]) -> Non self.dump_logger.update_calculations(new_calculations=dumped_calculations) def _dump_workflows(self, workflows: Sequence[orm.WorkflowNode]) -> None: + """Dump a collection of workflows. + + """ workflow_path: Path = self.output_path / 'workflows' dumped_workflows: dict[str, DumpLog] = {} workflow_path.mkdir(exist_ok=True, parents=True) + logged_workflows: DumpDict = self.dump_logger.get_log()['workflows'] + for workflow in workflows: workflow_dumper: ProcessDumper = self.process_dumper @@ -161,10 +251,10 @@ def _dump_workflows(self, workflows: Sequence[orm.WorkflowNode]) -> None: process_node=workflow, prefix=None ) - logged_workflows: DumpDict = self.dump_logger.get_log()['workflows'] - # Symlink here, if deduplication enabled and workflow was already dumped - if self.deduplicate and workflow in logged_workflows.keys(): + if self.profile_dump_config.symlink_duplicates and workflow.uuid in logged_workflows.keys(): + workflow_dump_path.parent.mkdir(exist_ok=True, parents=True) + os.symlink( src=logged_workflows[workflow.uuid].path, dst=workflow_dump_path, @@ -175,18 +265,110 @@ def _dump_workflows(self, workflows: Sequence[orm.WorkflowNode]) -> None: output_path=workflow_dump_path, ) - dumped_workflows[workflow.uuid] = DumpLog( - path=workflow_dump_path, - time=datetime.now().astimezone(), - ) + dumped_workflows[workflow.uuid] = DumpLog( + path=workflow_dump_path, + time=datetime.now().astimezone(), + ) self.dump_logger.update_workflows(new_workflows=dumped_workflows) def dump(self) -> None: + """Top-level method that actually performs the dumping of the AiiDA data for the collection. 
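        Illustrative call sequence, assuming a stored group labelled ``my-group`` and a writable
        ``dump-target`` directory (both placeholder names)::

            dumper = CollectionDumper(collection='my-group', output_path=Path('dump-target'))
            dumper.dump()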
+ + :return: None + """ + self.output_path.mkdir(exist_ok=True, parents=True) collection_processes: ProcessesToDump = self._get_processes_to_dump() + # breakpoint() + + if not self.processes_to_dump.is_empty: + # self._dump_processes(processes=collection_processes) + + # First, dump workflows, then calculations + if len(collection_processes.workflows) > 0: + # breakpoint() + self._dump_workflows(workflows=collection_processes.workflows) + if len(collection_processes.calculations) > 0: + # breakpoint() + self._dump_calculations(calculations=collection_processes.calculations) + +# TODO: See, if I can generalize the dump sub-methods + # def _dump_processes( + # self, + # # processes: Sequence[orm.CalculationNode | orm.WorkflowNode], + # processes: Sequence[orm.CalculationNode] | Sequence[orm.WorkflowNode], + # ) -> None: + # """Dump a collection of calculations or workflows. + + # :param processes: Sequence of ``orm.CalculationNode``s or ``orm.WorkflowNode``s + # :param process_type: Type of processes, either 'calculations' or 'workflows' + # :return: None + # """ + + # # From, e.g., 'aiida.workflows:core.arithmetic.multiply_add' to 'workflows + # if isinstance(processes[0], orm.CalculationNode): + # process_type_str = 'calculations' + # elif isinstance(processes[0], orm.WorkflowNode): + # process_type_str = 'workflows' + # # else: + # # breakpoint() + # # process_type_str = processes[0].process_type.split(':')[0].split('.')[1] + # process_type_path = self.output_path / process_type_str + # process_type_path.mkdir(exist_ok=True, parents=True) + + # dumped_processes: dict[str, DumpLog] = {} + # logged_processes: DumpDict = self.dump_logger.get_log()[process_type_str] + + # # breakpoint() + + # for process in processes: + # process_dumper = self.process_dumper + + # process_dump_path = process_type_path / process_dumper._generate_default_dump_path( + # process_node=process, prefix=None + # ) + + # # Target directory already exists, skip this process + # if process_dump_path.exists(): + # continue + + # else: + # # Symlink here, if deduplication enabled and process was already dumped + # # TODO: Possibly check dirs here + # # TODO: Alternatively have method/endpoint to delete one calculation from the dumping + # # TODO: Which would also update the log. + # # Otherwise, one might delete a calculation, maybe because it was wrong, and then it won't be dumped + # # anymore ever. 
+ # if self.deduplicate and process.uuid in logged_processes.keys(): + # try: + # os.symlink( + # src=logged_processes[process.uuid].path, + # dst=process_dump_path, + # ) + # except: + # # raise + # pass + # # breakpoint() + # else: + # if process_type_str == 'calculations': + # process_dumper._dump_calculation(calculation_node=process, output_path=process_dump_path) + # elif process_type_str == 'workflows': + # process_dumper._dump_workflow( + # workflow_node=process, + # output_path=process_dump_path, + # ) + + + # dumped_processes[process.uuid] = DumpLog( + # path=process_dump_path, + # time=datetime.now().astimezone(), + # ) + + # # breakpoint() + + # if process_type_str == 'calculations': + # self.dump_logger.update_calculations(new_calculations=dumped_processes) + # elif process_type_str == 'workflows': + # self.dump_logger.update_workflows(new_workflows=dumped_processes) - if len(collection_processes.calculations) > 0: - self._dump_calculations(calculations=collection_processes.calculations) - if len(collection_processes.workflows) > 0: - self._dump_workflows(workflows=collection_processes.workflows) diff --git a/src/aiida/tools/dumping/config.py b/src/aiida/tools/dumping/config.py new file mode 100644 index 0000000000..cd8537ce3b --- /dev/null +++ b/src/aiida/tools/dumping/config.py @@ -0,0 +1,11 @@ +from dataclasses import dataclass + + +@dataclass +class ProfileDumpConfig: + dump_processes: bool = True + symlink_duplicates: bool = True # + delete_missing: bool = False # profile + extra_calc_dirs: bool = False # collection + organize_by_groups: bool = True # profile + diff --git a/src/aiida/tools/dumping/process.py b/src/aiida/tools/dumping/process.py index f65da5a15e..8a4f962bf2 100644 --- a/src/aiida/tools/dumping/process.py +++ b/src/aiida/tools/dumping/process.py @@ -42,6 +42,8 @@ class ProcessDumper: + """Class to handle dumping of an AiiDA process.""" + def __init__( self, base_dumper: BaseDumper | None = None, @@ -52,6 +54,10 @@ def __init__( flat: bool = False, dump_unsealed: bool = False, ) -> None: + """Initialize the CollectionDumper. 
+ + + """ self.include_inputs = include_inputs self.include_outputs = include_outputs self.include_attributes = include_attributes @@ -218,8 +224,7 @@ def dump( # for key, value in kwargs.items(): # setattr(self, key, value) - if output_path is None: - output_path = self._generate_default_dump_path(process_node=process_node) + output_path = output_path or self._generate_default_dump_path(process_node=process_node) prepare_dump_path( path_to_validate=output_path, overwrite=self.base_dumper.overwrite, incremental=self.base_dumper.incremental diff --git a/src/aiida/tools/dumping/profile.py b/src/aiida/tools/dumping/profile.py index 6b9f33a58e..04374ebe16 100644 --- a/src/aiida/tools/dumping/profile.py +++ b/src/aiida/tools/dumping/profile.py @@ -7,11 +7,12 @@ # For further information please visit http://www.aiida.net # ########################################################################### -# TODO: Use `batch_iter` from aiida.tools.archive.common +# TODO: Possibly use `batch_iter` from aiida.tools.archive.common from __future__ import annotations -from typing import cast +from dataclasses import dataclass +from typing import Sequence, cast from aiida import orm from aiida.common.log import AIIDA_LOGGER @@ -19,6 +20,7 @@ from aiida.manage.configuration.profile import Profile from aiida.tools.dumping.base import BaseDumper from aiida.tools.dumping.collection import CollectionDumper +from aiida.tools.dumping.config import ProfileDumpConfig from aiida.tools.dumping.logger import DumpLogger from aiida.tools.dumping.process import ProcessDumper from aiida.tools.dumping.utils import filter_by_last_dump_time @@ -27,73 +29,67 @@ class ProfileDumper: + """Class to handle dumping of the data of an AiiDA profile.""" + def __init__( self, profile: str | Profile | None = None, + profile_dump_config: ProfileDumpConfig | None = None, base_dumper: BaseDumper | None = None, process_dumper: ProcessDumper | None = None, dump_logger: DumpLogger | None = None, - organize_by_groups: bool = True, - deduplicate: bool = True, - groups: list[str | orm.Group] | None = None, - dump_processes: bool = True, + # deduplicate: bool = True, + groups: Sequence[str | orm.Group] | None = None, ): - self.organize_by_groups = organize_by_groups - self.deduplicate = deduplicate - self.dump_processes = dump_processes + """Initialize the ProfileDumper. + + :param profile: The selected profile to dump. + :param base_dumper: Base dumper instance or None (gets instantiated). + :param process_dumper: Process dumper instance or None (gets instantiated). + :param dump_logger: Logger for the dumping (gets instantiated). + :param organize_by_groups: Organize dumped data by groups. + :param groups: Dump data only for selected groups. + :param dump_processes: Should dump process data? 
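        Illustrative use, assuming an existing profile named ``my-profile`` (placeholder name)::

            config = ProfileDumpConfig(dump_processes=True, organize_by_groups=True)
            profile_dumper = ProfileDumper(profile='my-profile', profile_dump_config=config)
            profile_dumper.dump_processes()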
+ """ + self.groups = groups self.base_dumper = base_dumper or BaseDumper() self.process_dumper = process_dumper or ProcessDumper() self.dump_logger = dump_logger or DumpLogger(dump_parent_path=self.base_dumper.dump_parent_path) - # Load the profile - if isinstance(profile, str): - profile = load_profile(profile) + self.profile_dump_config = profile_dump_config or ProfileDumpConfig() - if profile is None: - manager = get_manager() - profile = manager.get_profile() - - assert profile is not None + if not isinstance(profile, Profile): + profile = load_profile(profile=profile, allow_switch=True) self.profile = profile - def dump(self): - # No groups selected, dump data which is not part of any group - # If groups selected, however, this data should not also be dumped automatically - if not self.groups: - self._dump_processes_not_in_any_group() - - # Still, even without selecting groups, by default, all profile data should be dumped - # Thus, we obtain all groups in the profile here - profile_groups = orm.QueryBuilder().append(orm.Group).all(flat=True) - self._dump_processes_per_group(groups=profile_groups) - - else: - self._dump_processes_per_group(groups=self.groups) - def _dump_processes_not_in_any_group(self): - # === Dump the data that is not associated with any group === + """Dump the profile's process data not contained in any group.""" - # `dump_parent_path` is set in the `post_init` method of the `BaseDumper` dataclass + # `dump_parent_path` set to CWD in the `post_init` method of the `BaseDumper` dataclass if not given assert self.base_dumper.dump_parent_path is not None - if self.organize_by_groups: + if self.profile_dump_config.organize_by_groups: output_path = self.base_dumper.dump_parent_path / 'no-group' else: output_path = self.base_dumper.dump_parent_path - no_group_nodes = self._get_no_group_nodes() + no_group_nodes = self._get_no_group_processes() no_group_dumper = CollectionDumper( + collection=no_group_nodes, + profile_dump_config=self.profile_dump_config, base_dumper=self.base_dumper, process_dumper=self.process_dumper, - collection=no_group_nodes, - deduplicate=self.deduplicate, + # deduplicate=self.deduplicate, dump_logger=self.dump_logger, output_path=output_path, ) - if self.dump_processes and no_group_dumper.should_dump_processes(): + # Add additional check here to only issue the message when there are actual processes to dump for a group + # This might not be the case for, e.g., pseudopotential groups, or if there are no new processes since the + # last dumping + if self.dump_processes and not no_group_dumper.processes_to_dump.is_empty: logger.report(f'Dumping processes not in any group for profile `{self.profile.name}`...') no_group_dumper.dump() @@ -104,59 +100,84 @@ def _dump_processes_per_group(self, groups): assert self.base_dumper.dump_parent_path is not None for group in groups: - if self.organize_by_groups: + if self.profile_dump_config.organize_by_groups: output_path = self.base_dumper.dump_parent_path / f'group-{group.label}' else: output_path = self.base_dumper.dump_parent_path group_dumper = CollectionDumper( base_dumper=self.base_dumper, + profile_dump_config=self.profile_dump_config, process_dumper=self.process_dumper, dump_logger=self.dump_logger, collection=group, - deduplicate=self.deduplicate, + # deduplicate=self.deduplicate, output_path=output_path, ) - if self.dump_processes and group_dumper.should_dump_processes(): + # Add additional check here to only issue the message when there are actual processes to dump for a group + # This might not be 
the case for, e.g., pseudopotential groups, or if there are no new processes since the + # last dumping + # breakpoint() + if self.dump_processes and not group_dumper.processes_to_dump.is_empty: + # breakpoint() logger.report(f'Dumping processes in group {group.label} for profile `{self.profile.name}`...') group_dumper.dump() - def _get_no_group_nodes(self) -> list[str]: - # Get all nodes that are _not_ in any group + def _get_no_group_processes(self) -> Sequence[str] | Sequence[int]: + """Obtain nodes in the profile that are not part of any group. + + :return: List of UUIDs of selected nodes. + """ + group_qb = orm.QueryBuilder().append(orm.Group) - profile_groups = cast(list[orm.Group], group_qb.all(flat=True)) - node_qb = orm.QueryBuilder().append(orm.Node, project=['uuid']) - profile_nodes = cast(list[str], node_qb.all(flat=True)) + profile_groups = cast(Sequence[orm.Group], group_qb.all(flat=True)) + process_qb = orm.QueryBuilder().append(orm.ProcessNode, project=['uuid']) + profile_nodes = cast(Sequence[str], process_qb.all(flat=True)) - nodes_in_groups: list[str] = [node.uuid for group in profile_groups for node in group.nodes] + nodes_in_groups: Sequence[str] = [node.uuid for group in profile_groups for node in group.nodes] # Need to expand here also with the called_descendants of `WorkflowNodes`, otherwise the called # `CalculationNode`s for `WorkflowNode`s that are part of a group are dumped twice # Get the called descendants of WorkflowNodes within the nodes_in_groups list - - sub_nodes_in_groups: list[str] = [ + sub_nodes_in_groups: Sequence[str] = [ node.uuid for n in nodes_in_groups - if isinstance((workflow_node := orm.load_node(n)), orm.WorkflowNode) + # if isinstance((workflow_node := orm.load_node(n)), orm.WorkflowNode) + if isinstance((workflow_node := orm.load_node(n)), orm.ProcessNode) for node in workflow_node.called_descendants ] - # sub_nodes_in_groups: list[str] = [node.uuid for node in sub_nodes_in_groups] nodes_in_groups += sub_nodes_in_groups - nodes: list[str] = [profile_node for profile_node in profile_nodes if profile_node not in nodes_in_groups] - nodes = filter_by_last_dump_time(nodes=nodes, last_dump_time=self.base_dumper.last_dump_time) + process_nodes: Sequence[str | int] = [ + profile_node for profile_node in profile_nodes if profile_node not in nodes_in_groups + ] + process_nodes = filter_by_last_dump_time(nodes=process_nodes, last_dump_time=self.base_dumper.last_dump_time) + + return process_nodes + + def dump_processes(self): + # No groups selected, dump data which is not part of any group + # If groups selected, however, this data should not also be dumped automatically + if not self.groups: + self._dump_processes_not_in_any_group() + + # Still, even without selecting groups, by default, all profile data should be dumped + # Thus, we obtain all groups in the profile here + profile_groups = orm.QueryBuilder().append(orm.Group).all(flat=True) + self._dump_processes_per_group(groups=profile_groups) - return nodes + else: + self._dump_processes_per_group(groups=self.groups) @staticmethod def _get_number_of_nodes_to_dump(last_dump_time) -> dict[str, int]: result = {} for node_type in (orm.CalculationNode, orm.WorkflowNode): qb = orm.QueryBuilder().append(node_type, project=['uuid']) - nodes = cast(list[str], qb.all(flat=True)) + nodes = cast(Sequence[str], qb.all(flat=True)) nodes = filter_by_last_dump_time(nodes=nodes, last_dump_time=last_dump_time) result[node_type.class_node_type.split('.')[-2] + 's'] = len(nodes) return result diff --git 
a/src/aiida/tools/dumping/utils.py b/src/aiida/tools/dumping/utils.py index 0573fede09..d2f216c539 100644 --- a/src/aiida/tools/dumping/utils.py +++ b/src/aiida/tools/dumping/utils.py @@ -13,6 +13,7 @@ import shutil from datetime import datetime from pathlib import Path +from typing import cast from aiida import orm from aiida.common.log import AIIDA_LOGGER @@ -40,6 +41,8 @@ def prepare_dump_path( `incremental` are enabled. :raises FileNotFoundError: If no `safeguard_file` is found.""" + # TODO: Handle symlinks + if overwrite and incremental: msg = 'Both overwrite and incremental set to True. Only specify one.' raise ValueError(msg) @@ -63,9 +66,16 @@ def prepare_dump_path( safeguard_exists = (path_to_validate / safeguard_file).is_file() if safeguard_exists: + logger.report(path_to_validate) + # breakpoint() msg = '`--overwrite` option selected. Will recreate directory.' logger.report(msg) - shutil.rmtree(path_to_validate) + try: + shutil.rmtree(path_to_validate) + except OSError: + # `shutil.rmtree` fails for symbolic links with + # OSError: Cannot call rmtree on a symbolic link + _delete_dir_recursively(path_to_validate) else: msg = ( @@ -79,20 +89,64 @@ def prepare_dump_path( (path_to_validate / safeguard_file).touch() -def sanitize_file_extension(filename: str | Path): - if isinstance(filename, Path): - filename = str(filename) - if filename.endswith('.mpl_pdf'): - filename = filename.replace('.mpl_pdf', '.pdf') - if filename.endswith('.mpl_png'): - filename = filename.replace('.mpl_png', '.png') - - return Path(filename) - - -def filter_by_last_dump_time(nodes: list[str], last_dump_time: datetime | None = None) -> list[str]: - if last_dump_time is not None: - orm_nodes = [orm.load_node(node) for node in nodes] - return [node.uuid for node in orm_nodes if node.mtime > last_dump_time] - else: +def _delete_dir_recursively(path): + """ + Delete folder, sub-folders and files. + Implementation taken from: https://stackoverflow.com/a/70285390/9431838 + """ + for f in path.glob('**/*'): + if f.is_symlink(): + f.unlink(missing_ok=True) # missing_ok is added in python 3.8 + elif f.is_file(): + f.unlink() + elif f.is_dir(): + try: + f.rmdir() # delete empty sub-folder + except OSError: # sub-folder is not empty + _delete_dir_recursively(f) # recurse the current sub-folder + except Exception as exception: # capture other exception + print(f'exception name: {exception.__class__.__name__}') + print(f'exception msg: {exception}') + + try: + path.rmdir() # time to delete an empty folder + except NotADirectoryError: + path.unlink() # delete folder even if it is a symlink, linux + except Exception as exception: + print(f'exception name: {exception.__class__.__name__}') + print(f'exception msg: {exception}') + + +def _get_filtered_nodes(nodes: list[str | int], last_dump_time: datetime, key: str = 'uuid') -> list[str | int]: + """Helper function to get ``orm.Node``s from the DB based on ``id``/``uuid`` and filter by ``mtime``. + + :param nodes: Collection of node PKs or UUIDs + :param last_dump_time: Last time nodes were dumped to disk. + :param key: Identifier to obtain nodes with, either ``id`` or ``uuid``. + :return: List of nodes filtered by ``last_dump_time``. 
+ """ + + qb = orm.QueryBuilder().append(orm.Node, filters={key: {'in': nodes}}) + nodes_orm: list[orm.Node] = cast(list[orm.Node], qb.all(flat=True)) + return [getattr(node, key) for node in nodes_orm if node.mtime > last_dump_time] + + +def filter_by_last_dump_time(nodes: list[str | int], last_dump_time: datetime) -> list[str | int]: + """Filter a list of nodes by the last dump time of the corresponding dumper. + + :param nodes: A list of node identifiers, which can be either UUIDs (str) or IDs (int). + :param last_dump_time: Only include nodes dumped after this timestamp. + :return: A list of node identifiers that have a dump time after the specified last_dump_time. + """ + + # TODO: Possibly directly use QueryBuilder filter. Though, `nodes` directly accessible from orm.Group.nodes + + if not nodes or last_dump_time is None: return nodes + + key = 'uuid' if isinstance(nodes[0], str) else 'id' + return _get_filtered_nodes( + nodes=nodes, + last_dump_time=last_dump_time, + key=key, + ) diff --git a/tests/tools/dumping/test_collection.py b/tests/tools/dumping/test_collection.py index 5ad3ddd01b..6b79dd1195 100644 --- a/tests/tools/dumping/test_collection.py +++ b/tests/tools/dumping/test_collection.py @@ -1,4 +1,3 @@ -########################################################################### # Copyright (c), The AiiDA team. All rights reserved. # # This file is part of the AiiDA code. # # # @@ -17,7 +16,7 @@ import pytest from aiida import orm -from aiida.tools.dumping import CollectionDumper, collection +from aiida.tools.dumping import CollectionDumper from .test_utils import compare_tree @@ -28,7 +27,7 @@ # generate_calculation_node_add_class() # You can also do any additional setup here -@pytest.mark.usefixtures('aiida_profile_clean') +# @pytest.mark.usefixtures('aiida_profile_clean') @pytest.fixture() def setup_no_process_group() -> orm.Group: no_process_group, _ = orm.Group.collection.get_or_create(label='no-process') @@ -38,7 +37,7 @@ def setup_no_process_group() -> orm.Group: return no_process_group -@pytest.mark.usefixtures('aiida_profile_clean') +# @pytest.mark.usefixtures('aiida_profile_clean') @pytest.fixture() def setup_add_group(generate_calculation_node_add) -> orm.Group: add_group, _ = orm.Group.collection.get_or_create(label='add') @@ -48,7 +47,7 @@ def setup_add_group(generate_calculation_node_add) -> orm.Group: return add_group -@pytest.mark.usefixtures('aiida_profile_clean') +# @pytest.mark.usefixtures('aiida_profile_clean') @pytest.fixture() def setup_multiply_add_group(generate_workchain_multiply_add) -> orm.Group: multiply_add_group, _ = orm.Group.collection.get_or_create(label='multiply-add') @@ -58,7 +57,7 @@ def setup_multiply_add_group(generate_workchain_multiply_add) -> orm.Group: return multiply_add_group -@pytest.mark.usefixtures('aiida_profile_clean') +# @pytest.mark.usefixtures('aiida_profile_clean') @pytest.fixture() def duplicate_group(): def _duplicate_group(source_group: orm.Group, dest_group_label: str): @@ -69,67 +68,68 @@ def _duplicate_group(source_group: orm.Group, dest_group_label: str): return _duplicate_group -@pytest.mark.usefixtures('aiida_profile_clean_class') +# @pytest.mark.usefixtures('aiida_profile_clean_class') class TestCollectionDumper: - def test_should_dump_processes(self, setup_no_process_group, setup_add_group): - """""" - no_process_group: orm.Group = setup_no_process_group - add_group: orm.Group = setup_add_group - - collection_dumper = CollectionDumper(collection=no_process_group) - - assert 
collection_dumper.should_dump_processes() is False + # @pytest.mark.usefixtures('aiida_profile_clean') + # def test_should_dump_processes(self, setup_no_process_group, setup_add_group): + # """""" + # no_process_group: orm.Group = setup_no_process_group + # add_group: orm.Group = setup_add_group - collection_dumper = CollectionDumper(collection=add_group) + # collection_dumper = CollectionDumper(collection=no_process_group) - assert collection_dumper.should_dump_processes() is True + # assert collection_dumper._should_dump_processes() is False + # collection_dumper = CollectionDumper(collection=add_group) - def test_get_nodes_add_group(self, setup_add_group): + # assert collection_dumper._should_dump_processes() is True + @pytest.mark.usefixtures('aiida_profile_clean') + def test_resolve_collection_nodes(self, setup_add_group, generate_calculation_node_add): add_group: orm.Group = setup_add_group + add_nodes = add_group.nodes - collection_dumper = CollectionDumper(collection=add_group) + add_dumper = CollectionDumper(collection=add_group) - nodes = collection_dumper._get_nodes() + nodes = add_dumper._get_collection_nodes() assert len(nodes) == 1 - # add_group: orm.Group = setup_add_group - - # collection_dumper = CollectionDumper(collection=add_group) - # nodes = collection_dumper._get_nodes() - # group_node = orm.load_node(nodes[0]) - # group_node_uuid = nodes[0] - - # assert len(nodes) == 1 - # assert isinstance(nodes[0], str) - # assert isinstance(group_node, orm.CalcJobNode) - # assert nodes[0] == group_node_uuid - - # # Now, add another CalcJobNode to the profile - # # As not part of the group, should not be returned - # cj_node1 = generate_calculation_node_add() - # nodes = collection_dumper._get_nodes() - # assert len(nodes) == 1 + assert isinstance(nodes[0], str) + assert nodes[0] == add_nodes[0].uuid + assert isinstance(orm.load_node(nodes[0]), orm.CalcJobNode) + + # Now, add another CalcJobNode to the profile + # As not part of the group, should not be returned + # Also, last_dump_time is None here by default, so no filtering applied + # Still contains the previous node in the returned collection + cj_node1 = generate_calculation_node_add() + nodes = add_dumper._get_collection_nodes() + assert len(nodes) == 1 + assert isinstance(nodes[0], str) + assert nodes[0] == add_nodes[0].uuid + assert isinstance(orm.load_node(nodes[0]), orm.CalcJobNode) - # # Now, add the node to the group, should be captured by get_nodes - # add_group.add_nodes([cj_node1]) - # nodes = collection_dumper._get_nodes() - # assert len(nodes) == 2 + # Now, add the node to the group, should be captured by get_nodes + add_group.add_nodes([cj_node1]) + nodes = add_dumper._get_collection_nodes() + assert len(nodes) == 2 + assert set(nodes) == set([add_nodes[0].uuid, cj_node1.uuid]) - # # Filtering by time should work - # collection_dumper.base_dumper.last_dump_time = datetime.now().astimezone() + # Filtering by time should work -> Now, only cj_node2 gets returned + add_dumper.base_dumper.last_dump_time = datetime.now().astimezone() - # cj_node2 = generate_calculation_node_add() - # add_group.add_nodes([cj_node2]) + cj_node2 = generate_calculation_node_add() + add_group.add_nodes([cj_node2]) - # nodes = collection_dumper._get_nodes() - # assert len(nodes) == 1 - # assert nodes[0] == cj_node2.uuid + nodes = add_dumper._get_collection_nodes() + assert len(nodes) == 1 + assert nodes[0] == cj_node2.uuid - # with pytest.raises(TypeError): - # collection_dumper = CollectionDumper(collection=[1]) - # 
collection_dumper._get_nodes() + for invalid_collection in [{'foo': 'bar'}, [1.0, 1.1]]: + collection_dumper = CollectionDumper(collection=invalid_collection) + with pytest.raises(ValueError): + collection_dumper._get_collection_nodes() + @pytest.mark.usefixtures('aiida_profile_clean') def test_get_processes_to_dump(self, setup_add_group, setup_multiply_add_group, duplicate_group): add_group: orm.Group = setup_add_group multiply_add_group: orm.Group = setup_multiply_add_group @@ -154,21 +154,15 @@ def test_get_processes_to_dump(self, setup_add_group, setup_multiply_add_group, # TODO: Test here also de-duplication with a Workflow with a sub-workflow - def test_dump_calculations(self, setup_add_group, setup_multiply_add_group, tmp_path): + @pytest.mark.usefixtures('aiida_profile_clean') + def test_dump_calculations_add(self, setup_add_group, tmp_path): add_group: orm.Group = setup_add_group - multiply_add_group: orm.Group = setup_multiply_add_group - - add_group_path = Path('add_group') - multiply_add_group_path = Path('multiply_add_group') - - add_dumper = CollectionDumper(collection=add_group, output_path=tmp_path / add_group_path) - multiply_add_dumper = CollectionDumper( - collection=multiply_add_group, output_path=tmp_path / multiply_add_group_path - ) + add_group_label = add_group.label + add_group_path = tmp_path / add_group_label - add_processes_to_dump = add_dumper._get_processes_to_dump() + add_dumper = CollectionDumper(collection=add_group, output_path=add_group_path) - add_dumper._dump_calculations(add_processes_to_dump.calculations) + add_dumper._dump_calculations(add_dumper._get_processes_to_dump().calculations) expected_tree = { 'calculations': { @@ -182,39 +176,44 @@ def test_dump_calculations(self, setup_add_group, setup_multiply_add_group, tmp_ compare_tree(expected=expected_tree, base_path=tmp_path, relative_path=add_group_path) - multiply_add_processes_to_dump = multiply_add_dumper._get_processes_to_dump() + @pytest.mark.usefixtures('aiida_profile_clean') + def test_dump_calculations_multiply_add(self, setup_multiply_add_group, tmp_path): + multiply_add_group: orm.Group = setup_multiply_add_group + multiply_add_group_label = multiply_add_group.label + multiply_add_group_path = tmp_path / multiply_add_group_label - # No calculations to dump when deduplication is enabled - multiply_add_dumper._dump_calculations(multiply_add_processes_to_dump.calculations) - multiply_add_test_path: Path = multiply_add_group_path / 'calculations' + multiply_add_dumper = CollectionDumper(collection=multiply_add_group, output_path=multiply_add_group_path) - assert not multiply_add_test_path.exists() + # No calculations to dump when deduplication is enabled + multiply_add_dumper._dump_calculations(multiply_add_dumper._get_processes_to_dump().calculations) + assert not (multiply_add_group_path / 'calculations').exists() + # Now, disable de-duplication -> Should dump calculations multiply_add_dumper_no_dedup = CollectionDumper( collection=multiply_add_group, output_path=multiply_add_group_path, deduplicate=False ) - multiply_add_processes_to_dump = multiply_add_dumper_no_dedup._get_processes_to_dump() - # calculations to dump when deduplication is enabled - multiply_add_dumper_no_dedup._dump_calculations(multiply_add_processes_to_dump.calculations) + multiply_add_dumper_no_dedup._dump_calculations( + multiply_add_dumper_no_dedup._get_processes_to_dump().calculations + ) expected_tree_no_dedup = { 'calculations': { - 'ArithmeticAddCalculation-15': { + 'ArithmeticAddCalculation-8': { 'inputs': 
['_aiidasubmit.sh', 'aiida.in'], 'node_inputs': [], 'outputs': ['_scheduler-stderr.txt', '_scheduler-stdout.txt', 'aiida.out'], }, - 'multiply-13': { + 'multiply-6': { 'inputs': ['source_file'], 'node_inputs': [], }, } } - compare_tree(expected=expected_tree_no_dedup, base_path=tmp_path, relative_path=multiply_add_group_path) + compare_tree(expected=expected_tree_no_dedup, base_path=tmp_path, relative_path=Path(multiply_add_group_label)) - pytest.set_trace() + # pytest.set_trace() # def test_dump_workflows(self): # pass @@ -261,4 +260,4 @@ def test_dump_calculations(self, setup_add_group, setup_multiply_add_group, tmp_ # with pytest.raises(TypeError): # collection_dumper = CollectionDumper(collection=[1]) - # collection_dumper._get_nodes() \ No newline at end of file + # collection_dumper._get_nodes() From c83cc063ab037dc7c58f10620d01eac71439db64 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 10 Feb 2025 17:57:08 +0000 Subject: [PATCH 24/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/aiida/cmdline/commands/cmd_profile.py | 11 +- src/aiida/common/utils.py | 2 - src/aiida/repository/repository.py | 2 +- src/aiida/tools/dumping/collection.py | 166 +++++++++++----------- src/aiida/tools/dumping/config.py | 3 +- src/aiida/tools/dumping/process.py | 5 +- src/aiida/tools/dumping/profile.py | 3 +- 7 files changed, 92 insertions(+), 100 deletions(-) diff --git a/src/aiida/cmdline/commands/cmd_profile.py b/src/aiida/cmdline/commands/cmd_profile.py index e616debd06..ada34c7657 100644 --- a/src/aiida/cmdline/commands/cmd_profile.py +++ b/src/aiida/cmdline/commands/cmd_profile.py @@ -288,17 +288,20 @@ def profile_delete(force, delete_data, profiles): '--symlink-duplicates/--no-symlink-duplicates', default=True, show_default=True, - help='Symlink data if the same node is contained in multiple groups.') + help='Symlink data if the same node is contained in multiple groups.', +) @click.option( '--delete-missing/--no-delete-missing', default=False, show_default=True, - help="If a previously dumped node is deleted from AiiDA's DB, also delete the corresponding dump directory.") + help="If a previously dumped node is deleted from AiiDA's DB, also delete the corresponding dump directory.", +) @click.option( '--extra-calc-dirs/--no-extra-calc-dirs', default=False, show_default=True, - help='If a top-level process calls sub-processes, create a designated directory only for the top-level process.') + help='If a top-level process calls sub-processes, create a designated directory only for the top-level process.', +) @options.INCLUDE_INPUTS() @options.INCLUDE_OUTPUTS() @options.INCLUDE_ATTRIBUTES() @@ -331,9 +334,9 @@ def profile_mirror( from aiida.tools.dumping import ProcessDumper, ProfileDumper from aiida.tools.dumping.base import BaseDumper + from aiida.tools.dumping.config import ProfileDumpConfig from aiida.tools.dumping.logger import DumpLogger from aiida.tools.dumping.utils import prepare_dump_path - from aiida.tools.dumping.config import ProfileDumpConfig profile = ctx.obj['profile'] diff --git a/src/aiida/common/utils.py b/src/aiida/common/utils.py index 8cd1046dfb..1b2f2b14ce 100644 --- a/src/aiida/common/utils.py +++ b/src/aiida/common/utils.py @@ -17,8 +17,6 @@ from datetime import datetime from typing import Any, Dict from uuid import UUID -from aiida.manage import get_manager, load_profile -from aiida.manage.configuration.profile import Profile from .lang import 
classproperty diff --git a/src/aiida/repository/repository.py b/src/aiida/repository/repository.py index 32351ddeef..a332d4ded3 100644 --- a/src/aiida/repository/repository.py +++ b/src/aiida/repository/repository.py @@ -519,7 +519,7 @@ def copy_tree(self, target: Union[str, pathlib.Path], path: Optional[FilePath] = dirpath.mkdir(parents=True, exist_ok=True) with self.open(root / filename) as handle: - # TODO: Possibly skip + # TODO: Possibly skip filepath.write_bytes(handle.read()) # these methods are not actually used in aiida-core, but are here for completeness diff --git a/src/aiida/tools/dumping/collection.py b/src/aiida/tools/dumping/collection.py index 56352b4574..dec7f532d9 100644 --- a/src/aiida/tools/dumping/collection.py +++ b/src/aiida/tools/dumping/collection.py @@ -10,7 +10,6 @@ from __future__ import annotations -from dataclasses import dataclass import os from datetime import datetime from pathlib import Path @@ -20,11 +19,10 @@ from aiida.common.exceptions import NotExistent from aiida.common.log import AIIDA_LOGGER from aiida.tools.dumping.base import BaseDumper +from aiida.tools.dumping.config import ProfileDumpConfig from aiida.tools.dumping.logger import DumpLog, DumpLogger from aiida.tools.dumping.process import ProcessDumper from aiida.tools.dumping.utils import filter_by_last_dump_time -from aiida.tools.dumping.config import ProfileDumpConfig -from typing import Literal if TYPE_CHECKING: from collections.abc import Sequence @@ -53,6 +51,7 @@ def is_empty(self) -> bool: # extra_calc_dirs: bool = False # organize_by_groups: bool = True + class CollectionDumper: """Class to handle dumping of a collection of AiiDA ORM entities.""" @@ -193,7 +192,6 @@ def _get_processes_to_dump(self) -> ProcessesToDump: ) def _dump_calculations(self, calculations: Sequence[orm.CalculationNode]) -> None: - """Dump a collection of calculations. Deduplication is already handled in the ``get_processes`` method, where PKs/UUIDs are used, rather than AiiDA @@ -234,9 +232,7 @@ def _dump_calculations(self, calculations: Sequence[orm.CalculationNode]) -> Non self.dump_logger.update_calculations(new_calculations=dumped_calculations) def _dump_workflows(self, workflows: Sequence[orm.WorkflowNode]) -> None: - """Dump a collection of workflows. - - """ + """Dump a collection of workflows.""" workflow_path: Path = self.output_path / 'workflows' dumped_workflows: dict[str, DumpLog] = {} @@ -293,82 +289,82 @@ def dump(self) -> None: # breakpoint() self._dump_calculations(calculations=collection_processes.calculations) -# TODO: See, if I can generalize the dump sub-methods - # def _dump_processes( - # self, - # # processes: Sequence[orm.CalculationNode | orm.WorkflowNode], - # processes: Sequence[orm.CalculationNode] | Sequence[orm.WorkflowNode], - # ) -> None: - # """Dump a collection of calculations or workflows. 
- - # :param processes: Sequence of ``orm.CalculationNode``s or ``orm.WorkflowNode``s - # :param process_type: Type of processes, either 'calculations' or 'workflows' - # :return: None - # """ - - # # From, e.g., 'aiida.workflows:core.arithmetic.multiply_add' to 'workflows - # if isinstance(processes[0], orm.CalculationNode): - # process_type_str = 'calculations' - # elif isinstance(processes[0], orm.WorkflowNode): - # process_type_str = 'workflows' - # # else: - # # breakpoint() - # # process_type_str = processes[0].process_type.split(':')[0].split('.')[1] - # process_type_path = self.output_path / process_type_str - # process_type_path.mkdir(exist_ok=True, parents=True) - - # dumped_processes: dict[str, DumpLog] = {} - # logged_processes: DumpDict = self.dump_logger.get_log()[process_type_str] - - # # breakpoint() - - # for process in processes: - # process_dumper = self.process_dumper - - # process_dump_path = process_type_path / process_dumper._generate_default_dump_path( - # process_node=process, prefix=None - # ) - - # # Target directory already exists, skip this process - # if process_dump_path.exists(): - # continue - - # else: - # # Symlink here, if deduplication enabled and process was already dumped - # # TODO: Possibly check dirs here - # # TODO: Alternatively have method/endpoint to delete one calculation from the dumping - # # TODO: Which would also update the log. - # # Otherwise, one might delete a calculation, maybe because it was wrong, and then it won't be dumped - # # anymore ever. - # if self.deduplicate and process.uuid in logged_processes.keys(): - # try: - # os.symlink( - # src=logged_processes[process.uuid].path, - # dst=process_dump_path, - # ) - # except: - # # raise - # pass - # # breakpoint() - # else: - # if process_type_str == 'calculations': - # process_dumper._dump_calculation(calculation_node=process, output_path=process_dump_path) - # elif process_type_str == 'workflows': - # process_dumper._dump_workflow( - # workflow_node=process, - # output_path=process_dump_path, - # ) - - - # dumped_processes[process.uuid] = DumpLog( - # path=process_dump_path, - # time=datetime.now().astimezone(), - # ) - - # # breakpoint() - - # if process_type_str == 'calculations': - # self.dump_logger.update_calculations(new_calculations=dumped_processes) - # elif process_type_str == 'workflows': - # self.dump_logger.update_workflows(new_workflows=dumped_processes) +# TODO: See, if I can generalize the dump sub-methods +# def _dump_processes( +# self, +# # processes: Sequence[orm.CalculationNode | orm.WorkflowNode], +# processes: Sequence[orm.CalculationNode] | Sequence[orm.WorkflowNode], +# ) -> None: +# """Dump a collection of calculations or workflows. 
+ +# :param processes: Sequence of ``orm.CalculationNode``s or ``orm.WorkflowNode``s +# :param process_type: Type of processes, either 'calculations' or 'workflows' +# :return: None +# """ + +# # From, e.g., 'aiida.workflows:core.arithmetic.multiply_add' to 'workflows +# if isinstance(processes[0], orm.CalculationNode): +# process_type_str = 'calculations' +# elif isinstance(processes[0], orm.WorkflowNode): +# process_type_str = 'workflows' +# # else: +# # breakpoint() +# # process_type_str = processes[0].process_type.split(':')[0].split('.')[1] +# process_type_path = self.output_path / process_type_str +# process_type_path.mkdir(exist_ok=True, parents=True) + +# dumped_processes: dict[str, DumpLog] = {} +# logged_processes: DumpDict = self.dump_logger.get_log()[process_type_str] + +# # breakpoint() + +# for process in processes: +# process_dumper = self.process_dumper + +# process_dump_path = process_type_path / process_dumper._generate_default_dump_path( +# process_node=process, prefix=None +# ) + +# # Target directory already exists, skip this process +# if process_dump_path.exists(): +# continue + +# else: +# # Symlink here, if deduplication enabled and process was already dumped +# # TODO: Possibly check dirs here +# # TODO: Alternatively have method/endpoint to delete one calculation from the dumping +# # TODO: Which would also update the log. +# # Otherwise, one might delete a calculation, maybe because it was wrong, and then it won't be dumped +# # anymore ever. +# if self.deduplicate and process.uuid in logged_processes.keys(): +# try: +# os.symlink( +# src=logged_processes[process.uuid].path, +# dst=process_dump_path, +# ) +# except: +# # raise +# pass +# # breakpoint() +# else: +# if process_type_str == 'calculations': +# process_dumper._dump_calculation(calculation_node=process, output_path=process_dump_path) +# elif process_type_str == 'workflows': +# process_dumper._dump_workflow( +# workflow_node=process, +# output_path=process_dump_path, +# ) + + +# dumped_processes[process.uuid] = DumpLog( +# path=process_dump_path, +# time=datetime.now().astimezone(), +# ) + +# # breakpoint() + +# if process_type_str == 'calculations': +# self.dump_logger.update_calculations(new_calculations=dumped_processes) +# elif process_type_str == 'workflows': +# self.dump_logger.update_workflows(new_workflows=dumped_processes) diff --git a/src/aiida/tools/dumping/config.py b/src/aiida/tools/dumping/config.py index cd8537ce3b..09da896ed5 100644 --- a/src/aiida/tools/dumping/config.py +++ b/src/aiida/tools/dumping/config.py @@ -4,8 +4,7 @@ @dataclass class ProfileDumpConfig: dump_processes: bool = True - symlink_duplicates: bool = True # + symlink_duplicates: bool = True delete_missing: bool = False # profile extra_calc_dirs: bool = False # collection organize_by_groups: bool = True # profile - diff --git a/src/aiida/tools/dumping/process.py b/src/aiida/tools/dumping/process.py index 8a4f962bf2..617c475bf6 100644 --- a/src/aiida/tools/dumping/process.py +++ b/src/aiida/tools/dumping/process.py @@ -54,10 +54,7 @@ def __init__( flat: bool = False, dump_unsealed: bool = False, ) -> None: - """Initialize the CollectionDumper. 
- - - """ + """Initialize the CollectionDumper.""" self.include_inputs = include_inputs self.include_outputs = include_outputs self.include_attributes = include_attributes diff --git a/src/aiida/tools/dumping/profile.py b/src/aiida/tools/dumping/profile.py index 04374ebe16..bb76d3df6c 100644 --- a/src/aiida/tools/dumping/profile.py +++ b/src/aiida/tools/dumping/profile.py @@ -11,12 +11,11 @@ from __future__ import annotations -from dataclasses import dataclass from typing import Sequence, cast from aiida import orm from aiida.common.log import AIIDA_LOGGER -from aiida.manage import get_manager, load_profile +from aiida.manage import load_profile from aiida.manage.configuration.profile import Profile from aiida.tools.dumping.base import BaseDumper from aiida.tools.dumping.collection import CollectionDumper From ea28a519003fa1a57434867a0b2edcb9c3202582 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Tue, 11 Feb 2025 10:12:33 +0100 Subject: [PATCH 25/27] WIP: Dir deletion on node deletion for mirror. --- src/aiida/cmdline/commands/cmd_profile.py | 4 + src/aiida/tools/dumping/collection.py | 38 +++---- src/aiida/tools/dumping/profile.py | 101 ++++++++++++++++-- src/aiida/tools/dumping/utils.py | 122 +++++++++++++++------- 4 files changed, 197 insertions(+), 68 deletions(-) diff --git a/src/aiida/cmdline/commands/cmd_profile.py b/src/aiida/cmdline/commands/cmd_profile.py index ada34c7657..7ac3872e51 100644 --- a/src/aiida/cmdline/commands/cmd_profile.py +++ b/src/aiida/cmdline/commands/cmd_profile.py @@ -418,6 +418,10 @@ def profile_mirror( ) profile_dumper.dump_processes() + profile_dumper.delete_processes() + + if delete_missing: + profile_dumper._get_processes_to_delete() # Append the current time to the file last_dump_time = datetime.now().astimezone() diff --git a/src/aiida/tools/dumping/collection.py b/src/aiida/tools/dumping/collection.py index dec7f532d9..468b638fc8 100644 --- a/src/aiida/tools/dumping/collection.py +++ b/src/aiida/tools/dumping/collection.py @@ -22,7 +22,7 @@ from aiida.tools.dumping.config import ProfileDumpConfig from aiida.tools.dumping.logger import DumpLog, DumpLogger from aiida.tools.dumping.process import ProcessDumper -from aiida.tools.dumping.utils import filter_by_last_dump_time +from aiida.tools.dumping.utils import filter_by_last_dump_time, extend_calculations if TYPE_CHECKING: from collections.abc import Sequence @@ -119,7 +119,7 @@ def collection_nodes(self) -> Sequence[str] | Sequence[int]: :return: List of collection node identifiers. 
""" - if not self._collection_nodes: + if self._collection_nodes is None: self._collection_nodes = self._get_collection_nodes() return self._collection_nodes @@ -165,26 +165,11 @@ def _get_processes_to_dump(self) -> ProcessesToDump: # As the list comprehension fetches each node from the DB individually nodes_orm = orm.QueryBuilder().append(orm.Node, filters={'uuid': {'in': self.collection_nodes}}).all(flat=True) - workflows = [node for node in nodes_orm if isinstance(node, orm.WorkflowNode)] - calculations = [node for node in nodes_orm if isinstance(node, orm.CalculationNode)] + workflows = [node for node in nodes_orm if isinstance(node, orm.WorkflowNode) and node.caller is None] + calculations = [node for node in nodes_orm if isinstance(node, orm.CalculationNode) and node.caller is None] - # Make sure that only top-level workflows and calculations are dumped - workflows = [workflow for workflow in workflows if workflow.caller is None] - - # If sub-calculations that were called by workflows of the group, and which are not - # contained in the group.nodes directly are being dumped explicitly - # breakpoint() if self.profile_dump_config.extra_calc_dirs: - called_calculations = [] - for workflow in workflows: - called_calculations += [ - node for node in workflow.called_descendants if isinstance(node, orm.CalculationNode) - ] - - # Convert to set to avoid duplicates - calculations = list(set(calculations + called_calculations)) - else: - calculations = [calculation for calculation in calculations if calculation.caller is None] + calculations = extend_calculations(profile_dump_config=self.profile_dump_config, calculations=calculations, workflows=workflows) return ProcessesToDump( calculations=calculations, @@ -222,12 +207,15 @@ def _dump_calculations(self, calculations: Sequence[orm.CalculationNode]) -> Non ) # This is handled in the get_processes method: `if calculation.caller is None:` - calculation_dumper._dump_calculation(calculation_node=calculation, output_path=calculation_dump_path) + else: + # TODO: Don't update the logger with the UUID of a symlinked calculation as keys must be unique + # TODO: Possibly add another `symlink` attribute to `DumpLog` which can hold a list of symlinks + calculation_dumper._dump_calculation(calculation_node=calculation, output_path=calculation_dump_path) - dumped_calculations[calculation.uuid] = DumpLog( - path=calculation_dump_path, - time=datetime.now().astimezone(), - ) + dumped_calculations[calculation.uuid] = DumpLog( + path=calculation_dump_path, + time=datetime.now().astimezone(), + ) self.dump_logger.update_calculations(new_calculations=dumped_calculations) diff --git a/src/aiida/tools/dumping/profile.py b/src/aiida/tools/dumping/profile.py index bb76d3df6c..9e9fc060ba 100644 --- a/src/aiida/tools/dumping/profile.py +++ b/src/aiida/tools/dumping/profile.py @@ -22,7 +22,7 @@ from aiida.tools.dumping.config import ProfileDumpConfig from aiida.tools.dumping.logger import DumpLogger from aiida.tools.dumping.process import ProcessDumper -from aiida.tools.dumping.utils import filter_by_last_dump_time +from aiida.tools.dumping.utils import filter_by_last_dump_time, _safe_delete logger = AIIDA_LOGGER.getChild('tools.dumping') @@ -48,7 +48,6 @@ def __init__( :param dump_logger: Logger for the dumping (gets instantiated). :param organize_by_groups: Organize dumped data by groups. :param groups: Dump data only for selected groups. - :param dump_processes: Should dump process data? 
""" self.groups = groups @@ -63,6 +62,9 @@ def __init__( profile = load_profile(profile=profile, allow_switch=True) self.profile = profile + self._processes_to_dump: Sequence[str] | None = None + self._processes_to_delete: Sequence[str] | None = None + def _dump_processes_not_in_any_group(self): """Dump the profile's process data not contained in any group.""" @@ -88,7 +90,7 @@ def _dump_processes_not_in_any_group(self): # Add additional check here to only issue the message when there are actual processes to dump for a group # This might not be the case for, e.g., pseudopotential groups, or if there are no new processes since the # last dumping - if self.dump_processes and not no_group_dumper.processes_to_dump.is_empty: + if self.profile_dump_config.dump_processes and not no_group_dumper.processes_to_dump.is_empty: logger.report(f'Dumping processes not in any group for profile `{self.profile.name}`...') no_group_dumper.dump() @@ -118,7 +120,7 @@ def _dump_processes_per_group(self, groups): # This might not be the case for, e.g., pseudopotential groups, or if there are no new processes since the # last dumping # breakpoint() - if self.dump_processes and not group_dumper.processes_to_dump.is_empty: + if self.profile_dump_config.dump_processes and not group_dumper.processes_to_dump.is_empty: # breakpoint() logger.report(f'Dumping processes in group {group.label} for profile `{self.profile.name}`...') @@ -133,7 +135,7 @@ def _get_no_group_processes(self) -> Sequence[str] | Sequence[int]: group_qb = orm.QueryBuilder().append(orm.Group) profile_groups = cast(Sequence[orm.Group], group_qb.all(flat=True)) process_qb = orm.QueryBuilder().append(orm.ProcessNode, project=['uuid']) - profile_nodes = cast(Sequence[str], process_qb.all(flat=True)) + profile_processes = cast(Sequence[str], process_qb.all(flat=True)) nodes_in_groups: Sequence[str] = [node.uuid for group in profile_groups for node in group.nodes] @@ -151,7 +153,7 @@ def _get_no_group_processes(self) -> Sequence[str] | Sequence[int]: nodes_in_groups += sub_nodes_in_groups process_nodes: Sequence[str | int] = [ - profile_node for profile_node in profile_nodes if profile_node not in nodes_in_groups + profile_node for profile_node in profile_processes if profile_node not in nodes_in_groups ] process_nodes = filter_by_last_dump_time(nodes=process_nodes, last_dump_time=self.base_dumper.last_dump_time) @@ -160,6 +162,8 @@ def _get_no_group_processes(self) -> Sequence[str] | Sequence[int]: def dump_processes(self): # No groups selected, dump data which is not part of any group # If groups selected, however, this data should not also be dumped automatically + # TODO: Maybe populate the `processes_to_dump` property here, even though I don't really need it, as I get the + # TODO: nodes from the specified collection if not self.groups: self._dump_processes_not_in_any_group() @@ -173,6 +177,7 @@ def dump_processes(self): @staticmethod def _get_number_of_nodes_to_dump(last_dump_time) -> dict[str, int]: + # TODO: Change this method... 
result = {} for node_type in (orm.CalculationNode, orm.WorkflowNode): qb = orm.QueryBuilder().append(node_type, project=['uuid']) @@ -180,3 +185,87 @@ def _get_number_of_nodes_to_dump(last_dump_time) -> dict[str, int]: nodes = filter_by_last_dump_time(nodes=nodes, last_dump_time=last_dump_time) result[node_type.class_node_type.split('.')[-2] + 's'] = len(nodes) return result + + @property + def processes_to_dump(self) -> Sequence[str]: + if self._processes_to_dump is None: + self._processes_to_dump = self._get_processes_to_dump() + return self._processes_to_dump + + def _get_processes_to_dump(self) -> Sequence[str]: + + process_qb = ( + orm.QueryBuilder() + .append( + orm.ProcessNode, + project=['uuid'], + filters={'ctime': {'>': self.base_dumper.last_dump_time}} + ) + ) + + profile_processes = cast(Sequence[str], process_qb.all(flat=True)) + + return profile_processes + + @property + def processes_to_delete(self) -> Sequence[str]: + if self._processes_to_delete is None: + self._processes_to_delete = self._get_processes_to_delete() + return self._processes_to_delete + + def _get_processes_to_delete(self) -> Sequence[str]: + + dump_logger = self.dump_logger + log = dump_logger.get_log() + dumped_uuids = set(list(log['calculations'].keys()) + list(log['workflows'].keys())) + # Cannot use QB here because, when deleted, not in the DB anymore + # dumped_qb = orm.QueryBuilder().append(orm.ProcessNode, filters={'uuid': {'in': dumped_uuids}}, project=['uuid']) + # dumped_processes: set[str] = set(cast(list[str], dumped_qb.all(flat=True))) + + # TODO: Possibly filter here since last dump time + # TODO: But it is highly likely that the last dump command with deletion was run a while ago + # TODO: So I cannot filter by last dump time, but should probably take the whole set + profile_qb = orm.QueryBuilder().append(orm.ProcessNode) + profile_processes = set(cast(Sequence[orm.ProcessNode], profile_qb.all(flat=True))) + profile_uuids = set([process.uuid for process in profile_processes if process.caller is None]) + + to_delete_uuids = list(dumped_uuids - profile_uuids) + + return to_delete_uuids + + def _delete_missing_process_paths(self, to_delete_uuids): + + log = self.dump_logger.get_log() + paths_to_delete = [] + + for to_delete_uuid in to_delete_uuids: + try: + paths_to_delete.append(log['workflows'][to_delete_uuid].path) + except KeyError: + paths_to_delete.append(log['calculations'][to_delete_uuid].path) + except: + raise + + for path_to_delete in paths_to_delete: + _safe_delete( + path_to_validate=path_to_delete, + safeguard_file='.aiida_node_metadata.yaml', + verbose=False + ) + + # breakpoint() + + def delete_processes(self): + + to_dump_processes = self.processes_to_dump + to_delete_processes = self.processes_to_delete + + print(f'TO_DUMP_PROCESSES: {to_dump_processes}') + print(f'TO_DELETE_PROCESSES: {to_delete_processes}') + + breakpoint() + + self._delete_missing_process_paths(to_delete_uuids=to_delete_processes) + + # TODO: Need to also delete entry from the log when I delete the dir + # TODO: Add also logging for node/path deletion diff --git a/src/aiida/tools/dumping/utils.py b/src/aiida/tools/dumping/utils.py index d2f216c539..9f7505b1bd 100644 --- a/src/aiida/tools/dumping/utils.py +++ b/src/aiida/tools/dumping/utils.py @@ -18,16 +18,17 @@ from aiida import orm from aiida.common.log import AIIDA_LOGGER -__all__ = ['prepare_dump_path'] +__all__ = ["prepare_dump_path"] -logger = AIIDA_LOGGER.getChild('tools.dumping') +logger = AIIDA_LOGGER.getChild("tools.dumping") def prepare_dump_path( 
path_to_validate: Path, overwrite: bool = False, incremental: bool = True, - safeguard_file: str = '.aiida_node_metadata.yaml', + safeguard_file: str = ".aiida_node_metadata.yaml", + verbose: bool = False, ) -> None: """Create default dumping directory for a given process node and return it as absolute path. @@ -44,11 +45,11 @@ def prepare_dump_path( # TODO: Handle symlinks if overwrite and incremental: - msg = 'Both overwrite and incremental set to True. Only specify one.' + msg = "Both overwrite and incremental set to True. Only specify one." raise ValueError(msg) if path_to_validate.is_file(): - msg = f'A file at the given path `{path_to_validate}` already exists.' + msg = f"A file at the given path `{path_to_validate}` already exists." raise FileExistsError(msg) # Handle existing directory @@ -58,43 +59,67 @@ def prepare_dump_path( # Case 1: Non-empty directory and overwrite is False if not is_empty and not overwrite: if not incremental: - msg = f'Path `{path_to_validate}` already exists, and neither overwrite nor incremental is enabled.' + msg = f"Path `{path_to_validate}` already exists, and neither overwrite nor incremental is enabled." raise FileExistsError(msg) # Case 2: Non-empty directory, overwrite is True if not is_empty and overwrite: - safeguard_exists = (path_to_validate / safeguard_file).is_file() - - if safeguard_exists: - logger.report(path_to_validate) - # breakpoint() - msg = '`--overwrite` option selected. Will recreate directory.' - logger.report(msg) - try: - shutil.rmtree(path_to_validate) - except OSError: - # `shutil.rmtree` fails for symbolic links with - # OSError: Cannot call rmtree on a symbolic link - _delete_dir_recursively(path_to_validate) - - else: - msg = ( - f'Path `{path_to_validate}` exists without safeguard file `{safeguard_file}`. ' - f'Not removing because path might be a directory not created by AiiDA.' - ) - raise FileNotFoundError(msg) - - # Create directory if it doesn't exist or was removed + _safe_delete( + path_to_validate=path_to_validate, + safeguard_file=safeguard_file, + verbose=verbose, + ) + + # Re-create directory, as both shutil.rmtree and `_delete_dir_recursively` delete the original dir path_to_validate.mkdir(exist_ok=True, parents=True) (path_to_validate / safeguard_file).touch() +def _safe_delete( + path_to_validate: Path, + safeguard_file: str = ".aiida_node_metadata.yaml", + verbose: bool = False, +) -> None: + """Also deletes the top-level directory itself. + """ + + if not path_to_validate.exists(): + return + + is_empty = any(path_to_validate.iterdir()) + if is_empty: + path_to_validate.rmdir() + return + + safeguard_exists = (path_to_validate / safeguard_file).is_file() + + if safeguard_exists: + if verbose: + logger.report(str(path_to_validate)) + msg = "`--overwrite` option selected. Will recreate directory." + logger.report(msg) + try: + _delete_dir_recursively(path_to_validate) + # shutil.rmtree(path_to_validate) + except OSError: + # `shutil.rmtree` fails for symbolic links with + # OSError: Cannot call rmtree on a symbolic link + _delete_dir_recursively(path_to_validate) + + else: + msg = ( + f"Path `{path_to_validate}` exists without safeguard file `{safeguard_file}`. " + f"Not removing because path might be a directory not created by AiiDA." + ) + raise FileNotFoundError(msg) + + def _delete_dir_recursively(path): """ Delete folder, sub-folders and files. 
Implementation taken from: https://stackoverflow.com/a/70285390/9431838 """ - for f in path.glob('**/*'): + for f in path.glob("**/*"): if f.is_symlink(): f.unlink(missing_ok=True) # missing_ok is added in python 3.8 elif f.is_file(): @@ -105,19 +130,21 @@ def _delete_dir_recursively(path): except OSError: # sub-folder is not empty _delete_dir_recursively(f) # recurse the current sub-folder except Exception as exception: # capture other exception - print(f'exception name: {exception.__class__.__name__}') - print(f'exception msg: {exception}') + print(f"exception name: {exception.__class__.__name__}") + print(f"exception msg: {exception}") try: path.rmdir() # time to delete an empty folder except NotADirectoryError: path.unlink() # delete folder even if it is a symlink, linux except Exception as exception: - print(f'exception name: {exception.__class__.__name__}') - print(f'exception msg: {exception}') + print(f"exception name: {exception.__class__.__name__}") + print(f"exception msg: {exception}") -def _get_filtered_nodes(nodes: list[str | int], last_dump_time: datetime, key: str = 'uuid') -> list[str | int]: +def _get_filtered_nodes( + nodes: list[str | int], last_dump_time: datetime, key: str = "uuid" +) -> list[str | int]: """Helper function to get ``orm.Node``s from the DB based on ``id``/``uuid`` and filter by ``mtime``. :param nodes: Collection of node PKs or UUIDs @@ -126,12 +153,14 @@ def _get_filtered_nodes(nodes: list[str | int], last_dump_time: datetime, key: s :return: List of nodes filtered by ``last_dump_time``. """ - qb = orm.QueryBuilder().append(orm.Node, filters={key: {'in': nodes}}) + qb = orm.QueryBuilder().append(orm.Node, filters={key: {"in": nodes}}) nodes_orm: list[orm.Node] = cast(list[orm.Node], qb.all(flat=True)) return [getattr(node, key) for node in nodes_orm if node.mtime > last_dump_time] -def filter_by_last_dump_time(nodes: list[str | int], last_dump_time: datetime) -> list[str | int]: +def filter_by_last_dump_time( + nodes: list[str | int], last_dump_time: datetime +) -> list[str | int]: """Filter a list of nodes by the last dump time of the corresponding dumper. :param nodes: A list of node identifiers, which can be either UUIDs (str) or IDs (int). 
@@ -144,9 +173,28 @@ def filter_by_last_dump_time(nodes: list[str | int], last_dump_time: datetime) - if not nodes or last_dump_time is None: return nodes - key = 'uuid' if isinstance(nodes[0], str) else 'id' + key = "uuid" if isinstance(nodes[0], str) else "id" return _get_filtered_nodes( nodes=nodes, last_dump_time=last_dump_time, key=key, ) + + +def extend_calculations(profile_dump_config, calculations, workflows): + + # If sub-calculations that were called by workflows of the group, and which are not + # contained in the group.nodes directly are being dumped explicitly + # breakpoint() + called_calculations = [] + for workflow in workflows: + called_calculations += [ + node + for node in workflow.called_descendants + if isinstance(node, orm.CalculationNode) + ] + + # Convert to set to avoid duplicates + calculations = list(set(calculations + called_calculations)) + + return calculations From 862674a3a09c371e93819e0de960208c7702d144 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 11 Feb 2025 09:12:58 +0000 Subject: [PATCH 26/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/aiida/tools/dumping/collection.py | 6 ++- src/aiida/tools/dumping/profile.py | 21 ++--------- src/aiida/tools/dumping/utils.py | 53 +++++++++++---------------- 3 files changed, 29 insertions(+), 51 deletions(-) diff --git a/src/aiida/tools/dumping/collection.py b/src/aiida/tools/dumping/collection.py index 468b638fc8..cd2ae90186 100644 --- a/src/aiida/tools/dumping/collection.py +++ b/src/aiida/tools/dumping/collection.py @@ -22,7 +22,7 @@ from aiida.tools.dumping.config import ProfileDumpConfig from aiida.tools.dumping.logger import DumpLog, DumpLogger from aiida.tools.dumping.process import ProcessDumper -from aiida.tools.dumping.utils import filter_by_last_dump_time, extend_calculations +from aiida.tools.dumping.utils import extend_calculations, filter_by_last_dump_time if TYPE_CHECKING: from collections.abc import Sequence @@ -169,7 +169,9 @@ def _get_processes_to_dump(self) -> ProcessesToDump: calculations = [node for node in nodes_orm if isinstance(node, orm.CalculationNode) and node.caller is None] if self.profile_dump_config.extra_calc_dirs: - calculations = extend_calculations(profile_dump_config=self.profile_dump_config, calculations=calculations, workflows=workflows) + calculations = extend_calculations( + profile_dump_config=self.profile_dump_config, calculations=calculations, workflows=workflows + ) return ProcessesToDump( calculations=calculations, diff --git a/src/aiida/tools/dumping/profile.py b/src/aiida/tools/dumping/profile.py index 9e9fc060ba..db03e2b5cf 100644 --- a/src/aiida/tools/dumping/profile.py +++ b/src/aiida/tools/dumping/profile.py @@ -22,7 +22,7 @@ from aiida.tools.dumping.config import ProfileDumpConfig from aiida.tools.dumping.logger import DumpLogger from aiida.tools.dumping.process import ProcessDumper -from aiida.tools.dumping.utils import filter_by_last_dump_time, _safe_delete +from aiida.tools.dumping.utils import _safe_delete, filter_by_last_dump_time logger = AIIDA_LOGGER.getChild('tools.dumping') @@ -193,14 +193,8 @@ def processes_to_dump(self) -> Sequence[str]: return self._processes_to_dump def _get_processes_to_dump(self) -> Sequence[str]: - - process_qb = ( - orm.QueryBuilder() - .append( - orm.ProcessNode, - project=['uuid'], - filters={'ctime': {'>': self.base_dumper.last_dump_time}} - ) + process_qb = orm.QueryBuilder().append( + 
orm.ProcessNode, project=['uuid'], filters={'ctime': {'>': self.base_dumper.last_dump_time}} ) profile_processes = cast(Sequence[str], process_qb.all(flat=True)) @@ -214,7 +208,6 @@ def processes_to_delete(self) -> Sequence[str]: return self._processes_to_delete def _get_processes_to_delete(self) -> Sequence[str]: - dump_logger = self.dump_logger log = dump_logger.get_log() dumped_uuids = set(list(log['calculations'].keys()) + list(log['workflows'].keys())) @@ -234,7 +227,6 @@ def _get_processes_to_delete(self) -> Sequence[str]: return to_delete_uuids def _delete_missing_process_paths(self, to_delete_uuids): - log = self.dump_logger.get_log() paths_to_delete = [] @@ -247,16 +239,11 @@ def _delete_missing_process_paths(self, to_delete_uuids): raise for path_to_delete in paths_to_delete: - _safe_delete( - path_to_validate=path_to_delete, - safeguard_file='.aiida_node_metadata.yaml', - verbose=False - ) + _safe_delete(path_to_validate=path_to_delete, safeguard_file='.aiida_node_metadata.yaml', verbose=False) # breakpoint() def delete_processes(self): - to_dump_processes = self.processes_to_dump to_delete_processes = self.processes_to_delete diff --git a/src/aiida/tools/dumping/utils.py b/src/aiida/tools/dumping/utils.py index 9f7505b1bd..17a075c59f 100644 --- a/src/aiida/tools/dumping/utils.py +++ b/src/aiida/tools/dumping/utils.py @@ -10,7 +10,6 @@ from __future__ import annotations -import shutil from datetime import datetime from pathlib import Path from typing import cast @@ -18,16 +17,16 @@ from aiida import orm from aiida.common.log import AIIDA_LOGGER -__all__ = ["prepare_dump_path"] +__all__ = ['prepare_dump_path'] -logger = AIIDA_LOGGER.getChild("tools.dumping") +logger = AIIDA_LOGGER.getChild('tools.dumping') def prepare_dump_path( path_to_validate: Path, overwrite: bool = False, incremental: bool = True, - safeguard_file: str = ".aiida_node_metadata.yaml", + safeguard_file: str = '.aiida_node_metadata.yaml', verbose: bool = False, ) -> None: """Create default dumping directory for a given process node and return it as absolute path. @@ -45,11 +44,11 @@ def prepare_dump_path( # TODO: Handle symlinks if overwrite and incremental: - msg = "Both overwrite and incremental set to True. Only specify one." + msg = 'Both overwrite and incremental set to True. Only specify one.' raise ValueError(msg) if path_to_validate.is_file(): - msg = f"A file at the given path `{path_to_validate}` already exists." + msg = f'A file at the given path `{path_to_validate}` already exists.' raise FileExistsError(msg) # Handle existing directory @@ -59,7 +58,7 @@ def prepare_dump_path( # Case 1: Non-empty directory and overwrite is False if not is_empty and not overwrite: if not incremental: - msg = f"Path `{path_to_validate}` already exists, and neither overwrite nor incremental is enabled." + msg = f'Path `{path_to_validate}` already exists, and neither overwrite nor incremental is enabled.' raise FileExistsError(msg) # Case 2: Non-empty directory, overwrite is True @@ -77,11 +76,10 @@ def prepare_dump_path( def _safe_delete( path_to_validate: Path, - safeguard_file: str = ".aiida_node_metadata.yaml", + safeguard_file: str = '.aiida_node_metadata.yaml', verbose: bool = False, ) -> None: - """Also deletes the top-level directory itself. - """ + """Also deletes the top-level directory itself.""" if not path_to_validate.exists(): return @@ -96,7 +94,7 @@ def _safe_delete( if safeguard_exists: if verbose: logger.report(str(path_to_validate)) - msg = "`--overwrite` option selected. Will recreate directory." 
+ msg = '`--overwrite` option selected. Will recreate directory.' logger.report(msg) try: _delete_dir_recursively(path_to_validate) @@ -108,8 +106,8 @@ def _safe_delete( else: msg = ( - f"Path `{path_to_validate}` exists without safeguard file `{safeguard_file}`. " - f"Not removing because path might be a directory not created by AiiDA." + f'Path `{path_to_validate}` exists without safeguard file `{safeguard_file}`. ' + f'Not removing because path might be a directory not created by AiiDA.' ) raise FileNotFoundError(msg) @@ -119,7 +117,7 @@ def _delete_dir_recursively(path): Delete folder, sub-folders and files. Implementation taken from: https://stackoverflow.com/a/70285390/9431838 """ - for f in path.glob("**/*"): + for f in path.glob('**/*'): if f.is_symlink(): f.unlink(missing_ok=True) # missing_ok is added in python 3.8 elif f.is_file(): @@ -130,21 +128,19 @@ def _delete_dir_recursively(path): except OSError: # sub-folder is not empty _delete_dir_recursively(f) # recurse the current sub-folder except Exception as exception: # capture other exception - print(f"exception name: {exception.__class__.__name__}") - print(f"exception msg: {exception}") + print(f'exception name: {exception.__class__.__name__}') + print(f'exception msg: {exception}') try: path.rmdir() # time to delete an empty folder except NotADirectoryError: path.unlink() # delete folder even if it is a symlink, linux except Exception as exception: - print(f"exception name: {exception.__class__.__name__}") - print(f"exception msg: {exception}") + print(f'exception name: {exception.__class__.__name__}') + print(f'exception msg: {exception}') -def _get_filtered_nodes( - nodes: list[str | int], last_dump_time: datetime, key: str = "uuid" -) -> list[str | int]: +def _get_filtered_nodes(nodes: list[str | int], last_dump_time: datetime, key: str = 'uuid') -> list[str | int]: """Helper function to get ``orm.Node``s from the DB based on ``id``/``uuid`` and filter by ``mtime``. :param nodes: Collection of node PKs or UUIDs @@ -153,14 +149,12 @@ def _get_filtered_nodes( :return: List of nodes filtered by ``last_dump_time``. """ - qb = orm.QueryBuilder().append(orm.Node, filters={key: {"in": nodes}}) + qb = orm.QueryBuilder().append(orm.Node, filters={key: {'in': nodes}}) nodes_orm: list[orm.Node] = cast(list[orm.Node], qb.all(flat=True)) return [getattr(node, key) for node in nodes_orm if node.mtime > last_dump_time] -def filter_by_last_dump_time( - nodes: list[str | int], last_dump_time: datetime -) -> list[str | int]: +def filter_by_last_dump_time(nodes: list[str | int], last_dump_time: datetime) -> list[str | int]: """Filter a list of nodes by the last dump time of the corresponding dumper. :param nodes: A list of node identifiers, which can be either UUIDs (str) or IDs (int). 
@@ -173,7 +167,7 @@ def filter_by_last_dump_time( if not nodes or last_dump_time is None: return nodes - key = "uuid" if isinstance(nodes[0], str) else "id" + key = 'uuid' if isinstance(nodes[0], str) else 'id' return _get_filtered_nodes( nodes=nodes, last_dump_time=last_dump_time, @@ -182,17 +176,12 @@ def filter_by_last_dump_time( def extend_calculations(profile_dump_config, calculations, workflows): - # If sub-calculations that were called by workflows of the group, and which are not # contained in the group.nodes directly are being dumped explicitly # breakpoint() called_calculations = [] for workflow in workflows: - called_calculations += [ - node - for node in workflow.called_descendants - if isinstance(node, orm.CalculationNode) - ] + called_calculations += [node for node in workflow.called_descendants if isinstance(node, orm.CalculationNode)] # Convert to set to avoid duplicates calculations = list(set(calculations + called_calculations)) From ac0bf513fa563975e1ced44976b7f1ff0b65b914 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Tue, 11 Feb 2025 16:26:16 +0100 Subject: [PATCH 27/27] Various improvements - First version with symlinks and delete-missing works - Turn BaseDumper into BaseDumpConfig dataclass - Merge _dump_calculations and _dump_workflows to _dump_processes - Improve Logger - Typing --- docs/source/reference/command_line.rst | 2 +- src/aiida/cmdline/commands/cmd_process.py | 13 +- src/aiida/cmdline/commands/cmd_profile.py | 39 +-- src/aiida/tools/dumping/__init__.py | 6 +- src/aiida/tools/dumping/base.py | 28 --- src/aiida/tools/dumping/collection.py | 283 +++++++--------------- src/aiida/tools/dumping/config.py | 47 +++- src/aiida/tools/dumping/logger.py | 158 +++++++++--- src/aiida/tools/dumping/process.py | 41 ++-- src/aiida/tools/dumping/profile.py | 144 +++++------ src/aiida/tools/dumping/utils.py | 66 +++-- tests/tools/dumping/test_collection.py | 12 +- tests/tools/dumping/test_process.py | 14 +- 13 files changed, 444 insertions(+), 409 deletions(-) delete mode 100644 src/aiida/tools/dumping/base.py diff --git a/docs/source/reference/command_line.rst b/docs/source/reference/command_line.rst index 283993fac9..ca81f2e421 100644 --- a/docs/source/reference/command_line.rst +++ b/docs/source/reference/command_line.rst @@ -398,7 +398,7 @@ Below is a list with all available subcommands. configure-rabbitmq Configure RabbitMQ for a profile. delete Delete one or more profiles. list Display a list of all available profiles. - mirror Dump all data in an AiiDA profile's storage to disk. + mirror Dump all data in an AiiDA profile's storage to disk in a... set-default Set a profile as the default profile. setdefault (Deprecated) Set a profile as the default profile. setup Set up a new profile. 
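For orientation, a minimal usage sketch of the refactored dumping API, assuming the `BaseDumpConfig`/`ProcessDumpConfig` dataclasses and the updated `ProcessDumper` constructor introduced by this patch series; this is not part of the patch itself, and the profile, node PK, and dump path below are placeholder values:

    from pathlib import Path

    from aiida import load_profile, orm
    from aiida.tools.dumping.config import BaseDumpConfig, ProcessDumpConfig
    from aiida.tools.dumping.process import ProcessDumper

    load_profile()  # load the default AiiDA profile

    # Shared dump settings: parent path and overwrite/incremental behaviour
    base_dump_config = BaseDumpConfig(
        dump_parent_path=Path('dump-example'),
        overwrite=False,
        incremental=True,
    )

    # Process-specific settings: which node contents to include and directory layout
    process_dump_config = ProcessDumpConfig(
        include_inputs=True,
        include_outputs=False,
        flat=False,
    )

    # The dumper is now constructed from the two config dataclasses
    process_dumper = ProcessDumper(
        base_dump_config=base_dump_config,
        process_dump_config=process_dump_config,
    )

    # Placeholder PK: load any sealed process node from the profile and dump it
    process_node = orm.load_node(1234)
    process_dumper.dump(process_node=process_node, output_path=Path('dump-example') / 'my-process')

Grouping the options into dataclasses keeps the `ProcessDumper`, `CollectionDumper`, and `ProfileDumper` constructors consistent, which is what the subsequent diffs to `cmd_process.py` and `cmd_profile.py` rely on.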
diff --git a/src/aiida/cmdline/commands/cmd_process.py b/src/aiida/cmdline/commands/cmd_process.py index 395c74de5e..e7054b8649 100644 --- a/src/aiida/cmdline/commands/cmd_process.py +++ b/src/aiida/cmdline/commands/cmd_process.py @@ -606,23 +606,26 @@ def process_dump( """ from aiida.tools.archive.exceptions import ExportValidationError - from aiida.tools.dumping.base import BaseDumper + from aiida.tools.dumping.config import BaseDumpConfig, ProcessDumpConfig from aiida.tools.dumping.process import ProcessDumper - base_dumper = BaseDumper( + base_dump_config = BaseDumpConfig( dump_parent_path=path, overwrite=overwrite, incremental=incremental, ) - process_dumper = ProcessDumper( - base_dumper=base_dumper, + process_dump_config = ProcessDumpConfig( include_inputs=include_inputs, include_outputs=include_outputs, include_attributes=include_attributes, include_extras=include_extras, flat=flat, - dump_unsealed=dump_unsealed, + ) + + process_dumper = ProcessDumper( + base_dump_config=base_dump_config, + process_dump_config=process_dump_config, ) try: diff --git a/src/aiida/cmdline/commands/cmd_profile.py b/src/aiida/cmdline/commands/cmd_profile.py index 7ac3872e51..503684d50f 100644 --- a/src/aiida/cmdline/commands/cmd_profile.py +++ b/src/aiida/cmdline/commands/cmd_profile.py @@ -19,6 +19,7 @@ from aiida.cmdline.utils import defaults, echo from aiida.common import exceptions from aiida.manage.configuration import Profile, create_profile, get_config +from aiida.tools.dumping.config import ProcessDumpConfig @verdi.group('profile') @@ -333,8 +334,7 @@ def profile_mirror( from pathlib import Path from aiida.tools.dumping import ProcessDumper, ProfileDumper - from aiida.tools.dumping.base import BaseDumper - from aiida.tools.dumping.config import ProfileDumpConfig + from aiida.tools.dumping.config import BaseDumpConfig, ProfileDumpConfig from aiida.tools.dumping.logger import DumpLogger from aiida.tools.dumping.utils import prepare_dump_path @@ -367,12 +367,12 @@ def profile_mirror( last_dump_time = None if dry_run: - node_counts = ProfileDumper._get_number_of_nodes_to_dump(last_dump_time) + # node_counts = ProfileDumper._get_number_of_nodes_to_dump(last_dump_time) dry_run_message = f'Dry run for mirroring of profile `{profile.name}`. Would dump:' echo.echo_report(dry_run_message) - for count, node_type in node_counts.items(): - echo.echo_report(f'{count}: {node_type}') - return + # for count, node_type in node_counts.items(): + # echo.echo_report(f'{count}: {node_type}') + # return if incremental: msg = 'Incremental mirroring selected. Will update directory.' 
@@ -383,15 +383,14 @@ def profile_mirror( except (json.JSONDecodeError, OSError): dump_logger = DumpLogger(dump_parent_path=path) - base_dumper = BaseDumper( + base_dump_config = BaseDumpConfig( dump_parent_path=path, overwrite=overwrite, incremental=incremental, last_dump_time=last_dump_time, ) - process_dumper = ProcessDumper( - base_dumper=base_dumper, + process_dump_config = ProcessDumpConfig( include_inputs=include_inputs, include_outputs=include_outputs, include_attributes=include_attributes, @@ -399,7 +398,11 @@ def profile_mirror( flat=flat, ) - # breakpoint() + process_dumper = ProcessDumper( + base_dump_config=base_dump_config, + process_dump_config=process_dump_config, + ) + profile_dump_config = ProfileDumpConfig( dump_processes=dump_processes, symlink_duplicates=symlink_duplicates, @@ -411,17 +414,23 @@ def profile_mirror( profile_dumper = ProfileDumper( profile=profile, profile_dump_config=profile_dump_config, - base_dumper=base_dumper, + base_dump_config=base_dump_config, process_dumper=process_dumper, dump_logger=dump_logger, groups=groups, ) - profile_dumper.dump_processes() - profile_dumper.delete_processes() + if len(profile_dumper.processes_to_dump) == 0: + echo.echo_success('No processes to dump.') + else: + profile_dumper.dump_processes() + echo.echo_success('Dumped XXX new nodes.') if delete_missing: - profile_dumper._get_processes_to_delete() + if len(profile_dumper.processes_to_delete) == 0: + echo.echo_success('No processes to delete.') + else: + profile_dumper.delete_processes() # Append the current time to the file last_dump_time = datetime.now().astimezone() @@ -431,4 +440,4 @@ def profile_mirror( # Write the logging json file to disk dump_logger.save_log() - echo.echo_success(f'Dumped {dump_logger.counter} new nodes.') + # echo.echo_success(f'Dumped {dump_logger.counter} new nodes.') diff --git a/src/aiida/tools/dumping/__init__.py b/src/aiida/tools/dumping/__init__.py index 6bc7b9c2c0..8f8c86dcdb 100644 --- a/src/aiida/tools/dumping/__init__.py +++ b/src/aiida/tools/dumping/__init__.py @@ -8,11 +8,9 @@ ########################################################################### """Modules related to the dumping of AiiDA data.""" -from .base import BaseDumper from .collection import CollectionDumper +from .logger import DumpLogger from .process import ProcessDumper from .profile import ProfileDumper -# from .collection import CollectionDumper - -__all__ = ('BaseDumper', 'CollectionDumper', 'ProcessDumper', 'ProfileDumper') # , 'CollectionDumper') +__all__ = ('CollectionDumper', 'DumpLogger', 'ProcessDumper', 'ProfileDumper') diff --git a/src/aiida/tools/dumping/base.py b/src/aiida/tools/dumping/base.py deleted file mode 100644 index 6bbd5b505e..0000000000 --- a/src/aiida/tools/dumping/base.py +++ /dev/null @@ -1,28 +0,0 @@ -########################################################################### -# Copyright (c), The AiiDA team. All rights reserved. # -# This file is part of the AiiDA code. 
# -# # -# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # -# For further information on the license, see the LICENSE.txt file # -# For further information please visit http://www.aiida.net # -########################################################################### - -from dataclasses import dataclass -from datetime import datetime -from pathlib import Path - - -@dataclass -class BaseDumper: - """Container for shared arguments of all Dumper classes.""" - - dump_parent_path: Path | None = None - overwrite: bool = False - incremental: bool = True - check_dirs: bool = False - # TODO: Make this a per-class attribute? - last_dump_time: datetime | None = None - - def __post_init__(self): - if self.dump_parent_path is None: - self.dump_parent_path = Path.cwd() diff --git a/src/aiida/tools/dumping/collection.py b/src/aiida/tools/dumping/collection.py index cd2ae90186..641c406e9e 100644 --- a/src/aiida/tools/dumping/collection.py +++ b/src/aiida/tools/dumping/collection.py @@ -18,22 +18,19 @@ from aiida import orm from aiida.common.exceptions import NotExistent from aiida.common.log import AIIDA_LOGGER -from aiida.tools.dumping.base import BaseDumper -from aiida.tools.dumping.config import ProfileDumpConfig +from aiida.tools.dumping.config import BaseDumpConfig, ProfileDumpConfig from aiida.tools.dumping.logger import DumpLog, DumpLogger from aiida.tools.dumping.process import ProcessDumper -from aiida.tools.dumping.utils import extend_calculations, filter_by_last_dump_time +from aiida.tools.dumping.utils import NodeDumpMapper, extend_calculations, filter_by_last_dump_time if TYPE_CHECKING: - from collections.abc import Sequence - - from aiida.tools.dumping.logger import DumpDict + from collections.abc import Collection, Sequence logger = AIIDA_LOGGER.getChild('tools.dumping') -class ProcessesToDump(NamedTuple): +class ProcessesDumpContainer(NamedTuple): calculations: Sequence[orm.CalculationNode] workflows: Sequence[orm.WorkflowNode] @@ -43,23 +40,15 @@ def is_empty(self) -> bool: return len(self.calculations) == 0 and len(self.workflows) == 0 -# @dataclass -# class CollectionDumpConfig: -# dump_processes: bool = True -# symlink_duplicates: bool = True -# delete_missing: bool = False -# extra_calc_dirs: bool = False -# organize_by_groups: bool = True - - class CollectionDumper: """Class to handle dumping of a collection of AiiDA ORM entities.""" def __init__( self, - collection: orm.Group | str | Sequence[str] | Sequence[int], + # TODO: Refactor here to different arguments: Group, and collection_nodes + collection: orm.Group | str | Collection[str], profile_dump_config: ProfileDumpConfig | None = None, - base_dumper: BaseDumper | None = None, + base_dump_config: BaseDumpConfig | None = None, process_dumper: ProcessDumper | None = None, dump_logger: DumpLogger | None = None, output_path: Path | None = None, @@ -67,7 +56,7 @@ def __init__( """Initialize the CollectionDumper. :param collection: The collection of AiiDA ORM entities to be dumped, either a group, group label, or list of - :param base_dumper: Base dumper instance or None (gets instantiated). + :param base_dump_config: Base dumper instance or None (gets instantiated). :param process_dumper: Process dumper instance or None (gets instantiated). :param dump_logger: Logger for the dumping (gets instantiated). :param output_path: The parent output path for dumping the collection nodes. 
@@ -76,20 +65,20 @@ def __init__( self.collection = self._validate_collection(collection) - self.base_dumper = base_dumper or BaseDumper() + self.base_dump_config = base_dump_config or BaseDumpConfig() self.process_dumper = process_dumper or ProcessDumper() - self.dump_logger = dump_logger or DumpLogger(dump_parent_path=self.base_dumper.dump_parent_path) + self.dump_logger = dump_logger or DumpLogger(dump_parent_path=self.base_dump_config.dump_parent_path) self.output_path = output_path or Path.cwd() self.profile_dump_config = profile_dump_config or ProfileDumpConfig() - self._collection_nodes: Sequence[str] | Sequence[int] | None = None - self._processes_to_dump: ProcessesToDump | None = None + self._collection_nodes: Collection[str] | None = None + self._processes_to_dump: ProcessesDumpContainer | None = None def _validate_collection( - self, collection: orm.Group | str | Sequence[str] | Sequence[int] - ) -> orm.Group | Sequence[str] | Sequence[int]: + self, collection: orm.Group | str | Collection[str] | Collection[int] + ) -> orm.Group | Collection[str]: """Validate the given collection identifier. :param collection: The input collection to validate. @@ -104,9 +93,23 @@ def _validate_collection( except Exception as exc: msg = f'Could not load group: {collection}.' raise NotExistent(msg) from exc - if (isinstance(collection, list) and all(isinstance(n, (str, int)) for n in collection)) or isinstance( - collection, orm.Group - ): + + elif isinstance(collection, orm.Group): + return collection + + elif isinstance(collection, list): + if all(isinstance(n, str) for n in collection): + return collection + + elif all(isinstance(n, int) for n in collection): + msg = 'Passing node collections via their PK not yet supported.' + raise ValueError(msg) + + else: + msg = 'Mixing identifiers or passing other types not supported' + raise ValueError(msg) + + elif isinstance(collection, list) and all(isinstance(n, int) for n in collection): return collection else: @@ -114,7 +117,7 @@ def _validate_collection( raise ValueError(msg) @property - def collection_nodes(self) -> Sequence[str] | Sequence[int]: + def collection_nodes(self) -> Collection[str]: """Return collection nodes. :return: List of collection node identifiers. @@ -123,24 +126,26 @@ def collection_nodes(self) -> Sequence[str] | Sequence[int]: self._collection_nodes = self._get_collection_nodes() return self._collection_nodes - def _get_collection_nodes(self) -> Sequence[str] | Sequence[int]: - """Retrieve the node ``PK``s/``UUID``s from the collection, filtered by the last dump time, if incremental - dumping is selected. + def _get_collection_nodes(self) -> Collection[str]: + """Retrieve the node UUIDs from the collection, filtered by the last dump time, if for incremental dumping. - :return: List of node identifiers. + :return: List of node UUIDs. 
""" if not self.collection: return [] - nodes = [n.uuid for n in self.collection.nodes] if isinstance(self.collection, orm.Group) else self.collection + if isinstance(self.collection, orm.Group): + nodes: Collection[str] = [n.uuid for n in self.collection.nodes] + else: + nodes = self.collection - if self.base_dumper.incremental and self.base_dumper.last_dump_time: - nodes = filter_by_last_dump_time(nodes, last_dump_time=self.base_dumper.last_dump_time) + if self.base_dump_config.incremental and self.base_dump_config.last_dump_time: + nodes = filter_by_last_dump_time(nodes, last_dump_time=self.base_dump_config.last_dump_time) return nodes @property - def processes_to_dump(self) -> ProcessesToDump: + def processes_to_dump(self) -> ProcessesDumpContainer: """Get the processes to dump from the collection of nodes. :return: Instance of the ``ProcessesToDump`` class containing the selected calculations and workflows. @@ -149,7 +154,7 @@ def processes_to_dump(self) -> ProcessesToDump: self._processes_to_dump = self._get_processes_to_dump() return self._processes_to_dump - def _get_processes_to_dump(self) -> ProcessesToDump: + def _get_processes_to_dump(self) -> ProcessesDumpContainer: """Retrieve the processeses from the collection nodes. If deduplication is selected, this method takes care of only dumping top-level workflows and only dump @@ -158,8 +163,12 @@ def _get_processes_to_dump(self) -> ProcessesToDump: :return: Instance of the ``ProcessesToDump`` class containing the selected calculations and workflows. """ + # Deduplication is already handled in the ``get_processes`` method, where PKs/UUIDs are used, rather than AiiDA + # ORM entities as here. Specifically, calculations that are part of a workflow are not dumpid in their own, + # dedicated directory if they are part of a workflow. + if not self.collection_nodes: - return ProcessesToDump(calculations=[], workflows=[]) + return ProcessesDumpContainer(calculations=[], workflows=[]) # Better than: `nodes = [orm.load_node(n) for n in self.collection_nodes]` # As the list comprehension fetches each node from the DB individually @@ -173,91 +182,63 @@ def _get_processes_to_dump(self) -> ProcessesToDump: profile_dump_config=self.profile_dump_config, calculations=calculations, workflows=workflows ) - return ProcessesToDump( + return ProcessesDumpContainer( calculations=calculations, workflows=workflows, ) - def _dump_calculations(self, calculations: Sequence[orm.CalculationNode]) -> None: - """Dump a collection of calculations. - - Deduplication is already handled in the ``get_processes`` method, where PKs/UUIDs are used, rather than AiiDA - ORM entities as here. Specifically, calculations that are part of a workflow are not dumpid in their own, - dedicated directory if they are part of a workflow. - - :param calculations: Sequence of ``orm.CalculationNode``s - :return: None - """ + def _dump_processes(self, processes: Sequence[orm.CalculationNode] | Sequence[orm.WorkflowNode]) -> None: + """Dump a collection of processes.""" - calculations_path = self.output_path / 'calculations' - dumped_calculations: dict[str, DumpLog] = {} + if len(processes) == 0: + return - logged_calculations: DumpDict = self.dump_logger.get_log()['calculations'] + # TODO: Only allow for "pure" sequences of Calculation- or WorkflowNodes, or also mixed? 
+ # TODO: If the latter possibly also have directory creation in the loop + sub_path = self.output_path / NodeDumpMapper.get_directory(node=processes[0]) + sub_path.mkdir(exist_ok=True, parents=True) - for calculation in calculations: - calculation_dumper = self.process_dumper + logger_attr = NodeDumpMapper.get_logger_attr(node=processes[0]) + # ! `getattr` gives a reference to the object, thus I can update the store directly + current_store = getattr(self.dump_logger.log, logger_attr) - calculation_dump_path = calculations_path / calculation_dumper._generate_default_dump_path( - process_node=calculation, prefix=None - ) + # breakpoint() - if self.profile_dump_config.symlink_duplicates and calculation.uuid in logged_calculations.keys(): - calculation_dump_path.parent.mkdir(exist_ok=True, parents=True) - os.symlink( - src=logged_calculations[calculation.uuid].path, - dst=calculation_dump_path, - ) + for process in processes: + process_dumper = self.process_dumper + + process_dump_path = sub_path / process_dumper._generate_default_dump_path(process_node=process, prefix=None) + + if self.profile_dump_config.symlink_duplicates and process.uuid in current_store.entries.keys(): + if process_dump_path.exists(): + continue + else: + process_dump_path.parent.mkdir(exist_ok=True, parents=True) + # breakpoint() + try: + os.symlink( + src=current_store.entries[process.uuid].path, + dst=process_dump_path, + ) + # TODO: If this works here, call `add_link` to the DumpLog to extend an existing DumpLog + except FileExistsError: + pass - # This is handled in the get_processes method: `if calculation.caller is None:` else: # TODO: Don't update the logger with the UUID of a symlinked calculation as keys must be unique # TODO: Possibly add another `symlink` attribute to `DumpLog` which can hold a list of symlinks - calculation_dumper._dump_calculation(calculation_node=calculation, output_path=calculation_dump_path) - - dumped_calculations[calculation.uuid] = DumpLog( - path=calculation_dump_path, - time=datetime.now().astimezone(), - ) - - self.dump_logger.update_calculations(new_calculations=dumped_calculations) - - def _dump_workflows(self, workflows: Sequence[orm.WorkflowNode]) -> None: - """Dump a collection of workflows.""" - workflow_path: Path = self.output_path / 'workflows' - dumped_workflows: dict[str, DumpLog] = {} + # TODO: Ignore for now, as I would need to retrieve the list of links, append to it, and assign again - workflow_path.mkdir(exist_ok=True, parents=True) + # process_dumper._dump_calculation(calculation_node=process, output_path=process_dump_path) + # ! TODO: Add DumpLogger here, such that sub-calculations of workflows are also registered in the + # ! 
dumping, otherwise they end up duplicated, as the registration is done here in the for loop + process_dumper.dump(process_node=process, output_path=process_dump_path) - logged_workflows: DumpDict = self.dump_logger.get_log()['workflows'] - - for workflow in workflows: - workflow_dumper: ProcessDumper = self.process_dumper - - workflow_dump_path: Path = workflow_path / workflow_dumper._generate_default_dump_path( - process_node=workflow, prefix=None + current_store.add_entry( + uuid=process.uuid, + entry=DumpLog(path=process_dump_path, time=datetime.now().astimezone()), ) - # Symlink here, if deduplication enabled and workflow was already dumped - if self.profile_dump_config.symlink_duplicates and workflow.uuid in logged_workflows.keys(): - workflow_dump_path.parent.mkdir(exist_ok=True, parents=True) - - os.symlink( - src=logged_workflows[workflow.uuid].path, - dst=workflow_dump_path, - ) - else: - workflow_dumper._dump_workflow( - workflow_node=workflow, - output_path=workflow_dump_path, - ) - - dumped_workflows[workflow.uuid] = DumpLog( - path=workflow_dump_path, - time=datetime.now().astimezone(), - ) - - self.dump_logger.update_workflows(new_workflows=dumped_workflows) - def dump(self) -> None: """Top-level method that actually performs the dumping of the AiiDA data for the collection. @@ -265,7 +246,7 @@ def dump(self) -> None: """ self.output_path.mkdir(exist_ok=True, parents=True) - collection_processes: ProcessesToDump = self._get_processes_to_dump() + collection_processes: ProcessesDumpContainer = self._get_processes_to_dump() # breakpoint() if not self.processes_to_dump.is_empty: @@ -273,88 +254,6 @@ def dump(self) -> None: # First, dump workflows, then calculations if len(collection_processes.workflows) > 0: - # breakpoint() - self._dump_workflows(workflows=collection_processes.workflows) + self._dump_processes(processes=collection_processes.workflows) if len(collection_processes.calculations) > 0: - # breakpoint() - self._dump_calculations(calculations=collection_processes.calculations) - - -# TODO: See, if I can generalize the dump sub-methods -# def _dump_processes( -# self, -# # processes: Sequence[orm.CalculationNode | orm.WorkflowNode], -# processes: Sequence[orm.CalculationNode] | Sequence[orm.WorkflowNode], -# ) -> None: -# """Dump a collection of calculations or workflows. 
- -# :param processes: Sequence of ``orm.CalculationNode``s or ``orm.WorkflowNode``s -# :param process_type: Type of processes, either 'calculations' or 'workflows' -# :return: None -# """ - -# # From, e.g., 'aiida.workflows:core.arithmetic.multiply_add' to 'workflows -# if isinstance(processes[0], orm.CalculationNode): -# process_type_str = 'calculations' -# elif isinstance(processes[0], orm.WorkflowNode): -# process_type_str = 'workflows' -# # else: -# # breakpoint() -# # process_type_str = processes[0].process_type.split(':')[0].split('.')[1] -# process_type_path = self.output_path / process_type_str -# process_type_path.mkdir(exist_ok=True, parents=True) - -# dumped_processes: dict[str, DumpLog] = {} -# logged_processes: DumpDict = self.dump_logger.get_log()[process_type_str] - -# # breakpoint() - -# for process in processes: -# process_dumper = self.process_dumper - -# process_dump_path = process_type_path / process_dumper._generate_default_dump_path( -# process_node=process, prefix=None -# ) - -# # Target directory already exists, skip this process -# if process_dump_path.exists(): -# continue - -# else: -# # Symlink here, if deduplication enabled and process was already dumped -# # TODO: Possibly check dirs here -# # TODO: Alternatively have method/endpoint to delete one calculation from the dumping -# # TODO: Which would also update the log. -# # Otherwise, one might delete a calculation, maybe because it was wrong, and then it won't be dumped -# # anymore ever. -# if self.deduplicate and process.uuid in logged_processes.keys(): -# try: -# os.symlink( -# src=logged_processes[process.uuid].path, -# dst=process_dump_path, -# ) -# except: -# # raise -# pass -# # breakpoint() -# else: -# if process_type_str == 'calculations': -# process_dumper._dump_calculation(calculation_node=process, output_path=process_dump_path) -# elif process_type_str == 'workflows': -# process_dumper._dump_workflow( -# workflow_node=process, -# output_path=process_dump_path, -# ) - - -# dumped_processes[process.uuid] = DumpLog( -# path=process_dump_path, -# time=datetime.now().astimezone(), -# ) - -# # breakpoint() - -# if process_type_str == 'calculations': -# self.dump_logger.update_calculations(new_calculations=dumped_processes) -# elif process_type_str == 'workflows': -# self.dump_logger.update_workflows(new_workflows=dumped_processes) + self._dump_processes(processes=collection_processes.calculations) diff --git a/src/aiida/tools/dumping/config.py b/src/aiida/tools/dumping/config.py index 09da896ed5..dab792e2d8 100644 --- a/src/aiida/tools/dumping/config.py +++ b/src/aiida/tools/dumping/config.py @@ -1,10 +1,51 @@ +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### + from dataclasses import dataclass +from datetime import datetime +from pathlib import Path + + +@dataclass +class BaseDumpConfig: + """Container for shared arguments of all Dumper classes.""" + + dump_parent_path: Path | None = None + overwrite: bool = False + incremental: bool = True + check_dirs: bool = False + # TODO: Make this a per-class attribute? 
+ last_dump_time: datetime | None = None + + def __post_init__(self): + if self.dump_parent_path is None: + self.dump_parent_path = Path.cwd() + + +@dataclass +class ProcessDumpConfig: + """Arguments for dumping process data.""" + + include_inputs: bool = True + include_outputs: bool = False + include_attributes: bool = True + include_extras: bool = True + flat: bool = False + dump_unsealed: bool = False @dataclass class ProfileDumpConfig: + """Arguments for dumping profile data.""" + dump_processes: bool = True symlink_duplicates: bool = True - delete_missing: bool = False # profile - extra_calc_dirs: bool = False # collection - organize_by_groups: bool = True # profile + delete_missing: bool = False + extra_calc_dirs: bool = False + organize_by_groups: bool = True diff --git a/src/aiida/tools/dumping/logger.py b/src/aiida/tools/dumping/logger.py index 7489df0bbd..18f7541199 100644 --- a/src/aiida/tools/dumping/logger.py +++ b/src/aiida/tools/dumping/logger.py @@ -1,63 +1,140 @@ +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### + import json -from dataclasses import dataclass +from dataclasses import dataclass, field, fields from datetime import datetime from pathlib import Path -from typing import TypeAlias +from typing import Collection @dataclass class DumpLog: """Represents a single dump log entry.""" + # TODO: Possibly add `node_type` or something similar here + path: Path time: datetime + links: list[Path] = field(default_factory=list) + + def to_dict(self) -> dict: + return {'path': str(self.path), 'time': self.time.isoformat(), 'links': [str(link) for link in self.links]} + + @classmethod + def from_dict(cls, data: dict) -> 'DumpLog': + return cls( + path=Path(data['path']), + time=datetime.fromisoformat(data['time']), + links=[Path(link) for link in data.get('links', [])], + ) + + +@dataclass +class DumpLogStore: + """A store for DumpLog entries, indexed by UUID.""" + + entries: dict[str, DumpLog] = field(default_factory=dict) + + # TODO: If I support keeping track of the symlinks, possibly should implement extending them here + def add_entry(self, uuid: str, entry: DumpLog) -> None: + """Add a single entry to the container.""" + self.entries[uuid] = entry + + def add_entries(self, entries: dict[str, DumpLog]) -> None: + """Add a collection of entries to the container.""" + self.entries.update(entries) + + def del_entry(self, uuid: str) -> bool: + """Remove a single entry by UUID.""" + if uuid in self.entries: + del self.entries[uuid] + return True + return False + + def del_entries(self, uuids: Collection[str]) -> None: + """Remove a collection of entries by UUID.""" + for uuid in uuids: + if uuid in self.entries: + del self.entries[uuid] + def get_entry(self, uuid: str) -> DumpLog | None: + """Retrieve a single entry by UUID.""" + return self.entries.get(uuid) -DumpDict: TypeAlias = dict[str, DumpLog] + def __len__(self) -> int: + """Return the number of entries in the container.""" + return len(self.entries) + + def __iter__(self): + """Iterate over all entries.""" + return iter(self.entries.items()) + + def to_dict(self) -> dict: + return {uuid: entry.to_dict() for 
uuid, entry in self.entries.items()}
+
+    @classmethod
+    def from_dict(cls, data: dict) -> 'DumpLogStore':
+        store = cls()
+        store.entries = {uuid: DumpLog.from_dict(entry) for uuid, entry in data.items()}
+        return store
+
+
+@dataclass
+class DumpLogStoreCollection:
+    """Represents the entire log, with calculations and workflows (will be extended with Data)."""
+
+    calculations: DumpLogStore
+    workflows: DumpLogStore
 
 
 class DumpLogger:
     """Main logger class using dataclasses for better structure."""
 
-    DUMP_FILE: str = '.dump_log.json'
+    DUMP_LOG_FILE: str = '.dump_log.json'
+
+    # TODO: Possibly add `get_calculations` and `get_workflows` as convenience methods
 
     def __init__(
         self,
         dump_parent_path: Path | None = None,
-        calculations: DumpDict | None = None,
-        workflows: DumpDict | None = None,
-        counter: int = 0,
+        calculations: DumpLogStore | None = None,
+        workflows: DumpLogStore | None = None,
+        # counter: int = 0,
     ) -> None:
         self.dump_parent_path = dump_parent_path or Path.cwd()
-        self.calculations = calculations or {}
-        self.workflows = workflows or {}
-        self.counter = 0
+        self.calculations = calculations or DumpLogStore()
+        self.workflows = workflows or DumpLogStore()
+        # self.counter = counter
 
     @property
-    def dump_file(self) -> Path:
+    def log_file_path(self) -> Path:
         """Get the path to the dump file."""
-        return self.dump_parent_path / self.DUMP_FILE
+        return self.dump_parent_path / self.DUMP_LOG_FILE
 
-    def update_calculations(self, new_calculations: DumpDict) -> None:
-        """Update the calculations log."""
-        self.calculations.update(new_calculations)
-        self.counter += len(new_calculations)
+    def add_entry(self, store: DumpLogStore, uuid: str, entry: DumpLog) -> None:
+        store.add_entry(uuid, entry)
 
-    def update_workflows(self, new_workflows: DumpDict) -> None:
-        """Update the workflows log."""
-        self.workflows.update(new_workflows)
-        self.counter += len(new_workflows)
+    def del_entry(self, store: DumpLogStore, uuid: str) -> bool:
+        return store.del_entry(uuid)
 
-    def get_log(self) -> dict[str, DumpDict]:
-        """Retrieve the current state of the log."""
-        return {'calculations': self.calculations, 'workflows': self.workflows}
+    @property
+    def log(self) -> DumpLogStoreCollection:
+        """Retrieve the current state of the log as a dataclass."""
+        return DumpLogStoreCollection(calculations=self.calculations, workflows=self.workflows)
 
     def save_log(self) -> None:
         """Save the log to a JSON file."""
 
-        def serialize_logs(logs: DumpDict) -> dict:
+        def serialize_logs(container: DumpLogStore) -> dict:
             serialized = {}
-            for uuid, entry in logs.items():
+            for uuid, entry in container.entries.items():
                 serialized[uuid] = {'path': str(entry.path), 'time': entry.time.isoformat()}
             return serialized
 
@@ -66,26 +143,34 @@ def serialize_logs(logs: DumpDict) -> dict:
             'workflows': serialize_logs(self.workflows),
         }
 
-        with self.dump_file.open('w', encoding='utf-8') as f:
+        with self.log_file_path.open('w', encoding='utf-8') as f:
             json.dump(log_dict, f, indent=4)
 
+    def __enter__(self) -> 'DumpLogger':
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback) -> None:
+        self.save_log()
+
     @classmethod
     def from_file(cls, dump_parent_path: Path) -> 'DumpLogger':
         """Alternative constructor to load from an existing JSON file."""
         instance = cls(dump_parent_path=dump_parent_path)
 
-        if not instance.dump_file.exists():
+        if not instance.log_file_path.exists():
             return instance
 
         try:
-            with instance.dump_file.open('r', encoding='utf-8') as f:
+            with instance.log_file_path.open('r', encoding='utf-8') as f:
                 data = json.load(f)
 
-            def deserialize_logs(category_data: dict) -> DumpDict:
-                deserialized = {}
+            def deserialize_logs(category_data: dict) -> DumpLogStore:
+                container = DumpLogStore()
                 for uuid, entry in category_data.items():
-                    deserialized[uuid] = DumpLog(path=Path(entry['path']), time=datetime.fromisoformat(entry['time']))
-                return deserialized
+                    container.add_entry(
+                        uuid, DumpLog(path=Path(entry['path']), time=datetime.fromisoformat(entry['time']))
+                    )
+                return container
 
             instance.calculations = deserialize_logs(data['calculations'])
             instance.workflows = deserialize_logs(data['workflows'])
@@ -94,3 +179,12 @@ def deserialize_logs(category_data: dict) -> DumpDict:
             raise
 
         return instance
+
+    def find_store_by_uuid(self, uuid: str) -> DumpLogStore | None:
+        """Find the store that contains the given UUID."""
+        # Iterate over the fields of the DumpLogStoreCollection dataclass for generality
+        for field_ in fields(self.log):
+            store = getattr(self.log, field_.name)
+            if uuid in store.entries:
+                return store
+        return None
diff --git a/src/aiida/tools/dumping/process.py b/src/aiida/tools/dumping/process.py
index 617c475bf6..0e744c4421 100644
--- a/src/aiida/tools/dumping/process.py
+++ b/src/aiida/tools/dumping/process.py
@@ -35,7 +35,7 @@
 from aiida.common.exceptions import NotExistentAttributeError
 from aiida.orm.utils import LinkTriple
 from aiida.tools.archive.exceptions import ExportValidationError
-from aiida.tools.dumping.base import BaseDumper
+from aiida.tools.dumping.config import BaseDumpConfig, ProcessDumpConfig
 from aiida.tools.dumping.utils import prepare_dump_path
 
 logger = logging.getLogger(__name__)
@@ -46,23 +46,20 @@ class ProcessDumper:
     def __init__(
         self,
-        base_dumper: BaseDumper | None = None,
-        include_inputs: bool = True,
-        include_outputs: bool = False,
-        include_attributes: bool = True,
-        include_extras: bool = True,
-        flat: bool = False,
-        dump_unsealed: bool = False,
+        base_dump_config: BaseDumpConfig | None = None,
+        process_dump_config: ProcessDumpConfig | None = None,
     ) -> None:
-        """Initialize the CollectionDumper."""
-        self.include_inputs = include_inputs
-        self.include_outputs = include_outputs
-        self.include_attributes = include_attributes
-        self.include_extras = include_extras
-        self.flat = flat
-        self.dump_unsealed = dump_unsealed
+        """Initialize the ProcessDumper."""
 
-        self.base_dumper = base_dumper or BaseDumper()
+        self.base_dump_config = base_dump_config or BaseDumpConfig()
+        self.process_dump_config = process_dump_config or ProcessDumpConfig()
+
+        self.include_inputs = self.process_dump_config.include_inputs
+        self.include_outputs = self.process_dump_config.include_outputs
+        self.include_attributes = self.process_dump_config.include_attributes
+        self.include_extras = self.process_dump_config.include_extras
+        self.flat = self.process_dump_config.flat
+        self.dump_unsealed = self.process_dump_config.dump_unsealed
 
     @staticmethod
     def _generate_default_dump_path(
@@ -224,7 +221,9 @@ def dump(
         output_path = output_path or self._generate_default_dump_path(process_node=process_node)
 
         prepare_dump_path(
-            path_to_validate=output_path, overwrite=self.base_dumper.overwrite, incremental=self.base_dumper.incremental
+            path_to_validate=output_path,
+            overwrite=self.base_dump_config.overwrite,
+            incremental=self.base_dump_config.incremental,
         )
 
         if isinstance(process_node, orm.CalculationNode):
@@ -263,8 +262,8 @@ def _dump_workflow(
 
         prepare_dump_path(
             path_to_validate=output_path,
-            overwrite=self.base_dumper.overwrite,
-            incremental=self.base_dumper.incremental,
+            overwrite=self.base_dump_config.overwrite,
+            incremental=self.base_dump_config.incremental,
         )
 
         self._dump_node_yaml(process_node=workflow_node, output_path=output_path)
@@ -320,7 +319,9 @@ def _dump_calculation(
         """
         prepare_dump_path(
-            path_to_validate=output_path, overwrite=self.base_dumper.overwrite, incremental=self.base_dumper.incremental
+            path_to_validate=output_path,
+            overwrite=self.base_dump_config.overwrite,
+            incremental=self.base_dump_config.incremental,
         )
 
         self._dump_node_yaml(process_node=calculation_node, output_path=output_path)
diff --git a/src/aiida/tools/dumping/profile.py b/src/aiida/tools/dumping/profile.py
index db03e2b5cf..e3f5d3a8c7 100644
--- a/src/aiida/tools/dumping/profile.py
+++ b/src/aiida/tools/dumping/profile.py
@@ -11,15 +11,15 @@
 from __future__ import annotations
 
-from typing import Sequence, cast
+from collections.abc import Collection
+from typing import cast
 
 from aiida import orm
 from aiida.common.log import AIIDA_LOGGER
 from aiida.manage import load_profile
 from aiida.manage.configuration.profile import Profile
-from aiida.tools.dumping.base import BaseDumper
 from aiida.tools.dumping.collection import CollectionDumper
-from aiida.tools.dumping.config import ProfileDumpConfig
+from aiida.tools.dumping.config import BaseDumpConfig, ProfileDumpConfig
 from aiida.tools.dumping.logger import DumpLogger
 from aiida.tools.dumping.process import ProcessDumper
 from aiida.tools.dumping.utils import _safe_delete, filter_by_last_dump_time
@@ -34,16 +34,15 @@ def __init__(
         self,
         profile: str | Profile | None = None,
         profile_dump_config: ProfileDumpConfig | None = None,
-        base_dumper: BaseDumper | None = None,
+        base_dump_config: BaseDumpConfig | None = None,
         process_dumper: ProcessDumper | None = None,
         dump_logger: DumpLogger | None = None,
-        # deduplicate: bool = True,
-        groups: Sequence[str | orm.Group] | None = None,
+        groups: Collection[str] | Collection[orm.Group] | None = None,
    ):
         """Initialize the ProfileDumper.
 
         :param profile: The selected profile to dump.
-        :param base_dumper: Base dumper instance or None (gets instantiated).
+        :param base_dump_config: Base dump configuration instance or None (gets instantiated).
         :param process_dumper: Process dumper instance or None (gets instantiated).
         :param dump_logger: Logger for the dumping (gets instantiated).
         :param organize_by_groups: Organize dumped data by groups.
@@ -52,9 +51,9 @@ def __init__(
 
         self.groups = groups
 
-        self.base_dumper = base_dumper or BaseDumper()
+        self.base_dump_config = base_dump_config or BaseDumpConfig()
         self.process_dumper = process_dumper or ProcessDumper()
-        self.dump_logger = dump_logger or DumpLogger(dump_parent_path=self.base_dumper.dump_parent_path)
+        self.dump_logger = dump_logger or DumpLogger(dump_parent_path=self.base_dump_config.dump_parent_path)
 
         self.profile_dump_config = profile_dump_config or ProfileDumpConfig()
 
@@ -62,27 +61,26 @@ def __init__(
             profile = load_profile(profile=profile, allow_switch=True)
 
         self.profile = profile
 
-        self._processes_to_dump: Sequence[str] | None = None
-        self._processes_to_delete: Sequence[str] | None = None
+        self._processes_to_dump: Collection[str] | None = None
+        self._processes_to_delete: Collection[str] | None = None
 
     def _dump_processes_not_in_any_group(self):
         """Dump the profile's process data not contained in any group."""
         # `dump_parent_path` set to CWD in the `post_init` method of the `BaseDumper` dataclass if not given
-        assert self.base_dumper.dump_parent_path is not None
+        assert self.base_dump_config.dump_parent_path is not None
 
         if self.profile_dump_config.organize_by_groups:
-            output_path = self.base_dumper.dump_parent_path / 'no-group'
+            output_path = self.base_dump_config.dump_parent_path / 'no-group'
         else:
-            output_path = self.base_dumper.dump_parent_path
+            output_path = self.base_dump_config.dump_parent_path
 
         no_group_nodes = self._get_no_group_processes()
 
         no_group_dumper = CollectionDumper(
             collection=no_group_nodes,
             profile_dump_config=self.profile_dump_config,
-            base_dumper=self.base_dumper,
+            base_dump_config=self.base_dump_config,
             process_dumper=self.process_dumper,
-            # deduplicate=self.deduplicate,
             dump_logger=self.dump_logger,
             output_path=output_path,
         )
@@ -98,21 +96,20 @@ def _dump_processes_not_in_any_group(self):
     def _dump_processes_per_group(self, groups):
         # === Dump data per-group if Groups exist in profile or are selected ===
-        assert self.base_dumper.dump_parent_path is not None
+        assert self.base_dump_config.dump_parent_path is not None
 
         for group in groups:
             if self.profile_dump_config.organize_by_groups:
-                output_path = self.base_dumper.dump_parent_path / f'group-{group.label}'
+                output_path = self.base_dump_config.dump_parent_path / f'group-{group.label}'
             else:
-                output_path = self.base_dumper.dump_parent_path
+                output_path = self.base_dump_config.dump_parent_path
 
             group_dumper = CollectionDumper(
-                base_dumper=self.base_dumper,
+                base_dump_config=self.base_dump_config,
                 profile_dump_config=self.profile_dump_config,
                 process_dumper=self.process_dumper,
                 dump_logger=self.dump_logger,
                 collection=group,
-                # deduplicate=self.deduplicate,
                 output_path=output_path,
             )
 
@@ -126,23 +123,23 @@ def _dump_processes_per_group(self, groups):
             group_dumper.dump()
 
-    def _get_no_group_processes(self) -> Sequence[str] | Sequence[int]:
+    def _get_no_group_processes(self) -> Collection[str]:
         """Obtain nodes in the profile that are not part of any group.
 
         :return: List of UUIDs of selected nodes.
""" - group_qb = orm.QueryBuilder().append(orm.Group) - profile_groups = cast(Sequence[orm.Group], group_qb.all(flat=True)) - process_qb = orm.QueryBuilder().append(orm.ProcessNode, project=['uuid']) - profile_processes = cast(Sequence[str], process_qb.all(flat=True)) + profile_groups = cast(Collection[orm.Group], orm.QueryBuilder().append(orm.Group).all(flat=True)) + profile_processes = cast( + Collection[str], orm.QueryBuilder().append(orm.ProcessNode, project=['uuid']).all(flat=True) + ) - nodes_in_groups: Sequence[str] = [node.uuid for group in profile_groups for node in group.nodes] + nodes_in_groups: list[str] = [node.uuid for group in profile_groups for node in group.nodes] # Need to expand here also with the called_descendants of `WorkflowNodes`, otherwise the called # `CalculationNode`s for `WorkflowNode`s that are part of a group are dumped twice # Get the called descendants of WorkflowNodes within the nodes_in_groups list - sub_nodes_in_groups: Sequence[str] = [ + sub_nodes_in_groups: list[str] = [ node.uuid for n in nodes_in_groups # if isinstance((workflow_node := orm.load_node(n)), orm.WorkflowNode) @@ -152,10 +149,12 @@ def _get_no_group_processes(self) -> Sequence[str] | Sequence[int]: nodes_in_groups += sub_nodes_in_groups - process_nodes: Sequence[str | int] = [ + process_nodes: Collection[str] = [ profile_node for profile_node in profile_processes if profile_node not in nodes_in_groups ] - process_nodes = filter_by_last_dump_time(nodes=process_nodes, last_dump_time=self.base_dumper.last_dump_time) + process_nodes = filter_by_last_dump_time( + nodes=process_nodes, last_dump_time=self.base_dump_config.last_dump_time + ) return process_nodes @@ -175,73 +174,67 @@ def dump_processes(self): else: self._dump_processes_per_group(groups=self.groups) - @staticmethod - def _get_number_of_nodes_to_dump(last_dump_time) -> dict[str, int]: - # TODO: Change this method... 
-        result = {}
-        for node_type in (orm.CalculationNode, orm.WorkflowNode):
-            qb = orm.QueryBuilder().append(node_type, project=['uuid'])
-            nodes = cast(Sequence[str], qb.all(flat=True))
-            nodes = filter_by_last_dump_time(nodes=nodes, last_dump_time=last_dump_time)
-            result[node_type.class_node_type.split('.')[-2] + 's'] = len(nodes)
-        return result
-
     @property
-    def processes_to_dump(self) -> Sequence[str]:
+    def processes_to_dump(self) -> Collection[str]:
         if self._processes_to_dump is None:
             self._processes_to_dump = self._get_processes_to_dump()
         return self._processes_to_dump
 
-    def _get_processes_to_dump(self) -> Sequence[str]:
-        process_qb = orm.QueryBuilder().append(
-            orm.ProcessNode, project=['uuid'], filters={'ctime': {'>': self.base_dumper.last_dump_time}}
-        )
+    def _get_processes_to_dump(self) -> Collection[str]:
+        if self.base_dump_config.last_dump_time is not None:
+            process_qb = orm.QueryBuilder().append(
+                orm.ProcessNode, project=['uuid'], filters={'ctime': {'>': self.base_dump_config.last_dump_time}}
+            )
+        else:
+            process_qb = orm.QueryBuilder().append(orm.ProcessNode, project=['uuid'])
 
-        profile_processes = cast(Sequence[str], process_qb.all(flat=True))
+        profile_processes = cast(Collection[str], process_qb.all(flat=True))
 
         return profile_processes
 
     @property
-    def processes_to_delete(self) -> Sequence[str]:
+    def processes_to_delete(self) -> Collection[str]:
         if self._processes_to_delete is None:
             self._processes_to_delete = self._get_processes_to_delete()
         return self._processes_to_delete
 
-    def _get_processes_to_delete(self) -> Sequence[str]:
+    def _get_processes_to_delete(self) -> Collection[str]:
         dump_logger = self.dump_logger
-        log = dump_logger.get_log()
-        dumped_uuids = set(list(log['calculations'].keys()) + list(log['workflows'].keys()))
-        # Cannot use QB here because, when deleted, not in the DB anymore
-        # dumped_qb = orm.QueryBuilder().append(orm.ProcessNode, filters={'uuid': {'in': dumped_uuids}}, project=['uuid'])
+        log = dump_logger.log
+
+        # breakpoint()
+        dumped_uuids = set(list(log.calculations.entries.keys()) + list(log.workflows.entries.keys()))
+        # Cannot use QB here because, when node deleted, it's not in the DB anymore
         # dumped_processes: set[str] = set(cast(list[str], dumped_qb.all(flat=True)))
 
         # TODO: Possibly filter here since last dump time
         # TODO: But it is highly likely that the last dump command with deletion was run a while ago
         # TODO: So I cannot filter by last dump time, but should probably take the whole set
         profile_qb = orm.QueryBuilder().append(orm.ProcessNode)
-        profile_processes = set(cast(Sequence[orm.ProcessNode], profile_qb.all(flat=True)))
+        profile_processes = set(cast(Collection[orm.ProcessNode], profile_qb.all(flat=True)))
         profile_uuids = set([process.uuid for process in profile_processes if process.caller is None])
 
         to_delete_uuids = list(dumped_uuids - profile_uuids)
 
         return to_delete_uuids
 
-    def _delete_missing_process_paths(self, to_delete_uuids):
-        log = self.dump_logger.get_log()
-        paths_to_delete = []
+    def _delete_missing_node(self, to_delete_uuid) -> None:
+        # TODO: Possibly make a delete method for the path and the log, and then call that in the loop
 
-        for to_delete_uuid in to_delete_uuids:
-            try:
-                paths_to_delete.append(log['workflows'][to_delete_uuid].path)
-            except KeyError:
-                paths_to_delete.append(log['calculations'][to_delete_uuid].path)
-            except:
-                raise
+        dump_logger = self.dump_logger
+        current_store = dump_logger.find_store_by_uuid(uuid=to_delete_uuid)
+        if not current_store:
+            return
 
-        for path_to_delete in paths_to_delete:
-            _safe_delete(path_to_validate=path_to_delete, safeguard_file='.aiida_node_metadata.yaml', verbose=False)
+        # ! Cannot load the node via its UUID here and use the type to get the correct store, as the Node is deleted
+        # ! from the DB. Should find a better solution
 
-        # breakpoint()
+        try:
+            path_to_delete = current_store.entries[to_delete_uuid].path
+            _safe_delete(path_to_validate=path_to_delete, safeguard_file='.aiida_node_metadata.yaml', verbose=False)
+            current_store.del_entry(uuid=to_delete_uuid)
+        except:
+            raise
 
     def delete_processes(self):
         to_dump_processes = self.processes_to_dump
@@ -250,9 +243,20 @@ def delete_processes(self):
         to_delete_processes = self.processes_to_delete
 
         print(f'TO_DUMP_PROCESSES: {to_dump_processes}')
         print(f'TO_DELETE_PROCESSES: {to_delete_processes}')
 
-        breakpoint()
-
-        self._delete_missing_process_paths(to_delete_uuids=to_delete_processes)
+        # breakpoint()
+        for to_delete_uuid in to_delete_processes:
+            self._delete_missing_node(to_delete_uuid=to_delete_uuid)
 
         # TODO: Need to also delete entry from the log when I delete the dir
         # TODO: Add also logging for node/path deletion
+
+    # @staticmethod
+    # def _get_number_of_nodes_to_dump(last_dump_time) -> dict[str, int]:
+    #     # TODO: Change this method...
+    #     result = {}
+    #     for node_type in (orm.CalculationNode, orm.WorkflowNode):
+    #         qb = orm.QueryBuilder().append(node_type, project=['uuid'])
+    #         nodes = cast(Collection[str], qb.all(flat=True))
+    #         nodes = filter_by_last_dump_time(nodes=nodes, last_dump_time=last_dump_time)
+    #         result[node_type.class_node_type.split('.')[-2] + 's'] = len(nodes)
+    #     return result
diff --git a/src/aiida/tools/dumping/utils.py b/src/aiida/tools/dumping/utils.py
index 17a075c59f..6893f36978 100644
--- a/src/aiida/tools/dumping/utils.py
+++ b/src/aiida/tools/dumping/utils.py
@@ -12,16 +12,50 @@
 from datetime import datetime
 from pathlib import Path
-from typing import cast
+from typing import Collection, cast
 
 from aiida import orm
 from aiida.common.log import AIIDA_LOGGER
 
-__all__ = ['prepare_dump_path']
+# TypeAlias not supported in Py 3.9
+# Collection[str] = Collection[str] | Collection[int] | None
+
+__all__ = ('NodeDumpMapper', 'prepare_dump_path')
 
 logger = AIIDA_LOGGER.getChild('tools.dumping')
 
 
+class NodeDumpMapper:
+    calculation_key: str = 'calculations'
+    workflow_key: str = 'workflows'
+
+    @classmethod
+    def get_directory(cls, node: orm.Node) -> Path:
+        # Check node type and map to the corresponding directory
+        if isinstance(node, orm.CalculationNode):
+            # This includes subclasses like orm.CalcFunctionNode and orm.CalcJobNode
+            return Path(cls.calculation_key)
+        elif isinstance(node, orm.WorkflowNode):
+            # This includes subclasses like orm.WorkFunctionNode and orm.WorkChainNode
+            return Path(cls.workflow_key)
+        else:
+            msg = f'Dumping not implemented yet for node type: {type(node)}'
+            raise NotImplementedError(msg)
+
+    @classmethod
+    def get_logger_attr(cls, node: orm.Node) -> str:
+        # Check node type and map to the corresponding directory
+        if isinstance(node, orm.CalculationNode):
+            # This includes subclasses like orm.CalcFunctionNode and orm.CalcJobNode
+            return cls.calculation_key
+        elif isinstance(node, orm.WorkflowNode):
+            # This includes subclasses like orm.WorkFunctionNode and orm.WorkChainNode
+            return cls.workflow_key
+        else:
+            msg = f'Dumping not implemented yet for node type: {type(node)}'
+            raise NotImplementedError(msg)
+
+
 def prepare_dump_path(
     path_to_validate: Path,
     overwrite: bool = False,
@@ -84,7 +118,7 @@ def _safe_delete(
     if not path_to_validate.exists():
         return
 
-    is_empty = any(path_to_validate.iterdir())
+    is_empty = not any(path_to_validate.iterdir())
     if is_empty:
         path_to_validate.rmdir()
         return
@@ -140,21 +174,7 @@ def _delete_dir_recursively(path):
         print(f'exception msg: {exception}')
 
 
-def _get_filtered_nodes(nodes: list[str | int], last_dump_time: datetime, key: str = 'uuid') -> list[str | int]:
-    """Helper function to get ``orm.Node``s from the DB based on ``id``/``uuid`` and filter by ``mtime``.
-
-    :param nodes: Collection of node PKs or UUIDs
-    :param last_dump_time: Last time nodes were dumped to disk.
-    :param key: Identifier to obtain nodes with, either ``id`` or ``uuid``.
-    :return: List of nodes filtered by ``last_dump_time``.
-    """
-
-    qb = orm.QueryBuilder().append(orm.Node, filters={key: {'in': nodes}})
-    nodes_orm: list[orm.Node] = cast(list[orm.Node], qb.all(flat=True))
-    return [getattr(node, key) for node in nodes_orm if node.mtime > last_dump_time]
-
-
-def filter_by_last_dump_time(nodes: list[str | int], last_dump_time: datetime) -> list[str | int]:
+def filter_by_last_dump_time(nodes: Collection[str], last_dump_time: datetime | None = None) -> Collection[str]:
     """Filter a list of nodes by the last dump time of the corresponding dumper.
 
     :param nodes: A list of node identifiers, which can be either UUIDs (str) or IDs (int).
@@ -163,16 +183,12 @@ def filter_by_last_dump_time(nodes: list[str | int], last_dump_time: datetime) -
     """
 
     # TODO: Possibly directly use QueryBuilder filter. Though, `nodes` directly accessible from orm.Group.nodes
-
     if not nodes or last_dump_time is None:
         return nodes
 
-    key = 'uuid' if isinstance(nodes[0], str) else 'id'
-    return _get_filtered_nodes(
-        nodes=nodes,
-        last_dump_time=last_dump_time,
-        key=key,
-    )
+    qb = orm.QueryBuilder().append(orm.Node, filters={'uuid': {'in': nodes}})
+    nodes_orm: list[orm.Node] = cast(list[orm.Node], qb.all(flat=True))
+    return [node.uuid for node in nodes_orm if node.mtime > last_dump_time]
 
 
 def extend_calculations(profile_dump_config, calculations, workflows):
diff --git a/tests/tools/dumping/test_collection.py b/tests/tools/dumping/test_collection.py
index 6b79dd1195..7dc309e788 100644
--- a/tests/tools/dumping/test_collection.py
+++ b/tests/tools/dumping/test_collection.py
@@ -115,7 +115,7 @@ def test_resolve_collection_nodes(self, setup_add_group, generate_calculation_no
         assert set(nodes) == set([add_nodes[0].uuid, cj_node1.uuid])
 
         # Filtering by time should work -> Now, only cj_node2 gets returned
-        add_dumper.base_dumper.last_dump_time = datetime.now().astimezone()
+        add_dumper.base_dump_config.last_dump_time = datetime.now().astimezone()
         cj_node2 = generate_calculation_node_add()
         add_group.add_nodes([cj_node2])
 
@@ -162,7 +162,7 @@ def test_dump_calculations_add(self, setup_add_group, tmp_path):
 
         add_dumper = CollectionDumper(collection=add_group, output_path=add_group_path)
 
-        add_dumper._dump_calculations(add_dumper._get_processes_to_dump().calculations)
+        add_dumper._dump_processes(add_dumper._get_processes_to_dump().calculations)
 
         expected_tree = {
             'calculations': {
@@ -185,7 +185,7 @@ def test_dump_calculations_multiply_add(self, setup_multiply_add_group, tmp_path
         multiply_add_dumper = CollectionDumper(collection=multiply_add_group, output_path=multiply_add_group_path)
 
         # No calculations to dump when deduplication is enabled
-        multiply_add_dumper._dump_calculations(multiply_add_dumper._get_processes_to_dump().calculations)
+        multiply_add_dumper._dump_processes(multiply_add_dumper._get_processes_to_dump().calculations)
         assert not (multiply_add_group_path / 'calculations').exists()
 
         # Now, disable de-duplication -> Should dump calculations
@@ -193,9 +193,7 @@ def test_dump_calculations_multiply_add(self, setup_multiply_add_group, tmp_path
             collection=multiply_add_group, output_path=multiply_add_group_path, deduplicate=False
         )
 
-        multiply_add_dumper_no_dedup._dump_calculations(
-            multiply_add_dumper_no_dedup._get_processes_to_dump().calculations
-        )
+        multiply_add_dumper_no_dedup._dump_processes(multiply_add_dumper_no_dedup._get_processes_to_dump().calculations)
 
         expected_tree_no_dedup = {
             'calculations': {
@@ -249,7 +247,7 @@ def test_dump_calculations_multiply_add(self, setup_multiply_add_group, tmp_path
     #     assert len(nodes) == 2
 
    #     # Filtering by time should work
-    #     collection_dumper.base_dumper.last_dump_time = datetime.now().astimezone()
+    #     collection_dumper.base_dump_config.last_dump_time = datetime.now().astimezone()
    #     cj_node2 = generate_calculation_node_add()
    #     add_group.add_nodes([cj_node2])
 
diff --git a/tests/tools/dumping/test_process.py b/tests/tools/dumping/test_process.py
index 56fb356054..b199ab1ade 100644
--- a/tests/tools/dumping/test_process.py
+++ b/tests/tools/dumping/test_process.py
@@ -15,7 +15,7 @@
 
 import pytest
 
-from aiida.tools.dumping.base import BaseDumper
+from aiida.tools.dumping.config import BaseDumpConfig
 from aiida.tools.dumping.process import ProcessDumper
 
 # Non-AiiDA variables
@@ -212,8 +212,8 @@ def test_dump_calculation_flat(tmp_path, generate_calculation_node_io):
 def test_dump_calculation_overwr_incr(tmp_path, generate_calculation_node_io):
     """Tests the ProcessDumper for the overwrite and incremental option."""
     dump_parent_path = tmp_path / 'cj-dump-test-overwrite'
-    base_dumper = BaseDumper(overwrite=False, incremental=False)
-    process_dumper = ProcessDumper(base_dumper=base_dumper)
+    base_dump_config = BaseDumpConfig(overwrite=False, incremental=False)
+    process_dumper = ProcessDumper(base_dump_config=base_dump_config)
     calculation_node = generate_calculation_node_io()
     calculation_node.seal()
     # Create safeguard file to mock existing dump directory
@@ -223,8 +223,8 @@ def test_dump_calculation_overwr_incr(tmp_path, generate_calculation_node_io):
     with pytest.raises(FileExistsError):
         process_dumper._dump_calculation(calculation_node=calculation_node, output_path=dump_parent_path)
     # With overwrite option true no error is raised and the dumping can run through.
-    base_dumper = BaseDumper(overwrite=True, incremental=False)
-    process_dumper = ProcessDumper(base_dumper=base_dumper)
+    base_dump_config = BaseDumpConfig(overwrite=True, incremental=False)
+    process_dumper = ProcessDumper(base_dump_config=base_dump_config)
     process_dumper._dump_calculation(calculation_node=calculation_node, output_path=dump_parent_path)
     assert (dump_parent_path / inputs_relpath / filename).is_file()
 
@@ -233,8 +233,8 @@ def test_dump_calculation_overwr_incr(tmp_path, generate_calculation_node_io):
     # Incremental also does work
     dump_parent_path.mkdir()
     (dump_parent_path / '.aiida_node_metadata.yaml').touch()
-    base_dumper = BaseDumper(overwrite=False, incremental=True)
-    process_dumper = ProcessDumper(base_dumper=base_dumper)
+    base_dump_config = BaseDumpConfig(overwrite=False, incremental=True)
+    process_dumper = ProcessDumper(base_dump_config=base_dump_config)
     process_dumper._dump_calculation(calculation_node=calculation_node, output_path=dump_parent_path)
     assert (dump_parent_path / inputs_relpath / filename).is_file()
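For orientation, a minimal usage sketch of the configuration wiring exercised by the test above; the `ProcessDumpConfig` keyword arguments and the node identifier are illustrative assumptions, not part of the patch:

from pathlib import Path

from aiida import load_profile, orm
from aiida.tools.dumping.config import BaseDumpConfig, ProcessDumpConfig
from aiida.tools.dumping.process import ProcessDumper

load_profile()

# Shared path/overwrite behaviour lives in BaseDumpConfig; per-process options in ProcessDumpConfig (assumed kwargs)
base_dump_config = BaseDumpConfig(overwrite=False, incremental=True)
process_dump_config = ProcessDumpConfig(include_inputs=True, include_outputs=False)
process_dumper = ProcessDumper(base_dump_config=base_dump_config, process_dump_config=process_dump_config)

# Hypothetical, sealed process node; any CalculationNode or WorkflowNode from the profile would do
process_node = orm.load_node('<process-node-uuid>')
process_dumper.dump(process_node=process_node, output_path=Path('process-dump'))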