Change babs init parameter --input to --datasets #230

Merged · 4 commits · Mar 21, 2025
36 changes: 19 additions & 17 deletions babs/babs.py
@@ -269,7 +269,7 @@ def babs_bootstrap(

Parameters
----------
-input_ds: class `Input_ds`
+input_ds: class `InputDatasets`
Input dataset(s).
container_name: str
name of the container, best to include version number.
@@ -616,7 +616,7 @@ def clean_up(self, input_ds):

Parameters
----------
-input_ds: class `Input_ds`
+input_ds: class `InputDatasets`
information of input dataset(s)

Notes
@@ -683,7 +683,7 @@ def babs_check_setup(self, input_ds, flag_job_test):

Parameters
----------
-input_ds: class `Input_ds`
+input_ds: class `InputDatasets`
information of input dataset(s)
flag_job_test: bool
Whether to submit and run a test job.
@@ -781,7 +781,7 @@ def babs_check_setup(self, input_ds, flag_job_test):
)

# ROADMAP: check if input dataset ID saved in YAML file
-# (not saved yet, also need to add to Input_ds class too)
+# (not saved yet, also need to add to InputDatasets class too)
# = that in `.gitmodules` in cloned ds
# However, It's pretty unlikely that someone changes inputs/data on their own
# if they're using BABS
@@ -2043,17 +2043,17 @@ def babs_unzip(self, container_config_yaml_file):
# ====================================================


-class Input_ds:
+class InputDatasets:
"""This class is for input dataset(s)"""

-def __init__(self, input_cli):
+def __init__(self, datasets):
"""
-This is to initialize `Input_ds` class.
+This is to initialize `InputDatasets` class.

Parameters
----------
-input_cli: nested list of strings
-    see CLI `babs init --input` for more
+datasets : dict
+    see CLI `babs init --datasets` for more

Attributes
----------
@@ -2081,7 +2081,7 @@ def __init__(self, input_cli):
# create an empty pandas DataFrame:
self.df = pd.DataFrame(
None,
-index=list(range(0, len(input_cli))),
+index=list(range(len(datasets))),
columns=[
'name',
'path_in',
@@ -2095,11 +2095,13 @@
# number of dataset(s):
self.num_ds = self.df.shape[0] # number of rows in `df`

-        # change the `input_cli` from nested list to a pandas dataframe:
-        for i in range(0, self.num_ds):
-            self.df.loc[i, 'name'] = input_cli[i][0]
-            self.df.loc[i, 'path_in'] = input_cli[i][1]
-            self.df.loc[i, 'path_now_rel'] = op.join('inputs/data', self.df.loc[i, 'name'])
+        # change the `datasets` from dictionary to a pandas dataframe:
+        for i_dset, (name, path) in enumerate(datasets.items()):
+            self.df.loc[i_dset, 'name'] = name
+            self.df.loc[i_dset, 'path_in'] = path
+            self.df.loc[i_dset, 'path_now_rel'] = op.join(
+                'inputs/data', self.df.loc[i_dset, 'name']
+            )

# sanity check: input ds names should not be identical:
if len(set(self.df['name'].tolist())) != self.num_ds: # length of the set = number of ds
@@ -2515,7 +2517,7 @@ def generate_bash_run_bidsapp(self, bash_path, input_ds, type_session):
----------
bash_path: str
The path to the bash file to be generated. It should be in the `analysis/code` folder.
-input_ds: class `Input_ds`
+input_ds: class `InputDatasets`
input dataset(s) information
type_session: str
multi-ses or single-ses.
@@ -2736,7 +2738,7 @@ def generate_bash_participant_job(self, bash_path, input_ds, type_session, syste
----------
bash_path: str
The path to the bash file to be generated. It should be in the `analysis/code` folder.
-input_ds: class `Input_ds`
+input_ds: class `InputDatasets`
input dataset(s) information
type_session: str
"multi-ses" or "single-ses".
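The central change in `babs.py` swaps `Input_ds`'s nested-list handling for iteration over a dict. Below is a standalone sketch of the new dict-to-DataFrame conversion; the dataset names and paths are invented for illustration and are not from the PR:

```python
import os.path as op

import pandas as pd

# Hypothetical mapping, as the new `--datasets NAME=PATH` CLI would produce:
datasets = {'BIDS': '/data/bids', 'freesurfer': '/data/fs'}

# Empty DataFrame with one row per input dataset:
df = pd.DataFrame(
    None,
    index=list(range(len(datasets))),
    columns=['name', 'path_in', 'path_now_rel'],
)

# Dicts preserve insertion order (Python 3.7+), so row order is deterministic:
for i_dset, (name, path) in enumerate(datasets.items()):
    df.loc[i_dset, 'name'] = name
    df.loc[i_dset, 'path_in'] = path
    df.loc[i_dset, 'path_now_rel'] = op.join('inputs/data', name)

print(df['path_now_rel'].tolist())
```

One side effect of the dict representation: the duplicate-name sanity check kept in the real code becomes largely redundant, since dict keys are unique by construction.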
50 changes: 23 additions & 27 deletions babs/cli.py
@@ -8,12 +8,13 @@
import pandas as pd
from filelock import FileLock, Timeout

-from babs.babs import BABS, Input_ds, System
+from babs.babs import BABS, InputDatasets, System

# import sys
# from datalad.interface.base import build_doc
# from babs.core_functions import babs_init, babs_submit, babs_status
from babs.utils import (
+ToDict,
create_job_status_csv,
get_datalad_version,
read_job_status_csv,
@@ -50,18 +51,16 @@ def _parse_init():
required=True,
)
parser.add_argument(
-        '--input',
-        action='append',  # append each `--input` as a list;
-        dest='input_dataset',
-        # will get a nested list: [[<ds_name_1>, <ds_path_1>], [<ds_name_2>, <ds_path_2>]]
-        # ref: https://docs.python.org/3/library/argparse.html
-        nargs=2,  # expect 2 arguments per `--input` from the command line;
-        # they will be gathered as one list
-        metavar=('input_dataset_name', 'input_dataset_path'),
-        help='Input BIDS DataLad dataset. '
-        'Format: ``--input <name> <path/to/input_datalad_dataset>``. '
-        'Here ``<name>`` is a name of this input dataset. '
-        '``<path/to/input_datalad_dataset>`` is the path to this input dataset.',
+        '--datasets',
+        action=ToDict,
+        metavar='NAME=PATH',
+        type=str,
+        nargs='+',
+        help=(
+            'Input BIDS datasets. '
+            'These must be provided as named folders '
+            '(e.g., `--datasets smriprep=/path/to/smriprep`).'
+        ),
required=True,
)
parser.add_argument(
@@ -162,7 +161,7 @@ def _enter_init(argv=None):
def babs_init_main(
where_project: str,
project_name: str,
-input_dataset: list,
+datasets: dict,
list_sub_file: str,
container_ds: str,
container_name: str,
@@ -179,10 +178,9 @@
absolute path to the directory where the project will be created
project_name: str
the babs project name
-input_dataset: nested list
-    for each sub-list:
-    element 1: name of input datalad dataset (str)
-    element 2: path to the input datalad dataset (str)
+datasets : dictionary
+    Keys are the names of the input BIDS datasets, and values are the paths to the input BIDS
+    datasets.
list_sub_file: str or None
Path to the CSV file that lists the subject (and sessions) to analyze;
or `None` if CLI's flag isn't specified
@@ -236,7 +234,7 @@
type_session = validate_type_session(type_session)

# input dataset:
-input_ds = Input_ds(input_dataset)
+input_ds = InputDatasets(datasets)
input_ds.get_initial_inclu_df(list_sub_file, type_session)

# Note: not to perform sanity check on the input dataset re: if it exists
@@ -960,7 +958,7 @@

def get_existing_babs_proj(project_root):
"""
-This is to get `babs_proj` (class `BABS`) and `input_ds` (class `Input_ds`)
+This is to get `babs_proj` (class `BABS`) and `input_ds` (class `InputDatasets`)
This is to get `babs_proj` (class `BABS`) and `input_ds` (class `InputDatasets`)
based on existing yaml file `babs_proj_config.yaml`.
This should be used by `babs_submit()` and `babs_status`.

@@ -974,7 +972,7 @@ def get_existing_babs_proj(project_root):
-------
babs_proj: class `BABS`
information about a BABS project
-input_ds: class `Input_ds`
+input_ds: class `InputDatasets`
information about input dataset(s)
"""

@@ -1024,15 +1022,13 @@
' Something was wrong during `babs init`...'
)

-input_cli = []  # to be a nested list
+datasets = {}  # to be a dictionary
for i_ds in range(0, len(input_ds_yaml)):
ds_index_str = '$INPUT_DATASET_#' + str(i_ds + 1)
-        input_cli.append(
-            [input_ds_yaml[ds_index_str]['name'], input_ds_yaml[ds_index_str]['path_in']]
-        )
+        datasets[input_ds_yaml[ds_index_str]['name']] = input_ds_yaml[ds_index_str]['path_in']

-# Get the class `Input_ds`:
-input_ds = Input_ds(input_cli)
+# Get the class `InputDatasets`:
+input_ds = InputDatasets(datasets)
# update information based on current babs project:
# 1. `path_now_abs`:
input_ds.assign_path_now_abs(babs_proj.analysis_path)
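`get_existing_babs_proj` now reassembles a dict, rather than a nested list, before constructing `InputDatasets`. A minimal sketch of that loop follows; the `input_ds_yaml` content is an invented stand-in for what would be parsed from `babs_proj_config.yaml`:

```python
# Invented example of the parsed YAML section (real keys follow the
# `$INPUT_DATASET_#<n>` pattern shown in the diff above):
input_ds_yaml = {
    '$INPUT_DATASET_#1': {'name': 'BIDS', 'path_in': '/data/bids'},
    '$INPUT_DATASET_#2': {'name': 'freesurfer', 'path_in': '/data/fs'},
}

datasets = {}
for i_ds in range(len(input_ds_yaml)):
    ds_index_str = '$INPUT_DATASET_#' + str(i_ds + 1)
    datasets[input_ds_yaml[ds_index_str]['name']] = input_ds_yaml[ds_index_str]['path_in']

# `datasets` now has the same shape `babs init --datasets` would have produced:
print(datasets)
```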
37 changes: 31 additions & 6 deletions babs/utils.py
@@ -8,8 +8,10 @@
import subprocess
import sys
import warnings # built-in, no need to install
+from argparse import Action
from datetime import datetime
from importlib.metadata import version
+from pathlib import Path

import numpy as np
import pandas as pd
@@ -47,7 +49,7 @@ def check_validity_unzipped_input_dataset(input_ds, type_session):

Parameters
----------
-input_ds: class `Input_ds`
+input_ds: class `InputDatasets`
info on input dataset(s)
type_session: str
multi-ses or single-ses
@@ -291,7 +293,7 @@ def generate_cmd_singularityRun_from_config(config, input_ds):
config: dictionary
attribute `config` in class Container;
got from `read_container_config_yaml()`
-input_ds: class `Input_ds`
+input_ds: class `InputDatasets`
input dataset(s) information

Returns
@@ -711,7 +713,7 @@ def generate_cmd_unzip_inputds(input_ds, type_session):

Parameters
----------
-input_ds: class `Input_ds`
+input_ds: class `InputDatasets`
information about input dataset(s)
type_session: str
"multi-ses" or "single-ses"
@@ -987,7 +989,7 @@ def generate_cmd_determine_zipfilename(input_ds, type_session):

Parameters
----------
-input_ds: class Input_ds
+input_ds: class InputDatasets
information about input dataset(s)
type_session: str
"multi-ses" or "single-ses"
@@ -1084,7 +1086,7 @@ def generate_cmd_datalad_run(container, input_ds, type_session):
----------
container: class `Container`
Information about the container
-input_ds: class `Input_ds`
+input_ds: class `InputDatasets`
Information about input dataset(s)
type_session: str
"multi-ses" or "single-ses"
@@ -1170,7 +1172,7 @@ def get_list_sub_ses(input_ds, config, babs):

Parameters
----------
-input_ds: class `Input_ds`
+input_ds: class `InputDatasets`
information about input dataset(s)
config: config from class `Container`
container's yaml file that's read into python
@@ -2925,3 +2927,26 @@ def ceildiv(a, b):
...is-there-a-ceiling-equivalent-of-operator-in-python
"""
return -(a // -b)


+class ToDict(Action):
+    """A custom argparse "store" action to handle a list of key=value pairs."""
+
+    def __call__(self, parser, namespace, values, option_string=None):  # noqa: U100
+        """Call the argument."""
+        d = {}
+        for spec in values:
+            try:
+                name, loc = spec.split('=')
+                loc = Path(loc)
+            except ValueError:
+                loc = Path(spec)
+                name = loc.name
+
+            if name in d:
+                raise parser.error(f'Received duplicate derivative name: {name}')
+            elif name == 'preprocessed':
+                raise parser.error("The 'preprocessed' derivative is reserved for internal use.")
+
+            d[name] = str(loc)
+        setattr(namespace, self.dest, d)
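The `ToDict` action can be exercised on its own with a throwaway parser. The sketch below mirrors the class added above; the `--datasets` values are placeholders, and since `parser.error` already raises `SystemExit`, the redundant `raise` from the original is dropped:

```python
from argparse import Action, ArgumentParser
from pathlib import Path


class ToDict(Action):
    """Collect NAME=PATH tokens into a dict; a bare path falls back to its folder name."""

    def __call__(self, parser, namespace, values, option_string=None):
        d = {}
        for spec in values:
            try:
                name, loc = spec.split('=')
                loc = Path(loc)
            except ValueError:
                # No '=' in the token: treat the whole token as a path
                loc = Path(spec)
                name = loc.name
            if name in d:
                parser.error(f'Received duplicate derivative name: {name}')
            elif name == 'preprocessed':
                parser.error("The 'preprocessed' derivative is reserved for internal use.")
            d[name] = str(loc)
        setattr(namespace, self.dest, d)


parser = ArgumentParser()
parser.add_argument('--datasets', action=ToDict, nargs='+', metavar='NAME=PATH')
args = parser.parse_args(['--datasets', 'BIDS=/data/bids', 'freesurfer=/data/fs'])
print(args.datasets)  # {'BIDS': '/data/bids', 'freesurfer': '/data/fs'}
```

Note the fallback branch: a token with no `=` (e.g. `--datasets /data/qsiprep`) is treated as a path, and its last component (`qsiprep`) becomes the dataset name.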
25 changes: 13 additions & 12 deletions docs/babs-init.rst
@@ -12,9 +12,9 @@ Command-Line Arguments
:ref: babs.cli._parse_init
:prog: babs init

---input : @after
-    Examples: ``--input BIDS /path/to/BIDS_datalad_dataset``;
-    ``--input raw_BIDS https://osf.io/t8urc/``.
+--datasets : @after
+    Examples: ``--datasets BIDS=/path/to/BIDS_datalad_dataset``;
+    ``--datasets raw_BIDS=https://osf.io/t8urc/``.

``<name>`` is defined by yourself. Please see section
:ref:`how-to-define-name-of-input-dataset` below for general guidelines
@@ -38,17 +38,17 @@ Please see document :ref:`preparation` for how to prepare these inputs.

.. _how-to-define-name-of-input-dataset:

----------------------------------------------------------------------------
-How to define the input dataset's name ``<name>`` in ``babs init --input``?
----------------------------------------------------------------------------
+------------------------------------------------------------------------------
+How to define the input dataset's name ``<name>`` in ``babs init --datasets``?
+------------------------------------------------------------------------------

**General guideline**: a string you think that's informative.
Examples are ``BIDS``, ``freesurfer``.

**Specific restrictions**:

-1. If you have **more than one** input BIDS dataset (i.e., more than one ``--input``),
-   please make sure the ``<name>`` are different for each dataset;
+1. If you have **more than one** input BIDS dataset (i.e., more than one ``--datasets``),
+   please make sure the ``<name>`` is different for each dataset;
2. If an input BIDS dataset is a **zipped dataset**, i.e., files are zipped files, such as BIDS data
derivatives from another BABS project:

@@ -138,7 +138,7 @@ an SGE cluster:
babs init \
--where_project /path/to/a/folder/holding/BABS/project \
--project_name my_BABS_project \
-    --input BIDS /path/to/BIDS_datalad_dataset \
+    --datasets BIDS=/path/to/BIDS_datalad_dataset \
--container_ds /path/to/toybidsapp-container \
--container_name toybidsapp-0-0-7 \
--container_config_yaml_file /path/to/container_toybidsapp.yaml \
@@ -154,8 +154,9 @@ Therefore, the 2nd input dataset should be named as 'freesurfer', a keyword in f

babs init \
... \
-    --input BIDS /path/to/BIDS_datalad_dataset \
-    --input freesurfer /path/to/freesurfer_results_datalad_dataset \
+    --datasets \
+        BIDS=/path/to/BIDS_datalad_dataset \
+        freesurfer=/path/to/freesurfer_results_datalad_dataset \
...

***************
@@ -169,7 +170,7 @@
``Cloning input dataset #x: '/path/to/input_dataset'``, there was an error message that includes this information:
``err: 'fatal: repository '/path/to/input_dataset' does not exist'``.

-Diagnosis: This means that the specified path to this input dataset (i.e., in ``--input``) was not valid;
+Diagnosis: This means that the specified path to this input dataset (i.e., in ``--datasets``) was not valid;
there is no DataLad dataset there.

How to solve the problem: Fix this path. To confirm the updated path is valid, you can try cloning