Change --type-session parameter to --processing-level #233

Merged 14 commits on Mar 24, 2025
8 changes: 4 additions & 4 deletions .circleci/config.yml
@@ -16,7 +16,7 @@ jobs:
path: /home/circleci/src/babs
- restore_cache:
keys:
- test-data-v1
- test-data-v2
- run:
name: Download test datasets
command: |
@@ -30,7 +30,7 @@ jobs:
echo "Contents of test_data directory:"
ls -la /home/circleci/test_data/
- save_cache:
key: test-data-v1
key: test-data-v2
paths:
- /home/circleci/test_data

@@ -43,7 +43,7 @@ jobs:
path: /home/circleci/src/babs
- restore_cache:
keys:
- test-data-v1
- test-data-v2
- run:
name: pytest of BABS
no_output_timeout: 1h
@@ -62,7 +62,7 @@ jobs:
path: /home/circleci/src/babs
- restore_cache:
keys:
- test-data-v1
- test-data-v2
- run:
name: pytest of BABS
no_output_timeout: 1h
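
Bumping the cache key from test-data-v1 to test-data-v2 invalidates CircleCI's saved test-data cache, presumably so each run re-downloads test datasets updated alongside the renamed parameter.
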
162 changes: 81 additions & 81 deletions babs/babs.py

Large diffs are not rendered by default.

110 changes: 53 additions & 57 deletions babs/cli.py
@@ -19,7 +19,7 @@
get_datalad_version,
read_job_status_csv,
read_yaml,
validate_type_session,
validate_processing_level,
)


@@ -95,24 +95,13 @@ def _parse_init():
' of how to run the BIDS App container',
)
parser.add_argument(
'--type_session',
'--type-session',
'--processing_level',
'--processing-level',
choices=[
'single-ses',
'single_ses',
'single-session',
'single_session',
'multi-ses',
'multi_ses',
'multiple-ses',
'multiple_ses',
'multi-session',
'multi_session',
'multiple-session',
'multiple_session',
'subject',
'session',
],
help="Whether the input dataset is single-session ['single-ses'] "
"or multiple-session ['multi-ses']",
help='Whether jobs should be run on a per-subject or per-session (within subject) basis.',
required=True,
)
parser.add_argument(
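
Since the babs.py diff above is not rendered, here is a minimal sketch of what the renamed validate_processing_level helper (imported at the top of cli.py and called in babs_init_main below) plausibly does. The two accepted values come from the new choices list; the body itself is an assumption, not the actual implementation:

def validate_processing_level(processing_level):
    # Sketch only: the real implementation is in the unrendered babs.py diff.
    # argparse already restricts CLI input to 'subject' or 'session', so a
    # check like this would mainly guard programmatic callers.
    if processing_level not in ('subject', 'session'):
        raise ValueError(
            f"processing_level must be 'subject' or 'session', got {processing_level!r}"
        )
    return processing_level
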
@@ -161,7 +150,7 @@ def babs_init_main(
container_ds: str,
container_name: str,
container_config: str,
type_session: str,
processing_level: str,
queue: str,
keep_if_failed: bool,
):
@@ -178,8 +167,8 @@ def babs_init_main(
list_sub_file: str or None
Path to the CSV file that lists the subject (and sessions) to analyze;
or `None` if CLI's flag isn't specified
single-ses data: column of 'sub_id';
multi-ses data: columns of 'sub_id' and 'ses_id'
subject data: column of 'sub_id';
session data: columns of 'sub_id' and 'ses_id'
container_ds: str
path to the container datalad dataset
container_name: str
@@ -188,8 +177,8 @@ def babs_init_main(
container_config: str
Path to a YAML file that contains the configurations
of how to run the BIDS App container
type_session: str
multi-ses or single-ses
processing_level : {'subject', 'session'}
whether processing is done on a subject-wise or session-wise basis
queue: str
sge or slurm
keep_if_failed: bool
@@ -222,12 +211,12 @@ def babs_init_main(
# if no datalad is installed, will raise error
print('DataLad version: ' + get_datalad_version())

# validate `type_session`:
type_session = validate_type_session(type_session)
# validate `processing_level`:
processing_level = validate_processing_level(processing_level)

# input dataset:
input_ds = InputDatasets(datasets)
input_ds.get_initial_inclu_df(list_sub_file, type_session)
input_ds.get_initial_inclu_df(list_sub_file, processing_level)

# Note: not to perform sanity check on the input dataset re: if it exists
# as: 1) robust way is to clone it, which will take longer time;
@@ -239,15 +228,15 @@ def babs_init_main(
# currently solution: add notes in Debugging in `babs init` docs: `babs init.rst`

# Create an instance of babs class:
babs_proj = BABS(project_root, type_session, queue)
babs_proj = BABS(project_root, processing_level, queue)

# Validate system's type name `queue`:
system = System(queue)

# print out key information for visual check:
print('')
print('project_root of this BABS project: ' + babs_proj.project_root)
print('type of data of this BABS project: ' + babs_proj.type_session)
print('processing level of this BABS project: ' + babs_proj.processing_level)
print('job scheduling system of this BABS project: ' + babs_proj.queue)
print('')

@@ -394,7 +383,9 @@ def _parse_submit():
# --count, --job: can only request one of them and none of them are required.
group = parser.add_mutually_exclusive_group(required=False)
group.add_argument(
'--count', type=int, help='Number of jobs to submit. It should be a positive integer.'
'--count',
type=int,
help='Number of jobs to submit. It should be a positive integer.',
)
group.add_argument(
'--all',
@@ -452,7 +443,7 @@ def babs_submit_main(
into `babs_submit()`
any negative int will be treated as submitting all jobs that haven't been submitted.
job: nested list or None
For each sub-list, the length should be 1 (for single-ses) or 2 (for multi-ses)
For each sub-list, the length should be 1 (for subject) or 2 (for session)
"""
# Get class `BABS` based on saved `analysis/code/babs_proj_config.yaml`:
babs_proj, _ = get_existing_babs_proj(project_root)
@@ -480,47 +471,50 @@ def babs_submit_main(
count = -1 # just in case; make sure all specified jobs will be submitted

# sanity check:
if babs_proj.type_session == 'single-ses':
if babs_proj.processing_level == 'subject':
expected_len = 1
elif babs_proj.type_session == 'multi-ses':
elif babs_proj.processing_level == 'session':
expected_len = 2
for i_job in range(0, len(job)):
# expected length in each sub-list:
assert len(job[i_job]) == expected_len, (
'There should be '
+ str(expected_len)
+ ' arguments in `--job`,'
+ ' as input dataset(s) is '
+ babs_proj.type_session
+ ' as processing level is '
+ babs_proj.processing_level
+ '!'
)
# 1st argument:
assert job[i_job][0][0:4] == 'sub-', (
'The 1st argument of `--job`' + " should be 'sub-*'!"
)
if babs_proj.type_session == 'multi-ses':
if babs_proj.processing_level == 'session':
# 2nd argument:
assert job[i_job][1][0:4] == 'ses-', (
'The 2nd argument of `--job`' + " should be 'ses-*'!"
)

# turn into a pandas DataFrame:
if babs_proj.type_session == 'single-ses':
if babs_proj.processing_level == 'subject':
df_job_specified = pd.DataFrame(
None, index=list(range(0, len(job))), columns=['sub_id']
)
elif babs_proj.type_session == 'multi-ses':
elif babs_proj.processing_level == 'session':
df_job_specified = pd.DataFrame(
None, index=list(range(0, len(job))), columns=['sub_id', 'ses_id']
)
for i_job in range(0, len(job)):
df_job_specified.at[i_job, 'sub_id'] = job[i_job][0]
if babs_proj.type_session == 'multi-ses':
if babs_proj.processing_level == 'session':
df_job_specified.at[i_job, 'ses_id'] = job[i_job][1]

# sanity check:
df_job_specified = check_df_job_specific(
df_job_specified, babs_proj.job_status_path_abs, babs_proj.type_session, 'babs submit'
df_job_specified,
babs_proj.job_status_path_abs,
babs_proj.processing_level,
'babs submit',
)
else: # `job` is None:
df_job_specified = None
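
To make the nested-list format concrete: for a session-level project, a request along the lines of `babs submit --job sub-01 ses-A --job sub-02 ses-B` (subject and session IDs hypothetical) reaches this code path as a nested list and becomes the DataFrame built above:

import pandas as pd

job = [['sub-01', 'ses-A'], ['sub-02', 'ses-B']]  # each sub-list has length 2 for 'session'
df_job_specified = pd.DataFrame(job, columns=['sub_id', 'ses_id'])
print(df_job_specified)
#    sub_id ses_id
# 0  sub-01  ses-A
# 1  sub-02  ses-B
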
@@ -637,8 +631,8 @@ def babs_status_main(
resubmit: nested list or None
each sub-list: one of 'failed', 'pending'. Not to include 'stalled' now until tested.
resubmit_job: nested list or None
For each sub-list, the length should be 1 (for single-ses) or 2 (for multi-ses)
container_config: str or None
For each sub-list, the length should be 1 (for subject) or 2 (for session)
container_config : str or None
Path to a YAML file that contains the configurations
of how to run the BIDS App container.
It may include 'alert_log_messages' section
@@ -687,9 +681,9 @@ def babs_status_main(
# If `resubmit-job` is requested:
if resubmit_job is not None:
# sanity check:
if babs_proj.type_session == 'single-ses':
if babs_proj.processing_level == 'subject':
expected_len = 1
elif babs_proj.type_session == 'multi-ses':
elif babs_proj.processing_level == 'session':
expected_len = 2

for i_job in range(0, len(resubmit_job)):
@@ -698,40 +692,42 @@ def babs_status_main(
'There should be '
+ str(expected_len)
+ ' arguments in `--resubmit-job`,'
+ ' as input dataset(s) is '
+ babs_proj.type_session
+ ' as processing level is '
+ babs_proj.processing_level
+ '!'
)
# 1st argument:
assert resubmit_job[i_job][0][0:4] == 'sub-', (
'The 1st argument of `--resubmit-job`' + " should be 'sub-*'!"
)
if babs_proj.type_session == 'multi-ses':
if babs_proj.processing_level == 'session':
# 2nd argument:
assert resubmit_job[i_job][1][0:4] == 'ses-', (
'The 2nd argument of `--resubmit-job`' + " should be 'ses-*'!"
)

# turn into a pandas DataFrame:
if babs_proj.type_session == 'single-ses':
if babs_proj.processing_level == 'subject':
df_resubmit_job_specific = pd.DataFrame(
None, index=list(range(0, len(resubmit_job))), columns=['sub_id']
)
elif babs_proj.type_session == 'multi-ses':
elif babs_proj.processing_level == 'session':
df_resubmit_job_specific = pd.DataFrame(
None, index=list(range(0, len(resubmit_job))), columns=['sub_id', 'ses_id']
None,
index=list(range(0, len(resubmit_job))),
columns=['sub_id', 'ses_id'],
)

for i_job in range(0, len(resubmit_job)):
df_resubmit_job_specific.at[i_job, 'sub_id'] = resubmit_job[i_job][0]
if babs_proj.type_session == 'multi-ses':
if babs_proj.processing_level == 'session':
df_resubmit_job_specific.at[i_job, 'ses_id'] = resubmit_job[i_job][1]

# sanity check:
df_resubmit_job_specific = check_df_job_specific(
df_resubmit_job_specific,
babs_proj.job_status_path_abs,
babs_proj.type_session,
babs_proj.processing_level,
'babs status',
)

@@ -979,7 +975,7 @@ def get_existing_babs_proj(project_root):
babs_proj_config = read_yaml(babs_proj_config_yaml, if_filelock=True)

# make sure the YAML file has necessary sections:
list_sections = ['type_session', 'queue', 'input_ds', 'container']
list_sections = ['processing_level', 'queue', 'input_ds', 'container']
for i in range(0, len(list_sections)):
the_section = list_sections[i]
if the_section not in babs_proj_config:
@@ -988,11 +984,11 @@ def get_existing_babs_proj(project_root):
"in 'analysis/code' folder! Please rerun `babs init` to finish the setup."
)

type_session = babs_proj_config['type_session']
processing_level = babs_proj_config['processing_level']
queue = babs_proj_config['queue']

# Get the class `BABS`:
babs_proj = BABS(project_root, type_session, queue)
babs_proj = BABS(project_root, processing_level, queue)

# update key information including `output_ria_data_dir`:
babs_proj.wtf_key_info(flag_output_ria_only=True)
@@ -1032,7 +1028,7 @@ def get_existing_babs_proj(project_root):
return babs_proj, input_ds


def check_df_job_specific(df, job_status_path_abs, type_session, which_function):
def check_df_job_specific(df, job_status_path_abs, processing_level, which_function):
"""
This is to perform sanity check on the pd.DataFrame `df`
which is used by `babs submit --job` and `babs status --resubmit-job`.
@@ -1044,11 +1040,11 @@ def check_df_job_specific(df, job_status_path_abs, type_session, which_function)
----------
df: pd.DataFrame
i.e., `df_job_specific`
list of sub_id (and ses_id, if multi-ses) that the user requests to submit or resubmit
list of sub_id (and ses_id, if session) that the user requests to submit or resubmit
job_status_path_abs: str
absolute path to the `job_status.csv`
type_session: str
'single-ses' or 'multi-ses'
processing_level : {'subject', 'session'}
whether processing is done on a subject-wise or session-wise basis
which_function: str
'babs status' or 'babs submit'
The warning message will be tailored based on this.
2 changes: 1 addition & 1 deletion babs/templates/babs_proj_config.yaml.jinja2
@@ -1,4 +1,4 @@
type_session: '{{ type_session }}'
processing_level: '{{ processing_level }}'
queue: '{{ queue }}'

# input dataset's name(s)
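
As a quick read-back illustration of the renamed key: this is roughly what the section check in get_existing_babs_proj (shown earlier) guards when loading the generated config. Plain yaml is used here instead of BABS's file-locked read_yaml, and the path is hypothetical:

import yaml

with open('analysis/code/babs_proj_config.yaml') as f:
    babs_proj_config = yaml.safe_load(f)

# Projects initialized before this PR carry 'type_session' instead of
# 'processing_level', which is why the check asks users to rerun `babs init`:
for section in ('processing_level', 'queue', 'input_ds', 'container'):
    if section not in babs_proj_config:
        raise KeyError(f"'{section}' is missing; please rerun `babs init`.")
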
4 changes: 2 additions & 2 deletions babs/templates/bidsapp_run.sh.jinja2
@@ -2,14 +2,14 @@
set -e -u -x

subid="$1"
{% if type_session == 'multi-ses' %}
{% if processing_level == 'session' %}
sesid="$2"
{% endif %}


{% for i in range(input_ds.num_ds) %}
{% if input_ds.df.loc[i, 'is_zipped'] is true %}
{{ input_ds.df.loc[i, 'name'].upper() }}_ZIP="${{ loop.index + (2 if type_session == 'multi-ses' else 1) }}"
{{ input_ds.df.loc[i, 'name'].upper() }}_ZIP="${{ loop.index + (2 if processing_level == 'session' else 1) }}"
{% endif %}
{% endfor %}
{% if flag_filterfile %}
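
A toy rendering shows how the renamed variable gates the session ID in these templates (simplified; not the actual BABS rendering code):

from jinja2 import Template

tmpl = Template('subid="$1"\n{% if processing_level == \'session\' %}sesid="$2"\n{% endif %}')
print(tmpl.render(processing_level='session'))  # emits both the subid and sesid lines
print(tmpl.render(processing_level='subject'))  # emits only the subid line
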
8 changes: 4 additions & 4 deletions babs/templates/datalad_run.sh.jinja2
@@ -2,7 +2,7 @@ datalad run \
-i "code/{{ container.container_name }}_zip.sh" \
{% for dataset in input_datasets %}
{% if not dataset.is_zipped %}
-i "{{ dataset.path }}/{% if type_session == 'multi-ses' %}${subid}/${sesid}{% else %}${subid}{% endif %}" \
-i "{{ dataset.path }}/{% if processing_level == 'session' %}${subid}/${sesid}{% else %}${subid}{% endif %}" \
-i "{{ dataset.path }}/*json" \
{% else %}
-i ${ {{ dataset.name }}_ZIP} \
@@ -14,7 +14,7 @@ datalad run \
{% endif %}
--explicit \
{% for key, value in container.config['zip_foldernames'].items() %}
-o "${subid}{% if type_session == 'multi-ses' %}_${sesid}{% endif %}_{{ key }}-{{ value }}.zip" \
-o "${subid}{% if processing_level == 'session' %}_${sesid}{% endif %}_{{ key }}-{{ value }}.zip" \
{% endfor %}
-m "{{ container.container_name }} ${subid}{% if type_session == 'multi-ses' %} ${sesid}{% endif %}" \
"bash ./code/{{ container.container_name }}_zip.sh ${subid}{% if type_session == 'multi-ses' %} ${sesid}{% endif %} {% for dataset in input_datasets %}{% if dataset.is_zipped %} ${ {{ dataset.name }}_ZIP}{%endif%}{%endfor%}"
-m "{{ container.container_name }} ${subid}{% if processing_level == 'session' %} ${sesid}{% endif %}" \
"bash ./code/{{ container.container_name }}_zip.sh ${subid}{% if processing_level == 'session' %} ${sesid}{% endif %} {% for dataset in input_datasets %}{% if dataset.is_zipped %} ${ {{ dataset.name }}_ZIP}{%endif%}{%endfor%}"
2 changes: 1 addition & 1 deletion babs/templates/participant_job.sh.jinja2
@@ -25,7 +25,7 @@ varname_jobid='JOB_ID'
varname_jobid='SLURM_ARRAY_JOB_ID'
{% endif %}

{% if type_session == 'multi-ses' %}
{% if processing_level == 'session' %}
BRANCH="job-${varname_jobid}-${subid}-${sesid}"
{% else %}
BRANCH="job-${varname_jobid}-${subid}"
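
With this change, a session-level project's per-job branch rendered above would look like, e.g., job-12345-sub-01-ses-A, while a subject-level project's would be job-12345-sub-01 (the IDs are hypothetical).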