diff --git a/clinica/iotools/bids_utils.py b/clinica/iotools/bids_utils.py
index 37ef021f9..63134b7d2 100644
--- a/clinica/iotools/bids_utils.py
+++ b/clinica/iotools/bids_utils.py
@@ -890,7 +890,7 @@ def run_dcm2niix(
 
     from clinica.utils.stream import cprint
 
-    cprint(msg=f"Attempting to convert {output_fmt}")
+    cprint(f"Attempting to convert {output_fmt}.", lvl="info")
     command = _build_dcm2niix_command(
         input_dir, output_dir, output_fmt, compress, bids_sidecar
     )
@@ -920,6 +920,8 @@ def identify_modality(filename: str) -> Optional[str]:
     Optional[str]: Modality or None if parsing unsuccessful
 
     """
+    import numpy as np
+
     filename = filename.lower()
     if "dwi" in filename:
         return "dwi"
@@ -932,7 +934,7 @@ def identify_modality(filename: str) -> Optional[str]:
     if "fmri" in filename:
         return "rsfmri"
     else:
-        return None
+        return np.nan
 
 
 def parse_description(filepath: PathLike, start_line: int, end_line: int) -> str:
diff --git a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids.py b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids.py
index 12157d2b0..ae3b43e5b 100644
--- a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids.py
+++ b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids.py
@@ -53,7 +53,6 @@ def convert_images(
 
     # complete the data extracted
     imaging_data = merge_imaging_data(imaging_data)
-
     # complete clinical data
     if path_to_clinical:
         df_clinical_complete = complete_clinical_data(
@@ -67,7 +66,6 @@ def convert_images(
         df_complete = imaging_data
     # build the tsv
     results = dataset_to_bids(df_complete, gif)
-
     write_bids(
         to=bids_dir,
         participants=results["participants"],
diff --git a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_utils.py b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_utils.py
index 33e2d40e9..0ab829827 100644
--- a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_utils.py
+++ b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_utils.py
@@ -1,6 +1,7 @@
+from enum import Enum
 from os import PathLike
 from pathlib import Path
-from typing import Dict, Iterable, List, Tuple
+from typing import Dict, Iterable, List, Optional, Tuple
 
 import pandas as pd
 import pydicom as pdcm
@@ -20,6 +21,9 @@ def find_dicoms(path_to_source_data: PathLike) -> Iterable[Tuple[PathLike, PathL
     Iterable[Tuple[PathLike, PathLike]]
         Path to found files and parent directory
     """
+    from clinica.utils.stream import cprint
+
+    cprint("Looking for imaging data.", lvl="info")
     for z in Path(path_to_source_data).rglob("*.dcm"):
         yield str(z), str(Path(z).parent)
 
@@ -60,10 +64,11 @@ def filter_dicoms(df: DataFrame) -> DataFrame:
     df = df.assign(
         series_desc=lambda df: df.source_path.apply(
             lambda x: pdcm.dcmread(x).SeriesDescription
-        )
-    )
-    df = df.assign(
-        acq_date=lambda df: df.source_path.apply(lambda x: pdcm.dcmread(x).StudyDate)
+        ),
+        acq_date=lambda df: df.source_path.apply(lambda x: pdcm.dcmread(x).StudyDate),
+        manufacturer=lambda df: df.source_path.apply(
+            lambda x: pdcm.dcmread(x).Manufacturer
+        ),
     )
 
     df = df.set_index(["source_path"], verify_integrity=True)
@@ -116,6 +121,10 @@ def find_clinical_data(
     List[DataFrame]
         Dataframes containing the clinical data
     """
+    from clinica.utils.stream import cprint
+
+    cprint("Looking for clinical data.", lvl="info")
+
     return (
         _read_file(_check_file(clinical_data_directory, pattern))
         for pattern in (
@@ -353,8 +362,12 @@ def compute_modality(df: DataFrame) -> DataFrame:
             "task": "",
         },
     }
-    df = df.assign(
-        modality=lambda x: x.series_desc.apply(lambda y: identify_modality(y))
+    df = (
+        df.assign(
+            modality=lambda x: x.series_desc.apply(lambda y: identify_modality(y))
+        )
+        .dropna(subset=["modality"])
+        .drop_duplicates()
     )
     return df.join(df.modality.map(modality_mapping).apply(pd.Series))
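Reviewer note: switching the fallback from `None` to `np.nan` is what lets the new `compute_modality` chain discard unparseable series via `dropna(subset=["modality"])`. A minimal, self-contained sketch of that interaction, using a stub that only mirrors the patched logic (not the Clinica import itself):

```python
import numpy as np
import pandas as pd

def identify_modality_stub(filename):
    # Abbreviated stand-in for the patched identify_modality:
    # np.nan now marks series descriptions that cannot be parsed.
    filename = filename.lower()
    for key, modality in [("dwi", "dwi"), ("t1", "T1"), ("t2", "T2w"), ("fmri", "rsfmri")]:
        if key in filename:
            return modality
    return np.nan

df = pd.DataFrame({"series_desc": ["t1_mprage", "localizer", "dwi_b1000"]})
df = df.assign(modality=lambda x: x.series_desc.apply(identify_modality_stub))
print(df.dropna(subset=["modality"]))  # the unparseable "localizer" row is gone
```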
@@ -425,12 +438,123 @@ def compute_runs(df: DataFrame) -> DataFrame:
     DataFrame
         Dataframe containing the correct run for each acquisition.
     """
-    filter = ["source_id", "source_ses_id", "suffix", "dir_num"]
+
+    filter = ["source_id", "source_ses_id", "suffix", "number_of_parts", "dir_num"]
     df1 = df[filter].groupby(filter).min()
     df2 = df[filter].groupby(filter[:-1]).min()
     df1 = df1.join(df2.rename(columns={"dir_num": "run_01_dir_num"}))
     df_alt = df1.reset_index().assign(run=lambda x: (x.run_01_dir_num != x.dir_num))
-    return df_alt.assign(run_num=lambda df: df.run.apply(lambda x: f"run-0{int(x)+1}"))
+
+    df_run = pd.concat(
+        [
+            df_alt,
+            pd.DataFrame(
+                _compute_scan_sequence_numbers(df_alt.run.tolist()),
+                columns=["run_number"],
+            ),
+        ],
+        axis=1,
+    )
+    df_run = df_run.assign(
+        run_num=lambda df: df.run_number.apply(lambda x: f"run-{x:02d}")
+    )
+    return df_run
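The numbering scheme changes here: the old `f"run-0{int(x)+1}"` turned a boolean flag into `run-01` or `run-02` and could never count past two repeats, while the new cumulative counter handles any number. A standalone sketch of the new behavior:

```python
# True means "duplicate of the previous scan in this group".
flags = [False, True, True, False]

# Same cumulative logic as _compute_scan_sequence_numbers below.
numbers = [1]
for flag in flags[1:]:
    numbers.append(numbers[-1] + 1 if flag else 1)

print([f"run-{n:02d}" for n in numbers])
# ['run-01', 'run-02', 'run-03', 'run-01'] -- a third repeat now
# becomes run-03 instead of being mislabeled run-02.
```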
+
+
+def compute_philips_parts(df: DataFrame) -> DataFrame:
+    """Compute the part numbers for Philips DWI acquisitions.
+    Acquisitions that belong together are linked through their number of parts.
+    For example, if a DWI acquisition is split into 9 parts,
+    the `number_of_parts` column will have a value of 9 for all of these acquisitions.
+    Two columns are added:
+    - part_number, which contains the sequence number of each part of a DWI acquisition.
+    - number_of_parts, which contains the total number of parts of each DWI acquisition.
+
+    Parameters
+    ----------
+    df: DataFrame
+        Dataframe without runs.
+
+    Returns
+    -------
+    DataFrame
+        Dataframe containing the correct DWI part number for each acquisition,
+        as well as the total number of DWI parts for each subject-session.
+    """
+    df_with_duplicate_flags = _find_duplicate_run(df)
+    df_with_part_numbers = _compute_part_numbers(df_with_duplicate_flags)
+    df_with_number_of_parts = _compute_number_of_parts(df_with_part_numbers)
+    return pd.concat(
+        [df_with_part_numbers, df_with_number_of_parts["number_of_parts"]], axis=1
+    )
+
+
+def _find_duplicate_run(df: DataFrame) -> DataFrame:
+    """Add a column flagging whether each run is a duplicate of a previous one."""
+    filter = ["source_id", "source_ses_id", "suffix", "manufacturer", "dir_num"]
+    df = df[df["suffix"].str.contains("dwi", case=False)]
+    df1 = df[filter].groupby(filter).min()
+    df2 = df[filter].groupby(filter[:-1]).min()
+    df1 = df1.join(df2.rename(columns={"dir_num": "part_01_dir_num"}))
+    df_alt = df1.reset_index().assign(run=lambda x: (x.part_01_dir_num != x.dir_num))
+    return df_alt
+
+
+def _compute_part_numbers(df: DataFrame) -> DataFrame:
+    """Compute the sequence number of each part."""
+    return pd.concat(
+        [
+            df,
+            pd.DataFrame(
+                _compute_scan_sequence_numbers(df.run.tolist()), columns=["part_number"]
+            ),
+        ],
+        axis=1,
+    )
+
+
+def _compute_number_of_parts(df: pd.DataFrame) -> DataFrame:
+    """Add the number of parts (the max value of the part_number column) to each part."""
+    filter2 = ["source_id", "source_ses_id", "suffix", "manufacturer", "part_number"]
+    df_parts_1 = df[filter2].groupby(filter2).max()
+    df_parts_2 = df[filter2].groupby(filter2[:-1]).max()
+    df_max_nb_parts = df_parts_1.join(
+        df_parts_2.rename(columns={"part_number": "number_of_parts"})
+    ).reset_index()
+    return df_max_nb_parts
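Both `_find_duplicate_run` and `compute_runs` rely on the same groupby/join trick: the minimum `dir_num` within each subject/session/suffix group is broadcast back onto every row, and any row whose own `dir_num` differs from that minimum is a repeat. A toy reproduction of the pattern on made-up data (not GENFI columns beyond the names):

```python
import pandas as pd

df = pd.DataFrame(
    {
        "source_id": ["sub-01"] * 3,
        "source_ses_id": [1, 1, 1],
        "suffix": ["dwi"] * 3,
        "dir_num": [10, 20, 30],
    }
)
keys = ["source_id", "source_ses_id", "suffix", "dir_num"]
per_row = df[keys].groupby(keys).min()
# Dropping the last key collapses each group to its first (minimal) dir_num.
first = df[keys].groupby(keys[:-1]).min().rename(columns={"dir_num": "first_dir_num"})
flagged = per_row.join(first).reset_index()
flagged["run"] = flagged.first_dir_num != flagged.dir_num
print(flagged[["dir_num", "first_dir_num", "run"]])
#    dir_num  first_dir_num    run
# 0       10             10  False
# 1       20             10   True
# 2       30             10   True
```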
+
+
+def _compute_scan_sequence_numbers(duplicate_flags: Iterable[bool]) -> List[int]:
+    """Return the scan sequence numbers from an iterable of booleans indicating
+    whether each scan is a duplicate or not.
+
+    Parameters
+    ----------
+    duplicate_flags : Iterable[bool]
+        If the element at index k is True, then the scan k is a duplicate.
+        Otherwise, it is the first scan of the sequence.
+
+    Returns
+    -------
+    ses_numbers : List[int]
+        The list of scan sequence numbers.
+
+    Examples
+    --------
+    >>> _compute_scan_sequence_numbers([False, True, False, True, False, False, False, True, False, True, True])
+    [1, 2, 1, 2, 1, 1, 1, 2, 1, 2, 3]
+
+    Raises
+    ------
+    ValueError :
+        If the input list is empty.
+    """
+    if len(duplicate_flags) == 0:
+        raise ValueError("Provided list is empty.")
+    ses_numbers = [1]
+    for k in range(1, len(duplicate_flags)):
+        ses_numbers.append(1 if not duplicate_flags[k] else ses_numbers[k - 1] + 1)
+    return ses_numbers
 
 
 def merge_imaging_data(df_dicom: DataFrame) -> DataFrame:
@@ -480,17 +604,39 @@ def merge_imaging_data(df_dicom: DataFrame) -> DataFrame:
     df_sub_ses = compute_modality(df_sub_ses)
 
     df_fmap = df_sub_ses.assign(
-        dir_num=lambda x: x.source.apply(lambda y: int(get_parent(y).name))
+        dir_num=lambda x: x.source.apply(
+            lambda y: int(get_parent(y).name.split("-")[0])
+        )
     )
     df_suf = merge_fieldmaps(df_fmap, identify_fieldmaps(df_fmap))
 
     df_suf_dir = df_suf.assign(
-        dir_num=lambda x: x.source.apply(lambda y: int(get_parent(y).name))
+        dir_num=lambda x: x.source.apply(
+            lambda y: int(get_parent(y).name.split("-")[0])
+        )
+    )
+    df_alt = compute_philips_parts(df_suf_dir)
+    df_parts = df_suf_dir.merge(
+        df_alt[["source_id", "source_ses_id", "suffix", "dir_num", "number_of_parts"]],
+        how="left",
+        on=["source_id", "source_ses_id", "suffix", "dir_num"],
     )
-    df_alt = compute_runs(df_suf_dir)
+    df_parts[["number_of_parts"]] = df_parts[["number_of_parts"]].fillna(value="1")
+
+    df_alt = compute_runs(df_parts)
+
     df_sub_ses_run = df_suf_dir.merge(
-        df_alt[["source_id", "source_ses_id", "suffix", "dir_num", "run_num"]],
+        df_alt[
+            [
+                "source_id",
+                "source_ses_id",
+                "suffix",
+                "dir_num",
+                "run_num",
+                "number_of_parts",
+            ]
+        ],
         how="left",
         on=["source_id", "source_ses_id", "suffix", "dir_num"],
     )
@@ -534,10 +680,11 @@ def write_bids(
     from clinica.iotools.bids_dataset_description import BIDSDatasetDescription
     from clinica.iotools.bids_utils import run_dcm2niix, write_to_tsv
+    from clinica.utils.stream import cprint
 
+    cprint("Starting to write the BIDS.", lvl="info")
     to = Path(to)
     fs = LocalFileSystem(auto_mkdir=True)
-
     # Ensure BIDS hierarchy is written first.
     with fs.transaction:
         with fs.open(
@@ -568,6 +715,12 @@ def write_bids(
                 metadata["bids_filename"],
                 True,
             )
+            if "dwi" in metadata["bids_filename"] and metadata.manufacturer == "Philips":
+                merge_philips_diffusion(
+                    to / Path(bids_full_path).with_suffix(".json"),
+                    metadata.number_of_parts,
+                    metadata.run_num,
+                )
     correct_fieldmaps_name(to)
     return
@@ -626,3 +779,64 @@ def correct_fieldmaps_name(to: PathLike) -> None:
 
     for z in Path(to).rglob("*phasediff_e*_ph*"):
         os.rename(z, z.parent / re.sub(r"phasediff_e[1-9]_ph", "phasediff", z.name))
+
+
+def merge_philips_diffusion(
+    json_file: PathLike,
+    number_of_parts: float,
+    run_num: str,
+) -> None:
+    """Add the DWI multipart ID to the provided JSON file for each run of Philips images.
+    The 'MultipartID' field of the input JSON file is set to 'dwi_1' or 'dwi_2' depending
+    on the run number.
+    """
+    import json
+
+    multipart_id = _get_multipart_id(
+        PhilipsNumberOfParts.from_int(int(number_of_parts)), run_num
+    )
+    if multipart_id is not None:
+        data = json.loads(json_file.read_text())
+        data["MultipartID"] = multipart_id
+        # json.dump expects a file object, not a Path, so serialize and write_text.
+        json_file.write_text(json.dumps(data, indent=4))
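Note the last line above: the function originally called `json.dump(data, json_file, indent=4)`, which fails because `json_file` is a `Path`, not a file object; the read-modify-write now goes through `read_text`/`write_text`. A self-contained sketch of what the sidecar update does (temporary file, hypothetical field values):

```python
import json
from pathlib import Path
from tempfile import TemporaryDirectory

with TemporaryDirectory() as tmp:
    sidecar = Path(tmp) / "sub-01_ses-M00_run-01_dwi.json"
    sidecar.write_text(json.dumps({"EchoTime": 0.093}))

    # Same read-modify-write cycle as merge_philips_diffusion.
    data = json.loads(sidecar.read_text())
    data["MultipartID"] = "dwi_1"  # value chosen by _get_multipart_id
    sidecar.write_text(json.dumps(data, indent=4))

    print(sidecar.read_text())
```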
+
+
+class PhilipsNumberOfParts(Enum):
+    """DWI scans obtained with a Philips scanner might have
+    been divided into either 5 or 9 parts. This distinction is important
+    because we will link these different parts together.
+    If the number of parts is not 5 or 9, nothing will be done.
+    """
+
+    FIVE = 5
+    NINE = 9
+    OTHER = None
+
+    @classmethod
+    def from_int(cls, nb_parts: int):
+        import warnings
+
+        for member in cls:
+            if member.value == nb_parts:
+                return member
+        warnings.warn(
+            f"Unexpected number of splits {nb_parts}. "
+            f"Should be one of {[c.value for c in cls]}."
+        )
+        return cls.OTHER
+
+
+def _get_multipart_id(nb_parts: PhilipsNumberOfParts, run_num: str) -> Optional[str]:
+    """Return the MultipartID for the provided run number, depending on the number of parts."""
+    if nb_parts == PhilipsNumberOfParts.NINE:
+        if run_num in (f"run-0{k}" for k in range(1, 5)):
+            return "dwi_2"
+        if run_num in (f"run-0{k}" for k in range(5, 10)):
+            return "dwi_1"
+        raise ValueError(f"{run_num} is outside of the scope.")
+    if nb_parts == PhilipsNumberOfParts.FIVE:
+        if run_num in (f"run-0{k}" for k in range(1, 5)):
+            return "dwi_1"
+        raise ValueError(f"{run_num} is outside of the scope.")
+    if nb_parts == PhilipsNumberOfParts.OTHER:
+        return None
diff --git a/clinica/iotools/data/genfi_ref.csv b/clinica/iotools/data/genfi_ref.csv
index cc9de1e71..eb657ff1e 100644
--- a/clinica/iotools/data/genfi_ref.csv
+++ b/clinica/iotools/data/genfi_ref.csv
@@ -4,9 +4,9 @@ source;session_id;session_id
 gender;genfi_version;bids_filename
 handedness;age_at_visit;bids_full_path
 education;date_of_scan;source_path
-genetic_group;diagnosis;
-genetic_status_1;ftld-cdr-global;
-genetic_status_2;cdr-sob;
+genetic_group;diagnosis;manufacturer
+genetic_status_1;ftld-cdr-global;run_num
+genetic_status_2;cdr-sob;number_of_parts
 ;non-brain_outer_tissue_-_background;
 ;non-brain_low;
 ;non-brain_mid;
diff --git a/docs/index.md b/docs/index.md
index a26aa10b5..3c61e0a0d 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -74,6 +74,7 @@ Clinica provides tools to curate several publicly available neuroimaging dataset
 
 - `adni-to-bids` - [ADNI: Alzheimer’s Disease Neuroimaging Initiative](Converters/ADNI2BIDS)
 - `aibl-to-bids` - [AIBL: Australian Imaging, Biomarker & Lifestyle Flagship Study of Ageing](Converters/AIBL2BIDS)
+- `genfi-to-bids` - [GENFI: Genetic Frontotemporal dementia Initiative](Converters/GENFItoBIDS)
 - `habs-to-bids` - [HABS: Harvard Aging Brain Study](Converters/HABS2BIDS)
 - `nifd-to-bids` - [NIFD: Neuroimaging in Frontotemporal Dementia](Converters/NIFD2BIDS)
 - `oasis-to-bids` - [OASIS: Open Access Series of Imaging Studies](Converters/OASIS2BIDS)
diff --git a/mkdocs.yml b/mkdocs.yml
index df10620c2..072b99d94 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -100,6 +100,7 @@ nav:
       - Dataset converters:
           - ADNI to BIDS: Converters/ADNI2BIDS.md
           - AIBL to BIDS: Converters/AIBL2BIDS.md
+          - GENFI to BIDS: Converters/GENFItoBIDS.md
          - HABS to BIDS: Converters/HABS2BIDS.md
          - NIFD to BIDS: Converters/NIFD2BIDS.md
          - OASIS to BIDS: Converters/OASIS2BIDS.md
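Taken together, `from_int` plus `_get_multipart_id` implement a small lookup: for a 9-part acquisition, runs 01-04 are labeled `dwi_2` and runs 05-09 `dwi_1`; for a 5-part acquisition, runs 01-04 map to `dwi_1`; any other part count leaves the sidecar untouched. A compact, table-driven sketch of that same mapping (an illustrative rewrite, not the patch's implementation):

```python
from typing import Optional

# Hypothetical condensed form of _get_multipart_id's branching,
# keeping the exact run ranges used in the patch.
MULTIPART_RULES = {
    9: {range(1, 5): "dwi_2", range(5, 10): "dwi_1"},
    5: {range(1, 5): "dwi_1"},
}

def multipart_id(nb_parts: int, run_num: str) -> Optional[str]:
    rules = MULTIPART_RULES.get(nb_parts)
    if rules is None:
        return None  # unexpected part count: leave the sidecar untouched
    run_index = int(run_num.split("-")[1])
    for runs, label in rules.items():
        if run_index in runs:
            return label
    raise ValueError(f"{run_num} is outside of the scope.")

assert multipart_id(9, "run-03") == "dwi_2"
assert multipart_id(9, "run-07") == "dwi_1"
assert multipart_id(5, "run-02") == "dwi_1"
assert multipart_id(4, "run-01") is None
```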
"T2w"), ("fieldmap", "fieldmap"), ("fmri", "rsfmri"), - ("blzflbzv", None), ], ) def test_identify_modality(input, expected): assert identify_modality(input) == expected + + +def test_identify_modality_is_nan(): + assert math.isnan(identify_modality("blzflbzv")) + + +@pytest.fixture +def input_df_compute_runs(): + return pd.DataFrame( + { + "source_id": ["GRN001", "GRN001", "GRN001", "C9ORF001", "C9ORF001"], + "source_ses_id": [1, 1, 1, 2, 2], + "suffix": ["a", "a", "c", "a", "b"], + "number_of_parts": [2, 2, 1, 1, 1], + "dir_num": [10, 20, 30, 10, 20], + } + ) + + +def test_compute_runs(input_df_compute_runs): + expected = pd.DataFrame( + { + "source_id": ["C9ORF001", "C9ORF001", "GRN001", "GRN001", "GRN001"], + "source_ses_id": [2, 2, 1, 1, 1], + "suffix": ["a", "b", "a", "a", "c"], + "number_of_parts": [1, 1, 2, 2, 1], + "dir_num": [10, 20, 10, 20, 30], + "run_01_dir_num": [10, 20, 10, 10, 30], + "run": [False, False, False, True, False], + "run_number": [1, 1, 1, 2, 1], + "run_num": ["run-01", "run-01", "run-01", "run-02", "run-01"], + } + ) + assert_frame_equal(compute_runs(input_df_compute_runs), expected) + + +@pytest.fixture +def input_df_compute_philips_parts(): + return pd.DataFrame( + { + "source_id": ["sub-01"] * 5 + ["sub-02"] * 3, + "source_ses_id": [1, 1, 1, 2, 2, 1, 2, 2], + "suffix": ["dwi"] * 8, + "manufacturer": ["philips"] * 8, + "dir_num": [10, 20, 30, 40, 50, 10, 20, 30], + } + ) + + +def test_compute_philips_parts(input_df_compute_philips_parts): + expected = pd.DataFrame( + { + "source_id": ["sub-01"] * 5 + ["sub-02"] * 3, + "source_ses_id": [1, 1, 1, 2, 2, 1, 2, 2], + "suffix": ["dwi"] * 8, + "manufacturer": ["philips"] * 8, + "dir_num": [10, 20, 30, 40, 50, 10, 20, 30], + "part_01_dir_num": [10, 10, 10, 40, 40, 10, 20, 20], + "run": [False, True, True, False, True, False, False, True], + "part_number": [1, 2, 3, 1, 2, 1, 1, 2], + "number_of_parts": [3, 3, 3, 2, 2, 1, 2, 2], + } + ) + assert_frame_equal(compute_philips_parts(input_df_compute_philips_parts), expected) + + +@pytest.mark.parametrize( + "input, expected", + [ + ( + [False, True, False, True, False, False, False, True, False, True, True], + [1, 2, 1, 2, 1, 1, 1, 2, 1, 2, 3], + ), + ([True, True, True], [1, 2, 3]), + ([False, False, False], [1, 1, 1]), + ], +) +def test_compute_scan_sequence_numbers(input: List[bool], expected): + assert _compute_scan_sequence_numbers(input) == expected + + +def test_compute_scan_sequence_numbers_error(): + with pytest.raises(ValueError): + _compute_scan_sequence_numbers([])