Implements the participants sidecar data in the bidsmap (GitHub issue #…
marcelzwiers committed Jan 21, 2025
1 parent d92c727 commit 64743d5
Showing 9 changed files with 285 additions and 151 deletions.
76 changes: 39 additions & 37 deletions bidscoin/bids.py
@@ -826,22 +826,22 @@ def participant(self, value: dict):
def subject(self) -> str:
"""The regular expression for extracting the subject identifier"""

- return self._data['participant']['participant_id']
+ return self._data['participant']['participant_id']['value']

@subject.setter
def subject(self, value: str):

- self._data['participant']['participant_id'] = value
+ self._data['participant']['participant_id']['value'] = value

@property
def session(self) -> str:
"""The regular expression for extracting the session identifier"""

- return self._data['participant']['session_id']
+ return self._data['participant']['session_id']['value']

@session.setter
def session(self, value: str):
- self._data['participant']['session_id'] = value
+ self._data['participant']['session_id']['value'] = value

@property
def datatypes(self) -> list[DataType]:
@@ -2076,7 +2076,7 @@ def poolmetadata(datasource: DataSource, targetmeta: Path, usermeta: Meta, metae
return Meta(metapool)


- def addparticipant(participants_tsv: Path, subid: str='', sesid: str='', data: dict=None, dryrun: bool=False) -> tuple[pd.DataFrame, dict]:
+ def addparticipant(participants_tsv: Path, subid: str='', sesid: str='', data: dict=None, dryrun: bool=False) -> pd.DataFrame:
"""
Read/create and/or add (if it's not there yet) a participant to the participants.tsv/.json file
@@ -2100,14 +2100,6 @@ def addparticipant(participants_tsv: Path, subid: str='', sesid: str='', data: d
table = pd.DataFrame()
table.index.name = 'participant_id'

- # Read the participants json sidecar
- participants_json = participants_tsv.with_suffix('.json')
- if participants_json.is_file():
- with participants_json.open('r') as json_fid:
- meta = json.load(json_fid)
- else:
- meta = {}

# Add the participant row
data_added = False
if subid:
@@ -2126,30 +2118,40 @@ def addparticipant(participants_tsv: Path, subid: str='', sesid: str='', data: d
if not dryrun:
table.mask(table == '').to_csv(participants_tsv, sep='\t', encoding='utf-8', na_rep='n/a')

- # Create/write to the json participants table sidecar file
- key_added = False
- if not meta.get('participant_id'):
- meta['participant_id'] = {'Description': 'Unique participant identifier'}
- key_added = True
- if not meta.get('session_id') and 'session_id' in table.columns:
- meta['session_id'] = {'Description': 'Session identifier'}
- key_added = True
- for col in table.columns:
- if col not in meta:
- key_added = True
- meta[col] = dict(LongName = 'Long (unabbreviated) name of the column',
- Description = 'Description of the the column',
- Levels = dict(Key='Value (This is for categorical variables: a dictionary of possible values (keys) and their descriptions (values))'),
- Units = 'Measurement units. [<prefix symbol>]<unit symbol> format following the SI standard is RECOMMENDED')
-
- # Write the data to the participant sidecar file
- if key_added:
- LOGGER.verbose(f"Writing subject meta data to: {participants_json}")
- if not dryrun:
- with participants_json.open('w') as json_fid:
- json.dump(meta, json_fid, indent=4)

- return table, meta
+ return table


+ def participantmeta(participants_json: Path, bidsmap: BidsMap=None) -> dict:
+ """
+ Read and/or write a participant sidecar file
+
+ :param participants_json: The participants.json sidecar file
+ :param bidsmap: The bidsmap with participants meta data. Leave empty to just read the sidecar meta data (write nothing)
+ :return: The sidecar meta data
+ """
+
+ # Read the participants json sidecar
+ if participants_json.is_file():
+ with participants_json.open('r') as json_fid:
+ metadata = json.load(json_fid)
+ else:
+ metadata = {}
+
+ # Populate the metadata using the bidsmap
+ if bidsmap:
+
+ # If metadata is missing, then use any participant "meta" field in the bidsmap
+ participants_df = addparticipant(participants_json.with_suffix('.tsv'))
+ for column in participants_df.columns:
+ for dataformat in bidsmap.dataformats:
+ if not metadata.get(column) and column in dataformat.participant:
+ metadata[column] = dataformat.participant[column].get('meta', {})
+
+ # Save the data
+ with participants_json.open('w') as json_fid:
+ json.dump(metadata, json_fid, indent=4)
+
+ return metadata


def bidsprov(bidsfolder: Path, source: Path=Path(), runitem: RunItem=None, targets: Iterable[Path]=()) -> pd.DataFrame:
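For context, a minimal usage sketch of the changed bids.py API (the BIDS folder path and subject/session labels below are placeholders, not taken from this commit): addparticipant() now returns only the participants table, while the new participantmeta() handles the participants.json sidecar and fills missing entries from the bidsmap "meta" fields when a bidsmap is passed.

```python
from pathlib import Path
from bidscoin import bids

bidsfolder = Path('/project/bids')                                      # placeholder BIDS output folder
table = bids.addparticipant(bidsfolder/'participants.tsv',
                            subid='sub-001', sesid='ses-01')            # now returns just the DataFrame (no meta dict)
meta  = bids.participantmeta(bidsfolder/'participants.json')            # read-only: without a bidsmap nothing is written
# meta = bids.participantmeta(bidsfolder/'participants.json', bidsmap)  # with a loaded BidsMap: missing entries are filled and saved
print(table.index.tolist(), list(meta))
```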
27 changes: 16 additions & 11 deletions bidscoin/bidscoiner.py
@@ -24,7 +24,7 @@
from bidscoin.utilities import unpack


- def bidscoiner(sourcefolder: str, bidsfolder: str, participant: list=(), force: bool=False, bidsmap: str='bidsmap.yaml', cluster: str='') -> None:
+ def bidscoiner(sourcefolder: str, bidsfolder: str, participant: list=(), force: bool=False, bidsmapname: str='bidsmap.yaml', cluster: str='') -> None:
"""
Main function that processes all the subjects and session in the sourcefolder and uses the
bidsmap.yaml file in bidsfolder/code/bidscoin to cast the data into the BIDS folder.
@@ -33,15 +33,15 @@ def bidscoiner(sourcefolder: str, bidsfolder: str, participant: list=(), force:
:param bidsfolder: The name of the BIDS root folder
:param participant: List of selected subjects/participants (i.e. sub-# names/folders) to be processed (the sub-prefix can be omitted). Otherwise, all subjects in the sourcefolder will be processed
:param force: If True, participant will be processed, regardless of existing folders in the bidsfolder. Otherwise, existing folders will be skipped
- :param bidsmap: The name of the bidsmap YAML-file. If the bidsmap pathname is just the basename (i.e. no "/" in the name) then it is assumed to be located in the current directory or in bidsfolder/code/bidscoin
+ :param bidsmapname: The name of the bidsmap YAML-file. If the bidsmap pathname is just the basename (i.e. no "/" in the name) then it is assumed to be located in the current directory or in bidsfolder/code/bidscoin
:param cluster: Use the DRMAA library to submit the bidscoiner jobs to a high-performance compute (HPC) cluster with DRMAA native specifications for submitting bidscoiner jobs to the HPC cluster. See cli/_bidscoiner() for default
:return: Nothing
"""

# Input checking & defaults
rawfolder = Path(sourcefolder).resolve()
bidsfolder = Path(bidsfolder).resolve()
- bidsmapfile = Path(bidsmap)
+ bidsmapfile = Path(bidsmapname)
bidscoinfolder = bidsfolder/'code'/'bidscoin'
bidscoinfolder.mkdir(parents=True, exist_ok=True)
if not rawfolder.is_dir():
@@ -51,7 +51,7 @@ def bidscoiner(sourcefolder: str, bidsfolder: str, participant: list=(), force:
bcoin.setup_logging(bidscoinfolder/'bidscoiner.log')
LOGGER.info('')
LOGGER.info(f"-------------- START BIDScoiner {__version__}: BIDS {bidsversion()} ------------")
LOGGER.info(f">>> bidscoiner sourcefolder={rawfolder} bidsfolder={bidsfolder} participant={participant} force={force} bidsmap={bidsmapfile}")
LOGGER.info(f">>> bidscoiner sourcefolder={rawfolder} bidsfolder={bidsfolder} participant={participant} force={force} bidsmapname={bidsmapfile}")

# Create a dataset description file if it does not exist
dataset_file = bidsfolder/'dataset_description.json'
@@ -175,9 +175,10 @@ def bidscoiner(sourcefolder: str, bidsfolder: str, participant: list=(), force:
bcoin.synchronize(pbatch, jobids)

# Merge the bids subfolders
- errors = ''
- provdata = bids.bidsprov(bidsfolder)
- participants_table, participants_dict = bids.addparticipant(bidsfolder/'participants.tsv')
+ errors = ''
+ provdata = bids.bidsprov(bidsfolder)
+ participants_table = bids.addparticipant(bidsfolder/'participants.tsv')
+ participants_meta = bids.participantmeta(bidsfolder/'participants.json')
for bidsfolder_tmp in sorted((bidsfolder/'HPC_work').glob('bids_*')):

subid = bidsfolder_tmp.name[5:] # Uses name = f"bids_{subid}" (as defined above)
@@ -217,15 +218,16 @@ def bidscoiner(sourcefolder: str, bidsfolder: str, participant: list=(), force:
# Update the participants table + dictionary
if subid not in participants_table.index:
LOGGER.verbose(f"Merging: participants.tsv -> {bidsfolder/'participants.tsv'}")
- participant_table, participant_dict = bids.addparticipant(bidsfolder_tmp/'participants.tsv')
- participants_table = pd.concat([participants_table, participant_table])
- participants_dict.update(participant_dict)
+ participant_table = bids.addparticipant(bidsfolder_tmp/'participants.tsv')
+ participants_table = pd.concat([participants_table, participant_table])
+ participant_meta = bids.participantmeta(bidsfolder_tmp/'participants.json')
+ participants_meta.update(participant_meta)

# Save the provenance and participants data to disk
provdata.sort_index().to_csv(bidscoinfolder/'bidscoiner.tsv', sep='\t')
participants_table.replace('', 'n/a').to_csv(bidsfolder/'participants.tsv', sep='\t', encoding='utf-8', na_rep='n/a')
with (bidsfolder/'participants.json').open('w') as fid:
- json.dump(participants_dict, fid, indent=4)
+ json.dump(participants_meta, fid, indent=4)

if not DEBUG:
shutil.rmtree(bidsfolder/'HPC_work', ignore_errors=True)
Expand Down Expand Up @@ -292,6 +294,9 @@ def bidscoiner(sourcefolder: str, bidsfolder: str, participant: list=(), force:
if unpacked:
shutil.rmtree(sesfolder)

+ # Add the participants sidecar file
+ bids.participantmeta(bidsfolder/'participants.json', bidsmap)

LOGGER.info('-------------- FINISHED! ------------')
LOGGER.info('')

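A hedged call example for the renamed keyword argument (the source and BIDS paths are placeholders); existing scripts that passed bidsmap= to this function would need to switch to bidsmapname=:

```python
from bidscoin import bidscoiner

# The keyword was renamed from `bidsmap` to `bidsmapname`; positional calls are unaffected
bidscoiner.bidscoiner('/project/sourcedata', '/project/bids', bidsmapname='bidsmap.yaml')
```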
12 changes: 9 additions & 3 deletions bidscoin/heuristics/bidsmap_bids2bids.yaml
@@ -41,9 +41,15 @@ Nibabel:
# --------------------------------------------------------------------------------
# Nibabel key-value heuristics (header fields that are mapped to the BIDS labels)
# --------------------------------------------------------------------------------
-  participant:                                    # Attributes or properties to populate the participants table/tsv-file
-    participant_id: <<filepath:/sub-(.*?)/>>      # This filesystem property extracts the subject label from the source directory. NB: Any property or attribute can be used as subject-label, e.g. <PatientID>
-    session_id: <<filepath:/sub-.*?/ses-(.*?)/>>  # This filesystem property extracts the subject label from the source directory. NB: Any property or attribute can be used as session-label, e.g. <StudyID>
+  participant:                                    # Attributes or properties to populate the participants table/tsv-file
+    participant_id:
+      value: <<filepath:/sub-(.*?)/>>             # This filesystem property extracts the subject label from the source directory. NB: Any property or attribute can be used as subject-label, e.g. <PatientID>
+      meta:
+        Description: The unique participant identifier of the form sub-<label>, matching a participant entity found in the dataset
+    session_id:
+      value: <<filepath:/sub-.*?/ses-(.*?)/>>     # This filesystem property extracts the session label from the source directory. NB: Any property or attribute can be used as session-label, e.g. <StudyID>
+      meta:
+        Description: The session identifier of the form ses-<label>, matching a session found in the dataset

anat: # ----------------------- All anatomical runs --------------------
- properties: &fileprop_anat # This is an optional (stub) entry of properties matching (could be added to any run-item)
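To illustrate the new nested layout, a self-contained sketch (assuming PyYAML is available; the snippet is copied from the heuristics above rather than loaded from a study bidsmap) showing how the value sub-field feeds the participants.tsv column while the meta sub-field is what participantmeta() copies into participants.json:

```python
import yaml  # PyYAML, assumed to be installed

snippet = """
participant:
  participant_id:
    value: <<filepath:/sub-(.*?)/>>
    meta:
      Description: The unique participant identifier of the form sub-<label>, matching a participant entity found in the dataset
  session_id:
    value: <<filepath:/sub-.*?/ses-(.*?)/>>
    meta:
      Description: The session identifier of the form ses-<label>, matching a session found in the dataset
"""

participant = yaml.safe_load(snippet)['participant']
print(participant['session_id']['value'])   # dynamic <<filepath:..>> expression -> fills the session_id column in participants.tsv
print(participant['session_id']['meta'])    # {'Description': ...} -> what participantmeta() writes to participants.json
```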