Implements the participants sidecar data in the bidsmap (GitHub issue #…
marcelzwiers committed Jan 21, 2025
1 parent d92c727 commit 64743d5
Showing 9 changed files with 285 additions and 151 deletions.
76 changes: 39 additions & 37 deletions bidscoin/bids.py
@@ -826,22 +826,22 @@ def participant(self, value: dict):
def subject(self) -> str:
"""The regular expression for extracting the subject identifier"""

- return self._data['participant']['participant_id']
+ return self._data['participant']['participant_id']['value']

@subject.setter
def subject(self, value: str):

- self._data['participant']['participant_id'] = value
+ self._data['participant']['participant_id']['value'] = value

@property
def session(self) -> str:
"""The regular expression for extracting the session identifier"""

- return self._data['participant']['session_id']
+ return self._data['participant']['session_id']['value']

@session.setter
def session(self, value: str):
- self._data['participant']['session_id'] = value
+ self._data['participant']['session_id']['value'] = value

@property
def datatypes(self) -> list[DataType]:
@@ -2076,7 +2076,7 @@ def poolmetadata(datasource: DataSource, targetmeta: Path, usermeta: Meta, metae
return Meta(metapool)


- def addparticipant(participants_tsv: Path, subid: str='', sesid: str='', data: dict=None, dryrun: bool=False) -> tuple[pd.DataFrame, dict]:
+ def addparticipant(participants_tsv: Path, subid: str='', sesid: str='', data: dict=None, dryrun: bool=False) -> pd.DataFrame:
"""
Read/create and/or add (if it's not there yet) a participant to the participants.tsv/.json file
@@ -2100,14 +2100,6 @@ def addparticipant(participants_tsv: Path, subid: str='', sesid: str='', data: d
table = pd.DataFrame()
table.index.name = 'participant_id'

- # Read the participants json sidecar
- participants_json = participants_tsv.with_suffix('.json')
- if participants_json.is_file():
- with participants_json.open('r') as json_fid:
- meta = json.load(json_fid)
- else:
- meta = {}

# Add the participant row
data_added = False
if subid:
@@ -2126,30 +2118,40 @@ def addparticipant(participants_tsv: Path, subid: str='', sesid: str='', data: d
if not dryrun:
table.mask(table == '').to_csv(participants_tsv, sep='\t', encoding='utf-8', na_rep='n/a')

- # Create/write to the json participants table sidecar file
- key_added = False
- if not meta.get('participant_id'):
- meta['participant_id'] = {'Description': 'Unique participant identifier'}
- key_added = True
- if not meta.get('session_id') and 'session_id' in table.columns:
- meta['session_id'] = {'Description': 'Session identifier'}
- key_added = True
- for col in table.columns:
- if col not in meta:
- key_added = True
- meta[col] = dict(LongName = 'Long (unabbreviated) name of the column',
- Description = 'Description of the the column',
- Levels = dict(Key='Value (This is for categorical variables: a dictionary of possible values (keys) and their descriptions (values))'),
- Units = 'Measurement units. [<prefix symbol>]<unit symbol> format following the SI standard is RECOMMENDED')
-
- # Write the data to the participant sidecar file
- if key_added:
- LOGGER.verbose(f"Writing subject meta data to: {participants_json}")
- if not dryrun:
- with participants_json.open('w') as json_fid:
- json.dump(meta, json_fid, indent=4)

- return table, meta
+ return table


+ def participantmeta(participants_json: Path, bidsmap: BidsMap=None) -> dict:
+ """
+ Read and/or write a participant sidecar file
+
+ :param participants_json: The participants.json sidecar file
+ :param bidsmap: The bidsmap with participants meta data. Leave empty to just read the sidecar meta data (write nothing)
+ :return: The sidecar meta data
+ """
+
+ # Read the participants json sidecar
+ if participants_json.is_file():
+ with participants_json.open('r') as json_fid:
+ metadata = json.load(json_fid)
+ else:
+ metadata = {}
+
+ # Populate the metadata using the bidsmap
+ if bidsmap:
+
+ # If metadata is missing, then use any participant "meta" field in the bidsmap
+ participants_df = addparticipant(participants_json.with_suffix('.tsv'))
+ for column in participants_df.columns:
+ for dataformat in bidsmap.dataformats:
+ if not metadata.get(column) and column in dataformat.participant:
+ metadata[column] = dataformat.participant[column].get('meta', {})
+
+ # Save the data
+ with participants_json.open('w') as json_fid:
+ json.dump(metadata, json_fid, indent=4)
+
+ return metadata


def bidsprov(bidsfolder: Path, source: Path=Path(), runitem: RunItem=None, targets: Iterable[Path]=()) -> pd.DataFrame:
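For context, a minimal usage sketch of the changed bids.py API (the BIDS folder path and subject/session labels below are placeholders, not taken from this commit): addparticipant() now returns only the participants table, while the new participantmeta() handles the participants.json sidecar and fills missing entries from the bidsmap "meta" fields when a bidsmap is passed.

```python
from pathlib import Path
from bidscoin import bids

bidsfolder = Path('/project/bids')                                      # placeholder BIDS output folder
table = bids.addparticipant(bidsfolder/'participants.tsv',
                            subid='sub-001', sesid='ses-01')            # now returns just the DataFrame (no meta dict)
meta  = bids.participantmeta(bidsfolder/'participants.json')            # read-only: without a bidsmap nothing is written
# meta = bids.participantmeta(bidsfolder/'participants.json', bidsmap)  # with a loaded BidsMap: missing entries are filled and saved
print(table.index.tolist(), list(meta))
```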
27 changes: 16 additions & 11 deletions bidscoin/bidscoiner.py
@@ -24,7 +24,7 @@
from bidscoin.utilities import unpack


- def bidscoiner(sourcefolder: str, bidsfolder: str, participant: list=(), force: bool=False, bidsmap: str='bidsmap.yaml', cluster: str='') -> None:
+ def bidscoiner(sourcefolder: str, bidsfolder: str, participant: list=(), force: bool=False, bidsmapname: str='bidsmap.yaml', cluster: str='') -> None:
"""
Main function that processes all the subjects and session in the sourcefolder and uses the
bidsmap.yaml file in bidsfolder/code/bidscoin to cast the data into the BIDS folder.
@@ -33,15 +33,15 @@ def bidscoiner(sourcefolder: str, bidsfolder: str, participant: list=(), force:
:param bidsfolder: The name of the BIDS root folder
:param participant: List of selected subjects/participants (i.e. sub-# names/folders) to be processed (the sub-prefix can be omitted). Otherwise, all subjects in the sourcefolder will be processed
:param force: If True, participant will be processed, regardless of existing folders in the bidsfolder. Otherwise, existing folders will be skipped
- :param bidsmap: The name of the bidsmap YAML-file. If the bidsmap pathname is just the basename (i.e. no "/" in the name) then it is assumed to be located in the current directory or in bidsfolder/code/bidscoin
+ :param bidsmapname: The name of the bidsmap YAML-file. If the bidsmap pathname is just the basename (i.e. no "/" in the name) then it is assumed to be located in the current directory or in bidsfolder/code/bidscoin
:param cluster: Use the DRMAA library to submit the bidscoiner jobs to a high-performance compute (HPC) cluster with DRMAA native specifications for submitting bidscoiner jobs to the HPC cluster. See cli/_bidscoiner() for default
:return: Nothing
"""

# Input checking & defaults
rawfolder = Path(sourcefolder).resolve()
bidsfolder = Path(bidsfolder).resolve()
- bidsmapfile = Path(bidsmap)
+ bidsmapfile = Path(bidsmapname)
bidscoinfolder = bidsfolder/'code'/'bidscoin'
bidscoinfolder.mkdir(parents=True, exist_ok=True)
if not rawfolder.is_dir():
@@ -51,7 +51,7 @@ def bidscoiner(sourcefolder: str, bidsfolder: str, participant: list=(), force:
bcoin.setup_logging(bidscoinfolder/'bidscoiner.log')
LOGGER.info('')
LOGGER.info(f"-------------- START BIDScoiner {__version__}: BIDS {bidsversion()} ------------")
LOGGER.info(f">>> bidscoiner sourcefolder={rawfolder} bidsfolder={bidsfolder} participant={participant} force={force} bidsmap={bidsmapfile}")
LOGGER.info(f">>> bidscoiner sourcefolder={rawfolder} bidsfolder={bidsfolder} participant={participant} force={force} bidsmapname={bidsmapfile}")

# Create a dataset description file if it does not exist
dataset_file = bidsfolder/'dataset_description.json'
@@ -175,9 +175,10 @@ def bidscoiner(sourcefolder: str, bidsfolder: str, participant: list=(), force:
bcoin.synchronize(pbatch, jobids)

# Merge the bids subfolders
- errors = ''
- provdata = bids.bidsprov(bidsfolder)
- participants_table, participants_dict = bids.addparticipant(bidsfolder/'participants.tsv')
+ errors = ''
+ provdata = bids.bidsprov(bidsfolder)
+ participants_table = bids.addparticipant(bidsfolder/'participants.tsv')
+ participants_meta = bids.participantmeta(bidsfolder/'participants.json')
for bidsfolder_tmp in sorted((bidsfolder/'HPC_work').glob('bids_*')):

subid = bidsfolder_tmp.name[5:] # Uses name = f"bids_{subid}" (as defined above)
@@ -217,15 +218,16 @@ def bidscoiner(sourcefolder: str, bidsfolder: str, participant: list=(), force:
# Update the participants table + dictionary
if subid not in participants_table.index:
LOGGER.verbose(f"Merging: participants.tsv -> {bidsfolder/'participants.tsv'}")
- participant_table, participant_dict = bids.addparticipant(bidsfolder_tmp/'participants.tsv')
- participants_table = pd.concat([participants_table, participant_table])
- participants_dict.update(participant_dict)
+ participant_table = bids.addparticipant(bidsfolder_tmp/'participants.tsv')
+ participants_table = pd.concat([participants_table, participant_table])
+ participant_meta = bids.participantmeta(bidsfolder_tmp/'participants.json')
+ participants_meta.update(participant_meta)

# Save the provenance and participants data to disk
provdata.sort_index().to_csv(bidscoinfolder/'bidscoiner.tsv', sep='\t')
participants_table.replace('', 'n/a').to_csv(bidsfolder/'participants.tsv', sep='\t', encoding='utf-8', na_rep='n/a')
with (bidsfolder/'participants.json').open('w') as fid:
- json.dump(participants_dict, fid, indent=4)
+ json.dump(participants_meta, fid, indent=4)

if not DEBUG:
shutil.rmtree(bidsfolder/'HPC_work', ignore_errors=True)
Expand Down Expand Up @@ -292,6 +294,9 @@ def bidscoiner(sourcefolder: str, bidsfolder: str, participant: list=(), force:
if unpacked:
shutil.rmtree(sesfolder)

+ # Add the participants sidecar file
+ bids.participantmeta(bidsfolder/'participants.json', bidsmap)

LOGGER.info('-------------- FINISHED! ------------')
LOGGER.info('')

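A hedged call example for the renamed keyword argument (the source and BIDS paths are placeholders); existing scripts that passed bidsmap= to this function would need to switch to bidsmapname=:

```python
from bidscoin import bidscoiner

# The keyword was renamed from `bidsmap` to `bidsmapname`; positional calls are unaffected
bidscoiner.bidscoiner('/project/sourcedata', '/project/bids', bidsmapname='bidsmap.yaml')
```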
12 changes: 9 additions & 3 deletions bidscoin/heuristics/bidsmap_bids2bids.yaml
@@ -41,9 +41,15 @@ Nibabel:
# --------------------------------------------------------------------------------
# Nibabel key-value heuristics (header fields that are mapped to the BIDS labels)
# --------------------------------------------------------------------------------
-  participant:                                    # Attributes or properties to populate the participants table/tsv-file
-    participant_id: <<filepath:/sub-(.*?)/>>      # This filesystem property extracts the subject label from the source directory. NB: Any property or attribute can be used as subject-label, e.g. <PatientID>
-    session_id: <<filepath:/sub-.*?/ses-(.*?)/>>  # This filesystem property extracts the subject label from the source directory. NB: Any property or attribute can be used as session-label, e.g. <StudyID>
+  participant:                                    # Attributes or properties to populate the participants table/tsv-file
+    participant_id:
+      value: <<filepath:/sub-(.*?)/>>             # This filesystem property extracts the subject label from the source directory. NB: Any property or attribute can be used as subject-label, e.g. <PatientID>
+      meta:
+        Description: The unique participant identifier of the form sub-<label>, matching a participant entity found in the dataset
+    session_id:
+      value: <<filepath:/sub-.*?/ses-(.*?)/>>     # This filesystem property extracts the session label from the source directory. NB: Any property or attribute can be used as session-label, e.g. <StudyID>
+      meta:
+        Description: The session identifier of the form ses-<label>, matching a session found in the dataset

anat: # ----------------------- All anatomical runs --------------------
- properties: &fileprop_anat # This is an optional (stub) entry of properties matching (could be added to any run-item)
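To illustrate the new nested layout, a self-contained sketch (assuming PyYAML is available; the snippet is copied from the heuristics above rather than loaded from a study bidsmap) showing how the value sub-field feeds the participants.tsv column while the meta sub-field is what participantmeta() copies into participants.json:

```python
import yaml  # PyYAML, assumed to be installed

snippet = """
participant:
  participant_id:
    value: <<filepath:/sub-(.*?)/>>
    meta:
      Description: The unique participant identifier of the form sub-<label>, matching a participant entity found in the dataset
  session_id:
    value: <<filepath:/sub-.*?/ses-(.*?)/>>
    meta:
      Description: The session identifier of the form ses-<label>, matching a session found in the dataset
"""

participant = yaml.safe_load(snippet)['participant']
print(participant['session_id']['value'])   # dynamic <<filepath:..>> expression -> fills the session_id column in participants.tsv
print(participant['session_id']['meta'])    # {'Description': ...} -> what participantmeta() writes to participants.json
```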