Skip to content

Commit 5e7175f

Browse files
authored
Merge pull request #120 from pydicom/add/tag-groups
Adding tag groups for values and fields
2 parents 3b924bb + da477c5 commit 5e7175f

20 files changed

+908
-80
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ and **Merged pull requests**. Critical items to know are:
1414
Referenced versions in headers are tagged on Github, in parentheses are for pypi.
1515

1616
## [vxx](https://github.com/pydicom/deid/tree/master) (master)
17+
- adding support for tag groups (values, fields) (0.1.4)
1718
- Adding option to provide function to remove (must return boolean) (0.1.38)
1819
- removing matplotlib version requirement (0.1.37)
1920
- Matplotlib dependency >= 2.1.2 (0.1.36)

deid/config/__init__.py

Lines changed: 54 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -91,28 +91,40 @@ def _get_section(self, name):
9191
"""
9292
section = None
9393
if self.deid is not None:
94-
if name in self.deid:
95-
section = self.deid[name]
94+
section = self.deid.get(name)
9695
return section
9796

97+
# Get Sections
98+
9899
def get_format(self):
    """look up the "format" section of the loaded deid and hand it back
       unchanged. The result is whatever _get_section returns for that
       name (None when nothing is found).
    """
    recipe_format = self._get_section("format")
    return recipe_format
102103

104+
def _get_named_section(self, section_name, name=None):
    """helper to retrieve a whole section, or one named entry under it.

       Parameters
       ==========
       section_name: the top level section to look up (e.g., "filter")
       name: optional name of an entry within the section

       If the section is not found, None is returned regardless of name.
       If the section exists but has no entry called name, an empty list
       is returned.
    """
    found = self._get_section(section_name)

    # Nothing to narrow down: return the section (or None) as is
    if found is None or name is None:
        return found

    return found.get(name, [])
113+
103114
def get_filters(self, name=None):
    """return the "filter" section of the deid recipe, or only the filter
       group called name when one is given. The lookup is delegated to
       _get_named_section.
    """
    selected = self._get_named_section("filter", name)
    return selected
110118

111-
def ls_filters(self):
112-
"""list names of filter groups
119+
def get_values_lists(self, name=None):
    """return the "values" section of the deid recipe, or only the values
       list called name when one is given. The lookup is delegated to
       _get_named_section.
    """
    selected = self._get_named_section("values", name)
    return selected
123+
124+
def get_fields_lists(self, name=None):
    """return the "fields" section of the deid recipe, or only the fields
       list called name when one is given. The lookup is delegated to
       _get_named_section.
    """
    selected = self._get_named_section("fields", name)
    return selected
116128

117129
def get_actions(self, action=None, field=None):
118130
"""get deid actions to perform on a header, or a subset based on a type
@@ -137,6 +149,38 @@ def get_actions(self, action=None, field=None):
137149

138150
return header
139151

152+
# Boolean properties
153+
154+
def _has_list_content(self, name):
    """return True if the section called name exists in the loaded deid
       and is not empty, False otherwise.

       Parameters
       ==========
       name: the section to check (e.g., "fields", "values", "header")
    """
    # self.deid may be None (_get_section guards for the same case), and
    # a section may be missing or hold None; all of those mean "no content"
    deid = self.deid or {}
    return bool(deid.get(name))
156+
157+
def has_fields_lists(self):
    """report whether the "fields" section of the loaded deid has content,
       as determined by _has_list_content.
    """
    has_content = self._has_list_content("fields")
    return has_content
159+
160+
def has_values_lists(self):
    """report whether the "values" section of the loaded deid has content,
       as determined by _has_list_content.
    """
    has_content = self._has_list_content("values")
    return has_content
162+
163+
def has_actions(self):
    """report whether the "header" section (where actions are stored) of
       the loaded deid has content, as determined by _has_list_content.
    """
    has_content = self._has_list_content("header")
    return has_content
165+
166+
# Listing
167+
168+
def listof(self, section):
    """return the keys of a section as a list. A section that is missing
       or empty gives an empty list.

       Parameters
       ==========
       section: the name of the section to list
    """
    content = self._get_section(section)
    if not content:
        return []
    return list(content.keys())
172+
173+
def ls_filters(self):
    """list the names of the groups under the "filter" section, via listof.
    """
    names = self.listof("filter")
    return names
175+
176+
def ls_valuelists(self):
    """list the names of the groups under the "values" section, via listof.
    """
    names = self.listof("values")
    return names
178+
179+
def ls_fieldlists(self):
    """list the names of the groups under the "fields" section, via listof.
    """
    names = self.listof("fields")
    return names
181+
182+
# Init
183+
140184
def _init_deid(self, deid=None, base=False, default_base="dicom"):
141185
"""initalize the recipe with one or more deids, optionally including
142186
the default. This function is called at init time. If you need to add

deid/config/standards.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,15 @@
2626
formats = ["dicom"]
2727

2828
# Supported Sections
29-
sections = ["header", "labels", "filter"]
29+
sections = ["header", "labels", "filter", "values", "fields"]
3030

31+
# Supported Header Actions
3132
actions = ("ADD", "BLANK", "JITTER", "KEEP", "REPLACE", "REMOVE", "LABEL")
3233

34+
# Supported Group actions (SPLIT only supported for values)
35+
groups = ["values", "fields"]
36+
group_actions = ("FIELD", "SPLIT")
37+
3338
# Valid actions for a filter action
3439
filters = (
3540
"contains",

deid/config/utils.py

Lines changed: 125 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,16 @@
2828
# pylint: skip-file
2929

3030
from deid.logger import bot
31-
from deid.utils import read_file
31+
from deid.utils import read_file, get_installdir
3232
from deid.data import data_base
33-
from deid.config.standards import formats, actions, sections, filters
34-
33+
from deid.config.standards import (
34+
formats,
35+
actions,
36+
sections,
37+
filters,
38+
groups,
39+
group_actions,
40+
)
3541
from collections import OrderedDict
3642
import os
3743
import re
@@ -130,7 +136,7 @@ def load_deid(path=None):
130136
config = OrderedDict()
131137
section = None
132138

133-
while len(spec) > 0:
139+
while spec:
134140

135141
# Clean up white trailing/leading space
136142
line = spec.pop(0).strip()
@@ -139,15 +145,9 @@ def load_deid(path=None):
139145
if line.startswith("#"):
140146
continue
141147

142-
# Starts with Format?
143-
elif bool(re.match("format", line, re.I)):
144-
fmt = re.sub("FORMAT|(\s+)", "", line).lower()
145-
if fmt not in formats:
146-
bot.exit("%s is not a valid format." % fmt)
147-
148-
# Set format
149-
config["format"] = fmt
150-
bot.debug("FORMAT set to %s" % fmt)
148+
# Set format
149+
elif bool(re.match("^format", line, re.I)):
150+
config["format"] = parse_format(line)
151151

152152
# A new section?
153153
elif line.startswith("%"):
@@ -168,24 +168,18 @@ def load_deid(path=None):
168168
config=config, section=section, section_name=section_name
169169
)
170170

171-
# An action (replace, blank, remove, keep, jitter)
171+
# A %fields action (only field allowed), %values allows split
172+
elif line.upper().startswith(group_actions) and section in groups:
173+
config = parse_group_action(
174+
section=section, section_name=section_name, line=line, config=config
175+
)
176+
177+
# An action (ADD, BLANK, JITTER, KEEP, REPLACE, REMOVE, LABEL)
172178
elif line.upper().startswith(actions):
173179

174180
# Start of a filter group
175181
if line.upper().startswith("LABEL") and section == "filter":
176-
members = []
177-
keep_going = True
178-
while keep_going is True:
179-
next_line = spec[0]
180-
if next_line.upper().strip().startswith("LABEL"):
181-
keep_going = False
182-
elif next_line.upper().strip().startswith("%"):
183-
keep_going = False
184-
else:
185-
new_member = spec.pop(0)
186-
members.append(new_member)
187-
if len(spec) == 0:
188-
keep_going = False
182+
members = parse_filter_group(spec)
189183

190184
# Add the filter label to the config
191185
config = parse_label(
@@ -201,7 +195,7 @@ def load_deid(path=None):
201195
section=section, section_name=section_name, line=line, config=config
202196
)
203197
else:
204-
bot.debug("%s not recognized to be in valid format, skipping." % line)
198+
bot.warning("%s not recognized to be in valid format, skipping." % line)
205199
return config
206200

207201

@@ -214,6 +208,9 @@ def find_deid(path=None):
214208
path: a path on the filesystem. If not provided, will assume PWD.
215209
216210
"""
211+
# A default deid will be loaded if all else fails
212+
default_deid = os.path.join(get_installdir(), "data", "deid.dicom")
213+
217214
if path is None:
218215
path = os.getcwd()
219216

@@ -224,7 +221,11 @@ def find_deid(path=None):
224221
]
225222

226223
if len(contenders) == 0:
227-
bot.exit("No deid settings files found in %s, exiting." % (path))
224+
bot.warning(
225+
"No deid settings files found in %s, will use default dicom.deid."
226+
% path
227+
)
228+
contenders.append(default_deid)
228229

229230
elif len(contenders) > 1:
230231
bot.warning("Multiple deid files found in %s, will use first." % (path))
@@ -238,6 +239,48 @@ def find_deid(path=None):
238239
return path
239240

240241

242+
def parse_format(line):
    """given a line that starts with FORMAT, parse the format of the
       file and check that it is supported. If not, exit on error. If yes,
       return the format (lowercased, with all whitespace removed).

       Parameters
       ==========
       line: the line that starts with format (matched in any case).
    """
    # The keyword is stripped case-insensitively, in line with how such
    # lines are detected, so "format dicom" and "FORMAT dicom" both yield
    # "dicom". Only the leading keyword is removed; the rest is kept.
    fmt = re.sub(r"^\s*format|\s+", "", line, flags=re.I).lower()
    if fmt not in formats:
        bot.exit("%s is not a valid format." % fmt)
    bot.debug("FORMAT set to %s" % fmt)
    return fmt
256+
257+
258+
def parse_filter_group(spec):
    """collect the lines that belong to the current filter group. Lines are
       consumed from the front of spec until one of the following is next:
       a new LABEL line, the start of a new section (a line starting with %),
       or the end of the spec. The terminating line is left in spec.

       Parameters
       ==========
       spec: unparsed lines of the deid recipe file. This list is modified
             in place (collected lines are popped off the front).

       Returns a list of the collected lines, exactly as they were in spec.
    """
    group_end = ("LABEL", "%")
    members = []

    while spec:
        upcoming = spec[0].upper().strip()
        if upcoming.startswith(group_end):
            break
        members.append(spec.pop(0))

    return members
282+
283+
241284
def parse_label(section, config, section_name, members, label=None):
242285
"""parse label will add a (optionally named) label to the filter
243286
section, including one or more criteria
@@ -289,7 +332,10 @@ def parse_label(section, config, section_name, members, label=None):
289332

290333

291334
def parse_member(members, operator=None):
292-
335+
"""a parsing function for a filter member. Will return a single member
336+
with fields, values, and an operator. In the case of multiple and/or
337+
statements that are chained, will instead return a list.
338+
"""
293339
main_operator = operator
294340

295341
actions = []
@@ -382,7 +428,7 @@ def add_section(config, section, section_name=None):
382428
if section is None:
383429
bot.exit("You must define a section (e.g. %header) before any action.")
384430

385-
if section == "filter" and section_name is None:
431+
if section in ["filter", "values", "fields"] and section_name is None:
386432
bot.exit("You must provide a name for a filter section.")
387433

388434
if section not in sections:
@@ -415,6 +461,54 @@ def _remove_comments(parts):
415461
return value.split("#")[0] # remove comments
416462

417463

464+
def parse_group_action(section, line, config, section_name):
    """parse a group action, either FIELD or SPLIT, which must belong to
       either a fields or values section. The parsed entry is appended to
       config[section][section_name] and the config is returned.

       Parameters
       =========
       section: a valid section name from the deid config file
       line: the line content to parse for the section/action
       config: the growing/current config dictionary
       section_name: the name of the group within the section

       Each entry is a dictionary with "action" and "field", plus "value"
       when a values line carries a third set of arguments.
    """
    if not line.upper().startswith(group_actions):
        bot.exit("%s is not a valid group action." % line)

    if not line.upper().startswith("FIELD") and section == "fields":
        bot.exit("%fields only supports FIELD actions.")

    bot.debug("%s: adding %s" % (section, line))

    # Split on any run of whitespace (repeated spaces, tabs) into at most
    # three pieces: action, field, and the untouched remainder of the line.
    # Splitting on a single space would produce empty tokens, and so an
    # empty field, for input such as "FIELD  PatientID".
    parts = line.split(None, 2)
    action = parts.pop(0)

    # Both require some parts
    if not parts:
        bot.exit("%s action %s requires additional arguments" % (section, action))

    # For both, the second is always a field or field expander
    field = parts.pop(0)
    entry = {"action": action, "field": field}

    # Fields supports one or more fields with expanders (no third arguments)
    if section == "fields":
        config[section][section_name].append(entry)

    # Values supports FIELD or SPLIT
    elif section == "values":

        # If we have a third set of arguments, parts is a one-item list here.
        # NOTE(review): presumably _remove_comments joins its parts with a
        # space before dropping the comment - confirm against its definition.
        if parts:
            entry["value"] = _remove_comments(parts)
        config[section][section_name].append(entry)

    return config
510+
511+
418512
def parse_config_action(section, line, config, section_name=None):
419513
"""add action will take a line from a deid config file, a config (dictionary), and
420514
an active section name (eg header) and add an entry to the config file to perform
@@ -428,7 +522,6 @@ def parse_config_action(section, line, config, section_name=None):
428522
section_name: optionally, a section name
429523
430524
"""
431-
432525
if not line.upper().startswith(actions):
433526
bot.exit("%s is not a valid action line." % line)
434527

deid/dicom/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,5 @@
66
)
77

88
from .utils import get_files
9-
109
from .fields import extract_sequence
11-
1210
from .pixels import has_burned_pixels, DicomCleaner

0 commit comments

Comments
 (0)