Skip to content

Commit 6a487ca

Browse files
committed
adding start of changes to parse values and fields
Signed-off-by: vsoch <[email protected]>
1 parent 115a9c2 commit 6a487ca

File tree

13 files changed

+456
-59
lines changed

13 files changed

+456
-59
lines changed

deid/config/__init__.py

Lines changed: 54 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -91,28 +91,40 @@ def _get_section(self, name):
9191
"""
9292
section = None
9393
if self.deid is not None:
94-
if name in self.deid:
95-
section = self.deid[name]
94+
section = self.deid.get(name)
9695
return section
9796

97+
# Get Sections
98+
9899
def get_format(self):
    """look up the "format" section of the loaded deid and hand it back.
       The result is whatever _get_section returns for that key, which is
       None when no deid is loaded or no format was set.
    """
    fmt = self._get_section("format")
    return fmt
102103

104+
def _get_named_section(self, section_name, name=None):
105+
"""a helper function to return an entire section, or if a name is
106+
provided, a named section under it. If the section is not
107+
defined, we appropriately return None.
108+
"""
109+
section = self._get_section(section_name)
110+
if name is not None and section is not None:
111+
section = section.get(name, [])
112+
return section
113+
103114
def get_filters(self, name=None):
    """return the filter section of a deid recipe. If a name is given,
       only the filter group stored under that name is returned.

       Parameters
       ==========
       name: optionally, the name of a single filter group
    """
    filters = self._get_named_section("filter", name)
    return filters
110118

111-
def ls_filters(self):
112-
"""list names of filter groups
119+
def get_values_lists(self, name=None):
    """return the values section of a deid recipe. If a name is given,
       only the values list stored under that name is returned.

       Parameters
       ==========
       name: optionally, the name of a single values list
    """
    values_lists = self._get_named_section("values", name)
    return values_lists
123+
124+
def get_fields_lists(self, name=None):
    """return the fields section of a deid recipe. If a name is given,
       only the fields list stored under that name is returned.

       Parameters
       ==========
       name: optionally, the name of a single fields list
    """
    return self._get_named_section("fields", name)
116128

117129
def get_actions(self, action=None, field=None):
118130
"""get deid actions to perform on a header, or a subset based on a type
@@ -137,6 +149,38 @@ def get_actions(self, action=None, field=None):
137149

138150
return header
139151

152+
# Boolean properties
153+
154+
def _has_list_content(self, name):
155+
return len(self.deid.get(name, [])) > 0
156+
157+
def has_fields_lists(self):
    """report whether the "fields" section has any content, as decided
       by _has_list_content.
    """
    section = "fields"
    return self._has_list_content(section)
159+
160+
def has_values_lists(self):
    """report whether the "values" section has any content, as decided
       by _has_list_content.
    """
    section = "values"
    return self._has_list_content(section)
162+
163+
def has_actions(self):
    """report whether the "header" section (where actions are stored)
       has any content, as decided by _has_list_content.
    """
    section = "header"
    return self._has_list_content(section)
165+
166+
# Listing
167+
168+
def listof(self, section):
    """return a list of keys for a section.

       Parameters
       ==========
       section: the name of the section to list (e.g., "filter")

       Returns an empty list when the section is not defined (or no deid is
       loaded), so the ls_* helpers can be called without checking first.
    """
    listing = self._get_section(section)

    # _get_section hands back None for an undefined section
    if listing is None:
        return []
    return list(listing.keys())
172+
173+
def ls_filters(self):
    """list the names of the filter groups (keys of the "filter" section)
    """
    section = "filter"
    return self.listof(section)
175+
176+
def ls_valuelists(self):
    """list the names of the values lists (keys of the "values" section)
    """
    section = "values"
    return self.listof(section)
178+
179+
def ls_fieldlists(self):
    """list the names of the fields lists (keys of the "fields" section)
    """
    section = "fields"
    return self.listof(section)
181+
182+
# Init
183+
140184
def _init_deid(self, deid=None, base=False, default_base="dicom"):
141185
"""initialize the recipe with one or more deids, optionally including
142186
the default. This function is called at init time. If you need to add

deid/config/standards.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@
3131
# Supported Header Actions
3232
actions = ("ADD", "BLANK", "JITTER", "KEEP", "REPLACE", "REMOVE", "LABEL")
3333

34-
# Supported Group actions
35-
fields_actions = ["FIELD"]
36-
values_actions = ["FIELD", "SPLIT"]
34+
# Supported Group actions (SPLIT only supported for values)
35+
groups = ["values", "fields"]
36+
group_actions = ("FIELD", "SPLIT")
3737

3838
# Valid actions for a filter action
3939
filters = (

deid/config/utils.py

Lines changed: 111 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@
3535
actions,
3636
sections,
3737
filters,
38-
fields_actions,
39-
values_actions,
38+
groups,
39+
group_actions,
4040
)
4141
from collections import OrderedDict
4242
import os
@@ -145,15 +145,9 @@ def load_deid(path=None):
145145
if line.startswith("#"):
146146
continue
147147

148-
# Starts with Format?
149-
elif bool(re.match("format", line, re.I)):
150-
fmt = re.sub("FORMAT|(\s+)", "", line).lower()
151-
if fmt not in formats:
152-
bot.exit("%s is not a valid format." % fmt)
153-
154-
# Set format
155-
config["format"] = fmt
156-
bot.debug("FORMAT set to %s" % fmt)
148+
# Set format
149+
elif bool(re.match("^format", line, re.I)):
150+
config["format"] = parse_format(line)
157151

158152
# A new section?
159153
elif line.startswith("%"):
@@ -174,24 +168,20 @@ def load_deid(path=None):
174168
config=config, section=section, section_name=section_name
175169
)
176170

177-
# An action (replace, blank, remove, keep, jitter)
171+
# A %fields action (only field allowed), %values allows split
172+
elif line.upper().startswith(group_actions) and section in groups:
173+
print("SECTION %s" % section)
174+
print(config)
175+
config = parse_group_action(
176+
section=section, section_name=section_name, line=line, config=config
177+
)
178+
179+
# An action (ADD, BLANK, JITTER, KEEP, REPLACE, REMOVE, LABEL)
178180
elif line.upper().startswith(actions):
179181

180182
# Start of a filter group
181183
if line.upper().startswith("LABEL") and section == "filter":
182-
members = []
183-
keep_going = True
184-
while keep_going is True:
185-
next_line = spec[0]
186-
if next_line.upper().strip().startswith("LABEL"):
187-
keep_going = False
188-
elif next_line.upper().strip().startswith("%"):
189-
keep_going = False
190-
else:
191-
new_member = spec.pop(0)
192-
members.append(new_member)
193-
if len(spec) == 0:
194-
keep_going = False
184+
members = parse_filter_group(spec)
195185

196186
# Add the filter label to the config
197187
config = parse_label(
@@ -244,6 +234,48 @@ def find_deid(path=None):
244234
return path
245235

246236

237+
def parse_format(line):
    """given a line that starts with FORMAT, parse the format of the
       file and check that it is supported. If not, bot.exit is called with
       an error. If yes, return the format.

       The FORMAT keyword is matched regardless of case (the caller detects
       the line case-insensitively), and only at the start of the line, so
       a format name that itself contains "format" is left intact.

       Parameters
       ==========
       line: the line that starts with format.
    """
    # Drop the leading keyword, then every remaining whitespace character
    fmt = re.sub(r"^\s*format", "", line, flags=re.IGNORECASE)
    fmt = re.sub(r"\s+", "", fmt).lower()

    if fmt not in formats:
        bot.exit("%s is not a valid format." % fmt)
    bot.debug("FORMAT set to %s" % fmt)
    return fmt
251+
252+
253+
def parse_filter_group(spec):
    """collect the lines that belong to the current filter group. Lines are
       consumed from the front of spec until one of three things happens:
       the next line starts a new LABEL (case 1), the next line starts a new
       section with % (case 2), or there are no lines left (case 3).

       The terminating line is left in spec. Because lines are popped from
       the list that is passed in, the caller sees spec shortened.

       Parameters
       ==========
       spec: unparsed lines of the deid recipe file

       Returns the list of member lines, unmodified, in order.
    """
    members = []
    while spec:
        # Peek (don't pop) so a terminator stays available to the caller
        peeked = spec[0].upper().strip()
        if peeked.startswith("LABEL") or peeked.startswith("%"):
            break
        members.append(spec.pop(0))
    return members
277+
278+
247279
def parse_label(section, config, section_name, members, label=None):
248280
"""parse label will add a (optionally named) label to the filter
249281
section, including one or more criteria
@@ -295,7 +327,10 @@ def parse_label(section, config, section_name, members, label=None):
295327

296328

297329
def parse_member(members, operator=None):
298-
330+
"""a parsing function for a filter member. Will return a single member
331+
with fields, values, and an operator. In the case of multiple and/or
332+
statements that are chained, will instead return a list.
333+
"""
299334
main_operator = operator
300335

301336
actions = []
@@ -388,7 +423,7 @@ def add_section(config, section, section_name=None):
388423
if section is None:
389424
bot.exit("You must define a section (e.g. %header) before any action.")
390425

391-
if section == "filter" and section_name is None:
426+
if section in ["filter", "values", "fields"] and section_name is None:
392427
bot.exit("You must provide a name for a filter section.")
393428

394429
if section not in sections:
@@ -421,6 +456,55 @@ def _remove_comments(parts):
421456
return value.split("#")[0] # remove comments
422457

423458

459+
def parse_group_action(section, line, config, section_name):
    """parse a group action, either FIELD or SPLIT, which must belong to
       either a fields or values section, and append it to the named list
       in the config.

       The action is stored upper-cased, so a recipe line written as
       "field PatientID" produces the same entry as "FIELD PatientID".
       Repeated spaces between the action and the field are tolerated.

       Parameters
       ==========
       section: a valid section name from the deid config file
       line: the line content to parse for the section/action
       config: the growing/current config dictionary
       section_name: the name of the list under the section to append to

       Returns the config. Invalid lines are reported through bot.exit.
    """
    # First token is the action; matching is case-insensitive
    action, _, remainder = line.strip().partition(" ")
    action = action.upper()

    if action not in group_actions:
        bot.exit("%s is not a valid group action." % line)

    if section == "fields" and action != "FIELD":
        bot.exit("%fields only supports FIELD actions.")

    bot.debug("%s: adding %s" % (section, line))

    # For both sections the second token is a field or field expander.
    # lstrip so that extra spaces after the action do not give an empty field.
    field, _, remainder = remainder.lstrip().partition(" ")
    if not field:
        bot.exit("%s action %s requires additional arguments" % (section, action))

    entry = {"action": action, "field": field}

    # Only values supports a third set of arguments (e.g., for SPLIT)
    if section == "values" and remainder:
        entry["value"] = _remove_comments(remainder.split(" "))

    if section in ("fields", "values"):
        config[section][section_name].append(entry)

    return config
506+
507+
424508
def parse_config_action(section, line, config, section_name=None):
425509
"""add action will take a line from a deid config file, a config (dictionary), and
426510
an active section name (eg header) and add an entry to the config file to perform
@@ -434,7 +518,6 @@ def parse_config_action(section, line, config, section_name=None):
434518
section_name: optionally, a section name
435519
436520
"""
437-
438521
if not line.upper().startswith(actions):
439522
bot.exit("%s is not a valid action line." % line)
440523

deid/dicom/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,5 @@
66
)
77

88
from .utils import get_files
9-
109
from .fields import extract_sequence
11-
1210
from .pixels import has_burned_pixels, DicomCleaner

deid/dicom/actions.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
from deid.logger import bot
2626
from deid.config.standards import actions as valid_actions
2727

28-
from .fields import expand_field_expression
28+
from .fields import expand_field_expression, find_by_values
2929

3030
from deid.utils import get_timestamp, parse_value
3131

@@ -51,7 +51,7 @@ def perform_action(dicom, action, item=None, fields=None, return_seen=False):
5151
"action" (eg, REPLACE) what to do with the field
5252
"value": if needed, the field from the response to replace with
5353
"""
54-
field = action.get("field") # e.g: PatientID, endswith:ID
54+
field = action.get("field") # e.g: PatientID, endswith:ID, values:name, fields:name
5555
value = action.get("value") # "suid" or "var:field"
5656
action = action.get("action") # "REPLACE"
5757

@@ -60,8 +60,30 @@ def perform_action(dicom, action, item=None, fields=None, return_seen=False):
6060
bot.warning("%s in not a valid choice. Defaulting to blanked." % action)
6161
action = "BLANK"
6262

63-
# If there is an expander applied to field, we iterate over
64-
fields = expand_field_expression(field=field, dicom=dicom, contenders=fields)
63+
# If values or fields is provided, ids is required
64+
if re.search("^(values|fields)", field):
65+
if not item:
66+
bot.exit(
67+
"An item lookup must be provided to reference a list of values or fields."
68+
)
69+
70+
# A values list returns fields with the value
71+
if re.search("^values", field):
72+
values = item.get(re.sub("^values:", "", field), [])
73+
fields = find_by_values(values=values, dicom=dicom)
74+
75+
# A fields list is used verbatim
76+
elif re.search("^fields", field):
77+
listing = []
78+
for contender in item.get(re.sub("^fields:", "", field), []):
79+
listing += expand_field_expression(
80+
field=contender, dicom=dicom, contenders=fields
81+
)
82+
fields = listing
83+
84+
else:
85+
# If there is an expander applied to field, we iterate over
86+
fields = expand_field_expression(field=field, dicom=dicom, contenders=fields)
6587

6688
# Keep track of fields we have seen
6789
seen = []

deid/dicom/fields.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,22 @@ def extract_sequence(sequence, prefix=None):
8787
return items
8888

8989

90+
def find_by_values(values, dicom):
    """Given a list of values, find fields in the dicom that contain any
       of those values, as determined by a regular expression search.

       Each value is treated as literal text (it is escaped before being
       added to the expression), so characters such as ^ or . in a value
       are matched as-is. Field values are converted to strings before
       searching.

       Parameters
       ==========
       values: a list of values to look for
       dicom: the dicom passed on to get_fields to obtain field/value pairs

       Returns a list of field names; empty if there are no values to find.
    """
    # Skip empty entries: an empty alternative would match every field
    patterns = [re.escape(str(value)) for value in values or [] if value not in (None, "")]
    if not patterns:
        return []

    # Create single regular expression to search by
    regexp = re.compile("(%s)" % "|".join(patterns))

    contenders = get_fields(dicom)
    return [
        field
        for field, value in contenders.items()
        if value is not None and regexp.search(str(value))
    ]
104+
105+
90106
def expand_field_expression(field, dicom, contenders=None):
91107
"""Get a list of fields based on an expression. If
92108
no expression found, return single field. Options for fields include:
@@ -95,8 +111,7 @@ def expand_field_expression(field, dicom, contenders=None):
95111
startswith: filter to fields that start with the expression
96112
contains: filter to fields that contain the expression
97113
allfields: include all fields
98-
exceptfields: filter to all fields except those listed ( | separated)
99-
114+
exceptfields: filter to all fields except those listed ( | separated)
100115
"""
101116
# Expanders that don't have a : must be checked for
102117
expanders = ["all"]

0 commit comments

Comments
 (0)