35
35
actions ,
36
36
sections ,
37
37
filters ,
38
- fields_actions ,
39
- values_actions ,
38
+ groups ,
39
+ group_actions ,
40
40
)
41
41
from collections import OrderedDict
42
42
import os
@@ -145,15 +145,9 @@ def load_deid(path=None):
145
145
if line .startswith ("#" ):
146
146
continue
147
147
148
- # Starts with Format?
149
- elif bool (re .match ("format" , line , re .I )):
150
- fmt = re .sub ("FORMAT|(\s+)" , "" , line ).lower ()
151
- if fmt not in formats :
152
- bot .exit ("%s is not a valid format." % fmt )
153
-
154
- # Set format
155
- config ["format" ] = fmt
156
- bot .debug ("FORMAT set to %s" % fmt )
148
+ # Set format
149
+ elif bool (re .match ("^format" , line , re .I )):
150
+ config ["format" ] = parse_format (line )
157
151
158
152
# A new section?
159
153
elif line .startswith ("%" ):
@@ -174,24 +168,18 @@ def load_deid(path=None):
174
168
config = config , section = section , section_name = section_name
175
169
)
176
170
177
- # An action (replace, blank, remove, keep, jitter)
171
+ # A %fields action (only field allowed), %values allows split
172
+ elif line .upper ().startswith (group_actions ) and section in groups :
173
+ config = parse_group_action (
174
+ section = section , section_name = section_name , line = line , config = config
175
+ )
176
+
177
+ # An action (ADD, BLANK, JITTER, KEEP, REPLACE, REMOVE, LABEL)
178
178
elif line .upper ().startswith (actions ):
179
179
180
180
# Start of a filter group
181
181
if line .upper ().startswith ("LABEL" ) and section == "filter" :
182
- members = []
183
- keep_going = True
184
- while keep_going is True :
185
- next_line = spec [0 ]
186
- if next_line .upper ().strip ().startswith ("LABEL" ):
187
- keep_going = False
188
- elif next_line .upper ().strip ().startswith ("%" ):
189
- keep_going = False
190
- else :
191
- new_member = spec .pop (0 )
192
- members .append (new_member )
193
- if len (spec ) == 0 :
194
- keep_going = False
182
+ members = parse_filter_group (spec )
195
183
196
184
# Add the filter label to the config
197
185
config = parse_label (
@@ -244,6 +232,48 @@ def find_deid(path=None):
244
232
return path
245
233
246
234
235
def parse_format(line):
    """given a line that starts with FORMAT, parse the format of the
    file and check that it is supported. If not, exit on error. If yes,
    return the format.

    Parameters
    ==========
    line: the line that starts with format (the keyword may be written
          in any letter case, e.g. FORMAT, Format or format).

    Returns
    =======
    the format name, lowercased and with all whitespace removed.
    """
    # Strip the leading keyword case-insensitively so that a line written as
    # "format dicom" yields "dicom" rather than "formatdicom". The keyword is
    # only removed at the start of the line; all whitespace is removed
    # everywhere. A raw string avoids the invalid "\s" escape sequence.
    fmt = re.sub(r"^\s*format|\s+", "", line, flags=re.I).lower()
    if fmt not in formats:
        bot.exit("%s is not a valid format." % fmt)
    bot.debug("FORMAT set to %s" % fmt)
    return fmt
249
+
250
+
251
def parse_filter_group(spec):
    """collect the lines that belong to the current filter group.

    Lines are consumed from the front of spec until one of three things
    happens: the next line starts a new LABEL, the next line starts a new
    section (begins with %), or spec runs out of lines. The terminating
    line, if any, is left in spec for the caller to handle. Because lines
    are removed with pop, the list held by the calling function is updated
    in place.

    Parameters
    ==========
    spec: unparsed lines of the deid recipe file

    Returns
    =======
    a list of the member lines (unmodified) that were removed from spec
    """
    collected = []
    while spec:
        # Compare on an uppercased, stripped copy; the stored line is untouched
        head = spec[0].upper().strip()
        if head.startswith(("LABEL", "%")):
            break
        collected.append(spec.pop(0))
    return collected
275
+
276
+
247
277
def parse_label (section , config , section_name , members , label = None ):
248
278
"""parse label will add a (optionally named) label to the filter
249
279
section, including one or more criteria
@@ -295,7 +325,10 @@ def parse_label(section, config, section_name, members, label=None):
295
325
296
326
297
327
def parse_member (members , operator = None ):
298
-
328
+ """a parsing function for a filter member. Will return a single member
329
+ with fields, values, and an operator. In the case of multiple and/or
330
+ statements that are chained, will instead return a list.
331
+ """
299
332
main_operator = operator
300
333
301
334
actions = []
@@ -388,7 +421,7 @@ def add_section(config, section, section_name=None):
388
421
if section is None :
389
422
bot .exit ("You must define a section (e.g. %header) before any action." )
390
423
391
- if section == "filter" and section_name is None :
424
+ if section in [ "filter" , "values" , "fields" ] and section_name is None :
392
425
bot .exit ("You must provide a name for a filter section." )
393
426
394
427
if section not in sections :
@@ -421,6 +454,54 @@ def _remove_comments(parts):
421
454
return value .split ("#" )[0 ] # remove comments
422
455
423
456
457
def parse_group_action(section, line, config, section_name):
    """parse a group action, either FIELD or SPLIT, which must belong to
    either a fields or values section. The parsed action is appended to
    config[section][section_name], and the config is returned.

    Parameters
    =========
    section: a valid section name from the deid config file
    line: the line content to parse for the section/action
    config: the growing/current config dictionary
    section_name: the name of the group that the action is added to

    Returns
    =======
    the config, updated in place for the fields and values sections
    """
    if not line.upper().startswith(group_actions):
        bot.exit("%s is not a valid group action." % line)

    if not line.upper().startswith("FIELD") and section == "fields":
        bot.exit("%fields only supports FIELD actions.")

    bot.debug("%s: adding %s" % (section, line))
    parts = line.split(" ")
    action = parts.pop(0)

    # Splitting on a single space yields empty strings for repeated or
    # trailing spaces. Drop them here so that they are never mistaken for
    # the field name (e.g. "FIELD  PatientID" or "FIELD ").
    while parts and not parts[0]:
        parts.pop(0)

    # Both require some parts
    if not parts:
        bot.exit("%s action %s requires additional arguments" % (section, action))

    # For both, the second is always a field or field expander
    field = parts.pop(0)

    # Fields supports one or more fields with expanders (no third arguments)
    if section == "fields":
        config[section][section_name].append({"action": action, "field": field})

    # Values supports FIELD or SPLIT
    elif section == "values":

        # If we have a third set of arguments
        if parts:
            value = _remove_comments(parts)
            config[section][section_name].append(
                {"action": action, "field": field, "value": value}
            )
        else:
            config[section][section_name].append({"action": action, "field": field})

    return config
503
+
504
+
424
505
def parse_config_action (section , line , config , section_name = None ):
425
506
"""add action will take a line from a deid config file, a config (dictionary), and
426
507
an active section name (eg header) and add an entry to the config file to perform
@@ -434,7 +515,6 @@ def parse_config_action(section, line, config, section_name=None):
434
515
section_name: optionally, a section name
435
516
436
517
"""
437
-
438
518
if not line .upper ().startswith (actions ):
439
519
bot .exit ("%s is not a valid action line." % line )
440
520
0 commit comments