35
35
actions ,
36
36
sections ,
37
37
filters ,
38
- fields_actions ,
39
- values_actions ,
38
+ groups ,
39
+ group_actions ,
40
40
)
41
41
from collections import OrderedDict
42
42
import os
@@ -145,15 +145,9 @@ def load_deid(path=None):
145
145
if line .startswith ("#" ):
146
146
continue
147
147
148
- # Starts with Format?
149
- elif bool (re .match ("format" , line , re .I )):
150
- fmt = re .sub ("FORMAT|(\s+)" , "" , line ).lower ()
151
- if fmt not in formats :
152
- bot .exit ("%s is not a valid format." % fmt )
153
-
154
- # Set format
155
- config ["format" ] = fmt
156
- bot .debug ("FORMAT set to %s" % fmt )
148
+ # Set format
149
+ elif bool (re .match ("^format" , line , re .I )):
150
+ config ["format" ] = parse_format (line )
157
151
158
152
# A new section?
159
153
elif line .startswith ("%" ):
@@ -174,24 +168,20 @@ def load_deid(path=None):
174
168
config = config , section = section , section_name = section_name
175
169
)
176
170
177
- # An action (replace, blank, remove, keep, jitter)
171
+ # A %fields action (only field allowed), %values allows split
172
+ elif line .upper ().startswith (group_actions ) and section in groups :
173
+ print ("SECTION %s" % section )
174
+ print (config )
175
+ config = parse_group_action (
176
+ section = section , section_name = section_name , line = line , config = config
177
+ )
178
+
179
+ # An action (ADD, BLANK, JITTER, KEEP, REPLACE, REMOVE, LABEL)
178
180
elif line .upper ().startswith (actions ):
179
181
180
182
# Start of a filter group
181
183
if line .upper ().startswith ("LABEL" ) and section == "filter" :
182
- members = []
183
- keep_going = True
184
- while keep_going is True :
185
- next_line = spec [0 ]
186
- if next_line .upper ().strip ().startswith ("LABEL" ):
187
- keep_going = False
188
- elif next_line .upper ().strip ().startswith ("%" ):
189
- keep_going = False
190
- else :
191
- new_member = spec .pop (0 )
192
- members .append (new_member )
193
- if len (spec ) == 0 :
194
- keep_going = False
184
+ members = parse_filter_group (spec )
195
185
196
186
# Add the filter label to the config
197
187
config = parse_label (
@@ -244,6 +234,48 @@ def find_deid(path=None):
244
234
return path
245
235
246
236
237
def parse_format(line):
    """Parse the format declared on a FORMAT line and check it is supported.

    The leading FORMAT keyword is removed case-insensitively, so that any
    line accepted by a case-insensitive "format" match (e.g. "format dicom"
    as well as "FORMAT dicom") yields the same result. All whitespace is
    removed and the remainder is lowercased before validation.

    Parameters
    ==========
    line: the line that starts with format.

    Returns
    =======
    the lowercased format name. If the name is not in the known formats,
    bot.exit is called with an error message first.
    """
    # Anchor the keyword so only the leading FORMAT token is dropped; the
    # second alternative strips every run of whitespace from the line.
    fmt = re.sub(r"^\s*format|\s+", "", line, flags=re.I).lower()
    if fmt not in formats:
        bot.exit("%s is not a valid format." % fmt)
    bot.debug("FORMAT set to %s" % fmt)
    return fmt
251
+
252
+
253
def parse_filter_group(spec):
    """Collect the lines that belong to the current filter group.

    Lines are consumed from the front of the specification until one of
    three things happens: the next line starts a new LABEL, the next line
    starts a new section (begins with %), or the specification runs out.
    The terminating line is left in place; consumed lines are removed from
    spec, so the caller's list is updated in place.

    Parameters
    ==========
    spec: unparsed lines of the deid recipe file

    Returns
    =======
    a list of the consumed lines (members of the filter group)
    """
    group = []
    while spec:
        # Peek without consuming: a boundary line must stay in spec
        upcoming = spec[0].upper().strip()
        if upcoming.startswith(("LABEL", "%")):
            break
        group.append(spec.pop(0))
    return group
277
+
278
+
247
279
def parse_label (section , config , section_name , members , label = None ):
248
280
"""parse label will add a (optionally named) label to the filter
249
281
section, including one or more criteria
@@ -295,7 +327,10 @@ def parse_label(section, config, section_name, members, label=None):
295
327
296
328
297
329
def parse_member (members , operator = None ):
298
-
330
+ """a parsing function for a filter member. Will return a single member
331
+ with fields, values, and an operator. In the case of multiple and/or
332
+ statements that are chained, will instead return a list.
333
+ """
299
334
main_operator = operator
300
335
301
336
actions = []
@@ -388,7 +423,7 @@ def add_section(config, section, section_name=None):
388
423
if section is None :
389
424
bot .exit ("You must define a section (e.g. %header) before any action." )
390
425
391
- if section == "filter" and section_name is None :
426
+ if section in [ "filter" , "values" , "fields" ] and section_name is None :
392
427
bot .exit ("You must provide a name for a filter section." )
393
428
394
429
if section not in sections :
@@ -421,6 +456,55 @@ def _remove_comments(parts):
421
456
return value .split ("#" )[0 ] # remove comments
422
457
423
458
459
def parse_group_action(section, line, config, section_name):
    """Parse a group action, either FIELD or SPLIT, which must belong to
    either a fields or values section, and append it to the config.

    The line is expected to look like "<ACTION> <field> [<value ...>]".
    Runs of spaces between the action, the field and the optional value
    are tolerated. Spacing inside the value itself is left untouched.

    Parameters
    ==========
    section: a valid section name from the deid config file
    line: the line content to parse for the section/action
    config: the growing/current config dictionary
    section_name: the name of the group under config[section] to append to

    Returns
    =======
    the config, with a new {"action", "field"[, "value"]} entry appended to
    config[section][section_name] when section is "fields" or "values".
    For any other section the config is returned unchanged.
    """
    if not line.upper().startswith(group_actions):
        bot.exit("%s is not a valid group action." % line)

    if not line.upper().startswith("FIELD") and section == "fields":
        bot.exit("%fields only supports FIELD actions.")

    bot.debug("%s: adding %s" % (section, line))

    # Peel off the action, then skip any run of spaces before the field so
    # that "FIELD  PatientID" does not produce an empty field.
    action, _, remainder = line.strip().partition(" ")
    remainder = remainder.lstrip(" ")

    # Both actions require at least a field
    if not remainder:
        bot.exit("%s action %s requires additional arguments" % (section, action))

    # For both, the second token is always a field or field expander
    field, _, extra = remainder.partition(" ")
    extra = extra.lstrip(" ")

    # Hand any trailing arguments on as space-split parts, the same shape
    # the original split produced, preserving spacing inside the value.
    parts = extra.split(" ") if extra else []

    entry = {"action": action, "field": field}

    # Only %values accepts a third set of arguments (e.g. for SPLIT);
    # %fields supports fields with expanders only.
    if section == "values" and parts:
        entry["value"] = _remove_comments(parts)

    if section in ("fields", "values"):
        config[section][section_name].append(entry)

    return config
506
+
507
+
424
508
def parse_config_action (section , line , config , section_name = None ):
425
509
"""add action will take a line from a deid config file, a config (dictionary), and
426
510
an active section name (eg header) and add an entry to the config file to perform
@@ -434,7 +518,6 @@ def parse_config_action(section, line, config, section_name=None):
434
518
section_name: optionally, a section name
435
519
436
520
"""
437
-
438
521
if not line .upper ().startswith (actions ):
439
522
bot .exit ("%s is not a valid action line." % line )
440
523
0 commit comments