Add syntax to schema for specifying highlight options (#168)

* Updated options in the parser.
* Updated the parser to load options, added a migration for the Django model change, and changed the encoding used when loading files to utf-8-sig.
* Reverted unneeded changes in parse_document.py and removed temp.txt.
* Changed options to be a dictionary, not a list.
* Added the answers' options to the serializer.
Goodly · Nov 9, 2017 · 2236132 · 2236132
1 parent 25cefcf
commit 2236132
Show file tree

Hide file tree

Showing 6 changed files with 104 additions and 7 deletions.
diff --git a/data/load_data.py b/data/load_data.py
@@ -23,7 +23,7 @@
 
 from data import init_defaults
 from data.parse_document import parse_document
-from data.parse_schema import parse_schema, ParseSchemaException
+from data.parse_schema import parse_schema, ParseSchemaException, OPTION_TYPES
 
 from thresher.models import (Article, Topic, Question, Answer,
                              ArticleHighlight, HighlightGroup,
@@ -44,11 +44,16 @@ def load_answers(answers, question):
     # is vastly simplified if every question can be counted on to have
     # at least one answer with a unique ID, including
     # question_type == 'TEXT', 'DATE', or 'TIME'.
+    empty_options = {}
+    for option_type in OPTION_TYPES.values():
+        empty_options[option_type] = False
+
     if len(answers) == 0:
         answers.append({
             'answer_number': 1,
             'question': question,
-            'answer_content': 'placeholder answer for ' + question.question_type
+            'answer_content': 'placeholder answer for ' + question.question_type,
+            'options': empty_options
         })
         if question.question_type in ['RADIO', 'CHECKBOX']:
             logger.error("Question number {} of type {} in topic '{}' "
@@ -62,6 +67,7 @@ def load_answers(answers, question):
     # find the corresponding topic and question ids
     for answer_args in answers:
         answer_args['question'] = question
+        answer_args['options'] = empty_options
         # Create the answer in the database
         answer = Answer.objects.create(**answer_args)
         answer_args['id'] = answer.id
@@ -132,6 +138,13 @@ def make_lookup_topic_id(schema):
 
 # Create a dict of dicts for looking up database ids for Questions
 # using its topic number and question number, e.g. 2.04
+def make_lookup_topic_id(schema):
+    lookup_topic_id = {}
+    for topic_args in schema['topics']:
+        lookup_topic_id[topic_args['topic_number']] = topic_args['id']
+    return lookup_topic_id
+
+# Return a dictionary keyed on topic id whose values are arrays of the question ids in that topic
 def make_lookup_question_id(schema):
     lookup_question_id = {}
     for topic_args in schema['topics']:
@@ -257,10 +270,38 @@ def load_dependencies(schema, root_topic):
         else:
             raise SchemaLoadError("Line {}: Invalid 'if' clause.".format(dep))
 
+def load_options(schema, root_topic):
+    for option in schema['options']:
+        if root_topic.order == option.topic:
+            topic_obj = root_topic
+        else:
+            try:
+                topic_obj = Topic.objects.get(parent=root_topic,
+                                                order=option.topic)
+            except Topic.DoesNotExist:
+                logger.error("%s\nDidn't find topic number %d" % (option, option.topic,))
+                continue
+        try:
+            question_obj = Question.objects.get(topic=topic_obj,
+                                                question_number=option.question)
+        except Question.DoesNotExist:
+            logger.error("%s\nDidn't find question number %d" % (option, option.question,))
+            continue
+        try:
+            answer_obj = Answer.objects.get(question=question_obj,
+                                            answer_number=int(option.answer))
+        except Answer.DoesNotExist:
+            logger.error("%s\nDidn't find answer number %d" % (option, option.answer,))
+            continue
+
+        answer_obj.options[option.option] = True
+        answer_obj.save()
+
 def load_schema(schema):
     # Load the topics, questions and answers of the schema
     root_topic = load_topics(schema['topics'])
     load_dependencies(schema, root_topic)
+    load_options(schema, root_topic)
 
 def load_article(article):
     new_id = int(article['metadata']['article_number'])

diff --git a/data/parse_schema.py b/data/parse_schema.py
@@ -3,13 +3,15 @@
 import re
 import pytz, datetime
 import logging
+import codecs
 logging.basicConfig()
 logger = logging.getLogger(__name__)
 
 
 TITLE_ID = 'title:'
 INSTRUCTIONS_ID = 'instructions:'
 GLOSSARY_ID = 'glossary:'
+OPTIONS_ID = 'options'
 DEPENDENCY_ID = 'if'
 DEPENDENCY_TARGET = 'then'
 VERSION_ID = 'version:'
@@ -23,9 +25,15 @@
                   'tm' : 'TIME',
                   'st' : 'SELECT_SUBTOPIC'}
 
+OPTION_TYPES = {'nohighlight': 'NOHIGHLIGHT',
+                'optionalhighlight': 'OPTIONALHIGHLIGHT'}
+
 Dependency = namedtuple('Dependency',
     ['topic', 'question', 'answer', 'next_topic', 'next_question', 'linenum'])
 
+Option = namedtuple('Option',
+    ['topic', 'question', 'answer', 'option'])
+
 class SimpleParseException(Exception):
     pass
 
@@ -47,11 +55,12 @@ def log(self):
 def load_defaults(output):
     output['topics'] = []
     output['dependencies'] = []
+    output['options'] = []
 
 def parse_schema(schema_file):
     parsed_schema = {}
     load_defaults(parsed_schema)
-    with open(schema_file, 'r') as f:
+    with codecs.open(schema_file, mode='r', encoding='utf-8-sig', errors='strict') as f:
         linecount = 1
         version = ''
         first_line = True
@@ -102,6 +111,8 @@ def parse_schema(schema_file):
                     parse_glossary(data, current_topic)
                 elif type_id.lower() == DEPENDENCY_ID:
                     parse_dependency(data, parsed_schema, linecount)
+                elif type_id.lower() == OPTIONS_ID:
+                    parse_options(data, parsed_schema)
                 elif type_id[0].isdigit():
                     topic_number = parse_question_entry(type_id, data, current_topic)
                     if current_topic['topic_number'] is None:
@@ -176,12 +187,28 @@ def parse_dependency(dependency, output, linecount):
                                              target_question,
                                              linecount))
 
+def parse_options(options, output):
+    splitted_options = options.split(' ')
+    topic_id, question_id, answer_id = splitted_options[0].split('.')
+    option_type = splitted_options[1]
+
+    if option_type not in OPTION_TYPES:
+        valid_types = ', '.join(OPTION_TYPES.keys())
+        raise SimpleParseException(
+            "Expected option type like {}. Found '{}'"
+            .format(valid_types, option_type))
+
+    output['options'].append(Option(topic_id,
+                                    question_id,
+                                    answer_id,
+                                    OPTION_TYPES[option_type]))
+
 def infer_hint_type(question):
     match = re.search("WHERE|WHO|HOW MANY|WHEN", question, re.IGNORECASE)
     if match:
         return match.group(0).upper()
     else:
-        return 'NONE';
+        return 'NONE'
 
 def parse_question_entry(entry_num, data, current_topic):
     type_bits = entry_num.split('.')
@@ -234,7 +261,7 @@ def print_dependencies(output):
 
     try:
         output = parse_schema(args.filename[0])
-        # print_data(output)
-        print_dependencies(output)
+        print_data(output)
+        # print_dependencies(output)
     except ParseSchemaException as e:
         e.log()
diff --git a/data/sample/schema/Protester-2017-04-25-NBA-f1.txt b/data/sample/schema/Protester-2017-04-25-NBA-f1.txt
@@ -272,6 +272,10 @@ title: Event setting and composition (location, attendance, time/duration)
 2.22.07 “Half a day”: 12-15 hours
 2.22.08 “A full day”: 16-24 hours
 2.22.09 “Overnight” or “More than 1 day”
+
+options 2.04.03 nohighlight
+options 2.15.08 nohighlight
+options 2.17.06 nohighlight
 
 if 2.01.01, then 2.02
 if 2.01.02, then 2.03

diff --git a/thresher/migrations/0016_answer_options.py b/thresher/migrations/0016_answer_options.py
@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.2 on 2017-11-03 07:53
+from __future__ import unicode_literals
+
+import django.contrib.postgres.fields.jsonb
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('thresher', '0015_auto_20171024_0253'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='answer',
+            name='options',
+            field=django.contrib.postgres.fields.jsonb.JSONField(default={}),
+        ),
+    ]
diff --git a/thresher/models.py b/thresher/models.py
@@ -271,6 +271,9 @@ class Answer(models.Model):
     # Contingent questions as an array of question IDs
     next_questions = JSONField(default=[])
 
+    # Per-answer options; currently used only for highlight options, but can be extended
+    options = JSONField(default={})
+
     class Meta:
         unique_together = ("question", "answer_number")
 

diff --git a/thresher/serializers.py b/thresher/serializers.py
@@ -82,7 +82,8 @@ class Meta:
 class AnswerSerializer(serializers.ModelSerializer):
     class Meta:
         model = Answer
-        fields = ('id', 'answer_number', 'answer_content', 'next_questions')
+        fields = ('id', 'answer_number', 'answer_content', 'next_questions',
+                  'options')
 
 class QuestionSerializer(serializers.ModelSerializer):
     # A nested serializer for all the answers (if any)