Skip to content

Commit

Permalink
Add syntax to schema for specifying highlight options (#168)
Browse files Browse the repository at this point in the history
* updated options in parser

* updated parser to load options, added a migration for the Django model change, and changed file loading to use the utf-8-sig encoding

* reverted unneeded changes in parse_document.py and removed temp.txt

* updated options to be a dictionary, not a list

* added options in answers to serializer
  • Loading branch information
JasmineDeng authored and normangilmore committed Nov 9, 2017
1 parent 25cefcf commit 2236132
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 7 deletions.
45 changes: 43 additions & 2 deletions data/load_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

from data import init_defaults
from data.parse_document import parse_document
from data.parse_schema import parse_schema, ParseSchemaException
from data.parse_schema import parse_schema, ParseSchemaException, OPTION_TYPES

from thresher.models import (Article, Topic, Question, Answer,
ArticleHighlight, HighlightGroup,
Expand All @@ -44,11 +44,16 @@ def load_answers(answers, question):
# is vastly simplified if every question can be counted on to have
# at least one answer with a unique ID, including
# question_type == 'TEXT', 'DATE', or 'TIME'.
empty_options = {}
for option_type in OPTION_TYPES.values():
empty_options[option_type] = False

if len(answers) == 0:
answers.append({
'answer_number': 1,
'question': question,
'answer_content': 'placeholder answer for ' + question.question_type
'answer_content': 'placeholder answer for ' + question.question_type,
'options': empty_options
})
if question.question_type in ['RADIO', 'CHECKBOX']:
logger.error("Question number {} of type {} in topic '{}' "
Expand All @@ -62,6 +67,7 @@ def load_answers(answers, question):
# find the corresponding topic and question ids
for answer_args in answers:
answer_args['question'] = question
answer_args['options'] = empty_options
# Create the answer in the database
answer = Answer.objects.create(**answer_args)
answer_args['id'] = answer.id
Expand Down Expand Up @@ -132,6 +138,13 @@ def make_lookup_topic_id(schema):

# Create a dict of dicts for looking up database ids for Questions
# using its topic number and question number, e.g. 2.04
def make_lookup_topic_id(schema):
    """Build a lookup from topic number to topic id.

    Reads 'topic_number' and 'id' from every entry of schema['topics'];
    if a topic number appears more than once, the last entry wins.
    """
    return {
        topic['topic_number']: topic['id']
        for topic in schema['topics']
    }

# Return a dictionary keyed on topic id, return an array of question ids in that topic
def make_lookup_question_id(schema):
lookup_question_id = {}
for topic_args in schema['topics']:
Expand Down Expand Up @@ -257,10 +270,38 @@ def load_dependencies(schema, root_topic):
else:
raise SchemaLoadError("Line {}: Invalid 'if' clause.".format(dep))

def load_options(schema, root_topic):
    """Set the parsed option flags on the matching Answer rows.

    Each entry of schema['options'] carries a topic, question and answer
    number plus an option key.  The referenced Answer is looked up under
    root_topic (root_topic itself, or a Topic whose parent is root_topic)
    and answer.options[option key] is set to True and saved.

    Entries that cannot be resolved are logged and skipped so that one bad
    option line does not abort the rest of the load.
    """
    for option in schema['options']:
        # The schema parser produces these numbers with str.split(), so they
        # arrive as strings.  Normalise them once: formatting a string with
        # %d raises TypeError, which used to turn every "not found" log call
        # below into a crash.
        # NOTE(review): assumes Topic.order is an integer field -- confirm.
        try:
            topic_number = int(option.topic)
            question_number = int(option.question)
            answer_number = int(option.answer)
        except (TypeError, ValueError):
            logger.error("%s\nOption reference is not numeric", option)
            continue

        if root_topic.order == topic_number:
            topic_obj = root_topic
        else:
            try:
                topic_obj = Topic.objects.get(parent=root_topic,
                                              order=topic_number)
            except Topic.DoesNotExist:
                logger.error("%s\nDidn't find topic number %d",
                             option, topic_number)
                continue

        try:
            question_obj = Question.objects.get(
                topic=topic_obj, question_number=question_number)
        except Question.DoesNotExist:
            logger.error("%s\nDidn't find question number %d",
                         option, question_number)
            continue

        try:
            answer_obj = Answer.objects.get(question=question_obj,
                                            answer_number=answer_number)
        except Answer.DoesNotExist:
            logger.error("%s\nDidn't find answer number %d",
                         option, answer_number)
            continue

        answer_obj.options[option.option] = True
        answer_obj.save()

def load_schema(schema):
    """Load a parsed schema: topics first, then dependencies and options.

    load_topics returns the root topic, which the dependency and option
    loaders both take as their second argument.
    """
    top_level_topic = load_topics(schema['topics'])
    for loader in (load_dependencies, load_options):
        loader(schema, top_level_topic)

def load_article(article):
new_id = int(article['metadata']['article_number'])
Expand Down
35 changes: 31 additions & 4 deletions data/parse_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
import re
import pytz, datetime
import logging
import codecs
logging.basicConfig()
logger = logging.getLogger(__name__)


TITLE_ID = 'title:'
INSTRUCTIONS_ID = 'instructions:'
GLOSSARY_ID = 'glossary:'
OPTIONS_ID = 'options'
DEPENDENCY_ID = 'if'
DEPENDENCY_TARGET = 'then'
VERSION_ID = 'version:'
Expand All @@ -23,9 +25,15 @@
'tm' : 'TIME',
'st' : 'SELECT_SUBTOPIC'}

OPTION_TYPES = {'nohighlight': 'NOHIGHLIGHT',
'optionalhighlight': 'OPTIONALHIGHLIGHT'}

Dependency = namedtuple('Dependency',
['topic', 'question', 'answer', 'next_topic', 'next_question', 'linenum'])

Option = namedtuple('Option',
['topic', 'question', 'answer', 'option'])

class SimpleParseException(Exception):
    """Raised when a schema entry cannot be parsed (e.g. unknown option type)."""

Expand All @@ -47,11 +55,12 @@ def log(self):
def load_defaults(output):
    """Give *output* a fresh empty list for each top-level schema section."""
    for section in ('topics', 'dependencies', 'options'):
        output[section] = []

def parse_schema(schema_file):
parsed_schema = {}
load_defaults(parsed_schema)
with open(schema_file, 'r') as f:
with codecs.open(schema_file, mode='r', encoding='utf-8-sig', errors='strict') as f:
linecount = 1
version = ''
first_line = True
Expand Down Expand Up @@ -102,6 +111,8 @@ def parse_schema(schema_file):
parse_glossary(data, current_topic)
elif type_id.lower() == DEPENDENCY_ID:
parse_dependency(data, parsed_schema, linecount)
elif type_id.lower() == OPTIONS_ID:
parse_options(data, parsed_schema)
elif type_id[0].isdigit():
topic_number = parse_question_entry(type_id, data, current_topic)
if current_topic['topic_number'] is None:
Expand Down Expand Up @@ -176,12 +187,28 @@ def parse_dependency(dependency, output, linecount):
target_question,
linecount))

def parse_options(options, output):
    """Parse the body of an 'options' schema line and record it in *output*.

    *options* is expected to look like '2.04.03 nohighlight': a
    topic.question.answer reference followed by an option type that is a
    key of OPTION_TYPES (matched case-insensitively).  Any further fields
    on the line are ignored.  On success an Option tuple holding the three
    number strings and the mapped OPTION_TYPES value is appended to
    output['options'].

    Raises SimpleParseException if either field is missing, if the
    reference is not three dot-separated parts, or if the option type is
    unknown.
    """
    # split() with no argument tolerates tabs, repeated or leading spaces
    # and a trailing newline; split(' ') produced empty fields for those.
    fields = options.split()
    if len(fields) < 2:
        raise SimpleParseException(
            "Expected '<topic>.<question>.<answer> <option type>'. Found '{}'"
            .format(options.strip()))

    reference = fields[0]
    parts = reference.split('.')
    if len(parts) != 3:
        raise SimpleParseException(
            "Expected an answer reference like 2.04.03. Found '{}'"
            .format(reference))
    topic_id, question_id, answer_id = parts

    # Lower-case before the lookup: OPTION_TYPES keys are lower-case.
    option_type = fields[1].lower()
    if option_type not in OPTION_TYPES:
        valid_types = ', '.join(sorted(OPTION_TYPES))
        raise SimpleParseException(
            "Expected option type like {}. Found '{}'"
            .format(valid_types, fields[1]))

    output['options'].append(Option(topic_id,
                                    question_id,
                                    answer_id,
                                    OPTION_TYPES[option_type]))

def infer_hint_type(question):
    """Return the hint keyword found in *question*, upper-cased, or 'NONE'.

    The leftmost case-insensitive occurrence of WHERE, WHO, HOW MANY or
    WHEN anywhere in the text decides the result.
    """
    found = re.search("WHERE|WHO|HOW MANY|WHEN", question, re.IGNORECASE)
    return found.group(0).upper() if found else 'NONE'

def parse_question_entry(entry_num, data, current_topic):
type_bits = entry_num.split('.')
Expand Down Expand Up @@ -234,7 +261,7 @@ def print_dependencies(output):

try:
output = parse_schema(args.filename[0])
# print_data(output)
print_dependencies(output)
print_data(output)
# print_dependencies(output)
except ParseSchemaException as e:
e.log()
4 changes: 4 additions & 0 deletions data/sample/schema/Protester-2017-04-25-NBA-f1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,10 @@ title: Event setting and composition (location, attendance, time/duration)
2.22.07 “Half a day”: 12-15 hours
2.22.08 “A full day”: 16-24 hours
2.22.09 “Overnight” or “More than 1 day”

options 2.04.03 nohighlight
options 2.15.08 nohighlight
options 2.17.06 nohighlight

if 2.01.01, then 2.02
if 2.01.02, then 2.03
Expand Down
21 changes: 21 additions & 0 deletions thresher/migrations/0016_answer_options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.2 on 2017-11-03 07:53
from __future__ import unicode_literals

import django.contrib.postgres.fields.jsonb
from django.db import migrations


# Schema migration that adds the JSON 'options' field to the 'answer' model.
class Migration(migrations.Migration):

# This migration is ordered after thresher migration 0015.
dependencies = [
('thresher', '0015_auto_20171024_0253'),
]

operations = [
migrations.AddField(
model_name='answer',
name='options',
# NOTE(review): default={} is one dict object rather than a callable; Django's
# JSONField docs recommend default=dict. Confirm, and change it together with
# the matching field in thresher/models.py so the two stay in sync.
field=django.contrib.postgres.fields.jsonb.JSONField(default={}),
),
]
3 changes: 3 additions & 0 deletions thresher/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,9 @@ class Answer(models.Model):
# Contingent questions as an array of question IDs
next_questions = JSONField(default=[])

# Per-answer options; currently used only for highlight settings, but can be extended
options = JSONField(default={})

class Meta:
unique_together = ("question", "answer_number")

Expand Down
3 changes: 2 additions & 1 deletion thresher/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ class Meta:
class AnswerSerializer(serializers.ModelSerializer):
    """Serialize an Answer's id, number, content, next questions and options."""

    class Meta:
        model = Answer
        fields = (
            'id',
            'answer_number',
            'answer_content',
            'next_questions',
            'options',
        )

class QuestionSerializer(serializers.ModelSerializer):
# A nested serializer for all the answers (if any)
Expand Down

0 comments on commit 2236132

Please sign in to comment.