diff --git a/bower.json b/bower.json index b8c8f47..60b577f 100644 --- a/bower.json +++ b/bower.json @@ -2,6 +2,6 @@ "name": "annotator-custom-editor", "dependencies": { "bootstrap": "~3.3.5", - "modernizr": "~2.8.3", + "modernizr": "~2.8.3" } } diff --git a/data/init_defaults.py b/data/init_defaults.py index dc7867e..5594d50 100644 --- a/data/init_defaults.py +++ b/data/init_defaults.py @@ -74,24 +74,42 @@ def createNick(username="nick", email="nick@example.com", password="bidsatdoe", u.save() profile = UserProfile.objects.get_or_create( user=u, - defaults = {"experience_score": 0.98, "accuracy_score": 0.99} + defaults = {"pybossa_url": "http://crowdcrafting.org", + "experience_score": 0.98, "accuracy_score": 0.99} )[0] logger.info("Created researcher '%s', password '%s'." % (username, password)) return u.userprofile -def createDecidingForce(): +def createHighlighterProject(): (project, created) = Project.objects.get_or_create( - name="Deciding Force", - instructions="This project analyzes media " + - "descriptions of interactions " + - "between police and protestors." + short_name="DecidingForceHighlighter", + defaults = { + "name": "Deciding Force Highlighter", + "task_type": "HLTR", + "instructions": "Highlight passages in articles that discuss " + + "the topics shown." + } ) if created: - logger.info("Created project 'Deciding Force'") + logger.info("Created project '%s'" % project.name) + return project + +def createQuizProject(): + (project, created) = Project.objects.get_or_create( + short_name="DecidingForceQuiz", + defaults = { + "name": "Deciding Force Quiz", + "task_type": "QUIZ", + "instructions": "Answer questions about short text passages." 
+ } + ) + if created: + logger.info("Created project '%s'" % project.name) return project if __name__ == '__main__': createSuperUser() researchers = createThresherGroup() createNick(groups=[researchers]) - createDecidingForce() + createHighlighterProject() + createQuizProject() diff --git a/data/load_data.py b/data/load_data.py index 764000f..db06349 100644 --- a/data/load_data.py +++ b/data/load_data.py @@ -309,7 +309,8 @@ def load_args(): if __name__ == '__main__': init_defaults.createSuperUser() - init_defaults.createDecidingForce() + init_defaults.createHighlighterProject() + init_defaults.createQuizProject() researchers = init_defaults.createThresherGroup() created_by = init_defaults.createNick(groups=[researchers]) args = load_args() diff --git a/data/pybossa_api.py b/data/pybossa_api.py new file mode 100644 index 0000000..72c1baf --- /dev/null +++ b/data/pybossa_api.py @@ -0,0 +1,274 @@ +import os +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "thresher_backend.settings") +import django +django.setup() +from django.db import connection +from django.conf import settings + +import logging +logger = logging.getLogger(__name__) + +import json +import requests +from requests.compat import urljoin +import iso8601 +import django_rq + +from thresher.models import Article, Topic, Project, UserProfile, Task +from thresher.views import collectHighlightTasks, collectQuizTasks +from data import init_defaults + +class InvalidTaskType(Exception): + pass + +class FileNotFound(Exception): + pass + + +def create_remote_project(profile, project): + """ + This function enqueues the worker to create a project on a remote + Pybossa server. + """ + # This enqueues the worker with the primary keys for profile + # and project as it would be unwise to pickle and unpickle Django + # models for later use. + # Note: If for some reason the profile or project records disappear + # by the time the worker runs, then we WANT the worker to fail. 
+ create_remote_project_worker.delay(profile_id=profile.id, project_id=project.id) + +@django_rq.job('default', timeout=60, result_ttl=24*3600) +def create_remote_project_worker(profile_id=None, project_id=None): + profile = UserProfile.objects.get(pk=profile_id) + url = urljoin(profile.pybossa_url, "/api/project") + params = {'api_key': profile.pybossa_api_key} + + project = Project.objects.get(pk=project_id) + bundlePath = getPresenterPath(project.task_type) + payload = { + "name": project.name, + "short_name": project.short_name, + "description": "Add project description here.", + "info": { + "task_presenter": getPresenter(bundlePath) + } + } + headers = {'content-type': 'application/json'} + resp = requests.post(url, params=params, + headers=headers, timeout=30, + json=payload) + result = resp.json() + if resp.status_code / 100 == 2 and result.get('id'): + # if Pybossa reports success, then we expect these fields to be present + # save info about where this project can be found remotely + project.pybossa_url = profile.pybossa_url + project.pybossa_id = int(result.get('id')) + project.pybossa_owner_id = int(result.get('owner_id')) + project.pybossa_secret_key = result.get('secret_key', '') + project.pybossa_created = iso8601.parse_date(result.get('created')) + project.save() + # delete our large task_presenter from the result so it isn't logged by Python-RQ + result['info']['task_presenter'] = "" + else: + # our large task_presenter is embedded in the exception_msg, + # so truncate the message + result['exception_msg'] = result['exception_msg'][:256] + return result + +def delete_remote_project(profile, project): + """ + This function enqueues the worker to delete a project on a remote + Pybossa server. 
+ """ + delete_remote_project_worker.delay(profile_id=profile.id, project_id=project.id) + +@django_rq.job('default', timeout=60, result_ttl=24*3600) +def delete_remote_project_worker(profile_id=None, project_id=None): + profile = UserProfile.objects.get(pk=profile_id) + project = Project.objects.get(pk=project_id) + headers = {'content-type': 'application/json'} + result = { + "deleted": False, + "short_name": project.short_name, + "task_type": project.task_type, + "url": project.getURL() + } + if not project.pybossa_id: + result["deleted"] = False + result["error"] = "No id for remote project." + return result + url = urljoin(profile.pybossa_url, "/api/project/%d" % (project.pybossa_id)) + params = {'api_key': profile.pybossa_api_key } + resp = requests.delete(url, params=params, headers=headers, timeout=30) + if resp.status_code / 100 == 2: + result["deleted"] = True + project.pybossa_url = "" + project.pybossa_id = None + project.pybossa_owner_id = None + project.pybossa_secret_key = "" + project.pybossa_created = None + project.save() + # Pybossa has cascade deleted any tasks on the server - get rid of + # our references to those tasks + project.tasks.all().delete() + else: + result = resp.json() # Pybossa only returns JSON if DELETE has error + return result + +def getPresenterPath(task_type): + if task_type == "HLTR": + return settings.HIGHLIGHTER_BUNDLE_JS + elif task_type == "QUIZ": + return settings.QUIZ_BUNDLE_JS + else: + raise InvalidTaskType("Project task type must be 'HLTR' or 'QUIZ'") + +def getPresenter(bundlePath): + if os.path.isfile(bundlePath): + with open(bundlePath) as f: + js = f.read() + return "" % js + else: + raise FileNotFound("Task Presenter bundle.js not found: %s" % (bundlePath)) + +# Use our default user and projects to exercise the API. 
+def testCreateRemoteProjects(): + researchers = init_defaults.createThresherGroup() + profile = init_defaults.createNick(groups=[researchers]) + + hproject = init_defaults.createHighlighterProject() + create_remote_project(profile, hproject) + + qproject = init_defaults.createQuizProject() + create_remote_project(profile, qproject) + +def testDeleteRemoteProjects(): + researchers = init_defaults.createThresherGroup() + profile = init_defaults.createNick(groups=[researchers]) + + hproject = init_defaults.createHighlighterProject() + delete_remote_project(profile, hproject) + + qproject = init_defaults.createQuizProject() + delete_remote_project(profile, qproject) + +def testCreateRemoteHighlighterTasks(): + # Send primary keys through Django-RQ, not Models and Querysets + profile_id = UserProfile.objects.get(user__username="nick").id + article_ids = list(Article.objects.all().values_list('id', flat=True)) + topic_ids = list(Topic.objects.filter(parent=None) + .values_list('id', flat=True)) + project_id = Project.objects.get(name__exact="Deciding Force Highlighter").id + generate_highlight_tasks_worker.delay(profile_id=profile_id, + article_ids=article_ids, + topic_ids=topic_ids, + project_id=project_id) + +@django_rq.job('default', timeout=60, result_ttl=24*3600) +def generate_highlight_tasks_worker(profile_id=None, + article_ids=None, + topic_ids=None, + project_id=None): + startCount = len(connection.queries) + articles = Article.objects.filter(id__in=article_ids) + topics = Topic.objects.filter(id__in=topic_ids) + project = Project.objects.get(pk=project_id) + if not project.pybossa_id: + return {"error_msg": "Project '%s' must be created remotely " + "before creating tasks for it." 
% (project.short_name)} + if project.task_type != "HLTR": + return {"error_msg": "Project type must be 'HLTR', " + "found '%s'" % (project.task_type)} + tasks = collectHighlightTasks(articles, topics, project) + tasks = tasks[:5] # DEBUG + for task in tasks: + create_remote_task_worker.delay(profile_id=profile_id, + project_id=project_id, + task=task) + return ({ + "task_type": "HLTR", + "generatedTasks": len(tasks), + "numberOfQueries": len(connection.queries) - startCount + }) + +def testCreateRemoteQuizTasks(): + # Send primary keys through Django-RQ, not Models + profile_id = UserProfile.objects.get(user__username="nick").id + topic_id = Topic.objects.get(name__exact="Protester").id + project_id = Project.objects.get(name__exact="Deciding Force Quiz").id + generate_quiz_tasks_worker.delay(profile_id=profile_id, + topic_id=topic_id, + project_id=project_id) + +@django_rq.job('default', timeout=60, result_ttl=24*3600) +def generate_quiz_tasks_worker(profile_id=None, topic_id=None, project_id=None): + startCount = len(connection.queries) + topic = Topic.objects.get(pk=topic_id) + project = Project.objects.get(pk=project_id) + if not project.pybossa_id: + return {"error_msg": "Project '%s' must be created remotely " + "before creating tasks for it." 
+ % (project.short_name)} + if project.task_type != "QUIZ": + return {"error_msg": "Project type must be 'QUIZ', " + "found '%s'" % (project.task_type)} + tasks = collectQuizTasks(topic, project) + tasks = tasks[:5] # DEBUG + for task in tasks: + create_remote_task_worker.delay(profile_id=profile_id, + project_id=project_id, + task=task) + return ({ + "task_type": "QUIZ", + "generatedTasks": len(tasks), + "numberOfQueries": len(connection.queries) - startCount + }) + +@django_rq.job('default', timeout=60, result_ttl=24*3600) +def create_remote_task_worker(profile_id=None, project_id=None, task=None, n_answers=1): + profile = UserProfile.objects.get(pk=profile_id) + url = urljoin(profile.pybossa_url, "/api/task") + params = {'api_key': profile.pybossa_api_key} + + project = Project.objects.get(pk=project_id) + + payload = { + "project_id": project.pybossa_id, + "info": task, + "calibration": 0, + "priority_0": 0.0, + "n_answers": n_answers, + "quorum": 0 + } + + headers = {'content-type': 'application/json'} + resp = requests.post(url, params=params, + headers=headers, timeout=30, + json=payload) + result = resp.json() + if resp.status_code / 100 == 2 and result.get('id'): + # if Pybossa reports success, then we expect these fields to be present + Task( + project_id = project_id, + task_type = project.task_type, + info = task, + pybossa_id = int(result.get('id')), + pybossa_project_id = int(result.get('project_id')), + pybossa_created = iso8601.parse_date(result.get('created')), + pybossa_state = result.get('state') + ).save() + # our task info was already logged by Python RQ as an incoming parameter + result['info'] = "" + else: + # our large info item may be embedded in the exception_msg, + # so truncate the message + result['exception_msg'] = result['exception_msg'][:256] + return result + + +if __name__ == '__main__': + logger.info("Highlighter bundle: %s" % settings.HIGHLIGHTER_BUNDLE_JS) + logger.info("Quiz bundle: %s" % settings.QUIZ_BUNDLE_JS) + + 
testCreateRemoteProjects() + testCreateRemoteHighlighterTasks() + testCreateRemoteQuizTasks() diff --git a/data/rqworker.py b/data/rqworker.py new file mode 100644 index 0000000..9610e75 --- /dev/null +++ b/data/rqworker.py @@ -0,0 +1,13 @@ +import os +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "thresher_backend.settings") + +# Do time consuming Django setup before forking individual worker processes. +import django +django.setup() +from django.conf import settings + +from django_rq import get_worker + +if __name__ == '__main__': + worker = get_worker() + worker.work() diff --git a/docker/thresher_api/requirements.txt b/docker/thresher_api/requirements.txt index 2afaf3b..b33376b 100644 --- a/docker/thresher_api/requirements.txt +++ b/docker/thresher_api/requirements.txt @@ -6,9 +6,9 @@ django-filter==1.0.1 django-rq==0.9.4 djangorestframework==3.5.4 gunicorn==19.6.0 +iso8601==0.1.11 Markdown==2.6.8 psycopg2==2.6.2 -pybossa-client==1.1.1 pytz==2016.10 redis==2.10.5 requests==2.13.0 diff --git a/docs/generate-models-diagram.sh b/docs/generate-models-diagram.sh new file mode 100755 index 0000000..ca9a6de --- /dev/null +++ b/docs/generate-models-diagram.sh @@ -0,0 +1,4 @@ +#!/bin/sh +export DJANGO_SETTINGS_MODULE=thresher_backend.graphviz_settings +python manage.py graph_models --pygraphviz -o docs/thresher_models.png thresher +python manage.py graph_models --pygraphviz -o docs/thresher_models.svg thresher diff --git a/docs/graphviz-reqs.txt b/docs/graphviz-reqs.txt new file mode 100644 index 0000000..9af2caa --- /dev/null +++ b/docs/graphviz-reqs.txt @@ -0,0 +1,2 @@ +django-extensions==1.7.7 +pygraphviz==1.3.1 diff --git a/docs/install-graphviz.sh b/docs/install-graphviz.sh new file mode 100755 index 0000000..ae5a632 --- /dev/null +++ b/docs/install-graphviz.sh @@ -0,0 +1,3 @@ +#!/bin/bash +# For Ubuntu 14.04 +sudo apt-get install graphviz libgv-python libgraphviz-dev diff --git a/docs/thresher_models.png b/docs/thresher_models.png index 44d799e..c6bad98 100644 
Binary files a/docs/thresher_models.png and b/docs/thresher_models.png differ diff --git a/docs/thresher_models.svg b/docs/thresher_models.svg index 78c12e5..cb617ba 100644 --- a/docs/thresher_models.svg +++ b/docs/thresher_models.svg @@ -4,458 +4,573 @@ - - + + model_graph - + thresher_models_UserProfile - - -     -    UserProfile     -     -id -     -     -AutoField -     -     -user -     -     -OneToOneField (id) -     -     -accuracy_score -     -     -DecimalField -     -     -experience_score -     -     -DecimalField -     + + +     +    UserProfile     +     +id +     +     +AutoField +     +     +user +     +     +OneToOneField (id) +     +     +accuracy_score +     +     +DecimalField +     +     +experience_score +     +     +DecimalField +     +     +pybossa_api_key +     +     +CharField +     +     +pybossa_url +     +     +URLField +     django_contrib_auth_models_User - - -   -User -   + + +   +User +   thresher_models_UserProfile->django_contrib_auth_models_User - -user (userprofile) + +user (userprofile) thresher_models_Project - - -     -    Project     -     -id -     -     -AutoField -     -     -instructions -     -     -TextField -     -     -name -     -     -CharField -     + + +     +    Project     +     +id +     +     +AutoField +     +     +instructions +     +     +TextField +     +     +name +     +     +CharField +     +     +pybossa_created +     +     +DateTimeField +     +     +pybossa_id +     +     +IntegerField +     +     +pybossa_owner_id +     +     +IntegerField +     +     +pybossa_secret_key +     +     +CharField +     +     +pybossa_url +     +     +URLField +     +     +short_name +     +     +CharField +     +     +task_type +     +     +CharField +     + + +thresher_models_Task + + +     +    Task     +     +id +     +     +AutoField +     +     +project +     +     +ForeignKey (id) +     +     +info +     +     +JSONField +     +     +pybossa_created +     +     +DateTimeField +     +     +pybossa_id +     +     
+IntegerField +     +     +pybossa_project_id +     +     +IntegerField +     +     +pybossa_state +     +     +CharField +     +     +task_type +     +     +CharField +     + + +thresher_models_Task->thresher_models_Project + + +project (tasks) -thresher_models_Article - - -     -    Article     -     -id -     -     -AutoField -     -     -annotators -     -     -CharField -     -     -article_number -     -     -IntegerField -     -     -city_published -     -     -CharField -     -     -date_published -     -     -DateField -     -     -parse_version -     -     -CharField -     -     -periodical -     -     -CharField -     -     -periodical_code -     -     -IntegerField -     -     -state_published -     -     -CharField -     -     -text -     -     -TextField -     +thresher_models_Article + + +     +    Article     +     +id +     +     +AutoField +     +     +annotators +     +     +CharField +     +     +article_number +     +     +IntegerField +     +     +city_published +     +     +CharField +     +     +date_published +     +     +DateField +     +     +parse_version +     +     +CharField +     +     +periodical +     +     +CharField +     +     +periodical_code +     +     +IntegerField +     +     +state_published +     +     +CharField +     +     +text +     +     +TextField +     -thresher_models_Topic - - -     -    Topic     -     -id -     -     -AutoField -     -     -parent -     -     -ForeignKey (id) -     -     -glossary -     -     -TextField -     -     -instructions -     -     -TextField -     -     -name -     -     -TextField -     -     -order -     -     -IntegerField -     +thresher_models_Topic + + +     +    Topic     +     +id +     +     +AutoField +     +     +parent +     +     +ForeignKey (id) +     +     +glossary +     +     +TextField +     +     +instructions +     +     +TextField +     +     +name +     +     +TextField +     +     +order +     +     +IntegerField +     
-thresher_models_Topic->thresher_models_Topic - - -parent (subtopics) +thresher_models_Topic->thresher_models_Topic + + +parent (subtopics) -thresher_models_Question - - -     -    Question     -     -id -     -     -AutoField -     -     -default_next -     -     -ForeignKey (id) -     -     -topic -     -     -ForeignKey (id) -     -     -contingency -     -     -BooleanField -     -     -question_number -     -     -IntegerField -     -     -question_text -     -     -TextField -     -     -question_type -     -     -CharField -     +thresher_models_Question + + +     +    Question     +     +id +     +     +AutoField +     +     +default_next +     +     +ForeignKey (id) +     +     +topic +     +     +ForeignKey (id) +     +     +contingency +     +     +BooleanField +     +     +question_number +     +     +IntegerField +     +     +question_text +     +     +TextField +     +     +question_type +     +     +CharField +     -thresher_models_Question->thresher_models_Topic - - -topic (questions) +thresher_models_Question->thresher_models_Topic + + +topic (questions) -thresher_models_Question->thresher_models_Question - - -default_next (next_default) +thresher_models_Question->thresher_models_Question + + +default_next (next_default) -thresher_models_Answer - - -     -    Answer     -     -id -     -     -AutoField -     -     -next_question -     -     -ForeignKey (id) -     -     -question -     -     -ForeignKey (id) -     -     -answer_content -     -     -TextField -     -     -answer_number -     -     -IntegerField -     +thresher_models_Answer + + +     +    Answer     +     +id +     +     +AutoField +     +     +next_question +     +     +ForeignKey (id) +     +     +question +     +     +ForeignKey (id) +     +     +answer_content +     +     +TextField +     +     +answer_number +     +     +IntegerField +     -thresher_models_Answer->thresher_models_Question - - -question (answers) +thresher_models_Answer->thresher_models_Question + + +question 
(answers) -thresher_models_Answer->thresher_models_Question - - -next_question (question_next) +thresher_models_Answer->thresher_models_Question + + +next_question (question_next) -thresher_models_ArticleHighlight - - -     -    ArticleHighlight     -     -id -     -     -AutoField -     -     -article -     -     -ForeignKey (id) -     -     -created_by -     -     -ForeignKey (id) -     -     -highlight_source -     -     -CharField -     +thresher_models_ArticleHighlight + + +     +    ArticleHighlight     +     +id +     +     +AutoField +     +     +article +     +     +ForeignKey (id) +     +     +created_by +     +     +ForeignKey (id) +     +     +highlight_source +     +     +CharField +     -thresher_models_ArticleHighlight->thresher_models_UserProfile - - -created_by (users_highlights) +thresher_models_ArticleHighlight->thresher_models_UserProfile + + +created_by (users_highlights) -thresher_models_ArticleHighlight->thresher_models_Article - - -article (users_highlights) +thresher_models_ArticleHighlight->thresher_models_Article + + +article (users_highlights) -thresher_models_HighlightGroup - - -     -    HighlightGroup     -     -id -     -     -AutoField -     -     -article_highlight -     -     -ForeignKey (id) -     -     -topic -     -     -ForeignKey (id) -     -     -case_number -     -     -IntegerField -     -     -highlight_text -     -     -TextField -     -     -offsets -     -     -TextField -     +thresher_models_HighlightGroup + + +     +    HighlightGroup     +     +id +     +     +AutoField +     +     +article_highlight +     +     +ForeignKey (id) +     +     +topic +     +     +ForeignKey (id) +     +     +case_number +     +     +IntegerField +     +     +highlight_text +     +     +TextField +     +     +offsets +     +     +TextField +     -thresher_models_HighlightGroup->thresher_models_Topic - - -topic (highlights) +thresher_models_HighlightGroup->thresher_models_Topic + + +topic (highlights) 
-thresher_models_HighlightGroup->thresher_models_ArticleHighlight - - -article_highlight (highlights) +thresher_models_HighlightGroup->thresher_models_ArticleHighlight + + +article_highlight (highlights) -thresher_models_SubmittedAnswer - - -     -    SubmittedAnswer     -     -id -     -     -AutoField -     -     -highlight_group -     -     -ForeignKey (id) -     -     -question -     -     -ForeignKey (id) -     -     -user_submitted -     -     -ForeignKey (id) -     -     -answer -     -     -TextField -     +thresher_models_SubmittedAnswer + + +     +    SubmittedAnswer     +     +id +     +     +AutoField +     +     +highlight_group +     +     +ForeignKey (id) +     +     +question +     +     +ForeignKey (id) +     +     +user_submitted +     +     +ForeignKey (id) +     +     +answer +     +     +TextField +     -thresher_models_SubmittedAnswer->thresher_models_UserProfile - - -user_submitted (submitted_answers) +thresher_models_SubmittedAnswer->thresher_models_UserProfile + + +user_submitted (submitted_answers) -thresher_models_SubmittedAnswer->thresher_models_Question - - -question (submitted_answers) +thresher_models_SubmittedAnswer->thresher_models_Question + + +question (submitted_answers) -thresher_models_SubmittedAnswer->thresher_models_HighlightGroup - - -highlight_group (submitted_answers) +thresher_models_SubmittedAnswer->thresher_models_HighlightGroup + + +highlight_group (submitted_answers) diff --git a/run_thresher_worker.sh b/run_thresher_worker.sh new file mode 100755 index 0000000..f1161c3 --- /dev/null +++ b/run_thresher_worker.sh @@ -0,0 +1,2 @@ +#!/bin/bash +PYTHONPATH="." 
python data/rqworker.py diff --git a/thresher/migrations/0011_auto_20170224_2010.py b/thresher/migrations/0011_auto_20170224_2010.py new file mode 100644 index 0000000..0df76c2 --- /dev/null +++ b/thresher/migrations/0011_auto_20170224_2010.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.10.5 on 2017-02-24 20:10 +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('thresher', '0010_auto_20170222_1827'), + ] + + operations = [ + migrations.AlterField( + model_name='highlightgroup', + name='case_number', + field=models.IntegerField(db_index=True), + ), + ] diff --git a/thresher/migrations/0012_auto_20170226_1938.py b/thresher/migrations/0012_auto_20170226_1938.py new file mode 100644 index 0000000..1de3413 --- /dev/null +++ b/thresher/migrations/0012_auto_20170226_1938.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.10.5 on 2017-02-26 19:38 +from __future__ import unicode_literals + +import django.contrib.postgres.fields.jsonb +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('thresher', '0011_auto_20170224_2010'), + ] + + operations = [ + migrations.CreateModel( + name='Task', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('task_type', models.CharField(max_length=16)), + ('components', django.contrib.postgres.fields.jsonb.JSONField(default={b'task_type': b''})), + ('pybossa_id', models.IntegerField()), + ('pybossa_project_id', models.IntegerField()), + ('created', models.DateTimeField()), + ('state', models.CharField(max_length=16)), + ], + ), + migrations.AddField( + model_name='project', + name='pybossa_id', + field=models.IntegerField(null=True), + ), + migrations.AddField( + model_name='project', + name='task_type', + field=models.CharField(choices=[(b'HLTR', 
b'Highlighter'), (b'QUIZ', b'Quiz')], default=b'HLTR', max_length=4), + ), + migrations.AddField( + model_name='task', + name='project', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='tasks', to='thresher.Project'), + ), + ] diff --git a/thresher/migrations/0013_auto_20170227_2029.py b/thresher/migrations/0013_auto_20170227_2029.py new file mode 100644 index 0000000..35b5eb5 --- /dev/null +++ b/thresher/migrations/0013_auto_20170227_2029.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.10.5 on 2017-02-27 20:29 +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('thresher', '0012_auto_20170226_1938'), + ] + + operations = [ + migrations.AddField( + model_name='project', + name='pybossa_created', + field=models.DateTimeField(null=True), + ), + migrations.AddField( + model_name='project', + name='pybossa_owner_id', + field=models.IntegerField(null=True), + ), + migrations.AddField( + model_name='project', + name='pybossa_secret_key', + field=models.CharField(blank=True, default=b'', max_length=36), + ), + migrations.AddField( + model_name='project', + name='pybossa_url', + field=models.URLField(blank=True, default=b''), + ), + ] diff --git a/thresher/migrations/0014_auto_20170228_0042.py b/thresher/migrations/0014_auto_20170228_0042.py new file mode 100644 index 0000000..68acbc7 --- /dev/null +++ b/thresher/migrations/0014_auto_20170228_0042.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.10.5 on 2017-02-28 00:42 +from __future__ import unicode_literals + +import django.contrib.postgres.fields.jsonb +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('thresher', '0013_auto_20170227_2029'), + ] + + operations = [ + migrations.RenameField( + model_name='task', + old_name='created', + new_name='pybossa_created', + ), + 
migrations.RenameField( + model_name='task', + old_name='state', + new_name='pybossa_state', + ), + migrations.RemoveField( + model_name='task', + name='components', + ), + migrations.AddField( + model_name='task', + name='info', + field=django.contrib.postgres.fields.jsonb.JSONField(default=dict), + preserve_default=False, + ), + migrations.AlterField( + model_name='task', + name='task_type', + field=models.CharField(max_length=4), + ), + ] diff --git a/thresher/models.py b/thresher/models.py index b6a0dfc..611780c 100644 --- a/thresher/models.py +++ b/thresher/models.py @@ -1,6 +1,8 @@ from django.db import models from django.contrib.auth.models import User +from django.contrib.postgres.fields import JSONField from django.core.exceptions import ValidationError +from requests.compat import urljoin # User doing the annotating - uses OneToOneFields to add attributes to django.contrib.auth.User class UserProfile(models.Model): @@ -17,16 +19,59 @@ class UserProfile(models.Model): def __unicode__(self): return "%s" % self.user.username +TASK_TYPE = ( + ('HLTR', 'Highlighter'), + ('QUIZ', 'Quiz'), +) class Project(models.Model): - # max_length from Pybossa db + # max_length matches Pybossa db name = models.CharField(max_length=255) short_name = models.CharField(max_length=255) instructions = models.TextField() + task_type = models.CharField(max_length=4, + choices=TASK_TYPE, default="HLTR") + # following fields are null unless remote Pybossa project has been created + pybossa_url = models.URLField(blank=True, default="") + pybossa_id = models.IntegerField(null=True) + pybossa_owner_id = models.IntegerField(null=True) + # UUID format is 36 chars including hyphens + pybossa_secret_key = models.CharField(blank=True, max_length=36, default="") + pybossa_created = models.DateTimeField(null=True) def __unicode__(self): return "id %d: %s" % (self.id, self.name) + def getURL(self): + if (self.pybossa_url): + return urljoin(self.pybossa_url, "project/%s/" % (self.short_name)) + 
+ else: + return "" + +class Task(models.Model): + """ + These task records are created to record successful exports to Pybossa + so will always have Pybossa id available + """ + project = models.ForeignKey(Project, related_name="tasks", + on_delete=models.CASCADE) + task_type = models.CharField(max_length=4) # 'HLTR' or 'QUIZ' + # info provides either the article id and topic_ids used for a highlight task + # or the root topic id, article id, and case number used for a quiz task + info = JSONField() + pybossa_id = models.IntegerField() + pybossa_project_id = models.IntegerField() + pybossa_created = models.DateTimeField() + pybossa_state = models.CharField(max_length=16) # 'ongoing' or 'completed' + + def __unicode__(self): + return "id %d task type: %s pybossa_id: %d" % (self.id, self.task_type, self.pybossa_project_id) + + def getURL(self): + if self.project.pybossa_url: + return urljoin(self.project.pybossa_url, "task/%d/" % (self.pybossa_id)) + else: + return "" # Articles containing text for analysis class Article(models.Model): @@ -194,7 +239,7 @@ class HighlightGroup(models.Model): highlight_text = models.TextField() # User assigned case number for this text - case_number = models.IntegerField() + case_number = models.IntegerField(db_index=True) # The topic of this text topic = models.ForeignKey(Topic, related_name="highlights", diff --git a/thresher/serializers.py b/thresher/serializers.py index 22fe63c..342343d 100644 --- a/thresher/serializers.py +++ b/thresher/serializers.py @@ -98,7 +98,7 @@ class UserProfileSerializer(serializers.ModelSerializer): # Custom fields experience_score = serializers.DecimalField(max_digits=5, decimal_places=3) accuracy_score = serializers.DecimalField(max_digits=5, decimal_places=3) - article_highlights = ArticleHighlightSerializer(many=True) + users_highlights = ArticleHighlightSerializer(many=True) submitted_answers = SubmittedAnswerSerializer(many=True) def get_username(self, obj): @@ -107,7 +107,7 @@ def 
get_username(self, obj): class Meta: model = UserProfile fields = ('id', 'username', - 'experience_score', 'accuracy_score', 'article_highlights', + 'experience_score', 'accuracy_score', 'users_highlights', 'submitted_answers') diff --git a/thresher/views.py b/thresher/views.py index 6b00d2c..aedb6dd 100644 --- a/thresher/views.py +++ b/thresher/views.py @@ -114,6 +114,18 @@ def next_question(request, id, ans_num): serializer = QuestionSerializer(next_question, many=False) return Response(serializer.data) + +def collectHighlightTasks(articles=None, topics=None, project=None): + + project_data = ProjectSerializer(project, many=False).data + topics_data = RootTopicSerializer(topics, many=True).data + return [{ "project": project_data, + "topics": topics_data, + "article": + ArticleSerializer(article, many=False).data + } for article in articles ] + + class HighlightTasks(GenericAPIView): # GenericAPIView assists by providing the pagination settings # and helpful pagination API @@ -134,32 +146,24 @@ class HighlightTasks(GenericAPIView): def get(self, request, *args, **kwargs): + # GenericAPIView passes kwargs to Serializer in get_serializer + # But "?format=json&page=2" works without it. 
+ # kwargs = {'context': self.get_serializer_context()} + # Pagination code is derived from rest_framework.mixins.ListModelMixin # and rest_framework.generics.GenericAPIView:get_serializer - project = Project.objects.get(name="Deciding Force") + project = Project.objects.get(name__exact="Deciding Force Highlighter") topics = Topic.objects.filter(parent=None) articles = self.filter_queryset(self.get_queryset()) page = self.paginate_queryset(articles) if page is not None: - tasks = self.collectTaskList(page, project, topics) + tasks = collectHighlightTasks(page, topics, project) return self.get_paginated_response(tasks) - tasks = self.collectTaskList(articles, project, topics) + tasks = collectHighlightTasks(articles, topics, project) return Response(tasks) - def collectTaskList(self, articles, project, topics): - # next line processes features like ?format=json - kwargs = {'context': self.get_serializer_context()} - - project_data = ProjectSerializer(project, many=False, **kwargs).data - topics_data = RootTopicSerializer(topics, many=True, **kwargs).data - return [{ "project": project_data, - "topics": topics_data, - "article": - ArticleSerializer(article, many=False, **kwargs).data - } for article in articles ] - class HighlightTasksNoPage(HighlightTasks): """ @@ -222,25 +226,31 @@ def collectQuizTasks(topic=None, project=None): articles = (Article.objects .filter(users_highlights__highlights__topic=topic) .prefetch_related(fetchHighlights)) - # Export 10 for development (Add endpoint to export all for production.) - articles = articles.order_by("id")[:10] + + articles = articles.order_by("id") + + project_data = ProjectSerializer(project, many=False).data + topictree_data = TopicSerializer(topictree, many=True).data # With the prefetching config above, the loops below will - # be hitting caches. + # be hitting caches. Only 8 queries should be issued against 8 tables, + # i.e. The query count will not be a function of number of rows returned. 
for article in articles: # Our prefetched highlightsForTopic is nested under # the ArticleHightlight record, in HighlightGroup # Not expecting more than one ArticleHighlight record # but safest to code as if there could be more than one. + + # TODO: Need to further split task by case_number here. highlights = [ hg for ah in article.users_highlights.all() for hg in ah.highlightsForTopic ] taskList.append({ - "project": ProjectSerializer(project, many=False).data, + "project": project_data, "topTopicId": topic.id, - "topictree": TopicSerializer(topictree, many=True).data, + "topictree": topictree_data, "article": ArticleSerializer(article, many=False).data, "highlights": HighlightGroupSerializer( highlights, many=True).data, @@ -265,8 +275,11 @@ def quiz_tasks(request): if request.method == 'GET': taskList = collectQuizTasks( topic = Topic.objects.get(name__exact="Protester"), - project = Project.objects.get(name__exact="Deciding Force") + project = Project.objects.get(name__exact="Deciding Force Quiz") ) + # TODO: this needs to be changed to a paginated endpoint for MockQuiz to use + # Export first 10 for now + taskList = taskList[:10] return Response(taskList) # Register our viewsets with the router diff --git a/thresher_backend/graphviz_settings.py b/thresher_backend/graphviz_settings.py new file mode 100644 index 0000000..91573df --- /dev/null +++ b/thresher_backend/graphviz_settings.py @@ -0,0 +1,8 @@ +from .settings import * + +installed_apps = list(INSTALLED_APPS) +installed_apps.append('django_extensions') +INSTALLED_APPS = tuple(installed_apps) + +GRAPH_MODELS = { +} diff --git a/thresher_backend/settings.py b/thresher_backend/settings.py index bb32830..05661f2 100644 --- a/thresher_backend/settings.py +++ b/thresher_backend/settings.py @@ -15,6 +15,8 @@ # Build paths inside the project like this: os.path.join(BASE_DIR, ...) 
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +HIGHLIGHTER_BUNDLE_JS = os.path.join(BASE_DIR, 'pbs-highlighter/bundle.js') +QUIZ_BUNDLE_JS = os.path.join(BASE_DIR, 'pbs-quiz/bundle.js') # REST Framework settings REST_FRAMEWORK = {