Skip to content

Commit

Permalink
Create projects and tasks on a remote Pybossa server using Django-RQ (#…
Browse files Browse the repository at this point in the history
…67)

* Remove stray comma from bower.json

* Extract task generation into standalone functions.

Migration to add db index to [HighlightGroup]case_number.

* Initialize separate projects for Highlighter and Quiz.

* Add paths for Highlighter and Quiz bundles to Django settings.

* Add pybossa_id field to Project model. Add Task model and migration.

Initial code for adding and deleting Project records on Pybossa.

* Add script and settings for generating model diagrams. Update diagrams.

* Use new functions to create default projects.

* Add files to help install graphviz on Ubuntu.

* Creating projects with task presenters included.

* Update Project and Task models. Replace previous migration adding Task model.

Add iso8601 library to Thresher requirements.

* Use Project.task_type to select task presenter bundle.

* Updated UserProfileSerializer field name 'users_highlights'

* Queue creation and deletion of remote Pybossa projects using Django-RQ.

* Update thresher model diagrams.

* Revise some comments in pybossa_api.py

* Get test project records filtering by short_name only.

* Generate tasks using django-rq worker.

* Add Pybossa info fields to Project model. Add migration.

Save info on remote project creation.

Instead of logging in workers, return useful status dictionaries to django-RQ.

* Update thresher model diagrams.

* Use kwargs for RQ workers to document args in RQ job detail view.

* Now creating Highlighter and Quiz tasks on remote Pybossa server.

* Update thresher model diagrams.

* pybossa-client doesn't work with pickle or Django-RQ. Remove.

* Add temporary script for testing Django RQ worker.
  • Loading branch information
normangilmore authored Feb 28, 2017
1 parent 4c3d657 commit 6bf0289
Show file tree
Hide file tree
Showing 21 changed files with 1,078 additions and 438 deletions.
2 changes: 1 addition & 1 deletion bower.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
"name": "annotator-custom-editor",
"dependencies": {
"bootstrap": "~3.3.5",
"modernizr": "~2.8.3",
"modernizr": "~2.8.3"
}
}
34 changes: 26 additions & 8 deletions data/init_defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,24 +74,42 @@ def createNick(username="nick", email="[email protected]", password="bidsatdoe",
u.save()
profile = UserProfile.objects.get_or_create(
user=u,
defaults = {"experience_score": 0.98, "accuracy_score": 0.99}
defaults = {"pybossa_url": "http://crowdcrafting.org",
"experience_score": 0.98, "accuracy_score": 0.99}
)[0]
logger.info("Created researcher '%s', password '%s'." % (username, password))
return u.userprofile

def createDecidingForce():
def createHighlighterProject():
(project, created) = Project.objects.get_or_create(
name="Deciding Force",
instructions="This project analyzes media " +
"descriptions of interactions " +
"between police and protestors."
short_name="DecidingForceHighlighter",
defaults = {
"name": "Deciding Force Highlighter",
"task_type": "HLTR",
"instructions": "Highlight passages in articles that discuss " +
"the topics shown."
}
)
if created:
logger.info("Created project 'Deciding Force'")
logger.info("Created project '%s'" % project.name)
return project

def createQuizProject():
    """
    Return the default Quiz project, creating it first if it does not exist.

    The project is looked up by its short_name only; the remaining field
    values are applied solely when a new record has to be created.
    """
    quiz_defaults = {
        "name": "Deciding Force Quiz",
        "task_type": "QUIZ",
        "instructions": "Answer questions about short text passages."
    }
    project, created = Project.objects.get_or_create(
        short_name="DecidingForceQuiz",
        defaults=quiz_defaults
    )
    if created:
        logger.info("Created project '%s'" % project.name)
    return project

if __name__ == '__main__':
createSuperUser()
researchers = createThresherGroup()
createNick(groups=[researchers])
createDecidingForce()
createHighlighterProject()
createQuizProject()
3 changes: 2 additions & 1 deletion data/load_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,8 @@ def load_args():

if __name__ == '__main__':
init_defaults.createSuperUser()
init_defaults.createDecidingForce()
init_defaults.createHighlighterProject()
init_defaults.createQuizProject()
researchers = init_defaults.createThresherGroup()
created_by = init_defaults.createNick(groups=[researchers])
args = load_args()
Expand Down
274 changes: 274 additions & 0 deletions data/pybossa_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "thresher_backend.settings")
import django
django.setup()
from django.db import connection
from django.conf import settings

import logging
logger = logging.getLogger(__name__)

import json
import requests
from requests.compat import urljoin
import iso8601
import django_rq

from thresher.models import Article, Topic, Project, UserProfile, Task
from thresher.views import collectHighlightTasks, collectQuizTasks
from data import init_defaults

class InvalidTaskType(Exception):
    """Raised when a project's task type is neither 'HLTR' nor 'QUIZ'."""

class FileNotFound(Exception):
    """Raised when a task presenter bundle file does not exist on disk."""


def create_remote_project(profile, project):
    """
    Enqueue the worker that creates `project` on a remote Pybossa server,
    using the server URL and API key stored on `profile`.
    """
    # Only primary keys are sent through the queue: pickling and later
    # unpickling Django model instances would be unwise.
    # Note: if the profile or project record has disappeared by the time
    # the worker runs, we WANT the worker to fail.
    job_kwargs = {"profile_id": profile.id, "project_id": project.id}
    create_remote_project_worker.delay(**job_kwargs)

@django_rq.job('default', timeout=60, result_ttl=24*3600)
def create_remote_project_worker(profile_id=None, project_id=None):
    """
    Django-RQ worker: create a project on a remote Pybossa server.

    Loads the UserProfile and Project by primary key, POSTs the project
    (with its task presenter bundle embedded in "info") to
    <profile.pybossa_url>/api/project, and on success stores the remote
    id, owner id, secret key and creation time on the local Project.

    Args:
        profile_id: primary key of the UserProfile holding the Pybossa
            URL and API key.
        project_id: primary key of the local Project to publish.

    Returns:
        The JSON dictionary returned by Pybossa, with the large task
        presenter blanked out (success) or the exception message
        truncated to 256 characters (failure) so the result kept by RQ
        stays small.

    Raises:
        InvalidTaskType, FileNotFound: from the presenter helpers.
        The model lookups fail if either record no longer exists, which
        is intentional: the job should then be reported as failed.
    """
    profile = UserProfile.objects.get(pk=profile_id)
    url = urljoin(profile.pybossa_url, "/api/project")
    params = {'api_key': profile.pybossa_api_key}

    project = Project.objects.get(pk=project_id)
    bundlePath = getPresenterPath(project.task_type)
    payload = {
        "name": project.name,
        "short_name": project.short_name,
        "description": "Add project description here.",
        "info": {
            "task_presenter": getPresenter(bundlePath)
        }
    }
    headers = {'content-type': 'application/json'}
    resp = requests.post(url, params=params,
                         headers=headers, timeout=30,
                         json=payload)
    result = resp.json()
    # Floor division: with true division (Python 3) only an exact 200
    # would compare equal to 2, and e.g. 201 Created would be missed.
    if resp.status_code // 100 == 2 and result.get('id'):
        # if Pybossa reports success, then we expect these fields to be present
        # save info about where this project can be found remotely
        project.pybossa_url = profile.pybossa_url
        project.pybossa_id = int(result.get('id'))
        project.pybossa_owner_id = int(result.get('owner_id'))
        project.pybossa_secret_key = result.get('secret_key', '')
        project.pybossa_created = iso8601.parse_date(result.get('created'))
        project.save()
        # delete our large task_presenter from the result so it isn't logged by Python-RQ
        info = result.get('info')
        if isinstance(info, dict):
            info['task_presenter'] = ""
    else:
        # our large task_presenter is embedded in the exception_msg,
        # so truncate the message (when the response carries one at all)
        exception_msg = result.get('exception_msg')
        if exception_msg:
            result['exception_msg'] = exception_msg[:256]
    return result

def delete_remote_project(profile, project):
    """
    Enqueue the worker that deletes `project` from a remote Pybossa
    server, using the server URL and API key stored on `profile`.
    """
    # Send primary keys only; the worker reloads both records itself.
    job_kwargs = {"profile_id": profile.id, "project_id": project.id}
    delete_remote_project_worker.delay(**job_kwargs)

@django_rq.job('default', timeout=60, result_ttl=24*3600)
def delete_remote_project_worker(profile_id=None, project_id=None):
    """
    Django-RQ worker: delete a project from a remote Pybossa server.

    Sends DELETE <profile.pybossa_url>/api/project/<pybossa_id>. On
    success the Pybossa fields on the local Project are cleared and the
    local Task records attached to the project are deleted.

    Args:
        profile_id: primary key of the UserProfile holding the Pybossa
            URL and API key.
        project_id: primary key of the local Project to unpublish.

    Returns:
        A status dictionary with "deleted", "short_name", "task_type"
        and "url" (plus "error" when the project has no remote id), or,
        when Pybossa rejects the request, the JSON error body it sent.
    """
    profile = UserProfile.objects.get(pk=profile_id)
    project = Project.objects.get(pk=project_id)
    headers = {'content-type': 'application/json'}
    result = {
        "deleted": False,
        "short_name": project.short_name,
        "task_type": project.task_type,
        "url": project.getURL()
    }
    if not project.pybossa_id:
        # Nothing to delete remotely; "deleted" is already False.
        result["error"] = "No id for remote project."
        return result
    url = urljoin(profile.pybossa_url, "/api/project/%d" % (project.pybossa_id))
    params = {'api_key': profile.pybossa_api_key }
    resp = requests.delete(url, params=params, headers=headers, timeout=30)
    # Floor division: with true division (Python 3) only an exact 200
    # would compare equal to 2, and e.g. 204 No Content would be missed.
    if resp.status_code // 100 == 2:
        result["deleted"] = True
        project.pybossa_url = ""
        project.pybossa_id = None
        project.pybossa_owner_id = None
        project.pybossa_secret_key = ""
        project.pybossa_created = None
        project.save()
        # Pybossa has cascade deleted any tasks on the server - get rid of
        # our references to those tasks
        project.tasks.all().delete()
    else:
        result = resp.json() # Pybossa only returns JSON if DELETE has error
    return result

def getPresenterPath(task_type):
    """
    Return the path of the task presenter bundle configured in Django
    settings for `task_type` ("HLTR" or "QUIZ").

    Raises InvalidTaskType for any other task type.
    """
    if task_type == "HLTR":
        return settings.HIGHLIGHTER_BUNDLE_JS
    if task_type == "QUIZ":
        return settings.QUIZ_BUNDLE_JS
    raise InvalidTaskType("Project task type must be 'HLTR' or 'QUIZ'")

def getPresenter(bundlePath):
    """
    Read the JavaScript bundle at `bundlePath` and return it wrapped in
    a <script> element, ready to be used as a task presenter.

    Raises FileNotFound if `bundlePath` is not an existing regular file.
    """
    if not os.path.isfile(bundlePath):
        raise FileNotFound("Task Presenter bundle.js not found: %s" % (bundlePath))
    with open(bundlePath) as bundle:
        source = bundle.read()
    return "<script>\n%s\n</script>" % source

# Use our default user and projects to exercise the API.
def testCreateRemoteProjects():
    """
    Queue remote creation of the default Highlighter and Quiz projects,
    using the default researcher profile.
    """
    researchers = init_defaults.createThresherGroup()
    profile = init_defaults.createNick(groups=[researchers])

    # Highlighter first, then Quiz; each project is fetched (or created
    # locally) immediately before its job is queued.
    project_factories = (
        init_defaults.createHighlighterProject,
        init_defaults.createQuizProject,
    )
    for make_project in project_factories:
        create_remote_project(profile, make_project())

def testDeleteRemoteProjects():
    """
    Queue remote deletion of the default Highlighter and Quiz projects,
    using the default researcher profile.
    """
    researchers = init_defaults.createThresherGroup()
    profile = init_defaults.createNick(groups=[researchers])

    # Highlighter first, then Quiz; each project is fetched (or created
    # locally) immediately before its job is queued.
    project_factories = (
        init_defaults.createHighlighterProject,
        init_defaults.createQuizProject,
    )
    for make_project in project_factories:
        delete_remote_project(profile, make_project())

def testCreateRemoteHighlighterTasks():
    """
    Queue generation of Highlighter tasks for every article and every
    top-level topic, under the "Deciding Force Highlighter" project and
    the profile of user "nick".
    """
    # Send primary keys through Django-RQ, not Models and Querysets
    nick = UserProfile.objects.get(user__username="nick")
    all_articles = Article.objects.all()
    root_topics = Topic.objects.filter(parent=None)
    job_kwargs = {
        "profile_id": nick.id,
        "article_ids": list(all_articles.values_list('id', flat=True)),
        "topic_ids": list(root_topics.values_list('id', flat=True)),
        "project_id": Project.objects.get(
            name__exact="Deciding Force Highlighter").id,
    }
    generate_highlight_tasks_worker.delay(**job_kwargs)

@django_rq.job('default', timeout=60, result_ttl=24*3600)
def generate_highlight_tasks_worker(profile_id=None,
                                    article_ids=None,
                                    topic_ids=None,
                                    project_id=None,
                                    max_tasks=5):
    """
    Django-RQ worker: collect Highlighter tasks and queue one
    create_remote_task_worker job per task.

    Args:
        profile_id: primary key of the UserProfile whose Pybossa
            credentials the per-task jobs will use.
        article_ids: primary keys of the Articles to build tasks from.
        topic_ids: primary keys of the Topics to build tasks from.
        project_id: primary key of the local Project; it must already
            exist remotely and have task_type "HLTR".
        max_tasks: upper bound on the number of tasks queued. Defaults
            to 5, the cap previously hard-coded for debugging; pass None
            to queue every collected task.

    Returns:
        A dictionary with "task_type", "generatedTasks" and
        "numberOfQueries", or a dictionary with "error_msg" when the
        project is not usable.
    """
    # NOTE(review): Django only records connection.queries when DEBUG is
    # True, so numberOfQueries is presumably 0 otherwise - confirm.
    startCount = len(connection.queries)
    articles = Article.objects.filter(id__in=article_ids)
    topics = Topic.objects.filter(id__in=topic_ids)
    project = Project.objects.get(pk=project_id)
    if not project.pybossa_id:
        return {"error_msg": "Project '%s' must be created remotely "
                "before creating tasks for it." % (project.short_name)}
    if project.task_type != "HLTR":
        return {"error_msg": "Project type must be 'HLTR', "
                "found '%s'" % (project.task_type)}
    tasks = collectHighlightTasks(articles, topics, project)
    if max_tasks is not None:
        tasks = tasks[:max_tasks]
    for task in tasks:
        create_remote_task_worker.delay(profile_id=profile_id,
                                        project_id=project_id,
                                        task=task)
    return ({
        "task_type": "HLTR",
        "generatedTasks": len(tasks),
        "numberOfQueries": len(connection.queries) - startCount
    })

def testCreateRemoteQuizTasks():
    """
    Queue generation of Quiz tasks for the "Protester" topic, under the
    "Deciding Force Quiz" project and the profile of user "nick".
    """
    # Send primary keys through Django-RQ, not Models
    job_kwargs = {
        "profile_id": UserProfile.objects.get(user__username="nick").id,
        "topic_id": Topic.objects.get(name__exact="Protester").id,
        "project_id": Project.objects.get(
            name__exact="Deciding Force Quiz").id,
    }
    generate_quiz_tasks_worker.delay(**job_kwargs)

@django_rq.job('default', timeout=60, result_ttl=24*3600)
def generate_quiz_tasks_worker(profile_id=None, topic_id=None, project_id=None,
                               max_tasks=5):
    """
    Django-RQ worker: collect Quiz tasks for one topic and queue one
    create_remote_task_worker job per task.

    Args:
        profile_id: primary key of the UserProfile whose Pybossa
            credentials the per-task jobs will use.
        topic_id: primary key of the Topic to build tasks from.
        project_id: primary key of the local Project; it must already
            exist remotely and have task_type "QUIZ".
        max_tasks: upper bound on the number of tasks queued. Defaults
            to 5, the cap previously hard-coded for debugging; pass None
            to queue every collected task.

    Returns:
        A dictionary with "task_type", "generatedTasks" and
        "numberOfQueries", or a dictionary with "error_msg" when the
        project is not usable.
    """
    # NOTE(review): Django only records connection.queries when DEBUG is
    # True, so numberOfQueries is presumably 0 otherwise - confirm.
    startCount = len(connection.queries)
    topic = Topic.objects.get(pk=topic_id)
    project = Project.objects.get(pk=project_id)
    if not project.pybossa_id:
        return {"error_msg": "Project '%s' must be created remotely "
                "before creating tasks for it." % (project.short_name)}
    if project.task_type != "QUIZ":
        return {"error_msg": "Project type must be 'QUIZ', "
                "found '%s'" % (project.task_type)}
    tasks = collectQuizTasks(topic, project)
    if max_tasks is not None:
        tasks = tasks[:max_tasks]
    for task in tasks:
        create_remote_task_worker.delay(profile_id=profile_id,
                                        project_id=project_id,
                                        task=task)
    return ({
        "task_type": "QUIZ",
        "generatedTasks": len(tasks),
        "numberOfQueries": len(connection.queries) - startCount
    })

@django_rq.job('default', timeout=60, result_ttl=24*3600)
def create_remote_task_worker(profile_id=None, project_id=None, task=None, n_answers=1):
    """
    Django-RQ worker: create one task on a remote Pybossa server.

    POSTs the task to <profile.pybossa_url>/api/task for the remote
    project recorded on the local Project, and on success saves a local
    Task record pointing at the remote task.

    Args:
        profile_id: primary key of the UserProfile holding the Pybossa
            URL and API key.
        project_id: primary key of the local Project the task belongs to.
        task: the task data, sent to Pybossa as the task's "info" and
            stored on the local Task record.
        n_answers: value sent to Pybossa as the task's "n_answers".

    Returns:
        The JSON dictionary returned by Pybossa, with "info" blanked out
        (success) or the exception message truncated to 256 characters
        (failure) so the result kept by RQ stays small.
    """
    profile = UserProfile.objects.get(pk=profile_id)
    url = urljoin(profile.pybossa_url, "/api/task")
    params = {'api_key': profile.pybossa_api_key}

    project = Project.objects.get(pk=project_id)

    payload = {
        "project_id": project.pybossa_id,
        "info": task,
        "calibration": 0,
        "priority_0": 0.0,
        "n_answers": n_answers,
        "quorum": 0
    }

    headers = {'content-type': 'application/json'}
    resp = requests.post(url, params=params,
                         headers=headers, timeout=30,
                         json=payload)
    result = resp.json()
    # Floor division: with true division (Python 3) only an exact 200
    # would compare equal to 2, and e.g. 201 Created would be missed.
    if resp.status_code // 100 == 2 and result.get('id'):
        # if Pybossa reports success, then we expect these fields to be present
        Task(
            project_id = project_id,
            task_type = project.task_type,
            info = task,
            pybossa_id = int(result.get('id')),
            pybossa_project_id = int(result.get('project_id')),
            pybossa_created = iso8601.parse_date(result.get('created')),
            pybossa_state = result.get('state')
        ).save()
        # our task info was already logged by Python RQ as an incoming parameter
        result['info'] = ""
    else:
        # our large info item may be embedded in the exception_msg,
        # so truncate the message (when the response carries one at all)
        exception_msg = result.get('exception_msg')
        if exception_msg:
            result['exception_msg'] = exception_msg[:256]
    return result


if __name__ == '__main__':
    # Log which presenter bundles are configured, then queue the jobs:
    # remote projects first, followed by the tasks for each project.
    logger.info("Highlighter bundle: %s" % settings.HIGHLIGHTER_BUNDLE_JS)
    logger.info("Quiz bundle: %s" % settings.QUIZ_BUNDLE_JS)

    for enqueue_step in (testCreateRemoteProjects,
                         testCreateRemoteHighlighterTasks,
                         testCreateRemoteQuizTasks):
        enqueue_step()
13 changes: 13 additions & 0 deletions data/rqworker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "thresher_backend.settings")

# Do time consuming Django setup before forking individual worker processes.
import django
django.setup()
from django.conf import settings

from django_rq import get_worker

if __name__ == '__main__':
    # Obtain a Django-RQ worker with default arguments and start
    # processing jobs.
    get_worker().work()
2 changes: 1 addition & 1 deletion docker/thresher_api/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ django-filter==1.0.1
django-rq==0.9.4
djangorestframework==3.5.4
gunicorn==19.6.0
iso8601==0.1.11
Markdown==2.6.8
psycopg2==2.6.2
pybossa-client==1.1.1
pytz==2016.10
redis==2.10.5
requests==2.13.0
Expand Down
4 changes: 4 additions & 0 deletions docs/generate-models-diagram.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/sh
# Render the diagram of the thresher app's models in both PNG and SVG
# form, using the dedicated graphviz settings module.
export DJANGO_SETTINGS_MODULE=thresher_backend.graphviz_settings
for ext in png svg; do
    python manage.py graph_models --pygraphviz -o "docs/thresher_models.$ext" thresher
done
2 changes: 2 additions & 0 deletions docs/graphviz-reqs.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
django-extensions==1.7.7
pygraphviz==1.3.1
3 changes: 3 additions & 0 deletions docs/install-graphviz.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash
# For Ubuntu 14.04
# Installs the system Graphviz packages; presumably these are the
# prerequisites for the pygraphviz package in docs/graphviz-reqs.txt - confirm.
sudo apt-get install graphviz libgv-python libgraphviz-dev
Binary file modified docs/thresher_models.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 6bf0289

Please sign in to comment.