-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Generic article loader with option to highlight all articles with all…
… root topics (#178) * Streamline saving article batch_name. Use variable 'raw_bytes' until article text converted to UTF-8. Improve code that handles attempts to load duplicate articles. * Add generic article loader. Adjust article loader to make metadata attributes and tuas optional. * Write command line utility to highlight all articles with all root topics. * Make highlight all utility into callable function with informative logging. * Improve formatting of Contributor default representation. * Integrate new generic loader and all highlight annotator with existing Upload Articles. Add command line script to highlight all articles with all root topics.
- Loading branch information
1 parent
0cc4d3d
commit 997cc35
Showing
7 changed files
with
150 additions
and
37 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# Standalone-script bootstrap: tell Django which settings module to use
# unless the environment already specifies one.
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "thresher_backend.settings")

import logging
logger = logging.getLogger(__name__)

# Initialize Django before importing any models; the imports below rely on
# the configured settings, so this ordering must be kept.
import django
django.setup()

from thresher.models import Article, Topic
from thresher.models import Contributor, ArticleHighlight, HighlightGroup

# Username of the contributor that owns the whole-article highlights.
# highlightArticles() uses it to detect articles that were already processed,
# and the __main__ entry point uses it to fetch or create that contributor.
GOLDUSERNAME=u"Full Text Highlighted"
|
||
def highlightArticles(root_topics, article_set, contributor):
    """Highlight the entire text of each article once per root topic.

    Creates a set of highlights covering the whole article for every root
    topic, so the Researcher can send these directly to the Quiz.

    Articles that already have a highlight taskrun whose contributor
    username equals GOLDUSERNAME are skipped, so the function can be re-run
    safely. Note the skip check uses GOLDUSERNAME, not the ``contributor``
    argument.

    Args:
        root_topics: Topic queryset (anything supporting ``.all()``) whose
            members each receive one full-text HighlightGroup per article.
        article_set: iterable of Article objects to annotate.
        contributor: Contributor recorded as the author of each new
            ArticleHighlight.

    Returns:
        None. The results are the database rows created.
    """
    # Function-scope import keeps the module's import block untouched.
    from django.db import transaction

    # Evaluate the topic query once instead of once per article.
    topics = list(root_topics.all())
    topic_names = ", ".join(topic_obj.name for topic_obj in topics)
    logger.info(u"Annotating entire articles with following topics: {}"
                .format(topic_names))

    # Every whole-article highlight is recorded under the same case number.
    case_number = 0

    for article_obj in article_set:
        taskruns = article_obj.highlight_taskruns
        # exists() asks the database for a yes/no answer rather than a count.
        if taskruns.filter(contributor__username=GOLDUSERNAME).exists():
            continue

        logger.info("Adding highlights to article number: {}"
                    .format(article_obj.article_number))
        # One offset entry spanning the full text: [start, end, text].
        offset_list = [[0, len(article_obj.text), article_obj.text]]

        # Create the ArticleHighlight and all of its HighlightGroups
        # atomically. Otherwise a failure part-way through would leave a
        # partial set that the skip check above treats as complete on every
        # later run.
        with transaction.atomic():
            article_highlight = ArticleHighlight.objects.create(
                article=article_obj,
                contributor=contributor
            )
            for topic_obj in topics:
                HighlightGroup.objects.create(
                    offsets=offset_list,
                    case_number=case_number,
                    topic=topic_obj,
                    article_highlight=article_highlight
                )
if __name__ == '__main__':
    # Fetch the dedicated full-text contributor, creating it on first use.
    gold_contributor, _ = Contributor.objects.get_or_create(
        username=GOLDUSERNAME
    )
    highlightArticles(
        # PE schemas start with topic_number 1
        Topic.objects.filter(topic_number=1),
        Article.objects.all(),
        gold_contributor,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#!/bin/bash
# Run the highlight-all utility from the project root.
# Stop at the first failing command.
set -o errexit

APP_HOME=/home/thresher

cd "$APP_HOME"
# Make the project packages importable by the Python script.
export PYTHONPATH="$APP_HOME"
python data/highlight_all.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#!/bin/bash
# Host-side wrapper: runs highlight_all.sh inside the thresher_api container.

# MSYS_NO_PATHCONV fixes paths on Docker Toolbox on Windows using
# Git Bash / Mingw. Harmless everywhere else.
export MSYS_NO_PATHCONV=1

# DB updates can use either 'run' or 'exec'
docker-compose exec thresher_api \
    sh /home/thresher/docker/thresher_api/highlight_all.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters