Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update groups metadata #51

Merged
merged 2 commits into from
May 29, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions asclepias_broker/api/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ class EventAPI:
"""Event API."""

@classmethod
def handle_event(cls, event: dict, no_index=False, user_id=None):
def handle_event(cls, event: dict, no_index=False, user_id=None,
delayed=True):
"""Handle an event payload."""
# Raises JSONSchema ValidationError
jsonschema.validate(event, EVENT_SCHEMA)
Expand All @@ -41,4 +42,8 @@ def handle_event(cls, event: dict, no_index=False, user_id=None):
event_uuid = str(event_obj.id)
idx_enabled = current_app.config['ASCLEPIAS_SEARCH_INDEXING_ENABLED'] \
and (not no_index)
process_event.delay(event_uuid, indexing_enabled=idx_enabled)
if delayed:
process_event.delay(event_uuid, indexing_enabled=idx_enabled)
else:
process_event.apply(kwargs=dict(event_uuid=event_uuid,
indexing_enabled=idx_enabled))
65 changes: 65 additions & 0 deletions asclepias_broker/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@

import click
from flask.cli import with_appcontext
from invenio_db import db

from .api.ingestion import get_group_from_id


@click.group()
Expand Down Expand Up @@ -56,3 +59,65 @@ def load(jsondir, no_index=False):
EventAPI.handle_event(data, no_index=no_index)
except ValueError:
pass


@utils.command('update_metadata')
@click.argument('jsondir', type=click.Path(exists=True, dir_okay=True,
                                           resolve_path=True))
@with_appcontext
def update_metadata(jsondir):
    """Update group metadata from the JSON files found in a directory.

    Every file returned by ``find_json(jsondir)`` is parsed and its payload
    is passed to ``update_groups``.
    """
    files = find_json(jsondir)
    with click.progressbar(files) as bar_files:
        for fn in bar_files:
            # Read JSON as UTF-8 explicitly instead of relying on the
            # platform/locale default encoding.
            with open(fn, 'r', encoding='utf-8') as fp:
                data = json.load(fp)
            update_groups(data)


def update_groups(data):
    """Update groups and the Identity group's metadata.

    For every identifier listed under ``data['Object']['Identifier']`` a
    synthetic ``IsRelatedTo``/``IsIdenticalTo`` event is emitted that links
    it to the first identifier of the list. Afterwards the metadata of the
    group returned by ``get_group_from_id`` for that first identifier is
    updated with ``data['Object']`` and the session is committed.

    :param data: dict with a ``Provider`` name and an ``Object`` dict that
        holds an ``Identifier`` list (items with ``ID`` and ``IDScheme``
        keys) next to the metadata fields.
    :returns: ``None``. Payloads without identifiers are skipped; a failure
        while updating the metadata is logged and rolled back, not raised.
    """
    import logging

    logger = logging.getLogger(__name__)

    provider = data.get('Provider')
    obj = data.get('Object') or {}
    identifiers = obj.get('Identifier') or []
    if not identifiers:
        # Without identifiers there is nothing to link and no group to
        # look up; skip instead of failing on a malformed payload.
        logger.warning('Skipping payload without Object identifiers.')
        return

    # Imported locally, as in the original code (presumably to avoid a
    # circular import at module load time).
    from .api.events import EventAPI

    primary = identifiers[0]
    # NOTE: the first iteration links the primary identifier to itself.
    # This is kept on purpose since it is the only event emitted when the
    # payload carries a single identifier.
    for identifier in identifiers:
        event = [{
            "RelationshipType": {
                "Name": "IsRelatedTo",
                "SubTypeSchema": "DataCite",
                "SubType": "IsIdenticalTo"
            },
            "Target": {
                "Identifier": identifier,
                "Type": {
                    "Name": "unknown"
                }
            },
            "LinkProvider": [
                {
                    "Name": provider
                }
            ],
            "Source": {
                "Identifier": primary,
                "Type": {
                    "Name": "unknown"
                }
            },
            "LinkPublicationDate": "2018-05-01"
        }]
        try:
            # Processed synchronously so the groups exist before the
            # metadata update below.
            EventAPI.handle_event(event, no_index=True, delayed=False)
        except ValueError:
            # Best effort: keep going with the remaining identifiers.
            pass

    try:
        group = get_group_from_id(primary['ID'], primary['IDScheme'])
        if group:
            group.data.update(obj)
            db.session.commit()
    except Exception:
        # Still best effort, but do not hide the failure and do not leave
        # the session in a failed state for the next payload.
        db.session.rollback()
        logger.exception('Failed to update group metadata for %r', primary)
79 changes: 77 additions & 2 deletions tests/model/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,16 @@
from asclepias_broker.api import EventAPI
from asclepias_broker.api.ingestion import get_group_from_id, \
get_or_create_groups, merge_identity_groups, merge_version_groups
from asclepias_broker.cli import update_groups
from asclepias_broker.models import Group, GroupM2M, GroupMetadata, \
GroupRelationship, GroupRelationshipM2M, GroupRelationshipMetadata, \
GroupType, Identifier, Identifier2Group, Relation, Relationship, \
Relationship2GroupRelationship


def _handle_events(events):
def _handle_events(events, no_index=False):
for ev in events:
EventAPI.handle_event(generate_payload(ev))
EventAPI.handle_event(generate_payload(ev), no_index=no_index)


def off_test_simple_id_group_merge(db):
Expand Down Expand Up @@ -64,6 +65,80 @@ def off_test_simple_id_group_merge(db):
assert Identifier2Group.query.count() == 5


def test_update_groups(db):
    """Test that ``update_groups`` merges identifiers and updates metadata.

    An Identity group holding 'A' and 'B' is created first. The payload then
    lists 'A', 'C' and 'D' as identifiers of one object, so all four are
    expected in the same Identity group, carrying the payload's metadata.
    """
    # NOTE(review): this test might be a better fit for test_metadata.py
    # (or test_cli.py); restructuring the tests is tracked in issue #52.
    evtsrc = [
        ['A', 'IsIdenticalTo', 'B'],
    ]
    _handle_events(evtsrc, no_index=True)
    assert Group.query.filter_by(type=GroupType.Identity).count() == 1
    group = Group.query.filter_by(type=GroupType.Identity).one()
    group_ids = {identifier.value for identifier in group.identifiers}

    # the Identity group contains only 'A' and 'B' (exact match, so extra
    # identifiers would make the test fail)
    assert group_ids == {'A', 'B'}

    payload = {
        "Provider": "SAO/NASA Astrophysics Data System",
        "Object": {
            "Identifier": [
                {
                    "IDScheme": "doi",
                    "ID": "A"
                },
                {
                    "IDScheme": "ads",
                    "ID": "C"
                },
                {
                    "IDScheme": "ads",
                    "ID": "D"
                }
            ],
            "Title": "{title}",
            "Type": {"Name": "literature"},
            "Creator": [
                {"Name": "{author.0}"},
                {"Name": "{author.1}"},
                {"Name": "{author.2}"}
            ],
            "Publisher": [
                {"Name": "{pub}",
                 "Identifier": [{"ID": "{orcid_pub}", "IDScheme": "orcid"}]}
            ],
            "PublicationDate": "2018"
        }
    }
    update_groups(payload)

    # fetch the group again
    updated_group = Group.query.filter_by(type=GroupType.Identity).one()
    updated_group_ids = {identifier.value
                         for identifier in updated_group.identifiers}

    # the Identity group contains now exactly 'A', 'B', 'C' and 'D'
    assert updated_group_ids == {'A', 'B', 'C', 'D'}

    expected_metadata = {
        "Title": "{title}",
        "Type": {"Name": "literature"},
        "Creator": [
            {"Name": "{author.0}"},
            {"Name": "{author.1}"},
            {"Name": "{author.2}"}
        ],
        "Publisher": [
            {"Name": "{pub}",
             "Identifier": [{"ID": "{orcid_pub}", "IDScheme": "orcid"}]}
        ],
        "PublicationDate": "2018"
    }

    # the group's metadata got updated as expected
    assert updated_group.data.json == expected_metadata


def test_get_or_create_groups(db):
"""Test creating groups (Identity and Version) for an identifier."""
id1 = Identifier(value='A', scheme='doi')
Expand Down