Skip to content

Commit

Permalink
cli: add command to update group's metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
ChiaraBi committed May 14, 2018
1 parent 6856975 commit ef9c3a9
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 4 deletions.
9 changes: 7 additions & 2 deletions asclepias_broker/api/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ class EventAPI:
"""Event API."""

@classmethod
def handle_event(cls, event: dict, no_index=False, user_id=None):
def handle_event(cls, event: dict, no_index=False, user_id=None,
delayed=True):
"""Handle an event payload."""
# Raises JSONSchema ValidationError
jsonschema.validate(event, EVENT_SCHEMA)
Expand All @@ -41,4 +42,8 @@ def handle_event(cls, event: dict, no_index=False, user_id=None):
event_uuid = str(event_obj.id)
idx_enabled = current_app.config['ASCLEPIAS_SEARCH_INDEXING_ENABLED'] \
and (not no_index)
process_event.delay(event_uuid, indexing_enabled=idx_enabled)
if delayed:
process_event.delay(event_uuid, indexing_enabled=idx_enabled)
else:
process_event.apply(kwargs=dict(event_uuid=event_uuid,
indexing_enabled=idx_enabled))
8 changes: 8 additions & 0 deletions asclepias_broker/api/ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,3 +452,11 @@ def update_metadata(relationship: Relationship, payload):
rel_metadata.update(
{k: v for k, v in payload.items()
if k in ('LinkPublicationDate', 'LinkProvider')})


def update_group_metadata(identifier, payload):
"""Update the metadata of the identifier's Identity group."""
group = get_group_from_id(identifier_value=identifier.get('ID'),
id_type=identifier.get('IDScheme'))
if group:
group.data.update(payload)
83 changes: 83 additions & 0 deletions asclepias_broker/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@

import click
from flask.cli import with_appcontext
from invenio_db import db

from .api.ingestion import get_group_from_id, update_group_metadata


@click.group()
Expand Down Expand Up @@ -56,3 +59,83 @@ def load(jsondir, no_index=False):
EventAPI.handle_event(data, no_index=no_index)
except ValueError:
pass


@utils.command('update_metadata')
@click.argument('jsondir', type=click.Path(exists=True, dir_okay=True,
resolve_path=True))
@with_appcontext
def update_metadata(jsondir):
"""Load events from a directory."""
files = find_json(jsondir)
with click.progressbar(files) as bar_files:
for fn in bar_files:
with open(fn, 'r') as fp:
data = json.load(fp)
update_groups(data)


def update_groups(data):
"""Update groups and the Identity group's metadata."""
from .api.events import EventAPI

provider = data.get('Provider')
identifiers = data.get('Object').get('Identifier')
identifiers_ids = set([identifier.get('ID') for identifier in identifiers])

is_identity_group_ok = False
i = 0

while not is_identity_group_ok and i < len(identifiers):
try:
identifier_id = identifiers[i].get('ID')
group = get_group_from_id(identifier_value=identifier_id,
id_type=identifiers[i].get('IDScheme'))
except Exception:
group = None

if group is None:
i = i + 1
else:
group_ids = set([identifier.value
for identifier in group.identifiers])
if identifiers_ids.issubset(group_ids):
is_identity_group_ok = True
else:
new_identifiers_ids = identifiers_ids - group_ids
new_identifiers = [identifier for identifier in identifiers if
identifier.get('ID') in new_identifiers_ids]
event = [{
"RelationshipType": {
"Name": "IsRelatedTo",
"SubTypeSchema": "DataCite",
"SubType": "IsIdenticalTo"
},
"Target": {
"Identifier": new_identifiers[0],
"Type": {
"Name": "unknown"
}
},
"LinkProvider": [
{
"Name": provider
}
],
"Source": {
"Identifier": identifiers[i],
"Type": {
"Name": "unknown"
}
},
"LinkPublicationDate": "2018-05-01"
}]
try:
EventAPI.handle_event(event, no_index=True, delayed=False)
except ValueError:
pass
try:
update_group_metadata(identifiers[0], data.get('Object'))
db.session.commit()
except Exception:
pass
79 changes: 77 additions & 2 deletions tests/model/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,16 @@
from asclepias_broker.api import EventAPI
from asclepias_broker.api.ingestion import get_group_from_id, \
get_or_create_groups, merge_identity_groups, merge_version_groups
from asclepias_broker.cli import update_groups
from asclepias_broker.models import Group, GroupM2M, GroupMetadata, \
GroupRelationship, GroupRelationshipM2M, GroupRelationshipMetadata, \
GroupType, Identifier, Identifier2Group, Relation, Relationship, \
Relationship2GroupRelationship


def _handle_events(events):
def _handle_events(events, no_index=False):
for ev in events:
EventAPI.handle_event(generate_payload(ev))
EventAPI.handle_event(generate_payload(ev), no_index=no_index)


def off_test_simple_id_group_merge(db):
Expand Down Expand Up @@ -64,6 +65,80 @@ def off_test_simple_id_group_merge(db):
assert Identifier2Group.query.count() == 5


def test_update_groups(db):
evtsrc = [
['A', 'IsIdenticalTo', 'B'],
]
_handle_events(evtsrc, no_index=True)
assert Group.query.filter_by(type=GroupType.Identity).count() == 1
group = Group.query.filter_by(type=GroupType.Identity).one()
group_ids = set([identifier.value for identifier in group.identifiers])

# the Identity group contains only 'A' and 'B'
assert set(['A', 'B']).issubset(group_ids)
assert len(set(['A', 'B']).difference(group_ids)) == 0

payload = {
"Provider": "SAO/NASA Astrophysics Data System",
"Object": {
"Identifier": [
{
"IDScheme": "doi",
"ID": "A"
},
{
"IDScheme": "ads",
"ID": "C"
},
{
"IDScheme": "ads",
"ID": "D"
}
],
"Title": "{title}",
"Type": {"Name": "literature"},
"Creator": [
{"Name": "{author.0}"},
{"Name": "{author.1}"},
{"Name": "{author.2}"}
],
"Publisher": [
{"Name": "{pub}",
"Identifier": [{"ID": "{orcid_pub}", "IDScheme": "orcid"}]}
],
"PublicationDate": "2018"
}
}
update_groups(payload)

# fetch the group again
updated_group = Group.query.filter_by(type=GroupType.Identity).one()
updated_group_ids = set([identifier.value
for identifier in updated_group.identifiers])

# the Identity group contains now 'A', 'B', 'C' and 'D'
assert set(['A', 'B', 'C', 'D']).issubset(updated_group_ids)
assert len(set(['A', 'B', 'C', 'D']).difference(updated_group_ids)) == 0

expected_metadata = {
"Title": "{title}",
"Type": {"Name": "literature"},
"Creator": [
{"Name": "{author.0}"},
{"Name": "{author.1}"},
{"Name": "{author.2}"}
],
"Publisher": [
{"Name": "{pub}",
"Identifier": [{"ID": "{orcid_pub}", "IDScheme": "orcid"}]}
],
"PublicationDate": "2018"
}

# the group's metadata got updated as expected
assert updated_group.data.json == expected_metadata


def test_get_or_create_groups(db):
"""Test creating groups (Identity and Version) for an identifier."""
id1 = Identifier(value='A', scheme='doi')
Expand Down

0 comments on commit ef9c3a9

Please sign in to comment.