diff --git a/asclepias_broker/api/events.py b/asclepias_broker/api/events.py index 3ceb333..5f76d0a 100644 --- a/asclepias_broker/api/events.py +++ b/asclepias_broker/api/events.py @@ -23,7 +23,8 @@ class EventAPI: """Event API.""" @classmethod - def handle_event(cls, event: dict, no_index=False, user_id=None): + def handle_event(cls, event: dict, no_index=False, user_id=None, + delayed=True): """Handle an event payload.""" # Raises JSONSchema ValidationError jsonschema.validate(event, EVENT_SCHEMA) @@ -41,4 +42,8 @@ def handle_event(cls, event: dict, no_index=False, user_id=None): event_uuid = str(event_obj.id) idx_enabled = current_app.config['ASCLEPIAS_SEARCH_INDEXING_ENABLED'] \ and (not no_index) - process_event.delay(event_uuid, indexing_enabled=idx_enabled) + if delayed: + process_event.delay(event_uuid, indexing_enabled=idx_enabled) + else: + process_event.apply(kwargs=dict(event_uuid=event_uuid, + indexing_enabled=idx_enabled)) diff --git a/asclepias_broker/api/ingestion.py b/asclepias_broker/api/ingestion.py index 0cd72aa..66b810d 100644 --- a/asclepias_broker/api/ingestion.py +++ b/asclepias_broker/api/ingestion.py @@ -452,3 +452,11 @@ def update_metadata(relationship: Relationship, payload): rel_metadata.update( {k: v for k, v in payload.items() if k in ('LinkPublicationDate', 'LinkProvider')}) + + +def update_group_metadata(identifier, payload): + """Update the metadata of the identifier's Identity group.""" + group = get_group_from_id(identifier_value=identifier.get('ID'), + id_type=identifier.get('IDScheme')) + if group: + group.data.update(payload) diff --git a/asclepias_broker/cli.py b/asclepias_broker/cli.py index 9cf2a71..cefd511 100644 --- a/asclepias_broker/cli.py +++ b/asclepias_broker/cli.py @@ -14,6 +14,9 @@ import click from flask.cli import with_appcontext +from invenio_db import db + +from .api.ingestion import get_group_from_id, update_group_metadata @click.group() @@ -56,3 +59,83 @@ def load(jsondir, no_index=False): EventAPI.handle_event(data, no_index=no_index) except ValueError: pass + + +@utils.command('update_metadata') +@click.argument('jsondir', type=click.Path(exists=True, dir_okay=True, + resolve_path=True)) +@with_appcontext +def update_metadata(jsondir): + """Load events from a directory.""" + files = find_json(jsondir) + with click.progressbar(files) as bar_files: + for fn in bar_files: + with open(fn, 'r') as fp: + data = json.load(fp) + update_groups(data) + + +def update_groups(data): + """Update groups and the Identity group's metadata.""" + from .api.events import EventAPI + + provider = data.get('Provider') + identifiers = data.get('Object').get('Identifier') + identifiers_ids = set([identifier.get('ID') for identifier in identifiers]) + + is_identity_group_ok = False + i = 0 + + while not is_identity_group_ok and i < len(identifiers): + try: + identifier_id = identifiers[i].get('ID') + group = get_group_from_id(identifier_value=identifier_id, + id_type=identifiers[i].get('IDScheme')) + except Exception: + group = None + + if group is None: + i = i + 1 + else: + group_ids = set([identifier.value + for identifier in group.identifiers]) + if identifiers_ids.issubset(group_ids): + is_identity_group_ok = True + else: + new_identifiers_ids = identifiers_ids - group_ids + new_identifiers = [identifier for identifier in identifiers if + identifier.get('ID') in new_identifiers_ids] + event = [{ + "RelationshipType": { + "Name": "IsRelatedTo", + "SubTypeSchema": "DataCite", + "SubType": "IsIdenticalTo" + }, + "Target": { + "Identifier": new_identifiers[0], + "Type": { + "Name": "unknown" + } + }, + "LinkProvider": [ + { + "Name": provider + } + ], + "Source": { + "Identifier": identifiers[i], + "Type": { + "Name": "unknown" + } + }, + "LinkPublicationDate": "2018-05-01" + }] + try: + EventAPI.handle_event(event, no_index=True, delayed=False) + except ValueError: + pass + try: + update_group_metadata(identifiers[0], data.get('Object')) + db.session.commit() + except Exception: + pass diff --git a/tests/model/test_grouping.py b/tests/model/test_grouping.py index 8e718ba..f09ed2b 100644 --- a/tests/model/test_grouping.py +++ b/tests/model/test_grouping.py @@ -12,15 +12,16 @@ from asclepias_broker.api import EventAPI from asclepias_broker.api.ingestion import get_group_from_id, \ get_or_create_groups, merge_identity_groups, merge_version_groups +from asclepias_broker.cli import update_groups from asclepias_broker.models import Group, GroupM2M, GroupMetadata, \ GroupRelationship, GroupRelationshipM2M, GroupRelationshipMetadata, \ GroupType, Identifier, Identifier2Group, Relation, Relationship, \ Relationship2GroupRelationship -def _handle_events(events): +def _handle_events(events, no_index=False): for ev in events: - EventAPI.handle_event(generate_payload(ev)) + EventAPI.handle_event(generate_payload(ev), no_index=no_index) def off_test_simple_id_group_merge(db): @@ -64,6 +65,80 @@ def off_test_simple_id_group_merge(db): assert Identifier2Group.query.count() == 5 +def test_update_groups(db): + evtsrc = [ + ['A', 'IsIdenticalTo', 'B'], + ] + _handle_events(evtsrc, no_index=True) + assert Group.query.filter_by(type=GroupType.Identity).count() == 1 + group = Group.query.filter_by(type=GroupType.Identity).one() + group_ids = set([identifier.value for identifier in group.identifiers]) + + # the Identity group contains only 'A' and 'B' + assert set(['A', 'B']).issubset(group_ids) + assert len(set(['A', 'B']).difference(group_ids)) == 0 + + payload = { + "Provider": "SAO/NASA Astrophysics Data System", + "Object": { + "Identifier": [ + { + "IDScheme": "doi", + "ID": "A" + }, + { + "IDScheme": "ads", + "ID": "C" + }, + { + "IDScheme": "ads", + "ID": "D" + } + ], + "Title": "{title}", + "Type": {"Name": "literature"}, + "Creator": [ + {"Name": "{author.0}"}, + {"Name": "{author.1}"}, + {"Name": "{author.2}"} + ], + "Publisher": [ + {"Name": "{pub}", + "Identifier": [{"ID": "{orcid_pub}", "IDScheme": "orcid"}]} + ], + "PublicationDate": "2018" + } + } + update_groups(payload) + + # fetch the group again + updated_group = Group.query.filter_by(type=GroupType.Identity).one() + updated_group_ids = set([identifier.value + for identifier in updated_group.identifiers]) + + # the Identity group contains now 'A', 'B', 'C' and 'D' + assert set(['A', 'B', 'C', 'D']).issubset(updated_group_ids) + assert len(set(['A', 'B', 'C', 'D']).difference(updated_group_ids)) == 0 + + expected_metadata = { + "Title": "{title}", + "Type": {"Name": "literature"}, + "Creator": [ + {"Name": "{author.0}"}, + {"Name": "{author.1}"}, + {"Name": "{author.2}"} + ], + "Publisher": [ + {"Name": "{pub}", + "Identifier": [{"ID": "{orcid_pub}", "IDScheme": "orcid"}]} + ], + "PublicationDate": "2018" + } + + # the group's metadata got updated as expected + assert updated_group.data.json == expected_metadata + + def test_get_or_create_groups(db): """Test creating groups (Identity and Version) for an identifier.""" id1 = Identifier(value='A', scheme='doi')