From 9b252acee95f854b8278bd7bf22e1f612577ebb9 Mon Sep 17 00:00:00 2001 From: huberrob Date: Fri, 13 May 2022 14:06:51 +0200 Subject: [PATCH] version 1.4.9b; fix #267 too many signposting links followed before --- fuji_server/controllers/fair_check.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/fuji_server/controllers/fair_check.py b/fuji_server/controllers/fair_check.py index f1f9eb92..6c7628b9 100644 --- a/fuji_server/controllers/fair_check.py +++ b/fuji_server/controllers/fair_check.py @@ -100,7 +100,7 @@ class FAIRCheck: IDENTIFIERS_ORG_DATA = {} GOOGLE_DATA_DOI_CACHE = [] GOOGLE_DATA_URL_CACHE = [] - FUJI_VERSION = '1.4.9' + FUJI_VERSION = '1.4.9b' def __init__(self, uid, @@ -680,19 +680,27 @@ def set_html_typed_links(self): else: self.logger.info('FsF-F2-01M : Expected HTML to check for typed links but received empty string ') - def get_html_typed_links(self, rel='item'): + def get_html_typed_links(self, rel='item', allkeys=True): # Use Typed Links in HTTP Link headers to help machines find the resources that make up a publication. # Use links to find domains specific metadata datalinks = [] + if not isinstance(rel, list): + rel = [rel] for typed_link in self.typed_links: - if typed_link.get('rel') == rel: + if typed_link.get('rel') in rel: + if not allkeys: + typed_link = {tlkey: typed_link[tlkey] for tlkey in ['url','type']} datalinks.append((typed_link)) return datalinks - def get_signposting_links(self, rel='item'): + def get_signposting_links(self, rel='item', allkeys=True): signlinks = [] + if not isinstance(rel, list): + rel = [rel] for signposting_links in self.signposting_header_links: - if signposting_links.get('rel') == rel: + if signposting_links.get('rel') in rel: + if not allkeys: + signposting_links = {slkey: signposting_links[slkey] for slkey in ['url','type']} signlinks.append(signposting_links) return signlinks @@ -893,11 +901,12 @@ def retrieve_metadata_external(self): rel_meta_links = [] sign_meta_links = [] - typed_metadata_links = self.typed_links + #typed_metadata_links = self.typed_links + typed_metadata_links = self.get_html_typed_links(['describedby', 'meta','alternate meta','metadata'], False) #signposting header links if self.get_signposting_links('describedby'): - sign_header_links = self.get_signposting_links('describedby') + sign_header_links = self.get_signposting_links('describedby', False) self.metadata_sources.append((MetaDataCollector.Sources.SIGN_POSTING.value, 'signposting')) guessed_metadata_link = self.get_guessed_xml_link() @@ -918,6 +927,8 @@ def retrieve_metadata_external(self): typed_metadata_links = [dict(t) for t in {tuple(d.items()) for d in typed_metadata_links}] typed_metadata_links = self.get_preferred_links(typed_metadata_links) + print('Typed links: ',typed_metadata_links) + for metadata_link in typed_metadata_links: if not metadata_link['type']: # guess type based on e.g. file suffix