From 195c91ad98e1adfbb0e7bf0800a7bf1aa30ab513 Mon Sep 17 00:00:00 2001 From: huberrob Date: Thu, 31 Mar 2022 12:00:46 +0200 Subject: [PATCH] signposting regex --- .../fair_evaluator_persistent_identifier.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fuji_server/evaluators/fair_evaluator_persistent_identifier.py b/fuji_server/evaluators/fair_evaluator_persistent_identifier.py index ae3093e8..7d94dbb4 100644 --- a/fuji_server/evaluators/fair_evaluator_persistent_identifier.py +++ b/fuji_server/evaluators/fair_evaluator_persistent_identifier.py @@ -112,8 +112,6 @@ def evaluate(self): header_link_string = requestHelper.getResponseHeader().get('Link') #header_link_string = requestHelper.getHTTPResponse().getheader('Link') if header_link_string is not None: - self.logger.info('FsF-F1-02D : Found signposting links in response header of landingpage') - for preparsed_link in header_link_string.split(','): found_link = None found_type, type_match = None, None @@ -123,12 +121,12 @@ def evaluate(self): found_link = parsed_link[0].strip() for link_prop in parsed_link[1:]: link_prop=str(link_prop).strip() - if link_prop.startswith('rel="'): - rel_match = re.search('rel=\"(.*?)\"', link_prop) - elif link_prop.startswith('type="'): - type_match = re.search('type=\"(.*?)\"', link_prop) - elif link_prop.startswith('formats="'): - formats_match = re.search('formats=\"(.*?)\"', link_prop) + if link_prop.startswith('rel'): + rel_match = re.search('rel\s*=\s*\"?([^,;"]+)\"?', link_prop) + elif link_prop.startswith('type'): + type_match = re.search('type\s*=\s*\"?([^,;"]+)\"?', link_prop) + elif link_prop.startswith('formats'): + formats_match = re.search('formats\s*=\s*\"?([^,;"]+)\"?', link_prop) if type_match: found_type = type_match[1] if rel_match: @@ -141,9 +139,11 @@ def evaluate(self): 'rel': found_rel, 'profile': found_formats } - if found_link: + if signposting_link_dict.get('url'): self.fuji.signposting_header_links.append(signposting_link_dict) + self.logger.info('FsF-F1-02D : Found signposting links in response header of landingpage -: ' + str(self.fuji.signposting_header_links)) + #check if there is a cite-as signposting link if self.fuji.pid_scheme is None: signposting_pid_link = self.fuji.get_signposting_links('cite-as')