Merge pull request #192 from pangaea-data-publisher/robbranch2
v1.3.4
huberrob authored Jul 16, 2021
2 parents 83bea2b + 0c96ccd commit 22462d6
Showing 22 changed files with 501 additions and 342 deletions.
2 changes: 2 additions & 0 deletions fuji_server/__main__.py
@@ -68,6 +68,7 @@ def main():
#preproc.retrieve_linkedvocabs(lov_api=LOV_API, lodcloud_api=LOD_CLOUDNET, bioportal_api=BIOPORTAL_REST, bioportal_key=BIOPORTAL_APIKEY, isDebugMode=False)
preproc.retrieve_linkedvocabs(lov_api=LOV_API, lodcloud_api=LOD_CLOUDNET, isDebugMode=isDebug)
preproc.retrieve_default_namespaces()
preproc.set_remote_log_info(config['SERVICE']['remote_log_host'],config['SERVICE']['remote_log_path'])

logger.info('Total SPDX licenses : {}'.format(preproc.get_total_licenses()))
logger.info('Total re3repositories found from datacite api : {}'.format(len(preproc.getRE3repositories())))
@@ -96,6 +97,7 @@ def main():
log_dir = config['SERVICE']['logdir']
log_directory = os.path.join(my_path, log_dir)
log_file_path = os.path.join(log_directory, 'fuji.log')

if not os.path.exists(log_directory):
os.makedirs(log_directory, exist_ok=True)
#fileConfig(log_configfile, defaults={'logfilename': log_file_path.replace("\\", "/")})
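Note: the added set_remote_log_info call hands the new [SERVICE] remote-logging settings to the Preprocessor; the controller later reads them back as Preprocessor.remote_log_host and Preprocessor.remote_log_path (see the fair_object_controller.py hunk below). The helper itself is not part of the rendered diff, so the following is only a minimal sketch of how such a classmethod could store the values.

# Minimal sketch (assumption): the real Preprocessor lives in
# fuji_server/helper/preprocessor.py and is not shown in this diff.
class Preprocessor:
    remote_log_host = None
    remote_log_path = None

    @classmethod
    def set_remote_log_info(cls, host, path):
        # Stored as class attributes so controllers can later read
        # Preprocessor.remote_log_host and Preprocessor.remote_log_path.
        if host and path:
            cls.remote_log_host = host
            cls.remote_log_path = path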
2 changes: 2 additions & 0 deletions fuji_server/config/server.ini
@@ -10,6 +10,8 @@ data_files_limit = 5
metric_specification = https://doi.org/10.5281/zenodo.4081213
log_config = config/logging.ini
logdir = logs
remote_log_host = fuji.localhost
remote_log_path = /loghandler/index.php

[USER]
usr = username
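Note: the two new [SERVICE] keys are consumed in __main__.py via config['SERVICE']['remote_log_host'] and config['SERVICE']['remote_log_path']. A standalone sketch of reading them with configparser (the file path below is illustrative):

import configparser

config = configparser.ConfigParser()
config.read('fuji_server/config/server.ini')  # illustrative path

# Same lookups as in __main__.py, with fallbacks so a missing key simply
# disables remote logging instead of raising a KeyError.
remote_log_host = config.get('SERVICE', 'remote_log_host', fallback=None)
remote_log_path = config.get('SERVICE', 'remote_log_path', fallback=None)
print(remote_log_host, remote_log_path)  # fuji.localhost /loghandler/index.php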
339 changes: 184 additions & 155 deletions fuji_server/controllers/fair_check.py

Large diffs are not rendered by default.

9 changes: 8 additions & 1 deletion fuji_server/controllers/fair_object_controller.py
@@ -52,8 +52,15 @@ def assess_by_id(body): # noqa: E501
oaipmh_endpoint = body.oaipmh_endpoint
metadata_service_type = body.metadata_service_type
usedatacite = body.use_datacite
logger = Preprocessor.logger
logger.info('Assessment target: '+identifier)
print('Assessment target: ', identifier, flush=True)
ft = FAIRCheck(uid=identifier, test_debug=debug, metadata_service_url = metadata_service_endpoint, metadata_service_type =metadata_service_type, use_datacite=usedatacite, oaipmh_endpoint =oaipmh_endpoint)

# set target for remote logging
remote_log_host, remote_log_path = Preprocessor.remote_log_host, Preprocessor.remote_log_path
#print(remote_log_host, remote_log_path)
if remote_log_host and remote_log_path:
ft.set_remote_logging_target(remote_log_host, remote_log_path)
uid_result, pid_result = ft.check_unique_persistent()
ft.retrieve_metadata_embedded(ft.extruct_result)
if ft.repeat_pid_check:
2 changes: 1 addition & 1 deletion fuji_server/data/metadata_standards.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion fuji_server/data/metadata_standards_uris.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion fuji_server/data/repodois.json

Large diffs are not rendered by default.

84 changes: 53 additions & 31 deletions fuji_server/evaluators/fair_evaluator_community_metadata.py
@@ -33,7 +33,8 @@ def evaluate(self):
self.result = CommunityEndorsedStandard(id=self.metric_number, metric_identifier=self.metric_identifier,
metric_name=self.metric_name)

standards_detected: List[CommunityEndorsedStandardOutputInner] = []
community_standards_detected: List[CommunityEndorsedStandardOutputInner] = []
multidiscipliary_standards_detected = []
if self.fuji.namespace_uri:
self.fuji.namespace_uri = list(set(self.fuji.namespace_uri))
# ============== retrieve community standards by collected namespace uris
@@ -46,75 +47,96 @@
if std_ns_temp:
subject = self.fuji.COMMUNITY_METADATA_STANDARDS_URIS.get(std_ns_temp).get('subject_areas')
std_name = self.fuji.COMMUNITY_METADATA_STANDARDS_URIS.get(std_ns_temp).get('title')
if subject and all(elem == "Multidisciplinary" for elem in subject):
self.logger.info(
'FsF-R1.3-01M : Skipped non-disciplinary standard found through namespaces -: {}'.format(
std_ns))
else:
self.logger.log(self.fuji.LOG_SUCCESS,
'FsF-R1.3-01M : Found disciplinary standard through namespaces -: {}'.format(
std_ns))
if subject:
if all(elem == "Multidisciplinary" for elem in subject):
self.logger.info(
'FsF-R1.3-01M : Found non-disciplinary standard (but RDA listed) found through namespaces -: {}'.format(
str(std_name)+' ('+str(std_ns)+')'))
self.setEvaluationCriteriumScore('FsF-R1.3-01M-3', 0, 'pass')
self.maturity = 1
multidiscipliary_standards_detected.append(std_name)
else:
self.logger.log(self.fuji.LOG_SUCCESS,
'FsF-R1.3-01M : Found disciplinary standard through namespaces -: {}'.format(
std_ns))
nsout = CommunityEndorsedStandardOutputInner()
nsout.metadata_standard = std_name # use here original standard uri detected
nsout.subject_areas = subject
nsout.urls = [std_ns]
standards_detected.append(nsout)
community_standards_detected.append(nsout)
else:
no_match.append(std_ns)
if len(no_match) > 0:
self.logger.info(
'FsF-R1.3-01M : The following standards found through namespaces are excluded as they are not listed in RDA metadata catalog -: {}'.format(
no_match))
if standards_detected:
if len(community_standards_detected) - len(multidiscipliary_standards_detected) > 0:
self.maturity = 3
self.setEvaluationCriteriumScore('FsF-R1.3-01M-1', 1, 'pass')

# ============== use standards listed in the re3data record if no metadata is detected from oai-pmh
re3_detected = False
if len(self.fuji.community_standards) > 0:
#if len(standards_detected) == 0:
#if len(community_standards_detected) == 0:
if self.fuji.use_datacite:
self.logger.info('FsF-R1.3-01M : Using re3data to detect metadata standard(s)')
for s in self.fuji.community_standards:
re3_listed = False
standard_found = self.fuji.lookup_metadatastandard_by_name(s)
if standard_found:
subject = self.fuji.COMMUNITY_STANDARDS.get(standard_found).get('subject_areas')
if subject and all(elem == "Multidisciplinary" for elem in subject):
self.logger.info('FsF-R1.3-01M : Skipped non-disciplinary standard -: {}'.format(s))
else:
if self.maturity < 2:
self.maturity = 2
re3_detected = True
self.logger.log(self.fuji.LOG_SUCCESS,
'FsF-R1.3-01M : Found disciplinary standard through re3data -: {}'.format(
s))
out = CommunityEndorsedStandardOutputInner()
out.metadata_standard = s
out.subject_areas = self.fuji.COMMUNITY_STANDARDS.get(standard_found).get('subject_areas')
out.urls = self.fuji.COMMUNITY_STANDARDS.get(standard_found).get('urls')
standards_detected.append(out)
if subject:
#print(subject, standard_found)
re3_listed = True
if all(elem == "Multidisciplinary" for elem in subject):
self.logger.info(
'FsF-R1.3-01M : Found non-disciplinary standard (but RDA listed) found through re3data -: {}'.format(
standard_found))
self.setEvaluationCriteriumScore('FsF-R1.3-01M-3', 0, 'pass')
if self.maturity <= 1:
self.maturity = 1
multidiscipliary_standards_detected.append(standard_found)
#self.logger.info('FsF-R1.3-01M : Skipped non-disciplinary standard -: {}'.format(s))
elif standard_found=='Repository-Developed Metadata Schemas':
re3_listed = False
self.logger.info('FsF-R1.3-01M : Skipped proprietary standard -: {}'.format(s))
else:
if self.maturity < 2:
self.maturity = 2
re3_detected = True
self.logger.log(self.fuji.LOG_SUCCESS,
'FsF-R1.3-01M : Found disciplinary standard through re3data -: {}'.format(
s))
if re3_listed:
rdaurls = self.fuji.COMMUNITY_STANDARDS.get(standard_found).get('urls')
if isinstance(rdaurls, list):
rdaurls= [rdaurls[0]]
out = CommunityEndorsedStandardOutputInner()
out.metadata_standard = s
out.subject_areas = self.fuji.COMMUNITY_STANDARDS.get(standard_found).get('subject_areas')
out.urls = rdaurls
community_standards_detected.append(out)
elif self.fuji.use_datacite:
self.logger.info(
'FsF-R1.3-01M : Metadata standard(s) that are listed in re3data are excluded from the assessment output.')


elif self.fuji.use_datacite:
self.logger.warning('FsF-R1.3-01M : NO metadata standard(s) of the repository specified in re3data')

if standards_detected:
if community_standards_detected:
if re3_detected:
if self.maturity < 3:
self.maturity = 2
self.setEvaluationCriteriumScore('FsF-R1.3-01M-2', 1, 'pass')
else:
self.setEvaluationCriteriumScore('FsF-R1.3-01M-2', 0, 'pass')
self.score.earned = self.total_score
if len(community_standards_detected) - len(multidiscipliary_standards_detected) > 0:
self.score.earned = self.total_score
self.result.test_status = 'pass'


else:
self.logger.warning('FsF-R1.3-01M : Unable to determine community standard(s)')
self.result.metric_tests = self.metric_tests
self.result.score = self.score
self.result.maturity = self.maturity
self.result.output = standards_detected
self.result.output = community_standards_detected
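Note: the evaluator now distinguishes multidisciplinary standards from disciplinary ones: a standard whose subject areas are all "Multidisciplinary" still passes sub-criterion FsF-R1.3-01M-3 at maturity 1, while maturity 3 and the full score are reserved for cases where disciplinary standards are detected as well. A standalone sketch of the subject-area test used above (example subject lists are illustrative, not taken from the RDA catalogue):

def is_multidisciplinary_only(subject_areas):
    # True when every listed subject area is the literal "Multidisciplinary"
    return bool(subject_areas) and all(s == "Multidisciplinary" for s in subject_areas)

print(is_multidisciplinary_only(["Multidisciplinary"]))               # True  -> criterion -3, maturity 1
print(is_multidisciplinary_only(["Earth sciences", "Oceanography"]))  # False -> disciplinary, criterion -1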
20 changes: 16 additions & 4 deletions fuji_server/evaluators/fair_evaluator_data_content_metadata.py
@@ -185,11 +185,23 @@ def evaluate(self):
elif d == 'size':
if tika_content_size == 0:
self.logger.warning('{0} : Could not verify content size (received: 0 bytes) from downloaded file'.format(self.metric_identifier))
elif data_object.get('size') == tika_content_size:
matches_content = True
matches_size = True
else:
self.logger.warning('{0} : Could not verify content size from downloaded file -: (expected: {1}, found: {2})'.format(self.metric_identifier, str(data_object.get('size')), str(tika_content_size) ))
#print(type(data_object.get('size')))
try:
object_size=int(float(data_object.get('size')))
if object_size == tika_content_size:
matches_content = True
matches_size = True
else:
self.logger.warning(
'{0} : Could not verify content size from downloaded file -: (expected: {1}, found: {2})'.format(
self.metric_identifier, str(data_object.get('size')),
str(tika_content_size)))

except Exception as e:
self.logger.warning(
'{0} : Could not verify content size from downloaded file -: (expected: {1}, found: {2})'.format(
self.metric_identifier, str(data_object.get('size')), str(tika_content_size)))

data_content_filetype_inner = DataContentMetadataOutputInner()
data_content_filetype_inner.descriptor = descriptor
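Note: the size check previously compared the declared size with the tika byte count directly; harvested metadata often delivers the size as a string or a float, or as a value that cannot be parsed at all, so the change coerces it with int(float(...)) inside a try/except. A small standalone illustration of that coercion (values are illustrative):

tika_content_size = 2048  # bytes reported by tika

for declared in ("2048", "2048.0", 2048.0, "2 KB"):
    try:
        if int(float(declared)) == tika_content_size:
            print(repr(declared), '-> content size verified')
        else:
            print(repr(declared), '-> size mismatch')
    except (TypeError, ValueError):
        # "2 KB" ends up here, matching the warning branch in the diff above
        print(repr(declared), '-> could not verify content size')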
10 changes: 8 additions & 2 deletions fuji_server/evaluators/fair_evaluator_file_format.py
@@ -54,7 +54,10 @@ def evaluate(self):
content_urls = [item.get('url') for item in self.fuji.content_identifier]
self.logger.info('FsF-R1.3-02D : Data content identifier provided - {}'.format(content_urls))
#self.maturity = 1
loginpage = False
preferred_detected = False
for file_index, data_file in enumerate(self.fuji.content_identifier):

mime_type = data_file.get('type')
if data_file.get('url') is not None:
if mime_type is None or mime_type in ['application/octet-stream']:
@@ -87,7 +90,7 @@

# FILE FORMAT CHECKS....
# check if format is a scientific one:
loginpage = False

for mimetype, url in mime_url_pair.items():
data_file_output = DataFileFormatOutputInner()
preferance_reason = []
@@ -129,14 +132,17 @@
preferance_reason.extend(['long term format', 'open format', 'generic science format'])
subject_area.append('General')
data_file_output.is_preferred_format = True
if 'html' in mimetype:
loginpage = True

if preferance_reason:
preferred_detected = True
data_file_output.mime_type = mimetype
data_file_output.file_uri = url
data_file_output.preference_reason = list(set(preferance_reason))
data_file_output.subject_areas = list(set(subject_area))
data_file_list.append(data_file_output)
if len(data_file_list) > 0 and not loginpage:
if preferred_detected and not loginpage:
self.score.earned = 1
self.setEvaluationCriteriumScore('FsF-R1.3-02D-1', 1, 'pass')
#self.maturity = 3
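Note: the loginpage flag is now initialised once before the loop over all content files, and preferred_detected is introduced, so the point for FsF-R1.3-02D-1 is only awarded when at least one preferred format was found and none of the delivered files turned out to be an HTML (e.g. login) page. A simplified standalone sketch of that gate (the file list and preferred formats below are illustrative, not the evaluator's actual tables):

files = [
    {"url": "https://example.org/data.nc", "type": "application/x-netcdf"},
    {"url": "https://example.org/download", "type": "text/html"},  # e.g. a login page
]
preferred_mime_types = {"application/x-netcdf", "text/csv"}  # illustrative subset

loginpage = any("html" in (f.get("type") or "") for f in files)
preferred_detected = any((f.get("type") or "") in preferred_mime_types for f in files)

score = 1 if preferred_detected and not loginpage else 0
print(score)  # 0 here: an HTML page among the data files blocks the point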
16 changes: 11 additions & 5 deletions fuji_server/evaluators/fair_evaluator_persistent_identifier.py
@@ -46,7 +46,7 @@ def evaluate(self):
if self.fuji.id_scheme =='url':
self.fuji.origin_url = self.fuji.id
check_url =self.fuji.id
if check_url is not None:
if check_url:
# ======= RETRIEVE METADATA FROM LANDING PAGE =======
requestHelper = RequestHelper(check_url, self.logger)
requestHelper.setAcceptType(AcceptTypes.html_xml) # request
@@ -57,6 +57,7 @@
if type(self.fuji.extruct_result) != dict:
self.fuji.extruct_result ={}
r = requestHelper.getHTTPResponse()
response_status =requestHelper.response_status

if r:
self.fuji.landing_url = requestHelper.redirect_url
Expand All @@ -69,7 +70,8 @@ def evaluate(self):

#self.fuji.repeat_pid_check = False
if self.fuji.landing_url not in ['https://datacite.org/invalid.html']:
if r.status == 200:

if response_status == 200:
# identify signposting links in header
header_link_string = requestHelper.getHTTPResponse().getheader('Link')
if header_link_string is not None:
@@ -124,19 +126,23 @@
self.output.resolvable_status = True
self.logger.info('FsF-F1-02D : Object identifier active (status code = 200)')
self.fuji.isMetadataAccessible = True
elif r.status_code in [401, 402, 403]:
elif response_status in [401, 402, 403]:
self.fuji.isMetadataAccessible = False
self.logger.warning("FsF-F1-02D : Resource inaccessible, identifier returned http status code -: {code}".format(code=r.status_code))
self.logger.error("FsF-F1-02D : Resource inaccessible, identifier returned http status code -: {code}".format(code=response_status))
else:
self.fuji.isMetadataAccessible = False
self.logger.warning("FsF-F1-02D : Resource inaccessible, identifier returned http status code -: {code}".format(code=r.status_code))
self.logger.error("FsF-F1-02D : Resource inaccessible, identifier returned http status code -: {code}".format(code=response_status))
else:
self.logger.warning("FsF-F1-02D : Invalid DOI, identifier resolved to -: {code}".format(
code=self.fuji.landing_url))

else:
self.fuji.isMetadataAccessible = False
self.logger.warning("FsF-F1-02D :Resource inaccessible, no response received from -: {}".format(check_url))
if response_status in [401, 402, 403]:
self.logger.error(
"FsF-F1-02D : Resource inaccessible, identifier returned http status code -: {code}".format(
code=response_status))
else:
self.logger.warning("FsF-F1-02D :Resource inaccessible, could not identify an actionable representation for the given identfier -: {}".format(self.fuji.id))

2 changes: 2 additions & 0 deletions fuji_server/evaluators/fair_evaluator_unique_identifier.py
@@ -43,6 +43,8 @@ def evaluate(self):
self.logger.info('FsF-F1-01D : Using idutils schemes')
idhelper = IdentifierHelper(self.fuji.id)
found_ids = idhelper.identifier_schemes
self.logger.info('Starting assessment on identifier: {}'.format(self.fuji.id))

#found_ids = idutils.detect_identifier_schemes(self.fuji.id) # some schemes like PMID are generic
if len(found_ids) > 0:
self.logger.log(self.fuji.LOG_SUCCESS,'FsF-F1-01D : Unique identifier schemes found {}'.format(found_ids))
2 changes: 1 addition & 1 deletion fuji_server/helper/catalogue_helper_datacite.py
@@ -16,7 +16,7 @@ def __init__(self,logger: logging.Logger = None):
def query(self, pid):
response = None
try:
res= apiresponse = re.get(self.apiURI+'/'+pid)
res= apiresponse = re.get(self.apiURI+'/'+pid, timeout=5)
self.logger.info('FsF-F4-01M : Querying DataCite API for -:' + str(pid))
if res.status_code == 200:
self.islisted =True
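Note: both catalogue helpers now pass an explicit timeout to requests.get, so a stalled DataCite or Mendeley Data lookup can no longer block an assessment indefinitely. A standalone sketch of the same pattern (the endpoint URL and PID are illustrative; the helper's actual apiURI is defined elsewhere in the class):

import requests

def query_catalogue(api_uri, pid, timeout=5):
    # Bound the wait for the external API and treat a timeout like any
    # other failed lookup instead of letting the request hang.
    try:
        res = requests.get(api_uri + '/' + pid, timeout=timeout)
        return res.json() if res.status_code == 200 else None
    except requests.exceptions.RequestException:
        # covers Timeout, ConnectionError and similar request failures
        return None

print(query_catalogue('https://api.datacite.org/dois', '10.1234/example'))  # illustrative call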
2 changes: 1 addition & 1 deletion fuji_server/helper/catalogue_helper_mendeley_data.py
@@ -18,7 +18,7 @@ def query(self, pidlist):
for pid in pidlist:
try:
if pid:
res= apiresponse = re.get(self.apiURI+'/'+re.utils.quote(str(pid)))
res= apiresponse = re.get(self.apiURI+'/'+re.utils.quote(str(pid)), timeout=1)
self.logger.info('FsF-F4-01M : Querying Mendeley Data API for -:' + str(pid))
if res.status_code == 200:
resp = res.json()