Skip to content

Commit

Permalink
extended mime type list for mime type guessing based on file extension…
Browse files Browse the repository at this point in the history
…; improved signposting handling; version 1.4.8c
  • Loading branch information
huberrob committed Apr 27, 2022
1 parent 6000464 commit 378f711
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 2 deletions.
13 changes: 11 additions & 2 deletions fuji_server/controllers/fair_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import io
import json
import logging, logging.handlers
import mimetypes
import re
#import urllib
import urllib.request as urllib
Expand Down Expand Up @@ -99,7 +100,7 @@ class FAIRCheck:
IDENTIFIERS_ORG_DATA = {}
GOOGLE_DATA_DOI_CACHE = []
GOOGLE_DATA_URL_CACHE = []
FUJI_VERSION = '1.4.8b'
FUJI_VERSION = '1.4.8c'

def __init__(self,
uid,
Expand Down Expand Up @@ -215,6 +216,7 @@ def load_predata(cls):
cls.VALID_RESOURCE_TYPES = Preprocessor.get_resource_types()
if not cls.IDENTIFIERS_ORG_DATA:
cls.IDENTIFIERS_ORG_DATA = Preprocessor.get_identifiers_org_data()
Preprocessor.set_mime_types()
#not needed locally ... but init class variable
#Preprocessor.get_google_data_dois()
#Preprocessor.get_google_data_urls()
Expand Down Expand Up @@ -915,6 +917,12 @@ def retrieve_metadata_external(self):
typed_metadata_links = self.get_preferred_links(typed_metadata_links)

for metadata_link in typed_metadata_links:
if not metadata_link['type']:
# guess type based on e.g. file suffix
try:
metadata_link['type'] = mimetypes.guess_type(metadata_link['url'])[0]
except Exception:
pass
if metadata_link['type'] in ['application/rdf+xml', 'text/rdf','text/n3', 'text/rdf+n3','application/rdf+n3','text/ttl','text/turtle','application/turtle', 'application/x-turtle', 'application/ld+json']:
self.logger.info('FsF-F2-01M : Found e.g. Typed Links in HTML Header linking to RDF Metadata -: (' +
str(metadata_link['type']) + ' ' + str(metadata_link['url']) + ')')
Expand Down Expand Up @@ -988,7 +996,8 @@ def retrieve_metadata_external(self):
# also add found xml namespaces without recognized data
elif len(linked_xml_collector.getNamespaces())>0:
self.merge_metadata(dict(), metadata_link['url'], source_linked_xml,'xml',lkd_namespace)

else:
self.logger.info('FsF-F2-01M : Found typed link or signposting link but cannot handle given mime type -:'+str(metadata_link['type']))

if self.reference_elements:
self.logger.debug('FsF-F2-01M : Reference metadata elements NOT FOUND -: {}'.format(
Expand Down
13 changes: 13 additions & 0 deletions fuji_server/helper/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import mimetypes

import yaml
import json
Expand Down Expand Up @@ -83,6 +84,18 @@ def __new__(cls):
# Put any initialization here.
return cls._instance

@classmethod
def set_mime_types(cls):
    """Register additional extension -> MIME type mappings from mime-db.

    Downloads the jshttp/mime-db ``db.json`` and adds every listed file
    extension that the standard ``mimetypes`` tables do not know yet, so
    that ``mimetypes.guess_type()`` recognises more file suffixes.

    This is best effort: any failure (network error, timeout, HTTP error,
    malformed JSON) is logged as a warning and the standard mapping is
    left as it is. Nothing is returned.
    """
    mime_db_url = 'https://raw.githubusercontent.com/jshttp/mime-db/master/db.json'
    # Without a timeout a stalled connection would block the caller forever.
    timeout_seconds = 10
    try:
        response = requests.get(mime_db_url, timeout=timeout_seconds)
        # Do not try to interpret an error page as the mime database.
        response.raise_for_status()
        mimes = response.json()
        # Load the system mime tables first so the "already known" check
        # below sees them and existing mappings are not overridden.
        if not mimetypes.inited:
            mimetypes.init()
        for mime_type, mime_data in mimes.items():
            extensions = mime_data.get('extensions')
            if not extensions:
                continue
            for ext in extensions:
                suffix = '.' + ext
                if suffix not in mimetypes.types_map:
                    mimetypes.add_type(mime_type, suffix, strict=True)
    except Exception as e:
        # Deliberately broad: extra mime types are optional, start-up must go on.
        cls.logger.warning(
            'Loading additional mime types failed, will continue with standard set -: %s', e)

@classmethod
def set_max_content_size(cls, size):
cls.max_content_size = int(size)
Expand Down

0 comments on commit 378f711

Please sign in to comment.