Packages refactoring, ETL improved, new PartialRetrievalDictionary, Eurostat read bulk files modified, new FADN support, many to many mapping, new REST functions, (Workspace - Concepts Helper - Serialization - Data Input - Manager) Improved

Incorporates a minified version of the NIS frontend.
rnebot committed Jul 12, 2018
1 parent 61835e3 commit 69b90d5
Showing 100 changed files with 6,605 additions and 532 deletions.
36 changes: 27 additions & 9 deletions Dockerfile
@@ -1,15 +1,31 @@
FROM grahamdumpleton/mod-wsgi-docker:python-3.5
FROM grahamdumpleton/mod-wsgi-docker:python-3.6

# The image used, "grahamdumpleton/mod-wsgi-docker:python-3.6", is not in Docker Hub.
# It has to be built from the original source code of the 3.5 image (source code on GitHub), with the following versions
# in the corresponding Dockerfile section:
#
# ENV PYTHON_VERSION=3.6.5 \
# NGHTTP2_VERSION=1.32.0 \
# APR_VERSION=1.6.3 \
# APR_UTIL_VERSION=1.6.1 \
# APACHE_VERSION=2.4.33 \
# MOD_WSGI_VERSION=4.6.4 \
# NSS_WRAPPER_VERSION=1.1.3 \
# TINI_VERSION=0.18.0
#
# Build the image using the modified Dockerfile, with the name "grahamdumpleton/mod-wsgi-docker:python-3.6"

# TODO
# Rewrite to contain three things:
# * NIS application. Run using "gunicorn"
# * R modules
# * Celery
# The tool is "supervisor". An example in:
#
# The tool to run both "gunicorn" and "Celery" is "supervisor". An example in:
# https://github.com/pm990320/docker-flask-celery
#
#
# This one container is for MOD_WSGI (Apache2) <<<<<<<<<<<<<<<<<<
# The present container is for MOD_WSGI (Apache2) <<<<<<<<<<<<<<<<<<
#
#
# Build:
@@ -28,16 +44,18 @@ FROM grahamdumpleton/mod-wsgi-docker:python-3.5
# -v /home/rnebot/DATOS/docker/nis-local:/srv
# -e MAGIC_NIS_SERVICE_CONFIG_FILE="nis_docker_local_sqlite.conf" magic-nis:latest
#
# Production server:
# docker create --name nis-local -l magic-postgis -l magic-redis -v /srv/docker/magic/data/nis:/srv
# -e VIRTUAL_HOST=one.nis.magic-nexus-eu -e VIRTUAL_PORT=80 -e LETSENCRYPT_HOST=one.nis.magic-nexus-eu
# NOTE: in the example, the host directory (/home/rnebot/DATOS/docker/nis-local) must have RWX permissions
# for all users: chmod rwx+o ...
# If not, it may not be possible to create
#
# PRODUCTION SERVER:
#
# docker create --name nis-local --network=magic-net -l magic-postgis -l magic-redis -v /srv/docker/magic/data/nis:/srv
# -e VIRTUAL_HOST=one.nis.magic-nexus.eu -e VIRTUAL_PORT=80 -e LETSENCRYPT_HOST=one.nis.magic-nexus.eu
# -e [email protected] -e MAGIC_NIS_SERVICE_CONFIG_FILE="nis_docker_naples.conf"
# magic-nis:latest
#
#
# NOTE: in this last example, the host directory (/home/rnebot/DATOS/docker/nis-local) must have RWX permissions
# for all users: chmod o+rwx ...
# If not, it may not be possible to create files in it

ENV MAGIC_NIS_SERVICE_CONFIG_FILE=""
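The supervisor TODO near the top of this Dockerfile can be sketched as a supervisord configuration running gunicorn and a Celery worker side by side in one container; the program names, the WSGI module path and the worker counts below are illustrative assumptions, not taken from this repository:

```ini
; Hypothetical supervisord.conf (module paths and values are assumptions)
[supervisord]
nodaemon=true

[program:gunicorn]
; Serve the NIS Flask application
command=gunicorn -w 4 -b 0.0.0.0:80 backend.restful_service.service_main:app
autorestart=true

[program:celery]
; Background task worker
command=celery -A backend.restful_service worker --loglevel=INFO
autorestart=true
```

With `nodaemon=true`, supervisord stays in the foreground, so it can serve as the container's entry point.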

2 changes: 1 addition & 1 deletion LICENSE
@@ -1,6 +1,6 @@
BSD 3-Clause License

Copyright (c) 2017, manselpotamus
Copyright (c) 2017, MAGIC Horizon 2020 Project (grant 689669)
All rights reserved.

Redistribution and use in source and binary forms, with or without
2 changes: 1 addition & 1 deletion README.md
@@ -1,6 +1,6 @@
# nis-backend

## Technological Features of NIS product
## Technological Features of NIS tool

**DONE**
* Dockerized: a Dockerfile for Apache2, receiving the name of the configuration file on image creation
55 changes: 55 additions & 0 deletions backend/authentication/__init__.py
@@ -0,0 +1,55 @@
"""
An authenticator is in charge of receiving some credentials to LOGIN
As a result, some demonstration of the success is obtained
It can be an OAuth2 Bearer Token
Or an API Key ("X-api-key" header; provided by Backend)
Or some other
"""
from abc import ABCMeta, abstractmethod

from backend.common.helper import create_dictionary


class IAuthenticator(metaclass=ABCMeta):
@abstractmethod
def get_name(self) -> str:
""" Authenticator name """
pass

@abstractmethod
def check(self, request) -> str:
""" Checks Cookies or Headers for existing "passport" or similar.
If existent, checks for the validity.
If valid, obtains data allowing to match with some identity
:param request: The Request object as received by Flask
:return Information on what happened (if the passport is present or not;
if it is valid; the information to match with identity)
"""
pass

def elaborate_from_credentials(self, credentials):
"""
Elaborate passport from credentials
TODO Is the passport stored in some standard place, or return just the structure
:param credentials:
:return:
"""


class AuthenticatorsManager:
def __init__(self):
self.registry = create_dictionary()

def check(self, request) -> str:
for s in self.registry:
d = self.registry[s].check(request)
if d.get("exists", False):
# Continue
pass
else:
d = None
break
return d
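As a hypothetical illustration of this interface, a minimal API-key authenticator might look as follows. The class, its fixed key set and the returned dict fields are assumptions for the sketch (they mirror the "X-api-key" header mentioned in the docstring and the "exists" flag that AuthenticatorsManager.check inspects), not code from the backend:

```python
class APIKeyAuthenticator:
    """Hypothetical IAuthenticator: validates an "X-api-key" header
    against a fixed set of keys (illustrative only)."""
    def __init__(self, valid_keys):
        self._valid_keys = set(valid_keys)

    def get_name(self) -> str:
        return "api-key"

    def check(self, request) -> dict:
        # "request" only needs a dict-like "headers" attribute, as in Flask
        key = request.headers.get("X-api-key")
        if key is None:
            return {"exists": False, "valid": False, "identity": None}
        valid = key in self._valid_keys
        return {"exists": True, "valid": valid,
                "identity": key if valid else None}
```

A real implementation would be registered in AuthenticatorsManager's registry under its name.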


2 changes: 1 addition & 1 deletion backend/command_executors/analysis/indicators_command.py
@@ -1,6 +1,6 @@
import json

from backend.model.memory.musiasem_concepts import Indicator
from backend.models.musiasem_concepts import Indicator
from backend.model_services import IExecutableCommand, get_case_study_registry_objects


@@ -22,11 +22,11 @@ def obtain_reverse_codes(mapped, dst):
src = set()
dest_set = set([d.lower() for d in dst]) # Destination categories
# Obtain origin categories referencing "dest_set" destination categories
# for k in mapped:
# if k[1].lower() in dest_set:
# src.add[k[0].lower()]

return list(set([k[0].lower() for k in mapped if k[1].lower() in dest_set]))
for k in mapped:
for t in k["to"]:
if t["d"] and t["d"].lower() in dest_set:
src.add(k["o"])
return list(src) # list(set([k[0].lower() for k in mapped if k[1].lower() in dest_set]))
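The new many-to-many structure can be exercised with toy data (codes invented for the example): each entry maps an origin code "o" to a list of destinations "to", each with a code "d" and a weight "w"; unmapped origins carry a placeholder with d=None:

```python
# Toy mapping in the new n-to-m format (codes are invented)
mapped = [
    {"o": "ES", "to": [{"d": "SouthEU", "w": 1.0}]},
    {"o": "FR", "to": [{"d": "SouthEU", "w": 0.5}, {"d": "NorthEU", "w": 0.5}]},
    {"o": "XX", "to": [{"d": None, "w": 1.0}]},  # unmapped placeholder
]

def obtain_reverse_codes(mapped, dst):
    # Origin categories referencing any destination category in "dst"
    dest_set = set(d.lower() for d in dst)
    src = set()
    for k in mapped:
        for t in k["to"]:
            if t["d"] and t["d"].lower() in dest_set:
                src.add(k["o"])
    return list(src)

# obtain_reverse_codes(mapped, ["SouthEU"]) yields origins "ES" and "FR";
# "XX" is never returned because its only destination is the None placeholder
```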


class ETLExternalDatasetCommand(IExecutableCommand):
@@ -98,7 +98,14 @@ def execute(self, state: "State"):
if strcmp(mappings[m].source, source) and \
strcmp(mappings[m].dataset, dataset_name) and \
mappings[m].origin in dims:
df_dst = pd.DataFrame(mappings[m].map, columns=['sou_rce', mappings[m].destination.lower()])
# TODO Change by n-to-m mapping
# Elaborate a many to one mapping
tmp = []
for el in mappings[m].map:
for to in el["to"]:
if to["d"]:
tmp.append([el["o"], to["d"]])
df_dst = pd.DataFrame(tmp, columns=['sou_rce', mappings[m].destination.lower()])
for di in df.columns:
if strcmp(mappings[m].origin, di):
d = di
@@ -109,7 +116,9 @@

# Aggregate (If any dimension has been specified)
if len(self._content["group_by"]) > 0:
values = ["value"] # TODO self._content["measures"] # Column names where data is
# Column names where data is
# HACK: for the case where the measure has been named "obs_value", use "value"
values = [m.lower() if m.lower() != "obs_value" else "value" for m in self._content["measures"]]
rows = [v.lower() for v in self._content["group_by"]] # Group by dimension names
aggs = [] # Aggregation functions
for f in self._content["agg_funcs"]:
@@ -120,17 +129,38 @@
# Calculate Pivot Table. The columns are a combination of values x aggregation functions
# For instance, if two values ["v1", "v2"] and two agg. functions ["avg", "sum"] are provided
# The columns will be: [["average", "v1"], ["average", "v2"], ["sum", "v1"], ["sum", "v2"]]
df2 = pd.pivot_table(df,
values=values,
index=rows,
aggfunc=[aggs[0]], fill_value=np.NaN, margins=False,
dropna=True)
# Remove the multiindex in columns
df2.columns = [col[-1] for col in df2.columns.values]
# Remove the index
df2.reset_index(inplace=True)
# The result, all columns (no index), is stored for later use
ds.data = df2
try:
# Check that all "rows" on which pivot table aggregates are present in the input "df"
# If not, either synthesize them (only possible if there is a single filter value) or remove them
for r in rows.copy():
if r not in df.columns:
found = False
for k in params:
if k.lower() == r:
found = True
if len(params[k]) == 1:
df[r] = params[k][0]
else:
rows.remove(r)
issues((2, "Dimension '" + r + "' removed from the list of dimensions because it is not present in the raw input dataset."))
break
if not found:
rows.remove(r)
issues((2, "Dimension '" + r + "' removed from the list of dimensions because it is not present in the raw input dataset."))
# Pivot table
df2 = pd.pivot_table(df,
values=values,
index=rows,
aggfunc=[aggs[0]], fill_value=np.NaN, margins=False,
dropna=True)
# Remove the multiindex in columns
df2.columns = [col[-1] for col in df2.columns.values]
# Remove the index
df2.reset_index(inplace=True)
# The result, all columns (no index), is stored for later use
ds.data = df2
except Exception as e:
issues.append((3, "There was a problem: "+str(e)))

# Store the dataset in State
datasets[result_name] = ds
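The pivot step above can be reproduced in isolation with toy data (column names invented for the sketch): one measure, one group-by dimension, one aggregation function, followed by the same multi-index flattening:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"geo": ["ES", "ES", "PT", "PT"],
                   "year": [2016, 2017, 2016, 2017],
                   "value": [1.0, 2.0, 3.0, 4.0]})
values, rows, aggs = ["value"], ["geo"], ["sum"]
df2 = pd.pivot_table(df,
                     values=values,
                     index=rows,
                     aggfunc=[aggs[0]], fill_value=np.nan, margins=False,
                     dropna=True)
# Remove the multiindex in columns, e.g. ("sum", "value") -> "value"
df2.columns = [col[-1] for col in df2.columns.values]
# Remove the index, leaving plain columns ["geo", "value"]
df2.reset_index(inplace=True)
```

Here "value" is summed per "geo": 3.0 for ES and 7.0 for PT.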
6 changes: 3 additions & 3 deletions backend/command_executors/external_data/mapping_command.py
@@ -3,7 +3,7 @@

from backend.model_services import IExecutableCommand, get_case_study_registry_objects
from backend.common.helper import obtain_dataset_metadata, strcmp, create_dictionary, obtain_dataset_source
from backend.model.memory.musiasem_concepts import Mapping
from backend.models.musiasem_concepts import Mapping


def convert_code_list_to_hierarchy(cl, as_list=False):
@@ -123,11 +123,11 @@ def assign(n: str, v: str):

def fill_map_with_all_origin_categories(dim, map):
# Check all codes exist
mapped_codes = set(map.keys())
mapped_codes = set([d["o"] for d in map])
all_codes = set([c for c in dim.code_list])
for c in all_codes - mapped_codes: # Loop over "unmapped" origin codes
# This sentence MODIFIES map, so it is not necessary to return it
map[c] = [{"d": None, "w": 1.0}] # Map to placeholder, with weight 1
map.append({"o": c, "to": [{"d": None, "w": 1.0}]}) # Map to placeholder, with weight 1

return map
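The change above switches the map from a dict keyed by origin code to a list of {"o": ..., "to": [...]} entries. A standalone sketch with invented codes (the real function receives a dimension object and reads its code_list; here the code list is passed directly):

```python
def fill_map_with_all_origin_categories(code_list, map_):
    # Append a placeholder destination (weight 1) for every unmapped origin code
    mapped_codes = set(d["o"] for d in map_)
    for c in set(code_list) - mapped_codes:
        # This MODIFIES map_ in place, so returning it is optional
        map_.append({"o": c, "to": [{"d": None, "w": 1.0}]})
    return map_

m = [{"o": "A", "to": [{"d": "X", "w": 1.0}]}]
fill_map_with_all_origin_categories(["A", "B"], m)
# m now also contains {"o": "B", "to": [{"d": None, "w": 1.0}]}
```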

@@ -1,6 +1,6 @@
import json

from backend.model.memory.musiasem_concepts import Parameter
from backend.models.musiasem_concepts import Parameter
from backend.model_services import IExecutableCommand, get_case_study_registry_objects


36 changes: 20 additions & 16 deletions backend/command_executors/specification/data_input_command.py
@@ -3,11 +3,8 @@
from backend.common.helper import create_dictionary
from backend.model_services import IExecutableCommand, State, get_case_study_registry_objects
from backend.command_generators import basic_elements_parser
from backend.model.memory.musiasem_concepts_helper import create_quantitative_observation
from backend.model.memory.musiasem_concepts import FactorType, Observer, FactorInProcessorType, \
Processor, \
Factor, FactorQuantitativeObservation, QualifiedQuantityExpression, \
FlowFundRoegenType, ProcessorsSet, HierarchiesSet, allowed_ff_types, PedigreeMatrix, Reference
from backend.models.musiasem_concepts_helper import create_quantitative_observation
from backend.models.musiasem_concepts import FlowFundRoegenType, ProcessorsSet, PedigreeMatrix, Reference


class DataInputCommand(IExecutableCommand):
@@ -216,7 +213,7 @@ def process_row(row):
ds = datasets[r["_referenced_dataset"]] # Obtain dataset
else:
ds = None
issues.append((3, "Dataset '" + r["_referenced_dataset"] + "' is not declared. Row "+str(i)))
issues.append((3, "Dataset '" + r["_referenced_dataset"] + "' is not declared. Row "+str(i+1)))
else:
ds = None
if ds:
@@ -239,16 +236,23 @@ def process_row(row):
var_dict[k] = r[k][1:] # Dimension
else:
fixed_dict[k] = r[k] # Special
# Iterate the dataset (a pd.DataFrame), row by row
for r_num, r2 in ds.data.iterrows():
r_exp = fixed_dict.copy()
r_exp.update({k: str(r2[v.lower()]) for k, v in var_dict.items()})
if var_taxa_dict:
taxa = r_exp["taxa"]
taxa.update({k: r2[v.lower()] for k, v in var_taxa_dict.items()})
if r_exp["processor"].startswith("#"):
r_exp["processor"] = "_".join([str(taxa[t]) for t in processor_attributes if t in taxa])
process_row(r_exp)
# Check that the # names are in the Dataset

diff = set([v.lower() for v in list(var_dict.values())+list(var_taxa_dict.values())]).difference(set(ds.data.columns))
if diff:
# There are requested fields in var_dict NOT present in the input dataset "ds.data"
issues.append((3, "'"+', '.join(diff)+"' are not present in the requested dataset '"+r["_referenced_dataset"]+"'. Row " + str(i+1)))
else:
# Iterate the dataset (a pd.DataFrame), row by row
for r_num, r2 in ds.data.iterrows():
r_exp = fixed_dict.copy()
r_exp.update({k: str(r2[v.lower()]) for k, v in var_dict.items()})
if var_taxa_dict:
taxa = r_exp["taxa"]
taxa.update({k: r2[v.lower()] for k, v in var_taxa_dict.items()})
if r_exp["processor"].startswith("#"):
r_exp["processor"] = "_".join([str(taxa[t]) for t in processor_attributes if t in taxa])
process_row(r_exp)
else: # Literal values
process_row(r)
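The guard added above can be shown in isolation (column and field names invented): the requested dataset fields are compared against the DataFrame's columns before iterating, and any missing ones are reported instead of raising a KeyError later:

```python
import pandas as pd

ds_data = pd.DataFrame({"geo": ["ES"], "value": [1.0]})     # toy dataset
var_dict = {"amount": "OBS_VALUE", "region": "GEO"}         # requested fields

# Fields requested but absent from the dataset's columns
diff = set(v.lower() for v in var_dict.values()).difference(set(ds_data.columns))
if diff:
    # Would be appended as an issue, as in the command above
    missing = "', '".join(sorted(diff))
# Here diff == {"obs_value"}: "geo" is present, "obs_value" is not
```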

@@ -1,6 +1,6 @@
import json

from backend.model.memory.musiasem_concepts_helper import build_hierarchy
from backend.models.musiasem_concepts_helper import build_hierarchy
from backend.model_services import IExecutableCommand, get_case_study_registry_objects


11 changes: 6 additions & 5 deletions backend/command_executors/specification/metadata_command.py
@@ -42,16 +42,17 @@ def execute(self, state: "State"):
"""
issues = []
cs = state.get("_case_study")
cs_version = state.get("_case_study_version")
state.set("_metadata", self._metadata_dictionary)
if cs:
# Modify case study attributes
cs.name = ""
cs_version.name = ""
if "case_study_name" in self._metadata_dictionary and self._metadata_dictionary["case_study_name"]:
cs.name = "; ".join(self._metadata_dictionary["case_study_name"])
cs_version.name += "- ".join(self._metadata_dictionary["case_study_name"])
if "title" in self._metadata_dictionary and self._metadata_dictionary["title"]:
if cs.name:
cs.name += ":: "
cs.name += "; ".join(self._metadata_dictionary["title"])
if cs_version.name:
cs_version.name += "; "
cs_version.name += "- ".join(self._metadata_dictionary["title"])
if "doi" in self._metadata_dictionary and self._metadata_dictionary["doi"]:
cs.oid = self._metadata_dictionary["doi"][0]
if "description" in self._metadata_dictionary and self._metadata_dictionary["description"]:
@@ -1,6 +1,6 @@
import json

from backend.model.memory.musiasem_concepts import PedigreeMatrix
from backend.models.musiasem_concepts import PedigreeMatrix
from backend.model_services import IExecutableCommand, get_case_study_registry_objects


@@ -33,6 +33,9 @@ def execute(self, state: "State"):

# Insert the PedigreeMatrix object into the state
glb_idx.put(pm.key(), pm)
import jsonpickle
# Debug: check that the PedigreeMatrix survives a serialization round-trip
s = jsonpickle.encode(pm)
pm2 = jsonpickle.decode(s)

return None, None

@@ -1,7 +1,7 @@
import json

from backend.command_generators.spreadsheet_command_parsers.specification import ref_prof
from backend.model.memory.musiasem_concepts import Reference
from backend.models.musiasem_concepts import Reference
from backend.model_services import IExecutableCommand, get_case_study_registry_objects


@@ -1,8 +1,8 @@
import json

from backend.common.helper import create_dictionary
from backend.model.memory.musiasem_concepts import FactorTypesRelationUnidirectionalLinearTransformObservation, Observer
from backend.model.memory.musiasem_concepts_helper import find_or_create_observable
from backend.models.musiasem_concepts import FactorTypesRelationUnidirectionalLinearTransformObservation, Observer
from backend.models.musiasem_concepts_helper import find_or_create_observable
from backend.model_services import IExecutableCommand, get_case_study_registry_objects


