Add labels to exported datasets
rnebot committed Dec 24, 2018
1 parent 622eb9e commit 8b16327
Showing 11 changed files with 62 additions and 24 deletions.
2 changes: 2 additions & 0 deletions Dockerfile
@@ -53,6 +53,8 @@ FROM grahamdumpleton/mod-wsgi-docker:python-3.6
 # docker create --name nis-local --network=magic-net -l magic-postgis -l magic-redis -v /srv/docker/magic/data/nis:/srv
 #        -e VIRTUAL_HOST=one.nis.magic-nexus.eu -e VIRTUAL_PORT=80 -e LETSENCRYPT_HOST=one.nis.magic-nexus.eu
 #        -e LETSENCRYPT_EMAIL=[email protected] -e MAGIC_NIS_SERVICE_CONFIG_FILE="nis_docker_naples.conf"
+#        -e MOD_WSGI_REQUEST_TIMEOUT=1500 -e MOD_WSGI_SOCKET_TIMEOUT=1500
+#        -e MOD_WSGI_CONNECT_TIMEOUT=1500 -e MOD_WSGI_INACTIVITY_TIMEOUT=1500
 #        magic-nis:latest
 #
 #
15 changes: 15 additions & 0 deletions
@@ -8,6 +8,8 @@
 from backend.model_services import IExecutableCommand, get_case_study_registry_objects
 from backend.common.helper import obtain_dataset_metadata, strcmp, create_dictionary, \
     augment_dataframe_with_mapped_columns, translate_case
+from backend.models.musiasem_concepts import Hierarchy
+from backend.models.musiasem_concepts_helper import convert_code_list_to_hierarchy


 def obtain_reverse_codes(mapped, dst):
@@ -186,6 +188,19 @@ def execute(self, state: "State"):
         for ir, r in enumerate(rows):
             rows[ir] = df_columns_dict[r]

+        # Create and register Hierarchy objects from origin Dataset dimensions: state, ds
+        ds_columns_dict = create_dictionary(data={c.code: c.code for c in ds.dimensions})
+        for r in rows:
+            if r in ds_columns_dict:
+                # Create hierarchy local to the dataset
+                for d in ds.dimensions:
+                    if strcmp(r, d.code):
+                        if d.code_list:
+                            h = convert_code_list_to_hierarchy(d.code_list)
+                            h.name = result_name + "_" + r
+                            glb_idx.put(h.key(), h)
+                        break
+
         # Pivot table using Group by
         if True:
             groups = df.groupby(by=rows, as_index=False)  # Split
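The hunk above converts each dimension's code list into a Hierarchy named "<result_name>_<dimension code>" and registers it in glb_idx; the export endpoint later recovers it under that same name. A minimal sketch of the naming and lookup convention, using stand-in classes (the real Hierarchy, convert_code_list_to_hierarchy and glb_idx are the backend objects imported in the diff):

class Hierarchy:  # stand-in for backend.models.musiasem_concepts.Hierarchy
    def __init__(self, name, codes):
        self.name = name
        self.codes = codes  # {code: description}

    def key(self):
        return "h_" + self.name.lower()  # assumption: registry keys are case-insensitive

    @staticmethod
    def partial_key(name):
        return "h_" + name.lower()

glb_idx = {}  # stand-in for the global registry behind glb_idx.put / glb_idx.get

# A dimension "GEO" of a dataset materialized as "ds1" yields hierarchy "ds1_GEO"
h = Hierarchy("ds1" + "_" + "GEO", {"ES": "Spain", "PT": "Portugal"})
glb_idx[h.key()] = h

# Later, the service can look it up from the dataset name and the column name
assert glb_idx[Hierarchy.partial_key("ds1_GEO")].codes["ES"] == "Spain"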
16 changes: 16 additions & 0 deletions backend/command_executors/version2/dataset_query_command.py
@@ -17,6 +17,8 @@
 # TODO Result parameter column also change a bit
 # TODO See "DatasetQry" command in "MuSIASEM case study commands" Google Spreadsheet
 #
+from backend.models.musiasem_concepts_helper import convert_code_list_to_hierarchy
+

 def obtain_reverse_codes(mapped, dst):
     """
@@ -176,6 +178,20 @@ def execute(self, state: "State"):
             if not found:
                 group_by_dims.remove(r)
                 issues.append((2, "Dimension '" + r + "' removed from the list of dimensions because it is not present in the raw input dataset."))
+
+        # Create and register Hierarchy objects from origin Dataset dimensions: state, ds
+        ds_columns_dict = create_dictionary(data={c.code: c.code for c in ds.dimensions})
+        for r in group_by_dims:
+            if r in ds_columns_dict:
+                # Create hierarchy local to the dataset
+                for d in ds.dimensions:
+                    if strcmp(r, d.code):
+                        if d.code_list:
+                            h = convert_code_list_to_hierarchy(d.code_list)
+                            h.name = result_name + "_" + r
+                            glb_idx.put(h.key(), h)
+                        break
+
         # Pivot table using Group by
         if True:
             groups = df.groupby(by=group_by_dims, as_index=False)  # Split
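This hunk repeats the hierarchy-registration loop of the previous file almost verbatim (rows there, group_by_dims here). Were the duplication factored out, a shared helper might look like the sketch below; the function name and placement are hypothetical, but every call it makes appears in the diff:

from backend.common.helper import create_dictionary, strcmp
from backend.models.musiasem_concepts_helper import convert_code_list_to_hierarchy

def register_dimension_hierarchies(ds, dims, result_name, glb_idx):
    # For each requested dimension that has a code list, create a Hierarchy
    # local to the dataset and register it as "<result_name>_<dimension>"
    ds_columns_dict = create_dictionary(data={c.code: c.code for c in ds.dimensions})
    for r in dims:
        if r not in ds_columns_dict:
            continue
        for d in ds.dimensions:
            if strcmp(r, d.code):
                if d.code_list:
                    h = convert_code_list_to_hierarchy(d.code_list)
                    h.name = result_name + "_" + r
                    glb_idx.put(h.key(), h)
                break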
53 changes: 29 additions & 24 deletions backend/restful_service/service_main.py
@@ -34,7 +34,7 @@
     print("Executing locally!")
     os.environ["MAGIC_NIS_SERVICE_CONFIG_FILE"] = "./nis_local.conf"

-from backend.common.helper import generate_json, obtain_dataset_source, gzipped, str2bool
+from backend.common.helper import generate_json, obtain_dataset_source, gzipped, str2bool, create_dictionary
 from backend.models.musiasem_methodology_support import *
 from backend.common.create_database import create_pg_database_engine, create_monet_database_engine
 from backend.restful_service import app, register_external_datasources
@@ -968,7 +968,32 @@ def reproducible_session_query_state_get_dataset(name, format):  # Query list of
         # Obtain the data and the metadata
         ds = datasets[name]  # type: Dataset
         ds2 = ds.data
-        # TODO Elaborate Metadata
+        # Labels
+        labels_enabled = request.args.get("labels", "True") == "True"
+        if labels_enabled:
+            print("Preparing Dataset labels")
+            # Merge with Taxonomy LABELS, IF available
+            for col in ds2.columns:
+                hs = glb_idx.get(Hierarchy.partial_key(name + "_" + col))
+                if len(hs) == 1:
+                    h = hs[0]
+                    nodes = h.get_all_nodes()
+                    tmp = []
+                    for nn in nodes:
+                        t = nodes[nn]
+                        tmp.append([t[0].lower(), t[1]])  # CSens (codes lowercased for case-insensitive matching)
+                    if not backend.case_sensitive and ds2[col].dtype == 'O':
+                        ds2[col + "_l"] = ds2[col].str.lower()
+                        col = col + "_l"
+
+                    # Dataframe of codes and descriptions
+                    df_dst = pd.DataFrame(tmp, columns=['sou_rce', col + "_desc"])
+                    ds2 = pd.merge(ds2, df_dst, how='left', left_on=col, right_on='sou_rce')
+                    del ds2['sou_rce']
+                    if not backend.case_sensitive:
+                        del ds2[col]
+
+        # TODO Elaborate "meta-workbook" (workbook capable of reproducing the dataset)
         if format == "json":
             tmp = json.loads('{"data": '+ds2.to_json(orient='split', date_format='iso', date_unit='s')+', "metadata": {}}')
             del tmp["data"]["index"]
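The labelling block above is, at its core, a left join of (code, description) pairs onto the exported data. A toy pandas session showing the effect on a single column (values invented for illustration):

import pandas as pd

ds2 = pd.DataFrame({"geo": ["es", "pt", "es"], "value": [1.0, 2.0, 3.0]})

# (code, description) pairs, as extracted from the registered Hierarchy
df_dst = pd.DataFrame([["es", "Spain"], ["pt", "Portugal"]],
                      columns=["sou_rce", "geo_desc"])

# Left merge keeps every data row and attaches a description where a code matches
ds2 = pd.merge(ds2, df_dst, how="left", left_on="geo", right_on="sou_rce")
del ds2["sou_rce"]  # drop the join key copied in from the labels frame
print(ds2)
#   geo  value  geo_desc
# 0  es    1.0     Spain
# 1  pt    2.0  Portugal
# 2  es    3.0     Spain

The deliberately misspelled join key 'sou_rce' presumably avoids colliding with a real dataset column of the same name.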
Expand Down Expand Up @@ -996,27 +1021,6 @@ def reproducible_session_query_state_get_dataset(name, format): # Query list of
             schema = dict(model=dict(fields=fields), cube=dict(dimensions=dimensions, measures=measures))
             r = build_json_response(dict(data=data, schema=schema), 200)
         elif format == "xlsx":
-            # TODO Merge with Taxonomies IF some column appear
-            if True:
-                # Taxonomy definitions
-                hs = glb_idx.get(Hierarchy.partial_key())
-                # Hierarchies of Categories (not of Processors or of FactorTypes)
-                hset = set([h.name.lower() for h in hs if h.hierarchy_type == Taxon])  # CSens?
-                for col in ds2.columns:
-                    if col in hset:  # CSens?
-                        for h in hs:
-                            if h.hierarchy_type == Taxon:
-                                if h.name.lower() == col:
-                                    nodes = h.get_all_nodes()
-                                    tmp = []
-                                    for nn in nodes:
-                                        t = nodes[nn]
-                                        tmp.append([t[0].lower(), t[1]])  # CSens
-                                    # Dataframe of codes and descriptions
-                                    df_dst = pd.DataFrame(tmp, columns=['sou_rce', col + "_desc"])
-                                    ds2 = pd.merge(ds2, df_dst, how='left', left_on=col, right_on='sou_rce')
-                                    del ds2['sou_rce']
-
             # Generate XLSX from data & return it
             output = io.BytesIO()
             # from pyexcelerate import Workbook, Writer
@@ -1025,7 +1029,8 @@ def reproducible_session_query_state_get_dataset(name, format):  # Query list of
             # wb.new_sheet(name, data=data)
             # wr = Writer.Writer(wb)
             # wr.save(output)
-            ds2.to_excel(output, sheet_name=name, index=False, engine="xlsxwriter")
+            print("Generating Excel")
+            ds2.to_excel(output, sheet_name=name, index=False)  # , engine="xlsxwriter"
             r = Response(output.getvalue(), mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", status=200)
     else:
         r = build_json_response({"error": "Could not find a Dataset with name '"+name+"' in the current state"}, 401)
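The xlsx branch writes the labelled DataFrame into an in-memory buffer and wraps the bytes in an HTTP response; with the explicit engine= argument commented out, pandas falls back to its default Excel writer. A minimal Flask sketch of the same pattern (the route and the sample data are invented; the real endpoint is the function shown in the hunks above):

import io

import pandas as pd
from flask import Flask, Response

app = Flask(__name__)

@app.route("/datasets/<name>.xlsx")  # hypothetical route
def export_xlsx(name):
    ds2 = pd.DataFrame({"geo": ["es", "pt"], "value": [1.0, 2.0]})  # placeholder data
    output = io.BytesIO()
    ds2.to_excel(output, sheet_name=name, index=False)  # default engine
    return Response(output.getvalue(),
                    mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                    status=200)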
Binary file modified (not shown)
Binary file removed frontend/assets/images/logo_favicon.png (not shown)
Empty file modified magic_box/monitor.py (mode 100755 → 100644)
Empty file modified magic_box/musiasem/__init__.py (mode 100755 → 100644)
Empty file modified magic_box/musiasem/core/__init__.py (mode 100755 → 100644)
Empty file modified magic_box/nis.wsgi (mode 100755 → 100644)
Empty file modified magic_box/template.conf (mode 100755 → 100644)
