Skip to content

Commit

Permalink
Revert "refactor metadata extraction and add flag to preview for metadata" -- meant for feature branch not main
Browse files Browse the repository at this point in the history

This reverts commit a3b74c1.
  • Loading branch information
satchelbaldwin committed Mar 25, 2024
1 parent a3b74c1 commit d2f1581
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 52 deletions.
35 changes: 0 additions & 35 deletions api/dataset/metadata.py

This file was deleted.

36 changes: 32 additions & 4 deletions api/dataset/terarium_hmi.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import xarray
from api.dataset.models import DatasetSubsetOptions
from api.dataset.metadata import extract_metadata, extract_esgf_specific_fields
from api.search.providers.era5 import ERA5SearchData
from api.settings import default_settings
import requests
Expand Down Expand Up @@ -59,10 +58,36 @@ def enumerate_dataset_skeleton(
"userId": "",
"fileNames": [],
"columns": [],
"metadata": extract_metadata(ds)
| {
"metadata": {
"format": "netcdf",
"parentDatasetId": parent_id,
"variableId": ds.attrs.get("variable_id", ""),
"preview": preview,
"dataStructure": {
k: {
"attrs": {
ak: (
ds[k].attrs[ak].item()
if isinstance(ds[k].attrs[ak], numpy.generic)
else ds[k].attrs[ak]
)
for ak in ds[k].attrs
# _ChunkSizes is an unserializable ndarray, safely ignorable
if ak != "_ChunkSizes"
},
"indexes": [i for i in ds[k].indexes.keys()],
"coordinates": [i for i in ds[k].coords.keys()],
}
for k in ds.variables.keys()
},
"raw": {
k: (
ds.attrs[k].item()
if isinstance(ds.attrs[k], numpy.generic)
else ds.attrs[k]
)
for k in ds.attrs.keys()
},
},
"grounding": {},
}
Expand All @@ -87,7 +112,10 @@ def construct_hmi_dataset(
additional_fields = {
"name": f"{dataset_name}-subset-{subset_uuid}",
"description": generate_description(ds, dataset_id, opts),
} | extract_esgf_specific_fields(ds)
"dataSourceDate": ds.attrs.get("creation_date", "UNKNOWN"),
"datasetUrl": ds.attrs.get("further_info_url", "UNKNOWN"),
"source": ds.attrs.get("source", "UNKNOWN"),
}
additional_metadata = {
"parentDatasetId": parent_dataset_id,
"subsetDetails": repr(opts),
Expand Down
12 changes: 1 addition & 11 deletions api/preview/render.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import datetime
import io
import base64
from typing import Any
from api.search.provider import AccessURLs
import cartopy.crs as ccrs
import xarray
from api.dataset.metadata import extract_esgf_specific_fields, extract_metadata
from matplotlib import pyplot as plt
from api.dataset.remote import (
cleanup_potential_artifacts,
Expand All @@ -27,24 +25,16 @@ def render_preview_for_dataset(
variable_index: str = "",
time_index: str = "",
timestamps: str = "",
analyze: bool = False,
**kwargs,
):
job_id = kwargs["job_id"]
try:
ds: xarray.Dataset | None = None
extra_metadata_discovery: dict[str, Any] = {}
# AccessURLs list or UUID str -- UUID str is terarium handle.
if isinstance(dataset, list):
ds = open_dataset(dataset, job_id)
elif isinstance(dataset, str):
ds = open_remote_dataset_hmi(dataset, job_id)
if analyze:
print("attempting to extract more information", flush=True)
extra_metadata_discovery = {
"metadata": extract_metadata(ds) | extract_esgf_specific_fields(ds)
}

if timestamps != "":
if len(timestamps.split(",")) != 2:
return {
Expand All @@ -55,7 +45,7 @@ def render_preview_for_dataset(
except KeyError as e:
return {"error": f"{e}"}
cleanup_potential_artifacts(job_id)
return {"previews": png} | extra_metadata_discovery
return {"previews": png}
except IOError as e:
return {"error": f"upstream hosting is likely having a problem. {e}"}

Expand Down
3 changes: 1 addition & 2 deletions api/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ async def esgf_preview(
variable_id: str = "",
time_index: str = "",
timestamps: str = "",
analyze: bool = False,
redis=Depends(get_redis),
):
dataset = (
Expand All @@ -108,7 +107,7 @@ async def esgf_preview(
)
job = create_job(
func=render_preview_for_dataset,
args=[dataset, variable_id, time_index, timestamps, analyze],
args=[dataset, variable_id, time_index, timestamps],
redis=redis,
queue="preview",
)
Expand Down

0 comments on commit d2f1581

Please sign in to comment.