Skip to content

Commit 22a8120

Browse files
committed
Introduce a new inventory JSON/dict format that preserves more types
It no longer converts primitive types or collections to strings. Signed-off-by: Alexis Jeandet <[email protected]>
1 parent 9f39dd9 commit 22a8120

File tree

6 files changed

+126
-33
lines changed

6 files changed

+126
-33
lines changed

speasy/core/impex/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -914,8 +914,7 @@ def _concatenate_variables(variables: Dict[str, SpeasyVariable], product_id) ->
914914
if len(variables) == 0:
915915
return None
916916
elif len(variables) == 1:
917-
result = list(variables.values())[0].copy()
918-
return result
917+
return list(variables.values())[0]
919918

920919
axes = []
921920
columns = []

speasy/core/inventory/indexes.py

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -85,13 +85,15 @@ def __contains__(self, item: str or ComponentIndex):
8585
return True
8686
return False
8787

88+
8889
class ArgumentIndex(SpeasyIndex):
8990
def __init__(self, name: str, provider: str, uid: str, meta: Optional[dict] = None):
9091
super().__init__(name, provider, uid, meta)
9192

9293
def __repr__(self):
9394
return f'<ArgumentIndex: {self.spz_name()}>'
9495

96+
9597
class ArgumentListIndex(SpeasyIndex):
9698
def __init__(self, name: str, provider: str, uid: str, meta: Optional[dict] = None):
9799
super().__init__(name, provider, uid, meta)
@@ -103,7 +105,7 @@ def _arguments(self):
103105
def __repr__(self):
104106
return f'<ArgumentListIndex: {self.spz_name()}>'
105107

106-
def __getitem__(self, item)->ArgumentIndex:
108+
def __getitem__(self, item) -> ArgumentIndex:
107109
return self._arguments[item]
108110

109111
def __len__(self):
@@ -112,6 +114,7 @@ def __len__(self):
112114
def __iter__(self):
113115
return self._arguments.__iter__()
114116

117+
115118
class TemplatedParameterIndex(ParameterIndex):
116119
__spz_arguments__: ArgumentListIndex
117120

@@ -145,33 +148,48 @@ def __contains__(self, item: str or ParameterIndex):
145148
return False
146149

147150

148-
def to_dict(inventory_tree: SpeasyIndex or str, version: int = 1):
    """Convert an inventory node (or one of its attribute values) to plain data.

    A ``SpeasyIndex`` becomes a dict built from its ``__dict__``, each value
    being converted recursively with the same ``version``.

    For any other value the result depends on ``version``:

    * ``version <= 1``: strings are returned unchanged, everything else is
      replaced by ``str(value)``.
    * ``version > 1``: ``list``/``tuple``/``set`` are rebuilt as the same
      container type from their converted items, ``dict`` values are
      converted recursively, ``str``/``int``/``float``/``bool``/``None`` are
      returned unchanged and any other type is replaced by ``str(value)``.

    Type checks are exact (``type(x)``), so subclasses of the types above are
    handled by the ``str(value)`` fallback.
    """
    if isinstance(inventory_tree, SpeasyIndex):
        converted = {}
        for attr_name, attr_value in inventory_tree.__dict__.items():
            converted[attr_name] = to_dict(attr_value, version=version)
        return converted

    node_type = type(inventory_tree)

    # Legacy format: everything that is not an index ends up as a string.
    if version <= 1:
        return inventory_tree if node_type is str else str(inventory_tree)

    # Newer format: keep collections and primitive types as they are.
    if node_type in (list, tuple, set):
        return node_type([to_dict(item, version) for item in inventory_tree])
    if node_type is dict:
        return {key: to_dict(value, version) for key, value in inventory_tree.items()}
    if node_type in (str, int, float, bool, type(None)):
        return inventory_tree
    return str(inventory_tree)
154166

155167

156-
def from_dict(inventory_tree: dict or str, version: int = 1):
    """Rebuild an inventory node (or a plain value) from its dict form.

    Parameters
    ----------
    inventory_tree:
        Either a dict carrying the ``__spz_type__``, ``__spz_name__``,
        ``__spz_provider__`` and ``__spz_uid__`` keys (an index node), or a
        plain value found inside such a dict.
    version:
        Format version. With ``version <= 1`` only strings are passed
        through. With ``version > 1`` primitive values and sets are passed
        through, and lists, tuples and plain dicts are rebuilt with their
        items converted recursively so that index nodes nested inside them
        are reconstructed as well.

    Returns
    -------
    The plain value, or an instance of the class registered in
    ``__INDEXES_TYPES__`` for ``__spz_type__`` (``SpeasyIndex`` when the type
    is not registered), built with the remaining keys as ``meta``.

    Raises
    ------
    KeyError
        If an index dict lacks one of the four ``__spz_*__`` keys.

    The input is never modified: the reserved keys are popped from a shallow
    copy, so the same dict can safely be converted more than once.
    """
    node_type = type(inventory_tree)
    if version <= 1:
        if node_type is str:
            return inventory_tree
    else:
        if node_type in (str, int, float, bool, type(None), set):
            return inventory_tree
        if node_type in (list, tuple):
            # Mirror to_dict, which converts indexes nested in sequences.
            return node_type(from_dict(item, version) for item in inventory_tree)
        if node_type is dict and "__spz_type__" not in inventory_tree:
            return {key: from_dict(value, version) for key, value in inventory_tree.items()}

    # Work on a shallow copy so that the caller's dict keeps its reserved keys.
    fields = inventory_tree.copy()
    idx_type = fields.pop("__spz_type__")
    idx_name = fields.pop("__spz_name__")
    idx_provider = fields.pop("__spz_provider__")
    idx_uid = fields.pop("__spz_uid__")
    idx_meta = {key: from_dict(value, version) for key, value in fields.items()}
    index_ctor = __INDEXES_TYPES__.get(idx_type, SpeasyIndex)
    return index_ctor(name=idx_name, provider=idx_provider, uid=idx_uid, meta=idx_meta)
167185

168186

169-
def to_json(inventory_tree: SpeasyIndex, sort_keys=True, version: int = 1):
    """Serialize an inventory to a JSON string.

    The inventory is first converted with ``to_dict(inventory_tree, version)``
    and the result is passed to ``json.dumps`` with the given ``sort_keys``.

    With ``version > 1`` the intermediate structure may still contain ``set``
    objects, which JSON cannot represent; they are written as arrays (ordered
    by ``repr`` so the output is reproducible) instead of making
    ``json.dumps`` raise ``TypeError``.
    """

    def _encode_unsupported(obj):
        # json.dumps only calls this for objects it cannot serialize itself.
        if isinstance(obj, set):
            return sorted(obj, key=repr)
        raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")

    return json.dumps(to_dict(inventory_tree, version), sort_keys=sort_keys, default=_encode_unsupported)
171189

172190

173-
def from_json(inventory_tree: str, version: int = 1):
    """Decode a JSON string and rebuild the inventory it describes.

    The decoded document is handed to ``from_dict`` together with ``version``.
    """
    decoded = json.loads(inventory_tree)
    return from_dict(decoded, version)
175193

176194

177195
def make_inventory_node(parent, ctor, name, provider, uid, **meta):
@@ -191,4 +209,6 @@ def inventory_has_changed(orig, new):
191209
return True
192210
return False
193211

194-
AnyProductIndex = Union[ParameterIndex, TemplatedParameterIndex, DatasetIndex, TimetableIndex, CatalogIndex, ComponentIndex]
212+
213+
AnyProductIndex = Union[
214+
ParameterIndex, TemplatedParameterIndex, DatasetIndex, TimetableIndex, CatalogIndex, ComponentIndex]

speasy/core/proxy/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
log = logging.getLogger(__name__)
2020
PROXY_ALLOWED_KWARGS = ['disable_proxy']
21-
MINIMUM_REQUIRED_PROXY_VERSION = Version("0.11.0")
21+
MINIMUM_REQUIRED_PROXY_VERSION = Version("0.12.0")
2222
_CURRENT_PROXY_SERVER_VERSION = None
2323

2424
if proxy_cfg.url() == "" or proxy_cfg.enabled() == False:
@@ -113,13 +113,14 @@ def get(provider: str, **kwargs):
113113
kwargs['provider'] = provider
114114
kwargs['format'] = 'python_dict'
115115
kwargs['zstd_compression'] = zstd_compression
116+
kwargs['version'] = 2
116117
headers = {}
117118
if saved_inventory is not None:
118119
headers["If-Modified-Since"] = parser.parse(saved_inventory.build_date).ctime()
119120
resp = http.get(f"{url}/get_inventory", params=kwargs, headers=headers)
120121
log.debug(f"Asking {provider} inventory from proxy {resp.url}, {resp.headers}")
121122
if resp.status_code == 200:
122-
inventory = inventory_from_dict(pickle.loads(decompress(resp.bytes)))
123+
inventory = inventory_from_dict(pickle.loads(decompress(resp.bytes)), version=2)
123124
index.set("proxy_inventories", provider, inventory)
124125
index.set("proxy_inventories_save_date", provider, datetime.utcnow())
125126
return inventory

speasy/data_providers/cda/_inventory_builder/__init__.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@
1313
_MASTERS_CDF_PATH = f"{cda_cfg.inventory_data_path()}/masters_cdf/"
1414
_XML_CATALOG_PATH = f"{cda_cfg.inventory_data_path()}/all.xml"
1515

16+
_CDAWEB_INVENTORY_ = "cdaweb-inventory"
17+
_CDAWEB_INVENTORY_TREE_ = "tree_v2"
18+
_CDAWEB_INVENTORY_LAST_MODIFIED_MASTERS_ = "masters-last-modified"
19+
_CDAWEB_INVENTORY_LAST_MODIFIED_XML_ = "last_modified_xml"
20+
1621

1722
def _ensure_path_exists(path: str):
1823
dirname = os.path.dirname(path)
@@ -37,21 +42,21 @@ def _download_and_extract_master_cdf(masters_url: str):
3742

3843
def update_master_cdf(masters_url: str = "https://spdf.gsfc.nasa.gov/pub/software/cdawlib/0MASTERS/master.tar"):
    """Refresh the local master CDF files when the remote archive has changed.

    The ``last-modified`` header returned by a HEAD request on ``masters_url``
    is compared with the value stored in the index. When they differ, the
    master CDF folder is cleaned, the archive is downloaded and extracted, and
    the new header value is stored.

    Returns ``True`` when the files were refreshed, ``False`` otherwise.
    """
    remote_stamp = http.head(masters_url).headers['last-modified']
    cached_stamp = index.get(_CDAWEB_INVENTORY_, _CDAWEB_INVENTORY_LAST_MODIFIED_MASTERS_, "")
    if cached_stamp == remote_stamp:
        return False

    _clean_master_cdf_folder()
    _download_and_extract_master_cdf(masters_url)
    index.set(_CDAWEB_INVENTORY_, _CDAWEB_INVENTORY_LAST_MODIFIED_MASTERS_, remote_stamp)
    return True
4651

4752

4853
def update_xml_catalog(xml_catalog_url: str = "https://spdf.gsfc.nasa.gov/pub/catalogs/all.xml"):
    """Refresh the local XML catalog when the remote file has changed.

    The ``last-modified`` header returned by a HEAD request on
    ``xml_catalog_url`` is compared with the value stored in the index. When
    they differ, the catalog is downloaded and written to
    ``_XML_CATALOG_PATH`` and the new header value is stored.

    Returns ``True`` when the catalog was refreshed, ``False`` otherwise.
    """
    remote_stamp = http.head(xml_catalog_url).headers['last-modified']
    cached_stamp = index.get(_CDAWEB_INVENTORY_, _CDAWEB_INVENTORY_LAST_MODIFIED_XML_, "")
    if cached_stamp == remote_stamp:
        return False

    _ensure_path_exists(_XML_CATALOG_PATH)
    with open(_XML_CATALOG_PATH, 'w') as catalog_file:
        catalog_file.write(http.get(xml_catalog_url).text)
    index.set(_CDAWEB_INVENTORY_, _CDAWEB_INVENTORY_LAST_MODIFIED_XML_, remote_stamp)
    return True
5762

@@ -60,11 +65,11 @@ def build_inventory(root: SpeasyIndex = None, xml_catalog_url: str = "https://sp
6065
masters_url: str = "https://spdf.gsfc.nasa.gov/pub/software/cdawlib/0MASTERS/master.tar"):
6166
needs_rebuild = update_xml_catalog(xml_catalog_url)
6267
needs_rebuild |= update_master_cdf(masters_url)
63-
if needs_rebuild or not index.contains("cdaweb-inventory", "tree"):
68+
if needs_rebuild or not index.contains(_CDAWEB_INVENTORY_, _CDAWEB_INVENTORY_TREE_):
6469
root = load_xml_catalog(xml_file_path=_XML_CATALOG_PATH, root=root)
6570
update_tree(root=root, master_cdf_dir=_MASTERS_CDF_PATH)
66-
index.set("cdaweb-inventory", "tree", to_dict(root))
71+
index.set(_CDAWEB_INVENTORY_, _CDAWEB_INVENTORY_TREE_, to_dict(root, version=2))
6772
else:
68-
t = from_dict(index.get("cdaweb-inventory", "tree"))
73+
t = from_dict(index.get(_CDAWEB_INVENTORY_, _CDAWEB_INVENTORY_TREE_), version=2)
6974
root.__dict__ = t.__dict__
7075
return root

speasy/data_providers/csa/__init__.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from tempfile import TemporaryDirectory
66
from typing import Optional, Tuple, Dict
77

8+
import numpy as np
89
from astroquery.utils.tap.core import TapPlus
910

1011
from speasy.core import any_files, AllowedKwargs, fix_name, EnsureUTCDateTime
@@ -21,6 +22,18 @@
2122
log = logging.getLogger(__name__)
2223

2324

25+
def _only_primitive_types(d: dict) -> dict:
    """Replace NumPy scalar values of ``d`` with their builtin equivalents.

    ``np.bool_`` values become ``bool``, NumPy integers become ``int`` and
    NumPy floating-point values become ``float``. Every other value is left
    untouched.

    The dict is modified in place and also returned, so the call can be used
    directly in a ``**`` expansion.
    """
    # Only values of existing keys are reassigned, so iterating over items()
    # while updating is safe (the dict never changes size).
    for k, v in d.items():
        # np.bool_ is available in every NumPy release, whereas the np.bool
        # alias is missing from NumPy 1.24-1.26 and raises AttributeError.
        if isinstance(v, np.bool_):
            d[k] = bool(v)
        elif isinstance(v, np.integer):
            d[k] = int(v)
        elif isinstance(v, np.floating):
            d[k] = float(v)
    return d
35+
36+
2437
def to_dataset_and_variable(index_or_str: ParameterIndex or str) -> Tuple[str, str]:
2538
if type(index_or_str) is str:
2639
parts = index_or_str.split('/')
@@ -38,7 +51,7 @@ def register_dataset(instruments, datasets, dataset):
3851
name = fix_name(meta['dataset_id'])
3952
node = make_inventory_node(instruments[dataset['instruments']], DatasetIndex, name=name,
4053
provider="csa",
41-
uid=meta['dataset_id'], **meta)
54+
uid=meta['dataset_id'], **_only_primitive_types(meta))
4255
datasets[meta['dataset_id']] = node
4356

4457

@@ -49,7 +62,7 @@ def register_observatory(missions, observatories, observatory):
4962
name=fix_name(name),
5063
provider="csa",
5164
uid=name,
52-
**meta)
65+
**_only_primitive_types(meta))
5366
observatories[name] = node
5467

5568

@@ -58,7 +71,7 @@ def register_mission(inventory_tree, missions, mission):
5871
name = meta.pop('name')
5972
node = make_inventory_node(inventory_tree, SpeasyIndex, name=fix_name(name),
6073
provider="csa",
61-
uid=name, **meta)
74+
uid=name, **_only_primitive_types(meta))
6275
missions[name] = node
6376

6477

@@ -68,7 +81,7 @@ def register_instrument(observatories, instruments, instrument):
6881
node = make_inventory_node(observatories.get(instrument['observatories'], observatories['MULTIPLE']),
6982
SpeasyIndex, name=fix_name(name),
7083
provider="csa",
71-
uid=name, **meta)
84+
uid=name, **_only_primitive_types(meta))
7285
instruments[name] = node
7386

7487

@@ -81,15 +94,17 @@ def register_param(datasets, parameter):
8194
meta['stop_date'] = parent_dataset.stop_date
8295
name = fix_name(meta['parameter_id'])
8396
make_inventory_node(parent_dataset, ParameterIndex, name=name,
84-
provider="csa", uid=f"{parameter['dataset_id']}/{parameter['parameter_id']}", **meta)
97+
provider="csa", uid=f"{parameter['dataset_id']}/{parameter['parameter_id']}",
98+
**_only_primitive_types(meta))
8599

86100

87101
def build_inventory(root: SpeasyIndex, tapurl="https://csa.esac.esa.int/csa-sl-tap/tap/"):
88102
CSA = TapPlus(url=tapurl)
89103
missions_req = CSA.launch_job_async("SELECT * FROM csa.v_mission")
90104
observatories_req = CSA.launch_job_async("SELECT * FROM csa.v_observatory")
91105
instruments_req = CSA.launch_job_async("SELECT * FROM csa.v_instrument")
92-
datasets_req = CSA.launch_job_async("SELECT * FROM csa.v_dataset WHERE dataset_id like '%GRMB' OR (is_cef='true' AND is_istp='true')")
106+
datasets_req = CSA.launch_job_async(
107+
"SELECT * FROM csa.v_dataset WHERE dataset_id like '%GRMB' OR (is_cef='true' AND is_istp='true')")
93108
parameters_req = CSA.launch_job_async("SELECT * FROM csa.v_parameter WHERE data_type='Data' AND value_type<>'CHAR'")
94109
missions = {}
95110
observatories = {}
@@ -135,7 +150,7 @@ def _dataset_range(self, dataset: str or DatasetIndex) -> DateTimeRange:
135150
def _dl_variable(self,
136151
dataset: str, variable: str,
137152
start_time: datetime, stop_time: datetime, extra_http_headers: Dict[str, str] or None = None) -> \
138-
Optional[SpeasyVariable]:
153+
Optional[SpeasyVariable]:
139154

140155
# https://csa.esac.esa.int/csa-sl-tap/data?RETRIEVAL_TYPE=product&&DATASET_ID=C3_CP_PEA_LERL_DEFlux&START_DATE=2001-06-10T22:12:14Z&END_DATE=2001-06-11T06:12:14Z&DELIVERY_FORMAT=CDF_ISTP&DELIVERY_INTERVAL=all
141156
ds_range = self._dataset_range(dataset)
@@ -238,5 +253,5 @@ def get_data(self, product, start_time: datetime, stop_time: datetime,
238253

239254
def get_variable(self, dataset: str, variable: str, start_time: datetime or str, stop_time: datetime or str,
240255
**kwargs) -> \
241-
Optional[SpeasyVariable]:
256+
Optional[SpeasyVariable]:
242257
return self.get_data(f"{dataset}/{variable}", start_time, stop_time, **kwargs)

tests/test_inventories.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import unittest
2+
from ddt import ddt, data, unpack
3+
4+
import speasy as spz
5+
from speasy.core.inventory.indexes import from_dict, to_dict, SpeasyIndex
6+
from speasy.core.dataprovider import DataProvider
7+
8+
9+
def compare_inventories(inventory1: SpeasyIndex, inventory2: SpeasyIndex):
    """Tell whether two inventory trees hold the same names and attributes.

    The ``spz_name()`` of both nodes must match and both nodes must expose
    the same attribute names. Attributes that are ``SpeasyIndex`` instances on
    both sides are compared recursively, all others with ``!=``.

    The first difference found is printed and ``False`` is returned;
    ``True`` is returned when no difference is found.
    """
    if inventory1.spz_name() != inventory2.spz_name():
        print(f"Name mismatch: {inventory1.spz_name()} != {inventory2.spz_name()}")
        return False
    attrs1 = inventory1.__dict__
    attrs2 = inventory2.__dict__
    for key, value1 in attrs1.items():
        if key not in attrs2:
            print(f"Key missing: {key}")
            return False
        value2 = attrs2[key]
        if isinstance(value1, SpeasyIndex) and isinstance(value2, SpeasyIndex):
            if not compare_inventories(value1, value2):
                return False
        elif value1 != value2:
            print(f"Value mismatch: {value1} != {value2}")
            return False
    # The comparison must be symmetric: attributes that only exist on the
    # second inventory are a difference as well.
    for key in attrs2:
        if key not in attrs1:
            print(f"Unexpected key: {key}")
            return False
    return True
26+
27+
28+
@ddt
class FromDictAndToDictPreserveInventory(unittest.TestCase):
    """Check that ``to_dict`` followed by ``from_dict`` gives back the same inventory."""

    def assertInventoryEqual(self, inventory1: SpeasyIndex, inventory2: SpeasyIndex):
        """Fail the test on the first difference between two inventory trees.

        Both nodes must have the same ``spz_name()`` and the same attribute
        names. Attributes that are ``SpeasyIndex`` instances on both sides are
        compared recursively, all others with ``!=``.
        """
        if inventory1.spz_name() != inventory2.spz_name():
            self.fail(f"Name mismatch: {inventory1.spz_name()} != {inventory2.spz_name()}")
        attrs1 = inventory1.__dict__
        attrs2 = inventory2.__dict__
        for key, value1 in attrs1.items():
            if key not in attrs2:
                self.fail(f"Key missing: {key}")
            value2 = attrs2[key]
            if isinstance(value1, SpeasyIndex) and isinstance(value2, SpeasyIndex):
                self.assertInventoryEqual(value1, value2)
            elif value1 != value2:
                self.fail(f"Value mismatch: {value1}({type(value1)}) != {value2}({type(value2)}) for key {key}")
        # Attributes that only appear after the round trip are a difference too.
        for key in attrs2:
            if key not in attrs1:
                self.fail(f"Unexpected key: {key}")

    @data(
        (spz.amda,),
        (spz.cda,),
        (spz.ssc,),
        (spz.csa,),
    )
    @unpack
    def test_from_dict_and_to_dict_preserve_inventory(self, provider: DataProvider):
        inventory = provider._inventory(provider_name=provider.provider_name, disable_proxy=True)
        self.assertInventoryEqual(inventory, from_dict(to_dict(inventory, version=2), version=2))

0 commit comments

Comments
 (0)