Skip to content

Commit 0123ea6

Browse files
authored
Merge branch 'master' into multi_link_api
2 parents e97ade9 + c56e40b commit 0123ea6

File tree

6 files changed

+383
-90
lines changed

6 files changed

+383
-90
lines changed

.devcontainer/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ pyparsing==3.1.2
3333
pyproject_hooks==1.0.0
3434
python-dateutil==2.9.0.post0
3535
pytz==2024.1
36-
requests==2.31.0
36+
requests==2.32.0
3737
requests-unixsocket==0.3.0
3838
s3fs==2024.3.1
3939
six==1.16.0

h5pyd/_hl/attrs.py

Lines changed: 196 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -166,24 +166,91 @@ def __getitem__(self, name):
166166

167167
return arr
168168

169+
def get_attributes(self, names=None, pattern=None, limit=None, marker=None):
    """
    Get all attributes or a subset of attributes from the target object.

    names
        A single attribute name, or a list/tuple of names (str or bytes),
        to retrieve.  Cannot be combined with pattern, limit or marker.
    pattern
        If provided, retrieve all attributes with names that match the
        pattern according to Unix pathname pattern expansion rules.
    limit
        If provided, retrieve at most 'limit' attributes.
    marker
        If provided, retrieve attributes whose names occur after the name
        'marker' in the target object.

    Returns a dict keyed by attribute name.  Entries fetched from the
    server map to the attribute's 'value' field; entries served from the
    objdb cache map to whatever the cache stores for that name.

    The objdb cache is only consulted when every attribute is requested
    (no names, pattern, limit or marker); any filtered request is sent to
    the server so the filter is actually honoured.

    Raises ValueError if names is combined with pattern, limit or marker.
    """
    if names and (pattern or limit or marker):
        raise ValueError("names cannot be used with pattern, limit or marker")

    cached = self._objdb_attributes
    if cached is not None and not (names or pattern or limit or marker):
        # Serve the unfiltered request from the objdb cache.
        if isinstance(cached, dict):
            # Shallow copy so callers cannot mutate the cache's own dict.
            return dict(cached)
        # NOTE(review): fallback assumes a sequence of attribute records
        # that each carry a 'name' key -- confirm against the objdb layout.
        return {entry['name']: entry for entry in cached}

    # Omit trailing slash
    req = self._req_prefix[:-1]

    params = {"IncludeData": 1}
    if pattern:
        params["pattern"] = pattern
    if limit:
        params["Limit"] = limit
    if marker:
        params["Marker"] = marker

    body = {}
    if names:
        if not isinstance(names, (list, tuple)):
            names = [names]
        # The request body is JSON, so bytes names are decoded to str.
        body['attr_names'] = [
            n.decode('utf-8') if isinstance(n, bytes) else n for n in names
        ]

    # A specific name list travels in a POST body; otherwise a plain GET.
    if body:
        rsp = self._parent.POST(req, body=body, params=params)
    else:
        rsp = self._parent.GET(req, params=params)

    return {attr['name']: attr['value'] for attr in rsp['attributes']}
227+
169228
def __setitem__(self, name, value):
170229
""" Set a new attribute, overwriting any existing attribute.
171230
172231
The type and shape of the attribute are determined from the data. To
173232
use a specific type or shape, or to preserve the type of an attribute,
174233
use the methods create() and modify().
175234
"""
176-
self.create(name, data=value, dtype=base.guess_dtype(value))
235+
self.create(name, values=value, dtype=base.guess_dtype(value))
177236

178237
def __delitem__(self, name):
179238
""" Delete an attribute (which must already exist). """
180-
if isinstance(name, bytes):
181-
name = name.decode("utf-8")
182-
req = self._req_prefix + name
183-
self._parent.DELETE(req)
239+
params = {}
184240

185-
def create(self, name, data, shape=None, dtype=None):
186-
""" Create a new attribute, overwriting any existing attribute.
241+
if isinstance(name, list):
242+
names = [name.decode('utf-8') if isinstance(name, bytes) else name for name in name]
243+
# Omit trailing slash
244+
req = self._req_prefix[:-1]
245+
params["attr_names"] = "/".join(names)
246+
else:
247+
if isinstance(name, bytes):
248+
name = name.decode("utf-8")
249+
req = self._req_prefix + name
250+
self._parent.DELETE(req, params=params)
251+
252+
def create(self, names, values, shape=None, dtype=None):
253+
""" Create new attribute(s), overwriting any existing attributes.
187254
188255
name
189256
Name of the new attribute (required)
@@ -196,104 +263,149 @@ def create(self, name, data, shape=None, dtype=None):
196263
Data type of the attribute. Overrides data.dtype if both
197264
are given.
198265
"""
199-
self._parent.log.info("attrs.create({})".format(name))
200-
201-
# First, make sure we have a NumPy array. We leave the data
202-
# type conversion for HDF5 to perform.
203-
if isinstance(data, Reference):
204-
dtype = special_dtype(ref=Reference)
205-
if not isinstance(data, Empty):
206-
data = numpy.asarray(data, dtype=dtype, order='C')
207-
208-
if shape is None and not isinstance(data, Empty):
209-
shape = data.shape
266+
self._parent.log.info(f"attrs.create({names})")
210267

211-
use_htype = None # If a committed type is given, we must use it in h5a.create.
268+
# Standardize single attribute arguments to lists
269+
if not isinstance(names, list):
270+
names = [names]
271+
values = [values]
212272

213-
if isinstance(dtype, Datatype):
214-
use_htype = dtype.id
215-
dtype = dtype.dtype
273+
# Do not permit duplicate names
274+
if len(names) != len(set(names)):
275+
raise ValueError("Duplicate attribute names are not allowed")
216276

217-
# Special case if data are complex numbers
218-
is_complex = (data.dtype.kind == 'c') and (dtype.names is None) or (
219-
dtype.names != ('r', 'i')) or (
220-
any(dt.kind != 'f' for dt, off in dtype.fields.values())) or (
221-
dtype.fields['r'][0] == dtype.fields['i'][0])
277+
if shape is not None and not isinstance(shape, list):
278+
shapes = [shape]
279+
elif shape is None:
280+
shapes = [None] * len(names)
281+
else:
282+
# Given shape is already a list of shapes
283+
shapes = shape
222284

223-
if is_complex:
224-
raise TypeError(
225-
f'Wrong committed datatype for complex numbers: {dtype.name}')
285+
if dtype is not None and not isinstance(dtype, list):
286+
dtypes = [dtype]
226287
elif dtype is None:
227-
if data.dtype.kind == 'U':
228-
# use vlen for unicode strings
229-
dtype = special_dtype(vlen=str)
230-
else:
231-
dtype = data.dtype
288+
dtypes = [None] * len(names)
232289
else:
233-
dtype = numpy.dtype(dtype) # In case a string, e.g. 'i8' is passed
234-
235-
# Where a top-level array type is requested, we have to do some
236-
# fiddling around to present the data as a smaller array of
237-
# subarrays.
238-
if not isinstance(data, Empty):
239-
if dtype.subdtype is not None:
290+
# Given dtype is already a list of dtypes
291+
dtypes = dtype
292+
293+
type_jsons = [None] * len(names)
294+
295+
if (len(names) != len(values)) or (shapes is not None and len(shapes) != len(values)) or\
296+
(dtypes is not None and len(dtypes) != len(values)):
297+
raise ValueError("provided names, values, shapes and dtypes must have the same length")
298+
299+
for i in range(len(names)):
300+
# First, make sure we have a NumPy array. We leave the data
301+
# type conversion for HDF5 to perform.
302+
if isinstance(values[i], Reference):
303+
dtypes[i] = special_dtype(ref=Reference)
304+
if not isinstance(values[i], Empty):
305+
values[i] = numpy.asarray(values[i], dtype=dtypes[i], order='C')
306+
307+
if shapes[i] is None and not isinstance(values[i], Empty):
308+
shapes[i] = values[i].shape
309+
310+
use_htype = None # If a committed type is given, we must use it in h5a.create.
311+
312+
if isinstance(dtypes[i], Datatype):
313+
use_htype = dtypes[i].id
314+
dtypes[i] = dtypes[i].dtype
315+
316+
# Special case if data are complex numbers
317+
is_complex = (values[i].dtype.kind == 'c') and (dtypes[i].names is None) or (
318+
dtypes[i].names != ('r', 'i')) or (
319+
any(dt.kind != 'f' for dt, off in dtypes[i].fields.values())) or (
320+
dtypes[i].fields['r'][0] == dtypes[i].fields['i'][0])
321+
322+
if is_complex:
323+
raise TypeError(
324+
f'Wrong committed datatype for complex numbers: {dtypes[i].name}')
325+
elif dtypes[i] is None:
326+
if values[i].dtype.kind == 'U':
327+
# use vlen for unicode strings
328+
dtypes[i] = special_dtype(vlen=str)
329+
else:
330+
dtypes[i] = values[i].dtype
331+
else:
332+
dtypes[i] = numpy.dtype(dtypes[i]) # In case a string, e.g. 'i8' is passed
240333

241-
subdtype, subshape = dtype.subdtype
334+
# Where a top-level array type is requested, we have to do some
335+
# fiddling around to present the data as a smaller array of
336+
# subarrays.
337+
if not isinstance(values[i], Empty):
338+
if dtypes[i].subdtype is not None:
242339

243-
# Make sure the subshape matches the last N axes' sizes.
244-
if shape[-len(subshape):] != subshape:
245-
raise ValueError(f"Array dtype shape {subshape} is incompatible with data shape {shape}")
340+
subdtype, subshape = dtypes[i].subdtype
246341

247-
# New "advertised" shape and dtype
248-
shape = shape[0:len(shape) - len(subshape)]
249-
dtype = subdtype
342+
# Make sure the subshape matches the last N axes' sizes.
343+
if shapes[i][-len(subshape):] != subshape:
344+
raise ValueError(f"Array dtype shape {subshape} is incompatible with data shape {shapes[i]}")
250345

251-
# Not an array type; make sure to check the number of elements
252-
# is compatible, and reshape if needed.
253-
else:
254-
if numpy.prod(shape) != numpy.prod(data.shape):
255-
raise ValueError("Shape of new attribute conflicts with shape of data")
346+
# New "advertised" shape and dtype
347+
shapes[i] = shapes[i][0:len(shapes[i]) - len(subshape)]
348+
dtypes[i] = subdtype
256349

257-
if shape != data.shape:
258-
data = data.reshape(shape)
350+
# Not an array type; make sure to check the number of elements
351+
# is compatible, and reshape if needed.
352+
else:
353+
if numpy.prod(shapes[i]) != numpy.prod(values[i].shape):
354+
raise ValueError("Shape of new attribute conflicts with shape of data")
259355

260-
# We need this to handle special string types.
356+
if shapes[i] != values[i].shape:
357+
values[i] = values[i].reshape(shapes[i])
261358

262-
data = numpy.asarray(data, dtype=dtype)
359+
# We need this to handle special string types.
263360

264-
# Make HDF5 datatype and dataspace for the H5A calls
265-
if use_htype is None:
266-
type_json = getTypeItem(dtype)
267-
self._parent.log.debug("attrs.create type_json: {}".format(type_json))
361+
values[i] = numpy.asarray(values[i], dtype=dtypes[i])
268362

269-
# This mess exists because you can't overwrite attributes in HDF5.
270-
# So we write to a temporary attribute first, and then rename.
363+
# Make HDF5 datatype and dataspace for the H5A calls
364+
if use_htype is None:
365+
type_jsons[i] = getTypeItem(dtypes[i])
366+
self._parent.log.debug(f"attrs.create type_json: {format(type_jsons[i])}")
271367

272-
req = self._req_prefix + name
368+
params = {}
273369
body = {}
274-
body['type'] = type_json
275-
if isinstance(data, Empty):
276-
body['shape'] = 'H5S_NULL'
277-
else:
278-
body['shape'] = shape
279-
if data.dtype.kind != 'c':
280-
body['value'] = self._bytesArrayToList(data)
370+
params['replace'] = 1
371+
372+
attributes = {}
373+
374+
for i in range(len(names)):
375+
attr = {}
376+
attr['type'] = type_jsons[i]
377+
if isinstance(values[i], Empty):
378+
attr['shape'] = 'H5S_NULL'
281379
else:
282-
# Special case: complex numbers
283-
special_dt = createDataType(type_json)
284-
tmp = numpy.empty(shape=data.shape, dtype=special_dt)
285-
tmp['r'] = data.real
286-
tmp['i'] = data.imag
287-
body['value'] = json.loads(json.dumps(tmp.tolist()))
380+
attr['shape'] = shapes[i]
381+
if values[i].dtype.kind != 'c':
382+
attr['value'] = self._bytesArrayToList(values[i])
383+
else:
384+
# Special case: complex numbers
385+
special_dt = createDataType(type_jsons[i])
386+
tmp = numpy.empty(shape=values[i].shape, dtype=special_dt)
387+
tmp['r'] = values[i].real
388+
tmp['i'] = values[i].imag
389+
attr['value'] = json.loads(json.dumps(tmp.tolist()))
390+
attributes[names[i]] = attr
391+
392+
if len(names) > 1:
393+
# Create multiple attributes
394+
# Omit trailing slash
395+
req = self._req_prefix[:-1]
396+
body['attributes'] = attributes
397+
398+
else:
399+
# Create single attribute
400+
req = self._req_prefix + names[0]
401+
for key in attributes[names[0]]:
402+
body[key] = attributes[names[0]][key]
288403

289404
try:
290-
self._parent.PUT(req, body=body)
405+
self._parent.PUT(req, body=body, params=params)
291406
except RuntimeError:
292-
# Resource already exist, try deleting it
293-
self._parent.log.info("Update to existing attribute ({}), deleting it".format(name))
294-
self._parent.DELETE(req)
295-
# now add again
296-
self._parent.PUT(req, body=body)
407+
# 'replace' parameter is used, so failure is not due to attribute already existing
408+
raise RuntimeError("Failued to create attribute(s)")
297409

298410
def modify(self, name, value):
299411
""" Change the value of an attribute while preserving its type.

h5pyd/_hl/base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,7 +1020,7 @@ def PUT(self, req, body=None, params=None, format="json", replace=False):
10201020
else:
10211021
raise RuntimeError(rsp.reason)
10221022
else:
1023-
raise IOError(rsp.reason)
1023+
raise IOError(f"{rsp.reason}:{rsp.status_code}")
10241024

10251025
if rsp.text:
10261026
rsp_json = json.loads(rsp.text)
@@ -1053,14 +1053,14 @@ def POST(self, req, body=None, params=None, format="json"):
10531053
rsp_json = json.loads(rsp.text)
10541054
return rsp_json
10551055

1056-
def DELETE(self, req):
1056+
def DELETE(self, req, params=None):
10571057
if self.id.http_conn is None:
10581058
raise IOError("object not initialized")
10591059

10601060
# try to do a DELETE of the resource
10611061

10621062
self.log.info("DEL: {} [{}]".format(req, self.id.domain))
1063-
rsp = self.id._http_conn.DELETE(req)
1063+
rsp = self.id._http_conn.DELETE(req, params=params)
10641064
# self.log.info("RSP: " + str(rsp.status_code) + ':' + rsp.text)
10651065
if rsp.status_code != 200:
10661066
raise IOError(rsp.reason)

h5pyd/_hl/httpconn.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,9 @@ def GET(self, req, format="json", params=None, headers=None, use_cache=True):
434434
if format == "binary":
435435
headers["accept"] = "application/octet-stream"
436436

437+
# list of parameters which should disable cache usage
438+
no_cache_params = ["select", "query", "Limit", "Marker", "pattern", "attr"]
439+
437440
check_cache = self._cache is not None and use_cache and format == "json"
438441
check_cache = check_cache and params["domain"] == self._domain
439442
check_cache = check_cache and "select" not in params and "query" not in params

test/hl/common.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import numpy as np
2222
import unittest as ut
2323
from platform import system
24-
24+
from h5pyd import getServerInfo
2525

2626
# Check if non-ascii filenames are supported
2727
# Evidently this is the most reliable way to check
@@ -275,3 +275,10 @@ def is_hsds(self, id=None):
275275
return True
276276
else:
277277
return False
278+
279+
def hsds_version(self):
    """ Return the version of the HSDS server, or None if not HSDS.

    The version is read from the 'hsds_version' entry of the server info
    returned by getServerInfo(); a response without that entry yields
    None rather than raising KeyError.
    """
    server_info = getServerInfo()
    try:
        return server_info["hsds_version"]
    except KeyError:
        # The server did not report an HSDS version.
        return None

0 commit comments

Comments
 (0)