Skip to content

Commit b6c54f9

Browse files
committed
Support track_order param
1 parent f783446 commit b6c54f9

File tree

7 files changed

+304
-29
lines changed

7 files changed

+304
-29
lines changed

h5pyd/_hl/attrs.py

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -342,16 +342,24 @@ def __len__(self):
342342
def __iter__(self):
343343
""" Iterate over the names of attributes. """
344344
if self._objdb_attributes is not None:
345+
if self._parent._track_order:
346+
attrs = sorted(self._objdb_attributes.items(), key=lambda x: x[1]['created'])
347+
else:
348+
attrs = sorted(self._objdb_attributes.items())
349+
350+
ordered_attrs = {}
351+
for a in attrs:
352+
ordered_attrs[a[0]] = a[1]
345353

346-
for name in self._objdb_attributes:
354+
for name in ordered_attrs:
347355
yield name
348356

349357
else:
350358
# make server request
351359
req = self._req_prefix
352360
# backup over the trailing slash in req
353361
req = req[:-1]
354-
rsp = self._parent.GET(req)
362+
rsp = self._parent.GET(req, params={"CreateOrder": "1" if self._parent._track_order else "0"})
355363
attributes = rsp['attributes']
356364

357365
attrlist = []
@@ -383,3 +391,33 @@ def __repr__(self):
383391
if not self._parent.id.id:
384392
return "<Attributes of closed HDF5 object>"
385393
return f"<Attributes of HDF5 object at {id(self._parent.id)}>"
394+
395+
def __reversed__(self):
    """ Iterate over the names of attributes in reverse order.

    When the attribute JSON is cached locally, names are ordered by each
    attribute's 'created' value if the parent has _track_order set, or by
    name otherwise, and then yielded back to front.  Without a cache the
    names are requested from the server (passing the CreateOrder param)
    and yielded in the reverse of the order the server returned them.
    """
    cached = self._objdb_attributes
    if cached is not None:
        # sort the names directly; no need to build an intermediate dict
        if self._parent._track_order:
            names = sorted(cached, key=lambda name: cached[name]['created'])
        else:
            names = sorted(cached)
    else:
        # make server request
        # back up over the trailing slash in the request prefix
        req = self._req_prefix[:-1]
        params = {"CreateOrder": "1" if self._parent._track_order else "0"}
        rsp = self._parent.GET(req, params=params)
        names = [attr['name'] for attr in rsp['attributes']]

    # reverse a list rather than a dict: reversed(dict) needs Python 3.8+
    yield from reversed(names)

h5pyd/_hl/dataset.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -717,7 +717,7 @@ def allocated_size(self):
717717
self._getVerboseInfo()
718718
return self._allocated_size
719719

720-
def __init__(self, bind):
720+
def __init__(self, bind, track_order=False):
721721
"""Create a new Dataset object by binding to a low-level DatasetID."""
722722

723723
if not isinstance(bind, DatasetID):
@@ -732,6 +732,7 @@ def __init__(self, bind):
732732
# make a numpy dtype out of the type json
733733
self._dtype = createDataType(self.id.type_json)
734734
self._item_size = getItemSize(self.id.type_json)
735+
self._track_order = track_order
735736

736737
self._shape = self.get_shape()
737738

h5pyd/_hl/files.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ def __init__(
120120
logger=None,
121121
owner=None,
122122
linked_domain=None,
123+
track_order=False,
123124
retries=10,
124125
timeout=180,
125126
**kwds,
@@ -155,6 +156,10 @@ def __init__(
155156
by admin users
156157
linked_domain
157158
Create new domain using the root of the linked domain
159+
track_order
160+
Whether to track dataset/group/attribute creation order within this file. Objects will be iterated
161+
in ascending creation order if this is enabled, otherwise in ascending alphanumeric order.
162+
158163
retries
159164
Number of retry attempts to be used if a server request fails
160165
timeout
@@ -270,6 +275,8 @@ def __init__(
270275
if bucket:
271276
params["bucket"] = bucket
272277

278+
params["CreateOrder"] = "1" if track_order else "0"
279+
273280
# need some special logic for the first request in local mode
274281
# to give the sockets time to initialize
275282

@@ -393,8 +400,9 @@ def __init__(
393400
self._verboseUpdated = None # when the verbose data was fetched
394401
self._lastScan = None # when summary stats were last updated by server
395402
self._dn_ids = dn_ids
403+
self._track_order = track_order
396404

397-
Group.__init__(self, self._id)
405+
Group.__init__(self, self._id, track_order=track_order)
398406

399407
def _getVerboseInfo(self):
400408
now = time.time()
@@ -403,7 +411,7 @@ def _getVerboseInfo(self):
403411
):
404412
# resynch the verbose data
405413
req = "/?verbose=1"
406-
rsp_json = self.GET(req, use_cache=False)
414+
rsp_json = self.GET(req, use_cache=False, params={"CreateOrder": "1" if self._track_order else "0"})
407415

408416
self.log.debug("get verbose info: {}".format(rsp_json))
409417
props = {}

h5pyd/_hl/group.py

Lines changed: 65 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ class Group(HLObject, MutableMappingHDF5):
4949
""" Represents an HDF5 group.
5050
"""
5151

52-
def __init__(self, bind, **kwargs):
52+
def __init__(self, bind, track_order=False, **kwargs):
5353
# print "group init, bind:", bind
5454

5555
""" Create a new Group object by binding to a low-level GroupID.
@@ -58,6 +58,7 @@ def __init__(self, bind, **kwargs):
5858
if not isinstance(bind, GroupID):
5959
raise ValueError(f"{bind} is not a GroupID")
6060
HLObject.__init__(self, bind, **kwargs)
61+
self._track_order = track_order
6162
self._req_prefix = "/groups/" + self.id.uuid
6263
self._link_db = {} # cache for links
6364

@@ -149,7 +150,7 @@ def _get_link_json(self, h5path):
149150
req = "/groups/" + parent_uuid + "/links/" + name
150151

151152
try:
152-
rsp_json = self.GET(req)
153+
rsp_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"})
153154
except IOError:
154155
raise KeyError("Unable to open object (Component not found)")
155156

@@ -181,7 +182,7 @@ def _get_objdb_links(self):
181182
group_json = objdb[self.id.id]
182183
return group_json["links"]
183184

184-
def create_group(self, h5path):
185+
def create_group(self, h5path, track_order=False):
185186
""" Create and return a new subgroup.
186187
187188
Name may be absolute or relative. Fails if the target name already
@@ -237,6 +238,7 @@ def create_group(self, h5path):
237238
parent_name = parent_name + '/' + link
238239
self.log.debug("create group - parent name: {}".format(parent_name))
239240
sub_group._name = parent_name
241+
sub_group._track_order = track_order
240242
parent_uuid = sub_group.id.id
241243
else:
242244
# sub-group already exists
@@ -258,6 +260,7 @@ def create_group(self, h5path):
258260
if sub_group is None:
259261
# didn't actually create anything
260262
raise ValueError("name already exists")
263+
261264
return sub_group
262265

263266
def create_dataset(self, name, shape=None, dtype=None, data=None, **kwds):
@@ -547,7 +550,7 @@ def require_group(self, name):
547550
raise TypeError(f"Incompatible object ({grp.__class__.__name__}) already exists")
548551
return grp
549552

550-
def getObjByUuid(self, uuid, collection_type=None):
553+
def getObjByUuid(self, uuid, collection_type=None, track_order=False):
551554
""" Utility method to get an obj based on collection type and uuid """
552555
self.log.debug(f"getObjByUuid({uuid})")
553556
obj_json = None
@@ -582,10 +585,10 @@ def getObjByUuid(self, uuid, collection_type=None):
582585
# will need to get JSON from server
583586
req = f"/{collection_type}/{uuid}"
584587
# make server request
585-
obj_json = self.GET(req)
588+
obj_json = self.GET(req, params={"CreateOrder": "1" if track_order else "0"})
586589

587590
if collection_type == 'groups':
588-
tgt = Group(GroupID(self, obj_json))
591+
tgt = Group(GroupID(self, obj_json), track_order=track_order)
589592
elif collection_type == 'datatypes':
590593
tgt = Datatype(TypeID(self, obj_json))
591594
elif collection_type == 'datasets':
@@ -595,13 +598,13 @@ def getObjByUuid(self, uuid, collection_type=None):
595598
if "dims" in shape_json and len(shape_json["dims"]) == 1 and dtype_json["class"] == 'H5T_COMPOUND':
596599
tgt = Table(DatasetID(self, obj_json))
597600
else:
598-
tgt = Dataset(DatasetID(self, obj_json))
601+
tgt = Dataset(DatasetID(self, obj_json), track_order=track_order)
599602
else:
600603
raise IOError(f"Unexpected collection_type: {collection_type}")
601604

602605
return tgt
603606

604-
def __getitem__(self, name):
607+
def __getitem__(self, name, track_order=False):
605608
""" Open an object in the file """
606609
# convert bytes to str for PY3
607610
if isinstance(name, bytes):
@@ -614,11 +617,11 @@ def __getitem__(self, name):
614617
if tgt is not None:
615618
return tgt # ref'd object has not been deleted
616619
if isinstance(name.id, GroupID):
617-
tgt = self.getObjByUuid(name.id.uuid, collection_type="groups")
620+
tgt = self.getObjByUuid(name.id.uuid, collection_type="groups", track_order=track_order)
618621
elif isinstance(name.id, DatasetID):
619-
tgt = self.getObjByUuid(name.id.uuid, collection_type="datasets")
622+
tgt = self.getObjByUuid(name.id.uuid, collection_type="datasets", track_order=track_order)
620623
elif isinstance(name.id, TypeID):
621-
tgt = self.getObjByUuid(name.id.uuid, collection_type="datasets")
624+
tgt = self.getObjByUuid(name.id.uuid, collection_type="datasets", track_order=track_order)
622625
else:
623626
raise IOError("Unexpected Error - ObjectID type: " + name.__class__.__name__)
624627
return tgt
@@ -631,11 +634,11 @@ def __getitem__(self, name):
631634
link_class = link_json['class']
632635

633636
if link_class == 'H5L_TYPE_HARD':
634-
tgt = self.getObjByUuid(link_json['id'], collection_type=link_json['collection'])
637+
tgt = self.getObjByUuid(link_json['id'], collection_type=link_json['collection'], track_order=track_order)
635638
elif link_class == 'H5L_TYPE_SOFT':
636639
h5path = link_json['h5path']
637640
soft_parent_uuid, soft_json = self._get_link_json(h5path)
638-
tgt = self.getObjByUuid(soft_json['id'], collection_type=soft_json['collection'])
641+
tgt = self.getObjByUuid(soft_json['id'], collection_type=soft_json['collection'], track_order=track_order)
639642

640643
elif link_class == 'H5L_TYPE_EXTERNAL':
641644
# try to get a handle to the file and return the linked object...
@@ -651,7 +654,8 @@ def __getitem__(self, name):
651654
endpoint = self.id.http_conn.endpoint
652655
username = self.id.http_conn.username
653656
password = self.id.http_conn.password
654-
f = File(external_domain, endpoint=endpoint, username=username, password=password, mode='r')
657+
f = File(external_domain, endpoint=endpoint, username=username, password=password, mode='r',
658+
track_order=track_order)
655659
except IOError:
656660
# unable to find external link
657661
raise KeyError("Unable to open file: " + link_json['h5domain'])
@@ -675,7 +679,7 @@ def __getitem__(self, name):
675679
tgt._name = name
676680
return tgt
677681

678-
def get(self, name, default=None, getclass=False, getlink=False):
682+
def get(self, name, default=None, getclass=False, getlink=False, track_order=False):
679683
""" Retrieve an item or other information.
680684
681685
"name" given only:
@@ -699,18 +703,17 @@ def get(self, name, default=None, getclass=False, getlink=False):
699703
>>> if cls == SoftLink:
700704
... print '"foo" is a soft link!'
701705
"""
702-
703706
if not (getclass or getlink):
704707
try:
705-
return self[name]
708+
return self.__getitem__(name, track_order)
706709
except KeyError:
707710
return default
708711

709712
if name not in self:
710713
return default
711714

712715
elif getclass and not getlink:
713-
obj = self.__getitem__(name)
716+
obj = self.__getitem__(name, track_order)
714717
if obj is None:
715718
return None
716719
if obj.id.__class__ is GroupID:
@@ -777,7 +780,7 @@ def __setitem__(self, name, obj):
777780
raise IOError("cannot create subgroup of softlink")
778781
parent_uuid = link_json["id"]
779782
req = "/groups/" + parent_uuid
780-
group_json = self.GET(req)
783+
group_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"})
781784
tgt = Group(GroupID(self, group_json))
782785
tgt[basename] = obj
783786

@@ -867,7 +870,7 @@ def __len__(self):
867870
return len(links_json)
868871

869872
req = "/groups/" + self.id.uuid
870-
rsp_json = self.GET(req)
873+
rsp_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"})
871874
return rsp_json['linkCount']
872875

873876
def __iter__(self):
@@ -876,7 +879,7 @@ def __iter__(self):
876879

877880
if links is None:
878881
req = "/groups/" + self.id.uuid + "/links"
879-
rsp_json = self.GET(req)
882+
rsp_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"})
880883
links = rsp_json['links']
881884

882885
# reset the link cache
@@ -888,7 +891,16 @@ def __iter__(self):
888891
for x in links:
889892
yield x['title']
890893
else:
891-
for name in links:
894+
if self._track_order:
895+
links = sorted(links.items(), key=lambda x: x[1]['created'])
896+
else:
897+
links = sorted(links.items())
898+
899+
ordered_links = {}
900+
for link in links:
901+
ordered_links[link[0]] = link[1]
902+
903+
for name in ordered_links:
892904
yield name
893905

894906
def __contains__(self, name):
@@ -1092,7 +1104,7 @@ def visititems(self, func):
10921104
else:
10931105
# request from server
10941106
req = "/groups/" + parent.id.uuid + "/links"
1095-
rsp_json = self.GET(req)
1107+
rsp_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"})
10961108
links = rsp_json['links']
10971109
for link in links:
10981110
obj = None
@@ -1137,6 +1149,36 @@ def __repr__(self):
11371149
r = f'<HDF5 group {namestr} ({len(self)} members)>'
11381150
return r
11391151

1152+
def __reversed__(self):
    """ Iterate over member names in reverse order.

    When the links are available from the local object db, names are
    ordered by each link's 'created' value if _track_order is set, or by
    name otherwise, and then yielded back to front.  Otherwise the links
    are requested from the server (passing the CreateOrder param), the
    link cache is rebuilt from the response, and the titles are yielded
    in the reverse of the order the server returned them.
    """
    links = self._get_objdb_links()

    if links is None:
        req = "/groups/" + self.id.uuid + "/links"
        params = {"CreateOrder": "1" if self._track_order else "0"}
        links = self.GET(req, params=params)['links']

        # reset the link cache
        self._link_db = {link["title"]: link for link in links}
        names = [link['title'] for link in links]
    elif self._track_order:
        # sort the names directly; no need to build an intermediate dict
        names = sorted(links, key=lambda name: links[name]['created'])
    else:
        names = sorted(links)

    # reverse a list rather than a dict: reversed(dict) needs Python 3.8+
    yield from reversed(names)
1181+
11401182

11411183
class HardLink(object):
11421184

0 commit comments

Comments
 (0)