Skip to content

Commit 5b77248

Browse files
committed
Multi-link options for group.get()
1 parent 4735e8a commit 5b77248

File tree

3 files changed

+195
-4
lines changed

3 files changed

+195
-4
lines changed

h5pyd/_hl/group.py

Lines changed: 85 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -679,7 +679,19 @@ def __getitem__(self, name, track_order=False):
679679
tgt._name = name
680680
return tgt
681681

682-
def get(self, name, default=None, getclass=False, getlink=False, track_order=False):
682+
def objectify_link_json(self, link_json):
    """Build the link object described by a link's JSON representation.

    The kind of link is chosen from the keys present in ``link_json``:

    * "id" present: HardLink constructed from that id
    * "h5path" and "h5domain" present: ExternalLink(h5domain, h5path)
    * "h5path" only: SoftLink(h5path)

    Raises ValueError when neither "id" nor "h5path" is present.
    """
    # "id" takes precedence over every other key
    if "id" in link_json:
        return HardLink(link_json["id"])

    # Without a target path there is nothing left to build a link from
    if "h5path" not in link_json:
        raise ValueError("Invalid link JSON")

    target_path = link_json["h5path"]
    if "h5domain" in link_json:
        return ExternalLink(link_json["h5domain"], target_path)
    return SoftLink(target_path)
693+
694+
def get(self, name, default=None, getclass=False, getlink=False, track_order=False, **kwds):
683695
""" Retrieve an item or other information.
684696
685697
"name" given only:
@@ -697,6 +709,21 @@ def get(self, name, default=None, getclass=False, getlink=False, track_order=Fal
697709
Return HardLink, SoftLink and ExternalLink classes. Return
698710
"default" if nothing with that name exists.
699711
712+
"limit" is an integer:
713+
If "name" is None, this will return the first "limit" links in the group.
714+
715+
"marker" is a string:
716+
If "name" is None, this will return only the links that come after the marker in the group's link ordering.
717+
718+
"pattern" is a string:
719+
If "name" is None, this will return only the links that match the given pattern
720+
in the target group (and subgroups, if follow_links is provided).
721+
Matching is done according to Unix pathname expansion rules.
722+
723+
"follow_links" is True:
724+
If "name" is None, subgroups of the target group will be recursively searched
725+
for links that match the given names or pattern.
726+
700727
Example:
701728
702729
>>> cls = group.get('foo', getclass=True)
@@ -709,7 +736,7 @@ def get(self, name, default=None, getclass=False, getlink=False, track_order=Fal
709736
except KeyError:
710737
return default
711738

712-
if name not in self:
739+
if name is not None and name not in self:
713740
return default
714741

715742
elif getclass and not getlink:
@@ -726,6 +753,52 @@ def get(self, name, default=None, getclass=False, getlink=False, track_order=Fal
726753
raise TypeError("Unknown object type")
727754

728755
elif getlink:
756+
if name is None:
757+
# Get all links in target group(s)
758+
# Retrieve "limit", "marker", and "pattern" from kwds
759+
limit = kwds.get("limit", None)
760+
marker = kwds.get("marker", None)
761+
pattern = kwds.get("pattern", None)
762+
follow_links = kwds.get("follow_links", False)
763+
764+
req = "/groups/" + self.id.uuid + "/links"
765+
params = {}
766+
767+
if limit:
768+
params["Limit"] = limit
769+
if marker:
770+
params["Marker"] = marker
771+
if pattern:
772+
params["pattern"] = pattern
773+
if follow_links:
774+
params["follow_links"] = 1
775+
if track_order:
776+
params["CreateOrder"] = 1
777+
778+
rsp = self.GET(req, params=params)
779+
780+
if "links" in rsp:
781+
# Process list of link objects so they may be accessed by name
782+
links = rsp['links']
783+
links_out = {}
784+
if all([isUUID(k) for k in links]):
785+
# Multiple groups queried, links are returned under group ids
786+
for group_id in links:
787+
group_links = {}
788+
789+
for link in links[group_id]:
790+
group_links[link["title"]] = self.objectify_link_json(link)
791+
792+
links_out[group_id] = group_links
793+
794+
else:
795+
for link in links:
796+
links_out[link["title"]] = self.objectify_link_json(link)
797+
else:
798+
raise ValueError("Can't parse server response to links query")
799+
800+
return links_out
801+
729802
parent_uuid, link_json = self._get_link_json(name)
730803
typecode = link_json['class']
731804

@@ -740,7 +813,7 @@ def get(self, name, default=None, getclass=False, getlink=False, track_order=Fal
740813

741814
return ExternalLink(link_json['h5domain'], link_json['h5path'])
742815
elif typecode == 'H5L_TYPE_HARD':
743-
return HardLink if getclass else HardLink()
816+
return HardLink if getclass else HardLink(link_json['id'])
744817
else:
745818
raise TypeError("Unknown link type")
746819

@@ -1214,8 +1287,16 @@ class HardLink(object):
12141287
Represents a hard link in an HDF5 file. Provided only so that
12151288
Group.get works in a sensible way. Has no other function.
12161289
"""
1290+
@property
1291+
# The uuid of the target object
1292+
def id(self):
1293+
return self._id
12171294

1218-
pass
1295+
def __init__(self, id=None):
1296+
self._id = id
1297+
1298+
def __repr__(self):
1299+
return f'<HardLink to "{self.id}">'
12191300

12201301

12211302
# TODO: implement equality testing for these

h5pyd/_hl/httpconn.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,8 @@ def GET(self, req, format="json", params=None, headers=None, use_cache=True):
437437
check_cache = self._cache is not None and use_cache and format == "json"
438438
check_cache = check_cache and params["domain"] == self._domain
439439
check_cache = check_cache and "select" not in params and "query" not in params
440+
check_cache = check_cache and "follow_links" not in params and "pattern" not in params
441+
check_cache = check_cache and "Limit" not in params and "Marker" not in params
440442

441443
if check_cache:
442444
self.log.debug("httpcon - checking cache")
@@ -448,6 +450,7 @@ def GET(self, req, format="json", params=None, headers=None, use_cache=True):
448450
self.log.info(
449451
f"GET: {self._endpoint + req} [{params['domain']}] timeout: {self._timeout}"
450452
)
453+
451454
for k in params:
452455
if k != "domain":
453456
v = params[k]
@@ -462,6 +465,7 @@ def GET(self, req, format="json", params=None, headers=None, use_cache=True):
462465
stream = False
463466
else:
464467
stream = True
468+
465469
rsp = s.get(
466470
self._endpoint + req,
467471
params=params,
@@ -497,6 +501,8 @@ def GET(self, req, format="json", params=None, headers=None, use_cache=True):
497501

498502
add_to_cache = content_type and content_type.startswith("application/json")
499503
add_to_cache = add_to_cache and content_length < MAX_CACHE_ITEM_SIZE and not req.endswith("/value")
504+
add_to_cache = add_to_cache and "follow_links" not in params and "pattern" not in params
505+
add_to_cache = add_to_cache and "Limit" not in params and "Marker" not in params
500506

501507
if add_to_cache:
502508
# add to our _cache

test/hl/test_group.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,110 @@ def test_link_multi_create(self):
417417
self.assertEqual(link.path, links[i % num_links]._path)
418418
self.assertEqual(link.filename, links[i % num_links]._filename)
419419

420+
def test_link_get_multi(self):
    """Exercise Group.get(None, getlink=True) and its multi-link options.

    Builds a three-level group chain (g1/g2/g3), fills each group with
    ``num_links`` links that point back at the group itself, then checks
    retrieval of all links from one group, recursive retrieval with
    ``follow_links``, paging with ``limit``/``marker`` and filtering
    with ``pattern``.
    """
    if config.get("use_h5py"):
        # Test is not run when the suite is configured for h5py
        return

    filename = self.getFileName("test_link_get_multi")
    print(filename)

    f = h5py.File(filename, 'w')
    # Make sure the file is closed even if an assertion below fails
    self.addCleanup(f.close)

    g1 = f.create_group("g1")

    # Create subgroups
    g2 = g1.create_group("g2")
    g3 = g2.create_group("g3")
    group_ids = [g1.id.uuid, g2.id.uuid, g3.id.uuid]

    # Create links in each group; every link targets its own group
    num_links = 20
    names = ["link" + str(i) for i in range(num_links)]
    sorted_names = sorted(names)

    for name in names:
        g1[name] = g1
        g2[name] = g2
        g3[name] = g3

    # Get all links from g1 only
    links_out = g1.get(None, getlink=True)

    # The created links plus the link to subgroup g2
    self.assertEqual(len(links_out), num_links + 1)

    for name in names:
        self.assertIn(name, links_out)
        link = links_out[name]
        self.assertEqual(link.id, g1.id.uuid)

    # Get all links from g1 and subgroups
    links_out = g1.get(None, getlink=True, follow_links=True)

    # 3 groups containing links
    self.assertEqual(len(links_out), 3)

    for group_id in group_ids:
        self.assertIn(group_id, links_out)
        links = links_out[group_id]

        if group_id == g3.id.uuid:
            # g3 has no subgroup, so it only holds the created links
            self.assertEqual(len(links), num_links)
        else:
            self.assertEqual(len(links), num_links + 1)

        for name in names:
            self.assertIn(name, links)
            link = links[name]
            self.assertEqual(link.id, group_id)

    # Make sure cache does not erroneously return recursive links
    links_out = g1.get(None, getlink=True)
    self.assertEqual(len(links_out), num_links + 1)

    # Return only 5 links from group
    links_out = g1.get(None, getlink=True, limit=5)
    self.assertEqual(len(links_out), 5)

    # Expected page: "g2" followed by the first four link names
    self.assertIn("g2", links_out)
    for name in sorted_names[0:4]:
        self.assertIn(name, links_out)
        link = links_out[name]
        self.assertEqual(link.id, g1.id.uuid)

    # Return next 5 links via marker
    links_out = g1.get(None, getlink=True, limit=5, marker=sorted_names[3])

    self.assertEqual(len(links_out), 5)

    for name in sorted_names[4:9]:
        self.assertIn(name, links_out)
        link = links_out[name]
        self.assertEqual(link.id, g1.id.uuid)

    # Return all links in g1 besides g2
    links_out = g1.get(None, getlink=True, pattern="link*")
    self.assertEqual(len(links_out), num_links)

    # Every created name matches "link*", so all of them must be present
    for name in names:
        self.assertIn(name, links_out)
        link = links_out[name]
        self.assertEqual(link.id, g1.id.uuid)

    # Return all links in g1/g2/g3 except for the group links
    links_out = g1.get(None, getlink=True, follow_links=True, pattern="link*")
    self.assertEqual(len(links_out), 3)

    for group_id in group_ids:
        self.assertIn(group_id, links_out)
        links = links_out[group_id]

        self.assertEqual(len(links), num_links)

        for name in names:
            self.assertIn(name, links)
            link = links[name]
            self.assertEqual(link.id, group_id)
420524

421525
class TestTrackOrder(TestCase):
422526

0 commit comments

Comments
 (0)