Skip to content

Commit d1da062

Browse files
authored
feat(torque): add 6 new query methods for graph traversal (#57)
* feat(torque): add 6 new query methods for graph traversal New methods: - both(predicates): traverse edges in both directions - is_(*nodes): filter to specific nodes - unique(): remove duplicate vertices from results - limit(n): limit results to first N vertices - skip(n): skip first N vertices in results - back(tag): return to previously tagged position Includes 18 new tests and README documentation * adding discord link * chore: bump version to 3.2.0
1 parent f27da38 commit d1da062

File tree

4 files changed

+353
-9
lines changed

4 files changed

+353
-9
lines changed

README.md

Lines changed: 51 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,14 @@
11
![](https://static.pepy.tech/badge/cogdb) [![PyPI version](https://badge.fury.io/py/cogdb.svg)](https://badge.fury.io/py/cogdb) ![Python 3.8](https://img.shields.io/badge/python-3.8+-blue.svg)
2-
[![Build Status](https://travis-ci.org/arun1729/cog.svg?branch=master)](https://travis-ci.org/arun1729/cog) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![codecov](https://codecov.io/gh/arun1729/cog/branch/master/graph/badge.svg)](https://codecov.io/gh/arun1729/cog)
2+
[![Build Status](https://travis-ci.org/arun1729/cog.svg?branch=master)](https://travis-ci.org/arun1729/cog) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![codecov](https://codecov.io/gh/arun1729/cog/branch/master/graph/badge.svg)](https://codecov.io/gh/arun1729/cog) [![Discord](https://img.shields.io/badge/Discord-Join%20Server-7289da?logo=discord&logoColor=white)](https://discord.gg/nqNpNGfjts)
33

44
# ![logo](cog-logo.png)
55
# CogDB - Micro Graph Database for Python Applications
66
> Documents and examples at [cogdb.io](https://cogdb.io)
77
8-
> New release: 3.1.0
9-
> - **Batch insert mode** for significantly faster bulk graph loading
10-
> - New `put_batch()` method for efficient triple insertion
11-
> - Performance improvements: up to 1.6x faster inserts at scale
12-
> - New word embeddings API
13-
> - Similarity filtering using word embeddings
14-
> - Filter step
8+
> New release: 3.2.0
9+
> - New Torque query methods: `both()`, `is_()`, `unique()`, `limit()`, `skip()`, `back()`
10+
> - Bidirectional traversal and pagination support
11+
> - Navigate back to tagged vertices
1512
1613
![ScreenShot](notes/ex2.png)
1714

@@ -182,6 +179,52 @@ g.v("emily").out("follows").filter(func=lambda x: x.startswith("f")).all()
182179
```
183180
> {'result': [{'id': 'fred'}]}
184181
182+
#### Bidirectional Traversal
183+
184+
Follow edges in both directions (outgoing and incoming):
185+
```python
186+
g.v("bob").both("follows").all()
187+
```
188+
> {'result': [{'id': 'fred'}, {'id': 'alice'}, {'id': 'charlie'}, {'id': 'dani'}]}
189+
190+
#### Filter to Specific Nodes
191+
192+
Filter results to only include specific vertices:
193+
```python
194+
g.v("alice").out("follows").is_("bob", "dani").all()
195+
```
196+
> {'result': [{'id': 'bob'}, {'id': 'dani'}]}
197+
198+
#### Remove Duplicates
199+
200+
Remove duplicate vertices from results:
201+
```python
202+
g.v().out("follows").unique().all()
203+
```
204+
> {'result': [{'id': 'bob'}, {'id': 'fred'}, {'id': 'greg'}, {'id': 'dani'}]}
205+
206+
#### Pagination with Limit and Skip
207+
208+
Limit results to the first N vertices:
209+
```python
210+
g.v().limit(3).all()
211+
```
212+
> {'result': [{'id': 'alice'}, {'id': 'bob'}, {'id': 'charlie'}]}
213+
214+
Skip the first N vertices:
215+
```python
216+
g.v().skip(2).limit(2).all()
217+
```
218+
> {'result': [{'id': 'charlie'}, {'id': 'dani'}]}
219+
220+
#### Navigate Back to Tagged Vertex
221+
222+
Return to a previously tagged position while preserving the traversal path:
223+
```python
224+
g.v("alice").tag("start").out("follows").out("follows").back("start").all()
225+
```
226+
> {'result': [{'start': 'alice', 'id': 'alice'}]}
227+
185228

186229
#### json example
187230

cog/torque.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,114 @@ def filter(self, func):
450450
self.last_visited_vertices = [v for v in self.last_visited_vertices if func(v.id)]
451451
return self
452452

453+
def both(self, predicates=None):
    '''
    Follow edges out of and into every currently visited vertex.

    For each predicate and each vertex in ``last_visited_vertices`` the
    outgoing record is looked up first and the incoming record second.
    Every adjacent id found becomes a new ``Vertex`` with ``set_edge``
    applied for that predicate and the source vertex's tags merged in.
    No de-duplication is done here: a vertex reached more than once is
    appended once per hit.

    :param predicates: A predicate string or a list of predicate strings.
        When omitted, ``self.all_predicates`` is used unchanged.
    :return: self for method chaining.
    '''
    if predicates is None:
        predicates = self.all_predicates
    else:
        if not isinstance(predicates, list):
            predicates = [predicates]
        predicates = [hash_predicate(p) for p in predicates]

    self.cog.use_namespace(self.graph_name)
    neighbours = []

    for predicate in predicates:
        for source in self.last_visited_vertices:
            # Outgoing key first, then incoming, for every source vertex.
            for node_key in (out_nodes, in_nodes):
                record = self.cog.use_table(predicate).get(node_key(source.id))
                if record is None:
                    continue
                if record.value_type == "s":
                    # "s" records carry one value, which is coerced to str.
                    adjacent_ids = [str(record.value)]
                elif record.value_type == "l":
                    # "l" records carry an iterable of adjacent ids.
                    adjacent_ids = record.value
                else:
                    # Any other value type contributes nothing.
                    continue
                for adjacent_id in adjacent_ids:
                    neighbour = Vertex(adjacent_id).set_edge(predicate)
                    neighbour.tags.update(source.tags)
                    neighbours.append(neighbour)

    self.last_visited_vertices = neighbours
    return self
499+
500+
def is_(self, *nodes):
    '''
    Keep only the visited vertices whose id is one of the given nodes.

    :param nodes: Node IDs, given either as separate arguments or as one
        single list argument.
    :return: self for method chaining.
    '''
    # A lone list argument is unpacked, so is_(["a", "b"]) == is_("a", "b").
    single_list = len(nodes) == 1 and isinstance(nodes[0], list)
    wanted = set(nodes[0] if single_list else nodes)
    self.last_visited_vertices = [
        vertex for vertex in self.last_visited_vertices if vertex.id in wanted
    ]
    return self
512+
513+
def unique(self):
    '''
    Drop vertices whose id has already appeared earlier in the result set.

    The first vertex seen for each id is kept, and the relative order of
    the kept vertices is unchanged.

    :return: self for method chaining.
    '''
    first_by_id = {}
    for vertex in self.last_visited_vertices:
        # setdefault keeps the earliest vertex for an id and ignores repeats.
        first_by_id.setdefault(vertex.id, vertex)
    self.last_visited_vertices = list(first_by_id.values())
    return self
526+
527+
def limit(self, n):
    '''
    Limit results to the first N vertices.

    :param n: Maximum number of vertices to return. A negative value is
        treated as 0, so no vertices are returned.
    :return: self for method chaining.
    '''
    # A negative slice bound counts from the end of the list, so [:n] would
    # only drop the last |n| vertices; clamp so "at most n" always holds.
    if n is not None and n < 0:
        n = 0
    self.last_visited_vertices = self.last_visited_vertices[:n]
    return self
535+
536+
def skip(self, n):
    '''
    Skip the first N vertices in the result set.

    :param n: Number of vertices to skip. A negative value is treated as
        0, so nothing is skipped.
    :return: self for method chaining.
    '''
    # A negative slice bound counts from the end of the list, so [n:] would
    # keep only the last |n| vertices instead of skipping none; clamp to 0.
    if n is not None and n < 0:
        n = 0
    self.last_visited_vertices = self.last_visited_vertices[n:]
    return self
544+
545+
def back(self, tag):
    '''
    Jump back to the vertex recorded under ``tag`` on each current path.

    Each visited vertex that carries ``tag`` is replaced by a new
    ``Vertex`` built from the tagged value, which receives copies of the
    current vertex's tags and edges. Vertices without the tag are dropped.

    :param tag: A previous tag in the query to jump back to.
    :return: self for method chaining.
    '''
    restored = []
    for current in self.last_visited_vertices:
        if tag not in current.tags:
            # This path never recorded the tag, so it has nowhere to return to.
            continue
        origin = Vertex(current.tags[tag])
        # Copies keep the new vertex independent of the one it replaces.
        origin.tags = current.tags.copy()
        origin.edges = current.edges.copy()
        restored.append(origin)
    self.last_visited_vertices = restored
    return self
560+
453561
def sim(self, word, operator, threshold, strict=False):
454562
"""
455563
Applies cosine similarity filter to the vertices and removes any vertices that do not pass the filter.

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33

44
setup(name='cogdb',
5-
version='3.1.0',
5+
version='3.2.0',
66
description='Persistent Embedded Graph Database',
77
url='http://github.com/arun1729/cog',
88
author='Arun Mahendra',

test/test_torque_extensions.py

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
from cog.torque import Graph
2+
import unittest
3+
import os
4+
import shutil
5+
6+
DIR_NAME = "TorqueExtensionsTest"
7+
8+
9+
def ordered(obj):
    """
    Return an order-insensitive form of a nested dict/list structure.

    Dicts become sorted lists of ``(key, ordered(value))`` pairs, lists
    become sorted lists of their converted items, and any other value is
    returned unchanged.
    """
    if isinstance(obj, dict):
        pairs = [(key, ordered(value)) for key, value in obj.items()]
        pairs.sort()
        return pairs
    if isinstance(obj, list):
        items = [ordered(item) for item in obj]
        items.sort()
        return items
    return obj
16+
17+
18+
class TorqueExtensionsTest(unittest.TestCase):
    """
    Tests for new Torque traversal methods:
    - both(): bidirectional traversal
    - is_(): filter to specific nodes
    - unique(): remove duplicates
    - limit(): limit results
    - skip(): skip results
    - back(): return to tagged position
    """
    maxDiff = None

    @classmethod
    def setUpClass(cls):
        # Build the on-disk location once so setup and teardown agree on it.
        cls.home_path = "/tmp/" + DIR_NAME
        # Start from an empty directory so data from earlier runs is removed.
        if os.path.exists(cls.home_path):
            shutil.rmtree(cls.home_path)
        os.mkdir(cls.home_path)

        cls.g = Graph(graph_name="test_graph", cog_home=DIR_NAME)
        # Create a simple test graph:
        # alice -> bob -> charlie -> alice (cycle)
        # alice -> dani
        # bob and dani both have status "cool"
        cls.g.put("alice", "follows", "bob")
        cls.g.put("bob", "follows", "charlie")
        cls.g.put("charlie", "follows", "alice")
        cls.g.put("alice", "follows", "dani")
        cls.g.put("bob", "status", "cool")
        cls.g.put("dani", "status", "cool")
        print(">>> TorqueExtensionsTest setup complete.\n")

    # =========== both() tests ===========

    def test_both_follows_from_bob(self):
        """both() should return vertices connected by edges in either direction."""
        result = self.g.v("bob").both("follows").all()
        ids = {r['id'] for r in result['result']}
        # bob follows charlie, alice follows bob
        self.assertIn("charlie", ids)
        self.assertIn("alice", ids)

    def test_both_no_predicate(self):
        """both() with no predicate should follow all edge types."""
        result = self.g.v("bob").both().all()
        ids = {r['id'] for r in result['result']}
        # bob -> charlie (follows), alice -> bob (follows), bob -> cool (status)
        self.assertIn("charlie", ids)
        self.assertIn("alice", ids)
        self.assertIn("cool", ids)

    # =========== is_() tests ===========

    def test_is_single_node(self):
        """is_() should filter to only the specified node."""
        result = self.g.v("alice").out("follows").is_("bob").all()
        self.assertEqual(len(result['result']), 1)
        self.assertEqual(result['result'][0]['id'], "bob")

    def test_is_multiple_nodes(self):
        """is_() should accept multiple nodes."""
        result = self.g.v("alice").out("follows").is_("bob", "dani").all()
        ids = {r['id'] for r in result['result']}
        self.assertEqual(ids, {"bob", "dani"})

    def test_is_no_match(self):
        """is_() should return empty if no nodes match."""
        result = self.g.v("alice").out("follows").is_("nonexistent").all()
        self.assertEqual(result['result'], [])

    def test_is_with_list(self):
        """is_() should accept a list of nodes."""
        result = self.g.v("alice").out("follows").is_(["bob", "dani"]).all()
        ids = {r['id'] for r in result['result']}
        self.assertEqual(ids, {"bob", "dani"})

    # =========== unique() tests ===========

    def test_unique_removes_duplicates(self):
        """unique() should remove duplicate vertices."""
        # Statuses of everyone alice follows - "cool" appears twice (bob and dani)
        result_without_unique = self.g.v("alice").out("follows").out("status").all()
        result_with_unique = self.g.v("alice").out("follows").out("status").unique().all()

        # Without unique, we should have duplicates
        ids_without = [r['id'] for r in result_without_unique['result']]
        self.assertEqual(ids_without.count("cool"), 2)

        # With unique, no duplicates
        ids_with = [r['id'] for r in result_with_unique['result']]
        self.assertEqual(ids_with.count("cool"), 1)

    def test_unique_preserves_order(self):
        """unique() should preserve the order of first occurrence."""
        all_ids = [r['id'] for r in self.g.v().all()['result']]
        unique_ids = [r['id'] for r in self.g.v().unique().all()['result']]
        self.assertTrue(len(unique_ids) > 0)
        # dict.fromkeys keeps the first occurrence of each id, in order, which
        # is exactly the sequence unique() is expected to produce.
        self.assertEqual(unique_ids, list(dict.fromkeys(all_ids)))

    # =========== limit() tests ===========

    def test_limit_returns_n_results(self):
        """limit() should return at most N vertices."""
        result = self.g.v().limit(2).all()
        self.assertEqual(len(result['result']), 2)

    def test_limit_more_than_available(self):
        """limit() with N larger than result set should return all."""
        all_result = self.g.v().all()
        limited_result = self.g.v().limit(1000).all()
        self.assertEqual(len(all_result['result']), len(limited_result['result']))

    def test_limit_zero(self):
        """limit(0) should return empty."""
        result = self.g.v().limit(0).all()
        self.assertEqual(result['result'], [])

    # =========== skip() tests ===========

    def test_skip_skips_n_results(self):
        """skip() should skip the first N vertices."""
        all_result = self.g.v().all()
        skipped_result = self.g.v().skip(2).all()
        self.assertEqual(len(skipped_result['result']), len(all_result['result']) - 2)

    def test_skip_more_than_available(self):
        """skip() with N larger than result set should return empty."""
        result = self.g.v().skip(1000).all()
        self.assertEqual(result['result'], [])

    def test_limit_and_skip_pagination(self):
        """limit() and skip() together enable pagination."""
        all_ids = [r['id'] for r in self.g.v().all()['result']]
        page1_ids = [r['id'] for r in self.g.v().limit(2).all()['result']]
        page2_ids = [r['id'] for r in self.g.v().skip(2).limit(2).all()['result']]

        # Pages should not overlap
        self.assertEqual(len(set(page1_ids) & set(page2_ids)), 0)

        # Each page should be the matching window of the unpaginated result
        self.assertEqual(page1_ids, all_ids[:2])
        self.assertEqual(page2_ids, all_ids[2:4])

    # =========== back() tests ===========

    def test_back_returns_to_tagged_vertex(self):
        """back() should return to the previously tagged vertex."""
        result = self.g.v("alice").tag("start").out("follows").back("start").all()
        # Should return to alice
        ids = {r['id'] for r in result['result']}
        self.assertEqual(ids, {"alice"})

    def test_back_preserves_tags(self):
        """back() should preserve existing tags."""
        result = self.g.v("alice").tag("origin").out("follows").tag("middle").back("origin").all()
        for r in result['result']:
            self.assertIn("origin", r)
            self.assertIn("middle", r)

    def test_back_with_invalid_tag(self):
        """back() with non-existent tag should return empty."""
        result = self.g.v("alice").out("follows").back("nonexistent").all()
        self.assertEqual(result['result'], [])

    def test_back_after_filter(self):
        """back() should work with filtered results."""
        result = self.g.v("alice").tag("start").out("follows").has("status", "cool").back("start").all()
        # bob and dani both have status cool, and alice follows both of them,
        # so every surviving path leads back to alice
        ids = {r['id'] for r in result['result']}
        self.assertEqual(ids, {"alice"})

    @classmethod
    def tearDownClass(cls):
        # Close the graph before deleting the directory created in setUpClass.
        cls.g.close()
        shutil.rmtree(cls.home_path)
        print("*** TorqueExtensionsTest cleanup complete.")
190+
191+
192+
if __name__ == "__main__":
    # Run the suite when this module is executed directly.
    unittest.main()

0 commit comments

Comments
 (0)