Skip to content

Commit fdc727b

Browse files
feat: nebula graph add time label (#1383)
Co-authored-by: Wendong <[email protected]> Co-authored-by: Wendong-Fan <[email protected]>
1 parent 3ea2b05 commit fdc727b

File tree

2 files changed

+165
-39
lines changed

2 files changed

+165
-39
lines changed

camel/storages/graph_storages/nebula_graph.py

Lines changed: 89 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import logging
1616
import re
1717
import time
18-
from typing import TYPE_CHECKING, Any, Dict, List, Tuple
18+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
1919

2020
from camel.storages.graph_storages.base import BaseGraphStorage
2121
from camel.storages.graph_storages.graph_element import (
@@ -203,46 +203,62 @@ def add_graph_elements(
203203
def ensure_edge_type_exists(
    self,
    edge_type: str,
    time_label: Optional[str] = None,
) -> None:
    r"""Ensures that a specified edge type exists in the NebulaGraph
    database. If the edge type already exists, this method does nothing.

    Args:
        edge_type (str): The name of the edge type to be created.
        time_label (str, optional): A specific timestamp, in the format
            'YYYY-MM-DDThh:mm:ss', to set as the default value for the
            `time_label` DATETIME property of the edge type. If not
            provided, the edge type is created without properties.
            (default: :obj:`None`)

    Raises:
        ValueError: If `time_label` is rejected by
            `_validate_time_label`.
        Exception: If the edge type creation fails after multiple retry
            attempts, an exception is raised with the error message.
    """
    if time_label is None:
        create_edge_stmt = f"CREATE EDGE IF NOT EXISTS {edge_type} ()"
    else:
        time_label = self._validate_time_label(time_label)
        # nGQL has no bare DATETIME literal: the value has to be produced
        # by the datetime() function from a quoted string. The statement
        # is kept on a single line so the query text does not depend on
        # how this source file happens to be indented.
        create_edge_stmt = (
            f"CREATE EDGE IF NOT EXISTS {edge_type} "
            f'(time_label DATETIME DEFAULT datetime("{time_label}"))'
        )

    for attempt in range(MAX_RETRIES):
        res = self.query(create_edge_stmt)
        if res.is_succeeded():
            return  # Edge type creation succeeded

        if attempt < MAX_RETRIES - 1:
            # Give the database a moment before trying again.
            time.sleep(RETRY_DELAY)
        else:
            # Final attempt failed, raise an exception
            raise Exception(
                f"Failed to create edge type `{edge_type}` after "
                f"{MAX_RETRIES} attempts: {res.error_msg()}"
            )
232240

233-
def ensure_tag_exists(self, tag_name: str) -> None:
241+
def ensure_tag_exists(
242+
self, tag_name: str, time_label: Optional[str] = None
243+
) -> None:
234244
r"""Ensures a tag is created in the NebulaGraph database. If the tag
235245
already exists, it does nothing.
236246
237247
Args:
238248
tag_name (str): The name of the tag to be created.
249+
time_label (str, optional): A specific timestamp to set as the
250+
default value for the time label property. If not provided,
251+
no timestamp will be added. (default: :obj:`None`)
239252
240253
Raises:
241254
Exception: If the tag creation fails after retries, an exception
242255
is raised with the error message.
243256
"""
244-
245-
create_tag_stmt = f'CREATE TAG IF NOT EXISTS {tag_name}()'
257+
create_tag_stmt = f"CREATE TAG IF NOT EXISTS {tag_name} ()"
258+
if time_label is not None:
259+
time_label = self._validate_time_label(time_label)
260+
create_tag_stmt = f"""CREATE TAG IF NOT EXISTS {tag_name}
261+
(time_label DATETIME DEFAULT {time_label})"""
246262

247263
for attempt in range(MAX_RETRIES):
248264
res = self.query(create_tag_stmt)
@@ -262,27 +278,39 @@ def add_node(
262278
self,
263279
node_id: str,
264280
tag_name: str,
281+
time_label: Optional[str] = None,
265282
) -> None:
266283
r"""Add a node with the specified tag and properties.
267284
268285
Args:
269286
node_id (str): The ID of the node.
270287
tag_name (str): The tag name of the node.
288+
time_label (str, optional): A specific timestamp to set for
289+
the node's time label property. If not provided, no timestamp
290+
will be added. (default: :obj:`None`)
271291
"""
272292
node_id = re.sub(r'[^a-zA-Z0-9\u4e00-\u9fa5]', '', node_id)
273293
tag_name = re.sub(r'[^a-zA-Z0-9\u4e00-\u9fa5]', '', tag_name)
274294

275-
self.ensure_tag_exists(tag_name)
295+
self.ensure_tag_exists(tag_name, time_label)
276296

277-
# Insert node without properties
278-
insert_stmt = (
279-
f'INSERT VERTEX IF NOT EXISTS {tag_name}() VALUES "{node_id}":()'
280-
)
297+
# Insert node with or without time_label property
298+
if time_label is not None:
299+
time_label = self._validate_time_label(time_label)
300+
insert_stmt = (
301+
f'INSERT VERTEX IF NOT EXISTS {tag_name}(time_label) VALUES '
302+
f'"{node_id}":("{time_label}")'
303+
)
304+
else:
305+
insert_stmt = (
306+
f'INSERT VERTEX IF NOT EXISTS {tag_name}() VALUES '
307+
f'"{node_id}":()'
308+
)
281309

282310
for attempt in range(MAX_RETRIES):
283311
res = self.query(insert_stmt)
284312
if res.is_succeeded():
285-
return # Tag creation succeeded, exit the method
313+
return # Node creation succeeded, exit the method
286314

287315
if attempt < MAX_RETRIES - 1:
288316
time.sleep(RETRY_DELAY)
@@ -348,7 +376,7 @@ def refresh_schema(self) -> None:
348376
@property
349377
def get_structured_schema(self) -> Dict[str, Any]:
350378
r"""Generates a structured schema consisting of node and relationship
351-
properties, relationships, and metadata.
379+
properties, relationships, and metadata, including timestamps.
352380
353381
Returns:
354382
Dict[str, Any]: A dictionary representing the structured schema.
@@ -419,6 +447,7 @@ def add_triplet(
419447
subj: str,
420448
obj: str,
421449
rel: str,
450+
time_label: Optional[str] = None,
422451
) -> None:
423452
r"""Adds a relationship (triplet) between two entities in the Nebula
424453
Graph database.
@@ -427,28 +456,44 @@ def add_triplet(
427456
subj (str): The identifier for the subject entity.
428457
obj (str): The identifier for the object entity.
429458
rel (str): The relationship between the subject and object.
459+
time_label (str, optional): A specific timestamp to set for the
460+
time label property of the relationship. If not provided,
461+
no timestamp will be added. (default: :obj:`None`)
462+
463+
Raises:
464+
ValueError: If the time_label format is invalid.
465+
Exception: If creating the relationship fails.
430466
"""
431467
subj = re.sub(r'[^a-zA-Z0-9\u4e00-\u9fa5]', '', subj)
432468
obj = re.sub(r'[^a-zA-Z0-9\u4e00-\u9fa5]', '', obj)
433469
rel = re.sub(r'[^a-zA-Z0-9\u4e00-\u9fa5]', '', rel)
434470

435471
self.ensure_tag_exists(subj)
436472
self.ensure_tag_exists(obj)
437-
self.ensure_edge_type_exists(rel)
473+
self.ensure_edge_type_exists(rel, time_label)
438474
self.add_node(node_id=subj, tag_name=subj)
439475
self.add_node(node_id=obj, tag_name=obj)
440476

441-
# Avoid latenicy
477+
# Avoid latency
442478
time.sleep(1)
443479

444-
insert_stmt = (
445-
f'INSERT EDGE IF NOT EXISTS {rel}() VALUES "{subj}"->"{obj}":();'
446-
)
480+
# Create edge with or without time_label property
481+
if time_label is not None:
482+
time_label = self._validate_time_label(time_label)
483+
insert_stmt = (
484+
f'INSERT EDGE IF NOT EXISTS {rel}(time_label) VALUES '
485+
f'"{subj}"->"{obj}":("{time_label}")'
486+
)
487+
else:
488+
insert_stmt = (
489+
f'INSERT EDGE IF NOT EXISTS {rel}() VALUES '
490+
f'"{subj}"->"{obj}":()'
491+
)
447492

448493
res = self.query(insert_stmt)
449494
if not res.is_succeeded():
450495
raise Exception(
451-
f'create relationship `]{subj}` -> `{obj}`'
496+
f'create relationship `{subj}` -> `{obj}`'
452497
+ f'failed: {res.error_msg()}'
453498
)
454499

@@ -568,3 +613,27 @@ def get_relationship_properties(
568613
)
569614

570615
return rel_schema_props, rel_structure_props
616+
617+
def _validate_time_label(self, time_label: str) -> str:
618+
r"""Validates the format of a time label string.
619+
620+
Args:
621+
time_label (str): The time label string to validate.
622+
Should be in format 'YYYY-MM-DDThh:mm:ss'.
623+
624+
Returns:
625+
str: The validated time label.
626+
627+
Raises:
628+
ValueError: If the time label format is invalid.
629+
"""
630+
try:
631+
# Check if the format matches YYYY-MM-DDThh:mm:ss
632+
pattern = r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$'
633+
if not re.match(pattern, time_label):
634+
raise ValueError(
635+
"Time label must be in format 'YYYY-MM-DDThh:mm:ss'"
636+
)
637+
return time_label
638+
except Exception as e:
639+
raise ValueError(f"Invalid time label format: {e!s}")

test/storages/graph_storages/test_nebula_graph.py

Lines changed: 76 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# limitations under the License.
1313
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
1414
import unittest
15-
from unittest.mock import Mock, patch
15+
from unittest.mock import Mock, call, patch
1616

1717
from unstructured.documents.elements import Element
1818

@@ -106,7 +106,7 @@ def test_add_node(self):
106106

107107
self.graph.add_node(node_id, tag_name)
108108

109-
self.graph.ensure_tag_exists.assert_called_with(tag_name)
109+
self.graph.ensure_tag_exists.assert_has_calls([call(tag_name, None)])
110110
insert_stmt = (
111111
f'INSERT VERTEX IF NOT EXISTS {tag_name}() VALUES "{node_id}":()'
112112
)
@@ -121,7 +121,7 @@ def test_ensure_tag_exists_success(self):
121121

122122
self.graph.ensure_tag_exists(tag_name)
123123

124-
create_tag_stmt = f'CREATE TAG IF NOT EXISTS {tag_name}()'
124+
create_tag_stmt = f'CREATE TAG IF NOT EXISTS {tag_name} ()'
125125
self.graph.query.assert_called_with(create_tag_stmt)
126126

127127
@patch('time.sleep', return_value=None)
@@ -153,13 +153,12 @@ def test_add_triplet(self):
153153

154154
self.graph.add_triplet(subj, obj, rel)
155155

156-
self.graph.ensure_tag_exists.assert_any_call(subj)
157-
self.graph.ensure_tag_exists.assert_any_call(obj)
158-
self.graph.ensure_edge_type_exists.assert_called_with(rel)
156+
self.graph.ensure_tag_exists.assert_has_calls([call(subj), call(obj)])
157+
self.graph.ensure_edge_type_exists.assert_has_calls([call(rel, None)])
159158
self.graph.add_node.assert_any_call(node_id=subj, tag_name=subj)
160159
self.graph.add_node.assert_any_call(node_id=obj, tag_name=obj)
161160
insert_stmt = (
162-
f'INSERT EDGE IF NOT EXISTS {rel}() VALUES "{subj}"->"{obj}":();'
161+
f'INSERT EDGE IF NOT EXISTS {rel}() VALUES "{subj}"->"{obj}":()'
163162
)
164163
self.graph.query.assert_called_with(insert_stmt)
165164

@@ -401,24 +400,24 @@ def test_get_schema(self):
401400
def test_get_structured_schema(self):
    r"""With the property, relationship-type and index getters stubbed,
    `get_structured_schema` must return the expected dictionary."""
    graph = self.graph

    node_info = (
        ['Person.name', 'Person.age'],
        [{'labels': 'Person', 'properties': ['name', 'age']}],
    )
    rel_info = (
        ['KNOWS.since'],
        [{'type': 'KNOWS', 'properties': ['since']}],
    )
    graph.get_node_properties = Mock(return_value=node_info)
    graph.get_relationship_properties = Mock(return_value=rel_info)
    graph.get_relationship_types = Mock(return_value=['KNOWS'])
    graph.get_indexes = Mock(return_value=[])

    # get_structured_schema is a property, so plain attribute access
    # triggers the computation.
    actual = graph.get_structured_schema

    self.assertEqual(
        actual,
        {
            "node_props": {"Person": ["name", "age"]},
            "rel_props": {"KNOWS": ["since"]},
            "relationships": ["KNOWS"],
            "metadata": {"index": []},
        },
    )
424423

@@ -465,6 +464,64 @@ def test_add_graph_elements(self):
465464
'node1', 'node2', 'RELATES_TO'
466465
)
467466

467+
def test_validate_time_label_valid(self):
    r"""A well-formed time label is handed back unchanged."""
    label = "2024-12-31T21:45:22"
    self.assertEqual(self.graph._validate_time_label(label), label)
471+
472+
def test_validate_time_label_none(self):
    r"""Passing ``None`` as the time label raises ``ValueError``."""
    self.assertRaises(ValueError, self.graph._validate_time_label, None)
475+
476+
def test_add_triplet_with_time_label(self):
    r"""`add_triplet` with a time label forwards the label to
    `ensure_edge_type_exists` and issues an INSERT EDGE statement that
    sets the `time_label` property."""
    subj = 'node1'
    obj = 'node2'
    rel = 'RELATESTO'
    time_label = '2024-12-31T21:45:22'

    self.graph.ensure_tag_exists = Mock()
    self.graph.ensure_edge_type_exists = Mock()
    self.graph.add_node = Mock()
    mock_result = Mock()
    mock_result.is_succeeded.return_value = True
    self.graph.query = Mock(return_value=mock_result)

    # add_triplet calls time.sleep(1) before inserting the edge; patch it
    # so the test does not actually wait.
    with patch('time.sleep', return_value=None):
        self.graph.add_triplet(subj, obj, rel, time_label)

    self.graph.ensure_tag_exists.assert_has_calls(
        [call('node1'), call('node2')]
    )
    self.graph.ensure_edge_type_exists.assert_called_with(rel, time_label)
    self.graph.add_node.assert_any_call(node_id=subj, tag_name=subj)
    self.graph.add_node.assert_any_call(node_id=obj, tag_name=obj)

    expected_stmt = (
        f'INSERT EDGE IF NOT EXISTS {rel}(time_label) VALUES '
        f'"{subj}"->"{obj}":("{time_label}")'
    )
    self.graph.query.assert_called_with(expected_stmt)
503+
504+
def test_add_triplet_with_invalid_time_label(self):
    r"""`add_triplet` raises ``ValueError`` when the time label is not in
    the 'YYYY-MM-DDThh:mm:ss' format."""
    bad_label = '2024/12/31 21:45:22'  # wrong format

    with self.assertRaises(ValueError) as raised:
        self.graph.add_triplet('node1', 'node2', 'RELATESTO', bad_label)

    message = str(raised.exception)
    self.assertIn("Invalid time label format", message)
514+
515+
def test_ensure_tag_exists_with_time_label(self):
# Checks that ensure_tag_exists, given a time label, sends a CREATE TAG
# statement declaring a `time_label DATETIME` property with that default.
516+
tag_name = 'Tag1'
517+
time_label = '2024-12-31T21:45:22'
518+
519+
# Make every query report success so the first attempt is the last one.
mock_result = Mock()
520+
mock_result.is_succeeded.return_value = True
521+
self.graph.query = Mock(return_value=mock_result)
522+
523+
self.graph.ensure_tag_exists(tag_name, time_label)
468524

469-
if __name__ == '__main__':
470-
unittest.main()
525+
# NOTE(review): the expected statement is a two-line f-string, so the
# newline and any leading whitespace on the continuation line are part of
# the compared text and must match the statement built in
# ensure_tag_exists character for character. The indentation is not
# visible in this view; confirm against the source files.
expected_stmt = f"""CREATE TAG IF NOT EXISTS {tag_name}
526+
(time_label DATETIME DEFAULT {time_label})"""
527+
self.graph.query.assert_called_with(expected_stmt)

0 commit comments

Comments
 (0)