Remove EntityPool and error chain-of-thought (CoT) code

a-s-g93 · a-s-g93 · Oct 16, 2024 · Oct 9, 2024 · Oct 9, 2024 · Oct 9, 2024
commit 71cfdf2a88ac3b93ed2cfde91bf5e48fc6f2693c
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,8 +8,10 @@
 
 ### Changed
 
-* Change `DataModel` generation logic to first generate nodes, then generate relationships.
+* Change `DataModel` generation logic to first generate nodes, then generate relationships
 * Updated examples
+* Add `DataModel` validator to check for parallel relationships
+* Update `DataModel`, `Node`, `Relationship` and `Property` validation logic to better utilize Pydantic
 
 ### Added
 

diff --git a/neo4j_runway/llm/base.py b/neo4j_runway/llm/base.py
@@ -10,23 +10,18 @@
 from instructor.exceptions import InstructorRetryException
 from tenacity import Retrying, stop_after_attempt
 
-from ..inputs import UserInput
 from ..models import DataModel
 from ..models.core.node import Nodes
 from ..resources.llm_response_types import (
-    DataModelEntityPool,
     DiscoveryResponse,
     ErrorRecommendations,
 )
 from ..resources.prompts import (
     SYSTEM_PROMPTS,
 )
 from ..resources.prompts.data_modeling import (
-    create_data_model_iteration_prompt,
-    create_initial_data_model_cot_prompt,
     create_initial_data_model_prompt,
     create_initial_nodes_prompt,
-    create_retry_data_model_generation_prompt,
 )
 from .context import create_context
 
@@ -276,21 +271,3 @@ def _get_data_model_response(
             )
 
         return response
-
-    def _get_chain_of_thought_for_error_recommendations_response(
-        self, formatted_prompt: str
-    ) -> str:
-        """
-        Generate fixes for the previous data model.
-        """
-        print("Analyzing errors...")
-        response: ErrorRecommendations = self.client.chat.completions.create(
-            model=self.model_name,
-            messages=[
-                {"role": "system", "content": SYSTEM_PROMPTS["retry"]},
-                {"role": "user", "content": formatted_prompt},
-            ],
-            response_model=ErrorRecommendations,
-            **self.model_params,
-        )
-        return response.recommendations
diff --git a/neo4j_runway/models/core/data_model.py b/neo4j_runway/models/core/data_model.py
@@ -4,15 +4,14 @@
 
 import json
 from ast import literal_eval
-from typing import Any, Dict, List, Optional, Set, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 import yaml
 from graphviz import Digraph
 from pydantic import (
     BaseModel,
     ValidationError,
     ValidationInfo,
-    field_validator,
     model_validator,
 )
 from pydantic_core import InitErrorDetails, PydanticCustomError
@@ -21,7 +20,6 @@
     InvalidArrowsDataModelError,
     InvalidSolutionsWorkbenchDataModelError,
 )
-from ...resources.prompts.data_modeling import create_data_model_errors_cot_prompt
 from ..arrows import ArrowsDataModel, ArrowsNode, ArrowsRelationship
 from ..solutions_workbench import (
     SolutionsWorkbenchDataModel,
@@ -51,38 +49,6 @@ class DataModel(BaseModel):
     relationships: List[Relationship]
     metadata: Optional[Dict[str, Any]] = None
 
-    # def __init__(
-    #     self,
-    #     nodes: List[Node],
-    #     relationships: List[Relationship],
-    #     metadata: Optional[Dict[str, Any]] = None,
-    #     use_neo4j_naming_conventions: bool = True,
-    # ) -> None:
-    #     """
-    #     The standard Graph Data Model representation in Neo4j Runway.
-
-    #     Parameters
-    #     ----------
-    #     nodes : List[Node]
-    #         A list of the nodes in the data model.
-    #     relationships : List[Relationship]
-    #         A list of the relationships in the data model.
-    #     metadata: Optional[Dict[str, Any]]
-    #         Metadata from an import source such as Solutions Workbench, by default None
-    #     use_neo4j_naming_conventions : bool, optional
-    #         Whether to convert labels, relationships and properties to Neo4j naming conventions, by default True
-    #     """
-    #     super().__init__(
-    #         nodes=nodes,
-    #         relationships=relationships,
-    #         metadata=metadata,
-    #         use_neo4j_naming_conventions=True,
-    #     )
-
-    #     # default apply Neo4j naming conventions.
-    #     if use_neo4j_naming_conventions:
-    #         self.apply_neo4j_naming_conventions()
-
     @property
     def node_labels(self) -> List[str]:
         """

diff --git a/neo4j_runway/models/core/node.py b/neo4j_runway/models/core/node.py
@@ -428,16 +428,6 @@ def validate_nodes(cls, nodes: List[Node]) -> List[Node]:
     def advanced_validation(self, info: ValidationInfo) -> "Nodes":
         errors: List[InitErrorDetails] = list()
 
-        def _retrieve_duplicated_property_column_mapping(
-            context: Tuple[str, str, int, str, int, str],
-        ) -> str:
-            """Retrieve a `Property` in the data model that shares a `column_mapping` attribute."""
-
-            prop: Property = self.__getattribute__(context[1])[context[2]].properties[
-                context[4]
-            ]
-            return prop.column_mapping
-
         def _parse_duplicated_property_location(
             context: Tuple[str, str, int, str, int, str],
         ) -> Tuple[str, int, str, int, str]:

diff --git a/neo4j_runway/resources/llm_response_types/error_recommendations.py b/neo4j_runway/resources/llm_response_types/error_recommendations.py
diff --git a/neo4j_runway/resources/llm_response_types/initial_model_pool.py b/neo4j_runway/resources/llm_response_types/initial_model_pool.py
diff --git a/neo4j_runway/resources/prompts/data_modeling/__init__.py b/neo4j_runway/resources/prompts/data_modeling/__init__.py
@@ -1,20 +1,10 @@
-from .data_model_error_handling import (
-    create_data_model_errors_cot_prompt,
-    create_retry_data_model_generation_prompt,
-    create_retry_initial_data_model_prep_generation_prompt,
-)
 from .initial_data_model import (
-    create_initial_data_model_cot_prompt,
     create_initial_data_model_prompt,
     create_initial_nodes_prompt,
 )
 from .iterative_data_model import create_data_model_iteration_prompt
 
 __all__ = [
-    "create_retry_initial_data_model_prep_generation_prompt",
-    "create_data_model_errors_cot_prompt",
-    "create_retry_data_model_generation_prompt",
-    "create_initial_data_model_cot_prompt",
     "create_initial_data_model_prompt",
     "create_data_model_iteration_prompt",
     "create_initial_nodes_prompt",

diff --git a/neo4j_runway/resources/prompts/data_modeling/constants.py b/neo4j_runway/resources/prompts/data_modeling/constants.py
@@ -78,22 +78,6 @@
 }
 """
 
-ENTITY_POOL_GENERATION_RULES = """Based upon the above information and of high-quality graph data models,
-return the following:
-* Any possible Nodes and their respective properties
-* Any possible Relationships and their respective source Nodes and target Nodes
-* Relationships and their respective properties, if any
-* Explanations for each decision and how it will benefit the data model
-* All possible relationships for nodes
-* `source_name` for all nodes and relationships
-
-Remember
-* All properties must be found in the data dictionary above!
-* A property may have an alias in another file as a foreign key.
-* A node may not have properties from multiple files!
-* A relationship may not have properties from multiple files!
-* Find properties that may uniquely identify Nodes"""
-
 NODE_GENERATION_RULES = """Please follow these rules strictly! Billions of dollars depend on you.
 Nodes
 * Each node must have a unique property or node key pair

diff --git a/neo4j_runway/resources/prompts/data_modeling/data_model_error_handling.py b/neo4j_runway/resources/prompts/data_modeling/data_model_error_handling.py
diff --git a/neo4j_runway/resources/prompts/data_modeling/initial_data_model.py b/neo4j_runway/resources/prompts/data_modeling/initial_data_model.py
@@ -1,45 +1,14 @@
 from typing import Any, Dict, Optional
 
-from ...llm_response_types.initial_model_pool import DataModelEntityPool
 from .constants import (
     DATA_MODEL_FORMAT,
-    ENTITY_POOL_GENERATION_RULES,
     NODE_GENERATION_RULES,
     NODES_FORMAT,
 )
 from .formatters import get_rules
 from .template import create_data_modeling_prompt
 
 
-def create_initial_data_model_cot_prompt(
-    discovery_text: str,
-    multifile: bool,
-    use_cases: Optional[str],
-    valid_columns: Dict[str, Any],
-    data_dictionary: Optional[Dict[str, Any]] = None,
-) -> str:
-    """
-    Generate a prompt to find nodes, relationships and properties to include in a data model.
-    This is only for brainstorming, result of prompt should not be a DataModel.
-
-    Returns
-    -------
-    str
-        The prompt.
-    """
-    prefix = "Please generate a pool of entities that will be used to construct a graph data model."
-
-    return create_data_modeling_prompt(
-        prefix=prefix,
-        discovery=discovery_text,
-        multifile=multifile,
-        use_cases=use_cases,
-        valid_columns=valid_columns,
-        data_dictionary=data_dictionary,
-        rules=ENTITY_POOL_GENERATION_RULES,
-    )
-
-
 def create_initial_nodes_prompt(
     discovery_text: str,
     multifile: bool,
@@ -88,7 +57,6 @@ def create_initial_data_model_prompt(
     return create_data_modeling_prompt(
         prefix=prefix,
         discovery=discovery_text,
-        entity_pool=data_model_recommendations,
         valid_columns=valid_columns,
         data_dictionary=data_dictionary,
         use_cases=use_cases,

diff --git a/neo4j_runway/resources/prompts/data_modeling/template.py b/neo4j_runway/resources/prompts/data_modeling/template.py
@@ -31,7 +31,6 @@ def create_data_modeling_prompt(
     errors: Optional[List[str]] = None,
     corrections: Optional[str] = None,
     data_model: Optional["DataModel"] = None,  # type: ignore
-    entity_pool: Optional["DataModelEntityPool"] = None,  # type: ignore
     nodes: Optional["Nodes"] = None,  # type: ignore
     use_cases: Optional[str] = None,
     data_model_format: Optional[str] = None,
@@ -52,8 +51,6 @@ def create_data_modeling_prompt(
         res += format_corrections(corrections=corrections)
     if data_model is not None:
         res += format_data_model(data_model=data_model, yaml_format=data_model_as_yaml)
-    if entity_pool is not None:
-        res += format_entity_pool(entity_pool=entity_pool, retry_prompt=retry_prompt)
     if nodes is not None:
         res += format_nodes(nodes=nodes, retry_prompt=retry_prompt)
     if use_cases is not None: