Merge branch 'ml4ai:main' into main
YohannParis authored Jan 24, 2024
2 parents cb4c9c0 + 4605924 commit 4d5815a
Showing 73 changed files with 4,912 additions and 2,649 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests-and-docs.yml
@@ -86,7 +86,7 @@ jobs:
# Install tree-sitter parser (for Python component unit tests)
- name: Install tree-sitter parsers
working-directory: .
run: python skema/program_analysis/tree_sitter_parsers/build_parsers.py --all
run: python skema/program_analysis/tree_sitter_parsers/build_parsers.py --ci --all


# docs (API)
9 changes: 9 additions & 0 deletions docs/dev/cast_frontend.md
@@ -0,0 +1,9 @@
## CAST FrontEnd Generation Notes
### Using Var vs Name nodes
Currently, CAST generation follows a convention for when to use Var versus Name nodes.
The GroMEt generation depends on this convention being applied consistently; otherwise errors will occur during generation.
The convention may change or be eliminated in the future, but for now these are the rules (a small illustrative sketch follows the list).

- If the variable in question is being stored into (i.e., it is the target of an assignment), we use Var, even if the variable has already been defined.
- If the variable in question is being read from (i.e., it is used in an expression), we use Name.
- Whenever we create a function call Call() node, the name of the function being called is specified with a Name node.
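
As a concrete illustration, here is a minimal sketch of how an assignment such as `x = y + 1` and a call such as `foo(x)` follow the convention. The simplified `Name`, `Var`, `Assignment`, and `Call` classes below are stand-ins invented for this example, not the actual classes from `skema.program_analysis.CAST2FN.model.cast`.

```python
from dataclasses import dataclass, field
from typing import Any, List

# Simplified stand-ins for the real CAST node classes (illustrative only).
@dataclass
class Name:
    name: str                  # a variable or function name that is read/referenced

@dataclass
class Var:
    val: Name                  # a variable that is being stored into

@dataclass
class Assignment:
    left: Var                  # assignment target -> always a Var node
    right: Any                 # expression being read -> built from Name nodes

@dataclass
class Call:
    func: Name                 # the callee is always specified with a Name node
    arguments: List[Any] = field(default_factory=list)

# x = y + 1  -> `x` is stored into (Var), even if it was defined earlier; `y` is read (Name).
assign = Assignment(left=Var(Name("x")), right=("+", Name("y"), 1))

# foo(x) -> the function name `foo` is a Name node; the argument `x` is read, so also a Name.
call = Call(func=Name("foo"), arguments=[Name("x")])
```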
1 change: 1 addition & 0 deletions mkdocs.yml
@@ -45,6 +45,7 @@ nav:
- Generating code2fn model coverage reports: "dev/generating_code2fn_model_coverage.md"
- Using code ingestion frontends: "dev/using_code_ingestion_frontends.md"
- Using tree-sitter preprocessor: "dev/using_tree_sitter_preprocessor.md"
- CAST Front-end generation: "dev/cast_frontend.md"
- Coverage:
- Code2fn coverage reports: "coverage/code2fn_coverage/report.html"
- TA1 Integration Dashboard: "https://integration-dashboard.terarium.ai/TA1"
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -23,6 +23,7 @@ dependencies=[
"typing_extensions", # see https://github.com/pydantic/pydantic/issues/5821#issuecomment-1559196859
"fastapi~=0.100.0",
"starlette",
"httpx",
"pydantic>=2.0.0",
"uvicorn",
"python-multipart",
@@ -42,7 +43,7 @@ dynamic = ["readme"]
# Pygraphviz is often tricky to install, so we reserve it for the dev extras
# list.
# - six: Required by auto-generated Swagger models
dev = ["pytest", "pytest-cov", "pytest-xdist", "httpx", "black", "mypy", "coverage", "pygraphviz", "six"]
dev = ["pytest", "pytest-cov", "pytest-xdist", "pytest-asyncio", "black", "mypy", "coverage", "pygraphviz", "six"]

demo = ["notebook"]

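Two of the dependency edits above are related: httpx moves into the core dependency list because the REST and ISA routes now use it at runtime (see the isa_service.py changes below), and pytest-asyncio is added so that those async routes can be exercised with coroutine test functions. A minimal sketch of the kind of test this combination enables; the toy app and route here are invented for illustration and are not part of the repository:

```python
import pytest
import httpx
from fastapi import FastAPI

toy_app = FastAPI()

@toy_app.get("/ping")
async def ping() -> str:
    return "pong"

@pytest.mark.asyncio
async def test_ping_roundtrip() -> None:
    # pytest-asyncio lets the test body be a coroutine, so the httpx.AsyncClient
    # can drive the ASGI app in-process without starting a server.
    transport = httpx.ASGITransport(app=toy_app)
    async with httpx.AsyncClient(transport=transport, base_url="http://test") as client:
        response = await client.get("/ping")
        assert response.status_code == 200
        assert response.json() == "pong"
```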
4 changes: 2 additions & 2 deletions skema/gromet/fn/gromet_fn_module.py
@@ -191,7 +191,7 @@ def fn_array(self, fn_array):
def metadata_collection(self):
"""Gets the metadata_collection of this GrometFNModule. # noqa: E501
Table (array) of lists (arrays) of metadata, where each list in the Table-array represents the collection of metadata associated with a GroMEt object. # noqa: E501
Table (array) of lists (arrays) of metadata, where each list in the Table-array represents the collection of metadata associated with a GrometFNModule object. # noqa: E501
:return: The metadata_collection of this GrometFNModule. # noqa: E501
:rtype: list[list[Metadata]]
@@ -202,7 +202,7 @@ def metadata_collection(self):
def metadata_collection(self, metadata_collection):
"""Sets the metadata_collection of this GrometFNModule.
Table (array) of lists (arrays) of metadata, where each list in the Table-array represents the collection of metadata associated with a GroMEt object. # noqa: E501
Table (array) of lists (arrays) of metadata, where each list in the Table-array represents the collection of metadata associated with a GrometFNModule object. # noqa: E501
:param metadata_collection: The metadata_collection of this GrometFNModule. # noqa: E501
:type: list[list[Metadata]]
10 changes: 5 additions & 5 deletions skema/img2mml/eqn2mml.py
@@ -7,7 +7,7 @@

from typing import Text
from typing_extensions import Annotated
from fastapi import APIRouter, FastAPI, Response, Request, Query, UploadFile
from fastapi import APIRouter, FastAPI, status, Response, Request, Query, UploadFile
from skema.rest.proxies import SKEMA_MATHJAX_ADDRESS
from skema.img2mml.api import (
get_mathml_from_bytes,
@@ -86,23 +86,23 @@ def process_latex_equation(eqn: Text) -> Response:
"/img2mml/healthcheck",
summary="Check health of eqn2mml service",
response_model=int,
status_code=200,
status_code=status.HTTP_200_OK,
)
def img2mml_healthcheck() -> int:
return 200
return status.HTTP_200_OK


@router.get(
"/latex2mml/healthcheck",
summary="Check health of mathjax service",
response_model=int,
status_code=200,
status_code=status.HTTP_200_OK,
)
def latex2mml_healthcheck() -> int:
try:
return int(requests.get(f"{SKEMA_MATHJAX_ADDRESS}/healthcheck").status_code)
except:
return 500
return status.HTTP_500_INTERNAL_SERVER_ERROR


@router.post("/image/mml", summary="Get MathML representation of an equation image")
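The switch above from hard-coded integers to FastAPI's `status` constants is behavior-preserving: the constants are plain `int` values with readable names. A two-line sanity check (illustrative only):

```python
from fastapi import status

assert status.HTTP_200_OK == 200 and status.HTTP_500_INTERNAL_SERVER_ERROR == 500
```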
44 changes: 44 additions & 0 deletions skema/isa/data.py
@@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-

mml = """<math>
<mfrac>
<mrow>
<mi>&#x2202;</mi>
<mi>H</mi>
</mrow>
<mrow>
<mi>&#x2202;</mi>
<mi>t</mi>
</mrow>
</mfrac>
<mo>=</mo>
<mi>&#x2207;</mi>
<mo>&#x22C5;</mo>
<mo>(</mo>
<mi>&#x0393;</mi>
<msup>
<mi>H</mi>
<mrow>
<mi>n</mi>
<mo>+</mo>
<mn>2</mn>
</mrow>
</msup>
<mo>|</mo>
<mi>&#x2207;</mi>
<mi>H</mi>
<msup>
<mo>|</mo>
<mrow>
<mi>n</mi>
<mo>&#x2212;</mo>
<mn>1</mn>
</mrow>
</msup>
<mi>&#x2207;</mi>
<mi>H</mi>
<mo>)</mo>
</math>
"""

expected = 'digraph G {\n0 [color=blue, label="Div(Γ*(H^(n+2))*(Abs(Grad(H))^(n-1))*Grad(H))"];\n1 [color=blue, label="D(1, t)(H)"];\n2 [color=blue, label="Γ*(H^(n+2))*(Abs(Grad(H))^(n-1))*Grad(H)"];\n3 [color=blue, label="Γ"];\n4 [color=blue, label="H^(n+2)"];\n5 [color=blue, label="H"];\n6 [color=blue, label="n+2"];\n7 [color=blue, label="n"];\n8 [color=blue, label="2"];\n9 [color=blue, label="Abs(Grad(H))^(n-1)"];\n10 [color=blue, label="Abs(Grad(H))"];\n11 [color=blue, label="Grad(H)"];\n12 [color=blue, label="n-1"];\n13 [color=blue, label="1"];\n1 -> 0 [color=blue, label="="];\n2 -> 0 [color=blue, label="Div"];\n3 -> 2 [color=blue, label="*"];\n4 -> 2 [color=blue, label="*"];\n5 -> 4 [color=blue, label="^"];\n6 -> 4 [color=blue, label="^"];\n7 -> 6 [color=blue, label="+"];\n8 -> 6 [color=blue, label="+"];\n9 -> 2 [color=blue, label="*"];\n10 -> 9 [color=blue, label="^"];\n11 -> 10 [color=blue, label="Abs"];\n5 -> 11 [color=blue, label="Grad"];\n12 -> 9 [color=blue, label="^"];\n7 -> 12 [color=blue, label="+"];\n13 -> 12 [color=blue, label="-"];\n11 -> 2 [color=blue, label="*"];\n}\n'
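
For reference, the MathML sample and the expected dependency graph above encode the same equation; one reading of the markup, written out in LaTeX (not text taken from the repository), is:

```latex
\frac{\partial H}{\partial t}
  = \nabla \cdot \left( \Gamma \, H^{\,n+2} \, \lvert \nabla H \rvert^{\,n-1} \, \nabla H \right)
```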
61 changes: 47 additions & 14 deletions skema/isa/isa_service.py
@@ -1,12 +1,15 @@
# -*- coding: utf-8 -*-

from fastapi import FastAPI, File
from fastapi import Depends, FastAPI, APIRouter, status
from skema.isa.lib import align_mathml_eqs
import skema.isa.data as isa_data
from skema.rest import utils
from pydantic import BaseModel
import httpx

# Create a web app using FastAPI
from skema.rest.proxies import SKEMA_RS_ADDESS

app = FastAPI()
router = APIRouter()


# Model for ISA_Result
@@ -15,17 +18,39 @@ class ISA_Result(BaseModel):
union_graph: str = None


@app.get("/ping", summary="Ping endpoint to test health of service")
def ping():
return "The ISA service is running."
@router.get(
"/healthcheck",
summary="Status of ISA service",
response_model=int,
status_code=status.HTTP_200_OK
)
async def healthcheck(client: httpx.AsyncClient = Depends(utils.get_client)) -> int:
res = await client.get(f"{SKEMA_RS_ADDESS}/ping")
return res.status_code


@app.put("/align-eqns", summary="Align two MathML equations")
@router.post(
"/align-eqns",
summary="Align two MathML equations"
)
async def align_eqns(
file1: str, file2: str, mention_json1: str = "", mention_json2: str = ""
mml1: str, mml2: str, mention_json1: str = "", mention_json2: str = ""
) -> ISA_Result:
"""
f"""
Endpoint for aligning two MathML equations.
### Python example
```
import requests
request = {{
"mml1": {isa_data.mml},
"mml2": {isa_data.mml}
}}
response = requests.post("/isa/align-eqns", json=request)
res = response.json()
```
"""
(
matching_ratio,
@@ -36,8 +61,16 @@ async def align_eqns(
aligned_indices2,
union_graph,
perfectly_matched_indices1,
) = align_mathml_eqs(file1, file2, mention_json1, mention_json2)
ir = ISA_Result()
ir.matching_ratio = matching_ratio
ir.union_graph = union_graph.to_string()
return ir
) = align_mathml_eqs(mml1, mml2, mention_json1, mention_json2)
return ISA_Result(
matching_ratio = matching_ratio,
union_graph = union_graph.to_string()
)


app = FastAPI()
app.include_router(
router,
prefix="/isa",
tags=["isa"],
)
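
A minimal client-side sketch of how the re-mounted routes above could be exercised once the application is running. The base URL is an assumption, and the equations are passed as query parameters because the route declares plain `str` arguments rather than a request body; this mirrors, in spirit, the example in the endpoint's docstring:

```python
import requests

BASE = "http://localhost:8000"  # assumed host/port where this FastAPI app is served

# Health of the ISA service (internally pings the SKEMA Rust service).
print(requests.get(f"{BASE}/isa/healthcheck").status_code)

# Align an equation with itself as a smoke test, using MathML read from a local file.
with open("equation.xml") as f:  # hypothetical file containing a MathML string
    mml = f.read()

params = {"mml1": mml, "mml2": mml, "mention_json1": "", "mention_json2": ""}
response = requests.post(f"{BASE}/isa/align-eqns", params=params)
result = response.json()
print(result["matching_ratio"], result["union_graph"][:60])
```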
16 changes: 9 additions & 7 deletions skema/isa/lib.py
@@ -2,13 +2,14 @@
"""
All the functions required by performing incremental structure alignment (ISA)
Author: Liang Zhang ([email protected])
Updated date: August 24, 2023
Updated date: December 18, 2023
"""
import json
import warnings
from typing import List, Any, Union, Dict
from numpy import ndarray
from pydot import Dot
from skema.rest.proxies import SKEMA_RS_ADDESS

warnings.filterwarnings("ignore")
import requests
@@ -173,8 +174,9 @@ def generate_graph(file: str = "", render: bool = False) -> pydot.Dot:
content = f.read()

digraph = requests.put(
"http://localhost:8080/mathml/math-exp-graph", data=content.encode("utf-8")
f"{SKEMA_RS_ADDESS}/mathml/math-exp-graph", data=content.encode("utf-8")
)

if render:
src = Source(digraph.text)
src.render("doctest-output/mathml_exp_tree", view=True)
@@ -671,8 +673,8 @@ def check_square_array(arr: np.ndarray) -> List[int]:


def align_mathml_eqs(
file1: str = "",
file2: str = "",
mml1: str = "",
mml2: str = "",
mention_json1: str = "",
mention_json2: str = "",
mode: int = 2,
@@ -685,7 +687,7 @@
[1] Fishkind, D. E., Adali, S., Patsolic, H. G., Meng, L., Singh, D., Lyzinski, V., & Priebe, C. E. (2019).
Seeded graph matching. Pattern recognition, 87, 203-215.
Input: the paths of the two equation MathMLs; mention_json1: the mention file of paper 1; mention_json1: the mention file of paper 2;
Input: mml1 & mml2: the file paths or contents of the two equation MathMLs; mention_json1: the mention file of paper 1; mention_json2: the mention file of paper 2;
mode 0: without considering any priors; mode 1: having a heuristic prior
with the similarity of node labels; mode 2: using the variable definitions
Output:
@@ -698,8 +700,8 @@
union_graph: the visualization of the alignment result
perfectly_matched_indices1: strictly matched node indices in Graph 1
"""
graph1 = generate_graph(file1)
graph2 = generate_graph(file2)
graph1 = generate_graph(mml1)
graph2 = generate_graph(mml2)

amatrix1, node_labels1 = generate_amatrix(graph1)
amatrix2, node_labels2 = generate_amatrix(graph2)
9 changes: 7 additions & 2 deletions skema/program_analysis/CAST/fortran/node_helper.py
@@ -1,8 +1,10 @@
import itertools
from typing import List, Dict
from skema.program_analysis.CAST2FN.model.cast import SourceRef

from tree_sitter import Node

from skema.program_analysis.CAST2FN.model.cast import SourceRef

CONTROL_CHARACTERS = [
",",
"=",
@@ -41,7 +43,7 @@ def __init__(self, source: str, source_file_name: str):
# get_identifier optimization variables
self.source_lines = source.splitlines(keepends=True)
self.line_lengths = [len(line) for line in self.source_lines]
self.line_length_sums = [sum(self.line_lengths[:i+1]) for i in range(len(self.source_lines))]
self.line_length_sums = list(itertools.accumulate(self.line_lengths))  # running prefix sums of line lengths

def get_source_ref(self, node: Node) -> SourceRef:
"""Given a node and file name, return a CAST SourceRef object."""
@@ -96,6 +98,9 @@ def get_children_by_types(node: Node, types: List):
"""Takes in a node and a list of types as inputs and returns all children matching those types. Otherwise, return an empty list"""
return [child for child in node.children if child.type in types]

def get_children_except_types(node: Node, types: List):
"""Takes in a node and a list of types as inputs and returns all children not matching those types. Otherwise, return an empty list"""
return [child for child in node.children if child.type not in types]

def get_first_child_index(node, type: str):
"""Get the index of the first child of node with type type."""
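The line_length_sums change above replaces a quadratic list comprehension with a linear prefix-sum. A small self-contained check of the equivalence (the sample lines are made up for illustration):

```python
import itertools

source_lines = ["program main\n", "  x = 1\n", "end program main\n"]
line_lengths = [len(line) for line in source_lines]

# Old form: re-sums the prefix for every index, O(n^2) overall.
old_sums = [sum(line_lengths[:i + 1]) for i in range(len(source_lines))]

# New form: running totals in a single pass, O(n).
new_sums = list(itertools.accumulate(line_lengths))

assert old_sums == new_sums == [13, 21, 38]
```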
18 changes: 12 additions & 6 deletions skema/program_analysis/CAST/fortran/preprocessor/preprocess.py
@@ -34,7 +34,7 @@ def preprocess(
"""
# NOTE: The order of preprocessing steps does matter. We have to run the GCC preprocessor before correcting the continuation lines or there could be issues

# TODO: Create single location for generating include base path
# TODO: Create single location for generating include base path
source = source_path.read_text()

# Get paths for intermediate products
@@ -67,21 +67,21 @@

# Step 2: Correct include directives to remove system references
source = fix_include_directives(source)

# Step 3: Process with gcc c-preprocessor
include_base_directory = Path(source_path.parent, f"include_{source_path.stem}")
if not include_base_directory.exists():
include_base_directory = include_base_directory.parent
source = run_c_preprocessor(source, include_base_directory)
if out_gcc:
gcc_path.write_text(source)

# Step 4: Prepare for tree-sitter
# This step removes any additional preprocessor directives added or not removed by GCC
source = "\n".join(
["!" + line if line.startswith("#") else line for line in source.splitlines()]
)

# Step 5: Check for unsupported idioms
if out_unsupported:
unsupported_path.write_text(
@@ -173,7 +173,7 @@ def fix_include_directives(source: str) -> str:
def run_c_preprocessor(source: str, include_base_path: Path) -> str:
"""Run the gcc c-preprocessor. Its run from the context of the include_base_path, so that it can find all included files"""
result = run(
["gcc", "-cpp", "-E", "-"],
["gcc", "-cpp", "-E", "-x", "f95", "-"],
input=source,
text=True,
capture_output=True,
@@ -183,8 +183,14 @@
return result.stdout


def convert_assigned(source: str) -> str:
"""Convered ASSIGNED GO TO to COMPUTED GO TO"""
pass


def convert_to_free_form(source: str) -> str:
"""If fixed-form Fortran source, convert to free-form"""

def validate_parse_tree(source: str) -> bool:
"""Parse source with tree-sitter and check if an error is returned."""
language = Language(INSTALLED_LANGUAGES_FILEPATH, "fortran")
@@ -204,7 +210,7 @@ def validate_parse_tree(source: str) -> bool:
)
if validate_parse_tree(free_source):
return free_source

return source


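The added `-x f95` flag makes gcc treat the source piped on stdin as free-form Fortran rather than guessing the language from a file extension. A stand-alone sketch of the same invocation, assuming a gcc with Fortran support is on the PATH (the sample program is invented for illustration):

```python
from subprocess import run

fortran_source = """\
#define N 3
program demo
  integer :: i
  do i = 1, N
    print *, i
  end do
end program demo
"""

# Mirrors run_c_preprocessor: read Fortran from stdin, emit preprocessed text on stdout.
result = run(
    ["gcc", "-cpp", "-E", "-x", "f95", "-"],
    input=fortran_source,
    text=True,
    capture_output=True,
)
print(result.stdout)  # the loop bound N has been expanded to 3 by the preprocessor
```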