Merge branch 'ml4ai:main' into main
YohannParis authored Jan 24, 2024
2 parents cb4c9c0 + 4605924 commit 4d5815a
Showing 73 changed files with 4,912 additions and 2,649 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests-and-docs.yml
@@ -86,7 +86,7 @@ jobs:
# Install tree-sitter parser (for Python component unit tests)
- name: Install tree-sitter parsers
working-directory: .
run: python skema/program_analysis/tree_sitter_parsers/build_parsers.py --all
run: python skema/program_analysis/tree_sitter_parsers/build_parsers.py --ci --all


# docs (API)
9 changes: 9 additions & 0 deletions docs/dev/cast_frontend.md
@@ -0,0 +1,9 @@
## CAST FrontEnd Generation Notes
### Using Var vs Name nodes
Currently, CAST generation follows a convention for when to use Var versus Name nodes.
The GroMEt generation depends on this convention being applied consistently; otherwise errors will occur during generation.
The convention may change or be eliminated in the future, but for now these are the rules (a small illustrative sketch follows the list).

- If the variable in question is being stored into (i.e., it is the target of an assignment), we use Var, even if the variable has already been defined.
- If the variable in question is being read from (i.e., it is used in an expression), we use Name.
- Whenever we create a function call Call() node, the name of the function being called is specified with a Name node.
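
As a concrete illustration, here is a minimal sketch of how an assignment such as `x = y + 1` and a call such as `foo(x)` follow the convention. The simplified `Name`, `Var`, `Assignment`, and `Call` classes below are stand-ins invented for this example, not the actual classes from `skema.program_analysis.CAST2FN.model.cast`.

```python
from dataclasses import dataclass, field
from typing import Any, List

# Simplified stand-ins for the real CAST node classes (illustrative only).
@dataclass
class Name:
    name: str                  # a variable or function name that is read/referenced

@dataclass
class Var:
    val: Name                  # a variable that is being stored into

@dataclass
class Assignment:
    left: Var                  # assignment target -> always a Var node
    right: Any                 # expression being read -> built from Name nodes

@dataclass
class Call:
    func: Name                 # the callee is always specified with a Name node
    arguments: List[Any] = field(default_factory=list)

# x = y + 1  -> `x` is stored into (Var), even if it was defined earlier; `y` is read (Name).
assign = Assignment(left=Var(Name("x")), right=("+", Name("y"), 1))

# foo(x) -> the function name `foo` is a Name node; the argument `x` is read, so also a Name.
call = Call(func=Name("foo"), arguments=[Name("x")])
```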
1 change: 1 addition & 0 deletions mkdocs.yml
@@ -45,6 +45,7 @@ nav:
- Generating code2fn model coverage reports: "dev/generating_code2fn_model_coverage.md"
- Using code ingestion frontends: "dev/using_code_ingestion_frontends.md"
- Using tree-sitter preprocessor: "dev/using_tree_sitter_preprocessor.md"
- CAST Front-end generation: "dev/cast_frontend.md"
- Coverage:
- Code2fn coverage reports: "coverage/code2fn_coverage/report.html"
- TA1 Integration Dashboard: "https://integration-dashboard.terarium.ai/TA1"
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -23,6 +23,7 @@ dependencies=[
"typing_extensions", # see https://github.com/pydantic/pydantic/issues/5821#issuecomment-1559196859
"fastapi~=0.100.0",
"starlette",
"httpx",
"pydantic>=2.0.0",
"uvicorn",
"python-multipart",
@@ -42,7 +43,7 @@ dynamic = ["readme"]
# Pygraphviz is often tricky to install, so we reserve it for the dev extras
# list.
# - six: Required by auto-generated Swagger models
dev = ["pytest", "pytest-cov", "pytest-xdist", "httpx", "black", "mypy", "coverage", "pygraphviz", "six"]
dev = ["pytest", "pytest-cov", "pytest-xdist", "pytest-asyncio", "black", "mypy", "coverage", "pygraphviz", "six"]

demo = ["notebook"]

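Two of the dependency edits above are related: httpx moves into the core dependency list because the REST and ISA routes now use it at runtime (see the isa_service.py changes below), and pytest-asyncio is added so that those async routes can be exercised with coroutine test functions. A minimal sketch of the kind of test this combination enables; the toy app and route here are invented for illustration and are not part of the repository:

```python
import pytest
import httpx
from fastapi import FastAPI

toy_app = FastAPI()

@toy_app.get("/ping")
async def ping() -> str:
    return "pong"

@pytest.mark.asyncio
async def test_ping_roundtrip() -> None:
    # pytest-asyncio lets the test body be a coroutine, so the httpx.AsyncClient
    # can drive the ASGI app in-process without starting a server.
    transport = httpx.ASGITransport(app=toy_app)
    async with httpx.AsyncClient(transport=transport, base_url="http://test") as client:
        response = await client.get("/ping")
        assert response.status_code == 200
        assert response.json() == "pong"
```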
4 changes: 2 additions & 2 deletions skema/gromet/fn/gromet_fn_module.py
@@ -191,7 +191,7 @@ def fn_array(self, fn_array):
def metadata_collection(self):
"""Gets the metadata_collection of this GrometFNModule. # noqa: E501
Table (array) of lists (arrays) of metadata, where each list in the Table-array represents the collection of metadata associated with a GroMEt object. # noqa: E501
Table (array) of lists (arrays) of metadata, where each list in the Table-array represents the collection of metadata associated with a GrometFNModule object. # noqa: E501
:return: The metadata_collection of this GrometFNModule. # noqa: E501
:rtype: list[list[Metadata]]
@@ -202,7 +202,7 @@ def metadata_collection(self):
def metadata_collection(self, metadata_collection):
"""Sets the metadata_collection of this GrometFNModule.
Table (array) of lists (arrays) of metadata, where each list in the Table-array represents the collection of metadata associated with a GroMEt object. # noqa: E501
Table (array) of lists (arrays) of metadata, where each list in the Table-array represents the collection of metadata associated with a GrometFNModule object. # noqa: E501
:param metadata_collection: The metadata_collection of this GrometFNModule. # noqa: E501
:type: list[list[Metadata]]
10 changes: 5 additions & 5 deletions skema/img2mml/eqn2mml.py
@@ -7,7 +7,7 @@

from typing import Text
from typing_extensions import Annotated
from fastapi import APIRouter, FastAPI, Response, Request, Query, UploadFile
from fastapi import APIRouter, FastAPI, status, Response, Request, Query, UploadFile
from skema.rest.proxies import SKEMA_MATHJAX_ADDRESS
from skema.img2mml.api import (
get_mathml_from_bytes,
@@ -86,23 +86,23 @@ def process_latex_equation(eqn: Text) -> Response:
"/img2mml/healthcheck",
summary="Check health of eqn2mml service",
response_model=int,
status_code=200,
status_code=status.HTTP_200_OK,
)
def img2mml_healthcheck() -> int:
return 200
return status.HTTP_200_OK


@router.get(
"/latex2mml/healthcheck",
summary="Check health of mathjax service",
response_model=int,
status_code=200,
status_code=status.HTTP_200_OK,
)
def latex2mml_healthcheck() -> int:
try:
return int(requests.get(f"{SKEMA_MATHJAX_ADDRESS}/healthcheck").status_code)
except:
return 500
return status.HTTP_500_INTERNAL_SERVER_ERROR


@router.post("/image/mml", summary="Get MathML representation of an equation image")
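The switch above from hard-coded integers to FastAPI's `status` constants is behavior-preserving: the constants are plain `int` values with readable names. A two-line sanity check (illustrative only):

```python
from fastapi import status

assert status.HTTP_200_OK == 200 and status.HTTP_500_INTERNAL_SERVER_ERROR == 500
```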
44 changes: 44 additions & 0 deletions skema/isa/data.py
@@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-

mml = """<math>
<mfrac>
<mrow>
<mi>&#x2202;</mi>
<mi>H</mi>
</mrow>
<mrow>
<mi>&#x2202;</mi>
<mi>t</mi>
</mrow>
</mfrac>
<mo>=</mo>
<mi>&#x2207;</mi>
<mo>&#x22C5;</mo>
<mo>(</mo>
<mi>&#x0393;</mi>
<msup>
<mi>H</mi>
<mrow>
<mi>n</mi>
<mo>+</mo>
<mn>2</mn>
</mrow>
</msup>
<mo>|</mo>
<mi>&#x2207;</mi>
<mi>H</mi>
<msup>
<mo>|</mo>
<mrow>
<mi>n</mi>
<mo>&#x2212;</mo>
<mn>1</mn>
</mrow>
</msup>
<mi>&#x2207;</mi>
<mi>H</mi>
<mo>)</mo>
</math>
"""

expected = 'digraph G {\n0 [color=blue, label="Div(Γ*(H^(n+2))*(Abs(Grad(H))^(n-1))*Grad(H))"];\n1 [color=blue, label="D(1, t)(H)"];\n2 [color=blue, label="Γ*(H^(n+2))*(Abs(Grad(H))^(n-1))*Grad(H)"];\n3 [color=blue, label="Γ"];\n4 [color=blue, label="H^(n+2)"];\n5 [color=blue, label="H"];\n6 [color=blue, label="n+2"];\n7 [color=blue, label="n"];\n8 [color=blue, label="2"];\n9 [color=blue, label="Abs(Grad(H))^(n-1)"];\n10 [color=blue, label="Abs(Grad(H))"];\n11 [color=blue, label="Grad(H)"];\n12 [color=blue, label="n-1"];\n13 [color=blue, label="1"];\n1 -> 0 [color=blue, label="="];\n2 -> 0 [color=blue, label="Div"];\n3 -> 2 [color=blue, label="*"];\n4 -> 2 [color=blue, label="*"];\n5 -> 4 [color=blue, label="^"];\n6 -> 4 [color=blue, label="^"];\n7 -> 6 [color=blue, label="+"];\n8 -> 6 [color=blue, label="+"];\n9 -> 2 [color=blue, label="*"];\n10 -> 9 [color=blue, label="^"];\n11 -> 10 [color=blue, label="Abs"];\n5 -> 11 [color=blue, label="Grad"];\n12 -> 9 [color=blue, label="^"];\n7 -> 12 [color=blue, label="+"];\n13 -> 12 [color=blue, label="-"];\n11 -> 2 [color=blue, label="*"];\n}\n'
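
For reference, the MathML sample and the expected dependency graph above encode the same equation; one reading of the markup, written out in LaTeX (not text taken from the repository), is:

```latex
\frac{\partial H}{\partial t}
  = \nabla \cdot \left( \Gamma \, H^{\,n+2} \, \lvert \nabla H \rvert^{\,n-1} \, \nabla H \right)
```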
61 changes: 47 additions & 14 deletions skema/isa/isa_service.py
@@ -1,12 +1,15 @@
# -*- coding: utf-8 -*-

from fastapi import FastAPI, File
from fastapi import Depends, FastAPI, APIRouter, status
from skema.isa.lib import align_mathml_eqs
import skema.isa.data as isa_data
from skema.rest import utils
from pydantic import BaseModel
import httpx

# Create a web app using FastAPI
from skema.rest.proxies import SKEMA_RS_ADDESS

app = FastAPI()
router = APIRouter()


# Model for ISA_Result
@@ -15,17 +18,39 @@ class ISA_Result(BaseModel):
union_graph: str = None


@app.get("/ping", summary="Ping endpoint to test health of service")
def ping():
return "The ISA service is running."
@router.get(
"/healthcheck",
summary="Status of ISA service",
response_model=int,
status_code=status.HTTP_200_OK
)
async def healthcheck(client: httpx.AsyncClient = Depends(utils.get_client)) -> int:
res = await client.get(f"{SKEMA_RS_ADDESS}/ping")
return res.status_code


@app.put("/align-eqns", summary="Align two MathML equations")
@router.post(
"/align-eqns",
summary="Align two MathML equations"
)
async def align_eqns(
file1: str, file2: str, mention_json1: str = "", mention_json2: str = ""
mml1: str, mml2: str, mention_json1: str = "", mention_json2: str = ""
) -> ISA_Result:
"""
f"""
Endpoint for aligning two MathML equations.
### Python example
```
import requests
request = {{
"mml1": {isa_data.mml},
"mml2": {isa_data.mml}
}}
response = requests.post("/isa/align-eqns", json=request)
res = response.json()
```
"""
(
matching_ratio,
@@ -36,8 +61,16 @@ async def align_eqns(
aligned_indices2,
union_graph,
perfectly_matched_indices1,
) = align_mathml_eqs(file1, file2, mention_json1, mention_json2)
ir = ISA_Result()
ir.matching_ratio = matching_ratio
ir.union_graph = union_graph.to_string()
return ir
) = align_mathml_eqs(mml1, mml2, mention_json1, mention_json2)
return ISA_Result(
matching_ratio = matching_ratio,
union_graph = union_graph.to_string()
)


app = FastAPI()
app.include_router(
router,
prefix="/isa",
tags=["isa"],
)
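
A minimal client-side sketch of how the re-mounted routes above could be exercised once the application is running. The base URL is an assumption, and the equations are passed as query parameters because the route declares plain `str` arguments rather than a request body; this mirrors, in spirit, the example in the endpoint's docstring:

```python
import requests

BASE = "http://localhost:8000"  # assumed host/port where this FastAPI app is served

# Health of the ISA service (internally pings the SKEMA Rust service).
print(requests.get(f"{BASE}/isa/healthcheck").status_code)

# Align an equation with itself as a smoke test, using MathML read from a local file.
with open("equation.xml") as f:  # hypothetical file containing a MathML string
    mml = f.read()

params = {"mml1": mml, "mml2": mml, "mention_json1": "", "mention_json2": ""}
response = requests.post(f"{BASE}/isa/align-eqns", params=params)
result = response.json()
print(result["matching_ratio"], result["union_graph"][:60])
```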
16 changes: 9 additions & 7 deletions skema/isa/lib.py
@@ -2,13 +2,14 @@
"""
All the functions required by performing incremental structure alignment (ISA)
Author: Liang Zhang ([email protected])
Updated date: August 24, 2023
Updated date: December 18, 2023
"""
import json
import warnings
from typing import List, Any, Union, Dict
from numpy import ndarray
from pydot import Dot
from skema.rest.proxies import SKEMA_RS_ADDESS

warnings.filterwarnings("ignore")
import requests
@@ -173,8 +174,9 @@ def generate_graph(file: str = "", render: bool = False) -> pydot.Dot:
content = f.read()

digraph = requests.put(
"http://localhost:8080/mathml/math-exp-graph", data=content.encode("utf-8")
f"{SKEMA_RS_ADDESS}/mathml/math-exp-graph", data=content.encode("utf-8")
)

if render:
src = Source(digraph.text)
src.render("doctest-output/mathml_exp_tree", view=True)
@@ -671,8 +673,8 @@ def check_square_array(arr: np.ndarray) -> List[int]:


def align_mathml_eqs(
file1: str = "",
file2: str = "",
mml1: str = "",
mml2: str = "",
mention_json1: str = "",
mention_json2: str = "",
mode: int = 2,
@@ -685,7 +687,7 @@
[1] Fishkind, D. E., Adali, S., Patsolic, H. G., Meng, L., Singh, D., Lyzinski, V., & Priebe, C. E. (2019).
Seeded graph matching. Pattern recognition, 87, 203-215.
Input: the paths of the two equation MathMLs; mention_json1: the mention file of paper 1; mention_json1: the mention file of paper 2;
Input: mml1 & mml2: the file paths or contents of the two equation MathMLs; mention_json1: the mention file of paper 1; mention_json2: the mention file of paper 2;
mode 0: without considering any priors; mode 1: having a heuristic prior
with the similarity of node labels; mode 2: using the variable definitions
Output:
@@ -698,8 +700,8 @@
union_graph: the visualization of the alignment result
perfectly_matched_indices1: strictly matched node indices in Graph 1
"""
graph1 = generate_graph(file1)
graph2 = generate_graph(file2)
graph1 = generate_graph(mml1)
graph2 = generate_graph(mml2)

amatrix1, node_labels1 = generate_amatrix(graph1)
amatrix2, node_labels2 = generate_amatrix(graph2)
9 changes: 7 additions & 2 deletions skema/program_analysis/CAST/fortran/node_helper.py
@@ -1,8 +1,10 @@
import itertools
from typing import List, Dict
from skema.program_analysis.CAST2FN.model.cast import SourceRef

from tree_sitter import Node

from skema.program_analysis.CAST2FN.model.cast import SourceRef

CONTROL_CHARACTERS = [
",",
"=",
@@ -41,7 +43,7 @@ def __init__(self, source: str, source_file_name: str):
# get_identifier optimization variables
self.source_lines = source.splitlines(keepends=True)
self.line_lengths = [len(line) for line in self.source_lines]
self.line_length_sums = [sum(self.line_lengths[:i+1]) for i in range(len(self.source_lines))]
self.line_length_sums = list(itertools.accumulate(self.line_lengths))  # running prefix sums of line lengths

def get_source_ref(self, node: Node) -> SourceRef:
"""Given a node and file name, return a CAST SourceRef object."""
@@ -96,6 +98,9 @@ def get_children_by_types(node: Node, types: List):
"""Takes in a node and a list of types as inputs and returns all children matching those types. Otherwise, return an empty list"""
return [child for child in node.children if child.type in types]

def get_children_except_types(node: Node, types: List):
"""Takes in a node and a list of types as inputs and returns all children not matching those types. Otherwise, return an empty list"""
return [child for child in node.children if child.type not in types]

def get_first_child_index(node, type: str):
"""Get the index of the first child of node with type type."""
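The line_length_sums change above replaces a quadratic list comprehension with a linear prefix-sum. A small self-contained check of the equivalence (the sample lines are made up for illustration):

```python
import itertools

source_lines = ["program main\n", "  x = 1\n", "end program main\n"]
line_lengths = [len(line) for line in source_lines]

# Old form: re-sums the prefix for every index, O(n^2) overall.
old_sums = [sum(line_lengths[:i + 1]) for i in range(len(source_lines))]

# New form: running totals in a single pass, O(n).
new_sums = list(itertools.accumulate(line_lengths))

assert old_sums == new_sums == [13, 21, 38]
```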
18 changes: 12 additions & 6 deletions skema/program_analysis/CAST/fortran/preprocessor/preprocess.py
@@ -34,7 +34,7 @@ def preprocess(
"""
# NOTE: The order of preprocessing steps does matter. We have to run the GCC preprocessor before correcting the continuation lines or there could be issues

# TODO: Create single location for generating include base path
# TODO: Create single location for generating include base path
source = source_path.read_text()

# Get paths for intermediate products
@@ -67,21 +67,21 @@

# Step 2: Correct include directives to remove system references
source = fix_include_directives(source)

# Step 3: Process with gcc c-preprocessor
include_base_directory = Path(source_path.parent, f"include_{source_path.stem}")
if not include_base_directory.exists():
include_base_directory = include_base_directory.parent
source = run_c_preprocessor(source, include_base_directory)
if out_gcc:
gcc_path.write_text(source)

# Step 4: Prepare for tree-sitter
# This step removes any additional preprocessor directives added or not removed by GCC
source = "\n".join(
["!" + line if line.startswith("#") else line for line in source.splitlines()]
)

# Step 5: Check for unsupported idioms
if out_unsupported:
unsupported_path.write_text(
@@ -173,7 +173,7 @@ def fix_include_directives(source: str) -> str:
def run_c_preprocessor(source: str, include_base_path: Path) -> str:
"""Run the gcc c-preprocessor. Its run from the context of the include_base_path, so that it can find all included files"""
result = run(
["gcc", "-cpp", "-E", "-"],
["gcc", "-cpp", "-E", "-x", "f95", "-"],
input=source,
text=True,
capture_output=True,
@@ -183,8 +183,14 @@
return result.stdout


def convert_assigned(source: str) -> str:
"""Convered ASSIGNED GO TO to COMPUTED GO TO"""
pass


def convert_to_free_form(source: str) -> str:
"""If fixed-form Fortran source, convert to free-form"""

def validate_parse_tree(source: str) -> bool:
"""Parse source with tree-sitter and check if an error is returned."""
language = Language(INSTALLED_LANGUAGES_FILEPATH, "fortran")
@@ -204,7 +210,7 @@ def validate_parse_tree(source: str) -> bool:
)
if validate_parse_tree(free_source):
return free_source

return source


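The added `-x f95` flag makes gcc treat the source piped on stdin as free-form Fortran rather than guessing the language from a file extension. A stand-alone sketch of the same invocation, assuming a gcc with Fortran support is on the PATH (the sample program is invented for illustration):

```python
from subprocess import run

fortran_source = """\
#define N 3
program demo
  integer :: i
  do i = 1, N
    print *, i
  end do
end program demo
"""

# Mirrors run_c_preprocessor: read Fortran from stdin, emit preprocessed text on stdout.
result = run(
    ["gcc", "-cpp", "-E", "-x", "f95", "-"],
    input=fortran_source,
    text=True,
    capture_output=True,
)
print(result.stdout)  # the loop bound N has been expanded to 3 by the preprocessor
```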