Skip to content

Commit e8f069d

Browse files
hf-kklein and Konstantin authored
Add ahb_expressions table + model class with bedingungstexts (usable iff fundamend[sqlmodels,ahbicht] is installed) (#124)
--------- Co-authored-by: Konstantin <[email protected]>
1 parent b4baecb commit e8f069d

File tree

6 files changed

+1098
-0
lines changed

6 files changed

+1098
-0
lines changed

domain-specific-terms.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ sie
1616
rekursion
1717
rekursive
1818
finde
19+
contrl

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ sqlmodels = [
2828
"sqlmodel>=0.0.22",
2929
"sqlalchemy[mypy]>=2.0.37"
3030
]
31+
ahbicht = [
32+
"ahbicht>=0.13.2"
33+
]
3134
coverage = [
3235
"coverage==7.8.0"
3336
]
Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
"""
2+
helper module to create a table with a "Bedingung" column like the one in the PDF/docx AHBs
3+
"""
4+
5+
import asyncio
6+
import logging
7+
import uuid
8+
from contextvars import ContextVar
9+
from typing import Optional
10+
11+
from efoli import EdifactFormat, EdifactFormatVersion
12+
13+
from fundamend.sqlmodels import AhbHierarchyMaterialized, Bedingung
14+
from fundamend.sqlmodels.anwendungshandbuch import Paket, UbBedingung
15+
16+
try:
17+
from sqlalchemy.sql.functions import func
18+
from sqlmodel import Field, Session, SQLModel, UniqueConstraint, col, select
19+
20+
except ImportError as import_error:
21+
import_error.msg += "; Did you install fundamend[sqlmodels] or did you try to import from fundamend.models instead?"
22+
# sqlmodel is only an optional dependency when fundamend is used to fill a database
23+
raise
24+
25+
26+
try:
27+
import inject
28+
from ahbicht.content_evaluation.evaluationdatatypes import EvaluatableData, EvaluatableDataProvider
29+
from ahbicht.content_evaluation.evaluator_factory import create_content_evaluation_result_based_evaluators
30+
from ahbicht.content_evaluation.expression_check import is_valid_expression
31+
from ahbicht.content_evaluation.token_logic_provider import SingletonTokenLogicProvider, TokenLogicProvider
32+
from ahbicht.expressions.condition_expression_parser import extract_categorized_keys
33+
from ahbicht.models.content_evaluation_result import ContentEvaluationResult, ContentEvaluationResultSchema
34+
from lark.exceptions import VisitError
35+
except ImportError as import_error:
36+
import_error.msg += "; Did you install fundamend[sqlmodels,ahbicht]?"
37+
# sqlmodel and ahbicht are only optional dependencies when fundamend is used to fill a database
38+
raise
39+
40+
# module-level logger, named after this module
_logger = logging.getLogger(__name__)

# Context variable that carries the ContentEvaluationResult which is currently being evaluated.
# Its value is None until something calls `.set(...)`; in this module the `.set` method is handed to ahbicht's
# `is_valid_expression` and the value is read back in `_get_evaluatable_data`.
_content_evaluation_result: ContextVar[Optional[ContentEvaluationResult]] = ContextVar(
    "_content_evaluation_result", default=None
)
45+
46+
47+
def _get_evaluatable_data() -> EvaluatableData[ContentEvaluationResult]:
    """
    Reads the current value of the _content_evaluation_result context var, serializes it with the
    ContentEvaluationResultSchema and wraps the result in an EvaluatableData container.
    This is the kind of data that the ContentEvaluationResultBased RC/FC Evaluators, HintsProvider and Package Resolver
    require.
    :return: the EvaluatableData whose body is the dumped content evaluation result
    """
    current_result = _content_evaluation_result.get()
    serialized_result = ContentEvaluationResultSchema().dump(current_result)
    # Format and format version are mandatory arguments but irrelevant here; the values are mere placeholders.
    return EvaluatableData(
        body=serialized_result,
        edifact_format=EdifactFormat.UTILMD,
        edifact_format_version=EdifactFormatVersion.FV2504,
    )
60+
61+
62+
def _setup_weird_ahbicht_dependency_injection() -> None:
    """
    Configures the `inject` bindings that ahbicht relies on:
    * TokenLogicProvider is bound to a SingletonTokenLogicProvider that is built from the content evaluation result
      based evaluators (created for UTILMD/FV2504)
    * EvaluatableDataProvider is bound to _get_evaluatable_data
    The configuration is registered with `inject.configure_once`.
    """

    def _bind_ahbicht_providers(binder: inject.Binder) -> None:
        evaluators = list(
            create_content_evaluation_result_based_evaluators(EdifactFormat.UTILMD, EdifactFormatVersion.FV2504)
        )
        token_logic_provider = SingletonTokenLogicProvider(evaluators)
        binder.bind(TokenLogicProvider, token_logic_provider)
        binder.bind_to_provider(EvaluatableDataProvider, _get_evaluatable_data)

    inject.configure_once(_bind_ahbicht_providers)
73+
74+
75+
def _generate_node_texts(session: Session, expression: str, ahb_pk: uuid.UUID) -> str:
    """
    Builds the explanatory text block for all nodes that occur in the given expression.
    The keys are extracted from the expression with ahbicht (extract_categorized_keys); their texts are looked up in
    the Bedingung, Paket and UbBedingung tables, restricted to the Anwendungshandbuch with primary key ahb_pk.
    Keys for which no row is found do not show up in the result.
    :return: one line "[<nummer>] <text>" per found entry, joined by line breaks; Bedingungen come first, then Pakete,
    then UB-Bedingungen
    """
    extracted_keys = asyncio.run(extract_categorized_keys(expression))
    bedingung_nummern = (
        extracted_keys.format_constraint_keys + extracted_keys.requirement_constraint_keys + extracted_keys.hint_keys
    )
    paket_nummern = extracted_keys.package_keys
    ubbedingung_nummern = extracted_keys.time_condition_keys

    # probably, we'd be faster if we just loaded all pakete and all bedingungen once instead of selecting over and over
    # again for each expression
    bedingung_stmt = select(Bedingung).where(
        col(Bedingung.nummer).in_(bedingung_nummern),  # pylint:disable=no-member
        Bedingung.anwendungshandbuch_primary_key == ahb_pk,
    )
    bedingung_texts = {bedingung.nummer: bedingung.text for bedingung in session.exec(bedingung_stmt).all()}

    paket_stmt = select(Paket).where(
        col(Paket.nummer).in_(paket_nummern),  # pylint:disable=no-member
        Paket.anwendungshandbuch_primary_key == ahb_pk,
    )
    paket_texts = {paket.nummer: paket.text for paket in session.exec(paket_stmt).all()}

    ubbedingung_stmt = select(UbBedingung).where(
        col(UbBedingung.nummer).in_(ubbedingung_nummern),  # pylint:disable=no-member
        UbBedingung.anwendungshandbuch_primary_key == ahb_pk,
    )
    ubbedingung_texts = {ub.nummer: ub.text for ub in session.exec(ubbedingung_stmt).all()}

    # on equal keys the later dict wins: UB-Bedingungen over Pakete over Bedingungen
    all_texts = bedingung_texts | paket_texts | ubbedingung_texts
    return "\n".join(f"[{nummer}] {text}" for nummer, text in all_texts.items())
116+
117+
118+
def _get_validity_node_texts_and_error_message_cpu_intensive(
    expression: str, session: Session, anwendungshandbuch_pk: uuid.UUID
) -> tuple[bool, str, str | None]:
    """
    Checks the expression with ahbicht's is_valid_expression and, if it is valid, generates its node texts.
    If ahbicht raises a NotImplementedError (ahbicht fault/missing feature), the expression is treated as valid.
    :param expression: the AHB expression, e.g. 'Muss [1] U [2]'
    :param session: the database session used to look up the node texts
    :param anwendungshandbuch_pk: primary key of the Anwendungshandbuch the expression belongs to
    :return: a tuple (is_valid, node_texts, error_message); node_texts is empty for invalid expressions
    """
    try:
        is_valid, error_message = asyncio.run(is_valid_expression(expression, _content_evaluation_result.set))
    except NotImplementedError:  # ahbicht fault/missing feature -> act like it's valid
        # Both names have to be bound here; otherwise the return statement below raises an UnboundLocalError.
        is_valid, error_message = True, None
    if is_valid:
        node_texts = _generate_node_texts(session, expression, anwendungshandbuch_pk)
    else:
        # we might actually get meaningful node_texts even for invalid expressions, but I don't like it
        node_texts = ""
    return is_valid, node_texts, error_message
131+
132+
133+
def _get_validity_node_texts_and_error_message_fast(
    expression: str, session: Session, anwendungshandbuch_pk: uuid.UUID
) -> tuple[bool, str, str | None]:
    """
    Treats the expression as valid if and only if its node texts can be generated without a SyntaxError or a lark
    VisitError. A parsing failure is logged on info level.
    :param expression: the AHB expression, e.g. 'Muss [1] U [2]'
    :param session: the database session used to look up the node texts
    :param anwendungshandbuch_pk: primary key of the Anwendungshandbuch the expression belongs to
    :return: (True, node_texts, None) on success; (False, "", <stringified exception>) if parsing failed
    """
    try:
        node_texts = _generate_node_texts(session, expression, anwendungshandbuch_pk)
    except (SyntaxError, VisitError) as parsing_error:
        _logger.info("The expression '%s' could not be parsed: %s", expression, parsing_error)
        return False, "", str(parsing_error)
    return True, node_texts, None
149+
150+
151+
def create_and_fill_ahb_expression_table(session: Session, use_cpu_intensive_validity_check: bool = False) -> None:
    """
    Fills the ahb_expressions table. It reads the four '*_ahb_status' columns (segment group, segment, data element,
    code) of the ahb_hierarchy_materialized table, de-duplicates the non-blank expressions per
    (format version, format, stripped expression) and parses each unique expression with ahbicht.
    The latter has to be done in Python.
    If the CPU intensive validity check is enabled, not only the expression alone is checked but also all its possible
    outcomes. This leads to only few additional expressions marked as invalid but is very slow.
    Finally, the number of rows in the table is logged and the session is committed.
    NOTE(review): no table creation is visible in this function; presumably the table is created together with the
    other SQLModel tables before this function is called - to be confirmed.
    :raises ValueError: if the ahb_hierarchy_materialized table yields no rows at all
    """
    rows: list[tuple[EdifactFormatVersion | None, str, str | None, uuid.UUID]] = []
    _setup_weird_ahbicht_dependency_injection()
    status_columns = (
        AhbHierarchyMaterialized.segmentgroup_ahb_status,
        AhbHierarchyMaterialized.segment_ahb_status,
        AhbHierarchyMaterialized.dataelement_ahb_status,
        AhbHierarchyMaterialized.code_ahb_status,
    )
    for status_column in status_columns:
        stmt = select(
            AhbHierarchyMaterialized.edifact_format_version,
            AhbHierarchyMaterialized.format,
            status_column,
            AhbHierarchyMaterialized.anwendungshandbuch_primary_key,
        )
        rows.extend(session.exec(stmt))  # type:ignore[arg-type]
    if not any(rows):
        raise ValueError(
            "No rows found in ahb_hierarchy_materialized table; Run `create_db_and_populate_with_ahb_view` before."
        )

    # only rows with a format version and a non-blank expression are of interest
    non_empty_rows: list[tuple[EdifactFormatVersion, str, str, uuid.UUID]] = [
        r for r in rows if r[2] is not None and r[0] is not None and r[2].strip()  # type:ignore[misc]
    ]

    # keep the first row (in sort order) per (format version, format, stripped expression)
    seen_keys: set[tuple[EdifactFormatVersion, str, str]] = set()
    unique_rows: list[tuple[EdifactFormatVersion, str, str, uuid.UUID]] = []
    for candidate in sorted(non_empty_rows, key=lambda x: (x[0], x[1], x[2])):
        dedup_key = (candidate[0], candidate[1], candidate[2].strip())
        if dedup_key in seen_keys:
            continue
        seen_keys.add(dedup_key)
        unique_rows.append(candidate)

    # as of 2025-04-15 I have no clue how long the CPU intensive check actually takes for all expressions
    check_expression = (
        _get_validity_node_texts_and_error_message_cpu_intensive
        if use_cpu_intensive_validity_check
        else _get_validity_node_texts_and_error_message_fast
    )
    # there are ~3600 unique rows for FV2410+FV2504 as of 2025-04-15
    for format_version, edifact_format, raw_expression, ahb_pk in unique_rows:
        expression = raw_expression.strip()
        _, node_texts, error_message = check_expression(expression, session, ahb_pk)
        ahb_expression_row = AhbExpression(
            edifact_format_version=format_version,
            format=edifact_format,
            expression=expression,
            node_texts=node_texts,
            anwendungshandbuch_primary_key=ahb_pk,
            ahbicht_error_message=error_message,
        )
        session.add(ahb_expression_row)
        _logger.debug(
            "Added row (%s, %s, %s) to the ahb_expressions_table",
            ahb_expression_row.edifact_format_version,
            ahb_expression_row.format,
            ahb_expression_row.expression,
        )

    count_stmt = select(func.count(AhbExpression.id))  # type:ignore[arg-type]# pylint:disable=not-callable
    number_of_inserted_rows = session.scalar(count_stmt)
    _logger.info(
        "Inserted %d rows into the table %s",
        number_of_inserted_rows,
        AhbExpression.__tablename__,
    )
    session.commit()
221+
222+
223+
class AhbExpression(SQLModel, table=True):
    """
    A table that contains all expressions that are used in any AHB, each with EDIFACT format and format version
    (there is no prüfidentifikator column; expressions are interpreted on a per-format basis).
    It's created by UNIONing all 'ahb_status' columns of the ahb_hierarchy_materialized table
    (see create_and_fill_ahb_expression_table).
    Additionally, this table has a column that resolves the expression to a human-readable text.
    """

    __tablename__ = "ahb_expressions"
    # each expression may occur only once per format version and format
    __table_args__ = (
        UniqueConstraint(
            "edifact_format_version",
            "format",
            "expression",
            name="idx_ahb_expressions_metadata_expression",
        ),
    )
    # surrogate primary key, generated on the Python side
    id: uuid.UUID = Field(primary_key=True, default_factory=uuid.uuid4)
    edifact_format_version: EdifactFormatVersion = Field(index=True)
    format: str = Field(index=True)  # the edifact format, e.g. 'UTILMD'
    # expressions and conditions are always interpreted on a per-format basis (no pruefidentifikator required)
    expression: str = Field(index=True)  #: e.g. 'Muss [1] U [2]'
    node_texts: str = Field()
    """
    this contains the typical "[1] Foo Text\n[2] Bar Text" which explains the meaning of the nodes from inside the
    respective Expression (e.g. for expression "Muss [1] U [2]")
    """
    # error message reported while checking/parsing the expression; None if there was no error
    ahbicht_error_message: str | None = Field(default=None)
    # primary key of the Anwendungshandbuch in which the node texts were looked up
    anwendungshandbuch_primary_key: uuid.UUID = Field()

tox.ini

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ deps =
1616
-r requirements.txt
1717
.[tests]
1818
.[sqlmodels]
19+
.[ahbicht]
1920
setenv = PYTHONPATH = {toxinidir}/src
2021
commands = python -m pytest --basetemp={envtmpdir} {posargs} -vv
2122

@@ -24,6 +25,7 @@ deps =
2425
-r requirements.txt
2526
.[tests]
2627
.[sqlmodels]
28+
.[ahbicht]
2729
setenv = PYTHONPATH = {toxinidir}/src
2830
commands = python -m pytest -m snapshot --basetemp={envtmpdir} {posargs} --snapshot-update
2931

0 commit comments

Comments
 (0)