Skip to content

Commit

Permalink
Fix #19147 - Executable Test Suites (#19221)
Browse files Browse the repository at this point in the history
* backend

* format & tests

* rename backend

* migrations and ingestion

* format & tests

* format & tests

* tests

* format & tests

* tests

* updated ui side of changes

* addressing comment

* fixed failing unit test

* fix test list

* added e2e test, and fixed existing test

---------

Co-authored-by: Shailesh Parmar <[email protected]>
  • Loading branch information
pmbrull and ShaileshParmar11 committed Jan 7, 2025
1 parent 5593520 commit 8fc6e8f
Show file tree
Hide file tree
Showing 75 changed files with 4,506 additions and 491 deletions.
26 changes: 26 additions & 0 deletions bootstrap/sql/migrations/native/1.6.2/mysql/schemaChanges.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
-- add timestamp index for test case result reindex performance
ALTER TABLE data_quality_data_time_series ADD INDEX `idx_timestamp_desc` (timestamp DESC);

-- rename executable -> basic for test suites
UPDATE test_suite
SET json = JSON_INSERT(
JSON_REMOVE(json, '$.executable'),
'$.basic',
JSON_EXTRACT(json, '$.executable')
)
WHERE JSON_EXTRACT(json, '$.executable') IS NOT NULL;

-- rename executableEntityReference -> basicEntityReference for test suites
UPDATE test_suite
SET json = JSON_INSERT(
JSON_REMOVE(json, '$.executableEntityReference'),
'$.basicEntityReference',
JSON_EXTRACT(json, '$.executableEntityReference')
)
WHERE JSON_EXTRACT(json, '$.executableEntityReference') IS NOT NULL;

-- clean up the testSuites
UPDATE test_case SET json = json_remove(json, '$.testSuites');

-- clean up the testSuites in the version history too
UPDATE entity_extension SET json = json_remove(json, '$.testSuites') WHERE jsonSchema = 'testCase';
28 changes: 28 additions & 0 deletions bootstrap/sql/migrations/native/1.6.2/postgres/schemaChanges.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
-- add timestamp index for test case result reindex performance
CREATE INDEX idx_timestamp_desc ON data_quality_data_time_series (timestamp DESC);

-- rename executable -> basic for test suites
UPDATE test_suite
SET json = jsonb_set(
json::jsonb #- '{executable}',
'{basic}',
(json #> '{executable}')::jsonb,
true
)
WHERE json #>> '{executable}' IS NOT NULL;

-- rename executableEntityReference -> basicEntityReference for test suites
UPDATE test_suite
SET json = jsonb_set(
json::jsonb #- '{executableEntityReference}',
'{basicEntityReference}',
(json #> '{executableEntityReference}')::jsonb,
true
)
WHERE json #>> '{executableEntityReference}' IS NOT NULL;

-- clean up the testSuites
UPDATE test_case SET json = json::jsonb #- '{testSuites}';

-- clean up the testSuites in the version history too
UPDATE entity_extension SET json = json::jsonb #- '{testSuites}' WHERE jsonSchema = 'testCase';
4 changes: 4 additions & 0 deletions ingestion/src/metadata/data_quality/api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from metadata.config.common import ConfigModel
from metadata.generated.schema.api.tests.createTestSuite import CreateTestSuiteRequest
from metadata.generated.schema.entity.data.table import Table
from metadata.generated.schema.entity.services.databaseService import DatabaseConnection
from metadata.generated.schema.tests.basic import TestCaseResult
from metadata.generated.schema.tests.testCase import TestCase, TestCaseParameterValue
from metadata.ingestion.models.custom_pydantic import BaseModel
Expand Down Expand Up @@ -63,6 +64,9 @@ class TableAndTests(BaseModel):
executable_test_suite: Optional[CreateTestSuiteRequest] = Field(
None, description="If no executable test suite is found, we'll create one"
)
service_connection: DatabaseConnection = Field(
..., description="Service connection for the given table"
)


class TestCaseResults(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from metadata.data_quality.runner.core import DataTestsRunner
from metadata.generated.schema.api.tests.createTestCase import CreateTestCaseRequest
from metadata.generated.schema.entity.data.table import Table
from metadata.generated.schema.entity.services.databaseService import DatabaseConnection
from metadata.generated.schema.entity.services.ingestionPipelines.status import (
StackTraceError,
)
Expand Down Expand Up @@ -93,7 +94,9 @@ def _run(self, record: TableAndTests) -> Either:
record.table, openmetadata_test_cases
)

test_suite_runner = self.get_test_suite_runner(record.table)
test_suite_runner = self.get_test_suite_runner(
record.table, record.service_connection
)

logger.debug(
f"Found {len(openmetadata_test_cases)} test cases for table {record.table.fullyQualifiedName.root}"
Expand Down Expand Up @@ -351,9 +354,9 @@ def filter_incompatible_test_cases(
result.append(tc)
return result

def get_test_suite_runner(self, table: Table):
def get_test_suite_runner(
self, table: Table, service_connection: DatabaseConnection
):
return BaseTestSuiteRunner(
self.config,
self.metadata,
table,
self.config, self.metadata, table, service_connection
).get_data_quality_runner()
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,12 @@ def __init__(
config: OpenMetadataWorkflowConfig,
ometa_client: OpenMetadata,
entity: Table,
service_connection: DatabaseConnection,
):
self.validator_builder_class = ValidatorBuilder
self._interface = None
self.entity = entity
self.service_conn_config = self._copy_service_config(config, self.entity.database) # type: ignore
self.service_conn_config = self._copy_service_config(service_connection, self.entity.database) # type: ignore
self._interface_type: str = self.service_conn_config.type.value.lower()

self.source_config = TestSuitePipeline.model_validate(
Expand All @@ -67,7 +68,7 @@ def interface(self, interface):
self._interface = interface

def _copy_service_config(
self, config: OpenMetadataWorkflowConfig, database: EntityReference
self, service_connection: DatabaseConnection, database: EntityReference
) -> DatabaseConnection:
"""Make a copy of the service config and update the database name
Expand All @@ -77,9 +78,7 @@ def _copy_service_config(
Returns:
DatabaseService.__config__
"""
config_copy = deepcopy(
config.source.serviceConnection.root.config # type: ignore
)
config_copy = deepcopy(service_connection.config) # type: ignore
if hasattr(
config_copy, # type: ignore
"supportsDatabase",
Expand Down
Loading

0 comments on commit 8fc6e8f

Please sign in to comment.