diff --git a/README.md b/README.md index 19d0847..63b91ad 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ The current checks are (see also the 'show-validations' command): | RQ2 | Layers must have at least one feature. | | RQ3 | _LEGACY:_ use RQ14 * Layer features should have an allowed geometry_type (one of POINT, LINESTRING, POLYGON, MULTIPOINT, MULTILINESTRING, or MULTIPOLYGON). | | RQ4 | The geopackage should have no views defined. | -| RQ5 | _LEGACY:_ use RQ23 * Geometry should be valid. | +| RQ5 | _LEGACY:_ use RQ23 * Geometry should be valid and in GeoPackage format. | | RQ6 | Column names must start with a letter, and valid characters are lowercase a-z, numbers or underscores. | | RQ7 | Tables should have a feature id column with unique index. | | RQ8 | Geopackage must conform to given JSON or YAML definitions. | @@ -111,7 +111,7 @@ The current checks are (see also the 'show-validations' command): | RQ16 | _LEGACY:_ use RQ21 * All layer and column names shall not be longer than 53 characters. | | RQ21 | All layer and column names shall not be longer than 57 characters. | | RQ22 | Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3857, 4258, 4326, 4936, 4937, 5730, 7409. | -| RQ23 | Geometry should be valid and simple. | +| RQ23 | Geometry should be valid, simple and in GeoPackage format. | | RQ24 | Geometry should not be null or empty (e.g. 'POINT EMPTY', WKT 'POINT(NaN NaN)'). | | RC17 | It is recommended to name all GEOMETRY type columns 'geom'. | | RC18 | It is recommended to give all GEOMETRY type columns the same name. | diff --git a/geopackage_validator/validations/geometry_valid_check.py b/geopackage_validator/validations/geometry_valid_check.py index 087889b..63661c5 100644 --- a/geopackage_validator/validations/geometry_valid_check.py +++ b/geopackage_validator/validations/geometry_valid_check.py @@ -2,16 +2,28 @@ from geopackage_validator.validations import validator from geopackage_validator import utils -SQL_ONLY_VALID_TEMPLATE = """SELECT reason, count(reason) AS count, row_id +SQL_VALID_TEMPLATE_V0 = """SELECT reason, count(reason) AS count, row_id FROM( SELECT - CASE INSTR(ST_IsValidReason("{column_name}"), '[') + CASE ST_IsValid("{column_name}") WHEN 0 - THEN ST_IsValidReason("{column_name}") - ELSE substr(ST_IsValidReason("{column_name}"), 0, INSTR(ST_IsValidReason("{column_name}"), '[')) + THEN + CASE INSTR(ST_IsValidReason("{column_name}"), '[') + WHEN 0 + THEN ST_IsValidReason("{column_name}") + ELSE substr(ST_IsValidReason("{column_name}"), 0, INSTR(ST_IsValidReason("{column_name}"), '[')) + END + ELSE + CASE + WHEN IsValidGPB("{column_name}") = 0 + THEN 'Not GeoPackage geometry' + END END AS reason, cast(rowid AS INTEGER) AS row_id - FROM "{table_name}" WHERE ST_IsValid("{column_name}") = 0 + FROM "{table_name}" + WHERE + ST_IsValid("{column_name}") = 0 OR + (IsValidGPB("{column_name}") = 0 AND ST_IsEmpty("{column_name}") = 0) -- Empty geometry is considered valid ) GROUP BY reason;""" @@ -27,13 +39,19 @@ ELSE substr(ST_IsValidReason("{column_name}"), 0, INSTR(ST_IsValidReason("{column_name}"), '[')) END ELSE - CASE ST_IsSimple("{column_name}") - WHEN 0 + CASE + WHEN ST_IsSimple("{column_name}") = 0 THEN 'Not Simple' + WHEN IsValidGPB("{column_name}") = 0 + THEN 'Not GeoPackage geometry' END END AS reason, cast(rowid AS INTEGER) AS row_id - FROM "{table_name}" WHERE ST_IsValid("{column_name}") = 0 OR ST_IsSimple("{column_name}") = 0 + FROM "{table_name}" + WHERE + ST_IsValid("{column_name}") = 0 OR + ST_IsSimple("{column_name}") = 0 OR + (IsValidGPB("{column_name}") = 0 AND ST_IsEmpty("{column_name}") = 0) -- Empty geometry is considered valid ) GROUP BY reason;""" @@ -58,7 +76,7 @@ class ValidGeometryValidatorV0(validator.Validator): message = "Found invalid geometry in table: {table_name}, column {column_name}, reason: {reason}, {count} {count_label}, example id {row_id}" def check(self) -> Iterable[str]: - result = query_geometry_valid(self.dataset, SQL_ONLY_VALID_TEMPLATE) + result = query_geometry_valid(self.dataset, SQL_VALID_TEMPLATE_V0) return [ self.message.format( diff --git a/tests/data/test_geometry_spatialite.gpkg b/tests/data/test_geometry_spatialite.gpkg new file mode 100644 index 0000000..46b10dc Binary files /dev/null and b/tests/data/test_geometry_spatialite.gpkg differ diff --git a/tests/validations/test_geometry_valid_check.py b/tests/validations/test_geometry_valid_check.py index 3213eb2..cdbe508 100644 --- a/tests/validations/test_geometry_valid_check.py +++ b/tests/validations/test_geometry_valid_check.py @@ -1,20 +1,21 @@ from geopackage_validator.utils import open_dataset from geopackage_validator.validations.geometry_valid_check import ( query_geometry_valid, - SQL_ONLY_VALID_TEMPLATE, + SQL_VALID_TEMPLATE_V0, SQL_VALID_TEMPLATE, ) def test_with_gpkg_valid(): dataset = open_dataset("tests/data/test_geometry_valid.gpkg") - checks = list(query_geometry_valid(dataset, SQL_ONLY_VALID_TEMPLATE)) - assert len(checks) == 1 - assert checks[0][0] == "test_geometry_valid" - assert checks[0][1] == "geometry" - assert checks[0][2] == "Self-intersection" - assert checks[0][3] == 1 - assert checks[0][4] == 1 + for template in [SQL_VALID_TEMPLATE_V0, SQL_VALID_TEMPLATE]: + checks = list(query_geometry_valid(dataset, template)) + assert len(checks) == 1 + assert checks[0][0] == "test_geometry_valid" + assert checks[0][1] == "geometry" + assert checks[0][2] == "Self-intersection" + assert checks[0][3] == 1 + assert checks[0][4] == 1 def test_with_gpkg_simple(): @@ -53,6 +54,19 @@ def test_with_gpkg_null(): assert len(checks) == 0 +def test_with_gpkg_spatialite(): + # spatialite geometries are not considered valid + dataset = open_dataset("tests/data/test_geometry_spatialite.gpkg") + for template in [SQL_VALID_TEMPLATE_V0, SQL_VALID_TEMPLATE]: + checks = list(query_geometry_valid(dataset, template)) + assert len(checks) == 1 + assert checks[0][0] == "test_geometry_spatialite" + assert checks[0][1] == "geom" + assert checks[0][2] == "Not GeoPackage geometry" + assert checks[0][3] == 1 + assert checks[0][4] == 1 + + def test_with_gpkg_allcorrect(): dataset = open_dataset("tests/data/test_allcorrect.gpkg") checks = list(query_geometry_valid(dataset, SQL_VALID_TEMPLATE))