
Commit 14ccd3a

Region code generation should match postgis logic
1 parent 4935d96 commit 14ccd3a

File tree: 3 files changed, +83 -6 lines

cubedash/summary/_extents.py

Lines changed: 48 additions & 2 deletions
@@ -36,7 +36,7 @@
 from datacube.drivers.postgres._fields import PgDocField, RangeDocField

 from datacube.index import Index
-from datacube.model import DatasetType, Field, MetadataType
+from datacube.model import DatasetType, Field, MetadataType, Dataset

 _LOG = structlog.get_logger()

@@ -527,7 +527,9 @@ def __init__(
     units_label: str = "regions"

     @classmethod
-    def for_product(cls, dt: DatasetType, region_shapes: Dict[str, GeometryCollection]):
+    def for_product(
+        cls, dt: DatasetType, region_shapes: Dict[str, GeometryCollection] = None
+    ):
         region_code_field: Field = dt.metadata_type.dataset_fields.get("region_code")
         grid_spec = dt.grid_spec
         # Ingested grids trump the "region_code" field because they've probably sliced it up smaller.
@@ -557,7 +559,23 @@ def geojson_extent(self, region_code):
             "properties": {"region_code": region_code},
         }

+    def dataset_region_code(self, dataset: Dataset) -> Optional[str]:
+        """
+        Get the region code for a dataset.
+
+        This should always give the same result as the alchemy_expression() function,
+        but is computed in pure python.
+
+        Classes that override alchemy_expression should override this to match.
+        """
+        return dataset.metadata.region_code
+
     def alchemy_expression(self):
+        """
+        Get an alchemy expression that computes the dataset's region code.
+
+        Classes that override this should also override dataset_region_code to match.
+        """
         dt = self.product
         region_code_field: Field = dt.metadata_type.dataset_fields.get("region_code")
         # `alchemy_expression` is part of the postgres driver (PgDocField),
@@ -625,6 +643,21 @@ def alchemy_expression(self):
             func.floor((func.ST_Y(center_point) - origin_y) / size_y).cast(String),
         )

+    def dataset_region_code(self, dataset: Dataset) -> Optional[str]:
+        tiles = [
+            tile
+            for tile, _ in dataset.type.grid_spec.tiles(
+                dataset.extent.centroid.boundingbox
+            )
+        ]
+        if not len(tiles) == 1:
+            raise ValueError(
+                "Tiled dataset should only have one tile? "
+                f"Got {tiles!r} for {dataset!r}"
+            )
+        x, y = tiles[0]
+        return f"{x}_{y}"
+

 def _from_xy_region_code(region_code: str):
     """
@@ -676,6 +709,19 @@ def alchemy_expression(self):
             else_=path_field.lower.alchemy_expression.cast(String),
         )

+    def dataset_region_code(self, dataset: Dataset) -> Optional[str]:
+        path_range = dataset.metadata.fields["sat_path"]
+        row_range = dataset.metadata.fields["sat_row"]
+        if row_range is None and path_range is None:
+            return None
+
+        # If it's just one scene? Include it specifically
+        if row_range[0] == row_range[1]:
+            return f"{path_range[0]}_{row_range[1]}"
+        # Otherwise it's a range of rows, so we say the whole path.
+        else:
+            return f"{path_range[0]}"
+

 def _region_code_field(dt: DatasetType):
     """

cubedash/warmup.py

Lines changed: 9 additions & 4 deletions
@@ -7,6 +7,7 @@
 import click
 from click import secho

+from cubedash.summary import RegionInfo
 from datacube.index import Index
 from datacube.ui.click import config_option, environment_option, pass_index

@@ -48,11 +49,15 @@ def find_examples_of_all_public_urls(index: Index):
         # TODO: Do non-region_code regions too (such as ingested data)
         # TODO: Actually we have no EO3 in this test data, so it does nothing.
         #       Maybe add test data from test_eo3_support.py?
-        if "region_code" in dataset.metadata.fields:
-            yield f"/api/regions/{name}/{time:%Y/%m/%d}"
+        region_info = RegionInfo.for_product(dt)

-            yield f"/region/{name}/{dataset.metadata.region_code}"
-            yield f"/region/{name}/{dataset.metadata.region_code}/{time:%Y/%m/%d}"
+        if region_info is not None:
+            region_code = region_info.dataset_region_code(dataset)
+            if region_code is not None:
+                yield f"/api/regions/{name}/{time:%Y/%m/%d}"
+
+                yield f"/region/{name}/{region_code}"
+                yield f"/region/{name}/{region_code}/{time:%Y/%m/%d}"

     for [dataset_id] in index.datasets.search_returning(("id",), limit=10):
         yield f"/dataset/{dataset_id}"

integration_tests/test_summarise_data.py

Lines changed: 26 additions & 0 deletions
@@ -12,6 +12,7 @@

 from cubedash._utils import alchemy_engine
 from cubedash.summary import SummaryStore
+from cubedash.summary._extents import GridRegionInfo
 from cubedash.summary._schema import CUBEDASH_SCHEMA
 from datacube.index import Index
 from datacube.index.hl import Doc2Dataset
@@ -427,3 +428,28 @@ def _get_product_seq_value():
     assert (
         value_after_rerun == original_value
     ), "Product sequence was incremented without any new products being added."
+
+
+def test_computed_regions_match_those_summarised(summary_store: SummaryStore):
+    """
+    The region code for all datasets should be computed identically when
+    done in both SQL and Python.
+    """
+    summary_store.refresh_all_products()
+
+    # Loop through all datasets in the test data to check that the DB and Python
+    # functions give identical region codes.
+    for product in summary_store.index.products.get_all():
+        region_info = GridRegionInfo.for_product(product, None)
+        for dataset in summary_store.index.datasets.search(product=product.name):
+            (
+                footprint,
+                alchemy_calculated_region_code,
+            ) = summary_store.get_dataset_footprint_region(dataset.id)
+
+            python_calculated_region_code = region_info.dataset_region_code(dataset)
+            assert python_calculated_region_code == alchemy_calculated_region_code, (
+                "Python and DB calculated region codes didn't produce the same value. "
+                f"{python_calculated_region_code!r} != {alchemy_calculated_region_code!r}"
+                f" for product {dataset.type.name!r}, dataset {dataset!r}"
+            )
