|
3 | 3 | from collections import Counter
|
4 | 4 | from dataclasses import dataclass
|
5 | 5 | from datetime import date, datetime, timedelta
|
6 |
| -from typing import Dict, Generator, Iterable, List, Optional, Sequence, Tuple, Union |
| 6 | +from typing import ( |
| 7 | + Dict, |
| 8 | + Generator, |
| 9 | + Iterable, |
| 10 | + List, |
| 11 | + Optional, |
| 12 | + Sequence, |
| 13 | + Tuple, |
| 14 | + Union, |
| 15 | + Set, |
| 16 | +) |
7 | 17 | from uuid import UUID
|
8 | 18 |
|
9 | 19 | import dateutil.parser
|
|
14 | 24 | from geoalchemy2 import shape as geo_shape
|
15 | 25 | from geoalchemy2.shape import to_shape
|
16 | 26 | from shapely.geometry import GeometryCollection
|
17 |
| -from sqlalchemy import DDL, String, and_, func, select |
| 27 | +from sqlalchemy import DDL, String, and_, func, select, bindparam, SmallInteger, literal |
18 | 28 | from sqlalchemy.dialects import postgresql as postgres
|
19 | 29 | from sqlalchemy.dialects.postgresql import TSTZRANGE
|
20 | 30 | from sqlalchemy.engine import Engine, RowProxy
|
@@ -128,13 +138,14 @@ def init(self):
|
128 | 138 |
|
129 | 139 | (Requires `create` permissions in the db)
|
130 | 140 | """
|
| 141 | + needed_update = not _schema.is_compatible_schema(self._engine) |
| 142 | + |
| 143 | + # Add any missing schema items or patches. |
131 | 144 | _schema.create_schema(self._engine)
|
132 |
| - # Apply any needed updates. |
133 |
| - refresh_items = _schema.update_schema(self._engine) |
| 145 | + refresh_also = _schema.update_schema(self._engine) |
134 | 146 |
|
135 |
| - # Refresh relevant data summaries |
136 |
| - for refresh_item in refresh_items: |
137 |
| - _refresh_data(refresh_item, store=self) |
| 147 | + if needed_update or refresh_also: |
| 148 | + _refresh_data(refresh_also, store=self) |
138 | 149 |
|
139 | 150 | @classmethod
|
140 | 151 | def create(cls, index: Index, log=_LOG) -> "SummaryStore":
|
@@ -185,7 +196,7 @@ def refresh_product(
|
185 | 196 | return None
|
186 | 197 |
|
187 | 198 | _LOG.info("init.product", product_name=product.name)
|
188 |
| - added_count = _extents.refresh_product( |
| 199 | + change_count = _extents.refresh_product( |
189 | 200 | self.index,
|
190 | 201 | product,
|
191 | 202 | recompute_all_extents=force_dataset_extent_recompute,
|
@@ -226,7 +237,62 @@ def refresh_product(
|
226 | 237 | fixed_metadata=fixed_metadata,
|
227 | 238 | )
|
228 | 239 | )
|
229 |
| - return added_count |
| 240 | + |
| 241 | + self._refresh_product_regions(product) |
| 242 | + _LOG.info("init.regions.done", product_name=product.name) |
| 243 | + return change_count |
| 244 | + |
| 245 | + def _refresh_product_regions(self, dataset_type: DatasetType) -> int: |
| 246 | + log = _LOG.bind(product_name=dataset_type.name) |
| 247 | + log.info("refresh.regions.start") |
| 248 | + select_by_srid = ( |
| 249 | + select( |
| 250 | + [ |
| 251 | + DATASET_SPATIAL.c.dataset_type_ref, |
| 252 | + DATASET_SPATIAL.c.region_code, |
| 253 | + func.ST_Transform( |
| 254 | + func.ST_Union(DATASET_SPATIAL.c.footprint), 4326 |
| 255 | + ).label("footprint"), |
| 256 | + func.count().label("count"), |
| 257 | + ] |
| 258 | + ) |
| 259 | + .where( |
| 260 | + DATASET_SPATIAL.c.dataset_type_ref |
| 261 | + == bindparam("product_ref", dataset_type.id, type_=SmallInteger) |
| 262 | + ) |
| 263 | + .group_by("dataset_type_ref", "region_code") |
| 264 | + .cte("srid_groups") |
| 265 | + ) |
| 266 | + |
| 267 | + columns = dict( |
| 268 | + dataset_type_ref=select_by_srid.c.dataset_type_ref, |
| 269 | + region_code=func.coalesce(select_by_srid.c.region_code, ""), |
| 270 | + footprint=func.ST_SimplifyPreserveTopology( |
| 271 | + func.ST_Union(select_by_srid.c.footprint), literal(0.0001) |
| 272 | + ), |
| 273 | + count=func.sum(select_by_srid.c.count), |
| 274 | + ) |
| 275 | + query = postgres.insert(REGION).from_select( |
| 276 | + columns.keys(), |
| 277 | + select(columns.values()) |
| 278 | + .select_from(select_by_srid) |
| 279 | + .group_by("dataset_type_ref", "region_code"), |
| 280 | + ) |
| 281 | + query = query.on_conflict_do_update( |
| 282 | + index_elements=["dataset_type_ref", "region_code"], |
| 283 | + set_=dict( |
| 284 | + footprint=query.excluded.footprint, |
| 285 | + count=query.excluded.count, |
| 286 | + generation_time=func.now(), |
| 287 | + ), |
| 288 | + ) |
| 289 | + # Path(__file__).parent.joinpath("insertion.sql").write_text( |
| 290 | + # f"\n{as_sql(query)}\n" |
| 291 | + # ) |
| 292 | + changed_rows = self._engine.execute(query).rowcount |
| 293 | + |
| 294 | + log.info("refresh.regions.end", changed_regions=changed_rows) |
| 295 | + return changed_rows |
230 | 296 |
|
231 | 297 | def refresh_stats(self, concurrently=False):
|
232 | 298 | """
|
@@ -905,21 +971,24 @@ def get_dataset_footprint_region(self, dataset_id):
|
905 | 971 | )
|
906 | 972 |
|
907 | 973 |
|
908 |
def _refresh_data(please_refresh: Set[PleaseRefresh], store: SummaryStore):
    """
    Refresh product information after a schema update, plus the given kind of data.
    """
    force_extents = PleaseRefresh.DATASET_EXTENTS in please_refresh

    for product in store.all_dataset_types():
        _LOG.info("data.refreshing_extents", product=product.name)
        # Products that have never been summarised have nothing to refresh.
        if store.get_product_summary(product.name) is None:
            continue

        # NOTE(review): a negative refresh_older_than presumably forces a
        # refresh regardless of summary age — confirm against refresh_product.
        store.refresh_product(
            product,
            refresh_older_than=timedelta(minutes=-1),
            force_dataset_extent_recompute=force_extents,
        )
    _LOG.info("data.refreshing_extents.complete")
923 | 992 |
|
924 | 993 |
|
925 | 994 | def _safe_read_date(d):
|
|
0 commit comments