From 9d07956a499cc4c8e90673f516620cdff3711bed Mon Sep 17 00:00:00 2001 From: whatnick Date: Wed, 12 Feb 2020 04:29:21 +0000 Subject: [PATCH 01/30] #197 Initial SQL to capture space time --- create_space_time.sql | 65 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 create_space_time.sql diff --git a/create_space_time.sql b/create_space_time.sql new file mode 100644 index 000000000..176ddcf21 --- /dev/null +++ b/create_space_time.sql @@ -0,0 +1,65 @@ +-- This is an exploration script for maintaining spatio-temporal extents for datasets in +-- the datacube database outside of JSON blobs, with a coalesce around two different types of +-- metadata definition (eodataset and eo3) + +-- Install PostGIS extensions on the public schema +create extension postgis; +-- Needed to give other schemas access to PostGIS functions installed in the public schema +ALTER DATABASE datacube +SET + search_path = public, + agdc; + +-- Handling different variants of metadata requires COALESCE +-- https://www.postgresql.org/docs/11/functions-conditional.html#FUNCTIONS-COALESCE-NVL-IFNULL + +-- Try all different locations for temporal extents and COALESCE them +-- This is the eodataset variant of the temporal extent +select + tstzrange( + (metadata -> 'extent' ->> 'from_dt') :: timestamp,(metadata -> 'extent' ->> 'to_dt') :: timestamp + ) as temporal_extent +from agdc.dataset where metadata_type_ref=1::smallint; + +-- This is the eo3 variant of the temporal extent, the sample eo3 dataset uses a singleton +-- timestamp, some other variants use start/end timestamps. From OWS perspective temporal +-- resolution is 1 whole day +select + tstzrange( + (metadata->'properties'->>'datetime'):: timestamp, + (metadata->'properties'->>'datetime'):: timestamp + interval '1 day' + ) as temporal_extent +from agdc.dataset where metadata_type_ref=3::smallint; + +-- Try all different locations for spatial extents and COALESCE them +-- This is eo3 spatial (Uses CEMP INSAR as a sample product) +select metadata from agdc.dataset where metadata_type_ref=3::smallint; + +-- This is eo spatial (Uses ALOS-PALSAR over Africa as a sample product) +with corners as +(select id, + (metadata #> '{extent, coord, ll, lat}') as ll_lat, + (metadata #> '{extent, coord, ll, lon}') as ll_lon, + (metadata #> '{extent, coord, lr, lat}') as lr_lat, + (metadata #> '{extent, coord, lr, lon}') as lr_lon, + (metadata #> '{extent, coord, ul, lat}') as ul_lat, + (metadata #> '{extent, coord, ul, lon}') as ul_lon, + (metadata #> '{extent, coord, ur, lat}') as ur_lat, + (metadata #> '{extent, coord, ur, lon}') as ur_lon + from agdc.dataset where metadata_type_ref=1::smallint) +select id,format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))', + ll_lon, ll_lat, lr_lon, lr_lat, ur_lon, ur_lat, + ul_lon, ul_lat, ll_lon, ll_lat)::geometry as spatial_extent +from corners; + +-- This is optional and in native projection where present (3577, spatial reference where present) +select + ST_Transform( + ST_SetSRID( + ST_GeomFromGeoJSON( + metadata #>> '{grid_spatial,projection,valid_data}'), + 3577 + ), + 4326 + ) as spatial_extent +from agdc.dataset; \ No newline at end of file
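The two temporal queries in this patch probe each metadata variant separately; the coalesce the opening comment refers to would merge them per dataset by falling back between the two JSON paths, since a path that is absent for a given metadata type yields NULL. A minimal sketch of that merged form, assuming the same agdc.dataset table and metadata layouts used in the patch:

select id,
  tstzrange(
    -- missing JSON paths yield NULL, so each COALESCE falls through to the other variant
    coalesce((metadata -> 'extent' ->> 'from_dt') :: timestamp,
             (metadata -> 'properties' ->> 'datetime') :: timestamp),
    coalesce((metadata -> 'extent' ->> 'to_dt') :: timestamp,
             (metadata -> 'properties' ->> 'datetime') :: timestamp + interval '1 day')
  ) as temporal_extent
from agdc.dataset;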
From 08dcf8373ac5d84ac12b75f807cc1528c6499f17 Mon Sep 17 00:00:00 2001 From: whatnick Date: Thu, 13 Feb 2020 04:30:49 +0000 Subject: [PATCH 02/30] #197 Add more variants and detail spatial capture --- create_space_time.sql | 24 ++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/create_space_time.sql b/create_space_time.sql index 176ddcf21..9d940fc4d 100644 --- a/create_space_time.sql +++ b/create_space_time.sql @@ -33,7 +33,18 @@ from agdc.dataset where metadata_type_ref=3::smallint; -- Try all different locations for spatial extents and COALESCE them -- This is eo3 spatial (Uses CEMP INSAR as a sample product) -select metadata from agdc.dataset where metadata_type_ref=3::smallint; +with ranges as +(select id, + (metadata #> '{extent, lat, begin}') as lat_begin, + (metadata #> '{extent, lat, end}') as lat_end, + (metadata #> '{extent, lon, begin}') as lon_begin, + (metadata #> '{extent, lon, end}') as lon_end + from agdc.dataset where metadata_type_ref=3::smallint) +select id,format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))', + lon_begin, lat_begin, lon_end, lat_begin, lon_end, lat_end, + lon_begin, lat_end, lon_begin, lat_begin)::geometry +as spatial_extent +from ranges; -- This is eo spatial (Uses ALOS-PALSAR over Africa as a sample product) with corners as @@ -52,14 +63,15 @@ select id,format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))', ul_lon, ul_lat, ll_lon, ll_lat)::geometry as spatial_extent from corners; --- This is optional and in native projection where present (3577, spatial reference where present) -select +-- This is optional and in native projection where present, +-- String processing drops EPSG prefix +select id, ST_Transform( ST_SetSRID( ST_GeomFromGeoJSON( metadata #>> '{grid_spatial,projection,valid_data}'), - 3577 + substr(metadata #>> '{grid_spatial,projection,spatial_reference}',6)::integer ), 4326 - ) as spatial_extent -from agdc.dataset; \ No newline at end of file + ) as detail_spatial_extent +from agdc.dataset where metadata_type_ref=3::smallint;
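The detail extent introduced here derives an SRID by dropping the first five characters of the stored spatial reference, which assumes every value has the form 'EPSG:<code>'; a reference stored as WKT instead would make the ::integer cast fail. The substr arithmetic can be checked standalone, with no datacube tables involved:

select substr('EPSG:3577', 6)::integer as srid;  -- characters 6 onward are the numeric code, giving 3577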
From 03d45e656aabe146a30539a43483becfc8f92a5f Mon Sep 17 00:00:00 2001 From: whatnick Date: Thu, 13 Feb 2020 05:45:10 +0000 Subject: [PATCH 03/30] #197 Swap coalesce with Union --- create_space_time.sql | 37 ++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/create_space_time.sql b/create_space_time.sql index 9d940fc4d..7f67b1b38 100644 --- a/create_space_time.sql +++ b/create_space_time.sql @@ -10,44 +10,41 @@ SET search_path = public, agdc; --- Handling different variants of metadata requires COALESCE --- https://www.postgresql.org/docs/11/functions-conditional.html#FUNCTIONS-COALESCE-NVL-IFNULL +-- Handling different variants of metadata requires UNION with WHERE clauses per metadata type +-- https://www.postgresql.org/docs/11/queries-union.html --- Try all different locations for temporal extents and COALESCE them +-- Try all different locations for temporal extents and UNION them -- This is the eodataset variant of the temporal extent select - tstzrange( + id,tstzrange( (metadata -> 'extent' ->> 'from_dt') :: timestamp,(metadata -> 'extent' ->> 'to_dt') :: timestamp ) as temporal_extent -from agdc.dataset where metadata_type_ref=1::smallint; - +from agdc.dataset where metadata_type_ref=1::smallint +UNION -- This is the eo3 variant of the temporal extent, the sample eo3 dataset uses a singleton -- timestamp, some other variants use start/end timestamps. From OWS perspective temporal -- resolution is 1 whole day select - tstzrange( + id,tstzrange( (metadata->'properties'->>'datetime'):: timestamp, (metadata->'properties'->>'datetime'):: timestamp + interval '1 day' ) as temporal_extent from agdc.dataset where metadata_type_ref=3::smallint; --- Try all different locations for spatial extents and COALESCE them + +-- Spatial extents per dataset (to be created as a column of the space-time table) +-- Try all different locations for spatial extents and UNION them +with -- This is eo3 spatial (Uses CEMP INSAR as a sample product) -with ranges as +ranges as (select id, (metadata #> '{extent, lat, begin}') as lat_begin, (metadata #> '{extent, lat, end}') as lat_end, (metadata #> '{extent, lon, begin}') as lon_begin, (metadata #> '{extent, lon, end}') as lon_end - from agdc.dataset where metadata_type_ref=3::smallint) -select id,format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))', - lon_begin, lat_begin, lon_end, lat_begin, lon_end, lat_end, - lon_begin, lat_end, lon_begin, lat_begin)::geometry -as spatial_extent -from ranges; - + from agdc.dataset where metadata_type_ref=3::smallint), -- This is eo spatial (Uses ALOS-PALSAR over Africa as a sample product) -with corners as +corners as (select id, (metadata #> '{extent, coord, ll, lat}') as ll_lat, (metadata #> '{extent, coord, ll, lon}') as ll_lon, (metadata #> '{extent, coord, lr, lat}') as lr_lat, (metadata #> '{extent, coord, lr, lon}') as lr_lon, (metadata #> '{extent, coord, ul, lat}') as ul_lat, (metadata #> '{extent, coord, ul, lon}') as ul_lon, (metadata #> '{extent, coord, ur, lat}') as ur_lat, (metadata #> '{extent, coord, ur, lon}') as ur_lon from agdc.dataset where metadata_type_ref=1::smallint) +select id,format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))', + lon_begin, lat_begin, lon_end, lat_begin, lon_end, lat_end, + lon_begin, lat_end, lon_begin, lat_begin)::geometry +as spatial_extent +from ranges +UNION select id,format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))', ll_lon, ll_lat, lr_lon, lr_lat, ur_lon, ur_lat, ul_lon, ul_lat, ll_lon, ll_lat)::geometry as spatial_extent from corners; From c051808fac2b346a7d8554d7b0cc762692b155b6 Mon Sep 17 00:00:00 2001 From: whatnick Date: Fri, 14 Feb 2020 04:51:52 +0000 Subject: [PATCH 04/30] #197 add ARD metadata type time/space parsing --- create_space_time.sql | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/create_space_time.sql b/create_space_time.sql index 7f67b1b38..8010eccca 100644 --- a/create_space_time.sql +++ b/create_space_time.sql @@ -30,6 +30,14 @@ select (metadata->'properties'->>'datetime'):: timestamp + interval '1 day' ) as temporal_extent from agdc.dataset where metadata_type_ref=3::smallint; +-- Start/End timestamp variant product.
+-- http://dapds00.nci.org.au/thredds/fileServer/xu18/ga_ls8c_ard_3/092/090/2019/06/05/ga_ls8c_ard_3-0-0_092090_2019-06-05_final.odc-metadata.yaml +select + id,tstzrange( + (metadata->'properties'->>'dtr:start_datetime'):: timestamp, + (metadata->'properties'->>'dtr:end_datetime'):: timestamp + ) as temporal_extent +from agdc.dataset where metadata_type_ref=4::smallint; -- Spatial extents per dataset (to be created as a column of the space-time table) @@ -42,7 +50,9 @@ ranges as (metadata #> '{extent, lat, end}') as lat_end, (metadata #> '{extent, lon, begin}') as lon_begin, (metadata #> '{extent, lon, end}') as lon_end - from agdc.dataset where metadata_type_ref=3::smallint), + from agdc.dataset where + metadata_type_ref=3::smallint + ), -- This is eo spatial (Uses ALOS-PALSAR over Africa as a sample product) corners as (select id, @@ -77,4 +87,6 @@ select id, ), 4326 ) as detail_spatial_extent -from agdc.dataset where metadata_type_ref=3::smallint; +from agdc.dataset where +(metadata_type_ref=3::smallint or +metadata_type_ref=4::smallint) From f95ad6c502bd80af78f69fd752defb9534224ab1 Mon Sep 17 00:00:00 2001 From: whatnick Date: Wed, 19 Feb 2020 06:33:59 +0000 Subject: [PATCH 05/30] #197 More unionizing --- create_space_time.sql | 54 +++++++++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/create_space_time.sql b/create_space_time.sql index 8010eccca..7877ff6bc 100644 --- a/create_space_time.sql +++ b/create_space_time.sql @@ -29,7 +29,8 @@ select (metadata->'properties'->>'datetime'):: timestamp, (metadata->'properties'->>'datetime'):: timestamp + interval '1 day' ) as temporal_extent -from agdc.dataset where metadata_type_ref=3::smallint; +from agdc.dataset where metadata_type_ref=3::smallint +UNION -- Start/End timestamp variant product. 
-- http://dapds00.nci.org.au/thredds/fileServer/xu18/ga_ls8c_ard_3/092/090/2019/06/05/ga_ls8c_ard_3-0-0_092090_2019-06-05_final.odc-metadata.yaml select @@ -74,19 +75,48 @@ UNION select id,format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))', ll_lon, ll_lat, lr_lon, lr_lat, ur_lon, ur_lat, ul_lon, ul_lat, ll_lon, ll_lat)::geometry as spatial_extent -from corners; +from corners +UNION +select id, + ST_Transform( + ST_SetSRID( + ST_GeomFromGeoJSON( + metadata #>> '{geometry}'), + substr( + metadata #>> '{crs}',6)::integer + ), + 4326 + ) as spatial_extent + from agdc.dataset where metadata_type_ref=4::smallint; + -- This is optional and in native projection where present, -- String processing drops EPSG prefix -select id, - ST_Transform( +select + id, + ST_Transform( ST_SetSRID( - ST_GeomFromGeoJSON( + ST_GeomFromGeoJSON( metadata #>> '{grid_spatial,projection,valid_data}'), - substr(metadata #>> '{grid_spatial,projection,spatial_reference}',6)::integer - ), - 4326 - ) as detail_spatial_extent -from agdc.dataset where -(metadata_type_ref=3::smallint or -metadata_type_ref=4::smallint) + substr( + metadata #>> '{grid_spatial,projection,spatial_reference}',6)::integer + ), + 4326 + ) as detail_spatial_extent + from agdc.dataset + where + metadata_type_ref=3::smallint + +select id, + ST_Transform( + ST_SetSRID( + ST_GeomFromGeoJSON( + metadata #>> '{geometry}'), + substr( + metadata #>> '{crs}',6)::integer + ), + 4326 + ) as detail_spatial_extent + + from agdc.dataset where metadata_type_ref=4::smallint; + From 22f9942a27cff654222f6695f86fb468b172876b Mon Sep 17 00:00:00 2001 From: whatnick Date: Fri, 21 Feb 2020 06:29:19 +0000 Subject: [PATCH 06/30] #197 Support gqa_eo/eo_plus metadata type --- create_space_time.sql | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/create_space_time.sql b/create_space_time.sql index 7877ff6bc..a81d31dac 100644 --- a/create_space_time.sql +++ b/create_space_time.sql @@ -19,7 +19,8 @@ select id,tstzrange( (metadata -> 'extent' ->> 'from_dt') :: timestamp,(metadata -> 'extent' ->> 'to_dt') :: timestamp ) as temporal_extent -from agdc.dataset where metadata_type_ref=1::smallint +from agdc.dataset where + metadata_type_ref=1::smallint or metadata_type_ref=5::smallint UNION -- This is the eo3 variant of the temporal extent, the sample eo3 dataset uses a singleton -- timestamp, some other variants use start/end timestamps. 
From OWS perspective temporal -- resolution is 1 whole day select @@ -65,7 +66,8 @@ corners as (metadata #> '{extent, coord, ul, lon}') as ul_lon, (metadata #> '{extent, coord, ur, lat}') as ur_lat, (metadata #> '{extent, coord, ur, lon}') as ur_lon - from agdc.dataset where metadata_type_ref=1::smallint + from agdc.dataset where metadata_type_ref=1::smallint + or metadata_type_ref=5::smallint) select id,format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))', lon_begin, lat_begin, lon_end, lat_begin, lon_end, lat_end, lon_begin, lat_end, lon_begin, lat_begin)::geometry as spatial_extent from ranges UNION @@ -118,3 +120,4 @@ select id, from agdc.dataset where metadata_type_ref=4::smallint; +select count(1),metadata_type_ref from agdc.dataset group by metadata_type_ref; \ No newline at end of file
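The count added at the end of this patch tallies datasets per numeric metadata_type_ref, but those ids vary between datacube installs, which is what the next patch addresses by resolving them through the metadata_type table. A name-based version of the same tally, assuming the standard agdc schema:

select mt.id, mt.name, count(ds.id) as datasets
from agdc.metadata_type mt
left join agdc.dataset ds on ds.metadata_type_ref = mt.id
group by mt.id, mt.name
order by mt.id;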
From ce5a277e2414b158d0cf708a0bfddf6c2c7950ca Mon Sep 17 00:00:00 2001 From: whatnick Date: Mon, 24 Feb 2020 04:58:28 +0000 Subject: [PATCH 07/30] #197 Use metadata table instead of hardcoded numbers --- create_space_time.sql | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/create_space_time.sql b/create_space_time.sql index a81d31dac..bddf8444a 100644 --- a/create_space_time.sql +++ b/create_space_time.sql @@ -14,13 +14,18 @@ SET -- https://www.postgresql.org/docs/11/queries-union.html -- Try all different locations for temporal extents and UNION them +with +-- Crib metadata names to use for string matching the various types +metadata_lookup as ( + select id,name from agdc.metadata_type +) -- This is the eodataset variant of the temporal extent select id,tstzrange( (metadata -> 'extent' ->> 'from_dt') :: timestamp,(metadata -> 'extent' ->> 'to_dt') :: timestamp ) as temporal_extent from agdc.dataset where - metadata_type_ref=1::smallint or metadata_type_ref=5::smallint + metadata_type_ref in (select id from metadata_lookup where name in ('eo','gqa_eo','eo_plus')) UNION -- This is the eo3 variant of the temporal extent, the sample eo3 dataset uses a singleton -- timestamp, some other variants use start/end timestamps. From OWS perspective temporal -- resolution is 1 whole day select @@ -35,7 +35,7 @@ select (metadata->'properties'->>'datetime'):: timestamp, (metadata->'properties'->>'datetime'):: timestamp + interval '1 day' ) as temporal_extent -from agdc.dataset where metadata_type_ref=3::smallint +from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name='eo3') UNION -- Start/End timestamp variant product. -- http://dapds00.nci.org.au/thredds/fileServer/xu18/ga_ls8c_ard_3/092/090/2019/06/05/ga_ls8c_ard_3-0-0_092090_2019-06-05_final.odc-metadata.yaml select id,tstzrange( (metadata->'properties'->>'dtr:start_datetime'):: timestamp, (metadata->'properties'->>'dtr:end_datetime'):: timestamp ) as temporal_extent -from agdc.dataset where metadata_type_ref=4::smallint; +from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name in ('eo3_landsat_ard')); -- Spatial extents per dataset (to be created as a column of the space-time table) -- Try all different locations for spatial extents and UNION them with +-- Crib metadata names to use for string matching the various types +metadata_lookup as ( + select id,name from agdc.metadata_type +), -- This is eo3 spatial (Uses CEMP INSAR as a sample product) ranges as (select id, (metadata #> '{extent, lat, begin}') as lat_begin, (metadata #> '{extent, lat, end}') as lat_end, (metadata #> '{extent, lon, begin}') as lon_begin, (metadata #> '{extent, lon, end}') as lon_end from agdc.dataset where - metadata_type_ref=3::smallint + metadata_type_ref in (select id from metadata_lookup where name='eo3') ), -- This is eo spatial (Uses ALOS-PALSAR over Africa as a sample product) corners as (select id, (metadata #> '{extent, coord, ll, lat}') as ll_lat, (metadata #> '{extent, coord, ll, lon}') as ll_lon, (metadata #> '{extent, coord, lr, lat}') as lr_lat, (metadata #> '{extent, coord, lr, lon}') as lr_lon, (metadata #> '{extent, coord, ul, lat}') as ul_lat, (metadata #> '{extent, coord, ul, lon}') as ul_lon, (metadata #> '{extent, coord, ur, lat}') as ur_lat, (metadata #> '{extent, coord, ur, lon}') as ur_lon - from agdc.dataset where metadata_type_ref=1::smallint - or metadata_type_ref=5::smallint) + from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name in ('eo','gqa_eo','eo_plus'))) select id,format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))', lon_begin, lat_begin, lon_end, lat_begin, lon_end, lat_end, lon_begin, lat_end, lon_begin, lat_begin)::geometry as spatial_extent from ranges UNION @@ -79,6 +87,7 @@ select id,format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))', ul_lon, ul_lat, ll_lon, ll_lat)::geometry as spatial_extent from corners UNION +-- This is landsat_scene and landsat_l1_scene with geometries select id, ST_Transform( ST_SetSRID( ST_GeomFromGeoJSON( metadata #>> '{geometry}'), substr( metadata #>> '{crs}',6)::integer ), 4326 ) as spatial_extent - from agdc.dataset where metadata_type_ref=4::smallint; + from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name in ('eo3_landsat_ard')); From 82d89c26cbe3dabaa4d8a050f610540423b0c330 Mon Sep 17 00:00:00 2001 From: whatnick Date: Mon, 24 Feb 2020 23:31:35 +0000 Subject: [PATCH 08/30] #197 Create x3 materialized views --- create_space_time.sql | 62 +++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/create_space_time.sql b/create_space_time.sql index bddf8444a..ee324e34d 100644 --- a/create_space_time.sql +++ b/create_space_time.sql @@ -14,6 +14,8 @@ SET -- https://www.postgresql.org/docs/11/queries-union.html -- Try all different locations for temporal extents and UNION them +CREATE MATERIALIZED VIEW IF NOT EXISTS time_view (ID, temporal_extent) +AS with -- Crib metadata names to use for string matching the various types metadata_lookup as ( @@ -49,6 +51,8 @@ from agdc.dataset where metadata_type_ref in (select id from metadata_lookup whe -- Spatial extents per dataset (to be created as a column of the space-time table) -- Try all different locations for spatial extents and UNION them +CREATE MATERIALIZED VIEW IF NOT EXISTS space_view (ID, spatial_extent) +AS with -- Crib metadata names to use for string matching the various types metadata_lookup as ( @@ -101,34 +105,40 @@ select id, ST_Transform( ST_SetSRID( ST_GeomFromGeoJSON( metadata #>> '{geometry}'), substr( metadata #>> '{crs}',6)::integer ), 4326 ) as spatial_extent from agdc.dataset where
metadata_type_ref in (select id from metadata_lookup where name in ('eo3_landsat_ard')); -- Join the above queries for space and time as CTE's into a space-time view -- This is optional and in native projection where present, -- String processing drops EPSG prefix -select -- id, -- ST_Transform( -- ST_SetSRID( -- ST_GeomFromGeoJSON( -- metadata #>> '{grid_spatial,projection,valid_data}'), -- substr( -- metadata #>> '{grid_spatial,projection,spatial_reference}',6)::integer -- ), -- 4326 -- ) as detail_spatial_extent -- from agdc.dataset -- where -- metadata_type_ref=3::smallint -select id, -- ST_Transform( -- ST_SetSRID( -- ST_GeomFromGeoJSON( -- metadata #>> '{geometry}'), -- substr( -- metadata #>> '{crs}',6)::integer -- ), -- 4326 -- ) as detail_spatial_extent -- from agdc.dataset where metadata_type_ref=4::smallint; CREATE MATERIALIZED VIEW IF NOT EXISTS space_time_view (ID, spatial_extent, temporal_extent) AS select space_view.id, spatial_extent, temporal_extent from space_view join time_view on space_view.id=time_view.id; -- select * from space_time_view; \ No newline at end of file From 3ba6cba2e1f2bb466e286a08054052e56920cfb5 Mon Sep 17 00:00:00 2001 From: whatnick Date: Thu, 27 Feb 2020 00:31:30 +0000 Subject: [PATCH 09/30] #197 Add indexes on materialized views --- create_space_time.sql | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/create_space_time.sql b/create_space_time.sql index ee324e34d..82d790095 100644 --- a/create_space_time.sql +++ b/create_space_time.sql @@ -141,4 +141,16 @@ CREATE MATERIALIZED VIEW IF NOT EXISTS space_time_view (ID, spatial_extent, temp AS select space_view.id, spatial_extent, temporal_extent from space_view join time_view on space_view.id=time_view.id; --- select * from space_time_view; \ No newline at end of file +-- select * from space_time_view; + +-- Spatial extents are indexed using GIST index for BBOX queries +-- https://postgis.net/workshops/postgis-intro/indexing.html +CREATE INDEX space_time_view_geom_idx + ON space_time_view + USING GIST (spatial_extent); + +-- Time range types can carry indexes for range lookup +-- https://www.postgresql.org/docs/11/rangetypes.html#RANGETYPES-INDEXING +CREATE INDEX space_time_view_time_idx + ON space_time_view + USING SPGIST (temporal_extent); \ No newline at end of file
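With the GIST and SPGIST indexes in place, bounding-box and time-range filters against the view can be satisfied by index scans rather than full scans. A sketch of the kind of lookup they serve, with placeholder coordinates and dates:

select id
from space_time_view
-- && is the overlap operator for both geometries and range types
where spatial_extent && ST_MakeEnvelope(145.0, -35.0, 146.0, -34.0, 4326)
  and temporal_extent && tstzrange('2019-01-01', '2019-02-01');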
From ad2dc1ee892409375265742f0c5b6b504add5057 Mon Sep 17 00:00:00 2001 From: whatnick Date: Fri, 28 Feb 2020 03:43:43 +0000 Subject: [PATCH 10/30] #197 Add dataset_type_ref --- create_space_time.sql | 29 ++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/create_space_time.sql b/create_space_time.sql index 82d790095..2ad28ebdd 100644 --- a/create_space_time.sql +++ b/create_space_time.sql @@ -10,11 +10,20 @@ SET search_path = public, agdc; +-- Get rid of already existing Materialized View +DROP INDEX space_time_view_geom_idx; +DROP INDEX space_time_view_time_idx; +DROP INDEX space_time_view_ds_idx; +DROP MATERIALIZED VIEW space_time_view; +DROP MATERIALIZED VIEW time_view; +DROP MATERIALIZED VIEW space_view; + + -- Handling different variants of metadata requires UNION with WHERE clauses per metadata type -- https://www.postgresql.org/docs/11/queries-union.html -- Try all different locations for temporal extents and UNION them -CREATE MATERIALIZED VIEW IF NOT EXISTS time_view (ID, temporal_extent) +CREATE MATERIALIZED VIEW IF NOT EXISTS time_view (dataset_type_ref, ID, temporal_extent) AS with -- Crib metadata names to use for string matching the various types metadata_lookup as ( ) -- This is the eodataset variant of the temporal extent select - id,tstzrange( + dataset_type_ref, id,tstzrange( (metadata -> 'extent' ->> 'from_dt') :: timestamp,(metadata -> 'extent' ->> 'to_dt') :: timestamp ) as temporal_extent from agdc.dataset where @@ -33,7 +42,7 @@ UNION -- timestamp, some other variants use start/end timestamps. From OWS perspective temporal -- resolution is 1 whole day select - id,tstzrange( + dataset_type_ref, id,tstzrange( (metadata->'properties'->>'datetime'):: timestamp, (metadata->'properties'->>'datetime'):: timestamp + interval '1 day' ) as temporal_extent @@ -42,7 +51,7 @@ UNION -- Start/End timestamp variant product. -- http://dapds00.nci.org.au/thredds/fileServer/xu18/ga_ls8c_ard_3/092/090/2019/06/05/ga_ls8c_ard_3-0-0_092090_2019-06-05_final.odc-metadata.yaml select - id,tstzrange( + dataset_type_ref, id,tstzrange( (metadata->'properties'->>'dtr:start_datetime'):: timestamp, (metadata->'properties'->>'dtr:end_datetime'):: timestamp ) as temporal_extent @@ -137,9 +146,9 @@ select id, -- ) as detail_spatial_extent -- from agdc.dataset where metadata_type_ref=4::smallint; -CREATE MATERIALIZED VIEW IF NOT EXISTS space_time_view (ID, spatial_extent, temporal_extent) +CREATE MATERIALIZED VIEW IF NOT EXISTS space_time_view (ID, dataset_type_ref, spatial_extent, temporal_extent) AS -select space_view.id, spatial_extent, temporal_extent from space_view join time_view on space_view.id=time_view.id; +select space_view.id, dataset_type_ref, spatial_extent, temporal_extent from space_view join time_view on space_view.id=time_view.id; -- select * from space_time_view; @@ -153,4 +162,10 @@ CREATE INDEX space_time_view_time_idx ON space_time_view - USING SPGIST (temporal_extent); \ No newline at end of file + USING SPGIST (temporal_extent); + +-- Create standard btree index over dataset_type_ref to ease searching by product +-- https://ieftimov.com/post/postgresql-indexes-btree/ +CREATE INDEX space_time_view_ds_idx + ON space_time_view + USING BTREE(dataset_type_ref) \ No newline at end of file From d566b4ccc7028c1202c23ca1761352699e8e44d9 Mon Sep 17 00:00:00 2001 From: whatnick Date: Mon, 2 Mar 2020 02:23:19 +0000 Subject: [PATCH 11/30] #197 close off index line --- create_space_time.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/create_space_time.sql b/create_space_time.sql index 2ad28ebdd..e9dfaac43 100644 --- a/create_space_time.sql +++ b/create_space_time.sql @@ -168,4 +168,4 @@ CREATE INDEX space_time_view_time_idx ON space_time_view - USING BTREE(dataset_type_ref) \ No newline at end of file + USING BTREE(dataset_type_ref); \ No newline at end
of file From ec9e8d726014859ce91a85bfcd53eff72a0c2c2d Mon Sep 17 00:00:00 2001 From: phaesler Date: Tue, 24 Mar 2020 11:43:20 +1100 Subject: [PATCH 12/30] Start of update_ranges refactor to use mvs. --- datacube_ows/update_ranges_2.py | 185 ++++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100755 datacube_ows/update_ranges_2.py diff --git a/datacube_ows/update_ranges_2.py b/datacube_ows/update_ranges_2.py new file mode 100755 index 000000000..c1cfd4e4e --- /dev/null +++ b/datacube_ows/update_ranges_2.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python3 + +from datacube_ows.product_ranges import update_all_ranges, get_sqlconn, add_product_range, add_multiproduct_range, add_all, update_range +from datacube import Datacube +from psycopg2.sql import SQL, Identifier +import os +import click + +@click.command() +@click.option("--schema", is_flag=True, default=False, help="Create or update the OWS database schema.") +@click.option("--role", default=None, help="Role to grant database permissions to") +@click.option("--product", default=None, help="The name of a datacube product.") +@click.option("--multiproduct", default=None, help="The name of OWS multi-product." ) +@click.option("--merge-only/--no-merge-only", default=False, help="When used with the multiproduct and calculate-extent options, the ranges for underlying datacube products are not updated.") +@click.option("--calculate-extent/--no-calculate-extent", default=True, help="no-calculate-extent uses database queries to maximise efficiency. calculate-extent calculates ranges directly and is the default.") +def main(product, multiproduct, merge_only, calculate_extent, schema, role): + """Manage datacube-ows range tables. + + A valid invocation should specify at most one of '--product', '--multiproduct' or '--schema'. + If neither of these options are specified, then the ranges for all products and multiproducts + are updated. + """ + if product and multiproduct: + print("Sorry, you specified both a product and multiproduct. 
One at a time, please.") + return 1 + elif schema and (product or multiproduct): + print("Sorry, cannot update the schema and ranges in the same invocation.") + return 1 + elif schema and not role: + print("Sorry, cannot update schema without specifying a role") + return 1 + + if os.environ.get("PYDEV_DEBUG"): + import pydevd_pycharm + pydevd_pycharm.settrace('172.17.0.1', port=12321, stdoutToServer=True, stderrToServer=True) + + + dc = Datacube(app="ows_update_ranges") + if schema: + print("Checking schema....") + print("Creating or replacing WMS database schema...") + create_schema(dc, role) + print("Done") + elif not calculate_extent: + if product: + print("Updating range for: ", product) + add_product_range(dc, product) + elif multiproduct: + print("Updating range for: ", multiproduct) + add_multiproduct_range(dc, multiproduct) + else: + print("Updating range for all, using SQL extent calculation") + add_all(dc) + print("Done") + else: + if product: + print("Updating range for: ", product) + p, u, i, sp, su, si = update_range(dc, product, multi=False) + if u: + print("Ranges updated for", product) + elif i: + print("New ranges inserted for", product) + else: + print("Ranges up to date for", product) + if sp or su or si: + print ("Updated ranges for %d existing sub-products and inserted ranges for %d new sub-products (%d existing sub-products unchanged)" % (su, si, sp)) + elif multiproduct: + print("Updating range for: ", multiproduct) + p, u, i = update_range(dc, multiproduct, multi=True, follow_dependencies=not merge_only) + if u: + print("Merged ranges updated for", multiproduct) + elif i: + print("Merged ranges inserted for", multiproduct) + else: + print("Merged ranges up to date for", multiproduct) + else: + print ("Updating ranges for all layers/products") + p, u, i, sp, su, si, mp, mu, mi = update_all_ranges(dc) + print ("Updated ranges for %d existing layers/products and inserted ranges for %d new layers/products (%d existing layers/products unchanged)" % (u, i, p)) + if sp or su or si: + print ("Updated ranges for %d existing sub-products and inserted ranges for %d new sub-products (%d existing sub-products unchanged)" % (su, si, sp)) + if mp or mu or mi: + print ("Updated ranges for %d existing multi-products and inserted ranges for %d new multi-products (%d existing multi-products unchanged)" % (su, si, sp)) + return 0 + + +def create_schema(dc, role): + commands = [ + ("Creating/replacing wms schema", "create schema if not exists wms"), + + ("Creating/replacing product ranges table", """ + create table if not exists wms.product_ranges ( + id smallint not null primary key references agdc.dataset_type (id), + + lat_min decimal not null, + lat_max decimal not null, + lon_min decimal not null, + lon_max decimal not null, + + dates jsonb not null, + + bboxes jsonb not null) + """), + ("Creating/replacing sub-product ranges table", """ + create table if not exists wms.sub_product_ranges ( + product_id smallint not null references agdc.dataset_type (id), + sub_product_id smallint not null, + lat_min decimal not null, + lat_max decimal not null, + lon_min decimal not null, + lon_max decimal not null, + dates jsonb not null, + bboxes jsonb not null, + constraint pk_sub_product_ranges primary key (product_id, sub_product_id) ) + """), + ("Creating/replacing multi-product ranges table", """ + create table if not exists wms.multiproduct_ranges ( + wms_product_name varchar(128) not null primary key, + lat_min decimal not null, + lat_max decimal not null, + lon_min decimal not null, + 
lon_max decimal not null, + dates jsonb not null, + bboxes jsonb not null) + """), + # Functions + ("Creating/replacing wms_get_min() function", """ + CREATE OR REPLACE FUNCTION wms_get_min(integer[], text) RETURNS numeric AS $$ + DECLARE + ret numeric; + ul text[] DEFAULT array_append('{extent, coord, ul}', $2); + ur text[] DEFAULT array_append('{extent, coord, ur}', $2); + ll text[] DEFAULT array_append('{extent, coord, ll}', $2); + lr text[] DEFAULT array_append('{extent, coord, lr}', $2); + BEGIN + WITH m AS ( SELECT metadata FROM agdc.dataset WHERE dataset_type_ref = any($1) AND archived IS NULL ) + SELECT MIN(LEAST((m.metadata#>>ul)::numeric, (m.metadata#>>ur)::numeric, + (m.metadata#>>ll)::numeric, (m.metadata#>>lr)::numeric)) + INTO ret + FROM m; + RETURN ret; + END; + $$ LANGUAGE plpgsql; + """), + ("Creating/replacing wms_get_max() function", """ + CREATE OR REPLACE FUNCTION wms_get_max(integer[], text) RETURNS numeric AS $$ + DECLARE + ret numeric; + ul text[] DEFAULT array_append('{extent, coord, ul}', $2); + ur text[] DEFAULT array_append('{extent, coord, ur}', $2); + ll text[] DEFAULT array_append('{extent, coord, ll}', $2); + lr text[] DEFAULT array_append('{extent, coord, lr}', $2); + BEGIN + WITH m AS ( SELECT metadata FROM agdc.dataset WHERE dataset_type_ref = ANY ($1) AND archived IS NULL ) + SELECT MAX(GREATEST((m.metadata#>>ul)::numeric, (m.metadata#>>ur)::numeric, + (m.metadata#>>ll)::numeric, (m.metadata#>>lr)::numeric)) + INTO ret + FROM m; + RETURN ret; + END; + $$ LANGUAGE plpgsql; + """), + ] + + conn = get_sqlconn(dc) + for msg, sql in commands: + print(msg) + conn.execute(sql) + + # Add user based on param + # use psycopg2 directly to get proper psql + # quoting on the role name identifier + print("Granting usage on schema") + q = SQL("GRANT USAGE ON SCHEMA wms TO {}").format(Identifier(role)) + with conn.connection.cursor() as psycopg2connection: + psycopg2connection.execute(q) + conn.close() + + return + + +if __name__ == '__main__': + main() + + From ada822061037477971c11357d470906a69aae418 Mon Sep 17 00:00:00 2001 From: whatnick Date: Thu, 26 Mar 2020 05:24:20 +0000 Subject: [PATCH 13/30] #197 Place sql in separate folder and add sample Signed-off-by: whatnick --- create_space_time.sql => sql/create_space_time.sql | 0 sql/use_space_time.sql | 11 +++++++++++ 2 files changed, 11 insertions(+) rename create_space_time.sql => sql/create_space_time.sql (100%) create mode 100644 sql/use_space_time.sql diff --git a/create_space_time.sql b/sql/create_space_time.sql similarity index 100% rename from create_space_time.sql rename to sql/create_space_time.sql diff --git a/sql/use_space_time.sql b/sql/use_space_time.sql new file mode 100644 index 000000000..ef1275241 --- /dev/null +++ b/sql/use_space_time.sql @@ -0,0 +1,11 @@ +--- Usage of space-time tables to obtain spatial and temporal extents of datasets +--- from indexed materialized views +select dataset_type_ref, ST_Extent(spatial_extent) as bbox, array_agg(temporal_extent) from space_time_view group by dataset_type_ref; +------------------------------------------------------------------------------------------------------------------------------------- +-----------------------------------------------------------QUERY PLAN---------------------------------------------------------------- +------------------------------------------------------------------------------------------------------------------------------------- +--HashAggregate (cost=316452.75..316453.50 rows=50 width=99) (actual time=9850.712..10340.107 
rows=74 loops=1) -- Group Key: dataset_type_ref -- -> Seq Scan on space_time_view (cost=0.00..255246.00 rows=8160900 width=128) (actual time=0.009..2750.915 rows=8160900 loops=1) -- Planning Time: 0.502 ms -- Execution Time: 10364.716 ms \ No newline at end of file
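The captured plan shows a ten-second sequential scan when aggregating over all 8.1 million rows at once; the per-product form that the range update needs can filter on dataset_type_ref and lean on the BTREE index instead. A sketch of that query (the type id is a placeholder):

select ST_Extent(spatial_extent) as bbox,
       min(lower(temporal_extent)) as start_time,
       max(upper(temporal_extent)) as end_time
from space_time_view
where dataset_type_ref = 3;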
From 5117f0d17e57a497d9fb742b805543e5d950faad Mon Sep 17 00:00:00 2001 From: phaesler Date: Fri, 27 Mar 2020 14:51:17 +1100 Subject: [PATCH 14/30] Start new update_ranges based on mat views. --- datacube_ows/product_ranges_2.py | 331 +++++++++++++++++++++++++++ datacube_ows/update_ranges_2.py | 17 +- update_ranges_2.py | 4 + 3 files changed, 347 insertions(+), 5 deletions(-) create mode 100644 datacube_ows/product_ranges_2.py create mode 100644 update_ranges_2.py diff --git a/datacube_ows/product_ranges_2.py b/datacube_ows/product_ranges_2.py new file mode 100644 index 000000000..3d885a700 --- /dev/null +++ b/datacube_ows/product_ranges_2.py @@ -0,0 +1,331 @@ +#pylint: skip-file + +from __future__ import absolute_import, division, print_function + +from datetime import datetime +import datacube + +from datacube_ows.ows_configuration import get_config, OWSNamedLayer # , get_layers, ProductLayerDef +from datacube_ows.ogc_utils import local_date +from psycopg2.extras import Json +from itertools import zip_longest +import json + +from datacube_ows.utils import get_sqlconn + + +def get_crsids(cfg=None): + if not cfg: + cfg = get_config() + return cfg.published_CRSs.keys() + + +def get_crses(cfg=None): + return {crsid: datacube.utils.geometry.CRS(crsid) for crsid in get_crsids(cfg)} + + +def jsonise_bbox(bbox): + if isinstance(bbox, dict): + return bbox + else: + return { + "top": bbox.top, + "bottom": bbox.bottom, + "left": bbox.left, + "right": bbox.right, + } + + +def create_multiprod_range_entry(dc, product, crses): + conn = get_sqlconn(dc) + txn = conn.begin() + if isinstance(product, dict): + prodids = [p.id for p in product["products"]] + wms_name = product["name"] + else: + prodids = [ p.id for p in product.products ] + wms_name = product.name + + # Attempt to insert row + conn.execute(""" + INSERT INTO wms.multiproduct_ranges + (wms_product_name,lat_min,lat_max,lon_min,lon_max,dates,bboxes) + VALUES + (%(p_id)s, 0, 0, 0, 0, %(empty)s, %(empty)s) + ON CONFLICT (wms_product_name) DO NOTHING + """, + {"p_id": wms_name, "empty": Json("")}) + + # Update extents + conn.execute(""" + UPDATE wms.multiproduct_ranges + SET (lat_min,lat_max,lon_min,lon_max) = + (wms_get_min(%(p_prodids)s, 'lat'), wms_get_max(%(p_prodids)s, 'lat'), wms_get_min(%(p_prodids)s, 'lon'), wms_get_max(%(p_prodids)s, 'lon')) + WHERE wms_product_name=%(p_id)s + """, + {"p_id": wms_name, "p_prodids": prodids}) + + # Create sorted list of dates + conn.execute(""" + WITH sorted + AS (SELECT to_jsonb(array_agg(dates.d)) + AS dates + FROM (SELECT DISTINCT to_date(metadata::json->'extent'->>'center_dt', 'YYYY-MM-DD') + AS d + FROM agdc.dataset + WHERE dataset_type_ref = any (%(p_prodids)s) + AND archived IS NULL + ORDER BY d) dates) + UPDATE wms.multiproduct_ranges + SET dates=sorted.dates + FROM sorted + WHERE wms_product_name=%(p_id)s + """, + {"p_id": wms_name, "p_prodids": prodids}) + + # calculate bounding boxes + results = list(conn.execute(""" + SELECT lat_min,lat_max,lon_min,lon_max + FROM wms.multiproduct_ranges + WHERE wms_product_name=%(p_id)s + """, + {"p_id": wms_name} )) + + r = results[0] + + epsg4326 = datacube.utils.geometry.CRS("EPSG:4326") + box = datacube.utils.geometry.box( + float(r[2]), + float(r[0]), + float(r[3]), + float(r[1]), + epsg4326) + + cfg = get_config() + conn.execute(""" + UPDATE wms.multiproduct_ranges + SET bboxes = %s::jsonb + WHERE wms_product_name=%s + """, + Json({ crsid: jsonise_bbox(box.to_crs(crs).boundingbox) for crsid, crs in get_crses(cfg).items() }), + wms_name + ) + + txn.commit() + conn.close() + return + + +def create_range_entry(dc, product, crses, summary_product=False): + # NB. product is an ODC product + conn = get_sqlconn(dc) + txn = conn.begin() + prodid = product.id + + # insert empty row if one does not already exist + conn.execute(""" + INSERT INTO wms.product_ranges + (id,lat_min,lat_max,lon_min,lon_max,dates,bboxes) + VALUES + (%(p_id)s, 0, 0, 0, 0, %(empty)s, %(empty)s) + ON CONFLICT (id) DO NOTHING + """, + {"p_id": prodid, "empty": Json("")}) + + + # Update min/max lat/longs + conn.execute( + """ + UPDATE wms.product_ranges pr + SET lat_min = st_ymin(st_extent(sv.spatial_extent)), + lat_max = st_ymax(st_extent(sv.spatial_extent)), + lon_min = st_xmin(st_extent(sv.spatial_extent)), + lon_max = st_xmax(st_extent(sv.spatial_extent)) + FROM public.space_view sv + WHERE sv.dataset_type_ref=%(p_id)s + """, + {"p_id": prodid}) + + # Set default timezone + conn.execute(""" + set timezone to 'Etc/UTC' + """) + + # Experimental shit + + results = conn.execute( + """ + select dataset_type_ref, + ST_XMin(st_extent(spatial_extent)), + ST_XMax(st_extent(spatial_extent)), + ST_YMin(st_extent(spatial_extent)), + ST_YMax(st_extent(spatial_extent)), + array_agg(temporal_extent) + from space_time_view + group by dataset_type_ref + """ + ) + + for result in results: + print("Oo-ah!") + + conn.rollback() + quit() + + if summary_product: + # Loop over dates + dates = set() + + for result in conn.execute(""" + SELECT DISTINCT cast(metadata -> 'extent' ->> 'from_dt' as date) as dt + FROM agdc.dataset + WHERE dataset_type_ref = %(p_id)s + AND archived IS NULL + ORDER BY dt + """, + {"p_id": prodid}): + dates.add(result[0]) + dates = sorted(dates) + + conn.execute(""" + UPDATE wms.product_ranges + SET dates = %(dates)s + WHERE id= %(p_id)s + """, + { + "dates": Json([t.strftime("%Y-%m-%d") for t in dates]), + "p_id": prodid + } + ) + else: + # Create sorted list of dates + conn.execute(""" + WITH sorted + AS (SELECT to_jsonb(array_agg(dates.d)) + AS dates + FROM (SELECT DISTINCT + date(cast(metadata -> 'extent' ->> 'center_dt' as timestamp) AT TIME ZONE 'UTC' + + (least(to_number(metadata -> 'extent' -> 'coord' -> 'll' ->> 'lon', '9999.9999999999999999999999999999999999'), + to_number(metadata -> 'extent' -> 'coord' -> 'ul' ->> 'lon', '9999.9999999999999999999999999999999999')) + + greatest(to_number(metadata -> 'extent' -> 'coord' -> 'lr' ->> 'lon', '9999.9999999999999999999999999999999999'), + to_number(metadata -> 'extent' -> 'coord' -> 'ur' ->> 'lon', '9999.9999999999999999999999999999999999'))) / 30.0 * interval '1 hour') + AS d + FROM agdc.dataset + WHERE dataset_type_ref=%(p_id)s + AND archived IS NULL + ORDER BY d) dates) + UPDATE wms.product_ranges + SET dates=sorted.dates + FROM sorted + WHERE id=%(p_id)s + """, + {"p_id": prodid}) + + # calculate bounding boxes + results = list(conn.execute(""" + SELECT lat_min,lat_max,lon_min,lon_max + FROM wms.product_ranges + WHERE id=%s + """, + prodid)) + + r = results[0] + + epsg4326 = datacube.utils.geometry.CRS("EPSG:4326") + box = datacube.utils.geometry.box( + float(r[2]), + float(r[0]), + float(r[3]), + float(r[1]), + epsg4326) + + conn.execute(""" + UPDATE wms.product_ranges + SET bboxes = %s::jsonb + WHERE id=%s + """, + Json( + {crsid: {"top":
box.to_crs(crs).boundingbox.top, + "bottom": box.to_crs(crs).boundingbox.bottom, + "left": box.to_crs(crs).boundingbox.left, + "right": box.to_crs(crs).boundingbox.right} + for crsid, crs in crses.items() + } + ), + product.id) + + txn.commit() + conn.close() + + +def check_datasets_exist(dc, product_name): + conn = get_sqlconn(dc) + + results = conn.execute(""" + SELECT COUNT(*) + FROM agdc.dataset ds, agdc.dataset_type p + WHERE ds.archived IS NULL + AND ds.dataset_type_ref = p.id + AND p.name = %s""", + product_name) + + conn.close() + + return list(results)[0][0] > 0 + + +def add_product_range(dc, product): + if isinstance(product, str): + product_name = product + dc_product = dc.index.products.get_by_name(product) + else: + product_name = product.name + dc_product = product + + ows_product = get_config().native_product_index.get(product_name) + if ows_product: + summary_product = not ows_product.is_raw_time_res + else: + summary_product = False + + assert dc_product is not None + + if check_datasets_exist(dc, product_name): + create_range_entry(dc, dc_product, get_crses(), summary_product) + else: + print("Could not find any datasets for: ", product_name) + + +def add_multiproduct_range(dc, product, follow_dependencies=True): + if isinstance(product, str): + product = get_config().product_index.get(product) + + assert product is not None + assert product.multi_product + + if follow_dependencies: + for product_name in product.product_names: + dc_prod = dc.index.products.get_by_name(product_name) + if not check_datasets_exist(dc, product_name): + print("Could not find any datasets for: ", product_name) + else: + add_product_range(dc, product_name) + + # Actually merge and store! + create_multiprod_range_entry(dc, product, get_crses()) + + +def add_all(dc): + multi_products = set() + for product_cfg in get_config().product_index.values(): + product_name = product_cfg.product_name + if product_cfg.multi_product: + multi_products.add(product_cfg) + else: + print("Adding range for:", product_name) + add_product_range(dc, product_name) + + for p in multi_products: + print("Adding multiproduct range for:", p.name) + add_multiproduct_range(dc, p, follow_dependencies=False) + diff --git a/datacube_ows/update_ranges_2.py b/datacube_ows/update_ranges_2.py index c1cfd4e4e..ba62415ba 100755 --- a/datacube_ows/update_ranges_2.py +++ b/datacube_ows/update_ranges_2.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 -from datacube_ows.product_ranges import update_all_ranges, get_sqlconn, add_product_range, add_multiproduct_range, add_all, update_range +from datacube_ows.product_ranges import update_all_ranges, update_range +from datacube_ows.product_ranges_2 import get_sqlconn, add_product_range, add_multiproduct_range, add_all from datacube import Datacube from psycopg2.sql import SQL, Identifier import os @@ -11,8 +12,8 @@ @click.option("--role", default=None, help="Role to grant database permissions to") @click.option("--product", default=None, help="The name of a datacube product.") @click.option("--multiproduct", default=None, help="The name of OWS multi-product." ) -@click.option("--merge-only/--no-merge-only", default=False, help="When used with the multiproduct and calculate-extent options, the ranges for underlying datacube products are not updated.") -@click.option("--calculate-extent/--no-calculate-extent", default=True, help="no-calculate-extent uses database queries to maximise efficiency. 
calculate-extent calculates ranges directly and is the default.") +@click.option("--merge-only/--no-merge-only", default=False, help="When used with the multiproduct options, the ranges for underlying datacube products are not updated.") +@click.option("--calculate-extent/--no-calculate-extent", default=False, help="no-calculate-extent uses database queries against the extent materialised views to maximise efficiency and is the default. calculate-extent calculates ranges directly.") def main(product, multiproduct, merge_only, calculate_extent, schema, role): """Manage datacube-ows range tables. @@ -29,6 +30,9 @@ def main(product, multiproduct, merge_only, calculate_extent, schema, role): elif schema and not role: print("Sorry, cannot update schema without specifying a role") return 1 + elif role and not schema: + print("Sorry, role only makes sense for updating the schema") + return 1 if os.environ.get("PYDEV_DEBUG"): import pydevd_pycharm @@ -42,6 +46,7 @@ def main(product, multiproduct, merge_only, calculate_extent, schema, role): create_schema(dc, role) print("Done") elif not calculate_extent: + print("Deriving extents from materialised views") if product: print("Updating range for: ", product) add_product_range(dc, product) @@ -49,10 +54,11 @@ def main(product, multiproduct, merge_only, calculate_extent, schema, role): print("Updating range for: ", multiproduct) add_multiproduct_range(dc, multiproduct) else: - print("Updating range for all, using SQL extent calculation") + print("Updating range for all configured products") add_all(dc) - print("Done") + print("Done") else: + print("Calculating extents manually - this may take a long time") if product: print("Updating range for: ", product) p, u, i, sp, su, si = update_range(dc, product, multi=False) @@ -81,6 +87,7 @@ def main(product, multiproduct, merge_only, calculate_extent, schema, role): print ("Updated ranges for %d existing sub-products and inserted ranges for %d new sub-products (%d existing sub-products unchanged)" % (su, si, sp)) if mp or mu or mi: print ("Updated ranges for %d existing multi-products and inserted ranges for %d new multi-products (%d existing multi-products unchanged)" % (su, si, sp)) + print("Done") return 0 diff --git a/update_ranges_2.py b/update_ranges_2.py new file mode 100644 index 000000000..cf68645f4 --- /dev/null +++ b/update_ranges_2.py @@ -0,0 +1,4 @@ +from datacube_ows.update_ranges_2 import main + +if __name__ == '__main__': + main() From ba2e14f2f5a710c1be90532dcad7771ff6f590d5 Mon Sep 17 00:00:00 2001 From: phaesler Date: Mon, 30 Mar 2020 09:30:50 +1100 Subject: [PATCH 15/30] Lat/lon limits from stv. 
--- datacube_ows/product_ranges_2.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/datacube_ows/product_ranges_2.py b/datacube_ows/product_ranges_2.py index 3d885a700..d73a91121 100644 --- a/datacube_ows/product_ranges_2.py +++ b/datacube_ows/product_ranges_2.py @@ -135,16 +135,19 @@ def create_range_entry(dc, product, crses, summary_product=False): # Update min/max lat/longs conn.execute( - """ - UPDATE wms.product_ranges pr - SET lat_min = st_ymin(st_extent(sv.spatial_extent)), - lat_max = st_ymax(st_extent(sv.spatial_extent)), - lon_min = st_xmin(st_extent(sv.spatial_extent)), - lon_max = st_xmax(st_extent(sv.spatial_extent)) - FROM public.space_view sv - WHERE sv.dataset_type_ref=%(p_id)s - """, - {"p_id": prodid}) + """ + UPDATE wms.product_ranges pr + SET lat_min = st_ymin(subq.bbox), + lat_max = st_ymax(subq.bbox), + lon_min = st_xmin(subq.bbox), + lon_max = st_xmax(subq.bbox) + FROM ( + SELECT st_extent(stv.spatial_extent) as bbox + FROM public.space_time_view stv + WHERE stv.dataset_type_ref = %(p_id)s + ) as subq + """, + {"p_id": prodid}) # Set default timezone conn.execute(""" @@ -161,15 +164,16 @@ def create_range_entry(dc, product, crses, summary_product=False): ST_YMin(st_extent(spatial_extent)), ST_YMax(st_extent(spatial_extent)), array_agg(temporal_extent) - from space_time_view + from space_time_view sv group by dataset_type_ref """ ) for result in results: + # array_agg comes through as list of DateRanges with upper and lower datetimes. print("Oo-ah!") - conn.rollback() + txn.rollback() quit() if summary_product: From 95ef8ed581e7e57bf722047639376b481eb18c01 Mon Sep 17 00:00:00 2001 From: phaesler Date: Tue, 31 Mar 2020 09:36:11 +1100 Subject: [PATCH 16/30] Work in progress --- datacube_ows/product_ranges_2.py | 65 +++++++++++++++++--------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/datacube_ows/product_ranges_2.py b/datacube_ows/product_ranges_2.py index d73a91121..a9cf0344a 100644 --- a/datacube_ows/product_ranges_2.py +++ b/datacube_ows/product_ranges_2.py @@ -154,41 +154,24 @@ def create_range_entry(dc, product, crses, summary_product=False): set timezone to 'Etc/UTC' """) - # Experimental shit - - results = conn.execute( - """ - select dataset_type_ref, - ST_XMin(st_extent(spatial_extent)), - ST_XMax(st_extent(spatial_extent)), - ST_YMin(st_extent(spatial_extent)), - ST_YMax(st_extent(spatial_extent)), - array_agg(temporal_extent) - from space_time_view sv - group by dataset_type_ref - """ - ) - - for result in results: - # array_agg comes through as list of DateRanges with upper and lower datetimes. 
- print("Oo-ah!") - - txn.rollback() - quit() if summary_product: # Loop over dates dates = set() - for result in conn.execute(""" - SELECT DISTINCT cast(metadata -> 'extent' ->> 'from_dt' as date) as dt - FROM agdc.dataset - WHERE dataset_type_ref = %(p_id)s - AND archived IS NULL - ORDER BY dt - """, - {"p_id": prodid}): - dates.add(result[0]) + results = conn.execute( + """ + select + array_agg(temporal_extent) + from public.space_time_view + WHERE dataset_type_ref = %(p_id)s + """, + {"p_id": prodid} + ) + for result in results: + for dat_ran in result[0]: + dates.add(dat_ran.lower) + dates = sorted(dates) conn.execute(""" @@ -225,6 +208,28 @@ def create_range_entry(dc, product, crses, summary_product=False): """, {"p_id": prodid}) +# Experimental shit + + results = conn.execute( + """ + select dataset_type_ref, + ST_XMin(st_extent(spatial_extent)), + ST_XMax(st_extent(spatial_extent)), + ST_YMin(st_extent(spatial_extent)), + ST_YMax(st_extent(spatial_extent)), + array_agg(temporal_extent) + from space_time_view sv + group by dataset_type_ref + """ + ) + + for result in results: + # array_agg comes through as list of DateRanges with upper and lower datetimes. + print("Oo-ah!") + + txn.rollback() + quit() + # calculate bounding boxes results = list(conn.execute(""" SELECT lat_min,lat_max,lon_min,lon_max From bb09ab33a153368c12123b435e20d64b2b150b46 Mon Sep 17 00:00:00 2001 From: whatnick Date: Tue, 31 Mar 2020 04:05:19 +0000 Subject: [PATCH 17/30] #197 add a microsecond workaround Signed-off-by: whatnick --- sql/create_space_time.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/create_space_time.sql b/sql/create_space_time.sql index e9dfaac43..21374a193 100644 --- a/sql/create_space_time.sql +++ b/sql/create_space_time.sql @@ -34,7 +34,7 @@ metadata_lookup as ( select dataset_type_ref, id,tstzrange( (metadata -> 'extent' ->> 'from_dt') :: timestamp,(metadata -> 'extent' ->> 'to_dt') :: timestamp - ) as temporal_extent + ) + interval '1 microsecond' as temporal_extent from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name in ('eo','gqa_eo','eo_plus')) UNION From 834273134aa419f82d5e302823564b69ca2713ba Mon Sep 17 00:00:00 2001 From: Tisham Dhar Date: Wed, 1 Apr 2020 11:28:39 +1100 Subject: [PATCH 18/30] Fix plus location --- sql/create_space_time.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/create_space_time.sql b/sql/create_space_time.sql index 21374a193..d908fd772 100644 --- a/sql/create_space_time.sql +++ b/sql/create_space_time.sql @@ -33,8 +33,8 @@ metadata_lookup as ( -- This is the eodataset variant of the temporal extent select dataset_type_ref, id,tstzrange( - (metadata -> 'extent' ->> 'from_dt') :: timestamp,(metadata -> 'extent' ->> 'to_dt') :: timestamp - ) + interval '1 microsecond' as temporal_extent + (metadata -> 'extent' ->> 'from_dt') :: timestamp,(metadata -> 'extent' ->> 'to_dt') :: timestamp + interval '1 microsecond' + ) as temporal_extent from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name in ('eo','gqa_eo','eo_plus')) UNION @@ -168,4 +168,4 @@ CREATE INDEX space_time_view_time_idx -- https://ieftimov.com/post/postgresql-indexes-btree/ CREATE INDEX space_time_view_ds_idx ON space_time_view - USING BTREE(dataset_type_ref); \ No newline at end of file + USING BTREE(dataset_type_ref); From 0a8885c6a6b44e3e85f5e77f8c7ccacdd53f4739 Mon Sep 17 00:00:00 2001 From: phaesler Date: Wed, 15 Apr 2020 09:29:59 +1000 Subject: [PATCH 19/30] Work in 
progress. --- datacube_ows/ows_configuration.py | 11 ++- datacube_ows/product_ranges_2.py | 73 +++++++++++++++++--------------- datacube_ows/templates/wcs_capabilities.xml | 2 +- datacube_ows/templates/wms_capabilities.xml | 2 + datacube_ows/templates/wmts_capabilities.xml | 2 +- sql/create_space_time.sql | 1 + 6 files changed, 47 insertions(+), 44 deletions(-) diff --git a/datacube_ows/ows_configuration.py b/datacube_ows/ows_configuration.py index cd413594a..7513f7a63 100644 --- a/datacube_ows/ows_configuration.py +++ b/datacube_ows/ows_configuration.py @@ -297,6 +297,7 @@ class OWSNamedLayer(OWSLayer): def __init__(self, cfg, global_cfg, dc, parent_layer=None): super().__init__(cfg, global_cfg, dc, parent_layer) self.name = cfg["name"] + self.hide = False try: self.parse_product_names(cfg) self.products = [] @@ -472,7 +473,9 @@ def parse_wcs(self, cfg, dc): try: native_bounding_box = self.bboxes[self.native_CRS] except KeyError: - raise ConfigException("No bounding box in ranges for native CRS %s - rerun update_ranges.py" % self.native_CRS) + print("Layer: %s No bounding box in ranges for native CRS %s - rerun update_ranges.py" % (self.name, self.native_CRS)) + self.hide = True + return self.origin_x = native_bounding_box["left"] self.origin_y = native_bounding_box["bottom"] try: @@ -512,6 +515,7 @@ def force_range_update(self, ext_dc=None): dc = ext_dc else: dc = get_cube() + self.hide = False self._ranges = None try: from datacube_ows.product_ranges import get_ranges @@ -520,8 +524,9 @@ def force_range_update(self, ext_dc=None): raise Exception("Null product range") self.bboxes = self.extract_bboxes() except Exception as a: - range_failure = "get_ranges failed for layer %s: %s" % (self.name, str(a)) - raise ConfigException(range_failure) + print("get_ranges failed for layer %s: %s" % (self.name, str(a))) + self.hide = True + self.bboxes = {} finally: if not ext_dc: release_cube(dc) diff --git a/datacube_ows/product_ranges_2.py b/datacube_ows/product_ranges_2.py index a9cf0344a..ccad18e6f 100644 --- a/datacube_ows/product_ranges_2.py +++ b/datacube_ows/product_ranges_2.py @@ -59,29 +59,45 @@ def create_multiprod_range_entry(dc, product, crses): # Update extents conn.execute(""" UPDATE wms.multiproduct_ranges - SET (lat_min,lat_max,lon_min,lon_max) = - (wms_get_min(%(p_prodids)s, 'lat'), wms_get_max(%(p_prodids)s, 'lat'), wms_get_min(%(p_prodids)s, 'lon'), wms_get_max(%(p_prodids)s, 'lon')) - WHERE wms_product_name=%(p_id)s + SET lat_min = subq.lat_min, + lat_max = subq.lat_max, + lon_min = subq.lon_min, + lon_max = subq.lon_max + FROM ( + select min(lat_min) as lat_min, + max(lat_max) as lat_max, + min(lon_min) as lon_min, + max(lon_max) as lon_max + from wms.product_ranges + where id = any(%(p_prodids)s) + ) as subq + WHERE wms_product_name = %(p_id)s """, {"p_id": wms_name, "p_prodids": prodids}) # Create sorted list of dates + results = conn.execute( + """ + SELECT dates + FROM wms.product_ranges + WHERE id = any(%(p_prodids)s) + """, {"p_prodids": prodids} + ) + dates = set() + for r in results: + for d in r[0]: + dates.add(d) + dates = sorted(dates) conn.execute(""" - WITH sorted - AS (SELECT to_jsonb(array_agg(dates.d)) - AS dates - FROM (SELECT DISTINCT to_date(metadata::json->'extent'->>'center_dt', 'YYYY-MM-DD') - AS d - FROM agdc.dataset - WHERE dataset_type_ref = any (%(p_prodids)s) - AND archived IS NULL - ORDER BY d) dates) - UPDATE wms.multiproduct_ranges - SET dates=sorted.dates - FROM sorted - WHERE wms_product_name=%(p_id)s - """, - {"p_id": wms_name, "p_prodids": prodids}) + UPDATE
wms.multiproduct_ranges + SET dates = %(dates)s + WHERE wms_product_name= %(p_id)s + """, + { + "dates": Json([t.strftime("%Y-%m-%d") for t in dates]), + "p_id": wms_name + } + ) # calculate bounding boxes results = list(conn.execute(""" @@ -208,27 +224,6 @@ def create_range_entry(dc, product, crses, summary_product=False): """, {"p_id": prodid}) -# Experimental shit - - results = conn.execute( - """ - select dataset_type_ref, - ST_XMin(st_extent(spatial_extent)), - ST_XMax(st_extent(spatial_extent)), - ST_YMin(st_extent(spatial_extent)), - ST_YMax(st_extent(spatial_extent)), - array_agg(temporal_extent) - from space_time_view sv - group by dataset_type_ref - """ - ) - - for result in results: - # array_agg comes through as list of DateRanges with upper and lower datetimes. - print("Oo-ah!") - - txn.rollback() - quit() # calculate bounding boxes results = list(conn.execute(""" diff --git a/datacube_ows/templates/wcs_capabilities.xml b/datacube_ows/templates/wcs_capabilities.xml index 472f6ab27..b6b29f5c7 100644 --- a/datacube_ows/templates/wcs_capabilities.xml +++ b/datacube_ows/templates/wcs_capabilities.xml @@ -112,7 +112,7 @@ xsi:schemaLocation="http://www.opengis.net/wcs http://schemas.opengis.net/wcs/1. {% if show_content_metadata %} {% for product in cfg.product_index.values() %} - {% if product.wcs %} + {% if product.wcs and not product.hide %} {% set product_ranges = product.ranges %} {{ product.definition.description }} diff --git a/datacube_ows/templates/wms_capabilities.xml b/datacube_ows/templates/wms_capabilities.xml index 817c76569..9f009a1ce 100644 --- a/datacube_ows/templates/wms_capabilities.xml +++ b/datacube_ows/templates/wms_capabilities.xml @@ -35,6 +35,7 @@ {% endif %} {%- endmacro %} {% macro render_named_layer(lyr) -%} + {% if not lyr.hide %} {% set lyr_ranges = lyr.ranges %} {{ lyr.name }} @@ -129,6 +130,7 @@ {% endfor %} {# TODO: Layers for Subproducts #} + {% endif %} {%- endmacro %} 0 +def add_ranges(dc, product_names, summary=False, merge_only=False): + odc_products = {} + ows_multiproducts = [] + for pname in product_names: + dc_product = None + ows_product = get_config().product_index.get(pname) + if not ows_product: + ows_product = get_config().native_product_index.get(pname) + if ows_product: + for dc_pname in ows_product.product_names: + if dc_pname in odc_products: + odc_products[dc_pname]["ows"].append(ows_product) + else: + odc_products[dc_pname] = { "ows": [ows_product]} + print("OWS Layer %s maps to ODC Product(s): %s" % ( + ows_product.name, + repr(ows_product.product_names) + )) + if ows_product.multi_product: + ows_multiproducts.append(ows_product) + if not ows_product: + dc_product = dc.index.products.get_by_name(pname) + if dc_product: + print("ODC Layer: %s" % pname) + if pname in odc_products: + odc_products[pname]["ows"].append(None) + else: + odc_products[pname] = { "ows": [None]} + else: + print("Unrecognised product name:", pname) + continue -def add_product_range(dc, product): - if isinstance(product, str): - product_name = product - dc_product = dc.index.products.get_by_name(product) - else: - product_name = product.name - dc_product = product - - ows_product = get_config().native_product_index.get(product_name) - if ows_product: - summary_product = not ows_product.is_raw_time_res - else: - summary_product = False - - assert dc_product is not None - - if check_datasets_exist(dc, product_name): - create_range_entry(dc, dc_product, get_crses(), summary_product) + if ows_multiproducts and merge_only: + print("Merge-only: Skipping range 
update of products:", repr(list(odc_products.keys()))) else: - print("Could not find any datasets for: ", product_name) - - -def add_multiproduct_range(dc, product, follow_dependencies=True): - if isinstance(product, str): - product = get_config().product_index.get(product) - - assert product is not None - assert product.multi_product - - if follow_dependencies: - for product_name in product.product_names: - dc_prod = dc.index.products.get_by_name(product_name) - if not check_datasets_exist(dc, product_name): - print("Could not find any datasets for: ", product_name) + for pname, ows_prods in odc_products.items(): + dc_product = dc.index.products.get_by_name(pname) + if check_datasets_exist(dc, dc_product.name): + prod_summary = summary + for ows_prod in ows_prods["ows"]: + if ows_prod: + prod_summary = not ows_prod.is_raw_time_res + break + create_range_entry(dc, dc_product, get_crses(), prod_summary) else: - add_product_range(dc, product_name) - - # Actually merge and store! - create_multiprod_range_entry(dc, product, get_crses()) + print("Could not find any datasets for: ", pname) + for mp in ows_multiproducts: + create_multiprod_range_entry(dc, mp, get_crses()) -def add_all(dc): - multi_products = set() - for product_cfg in get_config().product_index.values(): - product_name = product_cfg.product_name - if product_cfg.multi_product: - multi_products.add(product_cfg) - else: - print("Adding range for:", product_name) - add_product_range(dc, product_name) + print("Done.") - for p in multi_products: - print("Adding multiproduct range for:", p.name) - add_multiproduct_range(dc, p, follow_dependencies=False) diff --git a/datacube_ows/update_ranges_2.py b/datacube_ows/update_ranges_2.py index ba62415ba..405d8b103 100755 --- a/datacube_ows/update_ranges_2.py +++ b/datacube_ows/update_ranges_2.py @@ -1,32 +1,42 @@ #!/usr/bin/env python3 from datacube_ows.product_ranges import update_all_ranges, update_range -from datacube_ows.product_ranges_2 import get_sqlconn, add_product_range, add_multiproduct_range, add_all +from datacube_ows.product_ranges_2 import get_sqlconn, add_ranges from datacube import Datacube from psycopg2.sql import SQL, Identifier +from datacube_ows.ows_configuration import get_config import os import click @click.command() +@click.option("--views", is_flag=True, default=False, help="Create or update the ODC spatio-temporal materialised views.") @click.option("--schema", is_flag=True, default=False, help="Create or update the OWS database schema.") @click.option("--role", default=None, help="Role to grant database permissions to") -@click.option("--product", default=None, help="The name of a datacube product.") -@click.option("--multiproduct", default=None, help="The name of OWS multi-product." ) -@click.option("--merge-only/--no-merge-only", default=False, help="When used with the multiproduct options, the ranges for underlying datacube products are not updated.") -@click.option("--calculate-extent/--no-calculate-extent", default=False, help="no-calculate-extent uses database queries against the extent materialised views to maximise efficiency and is the default. 
calculate-extent calculates ranges directly.")
-def main(product, multiproduct, merge_only, calculate_extent, schema, role):
+@click.option("--summary", is_flag=True, default=False, help="Treat any named ODC products with no corresponding configured OWS Layer as summary products")
+@click.option("--merge-only/--no-merge-only", default=False, help="When used with a multiproduct layer, the ranges for underlying datacube products are not updated.")
+@click.argument("products", nargs=-1)
+def main(products, merge_only, summary, schema, views, role):
     """Manage datacube-ows range tables.

-    A valid invocation should specify at most one of '--product', '--multiproduct' or '--schema'.
-    If neither of these options are specified, then the ranges for all products and multiproducts
-    are updated.
+    Valid invocations:
+
+    * Some combination of the --views and --schema flags (and no PRODUCTS).
+      (Perform the specified database updates.)
+
+    * One or more OWS or ODC product names.
+      (Update ranges for the specified PRODUCTS.)
+
+    * No PRODUCTS (and neither the --views nor --schema flags).
+      (Update ranges for all configured OWS products.)
+
+    Uses the DATACUBE_OWS_CFG environment variable to find the OWS config file.
     """
-    if product and multiproduct:
-        print("Sorry, you specified both a product and multiproduct. One at a time, please.")
-        return 1
-    elif schema and (product or multiproduct):
+    if schema and products:
         print("Sorry, cannot update the schema and ranges in the same invocation.")
         return 1
+    elif views and products:
+        print("Sorry, cannot update the materialised views and ranges in the same invocation.")
+        return 1
     elif schema and not role:
         print("Sorry, cannot update schema without specifying a role")
         return 1
@@ -38,59 +48,184 @@ def main(products, merge_only, summary, schema, views, role):
         import pydevd_pycharm
         pydevd_pycharm.settrace('172.17.0.1', port=12321, stdoutToServer=True, stderrToServer=True)

-    dc = Datacube(app="ows_update_ranges")
-    if schema:
-        print("Checking schema....")
-        print("Creating or replacing WMS database schema...")
-        create_schema(dc, role)
-        print("Done")
-    elif not calculate_extent:
-        print("Deriving extents from materialised views")
-        if product:
-            print("Updating range for: ", product)
-            add_product_range(dc, product)
-        elif multiproduct:
-            print("Updating range for: ", multiproduct)
-            add_multiproduct_range(dc, multiproduct)
-        else:
-            print("Updating range for all configured products")
-            add_all(dc)
-        print("Done")
-    else:
-        print("Calculating extents manually - this may take a long time")
-        if product:
-            print("Updating range for: ", product)
-            p, u, i, sp, su, si = update_range(dc, product, multi=False)
-            if u:
-                print("Ranges updated for", product)
-            elif i:
-                print("New ranges inserted for", product)
-            else:
-                print("Ranges up to date for", product)
-            if sp or su or si:
-                print ("Updated ranges for %d existing sub-products and inserted ranges for %d new sub-products (%d existing sub-products unchanged)" % (su, si, sp))
-        elif multiproduct:
-            print("Updating range for: ", multiproduct)
-            p, u, i = update_range(dc, multiproduct, multi=True, follow_dependencies=not merge_only)
-            if u:
-                print("Merged ranges updated for", multiproduct)
-            elif i:
-                print("Merged ranges inserted for", multiproduct)
-            else:
-                print("Merged ranges up to date for", multiproduct)
-        else:
-            print ("Updating ranges for all layers/products")
-            p, u, i, sp, su, si, mp, mu, mi = update_all_ranges(dc)
-            print ("Updated ranges for %d existing layers/products and inserted ranges for %d new layers/products (%d existing layers/products unchanged)" % (u, i, p))
-            if sp or su or si:
-                print ("Updated ranges for %d existing sub-products and inserted ranges for %d new sub-products (%d existing sub-products unchanged)" % (su, si, sp))
-            if mp or mu or mi:
-                print ("Updated ranges for %d existing multi-products and inserted ranges for %d new multi-products (%d existing multi-products unchanged)" % (su, si, sp))
-        print("Done")
+    if schema or views:
+        if schema:
+            print("Checking schema....")
+            print("Creating or replacing WMS database schema...")
+            create_schema(dc, role)
+            print("Done")
+        if views:
+            print("Recalculating materialised views...")
+            create_views(dc)
+            print("Done")
+        return 0
+
+    print("Deriving extents from materialised views")
+    if not products:
+        products = get_config().product_index.keys()
+    add_ranges(dc, products, summary, merge_only)
     return 0

+def create_views(dc):
+    commands = [
+        ("Installing Postgis extensions on public schema",
+         "create extension if not exists postgis"),
+        ("Giving other schemas access to PostGIS functions installed in the public schema",
+         """ALTER DATABASE datacube
+            SET
+              search_path = public,
+              agdc
+         """),
+        ("Dropping already existing Materialized View Index 1/3",
+         "DROP INDEX IF EXISTS space_time_view_geom_idx"),
+        ("Dropping already existing Materialized View Index 2/3",
+         "DROP INDEX IF EXISTS space_time_view_time_idx"),
+        ("Dropping already existing Materialized View Index 3/3",
+         "DROP INDEX IF EXISTS space_time_view_ds_idx"),
+        ("Dropping already existing Materialized View 1/3",
+         "DROP MATERIALIZED VIEW IF EXISTS space_time_view"),
+        ("Dropping already existing Materialized View 2/3",
+         "DROP MATERIALIZED VIEW IF EXISTS time_view"),
+        ("Dropping already existing Materialized View 3/3",
+         "DROP MATERIALIZED VIEW IF EXISTS space_view"),
+        ("Setting default timezone to UTC",
+         "set timezone to 'Etc/UTC'"),
+
+# Handling different variants of metadata requires UNION with WHERE clauses per metadata type
+# https://www.postgresql.org/docs/11/queries-union.html
+
+# Try all different locations for temporal extents and UNION them
+        ("Creating TIME Materialised View",
+         """
+CREATE MATERIALIZED VIEW IF NOT EXISTS time_view (dataset_type_ref, ID, temporal_extent)
+AS
+with
+-- Crib metadata to use for string matching various types
+metadata_lookup as (
+  select id,name from agdc.metadata_type
+)
+-- This is the eodataset variant of the temporal extent
+select
+  dataset_type_ref, id,tstzrange(
+    (metadata -> 'extent' ->> 'from_dt') :: timestamp,(metadata -> 'extent' ->> 'to_dt') :: timestamp + interval '1 microsecond'
+  ) as temporal_extent
+from agdc.dataset where
+  metadata_type_ref in (select id from metadata_lookup where name in ('eo','gqa_eo','eo_plus'))
+UNION
+-- This is the eo3 variant of the temporal extent, the sample eo3 dataset uses a singleton
+-- timestamp, some other variants use start/end timestamps. From OWS perspective temporal
+-- resolution is 1 whole day
+select
+  dataset_type_ref, id,tstzrange(
+    (metadata->'properties'->>'datetime'):: timestamp,
+    (metadata->'properties'->>'datetime'):: timestamp + interval '1 day'
+  ) as temporal_extent
+from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name='eo3')
+UNION
+-- Start/End timestamp variant product.
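+-- (Aside on the interval arithmetic above, for readers new to range types:
+--  tstzrange(start, end) defaults to inclusive-lower/exclusive-upper '[)'
+--  bounds, and a range whose two bounds are equal is empty - presumably why
+--  to_dt is padded by 1 microsecond and the singleton eo3 timestamp by a
+--  whole day. Sample metadata for this start/end variant:)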
+-- http://dapds00.nci.org.au/thredds/fileServer/xu18/ga_ls8c_ard_3/092/090/2019/06/05/ga_ls8c_ard_3-0-0_092090_2019-06-05_final.odc-metadata.yaml
+select
+  dataset_type_ref, id,tstzrange(
+    (metadata->'properties'->>'dtr:start_datetime'):: timestamp,
+    (metadata->'properties'->>'dtr:end_datetime'):: timestamp
+  ) as temporal_extent
+from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name in ('eo3_landsat_ard'))
+"""),
+    # Spatial extents per dataset (to be created as a column of the space-time table)
+    # Try all different locations for spatial extents and UNION them
+    ("Creating SPACE Materialised View (Slowest step!)",
+"""
+CREATE MATERIALIZED VIEW IF NOT EXISTS space_view (ID, spatial_extent)
+AS
+with
+-- Crib metadata to use for string matching various types
+metadata_lookup as (
+  select id,name from agdc.metadata_type
+),
+-- This is eo3 spatial (Uses CEMP INSAR as a sample product)
+ranges as
+(select id,
+  (metadata #> '{extent, lat, begin}') as lat_begin,
+  (metadata #> '{extent, lat, end}') as lat_end,
+  (metadata #> '{extent, lon, begin}') as lon_begin,
+  (metadata #> '{extent, lon, end}') as lon_end
+ from agdc.dataset where
+  metadata_type_ref in (select id from metadata_lookup where name='eo3')
+ ),
+-- This is eo spatial (Uses ALOS-PALSAR over Africa as a sample product)
+corners as
+(select id,
+  (metadata #> '{extent, coord, ll, lat}') as ll_lat,
+  (metadata #> '{extent, coord, ll, lon}') as ll_lon,
+  (metadata #> '{extent, coord, lr, lat}') as lr_lat,
+  (metadata #> '{extent, coord, lr, lon}') as lr_lon,
+  (metadata #> '{extent, coord, ul, lat}') as ul_lat,
+  (metadata #> '{extent, coord, ul, lon}') as ul_lon,
+  (metadata #> '{extent, coord, ur, lat}') as ur_lat,
+  (metadata #> '{extent, coord, ur, lon}') as ur_lon
+ from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name in ('eo','gqa_eo','eo_plus')))
+select id,format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))',
+        lon_begin, lat_begin, lon_end, lat_begin, lon_end, lat_end,
+        lon_begin, lat_end, lon_begin, lat_begin)::geometry
+as spatial_extent
+from ranges
+UNION
+select id,format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))',
+        ll_lon, ll_lat, lr_lon, lr_lat, ur_lon, ur_lat,
+        ul_lon, ul_lat, ll_lon, ll_lat)::geometry as spatial_extent
+from corners
+UNION
+-- This is landsat_scene and landsat_l1_scene with geometries
+select id,
+  ST_Transform(
+    ST_SetSRID(
+      ST_GeomFromGeoJSON(
+        metadata #>> '{geometry}'),
+      substr(
+        metadata #>> '{crs}',6)::integer
+    ),
+    4326
+  ) as spatial_extent
+ from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name in ('eo3_landsat_ard'))
+ """, True),
+# Join the above queries for space and time as CTEs into a space-time view
+
+    ("Creating combined SPACE-TIME Materialised View",
+     """
+CREATE MATERIALIZED VIEW IF NOT EXISTS space_time_view (ID, dataset_type_ref, spatial_extent, temporal_extent)
+AS
+select space_view.id, dataset_type_ref, spatial_extent, temporal_extent from space_view join time_view on space_view.id=time_view.id
+    """),

+# Spatial extents are indexed using GIST index for BBOX queries
+# https://postgis.net/workshops/postgis-intro/indexing.html
+    ("Creating Materialised View Index 1/3", """
+CREATE INDEX space_time_view_geom_idx
+  ON space_time_view
+  USING GIST (spatial_extent)
+    """),

+# Time range types can carry indexes for range lookup
+# https://www.postgresql.org/docs/11/rangetypes.html#RANGETYPES-INDEXING
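+# (Illustrative only - the query shape these three indexes are built to serve,
+#  with placeholder values for the product id, bounding box and time range:
+#    SELECT id FROM space_time_view
+#    WHERE dataset_type_ref = 42                                       -- btree
+#      AND spatial_extent && ST_MakeEnvelope(145, -35, 146, -34, 4326) -- gist
+#      AND temporal_extent && tstzrange('2019-06-01', '2019-07-01');   -- spgist
+#  )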
+    ("Creating Materialised View Index 2/3", """
+    CREATE INDEX space_time_view_time_idx
+      ON space_time_view
+      USING SPGIST (temporal_extent)
+    """),
+
+# Create standard btree index over dataset_type_ref to ease searching by product
+# https://ieftimov.com/post/postgresql-indexes-btree/
+    ("Creating Materialised View Index 3/3", """
+    CREATE INDEX space_time_view_ds_idx
+      ON space_time_view
+      USING BTREE(dataset_type_ref)
+    """),
+
+    ]
+    run_sql(dc, commands)
+
 def create_schema(dc, role):
     commands = [
         ("Creating/replacing wms schema", "create schema if not exists wms"),
@@ -167,20 +302,35 @@ def create_schema(dc, role):
             END;
             $$ LANGUAGE plpgsql;
             """),
+        ("""Granting usage on schema""",
+         "GRANT USAGE ON SCHEMA wms TO %s" % role
+        )
     ]
+    run_sql(dc, commands)

+def run_sql(dc, commands):
     conn = get_sqlconn(dc)
-    for msg, sql in commands:
+    for cmd_blob in commands:
+        if len(cmd_blob) == 2:
+            msg, sql = cmd_blob
+            override = False
+        else:
+            msg, sql, override = cmd_blob
         print(msg)
-        conn.execute(sql)
+        if override:
+            q = SQL(sql)
+            with conn.connection.cursor() as psycopg2connection:
+                psycopg2connection.execute(q)
+        else:
+            conn.execute(sql)

     # Add user based on param
     # use psycopg2 directly to get proper psql
     # quoting on the role name identifier
-    print("Granting usage on schema")
-    q = SQL("GRANT USAGE ON SCHEMA wms TO {}").format(Identifier(role))
-    with conn.connection.cursor() as psycopg2connection:
-        psycopg2connection.execute(q)
+    # print("Granting usage on schema")
+    # q = SQL("GRANT USAGE ON SCHEMA wms TO {}").format(Identifier(role))
+    # with conn.connection.cursor() as psycopg2connection:
+    #     psycopg2connection.execute(q)
     conn.close()

     return

From bd1d006cd5ea72188d3a268332fce4622534b70c Mon Sep 17 00:00:00 2001
From: phaesler
Date: Thu, 30 Apr 2020 14:44:43 +1000
Subject: [PATCH 21/30] Add update_ranges_2.py to setup.py

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index fdf25b687..5c117367a 100644
--- a/setup.py
+++ b/setup.py
@@ -51,6 +51,7 @@
         'console_scripts': [
             'datacube-ows=datacube_ows.wsgi:main',
             'datacube-ows-update=datacube_ows.update_ranges:main'
+            'datacube-ows-update_2=datacube_ows.update_ranges_2:main'
         ]
     },
     packages=find_packages(),

From 55a8dd183d1ef935b01b6d91d8d123f8b0e640fd Mon Sep 17 00:00:00 2001
From: phaesler
Date: Thu, 30 Apr 2020 14:59:27 +1000
Subject: [PATCH 22/30] Fix -/_ in setup.py. Appease lint gods.
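
Console-script names may contain hyphens, but the target to the right of the
'=' is an importable module path and callable, so it must use underscores. A
quick sketch of the corrected entry (names as used in this series; the inline
annotation is explanatory only):

    'datacube-ows-update-2=datacube_ows.update_ranges_2:main'
    #  ^ CLI command (hyphens fine)   ^ module.path:callable (underscores only)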
--- datacube_ows/ows_configuration.py | 1 + datacube_ows/update_ranges_2.py | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/datacube_ows/ows_configuration.py b/datacube_ows/ows_configuration.py index 5b37bc3c3..9676809ed 100644 --- a/datacube_ows/ows_configuration.py +++ b/datacube_ows/ows_configuration.py @@ -528,6 +528,7 @@ def force_range_update(self, ext_dc=None): if self._ranges is None: raise Exception("Null product range") self.bboxes = self.extract_bboxes() + # pylint: disable=broad-except except Exception as a: print("get_ranges failed for layer %s: %s" % (self.name, str(a))) self.hide = True diff --git a/datacube_ows/update_ranges_2.py b/datacube_ows/update_ranges_2.py index 405d8b103..9fabd3cab 100755 --- a/datacube_ows/update_ranges_2.py +++ b/datacube_ows/update_ranges_2.py @@ -63,7 +63,7 @@ def main(products, merge_only, summary, schema, views, role): print("Deriving extents from materialised views") if not products: - products = get_config().product_index.keys() + products = list(get_config().product_index.keys()) add_ranges(dc, products, summary, merge_only) return 0 diff --git a/setup.py b/setup.py index 5c117367a..b58f3c82f 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ 'console_scripts': [ 'datacube-ows=datacube_ows.wsgi:main', 'datacube-ows-update=datacube_ows.update_ranges:main' - 'datacube-ows-update_2=datacube_ows.update_ranges_2:main' + 'datacube-ows-update-2=datacube_ows.update_ranges_2:main' ] }, packages=find_packages(), From c3e6fa67e6d84ce15d93d1e85dd911e4f48b531b Mon Sep 17 00:00:00 2001 From: phaesler Date: Thu, 30 Apr 2020 15:13:27 +1000 Subject: [PATCH 23/30] Allow refresh of MVs instead of full rebuild. --- datacube_ows/update_ranges_2.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/datacube_ows/update_ranges_2.py b/datacube_ows/update_ranges_2.py index 9fabd3cab..5182f37b5 100755 --- a/datacube_ows/update_ranges_2.py +++ b/datacube_ows/update_ranges_2.py @@ -1,15 +1,14 @@ #!/usr/bin/env python3 -from datacube_ows.product_ranges import update_all_ranges, update_range from datacube_ows.product_ranges_2 import get_sqlconn, add_ranges from datacube import Datacube -from psycopg2.sql import SQL, Identifier +from psycopg2.sql import SQL from datacube_ows.ows_configuration import get_config import os import click @click.command() -@click.option("--views", is_flag=True, default=False, help="Create or update the ODC spatio-temporal materialised views.") +@click.option("--views", is_flag=True, default=False, help="Create (if called with the --schema option) or refresh the ODC spatio-temporal materialised views.") @click.option("--schema", is_flag=True, default=False, help="Create or update the OWS database schema.") @click.option("--role", default=None, help="Role to grant database permissions to") @click.option("--summary", is_flag=True, default=False, help="Treat any named ODC products with no corresponding configured OWS Layer as summary products" ) @@ -55,10 +54,14 @@ def main(products, merge_only, summary, schema, views, role): print("Creating or replacing WMS database schema...") create_schema(dc, role) print("Done") - if views: - print("Recalculating materialised views...") + if schema and views: + print("Creating or replacing materialised views...") create_views(dc) print("Done") + elif views: + print("Refreshing materialised views...") + refresh_views(dc) + print("Done") return 0 print("Deriving extents from materialised views") @@ -226,6 +229,22 @@ def 
create_views(dc): ] run_sql(dc, commands) + +def refresh_views(dc): + commands = [ + ("Refreshing TIME materialized view", + "REFRESH MATERIALIZED VIEW time_view" + ), + ("Refreshing SPACE materialized view", + "REFRESH MATERIALIZED VIEW space_view" + ), + ("Refreshing combined SPACE-TIME materialized view", + "REFRESH MATERIALIZED VIEW CONCURRENTLY space_time_view" + ), + ] + run_sql(dc, commands) + + def create_schema(dc, role): commands = [ ("Creating/replacing wms schema", "create schema if not exists wms"), From 4e62b0415157d2b57f07e3a0fbce3459b175bee9 Mon Sep 17 00:00:00 2001 From: phaesler Date: Thu, 30 Apr 2020 16:58:13 +1000 Subject: [PATCH 24/30] missing comma in setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b58f3c82f..65f4830c8 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ entry_points={ 'console_scripts': [ 'datacube-ows=datacube_ows.wsgi:main', - 'datacube-ows-update=datacube_ows.update_ranges:main' + 'datacube-ows-update=datacube_ows.update_ranges:main', 'datacube-ows-update-2=datacube_ows.update_ranges_2:main' ] }, From 5042ac372891a8f27bdc22ae65e3f553fd9f0011 Mon Sep 17 00:00:00 2001 From: phaesler Date: Fri, 1 May 2020 10:49:24 +1000 Subject: [PATCH 25/30] The update_ranges.py is now the new mv method with the old method available as update_ranges_old.py. update_ranges now has backwards compatible option handling (with warnings). --- datacube_ows/ows_configuration.py | 2 +- datacube_ows/product_ranges.py | 844 +++++++---------------------- datacube_ows/product_ranges_2.py | 351 ------------ datacube_ows/product_ranges_old.py | 761 ++++++++++++++++++++++++++ datacube_ows/update_ranges.py | 343 +++++++++--- datacube_ows/update_ranges_2.py | 361 ------------ datacube_ows/update_ranges_old.py | 186 +++++++ setup.py | 4 +- update_ranges_2.py | 4 - update_ranges_old.py | 4 + 10 files changed, 1418 insertions(+), 1442 deletions(-) delete mode 100644 datacube_ows/product_ranges_2.py create mode 100644 datacube_ows/product_ranges_old.py delete mode 100755 datacube_ows/update_ranges_2.py create mode 100755 datacube_ows/update_ranges_old.py delete mode 100644 update_ranges_2.py create mode 100644 update_ranges_old.py diff --git a/datacube_ows/ows_configuration.py b/datacube_ows/ows_configuration.py index 9676809ed..1a29b7cdb 100644 --- a/datacube_ows/ows_configuration.py +++ b/datacube_ows/ows_configuration.py @@ -15,7 +15,7 @@ from datacube_ows.cube_pool import cube, get_cube, release_cube from datacube_ows.band_mapper import StyleDef -from datacube_ows.ogc_utils import get_function, ConfigException, ProductLayerException, FunctionWrapper +from datacube_ows.ogc_utils import get_function, ConfigException, FunctionWrapper import logging diff --git a/datacube_ows/product_ranges.py b/datacube_ows/product_ranges.py index 2224657ba..8918a64d9 100644 --- a/datacube_ows/product_ranges.py +++ b/datacube_ows/product_ranges.py @@ -13,53 +13,24 @@ from datacube_ows.utils import get_sqlconn -DEFAULT_GEOJSON = json.loads('''{ -"type": "Polygon", -"coordinates": [ - [ - [ - 110.91796875, - -43.96119063892024 - ], - [ - 158.203125, - -43.96119063892024 - ], - [ - 158.203125, - -10.660607953624762 - ], - [ - 110.91796875, - -10.660607953624762 - ], - [ - 110.91796875, - -43.96119063892024 - ] - ] -] -}''') - -DEFAULT_GEOJSON_CRS = datacube.utils.geometry.CRS('EPSG:4326') - -def accum_min(a, b): - if a is None: - return b - elif b is None: - return a - else: - return min(a, b) - - -def accum_max(a, b): - if a is None: - 
return b - elif b is None: - return a +def get_odc_products(dc, any_product, odc_only=False): + if isinstance(any_product, OWSNamedLayer): + return any_product.products + elif isinstance(any_product, str): + dc_product = dc.index.products.get_by_name(any_product) + if odc_only: + ows_product = None + else: + ows_product = get_config().product_index.get(any_product) + if ows_product: + if dc_product and [dc_product] == ows_product.products: + # The same! + return [dc_product] + print("Updating OWS product %s (ODC Products: []). If you meant the ODC product %s, please use the --odc-only flag.") + return ows_product.products else: - return max(a, b) - + # Assume ODC product + return [any_product] def get_crsids(cfg=None): if not cfg: @@ -82,535 +53,16 @@ def jsonise_bbox(bbox): "right": bbox.right, } -def determine_product_ranges(dc, dc_product, extractor, summary_dataset=False): - # pylint: disable=too-many-locals, too-many-branches, too-many-statements, protected-access - start = datetime.now() - print("Product: ", dc_product.name) - r = { - "lat": { - "min": None, - "max": None - }, - "lon": { - "min": None, - "max": None - }, - } - sub_r = {} - time_set = set() - cfg = get_config() - print ("OK, Let's do it") - crsids = get_crsids(cfg) - extents = {crsid: None for crsid in crsids} - crses = get_crses(cfg) - ds_count = 0 - for ds in dc.find_datasets(product=dc_product.name): - print("Processing a dataset", ds.id) - if summary_dataset: - ds_time = ds.metadata.time[0] - else: - ds_time = local_date(ds) - if extractor is not None: - path = extractor(ds) - if path not in sub_r: - sub_r[path] = { - "lat": { - "min": None, - "max": None, - }, - "lon": { - "min": None, - "max": None, - }, - "time_set": set(), - "extents": {crsid: None for crsid in crsids} - } - sub_r[path]["lat"]["min"] = accum_min(sub_r[path]["lat"]["min"], ds.metadata.lat.begin) - sub_r[path]["lat"]["max"] = accum_max(sub_r[path]["lat"]["max"], ds.metadata.lat.end) - sub_r[path]["lon"]["min"] = accum_min(sub_r[path]["lon"]["min"], ds.metadata.lon.begin) - sub_r[path]["lon"]["max"] = accum_max(sub_r[path]["lon"]["max"], ds.metadata.lon.end) - else: - path = None - - r["lat"]["min"] = accum_min(r["lat"]["min"], ds.metadata.lat.begin) - r["lat"]["max"] = accum_max(r["lat"]["max"], ds.metadata.lat.end) - r["lon"]["min"] = accum_min(r["lon"]["min"], ds.metadata.lon.begin) - r["lon"]["max"] = accum_max(r["lon"]["max"], ds.metadata.lon.end) - - time_set.add(ds_time) - if path is not None: - sub_r[path]["time_set"].add(ds_time) - - for crsid in crsids: - print("Working with CRS", crsid) - crs = crses[crsid] - ext = ds.extent - if ext.crs != crs: - ext = ext.to_crs(crs) - cvx_ext = ext.convex_hull - if cvx_ext != ext: - print("INFO: Dataset", ds.id, "CRS", crsid, "extent is not convex.") - if extents[crsid] is None: - extents[crsid] = cvx_ext - else: - if not extents[crsid].is_valid: - print("WARNING: Extent Union for", ds.id, "CRS", crsid, "is not valid") - if not cvx_ext.is_valid: - print("WARNING: Extent for CRS", crsid, "is not valid") - union = extents[crsid].union(cvx_ext) - if union._geom is not None: - extents[crsid] = union - else: - print("WARNING: Dataset", ds.id, "CRS", crsid, "union topology exception, ignoring union") - if path is not None: - if sub_r[path]["extents"][crsid] is None: - sub_r[path]["extents"][crsid] = cvx_ext - else: - sub_r[path]["extents"][crsid] = sub_r[path]["extents"][crsid].union(cvx_ext) - ds_count += 1 - - if ds_count > 0: - r["times"] = sorted(time_set) - r["time_set"] = time_set - r["bboxes"] = { 
crsid: jsonise_bbox(extents[crsid].boundingbox) for crsid in crsids } - print("LATS: ", r["lat"], " LONS: ", r["lon"]) - if extractor is not None: - for path in sub_r.keys(): - sub_r[path]["times"] = sorted(sub_r[path]["time_set"]) - sub_r[path]["bboxes"] = {crsid: jsonise_bbox(sub_r[path]["extents"][crsid].boundingbox) for crsid in crsids} - del sub_r[path]["extents"] - r["sub_products"] = sub_r - end = datetime.now() - print("Scanned %d datasets in %d seconds" % (ds_count, (end - start).seconds)) - else: - end = datetime.now() - print("No datasets indexed. Nothing to do and didn't do it in %s seconds" % (end - start).seconds) - return r - - -def get_ids_in_db(conn): - results = conn.execute("select id from wms.product_ranges") - ids = [r["id"] for r in results] - return ids - - -def get_product_paths_in_db(conn, dc_product): - results = conn.execute(""" - SELECT sub_product_id - FROM wms.sub_product_ranges - WHERE product_id = %s - ORDER BY product_id, sub_product_id""", - dc_product.id - ) - ids = { r["sub_product_id"] for r in results } - return ids - - -def rng_update(conn, rng, product, path=None): - # pylint: disable=bad-continuation - if isinstance(product, OWSNamedLayer): - if product.multi_product: - assert path is None - conn.execute(""" - UPDATE wms.multiproduct_ranges - SET - lat_min=%s, - lat_max=%s, - lon_min=%s, - lon_max=%s, - dates=%s, - bboxes=%s - WHERE wms_product_name=%s - """, - rng["lat"]["min"], - rng["lat"]["max"], - rng["lon"]["min"], - rng["lon"]["max"], - Json([t.strftime("%Y-%m-%d") for t in rng["times"]]), - Json({crsid: jsonise_bbox(bbox) for crsid, bbox in rng["bboxes"].items() }), - product.name) - return - product = product.product - if path is not None: - conn.execute(""" - UPDATE wms.sub_product_ranges - SET - lat_min=%s, - lat_max=%s, - lon_min=%s, - lon_max=%s, - dates=%s, - bboxes=%s - WHERE product_id=%s - AND sub_product_id=%s - """, - rng["lat"]["min"], - rng["lat"]["max"], - rng["lon"]["min"], - rng["lon"]["max"], - - Json([t.strftime("%Y-%m-%d") for t in rng["times"]]), - Json({crsid: jsonise_bbox(bbox) for crsid, bbox in rng["bboxes"].items() }), - product.id, - path - ) - else: - conn.execute(""" - UPDATE wms.product_ranges - SET - lat_min=%s, - lat_max=%s, - lon_min=%s, - lon_max=%s, - dates=%s, - bboxes=%s - WHERE id=%s - """, - rng["lat"]["min"], - rng["lat"]["max"], - rng["lon"]["min"], - rng["lon"]["max"], - - Json([t.strftime("%Y-%m-%d") for t in rng["times"]]), - Json({crsid: jsonise_bbox(bbox) for crsid, bbox in rng["bboxes"].items() }), - product.id - ) - - -def rng_insert(conn, rng, product, path=None): - # pylint: disable=bad-continuation - if isinstance(product, OWSNamedLayer): - if product.multi_product: - conn.execute(""" - INSERT into wms.multiproduct_ranges - (wms_product_name, lat_min,lat_max,lon_min,lon_max, dates,bboxes) - VALUES - (%s, %s,%s,%s,%s, %s,%s) - """, - product.name, - - rng["lat"]["min"], - rng["lat"]["max"], - rng["lon"]["min"], - rng["lon"]["max"], - - Json([t.strftime("%Y-%m-%d") for t in rng["times"]]), - Json({crsid: jsonise_bbox(bbox) for crsid, bbox in rng["bboxes"].items() }), - ) - return - product = product.product - if path is not None: - conn.execute(""" - INSERT into wms.sub_product_ranges - (product_id, sub_product_id, lat_min,lat_max,lon_min,lon_max, dates,bboxes) - VALUES - (%s,%s, %s,%s,%s,%s, %s,%s) - """, - product.id, - path, - - rng["lat"]["min"], - rng["lat"]["max"], - rng["lon"]["min"], - rng["lon"]["max"], - - Json([t.strftime("%Y-%m-%d") for t in rng["times"]]), - Json({crsid: 
jsonise_bbox(bbox) for crsid, bbox in rng["bboxes"].items() }), - ) - else: - conn.execute(""" - INSERT into wms.product_ranges - (id, lat_min,lat_max,lon_min,lon_max, dates,bboxes) - VALUES - (%s, %s,%s,%s,%s, %s,%s) - """, - product.id, - - rng["lat"]["min"], - rng["lat"]["max"], - rng["lon"]["min"], - rng["lon"]["max"], - - Json([t.strftime("%Y-%m-%d") for t in rng["times"]]), - Json({crsid: jsonise_bbox(bbox) for crsid, bbox in rng["bboxes"].items() }), - ) - - -def ranges_equal(r1, rdb): - # pylint: disable=too-many-branches - for coord in ("lat", "lon"): - for ext in ("max", "min"): - if abs(r1[coord][ext] - rdb[coord][ext]) > 1e-12: - return False - if len(r1["times"]) != len(rdb["times"]): - return False - for t1, t2 in zip_longest(r1["times"], rdb["times"]): - if t1 != t2: - return False - if len(r1["bboxes"]) != len(rdb["bboxes"]): - return False - try: - for cs in r1["bboxes"].keys(): - bb1 = r1["bboxes"][cs] - bb2 = rdb["bboxes"][cs] - if abs(bb1["top"] - float(bb2["top"])) > 1e-12: - return False - if abs(bb1["bottom"] - float(bb2["bottom"])) > 1e-12: - return False - if abs(bb1["left"] - float(bb2["left"])) > 1e-12: - return False - if abs(bb1["right"] - float(bb2["right"])) > 1e-12: - return False - except KeyError: - return False - return True - - -def update_range(dc, product, multi=False, follow_dependencies=True): - if multi: - product = get_config().product_index.get(product) - else: - product = dc.index.products.get_by_name(product) - - if product is None: - raise Exception("Requested product not found.") - - if multi: - return update_multi_range(dc, product, follow_dependencies=follow_dependencies) - else: - return update_single_range(dc, product) - - -def update_single_range(dc, product): - if isinstance(product, OWSNamedLayer): - assert not product.multi_product - dc_product = product.product - extractor = product.sub_product_extractor - summary = not product.is_raw_time_res - else: - dc_product = product - extractor = None - product = get_config().native_product_index.get(product.name) - if product: - summary = not product.is_raw_time_res - else: - summary = False - - product_range = determine_product_ranges(dc, dc_product, extractor, summary) - conn = get_sqlconn(dc) - txn = conn.begin() - db_range = get_ranges(dc, dc_product, is_dc_product=True) - subids_in_db = get_product_paths_in_db(conn, dc_product) - - ok = 0 - ins = 0 - upd = 0 - if db_range: - if ranges_equal(product_range, db_range): - print("Ranges equal, not updating") - ok = 1 - else: - rng_update(conn, product_range, dc_product) - print("Updating range") - upd = 1 - else: - rng_insert(conn, product_range, dc_product) - print("Inserting new range") - ins = 1 - - sok = 0 - sins = 0 - supd = 0 - if "sub_products" in product_range: - for path, subr in product_range["sub_products"].items(): - if path in subids_in_db: - db_range = get_ranges(dc, dc_product, path, is_dc_product=True) - if ranges_equal(subr, db_range): - sok += 1 - else: - rng_update(conn, subr, dc_product, path) - supd += 1 - else: - rng_insert(conn, subr, dc_product, path) - sins += 1 - txn.commit() - conn.close() - - return (ok, upd, ins, sok, supd, sins) - - -def update_multi_range(dc, product, follow_dependencies=True): - assert product.multi_product - - if follow_dependencies: - for dc_product in product.products: - update_single_range(dc, dc_product) - - mp_ranges = None - for p in product.products: - mp_ranges = merge_ranges(mp_ranges, get_ranges(dc, p, is_dc_product=True)) - - - db_range = get_ranges(dc, product) - conn = 
get_sqlconn(dc) - txn = conn.begin() - - ok = 0 - ins = 0 - upd = 0 - if db_range: - if ranges_equal(mp_ranges, db_range): - print("Ranges equal, not updating") - ok = 1 - else: - rng_update(conn, mp_ranges, product) - print("Updating range") - upd = 1 - else: - rng_insert(conn, mp_ranges, product) - print("Inserting new range") - ins = 1 - - - txn.commit() - conn.close() - return (ok, upd, ins) - - -def update_all_ranges(dc): - i = 0 - u = 0 - p = 0 - si = 0 - su = 0 - sp = 0 - mi = 0 - mu = 0 - mp = 0 - - multiproducts = set() - - for prod in get_config().product_index.values(): - if prod.multi_product: - multiproducts.add(prod) - else: - stats = update_single_range(dc, prod) - p += stats[0] - u += stats[1] - i += stats[2] - sp += stats[3] - su += stats[4] - si += stats[5] - - for mprod in multiproducts: - stats = update_multi_range(dc, mprod, follow_dependencies=False) - mp += stats[0] - mu += stats[1] - mi += stats[2] - - return p, u, i, sp, su, si, mp, mu, mi - - -def get_ranges(dc, product, path=None, is_dc_product=False): - conn = get_sqlconn(dc) - if not is_dc_product and product.multi_product: - if path is not None: - raise Exception("Combining subproducts and multiproducts is not yet supported") - results = conn.execute(""" - SELECT * - FROM wms.multiproduct_ranges - WHERE wms_product_name=%s""", - product.name - ) - else: - if is_dc_product: - prod_id = product.id - else: - prod_id = product.product.id - if path is not None: - results = conn.execute(""" - SELECT * - FROM wms.sub_product_ranges - WHERE product_id=%s and sub_product_id=%s""", - prod_id, path - ) - else: - results = conn.execute(""" - SELECT * - FROM wms.product_ranges - WHERE id=%s""", - prod_id - ) - for result in results: - conn.close() - times = [datetime.strptime(d, "%Y-%m-%d").date() for d in result["dates"]] - if not times: - return None - return { - "lat": { - "min": float(result["lat_min"]), - "max": float(result["lat_max"]), - }, - "lon": { - "min": float(result["lon_min"]), - "max": float(result["lon_max"]), - }, - "times": times, - "start_time": times[0], - "end_time": times[-1], - "time_set": set(times), - "bboxes": result["bboxes"] - } - return None - - -def merge_ranges(r1, r2): - if r1 is None: - return r2 - elif r2 is None: - return r1 - time_set = r1["time_set"] | r2["time_set"] - times = sorted(list(time_set)) - return { - "lat": { - "min": min(r1["lat"]["min"], r2["lat"]["min"]), - "max": max(r1["lat"]["max"], r2["lat"]["max"]), - }, - "lon": { - "min": min(r1["lon"]["min"], r2["lon"]["min"]), - "max": max(r1["lon"]["max"], r2["lon"]["max"]), - }, - "times": times, - "start_time": times[0], - "end_time": times[-1], - "time_set": time_set, - "bboxes": { - crs: { - "top": max(r1["bboxes"][crs]["top"], r2["bboxes"][crs]["top"]), - "bottom": min(r1["bboxes"][crs]["bottom"], r2["bboxes"][crs]["bottom"]), - "right": max(r1["bboxes"][crs]["right"], r2["bboxes"][crs]["right"]), - "left": min(r1["bboxes"][crs]["left"], r2["bboxes"][crs]["left"]), - } - for crs in r1["bboxes"].keys() - } - } - - -def get_sub_ranges(dc, product): - conn = get_sqlconn(dc) - results = conn.execute("select sub_product_id from wms.sub_product_ranges where product_id=%s", product.product.id) - return {r["sub_product_id"]: get_ranges(dc, product.product.id, r["sub_product_id"]) for r in results} - def create_multiprod_range_entry(dc, product, crses): + print("Merging multiproduct ranges for %s (ODC products: %s)" % ( + product.name, + repr(product.product_names) + )) conn = get_sqlconn(dc) txn = conn.begin() - if 
isinstance(product, dict): - prodids = [p.id for p in product["products"]] - wms_name = product["name"] - else: - prodids = [ p.id for p in product.products ] - wms_name = product.name + prodids = [ p.id for p in product.products ] + wms_name = product.name # Attempt to insert row conn.execute(""" @@ -625,29 +77,45 @@ def create_multiprod_range_entry(dc, product, crses): # Update extents conn.execute(""" UPDATE wms.multiproduct_ranges - SET (lat_min,lat_max,lon_min,lon_max) = - (wms_get_min(%(p_prodids)s, 'lat'), wms_get_max(%(p_prodids)s, 'lat'), wms_get_min(%(p_prodids)s, 'lon'), wms_get_max(%(p_prodids)s, 'lon')) - WHERE wms_product_name=%(p_id)s + SET lat_min = subq.lat_min, + lat_max = subq.lat_max, + lon_min = subq.lon_min, + lon_max = subq.lon_max + FROM ( + select min(lat_min) as lat_min, + max(lat_max) as lat_max, + min(lon_min) as lon_min, + max(lon_max) as lon_max + from wms.product_ranges + where id = ANY (%(p_prodids)s) + ) as subq + WHERE wms_product_name = %(p_id)s """, {"p_id": wms_name, "p_prodids": prodids}) # Create sorted list of dates + results = conn.execute( + """ + SELECT dates + FROM wms.product_ranges + WHERE id = ANY (%(p_prodids)s) + """, {"p_prodids": prodids} + ) + dates = set() + for r in results: + for d in r[0]: + dates.add(d) + dates = sorted(dates) conn.execute(""" - WITH sorted - AS (SELECT to_jsonb(array_agg(dates.d)) - AS dates - FROM (SELECT DISTINCT to_date(metadata::json->'extent'->>'center_dt', 'YYYY-MM-DD') - AS d - FROM agdc.dataset - WHERE dataset_type_ref = any (%(p_prodids)s) - AND archived IS NULL - ORDER BY d) dates) - UPDATE wms.multiproduct_ranges - SET dates=sorted.dates - FROM sorted - WHERE wms_product_name=%(p_id)s - """, - {"p_id": wms_name, "p_prodids": prodids}) + UPDATE wms.multiproduct_ranges + SET dates = %(dates)s + WHERE wms_product_name= %(p_id)s + """, + { + "dates": Json(dates), + "p_id": wms_name + } + ) # calculate bounding boxes results = list(conn.execute(""" @@ -683,11 +151,13 @@ def create_multiprod_range_entry(dc, product, crses): def create_range_entry(dc, product, crses, summary_product=False): + print("Updating range for ODC product %s..."% product.name) + # NB. 
product is an ODC product conn = get_sqlconn(dc) txn = conn.begin() prodid = product.id - # Attempt to insert row + # insert empty row if one does not already exist conn.execute(""" INSERT INTO wms.product_ranges (id,lat_min,lat_max,lon_min,lon_max,dates,bboxes) @@ -697,33 +167,46 @@ def create_range_entry(dc, product, crses, summary_product=False): """, {"p_id": prodid, "empty": Json("")}) - # Update extents - conn.execute(""" - UPDATE wms.product_ranges - SET (lat_min,lat_max,lon_min,lon_max) = - (wms_get_min(%(p_idarr)s, 'lat'), wms_get_max(%(p_idarr)s, 'lat'), wms_get_min(%(p_idarr)s, 'lon'), wms_get_max(%(p_idarr)s, 'lon')) - WHERE id=%(p_id)s - """, - {"p_id": prodid, "p_idarr": [ prodid ]}) + + # Update min/max lat/longs + conn.execute( + """ + UPDATE wms.product_ranges pr + SET lat_min = st_ymin(subq.bbox), + lat_max = st_ymax(subq.bbox), + lon_min = st_xmin(subq.bbox), + lon_max = st_xmax(subq.bbox) + FROM ( + SELECT st_extent(stv.spatial_extent) as bbox + FROM public.space_time_view stv + WHERE stv.dataset_type_ref = %(p_id)s + ) as subq + """, + {"p_id": prodid}) # Set default timezone conn.execute(""" set timezone to 'Etc/UTC' """) + if summary_product: # Loop over dates dates = set() - for result in conn.execute(""" - SELECT DISTINCT cast(metadata -> 'extent' ->> 'from_dt' as date) as dt - FROM agdc.dataset - WHERE dataset_type_ref = %(p_id)s - AND archived IS NULL - ORDER BY dt - """, - {"p_id": prodid}): - dates.add(result[0]) + results = conn.execute( + """ + select + array_agg(temporal_extent) + from public.space_time_view + WHERE dataset_type_ref = %(p_id)s + """, + {"p_id": prodid} + ) + for result in results: + for dat_ran in result[0]: + dates.add(dat_ran.lower) + dates = sorted(dates) conn.execute(""" @@ -760,6 +243,7 @@ def create_range_entry(dc, product, crses, summary_product=False): """, {"p_id": prodid}) + # calculate bounding boxes results = list(conn.execute(""" SELECT lat_min,lat_max,lon_min,lon_max @@ -812,59 +296,107 @@ def check_datasets_exist(dc, product_name): return list(results)[0][0] > 0 +def add_ranges(dc, product_names, summary=False, merge_only=False): + odc_products = {} + ows_multiproducts = [] + for pname in product_names: + dc_product = None + ows_product = get_config().product_index.get(pname) + if not ows_product: + ows_product = get_config().native_product_index.get(pname) + if ows_product: + for dc_pname in ows_product.product_names: + if dc_pname in odc_products: + odc_products[dc_pname]["ows"].append(ows_product) + else: + odc_products[dc_pname] = { "ows": [ows_product]} + print("OWS Layer %s maps to ODC Product(s): %s" % ( + ows_product.name, + repr(ows_product.product_names) + )) + if ows_product.multi_product: + ows_multiproducts.append(ows_product) + if not ows_product: + dc_product = dc.index.products.get_by_name(pname) + if dc_product: + print("ODC Layer: %s" % pname) + if pname in odc_products: + odc_products[pname]["ows"].append(None) + else: + odc_products[pname] = { "ows": [None]} + else: + print("Unrecognised product name:", pname) + continue -def add_product_range(dc, product): - if isinstance(product, str): - product_name = product - dc_product = dc.index.products.get_by_name(product) - else: - product_name = product.name - dc_product = product - - ows_product = get_config().native_product_index.get(product_name) - if ows_product: - summary_product = not ows_product.is_raw_time_res - else: - summary_product = False - - assert dc_product is not None - - if check_datasets_exist(dc, product_name): - create_range_entry(dc, 
dc_product, get_crses(), summary_product) + if ows_multiproducts and merge_only: + print("Merge-only: Skipping range update of products:", repr(list(odc_products.keys()))) else: - print("Could not find any datasets for: ", product_name) - - -def add_multiproduct_range(dc, product, follow_dependencies=True): - if isinstance(product, str): - product = get_config().product_index.get(product) - - assert product is not None - assert product.multi_product - - if follow_dependencies: - for product_name in product.product_names: - dc_prod = dc.index.products.get_by_name(product_name) - if not check_datasets_exist(dc, product_name): - print("Could not find any datasets for: ", product_name) + for pname, ows_prods in odc_products.items(): + dc_product = dc.index.products.get_by_name(pname) + if check_datasets_exist(dc, dc_product.name): + prod_summary = summary + for ows_prod in ows_prods["ows"]: + if ows_prod: + prod_summary = not ows_prod.is_raw_time_res + break + create_range_entry(dc, dc_product, get_crses(), prod_summary) else: - add_product_range(dc, product_name) + print("Could not find any datasets for: ", pname) - # Actually merge and store! - create_multiprod_range_entry(dc, product, get_crses()) + for mp in ows_multiproducts: + create_multiprod_range_entry(dc, mp, get_crses()) + print("Done.") -def add_all(dc): - multi_products = set() - for product_cfg in get_config().product_index.values(): - product_name = product_cfg.product_name - if product_cfg.multi_product: - multi_products.add(product_cfg) - else: - print("Adding range for:", product_name) - add_product_range(dc, product_name) - - for p in multi_products: - print("Adding multiproduct range for:", p.name) - add_multiproduct_range(dc, p, follow_dependencies=False) +def get_ranges(dc, product, path=None, is_dc_product=False): + conn = get_sqlconn(dc) + if not is_dc_product and product.multi_product: + if path is not None: + raise Exception("Combining subproducts and multiproducts is not yet supported") + results = conn.execute(""" + SELECT * + FROM wms.multiproduct_ranges + WHERE wms_product_name=%s""", + product.name + ) + else: + if is_dc_product: + prod_id = product.id + else: + prod_id = product.product.id + if path is not None: + results = conn.execute(""" + SELECT * + FROM wms.sub_product_ranges + WHERE product_id=%s and sub_product_id=%s""", + prod_id, path + ) + else: + results = conn.execute(""" + SELECT * + FROM wms.product_ranges + WHERE id=%s""", + prod_id + ) + for result in results: + conn.close() + times = [datetime.strptime(d, "%Y-%m-%d").date() for d in result["dates"]] + if not times: + return None + return { + "lat": { + "min": float(result["lat_min"]), + "max": float(result["lat_max"]), + }, + "lon": { + "min": float(result["lon_min"]), + "max": float(result["lon_max"]), + }, + "times": times, + "start_time": times[0], + "end_time": times[-1], + "time_set": set(times), + "bboxes": result["bboxes"] + } + return None \ No newline at end of file diff --git a/datacube_ows/product_ranges_2.py b/datacube_ows/product_ranges_2.py deleted file mode 100644 index 3bd3ef967..000000000 --- a/datacube_ows/product_ranges_2.py +++ /dev/null @@ -1,351 +0,0 @@ -#pylint: skip-file - -from __future__ import absolute_import, division, print_function - -from datetime import datetime -import datacube - -from datacube_ows.ows_configuration import get_config, OWSNamedLayer # , get_layers, ProductLayerDef -from datacube_ows.ogc_utils import local_date -from psycopg2.extras import Json -from itertools import zip_longest -import json - 
-from datacube_ows.utils import get_sqlconn - -def get_odc_products(dc, any_product, odc_only=False): - if isinstance(any_product, OWSNamedLayer): - return any_product.products - elif isinstance(any_product, str): - dc_product = dc.index.products.get_by_name(any_product) - if odc_only: - ows_product = None - else: - ows_product = get_config().product_index.get(any_product) - if ows_product: - if dc_product and [dc_product] == ows_product.products: - # The same! - return [dc_product] - print("Updating OWS product %s (ODC Products: []). If you meant the ODC product %s, please use the --odc-only flag.") - return ows_product.products - else: - # Assume ODC product - return [any_product] - -def get_crsids(cfg=None): - if not cfg: - cfg = get_config() - return cfg.published_CRSs.keys() - - -def get_crses(cfg=None): - return {crsid: datacube.utils.geometry.CRS(crsid) for crsid in get_crsids(cfg)} - - -def jsonise_bbox(bbox): - if isinstance(bbox, dict): - return bbox - else: - return { - "top": bbox.top, - "bottom": bbox.bottom, - "left": bbox.left, - "right": bbox.right, - } - - -def create_multiprod_range_entry(dc, product, crses): - print("Merging multiproduct ranges for %s (ODC products: %s)" % ( - product.name, - repr(product.product_names) - )) - conn = get_sqlconn(dc) - txn = conn.begin() - prodids = [ p.id for p in product.products ] - wms_name = product.name - - # Attempt to insert row - conn.execute(""" - INSERT INTO wms.multiproduct_ranges - (wms_product_name,lat_min,lat_max,lon_min,lon_max,dates,bboxes) - VALUES - (%(p_id)s, 0, 0, 0, 0, %(empty)s, %(empty)s) - ON CONFLICT (wms_product_name) DO NOTHING - """, - {"p_id": wms_name, "empty": Json("")}) - - # Update extents - conn.execute(""" - UPDATE wms.multiproduct_ranges - SET lat_min = subq.lat_min, - lat_max = subq.lat_max, - lon_min = subq.lon_min, - lon_max = subq.lon_max - FROM ( - select min(lat_min) as lat_min, - max(lat_max) as lat_max, - min(lon_min) as lon_min, - max(lon_max) as lon_max - from wms.product_ranges - where id = ANY (%(p_prodids)s) - ) as subq - WHERE wms_product_name = %(p_id)s - """, - {"p_id": wms_name, "p_prodids": prodids}) - - # Create sorted list of dates - results = conn.execute( - """ - SELECT dates - FROM wms.product_ranges - WHERE id = ANY (%(p_prodids)s) - """, {"p_prodids": prodids} - ) - dates = set() - for r in results: - for d in r[0]: - dates.add(d) - dates = sorted(dates) - conn.execute(""" - UPDATE wms.multiproduct_ranges - SET dates = %(dates)s - WHERE wms_product_name= %(p_id)s - """, - { - "dates": Json(dates), - "p_id": wms_name - } - ) - - # calculate bounding boxes - results = list(conn.execute(""" - SELECT lat_min,lat_max,lon_min,lon_max - FROM wms.multiproduct_ranges - WHERE wms_product_name=%(p_id)s - """, - {"p_id": wms_name} )) - - r = results[0] - - epsg4326 = datacube.utils.geometry.CRS("EPSG:4326") - box = datacube.utils.geometry.box( - float(r[2]), - float(r[0]), - float(r[3]), - float(r[1]), - epsg4326) - - cfg = get_config() - conn.execute(""" - UPDATE wms.multiproduct_ranges - SET bboxes = %s::jsonb - WHERE wms_product_name=%s - """, - Json({ crsid: jsonise_bbox(box.to_crs(crs).boundingbox) for crsid, crs in get_crses(cfg).items() }), - wms_name - ) - - txn.commit() - conn.close() - return - - -def create_range_entry(dc, product, crses, summary_product=False): - print("Updating range for ODC product %s..."% product.name) - # NB. 
product is an ODC product - conn = get_sqlconn(dc) - txn = conn.begin() - prodid = product.id - - # insert empty row if one does not already exist - conn.execute(""" - INSERT INTO wms.product_ranges - (id,lat_min,lat_max,lon_min,lon_max,dates,bboxes) - VALUES - (%(p_id)s, 0, 0, 0, 0, %(empty)s, %(empty)s) - ON CONFLICT (id) DO NOTHING - """, - {"p_id": prodid, "empty": Json("")}) - - - # Update min/max lat/longs - conn.execute( - """ - UPDATE wms.product_ranges pr - SET lat_min = st_ymin(subq.bbox), - lat_max = st_ymax(subq.bbox), - lon_min = st_xmin(subq.bbox), - lon_max = st_xmax(subq.bbox) - FROM ( - SELECT st_extent(stv.spatial_extent) as bbox - FROM public.space_time_view stv - WHERE stv.dataset_type_ref = %(p_id)s - ) as subq - """, - {"p_id": prodid}) - - # Set default timezone - conn.execute(""" - set timezone to 'Etc/UTC' - """) - - - if summary_product: - # Loop over dates - dates = set() - - results = conn.execute( - """ - select - array_agg(temporal_extent) - from public.space_time_view - WHERE dataset_type_ref = %(p_id)s - """, - {"p_id": prodid} - ) - for result in results: - for dat_ran in result[0]: - dates.add(dat_ran.lower) - - dates = sorted(dates) - - conn.execute(""" - UPDATE wms.product_ranges - SET dates = %(dates)s - WHERE id= %(p_id)s - """, - { - "dates": Json([t.strftime("%Y-%m-%d") for t in dates]), - "p_id": prodid - } - ) - else: - # Create sorted list of dates - conn.execute(""" - WITH sorted - AS (SELECT to_jsonb(array_agg(dates.d)) - AS dates - FROM (SELECT DISTINCT - date(cast(metadata -> 'extent' ->> 'center_dt' as timestamp) AT TIME ZONE 'UTC' + - (least(to_number(metadata -> 'extent' -> 'coord' -> 'll' ->> 'lon', '9999.9999999999999999999999999999999999'), - to_number(metadata -> 'extent' -> 'coord' -> 'ul' ->> 'lon', '9999.9999999999999999999999999999999999')) + - greatest(to_number(metadata -> 'extent' -> 'coord' -> 'lr' ->> 'lon', '9999.9999999999999999999999999999999999'), - to_number(metadata -> 'extent' -> 'coord' -> 'ur' ->> 'lon', '9999.9999999999999999999999999999999999'))) / 30.0 * interval '1 hour') - AS d - FROM agdc.dataset - WHERE dataset_type_ref=%(p_id)s - AND archived IS NULL - ORDER BY d) dates) - UPDATE wms.product_ranges - SET dates=sorted.dates - FROM sorted - WHERE id=%(p_id)s - """, - {"p_id": prodid}) - - - # calculate bounding boxes - results = list(conn.execute(""" - SELECT lat_min,lat_max,lon_min,lon_max - FROM wms.product_ranges - WHERE id=%s - """, - prodid)) - - r = results[0] - - epsg4326 = datacube.utils.geometry.CRS("EPSG:4326") - box = datacube.utils.geometry.box( - float(r[2]), - float(r[0]), - float(r[3]), - float(r[1]), - epsg4326) - - conn.execute(""" - UPDATE wms.product_ranges - SET bboxes = %s::jsonb - WHERE id=%s - """, - Json( - {crsid: {"top": box.to_crs(crs).boundingbox.top, - "bottom": box.to_crs(crs).boundingbox.bottom, - "left": box.to_crs(crs).boundingbox.left, - "right": box.to_crs(crs).boundingbox.right} - for crsid, crs in crses.items() - } - ), - product.id) - - txn.commit() - conn.close() - - -def check_datasets_exist(dc, product_name): - conn = get_sqlconn(dc) - - results = conn.execute(""" - SELECT COUNT(*) - FROM agdc.dataset ds, agdc.dataset_type p - WHERE ds.archived IS NULL - AND ds.dataset_type_ref = p.id - AND p.name = %s""", - product_name) - - conn.close() - - return list(results)[0][0] > 0 - -def add_ranges(dc, product_names, summary=False, merge_only=False): - odc_products = {} - ows_multiproducts = [] - for pname in product_names: - dc_product = None - ows_product = 
get_config().product_index.get(pname) - if not ows_product: - ows_product = get_config().native_product_index.get(pname) - if ows_product: - for dc_pname in ows_product.product_names: - if dc_pname in odc_products: - odc_products[dc_pname]["ows"].append(ows_product) - else: - odc_products[dc_pname] = { "ows": [ows_product]} - print("OWS Layer %s maps to ODC Product(s): %s" % ( - ows_product.name, - repr(ows_product.product_names) - )) - if ows_product.multi_product: - ows_multiproducts.append(ows_product) - if not ows_product: - dc_product = dc.index.products.get_by_name(pname) - if dc_product: - print("ODC Layer: %s" % pname) - if pname in odc_products: - odc_products[pname]["ows"].append(None) - else: - odc_products[pname] = { "ows": [None]} - else: - print("Unrecognised product name:", pname) - continue - - if ows_multiproducts and merge_only: - print("Merge-only: Skipping range update of products:", repr(list(odc_products.keys()))) - else: - for pname, ows_prods in odc_products.items(): - dc_product = dc.index.products.get_by_name(pname) - if check_datasets_exist(dc, dc_product.name): - prod_summary = summary - for ows_prod in ows_prods["ows"]: - if ows_prod: - prod_summary = not ows_prod.is_raw_time_res - break - create_range_entry(dc, dc_product, get_crses(), prod_summary) - else: - print("Could not find any datasets for: ", pname) - - for mp in ows_multiproducts: - create_multiprod_range_entry(dc, mp, get_crses()) - - print("Done.") - - diff --git a/datacube_ows/product_ranges_old.py b/datacube_ows/product_ranges_old.py new file mode 100644 index 000000000..4965a2901 --- /dev/null +++ b/datacube_ows/product_ranges_old.py @@ -0,0 +1,761 @@ +#pylint: skip-file + +from __future__ import absolute_import, division, print_function + +from datetime import datetime +from itertools import zip_longest +import datacube + +from psycopg2.extras import Json + +from datacube_ows.ows_configuration import get_config, OWSNamedLayer +from datacube_ows.ogc_utils import local_date +from datacube_ows.utils import get_sqlconn +from datacube_ows.product_ranges import get_crses, get_crsids, jsonise_bbox, get_ranges + + +def accum_min(a, b): + if a is None: + return b + elif b is None: + return a + else: + return min(a, b) + + +def accum_max(a, b): + if a is None: + return b + elif b is None: + return a + else: + return max(a, b) + + +def determine_product_ranges(dc, dc_product, extractor, summary_dataset=False): + # pylint: disable=too-many-locals, too-many-branches, too-many-statements, protected-access + start = datetime.now() + print("Product: ", dc_product.name) + r = { + "lat": { + "min": None, + "max": None + }, + "lon": { + "min": None, + "max": None + }, + } + sub_r = {} + time_set = set() + cfg = get_config() + print ("OK, Let's do it") + crsids = get_crsids(cfg) + extents = {crsid: None for crsid in crsids} + crses = get_crses(cfg) + ds_count = 0 + for ds in dc.find_datasets(product=dc_product.name): + print("Processing a dataset", ds.id) + if summary_dataset: + ds_time = ds.metadata.time[0] + else: + ds_time = local_date(ds) + if extractor is not None: + path = extractor(ds) + if path not in sub_r: + sub_r[path] = { + "lat": { + "min": None, + "max": None, + }, + "lon": { + "min": None, + "max": None, + }, + "time_set": set(), + "extents": {crsid: None for crsid in crsids} + } + sub_r[path]["lat"]["min"] = accum_min(sub_r[path]["lat"]["min"], ds.metadata.lat.begin) + sub_r[path]["lat"]["max"] = accum_max(sub_r[path]["lat"]["max"], ds.metadata.lat.end) + sub_r[path]["lon"]["min"] = 
accum_min(sub_r[path]["lon"]["min"], ds.metadata.lon.begin) + sub_r[path]["lon"]["max"] = accum_max(sub_r[path]["lon"]["max"], ds.metadata.lon.end) + else: + path = None + + r["lat"]["min"] = accum_min(r["lat"]["min"], ds.metadata.lat.begin) + r["lat"]["max"] = accum_max(r["lat"]["max"], ds.metadata.lat.end) + r["lon"]["min"] = accum_min(r["lon"]["min"], ds.metadata.lon.begin) + r["lon"]["max"] = accum_max(r["lon"]["max"], ds.metadata.lon.end) + + time_set.add(ds_time) + if path is not None: + sub_r[path]["time_set"].add(ds_time) + + for crsid in crsids: + print("Working with CRS", crsid) + crs = crses[crsid] + ext = ds.extent + if ext.crs != crs: + ext = ext.to_crs(crs) + cvx_ext = ext.convex_hull + if cvx_ext != ext: + print("INFO: Dataset", ds.id, "CRS", crsid, "extent is not convex.") + if extents[crsid] is None: + extents[crsid] = cvx_ext + else: + if not extents[crsid].is_valid: + print("WARNING: Extent Union for", ds.id, "CRS", crsid, "is not valid") + if not cvx_ext.is_valid: + print("WARNING: Extent for CRS", crsid, "is not valid") + union = extents[crsid].union(cvx_ext) + if union._geom is not None: + extents[crsid] = union + else: + print("WARNING: Dataset", ds.id, "CRS", crsid, "union topology exception, ignoring union") + if path is not None: + if sub_r[path]["extents"][crsid] is None: + sub_r[path]["extents"][crsid] = cvx_ext + else: + sub_r[path]["extents"][crsid] = sub_r[path]["extents"][crsid].union(cvx_ext) + ds_count += 1 + + if ds_count > 0: + r["times"] = sorted(time_set) + r["time_set"] = time_set + r["bboxes"] = { crsid: jsonise_bbox(extents[crsid].boundingbox) for crsid in crsids } + print("LATS: ", r["lat"], " LONS: ", r["lon"]) + if extractor is not None: + for path in sub_r.keys(): + sub_r[path]["times"] = sorted(sub_r[path]["time_set"]) + sub_r[path]["bboxes"] = {crsid: jsonise_bbox(sub_r[path]["extents"][crsid].boundingbox) for crsid in crsids} + del sub_r[path]["extents"] + r["sub_products"] = sub_r + end = datetime.now() + print("Scanned %d datasets in %d seconds" % (ds_count, (end - start).seconds)) + else: + end = datetime.now() + print("No datasets indexed. 
Nothing to do and didn't do it in %s seconds" % (end - start).seconds) + return r + + +def get_product_paths_in_db(conn, dc_product): + results = conn.execute(""" + SELECT sub_product_id + FROM wms.sub_product_ranges + WHERE product_id = %s + ORDER BY product_id, sub_product_id""", + dc_product.id + ) + ids = { r["sub_product_id"] for r in results } + return ids + + +def rng_update(conn, rng, product, path=None): + # pylint: disable=bad-continuation + if isinstance(product, OWSNamedLayer): + if product.multi_product: + assert path is None + conn.execute(""" + UPDATE wms.multiproduct_ranges + SET + lat_min=%s, + lat_max=%s, + lon_min=%s, + lon_max=%s, + dates=%s, + bboxes=%s + WHERE wms_product_name=%s + """, + rng["lat"]["min"], + rng["lat"]["max"], + rng["lon"]["min"], + rng["lon"]["max"], + Json([t.strftime("%Y-%m-%d") for t in rng["times"]]), + Json({crsid: jsonise_bbox(bbox) for crsid, bbox in rng["bboxes"].items() }), + product.name) + return + product = product.product + if path is not None: + conn.execute(""" + UPDATE wms.sub_product_ranges + SET + lat_min=%s, + lat_max=%s, + lon_min=%s, + lon_max=%s, + dates=%s, + bboxes=%s + WHERE product_id=%s + AND sub_product_id=%s + """, + rng["lat"]["min"], + rng["lat"]["max"], + rng["lon"]["min"], + rng["lon"]["max"], + + Json([t.strftime("%Y-%m-%d") for t in rng["times"]]), + Json({crsid: jsonise_bbox(bbox) for crsid, bbox in rng["bboxes"].items() }), + product.id, + path + ) + else: + conn.execute(""" + UPDATE wms.product_ranges + SET + lat_min=%s, + lat_max=%s, + lon_min=%s, + lon_max=%s, + dates=%s, + bboxes=%s + WHERE id=%s + """, + rng["lat"]["min"], + rng["lat"]["max"], + rng["lon"]["min"], + rng["lon"]["max"], + + Json([t.strftime("%Y-%m-%d") for t in rng["times"]]), + Json({crsid: jsonise_bbox(bbox) for crsid, bbox in rng["bboxes"].items() }), + product.id + ) + + +def rng_insert(conn, rng, product, path=None): + # pylint: disable=bad-continuation + if isinstance(product, OWSNamedLayer): + if product.multi_product: + conn.execute(""" + INSERT into wms.multiproduct_ranges + (wms_product_name, lat_min,lat_max,lon_min,lon_max, dates,bboxes) + VALUES + (%s, %s,%s,%s,%s, %s,%s) + """, + product.name, + + rng["lat"]["min"], + rng["lat"]["max"], + rng["lon"]["min"], + rng["lon"]["max"], + + Json([t.strftime("%Y-%m-%d") for t in rng["times"]]), + Json({crsid: jsonise_bbox(bbox) for crsid, bbox in rng["bboxes"].items() }), + ) + return + product = product.product + if path is not None: + conn.execute(""" + INSERT into wms.sub_product_ranges + (product_id, sub_product_id, lat_min,lat_max,lon_min,lon_max, dates,bboxes) + VALUES + (%s,%s, %s,%s,%s,%s, %s,%s) + """, + product.id, + path, + + rng["lat"]["min"], + rng["lat"]["max"], + rng["lon"]["min"], + rng["lon"]["max"], + + Json([t.strftime("%Y-%m-%d") for t in rng["times"]]), + Json({crsid: jsonise_bbox(bbox) for crsid, bbox in rng["bboxes"].items() }), + ) + else: + conn.execute(""" + INSERT into wms.product_ranges + (id, lat_min,lat_max,lon_min,lon_max, dates,bboxes) + VALUES + (%s, %s,%s,%s,%s, %s,%s) + """, + product.id, + + rng["lat"]["min"], + rng["lat"]["max"], + rng["lon"]["min"], + rng["lon"]["max"], + + Json([t.strftime("%Y-%m-%d") for t in rng["times"]]), + Json({crsid: jsonise_bbox(bbox) for crsid, bbox in rng["bboxes"].items() }), + ) + + +def ranges_equal(r1, rdb): + # pylint: disable=too-many-branches + for coord in ("lat", "lon"): + for ext in ("max", "min"): + if abs(r1[coord][ext] - rdb[coord][ext]) > 1e-12: + return False + if len(r1["times"]) != len(rdb["times"]): + return 
False + for t1, t2 in zip_longest(r1["times"], rdb["times"]): + if t1 != t2: + return False + if len(r1["bboxes"]) != len(rdb["bboxes"]): + return False + try: + for cs in r1["bboxes"].keys(): + bb1 = r1["bboxes"][cs] + bb2 = rdb["bboxes"][cs] + if abs(bb1["top"] - float(bb2["top"])) > 1e-12: + return False + if abs(bb1["bottom"] - float(bb2["bottom"])) > 1e-12: + return False + if abs(bb1["left"] - float(bb2["left"])) > 1e-12: + return False + if abs(bb1["right"] - float(bb2["right"])) > 1e-12: + return False + except KeyError: + return False + return True + + +def update_range(dc, product, multi=False, follow_dependencies=True): + if multi: + product = get_config().product_index.get(product) + else: + product = dc.index.products.get_by_name(product) + + if product is None: + raise Exception("Requested product not found.") + + if multi: + return update_multi_range(dc, product, follow_dependencies=follow_dependencies) + else: + return update_single_range(dc, product) + + +def update_single_range(dc, product): + if isinstance(product, OWSNamedLayer): + assert not product.multi_product + dc_product = product.product + extractor = product.sub_product_extractor + summary = not product.is_raw_time_res + else: + dc_product = product + extractor = None + product = get_config().native_product_index.get(product.name) + if product: + summary = not product.is_raw_time_res + else: + summary = False + + product_range = determine_product_ranges(dc, dc_product, extractor, summary) + conn = get_sqlconn(dc) + txn = conn.begin() + db_range = get_ranges(dc, dc_product, is_dc_product=True) + subids_in_db = get_product_paths_in_db(conn, dc_product) + + ok = 0 + ins = 0 + upd = 0 + if db_range: + if ranges_equal(product_range, db_range): + print("Ranges equal, not updating") + ok = 1 + else: + rng_update(conn, product_range, dc_product) + print("Updating range") + upd = 1 + else: + rng_insert(conn, product_range, dc_product) + print("Inserting new range") + ins = 1 + + sok = 0 + sins = 0 + supd = 0 + if "sub_products" in product_range: + for path, subr in product_range["sub_products"].items(): + if path in subids_in_db: + db_range = get_ranges(dc, dc_product, path, is_dc_product=True) + if ranges_equal(subr, db_range): + sok += 1 + else: + rng_update(conn, subr, dc_product, path) + supd += 1 + else: + rng_insert(conn, subr, dc_product, path) + sins += 1 + txn.commit() + conn.close() + + return (ok, upd, ins, sok, supd, sins) + + +def update_multi_range(dc, product, follow_dependencies=True): + assert product.multi_product + + if follow_dependencies: + for dc_product in product.products: + update_single_range(dc, dc_product) + + mp_ranges = None + for p in product.products: + mp_ranges = merge_ranges(mp_ranges, get_ranges(dc, p, is_dc_product=True)) + + + db_range = get_ranges(dc, product) + conn = get_sqlconn(dc) + txn = conn.begin() + + ok = 0 + ins = 0 + upd = 0 + if db_range: + if ranges_equal(mp_ranges, db_range): + print("Ranges equal, not updating") + ok = 1 + else: + rng_update(conn, mp_ranges, product) + print("Updating range") + upd = 1 + else: + rng_insert(conn, mp_ranges, product) + print("Inserting new range") + ins = 1 + + + txn.commit() + conn.close() + return (ok, upd, ins) + + +def update_all_ranges(dc): + i = 0 + u = 0 + p = 0 + si = 0 + su = 0 + sp = 0 + mi = 0 + mu = 0 + mp = 0 + + multiproducts = set() + + for prod in get_config().product_index.values(): + if prod.multi_product: + multiproducts.add(prod) + else: + stats = update_single_range(dc, prod) + p += stats[0] + u += stats[1] + i += 
stats[2] + sp += stats[3] + su += stats[4] + si += stats[5] + + for mprod in multiproducts: + stats = update_multi_range(dc, mprod, follow_dependencies=False) + mp += stats[0] + mu += stats[1] + mi += stats[2] + + return p, u, i, sp, su, si, mp, mu, mi + + +def merge_ranges(r1, r2): + if r1 is None: + return r2 + elif r2 is None: + return r1 + time_set = r1["time_set"] | r2["time_set"] + times = sorted(list(time_set)) + return { + "lat": { + "min": min(r1["lat"]["min"], r2["lat"]["min"]), + "max": max(r1["lat"]["max"], r2["lat"]["max"]), + }, + "lon": { + "min": min(r1["lon"]["min"], r2["lon"]["min"]), + "max": max(r1["lon"]["max"], r2["lon"]["max"]), + }, + "times": times, + "start_time": times[0], + "end_time": times[-1], + "time_set": time_set, + "bboxes": { + crs: { + "top": max(r1["bboxes"][crs]["top"], r2["bboxes"][crs]["top"]), + "bottom": min(r1["bboxes"][crs]["bottom"], r2["bboxes"][crs]["bottom"]), + "right": max(r1["bboxes"][crs]["right"], r2["bboxes"][crs]["right"]), + "left": min(r1["bboxes"][crs]["left"], r2["bboxes"][crs]["left"]), + } + for crs in r1["bboxes"].keys() + } + } + + +def get_sub_ranges(dc, product): + conn = get_sqlconn(dc) + results = conn.execute("select sub_product_id from wms.sub_product_ranges where product_id=%s", product.product.id) + return {r["sub_product_id"]: get_ranges(dc, product.product.id, r["sub_product_id"]) for r in results} + + +def create_multiprod_range_entry(dc, product, crses): + conn = get_sqlconn(dc) + txn = conn.begin() + if isinstance(product, dict): + prodids = [p.id for p in product["products"]] + wms_name = product["name"] + else: + prodids = [ p.id for p in product.products ] + wms_name = product.name + + # Attempt to insert row + conn.execute(""" + INSERT INTO wms.multiproduct_ranges + (wms_product_name,lat_min,lat_max,lon_min,lon_max,dates,bboxes) + VALUES + (%(p_id)s, 0, 0, 0, 0, %(empty)s, %(empty)s) + ON CONFLICT (wms_product_name) DO NOTHING + """, + {"p_id": wms_name, "empty": Json("")}) + + # Update extents + conn.execute(""" + UPDATE wms.multiproduct_ranges + SET (lat_min,lat_max,lon_min,lon_max) = + (wms_get_min(%(p_prodids)s, 'lat'), wms_get_max(%(p_prodids)s, 'lat'), wms_get_min(%(p_prodids)s, 'lon'), wms_get_max(%(p_prodids)s, 'lon')) + WHERE wms_product_name=%(p_id)s + """, + {"p_id": wms_name, "p_prodids": prodids}) + + # Create sorted list of dates + conn.execute(""" + WITH sorted + AS (SELECT to_jsonb(array_agg(dates.d)) + AS dates + FROM (SELECT DISTINCT to_date(metadata::json->'extent'->>'center_dt', 'YYYY-MM-DD') + AS d + FROM agdc.dataset + WHERE dataset_type_ref = any (%(p_prodids)s) + AND archived IS NULL + ORDER BY d) dates) + UPDATE wms.multiproduct_ranges + SET dates=sorted.dates + FROM sorted + WHERE wms_product_name=%(p_id)s + """, + {"p_id": wms_name, "p_prodids": prodids}) + + # calculate bounding boxes + results = list(conn.execute(""" + SELECT lat_min,lat_max,lon_min,lon_max + FROM wms.multiproduct_ranges + WHERE wms_product_name=%(p_id)s + """, + {"p_id": wms_name} )) + + r = results[0] + + epsg4326 = datacube.utils.geometry.CRS("EPSG:4326") + box = datacube.utils.geometry.box( + float(r[2]), + float(r[0]), + float(r[3]), + float(r[1]), + epsg4326) + + cfg = get_config() + conn.execute(""" + UPDATE wms.multiproduct_ranges + SET bboxes = %s::jsonb + WHERE wms_product_name=%s + """, + Json({ crsid: jsonise_bbox(box.to_crs(crs).boundingbox) for crsid, crs in get_crses(cfg).items() }), + wms_name + ) + + txn.commit() + conn.close() + return + + +def create_range_entry(dc, product, crses, 
summary_product=False): + conn = get_sqlconn(dc) + txn = conn.begin() + prodid = product.id + + # Attempt to insert row + conn.execute(""" + INSERT INTO wms.product_ranges + (id,lat_min,lat_max,lon_min,lon_max,dates,bboxes) + VALUES + (%(p_id)s, 0, 0, 0, 0, %(empty)s, %(empty)s) + ON CONFLICT (id) DO NOTHING + """, + {"p_id": prodid, "empty": Json("")}) + + # Update extents + conn.execute(""" + UPDATE wms.product_ranges + SET (lat_min,lat_max,lon_min,lon_max) = + (wms_get_min(%(p_idarr)s, 'lat'), wms_get_max(%(p_idarr)s, 'lat'), wms_get_min(%(p_idarr)s, 'lon'), wms_get_max(%(p_idarr)s, 'lon')) + WHERE id=%(p_id)s + """, + {"p_id": prodid, "p_idarr": [ prodid ]}) + + # Set default timezone + conn.execute(""" + set timezone to 'Etc/UTC' + """) + + if summary_product: + # Loop over dates + dates = set() + + for result in conn.execute(""" + SELECT DISTINCT cast(metadata -> 'extent' ->> 'from_dt' as date) as dt + FROM agdc.dataset + WHERE dataset_type_ref = %(p_id)s + AND archived IS NULL + ORDER BY dt + """, + {"p_id": prodid}): + dates.add(result[0]) + dates = sorted(dates) + + conn.execute(""" + UPDATE wms.product_ranges + SET dates = %(dates)s + WHERE id= %(p_id)s + """, + { + "dates": Json([t.strftime("%Y-%m-%d") for t in dates]), + "p_id": prodid + } + ) + else: + # Create sorted list of dates + conn.execute(""" + WITH sorted + AS (SELECT to_jsonb(array_agg(dates.d)) + AS dates + FROM (SELECT DISTINCT + date(cast(metadata -> 'extent' ->> 'center_dt' as timestamp) AT TIME ZONE 'UTC' + + (least(to_number(metadata -> 'extent' -> 'coord' -> 'll' ->> 'lon', '9999.9999999999999999999999999999999999'), + to_number(metadata -> 'extent' -> 'coord' -> 'ul' ->> 'lon', '9999.9999999999999999999999999999999999')) + + greatest(to_number(metadata -> 'extent' -> 'coord' -> 'lr' ->> 'lon', '9999.9999999999999999999999999999999999'), + to_number(metadata -> 'extent' -> 'coord' -> 'ur' ->> 'lon', '9999.9999999999999999999999999999999999'))) / 30.0 * interval '1 hour') + AS d + FROM agdc.dataset + WHERE dataset_type_ref=%(p_id)s + AND archived IS NULL + ORDER BY d) dates) + UPDATE wms.product_ranges + SET dates=sorted.dates + FROM sorted + WHERE id=%(p_id)s + """, + {"p_id": prodid}) + + # calculate bounding boxes + results = list(conn.execute(""" + SELECT lat_min,lat_max,lon_min,lon_max + FROM wms.product_ranges + WHERE id=%s + """, + prodid)) + + r = results[0] + + epsg4326 = datacube.utils.geometry.CRS("EPSG:4326") + box = datacube.utils.geometry.box( + float(r[2]), + float(r[0]), + float(r[3]), + float(r[1]), + epsg4326) + + conn.execute(""" + UPDATE wms.product_ranges + SET bboxes = %s::jsonb + WHERE id=%s + """, + Json( + {crsid: {"top": box.to_crs(crs).boundingbox.top, + "bottom": box.to_crs(crs).boundingbox.bottom, + "left": box.to_crs(crs).boundingbox.left, + "right": box.to_crs(crs).boundingbox.right} + for crsid, crs in crses.items() + } + ), + product.id) + + txn.commit() + conn.close() + + +def check_datasets_exist(dc, product_name): + conn = get_sqlconn(dc) + + results = conn.execute(""" + SELECT COUNT(*) + FROM agdc.dataset ds, agdc.dataset_type p + WHERE ds.archived IS NULL + AND ds.dataset_type_ref = p.id + AND p.name = %s""", + product_name) + + conn.close() + + return list(results)[0][0] > 0 + + +def add_product_range(dc, product): + if isinstance(product, str): + product_name = product + dc_product = dc.index.products.get_by_name(product) + else: + product_name = product.name + dc_product = product + + ows_product = get_config().native_product_index.get(product_name) + if ows_product: + 
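+        # OWS layers configured with non-raw (summary) time resolution get date-only range handling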
summary_product = not ows_product.is_raw_time_res + else: + summary_product = False + + assert dc_product is not None + + if check_datasets_exist(dc, product_name): + create_range_entry(dc, dc_product, get_crses(), summary_product) + else: + print("Could not find any datasets for: ", product_name) + + +def add_multiproduct_range(dc, product, follow_dependencies=True): + if isinstance(product, str): + product = get_config().product_index.get(product) + + assert product is not None + assert product.multi_product + + if follow_dependencies: + for product_name in product.product_names: + dc_prod = dc.index.products.get_by_name(product_name) + if not check_datasets_exist(dc, product_name): + print("Could not find any datasets for: ", product_name) + else: + add_product_range(dc, product_name) + + # Actually merge and store! + create_multiprod_range_entry(dc, product, get_crses()) + + +def add_all(dc): + multi_products = set() + for product_cfg in get_config().product_index.values(): + product_name = product_cfg.product_name + if product_cfg.multi_product: + multi_products.add(product_cfg) + else: + print("Adding range for:", product_name) + add_product_range(dc, product_name) + + for p in multi_products: + print("Adding multiproduct range for:", p.name) + add_multiproduct_range(dc, p, follow_dependencies=False) + diff --git a/datacube_ows/update_ranges.py b/datacube_ows/update_ranges.py index f791a4704..2ced637b6 100755 --- a/datacube_ows/update_ranges.py +++ b/datacube_ows/update_ranges.py @@ -1,90 +1,284 @@ #!/usr/bin/env python3 -from datacube_ows.product_ranges import update_all_ranges, add_product_range, add_multiproduct_range, add_all, update_range -from datacube_ows.utils import get_sqlconn +from datacube_ows.product_ranges import get_sqlconn, add_ranges from datacube import Datacube -from psycopg2.sql import SQL, Identifier +from psycopg2.sql import SQL +from datacube_ows.ows_configuration import get_config import os import click @click.command() +@click.option("--views", is_flag=True, default=False, help="Create (if called with the --schema option) or refresh the ODC spatio-temporal materialised views.") @click.option("--schema", is_flag=True, default=False, help="Create or update the OWS database schema.") @click.option("--role", default=None, help="Role to grant database permissions to") -@click.option("--product", default=None, help="The name of a datacube product.") -@click.option("--multiproduct", default=None, help="The name of OWS multi-product." ) -@click.option("--merge-only/--no-merge-only", default=False, help="When used with the multiproduct and calculate-extent options, the ranges for underlying datacube products are not updated.") -@click.option("--calculate-extent/--no-calculate-extent", default=True, help="no-calculate-extent uses database queries to maximise efficiency. calculate-extent calculates ranges directly and is the default.") -def main(product, multiproduct, merge_only, calculate_extent, schema, role): +@click.option("--summary", is_flag=True, default=False, help="Treat any named ODC products with no corresponding configured OWS Layer as summary products" ) +@click.option("--merge-only/--no-merge-only", default=False, help="When used with a multiproduct layer, the ranges for underlying datacube products are not updated.") +@click.option("--product", default=None, help="Deprecated option provided for backwards compatibility") +@click.option("--multiproduct", default=None, help="Deprecated option provided for backwards compatibility." 
)
+@click.option("--calculate-extent/--no-calculate-extent", default=None, help="Has no effect any more. Provided for backwards compatibility only")
+@click.argument("products", nargs=-1)
+def main(products,
+         merge_only, summary,
+         schema, views, role,
+         product, multiproduct, calculate_extent):
     """Manage datacube-ows range tables.

-    A valid invocation should specify at most one of '--product', '--multiproduct' or '--schema'.
-    If neither of these options are specified, then the ranges for all products and multiproducts
-    are updated.
+    Valid invocations:
+
+    * Some combination of the --views and --schema flags (and no PRODUCTS).
+      (Perform the specified database updates.)
+
+    * One or more OWS or ODC product names
+      (Update ranges for the specified PRODUCTS.)
+
+    * No PRODUCTS (and neither the --views nor --schema flags)
+      (Update ranges for all configured OWS products.)
+
+    Uses the DATACUBE_OWS_CFG environment variable to find the OWS config file.
     """
-    if product and multiproduct:
-        print("Sorry, you specified both a product and multiproduct. One at a time, please.")
-        return 1
-    elif schema and (product or multiproduct):
+    # Handle old-style calls (click passes PRODUCTS as a tuple, so convert before appending)
+    products = list(products)
+    if product:
+        print("********************************************************************************")
+        print("Warning: The product flag is deprecated and will be removed in a future release.")
+        print("         The correct way to make this call is now:")
+        print("         ")
+        print("         python3 update_ranges.py %s" % product)
+        print("********************************************************************************")
+        products.append(product)
+    if multiproduct:
+        print("********************************************************************************")
+        print("Warning: The multiproduct flag is deprecated and will be removed in a future release.")
+        print("         The correct way to make this call is now:")
+        print("         ")
+        if merge_only:
+            print("         python3 update_ranges.py --merge-only %s" % multiproduct)
+        else:
+            print("         python3 update_ranges.py %s" % multiproduct)
+        print("********************************************************************************")
+        products.append(multiproduct)
+    if calculate_extent is not None:
+        print("********************************************************************************")
+        print("Warning: The calculate-extent and no-calculate-extent flags no longer have ")
+        print("         any effect.
They are kept only for backwards compatibility and will") + print(" be removed in a future release.") + print("********************************************************************************") + if schema and products: print("Sorry, cannot update the schema and ranges in the same invocation.") return 1 + elif views and products: + print("Sorry, cannot update the materialised views and ranges in the same invocation.") + return 1 elif schema and not role: print("Sorry, cannot update schema without specifying a role") return 1 + elif role and not schema: + print("Sorry, role only makes sense for updating the schema") + return 1 if os.environ.get("PYDEV_DEBUG"): import pydevd_pycharm pydevd_pycharm.settrace('172.17.0.1', port=12321, stdoutToServer=True, stderrToServer=True) - - dc = Datacube(app="wms_update_ranges") - if schema: - print("Checking schema....") - print("Creating or replacing WMS database schema...") - create_schema(dc, role) - print("Done") - elif not calculate_extent: - if product: - print("Updating range for: ", product) - add_product_range(dc, product) - elif multiproduct: - print("Updating range for: ", multiproduct) - add_multiproduct_range(dc, multiproduct) - else: - print("Updating range for all, using SQL extent calculation") - add_all(dc) + dc = Datacube(app="ows_update_ranges") + if schema or views: + if schema: + print("Checking schema....") + print("Creating or replacing WMS database schema...") + create_schema(dc, role) print("Done") - else: - if product: - print("Updating range for: ", product) - p, u, i, sp, su, si = update_range(dc, product, multi=False) - if u: - print("Ranges updated for", product) - elif i: - print("New ranges inserted for", product) - else: - print("Ranges up to date for", product) - if sp or su or si: - print ("Updated ranges for %d existing sub-products and inserted ranges for %d new sub-products (%d existing sub-products unchanged)" % (su, si, sp)) - elif multiproduct: - print("Updating range for: ", multiproduct) - p, u, i = update_range(dc, multiproduct, multi=True, follow_dependencies=not merge_only) - if u: - print("Merged ranges updated for", multiproduct) - elif i: - print("Merged ranges inserted for", multiproduct) - else: - print("Merged ranges up to date for", multiproduct) - else: - print ("Updating ranges for all layers/products") - p, u, i, sp, su, si, mp, mu, mi = update_all_ranges(dc) - print ("Updated ranges for %d existing layers/products and inserted ranges for %d new layers/products (%d existing layers/products unchanged)" % (u, i, p)) - if sp or su or si: - print ("Updated ranges for %d existing sub-products and inserted ranges for %d new sub-products (%d existing sub-products unchanged)" % (su, si, sp)) - if mp or mu or mi: - print ("Updated ranges for %d existing multi-products and inserted ranges for %d new multi-products (%d existing multi-products unchanged)" % (su, si, sp)) + if schema and views: + print("Creating or replacing materialised views...") + create_views(dc) + print("Done") + elif views: + print("Refreshing materialised views...") + refresh_views(dc) + print("Done") + return 0 + + print("Deriving extents from materialised views") + if not products: + products = list(get_config().product_index.keys()) + add_ranges(dc, products, summary, merge_only) return 0 +def create_views(dc): + commands = [ + ("Installing Postgis extensions on public schema", + "create extension if not exists postgis"), + ("Giving other schemas access to PostGIS functions installed in the public schema", + """ALTER DATABASE datacube 
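+-- NB: assumes the target ODC database is literally named "datacube"; adjust if your install uses a different name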
+           SET
+               search_path = public,
+               agdc
+        """),
+        ("Dropping already existing Materialized View Index 1/3",
+         "DROP INDEX IF EXISTS space_time_view_geom_idx"),
+        ("Dropping already existing Materialized View Index 2/3",
+         "DROP INDEX IF EXISTS space_time_view_time_idx"),
+        ("Dropping already existing Materialized View Index 3/3",
+         "DROP INDEX IF EXISTS space_time_view_ds_idx"),
+        ("Dropping already existing Materialized View 1/3",
+         "DROP MATERIALIZED VIEW IF EXISTS space_time_view"),
+        ("Dropping already existing Materialized View 2/3",
+         "DROP MATERIALIZED VIEW IF EXISTS time_view"),
+        ("Dropping already existing Materialized View 3/3",
+         "DROP MATERIALIZED VIEW IF EXISTS space_view"),
+        ("Setting default timezone to UTC",
+         "set timezone to 'Etc/UTC'"),
+
+# Handling different variants of metadata requires UNION with WHERE clauses per metadata type
+# https://www.postgresql.org/docs/11/queries-union.html
+
+# Try all different locations for temporal extents and UNION them
+        ("Creating TIME Materialised View",
+         """
+CREATE MATERIALIZED VIEW IF NOT EXISTS time_view (dataset_type_ref, ID, temporal_extent)
+AS
+with
+-- Crib metadata to use for string matching various types
+metadata_lookup as (
+  select id,name from agdc.metadata_type
+)
+-- This is the eodataset variant of the temporal extent
+select
+  dataset_type_ref, id,tstzrange(
+    (metadata -> 'extent' ->> 'from_dt') :: timestamp,(metadata -> 'extent' ->> 'to_dt') :: timestamp + interval '1 microsecond'
+  ) as temporal_extent
+from agdc.dataset where
+  metadata_type_ref in (select id from metadata_lookup where name in ('eo','gqa_eo','eo_plus'))
+UNION
+-- This is the eo3 variant of the temporal extent; the sample eo3 dataset uses a singleton
+-- timestamp, some other variants use start/end timestamps. From the OWS perspective, temporal
+-- resolution is 1 whole day
+select
+  dataset_type_ref, id,tstzrange(
+    (metadata->'properties'->>'datetime'):: timestamp,
+    (metadata->'properties'->>'datetime'):: timestamp + interval '1 day'
+  ) as temporal_extent
+from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name='eo3')
+UNION
+-- Start/End timestamp variant product.
+-- http://dapds00.nci.org.au/thredds/fileServer/xu18/ga_ls8c_ard_3/092/090/2019/06/05/ga_ls8c_ard_3-0-0_092090_2019-06-05_final.odc-metadata.yaml
+select
+  dataset_type_ref, id,tstzrange(
+    (metadata->'properties'->>'dtr:start_datetime'):: timestamp,
+    (metadata->'properties'->>'dtr:end_datetime'):: timestamp
+  ) as temporal_extent
+from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name in ('eo3_landsat_ard'))
+"""),
+        # Spatial extents per dataset (to be created as a column of the space-time table)
+        # Try all different locations for spatial extents and UNION them
+        ("Creating SPACE Materialised View (Slowest step!)",
+"""
+CREATE MATERIALIZED VIEW IF NOT EXISTS space_view (ID, spatial_extent)
+AS
+with
+-- Crib metadata to use for string matching various types
+metadata_lookup as (
+  select id,name from agdc.metadata_type
+),
+-- This is eo3 spatial (Uses CEMP INSAR as a sample product)
+ranges as
+(select id,
+  (metadata #> '{extent, lat, begin}') as lat_begin,
+  (metadata #> '{extent, lat, end}') as lat_end,
+  (metadata #> '{extent, lon, begin}') as lon_begin,
+  (metadata #> '{extent, lon, end}') as lon_end
+ from agdc.dataset where
+  metadata_type_ref in (select id from metadata_lookup where name='eo3')
+ ),
+-- This is eo spatial (Uses ALOS-PALSAR over Africa as a sample product)
+corners as
+(select id,
+  (metadata #> '{extent, coord, ll, lat}') as ll_lat,
+  (metadata #> '{extent, coord, ll, lon}') as ll_lon,
+  (metadata #> '{extent, coord, lr, lat}') as lr_lat,
+  (metadata #> '{extent, coord, lr, lon}') as lr_lon,
+  (metadata #> '{extent, coord, ul, lat}') as ul_lat,
+  (metadata #> '{extent, coord, ul, lon}') as ul_lon,
+  (metadata #> '{extent, coord, ur, lat}') as ur_lat,
+  (metadata #> '{extent, coord, ur, lon}') as ur_lon
+ from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name in ('eo','gqa_eo','eo_plus')))
+select id,format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))',
+  lon_begin, lat_begin, lon_end, lat_begin, lon_end, lat_end,
+  lon_begin, lat_end, lon_begin, lat_begin)::geometry
+as spatial_extent
+from ranges
+UNION
+select id,format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))',
+  ll_lon, ll_lat, lr_lon, lr_lat, ur_lon, ur_lat,
+  ul_lon, ul_lat, ll_lon, ll_lat)::geometry as spatial_extent
+from corners
+UNION
+-- This is landsat_scene and landsat_l1_scene with geometries
+select id,
+  ST_Transform(
+    ST_SetSRID(
+      ST_GeomFromGeoJSON(
+        metadata #>> '{geometry}'),
+      substr(
+        metadata #>> '{crs}',6)::integer
+    ),
+    4326
+  ) as spatial_extent
+ from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name in ('eo3_landsat_ard'))
+ """, True),
+# Join the above queries for space and time as CTEs into a space-time view
+
+        ("Creating combined SPACE-TIME Materialised View",
+         """
+CREATE MATERIALIZED VIEW IF NOT EXISTS space_time_view (ID, dataset_type_ref, spatial_extent, temporal_extent)
+AS
+select space_view.id, dataset_type_ref, spatial_extent, temporal_extent from space_view join time_view on space_view.id=time_view.id
+         """),
+
+# Spatial extents are indexed using a GIST index for BBOX queries
+# https://postgis.net/workshops/postgis-intro/indexing.html
+        ("Creating Materialised View Index 1/3", """
+CREATE INDEX space_time_view_geom_idx
+  ON space_time_view
+  USING GIST (spatial_extent)
+         """),
+
+# Time range types can carry indexes for range lookup
+# https://www.postgresql.org/docs/11/rangetypes.html#RANGETYPES-INDEXING
+        ("Creating Materialised View Index 2/3", """
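+-- NB: a GiST index would also work for range types; SP-GiST is used here on the
+-- assumption that its space-partitioned tree suits the largely non-overlapping
+-- per-dataset time ranges. Either index accelerates overlap queries such as:
+--   SELECT id FROM space_time_view
+--   WHERE temporal_extent && tstzrange('2019-01-01', '2019-02-01');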
+ CREATE INDEX space_time_view_time_idx + ON space_time_view + USING SPGIST (temporal_extent) + """), + +# Create standard btree index over dataset_type_ref to ease searching by +# https://ieftimov.com/post/postgresql-indexes-btree/ + ("Creating Materialised View Index 3/3", """ + CREATE INDEX space_time_view_ds_idx + ON space_time_view + USING BTREE(dataset_type_ref) + """), + + ] + run_sql(dc, commands) + + +def refresh_views(dc): + commands = [ + ("Refreshing TIME materialized view", + "REFRESH MATERIALIZED VIEW time_view" + ), + ("Refreshing SPACE materialized view", + "REFRESH MATERIALIZED VIEW space_view" + ), + ("Refreshing combined SPACE-TIME materialized view", + "REFRESH MATERIALIZED VIEW CONCURRENTLY space_time_view" + ), + ] + run_sql(dc, commands) + + def create_schema(dc, role): commands = [ ("Creating/replacing wms schema", "create schema if not exists wms"), @@ -161,20 +355,35 @@ def create_schema(dc, role): END; $$ LANGUAGE plpgsql; """), + ("""Granting usage on schema""", + "GRANT USAGE ON SCHEMA wms TO %s" % role + ) ] + run_sql(dc, commands) +def run_sql(dc, commands): conn = get_sqlconn(dc) - for msg, sql in commands: + for cmd_blob in commands: + if len(cmd_blob) == 2: + msg, sql = cmd_blob + override = False + else: + msg, sql, override = cmd_blob print(msg) - conn.execute(sql) + if override: + q = SQL(sql) + with conn.connection.cursor() as psycopg2connection: + psycopg2connection.execute(q) + else: + conn.execute(sql) # Add user based on param # use psycopg2 directly to get proper psql # quoting on the role name identifier - print("Granting usage on schema") - q = SQL("GRANT USAGE ON SCHEMA wms TO {}").format(Identifier(role)) - with conn.connection.cursor() as psycopg2connection: - psycopg2connection.execute(q) + # print("Granting usage on schema") + # q = SQL("GRANT USAGE ON SCHEMA wms TO {}").format(Identifier(role)) + # with conn.connection.cursor() as psycopg2connection: + # psycopg2connection.execute(q) conn.close() return diff --git a/datacube_ows/update_ranges_2.py b/datacube_ows/update_ranges_2.py deleted file mode 100755 index 5182f37b5..000000000 --- a/datacube_ows/update_ranges_2.py +++ /dev/null @@ -1,361 +0,0 @@ -#!/usr/bin/env python3 - -from datacube_ows.product_ranges_2 import get_sqlconn, add_ranges -from datacube import Datacube -from psycopg2.sql import SQL -from datacube_ows.ows_configuration import get_config -import os -import click - -@click.command() -@click.option("--views", is_flag=True, default=False, help="Create (if called with the --schema option) or refresh the ODC spatio-temporal materialised views.") -@click.option("--schema", is_flag=True, default=False, help="Create or update the OWS database schema.") -@click.option("--role", default=None, help="Role to grant database permissions to") -@click.option("--summary", is_flag=True, default=False, help="Treat any named ODC products with no corresponding configured OWS Layer as summary products" ) -@click.option("--merge-only/--no-merge-only", default=False, help="When used with a multiproduct layer, the ranges for underlying datacube products are not updated.") -@click.argument("products", nargs=-1) -def main(products, merge_only, summary, schema, views, role): - """Manage datacube-ows range tables. - - Valid invocations: - - * Some combination of the --views and --schema flags (and no PRODUCTS). 
- (Perform the specified database updates) - - * One of more OWS or ODC product names - (Update ranges for the specified PRODUCTS - - * No PRODUCTS (and neither the --views nor --schema flags) - (Update ranges for all configured OWS products. - - Uses the DATACUBE_OWS_CFG environment variable to find the OWS config file. - """ - if schema and products: - print("Sorry, cannot update the schema and ranges in the same invocation.") - return 1 - elif views and products: - print("Sorry, cannot update the materialised views and ranges in the same invocation.") - return 1 - elif schema and not role: - print("Sorry, cannot update schema without specifying a role") - return 1 - elif role and not schema: - print("Sorry, role only makes sense for updating the schema") - return 1 - - if os.environ.get("PYDEV_DEBUG"): - import pydevd_pycharm - pydevd_pycharm.settrace('172.17.0.1', port=12321, stdoutToServer=True, stderrToServer=True) - - dc = Datacube(app="ows_update_ranges") - if schema or views: - if schema: - print("Checking schema....") - print("Creating or replacing WMS database schema...") - create_schema(dc, role) - print("Done") - if schema and views: - print("Creating or replacing materialised views...") - create_views(dc) - print("Done") - elif views: - print("Refreshing materialised views...") - refresh_views(dc) - print("Done") - return 0 - - print("Deriving extents from materialised views") - if not products: - products = list(get_config().product_index.keys()) - add_ranges(dc, products, summary, merge_only) - return 0 - - -def create_views(dc): - commands = [ - ("Installing Postgis extensions on public schema", - "create extension if not exists postgis"), - ("Giving other schemas access to PostGIS functions installed in the public schema", - """ALTER DATABASE datacube - SET - search_path = public, - agdc - """), - ("Dropping already existing Materialized View Index 1/3", - "DROP INDEX IF EXISTS space_time_view_geom_idx"), - ("Dropping already existing Materialized View Index 2/3", - "DROP INDEX IF EXISTS space_time_view_time_idx"), - ("Dropping already existing Materialized View Index 3/3", - "DROP INDEX IF EXISTS space_time_view_ds_idx"), - ("Dropping already existing Materialized View 1/3", - "DROP MATERIALIZED VIEW IF EXISTS space_time_view"), - ("Dropping already existing Materialized View 2/3", - "DROP MATERIALIZED VIEW IF EXISTS time_view"), - ("Dropping already existing Materialized View 3/3", - "DROP MATERIALIZED VIEW IF EXISTS space_view"), - ("Setting default timezone to UTC", - "set timezone to 'Etc/UTC'"), - -# Handling different variants of metadata requires UNION with WHICH clauses per metadata type -# https://www.postgresql.org/docs/11/queries-union.html - -# Try all different locations for temporal extents and UNION them - ("Creating TIME Materialised View", - """ -CREATE MATERIALIZED VIEW IF NOT EXISTS time_view (dataset_type_ref, ID, temporal_extent) -AS -with --- Crib metadata to use as for string matching various types -metadata_lookup as ( - select id,name from agdc.metadata_type -) --- This is the eodataset variant of the temporal extent -select - dataset_type_ref, id,tstzrange( - (metadata -> 'extent' ->> 'from_dt') :: timestamp,(metadata -> 'extent' ->> 'to_dt') :: timestamp + interval '1 microsecond' - ) as temporal_extent -from agdc.dataset where - metadata_type_ref in (select id from metadata_lookup where name in ('eo','gqa_eo','eo_plus')) -UNION --- This is the eo3 variant of the temporal extent, the sample eo3 dataset uses a singleton --- timestamp, some other 
variants use start/end timestamps. From OWS perspective temporal --- resolution is 1 whole day -select - dataset_type_ref, id,tstzrange( - (metadata->'properties'->>'datetime'):: timestamp, - (metadata->'properties'->>'datetime'):: timestamp + interval '1 day' - ) as temporal_extent -from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name='eo3') -UNION --- Start/End timestamp variant product. --- http://dapds00.nci.org.au/thredds/fileServer/xu18/ga_ls8c_ard_3/092/090/2019/06/05/ga_ls8c_ard_3-0-0_092090_2019-06-05_final.odc-metadata.yaml -select - dataset_type_ref, id,tstzrange( - (metadata->'properties'->>'dtr:start_datetime'):: timestamp, - (metadata->'properties'->>'dtr:end_datetime'):: timestamp - ) as temporal_extent -from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name in ('eo3_landsat_ard')) -"""), - # Spatial extents per dataset (to be created as a column of the space-time table) - # Try all different locations for spatial extents and UNION them - ("Creating SPACE Materialised View (Slowest step!)", -""" -CREATE MATERIALIZED VIEW IF NOT EXISTS space_view (ID, spatial_extent) -AS -with --- Crib metadata to use as for string matching various types -metadata_lookup as ( - select id,name from agdc.metadata_type -), --- This is eo3 spatial (Uses CEMP INSAR as a sample product) -ranges as -(select id, - (metadata #> '{extent, lat, begin}') as lat_begin, - (metadata #> '{extent, lat, end}') as lat_end, - (metadata #> '{extent, lon, begin}') as lon_begin, - (metadata #> '{extent, lon, end}') as lon_end - from agdc.dataset where - metadata_type_ref in (select id from metadata_lookup where name='eo3') - ), --- This is eo spatial (Uses ALOS-PALSAR over Africa as a sample product) -corners as -(select id, - (metadata #> '{extent, coord, ll, lat}') as ll_lat, - (metadata #> '{extent, coord, ll, lon}') as ll_lon, - (metadata #> '{extent, coord, lr, lat}') as lr_lat, - (metadata #> '{extent, coord, lr, lon}') as lr_lon, - (metadata #> '{extent, coord, ul, lat}') as ul_lat, - (metadata #> '{extent, coord, ul, lon}') as ul_lon, - (metadata #> '{extent, coord, ur, lat}') as ur_lat, - (metadata #> '{extent, coord, ur, lon}') as ur_lon - from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name in ('eo','gqa_eo','eo_plus'))) -select id,format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))', - lon_begin, lat_begin, lon_end, lat_begin, lon_end, lat_end, - lon_begin, lat_end, lon_begin, lat_begin)::geometry -as spatial_extent -from ranges -UNION -select id,format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))', - ll_lon, ll_lat, lr_lon, lr_lat, ur_lon, ur_lat, - ul_lon, ul_lat, ll_lon, ll_lat)::geometry as spatial_extent -from corners -UNION --- This is lansat_scene and landsat_l1_scene with geometries -select id, - ST_Transform( - ST_SetSRID( - ST_GeomFromGeoJSON( - metadata #>> '{geometry}'), - substr( - metadata #>> '{crs}',6)::integer - ), - 4326 - ) as spatial_extent - from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name in ('eo3_landsat_ard')) - """, True), -# Join the above queries for space and time as CTE's into a space-time view - - ("Creating combined SPACE-TIME Materialised View", - """ -CREATE MATERIALIZED VIEW IF NOT EXISTS space_time_view (ID, dataset_type_ref, spatial_extent, temporal_extent) -AS -select space_view.id, dataset_type_ref, spatial_extent, temporal_extent from space_view join time_view on space_view.id=time_view.id - """), - -# Spatial extents are 
indexed using GIST index for BBOX queries -# https://postgis.net/workshops/postgis-intro/indexing.html - ("Creating Materialised View Index 1/3", """ -CREATE INDEX space_time_view_geom_idx - ON space_time_view - USING GIST (spatial_extent) - """), - -# Time range types can carray indexes for range lookup -# https://www.postgresql.org/docs/11/rangetypes.html#RANGETYPES-INDEXING - ("Creating Materialised View Index 2/3", """ - CREATE INDEX space_time_view_time_idx - ON space_time_view - USING SPGIST (temporal_extent) - """), - -# Create standard btree index over dataset_type_ref to ease searching by -# https://ieftimov.com/post/postgresql-indexes-btree/ - ("Creating Materialised View Index 3/3", """ - CREATE INDEX space_time_view_ds_idx - ON space_time_view - USING BTREE(dataset_type_ref) - """), - - ] - run_sql(dc, commands) - - -def refresh_views(dc): - commands = [ - ("Refreshing TIME materialized view", - "REFRESH MATERIALIZED VIEW time_view" - ), - ("Refreshing SPACE materialized view", - "REFRESH MATERIALIZED VIEW space_view" - ), - ("Refreshing combined SPACE-TIME materialized view", - "REFRESH MATERIALIZED VIEW CONCURRENTLY space_time_view" - ), - ] - run_sql(dc, commands) - - -def create_schema(dc, role): - commands = [ - ("Creating/replacing wms schema", "create schema if not exists wms"), - - ("Creating/replacing product ranges table", """ - create table if not exists wms.product_ranges ( - id smallint not null primary key references agdc.dataset_type (id), - - lat_min decimal not null, - lat_max decimal not null, - lon_min decimal not null, - lon_max decimal not null, - - dates jsonb not null, - - bboxes jsonb not null) - """), - ("Creating/replacing sub-product ranges table", """ - create table if not exists wms.sub_product_ranges ( - product_id smallint not null references agdc.dataset_type (id), - sub_product_id smallint not null, - lat_min decimal not null, - lat_max decimal not null, - lon_min decimal not null, - lon_max decimal not null, - dates jsonb not null, - bboxes jsonb not null, - constraint pk_sub_product_ranges primary key (product_id, sub_product_id) ) - """), - ("Creating/replacing multi-product ranges table", """ - create table if not exists wms.multiproduct_ranges ( - wms_product_name varchar(128) not null primary key, - lat_min decimal not null, - lat_max decimal not null, - lon_min decimal not null, - lon_max decimal not null, - dates jsonb not null, - bboxes jsonb not null) - """), - # Functions - ("Creating/replacing wms_get_min() function", """ - CREATE OR REPLACE FUNCTION wms_get_min(integer[], text) RETURNS numeric AS $$ - DECLARE - ret numeric; - ul text[] DEFAULT array_append('{extent, coord, ul}', $2); - ur text[] DEFAULT array_append('{extent, coord, ur}', $2); - ll text[] DEFAULT array_append('{extent, coord, ll}', $2); - lr text[] DEFAULT array_append('{extent, coord, lr}', $2); - BEGIN - WITH m AS ( SELECT metadata FROM agdc.dataset WHERE dataset_type_ref = any($1) AND archived IS NULL ) - SELECT MIN(LEAST((m.metadata#>>ul)::numeric, (m.metadata#>>ur)::numeric, - (m.metadata#>>ll)::numeric, (m.metadata#>>lr)::numeric)) - INTO ret - FROM m; - RETURN ret; - END; - $$ LANGUAGE plpgsql; - """), - ("Creating/replacing wms_get_max() function", """ - CREATE OR REPLACE FUNCTION wms_get_max(integer[], text) RETURNS numeric AS $$ - DECLARE - ret numeric; - ul text[] DEFAULT array_append('{extent, coord, ul}', $2); - ur text[] DEFAULT array_append('{extent, coord, ur}', $2); - ll text[] DEFAULT array_append('{extent, coord, ll}', $2); - lr text[] DEFAULT 
array_append('{extent, coord, lr}', $2); - BEGIN - WITH m AS ( SELECT metadata FROM agdc.dataset WHERE dataset_type_ref = ANY ($1) AND archived IS NULL ) - SELECT MAX(GREATEST((m.metadata#>>ul)::numeric, (m.metadata#>>ur)::numeric, - (m.metadata#>>ll)::numeric, (m.metadata#>>lr)::numeric)) - INTO ret - FROM m; - RETURN ret; - END; - $$ LANGUAGE plpgsql; - """), - ("""Granting usage on schema""", - "GRANT USAGE ON SCHEMA wms TO %s" % role - ) - ] - run_sql(dc, commands) - -def run_sql(dc, commands): - conn = get_sqlconn(dc) - for cmd_blob in commands: - if len(cmd_blob) == 2: - msg, sql = cmd_blob - override = False - else: - msg, sql, override = cmd_blob - print(msg) - if override: - q = SQL(sql) - with conn.connection.cursor() as psycopg2connection: - psycopg2connection.execute(q) - else: - conn.execute(sql) - - # Add user based on param - # use psycopg2 directly to get proper psql - # quoting on the role name identifier - # print("Granting usage on schema") - # q = SQL("GRANT USAGE ON SCHEMA wms TO {}").format(Identifier(role)) - # with conn.connection.cursor() as psycopg2connection: - # psycopg2connection.execute(q) - conn.close() - - return - - -if __name__ == '__main__': - main() - - diff --git a/datacube_ows/update_ranges_old.py b/datacube_ows/update_ranges_old.py new file mode 100755 index 000000000..b25e7dde5 --- /dev/null +++ b/datacube_ows/update_ranges_old.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python3 + +from datacube_ows.product_ranges_old import update_all_ranges, add_product_range, add_multiproduct_range, add_all, update_range +from datacube_ows.utils import get_sqlconn +from datacube import Datacube +from psycopg2.sql import SQL, Identifier +import os +import click + +@click.command() +@click.option("--schema", is_flag=True, default=False, help="Create or update the OWS database schema.") +@click.option("--role", default=None, help="Role to grant database permissions to") +@click.option("--product", default=None, help="The name of a datacube product.") +@click.option("--multiproduct", default=None, help="The name of OWS multi-product." ) +@click.option("--merge-only/--no-merge-only", default=False, help="When used with the multiproduct and calculate-extent options, the ranges for underlying datacube products are not updated.") +@click.option("--calculate-extent/--no-calculate-extent", default=True, help="no-calculate-extent uses database queries to maximise efficiency. calculate-extent calculates ranges directly and is the default.") +def main(product, multiproduct, merge_only, calculate_extent, schema, role): + """Manage datacube-ows range tables. + + A valid invocation should specify at most one of '--product', '--multiproduct' or '--schema'. + If neither of these options are specified, then the ranges for all products and multiproducts + are updated. + """ + if product and multiproduct: + print("Sorry, you specified both a product and multiproduct. 
One at a time, please.") + return 1 + elif schema and (product or multiproduct): + print("Sorry, cannot update the schema and ranges in the same invocation.") + return 1 + elif schema and not role: + print("Sorry, cannot update schema without specifying a role") + return 1 + + if os.environ.get("PYDEV_DEBUG"): + import pydevd_pycharm + pydevd_pycharm.settrace('172.17.0.1', port=12321, stdoutToServer=True, stderrToServer=True) + + + dc = Datacube(app="wms_update_ranges") + if schema: + print("Checking schema....") + print("Creating or replacing WMS database schema...") + create_schema(dc, role) + print("Done") + elif not calculate_extent: + if product: + print("Updating range for: ", product) + add_product_range(dc, product) + elif multiproduct: + print("Updating range for: ", multiproduct) + add_multiproduct_range(dc, multiproduct) + else: + print("Updating range for all, using SQL extent calculation") + add_all(dc) + print("Done") + else: + if product: + print("Updating range for: ", product) + p, u, i, sp, su, si = update_range(dc, product, multi=False) + if u: + print("Ranges updated for", product) + elif i: + print("New ranges inserted for", product) + else: + print("Ranges up to date for", product) + if sp or su or si: + print ("Updated ranges for %d existing sub-products and inserted ranges for %d new sub-products (%d existing sub-products unchanged)" % (su, si, sp)) + elif multiproduct: + print("Updating range for: ", multiproduct) + p, u, i = update_range(dc, multiproduct, multi=True, follow_dependencies=not merge_only) + if u: + print("Merged ranges updated for", multiproduct) + elif i: + print("Merged ranges inserted for", multiproduct) + else: + print("Merged ranges up to date for", multiproduct) + else: + print ("Updating ranges for all layers/products") + p, u, i, sp, su, si, mp, mu, mi = update_all_ranges(dc) + print ("Updated ranges for %d existing layers/products and inserted ranges for %d new layers/products (%d existing layers/products unchanged)" % (u, i, p)) + if sp or su or si: + print ("Updated ranges for %d existing sub-products and inserted ranges for %d new sub-products (%d existing sub-products unchanged)" % (su, si, sp)) + if mp or mu or mi: + print ("Updated ranges for %d existing multi-products and inserted ranges for %d new multi-products (%d existing multi-products unchanged)" % (su, si, sp)) + return 0 + + +def create_schema(dc, role): + commands = [ + ("Creating/replacing wms schema", "create schema if not exists wms"), + + ("Creating/replacing product ranges table", """ + create table if not exists wms.product_ranges ( + id smallint not null primary key references agdc.dataset_type (id), + + lat_min decimal not null, + lat_max decimal not null, + lon_min decimal not null, + lon_max decimal not null, + + dates jsonb not null, + + bboxes jsonb not null) + """), + ("Creating/replacing sub-product ranges table", """ + create table if not exists wms.sub_product_ranges ( + product_id smallint not null references agdc.dataset_type (id), + sub_product_id smallint not null, + lat_min decimal not null, + lat_max decimal not null, + lon_min decimal not null, + lon_max decimal not null, + dates jsonb not null, + bboxes jsonb not null, + constraint pk_sub_product_ranges primary key (product_id, sub_product_id) ) + """), + ("Creating/replacing multi-product ranges table", """ + create table if not exists wms.multiproduct_ranges ( + wms_product_name varchar(128) not null primary key, + lat_min decimal not null, + lat_max decimal not null, + lon_min decimal not null, + 
lon_max decimal not null, + dates jsonb not null, + bboxes jsonb not null) + """), + # Functions + ("Creating/replacing wms_get_min() function", """ + CREATE OR REPLACE FUNCTION wms_get_min(integer[], text) RETURNS numeric AS $$ + DECLARE + ret numeric; + ul text[] DEFAULT array_append('{extent, coord, ul}', $2); + ur text[] DEFAULT array_append('{extent, coord, ur}', $2); + ll text[] DEFAULT array_append('{extent, coord, ll}', $2); + lr text[] DEFAULT array_append('{extent, coord, lr}', $2); + BEGIN + WITH m AS ( SELECT metadata FROM agdc.dataset WHERE dataset_type_ref = any($1) AND archived IS NULL ) + SELECT MIN(LEAST((m.metadata#>>ul)::numeric, (m.metadata#>>ur)::numeric, + (m.metadata#>>ll)::numeric, (m.metadata#>>lr)::numeric)) + INTO ret + FROM m; + RETURN ret; + END; + $$ LANGUAGE plpgsql; + """), + ("Creating/replacing wms_get_max() function", """ + CREATE OR REPLACE FUNCTION wms_get_max(integer[], text) RETURNS numeric AS $$ + DECLARE + ret numeric; + ul text[] DEFAULT array_append('{extent, coord, ul}', $2); + ur text[] DEFAULT array_append('{extent, coord, ur}', $2); + ll text[] DEFAULT array_append('{extent, coord, ll}', $2); + lr text[] DEFAULT array_append('{extent, coord, lr}', $2); + BEGIN + WITH m AS ( SELECT metadata FROM agdc.dataset WHERE dataset_type_ref = ANY ($1) AND archived IS NULL ) + SELECT MAX(GREATEST((m.metadata#>>ul)::numeric, (m.metadata#>>ur)::numeric, + (m.metadata#>>ll)::numeric, (m.metadata#>>lr)::numeric)) + INTO ret + FROM m; + RETURN ret; + END; + $$ LANGUAGE plpgsql; + """), + ] + + conn = get_sqlconn(dc) + for msg, sql in commands: + print(msg) + conn.execute(sql) + + # Add user based on param + # use psycopg2 directly to get proper psql + # quoting on the role name identifier + print("Granting usage on schema") + q = SQL("GRANT USAGE ON SCHEMA wms TO {}").format(Identifier(role)) + with conn.connection.cursor() as psycopg2connection: + psycopg2connection.execute(q) + conn.close() + + return + + +if __name__ == '__main__': + main() + + diff --git a/setup.py b/setup.py index 65f4830c8..e86d660a7 100644 --- a/setup.py +++ b/setup.py @@ -50,8 +50,8 @@ entry_points={ 'console_scripts': [ 'datacube-ows=datacube_ows.wsgi:main', - 'datacube-ows-update=datacube_ows.update_ranges:main', - 'datacube-ows-update-2=datacube_ows.update_ranges_2:main' + 'datacube-ows-update-old=datacube_ows.update_ranges_old:main', + 'datacube-ows-update=datacube_ows.update_ranges:main' ] }, packages=find_packages(), diff --git a/update_ranges_2.py b/update_ranges_2.py deleted file mode 100644 index cf68645f4..000000000 --- a/update_ranges_2.py +++ /dev/null @@ -1,4 +0,0 @@ -from datacube_ows.update_ranges_2 import main - -if __name__ == '__main__': - main() diff --git a/update_ranges_old.py b/update_ranges_old.py new file mode 100644 index 000000000..76599a94b --- /dev/null +++ b/update_ranges_old.py @@ -0,0 +1,4 @@ +from datacube_ows.update_ranges_old import main + +if __name__ == '__main__': + main() From 32ad6d848c7ede0d16fb1720c988db0d2c369847 Mon Sep 17 00:00:00 2001 From: phaesler Date: Fri, 1 May 2020 11:15:06 +1000 Subject: [PATCH 26/30] The update_ranges.py is now the new mv method with the old method available as update_ranges_old.py. update_ranges now has backwards compatible option handling (with warnings). 
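The "new mv method" derives ranges by aggregating the space_time_view
materialised view rather than re-scanning dataset metadata JSON. As a rough
sketch, the bounding-box update in create_range_entry() boils down to a query
of this shape (the product id 42 is a hypothetical placeholder):

    SELECT st_xmin(subq.bbox) AS lon_min, st_xmax(subq.bbox) AS lon_max,
           st_ymin(subq.bbox) AS lat_min, st_ymax(subq.bbox) AS lat_max
    FROM (SELECT st_extent(stv.spatial_extent) AS bbox
          FROM public.space_time_view stv
          WHERE stv.dataset_type_ref = 42) AS subq;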
--- datacube_ows/update_ranges.py | 395 ---------------------------------- 1 file changed, 395 deletions(-) delete mode 100755 datacube_ows/update_ranges.py diff --git a/datacube_ows/update_ranges.py b/datacube_ows/update_ranges.py deleted file mode 100755 index 2ced637b6..000000000 --- a/datacube_ows/update_ranges.py +++ /dev/null @@ -1,395 +0,0 @@ -#!/usr/bin/env python3 - -from datacube_ows.product_ranges import get_sqlconn, add_ranges -from datacube import Datacube -from psycopg2.sql import SQL -from datacube_ows.ows_configuration import get_config -import os -import click - -@click.command() -@click.option("--views", is_flag=True, default=False, help="Create (if called with the --schema option) or refresh the ODC spatio-temporal materialised views.") -@click.option("--schema", is_flag=True, default=False, help="Create or update the OWS database schema.") -@click.option("--role", default=None, help="Role to grant database permissions to") -@click.option("--summary", is_flag=True, default=False, help="Treat any named ODC products with no corresponding configured OWS Layer as summary products" ) -@click.option("--merge-only/--no-merge-only", default=False, help="When used with a multiproduct layer, the ranges for underlying datacube products are not updated.") -@click.option("--product", default=None, help="Deprecated option provided for backwards compatibility") -@click.option("--multiproduct", default=None, help="Deprecated option provided for backwards compatibility." ) -@click.option("--calculate-extent/--no-calculate-extent", default=None, help="Has no effect any more. Provided for backwards compatibility only") -@click.argument("products", nargs=-1) -def main(products, - merge_only, summary, - schema, views, role, - product, multiproduct, calculate_extent): - """Manage datacube-ows range tables. - - Valid invocations: - - * Some combination of the --views and --schema flags (and no PRODUCTS). - (Perform the specified database updates) - - * One of more OWS or ODC product names - (Update ranges for the specified PRODUCTS - - * No PRODUCTS (and neither the --views nor --schema flags) - (Update ranges for all configured OWS products. - - Uses the DATACUBE_OWS_CFG environment variable to find the OWS config file. - """ - # Handle old-style calls - if not products: - products = [] - if product: - print("********************************************************************************") - print("Warning: The product flag is deprecated and will be removed in a future release.") - print(" The correct way to make this call is now:") - print(" ") - print(" python3 update_ranges.py %s" % product) - print("********************************************************************************") - products.append(product) - if multiproduct: - print("********************************************************************************") - print("Warning: The product flag is deprecated and will be removed in a future release.") - print(" The correct way to make this call is now:") - print(" ") - if merge_only: - print(" python3 update_ranges.py --merge-only %s" % multiproduct) - else: - print(" python3 update_ranges.py %s" % multiproduct) - print("********************************************************************************") - products.append(multiproduct) - if calculate_extent is not None: - print("********************************************************************************") - print("Warning: The calculate-extent and no-calculate-extent flags no longer have ") - print(" any effect. 
They are kept only for backwards compatibility and will") - print(" be removed in a future release.") - print("********************************************************************************") - if schema and products: - print("Sorry, cannot update the schema and ranges in the same invocation.") - return 1 - elif views and products: - print("Sorry, cannot update the materialised views and ranges in the same invocation.") - return 1 - elif schema and not role: - print("Sorry, cannot update schema without specifying a role") - return 1 - elif role and not schema: - print("Sorry, role only makes sense for updating the schema") - return 1 - - if os.environ.get("PYDEV_DEBUG"): - import pydevd_pycharm - pydevd_pycharm.settrace('172.17.0.1', port=12321, stdoutToServer=True, stderrToServer=True) - - dc = Datacube(app="ows_update_ranges") - if schema or views: - if schema: - print("Checking schema....") - print("Creating or replacing WMS database schema...") - create_schema(dc, role) - print("Done") - if schema and views: - print("Creating or replacing materialised views...") - create_views(dc) - print("Done") - elif views: - print("Refreshing materialised views...") - refresh_views(dc) - print("Done") - return 0 - - print("Deriving extents from materialised views") - if not products: - products = list(get_config().product_index.keys()) - add_ranges(dc, products, summary, merge_only) - return 0 - - -def create_views(dc): - commands = [ - ("Installing Postgis extensions on public schema", - "create extension if not exists postgis"), - ("Giving other schemas access to PostGIS functions installed in the public schema", - """ALTER DATABASE datacube - SET - search_path = public, - agdc - """), - ("Dropping already existing Materialized View Index 1/3", - "DROP INDEX IF EXISTS space_time_view_geom_idx"), - ("Dropping already existing Materialized View Index 2/3", - "DROP INDEX IF EXISTS space_time_view_time_idx"), - ("Dropping already existing Materialized View Index 3/3", - "DROP INDEX IF EXISTS space_time_view_ds_idx"), - ("Dropping already existing Materialized View 1/3", - "DROP MATERIALIZED VIEW IF EXISTS space_time_view"), - ("Dropping already existing Materialized View 2/3", - "DROP MATERIALIZED VIEW IF EXISTS time_view"), - ("Dropping already existing Materialized View 3/3", - "DROP MATERIALIZED VIEW IF EXISTS space_view"), - ("Setting default timezone to UTC", - "set timezone to 'Etc/UTC'"), - -# Handling different variants of metadata requires UNION with WHICH clauses per metadata type -# https://www.postgresql.org/docs/11/queries-union.html - -# Try all different locations for temporal extents and UNION them - ("Creating TIME Materialised View", - """ -CREATE MATERIALIZED VIEW IF NOT EXISTS time_view (dataset_type_ref, ID, temporal_extent) -AS -with --- Crib metadata to use as for string matching various types -metadata_lookup as ( - select id,name from agdc.metadata_type -) --- This is the eodataset variant of the temporal extent -select - dataset_type_ref, id,tstzrange( - (metadata -> 'extent' ->> 'from_dt') :: timestamp,(metadata -> 'extent' ->> 'to_dt') :: timestamp + interval '1 microsecond' - ) as temporal_extent -from agdc.dataset where - metadata_type_ref in (select id from metadata_lookup where name in ('eo','gqa_eo','eo_plus')) -UNION --- This is the eo3 variant of the temporal extent, the sample eo3 dataset uses a singleton --- timestamp, some other variants use start/end timestamps. 
From OWS perspective temporal --- resolution is 1 whole day -select - dataset_type_ref, id,tstzrange( - (metadata->'properties'->>'datetime'):: timestamp, - (metadata->'properties'->>'datetime'):: timestamp + interval '1 day' - ) as temporal_extent -from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name='eo3') -UNION --- Start/End timestamp variant product. --- http://dapds00.nci.org.au/thredds/fileServer/xu18/ga_ls8c_ard_3/092/090/2019/06/05/ga_ls8c_ard_3-0-0_092090_2019-06-05_final.odc-metadata.yaml -select - dataset_type_ref, id,tstzrange( - (metadata->'properties'->>'dtr:start_datetime'):: timestamp, - (metadata->'properties'->>'dtr:end_datetime'):: timestamp - ) as temporal_extent -from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name in ('eo3_landsat_ard')) -"""), - # Spatial extents per dataset (to be created as a column of the space-time table) - # Try all different locations for spatial extents and UNION them - ("Creating SPACE Materialised View (Slowest step!)", -""" -CREATE MATERIALIZED VIEW IF NOT EXISTS space_view (ID, spatial_extent) -AS -with --- Crib metadata to use as for string matching various types -metadata_lookup as ( - select id,name from agdc.metadata_type -), --- This is eo3 spatial (Uses CEMP INSAR as a sample product) -ranges as -(select id, - (metadata #> '{extent, lat, begin}') as lat_begin, - (metadata #> '{extent, lat, end}') as lat_end, - (metadata #> '{extent, lon, begin}') as lon_begin, - (metadata #> '{extent, lon, end}') as lon_end - from agdc.dataset where - metadata_type_ref in (select id from metadata_lookup where name='eo3') - ), --- This is eo spatial (Uses ALOS-PALSAR over Africa as a sample product) -corners as -(select id, - (metadata #> '{extent, coord, ll, lat}') as ll_lat, - (metadata #> '{extent, coord, ll, lon}') as ll_lon, - (metadata #> '{extent, coord, lr, lat}') as lr_lat, - (metadata #> '{extent, coord, lr, lon}') as lr_lon, - (metadata #> '{extent, coord, ul, lat}') as ul_lat, - (metadata #> '{extent, coord, ul, lon}') as ul_lon, - (metadata #> '{extent, coord, ur, lat}') as ur_lat, - (metadata #> '{extent, coord, ur, lon}') as ur_lon - from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name in ('eo','gqa_eo','eo_plus'))) -select id,format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))', - lon_begin, lat_begin, lon_end, lat_begin, lon_end, lat_end, - lon_begin, lat_end, lon_begin, lat_begin)::geometry -as spatial_extent -from ranges -UNION -select id,format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))', - ll_lon, ll_lat, lr_lon, lr_lat, ur_lon, ur_lat, - ul_lon, ul_lat, ll_lon, ll_lat)::geometry as spatial_extent -from corners -UNION --- This is lansat_scene and landsat_l1_scene with geometries -select id, - ST_Transform( - ST_SetSRID( - ST_GeomFromGeoJSON( - metadata #>> '{geometry}'), - substr( - metadata #>> '{crs}',6)::integer - ), - 4326 - ) as spatial_extent - from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name in ('eo3_landsat_ard')) - """, True), -# Join the above queries for space and time as CTE's into a space-time view - - ("Creating combined SPACE-TIME Materialised View", - """ -CREATE MATERIALIZED VIEW IF NOT EXISTS space_time_view (ID, dataset_type_ref, spatial_extent, temporal_extent) -AS -select space_view.id, dataset_type_ref, spatial_extent, temporal_extent from space_view join time_view on space_view.id=time_view.id - """), - -# Spatial extents are indexed using GIST index for BBOX 
queries -# https://postgis.net/workshops/postgis-intro/indexing.html - ("Creating Materialised View Index 1/3", """ -CREATE INDEX space_time_view_geom_idx - ON space_time_view - USING GIST (spatial_extent) - """), - -# Time range types can carray indexes for range lookup -# https://www.postgresql.org/docs/11/rangetypes.html#RANGETYPES-INDEXING - ("Creating Materialised View Index 2/3", """ - CREATE INDEX space_time_view_time_idx - ON space_time_view - USING SPGIST (temporal_extent) - """), - -# Create standard btree index over dataset_type_ref to ease searching by -# https://ieftimov.com/post/postgresql-indexes-btree/ - ("Creating Materialised View Index 3/3", """ - CREATE INDEX space_time_view_ds_idx - ON space_time_view - USING BTREE(dataset_type_ref) - """), - - ] - run_sql(dc, commands) - - -def refresh_views(dc): - commands = [ - ("Refreshing TIME materialized view", - "REFRESH MATERIALIZED VIEW time_view" - ), - ("Refreshing SPACE materialized view", - "REFRESH MATERIALIZED VIEW space_view" - ), - ("Refreshing combined SPACE-TIME materialized view", - "REFRESH MATERIALIZED VIEW CONCURRENTLY space_time_view" - ), - ] - run_sql(dc, commands) - - -def create_schema(dc, role): - commands = [ - ("Creating/replacing wms schema", "create schema if not exists wms"), - - ("Creating/replacing product ranges table", """ - create table if not exists wms.product_ranges ( - id smallint not null primary key references agdc.dataset_type (id), - - lat_min decimal not null, - lat_max decimal not null, - lon_min decimal not null, - lon_max decimal not null, - - dates jsonb not null, - - bboxes jsonb not null) - """), - ("Creating/replacing sub-product ranges table", """ - create table if not exists wms.sub_product_ranges ( - product_id smallint not null references agdc.dataset_type (id), - sub_product_id smallint not null, - lat_min decimal not null, - lat_max decimal not null, - lon_min decimal not null, - lon_max decimal not null, - dates jsonb not null, - bboxes jsonb not null, - constraint pk_sub_product_ranges primary key (product_id, sub_product_id) ) - """), - ("Creating/replacing multi-product ranges table", """ - create table if not exists wms.multiproduct_ranges ( - wms_product_name varchar(128) not null primary key, - lat_min decimal not null, - lat_max decimal not null, - lon_min decimal not null, - lon_max decimal not null, - dates jsonb not null, - bboxes jsonb not null) - """), - # Functions - ("Creating/replacing wms_get_min() function", """ - CREATE OR REPLACE FUNCTION wms_get_min(integer[], text) RETURNS numeric AS $$ - DECLARE - ret numeric; - ul text[] DEFAULT array_append('{extent, coord, ul}', $2); - ur text[] DEFAULT array_append('{extent, coord, ur}', $2); - ll text[] DEFAULT array_append('{extent, coord, ll}', $2); - lr text[] DEFAULT array_append('{extent, coord, lr}', $2); - BEGIN - WITH m AS ( SELECT metadata FROM agdc.dataset WHERE dataset_type_ref = any($1) AND archived IS NULL ) - SELECT MIN(LEAST((m.metadata#>>ul)::numeric, (m.metadata#>>ur)::numeric, - (m.metadata#>>ll)::numeric, (m.metadata#>>lr)::numeric)) - INTO ret - FROM m; - RETURN ret; - END; - $$ LANGUAGE plpgsql; - """), - ("Creating/replacing wms_get_max() function", """ - CREATE OR REPLACE FUNCTION wms_get_max(integer[], text) RETURNS numeric AS $$ - DECLARE - ret numeric; - ul text[] DEFAULT array_append('{extent, coord, ul}', $2); - ur text[] DEFAULT array_append('{extent, coord, ur}', $2); - ll text[] DEFAULT array_append('{extent, coord, ll}', $2); - lr text[] DEFAULT array_append('{extent, coord, lr}', 
$2);
-    BEGIN
-        WITH m AS ( SELECT metadata FROM agdc.dataset WHERE dataset_type_ref = ANY ($1) AND archived IS NULL )
-        SELECT MAX(GREATEST((m.metadata#>>ul)::numeric, (m.metadata#>>ur)::numeric,
-                 (m.metadata#>>ll)::numeric, (m.metadata#>>lr)::numeric))
-        INTO ret
-        FROM m;
-        RETURN ret;
-    END;
-    $$ LANGUAGE plpgsql;
-    """),
-    ("""Granting usage on schema""",
-     "GRANT USAGE ON SCHEMA wms TO %s" % role
-    )
-    ]
-    run_sql(dc, commands)
-
-def run_sql(dc, commands):
-    conn = get_sqlconn(dc)
-    for cmd_blob in commands:
-        if len(cmd_blob) == 2:
-            msg, sql = cmd_blob
-            override = False
-        else:
-            msg, sql, override = cmd_blob
-        print(msg)
-        if override:
-            q = SQL(sql)
-            with conn.connection.cursor() as psycopg2connection:
-                psycopg2connection.execute(q)
-        else:
-            conn.execute(sql)
-
-    # Add user based on param
-    # use psycopg2 directly to get proper psql
-    # quoting on the role name identifier
-    # print("Granting usage on schema")
-    # q = SQL("GRANT USAGE ON SCHEMA wms TO {}").format(Identifier(role))
-    # with conn.connection.cursor() as psycopg2connection:
-    #     psycopg2connection.execute(q)
-    conn.close()
-
-    return
-
-
-if __name__ == '__main__':
-    main()
-
-

From 63cf7285a5dcc22994de9eadf3a11a48bf7c4052 Mon Sep 17 00:00:00 2001
From: phaesler
Date: Fri, 1 May 2020 11:25:22 +1000
Subject: [PATCH 27/30] How is this file still missing from git?

---
 datacube_ows/update_ranges.py | 395 ++++++++++++++++++++++++++++++++++
 1 file changed, 395 insertions(+)
 create mode 100755 datacube_ows/update_ranges.py

diff --git a/datacube_ows/update_ranges.py b/datacube_ows/update_ranges.py
new file mode 100755
index 000000000..2ced637b6
--- /dev/null
+++ b/datacube_ows/update_ranges.py
@@ -0,0 +1,395 @@
+#!/usr/bin/env python3
+
+from datacube_ows.product_ranges import get_sqlconn, add_ranges
+from datacube import Datacube
+from psycopg2.sql import SQL
+from datacube_ows.ows_configuration import get_config
+import os
+import click
+
+@click.command()
+@click.option("--views", is_flag=True, default=False, help="Create (if called with the --schema option) or refresh the ODC spatio-temporal materialised views.")
+@click.option("--schema", is_flag=True, default=False, help="Create or update the OWS database schema.")
+@click.option("--role", default=None, help="Role to grant database permissions to")
+@click.option("--summary", is_flag=True, default=False, help="Treat any named ODC products with no corresponding configured OWS Layer as summary products" )
+@click.option("--merge-only/--no-merge-only", default=False, help="When used with a multiproduct layer, the ranges for underlying datacube products are not updated.")
+@click.option("--product", default=None, help="Deprecated option provided for backwards compatibility")
+@click.option("--multiproduct", default=None, help="Deprecated option provided for backwards compatibility." )
+@click.option("--calculate-extent/--no-calculate-extent", default=None, help="Has no effect any more. Provided for backwards compatibility only")
+@click.argument("products", nargs=-1)
+def main(products,
+         merge_only, summary,
+         schema, views, role,
+         product, multiproduct, calculate_extent):
+    """Manage datacube-ows range tables.
+
+    Valid invocations:
+
+    * Some combination of the --views and --schema flags (and no PRODUCTS).
+      (Perform the specified database updates)
+
+    * One or more OWS or ODC product names
+      (Update ranges for the specified PRODUCTS)
+
+    * No PRODUCTS (and neither the --views nor --schema flags)
+      (Update ranges for all configured OWS products.)
+
+    Uses the DATACUBE_OWS_CFG environment variable to find the OWS config file.
+    """
+    # Handle old-style calls
+    if not products:
+        products = []
+    if product:
+        print("********************************************************************************")
+        print("Warning: The product flag is deprecated and will be removed in a future release.")
+        print("         The correct way to make this call is now:")
+        print("         ")
+        print("    python3 update_ranges.py %s" % product)
+        print("********************************************************************************")
+        products.append(product)
+    if multiproduct:
+        print("********************************************************************************")
+        print("Warning: The multiproduct flag is deprecated and will be removed in a future release.")
+        print("         The correct way to make this call is now:")
+        print("         ")
+        if merge_only:
+            print("    python3 update_ranges.py --merge-only %s" % multiproduct)
+        else:
+            print("    python3 update_ranges.py %s" % multiproduct)
+        print("********************************************************************************")
+        products.append(multiproduct)
+    if calculate_extent is not None:
+        print("********************************************************************************")
+        print("Warning: The calculate-extent and no-calculate-extent flags no longer have ")
+        print("         any effect. They are kept only for backwards compatibility and will")
+        print("         be removed in a future release.")
+        print("********************************************************************************")
+    if schema and products:
+        print("Sorry, cannot update the schema and ranges in the same invocation.")
+        return 1
+    elif views and products:
+        print("Sorry, cannot update the materialised views and ranges in the same invocation.")
+        return 1
+    elif schema and not role:
+        print("Sorry, cannot update schema without specifying a role")
+        return 1
+    elif role and not schema:
+        print("Sorry, role only makes sense for updating the schema")
+        return 1
+
+    if os.environ.get("PYDEV_DEBUG"):
+        import pydevd_pycharm
+        pydevd_pycharm.settrace('172.17.0.1', port=12321, stdoutToServer=True, stderrToServer=True)
+
+    dc = Datacube(app="ows_update_ranges")
+    if schema or views:
+        if schema:
+            print("Checking schema....")
+            print("Creating or replacing WMS database schema...")
+            create_schema(dc, role)
+            print("Done")
+        if schema and views:
+            print("Creating or replacing materialised views...")
+            create_views(dc)
+            print("Done")
+        elif views:
+            print("Refreshing materialised views...")
+            refresh_views(dc)
+            print("Done")
+        return 0
+
+    print("Deriving extents from materialised views")
+    if not products:
+        products = list(get_config().product_index.keys())
+    add_ranges(dc, products, summary, merge_only)
+    return 0
+
+
+def create_views(dc):
+    commands = [
+        ("Installing Postgis extensions on public schema",
+         "create extension if not exists postgis"),
+        ("Giving other schemas access to PostGIS functions installed in the public schema",
+         """ALTER DATABASE datacube
+            SET
+                search_path = public,
+                agdc
+         """),
+        ("Dropping already existing Materialized View Index 1/3",
+         "DROP INDEX IF EXISTS space_time_view_geom_idx"),
+        ("Dropping already existing Materialized View Index 2/3",
+         "DROP INDEX IF EXISTS space_time_view_time_idx"),
+        ("Dropping already existing Materialized View Index 3/3",
+         "DROP INDEX IF EXISTS space_time_view_ds_idx"),
+        ("Dropping already existing Materialized View 1/3",
+         "DROP MATERIALIZED VIEW IF EXISTS space_time_view"),
+        ("Dropping already existing 
Materialized View 2/3",
+         "DROP MATERIALIZED VIEW IF EXISTS time_view"),
+        ("Dropping already existing Materialized View 3/3",
+         "DROP MATERIALIZED VIEW IF EXISTS space_view"),
+        ("Setting default timezone to UTC",
+         "set timezone to 'Etc/UTC'"),
+
+# Handling different variants of metadata requires UNION with WHERE clauses per metadata type
+# https://www.postgresql.org/docs/11/queries-union.html
+
+# Try all different locations for temporal extents and UNION them
+        ("Creating TIME Materialised View",
+         """
+CREATE MATERIALIZED VIEW IF NOT EXISTS time_view (dataset_type_ref, ID, temporal_extent)
+AS
+with
+-- Crib metadata to use for string matching various types
+metadata_lookup as (
+  select id,name from agdc.metadata_type
+)
+-- This is the eodataset variant of the temporal extent
+select
+  dataset_type_ref, id,tstzrange(
+    (metadata -> 'extent' ->> 'from_dt') :: timestamp,(metadata -> 'extent' ->> 'to_dt') :: timestamp + interval '1 microsecond'
+  ) as temporal_extent
+from agdc.dataset where
+  metadata_type_ref in (select id from metadata_lookup where name in ('eo','gqa_eo','eo_plus'))
+UNION
+-- This is the eo3 variant of the temporal extent, the sample eo3 dataset uses a singleton
+-- timestamp, some other variants use start/end timestamps. From OWS perspective temporal
+-- resolution is 1 whole day
+select
+  dataset_type_ref, id,tstzrange(
+    (metadata->'properties'->>'datetime'):: timestamp,
+    (metadata->'properties'->>'datetime'):: timestamp + interval '1 day'
+  ) as temporal_extent
+from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name='eo3')
+UNION
+-- Start/End timestamp variant product.
+-- http://dapds00.nci.org.au/thredds/fileServer/xu18/ga_ls8c_ard_3/092/090/2019/06/05/ga_ls8c_ard_3-0-0_092090_2019-06-05_final.odc-metadata.yaml
+select
+  dataset_type_ref, id,tstzrange(
+    (metadata->'properties'->>'dtr:start_datetime'):: timestamp,
+    (metadata->'properties'->>'dtr:end_datetime'):: timestamp
+  ) as temporal_extent
+from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name in ('eo3_landsat_ard'))
+"""),
+    # Spatial extents per dataset (to be created as a column of the space-time table)
+    # Try all different locations for spatial extents and UNION them
+        ("Creating SPACE Materialised View (Slowest step!)",
+"""
+CREATE MATERIALIZED VIEW IF NOT EXISTS space_view (ID, spatial_extent)
+AS
+with
+-- Crib metadata to use for string matching various types
+metadata_lookup as (
+  select id,name from agdc.metadata_type
+),
+-- This is eo3 spatial (Uses CEMP INSAR as a sample product)
+ranges as
+(select id,
+  (metadata #> '{extent, lat, begin}') as lat_begin,
+  (metadata #> '{extent, lat, end}') as lat_end,
+  (metadata #> '{extent, lon, begin}') as lon_begin,
+  (metadata #> '{extent, lon, end}') as lon_end
+ from agdc.dataset where
+  metadata_type_ref in (select id from metadata_lookup where name='eo3')
+ ),
+-- This is eo spatial (Uses ALOS-PALSAR over Africa as a sample product)
+corners as
+(select id,
+  (metadata #> '{extent, coord, ll, lat}') as ll_lat,
+  (metadata #> '{extent, coord, ll, lon}') as ll_lon,
+  (metadata #> '{extent, coord, lr, lat}') as lr_lat,
+  (metadata #> '{extent, coord, lr, lon}') as lr_lon,
+  (metadata #> '{extent, coord, ul, lat}') as ul_lat,
+  (metadata #> '{extent, coord, ul, lon}') as ul_lon,
+  (metadata #> '{extent, coord, ur, lat}') as ur_lat,
+  (metadata #> '{extent, coord, ur, lon}') as ur_lon
+ from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name 
in ('eo','gqa_eo','eo_plus')))
+select id,format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))',
+        lon_begin, lat_begin, lon_end, lat_begin, lon_end, lat_end,
+        lon_begin, lat_end, lon_begin, lat_begin)::geometry
+as spatial_extent
+from ranges
+UNION
+select id,format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))',
+        ll_lon, ll_lat, lr_lon, lr_lat, ur_lon, ur_lat,
+        ul_lon, ul_lat, ll_lon, ll_lat)::geometry as spatial_extent
+from corners
+UNION
+-- This is landsat_scene and landsat_l1_scene with geometries
+select id,
+  ST_Transform(
+    ST_SetSRID(
+      ST_GeomFromGeoJSON(
+        metadata #>> '{geometry}'),
+      substr(
+        metadata #>> '{crs}',6)::integer
+    ),
+    4326
+  ) as spatial_extent
+ from agdc.dataset where metadata_type_ref in (select id from metadata_lookup where name in ('eo3_landsat_ard'))
+        """, True),
+# Join the above queries for space and time as CTEs into a space-time view
+
+        ("Creating combined SPACE-TIME Materialised View",
+         """
+CREATE MATERIALIZED VIEW IF NOT EXISTS space_time_view (ID, dataset_type_ref, spatial_extent, temporal_extent)
+AS
+select space_view.id, dataset_type_ref, spatial_extent, temporal_extent from space_view join time_view on space_view.id=time_view.id
+        """),
+
+# Spatial extents are indexed using GIST index for BBOX queries
+# https://postgis.net/workshops/postgis-intro/indexing.html
+        ("Creating Materialised View Index 1/3", """
+CREATE INDEX space_time_view_geom_idx
+  ON space_time_view
+  USING GIST (spatial_extent)
+        """),
+
+# Time range types can carry indexes for range lookup
+# https://www.postgresql.org/docs/11/rangetypes.html#RANGETYPES-INDEXING
+        ("Creating Materialised View Index 2/3", """
+  CREATE INDEX space_time_view_time_idx
+  ON space_time_view
+  USING SPGIST (temporal_extent)
+        """),
+
+# Create standard btree index over dataset_type_ref to ease searching by product
+# https://ieftimov.com/post/postgresql-indexes-btree/
+        ("Creating Materialised View Index 3/3", """
+  CREATE INDEX space_time_view_ds_idx
+  ON space_time_view
+  USING BTREE(dataset_type_ref)
+        """),
+
+    ]
+    run_sql(dc, commands)
+
+
+def refresh_views(dc):
+    commands = [
+        ("Refreshing TIME materialized view",
+         "REFRESH MATERIALIZED VIEW time_view"
+        ),
+        ("Refreshing SPACE materialized view",
+         "REFRESH MATERIALIZED VIEW space_view"
+        ),
+        ("Refreshing combined SPACE-TIME materialized view",
+         "REFRESH MATERIALIZED VIEW CONCURRENTLY space_time_view"
+        ),
+    ]
+    run_sql(dc, commands)
+
+
+def create_schema(dc, role):
+    commands = [
+        ("Creating/replacing wms schema", "create schema if not exists wms"),
+
+        ("Creating/replacing product ranges table", """
+            create table if not exists wms.product_ranges (
+                id smallint not null primary key references agdc.dataset_type (id),
+
+                lat_min decimal not null,
+                lat_max decimal not null,
+                lon_min decimal not null,
+                lon_max decimal not null,
+
+                dates jsonb not null,
+
+                bboxes jsonb not null)
+        """),
+        ("Creating/replacing sub-product ranges table", """
+            create table if not exists wms.sub_product_ranges (
+                product_id smallint not null references agdc.dataset_type (id),
+                sub_product_id smallint not null,
+                lat_min decimal not null,
+                lat_max decimal not null,
+                lon_min decimal not null,
+                lon_max decimal not null,
+                dates jsonb not null,
+                bboxes jsonb not null,
+                constraint pk_sub_product_ranges primary key (product_id, sub_product_id) )
+        """),
+        ("Creating/replacing multi-product ranges table", """
+            create table if not exists wms.multiproduct_ranges (
+                wms_product_name varchar(128) not null primary key,
+                lat_min decimal not null,
+                
lat_max decimal not null, + lon_min decimal not null, + lon_max decimal not null, + dates jsonb not null, + bboxes jsonb not null) + """), + # Functions + ("Creating/replacing wms_get_min() function", """ + CREATE OR REPLACE FUNCTION wms_get_min(integer[], text) RETURNS numeric AS $$ + DECLARE + ret numeric; + ul text[] DEFAULT array_append('{extent, coord, ul}', $2); + ur text[] DEFAULT array_append('{extent, coord, ur}', $2); + ll text[] DEFAULT array_append('{extent, coord, ll}', $2); + lr text[] DEFAULT array_append('{extent, coord, lr}', $2); + BEGIN + WITH m AS ( SELECT metadata FROM agdc.dataset WHERE dataset_type_ref = any($1) AND archived IS NULL ) + SELECT MIN(LEAST((m.metadata#>>ul)::numeric, (m.metadata#>>ur)::numeric, + (m.metadata#>>ll)::numeric, (m.metadata#>>lr)::numeric)) + INTO ret + FROM m; + RETURN ret; + END; + $$ LANGUAGE plpgsql; + """), + ("Creating/replacing wms_get_max() function", """ + CREATE OR REPLACE FUNCTION wms_get_max(integer[], text) RETURNS numeric AS $$ + DECLARE + ret numeric; + ul text[] DEFAULT array_append('{extent, coord, ul}', $2); + ur text[] DEFAULT array_append('{extent, coord, ur}', $2); + ll text[] DEFAULT array_append('{extent, coord, ll}', $2); + lr text[] DEFAULT array_append('{extent, coord, lr}', $2); + BEGIN + WITH m AS ( SELECT metadata FROM agdc.dataset WHERE dataset_type_ref = ANY ($1) AND archived IS NULL ) + SELECT MAX(GREATEST((m.metadata#>>ul)::numeric, (m.metadata#>>ur)::numeric, + (m.metadata#>>ll)::numeric, (m.metadata#>>lr)::numeric)) + INTO ret + FROM m; + RETURN ret; + END; + $$ LANGUAGE plpgsql; + """), + ("""Granting usage on schema""", + "GRANT USAGE ON SCHEMA wms TO %s" % role + ) + ] + run_sql(dc, commands) + +def run_sql(dc, commands): + conn = get_sqlconn(dc) + for cmd_blob in commands: + if len(cmd_blob) == 2: + msg, sql = cmd_blob + override = False + else: + msg, sql, override = cmd_blob + print(msg) + if override: + q = SQL(sql) + with conn.connection.cursor() as psycopg2connection: + psycopg2connection.execute(q) + else: + conn.execute(sql) + + # Add user based on param + # use psycopg2 directly to get proper psql + # quoting on the role name identifier + # print("Granting usage on schema") + # q = SQL("GRANT USAGE ON SCHEMA wms TO {}").format(Identifier(role)) + # with conn.connection.cursor() as psycopg2connection: + # psycopg2connection.execute(q) + conn.close() + + return + + +if __name__ == '__main__': + main() + + From b37ed5ad47e51f1690430d9baf62f7776cb1582f Mon Sep 17 00:00:00 2001 From: phaesler Date: Fri, 1 May 2020 13:01:27 +1000 Subject: [PATCH 28/30] Use database name from connection settings. 
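

The ALTER DATABASE statement in create_views() previously hard-coded the
"datacube" database name. A minimal sketch of the substitution applied
here, reading the name from the DB_DATABASE environment variable with
"datacube" as the fallback:

    import os

    # Interpolate the configured database name instead of hard-coding it
    db_name = os.environ.get("DB_DATABASE", "datacube")
    sql = """ALTER DATABASE %s
       SET
         search_path = public,
         agdc
    """ % db_name
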
--- datacube_ows/update_ranges.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/datacube_ows/update_ranges.py b/datacube_ows/update_ranges.py index 2ced637b6..ba6d4e6de 100755 --- a/datacube_ows/update_ranges.py +++ b/datacube_ows/update_ranges.py @@ -110,11 +110,11 @@ def create_views(dc): ("Installing Postgis extensions on public schema", "create extension if not exists postgis"), ("Giving other schemas access to PostGIS functions installed in the public schema", - """ALTER DATABASE datacube + """ALTER DATABASE %s SET search_path = public, agdc - """), + """ % os.environ.get("DB_DATABASE", "datacube")), ("Dropping already existing Materialized View Index 1/3", "DROP INDEX IF EXISTS space_time_view_geom_idx"), ("Dropping already existing Materialized View Index 2/3", @@ -172,8 +172,7 @@ def create_views(dc): """), # Spatial extents per dataset (to be created as a column of the space-time table) # Try all different locations for spatial extents and UNION them - ("Creating SPACE Materialised View (Slowest step!)", -""" + ("Creating SPACE Materialised View (Slowest step!)", """ CREATE MATERIALIZED VIEW IF NOT EXISTS space_view (ID, spatial_extent) AS with From 7943d91dcff5522b04577c1837b2d02f2ea1c49b Mon Sep 17 00:00:00 2001 From: Tom Butler Date: Fri, 1 May 2020 03:02:08 +0000 Subject: [PATCH 29/30] rebuild test database dump to include MV tables --- docker/database/dump.sql | 314 +++++++++++++++++++++++++-------------- 1 file changed, 204 insertions(+), 110 deletions(-) diff --git a/docker/database/dump.sql b/docker/database/dump.sql index 53e0c7713..915bfedab 100644 --- a/docker/database/dump.sql +++ b/docker/database/dump.sql @@ -17,13 +17,13 @@ SET client_min_messages = warning; SET row_security = off; -- --- Name: agdc; Type: SCHEMA; Schema: -; Owner: agdc_admin +-- Name: agdc; Type: SCHEMA; Schema: -; Owner: opendatacube -- CREATE SCHEMA agdc; -ALTER SCHEMA agdc OWNER TO agdc_admin; +ALTER SCHEMA agdc OWNER TO opendatacube; -- -- Name: pg_cron; Type: EXTENSION; Schema: -; Owner: @@ -107,7 +107,7 @@ COMMENT ON EXTENSION postgis_topology IS 'PostGIS topology spatial types and fun -- --- Name: float8range; Type: TYPE; Schema: agdc; Owner: agdc_admin +-- Name: float8range; Type: TYPE; Schema: agdc; Owner: opendatacube -- CREATE TYPE agdc.float8range AS RANGE ( @@ -116,10 +116,10 @@ CREATE TYPE agdc.float8range AS RANGE ( ); -ALTER TYPE agdc.float8range OWNER TO agdc_admin; +ALTER TYPE agdc.float8range OWNER TO opendatacube; -- --- Name: common_timestamp(text); Type: FUNCTION; Schema: agdc; Owner: agdc_admin +-- Name: common_timestamp(text); Type: FUNCTION; Schema: agdc; Owner: opendatacube -- CREATE FUNCTION agdc.common_timestamp(text) RETURNS timestamp with time zone @@ -129,7 +129,7 @@ select ($1)::timestamp at time zone 'utc'; $_$; -ALTER FUNCTION agdc.common_timestamp(text) OWNER TO agdc_admin; +ALTER FUNCTION agdc.common_timestamp(text) OWNER TO opendatacube; -- -- Name: wms_get_max(integer[], text); Type: FUNCTION; Schema: agdc; Owner: opendatacube @@ -414,7 +414,7 @@ SET default_tablespace = ''; SET default_with_oids = false; -- --- Name: dataset; Type: TABLE; Schema: agdc; Owner: agdc_admin +-- Name: dataset; Type: TABLE; Schema: agdc; Owner: opendatacube -- CREATE TABLE agdc.dataset ( @@ -428,10 +428,10 @@ CREATE TABLE agdc.dataset ( ); -ALTER TABLE agdc.dataset OWNER TO agdc_admin; +ALTER TABLE agdc.dataset OWNER TO opendatacube; -- --- Name: dataset_location; Type: TABLE; Schema: agdc; Owner: agdc_admin +-- Name: dataset_location; Type: TABLE; Schema: 
agdc; Owner: opendatacube -- CREATE TABLE agdc.dataset_location ( @@ -445,10 +445,10 @@ CREATE TABLE agdc.dataset_location ( ); -ALTER TABLE agdc.dataset_location OWNER TO agdc_admin; +ALTER TABLE agdc.dataset_location OWNER TO opendatacube; -- --- Name: dataset_location_id_seq; Type: SEQUENCE; Schema: agdc; Owner: agdc_admin +-- Name: dataset_location_id_seq; Type: SEQUENCE; Schema: agdc; Owner: opendatacube -- CREATE SEQUENCE agdc.dataset_location_id_seq @@ -460,17 +460,17 @@ CREATE SEQUENCE agdc.dataset_location_id_seq CACHE 1; -ALTER TABLE agdc.dataset_location_id_seq OWNER TO agdc_admin; +ALTER TABLE agdc.dataset_location_id_seq OWNER TO opendatacube; -- --- Name: dataset_location_id_seq; Type: SEQUENCE OWNED BY; Schema: agdc; Owner: agdc_admin +-- Name: dataset_location_id_seq; Type: SEQUENCE OWNED BY; Schema: agdc; Owner: opendatacube -- ALTER SEQUENCE agdc.dataset_location_id_seq OWNED BY agdc.dataset_location.id; -- --- Name: dataset_source; Type: TABLE; Schema: agdc; Owner: agdc_admin +-- Name: dataset_source; Type: TABLE; Schema: agdc; Owner: opendatacube -- CREATE TABLE agdc.dataset_source ( @@ -480,10 +480,10 @@ CREATE TABLE agdc.dataset_source ( ); -ALTER TABLE agdc.dataset_source OWNER TO agdc_admin; +ALTER TABLE agdc.dataset_source OWNER TO opendatacube; -- --- Name: dataset_type; Type: TABLE; Schema: agdc; Owner: agdc_admin +-- Name: dataset_type; Type: TABLE; Schema: agdc; Owner: opendatacube -- CREATE TABLE agdc.dataset_type ( @@ -498,10 +498,10 @@ CREATE TABLE agdc.dataset_type ( ); -ALTER TABLE agdc.dataset_type OWNER TO agdc_admin; +ALTER TABLE agdc.dataset_type OWNER TO opendatacube; -- --- Name: dataset_type_id_seq; Type: SEQUENCE; Schema: agdc; Owner: agdc_admin +-- Name: dataset_type_id_seq; Type: SEQUENCE; Schema: agdc; Owner: opendatacube -- CREATE SEQUENCE agdc.dataset_type_id_seq @@ -513,17 +513,17 @@ CREATE SEQUENCE agdc.dataset_type_id_seq CACHE 1; -ALTER TABLE agdc.dataset_type_id_seq OWNER TO agdc_admin; +ALTER TABLE agdc.dataset_type_id_seq OWNER TO opendatacube; -- --- Name: dataset_type_id_seq; Type: SEQUENCE OWNED BY; Schema: agdc; Owner: agdc_admin +-- Name: dataset_type_id_seq; Type: SEQUENCE OWNED BY; Schema: agdc; Owner: opendatacube -- ALTER SEQUENCE agdc.dataset_type_id_seq OWNED BY agdc.dataset_type.id; -- --- Name: metadata_type; Type: TABLE; Schema: agdc; Owner: agdc_admin +-- Name: metadata_type; Type: TABLE; Schema: agdc; Owner: opendatacube -- CREATE TABLE agdc.metadata_type ( @@ -536,7 +536,7 @@ CREATE TABLE agdc.metadata_type ( ); -ALTER TABLE agdc.metadata_type OWNER TO agdc_admin; +ALTER TABLE agdc.metadata_type OWNER TO opendatacube; -- -- Name: dv_eo_dataset; Type: VIEW; Schema: agdc; Owner: opendatacube @@ -721,7 +721,7 @@ CREATE VIEW agdc.dv_telemetry_dataset AS ALTER TABLE agdc.dv_telemetry_dataset OWNER TO opendatacube; -- --- Name: metadata_type_id_seq; Type: SEQUENCE; Schema: agdc; Owner: agdc_admin +-- Name: metadata_type_id_seq; Type: SEQUENCE; Schema: agdc; Owner: opendatacube -- CREATE SEQUENCE agdc.metadata_type_id_seq @@ -733,15 +733,122 @@ CREATE SEQUENCE agdc.metadata_type_id_seq CACHE 1; -ALTER TABLE agdc.metadata_type_id_seq OWNER TO agdc_admin; +ALTER TABLE agdc.metadata_type_id_seq OWNER TO opendatacube; -- --- Name: metadata_type_id_seq; Type: SEQUENCE OWNED BY; Schema: agdc; Owner: agdc_admin +-- Name: metadata_type_id_seq; Type: SEQUENCE OWNED BY; Schema: agdc; Owner: opendatacube -- ALTER SEQUENCE agdc.metadata_type_id_seq OWNED BY agdc.metadata_type.id; +-- +-- Name: space_view; Type: MATERIALIZED VIEW; Schema: 
public; Owner: opendatacube +-- + +CREATE MATERIALIZED VIEW public.space_view AS + WITH metadata_lookup AS ( + SELECT metadata_type.id, + metadata_type.name + FROM agdc.metadata_type + ), ranges AS ( + SELECT dataset.id, + (dataset.metadata #> '{extent,lat,begin}'::text[]) AS lat_begin, + (dataset.metadata #> '{extent,lat,end}'::text[]) AS lat_end, + (dataset.metadata #> '{extent,lon,begin}'::text[]) AS lon_begin, + (dataset.metadata #> '{extent,lon,end}'::text[]) AS lon_end + FROM agdc.dataset + WHERE (dataset.metadata_type_ref IN ( SELECT metadata_lookup.id + FROM metadata_lookup + WHERE ((metadata_lookup.name)::text = 'eo3'::text))) + ), corners AS ( + SELECT dataset.id, + (dataset.metadata #> '{extent,coord,ll,lat}'::text[]) AS ll_lat, + (dataset.metadata #> '{extent,coord,ll,lon}'::text[]) AS ll_lon, + (dataset.metadata #> '{extent,coord,lr,lat}'::text[]) AS lr_lat, + (dataset.metadata #> '{extent,coord,lr,lon}'::text[]) AS lr_lon, + (dataset.metadata #> '{extent,coord,ul,lat}'::text[]) AS ul_lat, + (dataset.metadata #> '{extent,coord,ul,lon}'::text[]) AS ul_lon, + (dataset.metadata #> '{extent,coord,ur,lat}'::text[]) AS ur_lat, + (dataset.metadata #> '{extent,coord,ur,lon}'::text[]) AS ur_lon + FROM agdc.dataset + WHERE (dataset.metadata_type_ref IN ( SELECT metadata_lookup.id + FROM metadata_lookup + WHERE ((metadata_lookup.name)::text = ANY ((ARRAY['eo'::character varying, 'gqa_eo'::character varying, 'eo_plus'::character varying])::text[])))) + ) + SELECT ranges.id, + (format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))'::text, ranges.lon_begin, ranges.lat_begin, ranges.lon_end, ranges.lat_begin, ranges.lon_end, ranges.lat_end, ranges.lon_begin, ranges.lat_end, ranges.lon_begin, ranges.lat_begin))::public.geometry AS spatial_extent + FROM ranges +UNION + SELECT corners.id, + (format('POLYGON(( %s %s, %s %s, %s %s, %s %s, %s %s))'::text, corners.ll_lon, corners.ll_lat, corners.lr_lon, corners.lr_lat, corners.ur_lon, corners.ur_lat, corners.ul_lon, corners.ul_lat, corners.ll_lon, corners.ll_lat))::public.geometry AS spatial_extent + FROM corners +UNION + SELECT dataset.id, + public.st_transform(public.st_setsrid(public.st_geomfromgeojson((dataset.metadata #>> '{geometry}'::text[])), (substr((dataset.metadata #>> '{crs}'::text[]), 6))::integer), 4326) AS spatial_extent + FROM agdc.dataset + WHERE (dataset.metadata_type_ref IN ( SELECT metadata_lookup.id + FROM metadata_lookup + WHERE ((metadata_lookup.name)::text = 'eo3_landsat_ard'::text))) + WITH NO DATA; + + +ALTER TABLE public.space_view OWNER TO opendatacube; + +-- +-- Name: time_view; Type: MATERIALIZED VIEW; Schema: public; Owner: opendatacube +-- + +CREATE MATERIALIZED VIEW public.time_view AS + WITH metadata_lookup AS ( + SELECT metadata_type.id, + metadata_type.name + FROM agdc.metadata_type + ) + SELECT dataset.dataset_type_ref, + dataset.id, + tstzrange(((((dataset.metadata -> 'extent'::text) ->> 'from_dt'::text))::timestamp without time zone)::timestamp with time zone, (((((dataset.metadata -> 'extent'::text) ->> 'to_dt'::text))::timestamp without time zone + '00:00:00.000001'::interval))::timestamp with time zone) AS temporal_extent + FROM agdc.dataset + WHERE (dataset.metadata_type_ref IN ( SELECT metadata_lookup.id + FROM metadata_lookup + WHERE ((metadata_lookup.name)::text = ANY ((ARRAY['eo'::character varying, 'gqa_eo'::character varying, 'eo_plus'::character varying])::text[])))) +UNION + SELECT dataset.dataset_type_ref, + dataset.id, + tstzrange(((((dataset.metadata -> 'properties'::text) ->> 
'datetime'::text))::timestamp without time zone)::timestamp with time zone, (((((dataset.metadata -> 'properties'::text) ->> 'datetime'::text))::timestamp without time zone + '1 day'::interval))::timestamp with time zone) AS temporal_extent + FROM agdc.dataset + WHERE (dataset.metadata_type_ref IN ( SELECT metadata_lookup.id + FROM metadata_lookup + WHERE ((metadata_lookup.name)::text = 'eo3'::text))) +UNION + SELECT dataset.dataset_type_ref, + dataset.id, + tstzrange(((((dataset.metadata -> 'properties'::text) ->> 'dtr:start_datetime'::text))::timestamp without time zone)::timestamp with time zone, ((((dataset.metadata -> 'properties'::text) ->> 'dtr:end_datetime'::text))::timestamp without time zone)::timestamp with time zone) AS temporal_extent + FROM agdc.dataset + WHERE (dataset.metadata_type_ref IN ( SELECT metadata_lookup.id + FROM metadata_lookup + WHERE ((metadata_lookup.name)::text = 'eo3_landsat_ard'::text))) + WITH NO DATA; + + +ALTER TABLE public.time_view OWNER TO opendatacube; + +-- +-- Name: space_time_view; Type: MATERIALIZED VIEW; Schema: public; Owner: opendatacube +-- + +CREATE MATERIALIZED VIEW public.space_time_view AS + SELECT space_view.id, + time_view.dataset_type_ref, + space_view.spatial_extent, + time_view.temporal_extent + FROM (public.space_view + JOIN public.time_view ON ((space_view.id = time_view.id))) + WITH NO DATA; + + +ALTER TABLE public.space_time_view OWNER TO opendatacube; + -- -- Name: multiproduct_ranges; Type: TABLE; Schema: wms; Owner: opendatacube -- @@ -795,28 +902,28 @@ CREATE TABLE wms.sub_product_ranges ( ALTER TABLE wms.sub_product_ranges OWNER TO opendatacube; -- --- Name: dataset_location id; Type: DEFAULT; Schema: agdc; Owner: agdc_admin +-- Name: dataset_location id; Type: DEFAULT; Schema: agdc; Owner: opendatacube -- ALTER TABLE ONLY agdc.dataset_location ALTER COLUMN id SET DEFAULT nextval('agdc.dataset_location_id_seq'::regclass); -- --- Name: dataset_type id; Type: DEFAULT; Schema: agdc; Owner: agdc_admin +-- Name: dataset_type id; Type: DEFAULT; Schema: agdc; Owner: opendatacube -- ALTER TABLE ONLY agdc.dataset_type ALTER COLUMN id SET DEFAULT nextval('agdc.dataset_type_id_seq'::regclass); -- --- Name: metadata_type id; Type: DEFAULT; Schema: agdc; Owner: agdc_admin +-- Name: metadata_type id; Type: DEFAULT; Schema: agdc; Owner: opendatacube -- ALTER TABLE ONLY agdc.metadata_type ALTER COLUMN id SET DEFAULT nextval('agdc.metadata_type_id_seq'::regclass); -- --- Data for Name: dataset; Type: TABLE DATA; Schema: agdc; Owner: agdc_admin +-- Data for Name: dataset; Type: TABLE DATA; Schema: agdc; Owner: opendatacube -- COPY agdc.dataset (id, metadata_type_ref, dataset_type_ref, metadata, archived, added, added_by) FROM stdin; @@ -870,7 +977,7 @@ bc7c6bcf-05f1-5435-8b56-b1848c41de71 1 1 {"id": "bc7c6bcf-05f1-5435-8b56-b1848c4 -- --- Data for Name: dataset_location; Type: TABLE DATA; Schema: agdc; Owner: agdc_admin +-- Data for Name: dataset_location; Type: TABLE DATA; Schema: agdc; Owner: opendatacube -- COPY agdc.dataset_location (id, dataset_ref, uri_scheme, uri_body, added, added_by, archived) FROM stdin; @@ -924,7 +1031,7 @@ COPY agdc.dataset_location (id, dataset_ref, uri_scheme, uri_body, added, added_ -- --- Data for Name: dataset_source; Type: TABLE DATA; Schema: agdc; Owner: agdc_admin +-- Data for Name: dataset_source; Type: TABLE DATA; Schema: agdc; Owner: opendatacube -- COPY agdc.dataset_source (dataset_ref, classifier, source_dataset_ref) FROM stdin; @@ -932,7 +1039,7 @@ COPY agdc.dataset_source (dataset_ref, classifier, 
source_dataset_ref) FROM stdi -- --- Data for Name: dataset_type; Type: TABLE DATA; Schema: agdc; Owner: agdc_admin +-- Data for Name: dataset_type; Type: TABLE DATA; Schema: agdc; Owner: opendatacube -- COPY agdc.dataset_type (id, name, metadata, metadata_type_ref, definition, added, added_by) FROM stdin; @@ -944,7 +1051,7 @@ COPY agdc.dataset_type (id, name, metadata, metadata_type_ref, definition, added -- --- Data for Name: metadata_type; Type: TABLE DATA; Schema: agdc; Owner: agdc_admin +-- Data for Name: metadata_type; Type: TABLE DATA; Schema: agdc; Owner: opendatacube -- COPY agdc.metadata_type (id, name, definition, added, added_by) FROM stdin; @@ -1011,21 +1118,21 @@ COPY wms.sub_product_ranges (product_id, sub_product_id, lat_min, lat_max, lon_m -- --- Name: dataset_location_id_seq; Type: SEQUENCE SET; Schema: agdc; Owner: agdc_admin +-- Name: dataset_location_id_seq; Type: SEQUENCE SET; Schema: agdc; Owner: opendatacube -- SELECT pg_catalog.setval('agdc.dataset_location_id_seq', 46, true); -- --- Name: dataset_type_id_seq; Type: SEQUENCE SET; Schema: agdc; Owner: agdc_admin +-- Name: dataset_type_id_seq; Type: SEQUENCE SET; Schema: agdc; Owner: opendatacube -- SELECT pg_catalog.setval('agdc.dataset_type_id_seq', 4, true); -- --- Name: metadata_type_id_seq; Type: SEQUENCE SET; Schema: agdc; Owner: agdc_admin +-- Name: metadata_type_id_seq; Type: SEQUENCE SET; Schema: agdc; Owner: opendatacube -- SELECT pg_catalog.setval('agdc.metadata_type_id_seq', 2, true); @@ -1039,7 +1146,7 @@ SELECT pg_catalog.setval('cron.jobid_seq', 1, false); -- --- Name: dataset pk_dataset; Type: CONSTRAINT; Schema: agdc; Owner: agdc_admin +-- Name: dataset pk_dataset; Type: CONSTRAINT; Schema: agdc; Owner: opendatacube -- ALTER TABLE ONLY agdc.dataset @@ -1047,7 +1154,7 @@ ALTER TABLE ONLY agdc.dataset -- --- Name: dataset_location pk_dataset_location; Type: CONSTRAINT; Schema: agdc; Owner: agdc_admin +-- Name: dataset_location pk_dataset_location; Type: CONSTRAINT; Schema: agdc; Owner: opendatacube -- ALTER TABLE ONLY agdc.dataset_location @@ -1055,7 +1162,7 @@ ALTER TABLE ONLY agdc.dataset_location -- --- Name: dataset_source pk_dataset_source; Type: CONSTRAINT; Schema: agdc; Owner: agdc_admin +-- Name: dataset_source pk_dataset_source; Type: CONSTRAINT; Schema: agdc; Owner: opendatacube -- ALTER TABLE ONLY agdc.dataset_source @@ -1063,7 +1170,7 @@ ALTER TABLE ONLY agdc.dataset_source -- --- Name: dataset_type pk_dataset_type; Type: CONSTRAINT; Schema: agdc; Owner: agdc_admin +-- Name: dataset_type pk_dataset_type; Type: CONSTRAINT; Schema: agdc; Owner: opendatacube -- ALTER TABLE ONLY agdc.dataset_type @@ -1071,7 +1178,7 @@ ALTER TABLE ONLY agdc.dataset_type -- --- Name: metadata_type pk_metadata_type; Type: CONSTRAINT; Schema: agdc; Owner: agdc_admin +-- Name: metadata_type pk_metadata_type; Type: CONSTRAINT; Schema: agdc; Owner: opendatacube -- ALTER TABLE ONLY agdc.metadata_type @@ -1079,7 +1186,7 @@ ALTER TABLE ONLY agdc.metadata_type -- --- Name: dataset_location uq_dataset_location_uri_scheme; Type: CONSTRAINT; Schema: agdc; Owner: agdc_admin +-- Name: dataset_location uq_dataset_location_uri_scheme; Type: CONSTRAINT; Schema: agdc; Owner: opendatacube -- ALTER TABLE ONLY agdc.dataset_location @@ -1087,7 +1194,7 @@ ALTER TABLE ONLY agdc.dataset_location -- --- Name: dataset_source uq_dataset_source_source_dataset_ref; Type: CONSTRAINT; Schema: agdc; Owner: agdc_admin +-- Name: dataset_source uq_dataset_source_source_dataset_ref; Type: CONSTRAINT; Schema: agdc; Owner: opendatacube -- ALTER TABLE 
ONLY agdc.dataset_source @@ -1095,7 +1202,7 @@ ALTER TABLE ONLY agdc.dataset_source -- --- Name: dataset_type uq_dataset_type_name; Type: CONSTRAINT; Schema: agdc; Owner: agdc_admin +-- Name: dataset_type uq_dataset_type_name; Type: CONSTRAINT; Schema: agdc; Owner: opendatacube -- ALTER TABLE ONLY agdc.dataset_type @@ -1103,7 +1210,7 @@ ALTER TABLE ONLY agdc.dataset_type -- --- Name: metadata_type uq_metadata_type_name; Type: CONSTRAINT; Schema: agdc; Owner: agdc_admin +-- Name: metadata_type uq_metadata_type_name; Type: CONSTRAINT; Schema: agdc; Owner: opendatacube -- ALTER TABLE ONLY agdc.metadata_type @@ -1135,77 +1242,98 @@ ALTER TABLE ONLY wms.product_ranges -- --- Name: dix_ls5_usgs_level1_scene_lat_lon_time; Type: INDEX; Schema: agdc; Owner: agdc_admin +-- Name: dix_ls5_usgs_level1_scene_lat_lon_time; Type: INDEX; Schema: agdc; Owner: opendatacube -- CREATE INDEX dix_ls5_usgs_level1_scene_lat_lon_time ON agdc.dataset USING gist (agdc.float8range(LEAST(((metadata #>> '{extent,coord,ur,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ul,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lat}'::text[]))::double precision), GREATEST(((metadata #>> '{extent,coord,ur,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ul,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lat}'::text[]))::double precision), '[]'::text), agdc.float8range(LEAST(((metadata #>> '{extent,coord,ul,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ur,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lon}'::text[]))::double precision), GREATEST(((metadata #>> '{extent,coord,ul,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ur,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lon}'::text[]))::double precision), '[]'::text), tstzrange(LEAST(agdc.common_timestamp((metadata #>> '{extent,from_dt}'::text[])), agdc.common_timestamp((metadata #>> '{extent,center_dt}'::text[]))), GREATEST(agdc.common_timestamp((metadata #>> '{extent,to_dt}'::text[])), agdc.common_timestamp((metadata #>> '{extent,center_dt}'::text[]))), '[]'::text)) WHERE ((archived IS NULL) AND (dataset_type_ref = 3)); -- --- Name: dix_ls5_usgs_level1_scene_time_lat_lon; Type: INDEX; Schema: agdc; Owner: agdc_admin +-- Name: dix_ls5_usgs_level1_scene_time_lat_lon; Type: INDEX; Schema: agdc; Owner: opendatacube -- CREATE INDEX dix_ls5_usgs_level1_scene_time_lat_lon ON agdc.dataset USING gist (tstzrange(LEAST(agdc.common_timestamp((metadata #>> '{extent,from_dt}'::text[])), agdc.common_timestamp((metadata #>> '{extent,center_dt}'::text[]))), GREATEST(agdc.common_timestamp((metadata #>> '{extent,to_dt}'::text[])), agdc.common_timestamp((metadata #>> '{extent,center_dt}'::text[]))), '[]'::text), agdc.float8range(LEAST(((metadata #>> '{extent,coord,ur,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ul,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lat}'::text[]))::double precision), GREATEST(((metadata #>> '{extent,coord,ur,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lat}'::text[]))::double precision, ((metadata #>> 
'{extent,coord,ul,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lat}'::text[]))::double precision), '[]'::text), agdc.float8range(LEAST(((metadata #>> '{extent,coord,ul,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ur,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lon}'::text[]))::double precision), GREATEST(((metadata #>> '{extent,coord,ul,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ur,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lon}'::text[]))::double precision), '[]'::text)) WHERE ((archived IS NULL) AND (dataset_type_ref = 3)); -- --- Name: dix_ls7_usgs_level1_scene_lat_lon_time; Type: INDEX; Schema: agdc; Owner: agdc_admin +-- Name: dix_ls7_usgs_level1_scene_lat_lon_time; Type: INDEX; Schema: agdc; Owner: opendatacube -- CREATE INDEX dix_ls7_usgs_level1_scene_lat_lon_time ON agdc.dataset USING gist (agdc.float8range(LEAST(((metadata #>> '{extent,coord,ur,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ul,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lat}'::text[]))::double precision), GREATEST(((metadata #>> '{extent,coord,ur,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ul,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lat}'::text[]))::double precision), '[]'::text), agdc.float8range(LEAST(((metadata #>> '{extent,coord,ul,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ur,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lon}'::text[]))::double precision), GREATEST(((metadata #>> '{extent,coord,ul,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ur,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lon}'::text[]))::double precision), '[]'::text), tstzrange(LEAST(agdc.common_timestamp((metadata #>> '{extent,from_dt}'::text[])), agdc.common_timestamp((metadata #>> '{extent,center_dt}'::text[]))), GREATEST(agdc.common_timestamp((metadata #>> '{extent,to_dt}'::text[])), agdc.common_timestamp((metadata #>> '{extent,center_dt}'::text[]))), '[]'::text)) WHERE ((archived IS NULL) AND (dataset_type_ref = 2)); -- --- Name: dix_ls7_usgs_level1_scene_time_lat_lon; Type: INDEX; Schema: agdc; Owner: agdc_admin +-- Name: dix_ls7_usgs_level1_scene_time_lat_lon; Type: INDEX; Schema: agdc; Owner: opendatacube -- CREATE INDEX dix_ls7_usgs_level1_scene_time_lat_lon ON agdc.dataset USING gist (tstzrange(LEAST(agdc.common_timestamp((metadata #>> '{extent,from_dt}'::text[])), agdc.common_timestamp((metadata #>> '{extent,center_dt}'::text[]))), GREATEST(agdc.common_timestamp((metadata #>> '{extent,to_dt}'::text[])), agdc.common_timestamp((metadata #>> '{extent,center_dt}'::text[]))), '[]'::text), agdc.float8range(LEAST(((metadata #>> '{extent,coord,ur,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ul,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lat}'::text[]))::double precision), GREATEST(((metadata #>> '{extent,coord,ur,lat}'::text[]))::double 
precision, ((metadata #>> '{extent,coord,lr,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ul,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lat}'::text[]))::double precision), '[]'::text), agdc.float8range(LEAST(((metadata #>> '{extent,coord,ul,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ur,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lon}'::text[]))::double precision), GREATEST(((metadata #>> '{extent,coord,ul,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ur,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lon}'::text[]))::double precision), '[]'::text)) WHERE ((archived IS NULL) AND (dataset_type_ref = 2));
 
 
 --
--- Name: dix_ls8_l1_pc_usgs_lat_lon_time; Type: INDEX; Schema: agdc; Owner: agdc_admin
+-- Name: dix_ls8_l1_pc_usgs_lat_lon_time; Type: INDEX; Schema: agdc; Owner: opendatacube
 --
 
 CREATE INDEX dix_ls8_l1_pc_usgs_lat_lon_time ON agdc.dataset USING gist (agdc.float8range(LEAST(((metadata #>> '{extent,coord,ur,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ul,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lat}'::text[]))::double precision), GREATEST(((metadata #>> '{extent,coord,ur,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ul,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lat}'::text[]))::double precision), '[]'::text), agdc.float8range(LEAST(((metadata #>> '{extent,coord,ul,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ur,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lon}'::text[]))::double precision), GREATEST(((metadata #>> '{extent,coord,ul,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ur,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lon}'::text[]))::double precision), '[]'::text), tstzrange(LEAST(agdc.common_timestamp((metadata #>> '{extent,from_dt}'::text[])), agdc.common_timestamp((metadata #>> '{extent,center_dt}'::text[]))), GREATEST(agdc.common_timestamp((metadata #>> '{extent,to_dt}'::text[])), agdc.common_timestamp((metadata #>> '{extent,center_dt}'::text[]))), '[]'::text)) WHERE ((archived IS NULL) AND (dataset_type_ref = 4));
 
 
 --
--- Name: dix_ls8_l1_pc_usgs_time_lat_lon; Type: INDEX; Schema: agdc; Owner: agdc_admin
+-- Name: dix_ls8_l1_pc_usgs_time_lat_lon; Type: INDEX; Schema: agdc; Owner: opendatacube
 --
 
 CREATE INDEX dix_ls8_l1_pc_usgs_time_lat_lon ON agdc.dataset USING gist (tstzrange(LEAST(agdc.common_timestamp((metadata #>> '{extent,from_dt}'::text[])), agdc.common_timestamp((metadata #>> '{extent,center_dt}'::text[]))), GREATEST(agdc.common_timestamp((metadata #>> '{extent,to_dt}'::text[])), agdc.common_timestamp((metadata #>> '{extent,center_dt}'::text[]))), '[]'::text), agdc.float8range(LEAST(((metadata #>> '{extent,coord,ur,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ul,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lat}'::text[]))::double precision), GREATEST(((metadata #>> '{extent,coord,ur,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ul,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lat}'::text[]))::double precision), '[]'::text), agdc.float8range(LEAST(((metadata #>> '{extent,coord,ul,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ur,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lon}'::text[]))::double precision), GREATEST(((metadata #>> '{extent,coord,ul,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ur,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lon}'::text[]))::double precision), '[]'::text)) WHERE ((archived IS NULL) AND (dataset_type_ref = 4));
 
 
 --
--- Name: dix_ls8_usgs_level1_scene_lat_lon_time; Type: INDEX; Schema: agdc; Owner: agdc_admin
+-- Name: dix_ls8_usgs_level1_scene_lat_lon_time; Type: INDEX; Schema: agdc; Owner: opendatacube
 --
 
 CREATE INDEX dix_ls8_usgs_level1_scene_lat_lon_time ON agdc.dataset USING gist (agdc.float8range(LEAST(((metadata #>> '{extent,coord,ur,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ul,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lat}'::text[]))::double precision), GREATEST(((metadata #>> '{extent,coord,ur,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ul,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lat}'::text[]))::double precision), '[]'::text), agdc.float8range(LEAST(((metadata #>> '{extent,coord,ul,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ur,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lon}'::text[]))::double precision), GREATEST(((metadata #>> '{extent,coord,ul,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ur,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lon}'::text[]))::double precision), '[]'::text), tstzrange(LEAST(agdc.common_timestamp((metadata #>> '{extent,from_dt}'::text[])), agdc.common_timestamp((metadata #>> '{extent,center_dt}'::text[]))), GREATEST(agdc.common_timestamp((metadata #>> '{extent,to_dt}'::text[])), agdc.common_timestamp((metadata #>> '{extent,center_dt}'::text[]))), '[]'::text)) WHERE ((archived IS NULL) AND (dataset_type_ref = 1));
 
 
 --
--- Name: dix_ls8_usgs_level1_scene_time_lat_lon; Type: INDEX; Schema: agdc; Owner: agdc_admin
+-- Name: dix_ls8_usgs_level1_scene_time_lat_lon; Type: INDEX; Schema: agdc; Owner: opendatacube
 --
 
 CREATE INDEX dix_ls8_usgs_level1_scene_time_lat_lon ON agdc.dataset USING gist (tstzrange(LEAST(agdc.common_timestamp((metadata #>> '{extent,from_dt}'::text[])), agdc.common_timestamp((metadata #>> '{extent,center_dt}'::text[]))), GREATEST(agdc.common_timestamp((metadata #>> '{extent,to_dt}'::text[])), agdc.common_timestamp((metadata #>> '{extent,center_dt}'::text[]))), '[]'::text), agdc.float8range(LEAST(((metadata #>> '{extent,coord,ur,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ul,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lat}'::text[]))::double precision), GREATEST(((metadata #>> '{extent,coord,ur,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ul,lat}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lat}'::text[]))::double precision), '[]'::text), agdc.float8range(LEAST(((metadata #>> '{extent,coord,ul,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ur,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lon}'::text[]))::double precision), GREATEST(((metadata #>> '{extent,coord,ul,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ur,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,ll,lon}'::text[]))::double precision, ((metadata #>> '{extent,coord,lr,lon}'::text[]))::double precision), '[]'::text)) WHERE ((archived IS NULL) AND (dataset_type_ref = 1));
 
 
 --
--- Name: ix_agdc_dataset_dataset_type_ref; Type: INDEX; Schema: agdc; Owner: agdc_admin
+-- Name: ix_agdc_dataset_dataset_type_ref; Type: INDEX; Schema: agdc; Owner: opendatacube
 --
 
 CREATE INDEX ix_agdc_dataset_dataset_type_ref ON agdc.dataset USING btree (dataset_type_ref);
 
 
 --
--- Name: ix_agdc_dataset_location_dataset_ref; Type: INDEX; Schema: agdc; Owner: agdc_admin
+-- Name: ix_agdc_dataset_location_dataset_ref; Type: INDEX; Schema: agdc; Owner: opendatacube
 --
 
 CREATE INDEX ix_agdc_dataset_location_dataset_ref ON agdc.dataset_location USING btree (dataset_ref);
 
 
 --
--- Name: dataset fk_dataset_dataset_type_ref_dataset_type; Type: FK CONSTRAINT; Schema: agdc; Owner: agdc_admin
+-- Name: space_time_view_ds_idx; Type: INDEX; Schema: public; Owner: opendatacube
+--
+
+CREATE INDEX space_time_view_ds_idx ON public.space_time_view USING btree (dataset_type_ref);
+
+
+--
+-- Name: space_time_view_geom_idx; Type: INDEX; Schema: public; Owner: opendatacube
+--
+
+CREATE INDEX space_time_view_geom_idx ON public.space_time_view USING gist (spatial_extent);
+
+
+--
+-- Name: space_time_view_time_idx; Type: INDEX; Schema: public; Owner: opendatacube
+--
+
+CREATE INDEX space_time_view_time_idx ON public.space_time_view USING spgist (temporal_extent);
+
+
+--
+-- Name: dataset fk_dataset_dataset_type_ref_dataset_type; Type: FK CONSTRAINT; Schema: agdc; Owner: opendatacube
 --
 
 ALTER TABLE ONLY agdc.dataset
@@ -1213,7 +1341,7 @@ ALTER TABLE ONLY agdc.dataset
 
 
 --
--- Name: dataset_location fk_dataset_location_dataset_ref_dataset; Type: FK CONSTRAINT; Schema: agdc; Owner: agdc_admin
+-- Name: dataset_location fk_dataset_location_dataset_ref_dataset; Type: FK CONSTRAINT; Schema: agdc; Owner: opendatacube
 --
 
 ALTER TABLE ONLY agdc.dataset_location
@@ -1221,7 +1349,7 @@ ALTER TABLE ONLY agdc.dataset_location
 
 
 --
--- Name: dataset fk_dataset_metadata_type_ref_metadata_type; Type: FK CONSTRAINT; Schema: agdc; Owner: agdc_admin
+-- Name: dataset fk_dataset_metadata_type_ref_metadata_type; Type: FK CONSTRAINT; Schema: agdc; Owner: opendatacube
 --
 
 ALTER TABLE ONLY agdc.dataset
@@ -1229,7 +1357,7 @@ ALTER TABLE ONLY agdc.dataset
 
 
 --
--- Name: dataset_source fk_dataset_source_dataset_ref_dataset; Type: FK CONSTRAINT; Schema: agdc; Owner: agdc_admin
+-- Name: dataset_source fk_dataset_source_dataset_ref_dataset; Type: FK CONSTRAINT; Schema: agdc; Owner: opendatacube
 --
 
 ALTER TABLE ONLY agdc.dataset_source
@@ -1237,7 +1365,7 @@ ALTER TABLE ONLY agdc.dataset_source
 
 
 --
--- Name: dataset_source fk_dataset_source_source_dataset_ref_dataset; Type: FK CONSTRAINT; Schema: agdc; Owner: agdc_admin
+-- Name: dataset_source fk_dataset_source_source_dataset_ref_dataset; Type: FK CONSTRAINT; Schema: agdc; Owner: opendatacube
 --
 
 ALTER TABLE ONLY agdc.dataset_source
@@ -1245,7 +1373,7 @@ ALTER TABLE ONLY agdc.dataset_source
 
 
 --
--- Name: dataset_type fk_dataset_type_metadata_type_ref_metadata_type; Type: FK CONSTRAINT; Schema: agdc; Owner: agdc_admin
+-- Name: dataset_type fk_dataset_type_metadata_type_ref_metadata_type; Type: FK CONSTRAINT; Schema: agdc; Owner: opendatacube
 --
 
 ALTER TABLE ONLY agdc.dataset_type
@@ -1282,87 +1410,53 @@ CREATE POLICY cron_job_policy ON cron.job USING ((username = (CURRENT_USER)::tex
 ALTER TABLE cron.job ENABLE ROW LEVEL SECURITY;
 
 --
--- Name: SCHEMA agdc; Type: ACL; Schema: -; Owner: agdc_admin
---
-
-GRANT USAGE ON SCHEMA agdc TO agdc_user;
-GRANT CREATE ON SCHEMA agdc TO agdc_manage;
-
-
---
--- Name: FUNCTION common_timestamp(text); Type: ACL; Schema: agdc; Owner: agdc_admin
+-- Name: TABLE space_view; Type: ACL; Schema: public; Owner: opendatacube
 --
 
-GRANT ALL ON FUNCTION agdc.common_timestamp(text) TO agdc_user;
+GRANT SELECT ON TABLE public.space_view TO replicator;
 
 
 --
--- Name: TABLE dataset; Type: ACL; Schema: agdc; Owner: agdc_admin
+-- Name: TABLE time_view; Type: ACL; Schema: public; Owner: opendatacube
 --
 
-GRANT SELECT ON TABLE agdc.dataset TO agdc_user;
-GRANT INSERT ON TABLE agdc.dataset TO agdc_ingest;
+GRANT SELECT ON TABLE public.time_view TO replicator;
 
 
 --
--- Name: TABLE dataset_location; Type: ACL; Schema: agdc; Owner: agdc_admin
+-- Name: TABLE space_time_view; Type: ACL; Schema: public; Owner: opendatacube
 --
 
-GRANT SELECT ON TABLE agdc.dataset_location TO agdc_user;
-GRANT INSERT ON TABLE agdc.dataset_location TO agdc_ingest;
+GRANT SELECT ON TABLE public.space_time_view TO replicator;
 
 
 --
--- Name: SEQUENCE dataset_location_id_seq; Type: ACL; Schema: agdc; Owner: agdc_admin
---
-
-GRANT SELECT,USAGE ON SEQUENCE agdc.dataset_location_id_seq TO agdc_ingest;
-
-
---
--- Name: TABLE dataset_source; Type: ACL; Schema: agdc; Owner: agdc_admin
---
-
-GRANT SELECT ON TABLE agdc.dataset_source TO agdc_user;
-GRANT INSERT ON TABLE agdc.dataset_source TO agdc_ingest;
-
-
---
--- Name: TABLE dataset_type; Type: ACL; Schema: agdc; Owner: agdc_admin
---
-
-GRANT SELECT ON TABLE agdc.dataset_type TO agdc_user;
-GRANT INSERT,DELETE ON TABLE agdc.dataset_type TO agdc_manage;
-
-
---
--- Name: SEQUENCE dataset_type_id_seq; Type: ACL; Schema: agdc; Owner: agdc_admin
+-- Name: DEFAULT PRIVILEGES FOR TABLES; Type: DEFAULT ACL; Schema: public; Owner: opendatacube
 --
 
-GRANT SELECT,USAGE ON SEQUENCE agdc.dataset_type_id_seq TO agdc_ingest;
+ALTER DEFAULT PRIVILEGES FOR ROLE opendatacube IN SCHEMA public REVOKE ALL ON TABLES FROM opendatacube;
+ALTER DEFAULT PRIVILEGES FOR ROLE opendatacube IN SCHEMA public GRANT SELECT ON TABLES TO replicator;
 
 
 --
--- Name: TABLE metadata_type; Type: ACL; Schema: agdc; Owner: agdc_admin
+-- Name: space_view; Type: MATERIALIZED VIEW DATA; Schema: public; Owner: opendatacube
 --
 
-GRANT SELECT ON TABLE agdc.metadata_type TO agdc_user;
-GRANT INSERT,DELETE ON TABLE agdc.metadata_type TO agdc_manage;
+REFRESH MATERIALIZED VIEW public.space_view;
 
 
 --
--- Name: SEQUENCE metadata_type_id_seq; Type: ACL; Schema: agdc; Owner: agdc_admin
+-- Name: time_view; Type: MATERIALIZED VIEW DATA; Schema: public; Owner: opendatacube
 --
 
-GRANT SELECT,USAGE ON SEQUENCE agdc.metadata_type_id_seq TO agdc_ingest;
+REFRESH MATERIALIZED VIEW public.time_view;
 
 
 --
--- Name: DEFAULT PRIVILEGES FOR TABLES; Type: DEFAULT ACL; Schema: public; Owner: opendatacube
+-- Name: space_time_view; Type: MATERIALIZED VIEW DATA; Schema: public; Owner: opendatacube
 --
 
-ALTER DEFAULT PRIVILEGES FOR ROLE opendatacube IN SCHEMA public REVOKE ALL ON TABLES FROM opendatacube;
-ALTER DEFAULT PRIVILEGES FOR ROLE opendatacube IN SCHEMA public GRANT SELECT ON TABLES TO replicator;
+REFRESH MATERIALIZED VIEW public.space_time_view;
 
 
 --
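The three space_time_view indexes added in this patch (btree on dataset_type_ref, GiST on spatial_extent, SP-GiST on temporal_extent) line up with the lookup OWS has to make: restrict by product, bounding box and time window in one pass. A minimal sketch of such a query follows; the bounding box, dates and the assumption that spatial_extent is stored in EPSG:4326 (as the ST_Transform calls in the earlier patches suggest) are illustrative only, not part of the patch series.

-- Hypothetical extent lookup against the new view; values are placeholders.
-- The geometry overlap (&&) can use the GiST index, the range overlap the SP-GiST index.
SELECT id, dataset_type_ref
FROM public.space_time_view
WHERE spatial_extent && ST_MakeEnvelope(146.0, -35.0, 147.0, -34.0, 4326)
  AND temporal_extent && tstzrange('2020-01-01', '2020-02-01', '[]');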
From 7ba8f156fa44fe8a1fcfaedb4d4e439902d586bf Mon Sep 17 00:00:00 2001
From: phaesler
Date: Fri, 1 May 2020 17:45:30 +1000
Subject: [PATCH 30/30] Better error message when mat views are missing.

---
 datacube_ows/update_ranges.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/datacube_ows/update_ranges.py b/datacube_ows/update_ranges.py
index ba6d4e6de..a8d802642 100755
--- a/datacube_ows/update_ranges.py
+++ b/datacube_ows/update_ranges.py
@@ -2,7 +2,9 @@
 
 from datacube_ows.product_ranges import get_sqlconn, add_ranges
 from datacube import Datacube
+import psycopg2
 from psycopg2.sql import SQL
+import sqlalchemy
 from datacube_ows.ows_configuration import get_config
 import os
 import click
@@ -101,7 +103,15 @@ def main(products,
     print("Deriving extents from materialised views")
     if not products:
         products = list(get_config().product_index.keys())
-    add_ranges(dc, products, summary, merge_only)
+    try:
+        add_ranges(dc, products, summary, merge_only)
+    except (psycopg2.errors.UndefinedColumn,
+            sqlalchemy.exc.ProgrammingError):
+        print("ERROR: OWS schema or extent materialised views appear to be missing",
+              "\n",
+              "      Try running with the --schema and --views options first."
+        )
+        return 1
     return 0
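Patch 30 detects the missing extent views indirectly, by catching the psycopg2.errors.UndefinedColumn or sqlalchemy.exc.ProgrammingError that add_ranges provokes. When troubleshooting by hand, the same condition can be checked up front from PostgreSQL's standard pg_matviews catalog view; a sketch, assuming the view names created by the earlier patches:

-- Lists whichever extent views exist; ispopulated stays false until the
-- first REFRESH MATERIALIZED VIEW has run.
SELECT matviewname, ispopulated
FROM pg_matviews
WHERE schemaname = 'public'
  AND matviewname IN ('space_view', 'time_view', 'space_time_view');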