Skip to content

Commit

Permalink
Geomedian filmstrip plots of change through time notebook (#482)
Browse files Browse the repository at this point in the history
Very easy to use, nice work.
  • Loading branch information
robbibt authored and s-m-t-c committed Jan 23, 2020
1 parent daaf88e commit 953a7be
Show file tree
Hide file tree
Showing 4 changed files with 1,032 additions and 0 deletions.
731 changes: 731 additions & 0 deletions Real_world_examples/Change_filmstrips.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Real_world_examples/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ More complex workflows demonstrating how DEA can be used to address real-world p
:caption: Real World Examples

Change_detection.ipynb
Change_filmstrips.ipynb
Chlorophyll_monitoring.ipynb
Coastal_erosion.ipynb
Intertidal_elevation.ipynb
Expand Down
300 changes: 300 additions & 0 deletions Scripts/notebookapp_changefilmstrips.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,300 @@
# notebookapp_changefilmstrips.py
'''
This file contains functions for loading and interacting with data in the
change filmstrips notebook, inside the Real_world_examples folder.
Available functions:
run_filmstrip_app
Last modified: January 2020
'''

# Load modules
import os
import sys
import dask
import datacube
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
from odc.algo import xr_geomedian
from odc.ui import select_on_a_map
from dask.utils import parse_bytes
from datacube.utils.geometry import CRS
from datacube.utils.rio import configure_s3_access
from datacube.utils.dask import start_local_dask


# Load utility functions
sys.path.append('../Scripts')
from dea_datahandling import load_ard
from dea_coastaltools import tidal_tag
from dea_datahandling import mostcommon_crs


def run_filmstrip_app(output_name,
time_range,
time_step,
tide_range=(0.0, 1.0),
resolution=(-30, 30),
max_cloud=50,
ls7_slc_off=False,
size_limit=100):
'''
An interactive app that allows the user to select a region from a
map, then load Digital Earth Australia Landsat data and combine it
using the geometric median ("geomedian") statistic to reveal the
median or 'typical' appearance of the landscape for a series of
time periods.
The results for each time period are combined into a 'filmstrip'
plot which visualises how the landscape has changed in appearance
across time, with a 'change heatmap' panel highlighting potential
areas of greatest change.
For coastal applications, the analysis can be customised to select
only satellite images obtained during a specific tidal range
(e.g. low, average or high tide).
Last modified: January 2020
Parameters
----------
output_name : str
A name that will be used to name the output filmstrip plot file.
time_range : tuple
A tuple giving the date range to analyse
(e.g. `time_range = ('1988-01-01', '2017-12-31')`).
time_step : dict
This parameter sets the length of the time periods to compare
(e.g. `time_step = {'years': 5}` will generate one filmstrip
plot for every five years of data; `time_step = {'months': 18}`
will generate one plot for each 18 month period etc. Time
periods are counted from the first value given in `time_range`.
tide_range : tuple, optional
An optional parameter that can be used to generate filmstrip
plots based on specific ocean tide conditions. This can be
valuable for analysing change consistently along the coast.
For example, `tide_range = (0.0, 0.2)` will select only
satellite images acquired at the lowest 20% of tides;
`tide_range = (0.8, 1.0)` will select images from the highest
20% of tides. The default is `tide_range = (0.0, 1.0)` which
will select all images regardless of tide.
resolution : tuple, optional
The spatial resolution to load data. The default is
`resolution = (-30, 30)`, which will load data at 30 m pixel
resolution. Increasing this (e.g. to `resolution = (-100, 100)`)
can be useful for loading large spatial extents.
max_cloud : int, optional
This parameter can be used to exclude satellite images with
excessive cloud. The default is `50`, which will keep all images
with less than 50% cloud.
ls7_slc_off : bool, optional
An optional boolean indicating whether to include data from
after the Landsat 7 SLC failure (i.e. SLC-off). Defaults to
False, which removes all Landsat 7 observations > May 31 2003.
Returns
-------
ds_geomedian : xarray Dataset
An xarray dataset containing geomedian composites for each
timestep in the analysis.
'''

#########
# Setup #
#########

# Connect to datacube database
dc = datacube.Datacube(app='DEA_notebooks_template')

# Configure dashboard link to go over proxy
dask.config.set({"distributed.dashboard.link":
os.environ.get('JUPYTERHUB_SERVICE_PREFIX', '/')+"proxy/{port}/status"});

# Figure out how much memory/cpu we really have (those are set by jupyterhub)
mem_limit = int(os.environ.get('MEM_LIMIT', '0'))
cpu_limit = float(os.environ.get('CPU_LIMIT', '0'))
cpu_limit = int(cpu_limit) if cpu_limit > 0 else 4
mem_limit = mem_limit if mem_limit > 0 else parse_bytes('8Gb')

# Leave 4Gb for notebook itself
mem_limit -= parse_bytes('4Gb')

# Close previous client if any, so that one can re-run this cell
client = locals().get('client', None)
if client is not None:
client.close()
del client

# Start dask client
client = start_local_dask(n_workers=1,
threads_per_worker=cpu_limit,
memory_limit=mem_limit)
display(client)

# Configure GDAL for s3 access
configure_s3_access(aws_unsigned=True,
client=client);


########################
# Select and load data #
########################

# Define centre_coords as a global variable
global centre_coords

# Test if centre_coords is in the global namespace;
# use default value if it isn't
if 'centre_coords' not in globals():
centre_coords = (-33.9719, 151.1934)

# Plot interactive map to select area
geopolygon = select_on_a_map(height='600px',
center=centre_coords , zoom=12)

# Set centre coords based on most recent selection to re-focus
# subsequent data selections
centre_coords = geopolygon.centroid.points[0][::-1]

# Test size of selected area
area = (geopolygon.to_crs(crs = CRS('epsg:3577')).area /
(size_limit * 1000000))
radius = np.round(np.sqrt(size_limit), 1)
if area > size_limit:
print(f'Warning: Your selected area is {area:.00f} square '
f'kilometers. \nPlease select an area of less than '
f'{size_limit} square kilometers (e.g. {radius} x {radius}'
f' km) . \nTo select a smaller area, re-run the cell '
f'above and draw a new polygon.')

else:

print('Starting analysis...')

# Obtain native CRS
crs = mostcommon_crs(dc=dc,
product='ga_ls5t_ard_3',
query={'time': '1990',
'geopolygon': geopolygon})

# Create query based on time range, area selected, custom params
query = {'time': time_range,
'geopolygon': geopolygon,
'output_crs': crs,
'gqa_iterative_mean_xy': [0, 1],
'cloud_cover': [0, max_cloud],
'resolution': resolution,
'align': (resolution[1] / 2.0, resolution[1] / 2.0)}

# Load data from all three Landsats
ds = load_ard(dc=dc,
measurements=['nbart_red',
'nbart_green',
'nbart_blue'],
products=['ga_ls5t_ard_3',
'ga_ls7e_ard_3',
'ga_ls8c_ard_3'],
min_gooddata=0.0,
lazy_load=True,
ls7_slc_off=ls7_slc_off,
**query)

# Optionally calculate tides for each timestep in the satellite
# dataset and drop any observations out side this range
if tide_range != (0.0, 1.0):
ds = tidal_tag(ds=ds, tidepost_lat=None, tidepost_lon=None)
min_tide, max_tide = ds.tide_height.quantile(tide_range).values
ds = ds.sel(time = (ds.tide_height >= min_tide) &
(ds.tide_height <= max_tide))
ds = ds.drop('tide_height')
print(f' Keeping {len(ds.time)} observations with tides '
f'between {min_tide:.2f} and {max_tide:.2f} m')

# Create time step ranges to generate filmstrips from
bins_dt = pd.date_range(start=time_range[0],
end=time_range[1],
freq=pd.DateOffset(**time_step))

# Bin all satellite observations by timestep. If some observations
# fall outside the upper bin, label these with the highest bin
labels = bins_dt.astype('str')
time_steps = (pd.cut(ds.time.values, bins_dt, labels = labels[:-1])
.add_categories(labels[-1])
.fillna(labels[-1]))
time_steps_var = xr.DataArray(time_steps, [('time', ds.time)],
name='timestep')

# Resample data temporally into time steps, and compute geomedians
ds_geomedian = (ds.groupby(time_steps_var)
.apply(lambda ds_subset:
xr_geomedian(ds_subset,
num_threads=1, # disable internal threading, dask will run several concurrently
eps=0.2 * (1 / 10_000), # 1/5 pixel value resolution
nocheck=True))) # disable some checks inside geomedian library that use too much ram

print('\nGenerating geomedian composites and plotting '
'filmstrips... (click the Dashboard link above for status)')
ds_geomedian = ds_geomedian.compute()

# Reset CRS that is lost during geomedian compositing
ds_geomedian.attrs['crs'] = ds.crs


############
# Plotting #
############

# Convert to array and extract vmin/vmax
output_array = ds_geomedian[['nbart_red', 'nbart_green',
'nbart_blue']].to_array()
percentiles = output_array.quantile(q=(0.02, 0.98)).values

# Create the plot with one subplot more than timesteps in the
# dataset. Figure width is set based on the number of subplots
# and aspect ratio
n_obs = output_array.sizes['timestep']
ratio = output_array.sizes['x'] / output_array.sizes['y']
fig, axes = plt.subplots(1, n_obs + 1,
figsize=(5 * ratio * (n_obs + 1), 5))
fig.subplots_adjust(wspace=0.05, hspace=0.05)

# Add timesteps to the plot, set aspect to equal to preserve shape
for i, ax_i in enumerate(axes.flatten()[:n_obs]):
output_array.isel(timestep=i).plot.imshow(ax=ax_i,
vmin=percentiles[0],
vmax=percentiles[1])
ax_i.get_xaxis().set_visible(False)
ax_i.get_yaxis().set_visible(False)
ax_i.set_aspect('equal')

# Add change heatmap panel to final subplot. Heatmap is computed
# by first taking the log of the array (so change in dark areas
# can be identified), then computing standard deviation between
# all timesteps
(np.log(output_array)
.std(dim=['timestep'])
.mean(dim='variable')
.plot.imshow(ax=axes.flatten()[-1],
robust=True,
cmap='magma',
add_colorbar=False))
axes.flatten()[-1].get_xaxis().set_visible(False)
axes.flatten()[-1].get_yaxis().set_visible(False)
axes.flatten()[-1].set_aspect('equal')
axes.flatten()[-1].set_title('Change heatmap')

# Export to file
date_string = '_'.join(time_range)
ts_v = list(time_step.values())[0]
ts_k = list(time_step.keys())[0]
fig.savefig(f'filmstrip_{output_name}_{date_string}_{ts_v}{ts_k}.png',
dpi=150,
bbox_inches='tight',
pad_inches=0.1)

return ds_geomedian

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 953a7be

Please sign in to comment.