Skip to content

Commit

Permalink
Improve performance by removing dask load
Browse files Browse the repository at this point in the history
  • Loading branch information
robbibt committed Dec 7, 2022
1 parent 27bd449 commit 375a33f
Show file tree
Hide file tree
Showing 3 changed files with 288 additions and 54 deletions.
71 changes: 34 additions & 37 deletions coastlines/vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,13 @@
from shapely.geometry import box
from shapely.ops import nearest_points
from skimage.measure import label, regionprops
from skimage.morphology import binary_closing, binary_dilation, dilation, disk
from skimage.morphology import (
binary_closing,
binary_dilation,
binary_erosion,
dilation,
disk,
)

from coastlines.utils import configure_logging, load_config
from dea_tools.spatial import subpixel_contours, xr_vectorize, xr_rasterize
Expand Down Expand Up @@ -190,7 +196,7 @@ def load_rasters(
# return ocean_mask


def ocean_masking(ds, ocean_land_da, connectivity=1, dilation=None, land_dilation=10):
def ocean_masking(ds, ocean_da, connectivity=1, dilation=None):
"""
Identifies ocean by selecting regions of water that overlap
with ocean pixels. This region can be optionally dilated to
Expand All @@ -203,25 +209,20 @@ def ocean_masking(ds, ocean_land_da, connectivity=1, dilation=None, land_dilatio
An array containing True for land pixels, and False for water.
This can be obtained by thresholding a water index
array (e.g. MNDWI < 0).
ocean_land_da : xarray.DataArray
A supplementary dataset used to separate ocean waters from
other inland water. The array should contain values of 0
for ocean, and values of 1 or greater for land pixels.
For Australia, we use the Geodata 100K coastline dataset,
rasterized as the "geodata_coast_100k" product on the
DEA datacube.
ocean_da : xarray.DataArray
A supplementary static dataset used to separate ocean waters
from other inland water. The array should contain values of 1
for high certainty ocean pixels, and 0 for all other pixels
(land, inland water etc). For Australia, we use the Geodata
100K coastline dataset, rasterized as the "geodata_coast_100k"
product on the DEA datacube.
connectivity : integer, optional
An integer passed to the 'connectivity' parameter of the
`skimage.measure.label` function.
dilation : integer, optional
The number of pixels to dilate ocean pixels to ensure than
adequate land pixels are included for subpixel waterline
extraction. Defaults to None.
land_dilation : integer, optional
The number of pixels by which to dilate land pixels in
`ocean_land_da`. This ensures that we only select true
deeper ocean pixels when separating inland from ocean
waters.
Returns:
--------
Expand All @@ -230,11 +231,8 @@ def ocean_masking(ds, ocean_land_da, connectivity=1, dilation=None, land_dilatio
pixels as True.
"""

# Identify pixels that are land in either Geodata or ds
land_mask = (ocean_land_da != 0) | ds

# Dilate to restrict to deeper ocean, and invert to get water
water_mask = ~odc.algo.binary_dilation(land_mask, land_dilation)
# Update `ocean_da` to mask out any pixels that are land in `ds` too
ocean_da = ocean_da & (ds != 1)

# First, break all time array into unique, discrete regions/blobs
blobs = xr.apply_ufunc(label, ds, 1, False, connectivity)
Expand All @@ -244,11 +242,7 @@ def ocean_masking(ds, ocean_land_da, connectivity=1, dilation=None, land_dilatio
# it does, then it is considered to be directly connected with the
# ocean; if not, then it is an inland waterbody.
ocean_mask = blobs.isin(
[
i.label
for i in regionprops(blobs.values, water_mask.values)
if i.max_intensity
]
[i.label for i in regionprops(blobs.values, ocean_da.values) if i.max_intensity]
)

# Dilate mask so that we include land pixels on the inland side
Expand All @@ -260,7 +254,7 @@ def ocean_masking(ds, ocean_land_da, connectivity=1, dilation=None, land_dilatio
return ocean_mask


def coastal_masking(ds, ocean_land_da, buffer=50, closing=None):
def coastal_masking(ds, ocean_da, buffer=50, closing=None):
"""
Creates a symmetrical buffer around the land-water boundary
in a input boolean array. This is used to create a study area
Expand All @@ -272,13 +266,13 @@ def coastal_masking(ds, ocean_land_da, buffer=50, closing=None):
ds : xarray.DataArray
A single time-step boolean array containing True for land
pixels, and False for water.
ocean_land_da : xarray.DataArray
A supplementary dataset used to separate ocean waters from
other inland water. The array should contain values of 0
for ocean, and values of 1 or greater for land pixels.
For Australia, we use the Geodata 100K coastline dataset,
rasterized as the "geodata_coast_100k" product on the
DEA datacube.
ocean_da : xarray.DataArray
A supplementary static dataset used to separate ocean waters
from other inland water. The array should contain values of 1
for high certainty ocean pixels, and 0 for all other pixels
(land, inland water etc). For Australia, we use the Geodata
100K coastline dataset, rasterized as the "geodata_coast_100k"
product on the DEA datacube.
buffer : integer, optional
The number of pixels to buffer the land-water boundary in
each direction.
Expand All @@ -303,7 +297,7 @@ def _coastal_buffer(ds, buffer):

# Identify ocean pixels based on overlap with the Geodata
# 100K coastline dataset
all_time_ocean = ocean_masking(ds, ocean_land_da)
all_time_ocean = ocean_masking(ds, ocean_da)

# Generate coastal buffer from ocean-land boundary
coastal_mask = xr.apply_ufunc(
Expand Down Expand Up @@ -632,19 +626,22 @@ def contours_preprocess(

# Load Geodata 100K coastal layer to use to separate ocean waters from
# other inland waters. This product has values of 0 for ocean waters,
# and values of 1 and 2 for mainland/island pixels.
# and values of 1 and 2 for mainland/island pixels. We extract ocean
# pixels (value 0), then erode these by 10 pixels to ensure we only
# use high certainty deeper water ocean regions for identifying ocean
# pixel in our satellite imagery
dc = datacube.Datacube()
geodata_da = dc.load(
product="geodata_coast_100k",
like=all_time.odc.geobox.compat,
dask_chunks={},
).land.squeeze("time")
ocean_da = xr.apply_ufunc(binary_erosion, geodata_da == 0, disk(10))

# Use all time and Geodata 100K data to produce the buffered coastal
# study area. Morphological closing helps to "close" the entrances of
# estuaries and rivers, removing them from the analysis.
coastal_mask = coastal_masking(
ds=all_time, ocean_land_da=geodata_da, buffer=buffer_pixels, closing=15
ds=all_time, ocean_da=ocean_da, buffer=buffer_pixels, closing=15
)

# Optionally modify the coastal mask using manually supplied polygons to
Expand Down Expand Up @@ -680,7 +677,7 @@ def contours_preprocess(
(thresholded_ds != 0) # Set both 1s and NaN to True
.where(~inland_mask, 1)
.groupby("year")
.apply(lambda x: ocean_masking(x, geodata_da, 1, 3))
.apply(lambda x: ocean_masking(x, ocean_da, 1, 3))
)

# Keep pixels within annual mask layers, all time coastal buffer,
Expand Down
77 changes: 65 additions & 12 deletions notebooks/DEACoastlines_generation_vector.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,17 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/jovyan/Robbi/dea-coastlines\n"
]
}
],
"source": [
"cd .."
]
Expand All @@ -48,16 +56,26 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[33mWARNING: You are using pip version 22.0.2; however, version 22.3.1 is available.\n",
"You should consider upgrading via the '/env/bin/python -m pip install --upgrade pip' command.\u001b[0m\u001b[33m\n",
"\u001b[0mNote: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"pip install -r requirements.in --quiet"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -94,7 +112,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -122,9 +140,35 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<xarray.Dataset>\n",
"Dimensions: (year: 34, y: 2146, x: 2043)\n",
"Coordinates:\n",
" * year (year) int64 1988 1989 1990 1991 1992 ... 2017 2018 2019 2020 2021\n",
" * y (y) float64 -2.996e+06 -2.996e+06 ... -3.06e+06 -3.06e+06\n",
" * x (x) float64 5.134e+05 5.135e+05 5.135e+05 ... 5.747e+05 5.747e+05\n",
"Data variables:\n",
" mndwi (year, y, x) float32 0.02181 0.1959 0.3493 ... 0.4211 0.52 0.4345\n",
" count (year, y, x) int16 4 4 4 4 4 4 4 4 4 ... 15 15 16 16 16 15 15 15 15\n",
" stdev (year, y, x) float32 0.2889 0.279 0.1988 ... 0.3327 0.2737 0.3331\n",
"Attributes:\n",
" transform: (30.0, 0.0, 513435.0, 0.0, -30.0, -2995755.0)\n",
" crs: +init=epsg:32656\n",
" res: (30.0, 30.0)\n",
" is_tiled: 1\n",
" nodatavals: (nan,)\n",
" scales: (1.0,)\n",
" offsets: (0.0,)\n",
" AREA_OR_POINT: Area\n"
]
}
],
"source": [
"yearly_ds, gapfill_ds = coastlines.vector.load_rasters(\n",
" path=\"data/interim/raster\",\n",
Expand All @@ -150,7 +194,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -193,7 +237,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -211,9 +255,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 67,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Operating in single z-value, multiple arrays mode\n",
"Writing contours to temp.geojson\n"
]
}
],
"source": [
"# Extract contours\n",
"contours_gdf = subpixel_contours(\n",
Expand Down
Loading

0 comments on commit 375a33f

Please sign in to comment.