Lazy rectilinear interpolator, with sparse #6006

Open
wants to merge 9 commits into base: main
54 changes: 39 additions & 15 deletions lib/iris/analysis/_interpolation.py
@@ -8,10 +8,12 @@
from itertools import product
import operator

+ import dask.array
import numpy as np
from numpy.lib.stride_tricks import as_strided
import numpy.ma as ma

+ from iris._lazy_data import is_lazy_data, is_masked_data
from iris.coords import AuxCoord, DimCoord
import iris.util

@@ -200,13 +202,8 @@ def __init__(self, src_cube, coords, method, extrapolation_mode):
set to NaN.

"""
- # Trigger any deferred loading of the source cube's data and snapshot
- # its state to ensure that the interpolator is impervious to external
- # changes to the original source cube. The data is loaded to prevent
- # the snapshot having lazy data, avoiding the potential for the
- # same data to be loaded again and again.
- if src_cube.has_lazy_data():
- src_cube.data
+ # Snapshot the cube state to ensure that the interpolator is impervious
+ # to external changes to the original source cube.
self._src_cube = src_cube.copy()
# Coordinates defining the dimensions to be interpolated.
self._src_coords = [self._src_cube.coord(coord) for coord in coords]
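A hedged aside (not part of the diff): the eager load can be dropped because copying a cube preserves a lazy payload, so the snapshot no longer forces data into memory. A minimal sketch:

import dask.array as da
from iris.cube import Cube

cube = Cube(da.zeros((3, 4), chunks=(2, 2)))
print(cube.has_lazy_data())          # True
print(cube.copy().has_lazy_data())   # True -- the snapshot is still lazy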
@@ -315,20 +312,21 @@ def _interpolate(self, data, interp_points):
data = data.astype(dtype)

mode = EXTRAPOLATION_MODES[self._mode]
+ _data = _get_data(data)
if self._interpolator is None:
# Cache the interpolator instance.
# NB. The constructor of the _RegularGridInterpolator class does
# some unnecessary checks on the fill_value parameter,
# so we set it afterwards instead. Sneaky. ;-)
self._interpolator = _RegularGridInterpolator(
self._src_points,
- data,
+ _data,
method=self.method,
bounds_error=mode.bounds_error,
fill_value=None,
)
else:
- self._interpolator.values = data
+ self._interpolator.values = _data

# We may be re-using a cached interpolator, so ensure the fill
# value is set appropriately for extrapolating data values.
@@ -341,17 +339,16 @@
# interpolation points.
result = result.astype(data.dtype)

- if np.ma.isMaskedArray(data) or mode.force_mask:
- # NB. np.ma.getmaskarray returns an array of `False` if
+ if _is_masked_array(data) or mode.force_mask:
+ # NB. getmaskarray returns an array of `False` if
# `data` is not a masked array.
- src_mask = np.ma.getmaskarray(data)
+ src_mask = _get_mask_array(data)
# Switch the extrapolation to work with mask values.
self._interpolator.fill_value = mode.mask_fill_value
self._interpolator.values = src_mask
mask_fraction = self._interpolator(interp_points)
new_mask = mask_fraction > 0
- if ma.isMaskedArray(data) or np.any(new_mask):
- result = np.ma.MaskedArray(result, new_mask)
+ result = iris.util._mask_array(result, new_mask)

return result
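As a hedged aside (a toy NumPy/SciPy sketch, not the PR's code), the mask handling above works by interpolating the boolean source mask as floats, then masking every target point that draws any weight from a masked source point:

import numpy as np
from scipy.interpolate import RegularGridInterpolator

points = (np.arange(4.0),)
values = np.ma.masked_array(np.arange(4.0) ** 2, mask=[False, False, True, False])

data_interp = RegularGridInterpolator(points, np.ma.getdata(values))
mask_interp = RegularGridInterpolator(points, np.ma.getmaskarray(values).astype(float))

xi = np.array([[0.5], [1.5]])
new_mask = mask_interp(xi) > 0  # any contribution from a masked source point
print(np.ma.MaskedArray(data_interp(xi), new_mask))  # [0.5 --]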

@@ -592,7 +589,7 @@ def __call__(self, sample_points, collapse_scalar=True):

sample_points = _canonical_sample_points(self._src_coords, sample_points)

- data = self._src_cube.data
+ data = self._src_cube.core_data()
# Interpolate the cube payload.
interpolated_data = self._points(sample_points, data)
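For context (a sketch using the public iris API, not from the diff): core_data() returns the lazy dask array when one exists and the realised NumPy array otherwise, so this line no longer triggers a load:

import dask.array as da
import numpy as np
from iris.cube import Cube

print(type(Cube(da.arange(6, chunks=3)).core_data()))  # dask Array, uncomputed
print(type(Cube(np.arange(6)).core_data()))            # numpy ndarray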

@@ -668,3 +665,30 @@ def gen_new_cube():
new_cube = new_cube[tuple(dim_slices)]

return new_cube


+ def _is_masked_array(array):
+ """Equivalent to :func:`numpy.ma.isMaskedArray`, but works for both lazy AND realised arrays."""
+ if is_lazy_data(array):
+ is_masked_array = is_masked_data(array)
+ else:
+ is_masked_array = np.ma.isMaskedArray(array)
+ return is_masked_array
+
+
+ def _get_data(array):
+ """Equivalent to :func:`numpy.ma.getdata`, but works for both lazy AND realised arrays."""
+ if is_lazy_data(array):
+ result = dask.array.ma.getdata(array)
+ else:
+ result = np.ma.getdata(array)
+ return result
+
+
+ def _get_mask_array(array):
+ """Equivalent to :func:`numpy.ma.getmaskarray`, but works for both lazy AND realised arrays."""
+ if is_lazy_data(array):
+ result = dask.array.ma.getmaskarray(array)
+ else:
+ result = np.ma.getmaskarray(array)
+ return result
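A hedged usage sketch (made-up inputs) of the NumPy/dask pairs these helpers dispatch between:

import dask.array as da
import numpy as np

real = np.ma.masked_array([1.0, 2.0, 3.0], mask=[False, True, False])
lazy = da.ma.masked_array(da.from_array([1.0, 2.0, 3.0], chunks=2),
                          mask=[False, True, False])

print(np.ma.getmaskarray(real))            # [False  True False], eager
print(da.ma.getmaskarray(lazy).compute())  # same result, evaluated on demand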
24 changes: 17 additions & 7 deletions lib/iris/analysis/_scipy_interpolate.py
@@ -1,7 +1,10 @@
import itertools

+ import dask.array
import numpy as np
from scipy.sparse import csr_matrix
+ from sparse import GCXS

+ from iris._lazy_data import is_lazy_data

# ============================================================================
# | Copyright SciPy |
@@ -218,7 +221,7 @@ def compute_interp_weights(self, xi, method=None):
n_result_values = len(indices[0])
n_non_zero = n_result_values * n_src_values_per_result_value
weights = np.ones(n_non_zero, dtype=norm_distances[0].dtype)
- col_indices = np.empty(n_non_zero)
+ col_indices = np.empty(n_non_zero, dtype=int)
row_ptrs = np.arange(
0,
n_non_zero + n_src_values_per_result_value,
@@ -238,11 +241,13 @@
weights[i::n_src_values_per_result_value] *= cw

n_src_values = np.prod(list(map(len, self.grid)))
- sparse_matrix = csr_matrix(
+ sparse_matrix = GCXS(
(weights, col_indices, row_ptrs),
+ compressed_axes=[0],
shape=(n_result_values, n_src_values),
)

+ if is_lazy_data(self.values):
+ sparse_matrix = dask.array.from_array(sparse_matrix)
prepared = (xi_shape, method, sparse_matrix, None, out_of_bounds)

return prepared
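A minimal sketch (made-up weights) of the construction above: the same (data, indices, indptr) triple that previously fed csr_matrix builds a CSR-layout GCXS when compressed_axes=[0], and the result can be wrapped as a dask array for the lazy path:

import dask.array
import numpy as np
from sparse import GCXS

weights = np.array([0.25, 0.75, 0.5, 0.5])  # two result points, two weights each
col_indices = np.array([0, 1, 2, 3])
row_ptrs = np.array([0, 2, 4])

sparse_matrix = GCXS(
    (weights, col_indices, row_ptrs),
    compressed_axes=[0],  # compressing axis 0 gives the CSR layout
    shape=(2, 4),
)
lazy_matrix = dask.array.from_array(sparse_matrix)
print(lazy_matrix.shape)  # (2, 4)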
@@ -289,18 +294,23 @@ def interp_using_pre_computed_weights(self, computed_weights):
def _evaluate_linear_sparse(self, sparse_matrix):
ndim = len(self.grid)
if ndim == self.values.ndim:
- result = sparse_matrix * self.values.reshape(-1)
+ result = sparse_matrix @ self.values.reshape(-1)
else:
shape = (sparse_matrix.shape[1], -1)
- result = sparse_matrix * self.values.reshape(shape)
+ result = sparse_matrix @ self.values.reshape(shape)

return result
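The switch from * to @ is needed because scipy.sparse matrices treat * as matrix multiplication, while pydata/sparse arrays follow NumPy semantics where * is elementwise; the product must be spelled @. A toy check with invented values:

import numpy as np
from sparse import GCXS

weights = GCXS.from_numpy(
    np.array([[0.25, 0.75, 0.0, 0.0],
              [0.0, 0.0, 0.5, 0.5]])
)
src = np.array([0.0, 1.0, 4.0, 9.0])
print(weights @ src)  # [0.75 6.5] -- one weighted sum per result point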

def _evaluate_nearest(self, indices, norm_distances, out_of_bounds):
idx_res = []
for i, yi in zip(indices, norm_distances):
idx_res.append(np.where(yi <= 0.5, i, i + 1))
- return self.values[tuple(idx_res)]
+ if is_lazy_data(self.values):
+ # dask arrays do not (yet) support fancy indexing
+ indexer = self.values.vindex
+ else:
+ indexer = self.values
+ return indexer[tuple(idx_res)]
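A brief sketch (toy data) of the indexing split above: NumPy arrays accept a tuple of integer index arrays directly, while dask arrays need the vindex accessor for pointwise fancy indexing:

import dask.array as da
import numpy as np

values = np.arange(12).reshape(3, 4)
lazy = da.from_array(values, chunks=2)
idx = (np.array([0, 2]), np.array([1, 3]))

print(values[idx])                 # [ 1 11]
print(lazy.vindex[idx].compute())  # [ 1 11]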

def _find_indices(self, xi):
# find relevant edges between which xi are situated
1 change: 1 addition & 0 deletions requirements/py310.yml
@@ -23,6 +23,7 @@ dependencies:
- pyproj
- scipy
- shapely !=1.8.3
+ - sparse

# Optional dependencies.
- esmpy >=7.0
1 change: 1 addition & 0 deletions requirements/py311.yml
@@ -23,6 +23,7 @@ dependencies:
- pyproj
- scipy
- shapely !=1.8.3
+ - sparse

# Optional dependencies.
- esmpy >=7.0
1 change: 1 addition & 0 deletions requirements/py312.yml
@@ -23,6 +23,7 @@ dependencies:
- pyproj
- scipy
- shapely !=1.8.3
+ - sparse

# Optional dependencies.
- esmpy >=7.0