Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: support geopandas objects in distance statistics #145

Merged
merged 2 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 32 additions & 15 deletions pointpats/distance_statistics.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,28 @@
import numpy
import warnings
from scipy import spatial, interpolate
from collections import namedtuple

import geopandas
import numpy
import shapely
from scipy import interpolate, spatial

from .geometry import (
TREE_TYPES,
)
from .geometry import (
area as _area,
k_neighbors as _k_neighbors,
)
from .geometry import (
build_best_tree as _build_best_tree,
)
from .geometry import (
k_neighbors as _k_neighbors,
)
from .geometry import (
prepare_hull as _prepare_hull,
TREE_TYPES,
)
from .random import poisson


__all__ = [
"f",
"g",
Expand Down Expand Up @@ -49,6 +60,9 @@ def _prepare(coordinates, support, distances, metric, hull, edge_correction):
if edge_correction is not None:
raise NotImplementedError("Edge correction is not currently implemented.")

if isinstance(coordinates, geopandas.GeoDataFrame | geopandas.GeoSeries):
coordinates = shapely.get_coordinates(coordinates.geometry)

# cast to coordinate array
if isinstance(coordinates, TREE_TYPES):
tree = coordinates
Expand Down Expand Up @@ -127,7 +141,7 @@ def f(

Parameters
----------
coordinates : numpy.ndarray of shape (n,2)
coordinates : geopandas object | numpy.ndarray of shape (n,2)
input coordinates to function
support : tuple of length 1, 2, or 3, int, or numpy.ndarray
tuple, encoding (stop,), (start, stop), or (start, stop, num)
Expand Down Expand Up @@ -211,7 +225,7 @@ def g(

Parameters
-----------
coordinates : numpy.ndarray of shape (n,2)
coordinates : geopandas object | numpy.ndarray of shape (n,2)
input coordinates to function
support : tuple of length 1, 2, or 3, int, or numpy.ndarray
tuple, encoding (stop,), (start, stop), or (start, stop, num)
Expand Down Expand Up @@ -301,7 +315,7 @@ def j(

Parameters
-----------
coordinates : numpy.ndarray, (n,2)
coordinates : geopandas object | numpy.ndarray, (n,2)
input coordinates to function
support : tuple of length 1, 2, or 3, int, or numpy.ndarray
tuple, encoding (stop,), (start, stop), or (start, stop, num)
Expand Down Expand Up @@ -390,7 +404,7 @@ def k(
This function counts the number of pairs of points that are closer than a given distance.
As d increases, K approaches the number of point pairs.

coordinates : numpy.ndarray, (n,2)
coordinates : geopandas object | numpy.ndarray, (n,2)
input coordinates to function
support : tuple of length 1, 2, or 3, int, or numpy.ndarray
tuple, encoding (stop,), (start, stop), or (start, stop, num)
Expand Down Expand Up @@ -459,7 +473,7 @@ def l(

Parameters
----------
coordinates : numpy.ndarray, (n,2)
coordinates : geopandas object | numpy.ndarray, (n,2)
input coordinates to function
support : tuple of length 1, 2, or 3, int, or numpy.ndarray
tuple, encoding (stop,), (start, stop), or (start, stop, num)
Expand Down Expand Up @@ -541,6 +555,9 @@ def _ripley_test(
n_simulations=9999,
**kwargs,
):
if isinstance(coordinates, geopandas.GeoDataFrame | geopandas.GeoSeries):
coordinates = shapely.get_coordinates(coordinates.geometry)

stat_function, result_container = _ripley_dispatch.get(calltype)
core_kwargs = dict(
support=support,
Expand Down Expand Up @@ -621,7 +638,7 @@ def f_test(

Parameters
-----------
coordinates : numpy.ndarray, (n,2)
coordinates : geopandas object | numpy.ndarray, (n,2)
input coordinates to function
support : tuple of length 1, 2, or 3, int, or numpy.ndarray
tuple, encoding (stop,), (start, stop), or (start, stop, num)
Expand Down Expand Up @@ -686,7 +703,7 @@ def g_test(

Parameters
----------
coordinates : numpy.ndarray, (n,2)
coordinates : geopandas object | numpy.ndarray, (n,2)
input coordinates to function
support : tuple of length 1, 2, or 3, int, or numpy.ndarray
tuple, encoding (stop,), (start, stop), or (start, stop, num)
Expand Down Expand Up @@ -749,7 +766,7 @@ def j_test(
When the J function is consistently below 1, then it indicates clustering.
When consistently above 1, it suggests dispersion.

coordinates : numpy.ndarray, (n,2)
coordinates : geopandas object | numpy.ndarray, (n,2)
input coordinates to function
support : tuple of length 1, 2, or 3, int, or numpy.ndarray
tuple, encoding (stop,), (start, stop), or (start, stop, num)
Expand Down Expand Up @@ -828,7 +845,7 @@ def k_test(

Parameters
----------
coordinates : numpy.ndarray, (n,2)
coordinates : geopandas object | numpy.ndarray, (n,2)
input coordinates to function
support : tuple of length 1, 2, or 3, int, or numpy.ndarray
tuple, encoding (stop,), (start, stop), or (start, stop, num)
Expand Down Expand Up @@ -894,7 +911,7 @@ def l_test(

Parameters
----------
coordinates : numpy.ndarray, (n,2)
coordinates : geopandas object | numpy.ndarray, (n,2)
input coordinates to function
support : tuple of length 1, 2, or 3, int, or numpy.ndarray
tuple, encoding (stop,), (start, stop), or (start, stop, num)
Expand Down
24 changes: 14 additions & 10 deletions pointpats/tests/test_distance_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
from libpysal.cg import alpha_shape_auto
import shapely
import warnings
import geopandas
import pytest


points = numpy.asarray(
[
[66.22, 32.54],
Expand All @@ -23,6 +25,8 @@
]
)

points_gs = geopandas.GeoSeries.from_xy(*points.T)

tree = spatial.KDTree(points)

chull = spatial.ConvexHull(points)
Expand Down Expand Up @@ -207,8 +211,8 @@ def test_simulate():
# cluster poisson
# cluster normal


def test_f():
@pytest.mark.parametrize("points", [points, points_gs], ids=["numpy.ndarray", "GeoSeries"])
def test_f(points):
# -------------------------------------------------------------------------#
# Check f function has consistent performance

Expand All @@ -234,8 +238,8 @@ def test_f():
)
assert f_test.simulations.shape == (99, 15)


def test_g():
@pytest.mark.parametrize("points", [points, points_gs], ids=["numpy.ndarray", "GeoSeries"])
def test_g(points):
# -------------------------------------------------------------------------#
# Check g function works, has statistical results that are consistent

Expand All @@ -257,8 +261,8 @@ def test_g():
)
assert g_test.simulations.shape == (99, 15)


def test_j():
@pytest.mark.parametrize("points", [points, points_gs], ids=["numpy.ndarray", "GeoSeries"])
def test_j(points):
# -------------------------------------------------------------------------#
# Check j function works, matches manual, is truncated correctly

Expand All @@ -282,8 +286,8 @@ def test_j():

numpy.testing.assert_allclose(j_test.statistic, manual_j[:4], atol=0.1, rtol=0.05)


def test_k():
@pytest.mark.parametrize("points", [points, points_gs], ids=["numpy.ndarray", "GeoSeries"])
def test_k(points):
# -------------------------------------------------------------------------#
# Check K function works, matches a manual, slower explicit computation

Expand All @@ -297,8 +301,8 @@ def test_k():
k_test.statistic, manual_unscaled_k * 2 / n / intensity
)


def test_l():
@pytest.mark.parametrize("points", [points, points_gs], ids=["numpy.array", "GeoSeries"])
def test_l(points):
# -------------------------------------------------------------------------#
# Check L Function works, can be linearized, and has the right value
_, k = ripley.k(points, support=support)
Expand Down