Adding tail tests for pos=bb+ww #85

Open · wants to merge 3 commits into base: main
14 changes: 14 additions & 0 deletions Makefile
@@ -0,0 +1,14 @@
# build the container
container:
docker build -t libpysal .

# run jupyter notebook for development
nb:
docker run --rm -p 8888:8888 -v ${PWD}:/home/jovyan libpysal

# run a shell for development
cli:
docker run -it -p 8888:8888 -v ${PWD}:/home/jovyan libpysal /bin/bash

cov:
pytest --cov-report html --cov=esda
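
For reference, a typical workflow with these targets would be `make container` to build the image, `make nb` to start a Jupyter server on port 8888 with the working directory mounted at `/home/jovyan`, `make cli` for an interactive shell inside the container, and `make cov` to run the test suite and write an HTML coverage report for `esda` (the `cov` target assumes pytest and pytest-cov are available in the environment where it is invoked).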
210 changes: 116 additions & 94 deletions esda/join_counts.py
@@ -2,16 +2,17 @@
Spatial autocorrelation for binary attributes

"""
__author__ = "Sergio J. Rey <[email protected]> , Luc Anselin <[email protected]>"
__author__ = "Serge Rey <[email protected]> , Luc Anselin <[email protected]>"

from libpysal.weights.spatial_lag import lag_spatial
from .tabular import _univariate_handler
from scipy.stats import chi2_contingency
from scipy.stats import chi2
import numpy as np
import pandas as pd
import warnings

__all__ = ['Join_Counts']
__all__ = ["Join_Counts"]

PERMUTATIONS = 999

@@ -74,21 +75,20 @@ class Join_Counts(object):
minimum of permuted bw values
max_bw : float
maximum of permuted bw values
chi2 : float
Chi-square statistic on contingency table for join counts
chi2_p : float
Analytical p-value for chi2
chi2_dof : int
Degrees of freedom for analytical chi2
pos : float
bb + ww (number of joins indicating positive spatial association)
p_sim_pos : float
p-value based on permutations (one-sided) for pos
crosstab : DataFrame
Contingency table for observed join counts
expected : DataFrame
Expected contingency table for the null
Expected contingency table under the null
chi2 : float
Observed value of chi2 for join count contingency table (see Notes).
p_sim_chi2 : float
p-value for chi2 under random spatial permutations



Examples
--------

@@ -128,84 +128,94 @@ class Join_Counts(object):
24.0
>>> np.min(jc.sim_bw)
7.0
>>> round(jc.chi2_p, 3)
0.004
>>> jc.p_sim_chi2
0.002
0.008
>>> jc.pos
20.0
>>> jc.p_sim_pos
0.001

Notes
-----
Technical details and derivations can be found in :cite:`cliff81`.

Analytical inference based on the chi2 statistic is only approximate and is
therefore not used. The independence assumption is violated for join counts
even when the data are free of spatial autocorrelation, since neighboring
join counts are correlated by construction. Thus, only the chi2 attribute is
reported; no analytical p-values are provided.

Instead, `p_sim_chi2` is reported, which uses the sampling distribution of
the chi2 statistic under the null obtained from random spatial permutations
of the data.

Warnings are issued when zero expected values for specific join counts are
encountered, either in the observed sample or while carrying out the
permutations. In the former case, no inference-related attributes are set
on the object; in the latter, realizations with zero expected counts are
not used in constructing the sampling distribution of the chi2 statistic.

Technical details and derivations can be found in :cite:`cliff81`.
"""

def __init__(self, y, w, permutations=PERMUTATIONS):
y = np.asarray(y).flatten()
w.transformation = 'b' # ensure we have binary weights
w.transformation = "b" # ensure we have binary weights
self.w = w
self.adj_list = self.w.to_adjlist(remove_symmetric=True)
self.adj_list = self.w.to_adjlist() # full symmetry needed for tables
self.y = y
self.permutations = permutations
self.J = w.s0 / 2.
self.J = w.s0 / 2.0
results = self.__calc(self.y)
self.bb = results[0]
self.ww = results[1]
self.bw = results[2]
self.chi2 = results[3]
self.chi2_p = results[4]
self.chi2_dof = results[5]
self.autocorr_pos = self.bb + self.ww
self.autocorr_neg = self.bw

crosstab = pd.DataFrame(data=results[-1])
id_names = ['W', 'B']
idx = pd.Index(id_names, name='Focal')
crosstab.set_index(idx, inplace=True)
crosstab.columns = pd.Index(id_names, name='Neighbor')
self.crosstab = crosstab
expected = pd.DataFrame(data=results[6])
expected.set_index(idx, inplace=True)
expected.columns = pd.Index(id_names, name='Neighbor')
self.expected = expected
self.calc = self.__calc

if permutations:
sim = []
i = 0
while i < permutations:
try:
res = self.__calc(np.random.permutation(self.y))
sim.append(res)
i += 1
except ValueError:
# expected count of 0 -> inadmissible
pass
sim_jc = np.array(sim)
self.sim_bb = sim_jc[:, 0]
self.min_bb = np.min(self.sim_bb)
self.mean_bb = np.mean(self.sim_bb)
self.max_bb = np.max(self.sim_bb)
self.sim_bw = sim_jc[:, 2]
self.min_bw = np.min(self.sim_bw)
self.mean_bw = np.mean(self.sim_bw)
self.max_bw = np.max(self.sim_bw)
self.sim_autocurr_pos = sim_jc[:, 0]+sim_jc[:, 1]
self.sim_autocurr_neg = sim_jc[:, 2]
self.sim_chi2 = sim_jc[:, 3]

stat = ((self.autocorr_pos - np.mean(self.sim_autocurr_pos))**2 / np.mean(self.sim_autocurr_pos)**2 +
(self.autocorr_neg - np.mean(self.sim_autocurr_neg))**2 / np.mean(self.sim_autocurr_pos)**2)
self.sim_autocorr_chi2 = 1 - chi2.cdf(stat, 1)
if results:
self.bb = results[0]
self.ww = results[1]
self.bw = results[2]
self.pos = self.bb + self.ww
self.neg = self.bw # bw==wb
self.chi2 = results[3]
crosstab = pd.DataFrame(data=results[-2])
id_names = ["W", "B"]
idx = pd.Index(id_names, name="Focal")
crosstab.set_index(idx, inplace=True)
crosstab.columns = pd.Index(id_names, name="Neighbor")
self.crosstab = crosstab
expected = pd.DataFrame(data=results[-1])
expected.set_index(idx, inplace=True)
expected.columns = pd.Index(id_names, name="Neighbor")
self.expected = expected
self.calc = self.__calc

p_sim_bb = self.__pseudop(self.sim_bb, self.bb)
p_sim_bw = self.__pseudop(self.sim_bw, self.bw)
p_sim_chi2 = self.__pseudop(self.sim_chi2, self.chi2)
p_sim_autocorr_pos = self.__pseudop(self.sim_autocurr_pos, self.autocorr_pos)
p_sim_autocorr_neg = self.__pseudop(self.sim_autocurr_neg, self.autocorr_neg)
self.p_sim_bb = p_sim_bb
self.p_sim_bw = p_sim_bw
self.p_sim_chi2 = p_sim_chi2
self.p_sim_autocorr_pos = p_sim_autocorr_pos
self.p_sim_autocorr_neg = p_sim_autocorr_neg
if permutations:
sim = []
i = 0
while i < permutations:
res = self.__calc(np.random.permutation(self.y))
if res is None:
    # zero expected join count in this realization; __calc has already
    # warned, so drop it and draw another permutation
    warnings.warn('Zero expected joins encountered, ignoring realization.')
    continue
sim.append(res)
i += 1
sim_jc = np.array(sim)
self.sim_bb = sim_jc[:, 0]
self.sim_ww = sim_jc[:, 1]
self.sim_pos = self.sim_bb + self.sim_ww
self.min_bb = np.min(self.sim_bb)
self.mean_bb = np.mean(self.sim_bb)
self.max_bb = np.max(self.sim_bb)
self.sim_bw = sim_jc[:, 2]
self.sim_neg = self.sim_bw
self.min_bw = np.min(self.sim_bw)
self.mean_bw = np.mean(self.sim_bw)
self.max_bw = np.max(self.sim_bw)
self.sim_chi2 = sim_jc[:, 3]
self.p_sim_bb = self.__pseudop(self.sim_bb, self.bb)
self.p_sim_bw = self.__pseudop(self.sim_bw, self.bw)
self.p_sim_ww = self.__pseudop(self.sim_ww, self.ww)
self.p_sim_pos = self.__pseudop(self.sim_pos, self.pos)
self.p_sim_neg = self.__pseudop(self.sim_neg, self.neg)
self.p_sim_chi2 = self.__pseudop(self.sim_chi2, self.chi2)

def __calc(self, z):
adj_list = self.adj_list
@@ -214,28 +224,35 @@ def __calc(self, z):
neighbor = zseries.loc[adj_list.neighbor].values
sim = focal == neighbor
dif = 1 - sim
bb = (focal * sim).sum()
ww = ((1-focal) * sim).sum()
bw = (focal * dif).sum()
wb = ((1-focal) * dif).sum()
table = [[ww, wb],
[bw, bb]]
chi2 = chi2_contingency(table)
bb = (focal * sim).sum() / 2.
ww = ((1 - focal) * sim).sum() / 2.
bw = (focal * dif).sum() / 2.
wb = ((1 - focal) * dif).sum() / 2.
table = [[ww, wb], [bw, bb]]
try:
chi2 = chi2_contingency(table)
except ValueError:
msg = 'Zero expected join count encountered. No inference made.'
msg += str(table)
warnings.warn(msg)
return None
stat, pvalue, dof, expected = chi2
return (bb, ww, bw+wb, stat, pvalue, dof, expected, np.array(table))
return (bb, ww, bw + wb, stat, np.array(table), expected)

def __pseudop(self, sim, jc):
above = sim >=jc
above = sim >= jc
larger = sum(above)
psim = (larger + 1.) / (self.permutations + 1.)
psim = (larger + 1.0) / (self.permutations + 1.0)
return psim

@property
def _statistic(self):
return self.bw

@classmethod
def by_col(cls, df, cols, w=None, inplace=False, pvalue='sim', outvals=None, **stat_kws):
def by_col(
cls, df, cols, w=None, inplace=False, pvalue="sim", outvals=None, **stat_kws
):
"""
Function to compute a Join_Count statistic on a dataframe

@@ -271,11 +288,16 @@ def by_col(cls, df, cols, w=None, inplace=False, pvalue='sim', outvals=None, **s
"""
if outvals is None:
outvals = []
outvals.extend(['bb', 'p_sim_bw', 'p_sim_bb'])
pvalue = ''
return _univariate_handler(df, cols, w=w, inplace=inplace, pvalue=pvalue,
outvals=outvals, stat=cls,
swapname='bw', **stat_kws)



outvals.extend(["bb", "p_sim_bw", "p_sim_bb"])
pvalue = ""
return _univariate_handler(
df,
cols,
w=w,
inplace=inplace,
pvalue=pvalue,
outvals=outvals,
stat=cls,
swapname="bw",
**stat_kws
)
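
To make the new `pos = bb + ww` tail test concrete, the following is a minimal, self-contained sketch of the permutation logic it relies on. The edge list, labels, and the helper name `pos_joins` are invented for illustration and are not part of esda; only the final pseudo p-value line mirrors `Join_Counts.__pseudop` as shown in the diff above.

import numpy as np

# undirected joins of a 4-node path graph: 0-1, 1-2, 2-3 (illustrative only)
edges = np.array([(0, 1), (1, 2), (2, 3)])
y = np.array([1, 1, 0, 0])  # binary labels, B = 1 and W = 0 (illustrative only)

def pos_joins(z, edges):
    # count joins whose endpoints share a label, i.e. bb + ww
    return int(np.sum(z[edges[:, 0]] == z[edges[:, 1]]))

observed_pos = pos_joins(y, edges)  # 2 for this toy configuration

rng = np.random.default_rng(12345)
permutations = 999
sim_pos = np.array(
    [pos_joins(rng.permutation(y), edges) for _ in range(permutations)]
)

# one-sided pseudo p-value, mirroring Join_Counts.__pseudop
p_sim_pos = ((sim_pos >= observed_pos).sum() + 1.0) / (permutations + 1.0)
print(observed_pos, p_sim_pos)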
10 changes: 2 additions & 8 deletions esda/tests/test_join_counts.py
@@ -21,8 +21,6 @@ def test_Join_Counts(self):
self.assertAlmostEqual(jc.bb, 10.0)
self.assertAlmostEqual(jc.bw, 4.0)
self.assertAlmostEqual(jc.ww, 10.0)
self.assertAlmostEqual(jc.autocorr_neg, 4.0) # jc.bw
self.assertAlmostEqual(jc.autocorr_pos, 20.0)
self.assertAlmostEqual(jc.J, 24.0)
self.assertAlmostEqual(len(jc.sim_bb), 999)
self.assertAlmostEqual(jc.p_sim_bb, 0.0030000000000000001)
@@ -34,12 +32,8 @@ def test_Join_Counts(self):
self.assertAlmostEqual(np.mean(jc.sim_bw), 12.811811811811811)
self.assertAlmostEqual(np.max(jc.sim_bw), 24.0)
self.assertAlmostEqual(np.min(jc.sim_bw), 7.0)
self.assertAlmostEqual(8.479632255856034, jc.chi2)
self.assertAlmostEqual(0.003591446953916693, jc.chi2_p)
self.assertAlmostEqual(0.002, jc.p_sim_chi2)
self.assertAlmostEqual(1.0, jc.p_sim_autocorr_neg)
self.assertAlmostEqual(0.001, jc.p_sim_autocorr_pos)
self.assertAlmostEqual(0.2653504320039377, jc.sim_autocorr_chi2)
self.assertAlmostEqual(8.16666666666666, jc.chi2)
self.assertAlmostEqual(0.008, jc.p_sim_chi2)

@unittest.skipIf(PANDAS_EXTINCT, 'missing pandas')
def test_by_col(self):
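
A sketch of how the new tail-test attributes could be exercised in test_Join_Counts, using the values from the updated docstring example; these lines are illustrative and not necessarily the assertions this PR adds.

self.assertAlmostEqual(jc.pos, 20.0)
self.assertAlmostEqual(jc.neg, 4.0)  # bw == wb
self.assertAlmostEqual(jc.p_sim_pos, 0.001)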