diff --git a/esda/functions.py b/esda/functions.py
new file mode 100644
index 00000000..d29aa962
--- /dev/null
+++ b/esda/functions.py
@@ -0,0 +1,66 @@
+from .moran import (
+    Moran,
+    Moran_Local,
+    Moran_BV,
+    Moran_Local_BV,
+    Moran_Rate,
+    Moran_Local_Rate,
+)
+from .geary import Geary
+from .gamma import Gamma
+from .geary_local import Geary_Local
+from .geary_local_mv import Geary_Local_MV
+from .getisord import G, G_Local
+from .join_counts import Join_Counts
+from .join_counts_local import Join_Counts_Local
+from .join_counts_local_bv import Join_Counts_Local_BV
+from .join_counts_local_mv import Join_Counts_Local_MV
+
+# from .lee import Spatial_Pearson  # no solution yet for sklearn style classes
+# from .losh import LOSH
+import inspect
+
+for klass in (
+    Moran,
+    Moran_Local,
+    Moran_BV,
+    Moran_Local_BV,
+    Moran_Rate,
+    Moran_Local_Rate,
+    Geary,
+    Gamma,
+    Geary_Local,
+    Geary_Local_MV,
+    G,
+    G_Local,
+    Join_Counts,
+    Join_Counts_Local,
+    Join_Counts_Local_BV,
+    Join_Counts_Local_MV,
+):
+    assert hasattr(klass, "_statistic"), f"{klass} has no _statistic"
+    assert not callable(klass._statistic), f"{klass}._statistic is callable"
+    klassname = klass.__name__
+    name = klass.__name__.lower()
+    if klassname == "LOSH":
+        defn = f"def {name}(*args, **kwargs):\n\tobj = {klassname}(*args, **kwargs)\n\treturn obj._statistic, obj.pval"
+    elif klassname == "Spatial_Pearson":
+        defn = f"def {name}(*args, **kwargs):\n\tobj = {klassname}(*args, **kwargs)\n\treturn obj._statistic, obj.significance_"
+    else:
+        defn = f"def {name}(*args, **kwargs):\n\tobj = {klassname}(*args, **kwargs)\n\treturn obj._statistic, obj.p_sim"
+    exec(defn)
+    exec(f"{name}.__doc__ = {klassname}.__doc__")
+    init_sig = inspect.signature(klass)
+    globals()[name].__signature__ = init_sig
+    del globals()[klassname]
+
+# for klass in (LOSH, Spatial_Pearson):  # sklearn style...
+#     # disabled: the LOSH and Spatial_Pearson imports above are commented out,
+#     pass  # so iterating over them here would raise NameError at import time
+
+del klassname
+del klass
+del name
+del init_sig
+del defn
+del inspect
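The loop above generates one lower-cased functional wrapper per class, so `Moran` gains a counterpart `moran` that fits the class and returns `(statistic, p_sim)`. A minimal usage sketch of the generated interface follows — the lattice weights and random data are illustrative assumptions, not part of the patch:

    import libpysal
    import numpy as np

    from esda.functions import moran  # generated by the loop above

    w = libpysal.weights.lat2W(10, 10)  # 10x10 lattice contiguity weights
    y = np.random.random(100)

    # equivalent to: obj = Moran(y, w); (obj._statistic, obj.p_sim)
    I, p_sim = moran(y, w)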
diff --git a/esda/geary_local.py b/esda/geary_local.py
index e34df518..7cf68bd2 100644
--- a/esda/geary_local.py
+++ b/esda/geary_local.py
@@ -121,7 +121,7 @@ def fit(self, x):
         n_jobs = self.n_jobs
         seed = self.seed
 
-        self.localG = self._statistic(x, w)
+        self.localG = self._stat_func(x, w)
 
         if permutations:
             self.p_sim, self.rlocalG = _crand_plus(
@@ -150,8 +150,12 @@ def fit(self, x):
 
         return self
 
+    @property
+    def _statistic(self):
+        return self.localG
+
     @staticmethod
-    def _statistic(x, w):
+    def _stat_func(x, w):
         # Caclulate z-scores for x
         zscore_x = (x - np.mean(x)) / np.std(x)
         # Create focal (xi) and neighbor (zi) values
diff --git a/esda/geary_local_mv.py b/esda/geary_local_mv.py
index eaeea999..53a1fcc5 100644
--- a/esda/geary_local_mv.py
+++ b/esda/geary_local_mv.py
@@ -68,21 +68,21 @@ def fit(self, variables):
         >>> lG_mv.localG[0:5]
         >>> lG_mv.p_sim[0:5]
         """
-        self.variables = np.array(variables, dtype='float')
+        self.variables = np.array(variables, dtype="float")
 
         w = self.connectivity
-        w.transform = 'r'
+        w.transform = "r"
 
         self.n = len(variables[0])
         self.w = w
-
+
         permutations = self.permutations
 
         # Caclulate z-scores for input variables
         # to be used in _statistic and _crand
         zvariables = stats.zscore(variables, axis=1)
 
-        self.localG = self._statistic(variables, zvariables, w)
+        self.localG = self._stat_func(variables, zvariables, w)
 
         if permutations:
             self._crand(zvariables)
@@ -95,27 +95,30 @@ def fit(self, variables):
 
         return self
 
+    @property
+    def _statistic(self):
+        return self.localG
+
     @staticmethod
-    def _statistic(variables, zvariables, w):
+    def _stat_func(variables, zvariables, w):
         # Define denominator adjustment
         k = len(variables)
         # Create focal and neighbor values
         adj_list = w.to_adjlist(remove_symmetric=False)
         zseries = [pd.Series(i, index=w.id_order) for i in zvariables]
-        focal = [zseries[i].loc[adj_list.focal].values
-                 for i in range(len(variables))]
-        neighbor = [zseries[i].loc[adj_list.neighbor].values
-                    for i in range(len(variables))]
+        focal = [zseries[i].loc[adj_list.focal].values for i in range(len(variables))]
+        neighbor = [
+            zseries[i].loc[adj_list.neighbor].values for i in range(len(variables))
+        ]
         # Carry out local Geary calculation
-        gs = adj_list.weight.values * \
-            (np.array(focal) - np.array(neighbor))**2
+        gs = adj_list.weight.values * (np.array(focal) - np.array(neighbor)) ** 2
         # Reorganize data
         temp = pd.DataFrame(gs).T
-        temp['ID'] = adj_list.focal.values
-        adj_list_gs = temp.groupby(by='ID').sum()
+        temp["ID"] = adj_list.focal.values
+        adj_list_gs = temp.groupby(by="ID").sum()
         localG = np.array(adj_list_gs.sum(axis=1) / k)
 
-        return (localG)
+        return localG
 
     def _crand(self, zvariables):
         """
@@ -146,13 +149,15 @@ def _crand(self, zvariables):
             np.random.shuffle(idsi)
             vars_rand = []
             for j in range(nvars):
-                vars_rand.append(zvariables[j][idsi[rids[:, 0:wc[i]]]])
+                vars_rand.append(zvariables[j][idsi[rids[:, 0 : wc[i]]]])
             # vars rand as tmp
             # Calculate diff
             diff = []
             for z in range(nvars):
-                diff.append((np.array((zvariables[z][i] - vars_rand[z])**2
-                             * w[i])).sum(1) / nvars)
+                diff.append(
+                    (np.array((zvariables[z][i] - vars_rand[z]) ** 2 * w[i])).sum(1)
+                    / nvars
+                )
             # add up differences
             temp = np.array([sum(x) for x in zip(*diff)])
             # Assign to object to be returned
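Every estimator in this patch repeats the same refactor: the computing `@staticmethod` is renamed `_stat_func`, and `_statistic` becomes a read-only `@property` aliasing the fitted attribute — which is exactly what the `assert not callable(klass._statistic)` guard in functions.py checks for. A minimal sketch of the pattern, with illustrative names:

    class Example:
        def fit(self, x):
            # compute once, store the fitted value
            self.stat_ = self._stat_func(x)
            return self

        @property
        def _statistic(self):
            # uniform, read-only access point for the functional interface
            return self.stat_

        @staticmethod
        def _stat_func(x):
            return sum(x) / len(x)  # stand-in computation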
diff --git a/esda/join_counts_local.py b/esda/join_counts_local.py
index 7944f5f1..242834a9 100644
--- a/esda/join_counts_local.py
+++ b/esda/join_counts_local.py
@@ -115,7 +115,7 @@ def fit(self, y, n_jobs=1, permutations=999):
         self.n = len(y)
         self.w = w
 
-        self.LJC = self._statistic(y, w)
+        self.LJC = self._stat_func(y, w)
 
         if permutations:
             self.p_sim, self.rjoins = _crand_plus(
@@ -132,8 +132,12 @@ def fit(self, y, n_jobs=1, permutations=999):
 
         return self
 
+    @property
+    def _statistic(self):
+        return self.LJC
+
     @staticmethod
-    def _statistic(y, w):
+    def _stat_func(y, w):
         # Create adjacency list. Note that remove_symmetric=False - this is
         # different from the esda.Join_Counts() function.
         adj_list = w.to_adjlist(remove_symmetric=False)
diff --git a/esda/join_counts_local_bv.py b/esda/join_counts_local_bv.py
index 3aed8be7..7059b5e0 100644
--- a/esda/join_counts_local_bv.py
+++ b/esda/join_counts_local_bv.py
@@ -133,7 +133,7 @@ def fit(self, x, z, case="CLC", n_jobs=1, permutations=999):
         n_jobs = self.n_jobs
         seed = self.seed
 
-        self.LJC = self._statistic(x, z, w, case=case)
+        self.LJC = self._stat_func(x, z, w, case=case)
 
         if permutations:
             if case == "BJC":
@@ -168,8 +168,12 @@ def fit(self, x, z, case="CLC", n_jobs=1, permutations=999):
 
         return self
 
+    @property
+    def _statistic(self):
+        return self.LJC
+
     @staticmethod
-    def _statistic(x, z, w, case):
+    def _stat_func(x, z, w, case):
         # Create adjacency list. Note that remove_symmetric=False - this is
         # different from the esda.Join_Counts() function.
         adj_list = w.to_adjlist(remove_symmetric=False)
diff --git a/esda/join_counts_local_mv.py b/esda/join_counts_local_mv.py
index 5cd24b5b..e8379341 100644
--- a/esda/join_counts_local_mv.py
+++ b/esda/join_counts_local_mv.py
@@ -115,7 +115,7 @@ def fit(self, variables, n_jobs=1, permutations=999):
         # np.array() of dtype='float' for numba
         self.ext = np.array(np.prod(np.vstack(variables), axis=0), dtype="float")
 
-        self.LJC = self._statistic(variables, w)
+        self.LJC = self._stat_func(variables, w)
 
         if permutations:
             self.p_sim, self.rjoins = _crand_plus(
@@ -132,8 +132,12 @@ def fit(self, variables, n_jobs=1, permutations=999):
 
         return self
 
+    @property
+    def _statistic(self):
+        return self.LJC
+
     @staticmethod
-    def _statistic(variables, w):
+    def _stat_func(variables, w):
         # Create adjacency list. Note that remove_symmetric=False -
         # different from the esda.Join_Counts() function.
         adj_list = w.to_adjlist(remove_symmetric=False)
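The three local join count estimators above share the same fit interface; a usage sketch for the univariate case — the binary data and lattice weights are assumptions for illustration:

    import libpysal
    import numpy as np
    from esda.join_counts_local import Join_Counts_Local

    w = libpysal.weights.lat2W(10, 10)
    y = (np.random.random(100) > 0.5).astype(float)  # binary variable

    ljc = Join_Counts_Local(connectivity=w).fit(y)
    ljc.LJC, ljc.p_sim  # local join counts and permutation p-values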
""" self.connectivity = connectivity @@ -77,7 +77,7 @@ def fit(self, x, y): ) if self.connectivity is None: self.connectivity = sparse.eye(Z.shape[0]) - self.association_ = self._statistic(Z, self.connectivity) + self.association_ = self._stat_func(Z, self.connectivity) standard_connectivity = sparse.csc_matrix( self.connectivity / self.connectivity.sum(axis=1) @@ -100,8 +100,12 @@ def fit(self, x, y): self.significance_ = (extreme + 1.0) / (self.permutations + 1.0) return self + @property + def _statistic(self): + return self.association_ + @staticmethod - def _statistic(Z, W): + def _stat_func(Z, W): ctc = W.T @ W ones = numpy.ones(ctc.shape[0]) return (Z.T @ ctc @ Z) / (ones.T @ ctc @ ones) @@ -118,13 +122,13 @@ def __init__(self, connectivity=None, permutations=999): --------- connectivity: scipy.sparse matrix object the connectivity structure describing the relationships - between observed units. Will be row-standardized. + between observed units. Will be row-standardized. permutations: int the number of permutations to conduct for inference. - if < 1, no permutational inference will be conducted. + if < 1, no permutational inference will be conducted. significance_: numpy.ndarray (2,2) permutation-based p-values for the fraction of times the - observed correlation was more extreme than the simulated + observed correlation was more extreme than the simulated correlations. Attributes ---------- @@ -135,10 +139,10 @@ def __init__(self, connectivity=None, permutations=999): smoothing factor" reference_distribution_: numpy.ndarray (n_permutations, n_samples) distribution of correlation matrices for randomly-shuffled - maps. + maps. significance_: numpy.ndarray (n_samples,) permutation-based p-values for the fraction of times the - observed correlation was more extreme than the simulated + observed correlation was more extreme than the simulated correlations. @@ -151,14 +155,14 @@ def __init__(self, connectivity=None, permutations=999): def fit(self, x, y): """ - bivariate local pearson's R based on Eq. 22 in Lee (2001), using + bivariate local pearson's R based on Eq. 22 in Lee (2001), using site-wise conditional randomization from Moran_Local_BV. - + L_i = \dfrac{ n \cdot \Big[\big(\sum_i w_{ij}(x_j - \bar{x})\big) \big(\sum_i w_{ij}(y_j - \bar{y})\big) \Big] - } + } { \sqrt{\sum_i (x_i - \bar{x})^2} \sqrt{\sum_i (y_i - \bar{y})^2}} @@ -166,13 +170,13 @@ def fit(self, x, y): n \cdot (\tilde{x}_j - \bar{x}) (\tilde{y}_j - \bar{y}) - } + } { \sqrt{\sum_i (x_i - \bar{x})^2} \sqrt{\sum_i (y_i - \bar{y})^2}} - Lee, Sang Il. (2001), "Developing a bivariate spatial - association measure: An integration of Pearson's r and + Lee, Sang Il. (2001), "Developing a bivariate spatial + association measure: An integration of Pearson's r and Moran's I." Journal of Geographical Systems, 3(4):369-385. Arguments diff --git a/esda/losh.py b/esda/losh.py index bc2f0ec1..357b0ade 100644 --- a/esda/losh.py +++ b/esda/losh.py @@ -84,26 +84,34 @@ def fit(self, y, a=2): w = self.connectivity - self.Hi, self.ylag, self.yresid, self.VarHi = self._statistic(y, w, a) + self.Hi, self.ylag, self.yresid, self.VarHi = self._stat_func(y, w, a) if self.inference is None: return self - elif self.inference == 'chi-square': + elif self.inference == "chi-square": if a != 2: - warnings.warn(f'Chi-square inference assumes that a=2, but \ - a={a}. This means the inference will be invalid!') + warnings.warn( + f"Chi-square inference assumes that a=2, but \ + a={a}. This means the inference will be invalid!" 
diff --git a/esda/losh.py b/esda/losh.py
index bc2f0ec1..357b0ade 100644
--- a/esda/losh.py
+++ b/esda/losh.py
@@ -84,26 +84,34 @@ def fit(self, y, a=2):
 
         w = self.connectivity
 
-        self.Hi, self.ylag, self.yresid, self.VarHi = self._statistic(y, w, a)
+        self.Hi, self.ylag, self.yresid, self.VarHi = self._stat_func(y, w, a)
 
         if self.inference is None:
             return self
-        elif self.inference == 'chi-square':
+        elif self.inference == "chi-square":
             if a != 2:
-                warnings.warn(f'Chi-square inference assumes that a=2, but \
-                a={a}. This means the inference will be invalid!')
+                warnings.warn(
+                    f"Chi-square inference assumes that a=2, but \
+                a={a}. This means the inference will be invalid!"
+                )
             else:
-                dof = 2/self.VarHi
-                Zi = (2*self.Hi)/self.VarHi
+                dof = 2 / self.VarHi
+                Zi = (2 * self.Hi) / self.VarHi
                 self.pval = 1 - stats.chi2.cdf(Zi, dof)
         else:
-            raise NotImplementedError(f'The requested inference method \
-            ({self.inference}) is not currently supported!')
+            raise NotImplementedError(
+                f"The requested inference method \
+                ({self.inference}) is not currently supported!"
+            )
 
         return self
 
+    @property
+    def _statistic(self):
+        return self.Hi
+
     @staticmethod
-    def _statistic(y, w, a):
+    def _stat_func(y, w, a):
         # Define what type of variance to use
         if a is None:
             a = 2
@@ -113,9 +121,9 @@ def _statistic(y, w, a):
         rowsum = np.array(w.sparse.sum(axis=1)).flatten()
 
         # Calculate spatial mean
-        ylag = lp.weights.lag_spatial(w, y)/rowsum
+        ylag = lp.weights.lag_spatial(w, y) / rowsum
         # Calculate and adjust residuals based on multiplier
-        yresid = abs(y-ylag)**a
+        yresid = abs(y - ylag) ** a
         # Calculate denominator of Hi equation
         denom = np.mean(yresid) * np.array(rowsum)
         # Carry out final Hi calculation
@@ -126,9 +134,11 @@ def _statistic(y, w, a):
         n = len(y)
         squared_rowsum = np.asarray(w.sparse.multiply(w.sparse).sum(axis=1)).flatten()
 
-        VarHi = ((n-1)**-1) * \
-            (denom**-2) * \
-            ((np.sum(yresid**2)/n) - yresid_mean**2) * \
-            ((n*squared_rowsum) - (rowsum**2))
+        VarHi = (
+            ((n - 1) ** -1)
+            * (denom ** -2)
+            * ((np.sum(yresid ** 2) / n) - yresid_mean ** 2)
+            * ((n * squared_rowsum) - (rowsum ** 2))
+        )
 
         return (Hi, ylag, yresid, VarHi)
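LOSH likewise keeps its own attribute names (`Hi`, `pval`), matching the `obj.pval` branch that the functions.py loop already anticipates for when it is wired in. A usage sketch, with illustrative inputs:

    import libpysal
    import numpy as np
    from esda.losh import LOSH

    w = libpysal.weights.lat2W(10, 10)
    y = np.random.random(100)

    ls = LOSH(connectivity=w, inference="chi-square").fit(y)
    ls.Hi, ls.pval  # local spatial heteroskedasticity and chi-square p-values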