Skip to content

Commit

Permalink
Merge pull request #320 from martinfleis/simpson
Browse files Browse the repository at this point in the history
REF: simplify calculation of Simpson diversity
  • Loading branch information
jGaboardi authored Dec 23, 2021
2 parents 8d341e5 + 1b6a816 commit 90ce789
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 30 deletions.
41 changes: 12 additions & 29 deletions momepy/diversity.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,6 @@ class Simpson:
return Inverse Simpson index instead of Simpson index (``1 / λ``)
categorical : bool (default False)
treat values as categories (will not use ``binning``)
categories : list-like (default None)
list of categories. If None ``values.unique()`` is used.
verbose : bool (default True)
if True, shows progress bars in loops and indication of steps
**classification_kwds : dict
Expand Down Expand Up @@ -315,7 +313,6 @@ def __init__(
self.gini_simpson = gini_simpson
self.inverse = inverse
self.categorical = categorical
self.categories = categories
self.classification_kwds = classification_kwds

data = gdf.copy()
Expand All @@ -327,13 +324,10 @@ def __init__(

data = data.set_index(unique_id)[values]

if not categories:
categories = data.unique()

if not categorical:
self.bins = classify(data, scheme=binning, **classification_kwds).bins
else:
self.bins = categories
self.bins = None

results_list = []
for index in tqdm(data.index, total=data.shape[0], disable=not verbose):
Expand All @@ -347,7 +341,6 @@ def __init__(
values_list,
self.bins,
categorical=categorical,
categories=categories,
)
)
else:
Expand All @@ -361,7 +354,7 @@ def __init__(
self.series = pd.Series(results_list, index=gdf.index)


def simpson_diversity(data, bins=None, categorical=False, categories=None):
def simpson_diversity(values, bins=None, categorical=False):
"""
Calculates the Simpson\'s diversity index of data. Helper function for
:py:class:`momepy.Simpson`.
Expand All @@ -370,18 +363,16 @@ def simpson_diversity(data, bins=None, categorical=False, categories=None):
\\lambda=\\sum_{i=1}^{R} p_{i}^{2}
Formula adapted from https://gist.github.com/martinjc/f227b447791df8c90568.
Parameters
----------
data : GeoDataFrame
GeoDataFrame containing morphological tessellation
values : pandas.Series
list of values
bins : array, optional
array of top edges of classification bins. Result of binning.bins.
array of top edges of classification bins.
Should be equal to the result of binning.bins.
categorical : bool (default False)
treat values as categories (will not use ``bins``)
categories : list-like (default None)
list of categories
Returns
-------
Expand All @@ -398,24 +389,16 @@ def simpson_diversity(data, bins=None, categorical=False, categories=None):
except ImportError:
raise ImportError("The 'mapclassify' package is required")

def p(n, N):
"""Relative abundance"""
if n == 0:
return 0
return float(n) / N

if categorical:
counts = data.value_counts().to_dict()
for c in categories:
if c not in counts.keys():
counts[c] = 0
counts = values.value_counts()

else:
sample_bins = mc.UserDefined(data, bins)
counts = dict(zip(bins, sample_bins.counts))
sample_bins = mc.UserDefined(values, bins)
counts = sample_bins.counts

N = sum(counts.values())
N = sum(counts)

return sum(p(n, N) ** 2 for n in counts.values() if n != 0)
return sum((n / N) ** 2 for n in counts if n != 0)


class Gini:
Expand Down
1 change: 0 additions & 1 deletion tests/test_diversity.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ def test_Simpson(self):
self.sw,
"uID",
categorical=True,
categories=range(15),
).series
assert cat2[0] == pytest.approx(0.15)

Expand Down

0 comments on commit 90ce789

Please sign in to comment.