diff --git a/libpysal/cg/ops/_accessors.py b/libpysal/cg/ops/_accessors.py index 4ee72cb42..123f9df16 100644 --- a/libpysal/cg/ops/_accessors.py +++ b/libpysal/cg/ops/_accessors.py @@ -23,7 +23,6 @@ def get_attr(df, geom_col="geometry", inplace=False, attr=None): return outval - _doc_template = """ Tabular accessor to grab a geometric object's {n} attribute. diff --git a/libpysal/cg/ops/_shapely.py b/libpysal/cg/ops/_shapely.py index b2bb9600f..97b308581 100644 --- a/libpysal/cg/ops/_shapely.py +++ b/libpysal/cg/ops/_shapely.py @@ -123,10 +123,10 @@ def cascaded_union(df, geom_col="geometry", **groupby_kws): See Also -------- - + pysal.shapely_ext.cascaded_union pandas.DataFrame.groupby - + """ by = groupby_kws.pop("by", None) @@ -159,10 +159,10 @@ def unary_union(df, geom_col="geometry", **groupby_kws): See Also -------- - + pysal.shapely_ext.unary_union pandas.DataFrame.groupby - + """ by = groupby_kws.pop("by", None) @@ -208,10 +208,10 @@ def cascaded_intersection(df, geom_col="geometry", **groupby_kws): See Also -------- - + pysal.shapely_ext.cascaded_intersection pandas.DataFrame.groupby - + """ by = groupby_kws.pop("by", None) diff --git a/libpysal/cg/ops/tabular.py b/libpysal/cg/ops/tabular.py index 378ed9faf..56bbca3ec 100644 --- a/libpysal/cg/ops/tabular.py +++ b/libpysal/cg/ops/tabular.py @@ -53,13 +53,13 @@ def spatial_join( the suffix to apply to overlapping column names from ``df1``.; and (4) ``'rsuffix'`` defaults to ``right'``), the suffix to apply to overlapping column names from ``df2``. - + Returns ------- df : pandas.DataFrame A pandas.DataFrame with a new set of polygons and attributes resulting from the overlay. - + """ import geopandas as gpd @@ -103,13 +103,13 @@ def spatial_overlay( Default is ``'geometry'``. **kwargs : dict Optional keyword arguments passed in ``geopandas.tools.overlay``. - + Returns ------- df : pandas.DataFrame A pandas.DataFrame with a new set of polygons and attributes resulting from the overlay. 
- + """ import geopandas as gpd diff --git a/libpysal/cg/ops/tests/test_shapely.py b/libpysal/cg/ops/tests/test_shapely.py index f116fb1ac..914a5d55d 100644 --- a/libpysal/cg/ops/tests/test_shapely.py +++ b/libpysal/cg/ops/tests/test_shapely.py @@ -123,21 +123,20 @@ def test_has_z(self): def test_is_empty(self): """ PySAL doesn't really support empty shapes. Like, the following errors out: - + ``` ps.cg.Polygon([[]]) ``` - + and you can make it work by: - + ``` ps.cg.Polygon([[()]]) ``` but that won't convert over to shapely. - So, we're only testing the negative here. - + """ for df in self.dframes: self.compare("is_empty", df) diff --git a/libpysal/cg/polygonQuadTreeStructure.py b/libpysal/cg/polygonQuadTreeStructure.py index 11ebb720d..276dfe965 100644 --- a/libpysal/cg/polygonQuadTreeStructure.py +++ b/libpysal/cg/polygonQuadTreeStructure.py @@ -130,7 +130,7 @@ def __init__(self, level, min_x, min_y, length_x, length_y, arcs, status): @property def rings(self): - """ the list of rings which are formed by the intersection of this cell and the arcs pass them + """the list of rings which are formed by the intersection of this cell and the arcs pass them Returns ------- diff --git a/libpysal/cg/rtree.py b/libpysal/cg/rtree.py index a1bcbf8ec..0f1ea5866 100644 --- a/libpysal/cg/rtree.py +++ b/libpysal/cg/rtree.py @@ -59,17 +59,17 @@ def coords(self) -> tuple: def overlap(self, orect): """Return the overlapping area of two rectangles. - + Parameters ---------- orect : libpysal.cg.Rect Another rectangle. - + Returns ------- overlapping_area : float The area of the overlap between ``orect`` and ``self``. - + """ overlapping_area = self.intersect(orect).area() @@ -108,7 +108,7 @@ def extent(self) -> tuple: def grow(self, amt=None, sf=0.5): """Grow the bounds of a rectangle. - + Parameters ---------- amt : float @@ -116,12 +116,12 @@ def grow(self, amt=None, sf=0.5): triggers the value of ``BUFFER``. sf : float The scale factor for ``amt``. Default is ``0.5``. 
- + Returns ------- rect : libpysal.cg.Rect A new rectangle grown by ``amt`` and scaled by ``sf``. - + """ if not amt: @@ -133,12 +133,12 @@ def grow(self, amt=None, sf=0.5): def intersect(self, o): """Find the intersection of two rectangles. - + Parameters ---------- o : libpysal.cg.Rect Another rectangle. - + Returns ------- intersection : {libpysal.cg.NullRect, libpysal.cg.Rect} @@ -168,17 +168,17 @@ def intersect(self, o): def does_contain(self, o): """Check whether the rectangle contains the other rectangle. - + Parameters ---------- o : libpysal.cg.Rect Another rectangle. - + Returns ------- dc : bool ``True`` if ``self`` contains ``o`` otherwise ``False``. - + """ dc = self.does_containpoint((o.x, o.y)) and self.does_containpoint((o.xx, o.yy)) @@ -187,17 +187,17 @@ def does_contain(self, o): def does_intersect(self, o): """Check whether the rectangles interect. - + Parameters ---------- o : libpysal.cg.Rect Another rectangle. - + Returns ------- dcp : bool ``True`` if ``self`` intersects ``o`` otherwise ``False``. - + """ di = self.intersect(o).area() > 0 @@ -206,17 +206,17 @@ def does_intersect(self, o): def does_containpoint(self, p): """Check whether the rectangle contains a point or not. - + Parameters ---------- p : libpysal.cg.Point A point. - + Returns ------- dcp : bool ``True`` if ``self`` contains ``p`` otherwise ``False``. - + """ x, y = p @@ -227,12 +227,12 @@ def does_containpoint(self, p): def union(self, o): """Union two rectangles. - + Parameters ---------- o : libpysal.cg.Rect Another rectangle. - + Returns ------- res : libpysal.cg.Rect @@ -265,12 +265,12 @@ def union(self, o): def union_point(self, o): """Union the rectangle and a point - + Parameters ---------- o : libpysal.cg.Point A point. - + Returns ------- res : libpysal.cg.Rect @@ -309,17 +309,17 @@ def diagonal(self) -> float: def union_all(kids): """Create union of all child rectangles. - + Parameters ---------- kids : list A list of ``libpysal.cg._NodeCursor`` objects. 
- + Returns ------- cur : {libpysal.cg.Rect, libpysal.cg.NullRect} The unioned result of all child rectangles. - + """ cur = NullRect @@ -337,7 +337,7 @@ def Rtree(): class RTree(object): """An RTree for efficiently querying space based on intersecting rectangles. - + Attributes ---------- count : int @@ -357,12 +357,12 @@ class RTree(object): The pool of leaf objects in the tree. cursor : libpysal.cg._NodeCursor The non-root node and all its children. - + Examples -------- - + Instantiate an ``RTree``. - + >>> from libpysal.cg import RTree, Chain >>> segments = [ ... [(0.0, 1.5), (1.5, 1.5)], @@ -374,10 +374,10 @@ class RTree(object): >>> rt = RTree() >>> for segment in segments: ... rt.insert(segment, Rect(*segment.bounding_box).grow(sf=10.)) - + Examine the tree generation statistics. The statistics here are all 0 due to the simple structure of the tree in this example. - + >>> rt.stats {'overflow_f': 0, 'avg_overflow_t_f': 0.0, @@ -386,51 +386,51 @@ class RTree(object): 'sum_kmeans_iter_f': 0, 'count_kmeans_iter_f': 0, 'avg_kmeans_iter_f': 0.0} - + Examine the number of nodes and leaves. There five nodes and four leaves (the root plus its four children). - + >>> rt.count, rt.leaf_count (5, 4) - + The pool of nodes are the node IDs in the tree. - + >>> rt.node_pool array('L', [0, 4, 0, 0, 1, 1, 2, 2, 3, 3]) - + The pool of leaves are the geometric objects that were inserted into the tree. - + >>> rt.leaf_pool[0].vertices [(0.0, 1.5), (1.5, 1.5)] - + The pool of rectangles are the bounds of partitioned space in the tree. Examine the first one. - + >>> rt.rect_pool[:4] array('d', [-2.220446049250313e-15, -2.220446049250313e-15, 3.000000000000002, 3.000000000000002]) - + Add the bounding box of a leaf to the tree manually. - + >>> rt.add(Chain(((2,2), (4,4))), (2,2,4,4)) >>> rt.count, rt.leaf_count (6, 5) - + Query the tree for an intersection. One object is contained in this query. 
- + >>> rt.intersection([.4, 2.1, .9, 2.6])[0].vertices [(0.5, 2), (1, 2.5)] - + Query the tree with a much larger box. All objects are contained in this query. - + >>> len(rt.intersection([-1, -1, 4, 4])) == rt.leaf_count True - + Query the tree with box outside the tree objects. No objects are contained in this query. - + >>> rt.intersection([5, 5, 6, 6]) [] - + """ def __init__(self): @@ -471,14 +471,14 @@ def _ensure_pool(self, idx: int): def insert(self, o, orect): """Insert an object and its bounding box into the tree. - + Parameters ---------- o : libpysal.cg.{Point, Chain, Rectangle, Polygon} The object to insert into the tree. orect : ibpysal.cg.Rect The object's bounding box. - + """ self.cursor.insert(o, orect) @@ -486,13 +486,13 @@ def insert(self, o, orect): def query_rect(self, r): """Query a rectangle. - + Parameters ---------- r : {tuple, libpysal.cg.Point} The bounding box of the rectangle in question; a :math:`(minx,miny,maxx,maxy)` set of coordinates. - + Yields ------ x : generator @@ -504,12 +504,12 @@ def query_rect(self, r): def query_point(self, p): """Query a point. - + Parameters ---------- p : {tuple, libpysal.cg.Point} The point in question; an :math:`(x,y)` coordinate. - + Yields ------ x : generator @@ -527,12 +527,12 @@ def walk(self, pred): def intersection(self, boundingbox): """Query for an intersection between leaves in the ``RTree`` and the bounding box of an object. - + Parameters ---------- boundingbox : list The bounding box: ``[minx, miny, maxx, maxy]``. - + Returns ------- objs : list @@ -557,7 +557,7 @@ def add(self, id, boundingbox): An object id. boundingbox : list The bounding box: ``[minx, miny, maxx, maxy]``. - + """ self.cursor.insert(id, Rect(*boundingbox)) @@ -566,7 +566,7 @@ def add(self, id, boundingbox): class _NodeCursor(object): """An internal class for keeping track of, and reorganizing, the structure and composition of the ``RTree``. 
- + Parameters ---------- rooto : libpysal.cg.{Point, Chain, Rectangle, Polygon} @@ -579,7 +579,7 @@ class _NodeCursor(object): The ID of the first child of the node. next_sibling : int The ID of the sibling of the node. - + Attributes ---------- root : libpysal.cg.RTree @@ -588,13 +588,13 @@ class _NodeCursor(object): See ``RTree.node_pool``. rpool : array.array See ``RTree.rect_pool``. - + """ @classmethod def create(cls, rooto, rect): """Create a node in the tree structure. - + Parameters ---------- rooto : libpysal.cg.{Point, Chain, Rectangle, Polygon} @@ -603,12 +603,12 @@ def create(cls, rooto, rect): The ID of the node. rect : libpysal.cg.Rect The bounding rectangle of the leaf object. - + Returns ------- retv : libpysal.cg._NodeCursor The generated node. - + """ idx = rooto.count @@ -625,19 +625,19 @@ def create(cls, rooto, rect): @classmethod def create_with_children(cls, children, rooto): """Create a non-leaf node in the tree structure. - + Parameters ---------- children : list The child nodes of the node to be generated rooto : libpysal.cg.{Point, Chain, Rectangle, Polygon} The object from which the node will be generated. - + Returns ------- nc : libpysal.cg._NodeCursor The generated node with children. - + """ rect = union_all([c for c in children]) nr = Rect(rect.x, rect.y, rect.xx, rect.yy) @@ -652,7 +652,7 @@ def create_with_children(cls, children, rooto): @classmethod def create_leaf(cls, rooto, leaf_obj, leaf_rect): """Create a leaf node in the tree structure. - + Parameters ---------- rooto : libpysal.cg.{Point, Chain, Rectangle, Polygon} @@ -661,12 +661,12 @@ def create_leaf(cls, rooto, leaf_obj, leaf_rect): The leaf object. leaf_rect : libpysal.cg.Rect The bounding rectangle of the leaf object. - + Returns ------- res : libpysal.cg._NodeCursor The generated leaf node. 
- + """ rect = Rect(leaf_rect.x, leaf_rect.y, leaf_rect.xx, leaf_rect.yy) @@ -758,14 +758,14 @@ def p(o, x): def lift(self): """Promote a node to (potentially) rearrange the tree structure for optimal clustering. - + Called from ``_NodeCursor._balance()``. - + Returns ------- lifted : libpysal.cg._NodeCursor The lifted node. - + """ lifted = _NodeCursor( @@ -814,12 +814,12 @@ def holds_leaves(self) -> bool: def get_first_child(self): """Get the first child of a node. - + Returns ------- c : libpysal.cg._NodeCursor The first child of the specified node. - + """ fc = self.first_child @@ -867,7 +867,7 @@ def nchildren(self) -> int: def insert(self, leafo, leafrect): """Insert a leaf object into the tree. See ``RTree.insert(o, orect)`` for parameter description. - + """ index = self.index @@ -921,7 +921,7 @@ def _balance(self): and ``silhouette_coeff()`` for (heuristically) optimal clusterings of nodes in the tree structure after the child count of a node has grown past the maximum allowed number (see ``MAXCHILDREN``). - + Called from ``_NodeCursor.insert()``. """ @@ -959,10 +959,10 @@ def _balance(self): def _set_children(self, cs: list): """Set up the (new/altered) leaf tree structure. - + Called from ``_NodeCursor.create_with_children()`` and ``_NodeCursor._balance()``. - + """ self.first_child = 0 @@ -983,14 +983,14 @@ def _set_children(self, cs: list): self._save_back() def _insert_child(self, c): - """Internal function for child node insertion. + """Internal function for child node insertion. Called from ``_NodeCursor.insert()``. - + Parameters ---------- c : libpysal.cg._NodeCursor A child ``libpysal.cg._NodeCursor`` object. - + """ c.next_sibling = self.first_child @@ -1027,19 +1027,19 @@ def children(self): def avg_diagonals(node, onodes): """Calculate the mean diagonals. - + Parameters ---------- node : libpysal.cg._NodeCursor The target node in question. onodes : ist A list of ``libpysal.cg._NodeCursor`` objects. 
- + Returns ------- diag_avg : float The mean diagonal distance of ``node`` and ``onodes``. - + """ nidx = node.index @@ -1068,7 +1068,7 @@ def avg_diagonals(node, onodes): def silhouette_w(node, cluster, next_closest_cluster): """Calculate a silhouette score between a certain node and 2 clusters: - + Parameters ---------- node : libpysal.cg._NodeCursor @@ -1077,13 +1077,13 @@ def silhouette_w(node, cluster, next_closest_cluster): A list of ``libpysal.cg._NodeCursor`` objects. next_closest_cluster : list Another list of ``libpysal.cg._NodeCursor`` objects. - + Returns ------- silw : float The silhouette score between ``{node, cluster}`` and ``{node, next_closest_cluster}``. - + """ ndist = avg_diagonals(node, cluster) @@ -1099,17 +1099,17 @@ def silhouette_coeff(clustering): the clusters are well defined, a score of ``0`` indicates the clusters are undefined, and a score of ``-1`` indicates the clusters are defined incorrectly. - + Parameters ---------- clustering : list A list of ``libpysal.cg._NodeCursor`` objects. - + Returns ------- silcoeff : float Score for how well defined the clusters are. - + """ # special case for a clustering of 1.0 @@ -1135,17 +1135,17 @@ def silhouette_coeff(clustering): def center_of_gravity(nodes): """Find the center of gravity of multiple nodes. - + Parameters ---------- nodes : list A list of ``libpysal.cg.RTree`` and ``libpysal.cg._NodeCursor`` objects. - + Returns ------- cog : float The center of gravity of multiple nodes. - + """ totarea = 0.0 @@ -1165,14 +1165,14 @@ def center_of_gravity(nodes): def closest(centroids, node): """Find the closest controid to the node's center of gravity. - + Parameters ---------- centroids : list A list of (x, y) coordinates for the center of other clusters. node : libpysal.cg_NodeCursor A ``libpysal.cg._NodeCursor`` instance. - + Returns ------- ridx : int @@ -1195,7 +1195,7 @@ def closest(centroids, node): def k_means_cluster(root, k, nodes): """Find ``k`` clusters. 
- + Parameters ---------- root : libpysal.cg.RTree @@ -1204,12 +1204,12 @@ def k_means_cluster(root, k, nodes): The number clusters to find. nodes : list A list of ``libpysal.cg.RTree`` and ``libpysal.cg._NodeCursor`` objects. - + Returns ------- clusters : list Updated versions of ``nodes`` defining new clusters. - + """ t = time.process_time() diff --git a/libpysal/cg/sphere.py b/libpysal/cg/sphere.py index 4be043a5f..805a76d90 100644 --- a/libpysal/cg/sphere.py +++ b/libpysal/cg/sphere.py @@ -67,13 +67,13 @@ def arcdist(pt0, pt1, radius=RADIUS_EARTH_KM): Examples -------- - + >>> pt0 = (0, 0) >>> pt1 = (180, 0) >>> d = arcdist(pt0, pt1, RADIUS_EARTH_MILES) >>> d == math.pi * RADIUS_EARTH_MILES True - + """ dist = linear2arcdist(euclidean(toXYZ(pt0), toXYZ(pt1)), radius) @@ -84,7 +84,7 @@ def arcdist(pt0, pt1, radius=RADIUS_EARTH_KM): def arcdist2linear(arc_dist, radius=RADIUS_EARTH_KM): """Convert an arc distance (spherical earth) to a linear distance (R3) in the unit sphere. - + Parameters ---------- arc_dist : float @@ -95,24 +95,24 @@ def arcdist2linear(arc_dist, radius=RADIUS_EARTH_KM): radius in miles, ``RADIUS_EARTH_MILES`` (``3958.76``) is also an option. Source: http://nssdc.gsfc.nasa.gov/planetary/factsheet/earthfact.html - + Returns ------- linear_dist : float The linear distance conversion of ``arc_dist``. - + Examples -------- - + >>> pt0 = (0, 0) >>> pt1 = (180, 0) >>> d = arcdist(pt0, pt1, RADIUS_EARTH_MILES) >>> d == math.pi * RADIUS_EARTH_MILES True - + >>> arcdist2linear(d, RADIUS_EARTH_MILES) 2.0 - + """ circumference = 2 * math.pi * radius @@ -126,7 +126,7 @@ def arcdist2linear(arc_dist, radius=RADIUS_EARTH_KM): def linear2arcdist(linear_dist, radius=RADIUS_EARTH_KM): """Convert a linear distance in the unit sphere (R3) to an arc distance based on supplied radius. 
- + Parameters ---------- linear_dist : float @@ -137,26 +137,26 @@ def linear2arcdist(linear_dist, radius=RADIUS_EARTH_KM): radius in miles, ``RADIUS_EARTH_MILES`` (``3958.76``) is also an option. Source: http://nssdc.gsfc.nasa.gov/planetary/factsheet/earthfact.html - + Returns ------- arc_dist : float The arc distance conversion of ``linear_dist``. - + Raises ------ ValueError Raised when ``linear_dist`` exceeds the diameter of the unit sphere. - + Examples -------- - + >>> pt0 = (0, 0) >>> pt1 = (180, 0) >>> d = arcdist(pt0, pt1, RADIUS_EARTH_MILES) >>> d == linear2arcdist(2.0, radius=RADIUS_EARTH_MILES) True - + """ if linear_dist == float("inf"): @@ -175,7 +175,7 @@ def linear2arcdist(linear_dist, radius=RADIUS_EARTH_KM): def toXYZ(pt): """Convert a point's latitude and longitude to x,y,z. - + Parameters ---------- pt : tuple @@ -185,7 +185,7 @@ def toXYZ(pt): ------- x, y, z : tuple A point in form (x, y, z). - + """ phi, theta = list(map(math.radians, pt)) @@ -199,17 +199,17 @@ def toXYZ(pt): def toLngLat(xyz): """Convert a point's x,y,z to latitude and longitude. - + Parameters ---------- xyz : tuple A point assumed to be in form (x,y,z). - + Returns ------- phi, theta : tuple A point in form (phi, theta) [y,x]. - + """ x, y, z = xyz @@ -228,7 +228,7 @@ def toLngLat(xyz): def brute_knn(pts, k, mode="arc", radius=RADIUS_EARTH_KM): """Computes a brute-force :math:`k` nearest neighbors. - + Parameters ---------- pts : list @@ -244,12 +244,12 @@ def brute_knn(pts, k, mode="arc", radius=RADIUS_EARTH_KM): radius in miles, ``RADIUS_EARTH_MILES`` (``3958.76``) is also an option. Source: http://nssdc.gsfc.nasa.gov/planetary/factsheet/earthfact.html - + Returns ------- w : dict A neighbor ID lookup. - + """ n = len(pts) @@ -372,7 +372,7 @@ def lonlat(pointslist): Examples -------- - + >>> points = [ ... (41.981417, -87.893517), (41.980396, -87.776787), (41.980906, -87.696450) ... 
] @@ -402,7 +402,7 @@ def haversine(x): Examples -------- - + >>> haversine(math.pi) # is 180 in radians, hence sin of 90 = 1 1.0 @@ -441,7 +441,7 @@ def radangle(p0, p1): Examples -------- - + >>> p0 = (-87.893517, 41.981417) >>> p1 = (-87.519295, 41.657498) >>> radangle(p0, p1) @@ -449,7 +449,7 @@ def radangle(p0, p1): Notes ----- - + Uses haversine formula, function haversine and degree to radian conversion lambda function ``d2r``. @@ -491,18 +491,18 @@ def harcdist(p0, p1, lonx=True, radius=RADIUS_EARTH_KM): Examples -------- - + >>> p0 = (-87.893517, 41.981417) >>> p1 = (-87.519295, 41.657498) >>> harcdist(p0, p1) 47.52873002976876 - + >>> harcdist(p0, p1, radius=None) 0.007460167953189258 Notes ----- - + Uses the ``radangle`` function to compute radian angle. """ @@ -546,12 +546,12 @@ def geointerpolate(p0, p1, t, lonx=True): Examples -------- - + >>> p0 = (-87.893517, 41.981417) >>> p1 = (-87.519295, 41.657498) >>> geointerpolate(p0, p1, 0.1) # using lon-lat (-87.85592403438788, 41.949079912574796) - + >>> p3 = (41.981417, -87.893517) >>> p4 = (41.657498, -87.519295) >>> geointerpolate(p3, p4, 0.1, lonx=False) # using lat-lon @@ -612,7 +612,7 @@ def geogrid(pup, pdown, k, lonx=True): Examples -------- - + >>> pup = (42.023768, -87.946389) # Arlington Heights, IL >>> pdown = (41.644415, -87.524102) # Hammond, IN >>> geogrid(pup,pdown, 3, lonx=False) diff --git a/libpysal/cg/standalone.py b/libpysal/cg/standalone.py index bff681919..badf84a55 100644 --- a/libpysal/cg/standalone.py +++ b/libpysal/cg/standalone.py @@ -51,7 +51,7 @@ def bbcommon(bb, bbother): A bounding box. bbother : list The bounding box to test against. 
- + Returns ------- chflag : int @@ -64,7 +64,7 @@ def bbcommon(bb, bbother): >>> b1 = [10, 0, 20, 10] >>> bbcommon(b0, b1) 1 - + """ chflag = 0 @@ -91,20 +91,20 @@ def get_bounding_box(items): Examples -------- - + >>> bb = get_bounding_box([Point((-1, 5)), Rectangle(0, 6, 11, 12)]) >>> bb.left -1.0 - + >>> bb.lower 5.0 - + >>> bb.right 11.0 - + >>> bb.upper 12.0 - + """ def left(o): @@ -170,26 +170,26 @@ def get_angle_between(ray1, ray2): A ray forming the beginning of the angle measured. ray2 : libpysal.cg.Ray A ray forming the end of the angle measured. - + Returns ------- angle : float The angle between ``ray1`` and ``ray2``. - + Raises ------ ValueError Raised when rays do not have the same origin. - + Examples -------- - + >>> get_angle_between( ... Ray(Point((0, 0)), Point((1, 0))), ... Ray(Point((0, 0)), Point((1, 0))) ... ) 0.0 - + """ if ray1.o != ray2.o: @@ -233,13 +233,13 @@ def is_collinear(p1, p2, p3): Examples -------- - + >>> is_collinear(Point((0, 0)), Point((1, 1)), Point((5, 5))) True - + >>> is_collinear(Point((0, 0)), Point((1, 1)), Point((5, 0))) False - + """ eps = np.finfo(type(p1[0])).eps @@ -274,19 +274,19 @@ def get_segments_intersect(seg1, seg2): Examples -------- - + >>> seg1 = LineSegment(Point((0, 0)), Point((0, 10))) >>> seg2 = LineSegment(Point((-5, 5)), Point((5, 5))) >>> i = get_segments_intersect(seg1, seg2) >>> isinstance(i, Point) True - + >>> str(i) '(0.0, 5.0)' - + >>> seg3 = LineSegment(Point((100, 100)), Point((100, 101))) >>> i = get_segments_intersect(seg2, seg3) - + """ p1 = seg1.p1 @@ -353,16 +353,16 @@ def get_segment_point_intersect(seg, pt): Examples -------- - + >>> seg = LineSegment(Point((0, 0)), Point((0, 10))) >>> pt = Point((0, 5)) >>> i = get_segment_point_intersect(seg, pt) >>> str(i) '(0.0, 5.0)' - + >>> pt2 = Point((5, 5)) >>> get_segment_point_intersect(seg, pt2) - + """ eps = np.finfo(type(pt[0])).eps @@ -401,16 +401,16 @@ def get_polygon_point_intersect(poly, pt): Examples -------- - + >>> poly = 
Polygon([Point((0, 0)), Point((1, 0)), Point((1, 1)), Point((0, 1))]) >>> pt = Point((0.5, 0.5)) >>> i = get_polygon_point_intersect(poly, pt) >>> str(i) '(0.5, 0.5)' - + >>> pt2 = Point((2, 2)) >>> get_polygon_point_intersect(poly, pt2) - + """ def pt_lies_on_part_boundary(p, vx): @@ -458,16 +458,16 @@ def get_rectangle_point_intersect(rect, pt): Examples -------- - + >>> rect = Rectangle(0, 0, 5, 5) >>> pt = Point((1, 1)) >>> i = get_rectangle_point_intersect(rect, pt) >>> str(i) '(1.0, 1.0)' - + >>> pt2 = Point((10, 10)) >>> get_rectangle_point_intersect(rect, pt2) - + """ if rect.left <= pt[0] <= rect.right and rect.lower <= pt[1] <= rect.upper: @@ -494,28 +494,28 @@ def get_ray_segment_intersect(ray, seg): The intersecting point or line between ``ray`` and ``seg`` if an intersection exists or ``None`` if ``ray`` and ``seg`` do not intersect. - + See Also -------- - + libpysal.cg.get_segments_intersect - - + + Examples -------- - + >>> ray = Ray(Point((0, 0)), Point((0, 1))) >>> seg = LineSegment(Point((-1, 10)), Point((1, 10))) >>> i = get_ray_segment_intersect(ray, seg) >>> isinstance(i, Point) True - + >>> str(i) '(0.0, 10.0)' - + >>> seg2 = LineSegment(Point((10, 10)), Point((10, 11))) >>> get_ray_segment_intersect(ray, seg2) - + """ # Upper bound on origin to segment dist (+1) @@ -554,42 +554,42 @@ def get_rectangle_rectangle_intersection(r0, r1, checkOverlap=True): checkOverlap : bool Call ``bbcommon(r0, r1)`` prior to complex geometry checking. Default is ``True``. Prior to setting as - ``False`` see the Notes section. - + ``False`` see the Notes section. + Returns ------- intersection : {libpysal.cg.Point, libpysal.cg.LineSegment, libpysal.cg.Rectangle, None} - The intersecting point, line, or rectangle between + The intersecting point, line, or rectangle between `r0`` and ``r1`` if an intersection exists or ``None`` if ``r0`` and ``r1`` do not intersect. Notes ----- - + The algorithm assumes the rectangles overlap. 
The keyword ``checkOverlap=False`` should be used with extreme caution. Examples -------- - + >>> r0 = Rectangle(0,4,6,9) >>> r1 = Rectangle(4,0,9,7) >>> ri = get_rectangle_rectangle_intersection(r0,r1) >>> ri[:] [4.0, 4.0, 6.0, 7.0] - + >>> r0 = Rectangle(0,0,4,4) >>> r1 = Rectangle(2,1,6,3) >>> ri = get_rectangle_rectangle_intersection(r0,r1) >>> ri[:] [2.0, 1.0, 4.0, 3.0] - + >>> r0 = Rectangle(0,0,4,4) >>> r1 = Rectangle(2,1,3,2) >>> ri = get_rectangle_rectangle_intersection(r0,r1) >>> ri[:] == r1[:] True - + """ intersection = None @@ -625,7 +625,7 @@ def get_polygon_point_dist(poly, pt): ---------- poly : libpysal.cg.Polygon A polygon to compute distance from. - + pt : libpysal.cg.Point a point to compute distance from @@ -633,19 +633,19 @@ def get_polygon_point_dist(poly, pt): ------- dist : float The distance between ``poly`` and ``point``. - + Examples -------- - + >>> poly = Polygon([Point((0, 0)), Point((1, 0)), Point((1, 1)), Point((0, 1))]) >>> pt = Point((2, 0.5)) >>> get_polygon_point_dist(poly, pt) 1.0 - + >>> pt2 = Point((0.5, 0.5)) >>> get_polygon_point_dist(poly, pt2) 0.0 - + """ if get_polygon_point_intersect(poly, pt) is not None: @@ -669,24 +669,24 @@ def get_points_dist(pt1, pt2): ---------- pt1 : libpysal.cg.Point A point. - + pt2 : libpysal.cg.Point The other point. - + Returns ------- dist : float The distance between ``pt1`` and ``pt2``. - + Examples -------- - + >>> get_points_dist(Point((4, 4)), Point((4, 8))) 4.0 - + >>> get_points_dist(Point((0, 0)), Point((0, 0))) 0.0 - + """ dist = math.hypot(pt1[0] - pt2[0], pt1[1] - pt2[1]) @@ -705,7 +705,7 @@ def get_segment_point_dist(seg, pt): A line segment to compute distance from. pt : libpysal.cg.Point A point to compute distance from. 
- + Returns ------- dist : float @@ -716,16 +716,16 @@ def get_segment_point_dist(seg, pt): Examples -------- - + >>> seg = LineSegment(Point((0, 0)), Point((10, 0))) >>> pt = Point((5, 5)) >>> get_segment_point_dist(seg, pt) (5.0, 0.5) - + >>> pt2 = Point((0, 0)) >>> get_segment_point_dist(seg, pt2) (0.0, 0.0) - + """ src_p = seg.p1 @@ -782,7 +782,7 @@ def get_point_at_angle_and_dist(ray, angle, dist): The angle relative to ``ray`` at which ``point`` is located. dist : float The distance from the origin of ``ray`` at which ``point`` is located. - + Returns ------- point : libpysal.cg.Point @@ -790,18 +790,18 @@ def get_point_at_angle_and_dist(ray, angle, dist): Examples -------- - + >>> ray = Ray(Point((0, 0)), Point((1, 0))) >>> pt = get_point_at_angle_and_dist(ray, math.pi, 1.0) >>> isinstance(pt, Point) True - + >>> round(pt[0], 8) -1.0 - + >>> round(pt[1], 8) 0.0 - + """ v = (ray.p[0] - ray.o[0], ray.p[1] - ray.o[1]) @@ -817,7 +817,7 @@ def get_point_at_angle_and_dist(ray, angle, dist): def convex_hull(points): """Returns the convex hull of a set of points. - + Parameters ---------- points : list @@ -827,14 +827,14 @@ def convex_hull(points): ------- stack : list A list of points representing the convex hull. - + Examples -------- - + >>> points = [Point((0, 0)), Point((4, 4)), Point((4, 0)), Point((3, 1))] >>> convex_hull(points) [(0.0, 0.0), (4.0, 0.0), (4.0, 4.0)] - + """ def right_turn(p1, p2, p3) -> bool: @@ -863,7 +863,7 @@ def right_turn(p1, p2, p3) -> bool: def is_clockwise(vertices): """Returns whether a list of points describing a polygon are clockwise or counterclockwise. - + Parameters ---------- vertices : list @@ -873,21 +873,21 @@ def is_clockwise(vertices): ------- clockwise : bool ``True`` if ``vertices`` are clockwise, otherwise ``False``. 
- + See Also -------- - + libpysal.cg.ccw - + Examples -------- - + >>> is_clockwise([Point((0, 0)), Point((10, 0)), Point((0, 10))]) False - + >>> is_clockwise([Point((0, 0)), Point((0, 10)), Point((10, 0))]) True - + >>> v = [ ... (-106.57798, 35.174143999999998), ... (-106.583412, 35.174141999999996), @@ -950,7 +950,7 @@ def is_clockwise(vertices): ... ] >>> is_clockwise(v) True - + """ clockwise = True @@ -971,7 +971,7 @@ def is_clockwise(vertices): def ccw(vertices): """Returns whether a list of points is counterclockwise. - + Parameters ---------- vertices : list @@ -981,21 +981,21 @@ def ccw(vertices): ------- counter_clockwise : bool ``True`` if ``vertices`` are counter clockwise, otherwise ``False``. - + See Also -------- - + libpysal.cg.is_clockwise - + Examples -------- - + >>> ccw([Point((0, 0)), Point((10, 0)), Point((0, 10))]) True - + >>> ccw([Point((0, 0)), Point((0, 10)), Point((10, 0))]) False - + """ counter_clockwise = True @@ -1008,7 +1008,7 @@ def ccw(vertices): def seg_intersect(a, b, c, d): """Tests if two segments (a,b) and (c,d) intersect. - + Parameters ---------- a : libpysal.cg.Point @@ -1019,15 +1019,15 @@ def seg_intersect(a, b, c, d): The first vertex for the second segment. d : libpysal.cg.Point The second vertex for the second segment. - + Returns ------- segments_intersect : bool ``True`` if segments ``(a,b)`` and ``(c,d)``, otherwise ``False``. - + Examples -------- - + >>> a = Point((0,1)) >>> b = Point((0,10)) >>> c = Point((-2,5)) @@ -1035,10 +1035,10 @@ def seg_intersect(a, b, c, d): >>> e = Point((-3,5)) >>> seg_intersect(a, b, c, d) True - + >>> seg_intersect(a, b, c, e) False - + """ segments_intersect = True @@ -1063,7 +1063,7 @@ def _point_in_vertices(pt, vertices): A point. vertices : list A list of vertices representing as polygon. - + Returns ------- pt_in_poly : bool @@ -1071,13 +1071,13 @@ def _point_in_vertices(pt, vertices): Examples -------- - + >>> _point_in_vertices( ... Point((1, 1)), ... 
[Point((0, 0)), Point((10, 0)), Point((0, 10))] ... ) True - + """ def neg_ray_intersect(p1, p2, p3) -> bool: @@ -1126,29 +1126,29 @@ def point_touches_rectangle(point, rect): A point or point coordinates. rect : libpysal.cg.Rectangle A rectangle. - + Returns ------- chflag : int ``1`` if ``point`` is in (or touches boundary of) ``rect``, otherwise ``0``. - + Examples -------- - + >>> rect = Rectangle(0, 0, 10, 10) >>> a = Point((5, 5)) >>> b = Point((10, 5)) >>> c = Point((11, 11)) >>> point_touches_rectangle(a, rect) 1 - + >>> point_touches_rectangle(b, rect) 1 - + >>> point_touches_rectangle(c, rect) 0 - + """ chflag = 0 @@ -1170,7 +1170,7 @@ def get_shared_segments(poly1, poly2, bool_ret=False): A Polygon. bool_ret : bool Return only a ``bool``. Default is ``False``. - + Returns ------- common : list @@ -1181,7 +1181,7 @@ def get_shared_segments(poly1, poly2, bool_ret=False): Examples -------- - + >>> from libpysal.cg.shapes import Polygon >>> x = [0, 0, 1, 1] >>> y = [0, 1, 1, 0] @@ -1279,20 +1279,20 @@ def distance_matrix(X, p=2.0, threshold=5e7): ------- D : numpy.ndarray An n by :math:`m` :math:`p`-norm distance matrix. - + Raises ------ TypeError Raised when an invalid dimensional array is passed in. - + Notes ----- - + Needs optimization/integration with other weights in PySAL. - + Examples -------- - + >>> x, y = [r.flatten() for r in np.indices((3, 3))] >>> data = np.array([x, y]).T >>> d = distance_matrix(data) @@ -1315,7 +1315,7 @@ def distance_matrix(X, p=2.0, threshold=5e7): 1.41421356, 1. , 0. , 1. ], [2.82842712, 2.23606798, 2. , 2.23606798, 1.41421356, 1. , 2. , 1. , 0. 
]]) - + """ if X.ndim == 1: diff --git a/libpysal/cg/tests/test_geoJSON.py b/libpysal/cg/tests/test_geoJSON.py index f952f592b..4f255bb87 100644 --- a/libpysal/cg/tests/test_geoJSON.py +++ b/libpysal/cg/tests/test_geoJSON.py @@ -7,9 +7,8 @@ class test_MultiPloygon(unittest.TestCase): def test___init__1(self): - """Tests conversion of polygons with multiple shells to - geoJSON multipolygons and back. - + """Tests conversion of polygons with multiple + shells to geoJSON multipolygons and back. """ ncovr = pysal_examples.load_example("NCOVR") diff --git a/libpysal/cg/tests/test_locators.py b/libpysal/cg/tests/test_locators.py index 30aa4318c..b9574bd75 100644 --- a/libpysal/cg/tests/test_locators.py +++ b/libpysal/cg/tests/test_locators.py @@ -1,10 +1,13 @@ """locators Unittest.""" + from ..shapes import * from ..locators import * import unittest class PolygonLocator_Tester(unittest.TestCase): + """Setup class for unit tests.""" + def setUp(self): p1 = Polygon([Point((0, 1)), Point((4, 5)), Point((5, 1))]) p2 = Polygon([Point((3, 9)), Point((6, 7)), Point((1, 1))]) diff --git a/libpysal/cg/tests/test_polygonQuadTreeStructure.py b/libpysal/cg/tests/test_polygonQuadTreeStructure.py index 27528a831..49fc6afb7 100644 --- a/libpysal/cg/tests/test_polygonQuadTreeStructure.py +++ b/libpysal/cg/tests/test_polygonQuadTreeStructure.py @@ -1,4 +1,5 @@ """locators Unittest.""" + from ..polygonQuadTreeStructure import QuadTreeStructureSingleRing from ..shapes import Ring import unittest @@ -8,7 +9,7 @@ class TestQuadTreeStructureSingleRing(unittest.TestCase): def test_QuadTreeStructureSingleRing(self): """Tests if the class could successfully determine if a point is inside of a polygon. 
- + """ ring_texas = Ring( diff --git a/libpysal/cg/tests/test_rtree.py b/libpysal/cg/tests/test_rtree.py index caee13a1a..cb4df5c05 100644 --- a/libpysal/cg/tests/test_rtree.py +++ b/libpysal/cg/tests/test_rtree.py @@ -1,9 +1,12 @@ """pyrtree Unittest.""" + from ..rtree import RTree, Rect import unittest class Pyrtree_Tester(unittest.TestCase): + """Setup class for unit tests.""" + def setUp(self): k = 10 w = 20 diff --git a/libpysal/cg/tests/test_segmentLocator.py b/libpysal/cg/tests/test_segmentLocator.py index d2ce4e00a..b8f7800f4 100644 --- a/libpysal/cg/tests/test_segmentLocator.py +++ b/libpysal/cg/tests/test_segmentLocator.py @@ -1,10 +1,13 @@ """Segment Locator Unittest.""" + from ..shapes import * from ..segmentLocator import * import unittest class SegmentGrid_Tester(unittest.TestCase): + """Setup class for unit tests.""" + def setUp(self): # 10x10 grid with four line segments, one for each edge of the grid. self.grid = SegmentGrid(Rectangle(0, 0, 10, 10), 1) diff --git a/libpysal/cg/tests/test_shapes.py b/libpysal/cg/tests/test_shapes.py index 37530178d..e570a99a3 100644 --- a/libpysal/cg/tests/test_shapes.py +++ b/libpysal/cg/tests/test_shapes.py @@ -316,7 +316,7 @@ def test_bounding_box1(self): def test_len1(self): """Test correctness with multiple parts and zero-length point-to-point distances. 
- + """ vertices = [ diff --git a/libpysal/cg/tests/test_standalone.py b/libpysal/cg/tests/test_standalone.py index 622620338..a5a83ded1 100644 --- a/libpysal/cg/tests/test_standalone.py +++ b/libpysal/cg/tests/test_standalone.py @@ -90,7 +90,10 @@ def test_is_collinear_AlongY(self): def test_is_collinear_smallFloat(self): """ - Given: p1 = (0.1, 0.2), p2 = (0.2, 0.3), p3 = (0.3, 0.4) + Given: + + ``` + p1 = (0.1, 0.2), p2 = (0.2, 0.3), p3 = (0.3, 0.4) Line(p1, p2): y = mx + b m = (0.3-0.2) / (0.2-0.1) = .1/.1 = 1 @@ -108,27 +111,31 @@ def test_is_collinear_smallFloat(self): y = 1*x + 0.1 - Line(p1, p2) == Line(p2 ,p3) - - Therefore ``p1, p2, p3`` are collinear. + Line(p1,p2) == Line(p2,p3) + ``` + Therefore ``(p1, p2, p3)`` are collinear. Due to floating point rounding areas the standard test, - - ((p2[0]-p1[0])*(p3[1]-p1[1]) - (p2[1]-p1[1])*(p3[0]-p1[0])) == 0 - - will fail. To get around this we use an epsilon. The ``numpy.finfo`` - function return an smallest epsilon for the given data types such that, - - (numpy.finfo(float).eps + 1.0) != 1.0 + + ``` + ((p2[0]-p1[0])*(p3[1]-p1[1]) - (p2[1]-p1[1])*(p3[0]-p1[0])) == 0 + ``` + + will fail. To get around this we use an epsilon. numpy.finfo function + return an smallest epsilon for the given data types such that, + + ``` + (numpy.finfo(float).eps + 1.0) != 1.0 + ``` Therefore if - - abs( - (p2[0]-p1[0]) * (p3[1]-p1[1]) - (p2[1]-p1[1]) * (p3[0]-p1[0]) - ) < numpy.finfo(p1[0]).eps - + + ``` + abs((p2[0]-p1[0])*(p3[1]-p1[1]) - (p2[1]-p1[1])*(p3[0]-p1[0])) < numpy.finfo(p1[0]).eps + ``` + The points are collinear. - + """ self.assertEqual( diff --git a/libpysal/cg/voronoi.py b/libpysal/cg/voronoi.py index e283a3b25..20be01228 100644 --- a/libpysal/cg/voronoi.py +++ b/libpysal/cg/voronoi.py @@ -31,15 +31,15 @@ def voronoi(points, radius=None): the list contains the sequence of the indices of Voronoi vertices composing a Voronoi polygon (region), whereas the array contains the Voronoi vertex coordinates. 
- + Examples -------- - + >>> points = [(10.2, 5.1), (4.7, 2.2), (5.3, 5.7), (2.7, 5.3)] >>> regions, coordinates = voronoi(points) >>> regions [[1, 3, 2], [4, 5, 1, 0], [0, 1, 7, 6], [9, 0, 8]] - + >>> coordinates array([[ 4.21783296, 4.08408578], [ 7.51956025, 3.51807539], @@ -51,7 +51,7 @@ def voronoi(points, radius=None): [ 9.4642193 , 19.3994576 ], [ 1.78491801, 19.89803294], [ -9.22691341, -4.58994414]]) - + """ vor = voronoi_regions(Voronoi(points), radius=radius) @@ -68,13 +68,13 @@ def voronoi_regions(vor, radius=None): A planar Voronoi diagram. radius : float (optional) Distance to 'points at infinity'. Default is ``None.`` - + Returns ------- regions_vertices : tuple A two-element tuple consisting of a list of finite voronoi regions and an array Voronoi vertex coordinates. - + """ new_regions = [] @@ -148,14 +148,14 @@ def as_dataframes(regions, vertices, points): Finite Voronoi polygons as geometries. points_df : geopandas.GeoDataFrame Originator points as geometries. - + Raises ------ ImportError Raised when ``geopandas`` is not available. ImportError Raised when ``shapely`` is not available. - + """ try: @@ -201,7 +201,7 @@ def voronoi_frames(points, radius=None, clip="extent"): clip : {str, shapely.geometry.Polygon} An overloaded option about how to clip the voronoi cells. Default is ``'extent'``. Options are as follows. - + * ``'none'``/``None`` -- No clip is applied. Voronoi cells may be arbitrarily larger that the source map. Note that this may lead to cells that are many orders of magnitude larger in extent than the original map. Not recommended. * ``'bbox'``/``'extent'``/``'bounding box'`` -- Clip the voronoi cells to the bounding box of the input points. * ``'chull``/``'convex hull'`` -- Clip the voronoi cells to the convex hull of the input points. 
@@ -212,7 +212,7 @@ def voronoi_frames(points, radius=None, clip="extent"): ------- reg_vtx : tuple Two ``geopandas.GeoDataFrame`` (or ``pandas.DataFrame`` if ``geopandas`` - is unavailable) objects--``(region_df, points_df)``--of finite + is unavailable) objects--``(region_df, points_df)``--of finite Voronoi polygons and the originator points as geometries. Notes @@ -226,12 +226,12 @@ def voronoi_frames(points, radius=None, clip="extent"): Examples -------- - + >>> points = [(10.2, 5.1), (4.7, 2.2), (5.3, 5.7), (2.7, 5.3)] >>> regions_df, points_df = voronoi_frames(points) >>> regions_df.shape (4, 1) - + >>> regions_df.shape == points_df.shape True @@ -249,7 +249,7 @@ def voronoi_frames(points, radius=None, clip="extent"): def clip_voronoi_frames_to_extent(regions, vertices, clip="extent"): """Generate a geopandas.GeoDataFrame of Voronoi cells clipped to a specified extent. - + Parameters ---------- regions : geopandas.GeoDataFrame @@ -271,12 +271,12 @@ def clip_voronoi_frames_to_extent(regions, vertices, clip="extent"): contains all points (e.g. the smallest alphashape, using ``libpysal.cg.alpha_shape_auto``). - Polygon: Clip to an arbitrary Polygon. - + Returns ------- clipped_regions : geopandas.GeoDataFrame A ``geopandas.GeoDataFrame`` of clipped voronoi regions. - + Raises ------ ImportError @@ -285,7 +285,7 @@ def clip_voronoi_frames_to_extent(regions, vertices, clip="extent"): Raised when ``geopandas`` is not available. ValueError Raised when in invalid value for ``clip`` is passed in. - + """ try: from shapely.geometry import Polygon diff --git a/libpysal/common.py b/libpysal/common.py index 76b7ff3c2..16f1265e6 100644 --- a/libpysal/common.py +++ b/libpysal/common.py @@ -1,3 +1,6 @@ +"""Function used throughout the PySAL ecosystem. +""" + import copy import sys import time @@ -69,16 +72,16 @@ def simport(modname): ---------- modname : str Module name needed to import. 
- + Returns ------- _simport : tuple - Either (True, Module) or (False, None) depending + Either (``True``, ````) or (``False``, None) depending on whether the import succeeded. Notes ----- - + Wrapping this function around an iterative context or a with context would allow the module to be used without necessarily attaching it permanently in the global namespace: @@ -99,7 +102,7 @@ def simport(modname): #do alternative behavior here The first idiom makes it work kind of a like a with statement. - + """ try: @@ -111,7 +114,6 @@ def simport(modname): return _simport - def requires(*args, **kwargs): """Decorator to wrap functions with extra dependencies. @@ -121,14 +123,14 @@ def requires(*args, **kwargs): Modules names as strings to import. verbose : bool Set as ``True`` to print a warning message on import failure. - + Returns ------- inner : func The original function if all arg in args are importable. passer : func A function that passes if ``inner`` fails. - + """ v = kwargs.pop("verbose", True) diff --git a/libpysal/examples/__init__.py b/libpysal/examples/__init__.py index 46c9246b2..6b15f2d56 100644 --- a/libpysal/examples/__init__.py +++ b/libpysal/examples/__init__.py @@ -17,8 +17,15 @@ available_datasets = builtin_datasets.copy() available_datasets.update(remote_datasets.datasets) -__all__ = ["get_path", "available", "explain", "fetch_all", - "get_url", "load_example", "summary"] +__all__ = [ + "get_path", + "available", + "explain", + "fetch_all", + "get_url", + "load_example", + "summary", +] example_manager.add_examples(available_datasets) diff --git a/libpysal/examples/base.py b/libpysal/examples/base.py index bd15b78d3..36ef7d873 100644 --- a/libpysal/examples/base.py +++ b/libpysal/examples/base.py @@ -19,7 +19,6 @@ from typing import Union - def get_data_home(): """Return the path of the ``libpysal`` data directory. This folder is platform specific. If the folder does not already exist, it is automatically created. 
@@ -52,11 +51,6 @@ def get_list_of_files(dir_name): all_files : list All file and directory paths. - Raises - ------ - FileNotFoundError - If the file or directory is not found. - """ # names in the given directory @@ -87,7 +81,7 @@ def type_of_script() -> str: return "jupyter" if "terminal" in ipy_str: return "ipython" - except: + except NameError: return "terminal" @@ -112,7 +106,7 @@ class Example: Attributes ---------- root : str - The ``name`` parameter with filled spaces (_). + The ``name`` parameter with filled underscores (``'_`'`). installed : bool ``True`` if the example is installed, otherwise ``False``. zipfile : zipfile.ZipFile @@ -120,8 +114,7 @@ class Example: """ - def __init__(self, name, description, n, k, download_url, - explain_url): + def __init__(self, name, description, n, k, download_url, explain_url): """Initialze Example.""" self.name = name self.description = description @@ -139,12 +132,14 @@ def get_local_path(self, path=get_data_home()) -> str: def get_path(self, file_name, verbose=True) -> Union[str, None]: """Get the path for local file.""" file_list = self.get_file_list() + for file_path in file_list: base_name = os.path.basename(file_path) if file_name == base_name: return file_path + if verbose: - print(f'{file_name} is not a file in this example.') + print(f"{file_name} is not a file in this example.") return None def downloaded(self) -> bool: @@ -153,20 +148,24 @@ def downloaded(self) -> bool: if os.path.isdir(path): self.installed = True return True + return False def explain(self) -> None: """Provide a description of the example.""" file_name = self.explain_url.split("/")[-1] + if file_name == "README.md": explain_page = requests.get(self.explain_url) crawled = BeautifulSoup(explain_page.text, "html.parser") print(crawled.text) return None + if type_of_script() == "terminal": webbrowser.open(self.explain_url) return None + from IPython.display import IFrame return IFrame(self.explain_url, width=700, height=350) @@ -183,30 
+182,36 @@ def download(self, path=get_data_home()): archive.extractall(path=target) self.zipfile = archive self.installed = True - except requests.exceptions.RequestException as e: + except requests.exceptions.RequestException as e: raise SystemExit(e) - def get_file_list(self) -> Union[list, None]: """Get the list of local files for the example.""" + path = self.get_local_path() + if os.path.isdir(path): return get_list_of_files(path) + return None def json_dict(self) -> dict: """Container for example meta data.""" + meta = {} meta["name"] = self.name meta["description"] = self.description meta["download_url"] = self.download_url meta["explain_url"] = self.explain_url meta["root"] = self.root + return meta def load(self, file_name) -> io.FileIO: """Dispatch to libpysal.io to open file.""" + pth = self.get_path(file_name) + if pth: return ps_open(pth) @@ -233,31 +238,37 @@ def available(self): names = list(datasets.keys()) names.sort() rows = [] + for name in names: description = datasets[name].description installed = datasets[name].installed rows.append([name, description, installed]) + datasets = pandas.DataFrame( data=rows, columns=["Name", "Description", "Installed"] ) + datasets.style.set_properties(subset=["text"], **{"width": "300px"}) return datasets def load(self, example_name: str) -> Example: """Load example dataset, download if not locally available.""" + if example_name in self.datasets: example = self.datasets[example_name] + if example.installed: return example else: example.download() return example else: - print(f'Example not available: {example_name}') + print(f"Example not available: {example_name}") return None def download_remotes(self): """Download all remotes.""" + names = list(self.remotes.keys()) names.sort() @@ -267,22 +278,23 @@ def download_remotes(self): try: example.download() except: - print(f'Example not downloaded: {name}.') + print(f"Example not downloaded: {name}.") def get_installed_names(self) -> list: """Return names of all 
currently installed datasets.""" + ds = self.datasets + return [name for name in ds if ds[name].installed] def get_remote_url(self, name): - if name in self.datasets: + if name in self.datasets: try: return self.datasets[name].download_url except: - print(f'{name} is a built-in dataset, no url.') + print(f"{name} is a built-in dataset, no url.") else: - print(f'{name} is not an available dataset.') - + print(f"{name} is not an available dataset.") def summary(self): """Report on datasets.""" @@ -290,7 +302,7 @@ def summary(self): n = available.shape[0] n_installed = available.Installed.sum() n_remote = n - n_installed - print(f'{n} datasets available, {n_installed} installed, {n_remote} remote.') + print(f"{n} datasets available, {n_installed} installed, {n_remote} remote.") example_manager = Examples() diff --git a/libpysal/examples/builtin.py b/libpysal/examples/builtin.py index 69e788490..7920d982e 100644 --- a/libpysal/examples/builtin.py +++ b/libpysal/examples/builtin.py @@ -68,30 +68,40 @@ def __init__(self, name, dirname): def get_file_list(self) -> list: """Return a list of file names.""" + return get_list_of_files(self.dirname) def get_path(self, file_name: str, verbose=True) -> str: """Get path for local file.""" + file_list = self.get_file_list() + for file_path in file_list: base_name = os.path.basename(file_path) if file_name == base_name: return file_path + if verbose: print("{} is not a file in this example".format(file_name)) + return None def explain(self): """Provide a printed description of the example.""" + description = [f for f in self.get_file_list() if "README.md" in f][0] + with open(description, "r", encoding="utf8") as f: print(f.read()) def get_description(self) -> str: """Dataset description.""" + description = [f for f in self.get_file_list() if "README.md" in f][0] + with open(description, "r", encoding="utf8") as f: lines = f.readlines() + return lines[3].strip() diff --git a/libpysal/examples/remotes.py 
b/libpysal/examples/remotes.py index 4bca64262..fd53f08b2 100644 --- a/libpysal/examples/remotes.py +++ b/libpysal/examples/remotes.py @@ -9,341 +9,478 @@ # remote_dict holds the metadata for remote datasets from the geoda center # to update prior to release run _remote_data() -_remote_dict = {'AirBnB': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/airbnb.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//airbnb/', - 'n': '77', - 'k': '20', - 'description': 'Airbnb rentals, socioeconomics, and crime in Chicago'}, - 'Atlanta': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/atlanta_hom.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//atlanta_old/', - 'n': '90', - 'k': '23', - 'description': 'Atlanta, GA region homicide counts and rates'}, - 'Baltimore': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/baltimore.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//baltim/', - 'n': '211', - 'k': '17', - 'description': 'Baltimore house sales prices and hedonics'}, - 'Bostonhsg': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/boston.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//boston-housing/', - 'n': '506', - 'k': '23', - 'description': 'Boston housing and neighborhood data'}, - 'Buenosaires': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/buenosaires.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//buenos-aires_old/', - 'n': ' 209', - 'k': ' 21', - 'description': ' Electoral Data for 1999 Argentinean Elections'}, - 'Cars': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/Abandoned_Vehicles_Map.csv', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//1-source-and-description/', - 'n': '137,867', - 'k': '21', - 'description': '2011 abandoned vehicles in Chicago (311 complaints).'}, - 'Charleston1': {'download_url': 
'https://geodacenter.github.io/data-and-lab//data/CharlestonMSA.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//charleston-1_old/', - 'n': ' 117', - 'k': ' 30', - 'description': ' 2000 Census Tract Data for Charleston, SC MSA and counties'}, - 'Charleston2': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/CharlestonMSA2.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//charleston2/', - 'n': ' 44', - 'k': ' 97', - 'description': ' 1998 and 2001 Zip Code Business Patterns (Census Bureau) for Charleston, SC MSA'}, - 'Chicago Health': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/comarea.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//comarea_vars/', - 'n': ' 77', - 'k': ' 86', - 'description': ' Chicago Health + Socio-Economics'}, - 'Chicago commpop': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/chicago_commpop.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//commpop/', - 'n': ' 77', - 'k': ' 8', - 'description': ' Chicago Community Area Population Percent Change for 2000 and 2010'}, - 'Chicago parcels': {'download_url': 'https://geodacenter.github.io/data-and-lab//https://uchicago.box.com/s/j2d2ch5uvckse24y8l7vh9198wnq216i', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//parcels/', - 'n': ' 592,521', - 'k': ' 5', - 'description': ' Tax parcel polygons of Cook county'}, - 'Chile Labor': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/flma.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//FLMA/', - 'n': '141', - 'k': '62', - 'description': 'Labor Markets in Chile (1982-2002)'}, - 'Chile Migration': {'download_url': 'https://geodacenter.github.io/data-and-lab//https://uchicago.box.com/s/yqc97nq23hoeeqo5lkc2grlg98skokgk', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//CHIM/', - 'n': ' 304', - 'k': ' 10', - 'description': ' Internal Migration in Chile (1977-2002)'}, - 
'Cincinnati': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/walnuthills_updated.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//walnut_hills/', - 'n': ' 457', - 'k': ' 89', - 'description': ' 2008 Cincinnati Crime + Socio-Demographics'}, - 'Cleveland': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/cleveland.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//clev_sls_154_core/', - 'n': ' 205', - 'k': ' 9', - 'description': ' 2015 sales prices of homes in Cleveland, OH.'}, - 'Columbus': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/columbus.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//columbus/', - 'n': ' 49', - 'k': ' 20', - 'description': ' Columbus neighborhood crime'}, - 'Elections': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/election.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//county_election_2012_2016-variables/', - 'n': ' 3,108', - 'k': ' 74', - 'description': ' 2012 and 2016 Presidential Elections'}, - 'Grid100': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/grid100.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//grid100/', - 'n': ' 100', - 'k': ' 34', - 'description': ' Grid with simulated variables'}, - 'Groceries': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/grocery.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//chicago_sup_vars/', - 'n': ' 148', - 'k': ' 7', - 'description': ' 2015 Chicago supermarkets'}, - 'Guerry': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/guerry.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//Guerry/', - 'n': ' 85', - 'k': ' 23', - 'description': ' Moral statistics of France (Guerry, 1833)'}, - 'Health+': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/income_diversity.zip', - 'explain_url': 
'https://geodacenter.github.io/data-and-lab//co_income_diversity_variables/', - 'n': ' 3,984', - 'k': ' 64', - 'description': ' 2000 Health, Income + Diversity'}, - 'Health Indicators': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/healthIndicators.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//healthindicators-variables/', - 'n': ' 77', - 'k': ' 31', - 'description': ' Chicago Health Indicators (2005-11)'}, - 'Hickory1': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/HickoryMSA.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//hickory1/', - 'n': ' 68', - 'k': ' 30', - 'description': ' 2000 Census Tract Data for Hickory, NC MSA and counties'}, - 'Hickory2': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/HickoryMSA2.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//hickory2/', - 'n': ' 29', - 'k': ' 55', - 'description': ' 1998 and 2001 Zip Code Business Patterns (Census Bureau) for Hickory, NC MSA'}, - 'Home Sales': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/kingcounty.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//KingCounty-HouseSales2015/', - 'n': ' 21,613', - 'k': ' 21', - 'description': ' 2014-15 Home Sales in King County, WA'}, - 'Houston': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/houston_hom.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//houston/', - 'n': ' 52', - 'k': ' 23', - 'description': ' Houston, TX region homicide counts and rates'}, - 'Juvenile': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/juvenile.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//juvenile/', - 'n': ' 168', - 'k': ' 3', - 'description': ' Cardiff juvenile delinquent residences'}, - 'Lansing1': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/LansingMSA.zip', - 'explain_url': 
'https://geodacenter.github.io/data-and-lab//lansing1/', - 'n': ' 117', - 'k': ' 30', - 'description': ' 2000 Census Tract Data for Lansing, MI MSA and counties'}, - 'Lansing2': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/LansingMSA2.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//lansing2/', - 'n': ' 46', - 'k': ' 55', - 'description': ' 1998 and 2001 Zip Code Business Patterns (Census Bureau) for Lansing, MI MSA'}, - 'Laozone': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/laozone.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//ozone/', - 'n': ' 32', - 'k': ' 8', - 'description': ' Ozone measures at monitoring stations in Los Angeles basin'}, - 'LasRosas': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/lasrosas.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//lasrosas/', - 'n': ' 1,738', - 'k': ' 34', - 'description': ' Corn yield, fertilizer and field data for precision agriculture, Argentina, 1999'}, - 'Liquor Stores': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/liquor.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//liq_chicago/', - 'n': ' 571', - 'k': ' 2', - 'description': ' 2015 Chicago Liquor Stores'}, - 'Malaria': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/malariacolomb.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//colomb_malaria/', - 'n': ' 1,068', - 'k': ' 50', - 'description': ' Malaria incidence and population (1973, 95, 93 censuses and projections until 2005) \xa0 \xa0 \xa0'}, - 'Milwaukee1': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/MilwaukeeMSA.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//milwaukee1/', - 'n': ' 417', - 'k': ' 31', - 'description': ' 2000 Census Tract Data for Milwaukee, WI MSA'}, - 'Milwaukee2': {'download_url': 
'https://geodacenter.github.io/data-and-lab//data/MilwaukeeMSA2.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//milwaukee2/', - 'n': ' 83', - 'k': ' 55', - 'description': ' 1998 and 2001 Zip Code Business Patterns (Census Bureau) for Milwaukee, WI MSA'}, - 'NCOVR': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/ncovr.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//ncovr/', - 'n': '3,085', - 'k': ' 69', - 'description': ' US county homicides 1960-1990'}, - 'Natregimes': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/natregimes.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//natregimes/', - 'n': ' 3,085', - 'k': ' 73', - 'description': ' NCOVR with regimes (book/PySAL)'}, - 'NDVI': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/ndvi.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//ndvi/', - 'n': ' 49', - 'k': ' 5', - 'description': ' Normalized Difference Vegetation Index grid'}, - 'Nepal': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/nepal.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//nepal/', - 'n': ' 75', - 'k': ' 61', - 'description': ' Health, poverty and education indicators for Nepal districts'}, - 'NYC': {'download_url': 'https://geodacenter.github.io/data-and-lab///data/nyc.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//nyc/', - 'n': ' 55', - 'k': ' 34', - 'description': ' Demographic and housing data for New York City subboroughs, 2002-09'}, - 'NYC Earnings': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/lehd.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//LEHD_Data/', - 'n': ' 108,487', - 'k': ' 70', - 'description': ' Block-level Earnings in NYC (2002-14)'}, - 'NYC Education': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/nyc_2000Census.zip', - 'explain_url': 
'https://geodacenter.github.io/data-and-lab//NYC-Census-2000/', - 'n': ' 2,216', - 'k': ' 56', - 'description': ' NYC Education (2000)'}, - 'NYC Neighborhoods': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/nycnhood_acs.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//NYC-Nhood-ACS-2008-12/', - 'n': ' 195', - 'k': ' 98', - 'description': ' Demographics for New York City neighborhoods'}, - 'NYC Socio-Demographics': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/nyctract_acs.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//NYC_Tract_ACS2008_12/', - 'n': ' 2,166', - 'k': ' 113', - 'description': ' NYC Education + Socio-Demographics'}, - 'Ohiolung': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/ohiolung.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//ohiolung/', - 'n': ' 88', - 'k': ' 42', - 'description': ' Ohio lung cancer data, 1968, 1978, 1988'}, - 'Orlando1': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/OrlandoMSA.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//orlando1/', - 'n': ' 328', - 'k': ' 30', - 'description': ' 2000 Census Tract Data for Orlando, FL MSA and counties'}, - 'Orlando2': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/OrlandoMSA2.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//orlando2/', - 'n': ' 94', - 'k': ' 59', - 'description': ' 1998 and 2001 Zip Code Business Patterns (Census Bureau) for Orlando, FL MSA'}, - 'Oz9799': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/oz9799.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//oz96/', - 'n': ' 30', - 'k': ' 78', - 'description': ' Monthly ozone data, 1997-99'}, - 'Phoenix ACS': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/phx2.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//phx/', - 'n': ' 685', - 'k': ' 17', - 
'description': ' Phoenix American Community Survey Data (2010, 5-year averages)'}, - 'Pittsburgh': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/pittsburgh.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//pitt93/', - 'n': ' 143', - 'k': ' 8', - 'description': ' Pittsburgh homicide locations'}, - 'Police': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/police.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//police/', - 'n': ' 82', - 'k': ' 21', - 'description': ' Police expenditures Mississippi counties'}, - 'Sacramento1': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/sacramento.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//sacramento1/', - 'n': ' 403', - 'k': ' 30', - 'description': ' 2000 Census Tract Data for Sacramento MSA'}, - 'Sacramento2': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/SacramentoMSA2.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//sacramento2/', - 'n': ' 125', - 'k': ' 53', - 'description': ' 1998 and 2001 Zip Code Business Patterns (Census Bureau) for Sacramento MSA'}, - 'SanFran Crime': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/SFCrime_July_Dec2012.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//SFcrimes_vars/', - 'n': ' 3,384', - 'k': ' 13', - 'description': ' July-Dec 2012 crime incidents in San Francisco (points + area) - for CAST'}, - 'Savannah1': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/SavannahMSA.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//savannah1/', - 'n': ' 77', - 'k': ' 30', - 'description': ' 2000 Census Tract Data for Savannah, GA MSA and counties'}, - 'Savannah2': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/SavannahMSA2.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//savannah2/', - 'n': ' 24', - 'k': ' 55', - 'description': ' 1998 
and 2001 Zip Code Business Patterns (Census Bureau) for Savannah, GA MSA'}, - 'Scotlip': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/scotlip.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//scotlip/', - 'n': ' 56', - 'k': ' 11', - 'description': ' Male lip cancer in Scotland, 1975-80'}, - 'Seattle1': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/SeattleMSA.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//seattle1/', - 'n': ' 664', - 'k': ' 30', - 'description': ' 2000 Census Tract Data for Seattle, WA MSA and counties'}, - 'Seattle2': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/SeattleMSA2.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//seattle2/', - 'n': ' 145', - 'k': ' 59', - 'description': ' 1998 and 2001 Zip Code Business Patterns (Census Bureau) for Seattle, WA MSA'}, - 'SIDS': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/sids.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//sids/', - 'n': ' 100', - 'k': ' 13', - 'description': ' North Carolina county SIDS death counts'}, - 'SIDS2': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/sids2.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//sids2/', - 'n': ' 100', - 'k': ' 17', - 'description': ' North Carolina county SIDS death counts and rates'}, - 'Snow': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/snow.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//snow/', - 'n': ' NA', - 'k': ' NA', - 'description': ' John Snow & the 19th Century Cholera Epidemic'}, - 'South': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/south.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//south/', - 'n': ' 1,412', - 'k': ' 69', - 'description': ' US Southern county homicides 1960-1990'}, - 'Spirals': {'download_url': 
'https://geodacenter.github.io/data-and-lab//data/spirals.csv', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//spirals/', - 'n': ' 301', - 'k': ' 2', - 'description': ' Synthetic spiral points'}, - 'StLouis': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/stlouis.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//stlouis/', - 'n': ' 78', - 'k': ' 23', - 'description': ' St Louis region county homicide counts and rates'}, - 'Tampa1': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/TampaMSA.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//tampa1/', - 'n': ' 547', - 'k': ' 30', - 'description': ' 2000 Census Tract Data for Tampa, FL MSA and counties'}, - 'US SDOH': {'download_url': 'https://geodacenter.github.io/data-and-lab//data/us-sdoh-2014.zip', - 'explain_url': 'https://geodacenter.github.io/data-and-lab//us-sdoh/', - 'n': ' 71,901', - 'k': ' 25', - 'description': ' 2014 US Social Determinants of Health Data'}} +_remote_dict = { + "AirBnB": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/airbnb.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//airbnb/", + "n": "77", + "k": "20", + "description": "Airbnb rentals, socioeconomics, and crime in Chicago", + }, + "Atlanta": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/atlanta_hom.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//atlanta_old/", + "n": "90", + "k": "23", + "description": "Atlanta, GA region homicide counts and rates", + }, + "Baltimore": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/baltimore.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//baltim/", + "n": "211", + "k": "17", + "description": "Baltimore house sales prices and hedonics", + }, + "Bostonhsg": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/boston.zip", + "explain_url": 
"https://geodacenter.github.io/data-and-lab//boston-housing/", + "n": "506", + "k": "23", + "description": "Boston housing and neighborhood data", + }, + "Buenosaires": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/buenosaires.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//buenos-aires_old/", + "n": " 209", + "k": " 21", + "description": " Electoral Data for 1999 Argentinean Elections", + }, + "Cars": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/Abandoned_Vehicles_Map.csv", + "explain_url": "https://geodacenter.github.io/data-and-lab//1-source-and-description/", + "n": "137,867", + "k": "21", + "description": "2011 abandoned vehicles in Chicago (311 complaints).", + }, + "Charleston1": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/CharlestonMSA.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//charleston-1_old/", + "n": " 117", + "k": " 30", + "description": " 2000 Census Tract Data for Charleston, SC MSA and counties", + }, + "Charleston2": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/CharlestonMSA2.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//charleston2/", + "n": " 44", + "k": " 97", + "description": " 1998 and 2001 Zip Code Business Patterns (Census Bureau) for Charleston, SC MSA", + }, + "Chicago Health": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/comarea.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//comarea_vars/", + "n": " 77", + "k": " 86", + "description": " Chicago Health + Socio-Economics", + }, + "Chicago commpop": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/chicago_commpop.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//commpop/", + "n": " 77", + "k": " 8", + "description": " Chicago Community Area Population Percent Change for 2000 and 2010", + }, + "Chicago parcels": { + "download_url": 
"https://geodacenter.github.io/data-and-lab//https://uchicago.box.com/s/j2d2ch5uvckse24y8l7vh9198wnq216i", + "explain_url": "https://geodacenter.github.io/data-and-lab//parcels/", + "n": " 592,521", + "k": " 5", + "description": " Tax parcel polygons of Cook county", + }, + "Chile Labor": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/flma.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//FLMA/", + "n": "141", + "k": "62", + "description": "Labor Markets in Chile (1982-2002)", + }, + "Chile Migration": { + "download_url": "https://geodacenter.github.io/data-and-lab//https://uchicago.box.com/s/yqc97nq23hoeeqo5lkc2grlg98skokgk", + "explain_url": "https://geodacenter.github.io/data-and-lab//CHIM/", + "n": " 304", + "k": " 10", + "description": " Internal Migration in Chile (1977-2002)", + }, + "Cincinnati": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/walnuthills_updated.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//walnut_hills/", + "n": " 457", + "k": " 89", + "description": " 2008 Cincinnati Crime + Socio-Demographics", + }, + "Cleveland": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/cleveland.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//clev_sls_154_core/", + "n": " 205", + "k": " 9", + "description": " 2015 sales prices of homes in Cleveland, OH.", + }, + "Columbus": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/columbus.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//columbus/", + "n": " 49", + "k": " 20", + "description": " Columbus neighborhood crime", + }, + "Elections": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/election.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//county_election_2012_2016-variables/", + "n": " 3,108", + "k": " 74", + "description": " 2012 and 2016 Presidential Elections", + }, + "Grid100": { + "download_url": 
"https://geodacenter.github.io/data-and-lab//data/grid100.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//grid100/", + "n": " 100", + "k": " 34", + "description": " Grid with simulated variables", + }, + "Groceries": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/grocery.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//chicago_sup_vars/", + "n": " 148", + "k": " 7", + "description": " 2015 Chicago supermarkets", + }, + "Guerry": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/guerry.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//Guerry/", + "n": " 85", + "k": " 23", + "description": " Moral statistics of France (Guerry, 1833)", + }, + "Health+": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/income_diversity.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//co_income_diversity_variables/", + "n": " 3,984", + "k": " 64", + "description": " 2000 Health, Income + Diversity", + }, + "Health Indicators": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/healthIndicators.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//healthindicators-variables/", + "n": " 77", + "k": " 31", + "description": " Chicago Health Indicators (2005-11)", + }, + "Hickory1": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/HickoryMSA.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//hickory1/", + "n": " 68", + "k": " 30", + "description": " 2000 Census Tract Data for Hickory, NC MSA and counties", + }, + "Hickory2": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/HickoryMSA2.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//hickory2/", + "n": " 29", + "k": " 55", + "description": " 1998 and 2001 Zip Code Business Patterns (Census Bureau) for Hickory, NC MSA", + }, + "Home Sales": { + "download_url": 
"https://geodacenter.github.io/data-and-lab//data/kingcounty.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//KingCounty-HouseSales2015/", + "n": " 21,613", + "k": " 21", + "description": " 2014-15 Home Sales in King County, WA", + }, + "Houston": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/houston_hom.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//houston/", + "n": " 52", + "k": " 23", + "description": " Houston, TX region homicide counts and rates", + }, + "Juvenile": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/juvenile.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//juvenile/", + "n": " 168", + "k": " 3", + "description": " Cardiff juvenile delinquent residences", + }, + "Lansing1": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/LansingMSA.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//lansing1/", + "n": " 117", + "k": " 30", + "description": " 2000 Census Tract Data for Lansing, MI MSA and counties", + }, + "Lansing2": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/LansingMSA2.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//lansing2/", + "n": " 46", + "k": " 55", + "description": " 1998 and 2001 Zip Code Business Patterns (Census Bureau) for Lansing, MI MSA", + }, + "Laozone": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/laozone.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//ozone/", + "n": " 32", + "k": " 8", + "description": " Ozone measures at monitoring stations in Los Angeles basin", + }, + "LasRosas": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/lasrosas.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//lasrosas/", + "n": " 1,738", + "k": " 34", + "description": " Corn yield, fertilizer and field data for precision agriculture, Argentina, 1999", + }, + "Liquor Stores": { + 
"download_url": "https://geodacenter.github.io/data-and-lab//data/liquor.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//liq_chicago/", + "n": " 571", + "k": " 2", + "description": " 2015 Chicago Liquor Stores", + }, + "Malaria": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/malariacolomb.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//colomb_malaria/", + "n": " 1,068", + "k": " 50", + "description": " Malaria incidence and population (1973, 95, 93 censuses and projections until 2005) \xa0 \xa0 \xa0", + }, + "Milwaukee1": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/MilwaukeeMSA.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//milwaukee1/", + "n": " 417", + "k": " 31", + "description": " 2000 Census Tract Data for Milwaukee, WI MSA", + }, + "Milwaukee2": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/MilwaukeeMSA2.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//milwaukee2/", + "n": " 83", + "k": " 55", + "description": " 1998 and 2001 Zip Code Business Patterns (Census Bureau) for Milwaukee, WI MSA", + }, + "NCOVR": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/ncovr.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//ncovr/", + "n": "3,085", + "k": " 69", + "description": " US county homicides 1960-1990", + }, + "Natregimes": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/natregimes.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//natregimes/", + "n": " 3,085", + "k": " 73", + "description": " NCOVR with regimes (book/PySAL)", + }, + "NDVI": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/ndvi.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//ndvi/", + "n": " 49", + "k": " 5", + "description": " Normalized Difference Vegetation Index grid", + }, + "Nepal": { + "download_url": 
"https://geodacenter.github.io/data-and-lab//data/nepal.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//nepal/", + "n": " 75", + "k": " 61", + "description": " Health, poverty and education indicators for Nepal districts", + }, + "NYC": { + "download_url": "https://geodacenter.github.io/data-and-lab///data/nyc.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//nyc/", + "n": " 55", + "k": " 34", + "description": " Demographic and housing data for New York City subboroughs, 2002-09", + }, + "NYC Earnings": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/lehd.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//LEHD_Data/", + "n": " 108,487", + "k": " 70", + "description": " Block-level Earnings in NYC (2002-14)", + }, + "NYC Education": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/nyc_2000Census.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//NYC-Census-2000/", + "n": " 2,216", + "k": " 56", + "description": " NYC Education (2000)", + }, + "NYC Neighborhoods": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/nycnhood_acs.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//NYC-Nhood-ACS-2008-12/", + "n": " 195", + "k": " 98", + "description": " Demographics for New York City neighborhoods", + }, + "NYC Socio-Demographics": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/nyctract_acs.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//NYC_Tract_ACS2008_12/", + "n": " 2,166", + "k": " 113", + "description": " NYC Education + Socio-Demographics", + }, + "Ohiolung": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/ohiolung.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//ohiolung/", + "n": " 88", + "k": " 42", + "description": " Ohio lung cancer data, 1968, 1978, 1988", + }, + "Orlando1": { + "download_url": 
"https://geodacenter.github.io/data-and-lab//data/OrlandoMSA.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//orlando1/", + "n": " 328", + "k": " 30", + "description": " 2000 Census Tract Data for Orlando, FL MSA and counties", + }, + "Orlando2": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/OrlandoMSA2.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//orlando2/", + "n": " 94", + "k": " 59", + "description": " 1998 and 2001 Zip Code Business Patterns (Census Bureau) for Orlando, FL MSA", + }, + "Oz9799": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/oz9799.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//oz96/", + "n": " 30", + "k": " 78", + "description": " Monthly ozone data, 1997-99", + }, + "Phoenix ACS": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/phx2.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//phx/", + "n": " 685", + "k": " 17", + "description": " Phoenix American Community Survey Data (2010, 5-year averages)", + }, + "Pittsburgh": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/pittsburgh.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//pitt93/", + "n": " 143", + "k": " 8", + "description": " Pittsburgh homicide locations", + }, + "Police": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/police.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//police/", + "n": " 82", + "k": " 21", + "description": " Police expenditures Mississippi counties", + }, + "Sacramento1": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/sacramento.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//sacramento1/", + "n": " 403", + "k": " 30", + "description": " 2000 Census Tract Data for Sacramento MSA", + }, + "Sacramento2": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/SacramentoMSA2.zip", + 
"explain_url": "https://geodacenter.github.io/data-and-lab//sacramento2/", + "n": " 125", + "k": " 53", + "description": " 1998 and 2001 Zip Code Business Patterns (Census Bureau) for Sacramento MSA", + }, + "SanFran Crime": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/SFCrime_July_Dec2012.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//SFcrimes_vars/", + "n": " 3,384", + "k": " 13", + "description": " July-Dec 2012 crime incidents in San Francisco (points + area) - for CAST", + }, + "Savannah1": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/SavannahMSA.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//savannah1/", + "n": " 77", + "k": " 30", + "description": " 2000 Census Tract Data for Savannah, GA MSA and counties", + }, + "Savannah2": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/SavannahMSA2.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//savannah2/", + "n": " 24", + "k": " 55", + "description": " 1998 and 2001 Zip Code Business Patterns (Census Bureau) for Savannah, GA MSA", + }, + "Scotlip": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/scotlip.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//scotlip/", + "n": " 56", + "k": " 11", + "description": " Male lip cancer in Scotland, 1975-80", + }, + "Seattle1": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/SeattleMSA.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//seattle1/", + "n": " 664", + "k": " 30", + "description": " 2000 Census Tract Data for Seattle, WA MSA and counties", + }, + "Seattle2": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/SeattleMSA2.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//seattle2/", + "n": " 145", + "k": " 59", + "description": " 1998 and 2001 Zip Code Business Patterns (Census Bureau) for Seattle, WA MSA", + }, + "SIDS": { 
+ "download_url": "https://geodacenter.github.io/data-and-lab//data/sids.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//sids/", + "n": " 100", + "k": " 13", + "description": " North Carolina county SIDS death counts", + }, + "SIDS2": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/sids2.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//sids2/", + "n": " 100", + "k": " 17", + "description": " North Carolina county SIDS death counts and rates", + }, + "Snow": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/snow.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//snow/", + "n": " NA", + "k": " NA", + "description": " John Snow & the 19th Century Cholera Epidemic", + }, + "South": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/south.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//south/", + "n": " 1,412", + "k": " 69", + "description": " US Southern county homicides 1960-1990", + }, + "Spirals": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/spirals.csv", + "explain_url": "https://geodacenter.github.io/data-and-lab//spirals/", + "n": " 301", + "k": " 2", + "description": " Synthetic spiral points", + }, + "StLouis": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/stlouis.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//stlouis/", + "n": " 78", + "k": " 23", + "description": " St Louis region county homicide counts and rates", + }, + "Tampa1": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/TampaMSA.zip", + "explain_url": "https://geodacenter.github.io/data-and-lab//tampa1/", + "n": " 547", + "k": " 30", + "description": " 2000 Census Tract Data for Tampa, FL MSA and counties", + }, + "US SDOH": { + "download_url": "https://geodacenter.github.io/data-and-lab//data/us-sdoh-2014.zip", + "explain_url": 
"https://geodacenter.github.io/data-and-lab//us-sdoh/", + "n": " 71,901", + "k": " 25", + "description": " 2014 US Social Determinants of Health Data", + }, +} + def _remote_data(): """Helper function to get remote metadata for each release. @@ -362,10 +499,12 @@ def _remote_data(): except: warnings.warn("Remote data sets not available. Check connection.") return {} + soup = BeautifulSoup(page.text, "html.parser") samples = soup.find(class_="samples") rows = samples.find_all("tr") datasets = {} + for row in rows[1:]: data = row.find_all("td") name = data[0].text.strip() @@ -375,7 +514,13 @@ def _remote_data(): targets = row.find_all("a") download_url = url + targets[1].attrs["href"] explain_url = url + targets[0].attrs["href"] - datasets[name] = {'download_url': download_url, 'explain_url': explain_url, 'n': n, 'k': k, 'description': description} + datasets[name] = { + "download_url": download_url, + "explain_url": explain_url, + "n": n, + "k": k, + "description": description, + } return datasets @@ -396,7 +541,7 @@ def _build_remotes(): download_url = _remote_dict[name]["download_url"] explain_url = _remote_dict[name]["explain_url"] datasets[name] = Example(name, description, n, k, download_url, explain_url) - + # Other Remotes # rio name = "Rio Grande do Sul" diff --git a/libpysal/io/fileio.py b/libpysal/io/fileio.py index b9c11dfcd..17875df22 100644 --- a/libpysal/io/fileio.py +++ b/libpysal/io/fileio.py @@ -29,12 +29,12 @@ class FileIO_MetaCls(type): subclasses of `FileIO` also inherit this meta class, which registers their abilities with the FileIO registry. Subclasses must contain ``FORMATS`` and ``MODES`` (both are ``type(list)``). - + Raises ------ TypeError FileIO subclasses must have ``FORMATS`` and ``MODES`` defined. - + """ def __new__(mcs, name, bases, dict): @@ -58,9 +58,9 @@ class FileIO(object, metaclass=FileIO_MetaCls): # should be a type? """Metaclass for supporting spatial data file read and write. 
How this works: - + ``FileIO.open(\\*args) == FileIO(\\*args)`` - + When creating a new instance of `FileIO` the ``.__new__`` method intercepts. ``.__new__`` parses the filename to determine the ``fileType``. Next, ``.__registry`` and checked for that type. Each type supports one or more modes @@ -70,10 +70,10 @@ class FileIO(object, metaclass=FileIO_MetaCls): # should be a type? forced to conform to the prescribed API. The metaclass takes care of the registration by parsing the class definition. It doesn't make much sense to treat weights in the same way as shapefiles and dbfs, so... - + * ... for now we'll just return an instance of `W` on ``mode='r'``. * ... on ``mode='w'``, ``.write`` will expect an instance of `W`. - + """ __registry = {} # {'shp':{'r':[OGRshpReader,pysalShpReader]}} @@ -207,12 +207,12 @@ def __setIds(self, ids: Union[list, dict, None]): """Property method for ``.ids``. Takes a list of ids and maps then to a 0-based index. Need to provide a method to set ID's based on a ``fieldName`` preferably without reading the whole file. - + Raises ------ AssertionError Raised when IDs are not unique. - + """ if isinstance(ids, list): @@ -249,26 +249,26 @@ def __iter__(self): @staticmethod def _complain_ifclosed(closed): """From `StringIO`. - + Raises ------ ValueError Raised when a file is already closed. - + """ if closed: raise ValueError("I/O operation on closed file.") def cast(self, key, typ): """Cast ``key`` as ``typ``. - + Raises ------ TypeError Raised when a cast object in not callable. KeyError Raised when a key is not present. - + """ if key in self.header: if not self._spec: @@ -286,12 +286,12 @@ def cast(self, key, typ): def _cast(self, row) -> list: """ - + Raises ------ ValueError Raised when a value could not be cast a particular type. - + """ if self._spec and row: try: @@ -316,12 +316,12 @@ def _cast(self, row) -> list: def __next__(self) -> list: """A `FileIO` object is its own iterator, see `StringIO`. 
- + Raises ------ StopIteration Raised at the EOF. - + """ self._complain_ifclosed(self.closed) @@ -369,12 +369,12 @@ def read(self, n=-1) -> Union[list, None]: """Read at most ``n`` objects, less if read hits EOF. If size is negative or omitted read all objects until EOF. Returns ``None`` if EOF is reached before any objects. - + Raises ------ StopIteration Raised at the EOF. - + """ self._complain_ifclosed(self.closed) @@ -401,12 +401,12 @@ def read(self, n=-1) -> Union[list, None]: def __read(self) -> list: """Gets one row from the file handler, and if necessary casts it's objects. - + Raises ------ StopIteration Raised at the EOF. - + """ row = self._read() @@ -419,11 +419,11 @@ def __read(self) -> list: def _read(self): """Must be implemented by subclasses that support 'r' subclasses. Should increment ``.pos`` and redefine this doc string. - + Raises ------ NotImplementedError - + """ self._complain_ifclosed(self.closed) @@ -431,11 +431,11 @@ def _read(self): def truncate(self, size=None): """Should be implemented by subclasses and redefine this doc string. - + Raises ------ NotImplementedError - + """ self._complain_ifclosed(self.closed) @@ -445,11 +445,11 @@ def write(self, obj): """Must be implemented by subclasses that support 'w' subclasses Should increment ``.pos``. Subclasses should also check if ``obj`` is an instance of type(list) and redefine this doc string. - + Raises ------ NotImplementedError - + """ self._complain_ifclosed(self.closed) @@ -458,11 +458,11 @@ def write(self, obj): def flush(self): """ - + Raises ------ NotImplementedError - + """ self._complain_ifclosed(self.closed) diff --git a/libpysal/io/geotable/dbf.py b/libpysal/io/geotable/dbf.py index be4fcf332..6b0bc6789 100644 --- a/libpysal/io/geotable/dbf.py +++ b/libpysal/io/geotable/dbf.py @@ -19,7 +19,7 @@ def check_dups(li): ------- dups : list The duplicate IDs. 
- + """ dups = list(set([x for x in li if li.count(x) > 1])) @@ -30,7 +30,7 @@ def check_dups(li): def dbfdups(dbfpath, idvar): """Checks duplicates in a ``.dBase`` file ID variable must be specified correctly. Author(s) -- Luc Anselin - + Parameters ---------- dbfpath : str @@ -42,7 +42,7 @@ def dbfdups(dbfpath, idvar): ------- dups : list The duplicate IDs. - + """ db = ps_open(dbfpath, "r") li = db.by_col(idvar) @@ -77,13 +77,13 @@ def df2dbf(df, dbf_path, my_specs=None): ------- dbf_path : str Path to the output ``.dbf`` - + Notes ----- - + Use of ``dtypes.name`` may not be fully robust, but preferred approach of using ``isinstance`` seems too clumsy. - + """ if my_specs: specs = my_specs @@ -138,7 +138,7 @@ def dbf2df(dbf_path, index=None, cols=False, incl_index=False): ------- df : pandas.DataFrame The resultant ``pandas.DataFrame`` object. - + """ db = ps_open(dbf_path) @@ -187,7 +187,7 @@ def dbfjoin(dbf1_path, dbf2_path, out_path, joinkey1, joinkey2): ------- dp : str Path to output file. - + """ df1 = dbf2df(dbf1_path, index=joinkey1) @@ -215,7 +215,7 @@ def dta2dbf(dta_path, dbf_path): ------- dp : str path to output file - + """ db = pd.read_stata(dta_path) diff --git a/libpysal/io/geotable/file.py b/libpysal/io/geotable/file.py index d2a6c6947..491ba2e76 100644 --- a/libpysal/io/geotable/file.py +++ b/libpysal/io/geotable/file.py @@ -8,19 +8,19 @@ def read_files(filepath, **kwargs): """Reads a ``.dbf``/``.shp`` pair, squashing geometries into a 'geometry' column. - + Parameters ---------- filepath : str The file path. **kwargs : dict Optional keyword arguments for ``dbf2df()``. - + Returns ------- df : pandas.DataFrame The results dataframe returned from ``dbf2df()``. - + """ # keyword arguments wrapper will strip all around dbf2df's required arguments @@ -52,7 +52,7 @@ def read_files(filepath, **kwargs): def write_files(df, filepath, **kwargs): """Writes dataframes with potential geometric components out to files. 
- + Parameters ---------- df : pandas.DataFrame @@ -61,14 +61,14 @@ def write_files(df, filepath, **kwargs): The file path. **kwargs : dict Optional keyword arguments for ``df2dbf()``. - + Returns ------- dbf_path : str Path to the output ``.dbf`` paths : tuple The file paths for ``dbf_out``, ``shp_out``, ``W_path``. - + """ geomcol = kwargs.pop("geomcol", "geometry") @@ -97,8 +97,8 @@ def write_files(df, filepath, **kwargs): def _pairpath(filepath: str) -> tuple: """Return ``.dbf``/``.shp`` paths for any ``.shp``, - ``.dbf``, or basepath passed to function. - + ``.dbf``, or basepath passed to function. + """ base = os.path.splitext(filepath)[0] diff --git a/libpysal/io/geotable/shp.py b/libpysal/io/geotable/shp.py index bbc96a571..ed29fe21a 100644 --- a/libpysal/io/geotable/shp.py +++ b/libpysal/io/geotable/shp.py @@ -4,17 +4,17 @@ def shp2series(filepath): """Reads a shapefile, stuffing each shape into an element of a ``pandas.Series``. - + Parameters ---------- filepath : str Path to the file. - + Returns ------- s : pandas.Series The data cast a ``pandas.Series`` object. - + """ f = ps_open(filepath) @@ -26,17 +26,17 @@ def shp2series(filepath): def series2shp(series, filepath): """Writes a ``pandas.Series`` of PySAL polygons to a file - + Parameters ---------- series : pandas.Series The data to write out. - + Returns ------- filepath : str Path to the file. - + """ f = ps_open(filepath, "w") diff --git a/libpysal/io/geotable/wrappers.py b/libpysal/io/geotable/wrappers.py index a244b1b24..e6440aac1 100644 --- a/libpysal/io/geotable/wrappers.py +++ b/libpysal/io/geotable/wrappers.py @@ -8,19 +8,19 @@ @requires("geopandas") def geopandas(filename, **kw): """Wrapper for ``geopandas.read_file()``. - + Parameters ---------- filename : str Path to the file. **kw : dict Optional keyword arguments for ``geopandas.read_file()``. - + Returns ------- gdf : geopandas.GeoDataFrame The shapefile read in as a ``geopandas.GeoDataFrame``. 
- + """ import geopandas @@ -33,7 +33,7 @@ def geopandas(filename, **kw): @requires("fiona") def fiona(filename, geom_type="shapely", **kw): """Open a file with ``fiona`` and convert to a ``pandas.DataFrame``. - + Parameters ---------- filename : str @@ -42,7 +42,7 @@ def fiona(filename, geom_type="shapely", **kw): Package/method to use from creating geometries. Default is ``'shapely'``. **kw : dict Optional keyword arguments for ``fiona.open()``. - + Returns ------- df : pandas.DataFrame diff --git a/libpysal/io/iohandlers/arcgis_dbf.py b/libpysal/io/iohandlers/arcgis_dbf.py index 25a93ecb7..a80a4c50c 100644 --- a/libpysal/io/iohandlers/arcgis_dbf.py +++ b/libpysal/io/iohandlers/arcgis_dbf.py @@ -29,7 +29,7 @@ class ArcGISDbfIO(fileio.FileIO): record numbers, instead of original ids. An exemplary structure of an ArcGIS dbf file is as follows: - + ``` [Line 1] Field1 RECORD_ID NID WEIGHT [Line 2] 0 72 76 1 @@ -41,7 +41,7 @@ class ArcGISDbfIO(fileio.FileIO): References ---------- - + http://webhelp.esri.com/arcgisdesktop/9.3/index.cfm?TopicName=Convert_Spatial_Weights_Matrix_to_Table_(Spatial_Statistics) """ @@ -74,12 +74,12 @@ def seek(self, pos): def _read(self): """Reads ArcGIS dbf file - + Returns ------- w : libpysal.weights.W A ``libpysal.weights.W`` object. - + Raises ------ StopIteration @@ -88,7 +88,7 @@ def _read(self): Raised when the weights data structure is incorrect. TypeError Raised when the IDs are not integers. - + Examples -------- diff --git a/libpysal/io/iohandlers/arcgis_swm.py b/libpysal/io/iohandlers/arcgis_swm.py index a83d5de58..97b76d4a2 100644 --- a/libpysal/io/iohandlers/arcgis_swm.py +++ b/libpysal/io/iohandlers/arcgis_swm.py @@ -13,7 +13,7 @@ class ArcGISSwmIO(fileio.FileIO): Spatial weights objects in the ArcGIS ``.swm`` format are used in ArcGIS Spatial Statistics tools. Particularly, this format can be directly used with the tools under the category of Mapping Clusters. 
- + The values for``ORG_i`` and ``DST_i`` should be integers, as ArcGIS Spatial Statistics tools support only unique integer IDs. For the case where a weights object uses non-integer IDs, `ArcGISSwmIO` allows users to use @@ -22,19 +22,19 @@ class ArcGISSwmIO(fileio.FileIO): .. table:: ArcGIS SWM Components ============ ============ ==================================== ================================ - Part Data type Description Length + Part Data type Description Length ============ ============ ==================================== ================================ - ID_VAR_NAME ASCII TEXT ID variable name Flexible (Up to the 1st ;) + ID_VAR_NAME ASCII TEXT ID variable name Flexible (Up to the 1st ;) ESRI_SRS ASCII TEXT ESRI spatial reference system Flexible (Btw the 1st ; and \\n) - NO_OBS l.e. int Number of observations 4 - ROW_STD l.e. int Whether or not row-standardized 4 - WGT_i - ORG_i l.e. int ID of observaiton i 4 - NO_NGH_i l.e. int Number of neighbors for obs. i (m) 4 - NGHS_i - DSTS_i l.e. int IDs of all neighbors of obs. i 4*m - WS_i l.e. float Weights for obs. i and its neighbors 8*m - W_SUM_i l.e. float Sum of weights for " 8 + NO_OBS l.e. int Number of observations 4 + ROW_STD l.e. int Whether or not row-standardized 4 + WGT_i + ORG_i l.e. int ID of observaiton i 4 + NO_NGH_i l.e. int Number of neighbors for obs. i (m) 4 + NGHS_i + DSTS_i l.e. int IDs of all neighbors of obs. i 4*m + WS_i l.e. float Weights for obs. i and its neighbors 8*m + W_SUM_i l.e. float Sum of weights for " 8 ============ ============ ==================================== ================================ """ @@ -77,17 +77,17 @@ def seek(self, pos): def _read(self): """Read an ArcGIS ``.swm`` file. - + Returns ------- w : libpysal.weights.W A PySAL `W` object. - + Raises ------ StopIteration Raised at the EOF. - + Examples -------- @@ -131,17 +131,17 @@ def _read(self): def read_old_version(self, header): """Read the old version of ArcGIS(<10.1) ``.swm`` file. 
- + Parameters ---------- header : str The first line of the ``.swm`` file. - + Returns ------- w : libpysal.weights.W A PySAL `W` object. - + """ id_var, srs = header[:-1].split(";") @@ -175,19 +175,19 @@ def read_old_version(self, header): def read_new_version(self, header_line): """Read the new version of ArcGIS(<10.1) ``.swm`` file, which contains more parameters and records weights in two ways, fixed or variable. - + Parameters ---------- header_line : str The first line of the ``.swm`` file, which contains a lot of parameters. The parameters are divided by semicolons (';') and the key-value of each parameter is divided by at marks ('@'). - + Returns ------- w : libpysal.weights.W A PySAL `W` object. - + """ headerDict = {} @@ -250,7 +250,7 @@ def write(self, obj, useIdIndex=False): Raised when the input ``obj`` is not a PySAL `W`. TypeError Raised when the IDs in input ``obj`` are not integers. - + Examples -------- @@ -296,7 +296,7 @@ def write(self, obj, useIdIndex=False): Clean up the temporary file created for this example. >>> os.remove(fname) - + """ self._complain_ifclosed(self.closed) diff --git a/libpysal/io/iohandlers/arcgis_txt.py b/libpysal/io/iohandlers/arcgis_txt.py index 0489fc0fb..616918d36 100644 --- a/libpysal/io/iohandlers/arcgis_txt.py +++ b/libpysal/io/iohandlers/arcgis_txt.py @@ -15,7 +15,7 @@ class ArcGISTextIO(gwt.GwtIO): Statistics tools. This format is a simple text file with ASCII encoding and can be directly used with the tools under the category of "Mapping Clusters." But, it cannot be used with the "Generate Spatial Weights Matrix" tool. - + The first line of the ArcGIS text file is a header including the name of a data column that holded the ID variable in the original source data table. After this header line, it includes three data columns for origin ID, @@ -26,7 +26,7 @@ class ArcGISTextIO(gwt.GwtIO): numbers, instead of original IDs. 
An exemplary structure of an ArcGIS text file is as follows: - + [Line 1] StationID [Line 2] 1 1 0.0 [Line 3] 1 2 0.1 @@ -36,7 +36,7 @@ class ArcGISTextIO(gwt.GwtIO): [Line 7] 3 1 0.16667 [Line 8] 3 2 0.06667 [Line 9] 3 3 0.0 - + As shown in the above example, this file format allows explicit specification of weights for self-neighbors. When no entry is available for self-neighbors, @@ -49,7 +49,7 @@ class ArcGISTextIO(gwt.GwtIO): Notes ----- - + When there is a ``.dbf`` file whose name is identical to the name of the source text file, `ArcGISTextIO` checks the data type of the ID data column and uses it for reading and writing the text file. Otherwise, it considers IDs are strings. @@ -65,20 +65,20 @@ def __init__(self, *args, **kwargs): def _read(self): """Read in an ArcGIS text file. - + Returns ------- w : libpysal.weights.W A PySAL `W` object. - + Raises ------ StopIteration Raised at the EOF. - + TypeError Raised when the IDs are not integers. - + Examples -------- @@ -174,7 +174,7 @@ def write(self, obj, useIdIndex=False): Raised when the IDs in input ``obj`` are not integers. TypeError Raised when the input ``obj`` is not a PySAL `W`. - + Examples -------- @@ -217,7 +217,7 @@ def write(self, obj, useIdIndex=False): Clean up the temporary file created for this example. 
>>> os.remove(fname) - + """ self._complain_ifclosed(self.closed) diff --git a/libpysal/io/iohandlers/csvWrapper.py b/libpysal/io/iohandlers/csvWrapper.py index 74046cad9..6ac8cd46f 100644 --- a/libpysal/io/iohandlers/csvWrapper.py +++ b/libpysal/io/iohandlers/csvWrapper.py @@ -12,7 +12,7 @@ class csvWrapper(tables.DataTable): Examples -------- - + >>> import libpysal >>> stl = libpysal.examples.load_example('stl') >>> file_name = stl.get_path('stl_hom.csv') @@ -41,7 +41,7 @@ class csvWrapper(tables.DataTable): 'RDAC80', 'RDAC85', 'RDAC90'] - + >>> f._spec [str, str, @@ -65,7 +65,7 @@ class csvWrapper(tables.DataTable): float, float, float] - + """ __doc__ = tables.DataTable.__doc__ diff --git a/libpysal/io/iohandlers/dat.py b/libpysal/io/iohandlers/dat.py index 9502f26e4..09eb6f3a7 100644 --- a/libpysal/io/iohandlers/dat.py +++ b/libpysal/io/iohandlers/dat.py @@ -11,7 +11,7 @@ class DatIO(gwt.GwtIO): This ``.dat`` format is a simple text file with a ``.DAT`` or ``.dat`` extension. Without a header line, it includes three data columns for origin ID, destination ID, and weight values as follows: - + ``` [Line 1] 2 1 0.25 [Line 2] 5 1 0.50 @@ -28,17 +28,17 @@ class DatIO(gwt.GwtIO): def _read(self): """Reads in a ``.dat`` file as a PySAL `W` object. - + Returns ------- w : libpysal.weights.W A PySAL `W` object. - + Raises ------ StopIteration Raised at the EOF. - + Examples -------- @@ -125,7 +125,7 @@ def write(self, obj): Clean up the temporary file created for this example. >>> os.remove(fname) - + """ self._complain_ifclosed(self.closed) diff --git a/libpysal/io/iohandlers/db.py b/libpysal/io/iohandlers/db.py index b64eb82c4..3ec8414f1 100644 --- a/libpysal/io/iohandlers/db.py +++ b/libpysal/io/iohandlers/db.py @@ -25,8 +25,7 @@ class SQLConnection(fileio.FileIO): - """Reads an SQL mappable. 
- """ + """Reads an SQL mappable.""" FORMATS = ["sqlite", "db"] MODES = ["r"] @@ -85,12 +84,12 @@ def tables(self) -> list: @property def session(self): """Create an ``sqlalchemy.orm.Session`` instance. - + Returns ------- self._session : sqlalchemy.orm.Session An ``sqlalchemy.orm.Session`` instance. - + """ # What happens if the session is externally closed? Check for None? diff --git a/libpysal/io/iohandlers/gal.py b/libpysal/io/iohandlers/gal.py index 46a688531..849e29641 100644 --- a/libpysal/io/iohandlers/gal.py +++ b/libpysal/io/iohandlers/gal.py @@ -20,7 +20,7 @@ def __init__(self, *args, **kwargs): def read(self, n=-1, sparse=False): """Read in a ``.gal`` file. - + Parameters ---------- n : int @@ -28,7 +28,7 @@ def read(self, n=-1, sparse=False): sparse: bool If ``True`` return a ``scipy`` sparse object. If ``False`` return PySAL `W` object. Default is ``False``. - + Returns ------- w : {libpysal.weights.W, libpysal.weights.WSP} @@ -52,12 +52,12 @@ def _get_data_type(self): def _set_data_type(self, typ): """ - + Raises ------ TypeError Raised when ``typ`` is not a callable. - + """ if callable(typ): self._typ = typ @@ -73,12 +73,12 @@ def _read(self): ------- w : {libpysal.weights.W, libpysal.weights.WSP} A PySAL `W` object or a thin PySAL `WSP`. - + Raises ------ StopIteration Raised at the EOF. - + Examples -------- @@ -93,10 +93,10 @@ def _read(self): >>> w = testfile.read() >>> w.n == 100 True - + >>> print(round(w.sd,6)) 1.515124 - + >>> testfile = libpysal.io.open(libpysal.examples.get_path('sids2.gal'), 'r') Return a sparse matrix for the `W` information. @@ -197,7 +197,7 @@ def write(self, obj): ------ TypeError Raised when the input ``obj`` is not a PySAL `W`. - + Examples -------- @@ -238,7 +238,7 @@ def write(self, obj): Clean up the temporary file created for this example. 
>>> os.remove(fname) - + """ self._complain_ifclosed(self.closed) diff --git a/libpysal/io/iohandlers/geobugs_txt.py b/libpysal/io/iohandlers/geobugs_txt.py index c11cc0c3a..a2235eaf2 100644 --- a/libpysal/io/iohandlers/geobugs_txt.py +++ b/libpysal/io/iohandlers/geobugs_txt.py @@ -11,11 +11,11 @@ class GeoBUGSTextIO(fileio.FileIO): `GeoBUGS` generates a spatial weights matrix as an R object and writes it out as an ASCII text representation of the R object. An exemplary `GeoBUGS` text file is as follows. - + ``` list([CARD], [ADJ], [WGT], [SUMNUMNEIGH]) ``` - + where ``[CARD]`` and ``[ADJ]`` are required but the others are optional. PySAL assumes ``[CARD]`` and ``[ADJ]`` always exist in an input text file. It can read a `GeoBUGS` text file, even when its content is not written @@ -31,7 +31,7 @@ class GeoBUGSTextIO(fileio.FileIO): [ADJ]: adj = c ([a list of comma-splitted neighbor IDs]) - + If caridnality is zero, neighbor IDs are skipped. The ordering of observations is the same in both ``[CARD]`` and ``[ADJ]``. Neighbor IDs are record numbers starting from one. @@ -48,7 +48,7 @@ class GeoBUGSTextIO(fileio.FileIO): Notes ----- - + For the files generated from R the ``spdep``, ``nb2WB``, and ``dput`` functions. It is assumed that the value for the control parameter of the ``dput`` function is ``NULL``. Please refer to R ``spdep`` and @@ -56,7 +56,7 @@ class GeoBUGSTextIO(fileio.FileIO): References ---------- - + * **Thomas, A., Best, N., Lunn, D., Arnold, R., and Spiegelhalter, D.** (2004) GeoBUGS User Manual. R spdep nb2WB function help file. @@ -119,17 +119,17 @@ def seek(self, pos) -> int: def _read(self): """Reads in a `GeoBUGSTextIO` object. - + Raises ------ StopIteration Raised at the EOF. - + Returns ------- w : libpysal.weights.W A PySAL `W` object. - + """ if self.pos > 0: @@ -214,7 +214,7 @@ def write(self, obj): ------ TypeError Raised when the input ``obj`` is not a PySAL `W`. 
- + Examples -------- @@ -240,7 +240,7 @@ def write(self, obj): >>> o = libpysal.io.open(fname, 'w', 'geobugs_text') - Write the Weights object into the open file. + Write the Weights object into the open file. >>> o.write(w) >>> o.close() diff --git a/libpysal/io/iohandlers/geoda_txt.py b/libpysal/io/iohandlers/geoda_txt.py index 4e9ee79ca..6fcac4739 100644 --- a/libpysal/io/iohandlers/geoda_txt.py +++ b/libpysal/io/iohandlers/geoda_txt.py @@ -8,24 +8,24 @@ class GeoDaTxtReader(tables.DataTable): """GeoDa Text File Export Format. - + Examples -------- - + >>> import libpysal >>> f = libpysal.io.open(libpysal.examples.get_path('stl_hom.txt'),'r') >>> f.header ['FIPSNO', 'HR8488', 'HR8893', 'HC8488'] - + >>> len(f) 78 - + >>> f.dat[0] ['17107', '1.290722', '1.624458', '2'] - + >>> f.dat[-1] ['29223', '0', '8.451537', '0'] - + >>> f._spec [int, float, float, int] @@ -46,12 +46,12 @@ def __init__(self, *args, **kwargs): def _open(self): """ - + Raises ------ TypeError Raised when the input 'geoda_txt' is not valid. - + """ if self.mode == "r": diff --git a/libpysal/io/iohandlers/gwt.py b/libpysal/io/iohandlers/gwt.py index 9e3811157..18e090aeb 100644 --- a/libpysal/io/iohandlers/gwt.py +++ b/libpysal/io/iohandlers/gwt.py @@ -15,11 +15,11 @@ class unique_filter(object): Examples -------- - + >>> l = ['a', 'a', 'b', 'a', 'c', 'v', 'd', 'a', 'v', 'd'] >>> list(filter(unique_filter(),l)) ['a', 'b', 'c', 'v', 'd'] - + """ def __init__(self): @@ -64,17 +64,17 @@ def _get_shpName(self) -> str: def read(self, n=-1): """ - + Parameters ---------- n : int Read at most ``n`` objects. Default is ``-1``. - + Returns ------- w : libpysal.weights.W A PySAL `W` object. - + """ self._complain_ifclosed(self.closed) @@ -94,14 +94,14 @@ def _readlines(self, id_type, ret_ids=False): many weight file formats. Header lines, however, are different from format to format. So, for code reusability, this part is separated out from the ``_read()`` function by Myunghwa Hwang. 
- + Parameters ---------- id_type : type Cast IDs as this type. ret_ids : bool Return IDs (``True``). Default is ``False``. - + Returns ------- weights : dict @@ -110,7 +110,7 @@ def _readlines(self, id_type, ret_ids=False): Dictionary of neighbor ID values. ids : list List of ID values. - + """ data = [row.strip().split() for row in self.file.readlines()] @@ -139,17 +139,17 @@ def _readlines(self, id_type, ret_ids=False): def _read(self): """Reads ``.gwt`` file. - + Returns ------- w : libpysal.weights.W A PySAL `W` object. - + Raises ------ StopIteration Raised at the EOF. - + Examples -------- @@ -236,12 +236,12 @@ def _writelines(self, obj): repeatedly used for many weight file formats. Header lines, however, are different from format to format. So, for code reusability, this part is separated out from write function by Myunghwa Hwang. - + Parameters ---------- obj : libpysal.weights.W A PySAL `W` object. - + """ for id in obj.id_order: @@ -260,7 +260,7 @@ def write(self, obj): ---------- obj : libpysal.weights.W A PySAL `W` object. - + Raises ------ TypeError @@ -306,7 +306,7 @@ def write(self, obj): Clean up the temporary file created for this example. >>> os.remove(fname) - + """ self._complain_ifclosed(self.closed) diff --git a/libpysal/io/iohandlers/mat.py b/libpysal/io/iohandlers/mat.py index 02ef61aad..7951ac5b3 100644 --- a/libpysal/io/iohandlers/mat.py +++ b/libpysal/io/iohandlers/mat.py @@ -19,13 +19,13 @@ class MatIO(fileio.FileIO): Notes ----- - + If a given weights object contains too many observations to write it out as a full matrix, PySAL writes out the object as a sparse matrix. References ---------- - + `MathWorks `_ (2011) "MATLAB 7 MAT-File Format." @@ -50,17 +50,17 @@ def _get_varName(self) -> str: def read(self, n=-1): """ - + Parameters ---------- n : int Read at most ``n`` objects. Default is ``-1``. - + Returns ------- w : libpysal.weights.W A PySAL `W` object. 
- + """ self._complain_ifclosed(self.closed) @@ -76,17 +76,17 @@ def seek(self, pos): def _read(self): """Reads MATLAB ``.mat`` file. - + Returns ------- w : libpysal.weights.W A PySAL `W` object. - + Raises ------ StopIteration Raised at the EOF. - + Examples -------- @@ -135,7 +135,7 @@ def write(self, obj): ---------- obj : libpysal.weights.W A PySAL `W` object. - + Raises ------ TypeError diff --git a/libpysal/io/iohandlers/mtx.py b/libpysal/io/iohandlers/mtx.py index 52f65d5cc..49d66faa3 100644 --- a/libpysal/io/iohandlers/mtx.py +++ b/libpysal/io/iohandlers/mtx.py @@ -19,7 +19,7 @@ class MtxIO(fileio.FileIO): With the above assumptions, the structure of a MTX file containing a spatial weights matrix can be defined as follows: - + ``` %%MatrixMarket matrix coordinate real general <--- header 1 (constant) % Comments starts <--- @@ -30,7 +30,7 @@ class MtxIO(fileio.FileIO): ... | L entry lines IL JL A(IL,JL) <--- ``` - + In the MTX format, the index for rows or columns starts with 1. PySAL uses ``mtx`` tools in @@ -40,7 +40,7 @@ class MtxIO(fileio.FileIO): References ---------- - + `MTX format specification `_ `Matrix Market files @@ -58,7 +58,7 @@ def __init__(self, *args, **kwargs): def read(self, n=-1, sparse=False): """ - + Parameters ---------- n : int @@ -66,12 +66,12 @@ def read(self, n=-1, sparse=False): sparse : bool Flag for returning a sparse weights matrix (``True``). Default is ``False``. - + Returns ------- w : libpysal.weights.W A PySAL `W` object. - + """ self._sparse = sparse @@ -85,17 +85,17 @@ def seek(self, pos): def _read(self): """Reads MatrixMarket ``.mtx`` file. - + Returns ------- w : {libpysal.weights.W, libpysal.weights.WSP} A PySAL `W` object. - + Raises ------ StopIteration Raised at the EOF. - + Examples -------- @@ -143,7 +143,7 @@ def _read(self): 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 
]] - + """ if self.pos > 0: @@ -169,7 +169,7 @@ def write(self, obj): ---------- obj : libpysal.weights.W A PySAL `W` object. - + Raises ------ TypeError diff --git a/libpysal/io/iohandlers/pyDbfIO.py b/libpysal/io/iohandlers/pyDbfIO.py index d2b30143f..603a61f7f 100644 --- a/libpysal/io/iohandlers/pyDbfIO.py +++ b/libpysal/io/iohandlers/pyDbfIO.py @@ -14,14 +14,14 @@ class DBF(tables.DataTable): """PySAL DBF Reader/Writer. This DBF handler implements the PySAL DataTable interface and initializes an instance of the PySAL's DBF handler. - + Parameters ----------- dataPath : str Path to file, including file name and extension. mode : str Mode for file interaction; either ``'r'`` or ``'w'``. - + Attributes ---------- header : list @@ -41,7 +41,7 @@ class DBF(tables.DataTable): >>> dbf = libpysal.io.open(libpysal.examples.get_path('juvenile.dbf'), 'r') >>> dbf.header ['ID', 'X', 'Y'] - + >>> dbf.field_spec [('N', 9, 0), ('N', 9, 0), ('N', 9, 0)] @@ -115,12 +115,12 @@ def __init__(self, *args, **kwargs): def __len__(self) -> int: """ - + Raises ------ IOError Raised when a file is open ``'w'`` mode. - + """ if self.mode != "r": @@ -135,12 +135,12 @@ def seek(self, i): def _get_col(self, key: str) -> list: """Return the column vector. - + Raises ------ AttributeError Raised when a field does not exist in the header. - + """ if key not in self._col_index: @@ -246,12 +246,12 @@ def read_record(self, i: int) -> list: def _read(self) -> Union[list, None]: """ - + Raises ------ IOError Raised when a file is open ``'w'`` mode. - + """ if self.mode != "r": @@ -267,14 +267,14 @@ def _read(self) -> Union[list, None]: def write(self, obj: list): """ - + Raises ------ IOError Raised when a file is open ``'r'`` mode. TypeError Raised when a row length and header length are not equivalent. - + """ self._complain_ifclosed(self.closed) @@ -337,14 +337,14 @@ def close(self): def _firstWrite(self): """ - + Raises ------ IOError Raised when there is no specified header. 
IOError Raised when there is no field specification. - + """ if not self.header: diff --git a/libpysal/io/iohandlers/pyShpIO.py b/libpysal/io/iohandlers/pyShpIO.py index bbd3bb787..f13ed1cb8 100644 --- a/libpysal/io/iohandlers/pyShpIO.py +++ b/libpysal/io/iohandlers/pyShpIO.py @@ -37,7 +37,7 @@ class PurePyShpWrapper(fileio.FileIO): Notes ----- - + This class wraps ``_pyShpIO``'s ``shp_file`` class with the PySAL `FileIO` API. shp_file can be used without PySAL. @@ -48,25 +48,25 @@ class PurePyShpWrapper(fileio.FileIO): >>> f = tempfile.NamedTemporaryFile(suffix='.shp') >>> fname = f.name >>> f.close() - + >>> import libpysal >>> i = libpysal.io.open(libpysal.examples.get_path('10740.shp'),'r') >>> o = libpysal.io.open(fname,'w') - + >>> for shp in i: ... o.write(shp) >>> o.close() - + >>> one = libpysal.io.open(libpysal.examples.get_path('10740.shp'),'rb').read() >>> two = libpysal.io.open(fname,'rb').read() >>> one[0].centroid == two[0].centroid True - + >>> one = libpysal.io.open(libpysal.examples.get_path('10740.shx'),'rb').read() >>> two = libpysal.io.open(fname[:-1]+'x','rb').read() >>> one[0].centroid == two[0].centroid True - + >>> import os >>> os.remove(fname); os.remove(fname.replace('.shp','.shx')) diff --git a/libpysal/io/iohandlers/stata_txt.py b/libpysal/io/iohandlers/stata_txt.py index a6bc13784..bee013a23 100644 --- a/libpysal/io/iohandlers/stata_txt.py +++ b/libpysal/io/iohandlers/stata_txt.py @@ -26,7 +26,7 @@ class StataTextIO(fileio.FileIO): its export options in STATA. Structure 1: Encoding using the list of neighbor IDs. - + ``` [Line 1] [Number_of_Observations] [Line 2] [ID_of_Obs_1] [ID_of_Neighbor_1_of_Obs_1] [ID_of_Neighbor_2_of_Obs_1] ... [ID_of_Neighbor_m_of_Obs_1] @@ -34,11 +34,11 @@ class StataTextIO(fileio.FileIO): [Line 4] [ID_of_Obs_3] [ID_of_Neighbor_1_of_Obs_3] [ID_of_Neighbor_2_of_Obs_3] ... ``` - + Note that for island observations their IDs are still recorded. - + Structure 2: Encoding using a full matrix format. 
- + ``` [Line 1] [Number_of_Observations] [Line 2] [ID_of_Obs_1] [w_11] [w_12] ... [w_1n] @@ -47,20 +47,20 @@ class StataTextIO(fileio.FileIO): ... [Line n+1] [ID_of_Obs_n] [w_n1] [w_n2] ... [w_nn] ``` - + where :math:`w_{ij}` can be a form of general weight. That is, :math:`w_ij` can be both a binary value or a general numeric value. If an observation is an island, all of its ``w`` columns contain 0. References ---------- - + Drukker D.M., Peng H., Prucha I.R., and Raciborski R. (2011) "Creating and managing spatial-weighting matrices using the spmat command" Notes ----- - + The ``spmat`` command allows users to add any note to a spatial weights matrix object in STATA. However, all those notes are lost when the matrix is exported. PySAL also does not take care of those notes. @@ -77,17 +77,17 @@ def __init__(self, *args, **kwargs): def read(self, n=-1): """ - + Parameters ---------- n : int Read at most ``n`` objects. Default is ``-1``. - + Returns ------- w : libpysal.weights.W A PySAL `W` object. - + """ self._complain_ifclosed(self.closed) @@ -101,17 +101,17 @@ def seek(self, pos): def _read(self): """Reads STATA Text file Returns a pysal.weights.weights.W object - + Returns ------- w : libpysal.weights.W A PySAL `W` object. - + Raises ------ StopIteration Raised at the EOF. - + Examples -------- @@ -198,12 +198,12 @@ def write(self, obj, matrix_form=False): A PySAL `W` object. matrix_form : bool Flag for matrix form (``True``). Default is ``False``. - + Raises ------ TypeError Raised when the input ``obj`` is not a PySAL `W`. - + Examples -------- @@ -246,7 +246,7 @@ def write(self, obj, matrix_form=False): Clean up the temporary file created for this example. 
>>> os.remove(fname) - + """ self._complain_ifclosed(self.closed) diff --git a/libpysal/io/iohandlers/template.py b/libpysal/io/iohandlers/template.py index 0ae9df384..c9664c79c 100644 --- a/libpysal/io/iohandlers/template.py +++ b/libpysal/io/iohandlers/template.py @@ -35,18 +35,18 @@ def __init__(self, *args, **kwargs): # Writers must subclass ``.write()`` def write(self, obj): - """ ``.write`` method of the 'foobar' template - + """``.write`` method of the 'foobar' template + Parameters ---------- obj : str Some string. - + Raises ------ TypeError Raised when a ``str`` is expected, but got another type. - + """ # GOOD TO HAVE, this will prevent invalid operations on closed files. @@ -111,17 +111,17 @@ def foobar(c): def _read(self): """The ``_read`` method should return only ONE object. - + Returns ------- obj_plus_break : str only ONE object. - + Raises ------ StopIteration Raised at the EOF. - + """ line = self.fileObj.readline() @@ -138,17 +138,17 @@ def _read(self): def write(self, obj): """The ``.write`` method of the 'foobar' template, receives an ``obj``. - + Paramters --------- obj : str Some string. - + Raises ------ TypeError Raised when a ``str`` is expected, but got another type. - + """ self._complain_ifclosed(self.closed) if issubclass(type(obj), str): diff --git a/libpysal/io/iohandlers/wk1.py b/libpysal/io/iohandlers/wk1.py index 25600a8cb..1dcb02e88 100644 --- a/libpysal/io/iohandlers/wk1.py +++ b/libpysal/io/iohandlers/wk1.py @@ -8,7 +8,7 @@ class Wk1IO(fileio.FileIO): """MATLAB ``wk1read.m`` and ``wk1write.m`` that were written by - Brian M. Bourgault in 10/22/93. Opens, reads, and writes weights + Brian M. Bourgault in 10/22/93. Opens, reads, and writes weights ile objects in Lotus Wk1 format. Lotus Wk1 files are used in Dr. LeSage's MATLAB Econometrics library. @@ -20,12 +20,12 @@ class Wk1IO(fileio.FileIO): is a blank or has a number. 
The internal structure of a `Wk1` file written by PySAL is as follows: - + ``` [BOF][DIM][CPI][CAL][CMODE][CORD][SPLIT][SYNC][CURS][WIN] [HCOL][MRG][LBL][CELL_1]...[CELL_m][EOF] ``` - + where ``[CELL_k]`` equals to ``[DTYPE][DLEN][DFORMAT][CINDEX][CVALUE]``. The parts between ``[BOF]`` and ``[CELL_1]`` are variable according to the software program used to write a ``.wk1`` file. While reading a @@ -153,17 +153,17 @@ def _get_varName(self) -> str: def read(self, n=-1): """ - + Parameters ---------- n : int Read at most ``n`` objects. Default is ``-1``. - + Returns ------- w : libpysal.weights.W A PySAL `W` object. - + """ self._complain_ifclosed(self.closed) @@ -181,14 +181,14 @@ def _read(self): ------- w : libpysal.weights.W A PySAL `W` object. - + Raises ------ StopIteration Raised at the EOF. ValueError Raised when the header of the file is invalid. - + Examples -------- @@ -261,14 +261,14 @@ def write(self, obj): ---------- obj : libpysal.weights.W A PySAL `W` object. - + Raises ------ ValueError Raised when the `WK1` file has more than 256 observations. TypeError Raised when the input ``obj`` is not a PySAL `W`. - + Examples -------- diff --git a/libpysal/io/iohandlers/wkt.py b/libpysal/io/iohandlers/wkt.py index 6eb05a921..afa5a593b 100644 --- a/libpysal/io/iohandlers/wkt.py +++ b/libpysal/io/iohandlers/wkt.py @@ -16,7 +16,7 @@ class WKTReader(fileio.FileIO): Examples -------- - + Read in WKT-formatted file. >>> import libpysal diff --git a/libpysal/io/tables.py b/libpysal/io/tables.py index 7aec65fb3..ea1e0530f 100644 --- a/libpysal/io/tables.py +++ b/libpysal/io/tables.py @@ -54,14 +54,14 @@ def by_col(self): def _get_col(self, key): """Returns the column vector. - + Raises ------ AttributeError Raised when the header is not set. AttributeError Raised when a field does not exist. - + """ if not self.header: @@ -108,7 +108,7 @@ def by_col_array(self, *args): [ 1.91515848, 3.4507747 ], [ 1.28864319, 3.26381409], [ 0. 
, 7.77000777]]) - + >>> hr = dbf.by_col_array(['HR80', 'HR70']) >>> hr[0:5] array([[ 8.85582713, 0. ], @@ -116,7 +116,7 @@ def by_col_array(self, *args): [ 3.4507747 , 1.91515848], [ 3.26381409, 1.28864319], [ 7.77000777, 0. ]]) - + >>> hr = dbf.by_col_array(['HR80']) >>> hr[0:5] array([[ 8.85582713], @@ -124,7 +124,7 @@ def by_col_array(self, *args): [ 3.4507747 ], [ 3.26381409], [ 7.77000777]]) - + Numpy only supports homogeneous arrays. See Notes above. >>> hr = dbf.by_col_array('STATE_NAME', 'HR80') @@ -142,7 +142,7 @@ def by_col_array(self, *args): ['Washington'], ['Washington'], ['Washington']], dtype='>> X[0:5] array([[ 8.85582713, 0. ], [17.20874204, 0. ], @@ -170,7 +170,7 @@ def by_col_array(self, *args): def __getitem__(self, key) -> list: """DataTables fully support slicing in 2D. To provide slicing, handlers must provide ``__len__``. Slicing accepts up to two arguments. For example, - + * ``table[row]`` * ``table[row, col]`` * ``table[row_start:row_stop]`` @@ -180,16 +180,16 @@ def __getitem__(self, key) -> list: * etc. ALL indices are Zero-Offsets. For example, - + * ``>>> assert index in range(0, len(table))`` - + Raises ------ TypeError Raised when two dimensions are not provided for slicing. TypeError Raised when an unknown key is present. - + """ prevPos = self.tell() @@ -232,7 +232,7 @@ def __getitem__(self, key) -> list: @requires("pandas") def to_df(self, n=-1, read_shp=False, **df_kws): """Convert a ``libpysal.DataTable`` to a ``pandas.DataFrame``. - + Parameters ---------- n : int @@ -241,7 +241,7 @@ def to_df(self, n=-1, read_shp=False, **df_kws): Read in from a shapefile (``True``). Default is ``False``. **df_kws : dict Optional keyword arguments to pass into ``pandas.DataFrame()``. 
- + Returns ------- df : pandas.DataFrame diff --git a/libpysal/io/util/shapefile.py b/libpysal/io/util/shapefile.py index c3bbe378b..15aee792e 100644 --- a/libpysal/io/util/shapefile.py +++ b/libpysal/io/util/shapefile.py @@ -179,7 +179,7 @@ def _unpackDict(structure, fileObj): def _unpackDict2(d, structure, fileObj): """Utility Function, used arrays instead from struct. - + Parameters ---------- d : dict @@ -189,12 +189,12 @@ def _unpackDict2(d, structure, fileObj): ``(('FieldName 1','type','byteOrder'),('FieldName 2','type','byteOrder'))``. fileObj : file An open file at the correct position. - + Returns ------- d : dict The updated dictionary. - + """ for name, dtype, order in structure: @@ -210,7 +210,7 @@ def _unpackDict2(d, structure, fileObj): def _packDict(structure, d) -> str: """Utility Function for packing a dictionary with byte strings. - + Parameters ---------- structure : tuple @@ -218,20 +218,20 @@ def _packDict(structure, d) -> str: ``(('FieldName 1','type','byteOrder'),('FieldName 2','type','byteOrder'))``. d : dict Dictionary in the form: ``{'FieldName 1': value, 'FieldName 2': value}``. - + Examples -------- - + >>> s = _packDict( ... (('FieldName 1', 'i', '<'), ('FieldName 2', 'i', '<')), ... {'FieldName 1': 1, 'FieldName 2': 2} ... ) >>> s == pack('>> unpack(' str: class shp_file: """Reads and writes the SHP compenent of a shapefile. - + Parameters ---------- filename : str @@ -260,17 +260,17 @@ class shp_file: ``'ARC'``, ``'ARCZ'``, ``'ARCM'``, ``'POLYGON'``, ``'POLYGONZ'``, ``'POLYGONM'``, ``'MULTIPOINT'``, ``'MULTIPOINTZ'``, ``'MULTIPOINTM'``, ``'MULTIPATCH'``. Default is ``None``. - + Attributes ---------- header : dict Contents of the SHP header. For contents see ``HEADERSTRUCT``. shape : int See ``SHAPE_TYPES`` and ``TYPE_DISPATCH``. - + Examples -------- - + >>> import libpysal >>> shp = shp_file(libpysal.examples.get_path('10740.shp')) >>> shp.header == { @@ -293,15 +293,15 @@ class shp_file: ... 'File Length': 260534 ... 
} True - + >>> len(shp) 195 - + Notes ----- - + The header of both the SHP and SHX files are indentical. - + """ SHAPE_TYPES = { @@ -322,12 +322,12 @@ class shp_file: def __iswritable(self) -> bool: """ - + Raises ------ IOError Raised when a bad file name is passed in. - + """ try: @@ -338,12 +338,12 @@ def __iswritable(self) -> bool: def __isreadable(self) -> bool: """ - + Raises ------ IOError Raised when a bad file name is passed in. - + """ try: @@ -354,14 +354,14 @@ def __isreadable(self) -> bool: def __init__(self, fileName, mode="r", shape_type=None): """ - + Raises ------ Exception Raised when an invalid shape type is passed in. Exception Raised when an invalid mode is passed in. - + """ self.__mode = mode @@ -405,10 +405,10 @@ def _open_shp_file(self): def _create_shp_file(self, shape_type: str): """Creates a shp/shx file. - + Examples -------- - + >>> import libpysal, os >>> shp = shp_file('test', 'w', 'POINT') >>> p = shp_file(libpysal.examples.get_path('Point.shp')) @@ -419,15 +419,15 @@ def _create_shp_file(self, shape_type: str): ... libpysal.examples.get_path('Point.shp'), 'rb' ... ).read() True - + >>> open('test.shx', 'rb').read() == open( ... libpysal.examples.get_path('Point.shx'), 'rb' ... ).read() True - + >>> os.remove('test.shx') >>> os.remove('test.shp') - + """ self.__iswritable() @@ -467,15 +467,15 @@ def type(self) -> str: def __next__(self) -> int: """Returns the next shape in the shapefile. - + Raises ------ StopIteration Raised at the EOF. - + Examples -------- - + >>> import libpysal >>> list(shp_file(libpysal.examples.get_path('Point.shp'))) == [ ... { @@ -525,7 +525,7 @@ def __next__(self) -> int: ... } ... ] True - + """ self.__isreadable() @@ -612,7 +612,7 @@ def close(self): class shx_file: """Reads and writes the SHX compenent of a shapefile. - + Parameters ---------- filename : str @@ -621,17 +621,17 @@ class shx_file: ``'.shx'``, ``'.shp'`` and append ``'.shx'``. mode : str The mode for file interaction. 
Must be ``'r'`` (read). - + Attributes ---------- index : list Contains the file offset and length of each recond in the SHP component. numRecords : int The number of records. - + Examples -------- - + >>> import libpysal >>> shx = shx_file(libpysal.examples.get_path('10740.shx')) >>> shx._header == { @@ -654,24 +654,24 @@ class shx_file: ... 'File Length': 830 ... } True - + >>> len(shx.index) 195 - + >>> shx = shx_file(libpysal.examples.get_path('Point.shx')) >>> isinstance(shx, shx_file) True - + """ def __iswritable(self) -> bool: """ - + Raises ------ IOError Raised when a bad file name is passed in. - + """ try: @@ -682,12 +682,12 @@ def __iswritable(self) -> bool: def __isreadable(self) -> bool: """ - + Raises ------ IOError Raised when a bad file name is passed in. - + """ try: @@ -739,27 +739,27 @@ def _create_shx_file(self): def add_record(self, size: int): """Add a record to the shx index. - + Parameters ---------- size : int The length of the record in bytes NOT including the 8-byte record header. - + Returns ------- rec_id : int The sequential record ID, 1-based. pos : int See ``self.__offset`` in ``_create_shx_file``. - + Notes ----- - + The SHX records contain (Offset, Length) in 16-bit words. - + Examples -------- - + >>> import libpysal, os >>> shx = shx_file(libpysal.examples.get_path('Point.shx')) >>> shx.index @@ -772,7 +772,7 @@ def add_record(self, size: int): (268, 20), (296, 20), (324, 20)] - + >>> shx2 = shx_file('test', 'w') >>> [shx2.add_record(rec[1]) for rec in shx.index] [(1, 100), @@ -784,18 +784,18 @@ def add_record(self, size: int): (7, 268), (8, 296), (9, 324)] - + >>> shx2.index == shx.index True - + >>> shx2.close(shx._header) >>> open('test.shx', 'rb').read() == open( ... libpysal.examples.get_path('Point.shx'), 'rb' ... ).read() True - + >>> os.remove('test.shx') - + """ self.__iswritable() @@ -838,22 +838,22 @@ def pack(self, x=None) -> str: class Point(object): """Packs and unpacks a shapefile Point type. 
- + Examples -------- - + >>> import libpysal >>> shp = shp_file(libpysal.examples.get_path('Point.shp')) >>> rec = shp.get_shape(0) >>> rec == {'Y': -0.25904661905760773, 'X': -0.00068176617532103578, 'Shape Type': 1} True - + >>> # +8 byte record header >>> pos = shp.fileObj.seek(shp._shx.index[0][0] + 8) >>> dat = shp.fileObj.read(shp._shx.index[0][1]) >>> dat == Point.pack(rec) True - + """ Shape_Type = 1 @@ -906,10 +906,10 @@ class PointZ(Point): class PolyLine: """Packs and unpacks a shapefile PolyLine type. - + Examples -------- - + >>> import libpysal >>> shp = shp_file(libpysal.examples.get_path('Line.shp')) >>> rec = shp.get_shape(0) @@ -929,13 +929,13 @@ class PolyLine: ... 'Parts Index': [0] ... } True - + >>> # +8 byte record header >>> pos = shp.fileObj.seek(shp._shx.index[0][0] + 8) >>> dat = shp.fileObj.read(shp._shx.index[0][1]) >>> dat == PolyLine.pack(rec) True - + """ HASZ = False @@ -970,12 +970,12 @@ class PolyLine: @classmethod def unpack(cls, dat) -> dict: """ - + Parameters ---------- dat : file An open file at the correct position. - + """ record = _unpackDict(cls.USTRUCT, dat) @@ -1055,12 +1055,12 @@ class PolyLineZ(object): @classmethod def unpack(cls, dat) -> dict: """ - + Parameters ---------- dat : file An open file at the correct position. - + """ record = _unpackDict(cls.USTRUCT, dat) @@ -1117,10 +1117,10 @@ def pack(cls, record: dict) -> str: class Polygon(PolyLine): """Packs and unpacks a shapefile Polygon type identical to PolyLine. - + Examples -------- - + >>> import libpysal >>> shp = shp_file(libpysal.examples.get_path('Polygon.shp')) >>> rec = shp.get_shape(1) @@ -1144,13 +1144,13 @@ class Polygon(PolyLine): ... 'Parts Index': [0] ... 
} True - + >>> # +8 byte record header >>> pos = shp.fileObj.seek(shp._shx.index[1][0] + 8) >>> dat = shp.fileObj.read(shp._shx.index[1][1]) >>> dat == Polygon.pack(rec) True - + """ String_Type = "POLYGON" diff --git a/libpysal/io/util/tests/test_shapefile.py b/libpysal/io/util/tests/test_shapefile.py index 4bcf00934..3787d62fb 100644 --- a/libpysal/io/util/tests/test_shapefile.py +++ b/libpysal/io/util/tests/test_shapefile.py @@ -28,8 +28,7 @@ def bufferIO(buf): - """Temp stringIO function to force compat. - """ + """Temp stringIO function to force compat.""" return io.BytesIO(buf) diff --git a/libpysal/io/util/weight_converter.py b/libpysal/io/util/weight_converter.py index d1b9093c0..74b44bd24 100644 --- a/libpysal/io/util/weight_converter.py +++ b/libpysal/io/util/weight_converter.py @@ -35,14 +35,14 @@ def __init__(self, inputPath, dataFormat=None): def _setW(self): """Reads a weights file and sets a ``pysal.weights.W`` object as an attribute. - + Raises ------ IOError Raised when there is a problem reading in the file. RuntimeError Raised when there is a problem creating the weights object. - + Examples -------- @@ -89,7 +89,7 @@ def w_set(self) -> bool: def write(self, outputPath, dataFormat=None, useIdIndex=True, matrix_form=True): """ - + Parameters ---------- outputPath : str @@ -115,7 +115,7 @@ def write(self, outputPath, dataFormat=None, useIdIndex=True, matrix_form=True): Examples -------- - + >>> import tempfile, os, libpysal Create a `WeightConverter` object. @@ -226,7 +226,7 @@ def weight_convert( Examples -------- - + >>> import tempfile, os, libpysal Create a temporary file for this example. diff --git a/libpysal/io/util/wkb.py b/libpysal/io/util/wkb.py index 99a8db3d5..e704c9790 100644 --- a/libpysal/io/util/wkb.py +++ b/libpysal/io/util/wkb.py @@ -81,17 +81,17 @@ def loads(s: str): WKBMultiPolygon mpolygon; } }; - + Returns ------- geom : {None, libpysal.cg.{Point, Chain, Polygon}} The geometric object or ``None``. 
- + Raises ------ TypeError Raised when an unsupported shape type is passed in. - + """ # To allow recursive calls, read only the bytes we need. diff --git a/libpysal/io/util/wkt.py b/libpysal/io/util/wkt.py index 152f65bbf..194ca30ab 100644 --- a/libpysal/io/util/wkt.py +++ b/libpysal/io/util/wkt.py @@ -13,7 +13,7 @@ class WKTParser: Examples -------- - + >>> import libpysal Create some Well-Known Text objects. @@ -31,10 +31,10 @@ class WKTParser: >>> parser(p).parts [[(1.0, 1.0), (1.0, 5.0), (5.0, 5.0), (5.0, 1.0), (1.0, 1.0)], [(2.0, 2.0), (2.0, 3.0), (3.0, 3.0), (3.0, 2.0), (2.0, 2.0)]] - + >>> parser(p).centroid (2.9705882352941178, 2.9705882352941178) - + >>> parser(p).area 17.0 @@ -47,7 +47,7 @@ class WKTParser: >>> parser(l).len 73.45538453219989 - + >>> parser(l).parts [[(3.0, 4.0), (10.0, 50.0), (20.0, 25.0)]] @@ -56,7 +56,7 @@ class WKTParser: >>> f = libpysal.io.open(libpysal.examples.get_path('stl_hom.wkt')) >>> f.mode 'r' - + >>> f.header [] @@ -100,12 +100,12 @@ def Polygon(self, geoStr): def fromWKT(self, wkt): """Returns geometric representation from WKT or ``None``. - + Raises ------ NotImplementedError Raised when a unknown/unsupported format is passed in. - + """ matches = self.regExes["typeStr"].match(wkt) diff --git a/libpysal/test_NameSpace.py b/libpysal/test_NameSpace.py index 2f827ae6f..65240893a 100644 --- a/libpysal/test_NameSpace.py +++ b/libpysal/test_NameSpace.py @@ -5,9 +5,9 @@ @unittest.skip("Skipping unittest for namespace") class TestNameSpace(unittest.TestCase): """ - This test makes sure we don't remove anything from the pysal NameSpace that - 1.0 users might expect to be there. 1.0 Namespace was taken from the 1.1 - Code sprint wave, with special names removes (__all__, etc) + This test makes sure we don't remove anything from the pysal NameSpace that + 1.0 users might expect to be there. 1.0 Namespace was taken from the 1.1 + Code sprint wave, with special names removes (__all__, etc). 
""" def test_contents(self): diff --git a/libpysal/weights/_contW_lists.py b/libpysal/weights/_contW_lists.py index 6f71c4cfe..d5396c17d 100644 --- a/libpysal/weights/_contW_lists.py +++ b/libpysal/weights/_contW_lists.py @@ -15,12 +15,12 @@ def _get_verts(shape): return _get_boundary_points(shape) -def _get_boundary_points(shape): +def _get_boundary_points(shape) -> list: + """Recursively handle polygons vs. multipolygons + to extract the boundary point set from each. """ - Recursively handle polygons vs. multipolygons to - extract the boundary point set from each. - """ - if shape.geom_type.lower() == "polygon": + + if shape.type.lower() == "polygon": shape = shape.boundary return _get_boundary_points(shape) elif shape.geom_type.lower() == "linestring": @@ -39,27 +39,27 @@ def _get_boundary_points(shape): class ContiguityWeightsLists: - """ - Contiguity for a collection of polygons using high performance - list, set, and dict containers + """Contiguity for a collection of polygons using high + performance ``list``, ``set``, and ``dict`` containers. + + Parameters + ---------- + collection: PySAL PolygonCollection + A collection of polygons. + wttype: int + Set to ``1`` for Queen contiguity or set to ``2`` for Rook contiguity. + Default is ``1``. 
+ """ def __init__(self, collection, wttype=1): - """ - Parameters - ---------- - - collection: PySAL PolygonCollection - wttype: int - 1: Queen - 2: Rook - """ self.collection = list(collection) self.wttype = wttype self.jcontiguity() def jcontiguity(self): + numPoly = len(self.collection) w = {} @@ -120,4 +120,5 @@ def jcontiguity(self): pass else: raise Exception(f"Weight type {self.wttype} Not Understood!") + self.w = w diff --git a/libpysal/weights/adjtools.py b/libpysal/weights/adjtools.py index 1f5e07f63..3db94ce4e 100644 --- a/libpysal/weights/adjtools.py +++ b/libpysal/weights/adjtools.py @@ -9,46 +9,62 @@ def adjlist_apply( skip_verify=False, to_adjlist_kws=dict(drop_islands=None), ): - """ - apply a function to an adajcency list, getting an adjacency list and result. + """Apply a function to an adajcency list, getting an adjacency list and result. Parameters ---------- - X : iterable - an (N,P)-length iterable to apply ``func'' to. If (N,1), then `func` - must take 2 arguments and return a single reduction. If P>1, then - func must take two P-length arrays and return a single - reduction of them. - W : pysal.weights.W object - a weights object that provides adjacency information - alist : pandas DataFrame - a table containing an adajacency list representation of a W matrix - func : callable - a function taking two arguments and returning a single argument. - This will be evaluated for every (focal, neighbor) pair, or each - row of the adjacency list. If `X` has more than one column, this - function should take two arrays and provide a single scalar in - return. - Example scalars include: lambda x,y: x < y, np.subtract - Example multivariates: lambda (x,y): np.all(x < y)'' - lambda (x,y): np.sum((x-y)**2) - sklearn.metrics.euclidean_distance + + X : iterable + An :math:`(N,P)`-length iterable to apply ``func`` to. If :math:`(N,1)`, + then ``func`` must take 2 arguments and return a single reduction. 
+ If :math:`P>1`, then ``func`` must take two :math:`P`-length arrays + and return a single reduction of them. + W : libpysal.weights.W + A weights object that provides adjacency information. Default is ``None``. + alist : pandas.DataFrame + A table containing an adjacency list representation of a `W` matrix. + Default is ``None``. + func : callable + A function taking two arguments and returning a single argument. This will + be evaluated for every (focal, neighbor) pair, or each row of the adjacency + list. If ``X`` has more than one column, this function should take two arrays + and provide a single scalar in return. Default is ``np.subtract``. + Example scalars include: + ``lambda x,y: x < y``, ``np.subtract`` + Example multivariates: + ``lambda (x,y): np.all(x < y)`` + ``lambda (x,y): np.sum((x-y)**2)`` + ``sklearn.metrics.euclidean_distance`` skip_verify: bool - Whether or not to skip verifying that the W is the same as an adjacency list. - Do this if you are certain the adjacency list and W agree and would like to - avoid re-instantiating a W from the adjacency list. + Whether or not to skip verifying that the `W` is the same as an adjacency + list. Do this if you are certain the adjacency list and `W` agree and + would like to avoid re-instantiating a `W` from the adjacency list. + Default is ``False``. to_adjlist_kws : dict Keyword arguments for ``W.to_adjlist()``. Default is ``dict(drop_islands=None)``. Returns ------- - an adjacency list (or modifies alist inplace) with the function applied to each row. + + alist_atts : pandas.DataFrame + An adjacency list (or modifies ``alist`` inplace) + with the function applied to each row. + + Raises + ------ + + ImportError + Pandas must be installed to use this function. 
+ """ + try: import pandas as pd except ImportError: - raise ImportError("pandas must be installed to use this function") + raise ImportError("Pandas must be installed to use this function.") + W, alist = _get_W_and_alist(W, alist, to_adjlist_kws, skip_verify=skip_verify) + if len(X.shape) > 1: if X.shape[-1] > 1: return _adjlist_mvapply( @@ -61,6 +77,7 @@ def adjlist_apply( ) else: vec = np.asarray(X).flatten() + ids = np.asarray(W.id_order)[:, None] table = pd.DataFrame(ids, columns=["id"]) table = pd.concat((table, pd.DataFrame(vec[:, None], columns=("att",))), axis=1) @@ -77,23 +94,35 @@ def adjlist_apply( alist_atts[func.__name__] = alist_atts[["att_focal", "att_neighbor"]].apply( lambda x: func(x.att_focal, x.att_neighbor), axis=1 ) + return alist_atts def _adjlist_mvapply( X, W=None, alist=None, func=None, skip_verify=False, to_adjlist_kws=dict() ): + """This function is used when ``X`` is multi-dimensional. See + ``libpysal.weights.adjtools.adjlist_apply()`` for + Parameters, Returns, and Raises information. + + """ + try: import pandas as pd except ImportError: - raise ImportError("pandas must be installed to use this function") - assert len(X.shape) == 2, "data is not two-dimensional" + raise ImportError("Pandas must be installed to use this function.") + + assert len(X.shape) == 2, "Data is not two-dimensional." + W, alist = _get_W_and_alist(W, alist, to_adjlist_kws, skip_verify=skip_verify) - assert X.shape[0] == W.n, "number of samples in X does not match W" + + assert X.shape[0] == W.n, "The number of samples in X does not match W." 
+ try: names = X.columns.tolist() except AttributeError: names = list(map(str, list(range(X.shape[1])))) + ids = np.asarray(W.id_order)[:, None] table = pd.DataFrame(ids, columns=["id"]) table = pd.concat((table, pd.DataFrame(X, columns=names)), axis=1) @@ -106,6 +135,7 @@ def _adjlist_mvapply( right_on="id", suffixes=("_focal", "_neighbor"), ) + alist_atts.drop(["id_focal", "id_neighbor"], axis=1, inplace=True) alist_atts[func.__name__] = list( map( @@ -118,32 +148,44 @@ def _adjlist_mvapply( ), ) ) + return alist_atts def _get_W_and_alist(W, alist, to_adjlist_kws, skip_verify=False): + """Either (1) compute a `W` from an ``alist``; (2) compute an adjacency list + from a `W`; (3) raise a ``ValueError`` if neither are provided; or (4) raise an + ``AssertionError`` if both `W` and ``adjlist`` are provided and don't match. + If this completes successfully, the `W` and ``adjlist`` will both be returned and + are checked for equality. See ``libpysal.weights.adjtools.adjlist_apply()`` + for parameters and returns information. + + Raises + ------ + + ValueError + Either W or Adjacency List must be provided. + """ - Either: - 1. compute a W from an alist - 2. adjacencylist from a W - 3. raise ValueError if neither are provided, - 4. raise AssertionError if both W and adjlist are provided and don't match. - If this completes successfully, the W/adjlist will both be returned and are checked for equality. 
- """ + if (alist is None) and (W is not None): alist = W.to_adjlist(**to_adjlist_kws) + elif (W is None) and (alist is not None): from .weights import W W = W.from_adjlist(alist, **to_adjlist_kws) + elif (W is None) and (alist is None): - raise ValueError("Either W or Adjacency List must be provided") + raise ValueError("Either W or Adjacency List must be provided.") + elif (W is not None) and (alist is not None) and (not skip_verify): from .weights import W as W_ np.testing.assert_allclose( W.sparse.toarray(), W_.from_adjlist(alist).sparse.toarray() ) + return W, alist @@ -156,56 +198,79 @@ def adjlist_map( neighbor_col="neighbor", to_adjlist_kws=dict(drop_islands=None), ): - """ - Map a set of functions over a W or adjacency list + """Map a set of functions over a `W` or an adjacency list. Parameters ---------- - data : np.ndarray or pandas dataframe - N x P array of N observations and P covariates. - funcs : iterable or callable - a function to apply to each of the P columns in ``data'', or a list of functions - to apply to each column of P. This function must take two arguments, compare them, - and return a value. Examples may be ``lambda x,y: x < y'' or ``np.subtract''. - W : pysal.weights.W object - a pysal weights object. If not provided, one is constructed from - the given adjacency list. - alist : pandas dataframe - an adjacency list representation of a weights matrix. If not provided, - one is constructed from the weights object. If both are provided, - they are validated against one another to ensure they provide identical weights - matrices. - focal_col : string - name of column in alist containing the focal observation ids - neighbor_col: string - name of column in alist containing the neighboring observation ids + + data : {numpy.ndarray, pandas.Dataframe} + `N x P` array of `N` observations and `P` covariates. 
+ funcs : iterable or callable + A function to apply to each of the `P` columns in ``data``, or a + list of functions to apply to each column of `P`. This function + must take two arguments, compare them, and return a value. Examples + may be ``lambda x,y: x < y`` or ``np.subtract``. + Default is ``(np.subtract,)``. + W : libpysal.weights.W + A PySAL weights object. If not provided, one is + constructed from the given adjacency list. Default is ``None``. + alist : pandas.DataFrame + An adjacency list representation of a weights matrix. If not + provided, one is constructed from the weights object. If both are + provided, they are validated against one another to ensure they + provide identical weights matrices. Default is ``None``. + focal_col : str + The name of the column in ``alist`` containing the focal observation ids. + Default is ``'focal'``. + neighbor_col : str + The name of the column in ``alist`` containing the neighboring observation ids. + Default is ``'neighbor'``. to_adjlist_kws : dict Keyword arguments for ``W.to_adjlist()``. Default is ``dict(drop_islands=None)``. Returns ------- - returns an adjacency list (or modifies one if provided) with each function applied to the column - of the data. + + alist : pandas.DataFrame + An adjacency list (or modifies one if provided) with each function + applied to the column of the data. + + Raises + ------ + + ImportError + Pandas must be installed to use this function. + """ + try: import pandas as pd except ImportError: - raise ImportError("pandas must be installed to use this function") + raise ImportError("Pandas must be installed to use this function.") + if isinstance(data, pd.DataFrame): names = data.columns data = data.values else: names = [str(i) for i in range(data.shape[1])] - assert data.shape[0] == W.n, "shape of data does not match shape of adjacency" + + assert ( + data.shape[0] == W.n + ), "The shape of 'data' does not match the shape of 'adjacency'." 
+ if callable(funcs): funcs = (funcs,) + if len(funcs) == 1: funcs = [funcs[0] for _ in range(data.shape[1])] + assert data.shape[1] == len( funcs - ), "shape of data does not match the number of functions provided" + ), "The shape of 'data' does not match the number of functions provided." W, alist = _get_W_and_alist(W, alist, to_adjlist_kws) + fnames = set([f.__name__ for f in funcs]) + for i, (column, function) in enumerate(zip(data.T, funcs)): alist = adjlist_apply( column, W=W, alist=alist, skip_verify=True, to_adjlist_kws=to_adjlist_kws @@ -215,31 +280,38 @@ def adjlist_map( columns={function.__name__: "_".join((function.__name__, names[i]))} ) fnames.update((function.__name__,)) + return alist def filter_adjlist(adjlist, focal_col="focal", neighbor_col="neighbor"): - """ - This dedupes an adjacency list by examining both (a,b) and (b,a) when (a,b) is enountered. - The removal is done in order of the iteration order of the input adjacency list. So, if a - special order of removal is desired, you need to sort the list before this function. + """This de-duplicates an adjacency list by examining both `(a,b)` and `(b,a)` + when `(a,b)` is encountered. The removal is done in order of the iteration + order of the input adjacency list. So, if a special order of removal is + desired, you need to sort the list before this function. Parameters ---------- - adjlist : pandas DataFrame - a dataframe that contains focal and neighbor columns - focal_col : string - the name of the column with the focal observation id - neighbor_col: string - the name of the column with the neighbor observation id + adjlist : pandas.DataFrame + A dataframe that contains focal and neighbor columns. + focal_col : str + The name of the column with the focal observation id. Default is ``'focal'``. + neighbor_col : str + The name of the column with the neighbor observation id. + Default is ``'neighbor'``. Returns ------- - an adjacency table with reversible entries removed. 
+ + adjlist : pandas.DataFrame + An adjacency table with reversible entries removed. + """ + edges = adjlist.loc[:, [focal_col, neighbor_col]] undirected = set() to_remove = [] + for index, *edge in edges.itertuples(name=None): edge = tuple(edge) if edge in undirected or edge[::-1] in undirected: @@ -248,4 +320,5 @@ def filter_adjlist(adjlist, focal_col="focal", neighbor_col="neighbor"): undirected.add(edge) undirected.add(edge[::-1]) adjlist = adjlist.drop(to_remove) + return adjlist diff --git a/libpysal/weights/contiguity.py b/libpysal/weights/contiguity.py index 2e009f69c..45f62ee45 100644 --- a/libpysal/weights/contiguity.py +++ b/libpysal/weights/contiguity.py @@ -20,68 +20,87 @@ WT_TYPE = {"rook": 2, "queen": 1} # for _contW_Binning -__author__ = "Sergio J. Rey , Levi John Wolf " +__author__ = ( + "Sergio J. Rey , Levi John Wolf " +) __all__ = ["Rook", "Queen", "Voronoi"] class Rook(W): - """ - Construct a weights object from a collection of pysal polygons that share at least one edge. + """Construct a weights object from a collection of + PySAL polygons that share at least one edge. Parameters ---------- - polygons : list - a collection of PySAL shapes to build weights from - ids : list - a list of names to use to build the weights - **kw : keyword arguments - optional arguments for :class:`pysal.weights.W` + + polygons : list + A collection of PySAL shapes from which to build weights. + **kwargs : dict + Keyword arguments for ``libpysal.weights.W``. The parameter ``ids``, + a list of names to use to build the weights, should be included here. 
See Also -------- - :class:`libpysal.weights.weights.W` + + libpysal.weights.W + """ - def __init__(self, polygons, **kw): + def __init__(self, polygons, **kwargs): + criterion = "rook" - ids = kw.pop("ids", None) + ids = kwargs.pop("ids", None) polygons, backup = itertools.tee(polygons) first_shape = next(iter(backup)) + if isinstance(first_shape, point_type): polygons, vertices = voronoi_frames(get_points_array(polygons)) polygons = list(polygons.geometry) + neighbors, ids = _build(polygons, criterion=criterion, ids=ids) - W.__init__(self, neighbors, ids=ids, **kw) + + W.__init__(self, neighbors, ids=ids, **kwargs) @classmethod def from_shapefile(cls, filepath, idVariable=None, full=False, **kwargs): - """ - Rook contiguity weights from a polygon shapefile. + """`Rook` contiguity weights from a polygon shapefile. Parameters ---------- - shapefile : string - name of polygon shapefile including suffix. - sparse : boolean - If True return WSP instance - If False return W instance + filepath : str + The name of polygon shapefile including the file extension. + idVariable : str + The name of the attribute in the shapefile to associate + with ids in the weights. Default is ``None``. + full : bool + Write out the entire path for a shapefile (``True``) or + only the base of the shapefile without extension (``False``). + Default is ``False``. + **kwargs : dict + Keyword arguments for ``libpysal.weights.Rook``. ``'sparse'`` + should be included here. If ``True`` return `WSP` instance. + If ``False`` return `W` instance. Returns ------- - w : W - instance of spatial weights + w : libpysal.weights.Rook + A rook-style instance of spatial weights. 
Examples -------- + >>> from libpysal.weights import Rook >>> import libpysal - >>> wr=Rook.from_shapefile(libpysal.examples.get_path("columbus.shp"), "POLYID") + >>> wr = Rook.from_shapefile(libpysal.examples.get_path("columbus.shp"), "POLYID") >>> "%.3f"%wr.pct_nonzero '8.330' - >>> wr=Rook.from_shapefile(libpysal.examples.get_path("columbus.shp"), sparse=True) + + >>> wr = Rook.from_shapefile( + ... libpysal.examples.get_path("columbus.shp"), sparse=True + ... ) >>> pct_sp = wr.sparse.nnz *1. / wr.n**2 >>> "%.3f"%pct_sp '0.083' @@ -89,47 +108,68 @@ def from_shapefile(cls, filepath, idVariable=None, full=False, **kwargs): Notes ----- - Rook contiguity defines as neighbors any pair of polygons that share a - common edge in their polygon definitions. + `Rook` contiguity defines as neighbors any pair of polygons + that share a common edge in their polygon definitions. See Also -------- - :class:`libpysal.weights.weights.W` - :class:`libpysal.weights.contiguity.Rook` + + libpysal.weights.W + libpysal.weights.Rook + """ sparse = kwargs.pop("sparse", False) + if idVariable is not None: ids = get_ids(filepath, idVariable) else: ids = None + w = cls(FileIO(filepath), ids=ids, **kwargs) w.set_shapefile(filepath, idVariable=idVariable, full=full) + if sparse: w = w.to_WSP() + return w @classmethod def from_iterable(cls, iterable, sparse=False, **kwargs): - """ - Construct a weights object from a collection of arbitrary polygons. This - will cast the polygons to PySAL polygons, then build the W. + """Construct a weights object from a collection of arbitrary polygons. + This will cast the polygons to PySAL polygons, then build the `W`. Parameters ---------- - iterable : iterable - a collection of of shapes to be cast to PySAL shapes. Must - support iteration. Can be either Shapely or PySAL shapes. - **kw : keyword arguments - optional arguments for :class:`pysal.weights.W` + + iterable : iterable + A collection of of shapes to be cast to PySAL shapes. 
Must + support iteration. Can be either Shapely or PySAL shapes. + sparse : bool + Generate a `WSP` object. Default is ``False``. + **kwargs : dict + Keyword arguments for ``libpysal.weights.Rook``. + + Returns + ------- + + w : libpysal.weights.Rook + A rook-style instance of spatial weights. + See Also -------- - :class:`libpysal.weights.weights.W` - :class:`libpysal.weights.contiguity.Rook` + + libpysal.weights.W + libpysal.weights.WSP + libpysal.weights.Rook + """ + new_iterable = iter(iterable) w = cls(new_iterable, **kwargs) + if sparse: w = WSP.from_W(w) + return w @classmethod @@ -143,44 +183,47 @@ def from_dataframe( use_index=None, **kwargs, ): - """ - Construct a weights object from a (geo)pandas dataframe with a geometry - column. This will cast the polygons to PySAL polygons, then build the W + """Construct a weights object from a ``pandas.DataFrame`` with a geometry + column. This will cast the polygons to PySAL polygons, then build the `W` using ids from the dataframe. Parameters ---------- - df : DataFrame - a :class: `pandas.DataFrame` containing geometries to use - for spatial weights - geom_col : string - the name of the column in `df` that contains the - geometries. Defaults to active geometry column. - idVariable : string - DEPRECATED - use `ids` instead. - the name of the column to use as IDs. If nothing is - provided, the dataframe index is used - ids : list-like, string - a list-like of ids to use to index the spatial weights object or - the name of the column to use as IDs. If nothing is - provided, the dataframe index is used if `use_index=True` or - a positional index is used if `use_index=False`. - Order of the resulting W is not respected from this list. - id_order : list - DEPRECATED - argument is deprecated and will be removed. - An ordered list of ids to use to index the spatial weights - object. If used, the resulting weights object will iterate - over results in the order of the names provided in this - argument. 
- use_index : bool - use index of `df` as `ids` to index the spatial weights object. - Defaults to False but in future will default to True. + + df : pandas.DataFrame + A ``pandas.DataFrame`` containing geometries to use for spatial weights. + geom_col : {None, str} + The name of the column in ``df`` that contains the + geometries. Defaults to the active geometry column. + idVariable : str + The name of the column to use as IDs. If nothing is provided, the + dataframe index is used. Default is ``None``. + ids : list + A list of ids to use to index the spatial weights object. + Order is not respected from this list. Default is ``None``. + id_order : list + An ordered list of ids to use to index the spatial weights object. If + used, the resulting weights object will iterate over results in the + order of the names provided in this argument. Default is ``None``. + use_index : bool + Use the index of ``df`` as ``ids`` to index the spatial weights object. + Defaults to ``False`` but in future will default to ``True``. + **kwargs : dict + Keyword arguments for ``libpysal.weights.Rook``. + + Returns + ------- + w : libpysal.weights.Rook + A rook-style instance of spatial weights. See Also -------- - :class:`libpysal.weights.weights.W` - :class:`libpysal.weights.contiguity.Rook` + + libpysal.weights.W + libpysal.weights.Rook + """ + if geom_col is None: geom_col = df.geometry.name @@ -191,8 +234,8 @@ def from_dataframe( stacklevel=2, ) if id_order is True and ((idVariable is not None) or (ids is not None)): - # if idVariable is None, we want ids. Otherwise, we want the - # idVariable column + # if ``idVariable`` is ``None``, we want ids. + # Otherwise, we want the ``idVariable`` column. 
id_order = list(df.get(idVariable, ids)) else: id_order = df.get(id_order, ids) @@ -243,6 +286,8 @@ def from_dataframe( df[geom_col].tolist(), ids=ids, id_order=id_order, **kwargs ) + return w + @classmethod def from_xarray( cls, @@ -255,18 +300,18 @@ def from_xarray( sparse=True, **kwargs, ): - """ - Construct a weights object from a xarray.DataArray with an additional + """Construct a weights object from a ``xarray.DataArray`` with an additional attribute index containing coordinate values of the raster - in the form of Pandas.Index/MultiIndex. + in the form of ``Pandas.Index``/``MultiIndex``. Parameters ---------- + da : xarray.DataArray - Input 2D or 3D DataArray with shape=(z, y, x) - z_value : int/string/float + Input 2D or 3D DataArray with shape=(z, y, x). + z_value : {int, str, float} Select the z_value of 3D DataArray with multiple layers. - coords_labels : dictionary + coords_labels : dict Pass dimension labels for coordinates and layers if they do not belong to default dimensions, which are (band/time, y/lat, x/lon) e.g. coords_labels = {"y_label": "latitude", "x_label": "longitude", "z_label": "year"} @@ -283,24 +328,29 @@ def from_xarray( Number of cores to be used in the sparse weight construction. If -1, all available cores are used. Default is 1. **kwargs : keyword arguments - optional arguments passed when sparse = False + Optional arguments passed when ``sparse=False``. Returns ------- - w : libpysal.weights.W/libpysal.weights.WSP - instance of spatial weights class W or WSP with an index attribute + + w : {libpysal.weights.W, libpysal.weights.WSP} + An instance of spatial weights class `W` or `WSP` with an index attribute. + + See Also + -------- + + libpysal.weights.weights.W + libpysal.weights.weights.WSP Notes ----- + 1. Lower order contiguities are also selected. 2. Returned object contains `index` attribute that includes a `Pandas.MultiIndex` object from the DataArray. 
- See Also - -------- - :class:`libpysal.weights.weights.W` - :class:`libpysal.weights.weights.WSP` """ + if sparse: w = da2WSP(da, "rook", z_value, coords_labels, k, include_nodata) else: @@ -309,113 +359,153 @@ def from_xarray( class Queen(W): - """ - Construct a weights object from a collection of pysal polygons that share at least one vertex. + """Construct a weights object from a collection of PySAL + polygons that share at least one vertex. Parameters ---------- - polygons : list - a collection of PySAL shapes to build weights from - ids : list - a list of names to use to build the weights - **kw : keyword arguments - optional arguments for :class:`pysal.weights.W` + polygons : list + A collection of PySAL shapes from which to build weights. + **kwargs : dict + Keyword arguments for ``pysal.weights.W``. The parameter ``ids``, + a list of names to use to build the weights, should be included here. See Also -------- - :class:`libpysal.weights.weights.W` + + libpysal.weights.W + """ - def __init__(self, polygons, **kw): + def __init__(self, polygons, **kwargs): + criterion = "queen" - ids = kw.pop("ids", None) + ids = kwargs.pop("ids", None) polygons, backup = itertools.tee(polygons) first_shape = next(iter(backup)) + if isinstance(first_shape, point_type): polygons, vertices = voronoi_frames(get_points_array(polygons)) polygons = list(polygons.geometry) + neighbors, ids = _build(polygons, criterion=criterion, ids=ids) - W.__init__(self, neighbors, ids=ids, **kw) + + W.__init__(self, neighbors, ids=ids, **kwargs) @classmethod def from_shapefile(cls, filepath, idVariable=None, full=False, **kwargs): - """ - Queen contiguity weights from a polygon shapefile. + """`Queen` contiguity weights from a polygon shapefile. Parameters ---------- - shapefile : string - name of polygon shapefile including suffix. - idVariable : string - name of a column in the shapefile's DBF to use for ids. 
- sparse : boolean - If True return WSP instance - If False return W instance + filepath : str + The name of polygon shapefile including the file extension. + idVariable : str + The name of the attribute in the shapefile to associate + with ids in the weights. Default is ``None``. + full : bool + Write out the entire path for a shapefile (``True``) or + only the base of the shapefile without extension (``False``). + Default is ``False``. + **kwargs : dict + Keyword arguments for ``libpysal.weights.Queen``. ``'sparse'`` + should be included here. If ``True`` return `WSP` instance. + If ``False`` return `W` instance. + Returns ------- - - w : W - instance of spatial weights + w : libpysal.weights.Queen + A queen-style instance of spatial weights. Examples -------- + >>> from libpysal.weights import Queen >>> import libpysal - >>> wq=Queen.from_shapefile(libpysal.examples.get_path("columbus.shp")) + >>> wq = Queen.from_shapefile(libpysal.examples.get_path("columbus.shp")) >>> "%.3f"%wq.pct_nonzero '9.829' - >>> wq=Queen.from_shapefile(libpysal.examples.get_path("columbus.shp"),"POLYID") + + >>> wq = Queen.from_shapefile( + ... libpysal.examples.get_path("columbus.shp"), "POLYID" + ... ) >>> "%.3f"%wq.pct_nonzero '9.829' - >>> wq=Queen.from_shapefile(libpysal.examples.get_path("columbus.shp"), sparse=True) + + >>> wq = Queen.from_shapefile( + ... libpysal.examples.get_path("columbus.shp"), sparse=True + ... ) >>> pct_sp = wq.sparse.nnz *1. / wq.n**2 >>> "%.3f"%pct_sp '0.098' Notes + ----- - Queen contiguity defines as neighbors any pair of polygons that share at + `Queen` contiguity defines as neighbors any pair of polygons that share at least one vertex in their polygon definitions. 
See Also -------- - :class:`libpysal.weights.weights.W` - :class:`libpysal.weights.contiguity.Queen` + + libpysal.weights.W + libpysal.weights.Queen + """ + sparse = kwargs.pop("sparse", False) + if idVariable is not None: ids = get_ids(filepath, idVariable) else: ids = None + w = cls(FileIO(filepath), ids=ids, **kwargs) w.set_shapefile(filepath, idVariable=idVariable, full=full) + if sparse: w = w.to_WSP() + return w @classmethod def from_iterable(cls, iterable, sparse=False, **kwargs): - """ - Construct a weights object from a collection of arbitrary polygons. This - will cast the polygons to PySAL polygons, then build the W. + """Construct a weights object from a collection of arbitrary polygons. + This will cast the polygons to PySAL polygons, then build the `W`. Parameters ---------- - iterable : iterable - a collection of of shapes to be cast to PySAL shapes. Must - support iteration. Contents may either be a shapely or PySAL shape. - **kw : keyword arguments - optional arguments for :class:`pysal.weights.W` + + iterable : iterable + A collection of shapes to be cast to PySAL shapes. Must + support iteration. Can be either Shapely or PySAL shapes. + sparse : bool + Generate a `WSP` object. Default is ``False``. + **kwargs : dict + Keyword arguments for ``libpysal.weights.Queen``. + + Returns + ------- + + w : libpysal.weights.Queen + A queen-style instance of spatial weights. + See Also - --------- - :class:`libpysal.weights.weights.W` - :class:`libpysal.weights.contiguiyt.Queen` + -------- + + libpysal.weights.W + libpysal.weights.WSP + libpysal.weights.Queen + """ + new_iterable = iter(iterable) w = cls(new_iterable, **kwargs) + if sparse: w = WSP.from_W(w) + return w @classmethod @@ -429,44 +519,54 @@ def from_dataframe( use_index=None, **kwargs, ): - """ - Construct a weights object from a (geo)pandas dataframe with a geometry - column.
This will cast the polygons to PySAL polygons, then build the W + """Construct a weights object from a ``pandas.DataFrame`` with a geometry + column. This will cast the polygons to PySAL polygons, then build the `W` using ids from the dataframe. Parameters ---------- - df : DataFrame - a :class: `pandas.DataFrame` containing geometries to use - for spatial weights - geom_col : string - the name of the column in `df` that contains the - geometries. Defaults to active geometry column. - idVariable : string - DEPRECATED - use `ids` instead. - the name of the column to use as IDs. If nothing is - provided, the dataframe index is used - ids : list-like, string - a list-like of ids to use to index the spatial weights object or - the name of the column to use as IDs. If nothing is - provided, the dataframe index is used if `use_index=True` or - a positional index is used if `use_index=False`. - Order of the resulting W is not respected from this list. - id_order : list - DEPRECATED - argument is deprecated and will be removed. - An ordered list of ids to use to index the spatial weights - object. If used, the resulting weights object will iterate - over results in the order of the names provided in this - argument. - use_index : bool - use index of `df` as `ids` to index the spatial weights object. - Defaults to False but in future will default to True. + + df : pandas.DataFrame + A ``pandas.DataFrame`` containing geometries to use for spatial weights. + geom_col : {None, str} + The name of the column in ``df`` that contains the + geometries. Defaults to the active geometry column. + idVariable : str + DEPRECATED - use `ids` instead. + the name of the column to use as IDs. If nothing is + provided, the dataframe index is used. + ids : list-like, string + a list-like of ids to use to index the spatial weights object or + the name of the column to use as IDs. 
If nothing is + provided, the dataframe index is used if `use_index=True` or + a positional index is used if `use_index=False`. + Order of the resulting W is not respected from this list. + id_order : list + DEPRECATED - argument is deprecated and will be removed. + An ordered list of ids to use to index the spatial weights + object. If used, the resulting weights object will iterate + over results in the order of the names provided in this + argument. + use_index : bool + use index of `df` as `ids` to index the spatial weights object. + Defaults to False but in future will default to True. + **kwargs : dict + Keyword arguments for ``libpysal.weights.Queen``. + + Returns + ------- + + w : libpysal.weights.Queen + A queen-style instance of spatial weights. See Also -------- - :class:`libpysal.weights.weights.W` - :class:`libpysal.weights.contiguity.Queen` + + libpysal.weights.W + libpysal.weights.Queen + """ + if geom_col is None: geom_col = df.geometry.name @@ -542,17 +642,18 @@ def from_xarray( **kwargs, ): """ - Construct a weights object from a xarray.DataArray with an additional + Construct a weights object from a ``xarray.DataArray`` with an additional attribute index containing coordinate values of the raster - in the form of Pandas.Index/MultiIndex. + in the form of ``Pandas.Index``/``MultiIndex``. Parameters ---------- + da : xarray.DataArray - Input 2D or 3D DataArray with shape=(z, y, x) - z_value : int/string/float + Input 2D or 3D DataArray with shape=(z, y, x). + z_value : {int, str, float} Select the z_value of 3D DataArray with multiple layers. - coords_labels : dictionary + coords_labels : dict Pass dimension labels for coordinates and layers if they do not belong to default dimensions, which are (band/time, y/lat, x/lon) e.g. coords_labels = {"y_label": "latitude", "x_label": "longitude", "z_label": "year"} @@ -569,24 +670,29 @@ def from_xarray( Number of cores to be used in the sparse weight construction. If -1, all available cores are used. 
Default is 1. **kwargs : keyword arguments - optional arguments passed when sparse = False + Optional arguments passed when ``sparse=False``. Returns ------- - w : libpysal.weights.W/libpysal.weights.WSP - instance of spatial weights class W or WSP with an index attribute + + w : {libpysal.weights.W, libpysal.weights.WSP} + An instance of spatial weights class `W` or `WSP` with an index attribute. + + See Also + -------- + + libpysal.weights.weights.W + libpysal.weights.weights.WSP Notes ----- + 1. Lower order contiguities are also selected. 2. Returned object contains `index` attribute that includes a `Pandas.MultiIndex` object from the DataArray. - See Also - -------- - :class:`libpysal.weights.weights.W` - :class:`libpysal.weights.weights.WSP` """ + if sparse: w = da2WSP(da, "queen", z_value, coords_labels, k, include_nodata) else: @@ -595,40 +701,51 @@ def from_xarray( def Voronoi(points, criterion="rook", clip="ahull", **kwargs): - """ - Voronoi weights for a 2-d point set - - - Points are Voronoi neighbors if their polygons share an edge or vertex. - + """Voronoi weights for a 2-d point set. Points are Voronoi neighbors + if their polygons share an edge or vertex. Parameters ---------- - points : array - (n,2) - coordinates for point locations - kwargs : arguments to pass to Rook, the underlying contiguity class. + points : array-like + An array-like ``(n,2)`` object of coordinates for point locations. + criterion : str + The weight criterion, either ``'rook'`` or ``'queen'``. Default is ``'rook'``. + clip : {str, shapely.geometry.Polygon} + An overloaded option about how to clip the voronoi cells. Default is ``'ahull'``. + See ``libpysal.cg.voronoi_frames()`` for more explanation. + **kwargs : dict + Keyword arguments for the underlying contiguity class, `Rook` or `Queen`. Returns ------- - w : W - instance of spatial weights + w : libpysal.weights.Voronoi + A voronoi-style instance of spatial weights.
+ + Raises + ------ + + ValueError + An unsupported value of ``criterion`` was passed in. Examples -------- + >>> import numpy as np >>> from libpysal.weights import Voronoi >>> np.random.seed(12345) - >>> points= np.random.random((5,2))*10 + 10 + >>> points = np.random.random((5,2))*10 + 10 >>> w = Voronoi(points) >>> w.neighbors {0: [2, 3, 4], 1: [2], 2: [0, 1, 4], 3: [0, 4], 4: [0, 2, 3]} + """ + from ..cg.voronoi import voronoi_frames region_df, _ = voronoi_frames(points, clip=clip) + if criterion.lower() == "queen": cls = Queen elif criterion.lower() == "rook": @@ -638,13 +755,15 @@ def Voronoi(points, criterion="rook", clip="ahull", **kwargs): "Contiguity criterion {} not supported. " 'Only "rook" and "queen" are supported.'.format(criterion) ) - return cls.from_dataframe(region_df, **kwargs) + + w = cls.from_dataframe(region_df, **kwargs) + + return w def _from_dataframe(df, **kwargs): - """ - Construct a voronoi contiguity weight directly from a dataframe. - Note that if criterion='rook', this is identical to the delaunay + """Construct a voronoi contiguity weight directly from a dataframe. + Note that if ``criterion='rook'``, this is identical to the delaunay graph for the points if no clipping of the voronoi cells is applied. If the input dataframe is of any other geometry type than "Point", @@ -652,15 +771,31 @@ def _from_dataframe(df, **kwargs): Parameters ---------- - df : pandas.DataFrame - dataframe containing point geometries for a - voronoi diagram. + + df : pandas.DataFrame + A dataframe containing point geometries for a Voronoi diagram. + **kwargs : dict + Keyword arguments to pass to ``libpysal.weights.Voronoi``. Returns ------- - w : W - instance of spatial weights. + w : libpysal.weights.Voronoi + A voronoi-style instance of spatial weights. + + Notes + ----- + + If ``criterion='rook'``, this is identical to the Delaunay graph for the points.
+ + Raises + ------ + + NotImplementedError + If the input dataframe is of any other geometry type than ``Point``, + a ``ValueError`` is caught and raised as a ``NotImplementedError``. + """ + try: x, y = df.geometry.x.values, df.geometry.y.values except ValueError: @@ -670,35 +805,47 @@ def _from_dataframe(df, **kwargs): "You may consider using df.centroid." ) coords = numpy.column_stack((x, y)) - return Voronoi(coords, **kwargs) + + w = Voronoi(coords, **kwargs) + + return w Voronoi.from_dataframe = _from_dataframe def _build(polygons, criterion="rook", ids=None): - """ - This is a developer-facing function to construct a spatial weights object. + """This is a developer-facing function to construct a spatial weights object. Parameters ---------- - polygons : list - list of pysal polygons to use to build contiguity - criterion : string - option of which kind of contiguity to build. Is either "rook" or "queen" - ids : list - list of ids to use to index the neighbor dictionary + polygons : list + A list of PySAL polygons to use to build contiguity. + criterion : str + Option of which kind of contiguity to build, either ``'rook'`` or ``'queen'``. + Default is ``'rook'``. + ids : list + A list of ids to use to index the neighbor dictionary. Default is ``None``. Returns ------- - tuple containing (neighbors, ids), where neighbors is a dictionary - describing contiguity relations and ids is the list of ids used to index - that dictionary. - - NOTE: this is different from the prior behavior of buildContiguity, which - returned an actual weights object. Since this just dispatches for the - classes above, this returns the raw ingredients for a spatial weights - object, not the object itself. + neighbor_result : tuple + The contents are ``(neighbors, ids)``, where ``neighbors`` is + a dictionary describing contiguity relations and ``ids`` is the + list of ids used to index that dictionary. 
+ + Raises + ------ + ValueError + The argument to the ``ids`` parameter contains duplicate entries. + + Notes + ----- + + This is different from the prior behavior of ``buildContiguity``, which returned an + actual weights object. Since this just dispatches for the classes above, this returns + the raw ingredients for a spatial weights object, not the object itself. + """ if ids and len(ids) != len(set(ids)): raise ValueError( @@ -708,12 +855,13 @@ def _build(polygons, criterion="rook", ids=None): wttype = WT_TYPE[criterion.lower()] geo = polygons if issubclass(type(geo), FileIO): - geo.seek(0) # Make sure we read from the beginning of the file. + # Make sure we read from the beginning of the file. + geo.seek(0) neighbor_data = ContiguityWeightsLists(polygons, wttype=wttype).w neighbors = {} - # weights={} + if ids: for key in neighbor_data: ida = ids[key] @@ -725,23 +873,24 @@ def _build(polygons, criterion="rook", ids=None): else: for key in neighbor_data: neighbors[key] = set(neighbor_data[key]) - return ( + + neighbor_result = ( dict( list(zip(list(neighbors.keys()), list(map(list, list(neighbors.values()))))) ), ids, ) + return neighbor_result + def buildContiguity(polygons, criterion="rook", ids=None): + """This is a deprecated function. It builds a contiguity `W` from the + polygons provided. As such, it is now identical to calling the class + constructors for `Rook` or `Queen`. """ - This is a deprecated function. + # Warn('This function is deprecated. Please use the Rook or Queen classes', UserWarning) - It builds a contiguity W from the polygons provided. As such, it is now - identical to calling the class constructors for Rook or Queen. - """ - # Warn('This function is deprecated. 
Please use the Rook or Queen classes', - # UserWarning) if criterion.lower() == "rook": return Rook(polygons, ids=ids) elif criterion.lower() == "queen": diff --git a/libpysal/weights/distance.py b/libpysal/weights/distance.py index ba35e8548..7fd82cd36 100644 --- a/libpysal/weights/distance.py +++ b/libpysal/weights/distance.py @@ -1,5 +1,8 @@ __all__ = ["KNN", "Kernel", "DistanceBand"] -__author__ = "Sergio J. Rey , Levi John Wolf " + +__author__ = ( + "Sergio J. Rey , Levi John Wolf " +) from ..cg.kdtree import KDTree @@ -19,44 +22,49 @@ def knnW(data, k=2, p=2, ids=None, radius=None, distance_metric="euclidean"): - """ - This is deprecated. Use the pysal.weights.KNN class instead. - """ - # Warn('This function is deprecated. Please use pysal.weights.KNN', UserWarning) + """This is deprecated. Use the ``libpysal.weights.KNN`` class instead.""" + # Warn("This function is deprecated. Please use libpysal.weights.KNN", UserWarning) + return KNN(data, k=k, p=p, ids=ids, radius=radius, distance_metric=distance_metric) class KNN(W): - """ - Creates nearest neighbor weights matrix based on k nearest - neighbors. + """Creates nearest neighbor weights matrix based on `k` nearest neighbors. Parameters ---------- - kdtree : object - PySAL KDTree or ArcKDTree where KDtree.data is array (n,k) - n observations on k characteristics used to measure - distances between the n objects - k : int - number of nearest neighbors - p : float - Minkowski p-norm distance metric parameter: - 1<=p<=infinity - 2: Euclidean distance - 1: Manhattan distance - Ignored if the KDTree is an ArcKDTree - ids : list - identifiers to attach to each observation + + data : {libpysal.cg.KDTree, libpysal.cg.ArcKDTree} + An ``(n,k)`` array of `n` observations on `k` characteristics + used to measure distances between the `n` objects. + k : int + The number of nearest neighbors. Default is ``2``. + p : {int, float} + Minkowski `p`-norm distance metric parameter where + :math:`1<=\\mathtt{p}<=\\infty`.
``2`` is Euclidean distance and + ``1`` is Manhattan distance. This parameter is ignored if the + ``KDTree`` is an ``ArcKDTree``. Default is ``2``. + ids : list + Identifiers to attach to each observation. Default is ``None``. + radius : float + If supplied arc distances will be calculated based on the given radius + and ``p`` will be ignored. Default is ``None``. + See ``libpysal.cg.KDTree`` for more details. + distance_metric : str + Either ``'euclidean'`` or ``'arc'``. Default is ``'euclidean'``. + See ``libpysal.cg.KDTree`` for more details. + **kwargs : dict + Keyword arguments for ``libpysal.weights.W``. Returns ------- - w : W - instance - Weights object with binary weights + w : libpysal.weights.KNN + A `k` nearest neighbors weights instance. Examples -------- + >>> import libpysal >>> import numpy as np >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)] @@ -64,15 +72,16 @@ class KNN(W): >>> wnn2 = libpysal.weights.KNN(kd, 2) >>> [1,3] == wnn2.neighbors[0] True + >>> wnn2 = KNN(kd,2) >>> wnn2[0] {1: 1.0, 3: 1.0} >>> wnn2[1] {0: 1.0, 3: 1.0} - now with 1 rather than 0 offset + Now with 1 rather than 0 offset: - >>> wnn2 = libpysal.weights.KNN(kd, 2, ids=range(1,7)) + >>> wnn2 = libpysal.weights.KNN(kd, 2, ids=range(1, 7)) >>> wnn2[1] {2: 1.0, 4: 1.0} >>> wnn2[2] @@ -100,7 +109,9 @@ class KNN(W): See Also -------- - :class:`libpysal.weights.weights.W` + + libpysal.weights.W + """ def __init__( @@ -113,14 +124,17 @@ def __init__( distance_metric="euclidean", **kwargs ): + if radius is not None: distance_metric = "arc" + if isKDTree(data): self.kdtree = data self.data = self.kdtree.data else: self.kdtree = KDTree(data, radius=radius, distance_metric=distance_metric) self.data = self.kdtree.data + self.k = k self.p = p @@ -151,57 +165,53 @@ def __init__( @classmethod def from_shapefile(cls, filepath, *args, **kwargs): - """ - Nearest neighbor weights from a shapefile. + """Nearest neighbor weights from a shapefile. 
Parameters ---------- - data : string - shapefile containing attribute data. - k : int - number of nearest neighbors - p : float - Minkowski p-norm distance metric parameter: - 1<=p<=infinity - 2: Euclidean distance - 1: Manhattan distance - ids : list - identifiers to attach to each observation - radius : float - If supplied arc_distances will be calculated - based on the given radius. p will be ignored. + filepath : str + The name of polygon shapefile (including the file extension) + containing attribute data. + *args : iterable + Positional arguments for ``libpysal.weights.KNN``. + **kwargs : dict + Keyword arguments for ``libpysal.weights.KNN``. Returns ------- - w : KNN - instance; Weights object with binary weights. + w : libpysal.weights.KNN + A `k` nearest neighbors weights instance. Examples -------- - Polygon shapefile + From a polygon shapefile: + >>> import libpysal >>> from libpysal.weights import KNN - >>> wc=KNN.from_shapefile(libpysal.examples.get_path("columbus.shp")) + >>> wc = KNN.from_shapefile(libpysal.examples.get_path("columbus.shp")) >>> "%.4f"%wc.pct_nonzero '4.0816' + >>> set([2,1]) == set(wc.neighbors[0]) True - >>> wc3=KNN.from_shapefile(libpysal.examples.get_path("columbus.shp"),k=3) - >>> set(wc3.neighbors[0]) == set([2,1,3]) - True - >>> set(wc3.neighbors[2]) == set([4,3,0]) + + >>> wc3 = KNN.from_shapefile(libpysal.examples.get_path("columbus.shp"), k=3) + >>> set(wc3.neighbors[0]) == set([2, 1, 3]) True + >>> set(wc3.neighbors[2]) == set([4, 3, 0]) + True - Point shapefile + From a point shapefile: - >>> w=KNN.from_shapefile(libpysal.examples.get_path("juvenile.shp")) + >>> w = KNN.from_shapefile(libpysal.examples.get_path("juvenile.shp")) >>> w.pct_nonzero 1.1904761904761905 - >>> w1=KNN.from_shapefile(libpysal.examples.get_path("juvenile.shp"),k=1) + + >>> w1 = KNN.from_shapefile(libpysal.examples.get_path("juvenile.shp"), k=1) >>> "%.3f"%w1.pct_nonzero '0.595' @@ -212,50 +222,61 @@ def from_shapefile(cls, filepath, *args, 
**kwargs): See Also -------- - :class:`libpysal.weights.weights.W` + + libpysal.weights.W + """ - return cls(get_points_array_from_shapefile(filepath), *args, **kwargs) + + w = cls(get_points_array_from_shapefile(filepath), *args, **kwargs) + + return w @classmethod def from_array(cls, array, *args, **kwargs): - """ - Creates nearest neighbor weights matrix based on k nearest - neighbors. + """Creates nearest neighbor weights matrix based on `k` nearest neighbors. Parameters ---------- - array : np.ndarray - (n, k) array representing n observations on - k characteristics used to measure distances - between the n objects - **kwargs : keyword arguments, see Rook + + array : numpy.ndarray + An ``(n, k)`` array representing `n` observations on `k` + characteristics used to measure distances between the `n` objects. + *args : iterable + Positional arguments for ``libpysal.weights.KNN``. + **kwargs : dict + Keyword arguments for ``libpysal.weights.KNN``. Returns ------- - w : W - instance - Weights object with binary weights + + w : libpysal.weights.KNN + A `k` nearest neighbors weights instance. 
Examples -------- + >>> from libpysal.weights import KNN >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)] >>> wnn2 = KNN.from_array(points, 2) >>> [1,3] == wnn2.neighbors[0] True - >>> wnn2 = KNN.from_array(points,2) + + >>> wnn2 = KNN.from_array(points, 2) >>> wnn2[0] {1: 1.0, 3: 1.0} + >>> wnn2[1] {0: 1.0, 3: 1.0} - now with 1 rather than 0 offset + Now with 1 rather than 0 offset: - >>> wnn2 = KNN.from_array(points, 2, ids=range(1,7)) + >>> wnn2 = KNN.from_array(points, 2, ids=range(1, 7)) >>> wnn2[1] {2: 1.0, 4: 1.0} + >>> wnn2[2] {1: 1.0, 4: 1.0} + >>> 0 in wnn2.neighbors False @@ -266,75 +287,98 @@ def from_array(cls, array, *args, **kwargs): See Also -------- - :class:`libpysal.weights.weights.W` + + libpysal.weights.W + """ - return cls(array, *args, **kwargs) + + w = cls(array, *args, **kwargs) + + return w @classmethod def from_dataframe( cls, df, geom_col=None, ids=None, use_index=True, *args, **kwargs ): - """ - Make KNN weights from a dataframe. + """Make `KNN` weights from a dataframe. Parameters ---------- - df : pandas.dataframe - a dataframe with a geometry column that can be used to - construct a W object - geom_col : string - the name of the column in `df` that contains the - geometries. Defaults to active geometry column. - ids : list-like, string - a list-like of ids to use to index the spatial weights object or - the name of the column to use as IDs. If nothing is - provided, the dataframe index is used if `use_index=True` or - a positional index is used if `use_index=False`. - Order of the resulting W is not respected from this list. - use_index : bool - use index of `df` as `ids` to index the spatial weights object. + + df : pandas.DataFrame + A dataframe with a geometry column that can be used + to construct a `W` object. + geom_col : {None, str} + The column name of the geometry stored in ``df``. + Defaults to the active geometry column. 
+ ids : {str, iterable} + If string, the column name of the indices from the dataframe. + If iterable, a list of ids to use for the `W`. + If ``None``, ``df.index`` is used. Default is ``None``. + use_index : bool + Use index of `df` as `ids` to index the spatial weights object. + *args : iterable + Positional arguments for ``libpysal.weights.KNN``. + **kwargs : dict + Keyword arguments for ``libpysal.weights.KNN``. + + Returns + ------- + + w : libpysal.weights.KNN + A `k` nearest neighbors weights instance. See Also -------- - :class:`libpysal.weights.weights.W` + + libpysal.weights.W + """ + if geom_col is None: geom_col = df.geometry.name + pts = get_points_array(df[geom_col]) + if ids is None and use_index: ids = df.index.tolist() elif isinstance(ids, str): ids = df[ids].tolist() - return cls(pts, *args, ids=ids, **kwargs) + + w = cls(pts, *args, ids=ids, **kwargs) + + return w def reweight(self, k=None, p=None, new_data=None, new_ids=None, inplace=True): - """ - Redo K-Nearest Neighbor weights construction using given parameters + """Redo `K`-nearest neighbor weights construction using given parameters. Parameters ---------- - new_data : np.ndarray - an array containing additional data to use in the KNN - weight - new_ids : list - a list aligned with new_data that provides the ids for - each new observation - inplace : bool - a flag denoting whether to modify the KNN object - in place or to return a new KNN object - k : int - number of nearest neighbors - p : float - Minkowski p-norm distance metric parameter: - 1<=p<=infinity - 2: Euclidean distance - 1: Manhattan distance - Ignored if the KDTree is an ArcKDTree + + k : int + The number of nearest neighbors. Default is ``None``. + p : {int, float} + Minkowski `p`-norm distance metric parameter where + :math:`1<=\\mathtt{p}<=\\infty`. ``2`` is Euclidean distance and + ``1`` is Manhattan distance. This parameter is ignored if the + ``KDTree`` is an ``ArcKDTree``. Default is ``None``.
+ new_data : numpy.ndarray + An array containing additional data to use in the `KNN` weight. + Default is ``None``. + new_ids : list + A list aligned with ``new_data`` that provides the ids + for each new observation. Default is ``None``. + inplace : bool + A flag denoting whether to modify the `KNN` object + in place or to return a new `KNN` object. Default is ``True``. Returns ------- - A copy of the object using the new parameterization, or None if the - object is reweighted in place. + + w : libpysal.weights.KNN + A copy of the `k` nearest neighbors weights instance using the + new parameterization, or ``None`` if the object is reweighted in place. + """ if new_data is not None: @@ -351,105 +395,120 @@ def reweight(self, k=None, p=None, new_data=None, new_ids=None, inplace=True): ids = self.id_order elif (new_data is None) and (new_ids is not None): Warn("Remapping ids must be done using w.remap_ids") + if k is None: k = self.k if p is None: p = self.p + if inplace: self._reset() self.__init__(data, ids=ids, k=k, p=p) else: - return KNN(data, ids=ids, k=k, p=p) + w = KNN(data, ids=ids, k=k, p=p) + + return w class Kernel(W): - """ - Spatial weights based on kernel functions. + """Spatial weights based on kernel functions. Parameters ---------- - data : array - (n,k) or KDTree where KDtree.data is array (n,k) - n observations on k characteristics used to measure - distances between the n objects - bandwidth : float - or array-like (optional) - the bandwidth :math:`h_i` for the kernel. - fixed : binary - If true then :math:`h_i=h \\forall i`. If false then - bandwidth is adaptive across observations. - k : int - the number of nearest neighbors to use for determining - bandwidth. For fixed bandwidth, :math:`h_i=max(dknn) \\forall i` - where :math:`dknn` is a vector of k-nearest neighbor - distances (the distance to the kth nearest neighbor for each - observation). 
For adaptive bandwidths, :math:`h_i=dknn_i` - diagonal : boolean - If true, set diagonal weights = 1.0, if false (default), - diagonals weights are set to value according to kernel - function. - function : {'triangular','uniform','quadratic','quartic','gaussian'} - kernel function defined as follows with + data : {libpysal.cg.KDTree, libpysal.cg.ArcKDTree} + An :math:`(n,k)` array of :math:`n` observations on :math:`k` + characteristics used to measure distances between the :math:`n` objects. + k : int + The number of nearest neighbors to use for determining the bandwidth. For a + fixed bandwidth, :math:`h_i = max(dknn) \\forall i` where :math:`dknn` is a + vector of :math:`k`-nearest neighbor distances (the distance to the + :math:`k`-th nearest neighbor for each observation). For adaptive bandwidths, + :math:`h_i=dknn_i`. Default is ``2``. + bandwidth : {float, array-like} + The bandwidth :math:`h_i` for the kernel. Default is ``None``. + fixed : bool + If ``True`` then :math:`h_i = h \\forall i`. If ``False`` then + bandwidth is adaptive across observations. Default is ``True``. + diagonal : bool + If ``True``, set diagonal weights to ``1.0``. If ``False`` diagonal weights + are set to values according to the kernel function. Default is ``False``. + eps : float + The adjustment to ensure the `knn` distance range + is closed on the `knn`-th observations. Default is ``1.0000001``. + ids : list + Identifiers to attach to each observation. Default is ``None``. + radius : float + If supplied arc distances will be calculated based on the given radius + and ``p`` will be ignored. Default is ``None``. + See ``libpysal.cg.KDTree`` for more details. + distance_metric : str + Either ``'euclidean'`` or ``'arc'``. Default is ``'euclidean'``. + See ``libpysal.cg.KDTree`` for more details. + function : str + Either ``'triangular'``, ``'uniform'``, ``'quadratic'``, ``'quartic'``, + or ``'gaussian'``. Default is ``'triangular'``.
+ The kernel function is defined as follows with - .. math:: + .. math:: - z_{i,j} = d_{i,j}/h_i + z_{i,j} = d_{i,j}/h_i - triangular + triangular - .. math:: + .. math:: - K(z) = (1 - |z|) \\ if |z| \\le 1 + K(z) = (1 - |z|) \\ if |z| \\le 1 - uniform + uniform - .. math:: + .. math:: - K(z) = 1/2 \\ if |z| \\le 1 + K(z) = 1/2 \\ if |z| \\le 1 - quadratic + quadratic - .. math:: + .. math:: - K(z) = (3/4)(1-z^2) \\ if |z| \\le 1 + K(z) = (3/4)(1-z^2) \\ if |z| \\le 1 - quartic + quartic - .. math:: + .. math:: - K(z) = (15/16)(1-z^2)^2 \\ if |z| \\le 1 + K(z) = (15/16)(1-z^2)^2 \\ if |z| \\le 1 - gaussian + gaussian - .. math:: + .. math:: - K(z) = (2\\pi)^{(-1/2)} exp(-z^2 / 2) + K(z) = (2\\pi)^{(-1/2)} exp(-z^2 / 2) - eps : float - adjustment to ensure knn distance range is closed on the - knnth observations + **kwargs : dict + Keyword arguments for ``libpysal.weights.W``. Attributes ---------- - weights : dict - Dictionary keyed by id with a list of weights for each neighbor + weights : dict + Dictionary keyed by id with a list of weights for each neighbor. neighbors : dict - of lists of neighbors keyed by observation id - - bandwidth : array - array of bandwidths + Lists of neighbors keyed by observation id. + bandwidth : array-like + An array of bandwidths. 
Examples -------- + >>> from libpysal.weights import Kernel - >>> points=[(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)] - >>> kw=Kernel(points) + >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)] + >>> kw = Kernel(points) >>> kw.weights[0] [1.0, 0.500000049999995, 0.4409830615267465] + >>> kw.neighbors[0] [0, 1, 3] + >>> kw.bandwidth array([[20.000002], [20.000002], @@ -457,11 +516,14 @@ class Kernel(W): [20.000002], [20.000002], [20.000002]]) - >>> kw15=Kernel(points,bandwidth=15.0) + + >>> kw15 = Kernel(points,bandwidth=15.0) >>> kw15[0] {0: 1.0, 1: 0.33333333333333337, 3: 0.2546440075000701} + >>> kw15.neighbors[0] [0, 1, 3] + >>> kw15.bandwidth array([[15.], [15.], @@ -470,14 +532,16 @@ class Kernel(W): [15.], [15.]]) - Adaptive bandwidths user specified + Adaptive bandwidths user specified: - >>> bw=[25.0,15.0,25.0,16.0,14.5,25.0] - >>> kwa=Kernel(points,bandwidth=bw) + >>> bw = [25.0,15.0,25.0,16.0,14.5,25.0] + >>> kwa = Kernel(points,bandwidth=bw) >>> kwa.weights[0] [1.0, 0.6, 0.552786404500042, 0.10557280900008403] + >>> kwa.neighbors[0] [0, 1, 3, 4] + >>> kwa.bandwidth array([[25. ], [15. ], @@ -486,13 +550,15 @@ class Kernel(W): [14.5], [25. 
]]) - Endogenous adaptive bandwidths + Endogenous adaptive bandwidths: - >>> kwea=Kernel(points,fixed=False) + >>> kwea = Kernel(points,fixed=False) >>> kwea.weights[0] [1.0, 0.10557289844279438, 9.99999900663795e-08] + >>> kwea.neighbors[0] [0, 1, 3] + >>> kwea.bandwidth array([[11.18034101], [11.18034101], @@ -501,11 +567,12 @@ class Kernel(W): [14.14213704], [18.02775818]]) - Endogenous adaptive bandwidths with Gaussian kernel + Endogenous adaptive bandwidths with Gaussian kernel: - >>> kweag=Kernel(points,fixed=False,function='gaussian') + >>> kweag = Kernel(points,fixed=False,function='gaussian') >>> kweag.weights[0] [0.3989422804014327, 0.2674190291577696, 0.2419707487162134] + >>> kweag.bandwidth array([[11.18034101], [11.18034101], @@ -514,11 +581,12 @@ class Kernel(W): [14.14213704], [18.02775818]]) - Diagonals to 1.0 + Diagonals to 1.0: >>> kq = Kernel(points,function='gaussian') >>> kq.weights {0: [0.3989422804014327, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 0.3989422804014327, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 0.3989422804014327, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 0.3989422804014327, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 0.3989422804014327, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 0.3989422804014327]} + >>> kqd = Kernel(points, function='gaussian', diagonal=True) >>> kqd.weights {0: [1.0, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 1.0, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 1.0, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 1.0, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 1.0, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 1.0]} @@ -528,19 +596,21 @@ class Kernel(W): def 
__init__( self, data, + k=2, bandwidth=None, fixed=True, - k=2, - function="triangular", + diagonal=False, eps=1.0000001, ids=None, - diagonal=False, - distance_metric="euclidean", radius=None, + distance_metric="euclidean", + function="triangular", **kwargs ): + if radius is not None: distance_metric = "arc" + if isKDTree(data): self.kdtree = data self.data = self.kdtree.data @@ -548,10 +618,12 @@ def __init__( else: self.kdtree = KDTree(data, distance_metric=distance_metric, radius=radius) self.data = self.kdtree.data + self.k = k + 1 self.function = function.lower() self.fixed = fixed self.eps = eps + if bandwidth: try: bandwidth = np.array(bandwidth) @@ -564,103 +636,173 @@ def __init__( self._eval_kernel() neighbors, weights = self._k_to_W(ids) + if diagonal: for i in neighbors: weights[i][neighbors[i].index(i)] = 1.0 + W.__init__(self, neighbors, weights, ids, **kwargs) @classmethod def from_shapefile(cls, filepath, idVariable=None, **kwargs): - """ - Kernel based weights from shapefile + """Construct kernel-based weights from a shapefile. Parameters ---------- - shapefile : string - shapefile name with shp suffix - idVariable : string - name of column in shapefile's DBF to use for ids + + filepath : str + The name of polygon shapefile (including the file extension) + containing attribute data. + idVariable : str + The name of the column in shapefile's DBF to use for ids. + Default is ``None``. + **kwargs : dict + Keyword arguments for ``libpysal.weights.Kernel``. Returns ------- - Kernel Weights Object + + w : libpysal.weights.Kernel + A kernel weights instance. 
See Also - -------- - :class:`libpysal.weights.weights.W` + --------- + + libpysal.weights.W + """ + points = get_points_array_from_shapefile(filepath) + if idVariable is not None: ids = get_ids(filepath, idVariable) else: ids = None - return cls.from_array(points, ids=ids, **kwargs) + + w = cls.from_array(points, ids=ids, **kwargs) + + return w @classmethod def from_array(cls, array, **kwargs): - """ - Construct a Kernel weights from an array. Supports all the same options - as :class:`libpysal.weights.Kernel` + """Construct kernel-based weights from an array. + + Parameters + ---------- + + array : numpy.ndarray + An ``(n, k)`` array representing `n` observations on `k` + characteristics used to measure distances between the `n` objects. + **kwargs : dict + Keyword arguments for ``libpysal.weights.Kernel``. + + Returns + ------- + + w : libpysal.weights.Kernel + A kernel weights instance. + See Also -------- - :class:`libpysal.weights.weights.W` + + libpysal.weights.W + """ return cls(array, **kwargs) @classmethod def from_dataframe(cls, df, geom_col=None, ids=None, use_index=True, **kwargs): - """ - Make Kernel weights from a dataframe. + """Construct kernel-based weights from a dataframe. Parameters ---------- - df : pandas.dataframe - a dataframe with a geometry column that can be used to - construct a W object - geom_col : string - the name of the column in `df` that contains the - geometries. Defaults to active geometry column. - ids : list-like, string - a list-like of ids to use to index the spatial weights object or - the name of the column to use as IDs. If nothing is - provided, the dataframe index is used if `use_index=True` or - a positional index is used if `use_index=False`. - Order of the resulting W is not respected from this list. - use_index : bool - use index of `df` as `ids` to index the spatial weights object. + + df : pandas.DataFrame + A dataframe with a geometry column that can be used + to construct a PySAL `W` object. 
+ geom_col : {None, str} + The column name of the geometry stored in ``df``. + Defaults to the active geometry column. + ids : {str, iterable} + If string, the column name of the indices from the dataframe. + If iterable, a list of ids to use for the `W`. + If ``None``, ``df.index`` is used when ``use_index`` is ``True``. Default is ``None``. + use_index : bool + Use the index of ``df`` as ``ids`` to index the spatial weights object. + **kwargs : dict + Keyword arguments for ``libpysal.weights.Kernel``. + + Returns + ------- + + w : libpysal.weights.Kernel + A kernel weights instance. See Also -------- - :class:`libpysal.weights.weights.W` + + libpysal.weights.W + """ + if geom_col is None: geom_col = df.geometry.name + pts = get_points_array(df[geom_col]) + if ids is None and use_index: ids = df.index.tolist() elif isinstance(ids, str): ids = df[ids].tolist() - return cls(pts, ids=ids, **kwargs) + + w = cls(pts, ids=ids, **kwargs) + + return w def _k_to_W(self, ids=None): + """Internal method for converting `k` neighbors to weights. + + Parameters + ---------- + + ids : list + See ``ids`` in ``libpysal.weights.Kernel``. Default is ``None``. + + Returns + ------- + + allneighbors : dict + Index lookup of all neighbors. + weights : dict + Index lookup of neighbor weights. 
+ + """ + allneighbors = {} weights = {} + if ids: ids = np.array(ids) else: ids = np.arange(len(self.data)) + for i, neighbors in enumerate(self.kernel): + if len(self.neigh[i]) == 0: allneighbors[ids[i]] = [] weights[ids[i]] = [] else: allneighbors[ids[i]] = list(ids[self.neigh[i]]) weights[ids[i]] = self.kernel[i].tolist() + return allneighbors, weights def _set_bw(self): + """Internal method for setting the bandwidth.""" + dmat, neigh = self.kdtree.query(self.data, k=self.k) + if self.fixed: # use max knn distance as bandwidth bandwidth = dmat.max() * self.eps @@ -675,6 +817,8 @@ def _set_bw(self): self.neigh = nnq[1] def _eval_kernel(self): + """Internal method for evaluating the kernel function.""" + # get points within bandwidth distance of each point if not hasattr(self, "neigh"): kdtq = self.kdtree.query_ball_point @@ -682,6 +826,7 @@ def _eval_kernel(self): kdtq(self.data[i], r=bwi[0]) for i, bwi in enumerate(self.bandwidth) ] self.neigh = neighbors + # get distances for neighbors bw = self.bandwidth @@ -695,6 +840,7 @@ def _eval_kernel(self): zi = np.array([dict(list(zip(ni, di)))[nid] for nid in nids]) / bw[i] z.append(zi) zs = z + # functions follow Anselin and Rey (2010) table 5.4 if self.function == "triangular": self.kernel = [1 - zi for zi in zs] @@ -713,94 +859,117 @@ def _eval_kernel(self): class DistanceBand(W): - """ - Spatial weights based on distance band. + """Spatial weights based on distance band. 
Parameters ---------- - data : array - (n,k) or KDTree where KDtree.data is array (n,k) - n observations on k characteristics used to measure - distances between the n objects - threshold : float - distance band - p : float - DEPRECATED: use `distance_metric` - Minkowski p-norm distance metric parameter: - 1<=p<=infinity - 2: Euclidean distance - 1: Manhattan distance - binary : boolean - If true w_{ij}=1 if d_{i,j}<=threshold, otherwise w_{i,j}=0 - If false wij=dij^{alpha} - alpha : float - distance decay parameter for weight (default -1.0) - if alpha is positive the weights will not decline with - distance. If binary is True, alpha is ignored - - ids : list - values to use for keys of the neighbors and weights dicts - - build_sp : boolean - DEPRECATED - True to build sparse distance matrix and false to build dense - distance matrix; significant speed gains may be obtained - dending on the sparsity of the of distance_matrix and - threshold that is applied - silent : boolean - By default libpysal will print a warning if the - dataset contains any disconnected observations or - islands. To silence this warning set this - parameter to True. + data : {array-like, libpysal.cg.KDTree} + ``(n,k)`` or ``KDTree`` where ``KDTree.data`` is an ``(n,k)`` array + of `n` observations on `k` characteristics used to measure + distances between the `n` objects. + threshold : float + The distance band. + p : {int, float} + Minkowski `p`-norm distance metric parameter where :math:`1<=\\mathtt{p}<=\\infty`. + ``2`` is Euclidean distance and ``1`` is Manhattan distance. + This parameter is ignored if the ``KDTree`` is an ``ArcKDTree``. + Default is ``2``. + binary : bool + If set to ``True``, :math:`w_{ij}=1` if :math:`d_{i,j}<=\\mathtt{threshold}`, + otherwise :math:`w_{i,j}=0`. If set to ``False``, + :math:`w_{ij}=d_{ij}^{\\mathtt{alpha}}`. Default is ``True``. + alpha : float + The distance decay parameter for weights. Default is ``-1.0``. 
+ If ``alpha`` is positive the weights will not decline with distance. + If ``binary`` is set to ``True``, ``alpha`` is ignored. + ids : list + Identifiers to attach to each observation. Default is ``None``. + build_sp : bool + Set to ``True`` to build a sparse distance matrix and ``False`` to build a dense + distance matrix. Significant speed gains may be obtained depending on the + sparsity of the distance matrix and the ``threshold`` that is applied. + Default is ``True``. + silence_warnings : bool + By default (``False``) libpysal will print a warning if the dataset contains any + disconnected observations or islands. To silence this warning set to ``True``. + radius : float + If supplied arc distances will be calculated based on the given radius + and ``p`` will be ignored. Default is ``None``. + See ``libpysal.cg.KDTree`` for more details. + distance_metric : str + Either ``'euclidean'`` or ``'arc'``. Default is ``'euclidean'``. + See ``libpysal.cg.KDTree`` for more details. Attributes ---------- - weights : dict - of neighbor weights keyed by observation id + weights : dict + Neighbor weights keyed by observation id. neighbors : dict - of neighbors keyed by observation id + Neighbors keyed by observation id. + + Raises + ------ + + ValueError + An array was unable to be instantiated with ``data``. Examples -------- - >>> import libpysal - >>> points=[(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)] - >>> wcheck = libpysal.weights.W({0: [1, 3], 1: [0, 3], 2: [], 3: [0, 1], 4: [5], 5: [4]}) - WARNING: there is one disconnected observation (no neighbors) - Island id: [2] - >>> w=libpysal.weights.DistanceBand(points,threshold=11.2) + >>> import libpysal + >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)] + >>> wcheck = libpysal.weights.W( + ... {0: [1, 3], 1: [0, 3], 2: [], 3: [0, 1], 4: [5], 5: [4]} + ... ) + UserWarning: The weights matrix is not fully connected: + There are 3 disconnected components. 
+ There is 1 island with id: 2. + + >>> w = libpysal.weights.DistanceBand(points, threshold=11.2) + UserWarning: The weights matrix is not fully connected: + There are 3 disconnected components. + There is 1 island with id: 2. - WARNING: there is one disconnected observation (no neighbors) - Island id: [2] >>> libpysal.weights.util.neighbor_equality(w, wcheck) True - >>> w=libpysal.weights.DistanceBand(points,threshold=14.2) - >>> wcheck = libpysal.weights.W({0: [1, 3], 1: [0, 3, 4], 2: [4], 3: [1, 0], 4: [5, 2, 1], 5: [4]}) + + >>> w = libpysal.weights.DistanceBand(points, threshold=14.2) + >>> wcheck = libpysal.weights.W( + ... {0: [1, 3], 1: [0, 3, 4], 2: [4], 3: [1, 0], 4: [5, 2, 1], 5: [4]} + ... ) >>> libpysal.weights.util.neighbor_equality(w, wcheck) True - inverse distance weights + Inverse distance weights: - >>> w=libpysal.weights.DistanceBand(points,threshold=11.2,binary=False) + >>> w = libpysal.weights.DistanceBand(points, threshold=11.2, binary=False) + UserWarning: The weights matrix is not fully connected: + There are 3 disconnected components. + There is 1 island with id: 2. - WARNING: there is one disconnected observation (no neighbors) - Island id: [2] >>> w.weights[0] [0.1, 0.08944271909999159] >>> w.neighbors[0].tolist() [1, 3] - gravity weights + Gravity weights: - >>> w=libpysal.weights.DistanceBand(points,threshold=11.2,binary=False,alpha=-2.) + >>> w = libpysal.weights.DistanceBand(points, threshold=11.2, binary=False, alpha=-2.) + UserWarning: The weights matrix is not fully connected: + There are 3 disconnected components. + There is 1 island with id: 2. - WARNING: there is one disconnected observation (no neighbors) - Island id: [2] >>> w.weights[0] [0.01, 0.007999999999999998] + Notes + ----- + + This was initially implemented running ``scipy v0.8.0dev`` (in epd 6.1). + Earlier versions of scipy (0.7.0) have a logic bug in ``scipy/sparse/dok.py``, + so Serge changed line 221 of that file on sal-dev to fix the logic bug. 
""" @@ -809,22 +978,25 @@ def __init__( data, threshold, p=2, - alpha=-1.0, binary=True, + alpha=-1.0, ids=None, build_sp=True, silence_warnings=False, - distance_metric="euclidean", radius=None, + distance_metric="euclidean", ): - """Casting to floats is a work around for a bug in scipy.spatial. - See detail in pysal issue #126. + """Casting to floats is a work around for a bug in ``scipy.spatial``. + See details in `pysal/pysal#126 `_. """ + if ids is not None: ids = list(ids) + if radius is not None: distance_metric = "arc" + self.p = p self.threshold = threshold self.binary = binary @@ -846,87 +1018,125 @@ def __init__( ) self.data = self.kdtree.data except: - raise ValueError("Could not make array from data") + raise ValueError("Could not make array from data.") else: self.data = data self.kdtree = None self._band() neighbors, weights = self._distance_to_W(ids) + W.__init__( self, neighbors, weights, ids, silence_warnings=self.silence_warnings ) @classmethod def from_shapefile(cls, filepath, threshold, idVariable=None, **kwargs): - """ - Distance-band based weights from shapefile + """Construct a distance band weights object from a shapefile. Parameters ---------- - shapefile : string - shapefile name with shp suffix - idVariable : string - name of column in shapefile's DBF to use for ids + + filepath : str + The name of polygon shapefile (including the file extension) + containing attribute data. + threshold : float + The distance band. + idVariable : str + The name of the column in shapefile's DBF to use for ids. + Default is ``None``. + **kwargs : dict + Keyword arguments for ``libpysal.weights.DistanceBand``. Returns ------- - Kernel Weights Object + + w : libpysal.weights.DistanceBand + A distance band weights instance. 
""" + points = get_points_array_from_shapefile(filepath) + if idVariable is not None: ids = get_ids(filepath, idVariable) else: ids = None - return cls.from_array(points, threshold, ids=ids, **kwargs) + + w = cls.from_array(points, threshold, ids=ids, **kwargs) + + return w @classmethod def from_array(cls, array, threshold, **kwargs): - """ - Construct a DistanceBand weights from an array. Supports all the same options - as :class:`libpysal.weights.DistanceBand` + """Construct a distance band weights object from an array. + + Parameters + ---------- + + array : numpy.ndarray + An ``(n, k)`` array representing `n` observations on `k` + characteristics used to measure distances between the `n` objects. + threshold : float + The distance band. + **kwargs : dict + Keyword arguments for ``libpysal.weights.DistanceBand``. + + Returns + ------- + w : libpysal.weights.DistanceBand + A distance band weights instance. """ + return cls(array, threshold, **kwargs) @classmethod def from_dataframe( cls, df, threshold, geom_col=None, ids=None, use_index=True, **kwargs ): - """ Make DistanceBand weights from a dataframe. Parameters ---------- - df : pandas.dataframe - a dataframe with a geometry column that can be used to - construct a W object - geom_col : string - the name of the column in `df` that contains the - geometries. Defaults to active geometry column. - ids : list-like, string - a list-like of ids to use to index the spatial weights object or - the name of the column to use as IDs. If nothing is - provided, the dataframe index is used if `use_index=True` or - a positional index is used if `use_index=False`. - Order of the resulting W is not respected from this list. - use_index : bool - use index of `df` as `ids` to index the spatial weights object. + + df : pandas.DataFrame + A dataframe with a geometry column that can be used + to construct a PySAL `W` object. + threshold : float + The distance band. 
+ geom_col : {None, str} + The column name of the geometry stored in ``df``. + Defaults to the active geometry column. + ids : {str, iterable} + If string, the column name of the indices from the dataframe. + If iterable, a list of ids to use for the `W`. + If ``None``, ``df.index`` is used when ``use_index`` is ``True``. Default is ``None``. + use_index : bool + Use the index of ``df`` as ``ids`` to index the spatial weights object. + **kwargs : dict + Keyword arguments for ``libpysal.weights.DistanceBand``. """ + if geom_col is None: geom_col = df.geometry.name + pts = get_points_array(df[geom_col]) + if ids is None and use_index: ids = df.index.tolist() elif isinstance(ids, str): ids = df[ids].tolist() - return cls(pts, threshold, ids=ids, **kwargs) + + w = cls(pts, threshold, ids=ids, **kwargs) + + return w def _band(self): - """Find all pairs within threshold.""" + """Internal method for finding all pairs within the threshold.""" + if self.build_sp: self.dmat = self.kdtree.sparse_distance_matrix( self.kdtree, max_distance=self.threshold, p=self.p @@ -935,12 +1145,30 @@ def _band(self): if str(self.kdtree).split(".")[-1][0:10] == "Arc_KDTree": raise TypeError( "Unable to calculate dense arc distance matrix;" - ' parameter "build_sp" must be set to True for arc' - " distance type weight" + " parameter 'build_sp' must be set to True for arc" + " distance type weight." ) self.dmat = self._spdistance_matrix(self.data, self.data, self.threshold) def _distance_to_W(self, ids=None): + """Internal method for converting distance band neighbors to weights. + + Parameters + ---------- + + ids : list + See ``ids`` in ``libpysal.weights.DistanceBand``. Default is ``None``. + + Returns + ------- + + neighbors : dict + Index lookup of all neighbors. + weights : dict + Index lookup of neighbor weights. 
+ + """ + if self.binary: self.dmat[self.dmat > 0] = 1 self.dmat.eliminate_zeros() @@ -966,11 +1194,41 @@ def _distance_to_W(self, ids=None): return neighbors, weights def _spdistance_matrix(self, x, y, threshold=None): + """Internal method for converting a distance matrix into a CSR matrix. + + Parameters + ---------- + + x : array-like + X values. + y : array-like + Y values. + threshold : float + See ``threshold`` in ``DistanceBand``. Default is ``None``. + + Returns + ------- + + sp_mtx : scipy.sparse.csr_matrix + A Compressed Sparse Row matrix. + + See Also + -------- + + scipy.spatial.distance_matrix + scipy.sparse.csr_matrix + + """ + dist = distance_matrix(x, y) + if threshold is not None: zeros = dist > threshold dist[zeros] = 0 - return sp.csr_matrix(dist) + + sp_mtx = sp.csr_matrix(dist) + + return sp_mtx def _test(): @@ -978,6 +1236,7 @@ def _test(): # the following line could be used to define an alternative to the '' flag # doctest.BLANKLINE_MARKER = 'something better than ' + start_suppress = np.get_printoptions()["suppress"] np.set_printoptions(suppress=True) doctest.testmod() @@ -985,4 +1244,5 @@ def _test(): if __name__ == "__main__": + _test() diff --git a/libpysal/weights/raster.py b/libpysal/weights/raster.py index 90ea91388..90d4176e0 100644 --- a/libpysal/weights/raster.py +++ b/libpysal/weights/raster.py @@ -18,7 +18,6 @@ def intercepted_function(f, *f_args, **f_kwargs): return intercepted_function - else: from ..common import jit @@ -225,7 +224,7 @@ def da2WSP( da = da[slice_dict] ser = da.to_series() - dtype = np.int32 if (shape[0] * shape[1]) < 46340 ** 2 else np.int64 + dtype = np.int32 if (shape[0] * shape[1]) < 46340**2 else np.int64 if "nodatavals" in da.attrs and da.attrs["nodatavals"]: mask = (ser != da.attrs["nodatavals"][0]).to_numpy() ids = np.where(mask)[0] @@ -292,7 +291,7 @@ def da2WSP( # then eliminate zeros from the data. This changes the # sparcity of the csr_matrix !! 
if k > 1 and not include_nodata: - sw = sum(map(lambda x: sw ** x, range(1, k + 1))) + sw = sum(map(lambda x: sw**x, range(1, k + 1))) sw.setdiag(0) sw.eliminate_zeros() sw.data[:] = np.ones_like(sw.data, dtype=np.int8) diff --git a/libpysal/weights/set_operations.py b/libpysal/weights/set_operations.py index dbbabc423..de3a9f718 100644 --- a/libpysal/weights/set_operations.py +++ b/libpysal/weights/set_operations.py @@ -2,7 +2,12 @@ Set-like manipulation of weights matrices. """ -__author__ = "Sergio J. Rey , Charles Schmidt , David Folch , Dani Arribas-Bel " +__author__ = ( + "Sergio J. Rey ," + "Charles Schmidt ," + "David Folch ," + "Dani Arribas-Bel " +) import copy from .weights import W, WSP @@ -20,31 +25,29 @@ def w_union(w1, w2, **kwargs): - """ - Returns a binary weights object, w, that includes all neighbor pairs that - exist in either w1 or w2. + """Return a binary weights object, ``w``, that includes all + neighbor pairs that exist in either ``w1`` or ``w2``. Parameters ---------- - - w1 : W - object - w2 : W - object - **kwargs : keyword arguments - optional arguments for :class:`pysal.weights.W` + w1 : libpysal.weights.W + A PySAL weights object. + w2 : libpysal.weights.W + A PySAL weights object. + **kwargs : dict + Keyword arguments for ``libpysal.weights.W``. Returns ------- - - w : W - object + w : libpysal.weights.W + The union of two PySAL weights objects. Notes ----- - ID comparisons are performed using ==, therefore the integer ID 2 is + + ID comparisons are performed using ``==``, therefore the integer ID 2 is equivalent to the float ID 2.0. Returns a matrix with all the unique IDs - from w1 and w2. + from ``w1`` and ``w2``. 
Examples -------- @@ -67,45 +70,52 @@ def w_union(w1, w2, **kwargs): [19, 11, 14] """ + neighbors = dict(list(w1.neighbors.items())) + for i in w2.neighbors: if i in neighbors: add_neigh = set(neighbors[i]).union(set(w2.neighbors[i])) neighbors[i] = list(add_neigh) else: neighbors[i] = copy.copy(w2.neighbors[i]) - return W(neighbors, **kwargs) + + w = W(neighbors, **kwargs) + + return w def w_intersection(w1, w2, w_shape="w1", **kwargs): - """ - Returns a binary weights object, w, that includes only - those neighbor pairs that exist in both w1 and w2. + """Returns a binary weights object, ``w``, that includes only + those neighbor pairs that exist in both ``w1`` and ``w2``. Parameters ---------- - - w1 : W - object - w2 : W - object - w_shape : string - Defines the shape of the returned weights matrix. 'w1' returns a - matrix with the same IDs as w1; 'all' returns a matrix with all - the unique IDs from w1 and w2; and 'min' returns a matrix with - only the IDs occurring in both w1 and w2. - **kwargs : keyword arguments - optional arguments for :class:`pysal.weights.W` + w1 : libpysal.weights.W + A PySAL weights object. + w2 : libpysal.weights.W + A PySAL weights object. + w_shape : str + Defines the shape of the returned weights matrix. ``'w1'`` returns a + matrix with the same IDs as ``w1``; ``'all'`` returns a matrix with all + the unique IDs from ``w1`` and ``w2``; and ``'min'`` returns a matrix with + only the IDs occurring in both ``w1`` and ``w2``. Default is ``'w1'``. + **kwargs : dict + Keyword arguments for ``libpysal.weights.W``. Returns ------- + w : libpysal.weights.W + The intersection of two PySAL weights objects. - w : W - object + Raises + ------ + ValueError + An invalid string value was passed to ``w_shape``. Notes ----- - ID comparisons are performed using ==, therefore the integer ID 2 is + ID comparisons are performed using ``==``, therefore the integer ID 2 is equivalent to the float ID 2.0. 
Examples @@ -137,7 +147,7 @@ def w_intersection(w1, w2, w_shape="w1", **kwargs): elif w_shape == "min": neigh_keys = set(w1.neighbors.keys()).intersection(set(w2.neighbors.keys())) else: - raise Exception("invalid string passed to w_shape") + raise ValueError("Invalid string passed to w_shape.") neighbors = {} for i in neigh_keys: @@ -147,55 +157,63 @@ def w_intersection(w1, w2, w_shape="w1", **kwargs): else: neighbors[i] = [] - return W(neighbors, **kwargs) + w = W(neighbors, **kwargs) + + return w def w_difference(w1, w2, w_shape="w1", constrained=True, **kwargs): - """ - Returns a binary weights object, w, that includes only neighbor pairs - in w1 that are not in w2. The w_shape and constrained parameters - determine which pairs in w1 that are not in w2 are returned. + """Returns a binary weights object, ``w``, that includes + only neighbor pairs in ``w1`` that are not in ``w2``. The + ``w_shape`` and ``constrained`` parameters determine which + pairs in ``w1`` that are not in ``w2`` are returned. Parameters ---------- - - w1 : W - object - w2 : W - object - w_shape : string - Defines the shape of the returned weights matrix. 'w1' returns a - matrix with the same IDs as w1; 'all' returns a matrix with all - the unique IDs from w1 and w2; and 'min' returns a matrix with - the IDs occurring in w1 and not in w2. - constrained : boolean - If False then the full set of neighbor pairs in w1 that are - not in w2 are returned. If True then those pairs that would - not be possible if w_shape='min' are dropped. Ignored if - w_shape is set to 'min'. - **kwargs : keyword arguments - optional arguments for :class:`pysal.weights.W` + w1 : libpysal.weights.W + A PySAL weights object. + w2 : libpysal.weights.W + A PySAL weights object. + w_shape : str + Defines the shape of the returned weights matrix. 
``'w1'`` returns a + matrix with the same IDs as ``w1``; ``'all'`` returns a matrix with all + the unique IDs from ``w1`` and ``w2``; and ``'min'`` returns a matrix with + the IDs occurring in ``w1`` and not in ``w2``. Default is ``'w1'``. + constrained : bool + If ``False`` then the full set of neighbor pairs in ``w1`` that are + not in ``w2`` are returned. If ``True`` then those pairs that would + not be possible if ``w_shape='min'`` are dropped. Default is ``True``. + Ignored if ``w_shape`` is set to ``'min'``. + **kwargs : dict + Keyword arguments for ``libpysal.weights.W``. Returns ------- + w : libpysal.weights.W + The difference of two PySAL weights objects. - w : W - object + Raises + ------ + RuntimeError + An empty weights matrix was returned. + ValueError + An invalid string value was passed to ``w_shape``. Notes ----- - ID comparisons are performed using ==, therefore the integer ID 2 is + + ID comparisons are performed using ``==``, therefore the integer ID 2 is equivalent to the float ID 2.0. Examples -------- - Construct rook (w2) and queen (w1) weights matrices for two 4x4 regions - (16 areas). A queen matrix has all the joins a rook matrix does plus joins - between areas that share a corner. The new matrix formed by the difference - of rook from queen contains only join at corners (typically called a - bishop matrix). Note that the difference of queen from rook would result - in a weights matrix with no joins. + Construct rook (``w2``) and queen (``w1``) weights matrices for two 4x4 + regions (16 areas). A queen matrix has all the joins a rook matrix does + plus joins between areas that share a corner. The new matrix formed by + the difference of rook from queen contains only joins at corners (typically + called a bishop matrix). Note that the difference of queen from rook would + result in a weights matrix with no joins. 
>>> from libpysal.weights import lat2W, w_difference >>> w1 = lat2W(4,4,rook=False) @@ -219,9 +237,9 @@ def w_difference(w1, w2, w_shape="w1", constrained=True, **kwargs): elif w_shape == "min": neigh_keys = set(w1.neighbors.keys()).difference(set(w2.neighbors.keys())) if not neigh_keys: - raise Exception("returned an empty weights matrix") + raise RuntimeError("Returned an empty weights matrix.") else: - raise Exception("invalid string passed to w_shape") + raise ValueError("Invalid string passed to w_shape.") neighbors = {} for i in neigh_keys: @@ -242,53 +260,59 @@ def w_difference(w1, w2, w_shape="w1", constrained=True, **kwargs): for i in constrained_keys: neighbors[i] = list(set(neighbors[i]).intersection(constrained_keys)) - return W(neighbors, **kwargs) + w = W(neighbors, **kwargs) + + return w def w_symmetric_difference(w1, w2, w_shape="all", constrained=True, **kwargs): - """ - Returns a binary weights object, w, that includes only neighbor pairs - that are not shared by w1 and w2. The w_shape and constrained parameters - determine which pairs that are not shared by w1 and w2 are returned. + """Returns a binary weights object, ``w``, that includes only + neighbor pairs that are not shared by ``w1`` and ``w2``. The + ``w_shape`` and ``constrained`` parameters determine which + pairs that are not shared by ``w1`` and ``w2`` are returned. Parameters ---------- - - w1 : W - object - w2 : W - object - w_shape : string - Defines the shape of the returned weights matrix. 'all' returns a - matrix with all the unique IDs from w1 and w2; and 'min' returns - a matrix with the IDs not shared by w1 and w2. - constrained : boolean - If False then the full set of neighbor pairs that are not - shared by w1 and w2 are returned. If True then those pairs - that would not be possible if w_shape='min' are dropped. - Ignored if w_shape is set to 'min'. 
- **kwargs : keyword arguments - optional arguments for :class:`pysal.weights.W` + w1 : libpysal.weights.W + A PySAL weights object. + w2 : libpysal.weights.W + A PySAL weights object. + w_shape : str + Defines the shape of the returned weights matrix. ``'all'`` returns a + matrix with all the unique IDs from ``w1`` and ``w2``; and ``'min'`` + returns a matrix with the IDs not shared by ``w1`` and ``w2``. + constrained : bool + If ``False`` then the full set of neighbor pairs that are not + shared by ``w1`` and ``w2`` are returned. If ``True`` then those pairs + that would not be possible if ``w_shape='min'`` are dropped. + Default is ``True``. Ignored if ``w_shape`` is set to ``'min'``. + **kwargs : dict + Keyword arguments for ``libpysal.weights.W``. Returns ------- + w : libpysal.weights.W + The symmetric difference of two PySAL weights objects. - w : W - object + Raises + ------ + ValueError + An invalid string value was passed to ``w_shape``. Notes ----- - ID comparisons are performed using ==, therefore the integer ID 2 is + + ID comparisons are performed using ``==``, therefore the integer ID 2 is equivalent to the float ID 2.0. Examples -------- - Construct queen weights matrix for a 4x4 (16 areas) region (w1) and a rook - matrix for a 6x4 (24 areas) region (w2). The symmetric difference of these - two matrices (with w_shape set to 'all' and constrained set to False) - contains the corner joins in the overlap area, all the joins in the - non-overlap area. + Construct a queen weights matrix for a 4x4 (16 areas) region (``w1``) + and a rook matrix for a 6x4 (24 areas) region (``w2``). The symmetric + difference of these two matrices (with ``w_shape`` set to ``'all'`` and + ``constrained`` set to ``False``) contains the corner joins in the overlap + area, all the joins in the non-overlap area. 
>>> from libpysal.weights import lat2W, w_symmetric_difference >>> w1 = lat2W(4,4,rook=False) @@ -312,7 +336,7 @@ def w_symmetric_difference(w1, w2, w_shape="all", constrained=True, **kwargs): set(w2.neighbors.keys()) ) else: - raise Exception("invalid string passed to w_shape") + raise ValueError("Invalid string passed to w_shape.") neighbors = {} for i in neigh_keys: @@ -337,39 +361,37 @@ def w_symmetric_difference(w1, w2, w_shape="all", constrained=True, **kwargs): for i in constrained_keys: neighbors[i] = list(set(neighbors[i]).intersection(constrained_keys)) - return W(neighbors, **kwargs) + w = W(neighbors, **kwargs) + + return w def w_subset(w1, ids, **kwargs): - """ - Returns a binary weights object, w, that includes only those - observations in ids. + """Returns a binary weights object, ``w``, that includes only those + observations in passed in with the ``ids`` parameter. Parameters ---------- - - w1 : W - object - ids : list - A list containing the IDs to be include in the returned weights - object. - **kwargs : keyword arguments - optional arguments for :class:`pysal.weights.W` + w1 : libpysal.weights.W + A PySAL weights object. + ids : list + A list containing the IDs to be include in the returned weights object. + **kwargs : dict + Keyword arguments for ``libpysal.weights.W``. Returns ------- - - w : W - object + w : libpysal.weights.W + The subset of a PySAL weights object. Examples -------- - Construct a rook weights matrix for a 6x4 region (24 areas). By default - PySAL assigns integer IDs to the areas in a region. By passing in a list - of integers from 0 to 15, the first 16 areas are extracted from the - previous weights matrix, and only those joins relevant to the new region - are retained. + Construct a rook weights matrix for a 6x4 region (24 areas). By + default PySAL assigns integer IDs to the areas in a region. 
By + passing in a list of integers from 0 to 15, the first 16 areas are + extracted from the previous weights matrix, and only those joins + relevant to the new region are retained. >>> from libpysal.weights import lat2W, w_subset >>> w1 = lat2W(6,4) @@ -386,6 +408,7 @@ def w_subset(w1, ids, **kwargs): neighbors = {} ids_set = set(list(ids)) + for i in ids: if i in w1.neighbors: neigh_add = ids_set.intersection(set(w1.neighbors[i])) @@ -393,45 +416,50 @@ def w_subset(w1, ids, **kwargs): else: neighbors[i] = [] - return W(neighbors, id_order=list(ids), **kwargs) + w = W(neighbors, id_order=list(ids), **kwargs) + return w -def w_clip(w1, w2, outSP=True, **kwargs): - """ - Clip a continuous W object (w1) with a different W object (w2) so only cells where - w2 has a non-zero value remain with non-zero values in w1. - Checks on w1 and w2 are performed to make sure they conform to the - appropriate format and, if not, they are converted. +def w_clip(w1, w2, outSP=True, **kwargs): + """Clip a continuous `W` object (``w1``) with a different `W` object + (``w2``) so only cells where ``w2`` has a non-zero value remain with + non-zero values in ``w1``. Checks on ``w1`` and ``w2`` are performed + to make sure they conform to the appropriate format and, if not, they + are converted. Parameters ---------- - w1 : W - W, scipy.sparse.csr.csr_matrix - Potentially continuous weights matrix to be clipped. The clipped - matrix wc will have at most the same elements as w1. - w2 : W - W, scipy.sparse.csr.csr_matrix - Weights matrix to use as shell to clip w1. Automatically - converted to binary format. Only non-zero elements in w2 will be - kept non-zero in wc. 
NOTE: assumed to be of the same shape as w1 - outSP : boolean - If True (default) return sparse version of the clipped W, if - False, return W object of the clipped matrix - **kwargs : keyword arguments - optional arguments for :class:`pysal.weights.W` + w1 : {libpysal.weights.W, scipy.sparse.csr_matrix} + The potentially continuous weights matrix to be clipped. The clipped + matrix, ``wc``, will have at most the same elements as ``w1``. + w2 : {libpysal.weights.W, scipy.sparse.csr_matrix} + The weights matrix to use as a shell to clip ``w1``. It is automatically + converted to binary format. Only non-zero elements in ``w2`` will be + kept non-zero in ``wc``. It is assumed to be of the same shape as ``w1``. + outSP : bool + If ``True`` (default) return the sparse version of the clipped `W`, if + ``False``, return a `W` object of the clipped matrix. + **kwargs : dict + Keyword arguments for ``libpysal.weights.W``. Returns ------- - wc : W - W, scipy.sparse.csr.csr_matrix - Clipped W object (sparse if outSP=Ture). It inherits ``id_order`` from w1. + wc : {libpysal.weights.W, scipy.sparse.csr_matrix} + A clipped `W` object that is sparse if ``outSP`` is set to ``True``. + It inherits ``id_order`` from ``w1``. + + Notes + ----- + + The ``w2`` parameter is assumed to be of the same shape as ``w1``. Examples -------- + >>> from libpysal.weights import lat2W - First create a W object from a lattice using queen contiguity and + First create a `W` object from a lattice using queen contiguity and row-standardize it (note that these weights will stay when we clip the object, but they will not neccesarily represent a row-standardization anymore): @@ -441,24 +469,25 @@ def w_clip(w1, w2, outSP=True, **kwargs): We will clip that geography assuming observations 0, 2, 3 and 4 belong to one group and 1, 5 belong to another group and we don't want both groups - to interact with each other in our weights (i.e. w_ij = 0 if i and j in - different groups). 
For that, we use the following method: + to interact with each other in our weights (i.e. :math:`w_{ij} = 0` + if :math:`i` and :math:`j` are in different groups). + For that, we use the following method: >>> import libpysal >>> w2 = libpysal.weights.block_weights(['r1', 'r2', 'r1', 'r1', 'r1', 'r2']) - To illustrate that w2 will only be considered as binary even when the - object passed is not, we can row-standardize it + To illustrate that ``w2`` will only be considered as binary even when the + object passed is not, we can row-standardize it. >>> w2.transform = 'R' - The clipped object ``wc`` will contain only the spatial queen - relationships that occur within one group ('r1' or 'r2') but will have - gotten rid of those that happen across groups + The clipped object ``wc`` will contain only the spatial queen relationships + that occur within one group (``'r1'`` or ``'r2'``) but will have + gotten rid of those that happen across groups. >>> wcs = libpysal.weights.w_clip(w1, w2, outSP=True) - This will create a sparse object (recommended when n is large). + This will create a sparse object (recommended when :math:`n` is large). >>> wcs.sparse.toarray() array([[0. , 0. , 0.33333333, 0.33333333, 0. , @@ -475,8 +504,8 @@ def w_clip(w1, w2, outSP=True, **kwargs): 0.
]]) - If we wanted an original W object, we can control that with the argument - ``outSP``: + If we wanted an original `W` object, we can control + that with the argument ``outSP``: >>> wc = libpysal.weights.w_clip(w1, w2, outSP=False) >>> wc.full()[0] @@ -509,14 +538,20 @@ def w_clip(w1, w2, outSP=True, **kwargs): if not w1.id_order: w1.id_order = None + id_order = w1.id_order + if not isspmatrix_csr(w1): w1 = w1.sparse + if not isspmatrix_csr(w2): w2 = w2.sparse + w2.data = ones(w2.data.shape) wc = w1.multiply(w2) wc = WSP(wc, id_order=id_order) + if not outSP: wc = WSP2W(wc, **kwargs) + return wc diff --git a/libpysal/weights/spatial_lag.py b/libpysal/weights/spatial_lag.py index db8e9c0b5..3c3f91758 100644 --- a/libpysal/weights/spatial_lag.py +++ b/libpysal/weights/spatial_lag.py @@ -1,38 +1,41 @@ +"""Spatial lag operations. """ -Spatial lag operations. -""" -__author__ = "Sergio J. Rey , David C. Folch , Levi John Wolf ," + "David C. Folch ," + "Levi John Wolf " +) + __all__ = ["lag_spatial", "lag_categorical"] import numpy as np def lag_spatial(w, y): - """ - Spatial lag operator. - - If w is row standardized, returns the average of each observation's neighbors; - if not, returns the weighted sum of each observation's neighbors. + """A spatial lag operator. If ``w`` is row standardized, this function + returns the average of each observation's neighbors. If it is not, the + weighted sum of each observation's neighbors is returned. Parameters ---------- - w : W - libpysal spatial weightsobject - y : array - numpy array with dimensionality conforming to w (see examples) + w : libpysal.weights.weights.W + A PySAL spatial weights object. + y : array-like + A ``numpy`` array with dimensionality conforming to ``w`` (see examples). Returns ------- - wy : array - array of numeric values for the spatial lag + wy : numpy.ndarray + An array of numeric values for the spatial lag. 
Examples -------- - Setup a 9x9 binary spatial weights matrix and vector of data; compute the - spatial lag of the vector. + Setup a 9x9 binary spatial weights matrix and vector of data, + then compute the spatial lag of the vector. >>> import libpysal >>> import numpy as np @@ -42,7 +45,7 @@ def lag_spatial(w, y): >>> yl array([ 4., 6., 6., 10., 16., 14., 10., 18., 12.]) - Row standardize the weights matrix and recompute the spatial lag + Row standardize the weights matrix and recompute the spatial lag. >>> w.transform = 'r' >>> yl = libpysal.weights.lag_spatial(w, y) @@ -51,7 +54,7 @@ def lag_spatial(w, y): 4.66666667, 5. , 6. , 6. ]) - Explicitly define data vector as 9x1 and recompute the spatial lag + Explicitly define data vector as 9x1 and recompute the spatial lag. >>> y.shape = (9, 1) >>> yl = libpysal.weights.lag_spatial(w, y) @@ -67,7 +70,7 @@ def lag_spatial(w, y): [6. ]]) - Take the spatial lag of a 9x2 data matrix + Take the spatial lag of a 9x2 data matrix. >>> yr = np.arange(8, -1, -1) >>> yr.shape = (9, 1) @@ -85,48 +88,56 @@ def lag_spatial(w, y): [6. , 2. ]]) """ + return w.sparse * y def lag_categorical(w, y, ties="tryself"): - """ - Spatial lag operator for categorical variables. - - Constructs the most common categories of neighboring observations, weighted - by their weight strength. + """A spatial lag operator for categorical variables. This function + constructs the most common categories of neighboring observations + weighted by their weight strength. Parameters ---------- - w : W - PySAL spatial weightsobject - y : iterable - iterable collection of categories (either int or - string) with dimensionality conforming to w (see examples) - ties : str - string describing the method to use when resolving - ties. By default, the option is "tryself", - and the category of the focal observation - is included with its neighbors to try - and break a tie. If this does not resolve the tie, - a winner is chosen randomly. 
To just use random choice to - break ties, pass "random" instead. + w : libpysal.weights.weights.W + PySAL spatial weights object. + y : iterable + An iterable collection of categories (either ``int`` or ``str``) + with dimensionality conforming to ``w`` (see examples). + ties : str + The method to use when resolving ties. By default, the option is + ``'tryself'``, and the category of the focal observation is included + with its neighbors to try and break a tie. If this does not resolve + the tie, a winner is chosen randomly. To just use random choice to + break ties, pass ``'random'`` instead. + The following are supported options: + + * ``'tryself'`` -- Use the focal observation's label to tiebreak. If this doesn't successfully break the tie, which only occurs if it induces a new tie, decide randomly. + * ``'random'`` -- Resolve the tie randomly amongst winners. + * ``'lowest'`` -- Pick the lowest-value label amongst winners. + * ``'highest'`` -- Pick the highest-value label amongst winners. + Returns ------- - an (n x k) column vector containing the most common neighboring observation + + output : numpy.ndarray + An :math:`(n \\times k)` column vector containing + the most common neighboring observation. Notes ----- - This works on any array where the number of unique elements along the column - axis is less than the number of elements in the array, for any dtype. - That means the routine should work on any dtype that np.unique() can - compare. + + This works on any array where the number of unique elements + along the column axis is less than the number of elements in + the array, for any ``dtype``. That means the routine should + work on any ``dtype`` that ``numpy.unique()`` can compare. Examples -------- - Set up a 9x9 weights matrix describing a 3x3 regular lattice. Lag one list of - categorical variables with no ties. + Set up a 9x9 weights matrix describing a 3x3 regular lattice. + Lag one list of categorical variables with no ties.
>>> import libpysal >>> import numpy as np @@ -137,7 +148,7 @@ def lag_categorical(w, y, ties="tryself"): >>> np.array_equal(y_l, np.array(['b', 'a', 'b', 'c', 'b', 'c', 'b', 'c', 'b'])) True - Explicitly reshape y into a (9x1) array and calculate lag again + Explicitly reshape ``y`` into a (9x1) array and calculate lag again. >>> yvect = np.array(y).reshape(9,1) >>> yvect_l = libpysal.weights.lag_categorical(w,yvect) @@ -145,7 +156,7 @@ def lag_categorical(w, y, ties="tryself"): >>> np.array_equal(yvect_l, check) True - compute the lag of a 9x2 matrix of categories + Compute the lag of a 9x2 matrix of categories. >>> y2 = ['a', 'c', 'c', 'd', 'b', 'a', 'd', 'd', 'c'] >>> ym = np.vstack((y,y2)).T @@ -155,12 +166,16 @@ def lag_categorical(w, y, ties="tryself"): True """ + if isinstance(y, list): y = np.array(y) orig_shape = y.shape + if len(orig_shape) > 1: if orig_shape[1] > 1: - return np.vstack([lag_categorical(w, col) for col in y.T]).T + output = np.vstack([lag_categorical(w, col) for col in y.T]).T + return output + y = y.flatten() output = np.zeros_like(y) labels = np.unique(y) @@ -178,64 +193,76 @@ def lag_categorical(w, y, ties="tryself"): focal_idx, normalized_labels, neighborhood_tally, neighbors, ties, w ) output[focal_idx] = labels[out_label_idx] - return output.reshape(orig_shape) - -def _resolve_ties(idx, normalized_labels, tally, neighbors, method, w): - """ - Helper function to resolve ties if lag is multimodal + output = output.reshape(orig_shape) - first, if this function gets called when there's actually no tie, then the - correct value will be picked. + return output - if 'random' is selected as the method, a random tiebeaker is picked - if 'tryself' is selected, then the observation's own value will be used in - an attempt to break the tie, but if it fails, a random tiebreaker will be - selected. +def _resolve_ties(idx, normalized_labels, tally, neighbors, method, w): + """Helper function to resolve ties if lag is multimodal. 
First, if this function + gets called when there's actually no tie, then the correct value will be picked. + If ``'random'`` is selected as the method, a random tiebreaker is picked. If + ``'tryself'`` is selected, then the observation's own value will be used in an + attempt to break the tie, but if it fails, a random tiebreaker will be selected. Parameters - ---------- - idx : int - index (aligned with `normalized_labels`) of the - current observation being resolved. - normalized_labels : (n,) array of ints - normalized array of labels for each observation - tally : (p,) array of floats - current tally of neighbors' labels around `idx` to resolve. - neighbors : dict of (neighbor_name : weight) - the elements of the weights object, identical to w[idx] - method : string - configuration option to use a specific tiebreaking method. - supported options are: - 1. tryself: Use the focal observation's label to tiebreak. - If this doesn't successfully break the tie, - (which only occurs if it induces a new tie), - decide randomly. - 2. random: Resolve the tie randomly amongst winners. - 3. lowest: Pick the lowest-value label amongst winners. - 4. highest: Pick the highest-value label amongst winners. - w : pysal.W object - a PySAL weights object aligned with normalized_labels. + ---------- + + idx : int + The index (aligned with ``normalized_labels``) of + the current observation being resolved. + normalized_labels : numpy.ndarray + A :math:`(n,)` normalized array of labels for each observation. + tally : numpy.ndarray + The current tally of :math:`(p,)` neighbors' labels around ``idx`` to resolve. + neighbors : dict of (neighbor_name : weight) + The elements of the weights object (identical to ``w[idx]``) + in the form ``{neighbor_name : weight}``. + method : str + The configuration option to use a specific tiebreaking method. + See ``lag_categorical()`` for all supported options. + w : libpysal.weights.weights.W + A PySAL weights object aligned with ``normalized_labels``.
Returns ------- - integer denoting which label to use to label the observation. + + label : int + An integer denoting which label to use to label the observation. + + Raises + ------ + + KeyError + The tie-breaking method for categorical lag is not recognized. + """ - (ties,) = np.where(tally == tally.max()) # returns a tuple for flat arrays - if len(tally[tally == tally.max()]) <= 1: # no tie, pick the highest - return np.argmax(tally).astype(int) - elif method.lower() == "random": # choose randomly from tally - return np.random.choice(np.squeeze(ties)).astype(int) - elif method.lower() == "lowest": # pick lowest tied value - return ties[0].astype(int) - elif method.lower() == "highest": # pick highest tied value - return ties[-1].astype(int) - elif ( - method.lower() == "tryself" - ): # add self-label as observation, try again, random if fail + + m = method.lower() + + # returns a tuple for flat arrays + (ties,) = np.where(tally == tally.max()) + + # no tie, pick the highest + if len(tally[tally == tally.max()]) <= 1: + label = np.argmax(tally).astype(int) + # choose randomly from tally + elif m == "random": + label = np.random.choice(np.squeeze(ties)).astype(int) + # pick lowest tied value + elif m == "lowest": + label = ties[0].astype(int) + # pick highest tied value + elif m == "highest": + label = ties[-1].astype(int) + # add self-label as observation, try again, random if fail + elif m == "tryself": mean_neighbor_value = np.mean(list(neighbors.values())) tally[normalized_labels[idx]] += mean_neighbor_value - return _resolve_ties(idx, normalized_labels, tally, neighbors, "random", w) + label = _resolve_ties(idx, normalized_labels, tally, neighbors, "random", w) else: - raise KeyError("Tie-breaking method for categorical lag not recognized") + msg = "Tie-breaking method for categorical lag not recognized: %s" % m + raise KeyError(msg) + + return label diff --git a/libpysal/weights/spintW.py b/libpysal/weights/spintW.py index 30445bcfc..ddb76f25d 100644 --- 
a/libpysal/weights/spintW.py +++ b/libpysal/weights/spintW.py @@ -1,7 +1,6 @@ """ Spatial weights for spatial interaction including contiguity OD weights (ODW), network based weights (netW), and distance-decay based vector weights (vecW). - """ __author__ = "Taylor Oshan " @@ -13,27 +12,43 @@ def ODW(Wo, Wd, transform="r", silence_warnings=True): - """ - Constructs an o*d by o*d origin-destination style spatial weight for o*d - flows using standard spatial weights on o origins and d destinations. Input - spatial weights must be binary or able to be sutiably transformed to binary. + """Construct an :math:`(o \cdot d)\\times(o \cdot d)` + origin-destination style spatial weight for :math:`o \cdot d` + flows using standard spatial weights on :math:`o` origins + and :math:`d` destinations. Input spatial weights must be + binary or able to be sutiably transformed to binary. Parameters ---------- - Wo : W object for origin locations - o x o spatial weight object amongst o origins - - Wd : W object for destination locations - d x d spatial weight object amongst d destinations - transform : Transformation for standardization of final OD spatial weight; default - is 'r' for row standardized + Wo : libpysal.weights.W + A `W` object for origin locations as a :math:`o \cdot o` + spatial weight object amongst :math:`o` origins. + Wd : libpysal.weights.W + A `W` object for destination locations as a :math:`d \cdot d` + spatial weight object amongst :math:`d` destinations + transform : str + A transformation for standardization of final the + `OD` spatial weights. Default is ``'r'`` for row standardized. + silence_warnings : bool + By default (``True``) libpysal will silence a warning if the dataset contains any + disconnected observations or islands. To print this warning set to ``False``. 
Returns ------- - W : spatial contiguity W object for assocations between flows - o*d x o*d spatial weight object amongst o*d flows between o - origins and d destinations + + Ww : libpysal.weights.WSP + A sparse spatial contiguity `W` object for assocations between flows + between :math:`o` origins and :math:`d` destinations, + :math:`(o \cdot d)\\times(o \cdot d)`. + + Raises + ------ + + AttributeError + The ``Wo`` argument is not binary. + AttributeError + The ``Wd`` argument is not binary. Examples -------- @@ -51,6 +66,7 @@ def ODW(Wo, Wd, transform="r", silence_warnings=True): 0. , 0. , 0. , 0. , 0. ]) """ + if Wo.transform != "b": try: Wo.tranform = "b" @@ -59,6 +75,7 @@ def ODW(Wo, Wd, transform="r", silence_warnings=True): "Wo is not binary and cannot be transformed to " "binary. Wo must be binary or suitably transformed to binary." ) + if Wd.transform != "b": try: Wd.tranform = "b" @@ -67,6 +84,7 @@ def ODW(Wo, Wd, transform="r", silence_warnings=True): "Wd is not binary and cannot be transformed to " "binary. Wd must be binary or suitably transformed to binary." ) + Wo = Wo.sparse Wo.eliminate_zeros() Wd = Wd.sparse @@ -75,37 +93,51 @@ def ODW(Wo, Wd, transform="r", silence_warnings=True): Ww.eliminate_zeros() Ww = WSP(Ww).to_W(silence_warnings=silence_warnings) Ww.transform = transform + return Ww def netW(link_list, share="A", transform="r", **kwargs): - """ - Create a network-contiguity based weight object based on different nodal - relationships encoded in a network. + """Create a network-contiguity based weights object based + on different nodal relationships encoded in a network. 
Parameters ---------- - link_list : list - of tuples where each tuple is of the form (o,d) where o is an - origin id and d is a destination id - - share : string - denoting how to define the nodal relationship used to determine neighboring edges; defualt is 'A' for any shared nodes between two network edges; options include: O a shared origin node; D a shared destination node; OD; a shared origin or a shared destination node; C a shared node that is the destination of the first edge and the origin of the second edge - i.e., a directed chain is formed moving from edge one to edge two. - - transform : Transformation for standardization of final OD spatial weight; default - is 'r' for row standardized - **kwargs : keyword arguments - optional arguments for :class:`pysal.weights.W` + link_list : list + Collection of tuples where each ``tuple`` is of the form :math:`(o,d)` + where :math:`o` is an origin id and :math:`d` is a destination id. + share : str + This denotes how to define the nodal relationship used to determine + neighboring edges. The default is ``'A'``, for any shared nodes between + two network edges; options include: ``'O'`` a shared origin node; ``'D'`` + a shared destination node; ``'OD'``; a shared origin or a shared + destination node; ``'C'`` a shared node that is the destination of + the first edge and the origin of the second edge - i.e., a directed + chain is formed moving from edge one to edge two. + transform : str + A transformation for standardization of final the + `OD` spatial weights. Default is ``'r'`` for row standardized. + **kwargs : dict + Optional keyword arguments arguments for ``libpysal.weights.W`` Returns ------- - W : nodal contiguity W object for networkd edges or flows - W Object representing the binary adjacency of the network edges - given a definition of nodal relationshilibpysal.weights.spintW. 
+ + netW : libpysal.weights.W + A nodal contiguity `W` object for network edges or + flows representing the binary adjacency of the network + edges given a definition of nodal relationships. + + Raises + ------ + + AttributeError + The ``share`` parameter must be ``'O'``, ``'D'``, ``'OD'``, or ``'C'``. Examples -------- + >>> import libpysal >>> links = [('a','b'), ('a','c'), ('a','d'), ('c','d'), ('c', 'b'), ('c','a')] >>> O = libpysal.weights.netW(links, share='O') @@ -119,11 +151,14 @@ def netW(link_list, share="A", transform="r", **kwargs): [('a', 'c'), ('a', 'd'), ('c', 'b'), ('c', 'a')] """ + neighbors = {} neighbors = OrderedDict() edges = link_list + for key in edges: neighbors[key] = [] + for neigh in edges: if key == neigh: continue @@ -149,10 +184,12 @@ def netW(link_list, share="A", transform="r", **kwargs): neighbors[key].append(neigh) else: raise AttributeError( - "Parameter 'share' must be 'O', 'D'," " 'OD', or 'C'" + "Parameter 'share' must be 'O', 'D', 'OD', or 'C'." ) + netW = W(neighbors, **kwargs) netW.tranform = transform + return netW @@ -169,54 +206,57 @@ def vecW( build_sp=False, **kwargs ): - """ - Distance-based spatial weight for vectors that is computed using a + """Distance-based spatial weight for vectors that is computed using a 4-dimensional distance between the origin x,y-coordinates and the - destination x,y-coordinates + destination x,y-coordinates. 
Parameters ---------- - origin_x : list or array - of vector origin x-coordinates - origin_y : list or array - of vector origin y-coordinates - dest_x : list or array - of vector destination x-coordinates - dest_y : list or array - of vector destination y-coordinates - threshold : float - distance band - p : float - Minkowski p-norm distance metric parameter: - 1<=p<=infinity - 2: Euclidean distance - 1: Manhattan distance - binary : boolean - If true w_{ij}=1 if d_{i,j}<=threshold, otherwise w_{i,j}=0 - If false wij=dij^{alpha} - alpha : float - distance decay parameter for weight (default -1.0) - if alpha is positive the weights will not decline with - distance. If binary is True, alpha is ignored - - ids : list - values to use for keys of the neighbors and weights dicts - build_sp : boolean - True to build sparse distance matrix and false to build dense - distance matrix; significant speed gains may be obtained - dending on the sparsity of the of distance_matrix and - threshold that is applied - **kwargs : keyword arguments - optional arguments for :class:`pysal.weights.W` + origin_x : {list, numpy.ndarray} + A vector of origin x-coordinates. + origin_y : {list, numpy.ndarray} + A vector of origin y-coordinates. + dest_x : {list, numpy.ndarray} + A vector of destination x-coordinates. + dest_y : {list, numpy.ndarray} + A vector of destination y-coordinates. + threshold : float + The distance band. + p : {int, float} + Minkowski `p`-norm distance metric parameter where :math:`1<=\mathtt{p}<=\infty`. + ``2`` is Euclidean distance and ``1`` is Manhattan distance. + This parameter is ignored if the ``KDTree`` is an ``ArcKDTree``. + Default is ``2``. + alpha : float + The distance decay parameter for weights. Default is ``-1.0``. + If ``alpha`` is positive the weights will not decline with distance. + If ``binary`` is set to ``True``, ``alpha`` is ignored. 
+ binary : bool + If set to ``True``, :math:`w_{ij}=1` if :math:`d_{i,j}<=\mathtt{threshold}`, + otherwise :math:`w_{i,j}=0`. If set to ``False``, + :math:`w_{ij}=d_{ij}^{\mathtt{alpha}}`. Default is ``True``. + ids : list + Identifiers to attach to each observation in ``neighbors`` + and ``weights``. Default is ``None``. + build_sp : boolean + Set to ``True`` to build a sparse distance matrix and ``False`` to build dense + distance matrix. Significant speed gains may be obtained depending on the + sparsity of the distance matrix and the ``threshold`` that is applied. + Default is ``False``. + **kwargs : dict + Optional keyword arguments for ``libpysal.weights.W``. Returns ------- - W : DistanceBand W object that uses 4-dimenional distances between - vectors origin and destination coordinates. + + w : libpysal.weights.DistanceBand + A ``libpysal.weights.DistanceBand`` `W` object that uses 4-dimensional + distances between vectors of origin and destination coordinates. Examples -------- + >>> import libpysal >>> x1 = [5,6,3] >>> y1 = [1,8,5] @@ -230,8 +270,10 @@ def vecW( [1, 2] """ + data = list(zip(origin_x, origin_y, dest_x, dest_y)) - W = DistanceBand( + + w = DistanceBand( data, threshold=threshold, p=p, @@ -241,39 +283,50 @@ def vecW( build_sp=False, **kwargs ) - return W + + return w def mat2L(edge_matrix): - """ - Convert a matrix denoting network connectivity (edges or flows) to a list - denoting edges + """Convert a matrix denoting network connectivity + (edges or flows) to a list denoting edges. Parameters ---------- - edge_matrix : array - where rows denote network edge origins, columns denote - network edge destinations, and non-zero entries denote the - existence of an edge between a given origin and destination + + edge_matrix : numpy.ndarray + A matrix where rows denote network edge origins, columns denote + network edge destinations, and non-zero entries denote the + existence of an edge between a given origin and destination.
+ + Raises + ------ + + AttributeError + The input matrix is not two dimensional. Returns ------- - edge_list : list - of tuples where each tuple is of the form (o,d) where o is an - origin id and d is a destination id + + edge_list : list + Collection of tuples where each ``tuple`` is of the form :math:`(o,d)` + where :math:`o` is an origin id and :math:`d` is a destination id. """ + if len(edge_matrix.shape) != 2: raise AttributeError( "Matrix of network edges should be two dimensions" "with edge origins on one axis and edge destinations on the" "second axis with non-zero matrix entires denoting an edge" - "between and origin and destination" + "between and origin and destination." ) edge_list = [] rows, cols = edge_matrix.shape + for row in range(rows): for col in range(cols): if edge_matrix[row, col] != 0: edge_list.append((row, col)) + return edge_list diff --git a/libpysal/weights/test.py b/libpysal/weights/test.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/libpysal/weights/tests/test_Wsets.py b/libpysal/weights/tests/test_Wsets.py index c4e73141f..85f8d005b 100644 --- a/libpysal/weights/tests/test_Wsets.py +++ b/libpysal/weights/tests/test_Wsets.py @@ -1,4 +1,5 @@ -"""Unit test for set_operations module.""" +"""Unit tests for the set_operations module.""" + import unittest from ..util import lat2W, block_weights from .. 
import set_operations @@ -9,7 +10,6 @@ class Testset_operations(unittest.TestCase): """Unit test for set_operations module.""" def test_w_union(self): - """Unit test""" w1 = lat2W(4, 4) w2 = lat2W(6, 4) w3 = set_operations.w_union(w1, w2) @@ -19,7 +19,6 @@ def test_w_union(self): self.assertEqual(set(w3.neighbors[15]), set([19, 11, 14])) def test_w_intersection(self): - """Unit test""" w1 = lat2W(4, 4) w2 = lat2W(6, 4) w3 = set_operations.w_union(w1, w2) @@ -29,7 +28,6 @@ def test_w_intersection(self): self.assertEqual(set(w3.neighbors[15]), set([19, 11, 14])) def test_w_difference(self): - """Unit test""" w1 = lat2W(4, 4, rook=False) w2 = lat2W(4, 4, rook=True) w3 = set_operations.w_difference(w1, w2, constrained=False) @@ -39,7 +37,6 @@ def test_w_difference(self): self.assertEqual(set(w3.neighbors[15]), set([10])) def test_w_symmetric_difference(self): - """Unit test""" w1 = lat2W(4, 4, rook=False) w2 = lat2W(6, 4, rook=True) w3 = set_operations.w_symmetric_difference(w1, w2, constrained=False) @@ -49,7 +46,6 @@ def test_w_symmetric_difference(self): self.assertEqual(set(w3.neighbors[15]), set([10, 19])) def test_w_subset(self): - """Unit test""" w1 = lat2W(6, 4) ids = list(range(16)) w2 = set_operations.w_subset(w1, ids) @@ -58,7 +54,6 @@ def test_w_subset(self): self.assertEqual(set(w2.neighbors[15]), set([11, 14])) def test_w_clip(self): - """Unit test for w_clip""" w1 = lat2W(3, 2, rook=False) w1.transform = "R" w2 = block_weights(["r1", "r2", "r1", "r1", "r1", "r2"]) @@ -84,6 +79,7 @@ def test_w_clip(self): suite = unittest.TestLoader().loadTestsFromTestCase(Testset_operations) + if __name__ == "__main__": runner = unittest.TextTestRunner() runner.run(suite) diff --git a/libpysal/weights/tests/test__contW_lists.py b/libpysal/weights/tests/test__contW_lists.py index 4ac9f1727..a95658608 100644 --- a/libpysal/weights/tests/test__contW_lists.py +++ b/libpysal/weights/tests/test__contW_lists.py @@ -16,7 +16,7 @@ class TestContiguityWeights(unittest.TestCase): 
def setUp(self): - """ Setup the binning contiguity weights""" + """Setup the binning contiguity weights.""" shpObj = ps_open(pysal_examples.get_path("virginia.shp"), "r") self.binningW = ContiguityWeightsLists(shpObj, QUEEN) shpObj.close() @@ -36,7 +36,7 @@ def test_ContiguityWeightsLists(self): self.assertEqual(len(self.binningW.w), 136) def test_nested_polygons(self): - # load queen gal file created using Open Geoda. + # load queen gal file created using Open Geoda geodaW = ps_open(pysal_examples.get_path("virginia.gal"), "r").read() # build matching W with pysal pysalWb = self.build_W( @@ -51,12 +51,11 @@ def test_nested_polygons(self): self.assertEqual(geoda_neighbors, pysalb_neighbors) def test_true_rook(self): - # load queen gal file created using Open Geoda. + # load queen gal file created using Open Geoda geodaW = ps_open(pysal_examples.get_path("rook31.gal"), "r").read() # build matching W with pysal - # pysalW = pysal.rook_from_shapefile(pysal_examples.get_path('rook31.shp'),','POLY_ID') pysalWb = self.build_W(pysal_examples.get_path("rook31.shp"), ROOK, "POLY_ID") - # compare output. + # compare output for key in geodaW.neighbors: geoda_neighbors = list(map(int, geodaW.neighbors[key])) pysalb_neighbors = pysalWb.neighbors[int(key)] @@ -65,14 +64,13 @@ def test_true_rook(self): self.assertEqual(geoda_neighbors, pysalb_neighbors) def test_true_rook2(self): - # load queen gal file created using Open Geoda. - + # load queen gal file created using Open Geoda stl = pysal_examples.load_example("stl") gal_file = test_file = stl.get_path("stl_hom_rook.gal") geodaW = ps_open(gal_file, "r").read() # build matching W with pysal pysalWb = self.build_W(stl.get_path("stl_hom.shp"), ROOK, "POLY_ID_OG") - # compare output. 
+ # compare output for key in geodaW.neighbors: geoda_neighbors = list(map(int, geodaW.neighbors[key])) pysalb_neighbors = pysalWb.neighbors[int(key)] @@ -81,11 +79,11 @@ def test_true_rook2(self): self.assertEqual(geoda_neighbors, pysalb_neighbors) def test_true_rook3(self): - # load queen gal file created using Open Geoda. + # load queen gal file created using Open Geoda geodaW = ps_open(pysal_examples.get_path("virginia_rook.gal"), "r").read() # build matching W with pysal pysalWb = self.build_W(pysal_examples.get_path("virginia.shp"), ROOK, "POLY_ID") - # compare output. + # compare output for key in geodaW.neighbors: geoda_neighbors = list(map(int, geodaW.neighbors[key])) pysalb_neighbors = pysalWb.neighbors[int(key)] @@ -108,7 +106,11 @@ def test_shapely(self): self.assertEqual(pysalneighbs.w, shplyneighbs.w) def build_W(self, shapefile, type, idVariable=None): - """ Building 2 W's the hard way. We need to do this so we can test both rtree and binning """ + """ + Building 2 W's the hard way. + We need to do this so we can test both rtree and binning. 
+ """ + dbname = os.path.splitext(shapefile)[0] + ".dbf" db = ps_open(dbname) shpObj = ps_open(shapefile) @@ -132,9 +134,5 @@ def build_W(self, shapefile, type, idVariable=None): return binningW -# suite = unittest.TestLoader().loadTestsFromTestCase(_TestContiguityWeights) - if __name__ == "__main__": - # runner = unittest.TextTestRunner() - # runner.run(suite) unittest.main() diff --git a/libpysal/weights/tests/test_adjlist.py b/libpysal/weights/tests/test_adjlist.py index f1cc08c86..0046e5673 100644 --- a/libpysal/weights/tests/test_adjlist.py +++ b/libpysal/weights/tests/test_adjlist.py @@ -17,7 +17,7 @@ PANDAS_MISSING = True -@ut.skipIf(PANDAS_MISSING, "Pandas is gone") +@ut.skipIf(PANDAS_MISSING, "Pandas is missing") class Test_Adjlist(ut.TestCase): def setUp(self): self.knownW = io.open(examples.get_path("columbus.gal")).read() @@ -73,9 +73,9 @@ def test_filter(self): tuples = set([tuple(t) for t in alist[["focal", "neighbor"]].values]) full_alist = grid.to_adjlist(drop_islands=True) all_possible = set([tuple(t) for t in full_alist[["focal", "neighbor"]].values]) - assert tuples.issubset(all_possible), ( - "the de-duped adjlist has links " "not in the duplicated adjlist." - ) + assert tuples.issubset( + all_possible + ), "the de-duplicated adjlist has links not in the duplicated adjlist." 
complements = all_possible.difference(tuples) reversed_complements = set([t[::-1] for t in complements]) assert reversed_complements == tuples, ( @@ -160,7 +160,7 @@ def test_sort(self): usv = np.array(["53", "53", "30", "30", "30"]) np.testing.assert_array_equal(unsorted_al.focal.values[:5], usv) np.testing.assert_array_equal(sorted_al.focal.values[:5], sv) - + def test_ids(self): df = geopandas.read_file(examples.get_path("columbus.dbf")).head() df["my_id"] = range(3, len(df) + 3) diff --git a/libpysal/weights/tests/test_contiguity.py b/libpysal/weights/tests/test_contiguity.py index ea6b3877f..f76c6c175 100644 --- a/libpysal/weights/tests/test_contiguity.py +++ b/libpysal/weights/tests/test_contiguity.py @@ -28,6 +28,7 @@ class Contiguity_Mixin(object): + polygon_path = pysal_examples.get_path("columbus.shp") point_path = pysal_examples.get_path("baltim.shp") da = raster.testDataArray((1, 4, 4), missing_vals=False) @@ -98,7 +99,7 @@ def test_from_array(self): # test named, sparse from point array pass - @ut.skipIf(PANDAS_EXTINCT, "Missing pandas") + @ut.skipIf(PANDAS_EXTINCT, "Missing pandas.") def test_from_dataframe(self): # basic df = pdio.read_files(self.polygon_path) @@ -136,6 +137,7 @@ def test_from_geodataframe_order(self): def test_from_xarray(self): w = self.cls.from_xarray(self.da, sparse=False, n_jobs=-1) self.assertEqual(w[self.known_wi_da], self.known_w_da) + ws = self.cls.from_xarray(self.da) srowvec = ws.sparse[self.known_wspi_da].todense().tolist()[0] this_w = {i: k for i, k in enumerate(srowvec) if k > 0} @@ -175,7 +177,7 @@ def setUp(self): (1, 30.0, 60.0): 1, } - @ut.skipIf(GEOPANDAS_EXTINCT, "Missing Geopandas") + @ut.skipIf(GEOPANDAS_EXTINCT, "Missing geopandas.") def test_linestrings(self): import geopandas diff --git a/libpysal/weights/tests/test_distance.py b/libpysal/weights/tests/test_distance.py index 5f2043baa..586754723 100644 --- a/libpysal/weights/tests/test_distance.py +++ b/libpysal/weights/tests/test_distance.py @@ -52,30 +52,23 
@@ def setUp(self): if not k.startswith("_") } ) + self.test_msg = "You need to implement this test before this module will pass." def test_init(self): # test vanilla, named - raise NotImplementedError( - "You need to implement this test " "before this module will pass" - ) + raise NotImplementedError(self.test_msg) def test_from_shapefile(self): # test vanilla, named, sparse - raise NotImplementedError( - "You need to implement this test " "before this module will pass" - ) + raise NotImplementedError(self.test_msg) def test_from_array(self): # test named, sparse - raise NotImplementedError( - "You need to implement this test " "before this module will pass" - ) + raise NotImplementedError(self.test_msg) def test_from_dataframe(self): - # test named, columnar, defau - raise NotImplementedError( - "You need to implement this test " "before this module will pass" - ) + # test named, columnar, default + raise NotImplementedError(self.test_msg) class Test_KNN(ut.TestCase, Distance_Mixin): @@ -222,9 +215,8 @@ def test_from_geodataframe(self): # Function/User tests # ########################## def test_integers(self): - """ - see issue #126 - """ + """See issue #126.""" + grid_integers = [tuple(map(int, poly.vertices[0])) for poly in self.grid_f] self.grid_f.seek(0) grid_dbw = d.DistanceBand(grid_integers, 1) @@ -408,6 +400,8 @@ def test_arcdistance(self): kern = ut.TestLoader().loadTestsFromTestCase(Test_Kernel) db = ut.TestLoader().loadTestsFromTestCase(Test_DistanceBand) suite = ut.TestSuite([knn, kern, db]) + + if __name__ == "__main__": runner = ut.TextTestRunner() runner.run(suite) diff --git a/libpysal/weights/tests/test_raster.py b/libpysal/weights/tests/test_raster.py index f694b1deb..53c35f491 100644 --- a/libpysal/weights/tests/test_raster.py +++ b/libpysal/weights/tests/test_raster.py @@ -10,18 +10,20 @@ class Testraster(unittest.TestCase): def setUp(self): self.da1 = raster.testDataArray() self.da2 = raster.testDataArray((1, 4, 4), missing_vals=False) - 
self.da3 = self.da2.rename( - {"band": "layer", "x": "longitude", "y": "latitude"}) + self.da3 = self.da2.rename({"band": "layer", "x": "longitude", "y": "latitude"}) + self.da3 = self.da2.rename({"band": "layer", "x": "longitude", "y": "latitude"}) self.data1 = pd.Series(np.ones(5)) self.da4 = raster.testDataArray((1, 1), missing_vals=False) self.da4.data = np.array([["test"]]) def test_da2W(self): w1 = raster.da2W(self.da1, "queen", k=2, n_jobs=-1) - self.assertEqual(w1[(1, -30.0, -180.0)], - {(1, -90.0, 60.0): 1, (1, -90.0, -60.0): 1}) - self.assertEqual(w1[(1, -30.0, 180.0)], - {(1, -90.0, -60.0): 1, (1, -90.0, 60.0): 1}) + self.assertEqual( + w1[(1, -30.0, -180.0)], {(1, -90.0, 60.0): 1, (1, -90.0, -60.0): 1} + ) + self.assertEqual( + w1[(1, -30.0, 180.0)], {(1, -90.0, -60.0): 1, (1, -90.0, 60.0): 1} + ) self.assertEqual(w1.n, 5) self.assertEqual(w1.index.names, self.da1.to_series().index.names) self.assertEqual(w1.index.tolist()[0], (1, 90.0, 180.0)) @@ -30,25 +32,30 @@ def test_da2W(self): self.assertEqual(w1.index.tolist()[3], (1, -90.0, -60.0)) w2 = raster.da2W(self.da2, "rook") self.assertEqual( - sorted(w2.neighbors[(1, -90.0, 180.0)]), [(1, -90.0, 60.0), (1, -30.0, 180.0)]) - self.assertEqual(sorted(w2.neighbors[( - 1, -90.0, 60.0)]), [(1, -90.0, -60.0), (1, -90.0, 180.0), (1, -30.0, 60.0)]) + sorted(w2.neighbors[(1, -90.0, 180.0)]), + [(1, -90.0, 60.0), (1, -30.0, 180.0)], + ) + self.assertEqual( + sorted(w2.neighbors[(1, -90.0, 60.0)]), + [(1, -90.0, -60.0), (1, -90.0, 180.0), (1, -30.0, 60.0)], + ) self.assertEqual(w2.n, 16) self.assertEqual(w2.index.names, self.da2.to_series().index.names) - self.assertEqual(w2.index.tolist(), - self.da2.to_series().index.tolist()) + self.assertEqual(w2.index.tolist(), self.da2.to_series().index.tolist()) coords_labels = { "z_label": "layer", "y_label": "latitude", "x_label": "longitude", } w3 = raster.da2W(self.da3, z_value=1, coords_labels=coords_labels) - self.assertEqual(sorted(w3.neighbors[( - 1, -90.0, 
180.0)]), [(1, -90.0, 60.0), (1, -30.0, 60.0), (1, -30.0, 180.0)]) + self.assertEqual( + sorted(w3.neighbors[(1, -90.0, 180.0)]), + [(1, -90.0, 60.0), (1, -30.0, 60.0), (1, -30.0, 180.0)], + ) self.assertEqual(w3.n, 16) self.assertEqual(w3.index.names, self.da3.to_series().index.names) - self.assertEqual(w3.index.tolist(), - self.da3.to_series().index.tolist()) + self.assertEqual(w3.index.tolist(), self.da3.to_series().index.tolist()) + self.assertEqual(w3.index.tolist(), self.da3.to_series().index.tolist()) def test_da2WSP(self): w1 = raster.da2WSP(self.da1, "rook", n_jobs=-1) @@ -67,27 +74,22 @@ def test_da2WSP(self): w2 = raster.da2WSP(self.da2, "queen", k=2, include_nodata=True) w3 = raster.da2WSP(self.da2, "queen", k=2, n_jobs=-1) self.assertEqual(w2.sparse.nnz, w3.sparse.nnz) - self.assertEqual(w2.sparse.todense().tolist(), - w3.sparse.todense().tolist()) + self.assertEqual(w2.sparse.todense().tolist(), w3.sparse.todense().tolist()) self.assertEqual(w2.n, 16) self.assertEqual(w2.index.names, self.da2.to_series().index.names) - self.assertEqual(w2.index.tolist(), - self.da2.to_series().index.tolist()) + self.assertEqual(w2.index.tolist(), self.da2.to_series().index.tolist()) def test_w2da(self): w2 = raster.da2W(self.da2, "rook", n_jobs=-1) - da2 = raster.w2da(self.da2.data.flatten(), w2, - self.da2.attrs, self.da2.coords) + da2 = raster.w2da(self.da2.data.flatten(), w2, self.da2.attrs, self.da2.coords) da_compare = DataArray.equals(da2, self.da2) self.assertEqual(da_compare, True) def test_wsp2da(self): wsp1 = raster.da2WSP(self.da1, "queen") da1 = raster.wsp2da(self.data1, wsp1) - self.assertEqual(da1["y"].values.tolist(), - self.da1["y"].values.tolist()) - self.assertEqual(da1["x"].values.tolist(), - self.da1["x"].values.tolist()) + self.assertEqual(da1["y"].values.tolist(), self.da1["y"].values.tolist()) + self.assertEqual(da1["x"].values.tolist(), self.da1["x"].values.tolist()) self.assertEqual(da1.shape, (1, 4, 4)) def test_da_checker(self): diff --git 
a/libpysal/weights/tests/test_spatial_lag.py b/libpysal/weights/tests/test_spatial_lag.py index 39e9a3342..e437c1e92 100644 --- a/libpysal/weights/tests/test_spatial_lag.py +++ b/libpysal/weights/tests/test_spatial_lag.py @@ -61,6 +61,7 @@ def test_lag_categorical(self): suite = unittest.TestLoader().loadTestsFromTestCase(Test_spatial_lag) + if __name__ == "__main__": runner = unittest.TextTestRunner() runner.run(suite) diff --git a/libpysal/weights/tests/test_user.py b/libpysal/weights/tests/test_user.py index 7d6ed1aaa..58f25fa15 100644 --- a/libpysal/weights/tests/test_user.py +++ b/libpysal/weights/tests/test_user.py @@ -23,6 +23,7 @@ def test_build_lattice_shapefile(self): suite = unittest.TestLoader().loadTestsFromTestCase(Testuser) + if __name__ == "__main__": runner = unittest.TextTestRunner() runner.run(suite) diff --git a/libpysal/weights/tests/test_util.py b/libpysal/weights/tests/test_util.py index 8ad631128..2f0c16a5e 100644 --- a/libpysal/weights/tests/test_util.py +++ b/libpysal/weights/tests/test_util.py @@ -255,7 +255,7 @@ def test_get_ids_shp(self): self.assertEqual(polyids5, polyids[:5]) @unittest.skipIf( - not HAS_GEOPANDAS, "Missing geopandas, cannot test get_ids with gdf" + not HAS_GEOPANDAS, "Missing geopandas; cannot test get_ids() with gdf." ) def test_get_ids_gdf(self): gdf = gpd.read_file(examples.get_path("columbus.shp")) @@ -293,7 +293,7 @@ def test_attach_islands(self): self.assertEqual(w_attach[w.islands[0]], {166: 1.0}) @unittest.skipIf( - not HAS_GEOPANDAS, "Missing geopandas, cannot test nonplanar neighbors" + not HAS_GEOPANDAS, "Missing geopandas; cannot test nonplanar neighbors." 
) def test_nonplanar_neighbors(self): df = gpd.read_file(examples.get_path("map_RS_BR.shp")) @@ -339,7 +339,7 @@ def test_nonplanar_neighbors(self): self.assertEqual(wnp.neighbors[23], [0, 45, 59, 107, 152, 185, 246]) @unittest.skipIf( - not HAS_GEOPANDAS, "Missing geopandas, cannot test fuzzy_contiguity" + not HAS_GEOPANDAS, "Missing geopandas; cannot test fuzzy contiguity." ) def test_fuzzy_contiguity(self): rs = examples.get_path("map_RS_BR.shp") @@ -363,6 +363,7 @@ def test_fuzzy_contiguity(self): suite = unittest.TestLoader().loadTestsFromTestCase(Testutil) + if __name__ == "__main__": runner = unittest.TextTestRunner() runner.run(suite) diff --git a/libpysal/weights/user.py b/libpysal/weights/user.py index fe77a5a20..4891868d7 100644 --- a/libpysal/weights/user.py +++ b/libpysal/weights/user.py @@ -3,7 +3,7 @@ contiguity and distance criteria. """ -__author__ = "Sergio J. Rey " +__author__ = "Sergio J. Rey " from .util import get_points_array_from_shapefile, min_threshold_distance from ..io.fileio import FileIO as ps_open @@ -18,62 +18,60 @@ def spw_from_gal(galfile): - """ - Sparse scipy matrix for w from a gal file. + """Sparse ``scipy`` matrix for a `W` from a ``.gal`` file. Parameters ---------- - - galfile : string - name of gal file including suffix + galfile : str + The name of a ``.gal`` file including the file extension. Returns ------- - - spw : sparse_matrix - scipy sparse matrix in CSR format - - ids : array - identifiers for rows/cols of spw + spw : libpysal.weights.WSP + The sparse matrix in CSR format (``scipy.sparse.csr_matrix``) can + be accessed through ``spw.sparse``. 
Examples -------- + >>> import libpysal >>> spw = libpysal.weights.spw_from_gal(libpysal.examples.get_path("sids2.gal")) + + The number of all stored values in ``spw``: + >>> spw.sparse.nnz 462 """ - return ps_open(galfile, "r").read(sparse=True) + spw = ps_open(galfile, "r").read(sparse=True) + + return spw def min_threshold_dist_from_shapefile(shapefile, radius=None, p=2): - """ - Get the maximum nearest neighbor distance between observations in the - shapefile. + """Get the maximum nearest neighbor distance + between observations in the shapefile. Parameters ---------- - shapefile : string - shapefile name with shp suffix. - radius : float - If supplied arc_distances will be calculated - based on the given radius. p will be ignored. - p : float - Minkowski p-norm distance metric parameter: - 1<=p<=infinity - 2: Euclidean distance - 1: Manhattan distance + shapefile : str + The shapefile name including the ``.shp`` file extension. + radius : float + If supplied ``arc_distances`` will be calculated based on the given + radius and ``p`` will be ignored. Default is ``None``. + p : {int, float} + Minkowski `p`-norm distance metric parameter where :math:`1<=\mathtt{p}<=\infty`. + ``2`` is Euclidean distance and ``1`` is Manhattan distance. Default is ``2``. Returns ------- - d : float - Maximum nearest neighbor distance between the n - observations. + nnd : float + The maximum nearest neighbor distance between the ``n`` observations. Examples -------- + >>> import libpysal >>> md = libpysal.weights.min_threshold_dist_from_shapefile(libpysal.examples.get_path("columbus.shp")) >>> md @@ -83,44 +81,49 @@ def min_threshold_dist_from_shapefile(shapefile, radius=None, p=2): Notes ----- - Supports polygon or point shapefiles. For polygon shapefiles, distance is - based on polygon centroids. Distances are defined using coordinates in - shapefile which are assumed to be projected and not geographical - coordinates. + + This function supports polygon or point shapefiles. 
For polygon + shapefiles, distance is based on polygon centroids. Distances are + defined using coordinates from the shapefile which are assumed to + be projected and not geographical coordinates. """ + points = get_points_array_from_shapefile(shapefile) + if radius is not None: kdt = cg.kdtree.Arc_KDTree(points, radius=radius) nn = kdt.query(kdt.data, k=2) nnd = nn[0].max(axis=0)[1] + return nnd + return min_threshold_distance(points, p) -def build_lattice_shapefile(nrows, ncols, outFileName): - """ - Build a lattice shapefile with nrows rows and ncols cols. +def build_lattice_shapefile(nrows, ncols, out_file_name): + """Build a lattice shapefile with ``nrows`` rows and ``ncols`` columns. Parameters ---------- - - nrows : int - Number of rows - ncols : int - Number of cols - outFileName : str - shapefile name with shp suffix - - Returns - ------- - None + nrows : int + The number of rows. + ncols : int + The number of columns. + out_file_name : str + The shapefile name including the ``.shp`` file extension. + + Raises + ------ + ValueError + An unrecognized file extension was given. 
""" - if not outFileName.endswith(".shp"): - raise ValueError("outFileName must end with .shp") - o = ps_open(outFileName, "w") - dbf_name = outFileName.split(".")[0] + ".dbf" + + if not out_file_name.endswith(".shp"): + raise ValueError("'out_file_name' must end with '.shp'.") + o = ps_open(out_file_name, "w") + dbf_name = out_file_name.split(".")[0] + ".dbf" d = ps_open(dbf_name, "w") d.header = ["ID"] d.field_spec = [("N", 8, 0)] @@ -143,6 +146,7 @@ def _test(): # the following line could be used to define an alternative to the '' flag # doctest.BLANKLINE_MARKER = 'something better than ' + start_suppress = np.get_printoptions()["suppress"] np.set_printoptions(suppress=True) doctest.testmod() diff --git a/libpysal/weights/util.py b/libpysal/weights/util.py index dd8393577..d841037e1 100644 --- a/libpysal/weights/util.py +++ b/libpysal/weights/util.py @@ -1,11 +1,21 @@ from ..io.fileio import FileIO as psopen from .weights import W, WSP from .set_operations import w_subset +from ..common import requires + +from collections import defaultdict +import copy +from itertools import tee +import numbers +import os +from warnings import warn + import numpy as np +import scipy from scipy import sparse from scipy.spatial import KDTree -import copy import scipy.spatial + import os import scipy from warnings import warn @@ -20,7 +30,7 @@ GPD_08 = Version(gpd.__version__) >= Version("0.8.0") except ImportError: - warn("geopandas not available. Some functionality will be disabled.") + warn("Geopandas not available. Some functionality will be disabled.") try: from shapely.geometry.base import BaseGeometry @@ -60,33 +70,34 @@ def hexLat2W(nrows=5, ncols=5, **kwargs): - """ - Create a W object for a hexagonal lattice. + """Create a `W` object for a hexagonal lattice. Parameters ---------- - nrows : int - number of rows - ncols : int - number of columns - **kwargs : keyword arguments - optional arguments for :class:`pysal.weights.W` + nrows : int + The number of rows. 
+ ncols : int + The number of columns. + **kwargs : dict + Optional keyword arguments for ``libpysal.weights.W``. Returns ------- - w : W - instance of spatial weights class W + + w : libpysal.weights.W + An instance of spatial weights, `W`. Notes ----- - Observations are row ordered: first k observations are in row 0, next k in row 1, and so on. - Construction is based on shifting every other column of a regular lattice - down 1/2 of a cell. + Observations are row ordered with the first :math:`k` observations being + in row 0, the next :math:`k` in row 1, and so on. Construction is based + on shifting every other column of a regular lattice down 1/2 of a cell. Examples -------- + >>> from libpysal.weights import lat2W, hexLat2W >>> w = lat2W() >>> w.neighbors[1] @@ -98,11 +109,12 @@ def hexLat2W(nrows=5, ncols=5, **kwargs): [0, 6, 2, 5, 7] >>> wh.neighbors[21] [16, 20, 22] + """ if nrows == 1 or ncols == 1: - print("Hexagon lattice requires at least 2 rows and columns") - print("Returning a linear contiguity structure") + print("Hexagon lattice requires at least 2 rows and columns.") + print("Returning a linear contiguity structure.") return lat2W(nrows, ncols) n = nrows * ncols @@ -112,8 +124,10 @@ def hexLat2W(nrows=5, ncols=5, **kwargs): c1 = ncols - 1 w = lat2W(nrows, ncols).neighbors + for i in range(n): odd = cid[i] % 2 + if odd: if rid[i] < r1: # odd col index above last row # new sw neighbor @@ -140,40 +154,45 @@ def hexLat2W(nrows=5, ncols=5, **kwargs): w[i] = w.get(i, []) + jne w[i] = w.get(i, []) + jnw + w = W(w, **kwargs) + return W(w, **kwargs) def lat2W(nrows=5, ncols=5, rook=True, id_type="int", **kwargs): - """ - Create a W object for a regular lattice. + """Create a `W` object for a regular lattice. Parameters ---------- - nrows : int - number of rows - ncols : int - number of columns - rook : boolean - type of contiguity. Default is rook. 
For queen, rook =False - id_type : string - string defining the type of IDs to use in the final W object; - options are 'int' (0, 1, 2 ...; default), 'float' (0.0, - 1.0, 2.0, ...) and 'string' ('id0', 'id1', 'id2', ...) - **kwargs : keyword arguments - optional arguments for :class:`pysal.weights.W` + nrows : int + The number of rows. + ncols : int + The number of columns. + rook : bool + The type of contiguity. Default is `Rook`. For `Queen` set to ``False``. + id_type : str + The type of IDs to use in the final `W` object. The options are as + follows with ``'int'`` being the default. + + * ``'int'`` -- ``(0, 1, 2, ...)`` + * ``'float'`` -- ``(0.0, 1.0, 2.0, ...)`` + * ``'string'`` -- ``('id0', 'id1', 'id2', ...)`` + **kwargs : dict + Optional keyword arguments for ``libpysal.weights.W``. Returns ------- - w : W - instance of spatial weights class W + w : libpysal.weights.W + An instance of spatial weights, `W`. Notes ----- - Observations are row ordered: first k observations are in row 0, next k in row 1, and so on. + Observations are row ordered with the first :math:`k` observations being + in row 0, the next :math:`k` in row 1, and so on. 
Examples -------- @@ -186,7 +205,9 @@ def lat2W(nrows=5, ncols=5, rook=True, id_type="int", **kwargs): True >>> w9[3] == {0: 1.0, 4: 1.0, 6: 1.0} True + """ + n = nrows * ncols r1 = nrows - 1 c1 = ncols - 1 @@ -194,7 +215,9 @@ def lat2W(nrows=5, ncols=5, rook=True, id_type="int", **kwargs): cid = [i % ncols for i in range(n)] w = {} r = below = 0 + for i in range(n - 1): + if rid[i] < r1: below = rid[i] + 1 r = below * ncols + cid[i] @@ -219,13 +242,16 @@ def lat2W(nrows=5, ncols=5, rook=True, id_type="int", **kwargs): neighbors = {} weights = {} + for key in w: weights[key] = [1.0] * len(w[key]) ids = list(range(n)) + if id_type == "string": ids = ["id" + str(i) for i in ids] elif id_type == "float": ids = [i * 1.0 for i in ids] + if id_type == "string" or id_type == "float": id_dict = dict(list(zip(list(range(n)), ids))) alt_w = {} @@ -237,38 +263,40 @@ def lat2W(nrows=5, ncols=5, rook=True, id_type="int", **kwargs): alt_weights[key] = weights[i] w = alt_w weights = alt_weights - return W(w, weights, ids=ids, id_order=ids[:], **kwargs) + w = W(w, weights, ids=ids, id_order=ids[:], **kwargs) -def block_weights(regimes, ids=None, sparse=False, **kwargs): - """ - Construct spatial weights for regime neighbors. + return w - Block contiguity structures are relevant when defining neighbor relations - based on membership in a regime. For example, all counties belonging to - the same state could be defined as neighbors, in an analysis of all - counties in the US. + +def block_weights(regimes, ids=None, sparse=False, **kwargs): + """Construct spatial weights for regime neighbors. Block contiguity structures + are relevant when defining neighbor relations based on membership in a regime. + For example, all counties belonging to the same state could be defined as + neighbors, in an analysis of all counties in the US. 
Parameters ---------- - regimes : list, array - ids of which regime an observation belongs to - ids : list, array - Ordered sequence of IDs for the observations - sparse : boolean - If True return WSP instance - If False return W instance - **kwargs : keyword arguments - optional arguments for :class:`pysal.weights.W` + regimes : {list, numpy.ndarray} + The ids of the regime to which an observation belongs. + ids : {list, numpy.ndarray} + An ordered sequence of IDs for the observations. + sparse : bool + If ``True`` return `WSP` instance, otherwise return `W` instance + (``False``). Default is ``False``. + **kwargs : dict + Optional keyword arguments for ``libpysal.weights.W``. Returns ------- - W : spatial weights instance + w : {libpysal.weights.W, libpysal.weights.WSP} + An instance of spatial weights (`W`), or a thin version (`WSP`). Examples -------- + >>> from libpysal.weights import block_weights >>> import numpy as np >>> regimes = np.ones(25) @@ -277,57 +305,66 @@ def block_weights(regimes, ids=None, sparse=False, **kwargs): >>> regimes array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 1., 3., 3., 3., 3.]) + >>> w = block_weights(regimes) >>> w.weights[0] [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + >>> w.neighbors[0] [1, 2, 3, 4, 5, 6, 7, 8, 9, 20] + >>> regimes = ['n','n','s','s','e','e','w','w','e'] >>> n = len(regimes) >>> w = block_weights(regimes) >>> w.neighbors == {0: [1], 1: [0], 2: [3], 3: [2], 4: [5, 8], 5: [4, 8], 6: [7], 7: [6], 8: [4, 5]} True + """ + rids = np.unique(regimes) neighbors = {} NPNZ = np.nonzero regimes = np.array(regimes) + for rid in rids: members = NPNZ(regimes == rid)[0] for member in members: neighbors[member] = members[NPNZ(members != member)[0]].tolist() + w = W(neighbors, **kwargs) + if ids is not None: w.remap_ids(ids) + if sparse: w = WSP(w.sparse, id_order=ids) + return w def comb(items, n=None): - """ - Combinations of size n taken from items + """Combinations of size 
:math:`n` taken from items. Parameters ---------- items : list - items to be drawn from - n : integer - size of combinations to take from items + The items to be drawn from. + n : int + The size of combinations to take from items. - Returns - ------- + Yields + ------ - implicit : generator - combinations of size n taken from items + vc : generator + The combinations of size :math:`n` taken from items. Examples -------- + >>> x = range(4) >>> for c in comb(x, 2): ... print(c) - ... [0, 1] [0, 2] [0, 3] @@ -336,52 +373,58 @@ def comb(items, n=None): [2, 3] """ + items = list(items) + if n is None: n = len(items) + for i in list(range(len(items))): v = items[i : i + 1] + if n == 1: - yield v + vc = v + yield vc else: rest = items[i + 1 :] for c in comb(rest, n - 1): - yield v + c + vc = v + c + yield vc def order(w, kmax=3): - """ - Determine the non-redundant order of contiguity up to a specific - order. + """Determine the non-redundant order of contiguity up to a specific order. Parameters ---------- - w : W - spatial weights object - - kmax : int - maximum order of contiguity + w : libpysal.weights.W + An instance of spatial weights, `W`. + kmax : int + The maximum order of contiguity. Default is ``3``. Returns ------- - info : dictionary - observation id is the key, value is a list of contiguity - orders with a negative 1 in the ith position + info : dict + The observation ID are the keys and the values are lists of contiguity + orders with a -1 in the :math:`i`-th position. Notes ----- + Implements the algorithm in :cite:`Anselin1996b`. Examples -------- + >>> from libpysal.weights import order, Rook >>> import libpysal >>> w = Rook.from_shapefile(libpysal.examples.get_path('10740.shp')) + The weights matrix is not fully connected: + There are 2 disconnected components. + There is 1 island with id: 163. 
- WARNING: there is one disconnected observation (no neighbors) - Island id: [163] >>> w3 = order(w, kmax = 3) >>> w3[1][0:5] [1, -1, 1, 2, 1] @@ -390,11 +433,14 @@ def order(w, kmax=3): ids = w.id_order info = {} + for id_ in ids: s = [0] * w.n s[ids.index(id_)] = -1 + for j in w.neighbors[id_]: s[ids.index(j)] = 1 + k = 1 while k < kmax: knext = k + 1 @@ -410,85 +456,105 @@ def order(w, kmax=3): s[nid] = knext k = knext info[id_] = s + return info def higher_order(w, k=2, **kwargs): - """ - Contiguity weights object of order k. + """Contiguity weights object of order :math:`k`. Parameters ---------- - w : W - spatial weights object - k : int - order of contiguity - **kwargs : keyword arguments - optional arguments for :class:`pysal.weights.W` + w : libpysal.weights.W + An instance of spatial weights, `W`. + k : int + The order of contiguity. Default is ``2``. + **kwargs : dict + Optional keyword arguments for ``libpysal.weights.W``. Returns ------- - implicit : W - spatial weights object + w : libpysal.weights.W + An instance of spatial weights, `W`. Notes ----- - Proper higher order neighbors are returned such that i and j are k-order - neighbors iff the shortest path from i-j is of length k. + + Proper higher order neighbors are returned such that :math:`i` and :math:`j` + are :math:`k`-order neighbors if and only if ('iff') the shortest path from + :math:`i`-:math:`j` is of length :math:`k`. 
Examples -------- + >>> from libpysal.weights import lat2W, higher_order >>> w10 = lat2W(10, 10) >>> w10_2 = higher_order(w10, 2) >>> w10_2[0] == {2: 1.0, 11: 1.0, 20: 1.0} True + >>> w5 = lat2W() >>> w5[0] == {1: 1.0, 5: 1.0} True + >>> w5[1] == {0: 1.0, 2: 1.0, 6: 1.0} True + >>> w5_2 = higher_order(w5,2) >>> w5_2[0] == {10: 1.0, 2: 1.0, 6: 1.0} True + """ - return higher_order_sp(w, k, **kwargs) + + w = higher_order_sp(w, k, **kwargs) + + return w def higher_order_sp( w, k=2, shortest_path=True, diagonal=False, lower_order=False, **kwargs ): - """ - Contiguity weights for either a sparse W or W for order k. + """Contiguity weights for either a `WSP` or `W` for order :math:`k`. Parameters ---------- - w : W - sparse_matrix, spatial weights object or - scipy.sparse.csr.csr_instance - k : int - Order of contiguity - shortest_path : boolean - True: i,j and k-order neighbors if the - shortest path for i,j is k. - False: i,j are k-order neighbors if there - is a path from i,j of length k. - diagonal : boolean - True: keep k-order (i,j) joins when i==j - False: remove k-order (i,j) joins when i==j - lower_order : boolean - True: include lower order contiguities - False: return only weights of order k - **kwargs : keyword arguments - optional arguments for :class:`pysal.weights.W` + + w : {libpysal.weights.W, libpysal.weights.WSP, scipy.sparse.csr_instance} + A sparse matrix or spatial weights object. + k : int + The order of contiguity. Default is ``2``. + shortest_path : bool + Set to ``True`` to define :math:`i,j` are :math:`k`-order neighbors if the + shortest path for :math:`i,j` is :math:`k`. Set to ``False`` to define + :math:`i,j` are :math:`k`-order neighbors if there is a path from + :math:`i,j` of length :math:`k`. Default is ``True``. + diagonal : bool + Set to ``True`` to keep :math:`k`-order :math:`(i,j)` joins when :math:`i==j`. + Set to ``False`` to remove :math:`k`-order :math:`(i,j)` joins when :math:`i==j`. + Default is ``False``.
+ lower_order : bool + Include lower order contiguities (``True``) or return only + weights of order :math:`k` (``False``). Default is ``False``. + **kwargs : dict + Optional keyword arguments for ``libpysal.weights.W``. Returns ------- - wk : W - WSP, type matches type of w argument + wk : {libpysal.weights.W, libpysal.weights.WSP} + The `WSP` type matches the type of the ``w`` argument. + + Raises + ------ + + ValueError + The input `W` weights are not binary. + ValueError + The input ``scipy.sparse.csr_instance`` weights are not binary. + TypeError + The input weights are not in the correct format and/or are not binary. Examples -------- @@ -497,32 +563,51 @@ def higher_order_sp( >>> w25 = lat2W(5,5) >>> w25.n 25 + >>> w25[0] == {1: 1.0, 5: 1.0} True + >>> w25_2 = higher_order_sp(w25, 2) >>> w25_2[0] == {10: 1.0, 2: 1.0, 6: 1.0} True + >>> w25_2 = higher_order_sp(w25, 2, diagonal=True) >>> w25_2[0] == {0: 1.0, 10: 1.0, 2: 1.0, 6: 1.0} True + >>> w25_3 = higher_order_sp(w25, 3) >>> w25_3[0] == {15: 1.0, 3: 1.0, 11: 1.0, 7: 1.0} True + >>> w25_3 = higher_order_sp(w25, 3, shortest_path=False) >>> w25_3[0] == {1: 1.0, 3: 1.0, 5: 1.0, 7: 1.0, 11: 1.0, 15: 1.0} True + >>> w25_3 = higher_order_sp(w25, 3, lower_order=True) - >>> w25_3[0] == {5: 1.0, 7: 1.0, 11: 1.0, 2: 1.0, 15: 1.0, 6: 1.0, 10: 1.0, 1: 1.0, 3: 1.0} + >>> w25_3[0] == { + ... 5: 1.0, + ... 7: 1.0, + ... 11: 1.0, + ... 2: 1.0, + ... 15: 1.0, + ... 6: 1.0, + ... 10: 1.0, + ... 1: 1.0, + ... 3: 1.0 + ...
} True """ + id_order = None + if issubclass(type(w), W) or isinstance(w, W): if np.unique(np.hstack(list(w.weights.values()))) == np.array([1.0]): id_order = w.id_order w = w.sparse else: - raise ValueError("Weights are not binary (0,1)") + raise ValueError("Weights are not binary (0,1).") + elif scipy.sparse.isspmatrix_csr(w): if not np.unique(w.data) == np.array([1.0]): raise ValueError( @@ -530,8 +615,8 @@ def higher_order_sp( ) else: raise TypeError( - "Weights provided are neither a binary W object nor " - "a scipy.sparse.csr_matrix" + "Weights provided are neither a binary 'W' object nor " + "a 'scipy.sparse.csr_matrix'." ) if lower_order: @@ -560,7 +645,11 @@ def higher_order_sp( k = id_order[k] v = id_order[v] d[k].append(v) - return W(neighbors=d, **kwargs) + + wk = W(neighbors=d, **kwargs) + + return wk + else: d = {} for pair in sk: @@ -569,25 +658,27 @@ def higher_order_sp( d[k].append(v) else: d[k] = [v] - return WSP(W(neighbors=d, **kwargs).sparse) + + wk = WSP(W(neighbors=d, **kwargs).sparse) + + return wk def w_local_cluster(w): - r""" - Local clustering coefficients for each unit as a node in a graph. + r"""Local clustering coefficients for each unit as a node in a graph. Parameters ---------- - w : W - spatial weights object + w : libpysal.weights.W + An instance of spatial weights, `W`. Returns ------- - c : array - (w.n,1) - local clustering coefficients + c : numpy.ndarray + An array of local clustering coefficients with + dimensions :math:`(\mathtt{w}.n,1)`. Notes ----- @@ -599,12 +690,14 @@ def w_local_cluster(w): c_i = | \{w_{j,k}\} |/ (k_i(k_i - 1)): j,k \in N_i - where :math:`N_i` is the set of neighbors to :math:`i`, :math:`k_i = - |N_i|` and :math:`\{w_{j,k}\}` is the set of non-zero elements of the - weights between pairs in :math:`N_i` :cite:`Watts1998`. 
+ where :math:`N_i` is the set of neighbors to :math:`i`, + :math:`k_i = |N_i|` and :math:`\{w_{j,k}\}` is the set + of non-zero elements of the weights between pairs in + :math:`N_i` :cite:`Watts1998`. Examples -------- + >>> from libpysal.weights import lat2W, w_local_cluster >>> w = lat2W(3,3, rook=False) >>> w_local_cluster(w) @@ -631,39 +724,45 @@ def w_local_cluster(w): def shimbel(w): - """ - Find the Shimbel matrix for first order contiguity matrix. + """Find the Shimbel matrix for first order contiguity matrix. Parameters ---------- - w : W - spatial weights object + + w : libpysal.weights.W + An instance of spatial weights, `W`. Returns ------- - info : list - list of lists; one list for each observation which stores - the shortest order between it and each of the the other observations. + info : list + A list of lists; one list for each observation which stores + the shortest order between it and each of the other observations. Examples -------- + >>> from libpysal.weights import lat2W, shimbel >>> w5 = lat2W() >>> w5_shimbel = shimbel(w5) >>> w5_shimbel[0][24] 8 + >>> w5_shimbel[0][0:4] [-1, 1, 2, 3] + """ info = {} ids = w.id_order + for i in ids: s = [0] * w.n s[ids.index(i)] = -1 + for j in w.neighbors[i]: s[ids.index(j)] = 1 + k = 1 flag = s.count(0) while flag: @@ -680,26 +779,29 @@ def shimbel(w): k = knext flag = s.count(0) info[i] = s + return info def full(w): - """ - Generate a full numpy array. + """Generate a full ``numpy.ndarray``. Parameters ---------- - w : W - spatial weights object + + w : libpysal.weights.W + An instance of spatial weights, `W`. Returns ------- + (fullw, keys) : tuple - first element being the full numpy array and second element - keys being the ids associated with each row in the array. + The first element is the full ``numpy.ndarray`` and the second element + is a list of keys that are the ids associated with each row in the array. 
Examples -------- + >>> from libpysal.weights import W, full >>> neighbors = {'first':['second'],'second':['first','third'],'third':['second']} >>> weights = {'first':[1],'second':[1,1],'third':[1]} @@ -709,33 +811,45 @@ def full(w): array([[0., 1., 0.], [1., 0., 1.], [0., 1., 0.]]) + >>> ids ['first', 'second', 'third'] + """ - return w.full() + + fullw, keys = w.full() + + return fullw, keys def full2W(m, ids=None, **kwargs): - """ - Create a PySAL W object from a full array. + """Create a PySAL `W` object from a full array. Parameters ---------- - m : array - nxn array with the full weights matrix - ids : list - User ids assumed to be aligned with m - **kwargs : keyword arguments - optional arguments for :class:`pysal.weights.W` + m : numpy.ndarray + An :math:`n`x:math:`n` array with the full weights matrix. + ids : list + User ids assumed to be aligned with ``m``. Default is ``None``. + **kwargs : dict + Optional keyword arguments for ``libpysal.weights.W``. + + Raises + ------ + + ValueError + The input array, ``m``, is not square. Returns ------- - w : W - PySAL weights object + + w : libpysal.weights.W + An instance of spatial weights, `W`. Examples -------- + >>> from libpysal.weights import full2W >>> import numpy as np @@ -769,42 +883,52 @@ def full2W(m, ids=None, **kwargs): [ True, True, True, True], [ True, True, True, True]]) """ + if m.shape[0] != m.shape[1]: - raise ValueError("Your array is not square") + raise ValueError("Your array is not square.") + neighbors, weights = {}, {} + for i in range(m.shape[0]): - # for i, row in enumerate(m): row = m[i] + if ids: i = ids[i] + ngh = list(row.nonzero()[0]) weights[i] = list(row[ngh]) ngh = list(ngh) + if ids: ngh = [ids[j] for j in ngh] + neighbors[i] = ngh - return W(neighbors, weights, id_order=ids, **kwargs) + w = W(neighbors, weights, id_order=ids, **kwargs) -def WSP2W(wsp, **kwargs): + return w - """ - Convert a pysal WSP object (thin weights matrix) to a pysal W object. 
+ +def WSP2W(wsp, **kwargs): + """Convert a PySAL `WSP` object (thin weights matrix) to a PySAL `W` object. Parameters ---------- - wsp : WSP - PySAL sparse weights object - **kwargs : keyword arguments - optional arguments for :class:`pysal.weights.W` + + wsp : libpysal.weights.WSP + An instance of sparse spatial weights, `WSP`. + **kwargs : dict + Optional keyword arguments for ``libpysal.weights.W``. Returns ------- - w : W - PySAL weights object + + w : libpysal.weights.W + An instance of spatial weights, `W`. Examples -------- + >>> from libpysal.weights import lat2W, WSP, WSP2W Build a 10x10 scipy.sparse matrix for a rectangular 2x5 region of cells @@ -814,6 +938,7 @@ def WSP2W(wsp, **kwargs): >>> wsp = WSP(sp) >>> wsp.n 10 + >>> wsp.sparse[0].todense() matrix([[0, 1, 0, 0, 0, 1, 0, 0, 0, 0]], dtype=int8) @@ -822,31 +947,37 @@ def WSP2W(wsp, **kwargs): >>> w = WSP2W(wsp) >>> w.n 10 + >>> print(w.full()[0][0]) [0. 1. 0. 0. 0. 1. 0. 0. 0. 0.] - """ + data = wsp.sparse.data indptr = wsp.sparse.indptr id_order = wsp.id_order + if id_order: # replace indices with user IDs indices = [id_order[i] for i in wsp.sparse.indices] else: id_order = list(range(wsp.n)) + neighbors, weights = {}, {} start = indptr[0] + for i in range(wsp.n): oid = id_order[i] end = indptr[i + 1] neighbors[oid] = indices[start:end] weights[oid] = data[start:end] start = end + ids = copy.copy(wsp.id_order) w = W(neighbors, weights, ids, **kwargs) w._sparse = copy.deepcopy(wsp.sparse) w._cache["sparse"] = w._sparse + return w @@ -856,32 +987,41 @@ def insert_diagonal(w, val=1.0, wsp=False): def fill_diagonal(w, val=1.0, wsp=False): - """ - Returns a new weights object with values inserted along the main diagonal. + """Returns a new weights object with values inserted along the main diagonal. Parameters ---------- - w : W - Spatial weights object - - diagonal : float, int or array - Defines the value(s) to which the weights matrix diagonal should - be set.
If a constant is passed then each element along the - diagonal will get this value (default is 1.0). An array of length - w.n can be passed to set explicit values to each element along - the diagonal (assumed to be in the same order as w.id_order). - wsp : boolean - If True return a thin weights object of the type WSP, if False - return the standard W object. + w : libpysal.weights.W + An instance of spatial weights, `W`. + val : {float, int, array_like} + Defines the value(s) to which the weights matrix diagonal should + be set. If a constant is passed then each element along the + diagonal will get this value (default is ``1.0``). An array of length + ``w.n`` can be passed to set explicit values to each element along + the diagonal (assumed to be in the same order as ``w.id_order``). + wsp : bool + If ``True`` return a thin weights object of the type `WSP`, if ``False`` + return the standard `W` object. Default is ``False``. + + Raises + ------ + + ValueError + The shape of the spatial weights object and the + length of the diagonal are not equivalent. + ValueError + The input value for the diagonal is not valid. Returns ------- - w : W - Spatial weights object + + w_out : libpysal.weights.W + An instance of spatial weights, `W`. 
Examples -------- + >>> from libpysal.weights import lat2W >>> import numpy as np @@ -892,6 +1032,7 @@ def fill_diagonal(w, val=1.0, wsp=False): >>> w_const = insert_diagonal(w) >>> w['id0'] == {'id5': 1.0, 'id1': 1.0} True + >>> w_const['id0'] == {'id5': 1.0, 'id0': 1.0, 'id1': 1.0} True @@ -906,106 +1047,133 @@ def fill_diagonal(w, val=1.0, wsp=False): w_new = copy.deepcopy(w.sparse) w_new = w_new.tolil() + if issubclass(type(val), np.ndarray): if w.n != val.shape[0]: - raise Exception("shape of w and diagonal do not match") + raise ValueError("Shape of 'w' and diagonal do not match.") w_new.setdiag(val) elif isinstance(val, numbers.Number): w_new.setdiag([val] * w.n) else: - raise Exception("Invalid value passed to diagonal") + raise ValueError("Invalid value passed to diagonal.") + w_out = WSP(w_new, copy.copy(w.id_order)) - if wsp: - return w_out - else: - return WSP2W(w_out) + + if not wsp: + w_out = WSP2W(w_out) + + return w_out def remap_ids(w, old2new, id_order=[], **kwargs): - """ - Remaps the IDs in a spatial weights object. + """Remaps the IDs in a spatial weights object. Parameters ---------- - w : W - Spatial weights object - - old2new : dictionary - Dictionary where the keys are the IDs in w (i.e. "old IDs") and - the values are the IDs to replace them (i.e. "new IDs") + w : libpysal.weights.W + An instance of spatial weights, `W`. + old2new : dict + Dictionary where the keys are the IDs in w (i.e. "old IDs") + and the values are the IDs to replace them (i.e. "new IDs"). id_order : list - An ordered list of new IDs, which defines the order of observations when - iterating over W. If not set then the id_order in w will be - used. - **kwargs : keyword arguments - optional arguments for :class:`pysal.weights.W` + An ordered list of new IDs, which defines the order of + observations when iterating over `W`. If not set then the + id_order in ``w`` will be used. Default is ``[]``. + **kwargs : dict + Optional keyword arguments for ``libpysal.weights.W``. 
+ + Raises + ------ + TypeError + The object passed in as ``w`` is not a spatial weights object. Returns ------- - implicit : W - Spatial weights object with new IDs + w : libpysal.weights.W + An instance of spatial weights, `W`, with new IDs. Examples -------- + >>> from libpysal.weights import lat2W >>> w = lat2W(3,2) >>> w.id_order [0, 1, 2, 3, 4, 5] + >>> w.neighbors[0] [2, 1] + >>> old_to_new = {0:'a', 1:'b', 2:'c', 3:'d', 4:'e', 5:'f'} >>> w_new = remap_ids(w, old_to_new) + >>> w_new.id_order ['a', 'b', 'c', 'd', 'e', 'f'] + >>> w_new.neighbors['a'] ['c', 'b'] """ if not isinstance(w, W): - raise Exception("w must be a spatial weights object") + raise TypeError("'w' must be a spatial weights object.") + new_neigh = {} new_weights = {} + for key, value in list(w.neighbors.items()): new_values = [old2new[i] for i in value] new_key = old2new[key] new_neigh[new_key] = new_values new_weights[new_key] = copy.copy(w.weights[key]) + if id_order: - return W(new_neigh, new_weights, id_order, **kwargs) + w = W(new_neigh, new_weights, id_order, **kwargs) + else: if w.id_order: id_order = [old2new[i] for i in w.id_order] - return W(new_neigh, new_weights, id_order, **kwargs) + w = W(new_neigh, new_weights, id_order, **kwargs) else: - return W(new_neigh, new_weights, **kwargs) + w = W(new_neigh, new_weights, **kwargs) + + return w def get_ids(in_shps, idVariable): - """ - Gets the IDs from the DBF file that moves with a given shape file or - a geopandas.GeoDataFrame. + """Gets the IDs from the ``DBF`` file that moves with + a given shape file or a ``geopandas.GeoDataFrame``. Parameters ---------- - in_shps : str or geopandas.GeoDataFrame - The input geographic data. Either + + in_shps : {str, geopandas.GeoDataFrame} + The input geographic data. Either (1) a path to a shapefile including suffix (str); or (2) a geopandas.GeoDataFrame. - idVariable : str - name of a column in the shapefile's DBF or the - geopandas.GeoDataFrame to use for ids. 
+ idVariable : str + The name of a column in the shapefile's DBF or the + ``geopandas.GeoDataFrame`` to use for IDs. Returns ------- - ids : list - a list of IDs + + var : list + A list of IDs. + + Raises + ------ + + IOError + No ``.dbf`` file is present in the indicated directory. + KeyError + The variable name passed in for IDs is not found. Examples -------- + >>> from libpysal.weights.util import get_ids >>> import libpysal >>> polyids = get_ids(libpysal.examples.get_path("columbus.shp"), "POLYID") @@ -1037,6 +1205,7 @@ def get_ids(in_shps, idVariable): else: cols = list(in_shps.columns) var = list(in_shps[idVariable]) + return var except IOError: @@ -1045,35 +1214,41 @@ def get_ids(in_shps, idVariable): + ' The DBF file "%s" could not be found.' % (in_shps, dbname) ) raise IOError(msg) + except (AttributeError, KeyError): msg = ( 'The variable "%s" not found in the DBF/GDF. The the following ' + "variables are present: %s." % (idVariable, ",".join(cols)) ) + raise KeyError(msg) def get_points_array(iterable): - """ - Gets a data array of x and y coordinates from a given iterable + """Gets a data array of `x` and `y` coordinates from a given iterable. + Parameters ---------- - iterable : iterable - arbitrary collection of shapes that supports iteration + + iterable : iterable + An arbitrary collection of shapes that supports iteration. Returns ------- - points : array - (n, 2) - a data array of x and y coordinates + + data : numpy.ndarray + A data array of `x` and `y` coordinates with shape :math:`(n, 2)`. Notes ----- - If the given shape file includes polygons, - this function returns x and y coordinates of the polygons' centroids + + If the given shapefile includes polygons, this function + returns `x` and `y` coordinates of the polygons' centroids. 
""" + first_choice, backup = tee(iterable) + try: if HAS_SHAPELY: data = np.vstack( @@ -1088,31 +1263,34 @@ def get_points_array(iterable): data = np.vstack([np.array(shape.centroid) for shape in first_choice]) except AttributeError: data = np.vstack([shape for shape in backup]) + return data def get_points_array_from_shapefile(shapefile): - """ - Gets a data array of x and y coordinates from a given shapefile. + """Gets a data array of `x` and `y` coordinates from a given shapefile. Parameters ---------- - shapefile : string - name of a shape file including suffix + + shapefile : str + The name of a shapefile including the extension. Returns ------- - points : array - (n, 2) - a data array of x and y coordinates + + data : numpy.ndarray + A data array of `x` and `y` coordinates with shape :math:`(n, 2)`. Notes ----- - If the given shape file includes polygons, - this function returns x and y coordinates of the polygons' centroids + + If the given shape file includes polygons, this function + returns `x` and `y` coordinates of the polygons' centroids Examples -------- + Point shapefile >>> import libpysal @@ -1123,7 +1301,6 @@ def get_points_array_from_shapefile(shapefile): [80., 95.], [79., 90.]]) - Polygon shapefile >>> xy = get_points_array_from_shapefile(libpysal.examples.get_path('columbus.shp')) @@ -1131,36 +1308,37 @@ def get_points_array_from_shapefile(shapefile): array([[ 8.82721847, 14.36907602], [ 8.33265837, 14.03162401], [ 9.01226541, 13.81971908]]) + """ f = psopen(shapefile) data = get_points_array(f) + return data def min_threshold_distance(data, p=2): - """ - Get the maximum nearest neighbor distance. + """Get the maximum nearest neighbor distance. 
Parameters ---------- - data : array - (n,k) or KDTree where KDtree.data is array (n,k) - n observations on k attributes - p : float - Minkowski p-norm distance metric parameter: - 1<=p<=infinity - 2: Euclidean distance - 1: Manhattan distance + data : numpy.ndarray + :math:`(n,k)` or ``KDTree`` where ``KDTree.data`` is an + :math:`(n,k)` array of :math:`n` observations on :math:`k` attributes. + p : float + Minkowski `p`-norm distance metric parameter where :math:`1<=\mathtt{p}<=\infty`. + ``2`` is Euclidean distance and ``1`` is Manhattan distance. Default is ``2``. Returns ------- - nnd : float - maximum nearest neighbor distance between the n observations + + nnd : float + The maximum nearest neighbor distance between the :math:`n` observations. Examples -------- + >>> from libpysal.weights.util import min_threshold_distance >>> import numpy as np >>> x, y = np.indices((5, 5)) @@ -1171,44 +1349,48 @@ def min_threshold_distance(data, p=2): 1.0 """ + if issubclass(type(data), scipy.spatial.KDTree): kd = data data = kd.data else: kd = KDTree(data) + nn = kd.query(data, k=2, p=p) nnd = nn[0].max(axis=0)[1] + return nnd def lat2SW(nrows=3, ncols=5, criterion="rook", row_st=False): - """ - Create a sparse W matrix for a regular lattice. + """Create a sparse `W` matrix for a regular lattice. Parameters ---------- - nrows : int - number of rows - ncols : int - number of columns - rook : {"rook", "queen", "bishop"} - type of contiguity. Default is rook. - row_st : boolean - If True, the created sparse W object is row-standardized so - every row sums up to one. Defaults to False. + nrows : int + The number of rows. Default is ``3``. + ncols : int + The number of columns. Default is ``5``. + criterion : str + The type of contiguity. Default is ``'rook'``. Options + are ``'rook'``, ``'queen'``, and ``'bishop'``. + row_st : boolean + If ``True``, the created sparse `W` object is row-standardized + so every row sums up to one. Default is ``False``.
Returns ------- - w : scipy.sparse.dia_matrix - instance of a scipy sparse matrix + m : scipy.sparse.dia_matrix + An instance of a ``scipy`` sparse matrix. Notes ----- - Observations are row ordered: first k observations are in row 0, next k in row 1, and so on. - This method directly creates the W matrix using the strucuture of the contiguity type. + Observations are row ordered: first :math:`k` observations are in row 0, + next :math:`k` in row 1, and so on. This method directly creates the `W` + matrix using the structure of the contiguity type. Examples -------- @@ -1217,20 +1399,26 @@ def lat2SW(nrows=3, ncols=5, criterion="rook", row_st=False): >>> w9 = lat2SW(3,3) >>> w9[0,1] == 1 True + >>> w9[3,6] == 1 True + >>> w9r = lat2SW(3,3, row_st=True) >>> w9r[3,6] == 1./3 True + """ n = nrows * ncols diagonals = [] offsets = [] + if criterion == "rook" or criterion == "queen": d = np.ones((1, n)) + for i in range(ncols - 1, n, ncols): d[0, i] = 0 + diagonals.append(d) offsets.append(-1) @@ -1240,6 +1428,7 @@ def lat2SW(nrows=3, ncols=5, criterion="rook", row_st=False): if criterion == "queen" or criterion == "bishop": d = np.ones((1, n)) + for i in range(0, n, ncols): d[0, i] = 0 diagonals.append(d) @@ -1250,21 +1439,28 @@ def lat2SW(nrows=3, ncols=5, criterion="rook", row_st=False): d[0, i] = 0 diagonals.append(d) offsets.append(-(ncols + 1)) + data = np.concatenate(diagonals) offsets = np.array(offsets) m = sparse.dia_matrix((data, offsets), shape=(n, n), dtype=np.int8) m = m + m.T + if row_st: m = sparse.spdiags(1.0 / m.sum(1).T, 0, *m.shape) * m + m = m.tocsc() m.eliminate_zeros() + return m def write_gal(file, k=10): + """Write out a ``.gal`` spatial weights file.""" + f = open(file, "w") n = k * k f.write("0 %d" % n) + for i in range(n): row = i / k col = i % k @@ -1272,165 +1468,181 @@ def write_gal(file, k=10): neighs = [j for j in neighs if j >= 0 and j < n] f.write("\n%d %d\n" % (i, len(neighs))) f.write(" ".join(map(str, neighs))) + f.close() def
neighbor_equality(w1, w2): - """ - Test if the neighbor sets are equal between two weights objects + """Test if the neighbor sets are equal between two weights objects. Parameters ---------- - w1 : W - instance of spatial weights class W - - w2 : W - instance of spatial weights class W + w1 : libpysal.weights.W + An instance of spatial weights, `W`. + w2 : libpysal.weights.W + An instance of spatial weights, `W`. Returns ------- - Boolean + equality : bool + Single equality evaluator. Notes ----- + Only set membership is evaluated, no check of the weight values is carried out. Examples -------- + >>> from libpysal.weights.util import neighbor_equality >>> from libpysal.weights import lat2W, W >>> w1 = lat2W(3,3) >>> w2 = lat2W(3,3) >>> neighbor_equality(w1, w2) True + >>> w3 = lat2W(5,5) >>> neighbor_equality(w1, w3) False + >>> n4 = w1.neighbors.copy() >>> n4[0] = [1] >>> n4[1] = [4, 2] >>> w4 = W(n4) >>> neighbor_equality(w1, w4) False + >>> n5 = w1.neighbors.copy() >>> n5[0] [3, 1] + >>> n5[0] = [1, 3] >>> w5 = W(n5) >>> neighbor_equality(w1, w5) True """ + + equality = True + n1 = w1.neighbors n2 = w2.neighbors ids_1 = set(n1.keys()) ids_2 = set(n2.keys()) + if ids_1 != ids_2: - return False - for i in ids_1: - if set(w1.neighbors[i]) != set(w2.neighbors[i]): - return False - return True + equality = False + + if equality: + for i in ids_1: + if set(w1.neighbors[i]) != set(w2.neighbors[i]): + equality = False + break + + return equality def isKDTree(obj): + """This is a utility function to determine whether or not an object is a + `KDTree`, since `KDTree` and `cKDTree` have no common parent type. """ - This is a utility function to determine whether or not an object is a - KDTree, since KDTree and cKDTree have no common parent type - """ + return any([issubclass(type(obj), KDTYPE) for KDTYPE in KDTREE_TYPES]) def attach_islands(w, w_knn1, **kwargs): - """ - Attach nearest neighbor to islands in spatial weight w. 
+ """Attach the nearest neighbor to islands in a spatial weights object, `W`. Parameters ---------- - w : libpysal.weights.W - pysal spatial weight object (unstandardized). - w_knn1 : libpysal.weights.W - Nearest neighbor pysal spatial weight object (k=1). - **kwargs : keyword arguments - optional arguments for :class:`pysal.weights.W` - + w : libpysal.weights.W + A PySAL spatial weights object (unstandardized). + w_knn1 : libpysal.weights.KNN + A PySAL Nearest neighbor spatial weight object :math:`(k=1)`. + **kwargs : dict + Optional keyword arguments for ``libpysal.weights.W``. Returns ------- - : libpysal.weights.W - pysal spatial weight object w without islands. + w : libpysal.weights.W + A PySAL spatial weight object, `W`, without islands. Examples -------- + >>> from libpysal.weights import lat2W, Rook, KNN, attach_islands >>> import libpysal >>> w = Rook.from_shapefile(libpysal.examples.get_path('10740.shp')) >>> w.islands [163] + >>> w_knn1 = KNN.from_shapefile(libpysal.examples.get_path('10740.shp'),k=1) >>> w_attach = attach_islands(w, w_knn1) >>> w_attach.islands [] + >>> w_attach[w.islands[0]] {166: 1.0} """ neighbors, weights = copy.deepcopy(w.neighbors), copy.deepcopy(w.weights) + if not len(w.islands): print("There are no disconnected observations (no islands)!") - return w + else: for island in w.islands: nb = w_knn1.neighbors[island][0] + if type(island) is float: nb = float(nb) + neighbors[island] = [nb] weights[island] = [1.0] neighbors[nb] = neighbors[nb] + [island] weights[nb] = weights[nb] + [1.0] - return W(neighbors, weights, id_order=w.id_order, **kwargs) + w = W(neighbors, weights, id_order=w.id_order, **kwargs) + + return w -def nonplanar_neighbors(w, geodataframe, tolerance=0.001, **kwargs): - """ - Detect neighbors for non-planar polygon collections +def nonplanar_neighbors(w, geodataframe, tolerance=0.001, **kwargs): + """Detect neighbors for non-planar polygon collections. 
Parameters ---------- - w: pysal W - A spatial weights object with reported islands - - - geodataframe: GeoDataframe - The polygon dataframe from which w was constructed. - - tolerance: float - The percentage of the minimum horizontal or vertical extent (minextent) of - the dataframe to use in defining a buffering distance to allow for fuzzy - contiguity detection. The buffering distance is equal to tolerance*minextent. - **kwargs: keyword arguments - optional arguments for :class:`pysal.weights.W` - + w : libpysal.weights.W + A spatial weights object with reported islands. + geodataframe : geopandas.GeoDataFrame + The polygon dataframe from which ``w`` was constructed. + tolerance : float + The percentage of the minimum horizontal or vertical extent (``minextent``) of + the dataframe to use in defining a buffering distance to allow for fuzzy + contiguity detection. The buffering distance is equal to ``tolerance*minextent``. + **kwargs : dict + Optional keyword arguments for ``libpysal.weights.W``. Attributes ---------- - non_planar_joins : dictionary - Stores the new joins detected. Key is the id of the focal unit, value is a list of neighbor ids. + non_planar_joins : dict + Stores the new joins detected. Key is the ID of the + focal unit, value is a list of neighbor IDs. Returns ------- - - w: pysal W + w : libpysal.weights.W Spatial weights object that encodes fuzzy neighbors. - This will have an attribute `non_planar_joins` to indicate what new joins were detected. + This will have an attribute `non_planar_joins` to + indicate what new joins were detected. Notes ----- @@ -1448,6 +1660,9 @@ def nonplanar_neighbors(w, geodataframe, tolerance=0.001, **kwargs): The buffering check assumes the geometry coordinates are projected. + For an example see `nonplanarweights.ipynb `_. 
+ + Examples -------- @@ -1457,23 +1672,24 @@ def nonplanar_neighbors(w, geodataframe, tolerance=0.001, **kwargs): >>> w = libpysal.weights.Queen.from_dataframe(df) >>> w.islands [0, 4, 23, 27, 80, 94, 101, 107, 109, 119, 122, 139, 169, 175, 223, 239, 247, 253, 254, 255, 256, 261, 276, 291, 294, 303, 321, 357, 374] + >>> wnp = libpysal.weights.nonplanar_neighbors(w, df) >>> wnp.islands [] + >>> w.neighbors[0] [] + >>> wnp.neighbors[0] [23, 59, 152, 239] + >>> wnp.neighbors[23] [0, 45, 59, 107, 152, 185, 246] - Also see `nonplanarweights.ipynb` - References ---------- - Planar Enforcement: http://ibis.geog.ubc.ca/courses/klink/gis.notes/ncgia/u12.html#SEC12.6 - + Planar Enforcement: `see here <http://ibis.geog.ubc.ca/courses/klink/gis.notes/ncgia/u12.html#SEC12.6>`_. """ @@ -1481,6 +1697,7 @@ def nonplanar_neighbors(w, geodataframe, tolerance=0.001, **kwargs): assert ( gdf.sindex ), "GeoDataFrame must have a spatial index. Please make sure you have `libspatialindex` installed" + islands = w.islands joins = copy.deepcopy(w.neighbors) candidates = gdf.geometry @@ -1494,8 +1711,11 @@ def nonplanar_neighbors(w, geodataframe, tolerance=0.001, **kwargs): for j, candidate in enumerate(candidates) if focal.intersects(candidate) and j != island ] + if len(neighbors) > 0: + for neighbor in neighbors: + if neighbor not in joins[island]: fixes[island].append(neighbor) joins[island].append(neighbor) @@ -1507,6 +1727,7 @@ def nonplanar_neighbors(w, geodataframe, tolerance=0.001, **kwargs): if islands: x0, y0, x1, y1 = gdf.total_bounds distance = tolerance * min(x1 - x0, y1 - y0) + for island in islands: dilated = gdf.iloc[island].geometry.buffer(distance) neighbors = [ @@ -1514,8 +1735,11 @@ def nonplanar_neighbors(w, geodataframe, tolerance=0.001, **kwargs): for j, candidate in enumerate(candidates) if dilated.intersects(candidate) and j != island ] + if len(neighbors) > 0: + for neighbor in neighbors: + if neighbor not in joins[island]: fixes[island].append(neighbor) joins[island].append(neighbor) @@ -1525,6 +1749,7 @@ def nonplanar_neighbors(w,
geodataframe, tolerance=0.001, **kwargs): w = W(joins, **kwargs) w.non_planar_joins = fixes + return w @@ -1538,39 +1763,40 @@ def fuzzy_contiguity( predicate="intersects", **kwargs, ): - """ - Fuzzy contiguity spatial weights + """Fuzzy contiguity spatial weights. Parameters ---------- - gdf: GeoDataFrame - - tolerance: float - The percentage of the length of the minimum side of the bounding rectangle for the GeoDataFrame to use in determining the buffering distance. - - buffering: boolean - If False (default) joins will only be detected for features that intersect (touch, contain, within). - If True then features will be buffered and intersections will be based on buffered features. - - drop: boolean - If True (default), the buffered features are removed from the GeoDataFrame. If False, buffered features are added to the GeoDataFrame. - + gdf : geopandas.GeoDataFrame + A polygon dataframe. + tolerance : float + The percentage of the length of the minimum side of the bounding + rectangle for ``gdf`` to use in determining the buffering distance. + Default is ``0.005``. + buffering : bool + If ``False`` (default) joins will only be detected for features + that intersect (touch, contain, within). If ``True`` then features + will be buffered and intersections will be based on buffered features. + drop : bool + If ``True`` (default), the buffered features are removed from ``gdf``. + If ``False``, buffered features are added to the ``gdf``. buffer : float - Specify exact buffering distance. Ignores `tolerance`. - - predicate : {'intersects', 'within', 'contains', 'overlaps', 'crosses', 'touches'} - The predicate to use for determination of neighbors. Default is 'intersects'. If None is passed, neighbours are determined based on - the intersection of bounding boxes. - - **kwargs: keyword arguments - optional arguments for :class:`pysal.weights.W` - + Specify an exact buffering distance. When set ``tolerance`` is ignored. + Default is ``None``. 
+ predicate : str + The predicate to use for determination of neighbors. Valid + values are: ``'intersects'``, ``'within'``, ``'contains'``, + ``'overlaps'``, ``'crosses'``, and ``'touches'``. Default is + ``'intersects'``. If ``None`` is passed, neighbours are + determined based on the intersection of bounding boxes. + **kwargs : dict + Optional keyword arguments for ``libpysal.weights.W``. Returns ------- - w: PySAL W + w : libpysal.weights.W Spatial weights based on fuzzy contiguity. Weights are binary. Examples @@ -1584,15 +1810,18 @@ def fuzzy_contiguity( >>> wq = libpysal.weights.Queen.from_dataframe(rs_df) >>> len(wq.islands) 29 + >>> wq[0] {} + >>> wf = fuzzy_contiguity(rs_df) >>> wf.islands [] + >>> wf[0] == dict({239: 1.0, 59: 1.0, 152: 1.0, 23: 1.0, 107: 1.0}) True - Example needing to use buffering + An example of needing to use buffering: >>> from shapely.geometry import Polygon >>> p0 = Polygon([(0,0), (10,0), (10,10)]) @@ -1603,9 +1832,11 @@ def fuzzy_contiguity( >>> wf = fuzzy_contiguity(gdf) >>> wf.islands [2] + >>> wfb = fuzzy_contiguity(gdf, buffering=True) >>> wfb.islands [] + >>> wfb[2] {1: 1.0} @@ -1630,18 +1861,18 @@ def fuzzy_contiguity( final case arises when one polygon is "inside" a second polygon but is not encoded to represent a hole in the containing polygon. - Detection of the second case will require setting buffering=True and exploring different values for tolerance. + Detection of the second case will require setting + ``buffering=True`` and exploring different values for tolerance. The buffering check assumes the geometry coordinates are projected. - References ---------- - Planar Enforcement: http://ibis.geog.ubc.ca/courses/klink/gis.notes/ncgia/u12.html#SEC12.6 - + Planar Enforcement: `see here <http://ibis.geog.ubc.ca/courses/klink/gis.notes/ncgia/u12.html#SEC12.6>`_.
""" + if buffering: if not buffer: # buffer each shape @@ -1679,6 +1910,7 @@ def fuzzy_contiguity( if buffering: gdf.set_geometry(old_geometry_name, inplace=True) + if drop: gdf.drop(columns=["_buffer"], inplace=True) diff --git a/libpysal/weights/weights.py b/libpysal/weights/weights.py index 5f43eb36e..b3ae892aa 100644 --- a/libpysal/weights/weights.py +++ b/libpysal/weights/weights.py @@ -1,7 +1,8 @@ """ -Weights. +Spatial Weights. """ -__author__ = "Sergio J. Rey " + +__author__ = "Sergio J. Rey " import copy from os.path import basename as BASENAME @@ -26,17 +27,20 @@ class _LabelEncoder(object): Attributes ---------- + classes_: array of shape [n_classes] Class labels for each index. Examples -------- + >>> le = _LabelEncoder() >>> le.fit(["NY", "CA", "NY", "CA", "TX", "TX"]) >>> le.classes_ array(['CA', 'NY', 'TX']) >>> le.transform(["NY", "CA", "NY", "CA", "TX", "TX"]) array([1, 0, 1, 0, 2, 2]) + """ def fit(self, y): @@ -44,13 +48,16 @@ def fit(self, y): Parameters ---------- + y : list list of labels Returns ------- + self : instance of self. - Fitted label encoder. + Fitted label encoder. + """ self.classes_ = np.unique(y) return self @@ -60,35 +67,37 @@ def transform(self, y): Parameters ---------- + y : list list of labels Returns ------- + y : array array of normalized labels. + """ return np.searchsorted(self.classes_, y) class W(object): - """ - Spatial weights class. Class attributes are described by their + """Spatial weights class. Class attributes are described by their docstrings. to view, use the ``help`` function. Parameters ---------- neighbors : dict - Key is region ID, value is a list of neighbor IDS. + Key is region ID, value is a list of neighbor IDs. For example, ``{'a':['b'],'b':['a','c'],'c':['b']}``. weights : dict Key is region ID, value is a list of edge weights. If not supplied all edge weights are assumed to have a weight of 1. For example, ``{'a':[0.5],'b':[0.5,1.5],'c':[1.5]}``. 
id_order : list - An ordered list of ids, defines the order of observations when - iterating over ``W`` if not set, lexicographical ordering is used + An ordered list of IDs, defines the order of observations when + iterating over `W` if not set, lexicographical ordering is used to iterate and the ``id_order_set`` property will return ``False``. This can be set after creation by setting the ``id_order`` property. silence_warnings : bool @@ -168,7 +177,7 @@ class W(object): >>> round(w.trcWtW, 3) 2533.667 - Cardinality Histogram: + Cardinality histogram: >>> w.histogram [(2, 4), (3, 392), (4, 9604)] @@ -177,7 +186,6 @@ class W(object): >>> from libpysal.weights import W >>> w = W({1:[0],0:[1],2:[], 3:[]}) - UserWarning: The weights matrix is not fully connected: There are 3 disconnected components. There are 2 islands with ids: 2, 3. @@ -187,16 +195,20 @@ class W(object): def __init__( self, neighbors, weights=None, id_order=None, silence_warnings=False, ids=None ): + self.silence_warnings = silence_warnings self.transformations = {} self.neighbors = neighbors + if not weights: weights = {} for key in neighbors: weights[key] = [1.0] * len(neighbors[key]) + self.weights = weights self.transformations["O"] = self.weights.copy() # original weights self.transform = "O" + if id_order is None: self._id_order = list(self.neighbors.keys()) self._id_order.sort() @@ -204,14 +216,17 @@ def __init__( else: self._id_order = id_order self._id_order_set = True + self._reset() self._n = len(self.weights) + if (not self.silence_warnings) and (self.n_components > 1): message = ( "The weights matrix is not fully connected: " "\n There are %d disconnected components." % self.n_components ) ni = len(self.islands) + if ni == 1: message = message + "\n There is 1 island with id: %s." 
% ( str(self.islands[0]) @@ -221,62 +236,79 @@ def __init__( ni, ", ".join(str(island) for island in self.islands), ) + warnings.warn(message) def _reset(self): """Reset properties.""" + self._cache = {} def to_file(self, path="", format=None): - """ - Write a weights to a file. The format is guessed automatically + """Write a weights object to a file. The format is inferred from the path, but can be overridden with the format argument. - - See libpysal.io.FileIO for more information. + See ``libpysal.io.FileIO`` for more information. Parameters ---------- - path : string - location to save the file - format : string - string denoting the format to write the weights to. + path : str + The location to save the file. Default is ``''``. + format : str + The format of the weights file to write. Default is ``None``. + + See Also + -------- + + libpysal.io.FileIO - Returns - ------- - None """ + f = popen(dataPath=path, mode="w", dataFormat=format) f.write(self) f.close() @classmethod def from_file(cls, path="", format=None): - """ - Read a weights file into a W object. + """Read a weights file into a `W` object. Parameters ---------- - path : string - location to save the file - format : string - string denoting the format to write the weights to. + + path : str + The location to save the file. Default is ``''``. + format : str + The format of the weights file to read. Default is ``None``. Returns ------- - W object + w : libpysal.weights.W + A PySAL `W` spatial weights object. + """ + f = popen(dataPath=path, mode="r", dataFormat=format) w = f.read() f.close() + return w @classmethod def from_shapefile(cls, *args, **kwargs): - # we could also just "do the right thing," but I think it'd make sense to - # try and get people to use `Rook.from_shapefile(shapefile)` rather than - # W.from_shapefile(shapefile, type=`rook`), otherwise we'd need to build - # a type dispatch table. Generic W should be for stuff we don't know + """Construct a weights object from a shapefile. 
+ + Raises + ------ + + NotImplementedError + Use type-specific constructors, like Rook, Queen, DistanceBand, or Kernel. + + """ + + # we could also just 'do the right thing,' but I think it'd make sense to + # try and get people to use ``Rook.from_shapefile(shapefile)`` rather than + # `W`.from_shapefile(shapefile, type=`rook`), otherwise we'd need to build + # a type dispatch table. Generic `W` should be for stuff we don't know # anything about. raise NotImplementedError( "Use type-specific constructors, like Rook, Queen, DistanceBand, or Kernel" @@ -288,22 +320,23 @@ def from_WSP(cls, WSP, silence_warnings=True): Parameters ---------- - wsp : WSP - PySAL sparse weights object - silence_warnings : bool + WSP : WSP + PySAL sparse weights object + silence_warnings : bool By default ``libpysal`` will print a warning if the dataset contains any disconnected components or islands. To silence this warning set this parameter to ``True``. - Returns ------- - w : W - PySAL weights object + + w : W + PySAL weights object Examples -------- + >>> from libpysal.weights import lat2W, WSP, W Build a 10x10 scipy.sparse matrix for a rectangular 2x5 region of cells @@ -324,6 +357,7 @@ def from_WSP(cls, WSP, silence_warnings=True): >>> print(w.full()[0][0]) [0 1 0 0 0 1 0 0 0 0] """ + data = WSP.sparse.data indptr = WSP.sparse.indptr id_order = WSP.id_order @@ -344,14 +378,14 @@ def from_WSP(cls, WSP, silence_warnings=True): w = W(neighbors, weights, ids, silence_warnings=silence_warnings) w._sparse = copy.deepcopy(WSP.sparse) w._cache["sparse"] = w._sparse + return w @classmethod def from_adjlist( cls, adjlist, focal_col="focal", neighbor_col="neighbor", weight_col=None ): - """ - Return an adjacency list representation of a weights object. + """Return an adjacency list representation of a weights object. Parameters ---------- @@ -366,13 +400,23 @@ def from_adjlist( Name of the column with the weight information. 
If not provided and the dataframe has no column named "weight" then all weights are assumed to be 1. + + Returns + ------- + w : libpysal.weights.W + A PySAL `W` spatial weights object. + """ + if weight_col is None: weight_col = "weight" + try_weightcol = getattr(adjlist, weight_col) + if try_weightcol is None: adjlist = adjlist.copy(deep=True) adjlist["weight"] = 1 + grouper = adjlist.groupby(focal_col) neighbors = dict() weights = dict() @@ -393,18 +437,18 @@ def to_adjlist( weight_col="weight", sort_joins=False, ): - """ - Compute an adjacency list representation of a weights object. + """Compute an adjacency list representation of a weights object. Parameters ---------- + remove_symmetric : bool - Whether or not to remove symmetric entries. If the ``W`` + Whether or not to remove symmetric entries. If the `W` is symmetric, a standard directed adjacency list will contain both the forward and backward links by default because adjacency lists are a directed graph representation. If this is ``True``, - a ``W`` created from this adjacency list **MAY NOT BE THE SAME** - as the original ``W``. If you would like to consider (1,2) and + a `W` created from this adjacency list **MAY NOT BE THE SAME** + as the original `W`. If you would like to consider (1,2) and (2,1) as distinct links, leave this as ``False``. drop_islands : bool Whether or not to preserve islands as entries in the adjacency @@ -412,16 +456,29 @@ def to_adjlist( in the adjacency list. If islands are kept, they are coded as self-neighbors with zero weight. focal_col : str - Name of the column in which to store "source" node ids. + Name of the column in which to store 'source' node ids. neighbor_col : str - Name of the column in which to store "destination" node ids. + Name of the column in which to store 'destination' node ids. weight_col : str Name of the column in which to store weight information. 
sort_joins : bool Whether or not to lexicographically sort the adjacency list by (focal_col, neighbor_col). Default is False. + Raises + ------ + + ImportError + Pandas must be installed to use this function. + + Returns + ------- + + adjlist : pandas.DataFrame + An adjacency list representation within a dataframe. + """ + try: import pandas except (ImportError, ModuleNotFoundError): @@ -453,6 +510,7 @@ def to_adjlist( adjlist = pandas.concat((adjlist, island_adjlist)).reset_index(drop=True) if sort_joins: return adjlist.sort_values([focal_col, neighbor_col]) + return adjlist def to_networkx(self): @@ -460,8 +518,16 @@ def to_networkx(self): Returns ------- - A ``networkx`` graph representation of the ``W`` object. + netx : networkx.Graph + A ``networkx`` graph representation of the `W` object. + + Raises + ------ + ImportError + NetworkX must be installed to use this function. + """ + try: import networkx as nx except ImportError: @@ -471,37 +537,50 @@ def to_networkx(self): @classmethod def from_networkx(cls, graph, weight_col="weight"): - """Convert a ``networkx`` graph to a PySAL ``W`` object. + """Convert a ``networkx`` graph to a PySAL `W` object. Parameters ---------- + graph : networkx.Graph - The graph to convert to a ``W``. - weight_col : string + The graph to convert to a `W`. + weight_col : str If the graph is labeled, this should be the name of the field - to use as the weight for the ``W``. + to use as the weight for the `W`. Default is ``'weight'``. Returns ------- - w : libpysal.weights.W - A ``W`` object containing the same graph as the ``networkx`` graph. + + w : libpysal.weights.W + A `W` object containing the same graph as the ``networkx`` graph. + + Raises + ------ + + ImportError + NetworkX must be installed to use this function.
+ """ + try: import networkx as nx except ImportError: raise ImportError("NetworkX 2.7+ is required to use this function.") sparse_array = nx.to_scipy_sparse_array(graph) w = WSP(sparse_array).to_W() + return w @property def sparse(self): - """Sparse matrix object. For any matrix manipulations required for w, + """A sparse matrix object. For any matrix manipulations required for w, ``w.sparse`` should be used. This is based on ``scipy.sparse``. """ + if "sparse" not in self._cache: self._sparse = self._build_sparse() self._cache["sparse"] = self._sparse + return self._sparse @classmethod @@ -510,16 +589,19 @@ def from_sparse(cls, sparse): Parameters ---------- + sparse : scipy.sparse array Returns ------- + w : libpysal.weights.W A ``W`` object containing the same graph as the ``scipy.sparse`` graph. Notes ----- + When the sparse array has a zero in its data attribute, and the corresponding row and column values are equal, the value for the pysal weight will be 0 for the "loop". @@ -537,16 +619,19 @@ def to_sparse(self, fmt="coo"): Parameters ---------- + fmt : {'bsr', 'coo', 'csc', 'csr'} scipy.sparse format Returns ------- + scipy.sparse array A scipy.sparse array with a format of fmt. Notes ----- + The keys of the w.neighbors are encoded to determine row,col in the sparse array. 
@@ -574,23 +659,27 @@ def to_sparse(self, fmt="coo"): @property def n_components(self): """Store whether the adjacency matrix is fully connected.""" + if "n_components" not in self._cache: self._n_components, self._component_labels = connected_components( self.sparse ) self._cache["n_components"] = self._n_components self._cache["component_labels"] = self._component_labels + return self._n_components @property def component_labels(self): """Store the graph component in which each observation falls.""" + if "component_labels" not in self._cache: self._n_components, self._component_labels = connected_components( self.sparse ) self._cache["n_components"] = self._n_components self._cache["component_labels"] = self._component_labels + return self._component_labels def _build_sparse(self): @@ -600,67 +689,67 @@ def _build_sparse(self): col = [] data = [] id2i = self.id2i + for i, neigh_list in list(self.neighbor_offsets.items()): card = self.cardinalities[i] row.extend([id2i[i]] * card) col.extend(neigh_list) data.extend(self.weights[i]) + row = np.array(row) col = np.array(col) data = np.array(data) + s = scipy.sparse.csr_matrix((data, (row, col)), shape=(self.n, self.n)) + return s @property def id2i(self): - """Dictionary where the key is an ID and the value is that ID's - index in ``W.id_order``. + """A dictionary where the key is an ID + and the value is that ID's index in ``W.id_order``. """ + if "id2i" not in self._cache: self._id2i = {} for i, id_i in enumerate(self._id_order): self._id2i[id_i] = i self._id2i = self._id2i self._cache["id2i"] = self._id2i + return self._id2i @property def n(self): - """Number of units.""" + """The number of units.""" + if "n" not in self._cache: self._n = len(self.neighbors) self._cache["n"] = self._n + return self._n @property def s0(self): - r"""``s0`` is defined as - - .. 
math:: + """``s0`` is defined as :math:`s0=\sum_i \sum_j w_{i,j}`.""" - s0=\sum_i \sum_j w_{i,j} - - """ if "s0" not in self._cache: self._s0 = self.sparse.sum() self._cache["s0"] = self._s0 + return self._s0 @property def s1(self): - r"""``s1`` is defined as - - .. math:: + """``s1`` is defined as :math:`s1=1/2 \sum_i \sum_j \Big(w_{i,j} + w_{j,i}\Big)^2`.""" - s1=1/2 \sum_i \sum_j \Big(w_{i,j} + w_{j,i}\Big)^2 - - """ if "s1" not in self._cache: t = self.sparse.transpose() t = t + self.sparse t2 = t.multiply(t) # element-wise square self._s1 = t2.sum() / 2.0 self._cache["s1"] = self._s1 + return self._s1 @property @@ -669,27 +758,26 @@ def s2array(self): See Also -------- - s2 + + libpysal.weights.W.s2 """ + if "s2array" not in self._cache: s = self.sparse self._s2array = np.array(s.sum(1) + s.sum(0).transpose()) ** 2 self._cache["s2array"] = self._s2array + return self._s2array @property def s2(self): - r"""``s2`` is defined as - - .. math:: + """``s2`` is defined as :math:`s2=\sum_j \Big(\sum_i w_{i,j} + \sum_i w_{j,i}\Big)^2`.""" - s2=\sum_j \Big(\sum_i w_{i,j} + \sum_i w_{j,i}\Big)^2 - - """ if "s2" not in self._cache: self._s2 = self.s2array.sum() self._cache["s2"] = self._s2 + return self._s2 @property @@ -698,12 +786,15 @@ def trcW2(self): See Also -------- - diagW2 + + libpysal.weights.W.diagW2 """ + if "trcW2" not in self._cache: self._trcW2 = self.diagW2.sum() self._cache["trcw2"] = self._trcW2 + return self._trcW2 @property @@ -712,12 +803,15 @@ def diagW2(self): See Also -------- - trcW2 + + libpysal.weights.W.trcW2 """ + if "diagw2" not in self._cache: self._diagW2 = (self.sparse * self.sparse).diagonal() self._cache["diagW2"] = self._diagW2 + return self._diagW2 @property @@ -726,12 +820,15 @@ def diagWtW(self): See Also -------- - trcWtW + + libpysal.weights.W.trcWtW """ + if "diagWtW" not in self._cache: self._diagWtW = (self.sparse.transpose() * self.sparse).diagonal() self._cache["diagWtW"] = self._diagWtW + return self._diagWtW @property @@ 
-740,105 +837,130 @@ def trcWtW(self): See Also -------- - diagWtW + + libpysal.weights.W.diagWtW """ + if "trcWtW" not in self._cache: self._trcWtW = self.diagWtW.sum() self._cache["trcWtW"] = self._trcWtW + return self._trcWtW @property def diagWtW_WW(self): """Diagonal of :math:`W^{'}W + WW`.""" + if "diagWtW_WW" not in self._cache: wt = self.sparse.transpose() w = self.sparse self._diagWtW_WW = (wt * w + w * w).diagonal() self._cache["diagWtW_WW"] = self._diagWtW_WW + return self._diagWtW_WW @property def trcWtW_WW(self): """Trace of :math:`W^{'}W + WW`.""" + if "trcWtW_WW" not in self._cache: self._trcWtW_WW = self.diagWtW_WW.sum() self._cache["trcWtW_WW"] = self._trcWtW_WW + return self._trcWtW_WW @property def pct_nonzero(self): """Percentage of nonzero weights.""" + if "pct_nonzero" not in self._cache: self._pct_nonzero = 100.0 * self.sparse.nnz / (1.0 * self._n**2) self._cache["pct_nonzero"] = self._pct_nonzero + return self._pct_nonzero @property def cardinalities(self): """Number of neighbors for each observation.""" + if "cardinalities" not in self._cache: c = {} for i in self._id_order: c[i] = len(self.neighbors[i]) self._cardinalities = c self._cache["cardinalities"] = self._cardinalities + return self._cardinalities @property def max_neighbors(self): """Largest number of neighbors.""" + if "max_neighbors" not in self._cache: self._max_neighbors = max(self.cardinalities.values()) self._cache["max_neighbors"] = self._max_neighbors + return self._max_neighbors @property def mean_neighbors(self): - """Average number of neighbors.""" + """Average (mean) number of neighbors.""" + if "mean_neighbors" not in self._cache: self._mean_neighbors = np.mean(list(self.cardinalities.values())) self._cache["mean_neighbors"] = self._mean_neighbors + return self._mean_neighbors @property def min_neighbors(self): """Minimum number of neighbors.""" + if "min_neighbors" not in self._cache: self._min_neighbors = min(self.cardinalities.values()) self._cache["min_neighbors"] 
= self._min_neighbors + return self._min_neighbors @property def nonzero(self): """Number of nonzero weights.""" + if "nonzero" not in self._cache: self._nonzero = self.sparse.nnz self._cache["nonzero"] = self._nonzero + return self._nonzero @property def sd(self): """Standard deviation of number of neighbors.""" + if "sd" not in self._cache: self._sd = np.std(list(self.cardinalities.values())) self._cache["sd"] = self._sd + return self._sd @property def asymmetries(self): """List of id pairs with asymmetric weights.""" + if "asymmetries" not in self._cache: self._asymmetries = self.asymmetry() self._cache["asymmetries"] = self._asymmetries + return self._asymmetries @property def islands(self): """List of ids without any neighbors.""" + if "islands" not in self._cache: self._islands = [i for i, c in list(self.cardinalities.items()) if c == 0] self._cache["islands"] = self._islands + return self._islands @property @@ -846,6 +968,7 @@ def histogram(self): """Cardinality histogram as a dictionary where key is the id and value is the number of neighbors for that unit. """ + if "histogram" not in self._cache: ct, bin = np.histogram( list(self.cardinalities.values()), @@ -853,6 +976,7 @@ def histogram(self): ) self._histogram = list(zip(bin, ct)) self._cache["histogram"] = self._histogram + return self._histogram def __getitem__(self, key): @@ -860,25 +984,26 @@ def __getitem__(self, key): Examples -------- + >>> from libpysal.weights import lat2W >>> w = lat2W() - >>> w[0] == dict({1: 1.0, 5: 1.0}) True + """ + return dict(list(zip(self.neighbors[key], self.weights[key]))) def __iter__(self): - """ - Support iteration over weights. + """Support iteration over weights. Examples -------- + >>> from libpysal.weights import lat2W - >>> w=lat2W(3,3) - >>> for i,wi in enumerate(w): - ... print(i,wi[0]) - ... + >>> w = lat2W(3, 3) + >>> for i, wi in enumerate(w): + ... 
print(i, wi[0]) 0 0 1 1 2 2 @@ -888,24 +1013,32 @@ def __iter__(self): 6 6 7 7 8 8 - >>> + """ + for i in self._id_order: yield i, dict(list(zip(self.neighbors[i], self.weights[i]))) def remap_ids(self, new_ids): - """ - In place modification throughout ``W`` of id values from - ``w.id_order`` to ``new_ids`` in all. + """An in place modification throughout `W` of id + values from ``w.id_order`` to ``new_ids`` in all. Parameters ---------- - new_ids : list, numpy.ndarray - Aligned list of new ids to be inserted. Note that first - element of ``new_ids`` will replace first element of - ``w.id_order``, second element of ``new_ids`` replaces second - element of ``w.id_order`` and so on. + new_ids : {list, numpy.ndarray} + An aligned list of new ids to be inserted. Note that the + first element of ``new_ids`` will replace the first element + of ``w.id_order``, the second element of ``new_ids`` replaces + the second element of ``w.id_order`` and so on. + + Raises + ------ + + ValueError + The length of ``old_ids`` does not match that of ``new_ids``. + ValueError + The list ``new_ids`` contains duplicates. Examples -------- @@ -922,21 +1055,24 @@ def remap_ids(self, new_ids): ['id0', 'id1', 'id2', 'id3', 'id4', 'id5', 'id6', 'id7', 'id8'] >>> w.neighbors['id0'] ['id3', 'id1'] + """ old_ids = self._id_order + if len(old_ids) != len(new_ids): raise Exception( - "W.remap_ids: length of `old_ids` does not match that of" - " new_ids" + "W.remap_ids: length of `old_ids` does not match that of `new_ids`." 
) + if len(set(new_ids)) != len(new_ids): - raise Exception("W.remap_ids: list `new_ids` contains duplicates") + raise Exception("'W.remap_ids': list 'new_ids' contains duplicates.") else: new_neighbors = {} new_weights = {} old_transformations = self.transformations["O"].copy() new_transformations = {} + for o, n in zip(old_ids, new_ids): o_neighbors = self.neighbors[o] o_weights = self.weights[o] @@ -944,6 +1080,7 @@ def remap_ids(self, new_ids): new_neighbors[n] = n_neighbors new_weights[n] = o_weights[:] new_transformations[n] = old_transformations[o] + self.neighbors = new_neighbors self.weights = new_weights self.transformations["O"] = new_transformations @@ -955,7 +1092,7 @@ def remap_ids(self, new_ids): self._reset() def __set_id_order(self, ordered_ids): - """Set the iteration order in w. ``W`` can be iterated over. On + """Set the iteration order in ``w``. `W` can be iterated over. On construction the iteration order is set to the lexicographic order of the keys in the ``w.weights`` dictionary. If a specific order is required it can be set with this method. @@ -966,6 +1103,12 @@ def __set_id_order(self, ordered_ids): ordered_ids : sequence Identifiers for observations in specified order. + Raises + ------ + + ValueError + The ``ordered_ids`` argument does not align with ``W.ids``. + Notes ----- @@ -977,10 +1120,9 @@ def __set_id_order(self, ordered_ids): -------- >>> from libpysal.weights import lat2W - >>> w=lat2W(3,3) + >>> w = lat2W(3,3) >>> for i,wi in enumerate(w): ... print(i, wi[0]) - ... 0 0 1 1 2 2 @@ -990,14 +1132,14 @@ def __set_id_order(self, ordered_ids): 6 6 7 7 8 8 + >>> w.id_order [0, 1, 2, 3, 4, 5, 6, 7, 8] - >>> w.id_order=range(8,-1,-1) + >>> w.id_order = range(8,-1,-1) >>> list(w.id_order) [8, 7, 6, 5, 4, 3, 2, 1, 0] >>> for i,w_i in enumerate(w): ... print(i,w_i[0]) - ... 
0 8 1 7 2 6 @@ -1015,12 +1157,13 @@ def __set_id_order(self, ordered_ids): self._id_order_set = True self._reset() else: - raise Exception("ordered_ids do not align with W ids") + raise ValueError("'ordered_ids' do not align with 'W.ids'.") def __get_id_order(self): """Returns the ids for the observations in the order in which they would be encountered if iterating over the weights. """ + return self._id_order id_order = property(__get_id_order, __set_id_order) @@ -1031,36 +1174,41 @@ def id_order_set(self): Examples -------- + >>> from libpysal.weights import lat2W - >>> w=lat2W() + >>> w = lat2W() >>> w.id_order_set True + """ + return self._id_order_set @property def neighbor_offsets(self): - """ - Given the current ``id_order``, ``neighbor_offsets[id]`` is the - offsets of the id's neighbors in ``id_order``. + """Given the current ``id_order``, ``neighbor_offsets[id]`` + is the offsets of the id's neighbors in ``id_order``. Returns ------- + neighbor_list : list Offsets of the id's neighbors in ``id_order``. Examples -------- + >>> from libpysal.weights import W - >>> neighbors={'c': ['b'], 'b': ['c', 'a'], 'a': ['b']} - >>> weights ={'c': [1.0], 'b': [1.0, 1.0], 'a': [1.0]} - >>> w=W(neighbors,weights) + >>> neighbors = {'c': ['b'], 'b': ['c', 'a'], 'a': ['b']} + >>> weights = {'c': [1.0], 'b': [1.0, 1.0], 'a': [1.0]} + >>> w = W(neighbors,weights) >>> w.id_order = ['a','b','c'] >>> w.neighbor_offsets['b'] [2, 0] >>> w.id_order = ['b','a','c'] >>> w.neighbor_offsets['b'] [2, 1] + """ if "neighbors_0" not in self._cache: @@ -1079,14 +1227,16 @@ def get_transform(self): Returns ------- + transformation : str, None Valid transformation value. See the ``transform`` parameters in ``set_transform()`` for a detailed description. 
Examples -------- + >>> from libpysal.weights import lat2W - >>> w=lat2W() + >>> w = lat2W() >>> w.weights[0] [1.0, 1.0] >>> w.transform @@ -1100,6 +1250,7 @@ def get_transform(self): See also -------- + set_transform """ @@ -1111,6 +1262,7 @@ def set_transform(self, value="B"): Parameters ---------- + transform : str This parameter is not case sensitive. The following are valid transformations. @@ -1124,15 +1276,14 @@ def set_transform(self, value="B"): Notes ----- - Transformations are applied only to the value of the weights at - instantiation. Chaining of transformations cannot be done on a ``W`` - instance. - + Transformations are applied only to the value of the weights at instantiation. + Chaining of transformations cannot be done on a `W` instance. Examples -------- + >>> from libpysal.weights import lat2W - >>> w=lat2W() + >>> w = lat2W() >>> w.weights[0] [1.0, 1.0] >>> w.transform @@ -1143,9 +1294,12 @@ def set_transform(self, value="B"): >>> w.transform='b' >>> w.weights[0] [1.0, 1.0] + """ + value = value.upper() self._transform = value + if value in self.transformations: self.weights = self.transformations[value] self._reset() @@ -1218,55 +1372,74 @@ def set_transform(self, value="B"): self.weights = original self._reset() else: - raise Exception("unsupported weights transformation") + raise Exception("Unsupported weights transformation.") transform = property(get_transform, set_transform) def asymmetry(self, intrinsic=True): - r""" - Asymmetry check. + r"""Asymmetry check. Parameters ---------- + intrinsic : bool Default is ``True``. Intrinsic symmetry is defined as - - .. math:: - - w_{i,j} == w_{j,i} - - If ``intrinsic`` is ``False`` symmetry is defined as - - .. math:: - - i \in N_j \ \& \ j \in N_i - + :math:`w_{i,j} == w_{j,i}`. If ``intrinsic`` is ``False`` + symmetry is defined as :math:`i \in N_j \ \& \ j \in N_i` where :math:`N_j` is the set of neighbors for :math:`j`. 
Returns ------- - asymmetries : list - Empty if no asymmetries are found if asymmetries, then a + + ijs : list + Empty if no asymmetries are found; otherwise a ``list`` of ``(i,j)`` tuples is returned. Examples -------- >>> from libpysal.weights import lat2W - >>> w=lat2W(3,3) + >>> w = lat2W(3, 3) >>> w.asymmetry() [] - >>> w.transform='r' + + >>> w.transform = 'r' >>> w.asymmetry() - [(0, 1), (0, 3), (1, 0), (1, 2), (1, 4), (2, 1), (2, 5), (3, 0), (3, 4), (3, 6), (4, 1), (4, 3), (4, 5), (4, 7), (5, 2), (5, 4), (5, 8), (6, 3), (6, 7), (7, 4), (7, 6), (7, 8), (8, 5), (8, 7)] + [(0, 1), + (0, 3), + (1, 0), + (1, 2), + (1, 4), + (2, 1), + (2, 5), + (3, 0), + (3, 4), + (3, 6), + (4, 1), + (4, 3), + (4, 5), + (4, 7), + (5, 2), + (5, 4), + (5, 8), + (6, 3), + (6, 7), + (7, 4), + (7, 6), + (7, 8), + (8, 5), + (8, 7)] + >>> result = w.asymmetry(intrinsic=False) >>> result [] - >>> neighbors={0:[1,2,3], 1:[1,2,3], 2:[0,1], 3:[0,1]} - >>> weights={0:[1,1,1], 1:[1,1,1], 2:[1,1], 3:[1,1]} - >>> w=W(neighbors,weights) + >>> neighbors = {0: [1, 2, 3], 1:[1, 2, 3], 2:[0, 1], 3: [0, 1]} + >>> weights = {0: [1, 1, 1], 1: [1, 1, 1], 2: [1, 1], 3: [1, 1]} + >>> w = W(neighbors, weights) >>> w.asymmetry() [(0, 1), (1, 0)] + """ if intrinsic: @@ -1278,25 +1451,43 @@ def asymmetry(self, intrinsic=True): self.transform = transform ids = np.nonzero(wd) + if len(ids[0]) == 0: - return [] + ijs = [] + return ijs else: ijs = list(zip(ids[0], ids[1])) ijs.sort() return ijs def symmetrize(self, inplace=False): - """Construct a symmetric KNN weight. This ensures that the neighbors + """Construct a symmetric `KNN` weight. This ensures that the neighbors of each focal observation consider the focal observation itself as - a neighbor. This returns a generic ``W`` object, since the object is no + a neighbor. This returns a generic `W` object, since the object is no longer guaranteed to have ``k`` neighbors for each observation.
+ + Parameters + ---------- + + inplace : bool + Update the `W` object in place (``True``). Default is ``False``. + + Returns + ------- + + out_W : libpysal.weights.W + A symmetrized `W` object. Default is ``False``. + If ``inplace`` is set to ``True`` the `W` object is simply updated. + """ + if not inplace: neighbors = copy.deepcopy(self.neighbors) weights = copy.deepcopy(self.weights) out_W = W(neighbors, weights, id_order=self.id_order) out_W.symmetrize(inplace=True) return out_W + else: for focal, fneighbs in list(self.neighbors.items()): for j, neighbor in enumerate(fneighbs): @@ -1312,20 +1503,25 @@ def full(self): Parameters ---------- + self : libpysal.weights.W spatial weights object Returns ------- + (fullw, keys) : tuple The first element being the full ``numpy.ndarray`` and second element keys being the ids associated with each row in the array. Examples -------- + >>> from libpysal.weights import W, full - >>> neighbors = {'first':['second'],'second':['first','third'],'third':['second']} - >>> weights = {'first':[1],'second':[1,1],'third':[1]} + >>> neighbors = { + ... 'first': ['second'], 'second': ['first', 'third'], 'third': ['second'] + ... } + >>> weights = {'first': [1], 'second': [1, 1], 'third': [1]} >>> w = W(neighbors, weights) >>> wf, ids = full(w) >>> wf @@ -1334,50 +1530,62 @@ def full(self): [0., 1., 0.]]) >>> ids ['first', 'second', 'third'] + """ + wfull = self.sparse.toarray() keys = list(self.neighbors.keys()) + if self.id_order: keys = self.id_order return (wfull, keys) def to_WSP(self): - """Generate a ``WSP`` object. + """Generate a `WSP` object. Returns ------- - implicit : libpysal.weights.WSP - Thin ``W`` class + w : libpysal.weights.WSP + A thin `W` class. Examples -------- + >>> from libpysal.weights import W, WSP - >>> neighbors={'first':['second'],'second':['first','third'],'third':['second']} - >>> weights={'first':[1],'second':[1,1],'third':[1]} - >>> w=W(neighbors,weights) - >>> wsp=w.to_WSP() + >>> neighbors = { + ... 
'first': ['second'], 'second': ['first', 'third'], 'third': ['second'] + ... } + >>> weights = {'first': [1], 'second': [1, 1], 'third': [1]} + >>> w = W(neighbors,weights) + >>> wsp = w.to_WSP() >>> isinstance(wsp, WSP) True + >>> wsp.n 3 + >>> wsp.s0 4 See also -------- - WSP + + libpysal.weights.WSP """ - return WSP(self.sparse, self._id_order) + + w = WSP(self.sparse, self._id_order) + + return w def set_shapefile(self, shapefile, idVariable=None, full=False): - """ - Adding metadata for writing headers of ``.gal`` and ``.gwt`` files. + """Adding metadata for writing headers of ``.gal`` and ``.gwt`` files. Parameters ---------- + shapefile : str The shapefile name used to construct weights. idVariable : str @@ -1387,6 +1595,7 @@ def set_shapefile(self, shapefile, idVariable=None, full=False): Write out the entire path for a shapefile (``True``) or only the base of the shapefile without extension (``False``). Default is ``True``. + """ if full: @@ -1404,6 +1613,7 @@ def plot( Parameters ---------- + gdf : geopandas.GeoDataFrame The original shapes whose topological relations are modelled in ``W``. indexed_on : str @@ -1427,13 +1637,21 @@ def plot( Returns ------- + f : matplotlib.figure.Figure Figure on which the plot is made. ax : matplotlib.axes.Axes Axis on which the plot is made. + Raises + ------ + + ImportError + Matplotlib must be installed to use this function. + Notes ----- + If you'd like to overlay the actual shapes from the ``geopandas.GeoDataFrame``, call ``gdf.plot(ax=ax)`` after this. To plot underneath, adjust the z-order of the plot as follows: @@ -1447,26 +1665,33 @@ def plot( >>> import geopandas >>> gdf = geopandas.read_file(lp.examples.get_path("columbus.shp")) >>> weights = Queen.from_dataframe(gdf) - >>> tmp = weights.plot(gdf, color='firebrickred', node_kws=dict(marker='*', color='k')) + >>> tmp = weights.plot( + ... gdf, color='firebrickred', node_kws=dict(marker='*', color='k') + ... 
) + """ + try: import matplotlib.pyplot as plt except ImportError: raise ImportError( - "W.plot depends on matplotlib.pyplot, and this was" - "not able to be imported. \nInstall matplotlib to" + "'W.plot()' depends on 'matplotlib.pyplot', and this was" + "not able to be imported. \nInstall 'matplotlib' to" "plot spatial weights." ) + if ax is None: f = plt.figure() ax = plt.gca() else: f = plt.gcf() + if node_kws is not None: if "color" not in node_kws: node_kws["color"] = color else: node_kws = dict(color=color) + if edge_kws is not None: if "color" not in edge_kws: edge_kws["color"] = color @@ -1489,49 +1714,65 @@ def plot( ax.plot(*list(zip(focal, neighbor)), marker=None, **edge_kws) seen.update((idx, nidx)) seen.update((nidx, idx)) + ax.scatter( gdf.centroid.apply(lambda p: p.x), gdf.centroid.apply(lambda p: p.y), **node_kws, ) + return f, ax class WSP(object): - """Thin ``W`` class for ``spreg``. + """Thin `W` class for ``spreg``. Parameters ---------- sparse : scipy.sparse.{matrix-type} - NxN object from ``scipy.sparse`` + An :math:`NxN` object from ``scipy.sparse``. + id_order : list + An ordered list of ids, assumed to match the ordering in ``sparse``. Attributes ---------- - n : int - description - s0 : float - description - trcWtW_WW : float - description + n : int + The number of rows in ``sparse``. + s0 : float + :math:`s0=\sum_i \sum_j w_{i,j}`. + trcWtW_WW : float + Trace of :math:`W^{'}W + WW`. + + Raises + ------ + + ValueError + A scipy sparse object must be passed in. + ValueError + The weights object must be square. + ValueError + The number of values in ``id_order`` must match shape of ``sparse``. 
Examples -------- - From GAL information + From GAL information: >>> import scipy.sparse >>> from libpysal.weights import WSP >>> rows = [0, 1, 1, 2, 2, 3] >>> cols = [1, 0, 2, 1, 3, 3] >>> weights = [1, 0.75, 0.25, 0.9, 0.1, 1] - >>> sparse = scipy.sparse.csr_matrix((weights, (rows, cols)), shape=(4,4)) + >>> sparse = scipy.sparse.csr_matrix((weights, (rows, cols)), shape=(4, 4)) >>> w = WSP(sparse) >>> w.s0 4.0 + >>> w.trcWtW_WW 6.395 + >>> w.n 4 @@ -1539,10 +1780,12 @@ class WSP(object): def __init__(self, sparse, id_order=None, index=None): if not scipy.sparse.issparse(sparse): - raise ValueError("must pass a scipy sparse object") + raise ValueError("A scipy sparse object must be passed in.") + rows, cols = sparse.shape if rows != cols: - raise ValueError("Weights object must be square") + raise ValueError("The weights object must be square.") + self.sparse = sparse.tocsr() self.n = sparse.shape[0] self._cache = {} @@ -1580,21 +1823,18 @@ def id_order(self): @property def s0(self): - r"""``s0`` is defined as: - - .. math:: - - s0=\sum_i \sum_j w_{i,j} + r"""``s0`` is defined as :math:`s0=\sum_i \sum_j w_{i,j}`.""" - """ if "s0" not in self._cache: self._s0 = self.sparse.sum() self._cache["s0"] = self._s0 + return self._s0 @property def trcWtW_WW(self): """Trace of :math:`W^{'}W + WW`.""" + if "trcWtW_WW" not in self._cache: self._trcWtW_WW = self.diagWtW_WW.sum() self._cache["trcWtW_WW"] = self._trcWtW_WW @@ -1603,36 +1843,43 @@ def trcWtW_WW(self): @property def diagWtW_WW(self): """Diagonal of :math:`W^{'}W + WW`.""" + if "diagWtW_WW" not in self._cache: wt = self.sparse.transpose() w = self.sparse self._diagWtW_WW = (wt * w + w * w).diagonal() self._cache["diagWtW_WW"] = self._diagWtW_WW + return self._diagWtW_WW @classmethod def from_W(cls, W): - """Constructs a ``WSP`` object from the ``W``'s sparse matrix. + """Constructs a `WSP` object from the `W`'s sparse matrix. 
Parameters ---------- + W : libpysal.weights.W A PySAL weights object with a sparse form and ids. Returns ------- - A ``WSP`` instance. + + w : libpysal.weights.WSP + A `WSP` instance. + """ - return cls(W.sparse, id_order=W.id_order) + + w = cls(W.sparse, id_order=W.id_order) + + return w def to_W(self, silence_warnings=False): - """ - Convert a pysal WSP object (thin weights matrix) to a pysal W object. + """Convert a PySAL `WSP` object (thin weights matrix) to a PySAL `W` object. Parameters ---------- - self : WSP - PySAL sparse weights object. + silence_warnings : bool Switch to ``True`` to turn off print statements for every observation with islands. Default is ``False``, which does @@ -1640,29 +1887,33 @@ def to_W(self, silence_warnings=False): Returns ------- - w : W - PySAL weights object. + + w : libpysal.weights.W + A PySAL spatial weights object. Examples -------- + >>> from libpysal.weights import lat2SW, WSP, WSP2W Build a 10x10 ``scipy.sparse`` matrix for a rectangular 2x5 region of cells (rook contiguity), then construct a ``libpysal`` - sparse weights object (``self``). + sparse weights object. >>> sp = lat2SW(2, 5) - >>> self = WSP(sp) - >>> self.n + >>> wsp = WSP(sp) + >>> wsp.n 10 - >>> print(self.sparse[0].todense()) + + >>> print(wsp.sparse[0].todense()) [[0 1 0 0 0 1 0 0 0 0]] Convert this sparse weights object to a standard PySAL weights object. - >>> w = WSP2W(self) + >>> w = WSP2W(wsp) >>> w.n 10 + >>> print(w.full()[0][0]) [0. 1. 0. 0. 0. 1. 0. 0. 0. 0.]
@@ -1672,12 +1923,15 @@ def to_W(self, silence_warnings=False): data = list(self.sparse.data) indptr = list(self.sparse.indptr) id_order = self.id_order + if id_order: # replace indices with user IDs indices = [id_order[i] for i in indices] else: id_order = list(range(self.n)) + neighbors, weights = {}, {} + start = indptr[0] for i in range(self.n): oid = id_order[i] @@ -1685,8 +1939,10 @@ def to_W(self, silence_warnings=False): neighbors[oid] = indices[start:end] weights[oid] = data[start:end] start = end + ids = copy.copy(self.id_order) w = W(neighbors, weights, ids, silence_warnings=silence_warnings) w._sparse = copy.deepcopy(self.sparse) w._cache["sparse"] = w._sparse + return w