diff --git a/paper/figs/mclp.png b/paper/figs/mclp.png new file mode 100644 index 00000000..945b29bc Binary files /dev/null and b/paper/figs/mclp.png differ diff --git a/paper/figs/mexico_maxp.png b/paper/figs/mexico_maxp.png index fe034fef..039f91a4 100644 Binary files a/paper/figs/mexico_maxp.png and b/paper/figs/mexico_maxp.png differ diff --git a/paper/paper.bib b/paper/paper.bib index 37059b8b..32fd3797 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -46,12 +46,13 @@ @article{miller1960integer doi={10.1145/321043.321046} } -@article{murray2019contemporary, +@article{murray2021contemporary, title={Contemporary optimization application through geographic information systems}, - author={Murray, Alan T}, + author={Murray, Alan T.}, journal={Omega}, + volume = {99}, pages={102176}, - year={2019}, + year={2021}, publisher={Elsevier}, doi={10.1016/j.omega.2019.102176} } @@ -88,14 +89,16 @@ @article{openshaw1995algorithms doi={10.1068/a270425} } -@incollection{rey2010pysal, - title={PySAL: A Python library of spatial analytical methods}, - author={Rey, Sergio J and Anselin, Luc}, - booktitle={Handbook of applied spatial analysis}, - pages={175--193}, - year={2010}, - publisher={Springer}, - doi={10.1007/978-3-642-03647-7_11} +@article{pysal2007, + author={Sergio Rey and Luc Anselin}, + title={{PySAL: A Python Library of Spatial Analytical Methods}}, + journal={The Review of Regional Studies}, + year=2007, + volume={37}, + number={1}, + pages={5-27}, + keywords={Open Source; Software; Spatial}, + url={https://rrs.scholasticahq.com/article/8285.pdf} } @article{rey2015open, @@ -110,6 +113,15 @@ @article{rey2015open doi={10.3390/ijgi4020815} } +@article{Rey2021, + author={Rey, Sergio J. and Anselin, Luc and Amaral, Pedro and Arribas-Bel, Dani and Cortes, Renan Xavier and Gaboardi, James David and Kang, Wei and Knaap, Elijah and Li, Ziqi and Lumnitz, Stefanie and Oshan, Taylor M. 
and Shao, Hu and Wolf, Levi John}, + title={{The PySAL Ecosystem: Philosophy and Implementation}}, + journal={Geographical Analysis}, + year={2021}, + doi={10.1111/gean.12276}, + abstract={PySAL is a library for geocomputation and spatial data science. Written in Python, the library has a long history of supporting novel scholarship and broadening methodological impacts far afield of academic work. Recently, many new techniques, methods of analyses, and development modes have been implemented, making the library much larger and more encompassing than that previously discussed in the literature. As such, we provide an introduction to the library as it stands now, as well as the scientific and conceptual underpinnings of its core set of components. Finally, we provide a prospective look at the library's future evolution.} +} + @article{wolf2020, title={Spatially-encouraged spectral clustering: a technique for blending map typologies and regionalization}, author={Wolf, Levi}, @@ -117,11 +129,45 @@ @article{wolf2020 doi={10.31219/osf.io/yzt2p} } -@article{Rey2021, - author={Rey, Sergio J. and Anselin, Luc and Amaral, Pedro and Arribas-Bel, Dani and Cortes, Renan Xavier and Gaboardi, James David and Kang, Wei and Knaap, Elijah and Li, Ziqi and Lumnitz, Stefanie and Oshan, Taylor M. and Shao, Hu and Wolf, Levi John}, - title={{The PySAL Ecosystem: Philosophy and Implementation}}, +@article{Toregas1971, + author={Toregas, Constantine and Swain, R. and ReVelle, C. S. and Bergman, L.}, + doi={10.1287/opre.19.6.1363}, + journal={Operations Research}, + number={6}, + pages={1363--1373}, + title={{The Location of Emergency Service Facilities}}, + volume={19}, + year={1971} +} + +@article{Church1974, + author={Church, Richard L. 
and ReVelle, C.S.}, + doi={10.1111/j.1435-5597.1974.tb00902.x}, + journal={Papers in Regional Science Association}, + pages={101--118}, + title={{The Maximal Covering Location Problem}}, + volume={32}, + year={1974} +} + +@article{ReVelle1970, + author={ReVelle, C. S. and Swain, R.W.}, journal={Geographical Analysis}, - year={2021}, - doi={10.1111/gean.12276}, - abstract={PySAL is a library for geocomputation and spatial data science. Written in Python, the library has a long history of supporting novel scholarship and broadening methodological impacts far afield of academic work. Recently, many new techniques, methods of analyses, and development modes have been implemented, making the library much larger and more encompassing than that previously discussed in the literature. As such, we provide an introduction to the library as it stands now, as well as the scientific and conceptual underpinnings of its core set of components. Finally, we provide a prospective look at the library's future evolution.} + number={1}, + pages={30--42}, + title={{Central Facilities Location}}, + volume={2}, + year={1970} } + +@article{Hakimi1964, + author={Hakimi, S. L.}, + doi={10.1287/opre.12.3.450}, + journal={Operations Research}, + number={3}, + pages={450--459}, + pmid={8598587}, + title={{Optimum Locations of Switching Centers and the Absolute Centers and Medians of a Graph}}, + volume={12}, + year={1964} +} \ No newline at end of file diff --git a/paper/paper.md b/paper/paper.md index 312a8e46..ea9527b3 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -8,14 +8,23 @@ authors: - name: Xin Feng orcid: 0000-0001-7253-3154 affiliation: "1, 2" + - name: Germano Barcelos + orcid: 0000-0002-4758-1776 + affiliation: 3 - name: James D. 
Gaboardi orcid: 0000-0002-4776-6826 - affiliation: 3 + affiliation: "4, 5" - name: Elijah Knaap orcid: 0000-0001-7520-2238 affiliation: 1 - name: Ran Wei affiliation: 1 + - name: Levi Wolf + orcid: 0000-0003-0274-599X + affiliation: 6 + - name: Qunshan Zhao + orcid: 0000-0002-5549-9457 + affiliation: 7 - name: Sergio Rey orcid: 0000-0001-5857-9762 affiliation: 1 @@ -24,73 +33,115 @@ affiliations: index: 1 - name: Department of Geography and Environmental Sustainability, University of Oklahoma index: 2 - - name: Department of Geography, Pennsylvania State University + - name: Federal University of Vi\c{c}osa index: 3 -date: 29 April 2021 + - name: Oak Ridge National Laboratory + index: 4 + - name: The Peter R. Gould Center for Geography Education and Outreach, Penn State + index: 5 + - name: University of Bristol + index: 6 + - name: Urban Big Data Centre, School of Social & Political Sciences, University of Glasgow + index: 7 +date: 01 November 2021 bibliography: paper.bib --- # Summary -Spatial optimization is a major spatial analytical tool in management and planning, the significance of which cannot be overstated. Spatial optimization models play an important role in designing and managing effective and efficient service systems such as transportation, education, public health, environmental protection, and commercial investment among others. To this end, spopt (spatial optimization) is under active development for the inclusion of newly proposed models and methods for regionalization, facility location, and transportation-oriented solutions. Spopt is a submodule in the open-source spatial analysis library PySAL (Python Spatial Analysis Library) founded by Dr. Serge Rey and Dr. Luc Ancelin in 2005 [@rey2010pysal;@rey2015open;@Rey2021]. The goal of developing spopt is to provide management and decision-making support to all relevant practitioners and to further promote the appropriate and meaningful application of spatial optimization models in practice. 
- +Spatial optimization is a major spatial analytical tool in management and planning, the significance of which cannot be overstated. Spatial optimization models play an important role in designing and managing effective and efficient service systems such as transportation, education, public health, environmental protection, and commercial investment among others. To this end, spopt (\textbf{sp}atial \textbf{opt}imization) is under active development for the inclusion of newly proposed models and methods for regionalization, facility location, and transportation-oriented solutions. Spopt is a submodule in the open-source spatial analysis library PySAL (Python Spatial Analysis Library) founded by Dr. Serge Rey and Dr. Luc Anselin in 2005 [@pysal2007;@rey2015open;@Rey2021]. The goal of developing spopt is to provide management and decision-making support to all relevant practitioners and to further promote the appropriate and meaningful application of spatial optimization models in practice. # Statement of need -Spatial optimization methods/algorithms can be accessed in many ways. ArcGIS (https://www.esri.com/en-us/home) and TransCAD (https://www.caliper.com/) are two well-known commercial GIS software packages that provide modules designed for structuring and solving spatial optimization problems. The optimization functions they offer focus on a set classical single facility location methods (e.g., Weber, Median, Centroid, 1-center), routing and shortest path methods (e.g., shortest path on the network, least cost path over the terrain), and multi-facility location-allocation methods (e.g., coverage models, p-median problem). They are user-friendly and visually appealing, but the cost is relatively high [@murray2019contemporary]. - -Open source software is another option to access spatial optimization. 
Although it may require users to have a certain level of programming experience, open source software provides relatively novel and comprehensive methods, and more importantly, it is free. This is particularly true for regionalization methods. They are very limited in commercial GIS software, and may only have grouping analysis for vector data and region identification for raster data. On the contrary, there are many application-oriented open source packages that facilitate the implementation of regionalization methods in various fields, including climate (e.g., HiClimR (https://cran.r-project.org/web/packages/HiClimR/index.html), synoptReg (https://cran.r-project.org/web/packages/synoptReg/index.html)), biography (e.g., Phyloregion (https://cran.r-project.org/web/packages/phyloregion/index.html), regioneR (http://bioconductor.org/packages/release/bioc/html/regioneR.html)), hydrology (e.g., nsRFA(https://cran.r-project.org/web/packages/nsRFA/index.html)), agricultural (e.g., OpenLCA (https://www.openlca.org/)), and so on. The functions of graph regionalization with clustering and partitioning have been provided by several packages such as Rgeoda, maxcut: Max-Cut Problem, RBGL: R Boost Graph Library, and grPartition. They are probably the most closely related projects to the regionalization section of spopt, however, they are written in R and MATLAB. Therefore, it is necessary to develop an open source optimization package written in Python that focuses on regionalization. +Spatial optimization methods/algorithms can be accessed in many ways. ArcGIS (https://www.esri.com/en-us/home) and TransCAD (https://www.caliper.com/) are two well-known commercial GIS software packages that provide modules designed for structuring and solving spatial optimization problems. 
The optimization functions they offer focus on a set of classical single facility location methods (e.g., Weber, Median, Centroid, 1-center), routing and shortest path methods (e.g., shortest path on the network, least cost path over the terrain), and multi-facility location-allocation methods (e.g., coverage models, p-median problem). They are user-friendly and visually appealing, but the cost is relatively high [@murray2021contemporary]. +Open-source software is another option to access spatial optimization. Although it may require users to have a certain level of programming experience, open-source software provides relatively novel and comprehensive methods, and more importantly, it is free and can be easily replicated. This is particularly true for regionalization and facility-location methods. Regionalization methods are limited in commercial GIS software, and may only have grouping analysis for vector data and region identification for raster data. On the contrary, there are many application-oriented open-source packages that facilitate the implementation of regionalization methods in various fields, including climate (e.g., HiClimR (https://cran.r-project.org/web/packages/HiClimR/index.html), synoptReg (https://cran.r-project.org/web/packages/synoptReg/index.html)), biology (e.g., Phyloregion (https://cran.r-project.org/web/packages/phyloregion/index.html), regioneR (http://bioconductor.org/packages/release/bioc/html/regioneR.html)), hydrology (e.g., nsRFA (https://cran.r-project.org/web/packages/nsRFA/index.html)), agriculture (e.g., OpenLCA (https://www.openlca.org/)), and so on. The functions of graph regionalization with clustering and partitioning have been provided by several packages such as Rgeoda, maxcut: Max-Cut Problem, RBGL: R Boost Graph Library, and grPartition. They are probably the most closely related projects to the regionalization section of spopt; however, they are written in R and MATLAB. 
For facility-location methods, commercial software such as TransCAD and ArcGIS implements models using a heuristic approach. However, they don't provide details about the solution found, which limits the interpretability of the results (Chen et al., 2021). On the other hand, existing open-source packages mostly aim at solving coverage problems such as PySpatialOpt (https://github.com/apulverizer/pyspatialopt), Allagash (https://apulverizer.github.io/allagash/) and maxcovr (https://github.com/njtierney/maxcovr), but the available models, solvers, and overall accessibility vary significantly. Therefore, it is necessary to develop an open-source optimization package written in Python that includes various types of classic facility-location methods with a wide range of supported optimization solvers. # Current functionality -Originating from the region module in PySAL, spopt is under active development for the inclusion of newly proposed models and methods for regionalization. Six models are developed for aggregating a large set of geographic units (with small footprints) into a smaller number of regions (with large footprints). They are: +Originating from the region module in PySAL, spopt is under active development for the inclusion of newly proposed models and methods for regionalization and facility location. Regarding regionalization, six models are developed for aggregating a large set of geographic units (with small footprints) into a smaller number of regions (with large footprints). They are: + 1. Max-p-regions: the clustering of a set of geographic areas into the maximum number of homogeneous and spatially contiguous regions such that the value of a spatially extensive regional attribute is above a predefined threshold [@duque2012max;@wei2020efficient]. 2. Spatially-encouraged spectral clustering (spenc): an algorithm to balance spatial and feature coherence using kernel combination in spectral clustering [@wolf2020]. 3. 
Region-K-means: K-means clustering for regions with the constraint that each cluster forms a spatially connected component. 4. Automatic Zoning Procedure (AZP): the aggregation of data for a larger number of zones into a prespecified smaller number of regions based on a predefined type of objective function [@openshaw1977geographical;@openshaw1995algorithms]. -5. Skater: a constrained spatial regionalization algorithm based on spanning tree pruning. Specifically, the number of edges is prespecified to be cut in a continuous tree to group spatial units into contiguous regions [@assunccao2006efficient]. +5. Skater: a constrained spatial regionalization algorithm based on spanning tree pruning. Specifically, the number of edges is prespecified to be cut in a continuous tree to group spatial units into contiguous regions [@assunccao2006efficient]. 6. WardSpatial: an agglomerative clustering (each observation starts in its own cluster, and pairs of clusters are chosen to merge at each step) using ward linkage (the goal is to minimize the variance of the clusters) with a spatial connectivity constraint ([sklearn.cluster.AgglomerativeClustering](https://scikit-learn.org/stable/modules/generated/sklearn.cluster.AgglomerativeClustering.html)). Take the functionality of Max-p-regions as an example. Other methods can be applied in a similar process, including importing the needed packages, inputting and reading data, defining the parameters, solving the model, and plotting the solution. ```python from spopt.region import MaxPHeuristic as MaxP -import matplotlib.pyplot as plt -import geopandas as gpd -import libpysal -# input the data on regional incomes for Mexican states as an example. -mexico = gpd.read_file(libpysal.examples.get_path('mexicojoin.shp')) -# specify a number of parameters that will serve as input to the Max-p-regions model. Details can be found # on https://pysal.org/spopt/notebooks/maxp.html. 
-attrs_name = [f'PCGDP{2000}'] +import geopandas, libpysal +# Read in the data on regional incomes for Mexican states. +mexico = geopandas.read_file(libpysal.examples.get_path("mexicojoin.shp")) +# Specify parameters for the Max-p-regions model. +# Details can be found at https://pysal.org/spopt/notebooks/maxp.html. +attrs_name = [f"PCGDP{2000}"] w = libpysal.weights.Queen.from_dataframe(mexico) -threshold, top_n, mexico['count'] = 6, 2, 1 -threshold_name = 'count' -# solve the Max-p-regions model. +threshold_name, threshold, top_n, mexico["count"] = "count", 6, 2, 1 +# Solve the Max-p-regions model. model = MaxP(mexico, w, attrs_name, threshold_name, threshold, top_n) model.solve() -# plot the model solution. -mexico['maxp_new'] = model.labels_ -mexico.plot(column='maxp_new', categorical=True, edgecolor='w') +# Plot the model solution. +mexico["maxp_new"] = model.labels_ +mexico.plot(column="maxp_new", categorical=True, edgecolor="w"); ``` -The corresponding solution of Max-p-regions running the above code is shown in \autoref{fig: maxp} + +The corresponding solution of Max-p-regions running the above code is shown in \autoref{fig: maxp}. It results in five regions, three of which have six states, and two with seven states each. Each region is a spatially connected component, as required by the Max-p-regions problem.  +For facility-location, four models, including two coverage models and two location-allocation models based on median and center problems, are developed using an exact approach. + +1. Location Set Covering Problem (LSCP): Finding the minimum number of facilities and their locations such that all demands are covered within the maximal distance or time standard [@Toregas1971]. +2. Maximal Covering Location Problem (MCLP): Locating a prespecified number of facilities such that demand coverage within a maximal service distance or time is maximized [@Church1974]. +3. 
P-Median Problem: Locating \textit{p} facilities and allocating the demand served by these facilities so that the total weighted assignment distance or time is minimized [@ReVelle1970]. +4. P-Center Problem: Locating \textit{p} facilities and allocating the demand served by these facilities to minimize the maximum assignment distance or time between demands and their allocated facilities [@Hakimi1964]. + +For example, Maximal Covering Location Model functionality is used to select 4 out of 16 store sites in the San Francisco area to maximize demand coverage, as shown in \autoref{fig: mclp}. Other facility-location methods can be applied in a similar way. + +```python +from spopt.locate.coverage import MCLP +import geopandas, numpy, pandas, pulp +# Read in the datasets +ntw_dist = pandas.read_csv("SF_network_distance_candidateStore_16_censusTract_205_new.csv") +demand_points = pandas.read_csv("SF_demand_205_centroid_uniform_weight.csv", index_col=0) +facility_points = pandas.read_csv("SF_store_site_16_longlat.csv", index_col=0) +study_area = geopandas.read_file("ServiceAreas_4.shp").dissolve() +# Create a store site to tract centroid distance matrix +ntw_piv = ntw_dist.pivot_table(values="distance", index="DestinationName", columns="name") +cost_matrix, ai, p = ntw_piv.to_numpy(), demand_points["POP2000"].to_numpy(), 4 +mclp = MCLP.from_cost_matrix(cost_matrix, ai, max_coverage=5000, p_facilities=p) +mclp = mclp.solve(pulp.GLPK(msg=False)) +# Build a facility-demand array for demand covered by each facility +mclp.facility_client_array() +fgeom = geopandas.points_from_xy(facility_points.long, facility_points.lat) +facility_points_gdf = geopandas.GeoDataFrame( + facility_points, geometry=fgeom, +).sort_values(by=["NAME"]).reset_index() +dgeom = geopandas.points_from_xy(demand_points.long, demand_points.lat) +demand_points_gdf = geopandas.GeoDataFrame( + demand_points, geometry=dgeom, +).sort_values(by=["NAME"]).reset_index() +# plot results +n_facilities, title = 
facility_points_gdf.shape[0], f"MCLP ($p$={p})" +#plot_results(mclp, facility_points_gdf, demand_points_gdf, n_facilities, title) +``` + + # Planned Enhancements Spopt is under active development and the spopt developers look forward to your extensive attention and participation. In the near future, there are three major enhancements we plan to pursue for spopt: 1. The first stream will be on the enhancement of regionalization algorithms by including several novel extensions of the classical regionalization models, such as the integration of spatial data uncertainty and the shape of identified regions in the max-p-regions problem. -2. The second direction involves providing an open source version of classical single and/or multi-facility location and location-allocation methods which have been included in commercial GIS software packages. -3. We anticipate adding functionality for solving traditional routing and transportation-oriented problems. Initially, this will come in the form of integer programming formulations of the Travelling Salesperson Problem [@miller1960integer] and the Transportation Problem [@koopmans1949optimum]. - +2. The second direction involves adding capacity constraints and polygon partial coverage to the facility location models. No commercial or open-source software has provided these features before. +3. We anticipate adding functionality for solving traditional routing and transportation-oriented optimization problems. Initially, this will come in the form of integer programming formulations of the Travelling Salesperson Problem [@miller1960integer] and the Transportation Problem [@koopmans1949optimum]. # Acknowledgements We would like to thank all the contributors to this package. Besides, we would like to extend our gratitude to all the users for inspiring and questioning this package to make it better. 
Spopt development was partially supported by National Science Foundation Award #1831615 RIDIR: Scalable Geospatial Analytics for Social Science Research. - # References