Skip to content

Commit

Permalink
Initial release v1.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
Paco Valdez committed Feb 19, 2021
1 parent 6963f5e commit 4b2c113
Show file tree
Hide file tree
Showing 7 changed files with 170 additions and 51 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,5 @@ dmypy.json

# Pyre type checker
.pyre/

.csv
40 changes: 31 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,41 @@

[![Python 3.8](https://img.shields.io/badge/python-3.8-blue.svg)](https://www.python.org/downloads/release/python-380/)

requirements:
* requests

To run the script, you first need to set the API_KEY environment variable, like this:

# Installation
## Using Pip
```bash
$ pip install geocli
```
## Manual
```bash
$ git clone https://github.com/expandaventures/geocoding-cli
$ cd geocoding-cli
$ python setup.py install
```
# Usage
```bash
$ geocli
```
export API_KEY='YOUR_API_KEY'
## Geocode a single address
`Usage: geocli geocode [OPTIONS] ADDRESS [STATE] [CITY]`
```bash
$ geocli geocode --dry_run 'Av. Reforma 222' cdmx mexico
```
## Batch: geocode the addresses in a file

and next use
`Usage: geocli batch [OPTIONS] INPUT_FILE OUTPUT_PATH`
```bash
$ geocli batch --dry_run sample.csv out.csv
```

The expected schema in the file is:

```
python3 geocoding_imss.py file.csv output_name
id, state, unused, city, ... , address
1, CDMX,,Mexico,,Av. Reforma 222
```

where ```file.csv``` is the file whose addresses you want to read and convert to coordinates, and ```output_name``` is the name of the file that will be saved after the script runs
The **ONLY** columns used are the 2nd, the 4th and the last
(i.e. row[1], row[3], row[-1]).

45 changes: 45 additions & 0 deletions geocli/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import sys
import click
from geocli_driver import process_file, get_location_by_address


# Root command group: sub-commands attach themselves via ``@main.command()``.
@click.group()
@click.version_option(version="1.0.0")
def main():
    """A Geocoding CLI"""


@main.command()
@click.argument('input_file', required=True, type=click.Path(exists=True))
@click.argument('output_path', required=True)
@click.option('--dry_run', is_flag=True)
def batch(**kwargs):
    """Process a file and output the results to a file"""
    # click hands every declared argument/option over as a keyword argument.
    source_csv = kwargs.get("input_file")
    destination = kwargs.get("output_path")
    simulate = kwargs.get("dry_run", False)

    results = process_file(source_csv, destination, simulate)
    click.echo('Processed {} results'.format(results))


@main.command()
@click.argument('address', required=True)
@click.argument('state', required=False, default='')
@click.argument('city', required=False, default='')
@click.option('--dry_run', is_flag=True)
def geocode(**kwargs):
    """process a single address"""
    # State and city are optional on the command line and default to ''.
    state = kwargs.get("state", "")
    city = kwargs.get("city", "")
    lookup_options = {
        "address_string": kwargs.get("address"),
        "dry_run": kwargs.get("dry_run", False),
    }

    coordinates = get_location_by_address(state, city, **lookup_options)
    lat, lon = coordinates
    click.echo('Lat, Lon: {}, {}'.format(lat, lon))


if __name__ == '__main__':
    # Print a banner when help is requested explicitly or when the program
    # is started without any arguments, then hand control to click.
    cli_args = sys.argv
    started_bare = len(cli_args) == 1
    if started_bare or "--help" in cli_args:
        print("GEOCLI")
    main()
80 changes: 38 additions & 42 deletions geocli/main.py → geocli/geocli_driver.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
import csv
import os
import re
import sys
import csv
import time

import requests
import time

start = time.time()

API_KEY = os.getenv("API_KEY", "")
API_KEY = os.getenv("GOOGLE_API_KEY", False)
LIMIT_LON = tuple(map(float, os.getenv("LIMIT_LON", "-117.12776,14.5388286402").split(',')))
LIMIT_LAT = tuple(map(float, os.getenv("LIMIT_LAT", "-86.811982388,32.72083").split(',')))


def clean_and_format(string):
Expand All @@ -24,11 +22,7 @@ def extract_cp(string):
return cp_str


LIMIT_LON = -117.12776, 14.5388286402
LIMIT_LAT = -86.811982388, 32.72083


def get_location_by_address(state, city, address_string=None):
def get_location_by_address(state, city, address_string=None, dry_run=False):
if address_string:
address_string = re.sub(r'ENTRE\s(.*)\sY\s(.*)\sCOLONIA:', 'COLONIA:', address_string)
address_string = address_string.replace("C.P.", "postal_code=")
Expand All @@ -40,6 +34,9 @@ def get_location_by_address(state, city, address_string=None):

url = "https://maps.googleapis.com/maps/api/geocode/json?address={}&region=mx&key={}".format(address_string,
API_KEY)
if dry_run:
print(url)
return None, None
response = requests.get(url)
try:
if response.status_code == 200:
Expand All @@ -55,33 +52,32 @@ def get_location_by_address(state, city, address_string=None):
return None, None


# Top-level script flow: read the input CSV path and the output file name
# from the command line, geocode every data row and write the result.
args = sys.argv
# Exactly two positional arguments are required (args[0] is the script name).
if len(args) != 3:
raise Exception("Scripts takes exactly two argument ({} given) [file_path] and [output_name]".format(len(args) - 1))
file_path = args[1]
output_name = args[2]

# Load every row in memory; the first row is treated as the header and gets
# two extra column titles for the coordinates.
with open(file_path) as f:
csv_data = csv.reader(f, delimiter=',', quotechar='"')
rows = list(csv_data)
rows[0].append("Latitud")
rows[0].append("Longitud")
for row in rows[1:]:
# The address is taken from the last column; row[1] and row[3] are passed
# as the state and city arguments of get_location_by_address.
address = row[-1]
cp = extract_cp(address)
lat, lng = get_location_by_address(row[1], row[3], address_string=address)
# Retry with the value returned by extract_cp when nothing was found.
# NOTE(review): `a > x > b` means `x < a and x > b`; if each LIMIT_* pair is
# (low, high) this second condition can never be true - confirm the intended
# bounds check.
if (not lat and not lng) or (
(LIMIT_LAT[0] > lat > LIMIT_LAT[1]) and (LIMIT_LON[0] > lng > LIMIT_LON[1])):
lat, lng = get_location_by_address(row[1], row[3], address_string=cp)
row.append(lat)
row.append(lng)
# Progress output: first column of the row plus the coordinates found.
print(row[0], lat, lng)


# Write all rows (header included) to the output file.
with open(output_name, 'w', newline='') as csvfile:
csv_file = csv.writer(csvfile, delimiter=',',
quotechar='"', quoting=csv.QUOTE_MINIMAL)
csv_file.writerows(rows)

# Report the elapsed time; `start` is assigned near the top of the module.
end = time.time()
print(end - start)
def _is_outside_range(value, limits):
    """Return True when ``value`` lies outside the closed interval ``limits``."""
    # min/max make the check independent of the order the pair was given in.
    return not (min(limits) <= value <= max(limits))


def _needs_fallback(lat, lng):
    """Return True when a geocoding result is missing or outside the limits."""
    if lat is None or lng is None:
        return True
    if not lat and not lng:
        # A (0, 0) result has always been treated as "nothing found".
        return True
    # NOTE(review): the default LIMIT_LON / LIMIT_LAT values look like
    # (lon, lat) corner pairs rather than per-axis ranges - confirm them.
    return _is_outside_range(lat, LIMIT_LAT) or _is_outside_range(lng, LIMIT_LON)


def process_file(file_path, output_name, dry_run=False):
    """Geocode every data row of a CSV file and write the enriched rows.

    The first row is treated as the header and receives two extra columns,
    ``Latitud`` and ``Longitud``. For every other row the 2nd and 4th columns
    are passed to ``get_location_by_address`` as state and city, and the last
    column as the address. When that lookup yields no coordinates, or
    coordinates outside ``LIMIT_LAT`` / ``LIMIT_LON``, the lookup is retried
    with the value returned by ``extract_cp`` for the address.

    Args:
        file_path: Path of the CSV file to read.
        output_name: Path of the CSV file to write.
        dry_run: When true, lookups are made in dry-run mode, nothing is
            written and ``0`` is returned.

    Returns:
        The number of rows written (header included), or ``0`` on a dry run.

    Raises:
        ValueError: If ``GOOGLE_API_KEY`` is not set and this is not a dry run.
    """
    if not API_KEY and not dry_run:
        raise ValueError("Environment Variable GOOGLE_API_KEY must be set")
    start = time.time()

    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(file_path, newline='') as f:
        rows = list(csv.reader(f, delimiter=',', quotechar='"'))

    # An empty input file has no header row to extend.
    if rows:
        rows[0].extend(["Latitud", "Longitud"])

    for row in rows[1:]:
        if not row:
            # csv.reader yields [] for blank lines; there is nothing to geocode.
            continue
        address = row[-1]
        cp = extract_cp(address)
        lat, lng = get_location_by_address(row[1], row[3], address_string=address, dry_run=dry_run)
        if _needs_fallback(lat, lng):
            lat, lng = get_location_by_address(row[1], row[3], address_string=cp, dry_run=dry_run)
        row.append(lat)
        row.append(lng)
        print(row[0], lat, lng)

    if dry_run:
        return 0

    with open(output_name, 'w', newline='') as csvfile:
        csv_file = csv.writer(csvfile, delimiter=',',
                              quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_file.writerows(rows)
    end = time.time()
    print(end - start)
    return len(rows)
5 changes: 5 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,6 @@
certifi==2020.12.5
chardet==4.0.0
click==7.1.2
idna==2.10
requests==2.25.1
urllib3==1.26.3
2 changes: 2 additions & 0 deletions sample.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id, state, unused, city, ... , address
1, CDMX,,Mexico,,Reforma 222
47 changes: 47 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from setuptools import setup, find_packages
from io import open
from os import path

import pathlib

# The directory containing this file
HERE = pathlib.Path(__file__).parent

# The text of the README file, used as the long description of the package.
README = (HERE / "README.md").read_text(encoding="utf-8")

# Read requirements.txt and keep only real requirement lines: entries are
# stripped first so blank lines, comments and pip options (lines starting
# with '-') are dropped reliably.
with open(path.join(HERE, 'requirements.txt'), encoding='utf-8') as f:
    all_reqs = [line.strip() for line in f.read().split('\n')]
all_reqs = [line for line in all_reqs
            if line and not line.startswith(('#', '-'))]

# Plain requirements go to install_requires; VCS ('git+') requirements are
# exposed as dependency links instead.
install_requires = [x for x in all_reqs if 'git+' not in x]
dependency_links = [x.replace('git+', '') for x in all_reqs if 'git+' in x]

setup(
    name='geocli',
    description='A simple commandline app for geocoding',
    version='1.0.0',
    packages=find_packages(),  # list of all packages
    install_requires=install_requires,
    # The package uses f-strings, which require Python 3.6 or newer.
    python_requires='>=3.6',
    entry_points='''
        [console_scripts]
        geocli=geocli.__main__:main
    ''',
    author="Paco Valdez",
    keywords="google maps, georeference, address, geocoding",
    long_description=README,
    long_description_content_type="text/markdown",
    license='GPLv3',
    url='https://github.com/expandaventures/geocoding-cli',
    download_url='https://github.com/expandaventures/geocoding-cli/archive/1.0.0.tar.gz',
    dependency_links=dependency_links,
    author_email='[email protected]',
    classifiers=[
        "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.7",
    ]
)

0 comments on commit 4b2c113

Please sign in to comment.