Skip to content

Commit

Permalink
Merge pull request #53 from openaddresses/geojson-internal
Browse files Browse the repository at this point in the history
GeoJSON Internal [Major]
  • Loading branch information
ingalls authored Sep 18, 2023
2 parents 4f57e3e + 20325d6 commit f4730bb
Show file tree
Hide file tree
Showing 10 changed files with 759 additions and 843 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
2023-09-17 v9.0.0
- Remove GeoJSONLD Render Functions
- Remove MBTiles Render Functions
- Rewrite conform to natively output GeoJSON+LD
- Rewrite Preview module to expect GeoJSON+LD

2023-09-09 v8.3.1
- Use the file extension from the content-type header when it disagrees with content-disposition in https://github.com/openaddresses/batch-machine/pull/54

Expand All @@ -6,6 +12,9 @@
- Make GeoJSON+LD output a default in https://github.com/openaddresses/batch-machine/pull/51
- Update TippeCanoe to latest version in https://github.com/openaddresses/batch-machine/pull/50

2023-09-05 v8.2.4
- Update TippeCanoe to latest version
- Make GeoJSON+LD output a default
2023-09-02 v8.2.3
- Fix properties in geojson-ld features in https://github.com/openaddresses/batch-machine/pull/49
- Remove extra whitespace in geojson-ld output in https://github.com/openaddresses/batch-machine/pull/48
Expand Down
2 changes: 1 addition & 1 deletion openaddr/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
8.3.1
9.0.0
19 changes: 9 additions & 10 deletions openaddr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from .conform import (
ConformResult,
DecompressionTask,
ConvertToCsvTask,
ConvertToGeojsonTask,
elaborate_filenames,
ADDRESSES_SCHEMA,
BUILDINGS_SCHEMA,
Expand Down Expand Up @@ -134,22 +134,21 @@ def conform(source_config, destdir, extras):
decompressed_paths = task2.decompress(downloaded_path, workdir, names)
_L.info("Decompressed to %d files", len(decompressed_paths))

task4 = ConvertToCsvTask()
task4 = ConvertToGeojsonTask()
try:
csv_path, feat_count = task4.convert(source_config, decompressed_paths, workdir)
out_path, feat_count = task4.convert(source_config, decompressed_paths, workdir)
if feat_count > 0:
_L.info("Converted to %s with %d features", csv_path, feat_count)
_L.info("Converted to %s with %d features", out_path, feat_count)
else:
_L.warning('Found no features in source data')
csv_path = None
out_path = None
except Exception as e:
_L.warning("Error doing conform; skipping", exc_info=True)
csv_path, feat_count = None, 0
out_path, feat_count = None, 0

out_path = None
if csv_path is not None and exists(csv_path):
move(csv_path, join(destdir, 'out.csv'))
out_path = realpath(join(destdir, 'out.csv'))
if out_path is not None and exists(out_path):
move(out_path, join(destdir, 'out.geojson'))
out_path = realpath(join(destdir, 'out.geojson'))

rmtree(workdir)

Expand Down
102 changes: 55 additions & 47 deletions openaddr/conform.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@

from .geojson import stream_geojson

from shapely.wkt import loads as wkt_loads
from shapely.geometry import mapping

from zipfile import ZipFile
from locale import getpreferredencoding
from os.path import splitext
Expand All @@ -41,9 +44,9 @@ def gdal_error_handler(err_class, err_num, err_msg):
# We add columns to the extracted CSV with our own data with these names.
GEOM_FIELDNAME = 'OA:GEOM'

ADDRESSES_SCHEMA = [ 'NUMBER', 'STREET', 'UNIT', 'CITY', 'DISTRICT', 'REGION', 'POSTCODE', 'ID' ]
BUILDINGS_SCHEMA = []
PARCELS_SCHEMA = [ 'PID' ]
ADDRESSES_SCHEMA = [ 'HASH', 'NUMBER', 'STREET', 'UNIT', 'CITY', 'DISTRICT', 'REGION', 'POSTCODE', 'ID' ]
BUILDINGS_SCHEMA = [ 'HASH']
PARCELS_SCHEMA = [ 'HASH', 'PID' ]
RESERVED_SCHEMA = ADDRESSES_SCHEMA + BUILDINGS_SCHEMA + PARCELS_SCHEMA + [
"LAT",
"LON"
Expand Down Expand Up @@ -365,7 +368,7 @@ def find_source_path(data_source, source_paths):
_L.warning("Unknown source conform format %s", format_string)
return None

class ConvertToCsvTask(object):
class ConvertToGeojsonTask(object):
known_types = ('.shp', '.json', '.csv', '.kml', '.gdb')

def convert(self, source_config, source_paths, workdir):
Expand All @@ -381,11 +384,11 @@ def convert(self, source_config, source_paths, workdir):
source_path = find_source_path(source_config.data_source, source_paths)
if source_path is not None:
basename, ext = os.path.splitext(os.path.basename(source_path))
dest_path = os.path.join(convert_path, basename + ".csv")
dest_path = os.path.join(convert_path, basename + ".geojson")
rc = conform_cli(source_config, source_path, dest_path)
if rc == 0:
with open(dest_path) as file:
addr_count = sum(1 for line in file) - 1
addr_count = sum(1 for line in file)

# Success! Return the path of the output GeoJSON
return dest_path, addr_count
Expand Down Expand Up @@ -832,14 +835,17 @@ def row_transform_and_convert(source_config, row):
# Make up a random fingerprint if none exists
cache_fingerprint = source_config.data_source.get('fingerprint', str(uuid4()))

row = row_convert_to_out(source_config, row)
row = row_calculate_hash(cache_fingerprint, row)

feat = row_convert_to_out(source_config, row)

if source_config.layer == "addresses":
row = row_canonicalize_unit_and_number(source_config.data_source, row)
row = row_round_lat_lon(source_config.data_source, row)
feat['properties'] = row_canonicalize_unit_and_number(source_config.data_source, feat['properties'])

row = row_calculate_hash(cache_fingerprint, row)
return row
if feat['geometry'] and len(feat['geometry']['coordinates']) > 0:
feat['geometry']['coordinates'] = set_precision(feat['geometry']['coordinates'], 7)

return feat

def fxn_smash_case(fxn):
if "field" in fxn:
Expand Down Expand Up @@ -1043,33 +1049,24 @@ def row_fxn_constant(sc, row, key, fxn):

def row_canonicalize_unit_and_number(sc, row):
    ''' Canonicalize address unit, number and street properties.

        sc: data-source description (unused here, kept for call-site symmetry)
        row: feature properties dict; mutated in place and returned.

        Missing or None values are normalized to empty strings and
        surrounding whitespace is stripped.
    '''
    for key in ("unit", "number", "street"):
        row[key] = (row.get(key, '') or '').strip()

    # Some sources deliver house numbers as floats, e.g. '123.0' -> '123'
    if row["number"].endswith('.0'):
        row["number"] = row["number"][:-2]

    return row
def set_precision(coords, precision):
    ''' Recursively round coordinates to `precision` decimal places.

        coords: a number, or arbitrarily nested lists of numbers
                (GeoJSON coordinate arrays of any geometry type)
        precision: number of decimal places (coerced with int())

        Returns a rounded number, or a new nested list structure.
    '''
    try:
        return round(coords, int(precision))
    except TypeError:
        # Not a bare number: recurse into the nested coordinate lists.
        return [set_precision(coord, precision) for coord in coords]

def row_calculate_hash(cache_fingerprint, row):
    ''' Calculate row hash based on content and existing fingerprint.

        The row is serialized deterministically (sorted items, compact
        separators) together with the cache fingerprint; the first 16 hex
        digits of the SHA-1 are stored under the 'oa:hash' key.

        Returns the mutated row.
    '''
    # `digest` rather than `hash`, to avoid shadowing the builtin
    digest = sha1(cache_fingerprint.encode('utf8'))
    digest.update(json.dumps(sorted(row.items()), separators=(',', ':')).encode('utf8'))
    row.update({'oa:hash': digest.hexdigest()[:16]})

    return row

def row_convert_to_out(source_config, row):
    ''' Convert a row from the source schema to an OpenAddresses GeoJSON Feature.

        source_config: description of the source, containing the conform object
        row: extracted-CSV row dict (lowercased keys)

        Returns a GeoJSON Feature dict with lowercased schema fields in
        "properties" and the geometry converted from WKT to a GeoJSON mapping.
    '''
    geom = row.get(GEOM_FIELDNAME.lower(), None)
    if geom == "POINT EMPTY" or geom == '':
        geom = None

    output = {
        "type": "Feature",
        "properties": {},
        "geometry": geom
    }

    if output["geometry"] is not None:
        # The extract stage writes geometry as WKT; convert to GeoJSON.
        wkt_parsed = wkt_loads(output["geometry"])
        output["geometry"] = mapping(wkt_parsed)

    for field in source_config.SCHEMA:
        # Compute the prefixed key once instead of twice per field.
        oa_key = 'oa:{}'.format(field.lower())

        if row.get(oa_key) is not None:
            # If there is an OA prefix, it is not a native field and was
            # compiled via an attrib function or concatenation.
            output["properties"][field.lower()] = row.get(oa_key)
        else:
            # Get a native field as specified in the conform object
            cfield = source_config.data_source['conform'].get(field.lower())

            if cfield:
                output["properties"][field.lower()] = row.get(cfield.lower())
            else:
                output["properties"][field.lower()] = ''

    return output

Expand Down Expand Up @@ -1134,8 +1143,8 @@ def extract_to_source_csv(source_config, source_path, extract_path):
else:
raise Exception("Unsupported source format %s" % format_string)

def transform_to_out_geojson(source_config, extract_path, dest_path):
    ''' Transform an extracted source CSV to OpenAddresses line-delimited GeoJSON
        by applying conform rules.

        source_config: description of the source, containing the conform object
        extract_path: extracted CSV file to process
        dest_path: path of destination GeoJSON file to write
    '''
    # Read through the extract CSV
    with open(extract_path, 'r', encoding='utf-8') as extract_fp:
        reader = csv.DictReader(extract_fp)

        # Write to the destination GeoJSON, one Feature per line (GeoJSON+LD)
        with open(dest_path, 'w', encoding='utf-8') as dest_fp:
            # For every row in the extract
            for extract_row in reader:
                out_row = row_transform_and_convert(source_config, extract_row)
                dest_fp.write(json.dumps(out_row) + '\n')

def conform_cli(source_config, source_path, dest_path):
"Command line entry point for conforming a downloaded source to an output CSV."
Expand All @@ -1176,7 +1183,7 @@ def conform_cli(source_config, source_path, dest_path):

try:
extract_to_source_csv(source_config, source_path, extract_path)
transform_to_out_csv(source_config, extract_path, dest_path)
transform_to_out_geojson(source_config, extract_path, dest_path)
finally:
os.remove(extract_path)

Expand All @@ -1203,7 +1210,8 @@ def check_source_tests(source_config):
for (index, test) in enumerate(acceptance_tests):
input = row_smash_case(source_config.data_source, test['inputs'])
output = row_smash_case(source_config.data_source, row_transform_and_convert(source_config, input))
actual = {k: v for (k, v) in output.items() if k in test['expected']}

actual = {k: v for (k, v) in output['properties'].items() if k in test['expected']}
expected = row_smash_case(source_config.data_source, test['expected'])

if actual != expected:
Expand Down
64 changes: 13 additions & 51 deletions openaddr/preview.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
from __future__ import division
import logging; _L = logging.getLogger('openaddr.preview')

from zipfile import ZipFile
from io import TextIOWrapper
from csv import DictReader
from tempfile import mkstemp
from math import pow, sqrt, pi, log
from argparse import ArgumentParser
from urllib.parse import urlparse
import json, itertools, os, struct

import requests, uritemplate, mapbox_vector_tile
Expand All @@ -30,10 +25,9 @@
# Web Mercator, https://trac.osgeo.org/openlayers/wiki/SphericalMercator
EPSG900913 = '+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +no_defs'

def render(filename_or_url, png_filename, width, resolution, mapbox_key):
def render(src_filename, png_filename, width, resolution, mapbox_key):
'''
'''
src_filename = get_local_filename(filename_or_url)
_, points_filename = mkstemp(prefix='points-', suffix='.bin')

try:
Expand Down Expand Up @@ -90,56 +84,24 @@ def render(filename_or_url, png_filename, width, resolution, mapbox_key):
os.remove(points_filename)
surface.write_to_png(png_filename)

def iterate_file_lonlats(filename):
    ''' Stream (lon, lat) coordinates from an input line-delimited GeoJSON file.

        filename: path to a file with one GeoJSON Feature per line.

        Lines that fail to parse, or features whose geometry can't be read,
        are skipped; coordinates outside the valid WGS84 range are dropped.
    '''
    with open(filename, 'r') as file:
        for line in file:
            try:
                feature = json.loads(line)

                # PointOnSurface gives a representative point for any geometry type
                lon, lat, _ = ogr.CreateGeometryFromJson(json.dumps(feature['geometry'])).PointOnSurface().GetPoint()
            except Exception:
                # NOTE(review): was a bare `except:`; narrowed to Exception so
                # KeyboardInterrupt/SystemExit are not silently swallowed.
                continue

            if -180 <= lon <= 180 and -90 <= lat <= 90:
                yield (lon, lat)

def get_map_features(xmin, ymin, xmax, ymax, resolution, scale, mapbox_key):
'''
'''
Expand Down Expand Up @@ -412,7 +374,7 @@ def draw_line(ctx, start, points):

parser = ArgumentParser(description='Draw a map of a single source preview.')

parser.add_argument('src_filename', help='Input Zip or CSV filename or URL.')
parser.add_argument('src_filename', help='Input GeoJSON')
parser.add_argument('png_filename', help='Output PNG filename.')

parser.set_defaults(resolution=1, width=668)
Expand Down
Loading

0 comments on commit f4730bb

Please sign in to comment.