-
Notifications
You must be signed in to change notification settings - Fork 40
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
geojson: demarcate collections #35
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,11 +58,11 @@ pub fn read_geojson<R: Read, P: FeatureProcessor>(mut reader: R, processor: &mut | |
} | ||
|
||
pub fn read_geojson_fc<R: Read, P: FeatureProcessor>(reader: R, processor: &mut P) -> Result<()> { | ||
let mut idx = 0; | ||
for feature in FeatureReader::from_reader(reader).features() { | ||
processor.dataset_begin(None)?; | ||
for (idx, feature) in FeatureReader::from_reader(reader).features().enumerate(){ | ||
process_geojson_feature(&feature?, idx, processor)?; | ||
idx += 1; | ||
} | ||
processor.dataset_end()?; | ||
Ok(()) | ||
} | ||
|
||
|
@@ -83,22 +83,15 @@ fn process_geojson<P: FeatureProcessor>(gj: &GeoGeoJson, processor: &mut P) -> R | |
GeoGeoJson::FeatureCollection(ref collection) => { | ||
processor.dataset_begin(None)?; | ||
for (idx, feature) in collection.features.iter().enumerate() { | ||
processor.feature_begin(idx as u64)?; | ||
if let Some(ref properties) = feature.properties { | ||
processor.properties_begin()?; | ||
process_properties(properties, processor)?; | ||
processor.properties_end()?; | ||
} | ||
if let Some(ref geometry) = feature.geometry { | ||
processor.geometry_begin()?; | ||
process_geojson_geom_n(geometry, idx, processor)?; | ||
processor.geometry_end()?; | ||
} | ||
processor.feature_end(idx as u64)?; | ||
process_geojson_feature(feature, idx, processor)? | ||
} | ||
processor.dataset_end()?; | ||
} | ||
GeoGeoJson::Feature(ref feature) => process_geojson_feature(feature, 0, processor)?, | ||
GeoGeoJson::Feature(ref feature) => { | ||
processor.dataset_begin(None)?; | ||
process_geojson_feature(feature, 0, processor)?; | ||
processor.dataset_end()?; | ||
} | ||
GeoGeoJson::Geometry(ref geometry) => { | ||
process_geojson_geom_n(geometry, 0, processor)?; | ||
} | ||
|
@@ -112,7 +105,6 @@ fn process_geojson_feature<P: FeatureProcessor>( | |
idx: usize, | ||
processor: &mut P, | ||
) -> Result<()> { | ||
processor.dataset_begin(None)?; | ||
if feature.geometry.is_some() || feature.properties.is_some() { | ||
processor.feature_begin(idx as u64)?; | ||
if let Some(ref properties) = feature.properties { | ||
|
@@ -127,14 +119,14 @@ fn process_geojson_feature<P: FeatureProcessor>( | |
} | ||
processor.feature_end(idx as u64)?; | ||
} | ||
processor.dataset_end()?; | ||
Ok(()) | ||
} | ||
|
||
/// Process top-level GeoJSON items (geometry only) | ||
fn process_geojson_geom<P: GeomProcessor>(gj: &GeoGeoJson, processor: &mut P) -> Result<()> { | ||
match *gj { | ||
GeoGeoJson::FeatureCollection(ref collection) => { | ||
processor.geometrycollection_begin(collection.features.len(), 0)?; | ||
for (idx, geometry) in collection | ||
.features | ||
.iter() | ||
|
@@ -144,6 +136,7 @@ fn process_geojson_geom<P: GeomProcessor>(gj: &GeoGeoJson, processor: &mut P) -> | |
{ | ||
process_geojson_geom_n(geometry, idx, processor)?; | ||
} | ||
processor.geometrycollection_end(0)?; | ||
} | ||
GeoGeoJson::Feature(ref feature) => { | ||
if let Some(ref geometry) = feature.geometry { | ||
|
@@ -367,7 +360,7 @@ mod test { | |
let mut wkt_data: Vec<u8> = Vec::new(); | ||
assert!(read_geojson_fc(geojson.as_bytes(), &mut WktWriter::new(&mut wkt_data)).is_ok()); | ||
let wkt = std::str::from_utf8(&wkt_data).unwrap(); | ||
assert_eq!(wkt, "MULTIPOLYGON(((173.020375 -40.919052,173.247234 -41.331999,173.958405 -40.926701,174.247587 -41.349155,174.248517 -41.770008,173.876447 -42.233184,173.22274 -42.970038,172.711246 -43.372288,173.080113 -43.853344,172.308584 -43.865694,171.452925 -44.242519,171.185138 -44.897104,170.616697 -45.908929,169.831422 -46.355775,169.332331 -46.641235,168.411354 -46.619945,167.763745 -46.290197,166.676886 -46.219917,166.509144 -45.852705,167.046424 -45.110941,168.303763 -44.123973,168.949409 -43.935819,169.667815 -43.555326,170.52492 -43.031688,171.12509 -42.512754,171.569714 -41.767424,171.948709 -41.514417,172.097227 -40.956104,172.79858 -40.493962,173.020375 -40.919052)),((174.612009 -36.156397,175.336616 -37.209098,175.357596 -36.526194,175.808887 -36.798942,175.95849 -37.555382,176.763195 -37.881253,177.438813 -37.961248,178.010354 -37.579825,178.517094 -37.695373,178.274731 -38.582813,177.97046 -39.166343,177.206993 -39.145776,176.939981 -39.449736,177.032946 -39.879943,176.885824 -40.065978,176.508017 -40.604808,176.01244 -41.289624,175.239567 -41.688308,175.067898 -41.425895,174.650973 -41.281821,175.22763 -40.459236,174.900157 -39.908933,173.824047 -39.508854,173.852262 -39.146602,174.574802 -38.797683,174.743474 -38.027808,174.697017 -37.381129,174.292028 -36.711092,174.319004 -36.534824,173.840997 -36.121981,173.054171 -35.237125,172.636005 -34.529107,173.007042 -34.450662,173.551298 -35.006183,174.32939 -35.265496,174.612009 -36.156397)))"); | ||
assert_eq!(wkt, "GEOMETRYCOLLECTION(MULTIPOLYGON(((173.020375 -40.919052,173.247234 -41.331999,173.958405 -40.926701,174.247587 -41.349155,174.248517 -41.770008,173.876447 -42.233184,173.22274 -42.970038,172.711246 -43.372288,173.080113 -43.853344,172.308584 -43.865694,171.452925 -44.242519,171.185138 -44.897104,170.616697 -45.908929,169.831422 -46.355775,169.332331 -46.641235,168.411354 -46.619945,167.763745 -46.290197,166.676886 -46.219917,166.509144 -45.852705,167.046424 -45.110941,168.303763 -44.123973,168.949409 -43.935819,169.667815 -43.555326,170.52492 -43.031688,171.12509 -42.512754,171.569714 -41.767424,171.948709 -41.514417,172.097227 -40.956104,172.79858 -40.493962,173.020375 -40.919052)),((174.612009 -36.156397,175.336616 -37.209098,175.357596 -36.526194,175.808887 -36.798942,175.95849 -37.555382,176.763195 -37.881253,177.438813 -37.961248,178.010354 -37.579825,178.517094 -37.695373,178.274731 -38.582813,177.97046 -39.166343,177.206993 -39.145776,176.939981 -39.449736,177.032946 -39.879943,176.885824 -40.065978,176.508017 -40.604808,176.01244 -41.289624,175.239567 -41.688308,175.067898 -41.425895,174.650973 -41.281821,175.22763 -40.459236,174.900157 -39.908933,173.824047 -39.508854,173.852262 -39.146602,174.574802 -38.797683,174.743474 -38.027808,174.697017 -37.381129,174.292028 -36.711092,174.319004 -36.534824,173.840997 -36.121981,173.054171 -35.237125,172.636005 -34.529107,173.007042 -34.450662,173.551298 -35.006183,174.32939 -35.265496,174.612009 -36.156397))))"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would expect a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What would you expect if the input FeatureCollection had a MultiPolygon and then a LineString? |
||
Ok(()) | ||
} | ||
|
||
|
@@ -400,11 +393,11 @@ mod test { | |
let wkt = std::str::from_utf8(&wkt_data).unwrap(); | ||
assert_eq!( | ||
&wkt[0..100], | ||
"POINT(32.533299524864844 0.583299105614628),POINT(30.27500161597942 0.671004121125236),POINT(15.7989" | ||
"GEOMETRYCOLLECTION(POINT(32.533299524864844 0.583299105614628),POINT(30.27500161597942 0.67100412112" | ||
); | ||
assert_eq!( | ||
&wkt[wkt.len()-100..], | ||
"06510862875),POINT(103.85387481909902 1.294979325105942),POINT(114.18306345846304 22.30692675357551)" | ||
"6510862875),POINT(103.85387481909902 1.294979325105942),POINT(114.18306345846304 22.30692675357551))" | ||
); | ||
Ok(()) | ||
} | ||
|
@@ -417,11 +410,11 @@ mod test { | |
let wkt = std::str::from_utf8(&wkt_data).unwrap(); | ||
assert_eq!( | ||
&wkt[0..100], | ||
"POINT(32.533299524864844 0.583299105614628),POINT(30.27500161597942 0.671004121125236),POINT(15.7989" | ||
"GEOMETRYCOLLECTION(POINT(32.533299524864844 0.583299105614628),POINT(30.27500161597942 0.67100412112" | ||
); | ||
assert_eq!( | ||
&wkt[wkt.len()-100..], | ||
"06510862875),POINT(103.85387481909902 1.294979325105942),POINT(114.18306345846304 22.30692675357551)" | ||
"6510862875),POINT(103.85387481909902 1.294979325105942),POINT(114.18306345846304 22.30692675357551))" | ||
); | ||
Ok(()) | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -178,7 +178,16 @@ impl<W: Write> GeomProcessor for WktWriter<'_, W> { | |
|
||
impl<W: Write> PropertyProcessor for WktWriter<'_, W> {} | ||
|
||
impl<W: Write> FeatureProcessor for WktWriter<'_, W> {} | ||
impl<W: Write> FeatureProcessor for WktWriter<'_, W> { | ||
// Feature Collections may contain multiple geometries, so we need to wrap | ||
// them in a GeometryCollection to ensure we output valid geometry. | ||
fn dataset_begin(&mut self, _name: Option<&str>) -> Result<()> { | ||
self.geometrycollection_begin(0, 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think here's a confusion between FeatureCollections and GeometryCollections. The latter is a special geometry type for one feature (https://datatracker.ietf.org/doc/html/rfc7946#appendix-A.7). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll respond just here - since I think all three of your comments are essentially about the same issue.
This was intentional - I am proposing that we explicitly interpret a feature collection as a geometry collection in WKT. You're right that in geojson So then, if we get a FeatureCollection from geojson and want to convert it to WKT, how should we do that? Our current approach is to output a series of comma separated WKT geometries. Unfortunately, this output cannot be parsed by postgis, gdal, jts, nor georust::wkt (thus not by geozero itself!). A corollary exists with the geo-types geometry writer. Similar to WKT, geo-types is strictly Geometry - no Feature/FeatureCollections. And in the same way, a geojson FeatureCollection with multiple features will be output as a top level Do you see a practical downside with the approach I'm proposing here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, strange, but if it's fixes WKT, then I can live with a hacky solution. But I would still expect, that my first remark in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're right that this affects other output formats. My goal is to make sure all the "geometry only" formats — those which don't allow feature data: geo-types, WKT, WKB — behave sanely. For example, here's something I consider a problem with the current As input, consider this geojson feature collection with two Point features.
Then this contrived use case:
Does that seem like a reasonable use case? What centroid would you expect to be computed? Currently it's:
Taking a closer look at the value of `geojson.to_wkb():
To break that down, the first part is:
concatenated directly to:
I don't know if it's valid WKB to simply concatenate multiple WKB's together, but for better or worse, postgis actually seems to parse it:
The problem is that the second point has apparently just been ignored! With the changes proposed in this PR, just like with the WKT example, the output of converting a FeatureCollection to WKB has changed to be wrapped in a GeometryCollection. So now
I think this is much more likely to behave how the user expects. The centroid selected from the example will be the centroid of all the geometries from all the features.
Do you agree that the current situation is problematic, or is my use case overly contrived? Do you think there's a different solution? FYI I pushed up another commit with some tests that show the new wkb behavior. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If this approach generally makes sense to you, I can add some more thorough testing to the other output formats. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the explanation and the good example. The main problem is IMO, that we currently do not distinct between multi-feature (dataset) capable formats like GeoJSON and single feature formats like WKB. It's a good moment to find a proper solution for that, since I'm replicating the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
It seems like you have some reservations about this - so I want to make sure we're on the same page. Setting aside implementation details for now, and just focusing on behavior: I think it's a good expectation that all format conversions (e.g. Test cases and examples can be helpful. Can you help think of some code that behaves surprisingly with the proposed behavior - I think that'd help me understand your reservations about making this default behavior. And maybe then I could be able to address them. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are there any questions I can answer or changes I can make to move this forward? (Also, I just now rebased against latest.) |
||
} | ||
fn dataset_end(&mut self) -> Result<()> { | ||
self.geometrycollection_end(0) | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
mod test { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't look right to me. A GeoJSON feature collection can contain any geometry type, not only GeometryCollections.