Skip to content

Commit

Permalink
Significantly improve performance with large datasets
Browse files Browse the repository at this point in the history
Measurements pre-patch:
* CPU Time: 1600090 ms
* Memory: 117.41 GB
* Total time: 26:45

Measurements post-patch:
* CPU Time: 827613 ms (-48.3%)
* Memory: 166.57 GB (+41.9%)
* Total time: 13:50 (-51%)

Signed-off-by: Taylor Smock <[email protected]>
  • Loading branch information
tsmock committed Mar 22, 2024
1 parent e5a6f9b commit d35dfd9
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

import com.vividsolutions.jump.feature.Feature;
import com.vividsolutions.jump.feature.FeatureCollection;
import com.vividsolutions.jump.feature.FeatureDataset;

/**
* Composes several FeatureMatchers into one. Candidate features are whittled
Expand Down Expand Up @@ -68,8 +69,7 @@ public ChainMatcher(FeatureMatcher[] matchers) {
*/
@Override
public Matches match(Feature target, FeatureCollection candidates) {
Matches survivors = new Matches(
candidates.getFeatureSchema(), candidates.getFeatures());
Matches survivors = new Matches(new FeatureDataset(candidates.getFeatures(), candidates.getFeatureSchema(), candidates.getEnvelope()));
for (FeatureMatcher matcher : matchers) {
survivors = matcher.match(target, survivors);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,6 @@ public interface FeatureMatcher {
* @return the matching features, and a score for each. (Implementors should
* document how they do their scoring).
*/
public Matches match(Feature target, FeatureCollection candidates);
Matches match(Feature target, FeatureCollection candidates);

}
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,12 @@
*/
package com.vividsolutions.jcs.conflate.polygonmatch;

import java.util.Collections;

import org.locationtech.jts.geom.Geometry;
import com.vividsolutions.jump.feature.Feature;
import com.vividsolutions.jump.feature.FeatureCollection;
import com.vividsolutions.jump.feature.FeatureDataset;

/**
* Base class of FeatureMatchers that compare the target to each candidate
Expand All @@ -46,7 +49,9 @@ public IndependentCandidateMatcher() {

@Override
public Matches match(Feature target, FeatureCollection candidates) {
Matches matches = new Matches(candidates.getFeatureSchema());
final FeatureDataset fds = new FeatureDataset(Collections.emptySet(), candidates.getFeatureSchema(),
candidates.getEnvelope());
final Matches matches = new Matches(fds);
for (Feature candidate : candidates) {
double score = match(target.getGeometry(), candidate.getGeometry());
if (score > 0) { matches.add(candidate, score); }
Expand Down
37 changes: 29 additions & 8 deletions src/com/vividsolutions/jcs/conflate/polygonmatch/Matches.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
*/
public class Matches extends AbstractMap<Feature, Double> implements FeatureCollection, Cloneable {
private final Set<Map.Entry<Feature, Double>> entrySet = new HashSet<>();
private int size;
/**
* Creates a Matches object.
* @param schema metadata applicable to the features that will be stored in
Expand Down Expand Up @@ -82,16 +83,26 @@ protected Matches clone() {
public Matches(FeatureSchema schema, List<Feature> features) {
// Pre-size the dataset and the score array to features.size() so that adding
// the features does not trigger repeated ArrayList#grow calls.
this.dataset = new FeatureDataset(features.size(), schema);
this.dataset = new FeatureDataset(features.size(), schema, null);
this.scores = new double[features.size()];
addAll(features, 1, true);
}

for (Feature match : features) {
add(match, 1);
}
/**
* Creates a Matches object, initialized with the given Dataset.
* Every feature of the given dataset is recorded with a score of 1.
* @param featureDataset The dataset to use for initialization; it is copied
* through the FeatureDataset copy constructor rather than stored directly
*/
public Matches(FeatureDataset featureDataset) {
// We want to ensure that the dataset won't have a ton of ArrayList#grow calls
// So we initialize the dataset with all the data
// (the score array is sized to the incoming feature count up front).
this.scores = new double[featureDataset.size()];
this.dataset = new FeatureDataset(featureDataset);

// addToDataset is false here: the copy made above already holds these
// features, so only their scores still need to be recorded.
addAll(featureDataset, 1, false);
}

private final FeatureDataset dataset;
private double[] scores = new double[0];
private double[] scores;

/**
* This method is not supported, because added features need to be associated
Expand All @@ -113,6 +124,12 @@ public void addAll(Collection<? extends Feature> features) {
throw new UnsupportedOperationException("Use #add(feature, score) instead");
}

/**
* Records every feature in {@code features} with the same score by calling
* the three-argument {@code add} once per feature.
* @param features the features to record
* @param score the score given to each feature
* @param addToDataset passed through unchanged to the three-argument {@code add}
*/
private void addAll(Iterable<? extends Feature> features, double score, boolean addToDataset) {
for (Feature feature : features) {
add(feature, score, addToDataset);
}
}

/**
* This method is not supported, because added features need to be associated
* with a score. Use #add(Feature, double) instead.
Expand Down Expand Up @@ -191,6 +208,10 @@ public void remove(Feature feature) {
* @param score the confidence of the match, ranging from 0 to 1
*/
public void add(Feature feature, double score) {
// Delegate to the three-argument overload with addToDataset = true, so the
// feature is also stored in the backing dataset.
add(feature, score, true);
}

private void add(Feature feature, double score, boolean addToDataset) {
// We want to avoid the string concatenation here, if we don't need it.
// It is *very* expensive when run with large datasets.
// This used to be an Assert.isTrue statement
Expand All @@ -200,8 +221,8 @@ public void add(Feature feature, double score) {
if (score == 0) {
return;
}
scoreAdd(dataset.size(), score);
dataset.add(feature);
scoreAdd(size++, score);
if (addToDataset) dataset.add(feature);
if (score > topScore) {
topScore = score;
topMatch = feature;
Expand All @@ -216,7 +237,7 @@ private void scoreAdd(int index, double score) {
}

private Feature topMatch;
private double topScore = 0;
private double topScore;

public double getTopScore() {
return topScore;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.locationtech.jts.geom.Envelope;
import com.vividsolutions.jump.feature.Feature;
import com.vividsolutions.jump.feature.FeatureCollection;
import com.vividsolutions.jump.feature.FeatureDataset;
import com.vividsolutions.jump.geom.EnvelopeUtil;

/**
Expand Down Expand Up @@ -79,6 +80,6 @@ public WindowMatcher() {}
public Matches match(Feature target, FeatureCollection candidates) {
// Start from a copy of the target geometry's envelope, then pass it through
// EnvelopeUtil.expand together with `buffer`.
Envelope window = new Envelope(target.getGeometry().getEnvelopeInternal());
window = EnvelopeUtil.expand(window, buffer);
// Rendered diff: the first return below is the pre-patch line; the second is
// its post-patch replacement, which wraps the query result in a
// FeatureDataset before building the Matches.
return new Matches(candidates.getFeatureSchema(), candidates.query(window));
// NOTE(review): the new dataset is seeded with `window` as its envelope. If
// query(window) can return features whose geometry extends beyond the window,
// that stored envelope would be smaller than the real extent -- TODO confirm
// against the FeatureCollection#query contract.
return new Matches(new FeatureDataset(candidates.query(window), candidates.getFeatureSchema(), window));
}
}
58 changes: 41 additions & 17 deletions src/com/vividsolutions/jump/feature/FeatureDataset.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,34 +55,58 @@ public class FeatureDataset implements FeatureCollection {
* @param featureSchema the types of the attributes of the features in this collection
*/
public FeatureDataset(Collection<Feature> newFeatures, FeatureSchema featureSchema) {
// Rendered diff: the direct list copy on the next line is the pre-patch body.
features = new ArrayList<>(newFeatures);
// Post-patch replacement: delegate to the three-argument constructor with a
// null envelope (no precomputed envelope).
this(newFeatures, featureSchema, null);
}

/**
* Creates a FeatureDataset, initialized with a group of Features.
* The features are copied into a new list; the given collection is not retained.
* @param newFeatures an initial group of features to add to this FeatureDataset
* @param featureSchema the types of the attributes of the features in this collection
* @param envelope The expected envelope for the features; may be null. A non-null
* envelope is copied and is not checked against the features here.
*/
public FeatureDataset(Collection<Feature> newFeatures, FeatureSchema featureSchema, Envelope envelope) {
this.features = new ArrayList<>(newFeatures);
this.featureSchema = featureSchema;
// Store a copy rather than the caller's instance; null stays null.
this.envelope = envelope != null ? envelope.copy() : null;
}

/**
* Creates an empty FeatureDataset whose feature list is pre-sized to an expected capacity.
* @param newFeatures the expected number of features, used as the initial capacity of the list
* @param featureSchema the types of the attributes of the features in this collection
* @param envelope the expected envelope for this dataset; may be null, otherwise a copy is stored
*/
// Rendered diff: the two-argument signature and the unqualified `features`
// assignment are the pre-patch lines; the three-argument signature and the
// `this.features` assignment are their post-patch replacements.
public FeatureDataset(int newFeatures, FeatureSchema featureSchema) {
features = new ArrayList<>(newFeatures);
public FeatureDataset(int newFeatures, FeatureSchema featureSchema, Envelope envelope) {
this.features = new ArrayList<>(newFeatures);
this.featureSchema = featureSchema;
// Store a copy rather than the caller's instance; null stays null.
this.envelope = envelope == null ? null : envelope.copy();
}

/**
* Creates a FeatureDataset.
* @param featureSchema the types of the attributes of the features in this collection
*/
public FeatureDataset(FeatureSchema featureSchema) {
// Rendered diff: the pre-patch call with an explicit type argument is followed
// by its post-patch diamond form. Both pass a new empty list to the
// two-argument constructor.
this(new ArrayList<Feature>(), featureSchema);
this(new ArrayList<>(), featureSchema);
}

/**
* Creates a copy of another dataset.
* The feature list is copied into a new list (the Feature objects themselves are
* shared), the schema reference is reused, and the other dataset's envelope is
* copied only if it is currently non-null.
* @param otherDataset The dataset to clone
*/
public FeatureDataset(FeatureDataset otherDataset) {
this(otherDataset.getFeatures(), otherDataset.getFeatureSchema());
// Read the field directly instead of calling getEnvelope(), so no envelope is
// computed on the source dataset just to make the copy.
if (otherDataset.envelope != null) {
this.envelope = otherDataset.envelope.copy();
}
}

/**
* Returns the feature stored at the given position of the internal feature list.
* @param index position in the feature list
* @return the feature at that position
*/
public Feature getFeature(int index) {
return features.get(index);
}

/**
* Returns the schema this dataset was constructed with.
* (The signature appears twice in this diff view; the visible text of both
* lines is identical, presumably a whitespace-only change.)
*/
@Override
public FeatureSchema getFeatureSchema() {
public FeatureSchema getFeatureSchema() {
return featureSchema;
}

Expand All @@ -91,7 +115,7 @@ public FeatureSchema getFeatureSchema() {
* later change a Feature's geometry using Feature#setGeometry.
*/
@Override
public Envelope getEnvelope() {
public Envelope getEnvelope() {
if (envelope == null) {
envelope = new Envelope();

Expand All @@ -105,12 +129,12 @@ public Envelope getEnvelope() {
}

/**
* Returns a read-only view of the internal feature list; the list is wrapped,
* not copied.
* (The signature appears twice in this diff view; the visible text of both
* lines is identical, presumably a whitespace-only change.)
*/
@Override
public List<Feature> getFeatures() {
public List<Feature> getFeatures() {
return Collections.unmodifiableList(features);
}

/**
* Returns true when {@link #size()} is 0.
* (The signature appears twice in this diff view; the visible text of both
* lines is identical, presumably a whitespace-only change.)
*/
@Override
public boolean isEmpty() {
public boolean isEmpty() {
return size() == 0;
}

Expand All @@ -122,7 +146,7 @@ public boolean isEmpty() {
//<<TODO:DESIGN>> Perhaps return value should be a Set, not a List, because order
//doesn't matter. [Jon Aquino]
@Override
public List<Feature> query(Envelope envelope) {
public List<Feature> query(Envelope envelope) {
if (!envelope.intersects(getEnvelope())) {
return new ArrayList<>();
}
Expand All @@ -141,7 +165,7 @@ public List<Feature> query(Envelope envelope) {
}

@Override
public void add(Feature feature) {
public void add(Feature feature) {
features.add(feature);
if (envelope != null) {
envelope.expandToInclude(feature.getGeometry().getEnvelopeInternal());
Expand All @@ -162,15 +186,15 @@ public boolean contains(Feature feature) {
* @param env
*/
@Override
public Collection<Feature> remove(Envelope env) {
public Collection<Feature> remove(Envelope env) {
// (Signature shown twice: both sides of a changed line in this diff view.)
// The local `features` is the query result and shadows the field of the same
// name; removeAll() then removes those features from the field's list.
Collection<Feature> features = query(env);
removeAll(features);

// Hand back the features that were selected by the query.
return features;
}

/**
* Removes the given feature from the internal list and then calls
* {@link #invalidateEnvelope()}.
* (The signature appears twice in this diff view; the visible text of both
* lines is identical, presumably a whitespace-only change.)
* @param feature the feature to remove
*/
@Override
public void remove(Feature feature) {
public void remove(Feature feature) {
features.remove(feature);
invalidateEnvelope();
}
Expand All @@ -179,18 +203,18 @@ public void remove(Feature feature) {
* Removes all features from this collection.
*/
@Override
public void clear() {
public void clear() {
// (Signature shown twice: both sides of a changed line in this diff view.)
// invalidateEnvelope()'s body is not visible in this view; presumably it
// discards the cached envelope -- confirm in FeatureDataset.
invalidateEnvelope();
features.clear();
}

/**
* Returns the number of features in the internal list.
* (The signature appears twice in this diff view; the visible text of both
* lines is identical, presumably a whitespace-only change.)
*/
@Override
public int size() {
public int size() {
return features.size();
}

/**
* Returns the internal feature list's own iterator.
* (The signature appears twice in this diff view; the visible text of both
* lines is identical, presumably a whitespace-only change.)
*/
// NOTE(review): because this is the backing list's iterator, a removal made
// through it would not go through invalidateEnvelope() -- confirm whether
// callers rely on that.
@Override
public Iterator<Feature> iterator() {
public Iterator<Feature> iterator() {
return features.iterator();
}

Expand All @@ -199,7 +223,7 @@ public void invalidateEnvelope() {
}

@Override
public void addAll(Collection<? extends Feature> features) {
public void addAll(Collection<? extends Feature> features) {
this.features.addAll(features);
if (envelope != null) {
for (Feature feature : features) {
Expand All @@ -209,7 +233,7 @@ public void addAll(Collection<? extends Feature> features) {
}

/**
* Removes all of the given features from the internal list and then calls
* {@link #invalidateEnvelope()}.
* (The signature appears twice in this diff view; the visible text of both
* lines is identical, presumably a whitespace-only change.)
* @param features the features to remove; this parameter shadows the field of
* the same name, hence the explicit {@code this.features} below
*/
@Override
public void removeAll(Collection<Feature> features) {
public void removeAll(Collection<Feature> features) {
this.features.removeAll(features);
invalidateEnvelope();
}
Expand Down

0 comments on commit d35dfd9

Please sign in to comment.