Skip to content

Commit b2b6622

Browse files
committed
precompute address rows on import
1 parent e941c98 commit b2b6622

File tree

10 files changed

+219
-107
lines changed

10 files changed

+219
-107
lines changed

src/main/java/de/komoot/photon/nominatim/DBDataAdapter.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,9 @@ public interface DBDataAdapter {
3030
* Wrap a DELETE statement with a RETURNING clause.
3131
*/
3232
String deleteReturning(String deleteSQL, String columns);
33+
34+
/**
35+
* Build an SQL expression that collects the values of a SELECT into a JSON array.
36+
*/
37+
String jsonArrayFromSelect(String valueSQL, String fromSQL);
3338
}

src/main/java/de/komoot/photon/nominatim/NominatimConnector.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,6 @@
1515
* Base class for workers connecting to a Nominatim database
1616
*/
1717
public class NominatimConnector {
18-
protected static final String SELECT_COLS_PLACEX = "SELECT place_id, osm_type, osm_id, class, type, name, postcode, address, extratags, ST_Envelope(geometry) AS bbox, parent_place_id, linked_place_id, rank_address, rank_search, importance, country_code, centroid";
19-
protected static final String SELECT_COLS_ADDRESS = "SELECT p.name, p.class, p.type, p.rank_address";
20-
protected static final String SELECT_OSMLINE_OLD_STYLE = "SELECT place_id, osm_id, parent_place_id, startnumber, endnumber, interpolationtype, postcode, country_code, linegeo";
21-
protected static final String SELECT_OSMLINE_NEW_STYLE = "SELECT place_id, osm_id, parent_place_id, startnumber, endnumber, step, postcode, country_code, linegeo";
22-
2318
protected final DBDataAdapter dbutils;
2419
protected final JdbcTemplate template;
2520
protected Map<String, Map<String, String>> countryNames;

src/main/java/de/komoot/photon/nominatim/NominatimImporter.java

Lines changed: 105 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -2,28 +2,19 @@
22

33
import de.komoot.photon.PhotonDoc;
44
import de.komoot.photon.nominatim.model.*;
5-
import org.apache.commons.dbcp2.BasicDataSource;
65
import org.locationtech.jts.geom.Geometry;
76
import org.slf4j.Logger;
8-
import org.springframework.jdbc.core.JdbcTemplate;
9-
import org.springframework.jdbc.core.RowCallbackHandler;
10-
import org.springframework.jdbc.core.RowMapper;
117

12-
import java.sql.ResultSet;
13-
import java.sql.SQLException;
148
import java.sql.Types;
15-
import java.util.*;
9+
import java.util.List;
10+
import java.util.Map;
1611

1712
/**
1813
* Importer for data from a Nominatim database.
1914
*/
2015
public class NominatimImporter extends NominatimConnector {
2116
private static final Logger LOGGER = org.slf4j.LoggerFactory.getLogger(NominatimImporter.class);
2217

23-
// One-item cache for address lookup. Speeds up rank 30 processing.
24-
private long parentPlaceId = -1;
25-
private List<AddressRow> parentTerms = null;
26-
2718
public NominatimImporter(String host, int port, String database, String username, String password) {
2819
this(host, port, database, username, password, new PostgisDataAdapter());
2920
}
@@ -33,52 +24,6 @@ public NominatimImporter(String host, int port, String database, String username
3324
}
3425

3526

36-
List<AddressRow> getAddresses(PhotonDoc doc) {
37-
RowMapper<AddressRow> rowMapper = (rs, rowNum) -> new AddressRow(
38-
dbutils.getMap(rs, "name"),
39-
rs.getString("class"),
40-
rs.getString("type"),
41-
rs.getInt("rank_address")
42-
);
43-
44-
AddressType atype = doc.getAddressType();
45-
46-
if (atype == null || atype == AddressType.COUNTRY) {
47-
return Collections.emptyList();
48-
}
49-
50-
List<AddressRow> terms = null;
51-
52-
if (atype == AddressType.HOUSE) {
53-
long placeId = doc.getParentPlaceId();
54-
if (placeId != parentPlaceId) {
55-
parentTerms = template.query(SELECT_COLS_ADDRESS
56-
+ " FROM placex p, place_addressline pa"
57-
+ " WHERE p.place_id = pa.address_place_id and pa.place_id = ?"
58-
+ " and pa.cached_rank_address > 4 and pa.address_place_id != ? and pa.isaddress"
59-
+ " ORDER BY rank_address desc, fromarea desc, distance asc, rank_search desc",
60-
rowMapper, placeId, placeId);
61-
62-
// need to add the term for the parent place ID itself
63-
parentTerms.addAll(0, template.query(SELECT_COLS_ADDRESS + " FROM placex p WHERE p.place_id = ?",
64-
rowMapper, placeId));
65-
parentPlaceId = placeId;
66-
}
67-
terms = parentTerms;
68-
69-
} else {
70-
long placeId = doc.getPlaceId();
71-
terms = template.query(SELECT_COLS_ADDRESS
72-
+ " FROM placex p, place_addressline pa"
73-
+ " WHERE p.place_id = pa.address_place_id and pa.place_id = ?"
74-
+ " and pa.cached_rank_address > 4 and pa.address_place_id != ? and pa.isaddress"
75-
+ " ORDER BY rank_address desc, fromarea desc, distance asc, rank_search desc",
76-
rowMapper, placeId, placeId);
77-
}
78-
79-
return terms;
80-
}
81-
8227
/**
8328
* Parse every relevant row in placex and location_osmline
8429
* for the given country. Also imports places that have no country.
@@ -105,21 +50,74 @@ public void readCountry(String countryCode, ImportThread importThread) {
10550
sqlArgTypes = new int[]{Types.VARCHAR};
10651
}
10752

53+
NominatimAddressCache addressCache = new NominatimAddressCache();
54+
addressCache.loadCountryAddresses(template, dbutils, countryCode);
55+
10856
final PlaceRowMapper placeRowMapper = new PlaceRowMapper(dbutils);
109-
template.query(SELECT_COLS_PLACEX + " FROM placex " +
110-
" WHERE linked_place_id IS NULL AND centroid IS NOT NULL AND " + countrySQL +
111-
" ORDER BY geometry_sector, parent_place_id",
57+
// First read ranks below 30, independent places
58+
template.query(
59+
"SELECT place_id, osm_type, osm_id, class, type, name, postcode," +
60+
" address, extratags, ST_Envelope(geometry) AS bbox, parent_place_id," +
61+
" linked_place_id, rank_address, rank_search, importance, country_code, centroid," +
62+
dbutils.jsonArrayFromSelect(
63+
"address_place_id",
64+
"FROM place_addressline pa " +
65+
" WHERE pa.place_id = p.place_id AND isaddress" +
66+
" ORDER BY cached_rank_address DESC") + " as addresslines" +
67+
" FROM placex p" +
68+
" WHERE linked_place_id IS NULL AND centroid IS NOT NULL AND " + countrySQL +
69+
" AND rank_search < 30" +
70+
" ORDER BY geometry_sector, parent_place_id",
11271
sqlArgs, sqlArgTypes, rs -> {
11372
final PhotonDoc doc = placeRowMapper.mapRow(rs, 0);
73+
final Map<String, String> address = dbutils.getMap(rs, "address");
74+
11475
assert (doc != null);
11576

116-
final Map<String, String> address = dbutils.getMap(rs, "address");
77+
final var addressPlaces = addressCache.getAddressList(rs.getString("addresslines"));
78+
completePlace(doc, addressPlaces);
79+
doc.address(address); // take precedence over computed address
80+
doc.setCountry(cnames);
81+
82+
var result = NominatimResult.fromAddress(doc, address);
11783

84+
if (result.isUsefulForIndex()) {
85+
importThread.addDocument(result);
86+
}
87+
});
88+
89+
// Next get all POIs/housenumbers.
90+
template.query(
91+
"SELECT p.place_id, p.osm_type, p.osm_id, p.class, p.type, p.name, p.postcode," +
92+
" p.address, p.extratags, ST_Envelope(p.geometry) AS bbox, p.parent_place_id," +
93+
" p.linked_place_id, p.rank_address, p.rank_search, p.importance, p.country_code, p.centroid," +
94+
" parent.class as parent_class, parent.type as parent_type," +
95+
" parent.rank_address as parent_rank_address, parent.name as parent_name, " +
96+
dbutils.jsonArrayFromSelect(
97+
"address_place_id",
98+
"FROM place_addressline pa " +
99+
" WHERE pa.place_id IN (p.place_id, coalesce(p.parent_place_id, p.place_id)) AND isaddress" +
100+
" ORDER BY cached_rank_address DESC, pa.place_id = p.place_id DESC") + " as addresslines" +
101+
" FROM placex p LEFT JOIN placex parent ON p.parent_place_id = parent.place_id" +
102+
" WHERE p.linked_place_id IS NULL AND p.centroid IS NOT NULL AND p." + countrySQL +
103+
" AND p.rank_search = 30 " +
104+
" ORDER BY p.geometry_sector",
105+
sqlArgs, sqlArgTypes, rs -> {
106+
final PhotonDoc doc = placeRowMapper.mapRow(rs, 0);
107+
final Map<String, String> address = dbutils.getMap(rs, "address");
118108

119-
completePlace(doc);
120-
// Add address last, so it takes precedence.
121-
doc.address(address);
109+
assert (doc != null);
122110

111+
final var addressPlaces = addressCache.getAddressList(rs.getString("addresslines"));
112+
if (rs.getString("parent_class") != null) {
113+
addressPlaces.add(0, new AddressRow(
114+
dbutils.getMap(rs, "parent_name"),
115+
rs.getString("parent_class"),
116+
rs.getString("parent_type"),
117+
rs.getInt("parent_rank_address")));
118+
}
119+
completePlace(doc, addressPlaces);
120+
doc.address(address); // take precedence over computed address
123121
doc.setCountry(cnames);
124122

125123
var result = NominatimResult.fromAddress(doc, address);
@@ -130,32 +128,50 @@ public void readCountry(String countryCode, ImportThread importThread) {
130128
});
131129

132130
final OsmlineRowMapper osmlineRowMapper = new OsmlineRowMapper();
133-
template.query((hasNewStyleInterpolation ? SELECT_OSMLINE_NEW_STYLE : SELECT_OSMLINE_OLD_STYLE) +
134-
" FROM location_property_osmline" +
135-
" WHERE startnumber is not null AND " + countrySQL +
136-
" ORDER BY geometry_sector, parent_place_id",
131+
template.query(
132+
"SELECT p.place_id, p.osm_id, p.parent_place_id, p.startnumber, p.endnumber, p.postcode, p.country_code, p.linegeo," +
133+
(hasNewStyleInterpolation ? " p.step," : " p.interpolationtype,") +
134+
" parent.class as parent_class, parent.type as parent_type," +
135+
" parent.rank_address as parent_rank_address, parent.name as parent_name, " +
136+
dbutils.jsonArrayFromSelect(
137+
"address_place_id",
138+
"FROM place_addressline pa " +
139+
" WHERE pa.place_id IN (p.place_id, coalesce(p.parent_place_id, p.place_id)) AND isaddress" +
140+
" ORDER BY cached_rank_address DESC, pa.place_id = p.place_id DESC") + " as addresslines" +
141+
" FROM location_property_osmline p LEFT JOIN placex parent ON p.parent_place_id = parent.place_id" +
142+
" WHERE startnumber is not null AND p." + countrySQL +
143+
" ORDER BY p.geometry_sector, p.parent_place_id",
137144
sqlArgs, sqlArgTypes, rs -> {
138-
final PhotonDoc doc = osmlineRowMapper.mapRow(rs, 0);
139-
140-
completePlace(doc);
141-
doc.setCountry(cnames);
142-
143-
final Geometry geometry = dbutils.extractGeometry(rs, "linegeo");
144-
final NominatimResult docs;
145-
if (hasNewStyleInterpolation) {
146-
docs = NominatimResult.fromInterpolation(
147-
doc, rs.getLong("startnumber"), rs.getLong("endnumber"),
148-
rs.getLong("step"), geometry);
149-
} else {
150-
docs = NominatimResult.fromInterpolation(
151-
doc, rs.getLong("startnumber"), rs.getLong("endnumber"),
152-
rs.getString("interpolationtype"), geometry);
153-
}
145+
final PhotonDoc doc = osmlineRowMapper.mapRow(rs, 0);
146+
147+
final var addressPlaces = addressCache.getAddressList(rs.getString("addresslines"));
148+
if (rs.getString("parent_class") != null) {
149+
addressPlaces.add(0, new AddressRow(
150+
dbutils.getMap(rs, "parent_name"),
151+
rs.getString("parent_class"),
152+
rs.getString("parent_type"),
153+
rs.getInt("parent_rank_address")));
154+
}
155+
completePlace(doc, addressPlaces);
154156

155-
if (docs.isUsefulForIndex()) {
156-
importThread.addDocument(docs);
157-
}
158-
});
157+
doc.setCountry(cnames);
158+
159+
final Geometry geometry = dbutils.extractGeometry(rs, "linegeo");
160+
final NominatimResult docs;
161+
if (hasNewStyleInterpolation) {
162+
docs = NominatimResult.fromInterpolation(
163+
doc, rs.getLong("startnumber"), rs.getLong("endnumber"),
164+
rs.getLong("step"), geometry);
165+
} else {
166+
docs = NominatimResult.fromInterpolation(
167+
doc, rs.getLong("startnumber"), rs.getLong("endnumber"),
168+
rs.getString("interpolationtype"), geometry);
169+
}
170+
171+
if (docs.isUsefulForIndex()) {
172+
importThread.addDocument(docs);
173+
}
174+
});
159175

160176
}
161177

@@ -164,8 +180,7 @@ public void readCountry(String countryCode, ImportThread importThread) {
164180
*
165181
* @param doc
166182
*/
167-
private void completePlace(PhotonDoc doc) {
168-
final List<AddressRow> addresses = getAddresses(doc);
183+
private void completePlace(PhotonDoc doc, List<AddressRow> addresses) {
169184
final AddressType doctype = doc.getAddressType();
170185
for (AddressRow address : addresses) {
171186
AddressType atype = address.getAddressType();

src/main/java/de/komoot/photon/nominatim/NominatimUpdater.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@
1515
public class NominatimUpdater extends NominatimConnector {
1616
private static final org.slf4j.Logger LOGGER = org.slf4j.LoggerFactory.getLogger(NominatimUpdater.class);
1717

18+
private static final String SELECT_COLS_PLACEX = "SELECT place_id, osm_type, osm_id, class, type, name, postcode, address, extratags, ST_Envelope(geometry) AS bbox, parent_place_id, linked_place_id, rank_address, rank_search, importance, country_code, centroid";
19+
private static final String SELECT_COLS_ADDRESS = "SELECT p.name, p.class, p.type, p.rank_address";
20+
private static final String SELECT_OSMLINE_OLD_STYLE = "SELECT place_id, osm_id, parent_place_id, startnumber, endnumber, interpolationtype, postcode, country_code, linegeo";
21+
private static final String SELECT_OSMLINE_NEW_STYLE = "SELECT place_id, osm_id, parent_place_id, startnumber, endnumber, step, postcode, country_code, linegeo";
22+
1823
private static final String TRIGGER_SQL =
1924
"DROP TABLE IF EXISTS photon_updates;"
2025
+ "CREATE TABLE photon_updates (rel TEXT, place_id BIGINT,"

src/main/java/de/komoot/photon/nominatim/PostgisDataAdapter.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,4 +58,9 @@ public Boolean mapRow(ResultSet resultSet, int i) throws SQLException {
5858
public String deleteReturning(String deleteSQL, String columns) {
5959
return deleteSQL + " RETURNING " + columns;
6060
}
61+
62+
@Override
63+
public String jsonArrayFromSelect(String valueSQL, String fromSQL) {
64+
return "(SELECT json_agg(val) FROM (SELECT " + valueSQL + " as val " + fromSQL + ") xxx)";
65+
}
6166
}

src/main/java/de/komoot/photon/nominatim/model/AddressRow.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,14 @@ public boolean isUsefulForContext() {
3737
public Map<String, String> getName() {
3838
return this.name;
3939
}
40+
41+
@Override
42+
public String toString() {
43+
return "AddressRow{" +
44+
"name=" + name.getOrDefault("name", "?") +
45+
", osmKey='" + osmKey + '\'' +
46+
", osmValue='" + osmValue + '\'' +
47+
", rankAddress=" + rankAddress +
48+
'}';
49+
}
4050
}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
package de.komoot.photon.nominatim.model;
2+
3+
import de.komoot.photon.nominatim.DBDataAdapter;
4+
import org.json.JSONArray;
5+
import org.slf4j.Logger;
6+
import org.springframework.jdbc.core.JdbcTemplate;
7+
import org.springframework.jdbc.core.RowCallbackHandler;
8+
9+
import java.util.ArrayList;
10+
import java.util.HashMap;
11+
import java.util.List;
12+
import java.util.Map;
13+
14+
/**
15+
* Container for caching information about address parts.
16+
*/
17+
public class NominatimAddressCache {
18+
private static final Logger LOGGER = org.slf4j.LoggerFactory.getLogger(NominatimAddressCache.class);
19+
20+
private static final String BASE_COUNTRY_QUERY =
21+
"SELECT place_id, name, class, type, rank_address FROM placex" +
22+
" WHERE rank_address between 5 and 25 AND linked_place_id is null";
23+
24+
private final Map<Integer, AddressRow> addresses = new HashMap<>();
25+
26+
public void loadCountryAddresses(JdbcTemplate template, DBDataAdapter dbutils, String countryCode) {
27+
final RowCallbackHandler rowMapper = (rs) -> {
28+
addresses.put(
29+
rs.getInt("place_id"),
30+
new AddressRow(
31+
dbutils.getMap(rs, "name"),
32+
rs.getString("class"),
33+
rs.getString("type"),
34+
rs.getInt("rank_address")
35+
));
36+
};
37+
38+
if (countryCode == null) {
39+
template.query(BASE_COUNTRY_QUERY + " AND country_code is null", rowMapper);
40+
} else {
41+
template.query(BASE_COUNTRY_QUERY + " AND country_code = ?", rowMapper, countryCode);
42+
}
43+
44+
if (addresses.size() > 0) {
45+
LOGGER.info("Loaded {} address places for country {}", addresses.size(), countryCode);
46+
}
47+
}
48+
49+
public List<AddressRow> getAddressList(String addressline) {
50+
ArrayList<AddressRow> outlist = new ArrayList<>();
51+
52+
if (addressline != null && !addressline.isBlank()) {
53+
JSONArray addressPlaces = new JSONArray(addressline);
54+
for (int i = 0; i < addressPlaces.length(); ++i) {
55+
Integer place_id = addressPlaces.optInt(i);
56+
if (place_id != null) {
57+
AddressRow row = addresses.get(place_id);
58+
if (row != null) {
59+
outlist.add(row);
60+
}
61+
}
62+
}
63+
}
64+
65+
return outlist;
66+
}
67+
}

0 commit comments

Comments
 (0)