diff --git a/solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java b/solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java index 821df88c4c9..7d032562882 100644 --- a/solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java +++ b/solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java @@ -386,7 +386,7 @@ public String toString() { } } - private static class StartOffsetComparator implements Comparator { + static class StartOffsetComparator implements Comparator { @Override public int compare(SpellCheckCorrection o1, SpellCheckCorrection o2) { return o1.getOriginal().startOffset() - o2.getOriginal().startOffset(); diff --git a/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java b/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java index a099cc17ddc..a3bf4f3fcba 100644 --- a/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java +++ b/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java @@ -51,6 +51,14 @@ public class SpellCheckCollator { private boolean suggestionsMayOverlap = false; private int docCollectionLimit = 0; + private static volatile boolean mergeSpellCheckCorrections = + Boolean.parseBoolean( + System.getProperty("solr.SpellCheckCollator.mergeSpellCheckCorrections", "true")); + + private static volatile boolean logSIOOBEDebugData = + Boolean.parseBoolean( + System.getProperty("solr.SpellCheckCollator.logSIOOBEDebugData", "true")); + public List collate( SpellingResult result, String originalQuery, ResponseBuilder ultimateResponse) { List collations = new ArrayList<>(); @@ -212,7 +220,8 @@ public List collate( return collations; } - private String getCollation(String origQuery, List corrections) { + static String getCollation(String origQuery, List correctionsAll) { + List corrections = mergeSpellCheckCorrections(correctionsAll); StringBuilder collation = new StringBuilder(origQuery); int offset = 0; String corr = ""; @@ -260,6 +269,12 @@ private String getCollation(String origQuery, List correct } corr = corrSb.toString(); int startIndex = tok.startOffset() + offset - oneForReqOrProhib; + + if (startIndex < 0) { // avoiding StringIndexOutOfBoundsException see SOLR-13360 + logSIOOBEDebugData(origQuery, correctionsAll); + break; + } + int endIndex = tok.endOffset() + offset; collation.replace(startIndex, endIndex, corr); offset += corr.length() - oneForReqOrProhib - (tok.endOffset() - tok.startOffset()); @@ -267,6 +282,61 @@ private String getCollation(String origQuery, List correct return collation.toString(); } + static List mergeSpellCheckCorrections( + List corrections) { + if (!mergeSpellCheckCorrections) { + return corrections; + } + + // Note. fix overlapping token intervals + // - sorting by token.startOffset. unclear if this can cause some reshuffle where lower rank + // items with the same position can come first + // - remove overlapping [startOffset, endOffset] intervals + + List filtered = new ArrayList(corrections); + filtered.sort(new PossibilityIterator.StartOffsetComparator()); + + int end = -1; + for (Iterator iterator = filtered.iterator(); iterator.hasNext(); ) { + SpellCheckCorrection correction = iterator.next(); + Token t = correction.getOriginal(); + if (t.startOffset() > end && t.endOffset() > end) { + end = t.endOffset(); + } else { + iterator.remove(); + } + } + + return filtered; + } + + static void logSIOOBEDebugData(String origQuery, List corrections) { + if (!logSIOOBEDebugData) { + return; + } + logSIOOBEDebugData = false; + StringBuilder info = new StringBuilder(origQuery); + info.append("["); + for (SpellCheckCorrection correction : corrections) { + Token tok = correction.getOriginal(); + info.append( + "('" + + tok + + "', " + + tok.startOffset() + + ", " + + tok.endOffset() + + ", " + + tok.getPositionIncrement() + + ", '" + + correction.getOriginal() + + "'),"); + } + info.append("]"); + info.append(", mergeSpellCheckCorrections=" + mergeSpellCheckCorrections); + log.warn("logging SIOOBE debug data, please report to SOLR-13360 {}", info); + } + public SpellCheckCollator setMaxCollations(int maxCollations) { this.maxCollations = maxCollations; return this; diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-collapseqparser.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-collapseqparser.xml index d104e8d80af..5cbb2d87861 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-collapseqparser.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-collapseqparser.xml @@ -177,6 +177,13 @@ a_s 3 + + + direct_spelltest_t + solr.DirectSolrSpellChecker + spelltest_t + 3 +