-
Notifications
You must be signed in to change notification settings - Fork 690
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
SOLR-13360 StringIndexOutOfBoundsException: String index out of range: -3
- Loading branch information
Showing
4 changed files
with
364 additions
and
40 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
136 changes: 136 additions & 0 deletions
136
solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorCollationOnlyTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
package org.apache.solr.spelling; | ||
|
||
import java.util.List; | ||
import org.apache.solr.SolrTestCaseJ4; | ||
import org.junit.Test; | ||
|
||
public class SpellCheckCollatorCollationOnlyTest extends SolrTestCaseJ4 { | ||
|
||
// Tests SOLR-13360 using some manual correction scenario based on various observations recorded | ||
// in the ticket. | ||
// | ||
// Notes | ||
// - tokens with (tok.getPositionIncrement() == 0) will be filtered out. | ||
// - replacing longer tokens with shorter ones can make the offset go negative, which seems ok as | ||
// long as the startOffset is always in (strict) increasing order | ||
|
||
@Test | ||
public void testCollationGeneration1() { | ||
// sample from synonyms.txt: | ||
// # Synonyms used in semantic expansion | ||
// tabby => tabby, cat, feline, animal | ||
// persian => persian, cat, feline, animal | ||
|
||
String origQuery = "cats persians tabby"; | ||
List<SpellCheckCorrection> corrections = | ||
List.of( | ||
// ref 1st token | ||
correction("cats", 0, 4, 1, "cat"), // original token. offset goes to -1 | ||
|
||
// ref 2nd token | ||
correction("persian", 5, 13, 1, "persian"), // original token. offset goes to -2 | ||
|
||
// ref 3rd token | ||
correction("tabbi", 14, 19, 1, "tabbi"), // original token | ||
correction("cat", 14, 19, 0, "cat"), // synonym | ||
correction("felin", 14, 19, 0, "felin"), // synonym | ||
correction("anim", 14, 19, 0, "anim") // synonym | ||
); | ||
|
||
String collation = SpellCheckCollator.getCollation(origQuery, corrections); | ||
String expected = "cat persian tabbi"; | ||
assertEquals("Incorrect collation: " + collation, expected, collation); | ||
} | ||
|
||
@Test | ||
public void testCollationGeneration1Shuffle() { | ||
// same as testCollationGeneration1 but I am manually shuffling the tokens | ||
|
||
String origQuery = "cats persians tabby"; | ||
List<SpellCheckCorrection> corrections = | ||
List.of( | ||
// ref 2nd token | ||
correction("persian", 5, 13, 1, "persian"), // original token. offset goes to -2 | ||
|
||
// ref 1st token | ||
correction("cats", 0, 4, 1, "cat"), // original token. offset goes to -1 | ||
|
||
// ref 3rd token | ||
correction("tabbi", 14, 19, 1, "tabbi"), // original token | ||
correction("cat", 14, 19, 0, "cat"), // synonym | ||
correction("felin", 14, 19, 0, "felin"), // synonym | ||
correction("anim", 14, 19, 0, "anim") // synonym | ||
); | ||
|
||
String collation = SpellCheckCollator.getCollation(origQuery, corrections); | ||
String expected = "cat persian tabbi"; | ||
assertEquals("Incorrect collation: " + collation, expected, collation); | ||
} | ||
|
||
@Test | ||
public void testCollationGeneration1Repeat() { | ||
// same as testCollationGeneration1 but I am manually repeating one of the tokens | ||
|
||
String origQuery = "cats persians tabby"; | ||
List<SpellCheckCorrection> corrections = | ||
List.of( | ||
// ref 1st token | ||
correction("cats", 0, 4, 1, "cat"), // original token. offset goes to -1 | ||
|
||
// ref 1st token - duplicated | ||
correction("cats", 0, 4, 1, "cat"), // original token. offset goes to -1 | ||
|
||
// ref 2nd token | ||
correction("persian", 5, 13, 1, "persian"), // original token. offset goes to -2 | ||
|
||
// ref 3rd token | ||
correction("tabbi", 14, 19, 1, "tabbi"), // original token | ||
correction("cat", 14, 19, 0, "cat"), // synonym | ||
correction("felin", 14, 19, 0, "felin"), // synonym | ||
correction("anim", 14, 19, 0, "anim") // synonym | ||
); | ||
|
||
String collation = SpellCheckCollator.getCollation(origQuery, corrections); | ||
String expected = "cat persian tabbi"; | ||
assertEquals("Incorrect collation: " + collation, expected, collation); | ||
} | ||
|
||
@Test | ||
public void testCollationGeneration2() { | ||
// sample from synonyms.txt: | ||
// panthera pardus, leopard|0.6 | ||
// | ||
// Note. depending on the field type, this can end up as the list of tokens: [leopard, 0, 6, | ||
// panthera, pardu, cats] | ||
|
||
String origQuery = "panthera pardus cats"; | ||
|
||
List<SpellCheckCorrection> corrections = | ||
List.of( | ||
correction("leopard", 0, 15, 1, "leopard"), | ||
correction("0", 0, 15, 1, "0"), | ||
correction("6", 0, 15, 1, "6"), | ||
correction("panthera", 0, 8, 0, "panthera"), | ||
correction("pardu", 9, 15, 1, "pardu"), | ||
correction("cats", 16, 20, 1, "cat")); | ||
|
||
String collation = SpellCheckCollator.getCollation(origQuery, corrections); | ||
String expected = "pardu cat"; // TODO what is expected here? | ||
assertEquals("Incorrect collation: " + collation, expected, collation); | ||
} | ||
|
||
private static SpellCheckCorrection correction( | ||
String text, int start, int end, int positionIncrement, String correction) { | ||
SpellCheckCorrection spellCheckCorrection = new SpellCheckCorrection(); | ||
spellCheckCorrection.setOriginal(token(text, start, end, positionIncrement)); | ||
spellCheckCorrection.setCorrection(correction); | ||
spellCheckCorrection.setNumberOfOccurences(1); | ||
return spellCheckCorrection; | ||
} | ||
|
||
private static Token token(String text, int start, int end, int positionIncrement) { | ||
Token token = new Token(text, start, end); | ||
token.setPositionIncrement(positionIncrement); | ||
return token; | ||
} | ||
} |
Oops, something went wrong.