Skip to content

Commit 28fc776

Browse files
authored
[8.18] Fix NPE in flat_bbq scorer when all vectors are missing (#129548) (#129625)
* Fix NPE in flat_bbq scorer when all vectors are missing (#129548) It is possible to get all the way down to the knn format reader while there are no vectors in the index. This execution path is reachable when using nested queries (which bypass the higher-level checks in `KnnFloatVectorQuery#approximateSearch`). bbq_flat should check for the existence of vectors before attempting to create the scorer. (cherry picked from commit 80667d0) * fixing compilation
1 parent bc8ea7c commit 28fc776

File tree

7 files changed

+143
-2
lines changed

7 files changed

+143
-2
lines changed

docs/changelog/129548.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 129548
2+
summary: Fix NPE in `flat_bbq` scorer when all vectors are missing
3+
area: Vector Search
4+
type: bug
5+
issues: []

server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorer.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ public RandomVectorScorer getRandomVectorScorer(
5959
float[] target
6060
) throws IOException {
6161
if (vectorValues instanceof RandomAccessBinarizedByteVectorValues binarizedVectors) {
62+
assert binarizedVectors.getQuantizer() != null
63+
: "BinarizedByteVectorValues must have a quantizer for ES816BinaryFlatVectorsScorer";
64+
assert binarizedVectors.size() > 0 : "BinarizedByteVectorValues must have at least one vector for ES816BinaryFlatVectorsScorer";
6265
BinaryQuantizer quantizer = binarizedVectors.getQuantizer();
6366
float[] centroid = binarizedVectors.getCentroid();
6467
// FIXME: precompute this once?

server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ static void validateFieldEntry(FieldInfo info, FieldEntry fieldEntry) {
154154
@Override
155155
public RandomVectorScorer getRandomVectorScorer(String field, float[] target) throws IOException {
156156
FieldEntry fi = fields.get(field);
157-
if (fi == null) {
157+
if (fi == null || fi.size() == 0) {
158158
return null;
159159
}
160160
return vectorScorer.getRandomVectorScorer(

server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryFlatVectorsScorer.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ public RandomVectorScorer getRandomVectorScorer(
6464
float[] target
6565
) throws IOException {
6666
if (vectorValues instanceof RandomAccessBinarizedByteVectorValues binarizedVectors) {
67+
assert binarizedVectors.getQuantizer() != null
68+
: "BinarizedByteVectorValues must have a quantizer for ES818BinaryFlatVectorsScorer";
69+
assert binarizedVectors.size() > 0 : "BinarizedByteVectorValues must have at least one vector for ES818BinaryFlatVectorsScorer";
6770
OptimizedScalarQuantizer quantizer = binarizedVectors.getQuantizer();
6871
float[] centroid = binarizedVectors.getCentroid();
6972
// We make a copy as the quantization process mutates the input

server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ static void validateFieldEntry(FieldInfo info, FieldEntry fieldEntry) {
153153
@Override
154154
public RandomVectorScorer getRandomVectorScorer(String field, float[] target) throws IOException {
155155
FieldEntry fi = fields.get(field);
156-
if (fi == null) {
156+
if (fi == null || fi.size() == 0) {
157157
return null;
158158
}
159159
return vectorScorer.getRandomVectorScorer(

server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,25 +24,38 @@
2424
import org.apache.lucene.codecs.KnnVectorsFormat;
2525
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
2626
import org.apache.lucene.document.Document;
27+
import org.apache.lucene.document.Field;
2728
import org.apache.lucene.document.KnnFloatVectorField;
2829
import org.apache.lucene.index.DirectoryReader;
2930
import org.apache.lucene.index.FloatVectorValues;
3031
import org.apache.lucene.index.IndexReader;
3132
import org.apache.lucene.index.IndexWriter;
3233
import org.apache.lucene.index.IndexWriterConfig;
3334
import org.apache.lucene.index.LeafReader;
35+
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
36+
import org.apache.lucene.index.Term;
3437
import org.apache.lucene.index.VectorSimilarityFunction;
38+
import org.apache.lucene.search.FieldExistsQuery;
3539
import org.apache.lucene.search.IndexSearcher;
3640
import org.apache.lucene.search.KnnFloatVectorQuery;
41+
import org.apache.lucene.search.MatchAllDocsQuery;
3742
import org.apache.lucene.search.Query;
43+
import org.apache.lucene.search.TermQuery;
3844
import org.apache.lucene.search.TopDocs;
3945
import org.apache.lucene.search.TotalHits;
46+
import org.apache.lucene.search.join.BitSetProducer;
47+
import org.apache.lucene.search.join.CheckJoinIndex;
48+
import org.apache.lucene.search.join.DiversifyingChildrenFloatKnnVectorQuery;
49+
import org.apache.lucene.search.join.QueryBitSetProducer;
4050
import org.apache.lucene.store.Directory;
4151
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
4252
import org.elasticsearch.common.logging.LogConfigurator;
4353
import org.elasticsearch.index.codec.vectors.BQVectorUtils;
4454

4555
import java.io.IOException;
56+
import java.util.ArrayList;
57+
import java.util.Arrays;
58+
import java.util.List;
4659
import java.util.Locale;
4760

4861
import static java.lang.String.format;
@@ -67,6 +80,58 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
6780
};
6881
}
6982

83+
static String encodeInts(int[] i) {
84+
return Arrays.toString(i);
85+
}
86+
87+
static BitSetProducer parentFilter(IndexReader r) throws IOException {
88+
// Create a filter that defines "parent" documents in the index
89+
BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "_parent")));
90+
CheckJoinIndex.check(r, parentsFilter);
91+
return parentsFilter;
92+
}
93+
94+
Document makeParent(int[] children) {
95+
Document parent = new Document();
96+
parent.add(newStringField("docType", "_parent", Field.Store.NO));
97+
parent.add(newStringField("id", encodeInts(children), Field.Store.YES));
98+
return parent;
99+
}
100+
101+
public void testEmptyDiversifiedChildSearch() throws Exception {
102+
String fieldName = "field";
103+
int dims = random().nextInt(4, 65);
104+
float[] vector = randomVector(dims);
105+
VectorSimilarityFunction similarityFunction = VectorSimilarityFunction.EUCLIDEAN;
106+
try (Directory d = newDirectory()) {
107+
IndexWriterConfig iwc = newIndexWriterConfig().setCodec(getCodec());
108+
iwc.setMergePolicy(new SoftDeletesRetentionMergePolicy("soft_delete", MatchAllDocsQuery::new, iwc.getMergePolicy()));
109+
try (IndexWriter w = new IndexWriter(d, iwc)) {
110+
List<Document> toAdd = new ArrayList<>();
111+
for (int j = 1; j <= 5; j++) {
112+
Document doc = new Document();
113+
doc.add(new KnnFloatVectorField(fieldName, vector, similarityFunction));
114+
doc.add(newStringField("id", Integer.toString(j), Field.Store.YES));
115+
toAdd.add(doc);
116+
}
117+
toAdd.add(makeParent(new int[] { 1, 2, 3, 4, 5 }));
118+
w.addDocuments(toAdd);
119+
w.addDocuments(List.of(makeParent(new int[] { 6, 7, 8, 9, 10 })));
120+
w.deleteDocuments(new FieldExistsQuery(fieldName), new TermQuery(new Term("id", encodeInts(new int[] { 1, 2, 3, 4, 5 }))));
121+
w.flush();
122+
w.commit();
123+
w.forceMerge(1);
124+
try (IndexReader reader = DirectoryReader.open(w)) {
125+
IndexSearcher searcher = new IndexSearcher(reader);
126+
BitSetProducer parentFilter = parentFilter(searcher.getIndexReader());
127+
Query query = new DiversifyingChildrenFloatKnnVectorQuery(fieldName, vector, null, 1, parentFilter);
128+
assertTrue(searcher.search(query, 1).scoreDocs.length == 0);
129+
}
130+
}
131+
132+
}
133+
}
134+
70135
public void testSearch() throws Exception {
71136
String fieldName = "field";
72137
int numVectors = random().nextInt(99, 500);

server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,25 +24,38 @@
2424
import org.apache.lucene.codecs.KnnVectorsFormat;
2525
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
2626
import org.apache.lucene.document.Document;
27+
import org.apache.lucene.document.Field;
2728
import org.apache.lucene.document.KnnFloatVectorField;
2829
import org.apache.lucene.index.DirectoryReader;
2930
import org.apache.lucene.index.FloatVectorValues;
3031
import org.apache.lucene.index.IndexReader;
3132
import org.apache.lucene.index.IndexWriter;
3233
import org.apache.lucene.index.IndexWriterConfig;
3334
import org.apache.lucene.index.LeafReader;
35+
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
36+
import org.apache.lucene.index.Term;
3437
import org.apache.lucene.index.VectorSimilarityFunction;
38+
import org.apache.lucene.search.FieldExistsQuery;
3539
import org.apache.lucene.search.IndexSearcher;
3640
import org.apache.lucene.search.KnnFloatVectorQuery;
41+
import org.apache.lucene.search.MatchAllDocsQuery;
3742
import org.apache.lucene.search.Query;
43+
import org.apache.lucene.search.TermQuery;
3844
import org.apache.lucene.search.TopDocs;
3945
import org.apache.lucene.search.TotalHits;
46+
import org.apache.lucene.search.join.BitSetProducer;
47+
import org.apache.lucene.search.join.CheckJoinIndex;
48+
import org.apache.lucene.search.join.DiversifyingChildrenFloatKnnVectorQuery;
49+
import org.apache.lucene.search.join.QueryBitSetProducer;
4050
import org.apache.lucene.store.Directory;
4151
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
4252
import org.elasticsearch.common.logging.LogConfigurator;
4353
import org.elasticsearch.index.codec.vectors.BQVectorUtils;
4454

4555
import java.io.IOException;
56+
import java.util.ArrayList;
57+
import java.util.Arrays;
58+
import java.util.List;
4659
import java.util.Locale;
4760

4861
import static java.lang.String.format;
@@ -67,6 +80,58 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
6780
};
6881
}
6982

83+
static String encodeInts(int[] i) {
84+
return Arrays.toString(i);
85+
}
86+
87+
static BitSetProducer parentFilter(IndexReader r) throws IOException {
88+
// Create a filter that defines "parent" documents in the index
89+
BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "_parent")));
90+
CheckJoinIndex.check(r, parentsFilter);
91+
return parentsFilter;
92+
}
93+
94+
Document makeParent(int[] children) {
95+
Document parent = new Document();
96+
parent.add(newStringField("docType", "_parent", Field.Store.NO));
97+
parent.add(newStringField("id", encodeInts(children), Field.Store.YES));
98+
return parent;
99+
}
100+
101+
public void testEmptyDiversifiedChildSearch() throws Exception {
102+
String fieldName = "field";
103+
int dims = random().nextInt(4, 65);
104+
float[] vector = randomVector(dims);
105+
VectorSimilarityFunction similarityFunction = VectorSimilarityFunction.EUCLIDEAN;
106+
try (Directory d = newDirectory()) {
107+
IndexWriterConfig iwc = newIndexWriterConfig().setCodec(getCodec());
108+
iwc.setMergePolicy(new SoftDeletesRetentionMergePolicy("soft_delete", MatchAllDocsQuery::new, iwc.getMergePolicy()));
109+
try (IndexWriter w = new IndexWriter(d, iwc)) {
110+
List<Document> toAdd = new ArrayList<>();
111+
for (int j = 1; j <= 5; j++) {
112+
Document doc = new Document();
113+
doc.add(new KnnFloatVectorField(fieldName, vector, similarityFunction));
114+
doc.add(newStringField("id", Integer.toString(j), Field.Store.YES));
115+
toAdd.add(doc);
116+
}
117+
toAdd.add(makeParent(new int[] { 1, 2, 3, 4, 5 }));
118+
w.addDocuments(toAdd);
119+
w.addDocuments(List.of(makeParent(new int[] { 6, 7, 8, 9, 10 })));
120+
w.deleteDocuments(new FieldExistsQuery(fieldName), new TermQuery(new Term("id", encodeInts(new int[] { 1, 2, 3, 4, 5 }))));
121+
w.flush();
122+
w.commit();
123+
w.forceMerge(1);
124+
try (IndexReader reader = DirectoryReader.open(w)) {
125+
IndexSearcher searcher = new IndexSearcher(reader);
126+
BitSetProducer parentFilter = parentFilter(searcher.getIndexReader());
127+
Query query = new DiversifyingChildrenFloatKnnVectorQuery(fieldName, vector, null, 1, parentFilter);
128+
assertTrue(searcher.search(query, 1).scoreDocs.length == 0);
129+
}
130+
}
131+
132+
}
133+
}
134+
70135
public void testSearch() throws Exception {
71136
String fieldName = "field";
72137
int numVectors = random().nextInt(99, 500);

0 commit comments

Comments
 (0)