Skip to content

Commit ecf7046

Browse files
author
Jack Hung
committed
Updated Lucene BBQ integration and made it default
1 parent 4b74750 commit ecf7046

File tree

12 files changed

+452
-21
lines changed

12 files changed

+452
-21
lines changed

qa/restart-upgrade/build.gradle

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,12 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
111111
}
112112

113113
// excludes: 1.any; 2.x where x < 17
114-
def validPrefixesForQFrameBitEncoderBWCChecks = ['1.'] + ((0..16).collect { "2.${it}." } as Collection<String>)
115-
if (validPrefixesForQFrameBitEncoderBWCChecks.any { knn_bwc_version.startsWith(it) }
114+
def invalidPrefixesForQFrameBitEncoderAndLuceneBBQBWCChecks = ['1.'] + ((0..16).collect { "2.${it}." } as Collection<String>)
115+
if (invalidPrefixesForQFrameBitEncoderAndLuceneBBQBWCChecks.any { knn_bwc_version.startsWith(it) }
116116
) {
117117
filter {
118+
excludeTestsMatching "org.opensearch.knn.bwc.IndexingIT.testKNNIndexLucene4xBWC"
119+
excludeTestsMatching "org.opensearch.knn.bwc.IndexingIT.testKNNIndexLuceneOnDiskNoCompressionBWC"
118120
excludeTestsMatching "org.opensearch.knn.bwc.IndexingIT.testDiskBasedMergeBWC"
119121
excludeTestsMatching "org.opensearch.knn.bwc.IndexingIT.testRandomRotationBWC"
120122
excludeTestsMatching "org.opensearch.knn.bwc.IndexingIT.testADC_BWC"
@@ -271,9 +273,11 @@ task testRestartUpgrade(type: StandaloneRestIntegTestTask) {
271273
}
272274
}
273275

274-
def validPrefixesForQFrameBitEncoderBWCChecks = ['1.'] + ((0..16).collect { "2.${it}." } as Collection<String>)
275-
if (validPrefixesForQFrameBitEncoderBWCChecks.any { knn_bwc_version.startsWith(it) }) {
276+
def invalidPrefixesForQFrameBitEncoderAndLuceneBBQBWCChecks = ['1.'] + ((0..16).collect { "2.${it}." } as Collection<String>)
277+
if (invalidPrefixesForQFrameBitEncoderAndLuceneBBQBWCChecks.any { knn_bwc_version.startsWith(it) }) {
276278
filter {
279+
excludeTestsMatching "org.opensearch.knn.bwc.IndexingIT.testKNNIndexLucene4xBWC"
280+
excludeTestsMatching "org.opensearch.knn.bwc.IndexingIT.testKNNIndexLuceneOnDiskNoCompressionBWC"
277281
excludeTestsMatching "org.opensearch.knn.bwc.IndexingIT.testDiskBasedMergeBWC"
278282
excludeTestsMatching "org.opensearch.knn.bwc.IndexingIT.testRandomRotationBWC"
279283
excludeTestsMatching "org.opensearch.knn.bwc.IndexingIT.testADC_BWC"
@@ -340,11 +344,12 @@ task testRestartUpgrade(type: StandaloneRestIntegTestTask) {
340344
}
341345
}
342346

343-
if (knn_bwc_version.startsWith("1.") || knn_bwc_version.startsWith("2.") || knn_bwc_version.startsWith("3.0")) {
344-
filter {
345-
excludeTestsMatching "org.opensearch.knn.bwc.IndexingIT.testKNNIndexLuceneBBQ"
346-
}
347+
def invalidPrefixesForLuceneBBQChecks = ['1.', '2.'] + ((0..5).collect { "3.${it}." } as Collection<String>)
348+
if (invalidPrefixesForLuceneBBQChecks.any { knn_bwc_version.startsWith(it) }) {
349+
filter {
350+
excludeTestsMatching "org.opensearch.knn.bwc.IndexingIT.testKNNIndexLuceneBBQ"
347351
}
352+
}
348353

349354
nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
350355
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")

qa/restart-upgrade/src/test/java/org/opensearch/knn/bwc/IndexingIT.java

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -674,6 +674,88 @@ public void testRandomRotationBWC() throws Exception {
674674
}
675675
}
676676

677+
/**
 * Shared runner for the restart-upgrade BWC tests added alongside the BBQ integration.
 *
 * When {@code isRunningAgainstOldCluster()} is true, it creates {@code testIndex} with the
 * given mapping, indexes four documents (ids "1" to "4"), runs a k-NN search with k = 4 and
 * asserts that the returned doc ids match {@code expectedOrder}. Otherwise it repeats the same
 * search and assertions against the already existing index and then deletes the index.
 *
 * @param mapping index mapping (JSON string) passed to {@code createKnnIndex}
 * @throws Exception if any cluster call, search, or response parsing step fails
 */
private void testKNNAfterBBQIntegrationBWCRunner(String mapping) throws Exception {
678+
waitForClusterHealthGreen(NODES_BWC_CLUSTER);
679+
int k = 4;
680+
// NOTE(review): this local is not referenced anywhere in this method; the callers put the
// dimension into the mapping themselves. Candidate for removal.
int dimension = 8;
681+
682+
float[] queryVector = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f };
683+
int[] expectedOrder = {1, 2, 3, 4}; // doc ids ranked by inner product with queryVector (raw dot products: 236, 216, 208, 205)
684+
if (isRunningAgainstOldCluster()) {
685+
createKnnIndex(testIndex, getKNNDefaultIndexSettings(), mapping);
686+
687+
Float[] vector1 = { 1.0f, 2.0f, 3.0f, 12.0f, 5.0f, 6.0f, 7.0f, 8.0f };
688+
Float[] vector2 = { 1.0f, 2.0f, 7.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f };
689+
Float[] vector3 = { 1.0f, 4.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f };
690+
Float[] vector4 = { 2.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f };
691+
addKnnDoc(testIndex, "1", TEST_FIELD, vector1);
692+
addKnnDoc(testIndex, "2", TEST_FIELD, vector2);
693+
addKnnDoc(testIndex, "3", TEST_FIELD, vector3);
694+
addKnnDoc(testIndex, "4", TEST_FIELD, vector4);
695+
696+
// Sanity-check the ordering on the old cluster before the upgrade.
Response searchResponse = searchKNNIndex(testIndex, new KNNQueryBuilder(TEST_FIELD, queryVector, k), k);
697+
List<KNNResult> results = parseSearchResponse(EntityUtils.toString(searchResponse.getEntity()), TEST_FIELD);
698+
assertEquals(k, results.size());
699+
for (int i = 0; i < k; i++) {
700+
assertEquals(expectedOrder[i], Integer.parseInt(results.get(i).getDocId()));
701+
}
702+
} else {
703+
// Same query and assertions as above, now against the index created in the old-cluster phase.
Response searchResponse = searchKNNIndex(testIndex, new KNNQueryBuilder(TEST_FIELD, queryVector, k), k);
704+
List<KNNResult> results = parseSearchResponse(EntityUtils.toString(searchResponse.getEntity()), TEST_FIELD);
705+
assertEquals(k, results.size());
706+
for (int i = 0; i < k; i++) {
707+
assertEquals(expectedOrder[i], Integer.parseInt(results.get(i).getDocId()));
708+
}
709+
deleteKNNIndex(testIndex);
710+
}
711+
}
712+
713+
/**
 * BWC test for a Lucene-engine HNSW field (inner product, dimension 8) whose mapping sets
 * the compression level explicitly to x4. Builds the mapping and delegates to
 * {@code testKNNAfterBBQIntegrationBWCRunner}.
 *
 * @throws Exception if building the mapping or running the shared runner fails
 */
public void testKNNIndexLucene4xBWC() throws Exception {
714+
// Verify that Lucene indices created on the old cluster with an explicit
715+
// compression level (x4 here) still work
716+
int dimension = 8;
717+
String mapping = XContentFactory.jsonBuilder()
718+
.startObject()
719+
.startObject("properties")
720+
.startObject(TEST_FIELD)
721+
.field(VECTOR_TYPE, KNN_VECTOR)
722+
.field(DIMENSION, dimension)
723+
.field(COMPRESSION_LEVEL_PARAMETER, CompressionLevel.x4.getName())
724+
.startObject(KNN_METHOD)
725+
.field(NAME, METHOD_HNSW)
726+
.field(METHOD_PARAMETER_SPACE_TYPE, SpaceType.INNER_PRODUCT.getValue())
727+
.field(KNN_ENGINE, LUCENE_NAME)
728+
.endObject()
729+
.endObject()
730+
.endObject()
731+
.endObject()
732+
.toString();
733+
testKNNAfterBBQIntegrationBWCRunner(mapping);
734+
}
735+
736+
/**
 * BWC test for a Lucene-engine HNSW field (inner product, dimension 8) whose mapping sets
 * mode to on_disk and leaves the compression level unspecified. Builds the mapping and
 * delegates to {@code testKNNAfterBBQIntegrationBWCRunner}.
 *
 * @throws Exception if building the mapping or running the shared runner fails
 */
public void testKNNIndexLuceneOnDiskNoCompressionBWC() throws Exception {
737+
// Before 3.6, Lucene with ON_DISK and no compression level defaulted to 4x. From 3.6
738+
// onward the default is 32x. Verify that indices created with the old default still work
739+
int dimension = 8;
740+
String mapping = XContentFactory.jsonBuilder()
741+
.startObject()
742+
.startObject("properties")
743+
.startObject(TEST_FIELD)
744+
.field(VECTOR_TYPE, KNN_VECTOR)
745+
.field(DIMENSION, dimension)
746+
.field(MODE_PARAMETER, Mode.ON_DISK.getName())
747+
.startObject(KNN_METHOD)
748+
.field(NAME, METHOD_HNSW)
749+
.field(METHOD_PARAMETER_SPACE_TYPE, SpaceType.INNER_PRODUCT.getValue())
750+
.field(KNN_ENGINE, LUCENE_NAME)
751+
.endObject()
752+
.endObject()
753+
.endObject()
754+
.endObject()
755+
.toString();
756+
testKNNAfterBBQIntegrationBWCRunner(mapping);
757+
}
758+
677759
public void testKNNIndexLuceneBBQ() throws Exception {
678760
waitForClusterHealthGreen(NODES_BWC_CLUSTER);
679761

src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import lombok.extern.log4j.Log4j2;
1010
import org.apache.lucene.codecs.KnnVectorsFormat;
1111
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
12+
import org.opensearch.Version;
1213
import org.opensearch.index.IndexSettings;
1314
import org.opensearch.index.mapper.MapperService;
1415
import org.opensearch.knn.index.KNNSettings;
@@ -19,8 +20,10 @@
1920
import org.opensearch.knn.index.codec.params.KNNVectorsFormatParams;
2021
import org.opensearch.knn.index.engine.KNNEngine;
2122
import org.opensearch.knn.index.engine.KNNMethodContext;
23+
import org.opensearch.knn.index.mapper.CompressionLevel;
2224
import org.opensearch.knn.index.mapper.KNNMappingConfig;
2325
import org.opensearch.knn.index.mapper.KNNVectorFieldType;
26+
import org.opensearch.knn.index.mapper.Mode;
2427

2528
import java.util.Map;
2629
import java.util.Optional;
@@ -59,6 +62,25 @@ public BasePerFieldKnnVectorsFormat(
5962
this(mapperService, defaultMaxConnections, defaultBeamWidth, defaultFormatSupplier, vectorsFormatSupplier, null, null);
6063
}
6164

65+
/**
 * Convenience constructor for formats that supply a scalar-quantized vectors format but no
 * seventh argument; delegates to the full constructor and passes {@code null} in that slot.
 *
 * NOTE(review): the seventh argument is presumably the BBQ vectors format supplier. The
 * implied-BBQ branch in {@code getKnnVectorsFormatForField} calls
 * {@code bbqVectorsFormatSupplier.apply(...)}, so confirm that branch cannot be reached for
 * formats built through this constructor.
 *
 * @param mapperService optional mapper service, forwarded unchanged
 * @param defaultMaxConnections default max connections, forwarded unchanged
 * @param defaultBeamWidth default beam width, forwarded unchanged
 * @param defaultFormatSupplier supplier of the default vectors format, forwarded unchanged
 * @param vectorsFormatSupplier builds a format from {@code KNNVectorsFormatParams}
 * @param scalarQuantizedVectorsFormatSupplier builds a format from {@code KNNScalarQuantizedVectorsFormatParams}
 */
public BasePerFieldKnnVectorsFormat(
66+
Optional<MapperService> mapperService,
67+
int defaultMaxConnections,
68+
int defaultBeamWidth,
69+
Supplier<KnnVectorsFormat> defaultFormatSupplier,
70+
Function<KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier,
71+
Function<KNNScalarQuantizedVectorsFormatParams, KnnVectorsFormat> scalarQuantizedVectorsFormatSupplier
72+
) {
73+
this(
74+
mapperService,
75+
defaultMaxConnections,
76+
defaultBeamWidth,
77+
defaultFormatSupplier,
78+
vectorsFormatSupplier,
79+
scalarQuantizedVectorsFormatSupplier,
80+
null
81+
);
82+
}
83+
6284
public BasePerFieldKnnVectorsFormat(
6385
Optional<MapperService> mapperService,
6486
int defaultMaxConnections,
@@ -107,6 +129,7 @@ public KnnVectorsFormat getKnnVectorsFormatForField(final String field) {
107129
final KNNMethodContext knnMethodContext = knnMappingConfig.getKnnMethodContext()
108130
.orElseThrow(() -> new IllegalArgumentException("KNN method context cannot be empty"));
109131
nativeIndexBuildStrategyFactory.setKnnLibraryIndexingContext(knnMappingConfig.getKnnLibraryIndexingContext());
132+
110133
final KNNEngine engine = knnMethodContext.getKnnEngine();
111134
final Map<String, Object> params = knnMethodContext.getMethodComponentContext().getParameters();
112135

@@ -146,6 +169,21 @@ public KnnVectorsFormat getKnnVectorsFormatForField(final String field) {
146169
return scalarQuantizedVectorsFormatSupplier.apply(knnScalarQuantizedVectorsFormatParams);
147170
}
148171
}
172+
// For indices created on or after 3.6.0, imply the BBQ format when compression level is 32x, or when mode is on_disk and no compression level is configured
173+
if (knnMappingConfig.getIndexCreatedVersion().onOrAfter(Version.V_3_6_0)
174+
&& ((knnMappingConfig.getCompressionLevel() == CompressionLevel.NOT_CONFIGURED
175+
&& knnMappingConfig.getMode() == Mode.ON_DISK) || knnMappingConfig.getCompressionLevel() == CompressionLevel.x32)) {
176+
KNNBBQVectorsFormatParams bbqParams = new KNNBBQVectorsFormatParams(params, defaultMaxConnections, defaultBeamWidth);
177+
log.debug(
178+
"Initialize KNN vector format for field [{}] with binary quantization, params [{}] = \"{}\", [{}] = \"{}\"",
179+
field,
180+
MAX_CONNECTIONS,
181+
bbqParams.getMaxConnections(),
182+
BEAM_WIDTH,
183+
bbqParams.getBeamWidth()
184+
);
185+
return bbqVectorsFormatSupplier.apply(bbqParams);
186+
}
149187

150188
KNNVectorsFormatParams knnVectorsFormatParams = new KNNVectorsFormatParams(
151189
params,

src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/KNN9120PerFieldKnnVectorsFormat.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
package org.opensearch.knn.index.codec.KNN9120Codec;
77

8+
import org.apache.lucene.codecs.lucene104.Lucene104HnswScalarQuantizedVectorsFormat;
9+
import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat;
810
import org.apache.lucene.backward_codecs.lucene99.Lucene99RWHnswScalarQuantizedVectorsFormat;
911
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
1012
import org.opensearch.common.collect.Tuple;
@@ -78,7 +80,8 @@ public KNN9120PerFieldKnnVectorsFormat(
7880
},
7981
knnBBQVectorsFormatParams -> {
8082
final Tuple<Integer, ExecutorService> mergeThreadCountAndExecutorService = getMergeThreadCountAndExecutorService();
81-
return new Lucene102HnswBinaryQuantizedVectorsFormat(
83+
return new Lucene104HnswScalarQuantizedVectorsFormat(
84+
Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SINGLE_BIT_QUERY_NIBBLE,
8285
knnBBQVectorsFormatParams.getMaxConnections(),
8386
knnBBQVectorsFormatParams.getBeamWidth(),
8487
mergeThreadCountAndExecutorService.v1(),

src/main/java/org/opensearch/knn/index/codec/backward_codecs/KNN990Codec/KNN990PerFieldKnnVectorsFormat.java

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,6 @@ public KNN990PerFieldKnnVectorsFormat(final Optional<MapperService> mapperServic
3737
knnScalarQuantizedVectorsFormatParams.isCompressFlag(),
3838
knnScalarQuantizedVectorsFormatParams.getConfidenceInterval(),
3939
null
40-
),
41-
knnBBQVectorsFormatParams -> new Lucene102HnswBinaryQuantizedVectorsFormat(
42-
knnBBQVectorsFormatParams.getMaxConnections(),
43-
knnBBQVectorsFormatParams.getBeamWidth(),
44-
NUM_MERGE_WORKERS,
45-
null
4640
)
4741
);
4842
}

src/main/java/org/opensearch/knn/index/engine/lucene/LuceneMethodResolver.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
package org.opensearch.knn.index.engine.lucene;
77

8+
import org.opensearch.Version;
89
import org.opensearch.common.ValidationException;
910
import org.opensearch.knn.index.SpaceType;
1011
import org.opensearch.knn.index.engine.AbstractMethodResolver;
@@ -113,6 +114,10 @@ private CompressionLevel getDefaultCompressionLevel(KNNMethodConfigContext knnMe
113114
return knnMethodConfigContext.getCompressionLevel();
114115
}
115116
if (knnMethodConfigContext.getMode() == Mode.ON_DISK) {
117+
// For indices created on version 3.6 or later, on_disk mode defaults to 32x compression (BBQ)
118+
if (knnMethodConfigContext.getVersionCreated().onOrAfter(Version.V_3_6_0)) {
119+
return CompressionLevel.x32;
120+
}
116121
return CompressionLevel.x4;
117122
}
118123
return CompressionLevel.x1;

src/main/java/org/opensearch/knn/index/mapper/CompressionLevel.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ public RescoreContext getDefaultRescoreContext(Mode mode, int dimension, Version
138138
}
139139

140140
// Special handling for Lucene BBQ (x32 compression)
141-
if (this == x32 && engine == KNNEngine.LUCENE && version.onOrAfter(Version.V_3_3_0)) {
141+
if (this == x32 && engine == KNNEngine.LUCENE && version.onOrAfter(Version.V_3_6_0)) {
142142
if (dimension <= RescoreContext.DIMENSION_THRESHOLD) {
143143
return RescoreContext.builder()
144144
.oversampleFactor(RescoreContext.OVERSAMPLE_FACTOR_BELOW_DIMENSION_THRESHOLD)

src/test/java/org/opensearch/knn/index/LuceneEngineIT.java

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@
2626
import org.opensearch.knn.common.KNNConstants;
2727
import org.opensearch.knn.index.query.KNNQueryBuilder;
2828
import org.opensearch.knn.index.engine.KNNEngine;
29-
29+
import org.opensearch.knn.index.mapper.CompressionLevel;
30+
import org.opensearch.knn.index.mapper.Mode;
3031
import java.io.IOException;
3132
import java.util.Arrays;
3233
import java.util.List;
@@ -92,6 +93,8 @@ public class LuceneEngineIT extends KNNRestTestCase {
9293
private static final String KNN_VECTOR_TYPE = "knn_vector";
9394
private static final String PROPERTIES_FIELD_NAME = "properties";
9495
private static final String TYPE_FIELD_NAME = "type";
96+
private static final String COMPRESSION_LEVEL_FIELD_NAME = "compression_level";
97+
private static final String MODE_FIELD_NAME = "mode";
9598
private static final String INTEGER_FIELD_NAME = "int_field";
9699
private static final String FILED_TYPE_INTEGER = "integer";
97100
private static final String NON_EXISTENT_INTEGER_FIELD_NAME = "nonexistent_int_field";
@@ -752,6 +755,34 @@ private void createKnnIndexMappingWithLuceneEngineAndSQEncoder(
752755
createKnnIndex(INDEX_NAME, mapping);
753756
}
754757

758+
/**
 * Round-trip check against the index named {@code INDEX_NAME}: indexes one 3-dimensional
 * vector under {@code DOC_ID}, asserts the doc count is 1, searches with the identical vector
 * (k = 1) and asserts that {@code DOC_ID} is returned, then deletes the document and asserts
 * the doc count is 0. The index must have been created by the caller beforehand.
 *
 * @throws Exception if any index, search, or delete call fails
 */
private void addSearchDeleteToCurrentKNNIndex() throws Exception {
759+
Float[] vector = { 6.0f, 6.0f, 7.0f };
760+
addKnnDoc(INDEX_NAME, DOC_ID, FIELD_NAME, vector);
761+
refreshIndex(INDEX_NAME);
762+
assertEquals(1, getDocCount(INDEX_NAME));
763+
764+
// Query with the same values that were indexed, so the single document must be the top hit.
Response searchResponse = searchKNNIndex(INDEX_NAME, new KNNQueryBuilder(FIELD_NAME, new float[] { 6.0f, 6.0f, 7.0f }, 1), 1);
765+
List<KNNResult> results = parseSearchResponse(EntityUtils.toString(searchResponse.getEntity()), FIELD_NAME);
766+
assertEquals(1, results.size());
767+
assertEquals(DOC_ID, results.get(0).getDocId());
768+
769+
deleteKnnDoc(INDEX_NAME, DOC_ID);
770+
refreshIndex(INDEX_NAME);
771+
assertEquals(0, getDocCount(INDEX_NAME));
772+
}
773+
774+
/**
 * Creates an index whose mapping sets compression level x32 and no mode, then runs the
 * add/search/delete round trip on it. Per the test name, this mapping is expected to select
 * BBQ implicitly; the test itself only asserts the round-trip results.
 */
@SneakyThrows
775+
public void testAddSearchDeleteWithCompressionLevelImpliedBBQ() {
776+
createKnnIndexMappingWithLuceneEngineWithModeAndCompression(CompressionLevel.x32, DIMENSION, Mode.NOT_CONFIGURED);
777+
addSearchDeleteToCurrentKNNIndex();
778+
}
779+
780+
/**
 * Creates an index whose mapping sets mode on_disk and no compression level, then runs the
 * add/search/delete round trip on it. Per the test name, this mapping is expected to select
 * BBQ implicitly; the test itself only asserts the round-trip results.
 */
@SneakyThrows
781+
public void testAddSearchDeleteWithModeImpliedBBQ() {
782+
createKnnIndexMappingWithLuceneEngineWithModeAndCompression(CompressionLevel.NOT_CONFIGURED, DIMENSION, Mode.ON_DISK);
783+
addSearchDeleteToCurrentKNNIndex();
784+
}
785+
755786
@SneakyThrows
756787
public void testBBQ_withInvalidParams_thenThrowException() {
757788
// Use "byte" data_type with bbq encoder which throws an exception
@@ -841,6 +872,27 @@ public void testQueryWithFilterUsingBBQEncoder() throws Exception {
841872
validateQueryResultsWithFilters(searchVector, 5, 1, expectedDocIdsKGreaterThanFilterResult, expectedDocIdsKLimitsFilterResult);
842873
}
843874

875+
/**
 * Creates the index named {@code INDEX_NAME} with a single knn_vector field
 * ({@code FIELD_NAME}) of the given dimension. The compression level and mode are written to
 * the mapping only when they are not {@code NOT_CONFIGURED}.
 *
 * NOTE(review): despite the method name, the mapping built here sets no engine or method,
 * so it relies on whatever engine the plugin picks by default — confirm that is Lucene.
 *
 * @param compressionLevel compression level to set, or {@code CompressionLevel.NOT_CONFIGURED} to omit it
 * @param dimension vector dimension written to the mapping
 * @param mode mode to set, or {@code Mode.NOT_CONFIGURED} to omit it
 * @throws Exception if building the mapping or creating the index fails
 */
private void createKnnIndexMappingWithLuceneEngineWithModeAndCompression(CompressionLevel compressionLevel, int dimension, Mode mode)
876+
throws Exception {
877+
XContentBuilder builder = XContentFactory.jsonBuilder()
878+
.startObject()
879+
.startObject(PROPERTIES_FIELD_NAME)
880+
.startObject(FIELD_NAME)
881+
.field(TYPE_FIELD_NAME, KNN_VECTOR_TYPE)
882+
.field(DIMENSION_FIELD_NAME, dimension);
883+
884+
if (compressionLevel != CompressionLevel.NOT_CONFIGURED) {
885+
builder.field(COMPRESSION_LEVEL_FIELD_NAME, compressionLevel.getName());
886+
}
887+
if (mode != Mode.NOT_CONFIGURED) {
888+
builder.field(MODE_FIELD_NAME, mode.getName());
889+
}
890+
// Close the field object, the properties object, and the root object.
builder.endObject().endObject().endObject();
891+
892+
String mapping = builder.toString();
893+
createKnnIndex(INDEX_NAME, mapping);
894+
}
895+
844896
private void createKnnIndexMappingWithLuceneEngineAndBBQEncoder(int dimension, SpaceType spaceType, VectorDataType vectorDataType)
845897
throws Exception {
846898
XContentBuilder builder = XContentFactory.jsonBuilder()

0 commit comments

Comments
 (0)