Skip to content

Commit 575e66d

Browse files
Merge FP16 native scoring V1 into main branch (#2963)
* Added MMapByteVectorValues for FP16 native scoring in LuceneOnFaiss. (#2904) Signed-off-by: Dooyong Kim <kdooyong@amazon.com> * Native scoring for FP16 V1 implementation. (#2922) * Added Faiss FP16 native scoring. Signed-off-by: Dooyong Kim <kdooyong@amazon.com> * Added MultiLeafTopKnnCollectorManager. Signed-off-by: Dooyong Kim <kdooyong@amazon.com> * Added Java docs for native scoring for FP16. Signed-off-by: Dooyong Kim <kdooyong@amazon.com> * Delete jni/Makefile Signed-off-by: Doo Yong Kim <0ctopus13prime@gmail.com> * Reflect Tejas's comments. Signed-off-by: Dooyong Kim <kdooyong@amazon.com> --------- Signed-off-by: Dooyong Kim <kdooyong@amazon.com> Signed-off-by: Doo Yong Kim <0ctopus13prime@gmail.com> --------- Signed-off-by: Dooyong Kim <kdooyong@amazon.com> Signed-off-by: Doo Yong Kim <0ctopus13prime@gmail.com>
1 parent 5e9e2dc commit 575e66d

37 files changed

+2450
-35
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,5 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1717
* Refactor to not use parallel for MMR rerank. [#2968](https://github.com/opensearch-project/k-NN/pull/2968)
1818

1919
### Enhancements
20-
* Removed VectorSearchHolders map from NativeEngines990KnnVectorsReader [#2948](https://github.com/opensearch-project/k-NN/pull/2948)
20+
* Removed VectorSearchHolders map from NativeEngines990KnnVectorsReader [#2948](https://github.com/opensearch-project/k-NN/pull/2948)
21+
* Native scoring for FP16 [#2922](https://github.com/opensearch-project/k-NN/pull/2922)

build.gradle

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,10 +289,23 @@ publishing {
289289

290290
compileJava {
291291
options.compilerArgs.addAll(["-processor", 'lombok.launch.AnnotationProcessorHider$AnnotationProcessor'])
292+
293+
// MemorySegment is not available until JDK 22, so exclude this source file from compilation unless both source and target compatibility are above Java 21.
294+
def javaExt = project.extensions.getByType(JavaPluginExtension)
295+
if (javaExt.sourceCompatibility <= JavaVersion.VERSION_21 || javaExt.targetCompatibility <= JavaVersion.VERSION_21) {
296+
exclude("org/opensearch/knn/memoryoptsearch/MemorySegmentAddressExtractorJDK22.java")
297+
}
292298
}
293299
compileTestJava {
294300
options.compilerArgs.addAll(["-processor", 'lombok.launch.AnnotationProcessorHider$AnnotationProcessor'])
295301
}
302+
javadoc {
303+
// Skip Javadoc generation for this file, since Javadoc complains that MemorySegment is a preview API on Java 21.
304+
def javaExt = project.extensions.getByType(JavaPluginExtension)
305+
if (javaExt.sourceCompatibility <= JavaVersion.VERSION_21 || javaExt.targetCompatibility <= JavaVersion.VERSION_21) {
306+
exclude("org/opensearch/knn/memoryoptsearch/MemorySegmentAddressExtractorJDK22.java")
307+
}
308+
}
296309
compileTestFixturesJava {
297310
options.compilerArgs.addAll(["-processor", 'lombok.launch.AnnotationProcessorHider$AnnotationProcessor'])
298311
}
@@ -424,7 +437,7 @@ tasks.register('buildJniLib', Exec) {
424437
args.add("--build")
425438
args.add("jni/build")
426439
args.add("--target")
427-
def knn_libs = ['opensearchknn_faiss', 'opensearchknn_common', 'opensearchknn_nmslib']
440+
def knn_libs = ['opensearchknn_faiss', 'opensearchknn_common', 'opensearchknn_nmslib', 'opensearchknn_simd']
428441
if (project.hasProperty('knn_libs')) {
429442
knn_libs = ['opensearchknn_common'] + project.knn_libs.split(',').collect { it.trim() }
430443
}

jni/CMakeLists.txt

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ set(TARGET_LIB_UTIL opensearchknn_util)
1616
set(TARGET_LIB_COMMON opensearchknn_common) # common lib for JNI
1717
set(TARGET_LIB_NMSLIB opensearchknn_nmslib) # nmslib JNI
1818
set(TARGET_LIB_FAISS opensearchknn_faiss) # faiss JNI
19+
set(TARGET_LIB_SIMD opensearchknn_simd) # SIMD computing JNI
1920
set(TARGET_LIBS "") # Libs to be installed
2021

2122
set(CMAKE_CXX_STANDARD 17)
@@ -127,6 +128,48 @@ endif ()
127128

128129
# ---------------------------------------------------------------------------
129130

131+
# ----------------------------- SIMD Computing ------------------------------
132+
# Init SIMD computing properties
133+
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/init-simd.cmake)
134+
135+
# Set target library + source
136+
set(SIMD_COMPUTING_SRCS
137+
${CMAKE_CURRENT_SOURCE_DIR}/src/org_opensearch_knn_jni_SimdVectorComputeService.cpp
138+
${CMAKE_CURRENT_SOURCE_DIR}/src/simd/similarity_function/similarity_function.cpp
139+
)
140+
141+
# Make it a shared library
142+
add_library(${TARGET_LIB_SIMD} SHARED ${SIMD_COMPUTING_SRCS})
143+
144+
# Set SIMD compile option
145+
target_compile_options(${TARGET_LIB_SIMD} PRIVATE ${FP16_SIMD_FLAGS} ${SIMD_FLAGS})
146+
147+
# Link the Faiss and util libraries
148+
target_link_libraries(${TARGET_LIB_SIMD} ${TARGET_LINK_FAISS_LIB} ${TARGET_LIB_UTIL})
149+
150+
# Add include headers
151+
target_include_directories(${TARGET_LIB_SIMD} PRIVATE
152+
${CMAKE_CURRENT_SOURCE_DIR}/include
153+
${CMAKE_CURRENT_SOURCE_DIR}/jni/include
154+
$ENV{JAVA_HOME}/include
155+
$ENV{JAVA_HOME}/include/${JVM_OS_TYPE}
156+
${CMAKE_CURRENT_SOURCE_DIR}/external/faiss
157+
)
158+
159+
# Set common properties
160+
opensearch_set_common_properties(${TARGET_LIB_SIMD})
161+
162+
# Set the output name to the plain target name (no SIMD-specific suffix is appended)
163+
set_target_properties(${TARGET_LIB_SIMD} PROPERTIES
164+
OUTPUT_NAME "${TARGET_LIB_SIMD}"
165+
)
166+
167+
# Add target SIMD library to TARGET_LIBS list
168+
list(APPEND TARGET_LIBS ${TARGET_LIB_SIMD})
169+
# ---------------------------------------------------------------------------
170+
171+
172+
130173
# --------------------------------- TESTS -----------------------------------
131174
# Windows: Comment out the TESTS for now because the tests are failing (jni_tests.exe fails to build) when we build our target libraries as SHARED libraries.
132175
# TODO: Fix the failing JNI TESTS on Windows

jni/cmake/init-simd.cmake

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
#
2+
# Copyright OpenSearch Contributors
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
6+
include(CheckCXXSourceCompiles)
7+
8+
# Allow user overrides
9+
if(NOT DEFINED AVX2_ENABLED)
10+
set(AVX2_ENABLED true) # set default value as true if the argument is not set
11+
endif()
12+
13+
if(NOT DEFINED AVX512_ENABLED)
14+
set(AVX512_ENABLED true) # set default value as true if the argument is not set
15+
endif()
16+
17+
if(NOT DEFINED AVX512_SPR_ENABLED)
18+
# By default it's false
19+
set(AVX512_SPR_ENABLED false)
20+
21+
# We only explore CPU info in Linux by default
22+
if(AVX512_ENABLED AND ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
23+
find_program(LSCPU_PROGRAM lscpu)
24+
if(LSCPU_PROGRAM)
# Run lscpu and capture its report. The checks below read CPU_INFO, and nothing
# else visible in this file populates it, so without this call they never match.
execute_process(
COMMAND ${LSCPU_PROGRAM}
OUTPUT_VARIABLE CPU_INFO
OUTPUT_STRIP_TRAILING_WHITESPACE
ERROR_QUIET
)
25+
# Turn SPR on only when the report names an Intel CPU and all three AVX512 feature flags.
if(CPU_INFO MATCHES "GenuineIntel" AND
26+
CPU_INFO MATCHES "avx512_fp16" AND
27+
CPU_INFO MATCHES "avx512_bf16" AND
28+
CPU_INFO MATCHES "avx512_vpopcntdq")
29+
set(AVX512_SPR_ENABLED true)
30+
endif()
31+
endif()
32+
endif()
33+
endif()
34+
35+
# Default SIMD state
36+
set(KNN_HAVE_AVX2_F16C OFF)
37+
set(KNN_HAVE_AVX512 OFF)
38+
set(KNN_HAVE_AVX512_SPR OFF)
39+
set(KNN_HAVE_ARM_FP16 OFF)
40+
set(SIMD_OPT_LEVEL "")
41+
set(SIMD_FLAGS "")
42+
43+
if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows" OR (NOT AVX2_ENABLED AND NOT AVX512_ENABLED AND NOT AVX512_SPR_ENABLED))
44+
message(STATUS "[SIMD] Windows or SIMD explicitly disabled. Falling back to generic.")
45+
set(SIMD_OPT_LEVEL "generic") # Keep optimization level as generic on Windows OS as it is not supported due to MINGW64 compiler issue.
46+
set(SIMD_FLAGS "")
47+
48+
elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm64")
49+
set(CMAKE_REQUIRED_FLAGS "-march=armv8.4-a+fp16")
50+
check_cxx_source_compiles("
51+
#include <arm_neon.h>
52+
int main() {
53+
float32x4_t f = vdupq_n_f32(1.0f);
54+
float16x4_t h = vcvt_f16_f32(f);
55+
(void)h;
56+
return 0;
57+
}" HAVE_NEON_FP16)
58+
unset(CMAKE_REQUIRED_FLAGS)
59+
60+
if(HAVE_NEON_FP16)
61+
set(KNN_HAVE_ARM_FP16 ON)
62+
set(SIMD_OPT_LEVEL "generic") # On aarch64 avx2 is not supported.
63+
set(SIMD_FLAGS -march=armv8.4-a+fp16)
64+
add_definitions(-DKNN_HAVE_ARM_FP16)
65+
message(STATUS "[SIMD] ARM NEON with FP16 supported.")
66+
else()
67+
message(STATUS "[SIMD] ARM NEON FP16 instructions not supported by compiler. Falling back to generic.")
68+
endif()
69+
70+
elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Linux" AND AVX512_SPR_ENABLED)
71+
set(CMAKE_REQUIRED_FLAGS "-mavx512f -mavx512fp16 -mf16c")
72+
check_cxx_source_compiles("
73+
#include <immintrin.h>
74+
int main() {
75+
__m512 v = _mm512_set1_ps(1.0f);
76+
__m256i h = _mm512_cvt_roundps_ph(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
77+
__m512 w = _mm512_cvtph_ps(h);
78+
(void)w;
79+
return 0;
80+
}" HAVE_AVX512_SPR_COMPILER)
81+
unset(CMAKE_REQUIRED_FLAGS)
82+
83+
if(HAVE_AVX512_SPR_COMPILER)
84+
set(KNN_HAVE_AVX512_SPR ON)
85+
set(SIMD_OPT_LEVEL "avx512_spr")
86+
set(SIMD_FLAGS -mavx512f -mavx512fp16 -mf16c)
87+
add_definitions(-DKNN_HAVE_AVX512_SPR)
88+
message(STATUS "[SIMD] AVX512_SPR supported by compiler.")
89+
else()
90+
message(FATAL_ERROR "[SIMD] AVX512_SPR was explicitly enabled, but compiler does not support it.")
91+
endif()
92+
93+
elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Linux" AND AVX512_ENABLED)
94+
set(CMAKE_REQUIRED_FLAGS "-mavx512f -mf16c")
95+
check_cxx_source_compiles("
96+
#include <immintrin.h>
97+
int main() {
98+
__m512 v = _mm512_setzero_ps();
99+
__m256i h = _mm512_cvtps_ph(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
100+
(void)h;
101+
return 0;
102+
}" HAVE_AVX512_COMPILER)
103+
unset(CMAKE_REQUIRED_FLAGS)
104+
105+
if(HAVE_AVX512_COMPILER)
106+
set(KNN_HAVE_AVX512 ON)
107+
set(SIMD_OPT_LEVEL "avx512") # Keep optimization level as avx512 to improve performance on Linux. This is not present on mac systems, and presently not supported on Windows OS.
108+
set(SIMD_FLAGS -mavx512f -mf16c)
109+
add_definitions(-DKNN_HAVE_AVX512)
110+
message(STATUS "[SIMD] AVX512 + F16C supported by compiler.")
111+
else()
112+
message(FATAL_ERROR "[SIMD] AVX512 + FP16 was explicitly enabled, but compiler does not support it.")
113+
endif()
114+
115+
else()
116+
set(CMAKE_REQUIRED_FLAGS "-mavx2 -mf16c -mfma")
117+
check_cxx_source_compiles("
118+
#include <immintrin.h>
119+
int main() {
120+
__m256 v = _mm256_setzero_ps();
121+
__m128i h = _mm256_cvtps_ph(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
122+
(void)h;
123+
return 0;
124+
}" HAVE_AVX2_COMPILER)
125+
unset(CMAKE_REQUIRED_FLAGS)
126+
127+
if(HAVE_AVX2_COMPILER)
128+
set(KNN_HAVE_AVX2_F16C ON)
129+
set(SIMD_OPT_LEVEL "avx2") # Keep optimization level as avx2 to improve performance on Linux and Mac.
130+
set(SIMD_FLAGS -mavx2 -mf16c -mfma)
131+
add_definitions(-DKNN_HAVE_AVX2_F16C)
132+
message(STATUS "[SIMD] AVX2 + F16C supported by compiler.")
133+
else()
134+
message(FATAL_ERROR "[SIMD] AVX2 + F16C was explicitly enabled, but compiler does not support it.")
135+
endif()
136+
endif()
137+
138+
# Fallback if nothing matched
139+
if(SIMD_OPT_LEVEL STREQUAL "")
140+
message(WARNING "[SIMD] No SIMD support detected or all SIMD options disabled. Falling back to Java encoding/decoding.")
141+
set(SIMD_OPT_LEVEL "generic")
142+
set(SIMD_FLAGS "")
143+
endif()
144+
145+
# Always-used flags
146+
set(FP16_SIMD_FLAGS "-O3" "-fPIC")

jni/include/memory_util.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,20 @@
2020
#define RESTRICT
2121
#endif
2222

23+
#if defined(__GNUC__) || defined(__clang__)
24+
/**
25+
* Generic wrapper for GCC/Clang's __builtin_assume_aligned.
26+
* This tells the compiler that 'ptr' is guaranteed to be aligned to 'align' bytes.
27+
*/
28+
#define BUILTIN_ASSUME_ALIGNED(ptr, align) \
29+
(typeof(ptr))__builtin_assume_aligned((ptr), (align))
30+
#else
31+
32+
/**
33+
* Fallback for other compilers (e.g., MSVC or others without __builtin_assume_aligned).
34+
* Returns the original pointer, relying on explicit aligned intrinsics like _mm512_load_ps.
35+
*/
36+
#define BUILTIN_ASSUME_ALIGNED(ptr, align) (ptr)
37+
#endif
38+
2339
#endif //KNNPLUGIN_JNI_INCLUDE_MEMORY_UTIL_H_

jni/include/org_opensearch_knn_jni_SimdVectorComputeService.h

Lines changed: 37 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

jni/include/platform_defs.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#pragma once
2+
// Ensures this header file is included only once during compilation.
3+
// Prevents duplicate definition errors if included multiple times.
4+
5+
#if defined(__GNUC__) || defined(__clang__)
6+
// These macros are for GCC and Clang compilers.
7+
// __builtin_expect() gives the compiler a hint about which branch is more likely
8+
// to be taken. This helps the compiler generate more efficient branch prediction code.
9+
//
10+
// Example:
11+
// if (LIKELY(x > 0)) → compiler assumes condition is usually true
12+
// if (UNLIKELY(x < 0)) → compiler assumes condition is usually false
13+
//
14+
// The '!!(x)' ensures that 'x' is treated as a boolean (0 or 1).
15+
#define LIKELY(x) (__builtin_expect(!!(x), 1))
16+
#define UNLIKELY(x) (__builtin_expect(!!(x), 0))
17+
#else
18+
// Fallback for any other compiler: do nothing special.
19+
#define LIKELY(x) (x)
20+
#define UNLIKELY(x) (x)
21+
#endif

0 commit comments

Comments
 (0)