Skip to content

Commit 5454497

Browse files
committed
Update BLAKE3
1 parent 789d130 commit 5454497

File tree

8 files changed

+158
-89
lines changed

8 files changed

+158
-89
lines changed

src/belahash/blake3.lock

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
https://github.com/BLAKE3-team/BLAKE3
2-
4d32708f511fd85c6b0fb131295cc73224246738
2+
fc2f7e4206f016b0cac0593f23a7d5976ce066e6

src/belahash/blake3/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
blake3
22
example
3+
build/
34
*.o
5+
6+
CMakeUserPresets.json

src/belahash/blake3/CMakeLists.txt

Lines changed: 128 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,114 @@
1-
cmake_minimum_required(VERSION 3.9)
1+
cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
2+
3+
# respect C_EXTENSIONS OFF without explicitly setting C_STANDARD
4+
if (POLICY CMP0128)
5+
cmake_policy(SET CMP0128 NEW)
6+
endif()
7+
# mark_as_advanced does not implicitly create UNINITIALIZED cache entries
8+
if (POLICY CMP0102)
9+
cmake_policy(SET CMP0102 NEW)
10+
endif()
211

312
project(libblake3
4-
VERSION 1.4.0
13+
VERSION 1.5.2
514
DESCRIPTION "BLAKE3 C implementation"
615
LANGUAGES C ASM
716
)
817

918
include(FeatureSummary)
1019
include(GNUInstallDirs)
1120

21+
# architecture lists for which to enable assembly / SIMD sources
22+
set(BLAKE3_AMD64_NAMES amd64 AMD64 x86_64)
23+
set(BLAKE3_X86_NAMES i686 x86 X86)
24+
set(BLAKE3_ARMv8_NAMES aarch64 AArch64 arm64 ARM64 armv8 armv8a)
1225
# default SIMD compiler flag configuration (can be overridden by toolchains or CLI)
13-
if(CMAKE_C_COMPILER_ID STREQUAL "MSVC")
26+
if(MSVC)
1427
set(BLAKE3_CFLAGS_SSE2 "/arch:SSE2" CACHE STRING "the compiler flags to enable SSE2")
1528
# MSVC has no dedicated sse4.1 flag (see https://learn.microsoft.com/en-us/cpp/build/reference/arch-x86?view=msvc-170)
1629
set(BLAKE3_CFLAGS_SSE4.1 "/arch:AVX" CACHE STRING "the compiler flags to enable SSE4.1")
1730
set(BLAKE3_CFLAGS_AVX2 "/arch:AVX2" CACHE STRING "the compiler flags to enable AVX2")
1831
set(BLAKE3_CFLAGS_AVX512 "/arch:AVX512" CACHE STRING "the compiler flags to enable AVX512")
1932

33+
set(BLAKE3_AMD64_ASM_SOURCES
34+
blake3_avx2_x86-64_windows_msvc.asm
35+
blake3_avx512_x86-64_windows_msvc.asm
36+
blake3_sse2_x86-64_windows_msvc.asm
37+
blake3_sse41_x86-64_windows_msvc.asm
38+
)
39+
2040
elseif(CMAKE_C_COMPILER_ID STREQUAL "GNU"
2141
OR CMAKE_C_COMPILER_ID STREQUAL "Clang"
2242
OR CMAKE_C_COMPILER_ID STREQUAL "AppleClang")
2343
set(BLAKE3_CFLAGS_SSE2 "-msse2" CACHE STRING "the compiler flags to enable SSE2")
2444
set(BLAKE3_CFLAGS_SSE4.1 "-msse4.1" CACHE STRING "the compiler flags to enable SSE4.1")
2545
set(BLAKE3_CFLAGS_AVX2 "-mavx2" CACHE STRING "the compiler flags to enable AVX2")
2646
set(BLAKE3_CFLAGS_AVX512 "-mavx512f -mavx512vl" CACHE STRING "the compiler flags to enable AVX512")
47+
48+
if (WIN32)
49+
set(BLAKE3_AMD64_ASM_SOURCES
50+
blake3_avx2_x86-64_windows_gnu.S
51+
blake3_avx512_x86-64_windows_gnu.S
52+
blake3_sse2_x86-64_windows_gnu.S
53+
blake3_sse41_x86-64_windows_gnu.S
54+
)
55+
56+
elseif(UNIX)
57+
set(BLAKE3_AMD64_ASM_SOURCES
58+
blake3_avx2_x86-64_unix.S
59+
blake3_avx512_x86-64_unix.S
60+
blake3_sse2_x86-64_unix.S
61+
blake3_sse41_x86-64_unix.S
62+
)
63+
endif()
64+
65+
if (CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_ARMv8_NAMES
66+
AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
67+
# 32-bit ARMv8 needs NEON to be enabled explicitly
68+
set(BLAKE3_CFLAGS_NEON "-mfpu=neon" CACHE STRING "the compiler flags to enable NEON")
69+
endif()
2770
endif()
28-
# architecture lists for which to enable assembly / SIMD sources
29-
set(BLAKE3_AMD64_NAMES amd64 AMD64 x86_64)
30-
set(BLAKE3_X86_NAMES i686 x86 X86)
31-
set(BLAKE3_ARMv8_NAMES aarch64 AArch64 arm64 ARM64 armv8 armv8a)
71+
72+
mark_as_advanced(BLAKE3_CFLAGS_SSE2 BLAKE3_CFLAGS_SSE4.1 BLAKE3_CFLAGS_AVX2 BLAKE3_CFLAGS_AVX512 BLAKE3_CFLAGS_NEON)
73+
mark_as_advanced(BLAKE3_AMD64_ASM_SOURCES)
74+
75+
message(STATUS "BLAKE3 SIMD configuration: ${CMAKE_C_COMPILER_ARCHITECTURE_ID}")
76+
if(MSVC AND DEFINED CMAKE_C_COMPILER_ARCHITECTURE_ID)
77+
if(CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "[Xx]86")
78+
set(BLAKE3_SIMD_TYPE "x86-intrinsics" CACHE STRING "the SIMD acceleration type to use")
79+
80+
elseif(CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "[Xx]64")
81+
set(BLAKE3_SIMD_TYPE "amd64-asm" CACHE STRING "the SIMD acceleration type to use")
82+
83+
elseif(CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "[Aa][Rr][Mm]64")
84+
set(BLAKE3_SIMD_TYPE "neon-intrinsics" CACHE STRING "the SIMD acceleration type to use")
85+
86+
else()
87+
set(BLAKE3_SIMD_TYPE "none" CACHE STRING "the SIMD acceleration type to use")
88+
endif()
89+
90+
elseif(CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_AMD64_NAMES)
91+
set(BLAKE3_SIMD_TYPE "amd64-asm" CACHE STRING "the SIMD acceleration type to use")
92+
93+
elseif(CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_X86_NAMES
94+
AND DEFINED BLAKE3_CFLAGS_SSE2
95+
AND DEFINED BLAKE3_CFLAGS_SSE4.1
96+
AND DEFINED BLAKE3_CFLAGS_AVX2
97+
AND DEFINED BLAKE3_CFLAGS_AVX512)
98+
set(BLAKE3_SIMD_TYPE "x86-intrinsics" CACHE STRING "the SIMD acceleration type to use")
99+
100+
elseif((CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_ARMv8_NAMES
101+
OR ANDROID_ABI STREQUAL "armeabi-v7a"
102+
OR BLAKE3_USE_NEON_INTRINSICS)
103+
AND (DEFINED BLAKE3_CFLAGS_NEON
104+
OR CMAKE_SIZEOF_VOID_P EQUAL 8))
105+
set(BLAKE3_SIMD_TYPE "neon-intrinsics" CACHE STRING "the SIMD acceleration type to use")
106+
107+
else()
108+
set(BLAKE3_SIMD_TYPE "none" CACHE STRING "the SIMD acceleration type to use")
109+
endif()
110+
111+
mark_as_advanced(BLAKE3_SIMD_TYPE)
32112

33113
# library target
34114
add_library(blake3
@@ -41,73 +121,49 @@ add_library(BLAKE3::blake3 ALIAS blake3)
41121
# library configuration
42122
set(BLAKE3_PKGCONFIG_CFLAGS)
43123
if (BUILD_SHARED_LIBS)
44-
target_compile_definitions(blake3
124+
target_compile_definitions(blake3
45125
PUBLIC BLAKE3_DLL
46126
PRIVATE BLAKE3_DLL_EXPORTS
47127
)
48128
list(APPEND BLAKE3_PKGCONFIG_CFLAGS -DBLAKE3_DLL)
49129
endif()
50-
target_include_directories(blake3 PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
130+
target_include_directories(blake3 PUBLIC
131+
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
132+
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
133+
)
51134
set_target_properties(blake3 PROPERTIES
52135
VERSION ${PROJECT_VERSION}
53136
SOVERSION 0
54137
C_VISIBILITY_PRESET hidden
138+
C_EXTENSIONS OFF
55139
)
140+
target_compile_features(blake3 PUBLIC c_std_99)
141+
# ensure C_EXTENSIONS OFF is respected without overriding CMAKE_C_STANDARD
142+
# which may be set by the user or toolchain file
143+
if (NOT POLICY CMP0128 AND NOT DEFINED CMAKE_C_STANDARD)
144+
set_target_properties(blake3 PROPERTIES C_STANDARD 99)
145+
endif()
56146

57147
# optional SIMD sources
58-
macro(BLAKE3_DISABLE_SIMD)
59-
set(BLAKE3_SIMD_AMD64_ASM OFF)
60-
set(BLAKE3_SIMD_X86_INTRINSICS OFF)
61-
set(BLAKE3_SIMD_NEON_INTRINSICS OFF)
62-
set_source_files_properties(blake3_dispatch.c PROPERTIES
63-
COMPILE_DEFINITIONS BLAKE3_USE_NEON=0;BLAKE3_NO_SSE2;BLAKE3_NO_SSE41;BLAKE3_NO_AVX2;BLAKE3_NO_AVX512
64-
)
65-
endmacro()
66-
67-
if(CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_AMD64_NAMES OR BLAKE3_USE_AMD64_ASM)
148+
if(BLAKE3_SIMD_TYPE STREQUAL "amd64-asm")
149+
if (NOT DEFINED BLAKE3_AMD64_ASM_SOURCES)
150+
message(FATAL_ERROR "BLAKE3_SIMD_TYPE is set to 'amd64-asm' but no assembly sources are available for the target architecture.")
151+
endif()
68152
set(BLAKE3_SIMD_AMD64_ASM ON)
69153

70-
if(CMAKE_C_COMPILER_ID STREQUAL "MSVC")
154+
if(MSVC)
71155
enable_language(ASM_MASM)
72-
target_sources(blake3 PRIVATE
73-
blake3_avx2_x86-64_windows_msvc.asm
74-
blake3_avx512_x86-64_windows_msvc.asm
75-
blake3_sse2_x86-64_windows_msvc.asm
76-
blake3_sse41_x86-64_windows_msvc.asm
77-
)
78-
79-
elseif(CMAKE_C_COMPILER_ID STREQUAL "GNU"
80-
OR CMAKE_C_COMPILER_ID STREQUAL "Clang"
81-
OR CMAKE_C_COMPILER_ID STREQUAL "AppleClang")
82-
if (WIN32)
83-
target_sources(blake3 PRIVATE
84-
blake3_avx2_x86-64_windows_gnu.S
85-
blake3_avx512_x86-64_windows_gnu.S
86-
blake3_sse2_x86-64_windows_gnu.S
87-
blake3_sse41_x86-64_windows_gnu.S
88-
)
89-
90-
elseif(UNIX)
91-
target_sources(blake3 PRIVATE
92-
blake3_avx2_x86-64_unix.S
93-
blake3_avx512_x86-64_unix.S
94-
blake3_sse2_x86-64_unix.S
95-
blake3_sse41_x86-64_unix.S
96-
)
97-
98-
else()
99-
BLAKE3_DISABLE_SIMD()
100-
endif()
101-
102-
else()
103-
BLAKE3_DISABLE_SIMD()
104156
endif()
105157

106-
elseif((CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_X86_NAMES OR BLAKE3_USE_X86_INTRINSICS)
107-
AND DEFINED BLAKE3_CFLAGS_SSE2
108-
AND DEFINED BLAKE3_CFLAGS_SSE4.1
109-
AND DEFINED BLAKE3_CFLAGS_AVX2
110-
AND DEFINED BLAKE3_CFLAGS_AVX512)
158+
target_sources(blake3 PRIVATE ${BLAKE3_AMD64_ASM_SOURCES})
159+
160+
elseif(BLAKE3_SIMD_TYPE STREQUAL "x86-intrinsics")
161+
if (NOT DEFINED BLAKE3_CFLAGS_SSE2
162+
OR NOT DEFINED BLAKE3_CFLAGS_SSE4.1
163+
OR NOT DEFINED BLAKE3_CFLAGS_AVX2
164+
OR NOT DEFINED BLAKE3_CFLAGS_AVX512)
165+
message(FATAL_ERROR "BLAKE3_SIMD_TYPE is set to 'x86-intrinsics' but no compiler flags are available for the target architecture.")
166+
endif()
111167
set(BLAKE3_SIMD_X86_INTRINSICS ON)
112168

113169
target_sources(blake3 PRIVATE
@@ -121,24 +177,31 @@ elseif((CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_X86_NAMES OR BLAKE3_USE_X86_INTRIN
121177
set_source_files_properties(blake3_sse2.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_SSE2}")
122178
set_source_files_properties(blake3_sse41.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_SSE4.1}")
123179

124-
elseif(CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_ARMv8_NAMES
125-
OR ((ANDROID_ABI STREQUAL "armeabi-v7a"
126-
OR BLAKE3_USE_NEON_INTRINSICS)
127-
AND (DEFINED BLAKE3_CFLAGS_NEON
128-
OR CMAKE_SIZEOF_VOID_P EQUAL 8)))
180+
elseif(BLAKE3_SIMD_TYPE STREQUAL "neon-intrinsics")
129181
set(BLAKE3_SIMD_NEON_INTRINSICS ON)
130182

131183
target_sources(blake3 PRIVATE
132184
blake3_neon.c
133185
)
134-
set_source_files_properties(blake3_dispatch.c PROPERTIES COMPILE_DEFINITIONS BLAKE3_USE_NEON=1)
186+
target_compile_definitions(blake3 PRIVATE
187+
BLAKE3_USE_NEON=1
188+
)
135189

136190
if (DEFINED BLAKE3_CFLAGS_NEON)
137191
set_source_files_properties(blake3_neon.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_NEON}")
138192
endif()
139193

194+
elseif(BLAKE3_SIMD_TYPE STREQUAL "none")
195+
target_compile_definitions(blake3 PRIVATE
196+
BLAKE3_USE_NEON=0
197+
BLAKE3_NO_SSE2
198+
BLAKE3_NO_SSE41
199+
BLAKE3_NO_AVX2
200+
BLAKE3_NO_AVX512
201+
)
202+
140203
else()
141-
BLAKE3_DISABLE_SIMD()
204+
message(FATAL_ERROR "BLAKE3_SIMD_TYPE is set to an unknown value: '${BLAKE3_SIMD_TYPE}'")
142205
endif()
143206

144207
# cmake install support
@@ -171,6 +234,7 @@ install(FILES "${CMAKE_BINARY_DIR}/libblake3.pc"
171234
DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
172235

173236
# print feature summary
237+
# add_feature_info cannot directly use the BLAKE3_SIMD_TYPE :(
174238
add_feature_info("AMD64 assembly" BLAKE3_SIMD_AMD64_ASM "The library uses hand written amd64 SIMD assembly.")
175239
add_feature_info("x86 SIMD intrinsics" BLAKE3_SIMD_X86_INTRINSICS "The library uses x86 SIMD intrinsics.")
176240
add_feature_info("NEON SIMD intrinsics" BLAKE3_SIMD_NEON_INTRINSICS "The library uses NEON SIMD intrinsics.")

src/belahash/blake3/blake3.c

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -134,9 +134,7 @@ INLINE void chunk_state_update(blake3_chunk_state *self, const uint8_t *input,
134134
input_len -= BLAKE3_BLOCK_LEN;
135135
}
136136

137-
size_t take = chunk_state_fill_buf(self, input, input_len);
138-
input += take;
139-
input_len -= take;
137+
chunk_state_fill_buf(self, input, input_len);
140138
}
141139

142140
INLINE output_t chunk_state_output(const blake3_chunk_state *self) {
@@ -341,21 +339,24 @@ INLINE void compress_subtree_to_parent_node(
341339
size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key,
342340
chunk_counter, flags, cv_array);
343341
assert(num_cvs <= MAX_SIMD_DEGREE_OR_2);
344-
345-
// If MAX_SIMD_DEGREE is greater than 2 and there's enough input,
342+
// The following loop never executes when MAX_SIMD_DEGREE_OR_2 is 2, because
343+
// as we just asserted, num_cvs will always be <=2 in that case. But GCC
344+
// (particularly GCC 8.5) can't tell that it never executes, and if NDEBUG is
345+
// set then it emits incorrect warnings here. We tried a few different
346+
// hacks to silence these, but in the end our hacks just produced different
347+
// warnings (see https://github.com/BLAKE3-team/BLAKE3/pull/380). Out of
348+
// desperation, we ifdef out this entire loop when we know it's not needed.
349+
#if MAX_SIMD_DEGREE_OR_2 > 2
350+
// If MAX_SIMD_DEGREE_OR_2 is greater than 2 and there's enough input,
346351
// compress_subtree_wide() returns more than 2 chaining values. Condense
347352
// them into 2 by forming parent nodes repeatedly.
348353
uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2];
349-
// The second half of this loop condition is always true, and we just
350-
// asserted it above. But GCC can't tell that it's always true, and if NDEBUG
351-
// is set on platforms where MAX_SIMD_DEGREE_OR_2 == 2, GCC emits spurious
352-
// warnings here. GCC 8.5 is particularly sensitive, so if you're changing
353-
// this code, test it against that version.
354-
while (num_cvs > 2 && num_cvs <= MAX_SIMD_DEGREE_OR_2) {
354+
while (num_cvs > 2) {
355355
num_cvs =
356356
compress_parents_parallel(cv_array, num_cvs, key, flags, out_array);
357357
memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN);
358358
}
359+
#endif
359360
memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
360361
}
361362

@@ -427,7 +428,7 @@ INLINE void hasher_merge_cv_stack(blake3_hasher *self, uint64_t total_len) {
427428
// of the whole tree, and it would need to be ROOT finalized. We can't
428429
// compress it until we know.
429430
// 2) This 64 KiB input might complete a larger tree, whose root node is
430-
// similarly going to be the the root of the whole tree. For example, maybe
431+
// similarly going to be the root of the whole tree. For example, maybe
431432
// we have 196 KiB (that is, 128 + 64) hashed so far. We can't compress the
432433
// node at the root of the 256 KiB subtree until we know how to finalize it.
433434
//

src/belahash/blake3/blake3.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
extern "C" {
3131
#endif
3232

33-
#define BLAKE3_VERSION_STRING "1.5.0"
33+
#define BLAKE3_VERSION_STRING "1.5.2"
3434
#define BLAKE3_KEY_LEN 32
3535
#define BLAKE3_OUT_LEN 32
3636
#define BLAKE3_BLOCK_LEN 64

src/belahash/blake3/blake3_dispatch.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@
44

55
#include "blake3_impl.h"
66

7-
#if defined(IS_X86)
87
#if defined(_MSC_VER)
98
#include <Windows.h>
9+
#endif
10+
11+
#if defined(IS_X86)
12+
#if defined(_MSC_VER)
1013
#include <intrin.h>
1114
#elif defined(__GNUC__)
1215
#include <immintrin.h>
@@ -32,9 +35,9 @@
3235
#define ATOMIC_LOAD(x) x
3336
#define ATOMIC_STORE(x, y) x = y
3437
#elif defined(_MSC_VER)
35-
#define ATOMIC_INT long
36-
#define ATOMIC_LOAD(x) _InterlockedOr(&x, 0)
37-
#define ATOMIC_STORE(x, y) _InterlockedExchange(&x, y)
38+
#define ATOMIC_INT LONG
39+
#define ATOMIC_LOAD(x) InterlockedOr(&x, 0)
40+
#define ATOMIC_STORE(x, y) InterlockedExchange(&x, y)
3841
#else
3942
#define ATOMIC_INT int
4043
#define ATOMIC_LOAD(x) x

src/belahash/blake3/blake3_impl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ enum blake3_flags {
2828
#define INLINE static inline __attribute__((always_inline))
2929
#endif
3030

31-
#if defined(__x86_64__) || defined(_M_X64)
31+
#if (defined(__x86_64__) || defined(_M_X64)) && !defined(_M_ARM64EC)
3232
#define IS_X86
3333
#define IS_X86_64
3434
#endif
@@ -38,7 +38,7 @@ enum blake3_flags {
3838
#define IS_X86_32
3939
#endif
4040

41-
#if defined(__aarch64__) || defined(_M_ARM64)
41+
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
4242
#define IS_AARCH64
4343
#endif
4444

0 commit comments

Comments
 (0)