61
61
62
62
namespace fg = fastgltf;
63
63
64
- #if defined(_MSC_VER)
65
- #define FORCEINLINE __forceinline
66
- #elif defined(__GNUC__) || defined(__clang__)
67
- #define FORCEINLINE [[gnu::always_inline]] inline
68
- #else
69
- // On other compilers we need the inline specifier, so that the functions in this compilation unit
70
- // can be properly inlined without the "function body can be overwritten at link time" error.
71
- #define FORCEINLINE inline
72
- #endif
73
-
74
64
namespace fastgltf ::base64 {
75
65
using DecodeFunctionInplace = std::function<void (std::string_view, std::uint8_t *, std::size_t )>;
76
66
using DecodeFunction = std::function<fg::StaticVector<std::uint8_t >(std::string_view)>;
@@ -120,7 +110,7 @@ namespace fastgltf::base64 {
120
110
// The AVX and SSE decoding functions are based on http://0x80.pl/notesen/2016-01-17-sse-base64-decoding.html.
121
111
// It covers various methods of en-/decoding base64 using SSE and AVX and also shows their
122
112
// performance metrics.
123
- [[gnu::target(" avx2" )]] FORCEINLINE auto avx2_lookup_pshufb_bitmask (const __m256i input) {
113
+ [[gnu::target(" avx2" )]] FASTGLTF_FORCEINLINE auto avx2_lookup_pshufb_bitmask (const __m256i input) {
124
114
const auto higher_nibble = _mm256_and_si256 (_mm256_srli_epi32 (input, 4 ), _mm256_set1_epi8 (0x0f ));
125
115
126
116
const auto shiftLUT = _mm256_setr_epi8 (
@@ -137,7 +127,7 @@ namespace fastgltf::base64 {
137
127
return _mm256_add_epi8 (input, shift);
138
128
}
139
129
140
- [[gnu::target(" avx2" )]] FORCEINLINE auto avx2_pack_ints (__m256i input) {
130
+ [[gnu::target(" avx2" )]] FASTGLTF_FORCEINLINE auto avx2_pack_ints (__m256i input) {
141
131
const auto merge = _mm256_maddubs_epi16 (input, _mm256_set1_epi32 (0x01400140 ));
142
132
return _mm256_madd_epi16 (merge, _mm256_set1_epi32 (0x00011000 ));
143
133
}
@@ -204,7 +194,7 @@ namespace fastgltf::base64 {
204
194
return ret;
205
195
}
206
196
207
- [[gnu::target(" sse4.1" )]] FORCEINLINE auto sse4_lookup_pshufb_bitmask (const __m128i input) {
197
+ [[gnu::target(" sse4.1" )]] FASTGLTF_FORCEINLINE auto sse4_lookup_pshufb_bitmask (const __m128i input) {
208
198
const auto higher_nibble = _mm_and_si128 (_mm_srli_epi32 (input, 4 ), _mm_set1_epi8 (0x0f ));
209
199
210
200
const auto shiftLUT = _mm_setr_epi8 (
@@ -218,7 +208,7 @@ namespace fastgltf::base64 {
218
208
return _mm_add_epi8 (input, shift);
219
209
}
220
210
221
- [[gnu::target(" sse4.1" )]] FORCEINLINE auto sse4_pack_ints (__m128i input) {
211
+ [[gnu::target(" sse4.1" )]] FASTGLTF_FORCEINLINE auto sse4_pack_ints (__m128i input) {
222
212
const auto merge = _mm_maddubs_epi16 (input, _mm_set1_epi32 (0x01400140 ));
223
213
return _mm_madd_epi16 (merge, _mm_set1_epi32 (0x00011000 ));
224
214
}
@@ -279,7 +269,7 @@ namespace fastgltf::base64 {
279
269
return ret;
280
270
}
281
271
#elif defined(FASTGLTF_IS_A64)
282
- FORCEINLINE int8x16_t neon_lookup_pshufb_bitmask (const uint8x16_t input) {
272
+ FASTGLTF_FORCEINLINE int8x16_t neon_lookup_pshufb_bitmask (const uint8x16_t input) {
283
273
// clang-format off
284
274
constexpr std::array<int8_t , 16 > shiftLUTdata = {
285
275
0 , 0 , 19 , 4 , -65 , -65 , -71 , -71 ,
@@ -298,7 +288,7 @@ FORCEINLINE int8x16_t neon_lookup_pshufb_bitmask(const uint8x16_t input) {
298
288
return vaddq_s8 (input, shift);
299
289
}
300
290
301
- FORCEINLINE int16x8_t neon_pack_ints (const int8x16_t input) {
291
+ FASTGLTF_FORCEINLINE int16x8_t neon_pack_ints (const int8x16_t input) {
302
292
const uint32x4_t mask = vdupq_n_u32 (0x01400140 );
303
293
304
294
const int16x8_t tl = vmulq_s16 (vreinterpretq_s16_u16 (vmovl_u8 (vget_low_u8 (input))), vmovl_s8 (vget_low_s8 (mask)));
@@ -387,7 +377,7 @@ static constexpr std::array<std::uint8_t, 128> base64lut = {
387
377
388
378
namespace fastgltf ::base64 {
389
379
template <typename Output>
390
- FORCEINLINE void decode_block (std::array<std::uint8_t , 4 >& sixBitChars, Output output) {
380
+ FASTGLTF_FORCEINLINE void decode_block (std::array<std::uint8_t , 4 >& sixBitChars, Output output) {
391
381
for (std::size_t i = 0 ; i < 4 ; i++) {
392
382
assert (static_cast <std::size_t >(sixBitChars[i]) < base64lut.size ());
393
383
sixBitChars[i] = base64lut[sixBitChars[i]];
0 commit comments