Skip to content

Commit b670b7c

Browse files
committed
Change: Move FORCEINLINE macro into util header
1 parent 0420436 commit b670b7c

File tree

3 files changed

+21
-21
lines changed

3 files changed

+21
-21
lines changed

include/fastgltf/math.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -671,17 +671,17 @@ namespace fastgltf::math {
671671
}
672672

673673
/** Returns the column vector at the given index. */
674-
[[nodiscard]] constexpr decltype(auto) operator[](std::size_t idx) noexcept {
674+
[[nodiscard]] FASTGLTF_FORCEINLINE constexpr decltype(auto) operator[](std::size_t idx) noexcept {
675675
return col(idx);
676676
}
677-
[[nodiscard]] constexpr decltype(auto) operator[](std::size_t idx) const noexcept {
677+
[[nodiscard]] FASTGLTF_FORCEINLINE constexpr decltype(auto) operator[](std::size_t idx) const noexcept {
678678
return col(idx);
679679
}
680680

681-
[[nodiscard]] constexpr decltype(auto) col(std::size_t idx) noexcept {
681+
[[nodiscard]] FASTGLTF_FORCEINLINE constexpr decltype(auto) col(std::size_t idx) noexcept {
682682
return _data[idx];
683683
}
684-
[[nodiscard]] constexpr decltype(auto) col(std::size_t idx) const noexcept {
684+
[[nodiscard]] FASTGLTF_FORCEINLINE constexpr decltype(auto) col(std::size_t idx) const noexcept {
685685
return _data[idx];
686686
}
687687

include/fastgltf/util.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,16 @@
118118
#define FASTGLTF_INTRINSIC
119119
#endif
120120

121+
#if defined(_MSC_VER)
122+
#define FASTGLTF_FORCEINLINE __forceinline
123+
#elif defined(__GNUC__) || defined(__clang__)
124+
#define FASTGLTF_FORCEINLINE [[gnu::always_inline]] inline
125+
#else
126+
// On other compilers we need the inline specifier, so that functions marked with this macro
127+
// can be properly inlined without the "function body can be overwritten at link time" error.
128+
#define FASTGLTF_FORCEINLINE inline
129+
#endif
130+
121131
#ifdef _MSC_VER
122132
#pragma warning(push)
123133
#pragma warning(disable : 5030) // attribute 'x' is not recognized

src/base64.cpp

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -61,16 +61,6 @@
6161

6262
namespace fg = fastgltf;
6363

64-
#if defined(_MSC_VER)
65-
#define FORCEINLINE __forceinline
66-
#elif defined(__GNUC__) || defined(__clang__)
67-
#define FORCEINLINE [[gnu::always_inline]] inline
68-
#else
69-
// On other compilers we need the inline specifier, so that the functions in this compilation unit
70-
// can be properly inlined without the "function body can be overwritten at link time" error.
71-
#define FORCEINLINE inline
72-
#endif
73-
7464
namespace fastgltf::base64 {
7565
using DecodeFunctionInplace = std::function<void(std::string_view, std::uint8_t*, std::size_t)>;
7666
using DecodeFunction = std::function<fg::StaticVector<std::uint8_t>(std::string_view)>;
@@ -120,7 +110,7 @@ namespace fastgltf::base64 {
120110
// The AVX and SSE decoding functions are based on http://0x80.pl/notesen/2016-01-17-sse-base64-decoding.html.
121111
// That article covers various methods of encoding/decoding base64 using SSE and AVX and also shows their
122112
// performance metrics.
123-
[[gnu::target("avx2")]] FORCEINLINE auto avx2_lookup_pshufb_bitmask(const __m256i input) {
113+
[[gnu::target("avx2")]] FASTGLTF_FORCEINLINE auto avx2_lookup_pshufb_bitmask(const __m256i input) {
124114
const auto higher_nibble = _mm256_and_si256(_mm256_srli_epi32(input, 4), _mm256_set1_epi8(0x0f));
125115

126116
const auto shiftLUT = _mm256_setr_epi8(
@@ -137,7 +127,7 @@ namespace fastgltf::base64 {
137127
return _mm256_add_epi8(input, shift);
138128
}
139129

140-
[[gnu::target("avx2")]] FORCEINLINE auto avx2_pack_ints(__m256i input) {
130+
[[gnu::target("avx2")]] FASTGLTF_FORCEINLINE auto avx2_pack_ints(__m256i input) {
141131
const auto merge = _mm256_maddubs_epi16(input, _mm256_set1_epi32(0x01400140));
142132
return _mm256_madd_epi16(merge, _mm256_set1_epi32(0x00011000));
143133
}
@@ -204,7 +194,7 @@ namespace fastgltf::base64 {
204194
return ret;
205195
}
206196

207-
[[gnu::target("sse4.1")]] FORCEINLINE auto sse4_lookup_pshufb_bitmask(const __m128i input) {
197+
[[gnu::target("sse4.1")]] FASTGLTF_FORCEINLINE auto sse4_lookup_pshufb_bitmask(const __m128i input) {
208198
const auto higher_nibble = _mm_and_si128(_mm_srli_epi32(input, 4), _mm_set1_epi8(0x0f));
209199

210200
const auto shiftLUT = _mm_setr_epi8(
@@ -218,7 +208,7 @@ namespace fastgltf::base64 {
218208
return _mm_add_epi8(input, shift);
219209
}
220210

221-
[[gnu::target("sse4.1")]] FORCEINLINE auto sse4_pack_ints(__m128i input) {
211+
[[gnu::target("sse4.1")]] FASTGLTF_FORCEINLINE auto sse4_pack_ints(__m128i input) {
222212
const auto merge = _mm_maddubs_epi16(input, _mm_set1_epi32(0x01400140));
223213
return _mm_madd_epi16(merge, _mm_set1_epi32(0x00011000));
224214
}
@@ -279,7 +269,7 @@ namespace fastgltf::base64 {
279269
return ret;
280270
}
281271
#elif defined(FASTGLTF_IS_A64)
282-
FORCEINLINE int8x16_t neon_lookup_pshufb_bitmask(const uint8x16_t input) {
272+
FASTGLTF_FORCEINLINE int8x16_t neon_lookup_pshufb_bitmask(const uint8x16_t input) {
283273
// clang-format off
284274
constexpr std::array<int8_t, 16> shiftLUTdata = {
285275
0, 0, 19, 4, -65, -65, -71, -71,
@@ -298,7 +288,7 @@ FORCEINLINE int8x16_t neon_lookup_pshufb_bitmask(const uint8x16_t input) {
298288
return vaddq_s8(input, shift);
299289
}
300290

301-
FORCEINLINE int16x8_t neon_pack_ints(const int8x16_t input) {
291+
FASTGLTF_FORCEINLINE int16x8_t neon_pack_ints(const int8x16_t input) {
302292
const uint32x4_t mask = vdupq_n_u32(0x01400140);
303293

304294
const int16x8_t tl = vmulq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(input))), vmovl_s8(vget_low_s8(mask)));
@@ -387,7 +377,7 @@ static constexpr std::array<std::uint8_t, 128> base64lut = {
387377

388378
namespace fastgltf::base64 {
389379
template <typename Output>
390-
FORCEINLINE void decode_block(std::array<std::uint8_t, 4>& sixBitChars, Output output) {
380+
FASTGLTF_FORCEINLINE void decode_block(std::array<std::uint8_t, 4>& sixBitChars, Output output) {
391381
for (std::size_t i = 0; i < 4; i++) {
392382
assert(static_cast<std::size_t>(sixBitChars[i]) < base64lut.size());
393383
sixBitChars[i] = base64lut[sixBitChars[i]];

0 commit comments

Comments
 (0)