Change: Move FORCEINLINE macro into util header

spnda · spnda · commit b670b7c85f18 · 2024-06-23T01:58:05.000+02:00
diff --git a/include/fastgltf/math.hpp b/include/fastgltf/math.hpp
@@ -671,17 +671,17 @@ namespace fastgltf::math {
 		}
 
 		/** Returns the column vector at the given index. */
-		[[nodiscard]] constexpr decltype(auto) operator[](std::size_t idx) noexcept {
+		[[nodiscard]] FASTGLTF_FORCEINLINE constexpr decltype(auto) operator[](std::size_t idx) noexcept {
 			return col(idx);
 		}
-		[[nodiscard]] constexpr decltype(auto) operator[](std::size_t idx) const noexcept {
+		[[nodiscard]] FASTGLTF_FORCEINLINE constexpr decltype(auto) operator[](std::size_t idx) const noexcept {
 			return col(idx);
 		}
 
-		[[nodiscard]] constexpr decltype(auto) col(std::size_t idx) noexcept {
+		[[nodiscard]] FASTGLTF_FORCEINLINE constexpr decltype(auto) col(std::size_t idx) noexcept {
 			return _data[idx];
 		}
-		[[nodiscard]] constexpr decltype(auto) col(std::size_t idx) const noexcept {
+		[[nodiscard]] FASTGLTF_FORCEINLINE constexpr decltype(auto) col(std::size_t idx) const noexcept {
 			return _data[idx];
 		}
 
diff --git a/include/fastgltf/util.hpp b/include/fastgltf/util.hpp
@@ -118,6 +118,16 @@
 #define FASTGLTF_INTRINSIC
 #endif
 
+#if defined(_MSC_VER)
+#define FASTGLTF_FORCEINLINE __forceinline
+#elif defined(__GNUC__) || defined(__clang__)
+#define FASTGLTF_FORCEINLINE [[gnu::always_inline]] inline
+#else
+// On other compilers we still need the plain inline specifier, so that functions marked with this
+// macro can be properly inlined without the "function body can be overwritten at link time" error.
+#define FASTGLTF_FORCEINLINE inline
+#endif
+
 #ifdef _MSC_VER
 #pragma warning(push)
 #pragma warning(disable : 5030) // attribute 'x' is not recognized
diff --git a/src/base64.cpp b/src/base64.cpp
@@ -61,16 +61,6 @@
 
 namespace fg = fastgltf;
 
-#if defined(_MSC_VER)
-#define FORCEINLINE __forceinline
-#elif defined(__GNUC__) || defined(__clang__)
-#define FORCEINLINE [[gnu::always_inline]] inline
-#else
-// On other compilers we need the inline specifier, so that the functions in this compilation unit
-// can be properly inlined without the "function body can be overwritten at link time" error.
-#define FORCEINLINE inline
-#endif
-
 namespace fastgltf::base64 {
     using DecodeFunctionInplace = std::function<void(std::string_view, std::uint8_t*, std::size_t)>;
     using DecodeFunction = std::function<fg::StaticVector<std::uint8_t>(std::string_view)>;
@@ -120,7 +110,7 @@ namespace fastgltf::base64 {
 // The AVX and SSE decoding functions are based on http://0x80.pl/notesen/2016-01-17-sse-base64-decoding.html.
 // It covers various methods of en-/decoding base64 using SSE and AVX and also shows their
 // performance metrics.
-[[gnu::target("avx2")]] FORCEINLINE auto avx2_lookup_pshufb_bitmask(const __m256i input) {
+[[gnu::target("avx2")]] FASTGLTF_FORCEINLINE auto avx2_lookup_pshufb_bitmask(const __m256i input) {
     const auto higher_nibble = _mm256_and_si256(_mm256_srli_epi32(input, 4), _mm256_set1_epi8(0x0f));
 
     const auto shiftLUT = _mm256_setr_epi8(
@@ -137,7 +127,7 @@ namespace fastgltf::base64 {
     return _mm256_add_epi8(input, shift);
 }
 
-[[gnu::target("avx2")]] FORCEINLINE auto avx2_pack_ints(__m256i input) {
+[[gnu::target("avx2")]] FASTGLTF_FORCEINLINE auto avx2_pack_ints(__m256i input) {
     const auto merge = _mm256_maddubs_epi16(input, _mm256_set1_epi32(0x01400140));
     return _mm256_madd_epi16(merge, _mm256_set1_epi32(0x00011000));
 }
@@ -204,7 +194,7 @@ namespace fastgltf::base64 {
     return ret;
 }
 
-[[gnu::target("sse4.1")]] FORCEINLINE auto sse4_lookup_pshufb_bitmask(const __m128i input) {
+[[gnu::target("sse4.1")]] FASTGLTF_FORCEINLINE auto sse4_lookup_pshufb_bitmask(const __m128i input) {
     const auto higher_nibble = _mm_and_si128(_mm_srli_epi32(input, 4), _mm_set1_epi8(0x0f));
 
     const auto shiftLUT = _mm_setr_epi8(
@@ -218,7 +208,7 @@ namespace fastgltf::base64 {
     return _mm_add_epi8(input, shift);
 }
 
-[[gnu::target("sse4.1")]] FORCEINLINE auto sse4_pack_ints(__m128i input) {
+[[gnu::target("sse4.1")]] FASTGLTF_FORCEINLINE auto sse4_pack_ints(__m128i input) {
     const auto merge = _mm_maddubs_epi16(input, _mm_set1_epi32(0x01400140));
     return _mm_madd_epi16(merge, _mm_set1_epi32(0x00011000));
 }
@@ -279,7 +269,7 @@ namespace fastgltf::base64 {
     return ret;
 }
 #elif defined(FASTGLTF_IS_A64)
-FORCEINLINE int8x16_t neon_lookup_pshufb_bitmask(const uint8x16_t input) {
+FASTGLTF_FORCEINLINE int8x16_t neon_lookup_pshufb_bitmask(const uint8x16_t input) {
     // clang-format off
     constexpr std::array<int8_t, 16> shiftLUTdata = {
         0,   0,  19,   4, -65, -65, -71, -71,
@@ -298,7 +288,7 @@ FORCEINLINE int8x16_t neon_lookup_pshufb_bitmask(const uint8x16_t input) {
     return vaddq_s8(input, shift);
 }
 
-FORCEINLINE int16x8_t neon_pack_ints(const int8x16_t input) {
+FASTGLTF_FORCEINLINE int16x8_t neon_pack_ints(const int8x16_t input) {
     const uint32x4_t mask = vdupq_n_u32(0x01400140);
 
     const int16x8_t tl = vmulq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(input))), vmovl_s8(vget_low_s8(mask)));
@@ -387,7 +377,7 @@ static constexpr std::array<std::uint8_t, 128> base64lut = {
 
 namespace fastgltf::base64 {
     template <typename Output>
-	FORCEINLINE void decode_block(std::array<std::uint8_t, 4>& sixBitChars, Output output) {
+	FASTGLTF_FORCEINLINE void decode_block(std::array<std::uint8_t, 4>& sixBitChars, Output output) {
 		for (std::size_t i = 0; i < 4; i++) {
 			assert(static_cast<std::size_t>(sixBitChars[i]) < base64lut.size());
 			sixBitChars[i] = base64lut[sixBitChars[i]];

Original file line number	Diff line number	Diff line change
`@@ -671,17 +671,17 @@ namespace fastgltf::math {`
`671`	`671`	`}`
`672`	`672`
`673`	`673`	`/** Returns the column vector at the given index. */`
`674`		`- [[nodiscard]] constexpr decltype(auto) operator[](std::size_t idx) noexcept {`
	`674`	`+ [[nodiscard]] FASTGLTF_FORCEINLINE constexpr decltype(auto) operator[](std::size_t idx) noexcept {`
`675`	`675`	`return col(idx);`
`676`	`676`	`}`
`677`		`- [[nodiscard]] constexpr decltype(auto) operator[](std::size_t idx) const noexcept {`
	`677`	`+ [[nodiscard]] FASTGLTF_FORCEINLINE constexpr decltype(auto) operator[](std::size_t idx) const noexcept {`
`678`	`678`	`return col(idx);`
`679`	`679`	`}`
`680`	`680`
`681`		`- [[nodiscard]] constexpr decltype(auto) col(std::size_t idx) noexcept {`
	`681`	`+ [[nodiscard]] FASTGLTF_FORCEINLINE constexpr decltype(auto) col(std::size_t idx) noexcept {`
`682`	`682`	`return _data[idx];`
`683`	`683`	`}`
`684`		`- [[nodiscard]] constexpr decltype(auto) col(std::size_t idx) const noexcept {`
	`684`	`+ [[nodiscard]] FASTGLTF_FORCEINLINE constexpr decltype(auto) col(std::size_t idx) const noexcept {`
`685`	`685`	`return _data[idx];`
`686`	`686`	`}`
`687`	`687`