@@ -377,66 +377,73 @@ SZ_PUBLIC sz_cptr_t sz_rfind_byteset_serial(sz_cptr_t text, sz_size_t length, sz
377377 * This implementation uses hardware-agnostic SWAR technique, to process 8 characters at a time.
378378 * Identical to `memchr(haystack, needle[0], haystack_length)`.
379379 */
380- SZ_PUBLIC sz_cptr_t sz_find_byte_serial (sz_cptr_t h , sz_size_t h_length , sz_cptr_t n ) {
380+ SZ_PUBLIC sz_cptr_t sz_find_byte_serial (sz_cptr_t h_chars , sz_size_t h_length , sz_cptr_t n_chars ) {
381381
382382 if (!h_length ) return SZ_NULL_CHAR ; // Empty haystack: nothing to search.
383- sz_cptr_t const h_end = h + h_length ;
383+ // Reinterpret as unsigned bytes so the SWAR broadcast below cannot sign-extend
384+ // on platforms where `char` is signed (e.g. `-fsigned-char`). See issue #306.
385+ sz_u8_t const * h = (sz_u8_t const * )h_chars ;
386+ sz_u8_t const * const n = (sz_u8_t const * )n_chars ;
387+ sz_u8_t const * const h_end = h + h_length ;
384388
385389#if !SZ_IS_BIG_ENDIAN_ // Use SWAR only on little-endian platforms for brevity.
386390#if !SZ_USE_MISALIGNED_LOADS // Process the misaligned head, to avoid UB on unaligned 64-bit loads.
387391 for (; ((sz_size_t )h & 7ull ) && h < h_end ; ++ h )
388- if (* h == * n ) return h ;
392+ if (* h == * n ) return ( sz_cptr_t ) h ;
389393#endif
390394
391395 // Broadcast the needle byte `*n` into every byte of a 64-bit integer to use SWAR
392396 // techniques and process eight characters at a time.
393397 sz_u64_vec_t h_vec , n_vec , match_vec ;
394398 match_vec .u64 = 0 ;
395- n_vec .u64 = (sz_u64_t )n [ 0 ] * 0x0101010101010101ull ;
399+ n_vec .u64 = (sz_u64_t )* n * 0x0101010101010101ull ;
396400 for (; h + 8 <= h_end ; h += 8 ) {
397401 h_vec .u64 = * (sz_u64_t const * )h ;
398402 match_vec = sz_u64_each_byte_equal_ (h_vec , n_vec );
399- if (match_vec .u64 ) return h + sz_u64_ctz (match_vec .u64 ) / 8 ;
403+ if (match_vec .u64 ) return ( sz_cptr_t )( h + sz_u64_ctz (match_vec .u64 ) / 8 ) ; // presumably: trailing-zero bit count / 8 = offset of the first matching byte - confirm against the helpers.
400404 }
401405#endif
402406
403407 // Scan the remaining bytes one at a time (the whole haystack when the SWAR path above is compiled out).
404408 for (; h < h_end ; ++ h )
405- if (* h == * n ) return h ;
409+ if (* h == * n ) return ( sz_cptr_t ) h ;
406410 return SZ_NULL_CHAR ; // No byte in the haystack equals the needle byte.
407411}
408412
409413/* Find the last occurrence of a @b single-character needle in an arbitrary-length haystack.
410414 * This implementation uses a hardware-agnostic SWAR technique to process 8 characters at a time (little-endian builds only).
411415 * Identical to `memrchr(haystack, needle[0], haystack_length)`. Returns `SZ_NULL_CHAR` for an empty haystack or when no byte matches.
412416 */
413- sz_cptr_t sz_rfind_byte_serial (sz_cptr_t h , sz_size_t h_length , sz_cptr_t n ) {
417+ sz_cptr_t sz_rfind_byte_serial (sz_cptr_t h_chars , sz_size_t h_length , sz_cptr_t n_chars ) {
414418
415419 if (!h_length ) return SZ_NULL_CHAR ; // Empty haystack: nothing to search.
416- sz_cptr_t const h_start = h ;
420+ // Reinterpret as unsigned bytes so the SWAR broadcast below cannot sign-extend
421+ // on platforms where `char` is signed (e.g. `-fsigned-char`). See issue #306.
422+ sz_u8_t const * const h_start = (sz_u8_t const * )h_chars ;
423+ sz_u8_t const * const n = (sz_u8_t const * )n_chars ;
417424
418425 // Reposition the `h` pointer to the last byte, as we will be walking backwards.
419- h = h + h_length - 1 ;
426+ sz_u8_t const * h = h_start + h_length - 1 ;
420427
421428#if !SZ_IS_BIG_ENDIAN_ // Use SWAR only on little-endian platforms for brevity.
422429#if !SZ_USE_MISALIGNED_LOADS // Walk back byte-by-byte until `h + 1` is 8-byte aligned, to avoid UB on unaligned 64-bit loads.
423430 for (; ((sz_size_t )(h + 1 ) & 7ull ) && h >= h_start ; -- h )
424- if (* h == * n ) return h ;
431+ if (* h == * n ) return ( sz_cptr_t ) h ;
425432#endif
426433
427434 // Broadcast the needle byte `*n` into every byte of a 64-bit integer to use SWAR
428435 // techniques and process eight characters at a time.
429436 sz_u64_vec_t h_vec , n_vec , match_vec ;
430- n_vec .u64 = (sz_u64_t )n [ 0 ] * 0x0101010101010101ull ;
437+ n_vec .u64 = (sz_u64_t )* n * 0x0101010101010101ull ;
431438 for (; h >= h_start + 7 ; h -= 8 ) {
432439 h_vec .u64 = * (sz_u64_t const * )(h - 7 ); // Load the 8 bytes ending at `h`.
433440 match_vec = sz_u64_each_byte_equal_ (h_vec , n_vec );
434- if (match_vec .u64 ) return h - sz_u64_clz (match_vec .u64 ) / 8 ;
441+ if (match_vec .u64 ) return ( sz_cptr_t )( h - sz_u64_clz (match_vec .u64 ) / 8 ) ; // presumably: leading-zero bit count / 8 = distance back from `h` to the last matching byte - confirm against the helpers.
435442 }
436443#endif
437444
438445 for (; h >= h_start ; -- h ) // Scan the leftover leading bytes backwards. NOTE(review): the final `-- h` leaves `h` at `h_start - 1`, one before the buffer - confirm this is acceptable.
439- if (* h == * n ) return h ;
446+ if (* h == * n ) return ( sz_cptr_t ) h ;
440447 return SZ_NULL_CHAR ; // No byte in the haystack equals the needle byte.
441448}
442449
0 commit comments