Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Perform unaligned writes via FFI when necessary #587

Merged
merged 19 commits into from
Sep 15, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 32 additions & 9 deletions Data/ByteString/Builder/Prim/Binary.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE Trustworthy #-}

{-# LANGUAGE TypeApplications #-}

-- | Copyright : (c) 2010-2011 Simon Meier
-- License : BSD3-style (see LICENSE)
--
Expand Down Expand Up @@ -70,7 +73,7 @@ import Foreign
--
{-# INLINE word8 #-}
word8 :: FixedPrim Word8
word8 = storableToF
word8 = fixedPrim 1 (flip poke) -- Word8 is always aligned

--
-- We rely on the fromIntegral to do the right masking for us.
Expand Down Expand Up @@ -143,23 +146,43 @@ word64LE = word64Host
--
{-# INLINE wordHost #-}
wordHost :: FixedPrim Word
wordHost = storableToF
wordHost = case finiteBitSize (0 :: Word) of
32 -> fromIntegral @Word @Word32 >$< word32Host
64 -> fromIntegral @Word @Word64 >$< word64Host
_ -> error "Data.ByteString.Builder.Prim.Binary.wordHost: unexpected word size"

-- | Encoding 'Word16's in native host order and host endianness.
{-# INLINE word16Host #-}
word16Host :: FixedPrim Word16
word16Host = storableToF
word16Host = fixedPrim 2 unaligned_write_u16

-- | Encoding 'Word32's in native host order and host endianness.
{-# INLINE word32Host #-}
word32Host :: FixedPrim Word32
word32Host = storableToF
word32Host = fixedPrim 4 unaligned_write_u32

-- | Encoding 'Word64's in native host order and host endianness.
{-# INLINE word64Host #-}
word64Host :: FixedPrim Word64
word64Host = storableToF
word64Host = fixedPrim 8 unaligned_write_u64

#if HS_BYTESTRING_UNALIGNED_POKES_OK
unaligned_write_u16 :: Word16 -> Ptr Word8 -> IO ()
unaligned_write_u16 x p = poke (castPtr p) x

unaligned_write_u32 :: Word32 -> Ptr Word8 -> IO ()
unaligned_write_u32 x p = poke (castPtr p) x

unaligned_write_u64 :: Word64 -> Ptr Word8 -> IO ()
unaligned_write_u64 x p = poke (castPtr p) x
#else
foreign import ccall unsafe "static fpstring.h fps_unaligned_write_u16"
unaligned_write_u16 :: Word16 -> Ptr Word8 -> IO ()
foreign import ccall unsafe "static fpstring.h fps_unaligned_write_u32"
unaligned_write_u32 :: Word32 -> Ptr Word8 -> IO ()
foreign import ccall unsafe "static fpstring.h fps_unaligned_write_u64"
unaligned_write_u64 :: Word64 -> Ptr Word8 -> IO ()
#endif

------------------------------------------------------------------------------
-- Int encodings
Expand Down Expand Up @@ -215,22 +238,22 @@ int64LE = fromIntegral >$< word64LE
--
{-# INLINE intHost #-}
intHost :: FixedPrim Int
intHost = storableToF
intHost = fromIntegral @Int @Word >$< wordHost

-- | Encoding 'Int16's in native host order and host endianness.
{-# INLINE int16Host #-}
int16Host :: FixedPrim Int16
int16Host = storableToF
int16Host = fromIntegral @Int16 @Word16 >$< word16Host

-- | Encoding 'Int32's in native host order and host endianness.
{-# INLINE int32Host #-}
int32Host :: FixedPrim Int32
int32Host = storableToF
int32Host = fromIntegral @Int32 @Word32 >$< word32Host

-- | Encoding 'Int64's in native host order and host endianness.
{-# INLINE int64Host #-}
int64Host :: FixedPrim Int64
int64Host = storableToF
int64Host = fromIntegral @Int64 @Word64 >$< word64Host

-- IEEE Floating Point Numbers
------------------------------
Expand Down
9 changes: 2 additions & 7 deletions Data/ByteString/Builder/Prim/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ import Foreign
import Prelude hiding (maxBound)

#include "MachDeps.h"
#include "bytestring-cpp-macros.h"

------------------------------------------------------------------------------
-- Supporting infrastructure
Expand Down Expand Up @@ -199,13 +200,7 @@ liftFixedToBounded = toB

{-# INLINE CONLIKE storableToF #-}
storableToF :: forall a. Storable a => FixedPrim a
-- Not all architectures are forgiving of unaligned accesses; whitelist ones
-- which are known not to trap (either to the kernel for emulation, or crash).
#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH) \
|| ((defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) \
&& defined(__ARM_FEATURE_UNALIGNED)) \
|| defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \
|| defined(powerpc64le_HOST_ARCH)
#if HS_UNALIGNED_POKES_OK
clyring marked this conversation as resolved.
Show resolved Hide resolved
storableToF = FP (sizeOf (undefined :: a)) (\x op -> poke (castPtr op) x)
#else
storableToF = FP (sizeOf (undefined :: a)) $ \x op ->
Expand Down
22 changes: 7 additions & 15 deletions Data/ByteString/Short/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,8 @@
{-# OPTIONS_HADDOCK not-home #-}

{-# OPTIONS_GHC -fexpose-all-unfoldings #-}
-- Not all architectures are forgiving of unaligned accesses; whitelist ones
-- which are known not to trap (either to the kernel for emulation, or crash).
#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH) \
|| ((defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) \
&& defined(__ARM_FEATURE_UNALIGNED)) \
|| defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \
|| defined(powerpc64le_HOST_ARCH)
#define SAFE_UNALIGNED 1
#endif

#include "bytestring-cpp-macros.h"

-- |
-- Module : Data.ByteString.Short.Internal
Expand Down Expand Up @@ -172,7 +165,7 @@ import Data.Array.Byte
import Data.Bits
( FiniteBits (finiteBitSize)
, shiftL
#if MIN_VERSION_base(4,12,0) && defined(SAFE_UNALIGNED)
#if HS_UNALIGNED_ByteArray_OPS_OK
, shiftR
#endif
, (.&.)
Expand Down Expand Up @@ -231,7 +224,7 @@ import GHC.Exts
, indexWord8Array#, indexCharArray#
, writeWord8Array#
, unsafeFreezeByteArray#
#if MIN_VERSION_base(4,12,0) && defined(SAFE_UNALIGNED)
#if HS_UNALIGNED_ByteArray_OPS_OK
,writeWord64Array#
,indexWord8ArrayAsWord64#
#endif
Expand Down Expand Up @@ -803,8 +796,7 @@ reverse :: ShortByteString -> ShortByteString
reverse = \sbs ->
let l = length sbs
ba = asBA sbs
-- https://gitlab.haskell.org/ghc/ghc/-/issues/21015
#if MIN_VERSION_base(4,12,0) && defined(SAFE_UNALIGNED)
#if HS_UNALIGNED_ByteArray_OPS_OK
in create l (\mba -> go ba mba l)
where
go :: forall s. BA -> MBA s -> Int -> ST s ()
Expand Down Expand Up @@ -1607,7 +1599,7 @@ indexCharArray (BA# ba#) (I# i#) = C# (indexCharArray# ba# i#)
indexWord8Array :: BA -> Int -> Word8
indexWord8Array (BA# ba#) (I# i#) = W8# (indexWord8Array# ba# i#)

#if MIN_VERSION_base(4,12,0) && defined(SAFE_UNALIGNED)
#if HS_UNALIGNED_ByteArray_OPS_OK
indexWord8ArrayAsWord64 :: BA -> Int -> Word64
indexWord8ArrayAsWord64 (BA# ba#) (I# i#) = W64# (indexWord8ArrayAsWord64# ba# i#)
#endif
Expand All @@ -1632,7 +1624,7 @@ writeWord8Array (MBA# mba#) (I# i#) (W8# w#) =
ST $ \s -> case writeWord8Array# mba# i# w# s of
s' -> (# s', () #)

#if MIN_VERSION_base(4,12,0) && defined(SAFE_UNALIGNED)
#if HS_UNALIGNED_ByteArray_OPS_OK
writeWord64Array :: MBA s -> Int -> Word64 -> ST s ()
writeWord64Array (MBA# mba#) (I# i#) (W64# w#) =
ST $ \s -> case writeWord64Array# mba# i# w# s of
Expand Down
8 changes: 4 additions & 4 deletions bytestring.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -128,19 +128,19 @@ library
-fmax-simplifier-iterations=10
-fdicts-cheap
-fspec-constr-count=6

c-sources: cbits/fpstring.c
cbits/itoa.c
cbits/shortbytestring.c

if (arch(aarch64))
c-sources: cbits/aarch64/is-valid-utf8.c
else
c-sources: cbits/is-valid-utf8.c

-- DNDEBUG disables asserts in cbits/
cc-options: -std=c11 -DNDEBUG=1

-- No need to link to libgcc on ghc-9.4 and later which uses a clang-based
-- toolchain.
if os(windows) && impl(ghc < 9.3)
Expand Down
19 changes: 19 additions & 0 deletions cbits/fpstring.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
#define USE_SIMD_COUNT
#endif

#include "bytestring-cpp-macros.h"

/* copy a string in reverse */
void fps_reverse(unsigned char *q, unsigned char *p, size_t n) {
p += n-1;
Expand Down Expand Up @@ -106,6 +108,23 @@ void fps_sort(unsigned char *p, size_t len) {
return qsort(p, len, 1, fps_compare);
}

#if !HS_UNALIGNED_POKES_OK
void fps_unaligned_write_u16(uint16_t x, uint8_t *p) {
memcpy(p, &x, 2);
return;
}

void fps_unaligned_write_u32(uint32_t x, uint8_t *p) {
memcpy(p, &x, 4);
return;
}

void fps_unaligned_write_u64(uint64_t x, uint8_t *p) {
memcpy(p, &x, 8);
return;
}
#endif

/* count the number of occurrences of a char in a string */
size_t fps_count_naive(unsigned char *str, size_t len, unsigned char w) {
size_t c;
Expand Down
27 changes: 27 additions & 0 deletions include/bytestring-cpp-macros.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#if 0
This file gets included in both C and Haskell sources,
so any comments should by guarded by an if 0.
#endif



#define HS_UNALIGNED_POKES_OK \
defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH) \
|| ((defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) \
&& defined(__ARM_FEATURE_UNALIGNED)) \
|| defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \
|| defined(powerpc64le_HOST_ARCH)
#if 0
Not all architectures are forgiving of unaligned accesses; whitelist ones
which are known not to trap (either to the kernel for emulation, or crash).
#endif


#define HS_UNALIGNED_ByteArray_OPS_OK \
MIN_VERSION_base(4,12,0) \
&& (MIN_VERSION_base(4,16,1) || UNALIGNED_POKES_OK)
#if 0
The unaligned ByteArray# primops became available with base-4.12.0,
but require an unaligned-friendly host architecture to be safe to use
until ghc-9.2.2; see https://gitlab.haskell.org/ghc/ghc/-/issues/21015
#endif