Skip to content

Commit

Permalink
Add pure Haskell implementation (#631)
Browse files Browse the repository at this point in the history
bytestring used to rely on C functions. This patch adds equivalent
functions implemented in Haskell. The main purpose is for the JavaScript
backend to fully support bytestring.

Pure Haskell implementation can be enabled explicitly with a cabal flag.
It's automatically enabled for the JavaScript platform.

Thanks to Matthew Craven for the thorough review and the many
suggestions.

Co-authored-by: Matthew Craven <[email protected]>
(cherry picked from commit d497f39)
  • Loading branch information
hsyl20 authored and clyring committed Feb 7, 2024
1 parent ed7a9d4 commit ec7c738
Show file tree
Hide file tree
Showing 14 changed files with 1,398 additions and 99 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,27 @@ jobs:
- name: Test
run: cabal test --ghc-options='-fcheck-prim-bounds -fno-ignore-asserts -DHS_BYTESTRING_ASSERTIONS'

pure-haskell:
needs: build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: haskell/actions/setup@v2
id: setup-haskell-cabal
with:
ghc-version: 'latest'
- name: Update cabal package database
run: cabal update
- uses: actions/cache@v3
name: Cache cabal stuff
with:
path: |
${{ steps.setup-haskell-cabal.outputs.cabal-store }}
dist-newstyle
key: ${{ runner.os }}-latest-pure-haskell
- name: Test
run: cabal test -fpure-haskell --ghc-options=-fno-ignore-asserts --enable-tests --test-show-details=direct all

old-gcc:
needs: build
runs-on: ubuntu-latest
Expand Down
12 changes: 0 additions & 12 deletions Data/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,6 @@ import Control.Exception (IOException, catch, finally, assert, throwIO)
import Control.Monad (when)

import Foreign.C.String (CString, CStringLen)
import Foreign.C.Types (CSize (CSize), CInt (CInt))
import Foreign.ForeignPtr (ForeignPtr, touchForeignPtr)
import Foreign.ForeignPtr.Unsafe(unsafeForeignPtrToPtr)
import Foreign.Marshal.Alloc (allocaBytes)
Expand Down Expand Up @@ -1562,17 +1561,6 @@ isValidUtf8 (BS ptr len) = accursedUnutterablePerformIO $ unsafeWithForeignPtr p
else cIsValidUtf8Safe p (fromIntegral len)
pure $ i /= 0

-- We import bytestring_is_valid_utf8 both unsafe and safe. For small inputs
-- we can use the unsafe version to get a bit more performance, but for large
-- inputs the safe version should be used to avoid GC synchronization pauses
-- in multithreaded contexts.

foreign import ccall unsafe "bytestring_is_valid_utf8" cIsValidUtf8
:: Ptr Word8 -> CSize -> IO CInt

foreign import ccall safe "bytestring_is_valid_utf8" cIsValidUtf8Safe
:: Ptr Word8 -> CSize -> IO CInt

-- | Break a string on a substring, returning a pair of the part of the
-- string prior to the match, and the rest of the string.
--
Expand Down
8 changes: 1 addition & 7 deletions Data/ByteString/Builder/ASCII.hs
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ import Data.ByteString.Builder.Internal (Builder)
import qualified Data.ByteString.Builder.Prim as P
import qualified Data.ByteString.Builder.Prim.Internal as P
import Data.ByteString.Builder.RealFloat (floatDec, doubleDec)
import Data.ByteString.Internal.Type (c_int_dec_padded9, c_long_long_int_dec_padded18)

import Foreign
import Foreign.C.Types
import Data.List.NonEmpty (NonEmpty(..))

------------------------------------------------------------------------------
Expand Down Expand Up @@ -311,12 +311,6 @@ integerDec i
(q,r) -> fromInteger q : fromInteger r : putB ns


foreign import ccall unsafe "static _hs_bytestring_int_dec_padded9"
c_int_dec_padded9 :: CInt -> Ptr Word8 -> IO ()

foreign import ccall unsafe "static _hs_bytestring_long_long_int_dec_padded18"
c_long_long_int_dec_padded18 :: CLLong -> Ptr Word8 -> IO ()

{-# INLINE intDecPadded #-}
intDecPadded :: P.BoundedPrim Int
intDecPadded = P.liftFixedToBounded $ P.caseWordSize_32_64
Expand Down
20 changes: 1 addition & 19 deletions Data/ByteString/Builder/Prim/ASCII.hs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ module Data.ByteString.Builder.Prim.ASCII

) where

import Data.ByteString.Internal.Type
import Data.ByteString.Builder.Prim.Binary
import Data.ByteString.Builder.Prim.Internal
import Data.ByteString.Builder.Prim.Internal.Floating
Expand All @@ -86,7 +87,6 @@ import Data.ByteString.Utils.UnalignedWrite
import Data.Char (ord)

import Foreign
import Foreign.C.Types

-- | Encode the least 7-bits of a 'Char' using the ASCII encoding.
{-# INLINE char7 #-}
Expand All @@ -101,12 +101,6 @@ char7 = (\c -> fromIntegral $ ord c .&. 0x7f) >$< word8
-- Signed integers
------------------

foreign import ccall unsafe "static _hs_bytestring_int_dec" c_int_dec
:: CInt -> Ptr Word8 -> IO (Ptr Word8)

foreign import ccall unsafe "static _hs_bytestring_long_long_int_dec" c_long_long_int_dec
:: CLLong -> Ptr Word8 -> IO (Ptr Word8)

{-# INLINE encodeIntDecimal #-}
encodeIntDecimal :: Integral a => Int -> BoundedPrim a
encodeIntDecimal bound = boundedPrim bound $ c_int_dec . fromIntegral
Expand Down Expand Up @@ -143,12 +137,6 @@ intDec = caseWordSize_32_64
-- Unsigned integers
--------------------

foreign import ccall unsafe "static _hs_bytestring_uint_dec" c_uint_dec
:: CUInt -> Ptr Word8 -> IO (Ptr Word8)

foreign import ccall unsafe "static _hs_bytestring_long_long_uint_dec" c_long_long_uint_dec
:: CULLong -> Ptr Word8 -> IO (Ptr Word8)

{-# INLINE encodeWordDecimal #-}
encodeWordDecimal :: Integral a => Int -> BoundedPrim a
encodeWordDecimal bound = boundedPrim bound $ c_uint_dec . fromIntegral
Expand Down Expand Up @@ -187,12 +175,6 @@ wordDec = caseWordSize_32_64
-- without lead
---------------

foreign import ccall unsafe "static _hs_bytestring_uint_hex" c_uint_hex
:: CUInt -> Ptr Word8 -> IO (Ptr Word8)

foreign import ccall unsafe "static _hs_bytestring_long_long_uint_hex" c_long_long_uint_hex
:: CULLong -> Ptr Word8 -> IO (Ptr Word8)

{-# INLINE encodeWordHex #-}
encodeWordHex :: forall a. (Storable a, Integral a) => BoundedPrim a
encodeWordHex =
Expand Down
23 changes: 17 additions & 6 deletions Data/ByteString/Builder/Prim/Internal/Base16.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE CPP #-}
-- |
-- Copyright : (c) 2011 Simon Meier
-- License : BSD3-style (see LICENSE)
Expand All @@ -22,23 +23,33 @@ module Data.ByteString.Builder.Prim.Internal.Base16 (
) where

import Foreign
import Foreign.C.Types
import GHC.Exts (Addr#, Ptr(..))
#if PURE_HASKELL
import qualified Data.ByteString.Internal.Pure as Pure
#else
import Foreign.C.Types
#endif

-- Creating the encoding table
------------------------------

-- | An encoding table for Base16 encoding.
data EncodingTable = EncodingTable Addr#

foreign import ccall "&hs_bytestring_lower_hex_table"
c_lower_hex_table :: Ptr CChar

-- | The encoding table for hexadecimal values with lower-case characters;
-- e.g., deadbeef.
lowerTable :: EncodingTable
lowerTable = case c_lower_hex_table of
Ptr p# -> EncodingTable p#
lowerTable =
#if PURE_HASKELL
case Pure.lower_hex_table of
Ptr p# -> EncodingTable p#
#else
case c_lower_hex_table of
Ptr p# -> EncodingTable p#

foreign import ccall "&hs_bytestring_lower_hex_table"
c_lower_hex_table :: Ptr CChar
#endif

-- | Encode an octet as 16bit word comprising both encoded nibbles ordered
-- according to the host endianness. Writing these 16bit to memory will write
Expand Down
Loading

0 comments on commit ec7c738

Please sign in to comment.