Skip to content

Commit

Permalink
Rework IsString instance to avoid pitfalls
Browse files Browse the repository at this point in the history
* In ghci IsString.fromString = packCharsSafe. This is a partial function,
  throwing erros on characters out of Latin1 range, but partiality is not a big
  deal for interactive sessions.
* For compiled programs:
  * For ASCII literals IsString.fromString = packChars = unsafePackLiteral,
    no runtime overhead and Core remains exactly the same as before.
  * For Unicode literals (and literals with '\0') IsString.fromString = packCharsSafe.
    This incurs runtime overhead, but such thunk is evaluated only once in its life.
  * For dynamic strings such as user input IsString.fromString = error.
    This is to make runtime behaviour more robust (if it crashes, it crashes for
    any input) and avoids a danger of routines, which were tested only for ASCII
    inputs but suddenly receive Unicode and silently lose data.
  • Loading branch information
Bodigrim committed Jun 21, 2022
1 parent 4e62154 commit 6b1714a
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 10 deletions.
22 changes: 20 additions & 2 deletions Data/ByteString/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ import Data.Data (Data(..), mkNoRepType)

import GHC.Base (nullAddr#,realWorld#,unsafeChr)
import GHC.Exts (IsList(..))
import GHC.CString (unpackCString#)
import GHC.CString (unpackCString#, unpackCStringUtf8#)
import GHC.Prim (Addr#)

#define TIMES_INT_2_AVAILABLE MIN_VERSION_ghc_prim(0,7,0)
Expand Down Expand Up @@ -278,7 +278,25 @@ instance IsList ByteString where
-- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n�
instance IsString ByteString where
{-# INLINE fromString #-}
fromString = packChars
fromString = packCharsWrapper

packCharsWrapper :: String -> ByteString
packCharsWrapper = packCharsSafe
{-# NOINLINE packCharsWrapper #-}

{-# RULES
"ByteString packCharsWrapper/ASCII" forall s .
packCharsWrapper (unpackCString# s) = packChars (unpackCString# s)
"ByteString packCharsWrapper/Unicode" forall s .
packCharsWrapper (unpackCStringUtf8# s) = packCharsSafe (unpackCStringUtf8# s)
"ByteString packCharsWrapper/naked" [0]
packCharsWrapper = error "instance IsString StrictByteString can be applied only to statically-known strings"
#-}

packCharsSafe :: String -> ByteString
packCharsSafe xs
| all (< '\256') xs = packChars xs
| otherwise = error $ "instance IsString StrictByteString: detected characters outside of Latin1 range in " ++ xs

instance Data ByteString where
gfoldl f z txt = z packBytes `f` unpackBytes txt
Expand Down
23 changes: 22 additions & 1 deletion Data/ByteString/Lazy/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveLift #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE Unsafe #-}
{-# OPTIONS_HADDOCK not-home #-}
Expand Down Expand Up @@ -68,6 +69,7 @@ import Data.String (IsString(..))
import Data.Typeable (Typeable)
import Data.Data (Data(..), mkNoRepType)

import GHC.CString (unpackCString#, unpackCStringUtf8#)
import GHC.Exts (IsList(..))

import qualified Language.Haskell.TH.Syntax as TH
Expand Down Expand Up @@ -123,7 +125,26 @@ instance IsList ByteString where
-- | Beware: 'fromString' truncates multi-byte characters to octets.
-- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n�
instance IsString ByteString where
fromString = packChars
fromString = packCharsWrapper

packCharsWrapper :: String -> ByteString
packCharsWrapper = packCharsSafe
{-# NOINLINE packCharsWrapper #-}

{-# RULES
"ByteString packCharsWrapper/ASCII" forall s .
packCharsWrapper (unpackCString# s) = packChars (unpackCString# s)
"ByteString packCharsWrapper/Unicode" forall s .
packCharsWrapper (unpackCStringUtf8# s) = packCharsSafe (unpackCStringUtf8# s)
"ByteString packCharsWrapper/naked" [0]
packCharsWrapper = error "instance IsString LazyByteString can be applied only to statically-known strings"
#-}

packCharsSafe :: String -> ByteString
packCharsSafe xs
| Prelude.all (< '\256') xs = packChars xs
| otherwise = error $ "instance IsString LazyByteString: detected characters outside of Latin1 range in " ++ xs


instance Data ByteString where
gfoldl f z txt = z packBytes `f` unpackBytes txt
Expand Down
25 changes: 24 additions & 1 deletion Data/ByteString/Short/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,10 @@ import Foreign.Marshal.Alloc
( allocaBytes )
import Foreign.Storable
( pokeByteOff )
import GHC.CString
( unpackCString#
, unpackCStringUtf8#
)
import GHC.Exts
( Int(I#), Int#, Ptr(Ptr), Addr#, Char(C#)
, State#, RealWorld
Expand Down Expand Up @@ -264,6 +268,7 @@ import Prelude
, not
, snd
)
import qualified Prelude

import qualified Data.ByteString.Internal as BS

Expand Down Expand Up @@ -349,7 +354,25 @@ instance GHC.Exts.IsList ShortByteString where
-- | Beware: 'fromString' truncates multi-byte characters to octets.
-- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n�
instance IsString ShortByteString where
fromString = packChars
fromString = packCharsWrapper

packCharsWrapper :: String -> ShortByteString
packCharsWrapper = packCharsSafe
{-# NOINLINE packCharsWrapper #-}

{-# RULES
"ByteString packCharsWrapper/ASCII" forall s .
packCharsWrapper (unpackCString# s) = packChars (unpackCString# s)
"ByteString packCharsWrapper/Unicode" forall s .
packCharsWrapper (unpackCStringUtf8# s) = packCharsSafe (unpackCStringUtf8# s)
"ByteString packCharsWrapper/naked" [0]
packCharsWrapper = error "instance IsString ShortByteString can be applied only to statically-known strings"
#-}

packCharsSafe :: String -> ShortByteString
packCharsSafe xs
| Prelude.all (< '\256') xs = packChars xs
| otherwise = error $ "instance IsString ShortByteString: detected characters outside of Latin1 range in " ++ xs

instance Data ShortByteString where
gfoldl f z txt = z packBytes `f` unpackBytes txt
Expand Down
4 changes: 2 additions & 2 deletions tests/Properties.hs
Original file line number Diff line number Diff line change
Expand Up @@ -567,8 +567,8 @@ prop_short_mappend xs ys =
prop_short_mconcat xss =
mconcat xss == Short.unpack (mconcat (map Short.pack xss))

prop_short_fromString s =
fromString s == Short.fromShort (fromString s)
prop_short_fromString xs = expectFailure $
length (Short.unpack (fromString xs)) `seq` ()

prop_short_show xs =
show (Short.pack xs) == show (map P.w2c xs)
Expand Down
6 changes: 2 additions & 4 deletions tests/Properties/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -596,8 +596,8 @@ tests =
\n x -> B.indexMaybe x (fromIntegral (n :: Int)) === x B.!? (fromIntegral n)

#ifdef BYTESTRING_CHAR8
, testProperty "isString" $
\x -> x === fromString (B.unpack x)
, testProperty "isString" $ expectFailure $
\xs -> length (B.unpack (fromString xs)) `seq` ()
, testRdInt @Int "readInt"
, testRdInt @Int8 "readInt8"
, testRdInt @Int16 "readInt16"
Expand Down Expand Up @@ -651,8 +651,6 @@ tests =

#ifndef BYTESTRING_CHAR8
-- issue #393
, testProperty "fromString non-char8" $
\s -> fromString s == B.pack (map (fromIntegral . ord :: Char -> Word8) s)
, testProperty "fromString literal" $
fromString "\0\1\2\3\4" == B.pack [0,1,2,3,4]
#endif
Expand Down

0 comments on commit 6b1714a

Please sign in to comment.