diff --git a/Data/ByteString/Internal.hs b/Data/ByteString/Internal.hs index 5fc8222a7..221ffa598 100644 --- a/Data/ByteString/Internal.hs +++ b/Data/ByteString/Internal.hs @@ -131,7 +131,7 @@ import Data.Data (Data(..), mkNoRepType) import GHC.Base (nullAddr#,realWorld#,unsafeChr) import GHC.Exts (IsList(..)) -import GHC.CString (unpackCString#) +import GHC.CString (unpackCString#, unpackCStringUtf8#) import GHC.Prim (Addr#) #define TIMES_INT_2_AVAILABLE MIN_VERSION_ghc_prim(0,7,0) @@ -278,7 +278,25 @@ instance IsList ByteString where -- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n� instance IsString ByteString where {-# INLINE fromString #-} - fromString = packChars + fromString = packCharsWrapper + +packCharsWrapper :: String -> ByteString +packCharsWrapper = packCharsSafe +{-# NOINLINE packCharsWrapper #-} + +{-# RULES +"ByteString packCharsWrapper/ASCII" forall s . + packCharsWrapper (unpackCString# s) = packChars (unpackCString# s) +"ByteString packCharsWrapper/Unicode" forall s . + packCharsWrapper (unpackCStringUtf8# s) = packCharsSafe (unpackCStringUtf8# s) +"ByteString packCharsWrapper/naked" [0] + packCharsWrapper = error "instance IsString StrictByteString can be applied only to statically-known strings" +#-} + +packCharsSafe :: String -> ByteString +packCharsSafe xs + | all (< '\256') xs = packChars xs + | otherwise = error $ "instance IsString StrictByteString: detected characters outside of Latin1 range in " ++ xs instance Data ByteString where gfoldl f z txt = z packBytes `f` unpackBytes txt diff --git a/Data/ByteString/Lazy/Internal.hs b/Data/ByteString/Lazy/Internal.hs index 63dc3f670..3f8d3ec08 100644 --- a/Data/ByteString/Lazy/Internal.hs +++ b/Data/ByteString/Lazy/Internal.hs @@ -2,6 +2,7 @@ {-# LANGUAGE BangPatterns #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveLift #-} +{-# LANGUAGE MagicHash #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE Unsafe #-} {-# OPTIONS_HADDOCK not-home #-} @@ -68,6 +69,7 @@ import Data.String (IsString(..)) import Data.Typeable (Typeable) import Data.Data (Data(..), mkNoRepType) +import GHC.CString (unpackCString#, unpackCStringUtf8#) import GHC.Exts (IsList(..)) import qualified Language.Haskell.TH.Syntax as TH @@ -123,7 +125,26 @@ instance IsList ByteString where -- | Beware: 'fromString' truncates multi-byte characters to octets. -- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n� instance IsString ByteString where - fromString = packChars + fromString = packCharsWrapper + +packCharsWrapper :: String -> ByteString +packCharsWrapper = packCharsSafe +{-# NOINLINE packCharsWrapper #-} + +{-# RULES +"ByteString packCharsWrapper/ASCII" forall s . + packCharsWrapper (unpackCString# s) = packChars (unpackCString# s) +"ByteString packCharsWrapper/Unicode" forall s . + packCharsWrapper (unpackCStringUtf8# s) = packCharsSafe (unpackCStringUtf8# s) +"ByteString packCharsWrapper/naked" [0] + packCharsWrapper = error "instance IsString LazyByteString can be applied only to statically-known strings" +#-} + +packCharsSafe :: String -> ByteString +packCharsSafe xs + | Prelude.all (< '\256') xs = packChars xs + | otherwise = error $ "instance IsString LazyByteString: detected characters outside of Latin1 range in " ++ xs + instance Data ByteString where gfoldl f z txt = z packBytes `f` unpackBytes txt diff --git a/Data/ByteString/Short/Internal.hs b/Data/ByteString/Short/Internal.hs index 5cb3a20aa..7e94cb6a7 100644 --- a/Data/ByteString/Short/Internal.hs +++ b/Data/ByteString/Short/Internal.hs @@ -208,6 +208,10 @@ import Foreign.Marshal.Alloc ( allocaBytes ) import Foreign.Storable ( pokeByteOff ) +import GHC.CString + ( unpackCString# + , unpackCStringUtf8# + ) import GHC.Exts ( Int(I#), Int#, Ptr(Ptr), Addr#, Char(C#) , State#, RealWorld @@ -264,6 +268,7 @@ import Prelude , not , snd ) +import qualified Prelude import qualified Data.ByteString.Internal as BS @@ -349,7 +354,25 @@ instance GHC.Exts.IsList ShortByteString where -- | Beware: 'fromString' truncates multi-byte characters to octets. -- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n� instance IsString ShortByteString where - fromString = packChars + fromString = packCharsWrapper + +packCharsWrapper :: String -> ShortByteString +packCharsWrapper = packCharsSafe +{-# NOINLINE packCharsWrapper #-} + +{-# RULES +"ByteString packCharsWrapper/ASCII" forall s . + packCharsWrapper (unpackCString# s) = packChars (unpackCString# s) +"ByteString packCharsWrapper/Unicode" forall s . + packCharsWrapper (unpackCStringUtf8# s) = packCharsSafe (unpackCStringUtf8# s) +"ByteString packCharsWrapper/naked" [0] + packCharsWrapper = error "instance IsString ShortByteString can be applied only to statically-known strings" +#-} + +packCharsSafe :: String -> ShortByteString +packCharsSafe xs + | Prelude.all (< '\256') xs = packChars xs + | otherwise = error $ "instance IsString ShortByteString: detected characters outside of Latin1 range in " ++ xs instance Data ShortByteString where gfoldl f z txt = z packBytes `f` unpackBytes txt diff --git a/tests/Properties.hs b/tests/Properties.hs index 15cb5d724..2f9df9463 100644 --- a/tests/Properties.hs +++ b/tests/Properties.hs @@ -567,8 +567,8 @@ prop_short_mappend xs ys = prop_short_mconcat xss = mconcat xss == Short.unpack (mconcat (map Short.pack xss)) -prop_short_fromString s = - fromString s == Short.fromShort (fromString s) +prop_short_fromString xs = expectFailure $ + length (Short.unpack (fromString xs)) `seq` () prop_short_show xs = show (Short.pack xs) == show (map P.w2c xs) diff --git a/tests/Properties/ByteString.hs b/tests/Properties/ByteString.hs index 757059701..50c685dd3 100644 --- a/tests/Properties/ByteString.hs +++ b/tests/Properties/ByteString.hs @@ -596,8 +596,8 @@ tests = \n x -> B.indexMaybe x (fromIntegral (n :: Int)) === x B.!? (fromIntegral n) #ifdef BYTESTRING_CHAR8 - , testProperty "isString" $ - \x -> x === fromString (B.unpack x) + , testProperty "isString" $ expectFailure $ + \xs -> length (B.unpack (fromString xs)) `seq` () , testRdInt @Int "readInt" , testRdInt @Int8 "readInt8" , testRdInt @Int16 "readInt16" @@ -651,8 +651,6 @@ tests = #ifndef BYTESTRING_CHAR8 -- issue #393 - , testProperty "fromString non-char8" $ - \s -> fromString s == B.pack (map (fromIntegral . ord :: Char -> Word8) s) , testProperty "fromString literal" $ fromString "\0\1\2\3\4" == B.pack [0,1,2,3,4] #endif