diff --git a/Generate.hs b/Generate.hs index 1cfde169..721515ed 100755 --- a/Generate.hs +++ b/Generate.hs @@ -34,11 +34,11 @@ main = do ,"import GHC.IO.Encoding.Failure ( CodingFailureMode(..) )" ,"import GHC.IO.Encoding.UTF16 ( mkUTF16le )" ,"import GHC.IO.Encoding.UTF8 ( mkUTF8 )" - ,"import System.OsString.Internal.Types" - ,"import System.OsPath.Encoding.Internal" + ,"import System.OsString.Internal.Types.Hidden" + ,"import System.OsPath.Encoding.Internal.Hidden" ,"import qualified Data.Char as C" - ,"import qualified System.OsPath.Data.ByteString.Short as SBS" - ,"import qualified System.OsPath.Data.ByteString.Short.Word16 as SBS16" + ,"import qualified System.OsPath.Data.ByteString.Short.Hidden as SBS" + ,"import qualified System.OsPath.Data.ByteString.Short.Word16.Hidden as SBS16" ,"import qualified System.FilePath.Windows as W" ,"import qualified System.FilePath.Posix as P" #ifdef GHC_MAKE diff --git a/System/FilePath/Internal.hs b/System/FilePath/Internal.hs index df454328..7ad9f301 100644 --- a/System/FilePath/Internal.hs +++ b/System/FilePath/Internal.hs @@ -138,8 +138,8 @@ import GHC.IO.Encoding.Failure ( CodingFailureMode(..) ) import GHC.IO.Encoding.UTF16 ( mkUTF16le ) import qualified GHC.Foreign as GHC import Data.Word ( Word16 ) -import System.OsPath.Data.ByteString.Short.Word16 -import System.OsPath.Data.ByteString.Short ( packCStringLen ) +import System.OsPath.Data.ByteString.Short.Word16.Hidden +import System.OsPath.Data.ByteString.Short.Hidden ( packCStringLen ) #define CHAR Word16 #define STRING ShortByteString #define FILEPATH ShortByteString @@ -148,7 +148,7 @@ import GHC.IO.Encoding.Failure ( CodingFailureMode(..) ) import qualified GHC.Foreign as GHC import GHC.IO.Encoding.UTF8 ( mkUTF8 ) import Data.Word ( Word8 ) -import System.OsPath.Data.ByteString.Short +import System.OsPath.Data.ByteString.Short.Hidden #define CHAR Word8 #define STRING ShortByteString #define FILEPATH ShortByteString diff --git a/System/OsPath/Common.hs b/System/OsPath/Common.hs index 0af0ed60..8f7ab4a6 100644 --- a/System/OsPath/Common.hs +++ b/System/OsPath/Common.hs @@ -109,7 +109,7 @@ where #ifdef WINDOWS import System.OsPath.Types -import System.OsString.Windows as PS +import System.OsString.Windows.Hidden as PS ( unsafeFromChar , toChar , decodeUtf @@ -141,7 +141,7 @@ import Language.Haskell.TH.Syntax import GHC.IO.Encoding.UTF8 ( mkUTF8 ) import System.OsPath.Types -import System.OsString.Posix as PS +import System.OsString.Posix.Hidden as PS ( unsafeFromChar , toChar , decodeUtf @@ -171,7 +171,7 @@ import System.OsPath.Internal as PS ) import System.OsPath.Types ( OsPath ) -import System.OsString ( unsafeFromChar, toChar ) +import System.OsString.Internal.Hidden ( unsafeFromChar, toChar ) #if defined(mingw32_HOST_OS) || defined(__MINGW32__) import qualified System.OsPath.Windows as C @@ -182,7 +182,7 @@ import qualified System.OsPath.Posix as C import Data.Bifunctor ( bimap ) #endif -import System.OsString.Internal.Types +import System.OsString.Internal.Types.Hidden ------------------------ diff --git a/System/OsPath/Data/ByteString/Short.hs b/System/OsPath/Data/ByteString/Short.hs index c8265bd8..e850681f 100644 --- a/System/OsPath/Data/ByteString/Short.hs +++ b/System/OsPath/Data/ByteString/Short.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE MultiWayIf #-} {-# LANGUAGE NoImplicitPrelude #-} -- | -- Module : System.OsPath.Data.ByteString.Short @@ -176,18 +175,4 @@ module System.OsPath.Data.ByteString.Short {-# DEPRECATED "Use System.OsString.D useAsCStringLen, ) where -import Data.ByteString.Short.Internal -import System.OsPath.Data.ByteString.Short.Internal - -import Prelude (Maybe(..), Ord(..), Num(..), ($), otherwise) -import Data.Word (Word8) - -uncons2 :: ShortByteString -> Maybe (Word8, Word8, ShortByteString) -uncons2 = \sbs -> - let l = length sbs - nl = l - 2 - in if | l <= 1 -> Nothing - | otherwise -> let h = indexWord8Array (asBA sbs) 0 - h' = indexWord8Array (asBA sbs) 1 - t = create nl $ \mba -> copyByteArray (asBA sbs) 1 mba 0 nl - in Just (h, h', t) +import System.OsPath.Data.ByteString.Short.Hidden diff --git a/System/OsPath/Data/ByteString/Short/Hidden.hs b/System/OsPath/Data/ByteString/Short/Hidden.hs new file mode 100644 index 00000000..c5c49f5f --- /dev/null +++ b/System/OsPath/Data/ByteString/Short/Hidden.hs @@ -0,0 +1,193 @@ +{-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE NoImplicitPrelude #-} +-- | +-- Module : System.OsPath.Data.ByteString.Short.Hidden +-- Copyright : (c) Duncan Coutts 2012-2013, Julian Ospald 2022 +-- License : BSD-style +-- +-- Maintainer : hasufell@posteo.de +-- Stability : stable +-- Portability : ghc only +-- +-- A compact representation suitable for storing short byte strings in memory. +-- +-- In typical use cases it can be imported alongside "Data.ByteString", e.g. +-- +-- > import qualified Data.ByteString as B +-- > import qualified Data.ByteString.Short as B +-- > (ShortByteString, toShort, fromShort) +-- +-- Other 'ShortByteString' operations clash with "Data.ByteString" or "Prelude" +-- functions however, so they should be imported @qualified@ with a different +-- alias e.g. +-- +-- > import qualified Data.ByteString.Short as B.Short +-- +module System.OsPath.Data.ByteString.Short.Hidden + + ( + + -- * The @ShortByteString@ type + + ShortByteString(..), + + -- ** Memory overhead + -- | With GHC, the memory overheads are as follows, expressed in words and + -- in bytes (words are 4 and 8 bytes on 32 or 64bit machines respectively). + -- + -- * 'B.ByteString' unshared: 8 words; 32 or 64 bytes. + -- + -- * 'B.ByteString' shared substring: 4 words; 16 or 32 bytes. + -- + -- * 'ShortByteString': 4 words; 16 or 32 bytes. + -- + -- For the string data itself, both 'ShortByteString' and 'B.ByteString' use + -- one byte per element, rounded up to the nearest word. For example, + -- including the overheads, a length 10 'ShortByteString' would take + -- @16 + 12 = 28@ bytes on a 32bit platform and @32 + 16 = 48@ bytes on a + -- 64bit platform. + -- + -- These overheads can all be reduced by 1 word (4 or 8 bytes) when the + -- 'ShortByteString' or 'B.ByteString' is unpacked into another constructor. + -- + -- For example: + -- + -- > data ThingId = ThingId {-# UNPACK #-} !Int + -- > {-# UNPACK #-} !ShortByteString + -- + -- This will take @1 + 1 + 3@ words (the @ThingId@ constructor + + -- unpacked @Int@ + unpacked @ShortByteString@), plus the words for the + -- string data. + + -- ** Heap fragmentation + -- | With GHC, the 'B.ByteString' representation uses /pinned/ memory, + -- meaning it cannot be moved by the GC. This is usually the right thing to + -- do for larger strings, but for small strings using pinned memory can + -- lead to heap fragmentation which wastes space. The 'ShortByteString' + -- type (and the @Text@ type from the @text@ package) use /unpinned/ memory + -- so they do not contribute to heap fragmentation. In addition, with GHC, + -- small unpinned strings are allocated in the same way as normal heap + -- allocations, rather than in a separate pinned area. + + -- * Introducing and eliminating 'ShortByteString's + empty, + singleton, + pack, + unpack, + fromShort, + toShort, + + -- * Basic interface + snoc, + cons, + append, + last, + tail, + uncons, + uncons2, + head, + init, + unsnoc, + null, + length, + + -- * Transforming ShortByteStrings + map, + reverse, + intercalate, + + -- * Reducing 'ShortByteString's (folds) + foldl, + foldl', + foldl1, + foldl1', + + foldr, + foldr', + foldr1, + foldr1', + + -- ** Special folds + all, + any, + concat, + + -- ** Generating and unfolding ByteStrings + replicate, + unfoldr, + unfoldrN, + + -- * Substrings + + -- ** Breaking strings + take, + takeEnd, + takeWhileEnd, + takeWhile, + drop, + dropEnd, + dropWhile, + dropWhileEnd, + breakEnd, + break, + span, + spanEnd, + splitAt, + split, + splitWith, + stripSuffix, + stripPrefix, + + -- * Predicates + isInfixOf, + isPrefixOf, + isSuffixOf, + + -- ** Search for arbitrary substrings + breakSubstring, + + -- * Searching ShortByteStrings + + -- ** Searching by equality + elem, + + -- ** Searching with a predicate + find, + filter, + partition, + + -- * Indexing ShortByteStrings + index, + indexMaybe, + (!?), + elemIndex, + elemIndices, + count, + findIndex, + findIndices, + + -- * Low level conversions + -- ** Packing 'Foreign.C.String.CString's and pointers + packCString, + packCStringLen, + + -- ** Using ShortByteStrings as 'Foreign.C.String.CString's + useAsCString, + useAsCStringLen, + ) where + +import Data.ByteString.Short.Internal +import System.OsPath.Data.ByteString.Short.Internal.Hidden + +import Prelude (Maybe(..), Ord(..), Num(..), ($), otherwise) +import Data.Word (Word8) + +uncons2 :: ShortByteString -> Maybe (Word8, Word8, ShortByteString) +uncons2 = \sbs -> + let l = length sbs + nl = l - 2 + in if | l <= 1 -> Nothing + | otherwise -> let h = indexWord8Array (asBA sbs) 0 + h' = indexWord8Array (asBA sbs) 1 + t = create nl $ \mba -> copyByteArray (asBA sbs) 1 mba 0 nl + in Just (h, h', t) diff --git a/System/OsPath/Data/ByteString/Short/Internal.hs b/System/OsPath/Data/ByteString/Short/Internal.hs index faf1408e..10c1e5a5 100644 --- a/System/OsPath/Data/ByteString/Short/Internal.hs +++ b/System/OsPath/Data/ByteString/Short/Internal.hs @@ -1,11 +1,3 @@ -{-# LANGUAGE CPP #-} -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE UnboxedTuples #-} -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE ViewPatterns #-} -{-# LANGUAGE UnliftedFFITypes #-} - -- | -- Module : System.OsPath.Data.ByteString.Short.Internal -- Copyright : © 2022 Julian Ospald @@ -17,465 +9,9 @@ -- -- Internal low-level utilities mostly for 'System.OsPath.Data.ByteString.Short.Word16', -- such as byte-array operations and other stuff not meant to be exported from Word16 module. -module System.OsPath.Data.ByteString.Short.Internal {-# DEPRECATED "Use System.OsString.Data.ByteString.Short.Internal from os-string >= 2.0.0 package instead. This module will be removed in filepath >= 1.5." #-} where - -import Control.Monad.ST -import Control.Exception (assert, throwIO) -import Data.Bits (Bits(..)) -import Data.ByteString.Short.Internal (ShortByteString(..), length) -#if !MIN_VERSION_base(4,11,0) -import Data.Semigroup - ( Semigroup((<>)) ) -import Foreign.C.Types - ( CSize(..) - , CInt(..) +module System.OsPath.Data.ByteString.Short.Internal {-# DEPRECATED "Use System.OsString.Data.ByteString.Short.Internal from os-string >= 2.0.0 package instead. This module will be removed in filepath >= 1.5." #-} + ( module System.OsPath.Data.ByteString.Short.Internal.Hidden ) -import Data.ByteString.Internal - ( accursedUnutterablePerformIO - ) -#endif -#if !MIN_VERSION_bytestring(0,10,9) -import Foreign.Marshal.Alloc (allocaBytes) -import Foreign.C.String ( CString, CStringLen ) -import Foreign.C.Types ( CSize(..) ) -import Foreign.Storable (pokeByteOff) -#endif -import Foreign.Marshal.Array (withArray0, peekArray0, newArray0, withArrayLen, peekArray) -import GHC.Exts -import GHC.Word -import GHC.ST - ( ST (ST) ) -import GHC.Stack ( HasCallStack ) -import Prelude hiding - ( length ) - -import qualified Data.ByteString.Short.Internal as BS -import qualified Data.Char as C -import qualified Data.List as List - - -_nul :: Word16 -_nul = 0x00 - -isSpace :: Word16 -> Bool -isSpace = C.isSpace . word16ToChar - --- | Total conversion to char. -word16ToChar :: Word16 -> Char -word16ToChar = C.chr . fromIntegral - -create :: Int -> (forall s. MBA s -> ST s ()) -> ShortByteString -create len fill = - runST $ do - mba <- newByteArray len - fill mba - BA# ba# <- unsafeFreezeByteArray mba - return (SBS ba#) -{-# INLINE create #-} - - -asBA :: ShortByteString -> BA -asBA (SBS ba#) = BA# ba# - - - -data BA = BA# ByteArray# -data MBA s = MBA# (MutableByteArray# s) - - -newPinnedByteArray :: Int -> ST s (MBA s) -newPinnedByteArray (I# len#) = - ST $ \s -> case newPinnedByteArray# len# s of - (# s', mba# #) -> (# s', MBA# mba# #) - -newByteArray :: Int -> ST s (MBA s) -newByteArray (I# len#) = - ST $ \s -> case newByteArray# len# s of - (# s', mba# #) -> (# s', MBA# mba# #) - -copyByteArray :: BA -> Int -> MBA s -> Int -> Int -> ST s () -copyByteArray (BA# src#) (I# src_off#) (MBA# dst#) (I# dst_off#) (I# len#) = - ST $ \s -> case copyByteArray# src# src_off# dst# dst_off# len# s of - s' -> (# s', () #) - -unsafeFreezeByteArray :: MBA s -> ST s BA -unsafeFreezeByteArray (MBA# mba#) = - ST $ \s -> case unsafeFreezeByteArray# mba# s of - (# s', ba# #) -> (# s', BA# ba# #) - -copyAddrToByteArray :: Ptr a -> MBA RealWorld -> Int -> Int -> ST RealWorld () -copyAddrToByteArray (Ptr src#) (MBA# dst#) (I# dst_off#) (I# len#) = - ST $ \s -> case copyAddrToByteArray# src# dst# dst_off# len# s of - s' -> (# s', () #) - - --- this is a copy-paste from bytestring -#if !MIN_VERSION_bytestring(0,10,9) ------------------------------------------------------------------------- --- Primop replacements - --- --------------------------------------------------------------------- --- --- Standard C functions --- - -foreign import ccall unsafe "string.h strlen" c_strlen - :: CString -> IO CSize - - --- --------------------------------------------------------------------- --- --- Uses our C code --- - --- | /O(n)./ Construct a new @ShortByteString@ from a @CString@. The --- resulting @ShortByteString@ is an immutable copy of the original --- @CString@, and is managed on the Haskell heap. The original --- @CString@ must be null terminated. --- --- @since 0.10.10.0 -packCString :: CString -> IO ShortByteString -packCString cstr = do - len <- c_strlen cstr - packCStringLen (cstr, fromIntegral len) - --- | /O(n)./ Construct a new @ShortByteString@ from a @CStringLen@. The --- resulting @ShortByteString@ is an immutable copy of the original @CStringLen@. --- The @ShortByteString@ is a normal Haskell value and will be managed on the --- Haskell heap. --- --- @since 0.10.10.0 -packCStringLen :: CStringLen -> IO ShortByteString -packCStringLen (cstr, len) | len >= 0 = BS.createFromPtr cstr len -packCStringLen (_, len) = - moduleErrorIO "packCStringLen" ("negative length: " ++ show len) - --- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a --- null-terminated @CString@. The @CString@ is a copy and will be freed --- automatically; it must not be stored or used after the --- subcomputation finishes. --- --- @since 0.10.10.0 -useAsCString :: ShortByteString -> (CString -> IO a) -> IO a -useAsCString bs action = - allocaBytes (l+1) $ \buf -> do - BS.copyToPtr bs 0 buf (fromIntegral l) - pokeByteOff buf l (0::Word8) - action buf - where l = length bs - --- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a 'CStringLen'. --- As for 'useAsCString' this function makes a copy of the original @ShortByteString@. --- It must not be stored or used after the subcomputation finishes. --- --- Beware that this function does not add a terminating @\NUL@ byte at the end of 'CStringLen'. --- If you need to construct a pointer to a null-terminated sequence, use 'useAsCString' --- (and measure length independently if desired). --- --- @since 0.10.10.0 -useAsCStringLen :: ShortByteString -> (CStringLen -> IO a) -> IO a -useAsCStringLen bs action = - allocaBytes l $ \buf -> do - BS.copyToPtr bs 0 buf (fromIntegral l) - action (buf, l) - where l = length bs - - -#endif - - --- | /O(n)./ Construct a new @ShortByteString@ from a @CWString@. The --- resulting @ShortByteString@ is an immutable copy of the original --- @CWString@, and is managed on the Haskell heap. The original --- @CWString@ must be null terminated. --- --- @since 0.10.10.0 -packCWString :: Ptr Word16 -> IO ShortByteString -packCWString cwstr = do - cs <- peekArray0 _nul cwstr - return (packWord16 cs) - --- | /O(n)./ Construct a new @ShortByteString@ from a @CWStringLen@. The --- resulting @ShortByteString@ is an immutable copy of the original @CWStringLen@. --- The @ShortByteString@ is a normal Haskell value and will be managed on the --- Haskell heap. --- --- @since 0.10.10.0 -packCWStringLen :: (Ptr Word16, Int) -> IO ShortByteString -packCWStringLen (cp, len) = do - cs <- peekArray len cp - return (packWord16 cs) - - --- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a --- null-terminated @CWString@. The @CWString@ is a copy and will be freed --- automatically; it must not be stored or used after the --- subcomputation finishes. --- --- @since 0.10.10.0 -useAsCWString :: ShortByteString -> (Ptr Word16 -> IO a) -> IO a -useAsCWString = withArray0 _nul . unpackWord16 - --- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a @CWStringLen@. --- As for @useAsCWString@ this function makes a copy of the original @ShortByteString@. --- It must not be stored or used after the subcomputation finishes. --- --- @since 0.10.10.0 -useAsCWStringLen :: ShortByteString -> ((Ptr Word16, Int) -> IO a) -> IO a -useAsCWStringLen bs action = withArrayLen (unpackWord16 bs) $ \ len ptr -> action (ptr, len) - --- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a @CWStringLen@. --- As for @useAsCWString@ this function makes a copy of the original @ShortByteString@. --- It must not be stored or used after the subcomputation finishes. --- --- @since 0.10.10.0 -newCWString :: ShortByteString -> IO (Ptr Word16) -newCWString = newArray0 _nul . unpackWord16 - - - - - -- --------------------------------------------------------------------- --- Internal utilities - -moduleErrorIO :: String -> String -> IO a -moduleErrorIO fun msg = throwIO . userError $ moduleErrorMsg fun msg -{-# NOINLINE moduleErrorIO #-} - -moduleErrorMsg :: String -> String -> String -moduleErrorMsg fun msg = "System.OsPath.Data.ByteString.Short." ++ fun ++ ':':' ':msg - -packWord16 :: [Word16] -> ShortByteString -packWord16 cs = packLenWord16 (List.length cs) cs - -packLenWord16 :: Int -> [Word16] -> ShortByteString -packLenWord16 len ws0 = - create (len * 2) (\mba -> go mba 0 ws0) - where - go :: MBA s -> Int -> [Word16] -> ST s () - go !_ !_ [] = return () - go !mba !i (w:ws) = do - writeWord16Array mba i w - go mba (i+2) ws - - -unpackWord16 :: ShortByteString -> [Word16] -unpackWord16 sbs = go len [] - where - len = length sbs - go !i !acc - | i < 1 = acc - | otherwise = let !w = indexWord16Array (asBA sbs) (i - 2) - in go (i - 2) (w:acc) - -packWord16Rev :: [Word16] -> ShortByteString -packWord16Rev cs = packLenWord16Rev (List.length cs * 2) cs - -packLenWord16Rev :: Int -> [Word16] -> ShortByteString -packLenWord16Rev len ws0 = - create len (\mba -> go mba len ws0) - where - go :: MBA s -> Int -> [Word16] -> ST s () - go !_ !_ [] = return () - go !mba !i (w:ws) = do - writeWord16Array mba (i - 2) w - go mba (i - 2) ws - - --- | This isn't strictly Word16 array write. Instead it's two consecutive Word8 array --- writes to avoid endianness issues due to primops doing automatic alignment based --- on host platform. We want to always write LE to the byte array. -writeWord16Array :: MBA s - -> Int -- ^ Word8 index (not Word16) - -> Word16 - -> ST s () -writeWord16Array (MBA# mba#) (I# i#) (W16# w#) = - case encodeWord16LE# w# of - (# lsb#, msb# #) -> - ST (\s -> case writeWord8Array# mba# i# lsb# s of - s' -> (# s', () #)) >> - ST (\s -> case writeWord8Array# mba# (i# +# 1#) msb# s of - s' -> (# s', () #)) - -indexWord8Array :: BA - -> Int -- ^ Word8 index - -> Word8 -indexWord8Array (BA# ba#) (I# i#) = W8# (indexWord8Array# ba# i#) - --- | This isn't strictly Word16 array read. Instead it's two Word8 array reads --- to avoid endianness issues due to primops doing automatic alignment based --- on host platform. We expect the byte array to be LE always. -indexWord16Array :: BA - -> Int -- ^ Word8 index (not Word16) - -> Word16 -indexWord16Array ba i = fromIntegral lsb .|. (fromIntegral msb `shiftL` 8) - where - lsb = indexWord8Array ba i - msb = indexWord8Array ba (i + 1) - -#if !MIN_VERSION_base(4,16,0) - -encodeWord16LE# :: Word# -- ^ Word16 - -> (# Word#, Word# #) -- ^ Word8 (LSB, MSB) -encodeWord16LE# x# = (# x# `and#` int2Word# 0xff# - , x# `and#` int2Word# 0xff00# `shiftRL#` 8# #) - -decodeWord16LE# :: (# Word#, Word# #) -- ^ Word8 (LSB, MSB) - -> Word# -- ^ Word16 -decodeWord16LE# (# lsb#, msb# #) = msb# `shiftL#` 8# `or#` lsb# - -#else - -encodeWord16LE# :: Word16# -- ^ Word16 - -> (# Word8#, Word8# #) -- ^ Word8 (LSB, MSB) -encodeWord16LE# x# = (# word16ToWord8# x# - , word16ToWord8# (x# `uncheckedShiftRLWord16#` 8#) #) where - word16ToWord8# y = wordToWord8# (word16ToWord# y) - -decodeWord16LE# :: (# Word8#, Word8# #) -- ^ Word8 (LSB, MSB) - -> Word16# -- ^ Word16 -decodeWord16LE# (# lsb#, msb# #) = ((word8ToWord16# msb# `uncheckedShiftLWord16#` 8#) `orWord16#` word8ToWord16# lsb#) - where - word8ToWord16# y = wordToWord16# (word8ToWord# y) - -#endif - -setByteArray :: MBA s -> Int -> Int -> Int -> ST s () -setByteArray (MBA# dst#) (I# off#) (I# len#) (I# c#) = - ST $ \s -> case setByteArray# dst# off# len# c# s of - s' -> (# s', () #) - -copyMutableByteArray :: MBA s -> Int -> MBA s -> Int -> Int -> ST s () -copyMutableByteArray (MBA# src#) (I# src_off#) (MBA# dst#) (I# dst_off#) (I# len#) = - ST $ \s -> case copyMutableByteArray# src# src_off# dst# dst_off# len# s of - s' -> (# s', () #) - --- | Given the maximum size needed and a function to make the contents --- of a ShortByteString, createAndTrim makes the 'ShortByteString'. --- The generating function is required to return the actual final size --- (<= the maximum size) and the result value. The resulting byte array --- is realloced to this size. -createAndTrim :: Int -> (forall s. MBA s -> ST s (Int, a)) -> (ShortByteString, a) -createAndTrim l fill = - runST $ do - mba <- newByteArray l - (l', res) <- fill mba - if assert (l' <= l) $ l' >= l - then do - BA# ba# <- unsafeFreezeByteArray mba - return (SBS ba#, res) - else do - mba2 <- newByteArray l' - copyMutableByteArray mba 0 mba2 0 l' - BA# ba# <- unsafeFreezeByteArray mba2 - return (SBS ba#, res) -{-# INLINE createAndTrim #-} - -createAndTrim' :: Int -> (forall s. MBA s -> ST s Int) -> ShortByteString -createAndTrim' l fill = - runST $ do - mba <- newByteArray l - l' <- fill mba - if assert (l' <= l) $ l' >= l - then do - BA# ba# <- unsafeFreezeByteArray mba - return (SBS ba#) - else do - mba2 <- newByteArray l' - copyMutableByteArray mba 0 mba2 0 l' - BA# ba# <- unsafeFreezeByteArray mba2 - return (SBS ba#) -{-# INLINE createAndTrim' #-} - -createAndTrim'' :: Int -> (forall s. MBA s -> MBA s -> ST s (Int, Int)) -> (ShortByteString, ShortByteString) -createAndTrim'' l fill = - runST $ do - mba1 <- newByteArray l - mba2 <- newByteArray l - (l1, l2) <- fill mba1 mba2 - sbs1 <- freeze' l1 mba1 - sbs2 <- freeze' l2 mba2 - pure (sbs1, sbs2) - where - freeze' :: Int -> MBA s -> ST s ShortByteString - freeze' l' mba = - if assert (l' <= l) $ l' >= l - then do - BA# ba# <- unsafeFreezeByteArray mba - return (SBS ba#) - else do - mba2 <- newByteArray l' - copyMutableByteArray mba 0 mba2 0 l' - BA# ba# <- unsafeFreezeByteArray mba2 - return (SBS ba#) -{-# INLINE createAndTrim'' #-} - --- Returns the index of the first match or the length of the whole --- bytestring if nothing matched. -findIndexOrLength :: (Word16 -> Bool) -> ShortByteString -> Int -findIndexOrLength k (assertEven -> sbs) = go 0 - where - l = BS.length sbs - ba = asBA sbs - w = indexWord16Array ba - go !n | n >= l = l `div` 2 - | k (w n) = n `div` 2 - | otherwise = go (n + 2) -{-# INLINE findIndexOrLength #-} - - --- | Returns the length of the substring matching, not the index. --- If no match, returns 0. -findFromEndUntil :: (Word16 -> Bool) -> ShortByteString -> Int -findFromEndUntil k sbs = go (BS.length sbs - 2) - where - ba = asBA sbs - w = indexWord16Array ba - go !n | n < 0 = 0 - | k (w n) = (n `div` 2) + 1 - | otherwise = go (n - 2) -{-# INLINE findFromEndUntil #-} - - -assertEven :: ShortByteString -> ShortByteString -assertEven sbs@(SBS barr#) - | even (I# (sizeofByteArray# barr#)) = sbs - | otherwise = error ("Uneven number of bytes: " <> show (BS.length sbs) <> ". This is not a Word16 bytestream.") - - --- Common up near identical calls to `error' to reduce the number --- constant strings created when compiled: -errorEmptySBS :: HasCallStack => String -> a -errorEmptySBS fun = moduleError fun "empty ShortByteString" -{-# NOINLINE errorEmptySBS #-} - -moduleError :: HasCallStack => String -> String -> a -moduleError fun msg = error (moduleErrorMsg fun msg) -{-# NOINLINE moduleError #-} - -compareByteArraysOff :: BA -- ^ array 1 - -> Int -- ^ offset for array 1 - -> BA -- ^ array 2 - -> Int -- ^ offset for array 2 - -> Int -- ^ length to compare - -> Int -- ^ like memcmp -#if MIN_VERSION_base(4,11,0) -compareByteArraysOff (BA# ba1#) (I# ba1off#) (BA# ba2#) (I# ba2off#) (I# len#) = - I# (compareByteArrays# ba1# ba1off# ba2# ba2off# len#) -#else -compareByteArraysOff (BA# ba1#) ba1off (BA# ba2#) ba2off len = - assert (ba1off + len <= (I# (sizeofByteArray# ba1#))) - $ assert (ba2off + len <= (I# (sizeofByteArray# ba2#))) - $ fromIntegral $ accursedUnutterablePerformIO $ - c_memcmp_ByteArray ba1# - ba1off - ba2# - ba2off - (fromIntegral len) - - -foreign import ccall unsafe "static sbs_memcmp_off" - c_memcmp_ByteArray :: ByteArray# -> Int -> ByteArray# -> Int -> CSize -> IO CInt -#endif +import System.OsPath.Data.ByteString.Short.Internal.Hidden diff --git a/System/OsPath/Data/ByteString/Short/Internal/Hidden.hs b/System/OsPath/Data/ByteString/Short/Internal/Hidden.hs new file mode 100644 index 00000000..4642db90 --- /dev/null +++ b/System/OsPath/Data/ByteString/Short/Internal/Hidden.hs @@ -0,0 +1,481 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE UnboxedTuples #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE UnliftedFFITypes #-} + +-- | +-- Module : System.OsPath.Data.ByteString.Short.Internal.Hidden +-- Copyright : © 2022 Julian Ospald +-- License : MIT +-- +-- Maintainer : Julian Ospald +-- Stability : experimental +-- Portability : portable +-- +-- Internal low-level utilities mostly for 'System.OsPath.Data.ByteString.Short.Word16', +-- such as byte-array operations and other stuff not meant to be exported from Word16 module. +module System.OsPath.Data.ByteString.Short.Internal.Hidden where + +import Control.Monad.ST +import Control.Exception (assert, throwIO) +import Data.Bits (Bits(..)) +import Data.ByteString.Short.Internal (ShortByteString(..), length) +#if !MIN_VERSION_base(4,11,0) +import Data.Semigroup + ( Semigroup((<>)) ) +import Foreign.C.Types + ( CSize(..) + , CInt(..) + ) +import Data.ByteString.Internal + ( accursedUnutterablePerformIO + ) +#endif +#if !MIN_VERSION_bytestring(0,10,9) +import Foreign.Marshal.Alloc (allocaBytes) +import Foreign.C.String ( CString, CStringLen ) +import Foreign.C.Types ( CSize(..) ) +import Foreign.Storable (pokeByteOff) +#endif +import Foreign.Marshal.Array (withArray0, peekArray0, newArray0, withArrayLen, peekArray) +import GHC.Exts +import GHC.Word +import GHC.ST + ( ST (ST) ) +import GHC.Stack ( HasCallStack ) +import Prelude hiding + ( length ) + +import qualified Data.ByteString.Short.Internal as BS +import qualified Data.Char as C +import qualified Data.List as List + + +_nul :: Word16 +_nul = 0x00 + +isSpace :: Word16 -> Bool +isSpace = C.isSpace . word16ToChar + +-- | Total conversion to char. +word16ToChar :: Word16 -> Char +word16ToChar = C.chr . fromIntegral + +create :: Int -> (forall s. MBA s -> ST s ()) -> ShortByteString +create len fill = + runST $ do + mba <- newByteArray len + fill mba + BA# ba# <- unsafeFreezeByteArray mba + return (SBS ba#) +{-# INLINE create #-} + + +asBA :: ShortByteString -> BA +asBA (SBS ba#) = BA# ba# + + + +data BA = BA# ByteArray# +data MBA s = MBA# (MutableByteArray# s) + + +newPinnedByteArray :: Int -> ST s (MBA s) +newPinnedByteArray (I# len#) = + ST $ \s -> case newPinnedByteArray# len# s of + (# s', mba# #) -> (# s', MBA# mba# #) + +newByteArray :: Int -> ST s (MBA s) +newByteArray (I# len#) = + ST $ \s -> case newByteArray# len# s of + (# s', mba# #) -> (# s', MBA# mba# #) + +copyByteArray :: BA -> Int -> MBA s -> Int -> Int -> ST s () +copyByteArray (BA# src#) (I# src_off#) (MBA# dst#) (I# dst_off#) (I# len#) = + ST $ \s -> case copyByteArray# src# src_off# dst# dst_off# len# s of + s' -> (# s', () #) + +unsafeFreezeByteArray :: MBA s -> ST s BA +unsafeFreezeByteArray (MBA# mba#) = + ST $ \s -> case unsafeFreezeByteArray# mba# s of + (# s', ba# #) -> (# s', BA# ba# #) + +copyAddrToByteArray :: Ptr a -> MBA RealWorld -> Int -> Int -> ST RealWorld () +copyAddrToByteArray (Ptr src#) (MBA# dst#) (I# dst_off#) (I# len#) = + ST $ \s -> case copyAddrToByteArray# src# dst# dst_off# len# s of + s' -> (# s', () #) + + +-- this is a copy-paste from bytestring +#if !MIN_VERSION_bytestring(0,10,9) +------------------------------------------------------------------------ +-- Primop replacements + +-- --------------------------------------------------------------------- +-- +-- Standard C functions +-- + +foreign import ccall unsafe "string.h strlen" c_strlen + :: CString -> IO CSize + + +-- --------------------------------------------------------------------- +-- +-- Uses our C code +-- + +-- | /O(n)./ Construct a new @ShortByteString@ from a @CString@. The +-- resulting @ShortByteString@ is an immutable copy of the original +-- @CString@, and is managed on the Haskell heap. The original +-- @CString@ must be null terminated. +-- +-- @since 0.10.10.0 +packCString :: CString -> IO ShortByteString +packCString cstr = do + len <- c_strlen cstr + packCStringLen (cstr, fromIntegral len) + +-- | /O(n)./ Construct a new @ShortByteString@ from a @CStringLen@. The +-- resulting @ShortByteString@ is an immutable copy of the original @CStringLen@. +-- The @ShortByteString@ is a normal Haskell value and will be managed on the +-- Haskell heap. +-- +-- @since 0.10.10.0 +packCStringLen :: CStringLen -> IO ShortByteString +packCStringLen (cstr, len) | len >= 0 = BS.createFromPtr cstr len +packCStringLen (_, len) = + moduleErrorIO "packCStringLen" ("negative length: " ++ show len) + +-- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a +-- null-terminated @CString@. The @CString@ is a copy and will be freed +-- automatically; it must not be stored or used after the +-- subcomputation finishes. +-- +-- @since 0.10.10.0 +useAsCString :: ShortByteString -> (CString -> IO a) -> IO a +useAsCString bs action = + allocaBytes (l+1) $ \buf -> do + BS.copyToPtr bs 0 buf (fromIntegral l) + pokeByteOff buf l (0::Word8) + action buf + where l = length bs + +-- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a 'CStringLen'. +-- As for 'useAsCString' this function makes a copy of the original @ShortByteString@. +-- It must not be stored or used after the subcomputation finishes. +-- +-- Beware that this function does not add a terminating @\NUL@ byte at the end of 'CStringLen'. +-- If you need to construct a pointer to a null-terminated sequence, use 'useAsCString' +-- (and measure length independently if desired). +-- +-- @since 0.10.10.0 +useAsCStringLen :: ShortByteString -> (CStringLen -> IO a) -> IO a +useAsCStringLen bs action = + allocaBytes l $ \buf -> do + BS.copyToPtr bs 0 buf (fromIntegral l) + action (buf, l) + where l = length bs + + +#endif + + +-- | /O(n)./ Construct a new @ShortByteString@ from a @CWString@. The +-- resulting @ShortByteString@ is an immutable copy of the original +-- @CWString@, and is managed on the Haskell heap. The original +-- @CWString@ must be null terminated. +-- +-- @since 0.10.10.0 +packCWString :: Ptr Word16 -> IO ShortByteString +packCWString cwstr = do + cs <- peekArray0 _nul cwstr + return (packWord16 cs) + +-- | /O(n)./ Construct a new @ShortByteString@ from a @CWStringLen@. The +-- resulting @ShortByteString@ is an immutable copy of the original @CWStringLen@. +-- The @ShortByteString@ is a normal Haskell value and will be managed on the +-- Haskell heap. +-- +-- @since 0.10.10.0 +packCWStringLen :: (Ptr Word16, Int) -> IO ShortByteString +packCWStringLen (cp, len) = do + cs <- peekArray len cp + return (packWord16 cs) + + +-- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a +-- null-terminated @CWString@. The @CWString@ is a copy and will be freed +-- automatically; it must not be stored or used after the +-- subcomputation finishes. +-- +-- @since 0.10.10.0 +useAsCWString :: ShortByteString -> (Ptr Word16 -> IO a) -> IO a +useAsCWString = withArray0 _nul . unpackWord16 + +-- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a @CWStringLen@. +-- As for @useAsCWString@ this function makes a copy of the original @ShortByteString@. +-- It must not be stored or used after the subcomputation finishes. +-- +-- @since 0.10.10.0 +useAsCWStringLen :: ShortByteString -> ((Ptr Word16, Int) -> IO a) -> IO a +useAsCWStringLen bs action = withArrayLen (unpackWord16 bs) $ \ len ptr -> action (ptr, len) + +-- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a @CWStringLen@. +-- As for @useAsCWString@ this function makes a copy of the original @ShortByteString@. +-- It must not be stored or used after the subcomputation finishes. +-- +-- @since 0.10.10.0 +newCWString :: ShortByteString -> IO (Ptr Word16) +newCWString = newArray0 _nul . unpackWord16 + + + + + -- --------------------------------------------------------------------- +-- Internal utilities + +moduleErrorIO :: String -> String -> IO a +moduleErrorIO fun msg = throwIO . userError $ moduleErrorMsg fun msg +{-# NOINLINE moduleErrorIO #-} + +moduleErrorMsg :: String -> String -> String +moduleErrorMsg fun msg = "System.OsPath.Data.ByteString.Short." ++ fun ++ ':':' ':msg + +packWord16 :: [Word16] -> ShortByteString +packWord16 cs = packLenWord16 (List.length cs) cs + +packLenWord16 :: Int -> [Word16] -> ShortByteString +packLenWord16 len ws0 = + create (len * 2) (\mba -> go mba 0 ws0) + where + go :: MBA s -> Int -> [Word16] -> ST s () + go !_ !_ [] = return () + go !mba !i (w:ws) = do + writeWord16Array mba i w + go mba (i+2) ws + + +unpackWord16 :: ShortByteString -> [Word16] +unpackWord16 sbs = go len [] + where + len = length sbs + go !i !acc + | i < 1 = acc + | otherwise = let !w = indexWord16Array (asBA sbs) (i - 2) + in go (i - 2) (w:acc) + +packWord16Rev :: [Word16] -> ShortByteString +packWord16Rev cs = packLenWord16Rev (List.length cs * 2) cs + +packLenWord16Rev :: Int -> [Word16] -> ShortByteString +packLenWord16Rev len ws0 = + create len (\mba -> go mba len ws0) + where + go :: MBA s -> Int -> [Word16] -> ST s () + go !_ !_ [] = return () + go !mba !i (w:ws) = do + writeWord16Array mba (i - 2) w + go mba (i - 2) ws + + +-- | This isn't strictly Word16 array write. Instead it's two consecutive Word8 array +-- writes to avoid endianness issues due to primops doing automatic alignment based +-- on host platform. We want to always write LE to the byte array. +writeWord16Array :: MBA s + -> Int -- ^ Word8 index (not Word16) + -> Word16 + -> ST s () +writeWord16Array (MBA# mba#) (I# i#) (W16# w#) = + case encodeWord16LE# w# of + (# lsb#, msb# #) -> + ST (\s -> case writeWord8Array# mba# i# lsb# s of + s' -> (# s', () #)) >> + ST (\s -> case writeWord8Array# mba# (i# +# 1#) msb# s of + s' -> (# s', () #)) + +indexWord8Array :: BA + -> Int -- ^ Word8 index + -> Word8 +indexWord8Array (BA# ba#) (I# i#) = W8# (indexWord8Array# ba# i#) + +-- | This isn't strictly Word16 array read. Instead it's two Word8 array reads +-- to avoid endianness issues due to primops doing automatic alignment based +-- on host platform. We expect the byte array to be LE always. +indexWord16Array :: BA + -> Int -- ^ Word8 index (not Word16) + -> Word16 +indexWord16Array ba i = fromIntegral lsb .|. (fromIntegral msb `shiftL` 8) + where + lsb = indexWord8Array ba i + msb = indexWord8Array ba (i + 1) + +#if !MIN_VERSION_base(4,16,0) + +encodeWord16LE# :: Word# -- ^ Word16 + -> (# Word#, Word# #) -- ^ Word8 (LSB, MSB) +encodeWord16LE# x# = (# x# `and#` int2Word# 0xff# + , x# `and#` int2Word# 0xff00# `shiftRL#` 8# #) + +decodeWord16LE# :: (# Word#, Word# #) -- ^ Word8 (LSB, MSB) + -> Word# -- ^ Word16 +decodeWord16LE# (# lsb#, msb# #) = msb# `shiftL#` 8# `or#` lsb# + +#else + +encodeWord16LE# :: Word16# -- ^ Word16 + -> (# Word8#, Word8# #) -- ^ Word8 (LSB, MSB) +encodeWord16LE# x# = (# word16ToWord8# x# + , word16ToWord8# (x# `uncheckedShiftRLWord16#` 8#) #) + where + word16ToWord8# y = wordToWord8# (word16ToWord# y) + +decodeWord16LE# :: (# Word8#, Word8# #) -- ^ Word8 (LSB, MSB) + -> Word16# -- ^ Word16 +decodeWord16LE# (# lsb#, msb# #) = ((word8ToWord16# msb# `uncheckedShiftLWord16#` 8#) `orWord16#` word8ToWord16# lsb#) + where + word8ToWord16# y = wordToWord16# (word8ToWord# y) + +#endif + +setByteArray :: MBA s -> Int -> Int -> Int -> ST s () +setByteArray (MBA# dst#) (I# off#) (I# len#) (I# c#) = + ST $ \s -> case setByteArray# dst# off# len# c# s of + s' -> (# s', () #) + +copyMutableByteArray :: MBA s -> Int -> MBA s -> Int -> Int -> ST s () +copyMutableByteArray (MBA# src#) (I# src_off#) (MBA# dst#) (I# dst_off#) (I# len#) = + ST $ \s -> case copyMutableByteArray# src# src_off# dst# dst_off# len# s of + s' -> (# s', () #) + +-- | Given the maximum size needed and a function to make the contents +-- of a ShortByteString, createAndTrim makes the 'ShortByteString'. +-- The generating function is required to return the actual final size +-- (<= the maximum size) and the result value. The resulting byte array +-- is realloced to this size. +createAndTrim :: Int -> (forall s. MBA s -> ST s (Int, a)) -> (ShortByteString, a) +createAndTrim l fill = + runST $ do + mba <- newByteArray l + (l', res) <- fill mba + if assert (l' <= l) $ l' >= l + then do + BA# ba# <- unsafeFreezeByteArray mba + return (SBS ba#, res) + else do + mba2 <- newByteArray l' + copyMutableByteArray mba 0 mba2 0 l' + BA# ba# <- unsafeFreezeByteArray mba2 + return (SBS ba#, res) +{-# INLINE createAndTrim #-} + +createAndTrim' :: Int -> (forall s. MBA s -> ST s Int) -> ShortByteString +createAndTrim' l fill = + runST $ do + mba <- newByteArray l + l' <- fill mba + if assert (l' <= l) $ l' >= l + then do + BA# ba# <- unsafeFreezeByteArray mba + return (SBS ba#) + else do + mba2 <- newByteArray l' + copyMutableByteArray mba 0 mba2 0 l' + BA# ba# <- unsafeFreezeByteArray mba2 + return (SBS ba#) +{-# INLINE createAndTrim' #-} + +createAndTrim'' :: Int -> (forall s. MBA s -> MBA s -> ST s (Int, Int)) -> (ShortByteString, ShortByteString) +createAndTrim'' l fill = + runST $ do + mba1 <- newByteArray l + mba2 <- newByteArray l + (l1, l2) <- fill mba1 mba2 + sbs1 <- freeze' l1 mba1 + sbs2 <- freeze' l2 mba2 + pure (sbs1, sbs2) + where + freeze' :: Int -> MBA s -> ST s ShortByteString + freeze' l' mba = + if assert (l' <= l) $ l' >= l + then do + BA# ba# <- unsafeFreezeByteArray mba + return (SBS ba#) + else do + mba2 <- newByteArray l' + copyMutableByteArray mba 0 mba2 0 l' + BA# ba# <- unsafeFreezeByteArray mba2 + return (SBS ba#) +{-# INLINE createAndTrim'' #-} + +-- Returns the index of the first match or the length of the whole +-- bytestring if nothing matched. +findIndexOrLength :: (Word16 -> Bool) -> ShortByteString -> Int +findIndexOrLength k (assertEven -> sbs) = go 0 + where + l = BS.length sbs + ba = asBA sbs + w = indexWord16Array ba + go !n | n >= l = l `div` 2 + | k (w n) = n `div` 2 + | otherwise = go (n + 2) +{-# INLINE findIndexOrLength #-} + + +-- | Returns the length of the substring matching, not the index. +-- If no match, returns 0. +findFromEndUntil :: (Word16 -> Bool) -> ShortByteString -> Int +findFromEndUntil k sbs = go (BS.length sbs - 2) + where + ba = asBA sbs + w = indexWord16Array ba + go !n | n < 0 = 0 + | k (w n) = (n `div` 2) + 1 + | otherwise = go (n - 2) +{-# INLINE findFromEndUntil #-} + + +assertEven :: ShortByteString -> ShortByteString +assertEven sbs@(SBS barr#) + | even (I# (sizeofByteArray# barr#)) = sbs + | otherwise = error ("Uneven number of bytes: " <> show (BS.length sbs) <> ". This is not a Word16 bytestream.") + + +-- Common up near identical calls to `error' to reduce the number +-- constant strings created when compiled: +errorEmptySBS :: HasCallStack => String -> a +errorEmptySBS fun = moduleError fun "empty ShortByteString" +{-# NOINLINE errorEmptySBS #-} + +moduleError :: HasCallStack => String -> String -> a +moduleError fun msg = error (moduleErrorMsg fun msg) +{-# NOINLINE moduleError #-} + +compareByteArraysOff :: BA -- ^ array 1 + -> Int -- ^ offset for array 1 + -> BA -- ^ array 2 + -> Int -- ^ offset for array 2 + -> Int -- ^ length to compare + -> Int -- ^ like memcmp +#if MIN_VERSION_base(4,11,0) +compareByteArraysOff (BA# ba1#) (I# ba1off#) (BA# ba2#) (I# ba2off#) (I# len#) = + I# (compareByteArrays# ba1# ba1off# ba2# ba2off# len#) +#else +compareByteArraysOff (BA# ba1#) ba1off (BA# ba2#) ba2off len = + assert (ba1off + len <= (I# (sizeofByteArray# ba1#))) + $ assert (ba2off + len <= (I# (sizeofByteArray# ba2#))) + $ fromIntegral $ accursedUnutterablePerformIO $ + c_memcmp_ByteArray ba1# + ba1off + ba2# + ba2off + (fromIntegral len) + + +foreign import ccall unsafe "static sbs_memcmp_off" + c_memcmp_ByteArray :: ByteArray# -> Int -> ByteArray# -> Int -> CSize -> IO CInt +#endif + diff --git a/System/OsPath/Data/ByteString/Short/Word16.hs b/System/OsPath/Data/ByteString/Short/Word16.hs index c3b3ca48..1e7ebbcd 100644 --- a/System/OsPath/Data/ByteString/Short/Word16.hs +++ b/System/OsPath/Data/ByteString/Short/Word16.hs @@ -1,14 +1,3 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE CPP #-} -{-# LANGUAGE MultiWayIf #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE ViewPatterns #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TupleSections #-} - -{-# OPTIONS_GHC -fno-warn-name-shadowing -fexpose-all-unfoldings #-} - -- | -- Module : System.OsPath.Data.ByteString.Short.Word16 -- Copyright : © 2022 Julian Ospald @@ -145,12 +134,7 @@ module System.OsPath.Data.ByteString.Short.Word16 {-# DEPRECATED "Use System.OsS useAsCWStringLen ) where -import System.OsPath.Data.ByteString.Short ( append, intercalate, concat, stripSuffix, stripPrefix, isPrefixOf, isSuffixOf, length, empty, null, ShortByteString(..), fromShort, toShort ) -import System.OsPath.Data.ByteString.Short.Internal -import Data.Bits - ( shiftR - ) -import Data.Word + import Prelude hiding ( Foldable(..) , all @@ -172,724 +156,4 @@ import Prelude hiding , take , takeWhile ) -import qualified Data.Foldable as Foldable -import GHC.ST ( ST ) -import GHC.Stack ( HasCallStack ) -import GHC.Exts ( inline ) - -import qualified Data.ByteString.Short.Internal as BS -import qualified Data.List as List - - --- ----------------------------------------------------------------------------- --- Introducing and eliminating 'ShortByteString's - --- | /O(1)/ Convert a 'Word16' into a 'ShortByteString' -singleton :: Word16 -> ShortByteString -singleton = \w -> create 2 (\mba -> writeWord16Array mba 0 w) - - --- | /O(n)/. Convert a list into a 'ShortByteString' -pack :: [Word16] -> ShortByteString -pack = packWord16 - - --- | /O(n)/. Convert a 'ShortByteString' into a list. -unpack :: ShortByteString -> [Word16] -unpack = unpackWord16 . assertEven - - --- --------------------------------------------------------------------- --- Basic interface - --- | This is like 'length', but the number of 'Word16', not 'Word8'. -numWord16 :: ShortByteString -> Int -numWord16 = (`shiftR` 1) . BS.length . assertEven - -infixr 5 `cons` --same as list (:) -infixl 5 `snoc` - --- | /O(n)/ Append a Word16 to the end of a 'ShortByteString' --- --- Note: copies the entire byte array -snoc :: ShortByteString -> Word16 -> ShortByteString -snoc = \(assertEven -> sbs) c -> let l = BS.length sbs - nl = l + 2 - in create nl $ \mba -> do - copyByteArray (asBA sbs) 0 mba 0 l - writeWord16Array mba l c - --- | /O(n)/ 'cons' is analogous to (:) for lists. --- --- Note: copies the entire byte array -cons :: Word16 -> ShortByteString -> ShortByteString -cons c = \(assertEven -> sbs) -> let l = BS.length sbs - nl = l + 2 - in create nl $ \mba -> do - writeWord16Array mba 0 c - copyByteArray (asBA sbs) 0 mba 2 l - --- | /O(1)/ Extract the last element of a ShortByteString, which must be finite and at least one Word16. --- An exception will be thrown in the case of an empty ShortByteString. -last :: HasCallStack => ShortByteString -> Word16 -last = \(assertEven -> sbs) -> case null sbs of - True -> errorEmptySBS "last" - False -> indexWord16Array (asBA sbs) (BS.length sbs - 2) - --- | /O(n)/ Extract the elements after the head of a ShortByteString, which must at least one Word16. --- An exception will be thrown in the case of an empty ShortByteString. --- --- Note: copies the entire byte array -tail :: HasCallStack => ShortByteString -> ShortByteString -tail = \(assertEven -> sbs) -> - let l = BS.length sbs - nl = l - 2 - in if - | l <= 0 -> errorEmptySBS "tail" - | otherwise -> create nl $ \mba -> copyByteArray (asBA sbs) 2 mba 0 nl - --- | /O(n)/ Extract the head and tail of a ByteString, returning Nothing --- if it is empty. -uncons :: ShortByteString -> Maybe (Word16, ShortByteString) -uncons = \(assertEven -> sbs) -> - let l = BS.length sbs - nl = l - 2 - in if | l <= 0 -> Nothing - | otherwise -> let h = indexWord16Array (asBA sbs) 0 - t = create nl $ \mba -> copyByteArray (asBA sbs) 2 mba 0 nl - in Just (h, t) - --- | /O(n)/ Extract first two elements and the rest of a ByteString, --- returning Nothing if it is shorter than two elements. -uncons2 :: ShortByteString -> Maybe (Word16, Word16, ShortByteString) -uncons2 = \(assertEven -> sbs) -> - let l = BS.length sbs - nl = l - 4 - in if | l <= 2 -> Nothing - | otherwise -> let h = indexWord16Array (asBA sbs) 0 - h' = indexWord16Array (asBA sbs) 2 - t = create nl $ \mba -> copyByteArray (asBA sbs) 4 mba 0 nl - in Just (h, h', t) - --- | /O(1)/ Extract the first element of a ShortByteString, which must be at least one Word16. --- An exception will be thrown in the case of an empty ShortByteString. -head :: HasCallStack => ShortByteString -> Word16 -head = \(assertEven -> sbs) -> case null sbs of - True -> errorEmptySBS "last" - False -> indexWord16Array (asBA sbs) 0 - --- | /O(n)/ Return all the elements of a 'ShortByteString' except the last one. --- An exception will be thrown in the case of an empty ShortByteString. --- --- Note: copies the entire byte array -init :: HasCallStack => ShortByteString -> ShortByteString -init = \(assertEven -> sbs) -> - let l = BS.length sbs - nl = l - 2 - in if - | l <= 0 -> errorEmptySBS "tail" - | otherwise -> create nl $ \mba -> copyByteArray (asBA sbs) 0 mba 0 nl - --- | /O(n)/ Extract the 'init' and 'last' of a ByteString, returning Nothing --- if it is empty. -unsnoc :: ShortByteString -> Maybe (ShortByteString, Word16) -unsnoc = \(assertEven -> sbs) -> - let l = BS.length sbs - nl = l - 2 - in if | l <= 0 -> Nothing - | otherwise -> let l' = indexWord16Array (asBA sbs) (l - 2) - i = create nl $ \mba -> copyByteArray (asBA sbs) 0 mba 0 nl - in Just (i, l') - - --- --------------------------------------------------------------------- --- Transformations - --- | /O(n)/ 'map' @f xs@ is the ShortByteString obtained by applying @f@ to each --- element of @xs@. -map :: (Word16 -> Word16) -> ShortByteString -> ShortByteString -map f = \(assertEven -> sbs) -> - let l = BS.length sbs - ba = asBA sbs - in create l (\mba -> go ba mba 0 l) - where - go :: BA -> MBA s -> Int -> Int -> ST s () - go !ba !mba !i !l - | i >= l = return () - | otherwise = do - let w = indexWord16Array ba i - writeWord16Array mba i (f w) - go ba mba (i+2) l - --- TODO: implement more efficiently --- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. -reverse :: ShortByteString -> ShortByteString -reverse = \(assertEven -> sbs) -> - let l = BS.length sbs - ba = asBA sbs - in create l (\mba -> go ba mba 0 l) - where - go :: BA -> MBA s -> Int -> Int -> ST s () - go !ba !mba !i !l - | i >= l = return () - | otherwise = do - let w = indexWord16Array ba i - writeWord16Array mba (l - 2 - i) w - go ba mba (i+2) l - - --- --------------------------------------------------------------------- --- Special folds - --- | /O(n)/ Applied to a predicate and a 'ShortByteString', 'all' determines --- if all elements of the 'ShortByteString' satisfy the predicate. -all :: (Word16 -> Bool) -> ShortByteString -> Bool -all k = \(assertEven -> sbs) -> - let l = BS.length sbs - ba = asBA sbs - w = indexWord16Array ba - go !n | n >= l = True - | otherwise = k (w n) && go (n + 2) - in go 0 - - --- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if --- any element of the 'ByteString' satisfies the predicate. -any :: (Word16 -> Bool) -> ShortByteString -> Bool -any k = \(assertEven -> sbs) -> - let l = BS.length sbs - ba = asBA sbs - w = indexWord16Array ba - go !n | n >= l = False - | otherwise = k (w n) || go (n + 2) - in go 0 - - --- --------------------------------------------------------------------- --- Unfolds and replicates - - --- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@ --- the value of every element. The following holds: --- --- > replicate w c = unfoldr w (\u -> Just (u,u)) c -replicate :: Int -> Word16 -> ShortByteString -replicate w c - | w <= 0 = empty - -- can't use setByteArray here, because we write UTF-16LE - | otherwise = create (w * 2) (`go` 0) - where - go mba ix - | ix < 0 || ix >= w * 2 = pure () - | otherwise = writeWord16Array mba ix c >> go mba (ix + 2) - --- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' --- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a --- ShortByteString from a seed value. The function takes the element and --- returns 'Nothing' if it is done producing the ShortByteString or returns --- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string, --- and @b@ is the seed value for further production. --- --- This function is not efficient/safe. It will build a list of @[Word16]@ --- and run the generator until it returns `Nothing`, otherwise recurse infinitely, --- then finally create a 'ShortByteString'. --- --- Examples: --- --- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0 --- > == pack [0, 1, 2, 3, 4, 5] --- -unfoldr :: (a -> Maybe (Word16, a)) -> a -> ShortByteString -unfoldr f x0 = packWord16Rev $ go x0 mempty - where - go x words' = case f x of - Nothing -> words' - Just (w, x') -> go x' (w:words') - --- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ShortByteString from a seed --- value. However, the length of the result is limited by the first --- argument to 'unfoldrN'. This function is more efficient than 'unfoldr' --- when the maximum length of the result is known. --- --- The following equation relates 'unfoldrN' and 'unfoldr': --- --- > fst (unfoldrN n f s) == take n (unfoldr f s) --- -unfoldrN :: forall a. - Int -- ^ number of 'Word16' - -> (a -> Maybe (Word16, a)) - -> a - -> (ShortByteString, Maybe a) -unfoldrN i f = \x0 -> - if | i < 0 -> (empty, Just x0) - | otherwise -> createAndTrim (i * 2) $ \mba -> go mba x0 0 - - where - go :: forall s. MBA s -> a -> Int -> ST s (Int, Maybe a) - go !mba !x !n = go' x n - where - go' :: a -> Int -> ST s (Int, Maybe a) - go' !x' !n' - | n' == i * 2 = return (n', Just x') - | otherwise = case f x' of - Nothing -> return (n', Nothing) - Just (w, x'') -> do - writeWord16Array mba n' w - go' x'' (n'+2) - - --- -------------------------------------------------------------------- --- Predicates - - - --- --------------------------------------------------------------------- --- Substrings - --- | /O(n)/ 'take' @n@, applied to a ShortByteString @xs@, returns the prefix --- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. --- --- Note: copies the entire byte array -take :: Int -- ^ number of Word16 - -> ShortByteString - -> ShortByteString -take = \n (assertEven -> sbs) -> - let sl = numWord16 sbs - len8 = n * 2 - in if | n >= sl -> sbs - | n <= 0 -> empty - | otherwise -> - create len8 $ \mba -> copyByteArray (asBA sbs) 0 mba 0 len8 - - --- | /O(1)/ @'takeEnd' n xs@ is equivalent to @'drop' ('length' xs - n) xs@. --- Takes @n@ elements from end of bytestring. --- --- >>> takeEnd 3 "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" --- "e\NULf\NULg\NUL" --- >>> takeEnd 0 "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" --- "" --- >>> takeEnd 4 "a\NULb\NULc\NUL" --- "a\NULb\NULc\NUL" -takeEnd :: Int -- ^ number of 'Word16' - -> ShortByteString - -> ShortByteString -takeEnd n = \(assertEven -> sbs) -> - let sl = BS.length sbs - n2 = n * 2 - in if | n2 >= sl -> sbs - | n2 <= 0 -> empty - | otherwise -> create n2 $ \mba -> copyByteArray (asBA sbs) (max 0 (sl - n2)) mba 0 n2 - --- | Similar to 'P.takeWhile', --- returns the longest (possibly empty) prefix of elements --- satisfying the predicate. -takeWhile :: (Word16 -> Bool) -> ShortByteString -> ShortByteString -takeWhile f ps = take (findIndexOrLength (not . f) ps) ps - --- | Returns the longest (possibly empty) suffix of elements --- satisfying the predicate. --- --- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@. -takeWhileEnd :: (Word16 -> Bool) -> ShortByteString -> ShortByteString -takeWhileEnd f ps = drop (findFromEndUntil (not . f) ps) ps - - --- | /O(n)/ 'drop' @n@ @xs@ returns the suffix of @xs@ after the first n elements, or @[]@ if @n > 'length' xs@. --- --- Note: copies the entire byte array -drop :: Int -- ^ number of 'Word16' - -> ShortByteString - -> ShortByteString -drop = \n' (assertEven -> sbs) -> - let len = BS.length sbs - n = n' * 2 - in if | n <= 0 -> sbs - | n >= len -> empty - | otherwise -> - let newLen = len - n - in create newLen $ \mba -> copyByteArray (asBA sbs) n mba 0 newLen - --- | /O(1)/ @'dropEnd' n xs@ is equivalent to @'take' ('length' xs - n) xs@. --- Drops @n@ elements from end of bytestring. --- --- >>> dropEnd 3 "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" --- "a\NULb\NULc\NULd\NUL" --- >>> dropEnd 0 "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" --- "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" --- >>> dropEnd 4 "a\NULb\NULc\NUL" --- "" -dropEnd :: Int -- ^ number of 'Word16' - -> ShortByteString - -> ShortByteString -dropEnd n' = \(assertEven -> sbs) -> - let sl = BS.length sbs - nl = sl - n - n = n' * 2 - in if | n >= sl -> empty - | n <= 0 -> sbs - | otherwise -> create nl $ \mba -> copyByteArray (asBA sbs) 0 mba 0 nl - --- | Similar to 'P.dropWhile', --- drops the longest (possibly empty) prefix of elements --- satisfying the predicate and returns the remainder. --- --- Note: copies the entire byte array -dropWhile :: (Word16 -> Bool) -> ShortByteString -> ShortByteString -dropWhile f = \(assertEven -> ps) -> drop (findIndexOrLength (not . f) ps) ps - --- | Similar to 'P.dropWhileEnd', --- drops the longest (possibly empty) suffix of elements --- satisfying the predicate and returns the remainder. --- --- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@. --- --- @since 0.10.12.0 -dropWhileEnd :: (Word16 -> Bool) -> ShortByteString -> ShortByteString -dropWhileEnd f = \(assertEven -> ps) -> take (findFromEndUntil (not . f) ps) ps - --- | Returns the longest (possibly empty) suffix of elements which __do not__ --- satisfy the predicate and the remainder of the string. --- --- 'breakEnd' @p@ is equivalent to @'spanEnd' (not . p)@ and to @('takeWhileEnd' (not . p) &&& 'dropWhileEnd' (not . p))@. -breakEnd :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) -breakEnd p = \(assertEven -> sbs) -> splitAt (findFromEndUntil p sbs) sbs - --- | Similar to 'P.break', --- returns the longest (possibly empty) prefix of elements which __do not__ --- satisfy the predicate and the remainder of the string. --- --- 'break' @p@ is equivalent to @'span' (not . p)@ and to @('takeWhile' (not . p) &&& 'dropWhile' (not . p))@. -break :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) -break = \p (assertEven -> ps) -> case findIndexOrLength p ps of n -> splitAt n ps - --- | Similar to 'P.span', --- returns the longest (possibly empty) prefix of elements --- satisfying the predicate and the remainder of the string. --- --- 'span' @p@ is equivalent to @'break' (not . p)@ and to @('takeWhile' p &&& 'dropWhile' p)@. --- -span :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) -{- HLINT ignore "Use span" -} -span p = break (not . p) . assertEven - --- | Returns the longest (possibly empty) suffix of elements --- satisfying the predicate and the remainder of the string. --- --- 'spanEnd' @p@ is equivalent to @'breakEnd' (not . p)@ and to @('takeWhileEnd' p &&& 'dropWhileEnd' p)@. --- --- We have --- --- > spanEnd (not . isSpace) "x y z" == ("x y ", "z") --- --- and --- --- > spanEnd (not . isSpace) ps --- > == --- > let (x, y) = span (not . isSpace) (reverse ps) in (reverse y, reverse x) --- -spanEnd :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) -spanEnd p = \(assertEven -> ps) -> splitAt (findFromEndUntil (not.p) ps) ps - --- | /O(n)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@. --- --- Note: copies the substrings -splitAt :: Int -- ^ number of Word16 - -> ShortByteString - -> (ShortByteString, ShortByteString) -splitAt n' = \(assertEven -> sbs) -> if - | n <= 0 -> (empty, sbs) - | otherwise -> - let slen = BS.length sbs - in if | n >= BS.length sbs -> (sbs, empty) - | otherwise -> - let llen = min slen (max 0 n) - rlen = max 0 (slen - max 0 n) - lsbs = create llen $ \mba -> copyByteArray (asBA sbs) 0 mba 0 llen - rsbs = create rlen $ \mba -> copyByteArray (asBA sbs) n mba 0 rlen - in (lsbs, rsbs) - where - n = n' * 2 - --- | /O(n)/ Break a 'ShortByteString' into pieces separated by the byte --- argument, consuming the delimiter. I.e. --- --- > split 10 "a\nb\nd\ne" == ["a","b","d","e"] -- fromEnum '\n' == 10 --- > split 97 "aXaXaXa" == ["","X","X","X",""] -- fromEnum 'a' == 97 --- > split 120 "x" == ["",""] -- fromEnum 'x' == 120 --- > split undefined "" == [] -- and not [""] --- --- and --- --- > intercalate [c] . split c == id --- > split == splitWith . (==) --- --- Note: copies the substrings -split :: Word16 -> ShortByteString -> [ShortByteString] -split w = splitWith (== w) . assertEven - - --- | /O(n)/ Splits a 'ShortByteString' into components delimited by --- separators, where the predicate returns True for a separator element. --- The resulting components do not contain the separators. Two adjacent --- separators result in an empty component in the output. eg. --- --- > splitWith (==97) "aabbaca" == ["","","bb","c",""] -- fromEnum 'a' == 97 --- > splitWith undefined "" == [] -- and not [""] --- -splitWith :: (Word16 -> Bool) -> ShortByteString -> [ShortByteString] -splitWith p = \(assertEven -> sbs) -> if - | BS.null sbs -> [] - | otherwise -> go sbs - where - go sbs' - | BS.null sbs' = [mempty] - | otherwise = - case break p sbs' of - (a, b) - | BS.null b -> [a] - | otherwise -> a : go (tail b) - - --- | Check whether one string is a substring of another. -isInfixOf :: ShortByteString -> ShortByteString -> Bool -isInfixOf sbs = \s -> null sbs || not (null $ snd $ GHC.Exts.inline breakSubstring sbs s) - - --- algorithm: https://github.com/haskell/filepath/issues/195#issuecomment-1605633713 -breakSubstring :: ShortByteString -- ^ String to search for - -> ShortByteString -- ^ String to search in - -> (ShortByteString, ShortByteString) -- ^ Head and tail of string broken at substring -breakSubstring bPat@(asBA -> pat) bInp@(asBA -> inp) = go 0 - where - lpat = BS.length bPat - linp = BS.length bInp - go ix - | let ix' = ix * 2 - , linp >= ix' + lpat = - if | compareByteArraysOff pat 0 inp ix' lpat == 0 -> splitAt ix bInp - | otherwise -> go (ix + 1) - | otherwise - = (bInp, mempty) - - --- --------------------------------------------------------------------- --- Reducing 'ByteString's - --- | 'foldl', applied to a binary operator, a starting value (typically --- the left-identity of the operator), and a ShortByteString, reduces the --- ShortByteString using the binary operator, from left to right. --- -foldl :: (a -> Word16 -> a) -> a -> ShortByteString -> a -foldl f v = List.foldl f v . unpack . assertEven - --- | 'foldl'' is like 'foldl', but strict in the accumulator. --- -foldl' :: (a -> Word16 -> a) -> a -> ShortByteString -> a -foldl' f v = List.foldl' f v . unpack . assertEven - --- | 'foldr', applied to a binary operator, a starting value --- (typically the right-identity of the operator), and a ShortByteString, --- reduces the ShortByteString using the binary operator, from right to left. -foldr :: (Word16 -> a -> a) -> a -> ShortByteString -> a -foldr f v = List.foldr f v . unpack . assertEven - --- | 'foldr'' is like 'foldr', but strict in the accumulator. -foldr' :: (Word16 -> a -> a) -> a -> ShortByteString -> a -foldr' k v = Foldable.foldr' k v . unpack . assertEven - --- | 'foldl1' is a variant of 'foldl' that has no starting value --- argument, and thus must be applied to non-empty 'ShortByteString's. --- An exception will be thrown in the case of an empty ShortByteString. -foldl1 :: HasCallStack => (Word16 -> Word16 -> Word16) -> ShortByteString -> Word16 -foldl1 k = List.foldl1 k . unpack . assertEven - --- | 'foldl1'' is like 'foldl1', but strict in the accumulator. --- An exception will be thrown in the case of an empty ShortByteString. -foldl1' :: HasCallStack => (Word16 -> Word16 -> Word16) -> ShortByteString -> Word16 -foldl1' k = List.foldl1' k . unpack . assertEven - --- | 'foldr1' is a variant of 'foldr' that has no starting value argument, --- and thus must be applied to non-empty 'ShortByteString's --- An exception will be thrown in the case of an empty ShortByteString. -foldr1 :: HasCallStack => (Word16 -> Word16 -> Word16) -> ShortByteString -> Word16 -foldr1 k = List.foldr1 k . unpack . assertEven - --- | 'foldr1'' is a variant of 'foldr1', but is strict in the --- accumulator. -foldr1' :: HasCallStack => (Word16 -> Word16 -> Word16) -> ShortByteString -> Word16 -foldr1' k = \(assertEven -> sbs) -> if null sbs then errorEmptySBS "foldr1'" else foldr' k (last sbs) (init sbs) - - --- -------------------------------------------------------------------- --- Searching ShortByteString - --- | /O(1)/ 'ShortByteString' index (subscript) operator, starting from 0. -index :: HasCallStack - => ShortByteString - -> Int -- ^ number of 'Word16' - -> Word16 -index = \(assertEven -> sbs) i -> if - | i >= 0 && i < numWord16 sbs -> unsafeIndex sbs i - | otherwise -> indexError sbs i - --- | /O(1)/ 'ShortByteString' index, starting from 0, that returns 'Just' if: --- --- > 0 <= n < length bs --- --- @since 0.11.0.0 -indexMaybe :: ShortByteString - -> Int -- ^ number of 'Word16' - -> Maybe Word16 -indexMaybe = \(assertEven -> sbs) i -> if - | i >= 0 && i < numWord16 sbs -> Just $! unsafeIndex sbs i - | otherwise -> Nothing -{-# INLINE indexMaybe #-} - -unsafeIndex :: ShortByteString - -> Int -- ^ number of 'Word16' - -> Word16 -unsafeIndex sbs i = indexWord16Array (asBA sbs) (i * 2) - -indexError :: HasCallStack => ShortByteString -> Int -> a -indexError sbs i = - moduleError "index" $ "error in array index: " ++ show i - ++ " not in range [0.." ++ show (numWord16 sbs) ++ "]" - --- | /O(1)/ 'ShortByteString' index, starting from 0, that returns 'Just' if: --- --- > 0 <= n < length bs --- --- @since 0.11.0.0 -(!?) :: ShortByteString - -> Int -- ^ number of 'Word16' - -> Maybe Word16 -(!?) = indexMaybe -{-# INLINE (!?) #-} - --- | /O(n)/ 'elem' is the 'ShortByteString' membership predicate. -elem :: Word16 -> ShortByteString -> Bool -elem c = \(assertEven -> sbs) -> case elemIndex c sbs of Nothing -> False ; _ -> True - --- | /O(n)/ 'filter', applied to a predicate and a ByteString, --- returns a ByteString containing those characters that satisfy the --- predicate. -filter :: (Word16 -> Bool) -> ShortByteString -> ShortByteString -filter k = \(assertEven -> sbs) -> - let l = BS.length sbs - in if | l <= 0 -> sbs - | otherwise -> createAndTrim' l $ \mba -> go mba (asBA sbs) l - where - go :: forall s. MBA s -- mutable output bytestring - -> BA -- input bytestring - -> Int -- length of input bytestring - -> ST s Int - go !mba ba !l = go' 0 0 - where - go' :: Int -- bytes read - -> Int -- bytes written - -> ST s Int - go' !br !bw - | br >= l = return bw - | otherwise = do - let w = indexWord16Array ba br - if k w - then do - writeWord16Array mba bw w - go' (br+2) (bw+2) - else - go' (br+2) bw - --- | /O(n)/ The 'find' function takes a predicate and a ByteString, --- and returns the first element in matching the predicate, or 'Nothing' --- if there is no such element. --- --- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing --- -find :: (Word16 -> Bool) -> ShortByteString -> Maybe Word16 -find f = \(assertEven -> sbs) -> case findIndex f sbs of - Just n -> Just (sbs `index` n) - _ -> Nothing - --- | /O(n)/ The 'partition' function takes a predicate a ByteString and returns --- the pair of ByteStrings with elements which do and do not satisfy the --- predicate, respectively; i.e., --- --- > partition p bs == (filter p xs, filter (not . p) xs) --- -partition :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) -partition k = \(assertEven -> sbs) -> - let l = BS.length sbs - in if | l <= 0 -> (sbs, sbs) - | otherwise -> createAndTrim'' l $ \mba1 mba2 -> go mba1 mba2 (asBA sbs) l - where - go :: forall s. - MBA s -- mutable output bytestring1 - -> MBA s -- mutable output bytestring2 - -> BA -- input bytestring - -> Int -- length of input bytestring - -> ST s (Int, Int) -- (length mba1, length mba2) - go !mba1 !mba2 ba !l = go' 0 0 - where - go' :: Int -- bytes read - -> Int -- bytes written to bytestring 1 - -> ST s (Int, Int) -- (length mba1, length mba2) - go' !br !bw1 - | br >= l = return (bw1, br - bw1) - | otherwise = do - let w = indexWord16Array ba br - if k w - then do - writeWord16Array mba1 bw1 w - go' (br+2) (bw1+2) - else do - writeWord16Array mba2 (br - bw1) w - go' (br+2) bw1 - --- -------------------------------------------------------------------- --- Indexing ShortByteString - --- | /O(n)/ The 'elemIndex' function returns the index of the first --- element in the given 'ShortByteString' which is equal to the query --- element, or 'Nothing' if there is no such element. -elemIndex :: Word16 - -> ShortByteString - -> Maybe Int -- ^ number of 'Word16' -{- HLINT ignore "Use elemIndex" -} -elemIndex k = findIndex (==k) . assertEven - --- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning --- the indices of all elements equal to the query element, in ascending order. -elemIndices :: Word16 -> ShortByteString -> [Int] -{- HLINT ignore "Use elemIndices" -} -elemIndices k = findIndices (==k) . assertEven - --- | count returns the number of times its argument appears in the ShortByteString -count :: Word16 -> ShortByteString -> Int -count w = List.length . elemIndices w . assertEven - --- | /O(n)/ The 'findIndex' function takes a predicate and a 'ShortByteString' and --- returns the index of the first element in the ByteString --- satisfying the predicate. -findIndex :: (Word16 -> Bool) -> ShortByteString -> Maybe Int -findIndex k = \(assertEven -> sbs) -> - let l = BS.length sbs - ba = asBA sbs - w = indexWord16Array ba - go !n | n >= l = Nothing - | k (w n) = Just (n `shiftR` 1) - | otherwise = go (n + 2) - in go 0 - --- | /O(n)/ The 'findIndices' function extends 'findIndex', by returning the --- indices of all elements satisfying the predicate, in ascending order. -findIndices :: (Word16 -> Bool) -> ShortByteString -> [Int] -findIndices k = \(assertEven -> sbs) -> - let l = BS.length sbs - ba = asBA sbs - w = indexWord16Array ba - go !n | n >= l = [] - | k (w n) = (n `shiftR` 1) : go (n + 2) - | otherwise = go (n + 2) - in go 0 - +import System.OsPath.Data.ByteString.Short.Word16.Hidden \ No newline at end of file diff --git a/System/OsPath/Data/ByteString/Short/Word16/Hidden.hs b/System/OsPath/Data/ByteString/Short/Word16/Hidden.hs new file mode 100644 index 00000000..08d8f735 --- /dev/null +++ b/System/OsPath/Data/ByteString/Short/Word16/Hidden.hs @@ -0,0 +1,895 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TupleSections #-} + +{-# OPTIONS_GHC -fno-warn-name-shadowing -fexpose-all-unfoldings #-} + +-- | +-- Module : System.OsPath.Data.ByteString.Short.Word16.Hidden +-- Copyright : © 2022 Julian Ospald +-- License : MIT +-- +-- Maintainer : Julian Ospald +-- Stability : experimental +-- Portability : portable +-- +-- ShortByteStrings encoded as UTF16-LE, suitable for windows FFI calls. +-- +-- Word16s are *always* in BE encoding (both input and output), so e.g. 'pack' +-- takes a list of BE encoded @[Word16]@ and produces a UTF16-LE encoded ShortByteString. +-- +-- Likewise, 'unpack' takes a UTF16-LE encoded ShortByteString and produces a list of BE encoded @[Word16]@. +-- +-- Indices and lengths are always in respect to Word16, not Word8. +-- +-- All functions will error out if the input string is not a valid UTF16 stream (uneven number of bytes). +-- So use this module with caution. +module System.OsPath.Data.ByteString.Short.Word16.Hidden ( + -- * The @ShortByteString@ type and representation + ShortByteString(..), + + -- * Introducing and eliminating 'ShortByteString's + empty, + singleton, + pack, + unpack, + fromShort, + toShort, + + -- * Basic interface + snoc, + cons, + append, + last, + tail, + uncons, + uncons2, + head, + init, + unsnoc, + null, + length, + numWord16, + + -- * Transforming ShortByteStrings + map, + reverse, + intercalate, + + -- * Reducing 'ShortByteString's (folds) + foldl, + foldl', + foldl1, + foldl1', + + foldr, + foldr', + foldr1, + foldr1', + + -- ** Special folds + all, + any, + concat, + + -- ** Generating and unfolding ByteStrings + replicate, + unfoldr, + unfoldrN, + + -- * Substrings + + -- ** Breaking strings + take, + takeEnd, + takeWhileEnd, + takeWhile, + drop, + dropEnd, + dropWhile, + dropWhileEnd, + breakEnd, + break, + span, + spanEnd, + splitAt, + split, + splitWith, + stripSuffix, + stripPrefix, + + -- * Predicates + isInfixOf, + isPrefixOf, + isSuffixOf, + + -- ** Search for arbitrary substrings + breakSubstring, + + -- * Searching ShortByteStrings + + -- ** Searching by equality + elem, + + -- ** Searching with a predicate + find, + filter, + partition, + + -- * Indexing ShortByteStrings + index, + indexMaybe, + (!?), + elemIndex, + elemIndices, + count, + findIndex, + findIndices, + + -- ** Encoding validation + -- isValidUtf8, + + -- * Low level conversions + -- ** Packing 'CString's and pointers + packCWString, + packCWStringLen, + newCWString, + + -- ** Using ShortByteStrings as 'CString's + useAsCWString, + useAsCWStringLen + ) +where +import System.OsPath.Data.ByteString.Short.Hidden ( append, intercalate, concat, stripSuffix, stripPrefix, isPrefixOf, isSuffixOf, length, empty, null, ShortByteString(..), fromShort, toShort ) +import System.OsPath.Data.ByteString.Short.Internal.Hidden +import Data.Bits + ( shiftR + ) +import Data.Word +import Prelude hiding + ( Foldable(..) + , all + , any + , reverse + , break + , concat + , drop + , dropWhile + , filter + , head + , init + , last + , map + , replicate + , span + , splitAt + , tail + , take + , takeWhile + ) +import qualified Data.Foldable as Foldable +import GHC.ST ( ST ) +import GHC.Stack ( HasCallStack ) +import GHC.Exts ( inline ) + +import qualified Data.ByteString.Short.Internal as BS +import qualified Data.List as List + + +-- ----------------------------------------------------------------------------- +-- Introducing and eliminating 'ShortByteString's + +-- | /O(1)/ Convert a 'Word16' into a 'ShortByteString' +singleton :: Word16 -> ShortByteString +singleton = \w -> create 2 (\mba -> writeWord16Array mba 0 w) + + +-- | /O(n)/. Convert a list into a 'ShortByteString' +pack :: [Word16] -> ShortByteString +pack = packWord16 + + +-- | /O(n)/. Convert a 'ShortByteString' into a list. +unpack :: ShortByteString -> [Word16] +unpack = unpackWord16 . assertEven + + +-- --------------------------------------------------------------------- +-- Basic interface + +-- | This is like 'length', but the number of 'Word16', not 'Word8'. +numWord16 :: ShortByteString -> Int +numWord16 = (`shiftR` 1) . BS.length . assertEven + +infixr 5 `cons` --same as list (:) +infixl 5 `snoc` + +-- | /O(n)/ Append a Word16 to the end of a 'ShortByteString' +-- +-- Note: copies the entire byte array +snoc :: ShortByteString -> Word16 -> ShortByteString +snoc = \(assertEven -> sbs) c -> let l = BS.length sbs + nl = l + 2 + in create nl $ \mba -> do + copyByteArray (asBA sbs) 0 mba 0 l + writeWord16Array mba l c + +-- | /O(n)/ 'cons' is analogous to (:) for lists. +-- +-- Note: copies the entire byte array +cons :: Word16 -> ShortByteString -> ShortByteString +cons c = \(assertEven -> sbs) -> let l = BS.length sbs + nl = l + 2 + in create nl $ \mba -> do + writeWord16Array mba 0 c + copyByteArray (asBA sbs) 0 mba 2 l + +-- | /O(1)/ Extract the last element of a ShortByteString, which must be finite and at least one Word16. +-- An exception will be thrown in the case of an empty ShortByteString. +last :: HasCallStack => ShortByteString -> Word16 +last = \(assertEven -> sbs) -> case null sbs of + True -> errorEmptySBS "last" + False -> indexWord16Array (asBA sbs) (BS.length sbs - 2) + +-- | /O(n)/ Extract the elements after the head of a ShortByteString, which must at least one Word16. +-- An exception will be thrown in the case of an empty ShortByteString. +-- +-- Note: copies the entire byte array +tail :: HasCallStack => ShortByteString -> ShortByteString +tail = \(assertEven -> sbs) -> + let l = BS.length sbs + nl = l - 2 + in if + | l <= 0 -> errorEmptySBS "tail" + | otherwise -> create nl $ \mba -> copyByteArray (asBA sbs) 2 mba 0 nl + +-- | /O(n)/ Extract the head and tail of a ByteString, returning Nothing +-- if it is empty. +uncons :: ShortByteString -> Maybe (Word16, ShortByteString) +uncons = \(assertEven -> sbs) -> + let l = BS.length sbs + nl = l - 2 + in if | l <= 0 -> Nothing + | otherwise -> let h = indexWord16Array (asBA sbs) 0 + t = create nl $ \mba -> copyByteArray (asBA sbs) 2 mba 0 nl + in Just (h, t) + +-- | /O(n)/ Extract first two elements and the rest of a ByteString, +-- returning Nothing if it is shorter than two elements. +uncons2 :: ShortByteString -> Maybe (Word16, Word16, ShortByteString) +uncons2 = \(assertEven -> sbs) -> + let l = BS.length sbs + nl = l - 4 + in if | l <= 2 -> Nothing + | otherwise -> let h = indexWord16Array (asBA sbs) 0 + h' = indexWord16Array (asBA sbs) 2 + t = create nl $ \mba -> copyByteArray (asBA sbs) 4 mba 0 nl + in Just (h, h', t) + +-- | /O(1)/ Extract the first element of a ShortByteString, which must be at least one Word16. +-- An exception will be thrown in the case of an empty ShortByteString. +head :: HasCallStack => ShortByteString -> Word16 +head = \(assertEven -> sbs) -> case null sbs of + True -> errorEmptySBS "last" + False -> indexWord16Array (asBA sbs) 0 + +-- | /O(n)/ Return all the elements of a 'ShortByteString' except the last one. +-- An exception will be thrown in the case of an empty ShortByteString. +-- +-- Note: copies the entire byte array +init :: HasCallStack => ShortByteString -> ShortByteString +init = \(assertEven -> sbs) -> + let l = BS.length sbs + nl = l - 2 + in if + | l <= 0 -> errorEmptySBS "tail" + | otherwise -> create nl $ \mba -> copyByteArray (asBA sbs) 0 mba 0 nl + +-- | /O(n)/ Extract the 'init' and 'last' of a ByteString, returning Nothing +-- if it is empty. +unsnoc :: ShortByteString -> Maybe (ShortByteString, Word16) +unsnoc = \(assertEven -> sbs) -> + let l = BS.length sbs + nl = l - 2 + in if | l <= 0 -> Nothing + | otherwise -> let l' = indexWord16Array (asBA sbs) (l - 2) + i = create nl $ \mba -> copyByteArray (asBA sbs) 0 mba 0 nl + in Just (i, l') + + +-- --------------------------------------------------------------------- +-- Transformations + +-- | /O(n)/ 'map' @f xs@ is the ShortByteString obtained by applying @f@ to each +-- element of @xs@. +map :: (Word16 -> Word16) -> ShortByteString -> ShortByteString +map f = \(assertEven -> sbs) -> + let l = BS.length sbs + ba = asBA sbs + in create l (\mba -> go ba mba 0 l) + where + go :: BA -> MBA s -> Int -> Int -> ST s () + go !ba !mba !i !l + | i >= l = return () + | otherwise = do + let w = indexWord16Array ba i + writeWord16Array mba i (f w) + go ba mba (i+2) l + +-- TODO: implement more efficiently +-- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. +reverse :: ShortByteString -> ShortByteString +reverse = \(assertEven -> sbs) -> + let l = BS.length sbs + ba = asBA sbs + in create l (\mba -> go ba mba 0 l) + where + go :: BA -> MBA s -> Int -> Int -> ST s () + go !ba !mba !i !l + | i >= l = return () + | otherwise = do + let w = indexWord16Array ba i + writeWord16Array mba (l - 2 - i) w + go ba mba (i+2) l + + +-- --------------------------------------------------------------------- +-- Special folds + +-- | /O(n)/ Applied to a predicate and a 'ShortByteString', 'all' determines +-- if all elements of the 'ShortByteString' satisfy the predicate. +all :: (Word16 -> Bool) -> ShortByteString -> Bool +all k = \(assertEven -> sbs) -> + let l = BS.length sbs + ba = asBA sbs + w = indexWord16Array ba + go !n | n >= l = True + | otherwise = k (w n) && go (n + 2) + in go 0 + + +-- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if +-- any element of the 'ByteString' satisfies the predicate. +any :: (Word16 -> Bool) -> ShortByteString -> Bool +any k = \(assertEven -> sbs) -> + let l = BS.length sbs + ba = asBA sbs + w = indexWord16Array ba + go !n | n >= l = False + | otherwise = k (w n) || go (n + 2) + in go 0 + + +-- --------------------------------------------------------------------- +-- Unfolds and replicates + + +-- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@ +-- the value of every element. The following holds: +-- +-- > replicate w c = unfoldr w (\u -> Just (u,u)) c +replicate :: Int -> Word16 -> ShortByteString +replicate w c + | w <= 0 = empty + -- can't use setByteArray here, because we write UTF-16LE + | otherwise = create (w * 2) (`go` 0) + where + go mba ix + | ix < 0 || ix >= w * 2 = pure () + | otherwise = writeWord16Array mba ix c >> go mba (ix + 2) + +-- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' +-- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a +-- ShortByteString from a seed value. The function takes the element and +-- returns 'Nothing' if it is done producing the ShortByteString or returns +-- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string, +-- and @b@ is the seed value for further production. +-- +-- This function is not efficient/safe. It will build a list of @[Word16]@ +-- and run the generator until it returns `Nothing`, otherwise recurse infinitely, +-- then finally create a 'ShortByteString'. +-- +-- Examples: +-- +-- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0 +-- > == pack [0, 1, 2, 3, 4, 5] +-- +unfoldr :: (a -> Maybe (Word16, a)) -> a -> ShortByteString +unfoldr f x0 = packWord16Rev $ go x0 mempty + where + go x words' = case f x of + Nothing -> words' + Just (w, x') -> go x' (w:words') + +-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ShortByteString from a seed +-- value. However, the length of the result is limited by the first +-- argument to 'unfoldrN'. This function is more efficient than 'unfoldr' +-- when the maximum length of the result is known. +-- +-- The following equation relates 'unfoldrN' and 'unfoldr': +-- +-- > fst (unfoldrN n f s) == take n (unfoldr f s) +-- +unfoldrN :: forall a. + Int -- ^ number of 'Word16' + -> (a -> Maybe (Word16, a)) + -> a + -> (ShortByteString, Maybe a) +unfoldrN i f = \x0 -> + if | i < 0 -> (empty, Just x0) + | otherwise -> createAndTrim (i * 2) $ \mba -> go mba x0 0 + + where + go :: forall s. MBA s -> a -> Int -> ST s (Int, Maybe a) + go !mba !x !n = go' x n + where + go' :: a -> Int -> ST s (Int, Maybe a) + go' !x' !n' + | n' == i * 2 = return (n', Just x') + | otherwise = case f x' of + Nothing -> return (n', Nothing) + Just (w, x'') -> do + writeWord16Array mba n' w + go' x'' (n'+2) + + +-- -------------------------------------------------------------------- +-- Predicates + + + +-- --------------------------------------------------------------------- +-- Substrings + +-- | /O(n)/ 'take' @n@, applied to a ShortByteString @xs@, returns the prefix +-- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. +-- +-- Note: copies the entire byte array +take :: Int -- ^ number of Word16 + -> ShortByteString + -> ShortByteString +take = \n (assertEven -> sbs) -> + let sl = numWord16 sbs + len8 = n * 2 + in if | n >= sl -> sbs + | n <= 0 -> empty + | otherwise -> + create len8 $ \mba -> copyByteArray (asBA sbs) 0 mba 0 len8 + + +-- | /O(1)/ @'takeEnd' n xs@ is equivalent to @'drop' ('length' xs - n) xs@. +-- Takes @n@ elements from end of bytestring. +-- +-- >>> takeEnd 3 "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" +-- "e\NULf\NULg\NUL" +-- >>> takeEnd 0 "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" +-- "" +-- >>> takeEnd 4 "a\NULb\NULc\NUL" +-- "a\NULb\NULc\NUL" +takeEnd :: Int -- ^ number of 'Word16' + -> ShortByteString + -> ShortByteString +takeEnd n = \(assertEven -> sbs) -> + let sl = BS.length sbs + n2 = n * 2 + in if | n2 >= sl -> sbs + | n2 <= 0 -> empty + | otherwise -> create n2 $ \mba -> copyByteArray (asBA sbs) (max 0 (sl - n2)) mba 0 n2 + +-- | Similar to 'P.takeWhile', +-- returns the longest (possibly empty) prefix of elements +-- satisfying the predicate. +takeWhile :: (Word16 -> Bool) -> ShortByteString -> ShortByteString +takeWhile f ps = take (findIndexOrLength (not . f) ps) ps + +-- | Returns the longest (possibly empty) suffix of elements +-- satisfying the predicate. +-- +-- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@. +takeWhileEnd :: (Word16 -> Bool) -> ShortByteString -> ShortByteString +takeWhileEnd f ps = drop (findFromEndUntil (not . f) ps) ps + + +-- | /O(n)/ 'drop' @n@ @xs@ returns the suffix of @xs@ after the first n elements, or @[]@ if @n > 'length' xs@. +-- +-- Note: copies the entire byte array +drop :: Int -- ^ number of 'Word16' + -> ShortByteString + -> ShortByteString +drop = \n' (assertEven -> sbs) -> + let len = BS.length sbs + n = n' * 2 + in if | n <= 0 -> sbs + | n >= len -> empty + | otherwise -> + let newLen = len - n + in create newLen $ \mba -> copyByteArray (asBA sbs) n mba 0 newLen + +-- | /O(1)/ @'dropEnd' n xs@ is equivalent to @'take' ('length' xs - n) xs@. +-- Drops @n@ elements from end of bytestring. +-- +-- >>> dropEnd 3 "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" +-- "a\NULb\NULc\NULd\NUL" +-- >>> dropEnd 0 "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" +-- "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" +-- >>> dropEnd 4 "a\NULb\NULc\NUL" +-- "" +dropEnd :: Int -- ^ number of 'Word16' + -> ShortByteString + -> ShortByteString +dropEnd n' = \(assertEven -> sbs) -> + let sl = BS.length sbs + nl = sl - n + n = n' * 2 + in if | n >= sl -> empty + | n <= 0 -> sbs + | otherwise -> create nl $ \mba -> copyByteArray (asBA sbs) 0 mba 0 nl + +-- | Similar to 'P.dropWhile', +-- drops the longest (possibly empty) prefix of elements +-- satisfying the predicate and returns the remainder. +-- +-- Note: copies the entire byte array +dropWhile :: (Word16 -> Bool) -> ShortByteString -> ShortByteString +dropWhile f = \(assertEven -> ps) -> drop (findIndexOrLength (not . f) ps) ps + +-- | Similar to 'P.dropWhileEnd', +-- drops the longest (possibly empty) suffix of elements +-- satisfying the predicate and returns the remainder. +-- +-- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@. +-- +-- @since 0.10.12.0 +dropWhileEnd :: (Word16 -> Bool) -> ShortByteString -> ShortByteString +dropWhileEnd f = \(assertEven -> ps) -> take (findFromEndUntil (not . f) ps) ps + +-- | Returns the longest (possibly empty) suffix of elements which __do not__ +-- satisfy the predicate and the remainder of the string. +-- +-- 'breakEnd' @p@ is equivalent to @'spanEnd' (not . p)@ and to @('takeWhileEnd' (not . p) &&& 'dropWhileEnd' (not . p))@. +breakEnd :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) +breakEnd p = \(assertEven -> sbs) -> splitAt (findFromEndUntil p sbs) sbs + +-- | Similar to 'P.break', +-- returns the longest (possibly empty) prefix of elements which __do not__ +-- satisfy the predicate and the remainder of the string. +-- +-- 'break' @p@ is equivalent to @'span' (not . p)@ and to @('takeWhile' (not . p) &&& 'dropWhile' (not . p))@. +break :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) +break = \p (assertEven -> ps) -> case findIndexOrLength p ps of n -> splitAt n ps + +-- | Similar to 'P.span', +-- returns the longest (possibly empty) prefix of elements +-- satisfying the predicate and the remainder of the string. +-- +-- 'span' @p@ is equivalent to @'break' (not . p)@ and to @('takeWhile' p &&& 'dropWhile' p)@. +-- +span :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) +{- HLINT ignore "Use span" -} +span p = break (not . p) . assertEven + +-- | Returns the longest (possibly empty) suffix of elements +-- satisfying the predicate and the remainder of the string. +-- +-- 'spanEnd' @p@ is equivalent to @'breakEnd' (not . p)@ and to @('takeWhileEnd' p &&& 'dropWhileEnd' p)@. +-- +-- We have +-- +-- > spanEnd (not . isSpace) "x y z" == ("x y ", "z") +-- +-- and +-- +-- > spanEnd (not . isSpace) ps +-- > == +-- > let (x, y) = span (not . isSpace) (reverse ps) in (reverse y, reverse x) +-- +spanEnd :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) +spanEnd p = \(assertEven -> ps) -> splitAt (findFromEndUntil (not.p) ps) ps + +-- | /O(n)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@. +-- +-- Note: copies the substrings +splitAt :: Int -- ^ number of Word16 + -> ShortByteString + -> (ShortByteString, ShortByteString) +splitAt n' = \(assertEven -> sbs) -> if + | n <= 0 -> (empty, sbs) + | otherwise -> + let slen = BS.length sbs + in if | n >= BS.length sbs -> (sbs, empty) + | otherwise -> + let llen = min slen (max 0 n) + rlen = max 0 (slen - max 0 n) + lsbs = create llen $ \mba -> copyByteArray (asBA sbs) 0 mba 0 llen + rsbs = create rlen $ \mba -> copyByteArray (asBA sbs) n mba 0 rlen + in (lsbs, rsbs) + where + n = n' * 2 + +-- | /O(n)/ Break a 'ShortByteString' into pieces separated by the byte +-- argument, consuming the delimiter. I.e. +-- +-- > split 10 "a\nb\nd\ne" == ["a","b","d","e"] -- fromEnum '\n' == 10 +-- > split 97 "aXaXaXa" == ["","X","X","X",""] -- fromEnum 'a' == 97 +-- > split 120 "x" == ["",""] -- fromEnum 'x' == 120 +-- > split undefined "" == [] -- and not [""] +-- +-- and +-- +-- > intercalate [c] . split c == id +-- > split == splitWith . (==) +-- +-- Note: copies the substrings +split :: Word16 -> ShortByteString -> [ShortByteString] +split w = splitWith (== w) . assertEven + + +-- | /O(n)/ Splits a 'ShortByteString' into components delimited by +-- separators, where the predicate returns True for a separator element. +-- The resulting components do not contain the separators. Two adjacent +-- separators result in an empty component in the output. eg. +-- +-- > splitWith (==97) "aabbaca" == ["","","bb","c",""] -- fromEnum 'a' == 97 +-- > splitWith undefined "" == [] -- and not [""] +-- +splitWith :: (Word16 -> Bool) -> ShortByteString -> [ShortByteString] +splitWith p = \(assertEven -> sbs) -> if + | BS.null sbs -> [] + | otherwise -> go sbs + where + go sbs' + | BS.null sbs' = [mempty] + | otherwise = + case break p sbs' of + (a, b) + | BS.null b -> [a] + | otherwise -> a : go (tail b) + + +-- | Check whether one string is a substring of another. +isInfixOf :: ShortByteString -> ShortByteString -> Bool +isInfixOf sbs = \s -> null sbs || not (null $ snd $ GHC.Exts.inline breakSubstring sbs s) + + +-- algorithm: https://github.com/haskell/filepath/issues/195#issuecomment-1605633713 +breakSubstring :: ShortByteString -- ^ String to search for + -> ShortByteString -- ^ String to search in + -> (ShortByteString, ShortByteString) -- ^ Head and tail of string broken at substring +breakSubstring bPat@(asBA -> pat) bInp@(asBA -> inp) = go 0 + where + lpat = BS.length bPat + linp = BS.length bInp + go ix + | let ix' = ix * 2 + , linp >= ix' + lpat = + if | compareByteArraysOff pat 0 inp ix' lpat == 0 -> splitAt ix bInp + | otherwise -> go (ix + 1) + | otherwise + = (bInp, mempty) + + +-- --------------------------------------------------------------------- +-- Reducing 'ByteString's + +-- | 'foldl', applied to a binary operator, a starting value (typically +-- the left-identity of the operator), and a ShortByteString, reduces the +-- ShortByteString using the binary operator, from left to right. +-- +foldl :: (a -> Word16 -> a) -> a -> ShortByteString -> a +foldl f v = List.foldl f v . unpack . assertEven + +-- | 'foldl'' is like 'foldl', but strict in the accumulator. +-- +foldl' :: (a -> Word16 -> a) -> a -> ShortByteString -> a +foldl' f v = List.foldl' f v . unpack . assertEven + +-- | 'foldr', applied to a binary operator, a starting value +-- (typically the right-identity of the operator), and a ShortByteString, +-- reduces the ShortByteString using the binary operator, from right to left. +foldr :: (Word16 -> a -> a) -> a -> ShortByteString -> a +foldr f v = List.foldr f v . unpack . assertEven + +-- | 'foldr'' is like 'foldr', but strict in the accumulator. +foldr' :: (Word16 -> a -> a) -> a -> ShortByteString -> a +foldr' k v = Foldable.foldr' k v . unpack . assertEven + +-- | 'foldl1' is a variant of 'foldl' that has no starting value +-- argument, and thus must be applied to non-empty 'ShortByteString's. +-- An exception will be thrown in the case of an empty ShortByteString. +foldl1 :: HasCallStack => (Word16 -> Word16 -> Word16) -> ShortByteString -> Word16 +foldl1 k = List.foldl1 k . unpack . assertEven + +-- | 'foldl1'' is like 'foldl1', but strict in the accumulator. +-- An exception will be thrown in the case of an empty ShortByteString. +foldl1' :: HasCallStack => (Word16 -> Word16 -> Word16) -> ShortByteString -> Word16 +foldl1' k = List.foldl1' k . unpack . assertEven + +-- | 'foldr1' is a variant of 'foldr' that has no starting value argument, +-- and thus must be applied to non-empty 'ShortByteString's +-- An exception will be thrown in the case of an empty ShortByteString. +foldr1 :: HasCallStack => (Word16 -> Word16 -> Word16) -> ShortByteString -> Word16 +foldr1 k = List.foldr1 k . unpack . assertEven + +-- | 'foldr1'' is a variant of 'foldr1', but is strict in the +-- accumulator. +foldr1' :: HasCallStack => (Word16 -> Word16 -> Word16) -> ShortByteString -> Word16 +foldr1' k = \(assertEven -> sbs) -> if null sbs then errorEmptySBS "foldr1'" else foldr' k (last sbs) (init sbs) + + +-- -------------------------------------------------------------------- +-- Searching ShortByteString + +-- | /O(1)/ 'ShortByteString' index (subscript) operator, starting from 0. +index :: HasCallStack + => ShortByteString + -> Int -- ^ number of 'Word16' + -> Word16 +index = \(assertEven -> sbs) i -> if + | i >= 0 && i < numWord16 sbs -> unsafeIndex sbs i + | otherwise -> indexError sbs i + +-- | /O(1)/ 'ShortByteString' index, starting from 0, that returns 'Just' if: +-- +-- > 0 <= n < length bs +-- +-- @since 0.11.0.0 +indexMaybe :: ShortByteString + -> Int -- ^ number of 'Word16' + -> Maybe Word16 +indexMaybe = \(assertEven -> sbs) i -> if + | i >= 0 && i < numWord16 sbs -> Just $! unsafeIndex sbs i + | otherwise -> Nothing +{-# INLINE indexMaybe #-} + +unsafeIndex :: ShortByteString + -> Int -- ^ number of 'Word16' + -> Word16 +unsafeIndex sbs i = indexWord16Array (asBA sbs) (i * 2) + +indexError :: HasCallStack => ShortByteString -> Int -> a +indexError sbs i = + moduleError "index" $ "error in array index: " ++ show i + ++ " not in range [0.." ++ show (numWord16 sbs) ++ "]" + +-- | /O(1)/ 'ShortByteString' index, starting from 0, that returns 'Just' if: +-- +-- > 0 <= n < length bs +-- +-- @since 0.11.0.0 +(!?) :: ShortByteString + -> Int -- ^ number of 'Word16' + -> Maybe Word16 +(!?) = indexMaybe +{-# INLINE (!?) #-} + +-- | /O(n)/ 'elem' is the 'ShortByteString' membership predicate. +elem :: Word16 -> ShortByteString -> Bool +elem c = \(assertEven -> sbs) -> case elemIndex c sbs of Nothing -> False ; _ -> True + +-- | /O(n)/ 'filter', applied to a predicate and a ByteString, +-- returns a ByteString containing those characters that satisfy the +-- predicate. +filter :: (Word16 -> Bool) -> ShortByteString -> ShortByteString +filter k = \(assertEven -> sbs) -> + let l = BS.length sbs + in if | l <= 0 -> sbs + | otherwise -> createAndTrim' l $ \mba -> go mba (asBA sbs) l + where + go :: forall s. MBA s -- mutable output bytestring + -> BA -- input bytestring + -> Int -- length of input bytestring + -> ST s Int + go !mba ba !l = go' 0 0 + where + go' :: Int -- bytes read + -> Int -- bytes written + -> ST s Int + go' !br !bw + | br >= l = return bw + | otherwise = do + let w = indexWord16Array ba br + if k w + then do + writeWord16Array mba bw w + go' (br+2) (bw+2) + else + go' (br+2) bw + +-- | /O(n)/ The 'find' function takes a predicate and a ByteString, +-- and returns the first element in matching the predicate, or 'Nothing' +-- if there is no such element. +-- +-- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing +-- +find :: (Word16 -> Bool) -> ShortByteString -> Maybe Word16 +find f = \(assertEven -> sbs) -> case findIndex f sbs of + Just n -> Just (sbs `index` n) + _ -> Nothing + +-- | /O(n)/ The 'partition' function takes a predicate a ByteString and returns +-- the pair of ByteStrings with elements which do and do not satisfy the +-- predicate, respectively; i.e., +-- +-- > partition p bs == (filter p xs, filter (not . p) xs) +-- +partition :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) +partition k = \(assertEven -> sbs) -> + let l = BS.length sbs + in if | l <= 0 -> (sbs, sbs) + | otherwise -> createAndTrim'' l $ \mba1 mba2 -> go mba1 mba2 (asBA sbs) l + where + go :: forall s. + MBA s -- mutable output bytestring1 + -> MBA s -- mutable output bytestring2 + -> BA -- input bytestring + -> Int -- length of input bytestring + -> ST s (Int, Int) -- (length mba1, length mba2) + go !mba1 !mba2 ba !l = go' 0 0 + where + go' :: Int -- bytes read + -> Int -- bytes written to bytestring 1 + -> ST s (Int, Int) -- (length mba1, length mba2) + go' !br !bw1 + | br >= l = return (bw1, br - bw1) + | otherwise = do + let w = indexWord16Array ba br + if k w + then do + writeWord16Array mba1 bw1 w + go' (br+2) (bw1+2) + else do + writeWord16Array mba2 (br - bw1) w + go' (br+2) bw1 + +-- -------------------------------------------------------------------- +-- Indexing ShortByteString + +-- | /O(n)/ The 'elemIndex' function returns the index of the first +-- element in the given 'ShortByteString' which is equal to the query +-- element, or 'Nothing' if there is no such element. +elemIndex :: Word16 + -> ShortByteString + -> Maybe Int -- ^ number of 'Word16' +{- HLINT ignore "Use elemIndex" -} +elemIndex k = findIndex (==k) . assertEven + +-- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning +-- the indices of all elements equal to the query element, in ascending order. +elemIndices :: Word16 -> ShortByteString -> [Int] +{- HLINT ignore "Use elemIndices" -} +elemIndices k = findIndices (==k) . assertEven + +-- | count returns the number of times its argument appears in the ShortByteString +count :: Word16 -> ShortByteString -> Int +count w = List.length . elemIndices w . assertEven + +-- | /O(n)/ The 'findIndex' function takes a predicate and a 'ShortByteString' and +-- returns the index of the first element in the ByteString +-- satisfying the predicate. +findIndex :: (Word16 -> Bool) -> ShortByteString -> Maybe Int +findIndex k = \(assertEven -> sbs) -> + let l = BS.length sbs + ba = asBA sbs + w = indexWord16Array ba + go !n | n >= l = Nothing + | k (w n) = Just (n `shiftR` 1) + | otherwise = go (n + 2) + in go 0 + +-- | /O(n)/ The 'findIndices' function extends 'findIndex', by returning the +-- indices of all elements satisfying the predicate, in ascending order. +findIndices :: (Word16 -> Bool) -> ShortByteString -> [Int] +findIndices k = \(assertEven -> sbs) -> + let l = BS.length sbs + ba = asBA sbs + w = indexWord16Array ba + go !n | n >= l = [] + | k (w n) = (n `shiftR` 1) : go (n + 2) + | otherwise = go (n + 2) + in go 0 + diff --git a/System/OsPath/Encoding.hs b/System/OsPath/Encoding.hs index fe81497f..40ed0b7d 100644 --- a/System/OsPath/Encoding.hs +++ b/System/OsPath/Encoding.hs @@ -28,4 +28,4 @@ module System.OsPath.Encoding ) where -import System.OsPath.Encoding.Internal +import System.OsPath.Encoding.Internal.Hidden diff --git a/System/OsPath/Encoding/Internal.hs b/System/OsPath/Encoding/Internal.hs index 218ad370..2271a906 100644 --- a/System/OsPath/Encoding/Internal.hs +++ b/System/OsPath/Encoding/Internal.hs @@ -1,349 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude - , BangPatterns - , TypeApplications - , MultiWayIf - #-} -{-# OPTIONS_GHC -funbox-strict-fields #-} +module System.OsPath.Encoding.Internal {-# DEPRECATED "Use System.OsString.Encoding.Internal from os-string >= 2.0.0 package instead. This module will be removed in filepath >= 1.5." #-} + ( module System.OsPath.Encoding.Internal.Hidden + ) + where - -module System.OsPath.Encoding.Internal {-# DEPRECATED "Use System.OsString.Encoding.Internal from os-string >= 2.0.0 package instead. This module will be removed in filepath >= 1.5." #-} where - -import qualified System.OsPath.Data.ByteString.Short as BS8 -import qualified System.OsPath.Data.ByteString.Short.Word16 as BS16 - -import GHC.Base -import GHC.Real -import GHC.Num --- import GHC.IO -import GHC.IO.Buffer -import GHC.IO.Encoding.Failure -import GHC.IO.Encoding.Types -import Data.Bits -import Control.Exception (SomeException, try, Exception (displayException), evaluate) -import qualified GHC.Foreign as GHC -import Data.Either (Either) -import GHC.IO (unsafePerformIO) -import Control.DeepSeq (force, NFData (rnf)) -import Data.Bifunctor (first) -import Data.Data (Typeable) -import GHC.Show (Show (show)) -import Numeric (showHex) -import Foreign.C (CStringLen) -import Data.Char (chr) -import Foreign -import Prelude (FilePath) -import GHC.IO.Encoding (getFileSystemEncoding) - --- ----------------------------------------------------------------------------- --- UCS-2 LE --- - -ucs2le :: TextEncoding -ucs2le = mkUcs2le ErrorOnCodingFailure - -mkUcs2le :: CodingFailureMode -> TextEncoding -mkUcs2le cfm = TextEncoding { textEncodingName = "UCS-2LE", - mkTextDecoder = ucs2le_DF cfm, - mkTextEncoder = ucs2le_EF cfm } - -ucs2le_DF :: CodingFailureMode -> IO (TextDecoder ()) -ucs2le_DF cfm = - return (BufferCodec { - encode = ucs2le_decode, - recover = recoverDecode cfm, - close = return (), - getState = return (), - setState = const $ return () - }) - -ucs2le_EF :: CodingFailureMode -> IO (TextEncoder ()) -ucs2le_EF cfm = - return (BufferCodec { - encode = ucs2le_encode, - recover = recoverEncode cfm, - close = return (), - getState = return (), - setState = const $ return () - }) - - -ucs2le_decode :: DecodeBuffer -ucs2le_decode - input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ } - output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os } - = let - loop !ir !ow - | ow >= os = done OutputUnderflow ir ow - | ir >= iw = done InputUnderflow ir ow - | ir + 1 == iw = done InputUnderflow ir ow - | otherwise = do - c0 <- readWord8Buf iraw ir - c1 <- readWord8Buf iraw (ir+1) - let x1 = fromIntegral c1 `shiftL` 8 + fromIntegral c0 - ow' <- writeCharBuf oraw ow (unsafeChr x1) - loop (ir+2) ow' - - -- lambda-lifted, to avoid thunks being built in the inner-loop: - done why !ir !ow = return (why, - if ir == iw then input{ bufL=0, bufR=0 } - else input{ bufL=ir }, - output{ bufR=ow }) - in - loop ir0 ow0 - - -ucs2le_encode :: EncodeBuffer -ucs2le_encode - input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ } - output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os } - = let - done why !ir !ow = return (why, - if ir == iw then input{ bufL=0, bufR=0 } - else input{ bufL=ir }, - output{ bufR=ow }) - loop !ir !ow - | ir >= iw = done InputUnderflow ir ow - | os - ow < 2 = done OutputUnderflow ir ow - | otherwise = do - (c,ir') <- readCharBuf iraw ir - case ord c of - x | x < 0x10000 -> do - writeWord8Buf oraw ow (fromIntegral x) - writeWord8Buf oraw (ow+1) (fromIntegral (x `shiftR` 8)) - loop ir' (ow+2) - | otherwise -> done InvalidSequence ir ow - in - loop ir0 ow0 - --- ----------------------------------------------------------------------------- --- UTF-16b --- - --- | Mimics the base encoding for filesystem operations. This should be total on all inputs (word16 byte arrays). --- --- Note that this has a subtle difference to 'encodeWithBaseWindows'/'decodeWithBaseWindows': it doesn't care for --- the @0x0000@ end marker and will as such produce different results. Use @takeWhile (/= '\NUL')@ on the input --- to recover this behavior. -utf16le_b :: TextEncoding -utf16le_b = mkUTF16le_b ErrorOnCodingFailure - -mkUTF16le_b :: CodingFailureMode -> TextEncoding -mkUTF16le_b cfm = TextEncoding { textEncodingName = "UTF-16LE_b", - mkTextDecoder = utf16le_b_DF cfm, - mkTextEncoder = utf16le_b_EF cfm } - -utf16le_b_DF :: CodingFailureMode -> IO (TextDecoder ()) -utf16le_b_DF cfm = - return (BufferCodec { - encode = utf16le_b_decode, - recover = recoverDecode cfm, - close = return (), - getState = return (), - setState = const $ return () - }) - -utf16le_b_EF :: CodingFailureMode -> IO (TextEncoder ()) -utf16le_b_EF cfm = - return (BufferCodec { - encode = utf16le_b_encode, - recover = recoverEncode cfm, - close = return (), - getState = return (), - setState = const $ return () - }) - - -utf16le_b_decode :: DecodeBuffer -utf16le_b_decode - input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ } - output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os } - = let - loop !ir !ow - | ow >= os = done OutputUnderflow ir ow - | ir >= iw = done InputUnderflow ir ow - | ir + 1 == iw = done InputUnderflow ir ow - | otherwise = do - c0 <- readWord8Buf iraw ir - c1 <- readWord8Buf iraw (ir+1) - let x1 = fromIntegral c1 `shiftL` 8 + fromIntegral c0 - if | iw - ir >= 4 -> do - c2 <- readWord8Buf iraw (ir+2) - c3 <- readWord8Buf iraw (ir+3) - let x2 = fromIntegral c3 `shiftL` 8 + fromIntegral c2 - if | 0xd800 <= x1 && x1 <= 0xdbff - , 0xdc00 <= x2 && x2 <= 0xdfff -> do - ow' <- writeCharBuf oraw ow (unsafeChr ((x1 - 0xd800)*0x400 + (x2 - 0xdc00) + 0x10000)) - loop (ir+4) ow' - | otherwise -> do - ow' <- writeCharBuf oraw ow (unsafeChr x1) - loop (ir+2) ow' - | iw - ir >= 2 -> do - ow' <- writeCharBuf oraw ow (unsafeChr x1) - loop (ir+2) ow' - | otherwise -> done InputUnderflow ir ow - - -- lambda-lifted, to avoid thunks being built in the inner-loop: - done why !ir !ow = return (why, - if ir == iw then input{ bufL=0, bufR=0 } - else input{ bufL=ir }, - output{ bufR=ow }) - in - loop ir0 ow0 - - -utf16le_b_encode :: EncodeBuffer -utf16le_b_encode - input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ } - output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os } - = let - done why !ir !ow = return (why, - if ir == iw then input{ bufL=0, bufR=0 } - else input{ bufL=ir }, - output{ bufR=ow }) - loop !ir !ow - | ir >= iw = done InputUnderflow ir ow - | os - ow < 2 = done OutputUnderflow ir ow - | otherwise = do - (c,ir') <- readCharBuf iraw ir - case ord c of - x | x < 0x10000 -> do - writeWord8Buf oraw ow (fromIntegral x) - writeWord8Buf oraw (ow+1) (fromIntegral (x `shiftR` 8)) - loop ir' (ow+2) - | otherwise -> - if os - ow < 4 then done OutputUnderflow ir ow else do - let x' = x - 0x10000 - w1 = x' `div` 0x400 + 0xd800 - w2 = x' `mod` 0x400 + 0xdc00 - writeWord8Buf oraw ow (fromIntegral w1) - writeWord8Buf oraw (ow+1) (fromIntegral (w1 `shiftR` 8)) - writeWord8Buf oraw (ow+2) (fromIntegral w2) - writeWord8Buf oraw (ow+3) (fromIntegral (w2 `shiftR` 8)) - loop ir' (ow+4) - in - loop ir0 ow0 - --- ----------------------------------------------------------------------------- --- Windows encoding (ripped off from base) --- - -cWcharsToChars_UCS2 :: [Word16] -> [Char] -cWcharsToChars_UCS2 = map (chr . fromIntegral) - - --- On Windows, wchar_t is 16 bits wide and CWString uses the UTF-16 encoding. - --- coding errors generate Chars in the surrogate range -cWcharsToChars :: [Word16] -> [Char] -cWcharsToChars = map chr . fromUTF16 . map fromIntegral - where - fromUTF16 :: [Int] -> [Int] - fromUTF16 (c1:c2:wcs) - | 0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff = - ((c1 - 0xd800)*0x400 + (c2 - 0xdc00) + 0x10000) : fromUTF16 wcs - fromUTF16 (c:wcs) = c : fromUTF16 wcs - fromUTF16 [] = [] - -charsToCWchars :: [Char] -> [Word16] -charsToCWchars = foldr (utf16Char . ord) [] - where - utf16Char :: Int -> [Word16] -> [Word16] - utf16Char c wcs - | c < 0x10000 = fromIntegral c : wcs - | otherwise = let c' = c - 0x10000 in - fromIntegral (c' `div` 0x400 + 0xd800) : - fromIntegral (c' `mod` 0x400 + 0xdc00) : wcs - --- ----------------------------------------------------------------------------- - --- ----------------------------------------------------------------------------- --- FFI --- - -withFilePathWin :: FilePath -> (Int -> Ptr Word16 -> IO a) -> IO a -withFilePathWin = withArrayLen . charsToCWchars - -peekFilePathWin :: (Ptr Word16, Int) -> IO FilePath -peekFilePathWin (cp, l) = do - cs <- peekArray l cp - return (cWcharsToChars cs) - -withFilePathPosix :: FilePath -> (CStringLen -> IO a) -> IO a -withFilePathPosix fp f = getFileSystemEncoding >>= \enc -> GHC.withCStringLen enc fp f - -peekFilePathPosix :: CStringLen -> IO FilePath -peekFilePathPosix fp = getFileSystemEncoding >>= \enc -> GHC.peekCStringLen enc fp - --- | Decode with the given 'TextEncoding'. -decodeWithTE :: TextEncoding -> BS8.ShortByteString -> Either EncodingException String -decodeWithTE enc ba = unsafePerformIO $ do - r <- try @SomeException $ BS8.useAsCStringLen ba $ \fp -> GHC.peekCStringLen enc fp - evaluate $ force $ first (flip EncodingError Nothing . displayException) r - --- | Encode with the given 'TextEncoding'. -encodeWithTE :: TextEncoding -> String -> Either EncodingException BS8.ShortByteString -encodeWithTE enc str = unsafePerformIO $ do - r <- try @SomeException $ GHC.withCStringLen enc str $ \cstr -> BS8.packCStringLen cstr - evaluate $ force $ first (flip EncodingError Nothing . displayException) r - --- ----------------------------------------------------------------------------- --- Encoders / decoders --- - --- | This mimics the filepath decoder base uses on unix, --- with the small distinction that we're not truncating at NUL bytes (because we're not at --- the outer FFI layer). -decodeWithBasePosix :: BS8.ShortByteString -> IO String -decodeWithBasePosix ba = BS8.useAsCStringLen ba $ \fp -> peekFilePathPosix fp - --- | This mimics the filepath dencoder base uses on unix, --- with the small distinction that we're not truncating at NUL bytes (because we're not at --- the outer FFI layer). -encodeWithBasePosix :: String -> IO BS8.ShortByteString -encodeWithBasePosix str = withFilePathPosix str $ \cstr -> BS8.packCStringLen cstr - --- | This mimics the filepath decoder base uses on windows, --- with the small distinction that we're not truncating at NUL bytes (because we're not at --- the outer FFI layer). -decodeWithBaseWindows :: BS16.ShortByteString -> IO String -decodeWithBaseWindows ba = BS16.useAsCWStringLen ba $ \fp -> peekFilePathWin fp - --- | This mimics the filepath dencoder base uses on windows, --- with the small distinction that we're not truncating at NUL bytes (because we're not at --- the outer FFI layer). -encodeWithBaseWindows :: String -> IO BS16.ShortByteString -encodeWithBaseWindows str = withFilePathWin str $ \l cstr -> BS16.packCWStringLen (cstr, l) - - --- ----------------------------------------------------------------------------- --- Types --- - -data EncodingException = - EncodingError String (Maybe Word8) - -- ^ Could not decode a byte sequence because it was invalid under - -- the given encoding, or ran out of input in mid-decode. - deriving (Eq, Typeable) - - -showEncodingException :: EncodingException -> String -showEncodingException (EncodingError desc (Just w)) - = "Cannot decode byte '\\x" ++ showHex w ("': " ++ desc) -showEncodingException (EncodingError desc Nothing) - = "Cannot decode input: " ++ desc - -instance Show EncodingException where - show = showEncodingException - -instance Exception EncodingException - -instance NFData EncodingException where - rnf (EncodingError desc w) = rnf desc `seq` rnf w - - --- ----------------------------------------------------------------------------- --- Words --- - -wNUL :: Word16 -wNUL = 0x00 +import System.OsPath.Encoding.Internal.Hidden \ No newline at end of file diff --git a/System/OsPath/Encoding/Internal/Hidden.hs b/System/OsPath/Encoding/Internal/Hidden.hs new file mode 100644 index 00000000..e9aec3ba --- /dev/null +++ b/System/OsPath/Encoding/Internal/Hidden.hs @@ -0,0 +1,349 @@ +{-# LANGUAGE NoImplicitPrelude + , BangPatterns + , TypeApplications + , MultiWayIf + #-} +{-# OPTIONS_GHC -funbox-strict-fields #-} + + +module System.OsPath.Encoding.Internal.Hidden where + +import qualified System.OsPath.Data.ByteString.Short.Hidden as BS8 +import qualified System.OsPath.Data.ByteString.Short.Word16.Hidden as BS16 + +import GHC.Base +import GHC.Real +import GHC.Num +-- import GHC.IO +import GHC.IO.Buffer +import GHC.IO.Encoding.Failure +import GHC.IO.Encoding.Types +import Data.Bits +import Control.Exception (SomeException, try, Exception (displayException), evaluate) +import qualified GHC.Foreign as GHC +import Data.Either (Either) +import GHC.IO (unsafePerformIO) +import Control.DeepSeq (force, NFData (rnf)) +import Data.Bifunctor (first) +import Data.Data (Typeable) +import GHC.Show (Show (show)) +import Numeric (showHex) +import Foreign.C (CStringLen) +import Data.Char (chr) +import Foreign +import Prelude (FilePath) +import GHC.IO.Encoding (getFileSystemEncoding) + +-- ----------------------------------------------------------------------------- +-- UCS-2 LE +-- + +ucs2le :: TextEncoding +ucs2le = mkUcs2le ErrorOnCodingFailure + +mkUcs2le :: CodingFailureMode -> TextEncoding +mkUcs2le cfm = TextEncoding { textEncodingName = "UCS-2LE", + mkTextDecoder = ucs2le_DF cfm, + mkTextEncoder = ucs2le_EF cfm } + +ucs2le_DF :: CodingFailureMode -> IO (TextDecoder ()) +ucs2le_DF cfm = + return (BufferCodec { + encode = ucs2le_decode, + recover = recoverDecode cfm, + close = return (), + getState = return (), + setState = const $ return () + }) + +ucs2le_EF :: CodingFailureMode -> IO (TextEncoder ()) +ucs2le_EF cfm = + return (BufferCodec { + encode = ucs2le_encode, + recover = recoverEncode cfm, + close = return (), + getState = return (), + setState = const $ return () + }) + + +ucs2le_decode :: DecodeBuffer +ucs2le_decode + input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ } + output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os } + = let + loop !ir !ow + | ow >= os = done OutputUnderflow ir ow + | ir >= iw = done InputUnderflow ir ow + | ir + 1 == iw = done InputUnderflow ir ow + | otherwise = do + c0 <- readWord8Buf iraw ir + c1 <- readWord8Buf iraw (ir+1) + let x1 = fromIntegral c1 `shiftL` 8 + fromIntegral c0 + ow' <- writeCharBuf oraw ow (unsafeChr x1) + loop (ir+2) ow' + + -- lambda-lifted, to avoid thunks being built in the inner-loop: + done why !ir !ow = return (why, + if ir == iw then input{ bufL=0, bufR=0 } + else input{ bufL=ir }, + output{ bufR=ow }) + in + loop ir0 ow0 + + +ucs2le_encode :: EncodeBuffer +ucs2le_encode + input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ } + output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os } + = let + done why !ir !ow = return (why, + if ir == iw then input{ bufL=0, bufR=0 } + else input{ bufL=ir }, + output{ bufR=ow }) + loop !ir !ow + | ir >= iw = done InputUnderflow ir ow + | os - ow < 2 = done OutputUnderflow ir ow + | otherwise = do + (c,ir') <- readCharBuf iraw ir + case ord c of + x | x < 0x10000 -> do + writeWord8Buf oraw ow (fromIntegral x) + writeWord8Buf oraw (ow+1) (fromIntegral (x `shiftR` 8)) + loop ir' (ow+2) + | otherwise -> done InvalidSequence ir ow + in + loop ir0 ow0 + +-- ----------------------------------------------------------------------------- +-- UTF-16b +-- + +-- | Mimics the base encoding for filesystem operations. This should be total on all inputs (word16 byte arrays). +-- +-- Note that this has a subtle difference to 'encodeWithBaseWindows'/'decodeWithBaseWindows': it doesn't care for +-- the @0x0000@ end marker and will as such produce different results. Use @takeWhile (/= '\NUL')@ on the input +-- to recover this behavior. +utf16le_b :: TextEncoding +utf16le_b = mkUTF16le_b ErrorOnCodingFailure + +mkUTF16le_b :: CodingFailureMode -> TextEncoding +mkUTF16le_b cfm = TextEncoding { textEncodingName = "UTF-16LE_b", + mkTextDecoder = utf16le_b_DF cfm, + mkTextEncoder = utf16le_b_EF cfm } + +utf16le_b_DF :: CodingFailureMode -> IO (TextDecoder ()) +utf16le_b_DF cfm = + return (BufferCodec { + encode = utf16le_b_decode, + recover = recoverDecode cfm, + close = return (), + getState = return (), + setState = const $ return () + }) + +utf16le_b_EF :: CodingFailureMode -> IO (TextEncoder ()) +utf16le_b_EF cfm = + return (BufferCodec { + encode = utf16le_b_encode, + recover = recoverEncode cfm, + close = return (), + getState = return (), + setState = const $ return () + }) + + +utf16le_b_decode :: DecodeBuffer +utf16le_b_decode + input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ } + output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os } + = let + loop !ir !ow + | ow >= os = done OutputUnderflow ir ow + | ir >= iw = done InputUnderflow ir ow + | ir + 1 == iw = done InputUnderflow ir ow + | otherwise = do + c0 <- readWord8Buf iraw ir + c1 <- readWord8Buf iraw (ir+1) + let x1 = fromIntegral c1 `shiftL` 8 + fromIntegral c0 + if | iw - ir >= 4 -> do + c2 <- readWord8Buf iraw (ir+2) + c3 <- readWord8Buf iraw (ir+3) + let x2 = fromIntegral c3 `shiftL` 8 + fromIntegral c2 + if | 0xd800 <= x1 && x1 <= 0xdbff + , 0xdc00 <= x2 && x2 <= 0xdfff -> do + ow' <- writeCharBuf oraw ow (unsafeChr ((x1 - 0xd800)*0x400 + (x2 - 0xdc00) + 0x10000)) + loop (ir+4) ow' + | otherwise -> do + ow' <- writeCharBuf oraw ow (unsafeChr x1) + loop (ir+2) ow' + | iw - ir >= 2 -> do + ow' <- writeCharBuf oraw ow (unsafeChr x1) + loop (ir+2) ow' + | otherwise -> done InputUnderflow ir ow + + -- lambda-lifted, to avoid thunks being built in the inner-loop: + done why !ir !ow = return (why, + if ir == iw then input{ bufL=0, bufR=0 } + else input{ bufL=ir }, + output{ bufR=ow }) + in + loop ir0 ow0 + + +utf16le_b_encode :: EncodeBuffer +utf16le_b_encode + input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ } + output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os } + = let + done why !ir !ow = return (why, + if ir == iw then input{ bufL=0, bufR=0 } + else input{ bufL=ir }, + output{ bufR=ow }) + loop !ir !ow + | ir >= iw = done InputUnderflow ir ow + | os - ow < 2 = done OutputUnderflow ir ow + | otherwise = do + (c,ir') <- readCharBuf iraw ir + case ord c of + x | x < 0x10000 -> do + writeWord8Buf oraw ow (fromIntegral x) + writeWord8Buf oraw (ow+1) (fromIntegral (x `shiftR` 8)) + loop ir' (ow+2) + | otherwise -> + if os - ow < 4 then done OutputUnderflow ir ow else do + let x' = x - 0x10000 + w1 = x' `div` 0x400 + 0xd800 + w2 = x' `mod` 0x400 + 0xdc00 + writeWord8Buf oraw ow (fromIntegral w1) + writeWord8Buf oraw (ow+1) (fromIntegral (w1 `shiftR` 8)) + writeWord8Buf oraw (ow+2) (fromIntegral w2) + writeWord8Buf oraw (ow+3) (fromIntegral (w2 `shiftR` 8)) + loop ir' (ow+4) + in + loop ir0 ow0 + +-- ----------------------------------------------------------------------------- +-- Windows encoding (ripped off from base) +-- + +cWcharsToChars_UCS2 :: [Word16] -> [Char] +cWcharsToChars_UCS2 = map (chr . fromIntegral) + + +-- On Windows, wchar_t is 16 bits wide and CWString uses the UTF-16 encoding. + +-- coding errors generate Chars in the surrogate range +cWcharsToChars :: [Word16] -> [Char] +cWcharsToChars = map chr . fromUTF16 . map fromIntegral + where + fromUTF16 :: [Int] -> [Int] + fromUTF16 (c1:c2:wcs) + | 0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff = + ((c1 - 0xd800)*0x400 + (c2 - 0xdc00) + 0x10000) : fromUTF16 wcs + fromUTF16 (c:wcs) = c : fromUTF16 wcs + fromUTF16 [] = [] + +charsToCWchars :: [Char] -> [Word16] +charsToCWchars = foldr (utf16Char . ord) [] + where + utf16Char :: Int -> [Word16] -> [Word16] + utf16Char c wcs + | c < 0x10000 = fromIntegral c : wcs + | otherwise = let c' = c - 0x10000 in + fromIntegral (c' `div` 0x400 + 0xd800) : + fromIntegral (c' `mod` 0x400 + 0xdc00) : wcs + +-- ----------------------------------------------------------------------------- + +-- ----------------------------------------------------------------------------- +-- FFI +-- + +withFilePathWin :: FilePath -> (Int -> Ptr Word16 -> IO a) -> IO a +withFilePathWin = withArrayLen . charsToCWchars + +peekFilePathWin :: (Ptr Word16, Int) -> IO FilePath +peekFilePathWin (cp, l) = do + cs <- peekArray l cp + return (cWcharsToChars cs) + +withFilePathPosix :: FilePath -> (CStringLen -> IO a) -> IO a +withFilePathPosix fp f = getFileSystemEncoding >>= \enc -> GHC.withCStringLen enc fp f + +peekFilePathPosix :: CStringLen -> IO FilePath +peekFilePathPosix fp = getFileSystemEncoding >>= \enc -> GHC.peekCStringLen enc fp + +-- | Decode with the given 'TextEncoding'. +decodeWithTE :: TextEncoding -> BS8.ShortByteString -> Either EncodingException String +decodeWithTE enc ba = unsafePerformIO $ do + r <- try @SomeException $ BS8.useAsCStringLen ba $ \fp -> GHC.peekCStringLen enc fp + evaluate $ force $ first (flip EncodingError Nothing . displayException) r + +-- | Encode with the given 'TextEncoding'. +encodeWithTE :: TextEncoding -> String -> Either EncodingException BS8.ShortByteString +encodeWithTE enc str = unsafePerformIO $ do + r <- try @SomeException $ GHC.withCStringLen enc str $ \cstr -> BS8.packCStringLen cstr + evaluate $ force $ first (flip EncodingError Nothing . displayException) r + +-- ----------------------------------------------------------------------------- +-- Encoders / decoders +-- + +-- | This mimics the filepath decoder base uses on unix, +-- with the small distinction that we're not truncating at NUL bytes (because we're not at +-- the outer FFI layer). +decodeWithBasePosix :: BS8.ShortByteString -> IO String +decodeWithBasePosix ba = BS8.useAsCStringLen ba $ \fp -> peekFilePathPosix fp + +-- | This mimics the filepath dencoder base uses on unix, +-- with the small distinction that we're not truncating at NUL bytes (because we're not at +-- the outer FFI layer). +encodeWithBasePosix :: String -> IO BS8.ShortByteString +encodeWithBasePosix str = withFilePathPosix str $ \cstr -> BS8.packCStringLen cstr + +-- | This mimics the filepath decoder base uses on windows, +-- with the small distinction that we're not truncating at NUL bytes (because we're not at +-- the outer FFI layer). +decodeWithBaseWindows :: BS16.ShortByteString -> IO String +decodeWithBaseWindows ba = BS16.useAsCWStringLen ba $ \fp -> peekFilePathWin fp + +-- | This mimics the filepath dencoder base uses on windows, +-- with the small distinction that we're not truncating at NUL bytes (because we're not at +-- the outer FFI layer). +encodeWithBaseWindows :: String -> IO BS16.ShortByteString +encodeWithBaseWindows str = withFilePathWin str $ \l cstr -> BS16.packCWStringLen (cstr, l) + + +-- ----------------------------------------------------------------------------- +-- Types +-- + +data EncodingException = + EncodingError String (Maybe Word8) + -- ^ Could not decode a byte sequence because it was invalid under + -- the given encoding, or ran out of input in mid-decode. + deriving (Eq, Typeable) + + +showEncodingException :: EncodingException -> String +showEncodingException (EncodingError desc (Just w)) + = "Cannot decode byte '\\x" ++ showHex w ("': " ++ desc) +showEncodingException (EncodingError desc Nothing) + = "Cannot decode input: " ++ desc + +instance Show EncodingException where + show = showEncodingException + +instance Exception EncodingException + +instance NFData EncodingException where + rnf (EncodingError desc w) = rnf desc `seq` rnf w + + +-- ----------------------------------------------------------------------------- +-- Words +-- + +wNUL :: Word16 +wNUL = 0x00 diff --git a/System/OsPath/Internal.hs b/System/OsPath/Internal.hs index 3bdf5318..bf099828 100644 --- a/System/OsPath/Internal.hs +++ b/System/OsPath/Internal.hs @@ -7,7 +7,7 @@ module System.OsPath.Internal where import {-# SOURCE #-} System.OsPath ( isValid ) import System.OsPath.Types -import qualified System.OsString.Internal as OS +import qualified System.OsString.Internal.Hidden as OS import Control.Monad.Catch ( MonadThrow ) @@ -19,7 +19,7 @@ import Language.Haskell.TH.Syntax ( Lift (..), lift ) import GHC.IO.Encoding.Failure ( CodingFailureMode(..) ) -import System.OsString.Internal.Types +import System.OsString.Internal.Types.Hidden import System.OsPath.Encoding import Control.Monad (when) import System.IO diff --git a/System/OsPath/Types.hs b/System/OsPath/Types.hs index 6bf1b774..71b8e31e 100644 --- a/System/OsPath/Types.hs +++ b/System/OsPath/Types.hs @@ -18,7 +18,7 @@ module System.OsPath.Types ) where -import System.OsString.Internal.Types +import System.OsString.Internal.Types.Hidden -- | Filepaths are @wchar_t*@ data on windows as passed to syscalls. diff --git a/System/OsString.hs b/System/OsString.hs index 0325fd63..15d5674d 100644 --- a/System/OsString.hs +++ b/System/OsString.hs @@ -43,7 +43,7 @@ module System.OsString {-# DEPRECATED "Use System.OsString from os-string >= 2.0 ) where -import System.OsString.Internal +import System.OsString.Internal.Hidden ( unsafeFromChar , toChar , encodeUtf @@ -56,5 +56,5 @@ import System.OsString.Internal , decodeFS , unpack ) -import System.OsString.Internal.Types +import System.OsString.Internal.Types.Hidden ( OsString, OsChar ) diff --git a/System/OsString/Common.hs b/System/OsString/Common.hs index 0ec7615d..c4b656f0 100644 --- a/System/OsString/Common.hs +++ b/System/OsString/Common.hs @@ -11,7 +11,7 @@ #define POSIX_DOC #endif -module System.OsString.MODULE_NAME {-# DEPRECATED "Use System.OsString.MODULE_NAME from os-string >= 2.0.0 package instead. This module will be removed in filepath >= 1.5." #-} +module System.OsString.MODULE_NAME.Hidden ( -- * Types #ifdef WINDOWS @@ -46,7 +46,7 @@ where -import System.OsString.Internal.Types ( +import System.OsString.Internal.Types.Hidden ( #ifdef WINDOWS WindowsString(..), WindowsChar(..) #else @@ -78,14 +78,14 @@ import System.OsPath.Encoding import System.IO ( TextEncoding, utf16le ) import GHC.IO.Encoding.UTF16 ( mkUTF16le ) -import qualified System.OsPath.Data.ByteString.Short.Word16 as BS16 -import qualified System.OsPath.Data.ByteString.Short as BS8 +import qualified System.OsPath.Data.ByteString.Short.Word16.Hidden as BS16 +import qualified System.OsPath.Data.ByteString.Short.Hidden as BS8 #else import System.OsPath.Encoding import System.IO ( TextEncoding, utf8 ) import GHC.IO.Encoding.UTF8 ( mkUTF8 ) -import qualified System.OsPath.Data.ByteString.Short as BS +import qualified System.OsPath.Data.ByteString.Short.Hidden as BS #endif diff --git a/System/OsString/Internal.hs b/System/OsString/Internal.hs index a7639af6..911fbc89 100644 --- a/System/OsString/Internal.hs +++ b/System/OsString/Internal.hs @@ -1,174 +1,6 @@ -{-# LANGUAGE CPP #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE UnliftedFFITypes #-} - -module System.OsString.Internal {-# DEPRECATED "Use System.OsString.Internal from os-string >= 2.0.0 package instead. This module will be removed in filepath >= 1.5." #-} where - -import System.OsString.Internal.Types - -import Control.Monad.Catch - ( MonadThrow ) -import Data.ByteString - ( ByteString ) -import Data.Char -import Language.Haskell.TH.Quote - ( QuasiQuoter (..) ) -import Language.Haskell.TH.Syntax - ( Lift (..), lift ) -import System.IO - ( TextEncoding ) - -import System.OsPath.Encoding ( EncodingException(..) ) -import GHC.IO.Encoding.Failure ( CodingFailureMode(..) ) -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -import GHC.IO.Encoding.UTF16 ( mkUTF16le ) -import qualified System.OsString.Windows as PF -#else -import GHC.IO.Encoding.UTF8 ( mkUTF8 ) -import qualified System.OsString.Posix as PF -#endif - - - - --- | Partial unicode friendly encoding. --- --- On windows this encodes as UTF16-LE (strictly), which is a pretty good guess. --- On unix this encodes as UTF8 (strictly), which is a good guess. --- --- Throws a 'EncodingException' if encoding fails. -encodeUtf :: MonadThrow m => String -> m OsString -encodeUtf = fmap OsString . PF.encodeUtf - --- | Encode an 'OsString' given the platform specific encodings. -encodeWith :: TextEncoding -- ^ unix text encoding - -> TextEncoding -- ^ windows text encoding - -> String - -> Either EncodingException OsString -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -encodeWith _ winEnc str = OsString <$> PF.encodeWith winEnc str -#else -encodeWith unixEnc _ str = OsString <$> PF.encodeWith unixEnc str -#endif - --- | Like 'encodeUtf', except this mimics the behavior of the base library when doing filesystem --- operations, which is: --- --- 1. on unix, uses shady PEP 383 style encoding (based on the current locale, --- but PEP 383 only works properly on UTF-8 encodings, so good luck) --- 2. on windows does permissive UTF-16 encoding, where coding errors generate --- Chars in the surrogate range --- --- Looking up the locale requires IO. If you're not worried about calls --- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure --- to deeply evaluate the result to catch exceptions). -encodeFS :: String -> IO OsString -encodeFS = fmap OsString . PF.encodeFS - - --- | Partial unicode friendly decoding. --- --- On windows this decodes as UTF16-LE (strictly), which is a pretty good guess. --- On unix this decodes as UTF8 (strictly), which is a good guess. Note that --- filenames on unix are encoding agnostic char arrays. --- --- Throws a 'EncodingException' if decoding fails. -decodeUtf :: MonadThrow m => OsString -> m String -decodeUtf (OsString x) = PF.decodeUtf x - --- | Decode an 'OsString' with the specified encoding. --- --- The String is forced into memory to catch all exceptions. -decodeWith :: TextEncoding -- ^ unix text encoding - -> TextEncoding -- ^ windows text encoding - -> OsString - -> Either EncodingException String -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -decodeWith _ winEnc (OsString x) = PF.decodeWith winEnc x -#else -decodeWith unixEnc _ (OsString x) = PF.decodeWith unixEnc x -#endif - - --- | Like 'decodeUtf', except this mimics the behavior of the base library when doing filesystem --- operations, which is: --- --- 1. on unix, uses shady PEP 383 style encoding (based on the current locale, --- but PEP 383 only works properly on UTF-8 encodings, so good luck) --- 2. on windows does permissive UTF-16 encoding, where coding errors generate --- Chars in the surrogate range --- --- Looking up the locale requires IO. If you're not worried about calls --- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure --- to deeply evaluate the result to catch exceptions). -decodeFS :: OsString -> IO String -decodeFS (OsString x) = PF.decodeFS x - - --- | Constructs an @OsString@ from a ByteString. --- --- On windows, this ensures valid UCS-2LE, on unix it is passed unchanged/unchecked. --- --- Throws 'EncodingException' on invalid UCS-2LE on windows (although unlikely). -fromBytes :: MonadThrow m - => ByteString - -> m OsString -fromBytes = fmap OsString . PF.fromBytes - - --- | QuasiQuote an 'OsString'. This accepts Unicode characters --- and encodes as UTF-8 on unix and UTF-16 on windows. -osstr :: QuasiQuoter -osstr = - QuasiQuoter -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) - { quoteExp = \s -> do - osp <- either (fail . show) (pure . OsString) . PF.encodeWith (mkUTF16le ErrorOnCodingFailure) $ s - lift osp - , quotePat = \_ -> - fail "illegal QuasiQuote (allowed as expression only, used as a pattern)" - , quoteType = \_ -> - fail "illegal QuasiQuote (allowed as expression only, used as a type)" - , quoteDec = \_ -> - fail "illegal QuasiQuote (allowed as expression only, used as a declaration)" - } -#else - { quoteExp = \s -> do - osp <- either (fail . show) (pure . OsString) . PF.encodeWith (mkUTF8 ErrorOnCodingFailure) $ s - lift osp - , quotePat = \_ -> - fail "illegal QuasiQuote (allowed as expression only, used as a pattern)" - , quoteType = \_ -> - fail "illegal QuasiQuote (allowed as expression only, used as a type)" - , quoteDec = \_ -> - fail "illegal QuasiQuote (allowed as expression only, used as a declaration)" - } -#endif - - --- | Unpack an 'OsString' to a list of 'OsChar'. -unpack :: OsString -> [OsChar] -unpack (OsString x) = OsChar <$> PF.unpack x - - --- | Pack a list of 'OsChar' to an 'OsString' --- --- Note that using this in conjunction with 'unsafeFromChar' to --- convert from @[Char]@ to 'OsString' is probably not what --- you want, because it will truncate unicode code points. -pack :: [OsChar] -> OsString -pack = OsString . PF.pack . fmap (\(OsChar x) -> x) - - --- | Truncates on unix to 1 and on Windows to 2 octets. -unsafeFromChar :: Char -> OsChar -unsafeFromChar = OsChar . PF.unsafeFromChar - --- | Converts back to a unicode codepoint (total). -toChar :: OsChar -> Char -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -toChar (OsChar (WindowsChar w)) = chr $ fromIntegral w -#else -toChar (OsChar (PosixChar w)) = chr $ fromIntegral w -#endif +module System.OsString.Internal {-# DEPRECATED "Use System.OsString.Internal from os-string >= 2.0.0 package instead. This module will be removed in filepath >= 1.5." #-} + ( module System.OsString.Internal.Hidden + ) + where +import System.OsString.Internal.Hidden \ No newline at end of file diff --git a/System/OsString/Internal/Hidden.hs b/System/OsString/Internal/Hidden.hs new file mode 100644 index 00000000..c40467b0 --- /dev/null +++ b/System/OsString/Internal/Hidden.hs @@ -0,0 +1,174 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE UnliftedFFITypes #-} + +module System.OsString.Internal.Hidden where + +import System.OsString.Internal.Types.Hidden + +import Control.Monad.Catch + ( MonadThrow ) +import Data.ByteString + ( ByteString ) +import Data.Char +import Language.Haskell.TH.Quote + ( QuasiQuoter (..) ) +import Language.Haskell.TH.Syntax + ( Lift (..), lift ) +import System.IO + ( TextEncoding ) + +import System.OsPath.Encoding ( EncodingException(..) ) +import GHC.IO.Encoding.Failure ( CodingFailureMode(..) ) +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) +import GHC.IO.Encoding.UTF16 ( mkUTF16le ) +import qualified System.OsString.Windows.Hidden as PF +#else +import GHC.IO.Encoding.UTF8 ( mkUTF8 ) +import qualified System.OsString.Posix.Hidden as PF +#endif + + + + +-- | Partial unicode friendly encoding. +-- +-- On windows this encodes as UTF16-LE (strictly), which is a pretty good guess. +-- On unix this encodes as UTF8 (strictly), which is a good guess. +-- +-- Throws a 'EncodingException' if encoding fails. +encodeUtf :: MonadThrow m => String -> m OsString +encodeUtf = fmap OsString . PF.encodeUtf + +-- | Encode an 'OsString' given the platform specific encodings. +encodeWith :: TextEncoding -- ^ unix text encoding + -> TextEncoding -- ^ windows text encoding + -> String + -> Either EncodingException OsString +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) +encodeWith _ winEnc str = OsString <$> PF.encodeWith winEnc str +#else +encodeWith unixEnc _ str = OsString <$> PF.encodeWith unixEnc str +#endif + +-- | Like 'encodeUtf', except this mimics the behavior of the base library when doing filesystem +-- operations, which is: +-- +-- 1. on unix, uses shady PEP 383 style encoding (based on the current locale, +-- but PEP 383 only works properly on UTF-8 encodings, so good luck) +-- 2. on windows does permissive UTF-16 encoding, where coding errors generate +-- Chars in the surrogate range +-- +-- Looking up the locale requires IO. If you're not worried about calls +-- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure +-- to deeply evaluate the result to catch exceptions). +encodeFS :: String -> IO OsString +encodeFS = fmap OsString . PF.encodeFS + + +-- | Partial unicode friendly decoding. +-- +-- On windows this decodes as UTF16-LE (strictly), which is a pretty good guess. +-- On unix this decodes as UTF8 (strictly), which is a good guess. Note that +-- filenames on unix are encoding agnostic char arrays. +-- +-- Throws a 'EncodingException' if decoding fails. +decodeUtf :: MonadThrow m => OsString -> m String +decodeUtf (OsString x) = PF.decodeUtf x + +-- | Decode an 'OsString' with the specified encoding. +-- +-- The String is forced into memory to catch all exceptions. +decodeWith :: TextEncoding -- ^ unix text encoding + -> TextEncoding -- ^ windows text encoding + -> OsString + -> Either EncodingException String +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) +decodeWith _ winEnc (OsString x) = PF.decodeWith winEnc x +#else +decodeWith unixEnc _ (OsString x) = PF.decodeWith unixEnc x +#endif + + +-- | Like 'decodeUtf', except this mimics the behavior of the base library when doing filesystem +-- operations, which is: +-- +-- 1. on unix, uses shady PEP 383 style encoding (based on the current locale, +-- but PEP 383 only works properly on UTF-8 encodings, so good luck) +-- 2. on windows does permissive UTF-16 encoding, where coding errors generate +-- Chars in the surrogate range +-- +-- Looking up the locale requires IO. If you're not worried about calls +-- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure +-- to deeply evaluate the result to catch exceptions). +decodeFS :: OsString -> IO String +decodeFS (OsString x) = PF.decodeFS x + + +-- | Constructs an @OsString@ from a ByteString. +-- +-- On windows, this ensures valid UCS-2LE, on unix it is passed unchanged/unchecked. +-- +-- Throws 'EncodingException' on invalid UCS-2LE on windows (although unlikely). +fromBytes :: MonadThrow m + => ByteString + -> m OsString +fromBytes = fmap OsString . PF.fromBytes + + +-- | QuasiQuote an 'OsString'. This accepts Unicode characters +-- and encodes as UTF-8 on unix and UTF-16 on windows. +osstr :: QuasiQuoter +osstr = + QuasiQuoter +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) + { quoteExp = \s -> do + osp <- either (fail . show) (pure . OsString) . PF.encodeWith (mkUTF16le ErrorOnCodingFailure) $ s + lift osp + , quotePat = \_ -> + fail "illegal QuasiQuote (allowed as expression only, used as a pattern)" + , quoteType = \_ -> + fail "illegal QuasiQuote (allowed as expression only, used as a type)" + , quoteDec = \_ -> + fail "illegal QuasiQuote (allowed as expression only, used as a declaration)" + } +#else + { quoteExp = \s -> do + osp <- either (fail . show) (pure . OsString) . PF.encodeWith (mkUTF8 ErrorOnCodingFailure) $ s + lift osp + , quotePat = \_ -> + fail "illegal QuasiQuote (allowed as expression only, used as a pattern)" + , quoteType = \_ -> + fail "illegal QuasiQuote (allowed as expression only, used as a type)" + , quoteDec = \_ -> + fail "illegal QuasiQuote (allowed as expression only, used as a declaration)" + } +#endif + + +-- | Unpack an 'OsString' to a list of 'OsChar'. +unpack :: OsString -> [OsChar] +unpack (OsString x) = OsChar <$> PF.unpack x + + +-- | Pack a list of 'OsChar' to an 'OsString' +-- +-- Note that using this in conjunction with 'unsafeFromChar' to +-- convert from @[Char]@ to 'OsString' is probably not what +-- you want, because it will truncate unicode code points. +pack :: [OsChar] -> OsString +pack = OsString . PF.pack . fmap (\(OsChar x) -> x) + + +-- | Truncates on unix to 1 and on Windows to 2 octets. +unsafeFromChar :: Char -> OsChar +unsafeFromChar = OsChar . PF.unsafeFromChar + +-- | Converts back to a unicode codepoint (total). +toChar :: OsChar -> Char +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) +toChar (OsChar (WindowsChar w)) = chr $ fromIntegral w +#else +toChar (OsChar (PosixChar w)) = chr $ fromIntegral w +#endif + diff --git a/System/OsString/Internal/Types.hs b/System/OsString/Internal/Types.hs index 2f7c2259..7cdaac1d 100644 --- a/System/OsString/Internal/Types.hs +++ b/System/OsString/Internal/Types.hs @@ -1,11 +1,3 @@ -{-# LANGUAGE CPP #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE PackageImports #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE TemplateHaskell #-} -{-# LANGUAGE DeriveGeneric #-} -{-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE TypeApplications #-} {-# LANGUAGE PatternSynonyms #-} module System.OsString.Internal.Types {-# DEPRECATED "Use System.OsString.Internal.Types from os-string >= 2.0.0 package instead. This module will be removed in filepath >= 1.5." #-} @@ -29,218 +21,4 @@ module System.OsString.Internal.Types {-# DEPRECATED "Use System.OsString.Intern ) where - -import Control.DeepSeq -import Data.Data -import Data.Word -import Language.Haskell.TH.Syntax - ( Lift (..), lift ) -#if !MIN_VERSION_base(4,11,0) -import Data.Semigroup -#endif -import GHC.Generics (Generic) - -import System.OsPath.Encoding.Internal -import qualified System.OsPath.Data.ByteString.Short as BS -import qualified System.OsPath.Data.ByteString.Short.Word16 as BS16 -#if MIN_VERSION_template_haskell(2,16,0) -import qualified Language.Haskell.TH.Syntax as TH -#endif - --- Using unpinned bytearrays to avoid Heap fragmentation and --- which are reasonably cheap to pass to FFI calls --- wrapped with typeclass-friendly types allowing to avoid CPP --- --- Note that, while unpinned bytearrays incur a memcpy on each --- FFI call, this overhead is generally much preferable to --- the memory fragmentation of pinned bytearrays - --- | Commonly used windows string as wide character bytes. -newtype WindowsString = WindowsString { getWindowsString :: BS.ShortByteString } - deriving (Eq, Ord, Semigroup, Monoid, Typeable, Generic, NFData) - --- | Decodes as UCS-2. -instance Show WindowsString where - -- cWcharsToChars_UCS2 is total - show = show . cWcharsToChars_UCS2 . BS16.unpack . getWindowsString - --- | Just a short bidirectional synonym for 'WindowsString' constructor. -pattern WS :: BS.ShortByteString -> WindowsString -pattern WS { unWS } <- WindowsString unWS where - WS a = WindowsString a -#if __GLASGOW_HASKELL__ >= 802 -{-# COMPLETE WS #-} -#endif - - -instance Lift WindowsString where - lift (WindowsString bs) - = [| WindowsString (BS.pack $(lift $ BS.unpack bs)) :: WindowsString |] -#if MIN_VERSION_template_haskell(2,17,0) - liftTyped = TH.unsafeCodeCoerce . TH.lift -#elif MIN_VERSION_template_haskell(2,16,0) - liftTyped = TH.unsafeTExpCoerce . TH.lift -#endif - --- | Commonly used Posix string as uninterpreted @char[]@ --- array. -newtype PosixString = PosixString { getPosixString :: BS.ShortByteString } - deriving (Eq, Ord, Semigroup, Monoid, Typeable, Generic, NFData) - --- | Prints the raw bytes without decoding. -instance Show PosixString where - show (PosixString ps) = show ps - --- | Just a short bidirectional synonym for 'PosixString' constructor. -pattern PS :: BS.ShortByteString -> PosixString -pattern PS { unPS } <- PosixString unPS where - PS a = PosixString a -#if __GLASGOW_HASKELL__ >= 802 -{-# COMPLETE PS #-} -#endif - -instance Lift PosixString where - lift (PosixString bs) - = [| PosixString (BS.pack $(lift $ BS.unpack bs)) :: PosixString |] -#if MIN_VERSION_template_haskell(2,17,0) - liftTyped = TH.unsafeCodeCoerce . TH.lift -#elif MIN_VERSION_template_haskell(2,16,0) - liftTyped = TH.unsafeTExpCoerce . TH.lift -#endif - - -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -type PlatformString = WindowsString -#else -type PlatformString = PosixString -#endif - -newtype WindowsChar = WindowsChar { getWindowsChar :: Word16 } - deriving (Eq, Ord, Typeable, Generic, NFData) - -instance Show WindowsChar where - show (WindowsChar wc) = show wc - -newtype PosixChar = PosixChar { getPosixChar :: Word8 } - deriving (Eq, Ord, Typeable, Generic, NFData) - -instance Show PosixChar where - show (PosixChar pc) = show pc - --- | Just a short bidirectional synonym for 'WindowsChar' constructor. -pattern WW :: Word16 -> WindowsChar -pattern WW { unWW } <- WindowsChar unWW where - WW a = WindowsChar a -#if __GLASGOW_HASKELL__ >= 802 -{-# COMPLETE WW #-} -#endif - --- | Just a short bidirectional synonym for 'PosixChar' constructor. -pattern PW :: Word8 -> PosixChar -pattern PW { unPW } <- PosixChar unPW where - PW a = PosixChar a -#if __GLASGOW_HASKELL__ >= 802 -{-# COMPLETE PW #-} -#endif - -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -type PlatformChar = WindowsChar -#else -type PlatformChar = PosixChar -#endif - - --- | Newtype representing short operating system specific strings. --- --- Internally this is either 'WindowsString' or 'PosixString', --- depending on the platform. Both use unpinned --- 'ShortByteString' for efficiency. --- --- The constructor is only exported via "System.OsString.Internal.Types", since --- dealing with the internals isn't generally recommended, but supported --- in case you need to write platform specific code. -newtype OsString = OsString { getOsString :: PlatformString } - deriving (Typeable, Generic, NFData) - --- | On windows, decodes as UCS-2. On unix prints the raw bytes without decoding. -instance Show OsString where - show (OsString os) = show os - --- | Byte equality of the internal representation. -instance Eq OsString where - (OsString a) == (OsString b) = a == b - --- | Byte ordering of the internal representation. -instance Ord OsString where - compare (OsString a) (OsString b) = compare a b - - --- | \"String-Concatenation\" for 'OsString'. This is __not__ the same --- as '()'. -instance Monoid OsString where -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) - mempty = OsString (WindowsString BS.empty) -#if MIN_VERSION_base(4,16,0) - mappend = (<>) -#else - mappend (OsString (WindowsString a)) (OsString (WindowsString b)) - = OsString (WindowsString (mappend a b)) -#endif -#else - mempty = OsString (PosixString BS.empty) -#if MIN_VERSION_base(4,16,0) - mappend = (<>) -#else - mappend (OsString (PosixString a)) (OsString (PosixString b)) - = OsString (PosixString (mappend a b)) -#endif -#endif -#if MIN_VERSION_base(4,11,0) -instance Semigroup OsString where -#if MIN_VERSION_base(4,16,0) -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) - (<>) (OsString (WindowsString a)) (OsString (WindowsString b)) - = OsString (WindowsString (mappend a b)) -#else - (<>) (OsString (PosixString a)) (OsString (PosixString b)) - = OsString (PosixString (mappend a b)) -#endif -#else - (<>) = mappend -#endif -#endif - - -instance Lift OsString where -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) - lift (OsString (WindowsString bs)) - = [| OsString (WindowsString (BS.pack $(lift $ BS.unpack bs))) :: OsString |] -#else - lift (OsString (PosixString bs)) - = [| OsString (PosixString (BS.pack $(lift $ BS.unpack bs))) :: OsString |] -#endif -#if MIN_VERSION_template_haskell(2,17,0) - liftTyped = TH.unsafeCodeCoerce . TH.lift -#elif MIN_VERSION_template_haskell(2,16,0) - liftTyped = TH.unsafeTExpCoerce . TH.lift -#endif - - --- | Newtype representing a code unit. --- --- On Windows, this is restricted to two-octet codepoints 'Word16', --- on POSIX one-octet ('Word8'). -newtype OsChar = OsChar { getOsChar :: PlatformChar } - deriving (Typeable, Generic, NFData) - -instance Show OsChar where - show (OsChar pc) = show pc - --- | Byte equality of the internal representation. -instance Eq OsChar where - (OsChar a) == (OsChar b) = a == b - --- | Byte ordering of the internal representation. -instance Ord OsChar where - compare (OsChar a) (OsChar b) = compare a b - +import System.OsString.Internal.Types.Hidden \ No newline at end of file diff --git a/System/OsString/Internal/Types/Hidden.hs b/System/OsString/Internal/Types/Hidden.hs new file mode 100644 index 00000000..2fa8dbf8 --- /dev/null +++ b/System/OsString/Internal/Types/Hidden.hs @@ -0,0 +1,246 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE PackageImports #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE PatternSynonyms #-} + +module System.OsString.Internal.Types.Hidden + ( + WindowsString(..) + , pattern WS + , unWS + , PosixString(..) + , unPS + , pattern PS + , PlatformString + , WindowsChar(..) + , unWW + , pattern WW + , PosixChar(..) + , unPW + , pattern PW + , PlatformChar + , OsString(..) + , OsChar(..) + ) +where + + +import Control.DeepSeq +import Data.Data +import Data.Word +import Language.Haskell.TH.Syntax + ( Lift (..), lift ) +#if !MIN_VERSION_base(4,11,0) +import Data.Semigroup +#endif +import GHC.Generics (Generic) + +import System.OsPath.Encoding.Internal.Hidden +import qualified System.OsPath.Data.ByteString.Short.Hidden as BS +import qualified System.OsPath.Data.ByteString.Short.Word16.Hidden as BS16 +#if MIN_VERSION_template_haskell(2,16,0) +import qualified Language.Haskell.TH.Syntax as TH +#endif + +-- Using unpinned bytearrays to avoid Heap fragmentation and +-- which are reasonably cheap to pass to FFI calls +-- wrapped with typeclass-friendly types allowing to avoid CPP +-- +-- Note that, while unpinned bytearrays incur a memcpy on each +-- FFI call, this overhead is generally much preferable to +-- the memory fragmentation of pinned bytearrays + +-- | Commonly used windows string as wide character bytes. +newtype WindowsString = WindowsString { getWindowsString :: BS.ShortByteString } + deriving (Eq, Ord, Semigroup, Monoid, Typeable, Generic, NFData) + +-- | Decodes as UCS-2. +instance Show WindowsString where + -- cWcharsToChars_UCS2 is total + show = show . cWcharsToChars_UCS2 . BS16.unpack . getWindowsString + +-- | Just a short bidirectional synonym for 'WindowsString' constructor. +pattern WS :: BS.ShortByteString -> WindowsString +pattern WS { unWS } <- WindowsString unWS where + WS a = WindowsString a +#if __GLASGOW_HASKELL__ >= 802 +{-# COMPLETE WS #-} +#endif + + +instance Lift WindowsString where + lift (WindowsString bs) + = [| WindowsString (BS.pack $(lift $ BS.unpack bs)) :: WindowsString |] +#if MIN_VERSION_template_haskell(2,17,0) + liftTyped = TH.unsafeCodeCoerce . TH.lift +#elif MIN_VERSION_template_haskell(2,16,0) + liftTyped = TH.unsafeTExpCoerce . TH.lift +#endif + +-- | Commonly used Posix string as uninterpreted @char[]@ +-- array. +newtype PosixString = PosixString { getPosixString :: BS.ShortByteString } + deriving (Eq, Ord, Semigroup, Monoid, Typeable, Generic, NFData) + +-- | Prints the raw bytes without decoding. +instance Show PosixString where + show (PosixString ps) = show ps + +-- | Just a short bidirectional synonym for 'PosixString' constructor. +pattern PS :: BS.ShortByteString -> PosixString +pattern PS { unPS } <- PosixString unPS where + PS a = PosixString a +#if __GLASGOW_HASKELL__ >= 802 +{-# COMPLETE PS #-} +#endif + +instance Lift PosixString where + lift (PosixString bs) + = [| PosixString (BS.pack $(lift $ BS.unpack bs)) :: PosixString |] +#if MIN_VERSION_template_haskell(2,17,0) + liftTyped = TH.unsafeCodeCoerce . TH.lift +#elif MIN_VERSION_template_haskell(2,16,0) + liftTyped = TH.unsafeTExpCoerce . TH.lift +#endif + + +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) +type PlatformString = WindowsString +#else +type PlatformString = PosixString +#endif + +newtype WindowsChar = WindowsChar { getWindowsChar :: Word16 } + deriving (Eq, Ord, Typeable, Generic, NFData) + +instance Show WindowsChar where + show (WindowsChar wc) = show wc + +newtype PosixChar = PosixChar { getPosixChar :: Word8 } + deriving (Eq, Ord, Typeable, Generic, NFData) + +instance Show PosixChar where + show (PosixChar pc) = show pc + +-- | Just a short bidirectional synonym for 'WindowsChar' constructor. +pattern WW :: Word16 -> WindowsChar +pattern WW { unWW } <- WindowsChar unWW where + WW a = WindowsChar a +#if __GLASGOW_HASKELL__ >= 802 +{-# COMPLETE WW #-} +#endif + +-- | Just a short bidirectional synonym for 'PosixChar' constructor. +pattern PW :: Word8 -> PosixChar +pattern PW { unPW } <- PosixChar unPW where + PW a = PosixChar a +#if __GLASGOW_HASKELL__ >= 802 +{-# COMPLETE PW #-} +#endif + +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) +type PlatformChar = WindowsChar +#else +type PlatformChar = PosixChar +#endif + + +-- | Newtype representing short operating system specific strings. +-- +-- Internally this is either 'WindowsString' or 'PosixString', +-- depending on the platform. Both use unpinned +-- 'ShortByteString' for efficiency. +-- +-- The constructor is only exported via "System.OsString.Internal.Types", since +-- dealing with the internals isn't generally recommended, but supported +-- in case you need to write platform specific code. +newtype OsString = OsString { getOsString :: PlatformString } + deriving (Typeable, Generic, NFData) + +-- | On windows, decodes as UCS-2. On unix prints the raw bytes without decoding. +instance Show OsString where + show (OsString os) = show os + +-- | Byte equality of the internal representation. +instance Eq OsString where + (OsString a) == (OsString b) = a == b + +-- | Byte ordering of the internal representation. +instance Ord OsString where + compare (OsString a) (OsString b) = compare a b + + +-- | \"String-Concatenation\" for 'OsString'. This is __not__ the same +-- as '()'. +instance Monoid OsString where +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) + mempty = OsString (WindowsString BS.empty) +#if MIN_VERSION_base(4,16,0) + mappend = (<>) +#else + mappend (OsString (WindowsString a)) (OsString (WindowsString b)) + = OsString (WindowsString (mappend a b)) +#endif +#else + mempty = OsString (PosixString BS.empty) +#if MIN_VERSION_base(4,16,0) + mappend = (<>) +#else + mappend (OsString (PosixString a)) (OsString (PosixString b)) + = OsString (PosixString (mappend a b)) +#endif +#endif +#if MIN_VERSION_base(4,11,0) +instance Semigroup OsString where +#if MIN_VERSION_base(4,16,0) +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) + (<>) (OsString (WindowsString a)) (OsString (WindowsString b)) + = OsString (WindowsString (mappend a b)) +#else + (<>) (OsString (PosixString a)) (OsString (PosixString b)) + = OsString (PosixString (mappend a b)) +#endif +#else + (<>) = mappend +#endif +#endif + + +instance Lift OsString where +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) + lift (OsString (WindowsString bs)) + = [| OsString (WindowsString (BS.pack $(lift $ BS.unpack bs))) :: OsString |] +#else + lift (OsString (PosixString bs)) + = [| OsString (PosixString (BS.pack $(lift $ BS.unpack bs))) :: OsString |] +#endif +#if MIN_VERSION_template_haskell(2,17,0) + liftTyped = TH.unsafeCodeCoerce . TH.lift +#elif MIN_VERSION_template_haskell(2,16,0) + liftTyped = TH.unsafeTExpCoerce . TH.lift +#endif + + +-- | Newtype representing a code unit. +-- +-- On Windows, this is restricted to two-octet codepoints 'Word16', +-- on POSIX one-octet ('Word8'). +newtype OsChar = OsChar { getOsChar :: PlatformChar } + deriving (Typeable, Generic, NFData) + +instance Show OsChar where + show (OsChar pc) = show pc + +-- | Byte equality of the internal representation. +instance Eq OsChar where + (OsChar a) == (OsChar b) = a == b + +-- | Byte ordering of the internal representation. +instance Ord OsChar where + compare (OsChar a) (OsChar b) = compare a b + diff --git a/System/OsString/Posix.hs b/System/OsString/Posix.hs index 33b4d843..de00ab3c 100644 --- a/System/OsString/Posix.hs +++ b/System/OsString/Posix.hs @@ -1,7 +1,6 @@ -{-# LANGUAGE CPP #-} -#undef WINDOWS -#define MODULE_NAME Posix -#define PLATFORM_STRING PosixString -#define PLATFORM_WORD PosixChar -#define IS_WINDOWS False -#include "Common.hs" +module System.OsString.Posix {-# DEPRECATED "Use System.OsString.Posix from os-string >= 2.0.0 package instead. This module will be removed in filepath >= 1.5." #-} + ( module System.OsString.Posix.Hidden + ) + where + +import System.OsString.Posix.Hidden diff --git a/System/OsString/Posix/Hidden.hs b/System/OsString/Posix/Hidden.hs new file mode 100644 index 00000000..b105769a --- /dev/null +++ b/System/OsString/Posix/Hidden.hs @@ -0,0 +1,7 @@ +{-# LANGUAGE CPP #-} +#undef WINDOWS +#define MODULE_NAME Posix +#define PLATFORM_STRING PosixString +#define PLATFORM_WORD PosixChar +#define IS_WINDOWS False +#include "../Common.hs" diff --git a/System/OsString/Types.hs b/System/OsString/Types.hs index 67a4e116..b9985959 100644 --- a/System/OsString/Types.hs +++ b/System/OsString/Types.hs @@ -11,4 +11,4 @@ module System.OsString.Types {-# DEPRECATED "Use System.OsString.Types from os-s ) where -import System.OsString.Internal.Types +import System.OsString.Internal.Types.Hidden diff --git a/System/OsString/Windows.hs b/System/OsString/Windows.hs index 1f15653b..d8ad7b0d 100644 --- a/System/OsString/Windows.hs +++ b/System/OsString/Windows.hs @@ -1,13 +1,6 @@ -{-# LANGUAGE CPP #-} -#undef POSIX -#define MODULE_NAME Windows -#define PLATFORM_STRING WindowsString -#define PLATFORM_WORD WindowsChar -#define IS_WINDOWS True -#define WINDOWS -#include "Common.hs" -#undef MODULE_NAME -#undef FILEPATH_NAME -#undef OSSTRING_NAME -#undef IS_WINDOWS -#undef WINDOWS +module System.OsString.Windows {-# DEPRECATED "Use System.OsString.Posix from os-string >= 2.0.0 package instead. This module will be removed in filepath >= 1.5." #-} + ( module System.OsString.Windows.Hidden + ) + where + +import System.OsString.Windows.Hidden diff --git a/System/OsString/Windows/Hidden.hs b/System/OsString/Windows/Hidden.hs new file mode 100644 index 00000000..87e50047 --- /dev/null +++ b/System/OsString/Windows/Hidden.hs @@ -0,0 +1,13 @@ +{-# LANGUAGE CPP #-} +#undef POSIX +#define MODULE_NAME Windows +#define PLATFORM_STRING WindowsString +#define PLATFORM_WORD WindowsChar +#define IS_WINDOWS True +#define WINDOWS +#include "../Common.hs" +#undef MODULE_NAME +#undef FILEPATH_NAME +#undef OSSTRING_NAME +#undef IS_WINDOWS +#undef WINDOWS diff --git a/bench/BenchFilePath.hs b/bench/BenchFilePath.hs index 5319f1c0..958d4c56 100644 --- a/bench/BenchFilePath.hs +++ b/bench/BenchFilePath.hs @@ -6,15 +6,15 @@ module Main where import System.OsPath.Types import System.OsPath.Encoding ( ucs2le ) -import qualified System.OsString.Internal.Types as OST +import qualified System.OsString.Internal.Types.Hidden as OST import qualified Data.ByteString.Short as SBS import Test.Tasty.Bench import qualified System.FilePath.Posix as PF import qualified System.FilePath.Posix as WF -import qualified System.OsString.Posix as OSP -import qualified System.OsString.Windows as WSP +import qualified System.OsString.Posix.Hidden as OSP +import qualified System.OsString.Windows.Hidden as WSP import qualified System.OsPath.Posix as APF import qualified System.OsPath.Windows as AWF diff --git a/filepath.cabal b/filepath.cabal index a44b2066..3918cb12 100644 --- a/filepath.cabal +++ b/filepath.cabal @@ -94,6 +94,16 @@ library System.OsString.Posix System.OsString.Windows + other-modules: + System.OsPath.Data.ByteString.Short.Hidden + System.OsPath.Data.ByteString.Short.Internal.Hidden + System.OsPath.Data.ByteString.Short.Word16.Hidden + System.OsPath.Encoding.Internal.Hidden + System.OsString.Internal.Hidden + System.OsString.Internal.Types.Hidden + System.OsString.Posix.Hidden + System.OsString.Windows.Hidden + other-extensions: CPP PatternGuards