haskell · clyring · Feb 4, 2024 · Nov 29, 2023 · Nov 29, 2023 · Nov 29, 2023
diff --git a/Data/ByteString/Lazy.hs b/Data/ByteString/Lazy.hs
@@ -1,8 +1,11 @@
-{-# LANGUAGE BangPatterns #-}
 {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
 {-# OPTIONS_HADDOCK prune #-}
 {-# LANGUAGE Trustworthy #-}
 
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
 -- |
 -- Module : Data.ByteString.Lazy
 -- Copyright : (c) Don Stewart 2006
@@ -237,9 +240,9 @@ import qualified Data.ByteString as P (ByteString) -- type name only
 import qualified Data.ByteString as S -- S for strict (hmm...)
 import qualified Data.ByteString.Internal.Type as S
 import qualified Data.ByteString.Unsafe as S
-import qualified Data.ByteString.Lazy.Internal.Deque as D
 import Data.ByteString.Lazy.Internal
 
+import Control.Exception (assert)
 import Control.Monad (mplus)
 import Data.Word (Word8)
 import Data.Int (Int64)
@@ -790,15 +793,73 @@ take i cs0 = take' i cs0
 --
 -- @since 0.11.2.0
 takeEnd :: Int64 -> ByteString -> ByteString
-takeEnd i _ | i <= 0 = Empty
-takeEnd i cs0 = takeEnd' i cs0
- where takeEnd' 0 _ = Empty
- takeEnd' n cs =
- snd $ foldrChunks takeTuple (n,Empty) cs
- takeTuple _ (0, cs) = (0, cs)
- takeTuple c (n, cs)
- | n > fromIntegral (S.length c) = (n - fromIntegral (S.length c), Chunk c cs)
- | otherwise = (0, Chunk (S.takeEnd (fromIntegral n) c) cs)
+takeEnd i bs
+ | i <= 0 = Empty
+ | otherwise = splitAtEndFold (\_ res -> res) id i bs
+
+-- | Helper function for implementing 'takeEnd' and 'dropEnd'
+splitAtEndFold
+ :: forall result
+ . (S.StrictByteString -> result -> result)
+ -- ^ What to do when one chunk of output is ready
+ -- (The StrictByteString will not be empty.)
+ -> (ByteString -> result)
+ -- ^ What to do when the split-point is reached
+ -> Int64
+ -- ^ Number of bytes to leave at the end (must be strictly positive)
+ -> ByteString -- ^ Input ByteString
+ -> result
+{-# INLINE splitAtEndFold #-}
+splitAtEndFold step end len bs0 = assert (len > 0) $ case bs0 of
+ Empty -> end Empty
+ Chunk c t -> goR len c t t
+ where
+ -- Idea: Keep two references into the input ByteString:
+ -- "bsL" tracks the current split point,
+ -- "bsR" tracks the yet-unprocessed tail.
+ -- When they are closer than "len" bytes apart, process more input. ("goR")
+ -- When they are at least "len" bytes apart, produce more output. ("goL")
+ goR :: Int64 -> S.StrictByteString -> ByteString -> ByteString -> result
+ goR !undershoot nextOutput@(S.BS noFp noLen) bsL bsR =
+ assert (undershoot > 0) $
+ -- INVARIANT: length bsL == length bsR + len - undershoot
+ -- (not 'assert'ed because that would break our laziness properties)
+ case bsR of
+ Empty
+ | undershoot >= intToInt64 noLen
+ -> end (Chunk nextOutput bsL)
+ | undershootW <- fromIntegral @Int64 @Int undershoot
+ -- conversion Int64->Int is OK because 0 < undershoot < noLen
+ , splitIndex <- noLen - undershootW
+ , beforeSplit <- S.BS noFp splitIndex
+ , afterSplit <- S.BS (noFp `S.plusForeignPtr` splitIndex) undershootW
+ -> step beforeSplit $ end (Chunk afterSplit bsL)
+
+ Chunk (S.BS _ cLen) newBsR
+ | cLen64 <- intToInt64 cLen
+ , undershoot > cLen64
+ -> goR (undershoot - cLen64) nextOutput bsL newBsR
+ | undershootW <- fromIntegral @Int64 @Int undershoot
+ -> step nextOutput $ goL (cLen - undershootW) bsL newBsR
+
+ goL :: Int -> ByteString -> ByteString -> result
+ goL !overshoot bsL bsR =
+ assert (overshoot >= 0) $
+ -- INVARIANT: length bsL == length bsR + len + intToInt64 overshoot
+ -- (not 'assert'ed because that would break our laziness properties)
+ case bsL of
+ Empty -> splitAtEndFoldInvariantFailed
+ Chunk c@(S.BS _ cLen) newBsL
+ | overshoot >= cLen
+ -> step c $ goL (overshoot - cLen) newBsL bsR
+ | otherwise
+ -> goR (intToInt64 $ cLen - overshoot) c newBsL bsR
+
+splitAtEndFoldInvariantFailed :: a
+-- See Note [Float error calls out of INLINABLE things] in D.B.Internal.Type
+splitAtEndFoldInvariantFailed =
+ moduleError "splitAtEndFold"
+ "internal error: bsL not longer than bsR"
 
 -- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 -- elements, or 'empty' if @n > 'length' xs@.
@@ -824,44 +885,9 @@ drop i cs0 = drop' i cs0
 --
 -- @since 0.11.2.0
 dropEnd :: Int64 -> ByteString -> ByteString
-dropEnd i p | i <= 0 = p
-dropEnd i p = go D.empty p
- where go :: D.Deque -> ByteString -> ByteString
- go deque (Chunk c cs)
- | D.byteLength deque < i = go (D.snoc c deque) cs
- | otherwise =
- let (output, deque') = getOutput empty (D.snoc c deque)
- in foldrChunks Chunk (go deque' cs) output
- go deque Empty = fromDeque $ dropEndBytes deque i
-
- len c = fromIntegral (S.length c)
-
- -- get a `ByteString` from all the front chunks of the accumulating deque
- -- for which we know they won't be dropped
- getOutput :: ByteString -> D.Deque -> (ByteString, D.Deque)
- getOutput out deque = case D.popFront deque of
- Nothing -> (reverseChunks out, deque)
- Just (x, deque') | D.byteLength deque' >= i ->
- getOutput (Chunk x out) deque'
- _ -> (reverseChunks out, deque)
-
- -- reverse a `ByteString`s chunks, keeping all internal `S.StrictByteString`s
- -- unchanged
- reverseChunks = foldlChunks (flip Chunk) empty
-
- -- drop n elements from the rear of the accumulating `deque`
- dropEndBytes :: D.Deque -> Int64 -> D.Deque
- dropEndBytes deque n = case D.popRear deque of
- Nothing -> deque
- Just (deque', x) | len x <= n -> dropEndBytes deque' (n - len x)
- | otherwise ->
- D.snoc (S.dropEnd (fromIntegral n) x) deque'
-
- -- build a lazy ByteString from an accumulating `deque`
- fromDeque :: D.Deque -> ByteString
- fromDeque deque =
- List.foldr chunk Empty (D.front deque) `append`
- List.foldl' (flip chunk) Empty (D.rear deque)
+dropEnd i p
+ | i <= 0 = p
+ | otherwise = splitAtEndFold Chunk (const Empty) i p
 
 -- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
@@ -1688,6 +1714,9 @@ revNonEmptyChunks = List.foldl' (flip Chunk) Empty
 revChunks :: [P.ByteString] -> ByteString
 revChunks = List.foldl' (flip chunk) Empty
 
+intToInt64 :: Int -> Int64
+intToInt64 = fromIntegral @Int @Int64
+
 -- $IOChunk
 --
 -- ⚠ Using lazy I\/O functions like 'readFile' or 'hGetContents'

diff --git a/Data/ByteString/Lazy/Internal/Deque.hs b/Data/ByteString/Lazy/Internal/Deque.hs
diff --git a/bytestring.cabal b/bytestring.cabal
@@ -106,7 +106,6 @@ library
  Data.ByteString.Builder.RealFloat.Internal
  Data.ByteString.Builder.RealFloat.TableGenerator
  Data.ByteString.Internal.Type
- Data.ByteString.Lazy.Internal.Deque
  Data.ByteString.Lazy.ReadInt
  Data.ByteString.Lazy.ReadNat
  Data.ByteString.ReadInt

diff --git a/tests/Properties/ByteString.hs b/tests/Properties/ByteString.hs
@@ -80,6 +80,10 @@ import Test.Tasty
 import Test.Tasty.QuickCheck
 import QuickCheckUtils
 
+#ifdef BYTESTRING_LAZY
+import Data.Int
+#endif
+
 #ifndef BYTESTRING_CHAR8
 toElem :: Word8 -> Word8
 toElem = id
@@ -114,16 +118,25 @@ instance Arbitrary Natural where
 
 testRdInt :: forall a. (Arbitrary a, RdInt a) => String -> TestTree
 testRdInt s = testGroup s $
- [ testProperty "from string" $ \ prefix value suffix ->
+ [ testProperty "from string" $ int64OK $ \value prefix suffix ->
  let si = show @a value
  b = prefix <> B.pack si <> suffix
  in fmap (second B.unpack) (bread @a b)
  === sread @a (B.unpack prefix ++ si ++ B.unpack suffix)
- , testProperty "from number" $ \n ->
+ , testProperty "from number" $ int64OK $ \n ->
  bread @a (B.pack (show n)) === Just (n, B.empty)
  ]
 #endif
 
+intToIndexTy :: Int -> IndexTy
+#ifdef BYTESTRING_LAZY
+type IndexTy = Int64
+intToIndexTy = fromIntegral @Int @Int64
+#else
+type IndexTy = Int
+intToIndexTy = id
+#endif
+
 tests :: [TestTree]
 tests =
  [ testProperty "pack . unpack" $
@@ -308,7 +321,7 @@ tests =
 #endif
 
  , testProperty "drop" $
- \n x -> B.unpack (B.drop n x) === List.genericDrop n (B.unpack x)
+ \(intToIndexTy -> n) x -> B.unpack (B.drop n x) === List.genericDrop n (B.unpack x)
  , testProperty "drop 10" $
  \x -> let n = 10 in B.unpack (B.drop n x) === List.genericDrop n (B.unpack x)
  , testProperty "drop 2^31" $
@@ -325,7 +338,7 @@ tests =
 #endif
 
  , testProperty "take" $
- \n x -> B.unpack (B.take n x) === List.genericTake n (B.unpack x)
+ \(intToIndexTy -> n) x -> B.unpack (B.take n x) === List.genericTake n (B.unpack x)
  , testProperty "take 10" $
  \x -> let n = 10 in B.unpack (B.take n x) === List.genericTake n (B.unpack x)
  , testProperty "take 2^31" $
@@ -342,11 +355,11 @@ tests =
 #endif
 
  , testProperty "dropEnd" $
- \n x -> B.dropEnd n x === B.take (B.length x - n) x
+ \(intToIndexTy -> n) x -> B.dropEnd n x === B.take (B.length x - n) x
  , testProperty "dropWhileEnd" $
  \f x -> B.dropWhileEnd f x === B.reverse (B.dropWhile f (B.reverse x))
  , testProperty "takeEnd" $
- \n x -> B.takeEnd n x === B.drop (B.length x - n) x
+ \(intToIndexTy -> n) x -> B.takeEnd n x === B.drop (B.length x - n) x
  , testProperty "takeWhileEnd" $
  \f x -> B.takeWhileEnd f x === B.reverse (B.takeWhile f (B.reverse x))
 
@@ -366,7 +379,7 @@ tests =
  , testProperty "compareLength 4" $
  \x (toElem -> c) -> B.compareLength (B.snoc x c <> undefined) (B.length x) === GT
  , testProperty "compareLength 5" $
- \x n -> B.compareLength x n === compare (B.length x) n
+ \x (intToIndexTy -> n) -> B.compareLength x n === compare (B.length x) n
  , testProperty "dropEnd lazy" $
  \(toElem -> c) -> B.take 1 (B.dropEnd 1 (B.singleton c <> B.singleton c <> B.singleton c <> undefined)) === B.singleton c
  , testProperty "dropWhileEnd lazy" $
@@ -470,7 +483,8 @@ tests =
  (l1 == l2 || l1 == l2 + 1) && sum (map B.length splits) + l2 == B.length x
 
  , testProperty "splitAt" $
- \n x -> (B.unpack *** B.unpack) (B.splitAt n x) === List.genericSplitAt n (B.unpack x)
+ \(intToIndexTy -> n) x -> (B.unpack *** B.unpack) (B.splitAt n x)
+ === List.genericSplitAt n (B.unpack x)
  , testProperty "splitAt 10" $
  \x -> let n = 10 in (B.unpack *** B.unpack) (B.splitAt n x) === List.genericSplitAt n (B.unpack x)
  , testProperty "splitAt (2^31)" $
@@ -594,13 +608,13 @@ tests =
 #endif
 
  , testProperty "index" $
- \(NonNegative n) x -> fromIntegral n < B.length x ==> B.index x (fromIntegral n) === B.unpack x !! n
+ \(NonNegative n) x -> intToIndexTy n < B.length x ==> B.index x (intToIndexTy n) === B.unpack x !! n
  , testProperty "indexMaybe" $
- \(NonNegative n) x -> fromIntegral n < B.length x ==> B.indexMaybe x (fromIntegral n) === Just (B.unpack x !! n)
+ \(NonNegative n) x -> intToIndexTy n < B.length x ==> B.indexMaybe x (intToIndexTy n) === Just (B.unpack x !! n)
  , testProperty "indexMaybe Nothing" $
- \n x -> (n :: Int) < 0 || fromIntegral n >= B.length x ==> B.indexMaybe x (fromIntegral n) === Nothing
+ \n x -> n < 0 || intToIndexTy n >= B.length x ==> B.indexMaybe x (intToIndexTy n) === Nothing
  , testProperty "!?" $
- \n x -> B.indexMaybe x (fromIntegral (n :: Int)) === x B.!? (fromIntegral n)
+ \(intToIndexTy -> n) x -> B.indexMaybe x n === x B.!? n
 
 #ifdef BYTESTRING_CHAR8
  , testProperty "isString" $

diff --git a/tests/QuickCheckUtils.hs b/tests/QuickCheckUtils.hs
@@ -7,6 +7,7 @@ module QuickCheckUtils
  , String8(..)
  , CByteString(..)
  , Sqrt(..)
+ , int64OK
  ) where
 
 import Test.Tasty.QuickCheck
@@ -112,3 +113,21 @@ instance Arbitrary SB.ShortByteString where
 
 instance CoArbitrary SB.ShortByteString where
  coarbitrary s = coarbitrary (SB.unpack s)
+
+-- | This /poison instance/ exists to make accidental mis-use
+-- of the @Arbitrary Int64@ instance a bit less likely.
+instance {-# OVERLAPPING #-} Testable (Int64 -> prop) where
+ property = error $ unlines [
+ "Found a test taking a raw Int64 argument.",
+ "'instance Arbitrary Int64' by default is likely to",
+ "produce very large numbers after the first few tests,",
+ "which doesn't make great indices into a LazyByteString.",
+ "For indices, try 'intToIndexTy' in Properties/ByteString.hs.",
+ "",
+ "If very few small-numbers tests is OK, use",
+ "'int64OK' to bypass this poison-instance."]
+
+-- | Use this to bypass the poison instance for @Testable (Int64 -> prop)@
+-- defined in "QuickCheckUtils".
+int64OK :: (Arbitrary a, Show a, Testable b) => (a -> b) -> Property
+int64OK f = propertyForAllShrinkShow arbitrary shrink (\v -> [show v]) f