From a41622fd69f3787ffe2da797844948a2a44f831d Mon Sep 17 00:00:00 2001 From: Matthew Craven Date: Wed, 5 Jun 2024 16:24:24 -0400 Subject: [PATCH] Improve benchmarks for small Builders (#680) * Improve benchmarks for small Builders * Do not measure the overhead of allocating destination chunks * Add several more benchmarks for P.cstring and P.cstringUtf8 * More benchmark fiddling * Update "since" markers for new NFData instances --- Data/ByteString/Builder/Internal.hs | 12 ++++ bench/BenchAll.hs | 106 +++++++++++++++++++++++----- 2 files changed, 99 insertions(+), 19 deletions(-) diff --git a/Data/ByteString/Builder/Internal.hs b/Data/ByteString/Builder/Internal.hs index 109c762e2..6605a0aa4 100644 --- a/Data/ByteString/Builder/Internal.hs +++ b/Data/ByteString/Builder/Internal.hs @@ -128,6 +128,7 @@ module Data.ByteString.Builder.Internal ( ) where import Control.Arrow (second) +import Control.DeepSeq (NFData(..)) import Data.Semigroup (Semigroup(..)) import Data.List.NonEmpty (NonEmpty(..)) @@ -155,11 +156,22 @@ import System.IO.Unsafe (unsafeDupablePerformIO) data BufferRange = BufferRange {-# UNPACK #-} !(Ptr Word8) -- First byte of range {-# UNPACK #-} !(Ptr Word8) -- First byte /after/ range +-- | @since 0.12.2.0 +instance NFData BufferRange where + rnf !_ = () + -- | A 'Buffer' together with the 'BufferRange' of free bytes. The filled -- space starts at offset 0 and ends at the first free byte. data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8) {-# UNPACK #-} !BufferRange +-- | Like the @NFData@ instance for @StrictByteString@, +-- this does not force the @ForeignPtrContents@ field +-- of the underlying @ForeignPtr@. +-- +-- @since 0.12.2.0 +instance NFData Buffer where + rnf !_ = () -- | Combined size of the filled and free space in the buffer. {-# INLINE bufferSize #-} diff --git a/bench/BenchAll.hs b/bench/BenchAll.hs index 76d2e5206..7f95a3e6f 100644 --- a/bench/BenchAll.hs +++ b/bench/BenchAll.hs @@ -14,9 +14,11 @@ import Data.Monoid import Data.Semigroup import Data.String import Test.Tasty.Bench + import Prelude hiding (words) import qualified Data.List as List import Control.DeepSeq +import Control.Exception import qualified Data.ByteString as S import qualified Data.ByteString.Char8 as S8 @@ -24,16 +26,17 @@ import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy.Char8 as L8 import Data.ByteString.Builder -import Data.ByteString.Builder.Extra (byteStringCopy, - byteStringInsert, - intHost) -import Data.ByteString.Builder.Internal (ensureFree) +import qualified Data.ByteString.Builder.Extra as Extra +import qualified Data.ByteString.Builder.Internal as BI import Data.ByteString.Builder.Prim (BoundedPrim, FixedPrim, (>$<)) import qualified Data.ByteString.Builder.Prim as P import qualified Data.ByteString.Builder.Prim.Internal as PI import Foreign +import Foreign.ForeignPtr +import qualified GHC.Exts as Exts +import GHC.Ptr (Ptr(..)) import System.Random @@ -121,15 +124,45 @@ loremIpsum = S8.unlines $ map S8.pack -- benchmark wrappers --------------------- -{-# INLINE benchB #-} benchB :: String -> a -> (a -> Builder) -> Benchmark -benchB name x b = - bench (name ++" (" ++ show nRepl ++ ")") $ - whnf (L.length . toLazyByteString . b) x +{-# INLINE benchB #-} +benchB name x b = benchB' (name ++" (" ++ show nRepl ++ ")") x b -{-# INLINE benchB' #-} benchB' :: String -> a -> (a -> Builder) -> Benchmark -benchB' name x b = bench name $ whnf (L.length . toLazyByteString . b) x +{-# INLINE benchB' #-} +benchB' name x mkB = + env (BI.newBuffer BI.defaultChunkSize) $ \buf -> + bench name $ whnfAppIO (runBuildStepOn buf . BI.runBuilder . mkB) x + +benchB'_ :: String -> Builder -> Benchmark +{-# INLINE benchB'_ #-} +benchB'_ name b = + env (BI.newBuffer BI.defaultChunkSize) $ \buf -> + bench name $ whnfIO (runBuildStepOn buf (BI.runBuilder b)) + +-- | @runBuilderOn@ runs a @BuildStep@'s actions all on the same @Buffer@. +-- It is used to avoid measuring driver allocation overhead. +runBuildStepOn :: BI.Buffer -> BI.BuildStep () -> IO () +{-# NOINLINE runBuildStepOn #-} +runBuildStepOn (BI.Buffer fp br@(BI.BufferRange op ope)) b = go b + where + !len = ope `minusPtr` op + + go :: BI.BuildStep () -> IO () + go bs = BI.fillWithBuildStep bs doneH fullH insertChunkH br + + doneH :: Ptr Word8 -> () -> IO () + doneH _ _ = touchForeignPtr fp + -- 'touchForeignPtr' is adequate because the given BuildStep + -- will always terminate. (We won't measure an infinite loop!) + + fullH :: Ptr Word8 -> Int -> BI.BuildStep () -> IO () + fullH _ minLen nextStep + | len < minLen = throwIO (ErrorCall "runBuilderOn: action expects too long of a BufferRange") + | otherwise = go nextStep + + insertChunkH :: Ptr Word8 -> S.ByteString -> BI.BuildStep () -> IO () + insertChunkH _ _ nextStep = go nextStep {-# INLINE benchBInts #-} benchBInts :: String -> ([Int] -> Builder) -> Benchmark @@ -247,18 +280,53 @@ largeTraversalInput = S.concat (replicate 10 byteStringData) smallTraversalInput :: S.ByteString smallTraversalInput = S8.pack "The quick brown fox" +asciiBuf, utf8Buf, halfNullBuf, allNullBuf :: Ptr Word8 +asciiBuf = Ptr "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"# +utf8Buf = Ptr "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\xc0\x80xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"# +halfNullBuf = Ptr "\xc0\x80xx\xc0\x80x\xc0\x80\xc0\x80x\xc0\x80\xc0\x80xx\xc0\x80\xc0\x80xxx\xc0\x80x\xc0\x80x\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80xxx\xc0\x80x\xc0\x80xx\xc0\x80\xc0\x80xxxxxxxxxx\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80x\xc0\x80\xc0\x80x\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80xxx"# +allNullBuf = Ptr "\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80"# + +asciiLit, utf8Lit :: Ptr Word8 -> Builder +asciiLit (Ptr p#) = P.cstring p# +utf8Lit (Ptr p#) = P.cstringUtf8 p# + +asciiStr, utf8Str :: String +asciiStr = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" +utf8Str = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + main :: IO () main = do defaultMain [ bgroup "Data.ByteString.Builder" [ bgroup "Small payload" - [ benchB' "mempty" () (const mempty) - , benchB' "ensureFree 8" () (const (ensureFree 8)) - , benchB' "intHost 1" 1 intHost - , benchB' "UTF-8 String (naive)" "hello world\0" fromString - , benchB' "UTF-8 String" () $ \() -> P.cstringUtf8 "hello world\0"# - , benchB' "String (naive)" "hello world!" fromString - , benchB' "String" () $ \() -> P.cstring "hello world!"# + [ benchB'_ "mempty" mempty + , bench "toLazyByteString mempty" $ nf toLazyByteString mempty + , benchB'_ "empty (10000 times)" $ + stimes (10000 :: Int) (Exts.lazy BI.empty) + , benchB'_ "ensureFree 8" (BI.ensureFree 8) + , benchB' "intHost 1" 1 Extra.intHost + , benchB' "UTF-8 String (12B, naive)" "hello world\0" fromString + , benchB'_ "UTF-8 String (12B)" $ utf8Lit (Ptr "hello world\xc0\x80"#) + , benchB' "UTF-8 String (64B, naive)" utf8Str fromString + , benchB'_ "UTF-8 String (64B, one null)" $ utf8Lit utf8Buf + , benchB' + "UTF-8 String (64B, one null, no shared work)" + utf8Buf + utf8Lit + , benchB'_ "UTF-8 String (64B, half nulls)" $ utf8Lit halfNullBuf + , benchB'_ "UTF-8 String (64B, all nulls)" $ utf8Lit allNullBuf + , benchB' + "UTF-8 String (64B, all nulls, no shared work)" + allNullBuf + utf8Lit + , benchB' + "UTF-8 String (1 byte, no shared work)" + (Ptr "\xc0\x80"#) + utf8Lit + , benchB' "ASCII String (12B, naive)" "hello world!" fromString + , benchB'_ "ASCII String (12B)" $ asciiLit (Ptr "hello wurld!"#) + , benchB' "ASCII String (64B, naive)" asciiStr fromString + , benchB'_ "ASCII String (64B)" $ asciiLit asciiBuf ] , bgroup "Encoding wrappers" @@ -275,11 +343,11 @@ main = do ] , bgroup "ByteString insertion" $ [ benchB "foldMap byteStringInsert" byteStringChunksData - (foldMap byteStringInsert) + (foldMap Extra.byteStringInsert) , benchB "foldMap byteString" byteStringChunksData (foldMap byteString) , benchB "foldMap byteStringCopy" byteStringChunksData - (foldMap byteStringCopy) + (foldMap Extra.byteStringCopy) ] , bgroup "Non-bounded encodings"