Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion containers-tests/containers-tests.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ common benchmark-deps
build-depends:
containers-tests
, deepseq >=1.1.0.0 && <1.6
, tasty-bench >=0.3.1 && <0.4
, tasty-bench >=0.3.1 && <0.5

-- Flags recommended by tasty-bench
if impl(ghc >= 8.6)
Expand Down
4 changes: 4 additions & 0 deletions containers/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@
* Improved performance for `Data.Set`'s `fromList`, `map` and `Data.Map`'s
`fromList`, `fromListWith`, `fromListWithKey`, `mapKeys`, `mapKeysWith`.

* Improved performance for many `Set` and `Map` modification operations,
including `insert` and `delete`, by inlining part of the balancing
routine. (Soumik Sarkar)

## Unreleased with `@since` annotation for 0.7.1:

### Additions
Expand Down
48 changes: 32 additions & 16 deletions containers/src/Data/Map/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4190,7 +4190,14 @@ ratio = 2
-- It is only written in such a way that every node is pattern-matched only once.

balance :: k -> a -> Map k a -> Map k a -> Map k a
balance k x l r = case l of
balance k x l r = case (l, r) of
(Bin ls _ _ _ _, Bin rs _ _ _ _)
| rs <= delta*ls && ls <= delta*rs -> Bin (1+ls+rs) k x l r
_ -> balance_ k x l r
{-# INLINE balance #-} -- See Note [Inlining balance] in Data.Set.Internal

balance_ :: k -> a -> Map k a -> Map k a -> Map k a
balance_ k x l r = case l of
Tip -> case r of
Tip -> Bin 1 k x Tip Tip
(Bin _ _ _ Tip Tip) -> Bin 2 k x Tip r
Expand All @@ -4214,13 +4221,12 @@ balance k x l r = case l of
| rls < ratio*rrs -> Bin (1+ls+rs) rk rx (Bin (1+ls+rls) k x l rl) rr
| otherwise -> Bin (1+ls+rs) rlk rlx (Bin (1+ls+size rll) k x l rll) (Bin (1+rrs+size rlr) rk rx rlr rr)
(_, _) -> error "Failure in Data.Map.balance"
| ls > delta*rs -> case (ll, lr) of
| {- ls > delta*rs -} otherwise -> case (ll, lr) of
(Bin lls _ _ _ _, Bin lrs lrk lrx lrl lrr)
| lrs < ratio*lls -> Bin (1+ls+rs) lk lx ll (Bin (1+rs+lrs) k x lr r)
| otherwise -> Bin (1+ls+rs) lrk lrx (Bin (1+lls+size lrl) lk lx ll lrl) (Bin (1+rs+size lrr) k x lrr r)
(_, _) -> error "Failure in Data.Map.balance"
| otherwise -> Bin (1+ls+rs) k x l r
{-# NOINLINE balance #-}
{-# NOINLINE balance_ #-}

-- Functions balanceL and balanceR are specialised versions of balance.
-- balanceL only checks whether the left subtree is too big,
Expand All @@ -4229,7 +4235,14 @@ balance k x l r = case l of
-- balanceL is called when left subtree might have been inserted to or when
-- right subtree might have been deleted from.
balanceL :: k -> a -> Map k a -> Map k a -> Map k a
balanceL k x l r = case r of
balanceL k x l r = case (l, r) of
(Bin ls _ _ _ _, Bin rs _ _ _ _)
| ls <= delta*rs -> Bin (1+ls+rs) k x l r
_ -> balanceL_ k x l r
{-# INLINE balanceL #-} -- See Note [Inlining balance] in Data.Set.Internal

balanceL_ :: k -> a -> Map k a -> Map k a -> Map k a
balanceL_ k x l r = case r of
Tip -> case l of
Tip -> Bin 1 k x Tip Tip
(Bin _ _ _ Tip Tip) -> Bin 2 k x l Tip
Expand All @@ -4242,19 +4255,24 @@ balanceL k x l r = case r of
(Bin rs _ _ _ _) -> case l of
Tip -> Bin (1+rs) k x Tip r

(Bin ls lk lx ll lr)
| ls > delta*rs -> case (ll, lr) of
(Bin ls lk lx ll lr) -> case (ll, lr) of
(Bin lls _ _ _ _, Bin lrs lrk lrx lrl lrr)
| lrs < ratio*lls -> Bin (1+ls+rs) lk lx ll (Bin (1+rs+lrs) k x lr r)
| otherwise -> Bin (1+ls+rs) lrk lrx (Bin (1+lls+size lrl) lk lx ll lrl) (Bin (1+rs+size lrr) k x lrr r)
(_, _) -> error "Failure in Data.Map.balanceL"
| otherwise -> Bin (1+ls+rs) k x l r
{-# NOINLINE balanceL #-}
(_, _) -> error "Failure in Data.Map.balanceL_"
{-# NOINLINE balanceL_ #-}

-- balanceR is called when right subtree might have been inserted to or when
-- left subtree might have been deleted from.
balanceR :: k -> a -> Map k a -> Map k a -> Map k a
balanceR k x l r = case l of
balanceR k x l r = case (l, r) of
(Bin ls _ _ _ _, Bin rs _ _ _ _)
| rs <= delta*ls -> Bin (1+ls+rs) k x l r
_ -> balanceR_ k x l r
{-# INLINE balanceR #-} -- See Note [Inlining balance] in Data.Set.Internal

balanceR_ :: k -> a -> Map k a -> Map k a -> Map k a
balanceR_ k x l r = case l of
Tip -> case r of
Tip -> Bin 1 k x Tip Tip
(Bin _ _ _ Tip Tip) -> Bin 2 k x Tip r
Expand All @@ -4267,14 +4285,12 @@ balanceR k x l r = case l of
(Bin ls _ _ _ _) -> case r of
Tip -> Bin (1+ls) k x l Tip

(Bin rs rk rx rl rr)
| rs > delta*ls -> case (rl, rr) of
(Bin rs rk rx rl rr) -> case (rl, rr) of
(Bin rls rlk rlx rll rlr, Bin rrs _ _ _ _)
| rls < ratio*rrs -> Bin (1+ls+rs) rk rx (Bin (1+ls+rls) k x l rl) rr
| otherwise -> Bin (1+ls+rs) rlk rlx (Bin (1+ls+size rll) k x l rll) (Bin (1+rrs+size rlr) rk rx rlr rr)
(_, _) -> error "Failure in Data.Map.balanceR"
| otherwise -> Bin (1+ls+rs) k x l r
{-# NOINLINE balanceR #-}
(_, _) -> error "Failure in Data.Map.balanceR_"
{-# NOINLINE balanceR_ #-}


{--------------------------------------------------------------------
Expand Down
46 changes: 34 additions & 12 deletions containers/src/Data/Set/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -1866,10 +1866,29 @@ ratio = 2
-- balanceL only checks whether the left subtree is too big,
-- balanceR only checks whether the right subtree is too big.

-- Note [Inlining balance]
-- ~~~~~~~~~~~~~~~~~~~~~~~
-- Benchmarks show that we benefit from inlining balanceL and balanceR, but
-- we don't want to cause code bloat from inlining these large functions.
-- As a compromise, we inline only one case: that of two Bins already balanced
-- with respect to each other.
--
-- This is the most common case for typical scenarios. For instance, for n
-- inserts there may be O(n log n) calls to balanceL/balanceR but at most O(n)
-- of them actually require rebalancing. So, inlining this common case provides
-- most of the potential benefits of inlining the full function.

-- balanceL is called when left subtree might have been inserted to or when
-- right subtree might have been deleted from.
balanceL :: a -> Set a -> Set a -> Set a
balanceL x l r = case r of
balanceL x l r = case (l, r) of
(Bin ls _ _ _, Bin rs _ _ _)
| ls <= delta*rs -> Bin (1+ls+rs) x l r
_ -> balanceL_ x l r
{-# INLINE balanceL #-} -- See Note [Inlining balance]

balanceL_ :: a -> Set a -> Set a -> Set a
balanceL_ x l r = case r of
Tip -> case l of
Tip -> Bin 1 x Tip Tip
(Bin _ _ Tip Tip) -> Bin 2 x l Tip
Expand All @@ -1882,19 +1901,24 @@ balanceL x l r = case r of
(Bin rs _ _ _) -> case l of
Tip -> Bin (1+rs) x Tip r

(Bin ls lx ll lr)
| ls > delta*rs -> case (ll, lr) of
(Bin ls lx ll lr) -> case (ll, lr) of
(Bin lls _ _ _, Bin lrs lrx lrl lrr)
| lrs < ratio*lls -> Bin (1+ls+rs) lx ll (Bin (1+rs+lrs) x lr r)
| otherwise -> Bin (1+ls+rs) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+rs+size lrr) x lrr r)
(_, _) -> error "Failure in Data.Set.balanceL"
| otherwise -> Bin (1+ls+rs) x l r
{-# NOINLINE balanceL #-}
(_, _) -> error "Failure in Data.Set.balanceL_"
{-# NOINLINE balanceL_ #-}

-- balanceR is called when right subtree might have been inserted to or when
-- left subtree might have been deleted from.
balanceR :: a -> Set a -> Set a -> Set a
balanceR x l r = case l of
balanceR x l r = case (l, r) of
(Bin ls _ _ _, Bin rs _ _ _)
| rs <= delta*ls -> Bin (1+ls+rs) x l r
_ -> balanceR_ x l r
{-# INLINE balanceR #-} -- See Note [Inlining balance]

balanceR_ :: a -> Set a -> Set a -> Set a
balanceR_ x l r = case l of
Tip -> case r of
Tip -> Bin 1 x Tip Tip
(Bin _ _ Tip Tip) -> Bin 2 x Tip r
Expand All @@ -1907,14 +1931,12 @@ balanceR x l r = case l of
(Bin ls _ _ _) -> case r of
Tip -> Bin (1+ls) x l Tip

(Bin rs rx rl rr)
| rs > delta*ls -> case (rl, rr) of
(Bin rs rx rl rr) -> case (rl, rr) of
(Bin rls rlx rll rlr, Bin rrs _ _ _)
| rls < ratio*rrs -> Bin (1+ls+rs) rx (Bin (1+ls+rls) x l rl) rr
| otherwise -> Bin (1+ls+rs) rlx (Bin (1+ls+size rll) x l rll) (Bin (1+rrs+size rlr) rx rlr rr)
(_, _) -> error "Failure in Data.Set.balanceR"
| otherwise -> Bin (1+ls+rs) x l r
{-# NOINLINE balanceR #-}
(_, _) -> error "Failure in Data.Set.balanceR_"
{-# NOINLINE balanceR_ #-}

{--------------------------------------------------------------------
The bin constructor maintains the size of the tree
Expand Down