Skip to content

Commit 63daa84

Browse files
authored
perf: Improve Xor method by ~20% for big sets (#1)
Processing larger bitsets in batches of 8 elements per loop iteration is more efficient on modern CPUs. I assume this is related to instruction-level parallelism.
1 parent 1dde49f commit 63daa84

File tree

3 files changed

+70
-68
lines changed

3 files changed

+70
-68
lines changed

bench_test.go

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -258,26 +258,47 @@ func BenchmarkXor(b *testing.B) {
258258
}
259259

260260
func BenchmarkBitSet_Xor(b *testing.B) {
261-
small1, small2 := New(1, 2, 3, 4, 5), New(3, 4, 5, 6, 7)
262-
large1, large2 := New(), New()
263-
for i := range 10000 {
264-
if i%2 == 0 {
265-
large1.Add(i)
261+
b.Run("empty", func(b *testing.B) {
262+
s1, s2 := New(), New()
263+
for b.Loop() {
264+
s1.Xor(s2)
266265
}
267-
if i%3 == 0 {
268-
large2.Add(i)
266+
})
267+
268+
b.Run("5", func(b *testing.B) {
269+
s1, s2 := New(1, 2, 3, 4, 5), New(3, 4, 5, 6, 7)
270+
for b.Loop() {
271+
s1.Xor(s2)
269272
}
270-
}
273+
})
271274

272-
b.Run("small sets", func(b *testing.B) {
275+
b.Run("10k", func(b *testing.B) {
276+
s1, s2 := New(), New()
277+
for i := range 10_000 {
278+
switch {
279+
case i%2 == 0:
280+
s1.Add(i)
281+
case i%3 == 0:
282+
s2.Add(i)
283+
}
284+
}
273285
for b.Loop() {
274-
small1.Xor(small2)
286+
s1.Xor(s2)
275287
}
276288
})
277289

278-
b.Run("large sets", func(b *testing.B) {
290+
b.Run("1m", func(b *testing.B) {
291+
s1, s2 := New(), New()
292+
for i := range 1_000_000 {
293+
switch {
294+
case i%2 == 0:
295+
s1.Add(i)
296+
case i%3 == 0:
297+
s2.Add(i)
298+
}
299+
}
279300
for b.Loop() {
280-
large1.Xor(large2)
301+
s1.Xor(s2)
281302
}
282303
})
283304
}

bitset.go

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,7 @@ func Or(s1, s2 BitSet) BitSet {
393393
return BitSet{}
394394
}
395395
if otherLen == 0 { // s2 is empty, return a copy of s1 with trailing zeros removed
396-
var last = bsLen - 1
396+
last := bsLen - 1
397397
for last >= 0 && s1[last] == 0 {
398398
last--
399399
}
@@ -468,8 +468,27 @@ func (bs *BitSet) Xor(other BitSet) {
468468
if len(other) > len(*bs) {
469469
bs.resize(len(other))
470470
}
471-
for i := 0; i < len(other); i++ {
472-
(*bs)[i] ^= other[i]
471+
if len(other) < 8 {
472+
for i := range other {
473+
(*bs)[i] ^= other[i]
474+
}
475+
bs.trim()
476+
return
477+
}
478+
479+
b := *bs
480+
for ; len(other) > 7; b, other = b[8:], other[8:] {
481+
(b)[0] ^= other[0]
482+
(b)[1] ^= other[1]
483+
(b)[2] ^= other[2]
484+
(b)[3] ^= other[3]
485+
(b)[4] ^= other[4]
486+
(b)[5] ^= other[5]
487+
(b)[6] ^= other[6]
488+
(b)[7] ^= other[7]
489+
}
490+
for i := range other {
491+
b[i] ^= other[i]
473492
}
474493
bs.trim()
475494
}

bitset_test.go

Lines changed: 15 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -258,10 +258,7 @@ func TestBitSet_Visit(t *testing.T) {
258258
count := 0
259259
aborted := bs.Visit(func(n int) bool {
260260
count++
261-
if n == 1 {
262-
return true
263-
}
264-
return false
261+
return n == 1
265262
})
266263
require.True(t, aborted)
267264
require.Equal(t, 1, count)
@@ -539,30 +536,6 @@ func TestOr(t *testing.T) {
539536
}
540537
}
541538

542-
func TestBitSet_Or(t *testing.T) {
543-
tests := []struct {
544-
name string
545-
a, b BitSet
546-
expect string
547-
}{
548-
{"both empty", New(), New(), "{}"},
549-
{"a empty", New(), New(1), "{1}"},
550-
{"b empty", New(1), New(), "{1}"},
551-
{"same", New(1), New(1), "{1}"},
552-
{"no overlap", New(1), New(2), "{1 2}"},
553-
{"partial overlap", New(1, 2), New(2, 3), "{1..3}"},
554-
{"large", New(100, 200), New(200, 300), "{100 200 300}"},
555-
}
556-
557-
for _, tt := range tests {
558-
t.Run(tt.name, func(t *testing.T) {
559-
res := &tt.a
560-
res.Or(tt.b)
561-
require.Equal(t, tt.expect, res.String())
562-
})
563-
}
564-
}
565-
566539
func TestXor(t *testing.T) {
567540
tests := []struct {
568541
name string
@@ -572,40 +545,29 @@ func TestXor(t *testing.T) {
572545
{"both empty", New(), New(), "{}"},
573546
{"a empty", New(), New(1), "{1}"},
574547
{"b empty", New(1), New(), "{1}"},
575-
{"same", New(1), New(1), "{}"},
548+
{"equal", New(1), New(1), "{}"},
549+
{"2 elems equal", New(1, 2), New(1, 2), "{}"},
576550
{"no overlap", New(1), New(2), "{1 2}"},
577551
{"partial overlap", New(1, 2), New(2, 3), "{1 3}"},
578-
{"large", New(100, 200), New(200, 300), "{100 300}"},
552+
{"partial overlap trailing zero", New(1, 2, 0), New(2, 3, 0), "{1 3}"},
553+
{"partial overlap hundrets", New(100, 200), New(200, 300), "{100 300}"},
554+
{
555+
"20 elems no overlap",
556+
New(1, 100, 200, 300, 400, 500, 600, 700, 800, 900),
557+
New(2, 101, 202, 303, 404, 505, 606, 707, 808, 909),
558+
"{1 2 100 101 200 202 300 303 400 404 500 505 600 606 " +
559+
"700 707 800 808 900 909}",
560+
},
579561
}
580562

581563
for _, tt := range tests {
582564
t.Run(tt.name, func(t *testing.T) {
583565
res := Xor(tt.a, tt.b)
584566
require.Equal(t, tt.expect, res.String())
585-
})
586-
}
587-
}
588567

589-
func TestBitSet_Xor(t *testing.T) {
590-
tests := []struct {
591-
name string
592-
a, b BitSet
593-
expect string
594-
}{
595-
{"both empty", New(), New(), "{}"},
596-
{"a empty", New(), New(1), "{1}"},
597-
{"b empty", New(1), New(), "{1}"},
598-
{"same", New(1), New(1), "{}"},
599-
{"no overlap", New(1), New(2), "{1 2}"},
600-
{"partial overlap", New(1, 2), New(2, 3), "{1 3}"},
601-
{"large", New(100, 200), New(200, 300), "{100 300}"},
602-
}
603-
604-
for _, tt := range tests {
605-
t.Run(tt.name, func(t *testing.T) {
606-
res := &tt.a
607-
res.Xor(tt.b)
608-
require.Equal(t, tt.expect, res.String())
568+
cp := tt.a.Copy()
569+
cp.Xor(tt.b)
570+
require.Equal(t, tt.expect, cp.String())
609571
})
610572
}
611573
}

0 commit comments

Comments
 (0)