-
Notifications
You must be signed in to change notification settings - Fork 12
/
hasher.go
38 lines (30 loc) · 631 Bytes
/
hasher.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
package datatable
import (
"bytes"
"encoding/gob"
"github.com/cespare/xxhash"
)
// hasherImpl is a field-less type whose methods compute hashes of rows and
// tables.
type hasherImpl struct{}

// hasher is the package-level hasherImpl instance.
var hasher = new(hasherImpl)
// Row returns a 64-bit xxhash of the values that row holds for cols, taken in
// the order given by cols. A nil row hashes to 0.
//
// Each value is gob-encoded into a scratch buffer and the buffer is hashed.
// When gob refuses a value (it rejects a nil interface value, for example) a
// single zero byte is written in its place, so the column still occupies a
// position in the hashed bytes. Without that placeholder, rows that differ
// only in which column is nil would produce identical hashes.
func (h *hasherImpl) Row(row Row, cols []string) uint64 {
	if row == nil {
		return 0
	}

	var buf bytes.Buffer
	enc := gob.NewEncoder(&buf)
	for _, name := range cols {
		if err := enc.Encode(row[name]); err != nil {
			// The value could not be encoded; write a placeholder so the
			// column is not silently skipped.
			buf.WriteByte(0)
		}
	}
	return xxhash.Sum64(buf.Bytes())
}
// Table hashes every row of dt over cols and returns a map from each hash to
// the indexes of the rows that produced it, in the order dt.Rows() yields
// them. It returns nil when dt is nil.
func (h *hasherImpl) Table(dt *DataTable, cols []string) map[uint64][]int {
	if dt == nil {
		return nil
	}

	index := make(map[uint64][]int)
	for pos, r := range dt.Rows() {
		key := h.Row(r, cols)
		index[key] = append(index[key], pos)
	}
	return index
}