// Copyright 2015 by Leipzig University Library, http://ub.uni-leipzig.de
// by The Finc Authors, http://finc.info
// by Martin Czygan, <[email protected]>
//
// This file is part of some open source application.
//
// Some open source application is free software: you can redistribute
// it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either
// version 3 of the License, or (at your option) any later version.
//
// Some open source application is distributed in the hope that it will
// be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Foobar. If not, see <http://www.gnu.org/licenses/>.
//
// @license GPL-3.0+ <http://spdx.org/licenses/GPL-3.0+>

// Package solrbulk implements bulk SOLR imports.
package solrbulk

import (
"bytes"
"fmt"
"io"
"io/ioutil"
"strings"
"sync"
"time"
"github.com/sethgrid/pester"
log "github.com/sirupsen/logrus"
)
// Version of application.
const Version = "0.3.8"
// Options holds bulk indexing options.
type Options struct {
	BatchSize                int
	CommitSize               int
	Verbose                  bool
	Server                   string
	UpdateRequestHandlerName string
}

// BulkIndex takes a set of documents as strings and indexes them into SOLR.
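//
// A minimal usage sketch, not part of the original code; the server URL,
// handler name and documents below are placeholders:
//
//	options := Options{
//		BatchSize:                1000,
//		Server:                   "http://localhost:8983/solr/biblio",
//		UpdateRequestHandlerName: "/update",
//	}
//	docs := []string{
//		`{"id": "id-1", "title": "first"}`,
//		`{"id": "id-2", "title": "second"}`,
//	}
//	if err := BulkIndex(docs, options); err != nil {
//		log.Fatal(err)
//	}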
func BulkIndex(docs []string, options Options) error {
	link := fmt.Sprintf("%s%s", options.Server, options.UpdateRequestHandlerName)
	// Skip blank documents, so the joined payload stays a valid JSON array.
	var lines []string
	for _, doc := range docs {
		if len(strings.TrimSpace(doc)) == 0 {
			continue
		}
		lines = append(lines, doc)
	}
	// Send all documents in a single request; pester.Post retries failed requests.
	body := fmt.Sprintf("[%s]\n", strings.Join(lines, ","))
	resp, err := pester.Post(link, "application/json", strings.NewReader(body))
	if err != nil {
		return err
	}
	if resp.StatusCode != 200 {
		var buf bytes.Buffer
		if _, err := io.Copy(&buf, resp.Body); err != nil {
			return err
		}
		// Write out the failed request payload to a temporary file for debugging.
		f, err := ioutil.TempFile("", fmt.Sprintf("solrbulk-%d", time.Now().Unix()))
		if err != nil {
			log.Printf("failed to create debug file for failed request payload: %v", err)
		} else {
			defer f.Close()
			if _, err := f.Write([]byte(body)); err != nil {
				log.Printf("failed to write failed request payload: %v", err)
			} else {
				log.Printf("failed payload written to: %v", f.Name())
			}
		}
		log.Printf("%s: %s", link, buf.String())
		log.Fatal(resp.Status)
	}
	return resp.Body.Close()
}

// Worker batch indexes documents from the lines channel.
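//
// A minimal wiring sketch, not part of the original code; the id, channel
// and option values are placeholders:
//
//	var wg sync.WaitGroup
//	lines := make(chan string)
//	opts := Options{
//		BatchSize:                1000,
//		Server:                   "http://localhost:8983/solr/biblio",
//		UpdateRequestHandlerName: "/update",
//	}
//	wg.Add(1)
//	go Worker("worker-0", opts, lines, &wg)
//	for _, doc := range []string{`{"id": "id-1"}`, `{"id": "id-2"}`} {
//		lines <- doc
//	}
//	close(lines)
//	wg.Wait()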
func Worker(id string, options Options, lines chan string, wg *sync.WaitGroup) {
	defer wg.Done()
	var docs []string
	i := 0
	for s := range lines {
		docs = append(docs, s)
		i++
		if i%options.BatchSize == 0 {
			// TODO: we do not need a copy, BulkIndex is synchronous
			msg := make([]string, len(docs))
			if n := copy(msg, docs); n != len(docs) {
				log.Fatalf("%d docs in batch, but only %d copied", len(docs), n)
			}
			if err := BulkIndex(msg, options); err != nil {
				log.Fatal(err)
			}
			if options.Verbose {
				log.Printf("[%s] @%d", id, i)
			}
			docs = nil
		}
	}
	// Flush any remaining documents once the channel is closed.
	if len(docs) == 0 {
		return
	}
	msg := make([]string, len(docs))
	copy(msg, docs)
	if err := BulkIndex(msg, options); err != nil {
		log.Fatal(err)
	}
	if options.Verbose {
		log.Printf("[%s] @%d", id, i)
	}
}