@@ -21,7 +21,6 @@ import (
21
21
"crypto/x509"
22
22
"fmt"
23
23
"io/ioutil"
24
- "math/rand"
25
24
"net"
26
25
"net/http"
27
26
"net/url"
@@ -31,7 +30,8 @@ import (
31
30
32
31
kingpin "gopkg.in/alecthomas/kingpin.v2"
33
32
34
- "github.com/ShowMax/go-fqdn"
33
+ "github.com/Showmax/go-fqdn"
34
+ "github.com/cenkalti/backoff/v4"
35
35
"github.com/go-kit/kit/log"
36
36
"github.com/go-kit/kit/log/level"
37
37
"github.com/pkg/errors"
53
53
insecureSkipVerify = kingpin .Flag ("insecure-skip-verify" , "Disable SSL security checks for client" ).Default ("false" ).Bool ()
54
54
useLocalhost = kingpin .Flag ("use-localhost" , "Use 127.0.0.1 to scrape metrics instead of FQDN" ).Default ("false" ).Bool ()
55
55
allowPort = kingpin .Flag ("allow-port" , "Restricts the proxy to only being allowed to scrape the given port" ).Default ("*" ).String ()
56
+
57
+ retryInitialWait = kingpin .Flag ("proxy.retry.initial-wait" , "Amount of time to wait after proxy failure" ).Default ("1s" ).Duration ()
58
+ retryMaxWait = kingpin .Flag ("proxy.retry.max-wait" , "Maximum amount of time to wait between proxy poll retries" ).Default ("5s" ).Duration ()
56
59
)
57
60
58
61
var (
@@ -80,6 +83,15 @@ func init() {
80
83
prometheus .MustRegister (pushErrorCounter , pollErrorCounter , scrapeErrorCounter )
81
84
}
82
85
86
+ func newBackOffFromFlags () backoff.BackOff {
87
+ b := backoff .NewExponentialBackOff ()
88
+ b .InitialInterval = * retryInitialWait
89
+ b .Multiplier = 1.5
90
+ b .MaxInterval = * retryMaxWait
91
+ b .MaxElapsedTime = time .Duration (0 )
92
+ return b
93
+ }
94
+
83
95
// Coordinator for scrape requests and responses
84
96
type Coordinator struct {
85
97
logger log.Logger
@@ -193,7 +205,7 @@ func (c *Coordinator) doPush(resp *http.Response, origRequest *http.Request, cli
193
205
return nil
194
206
}
195
207
196
- func loop (c Coordinator , client * http.Client ) error {
208
+ func (c * Coordinator ) doPoll ( client * http.Client ) error {
197
209
base , err := url .Parse (* proxyURL )
198
210
if err != nil {
199
211
level .Error (c .logger ).Log ("msg" , "Error parsing url:" , "err" , err )
@@ -226,35 +238,18 @@ func loop(c Coordinator, client *http.Client) error {
226
238
return nil
227
239
}
228
240
229
- // decorrelated Jitter increases the maximum jitter based on the last random value.
230
- type decorrelatedJitter struct {
231
- duration time.Duration // sleep time
232
- min time.Duration // min sleep time
233
- cap time.Duration // max sleep time
234
- }
235
-
236
- func newJitter () decorrelatedJitter {
237
- rand .Seed (time .Now ().UnixNano ())
238
- return decorrelatedJitter {
239
- min : 50 * time .Millisecond ,
240
- cap : 5 * time .Second ,
241
+ func (c * Coordinator ) loop (bo backoff.BackOff , client * http.Client ) {
242
+ op := func () error {
243
+ return c .doPoll (client )
241
244
}
242
- }
243
245
244
- func (d * decorrelatedJitter ) calc () time.Duration {
245
- change := rand .Float64 () * float64 (d .duration * time .Duration (3 )- d .min )
246
- d .duration = d .min + time .Duration (change )
247
- if d .duration > d .cap {
248
- d .duration = d .cap
249
- }
250
- if d .duration < d .min {
251
- d .duration = d .min
246
+ for {
247
+ if err := backoff .RetryNotify (op , bo , func (err error , _ time.Duration ) {
248
+ pollErrorCounter .Inc ()
249
+ }); err != nil {
250
+ level .Error (c .logger ).Log ("err" , err )
251
+ }
252
252
}
253
- return d .duration
254
- }
255
-
256
- func (d * decorrelatedJitter ) sleep () {
257
- time .Sleep (d .calc ())
258
253
}
259
254
260
255
func main () {
@@ -333,14 +328,7 @@ func main() {
333
328
TLSClientConfig : tlsConfig ,
334
329
}
335
330
336
- jitter := newJitter ()
337
331
client := & http.Client {Transport : transport }
338
- for {
339
- err := loop (coordinator , client )
340
- if err != nil {
341
- pollErrorCounter .Inc ()
342
- jitter .sleep ()
343
- continue
344
- }
345
- }
332
+
333
+ coordinator .loop (newBackOffFromFlags (), client )
346
334
}
0 commit comments