Skip to content
This repository was archived by the owner on Jul 21, 2021. It is now read-only.

Commit 00d5f34

Browse files
author
Yu Xie
committed
Update DNS lookup behavior
1 parent c4fab1a commit 00d5f34

File tree

2 files changed

+107
-16
lines changed

2 files changed

+107
-16
lines changed

zk/dnshostprovider.go

Lines changed: 64 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,68 @@ import (
44
"fmt"
55
"net"
66
"sync"
7+
"time"
78
)
89

10+
// lookupInterval is how long lookupLoop pauses between DNS lookup retries
// for hosts that are still unresolved.
const lookupInterval = 3 * time.Minute
12+
913
// DNSHostProvider is the default HostProvider, modelled on the Java
// StaticHostProvider. Init resolves the configured servers through DNS;
// servers whose lookup fails stay in unresolvedServers and are retried in
// the background by lookupLoop until every one of them has been resolved.
type DNSHostProvider struct {
	// sleep is an override of time.Sleep, for testing. Init sets it to
	// time.Sleep when it is nil.
	sleep func(time.Duration)

	// mu protects everything below; it is taken by the lookup pass that
	// Init and the background retry goroutine both run.
	mu sync.Mutex
	// servers lists the resolved "address:port" entries.
	servers []string
	// unresolvedServers is the set of configured "host:port" entries whose
	// DNS lookup has not succeeded yet.
	unresolvedServers map[string]struct{}
	// curr and last are reset to -1 after every lookup pass.
	// NOTE(review): presumably cursor positions into servers; their exact
	// roles are defined by methods outside this view.
	curr int
	last int
	// lookupHost is an override of net.LookupHost, for testing.
	lookupHost func(string) ([]string, error)
}
2027

2128
// Init is called first, with the servers specified in the connection
2229
// string. It uses DNS to look up addresses for each server, then
2330
// shuffles them all together.
2431
func (hp *DNSHostProvider) Init(servers []string) error {
32+
if hp.sleep == nil {
33+
hp.sleep = time.Sleep
34+
}
35+
hp.servers = make([]string, 0, len(servers))
36+
hp.unresolvedServers = make(map[string]struct{}, len(servers))
37+
for _, server := range servers {
38+
hp.unresolvedServers[server] = struct{}{}
39+
}
40+
41+
done, err := hp.lookupUnresolvedServers()
42+
if err != nil {
43+
return err
44+
}
45+
46+
// as long as any host resolved successfully, consider the connection as success
47+
// but start a lookup loop until all servers are resolved and added to servers list
48+
if !done {
49+
go hp.lookupLoop()
50+
}
51+
52+
return nil
53+
}
54+
55+
// lookupLoop calls lookupUnresolvedServers in an infinite loop until all hosts are resolved
56+
// should be called in a separate goroutine
57+
func (hp *DNSHostProvider) lookupLoop() {
58+
for {
59+
if done, _ := hp.lookupUnresolvedServers(); done {
60+
break
61+
}
62+
hp.sleep(lookupInterval)
63+
}
64+
}
65+
66+
// lookupUnresolvedServers performs a DNS lookup for the hosts that have not been resolved yet
67+
// and adds the resolved addresses to the servers list
68+
func (hp *DNSHostProvider) lookupUnresolvedServers() (bool, error) {
2569
hp.mu.Lock()
2670
defer hp.mu.Unlock()
2771

@@ -30,33 +74,37 @@ func (hp *DNSHostProvider) Init(servers []string) error {
3074
lookupHost = net.LookupHost
3175
}
3276

33-
found := []string{}
34-
for _, server := range servers {
77+
if len(hp.unresolvedServers) == 0 {
78+
return true, nil
79+
}
80+
81+
found := make([]string, 0, len(hp.unresolvedServers))
82+
for server := range hp.unresolvedServers {
3583
host, port, err := net.SplitHostPort(server)
3684
if err != nil {
37-
return err
85+
return false, err
3886
}
3987
addrs, err := lookupHost(host)
4088
if err != nil {
41-
return err
89+
continue
4290
}
91+
delete(hp.unresolvedServers, server)
4392
for _, addr := range addrs {
4493
found = append(found, net.JoinHostPort(addr, port))
4594
}
4695
}
47-
48-
if len(found) == 0 {
49-
return fmt.Errorf("No hosts found for addresses %q", servers)
50-
}
51-
5296
// Randomize the order of the servers to avoid creating hotspots
5397
stringShuffle(found)
5498

55-
hp.servers = found
99+
hp.servers = append(hp.servers, found...)
56100
hp.curr = -1
57101
hp.last = -1
58102

59-
return nil
103+
if len(hp.servers) == 0 {
104+
return true, fmt.Errorf("No hosts found for addresses %q", hp.servers)
105+
}
106+
107+
return false, nil
60108
}
61109

62110
// Len returns the number of servers available

zk/dnshostprovider_test.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package zk
22

33
import (
4+
"errors"
45
"fmt"
56
"log"
67
"testing"
@@ -165,6 +166,48 @@ func TestDNSHostProviderReconnect(t *testing.T) {
165166
}
166167
}
167168

169+
// TestDNSHostOneHostDead tests whether
170+
func TestDNSHostOneHostDead(t *testing.T) {
171+
hp := &DNSHostProvider{lookupHost: func(host string) ([]string, error) {
172+
if host == "foo.failure.com" {
173+
return nil, errors.New("Fails to ns lookup")
174+
}
175+
return []string{"192.0.2.1", "192.0.2.2"}, nil
176+
}, sleep: func(_ time.Duration) {}}
177+
178+
if err := hp.Init([]string{"foo.failure.com:12345", "foo.success.com:12345"}); err != nil {
179+
t.Fatal(err)
180+
}
181+
182+
hp.mu.Lock()
183+
if len(hp.servers) != 2 {
184+
t.Fatal("Only servers that resolved by lookupHost should be in servers list")
185+
}
186+
187+
// update lookupHost to mock a successful lookup
188+
hp.lookupHost = func(host string) ([]string, error) {
189+
if host == "foo.failure.com" {
190+
return []string{"192.0.2.3"}, nil
191+
}
192+
return []string{"192.0.2.1", "192.0.2.2"}, nil
193+
}
194+
hp.mu.Unlock()
195+
196+
// Starts a 30s retry loop to wait servers list to be updated
197+
startRetryLoop := time.Now()
198+
for {
199+
time.Sleep(time.Millisecond * 5)
200+
hp.mu.Lock()
201+
if len(hp.servers) == 3 {
202+
break
203+
}
204+
hp.mu.Unlock()
205+
if time.Since(startRetryLoop) > time.Second * 30 {
206+
t.Fatal("Servers get back online should be added to the servers list")
207+
}
208+
}
209+
}
210+
168211
// TestDNSHostProviderRetryStart tests the `retryStart` functionality
169212
// of DNSHostProvider.
170213
// It's also probably the clearest visual explanation of exactly how

0 commit comments

Comments
 (0)