@@ -2,18 +2,21 @@ package rpc
2
2
3
3
import (
4
4
"context"
5
+ "errors"
5
6
"fmt"
7
+ "strings"
6
8
"sync"
7
9
8
- "github.com/cloudwego/kitex/client"
9
- "github.com/selectdb/ccr_syncer/pkg/ccr/base"
10
10
festruct "github.com/selectdb/ccr_syncer/pkg/rpc/kitex_gen/frontendservice"
11
11
feservice "github.com/selectdb/ccr_syncer/pkg/rpc/kitex_gen/frontendservice/frontendservice"
12
12
tstatus "github.com/selectdb/ccr_syncer/pkg/rpc/kitex_gen/status"
13
13
festruct_types "github.com/selectdb/ccr_syncer/pkg/rpc/kitex_gen/types"
14
14
"github.com/selectdb/ccr_syncer/pkg/utils"
15
15
"github.com/selectdb/ccr_syncer/pkg/xerror"
16
16
17
+ "github.com/cloudwego/kitex/client"
18
+ "github.com/cloudwego/kitex/pkg/kerrors"
19
+ "github.com/selectdb/ccr_syncer/pkg/ccr/base"
17
20
log "github.com/sirupsen/logrus"
18
21
)
19
22
25
28
ErrFeNotMasterCompatible = xerror .NewWithoutStack (xerror .FE , "not master compatible" )
26
29
)
27
30
31
+ // canUseNextAddr means can try next addr, err is a connection error, not a method not found or other error
32
+ func canUseNextAddr (err error ) bool {
33
+ if errors .Is (err , kerrors .ErrNoConnection ) {
34
+ return true
35
+ }
36
+ if errors .Is (err , kerrors .ErrNoResolver ) {
37
+ return true
38
+ }
39
+ if errors .Is (err , kerrors .ErrNoDestAddress ) {
40
+ return true
41
+ }
42
+
43
+ errMsg := err .Error ()
44
+ if strings .Contains (errMsg , "connection has been closed by peer" ) {
45
+ return true
46
+ }
47
+ if strings .Contains (errMsg , "closed network connection" ) {
48
+ return true
49
+ }
50
+ if strings .Contains (errMsg , "connection reset by peer" ) {
51
+ return true
52
+ }
53
+
54
+ return false
55
+ }
56
+
28
57
type IFeRpc interface {
29
58
BeginTransaction (* base.Spec , string , []int64 ) (* festruct.TBeginTxnResult_ , error )
30
59
CommitTransaction (* base.Spec , int64 , []* festruct_types.TTabletCommitInfo ) (* festruct.TCommitTxnResult_ , error )
@@ -62,6 +91,17 @@ func NewFeRpc(spec *base.Spec) (*FeRpc, error) {
62
91
cachedFeAddrs := make (map [string ]bool )
63
92
for _ , fe := range spec .Frontends {
64
93
addr := fmt .Sprintf ("%s:%s" , fe .Host , fe .ThriftPort )
94
+
95
+ if _ , ok := cachedFeAddrs [addr ]; ok {
96
+ continue
97
+ }
98
+
99
+ // for cached all spec clients
100
+ if client , err := newSingleFeClient (addr ); err != nil {
101
+ log .Warnf ("new fe client error: %v" , err )
102
+ } else {
103
+ clients [client .Address ()] = client
104
+ }
65
105
cachedFeAddrs [addr ] = true
66
106
}
67
107
@@ -124,41 +164,129 @@ func (rpc *FeRpc) getCacheFeAddrs() map[string]bool {
124
164
return utils .CopyMap (rpc .cachedFeAddrs )
125
165
}
126
166
127
- func (rpc * FeRpc ) callWithMasterRedirect (caller callerType ) (resultType , error ) {
128
- masterClient := rpc .getMasterClient ()
167
+ type retryWithMasterRedirectAndCachedClientsRpc struct {
168
+ rpc * FeRpc
169
+ caller callerType
170
+ notriedClients map [string ]* singleFeClient
171
+ }
172
+
173
+ type call0Result struct {
174
+ canUseNextAddr bool
175
+ resp resultType
176
+ err error
177
+ masterAddr string
178
+ }
129
179
130
- result , err := caller (masterClient )
180
+ func (r * retryWithMasterRedirectAndCachedClientsRpc ) call0 (masterClient * singleFeClient ) * call0Result {
181
+ caller := r .caller
182
+ resp , err := caller (masterClient )
183
+ log .Tracef ("call resp: %+v, error: %+v" , resp , err )
184
+
185
+ // Step 1: check error
131
186
if err != nil {
132
- return result , err
187
+ if ! canUseNextAddr (err ) {
188
+ return & call0Result {
189
+ canUseNextAddr : false ,
190
+ err : xerror .Wrap (err , xerror .FE , "thrift error" ),
191
+ }
192
+ } else {
193
+ log .Warnf ("call error: %v, try next addr" , err )
194
+ return & call0Result {
195
+ canUseNextAddr : true ,
196
+ err : xerror .Wrap (err , xerror .FE , "thrift error" ),
197
+ }
198
+ }
133
199
}
134
200
135
- if result .GetStatus ().GetStatusCode () != tstatus .TStatusCode_NOT_MASTER {
136
- return result , err
201
+ // Step 2: check need redirect
202
+ if resp .GetStatus ().GetStatusCode () != tstatus .TStatusCode_NOT_MASTER {
203
+ return & call0Result {
204
+ canUseNextAddr : false ,
205
+ resp : resp ,
206
+ err : nil ,
207
+ }
137
208
}
138
209
139
210
// no compatible for master
140
- if ! result .IsSetMasterAddress () {
141
- return result , xerror .XPanicWrapf (ErrFeNotMasterCompatible , "fe addr [%s]" , masterClient .Address ())
211
+ if ! resp .IsSetMasterAddress () {
212
+ err = xerror .XPanicWrapf (ErrFeNotMasterCompatible , "fe addr [%s]" , masterClient .Address ())
213
+ return & call0Result {
214
+ canUseNextAddr : true ,
215
+ err : err , // not nil
216
+ }
142
217
}
143
218
144
219
// switch to master
145
- masterAddr := result .GetMasterAddress ()
146
- log .Infof ("switch to master %s" , masterAddr )
147
- addr := fmt .Sprintf ("%s:%d" , masterAddr .Hostname , masterAddr .Port )
220
+ masterAddr := resp .GetMasterAddress ()
221
+ err = xerror .Errorf (xerror .FE , "addr [%s] is not master" , masterAddr )
222
+ return & call0Result {
223
+ canUseNextAddr : true ,
224
+ resp : resp ,
225
+ masterAddr : fmt .Sprintf ("%s:%d" , masterAddr .Hostname , masterAddr .Port ),
226
+ err : err , // not nil
227
+ }
228
+ }
148
229
149
- client , ok := rpc .getClient (addr )
150
- if ok {
151
- masterClient = client
152
- } else {
153
- masterClient , err = newSingleFeClient (addr )
154
- if err != nil {
155
- return nil , xerror .Wrapf (err , xerror .RPC , "NewFeClient error: %v" , err )
230
+ func (r * retryWithMasterRedirectAndCachedClientsRpc ) call () (resultType , error ) {
231
+ rpc := r .rpc
232
+ masterClient := rpc .masterClient
233
+
234
+ // Step 1: try master
235
+ result := r .call0 (masterClient )
236
+ log .Tracef ("call0 result: %+v" , result )
237
+ if result .err == nil {
238
+ return result .resp , nil
239
+ }
240
+
241
+ // Step 2: check error, if can't use next addr, return error
242
+ // canUseNextAddr means can try next addr, contains ErrNoConnection, ErrNoResolver, ErrNoDestAddress => (feredirect && use next cached addr)
243
+ if ! result .canUseNextAddr {
244
+ return nil , result .err
245
+ }
246
+
247
+ // Step 3: if set master addr, redirect to master
248
+ // redirect to master
249
+ if result .masterAddr != "" {
250
+ masterAddr := result .masterAddr
251
+ log .Infof ("switch to master %s" , masterAddr )
252
+
253
+ var err error
254
+ client , ok := rpc .getClient (masterAddr )
255
+ if ok {
256
+ masterClient = client
257
+ } else {
258
+ masterClient , err = newSingleFeClient (masterAddr )
259
+ if err != nil {
260
+ return nil , xerror .Wrapf (err , xerror .RPC , "NewFeClient [%s] error: %v" , masterAddr , err )
261
+ }
156
262
}
263
+ rpc .updateMasterClient (masterClient )
264
+ return r .call ()
265
+ }
266
+
267
+ // Step 4: try all cached fe clients
268
+ if r .notriedClients == nil {
269
+ r .notriedClients = rpc .getClients ()
270
+ }
271
+ delete (r .notriedClients , masterClient .Address ())
272
+ if len (r .notriedClients ) == 0 {
273
+ return nil , result .err
274
+ }
275
+ // get first notried client
276
+ var client * singleFeClient
277
+ for _ , client = range r .notriedClients {
278
+ break
157
279
}
158
- rpc .updateMasterClient (masterClient )
280
+ // because call0 failed, so original masterClient is not master now, set client as masterClient for retry
281
+ rpc .updateMasterClient (client )
282
+ return r .call ()
283
+ }
159
284
160
- // retry
161
- return caller (masterClient )
285
+ func (rpc * FeRpc ) callWithMasterRedirect (caller callerType ) (resultType , error ) {
286
+ r := & retryWithMasterRedirectAndCachedClientsRpc {rpc : rpc ,
287
+ caller : caller ,
288
+ }
289
+ return r .call ()
162
290
}
163
291
164
292
// retryCallerType is the callback signature accepted by
// callWithRetryAllClients: it receives one FE client and returns an untyped
// result plus an error.
type retryCallerType func (client * singleFeClient ) (any , error )
@@ -194,7 +322,7 @@ func (rpc *FeRpc) callWithRetryAllClients(caller retryCallerType) (result any, e
194
322
195
323
usedClientAddrs [addr ] = true
196
324
if client , err := newSingleFeClient (addr ); err != nil {
197
- log .Errorf ("new fe client error: %v" , err )
325
+ log .Warnf ("new fe client error: %v" , err )
198
326
} else {
199
327
rpc .addClient (client )
200
328
if result , err = caller (client ); err == nil {
0 commit comments