Skip to content

Commit d6c4e24

Browse files
committed
Globalnet: stop leader election on controller startup failure
...instead of exiting with a fatal error. Fixes #3337 Signed-off-by: Tom Pantelis <[email protected]>
1 parent a09f3d4 commit d6c4e24

File tree

3 files changed

+74
-6
lines changed

3 files changed

+74
-6
lines changed

pkg/globalnet/controllers/controllers_suite_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ package controllers_test
2020

2121
import (
2222
"context"
23+
"fmt"
2324
"net"
2425
"os"
2526
"testing"
@@ -29,6 +30,7 @@ import (
2930
. "github.com/onsi/gomega"
3031
fakeDynClient "github.com/submariner-io/admiral/pkg/fake"
3132
"github.com/submariner-io/admiral/pkg/ipam"
33+
"github.com/submariner-io/admiral/pkg/log"
3234
"github.com/submariner-io/admiral/pkg/log/kzerolog"
3335
"github.com/submariner-io/admiral/pkg/resource"
3436
"github.com/submariner-io/admiral/pkg/slices"
@@ -87,6 +89,11 @@ var _ = BeforeSuite(func() {
8789
Addr: cniInterfaceIP + "/24",
8890
}}, nil
8991
}
92+
93+
log.Exit = func(code int) {
94+
defer GinkgoRecover()
95+
Fail(fmt.Sprintf("Exited with code %d", code))
96+
}
9097
})
9198

9299
func TestControllers(t *testing.T) {

pkg/globalnet/controllers/gateway_monitor.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,10 @@ func (g *gatewayMonitor) startLeaderElection() {
296296
Callbacks: leaderelection.LeaderCallbacks{
297297
OnStartedLeading: func(_ context.Context) {
298298
err := g.startControllers() //nolint:contextcheck // Intentional to not pass context
299-
logger.FatalOnError(err, "Error starting the controllers")
299+
if err != nil {
300+
logger.Error(err, "Error starting the controllers - stopping leader election")
301+
leaderElectionInfo.stop()
302+
}
300303
},
301304
OnStoppedLeading: func() {
302305
logger.Info("Leader election stopped")

pkg/globalnet/controllers/gateway_monitor_test.go

Lines changed: 63 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,12 @@ import (
2222
"context"
2323
"fmt"
2424
"os"
25+
"sync/atomic"
2526
"time"
2627

2728
. "github.com/onsi/ginkgo/v2"
2829
. "github.com/onsi/gomega"
30+
"github.com/pkg/errors"
2931
fake "github.com/submariner-io/admiral/pkg/fake"
3032
"github.com/submariner-io/admiral/pkg/resource"
3133
"github.com/submariner-io/admiral/pkg/syncer/test"
@@ -41,8 +43,10 @@ import (
4143
corev1 "k8s.io/api/core/v1"
4244
apierrors "k8s.io/apimachinery/pkg/api/errors"
4345
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
46+
"k8s.io/apimachinery/pkg/runtime"
4447
"k8s.io/client-go/dynamic"
4548
fakek8s "k8s.io/client-go/kubernetes/fake"
49+
"k8s.io/client-go/testing"
4650
k8snet "k8s.io/utils/net"
4751
)
4852

@@ -52,7 +56,13 @@ const (
5256
remoteCIDR = "169.254.2.0/24"
5357
)
5458

55-
var _ = Describe("Endpoint monitoring", func() {
59+
var _ = Describe("Gateway Monitor", func() {
60+
Describe("Endpoint monitoring", testEndpointMonitoring)
61+
Describe("Uninstall", testUninstall)
62+
})
63+
64+
//nolint:maintidx // Ignore for unit test.
65+
func testEndpointMonitoring() {
5666
t := newGatewayMonitorTestDriver()
5767

5868
var endpoint *submarinerv1.Endpoint
@@ -210,7 +220,7 @@ var _ = Describe("Endpoint monitoring", func() {
210220
})
211221
})
212222

213-
When("and the local Gateway is deleted", func() {
223+
Context("and the local Gateway is deleted", func() {
214224
It("should remove the ingress rules for its glonal IP", func() {
215225
globalIP := t.awaitGatewayGlobalIP("")
216226

@@ -226,6 +236,54 @@ var _ = Describe("Endpoint monitoring", func() {
226236
constants.SmGlobalnetIngressChain, And(ContainSubstring(globalIP), ContainSubstring(cniInterfaceIP)))
227237
})
228238
})
239+
240+
Context("and the controllers initially fail to start", func() {
241+
Context("", func() {
242+
BeforeEach(func() {
243+
fake.FailOnAction(&t.dynClient.Fake, "clusterglobalegressips", "get", nil, true)
244+
})
245+
246+
It("should eventually start the controllers", func() {
247+
t.awaitControllersStarted()
248+
})
249+
})
250+
251+
Context("and renewal of the leader lock also initially fails", func() {
252+
var leaseFailed chan struct{}
253+
254+
BeforeEach(func() {
255+
t.leaderElectionConfig.RenewDeadline = time.Millisecond * 200
256+
t.leaderElectionConfig.RetryPeriod = time.Millisecond * 20
257+
258+
leaseFailed = make(chan struct{})
259+
260+
t.dynClient.Fake.Lock()
261+
defer t.dynClient.Fake.Unlock()
262+
263+
var fail atomic.Bool
264+
fail.Store(true)
265+
266+
t.dynClient.Fake.PrependReactor("get", "clusterglobalegressips",
267+
func(_ testing.Action) (bool, runtime.Object, error) {
268+
if fail.Load() {
269+
fail.Store(false)
270+
t.leaderElection.FailLease(t.leaderElectionConfig.RenewDeadline)
271+
close(leaseFailed)
272+
273+
return true, nil, errors.New("mock error")
274+
}
275+
276+
return false, nil, nil
277+
})
278+
})
279+
280+
It("should eventually start the controllers", func() {
281+
Eventually(leaseFailed).Within(5 * time.Second).Should(BeClosed())
282+
t.leaderElection.SucceedLease()
283+
t.awaitControllersStarted()
284+
})
285+
})
286+
})
229287
})
230288

231289
Context("and a local gateway Endpoint corresponding to another host is created", func() {
@@ -279,9 +337,9 @@ var _ = Describe("Endpoint monitoring", func() {
279337
t.pFilter.AwaitRule(packetfilter.TableTypeNAT, constants.SmGlobalnetMarkChain, ContainSubstring(remoteCIDR))
280338
})
281339
})
282-
})
340+
}
283341

284-
var _ = Describe("Uninstall", func() {
342+
func testUninstall() {
285343
var t *testDriverBase
286344

287345
const ipSetName = controllers.IPSetPrefix + "abd"
@@ -390,7 +448,7 @@ var _ = Describe("Uninstall", func() {
390448

391449
test.AwaitNoResource(t.services, internalServiceName)
392450
})
393-
})
451+
}
394452

395453
type gatewayMonitorTestDriver struct {
396454
*testDriverBase

0 commit comments

Comments
 (0)