Skip to content

Commit 28cea3f

Browse files
committed
#60 : When a deployed Replica's state changes from 'ready' to 'not ready', we undeploy that Replica and deploy a new Replica.
1 parent b7e0cc4 commit 28cea3f

File tree

7 files changed

+111
-135
lines changed

7 files changed

+111
-135
lines changed

Backlog

Lines changed: 33 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,47 @@
1-
Scheduled for 1st week of December:
2-
- #58: We should be able to add a SSL certificate
3-
- #52: Allow Kubegres cluster to run on secure Kubernetes environments (add security field "context")
41

5-
Scheduled for 1st week of January:
6-
- update local kind
7-
- Build controller with Kubebuilder v3.2.0
8-
- Remove warning for CronJob which was updated from beta to stable since Kubernetes 1.21
2+
- Scheduled for November 2021:
3+
- #52 : Allow Kubegres cluster to run on secure Kubernetes environments (add security field "securityContext")
4+
- #60 : When the StatefulSet is healthy but the Replica Pod is not, then the replica Pod should be replaced
95

10-
Scheduled for 1st week of February:
11-
- #20 : Delete PVC
6+
- Scheduled for December 2021:
7+
- #58 : We should be able to add an SSL certificate
8+
- update local kind
9+
- Build controller with Kubebuilder v3.2.0
10+
- Remove warning for CronJob which was updated from beta to stable since Kubernetes 1.21
1211

13-
Scheduled for 1st week of March:
14-
- #12 : Reuse PVC (see below) and Primary becomes a Replica
12+
- Scheduled for May 2022:
13+
- #20 : Delete PVC
1514

16-
As part of the available options for the field "failover.pvc", there would be:
17-
"keep": the default option currently with Kubegres where PVC are kept but not reused for safety and investigation reasons
18-
"delete": the PVC will be deleted
19-
"reuse": if the state of the PVC is healthy, it will be reused by the newly created Replica pod. I think that matches with your suggestion?
15+
- Scheduled for June 2022:
16+
- #12 : Reuse PVC (see below) and Primary becomes a Replica
2017

21-
Scheduled for 1st week of April:
22-
- #?: PG bouncer
18+
As part of the available options for the field "failover.pvc", there would be:
19+
- "keep": the default option currently with Kubegres where PVC are kept but not reused for safety and investigation reasons
20+
- "delete": the PVC will be deleted
21+
- "reuse": if the state of the PVC is healthy, it will be reused by the newly created Replica pod. I think that matches with your suggestion?
2322

24-
Scheduled for 1st week of May:
25-
- #51: add documentation about how to recover backup
26-
- add use cases documentation, for example how to expand storage manually and how to upgrade Postgres major version.
27-
- check how to setup log archiving in case of replica does not found a data
23+
- Scheduled for July 2022:
24+
- #?: PG bouncer
2825

29-
Scheduled for 1st week of June:
30-
- #46: Define Service Type for Primary and Replica
26+
- Scheduled for August 2022:
27+
- #51: add documentation about how to recover backup
28+
- add use cases documentation, for example how to expand storage manually and how to upgrade Postgres major version.
29+
- check how to set up log archiving in case a replica does not find data
3130

32-
Scheduled for 1st week of July:
33-
- #7 : Allow major version upgrade using pg_upgrade
31+
- Scheduled for September 2022:
32+
- #46: Define Service Type for Primary and Replica
3433

35-
Scheduled for 1st week of August:
36-
- #35 : Restore database from a PV backup
34+
- Scheduled for October 2022:
35+
- #7 : Allow major version upgrade using pg_upgrade
3736

38-
Scheduled for 1st week of September:
39-
- #10 : Deploy Kubegres with a HELM chart
37+
- Scheduled for November 2022:
38+
- #35 : Restore database from a PV backup
4039

41-
Scheduled for 1st week of October:
42-
- #? : Add a field to allow restarting StatefulSets and Pods via the YAML of "Kind: Kubegres"?
40+
- Scheduled for December 2022:
41+
- #10 : Deploy Kubegres with a HELM chart
42+
43+
- Scheduled for January 2023:
44+
- #? : Add a field to allow restarting StatefulSets and Pods via the YAML of "Kind: Kubegres"?
4345

4446
Blocked:
4547
#49 : Expand Storage (waiting on the Kubernetes feature: https://github.com/kubernetes/enhancements/pull/2842)

controllers/spec/enforcer/resources_count_spec/ServicesCountSpecEnforcer.go

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,18 +46,14 @@ func CreateServicesCountSpecEnforcer(kubegresContext ctx.KubegresContext,
4646

4747
func (r *ServicesCountSpecEnforcer) EnforceSpec() error {
4848

49-
if !r.isPrimaryDbReady() {
50-
return nil
51-
}
52-
53-
if !r.isPrimaryServiceDeployed() {
49+
if !r.isPrimaryServiceDeployed() && r.isPrimaryDbReady() {
5450
err := r.deployPrimaryService()
5551
if err != nil {
5652
return err
5753
}
5854
}
5955

60-
if !r.isReplicaServiceDeployed() && r.areThereReplicaDbsDeployed() {
56+
if !r.isReplicaServiceDeployed() && r.isThereReadyReplica() {
6157
err := r.deployReplicaService()
6258
if err != nil {
6359
return err
@@ -79,8 +75,8 @@ func (r *ServicesCountSpecEnforcer) isPrimaryDbReady() bool {
7975
return r.resourcesStates.StatefulSets.Primary.IsReady
8076
}
8177

82-
func (r *ServicesCountSpecEnforcer) areThereReplicaDbsDeployed() bool {
83-
return r.resourcesStates.StatefulSets.Replicas.NbreDeployed > 0
78+
func (r *ServicesCountSpecEnforcer) isThereReadyReplica() bool {
79+
return r.resourcesStates.StatefulSets.Replicas.NbreReady > 0
8480
}
8581

8682
func (r *ServicesCountSpecEnforcer) deployPrimaryService() error {

controllers/spec/enforcer/resources_count_spec/statefulset/PrimaryDbCountSpecEnforcer.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,8 @@ func (r *PrimaryDbCountSpecEnforcer) logKubegresFeaturesAreReEnabled() {
119119

120120
func (r *PrimaryDbCountSpecEnforcer) shouldWeDeployNewPrimaryDb() bool {
121121

122-
shouldWeDeployNewPrimary := r.resourcesStates.StatefulSets.Replicas.NbreDeployed == 0 &&
123-
!r.resourcesStates.StatefulSets.Primary.IsDeployed
122+
shouldWeDeployNewPrimary := !r.resourcesStates.StatefulSets.Primary.IsDeployed &&
123+
r.resourcesStates.StatefulSets.Replicas.NbreDeployed == 0
124124

125125
if shouldWeDeployNewPrimary {
126126
if *r.kubegresContext.Kubegres.Spec.Replicas == 1 || !r.hasPrimaryEverBeenDeployed() {

controllers/spec/enforcer/resources_count_spec/statefulset/ReplicaDbCountSpecEnforcer.go

Lines changed: 53 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,14 @@ func (r *ReplicaDbCountSpecEnforcer) Enforce() error {
100100
r.resetInSpecManualFailover()
101101
}
102102

103-
if r.shouldMoreReplicaBeDeployed() {
103+
if r.isReplicaOperationInProgress() {
104+
return nil
105+
}
106+
107+
// Check if the number of deployed replicas == spec if not then deploy one
108+
nbreNewReplicaToDeploy := r.getExpectedNbreReplicasToDeploy() - r.getNbreDeployedReplicas()
109+
110+
if nbreNewReplicaToDeploy > 0 {
104111

105112
if r.isAutomaticFailoverDisabled() &&
106113
!isManualFailoverRequested &&
@@ -110,15 +117,44 @@ func (r *ReplicaDbCountSpecEnforcer) Enforce() error {
110117
return nil
111118
}
112119

113-
return r.deployReplicaStatefulSets()
120+
return r.deployReplicaStatefulSet()
121+
122+
} else if nbreNewReplicaToDeploy < 0 {
123+
replicaToUndeploy := r.getReplicaToUndeploy()
124+
return r.undeployReplicaStatefulSets(replicaToUndeploy)
114125

115-
} else if r.shouldLessReplicaBeDeployed() {
116-
return r.undeployReplicaStatefulSets()
126+
} else if nbreNewReplicaToDeploy == 0 {
127+
for _, replicaStatefulSet := range r.getDeployedReplicas() {
128+
if !replicaStatefulSet.IsReady {
129+
return r.undeployReplicaStatefulSets(replicaStatefulSet)
130+
}
131+
}
117132
}
118133

119134
return nil
120135
}
121136

137+
func (r *ReplicaDbCountSpecEnforcer) isReplicaOperationInProgress() bool {
138+
return r.blockingOperation.GetActiveOperation().OperationId == operation.OperationIdReplicaDbCountSpecEnforcement
139+
}
140+
141+
func (r *ReplicaDbCountSpecEnforcer) getDeployedReplicas() []statefulset.StatefulSetWrapper {
142+
return r.resourcesStates.StatefulSets.Replicas.All.GetAllSortedByInstanceIndex()
143+
}
144+
145+
func (r *ReplicaDbCountSpecEnforcer) getNbreDeployedReplicas() int32 {
146+
return r.resourcesStates.StatefulSets.Replicas.NbreDeployed
147+
}
148+
149+
func (r *ReplicaDbCountSpecEnforcer) getExpectedNbreReplicasToDeploy() int32 {
150+
expectedNbreToDeploy := r.resourcesStates.StatefulSets.SpecExpectedNbreToDeploy
151+
152+
if expectedNbreToDeploy <= 1 {
153+
return 0
154+
}
155+
return expectedNbreToDeploy - 1
156+
}
157+
122158
func (r *ReplicaDbCountSpecEnforcer) hasLastAttemptTimedOut() bool {
123159
return r.blockingOperation.HasActiveOperationIdTimedOut(operation.OperationIdReplicaDbCountSpecEnforcement)
124160
}
@@ -179,10 +215,6 @@ func (r *ReplicaDbCountSpecEnforcer) resetInSpecManualFailover() error {
179215
return r.kubegresContext.Client.Update(r.kubegresContext.Ctx, r.kubegresContext.Kubegres)
180216
}
181217

182-
func (r *ReplicaDbCountSpecEnforcer) shouldMoreReplicaBeDeployed() bool {
183-
return r.resourcesStates.StatefulSets.ShouldMoreReplicaBeDeployed()
184-
}
185-
186218
func (r *ReplicaDbCountSpecEnforcer) isPrimaryDbReady() bool {
187219
return r.resourcesStates.StatefulSets.Primary.IsReady
188220
}
@@ -207,23 +239,6 @@ func (r *ReplicaDbCountSpecEnforcer) isReplicaDbUndeployed(operation postgresV1.
207239
return err != nil
208240
}
209241

210-
func (r *ReplicaDbCountSpecEnforcer) deployReplicaStatefulSets() error {
211-
212-
if !r.resourcesStates.StatefulSets.ShouldMoreReplicaBeDeployed() {
213-
return nil
214-
}
215-
216-
numberOfReplicasToDeploy := r.resourcesStates.StatefulSets.GetNbreReplicaToDeploy()
217-
r.kubegresContext.Log.Info("We are going to deploy " + strconv.Itoa(int(numberOfReplicasToDeploy)) + " Replica statefulSet(s)")
218-
219-
var index int32
220-
for index = 0; index < numberOfReplicasToDeploy; index++ {
221-
return r.deployReplicaStatefulSet()
222-
}
223-
224-
return nil
225-
}
226-
227242
func (r *ReplicaDbCountSpecEnforcer) deployReplicaStatefulSet() error {
228243

229244
instanceIndex := r.kubegresContext.Status.GetLastCreatedInstanceIndex() + 1
@@ -268,27 +283,11 @@ func (r *ReplicaDbCountSpecEnforcer) activateBlockingOperationForUndeployment(st
268283
statefulSetInstanceIndex)
269284
}
270285

271-
func (r *ReplicaDbCountSpecEnforcer) shouldLessReplicaBeDeployed() bool {
272-
return r.resourcesStates.StatefulSets.ShouldLessReplicaBeDeployed()
273-
}
274-
275-
func (r *ReplicaDbCountSpecEnforcer) undeployReplicaStatefulSets() error {
276-
277-
if !r.resourcesStates.StatefulSets.ShouldLessReplicaBeDeployed() {
278-
return nil
279-
}
280-
281-
nbreReplicasToUndeploy := r.resourcesStates.StatefulSets.GetNbreReplicaToUndeploy()
282-
283-
if nbreReplicasToUndeploy == 0 {
284-
return nil
285-
}
286+
func (r *ReplicaDbCountSpecEnforcer) undeployReplicaStatefulSets(replicaToUndeploy statefulset.StatefulSetWrapper) error {
286287

287-
replicasToUndeploy := r.getReplicasReverseSortedByInstanceIndex()
288-
if len(replicasToUndeploy) == 0 {
288+
if replicaToUndeploy.StatefulSet.Name == "" {
289289
return nil
290290
}
291-
replicaToUndeploy := replicasToUndeploy[0]
292291

293292
r.kubegresContext.Log.Info("We are going to undeploy a Replica statefulSet.", "InstanceIndex", replicaToUndeploy.InstanceIndex)
294293

@@ -309,6 +308,17 @@ func (r *ReplicaDbCountSpecEnforcer) undeployReplicaStatefulSets() error {
309308
return nil
310309
}
311310

311+
func (r *ReplicaDbCountSpecEnforcer) getReplicaToUndeploy() statefulset.StatefulSetWrapper {
312+
313+
replicasToUndeploy := r.getReplicasReverseSortedByInstanceIndex()
314+
315+
if len(replicasToUndeploy) == 0 {
316+
return statefulset.StatefulSetWrapper{}
317+
}
318+
319+
return replicasToUndeploy[0]
320+
}
321+
312322
func (r *ReplicaDbCountSpecEnforcer) getReplicasReverseSortedByInstanceIndex() []statefulset.StatefulSetWrapper {
313323
return r.resourcesStates.StatefulSets.Replicas.All.GetAllReverseSortedByInstanceIndex()
314324
}

controllers/spec/enforcer/resources_count_spec/statefulset/failover/PrimaryToReplicaFailOver.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ func (r *PrimaryToReplicaFailOver) ShouldWeFailOver() bool {
7171
if !r.hasPrimaryEverBeenDeployed() {
7272
return false
7373

74-
} else if !r.isThereDeployedReplica() {
74+
} else if !r.isThereReadyReplica() {
7575
r.logFailoverCannotHappenAsNoReplicaDeployed()
7676
return false
7777

@@ -136,8 +136,8 @@ func (r *PrimaryToReplicaFailOver) isPrimaryDbReady() bool {
136136
return r.resourcesStates.StatefulSets.Primary.IsReady
137137
}
138138

139-
func (r *PrimaryToReplicaFailOver) isThereDeployedReplica() bool {
140-
return r.resourcesStates.StatefulSets.Replicas.NbreDeployed > 0
139+
func (r *PrimaryToReplicaFailOver) isThereReadyReplica() bool {
140+
return r.resourcesStates.StatefulSets.Replicas.NbreReady > 0
141141
}
142142

143143
func (r *PrimaryToReplicaFailOver) isAutomaticFailoverDisabled() bool {

controllers/states/log/ResourcesStatesLogger.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ func (r *ResourcesStatesLogger) logConfigStates() {
4444
func (r *ResourcesStatesLogger) logStatefulSetsStates() {
4545
statefulSets := r.resourcesStates.StatefulSets
4646
r.kubegresContext.Log.Info("All StatefulSets deployment states: ",
47-
"Spec expected to deploy", statefulSets.SpecNbreToDeploy,
47+
"Spec expected to deploy", statefulSets.SpecExpectedNbreToDeploy,
4848
"Nbre Deployed", statefulSets.NbreDeployed)
4949

5050
r.logStatefulSetWrapper("Primary states", statefulSets.Primary)

0 commit comments

Comments
 (0)