Enhance Etcd configMap to provide consistency and controllability (#812)
* rename configMap | add snapshotCount to spec.etcd to make the snapshot-count configurable | use proper url formatting for peer and client urls in etcd config

* Use list for advertise-client-urls as well
anveshreddy18 authored Nov 26, 2024
1 parent 130892e commit 06080a2
Showing 16 changed files with 180 additions and 61 deletions.
4 changes: 4 additions & 0 deletions api/v1alpha1/etcd.go
@@ -194,6 +194,10 @@ type EtcdConfig struct {
// Quota defines the etcd DB quota.
// +optional
Quota *resource.Quantity `json:"quota,omitempty"`
+	// SnapshotCount defines the number of applied Raft entries to hold in-memory before compaction.
+	// More info: https://etcd.io/docs/v3.4/op-guide/maintenance/#raft-log-retention
+	// +optional
+	SnapshotCount *int64 `json:"snapshotCount,omitempty"`
// DefragmentationSchedule defines the cron standard schedule for defragmentation of etcd.
// +optional
DefragmentationSchedule *string `json:"defragmentationSchedule,omitempty"`
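For context, the new optional field sits under `spec.etcd`. A minimal sketch of how it would be set on an `Etcd` manifest (the resource name and namespace are illustrative, and 75000 is simply the value used by the tests below):

```yaml
apiVersion: druid.gardener.cloud/v1alpha1
kind: Etcd
metadata:
  name: etcd-main      # illustrative
  namespace: default   # illustrative
spec:
  etcd:
    # Optional; number of applied Raft entries held in-memory before compaction.
    snapshotCount: 75000
```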
18 changes: 10 additions & 8 deletions api/v1alpha1/etcd_test.go
@@ -118,10 +118,11 @@ func TestIsReconciliationInProgress(t *testing.T) {

func createEtcd(name, namespace string) *Etcd {
var (
-	clientPort  int32 = 2379
-	serverPort  int32 = 2380
-	backupPort  int32 = 8080
-	metricLevel       = Basic
+	clientPort    int32 = 2379
+	serverPort    int32 = 2380
+	backupPort    int32 = 8080
+	metricLevel         = Basic
+	snapshotCount int64 = 75000
)

garbageCollectionPeriod := metav1.Duration{
@@ -238,10 +239,11 @@ func createEtcd(name, namespace string) *Etcd {
"memory": resource.MustParse("1000Mi"),
},
},
-	ClientPort:   &clientPort,
-	ServerPort:   &serverPort,
-	ClientUrlTLS: clientTlsConfig,
-	PeerUrlTLS:   peerTlsConfig,
+	ClientPort:    &clientPort,
+	ServerPort:    &serverPort,
+	SnapshotCount: &snapshotCount,
+	ClientUrlTLS:  clientTlsConfig,
+	PeerUrlTLS:    peerTlsConfig,
},
},
}
2 changes: 1 addition & 1 deletion api/v1alpha1/helper.go
@@ -31,7 +31,7 @@ func GetServiceAccountName(etcdObjMeta metav1.ObjectMeta) string {

// GetConfigMapName returns the name of the configmap for the Etcd.
func GetConfigMapName(etcdObjMeta metav1.ObjectMeta) string {
-	return fmt.Sprintf("etcd-bootstrap-%s", string(etcdObjMeta.UID[:6]))
+	return fmt.Sprintf("%s-config", etcdObjMeta.Name)
}

// GetCompactionJobName returns the compaction job name for the Etcd.
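The practical effect of this rename, sketched for an `Etcd` resource named `etcd-main` (the UID prefix below is hypothetical):

```yaml
# Before: ConfigMap name derived from the Etcd UID
metadata:
  name: etcd-bootstrap-3f8a1c   # "3f8a1c" is a hypothetical 6-character UID prefix
# After: ConfigMap name derived from the Etcd name
metadata:
  name: etcd-main-config
```

Deriving the name from `metadata.name` makes it predictable for operators, in line with the consistency goal stated in the commit title.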
2 changes: 1 addition & 1 deletion api/v1alpha1/helper_test.go
@@ -54,7 +54,7 @@ func TestGetConfigMapName(t *testing.T) {
uid := uuid.NewUUID()
etcdObjMeta := createEtcdObjectMetadata(uid, nil, nil, false)
configMapName := GetConfigMapName(etcdObjMeta)
-	g.Expect(configMapName).To(Equal("etcd-bootstrap-" + string(uid[:6])))
+	g.Expect(configMapName).To(Equal(etcdObjMeta.Name + "-config"))
}

func TestGetCompactionJobName(t *testing.T) {
5 changes: 5 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

(Generated file; diff not rendered by default.)

6 changes: 6 additions & 0 deletions config/crd/bases/crd-druid.gardener.cloud_etcds.yaml
@@ -597,6 +597,12 @@ spec:
serverPort:
format: int32
type: integer
+          snapshotCount:
+            description: |-
+              SnapshotCount defines the number of applied Raft entries to hold in-memory before compaction.
+              More info: https://etcd.io/docs/v3.4/op-guide/maintenance/#raft-log-retention
+            format: int64
+            type: integer
type: object
labels:
additionalProperties:
1 change: 1 addition & 0 deletions docs/api-reference/etcd-druid-api.md
@@ -245,6 +245,7 @@ _Appears in:_
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `quota` _[Quantity](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#quantity-resource-api)_ | Quota defines the etcd DB quota. | | |
+| `snapshotCount` _integer_ | SnapshotCount defines the number of applied Raft entries to hold in-memory before compaction.<br />More info: https://etcd.io/docs/v3.4/op-guide/maintenance/#raft-log-retention | | |
| `defragmentationSchedule` _string_ | DefragmentationSchedule defines the cron standard schedule for defragmentation of etcd. | | |
| `serverPort` _integer_ | | | |
| `clientPort` _integer_ | | | |
2 changes: 1 addition & 1 deletion docs/proposals/03-scaling-up-an-etcd-cluster.md
@@ -24,7 +24,7 @@ Now, it is detected whether peer URL was TLS enabled or not for single node etcd
- If peer URL was not TLS enabled then etcd-druid has to intervene and make sure peer URL should be TLS enabled first for the single node before marking the cluster for scale-up.

## Action taken by etcd-druid to enable the peerURL TLS
-1. Etcd-druid will update the `etcd-bootstrap` config-map with new config like initial-cluster,initial-advertise-peer-urls etc. Backup-restore will detect this change and update the member lease annotation to `member.etcd.gardener.cloud/tls-enabled: "true"`.
+1. Etcd-druid will update the `{etcd.Name}-config` config-map with new config like initial-cluster,initial-advertise-peer-urls etc. Backup-restore will detect this change and update the member lease annotation to `member.etcd.gardener.cloud/tls-enabled: "true"`.
2. In case the peer URL TLS has been changed to `enabled`: Etcd-druid will add tasks to the deployment flow:
- Check if peer TLS has been enabled for existing StatefulSet pods, by checking the member leases for the annotation `member.etcd.gardener.cloud/tls-enabled`.
- If peer TLS enablement is pending for any of the members, then check and patch the StatefulSet with the peer TLS volume mounts, if not already patched. This will cause a rolling update of the existing StatefulSet pods, which allows etcd-backup-restore to update the member peer URL in the etcd cluster.
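The member lease annotation referenced in step 1 above would look roughly like this (a sketch; the lease name follows the `<etcd-name>-<ordinal>` pattern used in the recovery guide below, and only the relevant annotation is shown):

```yaml
apiVersion: coordination.k8s.io/v1
kind: Lease
metadata:
  name: etcd-main-0   # illustrative member lease
  annotations:
    member.etcd.gardener.cloud/tls-enabled: "true"
```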
73 changes: 62 additions & 11 deletions docs/usage/recovering-etcd-clusters.md
@@ -6,9 +6,9 @@ For a multi-node `Etcd` cluster quorum loss can either be `Transient` or `Perman

## Transient quorum loss

-If quorum is lost through transient network failures (e.g. n/w partitions), spike in resource usage which results in OOM, `etcd` automatically and safely resumes (once the network recovers or the resource consumption has come down) and restores quorum. In other cases like transient power loss, etcd persists the Raft log to disk and replays the log to the point of failure and resumes cluster operation.
+If quorum is lost through transient network failures (e.g. n/w partitions) or there is a spike in resource usage which results in OOM, `etcd` automatically and safely resumes (once the network recovers or the resource consumption has come down) and restores quorum. In other cases like transient power loss, etcd persists the Raft log to disk and replays the log to the point of failure and resumes cluster operation.

-## Permanent quorum loss
+## Permanent quorum loss

In case the quorum is lost due to hardware failures or disk corruption etc, automatic recovery is no longer possible and it is categorized as a permanent quorum loss.

@@ -43,6 +43,7 @@ Identify the etcd-cluster which has a permanent quorum loss. Most of the resourc
To ensure that only one actor (in this case an operator) makes changes to the `Etcd` resource and also to the `Etcd` cluster resources, following must be done:

Add the annotation to the `Etcd` resource:
+
```bash
kubectl annotate etcd <etcd-name> -n <namespace> druid.gardener.cloud/suspend-etcd-spec-reconcile=
```
Expand Down Expand Up @@ -74,6 +75,7 @@ kubectl delete pvc -l instance=<sts-name> -n <namespace>
For a `n` member `Etcd` cluster there should be `n` member `Lease` objects. The lease names should start with the `Etcd` name.

Example leases for a 3 node `Etcd` cluster:
+
```b
NAME HOLDER AGE
<etcd-name>-0 4c37667312a3912b:Member 1m
@@ -82,6 +84,7 @@
```

Delete all the member leases.
+
```bash
kubectl delete lease <space separated lease names>
# Alternatively you can use label selector. From v0.23.0 onwards leases will have common set of labels
@@ -90,18 +93,66 @@ kubectl delete lease -l app.kubernetes.io.component=etcd-member-lease, app.kuber

#### 05-Modify ConfigMap

-Prerequisite to scale up etcd-cluster from 0->1 is to change `initial-cluster` in the ConfigMap. Assuming that prior to scale-down to 0, there were 3 members, the `initial-cluster` field would look like the following (assuming that the name of the etcd resource is `etcd-main`):
+Prerequisite to scale up etcd-cluster from 0->1 is to change the fields `initial-cluster`, `initial-advertise-peer-urls`, and `advertise-client-urls` in the ConfigMap.
+
+Assuming that prior to scale-down to 0, there were 3 members:
+
+The `initial-cluster` field would look like the following (assuming that the name of the etcd resource is `etcd-main`):
+
```yaml
# Initial cluster
initial-cluster: etcd-main-0=https://etcd-main-0.etcd-main-peer.default.svc:2380,etcd-main-1=https://etcd-main-1.etcd-main-peer.default.svc:2380,etcd-main-2=https://etcd-main-2.etcd-main-peer.default.svc:2380
```
-Change the `initial-cluster` field to have only one member (in this case `etc-main-0`). After the change it should look like:
-```bash
+Change the `initial-cluster` field to have only one member (in this case `etcd-main-0`). After the change it should look like:
+
+```yaml
# Initial cluster
initial-cluster: etcd-main-0=https://etcd-main-0.etcd-main-peer.default.svc:2380
```
+
+The `initial-advertise-peer-urls` field would look like the following:
+
+```yaml
+# Initial advertise peer urls
+initial-advertise-peer-urls:
+  etcd-main-0:
+  - http://etcd-main-0.etcd-main-peer.default.svc:2380
+  etcd-main-1:
+  - http://etcd-main-1.etcd-main-peer.default.svc:2380
+  etcd-main-2:
+  - http://etcd-main-2.etcd-main-peer.default.svc:2380
+```
+
+Change the `initial-advertise-peer-urls` field to have only one member (in this case `etcd-main-0`). After the change it should look like:
+
+```yaml
+# Initial advertise peer urls
+initial-advertise-peer-urls:
+  etcd-main-0:
+  - http://etcd-main-0.etcd-main-peer.default.svc:2380
+```
+
+The `advertise-client-urls` field would look like the following:
+
+```yaml
+advertise-client-urls:
+  etcd-main-0:
+  - http://etcd-main-0.etcd-main-peer.default.svc:2379
+  etcd-main-1:
+  - http://etcd-main-1.etcd-main-peer.default.svc:2379
+  etcd-main-2:
+  - http://etcd-main-2.etcd-main-peer.default.svc:2379
+```
+
+Change the `advertise-client-urls` field to have only one member (in this case `etcd-main-0`). After the change it should look like:
+
+```yaml
+advertise-client-urls:
+  etcd-main-0:
+  - http://etcd-main-0.etcd-main-peer.default.svc:2379
+```
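Taken together, the three edits above should leave the ConfigMap advertising only the surviving member. A sketch of the resulting keys (all other configuration omitted, values as in the examples above):

```yaml
initial-cluster: etcd-main-0=https://etcd-main-0.etcd-main-peer.default.svc:2380
initial-advertise-peer-urls:
  etcd-main-0:
  - http://etcd-main-0.etcd-main-peer.default.svc:2380
advertise-client-urls:
  etcd-main-0:
  - http://etcd-main-0.etcd-main-peer.default.svc:2379
```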

#### 06-Scale up Etcd cluster to size 1

```bash
@@ -111,6 +162,7 @@ kubectl scale sts <sts-name> -n <namespace> --replicas=1
#### 07-Wait for Single-Member etcd cluster to be completely ready

To check if the `single-member` etcd cluster is ready check the status of the pod.
+
```bash
kubectl get pods <etcd-name-0> -n <namespace>
NAME READY STATUS RESTARTS AGE
@@ -122,6 +174,7 @@ If both containers report readiness (as seen above), then the etcd-cluster is co
#### 08-Enable Etcd reconciliation and resource protection

All manual changes are now done. We must now re-enable etcd-cluster resource protection and also enable reconciliation by etcd-druid by doing the following:
+
```bash
kubectl annotate etcd <etcd-name> -n <namespace> druid.gardener.cloud/suspend-etcd-spec-reconcile-
kubectl annotate etcd <etcd-name> -n <namespace> druid.gardener.cloud/disable-etcd-component-protection-
@@ -136,8 +189,9 @@ kubectl scale sts <sts-name> -n namespace --replicas=3
```

If etcd-druid has been set up with `--enable-etcd-spec-auto-reconcile` switched-off then to ensure reconciliation one must annotate `Etcd` resource with the following command:
+
```bash
-# Annotate etcd-test CR to reconcile
+# Annotate etcd CR to reconcile
kubectl annotate etcd <etcd-name> -n <namespace> gardener.cloud/operation="reconcile"
```

Expand All @@ -154,21 +208,18 @@ NAME READY STATUS RESTARTS AGE
```

Additionally, check if the `Etcd` CR is ready:
+
```bash
kubectl get etcd <etcd-name> -n <namespace>
NAME READY AGE
<etcd-name> true 13d
```

Check member leases, whose `holderIdentity` should reflect the member role. Check if all members are voting members (their role should either be `Member` or `Leader`). Monitor the leases for some time and check if the leases are getting updated. You can monitor the `AGE` field.
+
```bash
NAME HOLDER AGE
<etcd-name>-0 4c37667312a3912b:Member 1m
<etcd-name>-1 75a9b74cfd3077cc:Member 1m
<etcd-name>-2 c62ee6af755e890d:Leader 1m
```
-
-
-
-
-
49 changes: 36 additions & 13 deletions internal/component/configmap/configmap_test.go
@@ -7,7 +7,6 @@ package configmap
import (
	"context"
	"fmt"
-	"strconv"
"testing"

druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1"
Expand Down Expand Up @@ -306,12 +305,6 @@ func newConfigMap(g *WithT, etcd *druidv1alpha1.Etcd) *corev1.ConfigMap {
return cm
}

-func ensureConfigMapExists(g *WithT, cl client.WithWatch, etcd *druidv1alpha1.Etcd) {
-	cm, err := getLatestConfigMap(cl, etcd)
-	g.Expect(err).ToNot(HaveOccurred())
-	g.Expect(cm).ToNot(BeNil())
-}
-
func getLatestConfigMap(cl client.Client, etcd *druidv1alpha1.Etcd) (*corev1.ConfigMap, error) {
cm := &corev1.ConfigMap{}
err := cl.Get(context.Background(), client.ObjectKey{Name: druidv1alpha1.GetConfigMapName(etcd.ObjectMeta), Namespace: etcd.Namespace}, cm)
@@ -341,10 +334,10 @@ func matchConfigMap(g *WithT, etcd *druidv1alpha1.Etcd, actualConfigMap corev1.C
err := yaml.Unmarshal([]byte(actualETCDConfigYAML), &actualETCDConfig)
g.Expect(err).ToNot(HaveOccurred())
	g.Expect(actualETCDConfig).To(MatchKeys(IgnoreExtras|IgnoreMissing, Keys{
-		"name": Equal(fmt.Sprintf("etcd-%s", etcd.UID[:6])),
+		"name": Equal("etcd-config"),
		"data-dir": Equal(fmt.Sprintf("%s/new.etcd", common.VolumeMountPathEtcdData)),
		"metrics": Equal(string(druidv1alpha1.Basic)),
-		"snapshot-count": Equal(int64(75000)),
+		"snapshot-count": Equal(ptr.Deref(etcd.Spec.Etcd.SnapshotCount, defaultSnapshotCount)),
		"enable-v2": Equal(false),
		"quota-backend-bytes": Equal(etcd.Spec.Etcd.Quota.Value()),
		"initial-cluster-token": Equal("etcd-cluster"),
@@ -360,7 +353,7 @@ func matchClientTLSRelatedConfiguration(g *WithT, etcd *druidv1alpha1.Etcd, actu
	if etcd.Spec.Etcd.ClientUrlTLS != nil {
		g.Expect(actualETCDConfig).To(MatchKeys(IgnoreExtras|IgnoreMissing, Keys{
			"listen-client-urls": Equal(fmt.Sprintf("https://0.0.0.0:%d", ptr.Deref(etcd.Spec.Etcd.ClientPort, common.DefaultPortEtcdClient))),
-			"advertise-client-urls": Equal(fmt.Sprintf("https@%s@%s@%d", druidv1alpha1.GetPeerServiceName(etcd.ObjectMeta), etcd.Namespace, ptr.Deref(etcd.Spec.Etcd.ClientPort, common.DefaultPortEtcdClient))),
+			"advertise-client-urls": Equal(expectedAdvertiseURLsAsInterface(etcd, advertiseURLTypeClient, "https")),
"client-transport-security": MatchKeys(IgnoreExtras, Keys{
"cert-file": Equal("/var/etcd/ssl/server/tls.crt"),
"key-file": Equal("/var/etcd/ssl/server/tls.key"),
@@ -377,8 +370,38 @@ func matchClientTLSRelatedConfiguration(g *WithT, etcd *druidv1alpha1.Etcd, actu
}
}

+func expectedAdvertiseURLs(etcd *druidv1alpha1.Etcd, advertiseURLType, scheme string) map[string][]string {
+	var port int32
+	switch advertiseURLType {
+	case advertiseURLTypePeer:
+		port = ptr.Deref(etcd.Spec.Etcd.ServerPort, common.DefaultPortEtcdPeer)
+	case advertiseURLTypeClient:
+		port = ptr.Deref(etcd.Spec.Etcd.ClientPort, common.DefaultPortEtcdClient)
+	default:
+		return nil
+	}
+	advUrlsMap := make(map[string][]string)
+	for i := 0; i < int(etcd.Spec.Replicas); i++ {
+		podName := druidv1alpha1.GetOrdinalPodName(etcd.ObjectMeta, i)
+		advUrlsMap[podName] = []string{fmt.Sprintf("%s://%s.%s.%s.svc:%d", scheme, podName, druidv1alpha1.GetPeerServiceName(etcd.ObjectMeta), etcd.Namespace, port)}
+	}
+	return advUrlsMap
+}
+
+func expectedAdvertiseURLsAsInterface(etcd *druidv1alpha1.Etcd, advertiseURLType, scheme string) map[string]interface{} {
+	advertiseUrlsMap := expectedAdvertiseURLs(etcd, advertiseURLType, scheme)
+	advertiseUrlsInterface := make(map[string]interface{}, len(advertiseUrlsMap))
+	for podName, urlList := range advertiseUrlsMap {
+		urlsListInterface := make([]interface{}, len(urlList))
+		for i, url := range urlList {
+			urlsListInterface[i] = url
+		}
+		advertiseUrlsInterface[podName] = urlsListInterface
+	}
+	return advertiseUrlsInterface
+}
+
func matchPeerTLSRelatedConfiguration(g *WithT, etcd *druidv1alpha1.Etcd, actualETCDConfig map[string]interface{}) {
-	peerSvcName := druidv1alpha1.GetPeerServiceName(etcd.ObjectMeta)
if etcd.Spec.Etcd.PeerUrlTLS != nil {
g.Expect(actualETCDConfig).To(MatchKeys(IgnoreExtras|IgnoreMissing, Keys{
"peer-transport-security": MatchKeys(IgnoreExtras, Keys{
@@ -389,12 +412,12 @@ func matchPeerTLSRelatedConfiguration(g *WithT, etcd *druidv1alpha1.Etcd, actual
"auto-tls": Equal(false),
			}),
			"listen-peer-urls": Equal(fmt.Sprintf("https://0.0.0.0:%d", ptr.Deref(etcd.Spec.Etcd.ServerPort, common.DefaultPortEtcdPeer))),
-			"initial-advertise-peer-urls": Equal(fmt.Sprintf("https@%s@%s@%s", peerSvcName, etcd.Namespace, strconv.Itoa(int(ptr.Deref(etcd.Spec.Etcd.ServerPort, common.DefaultPortEtcdPeer))))),
+			"initial-advertise-peer-urls": Equal(expectedAdvertiseURLsAsInterface(etcd, advertiseURLTypePeer, "https")),
}))
} else {
		g.Expect(actualETCDConfig).To(MatchKeys(IgnoreExtras|IgnoreMissing, Keys{
			"listen-peer-urls": Equal(fmt.Sprintf("http://0.0.0.0:%d", ptr.Deref(etcd.Spec.Etcd.ServerPort, common.DefaultPortEtcdPeer))),
-			"initial-advertise-peer-urls": Equal(fmt.Sprintf("http@%s@%s@%s", peerSvcName, etcd.Namespace, strconv.Itoa(int(ptr.Deref(etcd.Spec.Etcd.ServerPort, common.DefaultPortEtcdPeer))))),
+			"initial-advertise-peer-urls": Equal(expectedAdvertiseURLsAsInterface(etcd, advertiseURLTypePeer, "http")),
}))
g.Expect(actualETCDConfig).ToNot(HaveKey("peer-transport-security"))
}
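For reference, the old assertions expected the advertise URLs packed into a single `scheme@peer-service@namespace@port` string, while the new helpers build the map-of-lists shape that the ConfigMap now renders. A sketch of that shape for a hypothetical 3-replica `etcd-main` in namespace `default` with TLS-enabled client URLs:

```yaml
advertise-client-urls:
  etcd-main-0:
  - https://etcd-main-0.etcd-main-peer.default.svc:2379
  etcd-main-1:
  - https://etcd-main-1.etcd-main-peer.default.svc:2379
  etcd-main-2:
  - https://etcd-main-2.etcd-main-peer.default.svc:2379
```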
