Skip to content

Commit

Permalink
rename configMap | add snapshotCount to spec.etcd to make the snapsho…
Browse files Browse the repository at this point in the history
…t-count configurable | use proper url formatting for peer and client urls in etcd config
  • Loading branch information
anveshreddy18 committed Oct 7, 2024
1 parent 549d4a4 commit 932759c
Show file tree
Hide file tree
Showing 15 changed files with 85 additions and 38 deletions.
4 changes: 4 additions & 0 deletions api/v1alpha1/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ type EtcdConfig struct {
// Quota defines the etcd DB quota.
// +optional
Quota *resource.Quantity `json:"quota,omitempty"`
// SnapshotCount defines the number of applied Raft entries to hold in-memory before compaction.
// More info: https://etcd.io/docs/v3.4/op-guide/maintenance/#raft-log-retention
// +optional
SnapshotCount *int64 `json:"snapshotCount,omitempty"`
// DefragmentationSchedule defines the cron standard schedule for defragmentation of etcd.
// +optional
DefragmentationSchedule *string `json:"defragmentationSchedule,omitempty"`
Expand Down
18 changes: 10 additions & 8 deletions api/v1alpha1/etcd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,11 @@ func TestIsReconciliationInProgress(t *testing.T) {

func createEtcd(name, namespace string) *Etcd {
var (
clientPort int32 = 2379
serverPort int32 = 2380
backupPort int32 = 8080
metricLevel = Basic
clientPort int32 = 2379
serverPort int32 = 2380
backupPort int32 = 8080
metricLevel = Basic
snapshotCount int64 = 75000
)

garbageCollectionPeriod := metav1.Duration{
Expand Down Expand Up @@ -238,10 +239,11 @@ func createEtcd(name, namespace string) *Etcd {
"memory": resource.MustParse("1000Mi"),
},
},
ClientPort: &clientPort,
ServerPort: &serverPort,
ClientUrlTLS: clientTlsConfig,
PeerUrlTLS: peerTlsConfig,
ClientPort: &clientPort,
ServerPort: &serverPort,
SnapshotCount: &snapshotCount,
ClientUrlTLS: clientTlsConfig,
PeerUrlTLS: peerTlsConfig,
},
},
}
Expand Down
2 changes: 1 addition & 1 deletion api/v1alpha1/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func GetServiceAccountName(etcdObjMeta metav1.ObjectMeta) string {

// GetConfigMapName returns the name of the configmap for the Etcd.
func GetConfigMapName(etcdObjMeta metav1.ObjectMeta) string {
return fmt.Sprintf("etcd-bootstrap-%s", string(etcdObjMeta.UID[:6]))
return fmt.Sprintf("%s-config-%s", etcdObjMeta.Name, (etcdObjMeta.UID[:6]))
}

// GetCompactionJobName returns the compaction job name for the Etcd.
Expand Down
2 changes: 1 addition & 1 deletion api/v1alpha1/helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func TestGetConfigMapName(t *testing.T) {
uid := uuid.NewUUID()
etcdObjMeta := createEtcdObjectMetadata(uid, nil, nil, false)
configMapName := GetConfigMapName(etcdObjMeta)
g.Expect(configMapName).To(Equal("etcd-bootstrap-" + string(uid[:6])))
g.Expect(configMapName).To(Equal(etcdObjMeta.Name + "-config-" + string(uid[:6])))
}

func TestGetCompactionJobName(t *testing.T) {
Expand Down
5 changes: 5 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,12 @@ spec:
serverPort:
format: int32
type: integer
snapshotCount:
description: |-
SnapshotCount defines the number of applied Raft entries to hold in-memory before compaction.
More info: https://etcd.io/docs/v3.4/op-guide/maintenance/#raft-log-retention
format: int64
type: integer
type: object
labels:
additionalProperties:
Expand Down
6 changes: 6 additions & 0 deletions config/crd/bases/crd-druid.gardener.cloud_etcds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,12 @@ spec:
serverPort:
format: int32
type: integer
snapshotCount:
description: |-
SnapshotCount defines the number of applied Raft entries to hold in-memory before compaction.
More info: https://etcd.io/docs/v3.4/op-guide/maintenance/#raft-log-retention
format: int64
type: integer
type: object
labels:
additionalProperties:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Target the control plane of affected shoot cluster via `kubectl`. Alternatively,
Volumes:
etcd-config-file:
Type: ConfigMap (a volume populated by a ConfigMap)
Name: etcd-bootstrap-4785b0
Name: etcd-main-config-4785b0
Optional: false
```
Expand Down Expand Up @@ -68,7 +68,7 @@ Target the control plane of affected shoot cluster via `kubectl`. Alternatively,
Delete all `etcd-main` member leases.
6. Edit the `etcd-main` cluster's configmap (ex: `etcd-bootstrap-4785b0`) as follows:
6. Edit the `etcd-main` cluster's configmap (ex: `etcd-main-config-4785b0`) as follows:
Find the `initial-cluster` field in the configmap. It should look similar to the following:
```
Expand Down
2 changes: 1 addition & 1 deletion docs/proposals/03-scaling-up-an-etcd-cluster.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ Now, it is detected whether peer URL was TLS enabled or not for single node etcd
- If the peer URL was not TLS-enabled, then etcd-druid has to intervene and make sure the peer URL is TLS-enabled for the single node first, before marking the cluster for scale-up.

## Action taken by etcd-druid to enable the peerURL TLS
1. Etcd-druid will update the `etcd-bootstrap` config-map with new config like initial-cluster,initial-advertise-peer-urls etc. Backup-restore will detect this change and update the member lease annotation to `member.etcd.gardener.cloud/tls-enabled: "true"`.
1. Etcd-druid will update the `{etcd.Name}-config` config-map with new configuration such as `initial-cluster`, `initial-advertise-peer-urls`, etc. Backup-restore will detect this change and update the member lease annotation to `member.etcd.gardener.cloud/tls-enabled: "true"`.
2. In case the peer URL TLS has been changed to `enabled`: Etcd-druid will add tasks to the deployment flow:
- Check if peer TLS has been enabled for existing StatefulSet pods, by checking the member leases for the annotation `member.etcd.gardener.cloud/tls-enabled`.
- If peer TLS enablement is pending for any of the members, then check and patch the StatefulSet with the peer TLS volume mounts, if not already patched. This will cause a rolling update of the existing StatefulSet pods, which allows etcd-backup-restore to update the member peer URL in the etcd cluster.
Expand Down
9 changes: 4 additions & 5 deletions internal/component/configmap/configmap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ package configmap
import (
"context"
"fmt"
"strconv"
"testing"

druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1"
Expand Down Expand Up @@ -344,7 +343,7 @@ func matchConfigMap(g *WithT, etcd *druidv1alpha1.Etcd, actualConfigMap corev1.C
"name": Equal(fmt.Sprintf("etcd-%s", etcd.UID[:6])),
"data-dir": Equal(fmt.Sprintf("%s/new.etcd", common.VolumeMountPathEtcdData)),
"metrics": Equal(string(druidv1alpha1.Basic)),
"snapshot-count": Equal(int64(75000)),
"snapshot-count": Equal(ptr.Deref(etcd.Spec.Etcd.SnapshotCount, defaultSnapshotCount)),
"enable-v2": Equal(false),
"quota-backend-bytes": Equal(etcd.Spec.Etcd.Quota.Value()),
"initial-cluster-token": Equal("etcd-cluster"),
Expand All @@ -360,7 +359,7 @@ func matchClientTLSRelatedConfiguration(g *WithT, etcd *druidv1alpha1.Etcd, actu
if etcd.Spec.Etcd.ClientUrlTLS != nil {
g.Expect(actualETCDConfig).To(MatchKeys(IgnoreExtras|IgnoreMissing, Keys{
"listen-client-urls": Equal(fmt.Sprintf("https://0.0.0.0:%d", ptr.Deref(etcd.Spec.Etcd.ClientPort, common.DefaultPortEtcdClient))),
"advertise-client-urls": Equal(fmt.Sprintf("https@%s@%s@%d", druidv1alpha1.GetPeerServiceName(etcd.ObjectMeta), etcd.Namespace, ptr.Deref(etcd.Spec.Etcd.ClientPort, common.DefaultPortEtcdClient))),
"advertise-client-urls": Equal(fmt.Sprintf("https://%s.%s:%d", druidv1alpha1.GetPeerServiceName(etcd.ObjectMeta), etcd.Namespace, ptr.Deref(etcd.Spec.Etcd.ClientPort, common.DefaultPortEtcdClient))),
"client-transport-security": MatchKeys(IgnoreExtras, Keys{
"cert-file": Equal("/var/etcd/ssl/server/tls.crt"),
"key-file": Equal("/var/etcd/ssl/server/tls.key"),
Expand Down Expand Up @@ -389,12 +388,12 @@ func matchPeerTLSRelatedConfiguration(g *WithT, etcd *druidv1alpha1.Etcd, actual
"auto-tls": Equal(false),
}),
"listen-peer-urls": Equal(fmt.Sprintf("https://0.0.0.0:%d", ptr.Deref(etcd.Spec.Etcd.ServerPort, common.DefaultPortEtcdPeer))),
"initial-advertise-peer-urls": Equal(fmt.Sprintf("https@%s@%s@%s", peerSvcName, etcd.Namespace, strconv.Itoa(int(ptr.Deref(etcd.Spec.Etcd.ServerPort, common.DefaultPortEtcdPeer))))),
"initial-advertise-peer-urls": Equal(fmt.Sprintf("https://%s.%s:%d", peerSvcName, etcd.Namespace, ptr.Deref(etcd.Spec.Etcd.ServerPort, common.DefaultPortEtcdPeer))),
}))
} else {
g.Expect(actualETCDConfig).To(MatchKeys(IgnoreExtras|IgnoreMissing, Keys{
"listen-peer-urls": Equal(fmt.Sprintf("http://0.0.0.0:%d", ptr.Deref(etcd.Spec.Etcd.ServerPort, common.DefaultPortEtcdPeer))),
"initial-advertise-peer-urls": Equal(fmt.Sprintf("http@%s@%s@%s", peerSvcName, etcd.Namespace, strconv.Itoa(int(ptr.Deref(etcd.Spec.Etcd.ServerPort, common.DefaultPortEtcdPeer))))),
"initial-advertise-peer-urls": Equal(fmt.Sprintf("http://%s.%s:%d", peerSvcName, etcd.Namespace, ptr.Deref(etcd.Spec.Etcd.ServerPort, common.DefaultPortEtcdPeer))),
}))
g.Expect(actualETCDConfig).ToNot(HaveKey("peer-transport-security"))
}
Expand Down
49 changes: 35 additions & 14 deletions internal/component/configmap/etcdconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,27 +22,22 @@ const (
defaultInitialClusterToken = "etcd-cluster"
defaultInitialClusterState = "new"
// For more information refer to https://etcd.io/docs/v3.4/op-guide/maintenance/#raft-log-retention
// TODO: Ideally this should be made configurable via Etcd resource as this has a direct impact on the memory requirements for etcd container.
// which in turn is influenced by the size of objects that are getting stored in etcd.
defaultSnapshotCount = 75000
defaultSnapshotCount = int64(75000)
)

var (
defaultDataDir = fmt.Sprintf("%s/new.etcd", common.VolumeMountPathEtcdData)
)

type tlsTarget string
type advPeerUrls map[string][]string

const (
clientTLS tlsTarget = "client"
peerTLS tlsTarget = "peer"
)
type advClientUrls map[string]string

type etcdConfig struct {
Name string `yaml:"name"`
DataDir string `yaml:"data-dir"`
Metrics druidv1alpha1.MetricsLevel `yaml:"metrics"`
SnapshotCount int `yaml:"snapshot-count"`
SnapshotCount int64 `yaml:"snapshot-count"`
EnableV2 bool `yaml:"enable-v2"`
QuotaBackendBytes int64 `yaml:"quota-backend-bytes"`
InitialClusterToken string `yaml:"initial-cluster-token"`
Expand All @@ -52,8 +47,8 @@ type etcdConfig struct {
AutoCompactionRetention string `yaml:"auto-compaction-retention"`
ListenPeerUrls string `yaml:"listen-peer-urls"`
ListenClientUrls string `yaml:"listen-client-urls"`
AdvertisePeerUrls string `yaml:"initial-advertise-peer-urls"`
AdvertiseClientUrls string `yaml:"advertise-client-urls"`
AdvertisePeerUrls advPeerUrls `yaml:"initial-advertise-peer-urls"`
AdvertiseClientUrls advClientUrls `yaml:"advertise-client-urls"`
ClientSecurity securityConfig `yaml:"client-transport-security,omitempty"`
PeerSecurity securityConfig `yaml:"peer-transport-security,omitempty"`
}
Expand All @@ -74,7 +69,7 @@ func createEtcdConfig(etcd *druidv1alpha1.Etcd) *etcdConfig {
Name: fmt.Sprintf("etcd-%s", etcd.UID[:6]),
DataDir: defaultDataDir,
Metrics: ptr.Deref(etcd.Spec.Etcd.Metrics, druidv1alpha1.Basic),
SnapshotCount: defaultSnapshotCount,
SnapshotCount: getSnapshotCount(etcd),
EnableV2: false,
QuotaBackendBytes: getDBQuotaBytes(etcd),
InitialClusterToken: defaultInitialClusterToken,
Expand All @@ -84,8 +79,8 @@ func createEtcdConfig(etcd *druidv1alpha1.Etcd) *etcdConfig {
AutoCompactionRetention: ptr.Deref(etcd.Spec.Common.AutoCompactionRetention, defaultAutoCompactionRetention),
ListenPeerUrls: fmt.Sprintf("%s://0.0.0.0:%d", peerScheme, ptr.Deref(etcd.Spec.Etcd.ServerPort, common.DefaultPortEtcdPeer)),
ListenClientUrls: fmt.Sprintf("%s://0.0.0.0:%d", clientScheme, ptr.Deref(etcd.Spec.Etcd.ClientPort, common.DefaultPortEtcdClient)),
AdvertisePeerUrls: fmt.Sprintf("%s@%s@%s@%d", peerScheme, peerSvcName, etcd.Namespace, ptr.Deref(etcd.Spec.Etcd.ServerPort, common.DefaultPortEtcdPeer)),
AdvertiseClientUrls: fmt.Sprintf("%s@%s@%s@%d", clientScheme, peerSvcName, etcd.Namespace, ptr.Deref(etcd.Spec.Etcd.ClientPort, common.DefaultPortEtcdClient)),
AdvertisePeerUrls: getAdvertisePeerUrlsMap(etcd, peerScheme, peerSvcName),
AdvertiseClientUrls: getAdvertiseClientUrlMap(etcd, clientScheme, peerSvcName),
}
if peerSecurityConfig != nil {
cfg.PeerSecurity = *peerSecurityConfig
Expand All @@ -97,6 +92,14 @@ func createEtcdConfig(etcd *druidv1alpha1.Etcd) *etcdConfig {
return cfg
}

// getSnapshotCount returns the value to write into the `snapshot-count` field of the etcd configuration.
// It is the value of etcd.Spec.Etcd.SnapshotCount when that field is set, and defaultSnapshotCount otherwise.
func getSnapshotCount(etcd *druidv1alpha1.Etcd) int64 {
	// An explicitly configured value always takes precedence over the default.
	if configured := etcd.Spec.Etcd.SnapshotCount; configured != nil {
		return *configured
	}
	return defaultSnapshotCount
}

func getDBQuotaBytes(etcd *druidv1alpha1.Etcd) int64 {
dbQuotaBytes := defaultDBQuotaBytes
if etcd.Spec.Etcd.Quota != nil {
Expand Down Expand Up @@ -129,3 +132,21 @@ func prepareInitialCluster(etcd *druidv1alpha1.Etcd, peerScheme string) string {
}
return strings.Trim(builder.String(), ",")
}

// getAdvertisePeerUrlsMap builds the value for the `initial-advertise-peer-urls` field of the etcd configuration.
// The returned map has one entry per ordinal in [0, etcd.Spec.Replicas), keyed by the pod name of that ordinal.
// Each entry is a single-element list holding the URL
// "<peerScheme>://<podName>.<peerSvcName>.<namespace>.svc:<serverPort>", where the server port falls back to
// common.DefaultPortEtcdPeer when etcd.Spec.Etcd.ServerPort is not set.
func getAdvertisePeerUrlsMap(etcd *druidv1alpha1.Etcd, peerScheme string, peerSvcName string) advPeerUrls {
	// The port is the same for every member, so resolve it (or its default) once up front.
	serverPort := ptr.Deref(etcd.Spec.Etcd.ServerPort, common.DefaultPortEtcdPeer)
	replicas := int(etcd.Spec.Replicas)

	urlsByPod := advPeerUrls{}
	for ordinal := 0; ordinal < replicas; ordinal++ {
		member := druidv1alpha1.GetOrdinalPodName(etcd.ObjectMeta, ordinal)
		peerURL := fmt.Sprintf("%s://%s.%s.%s.svc:%d", peerScheme, member, peerSvcName, etcd.Namespace, serverPort)
		urlsByPod[member] = []string{peerURL}
	}
	return urlsByPod
}

// getAdvertiseClientUrlMap builds the value for the `advertise-client-urls` field of the etcd configuration.
// The returned map has one entry per ordinal in [0, etcd.Spec.Replicas), keyed by the pod name of that ordinal.
// Each entry is the URL "<clientScheme>://<podName>.<peerSvcName>.<namespace>.svc:<clientPort>", where the
// client port falls back to common.DefaultPortEtcdClient when etcd.Spec.Etcd.ClientPort is not set.
func getAdvertiseClientUrlMap(etcd *druidv1alpha1.Etcd, clientScheme string, peerSvcName string) advClientUrls {
	// The port is the same for every member, so resolve it (or its default) once up front.
	clientPort := ptr.Deref(etcd.Spec.Etcd.ClientPort, common.DefaultPortEtcdClient)
	replicas := int(etcd.Spec.Replicas)

	urlByPod := advClientUrls{}
	for ordinal := 0; ordinal < replicas; ordinal++ {
		member := druidv1alpha1.GetOrdinalPodName(etcd.ObjectMeta, ordinal)
		urlByPod[member] = fmt.Sprintf("%s://%s.%s.%s.svc:%d", clientScheme, member, peerSvcName, etcd.Namespace, clientPort)
	}
	return urlByPod
}
4 changes: 2 additions & 2 deletions test/e2e/etcd_backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ func checkEtcdReady(ctx context.Context, cl client.Client, logger logr.Logger, e

logger.Info("Checking configmap")
cm := &corev1.ConfigMap{}
ExpectWithOffset(2, cl.Get(ctx, client.ObjectKey{Name: "etcd-bootstrap-" + string(etcd.UID[:6]), Namespace: etcd.Namespace}, cm)).To(Succeed())
ExpectWithOffset(2, cl.Get(ctx, client.ObjectKey{Name: etcd.Name + "-config-" + string(etcd.UID[:6]), Namespace: etcd.Namespace}, cm)).To(Succeed())

logger.Info("Checking client service")
svc := &corev1.Service{}
Expand Down Expand Up @@ -280,7 +280,7 @@ func deleteAndCheckEtcd(ctx context.Context, cl client.Client, logger logr.Logge
ExpectWithOffset(1,
cl.Get(
ctx,
client.ObjectKey{Name: "etcd-bootstrap-" + string(etcd.UID[:6]), Namespace: etcd.Namespace},
client.ObjectKey{Name: etcd.Name + "-config-" + string(etcd.UID[:6]), Namespace: etcd.Namespace},
&corev1.ConfigMap{},
),
).Should(matchers.BeNotFoundError())
Expand Down
6 changes: 4 additions & 2 deletions test/e2e/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,9 @@ var (
"memory": resource.MustParse("256Mi"),
},
}
etcdClientPort = int32(2379)
etcdServerPort = int32(2380)
etcdClientPort = int32(2379)
etcdServerPort = int32(2380)
etcdSnapshotCount = int64(75000)

backupPort = int32(8080)
backupFullSnapshotSchedule = "0 */1 * * *"
Expand Down Expand Up @@ -182,6 +183,7 @@ func getDefaultEtcd(name, namespace, container, prefix string, provider TestProv
Resources: &etcdResources,
ClientPort: &etcdClientPort,
ServerPort: &etcdServerPort,
SnapshotCount: &etcdSnapshotCount,
ClientUrlTLS: &etcdTLS,
}

Expand Down
4 changes: 2 additions & 2 deletions test/it/setup/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import (
k8sruntime "k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/rest"
"k8s.io/utils/pointer"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/envtest"
Expand Down Expand Up @@ -168,7 +168,7 @@ func (t *itTestEnv) startTestEnvironment(crdDirectoryPaths []string) error {
CRDDirectoryPaths: crdDirectoryPaths,
}
if useExistingK8SCluster() {
testEnv.UseExistingCluster = pointer.Bool(true)
testEnv.UseExistingCluster = ptr.To(true)
}

cfg, err := testEnv.Start()
Expand Down
2 changes: 2 additions & 0 deletions test/utils/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ var (
deltaSnapShotMemLimit = resource.MustParse("100Mi")
autoCompactionMode = druidv1alpha1.Periodic
autoCompactionRetention = "2m"
snapshotCount = int64(75000)
quota = resource.MustParse("8Gi")
localProvider = druidv1alpha1.StorageProvider("Local")
prefix = "/tmp"
Expand Down Expand Up @@ -387,6 +388,7 @@ func getDefaultEtcd(name, namespace string) *druidv1alpha1.Etcd {
Backup: getBackupSpec(),
Etcd: druidv1alpha1.EtcdConfig{
Quota: &quota,
SnapshotCount: &snapshotCount,
Metrics: &metricsBasic,
Image: &imageEtcd,
DefragmentationSchedule: &defragSchedule,
Expand Down

0 comments on commit 932759c

Please sign in to comment.