Skip to content

Commit

Permalink
Update to add resize cluster support & Bug fixes
Browse files Browse the repository at this point in the history
1. Add resize cluster support.
2. Fix failure when editing the cluster name.
3. Fix failure when editing the security group.
4. Handle cluster upgrade failures properly.
  • Loading branch information
STARRY-S committed Nov 30, 2023
1 parent 10f0668 commit ab128b4
Show file tree
Hide file tree
Showing 9 changed files with 172 additions and 53 deletions.
5 changes: 1 addition & 4 deletions Dockerfile.dapper
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
FROM registry.suse.com/bci/golang:1.20
FROM registry.suse.com/bci/golang:1.21

ARG DAPPER_HOST_ARCH
ENV ARCH=${DAPPER_HOST_ARCH}

ARG HTTP_PROXY
ARG HTTPS_PROXY
ENV HTTP_PROXY=${HTTP_PROXY} HTTPS_PROXY=${HTTPS_PROXY}
RUN zypper ref && \
zypper -n up && \
zypper -n in vim wget git tar gzip && \
Expand Down
3 changes: 3 additions & 0 deletions charts/cce-operator-crd/templates/crds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,9 @@ spec:
phase:
nullable: true
type: string
resizeClusterJobID:
nullable: true
type: string
upgradeClusterTaskID:
nullable: true
type: string
Expand Down
15 changes: 13 additions & 2 deletions examples/docs/parameters-zh_CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,21 @@
"name": "example-update", // 集群名称
"huaweiCredentialSecret": "cattle-global-data:cc-xxxxx", // 更新云凭证
"description": "", // 更新集群描述
// "version": "v1.25" // 集群升级(暂不支持)
// 集群升级,直接改动集群版本即可升级。例如将 1.25 改为 1.27,即升级至 1.27 版本。
// 需要注意集群升级前需要手动检查所有插件必须运行正常(没有未就绪的工作负载),否则会造成升级失败。
"version": "v1.25",
"hostNetwork": {
"securityGroup": "SECURITY-GROUP-ID" // Security Group
"securityGroup": "SECURITY-GROUP-ID" // 修改节点默认安全组
},
// 变更集群 (Resize) 存在约束限制:https://support.huaweicloud.com/usermanual-cce/cce_10_0403.html
// 需要额外注意以下几点:
// 1. 变更集群规格不支持修改控制节点数量。例如无法将 s1 修改为 s2
// 2. 不支持降低集群规格。例如不能将 medium 降为 small
// 3. 单控制节点的集群不允许变更到 1000 节点及以上。例如 s1.small (50 节点) 只能升级到 s1.medium (200 节点)。
// Resize 请求可能遇到规格不足:Insufficient resources under expected master specifications 错误。
"flavor": "cce.s1.medium", // s1:单控制节点CCE集群。
// s2:多控制节点CCE集群 (高可用)。
// small (最大 50 节点), medium (200 节点), large (1k 节点), xlarge (2k 节点)
"nodePools": [ // 可编辑节点池数组,用于增加/删除节点池
{
"name": "nodepool-1", // 节点池名称,可编辑
Expand Down
27 changes: 14 additions & 13 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
module github.com/cnrancher/cce-operator

go 1.19
go 1.21

require (
github.com/Masterminds/semver/v3 v3.2.1
github.com/antonfisher/nested-logrus-formatter v1.3.1
github.com/huaweicloud/huaweicloud-sdk-go-v3 v0.1.57
github.com/huaweicloud/huaweicloud-sdk-go-v3 v0.1.69
github.com/rancher/lasso v0.0.0-20230830164424-d684fdeb6f29
github.com/rancher/wrangler v1.1.1
github.com/rancher/wrangler-api v0.6.1-0.20200427172631-a7c2f09b783e
github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.8.4
k8s.io/api v0.27.6
k8s.io/apimachinery v0.27.6
k8s.io/client-go v0.27.6
k8s.io/api v0.27.8
k8s.io/apimachinery v0.27.8
k8s.io/client-go v0.27.8
)

require (
Expand Down Expand Up @@ -48,17 +48,18 @@ require (
github.com/prometheus/common v0.32.1 // indirect
github.com/prometheus/procfs v0.7.3 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/tjfoc/gmsm v1.4.1 // indirect
go.mongodb.org/mongo-driver v1.12.0 // indirect
golang.org/x/crypto v0.11.0 // indirect
golang.org/x/mod v0.9.0 // indirect
golang.org/x/net v0.12.0 // indirect
golang.org/x/crypto v0.14.0 // indirect
golang.org/x/mod v0.12.0 // indirect
golang.org/x/net v0.17.0 // indirect
golang.org/x/oauth2 v0.10.0 // indirect
golang.org/x/sync v0.1.0 // indirect
golang.org/x/sys v0.10.0 // indirect
golang.org/x/term v0.10.0 // indirect
golang.org/x/text v0.11.0 // indirect
golang.org/x/sync v0.3.0 // indirect
golang.org/x/sys v0.13.0 // indirect
golang.org/x/term v0.13.0 // indirect
golang.org/x/text v0.13.0 // indirect
golang.org/x/time v0.3.0 // indirect
golang.org/x/tools v0.7.0 // indirect
golang.org/x/tools v0.12.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.31.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
Expand Down
58 changes: 35 additions & 23 deletions go.sum

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pkg/apis/cce.pandaria.io/v1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ type CCEClusterConfigStatus struct {
CreatedSNatRuleEIPID string `json:"createdSNatRuleEIPID"` // EIP ID for SNAT Rule
CreatedSNATRuleID string `json:"createdSNATRuleID"` // SNAT Rule ID

ResizeClusterJobID string `json:"resizeClusterJobID"` // resize cluster job ID
UpgradeClusterTaskID string `json:"upgradeClusterTaskID"` // upgrade cluster task ID
}

Expand Down
93 changes: 82 additions & 11 deletions pkg/controller/cce-cluster-config-handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"net/url"
"time"

"github.com/Masterminds/semver/v3"
ccev1 "github.com/cnrancher/cce-operator/pkg/apis/cce.pandaria.io/v1"
ccecontrollers "github.com/cnrancher/cce-operator/pkg/generated/controllers/cce.pandaria.io/v1"
"github.com/cnrancher/cce-operator/pkg/huawei"
Expand Down Expand Up @@ -630,8 +631,29 @@ func (h *Handler) checkAndUpdate(config *ccev1.CCEClusterConfig) (*ccev1.CCEClus
return config, err
}

// Get cluster status.
cluster, err := cce.ShowCluster(driver.CCE, config.Spec.ClusterID)
if err != nil {
return config, err
}
if cluster == nil || cluster.Status == nil || cluster.Spec == nil || cluster.Spec.HostNetwork == nil {
return config, fmt.Errorf("GetCluster returns invalid data")
}

// Check cluster upgrade status.
if config.Status.UpgradeClusterTaskID != "" {
ok, err := clusterUpgradeable(config.Spec.Version, *cluster.Spec.Version)
if err != nil {
return config, err
}
if !ok {
// Delete UpgradeClusterTaskID if the config cluster version matches
// the upstream cluster version.
config = config.DeepCopy()
config.Status.UpgradeClusterTaskID = ""
return h.cceCC.UpdateStatus(config)
}

res, err := cce.ShowUpgradeClusterTask(driver.CCE, config.Spec.ClusterID, config.Status.UpgradeClusterTaskID)
if err != nil {
hwerr, _ := huawei.NewHuaweiError(err)
Expand All @@ -649,11 +671,14 @@ func (h *Handler) checkAndUpdate(config *ccev1.CCEClusterConfig) (*ccev1.CCEClus
logrus.WithFields(logrus.Fields{
"cluster": config.Name,
"phase": config.Status.Phase,
}).Infof("cluster [%s] finished upgrade",
config.Spec.Name)
}).Infof("cluster [%s] upgrade to [%v]",
config.Spec.Name, utils.Value(cluster.Spec.Version))
config = config.DeepCopy()
config.Status.UpgradeClusterTaskID = ""
return h.cceCC.UpdateStatus(config)
case "Failed":
return config, fmt.Errorf("failed to upgrade cluster [%s] to %v, status [%s]",
config.Spec.Name, config.Spec.Version, utils.Value(res.Status.Phase))
default:
logrus.WithFields(logrus.Fields{
"cluster": config.Name,
Expand All @@ -667,13 +692,6 @@ func (h *Handler) checkAndUpdate(config *ccev1.CCEClusterConfig) (*ccev1.CCEClus
}

// Check cluster status.
cluster, err := cce.ShowCluster(driver.CCE, config.Spec.ClusterID)
if err != nil {
return config, err
}
if cluster == nil || cluster.Status == nil || cluster.Spec == nil || cluster.Spec.HostNetwork == nil {
return config, fmt.Errorf("GetCluster returns invalid data")
}
switch utils.Value(cluster.Status.Phase) {
case cce.ClusterStatusDeleting,
cce.ClusterStatusResizing,
Expand Down Expand Up @@ -816,8 +834,10 @@ func (h *Handler) updateUpstreamClusterState(
return config, nil
}

// Update security group ID for created cluster.
if config.Spec.HostNetwork.SecurityGroup != upstreamSpec.HostNetwork.SecurityGroup {
// Init security group ID for created cluster if the security group wasn't
// provided when creating the cluster.
if config.Spec.HostNetwork.SecurityGroup == "" &&
config.Spec.HostNetwork.SecurityGroup != upstreamSpec.HostNetwork.SecurityGroup {
configUpdate := config.DeepCopy()
configUpdate.Spec.HostNetwork.SecurityGroup = upstreamSpec.HostNetwork.SecurityGroup
config, err = h.cceCC.Update(configUpdate)
Expand All @@ -832,6 +852,57 @@ func (h *Handler) updateUpstreamClusterState(
} else if ok {
return h.upgradeCluster(config)
}
// Check cluster flavor is resizable.
var clusterResizable = false
if config.Spec.Flavor != "" && config.Spec.Flavor != upstreamSpec.Flavor {
cv, err := semver.NewVersion(config.Spec.Version)
if err != nil {
return config, err
}
minVersion := semver.New(1, 15, 0, "", "")
if cv.Compare(minVersion) >= 0 {
clusterResizable = true
}
}
if clusterResizable {
logrus.WithFields(logrus.Fields{
"cluster": config.Name,
"phase": config.Status.Phase,
}).Infof("cluster [%s] flavor change detected: %v -> %v",
config.Spec.Name, upstreamSpec.Flavor, config.Spec.Flavor)

res, err := cce.ResizeCluster(
driver.CCE,
config.Spec.ClusterID,
config.Spec.Flavor,
config.Spec.ExtendParam.IsAutoPay,
)
if err != nil {
return config, err
}
if res == nil || res.JobID == nil {
return config, fmt.Errorf("ResizeCluster returns invalid data")
}
logrus.WithFields(logrus.Fields{
"cluster": config.Name,
"phase": config.Status.Phase,
}).Infof("start resize cluster [%s] job ID %q",
config.Spec.Name, utils.Value(res.JobID))
err = retry.RetryOnConflict(retry.DefaultRetry, func() error {
config, err = h.cceCC.Get(config.Namespace, config.Name, metav1.GetOptions{})
if err != nil {
return err
}
configUpdate := config.DeepCopy()
configUpdate.Status.ResizeClusterJobID = utils.Value(res.JobID)
config, err = h.cceCC.UpdateStatus(configUpdate)
return err
})
if err != nil {
return config, err
}
return h.enqueueUpdate(config)
}

// Update cluster info.
if _, err = cce.UpdateCluster(driver.CCE, config); err != nil {
Expand Down
4 changes: 4 additions & 0 deletions pkg/controller/upstream.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ func BuildUpstreamClusterState(
KubeProxyMode: c.Spec.KubeProxyMode.Value(),
PublicAccess: false,
}
if utils.Value(c.Metadata.Alias) != "" && spec.Name != utils.Value(c.Metadata.Alias) {
// Set cluster name to edited alias instead of the original name.
spec.Name = utils.Value(c.Metadata.Alias)
}
if c.Spec.HostNetwork != nil {
spec.HostNetwork.VpcID = c.Spec.HostNetwork.Vpc
spec.HostNetwork.SubnetID = c.Spec.HostNetwork.Subnet
Expand Down
19 changes: 19 additions & 0 deletions pkg/huawei/cce/cce.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,25 @@ func ShowUpgradeClusterTask(
return res, err
}

// ResizeCluster submits a resize request for the cluster identified by ID,
// asking for the given flavor. isAutoPay is forwarded verbatim in the
// request's extend parameters.
//
// The SDK response and error are returned to the caller unchanged. When the
// call fails, the request that was sent is dumped at debug level to help
// with troubleshooting.
func ResizeCluster(
	client *cce.CceClient, ID, flavor, isAutoPay string,
) (*model.ResizeClusterResponse, error) {
	// Assemble the request from the innermost piece outwards.
	extendParam := model.ResizeClusterRequestExtendParam{
		IsAutoPay: &isAutoPay,
	}
	body := model.ResizeClusterRequestBody{
		FlavorResize: flavor,
		ExtendParam:  &extendParam,
	}
	request := &model.ResizeClusterRequest{
		ClusterId: ID,
		Body:      &body,
	}

	response, err := client.ResizeCluster(request)
	if err != nil {
		logrus.Debugf("ResizeCluster failed: %v", utils.PrintObject(request))
	}
	return response, err
}

func DeleteCluster(client *cce.CceClient, ID string) (*model.DeleteClusterResponse, error) {
res, err := client.DeleteCluster(&model.DeleteClusterRequest{
ClusterId: ID,
Expand Down

0 comments on commit ab128b4

Please sign in to comment.