Skip to content

Commit

Permalink
Add AWSMachines to back the EC2 instances in AWSMachinePools and AWSM…
Browse files Browse the repository at this point in the history
…anagedMachinePools

Co-authored-by: Cameron McAvoy <[email protected]>
  • Loading branch information
AndiDog and cnmcavoy committed Oct 22, 2024
1 parent 8c2168b commit 99a1613
Show file tree
Hide file tree
Showing 16 changed files with 544 additions and 80 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1159,6 +1159,10 @@ spec:
can be added as events to the Machine object and/or logged in the
controller's output.
type: string
infrastructureMachineKind:
description: InfrastructureMachineKind is the kind of the infrastructure
resources behind MachinePool Machines.
type: string
instances:
description: Instances contains the status for each instance in the
pool
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1087,6 +1087,10 @@ spec:
can be added as events to the MachinePool object and/or logged in the
controller's output.
type: string
infrastructureMachineKind:
description: InfrastructureMachineKind is the kind of the infrastructure
resources behind MachinePool Machines.
type: string
launchTemplateID:
description: The ID of the launch template
type: string
Expand Down
9 changes: 9 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,14 @@ rules:
- cluster.x-k8s.io
resources:
- machines
verbs:
- delete
- get
- list
- watch
- apiGroups:
- cluster.x-k8s.io
resources:
- machines/status
verbs:
- get
Expand Down Expand Up @@ -310,6 +318,7 @@ rules:
resources:
- awsmachines
verbs:
- create
- delete
- get
- list
Expand Down
59 changes: 42 additions & 17 deletions controllers/awsmachine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,11 @@ func (r *AWSMachineReconciler) getObjectStoreService(scope scope.S3Scope) servic
return s3.NewService(scope)
}

// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachines,verbs=get;list;watch;update;patch;delete
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachines/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=controlplane.cluster.x-k8s.io,resources=*,verbs=get;list;watch
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines;machines/status,verbs=get;list;watch
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachines,verbs=create;get;list;watch;update;patch;delete
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachines/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;list;watch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines/status,verbs=get;list;watch
// +kubebuilder:rbac:groups="",resources=secrets;,verbs=get;list;watch
// +kubebuilder:rbac:groups="",resources=namespaces,verbs=get;list;watch
// +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch
Expand Down Expand Up @@ -459,6 +460,7 @@ func (r *AWSMachineReconciler) findInstance(machineScope *scope.MachineScope, ec
return instance, nil
}

//nolint:gocyclo
func (r *AWSMachineReconciler) reconcileNormal(_ context.Context, machineScope *scope.MachineScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope, elbScope scope.ELBScope, objectStoreScope scope.S3Scope) (ctrl.Result, error) {
machineScope.Trace("Reconciling AWSMachine")

Expand All @@ -482,7 +484,7 @@ func (r *AWSMachineReconciler) reconcileNormal(_ context.Context, machineScope *
}

// Make sure bootstrap data is available and populated.
if machineScope.Machine.Spec.Bootstrap.DataSecretName == nil {
if !machineScope.IsMachinePoolMachine() && machineScope.Machine.Spec.Bootstrap.DataSecretName == nil {
machineScope.Info("Bootstrap data secret reference is not yet available")
conditions.MarkFalse(machineScope.AWSMachine, infrav1.InstanceReadyCondition, infrav1.WaitingForBootstrapDataReason, clusterv1.ConditionSeverityInfo, "")
return ctrl.Result{}, nil
Expand All @@ -497,6 +499,12 @@ func (r *AWSMachineReconciler) reconcileNormal(_ context.Context, machineScope *
conditions.MarkUnknown(machineScope.AWSMachine, infrav1.InstanceReadyCondition, infrav1.InstanceNotFoundReason, err.Error())
return ctrl.Result{}, err
}
if instance == nil && machineScope.IsMachinePoolMachine() {
err = errors.New("no instance found for machine pool")
machineScope.Error(err, "unable to find instance")
conditions.MarkUnknown(machineScope.AWSMachine, infrav1.InstanceReadyCondition, infrav1.InstanceNotFoundReason, err.Error())
return ctrl.Result{}, err
}

// If the AWSMachine doesn't have our finalizer, add it.
if controllerutil.AddFinalizer(machineScope.AWSMachine, infrav1.MachineFinalizer) {
Expand Down Expand Up @@ -586,9 +594,18 @@ func (r *AWSMachineReconciler) reconcileNormal(_ context.Context, machineScope *
conditions.MarkTrue(machineScope.AWSMachine, infrav1.InstanceReadyCondition)
case infrav1.InstanceStateShuttingDown, infrav1.InstanceStateTerminated:
machineScope.SetNotReady()
machineScope.Info("Unexpected EC2 instance termination", "state", instance.State, "instance-id", *machineScope.GetInstanceID())
r.Recorder.Eventf(machineScope.AWSMachine, corev1.EventTypeWarning, "InstanceUnexpectedTermination", "Unexpected EC2 instance termination")
conditions.MarkFalse(machineScope.AWSMachine, infrav1.InstanceReadyCondition, infrav1.InstanceTerminatedReason, clusterv1.ConditionSeverityError, "")

if machineScope.IsMachinePoolMachine() {
// In an auto-scaling group, instance termination is perfectly normal on scale-down
// and therefore should not be reported as error.
machineScope.Info("EC2 instance of machine pool was terminated", "state", instance.State, "instance-id", *machineScope.GetInstanceID())
r.Recorder.Eventf(machineScope.AWSMachine, corev1.EventTypeNormal, infrav1.InstanceTerminatedReason, "EC2 instance termination")
conditions.MarkFalse(machineScope.AWSMachine, infrav1.InstanceReadyCondition, infrav1.InstanceTerminatedReason, clusterv1.ConditionSeverityInfo, "")
} else {
machineScope.Info("Unexpected EC2 instance termination", "state", instance.State, "instance-id", *machineScope.GetInstanceID())
r.Recorder.Eventf(machineScope.AWSMachine, corev1.EventTypeWarning, "InstanceUnexpectedTermination", "Unexpected EC2 instance termination")
conditions.MarkFalse(machineScope.AWSMachine, infrav1.InstanceReadyCondition, infrav1.InstanceTerminatedReason, clusterv1.ConditionSeverityError, "")
}
default:
machineScope.SetNotReady()
machineScope.Info("EC2 instance state is undefined", "state", instance.State, "instance-id", *machineScope.GetInstanceID())
Expand All @@ -599,14 +616,18 @@ func (r *AWSMachineReconciler) reconcileNormal(_ context.Context, machineScope *
}

// reconcile the deletion of the bootstrap data secret now that we have updated instance state
if deleteSecretErr := r.deleteBootstrapData(machineScope, clusterScope, objectStoreScope); deleteSecretErr != nil {
r.Log.Error(deleteSecretErr, "unable to delete secrets")
return ctrl.Result{}, deleteSecretErr
}
if !machineScope.IsMachinePoolMachine() {
if deleteSecretErr := r.deleteBootstrapData(machineScope, clusterScope, objectStoreScope); deleteSecretErr != nil {
r.Log.Error(deleteSecretErr, "unable to delete secrets")
return ctrl.Result{}, deleteSecretErr
}

if instance.State == infrav1.InstanceStateTerminated {
machineScope.SetFailureReason(capierrors.UpdateMachineError)
machineScope.SetFailureMessage(errors.Errorf("EC2 instance state %q is unexpected", instance.State))
// For machine pool machines, it is expected that the ASG terminates instances at any time,
// so no failure reason or message is set for those.
if instance.State == infrav1.InstanceStateTerminated {
machineScope.SetFailureReason(capierrors.UpdateMachineError)
machineScope.SetFailureMessage(errors.Errorf("EC2 instance state %q is unexpected", instance.State))
}
}

// tasks that can take place during all known instance states
Expand Down Expand Up @@ -876,9 +897,13 @@ func getIgnitionVersion(scope *scope.MachineScope) string {
}

func (r *AWSMachineReconciler) deleteBootstrapData(machineScope *scope.MachineScope, clusterScope cloud.ClusterScoper, objectStoreScope scope.S3Scope) error {
_, userDataFormat, err := machineScope.GetRawBootstrapDataWithFormat()
if client.IgnoreNotFound(err) != nil {
return errors.Wrap(err, "failed to get raw userdata")
var userDataFormat string
var err error
if machineScope.Machine.Spec.Bootstrap.DataSecretName != nil {
_, userDataFormat, err = machineScope.GetRawBootstrapDataWithFormat()
if client.IgnoreNotFound(err) != nil {
return errors.Wrap(err, "failed to get raw userdata")
}
}

if machineScope.UseSecretsManager(userDataFormat) {
Expand Down
12 changes: 11 additions & 1 deletion exp/api/v1beta1/conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ func (src *AWSMachinePool) ConvertTo(dstRaw conversion.Hub) error {
if restored.Spec.AvailabilityZoneSubnetType != nil {
dst.Spec.AvailabilityZoneSubnetType = restored.Spec.AvailabilityZoneSubnetType
}
dst.Status.InfrastructureMachineKind = restored.Status.InfrastructureMachineKind

if restored.Spec.AWSLaunchTemplate.PrivateDNSName != nil {
dst.Spec.AWSLaunchTemplate.PrivateDNSName = restored.Spec.AWSLaunchTemplate.PrivateDNSName
Expand Down Expand Up @@ -83,7 +84,6 @@ func (src *AWSMachinePoolList) ConvertTo(dstRaw conversion.Hub) error {
// ConvertFrom populates this v1beta1 AWSMachinePoolList receiver from the
// v1beta2 AWSMachinePoolList hub passed as srcRaw.
//
// srcRaw is type-asserted without a check, so passing any other hub type
// panics. The conversion itself is delegated to the
// Convert_v1beta2_AWSMachinePoolList_To_v1beta1_AWSMachinePoolList function
// (called with a nil scope), and its error is returned unchanged.
func (r *AWSMachinePoolList) ConvertFrom(srcRaw conversion.Hub) error {
src := srcRaw.(*infrav1exp.AWSMachinePoolList)

return Convert_v1beta2_AWSMachinePoolList_To_v1beta1_AWSMachinePoolList(src, r, nil)
}

Expand Down Expand Up @@ -114,6 +114,8 @@ func (src *AWSManagedMachinePool) ConvertTo(dstRaw conversion.Hub) error {
dst.Spec.AvailabilityZoneSubnetType = restored.Spec.AvailabilityZoneSubnetType
}

dst.Status.InfrastructureMachineKind = restored.Status.InfrastructureMachineKind

return nil
}

Expand All @@ -133,6 +135,14 @@ func Convert_v1beta2_AWSManagedMachinePoolSpec_To_v1beta1_AWSManagedMachinePoolS
return autoConvert_v1beta2_AWSManagedMachinePoolSpec_To_v1beta1_AWSManagedMachinePoolSpec(in, out, s)
}

// Convert_v1beta2_AWSMachinePoolStatus_To_v1beta1_AWSMachinePoolStatus converts
// a v1beta2 AWSMachinePoolStatus (in) into a v1beta1 AWSMachinePoolStatus (out).
// It adds no custom logic: the work is delegated to the autoConvert function
// of the same name and that function's error is returned as-is.
// NOTE(review): presumably hand-written because v1beta2 status carries fields
// (e.g. InfrastructureMachineKind) with no v1beta1 counterpart — confirm
// against the generated conversion file.
func Convert_v1beta2_AWSMachinePoolStatus_To_v1beta1_AWSMachinePoolStatus(in *infrav1exp.AWSMachinePoolStatus, out *AWSMachinePoolStatus, s apiconversion.Scope) error {
return autoConvert_v1beta2_AWSMachinePoolStatus_To_v1beta1_AWSMachinePoolStatus(in, out, s)
}

// Convert_v1beta2_AWSManagedMachinePoolStatus_To_v1beta1_AWSManagedMachinePoolStatus
// converts a v1beta2 AWSManagedMachinePoolStatus (in) into a v1beta1
// AWSManagedMachinePoolStatus (out). It adds no custom logic: the work is
// delegated to the autoConvert function of the same name and that function's
// error is returned as-is.
// NOTE(review): presumably hand-written because v1beta2 status carries fields
// (e.g. InfrastructureMachineKind) with no v1beta1 counterpart — confirm
// against the generated conversion file.
func Convert_v1beta2_AWSManagedMachinePoolStatus_To_v1beta1_AWSManagedMachinePoolStatus(in *infrav1exp.AWSManagedMachinePoolStatus, out *AWSManagedMachinePoolStatus, s apiconversion.Scope) error {
return autoConvert_v1beta2_AWSManagedMachinePoolStatus_To_v1beta1_AWSManagedMachinePoolStatus(in, out, s)
}

// ConvertTo converts the v1beta1 AWSManagedMachinePoolList receiver to a v1beta2 AWSManagedMachinePoolList.
func (src *AWSManagedMachinePoolList) ConvertTo(dstRaw conversion.Hub) error {
dst := dstRaw.(*infrav1exp.AWSManagedMachinePoolList)
Expand Down
32 changes: 12 additions & 20 deletions exp/api/v1beta1/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions exp/api/v1beta2/awsmachinepool_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,10 @@ type AWSMachinePoolStatus struct {
// +optional
LaunchTemplateVersion *string `json:"launchTemplateVersion,omitempty"`

// InfrastructureMachineKind is the kind of the infrastructure resources behind MachinePool Machines.
// +optional
InfrastructureMachineKind string `json:"infrastructureMachineKind,omitempty"`

// FailureReason will be set in the event that there is a terminal problem
// reconciling the Machine and will contain a succinct value suitable
// for machine interpretation.
Expand Down
4 changes: 4 additions & 0 deletions exp/api/v1beta2/awsmanagedmachinepool_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,10 @@ type AWSManagedMachinePoolStatus struct {
// +optional
LaunchTemplateVersion *string `json:"launchTemplateVersion,omitempty"`

// InfrastructureMachineKind is the kind of the infrastructure resources behind MachinePool Machines.
// +optional
InfrastructureMachineKind string `json:"infrastructureMachineKind,omitempty"`

// FailureReason will be set in the event that there is a terminal problem
// reconciling the MachinePool and will contain a succinct value suitable
// for machine interpretation.
Expand Down
5 changes: 5 additions & 0 deletions exp/api/v1beta2/conditions_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ const (
InstanceRefreshNotReadyReason = "InstanceRefreshNotReady"
// InstanceRefreshFailedReason is used to report when the instance refresh is not initiated.
InstanceRefreshFailedReason = "InstanceRefreshFailed"

// AWSMachineCreationFailed reports if creating AWSMachines to represent ASG (machine pool) machines failed.
AWSMachineCreationFailed = "AWSMachineCreationFailed"
// AWSMachineDeletionFailed reports if deleting AWSMachines failed.
AWSMachineDeletionFailed = "AWSMachineDeletionFailed"
)

const (
Expand Down
5 changes: 5 additions & 0 deletions exp/api/v1beta2/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ import (
infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2"
)

const (
// KindMachinePool is the Kind string of the MachinePool resource.
KindMachinePool string = "MachinePool"
)

// EBS can be used to automatically set up EBS volumes when an instance is launched.
type EBS struct {
// Encrypted is whether the volume should be encrypted or not.
Expand Down
Loading

0 comments on commit 99a1613

Please sign in to comment.