Skip to content

Commit

Permalink
Change worker node pools from uniform to flexible orchestration mode
Browse files Browse the repository at this point in the history
* Use flexible orchestration mode. Azure has started to recommend this
mode because it allows interacting with VMSS instances like regular VMs
via the CLI or via the Azure Portal
* Add options to allow worker nodes to use ephemeral local disks
  * Add `controller_disk_type` and `controller_disk_size` variables
  * Add `worker_disk_type`, `worker_disk_size`, and `worker_ephemeral_disk` variables
  • Loading branch information
dghubble committed Jul 14, 2024
1 parent a4fab61 commit 0d10d18
Show file tree
Hide file tree
Showing 12 changed files with 196 additions and 111 deletions.
8 changes: 8 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ Notable changes between versions.
* Configure worker nodes to use outbound rules and the load balancer for SNAT
* Extend network security rules to allow IPv6 traffic, analogous to IPv4
* Rename `region` variable to `location` to align with Azure platform conventions ([#1469](https://github.com/poseidon/typhoon/pull/1469))
* Change worker pools from uniform to flexible orchestration mode ([#1473](https://github.com/poseidon/typhoon/pull/1473))
* Add options to allow worker nodes to use ephemeral local disks ([#1473](https://github.com/poseidon/typhoon/pull/1473))
  * Add `controller_disk_type` and `controller_disk_size` variables
  * Add `worker_disk_type`, `worker_disk_size`, and `worker_ephemeral_disk` variables
* Reduce the number of public IPv4 addresses needed for the Azure load balancer ([#1470](https://github.com/poseidon/typhoon/pull/1470))

```diff
Expand All @@ -30,6 +34,10 @@ module "cluster" {
+ network_cidr = {
+ ipv4 = ["10.0.0.0/16"]
+ }

# optional
+ controller_disk_type = "StandardSSD_LRS"
+ worker_ephemeral_disk = true
}
```

Expand Down
4 changes: 2 additions & 2 deletions azure/fedora-coreos/kubernetes/controllers.tf
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ resource "azurerm_linux_virtual_machine" "controllers" {
source_image_id = var.os_image
os_disk {
name = "${var.cluster_name}-controller-${count.index}"
storage_account_type = var.controller_disk_type
disk_size_gb = var.controller_disk_size
caching = "None"
disk_size_gb = var.disk_size
storage_account_type = "Premium_LRS"
}

# network
Expand Down
45 changes: 35 additions & 10 deletions azure/fedora-coreos/kubernetes/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,15 @@ variable "dns_zone_group" {

# instances

variable "controller_count" {
type = number
description = "Number of controllers (i.e. masters)"
default = 1
variable "os_image" {
type = string
description = "Fedora CoreOS image for instances"
}

variable "worker_count" {

variable "controller_count" {
type = number
description = "Number of workers"
description = "Number of controllers (i.e. masters)"
default = 1
}

Expand All @@ -40,23 +40,48 @@ variable "controller_type" {
default = "Standard_B2s"
}

variable "controller_disk_type" {
type = string
description = "Type of managed disk for controller node(s)"
default = "Premium_LRS"
}

variable "controller_disk_size" {
type = number
description = "Size of the managed disk in GB for controller node(s)"
default = 30
}

variable "worker_count" {
type = number
description = "Number of workers"
default = 1
}

variable "worker_type" {
type = string
description = "Machine type for workers (see `az vm list-skus --location centralus`)"
default = "Standard_D2as_v5"
}

variable "os_image" {
variable "worker_disk_type" {
type = string
description = "Fedora CoreOS image for instances"
description = "Type of managed disk for worker nodes"
default = "Standard_LRS"
}

variable "disk_size" {
variable "worker_disk_size" {
type = number
description = "Size of the disk in GB"
description = "Size of the managed disk in GB for worker nodes"
default = 30
}

variable "worker_ephemeral_disk" {
type = bool
description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)"
default = false
}

variable "worker_priority" {
type = string
description = "Set worker priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time."
Expand Down
11 changes: 7 additions & 4 deletions azure/fedora-coreos/kubernetes/workers.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,13 @@ module "workers" {
security_group_id = azurerm_network_security_group.worker.id
backend_address_pool_ids = local.backend_address_pool_ids

worker_count = var.worker_count
vm_type = var.worker_type
os_image = var.os_image
priority = var.worker_priority
worker_count = var.worker_count
vm_type = var.worker_type
os_image = var.os_image
disk_type = var.worker_disk_type
disk_size = var.worker_disk_size
ephemeral_disk = var.worker_ephemeral_disk
priority = var.worker_priority

# configuration
kubeconfig = module.bootstrap.kubeconfig-kubelet
Expand Down
18 changes: 18 additions & 0 deletions azure/fedora-coreos/kubernetes/workers/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,24 @@ variable "os_image" {
description = "Fedora CoreOS image for instances"
}

variable "disk_type" {
type = string
description = "Type of managed disk"
default = "Standard_LRS"
}

variable "disk_size" {
type = number
description = "Size of the managed disk in GB"
default = 30
}

variable "ephemeral_disk" {
type = bool
description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)"
default = false
}

variable "priority" {
type = string
description = "Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time."
Expand Down
67 changes: 30 additions & 37 deletions azure/fedora-coreos/kubernetes/workers/workers.tf
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,29 @@ locals {
}

# Workers scale set
resource "azurerm_linux_virtual_machine_scale_set" "workers" {
name = "${var.name}-worker"
resource_group_name = var.resource_group_name
location = var.location
sku = var.vm_type
instances = var.worker_count
# instance name prefix for instances in the set
computer_name_prefix = "${var.name}-worker"
single_placement_group = false
resource "azurerm_orchestrated_virtual_machine_scale_set" "workers" {
name = "${var.name}-worker"
resource_group_name = var.resource_group_name
location = var.location
platform_fault_domain_count = 1
sku_name = var.vm_type
instances = var.worker_count

# storage
source_image_id = var.os_image
encryption_at_host_enabled = true
source_image_id = var.os_image
os_disk {
storage_account_type = "Standard_LRS"
caching = "ReadWrite"
storage_account_type = var.disk_type
disk_size_gb = var.disk_size
caching = "ReadOnly"
# Optionally, use the ephemeral disk of the instance type (support varies)
dynamic "diff_disk_settings" {
for_each = var.ephemeral_disk ? [1] : []
content {
option = "Local"
placement = "ResourceDisk"
}
}
}

# network
Expand All @@ -44,20 +52,24 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" {
}

# boot
custom_data = base64encode(data.ct_config.worker.rendered)
user_data_base64 = base64encode(data.ct_config.worker.rendered)
boot_diagnostics {
# defaults to a managed storage account
}

# Azure requires an RSA admin_ssh_key
admin_username = "core"
admin_ssh_key {
username = "core"
public_key = local.azure_authorized_key
os_profile {
linux_configuration {
admin_username = "core"
admin_ssh_key {
username = "core"
public_key = local.azure_authorized_key
}
computer_name_prefix = "${var.name}-worker"
}
}

# lifecycle
upgrade_mode = "Manual"
# eviction policy may only be set when priority is Spot
priority = var.priority
eviction_policy = var.priority == "Spot" ? "Delete" : null
Expand All @@ -66,25 +78,6 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" {
}
}

# Scale up or down to maintain desired number, tolerating deallocations.
resource "azurerm_monitor_autoscale_setting" "workers" {
name = "${var.name}-maintain-desired"
resource_group_name = var.resource_group_name
location = var.location
# autoscale
enabled = true
target_resource_id = azurerm_linux_virtual_machine_scale_set.workers.id

profile {
name = "default"
capacity {
minimum = var.worker_count
default = var.worker_count
maximum = var.worker_count
}
}
}

# Fedora CoreOS worker
data "ct_config" "worker" {
content = templatefile("${path.module}/butane/worker.yaml", {
Expand Down
4 changes: 2 additions & 2 deletions azure/flatcar-linux/kubernetes/controllers.tf
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ resource "azurerm_linux_virtual_machine" "controllers" {
# storage
os_disk {
name = "${var.cluster_name}-controller-${count.index}"
storage_account_type = var.controller_disk_type
disk_size_gb = var.controller_disk_size
caching = "None"
disk_size_gb = var.disk_size
storage_account_type = "Premium_LRS"
}

# Flatcar Container Linux
Expand Down
56 changes: 40 additions & 16 deletions azure/flatcar-linux/kubernetes/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,20 @@ variable "dns_zone_group" {

# instances

variable "controller_count" {
type = number
description = "Number of controllers (i.e. masters)"
default = 1
variable "os_image" {
type = string
description = "Channel for a Container Linux derivative (flatcar-stable, flatcar-beta, flatcar-alpha)"
default = "flatcar-stable"

validation {
condition = contains(["flatcar-stable", "flatcar-beta", "flatcar-alpha"], var.os_image)
error_message = "The os_image must be flatcar-stable, flatcar-beta, or flatcar-alpha."
}
}

variable "worker_count" {
variable "controller_count" {
type = number
description = "Number of workers"
description = "Number of controllers (i.e. masters)"
default = 1
}

Expand All @@ -40,29 +45,48 @@ variable "controller_type" {
default = "Standard_B2s"
}

variable "controller_disk_type" {
type = string
description = "Type of managed disk for controller node(s)"
default = "Premium_LRS"
}

variable "controller_disk_size" {
type = number
description = "Size of the managed disk in GB for controller node(s)"
default = 30
}

variable "worker_count" {
type = number
description = "Number of workers"
default = 1
}

variable "worker_type" {
type = string
description = "Machine type for workers (see `az vm list-skus --location centralus`)"
default = "Standard_D2as_v5"
}

variable "os_image" {
variable "worker_disk_type" {
type = string
description = "Channel for a Container Linux derivative (flatcar-stable, flatcar-beta, flatcar-alpha)"
default = "flatcar-stable"

validation {
condition = contains(["flatcar-stable", "flatcar-beta", "flatcar-alpha"], var.os_image)
error_message = "The os_image must be flatcar-stable, flatcar-beta, or flatcar-alpha."
}
description = "Type of managed disk for worker nodes"
default = "Standard_LRS"
}

variable "disk_size" {
variable "worker_disk_size" {
type = number
description = "Size of the disk in GB"
description = "Size of the managed disk in GB for worker nodes"
default = 30
}

variable "worker_ephemeral_disk" {
type = bool
description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)"
default = false
}

variable "worker_priority" {
type = string
description = "Set worker priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time."
Expand Down
11 changes: 7 additions & 4 deletions azure/flatcar-linux/kubernetes/workers.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,13 @@ module "workers" {
security_group_id = azurerm_network_security_group.worker.id
backend_address_pool_ids = local.backend_address_pool_ids

worker_count = var.worker_count
vm_type = var.worker_type
os_image = var.os_image
priority = var.worker_priority
worker_count = var.worker_count
vm_type = var.worker_type
os_image = var.os_image
disk_type = var.worker_disk_type
disk_size = var.worker_disk_size
ephemeral_disk = var.worker_ephemeral_disk
priority = var.worker_priority

# configuration
kubeconfig = module.bootstrap.kubeconfig-kubelet
Expand Down
18 changes: 18 additions & 0 deletions azure/flatcar-linux/kubernetes/workers/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,24 @@ variable "os_image" {
}
}

variable "disk_type" {
type = string
description = "Type of managed disk"
default = "Standard_LRS"
}

variable "disk_size" {
type = number
description = "Size of the managed disk in GB"
default = 30
}

variable "ephemeral_disk" {
type = bool
description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)"
default = false
}

variable "priority" {
type = string
description = "Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time."
Expand Down
Loading

0 comments on commit 0d10d18

Please sign in to comment.