Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 4 additions & 13 deletions .github/workflows/extra.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ jobs:
matrix: # build RL8, RL9
build:
- image_name: openhpc-extra-RL8
source_image_name_key: RL8 # key into environments/.stackhpc/tofu/cluster_image.auto.tfvars.json
appliances_distro_version: RL8
inventory_groups: doca,cuda # lustre disabled due to https://github.com/stackhpc/ansible-slurm-appliance/pull/759
volume_size: 35 # needed for cuda
- image_name: openhpc-extra-RL9
source_image_name_key: RL9
appliances_distro_version: RL9
inventory_groups: doca,cuda,lustre
volume_size: 35 # needed for cuda
env:
Expand All @@ -37,23 +37,15 @@ jobs:
CI_CLOUD: ${{ vars.CI_CLOUD }} # default from repo settings
ARK_PASSWORD: ${{ secrets.ARK_PASSWORD }}
PACKER_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
APPLIANCES_DISTRO_VERSION: ${{ matrix.build.appliances_distro_version}}

steps:
- uses: actions/checkout@v4

- name: Load current fat images into GITHUB_ENV
# see https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#example-of-a-multiline-string
run: |
{
echo 'FAT_IMAGES<<EOF'
cat environments/.stackhpc/tofu/cluster_image.auto.tfvars.json
echo EOF
} >> "$GITHUB_ENV"

- name: Record settings
run: |
echo CI_CLOUD: ${{ env.CI_CLOUD }}
echo "FAT_IMAGES: ${FAT_IMAGES}"
echo APPLIANCES_DISTRO_VERSION: ${{ env.APPLIANCES_DISTRO_VERSION }}

- name: Setup ssh
run: |
Expand Down Expand Up @@ -93,7 +85,6 @@ jobs:
PACKER_LOG=1 packer build \
-on-error=${{ vars.PACKER_ON_ERROR }} \
-var-file="$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl" \
-var "source_image_name=${{ fromJSON(env.FAT_IMAGES)['cluster_image'][matrix.build.source_image_name_key] }}" \
-var "image_name=${{ matrix.build.image_name }}" \
-var "inventory_groups=${{ matrix.build.inventory_groups }}" \
-var "volume_size=${{ matrix.build.volume_size }}" \
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,14 @@ jobs:
- 'dev/setup-env.sh'
- '.github/workflows/stackhpc.yml'
extra_on_push:
- 'environments/.stackhpc/tofu/cluster_image.auto.tfvars.json'
- 'environments/.stackhpc/inventory/group_vars/all/community_image_names.json'
- 'ansible/roles/doca/**'
- 'ansible/roles/cuda/**'
- 'ansible/roles/slurm_recompile/**' # runs on cuda group
- 'ansible/roles/lustre/**'
- '.github/workflows/extra.yml'
extra_on_pull_request:
- 'environments/.stackhpc/tofu/cluster_image.auto.tfvars.json'
- 'environments/.stackhpc/inventory/group_vars/all/community_image_names.json'
- 'ansible/roles/doca/**'
- 'ansible/roles/cuda/**'
- 'ansible/roles/lustre/**'
Expand Down
17 changes: 17 additions & 0 deletions ansible/adhoc/image-pull.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
# Ad-hoc playbook: make sure every image described by `community_images`
# exists on the target cloud, then record the resulting image IDs in
# environments/site/images/community_image_ids.json so that other tooling
# (e.g. OpenTofu / Packer configuration) can read them from that file.
- name: Upload community images and record their IDs
  hosts: localhost
  become: false
  gather_facts: false
  tasks:
    # Runs the role's upload task file once per entry in `community_images`.
    # dict2items turns the mapping into a list of {key, value} items, so
    # `item.value.name` is the image name shown in the loop label.
    - name: Ensure images exist on cloud
      ansible.builtin.include_role:
        name: azimuth_cloud.azimuth_ops.community_images
        tasks_from: upload_image_and_store_id.yml
      loop: "{{ community_images | dict2items | list }}"
      loop_control:
        label: "{{ item.value.name }}"

    # NOTE(review): `community_images_image_ids` is presumably populated by
    # the role task file included above - confirm against the role.
    - name: Write image IDs
      ansible.builtin.copy:
        dest: "{{ appliances_repository_root }}/environments/site/images/community_image_ids.json"
        content: "{{ community_images_image_ids | to_nice_json }}"
        # Symbolic mode, quoted so it is always passed to the module as a string.
        mode: "ug=rw,o=r"
      delegate_to: localhost
2 changes: 1 addition & 1 deletion docs/image-build.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ For either a site-specific fat-image build or an extra-build:
- For a site-specific fatimage build: A RockyLinux GenericCloud or
compatible image.
- For an extra-build image: Usually the appropriate StackHPC fat image,
as defined in `environments/.stackhpc/tofu/cluster_image.auto.tfvars.json` at the
as defined in `environments/.stackhpc/inventory/group_vars/all/community_image_names.json` at the
checkout's current commit. See the [GitHub release page](https://github.com/stackhpc/ansible-slurm-appliance/releases)
for download links. In some cases extra builds may be chained, e.g.
one extra build adds a Lustre client, and the resulting image is used
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# CI sets the APPLIANCES_DISTRO_VERSION environment variable from the build
# matrix (RL8 or RL9); when it is unset the lookup falls back to RL9:
appliances_distro_version: "{{ lookup('env', 'APPLIANCES_DISTRO_VERSION', default='RL9') }}"

# Use prerelease bucket only for stackhpc env:
community_images_stackhpc_source_url: https://leafcloud.store/swift/v1/AUTH_f39848421b2747148400ad8eeae8d536/openhpc-images-prerelease
17 changes: 0 additions & 17 deletions environments/.stackhpc/tofu/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,6 @@ variable "cluster_name" {
description = "Name for cluster, used as prefix for resources - set by environment var in CI"
}

variable "os_version" {
type = string
description = "RL8 or RL9"
default = "RL9"
}

variable "cluster_image" {
description = "single image for all cluster nodes, keyed by os_version - a convenience for CI"
type = map(string)
}

# tflint-ignore: terraform_typed_variables
variable "cluster_networks" {}

Expand Down Expand Up @@ -60,19 +49,13 @@ variable "volume_backed_instances" {
default = false
}

data "openstack_images_image_v2" "cluster" {
name = var.cluster_image[var.os_version]
most_recent = true
}

module "cluster" {
source = "../../site/tofu/"

cluster_name = var.cluster_name
cluster_networks = var.cluster_networks
vnic_types = var.vnic_types
key_pair = "slurm-app-ci"
cluster_image_id = data.openstack_images_image_v2.cluster.id
control_node_flavor = var.control_node_flavor

login = {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"cluster_image": {
"stackhpc_image_names": {
"RL8": "openhpc-RL8-251213-1133-31273766",
"RL9": "openhpc-RL9-251213-1133-31273766"
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
---
# Distro version used to select the StackHPC image name below.
appliances_distro_version: RL9

# Default StackHPC images - usually won't need overriding.
# These are either used directly for the cluster or as source images for
# Packer builds. Image names (`stackhpc_image_names`) are provided in the
# adjacent JSON file, which is also used to drive StackHPC CI.
community_images_stackhpc_source_url: https://leafcloud.store/swift/v1/AUTH_f39848421b2747148400ad8eeae8d536/openhpc-images
community_images_stackhpc:
  # `stackhpc` is the image key; name and download URL are both derived from
  # the image name selected by `appliances_distro_version`.
  stackhpc:
    name: "{{ stackhpc_image_names[appliances_distro_version] }}"
    source_url: "{{ community_images_stackhpc_source_url }}/{{ stackhpc_image_names[appliances_distro_version] }}"
    source_disk_format: qcow2
    container_format: bare
community_images_default: "{{ community_images_stackhpc }}"

# Per-cloud image settings - may be overridden:
community_images_disk_format: raw # format required on the cloud
# Image properties applied regardless of disk format:
community_images_custom_properties_default:
  - hw_machine_type=q35
  - hw_architecture=x86_64
  - hw_vif_multiqueue_enabled=true
  - hw_firmware_type=uefi
  - os_type=linux
  - "os_admin_user={{ ansible_user }}"
# Additional image properties, keyed by disk format:
community_images_custom_properties_disk_format:
  raw:
    - hw_scsi_model=virtio-scsi
    - hw_disk_bus=scsi
  qcow2:
    - hw_disk_bus=virtio
# Effective property list: the defaults plus the entries for the disk format
# selected by `community_images_disk_format`.
community_images_custom_properties: |
  {{ community_images_custom_properties_default +
     community_images_custom_properties_disk_format[community_images_disk_format]
  }}

# Additional images - usually won't need overriding
community_images_extra: {}

# Final image mapping: the defaults combined with any extra images
# (entries in `community_images_extra` take precedence on key clashes).
community_images: "{{ community_images_default | combine(community_images_extra) }}"
1 change: 1 addition & 0 deletions environments/site/images/community_image_ids.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
2 changes: 1 addition & 1 deletion environments/site/tofu/additional.tf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ module "additional" {
config_drive = var.config_drive

# can be set for group, defaults to top-level value:
image_id = lookup(each.value, "image_id", var.cluster_image_id)
image_id = local.image_ids[lookup(each.value, "image_key", var.cluster_image_key)]
vnic_types = lookup(each.value, "vnic_types", var.vnic_types)
volume_backed_instances = lookup(each.value, "volume_backed_instances", var.volume_backed_instances)
root_volume_size = lookup(each.value, "root_volume_size", var.root_volume_size)
Expand Down
2 changes: 1 addition & 1 deletion environments/site/tofu/compute.tf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ module "compute" {
config_drive = var.config_drive

# can be set for group, defaults to top-level value:
image_id = lookup(each.value, "image_id", var.cluster_image_id)
image_id = local.image_ids[lookup(each.value, "image_key", var.cluster_image_key)]
vnic_types = lookup(each.value, "vnic_types", var.vnic_types)
volume_backed_instances = lookup(each.value, "volume_backed_instances", var.volume_backed_instances)
root_volume_size = lookup(each.value, "root_volume_size", var.root_volume_size)
Expand Down
5 changes: 3 additions & 2 deletions environments/site/tofu/control.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ locals {
environment_name = basename(var.environment_root)
}
)
cluster_image_id = local.image_ids[var.cluster_image_key]
}

resource "openstack_networking_port_v2" "control" {
Expand Down Expand Up @@ -46,13 +47,13 @@ resource "openstack_networking_port_v2" "control" {
resource "openstack_compute_instance_v2" "control" {

name = split(".", local.control_fqdn)[0]
image_id = var.cluster_image_id
image_id = local.cluster_image_id
flavor_name = var.control_node_flavor
key_pair = var.key_pair

# root device:
block_device {
uuid = var.cluster_image_id
uuid = local.cluster_image_id
source_type = "image"
destination_type = var.volume_backed_instances ? "volume" : "local"
volume_size = var.volume_backed_instances ? var.root_volume_size : null
Expand Down
3 changes: 3 additions & 0 deletions environments/site/tofu/images.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Decodes ../images/community_image_ids.json (relative to this module) into
# local.image_ids, which is indexed by image key (e.g. var.cluster_image_key)
# to obtain the image ID to use for instances.
locals {
image_ids = jsondecode(file("${path.module}/../images/community_image_ids.json"))
}
2 changes: 1 addition & 1 deletion environments/site/tofu/login.tf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ module "login" {
config_drive = var.config_drive

# can be set for group, defaults to top-level value:
image_id = lookup(each.value, "image_id", var.cluster_image_id)
image_id = local.image_ids[lookup(each.value, "image_key", var.cluster_image_key)]
vnic_types = lookup(each.value, "vnic_types", var.vnic_types)
volume_backed_instances = lookup(each.value, "volume_backed_instances", var.volume_backed_instances)
root_volume_size = lookup(each.value, "root_volume_size", var.root_volume_size)
Expand Down
12 changes: 6 additions & 6 deletions environments/site/tofu/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ variable "login" {
nodes: List of node names
flavor: String flavor name
Optional:
image_id: Overrides variable cluster_image_id
image_key: Overrides variable cluster_image_key
extra_networks: List of mappings in same format as cluster_networks
vnic_types: Overrides variable vnic_types
volume_backed_instances: Overrides variable volume_backed_instances
Expand Down Expand Up @@ -106,13 +106,13 @@ variable "login" {
))
}
EOF

}
}

variable "cluster_image_id" {
variable "cluster_image_key" {
type = string
description = "ID of default image for the cluster"
description = "Key in community_images of default image for the cluster"
default = "stackhpc"
}

variable "compute" {
Expand All @@ -129,11 +129,11 @@ variable "compute" {
nodes: List of node names
flavor: String flavor name
Optional:
image_id: Overrides variable cluster_image_id
image_key: Overrides variable cluster_image_key
extra_networks: List of mappings in same format as cluster_networks
vnic_types: Overrides variable vnic_types
compute_init_enable: Toggles compute-init rebuild (see compute-init role docs)
ignore_image_changes: Ignore changes to the image_id parameter (see docs/experimental/compute-init.md)
ignore_image_changes: Ignore changes to the image ID (see docs/experimental/compute-init.md)
volume_backed_instances: Overrides variable volume_backed_instances
root_volume_size: Overrides variable root_volume_size
extra_volumes: Mapping defining additional volumes to create and attach
Expand Down
15 changes: 8 additions & 7 deletions packer/openstack.pkr.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ locals {
git_commit = data.git-commit.cwd-head.hash
timestamp = formatdate("YYMMDD-hhmm", timestamp())
image_name_version = var.image_name_version == "auto" ? "-${local.timestamp}-${substr(local.git_commit, 0, 8)}" : var.image_name_version
community_image_ids = jsondecode(file("${path.root}/environments/site/images/community_image_ids.json"))
}

# Path pointing to root of repository - automatically set by environment variable PKR_VAR_repo_root
Expand All @@ -44,13 +45,13 @@ variable "networks" {
variable "source_image_name" {
type = string
default = null
description = "name of source image"
description = "Name of source image"
}

variable "source_image" {
variable "source_image_key" {
type = string
default = null
description = "UUID of source image"
default = "stackhpc"
description = "Name of community_images key defining source image"
}

variable "flavor" {
Expand Down Expand Up @@ -177,9 +178,9 @@ source "openstack" "openhpc" {
floating_ip = var.floating_ip
security_groups = var.security_groups

# Input image:
source_image = "${var.source_image}"
source_image_name = "${var.source_image_name}" # NB: must already exist in OpenStack
# Input image - NB must already exist in OpenStack:
source_image = local.community_image_ids[var.source_image_key]
source_image_name = "${var.source_image_name}"

# SSH:
ssh_username = var.ssh_username
Expand Down
8 changes: 6 additions & 2 deletions requirements.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@ collections:
- name: community.grafana
version: 1.5.4
- name: ansible.posix
version: 1.5.4
version: 2.1.0
- name: ansible.netcommon
version: 5.1.1
- name: community.general
version: 7.1.0
version: 11.2.1
- name: community.crypto
version: 2.10.0
- name: community.mysql
Expand All @@ -55,3 +55,7 @@ collections:
version: 0.5.5
- name: stackhpc.linux
version: 1.5.0
# for community_images role:
- name: https://github.com/azimuth-cloud/ansible-collection-azimuth-ops
type: git
version: 0.20.0