diff --git a/.github/workflows/stackhpc.yml b/.github/workflows/stackhpc.yml
index 043d61d33..871aff155 100644
--- a/.github/workflows/stackhpc.yml
+++ b/.github/workflows/stackhpc.yml
@@ -178,12 +178,13 @@ jobs:
           ansible-playbook -v ansible/site.yml
           ansible-playbook -v ansible/ci/check_slurm.yml
 
-      - name: Test reimage of compute nodes and compute-init (via rebuild adhoc)
+      - name: Test compute node reboot and compute-init
         run: |
           . venv/bin/activate
           . environments/.stackhpc/activate
           ansible-playbook -v --limit compute ansible/adhoc/rebuild.yml
           ansible-playbook -v ansible/ci/check_slurm.yml
+          ansible-playbook -v ansible/adhoc/reboot_via_slurm.yml
 
       - name: Check sacct state survived reimage
         run: |
diff --git a/ansible/.gitignore b/ansible/.gitignore
index c6e7887c8..94c094ae6 100644
--- a/ansible/.gitignore
+++ b/ansible/.gitignore
@@ -80,3 +80,5 @@ roles/*
 !roles/slurm_stats/**
 !roles/pytools/
 !roles/pytools/**
+!roles/rebuild/
+!roles/rebuild/**
diff --git a/ansible/adhoc/reboot_via_slurm.yml b/ansible/adhoc/reboot_via_slurm.yml
new file mode 100644
index 000000000..b5d5d0d0f
--- /dev/null
+++ b/ansible/adhoc/reboot_via_slurm.yml
@@ -0,0 +1,29 @@
+# Reboot compute nodes via Slurm. Nodes will be rebuilt if `image_id` in inventory is different to the currently-provisioned image.
+# Example:
+# ansible-playbook -v ansible/adhoc/reboot_via_slurm.yml
+
+- hosts: login
+  run_once: true
+  become: yes
+  gather_facts: no
+  tasks:
+    - name: Submit a Slurm job to reboot compute nodes
+      ansible.builtin.shell: |
+        set -e
+        srun --reboot -N 2 uptime
+      become_user: root
+      register: slurm_result
+      ignore_errors: true
+
+    - name: Fetch Slurm controller logs if reboot fails
+      ansible.builtin.shell: |
+        journalctl -u slurmctld --since "10 minutes ago" | tail -n 50
+      become_user: root
+      register: slurm_logs
+      when: slurm_result.rc != 0
+      delegate_to: "{{ groups['control'] | first }}"
+
+    - name: Fail if the reboot job did not succeed
+      ansible.builtin.fail:
+        msg: "Slurm reboot job failed. Recent slurmctld log: {{ slurm_logs.stdout | default('') }}"
+      when: slurm_result.rc != 0
\ No newline at end of file
diff --git a/ansible/roles/compute_init/README.md b/ansible/roles/compute_init/README.md
index 3d9f7558d..d016c7168 100644
--- a/ansible/roles/compute_init/README.md
+++ b/ansible/roles/compute_init/README.md
@@ -1,11 +1,104 @@
-# EXPERIMENTAL: compute-init
-
-Experimental / in-progress functionality to allow compute nodes to rejoin the
-cluster after a reboot.
-
-To enable this add compute nodes (or a subset of them into) the `compute_init`
-group.
-
+# EXPERIMENTAL: compute_init
+
+Experimental functionality to allow compute nodes to rejoin the cluster after
+a reboot without running the `ansible/site.yml` playbook.
+
+To enable this:
+1. Add the `compute` group (or a subset) into the `compute_init` group. This is
+   the default when using cookiecutter to create an environment, via the
+   "everything" template.
+2. Build an image which includes the `compute_init` group. This is the case
+   for StackHPC-built release images.
+3. Enable the required functionalities during boot by setting the
+   `compute_init_enable` property for a compute group in the
+   OpenTofu `compute` variable to a list which includes "compute", plus the
+   other roles/functionalities required, e.g.:
+
+   ```terraform
+   ...
+   compute = {
+       general = {
+           nodes = ["general-0", "general-1"]
+           compute_init_enable = ["compute", ... ] # see below
+       }
+   }
+   ...
+   ```
+
+## Supported appliance functionalities
+
+The string "compute" must be present in the `compute_init_enable` flag to enable
+this functionality.
+The table below shows which other appliance functionalities are currently
+supported - use the name in the role column to enable these.
+
+| Playbook | Role (or functionality) | Support |
+| -------------------------|-------------------------|-----------------|
+| hooks/pre.yml | ? | None at present |
+| validate.yml | n/a | Not relevant during boot |
+| bootstrap.yml | (wait for ansible-init) | Not relevant during boot |
+| bootstrap.yml | resolv_conf | Fully supported |
+| bootstrap.yml | etc_hosts | Fully supported |
+| bootstrap.yml | proxy | None at present |
+| bootstrap.yml | (/etc permissions) | None required - use image build |
+| bootstrap.yml | (ssh /home fix) | None required - use image build |
+| bootstrap.yml | (system users) | None required - use image build |
+| bootstrap.yml | systemd | None required - use image build |
+| bootstrap.yml | selinux | None required - use image build |
+| bootstrap.yml | sshd | None at present |
+| bootstrap.yml | dnf_repos | None at present (requirement TBD) |
+| bootstrap.yml | squid | Not relevant for compute nodes |
+| bootstrap.yml | tuned | None at present |
+| bootstrap.yml | freeipa_server | Not relevant for compute nodes |
+| bootstrap.yml | cockpit | None required - use image build |
+| bootstrap.yml | firewalld | Not relevant for compute nodes |
+| bootstrap.yml | fail2ban | Not relevant for compute nodes |
+| bootstrap.yml | podman | Not relevant for compute nodes |
+| bootstrap.yml | update | Not relevant during boot |
+| bootstrap.yml | reboot | Not relevant for compute nodes |
+| bootstrap.yml | ofed | Not relevant during boot |
+| bootstrap.yml | ansible_init (install) | Not relevant during boot |
+| bootstrap.yml | k3s (install) | Not relevant during boot |
+| hooks/post-bootstrap.yml | ? | None at present |
+| iam.yml | freeipa_client | None at present [1] |
+| iam.yml | freeipa_server | Not relevant for compute nodes |
+| iam.yml | sssd | None at present |
+| filesystems.yml | block_devices | None required - role deprecated |
+| filesystems.yml | nfs | All client functionality |
+| filesystems.yml | manila | All functionality |
+| filesystems.yml | lustre | None at present |
+| extras.yml | basic_users | All functionality [2] |
+| extras.yml | eessi | All functionality [3] |
+| extras.yml | cuda | None required - use image build [4] |
+| extras.yml | persist_hostkeys | Not expected to be required for compute nodes |
+| extras.yml | compute_init (export) | Not relevant for compute nodes |
+| extras.yml | k9s (install) | Not relevant during boot |
+| extras.yml | extra_packages | None at present. Would require dnf_repos |
+| slurm.yml | mysql | Not relevant for compute nodes |
+| slurm.yml | rebuild | Not relevant for compute nodes |
+| slurm.yml | openhpc [5] | All slurmd-related functionality |
+| slurm.yml | (set memory limits) | None at present |
+| slurm.yml | (block ssh) | None at present |
+| portal.yml | (openondemand server) | Not relevant for compute nodes |
+| portal.yml | (openondemand vnc desktop) | None required - use image build |
+| portal.yml | (openondemand jupyter server) | None required - use image build |
+| monitoring.yml | (all monitoring) | None at present [6] |
+| disable-repos.yml | dnf_repos | None at present (requirement TBD) |
+| hooks/post.yml | ? | None at present |
+
+
+Notes:
+1. FreeIPA client functionality would be better provided using a client fork
+   which uses pkinit keys rather than OTP to reenrol nodes.
+2. Assumes home directory already exists on shared storage.
+3. Assumes `cvmfs_config` is the same on control node and all compute nodes.
+4. If the `cuda` role was run during build, the nvidia-persistenced service is
+   enabled and will start during boot.
+5. `openhpc` does not need to be added to `compute_init_enable`; it is
+   automatically enabled by adding `compute`.
+6. Only node-exporter tasks are relevant, and will be done via k3s in a future release.
+
+
+## Approach
 This works as follows:
 1. During image build, an ansible-init playbook and supporting files
    (e.g. templates, filters, etc) are installed.
@@ -31,21 +124,7 @@ The check in 4b. above is what prevents the compute-init script from trying
 to configure the node before the services on the control node are available
 (which requires running the site.yml playbook).
 
-The following roles/groups are currently fully functional:
-- `resolv_conf`: all functionality
-- `etc_hosts`: all functionality
-- `nfs`: client functionality only
-- `manila`: all functionality
-- `basic_users`: all functionality, assumes home directory already exists on
-  shared storage
-- `eessi`: all functionality, assumes `cvmfs_config` is the same on control
-  node and all compute nodes.
-- `openhpc`: all functionality
-
-The above may be enabled by setting the compute_init_enable property on the
-tofu compute variable.
-
-# Development/debugging
+## Development/debugging
 
 To develop/debug changes to the compute script without actually having to
 build a new image:
@@ -83,7 +162,7 @@ reimage the compute node(s) first as in step 2 and/or add additional metadata
 as in step 3.
 
-# Design notes
+## Design notes
 
 - Duplicating code in roles into the `compute-init` script is unfortunate, but
   does allow developing this functionality without wider changes to the appliance.
diff --git a/ansible/roles/compute_init/tasks/export.yml b/ansible/roles/compute_init/tasks/export.yml
index 12b648f6e..d1682208f 100644
--- a/ansible/roles/compute_init/tasks/export.yml
+++ b/ansible/roles/compute_init/tasks/export.yml
@@ -2,9 +2,9 @@
   file:
     path: /exports/cluster
     state: directory
-    owner: root
+    owner: slurm
     group: root
-    mode: u=rwX,go=
+    mode: u=rX,g=rwX,o=
   run_once: true
   delegate_to: "{{ groups['control'] | first }}"
 
@@ -23,21 +23,27 @@
   file:
     path: /exports/cluster/hostvars/{{ inventory_hostname }}/
     state: directory
-    mode: u=rwX,go=
-  # TODO: owner,mode,etc
+    owner: slurm
+    group: root
+    mode: u=rX,g=rwX,o=
   delegate_to: "{{ groups['control'] | first }}"
 
 - name: Template out hostvars
   template:
     src: hostvars.yml.j2
     dest: /exports/cluster/hostvars/{{ inventory_hostname }}/hostvars.yml
-    mode: u=rw,go=
+    owner: slurm
+    group: root
+    mode: u=r,g=rw,o=
   delegate_to: "{{ groups['control'] | first }}"
 
 - name: Copy manila share info to /exports/cluster
   copy:
     content: "{{ os_manila_mount_share_info_var | to_nice_yaml }}"
     dest: /exports/cluster/manila_share_info.yml
+    owner: root
+    group: root
+    mode: u=rw,g=r
   run_once: true
   delegate_to: "{{ groups['control'] | first }}"
   when: os_manila_mount_share_info is defined
diff --git a/ansible/roles/rebuild/README.md b/ansible/roles/rebuild/README.md
new file mode 100644
index 000000000..314d7c94d
--- /dev/null
+++ b/ansible/roles/rebuild/README.md
@@ -0,0 +1,30 @@
+rebuild
+=========
+
+Enables the reboot tool from https://github.com/stackhpc/slurm-openstack-tools.git to be run from the control node.
+
+Requirements
+------------
+
+A `clouds.yaml` file.
+
+Role Variables
+--------------
+
+- `openhpc_rebuild_clouds`: Path to a `clouds.yaml` file on the deploy host, copied
+  to `/etc/openstack/clouds.yaml` on the control node. Default is
+  `~/.config/openstack/clouds.yaml`. See the example below.
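+
+As an illustration only, a minimal `clouds.yaml` using an application credential
+might look like the following (all values, including the cloud name `openstack`,
+are placeholders):
+
+```yaml
+clouds:
+  openstack:
+    auth:
+      auth_url: https://keystone.example.com:5000
+      application_credential_id: "REPLACE_ME"
+      application_credential_secret: "REPLACE_ME"
+    region_name: RegionOne
+    interface: public
+    identity_api_version: 3
+    auth_type: v3applicationcredential
+```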
+
+
+Example Playbook
+----------------
+
+    - hosts: control
+      become: yes
+      tasks:
+        - import_role:
+            name: rebuild
+
+License
+-------
+
+Apache-2.0
+
diff --git a/ansible/roles/rebuild/defaults/main.yml b/ansible/roles/rebuild/defaults/main.yml
new file mode 100644
index 000000000..06b237ef2
--- /dev/null
+++ b/ansible/roles/rebuild/defaults/main.yml
@@ -0,0 +1,2 @@
+---
+openhpc_rebuild_clouds: ~/.config/openstack/clouds.yaml
\ No newline at end of file
diff --git a/ansible/roles/rebuild/tasks/main.yml b/ansible/roles/rebuild/tasks/main.yml
new file mode 100644
index 000000000..c677716c7
--- /dev/null
+++ b/ansible/roles/rebuild/tasks/main.yml
@@ -0,0 +1,21 @@
+---
+
+- name: Create /etc/openstack
+  file:
+    path: /etc/openstack
+    state: directory
+    owner: slurm
+    group: root
+    mode: u=rX,g=rwX
+
+- name: Copy out clouds.yaml
+  copy:
+    src: "{{ openhpc_rebuild_clouds }}"
+    dest: /etc/openstack/clouds.yaml
+    owner: slurm
+    group: root
+    mode: u=r,g=rw
+
+- name: Setup slurm tools
+  include_role:
+    name: slurm_tools
diff --git a/ansible/roles/slurm_stats/README.md b/ansible/roles/slurm_stats/README.md
index 69032c13a..f8bd38caf 100644
--- a/ansible/roles/slurm_stats/README.md
+++ b/ansible/roles/slurm_stats/README.md
@@ -21,7 +21,7 @@ Example Playbook
     - hosts: compute
       tasks:
         - import_role:
-            name: stackhpc.slurm_openstack_tools.slurm-stats
+            name: slurm_stats
 
 
 License
diff --git a/ansible/roles/slurm_tools/.travis.yml b/ansible/roles/slurm_tools/.travis.yml
deleted file mode 100644
index 36bbf6208..000000000
--- a/ansible/roles/slurm_tools/.travis.yml
+++ /dev/null
@@ -1,29 +0,0 @@
----
-language: python
-python: "2.7"
-
-# Use the new container infrastructure
-sudo: false
-
-# Install ansible
-addons:
-  apt:
-    packages:
-    - python-pip
-
-install:
-  # Install ansible
-  - pip install ansible
-
-  # Check ansible version
-  - ansible --version
-
-  # Create ansible.cfg with correct roles_path
-  - printf '[defaults]\nroles_path=../' >ansible.cfg
-
-script:
-  # Basic role syntax check
-  - ansible-playbook tests/test.yml -i tests/inventory --syntax-check
-
-notifications:
-  webhooks: https://galaxy.ansible.com/api/v1/notifications/
\ No newline at end of file
diff --git a/ansible/roles/slurm_tools/tasks/main.yml b/ansible/roles/slurm_tools/tasks/main.yml
index e1eb3e51d..deedb034a 100644
--- a/ansible/roles/slurm_tools/tasks/main.yml
+++ b/ansible/roles/slurm_tools/tasks/main.yml
@@ -27,7 +27,7 @@
   module_defaults:
     ansible.builtin.pip:
       virtualenv: /opt/slurm-tools
-      virtualenv_command: python3 -m venv
+      virtualenv_command: "{{ 'python3.9 -m venv' if ansible_distribution_major_version == '8' else 'python3 -m venv' }}"
       state: latest
   become: true
   become_user: "{{ pytools_user }}"
diff --git a/ansible/slurm.yml b/ansible/slurm.yml
index 71e2ab9c3..80812ae7d 100644
--- a/ansible/slurm.yml
+++ b/ansible/slurm.yml
@@ -9,6 +9,16 @@
     - include_role:
         name: mysql
 
+- name: Setup slurm-driven rebuild
+  hosts: rebuild:!builder
+  become: yes
+  tags:
+    - rebuild
+    - openhpc
+  tasks:
+    - import_role:
+        name: rebuild
+
 - name: Setup slurm
   hosts: openhpc
   become: yes
diff --git a/environments/.stackhpc/inventory/extra_groups b/environments/.stackhpc/inventory/extra_groups
index 2531b803e..416d50566 100644
--- a/environments/.stackhpc/inventory/extra_groups
+++ b/environments/.stackhpc/inventory/extra_groups
@@ -1,10 +1,6 @@
 [basic_users:children]
 cluster
 
-[rebuild:children]
-control
-compute
-
 [etc_hosts:children]
 cluster
 
@@ -35,3 +31,6 @@ builder
 [sssd:children]
 # Install sssd into fat image
 builder
+
+[rebuild:children]
+control
\ No newline at end of file
diff --git a/environments/.stackhpc/tofu/SMS.tfvars b/environments/.stackhpc/tofu/SMS.tfvars
index 66113a68d..808821bc0 100644
--- a/environments/.stackhpc/tofu/SMS.tfvars
+++ b/environments/.stackhpc/tofu/SMS.tfvars
@@ -1,4 +1,8 @@
-cluster_net = "stackhpc-ipv4-geneve"
-cluster_subnet = "stackhpc-ipv4-geneve-subnet"
+cluster_networks = [
+  {
+    network = "stackhpc-ipv4-geneve"
+    subnet = "stackhpc-ipv4-geneve-subnet"
+  }
+]
 control_node_flavor = "general.v1.small"
 other_node_flavor = "general.v1.small"
\ No newline at end of file
diff --git a/environments/.stackhpc/tofu/main.tf b/environments/.stackhpc/tofu/main.tf
index cdf3e2f72..91c3e178c 100644
--- a/environments/.stackhpc/tofu/main.tf
+++ b/environments/.stackhpc/tofu/main.tf
@@ -81,7 +81,7 @@ module "cluster" {
       nodes: ["compute-0", "compute-1"]
      flavor: var.other_node_flavor
       compute_init_enable: ["compute", "etc_hosts", "nfs", "basic_users", "eessi"]
-      # ignore_image_changes: true
+      ignore_image_changes: true
     }
     # Example of how to add another partition:
     # extra: {
diff --git a/environments/common/inventory/group_vars/all/openhpc.yml b/environments/common/inventory/group_vars/all/openhpc.yml
index 3b3879de9..ecaec5663 100644
--- a/environments/common/inventory/group_vars/all/openhpc.yml
+++ b/environments/common/inventory/group_vars/all/openhpc.yml
@@ -35,8 +35,14 @@ openhpc_config_default:
   SlurmctldParameters:
     - enable_configless
   TaskPlugin: task/cgroup,task/affinity
+  ReturnToService: 2 # this is the stackhpc.openhpc default, but a templating bug means it is needed here too
+openhpc_config_rebuild:
+  RebootProgram: /opt/slurm-tools/bin/slurm-openstack-rebuild
+  SlurmctldParameters:
+    - reboot_from_controller
+  ResumeTimeout: 300
 openhpc_config_extra: {}
-openhpc_config: "{{ openhpc_config_default | combine(openhpc_config_extra, list_merge='append') }}"
+openhpc_config: "{{ openhpc_config_default | combine(openhpc_config_rebuild if groups['rebuild'] | length > 0 else {}, openhpc_config_extra, list_merge='append') }}"
 openhpc_state_save_location: "{{ appliances_state_dir + '/slurmctld' if appliances_state_dir is defined else '/var/spool' }}"
 openhpc_install_type: ohpc # 'ohpc' or 'generic', see https://github.com/stackhpc/ansible-slurm-appliance/pull/326
diff --git a/environments/common/inventory/groups b/environments/common/inventory/groups
index cb49b92e2..fd302d0fc 100644
--- a/environments/common/inventory/groups
+++ b/environments/common/inventory/groups
@@ -63,7 +63,7 @@ mysql
 cluster
 
 [rebuild]
-# Enable rebuild of nodes on an OpenStack cloud; add 'control' group plus 'compute' group or a subset of it.
+# Enable rebuild of nodes on an OpenStack cloud; add 'control' group.
 
 [update]
 # All hosts to (optionally) run yum update on.
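For clarity, `combine(..., list_merge='append')` above concatenates the two
`SlurmctldParameters` lists rather than replacing one with the other. A sketch of
the effective `openhpc_config` value when the `rebuild` group is non-empty and
`openhpc_config_extra` is left empty (illustrative only, not a file in the repo):

```yaml
SlurmctldParameters:
  - enable_configless
  - reboot_from_controller
TaskPlugin: task/cgroup,task/affinity
ReturnToService: 2
RebootProgram: /opt/slurm-tools/bin/slurm-openstack-rebuild
ResumeTimeout: 300
```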
diff --git a/environments/common/layouts/everything b/environments/common/layouts/everything
index 8b5046bfc..30b695d67 100644
--- a/environments/common/layouts/everything
+++ b/environments/common/layouts/everything
@@ -25,7 +25,7 @@ control
 [filebeat:children]
 slurm_stats
 
-# NB: [rebuild] not defined here as this template is used in CI
+# NB: [rebuild] not defined here as it is likely to need features not currently supported
 
 [update:children]
diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tpl b/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tpl
index 05ec27ef8..ae3bcbc40 100644
--- a/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tpl
+++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tpl
@@ -37,9 +37,11 @@ ${cluster_name}_${group_name}:
     ${ node.name }:
       ansible_host: ${node.access_ip_v4}
       instance_id: ${ node.id }
-      image_id: ${ node.image_id }
       networks: ${jsonencode({for n in node.network: n.name => {"fixed_ip_v4": n.fixed_ip_v4, "fixed_ip_v6": n.fixed_ip_v6}})}
 %{ endfor ~}
+  vars:
+    # NB: this is the target image, not necessarily what is provisioned
+    image_id: ${compute_groups[group_name]["image_id"]}
 %{ endfor ~}
 
 compute:
diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf
index bb15733c7..312f304ca 100644
--- a/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf
+++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf
@@ -154,3 +154,7 @@ resource "openstack_compute_instance_v2" "compute" {
 output "compute_instances" {
   value = local.compute_instances
 }
+
+output "image_id" {
+  value = var.image_id
+}
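For reference, a hypothetical fragment of the inventory rendered by the updated
template above, for a cluster named `demo` with a `general` compute group (all
names, addresses and IDs below are invented):

```yaml
demo_general:
  hosts:
    demo-general-0:
      ansible_host: 10.10.0.5
      instance_id: "<instance-uuid>"
      networks: {"demo-net": {"fixed_ip_v4": "10.10.0.5", "fixed_ip_v6": null}}
  vars:
    # NB: this is the target image, not necessarily what is provisioned
    image_id: "<target-image-uuid>"
```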