diff --git a/ansible/.gitignore b/ansible/.gitignore index bf09f0468..26a456fec 100644 --- a/ansible/.gitignore +++ b/ansible/.gitignore @@ -4,8 +4,8 @@ roles/* # Whitelist roles that are checked into this repository. !roles/filebeat/ !roles/filebeat/** -!roles/opendistro/ -!roles/opendistro/** +!roles/opensearch/ +!roles/opensearch/** !roles/podman/ !roles/podman/** !roles/grafana-dashboards/ diff --git a/ansible/monitoring.yml b/ansible/monitoring.yml index 0685f790e..f60678874 100644 --- a/ansible/monitoring.yml +++ b/ansible/monitoring.yml @@ -1,19 +1,13 @@ # --- # # NOTE: Requires slurmdbd -- name: Setup elasticsearch - hosts: opendistro - tags: opendistro +- name: Setup OpenSearch + hosts: opensearch + tags: opensearch tasks: - import_role: - name: opendistro - tasks_from: config.yml - tags: config - - - import_role: - name: opendistro - tasks_from: deploy.yml - tags: deploy + name: opensearch + become: true - name: Setup slurm stats hosts: slurm_stats diff --git a/ansible/roles/filebeat/defaults/main.yml b/ansible/roles/filebeat/defaults/main.yml index fd51aa652..edba48b71 100644 --- a/ansible/roles/filebeat/defaults/main.yml +++ b/ansible/roles/filebeat/defaults/main.yml @@ -2,3 +2,4 @@ #filebeat_config_path: undefined # REQUIRED. 
Path to filebeat.yml configuration file template filebeat_podman_user: "{{ ansible_user }}" # User that runs the filebeat container +filebeat_version: 7.12.1 # latest usable with opensearch - see https://opensearch.org/docs/2.4/tools/index/#compatibility-matrix-for-beats diff --git a/ansible/roles/filebeat/templates/filebeat.service.j2 b/ansible/roles/filebeat/templates/filebeat.service.j2 index 454ed2339..7a3a14277 100644 --- a/ansible/roles/filebeat/templates/filebeat.service.j2 +++ b/ansible/roles/filebeat/templates/filebeat.service.j2 @@ -12,7 +12,19 @@ After=network-online.target [Service] Environment=PODMAN_SYSTEMD_UNIT=%n Restart=always -ExecStart=/usr/bin/podman run --network slirp4netns:cidr={{ podman_cidr }} --sdnotify=conmon --cgroups=no-conmon --replace --name filebeat --user root --restart=always --security-opt label=disable --volume /var/log/:/logs:ro --volume /etc/filebeat/filebeat.yml:/usr/share/filebeat/filebeat.yml:ro --detach=True docker.elastic.co/beats/filebeat-oss:7.9.3 -e -strict.perms=false -d "*" +ExecStart=/usr/bin/podman run \ + --network=host \ + --sdnotify=conmon \ + --cgroups=no-conmon \ + --replace \ + --name filebeat \ + --user root \ + --restart=always \ + --security-opt label=disable \ + --volume /var/log/:/logs:ro \ + --volume /etc/filebeat/filebeat.yml:/usr/share/filebeat/filebeat.yml:ro \ + --detach=True docker.elastic.co/beats/filebeat-oss:{{ filebeat_version }} \ + -e -strict.perms=false -d "*" ExecStop=/usr/bin/podman stop --ignore filebeat -t 10 ExecStopPost=/usr/bin/podman rm --ignore -f filebeat KillMode=none diff --git a/ansible/roles/mysql/templates/mysql.service.j2 b/ansible/roles/mysql/templates/mysql.service.j2 index e61bb4002..3b531cd3f 100644 --- a/ansible/roles/mysql/templates/mysql.service.j2 +++ b/ansible/roles/mysql/templates/mysql.service.j2 @@ -15,13 +15,17 @@ EnvironmentFile=/etc/sysconfig/mysqld ExecStartPre=+install -d -o {{ mysql_podman_user }} -g {{ mysql_podman_user }} -Z container_file_t {{ 
mysql_datadir }} ExecStartPre=+chown -R {{ mysql_podman_user }}:{{ mysql_podman_user }} {{ mysql_datadir }} ExecStart=/usr/bin/podman run \ - --network slirp4netns:cidr={{ podman_cidr }} \ - --sdnotify=conmon --cgroups=no-conmon \ - --detach --replace --name mysql --restart=no \ + --network=host \ + --sdnotify=conmon \ + --cgroups=no-conmon \ + --detach \ + --replace \ + --name mysql \ + --restart=no \ --user mysql \ --volume {{ mysql_datadir }}:/var/lib/mysql:U \ --publish 3306:3306 \ - -e MYSQL_ROOT_PASSWORD=${MYSQL_INITIAL_ROOT_PASSWORD} \ + --env MYSQL_ROOT_PASSWORD=${MYSQL_INITIAL_ROOT_PASSWORD} \ mysql:{{ mysql_tag }}{%- for opt in mysql_mysqld_options %} \ --{{ opt }}{% endfor %} diff --git a/ansible/roles/opendistro/defaults/main.yml b/ansible/roles/opendistro/defaults/main.yml deleted file mode 100644 index eecf52f97..000000000 --- a/ansible/roles/opendistro/defaults/main.yml +++ /dev/null @@ -1,6 +0,0 @@ ---- -# Used to set passwords -#opendistro_internal_users_path: - -opendistro_podman_user: "{{ ansible_user }}" -opendistro_data_path: "/usr/share/elasticsearch/data" # path to host data directory diff --git a/ansible/roles/opendistro/handlers/main.yml b/ansible/roles/opendistro/handlers/main.yml deleted file mode 100644 index 6b8395d87..000000000 --- a/ansible/roles/opendistro/handlers/main.yml +++ /dev/null @@ -1,9 +0,0 @@ ---- - -- name: Restart opendistro container - systemd: - name: opendistro.service - state: restarted - enabled: yes - daemon_reload: yes - become: true diff --git a/ansible/roles/opendistro/tasks/config.yml b/ansible/roles/opendistro/tasks/config.yml deleted file mode 100644 index 580d840d8..000000000 --- a/ansible/roles/opendistro/tasks/config.yml +++ /dev/null @@ -1,32 +0,0 @@ ---- - -- name: Collect usernamespace facts - user_namespace_facts: - -- name: Set facts containing group_ids - set_fact: - # Elastic search user is 1000 - opendistro_host_user_id: "{{ ansible_facts.subuid[opendistro_podman_user]['start'] + 1000 - 1 }}" - 
opendistro_host_group_id: "{{ ansible_facts.subgid[opendistro_podman_user]['start'] + 1000 - 1 }}" - -- name: Ensure parent directory exists - file: - state: directory - path: "/etc/elastic" - owner: "{{ opendistro_host_user_id }}" - group: "{{ opendistro_host_group_id }}" - mode: 0770 - become: true - -- name: Template configuration files - template: - src: "{{ opendistro_internal_users_path }}" - dest: /etc/elastic/internal_users.yml - owner: "{{ opendistro_host_user_id }}" - # NOTE: root user in container maps to user on host, so this will appear as - # owned by root in the container. - group: "{{ opendistro_podman_user }}" - mode: 0660 - # This needs to smarter as bcrypt hash changes everytime - notify: Restart opendistro container - become: true diff --git a/ansible/roles/opendistro/tasks/deploy.yml b/ansible/roles/opendistro/tasks/deploy.yml deleted file mode 100644 index b02490692..000000000 --- a/ansible/roles/opendistro/tasks/deploy.yml +++ /dev/null @@ -1,8 +0,0 @@ ---- - -- name: Create systemd unit file - template: - dest: /etc/systemd/system/opendistro.service - src: opendistro.service.j2 - become: true - notify: Restart opendistro container diff --git a/ansible/roles/opendistro/tasks/post.yml b/ansible/roles/opendistro/tasks/post.yml deleted file mode 100644 index e69de29bb..000000000 diff --git a/ansible/roles/opendistro/tasks/validate.yml b/ansible/roles/opendistro/tasks/validate.yml deleted file mode 100644 index e69de29bb..000000000 diff --git a/ansible/roles/opendistro/templates/opendistro.service.j2 b/ansible/roles/opendistro/templates/opendistro.service.j2 deleted file mode 100644 index af7eb8e78..000000000 --- a/ansible/roles/opendistro/templates/opendistro.service.j2 +++ /dev/null @@ -1,42 +0,0 @@ -# container-opendistro.service - -[Unit] -Description=Podman container-opendistro.service -Documentation=man:podman-generate-systemd(1) -Wants=network.target -After=network-online.target - -[Service] -Environment=PODMAN_SYSTEMD_UNIT=%n 
-Restart=always -ExecStartPre=+install -d -o {{ opendistro_podman_user }} -g {{ opendistro_podman_user }} -Z container_file_t {{ opendistro_data_path }} -ExecStartPre=+chown -R {{ opendistro_podman_user }}:{{ opendistro_podman_user }} {{ opendistro_data_path }} -ExecStart=/usr/bin/podman run \ - --network slirp4netns:cidr={{ podman_cidr }} \ - --sdnotify=conmon --cgroups=no-conmon \ - --detach --replace --name opendistro --restart=no \ - --user elasticsearch \ - --ulimit memlock=-1:-1 --ulimit nofile=65536:65536 \ - --volume {{ opendistro_data_path }}:/usr/share/elasticsearch/data:U \ - --volume /etc/elastic/internal_users.yml:/usr/share/elasticsearch/plugins/opendistro_security/securityconfig/internal_users.yml:ro \ - --env node.name=opendistro \ - --env discovery.type=single-node \ - --env bootstrap.memory_lock=true \ - --env "ES_JAVA_OPTS=-Xms512m -Xmx512m" \ - --publish 9200:9200 \ - amazon/opendistro-for-elasticsearch:1.12.0 -ExecStop=/usr/bin/podman stop --ignore opendistro -t 10 -# note for some reason this returns status=143 which makes systemd show the unit as failed, not stopped -ExecStopPost=/usr/bin/podman rm --ignore -f opendistro -SuccessExitStatus=143 SIGTERM -KillMode=none -Type=notify -NotifyAccess=all -LimitNOFILE=65536 -LimitMEMLOCK=infinity -User={{ opendistro_podman_user }} -Group={{ opendistro_podman_user }} -TimeoutStartSec=180 - -[Install] -WantedBy=multi-user.target default.target diff --git a/ansible/roles/opensearch/defaults/main.yml b/ansible/roles/opensearch/defaults/main.yml new file mode 100644 index 000000000..953b3fc1c --- /dev/null +++ b/ansible/roles/opensearch/defaults/main.yml @@ -0,0 +1,11 @@ +--- +# Used to set passwords +#opensearch_internal_users_path: + +opensearch_podman_user: "{{ ansible_user }}" +opensearch_version: '2.4.0' # https://hub.docker.com/r/opensearchproject/opensearch/tags +opensearch_config_path: /usr/share/opensearch/config +opensearch_data_path: /usr/share/opensearch/data +opensearch_state: started # will 
be restarted if required +opensearch_systemd_service_enabled: true +opensearch_certs_duration: "{{ 365 * 10 }}" # days validity for self-signed certs diff --git a/ansible/roles/opensearch/handlers/main.yml b/ansible/roles/opensearch/handlers/main.yml new file mode 100644 index 000000000..539a06125 --- /dev/null +++ b/ansible/roles/opensearch/handlers/main.yml @@ -0,0 +1,9 @@ +--- + +- name: Restart opensearch service + systemd: + name: opensearch.service + state: "{{ 'restarted' if 'started' in opensearch_state else opensearch_state }}" + enabled: "{{ opensearch_systemd_service_enabled }}" + daemon_reload: "{{ 'started' in opensearch_state }}" + become: true diff --git a/ansible/roles/opensearch/tasks/certs.yml b/ansible/roles/opensearch/tasks/certs.yml new file mode 100644 index 000000000..e40f65242 --- /dev/null +++ b/ansible/roles/opensearch/tasks/certs.yml @@ -0,0 +1,70 @@ +- name: Ensure host certs directory exists + file: + path: "{{ opensearch_config_path }}/certs" + state: directory + owner: "{{ opensearch_podman_user }}" + group: "{{ opensearch_podman_user }}" + mode: ug=rwx,o= + +# Cert generation based on https://opensearch.org/docs/latest/security-plugin/configuration/generate-certificates/ + +- name: Generate root private key + community.crypto.openssl_privatekey: + path: "{{ opensearch_config_path }}/certs/root-ca-key.pem" + owner: "{{ opensearch_podman_user }}" + group: "{{ opensearch_podman_user }}" + mode: ug=rw,o= + return_content: false + +- name: Generate root CSR + # Required as cert has to have a subject + community.crypto.openssl_csr_pipe: + privatekey_path: "{{ opensearch_config_path }}/certs/root-ca-key.pem" + country_name: XX + register: _opensearch_root_csr + +- name: Generate root cert + community.crypto.x509_certificate: + provider: selfsigned + selfsigned_not_after: "+{{ opensearch_certs_duration }}d" + ignore_timestamps: false # so will be regenerated when run if necessary + privatekey_path: "{{ opensearch_config_path 
}}/certs/root-ca-key.pem" + path: "{{ opensearch_config_path }}/certs/root-ca.pem" + csr_content: "{{ _opensearch_root_csr.csr }}" + owner: "{{ opensearch_podman_user }}" + group: "{{ opensearch_podman_user }}" + mode: ug=rw,o= + return_content: false + +- name: Generate node private key in pkcs8 format + community.crypto.openssl_privatekey: + path: "{{ opensearch_config_path }}/certs/esnode-key.pem" + format: pkcs8 + owner: "{{ opensearch_podman_user }}" + group: "{{ opensearch_podman_user }}" + mode: ug=rw,o= + return_content: false + +- name: Generate node CSR + # Required as cert has to have a subject and subject_alt_name + # Actual subject_alt_name is irrelevant as using enforce_hostname_verification=false + community.crypto.openssl_csr_pipe: + privatekey_path: "{{ opensearch_config_path }}/certs/esnode-key.pem" + country_name: XX + subject_alt_name: + - "DNS:esnode.dns.a-record" + register: _opensearch_node_csr + +- name: Generate node cert + community.crypto.x509_certificate: + provider: ownca + ownca_not_after: "+{{ opensearch_certs_duration }}d" + ignore_timestamps: false # so will be regenerated when run if necessary + ownca_path: "{{ opensearch_config_path }}/certs/root-ca.pem" + ownca_privatekey_path: "{{ opensearch_config_path }}/certs/root-ca-key.pem" + path: "{{ opensearch_config_path }}/certs/esnode.pem" + csr_content: "{{ _opensearch_node_csr.csr }}" + owner: "{{ opensearch_podman_user }}" + group: "{{ opensearch_podman_user }}" + mode: ug=rw,o= + return_content: false diff --git a/ansible/roles/opensearch/tasks/main.yml b/ansible/roles/opensearch/tasks/main.yml new file mode 100644 index 000000000..65da51404 --- /dev/null +++ b/ansible/roles/opensearch/tasks/main.yml @@ -0,0 +1,85 @@ +--- + +- name: Check for existing opendistro service + stat: + path: /etc/systemd/system/opendistro.service + register: _opensearch_opendistro_service + +- name: Migrate opendistro data + import_tasks: + file: migrate-opendistro.yml + when: 
_opensearch_opendistro_service.stat.exists + +- name: Remove opendistro service + file: + path: /etc/systemd/system/opendistro.service + state: absent + +- name: Increase maximum number of virtual memory maps + # see https://opensearch.org/docs/2.0/opensearch/install/important-settings/ + ansible.posix.sysctl: + name: vm.max_map_count + value: '262144' + state: present + reload: yes + become: true + +- name: Ensure required opensearch host directories exist + file: + state: directory + path: "{{ item }}" + owner: "{{ opensearch_podman_user }}" + group: "{{ opensearch_podman_user }}" + mode: 0770 + become: true + loop: + - "{{ opensearch_config_path }}" + - "{{ opensearch_data_path }}" + when: "'started' in opensearch_state" # don't run during image build + +- name: Create certs + import_tasks: certs.yml + when: "'started' in opensearch_state" # don't run during image build + +- name: Template general configuration + ansible.builtin.template: + src: opensearch.yml.j2 + dest: "{{ opensearch_config_path }}/opensearch.yml" + owner: "{{ opensearch_podman_user }}" + group: "{{ opensearch_podman_user }}" + # NOTE: root user in container maps to user on host, so this will appear as + # owned by root in the container. + mode: 0660 + notify: Restart opensearch service + become: true + when: "'started' in opensearch_state" # don't run during image build + +- name: Template internal user configuration + template: + src: "{{ opensearch_internal_users_path }}" + dest: "{{ opensearch_config_path }}/internal_users.yml" + owner: "{{ opensearch_podman_user }}" + group: "{{ opensearch_podman_user }}" + # NOTE: root user in container maps to user on host, so this will appear as + # owned by root in the container. 
+ mode: 0660 + notify: Restart opensearch service + become: true + when: "'started' in opensearch_state" # don't run during image build + +- name: Create systemd unit file + template: + dest: /etc/systemd/system/opensearch.service + src: opensearch.service.j2 + become: true + notify: Restart opensearch service + +- name: Flush handlers + meta: flush_handlers + +- name: Ensure opensearch service state + systemd: + name: opensearch.service + state: "{{ opensearch_state }}" + enabled: "{{ opensearch_systemd_service_enabled }}" + become: true diff --git a/ansible/roles/opensearch/tasks/migrate-opendistro.yml b/ansible/roles/opensearch/tasks/migrate-opendistro.yml new file mode 100644 index 000000000..7cb5c8190 --- /dev/null +++ b/ansible/roles/opensearch/tasks/migrate-opendistro.yml @@ -0,0 +1,28 @@ +# Migrate data from existing containerised opendistro v1.12.0 to containerised opensearch 2.1.0. +# +# This relies on: +# - Both opendistro and opensearch using host directories for data. See `_default_opendistro_data_path` below +# - Pre-upgrade group `opendistro` and current group `opensearch` containing the same host. +# +# NB: If `opendistro_data_path` was set to something non-default it MUST be set again in the `opensearch` group_vars, +# as the `opendistro` group will not exist in the groups. + +# NB: This deliberately does not remove the opendistro data - this could be done manually if required. 
+ +- name: Stop opendistro + ansible.builtin.systemd: + name: opendistro.service + state: stopped + enabled: false + +- name: Copy opendistro data directory + ansible.builtin.copy: + remote_src: true + src: "{{ opendistro_data_path | default(_default_opendistro_data_path) }}" + dest: "{{ opensearch_data_path | dirname }}/" # copying a directory, so need to specify the parent for destination + owner: "{{ opensearch_podman_user }}" + group: "{{ opensearch_podman_user }}" + mode: 0770 + vars: + # from environments/common/inventory/group_vars/all/opendistro.yml: + _default_opendistro_data_path: "{{ appliances_state_dir | default('/usr/share') }}/elasticsearch/data" diff --git a/ansible/roles/opensearch/templates/opensearch.service.j2 b/ansible/roles/opensearch/templates/opensearch.service.j2 new file mode 100644 index 000000000..6951bafc0 --- /dev/null +++ b/ansible/roles/opensearch/templates/opensearch.service.j2 @@ -0,0 +1,47 @@ +# container-opensearch.service + +[Unit] +Description=Podman container-opensearch.service +Documentation=man:podman-generate-systemd(1) +Wants=network.target +After=network-online.target + +[Service] +Environment=PODMAN_SYSTEMD_UNIT=%n +Restart=always +# paths below based on https://opensearch.org/docs/latest/opensearch/configuration/ and https://opensearch.org/docs/latest/security-plugin/configuration/yaml +# see also https://opensearch.org/docs/2.0/opensearch/install/important-settings/ +ExecStart=/usr/bin/podman run \ + --network=host \ + --sdnotify=conmon \ + --cgroups=no-conmon \ + --detach \ + --replace \ + --name opensearch \ + --restart=no --user opensearch \ + --ulimit memlock=-1:-1 --ulimit nofile=65536:65536 \ + --volume {{ opensearch_data_path }}:/usr/share/opensearch/data:U \ + --volume {{ opensearch_config_path }}/internal_users.yml:/usr/share/opensearch/config/opensearch-security/internal_users.yml:U \ + --volume {{ opensearch_config_path }}/opensearch.yml:/usr/share/opensearch/config/opensearch.yml:U \ + --volume {{ 
opensearch_config_path}}/certs:/usr/share/opensearch/config/certs:U \ + --env node.name=opensearch \ + --env discovery.type=single-node \ + --env bootstrap.memory_lock=true \ + --env "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" \ + --env DISABLE_INSTALL_DEMO_CONFIG=true \ + opensearchproject/opensearch:{{ opensearch_version }} +ExecStop=/usr/bin/podman stop --ignore opensearch -t 10 +# note for some reason this returns status=143 which makes systemd show the unit as failed, not stopped +ExecStopPost=/usr/bin/podman rm --ignore -f opensearch +SuccessExitStatus=143 SIGTERM +KillMode=none +Type=notify +NotifyAccess=all +LimitNOFILE=65536 +LimitMEMLOCK=infinity +User={{ opensearch_podman_user }} +Group={{ opensearch_podman_user }} +TimeoutStartSec=180 + +[Install] +WantedBy=multi-user.target default.target diff --git a/ansible/roles/opensearch/templates/opensearch.yml.j2 b/ansible/roles/opensearch/templates/opensearch.yml.j2 new file mode 100644 index 000000000..cd9aa939a --- /dev/null +++ b/ansible/roles/opensearch/templates/opensearch.yml.j2 @@ -0,0 +1,34 @@ +### This section copies a running container's default config ### +cluster.name: docker-cluster + +# Bind to all interfaces because we don't know what IP address Docker will assign to us. +network.host: 0.0.0.0 + +# # minimum_master_nodes need to be explicitly set when bound on a public IP +# # set to 1 to allow single node clusters +# discovery.zen.minimum_master_nodes: 1 + +# Setting network.host to a non-loopback address enables the annoying bootstrap checks. "Single-node" mode disables them again. 
+# discovery.type: single-node + +### Custom config section ### + +# Security config: +plugins.security.ssl.transport.pemcert_filepath: certs/esnode.pem +plugins.security.ssl.transport.pemkey_filepath: certs/esnode-key.pem +plugins.security.ssl.transport.pemtrustedcas_filepath: certs/root-ca.pem +plugins.security.ssl.transport.enforce_hostname_verification: false +plugins.security.ssl.http.enabled: true +plugins.security.ssl.http.pemcert_filepath: certs/esnode.pem +plugins.security.ssl.http.pemkey_filepath: certs/esnode-key.pem +plugins.security.ssl.http.pemtrustedcas_filepath: certs/root-ca.pem +plugins.security.allow_unsafe_democertificates: false +plugins.security.allow_default_init_securityindex: true +plugins.security.audit.type: internal_opensearch +plugins.security.enable_snapshot_restore_privilege: true +plugins.security.check_snapshot_restore_write_privileges: true +plugins.security.restapi.roles_enabled: ["all_access", "security_rest_api_access"] +plugins.security.system_indices.enabled: true + +# Fake version for filebeat: https://opensearch.org/docs/2.4/tools/index/#agents-and-ingestion-tools +compatibility.override_main_response_version: true diff --git a/ansible/roles/podman/tasks/config.yml b/ansible/roles/podman/tasks/config.yml index d7e6175cb..05dc8f757 100644 --- a/ansible/roles/podman/tasks/config.yml +++ b/ansible/roles/podman/tasks/config.yml @@ -5,7 +5,7 @@ content: | # WARNING: This file is managed by ansible, do not modify. # This is so non-root containers can use more resources. This is useful - # for opendistro. + # for opensearch. 
* soft memlock unlimited * hard memlock unlimited * soft nofile 65536 @@ -13,10 +13,17 @@ dest: /etc/security/limits.d/custom.conf become: true -- name: Up default keys permitted +- name: Up number of non-root kernel keys permitted per user + # See https://www.kernel.org/doc/html/v4.18/security/keys/core.html ansible.posix.sysctl: name: kernel.keys.maxkeys # /proc/sys/kernel/keys/maxkeys - value: 50000 + value: 50000 # default is 200 + become: true + +- name: Up size of non-root kernel keys permitted per user + ansible.posix.sysctl: + name: kernel.keys.maxbytes # /proc/sys/kernel/keys/maxbytes + value: 25000000 # set same as root. Non-root default is 20000 become: true - name: reset ssh connection to allow user changes to affect 'current login user' diff --git a/ansible/roles/podman/tasks/validate.yml b/ansible/roles/podman/tasks/validate.yml index 2b7bcb18d..14b13d11f 100644 --- a/ansible/roles/podman/tasks/validate.yml +++ b/ansible/roles/podman/tasks/validate.yml @@ -7,9 +7,3 @@ assert: that: podman_tmp_fstype.stdout == 'tmpfs' fail_msg: "{{ podman_tmp_fstype }} (variable podman_tmp_fstype) must be on tmpfs" - -- name: Check host IPs are not within podman network CIDR - assert: - that: ( podman_cidr | ansible.netcommon.network_in_network(item)) == false - fail_msg: "Address {{ item }} for {{ inventory_hostname }} is in podman network range {{ podman_cidr }} - set `podman_cidr` to avoid host network address ranges" - loop: "{{ ansible_all_ipv4_addresses }}" \ No newline at end of file diff --git a/ansible/validate.yml b/ansible/validate.yml index 8087bceb8..d294e98e5 100644 --- a/ansible/validate.yml +++ b/ansible/validate.yml @@ -38,15 +38,6 @@ tasks_from: validate.yml tags: validate -- name: Validate opendistro configuration - hosts: opendistro - tags: opendistro - tasks: - - import_role: - name: opendistro - tasks_from: validate.yml - tags: validate - - name: Validate rebuild configuration hosts: rebuild gather_facts: false diff --git 
a/docs/monitoring-and-logging.README.md b/docs/monitoring-and-logging.README.md index b607b674b..3e3de38c0 100644 --- a/docs/monitoring-and-logging.README.md +++ b/docs/monitoring-and-logging.README.md @@ -11,11 +11,10 @@ Parses log files and ships them to elasticsearch. Note we use the version shippe Visualisation tool that supports multiple different datasources. In our stack, we use it to visualise prometheus and elasticsearch data. -### [opendistro](https://opendistro.github.io/) +### [opensearch](https://opensearch.org/) -An open-source distribution of elasticsearch. Elasticsearch is a search engine that provides full -text search over a collection of JSON documents. In this project, the main use is for the archiving -and retrieval of log files. +A search engine that provides full text search over a collection of JSON documents. In this project, +the main use is for the archiving and retrieval of log files. ### [prometheus](https://prometheus.io/) @@ -97,11 +96,7 @@ The `grafana` group controls the placement of the grafana service. Load balancin ### Access -The default URL is: - -> https://:3000 - -The port can be controlled with the `grafana_port` variable. +If Open Ondemand is enabled then by default this is used to proxy Grafana, otherwise Grafana is accessed through the first host in the `grafana` group. See `grafana_url` in [environments/common/inventory/group_vars/all/grafana.yml](../environments/common/inventory/group_vars/all/grafana.yml). The port used (variable `grafana_port`) defaults to `3000`. The default credentials for the admin user are: @@ -112,7 +107,7 @@ Where `vault_grafana_admin_password` is a variable containing the actual passwor ### grafana dashboards -The appliance ships with a default set of dashboards. The set of dashboards can be configured via the `grafana_dashboards` variable. The dashboards are published to grafana.com and are referenced by URL. +The appliance ships with a default set of dashboards. 
The set of dashboards can be configured via the `grafana_dashboards` variable. The dashboards are either internal to the [grafana-dashboards role](../ansible/roles/grafana-dashboards/files/) or downloaded from grafana.com. #### node exporter @@ -160,33 +155,33 @@ The default configuration configures the following datasources: This can be customised with the `grafana_datasources` variable. -## opendistro +## opensearch -This section details the configuration of Open Distro. +This section details the configuration of OpenSearch. ### Defaults -The internal `opendistro` role is ued to configure the service. The list of variables that can be customised can found in: +The internal `opensearch` role is used to configure the service. The list of variables that can be customised can be found in: -> [ansible/roles/opendistro/defaults/main.yml](../ansible/roles/opendistro/defaults/main.yml) +> [ansible/roles/opensearch/defaults/main.yml](../ansible/roles/opensearch/defaults/main.yml) The appliance defaults are in the following file: -> [environments/common/inventory/group_vars/all/opendistro.yml](../environments/common/inventory/group_vars/all/opendistro.yml) +> [environments/common/inventory/group_vars/all/opensearch.yml](../environments/common/inventory/group_vars/all/opensearch.yml) ### Placement -The `opendistro` group determines the placement of the opendistro service. Load balancing is currently unsupported so it is important that you only assign one host to this group. +The `opensearch` group determines the placement of the OpenSearch service. Load balancing is currently unsupported so it is important that you only assign one host to this group. ### Access -By default opendistro only listens on the loopback interface. It should therefore be placed on the same node as `filebeat` which needs to access the opendistro API. +By default, OpenSearch only listens on the loopback interface. 
It should therefore be placed on the same node as `filebeat` and `grafana` which need to access the OpenSearch API. ### Users The default set of users is defined in: -> [environments/common/files/opendistro/internal_users.yml](../environments/common/files/opendistro/internal_users.yml) +> [environments/common/files/opensearch/internal_users.yml](../environments/common/files/opensearch/internal_users.yml) This defines an the following accounts: @@ -202,12 +197,12 @@ the credentials should be treated with extreme care. To override the default set of users, you can customize the variable: -> [environments/common/files/opendistro/internal_users.yml](../environments/common/files/opendistro/internal_users.yml) +> [environments/common/files/opensearch/internal_users.yml](../environments/common/files/opensearch/internal_users.yml) -You can change this file by modifying the variable, `opendistro_internal_users_path`, where the default can be +You can change this file by modifying the variable, `opensearch_internal_users_path`, where the default can be found in: -> [environments/common/inventory/group_vars/all/opendistro.yml](../environments/common/inventory/group_vars/all/opendistro.yml) +> [environments/common/inventory/group_vars/all/opensearch.yml](../environments/common/inventory/group_vars/all/opensearch.yml) ## Prometheus diff --git a/environments/arcus/builder.pkrvars.hcl b/environments/arcus/builder.pkrvars.hcl index eb39e9bf7..f55c25d38 100644 --- a/environments/arcus/builder.pkrvars.hcl +++ b/environments/arcus/builder.pkrvars.hcl @@ -1,6 +1,6 @@ flavor = "vm.alaska.cpu.general.small" networks = ["a262aabd-e6bf-4440-a155-13dbc1b5db0e"] # WCDC-iLab-60 -source_image_name = "openhpc-230106-1107.qcow2" # https://github.com/stackhpc/slurm_image_builder/pull/13 +source_image_name = "openhpc-230110-1629.qcow2" # https://github.com/stackhpc/slurm_image_builder/pull/14 #source_image_name = "Rocky-8-GenericCloud-Base-8.7-20221130.0.x86_64.qcow2" ssh_keypair_name = 
"slurm-app-ci" security_groups = ["default", "SSH"] diff --git a/environments/arcus/terraform/main.tf b/environments/arcus/terraform/main.tf index d324f4cb0..010ea6b7e 100644 --- a/environments/arcus/terraform/main.tf +++ b/environments/arcus/terraform/main.tf @@ -17,7 +17,7 @@ variable "create_nodes" { variable "cluster_image" { description = "single image for all cluster nodes - a convenience for CI" type = string - default = "openhpc-230106-1107.qcow2" # https://github.com/stackhpc/slurm_image_builder/pull/13 + default = "openhpc-230110-1629.qcow2" # https://github.com/stackhpc/slurm_image_builder/pull/14 # default = "Rocky-8-GenericCloud-Base-8.7-20221130.0.x86_64.qcow2" # default = "Rocky-8-GenericCloud-8.6.20220702.0.x86_64.qcow2" } @@ -31,7 +31,7 @@ module "cluster" { vnic_type = "direct" key_pair = "slurm-app-ci" control_node = { - flavor: "vm.alaska.cpu.general.small" + flavor: "vm.alaska.cpu.general.quarter" image: var.cluster_image } login_nodes = { diff --git a/environments/common/files/filebeat/filebeat.yml b/environments/common/files/filebeat/filebeat.yml index d233d86f5..f5f385a5f 100644 --- a/environments/common/files/filebeat/filebeat.yml +++ b/environments/common/files/filebeat/filebeat.yml @@ -49,7 +49,7 @@ processors: - {from: "json.ElapsedRaw", type: "integer"} output.elasticsearch: - hosts: ["{{ elasticsearch_address }}:9200"] + hosts: ["{{ opensearch_address }}:9200"] protocol: "https" ssl.verification_mode: none username: "admin" diff --git a/environments/common/files/opendistro/internal_users.yml b/environments/common/files/opendistro/internal_users.yml deleted file mode 100644 index d9bff6adf..000000000 --- a/environments/common/files/opendistro/internal_users.yml +++ /dev/null @@ -1,18 +0,0 @@ ---- -# See: https://aws.amazon.com/blogs/opensource/change-passwords-open-distro-for-elasticsearch/ - -# This is the internal user database -# The hash value is a bcrypt hash and can be generated with plugin/tools/hash.sh - -_meta: - type: 
"internalusers" - config_version: 2 - -# Define your internal users here - -admin: - hash: "{{ vault_elasticsearch_admin_password | password_hash('bcrypt') }}" - reserved: true - backend_roles: - - "admin" - description: "Admin user" diff --git a/environments/common/files/opensearch/internal_users.yml.j2 b/environments/common/files/opensearch/internal_users.yml.j2 new file mode 100644 index 000000000..1c2deb559 --- /dev/null +++ b/environments/common/files/opensearch/internal_users.yml.j2 @@ -0,0 +1,13 @@ +--- +# See https://opensearch.org/docs/latest/security-plugin/configuration/yaml#internal_usersyml + +_meta: + type: "internalusers" + config_version: 2 + +admin: + hash: "{{ opensearch_admin_password_hash }}" + reserved: true + backend_roles: + - "admin" + description: "Admin user" diff --git a/environments/common/inventory/group_vars/all/defaults.yml b/environments/common/inventory/group_vars/all/defaults.yml index 1a3f6b421..b02fd5151 100644 --- a/environments/common/inventory/group_vars/all/defaults.yml +++ b/environments/common/inventory/group_vars/all/defaults.yml @@ -14,7 +14,7 @@ internal_address: "{{ inventory_hostname }}" api_address: "{{ inventory_hostname }}" # Service endpoints -elasticsearch_address: "{{ hostvars[groups['opendistro'].0].api_address }}" +opensearch_address: "127.0.0.1" prometheus_address: "{{ hostvars[groups['prometheus'].0].api_address }}" openondemand_address: "{{ hostvars[groups['openondemand'].0].api_address if groups['openondemand'] | count > 0 else '' }}" grafana_address: "{{ hostvars[groups['grafana'].0].api_address }}" diff --git a/environments/common/inventory/group_vars/all/grafana.yml b/environments/common/inventory/group_vars/all/grafana.yml index 68e0f2a7a..ce83e3c92 100644 --- a/environments/common/inventory/group_vars/all/grafana.yml +++ b/environments/common/inventory/group_vars/all/grafana.yml @@ -2,7 +2,7 @@ # See: https://github.com/cloudalchemy/ansible-grafana # for variable definitions. 
-grafana_version: '8.5.9' +grafana_version: '9.0.3' # need to copy some role defaults here so we can use in inventory: grafana_port: 3000 @@ -63,8 +63,8 @@ grafana_datasources: url: "http://{{ prometheus_address }}:9090" # default prometheus port editable: true - name: slurmstats - type: grafana-es-open-distro-datasource - url: "https://{{ elasticsearch_address }}:9200" + type: grafana-opensearch-datasource + url: "https://{{ opensearch_address }}:9200" basicAuth: true basicAuthUser: admin secureJsonData: @@ -72,13 +72,14 @@ grafana_datasources: withCredentials: true jsonData: tlsSkipVerify: true - database: "filebeat-*" + database: filebeat-* timeField: "@timestamp" - es_version: 70 + flavor: opensearch editable: true + # readOnly: false grafana_plugins: - - grafana-es-open-distro-datasource + - grafana-opensearch-datasource # want to set grafana_server.serve_from_sub_path if have Open Ondemand to proxy: grafana_server: diff --git a/environments/common/inventory/group_vars/all/opendistro.yml b/environments/common/inventory/group_vars/all/opendistro.yml deleted file mode 100644 index 6c3ba7847..000000000 --- a/environments/common/inventory/group_vars/all/opendistro.yml +++ /dev/null @@ -1,11 +0,0 @@ ---- -# See: https://opendistro.github.io/for-elasticsearch-docs/docs/security/configuration/yaml/ - -# Path to template that specifies opendistro users -opendistro_internal_users_path: "{{ appliances_repository_root }}/environments/common/files/opendistro/internal_users.yml" - -# The user which runs the opendistro container -opendistro_podman_user: podman - -# Path to host data directory -opendistro_data_path: "{{ appliances_state_dir | default('/usr/share') }}/elasticsearch/data" diff --git a/environments/common/inventory/group_vars/all/opensearch.yml b/environments/common/inventory/group_vars/all/opensearch.yml new file mode 100644 index 000000000..47d38de29 --- /dev/null +++ b/environments/common/inventory/group_vars/all/opensearch.yml @@ -0,0 +1,16 @@ +--- +# See: 
https://opensearch.org/docs/latest/security-plugin/configuration/index/ + +# Path to template that specifies opensearch users +opensearch_internal_users_path: "{{ appliances_repository_root }}/environments/common/files/opensearch/internal_users.yml.j2" + +# define an idempotent bcrypt hash for the above (requires a 128bit salt in base64 encoding): +opensearch_admin_password_salt: "{{ (2 | pow(128) | int) | random(seed=inventory_hostname) | b64encode }}" +opensearch_admin_password_hash: "{{ vault_elasticsearch_admin_password | password_hash('bcrypt', opensearch_admin_password_salt[0:22]) }}" + +# user running the opensearch container +opensearch_podman_user: podman + +# Path to host directories +opensearch_config_path: "{{ appliances_state_dir | default('/usr/share') }}/opensearch/config" +opensearch_data_path: "{{ appliances_state_dir | default('/usr/share') }}/opensearch/data" diff --git a/environments/common/inventory/group_vars/all/podman.yml b/environments/common/inventory/group_vars/all/podman.yml index 714a36f5d..8ca8eb1eb 100644 --- a/environments/common/inventory/group_vars/all/podman.yml +++ b/environments/common/inventory/group_vars/all/podman.yml @@ -1,2 +1 @@ podman_users: "{{ [appliances_local_users_podman] }}" # user to use for podman -podman_cidr: 10.0.2.0/24 # IP range to use for podman - see slirp4netns:cidr= at https://docs.podman.io/en/latest/markdown/podman-run.1.html diff --git a/environments/common/inventory/group_vars/all/systemd.yml b/environments/common/inventory/group_vars/all/systemd.yml index 98f5fd97b..1afbc0e55 100644 --- a/environments/common/inventory/group_vars/all/systemd.yml +++ b/environments/common/inventory/group_vars/all/systemd.yml @@ -4,8 +4,8 @@ _systemd_requiresmount_statedir: | _systemd_dropins_statedir: # mysql not included as role handles state dir correctly - opendistro: - group: opendistro + opensearch: + group: opensearch content: "{{ _systemd_requiresmount_statedir }}" grafana-server: group: grafana diff --git 
a/environments/common/inventory/group_vars/builder/defaults.yml b/environments/common/inventory/group_vars/builder/defaults.yml index b6107fc1a..45aed3118 100644 --- a/environments/common/inventory/group_vars/builder/defaults.yml +++ b/environments/common/inventory/group_vars/builder/defaults.yml @@ -13,3 +13,4 @@ basic_users_manage_homedir: false grafana_state: stopped # as it tries to listen on the "real" grafana node block_devices_configurations: [] # as volumes will not be attached to Packer build VMs mysql_state: stopped # as it tries to connect to real mysql node +opensearch_state: stopped # avoid writing config+certs+db into image diff --git a/environments/common/inventory/groups b/environments/common/inventory/groups index 98be33c66..b71500e62 100644 --- a/environments/common/inventory/groups +++ b/environments/common/inventory/groups @@ -26,7 +26,7 @@ openhpc [podman:children] # Hosts running containers for below services: -opendistro +opensearch filebeat mysql @@ -39,12 +39,12 @@ mysql [alertmanager] # TODO: -[opendistro] +[opensearch] # Single node to host ElasticSearch search engine for Slurm monitoring. [slurm_stats] # Single node to run tools to integrate Slurm's accounting information with ElasticSearch. -# NB: Host must be in `openhpc` group (for `sacct` command) and `opendistro` group. +# NB: Host must be in `openhpc` group (for `sacct` command) and `opensearch` group. [filebeat] # Single node to parses log files for ElasticSearch - must be co-located with `slurm_stats`. 
@@ -100,7 +100,7 @@ etc_hosts [systemd:children] # Hosts to make systemd unit adjustments on -opendistro +opensearch grafana control prometheus diff --git a/environments/common/layouts/everything b/environments/common/layouts/everything index 6e2538cc4..c4c01afd0 100644 --- a/environments/common/layouts/everything +++ b/environments/common/layouts/everything @@ -16,7 +16,7 @@ control [node_exporter:children] cluster -[opendistro:children] +[opensearch:children] control [slurm_stats:children]