Skip to content
Merged
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
8fe4bab
add support for default eessi proxy config to squid role
sjpb Nov 28, 2025
f0bd623
make eeesi/squid autoconfigure for proxy
sjpb Dec 5, 2025
b917e04
add missing eessi cvmfs config template
sjpb Dec 10, 2025
731f020
address linter docs errors
sjpb Dec 19, 2025
db44f2b
stop textlint changing 'local storage' to 'localStorage'
sjpb Dec 19, 2025
81d24e2
Revert "stop textlint changing 'local storage' to 'localStorage'"
sjpb Dec 19, 2025
f397e49
work around textlint changing 'local storage' to 'localStorage'
sjpb Dec 19, 2025
faacea3
fix ansible-lint errors
sjpb Dec 19, 2025
6f66d68
Merge branch 'main' into feat/eessi-proxy
sjpb Jan 21, 2026
6d26824
after self-review of PR
sjpb Jan 21, 2026
3459dff
configure stackhpc env to use control node as eeesi proxy
sjpb Jan 21, 2026
42c9c2a
simplify eessi production docs
sjpb Jan 22, 2026
95e8d01
simplify subnet TF inventory var
sjpb Jan 22, 2026
780bfcf
tofu fmt
sjpb Jan 22, 2026
f792759
deploy squid on control node by default
sjpb Jan 22, 2026
59bf7a6
default squid to eessi config and make squid config more flexible
sjpb Jan 22, 2026
f039c08
set sensible squid disk cache size for stackhpc CI
sjpb Jan 22, 2026
79a8898
fix linter errors
sjpb Jan 22, 2026
cd006d8
fix lint errors (2)
sjpb Jan 22, 2026
6f45efc
update superlinter version in docs
sjpb Jan 22, 2026
801b4e8
Revert "fix linter errors" - was run with older superlinter version
sjpb Jan 22, 2026
a8c04db
fix lint errors (again)
sjpb Jan 22, 2026
ca8f935
add eessi checks back to CI
sjpb Jan 22, 2026
2daf71e
don't try to configure squid during build - need TF-templated vars
sjpb Jan 22, 2026
1a75639
fix lint errors
sjpb Jan 22, 2026
225a388
Merge branch 'main' into feat/eessi-proxy
sjpb Jan 22, 2026
c3ad1e5
try to fix ansible-lint thinking appliances_mode is undefined
sjpb Jan 22, 2026
95a36f5
try to fix ansible-lint by passing inventory
sjpb Jan 22, 2026
6f64a78
try to fix ansible lint via configuring extra_vars for it
sjpb Jan 22, 2026
42dbf21
try another way
sjpb Jan 22, 2026
c5e31eb
fix autoselection of squid nodes for eessi proxy
sjpb Jan 22, 2026
af938c8
bump CI image
sjpb Jan 22, 2026
7ca6497
fix general mode squid template
sjpb Jan 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .github/workflows/stackhpc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -149,12 +149,6 @@ jobs:
. environments/.stackhpc/activate
ansible-playbook -vv ansible/adhoc/hpctests.yml --tags pingpong

# - name: Run EESSI tests
# run: |
# . venv/bin/activate
# . environments/.stackhpc/activate
# ansible-playbook -vv ansible/ci/check_eessi.yml

- name: Checkout current branch
run: git checkout ${{ github.head_ref || github.ref_name }}

Expand Down Expand Up @@ -211,6 +205,12 @@ jobs:
. environments/.stackhpc/activate
ansible-playbook -vv ansible/adhoc/hpctests.yml

- name: Run EESSI tests
run: |
. venv/bin/activate
. environments/.stackhpc/activate
ansible-playbook -vv ansible/ci/check_eessi.yml

- name: Confirm Open Ondemand is up (via SOCKS proxy)
run: |
. venv/bin/activate
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ docker run --rm \
-e RUN_LOCAL=true \
--env-file "super-linter.env" \
-v "$(pwd)":/tmp/lint \
ghcr.io/super-linter/super-linter:v7.3.0
ghcr.io/super-linter/super-linter:v8.3.2
```

```shell
Expand Down
3 changes: 2 additions & 1 deletion ansible/bootstrap.yml
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,9 @@
tasks_from: set_repos.yml
when: "'dnf_repos' in group_names"
- name: Configure squid proxy
ansible.builtin.import_role:
ansible.builtin.include_role:
name: squid
tasks_from: "{{ 'install.yml' if appliances_mode != 'configure' else 'main.yml' }}"

- hosts: dnf_repos
tags: dnf_repos
Expand Down
70 changes: 24 additions & 46 deletions ansible/ci/check_eessi.yml
Original file line number Diff line number Diff line change
@@ -1,52 +1,30 @@
---
- name: Run EESSI test job
hosts: login[0]
become: true
gather_facts: false
become_user: demo_user
vars:
eessi_test_rootdir: /home/eessi_test
eessi_test_module: GCCcore/12.2.0
tasks:
- name: Create test root directory
ansible.builtin.file:
path: "{{ eessi_test_rootdir }}"
state: directory
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
mode: "0755"
become: true
- name: Activate EESSI and load GCC
ansible.builtin.shell:
cmd: >-
bash -lc '
source /cvmfs/software.eessi.io/versions/2023.06/init/bash &&
module load {{ eessi_test_module }} &&
module list
'
register: eeesi_modules
changed_when: true

- name: Clone eessi-demo repo # noqa: latest[git]
ansible.builtin.git:
repo: "https://github.com/eessi/eessi-demo.git"
dest: "{{ eessi_test_rootdir }}/eessi-demo"

- name: Create batch script
ansible.builtin.copy:
dest: "{{ eessi_test_rootdir }}/eessi-demo/TensorFlow/tensorflow.sh"
content: |
#!/usr/bin/env bash
#SBATCH --output=%x.out
#SBATCH --error=%x.out
source /cvmfs/pilot.eessi-hpc.org/latest/init/bash
srun ./run.sh
mode: "0644"

- name: Run test job # noqa: no-changed-when
ansible.builtin.command:
cmd: sbatch --wait tensorflow.sh
chdir: "{{ eessi_test_rootdir }}/eessi-demo/TensorFlow"
register: job_output

- name: Retrieve job output
ansible.builtin.slurp:
src: "{{ eessi_test_rootdir }}/eessi-demo/TensorFlow/tensorflow.sh.out"
register: _tensorflow_out
no_log: true # as its base64 encoded so useless

- name: Show job output
ansible.builtin.debug:
msg: "{{ _tensorflow_out.content | b64decode }}"

- name: Fail if job output contains error
ansible.builtin.fail:
# Note: Job prints live progress bar to terminal, so use regex filter to remove this from stdout
msg: "Test job using EESSI modules failed. Job output was: {{ job_output.stdout | regex_replace('\b', '') }}"
when: '"Epoch 5/5" not in _tensorflow_out.content | b64decode'
- name: Ensure module loaded (shown in stderr)
ansible.builtin.assert:
that: eessi_test_module in eeesi_modules.stderr
fail_msg: |
Expected: '{{ eessi_test_module }}'
Got: {{ eeesi_modules.stderr }}
vars:
expected_output: |
Currently Loaded Modules:
1) GCCcore/12.2.0
18 changes: 14 additions & 4 deletions ansible/roles/eessi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,20 @@ None.

## Role Variables

- `cvmfs_quota_limit_mb`: Optional int. Maximum size of local package cache on each node in MB.
- `cvmfs_config_overrides`: Optional dict. Set of key-value pairs for additional CernVM-FS settings see [official docs](https://cvmfs.readthedocs.io/en/stable/cpt-configure.html) for list of options.
Each dict key should correspond to a valid config variable (e.g. `CVMFS_HTTP_PROXY`) and the corresponding dict value will be set as the variable value (e.g. `https://my-proxy.com`).
These configuration parameters will be written to the `/etc/cvmfs/default.local` config file on each host in the form `KEY=VALUE`.
All variables relate to [CernVM-FS configuration](https://cvmfs.readthedocs.io/en/stable/cpt-configure.html).
By default, the configuration is that [recommended by EESSI for single clients](https://www.eessi.io/docs/getting_access/native_installation/#installation-for-single-clients).
However if `cvmfs_http_proxy` is set to a non-empty string then a configuration
suitable for using a [squid proxy](https://www.eessi.io/docs/getting_access/native_installation/#configuring-your-client-to-use-a-squid-proxy)
is applied instead. See [docs/eessi.md](../../../docs/eessi.md#eessi-proxy-configuration)
for guidance on appliance configuration.

- `cvmfs_quota_limit_mb`: Optional int. Maximum size of local package cache on
each node in MB. Default 10GB.
- `cvmfs_http_proxy`: Optional string. Value for [CVMFS_HTTP_PROXY](https://cvmfs.readthedocs.io/en/stable/cpt-configure.html#proxy-lists). Quotes are added around the provided value. Default empty string.
- `cvmfs_config_overrides`: Optional dict. Set of key-value pairs for additional
CernVM-FS settings, written to `/etc/cvmfs/default.local`. Keys are
[CVMFS configuration options](https://cvmfs.readthedocs.io/en/stable/cpt-configure.html)
(e.g. `CVMFS_TIMEOUT_DIRECT`). Default empty dict.

## Dependencies

Expand Down
13 changes: 9 additions & 4 deletions ansible/roles/eessi/defaults/main.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
---
cvmfs_release_version: "6-3"

# Default to 10GB
cvmfs_quota_limit_mb: 10000

cvmfs_config_default:
cvmfs_quota_limit_mb: 10000 # local cache soft quota in MB (default 10GB)
cvmfs_config_single:
CVMFS_CLIENT_PROFILE: single
CVMFS_QUOTA_LIMIT: "{{ cvmfs_quota_limit_mb }}"

cvmfs_http_proxy: '' # as per docs, quotes are added automatically
# See https://www.eessi.io/docs/getting_access/native_installation/#configuring-your-client-to-use-a-squid-proxy
cvmfs_config_proxy:
CVMFS_QUOTA_LIMIT: "{{ cvmfs_quota_limit_mb }}"
CVMFS_HTTP_PROXY: "'{{ cvmfs_http_proxy }}'"

cvmfs_config_default: "{{ cvmfs_config_single if cvmfs_http_proxy == '' else cvmfs_config_proxy }}"
cvmfs_config_overrides: {}
cvmfs_config: "{{ cvmfs_config_default | combine(cvmfs_config_overrides) }}"
19 changes: 11 additions & 8 deletions ansible/roles/eessi/tasks/configure.yml
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
---

- name: Add base CVMFS config
community.general.ini_file:
ansible.builtin.template:
dest: /etc/cvmfs/default.local
section: null
option: "{{ item.key }}"
value: "{{ item.value }}"
no_extra_spaces: true
mode: "0644"
loop: "{{ cvmfs_config | dict2items }}"

src: cvmfs.config.j2
mode: u=rw,go=r
owner: root
register: cvmfs_config

# NOTE: Not clear how to make this idempotent
- name: Ensure CVMFS config is setup # noqa: no-changed-when
ansible.builtin.command:
cmd: "cvmfs_config setup"

- name: Reload CVMFS config
ansible.builtin.command:
cmd: cvmfs_config reload
when: cvmfs_config.changed # noqa: no-handler
changed_when: true # workaround ansible-lint

# configure gpus
- name: Check for NVIDIA GPU
ansible.builtin.stat:
Expand Down
3 changes: 3 additions & 0 deletions ansible/roles/eessi/templates/cvmfs.config.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{# Render every entry of the `cvmfs_config` dict as one `KEY=VALUE` line. -#}
{% for k, v in cvmfs_config.items() %}
{{ k }}={{ v }}
{% endfor %}
94 changes: 68 additions & 26 deletions ansible/roles/squid/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,80 @@

Deploy a caching proxy.

**NB:** The default configuration is aimed at providing a proxy for package installs etc. for
nodes which do not have direct internet connectivity. It assumes access to the proxy is protected
by the OpenStack security groups applied to the cluster. The generated configuration should be
reviewed if this is not case.
**NB:** This role provides two default configurations, selected by setting
`squid_conf_mode`:

- `eessi`: Default. This provides a proxy server for EESSI clients. It uses the
[recommended configuration](https://www.eessi.io/docs/tutorial/access/proxy/#configuration).
See [docs/eessi.md#eessi-proxy-configuration](../../../docs/eessi.md#eessi-proxy-configuration)
for more information and general proxy node recommendations.

- `general`: This is aimed at providing a proxy for package installs etc.
for nodes which do not have direct internet connectivity. It assumes access
to the proxy is protected by the OpenStack security groups applied to the
cluster. The generated configuration should be reviewed if this is not the case.

## Role Variables

Where noted these map to squid parameters of the same name without the `squid_` prefix - see [squid documentation](https://www.squid-cache.org/Doc/config) for details.

- `squid_conf_template`: Optional str. Path (using Ansible search paths) to squid.conf template. Default is in-role template.
### Both modes

These role variables apply to both `squid_conf_mode` settings.

- `squid_conf_mode`: Optional str, `eessi` (the default) or `general`. See above.
- `squid_conf_template`: Optional str. Path (using Ansible search paths) to
squid.conf template. Default is in-role templates. If this is overridden then
`squid_conf_mode` has no effect.
- `squid_http_port`: Optional str. Socket addresses to listen for client requests,
default '3128'. See squid parameter.
- `squid_cache_mem`: Optional str. Size of memory cache, e.g. "1024 KB", "12 GB"
etc. Default `'1024 MB'`, which is the recommended size for EESSI cache - should
probably be increased for `general` mode. See squid parameter.
- `squid_cache_dir`: Optional. Path to cache directory. Default `/var/spool/squid`.
- `squid_cache_disk`: Optional int. Size of UFS disk cache in MB. Default 50000
(50GB) which is recommended size for EESSI cache. For general use, see advice
for `Mbytes` parameter under "ufs" type of [cache_dir](https://www.squid-cache.org/Doc/config/cache_dir/).
- `squid_maximum_object_size_in_memory`: Optional str. Upper size limit for
objects in memory cache. Default '128 KB'/'64 MB' for `eessi`/`general` modes
respectively. See squid parameter.
- `squid_maximum_object_size`: Optional str. Upper size limit for objects in
disk cache. Default '1024 MB'/'200 MB' for `eessi`/`general` modes
respectively. See squid parameter.
- `squid_local_nodes_cidr`: Optional str. CIDR or address range of nodes allowed
to connect to squid. Default is CIDR of subnet for first cluster network. See
squid docs for [acl src](https://www.squid-cache.org/Doc/config/acl/).
- `squid_started`: Optional bool. Whether to start squid service. Default `true`.
- `squid_enabled`: Optional bool. Whether squid service is enabled on boot. Default `true`.
- `squid_cache_mem`: Required str. Size of memory cache, e.g "1024 KB", "12 GB" etc. See squid parameter.
- `squid_cache_dir`: Optional. Path to cache directory. Default `/var/spool/squid`.
- `squid_cache_disk`: Required int. Size of disk cache in MB. See Mbytes under "ufs" store type for squid parameter [cache_dir](https://www.squid-cache.org/Doc/config/cache_dir/).
- `squid_maximum_object_size_in_memory`: Optional str. Upper size limit for objects in memory cache, default '64 MB'. See squid parameter.
- `squid_maximum_object_size`: Optional str. Upper size limit for objects in disk cache, default '200 MB'. See squid parameter.
- `squid_http_port`: Optional str. Socket addresses to listen for client requests, default '3128'. See squid parameter.
- `squid_acls`: Optional str, can be multiline. Define access lists. Default `acl anywhere src all`, i.e. rely on OpenStack security groups (or other firewall if deployed). See squid parameter `acl`. NB: The default template also defines acls for `SSL_ports` and `Safe_ports` as is common practice.
- `squid_http_access`: Optional str, can be multiline. Allow/deny access based on access lists. Default:

# Deny requests to certain unsafe ports
http_access deny !Safe_ports
# Deny CONNECT to other than secure SSL ports
http_access deny CONNECT !SSL_ports
# Only allow cachemgr access from localhost
http_access allow localhost manager
http_access deny manager
# Rules allowing http access
http_access allow anywhere
http_access allow localhost
# Finally deny all other access to this proxy
http_access deny all

### Role Variables for squid_conf_mode: general

- `squid_acls`: Optional str, can be multiline. Define access lists. Default is
`acl local_nodes src {{ squid_local_nodes_cidr }}`, i.e. only permit connections
from addresses in the defined CIDR. In this mode acls for `SSL_ports` and `Safe_ports`
are also defined as is common practice.
- `squid_http_access`: Optional str, can be multiline. Allow/deny access based
on access lists. Default:

```text
# Deny requests to certain unsafe ports
http_access deny !Safe_ports
# Deny CONNECT to other than secure SSL ports
http_access deny CONNECT !SSL_ports
# Only allow cachemgr access from localhost
http_access allow localhost manager
http_access deny manager
# Rules allowing http access
http_access allow local_nodes
http_access allow localhost
# Finally deny all other access to this proxy
http_access deny all
```

See squid parameter.

### Role Variables for squid_conf_mode: eessi

- `squid_eessi_stratum_1`: Optional str. Domain (in squid `acl dstdomain`
format) of Stratum 1 replica servers. Defaults to upstream EESSI Stratum 1
servers.
29 changes: 20 additions & 9 deletions ansible/roles/squid/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
---
squid_conf_template: squid.conf.j2
# squid_conf_mode=eessi/general:
squid_conf_mode: eessi # or 'general'
squid_conf_template: "squid-{{ squid_conf_mode }}.conf.j2"
squid_http_port: 3128
squid_cache_mem: '1024 MB'
squid_cache_dir: /var/spool/squid
squid_cache_disk: 50000 # MB
squid_maximum_object_size_in_memory:
eessi: '128 KB'
general: '64 MB'
squid_maximum_object_size:
eessi: '1024 MB'
general: '200 MB'
squid_local_nodes_cidr: "{{ cluster_subnets[0].cidr }}"
squid_started: true
squid_enabled: true

squid_cache_mem: "{{ undef(hint='squid_cache_mem required, e.g. \"12 GB\"') }}"
squid_cache_dir: /var/spool/squid
squid_cache_disk: "{{ undef(hint='squid_cache_disk (in MB) required, e.g. \"1024\"') }}" # always in MB
squid_maximum_object_size_in_memory: "64 MB"
squid_maximum_object_size: "200 MB"
squid_http_port: 3128
squid_acls: acl anywhere src all # rely on openstack security groups
# squid_conf_mode=general:
squid_acls: "acl local_nodes src {{ squid_local_nodes_cidr }}"
squid_http_access: |
# Deny requests to certain unsafe ports
http_access deny !Safe_ports
Expand All @@ -19,7 +27,10 @@ squid_http_access: |
http_access allow localhost manager
http_access deny manager
# Rules allowing http access
http_access allow anywhere
http_access allow local_nodes
http_access allow localhost
# Finally deny all other access to this proxy
http_access deny all

# squid_conf_mode=eessi:
squid_eessi_stratum_1: '.eessi.science'
28 changes: 28 additions & 0 deletions ansible/roles/squid/templates/squid-eessi.conf.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# squid.conf template for the role's `eessi` mode (squid_conf_mode: eessi):
# a caching proxy that only forwards requests from local cluster nodes to
# CernVM-FS Stratum 1 servers.
# From https://www.eessi.io/docs/tutorial/access/proxy/
# Define ACLs:
# Source addresses permitted to use the proxy (role variable squid_local_nodes_cidr)
acl local_nodes src {{ squid_local_nodes_cidr }}

# Destination domains that are allowed
# cern.ch + opensciencegrid.org domains because of cvmfs-config.cern.ch repository,
# which are provided via Stratum-1 mirror servers hosted by CERN and OSG
# The final entry is the role variable squid_eessi_stratum_1.
acl stratum_ones dstdomain .cern.ch .opensciencegrid.org {{ squid_eessi_stratum_1 }}

# Squid port
http_port {{ squid_http_port }}

# NOTE: the order of the http_access rules below is significant - do not
# reorder them (see the squid http_access documentation).

# Deny access to anything which is not part of our stratum_ones ACL.
http_access deny !stratum_ones

# Only allow access from our local machines
http_access allow local_nodes
http_access allow localhost

# Finally, deny all other access to this proxy
http_access deny all

# Disk-cache object handling. The size limit is looked up by mode, i.e.
# squid_maximum_object_size is expected to be a dict keyed by squid_conf_mode.
minimum_expiry_time 0
maximum_object_size {{ squid_maximum_object_size[squid_conf_mode] }}

# Memory cache size and per-object limit (the latter again keyed by squid_conf_mode).
cache_mem {{ squid_cache_mem }}
maximum_object_size_in_memory {{ squid_maximum_object_size_in_memory[squid_conf_mode] }}
# Disk cache: "ufs" store at squid_cache_dir, sized by squid_cache_disk (MB, cast to int).
# NOTE(review): the trailing "16 256" are presumably the L1/L2 cache subdirectory
# counts from the squid cache_dir syntax - confirm against the squid docs.
cache_dir ufs {{ squid_cache_dir }} {{ squid_cache_disk | int }} 16 256
Loading