Skip to content

Commit 22b959d

Browse files
committed
CI: Fix qemu-1-setup failure, remove debug stuff
- For whatever reason, the runner will now start up with either two 75GB disks or one 150GB disk. Previously the runner always booted with two 75GB disks, but about a quarter of the time it now starts up with a single 150GB disk. This caused qemu-1-setup.sh to fail since it expected the two 75GB disks. This commit updates qemu-1-setup.sh to work with either disk config. - Remove the watchdog from qemu-1-setup.sh. It didn't turn out to be useful. - Remove the timestamps that zfs-qemu.yml added to the qemu-1-setup.sh output. The timestamps were redundant, since you can already download timestamped logs from the GitHub web interface. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Tino Reichardt <milky-zfs@mcmilk.de> Signed-off-by: Tony Hutter <hutter2@llnl.gov> Closes #18166
1 parent 2347627 commit 22b959d

File tree

2 files changed

+78
-38
lines changed

2 files changed

+78
-38
lines changed

.github/workflows/scripts/qemu-1-setup.sh

Lines changed: 77 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,6 @@
66

77
set -eu
88

9-
# We've been seeing this script take over 15min to run. This may or
10-
# may not be normal. Just to get a little more insight, print out
11-
# a message to stdout with the top running process, and do this every
12-
# 30 seconds. We can delete this watchdog later once we get a better
13-
# handle on what the timeout value should be.
14-
(while [ 1 ] ; do sleep 30 && echo "[watchdog: $(ps -eo cmd --sort=-pcpu | head -n 2 | tail -n 1)}')]"; done) &
15-
169
# The default 'azure.archive.ubuntu.com' mirrors can be really slow.
1710
# Prioritize the official Ubuntu mirrors.
1811
#
@@ -41,35 +34,89 @@ ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519 -q -N ""
4134
sudo systemctl stop docker.socket
4235
sudo systemctl stop multipathd.socket
4336

44-
# remove default swapfile and /mnt
4537
sudo swapoff -a
46-
sudo umount -l /mnt
47-
DISK="/dev/disk/cloud/azure_resource-part1"
48-
sudo sed -e "s|^$DISK.*||g" -i /etc/fstab
49-
sudo wipefs -aq $DISK
50-
sudo systemctl daemon-reload
38+
39+
# Special case:
40+
#
41+
# For reasons unknown, the runner can boot up with two different block device
42+
# configurations. On one config you get two 75GB block devices, and on the
43+
# other you get a single 150GB block device. Here's what both look like:
44+
#
45+
# --- One 150GB block device ---
46+
# NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS
47+
# sda 8:0 0 150G 0 disk
48+
# ├─sda1 8:1 0 149G 0 part /
49+
# ├─sda14 8:14 0 4M 0 part
50+
# ├─sda15 8:15 0 106M 0 part /boot/efi
51+
# └─sda16 259:0 0 913M 0 part /boot
52+
#
53+
# lrwxrwxrwx 1 root root 9 Jan 29 18:07 azure_root -> ../../sda
54+
# lrwxrwxrwx 1 root root 10 Jan 29 18:07 azure_root-part1 -> ../../sda1
55+
# lrwxrwxrwx 1 root root 11 Jan 29 18:07 azure_root-part14 -> ../../sda14
56+
# lrwxrwxrwx 1 root root 11 Jan 29 18:07 azure_root-part15 -> ../../sda15
57+
# lrwxrwxrwx 1 root root 11 Jan 29 18:07 azure_root-part16 -> ../../sda16
58+
#
59+
# --- Two 75GB block devices ---
60+
# NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS
61+
# sda 8:0 0 75G 0 disk
62+
# ├─sda1 8:1 0 74G 0 part /
63+
# ├─sda14 8:14 0 4M 0 part
64+
# ├─sda15 8:15 0 106M 0 part /boot/efi
65+
# └─sda16 259:0 0 913M 0 part /boot
66+
# sdb 8:16 0 75G 0 disk
67+
# └─sdb1 8:17 0 75G 0 part
68+
#
69+
# lrwxrwxrwx 1 root root 9 Jan 29 18:07 azure_resource -> ../../sdb
70+
# lrwxrwxrwx 1 root root 10 Jan 29 18:07 azure_resource-part1 -> ../../sdb1
71+
# lrwxrwxrwx 1 root root 9 Jan 29 18:07 azure_root -> ../../sda
72+
# lrwxrwxrwx 1 root root 10 Jan 29 18:07 azure_root-part1 -> ../../sda1
73+
# lrwxrwxrwx 1 root root 11 Jan 29 18:07 azure_root-part14 -> ../../sda14
74+
# lrwxrwxrwx 1 root root 11 Jan 29 18:07 azure_root-part15 -> ../../sda15
75+
#
76+
# If we have the azure_resource-part1 partition, umount it, partition it, and
77+
# use it as our ZFS disk and swap partition. If not, just create a file VDEV
78+
# and swap file and use that instead.
79+
80+
# remove default swapfile and /mnt
81+
if [ -e /dev/disk/cloud/azure_resource-part1 ] ; then
82+
sudo umount -l /mnt
83+
DISK="/dev/disk/cloud/azure_resource-part1"
84+
sudo sed -e "s|^$DISK.*||g" -i /etc/fstab
85+
sudo wipefs -aq $DISK
86+
sudo systemctl daemon-reload
87+
fi
5188

5289
sudo modprobe loop
5390
sudo modprobe zfs
5491

55-
# partition the disk as needed
56-
DISK="/dev/disk/cloud/azure_resource"
57-
sudo sgdisk --zap-all $DISK
58-
sudo sgdisk -p \
59-
-n 1:0:+16G -c 1:"swap" \
60-
-n 2:0:0 -c 2:"tests" \
61-
$DISK
62-
sync
63-
sleep 1
92+
if [ -e /dev/disk/cloud/azure_resource-part1 ] ; then
93+
echo "We have two 75GB block devices"
94+
# partition the disk as needed
95+
DISK="/dev/disk/cloud/azure_resource"
96+
sudo sgdisk --zap-all $DISK
97+
sudo sgdisk -p \
98+
-n 1:0:+16G -c 1:"swap" \
99+
-n 2:0:0 -c 2:"tests" \
100+
$DISK
101+
sync
102+
sleep 1
64103

65-
# swap with same size as RAM (16GiB)
66-
sudo mkswap $DISK-part1
67-
sudo swapon $DISK-part1
104+
sudo fallocate -l 12G /test.ssd2
105+
DISKS="$DISK-part2 /test.ssd2"
68106

69-
# JBOD 2xdisk for OpenZFS storage (test vm's)
70-
SSD1="$DISK-part2"
71-
sudo fallocate -l 12G /test.ssd2
72-
SSD2=$(sudo losetup -b 4096 -f /test.ssd2 --show)
107+
SWAP=$DISK-part1
108+
else
109+
echo "We have a single 150GB block device"
110+
sudo fallocate -l 72G /test.ssd2
111+
SWAP=/swapfile.ssd
112+
sudo fallocate -l 16G $SWAP
113+
sudo chmod 600 $SWAP
114+
DISKS="/test.ssd2"
115+
fi
116+
117+
# swap with same size as RAM (16GiB)
118+
sudo mkswap $SWAP
119+
sudo swapon $SWAP
73120

74121
# adjust zfs module parameter and create pool
75122
exec 1>/dev/null
@@ -78,14 +125,11 @@ ARC_MAX=$((1024*1024*512))
78125
echo $ARC_MIN | sudo tee /sys/module/zfs/parameters/zfs_arc_min
79126
echo $ARC_MAX | sudo tee /sys/module/zfs/parameters/zfs_arc_max
80127
echo 1 | sudo tee /sys/module/zfs/parameters/zvol_use_blk_mq
81-
sudo zpool create -f -o ashift=12 zpool $SSD1 $SSD2 -O relatime=off \
128+
sudo zpool create -f -o ashift=12 zpool $DISKS -O relatime=off \
82129
-O atime=off -O xattr=sa -O compression=lz4 -O sync=disabled \
83130
-O redundant_metadata=none -O mountpoint=/mnt/tests
84131

85132
# no need for some scheduler
86133
for i in /sys/block/s*/queue/scheduler; do
87134
echo "none" | sudo tee $i
88135
done
89-
90-
# Kill off our watchdog
91-
kill $(jobs -p)

.github/workflows/zfs-qemu.yml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,7 @@ jobs:
8888

8989
- name: Setup QEMU
9090
timeout-minutes: 60
91-
run: |
92-
# Add a timestamp to each line to debug timeouts
93-
while IFS=$'\n' read -r line; do
94-
echo "$(date +'%H:%M:%S') $line"
95-
done < <(.github/workflows/scripts/qemu-1-setup.sh)
91+
run: .github/workflows/scripts/qemu-1-setup.sh
9692

9793
- name: Start build machine
9894
timeout-minutes: 10

0 commit comments

Comments
 (0)