Skip to content

Commit 7def1ad

Browse files
andrewd-zededaeriknordmark
authored andcommitted
kubevirt: upgrade k3s,multus,kubevirt,cdi,longhorn
A kube container generation number is tracked in cluster-update.sh and compared to an 'applied' version in the persistent node fs /var/lib. When the applied version is behind it triggers component upgrades. cluster-update.sh uses an inline command in the zedkube micro service to publish status updates to zedagent which will trigger info messages (ZInfoKubeClusterUpdateStatus) to a controller. Signed-off-by: Andrew Durbin <[email protected]>
1 parent 6b41528 commit 7def1ad

21 files changed

+2404
-9
lines changed

.spdxignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ pkg/kube/descheduler_rbac.yaml
1616
pkg/kube/lh-cfg-v1.6.2.yaml
1717
pkg/vtpm/swtpm-vtpm/vendor/
1818
pkg/dom0-ztools/rootfs/usr/bin/rungetty.sh
19+
pkg/kube/update-component/vendor/

pkg/kube/Dockerfile

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,20 @@ COPY cert-gen /plugins/cert-gen
2020
WORKDIR /plugins/cert-gen
2121
RUN GO111MODULE=on CGO_ENABLED=0 go build -v -ldflags "-s -w" -o /out/usr/bin/cert-gen .
2222

23+
COPY update-component /plugins/update-component
24+
WORKDIR /plugins/update-component
25+
RUN GO111MODULE=on go build -v -ldflags "-s -w" -mod=vendor -o /out/usr/bin/update-component .
26+
2327
FROM scratch
2428
COPY --from=build /out/ /
2529
COPY cluster-init.sh /usr/bin/
2630
COPY cluster-utils.sh /usr/bin/
2731
COPY cgconfig.conf /etc
2832

33+
# upgrades
2934
COPY cluster-update.sh /usr/bin/
35+
COPY update-component/expected_versions.yaml /etc/
36+
COPY update-component/settings_longhorn.yaml /etc/
3037

3138
# k3s
3239
COPY install-etcdctl.sh /usr/bin/

pkg/kube/cluster-init.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -669,6 +669,8 @@ logmsg "Using ZFS persistent storage"
669669

670670
setup_prereqs
671671

672+
Update_CheckNodeComponents
673+
672674

673675
if [ -f /var/lib/convert-to-single-node ]; then
674676
logmsg "remove /var/lib and copy saved single node /var/lib"
@@ -931,6 +933,7 @@ fi
931933
check_kubeconfig_yaml_files
932934
check_and_remove_excessive_k3s_logs
933935
check_and_run_vnc
936+
Update_CheckClusterComponents
934937
wait_for_item "wait"
935938
sleep 15
936939
done

pkg/kube/cluster-update.sh

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,222 @@
22
#
33
# Copyright (c) 2024 Zededa, Inc.
44
# SPDX-License-Identifier: Apache-2.0
5+
K3S_VERSION=v1.28.5+k3s1
6+
7+
#
8+
# Handle any migrations needed due to updated cluster-init.sh
9+
# This is expected to be bumped any time:
10+
# - a migration is needed (new path for something)
11+
# - a version bump of: K3s, multus, kubevirt, cdi, longhorn
12+
#
13+
KUBE_VERSION=1
14+
APPLIED_KUBE_VERSION_PATH="/var/lib/applied-kube-version"
15+
# update_Version_Set <version>
# Record <version> as the applied kube generation by overwriting the file
# named by APPLIED_KUBE_VERSION_PATH.
update_Version_Set() {
    version="${1}"
    echo "${version}" >"${APPLIED_KUBE_VERSION_PATH}"
}
19+
20+
# update_Version_Get
# Print the applied kube generation stored in APPLIED_KUBE_VERSION_PATH.
# Prints "0" when that file does not exist yet (first boot).
update_Version_Get() {
    if [ ! -f "$APPLIED_KUBE_VERSION_PATH" ]; then
        # First Boot: nothing has been applied yet. Return here so we do not
        # fall through to cat on a missing file (which only produced an error).
        echo "0"
        return 0
    fi
    cat "$APPLIED_KUBE_VERSION_PATH"
}
27+
28+
#
29+
# update_Failed()
30+
# Mark failure if Status == COMP_STATUS_FAILED and DestinationKubeUpdateVersion == KUBE_VERSION
31+
# This allows:
32+
# - update retry control for a given version
33+
# - recovery update if the eve os version is updated to another release (with a different cluster-init.sh)
34+
#
35+
UPDATE_STATUS_PATH=/persist/status/zedkube/KubeClusterUpdateStatus/global.json
36+
# update_Failed
# Returns 0 when the status file at UPDATE_STATUS_PATH exists and reports
# Status 4 (COMP_STATUS_FAILED per the comment preceding UPDATE_STATUS_PATH)
# for DestinationKubeUpdateVersion == KUBE_VERSION; returns 1 otherwise.
update_Failed() {
    [ -f "$UPDATE_STATUS_PATH" ] || return 1

    # jq --arg always binds $gen as a JSON *string*. Normalise the stored field
    # with tostring so the comparison matches whether the publisher wrote the
    # version as a number or as a string.
    # NOTE(review): the JSON schema is not visible here - confirm field type.
    if [ "$(jq --arg gen "$KUBE_VERSION" '.Status==4 and (.DestinationKubeUpdateVersion|tostring)==$gen' < "$UPDATE_STATUS_PATH")" = "true" ]; then
        return 0
    fi
    return 1
}
44+
45+
# trigger_k3s_selfextraction
# Invoke a k3s cli command (check-config) so that k3s self-extracts its
# binaries. Both stdout and stderr are appended to INSTALL_LOG.
trigger_k3s_selfextraction() {
    /usr/bin/k3s check-config \
        >>"${INSTALL_LOG}" 2>&1
}
549

650
# link_multus_into_k3s
# Create a symlink to the multus CNI binary inside the bin directory of the
# current k3s data dir.
link_multus_into_k3s() {
    ln -s \
        /var/lib/cni/bin/multus \
        /var/lib/rancher/k3s/data/current/bin/multus
}
53+
54+
# update_k3s
# Install K3S_VERSION with the get.k3s.io installer into /var/lib/k3s/bin
# (service enable and start are skipped), link the installed binaries into
# /usr/bin, trigger the k3s self-extraction, link multus into the k3s bin
# directory and finally touch the /var/lib/k3s_installed_unpacked marker.
update_k3s() {
    logmsg "Installing K3S version $K3S_VERSION"
    mkdir -p /var/lib/k3s/bin
    /usr/bin/curl -sfL https://get.k3s.io \
        | INSTALL_K3S_VERSION="${K3S_VERSION}" \
          INSTALL_K3S_SKIP_ENABLE=true \
          INSTALL_K3S_SKIP_START=true \
          INSTALL_K3S_BIN_DIR=/var/lib/k3s/bin \
          sh -
    sleep 5

    logmsg "Initializing K3S version $K3S_VERSION"
    # Glob is intentionally unquoted: link every installed file.
    ln -s /var/lib/k3s/bin/* /usr/bin
    trigger_k3s_selfextraction
    link_multus_into_k3s
    touch /var/lib/k3s_installed_unpacked
}
65+
66+
# k3s_get_version
# Print the installed k3s version in the form "vW.X.Y+k3sZ".
# Prints "v0.0.0+k3s0" when /var/lib/k3s/bin/k3s is not present.
k3s_get_version() {
    if [ -f /var/lib/k3s/bin/k3s ]; then
        # Third field of the "k3s ..." line of --version, newline stripped.
        /var/lib/k3s/bin/k3s --version \
            | awk '$1=="k3s" {print $3}' \
            | tr -d '\n'
    else
        echo "v0.0.0+k3s0"
    fi
}
74+
75+
# Update_CheckNodeComponents
# Run on every boot before k3s starts.
# Does nothing when the applied kube version already equals KUBE_VERSION or
# when update_Failed reports a failure. Otherwise, if the installed k3s
# version differs from K3S_VERSION, reinstalls k3s through update_k3s and
# publishes "download" followed by "completed" or "failed".
Update_CheckNodeComponents() {
    applied_version=$(update_Version_Get)
    if [ "$KUBE_VERSION" = "$applied_version" ] || update_Failed; then
        return 0
    fi
    logmsg "update_HandleNode: version:$KUBE_VERSION appliedversion:$applied_version continuing"

    # Handle version specific node migrations here

    # Handle node specific updates, just k3s for now
    [ "$(k3s_get_version)" != "$K3S_VERSION" ] || return 0

    publishUpdateStatus "k3s" "download"
    update_k3s

    # Verify the install by re-reading the version from the binary.
    current_k3s_version=$(k3s_get_version)
    if [ "$current_k3s_version" = "$K3S_VERSION" ]; then
        logmsg "k3s installed and unpacked or copied"
        publishUpdateStatus "k3s" "completed"
    else
        logmsg "k3s version mismatch after install:$current_k3s_version"
        publishUpdateStatus "k3s" "failed" "version mismatch after install:$current_k3s_version"
    fi
}
103+
104+
# Update_CheckClusterComponents
# Run on every boot after k3s is started.
# Does nothing when the applied kube version already equals KUBE_VERSION,
# when update_Failed reports a failure, or when the cluster is not ready.
# Otherwise walks multus, kubevirt, cdi and longhorn in order: waits for each
# to be ready on its existing version, then upgrades it unless it already
# runs the expected version. The applied kube version is recorded only when
# no component update failed.
Update_CheckClusterComponents() {
    wait_for_item "update_cluster_pre"

    applied_version=$(update_Version_Get)
    if [ "$KUBE_VERSION" = "$applied_version" ]; then
        return
    fi

    if update_Failed; then
        return
    fi

    if ! update_isClusterReady; then
        return
    fi
    logmsg "update_HandleCluster: version:$KUBE_VERSION appliedversion:$applied_version continuing"

    # Handle cluster wide component updates
    update_all_ok=1
    for comp in multus kubevirt cdi longhorn; do
        while ! update_Component_CheckReady "$comp"; do
            logmsg "Component: $comp not ready on existing version"
            sleep 60
        done
        logmsg "Component: $comp ready on existing version"
        if update_Component_IsRunningExpectedVersion "$comp"; then
            logmsg "Component:$comp running expected version, continuing"
            publishUpdateStatus "$comp" "completed"
            continue
        fi
        if ! update_Component "$comp"; then
            logmsg "Not continuing with further updates after component:${comp} update failed"
            update_all_ok=0
            break
        fi
    done

    # Do not mark this generation as applied after a failed component update:
    # the components after the failed one were never upgraded. Whether the
    # update is attempted again is decided by update_Failed on the next pass.
    if [ "$update_all_ok" -eq 1 ]; then
        update_Version_Set "$KUBE_VERSION"
    fi
    wait_for_item "update_cluster_post"
}
143+
144+
# update_isClusterReady
# Returns 0 only when both "kubectl cluster-info" and
# update_Helper_APIResponding succeed; returns 1 otherwise. The API helper is
# not invoked when kubectl fails.
update_isClusterReady() {
    if kubectl cluster-info && update_Helper_APIResponding; then
        return 0
    fi
    return 1
}
154+
155+
#
156+
# Handle kube component updates
157+
#
158+
COMP_UPDATE_PATH="/usr/bin/update-component"
159+
160+
# update_Helper_APIResponding
# Runs the tool at COMP_UPDATE_PATH with --check-api-ready.
# Returns 0 when it exits 0, and 1 for any other exit status.
update_Helper_APIResponding() {
    $COMP_UPDATE_PATH --check-api-ready || return 1
    return 0
}
166+
# update_Component_CheckReady <component>
# Runs the tool at COMP_UPDATE_PATH with --check-comp-ready for <component>,
# using /etc/expected_versions.yaml as the versions file.
# Returns 0 when it exits 0, and 1 for any other exit status.
update_Component_CheckReady() {
    comp=$1
    $COMP_UPDATE_PATH \
        --versions-file /etc/expected_versions.yaml \
        --component "$comp" \
        --check-comp-ready \
        || return 1
    return 0
}
173+
# update_Component_Uptime <component>
# Runs the tool at COMP_UPDATE_PATH with --get-uptime for <component>, using
# /etc/expected_versions.yaml as the versions file. The tool's output is
# passed through unchanged.
update_Component_Uptime() {
    comp=$1
    $COMP_UPDATE_PATH \
        --versions-file /etc/expected_versions.yaml \
        --component "$comp" \
        --get-uptime
}
177+
# update_Component_IsRunningExpectedVersion <component>
# Runs the tool at COMP_UPDATE_PATH with --compare for <component>, using
# /etc/expected_versions.yaml as the versions file.
# Returns 0 when it exits 0, and 1 for any other exit status.
update_Component_IsRunningExpectedVersion() {
    comp=$1
    $COMP_UPDATE_PATH \
        --versions-file /etc/expected_versions.yaml \
        --component "$comp" \
        --compare \
        || return 1
    return 0
}
184+
185+
# update_Component <component>
# Publishes "in_progress", then runs the tool at COMP_UPDATE_PATH with
# --upgrade for <component>. On success publishes "completed" and returns 0.
# On failure logs and publishes "failed" with a pointer to the upgrade log,
# then returns 1.
update_Component() {
    comp=$1
    # Run go app to check and apply updates and block until new version is ready
    publishUpdateStatus "$comp" "in_progress"
    if ! $COMP_UPDATE_PATH --versions-file /etc/expected_versions.yaml --component "$comp" --upgrade; then
        upgrade_log_path="/persist/kubelog/upgrade-component.log"
        logmsg "update_Component comp:${comp} error starting update, see $upgrade_log_path"
        publishUpdateStatus "$comp" "failed" "error in $upgrade_log_path"
        return 1
    fi

    publishUpdateStatus "$comp" "completed"
    return 0
}
198+
199+
# publishUpdateStatus <component> <status> [errorstr]
# Publish a KubeClusterUpdateStatus for <component> by invoking the zedkube
# binary from the pillar rootfs with the node name, component, status,
# KUBE_VERSION and the optional error string. A non-zero exit from zedkube is
# logged. Nothing is published while the applied kube version is "0".
publishUpdateStatus() {
    component=$1
    status=$2
    # The third argument is optional free-form text. Take it whenever it is
    # supplied; a file test such as -x is not a presence check and would drop
    # an error string that happens to name an executable path.
    errorstr=${3:-}

    # If gen==0, then we are in the initial boot not updating, just installing first versions at most-likely first
    # boot of the device. Don't publish as this will trigger zedagent to claim baseos_updating.
    cur_version=$(update_Version_Get)
    if [ "$cur_version" = "0" ]; then
        return
    fi

    node=$(jq -r '.DeviceName' < /persist/status/zedagent/EdgeNodeInfo/global.json | tr -d '\n')
    logmsg "publishUpdateStatus() $node $component $status"

    pillarRootfs=/hostfs/containers/services/pillar/rootfs
    LD_LIBRARY_PATH=${pillarRootfs}/usr/lib/ ${pillarRootfs}/opt/zededa/bin/zedkube pubKubeClusterUpdateStatus "$node" "$component" "$status" "$KUBE_VERSION" "$errorstr"
    rc=$?
    if [ "$rc" -ne 0 ]; then
        logmsg "publishUpdateStatus() $node $component $status in error:$rc"
    fi
}

pkg/kube/update-component/README.md

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Kube "update-component" helper tool
2+
3+
update-component is a short-lived utility process that provides a convenience interface for Kubernetes
4+
component upgrades and status information.
5+
6+
The interface specifies a series of generalized upgrade methods and backend handlers are implemented for
7+
currently used "infrastructure" components which the kube service installs in HV=kubevirt eve builds.
8+
9+
Supported Components: cdi, kubevirt, longhorn, multus
10+
11+
Upgrade Interface:
12+
13+
1. GetVersion - returns a string version
14+
1. UpgradeSupported - accepts source and destination version, checks component backend to determine
15+
if the upgrade is supported. Some components have strict version max distance upgrade rules.
16+
eg. v1.0.0->v3.1.0 not supported.
17+
1. Uptime - returns the time a component has been ready at a given version
18+
1. Ready - returns nil if the component is online
19+
1. UpgradeStart - initiates a component upgrade to requested version.
20+
21+
## Options
22+
23+
### General Arguments
24+
25+
--component : string component name
26+
--versions-file : path to a single level yaml file defining a list of `<component> : "<expected version>"`
27+
28+
### Optional Arguments
29+
30+
-f Force: skip uptime checks and version constraints
31+
32+
### Check Kubernetes API Ready "--check-api-ready"
33+
34+
Check whether the Kubernetes API is responding (rc 0 on success)
35+
eg.
36+
`$ /usr/bin/update-component --check-api-ready
37+
$ echo $?
38+
0`
39+
40+
### Check Component Ready "--check-comp-ready"
41+
42+
Check if component is ready, according to its daemonsets (rc 0 for success)
43+
eg.
44+
`$ /usr/bin/update-component --versions-file /etc/expected_versions.yaml --component longhorn --check-comp-ready
45+
$ echo $?
46+
0`
47+
48+
### Check Component Uptime "--get-uptime"
49+
50+
Print component uptime in seconds
51+
eg.
52+
`$ /usr/bin/update-component --versions-file /etc/expected_versions.yaml --component longhorn --get-uptime
53+
623011`
54+
55+
### Compare Component Version Against Expected "--compare"
56+
57+
Just compare current version, return 0 for matching, 1 for not matching
58+
eg.
59+
`$ /usr/bin/update-component --versions-file /etc/expected_versions.yaml --component longhorn --compare
60+
$ echo $?
61+
0`
62+
63+
### Execute Component Upgrade "--upgrade"
64+
65+
Begin component upgrade to the version listed for it in --versions-file
66+
eg.
67+
`$ /usr/bin/update-component --versions-file /etc/expected_versions.yaml --component "$comp" --upgrade
68+
$ echo $?
69+
0`
70+
71+
## Logging
72+
73+
By default this tool logs to /persist/kubelog/upgrade-component.log
74+
75+
Example Output:
76+
2024/11/19 19:44:30 Component:multus ready:true running:v3.9.3 expected_version:v3.9.3 uptime_seconds:569.930566
77+
2024/11/19 19:44:32 Component:kubevirt ready:true running:v1.1.0-dirty expected_version:v1.1.0-dirty uptime_seconds:478.254250
78+
2024/11/19 19:44:33 Component:cdi ready:true running:v1.57.1 expected_version:v1.57.1 uptime_seconds:499.523674
79+
2024/11/19 19:44:34 Component:longhorn ready:true running:v1.6.3 expected_version:v1.6.3 uptime_seconds:553.801213
80+
81+
## EVE Runtime Usage
82+
83+
After the kube service container has started and k3s has been started, the main run loop will call
84+
Update_CheckClusterComponents which checks a series of prerequisites:
85+
86+
- if the applied overall kube version (integer in /var/lib/applied-kube-version) differs from the requested version defined in cluster-update.sh (normally because it is behind)
87+
- if previous update is not failed
88+
89+
If both above checks pass then cluster-update proceeds to check component health and initiate upgrades serially.
90+
After all component upgrades are complete, the applied overall kube version is set to the requested version.

0 commit comments

Comments
 (0)