Skip to content

Commit 37980af

Browse files
authored
Merge pull request #1835 from cncf/bug/1834
[bug/1834] Uncordon node after running `node_drain` test for a CNF
2 parents 03f708a + d3f2a96 commit 37980af

File tree

6 files changed

+85
-9
lines changed

6 files changed

+85
-9
lines changed
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Set up Sample CoreDNS CNF
2+
./sample-cnfs/sample-coredns-cnf/readme.md
3+
# Prerequisites
4+
### Install helm
5+
```
6+
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3
7+
chmod 700 get_helm.sh
8+
./get_helm.sh
9+
```
10+
### Optional: Use a helm version manager
11+
https://github.com/yuya-takeyama/helmenv
12+
Check out helmenv into any path (this example uses ${HOME}/.helmenv)
13+
```
14+
# Clone helmenv into ${HOME}/.helmenv
15+
$ git clone https://github.com/yuya-takeyama/helmenv.git ~/.helmenv
16+
```
17+
Add ~/.helmenv/bin to your $PATH any way you like
18+
```
19+
$ echo 'export PATH="$HOME/.helmenv/bin:$PATH"' >> ~/.bash_profile
20+
```
21+
```
22+
helmenv versions
23+
helmenv install <version 3.1?>
24+
```
25+
26+
### core-dns installation
27+
```
28+
helm install coredns stable/coredns
29+
```
30+
### Pull down the helm chart code, untar it, and put it in the cnfs/coredns directory
31+
```
32+
helm pull stable/coredns
33+
```
34+
### Example cnf-testsuite config file for sample-core-dns-cnf
35+
In ./cnfs/sample-core-dns-cnf/cnf-testsuite.yml
36+
```
37+
---
38+
container_names: [coredns-coredns]
39+
```
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
release_name: coredns2
3+
service_name: coredns-coredns
4+
helm_repository:
5+
name: stable
6+
repo_url: https://cncf.gitlab.io/stable
7+
helm_chart: stable/coredns
8+
helm_install_namespace: cnfspace2
9+
allowlist_helm_chart_container_names: [falco, node-cache, nginx, coredns, calico-node, kube-proxy, nginx-proxy]

shard.lock

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ shards:
5454

5555
kubectl_client:
5656
git: https://github.com/cnf-testsuite/kubectl_client.git
57-
version: 1.0.3
57+
version: 1.0.4
5858

5959
popcorn:
6060
git: https://github.com/icyleaf/popcorn.git

shard.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ dependencies:
4949
version: ~> 1.0.0
5050
kubectl_client:
5151
github: cnf-testsuite/kubectl_client
52-
version: ~> 1.0.3
52+
version: ~> 1.0.4
5353
cluster_tools:
5454
github: cnf-testsuite/cluster_tools
5555
version: ~> 1.0.0

src/tasks/litmus_setup.cr

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,15 +76,12 @@ module LitmusManager
7676
File.write(MODIFIED_LITMUS_FILE, output_file) unless output_file == nil
7777
end
7878

79-
def self.cordon_target_node(deployment_label, deployment_value, namespace)
79+
def self.get_target_node_to_cordon(deployment_label, deployment_value, namespace)
8080
app_nodeName_cmd = "kubectl get pods -l #{deployment_label}=#{deployment_value} -n #{namespace} -o=jsonpath='{.items[0].spec.nodeName}'"
8181
Log.info { "Getting the operator node name: #{app_nodeName_cmd}" }
8282
status_code = Process.run("#{app_nodeName_cmd}", shell: true, output: appNodeName_response = IO::Memory.new, error: stderr = IO::Memory.new).exit_status
8383
Log.for("verbose").info { "status_code: #{status_code}" }
84-
app_nodeName = appNodeName_response.to_s
85-
status_code = KubectlClient::Cordon.command("#{app_nodeName}")
86-
Log.for("verbose").info { "status_code: #{status_code}" }
87-
Log.info { "The target node has been cordoned sucessfully" }
84+
appNodeName_response.to_s
8885
end
8986

9087
## wait_for_test will wait for the completion of litmus test

src/tasks/workload/state.cr

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,10 +229,29 @@ task "node_drain", ["install_litmus"] do |t, args|
229229
spec_labels = KubectlClient::Get.resource_spec_labels(resource["kind"], resource["name"], resource["namespace"])
230230

231231
schedulable_nodes_count=KubectlClient::Get.schedulable_nodes_list
232+
cordon_deployment_label = "#{spec_labels.as_h.first_key}"
233+
cordon_deployment_value = "#{spec_labels.as_h.first_value}"
234+
235+
# Declare this outside the block so that the name of the node can be used to uncordon later.
236+
cordon_target_node_name = nil
237+
232238
if schedulable_nodes_count.size > 1
233-
LitmusManager.cordon_target_node("#{spec_labels.as_h.first_key}","#{spec_labels.as_h.first_value}", namespace: resource["namespace"])
239+
# Identify cordon node target.
240+
cordon_target_node_name = LitmusManager.get_target_node_to_cordon(cordon_deployment_label, cordon_deployment_value, namespace: app_namespace)
241+
Log.info { "Found node to cordon #{cordon_target_node_name} using label #{cordon_deployment_label}='#{cordon_deployment_value}' in #{app_namespace} namespace." }
242+
243+
# Cordon the node.
244+
result = KubectlClient::Cordon.command("#{cordon_target_node_name}")
245+
246+
# If cordoning fails, skip the test.
247+
if result[:status].success?
248+
Log.info { "Cordoned node #{cordon_target_node_name} successfully." }
249+
else
250+
Log.info { "Unable to cordon node #{cordon_target_node_name}." }
251+
skipped = true
252+
end
234253
else
235-
Log.info { "The target node was unable to cordoned sucessfully" }
254+
Log.info { "Skipping test. Scheduleable node count is not > 1." }
236255
skipped = true
237256
end
238257

@@ -328,7 +347,19 @@ task "node_drain", ["install_litmus"] do |t, args|
328347
LitmusManager.wait_for_test(test_name,chaos_experiment_name,total_chaos_duration,args, namespace: app_namespace)
329348
test_passed = LitmusManager.check_chaos_verdict(chaos_result_name,chaos_experiment_name,args, namespace: app_namespace)
330349
end
350+
351+
# Uncordon the node.
352+
result = KubectlClient::Uncordon.command("#{cordon_target_node_name}")
353+
354+
# If uncordoning fails, log the error.
355+
if result[:status].success?
356+
Log.info { "Uncordoned node #{cordon_target_node_name} successfully." }
357+
else
358+
Log.error { "Uncordoning node #{cordon_target_node_name} failed." }
359+
skipped = true
360+
end
331361
end
362+
332363
test_passed
333364
end
334365
if skipped

0 commit comments

Comments
 (0)