Skip to content

Commit a785acc

Browse files
authored
Merge pull request #1839 from cncf/node-drain-random-failures
[bug/1838] Increase wait_count in LitmusManager.wait_for_test to allow for slow verdicts
2 parents 0b64940 + 82853d4 commit a785acc

File tree

6 files changed

+109
-6
lines changed

6 files changed

+109
-6
lines changed
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Set up Sample CoreDNS CNF
2+
./sample-cnfs/sample-coredns-cnf/readme.md
3+
# Prerequisites
4+
### Install helm
5+
```
6+
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3
7+
chmod 700 get_helm.sh
8+
./get_helm.sh
9+
```
10+
### Optional: Use a helm version manager
11+
https://github.com/yuya-takeyama/helmenv
12+
Check out helmenv into any path (here is ${HOME}/.helmenv)
13+
```
14+
${HOME}/.helmenv)
15+
$ git clone https://github.com/yuya-takeyama/helmenv.git ~/.helmenv
16+
```
17+
Add ~/.helmenv/bin to your $PATH any way you like
18+
```
19+
$ echo 'export PATH="$HOME/.helmenv/bin:$PATH"' >> ~/.bash_profile
20+
```
21+
```
22+
helmenv versions
23+
helmenv install <version 3.1?>
24+
```
25+
26+
### core-dns installation
27+
```
28+
helm install coredns stable/coredns
29+
```
30+
### Pull down the helm chart code, untar it, and put it in the cnfs/coredns directory
31+
```
32+
helm pull stable/coredns
33+
```
34+
### Example cnf-testsuite config file for sample-core-dns-cnf
35+
In ./cnfs/sample-core-dns-cnf/cnf-testsuite.yml
36+
```
37+
---
38+
container_names: [coredns-coredns]
39+
```
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
release_name: coredns3
3+
service_name: coredns-coredns
4+
helm_repository:
5+
name: stable
6+
repo_url: https://cncf.gitlab.io/stable
7+
helm_chart: stable/coredns
8+
helm_install_namespace: cnfspace3
9+
allowlist_helm_chart_container_names: [falco, node-cache, nginx, coredns, calico-node, kube-proxy, nginx-proxy]
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Set up Sample CoreDNS CNF
2+
./sample-cnfs/sample-coredns-cnf/readme.md
3+
# Prerequisites
4+
### Install helm
5+
```
6+
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3
7+
chmod 700 get_helm.sh
8+
./get_helm.sh
9+
```
10+
### Optional: Use a helm version manager
11+
https://github.com/yuya-takeyama/helmenv
12+
Check out helmenv into any path (here is ${HOME}/.helmenv)
13+
```
14+
${HOME}/.helmenv)
15+
$ git clone https://github.com/yuya-takeyama/helmenv.git ~/.helmenv
16+
```
17+
Add ~/.helmenv/bin to your $PATH any way you like
18+
```
19+
$ echo 'export PATH="$HOME/.helmenv/bin:$PATH"' >> ~/.bash_profile
20+
```
21+
```
22+
helmenv versions
23+
helmenv install <version 3.1?>
24+
```
25+
26+
### core-dns installation
27+
```
28+
helm install coredns stable/coredns
29+
```
30+
### Pull down the helm chart code, untar it, and put it in the cnfs/coredns directory
31+
```
32+
helm pull stable/coredns
33+
```
34+
### Example cnf-testsuite config file for sample-core-dns-cnf
35+
In ./cnfs/sample-core-dns-cnf/cnf-testsuite.yml
36+
```
37+
---
38+
container_names: [coredns-coredns]
39+
```
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
release_name: coredns4
3+
service_name: coredns-coredns
4+
helm_repository:
5+
name: stable
6+
repo_url: https://cncf.gitlab.io/stable
7+
helm_chart: stable/coredns
8+
helm_install_namespace: cnfspace4
9+
allowlist_helm_chart_container_names: [falco, node-cache, nginx, coredns, calico-node, kube-proxy, nginx-proxy]

src/tasks/litmus_setup.cr

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ module LitmusManager
9999
Log.for("wait_for_test").info { "Checking experiment status #{experimentStatus_cmd}" } if check_verbose(args)
100100

101101
## Wait for completion of chaosengine which indicates the completion of chaos
102-
until (status_code == 0 && experimentStatus == "Completed") || wait_count >= retry
102+
until (status_code == 0 && experimentStatus == "Completed") || wait_count >= 1800
103103
sleep delay
104104
experimentStatus_cmd = "kubectl get chaosengine.litmuschaos.io #{test_name} -n #{namespace} -o jsonpath='{.status.experiments[0].status}'"
105105
Log.for("wait_for_test").info { "Checking experiment status #{experimentStatus_cmd}" } if check_verbose(args)
@@ -135,17 +135,22 @@ module LitmusManager
135135
## check_chaos_verdict will check the verdict of chaosexperiment
136136
def self.check_chaos_verdict(chaos_result_name, chaos_experiment_name, args, namespace : String = "default") : Bool
137137
verdict_cmd = "kubectl get chaosresults.litmuschaos.io #{chaos_result_name} -n #{namespace} -o jsonpath='{.status.experimentStatus.verdict}'"
138-
Log.for("check_chaos_verdict").info { "Checking experiment verdict #{verdict_cmd}" } if check_verbose(args)
138+
Log.for("LitmusManager.check_chaos_verdict").debug { "Checking experiment verdict with command: #{verdict_cmd}" }
139139
status_code = Process.run("#{verdict_cmd}", shell: true, output: verdict_response = IO::Memory.new, error: stderr = IO::Memory.new).exit_status
140-
Log.for("check_chaos_verdict").info { "status_code: #{status_code}" } if check_verbose(args)
141-
Log.for("check_chaos_verdict").info { "verdict: #{verdict_response.to_s}" } if check_verbose(args)
140+
Log.for("LitmusManager.check_chaos_verdict").debug { "status_code: #{status_code}; verdict: #{verdict_response.to_s}" }
142141
verdict = verdict_response.to_s
143142

144143
emoji_test_failed= "🗡️💀♻️"
145144
if verdict == "Pass"
146145
return true
147146
else
148-
Log.info {"#{chaos_experiment_name} chaos test failed: #{chaos_result_name}, verdict: #{verdict}"}
147+
Log.for("LitmusManager.check_chaos_verdict#details").debug do
148+
verdict_details_cmd = "kubectl get chaosresults.litmuschaos.io #{chaos_result_name} -n #{namespace} -o json"
149+
status_code = Process.run("#{verdict_details_cmd}", shell: true, output: verdict_details_response = IO::Memory.new, error: stderr = IO::Memory.new).exit_status
150+
"#{verdict_details_response.to_s}"
151+
end
152+
153+
Log.for("LitmusManager.check_chaos_verdict").info {"#{chaos_experiment_name} chaos test failed: #{chaos_result_name}, verdict: #{verdict}"}
149154
return false
150155
end
151156
end

src/tasks/workload/state.cr

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ task "node_drain", ["install_litmus"] do |t, args|
330330

331331
chaos_experiment_name = "node-drain"
332332
total_chaos_duration = "90"
333-
test_name = "#{resource["name"]}-#{Random.rand(99)}"
333+
test_name = "#{resource["name"]}-#{Random::Secure.hex(4)}"
334334
chaos_result_name = "#{test_name}-#{chaos_experiment_name}"
335335

336336
template = ChaosTemplates::NodeDrain.new(
@@ -342,6 +342,8 @@ task "node_drain", ["install_litmus"] do |t, args|
342342
total_chaos_duration,
343343
app_nodeName
344344
).to_s
345+
Log.for("node_drain").info { "Chaos test name: #{test_name}; Experiment name: #{chaos_experiment_name}; Label #{deployment_label}=#{deployment_label_value}; namespace: #{app_namespace}" }
346+
345347
File.write("#{destination_cnf_dir}/#{chaos_experiment_name}-chaosengine.yml", template)
346348
KubectlClient::Apply.file("#{destination_cnf_dir}/#{chaos_experiment_name}-chaosengine.yml")
347349
LitmusManager.wait_for_test(test_name,chaos_experiment_name,total_chaos_duration,args, namespace: app_namespace)

0 commit comments

Comments
 (0)