Skip to content

Commit f283c39

Browse files
committed
[CI][Benchmarks] Remove 'passed' field from Result
It suffices to print just a single result failure within a single run. Use exceptions instead of a dedicated Result field.
1 parent f86b606 commit f283c39

File tree

4 files changed

+40
-28
lines changed

4 files changed

+40
-28
lines changed

devops/scripts/benchmarks/benches/base.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,17 @@ def teardown(self):
 
     @abstractmethod
     def run(self, env_vars) -> list[Result]:
+        """Execute the benchmark with the given environment variables.
+
+        Args:
+            env_vars: Environment variables to use when running the benchmark.
+
+        Returns:
+            A list of Result objects with the benchmark results.
+
+        Raises:
+            Exception: If the benchmark fails for any reason.
+        """
         pass
 
     @staticmethod

devops/scripts/benchmarks/benches/syclbench.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,13 @@ def run(self, env_vars) -> list[Result]:
             res_list = []
             for row in reader:
                 if not row[0].startswith("#"):
+                    # Check if the test passed
+                    if row[1] != "PASS":
+                        raise Exception(f"{row[0]} failed")
                     res_list.append(
                         Result(
                             label=f"{self.name()} {row[0]}",
                             value=float(row[12]) * 1000, # convert to ms
-                            passed=(row[1] == "PASS"),
                             command=command,
                             env=env_vars,
                             unit="ms",

devops/scripts/benchmarks/main.py

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -43,39 +43,39 @@ def run_iterations(
 ):
     for iter in range(iters):
         log.info(f"running {benchmark.name()}, iteration {iter}... ")
-        bench_results = benchmark.run(env_vars)
-        if bench_results is None:
-            if options.exit_on_failure:
-                raise RuntimeError(f"Benchmark {benchmark.name()} produced no results!")
-            else:
-                failures[benchmark.name()] = "benchmark produced no results!"
-                break
-
-        for bench_result in bench_results:
-            if not bench_result.passed:
+        try:
+            bench_results = benchmark.run(env_vars)
+            if bench_results is None:
                 if options.exit_on_failure:
                     raise RuntimeError(
-                        f"Benchmark {benchmark.name()} failed: {bench_result.label} verification failed."
+                        f"Benchmark {benchmark.name()} produced no results!"
                     )
                 else:
-                    failures[bench_result.label] = "verification failed"
-                    log.warning(
-                        f"complete ({bench_result.label}: verification failed)."
-                    )
-                    continue
-
-            log.info(
-                f"{benchmark.name()} complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit})."
-            )
+                    failures[benchmark.name()] = "benchmark produced no results!"
+                    break
 
-            bench_result.name = bench_result.label
-            bench_result.lower_is_better = benchmark.lower_is_better()
-            bench_result.suite = benchmark.get_suite_name()
+            for bench_result in bench_results:
+                log.info(
+                    f"{benchmark.name()} complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit})."
+                )
+                bench_result.name = bench_result.label
+                bench_result.lower_is_better = benchmark.lower_is_better()
+                bench_result.suite = benchmark.get_suite_name()
 
-            if bench_result.label not in results:
-                results[bench_result.label] = []
+                if bench_result.label not in results:
+                    results[bench_result.label] = []
 
-            results[bench_result.label].append(bench_result)
+                results[bench_result.label].append(bench_result)
+        except Exception as e:
+            failure_label = f"{benchmark.name()} iteration {iter}"
+            if options.exit_on_failure:
+                raise RuntimeError(
+                    f"Benchmark failed: {failure_label} verification failed: {str(e)}"
+                )
+            else:
+                failures[failure_label] = f"verification failed: {str(e)}"
+                log.error(f"complete ({failure_label}: verification failed: {str(e)}).")
+                continue
 
 
 # https://www.statology.org/modified-z-score/
# https://www.statology.org/modified-z-score/

devops/scripts/benchmarks/utils/result.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ class Result:
     value: float
     command: list[str]
     env: dict[str, str]
-    passed: bool = True
     unit: str = ""
     # stddev can be optionally set by the benchmark,
     # if not set, it will be calculated automatically.

0 commit comments

Comments
 (0)