Skip to content

Commit f12c898

Browse files
committed
[CI][Benchmarks] Remove 'passed' field from Result
It suffices to report just a single result failure within a single run, so benchmarks now signal failure by raising an exception instead of setting the dedicated 'passed' field on Result.
1 parent 1be6e9c commit f12c898

File tree

4 files changed

+38
-28
lines changed

4 files changed

+38
-28
lines changed

devops/scripts/benchmarks/benches/base.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,17 @@ def teardown(self):
7171

7272
@abstractmethod
7373
def run(self, env_vars) -> list[Result]:
74+
"""Execute the benchmark with the given environment variables.
75+
76+
Args:
77+
env_vars: Environment variables to use when running the benchmark.
78+
79+
Returns:
80+
A list of Result objects with the benchmark results.
81+
82+
Raises:
83+
Exception: If the benchmark fails for any reason.
84+
"""
7485
pass
7586

7687
@staticmethod

devops/scripts/benchmarks/benches/syclbench.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,13 @@ def run(self, env_vars) -> list[Result]:
158158
res_list = []
159159
for row in reader:
160160
if not row[0].startswith("#"):
161+
# Check if the test passed
162+
if row[1] != "PASS":
163+
raise Exception(f"{row[0]} failed")
161164
res_list.append(
162165
Result(
163166
label=f"{self.name()} {row[0]}",
164167
value=float(row[12]) * 1000, # convert to ms
165-
passed=(row[1] == "PASS"),
166168
command=command,
167169
env=env_vars,
168170
unit="ms",

devops/scripts/benchmarks/main.py

Lines changed: 24 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -41,37 +41,35 @@ def run_iterations(
4141
):
4242
for iter in range(iters):
4343
print(f"running {benchmark.name()}, iteration {iter}... ", flush=True)
44-
bench_results = benchmark.run(env_vars)
45-
if bench_results is None:
46-
if options.exit_on_failure:
47-
raise RuntimeError(f"Benchmark {benchmark.name()} produced no results!")
48-
else:
49-
failures[benchmark.name()] = "benchmark produced no results!"
50-
break
51-
52-
for bench_result in bench_results:
53-
if not bench_result.passed:
44+
try:
45+
bench_results = benchmark.run(env_vars)
46+
if bench_results is None:
5447
if options.exit_on_failure:
55-
raise RuntimeError(
56-
f"Benchmark {benchmark.name()} failed: {bench_result.label} verification failed."
57-
)
48+
raise RuntimeError(f"Benchmark produced no results!")
5849
else:
59-
failures[bench_result.label] = "verification failed"
60-
print(f"complete ({bench_result.label}: verification failed).")
61-
continue
62-
63-
print(
64-
f"{benchmark.name()} complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit})."
65-
)
50+
failures[benchmark.name()] = "benchmark produced no results!"
51+
break
6652

67-
bench_result.name = bench_result.label
68-
bench_result.lower_is_better = benchmark.lower_is_better()
69-
bench_result.suite = benchmark.get_suite_name()
53+
for bench_result in bench_results:
54+
print(f"{benchmark.name()} complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit}).")
55+
bench_result.name = bench_result.label
56+
bench_result.lower_is_better = benchmark.lower_is_better()
57+
bench_result.suite = benchmark.get_suite_name()
7058

71-
if bench_result.label not in results:
72-
results[bench_result.label] = []
59+
if bench_result.label not in results:
60+
results[bench_result.label] = []
7361

74-
results[bench_result.label].append(bench_result)
62+
results[bench_result.label].append(bench_result)
63+
except Exception as e:
64+
failure_label = f"{benchmark.name()} iteration {iter}"
65+
if options.exit_on_failure:
66+
raise RuntimeError(
67+
f"Benchmark failed: {failure_label} verification failed: {str(e)}"
68+
)
69+
else:
70+
failures[failure_label] = f"verification failed: {str(e)}"
71+
print(f"complete ({failure_label}: verification failed: {str(e)}).")
72+
continue
7573

7674

7775
# https://www.statology.org/modified-z-score/

devops/scripts/benchmarks/utils/result.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ class Result:
1515
value: float
1616
command: list[str]
1717
env: dict[str, str]
18-
passed: bool = True
1918
unit: str = ""
2019
# stddev can be optionally set by the benchmark,
2120
# if not set, it will be calculated automatically.

0 commit comments

Comments
 (0)