
Commit 21373fa

Author: Songki Choi
Add anomaly perf benchmark tests (#3170)
* Add anomaly perf benchmark tests
* Refine workflow
* Add options for model-category
* Remove num_classes / data_format setting
Parent: 10f66e8

10 files changed: 306 additions & 65 deletions

.github/workflows/perf_benchmark.yaml

Lines changed: 47 additions & 1 deletion

@@ -7,6 +7,9 @@ on:
         type: choice
         description: Model category to run benchmark
         options:
+          - speed
+          - balance
+          - accuracy
           - default # speed, balance, accuracy models only
           - all # default + other models
         default: default
@@ -50,6 +53,45 @@ on:
           `pip install otx[full]@https://github.com/openvinotoolkit/training_extensions.git@{otx_ref}` will be executed before run,
           and reverted after run. Works only for v2.x assuming CLI compatibility.
         default: __CURRENT_BRANCH_COMMIT__
+  workflow_call:
+    inputs:
+      model-category:
+        type: string
+        description: Model category to run benchmark [speed, balance, accuracy, default, all]
+        default: default
+      data-group:
+        type: string
+        description: Data group to run benchmark [small, medium, large, all]
+        default: all
+      num-repeat:
+        type: number
+        description: Overrides default per-data-group number of repeat setting
+        default: 0
+      num-epoch:
+        type: number
+        description: Overrides default per-model number of epoch setting
+        default: 0
+      eval-upto:
+        type: string
+        description: The last operation to evaluate. 'optimize' means all. [train, export, optimize]
+        default: optimize
+      pytest-args:
+        type: string
+        description: |
+          Additional perf-benchmark pytest arguments.
+          "-k detection" -> detection task only
+          "--dry-run" -> print command w/o execution.
+      data-root:
+        type: string
+        description: Root directory containing validation data in CI server.
+        default: "/home/validation/data/v2/"
+      otx-ref:
+        type: string
+        description: |
+          Target OTX ref (tag / branch name / commit hash) on main repo to test. Defaults to the current branch.
+          `pip install otx[full]@https://github.com/openvinotoolkit/training_extensions.git@{otx_ref}` will be executed before run,
+          and reverted after run. Works only for v2.x assuming CLI compatibility.
+        default: __CURRENT_BRANCH_COMMIT__

 # Declare default permissions as read only.
 permissions: read-all
@@ -73,7 +115,7 @@ jobs:
           - task-short: "vsp"
             task: "visual_prompting"
     name: Perf-Benchmark-${{ matrix.task-short }}
-    runs-on: [self-hosted, linux, x64, dmount-v2, perf]
+    runs-on: [self-hosted, linux, x64, dmount-v2]
     timeout-minutes: 8640
     steps:
      - name: Checkout repository
@@ -85,6 +127,10 @@ jobs:
      - name: Install tox
        run: python -m pip install --require-hashes --no-deps -r .ci/tox-deps.txt
      - name: Run Performance Test
+       env:
+         BENCHMARK_RESULTS_CLEAR: ${{ vars.BENCHMARK_RESULTS_CLEAR }}
+         GH_CTX_REF_NAME: ${{ github.ref_name }}
+         GH_CTX_SHA: ${{ github.sha }}
        run: >
          tox -vv -e perf-benchmark -- tests/perf/test_${{ matrix.task }}.py ${{ inputs.pytest-args }}
          --model-category ${{ inputs.model-category }}
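
The new env: block forwards CI context into the test process. The code that consumes these variables is not shown in this diff, so the following is only a minimal Python sketch, assuming the benchmark reads them via os.environ; all function names here are invented for illustration.

    # Hypothetical consumer of the env vars set by the workflow above.
    import os

    def build_ci_tags() -> dict[str, str]:
        """Collect the CI context forwarded by the workflow's env block."""
        return {
            "branch": os.environ.get("GH_CTX_REF_NAME", "unknown"),
            "commit": os.environ.get("GH_CTX_SHA", "unknown"),
        }

    def should_clear_results() -> bool:
        # BENCHMARK_RESULTS_CLEAR comes from a repository variable (vars.*);
        # treat common truthy strings as "start from a clean result store".
        return os.environ.get("BENCHMARK_RESULTS_CLEAR", "false").lower() in ("1", "true", "yes")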

.github/workflows/weekly.yaml

Lines changed: 0 additions & 1 deletion

@@ -19,4 +19,3 @@ jobs:
       num-repeat: 0
       num-epoch: 0
       eval-upto: optimize
-      artifact-prefix: weekly-perf-benchmark

tests/perf/benchmark.py

Lines changed: 38 additions & 12 deletions

@@ -56,8 +56,6 @@ class Dataset:
     name: str
     path: Path
     group: str
-    data_format: str
-    num_classes: int
     num_repeat: int = 1
     extra_overrides: dict | None = None

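With data_format and num_classes dropped, a dataset entry now carries only its name, path, and size group, plus the optional repeat count and overrides. A hypothetical entry under the slimmed schema; the name and path are placeholders, not taken from this commit.

    from pathlib import Path

    # Placeholder values for illustration only; Dataset is the dataclass
    # defined in tests/perf/benchmark.py above.
    dataset = Dataset(
        name="anomaly_sample_small_1",
        path=Path("anomaly/sample_small/1"),
        group="small",
        num_repeat=3,
    )
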
@@ -155,10 +153,6 @@ def run(
             str(data_root),
             "--work_dir",
             str(sub_work_dir),
-            "--model.num_classes",
-            str(dataset.num_classes),
-            "--data.config.data_format",
-            dataset.data_format,
             "--engine.device",
             self.accelerator,
         ]
@@ -172,7 +166,10 @@ def run(
         start_time = time()
         self._run_command(command)
         extra_metrics = {"train/e2e_time": time() - start_time}
-        self._rename_raw_data(work_dir=sub_work_dir / ".latest" / "train", replaces={"epoch": "train/epoch"})
+        self._rename_raw_data(
+            work_dir=sub_work_dir / ".latest" / "train",
+            replaces={"train_": "train/", "{pre}": "train/"},
+        )
         self._log_metrics(
             work_dir=sub_work_dir / ".latest" / "train",
             tags=tags,
@@ -187,6 +184,10 @@ def run(
             str(sub_work_dir),
         ]
         self._run_command(command)
+        self._rename_raw_data(
+            work_dir=sub_work_dir / ".latest" / "test",
+            replaces={"test_": "test/", "{pre}": "test/"},
+        )
         self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)

         # Export & test
@@ -215,7 +216,10 @@ def run(
         ]
         self._run_command(command)

-        self._rename_raw_data(work_dir=sub_work_dir / ".latest" / "test", replaces={"test": "export"})
+        self._rename_raw_data(
+            work_dir=sub_work_dir / ".latest" / "test",
+            replaces={"test": "export", "{pre}": "export/"},
+        )
         self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)

         # Optimize & test
@@ -250,7 +254,10 @@ def run(
         ]
         self._run_command(command)

-        self._rename_raw_data(work_dir=sub_work_dir / ".latest" / "test", replaces={"test": "optimize"})
+        self._rename_raw_data(
+            work_dir=sub_work_dir / ".latest" / "test",
+            replaces={"test": "optimize", "{pre}": "optimize/"},
+        )
         self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)

         # Force memory clean up
@@ -310,11 +317,25 @@ def _log_metrics(
         metrics.to_csv(work_dir / "benchmark.raw.csv", index=False)

     def _rename_raw_data(self, work_dir: Path, replaces: dict[str, str]) -> None:
+        replaces = {**self.NAME_MAPPING, **replaces}
+
+        def _rename_col(col_name: str) -> str:
+            for src_str, dst_str in replaces.items():
+                if src_str == "{pre}":
+                    if not col_name.startswith(dst_str):
+                        col_name = dst_str + col_name
+                elif src_str == "{post}":
+                    if not col_name.endswith(dst_str):
+                        col_name = col_name + dst_str
+                else:
+                    col_name = col_name.replace(src_str, dst_str)
+            return col_name
+
         csv_files = work_dir.glob("**/metrics.csv")
         for csv_file in csv_files:
             data = pd.read_csv(csv_file)
-            for src_str, dst_str in replaces.items():
-                data.columns = data.columns.str.replace(src_str, dst_str)
+            data = data.rename(columns=_rename_col)  # Column names
+            data = data.replace(replaces)  # Values
             data.to_csv(csv_file, index=False)

     @staticmethod
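
In the new renaming scheme, "{pre}" and "{post}" are sentinel keys rather than literal substrings: they prepend or append the mapped string only when it is not already present, while every other pair is a plain substring replacement. A standalone restatement of the rule, runnable outside the class:

    # Copy of the renaming rule introduced above, with the train/test-style
    # mapping used by the run() method.
    replaces = {"test_": "test/", "{pre}": "test/"}

    def rename_col(col_name: str) -> str:
        for src_str, dst_str in replaces.items():
            if src_str == "{pre}":  # ensure prefix
                if not col_name.startswith(dst_str):
                    col_name = dst_str + col_name
            elif src_str == "{post}":  # ensure suffix
                if not col_name.endswith(dst_str):
                    col_name = col_name + dst_str
            else:  # plain substring replacement
                col_name = col_name.replace(src_str, dst_str)
        return col_name

    print(rename_col("test_f1_score"))  # -> "test/f1_score" (replaced; prefix already present)
    print(rename_col("epoch"))          # -> "test/epoch" (prefix added by "{pre}")
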
@@ -338,7 +359,7 @@ def load_result(result_path: Path) -> pd.DataFrame | None:
         return pd.concat(results, ignore_index=True).set_index(["task", "model", "data_group", "data"])

     @staticmethod
-    def average_result(data: pd.DataFrame, keys: list[str]) -> pd.DataFrame:
+    def average_result(data: pd.DataFrame, keys: list[str]) -> pd.DataFrame | None:
         """Average result w.r.t. given keys

         Args:
@@ -348,6 +369,9 @@ def average_result(data: pd.DataFrame, keys: list[str]) -> pd.DataFrame:
         Returns:
             pd.DataFrame: Averaged result table
         """
+        if data is None:
+            return None
+
         # Flatten index
         index_names = data.index.names
         column_names = data.columns
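
Since load_result is typed to return None when no result files are found, widening average_result to pd.DataFrame | None lets the two compose without crashing on data.index. A minimal usage sketch, assuming the Benchmark class above is importable and the result directory is a placeholder:

    from pathlib import Path

    # Assumes Benchmark from tests/perf/benchmark.py is on the path.
    raw = Benchmark.load_result(Path("some/empty/result/dir"))  # may be None
    avg = Benchmark.average_result(raw, keys=["task", "model", "data_group"])
    if avg is None:
        print("no benchmark results found; nothing to average")
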
@@ -391,3 +415,5 @@ def check(self, result: pd.DataFrame, criteria: list[Criterion]):

         for criterion in criteria:
             criterion(result_entry, target_entry)
+
+    NAME_MAPPING: dict[str, str] = {}  # noqa: RUF012
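
NAME_MAPPING is merged under the per-call replaces, so a task-specific subclass can normalize its raw metric names once. The new anomaly benchmark module is among the changed files not shown in this excerpt; a hypothetical override, with invented metric names, might look like:

    # Hypothetical subclass; the class and metric names are invented
    # for illustration and are not taken from this commit.
    class AnomalyBenchmark(Benchmark):
        NAME_MAPPING: dict[str, str] = {  # noqa: RUF012
            "image_F1Score": "f1-score",
            "pixel_F1Score": "pixel_f1-score",
        }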

tests/perf/conftest.py

Lines changed: 5 additions & 3 deletions

@@ -27,8 +27,8 @@ def pytest_addoption(parser):
         "--model-category",
         action="store",
         default="all",
-        choices=("default", "all"),
-        help="Choose default|all. Defaults to all.",
+        choices=("speed", "balance", "accuracy", "default", "other", "all"),
+        help="Choose speed|balance|accuracy|default|other|all. Defaults to all.",
     )
     parser.addoption(
         "--data-group",
@@ -290,7 +290,9 @@ def fxt_mlflow_client(request: pytest.FixtureRequest) -> MlflowClient:
 def fxt_model(request: pytest.FixtureRequest, fxt_model_category) -> Benchmark.Model:
     """Skip models according to user options."""
     model: Benchmark.Model = request.param
-    if fxt_model_category == "default" and model.category == "other":
+    if fxt_model_category == "all":
+        return model
+    if (fxt_model_category == "default" and model.category == "other") or fxt_model_category != model.category:
         pytest.skip(f"{model.category} category model")
     return model
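
The reworked fixture short-circuits on "all"; every other option is matched against the model's category before the test is collected. A standalone restatement of the skip condition, runnable on its own:

    # Mirror of the fixture's skip condition shown above.
    def is_skipped(option: str, category: str) -> bool:
        if option == "all":
            return False
        return (option == "default" and category == "other") or option != category

    assert is_skipped("speed", "balance")    # non-matching category is skipped
    assert not is_skipped("speed", "speed")  # exact match is kept
    assert not is_skipped("all", "other")    # "all" keeps everything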
