Skip to content

Commit d1016cb

Browse files
authored
Merge branch 'main' into feature/log
2 parents 20803fa + 7055ff0 commit d1016cb

File tree

6 files changed

+37
-7
lines changed

6 files changed

+37
-7
lines changed

.github/workflows/jekyll-gh-pages.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ jobs:
3838
make html
3939
cp -r _build/html ../_site/
4040
- name: Upload artifact
41-
uses: actions/upload-pages-artifact@v1
41+
uses: actions/upload-pages-artifact@v4
4242

4343
# Deployment job
4444
deploy:

dlio_benchmark/main.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@
5151
# To make sure the output folder is the same in all the nodes. We have to do this.
5252
import hydra
5353

54+
dftracer_initialize = True
55+
dftracer_finalize = True
56+
# Module-wide tracer handle assigned by DLIOBenchmark.__init__ from
# configure_dftracer(); it stays None until tracing has been initialized so
# that DLIOBenchmark.finalize can safely test it before finalizing.
dftracer = None
57+
5458
class DLIOBenchmark(object):
5559
"""
5660
The Benchmark represents the I/O behavior of deep learning applications.
@@ -66,6 +70,8 @@ def __init__(self, cfg):
6670
<li> local variables </li>
6771
</ul>
6872
"""
73+
global dftracer, dftracer_initialize, dftracer_finalize
74+
6975
t0 = time()
7076
self.args = ConfigArguments.get_instance()
7177
LoadConfig(self.args, cfg)
@@ -92,7 +98,8 @@ def __init__(self, cfg):
9298
# Configure the logging library
9399
self.args.configure_dlio_logging(is_child=False)
94100
self.logger = DLIOLogger.get_instance()
95-
self.dftracer = self.args.configure_dftracer(is_child=False, use_pid=False)
101+
if dftracer_initialize:
102+
dftracer = self.args.configure_dftracer(is_child=False, use_pid=False)
96103
with Profile(name=f"{self.__init__.__qualname__}", cat=MODULE_DLIO_BENCHMARK):
97104
if self.args.my_rank == 0:
98105
self.logger.output(f"{utcnow()} Running DLIO with {self.args.comm_size} process(es)")
@@ -342,6 +349,9 @@ def finalize(self):
342349
"""
343350
It finalizes the dataset once training is completed.
344351
"""
352+
353+
global dftracer, dftracer_initialize, dftracer_finalize
354+
345355
self.comm.barrier()
346356
self.checkpointing_mechanism.finalize()
347357
if not self.generate_only:
@@ -363,7 +373,8 @@ def finalize(self):
363373
self.stats.finalize()
364374
self.stats.save_data()
365375
self.comm.barrier()
366-
self.args.finalize_dftracer(self.dftracer)
376+
if dftracer_finalize and dftracer:
377+
self.args.finalize_dftracer(dftracer)
367378

368379

369380
@hydra.main(version_base=None, config_path="configs", config_name="config")
@@ -374,6 +385,13 @@ def run_benchmark(cfg: DictConfig):
374385
benchmark.run()
375386
benchmark.finalize()
376387

388+
def set_dftracer_initialize(status):
    """Set whether ``DLIOBenchmark.__init__`` should configure dftracer.

    Args:
        status: Truthy to let the next ``DLIOBenchmark`` construction call
            ``configure_dftracer``; falsy to skip that call.
    """
    # Only the flag that is actually rebound needs a ``global`` declaration.
    global dftracer_initialize
    dftracer_initialize = status
391+
392+
def set_dftracer_finalize(status):
    """Set whether ``DLIOBenchmark.finalize`` should finalize dftracer.

    Args:
        status: Truthy to let ``finalize`` call ``finalize_dftracer`` on the
            module-wide tracer (when one exists); falsy to skip that call.
    """
    # Only the flag that is actually rebound needs a ``global`` declaration.
    global dftracer_finalize
    dftracer_finalize = status
377395

378396
def main() -> None:
379397
"""

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ nvidia-dali-cuda110>=1.34.0
1010
omegaconf~=2.2.0
1111
pandas~=1.5.1
1212
psutil~=5.9.8
13-
pydftracer==1.0.2
13+
pydftracer==1.0.8
1414
pytest
1515
tensorflow>=2.11.0
1616
torch>=2.2.0

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
"omegaconf>=2.2.0",
1717
"pandas>=1.5.1",
1818
"psutil>=5.9.8",
19-
"pydftracer==1.0.2",
19+
"pydftracer==1.0.8",
2020
]
2121
x86_deps = [
2222
f"hydra-core>={HYDRA_VERSION}",

tests/conftest.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# HACK: works around the dftracer reinitialization problem by keeping a
# session-wide flag on the pytest config object. Tests read and update
# `config.is_dftracer_initialized` to decide whether the tracer still needs
# to be initialized (presumably it cannot be initialized twice in one
# process -- TODO confirm against pydftracer).
2+
def pytest_configure(config):
3+
# Start the session with the tracer marked as not yet initialized.
config.is_dftracer_initialized = False

tests/dlio_benchmark_test.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
# logging's max timestamp resolution is msecs, we will pass in usecs in the message
4242
)
4343

44-
from dlio_benchmark.main import DLIOBenchmark
44+
from dlio_benchmark.main import DLIOBenchmark, set_dftracer_initialize, set_dftracer_finalize
4545
import glob
4646

4747
def init():
@@ -127,9 +127,11 @@ def test_subset() -> None:
127127
logging.info(f" DLIO training test for subset")
128128
logging.info("=" * 80)
129129
with initialize_config_dir(version_base=None, config_dir=config_dir):
130+
set_dftracer_finalize(False)
130131
cfg = compose(config_name='config', overrides=['++workload.workflow.train=False', \
131132
'++workload.workflow.generate_data=True'])
132133
benchmark=run_benchmark(cfg, verify=False)
134+
set_dftracer_initialize(False)
133135
cfg = compose(config_name='config', overrides=['++workload.workflow.train=True', \
134136
'++workload.workflow.generate_data=False', \
135137
'++workload.dataset.num_files_train=8', \
@@ -506,7 +508,7 @@ def test_custom_storage_root_train(fmt, framework) -> None:
506508

507509
@pytest.mark.timeout(60, method="thread")
508510
@pytest.mark.parametrize("dist", list(compute_time_distributions.keys()))
509-
def test_computation_time_distribution(dist) -> None:
511+
def test_computation_time_distribution(request, dist) -> None:
510512
init()
511513
clean()
512514
compute_time_overrides = []
@@ -523,11 +525,18 @@ def test_computation_time_distribution(dist) -> None:
523525
logging.info(f" DLIO test for computation time distribution")
524526
logging.info("=" * 80)
525527
with initialize_config_dir(version_base=None, config_dir=config_dir):
528+
if request.config.is_dftracer_initialized:
529+
set_dftracer_initialize(False)
530+
else:
531+
set_dftracer_finalize(False)
532+
526533
cfg = compose(config_name='config',
527534
overrides=['++workload.workflow.train=True', \
528535
'++workload.workflow.generate_data=True', \
529536
'++workload.train.epochs=4'] + compute_time_overrides)
530537
benchmark = run_benchmark(cfg)
538+
if not request.config.is_dftracer_initialized:
539+
request.config.is_dftracer_initialized = True
531540
clean()
532541
finalize()
533542

0 commit comments

Comments
 (0)