From a749c1ac51856ac131d50f413a8e3bc38bbe26b1 Mon Sep 17 00:00:00 2001
From: Yifei Teng
Date: Mon, 24 Feb 2025 15:55:26 -0800
Subject: [PATCH] Add TPU verbose logging flags

---
 torchprime/launcher/thunk.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/torchprime/launcher/thunk.py b/torchprime/launcher/thunk.py
index d7526590..5d60aa74 100644
--- a/torchprime/launcher/thunk.py
+++ b/torchprime/launcher/thunk.py
@@ -48,6 +48,12 @@
 )
 print(f"Dumping XLA compiler outputs to {xla_dump_path}", flush=True)
 
+# Verbose logging flags to help debug TPU hardware issue
+os.environ["TPU_VMODULE"] = "slice_configuration=1,real_program_continuator=1"
+os.environ["TPU_STDERR_LOG_LEVEL"] = "0"
+os.environ["TPU_MIN_LOG_LEVEL"] = "0"
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "0"
+
 # Determine the profile dir
 profile_dir = mounted_artifact_dir / jobset_name / "profile" / host_name
 print(f"Profile output directory: {profile_dir}", flush=True)