Skip to content

Commit

Permalink
torchx - add exception_type, exception_message, and exception_source_…
Browse files Browse the repository at this point in the history
…location to torchx event (#966) (#966)

Summary:

Add the exception type, exception message, and exception source location to the torchx event. This records more detailed exception information in the logged event so that failures can be analyzed later.

Differential Revision: D64406552

Co-authored-by: Tony Kao <[email protected]>
  • Loading branch information
tonykao8080 and Tony Kao authored Oct 16, 2024
1 parent 3855ae4 commit af5acd6
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 0 deletions.
16 changes: 16 additions & 0 deletions torchx/runner/events/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
"""

import json
import logging
import sys
import time
import traceback
from types import TracebackType
Expand Down Expand Up @@ -123,6 +125,20 @@ def __exit__(
) // 1000
if traceback_type:
self._torchx_event.raw_exception = traceback.format_exc()
typ, value, tb = sys.exc_info()
if tb:
last_frame = traceback.extract_tb(tb)[-1]
self._torchx_event.exception_source_location = json.dumps(
{
"filename": last_frame.filename,
"lineno": last_frame.lineno,
"name": last_frame.name,
}
)
if exec_type:
self._torchx_event.exception_type = exec_type.__name__
if exec_value:
self._torchx_event.exception_message = str(exec_value)
record(self._torchx_event)

def _generate_torchx_event(
Expand Down
3 changes: 3 additions & 0 deletions torchx/runner/events/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ class TorchxEvent:
wall_time_usec: Optional[int] = None
start_epoch_time_usec: Optional[int] = None
workspace: Optional[str] = None
exception_type: Optional[str] = None
exception_message: Optional[str] = None
exception_source_location: Optional[str] = None

def __str__(self) -> str:
return self.serialize()
Expand Down

0 comments on commit af5acd6

Please sign in to comment.