Fast standalone symbolize for unwinding (#123966)
We've had issues using addr2line. On certain versions of CentOS the installed addr2line has a performance regression that makes it very slow, and even normally it is not that fast, taking several seconds for a typical memory trace dump even when parallelized.

Folly Symbolize or LLVMSymbolize are fast, but using them would require PyTorch to take a dependency on those libraries, and given the number of environments we run in, we end up hitting cases where we fall back to the slow addr2line behavior.

This adds a standalone symbolizer to PyTorch, similar to the unwinder, which has no external dependencies and is ~20x faster than addr2line for symbolizing PyTorch frames.
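
In practice the symbolizer runs wherever PyTorch symbolizes captured C++ frames, e.g. when a memory trace is dumped. A minimal sketch of that workflow is below; it assumes a CUDA build, and the `TORCH_SYMBOLIZE_MODE` environment variable name ("fast" / "addr2line" / "dladdr") is an assumption about how the mode is selected, not something shown in this diff.

```python
# Hedged sketch (not part of this diff): record a CUDA memory trace with C++
# stacks and dump it; the recorded frames are symbolized during the dump.
# Run with TORCH_SYMBOLIZE_MODE=fast set in the environment (assumed variable
# name) to pick the new standalone symbolizer.
import torch

torch.cuda.memory._record_memory_history()           # capture stacks per allocation
x = torch.randn(1024, 1024, device="cuda")            # do some allocations
del x
torch.cuda.memory._dump_snapshot("snapshot.pickle")   # symbolization happens here
```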

I've tested this on some memory profiling runs using all combinations of {gcc, clang} x {dwarf4, dwarf5}, and it seems to do a good job at getting line numbers and function names right. It is also careful to route all reads of library data through the `CheckedLexer` object, which ensures reads stay within the bounds of the section. Errors are routed through UnwindError so that those exceptions get caught and we produce a ?? frame rather than crashing. I also added a fuzz test which feeds all of our symbolizer options random addresses in the process to make sure they do not crash.
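
The fuzz test drives the `torch._C._profiler.symbolize_addresses` binding added in this PR; a minimal hand-run of it looks like the sketch below (Linux only; the address is illustrative).

```python
# Minimal sketch of the testing-only binding: each address maps to a
# (filename, lineno, funcname) tuple, and addresses the symbolizer cannot
# resolve come back as "??" frames rather than raising.
import torch

addrs = [0x7F0000001000]  # hypothetical address inside some loaded .so
for filename, lineno, funcname in torch._C._profiler.symbolize_addresses(addrs, "fast"):
    print(f"{funcname} ({filename}:{lineno})")
```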

Differential Revision: [D56828968](https://our.internmc.facebook.com/intern/diff/D56828968)
Pull Request resolved: #123966
Approved by: https://github.com/ezyang, https://github.com/aaronenyeshi
zdevito authored and pytorchmergebot committed May 14, 2024
1 parent 5fb4a76 commit 352a893
Showing 23 changed files with 1,595 additions and 111 deletions.
69 changes: 69 additions & 0 deletions test/profiler/test_profiler.py
@@ -16,9 +16,12 @@
import collections
import gc
import json
import mmap
import os
import pickle
import random
import re
import struct
import subprocess
import sys
import threading
@@ -64,7 +67,9 @@
from torch.testing._internal.common_device_type import skipCUDAVersionIn
from torch.testing._internal.common_utils import (
instantiate_parametrized_tests,
IS_ARM64,
IS_JETSON,
IS_LINUX,
IS_WINDOWS,
parametrize,
run_tests,
@@ -2436,6 +2441,70 @@ def test_profiler_pattern_matcher_json_report(self):
finally:
os.remove("torchtidy_report.json")

@unittest.skipIf(IS_ARM64 or not IS_LINUX, "x86 linux only cpp unwinding")
def test_fuzz_symbolize(self):
# generate some random addresses in the text section and make sure the
# symbolizers do not throw exceptions/crash
def get_text_sections():
text_sections = []
seen = set()
for filename in os.listdir("/proc/self/map_files"):
library = os.readlink("/proc/self/map_files/" + filename)
if ".so" not in library or library in seen:
continue
seen.add(library)
with open(os.path.join("/proc/self/map_files", library), "rb") as f:
mm = mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ)

def unpack(fmt, offset):
return struct.unpack(
fmt, mm[offset : offset + struct.calcsize(fmt)]
)

if mm[:4] != b"\x7fELF":
continue
(section_headers_start,) = unpack("Q", 40)
(section_header_size,) = unpack("H", 58)
(num_section_headers,) = unpack("H", 60)
(shstrndx,) = unpack("H", 62)
(shstrtab_offset,) = unpack(
"Q", section_headers_start + shstrndx * section_header_size + 24
)
for i in range(num_section_headers):
(section_name_offset,) = unpack(
"I", section_headers_start + i * section_header_size
)
name_start = shstrtab_offset + section_name_offset
section_name = mm[name_start : name_start + 6]
if section_name != b".text\0":
continue
(section_offset,) = unpack(
"Q", section_headers_start + i * section_header_size + 24
)
(section_size,) = unpack(
"Q", section_headers_start + i * section_header_size + 32
)
start = int(filename.split("-")[0], 16) + section_offset
text_sections.append((start, section_size))
break
mm.close()
return text_sections

r = random.Random()
r.seed(1)
text_sections = get_text_sections()
addrs = []
for i in range(200):
s = r.randrange(0, len(text_sections))
start, size = text_sections[s]
addr = r.randrange(start, start + size)
addrs.append(addr)
fast = torch._C._profiler.symbolize_addresses(addrs, "fast")
dladdr = torch._C._profiler.symbolize_addresses(addrs, "dladdr")
addr2line = torch._C._profiler.symbolize_addresses(addrs, "addr2line")
self.assertEqual(len(fast), len(addrs))
self.assertEqual(len(addr2line), len(fast))


if __name__ == "__main__":
run_tests()
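
For reference, `get_text_sections()` above is a hand-rolled ELF64 header walk. The sketch below restates the same logic for a single file using the same fixed offsets; `find_text_section` is a hypothetical helper name, not part of the test.

```python
# Standalone sketch of the ELF64 header walk the test performs; offsets are the
# standard ELF64 field positions (e_shoff=40, e_shentsize=58, e_shnum=60,
# e_shstrndx=62; sh_offset=+24, sh_size=+32 within a section header).
import struct

def find_text_section(path):
    with open(path, "rb") as f:
        data = f.read()
    if data[:4] != b"\x7fELF":
        return None
    (e_shoff,) = struct.unpack_from("Q", data, 40)
    (e_shentsize,) = struct.unpack_from("H", data, 58)
    (e_shnum,) = struct.unpack_from("H", data, 60)
    (e_shstrndx,) = struct.unpack_from("H", data, 62)
    (shstrtab_off,) = struct.unpack_from(
        "Q", data, e_shoff + e_shstrndx * e_shentsize + 24
    )
    for i in range(e_shnum):
        hdr = e_shoff + i * e_shentsize
        (name_off,) = struct.unpack_from("I", data, hdr)
        if data[shstrtab_off + name_off : shstrtab_off + name_off + 6] == b".text\0":
            (sh_offset,) = struct.unpack_from("Q", data, hdr + 24)
            (sh_size,) = struct.unpack_from("Q", data, hdr + 32)
            return sh_offset, sh_size
    return None
```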
10 changes: 6 additions & 4 deletions torch/csrc/Module.cpp
@@ -168,12 +168,14 @@ static PyObject* THPModule_initExtension(
PyObject* shm_manager_path) {
HANDLE_TH_ERRORS
#if !defined(FBCODE_CAFFE2)
if (torch::get_cpp_stacktraces_enabled() && !torch::get_disable_addr2line()) {
if (torch::get_cpp_stacktraces_enabled()) {
c10::SetStackTraceFetcher([]() -> std::string {
auto tb = torch::CapturedTraceback::gather(false, false, true);
LOG(WARNING)
<< "symbolizing C++ stack trace for exception; if this hangs, rerun with TORCH_DISABLE_ADDR2LINE=1..."
<< std::endl;
if (torch::get_symbolize_mode() == torch::unwind::Mode::addr2line) {
LOG(WARNING)
<< "symbolizing C++ stack trace for exception; if this hangs, rerun with TORCH_DISABLE_ADDR2LINE=1..."
<< std::endl;
}
auto s_tbs = torch::symbolize({tb.get()});
std::stringstream oss;
oss << "C++ CapturedTraceback:" << std::endl;
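The Module.cpp change above only affects when the addr2line warning is logged. A hedged way to exercise the path (assuming a non-fbcode build) is to run with `TORCH_SHOW_CPP_STACKTRACES=1` and trigger an error:

```python
# Run with TORCH_SHOW_CPP_STACKTRACES=1 in the environment. The error message
# gains a "C++ CapturedTraceback:" section; with the change above, the
# "rerun with TORCH_DISABLE_ADDR2LINE=1" warning is only printed when the
# addr2line symbolizer is actually the selected mode.
import torch

try:
    torch.empty(1).view(2)  # invalid view -> RuntimeError with a C++ trace appended
except RuntimeError as e:
    print(e)
```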
3 changes: 2 additions & 1 deletion torch/csrc/profiler/combined_traceback.cpp
@@ -1,4 +1,5 @@
#include <torch/csrc/profiler/combined_traceback.h>
#include <torch/csrc/utils/cpp_stacktraces.h>

namespace torch {

@@ -77,7 +78,7 @@ SymbolizedTracebacks symbolize(
}
// gather symbol names for C++ frames
if (!all_cpp_ips.empty()) {
r.all_frames = unwind::symbolize(all_cpp_ips);
r.all_frames = unwind::symbolize(all_cpp_ips, torch::get_symbolize_mode());
}

// batch symbolization requests so we dedup frame objects
39 changes: 31 additions & 8 deletions torch/csrc/profiler/python/init.cpp
@@ -79,8 +79,7 @@ PyTypeObject THPCapturedTracebackType = {
nullptr, /* tp_new */
};

namespace pybind11 {
namespace detail {
namespace pybind11::detail {

template <>
struct type_caster<std::shared_ptr<torch::CapturedTraceback>> {
@@ -107,11 +106,9 @@ struct type_caster<std::shared_ptr<torch::CapturedTraceback>> {
}
};

} // namespace detail
} // namespace pybind11
} // namespace pybind11::detail

namespace torch {
namespace profiler {
namespace torch::profiler {

/* [NOTE: RecordFunctionFast]
* This is an alternate way to call record_function from python.
@@ -606,6 +603,33 @@ void initPythonBindings(PyObject* module) {
}
return py_symbolize(tb_ptrs);
});
// directly convert address pointers to frames, used for testing symbolize
m.def(
"symbolize_addresses",
[](const std::vector<uint64_t>& frames, const std::string& mode_s) {
std::vector<std::tuple<std::string, int64_t, std::string>> frames_out;
torch::unwind::Mode mode = torch::unwind::Mode::addr2line;
if (mode_s == "fast") {
mode = torch::unwind::Mode::fast;
} else if (mode_s == "addr2line") {
mode = torch::unwind::Mode::addr2line;
} else if (mode_s == "dladdr") {
mode = torch::unwind::Mode::dladdr;
} else {
TORCH_CHECK(false, "unexpected mode ", mode_s);
}
std::vector<void*> frames_p;
frames_p.reserve(frames.size());
for (auto f : frames) {
frames_p.push_back((void*)f); // NOLINT
}
auto frame_objects = unwind::symbolize(frames_p, mode);
frames_out.reserve(frame_objects.size());
for (auto& frame : frame_objects) {
frames_out.emplace_back(frame.filename, frame.lineno, frame.funcname);
}
return frames_out;
});
installCapturedTracebackPython();

// NOLINTNEXTLINE(*-c-arrays*)
Expand Down Expand Up @@ -639,5 +663,4 @@ void initPythonBindings(PyObject* module) {
throw python_error();
}
}
} // namespace profiler
} // namespace torch
} // namespace torch::profiler
4 changes: 4 additions & 0 deletions torch/csrc/profiler/unwind/action.h
@@ -2,6 +2,8 @@
#include <stdint.h>
#include <ostream>

namespace torch::unwind {

enum {
A_UNDEFINED = 0x0,
A_REG_PLUS_DATA = 0x1, // exp = REG[reg] + data0
@@ -53,3 +55,5 @@ struct Action {
return out;
}
};

} // namespace torch::unwind
3 changes: 3 additions & 0 deletions torch/csrc/profiler/unwind/communicate.h
@@ -5,6 +5,7 @@
#include <unistd.h>
#include <memory>

namespace torch::unwind {
// helper to open a process with stdin/stdout/stderr streams.
struct Communicate {
Communicate(const char* command, const char** args) {
@@ -63,3 +64,5 @@ struct Communicate {
std::unique_ptr<std::ostream> out_;
std::unique_ptr<std::ostream> err_;
};

} // namespace torch::unwind
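
`Communicate` is the plumbing the existing addr2line backend presumably uses to talk to a child process over its stdin/stdout pipes. A rough Python equivalent of that round trip (illustrative command line and library path, not the exact invocation PyTorch issues) shows why an out-of-process symbolizer is comparatively slow:

```python
# Rough equivalent of what the Communicate helper exists for: driving an
# external `addr2line` process over stdin/stdout. Spawning a process per
# library plus pipe round trips is what makes this path slower than the
# in-process "fast" symbolizer.
import subprocess

proc = subprocess.Popen(
    ["addr2line", "-f", "-C", "-e", "/usr/lib/x86_64-linux-gnu/libc.so.6"],
    stdin=subprocess.PIPE,
    stdout=subprocess.PIPE,
    text=True,
)
proc.stdin.write("0x1000\n")  # hypothetical library-relative offset
proc.stdin.flush()
funcname = proc.stdout.readline().strip()   # -f prints the function name first,
location = proc.stdout.readline().strip()   # then "file:line" on the next line
print(funcname, location)
proc.terminate()
```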
