-
Notifications
You must be signed in to change notification settings - Fork 2.2k
Add tests for accessing uninitialized holders #5654
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
XuehaiPan
wants to merge
15
commits into
pybind:master
Choose a base branch
from
XuehaiPan:invalid-holder-access
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+350
−1
Draft
Changes from 7 commits
Commits
Show all changes
15 commits
Select commit
Hold shift + click to select a range
2cbc975
Add tests for accessing uninitialized holders
XuehaiPan edcb5ac
Enable `faulthandler` for pytest
XuehaiPan 4bc7a11
Disable GC test for non-CPython
XuehaiPan 20d23e2
Move segfault test to subprocess
XuehaiPan 2b00621
Refine tests
XuehaiPan 6c6db0e
Add `STATUS_ACCESS_VIOLATION` for Windows exitcode
XuehaiPan 2c73d2b
Update tests
XuehaiPan d103e26
Update docs
XuehaiPan 7db0a9f
Update docs
XuehaiPan 73e2615
Merge remote-tracking branch 'upstream/master' into invalid-holder-ac…
XuehaiPan a5818b6
Merge branch 'master' into invalid-holder-access
XuehaiPan 752d883
Add semi-public API: `pybind11::detail::is_holder_constructed`
XuehaiPan 110d024
Apply suggestions from code review
XuehaiPan cf8f5e6
Merge branch 'master' into invalid-holder-access
XuehaiPan cc0f9c4
Merge branch 'master' into invalid-holder-access
XuehaiPan File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
#include "pybind11_tests.h" | ||
|
||
#include <Python.h> | ||
#include <memory> | ||
#include <vector> | ||
|
||
class VecOwnsObjs { | ||
public: | ||
void append(const py::object &obj) { vec.emplace_back(obj); } | ||
|
||
void set_item(py::ssize_t i, const py::object &obj) { | ||
if (!(i >= 0 && i < size())) { | ||
throw std::out_of_range("Index out of range"); | ||
} | ||
vec[py::size_t(i)] = obj; | ||
} | ||
|
||
py::object get_item(py::ssize_t i) const { | ||
if (!(i >= 0 && i < size())) { | ||
throw std::out_of_range("Index out of range"); | ||
} | ||
return vec[py::size_t(i)]; | ||
} | ||
|
||
py::ssize_t size() const { return py::ssize_t_cast(vec.size()); } | ||
|
||
bool is_empty() const { return vec.empty(); } | ||
|
||
static int tp_traverse(PyObject *self_base, visitproc visit, void *arg) { | ||
#if PY_VERSION_HEX >= 0x03090000 // Python 3.9 | ||
Py_VISIT(Py_TYPE(self_base)); | ||
#endif | ||
if (should_check_holder_initialization) { | ||
auto *const instance = reinterpret_cast<py::detail::instance *>(self_base); | ||
if (!instance->get_value_and_holder().holder_constructed()) { | ||
// The holder has not been constructed yet. Skip the traversal to avoid | ||
// segmentation faults. | ||
return 0; | ||
} | ||
} | ||
auto &self = py::cast<VecOwnsObjs &>(py::handle{self_base}); | ||
for (const auto &obj : self.vec) { | ||
Py_VISIT(obj.ptr()); | ||
} | ||
return 0; | ||
} | ||
|
||
static int tp_clear(PyObject *self_base) { | ||
if (should_check_holder_initialization) { | ||
auto *const instance = reinterpret_cast<py::detail::instance *>(self_base); | ||
if (!instance->get_value_and_holder().holder_constructed()) { | ||
// The holder has not been constructed yet. Skip the traversal to avoid | ||
// segmentation faults. | ||
return 0; | ||
} | ||
} | ||
auto &self = py::cast<VecOwnsObjs &>(py::handle{self_base}); | ||
for (auto &obj : self.vec) { | ||
Py_CLEAR(obj.ptr()); | ||
} | ||
self.vec.clear(); | ||
return 0; | ||
} | ||
|
||
py::object get_state() const { | ||
py::list state{}; | ||
for (const auto &item : vec) { | ||
state.append(item); | ||
} | ||
return py::tuple(state); | ||
} | ||
|
||
static bool get_should_check_holder_initialization() { | ||
return should_check_holder_initialization; | ||
} | ||
|
||
static void set_should_check_holder_initialization(bool value) { | ||
should_check_holder_initialization = value; | ||
} | ||
|
||
static bool get_should_raise_error_on_set_state() { return should_raise_error_on_set_state; } | ||
|
||
static void set_should_raise_error_on_set_state(bool value) { | ||
should_raise_error_on_set_state = value; | ||
} | ||
|
||
static bool should_check_holder_initialization; | ||
static bool should_raise_error_on_set_state; | ||
|
||
private: | ||
std::vector<py::object> vec{}; | ||
}; | ||
|
||
bool VecOwnsObjs::should_check_holder_initialization = false; | ||
bool VecOwnsObjs::should_raise_error_on_set_state = false; | ||
|
||
TEST_SUBMODULE(invalid_holder_access, m) { | ||
m.doc() = "Test invalid holder access"; | ||
|
||
#if defined(PYBIND11_CPP14) | ||
m.def("create_vector", [](const py::iterable &iterable) -> std::unique_ptr<VecOwnsObjs> { | ||
auto vec = std::make_unique<VecOwnsObjs>(); | ||
for (const auto &item : iterable) { | ||
vec->append(py::reinterpret_borrow<py::object>(item)); | ||
} | ||
return vec; | ||
}); | ||
#endif | ||
|
||
py::class_<VecOwnsObjs>( | ||
m, "VecOwnsObjs", py::custom_type_setup([](PyHeapTypeObject *heap_type) -> void { | ||
auto *const type = &heap_type->ht_type; | ||
type->tp_flags |= Py_TPFLAGS_HAVE_GC; | ||
type->tp_traverse = &VecOwnsObjs::tp_traverse; | ||
type->tp_clear = &VecOwnsObjs::tp_clear; | ||
})) | ||
.def_static("set_should_check_holder_initialization", | ||
&VecOwnsObjs::set_should_check_holder_initialization, | ||
py::arg("value")) | ||
.def_static("set_should_raise_error_on_set_state", | ||
&VecOwnsObjs::set_should_raise_error_on_set_state, | ||
py::arg("value")) | ||
#if defined(PYBIND11_CPP14) | ||
.def(py::pickle([](const VecOwnsObjs &self) -> py::object { return self.get_state(); }, | ||
[](const py::object &state) -> std::unique_ptr<VecOwnsObjs> { | ||
if (!py::isinstance<py::tuple>(state)) { | ||
throw std::runtime_error("Invalid state"); | ||
} | ||
auto vec = std::make_unique<VecOwnsObjs>(); | ||
if (VecOwnsObjs::get_should_raise_error_on_set_state()) { | ||
throw std::runtime_error("raise error on set_state for testing"); | ||
} | ||
for (const auto &item : state) { | ||
vec->append(py::reinterpret_borrow<py::object>(item)); | ||
} | ||
return vec; | ||
}), | ||
py::arg("state")) | ||
#endif | ||
.def("append", &VecOwnsObjs::append, py::arg("obj")) | ||
.def("is_empty", &VecOwnsObjs::is_empty) | ||
.def("__setitem__", &VecOwnsObjs::set_item, py::arg("i"), py::arg("obj")) | ||
.def("__getitem__", &VecOwnsObjs::get_item, py::arg("i")) | ||
.def("__len__", &VecOwnsObjs::size); | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,187 @@ | ||
from __future__ import annotations | ||
|
||
import gc | ||
import multiprocessing | ||
import pickle | ||
import signal | ||
import sys | ||
import weakref | ||
|
||
import pytest | ||
|
||
import env # noqa: F401 | ||
import pybind11_tests | ||
from pybind11_tests import invalid_holder_access as m | ||
|
||
XFAIL_REASON = "Known issues: https://github.com/pybind/pybind11/pull/5654" | ||
|
||
|
||
try: | ||
import multiprocessing | ||
|
||
spawn_context = multiprocessing.get_context("spawn") | ||
except (ImportError, ValueError): | ||
spawn_context = None | ||
|
||
|
||
def check_segfault(target): | ||
"""Check if the target function causes a segmentation fault. | ||
|
||
The check should be done in a separate process to avoid crashing the main | ||
process with the segfault. | ||
""" | ||
process = spawn_context.Process(target=target) | ||
process.start() | ||
process.join() | ||
rc = abs(process.exitcode) | ||
if 128 < rc < 256: | ||
rc -= 128 | ||
assert rc in ( | ||
0, | ||
signal.SIGSEGV, | ||
signal.SIGABRT, | ||
0xC0000005, # STATUS_ACCESS_VIOLATION on Windows | ||
) | ||
if rc != 0: | ||
raise SystemError( | ||
"Segmentation Fault: The C++ compiler initializes container incorrectly." | ||
) | ||
|
||
|
||
def test_no_init(): | ||
with pytest.raises(TypeError, match=r"No constructor defined"): | ||
m.VecOwnsObjs() | ||
vec = m.VecOwnsObjs.__new__(m.VecOwnsObjs) | ||
with pytest.raises(TypeError, match=r"No constructor defined"): | ||
vec.__init__() | ||
|
||
|
||
def manual_new_target(): | ||
# Repeatedly trigger allocation without initialization (raw malloc'ed) to | ||
# detect uninitialized memory bugs. | ||
for _ in range(32): | ||
# The holder is a pointer variable while the C++ ctor is not called. | ||
vec = m.VecOwnsObjs.__new__(m.VecOwnsObjs) | ||
if vec.is_empty(): # <= this line could cause a segfault | ||
# The C++ compiler initializes container correctly. | ||
assert len(vec) == 0 | ||
else: | ||
# The program is not supposed to reach here. It will abort with | ||
# SIGSEGV on `vec.is_empty()`. | ||
sys.exit(signal.SIGSEGV) # manually trigger SIGSEGV if not raised | ||
|
||
|
||
# This test might succeed on some platforms with some compilers, but it is not | ||
# guaranteed to work everywhere. It is marked as non-strict xfail. | ||
@pytest.mark.xfail(reason=XFAIL_REASON, raises=SystemError, strict=False) | ||
@pytest.mark.skipif(spawn_context is None, reason="spawn context not available") | ||
@pytest.mark.skipif( | ||
sys.platform.startswith("emscripten"), | ||
reason="Requires multiprocessing", | ||
) | ||
def test_manual_new(): | ||
""" | ||
Manually calling the constructor (__new__) of a class that has C++ STL | ||
container will allocate memory without initializing it can cause a | ||
segmentation fault. | ||
""" | ||
check_segfault(manual_new_target) | ||
|
||
|
||
@pytest.mark.skipif( | ||
pybind11_tests.cpp_std_num < 14, | ||
reason="std::{unique_ptr,make_unique} not available in C++11", | ||
) | ||
def test_set_state_with_error_no_segfault_if_gc_checks_holder_has_initialized(): | ||
""" | ||
The ``tp_traverse`` and ``tp_clear`` functions should check if the holder | ||
has been initialized before traversing or clearing the C++ STL container. | ||
""" | ||
m.VecOwnsObjs.set_should_check_holder_initialization(True) | ||
|
||
vec = m.create_vector((1, 2, 3, 4)) | ||
|
||
m.VecOwnsObjs.set_should_raise_error_on_set_state(False) | ||
pickle.loads(pickle.dumps(vec)) | ||
|
||
# During the unpickling process, Python firstly allocates the object with | ||
# the `__new__` method and then calls the `__setstate__`` method to set the | ||
# state of the object. | ||
# | ||
# obj = cls.__new__(cls) | ||
# obj.__setstate__(state) | ||
# | ||
# The `__init__` method is not called during unpickling. | ||
# So, if the `__setstate__` method raises an error, the object will be in | ||
# an uninitialized state. | ||
m.VecOwnsObjs.set_should_raise_error_on_set_state(True) | ||
serialized = pickle.dumps(vec) | ||
with pytest.raises( | ||
RuntimeError, | ||
match="raise error on set_state for testing", | ||
): | ||
pickle.loads(serialized) | ||
|
||
|
||
def unpicklable_with_error_target(): | ||
m.VecOwnsObjs.set_should_check_holder_initialization(False) | ||
m.VecOwnsObjs.set_should_raise_error_on_set_state(True) | ||
|
||
vec = m.create_vector((1, 2, 3, 4)) | ||
serialized = pickle.dumps(vec) | ||
|
||
# Repeatedly trigger allocation without initialization (raw malloc'ed) to | ||
# detect uninitialized memory bugs. | ||
for _ in range(32): | ||
# During the unpickling process, Python firstly allocates the object with | ||
# the `__new__` method and then calls the `__setstate__`` method to set the | ||
# state of the object. | ||
# | ||
# obj = cls.__new__(cls) | ||
# obj.__setstate__(state) | ||
# | ||
# The `__init__` method is not called during unpickling. | ||
# So, if the `__setstate__` method raises an error, the object will be in | ||
# an uninitialized state. The GC will traverse the uninitialized C++ STL | ||
# container and cause a segmentation fault. | ||
try: # noqa: SIM105 | ||
pickle.loads(serialized) | ||
except RuntimeError: | ||
pass | ||
|
||
|
||
# This test might succeed on some platforms with some compilers, but it is not | ||
# guaranteed to work everywhere. It is marked as non-strict xfail. | ||
@pytest.mark.xfail(reason=XFAIL_REASON, raises=SystemError, strict=False) | ||
@pytest.mark.skipif(spawn_context is None, reason="spawn context not available") | ||
@pytest.mark.skipif( | ||
pybind11_tests.cpp_std_num < 14, | ||
reason="std::{unique_ptr,make_unique} not available in C++11", | ||
) | ||
def test_set_state_with_error_will_segfault_if_gc_does_not_check_holder_has_initialized(): | ||
m.VecOwnsObjs.set_should_check_holder_initialization(False) | ||
|
||
vec = m.create_vector((1, 2, 3, 4)) | ||
|
||
m.VecOwnsObjs.set_should_raise_error_on_set_state(False) | ||
pickle.loads(pickle.dumps(vec)) | ||
|
||
# See comments above. | ||
check_segfault(unpicklable_with_error_target) | ||
|
||
|
||
@pytest.mark.skipif("env.PYPY or env.GRAALPY", reason="Cannot reliably trigger GC") | ||
@pytest.mark.skipif( | ||
pybind11_tests.cpp_std_num < 14, | ||
reason="std::{unique_ptr,make_unique} not available in C++11", | ||
) | ||
def test_gc(): | ||
vec = m.create_vector(()) | ||
vec.append((vec, vec)) | ||
|
||
wr = weakref.ref(vec) | ||
assert wr() is vec | ||
del vec | ||
for _ in range(10): | ||
gc.collect() | ||
assert wr() is None |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@rwgk I added a test and comments for this in the latest comment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, this goes into a different direction?
Is the primary goal to fix this hole in the pickle support?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see we touched on this before:
__new__
does not initialize STL containers and resulting in undefined behavior #4549 (comment)Well, that's a choice.
That, or we need to find a mechanism to gracefully handle exceptions in
__setstate__
.Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adding a public API to check whether the holder is constructed can resolve my issue.
I also updated the documentation about this but accessing the
pybind11::detail
namespace.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The general idea sounds good, but I have doubts about adding it as
pybind11:: is_holder_constructed()
.To be safe enough for a public API, we'd need to guard with a check: "is this actually a pybind11 class_-wrapped type"?
Something like
PyTypeObject *srctype = Py_TYPE(src.ptr());
andall_type_info(srctype)
probably (see pybind11/detail/type_caster_base.h).But in the context of
custom_type_setup
,tp_traverse
,tp_clear
, is that actually needed?Another idea:
I see this in the
tp_traverse
andtp_clear
implementations:That could do the
holder_constructed
check internally, my guess is cheaply, although I could be wrong.We might have to change the recipe to:
And if the holder isn't constructed, this could return
nullptr
.Would that still work for your purposes?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the instance is not initialized, what behavior would be expected? Should it raise an error? I don't know if it is allowed to raise exceptions in
tp_traverse
andtp_clear
. These two functions should always succeed, I think.This works for me! I think this would be the best solution to have minimal impact on the downstream and be easy to migrate.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you think you could try? — I can only offer to assist, by answering question if you run into roadblocks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It crossed my mind: Possibly the suggested change could get very involved ("hairy"). In case you start to feel it's too much, I'd be OK to add
pybind11::detail:: is_holder_constructed()
as a semi-private API, without runtime guards, but a carefully written comment to explain the critical precondition: the caller is responsible for ensuring that thereinterpret_cast
is valid.