-
Notifications
You must be signed in to change notification settings - Fork 43
intermittent segfault during automated tests #169
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Comments
I think what's happening here is that if the Response signals successful completion before the Instead of using the |
I tried inserting some debug logging in functions that interact with the |
While trying to investigate flatpak#169 I noticed that inputcapture was using guint instead of the correct gulong for GObject signal IDs, which could conceivably result in a signal ID being truncated and the wrong signal being disconnected (although in practice signal IDs are allocated starting from 0, so this is unlikely). The same pattern is seen in all of the other portals. Using the correct type also allows g_clear_signal_handler() to be used, for idempotent signal disconnections. Signed-off-by: Simon McVittie <[email protected]>
…ycle Each Call wraps a single GTask, which is created along with the Call and cleared when the Call is destroyed. If the Call has become invalid, that would indicate a use-after-free similar to flatpak#169, or some other memory corruption. Signed-off-by: Simon McVittie <[email protected]>
I've been given access to a cloud VM instance that was being used for Debian QA, where this crash happens much more often than on Debian's official autobuilders or on my own system. I don't know what combination of factors has caused the stars to have aligned differently (CPU speed to vCPU count ratio? CPU speed to I/O speed ratio? ...) but for whatever reason, it seems that this machine reproduces the crash after a few iterations of repeated testing. In particular, on this machine I can reproduce the failure under I added #184, #185, #186 and some extra debug: diffdiff --git a/libportal/inputcapture.c b/libportal/inputcapture.c
index 0921709..cd48a1f 100644
--- a/libportal/inputcapture.c
+++ b/libportal/inputcapture.c
@@ -233,6 +233,7 @@ typedef struct {
/* SetPointerBarrier only */
GList *barriers;
+ int in_flight;
} Call;
static void create_session (Call *call);
@@ -241,7 +242,9 @@ static void get_zones (Call *call);
static void
call_free (Call *call)
{
+ g_debug ("%s: %p", G_STRFUNC, call);
g_return_if_fail (G_IS_TASK (call->task));
+ g_return_if_fail (call->in_flight == 0);
/* CreateSesssion */
if (call->parent)
@@ -277,13 +280,24 @@ call_returned (GObject *object,
GError *error = NULL;
g_autoptr(GVariant) ret = NULL;
+ g_debug ("%s: %p", G_STRFUNC, call);
+ call->in_flight--;
+ g_debug ("%s: %p now in-flight %d", G_STRFUNC, call, call->in_flight);
+
ret = g_dbus_connection_call_finish (G_DBUS_CONNECTION (object), result, &error);
if (error)
{
+ g_debug ("%s: %p -> error %s #%d %s", G_STRFUNC, call,
+ g_quark_to_string (error->domain), error->code, error->message);
g_clear_signal_handler (&call->cancelled_id, g_task_get_cancellable (call->task));
+ g_debug ("%s: %p -> returning error", G_STRFUNC, call);
g_task_return_error (call->task, error);
call_free (call);
}
+ else
+ {
+ g_debug ("%s: %p -> success, continue", G_STRFUNC, call);
+ }
}
static gboolean
@@ -330,8 +344,10 @@ prep_call (Call *call, GDBusSignalCallback callback, GVariantBuilder *options, v
{
g_autofree char *token = NULL;
+ g_debug ("%s: %p", G_STRFUNC, call);
token = g_strdup_printf ("portal%d", g_random_int_range (0, G_MAXINT));
call->request_path = g_strconcat (REQUEST_PATH_PREFIX, call->portal->sender, "/", token, NULL);
+ g_debug ("%s: %p subscribing, already=%u", G_STRFUNC, call, call->signal_id);
call->signal_id = g_dbus_connection_signal_subscribe (call->portal->bus,
PORTAL_BUS_NAME,
REQUEST_INTERFACE,
@@ -491,6 +507,7 @@ get_zones_done (GDBusConnection *bus,
g_autoptr(GVariant) ret = NULL;
g_variant_get (parameters, "(u@a{sv})", &response, &ret);
+ g_debug ("%s: %p -> response code %d", G_STRFUNC, call, response);
if (response != 0)
g_clear_signal_handler (&call->cancelled_id, g_task_get_cancellable (call->task));
@@ -501,6 +518,7 @@ get_zones_done (GDBusConnection *bus,
guint32 zone_set;
XdpInputCaptureSession *session = call->session;
+ g_debug ("%s: %p unsubscribing", G_STRFUNC, call);
g_dbus_connection_signal_unsubscribe (call->portal->bus, call->signal_id);
call->signal_id = 0;
@@ -560,6 +578,7 @@ get_zones_done (GDBusConnection *bus,
g_variant_lookup (ret, "zones", "@a(uuii)", &zones))
{
set_zones (session, zones, zone_set);
+ g_debug ("%s: %p returning Session %p", G_STRFUNC, call, session);
g_task_return_pointer (call->task, session, g_object_unref);
}
else
@@ -575,7 +594,10 @@ get_zones_done (GDBusConnection *bus,
g_task_return_new_error (call->task, G_IO_ERROR, G_IO_ERROR_FAILED, "InputCapture GetZones() failed");
if (response != 0)
- call_free (call);
+ {
+ g_debug ("%s: %p -> returned error", G_STRFUNC, call);
+ call_free (call);
+ }
}
static void
@@ -589,6 +611,9 @@ get_zones (Call *call)
session_id = call->session ? call->session->parent_session->id : call->session_path;
prep_call (call, get_zones_done, &options, NULL);
+ g_debug ("%s: %p calling GetZones", G_STRFUNC, call);
+ call->in_flight++;
+ g_debug ("%s: %p now in-flight %d", G_STRFUNC, call, call->in_flight);
g_dbus_connection_call (call->portal->bus,
PORTAL_BUS_NAME,
PORTAL_OBJECT_PATH,
@@ -617,12 +642,14 @@ session_created (GDBusConnection *bus,
g_autoptr(GVariant) ret = NULL;
g_variant_get (parameters, "(u@a{sv})", &response, &ret);
+ g_debug ("%s: %p -> response code %d", G_STRFUNC, call, response);
if (response != 0)
g_clear_signal_handler (&call->cancelled_id, g_task_get_cancellable (call->task));
if (response == 0)
{
+ g_debug ("%s: %p unsubscribing", G_STRFUNC, call);
g_dbus_connection_signal_unsubscribe (call->portal->bus, call->signal_id);
call->signal_id = 0;
@@ -640,7 +667,10 @@ session_created (GDBusConnection *bus,
g_task_return_new_error (call->task, G_IO_ERROR, G_IO_ERROR_FAILED, "CreateSession failed");
if (response != 0)
- call_free (call);
+ {
+ g_debug ("%s: %p -> returned error", G_STRFUNC, call);
+ call_free (call);
+ }
}
static void
@@ -649,6 +679,7 @@ call_cancelled_cb (GCancellable *cancellable,
{
Call *call = data;
+ g_debug ("%s: %p", G_STRFUNC, call);
g_dbus_connection_call (call->portal->bus,
PORTAL_BUS_NAME,
call->request_path,
@@ -667,6 +698,8 @@ parent_exported (XdpParent *parent,
gpointer data)
{
Call *call = data;
+
+ g_debug ("%s: %p", G_STRFUNC, call);
call->parent_handle = g_strdup (handle);
create_session (call);
}
@@ -678,8 +711,11 @@ create_session (Call *call)
g_autofree char *session_token = NULL;
GCancellable *cancellable;
+ g_debug ("%s: %p", G_STRFUNC, call);
+
if (call->parent_handle == NULL)
{
+ g_debug ("%s: %p needs to export parent first", G_STRFUNC, call);
call->parent->parent_export (call->parent, parent_exported, call);
return;
}
@@ -694,6 +730,9 @@ create_session (Call *call)
g_variant_builder_add (&options, "{sv}", "session_handle_token", g_variant_new_string (session_token));
g_variant_builder_add (&options, "{sv}", "capabilities", g_variant_new_uint32 (call->capabilities));
+ g_debug ("%s: %p calling CreateSession", G_STRFUNC, call);
+ call->in_flight++;
+ g_debug ("%s: %p now in-flight %d", G_STRFUNC, call, call->in_flight);
g_dbus_connection_call (call->portal->bus,
PORTAL_BUS_NAME,
PORTAL_OBJECT_PATH,
@@ -737,6 +776,7 @@ xdp_portal_create_input_capture_session (XdpPortal *portal,
call = g_new0 (Call, 1);
call->portal = g_object_ref (portal);
call->task = g_task_new (portal, cancellable, callback, data);
+ g_debug ("%s: new call %p", G_STRFUNC, call);
if (parent)
call->parent = xdp_parent_copy (parent);
@@ -771,6 +811,7 @@ xdp_portal_create_input_capture_session_finish (XdpPortal *portal,
g_return_val_if_fail (g_task_is_valid (result, portal), NULL);
session = g_task_propagate_pointer (G_TASK (result), error);
+ g_debug ("%s -> %p", G_STRFUNC, session);
if (session)
return session;
@@ -952,6 +993,7 @@ set_pointer_barriers (Call *call)
GVariantBuilder barriers;
g_autoptr(GVariantType) vtype = NULL;
+ g_debug ("%s: %p", G_STRFUNC, call);
prep_call (call, set_pointer_barriers_done, &options, NULL);
vtype = g_variant_type_new ("aa{sv}");
@@ -959,6 +1001,9 @@ set_pointer_barriers (Call *call)
g_variant_builder_init (&barriers, vtype);
g_list_foreach (call->barriers, convert_barrier, &barriers);
+ g_debug ("%s: %p calling SetPointerBarriers", G_STRFUNC, call);
+ call->in_flight++;
+ g_debug ("%s: %p now in-flight %d", G_STRFUNC, call, call->in_flight);
g_dbus_connection_call (call->portal->bus,
PORTAL_BUS_NAME,
PORTAL_OBJECT_PATH,
@@ -1023,6 +1068,7 @@ xdp_input_capture_session_set_pointer_barriers (XdpInputCaptureSession *session,
call->portal = g_object_ref (portal);
call->session = g_object_ref (session);
call->task = g_task_new (session, cancellable, callback, data);
+ g_debug ("%s: new call %p", G_STRFUNC, call);
call->barriers = barriers;
set_pointer_barriers (call);
@@ -1048,6 +1094,7 @@ xdp_input_capture_session_set_pointer_barriers_finish (XdpInputCaptureSession *
g_return_val_if_fail (_xdp_input_capture_session_is_valid (session), NULL);
g_return_val_if_fail (g_task_is_valid (result, session), NULL);
+ g_debug ("%s", G_STRFUNC);
return g_task_propagate_pointer (G_TASK (result), error);
}
@@ -1065,6 +1112,7 @@ xdp_input_capture_session_enable (XdpInputCaptureSession *session)
GVariantBuilder options;
g_return_if_fail (_xdp_input_capture_session_is_valid (session));
+ g_debug ("%s", G_STRFUNC);
portal = session->parent_session->portal;
@@ -1099,6 +1147,7 @@ xdp_input_capture_session_disable (XdpInputCaptureSession *session)
GVariantBuilder options;
g_return_if_fail (_xdp_input_capture_session_is_valid (session));
+ g_debug ("%s", G_STRFUNC);
g_variant_builder_init (&options, G_VARIANT_TYPE_VARDICT);
@@ -1130,6 +1179,7 @@ release_session (XdpInputCaptureSession *session,
GVariantBuilder options;
g_return_if_fail (_xdp_input_capture_session_is_valid (session));
+ g_debug ("%s", G_STRFUNC);
g_variant_builder_init (&options, G_VARIANT_TYPE_VARDICT);
g_variant_builder_add (&options, "{sv}", "activation_id", g_variant_new_uint32 (activation_id));
@@ -1170,6 +1220,7 @@ xdp_input_capture_session_release (XdpInputCaptureSession *session,
guint activation_id)
{
g_return_if_fail (_xdp_input_capture_session_is_valid (session));
+ g_debug ("%s", G_STRFUNC);
release_session (session, activation_id, FALSE, 0, 0);
}
@@ -1190,6 +1241,7 @@ xdp_input_capture_session_release_at (XdpInputCaptureSession *session,
gdouble cursor_y_position)
{
g_return_if_fail (_xdp_input_capture_session_is_valid (session));
+ g_debug ("%s", G_STRFUNC);
release_session (session, activation_id, TRUE, cursor_x_position, cursor_y_position);
} and the resulting log indicates that my theory in #169 (comment) is likely to have been correct:
If I remove the assertion I think this is going to need some sort of reference-counting (so that the Call stays alive for as long as there is at least one GDBus method call in-flight with it as the user-data), and perhaps separating the concept of "the Call is still ongoing" from "the Call has not been freed" (so that when the Call is no longer useful, it unsubscribes from signals and generally cancels as much as possible, but it remains a valid pointer in some sort of "tombstone" state for as long as it is the user-data on at least one GDBus method call). |
While trying to investigate flatpak#169 I noticed that inputcapture was using guint instead of the correct gulong for GObject signal IDs, which could conceivably result in a signal ID being truncated and the wrong signal being disconnected (although in practice signal IDs are allocated starting from 0, so this is unlikely). The same pattern is seen in all of the other portals. Using the correct type also allows g_clear_signal_handler() to be used, for idempotent signal disconnections. Signed-off-by: Simon McVittie <[email protected]>
To avoid a use-after-free (flatpak#169), we need to make sure that there is at least one reference held to the memory used to store the Call struct for the duration of each D-Bus method call, so consistently take a new ref to the user-data of g_dbus_connection_call(), and release it in call_returned(). However, we also need to keep the Call alive for as long as it is listening to any D-Bus signals, without introducing a long-term circular reference that would make it be leaked indefinitely. To achieve this, create a temporary circular reference between the Call and the GTask, but break it via call_dispose() every time we reach a resolution to the task, whether that's success or a failure. Resolves: flatpak#169 Resolves: flatpak#190 Signed-off-by: Simon McVittie <[email protected]>
To avoid a use-after-free (flatpak#169), we need to make sure that there is at least one reference held to the memory used to store the Call struct for the duration of each D-Bus method call, so consistently take a new ref to the user-data of g_dbus_connection_call(), and release it in call_returned(). However, we also need to keep the Call alive for as long as it is listening to any D-Bus signals, without introducing a long-term circular reference that would make it be leaked indefinitely. To achieve this, create a temporary circular reference between the Call and the GTask, but break it via call_dispose() every time we reach a resolution to the task, whether that's success or a failure. Resolves: flatpak#169 Resolves: flatpak#190 Signed-off-by: Simon McVittie <[email protected]>
To avoid a use-after-free (flatpak#169), we need to make sure that there is at least one reference held to the memory used to store the Call struct for the duration of each D-Bus method call, so consistently take a new ref to the user-data of g_dbus_connection_call(), and release it in call_returned(). However, we also need to keep the Call alive for as long as it is listening to any D-Bus signals, without introducing a long-term circular reference that would make it be leaked indefinitely. To achieve this, create a temporary circular reference between the Call and the GTask, but break it via call_dispose() every time we reach a resolution to the task, whether that's success or a failure. Resolves: flatpak#169 Resolves: flatpak#190 Signed-off-by: Simon McVittie <[email protected]>
To avoid a use-after-free (#169), we need to make sure that there is at least one reference held to the memory used to store the Call struct for the duration of each D-Bus method call, so consistently take a new ref to the user-data of g_dbus_connection_call(), and release it in call_returned(). However, we also need to keep the Call alive for as long as it is listening to any D-Bus signals, without introducing a long-term circular reference that would make it be leaked indefinitely. To achieve this, create a temporary circular reference between the Call and the GTask, but break it via call_dispose() every time we reach a resolution to the task, whether that's success or a failure. Resolves: #169 Resolves: #190 Signed-off-by: Simon McVittie <[email protected]>
To avoid a use-after-free (#169), we need to make sure that there is at least one reference held to the memory used to store the Call struct for the duration of each D-Bus method call, so consistently take a new ref to the user-data of g_dbus_connection_call(), and release it in call_returned(). However, we also need to keep the Call alive for as long as it is listening to any D-Bus signals, without introducing a long-term circular reference that would make it be leaked indefinitely. To achieve this, create a temporary circular reference between the Call and the GTask, but break it via call_dispose() every time we reach a resolution to the task, whether that's success or a failure. Resolves: #169 Resolves: #190 Signed-off-by: Simon McVittie <[email protected]>
While trying to investigate #169 I noticed that inputcapture was using guint instead of the correct gulong for GObject signal IDs, which could conceivably result in a signal ID being truncated and the wrong signal being disconnected (although in practice signal IDs are allocated starting from 0, so this is unlikely). The same pattern is seen in all of the other portals. Using the correct type also allows g_clear_signal_handler() to be used, for idempotent signal disconnections. Signed-off-by: Simon McVittie <[email protected]>
To avoid a use-after-free (flatpak#169), we need to make sure that there is at least one reference held to the memory used to store the Call struct for the duration of each D-Bus method call, so consistently take a new ref to the user-data of g_dbus_connection_call(), and release it in call_returned(). However, we also need to keep the Call alive for as long as it is listening to any D-Bus signals, without introducing a long-term circular reference that would make it be leaked indefinitely. To achieve this, create a temporary circular reference between the Call and the GTask, but break it via call_dispose() every time we reach a resolution to the task, whether that's success or a failure. Resolves: flatpak#169 Resolves: flatpak#190 Signed-off-by: Simon McVittie <[email protected]>
To avoid a use-after-free (#169), we need to make sure that there is at least one reference held to the memory used to store the Call struct for the duration of each D-Bus method call, so consistently take a new ref to the user-data of g_dbus_connection_call(), and release it in call_returned(). However, we also need to keep the Call alive for as long as it is listening to any D-Bus signals, without introducing a long-term circular reference that would make it be leaked indefinitely. To achieve this, create a temporary circular reference between the Call and the GTask, but break it via call_dispose() every time we reach a resolution to the task, whether that's success or a failure. Resolves: #169 Resolves: #190 Signed-off-by: Simon McVittie <[email protected]>
Uh oh!
There was an error while loading. Please reload this page.
While trying to reproduce #166 interactively, I encountered a segfault in the tests.
To reproduce:
mkdir _build
podman run --rm -it -w $(pwd) -v $(pwd):$(pwd):ro -v $(pwd)/_build:$(pwd)/_build:rw debian:sid-slim
sed -i -e 's/Types:.*/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources
apt update
apt upgrade
apt build-dep libportal
meson setup _build
meson compile -C _build
meson test -C _build --timeout-multiplier=3 --repeat=20 pytest
Backtrace:
and remaining stack frames are CPython.
It looks like a use-after-free of the
Call
: all of its pointer members point to inaccessible memory.The text was updated successfully, but these errors were encountered: