From 4f6e62d50c434509a7c42e1080ad0c6d6795c63d Mon Sep 17 00:00:00 2001 From: Eric Anderson Date: Thu, 24 Dec 2020 02:37:42 -0800 Subject: [PATCH] Avoid unsynchronized access to scheduledTaskQueue in GlobalEventExecutor (#10890) Motivation: A race detector discovered a data race in GlobalEventExecutor present in netty 4.1.51.Final: ``` Write of size 4 at 0x0000cea08774 by thread T103: #0 io.netty.util.internal.DefaultPriorityQueue.poll()Lio/netty/util/internal/PriorityQueueNode; DefaultPriorityQueue.java:113 #1 io.netty.util.internal.DefaultPriorityQueue.poll()Ljava/lang/Object; DefaultPriorityQueue.java:31 #2 java.util.AbstractQueue.remove()Ljava/lang/Object; AbstractQueue.java:113 #3 io.netty.util.concurrent.AbstractScheduledEventExecutor.pollScheduledTask(J)Ljava/lang/Runnable; AbstractScheduledEventExecutor.java:133 #4 io.netty.util.concurrent.GlobalEventExecutor.fetchFromScheduledTaskQueue()V GlobalEventExecutor.java:119 #5 io.netty.util.concurrent.GlobalEventExecutor.takeTask()Ljava/lang/Runnable; GlobalEventExecutor.java:106 #6 io.netty.util.concurrent.GlobalEventExecutor$TaskRunner.run()V GlobalEventExecutor.java:240 #7 io.netty.util.internal.ThreadExecutorMap$2.run()V ThreadExecutorMap.java:74 #8 io.netty.util.concurrent.FastThreadLocalRunnable.run()V FastThreadLocalRunnable.java:30 #9 java.lang.Thread.run()V Thread.java:835 #10 (Generated Stub) Previous read of size 4 at 0x0000cea08774 by thread T110: #0 io.netty.util.internal.DefaultPriorityQueue.size()I DefaultPriorityQueue.java:46 #1 io.netty.util.concurrent.GlobalEventExecutor$TaskRunner.run()V GlobalEventExecutor.java:263 #2 io.netty.util.internal.ThreadExecutorMap$2.run()V ThreadExecutorMap.java:74 #3 io.netty.util.concurrent.FastThreadLocalRunnable.run()V FastThreadLocalRunnable.java:30 #4 java.lang.Thread.run()V Thread.java:835 #5 (Generated Stub) ``` The race is legit, but benign. To trigger it requires a TaskRunner to begin exiting and set 'started' to false, more work to be scheduled which starts a new TaskRunner, that work then needs to schedule additional work which modifies 'scheduledTaskQueue', and then the original TaskRunner checks 'scheduledTaskQueue'. But there is no danger to this race as it can only produce a false negative in the condition which causes the code to CAS 'started' which is thread-safe. Modifications: Delete problematic references to scheduledTaskQueue. The only way scheduledTaskQueue could be modified since the last check is if another TaskRunner is running, in which case the current TaskRunner doesn't care. Result: Data-race free code, and a bit less code to boot. --- .../java/io/netty/util/concurrent/GlobalEventExecutor.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/common/src/main/java/io/netty/util/concurrent/GlobalEventExecutor.java b/common/src/main/java/io/netty/util/concurrent/GlobalEventExecutor.java index 40f69c4a93bb..b3a5f39347df 100644 --- a/common/src/main/java/io/netty/util/concurrent/GlobalEventExecutor.java +++ b/common/src/main/java/io/netty/util/concurrent/GlobalEventExecutor.java @@ -260,7 +260,9 @@ public void run() { assert stopped; // Check if there are pending entries added by execute() or schedule*() while we do CAS above. - if (taskQueue.isEmpty() && (scheduledTaskQueue == null || scheduledTaskQueue.size() == 1)) { + // Do not check scheduledTaskQueue because it is not thread-safe and can only be mutated from a + // TaskRunner actively running tasks. + if (taskQueue.isEmpty()) { // A) No new task was added and thus there's nothing to handle // -> safe to terminate because there's nothing left to do // B) A new thread started and handled all the new tasks.