Skip to content

Commit d427703

Browse files
authored
Attempt to fix a flaky coroutine-dump-verifying test (#4589)
Fixes #4418 (unless it keeps happening) This problem couldn't be reproduced locally, to this fix is purely analytical. The problematic test attempts to launch a coroutine then await until the coroutine suspends. The way it was doing that before the change is: - Hold a monitor and `wait` on the test body side; - Acquire a monitor and `notify` on the coroutine side *right before* the suspension point; - On the test body side, wait for the coroutine thread to enter the `TIMED_WAIT` state, indicating that its scheduler worker has finished its piece of work and now waits for new commands, which must mean the suspension point was reached. The problem is that thread states are not synchronization primitives, and no happens-before is established between the code a thread executes before the state change and the code right after the state change is observed. With this change, we establish a complete happens-before chain: - The test body wakes up after it's `resume`d as a coroutine. - `complete` on a latch happens-before the `resume`. - The suspension happens-before the `complete`, as suspension and the `complete` are done in the same thread. With no way to verify the fix, it's unclear if that was the problem, so we can only hope the change helps.
1 parent 2553230 commit d427703

File tree

1 file changed

+25
-15
lines changed

1 file changed

+25
-15
lines changed

kotlinx-coroutines-debug/test/CoroutinesDumpTest.kt

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,28 +2,27 @@ package kotlinx.coroutines.debug
22

33
import kotlinx.coroutines.testing.*
44
import kotlinx.coroutines.*
5-
import org.junit.*
6-
import org.junit.Test
7-
import kotlin.coroutines.*
85
import kotlin.test.*
96

107
class CoroutinesDumpTest : DebugTestBase() {
118
private val monitor = Any()
129
private var coroutineThread: Thread? = null // guarded by monitor
1310

14-
@Before
11+
@BeforeTest
1512
override fun setUp() {
1613
super.setUp()
1714
DebugProbes.enableCreationStackTraces = true
1815
}
1916

2017
@Test
2118
fun testSuspendedCoroutine() = runBlocking {
22-
val deferred = async(Dispatchers.Default) {
23-
sleepingOuterMethod()
19+
val latch = CompletableDeferred<Unit>()
20+
val singleThreadedDispatcher = newSingleThreadContext("TestCoroutineThread")
21+
val deferred = async(singleThreadedDispatcher) {
22+
sleepingOuterMethod(singleThreadedDispatcher, latch)
2423
}
2524

26-
awaitCoroutine()
25+
latch.await()
2726
val found = DebugProbes.dumpCoroutinesInfo().single { it.job === deferred }
2827
verifyDump(
2928
"Coroutine \"coroutine#1\":DeferredCoroutine{Active}@1e4a7dd4, state: SUSPENDED\n" +
@@ -35,6 +34,7 @@ class CoroutinesDumpTest : DebugTestBase() {
3534
"\tat kotlinx.coroutines.CoroutineStart.invoke(CoroutineStart.kt)\n",
3635
ignoredCoroutine = "BlockingCoroutine"
3736
) {
37+
singleThreadedDispatcher.close()
3838
deferred.cancel()
3939
coroutineThread!!.interrupt()
4040
}
@@ -181,29 +181,39 @@ class CoroutinesDumpTest : DebugTestBase() {
181181
private suspend fun nestedActiveMethod(shouldSuspend: Boolean) {
182182
if (shouldSuspend) yield()
183183
notifyCoroutineStarted()
184-
while (coroutineContext[Job]!!.isActive) {
184+
while (currentCoroutineContext()[Job]!!.isActive) {
185185
try {
186186
Thread.sleep(60_000)
187187
} catch (_ : InterruptedException) {
188188
}
189189
}
190190
}
191191

192-
private suspend fun sleepingOuterMethod() {
193-
sleepingNestedMethod()
194-
yield() // TCE
192+
private suspend fun sleepingOuterMethod(currentDispatcher: CoroutineDispatcher, latch: CompletableDeferred<Unit>) {
193+
sleepingNestedMethod(currentDispatcher, latch)
194+
yield() // TCE: make sure `sleepingOuterMethod` is contained in the continuation of `sleepingNestedMethod`
195195
}
196196

197-
private suspend fun sleepingNestedMethod() {
198-
yield() // Suspension point
199-
notifyCoroutineStarted()
197+
private suspend fun sleepingNestedMethod(currentDispatcher: CoroutineDispatcher, latch: CompletableDeferred<Unit>) {
198+
/* Schedule a computation on the current single-threaded dispatcher.
199+
Since that thread is currently running this code,
200+
the start notification will happen *after* the currently running coroutine suspends. */
201+
currentDispatcher.dispatch(currentDispatcher) {
202+
coroutineThread = Thread.currentThread()
203+
latch.complete(Unit)
204+
}
200205
delay(Long.MAX_VALUE)
206+
yield() // TCE: make sure `sleepingNestedMethod` is contained in the continuation of `delay`
201207
}
202208

203209
private fun awaitCoroutine() = synchronized(monitor) {
204210
while (coroutineThread == null) (monitor as Object).wait()
205211
while (coroutineThread!!.state != Thread.State.TIMED_WAITING) {
206-
// Wait until thread sleeps to have a consistent stacktrace
212+
/* Wait until the thread we're awaiting goes to sleep.
213+
Note: this does not establish a happens-before relationship with the thread entering `Thread.sleep`.
214+
There still doesn't seem to be any guarantee that the stacktrace will be accurately updated.
215+
<https://docs.oracle.com/javase/8/docs/api/java/lang/Thread.html#getStackTrace--> says that how accurate
216+
a stacktrace is is implementation-dependent. */
207217
}
208218
}
209219

0 commit comments

Comments
 (0)