
Commit 9710b24

resolve semaphore leak
1 parent 2890d51 commit 9710b24

File tree

2 files changed: +151 -1 lines changed

internal/pool/pool.go

Lines changed: 5 additions & 1 deletion

@@ -576,7 +576,11 @@ func (p *ConnPool) queuedNewConn(ctx context.Context) (*Conn, error) {
 	// If dial completed before timeout, try to deliver connection to other waiters
 	if cn := w.cancel(); cn != nil {
 		p.putIdleConn(ctx, cn)
-		// freeTurn will be called by the dial goroutine or by the waiter who receives the connection
+		// Free the turn since:
+		// - Dial goroutine returned thinking delivery succeeded (tryDeliver returned true)
+		// - Original waiter won't call Put() (they got an error, not a connection)
+		// - Another waiter will receive this connection but won't free this turn
+		p.freeTurn()
 	}
 	// If dial hasn't completed yet, freeTurn will be called by the dial goroutine
 }
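
To make the reasoning in those comments concrete, here is a minimal, self-contained sketch of the turn (semaphore slot) accounting that queuedNewConn relies on. Every name in it (turnPool, getTurn, the 50 ms simulated dial) is a hypothetical stand-in rather than go-redis internals; the point is only that once the dial goroutine believes delivery succeeded and the original waiter returns an error, the timed-out branch is the last place left to release the slot.

// turnleak_sketch.go - hypothetical illustration of the buffered-channel-as-semaphore
// "turn" pattern and the leak this commit fixes; NOT go-redis internals.
package main

import (
	"errors"
	"fmt"
	"sync"
	"time"
)

type turnPool struct {
	turns chan struct{} // semaphore: one token held per in-flight request
}

func newTurnPool(size int) *turnPool {
	return &turnPool{turns: make(chan struct{}, size)}
}

func (p *turnPool) getTurn()  { p.turns <- struct{}{} } // acquire a slot (blocks when full)
func (p *turnPool) freeTurn() { <-p.turns }             // release a slot

// get acquires a turn, starts a slow "dial", and waits for it or a timeout.
func (p *turnPool) get(timeout time.Duration) (string, error) {
	p.getTurn()

	result := make(chan string, 1) // buffered so the dial goroutine never blocks
	go func() {
		time.Sleep(50 * time.Millisecond) // simulated dial
		result <- "conn"
	}()

	select {
	case cn := <-result:
		p.freeTurn() // happy path: connection used and slot released (simplified)
		return cn, nil
	case <-time.After(timeout):
		// Timed-out path: the dial goroutine believes delivery succeeded and the
		// original caller returns an error, so nobody else will release this slot.
		// Omitting this freeTurn is the semaphore leak the commit resolves.
		p.freeTurn()
		return "", errors.New("pool timeout")
	}
}

func main() {
	p := newTurnPool(2)

	var wg sync.WaitGroup
	for i := 0; i < 6; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			if _, err := p.get(10 * time.Millisecond); err != nil {
				fmt.Printf("request %d: %v\n", i, err)
			}
		}(i)
	}
	wg.Wait()

	fmt.Println("turns still held:", len(p.turns)) // expect 0 when no slot leaks
}

Running the sketch should report zero turns still held; dropping the freeTurn call in the timeout branch leaves slots permanently occupied, which is the leak the commit title refers to.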
Lines changed: 146 additions & 0 deletions

@@ -0,0 +1,146 @@
package pool_test

import (
	"context"
	"net"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/redis/go-redis/v9/internal/pool"
)

// TestRaceConditionFreeTurn tests the race condition where:
// 1. Dial completes and tryDeliver succeeds
// 2. Waiter's context times out before receiving from the result channel
// 3. Waiter's defer retrieves the connection via cancel() and delivers it to another waiter
// 4. The turn must be freed by the defer, not by the dial goroutine or the new waiter
func TestRaceConditionFreeTurn(t *testing.T) {
	// Create a pool with PoolSize=2 to make the race easier to trigger
	opt := &pool.Options{
		Dialer: func(ctx context.Context) (net.Conn, error) {
			// Slow dial to allow the context timeout to race with delivery
			time.Sleep(50 * time.Millisecond)
			return dummyDialer(ctx)
		},
		PoolSize:           2,
		MaxConcurrentDials: 2,
		DialTimeout:        1 * time.Second,
		PoolTimeout:        1 * time.Second,
	}

	connPool := pool.NewConnPool(opt)
	defer connPool.Close()

	// Run multiple iterations to increase the chance of hitting the race
	for iteration := 0; iteration < 10; iteration++ {
		// Request 1: will time out quickly
		ctx1, cancel1 := context.WithTimeout(context.Background(), 30*time.Millisecond)
		defer cancel1()

		var wg sync.WaitGroup
		wg.Add(2)

		// Goroutine 1: request with a short timeout
		go func() {
			defer wg.Done()
			cn, err := connPool.Get(ctx1)
			if err == nil {
				// If we got a connection, put it back
				connPool.Put(ctx1, cn)
			}
			// Expected: context deadline exceeded
		}()

		// Goroutine 2: request with a longer timeout, should receive the orphaned connection
		go func() {
			defer wg.Done()
			time.Sleep(20 * time.Millisecond) // Start slightly after the first request
			ctx2, cancel2 := context.WithTimeout(context.Background(), 200*time.Millisecond)
			defer cancel2()

			cn, err := connPool.Get(ctx2)
			if err != nil {
				t.Logf("Request 2 error: %v", err)
				return
			}
			// Got a connection, put it back
			connPool.Put(ctx2, cn)
		}()

		wg.Wait()

		// Give some time for all operations to complete
		time.Sleep(100 * time.Millisecond)

		// Check QueueLen - should be 0 (all turns freed)
		queueLen := connPool.QueueLen()
		if queueLen != 0 {
			t.Errorf("Iteration %d: QueueLen = %d, expected 0 (turn leak detected!)", iteration, queueLen)
		}
	}
}

// TestRaceConditionFreeTurnStress is a stress test for the race condition
func TestRaceConditionFreeTurnStress(t *testing.T) {
	var dialIndex atomic.Int32
	opt := &pool.Options{
		Dialer: func(ctx context.Context) (net.Conn, error) {
			// Variable dial time to create more race opportunities;
			// use an atomic increment to avoid a data race
			idx := dialIndex.Add(1)
			time.Sleep(time.Duration(10+idx%40) * time.Millisecond)
			return dummyDialer(ctx)
		},
		PoolSize:           10,
		MaxConcurrentDials: 10,
		DialTimeout:        1 * time.Second,
		PoolTimeout:        500 * time.Millisecond,
	}

	connPool := pool.NewConnPool(opt)
	defer connPool.Close()

	const numRequests = 50

	var wg sync.WaitGroup
	wg.Add(numRequests)

	// Launch many concurrent requests with varying timeouts
	for i := 0; i < numRequests; i++ {
		go func(i int) {
			defer wg.Done()

			// Varying timeouts to create race conditions
			timeout := time.Duration(20+i%80) * time.Millisecond
			ctx, cancel := context.WithTimeout(context.Background(), timeout)
			defer cancel()

			cn, err := connPool.Get(ctx)
			if err == nil {
				// Simulate some work
				time.Sleep(time.Duration(i%20) * time.Millisecond)
				connPool.Put(ctx, cn)
			}
		}(i)
	}

	wg.Wait()

	// Give time for all cleanup to complete
	time.Sleep(200 * time.Millisecond)

	// Check for turn leaks
	queueLen := connPool.QueueLen()
	if queueLen != 0 {
		t.Errorf("QueueLen = %d, expected 0 (turn leak detected!)", queueLen)
		t.Errorf("This indicates that some turns were never freed")
	}

	// Also check pool stats
	stats := connPool.Stats()
	t.Logf("Pool stats: Hits=%d, Misses=%d, Timeouts=%d, TotalConns=%d, IdleConns=%d",
		stats.Hits, stats.Misses, stats.Timeouts, stats.TotalConns, stats.IdleConns)
}
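
Assuming the new test file sits alongside pool.go under internal/pool, both tests are timing-sensitive, so running them under the race detector and without cached results is the natural way to exercise them:

go test -race -count=1 -run 'TestRaceConditionFreeTurn' ./internal/pool/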
