Skip to content

defer largeObject.close seem to cause a race condition (break connection in a weird way - causes 'conn busy') #1908

Closed
@borod108

Description

@borod108

Describe the bug
After using "defer obj.close" in a function, other functions that get the released connection from the pool fail with 'conn busy'
There seems to be a race condition.
To Reproduce
A runnable example: (this is the smallest one I managed to create, note if "defer obj.close" is removed, everything works fine.

package main

import (
	"context"
	"fmt"
	"github.com/jackc/pgx/v4"
	"github.com/jackc/pgx/v4/pgxpool"
	"os"
	"sync"
)

func main() {
	dbURL := os.Getenv("DATABASE_URL")
	ctx := context.Background()
	ctx, _ = context.WithCancel(ctx)
	pool, err := pgxpool.Connect(ctx, dbURL)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Unable to connect to database: %v\n", err)
		os.Exit(1)
	}
	defer pool.Close()

	numGoroutines := 20

	var wg sync.WaitGroup
	for i := 0; i < numGoroutines; i++ {
		wg.Add(1)
		go func(routineId int) {
			defer wg.Done()
			if err := openLargeObject(ctx, pool, routineId); err != nil {
				fmt.Fprintf(os.Stderr, "Error in openLargeObject: %v\n", err)
				return
			}
		}(i)
	}
	wg.Wait()
	fmt.Println("All goroutines done.")
}

func openLargeObject(ctx context.Context, pool *pgxpool.Pool, routineId int) error {
	tx, err := pool.Begin(ctx)
	if err != nil {
		return fmt.Errorf("error in routine %d at - begin: %w", routineId, err)
	}
	defer func() {
		tx.Rollback(ctx)
	}()

	objects := tx.LargeObjects()
	oid, err := objects.Create(ctx, 0)
	if err != nil {
		return fmt.Errorf("error in routine %d at - create %w", routineId, err)
	}
	obj, err := objects.Open(ctx, oid, pgx.LargeObjectModeWrite)
	if err != nil {
		return fmt.Errorf("error in routine %d at - open %w", routineId, err)
	}

	//  Comment this line and all is well
	defer obj.Close()

	if err := tx.Commit(ctx); err != nil {
		return fmt.Errorf("error in routine %d at - commit %w", routineId, err)
	}

	fmt.Printf("routine %d: Completed with no error\n", routineId)
	return nil
}

Please run your example with the race detector enabled. For example, go run -race main.go or go test -race.

Expected behavior

routine 8: Completed with no error
routine 0: Completed with no error
routine 12: Completed with no error
routine 13: Completed with no error
routine 2: Completed with no error
routine 14: Completed with no error
routine 11: Completed with no error
routine 15: Completed with no error
routine 10: Completed with no error
routine 17: Completed with no error
routine 1: Completed with no error
routine 9: Completed with no error
routine 18: Completed with no error
routine 19: Completed with no error
routine 3: Completed with no error
routine 6: Completed with no error
routine 5: Completed with no error
routine 7: Completed with no error
routine 4: Completed with no error
routine 16: Completed with no error
All goroutines done.

Actual behavior

routine 0: Completed with no error
==================
WARNING: DATA RACE
Read at 0x00c00009c040 by goroutine 18:
  github.com/jackc/pgconn.(*PgConn).lock()
      /Users/bod108/go/pkg/mod/github.com/jackc/pgconn@v1.14.0/pgconn.go:701 +0x4c
  github.com/jackc/pgconn.(*PgConn).Exec()
      /Users/bod108/go/pkg/mod/github.com/jackc/pgconn@v1.14.0/pgconn.go:980 +0xbc
  github.com/jackc/pgx/v4.(*Conn).execSimpleProtocol()
      /Users/bod108/go/pkg/mod/github.com/jackc/pgx/v4@v4.18.1/conn.go:459 +0xa4
  github.com/jackc/pgx/v4.(*Conn).exec()
      /Users/bod108/go/pkg/mod/github.com/jackc/pgx/v4@v4.18.1/conn.go:429 +0x3a4
  github.com/jackc/pgx/v4.(*Conn).Exec()
      /Users/bod108/go/pkg/mod/github.com/jackc/pgx/v4@v4.18.1/conn.go:389 +0x78
  github.com/jackc/pgx/v4.(*Conn).BeginTx()
      /Users/bod108/go/pkg/mod/github.com/jackc/pgx/v4@v4.18.1/tx.go:86 +0x9c
  github.com/jackc/pgx/v4/pgxpool.(*Conn).BeginTx()
      /Users/bod108/go/pkg/mod/github.com/jackc/pgx/v4@v4.18.1/pgxpool/conn.go:113 +0x104
  github.com/jackc/pgx/v4/pgxpool.(*Pool).BeginTx()
      /Users/bod108/go/pkg/mod/github.com/jackc/pgx/v4@v4.18.1/pgxpool/pool.go:688 +0x98
  github.com/jackc/pgx/v4/pgxpool.(*Pool).Begin()
      /Users/bod108/go/pkg/mod/github.com/jackc/pgx/v4@v4.18.1/pgxpool/pool.go:675 +0x74
  main.openLargeObject()
      /Users/bod108/GolandProjects/reproduceconnbusy/rep.do.go:42 +0x40
  main.main.func1()
      /Users/bod108/GolandProjects/reproduceconnbusy/rep.do.go:31 +0x9c
  main.main.func3()
      /Users/bod108/GolandProjects/reproduceconnbusy/rep.do.go:35 +0x44

Previous write at 0x00c00009c040 by goroutine 15:
  github.com/jackc/pgconn.(*PgConn).lock()
      /Users/bod108/go/pkg/mod/github.com/jackc/pgconn@v1.14.0/pgconn.go:709 +0xec
  github.com/jackc/pgconn.(*PgConn).Prepare()
      /Users/bod108/go/pkg/mod/github.com/jackc/pgconn@v1.14.0/pgconn.go:815 +0xe0
  github.com/jackc/pgconn/stmtcache.(*LRU).prepare()
      /Users/bod108/go/pkg/mod/github.com/jackc/pgconn@v1.14.0/stmtcache/lru.go:157 +0x174
  github.com/jackc/pgconn/stmtcache.(*LRU).Get()
      /Users/bod108/go/pkg/mod/github.com/jackc/pgconn@v1.14.0/stmtcache/lru.go:76 +0x554
  github.com/jackc/pgx/v4.(*Conn).exec()
      /Users/bod108/go/pkg/mod/github.com/jackc/pgx/v4@v4.18.1/conn.go:433 +0x1d4
  github.com/jackc/pgx/v4.(*Conn).Exec()
      /Users/bod108/go/pkg/mod/github.com/jackc/pgx/v4@v4.18.1/conn.go:389 +0x78
  github.com/jackc/pgx/v4.(*dbTx).Exec()
      /Users/bod108/go/pkg/mod/github.com/jackc/pgx/v4@v4.18.1/tx.go:267 +0x70
  github.com/jackc/pgx/v4.(*LargeObject).Close()
      /Users/bod108/go/pkg/mod/github.com/jackc/pgx/v4@v4.18.1/large_objects.go:119 +0xf4
  main.openLargeObject.func2()
      /Users/bod108/GolandProjects/reproduceconnbusy/rep.do.go:61 +0x34
  runtime.deferreturn()
      /opt/homebrew/Cellar/go/1.21.6/libexec/src/runtime/panic.go:477 +0x34
  main.main.func1()
      /Users/bod108/GolandProjects/reproduceconnbusy/rep.do.go:31 +0x9c
  main.main.func3()
      /Users/bod108/GolandProjects/reproduceconnbusy/rep.do.go:35 +0x44

Goroutine 18 (running) created at:
  main.main()
      /Users/bod108/GolandProjects/reproduceconnbusy/rep.do.go:29 +0x210

Goroutine 15 (running) created at:
  main.main()
      /Users/bod108/GolandProjects/reproduceconnbusy/rep.do.go:29 +0x210
==================
Error in openLargeObject: error in routine 3 at - begin: conn busy
routine 2: Completed with no error
Error in openLargeObject: error in routine 16 at - begin: conn busy
routine 7: Completed with no error
routine 4: Completed with no error
Error in openLargeObject: error in routine 14 at - begin: conn busy
Error in openLargeObject: error in routine 15 at - begin: conn busy
routine 8: Completed with no error
Error in openLargeObject: error in routine 13 at - begin: conn busy
routine 5: Completed with no error
routine 6: Completed with no error
routine 11: Completed with no error
routine 19: Completed with no error
routine 10: Completed with no error
routine 1: Completed with no error
routine 12: Completed with no error
routine 9: Completed with no error
routine 17: Completed with no error
routine 18: Completed with no error
All goroutines done.
Found 1 data race(s)
exit status 66

Version

  • Go: $ go version -> go1.21.6 darwin/arm64

  • PostgreSQL: $ psql --no-psqlrc --tuples-only -c 'select version()' -> PostgreSQL 16.0 on aarch64-apple-darwin21.6.0, compiled by Apple clang version 14.0.0 (clang-1400.0.29.102), 64-bit

  • pgx: $ grep 'github.com/jackc/pgx/v[0-9]' go.mod -> v4.18.1

Metadata

Metadata

Assignees

No one assigned

    Labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions