Skip to content

Resolve Issue#29 #31

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 12, 2021
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Resolve issue#29: collector hanging after backend exit/error.
  • Loading branch information
ololobus committed Dec 15, 2020
commit 085b3c510d24eb83698bdb503da70801f41ce374
79 changes: 56 additions & 23 deletions pg_wait_sampling.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ shm_mq *collector_mq = NULL;
uint64 *proc_queryids = NULL;
CollectorShmqHeader *collector_hdr = NULL;

/*
 * Receiver (backend) local shm_mq pointers and lock.
 *
 * These live at file scope (rather than inside receive_array()) so that
 * pgws_cleanup_callback() can reach them when it runs.
 */
/* Queue created by receive_array() via shm_mq_create() on collector_mq. */
shm_mq *recv_mq = NULL;
/* Handle returned by shm_mq_attach() for recv_mq in receive_array(). */
shm_mq_handle *recv_mqh = NULL;
/* Lock tag set up by init_lock_tag(..., PGWS_QUEUE_LOCK) in receive_array(). */
LOCKTAG queueTag;

static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
static PGPROC * search_proc(int backendPid);
static PlannedStmt *pgws_planner_hook(Query *parse,
Expand Down Expand Up @@ -290,6 +295,14 @@ check_shmem(void)
}
}

/*
 * Cleanup callback used by receive_array() through
 * PG_ENSURE_ERROR_CLEANUP() / PG_END_ENSURE_ERROR_CLEANUP().
 *
 * Detaches from the message queue referenced by the file-level recv_mqh /
 * recv_mq pointers and then releases the ExclusiveLock identified by
 * queueTag.  Both arguments (code, arg) are unused.
 *
 * NOTE(review): the error paths inside receive_array() appear to call
 * shm_mq_detach_compat() themselves right before elog(ERROR), so this
 * callback may detach the same handle a second time -- confirm whether
 * that is safe or whether those explicit detaches should be removed.
 */
static void
pgws_cleanup_callback(int code, Datum arg)
{
elog(DEBUG3, "pg_wait_sampling cleanup: detaching shm_mq and releasing queue lock");
/* Detach from the queue first, then release the queue lock. */
shm_mq_detach_compat(recv_mqh, recv_mq);
LockRelease(&queueTag, ExclusiveLock, false);
}

/*
* Module load callback
*/
Expand Down Expand Up @@ -499,16 +512,14 @@ init_lock_tag(LOCKTAG *tag, uint32 lock)
static void *
receive_array(SHMRequest request, Size item_size, Size *count)
{
LOCKTAG queueTag;
LOCKTAG collectorTag;
shm_mq *mq;
shm_mq_handle *mqh;
shm_mq_result res;
Size len,
i;
void *data;
Pointer result,
ptr;
MemoryContext oldctx;

/* Ensure nobody else trying to send request to queue */
init_lock_tag(&queueTag, PGWS_QUEUE_LOCK);
Expand All @@ -519,7 +530,7 @@ receive_array(SHMRequest request, Size item_size, Size *count)
LockAcquire(&collectorTag, ExclusiveLock, false, false);
LockRelease(&collectorTag, ExclusiveLock, false);

mq = shm_mq_create(collector_mq, COLLECTOR_QUEUE_SIZE);
recv_mq = shm_mq_create(collector_mq, COLLECTOR_QUEUE_SIZE);
collector_hdr->request = request;

if (!collector_hdr->latch)
Expand All @@ -528,33 +539,55 @@ receive_array(SHMRequest request, Size item_size, Size *count)

SetLatch(collector_hdr->latch);

shm_mq_set_receiver(mq, MyProc);
mqh = shm_mq_attach(mq, NULL, NULL);
shm_mq_set_receiver(recv_mq, MyProc);

res = shm_mq_receive(mqh, &len, &data, false);
if (res != SHM_MQ_SUCCESS || len != sizeof(*count))
{
shm_mq_detach_compat(mqh, mq);
elog(ERROR, "Error reading mq.");
}
memcpy(count, data, sizeof(*count));

result = palloc(item_size * (*count));
ptr = result;
/*
* We switch to TopMemoryContext so that recv_mqh is allocated there
* and is guaranteed to survive until the before_shmem_exit callbacks are
* fired. In any case, shm_mq_detach() will free the handle on its own.
*/
oldctx = MemoryContextSwitchTo(TopMemoryContext);
recv_mqh = shm_mq_attach(recv_mq, NULL, NULL);
MemoryContextSwitchTo(oldctx);

for (i = 0; i < *count; i++)
/*
* Now we have surely attached to the shm_mq and got the collector's
* attention. If anything goes wrong (e.g. Ctrl+C received from the client)
* we have to clean up some things, i.e. detach from the shm_mq, so that the
* collector is able to continue responding to other requests.
*
* PG_ENSURE_ERROR_CLEANUP() guarantees that the cleanup callback will be
* fired for both ERROR and FATAL.
*/
PG_ENSURE_ERROR_CLEANUP(pgws_cleanup_callback, 0);
{
res = shm_mq_receive(mqh, &len, &data, false);
if (res != SHM_MQ_SUCCESS || len != item_size)
res = shm_mq_receive(recv_mqh, &len, &data, false);
if (res != SHM_MQ_SUCCESS || len != sizeof(*count))
{
shm_mq_detach_compat(mqh, mq);
shm_mq_detach_compat(recv_mqh, recv_mq);
elog(ERROR, "Error reading mq.");
}
memcpy(ptr, data, item_size);
ptr += item_size;
memcpy(count, data, sizeof(*count));

result = palloc(item_size * (*count));
ptr = result;

for (i = 0; i < *count; i++)
{
res = shm_mq_receive(recv_mqh, &len, &data, false);
if (res != SHM_MQ_SUCCESS || len != item_size)
{
shm_mq_detach_compat(recv_mqh, recv_mq);
elog(ERROR, "Error reading mq.");
}
memcpy(ptr, data, item_size);
ptr += item_size;
}
}
PG_END_ENSURE_ERROR_CLEANUP(pgws_cleanup_callback, 0);

shm_mq_detach_compat(mqh, mq);
/* We still have to detach and release lock during normal operation. */
shm_mq_detach_compat(recv_mqh, recv_mq);

LockRelease(&queueTag, ExclusiveLock, false);

Expand Down