Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backend connection queue #4030

Merged
merged 13 commits into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 125 additions & 1 deletion bin/varnishd/cache/cache_backend.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,21 @@

/*--------------------------------------------------------------------*/

enum connwait_e {
CW_DO_CONNECT = 1,
CW_QUEUED,
CW_DEQUEUED,
CW_BE_BUSY,
};

struct connwait {
unsigned magic;
#define CONNWAIT_MAGIC 0x75c7a52b
enum connwait_e cw_state;
VTAILQ_ENTRY(connwait) cw_list;
pthread_cond_t cw_cond;
};
dridi marked this conversation as resolved.
Show resolved Hide resolved

static const char * const vbe_proto_ident = "HTTP Backend";

static struct lock backends_mtx;
Expand Down Expand Up @@ -105,6 +120,60 @@ VBE_Connect_Error(struct VSC_vbe *vsc, int err)
dst = cache_param->tmx; \
} while (0)

#define FIND_BE_SPEC(tmx, dst, be, def) \
do { \
CHECK_OBJ_NOTNULL(bp, BACKEND_MAGIC); \
dst = be->tmx; \
if (dst == def) \
dst = cache_param->tmx; \
} while (0)

#define FIND_BE_PARAM(tmx, dst, be) \
FIND_BE_SPEC(tmx, dst, be, 0)

#define FIND_BE_TMO(tmx, dst, be) \
FIND_BE_SPEC(tmx, dst, be, -1.0)

#define BE_BUSY(be) \
(be->max_connections > 0 && be->n_conn >= be->max_connections)

/*--------------------------------------------------------------------*/

static void
vbe_connwait_signal_locked(const struct backend *bp)
{
struct connwait *cw;

Lck_AssertHeld(bp->director->mtx);

if (bp->n_conn < bp->max_connections) {
cw = VTAILQ_FIRST(&bp->cw_head);
if (cw != NULL) {
CHECK_OBJ(cw, CONNWAIT_MAGIC);
assert(cw->cw_state == CW_QUEUED);
PTOK(pthread_cond_signal(&cw->cw_cond));
}
}
}

static void
vbe_connwait_dequeue_locked(struct backend *bp, struct connwait *cw)
{
Lck_AssertHeld(bp->director->mtx);
VTAILQ_REMOVE(&bp->cw_head, cw, cw_list);
vbe_connwait_signal_locked(bp);
cw->cw_state = CW_DEQUEUED;
}

static void
vbe_connwait_fini(struct connwait *cw)
{
CHECK_OBJ_NOTNULL(cw, CONNWAIT_MAGIC);
assert(cw->cw_state != CW_QUEUED);
PTOK(pthread_cond_destroy(&cw->cw_cond));
FINI_OBJ(cw);
}

/*--------------------------------------------------------------------
* Get a connection to the backend
*
Expand All @@ -121,6 +190,10 @@ vbe_dir_getfd(VRT_CTX, struct worker *wrk, VCL_BACKEND dir, struct backend *bp,
vtim_dur tmod;
char abuf1[VTCP_ADDRBUFSIZE], abuf2[VTCP_ADDRBUFSIZE];
char pbuf1[VTCP_PORTBUFSIZE], pbuf2[VTCP_PORTBUFSIZE];
unsigned wait_limit;
walid-git marked this conversation as resolved.
Show resolved Hide resolved
vtim_dur wait_tmod;
vtim_dur wait_end;
struct connwait cw[1];

CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
CHECK_OBJ_NOTNULL(ctx->bo, BUSYOBJ_MAGIC);
Expand All @@ -135,20 +208,56 @@ vbe_dir_getfd(VRT_CTX, struct worker *wrk, VCL_BACKEND dir, struct backend *bp,
VSC_C_main->backend_unhealthy++;
return (NULL);
}
INIT_OBJ(cw, CONNWAIT_MAGIC);
PTOK(pthread_cond_init(&cw->cw_cond, NULL));
Lck_Lock(bp->director->mtx);
FIND_BE_PARAM(backend_wait_limit, wait_limit, bp);
FIND_BE_TMO(backend_wait_timeout, wait_tmod, bp);
cw->cw_state = CW_DO_CONNECT;
if (!VTAILQ_EMPTY(&bp->cw_head) || BE_BUSY(bp))
cw->cw_state = CW_BE_BUSY;

if (cw->cw_state == CW_BE_BUSY && wait_limit > 0 &&
wait_tmod > 0.0 && bp->cw_count < wait_limit) {
VTAILQ_INSERT_TAIL(&bp->cw_head, cw, cw_list);
bp->cw_count++;
VSC_C_main->backend_wait++;
cw->cw_state = CW_QUEUED;
wait_end = VTIM_real() + wait_tmod;
do {
err = Lck_CondWaitUntil(&cw->cw_cond, bp->director->mtx,
wait_end);
} while (err == EINTR);
bp->cw_count--;
if (err != 0 && BE_BUSY(bp)) {
VTAILQ_REMOVE(&bp->cw_head, cw, cw_list);
VSC_C_main->backend_wait_fail++;
cw->cw_state = CW_BE_BUSY;
dridi marked this conversation as resolved.
Show resolved Hide resolved
}
}
Lck_Unlock(bp->director->mtx);

if (bp->max_connections > 0 && bp->n_conn >= bp->max_connections) {
if (cw->cw_state == CW_BE_BUSY) {
VSLb(bo->vsl, SLT_FetchError,
"backend %s: busy", VRT_BACKEND_string(dir));
bp->vsc->busy++;
VSC_C_main->backend_busy++;
vbe_connwait_fini(cw);
return (NULL);
}

AZ(bo->htc);
bo->htc = WS_Alloc(bo->ws, sizeof *bo->htc);
/* XXX: we may want to detect the ws overflow sooner */
if (bo->htc == NULL) {
VSLb(bo->vsl, SLT_FetchError, "out of workspace");
/* XXX: counter ? */
if (cw->cw_state == CW_QUEUED) {
Lck_Lock(bp->director->mtx);
vbe_connwait_dequeue_locked(bp, cw);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we not move this whole htc alloc block to the top, even before the cw init?

Reason: the ws does not change during waiting, so if it overflows, it does so right from the start.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why would we allocate workspace before we are sure that we can get a backend connection ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we wouldn't be able to allocate the htc in the first place, why wait at all?

On the other hand, if we are effectively not connecting and workspace would have been too tight, then we fail for the wrong reason, and we don't visit vcl_backend_error at all.

Since the default values for the parameters make this an opt-in feature, may I suggest adding an XXX comment for now to take the proper time later to see how to best approach this? (snapshot/reset for certain paths for example)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have added an XXX comment as suggested

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On the other hand, if we are effectively not connecting and workspace would have been too tight, then we fail for the wrong reason, and we don't visit vcl_backend_error at all.

I disagree on "fail for the wrong reason". This code only runs because we do want to connect and having enough workspace is a precondition for the connect to succeed. Potentially running into the connection or wait limit does not make it a "wrong reason" to fail for insufficient workspace.

Lck_Unlock(bp->director->mtx);
}
vbe_connwait_fini(cw);
return (NULL);
}
bo->htc->doclose = SC_NULL;
Expand All @@ -165,6 +274,12 @@ vbe_dir_getfd(VRT_CTX, struct worker *wrk, VCL_BACKEND dir, struct backend *bp,
VRT_BACKEND_string(dir), err, VAS_errtxt(err));
VSC_C_main->backend_fail++;
bo->htc = NULL;
if (cw->cw_state == CW_QUEUED) {
Lck_Lock(bp->director->mtx);
vbe_connwait_dequeue_locked(bp, cw);
Lck_Unlock(bp->director->mtx);
}
vbe_connwait_fini(cw);
return (NULL);
}

Expand All @@ -177,6 +292,9 @@ vbe_dir_getfd(VRT_CTX, struct worker *wrk, VCL_BACKEND dir, struct backend *bp,
bp->n_conn++;
bp->vsc->conn++;
bp->vsc->req++;
if (cw->cw_state == CW_QUEUED)
vbe_connwait_dequeue_locked(bp, cw);

Lck_Unlock(bp->director->mtx);

CHECK_OBJ_NOTNULL(bo->htc->doclose, STREAM_CLOSE_MAGIC);
Expand All @@ -198,7 +316,9 @@ vbe_dir_getfd(VRT_CTX, struct worker *wrk, VCL_BACKEND dir, struct backend *bp,
bp->n_conn--;
bp->vsc->conn--;
bp->vsc->req--;
walid-git marked this conversation as resolved.
Show resolved Hide resolved
vbe_connwait_signal_locked(bp);
Lck_Unlock(bp->director->mtx);
vbe_connwait_fini(cw);
return (NULL);
}
bo->acct.bereq_hdrbytes += err;
Expand All @@ -217,6 +337,7 @@ vbe_dir_getfd(VRT_CTX, struct worker *wrk, VCL_BACKEND dir, struct backend *bp,
bo->htc->first_byte_timeout, bo, bp);
FIND_TMO(between_bytes_timeout,
bo->htc->between_bytes_timeout, bo, bp);
vbe_connwait_fini(cw);
return (pfd);
}

Expand Down Expand Up @@ -258,6 +379,7 @@ vbe_dir_finish(VRT_CTX, VCL_BACKEND d)
bp->vsc->conn--;
#define ACCT(foo) bp->vsc->foo += bo->acct.foo;
#include "tbl/acct_fields_bereq.h"
vbe_connwait_signal_locked(bp);
Lck_Unlock(bp->director->mtx);
bo->htc = NULL;
}
Expand Down Expand Up @@ -455,6 +577,7 @@ vbe_free(struct backend *be)
#undef DN
free(be->endpoint);

assert(VTAILQ_EMPTY(&be->cw_head));
FREE_OBJ(be);
}

Expand Down Expand Up @@ -687,6 +810,7 @@ VRT_new_backend_clustered(VRT_CTX, struct vsmw_cluster *vc,
ALLOC_OBJ(be, BACKEND_MAGIC);
if (be == NULL)
return (NULL);
VTAILQ_INIT(&be->cw_head);

#define DA(x) do { if (vrt->x != NULL) REPLACE((be->x), (vrt->x)); } while (0)
#define DN(x) do { be->x = vrt->x; } while (0)
Expand Down
4 changes: 4 additions & 0 deletions bin/varnishd/cache/cache_backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ struct vbp_target;
struct vrt_ctx;
struct vrt_backend_probe;
struct conn_pool;
struct connwait;

/*--------------------------------------------------------------------
* An instance of a backend from a VCL program.
Expand All @@ -69,6 +70,9 @@ struct backend {
struct conn_pool *conn_pool;

VCL_BACKEND director;

VTAILQ_HEAD(, connwait) cw_head;
unsigned cw_count;
};

/*---------------------------------------------------------------------
Expand Down
99 changes: 99 additions & 0 deletions bin/varnishtest/tests/v00072.vtc
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
varnishtest "Check backend wait limit"

#
# s1, v1, c1, c2 are the success case
# c2 will get the backend connection after c1 is finished with it.
#
barrier b1 cond 2

server s1 {
rxreq
barrier b1 sync
delay 0.2
txresp

rxreq
txresp
} -start

varnish v1 -vcl {
backend s1 {
.host = "${s1_addr}";
.port = "${s1_port}";
.max_connections = 1;
.connect_timeout = 2s;
.wait_timeout = 1s; # longer than the s1 'delay 0.2'
.wait_limit = 1;
}

sub vcl_recv {
return(pass);
}
} -start

client c1 -connect ${v1_sock} {
txreq
rxresp
expect resp.status == 200
} -start

client c2 -connect ${v1_sock} {
barrier b1 sync
txreq
rxresp
expect resp.status == 200
} -run

client c1 -wait

varnish v1 -expect backend_wait == 1
varnish v1 -expect backend_wait_fail == 0

#
# s2, v2, c3, c4 are the fail case.
# c4 will timeout before it gets the backend connection.
#
barrier b2 cond 2

server s2 {
rxreq
barrier b2 sync
delay 0.2
txresp

rxreq
txresp
} -start

varnish v2 -vcl {
backend s2 {
.host = "${s2_addr}";
.port = "${s2_port}";
.max_connections = 1;
.connect_timeout = 2s;
.wait_timeout = 100ms; # shorter than the s2 'delay 0.2'
.wait_limit = 1;
}

sub vcl_recv {
return(pass);
}
} -start

client c3 -connect ${v2_sock} {
txreq
rxresp
expect resp.status == 200
} -start

client c4 -connect ${v2_sock} {
barrier b2 sync
txreq
rxresp
expect resp.status == 503
} -run

client c3 -wait

varnish v2 -expect backend_wait == 1
varnish v2 -expect backend_wait_fail == 1
46 changes: 46 additions & 0 deletions bin/varnishtest/tests/v00073.vtc
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
varnishtest "Check backend wait limit fail"

barrier b1 cond 2

server s1 {
rxreq
barrier b1 sync
delay 0.2
txresp

rxreq
txresp
} -start

varnish v1 -vcl {
backend s1 {
.host = "${s1_addr}";
.port = "${s1_port}";
.max_connections = 1;
.connect_timeout = 2s;
.wait_timeout = 100ms;
.wait_limit = 1;
}

sub vcl_recv {
return(pass);
}
} -start

client c1 -connect ${v1_sock} {
txreq
rxresp
expect resp.status == 200
} -start

client c2 -connect ${v1_sock} {
barrier b1 sync
txreq
rxresp
expect resp.status == 503
} -run

client c1 -wait

varnish v1 -expect backend_wait == 1
varnish v1 -expect backend_wait_fail == 1
Loading