Skip to content

Commit

Permalink
Fix reconnection hanging bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
nviennot committed Nov 6, 2019
1 parent fa49dc9 commit c71307e
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 51 deletions.
13 changes: 8 additions & 5 deletions tmate-msgpack.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ static int on_encoder_write(void *userdata, const char *buf, size_t len)
tmate_fatal("Cannot buffer encoded data");

if (!encoder->ev_active) {
event_active(&encoder->ev_buffer, EV_READ, 0);
event_active(encoder->ev_buffer, EV_READ, 0);
encoder->ev_active = true;
}

Expand Down Expand Up @@ -57,10 +57,12 @@ void tmate_encoder_init(struct tmate_encoder *encoder,
if (!encoder->buffer)
tmate_fatal("Can't allocate buffer");

event_set(&encoder->ev_buffer, -1,
EV_READ | EV_PERSIST, on_encoder_buffer_ready, encoder);
encoder->ev_buffer = event_new(tmate_session.ev_base, -1,
EV_READ | EV_PERSIST, on_encoder_buffer_ready, encoder);
if (!encoder->ev_buffer)
tmate_fatal("Can't allocate event");

event_add(&encoder->ev_buffer, NULL);
event_add(encoder->ev_buffer, NULL);

encoder->ev_active = false;
}
Expand All @@ -69,7 +71,8 @@ void tmate_encoder_destroy(struct tmate_encoder *encoder)
{
/* encoder->pk doesn't need any cleanup */
evbuffer_free(encoder->buffer);
event_del(&encoder->ev_buffer);
event_del(encoder->ev_buffer);
event_free(encoder->ev_buffer);
memset(encoder, 0, sizeof(*encoder));
}

Expand Down
81 changes: 49 additions & 32 deletions tmate-session.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,35 +20,50 @@ struct tmate_session tmate_session;
static void lookup_and_connect(void);

static void on_dns_retry(__unused evutil_socket_t fd, __unused short what,
__unused void *arg)
void *arg)
{
struct tmate_session *session = arg;

assert(session->ev_dns_retry);
event_free(session->ev_dns_retry);
session->ev_dns_retry = NULL;

lookup_and_connect();
}

static void dns_cb(int errcode, struct evutil_addrinfo *addr, void *ptr)
{
struct evutil_addrinfo *ai;
struct timeval tv;
const char *host = ptr;

if (errcode) {
tmate_status_message("%s lookup failure. Retrying in %d seconds (%s)",
host, TMATE_DNS_RETRY_TIMEOUT,
evutil_gai_strerror(errcode));
struct tmate_session *session = &tmate_session;

tv.tv_sec = TMATE_DNS_RETRY_TIMEOUT;
tv.tv_usec = 0;
if (session->ev_dns_retry)
return;

evtimer_assign(&tmate_session.ev_dns_retry, tmate_session.ev_base,
on_dns_retry, NULL);
evtimer_add(&tmate_session.ev_dns_retry, &tv);
struct timeval tv = { .tv_sec = TMATE_DNS_RETRY_TIMEOUT, .tv_usec = 0 };

session->ev_dns_retry = evtimer_new(session->ev_base, on_dns_retry, session);
if (!session->ev_dns_retry)
tmate_fatal("out of memory");
evtimer_add(session->ev_dns_retry, &tv);

tmate_status_message("%s lookup failure. Retrying in %d seconds (%s)",
host, TMATE_DNS_RETRY_TIMEOUT,
evutil_gai_strerror(errcode));
return;
}

tmate_status_message("Connecting to %s...", host);

for (ai = addr; ai; ai = ai->ai_next) {
int i, num_clients = 0;
for (ai = addr; ai; ai = ai->ai_next)
num_clients++;

struct tmate_ssh_client *ssh_clients[num_clients];

for (ai = addr, i = 0; ai; ai = ai->ai_next, i++) {
char buf[128];
const char *ip = NULL;
if (ai->ai_family == AF_INET) {
Expand All @@ -59,32 +74,25 @@ static void dns_cb(int errcode, struct evutil_addrinfo *addr, void *ptr)
ip = evutil_inet_ntop(AF_INET6, &sin6->sin6_addr, buf, 128);
}

tmate_debug("Trying server %s", ip);

/*
* Note: We don't deal with the client list. Clients manage it
* and free client structs when necessary.
*/
(void)tmate_ssh_client_alloc(&tmate_session, ip);
ssh_clients[i] = tmate_ssh_client_alloc(&tmate_session, ip);
}

for (i = 0; i < num_clients; i++)
connect_ssh_client(ssh_clients[i]);

evutil_freeaddrinfo(addr);

/*
* XXX For some reason, freeing the DNS resolver makes MacOSX flip out...
* not sure what's going on...
* evdns_base_free(tmate_session.ev_dnsbase, 0);
* tmate_session.ev_dnsbase = NULL;
*/
evdns_base_free(tmate_session.ev_dnsbase, 0);
tmate_session.ev_dnsbase = NULL;
}

static void lookup_and_connect(void)
{
struct evutil_addrinfo hints;
const char *tmate_server_host;

if (!tmate_session.ev_dnsbase)
tmate_session.ev_dnsbase = evdns_base_new(tmate_session.ev_base, 1);
assert(!tmate_session.ev_dnsbase);
tmate_session.ev_dnsbase = evdns_base_new(tmate_session.ev_base, 1);
if (!tmate_session.ev_dnsbase)
tmate_fatal("Cannot initialize the DNS lookup service");

Expand Down Expand Up @@ -191,12 +199,18 @@ static void on_reconnect_retry(__unused evutil_socket_t fd, __unused short what,
{
struct tmate_session *session = arg;

assert(session->ev_connection_retry);
event_free(session->ev_connection_retry);
session->ev_connection_retry = NULL;

if (session->last_server_ip) {
/*
* We have a previous server ip. Let's try that again first,
* but then connect to any server if it fails again.
*/
(void)tmate_ssh_client_alloc(&tmate_session, session->last_server_ip);
struct tmate_ssh_client *c = tmate_ssh_client_alloc(session,
session->last_server_ip);
connect_ssh_client(c);
free(session->last_server_ip);
session->last_server_ip = NULL;
} else {
Expand All @@ -214,18 +228,21 @@ void tmate_reconnect_session(struct tmate_session *session, const char *message)
*/
struct timeval tv = { .tv_sec = TMATE_RECONNECT_RETRY_TIMEOUT, .tv_usec = 0 };

evtimer_assign(&session->ev_connection_retry, session->ev_base,
on_reconnect_retry, session);
evtimer_add(&session->ev_connection_retry, &tv);
if (session->ev_connection_retry)
return;

session->ev_connection_retry = evtimer_new(session->ev_base, on_reconnect_retry, session);
if (!session->ev_connection_retry)
tmate_fatal("out of memory");
evtimer_add(session->ev_connection_retry, &tv);

if (message)
if (message && !tmate_foreground)
tmate_status_message("Reconnecting... (%s)", message);
else
tmate_status_message("Reconnecting...");

/*
* This says that we'll need to send a snapshot of the current state.
* Until we have persisted logs...
*/
session->reconnected = true;
}
24 changes: 14 additions & 10 deletions tmate-ssh-client.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,12 @@ static void init_conn_fd(struct tmate_ssh_client *client, bool tune_socket)
if (tune_socket)
tune_socket_opts(fd);

event_set(&client->ev_ssh, fd, EV_READ | EV_PERSIST, __on_ssh_client_event, client);
event_add(&client->ev_ssh, NULL);
assert(!client->ev_ssh);
client->ev_ssh = event_new(client->tmate_session->ev_base,
fd, EV_READ | EV_PERSIST, __on_ssh_client_event, client);
if (!client->ev_ssh)
tmate_fatal("out of memory");
event_add(client->ev_ssh, NULL);

client->has_init_conn_fd = true;
}
Expand Down Expand Up @@ -479,7 +483,8 @@ static void kill_ssh_client(struct tmate_ssh_client *client,
tmate_debug("SSH client killed (%s)", client->server_ip);

if (client->has_init_conn_fd) {
event_del(&client->ev_ssh);
event_del(client->ev_ssh);
event_free(client->ev_ssh);
client->has_init_conn_fd = false;
}

Expand All @@ -506,12 +511,11 @@ static void kill_ssh_client(struct tmate_ssh_client *client,
free(client);
}

static void connect_ssh_client(struct tmate_ssh_client *client)
void connect_ssh_client(struct tmate_ssh_client *client)
{
if (!client->session) {
client->state = SSH_INIT;
on_ssh_client_event(client);
}
assert(!client->session);
client->state = SSH_INIT;
on_ssh_client_event(client);
}

static void ssh_log_function(int priority, const char *function,
Expand All @@ -522,9 +526,11 @@ static void ssh_log_function(int priority, const char *function,

struct tmate_ssh_client *tmate_ssh_client_alloc(struct tmate_session *session,
const char *server_ip)

{
struct tmate_ssh_client *client;
client = xmalloc(sizeof(*client));
memset(client, 0, sizeof(*client));

ssh_set_log_callback(ssh_log_function);

Expand All @@ -544,7 +550,5 @@ struct tmate_ssh_client *tmate_ssh_client_alloc(struct tmate_session *session,

client->has_init_conn_fd = false;

connect_ssh_client(client);

return client;
}
9 changes: 5 additions & 4 deletions tmate.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ struct tmate_encoder {
tmate_encoder_write_cb *ready_callback;
void *userdata;
struct evbuffer *buffer;
struct event ev_buffer;
struct event *ev_buffer;
bool ev_active;
};

Expand Down Expand Up @@ -142,10 +142,11 @@ struct tmate_ssh_client {
ssh_channel channel;

bool has_init_conn_fd;
struct event ev_ssh;
struct event *ev_ssh;
};
TAILQ_HEAD(tmate_ssh_clients, tmate_ssh_client);

extern void connect_ssh_client(struct tmate_ssh_client *client);
extern struct tmate_ssh_client *tmate_ssh_client_alloc(struct tmate_session *session,
const char *server_ip);

Expand All @@ -154,7 +155,7 @@ extern struct tmate_ssh_client *tmate_ssh_client_alloc(struct tmate_session *ses
struct tmate_session {
struct event_base *ev_base;
struct evdns_base *ev_dnsbase;
struct event ev_dns_retry;
struct event *ev_dns_retry;

struct tmate_encoder encoder;
struct tmate_decoder decoder;
Expand All @@ -175,7 +176,7 @@ struct tmate_session {
char *passphrase;

bool reconnected;
struct event ev_connection_retry;
struct event *ev_connection_retry;
char *last_server_ip;
char *reconnection_data;
/*
Expand Down

0 comments on commit c71307e

Please sign in to comment.