Skip to content

Commit

Permalink
watchfrr: Add WATCHFRR_ERR_XXX codes and convert zlog_err to zlog_ferr
Browse files Browse the repository at this point in the history
Signed-off-by: Donald Sharp <sharpd@cumulusnetworks.com>
  • Loading branch information
donaldsharp authored and qlyoung committed Aug 14, 2018
1 parent 0f41a26 commit b647dc2
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 32 deletions.
2 changes: 2 additions & 0 deletions watchfrr/subdir.am
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@ endif

noinst_HEADERS += \
watchfrr/watchfrr.h \
watchfrr/watchfrr_errors.h \
# end

watchfrr_watchfrr_LDADD = lib/libfrr.la @LIBCAP@
watchfrr_watchfrr_SOURCES = \
watchfrr/watchfrr.c \
watchfrr/watchfrr_errors.c \
watchfrr/watchfrr_vty.c \
# end
78 changes: 46 additions & 32 deletions watchfrr/watchfrr.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "command.h"
#include "memory_vty.h"
#include "libfrr.h"
#include "lib_errors.h"

#include <getopt.h>
#include <sys/un.h>
Expand All @@ -35,6 +36,7 @@
#include <systemd.h>

#include "watchfrr.h"
#include "watchfrr_errors.h"

#ifndef MIN
#define MIN(X,Y) (((X) <= (Y)) ? (X) : (Y))
Expand Down Expand Up @@ -247,8 +249,9 @@ static pid_t run_background(char *shell_cmd)

switch (child = fork()) {
case -1:
zlog_err("fork failed, cannot run command [%s]: %s", shell_cmd,
safe_strerror(errno));
zlog_ferr(LIB_ERR_SYSTEM_CALL,
"fork failed, cannot run command [%s]: %s", shell_cmd,
safe_strerror(errno));
return -1;
case 0:
/* Child process. */
Expand All @@ -262,14 +265,16 @@ static pid_t run_background(char *shell_cmd)
char dashc[] = "-c";
char *const argv[4] = {shell, dashc, shell_cmd, NULL};
execv("/bin/sh", argv);
zlog_err("execv(/bin/sh -c '%s') failed: %s", shell_cmd,
safe_strerror(errno));
zlog_ferr(LIB_ERR_SYSTEM_CALL,
"execv(/bin/sh -c '%s') failed: %s", shell_cmd,
safe_strerror(errno));
_exit(127);
}
default:
/* Parent process: we will reap the child later. */
zlog_err("Forked background command [pid %d]: %s", (int)child,
shell_cmd);
zlog_ferr(LIB_ERR_SYSTEM_CALL,
"Forked background command [pid %d]: %s", (int)child,
shell_cmd);
return child;
}
}
Expand Down Expand Up @@ -326,7 +331,8 @@ static void sigchild(void)

switch (child = waitpid(-1, &status, WNOHANG)) {
case -1:
zlog_err("waitpid failed: %s", safe_strerror(errno));
zlog_ferr(LIB_ERR_SYSTEM_CALL,
"waitpid failed: %s", safe_strerror(errno));
return;
case 0:
zlog_warn("SIGCHLD received, but waitpid did not reap a child");
Expand All @@ -349,9 +355,9 @@ static void sigchild(void)
* completed. */
gettimeofday(&restart->time, NULL);
} else {
zlog_err(
"waitpid returned status for an unknown child process %d",
(int)child);
zlog_ferr(LIB_ERR_SYSTEM_CALL,
"waitpid returned status for an unknown child process %d",
(int)child);
name = "(unknown)";
what = "background";
}
Expand All @@ -370,8 +376,9 @@ static void sigchild(void)
zlog_debug("%s %s process %d exited normally", what,
name, (int)child);
} else
zlog_err("cannot interpret %s %s process %d wait status 0x%x",
what, name, (int)child, status);
zlog_ferr(LIB_ERR_SYSTEM_CALL,
"cannot interpret %s %s process %d wait status 0x%x",
what, name, (int)child, status);
phase_check();
}

Expand Down Expand Up @@ -481,8 +488,9 @@ static int wakeup_init(struct thread *t_wakeup)
dmn->t_wakeup = NULL;
if (try_connect(dmn) < 0) {
SET_WAKEUP_DOWN(dmn);
zlog_err("%s state -> down : initial connection attempt failed",
dmn->name);
zlog_ferr(WATCHFRR_ERR_CONNECTION,
"%s state -> down : initial connection attempt failed",
dmn->name);
dmn->state = DAEMON_DOWN;
}
return 0;
Expand All @@ -491,7 +499,8 @@ static int wakeup_init(struct thread *t_wakeup)
static void daemon_down(struct daemon *dmn, const char *why)
{
if (IS_UP(dmn) || (dmn->state == DAEMON_INIT))
zlog_err("%s state -> down : %s", dmn->name, why);
zlog_ferr(WATCHFRR_ERR_CONNECTION,
"%s state -> down : %s", dmn->name, why);
else if (gs.loglevel > LOG_DEBUG)
zlog_debug("%s still down : %s", dmn->name, why);
if (IS_UP(dmn))
Expand Down Expand Up @@ -684,21 +693,24 @@ static int try_connect(struct daemon *dmn)
of creating a socket. */
if (access(addr.sun_path, W_OK) < 0) {
if (errno != ENOENT)
zlog_err("%s: access to socket %s denied: %s",
dmn->name, addr.sun_path,
safe_strerror(errno));
zlog_ferr(LIB_ERR_SYSTEM_CALL,
"%s: access to socket %s denied: %s",
dmn->name, addr.sun_path,
safe_strerror(errno));
return -1;
}

if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
zlog_err("%s(%s): cannot make socket: %s", __func__,
addr.sun_path, safe_strerror(errno));
zlog_ferr(LIB_ERR_SOCKET,
"%s(%s): cannot make socket: %s", __func__,
addr.sun_path, safe_strerror(errno));
return -1;
}

if (set_nonblocking(sock) < 0 || set_cloexec(sock) < 0) {
zlog_err("%s(%s): set_nonblocking/cloexec(%d) failed", __func__,
addr.sun_path, sock);
zlog_ferr(LIB_ERR_SYSTEM_CALL,
"%s(%s): set_nonblocking/cloexec(%d) failed",
__func__, addr.sun_path, sock);
close(sock);
return -1;
}
Expand Down Expand Up @@ -735,8 +747,9 @@ static int try_connect(struct daemon *dmn)
static int phase_hanging(struct thread *t_hanging)
{
gs.t_phase_hanging = NULL;
zlog_err("Phase [%s] hanging for %ld seconds, aborting phased restart",
phase_str[gs.phase], PHASE_TIMEOUT);
zlog_ferr(WATCHFRR_ERR_CONNECTION,
"Phase [%s] hanging for %ld seconds, aborting phased restart",
phase_str[gs.phase], PHASE_TIMEOUT);
gs.phase = PHASE_NONE;
return 0;
}
Expand Down Expand Up @@ -850,10 +863,10 @@ static int wakeup_unresponsive(struct thread *t_wakeup)

dmn->t_wakeup = NULL;
if (dmn->state != DAEMON_UNRESPONSIVE)
zlog_err(
"%s: no longer unresponsive (now %s), "
"wakeup should have been cancelled!",
dmn->name, state_str[dmn->state]);
zlog_ferr(WATCHFRR_ERR_CONNECTION,
"%s: no longer unresponsive (now %s), "
"wakeup should have been cancelled!",
dmn->name, state_str[dmn->state]);
else {
SET_WAKEUP_UNRESPONSIVE(dmn);
try_restart(dmn);
Expand All @@ -867,10 +880,10 @@ static int wakeup_no_answer(struct thread *t_wakeup)

dmn->t_wakeup = NULL;
dmn->state = DAEMON_UNRESPONSIVE;
zlog_err(
"%s state -> unresponsive : no response yet to ping "
"sent %ld seconds ago",
dmn->name, gs.timeout);
zlog_ferr(WATCHFRR_ERR_CONNECTION,
"%s state -> unresponsive : no response yet to ping "
"sent %ld seconds ago",
dmn->name, gs.timeout);
SET_WAKEUP_UNRESPONSIVE(dmn);
try_restart(dmn);
return 0;
Expand Down Expand Up @@ -1149,6 +1162,7 @@ int main(int argc, char **argv)
gs.restart.interval = gs.min_restart_interval;

master = frr_init();
watchfrr_error_init();

zlog_set_level(ZLOG_DEST_MONITOR, ZLOG_DISABLED);
if (watchfrr_di.daemon_mode) {
Expand Down
42 changes: 42 additions & 0 deletions watchfrr/watchfrr_errors.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* watchfrr_errors - code for error messages that may occur in the
* watchfrr process
* Copyright (C) 2018 Cumulus Networks, Inc.
* Donald Sharp
*
* FRR is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any
* later version.
*
* FRR is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; see the file COPYING; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <zebra.h>

#include "watchfrr_errors.h"

/*
 * Error reference table for watchfrr.
 *
 * Each entry ties a watchfrr error code to a short title, a longer
 * description and a suggested remedy.  The table is handed to
 * ferr_ref_add() by watchfrr_error_init().
 */
static struct ferr_ref ferr_watchfrr_err[] = {
	{
		.code = WATCHFRR_ERR_CONNECTION,
		.title = "WATCHFRR Connection Error",
		.description =
			"WATCHFRR has detected a connectivity issue "
			"with one of the FRR daemons",
		.suggestion =
			"Ensure that FRR is still running "
			"and if not please open an Issue",
	},
	/* Last entry; END_FERR is presumably the end-of-table marker
	 * that ferr_ref_add() looks for -- confirm against lib/ferr.h. */
	{.code = END_FERR},
};

/*
 * Set up watchfrr's error reference codes.
 *
 * Calls ferr_ref_init() and then passes the watchfrr table
 * (ferr_watchfrr_err) to ferr_ref_add().
 *
 * NOTE(review): confirm that calling ferr_ref_init() from a daemon-level
 * init routine is safe if the library has already initialised the error
 * reference subsystem, i.e. that it does not discard references which
 * were registered earlier.
 */
void watchfrr_error_init(void)
{
	/* Initialise the reference subsystem before adding to it. */
	ferr_ref_init();
	/* Hand over the watchfrr-specific table. */
	ferr_ref_add(ferr_watchfrr_err);
}
33 changes: 33 additions & 0 deletions watchfrr/watchfrr_errors.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* watchfrr_errors - header for error messages that may occur in the
* watchfrr process
* Copyright (C) 2018 Cumulus Networks, Inc.
* Donald Sharp
*
* FRR is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any
* later version.
*
* FRR is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; see the file COPYING; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __WATCHFRR_ERRORS_H__
#define __WATCHFRR_ERRORS_H__

/*
 * Only lib/ferr.h is needed here.  This header previously also included
 * itself; the include guard turned that into a no-op, so the redundant
 * self-include has been dropped.
 */
#include "lib/ferr.h"

/*
 * Error reference codes specific to watchfrr.
 *
 * Numbering starts at WATCHFRR_FERR_START, which is not defined in this
 * header (presumably it comes from lib/ferr.h -- TODO confirm).
 */
enum watchfrr_ferr_refs {
	/* Connectivity issue between watchfrr and one of the FRR daemons. */
	WATCHFRR_ERR_CONNECTION = WATCHFRR_FERR_START,
};

/*
 * Set up the watchfrr error reference codes.
 * Implemented in watchfrr_errors.c.
 */
extern void watchfrr_error_init(void);

#endif /* __WATCHFRR_ERRORS_H__ */

0 comments on commit b647dc2

Please sign in to comment.