Skip to content

Commit

Permalink
errorlib: avoid rush of exception notifications
Browse files Browse the repository at this point in the history
* Notification emails for identical exceptions are now sent with an
  exponentially large interval (the first exception is sent, then
  the second, then the fourth, then the eighth...).
  (closes #59)
  • Loading branch information
kaplun authored and tiborsimko committed Dec 3, 2010
1 parent 687c496 commit 84f6dc4
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 6 deletions.
11 changes: 11 additions & 0 deletions config/invenio.conf
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,17 @@ CFG_SITE_EMERGENCY_PHONE_NUMBERS =
## receive all captured exceptions.
CFG_SITE_ADMIN_EMAIL_EXCEPTIONS = 1

## CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER -- set this to
## the number of seconds after which to reset the exception notification
## counter. A given repetitive exception is notified via email with a
## logarithmic strategy: the first time it is seen it is sent via email,
## then the second time, then the fourth, then the eighth and so forth.
## If the number of seconds elapsed since the last time it was notified
## is greater than CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER
## then the internal counter is reset in order not to have exception
## notification become more and more rare.
CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER = 14400

## CFG_CERN_SITE -- do we want to enable CERN-specific code?
## Put "1" for "yes" and "0" for "no".
CFG_CERN_SITE = 0
Expand Down
62 changes: 57 additions & 5 deletions modules/miscutil/lib/errorlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,21 @@
import os
import sys
import time
import datetime
import re
from cStringIO import StringIO

from invenio.config import CFG_SITE_LANG, CFG_LOGDIR, \
CFG_WEBALERT_ALERT_ENGINE_EMAIL, CFG_SITE_ADMIN_EMAIL, \
CFG_SITE_SUPPORT_EMAIL, CFG_SITE_NAME, CFG_SITE_URL, CFG_VERSION, \
CFG_CERN_SITE, CFG_SITE_EMERGENCY_PHONE_NUMBERS, \
CFG_SITE_ADMIN_EMAIL_EXCEPTIONS
CFG_SITE_ADMIN_EMAIL_EXCEPTIONS, \
CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER
from invenio.miscutil_config import CFG_MISCUTIL_ERROR_MESSAGES
from invenio.urlutils import wash_url_argument
from invenio.messages import wash_language, gettext_set_language
from invenio.dateutils import convert_datestruct_to_datetext
from invenio.dbquery import run_sql

## Regular expression to match possible password related variable that should
## be disclosed in frame analysis.
Expand Down Expand Up @@ -265,6 +268,52 @@ def get_pretty_traceback(req=None, exc_info=None, force_stack=True):
else:
return ""

def _is_pow_of_2(n):
"""
Return True if n is a power of 2
"""
while n > 1:
if n % 2:
return False
n = n / 2
return True

def exception_should_be_notified(name, filename, line):
"""
Return True if the exception should be notified to the admin.
This actually depends on several considerations, e.g. wethever
it has passed some since the last time this exception has been notified.
"""
try:
exc_log = run_sql("SELECT id,last_notified,counter,total FROM hstEXCEPTION WHERE name=%s AND filename=%s AND line=%s", (name, filename, line))
if exc_log:
exc_id, last_notified, counter, total = exc_log[0]
delta = datetime.datetime.now() - last_notified
counter += 1
total += 1
if (delta.seconds + delta.days * 86400) >= CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER:
run_sql("UPDATE hstEXCEPTION SET last_seen=NOW(), last_notified=NOW(), counter=1, total=%s WHERE id=%s", (total, exc_id))
return True
else:
run_sql("UPDATE hstEXCEPTION SET last_seen=NOW(), counter=%s, total=%s WHERE id=%s", (counter, total, exc_id))
return _is_pow_of_2(counter)
else:
run_sql("INSERT INTO hstEXCEPTION(name, filename, line, last_seen, last_notified, counter, total) VALUES(%s, %s, %s, NOW(), NOW(), 1, 1)", (name, filename, line))
return True
except:
raise
return True

def get_pretty_notification_info(name, filename, line):
"""
Return a sentence describing when this exception was already seen.
"""
exc_log = run_sql("SELECT last_notified,last_seen,total FROM hstEXCEPTION WHERE name=%s AND filename=%s AND line=%s", (name, filename, line))
if exc_log:
last_notified, last_seen, total = exc_log[0]
return "This exception has already been seen %s times\n last time it was seen: %s\n last time it was notified: %s\n" % (total, last_seen.strftime("%Y-%m-%d %H:%M:%S"), last_notified.strftime("%Y-%m-%d %H:%M:%S"))
else:
return "It is the first time this exception has been seen.\n"

def register_exception(force_stack=False,
stream='error',
Expand Down Expand Up @@ -308,6 +357,7 @@ def register_exception(force_stack=False,
try:
## Let's extract exception information
exc_info = sys.exc_info()
exc_name = exc_info[0].__name__
output = get_pretty_traceback(
req=req, exc_info=exc_info, force_stack=force_stack)
if output:
Expand Down Expand Up @@ -350,16 +400,18 @@ def register_exception(force_stack=False,
written_to_log = True
except:
written_to_log = False
filename, line_no, function_name = _get_filename_and_line(exc_info)

if CFG_SITE_ADMIN_EMAIL_EXCEPTIONS > 1 or \
(alert_admin and CFG_SITE_ADMIN_EMAIL_EXCEPTIONS > 0) or \
not written_to_log:
## let's log the exception and see whether we should report it.
if exception_should_be_notified(exc_name, filename, line_no) and (CFG_SITE_ADMIN_EMAIL_EXCEPTIONS > 1 or
(alert_admin and CFG_SITE_ADMIN_EMAIL_EXCEPTIONS > 0) or
not written_to_log):
## If requested or if it's impossible to write in the log
from invenio.mailutils import send_email
if not subject:
filename, line_no, function_name = _get_filename_and_line(exc_info)
subject = 'Exception (%s:%s:%s)' % (filename, line_no, function_name)
subject = '%s at %s' % (subject, CFG_SITE_URL)
email_text = "\n%s\n%s" % (get_pretty_notification_info(exc_name, filename, line_no), email_text)
if not written_to_log:
email_text += """\
Note that this email was sent to you because it has been impossible to log
Expand Down
3 changes: 2 additions & 1 deletion modules/miscutil/sql/tabbibclean.sql
Original file line number Diff line number Diff line change
Expand Up @@ -306,4 +306,5 @@ TRUNCATE sbmPUBLICATIONCOMM;
TRUNCATE sbmPUBLICATIONDATA;
TRUNCATE hstRECORD;
TRUNCATE hstDOCUMENT;
TRUNCATE bibHOLDINGPEN;
TRUNCATE bibHOLDINGPEN;
TRUNCATE hstEXCEPTION;
20 changes: 20 additions & 0 deletions modules/miscutil/sql/tabcreate.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3756,5 +3756,25 @@ CREATE TABLE IF NOT EXISTS swrCLIENTDATA (
PRIMARY KEY (id)
) TYPE=MyISAM;

-- tables for exception management

-- This table is used to log exceptions
-- to discover the full details of an exception either check the email
-- that are sent to CFG_SITE_ADMIN_EMAIL or look into invenio.err
CREATE TABLE IF NOT EXISTS hstEXCEPTION (
id int(15) unsigned NOT NULL auto_increment,
name varchar(50) NOT NULL, -- name of the exception
filename varchar(255) NULL, -- file where the exception was raised
line int(9) NULL, -- line at which the exception was raised
last_seen datetime NOT NULL default '0000-00-00 00:00:00', -- last time this exception has been seen
last_notified datetime NOT NULL default '0000-00-00 00:00:00', -- last time this exception has been notified
counter int(15) NOT NULL default 0, -- internal counter to decide when to notify this exception
total int(15) NOT NULL default 0, -- total number of times this exception has been seen
PRIMARY KEY (id),
KEY (last_seen),
KEY (last_notified),
KEY (total),
UNIQUE KEY (name(50), filename(255), line)
) TYPE=MyISAM;

-- end of file
1 change: 1 addition & 0 deletions modules/miscutil/sql/tabdrop.sql
Original file line number Diff line number Diff line change
Expand Up @@ -431,5 +431,6 @@ DROP TABLE IF EXISTS expJOBRESULT_expQUERYRESULT;
DROP TABLE IF EXISTS user_expJOB;
DROP TABLE IF EXISTS swrREMOTESERVER;
DROP TABLE IF EXISTS swrCLIENTDATA;
DROP TABLE IF EXISTS hstEXCEPTION;

-- end of file

0 comments on commit 84f6dc4

Please sign in to comment.