Skip to content

Commit

Permalink
implemented assert method checks
Browse files Browse the repository at this point in the history
  • Loading branch information
alimanfoo committed Jul 19, 2011
1 parent 927910d commit 00f1980
Show file tree
Hide file tree
Showing 2 changed files with 189 additions and 6 deletions.
79 changes: 77 additions & 2 deletions csvvalidator.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,18 @@
RECORD_LENGTH_CHECK_FAILED = 3
VALUE_PREDICATE_FALSE = 4
RECORD_PREDICATE_FALSE = 5
UNIQUE_CHECK_FAILED = 6
ASSERT_CHECK_FAILED = 7

MESSAGES = {
UNEXPECTED_ERROR: 'Unexpected error.',
VALUE_CHECK_FAILED: 'Value check failed.',
HEADER_CHECK_FAILED: 'Header check failed.',
RECORD_LENGTH_CHECK_FAILED: 'Record length check failed.',
VALUE_PREDICATE_FALSE: 'Value predicate returned false.',
RECORD_PREDICATE_FALSE: 'Record predicate returned false.'
RECORD_PREDICATE_FALSE: 'Record predicate returned false.',
UNIQUE_CHECK_FAILED: 'Unique check failed.',
ASSERT_CHECK_FAILED: 'Assertion check failed.'
}


Expand All @@ -35,6 +39,7 @@ def __init__(self, field_names):
self._record_length_checks = []
self._value_predicates = []
self._record_predicates = []
self._unique_checks = []


def add_value_check(self, field_name, value_check,
Expand Down Expand Up @@ -86,6 +91,20 @@ def add_record_predicate(self, record_predicate,

t = record_predicate, code, message, modulus
self._record_predicates.append(t)


def add_unique_check(self, key,
                     code=UNIQUE_CHECK_FAILED,
                     message=MESSAGES[UNIQUE_CHECK_FAILED]):
    """Add a unique check on a single column or combination of columns.

    Arguments
    ---------

    `key` - a single field name (string), or a sequence of field names
    specifying a compound key

    `code` - problem code to report when the key value of a record has
    already been seen, defaults to `UNIQUE_CHECK_FAILED`

    `message` - problem message to report alongside the code

    Raises `AssertionError` if any named field is not one of the
    validator's field names.

    """

    if isinstance(key, basestring):
        assert key in self._field_names, 'unexpected field name: %s' % key
    else:
        # The key is later used as a dictionary key when the per-check sets
        # of seen values are built, so it has to be hashable: freeze lists
        # (or any other iterable) into a tuple up front.
        key = tuple(key)
        for f in key:
            # Report the offending field itself. Formatting the whole
            # compound key with a single %s raises TypeError instead of the
            # intended AssertionError.
            assert f in self._field_names, \
                'unexpected field name: %s' % (f,)
    t = key, code, message
    self._unique_checks.append(t)


def validate(self, data_source,
Expand All @@ -112,6 +131,7 @@ def ivalidate(self, data_source,
"""

unique_sets = self._init_unique_sets() # used for unique checks
for i, r in enumerate(data_source):
if expect_header_row and i == ignore_lines:
# r is the header row
Expand All @@ -127,6 +147,18 @@ def ivalidate(self, data_source,
yield p
for p in self._apply_record_predicates(i, r, summarize):
yield p
for p in self._apply_unique_checks(i, r, unique_sets, summarize):
yield p
for p in self._apply_assert_methods(i, r, summarize):
yield p


def _init_unique_sets(self):
    """Return a dict holding one fresh, empty set per registered unique
    check, indexed by the key of that check."""
    # Each entry of self._unique_checks is a (key, code, message) tuple;
    # only the key is needed here.
    return dict((check[0], set()) for check in self._unique_checks)


def _apply_value_checks(self, i, r, summarize):
Expand Down Expand Up @@ -205,7 +237,50 @@ def _apply_record_predicates(self, i, r, summarize):
p['record'] = r
yield p



def _apply_unique_checks(self, i, r, unique_sets, summarize):
    """Apply every registered unique check to record `r` (at index `i`).

    `unique_sets` maps each check's key to the set of values seen so far.
    A problem is yielded for each check whose value for this record is
    already in its set; the value is then added to the set.

    When `summarize` is true the yielded problem only carries the code.

    """
    for key, code, message in self._unique_checks:
        seen = unique_sets[key]
        if isinstance(key, basestring):
            # simple key: a single field name
            value = r[self._field_names.index(key)]
        else:
            # compound key: collect the field values into a hashable tuple
            value = tuple(r[self._field_names.index(f)] for f in key)
        if value in seen:
            if summarize:
                problem = {'code': code}
            else:
                problem = {
                    'code': code,
                    'message': message,
                    'row': i + 1,
                    'record': r,
                    'key': key,
                    'value': value,
                }
            yield problem
        seen.add(value)


def _apply_assert_methods(self, i, r, summarize):
    """Call every 'assert*' method of this validator on record `r`.

    Each callable attribute whose name starts with 'assert' is invoked as
    `method(i, record_as_dict)`. An `AssertionError` raised by the method
    is turned into a problem dict and yielded; any other exception
    propagates to the caller.

    The problem code and message are taken from the arguments of the
    `AssertionError`: the first argument is the code, the second the
    message. Missing arguments fall back to `ASSERT_CHECK_FAILED` and its
    default message. When `summarize` is true only the code is reported.

    """
    for name in dir(self):
        if not name.startswith('assert'):
            continue
        method = getattr(self, name)
        if not callable(method):
            # plain data attributes that merely share the prefix are not
            # checks and must not be called
            continue
        # a fresh dict per method, so one check cannot affect another by
        # mutating the record dict it was handed
        rdict = self._as_dict(r)
        try:
            method(i, rdict)
        except AssertionError as e:
            args = e.args
            # `assert cond, (code, message)` raises
            # AssertionError((code, message)), i.e. the pair arrives as ONE
            # tuple argument; unwrap it so code and message are found.
            if len(args) == 1 and isinstance(args[0], tuple):
                args = args[0]
            code = args[0] if len(args) > 0 else ASSERT_CHECK_FAILED
            p = {'code': code}
            if not summarize:
                if len(args) > 1:
                    message = args[1]
                else:
                    message = MESSAGES[ASSERT_CHECK_FAILED]
                p['message'] = message
                p['row'] = i + 1
                p['record'] = r
            yield p


def _as_dict(self, r):
"""Convert the record to a dictionary using field names as keys."""
d = dict()
Expand Down
116 changes: 112 additions & 4 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
from csvvalidator import CSVValidator, VALUE_CHECK_FAILED, MESSAGES,\
HEADER_CHECK_FAILED, RECORD_LENGTH_CHECK_FAILED, enumeration, match_pattern,\
search_pattern, number_range_inclusive, number_range_exclusive,\
VALUE_PREDICATE_FALSE, RECORD_PREDICATE_FALSE
VALUE_PREDICATE_FALSE, RECORD_PREDICATE_FALSE, UNIQUE_CHECK_FAILED,\
ASSERT_CHECK_FAILED
import pprint


Expand Down Expand Up @@ -453,11 +454,118 @@ def foo_gt_2bar(r):
p = row4_problems_custom[0]
assert p['message'] == 'custom message'
assert p['record'] == ('1', '3')


def test_unique_checks():
    """A repeated value in a uniquely-checked field is reported as a single
    problem on the row that repeats it."""

    validator = CSVValidator(('foo', 'bar'))
    validator.add_unique_check('foo')

    header = ('foo', 'bar')
    records = (
        ('1', 'A'),
        ('2', 'B'),
        ('1', 'C'),  # repeats foo == '1'
    )

    problems = validator.validate((header,) + records)
    assert len(problems) == 1, len(problems)

    problem = problems[0]
    expected = (
        ('code', UNIQUE_CHECK_FAILED),
        ('message', MESSAGES[UNIQUE_CHECK_FAILED]),
        ('row', 4),
        ('key', 'foo'),
        ('value', '1'),
        ('record', ('1', 'C')),
    )
    for field, wanted in expected:
        assert problem[field] == wanted


def test_compound_unique_checks():
    """A repeated combination of values under a compound key is reported
    with the custom code and message given to the check."""

    compound_key = ('foo', 'bar')
    validator = CSVValidator(('foo', 'bar'))
    validator.add_unique_check(compound_key, 'X5', 'custom message')

    header = ('foo', 'bar')
    records = (
        ('1', 'A'),
        ('2', 'B'),
        ('1', 'B'),
        ('2', 'A'),
        ('1', 'A'),  # only this record repeats a (foo, bar) pair
    )

    problems = validator.validate((header,) + records)
    assert len(problems) == 1, len(problems)

    problem = problems[0]
    expected = (
        ('code', 'X5'),
        ('message', 'custom message'),
        ('row', 6),
        ('key', ('foo', 'bar')),
        ('value', ('1', 'A')),
        ('record', ('1', 'A')),
    )
    for field, wanted in expected:
        assert problem[field] == wanted


def test_assert_methods():
    """Test use of 'assert' methods.

    A validator subclass defines two 'assert*' methods: one fails with the
    default code and message, the other with a custom (code, message) pair.

    """

    # define a custom validator class
    class MyValidator(CSVValidator):

        def __init__(self, threshold):
            field_names = ('foo', 'bar')
            super(MyValidator, self).__init__(field_names)
            self._threshold = threshold

        def assert_foo_plus_bar_gt_threshold(self, i, r):
            assert int(r['foo']) + int(r['bar']) > self._threshold  # use default error code and message

        def assert_foo_times_bar_gt_threshold(self, i, r):
            assert int(r['foo']) * int(r['bar']) > self._threshold, ('X6', 'custom message')

    validator = MyValidator(42)

    data = (
        ('foo', 'bar'),
        ('33', '10'),  # valid
        ('7', '8'),  # invalid (foo + bar less than threshold)
        ('3', '4'),  # invalid (both)
    )

    problems = validator.validate(data)
    # pformat returns the formatted text; pprint.pprint would print to
    # stdout and hand None to debug()
    debug(pprint.pformat(problems))
    n = len(problems)
    assert n == 3, n

    # row 3 fails the sum check only
    row3_problems = [p for p in problems if p['row'] == 3]
    assert len(row3_problems) == 1
    p = row3_problems[0]
    assert p['code'] == ASSERT_CHECK_FAILED
    assert p['message'] == MESSAGES[ASSERT_CHECK_FAILED]
    assert p['record'] == ('7', '8')

    # row 4 fails both checks
    row4_problems = [p for p in problems if p['row'] == 4]
    assert len(row4_problems) == 2

    row4_problems_custom = [p for p in row4_problems if p['code'] == 'X6']
    assert len(row4_problems_custom) == 1
    p = row4_problems_custom[0]
    assert p['message'] == 'custom message'
    assert p['record'] == ('3', '4')

    row4_problems_default = [p for p in row4_problems if p['code'] == ASSERT_CHECK_FAILED]
    assert len(row4_problems_default) == 1
    p = row4_problems_default[0]
    assert p['message'] == MESSAGES[ASSERT_CHECK_FAILED]
    assert p['record'] == ('3', '4')


# TODO each methods
# TODO finally assert methods
# TODO what happens if value checks or value predicates or .. raise unexpected exceptions?
Expand Down

0 comments on commit 00f1980

Please sign in to comment.