Regularly run system tests. (#227)
- Run one system test per model for each commit.
- Run all system tests as daily cron jobs.
- Factored out logging arguments and added an option to disable logging in run_train_translate.
tdomhan authored and fhieber committed Nov 29, 2017
1 parent e1a58db commit f1a418c
Showing 12 changed files with 80 additions and 38 deletions.
6 changes: 5 additions & 1 deletion .travis.yml
@@ -25,4 +25,8 @@ script:
- pylint --rcfile=pylintrc test -E
- mypy --ignore-missing-imports --follow-imports=silent @typechecked-files
- check-manifest --ignore sockeye/git_version.py
#- python -m pytest test/system
- if [ "$TRAVIS_EVENT_TYPE" != "cron" ]; then python -m pytest -k "Copy:lstm:lstm" test/system; fi
- if [ "$TRAVIS_EVENT_TYPE" != "cron" ]; then python -m pytest -k "Copy:transformer:transformer" test/system; fi
- if [ "$TRAVIS_EVENT_TYPE" != "cron" ]; then python -m pytest -k "Copy:cnn:cnn" test/system; fi
- if [ "$TRAVIS_EVENT_TYPE" = "cron" ]; then python -m pytest test/system; fi

4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -10,6 +10,10 @@ Note that Sockeye has checks in place to not translate with an old model that wa

Each version section may have subsections for: _Added_, _Changed_, _Removed_, _Deprecated_, and _Fixed_.

## [1.14.0]
### Changed
- Downscaled fixed positional embeddings for CNN models.

## [1.13.2]
### Added
- A tool that extracts specified parameters from params.x into a .npz file for downstream applications or analysis.
2 changes: 1 addition & 1 deletion sockeye/__init__.py
@@ -11,4 +11,4 @@
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

__version__ = '1.13.2'
__version__ = '1.14.0'
19 changes: 11 additions & 8 deletions sockeye/arguments.py
@@ -23,6 +23,7 @@
from . import constants as C
from . import data_io


def regular_file() -> Callable:
"""
Returns a method that can be used in argument parsing to check the argument is a regular file or a symbolic link,
@@ -226,6 +227,14 @@ def add_lexicon_args(params):
help="Number of target translations to keep per source. Default: %(default)s.")


def add_logging_args(params):
logging_params = params.add_argument_group("Logging")
logging_params.add_argument('--quiet', '-q',
default=False,
action="store_true",
help='Suppress console logging.')


def add_io_args(params):
data_params = params.add_argument_group("Data & I/O")

@@ -283,11 +292,6 @@ def add_io_args(params):
help="Statistics function to run on monitored outputs/weights/gradients. "
"Default: %(default)s.")

data_params.add_argument('--quiet', '-q',
default=False,
action="store_true",
help='Suppress console logging.')


def add_device_args(params):
device_params = params.add_argument_group("Device parameters")
@@ -790,11 +794,13 @@ def add_train_cli_args(params):
add_model_parameters(params)
add_training_args(params)
add_device_args(params)
add_logging_args(params)


def add_translate_cli_args(params):
add_inference_args(params)
add_device_args(params)
add_logging_args(params)


def add_inference_args(params):
@@ -896,9 +902,6 @@ def add_evaluate_args(params):
type=file_or_stdin(),
default=sys.stdin,
help="File with hypotheses. If none will read from stdin. Default: %(default)s.")
eval_params.add_argument('--quiet', '-q',
action="store_true",
help="Do not print logging information.")
eval_params.add_argument('--metrics',
nargs='+',
default=[C.BLEU, C.CHRF],
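
The net effect in sockeye/arguments.py: the --quiet/-q flag moves out of the I/O and evaluate argument groups into one shared add_logging_args group, which the train, translate, and evaluate CLIs now attach. A self-contained sketch of how the grouped flag behaves, using only argparse and the definition shown above:

import argparse

def add_logging_args(params):
    # Same definition as in the diff above.
    logging_params = params.add_argument_group("Logging")
    logging_params.add_argument('--quiet', '-q',
                                default=False,
                                action="store_true",
                                help='Suppress console logging.')

parser = argparse.ArgumentParser()
add_logging_args(parser)
assert parser.parse_args([]).quiet is False
assert parser.parse_args(['--quiet']).quiet is True
assert parser.parse_args(['-q']).quiet is True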
2 changes: 1 addition & 1 deletion sockeye/decoder.py
@@ -898,7 +898,7 @@ def __init__(self,
num_embed=config.num_embed,
max_seq_len=config.max_seq_len_target,
fixed_pos_embed_scale_up_input=False,
fixed_pos_embed_scale_down_positions=False,
fixed_pos_embed_scale_down_positions=True,
prefix=C.TARGET_POSITIONAL_EMBEDDING_PREFIX)

self.layers = [convolution.ConvolutionBlock(
2 changes: 1 addition & 1 deletion sockeye/encoder.py
@@ -143,7 +143,7 @@ def get_convolutional_encoder(config: ConvolutionalEncoderConfig) -> 'Encoder':
config.num_embed,
max_seq_len=config.max_seq_len_source,
fixed_pos_embed_scale_up_input=False,
fixed_pos_embed_scale_down_positions=False,
fixed_pos_embed_scale_down_positions=True,
prefix=C.SOURCE_POSITIONAL_EMBEDDING_PREFIX))
encoders.append(ConvolutionalEncoder(config=config))
encoders.append(BatchMajor2TimeMajor())
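
Both hunks flip fixed_pos_embed_scale_down_positions to True for the CNN decoder and encoder: the "downscaled fixed positional embeddings" noted in the CHANGELOG. A rough sketch of what the flag is assumed to mean, following the common convention of multiplying the sinusoidal embeddings by num_embed ** -0.5 rather than scaling the token embeddings up; the exact Sockeye implementation may differ:

import math

def fixed_pos_embeddings(max_seq_len, num_embed, scale_down_positions=True):
    # Standard sinusoidal position encodings (sketch, pure Python).
    table = []
    for pos in range(max_seq_len):
        row = []
        for i in range(0, num_embed, 2):
            angle = pos / (10000 ** (i / num_embed))
            row.append(math.sin(angle))
            row.append(math.cos(angle))
        row = row[:num_embed]
        if scale_down_positions:
            # Keeps the positional signal small relative to the word
            # embeddings, since fixed_pos_embed_scale_up_input stays False.
            row = [x * num_embed ** -0.5 for x in row]
        table.append(row)
    return table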
1 change: 1 addition & 0 deletions sockeye/evaluate.py
@@ -46,6 +46,7 @@ def main():
params = argparse.ArgumentParser(description='Evaluate translations by calculating metrics with '
'respect to a reference set.')
arguments.add_evaluate_args(params)
arguments.add_logging_args(params)
args = params.parse_args()

if args.quiet:
6 changes: 4 additions & 2 deletions sockeye/lexicon.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,13 +269,15 @@ def main():
"""
Commandline interface for building top-k lexicons for use during decoding.
"""
logger = setup_main_logger(__name__, console=True, file_logging=False)
log_sockeye_version(logger)

params = argparse.ArgumentParser(description="Build a top-k lexicon for use during decoding.")
arguments.add_lexicon_args(params)
arguments.add_logging_args(params)
args = params.parse_args()

logger = setup_main_logger(__name__, console=not args.quiet, file_logging=False)
log_sockeye_version(logger)

logger.info("Reading source and target vocab from \"%s\"", args.model)
vocab_source = vocab.vocab_from_json_or_pickle(os.path.join(args.model, C.VOCAB_SRC_NAME))
vocab_target = vocab.vocab_from_json_or_pickle(os.path.join(args.model, C.VOCAB_TRG_NAME))
5 changes: 4 additions & 1 deletion sockeye/translate.py
@@ -45,7 +45,10 @@ def main():

if args.output is not None:
global logger
logger = setup_main_logger(__name__, file_logging=True, path="%s.%s" % (args.output, C.LOG_NAME))
logger = setup_main_logger(__name__,
console=not args.quiet,
file_logging=True,
path="%s.%s" % (args.output, C.LOG_NAME))

if args.checkpoints is not None:
check_condition(len(args.checkpoints) == len(args.models), "must provide checkpoints for each model")
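
evaluate.py, lexicon.py, and translate.py now share the same ordering: parse the CLI arguments first, then build the logger, so that --quiet can actually suppress console output. A minimal sketch of the pattern with a stand-in for setup_main_logger (whose signature is assumed from the calls above, not copied from sockeye/log.py):

import argparse
import logging

def setup_main_logger_sketch(name, console=True, file_logging=False, path=None):
    # Stand-in for Sockeye's setup_main_logger: only attach a console
    # handler when console logging is enabled.
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    if console:
        logger.addHandler(logging.StreamHandler())
    if file_logging and path:
        logger.addHandler(logging.FileHandler(path))
    return logger

params = argparse.ArgumentParser()
params.add_argument('--quiet', '-q', action='store_true', default=False)
args = params.parse_args(['--quiet'])
# Created only after parse_args, so the flag can take effect.
logger = setup_main_logger_sketch(__name__, console=not args.quiet)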
43 changes: 30 additions & 13 deletions test/common.py
@@ -14,6 +14,7 @@
import os
import random
import sys
import logging
from contextlib import contextmanager
from tempfile import TemporaryDirectory
from typing import Optional, Tuple
@@ -33,6 +34,8 @@
from sockeye.evaluate import raw_corpus_bleu
from sockeye.chrf import corpus_chrf

logger = logging.getLogger(__name__)


def gaussian_vector(shape, return_symbol=False):
"""
@@ -116,7 +119,7 @@ def generate_fast_align_lex(lex_path: str):
print("{0}\t{0}\t0".format(digit), file=lex_out)


_LEXICON_PARAMS_COMMON = "-i {input} -m {model} -k 1 -o {json}"
_LEXICON_PARAMS_COMMON = "-i {input} -m {model} -k 1 -o {json} {quiet}"


@contextmanager
@@ -144,13 +147,13 @@ def tmp_digits_dataset(prefix: str,


_TRAIN_PARAMS_COMMON = "--use-cpu --max-seq-len {max_len} --source {train_source} --target {train_target}" \
" --validation-source {dev_source} --validation-target {dev_target} --output {model}"
" --validation-source {dev_source} --validation-target {dev_target} --output {model} {quiet}"

_TRANSLATE_PARAMS_COMMON = "--use-cpu --models {model} --input {input} --output {output}"
_TRANSLATE_PARAMS_COMMON = "--use-cpu --models {model} --input {input} --output {output} {quiet}"

_TRANSLATE_PARAMS_RESTRICT = "--restrict-lexicon {json}"

_EVAL_PARAMS_COMMON = "--hypotheses {hypotheses} --references {references} --metrics {metrics}"
_EVAL_PARAMS_COMMON = "--hypotheses {hypotheses} --references {references} --metrics {metrics} {quiet}"


def run_train_translate(train_params: str,
@@ -162,7 +165,8 @@
dev_target_path: str,
max_seq_len: int = 10,
restrict_lexicon: bool = False,
work_dir: Optional[str] = None) -> Tuple[float, float, float, float]:
work_dir: Optional[str] = None,
quiet: bool = False) -> Tuple[float, float, float, float]:
"""
Train a model and translate a dev set. Report validation perplexity and BLEU.
@@ -176,8 +180,13 @@
:param max_seq_len: The maximum sequence length.
:param restrict_lexicon: Additional translation run with top-k lexicon-based vocabulary restriction.
:param work_dir: The directory to store the model and other outputs in.
:param quiet: Suppress the console output of training and decoding.
:return: A tuple containing perplexity, bleu scores for standard and reduced vocab decoding, chrf score.
"""
if quiet:
quiet_arg = "--quiet"
else:
quiet_arg = ""
with TemporaryDirectory(dir=work_dir, prefix="test_train_translate.") as work_dir:
# Train model
model_path = os.path.join(work_dir, "model")
@@ -187,17 +196,21 @@
dev_source=dev_source_path,
dev_target=dev_target_path,
model=model_path,
max_len=max_seq_len),
max_len=max_seq_len,
quiet=quiet_arg),
train_params)
logger.info("Starting training with parameters %s.", train_params)
with patch.object(sys, "argv", params.split()):
sockeye.train.main()

logger.info("Translating with parameters %s.", translate_params)
# Translate corpus with the 1st params
out_path = os.path.join(work_dir, "out.txt")
params = "{} {} {}".format(sockeye.translate.__file__,
_TRANSLATE_PARAMS_COMMON.format(model=model_path,
input=dev_source_path,
output=out_path),
output=out_path,
quiet=quiet_arg),
translate_params)
with patch.object(sys, "argv", params.split()):
sockeye.translate.main()
@@ -208,7 +221,8 @@
params = "{} {} {}".format(sockeye.translate.__file__,
_TRANSLATE_PARAMS_COMMON.format(model=model_path,
input=dev_source_path,
output=out_path_equiv),
output=out_path_equiv,
quiet=quiet_arg),
translate_params_equiv)
with patch.object(sys, "argv", params.split()):
sockeye.translate.main()
Expand All @@ -231,14 +245,16 @@ def run_train_translate(train_params: str,
params = "{} {}".format(sockeye.lexicon.__file__,
_LEXICON_PARAMS_COMMON.format(input=lex_path,
model=model_path,
json=json_path))
json=json_path,
quiet=quiet_arg))
with patch.object(sys, "argv", params.split()):
sockeye.lexicon.main()
# Translate corpus with restrict-lexicon
params = "{} {} {} {}".format(sockeye.translate.__file__,
_TRANSLATE_PARAMS_COMMON.format(model=model_path,
input=dev_source_path,
output=out_restrict_path),
output=out_restrict_path,
quiet=quiet_arg),
translate_params,
_TRANSLATE_PARAMS_RESTRICT.format(json=json_path))
with patch.object(sys, "argv", params.split()):
@@ -253,9 +269,9 @@
averaged_params = sockeye.average.average(points)
assert averaged_params

# get last validation perplexity
# get best validation perplexity
metrics = sockeye.utils.read_metrics_file(path=os.path.join(model_path, C.METRICS_NAME))
perplexity = metrics[-1][C.PERPLEXITY + '-val']
perplexity = min(m[C.PERPLEXITY + '-val'] for m in metrics)

hypotheses = open(out_path, "r").readlines()
references = open(dev_target_path, "r").readlines()
@@ -272,7 +288,8 @@
eval_params = "{} {} ".format(sockeye.evaluate.__file__,
_EVAL_PARAMS_COMMON.format(hypotheses=out_path,
references=dev_target_path,
metrics="bleu chrf"), )
metrics="bleu chrf",
quiet=quiet_arg))
with patch.object(sys, "argv", eval_params.split()):
sockeye.evaluate.main()

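
The switch from metrics[-1] to min(...) makes the assertion use the best checkpoint's validation perplexity rather than the last one, which matters because training can end on a checkpoint slightly worse than the best. Illustration with made-up values:

metrics = [{'perplexity-val': 1.31},
           {'perplexity-val': 1.04},
           {'perplexity-val': 1.07}]  # one entry per checkpoint (illustrative)

last_ppl = metrics[-1]['perplexity-val']              # 1.07 -- old behaviour
best_ppl = min(m['perplexity-val'] for m in metrics)  # 1.04 -- new behaviour
assert best_ppl <= last_ppl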
10 changes: 5 additions & 5 deletions test/system/test_seq_copy_sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@


@pytest.mark.parametrize("name, train_params, translate_params, perplexity_thresh, bleu_thresh", [
("Copy:lstm",
("Copy:lstm:lstm",
"--encoder rnn --num-layers 1 --rnn-cell-type lstm --rnn-num-hidden 64 --num-embed 32 --rnn-attention-type mlp"
" --rnn-attention-num-hidden 32 --batch-size 16 --loss cross-entropy --optimized-metric perplexity"
" --checkpoint-frequency 1000 --optimizer adam --initial-learning-rate 0.001"
@@ -80,7 +80,7 @@
"--beam-size 1",
1.01,
0.999),
("Copy:cnn",
("Copy:cnn:cnn",
"--encoder cnn --decoder cnn "
" --batch-size 16 --num-layers 3 --max-updates 3000"
" --cnn-num-hidden 32 --cnn-positional-embedding-type fixed"
@@ -154,16 +154,16 @@ def test_seq_copy(name, train_params, translate_params, perplexity_thresh, bleu_
" --transformer-feed-forward-num-hidden 64"
" --checkpoint-frequency 1000 --optimizer adam --initial-learning-rate 0.001",
"--beam-size 1",
1.01,
1.02,
0.99),
("Sort:cnn",
"--encoder cnn --decoder cnn "
" --batch-size 16 --num-layers 3 --max-updates 5000"
" --cnn-num-hidden 32 --cnn-positional-embedding-type fixed"
" --checkpoint-frequency 1000 --optimizer adam --initial-learning-rate 0.001",
"--beam-size 1",
1.10,
0.93)
1.05,
0.97)
])
def test_seq_sort(name, train_params, translate_params, perplexity_thresh, bleu_thresh):
"""Task: sort short sequences of digits"""
18 changes: 13 additions & 5 deletions test/unit/test_arguments.py
@@ -20,6 +20,7 @@

from itertools import zip_longest


@pytest.mark.parametrize("test_params, expected_params", [
# mandatory parameters
('--source test_src --target test_tgt '
@@ -28,35 +29,42 @@
dict(source='test_src', target='test_tgt', limit=None,
validation_source='test_validation_src', validation_target='test_validation_tgt',
output='test_output', overwrite_output=False,
source_vocab=None, target_vocab=None, use_tensorboard=False, quiet=False,
source_vocab=None, target_vocab=None, use_tensorboard=False,
monitor_pattern=None, monitor_stat_func='mx_default')),
# all parameters
('--source test_src --target test_tgt --limit 10 '
'--validation-source test_validation_src --validation-target test_validation_tgt '
'--output test_output '
'--source-vocab test_src_vocab --target-vocab test_tgt_vocab '
'--use-tensorboard --overwrite-output --quiet',
'--use-tensorboard --overwrite-output',
dict(source='test_src', target='test_tgt', limit=10,
validation_source='test_validation_src', validation_target='test_validation_tgt',
output='test_output', overwrite_output=True,
source_vocab='test_src_vocab', target_vocab='test_tgt_vocab', use_tensorboard=True, quiet=True,
source_vocab='test_src_vocab', target_vocab='test_tgt_vocab', use_tensorboard=True,
monitor_pattern=None, monitor_stat_func='mx_default')),
# short parameters
('-s test_src -t test_tgt '
'-vs test_validation_src -vt test_validation_tgt '
'-o test_output -q',
'-o test_output',
dict(source='test_src', target='test_tgt', limit=None,
validation_source='test_validation_src', validation_target='test_validation_tgt',
output='test_output', overwrite_output=False,
source_vocab=None, target_vocab=None, use_tensorboard=False, quiet=True,
source_vocab=None, target_vocab=None, use_tensorboard=False,
monitor_pattern=None, monitor_stat_func='mx_default'))
])
def test_io_args(test_params, expected_params):
_test_args(test_params, expected_params, arguments.add_io_args)


@pytest.mark.parametrize("test_params, expected_params", [
('', dict(quiet=False)),
])
def test_logging_args(test_params, expected_params):
_test_args(test_params, expected_params, arguments.add_logging_args)


@pytest.mark.parametrize("test_params, expected_params", [
('', dict(device_ids=[-1], use_cpu=False, disable_device_locking=False, lock_dir='/tmp')),
('--device-ids 1 2 3 --use-cpu --disable-device-locking --lock-dir test_dir',
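
The _test_args helper referenced above lies outside this diff; it presumably builds a fresh parser, applies the given add_*_args function, and compares the parsed namespace against expected_params, along these lines (a hypothetical reconstruction, not the actual helper):

import argparse

def _test_args(test_params, expected_params, args_func):
    # Build a parser with only the argument group under test, parse the
    # given command line, and compare the resulting namespace.
    test_parser = argparse.ArgumentParser()
    args_func(test_parser)
    parsed = test_parser.parse_args(test_params.split())
    assert dict(vars(parsed)) == expected_params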
