Fix non-deterministic behavior #817

Merged: 5 commits, Jun 28, 2019

3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -6,6 +6,9 @@ All notable changes to this project will be documented in this file.
- Add filter for entity match feature [#814](https://github.com/snipsco/snips-nlu/pull/814)
- Add noise re-weight factor in `LogRegIntentClassifier` [#815](https://github.com/snipsco/snips-nlu/pull/815)

### Fixed
- Fix non-deterministic behavior [#817](https://github.com/snipsco/snips-nlu/pull/817)

## [0.19.7]
### Changed
- Re-score ambiguous `DeterministicIntentParser` results based on slots [#791](https://github.com/snipsco/snips-nlu/pull/791)
3 changes: 2 additions & 1 deletion setup.py
@@ -50,7 +50,8 @@
"mock>=2.0,<3.0",
"snips_nlu_metrics>=0.14.1,<0.15",
"pylint<2",
"coverage>=4.4.2,<5.0"
"coverage>=4.4.2,<5.0",
"checksumdir~=1.1.6",
]
}

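
The new `checksumdir` test dependency provides `dirhash`, which the reproducibility tests added below use to compare two persisted training outputs. A minimal sketch of that usage, with illustrative directory paths:

    from checksumdir import dirhash

    # Hash every file under each directory and compare the aggregate digests;
    # equal hashes mean the two trainings persisted byte-identical artifacts.
    hash1 = dirhash("/tmp/parser_run_1", "sha256")
    hash2 = dirhash("/tmp/parser_run_2", "sha256")
    assert hash1 == hash2
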
7 changes: 7 additions & 0 deletions snips_nlu/dataset/validation.py
@@ -47,12 +47,19 @@ def validate_and_format_dataset(dataset):
    if language not in get_all_languages():
        raise DatasetFormatError("Unknown language: '%s'" % language)

    dataset[INTENTS] = {
        intent_name: intent_data
        for intent_name, intent_data in sorted(iteritems(dataset[INTENTS]))}
    for intent in itervalues(dataset[INTENTS]):
        _validate_and_format_intent(intent, dataset[ENTITIES])

    utterance_entities_values = extract_utterance_entities(dataset)
    builtin_entity_parser = BuiltinEntityParser.build(dataset=dataset)

    dataset[ENTITIES] = {
        intent_name: entity_data
        for intent_name, entity_data in sorted(iteritems(dataset[ENTITIES]))}

    for entity_name, entity in iteritems(dataset[ENTITIES]):
        utterance_entities = utterance_entities_values[entity_name]
        if is_builtin_entity(entity_name):
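
The fix above rebuilds the intents and entities dictionaries from their items sorted by name before iterating over them, so every later traversal sees the same order. A minimal standalone sketch of the pattern, using plain `dict.items()` where the library code uses `future.utils.iteritems` for Python 2/3 compatibility:

    dataset = {"intents": {"MakeTea": {}, "MakeCoffee": {}}}

    # On CPython 3.6+ dicts preserve insertion order, so rebuilding the dict
    # from items sorted by key makes every later traversal deterministic.
    dataset["intents"] = {
        name: data for name, data in sorted(dataset["intents"].items())
    }
    assert list(dataset["intents"]) == ["MakeCoffee", "MakeTea"]
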
10 changes: 4 additions & 6 deletions snips_nlu/intent_classifier/log_reg_classifier_utils.py
@@ -2,7 +2,7 @@

import itertools
import re
from builtins import next, range, str, zip
from builtins import next, range, str
from copy import deepcopy
from uuid import uuid4

@@ -120,14 +120,12 @@ def build_training_data(dataset, language, data_augmentation_config, resources,

noise_class = intent_index

# Computing dataset statistics
nb_utterances = [len(intent[UTTERANCES]) for intent in itervalues(intents)]

augmented_utterances = []
utterance_classes = []
for nb_utterance, intent_name in zip(nb_utterances, intents):
for intent_name, intent_data in sorted(iteritems(intents)):
nb_utterances = len(intent_data[UTTERANCES])
min_utterances_to_generate = max(
data_augmentation_config.min_utterances, nb_utterance)
data_augmentation_config.min_utterances, nb_utterances)
utterances = augment_utterances(
dataset, intent_name, language=language,
min_utterances=min_utterances_to_generate,
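
The rewritten loop visits intents in sorted name order instead of relying on the dictionary's own ordering, so the same class index is assigned to the same intent on every training run. A simplified sketch of the idea, with illustrative intent data and an illustrative minimum-utterances value:

    intents = {
        "MakeTea": {"utterances": ["u1", "u2", "u3"]},
        "MakeCoffee": {"utterances": ["u4"]},
    }
    min_utterances_config = 20  # stands in for data_augmentation_config.min_utterances

    utterance_classes = []
    for class_idx, (name, data) in enumerate(sorted(intents.items())):
        nb_utterances = len(data["utterances"])
        min_utterances_to_generate = max(min_utterances_config, nb_utterances)
        # class_idx is now stable: "MakeCoffee" -> 0, "MakeTea" -> 1 on every run
        utterance_classes += [class_idx] * min_utterances_to_generate

    assert utterance_classes.count(0) == 20 and utterance_classes.count(1) == 20
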
4 changes: 2 additions & 2 deletions snips_nlu/intent_parser/deterministic_intent_parser.py
@@ -7,7 +7,7 @@
from collections import defaultdict
from pathlib import Path

from future.utils import iteritems, iterkeys, itervalues
from future.utils import iteritems, itervalues
from snips_nlu_utils import normalize

from snips_nlu.common.dataset_utils import get_slot_name_mappings
@@ -483,7 +483,7 @@ def _get_range_shift(matched_range, ranges_mapping):

def _get_group_names_to_slot_names(slot_names_mapping):
slot_names = {slot_name for mapping in itervalues(slot_names_mapping)
for slot_name in iterkeys(mapping)}
for slot_name in mapping}
return {"group%s" % i: name
for i, name in enumerate(sorted(slot_names))}

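
For readability, here is the updated helper on its own with an illustrative slot-name mapping (the library version goes through `future.utils.itervalues`): collecting every slot name and sorting it guarantees that the same regex group index is assigned to the same slot on every run.

    def get_group_names_to_slot_names(slot_names_mapping):
        # Gather all slot names across intents, then sort them so the regex
        # group index given to each slot does not depend on dict ordering.
        slot_names = {slot_name for mapping in slot_names_mapping.values()
                      for slot_name in mapping}
        return {"group%s" % i: name
                for i, name in enumerate(sorted(slot_names))}

    mapping = {
        "MakeTea": {"beverage_temperature": "Temperature"},
        "MakeCoffee": {"number_of_cups": "snips/number"},
    }
    assert get_group_names_to_slot_names(mapping) == {
        "group0": "beverage_temperature",
        "group1": "number_of_cups",
    }
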
51 changes: 1 addition & 50 deletions snips_nlu/tests/integration_test.py
@@ -1,8 +1,7 @@
# coding=utf-8
from __future__ import print_function, unicode_literals

import json
from builtins import range, str
from builtins import str

from future.utils import iteritems
from snips_nlu_metrics import compute_cross_val_metrics
@@ -51,33 +50,6 @@ def check_metrics(self, results):
"Slot f1 score is too low (%.3f) for slot '%s' of intent "
"'%s'" % (slot_f1, slot_name, intent_name))

def test_nlu_engine_training_is_deterministic(self):
# We can't write a test to ensure the NLU training is always the same
# instead we train the NLU 10 times and check the learnt parameters.
# It does not bring any guarantee but it might alert us once in a while

# Given
num_runs = 10
random_state = 42

with PERFORMANCE_DATASET_PATH.open("r") as f:
dataset = json.load(f)

ref_log_reg, ref_crfs = None, None
for _ in range(num_runs):
# When
engine = TrainingEngine(random_state=random_state).fit(dataset)
log_reg = _extract_log_reg(engine)
crfs = _extract_crfs(engine)

if ref_log_reg is None:
ref_log_reg = log_reg
ref_crfs = crfs
else:
# Then
self.assertDictEqual(ref_log_reg, log_reg)
self.assertDictEqual(ref_crfs, crfs)


def _slot_matching_lambda(lhs_slot, rhs_slot):
lhs_value = lhs_slot["text"]
@@ -93,24 +65,3 @@ def _slot_matching_lambda(lhs_slot, rhs_slot):
if rhs_tokens and rhs_tokens[0].lower() in SKIPPED_DATE_PREFIXES:
rhs_tokens = rhs_tokens[1:]
return lhs_tokens == rhs_tokens


def _extract_log_reg(engine):
log_reg = dict()
intent_classifier = engine.intent_parsers[1].intent_classifier
log_reg["intent_list"] = intent_classifier.intent_list
log_reg["coef"] = intent_classifier.classifier.coef_.tolist()
log_reg["intercept"] = intent_classifier.classifier.intercept_.tolist()
log_reg["t_"] = intent_classifier.classifier.t_
return log_reg


def _extract_crfs(engine):
crfs = dict()
slot_fillers = engine.intent_parsers[1].slot_fillers
for intent, slot_filler in iteritems(slot_fillers):
crfs[intent] = {
"state_features": slot_filler.crf_model.state_features_,
"transition_features": slot_filler.crf_model.transition_features_
}
return crfs
25 changes: 25 additions & 0 deletions snips_nlu/tests/test_crf_slot_filler.py
@@ -989,3 +989,28 @@ def __del__(self):
Features not seen at train time:
- ngram_1:text"""
self.assertEqual(expected_log, log)

def test_training_should_be_reproducible(self):
# Given
random_state = 42
dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups""")
dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

# When
slot_filler1 = CRFSlotFiller(random_state=random_state)
slot_filler1.fit(dataset, "MakeTea")

slot_filler2 = CRFSlotFiller(random_state=random_state)
slot_filler2.fit(dataset, "MakeTea")

# Then
self.assertDictEqual(slot_filler1.crf_model.state_features_,
slot_filler2.crf_model.state_features_)
self.assertDictEqual(slot_filler1.crf_model.transition_features_,
slot_filler2.crf_model.transition_features_)
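
The assertions compare the learnt CRF weights directly: in sklearn-crfsuite, which is assumed here to be what backs `crf_model`, `state_features_` maps (attribute, label) pairs to weights and `transition_features_` maps (label, label) pairs to weights. A toy sketch of the same check on a bare sklearn-crfsuite model:

    import sklearn_crfsuite

    X = [[{"word": "make"}, {"word": "tea"}],
         [{"word": "brew"}, {"word": "coffee"}]]
    y = [["O", "B-beverage"], ["O", "B-beverage"]]

    crf1 = sklearn_crfsuite.CRF(algorithm="lbfgs", max_iterations=10)
    crf1.fit(X, y)
    crf2 = sklearn_crfsuite.CRF(algorithm="lbfgs", max_iterations=10)
    crf2.fit(X, y)

    # With identical inputs, identical hyper-parameters and no injected
    # randomness, the learnt weights should come out identical.
    assert crf1.state_features_ == crf2.state_features_
    assert crf1.transition_features_ == crf2.transition_features_
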
38 changes: 38 additions & 0 deletions snips_nlu/tests/test_deterministic_intent_parser.py
@@ -4,8 +4,10 @@
import io
from builtins import range

from checksumdir import dirhash
from mock import patch

from snips_nlu.common.io_utils import temp_dir
from snips_nlu.constants import (
DATA, END, ENTITY, LANGUAGE_EN, RES_ENTITY, RES_INTENT, RES_INTENT_NAME,
RES_PROBA, RES_SLOTS, RES_VALUE, SLOT_NAME, START, TEXT, STOP_WORDS)
@@ -1201,3 +1203,39 @@ def test_should_get_range_shift(self):
# When / Then
self.assertEqual(-1, _get_range_shift((6, 7), ranges_mapping))
self.assertEqual(2, _get_range_shift((12, 13), ranges_mapping))

def test_training_should_be_reproducible(self):
# Given
random_state = 42
dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

# When
parser1 = DeterministicIntentParser(random_state=random_state)
parser1.fit(dataset)

parser2 = DeterministicIntentParser(random_state=random_state)
parser2.fit(dataset)

# Then
with temp_dir() as tmp_dir:
dir_parser1 = tmp_dir / "parser1"
dir_parser2 = tmp_dir / "parser2"
parser1.persist(dir_parser1)
parser2.persist(dir_parser2)
hash1 = dirhash(str(dir_parser1), 'sha256')
hash2 = dirhash(str(dir_parser2), 'sha256')
self.assertEqual(hash1, hash2)
120 changes: 120 additions & 0 deletions snips_nlu/tests/test_intent_classifier_featurizer.py
@@ -5,8 +5,10 @@
from builtins import str, zip, range

import numpy as np
from checksumdir import dirhash
from mock import patch, MagicMock

from snips_nlu.common.io_utils import temp_dir
from snips_nlu.common.utils import json_string
from snips_nlu.constants import LANGUAGE_EN, STEMS, WORD_CLUSTERS, STOP_WORDS
from snips_nlu.dataset import Dataset
@@ -309,6 +311,48 @@ def test_fit_cooccurrence_vectorizer_feature_selection(self, mocked_chi2):
self.assertDictEqual(
expected_pairs, featurizer.cooccurrence_vectorizer.word_pairs)

def test_training_should_be_reproducible(self):
# Given
dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
utterances = [
text_to_utterance("please make me two hots cups of tea"),
text_to_utterance("i want a cup of coffee"),
]
classes = np.array([0, 1])
shared = self.get_shared_data(dataset)
shared["random_state"] = 42

# When
featurizer1 = Featurizer(**shared)
featurizer1.fit(dataset, utterances, classes, max(classes))

featurizer2 = Featurizer(**shared)
featurizer2.fit(dataset, utterances, classes, max(classes))

# Then
with temp_dir() as tmp_dir:
dir_featurizer1 = tmp_dir / "featurizer1"
dir_featurizer2 = tmp_dir / "featurizer2"
featurizer1.persist(dir_featurizer1)
featurizer2.persist(dir_featurizer2)
hash1 = dirhash(str(dir_featurizer1), 'sha256')
hash2 = dirhash(str(dir_featurizer2), 'sha256')
self.assertEqual(hash1, hash2)


class TestTfidfVectorizer(FixtureTest):

@@ -675,6 +719,44 @@ def test_preprocess(self):

self.assertSequenceEqual(expected_data, processed_data)

def test_training_should_be_reproducible(self):
# Given
dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
x = [text_to_utterance("please make me two hots cups of tea")]
shared = self.get_shared_data(dataset)
shared["random_state"] = 42

# When
vectorizer1 = TfidfVectorizer(**shared)
vectorizer1.fit(x, dataset)

vectorizer2 = TfidfVectorizer(**shared)
vectorizer2.fit(x, dataset)

# Then
with temp_dir() as tmp_dir:
dir_vectorizer1 = tmp_dir / "vectorizer1"
dir_vectorizer2 = tmp_dir / "vectorizer2"
vectorizer1.persist(dir_vectorizer1)
vectorizer2.persist(dir_vectorizer2)
hash1 = dirhash(str(dir_vectorizer1), 'sha256')
hash2 = dirhash(str(dir_vectorizer2), 'sha256')
self.assertEqual(hash1, hash2)


class CooccurrenceVectorizerTest(FixtureTest):

@@ -1193,3 +1275,41 @@ def test_preprocess(self):
]

self.assertSequenceEqual(expected_data, processed_data)

def test_training_should_be_reproducible(self):
# Given
dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
x = [text_to_utterance("please make me two hots cups of tea")]
shared = self.get_shared_data(dataset)
shared["random_state"] = 42

# When
vectorizer1 = CooccurrenceVectorizer(**shared)
vectorizer1.fit(x, dataset)

vectorizer2 = CooccurrenceVectorizer(**shared)
vectorizer2.fit(x, dataset)

# Then
with temp_dir() as tmp_dir:
dir_vectorizer1 = tmp_dir / "vectorizer1"
dir_vectorizer2 = tmp_dir / "vectorizer2"
vectorizer1.persist(dir_vectorizer1)
vectorizer2.persist(dir_vectorizer2)
hash1 = dirhash(str(dir_vectorizer1), 'sha256')
hash2 = dirhash(str(dir_vectorizer2), 'sha256')
self.assertEqual(hash1, hash2)