
Commit

Improved notebook and supporting code
cgpotts committed Mar 20, 2019
1 parent c018c81 commit 0f56eb8
Showing 21 changed files with 4,385 additions and 4,426 deletions.
6 changes: 3 additions & 3 deletions evaluation_methods.ipynb
@@ -14,7 +14,7 @@
"outputs": [],
"source": [
"__author__ = \"Christopher Potts\"\n",
"__version__ = \"CS224u, Stanford, Spring 2018 term\""
"__version__ = \"CS224u, Stanford, Spring 2019\""
]
},
{
@@ -23,7 +23,7 @@
"source": [
"## Contents\n",
"\n",
"0. [Overview](#Overview)\n",
"1. [Overview](#Overview)\n",
"0. [Set-up](#Set-up)\n",
"0. [Data organization](#Data-organization)\n",
" 0. [Train/dev/test](#Train/dev/test)\n",
@@ -743,7 +743,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
"version": "3.7.1"
}
},
"nbformat": 4,
6 changes: 3 additions & 3 deletions evaluation_metrics.ipynb
@@ -14,14 +14,14 @@
"outputs": [],
"source": [
"__author__ = \"Christopher Potts\"\n",
"__version__ = \"CS224u, Stanford, Spring 2018 term\""
"__version__ = \"CS224u, Stanford, Spring 2019\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"0. [Overview](#Overview)\n",
"1. [Overview](#Overview)\n",
"0. [Set-up](#Set-up)\n",
"0. [Classifier metrics](#Classifier-metrics)\n",
" 0. [Confusion matrix](#Confusion-matrix)\n",
@@ -1881,7 +1881,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
"version": "3.7.1"
}
},
"nbformat": 4,
Binary file added fig/wordentail-diagram.png
207 changes: 118 additions & 89 deletions nli.py
@@ -5,104 +5,119 @@
import os
import random
from sklearn.feature_extraction import DictVectorizer
from sklearn.metrics import classification_report
from sklearn.metrics import classification_report, accuracy_score, f1_score
from sklearn.model_selection import train_test_split
import utils


__author__ = "Christopher Potts"
__version__ = "CS224u, Stanford, Spring 2018"
__version__ = "CS224u, Stanford, Spring 2019"


BAKEOFF_CONDITION_NAMES = ['edge_disjoint', 'word_disjoint', 'word_disjoint_balanced']
CONDITION_NAMES = [
'edge_disjoint',
'word_disjoint',
'word_disjoint_balanced']


def build_bakeoff_dataset(wordentail_data, vector_func, vector_combo_func):
"""
def word_entail_featurize(data, vector_func, vector_combo_func):
X = []
y = []
for (w1, w2), label in data:
rep = vector_combo_func(vector_func(w1), vector_func(w2))
X.append(rep)
y.append(label)
return X, y


def wordentail_experiment(
train_data,
assess_data,
vector_func,
vector_combo_func,
model):
"""Train and evaluation code for the word-level entailment task.
Parameters
----------
wordentail_data
The contents of `wordentail_filename` loaded from JSON.
train_data : list
assess_data : list
vector_func : function
Any function mapping words in the vocab to vector
representations.
vector_combo_func : function
Any function for combining two vectors into a new vector
of fixed dimensionality.
model : class with `fit` and `predict` methods
Prints
------
To standard output
An sklearn classification report for the assessment data.
Returns
-------
A dict in the same format as `wordentail_data` but with the
pairs of strings for each example replaced by a single vector.
dict with structure
'model': the trained model
'train_data': train_data
'assess_data': assess_data
'macro-F1': macro-F1 score on the assessment data
'vector_func': vector_func
'vector_combo_func': vector_combo_func
We pass 'vector_func' and 'vector_combo_func' through to ensure alignment
between these experiments and the bake-off evaluation.
"""
# A mapping from words (as strings) to their vector
# representations, as determined by vector_func:
vocab = wordentail_data['vocab']
vectors = {w: vector_func(w) for w in vocab}
# Dataset in the format required by the neural network:
dataset = defaultdict(lambda: defaultdict(list))
for condition in BAKEOFF_CONDITION_NAMES:
for split, data in wordentail_data[condition].items():
for (w1, w2), label in data:
# Use vector_combo_func to combine the word vectors for
# w1 and w2, as given by the vectors dictionary above,
# and pair it with the singleton array containing clsname:
rep = vector_combo_func(vectors[w1], vectors[w2])
example = [rep, label]
dataset[condition][split].append(example)
dataset['vocab'] = vocab
return dataset


def bakeoff_experiment(dataset, model, conditions=None):
"""Train and evaluation code for the word-level entailment task.
X_train, y_train = word_entail_featurize(
train_data, vector_func, vector_combo_func)
X_dev, y_dev = word_entail_featurize(
assess_data, vector_func, vector_combo_func)
model.fit(X_train, y_train)
predictions = model.predict(X_dev)
# Report:
print(classification_report(y_dev, predictions))
macrof1 = utils.safe_macro_f1(y_dev, predictions)
return {
'model': model,
'train_data': train_data,
'assess_data': assess_data,
'macro-F1': macrof1,
'vector_func': vector_func,
'vector_combo_func': vector_combo_func}
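
For orientation, here is a minimal sketch of how `wordentail_experiment` might be invoked. The training-data filename and the 'train'/'dev' split keys are assumptions (only the 'word_disjoint' condition and the test filename are confirmed in this module); `randvec_lookup` and `vec_concatenate` are hypothetical stand-ins for a real embedding lookup and combiner:

import json
import os

import numpy as np
from sklearn.linear_model import LogisticRegression

import nli

# Assumed location of the training data; only the test filename is
# confirmed elsewhere in this module.
wordentail_filename = os.path.join(
    'data', 'nlidata', 'nli_wordentail_bakeoff_data.json')

with open(wordentail_filename) as f:
    wordentail_data = json.load(f)

vector_cache = {}

def randvec_lookup(w, n=50):
    # Stand-in vector_func: a fixed random vector per word. A real
    # experiment would look words up in GloVe or similar embeddings.
    if w not in vector_cache:
        vector_cache[w] = np.random.uniform(low=-1.0, high=1.0, size=n)
    return vector_cache[w]

def vec_concatenate(u, v):
    # Stand-in vector_combo_func: concatenate the two word vectors.
    return np.concatenate((u, v))

results = nli.wordentail_experiment(
    train_data=wordentail_data['word_disjoint']['train'],
    assess_data=wordentail_data['word_disjoint']['dev'],
    vector_func=randvec_lookup,
    vector_combo_func=vec_concatenate,
    model=LogisticRegression(solver='liblinear'))

print(results['macro-F1'])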


def bake_off_evaluation(experiment_results, test_data_filename=None):
"""Function for evaluating a trained model on the bake-off test set.
Parameters
----------
dataset : dict
With keys `BAKEOFF_CONDITION_NAMES`, each with values that are lists of
vector pairs, the first giving the example representation and the second
giving its 1d output vector. The expectation is that this was created
by `build_bakeoff_dataset`.
model : class with `fit` and `predict` methods
conditions : list or None
If None, then all of `BAKEOFF_CONDITION_NAMES` are evaluated.
If this is a list, then it should be a subset of
`BAKEOFF_CONDITION_NAMES`.
experiment_results : dict
This should be the return value of `wordentail_experiment`, with at least
keys 'model', 'vector_func', and 'vector_combo_func'.
test_data_filename : str or None
Full path to the test data. If `None`, then we assume the file is
'data/nlidata/nli_wordentail_bakeoff_data-test.json'.
Prints
------
To standard output
An sklearn classification report for the bake-off test set.
"""
if conditions is None:
conditions = BAKEOFF_CONDITION_NAMES
else:
for c in conditions:
if c not in BAKEOFF_CONDITION_NAMES:
raise ValueError(
"Condition {} is not recogized. Conditions must "
"be in {}".format(c, BAKEOFF_CONDITION_NAMES))
# Train the network:
for condition in conditions:
cond_data = dataset[condition]
X_train, y_train = zip(*cond_data['train'])
model.fit(X_train, y_train)
X_dev, y_dev = zip(*cond_data['dev'])
predictions = model.predict(X_dev)
# Report:
print("="*70)
print("{}".format(condition))
print(classification_report(y_dev, predictions))
if condition == 'word_disjoint_balanced':
X_train, y_train = zip(*dataset['word_disjoint']['train'])
model.fit(X_train, y_train)
predictions = model.predict(X_dev)
# Report:
print("="*70)
print("{}, training on word_disjoint".format(condition))
print(classification_report(y_dev, predictions))
if test_data_filename is None:
test_data_filename = os.path.join(
'data', 'nlidata', 'nli_wordentail_bakeoff_data-test.json')
with open(test_data_filename) as f:
wordentail_data = json.load(f)
X_test, y_test = word_entail_featurize(
wordentail_data['word_disjoint']['test'],
vector_func=experiment_results['vector_func'],
vector_combo_func=experiment_results['vector_combo_func'])
predictions = experiment_results['model'].predict(X_test)
# Report:
print(classification_report(y_test, predictions))
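
Continuing the sketch above, the dict returned by `wordentail_experiment` can be passed straight to this function, which re-featurizes the held-out test set with the same `vector_func` and `vector_combo_func`:

import os
import nli

# `results` is the return value of the `wordentail_experiment` sketch
# above; the filename shown is just the documented default made explicit.
nli.bake_off_evaluation(
    results,
    test_data_filename=os.path.join(
        'data', 'nlidata', 'nli_wordentail_bakeoff_data-test.json'))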



def str2tree(s, binarize=False):
@@ -214,47 +229,43 @@ def __repr__(self):
return """"NLIReader({})""".format(d)


SNLI_HOME = os.path.join("nlidata", "snli_1.0")

MULTINLI_HOME = os.path.join("nlidata", "multinli_1.0")


class SNLITrainReader(NLIReader):
def __init__(self, snli_home=SNLI_HOME, **kwargs):
def __init__(self, snli_home, **kwargs):
src_filename = os.path.join(
snli_home, "snli_1.0_train.jsonl")
super(SNLITrainReader, self).__init__(src_filename, **kwargs)


class SNLIDevReader(NLIReader):
def __init__(self, snli_home=SNLI_HOME, **kwargs):
def __init__(self, snli_home, **kwargs):
src_filename = os.path.join(
snli_home, "snli_1.0_dev.jsonl")
super(SNLIDevReader, self).__init__(src_filename, **kwargs)


class MultiNLITrainReader(NLIReader):
def __init__(self, snli_home=MULTINLI_HOME, **kwargs):
def __init__(self, snli_home, **kwargs):
src_filename = os.path.join(
snli_home, "multinli_1.0_train.jsonl")
super(MultiNLITrainReader, self).__init__(src_filename, **kwargs)


class MultiNLIMatchedDevReader(NLIReader):
def __init__(self, multinli_home=MULTINLI_HOME, **kwargs):
def __init__(self, multinli_home, **kwargs):
src_filename = os.path.join(
multinli_home, "multinli_1.0_dev_matched.jsonl")
super(MultiNLIMatchedDevReader, self).__init__(src_filename, **kwargs)


class MultiNLIMismatchedDevReader(NLIReader):
def __init__(self, multinli_home=MULTINLI_HOME, **kwargs):
def __init__(self, multinli_home, **kwargs):
src_filename = os.path.join(
multinli_home, "multinli_1.0_dev_mismatched.jsonl")
super(MultiNLIMismatchedDevReader, self).__init__(src_filename, **kwargs)
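
Since the module-level `SNLI_HOME` and `MULTINLI_HOME` defaults are removed in this commit, callers now pass corpus locations explicitly. A sketch of the new calling convention, where the local paths and the `samp_percentage` keyword (forwarded through `**kwargs` to `NLIReader`) are assumptions:

import os
import nli

# Hypothetical local corpus locations.
SNLI_HOME = os.path.join('data', 'nlidata', 'snli_1.0')
MULTINLI_HOME = os.path.join('data', 'nlidata', 'multinli_1.0')

# Readers are now constructed with an explicit home directory:
train_reader = nli.SNLITrainReader(SNLI_HOME, samp_percentage=0.10)
dev_reader = nli.MultiNLIMatchedDevReader(MULTINLI_HOME)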


def read_annotated_subset(src_filename):
def read_annotated_subset(src_filename, multinli_home):
"""Given an annotation filename from MultiNLI's separate
annotation distribution, associate it with the appropriate
dev examples.
@@ -263,6 +274,8 @@ def read_annotated_subset(src_filename):
----------
src_filename : str
Full path to the annotation file.
multinli_home : str
Full path to the MultiNLI corpus directory.
Returns
-------
@@ -273,9 +286,9 @@
"""
if 'mismatched' in src_filename:
reader = MultiNLIMismatchedDevReader()
reader = MultiNLIMismatchedDevReader(multinli_home)
else:
reader = MultiNLIMatchedDevReader()
reader = MultiNLIMatchedDevReader(multinli_home)
id2ex = {ex.pairID: ex for ex in reader.read()}
data = {}
with open(src_filename) as f:
@@ -404,8 +417,13 @@ def experiment(
Returns
-------
float
The overall scoring metric as determined by `score_metric`.
dict with keys
'model': trained model
'train_dataset': a dataset as returned by `build_dataset`
'assess_dataset': a dataset as returned by `build_dataset`
'predictions': predictions on the assessment data
'metric': `score_func.__name__`
'score': the `score_func` score on the assessment data
"""
# Train dataset:
@@ -417,12 +435,16 @@
# Manage the assessment set-up:
X_train = train['X']
y_train = train['y']
X_assess = None
y_assess = None
raw_train = train['raw_examples']
if assess_reader is None:
X_train, X_assess, y_train, y_assess = train_test_split(
X_train, y_train, train_size=train_size, test_size=None,
random_state=random_state)
X_train, X_assess, y_train, y_assess, raw_train, raw_assess = train_test_split(
X_train, y_train, raw_train,
train_size=train_size, test_size=None, random_state=random_state)
assess = {
'X': X_assess,
'y': y_assess,
'vectorizer': train['vectorizer'],
'raw_examples': raw_assess}
else:
# Assessment dataset using the training vectorizer:
assess = build_dataset(
@@ -437,6 +459,13 @@
predictions = mod.predict(X_assess)
# Report:
if verbose:
print('Accuracy: {0:0.03f}'.format(accuracy_score(y_assess, predictions)))
print(classification_report(y_assess, predictions, digits=3))
# Return the overall score:
return score_func(y_assess, predictions)
# Return the overall score and experimental info:
return {
'model': mod,
'train_dataset': train,
'assess_dataset': assess,
'predictions': predictions,
'metric': score_func.__name__,
'score': score_func(y_assess, predictions)}
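
A hedged sketch of how the new dict-returning `experiment` might be called. The `phi` and `train_func` parameter names, and the assumption that `phi` receives the two parsed sentences as trees, come from the surrounding course materials rather than this diff; `word_overlap_phi` and `fit_softmax_classifier` are illustrative helpers:

from collections import Counter

from sklearn.linear_model import LogisticRegression

import nli

def word_overlap_phi(t1, t2):
    # Indicator features for the words shared by the two sentences,
    # assuming t1 and t2 are trees with a leaves() method.
    return Counter(set(t1.leaves()) & set(t2.leaves()))

def fit_softmax_classifier(X, y):
    mod = LogisticRegression(solver='liblinear', multi_class='auto')
    mod.fit(X, y)
    return mod

# SNLI_HOME as in the reader sketch above.
results = nli.experiment(
    train_reader=nli.SNLITrainReader(SNLI_HOME, samp_percentage=0.10),
    phi=word_overlap_phi,
    train_func=fit_softmax_classifier,
    assess_reader=None,   # triggers the train/assess split shown above
    random_state=42)

print(results['metric'], results['score'])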
