Initial code review and notebook expansion.
cgpotts committed Apr 17, 2022
1 parent 05617d7 commit 9da46b4
Showing 6 changed files with 1,322 additions and 1,240 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -67,3 +67,5 @@ nlidata/*
rel_ext_data*
*_solved.ipynb
.DS_Store
+ColBERT*
+experiments*
1,027 changes: 0 additions & 1,027 deletions IIT_01.ipynb

This file was deleted.

108 changes: 66 additions & 42 deletions iit.py
@@ -4,6 +4,10 @@
from utils import randvec
import copy

+__author__ = "Atticus Geiger"
+__version__ = "CS224u, Stanford, Spring 2022"


class IITModel(torch.nn.Module):
def __init__(self, model, layers, id_to_coords,device):
super().__init__()
@@ -16,8 +20,11 @@ def no_IIT_forward(self, X):
return self.model(X)

def forward(self, X):
-base,coord_ids,sources = X[:,0,:].squeeze(1).type(torch.FloatTensor).to(self.device), X[:,1,:].squeeze(1).type(torch.FloatTensor).to(self.device), X[:,2:,:].to(self.device)
-sources = [sources[:,j,:].squeeze(1).type(torch.FloatTensor).to(self.device) for j in range(sources.shape[1])]
+base = X[:,0,:].squeeze(1).type(torch.FloatTensor).to(self.device)
+coord_ids = X[:,1,:].squeeze(1).type(torch.FloatTensor).to(self.device)
+sources = X[:,2:,:].to(self.device)
+sources = [sources[:,j,:].squeeze(1).type(torch.FloatTensor).to(self.device)
+for j in range(sources.shape[1])]
gets = self.id_to_coords[int(coord_ids.flatten()[0])]
sets = copy.deepcopy(gets)
self.activation = dict()
@@ -68,52 +75,56 @@ def retrieve_activations(self, input, get, sets):
handler.remove()
return self.activation[f'{get["layer"]}-{get["start"]}-{get["end"]}']
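For orientation, `id_to_coords` maps the integer intervention id carried in the second slice of `X` to the coordinates of the hidden activations to interchange. A minimal sketch of the assumed entry format, inferred from the `get["layer"]`/`get["start"]`/`get["end"]` keys used above (the real mapping may wrap these dicts in lists):

# Hypothetical id_to_coords for a model whose layer 1 stores two variables;
# the exact structure is an assumption based on how `gets` is used above.
id_to_coords = {
    0: {"layer": 1, "start": 0, "end": 2},  # slice of layer 1 holding variable 1
    1: {"layer": 1, "start": 2, "end": 4},  # slice of layer 1 holding variable 2
}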


-# def get_IIT_MoNLI_dataset(variable, embed_dim, size):
def get_IIT_equality_dataset_both(embed_dim, size):
train_dataset = IIT_PremackDatasetBoth(
embed_dim=embed_dim,
size=size)
X_base_train, X_sources_train, y_base_train, y_IIT_train, interventions = train_dataset.create()
X_base_train = torch.tensor(X_base_train)
X_sources_train = [torch.tensor(X_source_train) for X_source_train in X_sources_train]
y_base_train = torch.tensor(y_base_train)
y_IIT_train = torch.tensor(y_IIT_train)
interventions = torch.tensor(interventions)
return X_base_train, X_sources_train, y_base_train, y_IIT_train, interventions


def get_IIT_equality_dataset(variable, embed_dim, size):
class_size = size/2
-train_dataset = IIT_PremackDataset(variable,
-embed_dim=embed_dim,
-n_pos=class_size,
-n_neg=class_size)
+train_dataset = IIT_PremackDataset(
+variable,
+embed_dim=embed_dim,
+n_pos=class_size,
+n_neg=class_size)
X_base_train, X_sources_train, y_base_train, y_IIT_train, interventions = train_dataset.create()
X_base_train = torch.tensor(X_base_train)
X_sources_train = [torch.tensor(X_source_train) for X_source_train in X_sources_train]
y_base_train = torch.tensor(y_base_train)
y_IIT_train = torch.tensor(y_IIT_train)
interventions = torch.tensor(interventions)
return X_base_train, X_sources_train, y_base_train, y_IIT_train, interventions


def get_equality_dataset(embed_dim, size):
class_size = size/2
train_dataset = PremackDataset(
embed_dim=embed_dim,
n_pos=class_size,
n_neg=class_size)
X_train, y_train = train_dataset.create()

test_dataset = PremackDataset(
embed_dim=embed_dim,
n_pos=class_size,
n_neg=class_size)
X_test, y_test = test_dataset.create()

train_dataset.test_disjoint(test_dataset)
X_train = torch.tensor(X_train)
X_test = torch.tensor(X_test)

return X_train, X_test, y_train, y_test, test_dataset
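A quick usage sketch for these helpers (sizes are arbitrary, and the value passed for `variable` is a placeholder; see `IIT_PremackDataset` for the accepted values):

# Plain equality data: train/test inputs plus the test dataset object.
X_train, X_test, y_train, y_test, test_dataset = get_equality_dataset(
    embed_dim=4, size=1000)

# IIT variant: also returns source inputs, counterfactual labels, and
# intervention ids for interchange-intervention training.
X_base, X_sources, y_base, y_IIT, interventions = get_IIT_equality_dataset(
    variable="V1",  # placeholder; IIT_PremackDataset defines the valid choices
    embed_dim=4,
    size=1000)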


class EqualityDataset:
@@ -461,7 +472,8 @@ def create(self):
data = [((np.concatenate(x1), np.concatenate(x2)),(np.concatenate(x3), np.concatenate(x4)), base_label, IIT_label, intervention)
for (x1, x2,x3,x4), base_label, IIT_label, intervention in data]
if self.flatten_root:
-data = [(np.concatenate(base), np.concatenate(source), label, IIT_label, intervention) for base, source, label, IIT_label, intervention in data]
+data = [(np.concatenate(base), np.concatenate(source), label, IIT_label, intervention)
+for base, source, label, IIT_label, intervention in data]
base, source, y, IIT_y, interventions = zip(*data)
self.base = np.array(base)
self.source = np.array(source)
@@ -648,6 +660,7 @@ def _create_diff_pair(self):
assert not np.array_equal(vec1, vec2)
return (vec1, vec2)


class IIT_PremackDatasetBoth:

V1 = 0
@@ -682,9 +695,20 @@ def create(self):
random.shuffle(data)
data = data.copy()
if self.flatten_root or self.flatten_leaves:
-data = [(((np.concatenate(x1), np.concatenate(x2)),(np.concatenate(x3), np.concatenate(x4)),(np.concatenate(x5), np.concatenate(x6))), base_label, IIT_label, intervention) for (x1, x2,x3,x4,x5,x6), base_label, IIT_label, intervention in data]
+data = [
+(
+(
+(np.concatenate(x1), np.concatenate(x2)),
+(np.concatenate(x3), np.concatenate(x4)),
+(np.concatenate(x5), np.concatenate(x6))
+),
+base_label, IIT_label, intervention
+)
+for (x1, x2,x3,x4,x5,x6), base_label, IIT_label, intervention in data
+]
if self.flatten_root:
-data = [(np.concatenate(base), np.concatenate(source),np.concatenate(source2), label, IIT_label, intervention) for (base, source, source2), label, IIT_label, intervention in data]
+data = [(np.concatenate(base), np.concatenate(source),np.concatenate(source2), label, IIT_label, intervention)
+for (base, source, source2), label, IIT_label, intervention in data]
base, source, source2, y, IIT_y, interventions = zip(*data)
self.base = np.array(base)
self.source = np.array(source)
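The flattening above is plain vector concatenation; a small numpy illustration of the two flags (dimensions are arbitrary):

import numpy as np

# With embed_dim=2, a leaf pair is two vectors; flatten_leaves applies
# np.concatenate to each pair, as in the list comprehensions above.
a, b = np.array([1.0, 2.0]), np.array([1.0, 2.0])
left = np.concatenate((a, b))    # shape (4,)
right = np.concatenate((a, b))   # shape (4,)

# flatten_root then concatenates the flattened pairs of a tree into one
# input vector for the classifier.
root = np.concatenate((left, right))  # shape (8,)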
1,212 changes: 1,212 additions & 0 deletions iit_equality.ipynb

Large diffs are not rendered by default.

156 changes: 25 additions & 131 deletions torch_deep_neural_classifier.py
@@ -2,34 +2,32 @@
import torch
import torch.nn as nn
import torch.utils.data
-from torch_model_base import TorchModelBase
+from torch_shallow_neural_classifier import TorchShallowNeuralClassifier
import utils

__author__ = "Christopher Potts"
__version__ = "CS224u, Stanford, Spring 2021"
__author__ = "Atticus Geiger"
__version__ = "CS224u, Stanford, Spring 2022"


class ActivationLayer(torch.nn.Module):
def __init__(self, input_dim, output_dim, device, hidden_activation):
super().__init__()
self.linear = nn.Linear(input_dim, output_dim, device=device)
self.activation = hidden_activation

-def forward(self,x):
+def forward(self, x):
return self.activation(self.linear(x))

-class TorchDeepNeuralClassifier(TorchModelBase):
+class TorchDeepNeuralClassifier(TorchShallowNeuralClassifier):
def __init__(self,
hidden_dim=50,
hidden_activation=nn.Tanh(),
num_layers=1,
**base_kwargs):
"""
-A model
-h = f(xW_xh + b_h)
-y = softmax(hW_hy + b_y)
-with a cross-entropy loss and f determined by `hidden_activation`.
+A dense, feed-forward network with the number of hidden layers
+set by `num_layers`.
Parameters
----------
@@ -40,6 +38,9 @@ def __init__(self,
The non-linear activation function used by the network for the
hidden layers.
+num_layers : int
+Number of hidden layers in the network.
**base_kwargs
For details, see `torch_model_base.py`.
@@ -58,7 +59,7 @@ def __init__(self,
self.hidden_activation = hidden_activation
super().__init__(**base_kwargs)
self.loss = nn.CrossEntropyLoss(reduction="mean")
-self.params += ['hidden_dim', 'hidden_activation']
+self.params += ['hidden_dim', 'hidden_activation', 'num_layers']

def build_graph(self):
"""
@@ -69,128 +70,21 @@ def build_graph(self):
nn.Module
"""
-self.layers = [ActivationLayer(self.input_dim, self.hidden_dim, self.device, self.hidden_activation)]
+# Input to hidden:
+self.layers = [
+ActivationLayer(
+self.input_dim, self.hidden_dim, self.device, self.hidden_activation)]
+# Hidden to hidden:
for _ in range(self.num_layers-1):
-self.layers += [ActivationLayer(self.hidden_dim, self.hidden_dim, self.device, self.hidden_activation)]
-self.layers.append(nn.Linear(self.hidden_dim, self.n_classes_, device=self.device))
+self.layers += [
+ActivationLayer(
+self.hidden_dim, self.hidden_dim, self.device, self.hidden_activation)]
+# Hidden to output:
+self.layers.append(
+nn.Linear(self.hidden_dim, self.n_classes_, device=self.device))
return nn.Sequential(*self.layers)
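For concreteness, with `num_layers=2` the returned module is equivalent to the following stack (illustrative sizes; `ActivationLayer` is the class defined above):

import torch.nn as nn

device = "cpu"  # illustrative
graph = nn.Sequential(
    ActivationLayer(10, 50, device, nn.Tanh()),  # input (10) to hidden (50)
    ActivationLayer(50, 50, device, nn.Tanh()),  # hidden to hidden
    nn.Linear(50, 3, device=device))             # hidden to output (3 classes)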

-def build_dataset(self, X, y=None):
-"""
-Define datasets for the model.
-Parameters
-----------
-X : iterable of length `n_examples`
-Each element must have the same length.
-y: None or iterable of length `n_examples`
-Attributes
-----------
-input_dim : int
-Set based on `X.shape[1]` after `X` has been converted to
-`np.array`.
-Returns
--------
-torch.utils.data.TensorDataset` Where `y=None`, the dataset will
-yield single tensors `X`. Where `y` is specified, it will yield
-`(X, y)` pairs.
-"""
-X = np.array(X)
-self.input_dim = X.shape[1]
-X = torch.FloatTensor(X)
-if y is None:
-dataset = torch.utils.data.TensorDataset(X)
-else:
-self.classes_ = sorted(set(y))
-self.n_classes_ = len(self.classes_)
-class2index = dict(zip(self.classes_, range(self.n_classes_)))
-y = [class2index[label] for label in y]
-y = torch.tensor(y)
-dataset = torch.utils.data.TensorDataset(X, y)
-return dataset

-def score(self, X, y, device=None):
-"""
-Uses macro-F1 as the score function. Note: this departs from
-`sklearn`, where classifiers use accuracy as their scoring
-function. Using macro-F1 is more consistent with our course.
-This function can be used to evaluate models, but its primary
-use is in cross-validation and hyperparameter tuning.

-Parameters
-----------
-X: np.array, shape `(n_examples, n_features)`
-y: iterable, shape `len(n_examples)`
-These can be the raw labels. They will converted internally
-as needed. See `build_dataset`.
-device: str or None
-Allows the user to temporarily change the device used
-during prediction. This is useful if predictions require a
-lot of memory and so are better done on the CPU. After
-prediction is done, the model is returned to `self.device`.
-Returns
--------
-float
-"""
-preds = self.predict(X, device=device)
-return utils.safe_macro_f1(y, preds)

-def predict_proba(self, X, device=None):
-"""
-Predicted probabilities for the examples in `X`.
-Parameters
-----------
-X : np.array, shape `(n_examples, n_features)`
-device: str or None
-Allows the user to temporarily change the device used
-during prediction. This is useful if predictions require a
-lot of memory and so are better done on the CPU. After
-prediction is done, the model is returned to `self.device`.
-Returns
--------
-np.array, shape `(len(X), self.n_classes_)`
-Each row of this matrix will sum to 1.0.
-"""
-preds = self._predict(X, device=device)
-probs = torch.softmax(preds, dim=1).cpu().numpy()
-return probs

-def predict(self, X, device=None):
-"""
-Predicted labels for the examples in `X`. These are converted
-from the integers that PyTorch needs back to their original
-values in `self.classes_`.
-Parameters
-----------
-X : np.array, shape `(n_examples, n_features)`
-device: str or None
-Allows the user to temporarily change the device used
-during prediction. This is useful if predictions require a
-lot of memory and so are better done on the CPU. After
-prediction is done, the model is returned to `self.device`.
-Returns
--------
-list, length len(X)
-"""
-probs = self.predict_proba(X, device=device)
-return [self.classes_[i] for i in probs.argmax(axis=1)]

def simple_example():
"""Assess on the digits dataset."""
@@ -207,7 +101,7 @@ def simple_example():
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.33, random_state=42)

-mod = TorchShallowNeuralClassifier()
+mod = TorchDeepNeuralClassifier(num_layers=2)

print(mod)
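Beyond `print(mod)`, typical usage follows the sklearn-style interface now inherited from `TorchShallowNeuralClassifier`; a sketch continuing `simple_example()` (assuming `fit`, `predict`, and `score` behave as documented above):

# Continues the train/test split created above in simple_example().
mod = TorchDeepNeuralClassifier(num_layers=2)
mod.fit(X_train, y_train)
preds = mod.predict(X_test)
print(mod.score(X_test, y_test))  # macro-F1, as in the inherited score method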

57 changes: 17 additions & 40 deletions torch_deep_neural_classifier_iit.py
@@ -6,15 +6,23 @@
import utils
from iit import IITModel

__author__ = "Christopher Potts"
__version__ = "CS224u, Stanford, Spring 2021"
__author__ = "Atticus Geiger"
__version__ = "CS224u, Stanford, Spring 2022"


+class CrossEntropyLossIIT(nn.Module):
+def __init__(self):
+super().__init__()
+self.loss = nn.CrossEntropyLoss(reduction="mean")
+
+def forward(self, preds, labels):
+return self.loss(preds[0], labels[:, 0]) + self.loss(preds[1], labels[:, 1])
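The new loss sums two cross-entropies: one over the counterfactual (IIT) predictions and one over the base predictions, matching the `torch.stack((IIT_y, base_y), dim=1)` label layout used in `build_dataset` below. A small sanity-check sketch with dummy values:

import torch

preds = (torch.randn(4, 2), torch.randn(4, 2))  # (IIT logits, base logits)
labels = torch.stack(
    (torch.tensor([0, 1, 0, 1]),   # IIT labels in column 0
     torch.tensor([1, 1, 0, 0])),  # base labels in column 1
    dim=1)
loss = CrossEntropyLossIIT()(preds, labels)  # scalar sum of the two terms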


class TorchDeepNeuralClassifierIIT(TorchDeepNeuralClassifier):
-def __init__(self,id_to_coords = None, **base_kwargs):
+def __init__(self, id_to_coords=None, **base_kwargs):
super().__init__(**base_kwargs)
-loss_function= nn.CrossEntropyLoss(reduction="mean")
-self.loss = lambda preds, labels: loss_function(preds[0],labels[:,0]) + loss_function(preds[1],labels[:,1])
+self.loss = CrossEntropyLossIIT()
self.id_to_coords = id_to_coords
self.shuffle_train = False

@@ -33,48 +41,22 @@ def batched_indices(self, max_len):
return output

def build_dataset(self, base, sources, base_y, IIT_y, coord_ids):
"""
Define datasets for the model.
Parameters
----------
X : iterable of length `n_examples`
Each element must have the same length.
y: None or iterable of length `n_examples`
Attributes
----------
input_dim : int
Set based on `X.shape[1]` after `X` has been converted to
`np.array`.
Returns
-------
torch.utils.data.TensorDataset` Where `y=None`, the dataset will
yield single tensors `X`. Where `y` is specified, it will yield
`(X, y)` pairs.
"""
base = torch.FloatTensor(np.array(base))
sources = [torch.FloatTensor(np.array(source)) for source in sources]
self.input_dim = base.shape[1]
coord_ids = torch.FloatTensor(np.array(coord_ids))

-IIT_y = np.array(IIT_y)
-self.classes_ = sorted(set(IIT_y))
-self.n_classes_ = len(self.classes_)
-class2index = dict(zip(self.classes_, range(self.n_classes_)))
-IIT_y = [class2index[int(label)] for label in IIT_y]
-IIT_y = torch.tensor(IIT_y)

base_y = np.array(base_y)
self.classes_ = sorted(set(base_y))
self.n_classes_ = len(self.classes_)
class2index = dict(zip(self.classes_, range(self.n_classes_)))
base_y = [class2index[label] for label in base_y]
base_y = torch.tensor(base_y)

+IIT_y = np.array(IIT_y)
+IIT_y = [class2index[int(label)] for label in IIT_y]
+IIT_y = torch.tensor(IIT_y)

bigX = torch.stack([base, coord_ids.unsqueeze(1).expand(-1, base.shape[1])] + sources, dim=1)
bigy = torch.stack((IIT_y, base_y), dim=1)
dataset = torch.utils.data.TensorDataset(bigX,bigy)
@@ -83,8 +65,3 @@ def build_dataset(self, base, sources, base_y, IIT_y, coord_ids):
def prep_input(self, base, sources, coord_ids):
bigX = torch.stack([base, coord_ids.unsqueeze(1).expand(-1, base.shape[1])] + sources, dim=1)
return bigX
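`build_dataset` and `prep_input` share the packing convention that `IITModel.forward` unpacks: slice 0 of dim 1 is the base input, slice 1 repeats the intervention id across the feature dimension, and the remaining slices are the sources. A shape sketch with arbitrary sizes:

import torch

batch, dim = 8, 16
base = torch.randn(batch, dim)
sources = [torch.randn(batch, dim)]  # a single source input here
coord_ids = torch.zeros(batch)       # every example uses intervention id 0

bigX = torch.stack(
    [base, coord_ids.unsqueeze(1).expand(-1, base.shape[1])] + sources, dim=1)
print(bigX.shape)  # torch.Size([8, 3, 16]): base, id channel, sources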



-if __name__ == '__main__':
-simple_example()
