Add Python code, README and plot notebook
jbcdnr committed May 28, 2018
1 parent d331cf8 commit 1a17873
Showing 29 changed files with 1,633 additions and 2 deletions.
41 changes: 39 additions & 2 deletions README.md
@@ -1,2 +1,39 @@
# Sparsified SGD with Memory

Code for the experimental part of the paper [Sparsified SGD with Memory TODO link](). It contains the code for the following experiments:

- Theoretical convergence with different sparsification operators
- Comparison with QSGD
- Multi-core experiments

Use `notebooks/plots.ipynb` to visualize the results.

Please open an issue if you have questions or problems.

### Reproduce the results

To reproduce the results, you can download the datasets from [LibSVM](https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html):

```bash
mkdir data
cd data/
wget https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/rcv1_test.binary.bz2
wget https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/epsilon_normalized.bz2
```

We decompress the LibSVM files and store them in pickle format instead. This takes more space but is faster to load. You can create a pickle file as follows:

```python
import pickle
from sklearn.datasets import load_svmlight_file
X, y = load_svmlight_file('rcv1_test.binary.bz2')
with open('rcv1_test.pickle', 'wb') as f:
    pickle.dump((X, y), f)
```
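
Loading the converted file back mirrors what `experiment.py` does internally; a minimal check that the conversion worked:

```python
import pickle

# X is a scipy sparse matrix for rcv1, y a numpy array of +/-1 labels
with open('rcv1_test.pickle', 'rb') as f:
    X, y = pickle.load(f)
print(X.shape, y.shape)
```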

After updating the paths to the data files in `experiment.py`, you can run our experiments, for example:

```bash
python3 experiment.py rcv1-th results/rcv1-th --nproc 10
```
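
Here `rcv1-th` is one of the experiment names accepted by `experiment.py` (`epsilon-th`, `epsilon-quant`, `epsilon-parallel`, `rcv1-th`, `rcv1-quant`, `rcv1-parallel`). The `--nproc` flag sets the size of the multiprocessing pool used for the non-parallel experiments; the `*-parallel` experiments instead sweep their own list of core counts internally.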

68 changes: 68 additions & 0 deletions base_logistic.py
@@ -0,0 +1,68 @@
import numpy as np
from scipy.special import expit as sigmoid

from parameters import Parameters


class BaseLogistic:
    def __init__(self, params: Parameters):
        self.params = params
        self.w_estimate = None
        self.w = None

    def lr(self, epoch, iteration, num_samples, d):
        # learning rate at the current step for the configured schedule
        p = self.params
        t = epoch * num_samples + iteration
        if p.lr_type == 'constant':
            return p.initial_lr
        if p.lr_type == 'epoch-decay':
            return p.initial_lr * (p.epoch_decay_lr ** epoch)
        if p.lr_type == 'decay':
            return p.initial_lr / (p.regularizer * (t + p.tau))

    def loss(self, X, y):
        # L2-regularized logistic loss at the current estimate
        w = self.w_estimate if self.w_estimate is not None else self.w
        w = w.copy()
        p = self.params
        loss = np.sum(np.log(1 + np.exp(-y * (X @ w)))) / X.shape[0]
        if p.regularizer:
            loss += p.regularizer * np.square(w).sum()
        return loss

    def predict(self, X):
        w = self.w_estimate if self.w_estimate is not None else self.w
        logits = X @ w
        pred = 1 * (logits >= 0.)
        return pred

    def predict_proba(self, X):
        w = self.w_estimate if self.w_estimate is not None else self.w
        logits = X @ w
        return sigmoid(logits)

    def score(self, X, y):
        # accuracy against labels in {-1, +1}
        w = self.w_estimate if self.w_estimate is not None else self.w
        logits = X @ w
        pred = 2 * (logits >= 0.) - 1
        acc = np.mean(pred == y)
        return acc

    def update_estimate(self, t):
        # maintain a (weighted) running average of the iterates w_0, ..., w_t
        p = self.params
        if p.estimate == 'final':
            self.w_estimate = self.w
        elif p.estimate == 'mean':
            # uniform average: rho_t = 1 / (t + 1)
            rho = 1 / (t + 1)
            self.w_estimate = self.w_estimate * (1 - rho) + self.w * rho
        elif p.estimate == 't+tau':
            # weights proportional to (t + tau)
            rho = 2 * (t + p.tau) / ((1 + t) * (t + 2 * p.tau))
            self.w_estimate = self.w_estimate * (1 - rho) + self.w * rho
        elif p.estimate == '(t+tau)^2':
            # weights proportional to (t + tau)^2
            rho = 6 * ((t + p.tau) ** 2) / ((1 + t) * (6 * (p.tau ** 2) + t + 6 * p.tau * t + 2 * (t ** 2)))
            self.w_estimate = self.w_estimate * (1 - rho) + self.w * rho

    def __str__(self):
        return "{}({})".format(self.__class__.__name__, self.params)

    def __repr__(self):
        return str(self)
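
A side note on `update_estimate` above: the `rho` coefficients implement weighted running averages of the iterates. Writing the average with per-step weights $a_t$ and taking $\tau$ to be `p.tau`, the coefficients in the code follow from the closed-form partial sums (a sketch consistent with the expressions above):

```latex
\bar{w}_t = (1 - \rho_t)\,\bar{w}_{t-1} + \rho_t\, w_t,
\qquad
\rho_t = \frac{a_t}{\sum_{s=0}^{t} a_s}

% a_t = t + \tau :
\sum_{s=0}^{t} (s + \tau) = \tfrac{1}{2}(1 + t)(t + 2\tau)
\quad\Longrightarrow\quad
\rho_t = \frac{2 (t + \tau)}{(1 + t)(t + 2\tau)}

% a_t = (t + \tau)^2 :
\sum_{s=0}^{t} (s + \tau)^2 = \tfrac{1}{6}(1 + t)\left(2 t^2 + t + 6 \tau t + 6 \tau^2\right)
\quad\Longrightarrow\quad
\rho_t = \frac{6 (t + \tau)^2}{(1 + t)\left(2 t^2 + t + 6 \tau t + 6 \tau^2\right)}
```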
58 changes: 58 additions & 0 deletions baselines.py
@@ -0,0 +1,58 @@
import argparse
import os
import pickle

import numpy as np
from sklearn.linear_model import SGDClassifier

from utils import pickle_it

"""Arguments"""

parser = argparse.ArgumentParser()
parser.add_argument('directory', type=str)

args = parser.parse_args()
if not os.path.exists(args.directory):
    print('create {}'.format(args.directory))
    os.makedirs(args.directory)

baselines = {}


def loss(clf, X, y, reg):
    # L2-regularized logistic loss of the fitted sklearn classifier
    baseline_loss = np.sum(np.log(1 + np.exp(-y * (X @ clf.coef_.transpose()).squeeze()))) / X.shape[0]
    baseline_loss += reg * np.sum(np.square(clf.coef_))
    return baseline_loss


""" RCV1 test"""
print('RCV1-test')
with open(os.path.expanduser('/mlodata1/jb/data/rcv1-test-1.pickle'), 'rb') as f:
    X, y = pickle.load(f)

reg = 1 / X.shape[0]
clf = SGDClassifier(tol=1e-4, loss='log', penalty='l2', alpha=reg, fit_intercept=False)
clf.fit(X, y)
l = loss(clf, X, y, reg)
print("loss: {}".format(l))
print("train accuracy: {}".format(clf.score(X, y)))
baselines['RCV1-test'] = l

""" EPSILON """

print('epsilon')
with open(os.path.expanduser('/mlodata1/jb/data/epsilon_normalized_1.pickle'), 'rb') as f:
    X, y = pickle.load(f)

reg = 1 / X.shape[0]
clf = SGDClassifier(tol=1e-4, loss='log', penalty='l2', alpha=reg)
clf.fit(X, y)
l = loss(clf, X, y, reg)
print("loss: {}".format(l))
print("train accuracy: {}".format(clf.score(X, y)))
baselines['epsilon'] = l

""" Pickle """
print('baselines', baselines)
pickle_it(baselines, 'baselines', args.directory)
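
`pickle_it` is imported from `utils.py`, which is not part of this diff. From its usage here (`pickle_it(obj, name, directory)`), it presumably amounts to something like the sketch below; the actual helper may differ:

```python
import os
import pickle


def pickle_it(obj, name, directory):
    # assumed behavior: dump `obj` to <directory>/<name>.pickle
    with open(os.path.join(directory, '{}.pickle'.format(name)), 'wb') as f:
        pickle.dump(obj, f)
```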
2 changes: 2 additions & 0 deletions constants.py
@@ -0,0 +1,2 @@
INIT_WEIGHT_STD = 0.01
LOSS_PER_EPOCH = 100
197 changes: 197 additions & 0 deletions experiment.py
@@ -0,0 +1,197 @@
import argparse
import multiprocessing as mp
import os
import pickle

import numpy as np

from logistic import LogisticSGD
from logistic_parallel import LogisticParallelSGD
from parameters import Parameters
from utils import pickle_it

X, y = None, None


def run_logistic(param):
    m = LogisticSGD(param)
    res = m.fit(X, y)
    print('{} - score: {}'.format(param, m.score(X, y)))
    return res


def run_experiment(directory, dataset_pickle, params, nproc=None):
    # dataset is kept in module-level globals so forked pool workers can see it
    global X, y
    if not os.path.exists(directory):
        os.makedirs(directory)
    pickle_it(params, 'params', directory)

    print('load dataset')
    with open(dataset_pickle, 'rb') as f:
        X, y = pickle.load(f)

    print('start experiment')
    with mp.Pool(nproc) as pool:
        results = pool.map(run_logistic, params)

    pickle_it(results, 'results', directory)
    print('results saved in "{}"'.format(directory))


def run_parallel_experiment(directory, dataset_pickle, models, cores, baseline, repeat=3):
    if not os.path.exists(directory):
        os.makedirs(directory)
    pickle_it([m(1) for m in models], 'models', directory)
    pickle_it(cores, 'cores', directory)

    print('load dataset')
    with open(dataset_pickle, 'rb') as f:
        X, y = pickle.load(f)

    print('start experiment')

    # wall-clock time and stopping iteration for each (model, cores, repeat) run
    chronos = np.zeros((len(models), len(cores), repeat))
    stop_times = np.zeros((len(models), len(cores), repeat), dtype=int)

    for r in range(repeat):
        for c_idx, core in enumerate(cores):
            for m_idx, model in enumerate(models):
                p = model(core)
                print("{} - cores {} - repeat {}".format(p, core, r))
                m = LogisticParallelSGD(p)
                timing, epoch, iteration, losses = m.fit_until(X, y, num_features=X.shape[1],
                                                               num_samples=X.shape[0], baseline=baseline)
                chronos[m_idx, c_idx, r] = timing
                stop_times[m_idx, c_idx, r] = epoch * X.shape[0] + iteration

                # checkpoint intermediate results after every run
                pickle_it(chronos, 'chronos', directory)
                pickle_it(stop_times, 'stop_times', directory)

    pickle_it(chronos, 'chronos', directory)
    pickle_it(stop_times, 'stop_times', directory)
    print('results saved in "{}"'.format(directory))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('experiment', type=str)
    parser.add_argument('directory', type=str)
    parser.add_argument('--nproc', type=int, default=1)
    args = parser.parse_args()

    assert args.experiment in ['epsilon-th', 'epsilon-quant', 'epsilon-parallel',
                               'rcv1-th', 'rcv1-quant', 'rcv1-parallel']

    # dataset
    if args.experiment.startswith('epsilon'):
        dataset = os.path.expanduser('/mlodata1/jb/data/epsilon_normalized_1.pickle')
        n, d = 400000, 2000
    elif args.experiment.startswith('rcv1'):
        dataset = os.path.expanduser('/mlodata1/jb/data/rcv1-test-1.pickle')
        n, d = 677399, 47236

    # parameters to evaluate
    if args.experiment == 'epsilon-th':
        params = [
            Parameters(name="full-sgd", num_epoch=3, lr_type='decay', initial_lr=2, tau=1,
                       regularizer=1 / n, estimate='(t+tau)^2'),
            Parameters(name="top1", num_epoch=3, lr_type='decay', initial_lr=2, tau=d,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=1, with_memory=True, take_top=True),
            Parameters(name="top1-no-shift", num_epoch=3, lr_type='decay', initial_lr=2, tau=1,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=1, with_memory=True, take_top=True),
            Parameters(name="rand1", num_epoch=3, lr_type='decay', initial_lr=2, tau=d,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=1, with_memory=True),
            Parameters(name="rand1-no-shift", num_epoch=3, lr_type='decay', initial_lr=2, tau=1,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=1, with_memory=True),
            Parameters(name="rand2", num_epoch=3, lr_type='decay', initial_lr=2, tau=d / 2,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=2, with_memory=True),
            Parameters(name="rand2-no-shift", num_epoch=3, lr_type='decay', initial_lr=2, tau=1,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=2, with_memory=True),
            Parameters(name="rand3", num_epoch=3, lr_type='decay', initial_lr=2, tau=d / 3,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=3, with_memory=True),
            Parameters(name="rand3-no-shift", num_epoch=3, lr_type='decay', initial_lr=2, tau=1,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=3, with_memory=True),
        ]
    elif args.experiment == 'epsilon-quant':
        params = [
            Parameters(name="qsgd-8bits", num_epoch=3, lr_type='decay', initial_lr=2, tau=d,
                       regularizer=1 / n, estimate='(t+tau)^2', qsgd_s=2 ** 8),
            Parameters(name="qsgd-4bits", num_epoch=3, lr_type='decay', initial_lr=2, tau=d,
                       regularizer=1 / n, estimate='(t+tau)^2', qsgd_s=2 ** 4),
            Parameters(name="top1", num_epoch=3, lr_type='decay', initial_lr=2, tau=d,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=1, with_memory=True, take_top=True),
            Parameters(name="rand1", num_epoch=3, lr_type='decay', initial_lr=2, tau=d,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=1, with_memory=True),
        ]
    elif args.experiment == 'epsilon-parallel':
        models = [
            lambda n_cores: Parameters(name="rand1", num_epoch=5, lr_type='constant', initial_lr=.05,
                                       n_cores=n_cores, regularizer=1 / n, take_k=1, with_memory=True,
                                       estimate='final'),
            lambda n_cores: Parameters(name="top1", num_epoch=5, lr_type='constant', initial_lr=.05,
                                       n_cores=n_cores, regularizer=1 / n, take_k=1, take_top=True,
                                       with_memory=True, estimate='final'),
            lambda n_cores: Parameters(name="hogwild", num_epoch=5, lr_type='constant', initial_lr=.05,
                                       n_cores=n_cores, regularizer=1 / n, estimate='final'),
        ]
        cores = [1, 2, 3, 5, 8, 10, 12, 14, 16, 18, 20, 22, 24]
        baseline = 0.305

    elif args.experiment == 'rcv1-th':
        params = [
            Parameters(name="full-sgd", num_epoch=3, lr_type='decay', initial_lr=2, tau=10,
                       regularizer=1 / n, estimate='(t+tau)^2'),
            Parameters(name="top10", num_epoch=3, lr_type='decay', initial_lr=2, tau=10 * d / 10,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=10, with_memory=True, take_top=True),
            Parameters(name="top10-no-shift", num_epoch=3, lr_type='decay', initial_lr=2, tau=10,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=10, with_memory=True, take_top=True),
            Parameters(name="rand10", num_epoch=3, lr_type='decay', initial_lr=2, tau=10 * d / 10,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=10, with_memory=True),
            Parameters(name="rand10-no-shift", num_epoch=3, lr_type='decay', initial_lr=2, tau=10,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=10, with_memory=True),
            Parameters(name="rand20", num_epoch=3, lr_type='decay', initial_lr=2, tau=10 * d / 20,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=20, with_memory=True),
            Parameters(name="rand20-no-shift", num_epoch=3, lr_type='decay', initial_lr=2, tau=10,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=20, with_memory=True),
            Parameters(name="rand30", num_epoch=3, lr_type='decay', initial_lr=2, tau=10 * d / 30,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=30, with_memory=True),
            Parameters(name="rand30-no-shift", num_epoch=3, lr_type='decay', initial_lr=2, tau=10,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=30, with_memory=True),
        ]
    elif args.experiment == 'rcv1-quant':
        params = [
            Parameters(name="qsgd-8bits", num_epoch=2, lr_type='decay', initial_lr=2, tau=10 * d,
                       regularizer=1 / n, estimate='(t+tau)^2', qsgd_s=2 ** 8),
            Parameters(name="qsgd-4bits", num_epoch=2, lr_type='decay', initial_lr=2, tau=10 * d,
                       regularizer=1 / n, estimate='(t+tau)^2', qsgd_s=2 ** 4),
            Parameters(name="qsgd-2bits", num_epoch=2, lr_type='decay', initial_lr=2, tau=10 * d,
                       regularizer=1 / n, estimate='(t+tau)^2', qsgd_s=2 ** 2),
            Parameters(name="top1", num_epoch=2, lr_type='decay', initial_lr=2, tau=10 * d,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=1, with_memory=True, take_top=True),
            Parameters(name="rand1", num_epoch=2, lr_type='decay', initial_lr=2, tau=10 * d,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=1, with_memory=True),
            Parameters(name="top10", num_epoch=2, lr_type='decay', initial_lr=2, tau=10 * d,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=10, with_memory=True, take_top=True),
            Parameters(name="rand10", num_epoch=2, lr_type='decay', initial_lr=2, tau=10 * d,
                       regularizer=1 / n, estimate='(t+tau)^2', take_k=10, with_memory=True),
        ]
    elif args.experiment == 'rcv1-parallel':
        models = [
            lambda n_cores: Parameters(name="top100", num_epoch=6, lr_type='decay', initial_lr=2.,
                                       n_cores=n_cores, tau=10 / 100 * d, regularizer=1 / n,
                                       estimate='final', take_k=100, take_top=True, with_memory=True),
            lambda n_cores: Parameters(name="rand100", num_epoch=6, lr_type='decay', initial_lr=2.,
                                       n_cores=n_cores, tau=10 / 100 * d, regularizer=1 / n,
                                       estimate='final', take_k=100, take_top=False, with_memory=True),
            lambda n_cores: Parameters(name="hogwild", num_epoch=6, lr_type='decay', initial_lr=2.,
                                       n_cores=n_cores, tau=10, regularizer=1 / n, estimate='final'),
        ]

        cores = [1, 2, 3, 5, 8, 10, 12, 14, 16, 18, 20, 22, 24]
        baseline = 0.101

    if 'parallel' in args.experiment:
        run_parallel_experiment(args.directory, dataset, models, cores, baseline, repeat=3)
    else:
        run_experiment(args.directory, dataset, params, nproc=args.nproc)
