Updated examples and tests with new Learning APIs.
tangyuq committed Nov 13, 2017
1 parent 35255ed commit f1d6fc9
Showing 54 changed files with 149 additions and 152 deletions.
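Across all of these files the edit is the same: the older `learning_rate_schedule`/`UnitType` and `momentum_as_time_constant_schedule` calls are replaced with the new `learning_parameter_schedule*` and `momentum_schedule*` functions. A minimal sketch of the mapping, using illustrative values rather than those of any particular script:

```python
import math
import cntk as C

lr = 0.1  # illustrative learning rate

# Old API (the removed lines):
#   C.learning_rate_schedule(lr, C.UnitType.minibatch)
#   C.learning_rate_schedule(lr, C.UnitType.sample)
#   C.learners.momentum_as_time_constant_schedule(1024)

# New API: per-minibatch rates simply drop the UnitType argument ...
lr_per_minibatch = C.learning_parameter_schedule(lr)
# ... and per-sample rates use the *_per_sample variant.
lr_per_sample = C.learning_parameter_schedule_per_sample(lr)

# A momentum time constant tc becomes the per-sample momentum exp(-1/tc);
# per-minibatch momentum continues to use momentum_schedule.
mm_per_sample = C.learners.momentum_schedule_per_sample(math.exp(-1.0 / 1024))
mm_per_minibatch = C.learners.momentum_schedule(0.9)
```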
2 changes: 1 addition & 1 deletion Examples/1stSteps/LogisticRegression_FunctionalAPI.py
@@ -54,7 +54,7 @@ def criterion(data, label_one_hot):

# Learner object. The learner implements the update algorithm, in this case plain SGD.
learning_rate = 0.1
-learner = cntk.sgd(model.parameters, cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch))
+learner = cntk.sgd(model.parameters, cntk.learning_parameter_schedule(learning_rate))

# Trainer configuration parameters.
progress_writer = cntk.logging.ProgressPrinter(50) # helper for logging progress; log every 50 minibatches
2 changes: 1 addition & 1 deletion Examples/1stSteps/LogisticRegression_GraphAPI.py
@@ -52,7 +52,7 @@ def generate_synthetic_data(N):

# Learner object. The learner implements the update algorithm, in this case plain SGD.
learning_rate = 0.1
-learner = cntk.sgd(model.parameters, cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch))
+learner = cntk.sgd(model.parameters, cntk.learning_parameter_schedule(learning_rate))

# Trainer.
minibatch_size = 32
12 changes: 6 additions & 6 deletions Examples/1stSteps/MNIST_Complex_Training.py
@@ -81,14 +81,14 @@ def criterion(data, label_one_hot):

# Learner object. The learner implements the update algorithm, in this case momentum SGD.
# Because this script supports data-parallel training, the learning rate is specified
# "per sample" (UnitType.sample), the value is already pre-divided by the minibatch size.
# "per sample", the value is already pre-divided by the minibatch size.
# This allows data-parallel training to slice the data into subsets and also to increase
# the minibatch size where possible, while maintaining the same contribution per sample gradient.
epoch_size = len(X_train)
lr_per_sample = 0.001
-lr_schedule = C.learning_rate_schedule(lr_per_sample, C.learners.UnitType.sample)
-mm_time_constant = [0]*5 + [1024] # 5 epochs without momentum, then switch it on
-mm_schedule = C.learners.momentum_as_time_constant_schedule(mm_time_constant, epoch_size)
+lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample)
+mm_per_sample = [0]*5 + [0.9990239141819757] # 5 epochs without momentum, then switch it on
+mm_schedule = C.learners.momentum_schedule_per_sample(mm_per_sample, epoch_size=epoch_size)

# Instantiate the trainer object to drive the model training.
learner = C.learners.momentum_sgd(model.parameters, lr_schedule, mm_schedule)
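The new momentum value is the old time constant of 1024 rewritten as a per-sample momentum via momentum = exp(-1/time_constant). A quick check of that conversion (plain Python, no CNTK required):

```python
import math

# Per-sample momentum equivalent to the removed momentum_as_time_constant_schedule(1024):
print(math.exp(-1.0 / 1024))                 # 0.9990239141819757, the value used above

# Inverting the formula recovers the original time constant:
print(-1.0 / math.log(0.9990239141819757))   # ~1024 samples
```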
@@ -114,7 +114,7 @@ def criterion(data, label_one_hot):
def adjust_lr_callback(index, average_error, cv_num_samples, cv_num_minibatches):
global prev_metric
if (prev_metric - average_error) / prev_metric < 0.05: # relative gain must reduce metric by at least 5% rel
-learner.reset_learning_rate(C.learning_rate_schedule(learner.learning_rate() / 2, C.learners.UnitType.sample))
+learner.reset_learning_rate(C.learning_parameter_schedule_per_sample(learner.learning_rate() / 2))
if learner.learning_rate() < lr_per_sample / (2**7-0.1): # we are done after the 6-th LR cut
print("Learning rate {} too small. Training complete.".format(learner.learning_rate()))
return False # means we are done
@@ -137,7 +137,7 @@ def adjust_lr_callback(index, average_error, cv_num_samples, cv_num_minibatches)
# For distributed training, we must maximize the minibatch size, as to minimize
# communication cost and GPU underutilization. Hence, we use a "schedule"
# that increases the minibatch size after a few epochs. By specifying the learning rate
-# as UnitType.sample, the contribution per sample maintains the same scale without
+# as per sample, the contribution per sample maintains the same scale without
# having to fix up the learning rate.
# For this MNIST model, larger minibatch sizes make it faster, because the
# model is too small to utilize a full GPU. Hence data-parallel training cannot
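The (truncated) comment above describes growing the minibatch size during training while the per-sample learning rate stays fixed. As a rough sketch of how such a schedule can be written with `minibatch_size_schedule` from `cntk.train` (the sizes and epoch boundaries below are illustrative, not the ones this script actually uses):

```python
import cntk as C

epoch_size = 60000  # illustrative; the MNIST training-set size

# Small minibatches for the first few epochs, then larger ones. Because the
# learning rate is specified per sample, it needs no adjustment when the
# minibatch size changes; this schedule can then be handed to the training session.
mb_size_schedule = C.train.minibatch_size_schedule([64]*5 + [256], epoch_size=epoch_size)
```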
@@ -100,9 +100,9 @@ def convnetlrn_cifar10_dataaug(reader_train, reader_test, epoch_size=50000, max_

# Set learning parameters
lr_per_sample = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625]
-lr_schedule = C.learning_rate_schedule(lr_per_sample, unit=C.learners.UnitType.sample, epoch_size=epoch_size)
-mm_time_constant = [0]*20 + [600]*20 + [1200]
-mm_schedule = C.learners.momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
+lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
+mms = [0]*20 + [0.9983347214509387]*20 + [0.9991670137924583]
+mm_schedule = C.learners.momentum_schedule_per_sample(mms, epoch_size=epoch_size)
l2_reg_weight = 0.002

# trainer object
@@ -16,7 +16,7 @@
from cntk.layers.typing import *
from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT
from cntk import Trainer, use_default_device
-from cntk.learners import momentum_sgd, learning_rate_schedule, UnitType, momentum_as_time_constant_schedule, learning_parameter_schedule
+from cntk.learners import momentum_sgd, momentum_schedule, momentum_schedule_per_sample, learning_parameter_schedule, learning_parameter_schedule_per_sample
from cntk import cross_entropy_with_softmax, classification_error, relu
from cntk.ops import Function
from cntk.debugging import set_computation_network_trace_level
@@ -109,8 +109,8 @@ def train_model(reader, model, criterion, epoch_size=50000, max_epochs=80):

# learning parameters
learner = momentum_sgd(model.parameters,
-lr = learning_parameter_schedule([0.0015625]*20+[0.00046875]*20+[0.00015625]*20+[0.000046875]*10+[0.000015625], minibatch_size=1, epoch_size=epoch_size),
-momentum = momentum_as_time_constant_schedule([0]*20+[600]*20+[1200], epoch_size=epoch_size),
+lr = learning_parameter_schedule_per_sample([0.0015625]*20+[0.00046875]*20+[0.00015625]*20+[0.000046875]*10+[0.000015625], epoch_size=epoch_size),
+momentum = momentum_schedule_per_sample([0]*20+[0.9983347214509387]*20+[0.9991670137924583], epoch_size=epoch_size),
l2_regularization_weight = 0.002)

# trainer object
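This hunk suggests that a schedule declared with `minibatch_size=1` and one declared with the `_per_sample` variant mean the same thing: the rate applies to a single sample. A small sketch under that assumption, with illustrative values:

```python
import cntk as C

lr_values = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]
epoch_size = 50000  # illustrative

# Two spellings of the same per-sample learning-rate schedule:
lr_a = C.learning_parameter_schedule(lr_values, minibatch_size=1, epoch_size=epoch_size)
lr_b = C.learning_parameter_schedule_per_sample(lr_values, epoch_size=epoch_size)
```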
@@ -147,8 +147,8 @@ def Evaluator(criterion):
if metric:
parameters |= set(metric.parameters)
dummy_learner = momentum_sgd(tuple(parameters),
-lr = learning_rate_schedule(1, UnitType.minibatch),
-momentum = momentum_as_time_constant_schedule(0))
+lr = learning_parameter_schedule(1),
+momentum = momentum_schedule(0))
return Trainer(None, (loss, metric), dummy_learner)

def evaluate(reader, criterion, device=None, minibatch_size=16, max_samples=None):
@@ -86,9 +86,9 @@ def create_conv_network():
def create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up, progress_writers):
# Set learning parameters
lr_per_sample = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625]
-lr_schedule = C.learning_rate_schedule(lr_per_sample, unit=C.learners.UnitType.sample, epoch_size=epoch_size)
-mm_time_constant = [0]*20 + [600]*20 + [1200]
-mm_schedule = C.learners.momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
+lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
+mms = [0]*20 + [0.9983347214509387]*20 + [0.9991670137924583]
+mm_schedule = C.learners.momentum_schedule_per_sample(mms, epoch_size=epoch_size)
l2_reg_weight = 0.002

# Create learner
6 changes: 3 additions & 3 deletions Examples/Image/Classification/ConvNet/Python/ConvNet_MNIST.py
@@ -55,9 +55,9 @@ def convnet_mnist(debug_output=False, epoch_size=60000, minibatch_size=64, max_e

# Set learning parameters
lr_per_sample = [0.001]*10 + [0.0005]*10 + [0.0001]
-lr_schedule = C.learning_rate_schedule(lr_per_sample, C.learners.UnitType.sample, epoch_size)
-mm_time_constant = [0]*5 + [1024]
-mm_schedule = C.learners.momentum_as_time_constant_schedule(mm_time_constant, epoch_size)
+lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
+mms = [0]*5 + [0.9990239141819757]
+mm_schedule = C.learners.momentum_schedule_per_sample(mms, epoch_size=epoch_size)

# Instantiate the trainer object to drive the model training
learner = C.learners.momentum_sgd(z.parameters, lr_schedule, mm_schedule)
@@ -17,7 +17,7 @@
import cntk.io.transforms as xforms
from cntk.debugging import start_profiler, stop_profiler, enable_profiler
from cntk.io import ImageDeserializer, MinibatchSource, StreamDef, StreamDefs, FULL_DATA_SWEEP
-from cntk.learners import learning_rate_schedule, momentum_schedule, momentum_sgd, UnitType
+from cntk.learners import learning_parameter_schedule, momentum_schedule, momentum_sgd
from cntk.logging import ProgressPrinter, log_number_of_parameters
from cntk.losses import cross_entropy_with_softmax
from cntk.metrics import classification_error
@@ -114,7 +114,7 @@ def create_trainer(network, epoch_size, num_epochs, minibatch_size, progress_wri
lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
learning_rate *= learn_rate_decrease_factor

-lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
mm_schedule = momentum_schedule(0.9)
l2_reg_weight = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe

@@ -16,7 +16,7 @@

import cntk.io.transforms as xforms
from cntk.debugging import start_profiler, stop_profiler
-from cntk.learners import learning_rate_schedule, momentum_schedule, momentum_sgd, UnitType
+from cntk.learners import learning_parameter_schedule, momentum_schedule, momentum_sgd
from cntk.logging import ProgressPrinter, log_number_of_parameters
from cntk.ops import input
from cntk.io import ImageDeserializer, MinibatchSource, StreamDef, StreamDefs, FULL_DATA_SWEEP
@@ -51,7 +51,7 @@ def create_trainer(network, epoch_size, num_epochs, minibatch_size, num_quantiza
lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
learning_rate *= learn_rate_decrease_factor

-lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
mm_schedule = momentum_schedule(0.9)
l2_reg_weight = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe

@@ -17,7 +17,7 @@
import cntk.io.transforms as xforms
from cntk.debugging import start_profiler, stop_profiler, enable_profiler
from cntk.io import ImageDeserializer, MinibatchSource, StreamDef, StreamDefs, FULL_DATA_SWEEP
-from cntk.learners import learning_rate_schedule, momentum_schedule, momentum_sgd, UnitType
+from cntk.learners import learning_parameter_schedule, momentum_schedule, momentum_sgd
from cntk.logging import ProgressPrinter, log_number_of_parameters
from cntk.losses import cross_entropy_with_softmax
from cntk.metrics import classification_error
@@ -115,7 +115,7 @@ def create_trainer(network, epoch_size, num_epochs, minibatch_size):
lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
learning_rate *= learn_rate_decrease_factor

-lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
mm_schedule = momentum_schedule(0.9)
l2_reg_weight = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe

@@ -16,7 +16,7 @@

import cntk.io.transforms as xforms
from cntk.debugging import start_profiler, stop_profiler
-from cntk.learners import learning_rate_schedule, momentum_schedule, momentum_sgd, UnitType
+from cntk.learners import learning_parameter_schedule, momentum_schedule, momentum_sgd
from cntk.logging import ProgressPrinter, log_number_of_parameters
from cntk.train.distributed import data_parallel_distributed_learner, Communicator
from cntk.io import ImageDeserializer, MinibatchSource, StreamDef, StreamDefs, FULL_DATA_SWEEP
@@ -51,7 +51,7 @@ def create_trainer(network, epoch_size, num_epochs, minibatch_size, num_quantiza
lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
learning_rate *= learn_rate_decrease_factor

-lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
mm_schedule = momentum_schedule(0.9)
l2_reg_weight = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe

@@ -103,7 +103,7 @@ def create_trainer(network, epoch_size, num_epochs, minibatch_size):
lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
learning_rate *= learn_rate_decrease_factor

-lr_schedule = C.learners.learning_rate_schedule(lr_per_mb, unit=C.learners.UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = C.learners.learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
mm_schedule = C.learners.momentum_schedule(0.9)
l2_reg_weight = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe

@@ -39,7 +39,7 @@ def create_trainer(network, epoch_size, num_epochs, minibatch_size, num_quantiza
lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
learning_rate *= learn_rate_decrease_factor

-lr_schedule = C.learners.learning_rate_schedule(lr_per_mb, unit=C.learners.UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = C.learners.learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
mm_schedule = C.learners.momentum_schedule(0.9)
l2_reg_weight = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe

4 changes: 2 additions & 2 deletions Examples/Image/Classification/MLP/Python/SimpleMNIST.py
@@ -12,7 +12,7 @@
from cntk.train import Trainer, minibatch_size_schedule
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT
from cntk.device import cpu, try_set_default_device
-from cntk.learners import adadelta, learning_rate_schedule, UnitType
+from cntk.learners import adadelta, learning_parameter_schedule_per_sample
from cntk.ops import relu, element_times, constant
from cntk.layers import Dense, Sequential, For
from cntk.losses import cross_entropy_with_softmax
@@ -85,7 +85,7 @@ def simple_mnist(tensorboard_logdir=None):
progress_writers.append(TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z))

# Instantiate the trainer object to drive the model training
-lr = learning_rate_schedule(1, UnitType.sample)
+lr = learning_parameter_schedule_per_sample(1)
trainer = Trainer(z, (ce, pe), adadelta(z.parameters, lr), progress_writers)

training_session(
@@ -13,7 +13,7 @@
from cntk import cross_entropy_with_softmax, classification_error, reduce_mean
from cntk import Trainer, cntk_py
from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs
-from cntk.learners import momentum_sgd, learning_rate_schedule, momentum_as_time_constant_schedule, UnitType
+from cntk.learners import momentum_sgd, learning_parameter_schedule_per_sample, momentum_schedule
from cntk.debugging import *
from cntk.logging import *
from resnet_models import *
@@ -80,13 +80,12 @@ def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_

# shared training parameters
minibatch_size = 128
-momentum_time_constant = -minibatch_size/np.log(0.9)
l2_reg_weight = 0.0001

# Set learning parameters
lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
-lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
-mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)
+lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
+mm_schedule = momentum_schedule(0.9, minibatch_size)

# progress writers
progress_writers = [ProgressPrinter(tag='Training', log_to_file=log_dir, num_epochs=max_epochs, gen_heartbeat=gen_heartbeat)]
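The deleted `momentum_time_constant = -minibatch_size/np.log(0.9)` is simply a per-minibatch momentum of 0.9 (at minibatch size 128) expressed as a time constant, which is why it can be dropped in favour of the plain value 0.9. A quick numerical check of that relationship (plain Python, no CNTK needed):

```python
import math

minibatch_size = 128
momentum_per_mb = 0.9

# The removed line: momentum written as a time constant measured in samples.
time_constant = -minibatch_size / math.log(momentum_per_mb)   # ~1214.9

# Applying the implied per-sample decay over one minibatch gives 0.9 back.
print(math.exp(-minibatch_size / time_constant))              # 0.9
```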
@@ -10,9 +10,10 @@
import cntk as C
import numpy as np

import cntk as C
from cntk import input, cross_entropy_with_softmax, classification_error, Trainer, cntk_py
from cntk import data_parallel_distributed_learner, block_momentum_distributed_learner, Communicator
-from cntk.learners import momentum_sgd, learning_rate_schedule, momentum_as_time_constant_schedule, UnitType
+from cntk.learners import momentum_sgd, learning_parameter_schedule, momentum_schedule
from cntk.device import try_set_default_device, gpu
from cntk.train.training_session import *
from cntk.debugging import *
@@ -71,15 +72,13 @@ def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, b
else:
return RuntimeError("Unknown model name!")

-momentum_time_constant = -minibatch_size/np.log(0.9)
l2_reg_weight = 0.0001

# Set learning parameters
minibatch_size = 128
-lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
-lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
-mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

+lr_schedule = learning_parameter_schedule(lr_per_mb, minibatch_size = minibatch_size, epoch_size=epoch_size)
+mm_schedule = momentum_schedule(0.9, minibatch_size = minibatch_size)
# learner object
if block_size != None and num_quantization_bits != 32:
raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")
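Here the rates stay specified per minibatch (`lr_per_mb`), and the `minibatch_size=minibatch_size` argument appears to name the reference minibatch size those rates assume, which is why the old per-sample conversion `[lr/minibatch_size for lr in lr_per_mb]` could be dropped. A hedged sketch of the same pattern with made-up numbers:

```python
import cntk as C

minibatch_size = 128                      # reference size the per-minibatch rates assume
lr_per_mb = [1.0]*30 + [0.1]*30 + [0.01]  # illustrative schedule
epoch_size = 50000                        # illustrative

lr_schedule = C.learning_parameter_schedule(lr_per_mb,
                                            minibatch_size=minibatch_size,
                                            epoch_size=epoch_size)
mm_schedule = C.learners.momentum_schedule(0.9, minibatch_size=minibatch_size)
```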
@@ -136,7 +136,7 @@ def create_vgg16():
def create_trainer(network, epoch_size, num_quantization_bits, progress_printer):
# Set learning parameters
lr_per_mb = [0.01]*20 + [0.001]*20 + [0.0001]*20 + [0.00001]*10 + [0.000001]
-lr_schedule = C.learning_rate_schedule(lr_per_mb, unit=C.learners.UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = C.learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
mm_schedule = C.learners.momentum_schedule(0.9)
l2_reg_weight = 0.0005 # CNTK L2 regularization is per sample, thus same as Caffe

@@ -136,7 +136,7 @@ def create_vgg19():
def create_trainer(network, epoch_size, num_quantization_bits, progress_printer):
# Set learning parameters
lr_per_mb = [0.01]*20 + [0.001]*20 + [0.0001]*20 + [0.00001]*10 + [0.000001]
-lr_schedule = C.learning_rate_schedule(lr_per_mb, unit=C.learners.UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = C.learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
mm_schedule = C.learners.momentum_schedule(0.9)
l2_reg_weight = 0.0005 # CNTK L2 regularization is per sample, thus same as Caffe
