Pynet

Pynet is a flexible and modular deep learning framework based on Theano. In this project it is used to train an autoencoder that extracts low-dimensional features for speech synthesis, implementing the results from the paper
Deep Denoising Auto-encoder for Statistical Speech Synthesis

1. Setting Environment Variables

In Pynet, there are three environment variables that need to be set.

PYNET_DATA_PATH     # the directory for all the datasets
PYNET_SAVE_PATH     # the directory for saving the best models, the output logs and the hyperparameters
PYNET_DATABASE_PATH # after training, the hyperparameters and training results from various
                    # experiments are saved into a database for comparison
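
For example, a minimal sketch of setting these variables from Python (the paths below are placeholders, not part of the repository):

import os

# placeholder paths: replace with your own directories
os.environ.setdefault('PYNET_DATA_PATH', '/path/to/data')
os.environ.setdefault('PYNET_SAVE_PATH', '/path/to/save')
os.environ.setdefault('PYNET_DATABASE_PATH', '/path/to/database')

The example script in the next section also sets fallback defaults for these variables if they are not already defined.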

2. Model Script

To build and run an AutoEncoder, we put together the various components (model, layers, dataset, learning_rule, log, cost function) into a TrainObject and run the training. The example model below is saved as the script AE_example.py.

import theano
import theano.tensor as T
import numpy as np
import os

from pynet.model import AutoEncoder
from pynet.layer import RELU, Sigmoid, Softmax, Linear, Tanh
from pynet.datasets.spec import *
from pynet.learning_rule import LearningRule
from pynet.log import Log
from pynet.train_object import TrainObject
from pynet.cost import Cost
from pynet.datasets.preprocessor import Standardize, GCN

def autoencoder():

    # set environment
    NNdir = os.path.dirname(os.path.realpath(__file__))
    NNdir = os.path.dirname(NNdir)
    NNdir = os.path.dirname(NNdir)

    if not os.getenv('PYNET_DATA_PATH'):
        os.environ['PYNET_DATA_PATH'] = NNdir + '/data'

    if not os.getenv('PYNET_DATABASE_PATH'):
        os.environ['PYNET_DATABASE_PATH'] = NNdir + '/database'
        if not os.path.exists(os.environ['PYNET_DATABASE_PATH']):
            os.mkdir(os.environ['PYNET_DATABASE_PATH'])

    if not os.getenv('PYNET_SAVE_PATH'):
        os.environ['PYNET_SAVE_PATH'] = NNdir + '/save'
        if not os.path.exists(os.environ['PYNET_SAVE_PATH']):
            os.mkdir(os.environ['PYNET_SAVE_PATH'])


    # build the dataset with batch_size and preprocessor
    data = Laura_Blocks(train_valid_test_ratio=[8,1,1], batch_size=100, preprocessor=GCN())

    # for an AutoEncoder, the inputs and outputs must be the same
    train = data.get_train()
    data.set_train(train.X, train.X)

    valid = data.get_valid()
    data.set_valid(valid.X, valid.X)

    test = data.get_test()
    data.set_test(test.X, test.X)

    # build the autoencoder
    ae = AutoEncoder(input_dim = data.feature_size(), rand_seed=123)
    h1_layer = Tanh(dim=500, name='h1_layer', W=None, b=None)

    # add the encoding layer
    ae.add_encode_layer(h1_layer)

    # the mirror layer ties its weights to the encoding layer: W = h1_layer.W.T
    h1_mirror = Tanh(name='h1_mirror', W=h1_layer.W.T, b=None)

    # add the decoding mirror layer
    ae.add_decode_layer(h1_mirror)

    learning_rule = LearningRule(max_col_norm = 1, # max length of the weight vector from a lower layer going into an upper neuron
                                learning_rate = 0.01,
                                momentum = 0.1,
                                momentum_type = 'normal',
                                L1_lambda = None, # L1 regularization coefficient
                                L2_lambda = None, # L2 regularization coefficient
                                cost = Cost(type='mse'), # cost type used for backprop during training
                                stopping_criteria = {'max_epoch' : 100, # maximum number of epochs for the training
                                                    'cost' : Cost(type='mse'), # cost type used for testing the quality of the trained model
                                                    'epoch_look_back' : 10, # number of epochs to look back for error improvement
                                                    'percent_decrease' : 0.001} # requires at least a 0.001 = 0.1% decrease in error over the 10-epoch look-back
                                )

    # logging is optional; it saves the best trained model and records the training results to a database
    log = Log(experiment_name = 'AE',
            description = 'This experiment is about autoencoder',
            save_outputs = True, # saves to outputs.log
            save_learning_rule = True,
            save_model = True,
            save_to_database = {'name': 'Example.db',
                                'records' : {'Dataset' : data.__class__.__name__,
                                             'Weight_Init_Seed' : ae.rand_seed,
                                             'Dropout_Below' : str([layer.dropout_below for layer in ae.layers]),
                                             'Batch_Size' : data.batch_size,
                                             'Layer_Size' : len(ae.layers),
                                             'Layer_Dim' : str([layer.dim for layer in ae.layers]),
                                             'Preprocessor' : data.preprocessor.__class__.__name__,
                                             'Learning_Rate' : learning_rule.learning_rate,
                                             'Momentum' : learning_rule.momentum}}
            ) # end log

    # put all the components into a TrainObject
    train_object = TrainObject(model = ae,
                                dataset = data,
                                learning_rule = learning_rule,
                                log = log)

    # finally run the training
    train_object.run()


if __name__ == '__main__':
    autoencoder()
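
The same components can be stacked into a deeper autoencoder. Below is a minimal sketch that extends the example above to two encoding layers with tied mirror weights; the layer dimensions are illustrative and the snippet assumes the same AutoEncoder/Tanh interfaces used in AE_example.py.

# sketch: a two-layer autoencoder with mirrored, weight-tied decode layers (dims are illustrative)
ae = AutoEncoder(input_dim = data.feature_size(), rand_seed=123)

h1_layer = Tanh(dim=500, name='h1_layer', W=None, b=None)
h2_layer = Tanh(dim=180, name='h2_layer', W=None, b=None)
ae.add_encode_layer(h1_layer)
ae.add_encode_layer(h2_layer)

# decode layers mirror the encode layers in reverse order, with transposed weights
h2_mirror = Tanh(name='h2_mirror', W=h2_layer.W.T, b=None)
h1_mirror = Tanh(name='h1_mirror', W=h1_layer.W.T, b=None)
ae.add_decode_layer(h2_mirror)
ae.add_decode_layer(h1_mirror)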

3. Hyperparams Search

To do a hyperparameter search, run the launch.py script in the hps directory. To do that, first log into helios:

cd Pynet/hps
cat model_config.py # this will show the configurations of different models

Inside model_config.py, if the value of a variable is given as a tuple, the value used for that variable is sampled uniformly from the entries of the tuple. For example, with 'learning_rate' : (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5), the learning rate is uniformly set to any one of the six values in the tuple, as sketched below.
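
A minimal sketch of this sampling behaviour (illustrative only, not the actual launch.py implementation):

import random

# hypothetical config fragment: a tuple means "choose one", a scalar is a fixed value
config = {'learning_rate' : (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5),
          'momentum_type' : 'normal'}

# sample one value uniformly at random from each tuple
sampled = {k: (random.choice(v) if isinstance(v, tuple) else v)
           for k, v in config.items()}
print(sampled)   # e.g. {'learning_rate': 0.001, 'momentum_type': 'normal'}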

Below is a sample of the Laura model from model_config.py

    'Laura' : DD({
            'model' : DD({
                    'rand_seed'             : None
                    }), # end mlp

            'log' : DD({
                    'experiment_name'       : 'AE0918_Warp_Blocks_180_120_tanh_tanh_gpu_dropout', #helios
                    # 'experiment_name'       : 'AE0914_Warp_Blocks_500_180_tanh_tanh_gpu_clean', #helios

                    # 'experiment_name'       : 'AE0919_Blocks_180_120_tanh_tanh_gpu_dropout', #helios
                    # 'experiment_name'       : 'AE0918_Blocks_180_120_tanh_tanh_gpu_clean', #helios

                    # 'experiment_name'       : 'AE0916_Blocks_180_120_tanh_tanh_gpu_output_sig_dropout',
                    # 'experiment_name'       : 'AE0916_Blocks_180_120_tanh_tanh_gpu_output_sig_clean',


                    'description'           : '',
                    'save_outputs'          : True,
                    'save_learning_rule'      : True,
                    'save_model'            : True,
                    'save_to_database_name' : 'Laura.db'
                    }), # end log


            'learning_rule' : DD({
                    'max_col_norm'          : (1, 10, 50),
                    'learning_rate'         : (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5),
                    'momentum'              : (1e-3, 1e-2, 1e-1, 0.5, 0.9),
                    'momentum_type'         : 'normal',
                    'L1_lambda'             : None,
                    'L2_lambda'             : None,
                    'cost'                  : 'mse',
                    'stopping_criteria'     : DD({
                                                'max_epoch'         : 100,
                                                'epoch_look_back'   : 10,
                                                'cost'              : 'mse',
                                                'percent_decrease'  : 0.05
                                                }) # end stopping_criteria
                    }), # end learning_rule

            #===========================[ Dataset ]===========================#
            'dataset' : DD({
                    # 'type'                  : 'Laura_Warp_Blocks_500_Tanh',
                    'type'                 : 'Laura_Warp_Blocks_180_Tanh_Dropout',
                    # 'type'                  : 'Laura_Cut_Warp_Blocks_300',
                    # 'type'                  : 'Laura_Blocks_180_Tanh_Tanh',
                    # 'type'                  : 'Laura_Blocks_180_Tanh_Tanh_Dropout',
                    # 'type'                  : 'Laura_Blocks_500_Tanh_Sigmoid',
                    # 'type'                  : 'Laura_Blocks_500',
                    # 'type'                  : 'Laura_Blocks',
                    # 'type'                  : 'Laura_Warp_Blocks',
                    # 'type'                  : 'Laura_Warp_Standardize_Blocks',
                    # 'type'                  : 'Laura_Standardize_Blocks',
                    # 'type'                  : 'Mnist',

                    'feature_size'          : 180,
                    'train_valid_test_ratio': [8, 1, 1],

                    'preprocessor'          : None,
                    # 'preprocessor'          : 'Scale',
                    # 'preprocessor'          : 'GCN',
                    # 'preprocessor'          : 'LogGCN',
                    # 'preprocessor'          : 'Standardize',

                    'batch_size'            : (50, 100, 150, 200),
                    'num_batches'           : None,
                    'iter_class'            : 'SequentialSubsetIterator',
                    'rng'                   : None
                    }), # end dataset

            #============================[ Layers ]===========================#
            'num_layers' : 1,

            'hidden1' : DD({
                    'name'                  : 'hidden1',
                    'type'                  : 'Tanh',
                    'dim'                   : 120,

                    # 'dropout_below'         : None,
                    'dropout_below'         : (0.1, 0.2, 0.3, 0.4, 0.5),
                    # 'dropout_below'         : 0.5,

                    }), # end hidden_layer

            'hidden2' : DD({
                    'name'                  : 'hidden2',
                    'type'                  : 'RELU',
                    'dim'                   : 100,
                    'dropout_below'         : None,
                    }), # end hidden_layer

            'h2_mirror' : DD({
                    'name'                  : 'h2_mirror',
                    'type'                  : 'RELU',
                    # 'dim'                   : 2049, # dim = input.dim
                    'dropout_below'         : None,
                    }), # end output_layer

            'h1_mirror' : DD({
                    'name'                  : 'h1_mirror',
                    'type'                  : 'Tanh',
                    # 'dim'                   : 2049, # dim = input.dim
                    'dropout_below'         : None,
                    }) # end output_layer

            }), # end autoencoder

To sample one set of hyperparams and run it locally, issue

cd Pynet/hps
python launch.py --model Laura -c 1

To submit 5 jobs to the gpu cluster, issue

cd Pynet/hps
python launch.py --model Laura -n 5 -g
showq -u hycis

After the runs have finished, you can check out the results from the database:

cdwu
sqlite3 Pynet/database/Laura.db
>>> .header on
>>> .mode column
>>> .table
>>> select * from some_table order by test_error;

I have named the experiment groups in a way that is easy to interpret. For example, the experiment group name AE0912_Blocks_2049_500_tanh_tanh_gpu_clean means an autoencoder (AE0912) trained on linear Blocks with 2049-500-2049 dims and tanh-tanh units, run on GPU, and trained clean without noise. The best model in that experiment group is AE0912_Blocks_2049_500_tanh_tanh_gpu_clean_20140914_1242_27372903, where the trailing numbers are the date_time_microsec at which the model was generated.

I have saved the best results for each pretrained layer at http://1drv.ms/1qSyrZI under the combinations section.

4. Reproduce Best Results

To reproduce the results, you can plug the hyperparams saved in the database into AE_example.py and run the job locally, or you can set the hyperparams in model_config.py and run python launch.py --model Laura -c 1.
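
If you prefer to pull the hyperparams out programmatically, here is a minimal sketch using Python's sqlite3 module; the table name is whatever .table printed above, and the column names are assumptions based on the records saved by the log in AE_example.py (plus the test_error column used in the query above):

import sqlite3

conn = sqlite3.connect('Pynet/database/Laura.db')
conn.row_factory = sqlite3.Row

# fetch the best-performing experiment; 'some_table' is a placeholder for the real table name
best = conn.execute('SELECT * FROM some_table ORDER BY test_error LIMIT 1').fetchone()
print(dict(best))   # inspect e.g. Learning_Rate, Momentum, Batch_Size before plugging them into AE_example.py
conn.close()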

Stacking up Models

Reproducing a stack-up of trained models is simple: put the names of the best models under 'hidden1' and 'hidden2' in model_config.py, set the hyperparams, and issue python launch.py --model Laura_Two_Layers -c 1 to run the job locally.

'Laura_Two_Layers' : DD({
        'model' : DD({
                'rand_seed'             : None
                }), # end mlp

        'log' : DD({
                # 'experiment_name'       : 'AE0917_Blocks_2layers_finetune_2049_180_tanh_tanh_gpu_clean',
                # 'experiment_name'       : 'AE0918_Blocks_2layers_finetune_2049_180_tanh_tanh_gpu_noisy',

                # 'experiment_name'       : 'AE0918_Blocks_2layers_finetune_2049_180_tanh_sigmoid_gpu_clean',
                # 'experiment_name'       : 'AE0917_Blocks_2layers_finetune_2049_180_tanh_sigmoid_gpu_noisy',

                # 'experiment_name'       : 'AE0917_Warp_Blocks_2layers_finetune_2049_180_tanh_tanh_gpu_clean',
                'experiment_name'       : 'AE0918_Warp_Blocks_2layers_finetune_2049_180_tanh_tanh_gpu_noisy',



                'description'           : '',
                'save_outputs'          : True,
                'save_learning_rule'      : True,
                'save_model'            : True,
                'save_to_database_name' : 'Laura.db'
                }), # end log


        'learning_rule' : DD({
                'max_col_norm'          : (1, 10, 50),
                'learning_rate'         : (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5),
                # 'learning_rate'         : ((1e-5, 9e-1), float),
                # 'learning_rate'         : 0.01,
                'momentum'              : (1e-3, 1e-2, 1e-1, 0.5, 0.9),
                # 'momentum'              : 0.05,
                'momentum_type'         : 'normal',
                'L1_lambda'             : None,
                'L2_lambda'             : None,
                'cost'                  : 'mse',
                'stopping_criteria'     : DD({
                                            'max_epoch'         : 100,
                                            'epoch_look_back'   : 10,
                                            'cost'              : 'mse',
                                            'percent_decrease'  : 0.05
                                            }) # end stopping_criteria
                }), # end learning_rule

        #===========================[ Dataset ]===========================#
        'dataset' : DD({
                # 'type'                  : 'Laura_Warp_Blocks_500',
                # 'type'                  : 'Laura_Blocks_500',
                # 'type'                  : 'Laura_Blocks',
                'type'                  : 'Laura_Warp_Blocks',
                # 'type'                  : 'Mnist_Blocks',
                'feature_size'          : 2049,
                'train_valid_test_ratio': [8, 1, 1],

                # 'preprocessor'          : None,
                # 'preprocessor'          : 'Scale',
                'preprocessor'          : 'GCN',
                # 'preprocessor'          : 'LogGCN',
                # 'preprocessor'          : 'Standardize',

                'batch_size'            : (50, 100, 150, 200),
                'num_batches'           : None,
                'iter_class'            : 'SequentialSubsetIterator',
                'rng'                   : None
                }), # end dataset

        # #============================[ Layers ]===========================#

        'hidden1' : DD({
                'name'                  : 'hidden1',

                # 'model'                 : 'AE0912_Blocks_2049_500_tanh_tanh_gpu_clean_20140914_1242_27372903',
                # 'model'                 : 'AE0915_Blocks_2049_500_tanh_tanh_gpu_Dropout_20140915_1900_37160748',

                # 'model'                 : 'AE0912_Blocks_2049_500_tanh_sigmoid_gpu_clean_20140913_1342_18300926',

                # 'model'                 : 'AE0911_Warp_Blocks_2049_500_tanh_tanh_gpu_clean_20140912_2337_04263067',
                'model'                 : 'AE0916_Warp_Blocks_2049_500_tanh_tanh_gpu_dropout_20140916_1705_29139505',


                'dropout_below'         : None,
                # 'dropout_below'         : 0.1,
                }), # end hidden_layer

        'hidden2' : DD({
                'name'                  : 'hidden2',
                # 'model'                 : 'AE0916_Blocks_500_180_tanh_tanh_gpu_clean_20140916_2255_06553688',
                # 'model'                 : 'AE0914_Blocks_500_180_tanh_tanh_gpu_dropout_20140916_1059_59760060',
                # 'model'                 : 'AE0918_Blocks_500_180_tanh_tanh_gpu_dropout_20140918_0920_42738052',

                # 'model'                 : 'AE0916_Blocks_500_180_tanh_tanh_gpu_output_sig_clean_20140917_0301_44075773',

                # 'model'                 : 'AE0914_Warp_Blocks_500_180_tanh_tanh_gpu_clean_20140915_0400_30113212',
                # 'model'                 : 'AE0916_Warp_Blocks_500_180_tanh_tanh_gpu_dropout_20140916_1326_09742695',
                'model'                 : 'AE0918_Warp_Blocks_500_180_tanh_tanh_gpu_dropout_20140918_1125_23612485',

                'dropout_below'         : None,
                }), # end hidden_layer


        }), # end autoencoder
