forked from yusugomori/DeepLearning
Commit 8c99aa9
Yusuke Sugomori committed on Mar 16, 2013
1 parent: c291e7e
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing 7 changed files with 870 additions and 28 deletions.
DBN.java (new file, 218 lines added)

import java.util.Random;

public class DBN {
    public int N;
    public int n_ins;
    public int[] hidden_layer_sizes;
    public int n_outs;
    public int n_layers;
    HiddenLayer[] sigmoid_layers;
    RBM[] rbm_layers;
    LogisticRegression log_layer;
    Random rng;

    public static double sigmoid(double x) {
        return 1.0 / (1.0 + Math.exp(-x));
    }

    public DBN(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) {
        int input_size;

        this.N = N;
        this.n_ins = n_ins;
        this.hidden_layer_sizes = hidden_layer_sizes;
        this.n_outs = n_outs;
        this.n_layers = n_layers;

        this.sigmoid_layers = new HiddenLayer[n_layers];
        this.rbm_layers = new RBM[n_layers];

        if(rng == null) this.rng = new Random(1234);
        else this.rng = rng;

        // construct multi-layer
        for(int i=0; i<this.n_layers; i++) {
            if(i == 0) {
                input_size = this.n_ins;
            } else {
                input_size = this.hidden_layer_sizes[i-1];
            }

            // construct sigmoid_layer
            this.sigmoid_layers[i] = new HiddenLayer(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng);

            // construct rbm_layer; it shares W and b with the sigmoid layer above
            this.rbm_layers[i] = new RBM(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, rng);
        }

        // layer for output using LogisticRegression
        this.log_layer = new LogisticRegression(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs);
    }

    public void pretrain(int[][] train_X, double lr, int k, int epochs) {
        int[] layer_input = new int[0];
        int prev_layer_input_size;
        int[] prev_layer_input;

        for(int i=0; i<n_layers; i++) {  // layer-wise
            for(int epoch=0; epoch<epochs; epoch++) {  // training epochs
                for(int n=0; n<N; n++) {  // input x1...xN
                    // layer input: propagate the sample up through the already-trained layers
                    for(int l=0; l<=i; l++) {

                        if(l == 0) {
                            layer_input = new int[n_ins];
                            for(int j=0; j<n_ins; j++) layer_input[j] = train_X[n][j];
                        } else {
                            if(l == 1) prev_layer_input_size = n_ins;
                            else prev_layer_input_size = hidden_layer_sizes[l-2];

                            prev_layer_input = new int[prev_layer_input_size];
                            for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j];

                            layer_input = new int[hidden_layer_sizes[l-1]];

                            sigmoid_layers[l-1].sample_h_given_v(prev_layer_input, layer_input);
                        }
                    }

                    // CD-k update on the current layer's RBM
                    rbm_layers[i].contrastive_divergence(layer_input, lr, k);
                }
            }
        }
    }

    public void finetune(int[][] train_X, int[][] train_Y, double lr, int epochs) {
        int[] layer_input = new int[0];
        // int prev_layer_input_size;
        int[] prev_layer_input = new int[0];

        for(int epoch=0; epoch<epochs; epoch++) {
            for(int n=0; n<N; n++) {

                // layer input
                for(int i=0; i<n_layers; i++) {
                    if(i == 0) {
                        prev_layer_input = new int[n_ins];
                        for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[n][j];
                    } else {
                        prev_layer_input = new int[hidden_layer_sizes[i-1]];
                        for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j];
                    }

                    layer_input = new int[hidden_layer_sizes[i]];
                    sigmoid_layers[i].sample_h_given_v(prev_layer_input, layer_input);
                }

                log_layer.train(layer_input, train_Y[n], lr);
            }
            // lr *= 0.95;
        }
    }

    public void predict(int[] x, double[] y) {
        double[] layer_input = new double[0];
        // int prev_layer_input_size;
        double[] prev_layer_input = new double[n_ins];
        for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j];

        double linear_output;

        // layer activation
        for(int i=0; i<n_layers; i++) {
            layer_input = new double[sigmoid_layers[i].n_out];

            for(int k=0; k<sigmoid_layers[i].n_out; k++) {
                // reset per hidden unit; resetting only once per layer would
                // let activations accumulate across units
                linear_output = 0.0;
                for(int j=0; j<sigmoid_layers[i].n_in; j++) {
                    linear_output += sigmoid_layers[i].W[k][j] * prev_layer_input[j];
                }
                linear_output += sigmoid_layers[i].b[k];
                layer_input[k] = sigmoid(linear_output);
            }

            if(i < n_layers-1) {
                prev_layer_input = new double[sigmoid_layers[i].n_out];
                for(int j=0; j<sigmoid_layers[i].n_out; j++) prev_layer_input[j] = layer_input[j];
            }
        }

        for(int i=0; i<log_layer.n_out; i++) {
            y[i] = 0;
            for(int j=0; j<log_layer.n_in; j++) {
                y[i] += log_layer.W[i][j] * layer_input[j];
            }
            y[i] += log_layer.b[i];
        }

        log_layer.softmax(y);
    }

    public static void main(String[] args) {
        Random rng = new Random(123);

        double pretrain_lr = 0.1;
        int pretraining_epochs = 1000;
        int k = 1;
        double finetune_lr = 0.1;
        int finetune_epochs = 500;

        int train_N = 6;
        int test_N = 4;
        int n_ins = 6;
        int n_outs = 2;
        int[] hidden_layer_sizes = {3, 3};
        int n_layers = hidden_layer_sizes.length;

        // training data
        int[][] train_X = {
            {1, 1, 1, 0, 0, 0},
            {1, 0, 1, 0, 0, 0},
            {1, 1, 1, 0, 0, 0},
            {0, 0, 1, 1, 1, 0},
            {0, 0, 1, 1, 0, 0},
            {0, 0, 1, 1, 1, 0}
        };

        int[][] train_Y = {
            {1, 0},
            {1, 0},
            {1, 0},
            {0, 1},
            {0, 1},
            {0, 1}
        };

        // construct DBN
        DBN dbn = new DBN(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng);

        // pretrain
        dbn.pretrain(train_X, pretrain_lr, k, pretraining_epochs);

        // finetune
        dbn.finetune(train_X, train_Y, finetune_lr, finetune_epochs);

        // test data
        int[][] test_X = {
            {1, 1, 0, 0, 0, 0},
            {1, 1, 1, 1, 0, 0},
            {0, 0, 0, 1, 1, 0},
            {0, 0, 1, 1, 1, 0}
        };

        double[][] test_Y = new double[test_N][n_outs];

        // test
        for(int i=0; i<test_N; i++) {
            dbn.predict(test_X[i], test_Y[i]);
            for(int j=0; j<n_outs; j++) {
                System.out.print(test_Y[i][j] + " ");
            }
            System.out.println();
        }
    }
}
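
DBN.java also relies on RBM and LogisticRegression classes added elsewhere in this commit but not shown in this excerpt. The stubs below are only a sketch of the interface the code above actually calls, reconstructed from those call sites; the parameter and field names here are assumptions, and the training logic is omitted. Passing sigmoid_layers[i].W and sigmoid_layers[i].b into each RBM is the key design choice: the RBM and its paired HiddenLayer share parameters, so the weights learned during pretraining are exactly the ones fine-tuning starts from.

import java.util.Random;

// Interface sketch only; names assumed, bodies omitted.
class RBM {
    public double[][] W;    // shared with the paired HiddenLayer
    public double[] hbias;  // DBN passes sigmoid_layers[i].b here (name assumed)

    public RBM(int N, int n_visible, int n_hidden,
               double[][] W, double[] hbias, double[] vbias, Random rng) {
        this.W = W;
        this.hbias = hbias;
    }

    // one CD-k update on a single binary input vector (see DBN.pretrain)
    public void contrastive_divergence(int[] input, double lr, int k) { /* omitted */ }
}

// Interface sketch only; the members DBN.finetune and DBN.predict touch.
class LogisticRegression {
    public int n_in, n_out;
    public double[][] W;
    public double[] b;

    public LogisticRegression(int N, int n_in, int n_out) {
        this.n_in = n_in;
        this.n_out = n_out;
        this.W = new double[n_out][n_in];
        this.b = new double[n_out];
    }

    // one supervised gradient step (see DBN.finetune)
    public void train(int[] x, int[] y, double lr) { /* omitted */ }

    // normalizes y in place into a probability distribution (see DBN.predict)
    public void softmax(double[] y) { /* omitted */ }
}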

HiddenLayer.java (new file, 74 lines added)

import java.util.Random;

public class HiddenLayer {
    public int N;
    public int n_in;
    public int n_out;
    public double[][] W;
    public double[] b;
    Random rng;

    public double uniform(double min, double max) {
        return rng.nextDouble() * (max - min) + min;
    }

    // draw from Binomial(n, p) by summing n Bernoulli trials
    public int binomial(int n, double p) {
        if(p < 0 || p > 1) return 0;

        int c = 0;
        double r;

        for(int i=0; i<n; i++) {
            r = rng.nextDouble();
            if (r < p) c++;
        }

        return c;
    }

    public static double sigmoid(double x) {
        return 1.0 / (1.0 + Math.exp(-x));
    }

    public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng) {
        this.N = N;
        this.n_in = n_in;
        this.n_out = n_out;

        if(rng == null) this.rng = new Random(1234);
        else this.rng = rng;

        if(W == null) {
            this.W = new double[n_out][n_in];
            double a = 1.0 / this.n_in;

            for(int i=0; i<n_out; i++) {
                for(int j=0; j<n_in; j++) {
                    this.W[i][j] = uniform(-a, a);
                }
            }
        } else {
            this.W = W;
        }

        if(b == null) this.b = new double[n_out];
        else this.b = b;
    }

    // mean activation of one hidden unit: sigmoid(w . input + b)
    public double output(int[] input, double[] w, double b) {
        double linear_output = 0.0;
        for(int j=0; j<n_in; j++) {
            linear_output += w[j] * input[j];
        }
        linear_output += b;
        return sigmoid(linear_output);
    }

    // sample a binary hidden vector given a visible vector
    public void sample_h_given_v(int[] input, int[] sample) {
        for(int i=0; i<n_out; i++) {
            sample[i] = binomial(1, output(input, W[i], b[i]));
        }
    }
}
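
As a quick sanity check (a hypothetical driver, not part of the commit), HiddenLayer can be exercised on its own. With W and b passed as null, the constructor draws weights uniformly from [-1/n_in, 1/n_in] and zero-initializes the biases:

import java.util.Random;

class HiddenLayerDemo {
    public static void main(String[] args) {
        // 1 sample, 6 visible units, 3 hidden units, default init, seeded RNG
        HiddenLayer h = new HiddenLayer(1, 6, 3, null, null, new Random(123));

        int[] v = {1, 1, 1, 0, 0, 0};   // one binary input vector
        int[] sample = new int[3];
        h.sample_h_given_v(v, sample);  // each sample[i] is a Bernoulli draw
                                        // with mean sigmoid(W[i] . v + b[i])

        for (int s : sample) System.out.print(s + " ");  // three 0/1 values
        System.out.println();
    }
}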