forked from yusugomori/DeepLearning
Commit 8c99aa9
Yusuke Sugomori committed on Mar 16, 2013
1 parent: c291e7e
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing 7 changed files with 870 additions and 28 deletions.
DBN.java (new file, 218 lines added)

import java.util.Random;

public class DBN {
    public int N;
    public int n_ins;
    public int[] hidden_layer_sizes;
    public int n_outs;
    public int n_layers;
    HiddenLayer[] sigmoid_layers;
    RBM[] rbm_layers;
    LogisticRegression log_layer;
    Random rng;

    public static double sigmoid(double x) {
        return 1.0 / (1.0 + Math.exp(-x));
    }

    public DBN(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) {
        int input_size;

        this.N = N;
        this.n_ins = n_ins;
        this.hidden_layer_sizes = hidden_layer_sizes;
        this.n_outs = n_outs;
        this.n_layers = n_layers;

        this.sigmoid_layers = new HiddenLayer[n_layers];
        this.rbm_layers = new RBM[n_layers];

        if(rng == null) this.rng = new Random(1234);
        else this.rng = rng;

        // construct multi-layer
        for(int i=0; i<this.n_layers; i++) {
            if(i == 0) {
                input_size = this.n_ins;
            } else {
                input_size = this.hidden_layer_sizes[i-1];
            }

            // construct sigmoid_layer
            this.sigmoid_layers[i] = new HiddenLayer(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng);

            // construct rbm_layer; it shares W and b with the sigmoid layer above
            this.rbm_layers[i] = new RBM(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, rng);
        }

        // layer for output using LogisticRegression
        this.log_layer = new LogisticRegression(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs);
    }

    public void pretrain(int[][] train_X, double lr, int k, int epochs) {
        int[] layer_input = new int[0];
        int prev_layer_input_size;
        int[] prev_layer_input;

        for(int i=0; i<n_layers; i++) {  // layer-wise
            for(int epoch=0; epoch<epochs; epoch++) {  // training epochs
                for(int n=0; n<N; n++) {  // input x1...xN
                    // layer input: propagate the sample up through the already-trained layers
                    for(int l=0; l<=i; l++) {

                        if(l == 0) {
                            layer_input = new int[n_ins];
                            for(int j=0; j<n_ins; j++) layer_input[j] = train_X[n][j];
                        } else {
                            if(l == 1) prev_layer_input_size = n_ins;
                            else prev_layer_input_size = hidden_layer_sizes[l-2];

                            prev_layer_input = new int[prev_layer_input_size];
                            for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j];

                            layer_input = new int[hidden_layer_sizes[l-1]];

                            sigmoid_layers[l-1].sample_h_given_v(prev_layer_input, layer_input);
                        }
                    }

                    // CD-k update on the current layer's RBM
                    rbm_layers[i].contrastive_divergence(layer_input, lr, k);
                }
            }
        }
    }

    public void finetune(int[][] train_X, int[][] train_Y, double lr, int epochs) {
        int[] layer_input = new int[0];
        // int prev_layer_input_size;
        int[] prev_layer_input = new int[0];

        for(int epoch=0; epoch<epochs; epoch++) {
            for(int n=0; n<N; n++) {

                // layer input
                for(int i=0; i<n_layers; i++) {
                    if(i == 0) {
                        prev_layer_input = new int[n_ins];
                        for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[n][j];
                    } else {
                        prev_layer_input = new int[hidden_layer_sizes[i-1]];
                        for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j];
                    }

                    layer_input = new int[hidden_layer_sizes[i]];
                    sigmoid_layers[i].sample_h_given_v(prev_layer_input, layer_input);
                }

                log_layer.train(layer_input, train_Y[n], lr);
            }
            // lr *= 0.95;
        }
    }

    public void predict(int[] x, double[] y) {
        double[] layer_input = new double[0];
        // int prev_layer_input_size;
        double[] prev_layer_input = new double[n_ins];
        for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j];

        double linear_output;

        // layer activation
        for(int i=0; i<n_layers; i++) {
            layer_input = new double[sigmoid_layers[i].n_out];

            for(int k=0; k<sigmoid_layers[i].n_out; k++) {
                // reset per hidden unit; resetting only once per layer would
                // let activations accumulate across units
                linear_output = 0.0;
                for(int j=0; j<sigmoid_layers[i].n_in; j++) {
                    linear_output += sigmoid_layers[i].W[k][j] * prev_layer_input[j];
                }
                linear_output += sigmoid_layers[i].b[k];
                layer_input[k] = sigmoid(linear_output);
            }

            if(i < n_layers-1) {
                prev_layer_input = new double[sigmoid_layers[i].n_out];
                for(int j=0; j<sigmoid_layers[i].n_out; j++) prev_layer_input[j] = layer_input[j];
            }
        }

        for(int i=0; i<log_layer.n_out; i++) {
            y[i] = 0;
            for(int j=0; j<log_layer.n_in; j++) {
                y[i] += log_layer.W[i][j] * layer_input[j];
            }
            y[i] += log_layer.b[i];
        }

        log_layer.softmax(y);
    }

    public static void main(String[] args) {
        Random rng = new Random(123);

        double pretrain_lr = 0.1;
        int pretraining_epochs = 1000;
        int k = 1;
        double finetune_lr = 0.1;
        int finetune_epochs = 500;

        int train_N = 6;
        int test_N = 4;
        int n_ins = 6;
        int n_outs = 2;
        int[] hidden_layer_sizes = {3, 3};
        int n_layers = hidden_layer_sizes.length;

        // training data
        int[][] train_X = {
            {1, 1, 1, 0, 0, 0},
            {1, 0, 1, 0, 0, 0},
            {1, 1, 1, 0, 0, 0},
            {0, 0, 1, 1, 1, 0},
            {0, 0, 1, 1, 0, 0},
            {0, 0, 1, 1, 1, 0}
        };

        int[][] train_Y = {
            {1, 0},
            {1, 0},
            {1, 0},
            {0, 1},
            {0, 1},
            {0, 1}
        };

        // construct DBN
        DBN dbn = new DBN(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng);

        // pretrain
        dbn.pretrain(train_X, pretrain_lr, k, pretraining_epochs);

        // finetune
        dbn.finetune(train_X, train_Y, finetune_lr, finetune_epochs);

        // test data
        int[][] test_X = {
            {1, 1, 0, 0, 0, 0},
            {1, 1, 1, 1, 0, 0},
            {0, 0, 0, 1, 1, 0},
            {0, 0, 1, 1, 1, 0}
        };

        double[][] test_Y = new double[test_N][n_outs];

        // test
        for(int i=0; i<test_N; i++) {
            dbn.predict(test_X[i], test_Y[i]);
            for(int j=0; j<n_outs; j++) {
                System.out.print(test_Y[i][j] + " ");
            }
            System.out.println();
        }
    }
}
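
DBN.java also relies on RBM and LogisticRegression classes added elsewhere in this commit but not shown in this excerpt. The stubs below are only a sketch of the interface the code above actually calls, reconstructed from those call sites; the parameter and field names here are assumptions, and the training logic is omitted. Passing sigmoid_layers[i].W and sigmoid_layers[i].b into each RBM is the key design choice: the RBM and its paired HiddenLayer share parameters, so the weights learned during pretraining are exactly the ones fine-tuning starts from.

import java.util.Random;

// Interface sketch only; names assumed, bodies omitted.
class RBM {
    public double[][] W;    // shared with the paired HiddenLayer
    public double[] hbias;  // DBN passes sigmoid_layers[i].b here (name assumed)

    public RBM(int N, int n_visible, int n_hidden,
               double[][] W, double[] hbias, double[] vbias, Random rng) {
        this.W = W;
        this.hbias = hbias;
    }

    // one CD-k update on a single binary input vector (see DBN.pretrain)
    public void contrastive_divergence(int[] input, double lr, int k) { /* omitted */ }
}

// Interface sketch only; the members DBN.finetune and DBN.predict touch.
class LogisticRegression {
    public int n_in, n_out;
    public double[][] W;
    public double[] b;

    public LogisticRegression(int N, int n_in, int n_out) {
        this.n_in = n_in;
        this.n_out = n_out;
        this.W = new double[n_out][n_in];
        this.b = new double[n_out];
    }

    // one supervised gradient step (see DBN.finetune)
    public void train(int[] x, int[] y, double lr) { /* omitted */ }

    // normalizes y in place into a probability distribution (see DBN.predict)
    public void softmax(double[] y) { /* omitted */ }
}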

HiddenLayer.java (new file, 74 lines added)

import java.util.Random;

public class HiddenLayer {
    public int N;
    public int n_in;
    public int n_out;
    public double[][] W;
    public double[] b;
    Random rng;

    public double uniform(double min, double max) {
        return rng.nextDouble() * (max - min) + min;
    }

    // draw from Binomial(n, p) by summing n Bernoulli trials
    public int binomial(int n, double p) {
        if(p < 0 || p > 1) return 0;

        int c = 0;
        double r;

        for(int i=0; i<n; i++) {
            r = rng.nextDouble();
            if (r < p) c++;
        }

        return c;
    }

    public static double sigmoid(double x) {
        return 1.0 / (1.0 + Math.exp(-x));
    }

    public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng) {
        this.N = N;
        this.n_in = n_in;
        this.n_out = n_out;

        if(rng == null) this.rng = new Random(1234);
        else this.rng = rng;

        if(W == null) {
            this.W = new double[n_out][n_in];
            double a = 1.0 / this.n_in;

            for(int i=0; i<n_out; i++) {
                for(int j=0; j<n_in; j++) {
                    this.W[i][j] = uniform(-a, a);
                }
            }
        } else {
            this.W = W;
        }

        if(b == null) this.b = new double[n_out];
        else this.b = b;
    }

    // mean activation of one hidden unit: sigmoid(w . input + b)
    public double output(int[] input, double[] w, double b) {
        double linear_output = 0.0;
        for(int j=0; j<n_in; j++) {
            linear_output += w[j] * input[j];
        }
        linear_output += b;
        return sigmoid(linear_output);
    }

    // sample a binary hidden vector given a visible vector
    public void sample_h_given_v(int[] input, int[] sample) {
        for(int i=0; i<n_out; i++) {
            sample[i] = binomial(1, output(input, W[i], b[i]));
        }
    }
}
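
As a quick sanity check (a hypothetical driver, not part of the commit), HiddenLayer can be exercised on its own. With W and b passed as null, the constructor draws weights uniformly from [-1/n_in, 1/n_in] and zero-initializes the biases:

import java.util.Random;

class HiddenLayerDemo {
    public static void main(String[] args) {
        // 1 sample, 6 visible units, 3 hidden units, default init, seeded RNG
        HiddenLayer h = new HiddenLayer(1, 6, 3, null, null, new Random(123));

        int[] v = {1, 1, 1, 0, 0, 0};   // one binary input vector
        int[] sample = new int[3];
        h.sample_h_given_v(v, sample);  // each sample[i] is a Bernoulli draw
                                        // with mean sigmoid(W[i] . v + b[i])

        for (int s : sample) System.out.print(s + " ");  // three 0/1 values
        System.out.println();
    }
}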