Commit 4bcde4b: Initial commit (0 parents)

File tree: 20 files changed (+4280 -0 lines)

.gitignore

Lines changed: 95 additions & 0 deletions
# Temporary files
.DS_Store
.Trashes
.Spotlight-V100
*.swp
*.lock

# Xcode
build/
DerivedData/

*.pbxuser
*.mode1v3
*.mode2v3
*.perspectivev3
*.xccheckout
*.moved-aside
*.xcuserstate

xcuserdata

!default.pbxuser
!default.mode1v3
!default.mode2v3
!default.perspectivev3

profile
*.hmap
*.ipa

# CocoaPods
Pods/
!Podfile.lock

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

.ipynb_checkpoints/

README.markdown

Lines changed: 40 additions & 0 deletions
# TensorFlow on iOS demo

This is the code that accompanies my blog post [Getting started with TensorFlow on iOS](http://machinethink.net/blog/tensorflow-on-ios/).

It uses TensorFlow to train a basic binary classifier on the [Gender Recognition by Voice and Speech Analysis](https://www.kaggle.com/primaryobjects/voicegender) dataset.

This project includes the following:

- The dataset in the file **voice.csv**.
- Python scripts to train the model with TensorFlow on your Mac.
- An iOS app that uses the TensorFlow C++ API to do inference.
- An iOS app that uses Metal to do inference using the trained model.

## Training the model

To train the model, do the following:

1. Make sure these are installed: `python3`, `numpy`, `pandas`, `scikit-learn`, `tensorflow`.
2. Run the **split_data.py** script to divide the dataset into a training set and a test set. This creates four new files: `X_train.npy`, `y_train.npy`, `X_test.npy`, and `y_test.npy` (the sketch below shows a quick way to verify them).
3. Run the **train.py** script. This trains the logistic classifier and saves the model to `/tmp/voice` every 10,000 training steps. Training runs in an infinite loop, so press Ctrl+C once you're happy with the training set accuracy and the loss no longer goes down.
4. Run the **test.py** script to compute the accuracy on the test set. This also prints a report with precision / recall / f1-score and a confusion matrix.
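If you want to double-check the files written by **split_data.py**, here is a minimal sketch (the 20 feature columns come from the dataset; exact row counts depend on the 70/30 split):

```python
import numpy as np

# Load the four files created by split_data.py and confirm the shapes line up:
# X_* should have 20 feature columns, y_* a single label column.
X_train = np.load("X_train.npy")
y_train = np.load("y_train.npy")
X_test = np.load("X_test.npy")
y_test = np.load("y_test.npy")
print(X_train.shape, y_train.shape)  # (n_train, 20) (n_train, 1)
print(X_test.shape, y_test.shape)    # (n_test, 20)  (n_test, 1)
```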
## Using the model with the iOS TensorFlow app

To run the model on the iOS TensorFlow app, do the following:

1. Clone [TensorFlow](https://github.com/tensorflow) and [build the iOS library](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/makefile).
2. Open the **VoiceTensorFlow** Xcode project. In **Build Settings**, under **Other Linker Flags** and **Header Search Paths**, change the paths to your local installation of TensorFlow.

The model is already included in the app as **inference.pb**. If you train the model with different settings, you need to run the `freeze_graph` and `optimize_for_inference` tools to create a new inference.pb.
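For reference, a sketch of what the `freeze_graph` step could look like when called from Python instead of the command-line wrapper. The graph and checkpoint paths come from the training script, the output node names are taken from the scopes in **train.py**, and the `save/...` names are the tool's usual defaults; treat all of these as assumptions to adapt:

```python
from tensorflow.python.tools import freeze_graph

# Combine the graph definition and the checkpointed variables into one file.
freeze_graph.freeze_graph(
    input_graph="/tmp/voice/graph.pb",
    input_saver="",
    input_binary=True,                    # graph.pb was written as a binary protobuf
    input_checkpoint="/tmp/voice/model",
    output_node_names="model/y_pred,inference/inference",
    restore_op_name="save/restore_all",   # assumed default
    filename_tensor_name="save/Const:0",  # assumed default
    output_graph="/tmp/voice/frozen.pb",
    clear_devices=True,
    initializer_nodes="")
```

`optimize_for_inference` can then be run on `frozen.pb` to produce the final inference.pb.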
## Using the model with the iOS Metal app

To run the model on the iOS Metal app, do the following:

1. Run the **export_weights.py** script. This creates two new files that contain the model's learned parameters: `W.bin` for the weights and `b.bin` for the bias.
2. Copy `W.bin` and `b.bin` into the **VoiceMetal** Xcode project and build the app.

You need to run the Metal app on a device; it won't work in the simulator.

Scripts/clean.sh

Lines changed: 4 additions & 0 deletions
#!/bin/sh
# Run this script to start afresh.
# -f keeps rm quiet if the files don't exist yet.
rm -f *.npy
rm -f *.bin

Scripts/export_weights.py

Lines changed: 38 additions & 0 deletions
# This script exports the learned parameters so that we can use them from Metal.

# Note: For this simple demo project the weight matrix is only 20 values and the bias
# is a single number. With such a simple model you might as well stick the parameters
# inside a static array in the iOS app source code. In practice, however, most models
# will have millions of parameters.

import os
import numpy as np
import tensorflow as tf

checkpoint_dir = "/tmp/voice/"

with tf.Session() as sess:
    # Load the graph.
    graph_file = os.path.join(checkpoint_dir, "graph.pb")
    with tf.gfile.FastGFile(graph_file, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, name="")

    # Get the model's variables.
    W = sess.graph.get_tensor_by_name("model/W:0")
    b = sess.graph.get_tensor_by_name("model/b:0")

    # Load the saved variables from the checkpoint back into the session.
    checkpoint_file = os.path.join(checkpoint_dir, "model")
    saver = tf.train.Saver([W, b])
    saver.restore(sess, checkpoint_file)

    # Just for debugging, print out the learned parameters.
    print("W:", W.eval())
    print("b:", b.eval())

    # Export the contents of W and b as binary files.
    W.eval().tofile("W.bin")
    b.eval().tofile("b.bin")
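A quick way to sanity-check the exported files is to read them back with NumPy and redo the logistic computation by hand. This is only a sketch: it assumes TensorFlow's default float32 dtype and the [20, 1] weight shape defined in train.py.

```python
import numpy as np

# tofile() writes raw values with no header, so supply dtype and shape ourselves.
W = np.fromfile("W.bin", dtype=np.float32).reshape(20, 1)
b = np.fromfile("b.bin", dtype=np.float32)

# Recompute the model's output for one test example: sigmoid(x.W + b).
x = np.load("X_test.npy")[:1].astype(np.float32)
prob = 1.0 / (1.0 + np.exp(-(x @ W + b)))
print("P(male):", prob.ravel(), "prediction:", (prob > 0.5).astype(np.float32))
```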

Scripts/split_data.py

Lines changed: 40 additions & 0 deletions
# This script loads the original dataset and splits it into a training set and test set.

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the CSV file.
df = pd.read_csv("voice.csv", header=0)

# Extract the labels into a numpy array. The original labels are text but we convert
# them to numbers: 1 = male, 0 = female.
labels = (df["label"] == "male").values * 1

# labels is a row vector but TensorFlow expects a column vector, so reshape it.
labels = labels.reshape(-1, 1)

# Remove the column with the labels.
del df["label"]

# OPTIONAL: Do additional preprocessing, such as scaling the features.
# for column in df.columns:
#     mean = df[column].mean()
#     std = df[column].std()
#     df[column] = (df[column] - mean) / std

# Convert the training data to a numpy array.
data = df.values
print("Full dataset size:", data.shape)

# Split into a random training set and a test set.
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.3, random_state=123456)

print("Training set size:", X_train.shape)
print("Test set size:", X_test.shape)

# Save the matrices using numpy's native format.
np.save("X_train.npy", X_train)
np.save("X_test.npy", X_test)
np.save("y_train.npy", y_train)
np.save("y_test.npy", y_test)

Scripts/test.py

Lines changed: 50 additions & 0 deletions
# This script tests how well the trained model performs on the portion of the
# data that was not used for training.

import os
import numpy as np
import tensorflow as tf
from sklearn import metrics

checkpoint_dir = "/tmp/voice/"

# Load the test data.
X_test = np.load("X_test.npy")
y_test = np.load("y_test.npy")

print("Test set size:", X_test.shape)

with tf.Session() as sess:
    # Load the graph.
    graph_file = os.path.join(checkpoint_dir, "graph.pb")
    with tf.gfile.FastGFile(graph_file, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, name="")

    # Uncomment the next line in case you're curious what the graph looks like.
    #print(graph_def.ListFields())

    # Get the model's variables.
    W = sess.graph.get_tensor_by_name("model/W:0")
    b = sess.graph.get_tensor_by_name("model/b:0")

    # Load the saved variables from the checkpoint back into the session.
    checkpoint_file = os.path.join(checkpoint_dir, "model")
    saver = tf.train.Saver([W, b])
    saver.restore(sess, checkpoint_file)

    # Get the placeholders and the accuracy operation, so that we can compute
    # the accuracy (% correct) of the test set.
    x = sess.graph.get_tensor_by_name("inputs/x-input:0")
    y = sess.graph.get_tensor_by_name("inputs/y-input:0")
    accuracy = sess.graph.get_tensor_by_name("score/accuracy:0")
    print("Test set accuracy:", sess.run(accuracy, feed_dict={x: X_test, y: y_test}))

    # Also show some other reports.
    inference = sess.graph.get_tensor_by_name("inference/inference:0")
    predictions = sess.run(inference, feed_dict={x: X_test})
    print("\nClassification report:")
    print(metrics.classification_report(y_test.ravel(), predictions))
    print("Confusion matrix:")
    print(metrics.confusion_matrix(y_test.ravel(), predictions))
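A note on reading that last printout: scikit-learn's `confusion_matrix` puts the true labels on the rows and the predictions on the columns, so with the 0 = female, 1 = male encoding used here the top-left cell counts correctly classified female samples. A tiny self-contained illustration:

```python
from sklearn import metrics

# Rows are true labels, columns are predictions; with 0 = female, 1 = male:
# [[female classified as female, female misclassified as male],
#  [male misclassified as female, male classified as male]]
print(metrics.confusion_matrix([0, 0, 1, 1], [0, 1, 1, 1]))
# [[1 1]
#  [0 2]]
```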

Scripts/train.py

Lines changed: 125 additions & 0 deletions
# This script is used to train the model. It repeats indefinitely and saves the
# model every so often to a checkpoint.
#
# Press Ctrl+C when you feel that training has gone on long enough (since this is
# only a simple model it takes less than a minute to train, but training a deep
# learning model could take days).

import os
import numpy as np
import tensorflow as tf

checkpoint_dir = "/tmp/voice/"
print_every = 1000
save_every = 10000
num_inputs = 20
num_classes = 1

# Load the training data.
X_train = np.load("X_train.npy")
y_train = np.load("y_train.npy")

print("Training set size:", X_train.shape)

# Below we'll define the computational graph using TensorFlow. The different parts
# of the model are grouped into different "scopes", making it easier to understand
# what each part is doing.

# Hyperparameters let you configure the model and how it is trained. They're
# called "hyper" parameters because unlike the regular parameters they are not
# learned by the model -- you have to set them to appropriate values yourself.
#
# The learning_rate tells the optimizer how big a step it should take.
# Regularization is used to prevent overfitting on the training set.
with tf.name_scope("hyperparameters"):
    regularization = tf.placeholder(tf.float32, name="regularization")
    learning_rate = tf.placeholder(tf.float32, name="learning-rate")

# This is where we feed the training data (and later the test data) into the model.
# In this dataset there are 20 features, so x is a matrix with 20 columns. Its number
# of rows is None because it depends on how many examples at a time we put into this
# matrix. This is a binary classifier, so for every training example, y gives a single
# output: 1 = male, 0 = female.
with tf.name_scope("inputs"):
    x = tf.placeholder(tf.float32, [None, num_inputs], name="x-input")
    y = tf.placeholder(tf.float32, [None, num_classes], name="y-input")

# The parameters that we'll learn consist of W, a weight matrix, and b, a vector
# of bias values. (Actually, b is just a single value since the classifier has only
# one output. For a classifier that can recognize multiple classes, b would have as
# many elements as there are classes.)
with tf.name_scope("model"):
    W = tf.Variable(tf.zeros([num_inputs, num_classes]), name="W")
    b = tf.Variable(tf.zeros([num_classes]), name="b")

    # The output is the probability the speaker is male. If this is greater than
    # 0.5, we consider the speaker to be male, otherwise female.
    y_pred = tf.sigmoid(tf.matmul(x, W) + b, name="y_pred")

# This is a logistic classifier, so the loss function is the logistic loss.
with tf.name_scope("loss-function"):
    loss = tf.losses.log_loss(labels=y, predictions=y_pred)

    # Add L2 regularization to the loss.
    loss += regularization * tf.nn.l2_loss(W)

# Use the Adam optimizer to minimize the loss.
with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.minimize(loss)

# For doing inference on new data for which we don't have labels.
with tf.name_scope("inference"):
    inference = tf.to_float(y_pred > 0.5, name="inference")

# The accuracy operation computes the % correct on a dataset with known labels.
with tf.name_scope("score"):
    correct_prediction = tf.equal(inference, y)
    accuracy = tf.reduce_mean(tf.to_float(correct_prediction), name="accuracy")

init = tf.global_variables_initializer()

# For writing training checkpoints and reading them back in.
saver = tf.train.Saver()
tf.gfile.MakeDirs(checkpoint_dir)

with tf.Session() as sess:
    # Write the graph definition to a file. We'll load this in the test.py script.
    tf.train.write_graph(sess.graph_def, checkpoint_dir, "graph.pb", False)

    # Reset W and b to zero.
    sess.run(init)

    # Sanity check: the initial loss should be 0.693147, which is -ln(0.5).
    loss_value = sess.run(loss, feed_dict={x: X_train, y: y_train, regularization: 0})
    print("Initial loss:", loss_value)

    # Loop forever:
    step = 0
    while True:
        # We randomly shuffle the examples every time we train.
        perm = np.arange(len(X_train))
        np.random.shuffle(perm)
        X_train = X_train[perm]
        y_train = y_train[perm]

        # Run the optimizer over the entire training set at once. For larger datasets
        # you would train in batches of 100-1000 examples instead of the entire thing.
        feed = {x: X_train, y: y_train, learning_rate: 1e-2, regularization: 1e-5}
        sess.run(train_op, feed_dict=feed)

        # Print the loss once every so many steps. Because of the regularization,
        # at some point the loss won't become smaller anymore. At that point, it's
        # safe to press Ctrl+C to stop the training.
        if step % print_every == 0:
            train_accuracy, loss_value = sess.run([accuracy, loss], feed_dict=feed)
            print("step: %4d, loss: %.4f, training accuracy: %.4f" %
                  (step, loss_value, train_accuracy))

        step += 1

        # Save the model. You should only press Ctrl+C after you see this message.
        if step % save_every == 0:
            checkpoint_file = os.path.join(checkpoint_dir, "model")
            saver.save(sess, checkpoint_file)
            print("*** SAVED MODEL ***")
