GestureGeneration
diff --git a/‎README.md
Lines changed: 19 additions & 18 deletions b/‎README.md
Lines changed: 19 additions & 18 deletions
diff --git a/‎config.py
Lines changed: 15 additions & 17 deletions b/‎config.py
Lines changed: 15 additions & 17 deletions
diff --git a/‎config.yaml
Lines changed: 7 additions & 10 deletions b/‎config.yaml
Lines changed: 7 additions & 10 deletions
diff --git a/‎motion_repr_learning/ae/decode.py
Lines changed: 17 additions & 14 deletions b/‎motion_repr_learning/ae/decode.py
Lines changed: 17 additions & 14 deletions
diff --git a/‎motion_repr_learning/ae/train.py
Lines changed: 2 additions & 1 deletion b/‎motion_repr_learning/ae/train.py
Lines changed: 2 additions & 1 deletion
@@ -30,6 +30,10 @@ ________________________________________________________________________________
 
 # How to use this repository?
 
+## 0. Notation
+
+Whenever a parameter is written in caps (such as DATA_DIR), it has to be specified by the user on the command line as a positional argument.
+
 ## 1. Obtain raw data
 
 - Clone this repository
@@ -42,8 +46,9 @@ git checkout GENEA_2020
 ```
 - Download a dataset from KTH Box using the link you obtained after signing the license agreement
 
-
 ## 2. Pre-process the data
+By default, the model expects the dataset in the `<repository>/dataset/raw` folder, and the processed dataset will be available in the `<repository>/dataset/processed` folder. If your dataset is elsewhere, please provide the correct paths with the `--raw_data_dir` and `--proc_data_dir` command line arguments. You can also use the `--help` argument to see more details about the scripts.
+
 ```
 cd data_processing
 
@@ -59,23 +64,20 @@ python process_dataset.py
 cd ..
 ```
 
-By default, the model expects the dataset in the `<repository>/dataset/raw` folder, and the processed dataset will be available in the `<repository>/dataset/processed folder`. If your dataset is elsewhere, please provide the correct paths with the `--raw_data_dir` and `--proc_data_dir` command line arguments for the 'split_dataset.py' and `process_dataset.py`. You can also use '--help' argument to see more details about the scripts.
-
-As a result of running this script
-- numpy binary files `X_train.npy`, `Y_train.npy` (training dataset files) are created under `--proc_data_dir`
-- under `/test_inputs/` subfolder of the processed dataset folder test audios, such as `X_test_audio1168.npy` , are created
+As a result of running this script, the dataset is created in `--proc_data_dir`:
+- the training dataset files `X_train.npy`, `Y_train.npy` and the validation dataset files `X_dev.npy`, `Y_dev.npy` are binary numpy files
+- the audio inputs for testing (such as `X_test_NaturalTalking_04.npy`) are under the `/test_inputs/` subfolder
 
+The rest of the folders in `--proc_data_dir` (e.g. `/dev_inputs/` or `/train/`) can be ignored (they are a side effect of the preprocessing script).
 
-## 3. Learn motion representation by AutoEncoder and Encode the datset
-
-Create a directory to save training checkpoints such as `chkpt/` and use it as CHKPT_DIR parameter.
-#### Learn dataset encoding and encode the training and validation datasets
-```sh
-python motion_repr_learning/ae/learn_ae_n_encode_dataset.py --data_dir <path/to/your/dataset> --layer1_width 40
+## 3. Learn motion representation by AutoEncoder and encode the training and validation datasets
+```sh
+python motion_repr_learning/ae/learn_ae_n_encode_dataset.py --layer1_width DIM
 ```
+There are several parameters that can be modified in the `config.yaml` file or through the command line, see `config.py` for details.
+The optimal dimensionality (DIM) in our experiment was 40. 
 
-The optimal dimensionality (DIM) in our experiment was 40
-
+More information can be found in the folder `motion_repr_learning` 
 
 ## 4. Learn speech-driven gesture generation model
 
@@ -97,15 +99,14 @@ python predict.py MODEL_NAME.hdf5 INPUT_SPEECH_FILE OUTPUT_GESTURE_FILE
 
 ```sh
 # Usage example
-python predict.py model.hdf5 data/test_inputs/X_test_audio1168.npy data/test_inputs/predict_1168_20fps.txt
+python predict.py model.hdf5 data/test_inputs/X_test_NaturalTalking_04.npy data/test_inputs/predict_04_20fps.txt
 ```
 
+The predicted gestures have to be decoded with `decode.py`, which reuses the config from step 3.
 ```sh
-# You need to decode the gestures
-python motion_repr_learning/ae/decode.py DATA_DIR ENCODED_PREDICTION_FILE DECODED_GESTURE_FILE -restore=True -pretrain=False -layer1_width=DIM -chkpt_dir=CHKPT_DIR -batch_size=8 
+python motion_repr_learning/ae/decode.py -input_file INPUT_FILE -output_file OUTPUT_FILE --layer1_width DIM --batch_size=8
 ```
 
-
 ## 6. Quantitative evaluation
 Use scripts in the `evaluation` folder of this directory.
 
 
@@ -3,36 +3,34 @@
 
 # NOTE: the global variable 'args' for accessing the config parameters from other modules
 #       is defined at the very bottom of this file
-
-# Modify this function to set the default home directory for this repo
-def home_out(path):
-    return os.path.join(os.environ['HOME'], 'tmp', 'MoCap', path)
-
 def construct_config_parser():
     parser = ArgumentParser(args_for_setting_config_path = ['-config'],
                             default_config_files = ['./config.yaml'],
                             config_file_parser_class = YAMLConfigFileParser)
 
     parser.add('--seed', type=int, help='Random seed')
 
-    # ---- The data directories ----
+    # ---- Data directories ----
+    
+    parser.add('--data_dir',     help='The directory with the preprocessed dataset')
+    parser.add('--summary_dir',  help='Directory for saving the summary data')
+    parser.add('--chkpt_dir',    help='Directory for saving the model checkpoints')
+    parser.add('--results_file', help='File for saving the results of the experiments')
+
+    # ---- Input/output files for 'decode.py' only ----
 
-    parser.add('-data_dir', '--data_dir', required=True,
-               help='The directory with the preprocessed dataset')
-    parser.add('--summary_dir', default=home_out('summaries_exp'),
-               help='Directory for saving the summary data')
-    parser.add('--chkpt_dir', default=home_out('chkpts_exp'),
-               help='Directory for saving the model checkpoints')
-    parser.add('--results_file', default=home_out('results.txt'),
-               help='File for saving the results of the experiments')
+    parser.add('-input_file',  default=None, 
+               help="The encoded prediction file that will be decoded (only used in 'decode.py')")
+    parser.add('-output_file', default=None,
+               help="The output file where the decoded gesture will be stored (only used in 'decode.py')")
 
     # ---- Flags ----
 
-    parser.add('-pretrain', '--pretrain_network',           action='store_true', 
+    parser.add('-pretrain', '--pretrain_network',               action='store_true', 
                help='If set, pretrain the model in a layerwise manner')
-    parser.add('-load_model', '--load_model_from_checkpoint',     action='store_true',
+    parser.add('-load_model', '--load_model_from_checkpoint',   action='store_true',
                help='If set, load the model from a checkpoint')
-    parser.add('-no_early_stopping', '--no_early_stopping', action='store_false',
+    parser.add('-no_early_stopping', '--no_early_stopping',     action='store_false',
                help='If set, disable early stopping')
 
     # ---- Network architecture --- 
 
@@ -1,14 +1,12 @@
 # See config.py for details about these parameters.
 
-# NOTE: The defaults checkpoint, result and dataset directories are set in the code.
+data_dir:     "./dataset/processed/"
+summary_dir:    "./results/summaries/"
+chkpt_dir:  "./results/checkpoints/"
+results_file: "./results/results.txt"
 
 seed: 123456
 
-#-------------------------------------------------------
-# These boolean flags can be enabled by supplying them |
-# through the command-line or uncommenting them below  |
-#-------------------------------------------------------
-
 delta_for_early_stopping: 0.5
 
 # ---- Network architecture ----
@@ -31,9 +29,8 @@ lr: 0.0001
 pretraining_lr: 0.001
 
 
-#-----------------------------------------------------
-# Weight decay is disabled by default.               |
-# You can enable it by setting its multiplier below: |
-#-----------------------------------------------------
+
+# Weight decay is disabled by default.               
+# You can enable it by setting its multiplier below: 
 
 # weight_decay: <some value>
@@ -2,30 +2,33 @@
 This file contains a usage script, intended to test using interface.
 Developed by Taras Kucherenko (tarask@kth.se)
 """
+import sys
+sys.path.append('.')
+import numpy as np
 
 import train as tr
-import utils.data as dt
-import utils.flags as fl
 from learn_ae_n_encode_dataset import create_nn, prepare_motion_data
-
-import numpy as np
-
-import sys
-
-DATA_DIR = sys.argv[1]
-TEST_FILE = sys.argv[2]
-OUTPUT_FILE = sys.argv[3]
+from config import args
 
 if __name__ == '__main__':
+    # Make sure that the two mandatory arguments are provided
+    if args.input_file is None or args.output_file is None:
+        print("Usage: python decode.py -input_file INPUT_FILE -output_file OUTPUT_FILE \n" + \
+              "Where INPUT_FILE is the encoded prediction file and OUTPUT_FILE is the file in which the decoded gestures will be saved.")
+        exit(-1)
+    
+    # For decoding these arguments are always False and True
+    args.pretrain_network = False
+    args.load_model_from_checkpoint = True
 
     # Get the data
-    Y_train_normalized, Y_train, Y_dev_normalized, max_val, mean_pose  = prepare_motion_data(DATA_DIR)
+    Y_train_normalized, Y_train, Y_dev_normalized, max_val, mean_pose  = prepare_motion_data(args.data_dir)
 
     # Train the network
-    nn = create_nn(Y_train_normalized, Y_dev_normalized, max_val, mean_pose, restoring=True)
+    nn = create_nn(Y_train_normalized, Y_dev_normalized, max_val, mean_pose)
 
     # Read the encoding
-    encoding = np.loadtxt(TEST_FILE)
+    encoding = np.loadtxt(args.input_file)
 
     print(encoding.shape)
 
@@ -34,7 +37,7 @@
 
     print(decoding.shape)
 
-    np.save(OUTPUT_FILE, decoding)
+    np.save(args.output_file, decoding)
 
     # Close Tf session
     nn.session.close()
@@ -8,6 +8,7 @@
 """
 
 import time
+from os.path import join, abspath
 import tensorflow as tf
 from tensorflow.python import debug as tf_debug
 import numpy as np
@@ -162,7 +163,7 @@ def learning(data, data_info, just_restore=False):
 
             # Create a saver
             saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
-            chkpt_file = args.chkpt_dir + '/chkpt-final'
+            chkpt_file = abspath(join(args.chkpt_dir, 'chkpt-final'))
 
             # restore model, if needed
             if args.load_model_from_checkpoint: