@@ -214,7 +214,7 @@ After the Docker container is launched, the training with the default hyperparam
 
 ```bash
 ./prepare_dataset.sh
-python -m torch.distributed.launch --nproc_per_node=8 ncf.py --data /data/cache/ml-20m
+python -m torch.distributed.launch --nproc_per_node=8 --use_env ncf.py --data /data/cache/ml-20m
 ```
 
 This will result in a checkpoint file being written to `/data/checkpoints/model.pth`.
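The practical effect of the `--use_env` flag added in this hunk: the launcher exports the local rank as the `LOCAL_RANK` environment variable instead of appending a `--local_rank` command-line argument to the script. A minimal POSIX-shell sketch of how a launched process picks it up (the default of `0` for single-process runs is an assumption for illustration):

```shell
# With --use_env, torch.distributed.launch exports LOCAL_RANK instead of
# passing --local_rank; a launched script reads it from the environment,
# falling back to 0 when run outside the launcher.
local_rank="${LOCAL_RANK:-0}"
echo "local rank: ${local_rank}"
```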
@@ -225,7 +225,7 @@ This will result in a checkpoint file being written to `/data/checkpoints/model.
 The trained model can be evaluated by passing the `--mode test` flag to the `run.sh` script:
 
 ```bash
-python -m torch.distributed.launch --nproc_per_node=1 ncf.py --data /data/cache/ml-20m --mode test --load_checkpoint_path /data/checkpoints/model.pth
+python -m torch.distributed.launch --nproc_per_node=1 --use_env ncf.py --data /data/cache/ml-20m --mode test --load_checkpoint_path /data/checkpoints/model.pth
 ```
 
 
@@ -330,13 +330,13 @@ For a smaller dataset you might experience slower performance.
 To download, preprocess and train on the ML-1m dataset run:
 ```bash
 ./prepare_dataset.sh ml-1m
-python -m torch.distributed.launch --nproc_per_node=8 ncf.py --data /data/cache/ml-1m
+python -m torch.distributed.launch --nproc_per_node=8 --use_env ncf.py --data /data/cache/ml-1m
 ```
 
 ### Training process
 The name of the training script is `ncf.py`. Because of the multi-GPU support, it should always be run with the torch distributed launcher like this:
 ```bash
-python -m torch.distributed.launch --nproc_per_node=<number_of_gpus> ncf.py --data <path_to_dataset> [other_parameters]
+python -m torch.distributed.launch --nproc_per_node=<number_of_gpus> --use_env ncf.py --data <path_to_dataset> [other_parameters]
 ```
 
 The main results of the training are checkpoints stored by default in `/data/checkpoints/`. This location can be controlled
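Since `--nproc_per_node` should match the number of visible GPUs, one way to fill in `<number_of_gpus>` automatically is sketched below. This helper is an assumption for illustration, not part of the repository's scripts; it relies only on `nvidia-smi -L` printing one line per GPU:

```shell
# Hypothetical helper (not in the repository): derive the GPU count for
# --nproc_per_node, falling back to 1 when nvidia-smi is unavailable.
if command -v nvidia-smi >/dev/null 2>&1; then
  num_gpus=$(nvidia-smi -L | wc -l)
else
  num_gpus=1
fi
echo "detected ${num_gpus} GPU(s)"
```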
@@ -351,7 +351,7 @@ The HR@10 metric is the number of hits in the entire test set divided by the num
 
 Inference can be launched with the same script used for training by passing the `--mode test` flag:
 ```bash
-python -m torch.distributed.launch --nproc_per_node=<number_of_gpus> ncf.py --data <path_to_dataset> --mode test [other_parameters]
+python -m torch.distributed.launch --nproc_per_node=<number_of_gpus> --use_env ncf.py --data <path_to_dataset> --mode test [other_parameters]
 ```
 
 The script will then:
@@ -368,7 +368,7 @@ The script will then:
 NCF training on NVIDIA DGX systems is very fast; therefore, in order to measure train and validation throughput, you can simply run the full training job with:
 ```bash
 ./prepare_dataset.sh
-python -m torch.distributed.launch --nproc_per_node=8 ncf.py --data /data/cache/ml-20m --epochs 5
+python -m torch.distributed.launch --nproc_per_node=8 --use_env ncf.py --data /data/cache/ml-20m --epochs 5
 ```
 
 At the end of the script, a line reporting the best train throughput is printed.
@@ -379,7 +379,7 @@ At the end of the script, a line reporting the best train throughput is printed.
 Validation throughput can be measured by running the full training job with:
 ```bash
 ./prepare_dataset.sh
-python -m torch.distributed.launch --nproc_per_node=8 ncf.py --data /data/cache/ml-20m --epochs 5
+python -m torch.distributed.launch --nproc_per_node=8 --use_env ncf.py --data /data/cache/ml-20m --epochs 5
 ```
 
 The best validation throughput is reported to the standard output.
@@ -405,7 +405,7 @@ The training time was measured excluding data downloading, preprocessing, valida
 To reproduce this result, start the NCF Docker container interactively and run:
 ```bash
 ./prepare_dataset.sh
-python -m torch.distributed.launch --nproc_per_node=8 ncf.py --data /data/cache/ml-20m
+python -m torch.distributed.launch --nproc_per_node=8 --use_env ncf.py --data /data/cache/ml-20m
 ```
 
 ##### NVIDIA DGX-1 (8x V100 32G)
@@ -428,7 +428,7 @@ Here's an example validation accuracy curve for mixed precision vs single precis
 To reproduce this result, start the NCF Docker container interactively and run:
 ```bash
 ./prepare_dataset.sh
-python -m torch.distributed.launch --nproc_per_node=8 ncf.py --data /data/cache/ml-20m
+python -m torch.distributed.launch --nproc_per_node=8 --use_env ncf.py --data /data/cache/ml-20m
 ```
 
 ##### NVIDIA DGX-2 (16x V100 32G)
@@ -449,7 +449,7 @@ The training time was measured excluding data downloading, preprocessing, valida
 To reproduce this result, start the NCF Docker container interactively and run:
 ```bash
 ./prepare_dataset.sh
-python -m torch.distributed.launch --nproc_per_node=16 ncf.py --data /data/cache/ml-20m
+python -m torch.distributed.launch --nproc_per_node=16 --use_env ncf.py --data /data/cache/ml-20m
 ```
 
 
@@ -555,7 +555,8 @@ The following table shows the best inference throughput:
 4. September, 2019
    * Adjusting for API changes in PyTorch and APEX
    * Checkpoints loading fix
-
+5. January, 2020
+   * DLLogger support added
 
 ### Known issues
 