Commit f0ef849

ConvNets update
1 parent 6a49d18 commit f0ef849

31 files changed: +1678 -898 lines

PyTorch/Classification/ConvNets/.gitmodules

Whitespace-only changes.
Lines changed: 4 additions & 1 deletion

@@ -1,5 +1,8 @@
-ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:19.07-py3
+ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:19.10-py3
 FROM ${FROM_IMAGE_NAME}
 
+ADD requirements.txt /workspace/
+WORKDIR /workspace/
+RUN pip install --no-cache-dir -r requirements.txt
 ADD . /workspace/rn50
 WORKDIR /workspace/rn50
Lines changed: 68 additions & 10 deletions

@@ -1,39 +1,88 @@
 # Convolutional Networks for Image Classification in PyTorch
 
-In this repository you will find implementations of various image classification models.
+In this repository you will find implementations of various image classification models.
 
-Detailed information on each model can be found here:
+## Table Of Contents
+
+* [Models](#models)
+* [Validation accuracy results](#validation-accuracy-results)
+* [Training performance results](#training-performance-results)
+  * [Training performance: NVIDIA DGX-1 (8x V100 16G)](#training-performance-nvidia-dgx-1-8x-v100-16g)
+  * [Training performance: NVIDIA DGX-2 (16x V100 32G)](#training-performance-nvidia-dgx-2-16x-v100-32g)
+* [Model comparison](#model-comparison)
+  * [Accuracy vs FLOPS](#accuracy-vs-flops)
+  * [Latency vs Throughput on different batch sizes](#latency-vs-throughput-on-different-batch-sizes)
+
+## Models
+
+The following table provides links to where you can find additional information on each model:
 
 | **Model** | **Link**|
 |:-:|:-:|
 | resnet50 | [README](./resnet50v1.5/README.md) |
 | resnext101-32x4d | [README](./resnext101-32x4d/README.md) |
 | se-resnext101-32x4d | [README](./se-resnext101-32x4d/README.md) |
 
-## Accuracy
+## Validation accuracy results
+
+Our results were obtained by running the applicable
+training scripts in the pytorch-19.10 NGC container
+on NVIDIA DGX-1 with (8x V100 16G) GPUs.
+The specific training script that was run is documented
+in the corresponding model's README.
+
 
+The following table shows the validation accuracy results of the
+three classification models side-by-side.
 
-| **Model** | **AMP Top1** | **AMP Top5** | **FP32 Top1** | **FP32 Top1** |
+
+| **arch** | **AMP Top1** | **AMP Top5** | **FP32 Top1** | **FP32 Top5** |
 |:-:|:-:|:-:|:-:|:-:|
 | resnet50 | 78.46 | 94.15 | 78.50 | 94.11 |
 | resnext101-32x4d | 80.08 | 94.89 | 80.14 | 95.02 |
 | se-resnext101-32x4d | 81.01 | 95.52 | 81.12 | 95.54 |
 
 
-## Training Performance
+## Training performance results
+
+
+### Training performance: NVIDIA DGX-1 (8x V100 16G)
+
 
+Our results were obtained by running the applicable
+training scripts in the pytorch-19.10 NGC container
+on NVIDIA DGX-1 with (8x V100 16G) GPUs.
+Performance numbers (in images per second)
+were averaged over an entire training epoch.
+The specific training script that was run is documented
+in the corresponding model's README.
 
-### NVIDIA DGX-1 (8x V100 16G)
+The following table shows the training performance results of the
+three classification models side-by-side.
 
-| **Model** | **Mixed Precision** | **FP32** | **Mixed Precision speedup** |
+
+| **arch** | **Mixed Precision** | **FP32** | **Mixed Precision speedup** |
 |:-:|:-:|:-:|:-:|
 | resnet50 | 6888.75 img/s | 2945.37 img/s | 2.34x |
 | resnext101-32x4d | 2384.85 img/s | 1116.58 img/s | 2.14x |
 | se-resnext101-32x4d | 2031.17 img/s | 977.45 img/s | 2.08x |
 
-### NVIDIA DGX-2 (16x V100 32G)
+### Training performance: NVIDIA DGX-2 (16x V100 32G)
+
+
+Our results were obtained by running the applicable
+training scripts in the pytorch-19.10 NGC container
+on NVIDIA DGX-2 with (16x V100 32G) GPUs.
+Performance numbers (in images per second)
+were averaged over an entire training epoch.
+The specific training script that was run is documented
+in the corresponding model's README.
+
+The following table shows the training performance results of the
+three classification models side-by-side.
 
-| **Model** | **Mixed Precision** | **FP32** | **Mixed Precision speedup** |
+
+| **arch** | **Mixed Precision** | **FP32** | **Mixed Precision speedup** |
 |:-:|:-:|:-:|:-:|
 | resnet50 | 13443.82 img/s | 6263.41 img/s | 2.15x |
 | resnext101-32x4d | 4473.37 img/s | 2261.97 img/s | 1.98x |
@@ -45,7 +94,16 @@ Detailed information on each model can be found here:
 ### Accuracy vs FLOPS
 ![ACCvsFLOPS](./img/ACCvsFLOPS.png)
 
-Dot size indicates number of trainable parameters
+The plot shows the relationship between validation accuracy and the
+floating-point operations needed to compute a forward pass
+on a 224px x 224px image for the implemented models.
+Dot size indicates the number of trainable parameters.
 
 ### Latency vs Throughput on different batch sizes
 ![LATvsTHR](./img/LATvsTHR.png)
+
+The plot shows the relationship between
+inference latency, throughput, and batch size
+for the implemented models.
+
+
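The "Mixed Precision speedup" column in the two training performance tables above is simply the ratio of the mixed precision throughput to the FP32 throughput. A minimal sketch (not part of the commit) that recomputes the column from the numbers quoted in the tables:

```python
# Recompute the "Mixed Precision speedup" column from the throughput figures
# quoted in the DGX-1 and DGX-2 tables above (images per second).
throughput = {
    # (system, arch): (mixed precision img/s, FP32 img/s)
    ("DGX-1", "resnet50"): (6888.75, 2945.37),
    ("DGX-1", "resnext101-32x4d"): (2384.85, 1116.58),
    ("DGX-1", "se-resnext101-32x4d"): (2031.17, 977.45),
    ("DGX-2", "resnet50"): (13443.82, 6263.41),
    ("DGX-2", "resnext101-32x4d"): (4473.37, 2261.97),
}

for (system, arch), (amp, fp32) in throughput.items():
    # Prints 2.34x, 2.14x, 2.08x, 2.15x, 1.98x -- matching the tables.
    print(f"{system} {arch}: {amp / fp32:.2f}x")
```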
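The Accuracy vs FLOPS figure encodes three quantities: validation accuracy on the y-axis, forward-pass cost at 224x224 on the x-axis, and trainable-parameter count as dot size. A hedged matplotlib sketch of how such a figure could be produced; only the FP32 Top1 accuracies come from the validation table above, while the GFLOP and parameter values are illustrative placeholders rather than measurements from this repository:

```python
import matplotlib.pyplot as plt

models = ["resnet50", "resnext101-32x4d", "se-resnext101-32x4d"]
top1 = [78.50, 80.14, 81.12]     # FP32 Top1 from the validation table above
gflops = [4.1, 8.0, 8.0]         # placeholder forward-pass cost at 224x224
params_m = [25.6, 44.2, 49.0]    # placeholder trainable parameters (millions)

# Dot area scales with the parameter count, mirroring the README's description.
plt.scatter(gflops, top1, s=[p * 10 for p in params_m], alpha=0.6)
for name, x, y in zip(models, gflops, top1):
    plt.annotate(name, (x, y))
plt.xlabel("GFLOPs for a 224px x 224px forward pass")
plt.ylabel("Top-1 validation accuracy [%]")
plt.title("Accuracy vs FLOPS (dot size ~ trainable parameters)")
plt.savefig("ACCvsFLOPS.png")
```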

PyTorch/Classification/ConvNets/image_classification/dataloaders.py

Lines changed: 11 additions & 10 deletions

@@ -93,20 +93,21 @@ def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=
 
         if dali_cpu:
             dali_device = "cpu"
-            self.decode = ops.HostDecoderRandomCrop(device=dali_device, output_type=types.RGB,
-                                                    random_aspect_ratio=[0.75, 4./3.],
-                                                    random_area=[0.08, 1.0],
-                                                    num_attempts=100)
+            self.decode = ops.ImageDecoder(device=dali_device, output_type=types.RGB)
         else:
             dali_device = "gpu"
             # This padding sets the size of the internal nvJPEG buffers to be able to handle all images from full-sized ImageNet
             # without additional reallocations
-            self.decode = ops.nvJPEGDecoderRandomCrop(device="mixed", output_type=types.RGB, device_memory_padding=211025920, host_memory_padding=140544512,
-                                                      random_aspect_ratio=[0.75, 4./3.],
-                                                      random_area=[0.08, 1.0],
-                                                      num_attempts=100)
+            self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB, device_memory_padding=211025920, host_memory_padding=140544512)
+
+        self.res = ops.RandomResizedCrop(
+            device=dali_device,
+            size=[crop, crop],
+            interp_type=types.INTERP_LINEAR,
+            random_aspect_ratio=[0.75, 4./3.],
+            random_area=[0.08, 1.0],
+            num_attempts=100)
 
-        self.res = ops.Resize(device=dali_device, resize_x=crop, resize_y=crop)
         self.cmnp = ops.CropMirrorNormalize(device = "gpu",
                                             output_dtype = types.FLOAT,
                                             output_layout = types.NCHW,
@@ -141,7 +142,7 @@ def __init__(self, batch_size, num_threads, device_id, data_dir, crop, size):
                              num_shards = world_size,
                              random_shuffle = False)
 
-        self.decode = ops.nvJPEGDecoder(device = "mixed", output_type = types.RGB)
+        self.decode = ops.ImageDecoder(device = "mixed", output_type = types.RGB)
         self.res = ops.Resize(device = "gpu", resize_shorter = size)
         self.cmnp = ops.CropMirrorNormalize(device = "gpu",
                                             output_dtype = types.FLOAT,
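The change above swaps the older fused decoder operators (ops.HostDecoderRandomCrop, ops.nvJPEGDecoderRandomCrop, ops.nvJPEGDecoder) for ops.ImageDecoder plus a separate ops.RandomResizedCrop. A minimal, self-contained sketch (not from this commit) of a DALI training pipeline built on that pattern, assuming the legacy nvidia.dali ops API of the DALI versions shipped in that era of NGC containers; data_dir is a placeholder path:

```python
from nvidia.dali.pipeline import Pipeline
import nvidia.dali.ops as ops
import nvidia.dali.types as types


class SimpleTrainPipe(Pipeline):
    def __init__(self, batch_size, num_threads, device_id, data_dir, crop=224):
        super(SimpleTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=12)
        # Reads (image, label) pairs from an ImageNet-style directory tree.
        self.input = ops.FileReader(file_root=data_dir, random_shuffle=True)
        # "mixed" runs the JPEG decode partly on CPU and partly on GPU (nvJPEG).
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        # Random area/aspect-ratio crop followed by a resize to crop x crop.
        self.res = ops.RandomResizedCrop(
            device="gpu",
            size=[crop, crop],
            interp_type=types.INTERP_LINEAR,
            random_aspect_ratio=[0.75, 4./3.],
            random_area=[0.08, 1.0],
            num_attempts=100)
        # Normalize with ImageNet statistics and emit NCHW float tensors.
        self.cmnp = ops.CropMirrorNormalize(
            device="gpu",
            output_dtype=types.FLOAT,
            output_layout=types.NCHW,
            mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
            std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
        self.coin = ops.CoinFlip(probability=0.5)

    def define_graph(self):
        jpegs, labels = self.input(name="Reader")
        images = self.decode(jpegs)
        images = self.res(images)
        images = self.cmnp(images, mirror=self.coin())
        return images, labels
```

To feed a PyTorch training loop, such a pipeline is typically wrapped in DALIClassificationIterator (from nvidia.dali.plugin.pytorch), which is the approach dataloaders.py takes.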
