Skip to content

Commit 5d34f5a

Browse files
authored
Merge pull request #32 from Tiiiger/0.2.0
upgrade to 0.2.0
2 parents 01d353e + c2812ad commit 5d34f5a

File tree

11 files changed

+122
-136
lines changed

11 files changed

+122
-136
lines changed

README.md

+18-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,22 @@
11
# QPyTorch
22
[![Downloads](https://pepy.tech/badge/qtorch)](https://pepy.tech/project/qtorch) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
33

4+
#### News:
5+
- Updated to version 0.2.0:
6+
- **Bug fixed**: previously in our floating point quantization, numbers that are closer to 0 than the smallest
7+
representable positive number were rounded to the smallest representable positive number. Now we round to 0 or the smallest
8+
representable number based on which one is nearer
9+
- **Different Behavior**: To be consistent with PyTorch [Issue #17443](https://github.com/pytorch/pytorch/pull/17443),
10+
we round to the nearest even now.
11+
- We migrate to PyTorch 1.5.0. There are several changes in the C++ API of PyTorch.
12+
This new version is not backward-compatible with older PyTorch.
13+
- *Note*: if you are using CUDA 10.1, please install CUDA 10.1 Update 1 (or later version). There is a bug in
14+
the first version of CUDA 10.1 which leads to compilation error.
15+
- *Note*: previous users, please remove the cache in the pytorch extension directory.
16+
For example, you can run this command `rm -rf /tmp/torch_extensions/quant_cpu /tmp/torch_extensions/quant_cuda` if
17+
you are using the default directory for pytorch extensions.
18+
19+
420
QPyTorch is a low-precision arithmetic simulation package in
521
PyTorch. It is designed to support research on low-precision machine
622
learning, especially research in low-precision training.
@@ -30,8 +46,9 @@ and QPyTorch's simulation of half-precision numbers.
3046
requirements:
3147

3248
- Python >= 3.6
33-
- PyTorch >= 1.0
49+
- PyTorch >= 1.5.0
3450
- GCC >= 4.9 on linux
51+
- CUDA >= 10.1 on linux
3552

3653
Install other requirements by:
3754
```bash

examples/SWALP/README.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,10 @@ bash example.sh
5353
```
5454

5555
## Results
56-
| Datset | Model | SGD-FP | SWA-FP | SGD-LP | SWALP |
57-
|----------|--------------|------------|------------|------------|------------|
58-
| CIFAR10 | VGG16 | 6.81±0.09 | 6.51±0.14 | 7.61±0.15 | 6.70±0.12 |
59-
| CIFAR100 | VGG16 | 27.23±0.17 | 25.93±0.21 | 29.59±0.32 | 26.65±0.29 |
56+
| Dataset  | Model | SGD-FP     | SWA-FP     | SGD-LP     | SWALP      |
57+
| -------- | ----- | ---------- | ---------- | ---------- | ---------- |
58+
| CIFAR10 | VGG16 | 6.81±0.09 | 6.51±0.14 | 7.61±0.15 | 6.70±0.12 |
59+
| CIFAR100 | VGG16 | 27.23±0.17 | 25.93±0.21 | 29.59±0.32 | 26.65±0.29 |
6060

6161
## References
6262
This repo is modified from the PyTorch repo of [SWALP](https://github.com/stevenygd/SWALP)

qtorch/quant/quant_cpu/quant_cpu.cpp

+24-24
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ enum Mode
1212
rStochastic
1313
};
1414

15-
#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x " must be contiguous")
16-
#define CHECK_CPU(x) AT_CHECK(!x.type().is_cuda(), #x " must be a CPU tensor")
15+
#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
16+
#define CHECK_CPU(x) TORCH_CHECK(!x.is_cuda(), #x " must be a CPU tensor")
1717
#define CHECK_INPUT(x) \
1818
CHECK_CPU(x); \
1919
CHECK_CONTIGUOUS(x);
@@ -63,12 +63,12 @@ std::tuple<Tensor, Tensor> fixed_point_quantize_stochastic_mask(Tensor a, int wl
6363
{
6464
CHECK_INPUT(a);
6565
auto r = rand_like(a);
66-
auto a_array = a.data<float>();
67-
auto r_array = r.data<float>();
66+
auto a_array = a.data_ptr<float>();
67+
auto r_array = r.data_ptr<float>();
6868
auto o = zeros_like(a);
69-
auto o_array = o.data<float>();
69+
auto o_array = o.data_ptr<float>();
7070
auto m = zeros_like(a, torch::CPU(kByte));
71-
auto m_array = m.data<uint8_t>();
71+
auto m_array = m.data_ptr<uint8_t>();
7272
int64_t size = a.numel();
7373
int sigma = -fl;
7474
float t_min, t_max;
@@ -84,11 +84,11 @@ std::tuple<Tensor, Tensor> fixed_point_quantize_stochastic_mask(Tensor a, int wl
8484
std::tuple<Tensor, Tensor> fixed_point_quantize_nearest_mask(Tensor a, int wl, int fl, bool symmetric)
8585
{
8686
CHECK_INPUT(a);
87-
auto a_array = a.data<float>();
87+
auto a_array = a.data_ptr<float>();
8888
auto o = zeros_like(a);
89-
auto o_array = o.data<float>();
89+
auto o_array = o.data_ptr<float>();
9090
auto m = zeros_like(a, torch::CPU(kByte));
91-
auto m_array = m.data<uint8_t>();
91+
auto m_array = m.data_ptr<uint8_t>();
9292
int64_t size = a.numel();
9393
int sigma = -fl;
9494
float t_min, t_max;
@@ -105,10 +105,10 @@ Tensor fixed_point_quantize_stochastic(Tensor a, int wl, int fl, bool clamp, boo
105105
{
106106
CHECK_INPUT(a);
107107
auto r = rand_like(a);
108-
auto a_array = a.data<float>();
109-
auto r_array = r.data<float>();
108+
auto a_array = a.data_ptr<float>();
109+
auto r_array = r.data_ptr<float>();
110110
Tensor o = zeros_like(a);
111-
auto o_array = o.data<float>();
111+
auto o_array = o.data_ptr<float>();
112112
int64_t size = a.numel();
113113
int sigma = -fl;
114114
float t_min, t_max;
@@ -127,9 +127,9 @@ Tensor fixed_point_quantize_stochastic(Tensor a, int wl, int fl, bool clamp, boo
127127
Tensor fixed_point_quantize_nearest(Tensor a, int wl, int fl, bool clamp, bool symmetric)
128128
{
129129
CHECK_INPUT(a);
130-
auto a_array = a.data<float>();
130+
auto a_array = a.data_ptr<float>();
131131
Tensor o = zeros_like(a);
132-
auto o_array = o.data<float>();
132+
auto o_array = o.data_ptr<float>();
133133
int64_t size = a.numel();
134134
int sigma = -fl;
135135
float t_min, t_max;
@@ -217,29 +217,29 @@ Tensor get_max_entry(Tensor a, int dim)
217217
Tensor block_quantize_nearest(Tensor a, int wl, int dim)
218218
{
219219
CHECK_INPUT(a);
220-
auto a_array = a.data<float>();
220+
auto a_array = a.data_ptr<float>();
221221
Tensor o = zeros_like(a);
222-
auto o_array = o.data<float>();
222+
auto o_array = o.data_ptr<float>();
223223
int64_t size = a.numel();
224224

225225
// get maximum number and base
226226
Tensor max_entry = get_max_entry(a, dim);
227-
auto max_elem = max_entry.data<float>();
227+
auto max_elem = max_entry.data_ptr<float>();
228228
block_quantize_helper(a_array, o_array, max_elem, wl, size, rNearest);
229229
return o;
230230
}
231231

232232
Tensor block_quantize_stochastic(Tensor a, int wl, int dim)
233233
{
234234
CHECK_INPUT(a);
235-
auto a_array = a.data<float>();
235+
auto a_array = a.data_ptr<float>();
236236
Tensor o = zeros_like(a);
237-
auto o_array = o.data<float>();
237+
auto o_array = o.data_ptr<float>();
238238
int64_t size = a.numel();
239239

240240
// get maximum number and base
241241
Tensor max_entry = get_max_entry(a, dim);
242-
auto max_elem = max_entry.data<float>();
242+
auto max_elem = max_entry.data_ptr<float>();
243243
// std::srand(time(0));
244244
block_quantize_helper(a_array, o_array, max_elem, wl, size, rStochastic);
245245
return o;
@@ -248,9 +248,9 @@ Tensor block_quantize_stochastic(Tensor a, int wl, int dim)
248248
Tensor float_quantize_stochastic(Tensor a, int man_bits, int exp_bits)
249249
{
250250
// use external random number right now
251-
auto a_array = a.data<float>();
251+
auto a_array = a.data_ptr<float>();
252252
auto o = zeros_like(a);
253-
auto o_array = o.data<float>();
253+
auto o_array = o.data_ptr<float>();
254254
int size = a.numel();
255255

256256
for (int64_t i = 0; i < size; i++)
@@ -268,9 +268,9 @@ Tensor float_quantize_stochastic(Tensor a, int man_bits, int exp_bits)
268268

269269
Tensor float_quantize_nearest(Tensor a, int man_bits, int exp_bits)
270270
{
271-
auto a_array = a.data<float>();
271+
auto a_array = a.data_ptr<float>();
272272
auto o = zeros_like(a);
273-
auto o_array = o.data<float>();
273+
auto o_array = o.data_ptr<float>();
274274
int size = a.numel();
275275

276276
for (int64_t i = 0; i < size; i++)

qtorch/quant/quant_cpu/sim_helper.cpp

+10-6
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,20 @@
22
#include <math.h>
33
#include <stdint.h>
44

5-
void fixed_min_max(int wl, int fl, bool symmetric, float* t_min, float* t_max) {
5+
void fixed_min_max(int wl, int fl, bool symmetric, float *t_min, float *t_max)
6+
{
67
int sigma = -fl;
7-
*t_min = -ldexp(1.0, wl-fl-1);
8-
*t_max = -*t_min-ldexp(1.0, sigma);
9-
if (symmetric) *t_min = *t_min+ldexp(1.0, sigma);
8+
*t_min = -ldexp(1.0, wl - fl - 1);
9+
*t_max = -*t_min - ldexp(1.0, sigma);
10+
if (symmetric)
11+
*t_min = *t_min + ldexp(1.0, sigma);
1012
}
1113

12-
float round(float a, float r, int sigma) {
14+
float round(float a, float r, int sigma)
15+
{
1316
a = ldexp(a, -sigma);
14-
a = floor(a+r);
17+
a = nearbyint(a + r - 0.5);
18+
// a = floor(a + r);
1519
a = ldexp(a, sigma);
1620
return a;
1721
}

qtorch/quant/quant_cuda/quant.cu

+31-31
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,11 @@ Tensor block_quantize_stochastic_cuda(Tensor a, int wl, int dim) {
3636
int blockSize = 1024;
3737
int blockNums = (size + blockSize - 1) / blockSize;
3838

39-
block_kernel_stochastic<<<blockNums, blockSize>>>(a.data<float>(),
40-
rand_ints.data<int>(),
41-
o.data<float>(),
39+
block_kernel_stochastic<<<blockNums, blockSize>>>(a.data_ptr<float>(),
40+
rand_ints.data_ptr<int>(),
41+
o.data_ptr<float>(),
4242
size,
43-
max_entry.data<float>(),
43+
max_entry.data_ptr<float>(),
4444
wl);
4545
return o;
4646
}
@@ -53,10 +53,10 @@ Tensor block_quantize_nearest_cuda(Tensor a, int wl, int dim) {
5353
int blockSize = 1024;
5454
int blockNums = (size + blockSize - 1) / blockSize;
5555

56-
block_kernel_nearest<<<blockNums, blockSize>>>(a.data<float>(),
57-
o.data<float>(),
56+
block_kernel_nearest<<<blockNums, blockSize>>>(a.data_ptr<float>(),
57+
o.data_ptr<float>(),
5858
size,
59-
max_entry.data<float>(),
59+
max_entry.data_ptr<float>(),
6060
wl);
6161
return o;
6262
}
@@ -70,11 +70,11 @@ Tensor block_quantize_sim_stochastic_cuda(Tensor a, int wl) {
7070
int blockSize = 1024;
7171
int blockNums = (size + blockSize - 1) / blockSize;
7272

73-
block_kernel_sim_stochastic<<<blockNums, blockSize>>>(a.data<float>(),
74-
rand_probs.data<float>(),
75-
o.data<float>(),
73+
block_kernel_sim_stochastic<<<blockNums, blockSize>>>(a.data_ptr<float>(),
74+
rand_probs.data_ptr<float>(),
75+
o.data_ptr<float>(),
7676
size,
77-
max_entry.data<float>(),
77+
max_entry.data_ptr<float>(),
7878
wl);
7979
return o;
8080
}
@@ -88,10 +88,10 @@ Tensor block_quantize_sim_nearest_cuda(Tensor a, int wl) {
8888
int blockSize = 1024;
8989
int blockNums = (size + blockSize - 1) / blockSize;
9090

91-
block_kernel_sim_nearest<<<blockNums, blockSize>>>(a.data<float>(),
92-
o.data<float>(),
91+
block_kernel_sim_nearest<<<blockNums, blockSize>>>(a.data_ptr<float>(),
92+
o.data_ptr<float>(),
9393
size,
94-
max_entry.data<float>(),
94+
max_entry.data_ptr<float>(),
9595
wl);
9696
return o;
9797
}
@@ -104,9 +104,9 @@ Tensor float_quantize_stochastic_cuda(Tensor a, int man_bits, int exp_bits) {
104104
int blockSize = 1024;
105105
int blockNums = (size + blockSize - 1) / blockSize;
106106

107-
float_kernel_stochastic<<<blockNums, blockSize>>>(a.data<float>(),
108-
rand_ints.data<int>(),
109-
o.data<float>(),
107+
float_kernel_stochastic<<<blockNums, blockSize>>>(a.data_ptr<float>(),
108+
rand_ints.data_ptr<int>(),
109+
o.data_ptr<float>(),
110110
size,
111111
man_bits,
112112
exp_bits);
@@ -120,8 +120,8 @@ Tensor float_quantize_nearest_cuda(Tensor a, int man_bits, int exp_bits) {
120120
int blockSize = 1024;
121121
int blockNums = (size + blockSize - 1) / blockSize;
122122

123-
float_kernel_nearest<<<blockNums, blockSize>>>(a.data<float>(),
124-
o.data<float>(),
123+
float_kernel_nearest<<<blockNums, blockSize>>>(a.data_ptr<float>(),
124+
o.data_ptr<float>(),
125125
size,
126126
man_bits,
127127
exp_bits);
@@ -146,9 +146,9 @@ Tensor fixed_point_quantize_stochastic_cuda(Tensor a, int wl, int fl, bool use_c
146146
int blockSize = 1024;
147147
int blockNums = (size + blockSize - 1) / blockSize;
148148

149-
fixed_point_quantize_kernel_stochastic<<<blockNums, blockSize>>>(a.data<float>(),
150-
rand_probs.data<float>(),
151-
o.data<float>(),
149+
fixed_point_quantize_kernel_stochastic<<<blockNums, blockSize>>>(a.data_ptr<float>(),
150+
rand_probs.data_ptr<float>(),
151+
o.data_ptr<float>(),
152152
size,
153153
sigma,
154154
use_clamp,
@@ -167,8 +167,8 @@ Tensor fixed_point_quantize_nearest_cuda(Tensor a, int wl, int fl, bool use_clam
167167
int blockSize = 1024;
168168
int blockNums = (size + blockSize - 1) / blockSize;
169169

170-
fixed_point_quantize_kernel_nearest<<<blockNums, blockSize>>>(a.data<float>(),
171-
o.data<float>(),
170+
fixed_point_quantize_kernel_nearest<<<blockNums, blockSize>>>(a.data_ptr<float>(),
171+
o.data_ptr<float>(),
172172
size,
173173
sigma,
174174
use_clamp,
@@ -189,10 +189,10 @@ std::tuple<Tensor, Tensor> fixed_point_quantize_stochastic_mask_cuda(Tensor a, i
189189
int blockSize = 1024;
190190
int blockNums = (size + blockSize - 1) / blockSize;
191191

192-
fixed_point_quantize_kernel_mask_stochastic<<<blockNums, blockSize>>>(a.data<float>(),
193-
rand_probs.data<float>(),
194-
o.data<float>(),
195-
m.data<uint8_t>(),
192+
fixed_point_quantize_kernel_mask_stochastic<<<blockNums, blockSize>>>(a.data_ptr<float>(),
193+
rand_probs.data_ptr<float>(),
194+
o.data_ptr<float>(),
195+
m.data_ptr<uint8_t>(),
196196
size,
197197
sigma,
198198
t_min,
@@ -211,9 +211,9 @@ std::tuple<Tensor, Tensor> fixed_point_quantize_nearest_mask_cuda(Tensor a, int
211211
int blockSize = 1024;
212212
int blockNums = (size + blockSize - 1) / blockSize;
213213

214-
fixed_point_quantize_kernel_mask_nearest<<<blockNums, blockSize>>>(a.data<float>(),
215-
o.data<float>(),
216-
m.data<uint8_t>(),
214+
fixed_point_quantize_kernel_mask_nearest<<<blockNums, blockSize>>>(a.data_ptr<float>(),
215+
o.data_ptr<float>(),
216+
m.data_ptr<uint8_t>(),
217217
size,
218218
sigma,
219219
t_min,

qtorch/quant/quant_cuda/quant_cuda.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44

55
using namespace at;
66

7-
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
8-
#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x " must be contiguous")
7+
#define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor")
8+
#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
99
#define CHECK_INPUT(x) \
1010
CHECK_CUDA(x); \
1111
CHECK_CONTIGUOUS(x)

qtorch/quant/quant_cuda/sim_helper.cu

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
#include <cmath>
33

44
__device__ __forceinline__ float round_helper(float a, float r) {
5-
return floor(a+r);
5+
// return floor(a+r);
6+
return nearbyint(a+r-0.5);
67
}
78

89
__device__ __forceinline__ float round(float a, float r, int sigma) {

0 commit comments

Comments
 (0)