[pt][quant] Parallelize quantize and dequantize (pytorch#33765)
Summary:
Pull Request resolved: pytorch#33765

The quantize and dequantize methods now use multiple threads, leveraging shz0116's recent parallelization of the quantize/dequantize routines in FBGEMM.

Fixes:
pytorch#32006
pytorch/FBGEMM#142

Alternative to pytorch#30153

```
#!/usr/bin/env python

import time
import torch
import torch.nn as nn
torch.set_num_threads(4)
# print(torch.__config__.parallel_info())

W = torch.rand(1, 54, 54, 256)

NITER = 1000
s = time.time()
for i in range(NITER):
    W_q = torch.quantize_per_tensor(W, scale=1.0, zero_point=0, dtype=torch.quint8)
time_per_iter = (time.time() - s) / NITER

print('quantize time per iter ms', time_per_iter * 1000)

s = time.time()
for i in range(NITER):
    W_deq = W_q.dequantize()
time_per_iter = (time.time() - s) / NITER

print('dequantize time per iter ms', time_per_iter * 1000)
```

### With 1 thread
quantize time per iter ms 0.22633790969848633
dequantize time per iter ms 0.6573665142059326

### With 4 threads
quantize time per iter ms 0.0905618667602539
dequantize time per iter ms 0.19511842727661133
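
At 4 threads this is roughly a 2.5x speedup for quantize and 3.4x for dequantize, consistent with `at::parallel_for` splitting the work into `at::get_num_threads()` tasks as shown in the diff below.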
ghstack-source-id: 98935895

Test Plan: python test/test_quantized.py

Reviewed By: jspark1105

Differential Revision: D20098521

fbshipit-source-id: bd8c45761b4651fcd5b20b95759e3868a136c048
dskhudia authored and facebook-github-bot committed on Feb 26, 2020 · commit a8e7ed4, parent 2eb95d8

aten/src/ATen/quantized/Quantizer.cpp — 25 additions, 8 deletions
```diff
@@ -1,4 +1,5 @@
 #include <ATen/ATen.h>
+#include <ATen/Parallel.h>
 #include <ATen/quantized/Quantizer.h>
 #include <c10/core/Allocator.h>
 #include <c10/core/CPUAllocator.h>
@@ -126,10 +127,18 @@ Tensor quantize_tensor(Tensor rtensor, Tensor qtensor, double scale, int64_t zero_point)
   qparams.scale = scale;
   qparams.zero_point = zero_point;
   qparams.precision = CHAR_BIT * sizeof(typename T::underlying);
-  fbgemm::Quantize<typename T::underlying>(/*src=*/rd,
-                                           /*dst=*/qd,
-                                           /*len=*/rtensor.numel(),
-                                           /*qparams=*/qparams);
+  int num_tasks = at::get_num_threads();
+  at::parallel_for(0, num_tasks, 1, [&](int64_t begin, int64_t end) {
+    for (int task_id = begin; task_id < end; ++task_id) {
+      fbgemm::Quantize<typename T::underlying>(
+          /*src=*/rd,
+          /*dst=*/qd,
+          /*len=*/rtensor.numel(),
+          /*qparams=*/qparams,
+          /*thread_id=*/task_id,
+          /*num_threads=*/num_tasks);
+    }
+  });
   return qtensor;
 }
@@ -153,10 +162,18 @@ Tensor dequantize_tensor(Tensor qtensor, Tensor rtensor, double scale, int64_t zero_point)
   qparams.zero_point = zero_point;
   qparams.precision = CHAR_BIT * sizeof(typename T::underlying);
   float* rd = rtensor.data_ptr<float>();
-  fbgemm::Dequantize<typename T::underlying>(/*src=*/qd,
-                                             /*dst=*/rd,
-                                             /*len=*/qtensor.numel(),
-                                             /*qparams=*/qparams);
+  int num_tasks = at::get_num_threads();
+  at::parallel_for(0, num_tasks, 1, [&](int64_t begin, int64_t end) {
+    for (int task_id = begin; task_id < end; ++task_id) {
+      fbgemm::Dequantize<typename T::underlying>(
+          /*src=*/qd,
+          /*dst=*/rd,
+          /*len=*/qtensor.numel(),
+          /*qparams=*/qparams,
+          /*thread_id=*/task_id,
+          /*num_threads=*/num_tasks);
+    }
+  });
   return rtensor;
 }
 #else // USE_FBGEMM
```
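
For readers outside the PyTorch tree, here is a minimal standalone sketch of the task-partitioning pattern the diff applies. `at::parallel_for` and `at::get_num_threads` are real ATen APIs; `quantize_chunk` is a hypothetical stand-in for `fbgemm::Quantize`, which selects its shard internally from `(thread_id, num_threads)`.

```
// Minimal sketch of the parallelization pattern above (assumes an ATen build).
// quantize_chunk is hypothetical; it stands in for fbgemm::Quantize, which
// derives its shard from (thread_id, num_threads).
#include <ATen/Parallel.h>
#include <algorithm>
#include <cstdint>

// Hypothetical per-task kernel: process this task's shard of `len` elements.
void quantize_chunk(const float* src, uint8_t* dst, int64_t len,
                    int thread_id, int num_threads) {
  const int64_t chunk = (len + num_threads - 1) / num_threads;
  const int64_t begin = std::min(thread_id * chunk, len);
  const int64_t end = std::min(begin + chunk, len);
  for (int64_t i = begin; i < end; ++i) {
    // Stand-in for the real scale/zero_point math in fbgemm::Quantize.
    dst[i] = static_cast<uint8_t>(src[i]);
  }
}

void parallel_quantize(const float* src, uint8_t* dst, int64_t len) {
  const int num_tasks = at::get_num_threads();
  // Grain size 1 over [0, num_tasks): each task index maps to one shard.
  at::parallel_for(0, num_tasks, 1, [&](int64_t begin, int64_t end) {
    for (int task_id = begin; task_id < end; ++task_id) {
      quantize_chunk(src, dst, len, static_cast<int>(task_id), num_tasks);
    }
  });
}
```

Iterating over task indices rather than element ranges means the kernel, not `parallel_for`, controls the element split — matching how the diff hands `thread_id`/`num_threads` straight to FBGEMM.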
