@@ -21,6 +21,25 @@ namespace executor {
namespace xnnpack {
namespace delegate {

+/*
+ * Provide compile-time allocation.
+ */
+class CompileAllocator {
+ public:
+  /*
+   * Allocate memory which will be automatically freed at the end
+   * of the compilation process.
+   */
+  void* allocateTemporary(size_t size) {
+    auto mem = new uint8_t[size];
+    temporaries_.emplace_back(mem);
+    return mem;
+  }
+
+ private:
+  std::vector<std::unique_ptr<uint8_t[]>> temporaries_;
+};
+
// Flatbuffer types
using ValuePtr = const fb_xnnpack::XValue*;
using NodePtr = const fb_xnnpack::XNode*;
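The allocator is deliberately simple: `allocateTemporary` hands out a raw pointer while a `unique_ptr` in `temporaries_` retains ownership, so every temporary is freed together when the allocator goes out of scope at the end of compilation. A minimal standalone sketch of the usage pattern (the `main` driver and buffer size are illustrative, not part of this change):

```cpp
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

// Same shape as the CompileAllocator added above.
class CompileAllocator {
 public:
  void* allocateTemporary(size_t size) {
    auto mem = new uint8_t[size];
    temporaries_.emplace_back(mem);  // unique_ptr takes ownership
    return mem;
  }

 private:
  std::vector<std::unique_ptr<uint8_t[]>> temporaries_;
};

int main() {
  {
    CompileAllocator allocator;
    // Scratch buffer lives as long as the allocator; no manual delete[].
    auto* scratch = static_cast<uint16_t*>(
        allocator.allocateTemporary(8 * sizeof(uint16_t)));
    scratch[0] = 42;
  }  // allocator destructs here; all temporaries are released
  return 0;
}
```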
@@ -35,6 +54,23 @@ using DefineNodeFunc = Error (*)(
    const std::unordered_map<uint32_t, uint32_t>&,
    NodePtr) noexcept;

+/*
+Convert a tensor from fp32 to bf16.
+*/
+void convertF32TensorToBF16(
+    const float* f32_data,
+    uint16_t* bf16_data_out,
+    size_t numel) {
+  for (auto i = 0u; i < numel; i++) {
+    // Adjust the f32 value such that it rounds properly after truncation.
+    // Constant factor scales 1+2^-8 to 1+2^-7.
+    float f32_adjusted = f32_data[i] * 1.00389105f;
+    uint32_t f32_bits;
+    memcpy(&f32_bits, &f32_adjusted, sizeof(float));
+    bf16_data_out[i] = static_cast<uint16_t>(f32_bits >> 16);
+  }
+}
+
/*
Gets the output min and output max for a given node operator
*/
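Why the magic constant: bf16 is just the top 16 bits of an fp32, so shifting right by 16 truncates the mantissa, which always rounds toward zero. Multiplying by 1.00389105f first bumps the value up by roughly half a bf16 ulp, so the truncation lands on the nearest bf16 value instead. A standalone sketch of the effect (test value chosen for illustration):

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// fp32 -> bf16 by truncation, optionally with the rounding adjustment.
uint16_t toBF16(float f, bool adjust) {
  if (adjust) {
    f *= 1.00389105f;  // pre-bias by roughly half a bf16 ulp
  }
  uint32_t bits;
  memcpy(&bits, &f, sizeof(float));
  return static_cast<uint16_t>(bits >> 16);  // sign + 8 exp + top 7 mantissa bits
}

int main() {
  // 1 + 2^-8 sits exactly halfway between bf16 1.0 (0x3f80) and
  // 1.0078125 (0x3f81).
  float f = 1.00390625f;
  printf("truncated: 0x%04x\n", toBF16(f, false));  // 0x3f80: rounded down
  printf("adjusted:  0x%04x\n", toBF16(f, true));   // 0x3f81: rounded to nearest
  return 0;
}
```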
@@ -152,7 +188,8 @@ Error defineTensor(
    GraphPtr flatbuffer_graph,
    const uint8_t* constant_data_ptr,
    std::vector<uint32_t>& input_ids,
-    std::vector<uint32_t>& output_ids) {
+    std::vector<uint32_t>& output_ids,
+    CompileAllocator& allocator) {
  const fb_xnnpack::XNNTensorValue* tensor_value = nullptr;
  const fb_xnnpack::XNNQuantizedTensorValue* qtensor_value = nullptr;
@@ -356,12 +393,31 @@ Error defineTensor(
      size_t group_size = qparams->group_size();
      size_t output_channels = tensor_value->dims()->Get(0);
      size_t input_channels = tensor_value->dims()->Get(1);
+
+      const uint16_t* scale_data = nullptr;
+      uint32_t scale_numel = 0;
+
+      // Block scales are preferably serialized as bf16 but can also be
+      // serialized as fp32 for backwards compatibility.
+      if (qparams->scale_bf16() != nullptr) {
+        scale_data =
+            static_cast<const uint16_t*>(qparams->scale_bf16()->data());
+        scale_numel = qparams->scale_bf16()->size();
+      } else {
+        // Read fp32 scales, convert to bf16.
+        auto conv_buffer = static_cast<uint16_t*>(allocator.allocateTemporary(
+            qparams->scale()->size() * sizeof(uint16_t)));
+        scale_numel = qparams->scale()->size();
+        convertF32TensorToBF16(
+            qparams->scale()->data(), conv_buffer, scale_numel);
+        scale_data = conv_buffer;
+      }
+
      ET_CHECK_OR_RETURN_ERROR(
-          qparams->scale()->size() ==
-              output_channels * input_channels / group_size,
+          scale_numel == output_channels * input_channels / group_size,
          Internal,
          "scale size %zu != output channels %zu * group size %zu",
-          (size_t)qparams->scale()->size(),
+          static_cast<size_t>(scale_numel),
          output_channels,
          group_size);
      int32_t zero_point =
@@ -370,18 +426,19 @@ Error defineTensor(
          Debug,
          "define quant tensor (per channel group): buffer_ptr: %p, scale.numel(): %u, channel_dim: %u, group_size: %zu, output_channels: %zu, dtype: %u, zero_point: %d, datatype: %d\n",
          buffer_ptr,
-          qparams->scale()->size(),
+          scale_numel,
          qparams->channel_dim(),
          group_size,
          output_channels,
          datatype,
          zero_point,
          datatype);
+
      status = xnn_define_blockwise_quantized_tensor_value(
          /*subgraph=*/subgraph_ptr,
          /*datatype=*/datatype,
          /*zero_point=*/zero_point,
-          /*scale=*/qparams->scale()->data(),
+          /*scale=*/scale_data,
          /*num_dims=*/tensor_value->num_dims(),
          /*channel_dim=*/qparams->channel_dim(),
          /*block_size=*/qparams->group_size(),
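One scale per (output channel, input-channel group) pair is what the `scale_numel == output_channels * input_channels / group_size` check above enforces. For a hypothetical [64 x 128] weight with `group_size` 32, that is 64 * (128 / 32) = 256 bf16 scales. A throwaway sketch with made-up shapes:

```cpp
#include <cassert>
#include <cstddef>

int main() {
  // Hypothetical shapes, for illustration only.
  size_t output_channels = 64;  // dims()->Get(0)
  size_t input_channels = 128;  // dims()->Get(1)
  size_t group_size = 32;       // input channels covered by one scale
  size_t expected_scales = output_channels * input_channels / group_size;
  assert(expected_scales == 256);  // one scale per (row, group) pair
  return 0;
}
```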
@@ -1617,6 +1674,7 @@ ET_NODISCARD Error XNNCompiler::compileModel(
  Result<XNNHeader> header = XNNHeader::Parse(buffer_pointer, num_bytes);
  const uint8_t* flatbuffer_data = nullptr;
  const uint8_t* constant_data = nullptr;
+  CompileAllocator compile_allocator;

  // Header status can only either be Error::Ok or Error::NotFound
  if (header.ok()) {
@@ -1688,7 +1746,8 @@ ET_NODISCARD Error XNNCompiler::compileModel(
        flatbuffer_graph,
        constant_data,
        input_ids,
-        output_ids);
+        output_ids,
+        compile_allocator);

    if (err != Error::Ok) {
      return err;