Skip to content

Commit

Permalink
CMSIS-NN: Get optimized implementation temporary buffer sizes
Browse files Browse the repository at this point in the history
  • Loading branch information
freddan80 committed Sep 27, 2019
1 parent f5bafd1 commit a1c7ee0
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 3 deletions.
63 changes: 60 additions & 3 deletions CMSIS/NN/Include/arm_nnfunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ extern "C"
* @param[in] buffer_a pointer to buffer space used for input optimization(partial im2col) and is necessary
* when both ARM_MATH_LOOPUNROLL and ARM_MATH_DSP are defined.
* Required space: (2 * input_ch * kernel_x * kernel_y) * sizeof(q15_t) bytes
* Use arm_convolve_s8_get_buffer_size() to get the size.
* @return The function returns <code>ARM_MATH_SUCCESS</code>
*
* @details
Expand Down Expand Up @@ -180,6 +181,18 @@ extern "C"
const uint16_t output_y,
q15_t *buffer_a);

/**
 * @brief Get the required buffer size for s8 convolution function
 * @param[in] input_ch number of input tensor channels
 * @param[in] kernel_x filter/kernel width
 * @param[in] kernel_y filter/kernel height
 * @return Required size, in bytes, of the buffer_a scratch buffer of arm_convolve_s8():
 *         (2 * input_ch * kernel_x * kernel_y) * sizeof(int16_t) when both
 *         ARM_MATH_LOOPUNROLL and ARM_MATH_DSP are defined, otherwise 0.
 *
 */
int32_t arm_convolve_s8_get_buffer_size(const uint16_t input_ch,
const uint16_t kernel_x,
const uint16_t kernel_y);

/**
* @brief Basic Q7 convolution function
* @param[in] Im_in pointer to input tensor
Expand Down Expand Up @@ -470,6 +483,7 @@ extern "C"
* @param[in] buffer_a pointer to buffer space used for input optimization(partial im2col) and is necessary
* when both ARM_MATH_LOOPUNROLL and ARM_MATH_DSP are defined.
* Required space: 2 * input_ch * sizeof(q15_t) bytes
* Use arm_convolve_1x1_s8_fast_get_buffer_size() to get the size.
* @return The function returns either
* <code>ARM_MATH_SIZE_MISMATCH</code> if argument constraints fail. or,
* <code>ARM_MATH_SUCCESS</code> on successful completion.
Expand Down Expand Up @@ -504,6 +518,15 @@ extern "C"
const uint16_t output_y,
q15_t *buffer_a);

/**
 * @brief Get the required buffer size for the fast 1x1 convolution
 * (non-square shape) s8 convolution function
 * @param[in] input_ch number of input tensor channels
 * @return Required size, in bytes, of the buffer_a scratch buffer of
 *         arm_convolve_1x1_s8_fast(): 2 * input_ch * sizeof(int16_t) when both
 *         ARM_MATH_LOOPUNROLL and ARM_MATH_DSP are defined, otherwise 0.
 *
 */
int32_t arm_convolve_1x1_s8_fast_get_buffer_size(const uint16_t input_ch);

/**
* @brief Q7 version of convolution for RGB image
* @param[in] Im_in pointer to input tensor
Expand Down Expand Up @@ -837,6 +860,7 @@ extern "C"
* @param[in] buffer_a Buffer for partial im2col optimization. This is mandatory when ARM_MATH_LOOPUNROLL and
* ARM_MATH_DSP are defined.
* Required space: (2 * input_ch * kernel_x * kernel_y) * sizeof(q15_t) bytes
* Use arm_depthwise_conv_s8_opt_get_buffer_size() to get the size.
*
* @return The function returns one of the following
* <code>ARM_MATH_SIZE_MISMATCH</code> - Unsupported dimension of tensors
Expand Down Expand Up @@ -874,6 +898,18 @@ extern "C"
const uint16_t dilation_y,
q15_t *buffer_a);

/**
 * @brief Get the required buffer size for optimized s8 depthwise convolution
 * function with constraint that in_channel equals out_channel.
 * @param[in] input_ch number of input tensor channels
 * @param[in] kernel_x filter/kernel width
 * @param[in] kernel_y filter/kernel height
 * @return Required size, in bytes, of the buffer_a scratch buffer of
 *         arm_depthwise_conv_s8_opt():
 *         (2 * input_ch * kernel_x * kernel_y) * sizeof(int16_t) when both
 *         ARM_MATH_LOOPUNROLL and ARM_MATH_DSP are defined, otherwise 0.
 *
 */
int32_t arm_depthwise_conv_s8_opt_get_buffer_size(const uint16_t input_ch,
const uint16_t kernel_x,
const uint16_t kernel_y);

/**
* @defgroup FC Fully-connected Layer Functions
Expand Down Expand Up @@ -935,6 +971,7 @@ extern "C"
* @param[in] vec_buffer pointer to buffer space used for optimization and is necessary
* when both ARM_MATH_LOOPUNROLL and ARM_MATH_DSP are defined.
* Required space: col_dim * sizeof(q15_t) bytes
* Use arm_fully_connected_s8_get_buffer_size() to get the size.
* @return The function returns ARM_MATH_SUCCESS
*
* @details
Expand Down Expand Up @@ -968,6 +1005,15 @@ extern "C"
const int32_t output_activation_max,
q15_t *vec_buffer);

/**
 * @brief Get the required buffer size for S8 basic fully-connected and
 * matrix multiplication layer function for TF Lite
 * @param[in] col_dim dimension of the input vector
 * @return Required size, in bytes, of the vec_buffer scratch buffer of
 *         arm_fully_connected_s8(): col_dim * sizeof(int16_t) when both
 *         ARM_MATH_LOOPUNROLL and ARM_MATH_DSP are defined, otherwise 0.
 *
 */
int32_t arm_fully_connected_s8_get_buffer_size(const uint16_t col_dim);

/**
* @brief Q7 opt fully-connected layer function
* @param[in] pV pointer to input vector
Expand Down Expand Up @@ -1431,7 +1477,8 @@ extern "C"
* @param[in,out] src pointer to input tensor
* @param[in] bufferA temporary buffer used for optimization and is necessary when both
* ARM_MATH_LOOPUNROLL and ARM_MATH_DSP are defined.
* Required space: (input_ch * dim_dst_width) * sizeof(q15_t) bytes
* Required space: (ch_src * dim_dst_width) * sizeof(q15_t) bytes
* Use arm_avgpool_s8_get_buffer_size() to get the size
* @param[in,out] dst pointer to output tensor
*
* @note This pooling function is input-destructive. Input data is undefined after calling this function.
Expand Down Expand Up @@ -1459,6 +1506,16 @@ extern "C"
int16_t *bufferA,
int8_t *dst);

/**
 * @brief Get the required buffer size for S8 average pooling function
 * @param[in] dim_dst_width output tensor dimension
 * @param[in] ch_src number of input tensor channels
 * @return Required size, in bytes, of the bufferA scratch buffer of
 *         arm_avgpool_s8(): (ch_src * dim_dst_width) * sizeof(int16_t) when both
 *         ARM_MATH_LOOPUNROLL and ARM_MATH_DSP are defined, otherwise 0.
 *
 */
int32_t arm_avgpool_s8_get_buffer_size(const int dim_dst_width,
const int ch_src);

/**
* @brief s8 DSP optimized max pooling function
* @param[in] input_y input tensor dimension along y
Expand Down Expand Up @@ -1555,7 +1612,7 @@ extern "C"
*
*/

void arm_softmax_q7(const q7_t * vec_in, const uint16_t dim_vec, q7_t * p_out);
void arm_softmax_q7(const q7_t * vec_in, const uint16_t dim_vec, q7_t * p_out);

/**
* @brief Q7 softmax function with batch parameter
Expand All @@ -1577,7 +1634,7 @@ void arm_softmax_with_batch_q7(const q7_t * vec_in, const uint16_t nb_batches,co
*
*/

void arm_softmax_q15(const q15_t * vec_in, const uint16_t dim_vec, q15_t * p_out);
void arm_softmax_q15(const q15_t * vec_in, const uint16_t dim_vec, q15_t * p_out);

/**
* @brief uint8 depthwise convolution function with asymmetric quantization for even number of channel multiplier
Expand Down
10 changes: 10 additions & 0 deletions CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,16 @@ arm_status arm_convolve_1x1_s8_fast(const q7_t *input,
return ARM_MATH_SUCCESS;
}

/*
 * Size query for the scratch buffer of the fast 1x1 s8 convolution.
 *
 * The scratch buffer is only needed by the build that defines both
 * ARM_MATH_LOOPUNROLL and ARM_MATH_DSP; every other build reports 0 bytes.
 */
int32_t arm_convolve_1x1_s8_fast_get_buffer_size(const uint16_t input_ch)
{
    int32_t required_bytes = 0;

#if defined(ARM_MATH_LOOPUNROLL) && defined(ARM_MATH_DSP)
    /* Two 16-bit entries per input channel. */
    required_bytes = (int32_t)(2 * input_ch * sizeof(int16_t));
#else
    /* Argument is intentionally unused in this configuration. */
    (void)input_ch;
#endif

    return required_bytes;
}

/**
* @} end of NNConv group
*/
14 changes: 14 additions & 0 deletions CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,20 @@ arm_status arm_convolve_s8(const q7_t *input,
return ARM_MATH_SUCCESS;
}

/*
 * Size query for the scratch buffer of the s8 convolution.
 *
 * The scratch buffer is only needed by the build that defines both
 * ARM_MATH_LOOPUNROLL and ARM_MATH_DSP; every other build reports 0 bytes.
 */
int32_t arm_convolve_s8_get_buffer_size(const uint16_t input_ch,
                                        const uint16_t kernel_x,
                                        const uint16_t kernel_y)
{
    int32_t required_bytes = 0;

#if defined(ARM_MATH_LOOPUNROLL) && defined(ARM_MATH_DSP)
    /* Two 16-bit entries per (channel, kernel position) pair. */
    required_bytes = (int32_t)((2 * input_ch * kernel_x * kernel_y) * sizeof(int16_t));
#else
    /* Arguments are intentionally unused in this configuration. */
    (void)input_ch;
    (void)kernel_x;
    (void)kernel_y;
#endif

    return required_bytes;
}

/**
* @} end of NNConv group
*/
15 changes: 15 additions & 0 deletions CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,21 @@ arm_status arm_depthwise_conv_s8_opt(const q7_t *input,
return ARM_MATH_SUCCESS;
}

/*
 * Size query for the scratch buffer of the optimized s8 depthwise convolution.
 *
 * The scratch buffer is only needed by the build that defines both
 * ARM_MATH_LOOPUNROLL and ARM_MATH_DSP; every other build reports 0 bytes.
 */
int32_t arm_depthwise_conv_s8_opt_get_buffer_size(const uint16_t input_ch,
                                                  const uint16_t kernel_x,
                                                  const uint16_t kernel_y)
{
    int32_t required_bytes = 0;

#if defined(ARM_MATH_LOOPUNROLL) && defined(ARM_MATH_DSP)
    /* Two 16-bit entries per (channel, kernel position) pair. */
    required_bytes = (int32_t)((2 * input_ch * kernel_x * kernel_y) * sizeof(int16_t));
#else
    /* Arguments are intentionally unused in this configuration. */
    (void)input_ch;
    (void)kernel_x;
    (void)kernel_y;
#endif

    return required_bytes;
}

/**
* @} end of NNConv group
*/

10 changes: 10 additions & 0 deletions CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s8.c
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,16 @@ arm_fully_connected_s8(const int8_t *input,
return (ARM_MATH_SUCCESS);
#endif /* defined(ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP) */
}

/*
 * Size query for the scratch buffer of the s8 fully-connected layer.
 *
 * The scratch buffer is only needed by the build that defines both
 * ARM_MATH_LOOPUNROLL and ARM_MATH_DSP; every other build reports 0 bytes.
 */
int32_t arm_fully_connected_s8_get_buffer_size(const uint16_t col_dim)
{
    int32_t required_bytes = 0;

#if defined(ARM_MATH_LOOPUNROLL) && defined(ARM_MATH_DSP)
    /* One 16-bit entry per element of the input vector. */
    required_bytes = (int32_t)(col_dim * sizeof(int16_t));
#else
    /* Argument is intentionally unused in this configuration. */
    (void)col_dim;
#endif

    return required_bytes;
}
/**
* @} end of FC group
*/
12 changes: 12 additions & 0 deletions CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s8.c
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,18 @@ void arm_avgpool_s8(const int dim_src_height,
#endif
}

/*
 * Size query for the scratch buffer of the s8 average pooling function.
 *
 * The scratch buffer is only needed by the build that defines both
 * ARM_MATH_LOOPUNROLL and ARM_MATH_DSP; every other build reports 0 bytes.
 */
int32_t arm_avgpool_s8_get_buffer_size(const int dim_dst_width,
                                       const int ch_src)
{
    int32_t required_bytes = 0;

#if defined(ARM_MATH_LOOPUNROLL) && defined(ARM_MATH_DSP)
    /* One 16-bit entry per channel for every output column. */
    required_bytes = (int32_t)((ch_src * dim_dst_width) * sizeof(int16_t));
#else
    /* Arguments are intentionally unused in this configuration. */
    (void)dim_dst_width;
    (void)ch_src;
#endif

    return required_bytes;
}

/**
* @} end of Pooling group
*/

0 comments on commit a1c7ee0

Please sign in to comment.