This repository was archived by the owner on Feb 24, 2025. It is now read-only.

(Arduino) Automated sync from github.com/tensorflow/tflite-micro #185

Closed · wants to merge 1 commit
6 changes: 0 additions & 6 deletions src/tensorflow/lite/c/common.h
@@ -38,10 +38,4 @@ limitations under the License.
 
 #include "tensorflow/lite/core/c/common.h"
 
-// TfLiteOpaqueDelegate: allows delegation of nodes to alternative backends.
-// TfLiteOpaqueDelegate is an abstract type that is intended to have the same
-// role as TfLiteDelegate, but without necessarily exposing the implementation
-// details of how delegates are implemented.
-typedef TfLiteDelegate TfLiteOpaqueDelegate;
-
 #endif  // TENSORFLOW_LITE_C_COMMON_H_
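
The duplicate alias is deleted here, presumably because the tensorflow/lite/core/c/common.h header included just above now provides TfLiteOpaqueDelegate itself. A minimal compile check under that assumption (hypothetical, requires the TFLM headers on the include path):

// Assumption: the alias now comes from the core header pulled in by
// tensorflow/lite/c/common.h; if TfLiteOpaqueDelegate were gone entirely,
// the declaration below would not compile.
#include "tensorflow/lite/c/common.h"

static TfLiteOpaqueDelegate* g_delegate = nullptr;

int main() { return g_delegate == nullptr ? 0 : 1; }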
13 changes: 7 additions & 6 deletions src/tensorflow/lite/kernels/internal/common.h
@@ -328,14 +328,16 @@ template <typename T>
 int CountLeadingZeros(T integer_input) {
   static_assert(std::is_unsigned<T>::value,
                 "Only unsigned integer types handled.");
-#if defined(__GNUC__)
-  return integer_input ? __builtin_clz(integer_input)
-                       : std::numeric_limits<T>::digits;
-#else
   if (integer_input == 0) {
     return std::numeric_limits<T>::digits;
   }
 
+#if defined(__GNUC__)
+  if (std::is_same<T, uint32_t>::value) {
+    return __builtin_clz(integer_input);
+  } else if (std::is_same<T, uint64_t>::value) {
+    return __builtin_clzll(integer_input);
+  }
+#endif
   const T one_in_leading_positive = static_cast<T>(1)
                                     << (std::numeric_limits<T>::digits - 1);
   int leading_zeros = 0;
@@ -344,7 +346,6 @@ int CountLeadingZeros(T integer_input) {
     ++leading_zeros;
   }
   return leading_zeros;
-#endif
 }
 
 template <typename T>
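
The old implementation routed every unsigned type through __builtin_clz, which operates on unsigned int and therefore silently truncates uint64_t inputs. The new version handles zero up front, dispatches to the width-matched builtin, and keeps the portable shift loop as the fallback; this matters because the reduce.h changes further down call CountLeadingZeros with a uint64_t element count. A minimal standalone mirror of the new logic (hypothetical check, not part of the PR):

// Compilable with any GCC/Clang; mirrors the new dispatch so it can be
// exercised off-target.
#include <cstdint>
#include <iostream>
#include <limits>
#include <type_traits>

template <typename T>
int CountLeadingZeros(T integer_input) {
  static_assert(std::is_unsigned<T>::value,
                "Only unsigned integer types handled.");
  if (integer_input == 0) {
    return std::numeric_limits<T>::digits;  // e.g. 64 for uint64_t
  }
#if defined(__GNUC__)
  if (std::is_same<T, uint32_t>::value) {
    return __builtin_clz(integer_input);  // 32-bit builtin
  } else if (std::is_same<T, uint64_t>::value) {
    return __builtin_clzll(integer_input);  // 64-bit builtin
  }
#endif
  // Portable fallback: shift left until the top bit is set.
  const T one_in_leading_positive = static_cast<T>(1)
                                    << (std::numeric_limits<T>::digits - 1);
  int leading_zeros = 0;
  while (integer_input < one_in_leading_positive) {
    integer_input <<= 1;
    ++leading_zeros;
  }
  return leading_zeros;
}

int main() {
  std::cout << CountLeadingZeros<uint32_t>(1u) << "\n";    // 31
  std::cout << CountLeadingZeros<uint64_t>(1ull) << "\n";  // 63 (old code: 31)
  std::cout << CountLeadingZeros<uint64_t>(0ull) << "\n";  // 64
}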
23 changes: 12 additions & 11 deletions src/tensorflow/lite/kernels/internal/reference/integer_ops/add.h
@@ -35,24 +35,25 @@ inline void CheckArithmeticParams(const ArithmeticParams& params) {
   TFLITE_DCHECK_LE(-params.input2_offset, std::numeric_limits<int8_t>::max());
 }
 
-inline void ElementWise(
-    int size, const ArithmeticParams& params, const int8_t* input1_data,
-    const int8_t* input2_data, int8_t* output_data,
-    void (*check_arithmetic_params)(const ArithmeticParams&),
-    int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) {
+// TODO(b/270589088): move to a more appropriate file (b/270589088#comment2)
+template <typename T>
+void ElementWise(int size, const ArithmeticParams& params, const T* input1_data,
+                 const T* input2_data, T* output_data,
+                 void (*check_arithmetic_params)(const ArithmeticParams&),
+                 T (*binary_func)(T, T, const ArithmeticParams&)) {
   CheckArithmeticParams(params);
   for (int i = 0; i < size; ++i) {
     output_data[i] = binary_func(input1_data[i], input2_data[i], params);
   }
 }
 
-inline void BroadcastBinaryFunction4DSlow(
+// TODO(b/270589088): move to a more appropriate file. (b/270589088#comment2)
+template <typename T>
+void BroadcastBinaryFunction4DSlow(
     const ArithmeticParams& params, const RuntimeShape& input1_shape,
-    const int8_t* input1_data, const RuntimeShape& input2_shape,
-    const int8_t* input2_data, const RuntimeShape& output_shape,
-    int8_t* output_data,
+    const T* input1_data, const RuntimeShape& input2_shape,
+    const T* input2_data, const RuntimeShape& output_shape, T* output_data,
     void (*check_arithmetic_params)(const ArithmeticParams&),
-    int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) {
+    T (*binary_func)(T, T, const ArithmeticParams&)) {
   NdArrayDesc<4> desc1;
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
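
This hunk generalizes the int8-only helpers into templates so the int16 kernels can reuse the same loops; binary_func stays a plain function pointer rather than becoming a template parameter or std::function. A simplified, self-contained analogue of the pattern (illustrative only: Params stands in for tflite::ArithmeticParams, and the check_arithmetic_params hook is omitted):

#include <cstdint>
#include <iostream>

struct Params { int32_t offset; };

// One templated elementwise driver shared by int8_t and int16_t,
// parameterized by a plain function pointer.
template <typename T>
void ElementWise(int size, const Params& params, const T* in1, const T* in2,
                 T* out, T (*binary_func)(T, T, const Params&)) {
  for (int i = 0; i < size; ++i) {
    out[i] = binary_func(in1[i], in2[i], params);
  }
}

int16_t AddWithOffset(int16_t a, int16_t b, const Params& p) {
  return static_cast<int16_t>(a + b + p.offset);
}

int main() {
  const int16_t a[3] = {1, 2, 3};
  const int16_t b[3] = {10, 20, 30};
  int16_t out[3];
  ElementWise<int16_t>(3, Params{1}, a, b, out, AddWithOffset);
  for (int16_t v : out) std::cout << v << " ";  // 12 23 34
}

One oddity visible in the diff: the templated ElementWise still calls CheckArithmeticParams(params) directly rather than the check_arithmetic_params argument it is handed; that looks like a pre-existing quirk carried over by the rename, not something this PR introduces.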
65 changes: 2 additions & 63 deletions src/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h
@@ -1,10 +1,10 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 
-http://www.apache.org/licenses/LICENSE-2.0
+    http://www.apache.org/licenses/LICENSE-2.0
 
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
@@ -15,65 +15,4 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
 
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-template <typename integer_type>
-inline void Mean(const tflite::MeanParams& op_params, int32_t multiplier,
-                 int32_t shift, const RuntimeShape& unextended_input_shape,
-                 const integer_type* input_data, int32_t input_zero_point,
-                 const RuntimeShape& unextended_output_shape,
-                 integer_type* output_data, int32_t output_zero_point) {
-  // Current implementation only supports dimension equals 4 and simultaneous
-  // reduction over width and height.
-  TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
-  TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  const RuntimeShape input_shape =
-      RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  const RuntimeShape output_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_shape);
-  const int output_batch = output_shape.Dims(0);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int output_depth = output_shape.Dims(3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int num_elements_in_axis = input_width * input_height;
-
-  TFLITE_CHECK_EQ(op_params.axis_count, 2);
-  TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
-               (op_params.axis[0] == 2 && op_params.axis[1] == 1));
-  TFLITE_CHECK_EQ(output_height, 1);
-  TFLITE_CHECK_EQ(output_width, 1);
-
-  static constexpr int32_t kMinInt = std::numeric_limits<integer_type>::min();
-  static constexpr int32_t kMaxInt = std::numeric_limits<integer_type>::max();
-
-  for (int out_b = 0; out_b < output_batch; ++out_b) {
-    for (int out_d = 0; out_d < output_depth; ++out_d) {
-      int32_t acc = 0;
-      for (int in_h = 0; in_h < input_height; ++in_h) {
-        for (int in_w = 0; in_w < input_width; ++in_w) {
-          acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)] -
-                 input_zero_point;
-        }
-      }
-      acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
-      acc = acc > 0 ? (acc + num_elements_in_axis / 2) / num_elements_in_axis
-                    : (acc - num_elements_in_axis / 2) / num_elements_in_axis;
-      acc += output_zero_point;
-      acc = std::min(std::max(acc, kMinInt), kMaxInt);
-      output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
-          static_cast<integer_type>(acc);
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
 #endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
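
The per-type integer Mean kernel is removed outright; its work is presumably absorbed by the shared QuantizedMeanOrSum path in reduce.h below, which now performs the mean rescaling in pure integer arithmetic. The one behavioral detail worth noting from the deleted code is its round-half-away-from-zero averaging step; in isolation (illustrative sketch, not from the PR):

#include <cstdint>
#include <iostream>

// The removed kernel divided the accumulator by the element count with
// round-half-away-from-zero semantics: (acc +/- n/2) / n.
int32_t RoundedDiv(int32_t acc, int32_t n) {
  return acc > 0 ? (acc + n / 2) / n : (acc - n / 2) / n;
}

int main() {
  std::cout << RoundedDiv(7, 2) << "\n";   // 4  (3.5 rounds away from zero)
  std::cout << RoundedDiv(-7, 2) << "\n";  // -4
}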
135 changes: 42 additions & 93 deletions src/tensorflow/lite/kernels/internal/reference/reduce.h
@@ -268,11 +268,11 @@ inline bool Mean(const T* input_data, const int* input_dims,
   return true;
 }
 
-template <typename T>
 inline void Mean(const tflite::MeanParams& op_params,
                  const RuntimeShape& unextended_input_shape,
-                 const T* input_data,
-                 const RuntimeShape& unextended_output_shape, T* output_data) {
+                 const float* input_data,
+                 const RuntimeShape& unextended_output_shape,
+                 float* output_data) {
   ruy::profiler::ScopeLabel label("Mean4D");
 
   // Current implementation only supports dimension equals 4 and simultaneous
@@ -312,78 +312,21 @@ inline void Mean(const tflite::MeanParams& op_params,
   }
 }
 
-inline void Mean(const tflite::MeanParams& op_params,
-                 const RuntimeShape& unextended_input_shape,
-                 const uint8_t* input_data, int32_t input_zero_point,
-                 float input_scale, const RuntimeShape& unextended_output_shape,
-                 uint8_t* output_data, int32_t output_zero_point,
-                 float output_scale) {
-  ruy::profiler::ScopeLabel label("Mean4D/Uint8");
-
-  // Current implementation only supports dimension equals 4 and simultaneous
-  // reduction over width and height.
-  TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
-  TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  const RuntimeShape input_shape =
-      RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  const RuntimeShape output_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_shape);
-  const int output_batch = output_shape.Dims(0);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int output_depth = output_shape.Dims(3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const float num_elements_in_axis = input_width * input_height;
-
-  TFLITE_CHECK_EQ(op_params.axis_count, 2);
-  TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
-               (op_params.axis[0] == 2 && op_params.axis[1] == 1));
-  TFLITE_CHECK_EQ(output_height, 1);
-  TFLITE_CHECK_EQ(output_width, 1);
-
-  constexpr int32_t kMinValue = std::numeric_limits<uint8_t>::min();
-  constexpr int32_t kMaxValue = std::numeric_limits<uint8_t>::max();
-
-  float temp = input_zero_point * input_scale / output_scale;
-  temp = temp > 0 ? temp + 0.5f : temp - 0.5f;
-  int32_t bias = output_zero_point - static_cast<int32_t>(temp);
-  double real_scale =
-      static_cast<double>(input_scale / (num_elements_in_axis * output_scale));
-
-  int32_t multiplier;
-  int shift;
-  QuantizeMultiplier(real_scale, &multiplier, &shift);
-  for (int out_b = 0; out_b < output_batch; ++out_b) {
-    for (int out_d = 0; out_d < output_depth; ++out_d) {
-      int32_t acc = 0;
-      for (int in_h = 0; in_h < input_height; ++in_h) {
-        for (int in_w = 0; in_w < input_width; ++in_w) {
-          acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
-        }
-      }
-      acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
-      acc += bias;
-      acc = std::min(std::max(acc, kMinValue), kMaxValue);
-      output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
-          static_cast<uint8_t>(acc);
-    }
-  }
-}
-
 // Computes the mean of elements across dimensions given in axis.
 // It does so in two stages, first calculates the sum of elements along the axis
 // then divides it by the number of element in axis for quantized values.
 template <typename T, typename U>
 inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point,
-                               float input_scale, const int* input_dims,
-                               const int input_num_dims, T* output_data,
-                               int32_t output_zero_point, float output_scale,
+                               const int* input_dims, const int input_num_dims,
+                               T* output_data, int32_t output_multiplier,
+                               int output_shift, int32_t output_zero_point,
                                const int* output_dims,
                                const int output_num_dims, const int* axis,
                                const int num_axis_dimensions, bool keep_dims,
                                int* temp_index, int* resolved_axis, U* temp_sum,
                                bool compute_sum) {
+  const int32_t kMinValue = std::numeric_limits<T>::min();
+  const int32_t kMaxValue = std::numeric_limits<T>::max();
   const bool uint8_case = std::is_same<T, uint8_t>::value;
   const bool int16_case = std::is_same<T, int16_t>::value;
   if (uint8_case) {
@@ -430,40 +373,46 @@ inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point,
   }
 
   // Calculate mean by dividing output_data by num of aggregated element.
-  size_t num_elements_in_axis = 1;
+  int64_t num_elements_in_axis = 1;
   for (int idx = 0; idx < num_resolved_axis; ++idx) {
     size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
     // Overflow prevention.
-    if (current > (std::numeric_limits<size_t>::max() / num_elements_in_axis)) {
+    if (current > static_cast<size_t>(std::numeric_limits<int64_t>::max() /
+                                      num_elements_in_axis)) {
       return false;
     }
     num_elements_in_axis *= current;
   }
 
-  if (num_elements_in_axis > 0) {
-    const float scale = input_scale / output_scale;
-    if (compute_sum) {
-      // TODO(b/116341117): Eliminate float and do this completely in 8bit.
-      const float bias = -input_zero_point * scale * num_elements_in_axis;
-      for (size_t idx = 0; idx < num_outputs; ++idx) {
-        const U value =
-            static_cast<U>(TfLiteRound(temp_sum[idx] * scale + bias)) +
-            output_zero_point;
-        output_data[idx] = static_cast<T>(value);
-      }
-    } else {
-      const float bias = -input_zero_point * scale;
-      for (size_t idx = 0; idx < num_outputs; ++idx) {
-        float float_mean = static_cast<float>(temp_sum[idx]) /
-                           static_cast<float>(num_elements_in_axis);
-        float result = TfLiteMin(
-            TfLiteRound(float_mean * scale + bias) + output_zero_point,
-            static_cast<float>(std::numeric_limits<T>::max()));
-        result = TfLiteMax(result,
-                           static_cast<float>(std::numeric_limits<T>::min()));
-        output_data[idx] = static_cast<T>(result);
-      }
-    }
+  if (num_elements_in_axis == 0) {
+    return true;
+  }
+
+  // Readapt output rescaling when calculating the mean to integrate a
+  // 1/num_elements_in_axis multiplier.
+  if (!compute_sum) {
+    TFLITE_DCHECK_GE(num_elements_in_axis, 0);
+    int shift =
+        63 - CountLeadingZeros(static_cast<uint64_t>(num_elements_in_axis));
+    // To avoid any overflow risk 'shift' should be <= 32 and to satisfy
+    // 'MultiplyByQuantizedMultiplier' pre-conditions 'output_shift - shift'
+    // should be >= -31. Clamp the value at the price of some precision loss.
+    shift = std::min(shift, 32);
+    shift = std::min(shift, 31 + output_shift);
+    output_multiplier = static_cast<int32_t>(
+        (static_cast<int64_t>(output_multiplier) << shift) /
+        num_elements_in_axis);
+    output_shift = output_shift - shift;
+  }
+
+  for (size_t idx = 0; idx < num_outputs; ++idx) {
+    const U shifted_sum =
+        static_cast<U>(temp_sum[idx] - input_zero_point * num_elements_in_axis);
+    int32_t output = MultiplyByQuantizedMultiplier(
+                         shifted_sum, output_multiplier, output_shift) +
+                     output_zero_point;
+    output = std::min(std::max(output, kMinValue), kMaxValue);
+    output_data[idx] = static_cast<T>(output);
+  }
   return true;
 }
@@ -478,8 +427,8 @@ inline bool QuantizedMeanOrSumExtraArgs(
     bool keep_dims, int* temp_index, int* resolved_axis, U* temp_sum,
     bool compute_sum) {
   return QuantizedMeanOrSum<T, U>(
-      input_data, input_zero_point, input_scale, input_dims, input_num_dims,
-      output_data, output_zero_point, output_scale, output_dims,
+      input_data, input_zero_point, input_dims, input_num_dims, output_data,
+      output_multiplier, output_shift, output_zero_point, output_dims,
       output_num_dims, axis, num_axis_dimensions, keep_dims, temp_index,
       resolved_axis, temp_sum, compute_sum);
 }
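
The headline change in reduce.h: QuantizedMeanOrSum no longer takes float input_scale/output_scale arguments — callers pass a prequantized output_multiplier/output_shift pair instead — and for the mean case the 1/num_elements_in_axis factor is folded into that multiplier with a clamped power-of-two shift, eliminating the float math (and the TODO(b/116341117) that asked for exactly this). A worked example with illustrative values (n = 9, a multiplier of roughly 0.5 in Q31; uses the GCC/Clang clz builtin):

#include <algorithm>
#include <cstdint>
#include <iostream>

int main() {
  const int64_t num_elements_in_axis = 9;  // e.g. a 3x3 spatial reduction
  int32_t output_multiplier = 1073741824;  // ~0.5 in Q31
  int output_shift = 0;

  // shift = floor(log2(9)) = 3: the largest power of two not exceeding n.
  int shift =
      63 - __builtin_clzll(static_cast<uint64_t>(num_elements_in_axis));
  shift = std::min(shift, 32);
  shift = std::min(shift, 31 + output_shift);

  // Fold 1/n into the multiplier: (0.5 * 2^3) / 9 in Q31.
  output_multiplier = static_cast<int32_t>(
      (static_cast<int64_t>(output_multiplier) << shift) /
      num_elements_in_axis);
  output_shift -= shift;

  std::cout << output_multiplier << " * 2^" << output_shift << "\n";
  // 954437176 * 2^-3: 954437176 / 2^31 / 8 ~= 0.0555 ~= 0.5 / 9.
}

Because 2^shift never exceeds num_elements_in_axis, the rescaled multiplier can only shrink, and the two clamps preserve MultiplyByQuantizedMultiplier's preconditions at the cost of some precision for very large reduction axes, as the in-code comment notes.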
3 changes: 1 addition & 2 deletions src/tensorflow/lite/micro/kernels/cmsis_nn/conv.cpp
@@ -1,4 +1,4 @@
-/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/lite/micro/kernels/conv.h"
 
-#include "third_party/cmsis_nn/Include/arm_nn_types.h"
 #include "third_party/cmsis_nn/Include/arm_nnfunctions.h"
 #include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/common.h"
3 changes: 1 addition & 2 deletions src/tensorflow/lite/micro/kernels/cmsis_nn/svdf.cpp
@@ -1,4 +1,4 @@
-/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/lite/micro/kernels/svdf.h"
 
-#include "third_party/cmsis_nn/Include/arm_nn_types.h"
 #include "third_party/cmsis_nn/Include/arm_nnfunctions.h"
 #include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/common.h"
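
Both CMSIS-NN kernels drop their direct arm_nn_types.h include, presumably because arm_nnfunctions.h already pulls the types header in transitively. A minimal compile check under that assumption (hypothetical; requires the CMSIS-NN headers on the include path):

// Assumes arm_nnfunctions.h transitively provides arm_nn_types.h; if this
// still compiles, the dropped include was redundant.
#include "third_party/cmsis_nn/Include/arm_nnfunctions.h"

int main() {
  cmsis_nn_dims dims = {1, 1, 1, 1};  // cmsis_nn_dims comes from arm_nn_types.h
  return dims.n - 1;                  // returns 0
}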