Support user-specified string input in tflite benchmark tools.
- Users can supply string inputs with the flag "--input_layer_value_files". For example, if there are two input tensors: --input_layer_value_files=input1:input1_file_path,input2:input2_file.
- The strings in the file are separated by the delimiter '\0'. For example, if the tensor shape is (1x3xSTRING), the file content should be "first_string_value\0second_string_value\0third_string_value\0".
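
For illustration, a minimal sketch of producing such a file (the file name and
the string values are hypothetical; only the '\0'-delimited layout matters):

  #include <fstream>
  #include <string>
  #include <vector>

  // Writes three string values in the format described above: every value,
  // including the last, is followed by a single '\0' byte.
  int main() {
    const std::vector<std::string> values = {"first_string_value",
                                             "second_string_value",
                                             "third_string_value"};
    std::ofstream file("input1_file_path", std::ios::binary);
    for (const std::string& value : values) {
      // c_str() points at length() characters plus the terminating '\0', so
      // writing length() + 1 bytes emits the delimiter as well.
      file.write(value.c_str(), value.length() + 1);
    }
  }

The resulting file could then be passed as, e.g.,
--input_layer_value_files=input1:input1_file_path.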

PiperOrigin-RevId: 299272934
Change-Id: Iab5951c903fb4976e0dcf43dd09ba4695653dab3
dayeongl authored and tensorflower-gardener committed Mar 6, 2020
1 parent 8f50df8 commit 70fab67
Showing 5 changed files with 124 additions and 47 deletions.
Binary file added tensorflow/lite/testdata/string_input_model.bin
3 changes: 3 additions & 0 deletions tensorflow/lite/tools/benchmark/BUILD
@@ -90,10 +90,12 @@ cc_test(
args = [
"--fp32_graph=$(location //tensorflow/lite:testdata/multi_add.bin)",
"--int8_graph=$(location //tensorflow/lite:testdata/add_quantized_int8.bin)",
"--string_graph=$(location //tensorflow/lite:testdata/string_input_model.bin)",
],
data = [
"//tensorflow/lite:testdata/add_quantized_int8.bin",
"//tensorflow/lite:testdata/multi_add.bin",
"//tensorflow/lite:testdata/string_input_model.bin",
],
tags = [
"tflite_not_portable_android",
@@ -103,6 +105,7 @@ cc_test(
":benchmark_performance_options",
":benchmark_tflite_model_lib",
"//tensorflow/lite:framework",
"//tensorflow/lite:string_util",
"//tensorflow/lite/testing:util",
"//tensorflow/lite/tools:command_line_flags",
"@com_google_absl//absl/algorithm",
67 changes: 63 additions & 4 deletions tensorflow/lite/tools/benchmark/benchmark_test.cc
@@ -22,6 +22,7 @@ limitations under the License.
#include "absl/algorithm/algorithm.h"
#include "absl/strings/str_format.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/testing/util.h"
#include "tensorflow/lite/tools/benchmark/benchmark_performance_options.h"
#include "tensorflow/lite/tools/benchmark/benchmark_tflite_model.h"
@@ -30,13 +31,14 @@ limitations under the License.
namespace {
const std::string* g_fp32_model_path = nullptr;
const std::string* g_int8_model_path = nullptr;
const std::string* g_string_model_path = nullptr;
} // namespace

namespace tflite {
namespace benchmark {
namespace {

-enum class ModelGraphType { FP32, INT8 };
+enum class ModelGraphType { FP32, INT8, STRING };

BenchmarkParams CreateParams(int32_t num_runs, float min_secs, float max_secs,
ModelGraphType graph_type = ModelGraphType::FP32) {
@@ -53,6 +55,9 @@ BenchmarkParams CreateParams(int32_t num_runs, float min_secs, float max_secs,
if (graph_type == ModelGraphType::INT8) {
params.AddParam("graph",
BenchmarkParam::Create<std::string>(*g_int8_model_path));
} else if (graph_type == ModelGraphType::STRING) {
params.AddParam("graph",
BenchmarkParam::Create<std::string>(*g_string_model_path));
} else {
// by default, simply use the fp32 one.
params.AddParam("graph",
@@ -97,6 +102,9 @@ BenchmarkParams CreateFp32Params() {
BenchmarkParams CreateInt8Params() {
return CreateParams(2, 1.0f, 150.0f, ModelGraphType::INT8);
}
BenchmarkParams CreateStringParams() {
return CreateParams(2, 1.0f, 150.0f, ModelGraphType::STRING);
}

std::string CreateFilePath(const std::string& file_name) {
return std::string(getenv("TEST_TMPDIR")) + file_name;
@@ -126,11 +134,20 @@ void WriteInputLayerValueFile(const std::string& file_path,
}

void CheckInputTensorValue(const TfLiteTensor* input_tensor,
-                           char tensor_value) {
+                           char expected_value) {
  ASSERT_THAT(input_tensor, testing::NotNull());
  EXPECT_TRUE(std::all_of(
      input_tensor->data.raw, input_tensor->data.raw + input_tensor->bytes,
-      [tensor_value](char c) { return c == tensor_value; }));
+      [expected_value](char c) { return c == expected_value; }));
}

void CheckInputTensorValue(const TfLiteTensor* input_tensor,
int tensor_dim_index,
const std::string& expected_value) {
StringRef tensor_value = GetString(input_tensor, tensor_dim_index);
EXPECT_TRUE(absl::equal(tensor_value.str, tensor_value.str + tensor_value.len,
expected_value.c_str(),
expected_value.c_str() + expected_value.length()));
}

class TestBenchmark : public BenchmarkTfLiteModel {
@@ -165,6 +182,13 @@ TEST(BenchmarkTest, DoesntCrashInt8Model) {
benchmark.Run();
}

TEST(BenchmarkTest, DoesntCrashStringModel) {
  ASSERT_THAT(g_string_model_path, testing::NotNull());

TestBenchmark benchmark(CreateStringParams());
benchmark.Run();
}

TEST(BenchmarkTest, DoesntCrashMultiPerfOptions) {
ASSERT_THAT(g_fp32_model_path, testing::NotNull());

@@ -267,6 +291,38 @@ TEST(BenchmarkTest, DoesntCrashWithExplicitInputValueFilesInt8Model) {
CheckInputTensorValue(benchmark.GetInputTensor(0), file_value);
}

TEST(BenchmarkTest, DoesntCrashWithExplicitInputValueFilesStringModel) {
ASSERT_THAT(g_string_model_path, testing::NotNull());
const std::string file_path = CreateFilePath("string_binary");
const std::string string_value_0 = "abcd";
const std::string string_value_1 = "12345";
const std::string string_value_2 = "a1b2c3d4e5";
std::ofstream file(file_path);
  // Each value is written together with the terminating null character ('\0')
  // returned by std::string::c_str(), so '\0' also serves as the delimiter.
file.write(string_value_0.c_str(), string_value_0.length() + 1);
file.write(string_value_1.c_str(), string_value_1.length() + 1);
file.write(string_value_2.c_str(), string_value_2.length() + 1);
file.close();

  // Note: the following input-related params are *specific* to model
  // 'g_string_model_path', which is specified as
  // 'lite:testdata/string_input_model.bin' for the test.
BenchmarkParams params = CreateStringParams();
params.Set<std::string>("input_layer", "a");
params.Set<std::string>("input_layer_shape", "1,3");
params.Set<std::string>("input_layer_value_files", "a:" + file_path);
TestBenchmark benchmark(std::move(params));
benchmark.Run();

auto input_tensor = benchmark.GetInputTensor(0);
ASSERT_THAT(input_tensor, testing::NotNull());
EXPECT_EQ(GetStringCount(input_tensor), 3);
CheckInputTensorValue(input_tensor, 0, string_value_0);
CheckInputTensorValue(input_tensor, 1, string_value_1);
CheckInputTensorValue(input_tensor, 2, string_value_2);
}

class MaxDurationWorksTestListener : public BenchmarkListener {
void OnBenchmarkEnd(const BenchmarkResults& results) override {
    const int64_t num_actual_runs = results.inference_time_us().count();
@@ -316,16 +372,19 @@ TEST(BenchmarkTest, ParametersArePopulatedWhenInputShapeIsNotSpecified) {
} // namespace tflite

int main(int argc, char** argv) {
-  std::string fp32_model_path, int8_model_path;
+  std::string fp32_model_path, int8_model_path, string_model_path;
std::vector<tflite::Flag> flags = {
tflite::Flag::CreateFlag("fp32_graph", &fp32_model_path,
"Path to a fp32 model file."),
tflite::Flag::CreateFlag("int8_graph", &int8_model_path,
"Path to a int8 model file."),
tflite::Flag::CreateFlag("string_graph", &string_model_path,
"Path to a string model file."),
};

g_fp32_model_path = &fp32_model_path;
g_int8_model_path = &int8_model_path;
g_string_model_path = &string_model_path;

const bool parse_result =
tflite::Flags::Parse(&argc, const_cast<const char**>(argv), flags);
89 changes: 52 additions & 37 deletions tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc
@@ -101,13 +101,18 @@ std::vector<std::string> Split(const std::string& str, const char delim) {
return results;
}

-void FillRandomString(tflite::DynamicBuffer* buffer,
-                      const std::vector<int>& sizes,
-                      const std::function<std::string()>& random_func) {
+int GetNumElements(const TfLiteIntArray* dim_array) {
  int num_elements = 1;
-  for (int dim : sizes) {
-    num_elements *= dim;
+  for (size_t i = 0; i < dim_array->size; i++) {
+    num_elements *= dim_array->data[i];
  }
+  return num_elements;
+}
+
+void FillRandomString(tflite::DynamicBuffer* buffer,
+                      const TfLiteIntArray* dim_array,
+                      const std::function<std::string()>& random_func) {
+  int num_elements = GetNumElements(dim_array);
for (int i = 0; i < num_elements; ++i) {
auto str = random_func();
buffer->AddString(str.data(), str.length());
@@ -233,15 +238,6 @@ TfLiteStatus PopulateInputLayerInfo(
return kTfLiteOk;
}

-std::vector<int> TfLiteIntArrayToVector(const TfLiteIntArray* int_array) {
-  std::vector<int> values;
-  values.reserve(int_array->size);
-  for (size_t i = 0; i < int_array->size; i++) {
-    values.push_back(int_array->data[i]);
-  }
-  return values;
-}
-
std::shared_ptr<profiling::ProfileSummaryFormatter>
CreateProfileSummaryFormatter(bool format_as_csv) {
return format_as_csv
@@ -317,7 +313,9 @@ std::vector<Flag> BenchmarkTfLiteModel::GetFlags() {
"of input layer name and value file path separated by ':', e.g. "
"input1:file_path1,input2:file_path2. If the input_name appears both "
"in input_layer_value_range and input_layer_value_files, "
"input_layer_value_range of the input_name will be ignored."),
"input_layer_value_range of the input_name will be ignored. The file "
"format is binary and it should be array format or null separated "
"strings format."),
CreateFlag<bool>("use_legacy_nnapi", &params_, "use legacy nnapi api"),
CreateFlag<bool>("allow_fp16", &params_, "allow fp16"),
CreateFlag<bool>("require_full_delegation", &params_,
@@ -416,25 +414,41 @@ int64_t BenchmarkTfLiteModel::MayGetModelFileSize() {

BenchmarkTfLiteModel::InputTensorData BenchmarkTfLiteModel::LoadInputTensorData(
const TfLiteTensor& t, const std::string& input_file_path) {
-  std::ifstream value_file(input_file_path, std::ios::binary | std::ios::ate);
+  std::ifstream value_file(input_file_path, std::ios::binary);
  if (!value_file.good()) {
    TFLITE_LOG(FATAL) << "Failed to read the input_layer_value_file:"
                      << input_file_path;
  }
  InputTensorData t_data;
  if (t.type == kTfLiteString) {
-    // TODO(b/149184079): Will update string type logic.
+    t_data.data = VoidUniquePtr(
+        static_cast<void*>(new tflite::DynamicBuffer()),
+        [](void* ptr) { delete static_cast<DynamicBuffer*>(ptr); });
+    std::string line;
+    size_t num_line = 0;
+    // Read each value, using '\0' as the delimiter.
+    while (std::getline(value_file, line, '\0')) {
+      num_line++;
+      static_cast<DynamicBuffer*>(t_data.data.get())
+          ->AddString(line.data(), line.length());
+    }
+    int num_elements = GetNumElements(t.dims);
+    if (num_line != num_elements) {
+      TFLITE_LOG(FATAL) << "The number of strings in the "
+                           "input_layer_value_file ("
+                        << input_file_path << ") is " << num_line
+                        << ". It should be " << num_elements << ".";
+    }
  } else {
+    value_file.seekg(0, std::ios_base::end);
    if (value_file.tellg() != t.bytes) {
      TFLITE_LOG(FATAL) << "The size of " << input_file_path << " is "
                        << value_file.tellg() << " bytes. It should be "
                        << t.bytes << " bytes.";
    }
    t_data.bytes = t.bytes;
-    // Now initialize the type-erased unique_ptr (with custom deleter).
-    t_data.data = std::unique_ptr<void, void (*)(void*)>(
-        static_cast<void*>(new char[t.bytes]),
-        [](void* ptr) { delete[] static_cast<char*>(ptr); });
+    t_data.data =
+        VoidUniquePtr(static_cast<void*>(new char[t.bytes]),
+                      [](void* ptr) { delete[] static_cast<char*>(ptr); });
    value_file.clear();
    value_file.seekg(0, std::ios_base::beg);
    value_file.read(static_cast<char*>(t_data.data.get()), t.bytes);
@@ -453,11 +467,7 @@ BenchmarkTfLiteModel::CreateRandomTensorData(const TfLiteTensor& t,
low_range = layer_info->low;
high_range = layer_info->high;
}
-  std::vector<int> sizes = TfLiteIntArrayToVector(t.dims);
-  int num_elements = 1;
-  for (int i = 0; i < sizes.size(); ++i) {
-    num_elements *= sizes[i];
-  }
+  int num_elements = GetNumElements(t.dims);
switch (t.type) {
case kTfLiteFloat32: {
return CreateInputTensorData<float>(
@@ -564,12 +574,17 @@ TfLiteStatus BenchmarkTfLiteModel::ResetInputsAndOutputs() {
int i = interpreter_inputs[j];
TfLiteTensor* t = interpreter_->tensor(i);
if (t->type == kTfLiteString) {
-      tflite::DynamicBuffer buffer;
-      std::vector<int> sizes = TfLiteIntArrayToVector(t->dims);
-      FillRandomString(&buffer, sizes, []() {
-        return "we're have some friends over saturday to hang out in the yard";
-      });
-      buffer.WriteToTensor(t, /*new_shape=*/nullptr);
+      if (inputs_data_[j].data) {
+        static_cast<DynamicBuffer*>(inputs_data_[j].data.get())
+            ->WriteToTensor(t, /*new_shape=*/nullptr);
+      } else {
+        tflite::DynamicBuffer buffer;
+        FillRandomString(&buffer, t->dims, []() {
+          return "we're have some friends over saturday to hang out in the "
+                 "yard";
+        });
+        buffer.WriteToTensor(t, /*new_shape=*/nullptr);
+      }
} else {
std::memcpy(t->data.raw, inputs_data_[j].data.get(),
inputs_data_[j].bytes);
12 changes: 6 additions & 6 deletions tensorflow/lite/tools/benchmark/benchmark_tflite_model.h
@@ -89,10 +89,13 @@ class BenchmarkTfLiteModel : public BenchmarkModel {
std::unique_ptr<tflite::Interpreter> interpreter_;

private:
  // Implement type erasure using unique_ptr with a custom deleter.
using VoidUniquePtr = std::unique_ptr<void, void (*)(void*)>;

struct InputTensorData {
InputTensorData() : data(nullptr, nullptr) {}

-    std::unique_ptr<void, void (*)(void*)> data;
+    VoidUniquePtr data;
size_t bytes;
};

@@ -105,11 +108,8 @@ class BenchmarkTfLiteModel : public BenchmarkModel {
std::generate_n(raw, num_elements, [&]() {
return static_cast<T>(distribution(random_engine_));
});
-    // Now initialize the type-erased unique_ptr (with custom deleter) from
-    // 'raw'.
-    tmp.data = std::unique_ptr<void, void (*)(void*)>(
-        static_cast<void*>(raw),
-        [](void* ptr) { delete[] static_cast<T*>(ptr); });
+    tmp.data = VoidUniquePtr(static_cast<void*>(raw),
+                             [](void* ptr) { delete[] static_cast<T*>(ptr); });
return tmp;
}
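
For reference, a self-contained sketch of the VoidUniquePtr type-erasure
pattern used above (the float element type is an arbitrary stand-in):

  #include <memory>

  using VoidUniquePtr = std::unique_ptr<void, void (*)(void*)>;

  int main() {
    // The custom deleter restores the static type before delete[], so the
    // type-erased pointer still releases the array correctly.
    VoidUniquePtr data(static_cast<void*>(new float[16]),
                       [](void* ptr) { delete[] static_cast<float*>(ptr); });
  }  // 'data' goes out of scope here and its deleter runs.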

Expand Down
