From 034e09808907821974f8c7b10cf68bb4ebe832ed Mon Sep 17 00:00:00 2001
From: generatedunixname89002005232357
 <generatedunixname89002005232357@meta.com>
Date: Fri, 13 Sep 2024 14:09:54 -0700
Subject: [PATCH] Revert D62617066 (#5351)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/5351

This diff reverts D62617066
Breaking several jobs in OSS CI, as well as the seamless model internally

Reviewed By: lucylq, shoumikhin, dbort

Differential Revision: D62650762

fbshipit-source-id: 8205d32e498f51745529e174082fbcc36a8dd384
---
 .../oss_scripts/llama2/runner/runner.cpp      |  8 +-
 .../qaihub_scripts/llama/runner/io_memory.cpp |  5 +-
 .../qaihub_scripts/llama/runner/runner.cpp    |  3 +-
 .../stable_diffusion/runner/runner.cpp        | 10 +-
 extension/module/module.cpp                   | 20 ++--
 extension/module/module.h                     | 95 +++++++------------
 6 files changed, 56 insertions(+), 85 deletions(-)

diff --git a/examples/qualcomm/oss_scripts/llama2/runner/runner.cpp b/examples/qualcomm/oss_scripts/llama2/runner/runner.cpp
index 0085e0069c..86fa8d0eb8 100644
--- a/examples/qualcomm/oss_scripts/llama2/runner/runner.cpp
+++ b/examples/qualcomm/oss_scripts/llama2/runner/runner.cpp
@@ -187,7 +187,7 @@ Result<exec_aten::Tensor> Runner::run_model_step(
             *kv_outputs[j], new_out_addr, kv_outputs[j]->nbytes()) == Error::Ok,
         "Failed to set output tensor when updating v_cache");
     ET_CHECK_MSG(
-        module_->set_output(kv_outputs[j], j + 1) == Error::Ok,
+        module_->set_output_data_ptr(*kv_outputs[j], j + 1) == Error::Ok,
         "Failed to set llama output data pointer");
   }
 
@@ -291,7 +291,7 @@ Error Runner::generate(
         sizes,
         kv_tensors.back()->scalar_type()));
     ET_CHECK_MSG(
-        module_->set_output(kv_outputs.back(), i + 1) == Error::Ok,
+        module_->set_output_data_ptr(kv_outputs.back(), i + 1) == Error::Ok,
         "Failed to set output tensor for kv cache");
   }
 
@@ -323,7 +323,7 @@ Error Runner::generate(
         sizes,
         kv_tensors.back()->scalar_type()));
     ET_CHECK_MSG(
-        module_->set_output(kv_outputs.back(), output_index) ==
+        module_->set_output_data_ptr(kv_outputs.back(), output_index) ==
             Error::Ok,
         "Failed to set output tensor for llama block");
   }
@@ -333,7 +333,7 @@ Error Runner::generate(
       logits_data_shape,
       ScalarType::Float);
   ET_CHECK_MSG(
-      module_->set_output(affine_logits) == Error::Ok,
+      module_->set_output_data_ptr(affine_logits, 0) == Error::Ok,
       "Failed to set output tensor for affine module - logits");
 
   // Start consuming user's prompts and generating new tokens
diff --git a/examples/qualcomm/qaihub_scripts/llama/runner/io_memory.cpp b/examples/qualcomm/qaihub_scripts/llama/runner/io_memory.cpp
index 163bd67f8a..3283d81a9f 100644
--- a/examples/qualcomm/qaihub_scripts/llama/runner/io_memory.cpp
+++ b/examples/qualcomm/qaihub_scripts/llama/runner/io_memory.cpp
@@ -427,7 +427,7 @@ void KVCachedMemory::update_io(
             // k, v are placed interleaved
             int index = (cache_stride << 1) + (cache_group << 5) + head;
             ET_CHECK_MSG(
-                modules_[shard]->set_output(
+                modules_[shard]->set_output_data_ptr(
                     output_tensors[shard][index], index) == Error::Ok,
                 "failed to set output tensor for module %d's %d'th output "
                 "while updating kv_cache output tensors",
@@ -450,7 +450,8 @@ void KVCachedMemory::update_io(
     for (int shard = 0; shard < output_tensors.size(); shard++) {
       for (int index = 0; index < output_tensors[shard].size(); index++) {
         ET_CHECK_MSG(
-            modules_[shard]->set_output(output_tensors[shard][index], index) == Error::Ok,
+            modules_[shard]->set_output_data_ptr(
+                output_tensors[shard][index], index) == Error::Ok,
             "failed to set output tensor for module %d's %d'th output "
             "while updating kv_cache output tensors",
             shard,
diff --git a/examples/qualcomm/qaihub_scripts/llama/runner/runner.cpp b/examples/qualcomm/qaihub_scripts/llama/runner/runner.cpp
index c928cea71c..d6d9911293 100644
--- a/examples/qualcomm/qaihub_scripts/llama/runner/runner.cpp
+++ b/examples/qualcomm/qaihub_scripts/llama/runner/runner.cpp
@@ -177,7 +177,8 @@ Error Runner::generate(
       output_tensors.emplace_back(io_mem_->get_output_tensors(i));
       for (size_t j = 0; j < output_tensors[i].size(); ++j) {
         ET_CHECK_MSG(
-            modules_[i]->set_output(output_tensors[i][j], j) == Error::Ok,
+            modules_[i]->set_output_data_ptr(output_tensors[i][j], j) ==
+                Error::Ok,
             "failed to set output tensor for module %d's %zu'th output",
             i,
             j);
diff --git a/examples/qualcomm/qaihub_scripts/stable_diffusion/runner/runner.cpp b/examples/qualcomm/qaihub_scripts/stable_diffusion/runner/runner.cpp
index fddd3527b5..b6c211d8ac 100644
--- a/examples/qualcomm/qaihub_scripts/stable_diffusion/runner/runner.cpp
+++ b/examples/qualcomm/qaihub_scripts/stable_diffusion/runner/runner.cpp
@@ -373,11 +373,11 @@ Error Runner::generate(std::string prompt) {
       uncond_emb_vec.data(),
       {1, 77, 1024},
       encoder_method_meta.output_tensor_meta(0)->scalar_type());
-  modules_[0]->set_output(cond_emb_tensor);
+  modules_[0]->set_output_data_ptr(cond_emb_tensor, 0);
   long encoder_start = util::time_in_ms();
   auto cond_res = modules_[0]->forward(cond_tokens_tensor);
   stats_.text_encoder_execution_time += (util::time_in_ms() - encoder_start);
-  modules_[0]->set_output(uncond_emb_tensor);
+  modules_[0]->set_output_data_ptr(uncond_emb_tensor, 0);
   encoder_start = util::time_in_ms();
   auto uncond_res = modules_[0]->forward(uncond_tokens_tensor);
   stats_.text_encoder_execution_time += (util::time_in_ms() - encoder_start);
@@ -462,13 +462,13 @@ Error Runner::generate(std::string prompt) {
 
     stats_.unet_aggregate_post_processing_time +=
         (util::time_in_ms() - start_post_process);
-    modules_[1]->set_output(noise_pred_text_tensor);
+    modules_[1]->set_output_data_ptr(noise_pred_text_tensor, 0);
     long start_unet_execution = util::time_in_ms();
     auto cond_res = modules_[1]->forward(
         {latent_tensor, time_emb_tensors[step_index], cond_emb_tensor});
     stats_.unet_aggregate_execution_time +=
         (util::time_in_ms() - start_unet_execution);
-    modules_[1]->set_output(noise_pred_uncond_tensor);
+    modules_[1]->set_output_data_ptr(noise_pred_uncond_tensor, 0);
     start_unet_execution = util::time_in_ms();
     auto uncond_res = modules_[1]->forward(
         {latent_tensor,
@@ -519,7 +519,7 @@ Error Runner::generate(std::string prompt) {
 
   quant_tensor(latent, vae_input, vae_input_scale_, vae_input_offset_);
 
-  modules_[2]->set_output(output_tensor);
+  modules_[2]->set_output_data_ptr(output_tensor, 0);
   long start_vae_execution = util::time_in_ms();
   auto vae_res = modules_[2]->forward(vae_input_tensor);
   stats_.vae_execution_time = (util::time_in_ms() - start_vae_execution);
diff --git a/extension/module/module.cpp b/extension/module/module.cpp
index 6b60c8bcf9..4e0e70936d 100644
--- a/extension/module/module.cpp
+++ b/extension/module/module.cpp
@@ -167,13 +167,12 @@ runtime::Result<runtime::MethodMeta> Module::method_meta(
 
 runtime::Result<std::vector<runtime::EValue>> Module::execute(
     const std::string& method_name,
-    const std::vector<runtime::EValue>& input_values) {
+    const std::vector<runtime::EValue>& input) {
   ET_CHECK_OK_OR_RETURN_ERROR(load_method(method_name));
   auto& method = methods_.at(method_name).method;
 
-  ET_CHECK_OK_OR_RETURN_ERROR(
-      method->set_inputs(exec_aten::ArrayRef<runtime::EValue>(
-          input_values.data(), input_values.size())));
+  ET_CHECK_OK_OR_RETURN_ERROR(method->set_inputs(
+      exec_aten::ArrayRef<runtime::EValue>(input.data(), input.size())));
   ET_CHECK_OK_OR_RETURN_ERROR(method->execute());
 
   const auto outputs_size = method->outputs_size();
@@ -184,18 +183,13 @@ runtime::Result<std::vector<runtime::EValue>> Module::execute(
   return outputs;
 }
 
-runtime::Error Module::set_output(
-    const std::string& method_name,
+runtime::Error Module::set_output_data_ptr(
     runtime::EValue output_value,
-    size_t output_index) {
+    size_t output_index,
+    const std::string& method_name) {
   ET_CHECK_OK_OR_RETURN_ERROR(load_method(method_name));
+  auto& output_tensor = output_value.toTensor();
   auto& method = methods_.at(method_name).method;
-  ET_CHECK_OR_RETURN_ERROR(
-      output_value.isTensor(),
-      InvalidArgument,
-      "output type: %zu is not tensor",
-      (size_t)output_value.tag);
-  const auto& output_tensor = output_value.toTensor();
   return method->set_output_data_ptr(
       output_tensor.mutable_data_ptr(), output_tensor.nbytes(), output_index);
 }
diff --git a/extension/module/module.h b/extension/module/module.h
index 3f0110d0e8..1a3855c5c0 100644
--- a/extension/module/module.h
+++ b/extension/module/module.h
@@ -165,12 +165,11 @@ class Module {
       const std::string& method_name);
 
   /**
-   * Execute a specific method with the given input values and retrieve the
-   * output values. Loads the program and method before executing if needed.
+   * Execute a specific method with the given input and retrieve output.
+   * Loads the program and method before executing if needed.
    *
    * @param[in] method_name The name of the method to execute.
-   * @param[in] input_values A vector of input values to be passed to the
-   * method.
+   * @param[in] input A vector of input values to be passed to the method.
    *
    * @returns A Result object containing either a vector of output values
    *          from the method or an error to indicate failure.
@@ -178,22 +177,22 @@ class Module {
   ET_NODISCARD
   runtime::Result<std::vector<runtime::EValue>> execute(
       const std::string& method_name,
-      const std::vector<runtime::EValue>& input_values);
+      const std::vector<runtime::EValue>& input);
 
   /**
    * Execute a specific method with a single input value.
    * Loads the program and method before executing if needed.
    *
    * @param[in] method_name The name of the method to execute.
-   * @param[in] input_value A value to be passed to the method.
+   * @param[in] input A value to be passed to the method.
    *
    * @returns A Result object containing either a vector of output values
    *          from the method or an error to indicate failure.
    */
   ET_NODISCARD inline runtime::Result<std::vector<runtime::EValue>> execute(
       const std::string& method_name,
-      const runtime::EValue& input_value) {
-    return execute(method_name, std::vector<runtime::EValue>{input_value});
+      const runtime::EValue& input) {
+    return execute(method_name, std::vector<runtime::EValue>{input});
   }
 
   /**
@@ -211,20 +210,19 @@ class Module {
   }
 
   /**
-   * Retrieve the output value of a specific method with the given input values.
+   * Retrieve the output value of a specific method with the given input.
    * Loads the program and method before execution if needed.
    *
    * @param[in] method_name The name of the method to execute.
-   * @param[in] input_values A vector of input values to be passed to the
-   * method.
+   * @param[in] input A vector of input values to be passed to the method.
    *
    * @returns A Result object containing either the first output value from the
    * method or an error to indicate failure.
    */
   ET_NODISCARD inline runtime::Result<runtime::EValue> get(
       const std::string& method_name,
-      const std::vector<runtime::EValue>& input_values) {
-    auto result = ET_UNWRAP(execute(method_name, input_values));
+      const std::vector<runtime::EValue>& input) {
+    auto result = ET_UNWRAP(execute(method_name, input));
     if (result.empty()) {
       return runtime::Error::InvalidArgument;
     }
@@ -236,15 +234,15 @@ class Module {
    * Loads the program and method before execution if needed.
    *
    * @param[in] method_name The name of the method to execute.
-   * @param[in] input_value A value to be passed to the method.
+   * @param[in] input A value to be passed to the method.
    *
    * @returns A Result object containing either the first output value from the
    * method or an error to indicate failure.
    */
   ET_NODISCARD inline runtime::Result<runtime::EValue> get(
       const std::string& method_name,
-      const runtime::EValue& input_value) {
-    return get(method_name, std::vector<runtime::EValue>{input_value});
+      const runtime::EValue& input) {
+    return get(method_name, std::vector<runtime::EValue>{input});
   }
 
   /**
@@ -262,31 +260,31 @@ class Module {
   }
 
   /**
-   * Execute the 'forward' method with the given input values and retrieve the
-   * output values. Loads the program and method before executing if needed.
+   * Execute the 'forward' method with the given input and retrieve output.
+   * Loads the program and method before executing if needed.
    *
-   * @param[in] input_values A vector of input values for the 'forward' method.
+   * @param[in] input A vector of input values for the 'forward' method.
    *
    * @returns A Result object containing either a vector of output values
    *          from the 'forward' method or an error to indicate failure.
    */
   ET_NODISCARD inline runtime::Result<std::vector<runtime::EValue>> forward(
-      const std::vector<runtime::EValue>& input_values) {
-    return execute("forward", input_values);
+      const std::vector<runtime::EValue>& input) {
+    return execute("forward", input);
   }
 
   /**
    * Execute the 'forward' method with a single value.
    * Loads the program and method before executing if needed.
    *
-   * @param[in] input_value A value for the 'forward' method.
+   * @param[in] input A value for the 'forward' method.
    *
    * @returns A Result object containing either a vector of output values
    *          from the 'forward' method or an error to indicate failure.
    */
   ET_NODISCARD inline runtime::Result<std::vector<runtime::EValue>> forward(
-      const runtime::EValue& input_value) {
-    return forward(std::vector<runtime::EValue>{input_value});
+      const runtime::EValue& input) {
+    return forward(std::vector<runtime::EValue>{input});
   }
 
   /**
@@ -300,42 +298,6 @@ class Module {
     return forward(std::vector<runtime::EValue>{});
   }
 
-  /**
-   * Sets the output tensor for a specific method.
-   *
-   * @param[in] method_name The name of the method.
-   * @param[in] output_value The EValue containing the Tensor to set as the
-   * method output.
-   * @param[in] output_index Zero-based index of the output to set.
-   *
-   * @returns An Error to indicate success or failure.
-   *
-   * @note Only Tensor outputs are currently supported for setting.
-   */
-  ET_NODISCARD
-  runtime::Error set_output(
-      const std::string& method_name,
-      runtime::EValue output_value,
-      size_t output_index = 0);
-
-  /**
-   * Sets the output tensor for the "forward" method.
-   *
-   * @param[in] output_value The EValue containing the Tensor to set as the
-   * method output.
-   * @param[in] output_index Zero-based index of the output to set.
-   *
-   * @returns An Error to indicate success or failure.
-   *
-   * @note Only Tensor outputs are currently supported for setting.
-   */
-  ET_NODISCARD
-  inline runtime::Error set_output(
-      runtime::EValue output_value,
-      size_t output_index = 0) {
-    return set_output("forward", output_value, output_index);
-  }
-
   /**
    * Retrieves the EventTracer instance being used by the Module.
    * EventTracer is used for tracking and logging events during the execution
@@ -348,6 +310,19 @@ class Module {
     return event_tracer_.get();
   }
 
+  /**
+   * Set output data pointer for forward method.
+   *
+   * @param[in] output_value A Tensor for the output of 'forward' method.
+   * @param[in] output_index Index of the output in 'forward' method.
+   *
+   * @returns An Error to indicate success or failure of the loading process.
+   */
+  runtime::Error set_output_data_ptr(
+      runtime::EValue output_value,
+      size_t output_index,
+      const std::string& method_name = "forward");
+
  private:
   struct MethodHolder {
     std::vector<std::vector<uint8_t>> planned_buffers;