From 034e09808907821974f8c7b10cf68bb4ebe832ed Mon Sep 17 00:00:00 2001 From: generatedunixname89002005232357 Date: Fri, 13 Sep 2024 14:09:54 -0700 Subject: [PATCH] Revert D62617066 (#5351) Summary: Pull Request resolved: https://github.com/pytorch/executorch/pull/5351 This diff reverts D62617066 Breaking several jobs in OSS CI, as well as the seamless model internally Reviewed By: lucylq, shoumikhin, dbort Differential Revision: D62650762 fbshipit-source-id: 8205d32e498f51745529e174082fbcc36a8dd384 --- .../oss_scripts/llama2/runner/runner.cpp | 8 +- .../qaihub_scripts/llama/runner/io_memory.cpp | 5 +- .../qaihub_scripts/llama/runner/runner.cpp | 3 +- .../stable_diffusion/runner/runner.cpp | 10 +- extension/module/module.cpp | 20 ++-- extension/module/module.h | 95 +++++++------------ 6 files changed, 56 insertions(+), 85 deletions(-) diff --git a/examples/qualcomm/oss_scripts/llama2/runner/runner.cpp b/examples/qualcomm/oss_scripts/llama2/runner/runner.cpp index 0085e0069c..86fa8d0eb8 100644 --- a/examples/qualcomm/oss_scripts/llama2/runner/runner.cpp +++ b/examples/qualcomm/oss_scripts/llama2/runner/runner.cpp @@ -187,7 +187,7 @@ Result Runner::run_model_step( *kv_outputs[j], new_out_addr, kv_outputs[j]->nbytes()) == Error::Ok, "Failed to set output tensor when updating v_cache"); ET_CHECK_MSG( - module_->set_output(kv_outputs[j], j + 1) == Error::Ok, + module_->set_output_data_ptr(*kv_outputs[j], j + 1) == Error::Ok, "Failed to set llama output data pointer"); } @@ -291,7 +291,7 @@ Error Runner::generate( sizes, kv_tensors.back()->scalar_type())); ET_CHECK_MSG( - module_->set_output(kv_outputs.back(), i + 1) == Error::Ok, + module_->set_output_data_ptr(kv_outputs.back(), i + 1) == Error::Ok, "Failed to set output tensor for kv cache"); } @@ -323,7 +323,7 @@ Error Runner::generate( sizes, kv_tensors.back()->scalar_type())); ET_CHECK_MSG( - module_->set_output(kv_outputs.back(), output_index) == + module_->set_output_data_ptr(kv_outputs.back(), output_index) == 
Error::Ok, "Failed to set output tensor for llama block"); } @@ -333,7 +333,7 @@ Error Runner::generate( logits_data_shape, ScalarType::Float); ET_CHECK_MSG( - module_->set_output(affine_logits) == Error::Ok, + module_->set_output_data_ptr(affine_logits, 0) == Error::Ok, "Failed to set output tensor for affine module - logits"); // Start consuming user's prompts and generating new tokens diff --git a/examples/qualcomm/qaihub_scripts/llama/runner/io_memory.cpp b/examples/qualcomm/qaihub_scripts/llama/runner/io_memory.cpp index 163bd67f8a..3283d81a9f 100644 --- a/examples/qualcomm/qaihub_scripts/llama/runner/io_memory.cpp +++ b/examples/qualcomm/qaihub_scripts/llama/runner/io_memory.cpp @@ -427,7 +427,7 @@ void KVCachedMemory::update_io( // k, v are placed interleaved int index = (cache_stride << 1) + (cache_group << 5) + head; ET_CHECK_MSG( - modules_[shard]->set_output( + modules_[shard]->set_output_data_ptr( output_tensors[shard][index], index) == Error::Ok, "failed to set output tensor for module %d's %d'th output " "while updating kv_cache output tensors", @@ -450,7 +450,8 @@ void KVCachedMemory::update_io( for (int shard = 0; shard < output_tensors.size(); shard++) { for (int index = 0; index < output_tensors[shard].size(); index++) { ET_CHECK_MSG( - modules_[shard]->set_output(output_tensors[shard][index], index) == Error::Ok, + modules_[shard]->set_output_data_ptr( + output_tensors[shard][index], index) == Error::Ok, "failed to set output tensor for module %d's %d'th output " "while updating kv_cache output tensors", shard, diff --git a/examples/qualcomm/qaihub_scripts/llama/runner/runner.cpp b/examples/qualcomm/qaihub_scripts/llama/runner/runner.cpp index c928cea71c..d6d9911293 100644 --- a/examples/qualcomm/qaihub_scripts/llama/runner/runner.cpp +++ b/examples/qualcomm/qaihub_scripts/llama/runner/runner.cpp @@ -177,7 +177,8 @@ Error Runner::generate( output_tensors.emplace_back(io_mem_->get_output_tensors(i)); for (size_t j = 0; j < output_tensors[i].size(); 
++j) { ET_CHECK_MSG( - modules_[i]->set_output(output_tensors[i][j], j) == Error::Ok, + modules_[i]->set_output_data_ptr(output_tensors[i][j], j) == + Error::Ok, "failed to set output tensor for module %d's %zu'th output", i, j); diff --git a/examples/qualcomm/qaihub_scripts/stable_diffusion/runner/runner.cpp b/examples/qualcomm/qaihub_scripts/stable_diffusion/runner/runner.cpp index fddd3527b5..b6c211d8ac 100644 --- a/examples/qualcomm/qaihub_scripts/stable_diffusion/runner/runner.cpp +++ b/examples/qualcomm/qaihub_scripts/stable_diffusion/runner/runner.cpp @@ -373,11 +373,11 @@ Error Runner::generate(std::string prompt) { uncond_emb_vec.data(), {1, 77, 1024}, encoder_method_meta.output_tensor_meta(0)->scalar_type()); - modules_[0]->set_output(cond_emb_tensor); + modules_[0]->set_output_data_ptr(cond_emb_tensor, 0); long encoder_start = util::time_in_ms(); auto cond_res = modules_[0]->forward(cond_tokens_tensor); stats_.text_encoder_execution_time += (util::time_in_ms() - encoder_start); - modules_[0]->set_output(uncond_emb_tensor); + modules_[0]->set_output_data_ptr(uncond_emb_tensor, 0); encoder_start = util::time_in_ms(); auto uncond_res = modules_[0]->forward(uncond_tokens_tensor); stats_.text_encoder_execution_time += (util::time_in_ms() - encoder_start); @@ -462,13 +462,13 @@ Error Runner::generate(std::string prompt) { stats_.unet_aggregate_post_processing_time += (util::time_in_ms() - start_post_process); - modules_[1]->set_output(noise_pred_text_tensor); + modules_[1]->set_output_data_ptr(noise_pred_text_tensor, 0); long start_unet_execution = util::time_in_ms(); auto cond_res = modules_[1]->forward( {latent_tensor, time_emb_tensors[step_index], cond_emb_tensor}); stats_.unet_aggregate_execution_time += (util::time_in_ms() - start_unet_execution); - modules_[1]->set_output(noise_pred_uncond_tensor); + modules_[1]->set_output_data_ptr(noise_pred_uncond_tensor, 0); start_unet_execution = util::time_in_ms(); auto uncond_res = modules_[1]->forward( 
{latent_tensor, @@ -519,7 +519,7 @@ Error Runner::generate(std::string prompt) { quant_tensor(latent, vae_input, vae_input_scale_, vae_input_offset_); - modules_[2]->set_output(output_tensor); + modules_[2]->set_output_data_ptr(output_tensor, 0); long start_vae_execution = util::time_in_ms(); auto vae_res = modules_[2]->forward(vae_input_tensor); stats_.vae_execution_time = (util::time_in_ms() - start_vae_execution); diff --git a/extension/module/module.cpp b/extension/module/module.cpp index 6b60c8bcf9..4e0e70936d 100644 --- a/extension/module/module.cpp +++ b/extension/module/module.cpp @@ -167,13 +167,12 @@ runtime::Result Module::method_meta( runtime::Result> Module::execute( const std::string& method_name, - const std::vector& input_values) { + const std::vector& input) { ET_CHECK_OK_OR_RETURN_ERROR(load_method(method_name)); auto& method = methods_.at(method_name).method; - ET_CHECK_OK_OR_RETURN_ERROR( - method->set_inputs(exec_aten::ArrayRef( - input_values.data(), input_values.size()))); + ET_CHECK_OK_OR_RETURN_ERROR(method->set_inputs( + exec_aten::ArrayRef(input.data(), input.size()))); ET_CHECK_OK_OR_RETURN_ERROR(method->execute()); const auto outputs_size = method->outputs_size(); @@ -184,18 +183,13 @@ runtime::Result> Module::execute( return outputs; } -runtime::Error Module::set_output( - const std::string& method_name, +runtime::Error Module::set_output_data_ptr( runtime::EValue output_value, - size_t output_index) { + size_t output_index, + const std::string& method_name) { ET_CHECK_OK_OR_RETURN_ERROR(load_method(method_name)); + auto& output_tensor = output_value.toTensor(); auto& method = methods_.at(method_name).method; - ET_CHECK_OR_RETURN_ERROR( - output_value.isTensor(), - InvalidArgument, - "output type: %zu is not tensor", - (size_t)output_value.tag); - const auto& output_tensor = output_value.toTensor(); return method->set_output_data_ptr( output_tensor.mutable_data_ptr(), output_tensor.nbytes(), output_index); } diff --git 
a/extension/module/module.h b/extension/module/module.h index 3f0110d0e8..1a3855c5c0 100644 --- a/extension/module/module.h +++ b/extension/module/module.h @@ -165,12 +165,11 @@ class Module { const std::string& method_name); /** - * Execute a specific method with the given input values and retrieve the - * output values. Loads the program and method before executing if needed. + * Execute a specific method with the given input and retrieve output. + * Loads the program and method before executing if needed. * * @param[in] method_name The name of the method to execute. - * @param[in] input_values A vector of input values to be passed to the - * method. + * @param[in] input A vector of input values to be passed to the method. * * @returns A Result object containing either a vector of output values * from the method or an error to indicate failure. @@ -178,22 +177,22 @@ class Module { ET_NODISCARD runtime::Result> execute( const std::string& method_name, - const std::vector& input_values); + const std::vector& input); /** * Execute a specific method with a single input value. * Loads the program and method before executing if needed. * * @param[in] method_name The name of the method to execute. - * @param[in] input_value A value to be passed to the method. + * @param[in] input A value to be passed to the method. * * @returns A Result object containing either a vector of output values * from the method or an error to indicate failure. */ ET_NODISCARD inline runtime::Result> execute( const std::string& method_name, - const runtime::EValue& input_value) { - return execute(method_name, std::vector{input_value}); + const runtime::EValue& input) { + return execute(method_name, std::vector{input}); } /** @@ -211,20 +210,19 @@ class Module { } /** - * Retrieve the output value of a specific method with the given input values. + * Retrieve the output value of a specific method with the given input. * Loads the program and method before execution if needed. 
* * @param[in] method_name The name of the method to execute. - * @param[in] input_values A vector of input values to be passed to the - * method. + * @param[in] input A vector of input values to be passed to the method. * * @returns A Result object containing either the first output value from the * method or an error to indicate failure. */ ET_NODISCARD inline runtime::Result get( const std::string& method_name, - const std::vector& input_values) { - auto result = ET_UNWRAP(execute(method_name, input_values)); + const std::vector& input) { + auto result = ET_UNWRAP(execute(method_name, input)); if (result.empty()) { return runtime::Error::InvalidArgument; } @@ -236,15 +234,15 @@ class Module { * Loads the program and method before execution if needed. * * @param[in] method_name The name of the method to execute. - * @param[in] input_value A value to be passed to the method. + * @param[in] input A value to be passed to the method. * * @returns A Result object containing either the first output value from the * method or an error to indicate failure. */ ET_NODISCARD inline runtime::Result get( const std::string& method_name, - const runtime::EValue& input_value) { - return get(method_name, std::vector{input_value}); + const runtime::EValue& input) { + return get(method_name, std::vector{input}); } /** @@ -262,31 +260,31 @@ class Module { } /** - * Execute the 'forward' method with the given input values and retrieve the - * output values. Loads the program and method before executing if needed. + * Execute the 'forward' method with the given input and retrieve output. + * Loads the program and method before executing if needed. * - * @param[in] input_values A vector of input values for the 'forward' method. + * @param[in] input A vector of input values for the 'forward' method. * * @returns A Result object containing either a vector of output values * from the 'forward' method or an error to indicate failure. 
*/ ET_NODISCARD inline runtime::Result> forward( - const std::vector& input_values) { - return execute("forward", input_values); + const std::vector& input) { + return execute("forward", input); } /** * Execute the 'forward' method with a single value. * Loads the program and method before executing if needed. * - * @param[in] input_value A value for the 'forward' method. + * @param[in] input A value for the 'forward' method. * * @returns A Result object containing either a vector of output values * from the 'forward' method or an error to indicate failure. */ ET_NODISCARD inline runtime::Result> forward( - const runtime::EValue& input_value) { - return forward(std::vector{input_value}); + const runtime::EValue& input) { + return forward(std::vector{input}); } /** @@ -300,42 +298,6 @@ class Module { return forward(std::vector{}); } - /** - * Sets the output tensor for a specific method. - * - * @param[in] method_name The name of the method. - * @param[in] output_value The EValue containing the Tensor to set as the - * method output. - * @param[in] output_index Zero-based index of the output to set. - * - * @returns An Error to indicate success or failure. - * - * @note Only Tensor outputs are currently supported for setting. - */ - ET_NODISCARD - runtime::Error set_output( - const std::string& method_name, - runtime::EValue output_value, - size_t output_index = 0); - - /** - * Sets the output tensor for the "forward" method. - * - * @param[in] output_value The EValue containing the Tensor to set as the - * method output. - * @param[in] output_index Zero-based index of the output to set. - * - * @returns An Error to indicate success or failure. - * - * @note Only Tensor outputs are currently supported for setting. - */ - ET_NODISCARD - inline runtime::Error set_output( - runtime::EValue output_value, - size_t output_index = 0) { - return set_output("forward", output_value, output_index); - } - /** * Retrieves the EventTracer instance being used by the Module. 
* EventTracer is used for tracking and logging events during the execution @@ -348,6 +310,19 @@ class Module { return event_tracer_.get(); } + /** + * Set output data pointer for forward method. + * + * @param[in] output_value A Tensor for the output of 'forward' method. + * @param[in] output_index Index of the output in 'forward' method. + * + * @returns An Error to indicate success or failure of the loading process. + */ + runtime::Error set_output_data_ptr( + runtime::EValue output_value, + size_t output_index, + const std::string& method_name = "forward"); + private: struct MethodHolder { std::vector> planned_buffers;