Revert D62617066 (#5351)
Summary:
Pull Request resolved: #5351

This diff reverts D62617066, which was breaking several jobs in OSS CI as well as the seamless model internally.

Reviewed By: lucylq, shoumikhin, dbort

Differential Revision: D62650762

fbshipit-source-id: 8205d32e498f51745529e174082fbcc36a8dd384
generatedunixname89002005232357 authored and facebook-github-bot committed Sep 13, 2024
1 parent 0d1644f commit 034e098
Showing 6 changed files with 56 additions and 85 deletions.
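For orientation before the per-file diffs: the revert removes the Module::set_output(method_name, output_value, output_index) spelling introduced by D62617066 and restores Module::set_output_data_ptr(output_value, output_index, method_name = "forward"), which binds caller-owned memory to a method output. A minimal, hedged sketch of the restored call pattern (the function, variable names, and elided namespace qualification are illustrative, not taken from this commit):

    #include <executorch/extension/module/module.h>

    // Hedged sketch: point output 0 of "forward" at a caller-owned tensor,
    // then run the method. Module, EValue, and Error are the types declared
    // in module.h; exact namespace qualification depends on the ExecuTorch
    // version in use.
    Error bind_and_run(Module& module, EValue logits, const EValue& tokens) {
      // Restored signature: (output_value, output_index, method_name = "forward").
      auto status = module.set_output_data_ptr(logits, /*output_index=*/0);
      if (status != Error::Ok) {
        return status;
      }
      // forward() loads the program and method on first use if needed and
      // writes output 0 into the buffer backing `logits`.
      auto outputs = module.forward(tokens);
      return outputs.error();
    }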
8 changes: 4 additions & 4 deletions examples/qualcomm/oss_scripts/llama2/runner/runner.cpp
@@ -187,7 +187,7 @@ Result<exec_aten::Tensor> Runner::run_model_step(
*kv_outputs[j], new_out_addr, kv_outputs[j]->nbytes()) == Error::Ok,
"Failed to set output tensor when updating v_cache");
ET_CHECK_MSG(
- module_->set_output(kv_outputs[j], j + 1) == Error::Ok,
+ module_->set_output_data_ptr(*kv_outputs[j], j + 1) == Error::Ok,
"Failed to set llama output data pointer");
}

@@ -291,7 +291,7 @@ Error Runner::generate(
sizes,
kv_tensors.back()->scalar_type()));
ET_CHECK_MSG(
- module_->set_output(kv_outputs.back(), i + 1) == Error::Ok,
+ module_->set_output_data_ptr(kv_outputs.back(), i + 1) == Error::Ok,
"Failed to set output tensor for kv cache");
}

@@ -323,7 +323,7 @@ Error Runner::generate(
sizes,
kv_tensors.back()->scalar_type()));
ET_CHECK_MSG(
- module_->set_output(kv_outputs.back(), output_index) ==
+ module_->set_output_data_ptr(kv_outputs.back(), output_index) ==
Error::Ok,
"Failed to set output tensor for llama block");
}
@@ -333,7 +333,7 @@ Error Runner::generate(
logits_data_shape,
ScalarType::Float);
ET_CHECK_MSG(
- module_->set_output(affine_logits) == Error::Ok,
+ module_->set_output_data_ptr(affine_logits, 0) == Error::Ok,
"Failed to set output tensor for affine module - logits");

// Start consuming user's prompts and generating new tokens
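The pattern in this runner (and in the QAIHub runners below) is to pre-allocate one tensor per KV-cache entry outside the runtime and re-point the method's outputs at those buffers on every step, so the cache is updated in place. A hedged sketch of that rebinding loop, reusing the names from the diff above but omitting how the tensors are created:

    // Hedged sketch: output 0 is the logits, outputs 1..N are the KV-cache
    // tensors, so each cache tensor kv_outputs[j] is bound to output j + 1.
    for (size_t j = 0; j < kv_outputs.size(); ++j) {
      ET_CHECK_MSG(
          module_->set_output_data_ptr(*kv_outputs[j], j + 1) == Error::Ok,
          "Failed to set llama output data pointer");
    }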
5 changes: 3 additions & 2 deletions examples/qualcomm/qaihub_scripts/llama/runner/io_memory.cpp
@@ -427,7 +427,7 @@ void KVCachedMemory::update_io(
// k, v are placed interleaved
int index = (cache_stride << 1) + (cache_group << 5) + head;
ET_CHECK_MSG(
- modules_[shard]->set_output(
+ modules_[shard]->set_output_data_ptr(
output_tensors[shard][index], index) == Error::Ok,
"failed to set output tensor for module %d's %d'th output "
"while updating kv_cache output tensors",
@@ -450,7 +450,8 @@ void KVCachedMemory::update_io(
for (int shard = 0; shard < output_tensors.size(); shard++) {
for (int index = 0; index < output_tensors[shard].size(); index++) {
ET_CHECK_MSG(
- modules_[shard]->set_output(output_tensors[shard][index], index) == Error::Ok,
+ modules_[shard]->set_output_data_ptr(
+ output_tensors[shard][index], index) == Error::Ok,
"failed to set output tensor for module %d's %d'th output "
"while updating kv_cache output tensors",
shard,
3 changes: 2 additions & 1 deletion examples/qualcomm/qaihub_scripts/llama/runner/runner.cpp
@@ -177,7 +177,8 @@ Error Runner::generate(
output_tensors.emplace_back(io_mem_->get_output_tensors(i));
for (size_t j = 0; j < output_tensors[i].size(); ++j) {
ET_CHECK_MSG(
- modules_[i]->set_output(output_tensors[i][j], j) == Error::Ok,
+ modules_[i]->set_output_data_ptr(output_tensors[i][j], j) ==
+ Error::Ok,
"failed to set output tensor for module %d's %zu'th output",
i,
j);
@@ -373,11 +373,11 @@ Error Runner::generate(std::string prompt) {
uncond_emb_vec.data(),
{1, 77, 1024},
encoder_method_meta.output_tensor_meta(0)->scalar_type());
- modules_[0]->set_output(cond_emb_tensor);
+ modules_[0]->set_output_data_ptr(cond_emb_tensor, 0);
long encoder_start = util::time_in_ms();
auto cond_res = modules_[0]->forward(cond_tokens_tensor);
stats_.text_encoder_execution_time += (util::time_in_ms() - encoder_start);
- modules_[0]->set_output(uncond_emb_tensor);
+ modules_[0]->set_output_data_ptr(uncond_emb_tensor, 0);
encoder_start = util::time_in_ms();
auto uncond_res = modules_[0]->forward(uncond_tokens_tensor);
stats_.text_encoder_execution_time += (util::time_in_ms() - encoder_start);
@@ -462,13 +462,13 @@ Error Runner::generate(std::string prompt) {

stats_.unet_aggregate_post_processing_time +=
(util::time_in_ms() - start_post_process);
- modules_[1]->set_output(noise_pred_text_tensor);
+ modules_[1]->set_output_data_ptr(noise_pred_text_tensor, 0);
long start_unet_execution = util::time_in_ms();
auto cond_res = modules_[1]->forward(
{latent_tensor, time_emb_tensors[step_index], cond_emb_tensor});
stats_.unet_aggregate_execution_time +=
(util::time_in_ms() - start_unet_execution);
- modules_[1]->set_output(noise_pred_uncond_tensor);
+ modules_[1]->set_output_data_ptr(noise_pred_uncond_tensor, 0);
start_unet_execution = util::time_in_ms();
auto uncond_res = modules_[1]->forward(
{latent_tensor,
@@ -519,7 +519,7 @@ Error Runner::generate(std::string prompt) {

quant_tensor(latent, vae_input, vae_input_scale_, vae_input_offset_);

- modules_[2]->set_output(output_tensor);
+ modules_[2]->set_output_data_ptr(output_tensor, 0);
long start_vae_execution = util::time_in_ms();
auto vae_res = modules_[2]->forward(vae_input_tensor);
stats_.vae_execution_time = (util::time_in_ms() - start_vae_execution);
20 changes: 7 additions & 13 deletions extension/module/module.cpp
@@ -167,13 +167,12 @@ runtime::Result<runtime::MethodMeta> Module::method_meta(

runtime::Result<std::vector<runtime::EValue>> Module::execute(
const std::string& method_name,
- const std::vector<runtime::EValue>& input_values) {
+ const std::vector<runtime::EValue>& input) {
ET_CHECK_OK_OR_RETURN_ERROR(load_method(method_name));
auto& method = methods_.at(method_name).method;

- ET_CHECK_OK_OR_RETURN_ERROR(
- method->set_inputs(exec_aten::ArrayRef<runtime::EValue>(
- input_values.data(), input_values.size())));
+ ET_CHECK_OK_OR_RETURN_ERROR(method->set_inputs(
+ exec_aten::ArrayRef<runtime::EValue>(input.data(), input.size())));
ET_CHECK_OK_OR_RETURN_ERROR(method->execute());

const auto outputs_size = method->outputs_size();
@@ -184,18 +183,13 @@ runtime::Result<std::vector<runtime::EValue>> Module::execute(
return outputs;
}

- runtime::Error Module::set_output(
- const std::string& method_name,
+ runtime::Error Module::set_output_data_ptr(
runtime::EValue output_value,
- size_t output_index) {
+ size_t output_index,
+ const std::string& method_name) {
ET_CHECK_OK_OR_RETURN_ERROR(load_method(method_name));
+ auto& output_tensor = output_value.toTensor();
auto& method = methods_.at(method_name).method;
- ET_CHECK_OR_RETURN_ERROR(
- output_value.isTensor(),
- InvalidArgument,
- "output type: %zu is not tensor",
- (size_t)output_value.tag);
- const auto& output_tensor = output_value.toTensor();
return method->set_output_data_ptr(
output_tensor.mutable_data_ptr(), output_tensor.nbytes(), output_index);
}
95 changes: 35 additions & 60 deletions extension/module/module.h
@@ -165,35 +165,34 @@ class Module {
const std::string& method_name);

/**
- * Execute a specific method with the given input values and retrieve the
- * output values. Loads the program and method before executing if needed.
+ * Execute a specific method with the given input and retrieve output.
+ * Loads the program and method before executing if needed.
*
* @param[in] method_name The name of the method to execute.
- * @param[in] input_values A vector of input values to be passed to the
- * method.
+ * @param[in] input A vector of input values to be passed to the method.
*
* @returns A Result object containing either a vector of output values
* from the method or an error to indicate failure.
*/
ET_NODISCARD
runtime::Result<std::vector<runtime::EValue>> execute(
const std::string& method_name,
- const std::vector<runtime::EValue>& input_values);
+ const std::vector<runtime::EValue>& input);

/**
* Execute a specific method with a single input value.
* Loads the program and method before executing if needed.
*
* @param[in] method_name The name of the method to execute.
- * @param[in] input_value A value to be passed to the method.
+ * @param[in] input A value to be passed to the method.
*
* @returns A Result object containing either a vector of output values
* from the method or an error to indicate failure.
*/
ET_NODISCARD inline runtime::Result<std::vector<runtime::EValue>> execute(
const std::string& method_name,
- const runtime::EValue& input_value) {
- return execute(method_name, std::vector<runtime::EValue>{input_value});
+ const runtime::EValue& input) {
+ return execute(method_name, std::vector<runtime::EValue>{input});
}

/**
@@ -211,20 +210,19 @@ class Module {
}

/**
- * Retrieve the output value of a specific method with the given input values.
+ * Retrieve the output value of a specific method with the given input.
* Loads the program and method before execution if needed.
*
* @param[in] method_name The name of the method to execute.
- * @param[in] input_values A vector of input values to be passed to the
- * method.
+ * @param[in] input A vector of input values to be passed to the method.
*
* @returns A Result object containing either the first output value from the
* method or an error to indicate failure.
*/
ET_NODISCARD inline runtime::Result<runtime::EValue> get(
const std::string& method_name,
- const std::vector<runtime::EValue>& input_values) {
- auto result = ET_UNWRAP(execute(method_name, input_values));
+ const std::vector<runtime::EValue>& input) {
+ auto result = ET_UNWRAP(execute(method_name, input));
if (result.empty()) {
return runtime::Error::InvalidArgument;
}
@@ -236,15 +234,15 @@
* Loads the program and method before execution if needed.
*
* @param[in] method_name The name of the method to execute.
- * @param[in] input_value A value to be passed to the method.
+ * @param[in] input A value to be passed to the method.
*
* @returns A Result object containing either the first output value from the
* method or an error to indicate failure.
*/
ET_NODISCARD inline runtime::Result<runtime::EValue> get(
const std::string& method_name,
- const runtime::EValue& input_value) {
- return get(method_name, std::vector<runtime::EValue>{input_value});
+ const runtime::EValue& input) {
+ return get(method_name, std::vector<runtime::EValue>{input});
}

/**
@@ -262,31 +260,31 @@
}

/**
- * Execute the 'forward' method with the given input values and retrieve the
- * output values. Loads the program and method before executing if needed.
+ * Execute the 'forward' method with the given input and retrieve output.
+ * Loads the program and method before executing if needed.
*
- * @param[in] input_values A vector of input values for the 'forward' method.
+ * @param[in] input A vector of input values for the 'forward' method.
*
* @returns A Result object containing either a vector of output values
* from the 'forward' method or an error to indicate failure.
*/
ET_NODISCARD inline runtime::Result<std::vector<runtime::EValue>> forward(
- const std::vector<runtime::EValue>& input_values) {
- return execute("forward", input_values);
+ const std::vector<runtime::EValue>& input) {
+ return execute("forward", input);
}

/**
* Execute the 'forward' method with a single value.
* Loads the program and method before executing if needed.
*
- * @param[in] input_value A value for the 'forward' method.
+ * @param[in] input A value for the 'forward' method.
*
* @returns A Result object containing either a vector of output values
* from the 'forward' method or an error to indicate failure.
*/
ET_NODISCARD inline runtime::Result<std::vector<runtime::EValue>> forward(
- const runtime::EValue& input_value) {
- return forward(std::vector<runtime::EValue>{input_value});
+ const runtime::EValue& input) {
+ return forward(std::vector<runtime::EValue>{input});
}

/**
@@ -300,42 +298,6 @@ class Module {
return forward(std::vector<runtime::EValue>{});
}

- /**
- * Sets the output tensor for a specific method.
- *
- * @param[in] method_name The name of the method.
- * @param[in] output_value The EValue containing the Tensor to set as the
- * method output.
- * @param[in] output_index Zero-based index of the output to set.
- *
- * @returns An Error to indicate success or failure.
- *
- * @note Only Tensor outputs are currently supported for setting.
- */
- ET_NODISCARD
- runtime::Error set_output(
- const std::string& method_name,
- runtime::EValue output_value,
- size_t output_index = 0);
-
- /**
- * Sets the output tensor for the "forward" method.
- *
- * @param[in] output_value The EValue containing the Tensor to set as the
- * method output.
- * @param[in] output_index Zero-based index of the output to set.
- *
- * @returns An Error to indicate success or failure.
- *
- * @note Only Tensor outputs are currently supported for setting.
- */
- ET_NODISCARD
- inline runtime::Error set_output(
- runtime::EValue output_value,
- size_t output_index = 0) {
- return set_output("forward", output_value, output_index);
- }
-
/**
* Retrieves the EventTracer instance being used by the Module.
* EventTracer is used for tracking and logging events during the execution
@@ -348,6 +310,19 @@
return event_tracer_.get();
}

+ /**
+ * Set output data pointer for forward method.
+ *
+ * @param[in] output_value A Tensor for the output of 'forward' method.
+ * @param[in] output_index Index of the output in 'forward' method.
+ *
+ * @returns An Error to indicate success or failure of the loading process.
+ */
+ runtime::Error set_output_data_ptr(
+ runtime::EValue output_value,
+ size_t output_index,
+ const std::string& method_name = "forward");
+
private:
struct MethodHolder {
std::vector<std::vector<uint8_t>> planned_buffers;
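Net effect of the module.h changes: the set_output overloads (method name first, index defaulting to 0) are removed, set_output_data_ptr (tensor first, index second, method name last and defaulting to "forward") is restored, and the execute/get/forward parameters go back from input_values/input_value to input. A hedged sketch of calling a non-"forward" method through the restored interface ("prefill" and the input variables are illustrative names, not from this commit):

    // Hedged sketch: run a named method with two inputs and read the first
    // output. get(method_name, input) in the header above wraps the same
    // pattern and returns only that first value.
    std::vector<EValue> inputs = {token_ids, attention_mask};
    auto result = module.execute("prefill", inputs);
    if (result.ok() && !result->empty()) {
      EValue logits = result->at(0);
      // ... sample the next token from logits ...
    }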
