forked from triton-inference-server/python_backend
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix error handling for GPU tensors (triton-inference-server#249)
* Fix error handling for GPU tensors
* Fix GPU buffer handling
* Review edit
* Fix for dynamically batched responses with GPU tensor
* Review edits
* Fix unused i variable for GPU=OFF
* Review comments
* Review edit
- Loading branch information
Showing
10 changed files
with
280 additions
and
172 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
// | ||
// Redistribution and use in source and binary forms, with or without | ||
// modification, are permitted provided that the following conditions | ||
// are met: | ||
// * Redistributions of source code must retain the above copyright | ||
// notice, this list of conditions and the following disclaimer. | ||
// * Redistributions in binary form must reproduce the above copyright | ||
// notice, this list of conditions and the following disclaimer in the | ||
// documentation and/or other materials provided with the distribution. | ||
// * Neither the name of NVIDIA CORPORATION nor the names of its | ||
// contributors may be used to endorse or promote products derived | ||
// from this software without specific prior written permission. | ||
// | ||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY | ||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | ||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | ||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
|
||
#include "gpu_buffers.h" | ||
#include "pb_string.h" | ||
|
||
namespace triton { namespace backend { namespace python { | ||
/// Create an empty helper: no buffers queued, no error recorded, and
/// 'Complete' not yet called.
GPUBuffersHelper::GPUBuffersHelper() : completed_(false) {}
|
||
void | ||
GPUBuffersHelper::AddBuffer(const bi::managed_external_buffer::handle_t& handle) | ||
{ | ||
if (completed_) { | ||
throw PythonBackendException( | ||
"It is not possible to add buffers after 'Complete' has been called on " | ||
"a GPUBuffersHelper."); | ||
} | ||
|
||
buffers_.emplace_back(handle); | ||
} | ||
|
||
void | ||
GPUBuffersHelper::SetError( | ||
std::unique_ptr<SharedMemoryManager>& shm_pool, const std::string& error) | ||
{ | ||
error_shm_ = PbString::Create(shm_pool, error); | ||
} | ||
|
||
void | ||
GPUBuffersHelper::Complete(std::unique_ptr<SharedMemoryManager>& shm_pool) | ||
{ | ||
if (completed_) { | ||
throw PythonBackendException( | ||
"Complete has already been called. Complete should only be called " | ||
"once."); | ||
} | ||
gpu_buffers_shm_ = shm_pool->Construct<GPUBuffersShm>(); | ||
if (!error_shm_) { | ||
buffers_handle_shm_ = | ||
shm_pool->Construct<bi::managed_external_buffer::handle_t>( | ||
buffers_.size()); | ||
gpu_buffers_shm_.data_->buffer_count = buffers_.size(); | ||
gpu_buffers_shm_.data_->success = true; | ||
gpu_buffers_shm_.data_->buffers = buffers_handle_shm_.handle_; | ||
for (size_t i = 0; i < buffers_.size(); ++i) { | ||
buffers_handle_shm_.data_.get()[i] = buffers_[i]; | ||
} | ||
} else { | ||
gpu_buffers_shm_.data_->success = false; | ||
gpu_buffers_shm_.data_->error = error_shm_->ShmHandle(); | ||
} | ||
completed_ = true; | ||
} | ||
|
||
|
||
/// Return the handle of the GPUBuffersShm object that 'Complete' constructs.
///
/// NOTE(review): presumably only meaningful after 'Complete' has been called;
/// before that the value is whatever a default-constructed
/// AllocatedSharedMemory holds -- confirm against its definition.
bi::managed_external_buffer::handle_t
GPUBuffersHelper::ShmHandle()
{
  const auto& gpu_buffers = gpu_buffers_shm_;
  return gpu_buffers.handle_;
}
|
||
}}} // namespace triton::backend::python |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
// | ||
// Redistribution and use in source and binary forms, with or without | ||
// modification, are permitted provided that the following conditions | ||
// are met: | ||
// * Redistributions of source code must retain the above copyright | ||
// notice, this list of conditions and the following disclaimer. | ||
// * Redistributions in binary form must reproduce the above copyright | ||
// notice, this list of conditions and the following disclaimer in the | ||
// documentation and/or other materials provided with the distribution. | ||
// * Neither the name of NVIDIA CORPORATION nor the names of its | ||
// contributors may be used to endorse or promote products derived | ||
// from this software without specific prior written permission. | ||
// | ||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY | ||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | ||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | ||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
|
||
#pragma once | ||
|
||
#include "pb_string.h" | ||
#include "pb_utils.h" | ||
#include "scoped_defer.h" | ||
|
||
namespace triton { namespace backend { namespace python { | ||
|
||
/// \param success indicating whether the process of fetching the GPU buffers | ||
/// was successful. | ||
/// \param error if success is equal to false, the error object will be set. | ||
/// \param buffers list of buffers elements. | ||
/// \param buffer_count the number of buffers. | ||
struct GPUBuffersShm { | ||
bool success; | ||
bi::managed_external_buffer::handle_t error; | ||
bi::managed_external_buffer::handle_t buffers; | ||
uint32_t buffer_count; | ||
}; | ||
|
||
/// Helper class to facilitate transfer of metadata associated | ||
/// the GPU buffers in shared memory. | ||
class GPUBuffersHelper { | ||
public: | ||
GPUBuffersHelper(); | ||
void AddBuffer(const bi::managed_external_buffer::handle_t& handle); | ||
void Complete(std::unique_ptr<SharedMemoryManager>& shm_pool); | ||
void SetError( | ||
std::unique_ptr<SharedMemoryManager>& shm_pool, const std::string& error); | ||
bi::managed_external_buffer::handle_t ShmHandle(); | ||
|
||
private: | ||
AllocatedSharedMemory<GPUBuffersShm> gpu_buffers_shm_; | ||
std::vector<bi::managed_external_buffer::handle_t> buffers_; | ||
AllocatedSharedMemory<bi::managed_external_buffer::handle_t> | ||
buffers_handle_shm_; | ||
std::unique_ptr<PbString> error_shm_; | ||
bool completed_; | ||
}; | ||
|
||
}}}; // namespace triton::backend::python |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.