opened on Jul 29, 2021
Describe the bug
GPU memory is not released.
System information
- OS Platform and Distribution: Linux Ubuntu 16.04
- ONNX Runtime installed from: source
- ONNX Runtime version: 1.6.0
- GCC/Compiler version: 5.4.0
To Reproduce
static constexpr int64_t IMG_WIDTH = 224;
static constexpr int64_t IMG_HEIGHT = 224;
static constexpr int64_t IMG_CHANNEL = 3;
cv::Mat img = cv::imread(img_path);
if (img.empty())
{
std::cerr << "Failed to read input image" << std::endl;
return EXIT_FAILURE;
}
cv::resize(img, img, cv::Size(IMG_WIDTH, IMG_HEIGHT));
// Convert the uint8 BGR image to float before copying; the original memcpy
// copied dataLength bytes rather than dataLength floats from uint8 data.
img.convertTo(img, CV_32FC3);
int64_t dataLength = IMG_HEIGHT * IMG_WIDTH * IMG_CHANNEL;
float* dst = new float[dataLength];
memcpy(dst, img.data, dataLength * sizeof(float));
std::vector<float*> inputData = {dst};
std::string m_modelPath = model_path;
Ort::AllocatorWithDefaultOptions m_ortAllocator;
int m_gpuIdx = 6;
std::vector<std::vector<int64_t>> m_inputShapes;
std::vector<std::vector<int64_t>> m_outputShapes;
std::vector<int64_t> m_inputTensorSizes;
std::vector<int64_t> m_outputTensorSizes;
uint8_t m_numInputs;
uint8_t m_numOutputs;
std::vector<char*> m_inputNodeNames;
std::vector<char*> m_outputNodeNames;
bool m_inputShapesProvided = false;
Ort::Env m_env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "test");
Ort::SessionOptions sessionOptions;
// TODO: revisit this setting; it affects CPU consumption when ONNX Runtime
// is built with OpenMP
sessionOptions.SetIntraOpNumThreads(1);
if (m_gpuIdx != -1)
{
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, m_gpuIdx));
}
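// (Added note, not in the original report.) Newer ONNX Runtime releases
// expose OrtCUDAProviderOptions, which lets the CUDA provider's arena be
// capped so it cannot grow unboundedly; a hedged sketch, assuming a release
// that ships this struct (1.6.0 may not):
// OrtCUDAProviderOptions cudaOptions{};
// cudaOptions.device_id = m_gpuIdx;
// cudaOptions.gpu_mem_limit = 2ULL * 1024 * 1024 * 1024;  // cap the arena at 2 GB
// sessionOptions.AppendExecutionProvider_CUDA(cudaOptions);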
sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
Ort::Session m_session = Ort::Session(m_env, m_modelPath.c_str(), sessionOptions);
m_numInputs = m_session.GetInputCount();
DEBUG_LOG("Model number of inputs: %d\n", m_numInputs);
m_inputNodeNames.reserve(m_numInputs);
m_inputTensorSizes.reserve(m_numInputs);
m_numOutputs = m_session.GetOutputCount();
DEBUG_LOG("Model number of outputs: %d\n", m_numOutputs);
m_outputNodeNames.reserve(m_numOutputs);
m_outputTensorSizes.reserve(m_numOutputs);
// Determine the model input shapes
for (int i = 0; i < m_numInputs; i++)
{
if (!m_inputShapesProvided)
{
Ort::TypeInfo typeInfo = m_session.GetInputTypeInfo(i);
auto tensorInfo = typeInfo.GetTensorTypeAndShapeInfo();
m_inputShapes.emplace_back(tensorInfo.GetShape());
}
const auto& curInputShape = m_inputShapes[i];
m_inputTensorSizes.emplace_back(
std::accumulate(std::begin(curInputShape), std::end(curInputShape), int64_t(1), std::multiplies<int64_t>()));
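// Note (added): GetShape() reports dynamic dimensions as -1, which would make
// this accumulated element count negative; such dims must be fixed up (e.g. to
// the actual batch size) before computing the tensor size.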
char* inputName = m_session.GetInputName(i, m_ortAllocator);
m_inputNodeNames.emplace_back(strdup(inputName));
m_ortAllocator.Free(inputName);
}
// Determine the model output shapes
for (int i = 0; i < m_numOutputs; ++i)
{
Ort::TypeInfo typeInfo = m_session.GetOutputTypeInfo(i);
auto tensorInfo = typeInfo.GetTensorTypeAndShapeInfo();
m_outputShapes.emplace_back(tensorInfo.GetShape());
char* outputName = m_session.GetOutputName(i, m_ortAllocator);
m_outputNodeNames.emplace_back(strdup(outputName));
m_ortAllocator.Free(outputName);
}
if (m_numInputs != inputData.size())
{
throw std::runtime_error("Mismatched input data size\n");
}
Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
std::vector<Ort::Value> inputTensors;
inputTensors.reserve(m_numInputs);
for (int i = 0; i < m_numInputs; ++i)
{
inputTensors.emplace_back(Ort::Value::CreateTensor<float>(
memoryInfo, inputData[i], m_inputTensorSizes[i],
m_inputShapes[i].data(), m_inputShapes[i].size()));
}
auto outputTensors = m_session.Run(Ort::RunOptions{nullptr}, m_inputNodeNames.data(), inputTensors.data(),
m_numInputs, m_outputNodeNames.data(), m_numOutputs);
assert(outputTensors.size() == m_numOutputs);
std::vector<DataOutputType> outputData;
outputData.reserve(m_numOutputs);
int count = 1;
for (auto& elem : outputTensors)
{
DEBUG_LOG("type of input %d: %s", count++, toString(elem.GetTensorTypeAndShapeInfo().GetElementType()).c_str());
outputData.emplace_back(
std::make_pair(std::move(elem.GetTensorMutableData<float>()), elem.GetTensorTypeAndShapeInfo().GetShape()));
}
std::cout << "interface success. " << std::endl;
// Ort::GetApi().ReleaseSession(m_session.release());
Ort::OrtRelease(m_session.release());
Ort::OrtRelease(sessionOptions.release());
Ort::OrtRelease(m_env.release());
for (int i = 0; i < m_numInputs; ++i)
{
Ort::OrtRelease(inputTensors[i].release());
}
for (int i = 0; i < m_numOutputs; ++i)
{
Ort::OrtRelease(outputTensors[i].release());
}
Ort::OrtRelease(memoryInfo.release());
// Ort::OrtRelease((OrtAllocator*)m_ortAllocator);
std::cout << "onnxruntime release memeory. " << std::endl;
Additional context
After all the OrtRelease calls, 491 MB of GPU memory is still not released until the main method exits.
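A plausible breakdown (an assumption, not confirmed here): a few hundred MB of that is the CUDA/cuDNN context itself, which is only freed at process exit, and the rest is ONNX Runtime's BFC arena caching freed blocks for reuse. Newer ONNX Runtime releases (not 1.6.0) expose a per-Run arena shrinkage switch; a hedged sketch assuming such a release:
Ort::RunOptions runOptions;
// Ask the arena to return unused chunks to the device after this Run
// (config key from newer ONNX Runtime releases; device index assumed to be 6).
runOptions.AddConfigEntry("memory.enable_memory_arena_shrinkage", "gpu:6");
auto outputTensors = m_session.Run(runOptions, m_inputNodeNames.data(), inputTensors.data(),
                                   m_numInputs, m_outputNodeNames.data(), m_numOutputs);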