
GPU memory cannot be released. #8544

Open

Description

Describe the bug
GPU memory is not released.

System information

  • OS Platform and Distribution: Linux Ubuntu 16.04
  • ONNX Runtime installed from: source
  • ONNX Runtime version: 1.6.0
  • GCC/Compiler version: 5.4.0

To Reproduce
    // Headers and helper definitions assumed by the snippet below
    // (header locations can vary between ONNX Runtime installs):
    #include <cassert>
    #include <cstdio>
    #include <cstring>
    #include <iostream>
    #include <numeric>
    #include <string>
    #include <utility>
    #include <vector>

    #include <opencv2/opencv.hpp>
    #include <onnxruntime_cxx_api.h>
    #include <cuda_provider_factory.h>  // declares OrtSessionOptionsAppendExecutionProvider_CUDA

    #define DEBUG_LOG(...) printf(__VA_ARGS__)  // assumed printf-style logging macro
    using DataOutputType = std::pair<float*, std::vector<int64_t>>;  // assumed output pair type
    // toString(ONNXTensorElementDataType) is assumed to be a user-side helper returning std::string;
    // img_path and model_path are provided by the caller.

    static constexpr int64_t IMG_WIDTH = 224;
    static constexpr int64_t IMG_HEIGHT = 224;
    static constexpr int64_t IMG_CHANNEL = 3;


    cv::Mat img = cv::imread(img_path);
    if (img.empty()) 
    {
        std::cerr << "Failed to read input image" << std::endl;
        return EXIT_FAILURE;
    }

    cv::resize(img, img, cv::Size(IMG_WIDTH, IMG_HEIGHT));
    // cv::imread returns 8-bit BGR pixels; convert to float first, otherwise
    // reinterpreting the uint8 buffer as float reads garbage.
    cv::Mat imgFloat;
    img.convertTo(imgFloat, CV_32FC3);

    const int64_t dataLength = IMG_HEIGHT * IMG_WIDTH * IMG_CHANNEL;
    float* dst = new float[dataLength];
    std::memcpy(dst, imgFloat.data, dataLength * sizeof(float));  // memcpy length is in bytes, not elements

    std::vector<float*> inputData = {dst};  // dst is already float*, no cast needed


    std::string m_modelPath = model_path;    
    
    Ort::AllocatorWithDefaultOptions m_ortAllocator;

    int m_gpuIdx = 6;

    std::vector<std::vector<int64_t>> m_inputShapes;
    std::vector<std::vector<int64_t>> m_outputShapes;

    std::vector<int64_t> m_inputTensorSizes;
    std::vector<int64_t> m_outputTensorSizes;

    uint8_t m_numInputs;
    uint8_t m_numOutputs;

    std::vector<char*> m_inputNodeNames;
    std::vector<char*> m_outputNodeNames;

    bool m_inputShapesProvided = false;


    Ort::Env m_env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "test");
    Ort::SessionOptions sessionOptions;

    // TODO: need to take care of the following line as it is related to CPU
    // consumption using openmp
    sessionOptions.SetIntraOpNumThreads(1);

    if (m_gpuIdx != -1) 
    {
        Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, m_gpuIdx));
    }

    sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
    Ort::Session m_session = Ort::Session(m_env, m_modelPath.c_str(), sessionOptions);
    m_numInputs = m_session.GetInputCount();
    DEBUG_LOG("Model number of inputs: %d\n", m_numInputs);

    m_inputNodeNames.reserve(m_numInputs);
    m_inputTensorSizes.reserve(m_numInputs);

    m_numOutputs = m_session.GetOutputCount();
    DEBUG_LOG("Model number of outputs: %d\n", m_numOutputs);

    m_outputNodeNames.reserve(m_numOutputs);
    m_outputTensorSizes.reserve(m_numOutputs);

    // Determine the model input shapes
    for (int i = 0; i < m_numInputs; i++) 
    {
        if (!m_inputShapesProvided)
        {
            Ort::TypeInfo typeInfo = m_session.GetInputTypeInfo(i);
            auto tensorInfo = typeInfo.GetTensorTypeAndShapeInfo();

            m_inputShapes.emplace_back(tensorInfo.GetShape());
        }

        const auto& curInputShape = m_inputShapes[i];

        m_inputTensorSizes.emplace_back(
            std::accumulate(std::begin(curInputShape), std::end(curInputShape), int64_t{1}, std::multiplies<int64_t>()));

        char* inputName = m_session.GetInputName(i, m_ortAllocator);
        m_inputNodeNames.emplace_back(strdup(inputName));
        m_ortAllocator.Free(inputName);
    }
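    // NOTE: if the model has dynamic dimensions, GetShape() reports them as -1,
    // so the accumulate in the loop above can yield a negative element count;
    // such shapes would need to be fixed up before creating the input tensors.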

    // Determine the model output shapes
    for (int i = 0; i < m_numOutputs; ++i) 
    {
        Ort::TypeInfo typeInfo = m_session.GetOutputTypeInfo(i);
        auto tensorInfo = typeInfo.GetTensorTypeAndShapeInfo();

        m_outputShapes.emplace_back(tensorInfo.GetShape());

        char* outputName = m_session.GetOutputName(i, m_ortAllocator);
        m_outputNodeNames.emplace_back(strdup(outputName));
        m_ortAllocator.Free(outputName);
    }

    if (m_numInputs != inputData.size()) 
    {
        throw std::runtime_error("Mismatched number of inputs\n");
    }

    Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);

    std::vector<Ort::Value> inputTensors;
    inputTensors.reserve(m_numInputs);

    for (int i = 0; i < m_numInputs; ++i)
    {
        // inputData[i] is already a mutable float*, so no const_cast is needed;
        // CreateTensor returns a prvalue, so wrapping it in std::move is redundant.
        inputTensors.emplace_back(Ort::Value::CreateTensor<float>(
            memoryInfo, inputData[i], m_inputTensorSizes[i],
            m_inputShapes[i].data(), m_inputShapes[i].size()));
    }

    auto outputTensors = m_session.Run(Ort::RunOptions{nullptr}, m_inputNodeNames.data(), inputTensors.data(),
                                       m_numInputs, m_outputNodeNames.data(), m_numOutputs);

    assert(outputTensors.size() == m_numOutputs);
    std::vector<DataOutputType> outputData;
    outputData.reserve(m_numOutputs);

    int count = 1;
    for (auto& elem : outputTensors) 
    {
        DEBUG_LOG("type of output %d: %s", count++, toString(elem.GetTensorTypeAndShapeInfo().GetElementType()).c_str());
        outputData.emplace_back(
            std::make_pair(elem.GetTensorMutableData<float>(), elem.GetTensorTypeAndShapeInfo().GetShape()));
    }
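    // NOTE: the float* pointers stored in outputData point into buffers owned by
    // the Ort::Value objects in outputTensors; they dangle once those tensors are
    // released below, so the data must be copied out if it is needed afterwards.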

    std::cout << "interface success. " << std::endl;

    // Ort::GetApi().ReleaseSession(m_session.release());
    Ort::OrtRelease(m_session.release());
    Ort::OrtRelease(sessionOptions.release());
    Ort::OrtRelease(m_env.release());
    for (int i = 0; i < m_numInputs; ++i) 
    {
        Ort::OrtRelease(inputTensors[i].release());
    }

    for (int i = 0; i < m_numOutputs; ++i) 
    {
        Ort::OrtRelease(outputTensors[i].release());
    }    
    Ort::OrtRelease(memoryInfo.release());    
    // Ort::OrtRelease((OrtAllocator*)m_ortAllocator);   

    std::cout << "onnxruntime release memeory. " << std::endl;

Additional context
After calling all of the OrtRelease methods, about 491 MB of GPU memory is still not released; it is only freed when the main method exits.
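For reference, Ort::Env, Ort::SessionOptions, Ort::Session, and Ort::Value are RAII wrappers, so the manual release()/OrtRelease calls above do the same thing as simply letting the objects go out of scope. A minimal sketch of that pattern, assuming the same model path and device id as in the report (the run_once wrapper is hypothetical, for illustration only):

    #include <string>
    #include <onnxruntime_cxx_api.h>
    #include <cuda_provider_factory.h>  // declares OrtSessionOptionsAppendExecutionProvider_CUDA

    void run_once(const std::string& modelPath)
    {
        Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
        {
            Ort::SessionOptions opts;
            opts.SetIntraOpNumThreads(1);
            Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(opts, /*device_id=*/6));
            Ort::Session session(env, modelPath.c_str(), opts);
            // ... create input tensors, call session.Run(), copy the outputs out ...
        }   // session and options are destroyed here; memory owned by the session
            // is released, but the CUDA context on the device is not.
    }

Even with correct scoping, the first CUDA call on a device creates a CUDA context (plus cuBLAS/cuDNN workspaces) that typically keeps a few hundred MB resident until the process exits or cudaDeviceReset is called, which would be consistent with the ~491 MB still reported by nvidia-smi here.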


Metadata

Labels

ep:CUDA (issues related to the CUDA execution provider)
more info needed (issues that cannot be triaged until more information is submitted by the original user)
