Closed as not planned
Description
Problem Description
When hipGraphKernelNodeSetParams is called the hipKernelNodeParams should be copied by the runtime, but this dose not happen. Instead the hipKernelNodeParams's address is taken as is, this causes a failure later in hipGraphDestroy() as the runtime will attempt to free() the parameters pointer as passed into hipGraphKernelNodeSetParams, which could be on stack or deep inside an allocation etc and is not owned by the runtime in the first place.
Operating System
Any
CPU
Epyc 7552
GPU
MI100
ROCm Version
6.3.1
ROCm Component
No response
Steps to Reproduce
reproducer:
#include <iostream>
#include <vector>
#include <hip/hip_runtime.h>
#define HIP_CHECK(fn) { hipError_t err = fn; if(err != hipSuccess){fprintf(stderr, "Error: %s: %s at %d\n", hipGetErrorName(err), hipGetErrorString(err), __LINE__); exit(1);} }
__global__ void dummyKernel(float* out, float* in, size_t size)
{
int idx = blockIdx.x*blockDim.x+threadIdx.x;
if(idx < size)
out[idx]=2*in[idx];
}
int main()
{
hipGraph_t graph;
hipGraphExec_t instance;
hipStream_t stream;
HIP_CHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
constexpr size_t count = 500000;
float* inDeviceBuffer;
float* outDeviceBufferA;
float* outDeviceBufferB;
float* inHostBuffer;
HIP_CHECK(hipMalloc(&inDeviceBuffer, sizeof(*inDeviceBuffer)*count));
HIP_CHECK(hipMalloc(&outDeviceBufferA, sizeof(*inDeviceBuffer)*count));
HIP_CHECK(hipMalloc(&outDeviceBufferB, sizeof(*inDeviceBuffer)*count));
HIP_CHECK(hipHostMalloc(&inHostBuffer, sizeof(*inDeviceBuffer)*count));
for(size_t i = 0; i < count; ++i)
inHostBuffer[i] = i;
HIP_CHECK(hipMemcpy(inDeviceBuffer, inHostBuffer, count*sizeof(*inDeviceBuffer), hipMemcpyHostToDevice));
std::cout<<"outDeviceBufferA: "<<outDeviceBufferA<<'\n';
std::cout<<"outDeviceBufferB: "<<outDeviceBufferB<<'\n';
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeRelaxed));
for(size_t i = 0; i < 500; i++)
dummyKernel<<<count/1024+1, 1024, 0, stream>>>(inDeviceBuffer, outDeviceBufferA, count);
HIP_CHECK(hipStreamEndCapture(stream, &graph));
HIP_CHECK(hipGraphInstantiate(&instance, graph, NULL, NULL, 0));
HIP_CHECK(hipGraphLaunch(instance, stream));
HIP_CHECK(hipStreamSynchronize(stream));
size_t num_nodes;
HIP_CHECK(hipGraphGetNodes(graph, nullptr, &num_nodes));
std::cout<<"Graph has "<<num_nodes<<" nodes\n";
std::vector<hipGraphNode_t> nodes(num_nodes);
std::vector<hipKernelNodeParams> params(num_nodes);
HIP_CHECK(hipGraphGetNodes(graph, nodes.data(), &num_nodes));
for (size_t i = 0; i < num_nodes; i++)
{
hipGraphNodeType node_type;
HIP_CHECK(hipGraphNodeGetType(nodes[i], &node_type));
if(node_type == hipGraphNodeTypeKernel)
HIP_CHECK(hipGraphKernelNodeGetParams(nodes[i], ¶ms[i]));
}
constexpr int intervention_node = 250;
hipGraphNodeType node_type;
HIP_CHECK(hipGraphNodeGetType(nodes[intervention_node], &node_type));
if(node_type == hipGraphNodeTypeKernel)
{
std::cout<<"node "<<intervention_node<<" is a kernel node\n";
}
else
{
std::cout<<"node "<<intervention_node<<" is not a kernel node!!\n";
return 1;
}
void** updated_output_buffer_location_ptr = (void**)&outDeviceBufferB;
std::cout<<"Original output buffer: "<<*(void**)params[intervention_node].kernelParams[1]<<'\n';
params[intervention_node].kernelParams[1] = updated_output_buffer_location_ptr;
HIP_CHECK(hipGraphKernelNodeSetParams(nodes[intervention_node], ¶ms[intervention_node]));
std::cout<<"Replaced output buffer: "<<*(void**)params[intervention_node].kernelParams[1]<<'\n';
hipGraphNode_t errorNode;
hipGraphExecUpdateResult result_info;
HIP_CHECK(hipGraphExecUpdate(instance, graph, &errorNode, &result_info));
HIP_CHECK(hipGraphLaunch(instance, stream));
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipGraphExecDestroy(instance));
HIP_CHECK(hipGraphDestroy(graph));
return 0;
}
result:
% ./graphRepo
outDeviceBufferA: 0x78158c800000
outDeviceBufferB: 0x78158c400000
Graph has 500 nodes
node 250 is a kernel node
Original output buffer: 0x78158c800000
Replaced output buffer: 0x78158c400000
free(): invalid pointer
(Optional for Linux users) Output of /opt/rocm/bin/rocminfo --support
No response
Additional Information
No response