Skip to content

[Issue]: hipGraphKernelNodeSetParams fails to copy input hipKernelNodeParams causing the runtime to later attempt to free() memory owned by the client #138

Closed as not planned
@IMbackK

Description

@IMbackK

Problem Description

When hipGraphKernelNodeSetParams is called the hipKernelNodeParams should be copied by the runtime, but this dose not happen. Instead the hipKernelNodeParams's address is taken as is, this causes a failure later in hipGraphDestroy() as the runtime will attempt to free() the parameters pointer as passed into hipGraphKernelNodeSetParams, which could be on stack or deep inside an allocation etc and is not owned by the runtime in the first place.

Operating System

Any

CPU

Epyc 7552

GPU

MI100

ROCm Version

6.3.1

ROCm Component

No response

Steps to Reproduce

reproducer:

#include <iostream>
#include <vector>
#include <hip/hip_runtime.h>

#define HIP_CHECK(fn) { hipError_t err = fn; if(err != hipSuccess){fprintf(stderr, "Error: %s: %s at %d\n", hipGetErrorName(err), hipGetErrorString(err), __LINE__); exit(1);} }

__global__ void dummyKernel(float* out, float* in,  size_t size)
{
	int idx = blockIdx.x*blockDim.x+threadIdx.x;
	if(idx < size)
		out[idx]=2*in[idx];
}

int main()
{
	hipGraph_t graph;
	hipGraphExec_t instance;
	hipStream_t stream;

	HIP_CHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));

	constexpr size_t count = 500000;
	float* inDeviceBuffer;
	float* outDeviceBufferA;
	float* outDeviceBufferB;
	float* inHostBuffer;
	HIP_CHECK(hipMalloc(&inDeviceBuffer, sizeof(*inDeviceBuffer)*count));
	HIP_CHECK(hipMalloc(&outDeviceBufferA, sizeof(*inDeviceBuffer)*count));
	HIP_CHECK(hipMalloc(&outDeviceBufferB, sizeof(*inDeviceBuffer)*count));
	HIP_CHECK(hipHostMalloc(&inHostBuffer, sizeof(*inDeviceBuffer)*count));
	for(size_t i = 0; i < count; ++i)
		inHostBuffer[i] = i;
	HIP_CHECK(hipMemcpy(inDeviceBuffer, inHostBuffer, count*sizeof(*inDeviceBuffer), hipMemcpyHostToDevice));

	std::cout<<"outDeviceBufferA: "<<outDeviceBufferA<<'\n';
	std::cout<<"outDeviceBufferB: "<<outDeviceBufferB<<'\n';

	HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeRelaxed));
	for(size_t i = 0; i < 500; i++)
		dummyKernel<<<count/1024+1, 1024, 0, stream>>>(inDeviceBuffer, outDeviceBufferA, count);
	HIP_CHECK(hipStreamEndCapture(stream, &graph));
	HIP_CHECK(hipGraphInstantiate(&instance, graph, NULL, NULL, 0));
	HIP_CHECK(hipGraphLaunch(instance, stream));
	HIP_CHECK(hipStreamSynchronize(stream));

	size_t num_nodes;
	HIP_CHECK(hipGraphGetNodes(graph, nullptr, &num_nodes));
	std::cout<<"Graph has "<<num_nodes<<" nodes\n";

	std::vector<hipGraphNode_t> nodes(num_nodes);
	std::vector<hipKernelNodeParams> params(num_nodes);

	HIP_CHECK(hipGraphGetNodes(graph, nodes.data(), &num_nodes));

	for (size_t i = 0; i < num_nodes; i++)
	{
		hipGraphNodeType node_type;
		HIP_CHECK(hipGraphNodeGetType(nodes[i], &node_type));
		if(node_type == hipGraphNodeTypeKernel)
			HIP_CHECK(hipGraphKernelNodeGetParams(nodes[i], &params[i]));
	}

	constexpr int intervention_node = 250;
	hipGraphNodeType node_type;
	HIP_CHECK(hipGraphNodeGetType(nodes[intervention_node], &node_type));
	if(node_type == hipGraphNodeTypeKernel)
	{
		std::cout<<"node "<<intervention_node<<" is a kernel node\n";
	}
	else
	{
		std::cout<<"node "<<intervention_node<<" is not a kernel node!!\n";
		return 1;
	}

	void** updated_output_buffer_location_ptr = (void**)&outDeviceBufferB;
	std::cout<<"Original output buffer: "<<*(void**)params[intervention_node].kernelParams[1]<<'\n';
	params[intervention_node].kernelParams[1] = updated_output_buffer_location_ptr;
	HIP_CHECK(hipGraphKernelNodeSetParams(nodes[intervention_node], &params[intervention_node]));
	std::cout<<"Replaced output buffer: "<<*(void**)params[intervention_node].kernelParams[1]<<'\n';

	hipGraphNode_t errorNode;
	hipGraphExecUpdateResult result_info;
	HIP_CHECK(hipGraphExecUpdate(instance, graph, &errorNode, &result_info));
	HIP_CHECK(hipGraphLaunch(instance, stream));
	HIP_CHECK(hipStreamSynchronize(stream));

	HIP_CHECK(hipGraphExecDestroy(instance));
	HIP_CHECK(hipGraphDestroy(graph));
	return 0;
}

result:

% ./graphRepo
outDeviceBufferA: 0x78158c800000
outDeviceBufferB: 0x78158c400000
Graph has 500 nodes
node 250 is a kernel node
Original output buffer: 0x78158c800000
Replaced output buffer: 0x78158c400000
free(): invalid pointer

(Optional for Linux users) Output of /opt/rocm/bin/rocminfo --support

No response

Additional Information

No response

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions