3 files changed under paddle/fluid/memory/allocation: +9 -5 lines.

First changed file:

@@ -897,9 +897,11 @@ class AllocatorFacadePrivate {
   }
 
   void InitCUDAMallocAsyncAllocator(phi::GPUPlace p, gpuStream_t stream) {
+#ifdef PADDLE_WITH_CUDA
     std::shared_ptr<Allocator>& allocator = cuda_allocators_[p][stream];
     cuda_allocators_[p][stream] =
         std::make_shared<CUDAMallocAsyncAllocator>(allocator, p, stream);
+#endif
   }
 
   void InitAutoGrowthCUDAAllocator(phi::GPUPlace p, gpuStream_t stream) {
@@ -1169,6 +1171,7 @@ class AllocatorFacadePrivate {
   }
 
   void WrapCUDAMallocAsyncAllocatorForDefault() {
+#ifdef PADDLE_WITH_CUDA
     for (auto& pair : allocators_) {
       auto& place = pair.first;
       if (platform::is_gpu_place(place)) {
@@ -1188,6 +1191,7 @@ class AllocatorFacadePrivate {
             << ", allocator address = " << pair.second.get();
       }
     }
+#endif
   }
 
   void WrapCUDARetryAllocator(phi::GPUPlace p,
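Taken together, the three hunks above guard the facade's async-allocator setup so the bodies compile away when PADDLE_WITH_CUDA is not defined, and the calls become no-ops on non-CUDA (e.g. HIP) builds. A minimal, self-contained sketch of that guard pattern follows; the function name is hypothetical, not Paddle's API.

// Sketch of the #ifdef guard used above: without PADDLE_WITH_CUDA the body
// compiles away, so CUDA-only types never need to be referenced.
#include <iostream>

void InitAsyncAllocatorSketch() {  // hypothetical stand-in for the facade method
#ifdef PADDLE_WITH_CUDA
  // A real implementation would construct the CUDA async allocator here.
  std::cout << "CUDA build: constructing the async allocator\n";
#endif
  // Without PADDLE_WITH_CUDA this function is an empty stub.
}

int main() {
  InitAsyncAllocatorSketch();  // no-op unless compiled with -DPADDLE_WITH_CUDA
  return 0;
}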
Second changed file:

@@ -24,18 +24,14 @@
 #ifdef PADDLE_WITH_CUDA
 #include <cuda.h>
 #include <cuda_runtime.h>
+#include "paddle/phi/backends/gpu/cuda/cuda_graph.h"
 #endif
 
 #include <string>
 
 #include "paddle/fluid/platform/cuda_device_guard.h"
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #include "paddle/fluid/platform/enforce.h"
-#if defined(PADDLE_WITH_CUDA)
-#include "paddle/phi/backends/gpu/cuda/cuda_graph.h"
-#elif defined(PADDLE_WITH_HIP)
-#include "paddle/phi/backends/gpu/rocm/hip_graph.h"
-#endif
 
 #include "paddle/utils/optional.h"
 
Third changed file:

@@ -37,6 +37,7 @@ namespace allocation {
 // managing memory allocations with CUDA async malloc. It includes methods to
 // handle stream associations and to query the owning stream of the allocation.
 class CUDAMallocAsyncAllocation : public Allocation {
+#ifdef PADDLE_WITH_CUDA
  public:
   CUDAMallocAsyncAllocation(void* ptr,
                             size_t size,
@@ -60,12 +61,14 @@ class CUDAMallocAsyncAllocation : public Allocation {
 
   SpinLock recorded_streams_lock_;
   std::unordered_set<gpuStream_t> recorded_streams_;
+#endif
 };
 
 // The `CUDAMallocAsyncAllocator` class extends `Allocator` and is specialized
 // for asynchronous memory allocation in CUDA. It offers thread-safe allocation
 // and incorporates a default stream for memory operations.
 class CUDAMallocAsyncAllocator : public Allocator {
+#ifdef PADDLE_WITH_CUDA
  public:
   explicit CUDAMallocAsyncAllocator(
       std::shared_ptr<Allocator> underlying_allocator,
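The comment in this hunk describes the allocator as doing asynchronous, stream-ordered allocation with a default stream. For background, here is a small sketch using the plain CUDA runtime's stream-ordered allocation API; that CUDAMallocAsyncAllocator is built on exactly these calls is an assumption here, not something shown in the diff.

// Sketch of stream-ordered allocation with the CUDA runtime (not Paddle code).
// Allocation, use, and free are ordered on one stream instead of the device.
// Error checking is omitted for brevity.
#include <cuda_runtime.h>
#include <cstdio>

int main() {
  cudaStream_t stream;
  cudaStreamCreate(&stream);

  void* ptr = nullptr;
  cudaMallocAsync(&ptr, 1 << 20, stream);    // 1 MiB, ordered on `stream`
  cudaMemsetAsync(ptr, 0, 1 << 20, stream);  // work that uses the allocation
  cudaFreeAsync(ptr, stream);                // freed in stream order

  cudaStreamSynchronize(stream);             // wait for the queued work
  cudaStreamDestroy(stream);
  std::printf("done\n");
  return 0;
}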
@@ -128,6 +131,7 @@ class CUDAMallocAsyncAllocator : public Allocator {
   std::unordered_map<CUDAMallocAsyncAllocation*, CUDAGraphID>
       graph_owned_allocations_;
   SpinLock graph_owned_allocations_lock_;
+#endif
 };
 
 }  // namespace allocation
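In the header, the same PADDLE_WITH_CUDA guard now wraps the members of both classes, so non-CUDA builds still see the type names but with empty bodies. A tiny illustration of that effect with a made-up class name:

// Illustration only: guarding a class body with #ifdef, as the header hunks
// above do. Without PADDLE_WITH_CUDA the class compiles as an empty shell,
// so code that merely names the type keeps building.
class AsyncAllocationSketch {  // hypothetical name, not Paddle's class
#ifdef PADDLE_WITH_CUDA
 public:
  void* ptr() const { return ptr_; }

 private:
  void* ptr_ = nullptr;
#endif
};

int main() {
  AsyncAllocationSketch a;  // valid with or without PADDLE_WITH_CUDA
  (void)a;
  return 0;
}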