@@ -360,8 +360,10 @@ class InsertGPUAllocsPass final
360360 auto newAlloc = builder.create <mlir::memref::AllocOp>(
361361 loc, alloc.getType (), alloc.getDynamicSizes (),
362362 alloc.getSymbolOperands ());
363- builder.create <mlir::memref::CopyOp>(loc, allocResult,
364- newAlloc.getResult ());
363+ builder.create <mlir::gpu::MemcpyOp>(
364+ loc, /* asyncToken*/ static_cast <mlir::Type>(nullptr ),
365+ /* asyncDependencies*/ std::nullopt , newAlloc.getResult (),
366+ allocResult);
365367 use.set (newAlloc.getResult ());
366368 }
367369 }
@@ -401,8 +403,9 @@ class InsertGPUAllocsPass final
401403 /* symbolOperands*/ std::nullopt , hostShared);
402404 auto allocResult = gpuAlloc.getResult (0 );
403405 if (access.hostWrite && access.deviceRead ) {
404- auto copy =
405- builder.create <mlir::memref::CopyOp>(loc, op, allocResult);
406+ auto copy = builder.create <mlir::gpu::MemcpyOp>(
407+ loc, /* asyncToken*/ static_cast <mlir::Type>(nullptr ),
408+ /* asyncDependencies*/ std::nullopt , allocResult, op);
406409 filter.insert (copy);
407410 }
408411
@@ -421,7 +424,9 @@ class InsertGPUAllocsPass final
421424 op.replaceAllUsesExcept (allocResult, filter);
422425 builder.setInsertionPoint (term);
423426 if (access.hostRead && access.deviceWrite ) {
424- builder.create <mlir::memref::CopyOp>(loc, allocResult, op);
427+ builder.create <mlir::gpu::MemcpyOp>(
428+ loc, /* asyncToken*/ static_cast <mlir::Type>(nullptr ),
429+ /* asyncDependencies*/ std::nullopt , op, allocResult);
425430 }
426431 builder.create <mlir::gpu::DeallocOp>(loc, std::nullopt , allocResult);
427432 }
0 commit comments