show cuda error string

lgarithm · lgarithm · commit 71da56464896 · 2020-03-08T19:08:04.000Z
diff --git a/include/ttl/bits/fake_cuda_runtime.hpp b/include/ttl/bits/fake_cuda_runtime.hpp
@@ -1,4 +1,5 @@
 #pragma once
+#include <cstdio>
 #include <cstring>
 #include <map>
 #include <stdexcept>
@@ -12,7 +13,7 @@ constexpr const cudaMemcpyKind cudaMemcpyHostToDevice = 1;
 constexpr const cudaMemcpyKind cudaMemcpyDeviceToHost = 2;
 constexpr const cudaMemcpyKind cudaMemcpyDeviceToDevice = 3;
 
-class fake_device
+class fake_cuda_device
 {
     std::map<const void *, size_t> _allocs;
 
@@ -33,7 +34,9 @@ class fake_device
     }
 
   public:
-    ~fake_device() { check_leak(); }
+    fake_cuda_device() { std::printf("using fake_cuda_device!\n"); }
+
+    ~fake_cuda_device() { check_leak(); }
 
     void *alloc(size_t size)
     {
@@ -68,7 +71,7 @@ class fake_device
     }
 };
 
-fake_device fake_cuda;
+fake_cuda_device fake_cuda;
 
 cudaError_t cudaMalloc(void **ptr, size_t count)
 {
@@ -88,3 +91,8 @@ cudaError_t cudaMemcpy(void *dst, const void *src, size_t size,
     fake_cuda.memcpy(dst, src, size, dir);
     return cudaSuccess;
 }
+
+std::string cudaGetErrorString(const cudaError_t err)  // fake-runtime stand-in: describes an error code as text
+{  // NOTE(review): returns std::string, not a C string; the visible caller only concatenates it -- confirm other uses
+    return "fake_cudaError_t(" + std::to_string(static_cast<int>(err)) + ")";  // e.g. "fake_cudaError_t(<code>)"
+}
diff --git a/include/ttl/bits/std_cuda_allocator.hpp b/include/ttl/bits/std_cuda_allocator.hpp
@@ -1,6 +1,7 @@
 #pragma once
 #include <cstddef>
 #include <stdexcept>
+#include <string>
 
 #include <ttl/bits/std_cuda_runtime.hpp>
 #include <ttl/bits/std_device.hpp>
@@ -10,6 +11,23 @@ namespace ttl
 {
 namespace internal
 {
+class std_cuda_error_checker_t  // converts a failing cudaError_t into a std::runtime_error
+{
+    const std::string func_name_;  // name of the checked call; used as the prefix of the error message
+    // (kept as std::string so the message below can be assembled with operator+)
+  public:
+    std_cuda_error_checker_t(const char *func_name) : func_name_(func_name) {}
+    // Usage: `check << cudaFoo(...);` -- does nothing on cudaSuccess, throws otherwise.
+    void operator<<(const cudaError_t err) const
+    {
+        if (err != cudaSuccess) {
+            throw std::runtime_error(func_name_ + " failed with: " +  // "<func> failed with: <code>: <text>"
+                                     std::to_string(static_cast<int>(err)) +
+                                     ": " + cudaGetErrorString(err));
+        }
+    }
+};  // class std_cuda_error_checker_t
+
 struct cuda_copier {
     static constexpr auto h2d = cudaMemcpyHostToDevice;
     static constexpr auto d2h = cudaMemcpyDeviceToHost;
@@ -18,10 +36,8 @@ struct cuda_copier {
     template <cudaMemcpyKind dir>
     static void copy(void *dst, const void *src, size_t size)
     {
-        const cudaError_t err = cudaMemcpy(dst, src, size, dir);
-        if (err != cudaSuccess) {
-            throw std::runtime_error("cudaMemcpy failed");
-        }
+        static std_cuda_error_checker_t check("cudaMemcpy");
+        check << cudaMemcpy(dst, src, size, dir);
     }
 };
 
@@ -54,10 +70,8 @@ class basic_allocator<R, cuda_memory>
         void *deviceMem;
         // cudaMalloc<R>(&deviceMem, count);
         // https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY
-        const cudaError_t err = cudaMalloc(&deviceMem, count * sizeof(R));
-        if (err != cudaSuccess) {
-            throw std::runtime_error("cudaMalloc failed");
-        }
+        static std_cuda_error_checker_t check("cudaMalloc");
+        check << cudaMalloc(&deviceMem, count * sizeof(R));
         return reinterpret_cast<R *>(deviceMem);
     }
 };
@@ -68,8 +82,8 @@ class basic_deallocator<R, cuda_memory>
   public:
     void operator()(R *data)
     {
-        const cudaError_t err = cudaFree(data);
-        if (err != cudaSuccess) { throw std::runtime_error("cudaFree failed"); }
+        static std_cuda_error_checker_t check("cudaFree");
+        check << cudaFree(data);
     }
 };
 }  // namespace internal

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`#pragma once`
	`2`	`+#include <cstdio>`
`2`	`3`	`#include <cstring>`
`3`	`4`	`#include <map>`
`4`	`5`	`#include <stdexcept>`
`@@ -12,7 +13,7 @@ constexpr const cudaMemcpyKind cudaMemcpyHostToDevice = 1;`
`12`	`13`	`constexpr const cudaMemcpyKind cudaMemcpyDeviceToHost = 2;`
`13`	`14`	`constexpr const cudaMemcpyKind cudaMemcpyDeviceToDevice = 3;`
`14`	`15`
`15`		`-class fake_device`
	`16`	`+class fake_cuda_device`
`16`	`17`	`{`
`17`	`18`	`std::map<const void *, size_t> _allocs;`
`18`	`19`
`@@ -33,7 +34,9 @@ class fake_device`
`33`	`34`	`}`
`34`	`35`
`35`	`36`	`public:`
`36`		`- ~fake_device() { check_leak(); }`
	`37`	`+ fake_cuda_device() { std::printf("using fake_cuda_device!\n"); }`
	`38`	`+`
	`39`	`+ ~fake_cuda_device() { check_leak(); }`
`37`	`40`
`38`	`41`	`void *alloc(size_t size)`
`39`	`42`	`{`
`@@ -68,7 +71,7 @@ class fake_device`
`68`	`71`	`}`
`69`	`72`	`};`
`70`	`73`
`71`		`-fake_device fake_cuda;`
	`74`	`+fake_cuda_device fake_cuda;`
`72`	`75`
`73`	`76`	`cudaError_t cudaMalloc(void **ptr, size_t count)`
`74`	`77`	`{`
`@@ -88,3 +91,8 @@ cudaError_t cudaMemcpy(void *dst, const void *src, size_t size,`
`88`	`91`	`fake_cuda.memcpy(dst, src, size, dir);`
`89`	`92`	`return cudaSuccess;`
`90`	`93`	`}`
	`94`	`+`
	`95`	`+std::string cudaGetErrorString(const cudaError_t err)`
	`96`	`+{`
	`97`	`+ return "fake_cudaError_t(" + std::to_string(static_cast<int>(err)) + ")";`
	`98`	`+}`