
Move Stream.query() implementation down to C++ #15737

Closed
wants to merge 10 commits
15 changes: 15 additions & 0 deletions c10/cuda/CUDAStream.h
@@ -5,7 +5,9 @@

#include <cuda_runtime_api.h>

#include <c10/cuda/CUDAException.h>
#include <c10/cuda/CUDAMacros.h>
#include <c10/DeviceGuard.h>
#include <c10/util/Exception.h>
#include <c10/Stream.h>

@@ -99,6 +101,19 @@ class C10_CUDA_API CUDAStream {
/// Return the stream ID corresponding to this particular stream.
StreamId id() const { return stream_.id(); }

bool query() const {
DeviceGuard device_guard{stream_.device()};
cudaError_t err = cudaStreamQuery(stream());

if (err == cudaErrorNotReady) {
return false;
} else if (err != cudaSuccess) {
C10_CUDA_CHECK(err);
}

return true;
}

/// Explicit conversion to cudaStream_t.
cudaStream_t stream() const;

8 changes: 8 additions & 0 deletions torch/csrc/cuda/Stream.cpp
@@ -3,6 +3,7 @@
#include <torch/csrc/THP.h>
#include <torch/csrc/cuda/Module.h>

#include <c10/cuda/CUDAGuard.h>
#include <c10/cuda/CUDAStream.h>

#include <structmember.h>
@@ -43,6 +44,12 @@ static PyObject * THCPStream_pynew(PyTypeObject *type, PyObject *args, PyObject
END_HANDLE_TH_ERRORS
}

static PyObject * THCPStream_query(THCPStream *self) {
HANDLE_TH_ERRORS
return PyBool_FromLong(at::cuda::CUDAStream::unpack(self->cdata).query());
END_HANDLE_TH_ERRORS
}

static struct PyMemberDef THCPStream_members[] = {
{(char*)"_cdata", T_ULONGLONG, offsetof(THCPStream, cdata), READONLY, nullptr},
{(char*)"device", T_INT, offsetof(THCPStream, device), READONLY, nullptr},
@@ -51,6 +58,7 @@ static struct PyMemberDef THCPStream_members[] = {
};

static PyMethodDef THCPStream_methods[] = {
{(char*)"query", (PyCFunction)THCPStream_query, METH_NOARGS, nullptr},
{nullptr}
};

2 changes: 1 addition & 1 deletion torch/cuda/__init__.py
@@ -473,7 +473,7 @@ def init_err(self):
class_name = self.__class__.__name__
raise RuntimeError(
"Tried to instantiate dummy base class {}".format(class_name))
return type(storage_name, (object,), {"__init__": init_err})
return type(name, (object,), {"__init__": init_err})


if not hasattr(torch._C, 'CudaDoubleStorageBase'):
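The one-line fix above targets the dummy-class factory used on builds where the real CUDA-backed classes are unavailable: the old code referenced an undefined `storage_name`, so instantiating a placeholder class raised a `NameError` instead of the intended `RuntimeError`. A minimal, self-contained sketch of the pattern (the factory name `_dummy_type` and the example class name below are illustrative, not necessarily the exact names used in `torch/cuda/__init__.py`):

```python
def _dummy_type(name):
    # Factory for placeholder base classes; instantiating one should fail loudly.
    def init_err(self):
        class_name = self.__class__.__name__
        raise RuntimeError(
            "Tried to instantiate dummy base class {}".format(class_name))
    # The fix: build the class from `name` (the old `storage_name` was undefined).
    return type(name, (object,), {"__init__": init_err})


CudaStreamPlaceholder = _dummy_type('CudaStreamPlaceholder')
try:
    CudaStreamPlaceholder()
except RuntimeError as err:
    print(err)  # Tried to instantiate dummy base class CudaStreamPlaceholder
```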
12 changes: 3 additions & 9 deletions torch/cuda/streams.py
@@ -2,6 +2,7 @@
import torch
from . import cudart, check_error, cudaStatus
from ._utils import _get_device_index
from torch._C import _add_docstr


class Stream(torch._C._CudaStreamBase):
@@ -73,15 +74,8 @@ def query(self):
r"""Checks if all the work submitted has been completed.

Returns:
A boolean indicating if all kernels in this stream are completed.
"""
with torch.cuda.device(self.device):
res = cudart().cudaStreamQuery(self)
if res == cudaStatus.ERROR_NOT_READY:
[Review comment on this line] @mrshenli (Contributor, PR author), Jan 4, 2019:
Do we already have an error checking implementation in C++ that I can call? Or should I just return cudaError_t and keep the error checking in Python? Or implement it using AT_CHECK?

return False
check_error(res)
return True
return False
A boolean indicating if all kernels in this stream are completed."""
return super(Stream, self).query()

def synchronize(self):
r"""Wait for all the kernels in this stream to complete.
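With this change, `Stream.query()` delegates to the new C++ binding, which sets the device guard and performs CUDA error checking via `C10_CUDA_CHECK` before returning a plain boolean. A minimal usage sketch of the resulting user-facing behavior (assumes a CUDA-capable build; the matmul workload is arbitrary):

```python
import torch

s = torch.cuda.Stream()
a = torch.randn(4096, 4096, device='cuda')

# Enqueue some work on the side stream without synchronizing.
with torch.cuda.stream(s):
    b = a @ a

print(s.query())  # often False here: the kernel may still be running
s.synchronize()   # block the host until all work on `s` has finished
print(s.query())  # True: the stream's queue is now empty
```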