Merged
3 changes: 2 additions & 1 deletion include/tvm/runtime/device_api.h
@@ -18,7 +18,8 @@ namespace runtime {
enum DeviceAttrKind : int {
  kExist = 0,
  kMaxThreadsPerBlock = 1,
  kWarpSize = 2
  kWarpSize = 2,
  kComputeVersion = 3
};

/*! \brief Number of bytes each allocation must align to */
14 changes: 14 additions & 0 deletions python/tvm/_ffi/runtime_ctypes.py
@@ -131,6 +131,20 @@ def warp_size(self):
        return _api_internal._GetDeviceAttr(
            self.device_type, self.device_id, 2)

    @property
    def compute_version(self):
        """Get compute version number as a string.

        Currently used to get the compute capability of a CUDA device.

        Returns
        -------
        version : str
            The version string in `major.minor` format.
        """
        return _api_internal._GetDeviceAttr(
            self.device_type, self.device_id, 3)

    def sync(self):
        """Synchronize until jobs finished at the context."""
        check_call(_LIB.TVMSynchronize(self.device_type, self.device_id, None))
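As a usage note, here is a minimal sketch of querying the new property from Python (assumes a CUDA-enabled TVM build with at least one visible GPU):

import tvm

ctx = tvm.gpu(0)
if ctx.exist:
    # Format is "major.minor", e.g. "5.2" on a Maxwell-class card.
    print(ctx.compute_version)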
5 changes: 1 addition & 4 deletions python/tvm/contrib/cc.py
@@ -39,11 +39,8 @@ def create_shared(output,
    if options:
        cmd += options

    args = ' '.join(cmd)
    proc = subprocess.Popen(
        args, shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT)
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    (out, _) = proc.communicate()

    if proc.returncode != 0:
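For context, passing the argument list directly (no shell=True) sidesteps shell quoting and injection issues. A minimal sketch with hypothetical compiler and file names:

import subprocess

# Arguments containing spaces need no quoting when no shell is involved.
cmd = ["g++", "-shared", "-o", "my lib.so", "input with spaces.o"]
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
(out, _) = proc.communicate()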
40 changes: 22 additions & 18 deletions python/tvm/contrib/nvcc.py
@@ -1,12 +1,16 @@
# pylint: disable=invalid-name
"""Utility to invoke nvcc compiler in the system"""
from __future__ import absolute_import as _abs
import sys

import subprocess
from . import util
from .. import ndarray as nd

def compile_cuda(code, target="ptx", arch=None,
                 options=None, path_target=None):
def compile_cuda(code,
                 target="ptx",
                 arch=None,
                 options=None,
                 path_target=None):
    """Compile cuda code with NVCC from env.

    Parameters
@@ -39,32 +43,32 @@ def compile_cuda(code, target="ptx", arch=None,

    with open(temp_code, "w") as out_file:
        out_file.write(code)
    if target == "cubin" and arch is None:
        raise ValueError("arch(sm_xy) must be passed for generating cubin")

    if arch is None:
        if nd.gpu(0).exist:
            # auto detect the compute arch argument
            arch = "sm_" + "".join(nd.gpu(0).compute_version.split('.'))
        else:
            raise ValueError("arch(sm_xy) is not passed, and we cannot detect it from env")

    file_target = path_target if path_target else temp_target
    cmd = ["nvcc"]
    cmd += ["--%s" % target, "-O3"]
    if arch:
        cmd += ["-arch", arch]
    cmd += ["-arch", arch]
    cmd += ["-o", file_target]

    if options:
        cmd += options
    cmd += [temp_code]
    args = ' '.join(cmd)

    proc = subprocess.Popen(
        args, shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT)
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

    (out, _) = proc.communicate()

    if proc.returncode != 0:
        sys.stderr.write("Compilation error:\n")
        sys.stderr.write(str(out))
        sys.stderr.flush()
        cubin = None
    else:
        cubin = bytearray(open(file_target, "rb").read())
    return cubin
        msg = "Compilation error:\n"
        msg += out
        raise RuntimeError(msg)

    return bytearray(open(file_target, "rb").read())
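With this change, arch can be omitted for "ptx" targets: a visible GPU's compute_version (e.g. "5.2") is turned into the "-arch sm_52" flag automatically, while "cubin" still requires an explicit arch per the check above. A usage sketch, where cuda_source stands in for real kernel code:

from tvm.contrib import nvcc

# arch is auto-detected from the local GPU, if one exists.
ptx = nvcc.compile_cuda(cuda_source, target="ptx")

# Generating a cubin still needs an explicit arch.
cubin = nvcc.compile_cuda(cuda_source, target="cubin", arch="sm_52")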
11 changes: 11 additions & 0 deletions src/runtime/cuda/cuda_device_api.cc
@@ -40,6 +40,17 @@ class CUDADeviceAPI final : public DeviceAPI {
            &value, cudaDevAttrWarpSize, ctx.device_id));
        break;
      }
      case kComputeVersion: {
        std::ostringstream os;
        CUDA_CALL(cudaDeviceGetAttribute(
            &value, cudaDevAttrComputeCapabilityMajor, ctx.device_id));
        os << value << ".";
        CUDA_CALL(cudaDeviceGetAttribute(
            &value, cudaDevAttrComputeCapabilityMinor, ctx.device_id));
        os << value;
        *rv = os.str();
        return;
      }
    }
    *rv = value;
  }
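The backend joins the major and minor capability values with a dot; the Python side (see compile_cuda above) splits that back apart to build the nvcc arch flag. A small sketch of the round trip, with a hypothetical version value:

version = "6.1"  # as compute_version might return
arch = "sm_" + "".join(version.split("."))
assert arch == "sm_61"  # the format nvcc expects for -arch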
1 change: 1 addition & 0 deletions src/runtime/metal/metal_device_api.mm
@@ -39,6 +39,7 @@
        *rv = 1;
        break;
      }
      case kComputeVersion: return;
      case kExist: break;
    }
  }
1 change: 1 addition & 0 deletions src/runtime/opencl/opencl_device_api.cc
@@ -45,6 +45,7 @@ void OpenCLWorkspace::GetAttr(
        *rv = 1;
        break;
      }
      case kComputeVersion: return;
      case kExist: break;
    }
  }
1 change: 1 addition & 0 deletions src/runtime/rocm/rocm_device_api.cc
@@ -44,6 +44,7 @@ class ROCMDeviceAPI final : public DeviceAPI {
        value = 64;
        break;
      }
      case kComputeVersion: return;
    }
    *rv = value;
  }
2 changes: 1 addition & 1 deletion topi/python/topi/transform.py
@@ -143,7 +143,7 @@ def _compute(begin, *indices):
        begin_ids = [seg_size * i for i in range(indices_or_sections)]
    elif isinstance(indices_or_sections, (tuple, list)):
        assert tuple(indices_or_sections) == tuple(sorted(indices_or_sections)),\
            "Should be sorted, received %s" %str(indices_or_sections)
            "Should be sorted, received %s" % str(indices_or_sections)
        begin_ids = [0] + list(indices_or_sections)
    else:
        raise NotImplementedError
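The assert above requires the split points to be pre-sorted. A small usage sketch with hypothetical shapes and indices:

import tvm
import topi

x = tvm.placeholder((6, 4), name="x")
outs = topi.split(x, [2, 4], axis=0)  # rows 0-1, 2-3, and 4-5
# topi.split(x, [4, 2], axis=0) would trip the sorted-order assert.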
2 changes: 1 addition & 1 deletion topi/recipe/broadcast/test_broadcast_map.py
@@ -12,7 +12,7 @@

@tvm.register_func
def tvm_callback_cuda_compile(code):
ptx = nvcc.compile_cuda(code, target="ptx", options=["-arch=sm_52"])
ptx = nvcc.compile_cuda(code, target="ptx")
return ptx


2 changes: 1 addition & 1 deletion topi/recipe/conv/depthwise_conv2d_test.py
@@ -13,7 +13,7 @@

@tvm.register_func
def tvm_callback_cuda_compile(code):
    ptx = nvcc.compile_cuda(code, target="ptx", options=["-arch=sm_37"]) # 37 for k80(ec2 instance)
    ptx = nvcc.compile_cuda(code, target="ptx")
    return ptx

def write_code(code, fname):
2 changes: 1 addition & 1 deletion topi/recipe/conv/test_conv2d_hwcn_map.py
@@ -12,7 +12,7 @@

@tvm.register_func
def tvm_callback_cuda_compile(code):
    ptx = nvcc.compile_cuda(code, target="ptx", options=["-arch=sm_37"])
    ptx = nvcc.compile_cuda(code, target="ptx")
    return ptx

def write_code(code, fname):
2 changes: 1 addition & 1 deletion topi/recipe/gemm/cuda_gemm_square.py
@@ -9,7 +9,7 @@

@tvm.register_func
def tvm_callback_cuda_compile(code):
    ptx = nvcc.compile_cuda(code, target="ptx", options=["-arch=sm_52"])
    ptx = nvcc.compile_cuda(code, target="ptx")
    return ptx

def write_code(code, fname):
2 changes: 1 addition & 1 deletion topi/recipe/reduce/test_reduce_map.py
@@ -12,7 +12,7 @@

@tvm.register_func
def tvm_callback_cuda_compile(code):
    ptx = nvcc.compile_cuda(code, target="ptx", options=["-arch=sm_52"])
    ptx = nvcc.compile_cuda(code, target="ptx")
    return ptx


2 changes: 1 addition & 1 deletion topi/recipe/rnn/lstm.py
@@ -17,7 +17,7 @@
@tvm.register_func
def tvm_callback_cuda_compile(code):
"""Use nvcc compiler for better perf."""
ptx = nvcc.compile_cuda(code, target="ptx", options=["-arch=sm_52"])
ptx = nvcc.compile_cuda(code, target="ptx")
return ptx

def write_code(code, fname):
2 changes: 1 addition & 1 deletion topi/recipe/rnn/matexp.py
@@ -24,7 +24,7 @@
@tvm.register_func
def tvm_callback_cuda_compile(code):
"""Use nvcc compiler for better perf."""
ptx = nvcc.compile_cuda(code, target="ptx", options=["-arch=sm_52"])
ptx = nvcc.compile_cuda(code, target="ptx")
return ptx

def write_code(code, fname):