WIP CUDA 11.7 support #150

Closed · wants to merge 5 commits
4 changes: 2 additions & 2 deletions mmdet/ops/dcn/functions/deform_conv.py
@@ -2,8 +2,6 @@
from torch.autograd import Function
from torch.nn.modules.utils import _pair

from .. import deform_conv_cuda


class DeformConvFunction(Function):

@@ -18,6 +16,8 @@ def forward(ctx,
groups=1,
deformable_groups=1,
im2col_step=64):
from .. import deform_conv_cuda

if input is not None and input.dim() != 4:
raise ValueError(
"Expected 4D tensor as input, got {}D tensor instead.".format(
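Across the Python files in this diff, the change is the same: imports of the compiled CUDA extensions move from module level into the functions that call them, so the package can be imported even on a machine where the extensions have not been built against the local CUDA toolkit; the failure only surfaces when an op is actually run. A minimal sketch of that lazy-import pattern, using a hypothetical extension name rather than this repository's bindings:

```python
import importlib


def _lazy_ext(module_name):
    """Load a compiled extension on first use instead of at import time."""
    try:
        return importlib.import_module(module_name)
    except ImportError as exc:
        # Raised only when the op is called, not when the package is imported.
        raise RuntimeError(
            f"{module_name} is not built; compile the ops for your CUDA version"
        ) from exc


def some_op(x):
    # Hypothetical extension and function names, for illustration only.
    ext = _lazy_ext("my_pkg.ops.some_op_cuda")
    return ext.forward(x)
```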
6 changes: 4 additions & 2 deletions mmdet/ops/dcn/functions/deform_pool.py
@@ -1,8 +1,6 @@
import torch
from torch.autograd import Function

from .. import deform_pool_cuda


class DeformRoIPoolingFunction(Function):

@@ -19,6 +17,8 @@ def forward(ctx,
part_size=None,
sample_per_part=4,
trans_std=.0):
from .. import deform_pool_cuda

ctx.spatial_scale = spatial_scale
ctx.out_size = out_size
ctx.out_channels = out_channels
@@ -48,6 +48,8 @@ def forward(ctx,

@staticmethod
def backward(ctx, grad_output):
from .. import deform_pool_cuda

if not grad_output.is_cuda:
raise NotImplementedError

10 changes: 8 additions & 2 deletions mmdet/ops/dcn/modules/deform_conv.py
@@ -4,8 +4,6 @@
import torch.nn as nn
from torch.nn.modules.utils import _pair

from ..functions.deform_conv import deform_conv, modulated_deform_conv


class DeformConv(nn.Module):

@@ -52,6 +50,8 @@ def reset_parameters(self):
self.weight.data.uniform_(-stdv, stdv)

def forward(self, x, offset):
from ..functions.deform_conv import deform_conv

return deform_conv(x, offset, self.weight, self.stride, self.padding,
self.dilation, self.groups, self.deformable_groups)

@@ -76,6 +76,8 @@ def init_offset(self):
self.conv_offset.bias.data.zero_()

def forward(self, x):
from ..functions.deform_conv import deform_conv

offset = self.conv_offset(x)
return deform_conv(x, offset, self.weight, self.stride, self.padding,
self.dilation, self.groups, self.deformable_groups)
@@ -123,6 +125,8 @@ def reset_parameters(self):
self.bias.data.zero_()

def forward(self, x, offset, mask):
from ..functions.deform_conv import modulated_deform_conv

return modulated_deform_conv(x, offset, mask, self.weight, self.bias,
self.stride, self.padding, self.dilation,
self.groups, self.deformable_groups)
@@ -148,6 +152,8 @@ def init_offset(self):
self.conv_offset_mask.bias.data.zero_()

def forward(self, x):
from ..functions.deform_conv import modulated_deform_conv

out = self.conv_offset_mask(x)
o1, o2, mask = torch.chunk(out, 3, dim=1)
offset = torch.cat((o1, o2), dim=1)
42 changes: 21 additions & 21 deletions mmdet/ops/dcn/src/deform_conv_cuda.cpp
@@ -62,26 +62,26 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
at::Tensor weight, int kH, int kW, int dH, int dW, int padH,
int padW, int dilationH, int dilationW, int group,
int deformable_group) {
AT_CHECK(weight.ndimension() == 4,
TORCH_CHECK(weight.ndimension() == 4,
"4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
"but got: %s",
weight.ndimension());

AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");

AT_CHECK(kW > 0 && kH > 0,
TORCH_CHECK(kW > 0 && kH > 0,
"kernel size should be greater than zero, but got kH: %d kW: %d", kH,
kW);

AT_CHECK((weight.size(2) == kH && weight.size(3) == kW),
TORCH_CHECK((weight.size(2) == kH && weight.size(3) == kW),
"kernel size should be consistent with weight, ",
"but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH,
kW, weight.size(2), weight.size(3));

AT_CHECK(dW > 0 && dH > 0,
TORCH_CHECK(dW > 0 && dH > 0,
"stride should be greater than zero, but got dH: %d dW: %d", dH, dW);

AT_CHECK(
TORCH_CHECK(
dilationW > 0 && dilationH > 0,
"dilation should be greater than 0, but got dilationH: %d dilationW: %d",
dilationH, dilationW);
@@ -97,7 +97,7 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
dimw++;
}

AT_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s",
TORCH_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s",
ndim);

long nInputPlane = weight.size(1) * group;
@@ -109,7 +109,7 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
long outputWidth =
(inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;

AT_CHECK(nInputPlane % deformable_group == 0,
TORCH_CHECK(nInputPlane % deformable_group == 0,
"input channels must divide deformable group size");

if (outputWidth < 1 || outputHeight < 1)
@@ -119,27 +119,27 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight,
outputWidth);

AT_CHECK(input.size(1) == nInputPlane,
TORCH_CHECK(input.size(1) == nInputPlane,
"invalid number of input planes, expected: %d, but got: %d",
nInputPlane, input.size(1));

AT_CHECK((inputHeight >= kH && inputWidth >= kW),
TORCH_CHECK((inputHeight >= kH && inputWidth >= kW),
"input image is smaller than kernel");

AT_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth),
TORCH_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth),
"invalid spatial size of offset, expected height: %d width: %d, but "
"got height: %d width: %d",
outputHeight, outputWidth, offset.size(2), offset.size(3));

AT_CHECK((offset.size(1) == deformable_group * 2 * kH * kW),
TORCH_CHECK((offset.size(1) == deformable_group * 2 * kH * kW),
"invalid number of channels of offset");

if (gradOutput != NULL) {
AT_CHECK(gradOutput->size(dimf) == nOutputPlane,
TORCH_CHECK(gradOutput->size(dimf) == nOutputPlane,
"invalid number of gradOutput planes, expected: %d, but got: %d",
nOutputPlane, gradOutput->size(dimf));

AT_CHECK((gradOutput->size(dimh) == outputHeight &&
TORCH_CHECK((gradOutput->size(dimh) == outputHeight &&
gradOutput->size(dimw) == outputWidth),
"invalid size of gradOutput, expected height: %d width: %d , but "
"got height: %d width: %d",
@@ -189,7 +189,7 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
long outputHeight =
(inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;

AT_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");

output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane,
outputHeight, outputWidth});
@@ -295,7 +295,7 @@ int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset,
long outputHeight =
(inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;

AT_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset");
TORCH_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset");
gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth});
columns = at::zeros(
{nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},
@@ -410,7 +410,7 @@ int deform_conv_backward_parameters_cuda(
long outputHeight =
(inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;

AT_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");

columns = at::zeros(
{nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},
@@ -490,8 +490,8 @@ void modulated_deform_conv_cuda_forward(
const int pad_h, const int pad_w, const int dilation_h,
const int dilation_w, const int group, const int deformable_group,
const bool with_bias) {
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");

const int batch = input.size(0);
const int channels = input.size(1);
@@ -571,8 +571,8 @@ void modulated_deform_conv_cuda_backward(
int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
const bool with_bias) {
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");

const int batch = input.size(0);
const int channels = input.size(1);
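In the C++ sources, every `AT_CHECK` becomes `TORCH_CHECK`: `AT_CHECK` was deprecated and later removed from ATen, so building these extensions against the recent PyTorch releases that support CUDA 11.7 requires the newer macro. Note that `TORCH_CHECK` concatenates its trailing arguments into the error message rather than doing printf-style substitution, so the `%d`/`%s` placeholders carried over from the old calls stay literal text. A small, self-contained sketch of the macro's idiomatic use (the helper and its arguments are illustrative, not this file's exact code):

```cpp
#include <torch/extension.h>

// Illustrative shape check in the style of shape_check() above.
void check_weight(const at::Tensor& weight, int kH, int kW) {
  TORCH_CHECK(weight.ndimension() == 4,
              "expected a 4D weight tensor, got ", weight.ndimension(), "D");
  TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
  TORCH_CHECK(weight.size(2) == kH && weight.size(3) == kW,
              "kernel size (", kH, ", ", kW, ") does not match weight shape (",
              weight.size(2), ", ", weight.size(3), ")");
}
```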
6 changes: 3 additions & 3 deletions mmdet/ops/dcn/src/deform_pool_cuda.cpp
@@ -32,7 +32,7 @@ void deform_psroi_pooling_cuda_forward(
at::Tensor top_count, const int no_trans, const float spatial_scale,
const int output_dim, const int group_size, const int pooled_size,
const int part_size, const int sample_per_part, const float trans_std) {
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");

const int batch = input.size(0);
const int channels = input.size(1);
@@ -57,8 +57,8 @@ void deform_psroi_pooling_cuda_backward(
const int no_trans, const float spatial_scale, const int output_dim,
const int group_size, const int pooled_size, const int part_size,
const int sample_per_part, const float trans_std) {
AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
TORCH_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");

const int batch = input.size(0);
const int channels = input.size(1);
3 changes: 2 additions & 1 deletion mmdet/ops/masked_conv/functions/masked_conv.py
@@ -2,13 +2,14 @@
import torch
from torch.autograd import Function
from torch.nn.modules.utils import _pair
from .. import masked_conv2d_cuda


class MaskedConv2dFunction(Function):

@staticmethod
def forward(ctx, features, mask, weight, bias, padding=0, stride=1):
from .. import masked_conv2d_cuda

assert mask.dim() == 3 and mask.size(0) == 1
assert features.dim() == 4 and features.size(0) == 1
assert features.size()[2:] == mask.size()[1:]
4 changes: 2 additions & 2 deletions mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp
@@ -17,9 +17,9 @@ int MaskedCol2imForwardLaucher(const at::Tensor col, const int height,
const at::Tensor mask_w_idx, const int mask_cnt,
at::Tensor im);

#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
CHECK_CONTIGUOUS(x)
4 changes: 2 additions & 2 deletions mmdet/ops/masked_conv/src/masked_conv2d_kernel.cu
@@ -67,7 +67,7 @@ int MaskedIm2colForwardLaucher(const at::Tensor bottom_data, const int height,
output_size, bottom_data_, height, width, kernel_h, kernel_w,
pad_h, pad_w, mask_h_idx_, mask_w_idx_, mask_cnt, top_data_);
}));
THCudaCheck(cudaGetLastError());
TORCH_CHECK(cudaGetLastError() == cudaSuccess);
return 1;
}

@@ -108,6 +108,6 @@ int MaskedCol2imForwardLaucher(const at::Tensor bottom_data, const int height,
output_size, bottom_data_, height, width, channels, mask_h_idx_,
mask_w_idx_, mask_cnt, top_data_);
}));
THCudaCheck(cudaGetLastError());
TORCH_CHECK(cudaGetLastError() == cudaSuccess);
return 1;
}
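The `THCudaCheck(cudaGetLastError())` calls are replaced the same way, since the THC helpers have been removed from recent PyTorch. A brief sketch of checking a kernel launch with `TORCH_CHECK` (the kernel here is a trivial stand-in, not this file's im2col/col2im kernels):

```cuda
#include <torch/extension.h>
#include <cuda_runtime.h>

__global__ void fill_ones(float* data, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) data[i] = 1.0f;
}

void launch_fill_ones(at::Tensor out) {
  const int n = out.numel();
  const int threads = 256;
  const int blocks = (n + threads - 1) / threads;
  fill_ones<<<blocks, threads>>>(out.data_ptr<float>(), n);
  // cudaGetLastError() reports launch failures; TORCH_CHECK converts them
  // into a C++ exception with a readable message instead of aborting.
  cudaError_t err = cudaGetLastError();
  TORCH_CHECK(err == cudaSuccess, "CUDA kernel failed: ", cudaGetErrorString(err));
}
```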
3 changes: 2 additions & 1 deletion mmdet/ops/nms/nms_wrapper.py
@@ -2,7 +2,6 @@
import torch

from . import nms_cuda, nms_cpu
from .soft_nms_cpu import soft_nms_cpu


def nms(dets, iou_thr, device_id=None):
@@ -50,6 +49,8 @@ def nms(dets, iou_thr, device_id=None):


def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3):
from .soft_nms_cpu import soft_nms_cpu

if isinstance(dets, torch.Tensor):
is_tensor = True
dets_np = dets.detach().cpu().numpy()
2 changes: 1 addition & 1 deletion mmdet/ops/nms/src/nms_cuda.cpp
@@ -1,7 +1,7 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#include <torch/extension.h>

#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")

at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
