Skip to content

Commit

Permalink
Add torch.nn.functional.conv2d (#70)
Browse files Browse the repository at this point in the history
* Refactor convolution code

* Support torch.nn.functional.conv2d

* Support for same and valid padding

* View supports also args
  • Loading branch information
alessandropalla authored Jun 24, 2024
1 parent b67bd8b commit ed0993b
Show file tree
Hide file tree
Showing 9 changed files with 165 additions and 55 deletions.
6 changes: 2 additions & 4 deletions intel_npu_acceleration_library/backend/bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,8 @@ def init_network_factory(lib: ctypes.CDLL):
lib.linear.restype = handler

lib.convolution.argtypes = [
handler,
handler,
handler,
handler,
ctypes.c_int,
Expand All @@ -172,10 +174,6 @@ def init_network_factory(lib: ctypes.CDLL):
ctypes.c_int,
c_u32_array,
ctypes.c_int,
c_u32_array,
ctypes.c_int,
ctypes.c_bool,
ctypes.c_char_p,
ctypes.c_char_p,
]
lib.convolution.restype = handler
Expand Down
27 changes: 7 additions & 20 deletions intel_npu_acceleration_library/backend/convolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,34 +38,21 @@ def __init__(
"""
super().__init__(profile, device)
input = self.parameter(input_shape)

# Get the number of spatial dimensions
n_spatial_dims = len(input_shape) - 2

if isinstance(strides, int):
strides = [strides] * n_spatial_dims

if isinstance(padding, int):
padding_begins = [padding] * n_spatial_dims
padding_ends = [padding] * n_spatial_dims
weights = self.parameter(weights_shape)
if bias is not None:
bias_node = self.parameter((1, weights_shape[0], 1, 1))
else:
padding_begins = list(padding)
padding_ends = list(padding)

if isinstance(dilation, int):
dilation = [dilation] * n_spatial_dims
bias_node = None

conv = self.convolution(
input,
weights_shape,
bias=bias,
weights,
bias=bias_node,
strides=strides,
padding_begins=padding_begins,
padding_ends=padding_ends,
padding=padding,
dilation=dilation,
groups=groups,
act_dtype=np.float16,
wt_dtype=np.float16,
)

self.compile(conv)
49 changes: 32 additions & 17 deletions intel_npu_acceleration_library/backend/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ def constant(
data = np.array([data], dtype=np.float32)
elif isinstance(data, torch.Tensor):
data = data.detach().numpy()
elif data is None:
return ctypes.cast(ctypes.c_void_p(0), ctypes.POINTER(ctypes.c_char))

dst = data.ctypes.data_as(ctypes.c_void_p)
shape_ptr = np.array(data.shape, dtype=np.uint32)
Expand All @@ -199,44 +201,59 @@ def constant(
def convolution(
self,
input_node: ctypes._Pointer,
weights_shape: Sequence[int],
bias: bool,
strides: Sequence[int] = (1, 1),
padding_begins: Sequence[int] = (0, 0),
padding_ends: Sequence[int] = (0, 0),
dilation: Sequence[int] = (1, 1),
weights_node: ctypes._Pointer,
bias: Optional[ctypes._Pointer] = None,
strides: Union[int, Sequence[int]] = 1,
padding: Union[int, Sequence[int]] = 0,
dilation: Union[int, Sequence[int]] = 1,
groups: int = 1,
act_dtype: npt.DTypeLike = np.float16,
wt_dtype: npt.DTypeLike = np.float16,
n_spatial_dims: int = 2,
) -> ctypes._Pointer:
"""Generate a convolution layer.
Args:
input_node (ctypes._Pointer): layer input node
weights_shape (Sequence[int]): weights shape
weights_node (ctypes._Pointer): weights node
bias (Optional[ctypes._Pointer]): bias node
strides (Sequence[int]): strides
padding_begins (Sequence[int]): padding
padding_ends (Sequence[int]): padding
padding (Sequence[int]): padding
dilation (Sequence[int]): dilation
groups (int): groups
bias (bool): enable/disable bias
act_dtype (npt.DTypeLike, optional): activation dtype. Defaults to np.float16.
wt_dtype (npt.DTypeLike, optional): weight dtype. Defaults to np.float16.
n_spatial_dims (int): number of spatial dimensions
Returns:
ctypes._Pointer: output node
"""
weights_shape_ptr = np.array(weights_shape, dtype=np.uint32)
if isinstance(strides, int):
strides = [strides] * n_spatial_dims

if isinstance(padding, int):
padding_begins = [padding] * n_spatial_dims
padding_ends = [padding] * n_spatial_dims
else:
padding_begins = list(padding)
padding_ends = list(padding)

if isinstance(dilation, int):
dilation = [dilation] * n_spatial_dims

strides_ptr = np.array(strides, dtype=np.uint32)
padding_begins_ptr = np.array(padding_begins, dtype=np.uint32)
padding_ends_ptr = np.array(padding_ends, dtype=np.uint32)
dilation_ptr = np.array(dilation, dtype=np.uint32)

if bias is not None:
bias_node = bias
else:
bias_node = ctypes.cast(ctypes.c_void_p(0), ctypes.POINTER(ctypes.c_char))

return backend_lib.convolution(
self._mm,
input_node,
weights_shape_ptr.size,
weights_shape_ptr,
weights_node,
bias_node,
strides_ptr.size,
strides_ptr,
padding_begins_ptr.size,
Expand All @@ -246,9 +263,7 @@ def convolution(
dilation_ptr.size,
dilation_ptr,
groups,
bias,
self.get_backend_dtype(act_dtype),
self.get_backend_dtype(wt_dtype),
)

@return_tensor
Expand Down
7 changes: 5 additions & 2 deletions intel_npu_acceleration_library/backend/tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,16 +335,19 @@ def reshape(self, *shape: Union[int, Sequence[int]]) -> "Tensor":
shape = shape[0] # type: ignore
return generate_op([self], "reshape", shape)

def view(self, shape: Sequence[int]) -> "Tensor":
def view(self, *shape: Union[Sequence[int], int]) -> "Tensor":
"""
Return a view of the tensor with the given shape.
Args:
shape (Sequence[int]): The new shape of the tensor.
shape (Union[Sequence[int], int]): The new shape of the tensor.
Returns:
Tensor: The reshaped tensor.
"""
if len(shape) == 1 and isinstance(shape[0], (list, tuple)):
shape = shape[0] # type: ignore

return self.reshape(*shape)

def flatten(self, start_dim=0, end_dim=-1) -> "Tensor":
Expand Down
2 changes: 1 addition & 1 deletion intel_npu_acceleration_library/nn/conv.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def __init__(
self.backend_cls = partial(
Convolution,
weights_shape=weights.shape,
bias=bias is not None,
bias=bias,
strides=strides,
padding=padding,
dilation=dilation,
Expand Down
54 changes: 54 additions & 0 deletions intel_npu_acceleration_library/nn/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -928,3 +928,57 @@ def batch_norm(
result = result + bias.view(1, -1, 1, 1)

return result


@implements(torch.nn.functional.conv2d)
def conv2d(
    input: Tensor,
    weight: Union[Tensor, torch.Tensor],
    bias: Optional[Union[Tensor, torch.Tensor]] = None,
    stride: int = 1,
    padding: Union[int, str] = 0,
    dilation: int = 1,
    groups: int = 1,
) -> Tensor:
    """Generate a 2D convolution layer.

    Args:
        input (Tensor): layer input node
        weight (Union[Tensor, torch.Tensor]): convolution weights; the first
            dimension is the number of output channels and the trailing
            dimensions (from index 2) are the kernel spatial sizes
        bias (Optional[Union[Tensor, torch.Tensor]]): optional bias, reshaped
            to ``(1, out_channels, 1, 1)`` before being added. Defaults to None.
        stride (int): stride. Defaults to 1.
        padding (Union[int, str]): explicit padding, or one of the string
            modes ``"valid"`` (no padding) and ``"same"``. Defaults to 0.
        dilation (int): dilation. Defaults to 1.
        groups (int): number of groups. Defaults to 1.

    Raises:
        ValueError: Padding mode not supported

    Returns:
        Tensor: output node
    """
    if isinstance(padding, str):
        if padding == "valid":
            padding = 0
        elif padding == "same":
            # "same" padding must cover the *dilated* kernel extent of every
            # spatial dimension: total = dilation * (kernel - 1). Padding is
            # applied symmetrically, so each side gets the ceiling of half
            # that amount. For dilation == 1 this is exactly kernel // 2.
            kernel_dims = list(weight.shape[2:])
            if isinstance(dilation, int):
                dilations = [dilation] * len(kernel_dims)
            else:
                dilations = list(dilation)
            same_padding = [
                (dil * (kernel - 1) + 1) // 2
                for kernel, dil in zip(kernel_dims, dilations)
            ]
            # Keep passing a plain int whenever every dimension agrees, so the
            # common square-kernel case reaches the backend unchanged.
            if len(set(same_padding)) > 1:
                padding = same_padding  # type: ignore
            else:
                padding = same_padding[0]
        else:
            raise ValueError(f"Padding mode {padding} not supported")

    if bias is not None:
        # Reshape so the bias is added per output channel.
        bias = bias.view((1, weight.shape[0], 1, 1))

    if groups > 1:
        # Grouped convolutions take the weights with an explicit leading
        # group dimension: (groups, out_channels // groups, ...).
        new_shape = [groups, weight.shape[0] // groups] + list(weight.shape[1:])
        weight = weight.view(new_shape)

    conv = generate_op(
        [input, weight, bias],
        "convolution",
        strides=stride,
        padding=padding,
        dilation=dilation,
        groups=groups,
    )

    return conv
20 changes: 10 additions & 10 deletions src/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,24 +425,25 @@ intel_npu_acceleration_library_DLL_API ov::op::Op* linear(intel_npu_acceleration
return mm;
}

intel_npu_acceleration_library_DLL_API ov::op::Op* convolution(
intel_npu_acceleration_library::ModelFactory* factory, ov::op::Op* in0, size_t weight_shape_size,
unsigned int* weight_shape_data, size_t strides_size, unsigned int* strides_data, size_t pad_begins_size,
unsigned int* pad_begins_data, size_t pad_ends_size, unsigned int* pad_ends_data, size_t dilations_size,
unsigned int* dilations_data, size_t groups, bool bias, char* act_dtype, char* wt_dtype) {
intel_npu_acceleration_library_DLL_API ov::op::Op* convolution(intel_npu_acceleration_library::ModelFactory* factory,
ov::op::Op* in0, ov::op::Op* weights, ov::op::Op* bias,
size_t strides_size, unsigned int* strides_data,
size_t pad_begins_size, unsigned int* pad_begins_data,
size_t pad_ends_size, unsigned int* pad_ends_data,
size_t dilations_size, unsigned int* dilations_data,
size_t groups, char* act_dtype) {
ov::element::Type_t act_ov_dtype = intel_npu_acceleration_library::dtype_from_string(std::string(act_dtype));
ov::element::Type_t wt_ov_dtype = intel_npu_acceleration_library::dtype_from_string(std::string(wt_dtype));

// Create vectors from the input data
std::vector<size_t> weight_shape(weight_shape_data, weight_shape_data + weight_shape_size);
std::vector<size_t> strides(strides_data, strides_data + strides_size);
std::vector<size_t> pad_begins(pad_begins_data, pad_begins_data + pad_begins_size);
std::vector<size_t> pad_ends(pad_ends_data, pad_ends_data + pad_ends_size);
std::vector<size_t> dilations(dilations_data, dilations_data + dilations_size);

bool quantized = wt_ov_dtype == ov::element::Type_t::i8 || wt_ov_dtype == ov::element::Type_t::i4;
auto weight_shape = weights->get_output_shape(0);
auto wt_ov_dtype = static_cast<ov::element::Type_t>(weights->get_output_element_type(0));

auto weights = factory->parameter(weight_shape, wt_ov_dtype);
bool quantized = wt_ov_dtype == ov::element::Type_t::i8 || wt_ov_dtype == ov::element::Type_t::i4;

if (quantized) {
weights = factory->convert_to(weights, act_ov_dtype);
Expand All @@ -459,7 +460,6 @@ intel_npu_acceleration_library_DLL_API ov::op::Op* convolution(
}

if (bias) {
auto bias = factory->parameter({1, weight_shape[0], 1, 1}, act_ov_dtype);
return factory->eltwise_add(mm, bias);
}
return mm;
Expand Down
49 changes: 49 additions & 0 deletions test/python/test_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,3 +322,52 @@ def test_batch_norm(shape, mean, variance, weight, bias):
result = model.run(x.numpy())

assert 1 - r2_score(reference.flatten(), result.flatten()) < 0.01


@pytest.mark.parametrize("in_channels", [32, 128, 256])
@pytest.mark.parametrize("out_channels", [32, 128, 256])
@pytest.mark.parametrize("kernels", [1, 3])
@pytest.mark.parametrize("dim", [16, 32])
@pytest.mark.parametrize("bias", [True, False])
@pytest.mark.parametrize("dtype", [torch.float16])
@pytest.mark.parametrize("stride", [1, 2])
@pytest.mark.parametrize("padding", [0, 1, "same", "valid"])
@pytest.mark.parametrize("groups", [1, -1])
def test_conv(
    in_channels, out_channels, kernels, dim, bias, dtype, stride, padding, groups
):
    """Check conv2d built through NNFactory against the torch reference.

    A ``groups`` value of -1 stands for a depthwise convolution, i.e. one
    group per input channel.
    """
    torch.manual_seed(42)

    # Unsupported parameter combinations are skipped before any data is drawn.
    depthwise = groups != 1
    if depthwise and in_channels != out_channels:
        pytest.skip("DW convolutions require in_channels == out_channels")
    if padding == "same" and stride > 1:
        pytest.skip("padding='same' is not supported for strided convolutions")

    n_groups = in_channels if groups == -1 else groups

    # Random tensors are created in a fixed order (input, weight, bias) so
    # the seeded generator yields the same values on every run.
    x = torch.rand((1, in_channels, dim, dim)).to(torch.float16)
    weight_shape = (out_channels, in_channels // n_groups, kernels, kernels)
    weight = torch.rand(weight_shape).to(torch.float16)
    if bias:
        bias_tensor = torch.rand((out_channels,)).to(torch.float16)
    else:
        bias_tensor = None

    expected = torch.nn.functional.conv2d(
        x, weight, bias_tensor, stride, padding, groups=n_groups
    )
    reference = expected.detach().numpy()

    # Build the same convolution on a graph parameter.
    model = NNFactory()
    par = model.parameter(x.shape, np.float16)
    out = torch.nn.functional.conv2d(
        par, weight, bias_tensor, stride, padding, groups=n_groups
    )
    model.compile(out)

    assert out.shape == list(reference.shape)

    result = model.run(x.numpy())

    assert 1 - r2_score(reference.flatten(), result.flatten()) < 0.01
6 changes: 5 additions & 1 deletion test/python/test_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,11 @@ def test_model_creation():

assert ff.dim() == 3

model.compile(ff)
gg = ff.view(1, -1, 1, 1)

assert gg.shape == [1, 32 * 128 * 64, 1, 1]

model.compile(gg)


def test_slice():
Expand Down

0 comments on commit ed0993b

Please sign in to comment.