[Vision] add test cases for flip, normalize, to_tensor (apache#8919)
* [vision] ut for to_tensor, normalize, flip

* [vision] fix flip

* [vision] flip name

* [vision] test non-random flip op

* remove transform.FlipXXXX
yzhliu authored and piiswrong committed Jan 22, 2018
1 parent b9569ee commit 62ffb92
Showing 4 changed files with 96 additions and 45 deletions.
python/mxnet/gluon/data/vision/transforms.py (8 additions, 8 deletions)
@@ -184,26 +184,26 @@ def forward(self, x):
         return image.imresize(x, *self._args)


-class RandomHorizontalFlip(HybridBlock):
-    """Randomly flip the input image horizontally with a probability
+class RandomFlipLeftRight(HybridBlock):
+    """Randomly flip the input image left to right with a probability
     of 0.5.
     """
     def __init__(self):
-        super(RandomHorizontalFlip, self).__init__()
+        super(RandomFlipLeftRight, self).__init__()

     def hybrid_forward(self, F, x):
-        return F.image.random_horizontal_flip(x)
+        return F.image.random_flip_left_right(x)


-class RandomVerticalFlip(HybridBlock):
-    """Randomly flip the input image vertically with a probability
+class RandomFlipTopBottom(HybridBlock):
+    """Randomly flip the input image top to bottom with a probability
     of 0.5.
     """
     def __init__(self):
-        super(RandomVerticalFlip, self).__init__()
+        super(RandomFlipTopBottom, self).__init__()

     def hybrid_forward(self, F, x):
-        return F.image.random_vertical_flip(x)
+        return F.image.random_flip_top_bottom(x)


 class RandomBrightness(HybridBlock):
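For context, here is a minimal usage sketch of the renamed transforms (not part of this commit). It assumes an MXNet build that includes this change, the existing transforms.Compose container from the same module, and an HWC uint8 input image; the mean/std values are illustrative only.

```python
from mxnet import nd
from mxnet.gluon.data.vision import transforms

# Chain the renamed flip blocks with ToTensor/Normalize.
aug = transforms.Compose([
    transforms.RandomFlipLeftRight(),   # formerly RandomHorizontalFlip
    transforms.RandomFlipTopBottom(),   # formerly RandomVerticalFlip
    transforms.ToTensor(),              # HWC uint8 -> CHW float32 in [0, 1]
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

img = nd.random.uniform(0, 255, shape=(300, 300, 3)).astype('uint8')
out = aug(img)  # (3, 300, 300) float32
```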
src/operator/image/image_random-inl.h (47 additions, 21 deletions)
@@ -85,14 +85,6 @@ void ToTensor(const nnvm::NodeAttrs &attrs,
   });
 }

-inline bool TensorShape(const nnvm::NodeAttrs& attrs,
-                        std::vector<TShape> *in_attrs,
-                        std::vector<TShape> *out_attrs) {
-  TShape& dshape = (*in_attrs)[0];
-  SHAPE_ASSIGN_CHECK(*out_attrs, 0, dshape);
-  return true;
-}
-
 struct NormalizeParam : public dmlc::Parameter<NormalizeParam> {
   nnvm::Tuple<float> mean;
   nnvm::Tuple<float> std;
@@ -179,16 +171,16 @@ inline bool ImageShape(const nnvm::NodeAttrs& attrs,
   return true;
 }

-template<typename DType>
-void FlipImpl(const TShape &shape, DType *src, DType *dst, int axis) {
+template<typename DType, int axis>
+void FlipImpl(const TShape &shape, DType *src, DType *dst) {
   int head = 1, mid = shape[axis], tail = 1;
   for (int i = 0; i < axis; ++i) head *= shape[i];
   for (int i = axis+1; i < shape.ndim(); ++i) tail *= shape[i];

   for (int i = 0; i < head; ++i) {
-    for (int j = 0; j < (mid >>2); ++j) {
-      int idx1 = (i*mid + j)*tail;
-      int idx2 = idx1 + (mid - (j<<2))*tail;
+    for (int j = 0; j < (mid >> 1); ++j) {
+      int idx1 = (i*mid + j) * tail;
+      int idx2 = idx1 + (mid-(j << 1)-1) * tail;
       for (int k = 0; k < tail; ++k, ++idx1, ++idx2) {
         DType tmp = src[idx1];
         dst[idx1] = src[idx2];
@@ -198,7 +190,31 @@ void FlipImpl(const TShape &shape, DType *src, DType *dst, int axis) {
     }
   }
 }
-void RandomHorizontalFlip(
+void FlipLeftRight(const nnvm::NodeAttrs &attrs,
+                   const OpContext &ctx,
+                   const std::vector<TBlob> &inputs,
+                   const std::vector<OpReqType> &req,
+                   const std::vector<TBlob> &outputs) {
+  using namespace mshadow;
+  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+    FlipImpl<DType, 1>(inputs[0].shape_, inputs[0].dptr<DType>(),
+                       outputs[0].dptr<DType>());
+  });
+}
+
+void FlipTopBottom(const nnvm::NodeAttrs &attrs,
+                   const OpContext &ctx,
+                   const std::vector<TBlob> &inputs,
+                   const std::vector<OpReqType> &req,
+                   const std::vector<TBlob> &outputs) {
+  using namespace mshadow;
+  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+    FlipImpl<DType, 0>(inputs[0].shape_, inputs[0].dptr<DType>(),
+                       outputs[0].dptr<DType>());
+  });
+}
+
+void RandomFlipLeftRight(
     const nnvm::NodeAttrs &attrs,
     const OpContext &ctx,
     const std::vector<TBlob> &inputs,
@@ -207,14 +223,19 @@ void RandomHorizontalFlip(
   using namespace mshadow;
   Stream<cpu> *s = ctx.get_stream<cpu>();
   Random<cpu> *prnd = ctx.requested[0].get_random<cpu, float>(s);
-  if (std::bernoulli_distribution()(prnd->GetRndEngine())) return;
   MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
-    FlipImpl(inputs[0].shape_, inputs[0].dptr<DType>(),
-             outputs[0].dptr<DType>(), 1);
+    if (std::bernoulli_distribution()(prnd->GetRndEngine())) {
+      if (outputs[0].dptr_ != inputs[0].dptr_) {
+        std::memcpy(outputs[0].dptr_, inputs[0].dptr_, inputs[0].Size() * sizeof(DType));
+      }
+    } else {
+      FlipImpl<DType, 1>(inputs[0].shape_, inputs[0].dptr<DType>(),
+                         outputs[0].dptr<DType>());
+    }
   });
 }

-void RandomVerticalFlip(
+void RandomFlipTopBottom(
     const nnvm::NodeAttrs &attrs,
     const OpContext &ctx,
     const std::vector<TBlob> &inputs,
@@ -223,10 +244,15 @@ void RandomVerticalFlip(
   using namespace mshadow;
   Stream<cpu> *s = ctx.get_stream<cpu>();
   Random<cpu> *prnd = ctx.requested[0].get_random<cpu, float>(s);
-  if (std::bernoulli_distribution()(prnd->GetRndEngine())) return;
   MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
-    FlipImpl(inputs[0].shape_, inputs[0].dptr<DType>(),
-             outputs[0].dptr<DType>(), 0);
+    if (std::bernoulli_distribution()(prnd->GetRndEngine())) {
+      if (outputs[0].dptr_ != inputs[0].dptr_) {
+        std::memcpy(outputs[0].dptr_, inputs[0].dptr_, inputs[0].Size() * sizeof(DType));
+      }
+    } else {
+      FlipImpl<DType, 0>(inputs[0].shape_, inputs[0].dptr<DType>(),
+                         outputs[0].dptr<DType>());
+    }
   });
 }

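To make the FlipImpl fix above easier to follow: the old loop ran j only up to mid >> 2 and computed the partner index as mid - (j << 2), which pairs element j with element mid - 3j (out of range at j = 0) and skips most of the axis; the corrected loop pairs element j with its mirror mid - 1 - j for j in [0, mid >> 1). Below is a small NumPy sketch (not part of the commit) that mirrors the corrected index arithmetic.

```python
import numpy as np

def flip_axis(src, axis):
    """Reference model of the corrected FlipImpl: swap element j with
    element mid - 1 - j along `axis`, for j in [0, mid // 2)."""
    dst = src.copy()  # a middle element (odd mid) carries over from the copy
    head = int(np.prod(src.shape[:axis], dtype=np.int64))
    mid = src.shape[axis]
    tail = int(np.prod(src.shape[axis + 1:], dtype=np.int64))
    flat_src, flat_dst = src.reshape(-1), dst.reshape(-1)
    for i in range(head):
        for j in range(mid >> 1):                      # was (mid >> 2) before the fix
            idx1 = (i * mid + j) * tail
            idx2 = idx1 + (mid - (j << 1) - 1) * tail  # was mid - (j << 2) before the fix
            for k in range(tail):
                flat_dst[idx1 + k] = flat_src[idx2 + k]
                flat_dst[idx2 + k] = flat_src[idx1 + k]
    return dst

x = np.arange(2 * 4 * 3).reshape(2, 4, 3)
assert np.array_equal(flip_axis(x, 1), x[:, ::-1, :])  # axis=1: flip left-right on HWC data
assert np.array_equal(flip_axis(x, 0), x[::-1])        # axis=0: flip top-bottom on HWC data
```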
src/operator/image/image_random.cc (12 additions, 4 deletions)
@@ -48,7 +48,6 @@ NNVM_REGISTER_OP(_image_to_tensor)
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
 .add_argument("data", "NDArray-or-Symbol", "The input.");

-
 NNVM_REGISTER_OP(_image_normalize)
 .describe(R"code()code" ADD_FILELINE)
 .set_num_inputs(1)
@@ -65,19 +64,28 @@ NNVM_REGISTER_OP(_image_normalize)
 .add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(NormalizeParam::__FIELDS__());

+MXNET_REGISTER_IMAGE_AUG_OP(_image_flip_left_right)
+.describe(R"code()code" ADD_FILELINE)
+.set_attr<FCompute>("FCompute<cpu>", FlipLeftRight);
+
+MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_flip_left_right)
+.describe(R"code()code" ADD_FILELINE)
+.set_attr<FCompute>("FCompute<cpu>", RandomFlipLeftRight);

-MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_horizontal_flip)
+MXNET_REGISTER_IMAGE_AUG_OP(_image_flip_top_bottom)
 .describe(R"code()code" ADD_FILELINE)
-.set_attr<FCompute>("FCompute<cpu>", RandomHorizontalFlip);
+.set_attr<FCompute>("FCompute<cpu>", FlipTopBottom);

+MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_flip_top_bottom)
+.describe(R"code()code" ADD_FILELINE)
+.set_attr<FCompute>("FCompute<cpu>", RandomFlipTopBottom);

 MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_brightness)
 .describe(R"code()code" ADD_FILELINE)
 .set_attr_parser(ParamParser<RandomEnhanceParam>)
 .set_attr<FCompute>("FCompute<cpu>", RandomBrightness)
 .add_arguments(RandomEnhanceParam::__FIELDS__());

-
 MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_contrast)
 .describe(R"code()code" ADD_FILELINE)
 .set_attr_parser(ParamParser<RandomEnhanceParam>)
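A quick sanity check of the newly registered non-random ops (again a sketch, not part of the commit; it assumes an MXNet build that includes these registrations): flipping twice along the same axis should return the original image.

```python
import numpy as np
import mxnet.ndarray as nd

# Small even-sized HWC uint8 image.
img = nd.array(np.random.uniform(0, 255, (4, 6, 3)).astype(np.uint8), dtype='uint8')

# Deterministic flips registered above; applying each one twice is the identity.
lr = nd.image.flip_left_right(img)
tb = nd.image.flip_top_bottom(img)
assert np.array_equal(nd.image.flip_left_right(lr).asnumpy(), img.asnumpy())
assert np.array_equal(nd.image.flip_top_bottom(tb).asnumpy(), img.asnumpy())
```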
tests/python/unittest/test_gluon_data_vision.py (29 additions, 12 deletions)
@@ -18,22 +18,39 @@
 import mxnet as mx
 import mxnet.ndarray as nd
 import numpy as np
+from PIL import Image
 from mxnet import gluon
-from mxnet.gluon.data.vision.transforms import AdjustLighting
+from mxnet.gluon.data.vision import transforms
 from mxnet.test_utils import assert_almost_equal
+from mxnet.test_utils import almost_equal

-def test_adjust_lighting():
+def test_to_tensor():
     data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
-    alpha_rgb = [0.05, 0.06, 0.07]
-    eigval = np.array([55.46, 4.794, 1.148])
-    eigvec = np.array([[-0.5675, 0.7192, 0.4009],
-                       [-0.5808, -0.0045, -0.8140],
-                       [-0.5808, -0.0045, -0.8140]])
-    f = AdjustLighting(alpha_rgb=alpha_rgb, eigval=eigval.ravel().tolist(), eigvec=eigvec.ravel().tolist())
-    out_nd = f(nd.array(data_in, dtype=np.uint8))
-    out_gt = np.clip(data_in.astype(np.float32)
-                     + np.dot(eigvec * alpha_rgb, eigval.reshape((3, 1))).reshape((1, 1, 3)), 0, 255).astype(np.uint8)
-    assert_almost_equal(out_nd.asnumpy(), out_gt)
+    out_nd = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
+    assert_almost_equal(out_nd.asnumpy(), np.transpose(
+        data_in.astype(dtype=np.float32) / 255.0, (2, 0, 1)))
+
+def test_normalize():
+    data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
+    data_in = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
+    out_nd = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))(data_in)
+    data_expected = data_in.asnumpy()
+    data_expected[:][:][0] = data_expected[:][:][0] / 3.0
+    data_expected[:][:][1] = (data_expected[:][:][1] - 1.0) / 2.0
+    data_expected[:][:][2] = data_expected[:][:][2] - 2.0
+    assert_almost_equal(data_expected, out_nd.asnumpy())
+
+def test_flip_left_right():
+    data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
+    pil_img = Image.fromarray(data_in).transpose(Image.FLIP_LEFT_RIGHT)
+    data_trans = nd.image.flip_left_right(nd.array(data_in, dtype='uint8'))
+    assert_almost_equal(np.array(pil_img), data_trans.asnumpy())
+
+def test_flip_top_bottom():
+    data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
+    pil_img = Image.fromarray(data_in).transpose(Image.FLIP_TOP_BOTTOM)
+    data_trans = nd.image.flip_top_bottom(nd.array(data_in, dtype='uint8'))
+    assert_almost_equal(np.array(pil_img), data_trans.asnumpy())

 if __name__ == '__main__':
     import nose
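One reading note on test_normalize above: `data_expected[:][:][k]` happens to select channel k only because each `[:]` returns the whole array, so the final `[k]` indexes axis 0 of the CHW tensor. An equivalent, more explicit NumPy formulation of the expected value (not part of the commit):

```python
import numpy as np

mean = np.array([0, 1, 2], dtype=np.float32).reshape(3, 1, 1)
std = np.array([3, 2, 1], dtype=np.float32).reshape(3, 1, 1)

chw = np.random.uniform(0, 1, (3, 300, 300)).astype(np.float32)  # stand-in for ToTensor output
expected = (chw - mean) / std  # channel-wise (x - mean) / std, matching the test's expectation
```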
