diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index eb206fc2f9d9..33167fab20f5 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -135,7 +135,7 @@ class CV_EXPORTS _InputArray bool isUMat() const; bool isMatVector() const; bool isUMatVector() const; - bool isMatx(); + bool isMatx() const; virtual ~_InputArray(); diff --git a/modules/core/include/opencv2/core/mat.inl.hpp b/modules/core/include/opencv2/core/mat.inl.hpp index d289e3a2fe73..d463eec671a7 100644 --- a/modules/core/include/opencv2/core/mat.inl.hpp +++ b/modules/core/include/opencv2/core/mat.inl.hpp @@ -112,7 +112,7 @@ inline bool _InputArray::isMat() const { return kind() == _InputArray::MAT; } inline bool _InputArray::isUMat() const { return kind() == _InputArray::UMAT; } inline bool _InputArray::isMatVector() const { return kind() == _InputArray::STD_VECTOR_MAT; } inline bool _InputArray::isUMatVector() const { return kind() == _InputArray::STD_VECTOR_UMAT; } -inline bool _InputArray::isMatx() { return kind() == _InputArray::MATX; } +inline bool _InputArray::isMatx() const { return kind() == _InputArray::MATX; } //////////////////////////////////////////////////////////////////////////////////////// diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index 5db8ef7d88e5..fb9ec24c5605 100644 --- a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -169,6 +169,10 @@ class CV_EXPORTS Device VENDOR_NVIDIA=3 }; int vendorID() const; + // FIXIT + // dev.isAMD() doesn't work for OpenCL CPU devices from AMD OpenCL platform. + // This method should use platform name instead of vendor name. 
+ // After fix restore code in arithm.cpp: ocl_compare() inline bool isAMD() const { return vendorID() == VENDOR_AMD; } inline bool isIntel() const { return vendorID() == VENDOR_INTEL; } diff --git a/modules/core/perf/opencl/perf_arithm.cpp b/modules/core/perf/opencl/perf_arithm.cpp index f4680aacbd6c..7e0e00edc498 100644 --- a/modules/core/perf/opencl/perf_arithm.cpp +++ b/modules/core/perf/opencl/perf_arithm.cpp @@ -540,7 +540,7 @@ typedef TestBaseWithParam CompareFixture; OCL_PERF_TEST_P(CompareFixture, Compare, ::testing::Combine(OCL_TEST_SIZES, - OCL_TEST_TYPES, CmpCode::all())) + OCL_TEST_TYPES_134, CmpCode::all())) { const CompareParams params = GetParam(); const Size srcSize = get<0>(params); @@ -549,7 +549,7 @@ OCL_PERF_TEST_P(CompareFixture, Compare, checkDeviceMaxMemoryAllocSize(srcSize, type); - UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, CV_8UC1); + UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, CV_8UC(CV_MAT_CN(type))); declare.in(src1, src2, WARMUP_RNG).out(dst); OCL_TEST_CYCLE() cv::compare(src1, src2, dst, cmpCode); @@ -557,6 +557,26 @@ OCL_PERF_TEST_P(CompareFixture, Compare, SANITY_CHECK(dst); } +OCL_PERF_TEST_P(CompareFixture, CompareScalar, + ::testing::Combine(OCL_TEST_SIZES, + OCL_PERF_ENUM((MatType)CV_32FC1), // TODO: OCL_TEST_TYPES_134 + CmpCode::all())) +{ + const CompareParams params = GetParam(); + const Size srcSize = get<0>(params); + const int type = get<1>(params); + const int cmpCode = get<2>(params); + + checkDeviceMaxMemoryAllocSize(srcSize, type); + + UMat src1(srcSize, type), dst(srcSize, CV_8UC(CV_MAT_CN(type))); + declare.in(src1, WARMUP_RNG).out(dst); + + OCL_TEST_CYCLE() cv::compare(src1, 32, dst, cmpCode); + + SANITY_CHECK(dst); +} + ///////////// pow //////////////////////// typedef Size_MatType PowFixture; diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 436239cb5893..5672c02ad900 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -2617,44 
+2617,90 @@ static double getMaxVal(int depth) #ifdef HAVE_OPENCL -static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op) +static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op, bool haveScalar) { - if ( !((_src1.isMat() || _src1.isUMat()) && (_src2.isMat() || _src2.isUMat())) ) - return false; + const ocl::Device& dev = ocl::Device::getDefault(); + bool doubleSupport = dev.doubleFPConfig() > 0; + int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1); + int type2 = _src2.type(); - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; - int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), type2 = _src2.type(); - if ( (!doubleSupport && (depth == CV_64F || _src2.depth() == CV_64F)) || - !_src1.sameSize(_src2) || type != type2) + if (!haveScalar) + { + if ( (!doubleSupport && (depth1 == CV_64F || _src2.depth() == CV_64F)) || + !_src1.sameSize(_src2) || type1 != type2) + return false; + } + else + { + if (cn > 1 || depth1 <= CV_32S) // FIXIT: if (cn > 4): Need to clarify CPU-based compare behavior + return false; + } + + if (!doubleSupport && depth1 == CV_64F) return false; - int kercn = ocl::predictOptimalVectorWidth(_src1, _src2, _dst); + int kercn = haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst); + // Workaround for bug with "?:" operator in AMD OpenCL compiler + bool workaroundForAMD = /*dev.isAMD() &&*/ + ( + (depth1 != CV_8U && depth1 != CV_8S) + ); + if (workaroundForAMD) + kercn = 1; + + int scalarcn = kercn == 3 ? 
4 : kercn; + const char * const operationMap[] = { "==", ">", ">=", "<", "<=", "!=" }; char cvt[40]; - ocl::Kernel k("KF", ocl::core::arithm_oclsrc, - format("-D BINARY_OP -D srcT1=%s -D dstT=%s -D workT=srcT1 -D cn=%d" - " -D convertToDT=%s -D OP_CMP -D CMP_OPERATOR=%s%s -D srcT1_C1=%s" - " -D srcT2_C1=%s -D dstT_C1=%s", - ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), - ocl::typeToStr(CV_8UC(kercn)), kercn, - ocl::convertTypeStr(depth, CV_8U, kercn, cvt), - operationMap[op], doubleSupport ? " -D DOUBLE_SUPPORT" : "", - ocl::typeToStr(depth), ocl::typeToStr(depth), ocl::typeToStr(CV_8U))); + String buildOptions = format( + "-D %s -D srcT1=%s -D dstT=%s -D workT=srcT1 -D cn=%d" + " -D convertToDT=%s -D OP_CMP -D CMP_OPERATOR=%s -D srcT1_C1=%s" + " -D srcT2_C1=%s -D dstT_C1=%s -D workST=%s%s", + (haveScalar ? "UNARY_OP" : "BINARY_OP"), + ocl::typeToStr(CV_MAKE_TYPE(depth1, kercn)), + ocl::typeToStr(CV_8UC(kercn)), kercn, + ocl::convertTypeStr(depth1, CV_8U, kercn, cvt), + operationMap[op], + ocl::typeToStr(depth1), ocl::typeToStr(depth1), ocl::typeToStr(CV_8U), + ocl::typeToStr(CV_MAKE_TYPE(depth1, scalarcn)), + doubleSupport ? 
" -D DOUBLE_SUPPORT" : "" + ); + + ocl::Kernel k("KF", ocl::core::arithm_oclsrc, buildOptions); if (k.empty()) return false; - CV_Assert(type == type2); - UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(); + UMat src1 = _src1.getUMat(); Size size = src1.size(); - CV_Assert(size == src2.size()); - _dst.create(size, CV_8UC(cn)); UMat dst = _dst.getUMat(); - k.args(ocl::KernelArg::ReadOnlyNoSize(src1), - ocl::KernelArg::ReadOnlyNoSize(src2), - ocl::KernelArg::WriteOnly(dst, cn, kercn)); + if (haveScalar) + { + size_t esz = CV_ELEM_SIZE1(type1)*scalarcn; + double buf[4]={0,0,0,0}; + Mat src2sc = _src2.getMat(); + + if (!src2sc.empty()) + convertAndUnrollScalar(src2sc, type1, (uchar*)buf, 1); + + ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, 0, buf, esz); + + k.args(ocl::KernelArg::ReadOnlyNoSize(src1, cn, kercn), + ocl::KernelArg::WriteOnly(dst, cn, kercn), + scalararg); + } + else + { + CV_DbgAssert(type1 == type2); + UMat src2 = _src2.getUMat(); + CV_DbgAssert(size == src2.size()); + + k.args(ocl::KernelArg::ReadOnlyNoSize(src1), + ocl::KernelArg::ReadOnlyNoSize(src2), + ocl::KernelArg::WriteOnly(dst, cn, kercn)); + } size_t globalsize[2] = { dst.cols * cn / kercn, dst.rows }; return k.run(2, globalsize, NULL, false); @@ -2669,8 +2715,29 @@ void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op) CV_Assert( op == CMP_LT || op == CMP_LE || op == CMP_EQ || op == CMP_NE || op == CMP_GE || op == CMP_GT ); + bool haveScalar = false; + + if ((_src1.isMatx() + _src2.isMatx()) == 1 + || !_src1.sameSize(_src2) + || _src1.type() != _src2.type()) + { + if (checkScalar(_src1, _src2.type(), _src1.kind(), _src2.kind())) + { + op = op == CMP_LT ? CMP_GT : op == CMP_LE ? CMP_GE : + op == CMP_GE ? CMP_LE : op == CMP_GT ? 
CMP_LT : op; + // src1 is a scalar; swap it with src2 + compare(_src2, _src1, _dst, op); + return; + } + else if( !checkScalar(_src2, _src1.type(), _src2.kind(), _src1.kind()) ) + CV_Error( CV_StsUnmatchedSizes, + "The operation is neither 'array op array' (where arrays have the same size and the same type), " + "nor 'array op scalar', nor 'scalar op array'" ); + haveScalar = true; + } + CV_OCL_RUN(_src1.dims() <= 2 && _src2.dims() <= 2 && _dst.isUMat(), - ocl_compare(_src1, _src2, _dst, op)) + ocl_compare(_src1, _src2, _dst, op, haveScalar)) int kind1 = _src1.kind(), kind2 = _src2.kind(); Mat src1 = _src1.getMat(), src2 = _src2.getMat(); @@ -2685,26 +2752,6 @@ void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op) return; } - bool haveScalar = false; - - if( (kind1 == _InputArray::MATX) + (kind2 == _InputArray::MATX) == 1 || - src1.size != src2.size || src1.type() != src2.type() ) - { - if( checkScalar(src1, src2.type(), kind1, kind2) ) - { - // src1 is a scalar; swap it with src2 - swap(src1, src2); - op = op == CMP_LT ? CMP_GT : op == CMP_LE ? CMP_GE : - op == CMP_GE ? CMP_LE : op == CMP_GT ? CMP_LT : op; - } - else if( !checkScalar(src2, src1.type(), kind2, kind1) ) - CV_Error( CV_StsUnmatchedSizes, - "The operation is neither 'array op array' (where arrays have the same size and the same type), " - "nor 'array op scalar', nor 'scalar op array'" ); - haveScalar = true; - } - - int cn = src1.channels(), depth1 = src1.depth(), depth2 = src2.depth(); _dst.create(src1.dims, src1.size, CV_8UC(cn)); diff --git a/modules/core/src/opencl/arithm.cl b/modules/core/src/opencl/arithm.cl index 865b4335f4f4..5faf7de12529 100644 --- a/modules/core/src/opencl/arithm.cl +++ b/modules/core/src/opencl/arithm.cl @@ -280,8 +280,13 @@ #elif defined OP_CMP #define srcT2 srcT1 +#ifndef convertToWT1 #define convertToWT1 -#define PROCESS_ELEM storedst(convertToDT(srcelem1 CMP_OPERATOR srcelem2 ? 
(dstT)(255) : (dstT)(0))) +#endif +#define PROCESS_ELEM \ + workT __s1 = srcelem1; \ + workT __s2 = srcelem2; \ + storedst(((__s1 CMP_OPERATOR __s2) ? (dstT)(255) : (dstT)(0))) #elif defined OP_CONVERT_SCALE_ABS #undef EXTRA_PARAMS diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index e6bcf4e789bb..4dd8d150c3cf 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -119,6 +119,7 @@ PARAM_TEST_CASE(ArithmTestBase, MatDepth, Channels, bool) int cn; bool use_roi; cv::Scalar val; + cv::Scalar val_in_range; TEST_DECLARE_INPUT_PARAMETER(src1) TEST_DECLARE_INPUT_PARAMETER(src2) @@ -133,16 +134,19 @@ PARAM_TEST_CASE(ArithmTestBase, MatDepth, Channels, bool) use_roi = GET_PARAM(2); } - virtual void generateTestData() + virtual void generateTestData(bool with_val_in_range = false) { const int type = CV_MAKE_TYPE(depth, cn); + double minV = getMinVal(type); + double maxV = getMaxVal(type); + Size roiSize = randomSize(1, MAX_VALUE); Border src1Border = randomBorder(0, use_roi ? MAX_VALUE : 0); - randomSubMat(src1, src1_roi, roiSize, src1Border, type, 2, 11); + randomSubMat(src1, src1_roi, roiSize, src1Border, type, 2, 11); // FIXIT: Test with minV, maxV Border src2Border = randomBorder(0, use_roi ? MAX_VALUE : 0); - randomSubMat(src2, src2_roi, roiSize, src2Border, type, -1540, 1740); + randomSubMat(src2, src2_roi, roiSize, src2Border, type, std::max(-1540., minV), std::min(1740., maxV)); Border dst1Border = randomBorder(0, use_roi ? 
MAX_VALUE : 0); randomSubMat(dst1, dst1_roi, roiSize, dst1Border, type, 5, 16); @@ -157,6 +161,12 @@ PARAM_TEST_CASE(ArithmTestBase, MatDepth, Channels, bool) val = cv::Scalar(rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0)); + if (with_val_in_range) + { + val_in_range = cv::Scalar(rng.uniform(minV, maxV), rng.uniform(minV, maxV), + rng.uniform(minV, maxV), rng.uniform(minV, maxV)); + } + UMAT_UPLOAD_INPUT_PARAMETER(src1) UMAT_UPLOAD_INPUT_PARAMETER(src2) UMAT_UPLOAD_INPUT_PARAMETER(mask) @@ -750,12 +760,15 @@ OCL_TEST_P(Bitwise_not, Mat) typedef ArithmTestBase Compare; +static const int cmp_codes[] = { CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE }; +static const char* cmp_strs[] = { "CMP_EQ", "CMP_GT", "CMP_GE", "CMP_LT", "CMP_LE", "CMP_NE" }; +static const int cmp_num = sizeof(cmp_codes) / sizeof(int); + OCL_TEST_P(Compare, Mat) { - int cmp_codes[] = { CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE }; - int cmp_num = sizeof(cmp_codes) / sizeof(int); - for (int i = 0; i < cmp_num; ++i) + { + SCOPED_TRACE(cmp_strs[i]); for (int j = 0; j < test_loop_times; j++) { generateTestData(); @@ -765,6 +778,41 @@ OCL_TEST_P(Compare, Mat) Near(0); } + } +} + +OCL_TEST_P(Compare, Scalar) +{ + for (int i = 0; i < cmp_num; ++i) + { + SCOPED_TRACE(cmp_strs[i]); + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(true); + + OCL_OFF(cv::compare(src1_roi, val_in_range, dst1_roi, cmp_codes[i])); + OCL_ON(cv::compare(usrc1_roi, val_in_range, udst1_roi, cmp_codes[i])); + + Near(0); + } + } +} + +OCL_TEST_P(Compare, Scalar2) +{ + for (int i = 0; i < cmp_num; ++i) + { + SCOPED_TRACE(cmp_strs[i]); + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(true); + + OCL_OFF(cv::compare(val_in_range, src1_roi, dst1_roi, cmp_codes[i])); + OCL_ON(cv::compare(val_in_range, usrc1_roi, udst1_roi, cmp_codes[i])); + + Near(0); + } + } } //////////////////////////////// Pow 
///////////////////////////////////////////////// @@ -783,7 +831,7 @@ OCL_TEST_P(Pow, Mat) OCL_OFF(cv::pow(src1_roi, pows[k], dst1_roi)); OCL_ON(cv::pow(usrc1_roi, pows[k], udst1_roi)); - Near(1); + Near(1); // FIXIT: Use a relative error check here }