Skip to content

Commit 2509f96

Browse files
committed
Warper: generalize previous commit to UInt16
1 parent 1c5796f commit 2509f96

File tree

2 files changed

+64
-38
lines changed

2 files changed

+64
-38
lines changed

alg/gdalwarpkernel.cpp

+36-37
Original file line number | Diff line number | Diff line change
@@ -2996,7 +2996,6 @@ static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
29962996
return _mm_cvtepi32_ps(xmm_i);
29972997
}
29982998

2999-
#if defined(USE_SSE_CUBIC_IMPL)
30002999
static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
30013000
{
30023001
GUInt64 i;
@@ -3011,7 +3010,6 @@ static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
30113010
#endif
30123011
return _mm_cvtepi32_ps(xmm_i);
30133012
}
3014-
#endif
30153013

30163014
/************************************************************************/
30173015
/* XMMHorizontalAdd() */
@@ -5857,7 +5855,7 @@ static CPLErr GWKRealCase(GDALWarpKernel *poWK)
58575855
}
58585856

58595857
/************************************************************************/
5860-
/* GWKCubicResampleNoMasks4ByteMultiBand() */
5858+
/* GWKCubicResampleNoMasks4MultiBandT() */
58615859
/************************************************************************/
58625860

58635861
/* We restrict to 64bit processors because they are guaranteed to have SSE2 */
@@ -5878,9 +5876,10 @@ static inline float Convolute4x4(const __m128 row0, const __m128 row1,
58785876
_mm_mul_ps(_mm_mul_ps(row3, weightsX), weightsY3)));
58795877
}
58805878

5881-
static void GWKCubicResampleNoMasks4ByteMultiBand(const GDALWarpKernel *poWK,
5882-
double dfSrcX, double dfSrcY,
5883-
const GPtrDiff_t iDstOffset)
5879+
template <class T>
5880+
static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
5881+
double dfSrcX, double dfSrcY,
5882+
const GPtrDiff_t iDstOffset)
58845883
{
58855884
const double dfSrcXShifted = dfSrcX - 0.5;
58865885
const int iSrcX = static_cast<int>(dfSrcXShifted);
@@ -5895,10 +5894,10 @@ static void GWKCubicResampleNoMasks4ByteMultiBand(const GDALWarpKernel *poWK,
58955894
{
58965895
for (int iBand = 0; iBand < poWK->nBands; iBand++)
58975896
{
5898-
GByte value;
5897+
T value;
58995898
GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
59005899
&value);
5901-
reinterpret_cast<GByte *>(poWK->papabyDstImage[iBand])[iDstOffset] =
5900+
reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
59025901
value;
59035902
}
59045903
}
@@ -5923,26 +5922,25 @@ static void GWKCubicResampleNoMasks4ByteMultiBand(const GDALWarpKernel *poWK,
59235922
// Process 2 bands at a time
59245923
for (; iBand + 1 < poWK->nBands; iBand += 2)
59255924
{
5926-
const GByte *CPL_RESTRICT pabyBand0 =
5927-
reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
5928-
const auto row0_0 = XMMLoad4Values(pabyBand0 + iOffset);
5925+
const T *CPL_RESTRICT pBand0 =
5926+
reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
5927+
const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
59295928
const auto row1_0 =
5930-
XMMLoad4Values(pabyBand0 + iOffset + poWK->nSrcXSize);
5929+
XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
59315930
const auto row2_0 =
5932-
XMMLoad4Values(pabyBand0 + iOffset + 2 * poWK->nSrcXSize);
5931+
XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
59335932
const auto row3_0 =
5934-
XMMLoad4Values(pabyBand0 + iOffset + 3 * poWK->nSrcXSize);
5933+
XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
59355934

5936-
const GByte *CPL_RESTRICT pabyBand1 =
5937-
reinterpret_cast<const GByte *>(
5938-
poWK->papabySrcImage[iBand + 1]);
5939-
const auto row0_1 = XMMLoad4Values(pabyBand1 + iOffset);
5935+
const T *CPL_RESTRICT pBand1 =
5936+
reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
5937+
const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
59405938
const auto row1_1 =
5941-
XMMLoad4Values(pabyBand1 + iOffset + poWK->nSrcXSize);
5939+
XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
59425940
const auto row2_1 =
5943-
XMMLoad4Values(pabyBand1 + iOffset + 2 * poWK->nSrcXSize);
5941+
XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
59445942
const auto row3_1 =
5945-
XMMLoad4Values(pabyBand1 + iOffset + 3 * poWK->nSrcXSize);
5943+
XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
59465944

59475945
const float fValue_0 =
59485946
Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsX,
@@ -5952,32 +5950,32 @@ static void GWKCubicResampleNoMasks4ByteMultiBand(const GDALWarpKernel *poWK,
59525950
Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsX,
59535951
weightsY0, weightsY1, weightsY2, weightsY3);
59545952

5955-
GByte *CPL_RESTRICT pabyDstBand0 =
5956-
reinterpret_cast<GByte *>(poWK->papabyDstImage[iBand]);
5957-
pabyDstBand0[iDstOffset] = GWKClampValueT<GByte>(fValue_0);
5953+
T *CPL_RESTRICT pDstBand0 =
5954+
reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
5955+
pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
59585956

5959-
GByte *CPL_RESTRICT pabyDstBand1 =
5960-
reinterpret_cast<GByte *>(poWK->papabyDstImage[iBand + 1]);
5961-
pabyDstBand1[iDstOffset] = GWKClampValueT<GByte>(fValue_1);
5957+
T *CPL_RESTRICT pDstBand1 =
5958+
reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
5959+
pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
59625960
}
59635961
if (iBand < poWK->nBands)
59645962
{
5965-
const GByte *pabyBand0 =
5966-
reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
5967-
const auto row0 = XMMLoad4Values(pabyBand0 + iOffset);
5963+
const T *pBand0 =
5964+
reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
5965+
const auto row0 = XMMLoad4Values(pBand0 + iOffset);
59685966
const auto row1 =
5969-
XMMLoad4Values(pabyBand0 + iOffset + poWK->nSrcXSize);
5967+
XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
59705968
const auto row2 =
5971-
XMMLoad4Values(pabyBand0 + iOffset + 2 * poWK->nSrcXSize);
5969+
XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
59725970
const auto row3 =
5973-
XMMLoad4Values(pabyBand0 + iOffset + 3 * poWK->nSrcXSize);
5971+
XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
59745972

59755973
const float fValue =
59765974
Convolute4x4(row0, row1, row2, row3, weightsX, weightsY0,
59775975
weightsY1, weightsY2, weightsY3);
59785976

5979-
reinterpret_cast<GByte *>(poWK->papabyDstImage[iBand])[iDstOffset] =
5980-
GWKClampValueT<GByte>(fValue);
5977+
reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
5978+
GWKClampValueT<T>(fValue);
59815979
}
59825980
}
59835981

@@ -6093,11 +6091,12 @@ static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
60936091

60946092
#if defined(__x86_64) || defined(_M_X64)
60956093
if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
6096-
std::is_same<T, GByte>::value)
6094+
(std::is_same<T, GByte>::value ||
6095+
std::is_same<T, GUInt16>::value))
60976096
{
60986097
if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
60996098
{
6100-
GWKCubicResampleNoMasks4ByteMultiBand(
6099+
GWKCubicResampleNoMasks4MultiBandT<T>(
61016100
poWK, padfX[iDstX] - poWK->nSrcXOff,
61026101
padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
61036102

autotest/utilities/test_gdalwarp_lib.py

+28-1
Original file line number | Diff line number | Diff line change
@@ -4333,7 +4333,7 @@ def test_gdalwarp_lib_src_is_geog_arc_second():
43334333

43344334

43354335
###############################################################################
4336-
# Test GWKCubicResampleNoMasks4ByteMultiBand()
4336+
# Test GWKCubicResampleNoMasks4MultiBandT<GByte>()
43374337

43384338

43394339
def test_gdalwarp_lib_cubic_multiband_byte_4sample_optim():
@@ -4380,3 +4380,30 @@ def test_gdalwarp_lib_cubic_multiband_byte_4sample_optim():
43804380
assert out_ds.RasterXSize == 400
43814381
assert out_ds.RasterYSize == 200
43824382
assert out_ds.ReadRaster() == src_ds.ReadRaster()
4383+
4384+
4385+
###############################################################################
4386+
# Test GWKCubicResampleNoMasks4MultiBandT<GUInt16>()
4387+
4388+
4389+
def test_gdalwarp_lib_cubic_multiband_uint16_4sample_optim():
4390+
4391+
src_ds = gdal.Open("../gdrivers/data/small_world.tif")
4392+
src_ds = gdal.Translate(
4393+
"", src_ds, options="-f MEM -ot UInt16 -scale 0 255 0 65535"
4394+
)
4395+
4396+
# RGB only
4397+
out_ds = gdal.Warp(
4398+
"",
4399+
src_ds,
4400+
options="-f MEM -tr 0.9 0.9 -te -10 40.1 8.9 59 -r cubic",
4401+
)
4402+
out_ds = gdal.Translate("", out_ds, options="-f MEM -ot Byte -scale 0 65535 0 255")
4403+
assert out_ds.RasterXSize == 21
4404+
assert out_ds.RasterYSize == 21
4405+
assert [out_ds.GetRasterBand(i + 1).Checksum() for i in range(3)] == [
4406+
4785,
4407+
4689,
4408+
5007,
4409+
]

0 commit comments

Comments
 (0)