Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ public void AddInPlace(float value)
}

/// <summary>
/// Quantize input block, apply zig-zag ordering and store result as 16bit integers.
/// Quantize input block, transpose, apply zig-zag ordering and store as <see cref="Block8x8"/>.
/// </summary>
/// <param name="block">Source block.</param>
/// <param name="dest">Destination block.</param>
Expand All @@ -291,19 +291,19 @@ public static void Quantize(ref Block8x8F block, ref Block8x8 dest, ref Block8x8
if (Avx2.IsSupported)
{
MultiplyIntoInt16_Avx2(ref block, ref qt, ref dest);
ZigZag.ApplyZigZagOrderingAvx2(ref dest);
ZigZag.ApplyTransposingZigZagOrderingAvx2(ref dest);
}
else if (Ssse3.IsSupported)
{
MultiplyIntoInt16_Sse2(ref block, ref qt, ref dest);
ZigZag.ApplyZigZagOrderingSsse3(ref dest);
ZigZag.ApplyTransposingZigZagOrderingSsse3(ref dest);
}
else
#endif
{
for (int i = 0; i < Size; i++)
{
int idx = ZigZag.ZigZagOrder[i];
int idx = ZigZag.TransposingOrder[i];
float quantizedVal = block[idx] * qt[idx];
quantizedVal += quantizedVal < 0 ? -0.5f : 0.5f;
dest[i] = (short)quantizedVal;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,10 @@ private static void FDCT8x8_Avx(ref Block8x8F block)
{
DebugGuard.IsTrue(Avx.IsSupported, "Avx support is required to execute this operation.");

// First pass - process rows
block.TransposeInplace();
// First pass - process columns
FDCT8x8_1D_Avx(ref block);

// Second pass - process columns
// Second pass - process rows
block.TransposeInplace();
FDCT8x8_1D_Avx(ref block);

Expand Down
133 changes: 7 additions & 126 deletions src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ public static void AdjustToFDCT(ref Block8x8F quantTable)
tableRef = 0.125f / (tableRef * Unsafe.Add(ref multipliersRef, i));
tableRef = ref Unsafe.Add(ref tableRef, 1);
}

// Spectral macroblocks are not transposed before quantization.
// The transpose is done after quantization, at the zig-zag stage,
// so the quantization table must be transposed here.
quantTable.TransposeInplace();
}

/// <summary>
Expand Down Expand Up @@ -133,14 +138,9 @@ public static void TransformFDCT(ref Block8x8F block)
}
else
#endif
if (Vector.IsHardwareAccelerated)
{
FDCT_Vector4(ref block);
}
else
{
FDCT_Scalar(ref block);
}
}

/// <summary>
Expand Down Expand Up @@ -217,136 +217,17 @@ static void IDCT8x4_Vector4(ref Vector4 vecRef)
}
}

/// <summary>
/// Performs the 2D floating point forward DCT on a block inplace, using only scalar arithmetic.
/// </summary>
/// <remarks>
/// Ported from libjpeg-turbo https://github.com/libjpeg-turbo/libjpeg-turbo/blob/main/jfdctflt.c.
/// The same 8-point 1D transform is run over every row and then over every column.
/// </remarks>
/// <param name="block">Block to transform; its values are overwritten with the result.</param>
private static void FDCT_Scalar(ref Block8x8F block)
{
    const int dctSize = 8;

    ref float first = ref Unsafe.As<Block8x8F, float>(ref block);

    // First pass - process rows
    // Samples of a row are adjacent, consecutive rows are dctSize elements apart
    for (int row = 0; row < dctSize; row++)
    {
        Transform1D(ref Unsafe.Add(ref first, row * dctSize), 1);
    }

    // Second pass - process columns
    // Samples of a column are dctSize elements apart, consecutive columns are adjacent
    for (int column = 0; column < dctSize; column++)
    {
        Transform1D(ref Unsafe.Add(ref first, column), dctSize);
    }

    // 8-point 1D transform over the samples origin[0], origin[stride], ..., origin[7 * stride].
    // All eight samples are read before any of them is overwritten.
    static void Transform1D(ref float origin, int stride)
    {
        ref float s0 = ref origin;
        ref float s1 = ref Unsafe.Add(ref origin, stride);
        ref float s2 = ref Unsafe.Add(ref origin, stride * 2);
        ref float s3 = ref Unsafe.Add(ref origin, stride * 3);
        ref float s4 = ref Unsafe.Add(ref origin, stride * 4);
        ref float s5 = ref Unsafe.Add(ref origin, stride * 5);
        ref float s6 = ref Unsafe.Add(ref origin, stride * 6);
        ref float s7 = ref Unsafe.Add(ref origin, stride * 7);

        // Sums and differences of the mirrored sample pairs
        float sum07 = s0 + s7;
        float diff07 = s0 - s7;
        float sum16 = s1 + s6;
        float diff16 = s1 - s6;
        float sum25 = s2 + s5;
        float diff25 = s2 - s5;
        float sum34 = s3 + s4;
        float diff34 = s3 - s4;

        // Even part
        float even10 = sum07 + sum34;
        float even13 = sum07 - sum34;
        float even11 = sum16 + sum25;
        float even12 = sum16 - sum25;

        s0 = even10 + even11;
        s4 = even10 - even11;

        float z1 = (even12 + even13) * 0.707106781f;
        s2 = even13 + z1;
        s6 = even13 - z1;

        // Odd part
        float odd10 = diff34 + diff25;
        float odd11 = diff25 + diff16;
        float odd12 = diff16 + diff07;

        float z5 = (odd10 - odd12) * 0.382683433f;
        float z2 = (0.541196100f * odd10) + z5;
        float z4 = (1.306562965f * odd12) + z5;
        float z3 = odd11 * 0.707106781f;

        float z11 = diff07 + z3;
        float z13 = diff07 - z3;

        s5 = z13 + z2;
        s3 = z13 - z2;
        s1 = z11 + z4;
        s7 = z11 - z4;
    }
}

/// <summary>
/// Apply floating point FDCT inplace using <see cref="Vector4"/> API.
/// </summary>
/// <remarks>
/// This implementation must only be called if the hardware supports
/// vectors of 4 floating point numbers. Otherwise the explicit scalar
/// implementation <see cref="FDCT_Scalar"/> is faster
/// because it does not rely on block transposition.
/// </remarks>
/// <param name="block">Input block.</param>
public static void FDCT_Vector4(ref Block8x8F block)
{
DebugGuard.IsTrue(Vector.IsHardwareAccelerated, "Scalar implementation should be called for non-accelerated hardware.");

// First pass - process rows
block.TransposeInplace();
// First pass - process columns
FDCT8x4_Vector4(ref block.V0L);
FDCT8x4_Vector4(ref block.V0R);

// Second pass - process columns
// Second pass - process rows
block.TransposeInplace();
FDCT8x4_Vector4(ref block.V0L);
FDCT8x4_Vector4(ref block.V0R);
Expand Down
Loading