google · ram-mohan · Dec 13, 2024
diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml
@@ -20,7 +20,7 @@ jobs:
        language: c++
        fuzz-seconds: 600
    - name: Upload Crash
-     uses: actions/upload-artifact@v3
+     uses: actions/upload-artifact@v4
      if: failure() && steps.build.outcome == 'success'
      with:
        name: artifacts

diff --git a/examples/ultrahdr_app.cpp b/examples/ultrahdr_app.cpp
@@ -32,23 +32,23 @@
 
 #include "ultrahdr_api.h"
 
-const float BT601YUVtoRGBMatrix[9] = {
-    1.f, 0.f, 1.402f, 1.f, (-0.202008f / 0.587f), (-0.419198f / 0.587f), 1.0f, 1.772f, 0.0f};
+const float DisplayP3YUVtoRGBMatrix[9] = {
+    1.f, 0.f, 1.542f, 1.f, (-0.146023f / 0.6917f), (-0.353118f / 0.6917f), 1.0f, 1.8414f, 0.0f};
 const float BT709YUVtoRGBMatrix[9] = {
     1.f,  0.f,     1.5748f, 1.f, (-0.13397432f / 0.7152f), (-0.33480248f / 0.7152f),
     1.0f, 1.8556f, 0.0f};
 const float BT2020YUVtoRGBMatrix[9] = {
     1.f, 0.f, 1.4746f, 1.f, (-0.11156702f / 0.6780f), (-0.38737742f / 0.6780f), 1.f, 1.8814f, 0.f};
 
-const float BT601RGBtoYUVMatrix[9] = {0.299f,
-                                      0.587f,
-                                      0.114f,
-                                      (-0.299f / 1.772f),
-                                      (-0.587f / 1.772f),
-                                      0.5f,
-                                      0.5f,
-                                      (-0.587f / 1.402f),
-                                      (-0.114f / 1.402f)};
+const float DisplayP3RGBtoYUVMatrix[9] = {0.229f,
+                                          0.6917f,
+                                          0.0793f,
+                                          (-0.229f / 1.8414f),
+                                          (-0.6917f / 1.8414f),
+                                          0.5f,
+                                          0.5f,
+                                          (-0.6917f / 1.542f),
+                                          (-0.0793f / 1.542f)};
 const float BT709RGBtoYUVMatrix[9] = {0.2126f,
                                       0.7152f,
                                       0.0722f,
@@ -892,7 +892,7 @@ bool UltraHdrAppInput::convertP010ToRGBImage() {
   } else if (mHdrCg == UHDR_CG_BT_2100) {
     coeffs = BT2020YUVtoRGBMatrix;
   } else if (mHdrCg == UHDR_CG_DISPLAY_P3) {
-    coeffs = BT601YUVtoRGBMatrix;
+    coeffs = DisplayP3YUVtoRGBMatrix;
   } else {
     std::cerr << "color matrix not present for gamut " << mHdrCg << " using BT2020Matrix"
               << std::endl;
@@ -984,15 +984,15 @@ bool UltraHdrAppInput::convertYuv420ToRGBImage() {
   uint8_t* u = static_cast<uint8_t*>(mRawYuv420Image.planes[UHDR_PLANE_U]);
   uint8_t* v = static_cast<uint8_t*>(mRawYuv420Image.planes[UHDR_PLANE_V]);
 
-  const float* coeffs = BT601YUVtoRGBMatrix;
+  const float* coeffs = BT709YUVtoRGBMatrix;
   if (mSdrCg == UHDR_CG_BT_709) {
     coeffs = BT709YUVtoRGBMatrix;
   } else if (mSdrCg == UHDR_CG_BT_2100) {
     coeffs = BT2020YUVtoRGBMatrix;
   } else if (mSdrCg == UHDR_CG_DISPLAY_P3) {
-    coeffs = BT601YUVtoRGBMatrix;
+    coeffs = DisplayP3YUVtoRGBMatrix;
   } else {
-    std::cerr << "color matrix not present for gamut " << mSdrCg << " using BT601Matrix"
+    std::cerr << "color matrix not present for gamut " << mSdrCg << " using BT709Matrix"
               << std::endl;
   }
   for (size_t i = 0; i < mRawYuv420Image.h; i++) {
@@ -1054,16 +1054,16 @@ bool UltraHdrAppInput::convertRgba8888ToYUV444Image() {
   uint8_t* uData = static_cast<uint8_t*>(mDecodedUhdrYuv444Image.planes[UHDR_PLANE_U]);
   uint8_t* vData = static_cast<uint8_t*>(mDecodedUhdrYuv444Image.planes[UHDR_PLANE_V]);
 
-  const float* coeffs = BT601RGBtoYUVMatrix;
+  const float* coeffs = BT709RGBtoYUVMatrix;
   if (mDecodedUhdrRgbImage.cg == UHDR_CG_BT_709) {
     coeffs = BT709RGBtoYUVMatrix;
   } else if (mDecodedUhdrRgbImage.cg == UHDR_CG_BT_2100) {
     coeffs = BT2020RGBtoYUVMatrix;
   } else if (mDecodedUhdrRgbImage.cg == UHDR_CG_DISPLAY_P3) {
-    coeffs = BT601RGBtoYUVMatrix;
+    coeffs = DisplayP3RGBtoYUVMatrix;
   } else {
     std::cerr << "color matrix not present for gamut " << mDecodedUhdrRgbImage.cg
-              << " using BT601Matrix" << std::endl;
+              << " using BT709Matrix" << std::endl;
   }
 
   for (size_t i = 0; i < mDecodedUhdrRgbImage.h; i++) {
@@ -1108,7 +1108,7 @@ bool UltraHdrAppInput::convertRgba1010102ToYUV444Image() {
   } else if (mDecodedUhdrRgbImage.cg == UHDR_CG_BT_2100) {
     coeffs = BT2020RGBtoYUVMatrix;
   } else if (mDecodedUhdrRgbImage.cg == UHDR_CG_DISPLAY_P3) {
-    coeffs = BT601RGBtoYUVMatrix;
+    coeffs = DisplayP3RGBtoYUVMatrix;
   } else {
     std::cerr << "color matrix not present for gamut " << mDecodedUhdrRgbImage.cg
               << " using BT2020Matrix" << std::endl;

diff --git a/lib/include/ultrahdr/gainmapmath.h b/lib/include/ultrahdr/gainmapmath.h
@@ -347,6 +347,13 @@ Color pqInvOetfLUT(Color e_gamma);
 constexpr int32_t kPqInvOETFPrecision = 12;
 constexpr int32_t kPqInvOETFNumEntries = 1 << kPqInvOETFPrecision;
 
+////////////////////////////////////////////////////////////////////////////////
+// BT.601 transformations
+
+// BT.601 rgb <-> yuv conversion
+Color bt601RgbToYuv(Color e_gamma);
+Color bt601YuvToRgb(Color e_gamma);
+
 // util class to prepare look up tables for oetf/eotf functions
 class LookUpTable {
  public:
@@ -415,20 +422,26 @@ Color bt2100ToP3(Color e);
 
 // convert between yuv encodings
 extern const std::array<float, 9> kYuvBt709ToBt601;
+extern const std::array<float, 9> kYuvBt709ToDisplayP3;
 extern const std::array<float, 9> kYuvBt709ToBt2100;
-extern const std::array<float, 9> kYuvBt601ToBt709;
-extern const std::array<float, 9> kYuvBt601ToBt2100;
-extern const std::array<float, 9> kYuvBt2100ToBt709;
+extern const std::array<float, 9> kYuvDisplayP3ToBt601;
+extern const std::array<float, 9> kYuvDisplayP3ToBt709;
+extern const std::array<float, 9> kYuvDisplayP3ToBt2100;
 extern const std::array<float, 9> kYuvBt2100ToBt601;
+extern const std::array<float, 9> kYuvBt2100ToBt709;
+extern const std::array<float, 9> kYuvBt2100ToDisplayP3;
 
 #if (defined(UHDR_ENABLE_INTRINSICS) && (defined(__ARM_NEON__) || defined(__ARM_NEON)))
 
 extern const int16_t kYuv709To601_coeffs_neon[8];
+extern const int16_t kYuv709ToP3_coeffs_neon[8];
 extern const int16_t kYuv709To2100_coeffs_neon[8];
-extern const int16_t kYuv601To709_coeffs_neon[8];
-extern const int16_t kYuv601To2100_coeffs_neon[8];
-extern const int16_t kYuv2100To709_coeffs_neon[8];
+extern const int16_t kYuvP3To601_coeffs_neon[8];
+extern const int16_t kYuvP3To709_coeffs_neon[8];
+extern const int16_t kYuvP3To2100_coeffs_neon[8];
 extern const int16_t kYuv2100To601_coeffs_neon[8];
+extern const int16_t kYuv2100To709_coeffs_neon[8];
+extern const int16_t kYuv2100ToP3_coeffs_neon[8];
 
 /*
  * The Y values are provided at half the width of U & V values to allow use of the widening
@@ -608,10 +621,11 @@ std::unique_ptr<uhdr_raw_image_ext_t> copy_raw_image(uhdr_raw_image_t* src);
 uhdr_error_info_t copy_raw_image(uhdr_raw_image_t* src, uhdr_raw_image_t* dst);
 
 std::unique_ptr<uhdr_raw_image_ext_t> convert_raw_input_to_ycbcr(
-    uhdr_raw_image_t* src, bool chroma_sampling_enabled = false);
+    uhdr_raw_image_t* src, bool use_bt601 = false, bool chroma_sampling_enabled = false);
 
 #if (defined(UHDR_ENABLE_INTRINSICS) && (defined(__ARM_NEON__) || defined(__ARM_NEON)))
-std::unique_ptr<uhdr_raw_image_ext_t> convert_raw_input_to_ycbcr_neon(uhdr_raw_image_t* src);
+std::unique_ptr<uhdr_raw_image_ext_t> convert_raw_input_to_ycbcr_neon(uhdr_raw_image_t* src,
+                                                                      bool use_bt601 = false);
 #endif
 
 bool floatToSignedFraction(float v, int32_t* numerator, uint32_t* denominator);

diff --git a/lib/include/ultrahdr/ultrahdrcommon.h b/lib/include/ultrahdr/ultrahdrcommon.h
@@ -159,6 +159,8 @@
 
 static const uhdr_error_info_t g_no_error = {UHDR_CODEC_OK, 0, ""};
 
+static const int UHDR_CG_BT_601 = 3; /**< BT.601 */
+
 namespace ultrahdr {
 
 // ===============================================================================================

diff --git a/lib/src/dsp/arm/gainmapmath_neon.cpp b/lib/src/dsp/arm/gainmapmath_neon.cpp
@@ -35,47 +35,68 @@ namespace ultrahdr {
 // {Y1, Y2, U1, U2, V1, V2, 0, 0}
 
 // Yuv Bt709 -> Yuv Bt601
-// Y' = (1.0f * Y) + ( 0.101579f * U) + ( 0.196076f * V)
-// U' = (0.0f * Y) + ( 0.989854f * U) + (-0.110653f * V)
-// V' = (0.0f * Y) + (-0.072453f * U) + ( 0.983398f * V)
+// Y' = (1.0 * Y) + ( 0.101579 * U) + ( 0.196076 * V)
+// U' = (0.0 * Y) + ( 0.989854 * U) + (-0.110653 * V)
+// V' = (0.0 * Y) + (-0.072453 * U) + ( 0.983398 * V)
 ALIGNED(16)
 const int16_t kYuv709To601_coeffs_neon[8] = {1664, 3213, 16218, -1813, -1187, 16112, 0, 0};
 
+// Yuv Bt709 -> Yuv Display P3
+// Y' = (1.0 * Y) + ( 0.017545 * U) + ( 0.03677 * V)
+// U' = (0.0 * Y) + ( 0.998169 * U) + (-0.019968 * V)
+// V' = (0.0 * Y) + (-0.011378 * U) + ( 0.997393 * V)
+ALIGNED(16)
+const int16_t kYuv709ToP3_coeffs_neon[8] = {287, 602, 16354, -327, -186, 16341, 0, 0};
+
 // Yuv Bt709 -> Yuv Bt2100
 // Y' = (1.0f * Y) + (-0.016969f * U) + ( 0.096312f * V)
 // U' = (0.0f * Y) + ( 0.995306f * U) + (-0.051192f * V)
 // V' = (0.0f * Y) + ( 0.011507f * U) + ( 1.002637f * V)
 ALIGNED(16)
 const int16_t kYuv709To2100_coeffs_neon[8] = {-278, 1578, 16307, -839, 189, 16427, 0, 0};
 
-// Yuv Bt601 -> Yuv Bt709
-// Y' = (1.0f * Y) + (-0.118188f * U) + (-0.212685f * V),
-// U' = (0.0f * Y) + ( 1.018640f * U) + ( 0.114618f * V),
-// V' = (0.0f * Y) + ( 0.075049f * U) + ( 1.025327f * V);
+// Yuv Display P3 -> Yuv Bt601
+// Y' = (1.0 * Y) + ( 0.086028 * U) + ( 0.161445 * V)
+// U' = (0.0 * Y) + ( 0.990631 * U) + (-0.091109 * V)
+// V' = (0.0 * Y) + (-0.061361 * U) + ( 0.98474 * V)
 ALIGNED(16)
-const int16_t kYuv601To709_coeffs_neon[8] = {-1936, -3485, 16689, 1878, 1230, 16799, 0, 0};
+const int16_t kYuvP3To601_coeffs_neon[8] = {1409, 2645, 16230, -1493, -1005, 16134, 0, 0};
 
-// Yuv Bt601 -> Yuv Bt2100
-// Y' = (1.0f * Y) + (-0.128245f * U) + (-0.115879f * V)
-// U' = (0.0f * Y) + ( 1.010016f * U) + ( 0.061592f * V)
-// V' = (0.0f * Y) + ( 0.086969f * U) + ( 1.029350f * V)
+// Yuv Display P3 -> Yuv Bt709
+// Y' = (1.0 * Y) + (-0.018002 * U) + (-0.037226 * V)
+// U' = (0.0 * Y) + ( 1.002063 * U) + ( 0.020061 * V)
+// V' = (0.0 * Y) + ( 0.011431 * U) + ( 1.002843 * V)
 ALIGNED(16)
-const int16_t kYuv601To2100_coeffs_neon[8] = {-2101, -1899, 16548, 1009, 1425, 16865, 0, 0};
+const int16_t kYuvP3To709_coeffs_neon[8] = {-295, -610, 16418, 329, 187, 16431, 0, 0};
 
-// Yuv Bt2100 -> Yuv Bt709
-// Y' = (1.0f * Y) + ( 0.018149f * U) + (-0.095132f * V)
-// U' = (0.0f * Y) + ( 1.004123f * U) + ( 0.051267f * V)
-// V' = (0.0f * Y) + (-0.011524f * U) + ( 0.996782f * V)
+// Yuv Display P3 -> Yuv Bt2100
+// Y' = (1.0 * Y) + (-0.033905 * U) + ( 0.059019 * V)
+// U' = (0.0 * Y) + ( 0.996774 * U) + ( -0.03137 * V)
+// V' = (0.0 * Y) + ( 0.022992 * U) + ( 1.005718 * V)
 ALIGNED(16)
-const int16_t kYuv2100To709_coeffs_neon[8] = {297, -1559, 16452, 840, -189, 16331, 0, 0};
+const int16_t kYuvP3To2100_coeffs_neon[8] = {-555, 967, 16331, -514, 377, 16478, 0, 0};
 
 // Yuv Bt2100 -> Yuv Bt601
-// Y' = (1.0f * Y) + ( 0.117887f * U) + ( 0.105521f * V)
-// U' = (0.0f * Y) + ( 0.995211f * U) + (-0.059549f * V)
-// V' = (0.0f * Y) + (-0.084085f * U) + ( 0.976518f * V)
+// Y' = (1.0 * Y) + ( 0.117887 * U) + ( 0.105521 * V)
+// U' = (0.0 * Y) + ( 0.995211 * U) + (-0.059549 * V)
+// V' = (0.0 * Y) + (-0.084085 * U) + ( 0.976518 * V)
 ALIGNED(16)
 const int16_t kYuv2100To601_coeffs_neon[8] = {1931, 1729, 16306, -976, -1378, 15999, 0, 0};
 
+// Yuv Bt2100 -> Yuv Bt709
+// Y' = (1.0 * Y) + ( 0.018149 * U) + (-0.095132 * V)
+// U' = (0.0 * Y) + ( 1.004123 * U) + ( 0.051267 * V)
+// V' = (0.0 * Y) + (-0.011524 * U) + ( 0.996782 * V)
+ALIGNED(16)
+const int16_t kYuv2100To709_coeffs_neon[8] = {297, -1559, 16452, 840, -189, 16331, 0, 0};
+
+// Yuv Bt2100 -> Yuv Display P3
+// Y' = (1.0 * Y) + ( 0.035343 * U) + (-0.057581 * V)
+// U' = (0.0 * Y) + ( 1.002515 * U) + ( 0.03127 * V)
+// V' = (0.0 * Y) + (-0.022919 * U) + ( 0.9936 * V)
+ALIGNED(16)
+const int16_t kYuv2100ToP3_coeffs_neon[8] = {579, -943, 16425, 512, -376, 16279, 0, 0};
+
 static inline int16x8_t yConversion_neon(uint8x8_t y, int16x8_t u, int16x8_t v, int16x8_t coeffs) {
   int32x4_t lo = vmull_lane_s16(vget_low_s16(u), vget_low_s16(coeffs), 0);
   int32x4_t hi = vmull_lane_s16(vget_high_s16(u), vget_low_s16(coeffs), 0);
@@ -240,11 +261,14 @@ uhdr_error_info_t convertYuv_neon(uhdr_raw_image_t* image, uhdr_color_gamut_t sr
 
   switch (src_encoding) {
     case UHDR_CG_BT_709:
-      switch (dst_encoding) {
+      switch ((int)dst_encoding) {
+        case UHDR_CG_BT_601:
+          coeffs = kYuv709To601_coeffs_neon;
+          break;
         case UHDR_CG_BT_709:
           return status;
         case UHDR_CG_DISPLAY_P3:
-          coeffs = kYuv709To601_coeffs_neon;
+          coeffs = kYuv709ToP3_coeffs_neon;
           break;
         case UHDR_CG_BT_2100:
           coeffs = kYuv709To2100_coeffs_neon;
@@ -258,14 +282,17 @@ uhdr_error_info_t convertYuv_neon(uhdr_raw_image_t* image, uhdr_color_gamut_t sr
       }
       break;
     case UHDR_CG_DISPLAY_P3:
-      switch (dst_encoding) {
+      switch ((int)dst_encoding) {
+        case UHDR_CG_BT_601:
+          coeffs = kYuvP3To601_coeffs_neon;
+          break;
         case UHDR_CG_BT_709:
-          coeffs = kYuv601To709_coeffs_neon;
+          coeffs = kYuvP3To709_coeffs_neon;
           break;
         case UHDR_CG_DISPLAY_P3:
           return status;
         case UHDR_CG_BT_2100:
-          coeffs = kYuv601To2100_coeffs_neon;
+          coeffs = kYuvP3To2100_coeffs_neon;
           break;
         default:
           status.error_code = UHDR_CODEC_INVALID_PARAM;
@@ -276,12 +303,15 @@ uhdr_error_info_t convertYuv_neon(uhdr_raw_image_t* image, uhdr_color_gamut_t sr
       }
       break;
     case UHDR_CG_BT_2100:
-      switch (dst_encoding) {
+      switch ((int)dst_encoding) {
+        case UHDR_CG_BT_601:
+          coeffs = kYuv2100To601_coeffs_neon;
+          break;
         case UHDR_CG_BT_709:
           coeffs = kYuv2100To709_coeffs_neon;
           break;
         case UHDR_CG_DISPLAY_P3:
-          coeffs = kYuv2100To601_coeffs_neon;
+          coeffs = kYuv2100ToP3_coeffs_neon;
           break;
         case UHDR_CG_BT_2100:
           return status;
@@ -323,6 +353,13 @@ uhdr_error_info_t convertYuv_neon(uhdr_raw_image_t* image, uhdr_color_gamut_t sr
 // In the 3x3 conversion matrix, 0.5 is duplicated. But represented as only one entry in lut leaving
 // with an array size of 8 elements.
 
+// RGB Bt601 -> Yuv Bt601
+// Y = 0.299 * R + 0.587 * G + 0.114 * B
+// U = -0.168735892 * R + -0.331264108 * G + 0.5 * B
+// V = 0.5 * R + -0.418687589 * G + -0.081312411 * B
+ALIGNED(16)
+const uint16_t kRgb601ToYuv_coeffs_neon[8] = {4899, 9617, 1868, 2765, 5427, 8192, 6860, 1332};
+
 // RGB Bt709 -> Yuv Bt709
 // Y = 0.212639 * R + 0.715169 * G + 0.072192 * B
 // U = -0.114592135 * R + -0.385407865 * G + 0.5 * B
@@ -454,12 +491,15 @@ static void ConvertRgba8888ToYuv444_neon(uhdr_raw_image_t* src, uhdr_raw_image_t
   } while (++h < src->h);
 }
 
-std::unique_ptr<uhdr_raw_image_ext_t> convert_raw_input_to_ycbcr_neon(uhdr_raw_image_t* src) {
+std::unique_ptr<uhdr_raw_image_ext_t> convert_raw_input_to_ycbcr_neon(uhdr_raw_image_t* src,
+                                                                      bool use_bt601) {
   if (src->fmt == UHDR_IMG_FMT_32bppRGBA8888) {
     std::unique_ptr<uhdr_raw_image_ext_t> dst = nullptr;
     const uint16_t* coeffs_ptr = nullptr;
 
-    if (src->cg == UHDR_CG_BT_709) {
+    if (use_bt601) {
+      coeffs_ptr = kRgb601ToYuv_coeffs_neon;
+    } else if (src->cg == UHDR_CG_BT_709) {
       coeffs_ptr = kRgb709ToYuv_coeffs_neon;
     } else if (src->cg == UHDR_CG_BT_2100) {
       coeffs_ptr = kRgbDispP3ToYuv_coeffs_neon;