|
| 1 | +/* |
| 2 | + * SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD |
| 3 | + * |
| 4 | + * SPDX-License-Identifier: Apache-2.0 |
| 5 | + */ |
| 6 | + |
| 7 | +#include "xtensa_context.h" |
| 8 | +#include "lv_macro_memset.S" |
| 9 | + |
| 10 | +// LVGL ARGB8888 simple (solid color) fill for the ESP32 and ESP32-S2 processors |
| 11 | + |
| 12 | + .section .text /* emit the routine into the code section */ |
| 13 | + .align 4 /* alignment for the function entry point */ |
| 14 | + .global lv_color_blend_to_argb8888_esp /* export the symbol so it can be linked from other objects */ |
| 15 | + .type lv_color_blend_to_argb8888_esp,@function /* mark the symbol as a function */ |
| 16 | + |
| 17 | +// The function implements the following C code; unlike the C prototype, it returns LV_RESULT_OK = 1 in a2: |
| 18 | +// void lv_color_blend_to_argb8888(_lv_draw_sw_blend_fill_dsc_t * dsc); |
| 19 | + |
| 20 | +// Input params |
| 21 | +// |
| 22 | +// dsc - a2 |
| 23 | + |
| 24 | +// typedef struct { |
| 25 | +// uint32_t opa; l32i 0 |
| 26 | +// void * dst_buf; l32i 4 |
| 27 | +// uint32_t dst_w; l32i 8 |
| 28 | +// uint32_t dst_h; l32i 12 |
| 29 | +// uint32_t dst_stride; l32i 16 |
| 30 | +// const void * src_buf; l32i 20 |
| 31 | +// uint32_t src_stride; l32i 24 |
| 32 | +// const lv_opa_t * mask_buf; l32i 28 |
| 33 | +// uint32_t mask_stride; l32i 32 |
| 34 | +// } asm_dsc_t; |
| 35 | + |
| 36 | +lv_color_blend_to_argb8888_esp: |
| 37 | + // Registers: a3 dest ptr, a4 width (pixels), a5 rows left, a6 row gap (bytes), a10 fill value, a11 row length (bytes) |
| 38 | + entry a1, 32 // windowed-ABI prologue, 32-byte stack frame |
| 39 | + |
| 40 | + l32i.n a3, a2, 4 // a3 - dest_buff |
| 41 | + l32i.n a4, a2, 8 // a4 - dest_w in pixels (uint32_t units) |
| 42 | + l32i.n a5, a2, 12 // a5 - dest_h in rows |
| 43 | + l32i.n a6, a2, 16 // a6 - dest_stride in bytes |
| 44 | + l32i.n a7, a2, 20 // a7 - src_buff (pointer to the color) |
| 45 | + l32i.n a8, a7, 0 // a8 - color value, the first 32-bit word at src_buff |
| 46 | + slli a11, a4, 2 // a11 - dest_w_bytes = sizeof(uint32_t) * dest_w |
| 47 | + |
| 48 | + beqz a4, _zero_matrix_len_check // Nothing to fill if dest_w (a4) is zero |
| 49 | + beqz a5, _zero_matrix_len_check // Nothing to fill if dest_h (a5) is zero |
| 50 | + movi a7, 0xff000000 // a7 - alpha channel mask (the src_buff pointer is no longer needed) |
| 51 | + or a10, a7, a8 // a10 - fill value: color with the top (alpha) byte forced to 0xff |
| 52 | + sub a6, a6, a11 // a6 - row gap = dest_stride - dest_w_bytes (added after each filled row) |
| 53 | + |
| 54 | + // Rows narrower than 8 pixels take the simple one-pixel-per-iteration path |
| 55 | + bltui a4, 8, _matrix_width_check // Branch if dest_w (a4) is lower than 8 |
| 56 | + srli a9, a4, 3 // a9 - loop_len = dest_w / 8 (always >= 1 here) |
| 57 | + |
| 58 | +#if !XCHAL_HAVE_LOOPS |
| 59 | + slli a14, a9, 5 // a14 = loop_len (a9) * 32 = bytes written by the main loop per row |
| 60 | +#endif |
| 61 | + |
| 62 | + .outer_loop: // one iteration per destination row |
| 63 | + |
| 64 | +#if XCHAL_HAVE_LOOPS |
| 65 | + loopnez a9, ._main_loop // zero-overhead loop (not supported for esp32s2) |
| 66 | +#else |
| 67 | + // Init loop parameters |
| 68 | + beqz a9, ._main_loop // Mirrors loopnez: skip the main loop if a9 is 0 (cannot happen, dest_w >= 8 here) |
| 69 | + add a15, a14, a3 // a15 = dest_buf (a3) + a14, address one past the last byte the main loop writes |
| 70 | + .main_loop_done: // NOTE: despite its name, this label is the HEAD of the main loop |
| 71 | +#endif |
| 72 | + // Main loop body: stores 32 bytes (8 ARGB8888 pixels) per iteration |
| 73 | + s32i.n a10, a3, 0 // save 32 bits from a10 to dest_buff a3, offset 0 bytes |
| 74 | + s32i.n a10, a3, 4 // save 32 bits from a10 to dest_buff a3, offset 4 bytes |
| 75 | + s32i.n a10, a3, 8 // save 32 bits from a10 to dest_buff a3, offset 8 bytes |
| 76 | + s32i.n a10, a3, 12 // save 32 bits from a10 to dest_buff a3, offset 12 bytes |
| 77 | + s32i.n a10, a3, 16 // save 32 bits from a10 to dest_buff a3, offset 16 bytes |
| 78 | + s32i.n a10, a3, 20 // save 32 bits from a10 to dest_buff a3, offset 20 bytes |
| 79 | + s32i.n a10, a3, 24 // save 32 bits from a10 to dest_buff a3, offset 24 bytes |
| 80 | + s32i.n a10, a3, 28 // save 32 bits from a10 to dest_buff a3, offset 28 bytes |
| 81 | + addi.n a3, a3, 32 // increment dest_buff a3 pointer by 32 bytes |
| 82 | +#if !XCHAL_HAVE_LOOPS |
| 83 | + bltu a3, a15, .main_loop_done // Loop while dest_buf (a3) is below a15; unsigned compare because both are addresses |
| 84 | +#endif |
| 85 | + ._main_loop: // END of the main loop (loopnez end label / beqz skip target) |
| 86 | + |
| 87 | + // Finish the remaining bytes out of the loop |
| 88 | + |
| 89 | + // Check modulo 16 of the dest_w_bytes (a11), if - then set 16 bytes (4 ARGB8888 pixels) |
| 90 | + // src_reg a10, dest_buff a3, dest_w_bytes a11 |
| 91 | + macro_memset_mod_16 a10, a3, a11, __LINE__ |
| 92 | + |
| 93 | + // Check modulo 8 of the dest_w_bytes (a11), if - then set 8 bytes (2 ARGB8888 pixels) |
| 94 | + // src_reg a10, dest_buff a3, dest_w_bytes a11 |
| 95 | + macro_memset_mod_8 a10, a3, a11, __LINE__ |
| 96 | + |
| 97 | + // Check modulo 4 of the dest_w_bytes (a11), if - then set 4 bytes (1 ARGB8888 pixel) |
| 98 | + // src_reg a10, dest_buff a3, dest_w_bytes a11 |
| 99 | + macro_memset_mod_4 a10, a3, a11, __LINE__ |
| 100 | + |
| 101 | + add a3, a3, a6 // advance dest_buff by the row gap to the start of the next row |
| 102 | + addi.n a5, a5, -1 // one row done, decrement the remaining row count |
| 103 | + bnez a5, .outer_loop // repeat until all dest_h rows are filled |
| 104 | + |
| 105 | + movi.n a2, 1 // return LV_RESULT_OK = 1 |
| 106 | + retw.n // return |
| 107 | + |
| 108 | +//********************************************************************************************************************** |
| 109 | + |
| 110 | + // Small matrix width, keep it simple for lengths less than 8 pixels |
| 111 | + |
| 112 | + _matrix_width_check: // reached only with 1 <= dest_w (a4) <= 7 |
| 113 | + |
| 114 | +#if !XCHAL_HAVE_LOOPS |
| 115 | + slli a14, a4, 2 // a14 = dest_w (a4) * 4 = row length in bytes (the loop advances the pointer by 4) |
| 116 | +#endif |
| 117 | + |
| 118 | + .outer_loop_short_matrix: // one iteration per destination row |
| 119 | + |
| 120 | +#if XCHAL_HAVE_LOOPS |
| 121 | + loopnez a4, ._main_loop_short_matrix // zero-overhead loop (not supported for esp32s2) |
| 122 | +#else |
| 123 | + // Init loop parameters |
| 124 | + add a15, a14, a3 // a15 = dest_buf (a3) + a14, address one past the last pixel of this row |
| 125 | + ._main_loop_short_matrix_done: // NOTE: despite its name, this label is the HEAD of the pixel loop |
| 126 | +#endif |
| 127 | + // Pixel loop body: stores 4 bytes (one ARGB8888 pixel) per iteration |
| 128 | + s32i.n a10, a3, 0 // save 32 bits from a10 to dest_buff a3 |
| 129 | + addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes |
| 130 | +#if !XCHAL_HAVE_LOOPS |
| 131 | + bltu a3, a15, ._main_loop_short_matrix_done // Loop while dest_buf (a3) is below a15; unsigned compare because both are addresses |
| 132 | +#endif |
| 133 | + ._main_loop_short_matrix: // END of the pixel loop (loopnez end label) |
| 134 | + |
| 135 | + add a3, a3, a6 // advance dest_buff by the row gap to the start of the next row |
| 136 | + addi.n a5, a5, -1 // one row done, decrement the remaining row count |
| 137 | + bnez a5, .outer_loop_short_matrix // repeat until all dest_h rows are filled |
| 138 | + |
| 139 | + movi.n a2, 1 // return LV_RESULT_OK = 1 |
| 140 | + retw.n // return |
| 141 | + |
| 142 | +//********************************************************************************************************************** |
| 143 | + |
| 144 | + // One of the matrix dimensions is zero, return early |
| 145 | + _zero_matrix_len_check: |
| 146 | + movi.n a2, 1 // return LV_RESULT_OK = 1 |
| 147 | + retw.n // return |
0 commit comments