Skip to content

Commit ee3dfa2

Browse files
feat(lvgl_port_simd): Support for esp32s2, assembly rendering
- renamed assembly src files to xtensa_pie, xtensa_base
- xtensa_base src files use zero-overhead loops only for esp32
- added zero length matrix check into all src files
1 parent 23d3a43 commit ee3dfa2

19 files changed

+549
-200
lines changed

components/esp_lvgl_port/CMakeLists.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,14 +76,14 @@ if("usb_host_hid" IN_LIST build_components)
7676
list(APPEND ADD_LIBS idf::usb_host_hid)
7777
endif()
7878

79-
# Include SIMD assembly source code for rendering, only for (9.1.0 <= LVG_version < 9.2.0) and only for esp32 and esp32s3
79+
# Include SIMD assembly source code for rendering, only for (9.1.0 <= LVGL version < 9.2.0) and only for Xtensa targets (esp32, esp32s2, esp32s3)
8080
if((lvgl_ver VERSION_GREATER_EQUAL "9.1.0") AND (lvgl_ver VERSION_LESS "9.2.0"))
81-
if(CONFIG_IDF_TARGET_ESP32 OR CONFIG_IDF_TARGET_ESP32S3)
81+
if(CONFIG_IDF_TARGET_ESP32 OR CONFIG_IDF_TARGET_ESP32S3 OR CONFIG_IDF_TARGET_ESP32S2)
8282
message(VERBOSE "Compiling SIMD")
8383
if(CONFIG_IDF_TARGET_ESP32S3)
84-
file(GLOB_RECURSE ASM_SRCS ${PORT_PATH}/simd/*_esp32s3.S) # Select only esp32s3 related files
84+
file(GLOB_RECURSE ASM_SRCS ${PORT_PATH}/simd/*_xtensa_pie.S) # Select Xtensa PIE, for esp32s3 target
8585
else()
86-
file(GLOB_RECURSE ASM_SRCS ${PORT_PATH}/simd/*_esp32.S) # Select only esp32 related files
86+
file(GLOB_RECURSE ASM_SRCS ${PORT_PATH}/simd/*_xtensa_base.S) # Select Xtensa Base for esp32, esp32s2 targets
8787
endif()
8888

8989
# Explicitly add all assembly macro files

components/esp_lvgl_port/src/lvgl9/simd/lv_color_blend_to_argb8888_esp32.S

Lines changed: 0 additions & 81 deletions
This file was deleted.
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
/*
2+
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
3+
*
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
#include "xtensa_context.h"
8+
#include "lv_macro_memset.S"
9+
10+
// This is the LVGL ARGB8888 simple fill for the ESP32 and ESP32S2 processors
11+
12+
.section .text
13+
.align 4
14+
.global lv_color_blend_to_argb8888_esp
15+
.type lv_color_blend_to_argb8888_esp,@function
16+
17+
// The function implements the following C code:
18+
// void lv_color_blend_to_argb8888(_lv_draw_sw_blend_fill_dsc_t * dsc);
19+
20+
// Input params
21+
//
22+
// dsc - a2
23+
24+
// typedef struct {
25+
// uint32_t opa; l32i 0
26+
// void * dst_buf; l32i 4
27+
// uint32_t dst_w; l32i 8
28+
// uint32_t dst_h; l32i 12
29+
// uint32_t dst_stride; l32i 16
30+
// const void * src_buf; l32i 20
31+
// uint32_t src_stride; l32i 24
32+
// const lv_opa_t * mask_buf; l32i 28
33+
// uint32_t mask_stride; l32i 32
34+
// } asm_dsc_t;
35+
36+
lv_color_blend_to_argb8888_esp:
37+
38+
entry a1, 32
39+
40+
l32i.n a3, a2, 4 // a3 - dest_buff
41+
l32i.n a4, a2, 8 // a4 - dest_w in uint32_t
42+
l32i.n a5, a2, 12 // a5 - dest_h in uint32_t
43+
l32i.n a6, a2, 16 // a6 - dest_stride in bytes
44+
l32i.n a7, a2, 20 // a7 - src_buff (color)
45+
l32i.n a8, a7, 0 // a8 - color as value
46+
slli a11, a4, 2 // a11 - dest_w_bytes = sizeof(uint32_t) * dest_w
47+
48+
beqz a4, _zero_matrix_len_check // Check if dest_w a4 is zero
49+
beqz a5, _zero_matrix_len_check // Check if dest_h a5 is zero
50+
movi a7, 0xff000000 // opacity mask
51+
or a10, a7, a8 // apply opacity
52+
sub a6, a6, a11 // dest_stride = dest_stride - dest_w_bytes
53+
54+
// Check dest_w length
55+
bltui a4, 8, _matrix_width_check // Branch if dest_w (a4) is lower than 8
56+
srli a9, a4, 3 // a9 - loop_len = dest_w / 8
57+
58+
#if !XCHAL_HAVE_LOOPS
59+
slli a14, a9, 5 // a14 = loop_len (a9) * 32 (main loop increments address pointers by 32)
60+
#endif
61+
62+
.outer_loop:
63+
64+
#if XCHAL_HAVE_LOOPS
65+
loopnez a9, ._main_loop // zero-overhead loop (not supported for esp32s2)
66+
#else
67+
// Init loop parameters
68+
beqz a9, ._main_loop // Branch to the end, if a9 is 0 (no need to run the main loop)
69+
add a15, a14, a3 // a15 = a14 + dest_buf address
70+
.main_loop_done:
71+
#endif
72+
// Run main loop which sets 32 bytes (8 ARGB8888 pixels) in one loop run
73+
s32i.n a10, a3, 0 // save 32 bits from a10 to dest_buff a3, offset 0 bytes
74+
s32i.n a10, a3, 4 // save 32 bits from a10 to dest_buff a3, offset 4 bytes
75+
s32i.n a10, a3, 8 // save 32 bits from a10 to dest_buff a3, offset 8 bytes
76+
s32i.n a10, a3, 12 // save 32 bits from a10 to dest_buff a3, offset 12 bytes
77+
s32i.n a10, a3, 16 // save 32 bits from a10 to dest_buff a3, offset 16 bytes
78+
s32i.n a10, a3, 20 // save 32 bits from a10 to dest_buff a3, offset 20 bytes
79+
s32i.n a10, a3, 24 // save 32 bits from a10 to dest_buff a3, offset 24 bytes
80+
s32i.n a10, a3, 28 // save 32 bits from a10 to dest_buff a3, offset 28 bytes
81+
addi.n a3, a3, 32 // increment dest_buff a3 pointer by 32 bytes
82+
#if !XCHAL_HAVE_LOOPS
83+
blt a3, a15, .main_loop_done // Check end of the main loop, branch if dest_buf (a3) lower than a15
84+
#endif
85+
._main_loop:
86+
87+
// Finish the remaining bytes out of the loop
88+
89+
// Check modulo 16 of the dest_w_bytes (a11), if - then set 16 bytes (4 ARGB8888 pixels)
90+
// src_reg a10, dest_buff a3, dest_w_bytes a11
91+
macro_memset_mod_16 a10, a3, a11, __LINE__
92+
93+
// Check modulo 8 of the dest_w_bytes (a11), if - then set 8 bytes (2 ARGB8888 pixels)
94+
// src_reg a10, dest_buff a3, dest_w_bytes a11
95+
macro_memset_mod_8 a10, a3, a11, __LINE__
96+
97+
// Check modulo 4 of the dest_w_bytes (a11), if - then set 4 bytes (1 ARGB8888 pixel)
98+
// src_reg a10, dest_buff a3, dest_w_bytes a11
99+
macro_memset_mod_4 a10, a3, a11, __LINE__
100+
101+
add a3, a3, a6 // dest_buff + dest_stride
102+
addi.n a5, a5, -1 // decrease the outer loop
103+
bnez a5, .outer_loop
104+
105+
movi.n a2, 1 // return LV_RESULT_OK = 1
106+
retw.n // return
107+
108+
//**********************************************************************************************************************
109+
110+
// Small matrix width, keep it simple for lengths less than 8 pixels
111+
112+
_matrix_width_check:
113+
114+
#if !XCHAL_HAVE_LOOPS
115+
slli a14, a4, 2 // a14 = dest_w (a4) * 4 (main loop increments address pointers by 4)
116+
#endif
117+
118+
.outer_loop_short_matrix:
119+
120+
#if XCHAL_HAVE_LOOPS
121+
loopnez a4, ._main_loop_short_matrix // zero-overhead loop (not supported for esp32s2)
122+
#else
123+
// Init loop parameters
124+
add a15, a14, a3 // a15 = a14 + dest_buf address
125+
._main_loop_short_matrix_done:
126+
#endif
127+
// Run main loop which sets 4 bytes (one ARGB8888 pixel) in one loop run
128+
s32i.n a10, a3, 0 // save 32 bits from a10 to dest_buff a3
129+
addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes
130+
#if !XCHAL_HAVE_LOOPS
131+
blt a3, a15, ._main_loop_short_matrix_done // Check end of the main loop, branch if dest_buf (a3) lower than a15
132+
#endif
133+
._main_loop_short_matrix:
134+
135+
add a3, a3, a6 // dest_buff + dest_stride
136+
addi.n a5, a5, -1 // decrease the outer loop
137+
bnez a5, .outer_loop_short_matrix
138+
139+
movi.n a2, 1 // return LV_RESULT_OK = 1
140+
retw.n // return
141+
142+
//**********************************************************************************************************************
143+
144+
// One of the matrix dimensions is zero, return early
145+
_zero_matrix_len_check:
146+
movi.n a2, 1 // return LV_RESULT_OK = 1
147+
retw.n // return

components/esp_lvgl_port/src/lvgl9/simd/lv_color_blend_to_argb8888_esp32s3.S renamed to components/esp_lvgl_port/src/lvgl9/simd/lv_color_blend_to_argb8888_xtensa_pie.S

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
2+
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
33
*
44
* SPDX-License-Identifier: Apache-2.0
55
*/
@@ -42,11 +42,13 @@ lv_color_blend_to_argb8888_esp:
4242
l32i.n a8, a7, 0 // a8 - color as value
4343
slli a11, a4, 2 // a11 - dest_w_bytes = sizeof(uint32_t) * dest_w
4444

45-
movi a7, 0xff000000 // oppactiy mask
46-
or a10, a7, a8 // apply oppacity
45+
beqz a4, _zero_matrix_len_check // Check if dest_w a4 is zero
46+
beqz a5, _zero_matrix_len_check // Check if dest_h a5 is zero
47+
movi a7, 0xff000000 // opacity mask
48+
or a10, a7, a8 // apply opacity
4749

4850
// Check for short lengths
49-
// dest_w should be at least 8, othewise it's not worth using esp32s3 TIE
51+
// dest_w should be at least 8, otherwise it's not worth using esp32s3 TIE
5052
bgei a4, 8, _esp32s3_implementation // Branch if dest_w is greater than or equal to 8
5153
j .lv_color_blend_to_argb8888_esp32_body // Jump to esp32 implementation
5254

@@ -227,7 +229,7 @@ lv_color_blend_to_argb8888_esp:
227229
addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes
228230
_dest_buff_aligned_by_1byte:
229231

230-
// Shift q reg, allowing to set 16-byte unaligned adata
232+
// Shift q reg, allowing to set 16-byte unaligned data
231233
wur.sar_byte a15 // apply unalignment to the SAR_BYTE
232234
ee.src.q q2, q0, q1 // shift concat. of q0 and q1 to q2 by SAR_BYTE amount
233235

@@ -323,3 +325,10 @@ lv_color_blend_to_argb8888_esp:
323325

324326
movi.n a2, 1 // return LV_RESULT_OK = 1
325327
retw.n // return
328+
329+
//**********************************************************************************************************************
330+
331+
// One of the matrix dimensions is zero, return early
332+
_zero_matrix_len_check:
333+
movi.n a2, 1 // return LV_RESULT_OK = 1
334+
retw.n // return

0 commit comments

Comments
 (0)