espressif
diff --git a/‎.gitignore
Lines changed: 4 additions & 0 deletions b/‎.gitignore
Lines changed: 4 additions & 0 deletions
diff --git a/‎CHANGELOG.md
Lines changed: 1 addition & 0 deletions b/‎CHANGELOG.md
Lines changed: 1 addition & 0 deletions
diff --git a/‎CMakeLists.txt
Lines changed: 31 additions & 14 deletions b/‎CMakeLists.txt
Lines changed: 31 additions & 14 deletions
diff --git a/‎component.mk
Lines changed: 16 additions & 2 deletions b/‎component.mk
Lines changed: 16 additions & 2 deletions
diff --git a/‎docs/Doxyfile
Lines changed: 1 addition & 1 deletion b/‎docs/Doxyfile
Lines changed: 1 addition & 1 deletion
diff --git a/‎idf_component.yml
Lines changed: 1 addition & 1 deletion b/‎idf_component.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎modules/common/include/esp_dsp.h
Lines changed: 2 additions & 2 deletions b/‎modules/common/include/esp_dsp.h
Lines changed: 2 additions & 2 deletions
diff --git a/‎modules/fir/fixed/dsps_fird_s16_ansi.c
Lines changed: 1 addition & 2 deletions b/‎modules/fir/fixed/dsps_fird_s16_ansi.c
Lines changed: 1 addition & 2 deletions
diff --git a/‎modules/kalman/ekf_imu13states/ekf_imu13states.cpp
Lines changed: 1 addition & 1 deletion b/‎modules/kalman/ekf_imu13states/ekf_imu13states.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎modules/kalman/ekf_imu13states/test/test_ekf_imu13states.cpp
Lines changed: 3 additions & 3 deletions b/‎modules/kalman/ekf_imu13states/test/test_ekf_imu13states.cpp
Lines changed: 3 additions & 3 deletions
diff --git a/‎modules/math/mulc/include/dsps_mulc.h
Lines changed: 0 additions & 1 deletion b/‎modules/math/mulc/include/dsps_mulc.h
Lines changed: 0 additions & 1 deletion
diff --git a/‎modules/matrix/add/float/dspm_add_f32_ae32.S
Lines changed: 63 additions & 0 deletions b/‎modules/matrix/add/float/dspm_add_f32_ae32.S
Lines changed: 63 additions & 0 deletions
diff --git a/‎modules/matrix/add/float/dspm_add_f32_ansi.c
Lines changed: 42 additions & 0 deletions b/‎modules/matrix/add/float/dspm_add_f32_ansi.c
Lines changed: 42 additions & 0 deletions
diff --git a/‎modules/matrix/add/include/dspm_add.h
Lines changed: 65 additions & 0 deletions b/‎modules/matrix/add/include/dspm_add.h
Lines changed: 65 additions & 0 deletions
diff --git a/‎modules/matrix/add/include/dspm_add_platform.h
Lines changed: 20 additions & 0 deletions b/‎modules/matrix/add/include/dspm_add_platform.h
Lines changed: 20 additions & 0 deletions
@@ -32,6 +32,7 @@ exmaples/**/managed_components
 test_app/build
 test_app/sdkconfig
 test_app/sdkconfig.old
+test_app/dependencies.lock
 
 # Doc build artifacts
 docs/_build/
@@ -54,3 +55,6 @@ coverage_report/
 
 # VS Code Settings
 .vscode/
+
+# incorrect build locations
+build/
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Application for Azure IoT board
 - FIR f32 decimation filter optimized for esp32s3
 - Add Esp32-LyraT board application 
+- New methods for Mat class to support sub-matrix operations
 
 ### Fixed
 - Add Bi-Quad for esp32s3 into common CMakeLists.txt 
 
@@ -31,21 +31,32 @@ set(srcs            "modules/common/misc/dsps_pwroftwo.cpp"
                     "modules/dotprod/fixed/dspi_dotprod_u8_aes3.S"
                     "modules/dotprod/fixed/dspi_dotprod_off_u8_aes3.S"
                     "modules/dotprod/fixed/dspi_dotprod_off_s8_aes3.S"
-                    
 
-                    "modules/matrix/float/dspm_mult_3x3x1_f32_ae32.S"
-                    "modules/matrix/float/dspm_mult_3x3x3_f32_ae32.S"
-                    "modules/matrix/float/dspm_mult_4x4x1_f32_ae32.S"
-                    "modules/matrix/float/dspm_mult_4x4x4_f32_ae32.S"
-                    "modules/matrix/float/dspm_mult_f32_ae32.S"
-                    "modules/matrix/float/dspm_mult_f32_aes3.S"
-                    "modules/matrix/float/dspm_mult_f32_ansi.c"
-                    "modules/matrix/fixed/dspm_mult_s16_ae32.S"
-                    "modules/matrix/fixed/dspm_mult_s16_m_ae32_vector.S"
-                    "modules/matrix/fixed/dspm_mult_s16_m_ae32.S"
-                    "modules/matrix/fixed/dspm_mult_s16_ansi.c"
-                    "modules/matrix/fixed/dspm_mult_s16_aes3.S"
+                    "modules/matrix/mul/float/dspm_mult_3x3x1_f32_ae32.S"
+                    "modules/matrix/mul/float/dspm_mult_3x3x3_f32_ae32.S"
+                    "modules/matrix/mul/float/dspm_mult_4x4x1_f32_ae32.S"
+                    "modules/matrix/mul/float/dspm_mult_4x4x4_f32_ae32.S"
+                    "modules/matrix/mul/float/dspm_mult_f32_ae32.S"
+                    "modules/matrix/mul/float/dspm_mult_f32_aes3.S"
+                    "modules/matrix/mul/float/dspm_mult_f32_ansi.c"
+                    "modules/matrix/mul/float/dspm_mult_ex_f32_ansi.c"
+                    "modules/matrix/mul/float/dspm_mult_ex_f32_ae32.S"
+                    "modules/matrix/mul/float/dspm_mult_ex_f32_aes3.S"
+                    "modules/matrix/mul/fixed/dspm_mult_s16_ae32.S"
+                    "modules/matrix/mul/fixed/dspm_mult_s16_m_ae32_vector.S"
+                    "modules/matrix/mul/fixed/dspm_mult_s16_m_ae32.S"
+                    "modules/matrix/mul/fixed/dspm_mult_s16_ansi.c"
+                    "modules/matrix/mul/fixed/dspm_mult_s16_aes3.S"
+                    "modules/matrix/add/float/dspm_add_f32_ansi.c"
+                    "modules/matrix/add/float/dspm_add_f32_ae32.S"
+                    "modules/matrix/addc/float/dspm_addc_f32_ansi.c"
+                    "modules/matrix/addc/float/dspm_addc_f32_ae32.S"
+                    "modules/matrix/mulc/float/dspm_mulc_f32_ansi.c"
+                    "modules/matrix/mulc/float/dspm_mulc_f32_ae32.S"
+                    "modules/matrix/sub/float/dspm_sub_f32_ansi.c"
+                    "modules/matrix/sub/float/dspm_sub_f32_ae32.S"
                     "modules/matrix/mat/mat.cpp"
+
                     "modules/math/mulc/float/dsps_mulc_f32_ansi.c"
                     "modules/math/addc/float/dsps_addc_f32_ansi.c"
                     "modules/math/mulc/fixed/dsps_mulc_s16_ansi.c"
@@ -75,7 +86,7 @@ set(srcs            "modules/common/misc/dsps_pwroftwo.cpp"
                     "modules/fft/fixed/dsps_fft2r_sc16_ae32.S"
                     "modules/fft/fixed/dsps_fft2r_sc16_ansi.c"
                     "modules/fft/fixed/dsps_fft2r_sc16_aes3.S"
-                    
+
                     "modules/dct/float/dsps_dct_f32.c"
                     "modules/support/snr/float/dsps_snr_f32.cpp"
                     "modules/support/sfdr/float/dsps_sfdr_f32.cpp"
@@ -143,11 +154,17 @@ set(include_dirs                "modules/dotprod/include"
                                 "modules/math/addc/include"
                                 "modules/math/mulc/include"
                                 "modules/math/sqrt/include"
+                                "modules/matrix/mul/include"
+                                "modules/matrix/add/include"
+                                "modules/matrix/addc/include"
+                                "modules/matrix/mulc/include"
+                                "modules/matrix/sub/include"
                                 "modules/matrix/include"
                                 "modules/fft/include"
                                 "modules/dct/include"
                                 "modules/conv/include"
                                 "modules/common/include"
+                                "modules/matrix/mul/test/include"
 # EKF files
                                 "modules/kalman/ekf/include"
                                 "modules/kalman/ekf_imu13states/include"
 
@@ -16,6 +16,11 @@ COMPONENT_ADD_INCLUDEDIRS := modules/dotprod/include \
 							modules/math/addc/include \
 							modules/math/mulc/include \
 							modules/math/sqrt/include \
+							modules/matrix/add/include \
+							modules/matrix/addc/include \
+							modules/matrix/mul/include \
+							modules/matrix/mulc/include \
+							modules/matrix/sub/include \
 							modules/matrix/include \
 							modules/fft/include \
 							modules/dct/include \
@@ -31,8 +36,17 @@ COMPONENT_SRCDIRS :=. \
 					modules/dotprod/float \
 					modules/dotprod/fixed \
 					modules/matrix \
-					modules/matrix/float \
-					modules/matrix/fixed \
+					modules/matrix/add \
+					modules/matrix/add/float \
+					modules/matrix/addc \
+					modules/matrix/addc/float \
+					modules/matrix/mul \
+					modules/matrix/mul/float \
+					modules/matrix/mul/fixed \
+					modules/matrix/mulc \
+					modules/matrix/mulc/float \
+					modules/matrix/sub \
+					modules/matrix/sub/float \
 					modules/matrix/mat \
 					modules/math \
 					modules/math/mulc \
 
@@ -35,7 +35,7 @@ INPUT = \
     $(PROJECT_PATH)/modules/math/add/include/dsps_add.h \
     $(PROJECT_PATH)/modules/math/sub/include/dsps_sub.h \
     $(PROJECT_PATH)/modules/math/mul/include/dsps_mul.h \
-    $(PROJECT_PATH)/modules/matrix/include/dspm_mult.h \
+    $(PROJECT_PATH)/modules/matrix/mul/include/dspm_mult.h \
     $(PROJECT_PATH)/modules/matrix/include/mat.h \
     $(PROJECT_PATH)/modules/conv/include/dsps_conv.h \
     $(PROJECT_PATH)/modules/conv/include/dsps_corr.h \
 
@@ -1,4 +1,4 @@
-version: "1.4.8"
+version: "1.4.9"
 
 description: ESP-DSP is the official DSP library for Espressif SoCs.
 url: https://github.com/espressif/esp-dsp
 
@@ -1,4 +1,4 @@
-// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+// Copyright 2018-2023 Espressif Systems (Shanghai) PTE LTD
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -45,7 +45,7 @@ extern "C"
 #include "dsps_dct.h"
 
 // Matrix operations
-#include "dspm_mult.h"
+#include "dspm_matrix.h"
 
 // Support functions
 #include "dsps_view.h"
 
@@ -1,11 +1,10 @@
 /*
- * SPDX-FileCopyrightText: 2022 Espressif Systems (Shanghai) CO LTD
+ * SPDX-FileCopyrightText: 2022-2023 Espressif Systems (Shanghai) CO LTD
  *
  * SPDX-License-Identifier: Apache-2.0
  */
 
 #include "dsps_fir.h"
-#include "esp_dsp.h"
 
 int32_t dsps_fird_s16_ansi(fir_s16_t *fir, const int16_t *input, int16_t *output, int32_t len)
 {
 
@@ -117,7 +117,7 @@ void ekf_imu13states::Test()
 
 void ekf_imu13states::TestFull(bool enable_att)
 {
-    int total_N = 4096;
+    int total_N = 2048;
     float pi = std::atan(1) * 4;
     float gyro_err_data[] = {0.1, 0.2, 0.3}; // static constatnt error
     dspm::Mat gyro_err(gyro_err_data, 3, 1);
 
@@ -51,9 +51,9 @@ TEST_CASE("ekf_imu13states functionality gyro and magn", "[dspm]")
     unsigned int end_b = xthal_get_ccount();
     ESP_LOGI(TAG, "Total time %i (K cycles)", (end_b - start_b)/1000);
 
-    TEST_ASSERT_LESS_THAN(100, (int)(1000*abs(ekf13->X.data[4] - 0.1)));
-    TEST_ASSERT_LESS_THAN(100, (int)(1000*abs(ekf13->X.data[5] - 0.2)));
-    TEST_ASSERT_LESS_THAN(100, (int)(1000*abs(ekf13->X.data[6] - 0.3)));
+    TEST_ASSERT_LESS_THAN(300, (int)(1000*abs(ekf13->X.data[4] - 0.1)));
+    TEST_ASSERT_LESS_THAN(300, (int)(1000*abs(ekf13->X.data[5] - 0.2)));
+    TEST_ASSERT_LESS_THAN(300, (int)(1000*abs(ekf13->X.data[6] - 0.3)));
     printf("Expected result = %i, calculated result = %i\n", 100, (int)(1000*ekf13->X.data[4] + 0.5));
     printf("Expected result = %i, calculated result = %i\n", 200, (int)(1000*ekf13->X.data[5] + 0.5));
     printf("Expected result = %i, calculated result = %i\n", 300, (int)(1000*ekf13->X.data[6] + 0.5));
 
@@ -48,7 +48,6 @@ esp_err_t dsps_mulc_f32_ae32(const float *input, float *output, int len, float C
 esp_err_t dsps_mulc_s16_ae32(const int16_t *input, int16_t *output, int len, int16_t C, int step_in, int step_out);
 esp_err_t dsps_mulc_s16_ansi(const int16_t *input, int16_t *output, int len, int16_t C, int step_in, int step_out);
 
-/**@}*/
 
 #ifdef __cplusplus
 }
 
@@ -0,0 +1,63 @@
+/*
+ * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dspm_add_platform.h"
+#if (dspm_add_f32_ae32_enabled == 1)
+
+// This is an add function for sub-matrices for ESP32 processor
+    .text
+    .align  4
+    .global dspm_add_f32_ae32
+    .type   dspm_add_f32_ae32,@function
+// The function implements the following C code:
+// esp_err_t dspm_add_f32_ansi(const float *input1, const float *input2, float *output, int rows, int cols, int padd1, int padd2, int padd_out, int step1, int step2, int step_out);
+
+dspm_add_f32_ae32: 
+// input1           - a2
+// input2           - a3
+// output           - a4
+// rows             - a5
+// cols             - a6
+// padd1            - a7
+// padd2            - a8   (loaded from the stack below)
+// padd_out         - a9   (loaded from the stack below)
+// step1            - a10  (loaded from the stack below)
+// step2            - a11  (loaded from the stack below)
+// step_out         - a12  (loaded from the stack below)
+// Returns 0 (ESP_OK) in a2. No argument validation is performed in this routine.
+    entry   a1, 16
+    // Stack-passed arguments sit in consecutive 4-byte slots starting at a1 + 16
+    l32i.n  a8,  a1, 16         // padd2
+    l32i.n  a9,  a1, 20         // padd_out
+    l32i.n  a10, a1, 24         // step1
+    l32i.n  a11, a1, 28         // step2    (own slot; must not alias the step1 slot)
+    l32i.n  a12, a1, 32         // step_out (own slot; must not alias the step1 slot)
+
+    slli    a10, a10, 2         // a10  - step1 << 2    (element step -> byte step)
+    slli    a11, a11, 2         // a11  - step2 << 2    (element step -> byte step)
+    slli    a12, a12, 2         // a12  - step_out << 2 (element step -> byte step)
+
+    .outer_loop_add_f32_ae32:
+        // One pass of the hardware loop below processes the cols elements of one row
+        loopnez a6, .loop_add_f32_ae32
+            lsxp     f0,  a2,  a10      // load input1 to f0, increment input1 (input1_ptr+=step1)
+            lsxp     f1,  a3,  a11      // load input2 to f1, increment input2 (input2_ptr+=step2)
+
+            add.s    f2,  f0,  f1       // f2 = f0 + f1
+            ssxp     f2,  a4,  a12      // save result f2 to output a4, increment output (output_ptr+=step_out)
+        .loop_add_f32_ae32:
+        // Pointers already advanced cols * step elements; skip the row padding to reach the next row
+        addx4    a3,  a8,  a3           // input2_ptr += (padd2 << 2);
+        addx4    a2,  a7,  a2           // input1_ptr += (padd1 << 2);
+        addx4    a4,  a9,  a4           // output_ptr += (padd_out << 2);
+        addi.n   a5,  a5,  -1           // rows - 1
+    // NOTE(review): rows is only tested after the decrement, so rows <= 0 is not guarded here
+    bnez a5, .outer_loop_add_f32_ae32
+
+    movi.n  a2, 0 // return status ESP_OK
+    retw.n
+
+#endif // dspm_add_f32_ae32_enabled
@@ -0,0 +1,42 @@
+/*
+ * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+
+#include "dspm_add.h"
+// Element-wise sum of two float sub-matrices: for each of rows x cols elements, output = input1 + input2. Returns ESP_OK, or ESP_ERR_DSP_PARAM_OUTOFRANGE on a bad argument.
+esp_err_t dspm_add_f32_ansi(const float *input1, const float *input2, float *output, int rows, int cols, int padd1, int padd2, int padd_out, int step1, int step2, int step_out)
+{
+    if (NULL == input1) return ESP_ERR_DSP_PARAM_OUTOFRANGE; // all three buffers must be non-NULL
+    if (NULL == input2) return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    if (NULL == output) return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+
+    if (rows <= 0) return ESP_ERR_DSP_PARAM_OUTOFRANGE; // dimensions must be strictly positive
+    if (cols <= 0) return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+
+    if (padd1 < 0) return ESP_ERR_DSP_PARAM_OUTOFRANGE; // paddings may be zero but not negative
+    if (padd2 < 0) return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    if (padd_out < 0) return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+
+    if (step1 <= 0) return ESP_ERR_DSP_PARAM_OUTOFRANGE; // element steps must be strictly positive
+    if (step2 <= 0) return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    if (step_out <= 0) return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+
+    const int ptr_input1_step = cols + padd1; // floats to advance per row; note the step values are not part of this row advance
+    const int ptr_input2_step = cols + padd2;
+    const int ptr_output_step = cols + padd_out;
+    float *ptr_input1 = (float *)input1; // cast drops const so the local pointer can be advanced; the data is only read
+    float *ptr_input2 = (float *)input2;
+
+    for (int row = 0; row < rows; row++) {
+        for (int col = 0; col < cols; col++) {
+            output[col * step_out] = ptr_input1[col * step1] + ptr_input2[col * step2]; // step* is the distance in floats between consecutive columns
+        }
+        ptr_input1 += ptr_input1_step; // move all three pointers to the start of the next row
+        ptr_input2 += ptr_input2_step;
+        output += ptr_output_step;
+    }
+    return ESP_OK;
+}
@@ -0,0 +1,65 @@
+/*
+ * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+
+#ifndef _dspm_add_H_
+#define _dspm_add_H_
+#include "dsp_err.h"
+
+#include "dspm_add_platform.h"
+
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+
+/**@{*/
+/**
+ * @brief   add two arrays with paddings (add two sub-matrices)
+ *
+ * The function adds two arrays defined as sub-matrices with paddings
+ * out[row * ptr_step_out + col * step_out] = in1[row * ptr_step_in1 + col * step1] + in2[row * ptr_step_in2 + col * step2];
+ * In the ANSI C implementation the row strides are ptr_step_in1 = cols + padd1, ptr_step_in2 = cols + padd2
+ * and ptr_step_out = cols + padd_out (all counted in float elements).
+ * dspm_add_f32_ansi uses ANSI C and can be compiled and run on any platform;
+ * dspm_add_f32_ae32 is the assembly implementation for the ESP32 processor and takes the same arguments.
+ *
+ * @param[in]  input1: input array 1
+ * @param[in]  input2: input array 2
+ * @param[out] output: output array
+ * @param[in]  rows: matrix rows
+ * @param[in]  cols: matrix cols
+ * @param[in]  padd1: input array 1 padding
+ * @param[in]  padd2: input array 2 padding
+ * @param[in]  padd_out: output array padding
+ * @param[in]  step1: step over input array 1 (by default should be 1)
+ * @param[in]  step2: step over input array 2 (by default should be 1)
+ * @param[in]  step_out: step over output array (by default should be 1)
+ *
+ * @return
+ *      - ESP_OK on success
+ *      - One of the error codes from DSP library (the ANSI C implementation returns
+ *        ESP_ERR_DSP_PARAM_OUTOFRANGE for a NULL array, rows/cols/step <= 0 or a negative padding)
+ */
+esp_err_t dspm_add_f32_ansi(const float *input1, const float *input2, float *output, int rows, int cols, int padd1, int padd2, int padd_out, int step1, int step2, int step_out);
+esp_err_t dspm_add_f32_ae32(const float *input1, const float *input2, float *output, int rows, int cols, int padd1, int padd2, int padd_out, int step1, int step2, int step_out);
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#if CONFIG_DSP_OPTIMIZED
+
+#if (dspm_add_f32_ae32_enabled == 1)
+#define dspm_add_f32 dspm_add_f32_ae32
+#else
+#define dspm_add_f32 dspm_add_f32_ansi
+#endif
+
+#else // CONFIG_DSP_OPTIMIZED
+#define dspm_add_f32 dspm_add_f32_ansi
+#endif // CONFIG_DSP_OPTIMIZED
+
+#endif // _dspm_add_H_
@@ -0,0 +1,20 @@
+#ifndef _dspm_add_platform_H_
+#define _dspm_add_platform_H_
+
+#include "sdkconfig.h"
+
+#ifdef __XTENSA__
+#include <xtensa/config/core-isa.h>
+#include <xtensa/config/core-matmap.h>
+
+// The assembly kernel (add.s/lsxp/ssxp and loopnez) is enabled only when XCHAL_HAVE_FP and XCHAL_HAVE_LOOPS are both 1
+#if ((XCHAL_HAVE_FP == 1) && (XCHAL_HAVE_LOOPS == 1))
+
+#define dspm_add_f32_ae32_enabled  1 // when left undefined, "#if (dspm_add_f32_ae32_enabled == 1)" evaluates to false
+
+#endif // XCHAL_HAVE_FP && XCHAL_HAVE_LOOPS
+
+#endif // __XTENSA__
+
+
+#endif // _dspm_add_platform_H_
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-version: "1.4.8"`
	`1`	`+version: "1.4.9"`
`2`	`2`
`3`	`3`	`description: ESP-DSP is the official DSP library for Espressif SoCs.`
`4`	`4`	`url: https://github.com/espressif/esp-dsp`
Original file line number	Diff line number	Diff line change
`@@ -1,11 +1,10 @@`
`1`	`1`	`/*`
`2`		`- * SPDX-FileCopyrightText: 2022 Espressif Systems (Shanghai) CO LTD`
	`2`	`+ * SPDX-FileCopyrightText: 2022-2023 Espressif Systems (Shanghai) CO LTD`
`3`	`3`	`*`
`4`	`4`	`* SPDX-License-Identifier: Apache-2.0`
`5`	`5`	`*/`
`6`	`6`
`7`	`7`	`#include "dsps_fir.h"`
`8`		`-#include "esp_dsp.h"`
`9`	`8`
`10`	`9`	`int32_t dsps_fird_s16_ansi(fir_s16_t *fir, const int16_t *input, int16_t *output, int32_t len)`
`11`	`10`	`{`
Original file line number	Diff line number	Diff line change
`@@ -117,7 +117,7 @@ void ekf_imu13states::Test()`
`117`	`117`
`118`	`118`	`void ekf_imu13states::TestFull(bool enable_att)`
`119`	`119`	`{`
`120`		`- int total_N = 4096;`
	`120`	`+ int total_N = 2048;`
`121`	`121`	`float pi = std::atan(1) * 4;`
`122`	`122`	`float gyro_err_data[] = {0.1, 0.2, 0.3}; // static constatnt error`
`123`	`123`	`dspm::Mat gyro_err(gyro_err_data, 3, 1);`
Original file line number	Diff line number	Diff line change
`@@ -48,7 +48,6 @@ esp_err_t dsps_mulc_f32_ae32(const float *input, float *output, int len, float C`
`48`	`48`	`esp_err_t dsps_mulc_s16_ae32(const int16_t *input, int16_t *output, int len, int16_t C, int step_in, int step_out);`
`49`	`49`	`esp_err_t dsps_mulc_s16_ansi(const int16_t *input, int16_t *output, int len, int16_t C, int step_in, int step_out);`
`50`	`50`
`51`		`-/**@}*/`
`52`	`51`
`53`	`52`	`#ifdef __cplusplus`
`54`	`53`	`}`