Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] rvv intrinsic 1.0+ #5642

Open
wants to merge 32 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/android.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ on:
- 'src/*'
- 'src/layer/*'
- 'src/layer/arm/**'
- 'src/layer/riscv/**'
- 'src/layer/x86/**'
- 'src/layer/vulkan/**'
pull_request:
Expand All @@ -20,6 +21,7 @@ on:
- 'src/*'
- 'src/layer/*'
- 'src/layer/arm/**'
- 'src/layer/riscv/**'
- 'src/layer/x86/**'
- 'src/layer/vulkan/**'
concurrency:
Expand Down Expand Up @@ -64,6 +66,11 @@ jobs:
mkdir build-x86_64 && cd build-x86_64
cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DANDROID_ABI="x86_64" ..
cmake --build . -j $(nproc)
- name: riscv64
run: |
mkdir build-riscv64 && cd build-riscv64
cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DANDROID_ABI="riscv64" ..
cmake --build . -j $(nproc)

- name: armeabi-v7a-shared
run: |
Expand All @@ -85,6 +92,11 @@ jobs:
mkdir build-x86_64-shared && cd build-x86_64-shared
cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DANDROID_ABI="x86_64" -DNCNN_SHARED_LIB=ON ..
cmake --build . -j $(nproc)
- name: riscv64-shared
run: |
mkdir build-riscv64-shared && cd build-riscv64-shared
cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DANDROID_ABI="riscv64" -DNCNN_SHARED_LIB=ON ..
cmake --build . -j $(nproc)

ndk-r16b:
runs-on: ubuntu-latest
Expand Down
63 changes: 38 additions & 25 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -420,40 +420,53 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv)")

if(CMAKE_SIZEOF_VOID_P EQUAL 8)
set(CMAKE_REQUIRED_FLAGS "-march=rv64gcv")
check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat32m1_t _s, _w; float _v; size_t vl; _s = vfmacc_vf_f32m1(_s, _v, _w, vl); return 0; }" NCNN_COMPILER_SUPPORT_RVV)
check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat32m8_t _s, _w; float _v; size_t vl; _s = __riscv_vfmacc_vf_f32m8(_s, _v, _w, vl); return 0; }" NCNN_COMPILER_SUPPORT_RISCV_V)

set(CMAKE_REQUIRED_FLAGS "-march=rv64gcv_zfh")
check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat16m1_t _s, _w; __fp16 _v; size_t vl; _s = vfmacc_vf_f16m1(_s, _v, _w, vl); return 0; }" NCNN_COMPILER_SUPPORT_RVV_ZFH)
set(CMAKE_REQUIRED_FLAGS "-march=rv64gcv_zfh_zvfh -D__fp16=_Float16")
check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat16m8_t _s, _w; __fp16 _v; size_t vl; _s = __riscv_vfmacc_vf_f16m8(_s, _v, _w, vl); return 0; }" NCNN_COMPILER_SUPPORT_RISCV_ZVFH)

if(NOT NCNN_COMPILER_SUPPORT_RVV_ZFH)
set(CMAKE_REQUIRED_FLAGS "-march=rv64gcv_zfh_zvfh0p1 -menable-experimental-extensions -D__fp16=_Float16")
check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat16m1_t _s, _w; __fp16 _v; size_t vl; _s = vfmacc_vf_f16m1(_s, _v, _w, vl); return 0; }" NCNN_COMPILER_SUPPORT_RVV_ZVFH)
endif()
set(CMAKE_REQUIRED_FLAGS "-march=rv64gc_zfh_xtheadvector -D__fp16=_Float16")
check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat16m8_t _s, _w; __fp16 _v; size_t vl; _s = __riscv_vfmacc_vf_f16m8(_s, _v, _w, vl); return 0; }" NCNN_COMPILER_SUPPORT_RISCV_XTHEADVECTOR)

unset(CMAKE_REQUIRED_FLAGS)

if(NCNN_COMPILER_SUPPORT_RVV)
if(NCNN_COMPILER_SUPPORT_RISCV_V OR NCNN_COMPILER_SUPPORT_RISCV_XTHEADVECTOR)
option(NCNN_RVV "optimize risc-v platform with v extension" ON)
option(NCNN_RVV_CHECK_VFREDSUM "check compilter about support rvv-intrinsic" ON)
if(NCNN_RVV_CHECK_VFREDSUM)
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/ncnn_check_rvv_vfredusum.cmake)
endif()
if(NOT (NCNN_COMPILER_SUPPORT_RVV_ZFH OR NCNN_COMPILER_SUPPORT_RVV_ZVFH))
message(WARNING "The compiler does not support risc-v zfh extension. Upgrading your toolchain is strongly recommended.")
endif()
option(NCNN_RVV_CHECK_PLAIN_SEGMENT "check compilter about rvv segment load/store interface" ON)
if(NCNN_RVV_CHECK_PLAIN_SEGMENT)
set(CMAKE_REQUIRED_FLAGS "-march=rv64gcv")
check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat32m1_t _s, _w; size_t vl; float src[32]={.0f}; vlseg2e32_v_f32m1(&_s, &_w, src, vl); return 0; }" NCNN_COMPILER_USE_RVV_PLAIN_SEG)
unset(CMAKE_REQUIRED_FLAGS)
endif()
if(NOT NCNN_COMPILER_USE_RVV_PLAIN_SEG)
message(WARNING "The compiler uses tuple types for segment load/store. Upgrading your toolchain is strongly recommended.")
add_definitions(-D__rvv_tuple)
if(NCNN_COMPILER_SUPPORT_RISCV_ZVFH OR NCNN_COMPILER_SUPPORT_RISCV_XTHEADVECTOR)
if(NCNN_RVV)
option(NCNN_ZVFH "optimize risc-v platform with zvfh extension" ON)
endif()
else()
message(WARNING "The compiler does not support zvfh extension. NCNN_ZVFH will be OFF.")
endif()

# option(NCNN_RVV_CHECK_VFREDSUM "check compilter about support rvv-intrinsic" ON)
# if(NCNN_RVV_CHECK_VFREDSUM)
# include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/ncnn_check_rvv_vfredusum.cmake)
# endif()
# if(NOT (NCNN_COMPILER_SUPPORT_RVV_ZFH OR NCNN_COMPILER_SUPPORT_RVV_ZVFH))
# message(WARNING "The compiler does not support risc-v zfh extension. Upgrading your toolchain is strongly recommended.")
# endif()
# option(NCNN_RVV_CHECK_PLAIN_SEGMENT "check compilter about rvv segment load/store interface" ON)
# if(NCNN_RVV_CHECK_PLAIN_SEGMENT)
# set(CMAKE_REQUIRED_FLAGS "-march=rv64gcv")
# check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat32m1_t _s, _w; size_t vl; float src[32]={.0f}; vlseg2e32_v_f32m1(&_s, &_w, src, vl); return 0; }" NCNN_COMPILER_USE_RVV_PLAIN_SEG)
# unset(CMAKE_REQUIRED_FLAGS)
# endif()
# if(NOT NCNN_COMPILER_USE_RVV_PLAIN_SEG)
# message(WARNING "The compiler uses tuple types for segment load/store. Upgrading your toolchain is strongly recommended.")
# add_definitions(-D__rvv_tuple)
# endif()
else()
message(WARNING "The compiler does not support risc-v v or xtheadvector extension. NCNN_RVV will be OFF.")
endif()

if(NCNN_COMPILER_SUPPORT_RISCV_XTHEADVECTOR)
option(NCNN_XTHEADVECTOR "optimize risc-v platform with xtheadvector extension" ON)
else()
message(WARNING "The compiler does not support risc-v v extension. NCNN_RVV will be OFF.")
message(WARNING "The compiler does not support risc-v xtheadvector extension. NCNN_XTHEADVECTOR will be OFF.")
endif()

endif()
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)")
set(NCNN_TARGET_ARCH powerpc)
Expand Down
10 changes: 9 additions & 1 deletion build-android.cmd
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
:: Set android ndk root
@ECHO OFF
@SETLOCAL
@SET ANDROID_NDK=<your-ndk-root_path, such as"E:\android-ndk-r18b">
@SET ANDROID_NDK=<your-ndk-root_path, such as"E:\android-ndk-r27">

:: Set ninja.exe
:: @SET NINJA_EXE=<your-ninja-exe_path, such as"D:\android\sdk\cmake\3.10.2.4988404\bin\ninja.exe">
Expand Down Expand Up @@ -38,4 +38,12 @@ cmake --build . --parallel %NUMBER_OF_PROCESSORS%
cmake --build . --target install
popd

:: android riscv64
mkdir build-android-riscv64
pushd build-android-riscv64
cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="riscv64" -DANDROID_PLATFORM=android-35 -DNCNN_VULKAN=ON ..
cmake --build . --parallel %NUMBER_OF_PROCESSORS%
cmake --build . --target install
popd

@ENDLOCAL
8 changes: 8 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ make -j4
make install
popd

##### android riscv64
mkdir -p build-android-riscv64
pushd build-android-riscv64
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="riscv64" -DANDROID_PLATFORM=android-35 -DNCNN_VULKAN=ON ..
make -j4
make install
popd

##### linux of hisiv300 (forgot the chip name) toolchain with neon and openmp
mkdir -p build-hisiv300-linux
pushd build-hisiv300-linux
Expand Down
14 changes: 7 additions & 7 deletions cmake/ncnn_add_layer.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -364,13 +364,13 @@ macro(ncnn_add_layer class)

if(NCNN_TARGET_ARCH STREQUAL "riscv" AND CMAKE_SIZEOF_VOID_P EQUAL 8)
if(NCNN_RUNTIME_CPU AND NCNN_RVV)
if(NCNN_COMPILER_SUPPORT_RVV_ZFH)
ncnn_add_arch_opt_layer(${class} rvv "-march=rv64gcv_zfh")
elseif(NCNN_COMPILER_SUPPORT_RVV_ZVFH)
ncnn_add_arch_opt_layer(${class} rvv "-march=rv64gcv_zfh_zvfh0p1 -menable-experimental-extensions -D__fp16=_Float16")
elseif(NCNN_COMPILER_SUPPORT_RVV)
ncnn_add_arch_opt_layer(${class} rvv "-march=rv64gcv")
endif()
ncnn_add_arch_opt_layer(${class} rvv "-march=rv64gcv")
endif()
if(NCNN_RUNTIME_CPU AND NCNN_XTHEADVECTOR)
ncnn_add_arch_opt_layer(${class} xtheadvector "-march=rv64gc_zfh_xtheadvector -D__fp16=_Float16")
endif()
if(NCNN_ZVFH)
ncnn_add_arch_opt_source(${class} zvfh "-march=rv64gcv_zfh_zvfh -D__fp16=_Float16")
endif()
endif()

Expand Down
14 changes: 14 additions & 0 deletions cmake/ncnn_generate_xtheadvector_source.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

# Generate a *_riscv_xtheadvector source variant from the plain riscv one.
#
# Invoked in cmake script mode; the caller must define on the command line:
#   SRC   - path of the original riscv source file to read
#   DST   - path of the generated xtheadvector source file to write
#   CLASS - layer class name (e.g. AbsVal)

file(READ ${SRC} source_data)

# derive the identifier casings used inside the source
string(TOUPPER ${CLASS} CLASS_UPPER)
string(TOLOWER ${CLASS} CLASS_LOWER)

# Use literal string(REPLACE) instead of REGEX REPLACE: every pattern here is
# plain text, and a regex would treat the '.' in "_riscv.h" as a wildcard and
# would misbehave if a class name ever contained a regex metacharacter.
string(REPLACE "LAYER_${CLASS_UPPER}_RISCV_H" "LAYER_${CLASS_UPPER}_RISCV_XTHEADVECTOR_H" source_data "${source_data}")
string(REPLACE "${CLASS}_riscv" "${CLASS}_riscv_xtheadvector" source_data "${source_data}")
string(REPLACE "#include \"${CLASS_LOWER}_riscv.h\"" "#include \"${CLASS_LOWER}_riscv_xtheadvector.h\"" source_data "${source_data}")

file(WRITE ${DST} "${source_data}")
16 changes: 9 additions & 7 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -630,16 +630,18 @@ if(NCNN_TARGET_ARCH STREQUAL "loongarch")
endif()
endif()

if(NCNN_TARGET_ARCH STREQUAL "riscv" AND NOT C906)
if(NCNN_TARGET_ARCH STREQUAL "riscv" AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT C906)
if(NOT NCNN_RUNTIME_CPU AND NCNN_RVV)
if(NCNN_COMPILER_SUPPORT_RVV_ZFH)
target_compile_options(ncnn PRIVATE -march=rv64gcv_zfh)
elseif(NCNN_COMPILER_SUPPORT_RVV_ZVFH)
target_compile_options(ncnn PRIVATE -march=rv64gcv_zfh_zvfh0p1 -menable-experimental-extensions -D__fp16=_Float16)
elseif(NCNN_COMPILER_SUPPORT_RVV)
target_compile_options(ncnn PRIVATE -march=rv64gcv)
set(RISCV_MARCH_FLAG "-march=rv64gcv")
if(NCNN_ZVFH)
set(RISCV_MARCH_FLAG "${RISCV_MARCH_FLAG}_zfh_zvfh")
target_compile_options(ncnn PRIVATE -D__fp16=_Float16)
endif()
elseif(NOT NCNN_RUNTIME_CPU AND NCNN_ZVFH)
set(RISCV_MARCH_FLAG "-march=rv64gc_zfh_xtheadvector")
target_compile_options(ncnn PRIVATE -D__fp16=_Float16)
endif()
target_compile_options(ncnn PRIVATE ${RISCV_MARCH_FLAG})
endif()

if(NCNN_PPC64LE_VSX)
Expand Down
32 changes: 32 additions & 0 deletions src/cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2564,6 +2564,38 @@ int cpu_support_riscv_zfh()
#endif
}

// Returns non-zero when the runtime cpu is believed to support the riscv
// zvfh extension (vector half-precision float), zero otherwise.
// NOTE(review): this is an approximation — only the V and F hwcap bits are
// probed, and V + F does not strictly imply zvfh (see linked upstream issue).
int cpu_support_riscv_zvfh()
{
    try_initialize_global_cpu_info();
#if defined __ANDROID__ || defined __linux__
#if __riscv
    // v + f does not imply zfh, but how to discover zvfh properly ?
    // upstream issue https://github.com/riscv/riscv-isa-manual/issues/414
    // '&' binds tighter than '&&', so both hwcap bits are tested here.
    return g_hwcaps & COMPAT_HWCAP_ISA_V && g_hwcaps & COMPAT_HWCAP_ISA_F;
#else
    // not compiling for riscv: the hwcap bits are meaningless
    return 0;
#endif
#else
    // hwcap probing is only implemented for android/linux
    return 0;
#endif
}

// Returns non-zero when the runtime cpu is believed to support the T-Head
// xtheadvector extension, zero otherwise.
// NOTE(review): body and comment are copied verbatim from
// cpu_support_riscv_zvfh() — it probes the standard V and F hwcap bits.
// Whether kernels expose those bits for xtheadvector-only (pre-ratification
// vector) cores is unclear; TODO confirm this actually detects xtheadvector.
int cpu_support_riscv_xtheadvector()
{
    try_initialize_global_cpu_info();
#if defined __ANDROID__ || defined __linux__
#if __riscv
    // v + f does not imply zfh, but how to discover zvfh properly ?
    // upstream issue https://github.com/riscv/riscv-isa-manual/issues/414
    return g_hwcaps & COMPAT_HWCAP_ISA_V && g_hwcaps & COMPAT_HWCAP_ISA_F;
#else
    // not compiling for riscv: the hwcap bits are meaningless
    return 0;
#endif
#else
    // hwcap probing is only implemented for android/linux
    return 0;
#endif
}

int cpu_riscv_vlenb()
{
try_initialize_global_cpu_info();
Expand Down
4 changes: 4 additions & 0 deletions src/cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ NCNN_EXPORT int cpu_support_loongson_mmi();
NCNN_EXPORT int cpu_support_riscv_v();
// zfh = riscv half-precision float
NCNN_EXPORT int cpu_support_riscv_zfh();
// zvfh = riscv vector half-precision float
NCNN_EXPORT int cpu_support_riscv_zvfh();
// xtheadvector = riscv xtheadvector
NCNN_EXPORT int cpu_support_riscv_xtheadvector();
// vlenb = riscv vector length in bytes
NCNN_EXPORT int cpu_riscv_vlenb();

Expand Down
7 changes: 7 additions & 0 deletions src/layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,13 @@ Layer* create_layer_cpu(int index)
}
else
#endif // NCNN_RUNTIME_CPU && NCNN_RVV
#if NCNN_RUNTIME_CPU && NCNN_XTHEADVECTOR
if (ncnn::cpu_support_riscv_xtheadvector())
{
layer_creator = layer_registry_xtheadvector[index].creator;
}
else
#endif // NCNN_RUNTIME_CPU && NCNN_XTHEADVECTOR
{
layer_creator = layer_registry_arch[index].creator;
}
Expand Down
2 changes: 1 addition & 1 deletion src/layer/noop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Noop::Noop()
{
support_inplace = true;
support_packing = true;
support_fp16_storage = cpu_support_arm_asimdhp() || cpu_support_riscv_zfh();
support_fp16_storage = cpu_support_arm_asimdhp() || cpu_support_riscv_zvfh();
support_bf16_storage = true;
}

Expand Down
66 changes: 66 additions & 0 deletions src/layer/riscv/absval_fp16.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

// Forward declaration of the zvfh-optimized kernel, compiled in a separate
// translation unit with zvfh enabled; only needed when this TU itself is
// built without zvfh and runtime cpu dispatch is on.
#if NCNN_RUNTIME_CPU && NCNN_ZVFH && __riscv_vector && !__riscv_zvfh
void absval_fp16_zvfh(Mat& bottom_top_blob, const Option& opt);
#endif

#if __riscv_zvfh
// |x| for an f16m8 vector: vfsgnjx injects sign(op1) XOR sign(op1) = 0 into
// op1, i.e. clears the sign bit — the rvv idiom for fabs.
static inline vfloat16m8_t __riscv_vfabs_v_f16m8_absval(vfloat16m8_t op1, size_t vl)
{
    return __riscv_vfsgnjx_vv_f16m8(op1, op1, vl);
}
#endif // __riscv_zvfh

// In-place absolute value over every fp16 element of bottom_top_blob.
// Dispatch: when this TU is built for runtime cpu detection without zvfh (and
// without xtheadvector), forward to the separately compiled zvfh kernel if
// the cpu supports it; otherwise run the inline rvv loop below, or — when the
// compiler has no zvfh at all — do nothing.
static void absval_fp16(Mat& bottom_top_blob, const Option& opt)
{
#if NCNN_RUNTIME_CPU && NCNN_ZVFH && __riscv_vector && !__riscv_xtheadvector && !__riscv_zvfh
    if (ncnn::cpu_support_riscv_zvfh())
    {
        absval_fp16_zvfh(bottom_top_blob, opt);
        return;
    }
#endif

#if __riscv_zvfh
    const int w = bottom_top_blob.w;
    const int h = bottom_top_blob.h;
    const int d = bottom_top_blob.d;
    const int channels = bottom_top_blob.c;
    const int elempack = bottom_top_blob.elempack;
    // elements per channel, counting packed lanes
    const int size = w * h * d * elempack;

    // channels are independent, so parallelize across them
    #pragma omp parallel for num_threads(opt.num_threads)
    for (int q = 0; q < channels; q++)
    {
        __fp16* ptr = bottom_top_blob.channel(q);

        // strip-mine the channel: vsetvl picks the largest vl <= n each pass
        int n = size;
        while (n > 0)
        {
            size_t vl = __riscv_vsetvl_e16m8(n);

            vfloat16m8_t _p = __riscv_vle16_v_f16m8(ptr, vl);
            _p = __riscv_vfabs_v_f16m8_absval(_p, vl);
            __riscv_vse16_v_f16m8(ptr, _p, vl);

            ptr += vl;
            n -= vl;
        }
    }
#else
    // built without zvfh: nothing to do, silence unused-parameter warnings
    (void)bottom_top_blob;
    (void)opt;
#endif // __riscv_zvfh
}
Loading
Loading