Skip to content

Commit

Permalink
Compute Library v24.05
Browse files Browse the repository at this point in the history
  • Loading branch information
Jenkins committed May 28, 2024
1 parent 4fda7a8 commit a53ffdc
Show file tree
Hide file tree
Showing 85 changed files with 6,114 additions and 2,664 deletions.
11 changes: 11 additions & 0 deletions Android.bp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ opencl_srcs = [
"src/core/CL/cl_kernels/common/roi_align_layer.cl",
"src/core/CL/cl_kernels/common/roi_align_layer_quantized.cl",
"src/core/CL/cl_kernels/common/roi_pooling_layer.cl",
"src/core/CL/cl_kernels/common/scatter.cl",
"src/core/CL/cl_kernels/common/select.cl",
"src/core/CL/cl_kernels/common/slice_ops.cl",
"src/core/CL/cl_kernels/common/softmax_layer.cl",
Expand Down Expand Up @@ -488,6 +489,8 @@ cc_library_static {
"src/cpu/kernels/depthwiseconv2d/generic/neon/impl.cpp",
"src/cpu/kernels/depthwiseconv2d/generic/neon/qasymm8.cpp",
"src/cpu/kernels/depthwiseconv2d/generic/neon/qasymm8_signed.cpp",
"src/cpu/kernels/dequantize/generic/neon/fp16.cpp",
"src/cpu/kernels/dequantize/generic/neon/fp32.cpp",
"src/cpu/kernels/directconv2d/nchw/all.cpp",
"src/cpu/kernels/directconv2d/nchw/fp16.cpp",
"src/cpu/kernels/directconv2d/nhwc/neon/fp16.cpp",
Expand Down Expand Up @@ -553,9 +556,17 @@ cc_library_static {
"src/cpu/kernels/pool3d/neon/fp32.cpp",
"src/cpu/kernels/pool3d/neon/qasymm8.cpp",
"src/cpu/kernels/pool3d/neon/qasymm8_signed.cpp",
"src/cpu/kernels/quantize/generic/neon/fp16.cpp",
"src/cpu/kernels/quantize/generic/neon/fp32.cpp",
"src/cpu/kernels/quantize/generic/neon/integer.cpp",
"src/cpu/kernels/range/generic/neon/fp16.cpp",
"src/cpu/kernels/range/generic/neon/fp32.cpp",
"src/cpu/kernels/range/generic/neon/integer.cpp",
"src/cpu/kernels/reduction_layer/generic/neon/fp16.cpp",
"src/cpu/kernels/reduction_layer/generic/neon/fp32.cpp",
"src/cpu/kernels/reduction_layer/generic/neon/integer.cpp",
"src/cpu/kernels/reduction_layer/generic/neon/qasymm8.cpp",
"src/cpu/kernels/reduction_layer/generic/neon/qasymm8_signed.cpp",
"src/cpu/kernels/roialign/generic/neon/fp16.cpp",
"src/cpu/kernels/roialign/generic/neon/fp32.cpp",
"src/cpu/kernels/roialign/generic/neon/qasymm8.cpp",
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
list(APPEND CMAKE_MESSAGE_CONTEXT ArmCompute)
project(
ArmCompute
VERSION 36.0.0
VERSION 37.0.0
DESCRIPTION
"The Arm Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A CPU and Arm® Mali™ GPU architectures"
LANGUAGES C CXX ASM)
Expand Down
24 changes: 12 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<img src="https://raw.githubusercontent.com/ARM-software/ComputeLibrary/gh-pages/ACL_logo.png"/><br><br>
</div>

# Compute Library ![](https://img.shields.io/badge/latest_release-24.04-green)
# Compute Library ![](https://img.shields.io/badge/latest_release-24.05-green)


The Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A, Arm® Neoverse® and Arm® Mali™ GPU architectures.<br>
Expand Down Expand Up @@ -37,7 +37,7 @@ Key Features:
<br>

## Documentation
[![Documentation](https://img.shields.io/badge/documentation-24.04-green)](https://arm-software.github.io/ComputeLibrary/latest)
[![Documentation](https://img.shields.io/badge/documentation-24.05-green)](https://arm-software.github.io/ComputeLibrary/latest)

> Note: The documentation includes the reference API, changelogs, build guide, contribution guide, errata, etc.
Expand All @@ -50,24 +50,24 @@ All the binaries can be downloaded from [here](https://github.com/ARM-software/C

| Platform | Operating System | Release archive (Download) |
| -------------- | ---------------- | -------------------------- |
| Raspberry Pi 4 | Linux® 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-armv7a-neon.tar.gz) |
| Raspberry Pi 4 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8a-neon.tar.gz) |
| Odroid N2 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8a-neon-cl.tar.gz) |
| HiKey960 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8a-neon-cl.tar.gz) |
| Raspberry Pi 4 | Linux® 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-armv7a-neon.tar.gz) |
| Raspberry Pi 4 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8a-neon.tar.gz) |
| Odroid N2 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8a-neon-cl.tar.gz) |
| HiKey960 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8a-neon-cl.tar.gz) |

<br>

| Architecture | Operating System | Release archive (Download) |
| ------------ | ---------------- | -------------------------- |
| armv7 | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-armv7a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-armv7a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-armv7a-neon-cl.tar.gz) |
| arm64-v8a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-android-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-android-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-android-arm64-v8a-neon-cl.tar.gz) |
| arm64-v8a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8a-neon-cl.tar.gz) |
| arm64-v8.2-a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-android-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-android-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-android-arm64-v8.2-a-neon-cl.tar.gz) |
| arm64-v8.2-a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8.2-a-neon-cl.tar.gz) |
| armv7 | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-armv7a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-armv7a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-armv7a-neon-cl.tar.gz) |
| arm64-v8a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-android-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-android-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-android-arm64-v8a-neon-cl.tar.gz) |
| arm64-v8a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8a-neon-cl.tar.gz) |
| arm64-v8.2-a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-android-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-android-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-android-arm64-v8.2-a-neon-cl.tar.gz) |
| arm64-v8.2-a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8.2-a-neon-cl.tar.gz) |

<br>

Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.04-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.04)
Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.05-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.05)

Pre-built binaries are generated with the following security / good coding practices related flags:
> -Wall, -Wextra, -Wformat=2, -Winit-self, -Wstrict-overflow=2, -Wswitch-default, -Woverloaded-virtual, -Wformat-security, -Wctor-dtor-privacy, -Wsign-promo, -Weffc++, -pedantic, -fstack-protector-strong
Expand Down
5 changes: 3 additions & 2 deletions SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ import json
import codecs
import platform

VERSION = "v24.04"
LIBRARY_VERSION_MAJOR = 36
VERSION = "v24.05"
LIBRARY_VERSION_MAJOR = 37
LIBRARY_VERSION_MINOR = 0
LIBRARY_VERSION_PATCH = 0
SONAME_VERSION = str(LIBRARY_VERSION_MAJOR) + "." + str(LIBRARY_VERSION_MINOR) + "." + str(LIBRARY_VERSION_PATCH)
Expand Down Expand Up @@ -429,6 +429,7 @@ if env['opencl'] and env['embed_kernels']:
'src/core/CL/cl_kernels/common/fill_border.cl',
'src/core/CL/cl_kernels/common/floor.cl',
'src/core/CL/cl_kernels/common/gather.cl',
'src/core/CL/cl_kernels/common/scatter.cl',
'src/core/CL/cl_kernels/common/gemm.cl',
'src/core/CL/cl_kernels/common/gemm_reshaped_only_rhs_mmul.cl',
'src/core/CL/cl_kernels/common/gemm_utils.cl',
Expand Down
19 changes: 15 additions & 4 deletions arm_compute/core/CPP/CPPTypes.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2022 Arm Limited.
* Copyright (c) 2017-2022, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
Expand All @@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef ARM_COMPUTE_CPP_TYPES_H
#define ARM_COMPUTE_CPP_TYPES_H
#ifndef ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H
#define ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H

#include "arm_compute/core/Error.h"

Expand Down Expand Up @@ -170,6 +170,17 @@ class CPUInfo final
* @return Number of CPUs
*/
unsigned int get_cpu_num() const;
/** Return the maximum number of CPUs present excluding the little cores
* in case of an Android device
*
* @return Number of CPUs excluding little
*/
unsigned int get_cpu_num_excluding_little() const;
/** Return the vector length in bytes for sme2
*
* @return Vector length if sme2 is enabled, otherwise returns 0.
*/
unsigned long get_sme2_vector_length() const;

private:
struct Impl;
Expand All @@ -184,4 +195,4 @@ struct ThreadInfo
const CPUInfo *cpu_info{nullptr};
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CPP_TYPES_H */
#endif // ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H
7 changes: 4 additions & 3 deletions arm_compute/runtime/CL/functions/CLScatter.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,16 @@ class CLScatter : public IFunction
/** Default destructor */
~CLScatter();
/** Initialise the kernel's inputs and outputs
*
* @note Negative indices are treated as out of bounds.
*
* Valid data layouts:
* - All
*
*
* @param[in] compile_context The compile context to be used.
* @param[in] src Source tensor. Values used to fill output. Can be nullptr when zero initialization is true.
* @param[in] updates Tensor containing values used to update output tensor. Data types supported: same as @p src
* @param[in] indices Tensor containing Indices to change in the output Tensor. Data types supported : U32
* @param[in] indices Tensor containing Indices to change in the output Tensor. Data types supported : S32
* @param[out] output Destination tensor. Data types supported: same as @p src.
* @param[in] info Scatter info object.
*/
Expand All @@ -85,7 +86,7 @@ class CLScatter : public IFunction
*
* @param[in] src Source tensor.
* @param[in] updates Tensor containing values used for updating the output Tensor. Data types supported : same as @p src
* @param[in] indices Tensor containing Indices to change in the output Tensor. Data types supported : U32
* @param[in] indices Tensor containing Indices to change in the output Tensor. Data types supported : S32
* @param[in] output Destination tensor. Data types supported: same as @p src.
* @param[in] info Scatter info containing type of scatter.
*
Expand Down
9 changes: 5 additions & 4 deletions arm_compute/runtime/OMP/OMPScheduler.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2021 Arm Limited.
* Copyright (c) 2017-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
Expand All @@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef ARM_COMPUTE_OMPSCHEDULER_H
#define ARM_COMPUTE_OMPSCHEDULER_H
#ifndef ACL_ARM_COMPUTE_RUNTIME_OMP_OMPSCHEDULER_H
#define ACL_ARM_COMPUTE_RUNTIME_OMP_OMPSCHEDULER_H

#include "arm_compute/runtime/IScheduler.h"

Expand Down Expand Up @@ -79,6 +79,7 @@ class OMPScheduler final : public IScheduler

private:
unsigned int _num_threads;
unsigned int _nonlittle_num_cpus;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_OMPSCHEDULER_H */
#endif // ACL_ARM_COMPUTE_RUNTIME_OMP_OMPSCHEDULER_H
2 changes: 1 addition & 1 deletion docs/Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ PROJECT_NAME = "Compute Library"
# could be handy for archiving the generated documentation or if some version
# control system is used.

PROJECT_NUMBER = 24.04
PROJECT_NUMBER = 24.05

# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
Expand Down
7 changes: 6 additions & 1 deletion docs/user_guide/release_version_and_change_log.dox
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,14 @@ If there is more than one release in a month then an extra sequential number is

@section S2_2_changelog Changelog

v24.05 Public major release
- Add @ref CLScatter operator for FP32/16, S32/16/8, U32/16/8 data types.
- Various fixes to enable FP16 kernels in armv8a multi_isa builds.
- Updated logic in the OpenMP scheduler to exclude LITTLE cores.

v24.04 Public major release
- Add Bfloat16 data type support for @ref NEMatMul.
- Add support for SoftMax in SME2 for FP32 and FP16.
- Add support for SoftMax in SME2 for FP32, FP16, QASYMM8 and QASYMM8_SIGNED.
- Add support for in place accumulation to CPU GEMM kernels.
- Add low-precision Int8 * Int8 -> FP32 CPU GEMM which dequantizes after multiplication
- Add is_dynamic flag to QuantizationInfo to signal to operators that it may change after configuration
Expand Down
Loading

0 comments on commit a53ffdc

Please sign in to comment.