File tree 4 files changed +10
-5
lines changed
kernels/cpu/aarch64/valpacking 4 files changed +10
-5
lines changed Original file line number Diff line number Diff line change @@ -29,13 +29,17 @@ if(TORCHAO_BUILD_KLEIDIAI)
29
29
endif ()
30
30
include (CMakePrintHelpers)
31
31
32
- add_compile_options ("-Wall" "-Werror" "-Wno-deprecated" )
32
+ if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" )
33
+ add_compile_options ("-Wall" "-Werror" "-Wno-deprecated" "-march=armv8.2-a+dotprod" "-fPIC" "-Wno-error=unknown-pragmas" )
34
+ else ()
35
+ add_compile_options ("-Wall" "-Werror" "-Wno-deprecated" )
36
+ endif ()
33
37
34
38
include (CMakePrintHelpers)
35
39
message ("TORCHAO_INCLUDE_DIRS: ${TORCHAO_INCLUDE_DIRS} " )
36
40
include_directories (${TORCHAO_INCLUDE_DIRS} )
37
41
38
- if (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64" )
42
+ if (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" )
39
43
if (TORCHAO_BUILD_KLEIDIAI)
40
44
message (STATUS "Building with Arm KleidiAI library" )
41
45
add_compile_definitions (TORCHAO_ENABLE_KLEIDI=1)
Original file line number Diff line number Diff line change 7
7
#include < torchao/experimental/kernels/cpu/aarch64/valpacking/valpack.h>
8
8
#include < cassert>
9
9
#include < cstring>
10
+ #include < cstdint>
10
11
11
12
// Interleaves data across channels (row/column) and groups.
12
13
// Each channel is the same size (vals_per_channel) and is
Original file line number Diff line number Diff line change @@ -51,7 +51,7 @@ class PackedWeightsHeader {
51
51
auto header = reinterpret_cast <const int *>(packed_weights);
52
52
assert (header[0 ] == PackedWeightsHeader::magic);
53
53
params_type params;
54
- for (int i = 0 ; i < params.size (); i++) {
54
+ for (size_t i = 0 ; i < params.size (); i++) {
55
55
params[i] = header[i + 2 ];
56
56
}
57
57
return PackedWeightsHeader (
@@ -62,7 +62,7 @@ class PackedWeightsHeader {
62
62
if (type != other.type ) {
63
63
return false ;
64
64
}
65
- for (int i = 0 ; i < params.size (); i++) {
65
+ for (size_t i = 0 ; i < params.size (); i++) {
66
66
if (params[i] != other.params [i]) {
67
67
return false ;
68
68
}
Original file line number Diff line number Diff line change 5
5
// LICENSE file in the root directory of this source tree.
6
6
7
7
#pragma once
8
- #include < Aten/Parallel .h>
8
+ #include < torchao/experimental/ops/parallel .h>
9
9
#include < torch/library.h>
10
10
#include < torch/torch.h>
11
11
You can’t perform that action at this time.
0 commit comments