@@ -93,9 +93,9 @@ def __init__(self):
93
93
default = (self ._is_arm64 () and self ._is_macos ()),
94
94
)
95
95
if self .build_cpu_aarch64 :
96
- assert self . _is_arm64 (), (
97
- "TORCHAO_BUILD_CPU_AARCH64 requires an arm64 machine"
98
- )
96
+ assert (
97
+ self . _is_arm64 ()
98
+ ), "TORCHAO_BUILD_CPU_AARCH64 requires an arm64 machine"
99
99
100
100
# TORCHAO_BUILD_KLEIDIAI is disabled by default for now because
101
101
# 1) It increases the build time
@@ -104,9 +104,9 @@ def __init__(self):
104
104
"TORCHAO_BUILD_KLEIDIAI" , default = False
105
105
)
106
106
if self .build_kleidi_ai :
107
- assert self . build_cpu_aarch64 , (
108
- "TORCHAO_BUILD_KLEIDIAI requires TORCHAO_BUILD_CPU_AARCH64 be set"
109
- )
107
+ assert (
108
+ self . build_cpu_aarch64
109
+ ), "TORCHAO_BUILD_KLEIDIAI requires TORCHAO_BUILD_CPU_AARCH64 be set"
110
110
111
111
# TORCHAO_BUILD_EXPERIMENTAL_MPS is disabled by default.
112
112
self .build_experimental_mps = self ._os_bool_var (
@@ -115,9 +115,9 @@ def __init__(self):
115
115
if self .build_experimental_mps :
116
116
assert self ._is_macos (), "TORCHAO_BUILD_EXPERIMENTAL_MPS requires MacOS"
117
117
assert self ._is_arm64 (), "TORCHAO_BUILD_EXPERIMENTAL_MPS requires arm64"
118
- assert torch . mps . is_available (), (
119
- "TORCHAO_BUILD_EXPERIMENTAL_MPS requires MPS be available"
120
- )
118
+ assert (
119
+ torch . mps . is_available ()
120
+ ), "TORCHAO_BUILD_EXPERIMENTAL_MPS requires MPS be available"
121
121
122
122
# TORCHAO_PARALLEL_BACKEND specifies which parallel backend to use
123
123
# Possible values: aten_openmp, executorch, openmp, pthreadpool, single_threaded
@@ -130,19 +130,19 @@ def __init__(self):
130
130
default = (self ._is_arm64 () and self ._is_macos ()),
131
131
)
132
132
if self .enable_arm_neon_dot :
133
- assert self . build_cpu_aarch64 , (
134
- "TORCHAO_ENABLE_ARM_NEON_DOT requires TORCHAO_BUILD_CPU_AARCH64 be set"
135
- )
133
+ assert (
134
+ self . build_cpu_aarch64
135
+ ), "TORCHAO_ENABLE_ARM_NEON_DOT requires TORCHAO_BUILD_CPU_AARCH64 be set"
136
136
137
137
# TORCHAO_ENABLE_ARM_I8MM enables ARM 8-bit Integer Matrix Multiply instructions
138
138
# Not enabled by default on macOS, as not all Apple silicon Macs support it
139
139
self .enable_arm_i8mm = self ._os_bool_var (
140
140
"TORCHAO_ENABLE_ARM_I8MM" , default = False
141
141
)
142
142
if self .enable_arm_i8mm :
143
- assert self . build_cpu_aarch64 , (
144
- "TORCHAO_ENABLE_ARM_I8MM requires TORCHAO_BUILD_CPU_AARCH64 be set"
145
- )
143
+ assert (
144
+ self . build_cpu_aarch64
145
+ ), "TORCHAO_ENABLE_ARM_I8MM requires TORCHAO_BUILD_CPU_AARCH64 be set"
146
146
147
147
def _is_arm64 (self ) -> bool :
148
148
return platform .machine ().startswith ("arm64" ) or platform .machine () == "aarch64"
@@ -364,6 +364,7 @@ def get_extensions():
364
364
365
365
use_cutlass = False
366
366
cutlass_90a_sources = None
367
+ cutlass_100a_sources = None
367
368
if use_cuda and not IS_ROCM and not IS_WINDOWS :
368
369
use_cutlass = True
369
370
cutlass_dir = os .path .join (third_party_path , "cutlass" )
0 commit comments