Skip to content

Commit 247557a

Browse files
lucylq authored and kirklandsign committed
Dtype selective build for optimized ops
Pull Request resolved: #10878

Add dtype selective build for optimized ops. This follows the same process as for portable ops: we copy the source files and rebuild the library.

1. Generalize the copy genrule so it handles portable/optimized sources and headers.
2. Copy the optimized source files and headers.
3. Build the optimized ops from the copied source files, their dependencies, and the portable header library.
4. Add a test confirming that we can run addmul with float dtypes (when they are removed, the test fails).

ghstack-source-id: 284862896
@exported-using-ghexport

Differential Revision: [D74688554](https://our.internmc.facebook.com/intern/diff/D74688554/)
1 parent 40736e2 commit 247557a

File tree

5 files changed

+194
-48
lines changed

5 files changed

+194
-48
lines changed

examples/selective_build/targets.bzl

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,19 @@ def define_common_targets():
6969
visibility = ["//executorch/..."],
7070
)
7171

72+
executorch_generated_lib(
73+
name = "select_ops_in_dict_lib_optimized",
74+
functions_yaml_target = "//executorch/kernels/optimized:optimized.yaml",
75+
kernel_deps = [
76+
"//executorch/kernels/optimized:optimized_operators",
77+
],
78+
deps = [
79+
":select_ops_in_dict",
80+
],
81+
dtype_selective_build = True,
82+
visibility = ["//executorch/..."],
83+
)
84+
7285
# Select all ops from a yaml file
7386
et_operator_library(
7487
name = "select_ops_from_yaml",
@@ -121,6 +134,8 @@ def define_common_targets():
121134
lib.append(":select_ops_in_list_lib")
122135
elif select_ops == "dict":
123136
lib.append(":select_ops_in_dict_lib")
137+
elif select_ops == "dict_optimized":
138+
lib.append(":select_ops_in_dict_lib_optimized")
124139
elif select_ops == "yaml":
125140
lib.append(":select_ops_from_yaml_lib")
126141
elif select_ops == "model":

kernels/optimized/cpu/targets.bzl

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def define_common_targets():
2525
name = "add_sub_impl",
2626
srcs = [],
2727
exported_headers = ["op_add_sub_impl.h"],
28-
visibility = ["//executorch/kernels/optimized/cpu/..."],
28+
visibility = ["//executorch/kernels/optimized/cpu/...", "@EXECUTORCH_CLIENTS",],
2929
exported_deps = [
3030
"//executorch/runtime/core:core",
3131
"//executorch/kernels/portable/cpu/util:broadcast_indexes_range",
@@ -36,14 +36,14 @@ def define_common_targets():
3636
name = "fft_utils",
3737
srcs = [],
3838
exported_headers = ["fft_utils.h"],
39-
visibility = ["//executorch/kernels/optimized/cpu/..."],
39+
visibility = ["//executorch/kernels/optimized/cpu/...", "@EXECUTORCH_CLIENTS",],
4040
exported_deps = [] if runtime.is_oss else ["fbsource//third-party/pocket_fft:pocketfft"],
4141
)
4242

4343
runtime.cxx_library(
4444
name = "binary_ops",
4545
exported_headers = ["binary_ops.h"],
46-
visibility = ["//executorch/kernels/optimized/cpu/..."],
46+
visibility = ["//executorch/kernels/optimized/cpu/...", "@EXECUTORCH_CLIENTS",],
4747
exported_deps = ["//executorch/runtime/core:core"],
4848
)
4949

@@ -58,9 +58,22 @@ def define_common_targets():
5858
name = "moments_utils",
5959
srcs = [],
6060
exported_headers = ["moments_utils.h"],
61-
visibility = ["//executorch/kernels/optimized/..."],
61+
visibility = ["//executorch/kernels/optimized/...", "@EXECUTORCH_CLIENTS",],
6262
exported_deps = [
6363
"//executorch/kernels/optimized:libvec",
6464
"//executorch/kernels/optimized:libutils",
6565
],
6666
)
67+
68+
# Used for dtype selective build. Collect source and header files.
69+
runtime.filegroup(
70+
name = "optimized_source_files",
71+
srcs = native.glob(["*.cpp"]),
72+
visibility = ["//executorch/...", "@EXECUTORCH_CLIENTS"],
73+
)
74+
75+
runtime.filegroup(
76+
name = "optimized_header_files",
77+
srcs = native.glob(["*.h"]),
78+
visibility = ["//executorch/...", "@EXECUTORCH_CLIENTS"],
79+
)

runtime/core/portable_type/c10/c10/targets.bzl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def define_common_targets():
5353
runtime.cxx_library(
5454
name = "aten_headers_for_executorch",
5555
srcs = [],
56-
visibility = ["//executorch/kernels/optimized/..."],
56+
visibility = ["//executorch/kernels/optimized/...", "@EXECUTORCH_CLIENTS"],
5757
exported_deps = select({
5858
"DEFAULT": [],
5959
"ovr_config//cpu:arm64": [

shim_et/xplat/executorch/codegen/codegen.bzl

Lines changed: 153 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_default_executorch_platforms", "is_xplat", "runtime", "struct_to_json")
22
load("@fbsource//xplat/executorch/build:selects.bzl", "selects")
33
load("@fbsource//xplat/executorch/kernels/portable:op_registration_util.bzl", "portable_header_list", "portable_source_list")
4+
load("@fbsource//xplat/executorch/kernels/optimized:op_registration_util.bzl", "optimized_header_list", "optimized_source_list")
5+
load(
6+
"@fbsource//xplat/executorch/kernels/optimized:lib_defs.bzl",
7+
"get_vec_deps",
8+
"get_vec_preprocessor_flags",
9+
)
410

511
# Headers that declare the function signatures of the C++ functions that
612
# map to entries in functions.yaml and custom_ops.yaml.
@@ -384,52 +390,60 @@ def exir_custom_ops_aot_lib(
384390
force_static = False,
385391
)
386392

387-
# Used for dtype selective build. Genrules to copy source and header files.
388-
def portable_outs(target_name, file_list):
389-
outs = {}
390-
for file in file_list:
391-
outs[file] = ["{}/{}".format(target_name, file)]
392-
return outs
393-
394-
def copy_portable_source_files(name):
395-
target_name = "portable_source_files"
393+
def copy_files(genrule_name, target, file_list):
394+
"""
395+
Copy files from `target` to current directory.
396+
genrule_name: name of this copy genrule.
397+
target: a runtime.filegroup that globs together files.
398+
eg. //executorch/kernels/portable/cpu:portable_source_files.
399+
file_list: list of filenames, used to generate the outfiles.
400+
eg. the list returned by portable_source_list().
401+
"""
402+
target_name = target.split(":")[1]
396403
runtime.genrule(
397-
name = name,
398-
cmd = "cp -f -r $(location //executorch/kernels/portable/cpu:{}) $OUT/".format(target_name),
399-
outs = portable_outs(target_name, portable_source_list()),
404+
name = genrule_name,
405+
cmd = "cp -f -r $(location {}) $OUT/".format(target),
406+
outs = {file: ["{}/{}".format(target_name, file)] for file in file_list},
400407
default_outs = ["."],
401408
)
402409

403-
def copy_portable_header_files(name):
404-
target_name = "portable_header_files"
405-
runtime.genrule(
410+
def build_portable_header_lib(name, oplist_header_name, feature = None):
411+
"""Build the portable headers into a header-only library.
412+
Ensures that includes work across portable and optimized libs.
413+
#include "executorch/kernels/portable/cpu/<header.h>"
414+
"""
415+
# Copy portable header files.
416+
portable_header_files = {}
417+
genrule_name = name + "_copy_portable_header"
418+
copy_files(genrule_name, "//executorch/kernels/portable/cpu:portable_header_files", portable_header_list())
419+
for header in portable_header_list():
420+
portable_header_files[header] = ":{}[{}]".format(genrule_name, header)
421+
422+
# Include dtype header.
423+
portable_header_files["selected_op_variants.h"] = ":{}[selected_op_variants]".format(oplist_header_name)
424+
425+
# Build portable headers lib.
426+
runtime.cxx_library(
406427
name = name,
407-
cmd = "cp -f -r $(location //executorch/kernels/portable/cpu:{}) $OUT/".format(target_name),
408-
outs = portable_outs(target_name, portable_header_list()),
409-
default_outs = ["."],
428+
srcs = [],
429+
exported_headers = portable_header_files,
430+
exported_preprocessor_flags = ["-DEXECUTORCH_SELECTIVE_BUILD_DTYPE"],
431+
# header_namespace is only available in xplat. See https://fburl.com/code/we2gvopk
432+
header_namespace = "executorch/kernels/portable/cpu",
433+
feature = feature,
410434
)
411435

412-
def build_portable_lib(name, oplist_header_name, feature = None, expose_operator_symbols = False):
436+
def build_portable_lib(name, oplist_header_name, portable_header_lib, feature = None, expose_operator_symbols = False):
413437
"""Build portable lib from source. We build from source so that the generated header file,
414438
selected_op_variants.h, can be used to selectively build the lib for different dtypes.
415439
"""
416440

417441
# Copy portable cpp files.
418442
portable_source_files = []
419-
copy_portable_source_files_genrule = name + "_copy_portable_source"
420-
copy_portable_source_files(copy_portable_source_files_genrule)
443+
genrule_name = name + "_copy_portable_source"
444+
copy_files(genrule_name, "//executorch/kernels/portable/cpu:portable_source_files", portable_source_list())
421445
for op in portable_source_list():
422-
portable_source_files.append(":{}[{}]".format(copy_portable_source_files_genrule, op))
423-
424-
# Copy portable header files.
425-
portable_header_files = {}
426-
copy_portable_header_files_genrule = name + "_copy_portable_header"
427-
copy_portable_header_files(copy_portable_header_files_genrule)
428-
for header in portable_header_list():
429-
portable_header_files[header] = ":{}[{}]".format(copy_portable_header_files_genrule, header)
430-
431-
# Include dtype header.
432-
portable_header_files["selected_op_variants.h"] = ":{}[selected_op_variants]".format(oplist_header_name)
446+
portable_source_files.append(":{}[{}]".format(genrule_name, op))
433447

434448
# For shared library build, we don't want to expose symbols of
435449
# kernel implementation (ex torch::executor::native::tanh_out)
@@ -449,9 +463,8 @@ def build_portable_lib(name, oplist_header_name, feature = None, expose_operator
449463
runtime.cxx_library(
450464
name = name,
451465
srcs = portable_source_files,
452-
exported_headers = portable_header_files,
453466
exported_preprocessor_flags = ["-DEXECUTORCH_SELECTIVE_BUILD_DTYPE"],
454-
deps = ["//executorch/kernels/portable/cpu/pattern:all_deps", "//executorch/kernels/portable/cpu/util:all_deps"],
467+
deps = ["//executorch/kernels/portable/cpu/pattern:all_deps", "//executorch/kernels/portable/cpu/util:all_deps"] + [":" + portable_header_lib],
455468
# header_namespace is only available in xplat. See https://fburl.com/code/we2gvopk
456469
header_namespace = "executorch/kernels/portable/cpu",
457470
compiler_flags = compiler_flags,
@@ -467,6 +480,88 @@ def build_portable_lib(name, oplist_header_name, feature = None, expose_operator
467480
feature = feature,
468481
)
469482

483+
def build_optimized_lib(name, oplist_header_name, portable_header_lib, feature = None, expose_operator_symbols = False):
484+
"""Build optimized lib from source. We build from source so that the generated header file,
485+
selected_op_variants.h, can be used to selectively build the lib for different dtypes.
486+
"""
487+
488+
# Copy optimized cpp files.
489+
optimized_source_files = []
490+
source_genrule = name + "_copy_optimized_source"
491+
copy_files(source_genrule, "//executorch/kernels/optimized/cpu:optimized_source_files", optimized_source_list())
492+
for op in optimized_source_list():
493+
optimized_source_files.append(":{}[{}]".format(source_genrule, op))
494+
495+
# Copy optimized header files.
496+
optimized_header_files = {}
497+
header_genrule = name + "_copy_optimized_header"
498+
copy_files(header_genrule, "//executorch/kernels/optimized/cpu:optimized_header_files", optimized_header_list())
499+
for header in optimized_header_list():
500+
optimized_header_files[header] = ":{}[{}]".format(header_genrule, header)
501+
502+
# For shared library build, we don't want to expose symbols of
503+
# kernel implementation (ex torch::executor::native::tanh_out)
504+
# to library users. They should use kernels through registry only.
505+
# With visibility=hidden, linker won't expose kernel impl symbols
506+
# so it can prune unregistered kernels.
507+
# Currently fbcode links all dependent libraries as shared
508+
# libraries, which prevents users such as unit tests from using kernel
509+
# implementations directly. So we enable this for xplat only.
510+
compiler_flags = ["-Wno-missing-prototypes", "-Wno-pass-failed","-Wno-global-constructors","-Wno-shadow",]
511+
if not expose_operator_symbols:
512+
# Removing '-fvisibility=hidden' exposes operator symbols.
513+
# This allows operators to be called outside of the kernel registry.
514+
compiler_flags += ["-fvisibility=hidden"]
515+
516+
# Set up dependencies.
517+
optimized_lib_deps = [
518+
"//executorch/kernels/optimized/cpu:add_sub_impl",
519+
"//executorch/kernels/optimized/cpu:binary_ops",
520+
"//executorch/kernels/optimized/cpu:fft_utils",
521+
"//executorch/kernels/optimized/cpu:moments_utils",
522+
"//executorch/kernels/optimized:libblas",
523+
"//executorch/kernels/optimized:libutils",
524+
"//executorch/kernels/optimized:libvec",
525+
"//executorch/kernels/portable/cpu/pattern:all_deps",
526+
"//executorch/kernels/portable/cpu/util:all_deps",
527+
"//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch",
528+
"//executorch/runtime/kernel:kernel_includes",
529+
":" + portable_header_lib,
530+
] + get_vec_deps()
531+
532+
# Build optimized lib.
533+
runtime.cxx_library(
534+
name = name,
535+
srcs = optimized_source_files,
536+
exported_preprocessor_flags = ["-DEXECUTORCH_SELECTIVE_BUILD_DTYPE"],
537+
deps = optimized_lib_deps,
538+
# header_namespace is only available in xplat. See https://fburl.com/code/we2gvopk
539+
header_namespace = "executorch/kernels/optimized/cpu",
540+
compiler_flags = compiler_flags,
541+
preprocessor_flags = get_vec_preprocessor_flags(),
542+
# sleef needs to be added as a direct dependency of the operator target when building for Android,
543+
# or a linker error may occur. Not sure why this happens; it seems that fbandroid_platform_deps of
544+
# dependencies are not transitive
545+
fbandroid_platform_deps = [
546+
(
547+
"^android-arm64.*$",
548+
[
549+
"fbsource//third-party/sleef:sleef_arm",
550+
],
551+
),
552+
],
553+
# WARNING: using a deprecated API to avoid being built into a shared
554+
# library. When a .so library is loaded dynamically, we don't want
555+
# it to depend on other .so libraries, because then we would have to
556+
# specify the library directory path.
557+
force_static = True,
558+
# link_whole is necessary because the operators register themselves
559+
# via static initializers that run at program startup.
560+
# @lint-ignore BUCKLINT link_whole
561+
link_whole = True,
562+
feature = feature,
563+
)
564+
470565
def executorch_generated_lib(
471566
name,
472567
functions_yaml_target = None,
@@ -629,14 +724,29 @@ def executorch_generated_lib(
629724
)
630725

631726
portable_lib = []
632-
if dtype_selective_build and is_xplat() and "//executorch/kernels/portable:operators" in kernel_deps:
633-
# Remove portable from kernel_deps as we're building it from source.
634-
kernel_deps.remove("//executorch/kernels/portable:operators")
635-
636-
# Build portable lib.
637-
portable_lib_name = name + "_portable_lib"
638-
build_portable_lib(portable_lib_name, oplist_header_name, feature, expose_operator_symbols)
639-
portable_lib = [":{}".format(portable_lib_name)]
727+
optimized_lib = []
728+
if dtype_selective_build and is_xplat():
729+
# Build portable headers lib. Used for portable and optimized kernel libraries.
730+
portable_header_lib = name + "_portable_header_lib"
731+
build_portable_header_lib(portable_header_lib, oplist_header_name, feature)
732+
733+
if "//executorch/kernels/portable:operators" in kernel_deps:
734+
# Remove portable from kernel_deps as we're building it from source.
735+
kernel_deps.remove("//executorch/kernels/portable:operators")
736+
737+
# Build portable lib.
738+
portable_lib_name = name + "_portable_lib"
739+
build_portable_lib(portable_lib_name, oplist_header_name, portable_header_lib, feature, expose_operator_symbols)
740+
portable_lib = [":{}".format(portable_lib_name)]
741+
742+
if "//executorch/kernels/optimized:optimized_operators" in kernel_deps:
743+
# Remove optimized from kernel_deps as we're building it from source.
744+
kernel_deps.remove("//executorch/kernels/optimized:optimized_operators")
745+
746+
# Build optimized lib.
747+
optimized_lib_name = name + "_optimized_lib"
748+
build_optimized_lib(optimized_lib_name, oplist_header_name, portable_header_lib, feature, expose_operator_symbols)
749+
optimized_lib = [":{}".format(optimized_lib_name)]
640750

641751
# Exports headers that declare the function signatures of the C++ functions
642752
# that map to entries in `functions.yaml` and `custom_ops.yaml`.
@@ -690,7 +800,7 @@ def executorch_generated_lib(
690800
"//executorch/kernels/prim_ops:prim_ops_registry" + aten_suffix,
691801
"//executorch/runtime/core:evalue" + aten_suffix,
692802
"//executorch/codegen:macros",
693-
] + deps + kernel_deps + portable_lib,
803+
] + deps + kernel_deps + portable_lib + optimized_lib,
694804
exported_deps = [
695805
"//executorch/runtime/core/exec_aten:lib" + aten_suffix,
696806
"//executorch/runtime/kernel:kernel_runtime_context" + aten_suffix,

shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,3 +265,11 @@ OPTIMIZED_ATEN_OPS = (
265265
],
266266
),
267267
)
268+
269+
def optimized_source_list():
270+
"""All the source file names from //executorch/kernels/optimized/cpu"""
271+
return [op["name"] + ".cpp" for op in OPTIMIZED_ATEN_OPS]
272+
273+
def optimized_header_list():
274+
"""All the header file names from //executorch/kernels/optimized/cpu"""
275+
return ["binary_ops.h", "fft_utils.h", "moments_utils.h", "op_add_sub_impl.h",]

0 commit comments

Comments
 (0)