Skip to content

Commit 7e1dd6d

Browse files
authored
Merge pull request #21849 from JuliaLang/yyc/codegen/clone
Implement function multi versioning in sysimg
2 parents 894ab2b + ded8d46 commit 7e1dd6d

30 files changed

+5003
-519
lines changed

.travis.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ before_install:
101101
export JULIA_CPU_CORES=2;
102102
export JULIA_TEST_MAXRSS_MB=600;
103103
TESTSTORUN="all --skip linalg/triangular subarray"; fi # TODO: re enable these if possible without timing out
104+
- echo "override JULIA_CPU_TARGET=generic;native" >> Make.user
104105
- git clone -q git://git.kitenet.net/moreutils
105106
script:
106107
- echo BUILDOPTS=$BUILDOPTS

Make.inc

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,6 @@ HAVE_SSP := 0
8181
WITH_GC_VERIFY := 0
8282
WITH_GC_DEBUG_ENV := 0
8383

84-
# When set, give julia binaries CPUID specific names. This is useful in cluster environments
85-
# with heterogeneous architectures. N.B.: will not be automatically rebuilt for all
86-
# architectures if julia is updated.
87-
CPUID_SPECIFIC_BINARIES ?= 0
88-
8984
# Prevent picking up $ARCH from the environment variables
9085
ARCH:=
9186

Makefile

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -102,17 +102,11 @@ julia-ui-release julia-ui-debug : julia-ui-% : julia-src-%
102102
julia-inference : julia-base julia-ui-$(JULIA_BUILD_MODE) $(build_prefix)/.examples
103103
@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) $(build_private_libdir)/inference.ji JULIA_BUILD_MODE=$(JULIA_BUILD_MODE)
104104

105-
ifneq ($(CPUID_SPECIFIC_BINARIES), 0)
106-
CPUID_TAG = _$(call exec,$(JULIA_EXECUTABLE) --cpuid)
107-
else
108-
CPUID_TAG =
109-
endif
110-
111105
julia-sysimg-release : julia-inference julia-ui-release
112-
@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) $(build_private_libdir)/sys$(CPUID_TAG).$(SHLIB_EXT) JULIA_BUILD_MODE=release
106+
@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) $(build_private_libdir)/sys.$(SHLIB_EXT) JULIA_BUILD_MODE=release
113107

114108
julia-sysimg-debug : julia-inference julia-ui-debug
115-
@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) $(build_private_libdir)/sys-debug$(CPUID_TAG).$(SHLIB_EXT) JULIA_BUILD_MODE=debug
109+
@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) $(build_private_libdir)/sys-debug.$(SHLIB_EXT) JULIA_BUILD_MODE=debug
116110

117111
julia-debug julia-release : julia-% : julia-ui-% julia-sysimg-% julia-symlink julia-libccalltest
118112

@@ -229,13 +223,8 @@ $$(build_private_libdir)/sys$1.o: $$(build_private_libdir)/inference.ji $$(JULIA
229223
fi )
230224
.SECONDARY: $(build_private_libdir)/sys$1.o
231225
endef
232-
ifneq ($(CPUID_SPECIFIC_BINARIES),0)
233-
$(eval $(call sysimg_builder,_%,-O3,$(JULIA_EXECUTABLE_release)))
234-
$(eval $(call sysimg_builder,-debug_%,-O0,$(JULIA_EXECUTABLE_debug)))
235-
else
236226
$(eval $(call sysimg_builder,,-O3,$(JULIA_EXECUTABLE_release)))
237227
$(eval $(call sysimg_builder,-debug,-O0,$(JULIA_EXECUTABLE_debug)))
238-
endif
239228

240229
$(build_depsbindir)/stringreplace: $(JULIAHOME)/contrib/stringreplace.c | $(build_depsbindir)
241230
@$(call PRINT_CC, $(HOSTCC) -o $(build_depsbindir)/stringreplace $(JULIAHOME)/contrib/stringreplace.c)

base/pkg/pkg.jl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,6 @@ init(meta::AbstractString=DEFAULT_META, branch::AbstractString=META_BRANCH) = Di
8686

8787
function __init__()
8888
vers = "v$(VERSION.major).$(VERSION.minor)"
89-
vers = ccall(:jl_uses_cpuid_tag, Cint, ()) == 0 ? vers :
90-
joinpath(vers,hex(ccall(:jl_cpuid_tag, UInt64, ()), 2*sizeof(UInt64)))
9189
unshift!(Base.LOAD_CACHE_PATH, abspath(Dir._pkgroot(), "lib", vers))
9290
end
9391

contrib/windows/appveyor_build.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ else
5353
echo 'LIBBLAS = -L$(JULIAHOME)/usr/bin -lopenblas' >> Make.user
5454
echo 'LIBBLASNAME = libopenblas' >> Make.user
5555
fi
56+
echo "override JULIA_CPU_TARGET=generic;native" >> Make.user
5657

5758
# Set XC_HOST if in Cygwin or Linux
5859
case $(uname) in

doc/src/devdocs/sysimg.md

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,68 @@ and `force` set to `true`, one would execute:
3838
```
3939
julia build_sysimg.jl /tmp/sys core2 ~/userimg.jl --force
4040
```
41+
42+
## System image optimized for multiple microarchitectures
43+
44+
The system image can be compiled simultaneously for multiple CPU microarchitectures
45+
under the same instruction set architecture (ISA). Multiple versions of the same function
46+
may be created, with minimal dispatch points inserted into shared functions,
47+
in order to take advantage of different ISA extensions or other microarchitecture features.
48+
The version that offers the best performance will be selected automatically at runtime
49+
based on available features.
50+
51+
### Specifying multiple system image targets
52+
53+
A multi-microarchitecture system image can be enabled by passing multiple targets
54+
during system image compilation. This can be done either with the `JULIA_CPU_TARGET` make option
55+
or with the `-C` command line option when running the compilation command manually.
56+
Multiple targets are separated by `;` in the option.
57+
The syntax for each target is a CPU name followed by multiple features separated by `,`.
58+
All features supported by LLVM are supported, and a feature can be disabled with a `-` prefix.
59+
(A `+` prefix is also allowed and ignored, for consistency with LLVM syntax.)
60+
Additionally, two special features are supported to control the function cloning behavior.
61+
62+
1. `clone_all`
63+
64+
By default, only functions that are the most likely to benefit from
65+
the microarchitecture features will be cloned.
66+
When `clone_all` is specified for a target, however,
67+
**all** functions in the system image will be cloned for the target.
68+
The negative form `-clone_all` can be used to prevent the built-in
69+
heuristic from cloning all functions.
70+
71+
2. `base(<n>)`
72+
73+
Where `<n>` is a placeholder for a non-negative number (e.g. `base(0)`, `base(1)`).
74+
By default, a partially cloned (i.e. not `clone_all`) target will use functions
75+
from the default target (first one specified) if a function is not cloned.
76+
This behavior can be changed by specifying a different base with the `base(<n>)` option.
77+
The `n`th target (0-based) will be used as the base target instead of the default (`0`th) one.
78+
The base target has to be either `0` or another `clone_all` target.
79+
Specifying a non-`clone_all` target (other than `0`) as the base target will cause an error.
80+
81+
### Implementation overview
82+
83+
This is a brief overview of the different parts involved in the implementation.
84+
See the code comments for each component for more implementation details.
85+
86+
1. System image compilation
87+
88+
The parsing and cloning decisions are done in `src/processor*`.
89+
We currently support cloning of functions based on the presence of loops, simd instructions,
90+
or other math operations (e.g. fastmath, fma, muladd).
91+
This information is passed on to `src/llvm-multiversioning.cpp` which does the actual cloning.
92+
In addition to doing the cloning and inserting dispatch slots
93+
(see comments in `MultiVersioning::runOnModule` for how this is done),
94+
the pass also generates metadata so that the runtime can load and initialize the
95+
system image correctly.
96+
A detailed description of the metadata is available in `src/processor.h`.
97+
98+
2. System image loading
99+
100+
The loading and initialization of the system image is done in `src/processor*` by
101+
parsing the metadata saved during system image generation.
102+
Host feature detection and selection decisions are done in `src/processor_*.cpp`
103+
depending on the ISA. The target selection will prefer exact CPU name match,
104+
larger vector register size, and larger number of features.
105+
An overview of this process is in `src/processor.cpp`.

src/Makefile

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ SRCS := \
4141
simplevector APInt-C runtime_intrinsics runtime_ccall precompile \
4242
threadgroup threading stackwalk gc gc-debug gc-pages method \
4343
jlapi signal-handling safepoint jloptions timing subtype rtutils \
44-
crc32c
44+
crc32c processor
4545

4646
ifeq ($(USEMSVC), 1)
4747
SRCS += getopt
@@ -52,7 +52,7 @@ LLVMLINK :=
5252
ifeq ($(JULIACODEGEN),LLVM)
5353
SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-muladd \
5454
llvm-late-gc-lowering llvm-lower-handlers llvm-gc-invariant-verifier \
55-
llvm-propagate-addrspaces llvm-alloc-opt cgmemmgr
55+
llvm-propagate-addrspaces llvm-multiversioning llvm-alloc-opt cgmemmgr
5656
FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
5757
LLVM_LIBS := all
5858
ifeq ($(USE_POLLY),1)
@@ -109,10 +109,6 @@ SHIPFLAGS += $(FLAGS)
109109
SHIPFLAGS += "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys.$(SHLIB_EXT)\""
110110
DEBUGFLAGS += "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys-debug.$(SHLIB_EXT)\""
111111

112-
ifneq ($(CPUID_SPECIFIC_BINARIES), 0)
113-
override CPPFLAGS += "-DCPUID_SPECIFIC_BINARIES=1"
114-
endif
115-
116112
FLISP_EXECUTABLE_debug := $(BUILDDIR)/flisp/flisp-debug
117113
FLISP_EXECUTABLE_release := $(BUILDDIR)/flisp/flisp
118114
ifeq ($(OS),WINNT)
@@ -186,12 +182,15 @@ $(BUILDDIR)/julia_flisp.boot: $(addprefix $(SRCDIR)/,jlfrontend.scm flisp/aliase
186182
# additional dependency links
187183
$(BUILDDIR)/ast.o $(BUILDDIR)/ast.dbg.obj: $(BUILDDIR)/julia_flisp.boot.inc $(SRCDIR)/flisp/*.h
188184
$(BUILDDIR)/codegen.o $(BUILDDIR)/codegen.dbg.obj: $(addprefix $(SRCDIR)/,\
189-
intrinsics.cpp jitlayers.h intrinsics.h debuginfo.h codegen_shared.h cgutils.cpp ccall.cpp abi_*.cpp)
185+
intrinsics.cpp jitlayers.h intrinsics.h debuginfo.h codegen_shared.h cgutils.cpp ccall.cpp abi_*.cpp processor.h)
186+
$(BUILDDIR)/processor.o $(BUILDDIR)/processor.dbg.obj: $(addprefix $(SRCDIR)/,processor_*.cpp processor.h features_*.h)
190187
$(BUILDDIR)/anticodegen.o $(BUILDDIR)/anticodegen.dbg.obj: $(SRCDIR)/intrinsics.h
191-
$(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(SRCDIR)/debuginfo.h
192-
$(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h
188+
$(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: \
189+
$(addprefix $(SRCDIR)/,debuginfo.h processor.h)
190+
$(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h
193191
$(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h
194192
$(BUILDDIR)/builtins.o $(BUILDDIR)/builtins.dbg.obj: $(SRCDIR)/table.c
193+
$(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/processor.h
195194
$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h
196195
$(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h
197196
$(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h

src/anticodegen.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int no
3636
return 0;
3737
}
3838

39-
void jl_register_fptrs(uint64_t sysimage_base, const char *base, const int32_t *offsets,
39+
void jl_register_fptrs(uint64_t sysimage_base, const struct _jl_sysimg_fptrs_t *fptrs,
4040
jl_method_instance_t **linfos, size_t n)
4141
{
42-
(void)sysimage_base; (void)base; (void)offsets; (void)linfos; (void)n;
42+
(void)sysimage_base; (void)fptrs; (void)linfos; (void)n;
4343
}
4444

4545
void jl_compile_linfo(jl_method_instance_t *li) { }

0 commit comments

Comments
 (0)