Skip to content

Commit

Permalink
build: support app version script
Browse files Browse the repository at this point in the history
This patch introduces another new build mechanism that allows creating
custom kernel exporting only symbols required by specific application.
Such kernel benefits from smaller size and better security as all unneeded
code is removed. This patch addresses remaining part of the modularization/librarization
functionality as explained by the issue #1110 and this part of the roadmap -
https://github.com/cloudius-systems/osv/wiki/Roadmap#modularizationlibrarization.
This idea was also mentioned in the P99 OSv presentation - see slide 12.

In essence, this patch adds two new scripts that analyse the build
manifest, detect ELF files and identify symbols required from OSv kernel
and finally produce an application specific version script under
build/last/app_version_script:

- scripts/list_manifest_files.py - reads build/last/usr.manifest and
  produces a list of file paths on host filesystem
- scripts/generate_app_version_script.sh - iterates over manifest files
  produced by list_manifest_files.py, identifies undefined symbols in the
  ELF files using objdump that are also exported by OSv kernel and
  finally generates build/last/app_version_script

This patch also makes some modest changes to the main makefile to
support new parameter - conf_version_script - intended to point to a
custom version script. Please note that this new functionality only
works when building kernel with most symbols hidden
(conf_hide_symbols=1).

To take advantage of this new feature one would follow these steps:
1. Build image for given application.
2. Run scripts/generate_app_version_script.sh to produce
app_version_script.
3. Re-build the image with kernel exporting only symbols needed by an
app like so:

./scripts/build fs=rofs conf_hide_symbols=1 image=golang-pie-example \
 conf_version_script=build/last/app_version_script

The version script generated for the golang ELF lists only 30 symbols.

My experiments show that for many apps this can reduce kernel size by
close to 0.5MB. For example, the size of the kernel tailored to the
golang app above is 3196K vs 3632K of the generic one. Obviously this
feature can be used together with the driver profile to further reduce
kernel size. The kernel produced with the build command below is only 2688K
in size:

./scripts/build fs=rofs conf_hide_symbols=1 image=golang-pie-example \
 drivers_profile=virtio-mmio conf_version_script=build/last/app_version_script

Please note that some applications use dlsym() to dynamically resolve
symbols, which would be missed by this technique. In such scenarios
these symbols would have to be manually added to app_version_script.

Fixes #1110

Signed-off-by: Waldemar Kozaczuk <jwkozaczuk@gmail.com>
  • Loading branch information
wkozaczuk committed Mar 17, 2022
1 parent 06ad9a2 commit d19ccb1
Show file tree
Hide file tree
Showing 4 changed files with 160 additions and 8 deletions.
31 changes: 23 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2036,7 +2036,7 @@ $(out)/dummy-shlib.so: $(out)/dummy-shlib.o
$(call quiet, $(CXX) -nodefaultlibs -shared $(gcc-sysroot) -o $@ $^, LINK $@)

stage1_targets = $(out)/arch/$(arch)/boot.o $(out)/loader.o $(out)/runtime.o $(drivers:%=$(out)/%) $(objects:%=$(out)/%) $(out)/dummy-shlib.so
stage1: $(stage1_targets) links $(out)/version_script
stage1: $(stage1_targets) links $(out)/default_version_script
.PHONY: stage1

loader_options_dep = $(out)/arch/$(arch)/loader_options.ld
Expand All @@ -2047,20 +2047,35 @@ $(loader_options_dep): stage1
fi

ifeq ($(conf_hide_symbols),1)
version_script_file:=$(out)/version_script
#Detect which version script to be used and copy to $(out)/version_script
#so that loader.elf/kernel.elf is rebuilt accordingly if version script has changed
ifdef conf_version_script
ifeq (,$(wildcard $(conf_version_script)))
$(error Missing version script: $(conf_version_script))
endif
ifneq ($(shell cmp $(out)/version_script $(conf_version_script)),)
$(shell cp $(conf_version_script) $(out)/version_script)
endif
else
ifneq ($(shell cmp $(out)/version_script $(out)/default_version_script),)
$(shell cp $(out)/default_version_script $(out)/version_script)
endif
endif
linker_archives_options = --no-whole-archive $(libstdc++.a) $(libgcc.a) $(libgcc_eh.a) $(boost-libs) \
--exclude-libs libstdc++.a --gc-sections --version-script=$(out)/version_script
--exclude-libs libstdc++.a --gc-sections
else
linker_archives_options = --whole-archive $(libstdc++.a) $(libgcc_eh.a) $(boost-libs) --no-whole-archive $(libgcc.a)
endif

$(out)/version_script: exported_symbols/*.symbols exported_symbols/$(arch)/*.symbols
$(call quiet, scripts/generate_version_script.sh $(out)/version_script, GEN version_script)
$(out)/default_version_script: exported_symbols/*.symbols exported_symbols/$(arch)/*.symbols
$(call quiet, scripts/generate_version_script.sh $(out)/default_version_script, GEN default_version_script)

$(out)/loader.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/bootfs.o $(loader_options_dep)
$(out)/loader.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/bootfs.o $(loader_options_dep) $(version_script_file)
$(call quiet, $(LD) -o $@ --defsym=OSV_KERNEL_BASE=$(kernel_base) \
--defsym=OSV_KERNEL_VM_BASE=$(kernel_vm_base) --defsym=OSV_KERNEL_VM_SHIFT=$(kernel_vm_shift) \
-Bdynamic --export-dynamic --eh-frame-hdr --enable-new-dtags -L$(out)/arch/$(arch) \
$(^:%.ld=-T %.ld) \
$(patsubst %version_script,--version-script=%version_script,$(patsubst %.ld,-T %.ld,$^)) \
$(linker_archives_options) $(conf_linker_extra_options), \
LINK loader.elf)
@# Build libosv.so matching this loader.elf. This is not a separate
Expand All @@ -2069,11 +2084,11 @@ $(out)/loader.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/bootfs.o $(lo
@scripts/libosv.py $(out)/osv.syms $(out)/libosv.ld `scripts/osv-version.sh` | $(CC) -c -o $(out)/osv.o -x assembler -
$(call quiet, $(CC) $(out)/osv.o -nostdlib -shared -o $(out)/libosv.so -T $(out)/libosv.ld, LIBOSV.SO)

$(out)/kernel.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/empty_bootfs.o $(loader_options_dep)
$(out)/kernel.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/empty_bootfs.o $(loader_options_dep) $(version_script_file)
$(call quiet, $(LD) -o $@ --defsym=OSV_KERNEL_BASE=$(kernel_base) \
--defsym=OSV_KERNEL_VM_BASE=$(kernel_vm_base) --defsym=OSV_KERNEL_VM_SHIFT=$(kernel_vm_shift) \
-Bdynamic --export-dynamic --eh-frame-hdr --enable-new-dtags -L$(out)/arch/$(arch) \
$(^:%.ld=-T %.ld) \
$(patsubst %version_script,--version-script=%version_script,$(patsubst %.ld,-T %.ld,$^)) \
$(linker_archives_options) $(conf_linker_extra_options), \
LINK kernel.elf)
$(call quiet, $(STRIP) $(out)/kernel.elf -o $(out)/kernel-stripped.elf, STRIP kernel.elf -> kernel-stripped.elf )
Expand Down
84 changes: 84 additions & 0 deletions scripts/generate_app_version_script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/bin/bash

# Print the usage text and exit successfully when the first argument is
# --help or -h; no other work is done in that case.
if [[ "$1" == "--help" || "$1" == "-h" ]]; then
cat <<-EOF
Produce version script file under build/last/app_version_script intended
to build custom kernel exporting only symbols listed in this file.
The script reads default user manifest file - build/last/usr.manifest
to identify all ELF files - executables and shared libraries - and
extract names of all symbols required to be exported by OSv kernel.
You can override location of the source manifest and pass its path
as 1st argument.
Usage: ${0} [<manifest_file_path>]
NOTE: Given that some executables and libraries may dynamically resolve
symbols using dlsym(), this script would miss to identify those. In this
case one would have to manually add those symbols to build/last/app_version_script.
EOF
exit 0
fi

# Translate the host machine type reported by uname into the architecture
# name used by this script: x86_64 becomes x64, anything else is treated as
# aarch64.
MACHINE=$(uname -m)
case "${MACHINE}" in
    x86_64) ARCH="x64" ;;
    *)      ARCH="aarch64" ;;
esac

# Opening lines of the generated version script: everything emitted between
# this header and the footer below is listed under "global:".
VERSION_SCRIPT_START=$(cat <<"EOF"
{
global:
EOF
)

# Closing lines of the generated version script: the "local: *;" catch-all
# follows the per-ELF symbol entries written after the header.
VERSION_SCRIPT_END=$(cat <<"EOF"
local:
*;
};
EOF
)

# Locate the build outputs relative to this script so that the paths do not
# depend on the caller's working directory.
SCRIPTS_DIR=$(dirname "$0")
BUILD_DIR=$SCRIPTS_DIR/../build/last
VERSION_SCRIPT_FILE=$BUILD_DIR/app_version_script

# List of symbols the generated script is allowed to export; it must already
# exist in the build directory, so a build has to have been run before.
ALL_SYMBOLS_FILE=$BUILD_DIR/all.symbols
if [[ ! -f "$ALL_SYMBOLS_FILE" ]]; then
    echo "Could not find $ALL_SYMBOLS_FILE. Please run build first!"
    exit 1
fi

# The optional 1st argument overrides the default manifest location.
USR_MANIFEST=$1
if [[ "$USR_MANIFEST" == "" ]]; then
    USR_MANIFEST=$BUILD_DIR/usr.manifest
fi
if [[ ! -f "$USR_MANIFEST" ]]; then
    echo "Could not find $USR_MANIFEST. Please run build first!"
    exit 1
fi

MANIFEST_FILES=$BUILD_DIR/usr.manifest.files
echo "Extracting list of files on host from $USR_MANIFEST"
# Hand the selected manifest to the helper explicitly (-m); without it the
# helper always reads its built-in default and the override above would be
# silently ignored. Stop if the helper fails rather than generating a version
# script from an incomplete file list.
if ! "$SCRIPTS_DIR"/list_manifest_files.py -m "$USR_MANIFEST" > "$MANIFEST_FILES"; then
    echo "Failed to list files from $USR_MANIFEST"
    exit 1
fi

# Write version-script entries for one ELF file to stdout.
#
# $1 - path of the ELF file on the host.
#
# Emits a comment line naming the file, followed by one " <symbol>;" line for
# every undefined dynamic symbol (objdump -T lines containing UND) that is
# also present in $ALL_SYMBOLS_FILE. Prints a warning to stderr when one of
# the emitted symbols matches "dlsym", because dynamically resolved symbols
# cannot be detected by this scan.
extract_symbols_from_elf()
{
    local ELF_PATH=$1
    local SYMBOLS_FILE

    # Use a private temporary file rather than a fixed /tmp name: a fixed
    # name collides between concurrent runs and is predictable to other users.
    SYMBOLS_FILE=$(mktemp) || return 1

    echo "/*------- $ELF_PATH */"
    # NOTE(review): "cut -c 62-" presumably skips the fixed-width columns that
    # precede the symbol name in "objdump -wT" output - confirm for the
    # binutils version in use.
    # comm -12 keeps only lines common to both sorted inputs.
    objdump -wT "${ELF_PATH}" | grep UND | cut -c 62- | \
        sort -d | uniq | comm -12 - "${ALL_SYMBOLS_FILE}" | \
        awk '// { printf(" %s;\n", $0) }' | tee "$SYMBOLS_FILE"
    if grep -q dlsym "$SYMBOLS_FILE"; then
        echo "WARNING: the $ELF_PATH may use dlsym() to dynamically reference symbols!" 1>&2
    fi
    rm -f "$SYMBOLS_FILE"
}

# Assemble the version script: header, per-ELF symbol entries, footer.
echo "Writing to $VERSION_SCRIPT_FILE ..."
echo "$VERSION_SCRIPT_START" > "$VERSION_SCRIPT_FILE"

# $MANIFEST_FILES holds one host path per line. Split on newlines only
# (xargs -d '\n') and read each line verbatim (IFS= read -r) so that paths
# containing spaces, quotes or backslashes are passed through intact; -r
# keeps "file" from being run without arguments when the list is empty.
# Only files reported by file(1) as "ELF 64-bit" are scanned for symbols.
xargs -r -d '\n' file < "$MANIFEST_FILES" | grep "ELF 64-bit" | cut --delimiter=: -f 1 | \
    while IFS= read -r elf_path; do extract_symbols_from_elf "$elf_path"; done >> "$VERSION_SCRIPT_FILE"

echo "$VERSION_SCRIPT_END" >> "$VERSION_SCRIPT_FILE"
3 changes: 3 additions & 0 deletions scripts/generate_version_script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ VERSION_SCRIPT_END=$(cat <<"EOF"
EOF
)

ALL_SYMBOLS_FILE=$(dirname $VERSION_SCRIPT_FILE)/all.symbols
cat exported_symbols/*.symbols exported_symbols/$ARCH/*.symbols | sort -d | uniq > $ALL_SYMBOLS_FILE

echo "$VERSION_SCRIPT_START" > $VERSION_SCRIPT_FILE

#Firstly output list of symbols from files common to all architectures
Expand Down
50 changes: 50 additions & 0 deletions scripts/list_manifest_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/python3

import optparse, os, sys, subprocess
from manifest_common import add_var, expand, unsymlink, read_manifest, defines

def list_files(manifest, manifest_dir):
    """Print the host path of each non-directory file named by the manifest.

    Args:
        manifest: iterable of (name, hostname) pairs; each hostname is a
            %-format template that is filled in from the module-level
            ``defines`` mapping.
        manifest_dir: directory that relative, non-link host paths are
            joined onto before being printed.

    Entries whose host part starts with "->" are skipped (presumably
    guest-side symlink definitions with no host file - confirm against
    manifest_common). Directories are skipped as well. One path is written
    to stdout per remaining entry.
    """
    manifest = [(x, y % defines) for (x, y) in manifest]
    files = list(expand(manifest))
    files = [(x, unsymlink(y)) for (x, y) in files]

    for _name, hostname in files:
        if hostname.startswith("->"):
            continue
        if os.path.islink(hostname):
            target = os.readlink(hostname)
            # os.readlink() may return a path relative to the directory that
            # holds the link; anchor it there so the printed path actually
            # points at the target instead of being resolved against the
            # consumer's working directory.
            if not os.path.isabs(target):
                target = os.path.join(os.path.dirname(hostname), target)
            print(target)
        elif not os.path.isdir(hostname):
            if not os.path.isabs(hostname):
                hostname = os.path.join(manifest_dir, hostname)
            print(hostname)

def main():
    """Parse the command line and print the host files of a manifest.

    Options:
        -m FILE      manifest to read; 'build/last/usr.manifest' when omitted.
        -D VAR=DATA  handled by the ``add_var`` callback.

    When ``defines`` has no 'libgcc_s_dir' entry, it is set to the directory
    of the path that ``gcc -print-file-name=libgcc_s.so.1`` prints.
    """
    manifest_option = optparse.make_option(
        '-m',
        dest='manifest',
        help='read manifest from FILE',
        metavar='FILE',
    )
    define_option = optparse.make_option(
        '-D',
        type='string',
        help='define VAR=DATA',
        metavar='VAR=DATA',
        action='callback',
        callback=add_var,
    )
    parser = optparse.OptionParser(option_list=[manifest_option, define_option])
    options, _positional = parser.parse_args()

    if 'libgcc_s_dir' not in defines:
        gcc_output = subprocess.check_output(['gcc', '-print-file-name=libgcc_s.so.1'])
        defines['libgcc_s_dir'] = os.path.dirname(gcc_output.decode('utf-8'))

    if options.manifest:
        manifest_path = options.manifest
    else:
        manifest_path = 'build/last/usr.manifest'
    # Relative host paths in the manifest are later joined onto this directory.
    manifest_dir = os.path.abspath(os.path.dirname(manifest_path))

    list_files(read_manifest(manifest_path), manifest_dir)

if __name__ == "__main__":
main()

0 comments on commit d19ccb1

Please sign in to comment.