From 7882b16e8efd12d2ea4632f13f5d7e6d416e6c26 Mon Sep 17 00:00:00 2001 From: Vitaly Chernooky Date: Mon, 5 Dec 2016 13:44:36 +0100 Subject: [PATCH] Initial Import Signed-off-by: Vitaly Chernooky --- .gitattributes | 4 + .gitignore | 22 + CODING_STYLE.md | 1 + CONTRIBUTING.md | 55 ++ ChangeLog | 4 + LICENSE | 36 ++ Makefile | 140 +++++ Makefile.inc | 291 ++++++++++ README | 17 + TODO.rst | 95 ++++ doc/.gitattributes | 1 + doc/.gitignore | 2 + doc/DESIGN.pdf | Bin 0 -> 48133 bytes doc/DESIGN.rst | 91 ++++ doc/DSGN_beh_act_dia.uml | 19 + doc/DSGN_rst_style.yaml | 631 ++++++++++++++++++++++ doc/DSGN_struct_comp_dia.uml | 43 ++ doc/Makefile | 43 ++ man/.gitignore | 5 + man/Makefile | 112 ++++ man/README | 11 + man/default.man | 59 +++ man/generated/strace.ebpf.1 | 291 ++++++++++ man/strace.ebpf.1.md | 273 ++++++++++ src/.gitignore | 8 + src/Makefile | 166 ++++++ src/Makefile.inc | 247 +++++++++ src/common.inc | 183 +++++++ src/compat/bcc/perf_reader.h | 48 ++ src/ebpf/Makefile | 60 +++ src/ebpf/README | 1 + src/ebpf/trace.h | 103 ++++ src/ebpf/trace_file_tmpl.c | 98 ++++ src/ebpf/trace_fileat_tmpl.c | 98 ++++ src/ebpf/trace_head.c | 54 ++ src/ebpf/trace_kern_tmpl.c | 98 ++++ src/ebpf/trace_libc_tmpl.c | 96 ++++ src/ebpf/trace_tp_all.c | 88 +++ src/file_sc_bench.c | 119 +++++ src/libstrace/Makefile | 64 +++ src/libstrace/README | 1 + src/libstrace/attach_probes.c | 451 ++++++++++++++++ src/libstrace/attach_probes.h | 46 ++ src/libstrace/bpf.c | 408 ++++++++++++++ src/libstrace/bpf.h | 84 +++ src/libstrace/ebpf_syscalls.c | 455 ++++++++++++++++ src/libstrace/ebpf_syscalls.h | 70 +++ src/libstrace/generate_ebpf.c | 334 ++++++++++++ src/libstrace/generate_ebpf.h | 72 +++ src/libstrace/main.h | 79 +++ src/libstrace/print_event_cb.c | 506 ++++++++++++++++++ src/libstrace/print_event_cb.h | 54 ++ src/libstrace/utils.c | 365 +++++++++++++ src/libstrace/utils.h | 66 +++ src/main.c | 497 +++++++++++++++++ src/make-redis.sh | 55 ++ src/redis.conf | 943 
+++++++++++++++++++++++++++++++++ utils/md2man.sh | 66 +++ 58 files changed, 8329 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 CODING_STYLE.md create mode 100644 CONTRIBUTING.md create mode 100644 ChangeLog create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 Makefile.inc create mode 100644 README create mode 100644 TODO.rst create mode 100644 doc/.gitattributes create mode 100644 doc/.gitignore create mode 100644 doc/DESIGN.pdf create mode 100644 doc/DESIGN.rst create mode 100644 doc/DSGN_beh_act_dia.uml create mode 100644 doc/DSGN_rst_style.yaml create mode 100644 doc/DSGN_struct_comp_dia.uml create mode 100644 doc/Makefile create mode 100644 man/.gitignore create mode 100644 man/Makefile create mode 100644 man/README create mode 100644 man/default.man create mode 100644 man/generated/strace.ebpf.1 create mode 100644 man/strace.ebpf.1.md create mode 100644 src/.gitignore create mode 100644 src/Makefile create mode 100644 src/Makefile.inc create mode 100644 src/common.inc create mode 100644 src/compat/bcc/perf_reader.h create mode 100644 src/ebpf/Makefile create mode 100644 src/ebpf/README create mode 100644 src/ebpf/trace.h create mode 100644 src/ebpf/trace_file_tmpl.c create mode 100644 src/ebpf/trace_fileat_tmpl.c create mode 100644 src/ebpf/trace_head.c create mode 100644 src/ebpf/trace_kern_tmpl.c create mode 100644 src/ebpf/trace_libc_tmpl.c create mode 100644 src/ebpf/trace_tp_all.c create mode 100644 src/file_sc_bench.c create mode 100644 src/libstrace/Makefile create mode 100644 src/libstrace/README create mode 100644 src/libstrace/attach_probes.c create mode 100644 src/libstrace/attach_probes.h create mode 100644 src/libstrace/bpf.c create mode 100644 src/libstrace/bpf.h create mode 100644 src/libstrace/ebpf_syscalls.c create mode 100644 src/libstrace/ebpf_syscalls.h create mode 100644 src/libstrace/generate_ebpf.c create mode 100644 src/libstrace/generate_ebpf.h create mode 100644 
src/libstrace/main.h create mode 100644 src/libstrace/print_event_cb.c create mode 100644 src/libstrace/print_event_cb.h create mode 100644 src/libstrace/utils.c create mode 100644 src/libstrace/utils.h create mode 100644 src/main.c create mode 100755 src/make-redis.sh create mode 100644 src/redis.conf create mode 100755 utils/md2man.sh diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..3d5c903f4 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +* text=auto eol=lf +*.jpg binary +*.png binary +*.gif binary diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..d606775fe --- /dev/null +++ b/.gitignore @@ -0,0 +1,22 @@ +*~ +*.swp +*.o +make.out +core +a.out +.cproject +.project +.settings/ +nbproject/ +rpmbuild/ +dpkgbuild/ +rpm/ +dpkg/ +.deps +.vscode +*.user +~* +*~ +*.db +*.htmp +*.hpptmp diff --git a/CODING_STYLE.md b/CODING_STYLE.md new file mode 100644 index 000000000..17bb57134 --- /dev/null +++ b/CODING_STYLE.md @@ -0,0 +1 @@ +https://www.cis.upenn.edu/~lee/06cse480/data/cstyle.ms.pdf diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..11fbddd30 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,55 @@ +# Contributing to the strace.ebpf + +Here you'll find instructions on how to contribute to the strace.ebpf. + +Your contributions are most welcome! You'll find it is best to begin +with a conversation about your changes, rather than just writing a bunch +of code and contributing it out of the blue. 
+There are several good ways to suggest new features, offer to add a feature, +or just begin a dialog about the strace.ebpf: + +* Open an issue in GitHub + +**NOTE: If you do decide to implement code changes and contribute them, +please make sure you agree your contribution can be made available +under the [BSD-style License used for the strace.ebpf](LICENSE).** + +**NOTE: Submitting your changes also means that you certify the following:** + +``` +Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + +(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. +``` + +In case of any doubt, the gatekeeper may ask you to certify the above in writing, +i.e. via email or by including a `Signed-off-by:` line at the bottom +of your commit comments. 
+ +To improve tracking of who is the author of a contribution, we kindly ask you +to use your real name (not an alias) when committing your changes to the strace.ebpf: +``` +Author: Random J Developer +``` diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 000000000..23f1d4ea0 --- /dev/null +++ b/ChangeLog @@ -0,0 +1,4 @@ +Mon 5 Dec CET 2016 Vitalii Chernookyi + + * Version 0.1 + Extracting from NVML project diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..e1e250bc2 --- /dev/null +++ b/LICENSE @@ -0,0 +1,36 @@ +Copyright 2014-2016, Intel Corporation + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Everything in this source tree is covered by the previous license +with the following exceptions: + + +* utils/cstyle (used only during development) licensed under CDDL. diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..303c4c754 --- /dev/null +++ b/Makefile @@ -0,0 +1,140 @@ +# +# Copyright 2014-2016, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# +# Makefile -- top-level Makefile for NVM Library +# +# Use "make" to build the library. +# +# Use "make doc" to build documentation. +# +# Use "make test" to build unit tests. Add "SKIP_SYNC_REMOTES=y" to skip +# or "FORCE_SYNC_REMOTES=y" to force syncing remote nodes if any is defined. +# +# Use "make check" to run unit tests. +# +# Use "make check-remote" to run only remote unit tests. +# +# Use "make clean" to delete all intermediate files (*.o, etc). +# +# Use "make clobber" to delete everything re-buildable (binaries, etc.). +# +# Use "make cstyle" to run cstyle on all C source files +# +# Use "make check-license" to check copyright and license in all source files +# +# Use "make rpm" to build rpm packages +# +# Use "make dpkg" to build dpkg packages +# +# Use "make source DESTDIR=path_to_dir" to copy source files +# from HEAD to 'path_to_dir/nvml' directory. +# +# As root, use "make install" to install the library in the usual +# locations (/usr/local/lib, /usr/local/include, and /usr/local/share/man). 
+# You can provide custom directory prefix for installation using +# DESTDIR variable e.g.: "make install DESTDIR=/opt" +# You can override the prefix within DESTDIR using prefix variable +# e.g.: "make install prefix=/usr" + +include src/common.inc + +export SRCVERSION = $(shell git describe 2>/dev/null ||\ + cat .version 2>/dev/null ||\ + git log -1 --format=%h 2>/dev/null) + +RPM_BUILDDIR=rpmbuild +DPKG_BUILDDIR=dpkgbuild +EXPERIMENTAL ?= n +BUILD_PACKAGE_CHECK ?= y +rpm : override DESTDIR=$(CURDIR)/$(RPM_BUILDDIR) +dpkg: override DESTDIR=$(CURDIR)/$(DPKG_BUILDDIR) +rpm dpkg: override prefix=/usr + +all: + $(MAKE) -C src $@ + +doc: + $(MAKE) -C doc all + +clean: + $(MAKE) -C src $@ + $(MAKE) -C doc $@ + $(MAKE) -C utils $@ + $(RM) -r $(RPM_BUILDDIR) $(DPKG_BUILDDIR) + +clobber: + $(MAKE) -C src $@ + $(MAKE) -C doc $@ + $(MAKE) -C utils $@ + $(RM) -r $(RPM_BUILDDIR) $(DPKG_BUILDDIR) rpm dpkg + +test check pcheck check-remote: all + $(MAKE) -C src $@ + +cstyle: + $(MAKE) -C src $@ + $(MAKE) -C utils $@ + @echo Checking files for whitespace issues... + @utils/check_whitespace -g + @echo Done. + +format: + $(MAKE) -C src $@ + $(MAKE) -C utils $@ + @echo Done. + +check-license: + $(MAKE) -C utils $@ + @utils/check_license/check-headers.sh + @echo Done. 
+ +source: + $(if $(shell git rev-parse 2>&1), $(error Not a git repository)) + $(if $(shell git status --porcelain), $(error Working directory is dirty: $(shell git status --porcelain))) + $(if $(DESTDIR), , $(error Please provide DESTDIR variable)) + mkdir -p $(DESTDIR)/nvml + echo -n $(SRCVERSION) > $(DESTDIR)/nvml/.version + git archive HEAD | tar -x -C $(DESTDIR)/nvml + +pkg-clean: + $(RM) -r $(DESTDIR) + +rpm dpkg: pkg-clean source + +utils/build-$@.sh $(SRCVERSION) $(DESTDIR)/nvml $(DESTDIR) $(CURDIR)/$@\ + ${EXPERIMENTAL} ${BUILD_PACKAGE_CHECK} $(CURDIR)/src/test/testconfig.sh + +install uninstall: + $(MAKE) -C src $@ + $(MAKE) -C doc $@ + +.PHONY: all clean clobber test check cstyle check-license install uninstall\ + source rpm dpkg pkg-clean pcheck check-remote format doc $(SUBDIRS) diff --git a/Makefile.inc b/Makefile.inc new file mode 100644 index 000000000..17512213c --- /dev/null +++ b/Makefile.inc @@ -0,0 +1,291 @@ +# Copyright 2014-2016, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# src/Makefile.inc -- Makefile include for all tools +# + +TOP := $(dir $(lastword $(MAKEFILE_LIST))) + +include $(TOP)/src/common.inc + +INSTALL_TARGET ?= y + +INCS += -I. +INCS += -I$(TOP)/src/include +CFLAGS += -std=gnu99 +CFLAGS += -Wall +CFLAGS += -Werror +CFLAGS += -Wmissing-prototypes +CFLAGS += -Wpointer-arith +CFLAGS += -Wunused-macros +CFLAGS += -Wmissing-field-initializers +CFLAGS += -Wsign-conversion +CFLAGS += -Wsign-compare +ifeq ($(call check_Wconversion), y) +CFLAGS += -Wconversion +endif +CFLAGS += -fno-common + +CFLAGS += -DSRCVERSION='"$(SRCVERSION)"' +ifeq ($(call check_flag, -Wunreachable-code-return), y) +CFLAGS += -Wunreachable-code-return +endif +ifeq ($(call check_flag, -Wmissing-variable-declarations), y) +CFLAGS += -Wmissing-variable-declarations +endif + +ifeq ($(DEBUG),1) +CFLAGS += -ggdb $(EXTRA_CFLAGS_DEBUG) +else +CFLAGS += -O2 -D_FORTIFY_SOURCE=2 $(EXTRA_CFLAGS_RELEASE) +endif + +CFLAGS += $(EXTRA_CFLAGS) + +LDFLAGS += -Wl,-z,relro -Wl,--warn-common -Wl,--fatal-warnings $(EXTRA_LDFLAGS) -L$(TOP)/src/nondebug +TARGET_DIR=$(DESTDIR)$(bindir) +BASH_COMP_FILES ?= +BASH_COMP_DESTDIR = $(DESTDIR)$(bashcompdir) + +TARGET_STATIC_NONDEBUG=$(TARGET).static-nondebug +TARGET_STATIC_DEBUG=$(TARGET).static-debug + +LIBSDIR=$(TOP)/src +LIBSDIR_DEBUG=$(LIBSDIR)/debug +LIBSDIR_NONDEBUG=$(LIBSDIR)/nondebug + +ifneq ($(DEBUG),) +LIBSDIR_PRIV=$(LIBSDIR_DEBUG) +else +LIBSDIR_PRIV=$(LIBSDIR_NONDEBUG) +endif + 
+PMEMLOG_PRIV_OBJ=$(LIBSDIR_PRIV)/libpmemlog/libpmemlog_unscoped.o +PMEMOBJ_PRIV_OBJ=$(LIBSDIR_PRIV)/libpmemobj/libpmemobj_unscoped.o +PMEMBLK_PRIV_OBJ=$(LIBSDIR_PRIV)/libpmemblk/libpmemblk_unscoped.o + +LIBS += -pthread + +ifeq ($(TOOLS_COMMON), y) +LIBPMEMCOMMON=y +endif + +ifeq ($(LIBPMEMCOMMON), y) +DYNAMIC_LIBS += $(LIBSDIR_DEBUG)/libpmemcommon.a +STATIC_DEBUG_LIBS += $(LIBSDIR_DEBUG)/libpmemcommon.a +STATIC_NONDEBUG_LIBS += $(LIBSDIR_NONDEBUG)/libpmemcommon.a +CFLAGS += -I$(TOP)/src/common +LIBS += -ldl +endif + +ifeq ($(LIBPMEMPOOL), y) +DYNAMIC_LIBS += -lpmempool +STATIC_DEBUG_LIBS += $(LIBSDIR_DEBUG)/libpmempool.a +STATIC_NONDEBUG_LIBS += $(LIBSDIR_NONDEBUG)/libpmempool.a +endif + +ifeq ($(LIBPMEMBLK), y) +DYNAMIC_LIBS += -lpmemblk +STATIC_DEBUG_LIBS += $(LIBSDIR_DEBUG)/libpmemblk.a +STATIC_NONDEBUG_LIBS += $(LIBSDIR_NONDEBUG)/libpmemblk.a +endif + +ifeq ($(LIBPMEMLOG), y) +DYNAMIC_LIBS += -lpmemlog +STATIC_DEBUG_LIBS += $(LIBSDIR_DEBUG)/libpmemlog.a +STATIC_NONDEBUG_LIBS += $(LIBSDIR_NONDEBUG)/libpmemlog.a +endif + +ifeq ($(LIBPMEMOBJ), y) +LIBS += -ldl +DYNAMIC_LIBS += -lpmemobj +STATIC_DEBUG_LIBS += $(LIBSDIR_DEBUG)/libpmemobj.a +STATIC_NONDEBUG_LIBS += $(LIBSDIR_NONDEBUG)/libpmemobj.a +endif + +ifeq ($(LIBPMEM),y) +DYNAMIC_LIBS += -lpmem +STATIC_DEBUG_LIBS += $(LIBSDIR_DEBUG)/libpmem.a +STATIC_NONDEBUG_LIBS += $(LIBSDIR_NONDEBUG)/libpmem.a +endif + +ifeq ($(LIBVMEM),y) +DYNAMIC_LIBS += -lvmem +STATIC_DEBUG_LIBS += $(LIBSDIR_DEBUG)/libvmem.a +STATIC_NONDEBUG_LIBS += $(LIBSDIR_NONDEBUG)/libvmem.a +endif + +ifeq ($(TOOLS_COMMON), y) +vpath %.c $(TOP)/src/tools/pmempool + +OBJS += common.o output.o + +CFLAGS += -I$(TOP)/src/common +CFLAGS += -I$(TOP)/src/libpmemlog +CFLAGS += -I$(TOP)/src/libpmemblk +CFLAGS += -I$(TOP)/src/libpmemobj +CFLAGS += -I$(TOP)/src/tools/pmempool +common.o: CFLAGS += -D__USE_UNIX98 + +endif + +ifneq ($(LIBPMEMLOG_PRIV),) +OBJS += pmemlog_priv.o +endif + +ifneq ($(LIBPMEMOBJ_PRIV),) +OBJS += pmemobj_priv.o +endif + +ifneq 
($(LIBPMEMBLK_PRIV),) +OBJS += pmemblk_priv.o +endif + +ifneq ($(HEADERS),) +ifneq ($(filter 1 2, $(CSTYLEON)),) +TMP_HEADERS := $(addsuffix tmp, $(HEADERS)) +endif +endif + +MAKEFILE_DEPS=$(TOP)/src/Makefile.inc $(TOP)/src/common.inc + +ifneq ($(TARGET),) +all: $(TARGET) $(TARGET_STATIC_NONDEBUG) $(TARGET_STATIC_DEBUG) +else +all: +endif + +SYNC_FILE=.synced + +ifneq ($(EXTRA_TARGETS),) +EXTRA_TARGETS_CLEAN = $(EXTRA_TARGETS:=-clean) +EXTRA_TARGETS_CLOBBER = $(EXTRA_TARGETS:=-clobber) +endif + +clean: $(EXTRA_TARGETS_CLEAN) + $(RM) $(OBJS) $(CLEAN_FILES) $(SYNC_FILE) $(TMP_HEADERS) + +clobber: clean $(EXTRA_TARGETS_CLOBBER) +ifneq ($(TARGET),) + $(RM) $(TARGET) + $(RM) $(TARGET_STATIC_NONDEBUG) + $(RM) $(TARGET_STATIC_DEBUG) + $(RM) -r .deps +endif + +install: all +ifeq ($(INSTALL_TARGET),y) +ifneq ($(TARGET),) + install -d $(TARGET_DIR) + install -p -m 0755 $(TARGET) $(TARGET_DIR) +endif +ifneq ($(BASH_COMP_FILES),) + install -d $(BASH_COMP_DESTDIR) + install -p -m 0644 $(BASH_COMP_FILES) $(BASH_COMP_DESTDIR) +endif +endif + +uninstall: +ifeq ($(INSTALL_TARGET),y) +ifneq ($(TARGET),) + $(RM) $(TARGET_DIR)/$(TARGET) +endif +ifneq ($(BASH_COMP_FILES),) + $(RM) $(BASH_COMP_DESTDIR)/$(BASH_COMP_FILES) +endif +endif + +%.gz: % + gzip -c ./$< > $@ + +%.txt: % + man ./$< > $@ + +%.html: % + groff -mandoc -Thtml ./$< > $@ + +$(TARGET) $(TARGET_STATIC_DEBUG) $(TARGET_STATIC_NONDEBUG): $(TMP_HEADERS) $(OBJS) $(MAKEFILE_DEPS) + +$(TARGET_STATIC_DEBUG): $(STATIC_DEBUG_LIBS) + $(CC) $(LDFLAGS) -o $@ $(OBJS) $(STATIC_DEBUG_LIBS) $(LIBS) + +$(TARGET_STATIC_NONDEBUG): $(STATIC_NONDEBUG_LIBS) + $(CC) $(LDFLAGS) -o $@ $(OBJS) $(STATIC_NONDEBUG_LIBS) $(LIBS) + +$(TARGET): + $(CC) $(LDFLAGS) -o $@ $(OBJS) $(DYNAMIC_LIBS) $(LIBS) + +$(PMEMLOG_PRIV_OBJ): + $(MAKE) -C $(LIBSDIR) libpmemlog + +pmemlog_priv.o: $(PMEMLOG_PRIV_OBJ) + $(OBJCOPY) --localize-hidden $(addprefix -G, $(LIBPMEMLOG_PRIV)) $< $@ + +$(PMEMOBJ_PRIV_OBJ): + $(MAKE) -C $(LIBSDIR) libpmemobj + +pmemobj_priv.o: 
$(PMEMOBJ_PRIV_OBJ) + $(OBJCOPY) --localize-hidden $(addprefix -G, $(LIBPMEMOBJ_PRIV)) $< $@ + +$(PMEMBLK_PRIV_OBJ): + $(MAKE) -C $(LIBSDIR) libpmemblk + +pmemblk_priv.o: $(PMEMBLK_PRIV_OBJ) + $(OBJCOPY) --localize-hidden $(addprefix -G, $(LIBPMEMBLK_PRIV)) $< $@ + +objdir=. + +%.o: %.c $(MAKEFILE_DEPS) + $(call check-cstyle, $<) + @mkdir -p .deps + $(CC) -MD $(CFLAGS) $(INCS) -c -o $@ $< + $(create-deps) + +%.htmp: %.h + $(call check-cstyle, $<, $@) + +test check pcheck: all + +TESTCONFIG=$(TOP)/src/test/testconfig.sh +DIR_SYNC=$(TOP)/src/test/.sync-dir + +$(TESTCONFIG): + +sync-remotes: all $(SYNC_FILE) + +$(SYNC_FILE): $(TARGET) $(TESTCONFIG) +ifeq ($(SCP_TO_REMOTE_NODES), y) + cp $(TARGET) $(DIR_SYNC) + @touch $(SYNC_FILE) +endif + +.PHONY: all clean clobber install uninstall test check pcheck + +-include .deps/*.P diff --git a/README b/README new file mode 100644 index 000000000..6fa569f85 --- /dev/null +++ b/README @@ -0,0 +1,17 @@ +This directory contains a tool which traces syscalls in a fast +way using eBPF linux kernel feature. + +** DEPENDENCIES: ** +The strace.ebpf depends on libbcc library: + +$ sudo apt-get install libbcc + +Bcc sources: + +https://github.com/iovisor/bcc + +** WARNING ** + +Some old libbcc packages require manual copying of libbcc.pc from sources to +appropriate place in a system. In case of Ubuntu 16.04 LTS appropriate place +is /usr/lib/x86_64-linux-gnu/pkgconfig/libbcc.pc. diff --git a/TODO.rst b/TODO.rst new file mode 100644 index 000000000..a87478838 --- /dev/null +++ b/TODO.rst @@ -0,0 +1,95 @@ +TODO +##### + +1. Performance improvement +============================ + +Currently we require a bit more than 1000 nsec for tracing single syscall. +It is not bad but there are at least few places which could allow us to +reduce that value to maybe 600 nsec. Every syscall itself currently requires +a bit more than 100 nsec for entering, and close value for returning. So a bit +more than 200 nsec together. 
+ +1.1 Extra poll() +----------------- + +Currently libbcc does two identical poll() syscalls per iteration. There is no reason for +it and we should drop it. It will improve our time by about 200 nsec, but it +is a libbcc bug. Back trace for one of those poll() syscalls: + +(gdb) bt +#0 poll () at ../sysdeps/unix/syscall-template.S:84 +#1 0x00007f9c40a07566 in perf_reader_poll () from /usr/lib/x86_64-linux-gnu/libbcc.so.0 +#2 0x0000000000401a7b in kprobe_poll (b=, timeout=) at BPF.c:82 +#3 0x000000000040175d in main (argc=, argv=0x7fffe635c888) at snoop.c:228 + +1.2 Tracepoints support +------------------------ + +Currently the kernel provides a way for fast intercepting of all syscalls together. +But we temporarily can't use it because of this bug: + + - https://github.com/iovisor/bcc/issues/748 + +As soon as the bug is fixed we should try it once more. + +1.3 out buffering +------------------ + +Optimization of this place is critical to achieve maximum possible log +bandwidth. Most likely we should use fd directly. + + +2. Debugging +============ + +2.1 Enable Valgrind +-------------------- + +Currently Valgrind fails with a message like: + +--12470-- WARNING: unhandled amd64-linux syscall: 321 +==12470== at 0x77F7C19: syscall (syscall.S:38) +==12470== by 0x5129133: bpf_create_map (in /usr/lib/x86_64-linux-gnu/libbcc.so.0.1.8) +==12470== by 0x5181809: ??? (in /usr/lib/x86_64-linux-gnu/libbcc.so.0.1.8) +==12470== by 0x51AE4A7: ??? (in /usr/lib/x86_64-linux-gnu/libbcc.so.0.1.8) +==12470== by 0x51835E6: ??? (in /usr/lib/x86_64-linux-gnu/libbcc.so.0.1.8) +==12470== by 0x522FE1C: ??? (in /usr/lib/x86_64-linux-gnu/libbcc.so.0.1.8) +==12470== by 0x53DCE85: ??? (in /usr/lib/x86_64-linux-gnu/libbcc.so.0.1.8) +==12470== by 0x520B9BD: ??? (in /usr/lib/x86_64-linux-gnu/libbcc.so.0.1.8) +==12470== by 0x51E0065: ??? (in /usr/lib/x86_64-linux-gnu/libbcc.so.0.1.8) +==12470== by 0x51751A4: ??? 
(in /usr/lib/x86_64-linux-gnu/libbcc.so.0.1.8) +==12470== by 0x51209B3: ebpf::BPFModule::load_cfile(std::__cxx11::basic_string, std::allocator > const&, bool, char const**, int) (in /usr/lib/x86_64-linux-gnu/libbcc.so.0.1.8) +==12470== by 0x51268FD: ebpf::BPFModule::load_string(std::__cxx11::basic_string, std::allocator > const&, char const**, int) (in /usr/lib/x86_64-linux-gnu/libbcc.so.0.1.8) +--12470-- You may be able to write your own handler. +--12470-- Read the file README_MISSING_SYSCALL_OR_IOCTL. +--12470-- Nevertheless we consider this a bug. Please report +--12470-- it at http://valgrind.org/support/bug_reports.html. + +3. Extra features +================== + +3.1 Multi-process tracing +-------------------------- + +It is not difficult to attach to a few PIDs simultaneously. Maybe we should do +it for parallel applications like apache, nginx and the like. + +3.2 Attaching by name +---------------------- + +It is good to have the ability to attach to processes not only by PIDs but also by +names. + +3.3 eBPF sources +----------------- + +It is good to have embedded copies of these files as fallback. + +4. 
Documentation +================= + +4.1 Doc format +--------------- + +It is good to convert rst to md for consistency diff --git a/doc/.gitattributes b/doc/.gitattributes new file mode 100644 index 000000000..d72fd520b --- /dev/null +++ b/doc/.gitattributes @@ -0,0 +1 @@ +*.pdf binary diff --git a/doc/.gitignore b/doc/.gitignore new file mode 100644 index 000000000..016931d98 --- /dev/null +++ b/doc/.gitignore @@ -0,0 +1,2 @@ +*.rst.build_temp +*.png diff --git a/doc/DESIGN.pdf b/doc/DESIGN.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b92ea873f0a666e8b8ba43607efee82c929c8698 GIT binary patch literal 48133 zcmdSA*%q=)lqGndYvnt94v4de42lEbz#t-`$RLV>GAbabRn^bXeb@K>tj+glc2@Q3 zTD|HvZ$va8h!cks=j^=$*e#h(zl;3qzX1Q||M9>7Uw`)QJI%iv+xf$~iJRMB_wlD_ z^5>D>f3cf<{{-Lfo9Ola>wo>P-Ta@nFWc$f)A(QirT+)|lRDAA{>xwV!c3~XF{va^a9U;i*&*e{VXXk(I4P*ay z$=>~>zx?ig{`_O!zeD_M@WQ~mJiQ;>Z*i~Q-;Ggx zqyMx6`s-W&-vJH&I#}T){|RU4uW$WN2K4W4|DSL`e|_tJGN6ACl>Z3_)c@;T|1qL@ z`2FnMe38cQH2E95hJSjC^p)m^w|y^y^wRw=;oaS{MO0$K*VoPehAjH9;Q1lCbc=BR zH>$si!1V7YMG)lN&=0b5)WS#6 z9)Zz+0@>%AyLPcmu}&Gnf$6f_Jlp#xH{F!2fmS(M#3IlcbL>8M!dnPmD$Jdy-Q)bW zXn3lgd2{Ld)0hTYRYq*2cj@GSYis%Bi}7CWVe^n1Hf^Ee=fpw@la`Q!86Uuu=o1{>|i@<3&FU;ee# zmQM;9{!qr50MlMmZ8M`+*PLjz|HUTaJguFUj2tu$`j>|*Dr?V=5cbSEO<~*u&%awq zTOyQL@7n9l`z(JF<>|F#^DuymfwO~M7Gmx=46w$%uTD3b{2=}1`x4K|2RtvWcb-MJ z;w@Nlqs(C5N^u2`wR4=jnTytr5J?ksW(y33)$4fXnjraHXWXc9FGkcE3z@BFM?4Qg z8JBz4s{ZCz_0>$Y-|2gA#9VUwh?GF@P8+=QqxSSt-_9gDZI80RuObv)0p0hU3%=HC}Im1bbe$ z^`OxiZ|cCEHHznXWU;z{duyyUi0KaqGX&p(7q08%HRek|&o)+$X7^9v+gk+bz4|?J zf36`4+y1nBB9gJ~#rfz9w=+Y8w&b%~NWxGW~W)$wt!73W)U` zRpY3SPv9(@dix>V$G#{8S<-rK+(C=diSse=w9L^8|9UKu-quMSwQdeC7m(W$Bbs|h zvNGQ@ayjb0UOIFtWfcD4vl$p|6r7BPQguv--no1%g%XrNU6Y)F*v+wbYs8Wdep3D_ z;IU-x(QbwRxfuda2DyUW<^DNt&PA83G@%T9%aGQKgZW0R`@|A4(bDAWt*qx;lp&?A zEQweBaWcjcF5U5CbLbw#)4vroMmxF=^_^wzDMQ=e{D?)4?_=pWh1F zP)&7X4s@#iQjKnd?t(?b_0T_(QgfE7q`9J0t(r<$(}* zCl_006%_l-7QRJ>uJ(ELv3YC*@=)5FOMk}1%dIt< 
z1I+SJ@jgR&Ut8Tbha2-NcjklFK|O@&7I(lIhh)<9%}vOtS7z|mVSX7skHlzn9@o={ zaAKj50*3N&{ks0O-woWm^Y$C*h+LmGI6`uFIw6P|KZHGu>r4-bs*W%H(qzAXhV@}& zl)z&*ir$assJ&l$4^g1D366F*%g<1>%Q6da9y+FWA7W1II|`g~&TQB0&cHWL{eY1= zS;(z!k7=vo%BaYrYW_pV!00hI3n zeR!{+<9;ZICM4 z`1nen_2X=X&`!{#t5<}`u6yTv`5gKUb}O^$J_*r9Q|}c2~UO zP!&Sxm8ak>YcK}(o~>NmDf0HxY}(*#k8|gc_-IJUep=fONe1v(Y_PC@6R+&Ff{oj# z)T=R@5Ye0?ptiYxB~9y{;RIEG)Qh*Gc-NtgM^uA+tMm`w`Ji~WXC0>T>G(CEV(XPu zs3$K7-Z_DC6|M27Y)(g63s_b(ve#Gb87qyV-!FV+B@66kkH%=7)^;mjPZmVFx_i z%qEApYjvxo^uT4$@@N&r(hK#2l%B??MZDE0 z#!kgEN$7GDUfVNmdD`A|rn~$i$4s9!{B{2@v!enpp<6==yNZy0-`r@Z;^xO>+hB#M z_bL(2H7Sozu)^eOC9~UajydcGv5b7-=A|TZxfp6sO;(qnRpyA&nQ$&^E081bTziyn zrBeSm81Pb)wc?X{95{{nIO`5btn|PMJUa__XxSPFD)}j|HuJ;AV=C-L6BkbBN}jt+ zr%Pa^^8GAX56`;jlasMLbWX!-sc`e5qe<>w@)V#d26*}ODIzgF%%#rr%k*00MX2cWUFZb>}kH@U!fE2Qc`qee#QgVd_sp64Q>AEb(v zS7mt-vh&+aRK(3YCl#xuc&#|4ih|oB9-!<5Fb&$+Vn7}rrUW8m~n;b2k4`90( zHBMaX1~1}P6HB3GV2&VSKF;3=)tYyjYerNTW!yPbN5d8GM%Afb-~Y<9ig@C;`Ech% z>(?#lt^B9ETmvW7Ovei)U?ZUXfqF5KU1GO7N1iVt=#ORo4jk(HOc1MnFR0$=`X0&G zxA#w_&6ig?;*8n|iZ#zbFA~!ej*I9_3n-g6hc%GIEs&`RftyBsQ#(0$6I>q~Ns$iB zBbf+5r7TU+C`h8^Vmit0vTX$zhXrn7J{tD9Ts+@QQ+4lnj`+4?74h|gH^4EH&hFyPMpj7mvo&3{;}TIot} z;?Q@YO!c@%enh{$89cjvjLYxv>8!{do#61^>dw%nb2Y2x^VX!_P5I2Shp!BB@@Z0PQePZHr^)X*UO?{b zgOBP4zL*cYNk>??Z29KHF8J)+=T9XY-;Tk3NACK+YiGV0Cv;c_4#Vfzq)*fHXq}N@ zD^HD)?JWIY^Q4yD7+#^*#&Y;#ZMk%gs-@4)AjZSz`opfrD|FnmTuWp5Ynk1!V64x* zZ@%5sxPuPueyyCXG~XA|-o#s8(EJey&PGoW{sHjHJcsddLpA&(^wjeH#(p~@1Rl>Z zdS2=~2=6CtVr!dOU=F#`3|_{Q7b&i<`$G@FQ7E*KAP7C^G;0`Iuqvz9y};*t&bZUW zN*avip;_}l-XWLGjt}%?iky%bne6}R4Vmt{eg?l&d$wrRqJ=~Pi+?&|f53Ikom{6n zgOmpFf}WSXW}&b2z6~DjI`wX>M7bh87F#o(j?fA22*HAt!{f~UojO5zy=&1uTn5*+ zXWyMynOR+Pc6B9HYM~)rNH;qqw^5h7;GLGXIg<+{ zTl$M<4#k}*2Ll!jE#Fk<`&bnCtVmC&_8~vWF9<{dI^NM`7C6l8@+15(0A$(qlO8b3lKT@ED9p-?U%g9^uX_FsqPmWCt#NwH4K-Nn46l^nPoE|5;q*VVuvqCD zJQCf%@8w{W^f!905lCslc_uWdXoHTy+M(e=;kG?U-$_X%wF$P(P%A+*AS z@O{V*1S(TU@-{B<-Ys5t&_VPPPy4!r!usVa0n5w1DjIg5IZ~Bfz?3WX;GF~$_XAA; 
z3b9Fss>xn6y)P>Jn$QVJ!}wI<>9ph=q$t%(HX@Js!DwBn5J9{}=-9Fq z{VLeT3443|7mW0n*DuTz>7(XzKd9Z!%>Pz2cGaE+C=BM;`9UJ(c<6iWQ7SuHxN1zd zvox85Xv2w)zlluGi0-T<*y?A&fOs+*v=+o(yLMjZNHNoe(_BGgKqc(5gUUwV<`eq_riAeDnmBtk zO85B2-UbreJ&3c$_tCQa@nRA$RREEOKJf)vz-bXpM6k5e1gxGit;jUH%3L!NaJxOJ zn}qY6ck+$I=ceXHwDDXoYgbPEA}1`8a{syYCjF2Yw!uYp7QfQdsPvJ)E4Y7F8Y@_?Bl9opnIC#rQXV_x zJ}p+7-kZ>_H$Aac`GlR_5#Uk_l6^%Kx|iA8#napza)f1Y@rSMMl5hEwOZz#-5-kq& zs)eoc><}&SUw2f-m`1HMn(6KIB_I0rQ{0u}e)jU4&-S)5xK1a;qxQXZ>d#fU$<@N? zJL>aN?s->>P6p5GyTuUBMhWE3Dh%alvoLs(+h6@zI=qWX|kdpK_ zX{|-J{5&_&kLlj|E1!9b3$sKPMctn4gvO~UuFm#tb(r@7-`JSr_*EYP;PyNn-EN}Y zZ!D~I@QROpw)IK!eES=81aP0ctUWqC39F&qRVeWAWz@96fb$pb1$KjmyS&SzxvNuO z+1+U-9>wXbY1A)>hz}?UYz^#qIQ7P#L9K^@Phh5a%L2JW-^-_v;K$Ojvb)+*+p-URvc}9_VM`zo9wNJF$Pza1tIy9uJ)O*zm<>U&+0ZiuSy{>AQ>b=%g(xG?Y4~1P|})NG``;x(%0V!;KA}vWdZA`HRn;t28Dq zcd14L_!Jq~EvVcxi+AC3T^4s{<%R7yTwbt&}q7!@DuV(Rf5Px1vzwz*b^mM!c66kG@{3Xyfw0QUeb0c0=(Q)$CS(1%;#Sv# z`RZPD`?b5=SD=`Ch9up`4UfFTYl;Ew+owH*R@Tm*!ZTJdsEB&KRptti5l}d)NB*wW zx@Iy@a4qrraBtwoK{jSicJE8AIN)w6)w8~BKn&e<#_r?Zb;jWOW@n+bY|vE{=VECMa_$!-e>1mb9)3y zzPk^KQ!>i5K=iBPbJ!hqjae-_ijUpQ?e>O8)qih0_We1F^ZaaqP7j&&n9XXbwBep!0q2~XTGDL()RL_&2W!&V zZ`Hxly&Ohf-l*y;qNz`NGeE`~E27pF>Wut=;R$oxZ%bSkA&JtwgN_HO{>#0%*^hSj z0*-wbxYtLG#s*Em=dSqs!X3rH(3$&Bf{Kbgt-dcd@tHR$_rm%&G|JeKyX+#i})# z1Lu^vp}F?*DghBSkD))C4@XF>xO8o|{Gc*De_qGS-mdbis*~0`yxNy@7rEZ1A#8lr zK_@B5qH$1{%v{f6xU2KPb_%viH8o1b;0;$c&qmU(lIQbSg&sMF%g=|q8xHuW`!&ig z_siz}XmA6@(mESC_YdbdJK)NcT8lf=r`=uzQ~okpY013=Ra@6uxtgxinJ{B-&ks!Z zj<|o~EOktGP;dX*e=oSXXdfJ^n{mG5@zES<3csER{Do@g3*%5NEtc3{ypxx&M&;hE zCe6)I{fVQMQ|dO2b#2QIn?giLy!Bf)`6IV8d$lj-q{?7I4+ApLtekf;phqFxoW4)V8P#p^ilD@HZWm7fcw)*e$T9TmD@X$ zx_Sm34jEY%JMW_eH<~K=nA8Z3i_XdzNz6jFOYId&1`K}yDo6}x ztge2B%LTI>H#eU~>%5!kAm?Tc0xvC(zqFpyuju)kDKZ-5<4Qhk__fa!T=uajWcG44 zP#3H8?b9<8Z%!^*WZZj?vOj`b=_7%XAuz&2w0pd8jK0_E!-smT_k7)uAS$L)i~y2~ zvYLZz%aGL$ioZP4h)?CtZC!hH-Wf97F)@FfGSH!cU7!)bDVh@j@{d}bo@oIlhp4nU zF$1VkK8eW+-+ZV0^h%DOb@u7siRmn-sBP~69d1Bw!n!c{Ey8T4lsjoCmV-Gyg>+uY 
zUWP5FvM%6sM)zSbZE4ItfapMbk81afS=CF7Lygem`?6@_K2+Y_h9Zq=7(_R;`|)63 z_!57SG;aUM=+{r^3RN$c>v+HZecinJ$3BZiUajtq zZGLvr-^YG4#h_1mn!KkI|K;MhFDI~2XG^(H=D7 zF5pY3noAJ+$lm1D;Frx=XJx&ybbEq|Ugk6@g?;l)^`}{2Q9oWSP&XuR)q3SkIBv)L zm$eKAR*^65N^(x$Jtwf1u4T;Q7;;N$19UQe(OCXQFUMYYKX1TgPVK3wUPt|(LOk1< z?0=Ag=r;#aL{Pi(z}z9f!{*}U9{YOuW>RZ(rudSu%c+pD%r&j}vzN;Rgey6hTw13$-bS(ws&?=lF;-H z8VSo^V?is&t3{E_ahW8pJhw*dYS9O0_pX$q` z-hCs7q11X#MwV*-=3Qim@_iU%F#}`{YPmIe*msccw+4HoESe+POX+IfaTd$TVHJl` zd%JGw!Xap?lTJ`cwp`H4hBfR3x2qI%xe^;=oCM~1HWOM^h#FrjPanUx#^eW;#@P4U z(?>m{)E$xa=!0L&)4*<*iRBh&N#j_)bape}wQ~E=>cdZCr&z?b^*oj9K83F9Bbjl} z8rcnIC5`Kho2wXPtGjSrBbu8q*joK{vsS(}S*IQOA!eCt4Zliut z$+#qU9F)qU+!&X*?XgHHhdIR$OG^NVYYv)YBXMb z8AzYVQj&Q!0OkfTkzvW}^X+EzJE|+JlEr$kJ&k4`I{mzt5ZW)L(Y;|!=;n4&Z9k|j zjrMn&{dylWhXwLLFJDuci#D-1Cw7+h(5^ygWYr6sweD+yv_`e8SL=tEEwaQ#*D-C8_grik7;?0R1HmM2?--lg|59F=XK zi_h%qO8#U)S;qV6*p{J$4ldyusfUnG1$mmDgzF=CLc~2Kp zJRgRO`XdHodNa6j~Xeqgm2(r?ShqJl+c75!@3H#vI32 zy=2lwI==gPlJE;5qSSs?DfNT_(>1Ylz>q|T6=>1iGnI+7yp>N{H!io}QJ^GG6as_n z7VnZ8Z^AV-wb7$Qt@m>k9WJBFa!?IU{n{2$%PG;oVJ?~twR9Y^VYa$i_IF|&@Rjuw zcJroJcYdpaUXj{b}*Wy`^DjNaXVW{DKDISLEi`o!2uXJ?+eWJIGa2!A3S)iQx3T6G1=K) zrfp|h&x2~UjSOZ<#Wh2;^z;i7ic8zN6ZbGISgqUk<~Xc~&eR54Ep!R_LhkQd^xb~z z_H)jjbgj+48l%M*^@ML_k-vYnVXKfHbfuul#_#@U>QnKMZHku<7cPuWt8bc9lFftq z_WX>zwrotX_Cd|;!*0RvJzg@~V>&0b8FYMq6bH=O*zwL%5D-%?Ct7`ksaYoR-n64f zLrfO{U!ZmQ>d9wmr&lBP-2mq}}ebmE8T-qbOV-$$+qVJaNQL%B2%WcQ|#W;wc$YX`Cz>N{=tuT3DVS zMz^Pj0&m_Pnonfj1D;_)hqe)9)lkCKdo;Zd5S8YOvY|B=rs6HD6anu}%dKr%pv5dtaOY|RLo;;rS1x>(%J#w z#Ak0*%zehv*$jrugsi;lU|GC1JBS8C{YsvwNJRoBLiSz66u7I0 z!HjfnXO+A6Cp#}@=HLvz0qU#6ehkC92U8G;D|ET@yQdf1Klj-iL*~d zxTUvCExERD>vc!YvQ0VVF2o2CTa)8@3Eg(Z0}0P*FB|VR$)M+MwaZCC>4$kOjj}dm zPfS5F;KsqrSJzF6BgrWx;6Hbll?n3#qKBdhaM)tq;e3x>HXz@qo)xm;BawaTc3`n_ zxpK#$=(wj_{nnjdlx7v$u(Fi)0-=%3TLt7=Q@XzMH4OIp!Ne|JGAgezj--|MwUm0F zD)>O^Aim{_=={MW%?Ny+S)&vR+ou&<)bGln29A!V`yN?kDzVLFkuH$QEkkPC!S;w# z*JKcfKz)aG8;AQPsTJyo)gv*N)5fCbP@)Mv;rqz2_@CIN@j7|IlFnyzOd9p(FHRc1~#{`1Of4;cDVfzn+y1`wx>EwIwodS}EI 
z?Ry)a$sjObhHV!+h^6+Nq8`MbwyDH`v-ANU#=Uzh)5nwbY?$#Q-0ZT43rKtaCF+( zWp|M8z^nU6U}C|wuouhCRbs4juT2yV7+bs+#PGX2aChrMM*ZUUJICF0+&rn4m#v4( z^K@}>;9g4r-;QKvKP%myoXGb&zV-lzs~I_x4qd`uqbxNxUY=O?p3=HX+seykm&j^{ zBKci*$7@Ipug?+oklYr`-8)CVlxWX0@^jJjdqjSugbS|a(F~}-3sd*=+uwF z$X>;Z61Dx%1tf^{7p00dwo=7&cQPOqnCu!XJH?B~jnp@}yA^d#YI1X~8o04?R$;qV zWlGC8GycsrwNj*-`V_^F>4G7b%AM!wHdd{7jo+jgQgR7*jq&QqD(y@E1lWK!Y znFC^)T*u41kILX|%=%6y+tn%wtm&fO5Kwq}lKDD2e=n+Jw6j!W6Ui%A$C3Ni;ny>M zTgVdM=CpkBYW^0+dY|UFzak#J<#u+lW034;>OIkF&K z$u~CXk>3xn&w=x825(Yuk!Cds88#r|eULyz>DM-IqkgUfujE&M4vGT(J=EK{ZZDK} zF=<}7{Y;IABoGGfK;>>5d^Bf7{lJ!>;5(Q6yCA+)rEXE;BVtW_u=bZof|SeCJbCaj zzy0V?;zzP5dj?ZW8RE2Gd{1I+&U|fn>#J}D1@_%Vr*Wxxc@H@&NFZ+p^KNn_5*h|- z@lSTvtG%wDxX!l)W#d9`r%Y$vuSHkE4Vo{iP!X0~>lyZ>{BFb;h)mA)R;{UC(%!3w zgPlHPGM!1G3z?Cd%hWflcP*>YQ1wF7^B<=VoJu2h^CcXQ$-4nA3!GrlOyx_#)Nt#Q=}sKAr54`syn*JoNTELBtIyT-5*m|2IZ>Wy zyH%g}nnm^Gein}scfSOCJcB2gd?+$$^4u4&n)0GaO%>z)egH$xwr4w(6KRx z_67T4Qd9f3bQC8`hVm}C>L9rtg#^Y(7X=xW3!O*9Y5y9IJ}cjbLQAwcCkm42>A`?>(=cA_4oL@8AB@z)I_>aP17}LxrurN5^?BDpKcp z9*}Lg(1=Zuo1e}4?E89VmHSnhFL?YGTk9{VULX6BK|~K{&YgvIz^^gS4X*z56#w>< zF`rr7`|KcF-4UqoL}MqC-}eEX$qaKyZ2I1?onE8!WBa+*oY_lkYzGv%QnqhP{Yyk* zZ5u6Qo|mLmQGDN+2M<7b^sE&;N}7vTt(n;2LOv{KQ740v*`e((&_u%$Yx}Z0>c+8E}uFUbVuKre%)n8(|Kry{^r!Cvh z#;^n!M*gr+;A4N!vyxbC!LP(W6{-9Wqd+)@mW^@a{=9#rk>%aj<@Vzvz^oom>aNKX zJp(VnK0w5NHXI}E8@20wsRquyWh^82crG{VA=&N6%=aV|5M>j1o=U%|M^^5X2hN|Q zNXqLgVBiB`9Xq?dHV#t9&F16v{?aaUNR$`adAKiO&C1qFn|0pbTJO)uyP}o37Pzi_ zJ9pnEIw81P7pf$QbebA@#%<>^!!9JllYGjW+(86k4ooAL9#-+uf8OlmbmCE}J)9&; zSv6PBa+By*;BkplxH$J#9~ZD6^y~!|;@ho^mgvj#2RGtgF0fL6K(YG$yoRu<0GtN$ z?>OO$lF~Ov(mh7E?|VHdtG#Fu&~tH9fx?9lfA;-zC2WeVov1b~j@(;)(Po+gTsyoA z9;Pjt9ixyQ=%gf}oj9bAg!LgHeyp1!*S@-&&6>(GFB+-F1hU_)30-2{VITl_cvSJf z;;99JewX`(bOH@clvQtja6KLmp~cgbNqfatILPdE-|yxMpKa{3d$Sb5$3`=#N!FYPHabi{<0OML6YDpm# zS>-at+jMj3oJ#tmyPaF6Ah$TE%ZuO3t^1Zb@x(=<(7eto|By5-g2ex@QD2A@dM#mEi@v8 znDz1b;nW-s&s7>Kt8_D%5(q)+qxu6lpuJ+T6E?FY;>pyh)*sB*huV?qQ^BZ_L|XSV 
zpLl29#PYRY$jn=poAQ%^`^-njAN9@mv2rmlF!w3|mb$L^u7gAGcxeP*?wcH<(n3(q zB-`*?;TkGT16tb}w%ZNrB|)Dg_p1oZ%)vU59iZ8x%E$4ka(}`zb8fsEdb&~(d0kqy z)D~61pz*a};Xin`zTGzEW!Y!|4+gOIFCxO6c6?@RRXxzUeO=9d+chi+s5KrSp=(B zf)Z}Oh*@ui2us?Iy*Gy@dw0YFAAaDt)3JhVXotTN){4~*)QoZK)N)oCQ)_@~bkYU- z7}oC_c9R(4vCy^?X>SU#b8{obyH|27uU9i%^=8ddH2P=(*L0D3Q}3ts^57ea-0W@@ zq)BfN&G4Rlbeg5?!R?xwGXu(_^yF_0W8XT_@|a(^)zty;S7n!A-bYb)5A|7TM%N>= z+AYe%+k87n`EAHn=zi~U;VbB&?l4X>>dqalkncXmGD|$0g?uXieyt9olD=9Zpo@+z zdNxraCU$kIn8IF@w*q_GRXdDTIEadfv$ZPkHsoZ(l%|xlpY+9IcbA2Cn>X92-jRaC z>>d`o^K`y@?-EiT+V}f&I}^e{is5b7-(DKNK6!`*Isc9P@>*|fh5faR59>;OvOX$Q z5OnV?V|QK4K2|+!@Q|iL^6rzLJc(z8&Mk83B4Hm)o=z@ztxi;HpEss6gs0E3n)fc& zU_L$P3ng2Fy_S+U6+;nFOHnqIy%Z2-1H|hwjF90yuLx1-S2M2V>jlq=r-=}&j(BkEUo?Z ztQ~`4P*E*L+oo`hG2=Kf!?YKn_aPfV4YZ7ogY7JU{_%%zmI3>FnYu7*4`>) zO2Ph)NQIVVi!wBn1-XRz6gJk%{S7Q#9cx`HVs>M&3A@GfG41KZ`(@Si4St4^i}g1` zv;K=K?h{^PY$72Sm$+_be$%9Z143rO(tooXQN1dacu?}IIne1AyC& z{iU@B>j0An;7H1|`SQWa5p4BKwg3UcVUKAn>ems$qi?ng+SPd%I3yorM-R5n5mGnj z-tjJ`wRSyhl=kn6G!&JHn3TW0?4|6vqai=da5pHp@tU}KV5J5=1kz*TwyTCRG5Ph5 zd3FpVr5|d!ZySD>S;y+FU%4n}{)2_}K1q1N&qNzw)%NMA{%dZ4^|!0{jlsdkV}8>) z6bY++p{ECJ4bRUQ%Z^xbf(@?(JSesw=C^w#xJo!KbLmXY4_d7 zXOi!9!2V5*F&c7EHQXdwUDUxsB4(C03o*G)$L39{&st6e2D(quFR!4*qa`O!CK?45 znso20LfPUdZXxKp?~h#oEwq0DRAt`h&Jv43a}y6Z*a*+}sO7%{Qo5DvrRFwCuBSEA zva7@9iyNx=<&NDa&D9$1*Kb+fk-{2@D`TbKX(nx`%4HMO99*R%#y2Xq7((D$bom;6 zaP0O8yIh*r+UQd3*D;aJg7Vw!>^UFrymJb=!uNW+1K^x5VCwdJ$UaaSDbe1F zZogr=vO~tpv-rCoon*I_G~masWOY|IP6N2`73+4H35wF9QKYd!cf_2Xjmm@fVqCwT z9@|0-%JFSRKKEzathZWH{%sA_@DzDSu{C|E{h&?Dep%Z3*Ilfx4@oI*($3~04_b|z zZoe=48HUvs-Ca@ZGgjUnHbkq3(8sE=I(%UUuNWP$wsO9c?z7|J1=`cic^@Ld55QN^ zXzLI98zg?rY+>uwu&@NE<1n_%9fm7*m0vex*qiJHH|~9bnukJ8`F z<*kq#xci$>)QxX5dvZZGt&W6_Hm#Q?0B(M81lwr1ooe^^J#;4Bb(p_yp$>IdUpKCI z1I0s{A6bk1w+tI*YOcslw@nH!?Wc9S7S&Osi_}aN2dEPFE4cVv)yuFAeBu39VNSv9 zbmg~h7y#KX$Ot(JG>h?arCRfK6~TA$*L{2`(Ome z-eY>{ei3b1g_#|`yctQ1>cZ5jqI>-sBz9v`uJE{Z8vxK@ik*7L3PF|m##Lw!iQfRJ 
zJw|b!XS;B}tUNc9X)jKxt6wTl2`^>(8lqQhm~pS$FDboKEA0z>hQdpi`=UG`ys{~ZU1`$ONP(f?}MOqQT z20O3=0h`-=_N@JX^1OMIt0YyaRGo`+>eShLACy4-%o2(gQXBEY=Isn?~R!2 za2_67z5biD#6aWkt*(DZk{a8M&eVLV$m#k+0hcmV*!>h3k+8;(!Ya z(k=B*onn}c_}BY`IY-dwn8iv#jIsJ+L2p;hu+)cE-MF2Q+O`cHudZ*?+KY20@InqP z?HownIsuw4iDSRt(laMTZ{)dm@J2K*%vxucTf5Ckgq{jvGaWOeJ+fz=lFfErOA0OB zqDt?%h3R86rHc8TEeJaw?uX z^2PD$(G#h1UY-o0>TbLj3isRS6PxEtObhaOxU zRXQHs`LXVWYUxxA46~ocQk1QWlz^UTYiRP71~JFB6ZDAJ9E&8{oy?hW{sFl@2^7wT zF}fPao6xul-G0|IE(=pH-?nDCwWy_Wd<(%7xw{I%Y*|Y^b=RBjl3lu{8k^QB8fNen z%#G{GrNJvjB_Ovz`T{pN;OaS!?WOJI(<3@TOxMsCrD6Y=+U;#aEev|w`U<^3!1@<) zyY*|k2zy^NsK48GPt9!>`;lq1otit;^`Dqv43P*T!Ofdf#nA`vQ^G3!>=UdsY9DyL zO^&lSTQ{s{@0*d;+Rbl_c0w;iH>h^f#}({)8_fFSq*@ZwqOWFWg6aY}Fxeh}U|ot? z)i(xT6Mp74C;7@R+#nU01FpA~ZT-X`^ID2?T;c(I>FRm$=?4B_n-){y8knnJi+@(f zNjzkQt587H>L?dYEL?vU`%e`nIdrw#oDHR~O>jMUD~FFOT(yZR`}^;CwL5NlJfm;| z)>put;&vVqz{ocaT2K?Eun*B`H7b*hMW%ALsJA;(Ser7x{&9bynZg4){J>;P`Eq6C zjFr-*-vOe*^=(tkkISu2oYlvOzgBKh`X(BPB=YX!-DTl@->vmA3OA`D+mh-%u=Kt> zms0cjA(DQ&n=gNXXpQys*B{F58gP}c<`^D~<-zoX_qtMwKIxR6HX#@$tDaw-m%2GF{NZZOz?JKn6`t!IbHWwwzl~!|gSOyi=j0#d7 zo|k8DK&&koy$+8jQm?Qhq17&dSx@4?jSpS7!xw%E9JdJeGLrkbxx)Eg2C$DhhTv+e%`8@hv^P**>7$MZ$I{cH zaJ)%CMux5-a(5^lrF)}1=YME}E?d1x<=yWOlpa4m7jUZ;PbR_QRqX|DOxuRIek>6) zzN`P7bE$XAJ)m6Fih-ORrkQW-+4)&Gw>HaUumVQAY+y65<;n@yy@+&skWu;8u;#N7 zd*7^AyshRnzzV|jX^74hv7d)UO~0%Yjj!c)1#<-?4ZF-Gz;(m7bpJGCE=$vf_aPW9<8_sUB+Bo zmixe~Z-f0p?15=hp=>p*>E>KU?}k@?GU_3P-(T@5`LrNz2CL+S9(GM3rZyQ|!CVwM zgU#)f!hVOi{~S^@uwQ#+%<{4g*&FZDs=qJCg*hbZK8$!pJ9g_gjQy;=N|U3il_}qp zB`^j_CJIqC|;$*NdijLL2mMMDEzlAFTeY@bUOC%7L&;l*velH%lg z#BRyVTWBz#r;L_@HCF}5Y(})6J*RE``{=S~_cUoKtjdIy-(Zts+8sFYQK8N<$2(== zm+jbK9MGAli$VLZ*7+~aBInLz>grue0?j!v{06J6hw3+tjv^GmAi%l=pE#%5Z@?7e z>`;zchgLxcuBe<@nbpCs5lO8vnh?C267JQA|xP z`MOo_%-G8d4C123^TOTR<7Tve>BTR25ti@T@?LH*!mHx32YDV|sg+8f?ww1!yT4jo zQJ6=@2pgT(G!ztZ)~O8D4}HY8{o6N%qxP98-&i7Dn?wEl0P-@T;ZnOP)f*Xl!S0Ew z)yu_b3GC9w45!Sd*UW(!QJY%D%%HTSf-rBjzPvCHzaawnSE1W6IHA({oFhXu5325T 
zOy@lP?8j|aD}~?FhM6B|Lk@6zc{#bSWU;=%%iqiLy>eqQ5V5BM@AKyttlH(nlaN29 zbJ?BQg)Za}m+S|*)}uGzZb{@slpN`EvbfF4ylhr3yXwvAIc+VsSbL5@@ibE6tnZaRZ-MspFN& zLYI;QSerScwOT6}DXlf@O;E!+bD(BN`AT6-D8gh0gdqIymN?8fmXq(DC>*9p!BdI4 zU!8#UD8ExzpWzhj^|R7s3KT((B1yoG6L{sz71!Is4_-|gbyK}>;7)>`t3jn2-$0e- zpOwLSubt$*RA;YX9_v5p(@j;Dc(fTrb>u#vhlRNZAG1qb$d^h9TaNE8F{|J-0_a%5 zV*qrTQh}H)^t6ncB1oql25WKaD^|&7)y@W(lU8)v%Dn_1@ z)5df@r+o#T57(nJc|aD08x1_##-@X%So9mGU^vVv!;Qb_cUt5gU>>ym7Y+|0JPGi@ z=QsZ;+#R{GFM3%E_P7p2X{+PvLZ&J)pqb8fPH#(NquK$suYEfY%xz_vXe~}>fc3cX z{ap8ykDw)Wzr!n_=qFH#f+jzu+1!R6o4l^i9piX=j)28~zzwLHanx32QdEFFBHq8m zm)gAilCp8qss*Z1av=_0?Z?3Lv#eb@%f0rr<2ITr&F6!Bvt_>$;lilY0*b=skwyE9 zbF;Z>jqGu)R9KyuIdEHVqZeJ+SE%Ry*@Uy3yV&q9abI zrxt)bq?>1}Jk*-@aJ4*vpZ&NlwXD(-%v63$$QYxAVI|R4b7yMc6EouTALtGHunoi( zWok_bjA>xUB%cOYgB^ z`XA@Oj;r;0Ra^#$`|1&7e0vX{S*~5;(xB{QM0~u9F+w zn+iGy^`4UC0mz;V3~Z=$kY1;)iPj?O6q{9fap&H>d_bP}VeM9-0X(a_xl-}#a^vR| z7^xWDHMsTBz0RPlt=@a|41R?-VZI=={A;o&t|sLT0P%N%y3vJFCT5+ zJdVX;v0~mUYgswY8?!6cLf1Q-Y}W=~j^(>;?Xp}BVzJj}_5I++|NdIH#`LR#F90+# z`EwqUJG9!}P)%Z*?^6%S4>=H7<(vGGw__-IyZVe2KaW9tH*va9JfDK7%OvrJ{w=!P zwQZWuAD_$ZhBtkG-X(caeSnM5soHkC=j%Jc^ePK3@l{1Kx~c=vcV3kD2X4b%Z3BD1 z9P^XXU=c*}x80Y01dK6Lv{GrsV`vRP5UNaoQT?{oY=SNfeC~TYDiGW;_M-N(CmW5X zdpxYEObBR|=m52P=hR$sP%4!I1(LRpx3Lup>1-(Oj^ezvIPtfhxjv33U*L$$pIxUQ zM)Kik*h$x`LA8o?BN@!6FU#iq=1} zJ)3RSwqccSDIuk3d=iVCr#E2P%kQlb_VV0K^WEoz!>K)wV1rsxKtg5fyFO$Q6$Cad z{m|Oovx!jT@y)(z6c$am`<>jK^%kI@(CC1yJ9>z9Wup=|iMVU|(=6c5!$h@ygRT5T z7{V%zN)GG2Dw`v)&S-JjyYu5RdYO+81L&xqiex3ft$f4>phRXazUB|9$ zjK!wa4$tq)i>tGLC#Ryj$J>^tojY4msTY81#=VRw;mDdV=YJ?Kw`eF{v8R^=fCP&EuTR^OpPA2@_o1imY4|ltWH2&1(O) zc?VUQ; zI%ZTQZvoz=`4DvMj{1d=;drkQZO$p3=kLy?Gnbi<&s$gg_Dj>J4Z?geKtFMYlNUTV?9y6_{1Tgy1Ttrz2;7=&aw^kxwNA zRtv~huM6RS`XnY1@>yix;!uqF$8dPn0o=_B%_VguCwgqw^LqFl-mL7Is~=FTcBe~? 
z|MKfa!*laIUP8p!WZH+%p%zAwsCiQt{FW+0PXA}SW7w`cyoneJAHj0S(My~XS z|0)z~J;YOvKYc$|uGF>VBTA@sAVTT1_*e9zeDgY|2Tcxyq49}=G zF4hlK@(^Cb@P=(-sU$^qLD*KcLNLAVUBmHH2W3x53j@W{^9gT4tpWS#Dm+YspS*2h z+;{2gsUEZOJsQ(H>#ezp9}4O5(CJsYfwQf*j6GVi2xx3Hz`m;bZs7m@3(Rjb0L$^H z9ffqm%LRz4?e(UjfZ67Hdv_@%0FSdfpuaI8G@D>}gpXdt+CQ{a+|b5{_i^2n=bM2Fd99vJ)#dU1 z^QuIp%;G#^w?!ZQ#?d@A3NhNcn^pUrPAXThp;MaDqYn2kt;^rZ#KulBZgu`9a-DxB zax1P`V9mXYR;`OQhnd~iWPxY-dcZ%24LU77c5J5sm`weI&8ph!E`h(zbh3(f?Uh1a zX?xjMTc0azG^xwvZS2z6-mltDqgM~F2eY!Rd|H+xP8;C_Oq~X&$uU7JG+C`92nI`B zx$Ul40<~>m+&muVmTi!>%Q91Xi?Q&a-ovS39~(|558i@whw0ehkHW8CC$%X#xoRDy z10-Xq-VE2x>GkxEiWhbElvV@1AMH#0E#|J(^#D9|RB^c;5x{Z5p^2mm_gej0JWbq&VVzw|{TAGUq|d+hVE(0n>wZ5l6$wsZFcSfR!AyF*<4ou(EC0$%E{*>ik5Wb!D}|TU1zubPq9e~e;Q5VA+OjeUa%SXj1zl!G#*Bk>Lu9i z(pKwzG+Pq6S?>oEzZrduX6f$xGf^D9HWic*gCX*=#CnjHmXp_{y(1OhN8k^4n2*+O zo|I?Tq?Rz;r~hnrZ=7@|u>oP3tfY6cPq%P8!h*wYhOiM)pT?pwd73S2#|!T0vhvgS z@01r|ByRxyVee^V))hSN(;OP192_$5{(EGTmQ@_m=X^GtH)?V#%Rf$2l9xa_A&73R z2GzIQ8{F+VXX?p%+VqQL9Ws7+podYB=v-RSA$%!wU!8nyM+guUr~2wpsMyS`ePIAV zecY;}FTDC451za!Bvd!=!3uaRDyPF}SfBmuY+NjwJ=ur$GkFzX=a$&&V2^tL;hNd( zia%hPpy7|Updqn3r|S=PnlzR7_rS~yYcZdA+9n&GC--_qJj(07k#!;U-E0kZ(6A-Z zhjg10o8tEIn+o)r9gmdgcQ}5)D@qXt z%5!OpE_5i+04ueyp?B)3R+=@jbD(2`nKhjZY3uL?T+iw1ciKPm7s3KY0XoD0jT<+; zON>pzNFP3*DzyHoTF`+>V=n)WCuEMcKZQt4=90>_z=}l^=QE_tR)lpKjAgCjwJ-hZ zH831;CqrpnM}S!iaDFD0JWLJ~X!E(JV<>cgJ-0mQUdl-gsA&M94ScY7UUe&;5YpQ> z2aZ}AUZ7IDy-zoDrlL+&-<<;_cJO+vIm$0Y-(X5t~I*T#agp&@itm5oXlj573A|1s7bBFk>vL1ZTJa> z%;kl2p6nK$t`9qFR&2A~g}DDNn6P{|8>RvDY26oJCD-lBEiR`0ISiA$)4LS*;PP2} zWkIp`^3gqkgtm z1JbPFDc~R%=y4sZ5G2Z3xe}A-QGjfY*g|-;5d0VrGTZ=NfV~!=4!2YXcdL~mPpnd! 
z<5=y&G*2h1FmCC3-b0=GvWOO{T1 z^XBH%bP17cs1$Fr%JsWA`>TVpY!3Rh*My z=feNa+f9gxedTlSgqwEZ?c1jMm}J%%SsBMt<#Ed6K|9mUhr`dnCla+V?{u%en0VMV z9~u3``iIlQO0;OP8%D((nU%zHVK^8l$IQ&eQ#!l`3I>7?hR~`fQI|iO_^jM|DOCW( zM3ILBjmWheEu2+<&RG8}hgRoOSO-_ws6!*YuulI(wG}pg@OM-k#!>gPEVV07biWpi%g_%$=;4mgQDAABBAiSpMT2=s=mhmIdFmSF7Gd-=G?ZOj{`x-*pzUxt?m@nQC}Pcz~Dy}9}A)IA_G^I*dNr(m(F(u#FBSl z@$ZmgWoH>d_=D_IrLcY*n^CbMl%Swdl43;%LdkZKv*F^kTwSbpr*ya89;PSvbfq^8 zTjeFfJH^@(1Cn^p9=Fqc@d^utyHYnk?^Ch2c4chSbJ)i#49{E?zCail>L1VIe!J6Y`S>&SpQS5ctOz0pPfDrTX^!F}vX9%nkeGmyr=xjM?ob>h|9}sd;@t(_6Pf z*4`i1x9fTw0Y%k)+@9PAjSEj;`b-r#Al#Qc^sqhwy|~3v-nEMKVrHDa=^zE| zZIR6}v%d#;_5G)lc*LB29&}u$nA!?^A8XC{O~UxOMoeaTh5iuBv42>fd2P82I+1hz zOk3b3?-?t41x}fp#h~Uj8#Y#z^-(>r>)4Zh^-_xL?}fT-bJpIxlos_?PilV^5CbvP zpEW*6un~@`BL$ad;}WeJongvHkH4$&x%L6-azlLtl9`mwwIsfbow$<~^1Plgk9i&fK!k&R22B{QZP3ZQaB#Jo!<2n;X`S3o zPsA2p`LWiL17*9>L(oMlMv7h8cWvMkh^HgAI(808=YrI7YTKK%)nBnl05X;UgiQnN z(Q_(?LW)%2XORi32<5$}JmSn+h(0|u)B2-p|>0goMfm)G?p zaU}6k3cc?$pv=O2wkft;jZ;V<5+lB+6|nVv>;`rcUU6q-;=<`jIo1x7tK*91UJ8(A zs{vRP`Lrf>bar>d$fX)=Ci6(T6xuDIgwzh5LS?uKVyd$7S6QP_IWxmC2n=DkEhX8|JB&xqV>O_?xPP-!dmr=qbs=FOS1Md-E?z`dq)1ktw08L2 zP!3tYH(Y@!#ERW(YEG>and8hdcV_^IElxNf4~Z`K*{mI_yZH;~KoQHZ21amFTwHk_ zW*20)i3=EBZTV|>F~%r}RJ|x>0RclJo&{A4V`izZIN1i%e6g{wa1epY2NpxI|vVss`z1hBS0mw+To&it+ zQI`hbQlo6$#V=mg_QQEqF_UoTG)1}7y7U07?^v2*CA}vfoZY-%3Y#4iD&CzPd?*#u zx(cn_Ck|t<`yek?CuKeP_3tt8q{3TasTAJ5uMS^)YsTY`uj!ultUc!P z3(Y&74$o}anKu*)L3$>Z8T7zU@()X#mGt0}liY(!Xf*U$0dlYDv=7U?Jl#UY*_8s4 z!n@JcAu%4j8!3AkdY)hV5NmB8Pgua7*aMyPrz^aj+6oZOraJX$x$rBkB)aOSIfu{~V?7QK_cne(wDz>y*H*O;mH(nsGT zAqJn)(}>J>wjeh?wAKFhe+{no`5kuqN6lUvtqd|?8&>p0K1{(gSnOW?);^y%%U7|y z4Gv(o+wnkI5=J7RGB+FFnAB<-~$ccVb zua9w9R05 zWmltfTU!G7C;_&K=_T%|;?J|I`+EdB;%?oY_sj15@nptD4ldd*@NdB-qBH6ewIgvZ z0N)mGuhF#VfDx$=NB}*B5;w+i-a#g`G6Qy$WiOvT${Cmkxn(b$Zk@G0Sa{9{J&?QV zqTg83pV!7u4C|3n?eVWXK9{uZv?~h7M&Z>wQlDP57@}|c6t2(4)-JbpQ$OUPKbtTo9VJya3Gdk>&h@(GI?d1uHe9* z?=?!8IG+P;I|g+CKEu3Id37=Bb}av!bzI3HNJc;hH@O{KVb{%|v4sAR-Uu&TC|AW5cauXZa&OFIPSDw;bv_igD}7 
z^8-c~DBzKO3Jp4`0#F%-D@ZA{x9!GbaVQRTa74C7%TUk!R^z0tK_n}4Tv`;5M?_#Q zcHe;r$Vccykm9r$e<7BfQdiSt6@4;eT%;9^2BX*VDseULS(_M#!jNQ_70&DwylzB) zlr2lc7w8eS>@LZh!`4Qf$)_JZrCtVbM+*%SCpeSWBV~ts-OBSi^j_ zvPKqM>vsmyS;9s!>rXWxT^n*CJ+$*4kQFqv(|HAq$InR_+ts=!BYzux&+feh!A*(I z`_lW?SFbCC>9Y#h0<QXMtM=P$y^S1F%IR z1}*ogT4YdPf=`atf-G%0+lu66mj-B2uai|T*=EWZn_j$}l{Du>SCl=lS63Xn=eei; zQ=lHz*>*h1UVF`$ z5O(P*U;)|z-`ME>_T$;DTDReK1kQ87ifV#SV&t|O?mL?G+O$^M3HKU0Ey$Czk=3}{ zD)*4DldQ`586aD_$067Py=sf$y>OqSq2v0(mn$yoFSv%QzAW9k!Vdjt>6Kq-Oi8R z)ts-GdCczTb5rqoc%-=N8_iCevc zDq)E};jd0G8&M;K5kH`SHnmqUM-wnw2B`N*YsUh1I#jx{fl|Cj@bM&qg8baP+rx%> zS-T)Grutr=em4n&`GX%E4+m$KjVhP>p&5qO#CpGm8|6`HTuGwgqobbXp(Ht?1>OLM z>Kv#X9O7o9i2}{MqtFCca2|k}{Wp9cugQ9W6gRh3@N2{z(C?%!T_J#YE#x{4U5pXmW z%cr(CWxqwF4ZYE81?hf+BM3J$Cb;5AqF%YY7#B9lNsHXxY+XME(x(|@#5rpDJHFN^ z?l#wpzndIIVK1TG)$Ud>AT>ERKLmB@f$QnGtmT3>GN=72g9&uc0n{iQxy+Q2UYhH^*jcwcWAT8PRiv$>=+btxfRg0V&EO zO-9p~FdN+qHz|FwZv9-M+R?uKXw_%$`{9O3#eBkS$cxd0YY2bMD^GBtApJT>Y4~sy z$d~LNTgXSoh!{-2KX$$AZ(Dw~k|>WSa4(J;m+lyEFLNI+2LtSfy)3KA#UlW-vlF15 z0?-iD$am(vxSvdW(61+CPjezMfrBicZexGHc%J9Ju=QcJg+Gm^GwfYk>e%62jCyho zu^tZ0>b{%8YMjEloz-+AUw(DSd_p6kGQi+!ngb4WDEEocCEjg8Lrqr23O}txhaKRe zjJ4ijbp3Q73CJJ;PlKJX*c*P$un`ZDi=**egX|P_s7QdAzzc)!n(izhEkIdS6bb@q zUiX8ofro=#96kp|@mBHegF1OUYTzZNzg@fIR}(8Y_33%qTBt_OHvJ!*v7nbfd6k3E zT9F`cEiO!sjffow%GW6@wwno!98U#WkBCU?$vq?(AN?Pw1+$x{8q?~o5B8$t)`YSa z*f%souU^N^`}I|LbePiPQuNvY`)SIXaY1BgOB;tLnR_%w?sD(gR;kuZlx5?Mj*t6r zC>u-nTVSI85$sBXXAwA(&h75%_QH+j*eKb_H(9yo%PM69zISdF^T(FGT&7B5f z8aYsx@^&>1M7#8&ZlwieXox&}Ub_p0ECBS};thN_D}$W)T**yZMJ{R1BZed4;|0BJ z=(^zdHh~$_vsV{xCYSqevKncI*l$x*4`_qEtXvX7tNNCTGsg3VPz7* zhL_Bp-_~?W#PlRWKPy1jT%I;z1r^V60$`D%ezL6>){c=L5?yE*MdNFRHwwK*W4ogZ zipP}O9+FH8{rS|?7GRDdzRE6*C*N`T*&fP2xOsyc*P6}0?x5T>R;!0YDl0JE@0-F* zk;ikd?Yz$B<+GUn(&b^MzQp1Zd5>=F1*HDH^YBYJC*~e~gyq41%frPhty}sAuhD>> zl%V%zEB7m`9 zFlxR-aN^XvNvCwOdA||Xw&wGjTC_I$0w@W`-M4L(zf7^EQ;fK(e02aSydiCy@QyLg 
zoZTL^w8!e8KJ9ZI`3q7@JM{9zNm_diGY%e86yegyPXQbDT}`m|u=8+HeRk%zF`{ej!9dijN^URq3g>^3Zb!!H7;emu(M-4cTw z0K)>X!hU+SYwu;P#M;2&+&nJ*w{eDiR&5Quni~}F$El{aFY9T`PVvF@erh<0^ERiZ zXP2O35PZ(bV@YZ|GX2;6BB}9BF>|rRwHHu zK`M7IU}N4Wu+=VidX!6^>(-Hq%^Jg_1d#Zp1zedJWKXye0EmWt+b^xKsBo*BbF5m4 zL=+86(?@JGI~YZ0Q=D&^=e{$-E315N51j{5wM(~pWm4+kiSLAdty-!%}5rh8R%FkLA^?eLv6wrhLaYB@{r z!Mvrhnyp(Mu7_68V)s@?_kFzF38sq@ASjeu1A2x5%V5pU9+IjZG-LR_yi@4-3+R1x z<&x;j_SHv;@3gz8ws~09$mw1OD1lix5}JRcFSUinqh$+Tt7hDv&AuY zF*O_yvbRmiW1!samBnJU`uz6b_pI?QNT8P_`}e{DoRvDX04Ze)LHU?YdScM-L9lDd zDLzy<`oN*h>8SD0xfZ=isL8i=y^_Xi%1%-N6XUFzW6k@(bcoWl>&BxbWmgyqAQ^_! z5Seg@P=k`e!$$l!NcO&uRB&Pct(Rxy3tUonT@DIwVe%Q( zMs-D{y|#4dD9_Tc0bVMPaSPw{X;a2@%e;HQ+`s1gAR~^>UtC|qvCKW8qgOsP0Xw29 zH+X%ygkO;M_dD=LY?;$=(YW3F4N1HvPIkCQrurk=i)6Zi>tuJEw_o*ewj9GfftEOX zgmg!Tn>PeBLp2%$Rnc;iv^LL1y9(AI@<=yE1ItjNd*lkuVmztiY$PiC$Bd2i5Y5Da zw7JG-p0`SNgGZjgeT`ofJt&4CzpADI3JcOnJN`91gjs*C)D{A9{Y8c>HAgm^MejCn z^!?u5FG{au2G%mghK@wnW1Vz_0eibWj|~v64j~inq=nA06Cl;`Arudt<)J5U_6*zNK~k*Pnwi zMA(j4P9(%qX_X;h@-wP}tMfR5e27lBm=+y+-&lm&Ktx}Uo}ymdFa2#ZpOMIF3I{^y zE?!q^vL7SCpn!e-l(BRV9k>Lp2QP)x`T#eTq8E!FPDIplZS656d))dD7qCE>`U7R858;6coC$nm+PBQ zL;(v*Yw)%(&I7THAa*68jVZneUU;^ zcJ^!D8|qE(l52KT5wUCcFZ%i{Hx6*!zU!+s(d|u$P*Oh@knbepAtpTgL__aJYHh={ zsvqissn1oA4d}o=>Au$An@gY)6w%`Qo^fXuY>&M_iC6k%r~p;s{BnEjA`t9+eh<+e zek%;d+M9VfSTMTJc*r^m>tJ@8IID^H>XG@h{VaLwp&^N+&+!?7xXi}N;DU%i|sk7@P)|!>E#2u9j`*y<4!Goz>T~h zv4IB=9NvZ_rk#d$J}|#%!BH2Zgnm#D?a-Ur&^bQQ4RNA)`EnjRr;=-l&P@Bh!ts%I zRjaV^rLO!4qL(lhue;g35feI#PS;Xzs7}L+9N#B!P@H-&jnp zZmBcrd>h0*PnzX72deF{QF%FjV*~eewXzk=OA%Eq6laMuQyGtm>QXop99#Gl*OA%( zo(?+FUGcs21LiOrG=;zAJvv@#)_os(>1!~H3u~vmDuXLR45mUYNW5Xtr)In3rPYWA zb9{|F^F-Q{doptN@Q0uu@VC#(65xPLw-5Xie>R1gacW!Tzxf2=<;Gl)!Rr zkrM~PZ2hHV{E%hI1^E554bwQzb|Q4CQ21&3+MQtwuHzy$-x}^5KeJK2c&_nxD+5kr zH0e5ZkY?E_Nvk-~^Os1lI<}b_??e5zp4S>#{S_U@}lLe2Ofd%d6zvCexiLP%XAgL>w6^6CaAkxYcIRx5;OJ_bmD?3R7it5YV`^KNF$%lW>{dTE zK&r#H-G1KFhXYSyEr4j}lAxG-L9ge7z>uQl^RTPOCq}Pst?N%; 
z<^B!OSBp6Asb!jLt$-o|`ehdu3R#@G3(e6i8FhCBV_r9SdjiJ7X1ux-tJ!t{TY4eg z{W6%}QkRGbxO9C1QKLBs(R@vW}@^}X|C)_FTKPp!Vy_rtw@ z$d!gc>=-sdu!rY42iuE$8?%EN(7kVBcKR}|hMi@m(XMu*D|^?76%*OvP|!q{3nQwY znl|TtyE^fb2f$nS@qYBfavT+X=kj4W7ANhtV8>bcC52x?5`ZU;V7@JjRmG{}n+AB4 z7Tr}UJD>F<&04Y0t=JMZYA~&Esv1vsJp4tz@^9{U z=c{6~xN)-AA6IXGd*?rqvkPWR8(~f%LaU7|gsYdj2NDWISbzqV+P=SX;7honb2bVbJ9=*oy z_{RQ;#k_gi9np$!V|MG*-|@>H!1eEA;n)(BTAS{~ z+`5&{q%<+8&2o*O2utjWfE1lHJ3%3Hn>l*Vj=f&}21P^{RdVIlhUo1ih+2ZXcNw+X zk9f0NK8+@uL3-_@S6LWWy&pt9<9SN0?Y(19|%Er-dKkwyL`%r`o zj?_Aln}7*7)~`o{^&sd0-_XnMWRd8t$5!n#O2(}w>r|gzQJ-ZB_&i5&h>=*=XH2Z! zYo)b1D_1vSRNVO)V@o&b!O#hiaE=4tY8m2!;`utaouyT0SFdK;vChLkmV*cpfWfMO z8k6kxSsdd`on+{d`gjdyVTkuBJSM;$PeXnZ;~eVh>O~oqi`iaL^=7SxG=alooptX@ zW?)ye30TX+gIU3;(CdjeaE>glMkL7MoT^;b<$LTzqr$ix{-O(Gg z;|2G29kEzvYs}b1tr0C9Dr1*^MLdHb6ul>~z47JJYSXWpz(LSAmKiS@PoLaUj~rH* zYL4aN`>jjKi41=i8cK?280+3Bn=_GtebgsfX7nKqz{vL+V7K!%hHn$~f!DCL469rk zoo-OB*i0%NkEZPzB7@st@Y(?BE8=(fM7L$q>JlT%)5*ei=lM>zT0d1RJ5z=rbn`J^ z@V@NUcCC$S%%kn|_6T-g>L?!u{<^50f?=k~l#d9Pw{LPmf30U(AQ6bgE6*uaQJ?ym zuU1>@Defuj;yLvg@zRLzc5s>SHr3eSK>=)gjoNOo19wu93Z`p``=srtJ#=;;bDa#^ zS8daU2WJe}#Sb66MaMlW1-2KB&&^c+ zi3_I;9OmUtUv6(zeL(pJ8)Rau%*<{RuLEc+37?xT?C!fStH9y^*hS ze}3Dzt%_N_Jir2hTTY#~&rd?sUSrw4Tw#zPz6ymvf<9QFjAvg~x(nc^heY z6&C`w?AQ$8iNx`|`-t>)ujtK`kr-h4%IWN%c&QYDNS!J|b_cscs&woR)+>G~1wa&2 z>Rj&o#O2f=vuu%(g2uTyF?Xv;`p~KZ)@y{r`}`_>T|Ioh{3`Vcp$BT4>!@kFHr@d) zT=wBSnn4s$#8YkI0YsH%L9VZxs|%0=*`4h!3o!?bqpdHI1Bw$H4O?~SLphzzKB5rd zlgX5u1F8lj*FCPjtnVEZDaRm&-G-MWXDs%a(*zFMi4{@MpVAHExxscl0qCZJX+<}^ zJ1bsu{a)vxtJVcsuF%|Ny{K1Db^fqKYonVXEmr%dR(G&s*O(^e_fE&kpigQ8&;wnp zPoEl=H(5r6$1DyZgib*0NFRum^=*6{WhZx8O3Al$H#|I(L(MAO|86$wa$|Kj=6*vqGo*#ox4^ zLac@lOS>tX9yM}Xy+UzbY#pD=0OnSzyk_9|s^Za1VxC34X2NA}RaY|EbBx9xK1H!n ziNVXU>!~!W1CI1GJ`vpN}T&Ni`(=;;lg8&v+^b zpQ&tLaAK$WE*cnU9;}Kf6ieX)sh+P+vt~KhpUPS~<;@A;?F?>-o*orqxlv^vX`rt>6_rq{ zna^MauHNPI3y8jIxT7Jr#Ck*uA|M9#V+B;4x$O$uFJiMILF>L+%uXt8)gl%~m#r1l zE~ss4yv!d@GtC|W^3yB9`@`gHR>ES|!0SqtO=cR^@ivE!CIV@o)xITb1&9jh+EbY6y`f 
zrIjVH%Em5JNjOYoyY{)+>iB-Rx4%WH+hM+Y1u=lv30n|l(&|0zNsl(CY19Vz69AzU z%h4$6_t{jw<&kNnHf`Uca)dD&+4yna7Q5-q%6*RRM5hl?s*-ZZCU(!g<<`I9+~O-LW8bL4INP_7lC1HRd-olgB}r(Mva(bGZ2 zTjMdj`IVha!a2SawMUHse)XlI54 zX;$P|iHRi_D<sIBa!IgcL~r!?l>1bRYj3!2fjKB<85;*B_t|1rQ^N?Pf#3+Z zr{gze^vGQXgzXj+hbjWFWnsg@TZ(Lk8%$=B$Pw8hrK)&ky0510Xew(yhgL&YoY7*w zSee-_dU*^ZyJ+%hMDW%nH}?UQ6lgGWzs{+6OP=ud=ih&O%Ut+(P6Z9cnalxQyix7+hXVOREXj!;P?hkb6M@q#1RZhj*(0?%_%>Vlt`QK)VymkHOh=#A= z>bTZ31bG=h)>tuYt7q)3>oY{L|1Ro#rWMM~p0HQA@XuSVXNdf$F#YomWi=x(uAcoP zz2j$+&RdP2!YZ%eCkF5XWx2v(w9=sbG`u(@ikL;GSbqIddiKzo@@S)@+@Qs6&wW`1;9m-98;|{q4 zpzA%cIO@SXu*$PX2JpWL78BuT&j_H%aM>i=x4 zZoyUS(8pzqty^3jGNGv${}$67A++k=w;-@h=Ls+)sd~BR_5M|8P-QmiW9V`|wqR9G86% z&v%!CdmM-F`I_Qqz#JPPAckL4NAX}A@2{4HNNfs0KzS*6ZPsThU6UF%LDyqt3gGT^ zdlmC|`cm6B!DTK?WJ{m7L|{1`KgdWC0Py0&RGiz52EwszsuVd077vK=e5@D6bFRq4 zi_CS;&e6yyvuJXwvPIHQojlcPDL$D#662+_)AHKBjjL=#6gL59rot_mFB;ALVo^;t zsUFfE;s#+YI(V(>qJ5PSjTbjRAqdZI-lYdh|3qk&r5E0KE?!lb#0;k0MOowrPQ|D=67K}R;#xv zoZ328jpDdQ3&-yHnyW;!j;R0gC4~HR!1#wsPvh)2rC;OMp{K|5fR5>308@%T(&T2x z1hc7t^FFPynej&4LyBrgj*gH5#BT_8mIA+5JBY`%I;8qX8c}4}#Fgsy2)W&>%+%{2g~Jb?agEM%x^D$)Vxv2Wfm;+Rwbr?@ze7(%LX zDpgL!eMK8^g(O|T&P%*#(m^a>#hL3yMh@layAs5IG@@bVRy^>nqA=6gU4>Dvd8f=2 zfn#E|p6;6x1mBG+rc#D1N}xZ`w$4E5W%7y3VJN=P2SK6^r5Z=M)fR)6*1;iFMw~wy zD87l_hsS-;j2D)VoSH%}yK|+}f)H1fY@|hXkl6w=iE%s_0L(U7qybkcl?4{l)mFj^Ub{!M2G@`Dxw!8xFC} z7%)bgi)al$_9F5&VK2|40y4ezAUsz;8qdNvc&v#ZQLA4yNcu?1R_%I(IH?+uka zHDv)mv3xsEnhWiCs$E%b8jkYGZDKU2+?MAWEze}j8&J`Fx3wKe1sWMdR{?nehYg36 z@FxeLPbVu)wYPI+Lp8c#vFDemSM{nCh{YWx5B1%a8v8tFqtUCCzi@=q4~9Eu8n#RO zf)W@?;k9?Qa{8&VQn^NpSmfc4$I4j>pLP%}N;u2(VJJ+>oPxwtcwRi`)6?vwNz3_3 z`Oa@iz(l! 
z?b(D`Ydj#!s0TngI>pV7%r{b+T-xVu$Hvl`?wCO#4sj%muWNnm zw%^O5LaoKlGgaPb=~-#wps`k;+C(1u@>yk&#I<8T^8+~?HtCc4>ATKS$ zXk;ylz3GkH43MaoB>9v&iaa@g>z`a=IkOss5{yRW<7k$+K_+yiez>w@*J@-ir_mJq z=;q*Wv;CG8KMm6QFkP95IL1MzuQXk7EQYI(b>x4%qwFg9uPWo-WN}RCx z0ZALHZU-$sl@bnl*v($6wHl3jY86n$tzlCT#5g)g?6BETnWI~?^3vNHWB^ZwKntn+ zW3tHY1prp?DuHs=!~2G?AC-;*1Y$O44t7%PoElLKa+RDny-6dxz*jek9-}))PloXg z7pw87CdzgrN65)ma%_MOGp7Oyd^3tH4f~yS7Jyyc?D1IkroF*f7@rr1slK_CkwH*) zXO%k81VD&?nw<99YttUoiuG1pU*5~rC?33oJamX3DgYdlk*CkWD4tqr{iC)spW#FG zpq19!@U88|@)kmFR@Ei4*eq0Pj^O7-rYgjFahU)Ie?gMNZGVpy#&RSGl3s7984t=^yB0{@|UGvCjG`JGPg!$p8Y^OsE5W6%AH5}oQJo0;U5 zQzCu7Bmi4lpLy6CLK~Y~5rRkd4YMfUD#S1mo{m!Yy0+w<<{eY%6e1C?pAFGluthL_VTnD zhxv#=*K0*TW>(vxul5TSYQc4A)L=Ktg6MbenZ-`c5=qb4SWNeRnnb8BSe^0-oNtAD zucYCY-06YbHA)dqPMrjerSb%qp0(1_9fBi4kq5lbUOo+UkIh7#!&WxN#!b`{ofmKz3rkI%O&2%|WT{9o(GvHO@NL!7&91msTbXHY zPfja+V2kDo+5wyN#;IfM#02J3l>$I5G*`Pv$2_ITX*9T$F8LfLtt&m^(67<;I{)w| z_=-B>myuK?8A?ShAGrdx*=qurOIhwZC91nsZNqP+R&I&pgQ_^YU8e;IQh&(Tn?Tz4 zYt?&pZ&tXUu=5vci#!ekfYuUzt&pFL1piPWNsj*f#5TYXzn3z>D=|5g=8tk=*dlJI zoy$+cx6_h%o}U}hWrgyCZx}L?g)O$bB!HT8b1O&dXgqaR(=xVxw4Uu))@!Jc3$};& zhuZD5307>HT}Mc5sSM)ABn6T2fPx_#%Mp8&XtaxxM}rvYOENpSGtX)JNZv1lYzkS zJfE#0FaVc(<#=qh$pdl3TuW`R$%ar9naQA+KVwV^tJa3s`JOq`=Yo-%qbuDzE7xDo z_}g3j|MrYTa$n9!hCi2SG_N1yb@Td&qpJuwv4A9Ih9i5{}kR`EnJRgsAGkXak( z#Y?oazB=vV2HfLQuN$nJ1O3il9TVk+giP+ok7Wc-_IbrCrnyV0I&3=AUPDq^g0U~w zFX-IavUJM~1BGv5QoW+)pga=*wrz7^CfKj%`;*1zA8I%a7MM2!%TJpKoG4awykK+^ zla(;`i-kdt7drdry2+fEm|_NYuCgjZhVIsN(_V6?CrNspD)b_?+bkI|iOGFp5aZ$^ zxqNVz!q6FTiFc;l3|#Tc%`5|Kms59>;T!=SGS-u2zBKHLu_I;c9Fo z+=k?G=#kHTOi5#7YKcuJ+SUKyVeu~ z^lJvR*$ZxUMA({~0|LNtWVebimD%AIyDlYHgUu}v9D_9SotoL07h`q98mx6nY98e0 zRh=)!(#r>K>f|)vm_*Ki48!rv!xeBGL&LGxai}-w4Mg=Cd+q47uu}K{$GC~GjeFMK zmG-8StjVQjOxSl0jR`d|)L$?CCj;Ezb$heB;UVhX&_;x)cUv17f^*}WWtR%k?+wf+hal5XXh+cT*f6qTda8+7B)H%cTE9z|UKJ63W%V{Qy?_&y_xz zg??6gzrTOB^yxMFXC>-WOFugOUEKTL{8vjq2K2oW^SPx@$CIDp-rvT5w)E+d@w3u9 zB-qbNpLV&QmH5x%KHZu>E4?4$|7z(!eCzo8Z+M7i;qT8+h-Dax3BCRD@!_Dcw@=9Z 
zSw_6MNxqbkZw!3dxOpX!mU zz<_psF5^h*s|-qD_^;yys0`+-3>wGT?_~t{RW6Ow%vTvS&b*l}KlKGHGjAIIrHq58 z($_Kp|JELX6W^`{cj34G5hVLfF7aj)`XU4P1HP^sO|aklOyX~Nf}i_La>Ca&qA61N zHV%sCzSW~y{M$Gf@|$kZ3`>1$kA-f0?F-A`U)P*wS@IWU?6>i<-1m8Axwnn&^LSbA z+xoKm1;?p#wfy&pgLeJR;fjR*?Vz-H-NghA!#~#*IFBr0gPpe#ut!CNTwqj ziX(53WJjXl+5JDNr$L(lK6Ug$#Jk@mH}*W(oncMiyn7SpKP}j7er#0F!0?0j4Txq1 P7XSS>1ZfPLzyJ1M`58Tq literal 0 HcmV?d00001 diff --git a/doc/DESIGN.rst b/doc/DESIGN.rst new file mode 100644 index 000000000..a4e258859 --- /dev/null +++ b/doc/DESIGN.rst @@ -0,0 +1,91 @@ +Strace.eBPF +############################### + +.. role:: large1 +.. role:: large2 +.. role:: large3 + +.. space:: 50 + +.. class:: center + +:large3:`Fast syscall's tracing` + +.. space:: 150 + +.. class:: center + +:large1:`Vitalii Chernookyi` + + +Why we need new tool +--------------------- + + - regular system tracing tools are slow + - regular tools slowdown traced application for few orders + - output of regular tools is human-oriented and don't assume automated + processing + - overcoming above problems in regular way require: + + - kernel hacking (sysdig) + - special HW (Lauterbach). + + +Used technologies +------------------ + + - eBPF + - KProbe + - Perf Event Circular Buffer + - event-loop + + +System requirements +-------------------- + + - libbcc + - Linux Kernel 4.4 (for Perf Event Circular Buffer) + - CAP_SYS_ADMIN capability for bpf() syscall + - mounted tracefs + + +Pros +----- + + - Used combination of technologies allow tool to be about one order faster + than regular system strace. + - This tool consume much less amount of CPU resource + - Output of this tool is designed to be suiteable for processing with + classical tools and technologies, like awk. + - Could trace syscalls system-wide. + + +Cons +----- + + - Limited functionality + - Slow attaching and detaching + - Asyncronity. If user will not provide enough system resources for + performace tool will skip some calls. 
The tool does not attempt + any workaround behind the scenes. + + +Structural Component Diagram +----------------------------- + +.. uml:: + !include DSGN_struct_comp_dia.uml + + +Behavioral Activity Diagram +---------------------------- + +.. uml:: + !include DSGN_beh_act_dia.uml + + +Conclusion +----------- + + - we reached a performance of about 1000000 syscalls per second. + - there is room for future optimization. diff --git a/doc/DSGN_beh_act_dia.uml b/doc/DSGN_beh_act_dia.uml new file mode 100644 index 000000000..bf7a80c14 --- /dev/null +++ b/doc/DSGN_beh_act_dia.uml @@ -0,0 +1,19 @@ +@startuml +scale 0.50 +start +:Command Line Parsing; +:Loading "command"; +note right +Optional +end note +:Generating eBPF source code; +:Compiling eBPF source code; +:Attaching eBPF handlers to syscalls using KProbe and eBPF VM; +:Starting "command"; +note right +Optional +end note +while (cont?) +partition libbcc { + :poll() - wait for events; +} diff --git a/doc/DSGN_rst_style.yaml b/doc/DSGN_rst_style.yaml new file mode 100644 index 000000000..d65f07715 --- /dev/null +++ b/doc/DSGN_rst_style.yaml @@ -0,0 +1,631 @@ + # This file has RSON syntax which is a superset of JSON and, probably(?), + # a subset of YAML. 'yaml' extension is used primarily for syntax + # highlighting. 'json' extension is not used because of comment + # incompatibility. + # + # Project's homepage: https://code.google.com/archive/p/rson/ + + # List any fonts you would like to embed in the PDF here + embeddedFonts: [] + + # Default page setup. Can be overridden by including other + # stylesheets with -s + + pageSetup: + size: a5-landscape + width: null + height: null + margin-top: 0cm + margin-bottom: 0cm + margin-left: 0cm + margin-right: 0cm + margin-gutter: 0cm + spacing-header: 0mm + spacing-footer: 0mm + + # The first template is one of the 'pageTemplates' + # (See next section) + + firstTemplate: coverPage + + # pageTemplates can be accessed with the ..
raw:: pdf PageBreak command + + pageTemplates: + coverPage: + frames: [] + [0cm, 0cm, 100%, 100%] + showHeader : false + showFooter : false + + emptyPage: + frames: [] + [0cm, 0cm, 100%, 100%] + showHeader : false + showFooter : false + + oneColumn: + frames: [] + [0cm, 0cm, 100%, 100%] + showHeader : true + showFooter : true + + twoColumn: + frames: [] + [0cm, 0cm, 49%, 100%] + [51%, 0cm, 49%, 100%] + showHeader : true + showFooter : true + + threeColumn: + frames: [] + [2%, 0cm, 29.333%, 100%] + [35.333%, 0cm, 29.333%, 100%] + [68.666%, 0cm, 29.333%, 100%] + showHeader : true + showFooter : true + + cutePage: + frames: [] + [0%, 0%, 100%, 100%] + showHeader : true + showFooter : true + defaultFooter : ###Page### + defaultHeader : ###Section### + + fontsAlias: + stdFont: Helvetica + stdBold: Helvetica-Bold + stdItalic: Helvetica-Oblique + stdBoldItalic: Helvetica-BoldOblique + stdSans: Helvetica + stdSansBold: Helvetica-Bold + stdSansItalic: Helvetica-Oblique + stdSansBoldItalic: Helvetica-BoldOblique + stdMono: Courier + stdMonoItalic: Courier-Oblique + stdMonoBold: Courier-Bold + stdMonoBoldItalic: Courier-BoldOblique + stdSerif: Times-Roman + + linkColor: navy + + styles: + base: + parent: null + fontName: stdFont + fontSize: 10 + leading: 12 + leftIndent: 0 + rightIndent: 0 + firstLineIndent: 0 + alignment: TA_LEFT + spaceBefore: 0 + spaceAfter: 0 + bulletFontName: stdFont + bulletFontSize: 10 + bulletIndent: 0 + textColor: black + backColor: null + wordWrap: null + borderWidth: 0 + borderPadding: 0 + borderColor: null + borderRadius: null + allowWidows: false + allowOrphans: false + hyphenation: false + kerning: false + underline: false + strike: false + commands: [] + + normal: + parent: base + + large1: + parent: normal + fontName: stdBold + fontSize: 175% + hyphenation: true + + large2: + parent: large1 + fontName: stdBold + fontSize: 175% + + large3: + parent: large2 + fontName: stdBold + fontSize: 175% + + large4: + parent: large3 + fontName: stdBold + 
fontSize: 175% + + large5: + parent: large4 + fontName: stdBold + fontSize: 175% + + large6: + parent: large5 + fontName: stdBold + fontSize: 175% + + large7: + parent: large6 + fontName: stdBold + fontSize: 175% + + + title-reference: + parent: normal + fontName: stdItalic + + bodytext: + parent: normal + spaceBefore: 6 + alignment: TA_JUSTIFY + hyphenation: true + fontSize: 175% + + toc: + parent: normal + + blockquote: + parent: bodytext + leftIndent: 20 + + lineblock: + parent: bodytext + + line: + parent: lineblock + spaceBefore: 0 + + toc1: + parent: toc + fontName: stdBold + + toc2: + parent: toc + leftIndent: 20 + + toc3: + parent: toc + leftIndent: 40 + + toc4: + parent: toc + leftIndent: 60 + + toc5: + parent: toc + leftIndent: 80 + + toc6: + parent: toc + leftIndent: 100 + + toc7: + parent: toc + leftIndent: 100 + + toc8: + parent: toc + leftIndent: 100 + + toc9: + parent: toc + leftIndent: 100 + + toc10: + parent: toc + leftIndent: 100 + + toc11: + parent: toc + leftIndent: 100 + + toc12: + parent: toc + leftIndent: 100 + + toc13: + parent: toc + leftIndent: 100 + + toc14: + parent: toc + leftIndent: 100 + + toc15: + parent: toc + leftIndent: 100 + + footer: + parent: normal + alignment: TA_CENTER + + header: + parent: normal + alignment: TA_CENTER + + attribution: + parent: bodytext + alignment: TA_RIGHT + + image: + parent: bodytext + alignment: TA_CENTER + + figure: + parent: bodytext + alignment: TA_CENTER + commands: [] + [VALIGN, [ 0, 0 ], [ -1, -1 ], TOP ] + [ALIGN, [ 0, 0 ], [ -1, -1 ], CENTER ] + colWidths: [100%] + + figure-caption: + parent: bodytext + fontName: stdItalic + alignment: TA_CENTER + + figure-legend: + parent: bodytext + + bullet-list: + parent: bodytext + commands: [] + [VALIGN, [ 0, 0 ], [ -1, -1 ], TOP ] + [RIGHTPADDING, [ 0, 0 ], [ 1, -1 ], 0 ] + colWidths: ["20", null] + + bullet-list-item: + parent: bodytext + + item-list: + parent: bodytext + commands: [] + [VALIGN, [ 0, 0 ], [ -1, -1 ], TOP ] + [RIGHTPADDING, [ 0, 0 ], [ 
1, -1 ], 0 ] + colWidths: [20pt,null] + + item-list-item: + parent: bodytext + + definition-list-term: + parent: normal + fontName: stdBold + spaceBefore: 4 + spaceAfter: 0 + keepWithNext: false + + definition-list-classifier: + parent: normal + fontName: stdItalic + + definition: + parent: bodytext + firstLineIndent: 0 + bulletIndent: 0 + spaceBefore: 0 + colWidths: [20pt,null] + commands: [] + [VALIGN, [ 0, 0 ], [ -1, -1 ], TOP ] + [LEFTPADDING, [ 0, 0 ], [ -1, -1 ], 0 ] + [BOTTOMPADDING, [ 0, 0 ], [ -1, -1 ], 0 ] + [RIGHTPADDING, [ 0, 0 ], [ -1, -1 ], 0 ] + + fieldname: + parent: bodytext + alignment: TA_RIGHT + fontName: stdBold + + fieldvalue: + parent: bodytext + + rubric: + parent: bodytext + textColor: darkred + alignment: TA_CENTER + + italic: + parent: bodytext + fontName: stdItalic + + heading: + parent: normal + keepWithNext: true + spaceBefore: 12 + spaceAfter: 6 + fontSize: 175% + alignment: TA_CENTER + + title: + parent: heading + fontName: stdBold + fontSize: 200% + alignment: TA_CENTER + keepWithNext: false + spaceAfter: 10 + + subtitle: + parent: title + spaceBefore: 12 + fontSize: 75% + + heading1: + parent: heading + fontName: stdBold + fontSize: 175% + + heading2: + parent: heading + fontName: stdBold + fontSize: 150% + + heading3: + parent: heading + fontName: stdBoldItalic + fontSize: 125% + + heading4: + parent: heading + fontName: stdBoldItalic + + heading5: + parent: heading + fontName: stdBoldItalic + + heading6: + parent: heading + fontName: stdBoldItalic + + topic-title: + parent: heading3 + + sidebar-title: + parent: heading3 + + sidebar-subtitle: + parent: heading4 + + sidebar: + float: none + width: 100% + parent: normal + backColor: beige + borderColor: darkgray + borderPadding: 8 + borderWidth: 0.5 + + admonition: + parent: normal + spaceBefore: 12 + spaceAfter: 6 + borderPadding: [16,16,16,16] + backColor: beige + borderColor: darkgray + borderWidth: 0.5 + commands:[] + [VALIGN, [ 0, 0 ], [ -1, -1 ], TOP ] + + attention: + parent: 
admonition + + caution: + parent: admonition + + danger: + parent: admonition + + error: + parent: admonition + + hint: + parent: admonition + + important: + parent: admonition + + note: + parent: admonition + + tip: + parent: admonition + + warning: + parent: admonition + + admonition-title: + parent: heading3 + + admonition-heading: + parent: heading3 + + attention-heading: + parent: admonition-heading + + caution-heading: + parent: admonition-heading + + danger-heading: + parent: admonition-heading + + error-heading: + parent: admonition-heading + + hint-heading: + parent: admonition-heading + + important-heading: + parent: admonition-heading + + note-heading: + parent: admonition-heading + + tip-heading: + parent: admonition-heading + + warning-heading: + parent: admonition-heading + + literal: + parent: normal + fontName: stdMono + firstLineIndent: 0 + hyphenation: false + wordWrap: null + + aafigure: + parent: literal + + table: + spaceBefore:6 + spaceAfter:0 + alignment: TA_CENTER + commands: [] + [VALIGN, [ 0, 0 ], [ -1, -1 ], TOP ] + [INNERGRID, [ 0, 0 ], [ -1, -1 ], 0.25, black ] + [ROWBACKGROUNDS, [0, 0], [-1, -1], [white,#E0E0E0]] + [BOX, [ 0, 0 ], [ -1, -1 ], 0.25, black ] + + table-title: + parent : heading4 + keepWithNext: false + alignment : TA_CENTER + + table-heading: + parent : heading + backColor : beige + alignment : TA_CENTER + valign : BOTTOM + borderPadding : 0 + + table-body: + parent : normal + + dedication: + parent : normal + + abstract: + parent : normal + + contents: + parent : normal + + tableofcontents: + parent : normal + + code: + parent: literal + leftIndent: 0 + spaceBefore: 8 + spaceAfter: 8 + backColor: beige + borderColor: darkgray + borderWidth: 0.5 + borderPadding: 6 + + linenumber: + parent: code + + right: + parent: bodytext + alignment: right + + center: + parent: bodytext + alignment: center + + pygments-n: parent: code + pygments-nx: parent: code + pygments-p: parent: code + pygments-hll: {parent: code, backColor: 
#ffffcc} + pygments-c: {textColor: #008800, parent: code} + pygments-err: {parent: code} + pygments-k: {textColor: #AA22FF, parent: code} + pygments-o: {textColor: #666666, parent: code} + pygments-cm: {textColor: #008800, parent: code} + pygments-cp: {textColor: #008800, parent: code} + pygments-c1: {textColor: #008800, parent: code} + pygments-cs: {textColor: #008800, parent: code} + pygments-gd: {textColor: #A00000, parent: code} + pygments-ge: {parent: code} + pygments-gr: {textColor: #FF0000, parent: code} + pygments-gh: {textColor: #000080, parent: code} + pygments-gi: {textColor: #00A000, parent: code} + pygments-go: {textColor: #808080, parent: code} + pygments-gp: {textColor: #000080, parent: code} + pygments-gs: {parent: code} + pygments-gu: {textColor: #800080, parent: code} + pygments-gt: {textColor: #0040D0, parent: code} + pygments-kc: {textColor: #AA22FF, parent: code} + pygments-kd: {textColor: #AA22FF, parent: code} + pygments-kn: {textColor: #AA22FF, parent: code} + pygments-kp: {textColor: #AA22FF, parent: code} + pygments-kr: {textColor: #AA22FF, parent: code} + pygments-kt: {textColor: #00BB00, parent: code} + pygments-m: {textColor: #666666, parent: code} + pygments-s: {textColor: #BB4444, parent: code} + pygments-na: {textColor: #BB4444, parent: code} + pygments-nb: {textColor: #AA22FF, parent: code} + pygments-nc: {textColor: #0000FF, parent: code} + pygments-no: {textColor: #880000, parent: code} + pygments-nd: {textColor: #AA22FF, parent: code} + pygments-ni: {textColor: #999999, parent: code} + pygments-ne: {textColor: #D2413A, parent: code} + pygments-nf: {textColor: #00A000, parent: code} + pygments-nl: {textColor: #A0A000, parent: code} + pygments-nn: {textColor: #0000FF, parent: code} + pygments-nt: {textColor: #008000, parent: code} + pygments-nv: {textColor: #B8860B, parent: code} + pygments-ow: {textColor: #AA22FF, parent: code} + pygments-w: {textColor: #bbbbbb, parent: code} + pygments-mf: {textColor: #666666, parent: code} + 
pygments-mh: {textColor: #666666, parent: code} + pygments-mi: {textColor: #666666, parent: code} + pygments-mo: {textColor: #666666, parent: code} + pygments-sb: {textColor: #BB4444, parent: code} + pygments-sc: {textColor: #BB4444, parent: code} + pygments-sd: {textColor: #BB4444, parent: code} + pygments-s2: {textColor: #BB4444, parent: code} + pygments-se: {textColor: #BB6622, parent: code} + pygments-sh: {textColor: #BB4444, parent: code} + pygments-si: {textColor: #BB6688, parent: code} + pygments-sx: {textColor: #008000, parent: code} + pygments-sr: {textColor: #BB6688, parent: code} + pygments-s1: {textColor: #BB4444, parent: code} + pygments-ss: {textColor: #B8860B, parent: code} + pygments-bp: {textColor: #AA22FF, parent: code} + pygments-vc: {textColor: #B8860B, parent: code} + pygments-vg: {textColor: #B8860B, parent: code} + pygments-vi: {textColor: #B8860B, parent: code} + pygments-il: {textColor: #666666, parent: code} + + endnote: + parent: bodytext + commands: [] + [VALIGN, [ 0, 0 ], [ -1, -1 ], TOP ] + [TOPPADDING, [ 0, 0 ], [ -1, -1 ], 0 ] + [BOTTOMPADDING, [ 0, 0 ], [ -1, -1 ], 0 ] + [RIGHTPADDING, [ 0, 0 ], [ 1, -1 ], 0 ] + colWidths: [3cm, null] + + field-list: + parent: bodytext + commands: [] + [VALIGN, [ 0, 0 ], [ -1, -1 ], TOP ] + [TOPPADDING, [ 0, 0 ], [ -1, -1 ], 0 ] + colWidths: [3cm, null] + spaceBefore: 6 + + option-list: + commands: [] + [VALIGN, [ 0, 0 ], [ -1, -1 ], TOP ] + [TOPPADDING, [ 0, 0 ], [ -1, -1 ], 0 ] + colWidths: [null,null] + diff --git a/doc/DSGN_struct_comp_dia.uml b/doc/DSGN_struct_comp_dia.uml new file mode 100644 index 000000000..1bf4b11fd --- /dev/null +++ b/doc/DSGN_struct_comp_dia.uml @@ -0,0 +1,43 @@ +@startuml +skinparam componentStyle uml2 +scale 0.6 +package "User Space" { +[Traced Application] as TA +[Tracing Tool] as TT +} +cloud { +() "Circular Buffer" as CB +CB -left-> TT : events +} +package "Kernel Space" { +folder "SysCalls table" { +() "SC #1" as SC1 +() "..." as SC2 +() "SC #x" as SCx +() "..." 
as SC4 +() "SC #n" as SCn +TA -down-> SCx : SysCall +} +'1 +[EBPF VM #1] as VM1 +SC1 -down-> VM1 : KProbe +VM1 -up-> CB : event + +[SC #1 Handler] as SCH1 +VM1 -down-> SCH1 : KProbe +'x +[EBPF VM #x] as VMx +SCx -down-> VMx : KProbe +VMx -up-> CB : event + +[SC #x Handler] as SCHx +VMx -down-> SCHx : KProbe +'n +[EBPF VM #n] as VMn +SCn -down-> VMn : KProbe +VMn -up-> CB : event + +[SC #n Handler] as SCHn +VMn -down-> SCHn : KProbe +} +@enduml diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 000000000..c7424edd8 --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,43 @@ +# Copyright 2016, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# +# Makefile -- Makefile for documentation +# + + +DESIGN.pdf: DESIGN.rst Makefile DSGN_rst_style.yaml *.uml + rst2pdf -c -b 1 \ + --verbose \ + --default-dpi=300 \ + -l en \ + -e preprocess -e plantuml \ + -s DSGN_rst_style.yaml \ + $< diff --git a/man/.gitignore b/man/.gitignore new file mode 100644 index 000000000..976a94634 --- /dev/null +++ b/man/.gitignore @@ -0,0 +1,5 @@ +*.txt +*.html +*.gz +cpp_html +LICENSE diff --git a/man/Makefile b/man/Makefile new file mode 100644 index 000000000..065c3c748 --- /dev/null +++ b/man/Makefile @@ -0,0 +1,112 @@ +# +# Copyright 2014-2016, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# +# man/Makefile -- Makefile for man page +# + +#include ../src/common.inc + +MANPAGES_1_MD = strace.ebpf.1.md + +MANPAGES_BUILDDIR = generated + +MANPAGES_1_GROFF = $(MANPAGES_1_MD:.1.md=.1) + +MANPAGES_1 = $(MANPAGES_1_GROFF) + + +MANPAGES_GROFF_1 = $(MANPAGES_1:.1.md=.1) + +MANPAGES = $(MANPAGES_GROFF_1) + +MANPAGES_BUILD = $(addprefix $(MANPAGES_BUILDDIR)/, $(MANPAGES)) + +HTMLFILES = $(MANPAGES_BUILD:=.html) +TXTFILES = $(MANPAGES_BUILD:=.txt) + +GZFILES_1 = $(MANPAGES_1:=.gz) +GZFILES = $(GZFILES_1) + +GZFILES_BUILD = $(addprefix $(MANPAGES_BUILDDIR)/, $(GZFILES)) +GZFILES_1_BUILD = $(addprefix $(MANPAGES_BUILDDIR)/, $(GZFILES_1)) + +MANPAGES_DESTDIR_1 = $(DESTDIR)$(man1dir) + +DOCS_DESTDIR = $(DESTDIR)$(docdir) + +all: $(MANPAGES_BUILD) $(TXTFILES) | $(MANPAGES_BUILDDIR) + +$(MANPAGES_BUILDDIR): + $(MKDIR) -p $@ + +%.txt: % + man ./$< > $@ + +groff: $(MANPAGES_1) + +doxygen_docs: + doxygen cppobj.Doxyfile + +html: $(HTMLFILES) doxygen_docs + +%.html: % + groff -mandoc -Thtml ./$< > $@ + +$(MANPAGES_BUILDDIR)/%.1: %.1.md default.man ../utils/md2man.sh FORCE + ../utils/md2man.sh ./$< default.man $@ + +compress: $(GZFILES_BUILD) + +%.gz: + gzip -c ./$* > $@ + +clean: + +clobber: 
clean + $(RM) -rf $(DOXYGEN_HTMLDIR) \ + $(MANPAGES_BUILDDIR)/*.txt \ + $(MANPAGES_BUILDDIR)/*.html \ + $(MANPAGES_BUILDDIR)/*.gz + + +install: compress + install -d $(MANPAGES_DESTDIR_1) + install -p -m 0644 $(GZFILES_1_BUILD) $(MANPAGES_DESTDIR_1) + +uninstall: + $(foreach f, $(GZFILES_1), $(RM) $(MANPAGES_DESTDIR_1)/$(f)) + +FORCE: + +.PHONY: all html clean compress clobber cstyle install uninstall install-cpp\ + uninstall-cpp doxygen_docs diff --git a/man/README b/man/README new file mode 100644 index 000000000..732c2239e --- /dev/null +++ b/man/README @@ -0,0 +1,11 @@ +strace.ebpf + +This is man/README. + +This directory contains source for the man page. + +To create more readable text files from the source, use: + $ make +An even more convenient way to read these is to use the "man" command to +format them (includes bold, underline, etc. when run in a terminal window): + $ man -l strace.ebpf.1 diff --git a/man/default.man b/man/default.man new file mode 100644 index 000000000..faf6e572f --- /dev/null +++ b/man/default.man @@ -0,0 +1,59 @@ +$if(has-tables)$ +.\"t +$endif$ +$if(pandoc-version)$ +.\" Automatically generated by Pandoc $pandoc-version$ +.\" +$endif$ +$if(adjusting)$ +.ad $adjusting$ +$endif$ +.TH "$title$" "$section$" "$version$" "$date$" "$footer$" "$header$" +$if(hyphenate)$ +.hy +$else$ +.nh \" Turn off hyphenation by default. +$endif$ +$for(header-includes)$ +$header-includes$ +$endfor$ +.\" Copyright 2014-$year$, Intel Corporation +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" * Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. 
+.\" +.\" * Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in +.\" the documentation and/or other materials provided with the +.\" distribution. +.\" +.\" * Neither the name of the copyright holder nor the names of its +.\" contributors may be used to endorse or promote products derived +.\" from this software without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +.\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +.\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +.\" A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +.\" OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +.\" SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +.\" LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +.\" OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +$for(include-before)$ +$include-before$ +$endfor$ +$body$ +$for(include-after)$ +$include-after$ +$endfor$ +$if(author)$ +.SH AUTHORS +$for(author)$$author$$sep$; $endfor$. 
+$endif$ diff --git a/man/generated/strace.ebpf.1 b/man/generated/strace.ebpf.1 new file mode 100644 index 000000000..dfd1042af --- /dev/null +++ b/man/generated/strace.ebpf.1 @@ -0,0 +1,291 @@ +.\" Automatically generated by Pandoc 1.16.0.2 +.\" +.TH "strace" "1" "pmem Tools version 1.0.2" "" "" "" +.hy +.\" Copyright 2014-2016, Intel Corporation +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" * Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" * Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in +.\" the documentation and/or other materials provided with the +.\" distribution. +.\" +.\" * Neither the name of the copyright holder nor the names of its +.\" contributors may be used to endorse or promote products derived +.\" from this software without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +.\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +.\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +.\" A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +.\" OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +.\" SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +.\" LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +.\" OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+.SH NAME +.PP +\f[B]strace.ebpf\f[] \-\- extremely fast strace\-like tool built on +top of eBPF and KProbe technologies. +.SH SYNOPSIS +.IP +.nf +\f[C] +$\ strace.ebpf\ [options]\ [command\ [arg\ ...]] +\f[] +.fi +.SH DESCRIPTION +.PP +strace.ebpf is a limited functional strace equivalent for Linux but +based on eBPF and KProbe technologies and libbcc library. +.IP \[bu] 2 +Pros: +.RS 2 +.IP \[bu] 2 +The combination of technologies used allows the tool to be about an order +of magnitude faster than regular system strace. +.IP \[bu] 2 +This tool consumes far fewer CPU resources +.IP \[bu] 2 +Output of this tool is designed to be suitable for processing with +classical tools and technologies, like awk. +.IP \[bu] 2 +Could trace syscalls system\-wide. +.RE +.IP \[bu] 2 +Cons: +.RS 2 +.IP \[bu] 2 +Limited functionality +.IP \[bu] 2 +Slow attaching and detaching +.IP \[bu] 2 +Asynchronicity. +If the user does not provide enough system resources for performance, the tool +will skip some calls. +The tool does not attempt any work\-around behind the scenes. +.RE +.PP +WARNING: System\-wide tracing can fill out your disk really fast. +.SH OPTIONS +.PP +\f[C]\-t,\ \-\-timestamp\f[] +.PP +include timestamp in output +.PP +\f[C]\-X,\ \-\-failed\f[] +.PP +only show failed syscalls +.PP +\f[C]\-d,\ \-\-debug\f[] +.PP +enable debug output +.PP +\f[C]\-p,\ \-\-pid\f[] +.PP +this PID only. +Command arg should be missing +.PP +\f[C]\-o,\ \-\-output\f[] +.PP +filename +.PP +\f[C]\-l,\ \-\-format\f[] +.PP +output logs format. +Possible values: +.IP +.nf +\f[C] +\[aq]bin\[aq],\ \[aq]binary\[aq],\ \[aq]hex\[aq],\ \[aq]strace\[aq],\ \[aq]list\[aq]\ &\ \[aq]help\[aq]. +\f[] +.fi +.PP +\[aq]bin\[aq]/\[aq]binary\[aq] file format is described in generated +trace.h. +If current directory is not writable generating is skipped. +.PP +Default: \[aq]hex\[aq] +.PP +\f[C]\-K,\ \-\-hex\-separator\f[] +.PP +set field separator for hex logs. +Default is \[aq]\[aq].
+.PP +\f[C]\-e,\ \-\-expr\f[] +.PP +expression, \[aq]help\[aq] or \[aq]list\[aq] for supported list. +.PP +Default: trace=kp\-kern\-all. +.PP +\f[C]\-L,\ \-\-list\f[] +.PP +Print a list of all traceable syscalls of the running kernel. +.PP +\f[C]\-R,\ \-\-ll\-list\f[] +.PP +Print a list of all traceable low\-level funcs of the running kernel. +.PP +WARNING: really long. +~45000 functions for 4.4 kernel. +.PP +\f[C]\-b,\ \-\-builtin\-list\f[] +.PP +Print a list of all syscalls known by glibc. +.PP +\f[C]\-h,\ \-\-help\f[] +.PP +print help +.SH CONFIGURATION +.PP +** System Configuring ** +.IP "1." 3 +You should provide permissions to access tracefs for final user. +.IP "2." 3 +It\[aq]s good to put this command in init scripts such as local.rc: +.RS 4 +.PP +echo 1 > /proc/sys/net/core/bpf_jit_enable +.PP +It will significantly improve performance and avoid \[aq]Lost +events\[aq] +.RE +.IP "3." 3 +You should increase "Open File Limit", for example according to this +instruction: +.RS 4 +.PP +https://easyengine.io/tutorials/linux/increase\-open\-files\-limit/ +.RE +.SH FILES +.PP +Putting into current directory following files allow to customize eBPF +code for supporting more newer eBPF VM features in newer kernels. +Also if current directory does not contain trace.h strace.ebpf on first +start saves built\-in trace.h into current directory. +Saved built\-in describe binary log\[aq]s format. 
+.IP \[bu] 2 +trace.h +.IP \[bu] 2 +trace_head.c +.IP \[bu] 2 +trace_tp_all.c +.IP \[bu] 2 +trace_kern_tmpl.c +.IP \[bu] 2 +trace_libc_tmpl.c +.IP \[bu] 2 +trace_file_tmpl.c +.IP \[bu] 2 +trace_fileat_tmpl.c +.SH EXAMPLES +.SH Example output: +.PP +# ./strace.ebpf \-l hex +.PP +\&./strace.ebpf \-l hex PID ERR RES SYSCALL ARG1 ARG2 ARG3 AUX_DATA +0000000000000AFD 000000000000000B FFFFFFFFFFFFFFFF read 0000000000000005 +0000000000000427 0000000000000000 0000000000000020 read 000000000000000A +0000000000000B3D 0000000000000000 0000000000000001 write +000000000000001C 0000000000000B11 0000000000000000 0000000000000001 read +000000000000001B 0000000000000427 0000000000000000 0000000000000020 read +000000000000000A 0000000000000B3D 0000000000000000 0000000000000001 +write 000000000000001C 0000000000000B11 0000000000000000 +0000000000000001 read 000000000000001B 0000000000000B3D 0000000000000000 +0000000000000001 write 000000000000001C 0000000000000B11 +0000000000000000 0000000000000001 read 000000000000001B 0000000000000B3D +0000000000000000 0000000000000001 write 000000000000001C +0000000000000B11 0000000000000000 0000000000000001 read 000000000000001B +... +.PP +^C +.PP +# +.SH The \-p option can be used to filter on a PID, which is filtered +in\-kernel. 
+.PP +Here \-t option is used to print timestamps: +.PP +# ./strace.ebpf \-l hex \-tp 2833 +.PP +\&./strace.ebpf \-l hex \-tp 2833 PID TIME(usec) ERR RES SYSCALL ARG1 +ARG2 ARG3 AUX_DATA +.PP +0000000000000B11 0000000000000000 0000000000000000 0000000000000001 read +000000000000001B +.PP +0000000000000B11 0000000000004047 0000000000000000 0000000000000001 read +000000000000001B +.PP +0000000000000B11 0000000000008347 0000000000000000 0000000000000001 read +000000000000001B +.PP +0000000000000B11 000000000000C120 0000000000000000 0000000000000001 read +000000000000001B +.PP +0000000000000B11 000000000000C287 0000000000000000 0000000000000001 read +000000000000001B +.PP +0000000000000B11 000000000000C508 0000000000000000 0000000000000001 read +000000000000001B +.PP +0000000000000B11 0000000000010548 0000000000000000 0000000000000001 read +000000000000001B +.PP +0000000000000B11 00000000000144A4 0000000000000000 0000000000000001 read +000000000000001B +.PP +\&... +.PP +^C +.PP +# +.SH The \-X option only prints failed syscalls: +.PP +# ./strace.ebpf \-l hex \-X mkdir . +.PP +\&./strace.ebpf \-l hex \-X mkdir . 
+.PP +PID ERR RES SYSCALL ARG1 ARG2 ARG3 AUX_DATA +.PP +000000000000441A 0000000000000002 FFFFFFFFFFFFFFFF open +/usr/share/locale/en_US/LC_MESSAGES/coreutils.mo mkdir +.PP +000000000000441A 0000000000000002 FFFFFFFFFFFFFFFF open +/usr/share/locale/en/LC_MESSAGES/coreutils.mo mkdir +.PP +000000000000441A 0000000000000002 FFFFFFFFFFFFFFFF open +/usr/share/locale\-langpack/en_US/LC_MESSAGES/coreutils.mo mkdir +.PP +000000000000441A 0000000000000002 FFFFFFFFFFFFFFFF open +/usr/lib/x86_64\-linux\-gnu/charset.alias mkdir +.PP +000000000000441A 0000000000000002 FFFFFFFFFFFFFFFF open +/usr/share/locale/en_US/LC_MESSAGES/libc.mo mkdir +.PP +000000000000441A 0000000000000002 FFFFFFFFFFFFFFFF open +/usr/share/locale/en/LC_MESSAGES/libc.mo mkdir +.PP +000000000000441A 0000000000000002 FFFFFFFFFFFFFFFF open +/usr/share/locale\-langpack/en_US/LC_MESSAGES/libc.mo mkdir +.PP +000000000000441A 0000000000000002 FFFFFFFFFFFFFFFF open +/usr/share/locale\-langpack/en/LC_MESSAGES/libc.mo mkdir +.PP +# +.PP +The ERR column is the system error number. +Error number 2 is ENOENT: no such file or directory. +.SH SEE ALSO +.PP +\f[B]strace\f[](1), \f[B]bpf\f[](2), \f[B]\f[]. +.PP +Also Documentation/networking/filter.txt in kernel sources. diff --git a/man/strace.ebpf.1.md b/man/strace.ebpf.1.md new file mode 100644 index 000000000..c207a4994 --- /dev/null +++ b/man/strace.ebpf.1.md @@ -0,0 +1,273 @@ +--- +layout: manual +Content-Style: 'text/css' +title: strace.ebpf(1) +header: NVM Library +date: pmem Tools version 1.0.2 +... + +[comment]: <> (Copyright 2016, Intel Corporation) + +[comment]: <> (Redistribution and use in source and binary forms, with or without) +[comment]: <> (modification, are permitted provided that the following conditions) +[comment]: <> (are met:) +[comment]: <> ( * Redistributions of source code must retain the above copyright) +[comment]: <> ( notice, this list of conditions and the following disclaimer.) 
+[comment]: <> ( * Redistributions in binary form must reproduce the above copyright) +[comment]: <> ( notice, this list of conditions and the following disclaimer in) +[comment]: <> ( the documentation and/or other materials provided with the) +[comment]: <> ( distribution.) +[comment]: <> ( * Neither the name of the copyright holder nor the names of its) +[comment]: <> ( contributors may be used to endorse or promote products derived) +[comment]: <> ( from this software without specific prior written permission.) + +[comment]: <> (THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS) +[comment]: <> ("AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT) +[comment]: <> (LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR) +[comment]: <> (A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT) +[comment]: <> (OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,) +[comment]: <> (SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT) +[comment]: <> (LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,) +[comment]: <> (DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY) +[comment]: <> (THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT) +[comment]: <> ((INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE) +[comment]: <> (OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.) + +[comment]: <> (strace.ebpf.1 -- man page for strace.ebpf) + +[NAME](#name)
+[SYNOPSIS](#synopsis)
+[DESCRIPTION](#description)
+[OPTIONS](#options)
+[CONFIGURATION](#configuration)
+[FILES](#files)
+[EXAMPLES](#examples)
+[SEE ALSO](#see-also)
+ + +# NAME # + +**strace.ebpf** -- extremely fast strace-like tool built on top of eBPF +and KProbe technologies. + + +# SYNOPSIS # + +``` +$ strace.ebpf [options] [command [arg ...]] +``` + + +# DESCRIPTION # + +strace.ebpf is a limited functional strace equivalent for Linux but based on +eBPF and KProbe technologies and libbcc library. + ++ Pros: + + - The combination of technologies used allows the tool to be about an order + of magnitude faster than regular system strace. + - This tool consumes far fewer CPU resources + - Output of this tool is designed to be suitable for processing with + classical tools and technologies, like awk. + - Could trace syscalls system-wide. + ++ Cons: + + - Limited functionality + - Slow attaching and detaching + - Asynchronicity. If the user does not provide enough system resources for + performance, the tool will skip some calls. The tool does not attempt + any work-around behind the scenes. + + +WARNING: System-wide tracing can fill out your disk really fast. + + +# OPTIONS # + +`-t, --timestamp` + +include timestamp in output + +`-X, --failed` + +only show failed syscalls + +`-d, --debug` + +enable debug output + +`-p, --pid` + +this PID only. Command arg should be missing + +`-o, --output` + +filename + +`-l, --format` + +output logs format. Possible values: + + 'bin', 'binary', 'hex', 'strace', 'list' & 'help'. + +'bin'/'binary' file format is described in generated trace.h. If current +directory is not writable generating is skipped. + +Default: 'hex' + +`-K, --hex-separator` + +set field separator for hex logs. Default is '\t'. + +`-e, --expr` + +expression, 'help' or 'list' for supported list. + +Default: trace=kp-kern-all. + +`-L, --list` + +Print a list of all traceable syscalls of the running kernel. + +`-R, --ll-list` + +Print a list of all traceable low-level funcs of the running kernel. + +WARNING: really long. ~45000 functions for 4.4 kernel. + +`-b, --builtin-list` + +Print a list of all syscalls known by glibc.
+ +`-h, --help` + +print help + + +# CONFIGURATION # + +**System Configuring** + +1. You should provide permissions to access tracefs for final user. + +2. It's good to put this command in init scripts such as local.rc: + + echo 1 > /proc/sys/net/core/bpf_jit_enable + + It will significantly improve performance and avoid 'Lost events' + +3. You should increase "Open File Limit", for example according to this + instruction: + + https://easyengine.io/tutorials/linux/increase-open-files-limit/ + + +# FILES # + +Placing the following files in the current directory allows customizing the eBPF +code to support newer eBPF VM features in newer kernels. Also, if the current +directory does not contain trace.h, strace.ebpf saves its built-in trace.h there +on first start. The saved trace.h describes the binary log format. + +- trace.h +- trace_head.c +- trace_tp_all.c +- trace_kern_tmpl.c +- trace_libc_tmpl.c +- trace_file_tmpl.c +- trace_fileat_tmpl.c + + +# EXAMPLES # + +#Example output: + + # ./strace.ebpf -l hex + +./strace.ebpf -l hex +PID ERR RES SYSCALL ARG1 ARG2 ARG3 AUX_DATA +0000000000000AFD 000000000000000B FFFFFFFFFFFFFFFF read 0000000000000005 +0000000000000427 0000000000000000 0000000000000020 read 000000000000000A +0000000000000B3D 0000000000000000 0000000000000001 write 000000000000001C +0000000000000B11 0000000000000000 0000000000000001 read 000000000000001B +0000000000000427 0000000000000000 0000000000000020 read 000000000000000A +0000000000000B3D 0000000000000000 0000000000000001 write 000000000000001C +0000000000000B11 0000000000000000 0000000000000001 read 000000000000001B +0000000000000B3D 0000000000000000 0000000000000001 write 000000000000001C +0000000000000B11 0000000000000000 0000000000000001 read 000000000000001B +0000000000000B3D 0000000000000000 0000000000000001 write 000000000000001C +0000000000000B11 0000000000000000 0000000000000001 read 000000000000001B +...
+ +^C + + # + + +#The -p option can be used to filter on a PID, which is filtered in-kernel. +Here -t option is used to print timestamps: + + # ./strace.ebpf -l hex -tp 2833 + +./strace.ebpf -l hex -tp 2833 +PID TIME(usec) ERR RES SYSCALL ARG1 ARG2 ARG3 AUX_DATA + +0000000000000B11 0000000000000000 0000000000000000 0000000000000001 read 000000000000001B + +0000000000000B11 0000000000004047 0000000000000000 0000000000000001 read 000000000000001B + +0000000000000B11 0000000000008347 0000000000000000 0000000000000001 read 000000000000001B + +0000000000000B11 000000000000C120 0000000000000000 0000000000000001 read 000000000000001B + +0000000000000B11 000000000000C287 0000000000000000 0000000000000001 read 000000000000001B + +0000000000000B11 000000000000C508 0000000000000000 0000000000000001 read 000000000000001B + +0000000000000B11 0000000000010548 0000000000000000 0000000000000001 read 000000000000001B + +0000000000000B11 00000000000144A4 0000000000000000 0000000000000001 read 000000000000001B + +... + +^C + + # + + +#The -X option only prints failed syscalls: + + # ./strace.ebpf -l hex -X mkdir . + +./strace.ebpf -l hex -X mkdir . 
+ +PID ERR RES SYSCALL ARG1 ARG2 ARG3 AUX_DATA + +000000000000441A 0000000000000002 FFFFFFFFFFFFFFFF open /usr/share/locale/en_US/LC_MESSAGES/coreutils.mo mkdir + +000000000000441A 0000000000000002 FFFFFFFFFFFFFFFF open /usr/share/locale/en/LC_MESSAGES/coreutils.mo mkdir + +000000000000441A 0000000000000002 FFFFFFFFFFFFFFFF open /usr/share/locale-langpack/en_US/LC_MESSAGES/coreutils.mo mkdir + +000000000000441A 0000000000000002 FFFFFFFFFFFFFFFF open /usr/lib/x86_64-linux-gnu/charset.alias mkdir + +000000000000441A 0000000000000002 FFFFFFFFFFFFFFFF open /usr/share/locale/en_US/LC_MESSAGES/libc.mo mkdir + +000000000000441A 0000000000000002 FFFFFFFFFFFFFFFF open /usr/share/locale/en/LC_MESSAGES/libc.mo mkdir + +000000000000441A 0000000000000002 FFFFFFFFFFFFFFFF open /usr/share/locale-langpack/en_US/LC_MESSAGES/libc.mo mkdir + +000000000000441A 0000000000000002 FFFFFFFFFFFFFFFF open /usr/share/locale-langpack/en/LC_MESSAGES/libc.mo mkdir + + # + +The ERR column is the system error number. Error number 2 is ENOENT: no such +file or directory. + +# SEE ALSO # + +**strace**(1), **bpf**(2), ****. + +Also Documentation/networking/filter.txt in kernel sources. diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 000000000..c531b287c --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,8 @@ +strace.ebpf +file_sc_bench +*.trc +*.1.txt +*.rst.build_temp +*.png +/trace.h +/nondebug diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 000000000..6f7297e34 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,166 @@ +# Copyright 2016, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Makefile -- top Makefile for strace.ebpf +# + + +TARGET = strace.ebpf + +TOP = ../ + +EXTRA_TARGETS += nondebug/libebpf.a nondebug/libstrace.a + +$(TARGET): nondebug/libebpf.a nondebug/libstrace.a +$(TARGET).static-debug: nondebug/libebpf.a nondebug/libstrace.a +$(TARGET).static-nondebug: nondebug/libebpf.a nondebug/libstrace.a + +nondebug/libebpf.a: + $(MAKE) -C ebpf + +nondebug/libstrace.a: + $(MAKE) -C libstrace + +nondebug/libebpf.a-clean: + $(MAKE) -C ebpf clean + $(RM) nondebug/libebpf.a + +nondebug/libstrace.a-clean: + $(MAKE) -C libstrace clean + $(RM) nondebug/libstrace.a + +nondebug/libebpf.a-clobber: nondebug/libebpf.a-clean +nondebug/libstrace.a-clobber: nondebug/libstrace.a-clean + +.PHONY: nondebug/libebpf.a-clean nondebug/libstrace.a-clean +.PHONY: nondebug/libebpf.a-clobber nondebug/libstrace.a-clobber + +OBJS = main.o \ + + +CFLAGS += $(shell $(PKG_CONFIG) --cflags libbcc) + +# XXX libbcc expects multi-threading safety. Currently it's required for +# print_event_cb.o only, although we apply it to the whole application. +CFLAGS += -pthread + +CFLAGS += -g -Wextra + +ifeq ($(CC),clang) +CFLAGS += -Wno-initializer-overrides +else +CFLAGS += -Wno-override-init +endif + +CFLAGS += -I compat +CFLAGS += -I libstrace +CFLAGS += -I ebpf + +LDFLAGS += -g -Wextra + +# XXX libbcc expects multi-threading safety. 
+LDFLAGS += -pthread + +LIBS += $(shell $(PKG_CONFIG) --libs libbcc) +LIBS += nondebug/libstrace.a +LIBS += nondebug/libebpf.a + + +INSTALL_TARGET=$(EXPERIMENTAL) + + +include ../Makefile.inc + + +# Local BenchMark +PROGS = file_sc_bench +PROGS_CFLAGS = -g -O2 -Wall -Wextra -D__USE_GNU + + +$(PROGS).o: $(PROGS).c Makefile + $(CC) $(PROGS_CFLAGS) -c -o $@ $< + +$(PROGS): $(PROGS).o Makefile + $(CC) $(PROGS_CFLAGS) -o $@ $< + +.PHONY: run +run: $(PROGS) + time -p -v sudo ./$(PROGS) 500000 + @echo "Tracepoint's version is skipped because of this bug:" + @echo "\t - https://github.com/iovisor/bcc/issues/748" + #time -p -v sudo ./strace.ebpf -l hex -e trace=tp-all \ + # -o $(TMP)/$(PROGS).tp-all.trc ./$(PROGS) 50000 + #ln -f -s $(TMP)/$(PROGS).tp-all.trc $(PROGS).tp-all.trc + @echo + time -p -v sudo ./strace.ebpf -l hex -e trace=kp-libc-all \ + -o $(TMP)/$(PROGS).libc-all.trc ./$(PROGS) 50000 + ln -f -s $(TMP)/$(PROGS).libc-all.trc $(PROGS).libc-all.trc + @echo + time -p -v sudo ./strace.ebpf -l hex -e trace=kp-kern-all \ + -o $(TMP)/$(PROGS).kern-all.trc ./$(PROGS) 50000 + ln -f -s $(TMP)/$(PROGS).kern-all.trc $(PROGS).kern-all.trc + @echo + time -p -v sudo strace \ + -o $(TMP)/$(PROGS).trc ./$(PROGS) 10000 + ln -f -s $(TMP)/$(PROGS).trc $(PROGS).trc + +.PHONY: redis +redis: + @echo ">>>>> WARNING: Please disable system redis service in advance" + -redis-cli shutdown + # Should be same as in make-redis.sh + $(ECHO) > redis-server.log + sudo ./make-redis.sh + @echo "Tracepoint's version is skipped because of this bug:" + @echo "\t - https://github.com/iovisor/bcc/issues/748" + #sudo ./make-redis.sh ./strace.ebpf -l hex -e trace=tp-all \ + # -o $(TMP)/redis-server.tp-all.trc + #ln -f -s $(TMP)/redis-server.tp-all.trc redis-server.tp-all.trc + @echo + sudo ./make-redis.sh ./strace.ebpf -l hex -e trace=kp-libc-all \ + -o $(TMP)/redis-server.libc-all.trc + ln -f -s $(TMP)/redis-server.libc-all.trc redis-server.libc-all.trc + @echo + sudo ./make-redis.sh ./strace.ebpf -l hex 
-e trace=kp-kern-all \ + -o $(TMP)/redis-server.kern-all.trc + ln -f -s $(TMP)/redis-server.kern-all.trc redis-server.kern-all.trc + @echo + sudo ./make-redis.sh strace -f \ + -o $(TMP)/redis-server.trc + ln -f -s $(TMP)/redis-server.trc redis-server.trc + +# XXX Valgrind is confused with bpf() syscall and behaves unexpectedly on it +.PHONY: valgrind +valgrind: $(PROGS) + sudo valgrind -v\ + --leak-check=full \ + --track-origins=yes \ + --log-file=strace.ebpf.valgrind \ + ./strace.ebpf -d -o $(PROGS).ebpf.trc ./$(PROGS) 40 diff --git a/src/Makefile.inc b/src/Makefile.inc new file mode 100644 index 000000000..11b703a06 --- /dev/null +++ b/src/Makefile.inc @@ -0,0 +1,247 @@ +# Copyright 2014-2016, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# src/Makefile.inc -- common Makefile rules for NVM library +# + +TOP := $(dir $(lastword $(MAKEFILE_LIST))).. + +include $(TOP)/src/common.inc + +INCLUDE = $(TOP)/src/include + +RPMEM_COMMON = $(TOP)/src/rpmem_common +vpath %.c $(RPMEM_COMMON) + +COMMON = $(TOP)/src/common +vpath %.c $(COMMON) + +INCS += -I../include -I../common/ + +CFLAGS += -std=gnu99 +CFLAGS += -Wall +CFLAGS += -Werror +CFLAGS += -Wmissing-prototypes +CFLAGS += -Wpointer-arith +CFLAGS += -Wunused-macros +CFLAGS += -Wmissing-field-initializers +CFLAGS += -Wsign-conversion +CFLAGS += -Wsign-compare +ifeq ($(call check_Wconversion), y) +CFLAGS += -Wconversion +endif +CFLAGS += -pthread +CFLAGS += -fno-common +CFLAGS += -DSRCVERSION=\"$(SRCVERSION)\" +ifeq ($(call check_flag, -Wunreachable-code-return), y) +CFLAGS += -Wunreachable-code-return +endif +ifeq ($(call check_flag, -Wmissing-variable-declarations), y) +CFLAGS += -Wmissing-variable-declarations +endif + +ifeq ($(DEBUG),1) +CFLAGS += -O0 -ggdb -DDEBUG $(EXTRA_CFLAGS_DEBUG) +LIB_SUBDIR = /nvml_debug +OBJDIR = debug +else +CFLAGS += -O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 $(EXTRA_CFLAGS_RELEASE) +LIB_SUBDIR = +OBJDIR = nondebug +endif + +CFLAGS += $(EXTRA_CFLAGS) + +LDFLAGS += -Wl,-z,relro -Wl,--fatal-warnings -Wl,--warn-common $(EXTRA_LDFLAGS) + +define arch32_error_msg + +################################################## +### 32-bit builds of NVML are not supported! 
### +### Please, use 64-bit platform/compiler. ### +################################################## + +endef + +TESTCMD := $(CC) $(CFLAGS) -dM -E -x c /dev/null -o /dev/null +TESTBUILD := $(shell $(TESTCMD) && echo 1 || echo 0) +ifneq ($(TESTBUILD), 1) +$(error "$(TESTCMD)" failed) +endif + +LP64 := $(shell $(CC) $(CFLAGS) -dM -E -x c /dev/null | grep -Ec "__SIZEOF_LONG__.+8|__SIZEOF_POINTER__.+8" ) +ifneq ($(LP64), 2) +$(error $(arch32_error_msg)) +endif + +LIBS_DESTDIR = $(DESTDIR)$(libdir)$(LIB_SUBDIR) + +DIRNAME = $(shell basename $(CURDIR)) + +ifeq ($(OBJDIR),$(abspath $(OBJDIR))) +objdir = $(OBJDIR)/$(DIRNAME) +else +objdir = ../$(OBJDIR)/$(DIRNAME) +endif + +LIB_OUTDIR = $(objdir)/.. + +LDFLAGS += -L$(LIB_OUTDIR) + +ifneq ($(SOURCE),) +_OBJS = $(SOURCE:.c=.o) +_OBJS_COMMON = $(patsubst $(COMMON)/%, %, $(_OBJS)) +_OBJS_RPMEM_COMMON = $(patsubst $(RPMEM_COMMON)/%, %, $(_OBJS_COMMON)) +OBJS += $(addprefix $(objdir)/, $(_OBJS_RPMEM_COMMON)) +endif + +ifneq ($(HEADERS),) +ifneq ($(filter 1 2, $(CSTYLEON)),) +TMP_HEADERS := $(addsuffix tmp, $(HEADERS)) +TMP_HEADERS := $(addprefix $(objdir)/, $(TMP_HEADERS)) +endif +endif + +ifneq ($(LIBRARY_NAME),) +LIB_NAME = lib$(LIBRARY_NAME) +endif + +ifneq ($(LIBRARY_SO_VERSION),) +LIB_MAP = $(LIB_NAME).map +LIB_SONAME = $(LIB_NAME).so.$(LIBRARY_SO_VERSION) +LIB_SO = $(LIB_OUTDIR)/$(LIB_NAME).so + +LIB_SO_SONAME = $(LIB_SO).$(LIBRARY_SO_VERSION) + +ifneq ($(LIBRARY_VERSION),) +LIB_SO_REAL = $(LIB_SO_SONAME).$(LIBRARY_VERSION) +else +$(error LIBRARY_VERSION not set) +endif + +TARGET_LIBS = $(LIB_SO_REAL) +TARGET_LINKS = $(LIB_SO_SONAME) $(LIB_SO) +endif + +ifneq ($(LIB_NAME),) +LIB_AR = $(LIB_OUTDIR)/$(LIB_NAME).a +LIB_AR_UNSCOPED = $(objdir)/$(LIB_NAME)_unscoped.o +LIB_AR_ALL = $(objdir)/$(LIB_NAME)_all.o +TARGET_LIBS += $(LIB_AR) +endif + +ifneq ($(EXTRA_TARGETS),) +EXTRA_TARGETS_CLEAN = $(EXTRA_TARGETS:=-clean) +EXTRA_TARGETS_CLOBBER = $(EXTRA_TARGETS:=-clobber) +endif + 
+PMEMLOG_PRIV_OBJ=$(LIB_OUTDIR)/libpmemlog/libpmemlog_unscoped.o +PMEMBLK_PRIV_OBJ=$(LIB_OUTDIR)/libpmemblk/libpmemblk_unscoped.o + +ifneq ($(LIBPMEMLOG_PRIV_FUNCS),) +OBJS += pmemlog_priv_funcs.o +endif + +ifneq ($(LIBPMEMBLK_PRIV_FUNCS),) +OBJS += pmemblk_priv_funcs.o +endif + +MAKEFILE_DEPS=$(TOP)/src/Makefile.inc Makefile $(TOP)/src/common.inc + +all: $(objdir) $(LIB_OUTDIR) $(EXTRA_TARGETS) $(LIB_AR) $(LIB_SO_SONAME) $(LIB_SO_REAL) $(LIB_SO) $(TMP_HEADERS) + +$(objdir) $(LIB_OUTDIR): + $(MKDIR) -p $@ + +$(LIB_SO_REAL): $(OBJS) $(EXTRA_OBJS) $(LIB_MAP) $(MAKEFILE_DEPS) + $(CC) $(LDFLAGS) -shared -Wl,--version-script=$(LIB_MAP),-soname,$(LIB_SONAME) -o $@ $(OBJS) $(EXTRA_OBJS) $(LIBS) + +$(LIB_SO_SONAME): $(LIB_SO_REAL) $(MAKEFILE_DEPS) + $(LN) -sf $(shell basename $<) $@ + +$(LIB_SO): $(LIB_SO_SONAME) $(MAKEFILE_DEPS) + $(LN) -sf $(shell basename $<) $@ + +$(LIB_AR_UNSCOPED): $(OBJS) $(EXTRA_OBJS) $(MAKEFILE_DEPS) + $(LD) -o $@ -r $(OBJS) $(EXTRA_OBJS) + +ifeq ($(LIB_MAP),) +$(LIB_AR_ALL): $(LIB_AR_UNSCOPED) $(MAKEFILE_DEPS) + $(OBJCOPY) $< $@ +else +$(LIB_AR_ALL): $(LIB_AR_UNSCOPED) $(LIB_MAP) $(MAKEFILE_DEPS) + $(OBJCOPY) --localize-hidden `sed -n 's/^ *\([a-zA-Z0-9_]*\);$$/-G \1/p' $(LIB_MAP)` $< $@ +endif + +$(LIB_AR): $(LIB_AR_ALL) $(MAKEFILE_DEPS) + $(AR) rv $@ $(LIB_AR_ALL) + +$(PMEMBLK_PRIV_OBJ): + $(MAKE) -C $(LIBSDIR) libpmemblk + +install: all +ifneq ($(LIBRARY_NAME),) + $(INSTALL) -d $(LIBS_DESTDIR) + $(INSTALL) -p -m 0755 $(TARGET_LIBS) $(LIBS_DESTDIR) + $(CP) -d $(TARGET_LINKS) $(LIBS_DESTDIR) +endif + +uninstall: +ifneq ($(LIBRARY_NAME),) + $(foreach f, $(TARGET_LIBS), $(RM) $(LIBS_DESTDIR)/$(notdir $(f))) + $(foreach f, $(TARGET_LINKS), $(RM) $(LIBS_DESTDIR)/$(notdir $(f))) +endif + +clean: $(EXTRA_TARGETS_CLEAN) +ifneq ($(LIBRARY_NAME),) + $(RM) $(OBJS) $(TMP_HEADERS) + $(RM) $(LIB_AR_ALL) $(LIB_AR_UNSCOPED) +endif + +clobber: clean $(EXTRA_TARGETS_CLOBBER) +ifneq ($(LIBRARY_NAME),) + $(RM) $(LIB_AR) $(LIB_SO_SONAME) $(LIB_SO_REAL) $(LIB_SO) + 
$(RM) -r $(objdir)/.deps +endif + +$(eval $(cstyle-rule)) + +$(objdir)/%.o: %.c $(MAKEFILE_DEPS) + $(call check-cstyle, $<) + @mkdir -p $(objdir)/.deps + $(CC) -MD -c -o $@ $(CFLAGS) $(INCS) -fPIC $< + $(create-deps) + +$(objdir)/%.htmp: %.h + $(call check-cstyle, $<, $@) + +.PHONY: all clean clobber install uninstall cstyle + +-include $(objdir)/.deps/*.P diff --git a/src/common.inc b/src/common.inc new file mode 100644 index 000000000..db2ad9539 --- /dev/null +++ b/src/common.inc @@ -0,0 +1,183 @@ +# Copyright 2014-2016, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# src/Makefile.inc -- common Makefile rules for NVM library +# + +TOP := $(dir $(lastword $(MAKEFILE_LIST))).. + +LN = ln +OBJCOPY = objcopy +MKDIR = mkdir +INSTALL = install +CP = cp +CSTYLE = $(TOP)/utils/cstyle +CSTYLEON = 0 +STYLE_CHECK = $(TOP)/utils/style_check.sh +PKG_CONFIG = pkg-config +CLANG_FORMAT ?= clang-format +HEADERS = $(wildcard *.h) $(wildcard *.hpp) + +ifeq ($(shell command -v $(PKG_CONFIG) && echo y || echo n), n) +$(error $(PKG_CONFIG) not found) +endif + +check_package = $(shell $(PKG_CONFIG) $(1) && echo y || echo n) + +check_flag = $(shell echo "int main(){return 0;}" |\ + $(CC) $(CFLAGS) $(1) -x c -o /dev/null - 2>/dev/null && echo y || echo n) + +# Check for issues with older clang compilers which assert on delete persistent<[][]>. +check_clang_template_bug = $(shell echo "using namespace nvml::obj; int main() { delete_persistent(make_persistent(2), 2); return 0; }" |\ + $(CXX) --std=c++11 -x c++ -I$(TOP)/src/include/ -include libpmemobj++/make_persistent_array.hpp -L$(TOP)/src/debug/ -c -o /dev/null - 2>/dev/null && echo y || echo n) + +# Check for issues with older gcc compilers which do not expand variadic template +# variables in lambda expressions. 
+check_gcc_variadic_template_bug = $(shell echo "void print() {} template void print(const T&, const Args &...arg) {auto f = [&]{ print(arg...);};} int main() {print(1, 2, 3); return 0;}" |\ + $(CXX) --std=c++11 -x c++ -o /dev/null - 2>/dev/null && echo y || echo n) + +check_cxx_flags = $(shell echo "int main(){return 0;}" |\ + $(CXX) $(1) -x c++ -o /dev/null - 2>/dev/null && echo y || echo n) + +CXX_TESTS=$(check_clang_template_bug)$(check_gcc_variadic_template_bug) + +cxx_ok=$(if $(findstring n,$(CXX_TESTS)),n,y) + +# This is a workaround for older incompatible versions of libstdc++ and clang. +# Please see https://llvm.org/bugs/show_bug.cgi?id=15517 for more info. +check_cxx_chrono = $(shell echo "int main(){return 0;}" |\ + $(CXX) -std=c++11 -x c++ -include future -o /dev/null - 2>/dev/null && echo y || echo n) + +check_Wconversion = $(shell echo "long random(void); char test(void); char test(void){char a = 0; char b = 'a'; char ret = random() == 1 ? a : b; return ret;}" |\ + $(CC) -c $(CFLAGS) -Wconversion -x c -o /dev/null - 2>/dev/null && echo y || echo n) + +check_librt = $(shell echo "int main() { struct timespec t; return clock_gettime(CLOCK_MONOTONIC, &t); }" |\ + $(CC) $(CFLAGS) -x c -include time.h -o /dev/null - 2>/dev/null && echo y || echo n) + +install_recursive = $(shell cd $(1) && find . -type f -exec install -m $(2) -D {} $(3)/{} \;) + +install_recursive_filter = $(shell cd $(1) && find . 
-type f -name "$(2)" -exec install -m $(3) -D {} $(4)/{} \;) + +define create-deps + @cp $(objdir)/$*.d $(objdir)/.deps/$*.P; \ + sed -e 's/#.*//' -e 's/^[^:]*: *//' -e 's/ *\\$$//' \ + -e '/^$$/ d' -e 's/$$/ :/' < $(objdir)/$*.d >> $(objdir)/.deps/$*.P; \ + $(RM) -f $(objdir)/$*.d +endef + +export prefix = /usr/local +export exec_prefix := $(prefix) +export sysconfdir := $(prefix)/etc +export datarootdir := $(prefix)/share +export mandir := $(datarootdir)/man +export docdir := $(datarootdir)/doc +export man1dir := $(mandir)/man1 +export man3dir := $(mandir)/man3 +export cstyle_bin := $(CSTYLE) +export clang_format_bin := $(CLANG_FORMAT) + +ifneq ($(wildcard $(exec_prefix)/x86_64-linux-gnu),) +LIB_PREFIX ?= x86_64-linux-gnu/lib +endif + +ifneq ($(wildcard $(exec_prefix)/lib64),) +LIB_PREFIX ?= lib64 +endif + +LIB_PREFIX ?= lib + +all: + +cstyle-%: + $(STYLE_CHECK) $* $(wildcard *.[ch]) $(wildcard *.[ch]pp) + +cstyle: cstyle-check + +format: cstyle-format + +ifeq ($(CSTYLEON),1) +define check-cstyle + @$(STYLE_CHECK) check $1 && if [ "$2" != "" ]; then mkdir -p `dirname $2` && touch $2; fi +endef +else ifeq ($(CSTYLEON),2) +define check-cstyle + @$(STYLE_CHECK) check $1 && if [ "$2" != "" ]; then mkdir -p `dirname $2` && touch $2; fi || true +endef +else +define check-cstyle +endef +endif + +define sub-target-foreach +$(1)-$(2): + $$(MAKE) -C $1 $2 +ifeq ($(3),y) +ifeq ($(custom_build),) + $$(MAKE) -C $1 $2 DEBUG=1 +endif +endif +endef + +define sub-target +$(foreach f, $(1), $(eval $(call sub-target-foreach, $f,$(2),$(3)))) +endef + +ifneq ($(wildcard $(prefix)/x86_64-linux-gnu),) +INC_PREFIX ?= x86_64-linux-gnu/include +endif + +INC_PREFIX ?= include + +test_build=$(addprefix -b, $(TEST_BUILD)) + +export libdir := $(exec_prefix)/$(LIB_PREFIX) +export includedir := $(prefix)/$(INC_PREFIX) +export pkgconfigdir := $(libdir)/pkgconfig +export bindir := $(exec_prefix)/bin +export bashcompdir := $(sysconfdir)/bash_completion.d + +check_ibv_fork_init = $(shell echo 
"\#include int main(void) { return ibv_fork_init(); }" |\ + $(CC) -c $(CFLAGS) -x c -o /dev/null -libverbs - 2>/dev/null && echo y || echo n) + +export HAS_LIBFABRIC := $(call check_package, libfabric) + +ifeq ($(HAS_LIBFABRIC),y) +ifeq ($(RPMEM_DISABLE_LIBIBVERBS),y) +export HAS_LIBIBVERBS := n +export BUILD_RPMEM := y +else +export HAS_LIBIBVERBS := $(call check_ibv_fork_init) +export BUILD_RPMEM := $(HAS_LIBIBVERBS) +endif +else +export BUILD_RPMEM := n +endif + +export BUILD_STRACE_EBPF := $(call check_package, libbcc) diff --git a/src/compat/bcc/perf_reader.h b/src/compat/bcc/perf_reader.h new file mode 100644 index 000000000..cc0dfef2e --- /dev/null +++ b/src/compat/bcc/perf_reader.h @@ -0,0 +1,48 @@ +/* + * Copyright 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * perf_reader.h -- utility functions + */ + +#ifndef PERF_READER_H +#define PERF_READER_H + +struct perf_reader; + +void perf_reader_free(void *ptr); +int perf_reader_poll(int num_readers, + struct perf_reader **readers, + int timeout); +int perf_reader_fd(struct perf_reader *reader); + +#endif /* PERF_READER_H */ diff --git a/src/ebpf/Makefile b/src/ebpf/Makefile new file mode 100644 index 000000000..57c9f80b7 --- /dev/null +++ b/src/ebpf/Makefile @@ -0,0 +1,60 @@ +# Copyright 2016, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# +# src/ebpf/Makefile -- Makefile for libebpf +# + + +LIBRARY_NAME = ebpf + +# eBPF sources +BIN_SRCS = \ + trace.h \ + trace_head.c \ + trace_tp_all.c \ + trace_kern_tmpl.c \ + trace_libc_tmpl.c \ + trace_file_tmpl.c \ + trace_fileat_tmpl.c \ + + +BIN_OBJS = $(addsuffix .o,$(BIN_SRCS)) + +%.c.o: %.c Makefile + $(LD) -r -b binary -o $@ $< + +%.h.o: %.h Makefile + $(LD) -r -b binary -o $@ $< + +OBJS = $(BIN_OBJS) + + +include ../Makefile.inc diff --git a/src/ebpf/README b/src/ebpf/README new file mode 100644 index 000000000..80060e015 --- /dev/null +++ b/src/ebpf/README @@ -0,0 +1 @@ +This directory contains code which runs inside eBPF VM. diff --git a/src/ebpf/trace.h b/src/ebpf/trace.h new file mode 100644 index 000000000..b611587a2 --- /dev/null +++ b/src/ebpf/trace.h @@ -0,0 +1,103 @@ +/* + * Copyright 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * trace.h -- Data exchange packet between packet filter and reader callback + */ + + +#ifndef TRACE_H +#define TRACE_H + + +struct ev_dt_t { + /* + * A value equal to -1 means "header". + * A value equal to -2 means that the syscall number is unknown to glibc + * and the field sc_name should be used to figure out the syscall. 
+ */ + s64 sc_id; + + u64 pid_tid; + + /* Timestamps */ + u64 start_ts_nsec; + u64 finish_ts_nsec; + s64 ret; + + union { + struct { + s64 arg_1; + s64 arg_2; + s64 arg_3; + s64 arg_4; + s64 arg_5; + s64 arg_6; + }; + struct { + } open; + + struct { + s64 fd; + } close; + + struct { + s64 fd; + } read; + + struct { + s64 fd; + } write; + }; + + union { + /* + * The longest syscall name is 26 characters long: + * 'SyS_sched_get_priority_max'. + * Leave room for the terminating '\0' plus a few extra bytes. + */ + char sc_name[32]; + + struct { + char fl_nm[NAME_MAX]; + /* Current process name. XXX Reserved for future. */ + char comm[TASK_COMM_LEN]; + }; + + struct { + s32 argc; + char argv[]; + } header; + }; +}; + +#endif /* TRACE_H */ diff --git a/src/ebpf/trace_file_tmpl.c b/src/ebpf/trace_file_tmpl.c new file mode 100644 index 000000000..336df6a03 --- /dev/null +++ b/src/ebpf/trace_file_tmpl.c @@ -0,0 +1,98 @@ +/* + * Copyright 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * trace_file_tmpl.c -- Trace syscalls with numbers known from libc and + * filename as first argument. Uses BCC, eBPF. + */ + +/* + * SYSCALL_NAME() entry handler: stash args and start time keyed by pid_tid + */ +int +kprobe__SYSCALL_NAME(struct pt_regs *ctx) +{ + struct first_step_t fs = {}; + u64 pid_tid = bpf_get_current_pid_tgid(); + + PID_CHECK_HOOK + + fs.start_ts_nsec = bpf_ktime_get_ns(); + fs.arg_1 = PT_REGS_PARM1(ctx); + fs.arg_2 = PT_REGS_PARM2(ctx); + fs.arg_3 = PT_REGS_PARM3(ctx); + fs.arg_4 = PT_REGS_PARM4(ctx); + fs.arg_5 = PT_REGS_PARM5(ctx); + fs.arg_6 = PT_REGS_PARM6(ctx); + + tmp_i.update(&pid_tid, &fs); + + return 0; +} + +/* + * SYSCALL_NAME() exit handler: emit the event and drop the stashed entry + */ +int +kretprobe__SYSCALL_NAME(struct pt_regs *ctx) +{ + struct first_step_t *fsp; + struct ev_dt_t ev = {}; + + u64 cur_nsec = bpf_ktime_get_ns(); + + u64 pid_tid = bpf_get_current_pid_tgid(); + fsp = tmp_i.lookup(&pid_tid); + if (fsp == 0) + return 0; + + ev.sc_id = SYSCALL_NR; /* SysCall ID */ + ev.arg_1 = fsp->arg_1; + ev.arg_2 = fsp->arg_2; + ev.arg_3 = fsp->arg_3; + ev.arg_4 = fsp->arg_4; + ev.arg_5 = fsp->arg_5; + ev.arg_6 = fsp->arg_6; + ev.pid_tid = pid_tid; + ev.start_ts_nsec = fsp->start_ts_nsec; + ev.finish_ts_nsec = cur_nsec; + ev.ret = PT_REGS_RC(ctx); + bpf_probe_read(&ev.fl_nm, sizeof(ev.fl_nm), (void *)fsp->arg_1); + + const size_t ev_size = offsetof(struct ev_dt_t, fl_nm) + + sizeof(ev.fl_nm); + events.perf_submit(ctx, &ev, ev_size); + + tmp_i.delete(&pid_tid); + + 
return 0; +} diff --git a/src/ebpf/trace_fileat_tmpl.c b/src/ebpf/trace_fileat_tmpl.c new file mode 100644 index 000000000..3621243ca --- /dev/null +++ b/src/ebpf/trace_fileat_tmpl.c @@ -0,0 +1,98 @@ +/* + * Copyright 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * trace_fileat_tmpl.c -- Trace syscalls with numbers known from libc and + * a fd as first arg and a filename as second argument. Uses BCC, eBPF. 
+ */ + +/* + * SYSCALL_NAME() entry handler: stash args and start time keyed by pid_tid + */ +int +kprobe__SYSCALL_NAME(struct pt_regs *ctx) +{ + struct first_step_t fs = {}; + u64 pid_tid = bpf_get_current_pid_tgid(); + + PID_CHECK_HOOK + + fs.start_ts_nsec = bpf_ktime_get_ns(); + fs.arg_1 = PT_REGS_PARM1(ctx); + fs.arg_2 = PT_REGS_PARM2(ctx); + fs.arg_3 = PT_REGS_PARM3(ctx); + fs.arg_4 = PT_REGS_PARM4(ctx); + fs.arg_5 = PT_REGS_PARM5(ctx); + fs.arg_6 = PT_REGS_PARM6(ctx); + + tmp_i.update(&pid_tid, &fs); + + return 0; +} + +/* + * SYSCALL_NAME() exit handler: emit the event and drop the stashed entry + */ +int +kretprobe__SYSCALL_NAME(struct pt_regs *ctx) +{ + struct first_step_t *fsp; + struct ev_dt_t ev = {}; + + u64 cur_nsec = bpf_ktime_get_ns(); + + u64 pid_tid = bpf_get_current_pid_tgid(); + fsp = tmp_i.lookup(&pid_tid); + if (fsp == 0) + return 0; + + ev.sc_id = SYSCALL_NR; /* SysCall ID */ + ev.arg_1 = fsp->arg_1; + ev.arg_2 = fsp->arg_2; + ev.arg_3 = fsp->arg_3; + ev.arg_4 = fsp->arg_4; + ev.arg_5 = fsp->arg_5; + ev.arg_6 = fsp->arg_6; + ev.pid_tid = pid_tid; + ev.start_ts_nsec = fsp->start_ts_nsec; + ev.finish_ts_nsec = cur_nsec; + ev.ret = PT_REGS_RC(ctx); + bpf_probe_read(&ev.fl_nm, sizeof(ev.fl_nm), (void *)fsp->arg_2); + + const size_t ev_size = offsetof(struct ev_dt_t, fl_nm) + + sizeof(ev.fl_nm); + events.perf_submit(ctx, &ev, ev_size); + + tmp_i.delete(&pid_tid); + + return 0; +} diff --git a/src/ebpf/trace_head.c b/src/ebpf/trace_head.c new file mode 100644 index 000000000..ef4aec4ee --- /dev/null +++ b/src/ebpf/trace_head.c @@ -0,0 +1,54 @@ +/* + * Copyright 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * trace_head.c -- Header for generated eBPF code. Uses BCC, eBPF. 
+ */ + +#include +#include +#include + +#include "trace.h" + +struct first_step_t { + s64 arg_1; + s64 arg_2; + s64 arg_3; + s64 arg_4; + s64 arg_5; + s64 arg_6; + u64 start_ts_nsec; +}; + +BPF_HASH(tmp_i, u64, struct first_step_t); +BPF_PERF_OUTPUT(events); diff --git a/src/ebpf/trace_kern_tmpl.c b/src/ebpf/trace_kern_tmpl.c new file mode 100644 index 000000000..85d13d5ef --- /dev/null +++ b/src/ebpf/trace_kern_tmpl.c @@ -0,0 +1,98 @@ +/* + * Copyright 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * trace_kern_tmpl.c -- Trace syscalls with unknown numbers.
+ *                      Uses BCC, eBPF.
+ */
+
+/*
+ * SYSCALL_NAME() entry handler.
+ *
+ * Stores the entry timestamp and the six syscall arguments in the 'tmp_i'
+ * hash, keyed by the value of bpf_get_current_pid_tgid(), so that the exit
+ * handler can pair them with the return value.
+ *
+ * Always returns 0.
+ */
+int
+kprobe__SYSCALL_NAME(struct pt_regs *ctx)
+{
+	struct first_step_t fs = {};
+	u64 pid_tid = bpf_get_current_pid_tgid();
+
+	/* NOTE(review): template token, presumably a PID filter - confirm */
+	PID_CHECK_HOOK
+
+	fs.start_ts_nsec = bpf_ktime_get_ns();
+	fs.arg_1 = PT_REGS_PARM1(ctx);
+	fs.arg_2 = PT_REGS_PARM2(ctx);
+	fs.arg_3 = PT_REGS_PARM3(ctx);
+	fs.arg_4 = PT_REGS_PARM4(ctx);
+	fs.arg_5 = PT_REGS_PARM5(ctx);
+	/* the sixth argument belongs in arg_6; it must not overwrite arg_5 */
+	fs.arg_6 = PT_REGS_PARM6(ctx);
+
+	tmp_i.update(&pid_tid, &fs);
+
+	return 0;
+}
+
+/*
+ * SYSCALL_NAME() exit handler.
+ *
+ * Looks up the record saved by the entry handler for the current pid/tgid
+ * key. If there is none, nothing is emitted. Otherwise an event is built from
+ * the saved arguments, both timestamps and the return value. Because the
+ * syscall number is not known, sc_id is set to -2 and the syscall name is
+ * carried in the event as a string. The event is submitted to the 'events'
+ * perf output and the saved record is deleted.
+ *
+ * Always returns 0.
+ */
+int
+kretprobe__SYSCALL_NAME(struct pt_regs *ctx)
+{
+	struct first_step_t *fsp;
+	struct ev_dt_t ev = {};
+
+	/* take the finish timestamp before doing any other work */
+	u64 cur_nsec = bpf_ktime_get_ns();
+
+	u64 pid_tid = bpf_get_current_pid_tgid();
+	fsp = tmp_i.lookup(&pid_tid);
+	if (fsp == 0)
+		return 0;
+
+	ev.sc_id = -2; /* SysCall ID */
+	ev.arg_1 = fsp->arg_1;
+	ev.arg_2 = fsp->arg_2;
+	ev.arg_3 = fsp->arg_3;
+	ev.arg_4 = fsp->arg_4;
+	ev.arg_5 = fsp->arg_5;
+	ev.arg_6 = fsp->arg_6;
+	ev.pid_tid = pid_tid;
+	ev.start_ts_nsec = fsp->start_ts_nsec;
+	ev.finish_ts_nsec = cur_nsec;
+	ev.ret = PT_REGS_RC(ctx);
+	strcpy(ev.sc_name, "SYSCALL_NAME");
+
+	/* submit the event only up to and including the syscall name */
+	const size_t ev_size = offsetof(struct ev_dt_t, sc_name) +
+			sizeof(ev.sc_name);
+	events.perf_submit(ctx, &ev, ev_size);
+
+	tmp_i.delete(&pid_tid);
+
+	return 0;
+}
diff --git a/src/ebpf/trace_libc_tmpl.c
b/src/ebpf/trace_libc_tmpl.c new file mode 100644 index 000000000..9c7634db2 --- /dev/null +++ b/src/ebpf/trace_libc_tmpl.c @@ -0,0 +1,96 @@ +/* + * Copyright 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * trace_libc_tmpl.c -- Trace syscalls with numbers known from libc. + * Uses BCC, eBPF. 
+ */
+
+/*
+ * SYSCALL_NAME() entry handler.
+ *
+ * Stores the entry timestamp and the six syscall arguments in the 'tmp_i'
+ * hash, keyed by the value of bpf_get_current_pid_tgid(), so that the exit
+ * handler can pair them with the return value.
+ *
+ * Always returns 0.
+ */
+int
+kprobe__SYSCALL_NAME(struct pt_regs *ctx)
+{
+	struct first_step_t fs = {};
+	u64 pid_tid = bpf_get_current_pid_tgid();
+
+	/* NOTE(review): template token, presumably a PID filter - confirm */
+	PID_CHECK_HOOK
+
+	fs.start_ts_nsec = bpf_ktime_get_ns();
+	fs.arg_1 = PT_REGS_PARM1(ctx);
+	fs.arg_2 = PT_REGS_PARM2(ctx);
+	fs.arg_3 = PT_REGS_PARM3(ctx);
+	fs.arg_4 = PT_REGS_PARM4(ctx);
+	fs.arg_5 = PT_REGS_PARM5(ctx);
+	/* the sixth argument belongs in arg_6; it must not overwrite arg_5 */
+	fs.arg_6 = PT_REGS_PARM6(ctx);
+
+	tmp_i.update(&pid_tid, &fs);
+
+	return 0;
+}
+
+/*
+ * SYSCALL_NAME() exit handler.
+ *
+ * Looks up the record saved by the entry handler for the current pid/tgid
+ * key. If there is none, nothing is emitted. Otherwise an event is built from
+ * the saved arguments, both timestamps and the return value, submitted to
+ * the 'events' perf output, and the saved record is deleted.
+ *
+ * Always returns 0.
+ */
+int
+kretprobe__SYSCALL_NAME(struct pt_regs *ctx)
+{
+	struct first_step_t *fsp;
+	struct ev_dt_t ev = {};
+
+	/* take the finish timestamp before doing any other work */
+	u64 cur_nsec = bpf_ktime_get_ns();
+
+	u64 pid_tid = bpf_get_current_pid_tgid();
+	fsp = tmp_i.lookup(&pid_tid);
+	if (fsp == 0)
+		return 0;
+
+	ev.sc_id = SYSCALL_NR; /* SysCall ID */
+	ev.arg_1 = fsp->arg_1;
+	ev.arg_2 = fsp->arg_2;
+	ev.arg_3 = fsp->arg_3;
+	ev.arg_4 = fsp->arg_4;
+	ev.arg_5 = fsp->arg_5;
+	ev.arg_6 = fsp->arg_6;
+	ev.pid_tid = pid_tid;
+	ev.start_ts_nsec = fsp->start_ts_nsec;
+	ev.finish_ts_nsec = cur_nsec;
+	ev.ret = PT_REGS_RC(ctx);
+
+	/* the syscall number is known, so the event ends before sc_name */
+	const size_t ev_size = offsetof(struct ev_dt_t, sc_name);
+	events.perf_submit(ctx, &ev, ev_size);
+
+	tmp_i.delete(&pid_tid);
+
+	return 0;
+}
diff --git a/src/ebpf/trace_tp_all.c b/src/ebpf/trace_tp_all.c
new file mode 100644
index 000000000..5ecf58005
--- /dev/null
+++ b/src/ebpf/trace_tp_all.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *
+ *     * Neither the name of the copyright holder nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * trace_tp_all.c -- Trace syscalls. Uses BCC, eBPF.
+ */
+
+/*
+ * Syscall's entry handler.
+ *
+ * Saves the name of the current task and the entry timestamp in the 'tmp_i'
+ * hash, keyed by the value of bpf_get_current_pid_tgid(). Nothing is saved
+ * if the task name cannot be obtained.
+ *
+ * Always returns 0.
+ *
+ * NOTE(review): struct first_step_t in trace_head.c does not declare a
+ * 'comm' member, so this handler cannot compile against it as is. The
+ * tracepoint mode is marked as not tested; confirm the intended layout.
+ */
+int
+tracepoint__sys_enter(struct pt_regs *ctx)
+{
+	struct first_step_t fs = {};
+	u64 pid_tid = bpf_get_current_pid_tgid();
+
+	/* NOTE(review): template token, presumably a PID filter - confirm */
+	PID_CHECK_HOOK
+
+	/* bpf_get_current_comm() returns 0 on success, non-zero on failure */
+	if (bpf_get_current_comm(&fs.comm, sizeof(fs.comm)) != 0)
+		return 0;
+
+	fs.start_ts_nsec = bpf_ktime_get_ns();
+	tmp_i.update(&pid_tid, &fs);
+
+	return 0;
+}
+
+/*
+ * Syscall's exit handler.
+ */
+int
+tracepoint__sys_exit(struct pt_regs *ctx)
+{
+	struct first_step_t *fsp;
+	struct ev_dt_t ev = {};
+
+	/* take the finish timestamp before doing any other work */
+	u64 cur_nsec = bpf_ktime_get_ns();
+
+	/* nothing to report when the entry handler saved no record */
+	u64 pid_tid = bpf_get_current_pid_tgid();
+	fsp = tmp_i.lookup(&pid_tid);
+	if (fsp == 0)
+		return 0;
+
+	/*
+	 * NOTE(review): 'comm' and 'fl_nm' are not members of struct
+	 * first_step_t as declared in trace_head.c, and 'ev.open' is not
+	 * used by the other templates - confirm against trace.h before
+	 * enabling the tracepoint mode.
+	 */
+	bpf_probe_read(&ev.comm, sizeof(ev.comm), fsp->comm);
+	bpf_probe_read(&ev.open.fl_nm,
+			sizeof(ev.open.fl_nm),
+			(void *)fsp->fl_nm);
+	/* SysCall ID */
+	/* ev.sc_id = __NR_open; */
+	ev.pid_tid = pid_tid;
+	ev.start_ts_nsec = fsp->start_ts_nsec;
+	ev.finish_ts_nsec = cur_nsec;
+	ev.ret = PT_REGS_RC(ctx);
+
+	/* unlike the kprobe templates, the whole event structure is sent */
+	events.perf_submit(ctx, &ev, sizeof(ev));
+	tmp_i.delete(&pid_tid);
+
+	return 0;
+}
diff --git a/src/file_sc_bench.c b/src/file_sc_bench.c
new file mode 100644
index 000000000..6fae43dd6
--- /dev/null
+++ b/src/file_sc_bench.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *
+ *     * Neither the name of the copyright holder nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * file_sc_bench.c -- benchmark for strace.ebpf. This simple benchmark
+ *    allows us to measure and compare different tracing tools. It does not
+ *    have any dependencies and is compiled during 'make run'.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+typedef void (*tx_t)();
+
+/*
+ * The measured use case: one open/read/write/close sequence on /dev/null.
+ *
+ * Results of the individual syscalls are deliberately not checked; only the
+ * cost of issuing them matters for the benchmark.
+ */
+static void
+open_close()
+{
+	int fd;
+	int x;
+
+	fd = open("/dev/null", O_RDONLY);
+	/* 'x' serves both as the I/O buffer and as the sink for the result */
+	x = read(fd, &x, sizeof(x));
+	/* the descriptor is read-only, so this write is issued but fails */
+	x = write(fd, &x, sizeof(x));
+	(void) close(fd);
+}
+
+/*
+ * This function runs and measures tested usecase.
+ */
+static void
+loop_tx(char *name, tx_t tx_f, uint64_t qty, FILE *f)
+{
+	uint64_t i;
+
+	uint64_t tu_start, tu_end, delta;
+	struct timeval tv_start, tv_end;
+
+	gettimeofday(&tv_start, NULL);
+
+	for (i = 0; i < qty; i++)
+		tx_f();
+
+	gettimeofday(&tv_end, NULL);
+
+	/*
+	 * No report is wanted when 'f' is NULL (warm-up run). With no
+	 * iterations there is no average to report either, and dividing
+	 * by 'qty' would be a division by zero.
+	 */
+	if (NULL == f || 0 == qty)
+		return;
+
+	/* widen before multiplying so that a narrow time_t cannot overflow */
+	tu_start = (uint64_t)tv_start.tv_sec * 1000000 +
+			(uint64_t)tv_start.tv_usec;
+	tu_end = (uint64_t)tv_end.tv_sec * 1000000 +
+			(uint64_t)tv_end.tv_usec;
+
+	/* microseconds to nanoseconds */
+	delta = (tu_end - tu_start);
+	delta *= 1000;
+
+	/* report to the stream chosen by the caller, not always to stderr */
+	fprintf(f, "%s: Iter time: %llu nsec\n", name,
+			(unsigned long long)(delta / qty));
+}
+
+/*
+ * BenchMark entry point.
+ *
+ * Takes exactly one argument, the number of iterations, which must be a
+ * positive number. Runs a warm-up pass of one tenth of the iterations without
+ * reporting, then the measured pass whose average time per iteration is
+ * printed to stderr.
+ *
+ * Returns 0 on success and 1 on wrong usage.
+ */
+int
+main(int argc, char *argv[])
+{
+	uint64_t iters_qty;
+	long parsed_qty;
+
+	if (argc != 2) {
+		printf("usage: %s iters qty\n", argv[0]);
+		return 1;
+	}
+
+	/* a negative value would turn into a huge unsigned iteration count */
+	parsed_qty = atol(argv[1]);
+	if (parsed_qty <= 0) {
+		printf("usage: %s iters qty\n", argv[0]);
+		return 1;
+	}
+
+	iters_qty = (uint64_t)parsed_qty;
+
+	/* WARM-UP */
+	loop_tx("open_read_write_close",
+			open_close, iters_qty / 10, NULL);
+	loop_tx(">>> open_read_write_close ",
+			open_close, iters_qty, stderr);
+
+	return 0;
+}
diff --git a/src/libstrace/Makefile b/src/libstrace/Makefile
new file mode 100644
index 000000000..dbfd38336
--- /dev/null
+++ b/src/libstrace/Makefile
@@ -0,0 +1,64 @@
+# Copyright 2016, Intel Corporation
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#
+#     * Neither the name of the copyright holder nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# +# src/libstrace/Makefile -- Makefile for libstrace +# + +#TOP = ../../.. + +LIBRARY_NAME = strace + +OBJS = bpf.o\ + utils.o\ + attach_probes.o \ + ebpf_syscalls.o \ + generate_ebpf.o \ + print_event_cb.o \ + + +CFLAGS += $(shell $(PKG_CONFIG) --cflags libbcc) + +# XXX libbcc expects multi-treading safity. Currently it's required for +# print_event_cb.o only, although we will apply it for overall application. +CFLAGS += -pthread + +CFLAGS += -g -Wextra + +ifeq ($(CC),clang) +CFLAGS += -Wno-initializer-overrides +else +CFLAGS += -Wno-override-init +endif + +CFLAGS += -I ../compat +CFLAGS += -I ../ebpf + +include ../Makefile.inc diff --git a/src/libstrace/README b/src/libstrace/README new file mode 100644 index 000000000..fccf13d14 --- /dev/null +++ b/src/libstrace/README @@ -0,0 +1 @@ +This directory contains libstrace code. 
diff --git a/src/libstrace/attach_probes.c b/src/libstrace/attach_probes.c new file mode 100644 index 000000000..6b40571c5 --- /dev/null +++ b/src/libstrace/attach_probes.c @@ -0,0 +1,451 @@ +/* + * Copyright 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+/*
+ * attach_probes.c -- attach_probes() function
+ */
+
+#include
+
+#include
+
+#include "bpf.h"
+#include "main.h"
+#include "utils.h"
+#include "attach_probes.h"
+#include "ebpf_syscalls.h"
+
+enum { HANDLER_NAME_MAX_SIZE = 128 };
+
+/*
+ * Attach a kretprobe/kprobe pair of eBPF handlers to every entry of sc_tbl
+ * which has a handler name, i.e. to each syscall known to libc.
+ *
+ * This is cheaper than probing everything the kernel exports, because the
+ * kernel has a lot of "unused" syscalls.
+ *
+ * Returns true if at least one syscall got both of its probes attached.
+ */
+static bool
+attach_kp_libc_all(struct bpf_ctx *b)
+{
+	unsigned attached_qty = 0;
+
+	for (unsigned n = 0; n < SC_TBL_SIZE; n++) {
+		char entry_fn[HANDLER_NAME_MAX_SIZE];
+		char exit_fn[HANDLER_NAME_MAX_SIZE];
+		int rc;
+
+		/* holes in the table have no handler to attach */
+		if (NULL == sc_tbl[n].hlr_name)
+			continue;
+
+		snprintf(exit_fn, sizeof(exit_fn),
+				"kretprobe__%s", sc_tbl[n].hlr_name);
+		snprintf(entry_fn, sizeof(entry_fn),
+				"kprobe__%s", sc_tbl[n].hlr_name);
+
+		/* the exit probe goes first to prevent a race condition */
+		rc = load_fn_and_attach_to_kretp(b,
+				sc_tbl[n].hlr_name, exit_fn,
+				args.pid, 0, -1);
+		if (rc == -1) {
+			fprintf(stderr,
+				"ERROR:%s:Can't attach %s to '%s'. Ignoring.\n",
+				__func__, exit_fn, sc_tbl[n].hlr_name);
+
+			/* without the exit probe the entry probe is useless */
+			continue;
+		}
+
+		rc = load_fn_and_attach_to_kp(b, sc_tbl[n].hlr_name, entry_fn,
+				args.pid, 0, -1);
+		if (rc == -1) {
+			fprintf(stderr,
+				"ERROR:%s:Can't attach %s to '%s'. Ignoring.\n",
+				__func__, entry_fn, sc_tbl[n].hlr_name);
+
+			continue;
+		}
+
+		attached_qty += 1;
+	}
+
+	return attached_qty != 0;
+}
+
+/* XXX HACK: this syscall is exported by kernel twice. */
+static unsigned SyS_sigsuspend = 0;
+
+/*
+ * This function attaches eBPF handler to all existing syscalls in running
+ * kernel. It consume more time than attach_kp_libc_all().
+ */
+static bool
+attach_kp_kern_all(struct bpf_ctx *b)
+{
+	unsigned succ_counter = 0;
+
+	/* buffer owned by getline(); released once after the loop */
+	char *line = NULL;
+	size_t len = 0;
+	ssize_t read;
+
+	/* NOTE(review): presumably the kernel's list of traceable functions */
+	FILE *in = fopen(debug_tracing_aff, "r");
+
+	if (NULL == in) {
+		fprintf(stderr, "%s: ERROR: '%m'\n", __func__);
+		return false;
+	}
+
+	while ((read = getline(&line, &len, in)) != -1) {
+		int res;
+		char kprobe[HANDLER_NAME_MAX_SIZE];
+		char kretprobe[HANDLER_NAME_MAX_SIZE];
+
+		/* the length passed excludes the last character of the line */
+		if (!is_a_sc(line, read - 1))
+			continue;
+
+		/* cut the last character (the newline) to get a bare name */
+		line [read - 1] = '\0';
+
+		/* XXX HACK: this syscall is exported by kernel twice. */
+		if (!strcasecmp("SyS_sigsuspend", line)) {
+			/* only the first occurrence is probed */
+			if (SyS_sigsuspend)
+				continue;
+
+			SyS_sigsuspend ++;
+		}
+
+		snprintf(kprobe, sizeof(kprobe),
+				"kprobe__%s", line);
+
+		snprintf(kretprobe, sizeof(kretprobe),
+				"kretprobe__%s", line);
+
+		/* KRetProbe should be first to prevent race condition */
+		res = load_fn_and_attach_to_kretp(b, line, kretprobe,
+				args.pid, 0, -1);
+
+		if (res == -1) {
+			fprintf(stderr,
+				"ERROR:%s:Can't attach %s to '%s'. Ignoring.\n",
+				__func__, kretprobe, line);
+
+			/* Kretprobe fails. There is no reason to try probe */
+			continue;
+		}
+
+		res = load_fn_and_attach_to_kp(b, line, kprobe,
+				args.pid, 0, -1);
+
+		if (res == -1) {
+			fprintf(stderr,
+				"ERROR:%s:Can't attach %s to '%s'. Ignoring.\n",
+				__func__, kprobe, line);
+
+			continue;
+		}
+
+		/* counted only when both probes of the pair are attached */
+		succ_counter ++;
+	}
+
+	free(line);
+	fclose(in);
+
+	/* success means at least one syscall is being traced */
+	return succ_counter > 0;
+}
+
+/*
+ * This function attaches eBPF handler to each syscall which operates on file
+ * descriptor.
Inspired by: 'strace -e trace=desc'
+ *
+ * Only sc_tbl entries which have a handler name and the EM_desc bits set in
+ * their masks are probed. Returns true if at least one syscall got both of
+ * its probes attached.
+ */
+static bool
+attach_kp_desc(struct bpf_ctx *b)
+{
+	unsigned attached_qty = 0;
+
+	for (unsigned n = 0; n < SC_TBL_SIZE; n++) {
+		char entry_fn[HANDLER_NAME_MAX_SIZE];
+		char exit_fn[HANDLER_NAME_MAX_SIZE];
+		int rc;
+
+		/* holes in the table have no handler to attach */
+		if (NULL == sc_tbl[n].hlr_name)
+			continue;
+
+		/* all bits of EM_desc have to be present in the masks */
+		if ((sc_tbl[n].masks & EM_desc) != EM_desc)
+			continue;
+
+		snprintf(exit_fn, sizeof(exit_fn),
+				"kretprobe__%s", sc_tbl[n].hlr_name);
+		snprintf(entry_fn, sizeof(entry_fn),
+				"kprobe__%s", sc_tbl[n].hlr_name);
+
+		/* the exit probe goes first to prevent a race condition */
+		rc = load_fn_and_attach_to_kretp(b,
+				sc_tbl[n].hlr_name, exit_fn,
+				args.pid, 0, -1);
+		if (rc == -1) {
+			fprintf(stderr,
+				"ERROR:%s:Can't attach %s to '%s'. Ignoring.\n",
+				__func__, exit_fn, sc_tbl[n].hlr_name);
+
+			/* without the exit probe the entry probe is useless */
+			continue;
+		}
+
+		rc = load_fn_and_attach_to_kp(b, sc_tbl[n].hlr_name, entry_fn,
+				args.pid, 0, -1);
+		if (rc == -1) {
+			fprintf(stderr,
+				"ERROR:%s:Can't attach %s to '%s'. Ignoring.\n",
+				__func__, entry_fn, sc_tbl[n].hlr_name);
+
+			continue;
+		}
+
+		attached_qty += 1;
+	}
+
+	return attached_qty != 0;
+}
+
+/*
+ * This function attaches eBPF handler to each syscall which operates on
+ * filenames. Inspired by 'strace -e trace=file'.
+ */
+static bool
+attach_kp_file(struct bpf_ctx *b)
+{
+	unsigned attached_qty = 0;
+
+	for (unsigned n = 0; n < SC_TBL_SIZE; n++) {
+		char entry_fn[HANDLER_NAME_MAX_SIZE];
+		char exit_fn[HANDLER_NAME_MAX_SIZE];
+		int rc;
+
+		/* holes in the table have no handler to attach */
+		if (NULL == sc_tbl[n].hlr_name)
+			continue;
+
+		/* all bits of EM_file have to be present in the masks */
+		if ((sc_tbl[n].masks & EM_file) != EM_file)
+			continue;
+
+		snprintf(exit_fn, sizeof(exit_fn),
+				"kretprobe__%s", sc_tbl[n].hlr_name);
+		snprintf(entry_fn, sizeof(entry_fn),
+				"kprobe__%s", sc_tbl[n].hlr_name);
+
+		/* the exit probe goes first to prevent a race condition */
+		rc = load_fn_and_attach_to_kretp(b,
+				sc_tbl[n].hlr_name, exit_fn,
+				args.pid, 0, -1);
+		if (rc == -1) {
+			fprintf(stderr,
+				"ERROR:%s:Can't attach %s to '%s'. Ignoring.\n",
+				__func__, exit_fn, sc_tbl[n].hlr_name);
+
+			/* without the exit probe the entry probe is useless */
+			continue;
+		}
+
+		rc = load_fn_and_attach_to_kp(b, sc_tbl[n].hlr_name, entry_fn,
+				args.pid, 0, -1);
+		if (rc == -1) {
+			fprintf(stderr,
+				"ERROR:%s:Can't attach %s to '%s'. Ignoring.\n",
+				__func__, entry_fn, sc_tbl[n].hlr_name);
+
+			continue;
+		}
+
+		attached_qty += 1;
+	}
+
+	/* true when at least one syscall got both of its probes */
+	return attached_qty != 0;
+}
+
+/*
+ * This function attaches eBPF handler to each syscall which operates on
+ * relative file path. There are no equivalents in strace.
+ */
+static bool
+attach_kp_fileat(struct bpf_ctx *b)
+{
+	unsigned attached_qty = 0;
+
+	for (unsigned n = 0; n < SC_TBL_SIZE; n++) {
+		char entry_fn[HANDLER_NAME_MAX_SIZE];
+		char exit_fn[HANDLER_NAME_MAX_SIZE];
+		int rc;
+
+		/* holes in the table have no handler to attach */
+		if (NULL == sc_tbl[n].hlr_name)
+			continue;
+
+		/* all bits of EM_fileat have to be present in the masks */
+		if ((sc_tbl[n].masks & EM_fileat) != EM_fileat)
+			continue;
+
+		snprintf(exit_fn, sizeof(exit_fn),
+				"kretprobe__%s", sc_tbl[n].hlr_name);
+		snprintf(entry_fn, sizeof(entry_fn),
+				"kprobe__%s", sc_tbl[n].hlr_name);
+
+		/* the exit probe goes first to prevent a race condition */
+		rc = load_fn_and_attach_to_kretp(b,
+				sc_tbl[n].hlr_name, exit_fn,
+				args.pid, 0, -1);
+		if (rc == -1) {
+			fprintf(stderr,
+				"ERROR:%s:Can't attach %s to '%s'. Ignoring.\n",
+				__func__, exit_fn, sc_tbl[n].hlr_name);
+
+			/* without the exit probe the entry probe is useless */
+			continue;
+		}
+
+		rc = load_fn_and_attach_to_kp(b, sc_tbl[n].hlr_name, entry_fn,
+				args.pid, 0, -1);
+		if (rc == -1) {
+			fprintf(stderr,
+				"ERROR:%s:Can't attach %s to '%s'. Ignoring.\n",
+				__func__, entry_fn, sc_tbl[n].hlr_name);
+
+			continue;
+		}
+
+		attached_qty += 1;
+	}
+
+	/* true when at least one syscall got both of its probes */
+	return attached_qty != 0;
+}
+
+/*
+ * Attach eBPF handlers to all file-related syscalls: the descriptor-based,
+ * the filename-based and the relative-path-based groups. Inspired by:
+ * 'strace -e trace=desc,file'
+ *
+ * Every group is always tried. Returns true if any of them succeeded.
+ */
+static bool
+attach_kp_pmemfile(struct bpf_ctx *b)
+{
+	/* no short-circuiting here: each group has to be attempted */
+	const bool desc_ok = attach_kp_desc(b);
+	const bool file_ok = attach_kp_file(b);
+	const bool fileat_ok = attach_kp_fileat(b);
+
+	return desc_ok || file_ok || fileat_ok;
+}
+
+/* Tracepoint category, event names and handler names of the tp-all mode */
+static const char tp_all_category[] = "raw_syscalls";
+static const char tp_all_enter_name[] = "sys_enter";
+static const char tp_all_exit_name[] = "sys_exit";
+static const char tp_all_enter_fn[] = "tracepoint__sys_enter";
+static const char tp_all_exit_fn[] = "tracepoint__sys_exit";
+
+/*
+ * Intercept all syscalls of running kernel using TracePoint way.
+ * Should be faster and better but require at kernel at least 4.6.
+ *
+ * XXX Not tested.
+ */
+static bool
+attach_tp_all(struct bpf_ctx *b)
+{
+	int res;
+
+	/*
+	 * 'sys_exit' should be first to prevent race condition: the same
+	 * order as the kretprobe-before-kprobe rule of the kprobe modes.
+	 */
+	res = load_fn_and_attach_to_tp(b, tp_all_category, tp_all_exit_name,
+			tp_all_exit_fn, args.pid, 0, -1);
+
+	if (res == -1) {
+		fprintf(stderr,
+			"ERROR:%s:Can't attach %s to '%s:%s'. Exiting.\n",
+			__func__, tp_all_exit_fn,
+			tp_all_category, tp_all_exit_name);
+
+		/* Tracepoint fails. There is no reason to try continue */
+		return false;
+	}
+
+	res = load_fn_and_attach_to_tp(b, tp_all_category, tp_all_enter_name,
+			tp_all_enter_fn, args.pid, 0, -1);
+
+	if (res == -1) {
+		fprintf(stderr,
+			"ERROR:%s:Can't attach %s to '%s:%s'. Exiting.\n",
+			__func__, tp_all_enter_fn,
+			tp_all_category, tp_all_enter_name);
+
+		/*
+		 * The exit handler only reports syscalls recorded by the
+		 * entry handler, so one handler alone traces nothing.
+		 */
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * This function parses and processes expression.
+ *
+ * Recognized values of args.expr (compared case-insensitively):
+ *   trace=kp-libc-all, trace=kp-kern-all, trace=kp-file, trace=kp-fileat,
+ *   trace=kp-desc, trace=kp-pmemfile, trace=tp-all
+ *
+ * A missing or unrecognized expression selects the 'trace=kp-kern-all'
+ * behavior. Returns the result of the selected attach function.
+ *
+ * XXX Think about applying 'fn_name' via str_replace_all()
+ *    to be more consistent
+ */
+bool
+attach_probes(struct bpf_ctx *b)
+{
+	if (NULL == args.expr)
+		return attach_kp_kern_all(b);
+
+	if (!strcasecmp(args.expr, "trace=kp-libc-all"))
+		return attach_kp_libc_all(b);
+
+	if (!strcasecmp(args.expr, "trace=kp-kern-all"))
+		return attach_kp_kern_all(b);
+
+	if (!strcasecmp(args.expr, "trace=kp-file"))
+		return attach_kp_file(b);
+
+	/* the relative-path group alone; it is also part of kp-pmemfile */
+	if (!strcasecmp(args.expr, "trace=kp-fileat"))
+		return attach_kp_fileat(b);
+
+	if (!strcasecmp(args.expr, "trace=kp-desc"))
+		return attach_kp_desc(b);
+
+	if (!strcasecmp(args.expr, "trace=kp-pmemfile"))
+		return attach_kp_pmemfile(b);
+
+	if (!strcasecmp(args.expr, "trace=tp-all"))
+		return attach_tp_all(b);
+
+	/* default for anything not recognized above */
+	return attach_kp_kern_all(b);
+}
diff --git a/src/libstrace/attach_probes.h b/src/libstrace/attach_probes.h
new file mode 100644
index 000000000..ad8f55381
--- /dev/null
+++ b/src/libstrace/attach_probes.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *     * Redistributions
of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * attach_probes.h -- attach_probes() function + */ + +#ifndef ATTACH_PROBES_H +#define ATTACH_PROBES_H + +#include + +#include "bpf.h" + +bool attach_probes(struct bpf_ctx *b); + +#endif /* ATTACH_PROBES_H */ diff --git a/src/libstrace/bpf.c b/src/libstrace/bpf.c new file mode 100644 index 000000000..5ccffdeca --- /dev/null +++ b/src/libstrace/bpf.c @@ -0,0 +1,408 @@ +/* + * Copyright 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+/*
+ * bpf.c -- functions related to struct bpf_ctx
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include "bpf.h"
+#include "main.h"
+
+
+/*
+ * pr_arr_check_quota -- check whether 'new_pr_qty' more perf readers can be
+ * registered without exceeding the global quota args.pr_arr_max.
+ *
+ * This function checks possibility of intercepting one more syscall.
+ * Should be actual if we will intercept something more low-level than regular
+ * syscalls.
+ */
+static bool pr_arr_check_quota(struct bpf_ctx *sbcp, unsigned new_pr_qty)
+{
+	return sbcp->pr_arr_qty + new_pr_qty <= args.pr_arr_max;
+}
+
+/*
+ * release_probe -- free a perf reader and, if it belongs to an attached
+ * kprobe, remove the kprobe event named 'key' as well.
+ */
+static void
+release_probe(struct perf_reader *pr, const char *key, bool attached)
+{
+	perf_reader_free(pr);
+
+	/* non-attached keys here include the perf_events reader */
+	if (attached) {
+		char desc[256];
+
+		snprintf(desc, sizeof(desc), "-:kprobes/%s", key);
+		bpf_detach_kprobe(desc);
+	}
+}
+
+/*
+ * append_item_to_pr_arr -- save reference to handler of intercepted syscall
+ * in pr_arr.
+ *
+ * 'name' is copied into the new item. The array itself is allocated on the
+ * first call with args.pr_arr_max slots, so callers have to pass
+ * pr_arr_check_quota() before calling this function.
+ *
+ * If memory can't be allocated the probe can't be tracked for detach_all(),
+ * so it is reported and released immediately instead of being leaked.
+ */
+static void append_item_to_pr_arr(struct bpf_ctx *sbcp, const char *name,
+		struct perf_reader *probe, bool attached)
+{
+	if (NULL == sbcp->pr_arr)
+		sbcp->pr_arr =
+			calloc(args.pr_arr_max, sizeof(*sbcp->pr_arr));
+
+	struct bpf_pr *item = NULL;
+
+	if (NULL != sbcp->pr_arr)
+		item = calloc(1, sizeof(*item) + strlen(name) + 1);
+
+	if (NULL == item) {
+		fprintf(stderr,
+			"ERROR:%s:Can't register perf reader '%s': %m\n",
+			__func__, name);
+		release_probe(probe, name, attached);
+		return;
+	}
+
+	item->pr = probe;
+	item->attached = attached;
+	strcpy(item->key, name);
+
+	sbcp->pr_arr[sbcp->pr_arr_qty] = item;
+	sbcp->pr_arr_qty += 1;
+}
+
+/*
+ * attach_callback_to_perf_output -- register callback to capture stream of
+ * events.
+ *
+ * Looks up the table 'name' in the module, checks that it is a perf event
+ * array, then opens one perf buffer per online CPU and stores its fd in the
+ * table under the CPU number. Failures on a single CPU are reported and
+ * ignored.
+ *
+ * Returns 0 on success and -1 on error.
+ */
+int
+attach_callback_to_perf_output(struct bpf_ctx *sbcp,
+		const char *name, perf_reader_raw_cb callback)
+{
+	int map_fd = bpf_table_fd(sbcp->module, name);
+
+	if (map_fd < 0) {
+		fprintf(stderr,
+			"ERROR:%s:Can't attach to perf output '%s':%m.\n",
+			__func__, name);
+		return -1;
+	}
+
+	size_t map_id = bpf_table_id(sbcp->module, name);
+	int ttype = bpf_table_type_id(sbcp->module, map_id);
+
+	if (ttype != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+		fprintf(stderr, "ERROR:%s:Unknown table type %d.\n",
+			__func__, ttype);
+		return -1;
+	}
+
+	/*
+	 * XXX It can be reasonable to replace sysconf with sched_getaffinity().
+	 *    It will allow us to ignore non-actual CPUs.
+	 */
+	long cpu_qty = sysconf(_SC_NPROCESSORS_ONLN);
+
+	/* sysconf() returns -1 on failure; don't let it wrap to a huge count */
+	if (cpu_qty < 1) {
+		fprintf(stderr,
+			"ERROR:%s:Can't determine number of online CPUs.\n",
+			__func__);
+		return -1;
+	}
+
+	if (!pr_arr_check_quota(sbcp, (unsigned)cpu_qty)) {
+		fprintf(stderr,
+			"ERROR:%s:Number of perf readers would exceed"
+			" global quota: %d\n",
+			__func__, args.pr_arr_max);
+
+		return -1;
+	}
+
+	for (int cpu = 0; cpu < cpu_qty; cpu++) {
+		char reader_name[128];
+
+		struct perf_reader *reader =
+			bpf_open_perf_buffer(callback, NULL, -1, cpu);
+
+		if (NULL == reader) {
+			fprintf(stderr,
+				"WARNING:%s:"
+				"Could not open perf buffer on cpu %d."
+				" Ignored.\n",
+				__func__, cpu);
+			continue;
+		}
+
+		int fd = perf_reader_fd(reader);
+
+		int res = bpf_update_elem(map_fd, &cpu, &fd, 0);
+
+		if (res < 0) {
+			fprintf(stderr,
+				"WARNING:%s:"
+				"Could not update table on cpu %d: %m."
+				" Ignored.\n",
+				__func__, cpu);
+		}
+
+		/* the reader is recorded anyway, so detach_all() frees it */
+		snprintf(reader_name, sizeof(reader_name), "%p:%d",
+			(void *)sbcp, cpu);
+		append_item_to_pr_arr(sbcp, reader_name, reader, false);
+	}
+
+	return 0;
+}
+
+/*
+ * detach_all -- overall resource cleanup.
+ *
+ * Releases every recorded perf reader (detaching kprobes where needed),
+ * destroys the module and frees the context itself; 'b' must not be used
+ * after this call.
+ *
+ * WARNING We really need explicit cleanup to prevent in-kernel memory leaks.
+ *         Yes, there still are kernel bugs related to eBPF.
+ */
+void
+detach_all(struct bpf_ctx *b)
+{
+	fprintf(stderr,
+		"INFO: Detaching. PLEASE wait."
+		" It can hold few tens of seconds.\n");
+
+	for (unsigned i = 0; i < b->pr_arr_qty; i++) {
+		struct bpf_pr *item = b->pr_arr[i];
+
+		release_probe(item->pr, item->key, item->attached);
+		free(item);
+	}
+
+	bpf_module_destroy(b->module);
+
+	free(b->pr_arr);
+	free(b);
+}
+
+/*
+ * load_obj_code_into_ebpf_vm -- load eBPF object code of function
+ * 'func_name' to kernel VM and obtain a fd.
+ *
+ * If sbcp->debug is set, a log buffer is passed to bpf_prog_load() and its
+ * content is printed to stderr.
+ *
+ * Returns the fd of the loaded program or -1 on error.
+ */
+static int
+load_obj_code_into_ebpf_vm(struct bpf_ctx *sbcp, const char *func_name,
+		enum bpf_prog_type prog_type)
+{
+	enum { EBPF_LOG_BUF_SIZE = 65536 };
+
+	void *bfs_res = bpf_function_start(sbcp->module, func_name);
+
+	if (NULL == bfs_res) {
+		fprintf(stderr, "%s: Unknown program %s\n",
+			__func__, func_name);
+		return -1;
+	}
+
+	unsigned log_buf_size = 0;
+	char *log_buf = NULL;
+
+	if (sbcp->debug) {
+		log_buf = calloc(1, EBPF_LOG_BUF_SIZE);
+
+		/* no memory for the log: load the program without it */
+		if (NULL != log_buf)
+			log_buf_size = EBPF_LOG_BUF_SIZE;
+	}
+
+	int fd = bpf_prog_load(prog_type,
+			bfs_res,
+			(int)bpf_function_size(sbcp->module, func_name),
+			bpf_module_license(sbcp->module),
+			bpf_module_kern_version(sbcp->module),
+			log_buf, log_buf_size);
+
+	/*
+	 * Report the failure before anything else touches errno,
+	 * '%m' has to describe the error of bpf_prog_load().
+	 */
+	if (fd < 0) {
+		fprintf(stderr,
+			"ERROR:%s:Failed to load BPF program %s: %m\n",
+			__func__, func_name);
+	}
+
+	if (NULL != log_buf) {
+		/* XXX Command line options to save it to separate file */
+		fprintf(stderr, "DEBUG:%s('%s'):\n%s\n",
+			__func__, func_name, log_buf);
+
+		free(log_buf);
+	}
+
+	return (fd < 0) ? -1 : fd;
+}
+
+/*
+ * chr_replace -- replace every character 'tmpl' in string 'str' with 'ch'.
+ * NULL 'str' is accepted and ignored.
+ */
+static void
+chr_replace(char *str, const char tmpl, const char ch)
+{
+	if (NULL == str)
+		return;
+
+	for (; *str; str++)
+		if (tmpl == *str)
+			*str = ch;
+}
+
+/*
+ * Load ebpf function code into VM and attach it to syscall entry point using
+ * KProbe.
+ */
+int
+load_fn_and_attach_to_kp(struct bpf_ctx *sbcp,
+		const char *event, const char *fn_name,
+		pid_t pid, unsigned cpu, int group_fd)
+{
+	char desc[256];
+	struct perf_reader *pr;
+	char *ev_name;
+	int fn_fd;
+	int len;
+
+	if (!pr_arr_check_quota(sbcp, 1)) {
+		fprintf(stderr,
+			"ERROR:%s:Number of perf readers would exceed"
+			" global quota: %d\n",
+			__func__, args.pr_arr_max);
+
+		return -1;
+	}
+
+	fn_fd = load_obj_code_into_ebpf_vm(sbcp, fn_name, BPF_PROG_TYPE_KPROBE);
+	if (fn_fd == -1) {
+		return -1;
+	}
+
+	/* room for the "p_" prefix, the event itself and the trailing NUL */
+	ev_name = calloc(1, 2 + strlen(event) + 1);
+	if (NULL == ev_name) {
+		fprintf(stderr,
+			"ERROR:%s:Can't allocate name of kprobe '%s': %m\n",
+			__func__, event);
+
+		goto err_close;
+	}
+
+	strcpy(ev_name, "p_");
+	strcat(ev_name, event);
+	/* 'event' may contain '+' and '.', the probe name is sanitized */
+	chr_replace(ev_name, '+', '_');
+	chr_replace(ev_name, '.', '_');
+
+	len = snprintf(desc, sizeof(desc), "p:kprobes/%s %s", ev_name, event);
+	if (len < 0 || (size_t)len >= sizeof(desc)) {
+		/* a truncated description would define a wrong probe */
+		fprintf(stderr,
+			"ERROR:%s:Description of kprobe '%s' is too long\n",
+			__func__, event);
+
+		goto err_free;
+	}
+
+	pr = bpf_attach_kprobe(fn_fd, ev_name, desc, pid, (int)cpu, group_fd,
+			NULL, NULL);
+
+	if (NULL == pr) {
+		fprintf(stderr,
+			"ERROR:%s:Failed to attach eBPF function '%s'"
+			" to kprobe '%s': %m\n",
+			__func__, fn_name, event);
+
+		goto err_free;
+	}
+
+	append_item_to_pr_arr(sbcp, ev_name, pr, true);
+
+	free(ev_name);
+
+	return 0;
+
+err_free:
+	free(ev_name);
+err_close:
+	/* the program was loaded but is not attached: don't leak its fd */
+	close(fn_fd);
+
+	return -1;
+}
+
+/*
+ * Load ebpf function code into VM and attach it to syscall exit point using
+ * KRetProbe.
+ */
+int
+load_fn_and_attach_to_kretp(struct bpf_ctx *sbcp,
+		const char *event, const char *fn_name,
+		pid_t pid, unsigned cpu, int group_fd)
+{
+	char desc[256];
+	struct perf_reader *pr;
+	char *ev_name;
+	int fn_fd;
+	int len;
+
+	if (!pr_arr_check_quota(sbcp, 1)) {
+		fprintf(stderr,
+			"ERROR:%s:Number of perf readers would exceed"
+			" global quota: %d\n",
+			__func__, args.pr_arr_max);
+
+		return -1;
+	}
+
+	fn_fd = load_obj_code_into_ebpf_vm(sbcp, fn_name, BPF_PROG_TYPE_KPROBE);
+	if (fn_fd == -1) {
+		return -1;
+	}
+
+	/* room for the "r_" prefix, the event itself and the trailing NUL */
+	ev_name = calloc(1, 2 + strlen(event) + 1);
+	if (NULL == ev_name) {
+		fprintf(stderr,
+			"ERROR:%s:Can't allocate name of kretprobe '%s': %m\n",
+			__func__, event);
+
+		goto err_close;
+	}
+
+	strcpy(ev_name, "r_");
+	strcat(ev_name, event);
+	/* 'event' may contain '+' and '.', the probe name is sanitized */
+	chr_replace(ev_name, '+', '_');
+	chr_replace(ev_name, '.', '_');
+
+	len = snprintf(desc, sizeof(desc), "r:kprobes/%s %s", ev_name, event);
+	if (len < 0 || (size_t)len >= sizeof(desc)) {
+		/* a truncated description would define a wrong probe */
+		fprintf(stderr,
+			"ERROR:%s:Description of kretprobe '%s' is too long\n",
+			__func__, event);
+
+		goto err_free;
+	}
+
+	pr = bpf_attach_kprobe(fn_fd, ev_name, desc, pid, (int)cpu, group_fd,
+			NULL, NULL);
+
+	if (NULL == pr) {
+		fprintf(stderr,
+			"ERROR:%s:Failed to attach eBPF function '%s'"
+			" to kretprobe '%s': %m\n",
+			__func__, fn_name, event);
+
+		/* ev_name used to be leaked on this path */
+		goto err_free;
+	}
+
+	append_item_to_pr_arr(sbcp, ev_name, pr, true);
+
+	free(ev_name);
+
+	return 0;
+
+err_free:
+	free(ev_name);
+err_close:
+	/* the program was loaded but is not attached: don't leak its fd */
+	close(fn_fd);
+
+	return -1;
+}
+
+/*
+ * Load ebpf function code into VM and attach it to syscall exit point using
+ * TracePoint.
+ */
+int
+load_fn_and_attach_to_tp(struct bpf_ctx *sbcp,
+		const char *tp_category, const char *tp_name,
+		const char *fn_name,
+		int pid, unsigned cpu, int group_fd)
+{
+	struct perf_reader *pr;
+	char *ev_name;
+	int fn_fd;
+
+	if (!pr_arr_check_quota(sbcp, 1)) {
+		fprintf(stderr,
+			"ERROR:%s:Number of perf readers would exceed"
+			" global quota: %d\n",
+			__func__, args.pr_arr_max);
+
+		return -1;
+	}
+
+	fn_fd = load_obj_code_into_ebpf_vm(sbcp,
+			fn_name, BPF_PROG_TYPE_TRACEPOINT);
+	/* the loader has already reported the error */
+	if (fn_fd == -1)
+		return -1;
+
+	/* "<category>:<name>" and the trailing NUL */
+	ev_name = calloc(1,
+			strlen(tp_category) + 1 + strlen(tp_name) + 1);
+	if (NULL == ev_name) {
+		fprintf(stderr,
+			"ERROR:%s:Can't allocate name of tracepoint"
+			" '%s:%s': %m\n",
+			__func__, tp_category, tp_name);
+
+		goto err_close;
+	}
+
+	strcpy(ev_name, tp_category);
+	strcat(ev_name, ":");
+	strcat(ev_name, tp_name);
+
+	pr = bpf_attach_tracepoint(fn_fd,
+			tp_category, tp_name,
+			pid, (int)cpu, group_fd, NULL, NULL);
+
+	if (NULL == pr) {
+		fprintf(stderr,
+			"ERROR:%s:Failed to attach eBPF function '%s'"
+			" to tracepoint '%s:%s': %m\n",
+			__func__, fn_name, tp_category, tp_name);
+
+		goto err_free;
+	}
+
+	/* XXX May be we should mark this pr with some specific numeric code */
+	append_item_to_pr_arr(sbcp, ev_name, pr, false);
+
+	free(ev_name);
+
+	return 0;
+
+err_free:
+	free(ev_name);
+err_close:
+	/* the program was loaded but is not attached: don't leak its fd */
+	close(fn_fd);
+
+	return -1;
+}
diff --git a/src/libstrace/bpf.h b/src/libstrace/bpf.h
new file mode 100644
index 000000000..53c544388
--- /dev/null
+++ b/src/libstrace/bpf.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *
+ *     * Neither the name of the copyright holder nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * bpf.h -- Key bpf_ctx structure and related functions
+ */
+
+/* PLEASE do not rename this macro to BPF_H. There is a conflict. */
+#ifndef __BPF_H
+#define __BPF_H
+
+#include
+#include
+#include
+
+#include
+
+/*
+ * One registered perf reader. Items are allocated together with their key
+ * and are freed by detach_all().
+ */
+struct bpf_pr {
+	/* reader handle; released with perf_reader_free() */
+	struct perf_reader *pr;
+
+	/*
+	 * True for readers returned by bpf_attach_kprobe(): detach_all()
+	 * additionally removes the kprobe event named by 'key' for them.
+	 * False for perf output readers and tracepoints.
+	 *
+	 * XXX May be we should replace this field with some
+	 *    enum perf_reader_type_t as soon as tracepoints
+	 *    will be fixed.
+	 */
+	bool attached;
+	/* NUL-terminated name of the reader (flexible array member) */
+	char key[];
+};
+
+/*
+ * Context of one loaded eBPF module together with all perf readers
+ * registered for it.
+ */
+struct bpf_ctx {
+	/* module handle passed to bpf_table_fd(), bpf_function_start() etc. */
+	void *module;
+	/* non-zero: print the log filled by bpf_prog_load() to stderr */
+	unsigned debug;
+	/* array of registered readers; allocated on first registration */
+	struct bpf_pr **pr_arr;
+	/* number of used items in pr_arr; limited by args.pr_arr_max */
+	unsigned pr_arr_qty;
+};
+
+/*
+ * Open a perf buffer with 'callback' on every online CPU and store its fd
+ * in the perf event array table 'perf_event'. Returns 0 or -1 on error.
+ */
+int attach_callback_to_perf_output(struct bpf_ctx *sbcp,
+		const char *perf_event, perf_reader_raw_cb callback);
+
+/*
+ * Load eBPF function 'fn_name' and attach it to 'event' as a kprobe
+ * ("p:kprobes/..."). Returns 0 or -1 on error.
+ */
+int load_fn_and_attach_to_kp(struct bpf_ctx *sbcp,
+		const char *event, const char *fn_name,
+		pid_t pid, unsigned cpu, int group_fd);
+
+/*
+ * Load eBPF function 'fn_name' and attach it to 'event' as a kretprobe
+ * ("r:kprobes/..."). Returns 0 or -1 on error.
+ */
+int load_fn_and_attach_to_kretp(struct bpf_ctx *sbcp,
+		const char *event, const char *fn_name,
+		pid_t pid, unsigned cpu, int group_fd);
+
+/*
+ * Load eBPF function 'fn_name' and attach it to the tracepoint
+ * 'tp_category':'tp_name'. Returns 0 or -1 on error.
+ */
+int load_fn_and_attach_to_tp(struct bpf_ctx *sbcp,
+		const char *tp_category, const char *tp_name,
+		const char *fn_name,
+		int pid, unsigned cpu, int group_fd);
+
+/*
+ * Release all registered readers, destroy the module and free 'b' itself.
+ */
+void detach_all(struct bpf_ctx *b);
+
+#endif /* __BPF_H */
diff --git a/src/libstrace/ebpf_syscalls.c b/src/libstrace/ebpf_syscalls.c
new file mode 100644
index 000000000..8d286f5c7
--- /dev/null
+++ b/src/libstrace/ebpf_syscalls.c
@@ -0,0 +1,455 @@
+/*
+ * Copyright 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *
+ *     * Neither the name of the copyright holder nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * ebpf_syscalls.c -- a table of glibc-supported syscalls + */ + +#include + +#include "ebpf_syscalls.h" + + +/* EBPF_SYSCALL(__NR_setxattr, sys_setxattr) */ +#define EBPF_SYSCALL(nr, sym) [nr] = {\ + .num = nr, \ + .num_name = #nr, \ + .hlr_name = #sym, \ + .masks = 0 }, + +#define EBPF_SYSCALL_FILE(nr, sym) [nr] = {\ + .num = nr, \ + .num_name = #nr, \ + .hlr_name = #sym, \ + .masks = EM_file }, + +#define EBPF_SYSCALL_FILEAT(nr, sym) [nr] = {\ + .num = nr, \ + .num_name = #nr, \ + .hlr_name = #sym, \ + .masks = EM_fileat }, + +#define EBPF_SYSCALL_DESC(nr, sym) [nr] = {\ + .num = nr, \ + .num_name = #nr, \ + .hlr_name = #sym, \ + .masks = EM_desc }, + +#define SC_NI { .num = SC_TBL_SIZE, \ + .num_name = "NI", \ + .hlr_name = NULL } + +/* + * Commented syscalls mean that syscall exists in the kernel but glibc + * does not provide __NR_* and SYS_* macros. + */ +struct sc_t sc_tbl[SC_TBL_SIZE] = { + [0 ... 
SC_TBL_SIZE - 1] = SC_NI, + /* + * [__NR_open] = + * { .num = __NR_open, .hlr_name = "SyS_open", .masks = EM_file }, + * [__NR_read] = + * { .num = __NR_read, .hlr_name = "SyS_read", .masks = EM_desk }, + * [__NR_write] = + * { .num = __NR_write, .hlr_name = "SyS_write", .masks = EM_desk }, + * [__NR_close] = + * { .num = __NR_close, .hlr_name = "SyS_close", .masks = EM_desk }, + */ + +EBPF_SYSCALL(__NR_arch_prctl, sys_arch_prctl) +EBPF_SYSCALL(__NR_rt_sigreturn, sys_rt_sigreturn) +EBPF_SYSCALL(__NR_ioperm, sys_ioperm) +EBPF_SYSCALL(__NR_iopl, SyS_iopl) +EBPF_SYSCALL(__NR_modify_ldt, sys_modify_ldt) +EBPF_SYSCALL_DESC(__NR_mmap, SyS_mmap) +EBPF_SYSCALL(__NR_set_thread_area, SyS_set_thread_area) +EBPF_SYSCALL(__NR_get_thread_area, SyS_get_thread_area) +EBPF_SYSCALL(__NR_set_tid_address, SyS_set_tid_address) +EBPF_SYSCALL(__NR_fork, sys_fork) +EBPF_SYSCALL(__NR_vfork, sys_vfork) +EBPF_SYSCALL(__NR_clone, SyS_clone) +EBPF_SYSCALL(__NR_unshare, SyS_unshare) +EBPF_SYSCALL(__NR_personality, SyS_personality) +EBPF_SYSCALL(__NR_exit, SyS_exit) +EBPF_SYSCALL(__NR_exit_group, SyS_exit_group) +EBPF_SYSCALL(__NR_waitid, SyS_waitid) +EBPF_SYSCALL(__NR_wait4, SyS_wait4) +/* EBPF_SYSCALL(__NR_waitpid, SyS_waitpid) */ +EBPF_SYSCALL(__NR__sysctl, SyS_sysctl) +EBPF_SYSCALL(__NR_capget, SyS_capget) +EBPF_SYSCALL(__NR_capset, SyS_capset) +EBPF_SYSCALL(__NR_ptrace, SyS_ptrace) +EBPF_SYSCALL(__NR_restart_syscall, sys_restart_syscall) +EBPF_SYSCALL(__NR_rt_sigprocmask, SyS_rt_sigprocmask) +EBPF_SYSCALL(__NR_rt_sigpending, SyS_rt_sigpending) +EBPF_SYSCALL(__NR_rt_sigtimedwait, SyS_rt_sigtimedwait) +EBPF_SYSCALL(__NR_kill, SyS_kill) +EBPF_SYSCALL(__NR_tgkill, SyS_tgkill) +EBPF_SYSCALL(__NR_tkill, SyS_tkill) +EBPF_SYSCALL(__NR_rt_sigqueueinfo, SyS_rt_sigqueueinfo) +EBPF_SYSCALL(__NR_rt_tgsigqueueinfo, SyS_rt_tgsigqueueinfo) +EBPF_SYSCALL(__NR_sigaltstack, SyS_sigaltstack) +/* EBPF_SYSCALL(__NR_sigpending, SyS_sigpending) */ +/* EBPF_SYSCALL(__NR_sigprocmask, SyS_sigprocmask) */ 
+EBPF_SYSCALL(__NR_rt_sigaction, SyS_rt_sigaction) +/* EBPF_SYSCALL(__NR_sgetmask, sys_sgetmask) */ +/* EBPF_SYSCALL(__NR_ssetmask, SyS_ssetmask) */ +/* EBPF_SYSCALL(__NR_signal, SyS_signal) */ +EBPF_SYSCALL(__NR_pause, sys_pause) +EBPF_SYSCALL(__NR_rt_sigsuspend, SyS_rt_sigsuspend) +/* EBPF_SYSCALL(__NR_sigsuspend, SyS_sigsuspend) */ +/* EBPF_SYSCALL(__NR_sigsuspend, SyS_sigsuspend) */ +EBPF_SYSCALL(__NR_setpriority, SyS_setpriority) +EBPF_SYSCALL(__NR_getpriority, SyS_getpriority) +EBPF_SYSCALL(__NR_setregid, SyS_setregid) +EBPF_SYSCALL(__NR_setgid, SyS_setgid) +EBPF_SYSCALL(__NR_setreuid, SyS_setreuid) +EBPF_SYSCALL(__NR_setuid, SyS_setuid) +EBPF_SYSCALL(__NR_setresuid, SyS_setresuid) +EBPF_SYSCALL(__NR_getresuid, SyS_getresuid) +EBPF_SYSCALL(__NR_setresgid, SyS_setresgid) +EBPF_SYSCALL(__NR_getresgid, SyS_getresgid) +EBPF_SYSCALL(__NR_setfsuid, SyS_setfsuid) +EBPF_SYSCALL(__NR_setfsgid, SyS_setfsgid) +EBPF_SYSCALL(__NR_getpid, sys_getpid) +EBPF_SYSCALL(__NR_gettid, sys_gettid) +EBPF_SYSCALL(__NR_getppid, sys_getppid) +EBPF_SYSCALL(__NR_getuid, sys_getuid) +EBPF_SYSCALL(__NR_geteuid, sys_geteuid) +EBPF_SYSCALL(__NR_getgid, sys_getgid) +EBPF_SYSCALL(__NR_getegid, sys_getegid) +EBPF_SYSCALL(__NR_times, SyS_times) +EBPF_SYSCALL(__NR_setpgid, SyS_setpgid) +EBPF_SYSCALL(__NR_getpgid, SyS_getpgid) +EBPF_SYSCALL(__NR_getpgrp, sys_getpgrp) +EBPF_SYSCALL(__NR_getsid, SyS_getsid) +EBPF_SYSCALL(__NR_setsid, sys_setsid) +/* EBPF_SYSCALL(__NR_newuname, SyS_newuname) */ +EBPF_SYSCALL(__NR_uname, SyS_uname) +/* EBPF_SYSCALL(__NR_olduname, SyS_olduname) */ +EBPF_SYSCALL(__NR_sethostname, SyS_sethostname) +/* EBPF_SYSCALL(__NR_gethostname, SyS_gethostname) */ +EBPF_SYSCALL(__NR_setdomainname, SyS_setdomainname) +/* EBPF_SYSCALL(__NR_old_getrlimit, SyS_old_getrlimit) */ +EBPF_SYSCALL(__NR_getrlimit, SyS_getrlimit) +EBPF_SYSCALL(__NR_prlimit64, SyS_prlimit64) +EBPF_SYSCALL(__NR_setrlimit, SyS_setrlimit) +EBPF_SYSCALL(__NR_getrusage, SyS_getrusage) +EBPF_SYSCALL(__NR_umask, 
SyS_umask) +EBPF_SYSCALL(__NR_prctl, SyS_prctl) +EBPF_SYSCALL(__NR_getcpu, SyS_getcpu) +EBPF_SYSCALL(__NR_sysinfo, SyS_sysinfo) +/* EBPF_SYSCALL(__NR_ni_syscall, sys_ni_syscall) */ +EBPF_SYSCALL(__NR_setns, SyS_setns) +EBPF_SYSCALL(__NR_reboot, SyS_reboot) +EBPF_SYSCALL(__NR_getgroups, SyS_getgroups) +EBPF_SYSCALL(__NR_setgroups, SyS_setgroups) +/* EBPF_SYSCALL(__NR_nice, SyS_nice) */ +EBPF_SYSCALL(__NR_sched_setscheduler, SyS_sched_setscheduler) +EBPF_SYSCALL(__NR_sched_setparam, SyS_sched_setparam) +EBPF_SYSCALL(__NR_sched_setattr, SyS_sched_setattr) +EBPF_SYSCALL(__NR_sched_getscheduler, SyS_sched_getscheduler) +EBPF_SYSCALL(__NR_sched_getparam, SyS_sched_getparam) +EBPF_SYSCALL(__NR_sched_getattr, SyS_sched_getattr) +EBPF_SYSCALL(__NR_sched_setaffinity, SyS_sched_setaffinity) +EBPF_SYSCALL(__NR_sched_getaffinity, SyS_sched_getaffinity) +EBPF_SYSCALL(__NR_sched_yield, sys_sched_yield) +EBPF_SYSCALL(__NR_sched_get_priority_max, SyS_sched_get_priority_max) +EBPF_SYSCALL(__NR_sched_get_priority_min, SyS_sched_get_priority_min) +EBPF_SYSCALL(__NR_sched_rr_get_interval, SyS_sched_rr_get_interval) +EBPF_SYSCALL(__NR_syslog, SyS_syslog) +EBPF_SYSCALL(__NR_kcmp, SyS_kcmp) +EBPF_SYSCALL(__NR_time, SyS_time) +/* EBPF_SYSCALL(__NR_stime, SyS_stime) */ +EBPF_SYSCALL(__NR_gettimeofday, SyS_gettimeofday) +EBPF_SYSCALL(__NR_settimeofday, SyS_settimeofday) +EBPF_SYSCALL(__NR_adjtimex, SyS_adjtimex) +EBPF_SYSCALL(__NR_alarm, SyS_alarm) +EBPF_SYSCALL(__NR_nanosleep, SyS_nanosleep) +EBPF_SYSCALL(__NR_getitimer, SyS_getitimer) +EBPF_SYSCALL(__NR_setitimer, SyS_setitimer) +EBPF_SYSCALL(__NR_timer_create, SyS_timer_create) +EBPF_SYSCALL(__NR_timer_gettime, SyS_timer_gettime) +EBPF_SYSCALL(__NR_timer_getoverrun, SyS_timer_getoverrun) +EBPF_SYSCALL(__NR_timer_settime, SyS_timer_settime) +EBPF_SYSCALL(__NR_timer_delete, SyS_timer_delete) +EBPF_SYSCALL(__NR_clock_settime, SyS_clock_settime) +EBPF_SYSCALL(__NR_clock_gettime, SyS_clock_gettime) +EBPF_SYSCALL(__NR_clock_adjtime, 
SyS_clock_adjtime) +EBPF_SYSCALL(__NR_clock_getres, SyS_clock_getres) +EBPF_SYSCALL(__NR_clock_nanosleep, SyS_clock_nanosleep) +EBPF_SYSCALL(__NR_set_robust_list, SyS_set_robust_list) +EBPF_SYSCALL(__NR_get_robust_list, SyS_get_robust_list) +EBPF_SYSCALL(__NR_futex, SyS_futex) +/* EBPF_SYSCALL(__NR_chown16, SyS_chown16) */ +/* EBPF_SYSCALL(__NR_lchown16, SyS_lchown16) */ +/* EBPF_SYSCALL(__NR_fchown16, SyS_fchown16) */ +/* EBPF_SYSCALL(__NR_setregid16, SyS_setregid16) */ +/* EBPF_SYSCALL(__NR_setgid16, SyS_setgid16) */ +/* EBPF_SYSCALL(__NR_setreuid16, SyS_setreuid16) */ +/* EBPF_SYSCALL(__NR_setuid16, SyS_setuid16) */ +/* EBPF_SYSCALL(__NR_setresuid16, SyS_setresuid16) */ +/* EBPF_SYSCALL(__NR_getresuid16, SyS_getresuid16) */ +/* EBPF_SYSCALL(__NR_setresgid16, SyS_setresgid16) */ +/* EBPF_SYSCALL(__NR_getresgid16, SyS_getresgid16) */ +/* EBPF_SYSCALL(__NR_setfsuid16, SyS_setfsuid16) */ +/* EBPF_SYSCALL(__NR_setfsgid16, SyS_setfsgid16) */ +/* EBPF_SYSCALL(__NR_getgroups16, SyS_getgroups16) */ +/* EBPF_SYSCALL(__NR_setgroups16, SyS_setgroups16) */ +/* EBPF_SYSCALL(__NR_getuid16, sys_getuid16) */ +/* EBPF_SYSCALL(__NR_geteuid16, sys_geteuid16) */ +/* EBPF_SYSCALL(__NR_getgid16, sys_getgid16) */ +/* EBPF_SYSCALL(__NR_getegid16, sys_getegid16) */ +EBPF_SYSCALL_FILE(__NR_delete_module, SyS_delete_module) +EBPF_SYSCALL(__NR_init_module, SyS_init_module) +EBPF_SYSCALL_DESC(__NR_finit_module, SyS_finit_module) +EBPF_SYSCALL_FILE(__NR_acct, SyS_acct) +EBPF_SYSCALL(__NR_kexec_load, SyS_kexec_load) +EBPF_SYSCALL_DESC(__NR_kexec_file_load, SyS_kexec_file_load) +EBPF_SYSCALL(__NR_seccomp, SyS_seccomp) +EBPF_SYSCALL(__NR_bpf, SyS_bpf) +EBPF_SYSCALL(__NR_membarrier, SyS_membarrier) +EBPF_SYSCALL_DESC(__NR_readahead, SyS_readahead) +EBPF_SYSCALL_FILE(__NR_memfd_create, SyS_memfd_create) +EBPF_SYSCALL(__NR_mincore, SyS_mincore) +EBPF_SYSCALL(__NR_mlock, SyS_mlock) +EBPF_SYSCALL(__NR_mlock2, SyS_mlock2) +EBPF_SYSCALL(__NR_munlock, SyS_munlock) +EBPF_SYSCALL(__NR_mlockall, 
SyS_mlockall) +EBPF_SYSCALL(__NR_munlockall, sys_munlockall) +/* EBPF_SYSCALL(__NR_mmap_pgoff, SyS_mmap_pgoff) */ +EBPF_SYSCALL(__NR_brk, SyS_brk) +EBPF_SYSCALL(__NR_munmap, SyS_munmap) +EBPF_SYSCALL(__NR_remap_file_pages, SyS_remap_file_pages) +EBPF_SYSCALL(__NR_mprotect, SyS_mprotect) +EBPF_SYSCALL(__NR_mremap, SyS_mremap) +EBPF_SYSCALL(__NR_msync, SyS_msync) +EBPF_SYSCALL(__NR_process_vm_readv, SyS_process_vm_readv) +EBPF_SYSCALL(__NR_process_vm_writev, SyS_process_vm_writev) +/* EBPF_SYSCALL_DESC(__NR_fadvise64_64, SyS_fadvise64_64) */ +EBPF_SYSCALL_DESC(__NR_fadvise64, SyS_fadvise64) +EBPF_SYSCALL(__NR_madvise, SyS_madvise) +EBPF_SYSCALL_FILE(__NR_swapoff, SyS_swapoff) +EBPF_SYSCALL_FILE(__NR_swapon, SyS_swapon) +EBPF_SYSCALL(__NR_set_mempolicy, SyS_set_mempolicy) +EBPF_SYSCALL(__NR_migrate_pages, SyS_migrate_pages) +EBPF_SYSCALL(__NR_get_mempolicy, SyS_get_mempolicy) +EBPF_SYSCALL(__NR_mbind, SyS_mbind) +EBPF_SYSCALL(__NR_move_pages, SyS_move_pages) +EBPF_SYSCALL_DESC(__NR_close, SyS_close) +EBPF_SYSCALL_FILE(__NR_truncate, SyS_truncate) +EBPF_SYSCALL_DESC(__NR_ftruncate, SyS_ftruncate) +EBPF_SYSCALL_DESC(__NR_fallocate, SyS_fallocate) +EBPF_SYSCALL_FILEAT(__NR_faccessat, SyS_faccessat) +EBPF_SYSCALL_FILE(__NR_access, SyS_access) +EBPF_SYSCALL_FILE(__NR_chdir, SyS_chdir) +EBPF_SYSCALL_DESC(__NR_fchdir, SyS_fchdir) +EBPF_SYSCALL_FILE(__NR_chroot, SyS_chroot) +EBPF_SYSCALL_DESC(__NR_fchmod, SyS_fchmod) +EBPF_SYSCALL_FILEAT(__NR_fchmodat, SyS_fchmodat) +EBPF_SYSCALL_FILE(__NR_chmod, SyS_chmod) +EBPF_SYSCALL_FILEAT(__NR_fchownat, SyS_fchownat) +EBPF_SYSCALL_FILE(__NR_chown, SyS_chown) +EBPF_SYSCALL_FILE(__NR_lchown, SyS_lchown) +EBPF_SYSCALL_DESC(__NR_fchown, SyS_fchown) +EBPF_SYSCALL_FILE(__NR_open, SyS_open) +EBPF_SYSCALL_FILEAT(__NR_openat, SyS_openat) +EBPF_SYSCALL_FILE(__NR_creat, SyS_creat) +EBPF_SYSCALL(__NR_vhangup, sys_vhangup) +EBPF_SYSCALL_DESC(__NR_lseek, SyS_lseek) +/* EBPF_SYSCALL_DESC(__NR_llseek, SyS_llseek) */ +EBPF_SYSCALL_DESC(__NR_read, 
SyS_read) +EBPF_SYSCALL_DESC(__NR_write, SyS_write) +EBPF_SYSCALL_DESC(__NR_pread64, SyS_pread64) +EBPF_SYSCALL_DESC(__NR_pwrite64, SyS_pwrite64) +EBPF_SYSCALL_DESC(__NR_readv, SyS_readv) +EBPF_SYSCALL_DESC(__NR_writev, SyS_writev) +EBPF_SYSCALL_DESC(__NR_preadv, SyS_preadv) +EBPF_SYSCALL_DESC(__NR_pwritev, SyS_pwritev) +EBPF_SYSCALL_DESC(__NR_sendfile, SyS_sendfile) +/* EBPF_SYSCALL_DESC(__NR_sendfile64, SyS_sendfile64) */ +EBPF_SYSCALL_FILE(__NR_stat, SyS_stat) +EBPF_SYSCALL_FILE(__NR_lstat, SyS_lstat) +EBPF_SYSCALL_DESC(__NR_fstat, SyS_fstat) +EBPF_SYSCALL_FILE(__NR_stat, SyS_newstat) +EBPF_SYSCALL_FILE(__NR_lstat, SyS_newlstat) +EBPF_SYSCALL_DESC(__NR_newfstatat, SyS_newfstatat) +EBPF_SYSCALL_DESC(__NR_fstat, SyS_newfstat) +EBPF_SYSCALL_FILEAT(__NR_readlinkat, SyS_readlinkat) +EBPF_SYSCALL_FILE(__NR_readlink, SyS_readlink) +EBPF_SYSCALL_FILE(__NR_uselib, SyS_uselib) +EBPF_SYSCALL_FILE(__NR_execve, SyS_execve) +EBPF_SYSCALL_FILEAT(__NR_execveat, SyS_execveat) +EBPF_SYSCALL(__NR_pipe2, SyS_pipe2) +EBPF_SYSCALL(__NR_pipe, SyS_pipe) +EBPF_SYSCALL_FILEAT(__NR_mknodat, SyS_mknodat) +EBPF_SYSCALL_FILE(__NR_mknod, SyS_mknod) +EBPF_SYSCALL_FILEAT(__NR_mkdirat, SyS_mkdirat) +EBPF_SYSCALL_FILE(__NR_mkdir, SyS_mkdir) +EBPF_SYSCALL_FILE(__NR_rmdir, SyS_rmdir) +EBPF_SYSCALL_FILEAT(__NR_unlinkat, SyS_unlinkat) +EBPF_SYSCALL_FILE(__NR_unlink, SyS_unlink) +/* WARNING non-standard API */ +EBPF_SYSCALL_FILE(__NR_symlinkat, SyS_symlinkat) +EBPF_SYSCALL_FILE(__NR_symlink, SyS_symlink) +EBPF_SYSCALL_FILEAT(__NR_linkat, SyS_linkat) +EBPF_SYSCALL_FILE(__NR_link, SyS_link) +EBPF_SYSCALL_FILEAT(__NR_renameat2, SyS_renameat2) +EBPF_SYSCALL_FILEAT(__NR_renameat, SyS_renameat) +EBPF_SYSCALL_FILE(__NR_rename, SyS_rename) +EBPF_SYSCALL_DESC(__NR_fcntl, SyS_fcntl) +EBPF_SYSCALL_DESC(__NR_ioctl, SyS_ioctl) +/* EBPF_SYSCALL_DESC(__NR_old_readdir, SyS_old_readdir) */ +EBPF_SYSCALL_DESC(__NR_getdents, SyS_getdents) +EBPF_SYSCALL_DESC(__NR_getdents64, SyS_getdents64) +EBPF_SYSCALL(__NR_select, 
SyS_select) +EBPF_SYSCALL(__NR_pselect6, SyS_pselect6) +EBPF_SYSCALL(__NR_poll, SyS_poll) +EBPF_SYSCALL(__NR_ppoll, SyS_ppoll) +EBPF_SYSCALL(__NR_getcwd, SyS_getcwd) +EBPF_SYSCALL_DESC(__NR_dup3, SyS_dup3) +EBPF_SYSCALL_DESC(__NR_dup2, SyS_dup2) +EBPF_SYSCALL_DESC(__NR_dup, SyS_dup) +EBPF_SYSCALL(__NR_sysfs, SyS_sysfs) +/* EBPF_SYSCALL_FILE(__NR_umount, SyS_umount) */ +/* EBPF_SYSCALL_FILE(__NR_oldumount, SyS_oldumount) */ +EBPF_SYSCALL_FILE(__NR_mount, SyS_mount) +EBPF_SYSCALL_FILE(__NR_pivot_root, SyS_pivot_root) +EBPF_SYSCALL_FILE(__NR_setxattr, SyS_setxattr) +EBPF_SYSCALL_FILE(__NR_lsetxattr, SyS_lsetxattr) +EBPF_SYSCALL_DESC(__NR_fsetxattr, SyS_fsetxattr) +EBPF_SYSCALL_FILE(__NR_getxattr, SyS_getxattr) +EBPF_SYSCALL_FILE(__NR_lgetxattr, SyS_lgetxattr) +EBPF_SYSCALL_DESC(__NR_fgetxattr, SyS_fgetxattr) +EBPF_SYSCALL_FILE(__NR_listxattr, SyS_listxattr) +EBPF_SYSCALL_FILE(__NR_llistxattr, SyS_llistxattr) +EBPF_SYSCALL_DESC(__NR_flistxattr, SyS_flistxattr) +EBPF_SYSCALL_FILE(__NR_removexattr, SyS_removexattr) +EBPF_SYSCALL_FILE(__NR_lremovexattr, SyS_lremovexattr) +EBPF_SYSCALL_DESC(__NR_fremovexattr, SyS_fremovexattr) +EBPF_SYSCALL_DESC(__NR_vmsplice, SyS_vmsplice) +EBPF_SYSCALL_DESC(__NR_splice, SyS_splice) +EBPF_SYSCALL_DESC(__NR_tee, SyS_tee) +EBPF_SYSCALL(__NR_sync, sys_sync) +EBPF_SYSCALL_DESC(__NR_syncfs, SyS_syncfs) +EBPF_SYSCALL_DESC(__NR_fsync, SyS_fsync) +EBPF_SYSCALL_DESC(__NR_fdatasync, SyS_fdatasync) +EBPF_SYSCALL_DESC(__NR_sync_file_range, SyS_sync_file_range) +/* EBPF_SYSCALL_DESC(__NR_sync_file_range2, SyS_sync_file_range2) */ +EBPF_SYSCALL_FILE(__NR_utime, SyS_utime) +EBPF_SYSCALL_FILEAT(__NR_utimensat, SyS_utimensat) +EBPF_SYSCALL_FILEAT(__NR_futimesat, SyS_futimesat) +EBPF_SYSCALL_FILE(__NR_utimes, SyS_utimes) +EBPF_SYSCALL_FILE(__NR_statfs, SyS_statfs) +/* EBPF_SYSCALL_FILE(__NR_statfs64, SyS_statfs64) */ +EBPF_SYSCALL_DESC(__NR_fstatfs, SyS_fstatfs) +/* EBPF_SYSCALL_DESC(__NR_fstatfs64, SyS_fstatfs64) */ +EBPF_SYSCALL(__NR_ustat, SyS_ustat) 
+/* EBPF_SYSCALL(__NR_bdflush, SyS_bdflush) */ +EBPF_SYSCALL(__NR_inotify_init1, SyS_inotify_init1) +EBPF_SYSCALL(__NR_inotify_init, sys_inotify_init) +EBPF_SYSCALL_DESC(__NR_inotify_add_watch, SyS_inotify_add_watch) +EBPF_SYSCALL_DESC(__NR_inotify_rm_watch, SyS_inotify_rm_watch) +EBPF_SYSCALL(__NR_fanotify_init, SyS_fanotify_init) +EBPF_SYSCALL_DESC(__NR_fanotify_mark, SyS_fanotify_mark) +EBPF_SYSCALL(__NR_epoll_create1, SyS_epoll_create1) +EBPF_SYSCALL(__NR_epoll_create, SyS_epoll_create) +EBPF_SYSCALL_DESC(__NR_epoll_ctl, SyS_epoll_ctl) +EBPF_SYSCALL_DESC(__NR_epoll_wait, SyS_epoll_wait) +EBPF_SYSCALL_DESC(__NR_epoll_pwait, SyS_epoll_pwait) +EBPF_SYSCALL_DESC(__NR_signalfd4, SyS_signalfd4) +EBPF_SYSCALL_DESC(__NR_signalfd, SyS_signalfd) +EBPF_SYSCALL(__NR_timerfd_create, SyS_timerfd_create) +EBPF_SYSCALL_DESC(__NR_timerfd_settime, SyS_timerfd_settime) +EBPF_SYSCALL_DESC(__NR_timerfd_gettime, SyS_timerfd_gettime) +EBPF_SYSCALL(__NR_eventfd2, SyS_eventfd2) +EBPF_SYSCALL(__NR_eventfd, SyS_eventfd) +EBPF_SYSCALL(__NR_userfaultfd, SyS_userfaultfd) +EBPF_SYSCALL(__NR_io_setup, SyS_io_setup) +EBPF_SYSCALL(__NR_io_destroy, SyS_io_destroy) +EBPF_SYSCALL(__NR_io_submit, SyS_io_submit) +EBPF_SYSCALL(__NR_io_cancel, SyS_io_cancel) +EBPF_SYSCALL(__NR_io_getevents, SyS_io_getevents) +EBPF_SYSCALL_DESC(__NR_flock, SyS_flock) +EBPF_SYSCALL_FILEAT(__NR_name_to_handle_at, SyS_name_to_handle_at) +EBPF_SYSCALL_DESC(__NR_open_by_handle_at, SyS_open_by_handle_at) +EBPF_SYSCALL(__NR_quotactl, SyS_quotactl) +EBPF_SYSCALL(__NR_lookup_dcookie, SyS_lookup_dcookie) +EBPF_SYSCALL(__NR_msgget, SyS_msgget) +EBPF_SYSCALL(__NR_msgctl, SyS_msgctl) +EBPF_SYSCALL(__NR_msgsnd, SyS_msgsnd) +EBPF_SYSCALL(__NR_msgrcv, SyS_msgrcv) +EBPF_SYSCALL(__NR_semget, SyS_semget) +EBPF_SYSCALL(__NR_semctl, SyS_semctl) +EBPF_SYSCALL(__NR_semtimedop, SyS_semtimedop) +EBPF_SYSCALL(__NR_semop, SyS_semop) +EBPF_SYSCALL(__NR_shmget, SyS_shmget) +EBPF_SYSCALL(__NR_shmctl, SyS_shmctl) +EBPF_SYSCALL(__NR_shmat, SyS_shmat) 
+EBPF_SYSCALL(__NR_shmdt, SyS_shmdt) +EBPF_SYSCALL_FILE(__NR_mq_open, SyS_mq_open) +EBPF_SYSCALL_FILE(__NR_mq_unlink, SyS_mq_unlink) +EBPF_SYSCALL(__NR_mq_timedsend, SyS_mq_timedsend) +EBPF_SYSCALL(__NR_mq_timedreceive, SyS_mq_timedreceive) +EBPF_SYSCALL(__NR_mq_notify, SyS_mq_notify) +EBPF_SYSCALL(__NR_mq_getsetattr, SyS_mq_getsetattr) +EBPF_SYSCALL(__NR_add_key, SyS_add_key) +EBPF_SYSCALL(__NR_request_key, SyS_request_key) +EBPF_SYSCALL(__NR_keyctl, SyS_keyctl) +EBPF_SYSCALL(__NR_ioprio_set, SyS_ioprio_set) +EBPF_SYSCALL(__NR_ioprio_get, SyS_ioprio_get) +/* EBPF_SYSCALL(__NR_size_show, sys_size_show) */ +EBPF_SYSCALL(__NR_getrandom, SyS_getrandom) +/* EBPF_SYSCALL(__NR_dmi_field_show, sys_dmi_field_show) */ +/* EBPF_SYSCALL(__NR_dmi_modalias_show, sys_dmi_modalias_show) */ +EBPF_SYSCALL(__NR_socket, SyS_socket) +EBPF_SYSCALL(__NR_socketpair, SyS_socketpair) +EBPF_SYSCALL_DESC(__NR_bind, SyS_bind) +EBPF_SYSCALL_DESC(__NR_listen, SyS_listen) +EBPF_SYSCALL_DESC(__NR_accept4, SyS_accept4) +EBPF_SYSCALL_DESC(__NR_accept, SyS_accept) +EBPF_SYSCALL_DESC(__NR_connect, SyS_connect) +EBPF_SYSCALL_DESC(__NR_getsockname, SyS_getsockname) +EBPF_SYSCALL_DESC(__NR_getpeername, SyS_getpeername) +EBPF_SYSCALL_DESC(__NR_sendto, SyS_sendto) +/* EBPF_SYSCALL_DESC(__NR_send, SyS_send) */ +EBPF_SYSCALL_DESC(__NR_recvfrom, SyS_recvfrom) +/* EBPF_SYSCALL_DESC(__NR_recv, SyS_recv) */ +EBPF_SYSCALL_DESC(__NR_setsockopt, SyS_setsockopt) +EBPF_SYSCALL_DESC(__NR_getsockopt, SyS_getsockopt) +EBPF_SYSCALL_DESC(__NR_shutdown, SyS_shutdown) +EBPF_SYSCALL_DESC(__NR_sendmsg, SyS_sendmsg) +EBPF_SYSCALL_DESC(__NR_sendmmsg, SyS_sendmmsg) +EBPF_SYSCALL_DESC(__NR_recvmsg, SyS_recvmsg) +EBPF_SYSCALL_DESC(__NR_recvmmsg, SyS_recvmmsg) +/* EBPF_SYSCALL(__NR_socketcall, SyS_socketcall) */ +}; diff --git a/src/libstrace/ebpf_syscalls.h b/src/libstrace/ebpf_syscalls.h new file mode 100644 index 000000000..94da673f1 --- /dev/null +++ b/src/libstrace/ebpf_syscalls.h @@ -0,0 +1,70 @@ +/* + * Copyright 2016, 
Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * ebpf_syscalls.h -- a list of glibc-supported syscalls. 
+ */
+
+#ifndef EBPF_SYSCALLS_H
+#define EBPF_SYSCALLS_H
+
+#include
+
+/*
+ * Bit flags classifying a syscall; they are stored in the 'masks' field
+ * of struct sc_t.
+ */
+enum masks_t {
+	/* syscall returns an fd */
+	EM_rdesc = 1 << 0,
+	/* syscall accepts fd as a first arg */
+	EM_desc = 1 << 1,
+	/* syscall accepts fs path as a first arg */
+	EM_file = 1 << 2,
+	/* syscall accepts dir fd as a first arg and path as a second */
+	EM_fileat = 1 << 3,
+	/* syscall is relevant for PMemFile */
+	EM_pmemfile = 1 << 4,
+	/* NOTE(review): meaning of the two flags below is not visible here */
+	EM_kern_all = 1 << 5,
+	EM_libc_all = 1 << 6,
+
+	/* all bits set */
+	EM_ALL = -1,
+};
+
+/*
+ * One item of the syscall table sc_tbl; the table is indexed by syscall
+ * number.
+ */
+struct sc_t {
+	/* syscall number; SC_TBL_SIZE in unused slots */
+	unsigned num;
+	/* stringified __NR_* macro name; "NI" in unused slots */
+	const char *num_name;
+	/* name of the in-kernel handler; NULL in unused slots */
+	const char *hlr_name;
+	/* bitwise OR of enum masks_t flags */
+	unsigned masks;
+};
+
+/* Currently glibc does not have appropriate macro for it */
+enum { SC_TBL_SIZE = 1024 };
+extern struct sc_t sc_tbl[SC_TBL_SIZE];
+
+#endif /* EBPF_SYSCALLS_H */
diff --git a/src/libstrace/generate_ebpf.c b/src/libstrace/generate_ebpf.c
new file mode 100644
index 000000000..b188ca603
--- /dev/null
+++ b/src/libstrace/generate_ebpf.c
@@ -0,0 +1,334 @@
+/*
+ * Copyright 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *
+ *     * Neither the name of the copyright holder nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * generate_ebpf.h -- generate_ebpf() function + */ + +#include +#include + +#include "main.h" +#include "utils.h" +#include "ebpf_syscalls.h" +#include "generate_ebpf.h" + +const char *ebpf_trace_h_file = "trace.h"; + +const char *ebpf_head_file = "trace_head.c"; +const char *ebpf_libc_tmpl_file = "trace_libc_tmpl.c"; +const char *ebpf_file_tmpl_file = "trace_file_tmpl.c"; +const char *ebpf_fileat_tmpl_file = "trace_fileat_tmpl.c"; +const char *ebpf_kern_tmpl_file = "trace_kern_tmpl.c"; + +const char *ebpf_tp_all_file = "trace_tp_all.c"; + +/* + * This function returns syscall number by name according to libc knowledge. + */ +static int +get_sc_num(const char *sc_name) +{ + for (int i = 0; i < SC_TBL_SIZE; i++) { + if (NULL == sc_tbl[i].hlr_name) + continue; + + if (!strcasecmp(sc_name, sc_tbl[i].hlr_name)) + return i; + } + + return -1; +} + +/* + * This function generates eBPF handler for syscalls which are known to glibc. 
+ */ +static void +generate_ebpf_kp_libc_all(FILE *ts) +{ + char *text = NULL; + + for (unsigned i = 0; i < SC_TBL_SIZE; i++) { + if (NULL == sc_tbl[i].hlr_name) + continue; + + if (EM_file == (EM_file & sc_tbl[i].masks)) + text = load_file(ebpf_file_tmpl_file); + else if (EM_fileat == (EM_fileat & sc_tbl[i].masks)) + text = load_file(ebpf_fileat_tmpl_file); + else + text = load_file(ebpf_libc_tmpl_file); + + str_replace_all(&text, "SYSCALL_NR", + sc_tbl[i].num_name); + str_replace_all(&text, "SYSCALL_NAME", + sc_tbl[i].hlr_name); + + fwrite(text, strlen(text), 1, ts); + + free(text); text = NULL; + } +} + +/* XXX HACK: this syscall is exported by kernel twice. */ +static unsigned SyS_sigsuspend = 0; + +/* + * This function generates universal default eBPF syscall handler. + * + * Primer purpose of generated handler - new and unknown syscalls. + */ +static void +generate_ebpf_kp_kern_all(FILE *ts) +{ + char *text = NULL; + + + char *line = NULL; + size_t len = 0; + ssize_t read; + + FILE *in = fopen(debug_tracing_aff, "r"); + + if (NULL == in) { + fprintf(stderr, "%s: ERROR: '%m'\n", __func__); + return; + } + + while ((read = getline(&line, &len, in)) != -1) { + int sc_num; + + if (!is_a_sc(line, read - 1)) + continue; + + line [read - 1] = '\0'; + + /* XXX HACK: this syscall is exported by kernel twice. 
*/ + if (!strcasecmp("SyS_sigsuspend", line)) { + if (SyS_sigsuspend) + continue; + + SyS_sigsuspend ++; + } + + sc_num = get_sc_num(line); + + /* Some optimization for glibc-supported syscalls */ + if (0 <= sc_num) { + if (EM_file == (EM_file & sc_tbl[sc_num].masks)) + text = load_file(ebpf_file_tmpl_file); + else if (EM_fileat == + (EM_fileat & sc_tbl[sc_num].masks)) + text = load_file(ebpf_fileat_tmpl_file); + else + text = load_file(ebpf_libc_tmpl_file); + + str_replace_all(&text, "SYSCALL_NR", + sc_tbl[sc_num].num_name); + } else { + text = load_file(ebpf_kern_tmpl_file); + } + + str_replace_all(&text, "SYSCALL_NAME", line); + + fwrite(text, strlen(text), 1, ts); + + free(text); text = NULL; + } + + free(line); + fclose(in); +} + +/* + * This function generates eBPF syscall handlers specific for syscalls with + * filename in arguments. + */ +static void +generate_ebpf_kp_file(FILE *ts) +{ + char *text = NULL; + + for (unsigned i = 0; i < SC_TBL_SIZE; i++) { + if (NULL == sc_tbl[i].hlr_name) + continue; + + if (EM_file != (EM_file & sc_tbl[i].masks)) + continue; + + text = load_file(ebpf_file_tmpl_file); + + str_replace_all(&text, "SYSCALL_NR", + sc_tbl[i].num_name); + str_replace_all(&text, "SYSCALL_NAME", + sc_tbl[i].hlr_name); + + fwrite(text, strlen(text), 1, ts); + + free(text); text = NULL; + } +} + +/* + * This function generates eBPF syscall handlers specific for syscalls with + * relative filename in arguments. 
+ */ +static void +generate_ebpf_kp_fileat(FILE *ts) +{ + char *text = NULL; + + for (unsigned i = 0; i < SC_TBL_SIZE; i++) { + if (NULL == sc_tbl[i].hlr_name) + continue; + + if (EM_fileat != (EM_fileat & sc_tbl[i].masks)) + continue; + + text = load_file(ebpf_fileat_tmpl_file); + + str_replace_all(&text, "SYSCALL_NR", + sc_tbl[i].num_name); + str_replace_all(&text, "SYSCALL_NAME", + sc_tbl[i].hlr_name); + + fwrite(text, strlen(text), 1, ts); + + free(text); text = NULL; + } +} + +/* + * This function generates eBPF syscall handlers specific for syscalls with + * file-descriptor in arguments. + */ +static void +generate_ebpf_kp_desc(FILE *ts) +{ + char *text = NULL; + + for (unsigned i = 0; i < SC_TBL_SIZE; i++) { + if (NULL == sc_tbl[i].hlr_name) + continue; + + if (EM_desc != (EM_desc & sc_tbl[i].masks)) + continue; + + text = load_file(ebpf_libc_tmpl_file); + + str_replace_all(&text, "SYSCALL_NR", + sc_tbl[i].num_name); + str_replace_all(&text, "SYSCALL_NAME", + sc_tbl[i].hlr_name); + + fwrite(text, strlen(text), 1, ts); + + free(text); text = NULL; + } +} + +/* + * This function generates eBPF syscall handlers specific for syscalls which + * operate on files. + */ +static void +generate_ebpf_kp_pmemfile(FILE *ts) +{ + generate_ebpf_kp_file(ts); + generate_ebpf_kp_desc(ts); + generate_ebpf_kp_fileat(ts); +} + +/* + * This function generates eBPF syscall handler specific for tracepoint + * feature. + */ +static void +generate_ebpf_tp_all(FILE *ts) +{ + char *text = load_file(ebpf_tp_all_file); + + fwrite(text, strlen(text), 1, ts); + + free(text); text = NULL; +} + +/* + * This function parses and process expression. 
+ */ +char * +generate_ebpf() +{ + char *text = NULL; + size_t text_size = 0; + + FILE *ts = open_memstream(&text, &text_size); + + /* Let's from header */ + char *head = load_file(ebpf_head_file); + fwrite(head, strlen(head), 1, ts); + free(head); head = NULL; + + if (NULL == args.expr) + goto DeFault; + + if (!strcasecmp(args.expr, "trace=kp-libc-all")) { + generate_ebpf_kp_libc_all(ts); + goto out; + } else if (!strcasecmp(args.expr, "trace=kp-kern-all")) { + generate_ebpf_kp_kern_all(ts); + goto out; + } else if (!strcasecmp(args.expr, "trace=kp-file")) { + generate_ebpf_kp_file(ts); + goto out; + } else if (!strcasecmp(args.expr, "trace=kp-desc")) { + generate_ebpf_kp_desc(ts); + goto out; + } else if (!strcasecmp(args.expr, "trace=kp-pmemfile")) { + generate_ebpf_kp_pmemfile(ts); + goto out; + } else if (!strcasecmp(args.expr, "trace=tp-all")) { + generate_ebpf_tp_all(ts); + goto out; + } + +DeFault: + fprintf(stderr, + "%s: Default expression 'trace=kp-kern-all' was chosen." + " If you would like some speed improvment think about" + " 'trace=kp-libc-all'.\n", __func__); + generate_ebpf_kp_kern_all(ts); + +out: + fclose(ts); + return text; +} diff --git a/src/libstrace/generate_ebpf.h b/src/libstrace/generate_ebpf.h new file mode 100644 index 000000000..659ed8a5a --- /dev/null +++ b/src/libstrace/generate_ebpf.h @@ -0,0 +1,72 @@ +/* + * Copyright 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * generate_ebpf.h -- generate_ebpf() function + */ + +#ifndef GENERATE_EBPF_H +#define GENERATE_EBPF_H + +extern const char *ebpf_trace_h_file; + +extern const char *ebpf_head_file; +extern const char *ebpf_libc_tmpl_file; +extern const char *ebpf_file_tmpl_file; +extern const char *ebpf_fileat_tmpl_file; +extern const char *ebpf_kern_tmpl_file; +extern const char *ebpf_tp_all_file; + +char *generate_ebpf(void); + +extern const char _binary_trace_fileat_tmpl_c_size[]; +extern const char _binary_trace_fileat_tmpl_c_start[]; + +extern const char _binary_trace_file_tmpl_c_size[]; +extern const char _binary_trace_file_tmpl_c_start[]; + +extern const char _binary_trace_head_c_size[]; +extern const char _binary_trace_head_c_start[]; + +extern const char _binary_trace_h_size[]; +extern const char _binary_trace_h_start[]; + +extern const char _binary_trace_kern_tmpl_c_size[]; +extern const char _binary_trace_kern_tmpl_c_start[]; + +extern const char _binary_trace_libc_tmpl_c_size[]; +extern const char _binary_trace_libc_tmpl_c_start[]; + +extern const char _binary_trace_tp_all_c_size[]; +extern const char _binary_trace_tp_all_c_start[]; + +#endif diff --git a/src/libstrace/main.h b/src/libstrace/main.h new file mode 100644 index 000000000..757a4a3f3 --- /dev/null +++ b/src/libstrace/main.h @@ -0,0 +1,79 @@ +/* + * Copyright 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * main.h -- application-wide stuff + */ + +#ifndef MAIN_H +#define MAIN_H + +#include +#include +#include + +enum out_fmt { + EOF_HEX = 0, + EOF_BIN, + EOF_STRACE, + + + EOF_QTY, /* Should be last */ +}; + +struct args_t { + bool timestamp; + bool failed; + bool command; + + unsigned debug; + + pid_t pid; + const char *out_fn; + const char *out_fmt_str; + char out_sep_ch; + const char *expr; +/* + * XXX Set this variable using args and + * command line options + */ + unsigned pr_arr_max; +}; + +extern struct args_t args; +extern bool cont; + +/* Output log */ +extern FILE *out; +extern enum out_fmt out_fmt; + +#endif /* MAIN_H */ diff --git a/src/libstrace/print_event_cb.c b/src/libstrace/print_event_cb.c new file mode 100644 index 000000000..fba31ecb5 --- /dev/null +++ b/src/libstrace/print_event_cb.c @@ -0,0 +1,506 @@ +/* + * Copyright 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * print_event_cb.c -- print_event_cb() function + */ + +#include +#include +#include /* For SYS_xxx definitions */ + +#include +#include + +#include "main.h" +#include "ebpf_syscalls.h" +#include "print_event_cb.h" + +/* + * XXX A bit of black magic to have some US <-> KS portability. + * PLEASE do not add any other includes afters this comment. + */ +typedef __s32 s32; +typedef __u32 u32; +typedef __s64 s64; +typedef __u64 u64; + +enum { TASK_COMM_LEN = 16 }; + +#include "trace.h" + +static unsigned long long start_ts_nsec = 0; + +const char *sc_num2str(const int64_t sc_num); +void fprint_i64(FILE *f, uint64_t x); +char b2hex(char b); + +/* + * Process event. + * + * Also it can be a good idea to use cb_cookie for args, for out or for static + * variable above. + */ + +/* + * Print logs header. + * + * XXX A blank for human-readable strace-like logs + */ +static void +print_header_strace(int argc, char *argv[]) +{ + if (args.timestamp) + fprintf(out, "%-14s", "TIME(s)"); + + fprintf(out, "%-7s %-6s %4s %3s %s\n", + "SYSCALL", "PID_TID", "ARG1", "ERR", "PATH"); + + (void) argc; + (void) argv; +} + +/* + * Print syscall's log entry. 
+ * + * XXX A blank for human-readable strace-like logs + */ +static void +print_event_strace(void *cb_cookie, void *data, int size) +{ + s64 res, err; + struct ev_dt_t *const event = data; + + /* XXX Check size arg */ + (void) size; + + /* split return value into result and errno */ + res = (event->ret >= 0) ? event->ret : -1; + err = (event->ret >= 0) ? 0 : -event->ret; + + if (start_ts_nsec == 0) + start_ts_nsec = event->start_ts_nsec; + + if (args.failed && (event->ret >= 0)) + return; + + if (args.timestamp) { + unsigned long long delta_nsec = + event->finish_ts_nsec - start_ts_nsec; + fprintf(out, "%-14.9f", + (double)((double)delta_nsec / 1000000000.0)); + } + + if (0 <= event->sc_id) + fprintf(out, "%-7s ", sc_num2str(event->sc_id)); + else + fprintf(out, "%-7s ", event->sc_name + 4); + + fprintf(out, "%-6llu %4lld %3lld %s\n", + event->pid_tid, res, err, event->fl_nm); + + (void) cb_cookie; +} + +/* ** Hex logs ** */ + +/* + * This function prints header for hexadecimal logs. + */ +static void +print_header_hex(int argc, char *argv[]) +{ + for (int i = 0; i < argc; i++) { + if (i + 1 != argc) + fprintf(out, "%s%c", argv[i], args.out_sep_ch); + else + fprintf(out, "%s\n", argv[i]); + } + + fprintf(out, "%s%c", "PID_TID", args.out_sep_ch); + + if (args.timestamp) + fprintf(out, "%s%c", "TIME(nsec)", args.out_sep_ch); + + fprintf(out, "%s%c", "ERR", args.out_sep_ch); + fprintf(out, "%s%c", "RES", args.out_sep_ch); + fprintf(out, "%s%c", "SYSCALL", args.out_sep_ch); + + fprintf(out, "%s%c", "ARG1", args.out_sep_ch); + fprintf(out, "%s%c", "ARG2", args.out_sep_ch); + fprintf(out, "%s%c", "ARG3", args.out_sep_ch); + fprintf(out, "%s%c", "ARG4", args.out_sep_ch); + fprintf(out, "%s%c", "ARG5", args.out_sep_ch); + fprintf(out, "%s%c", "ARG6", args.out_sep_ch); + + /* For COMM and like */ + fprintf(out, "%s", "AUX_DATA"); + + fprintf(out, "\n"); +} + +/* + * This function returnss character corresponding to hexadecimal digit. 
+ */ +char +b2hex(char b) +{ + switch (b & 0xF) { + case 0: return '0'; + case 1: return '1'; + case 2: return '2'; + case 3: return '3'; + case 4: return '4'; + case 5: return '5'; + case 6: return '6'; + case 7: return '7'; + case 8: return '8'; + case 9: return '9'; + case 0xA: return 'A'; + case 0xB: return 'B'; + case 0xC: return 'C'; + case 0xD: return 'D'; + case 0xE: return 'E'; + case 0xF: return 'F'; + } + + return '?'; +} + +/* + * This function prints 64-bit integer in hexadecimal forn in stream. + */ +void +fprint_i64(FILE *f, uint64_t x) +{ + char str[2 * sizeof(x)]; + + const char *const px = (const char *)&x; + + for (unsigned i = 0; i < sizeof(x); i++) { + str[sizeof(str) - 1 - 2 * i - 0] = b2hex(px[i]); + str[sizeof(str) - 1 - 2 * i - 1] = b2hex(px[i]>>4); + } + + fwrite(str, sizeof(str), 1, f); +} + +/* + * This function returnss syscall's name by number + */ +const char * +sc_num2str(const int64_t sc_num) +{ + static char buf[32]; + + if ((0 <= sc_num) && (SC_TBL_SIZE > sc_num)) { + if (NULL == sc_tbl[sc_num].hlr_name) + goto out; + + return sc_tbl[sc_num].hlr_name + 4 /* strlen("sys_") */; + } + +out: + snprintf(buf, sizeof(buf), "sys_%ld", sc_num); + + return buf; +} + +/* + * This function prints syscall's logs entry in stream. + * + * WARNING + * + * PLEASE don't use *printf() calls because it will slow down this + * function too much. + */ +static void +print_event_hex(void *cb_cookie, void *data, int size) +{ + s64 res, err; + struct ev_dt_t *const event = data; + + /* XXX Check size arg */ + (void) size; + + /* split return value into result and errno */ + res = (event->ret >= 0) ? event->ret : -1; + err = (event->ret >= 0) ? 
0 : -event->ret; + + if (start_ts_nsec == 0) + start_ts_nsec = event->start_ts_nsec; + + if (args.failed && (event->ret >= 0)) + return; + + fprint_i64(out, event->pid_tid); + fwrite(&args.out_sep_ch, sizeof(args.out_sep_ch), 1, out); + + if (args.timestamp) { + unsigned long long delta_nsec = + event->finish_ts_nsec - start_ts_nsec; + + fprint_i64(out, delta_nsec); + fwrite(&args.out_sep_ch, sizeof(args.out_sep_ch), 1, out); + } + + fprint_i64(out, (uint64_t)err); + fwrite(&args.out_sep_ch, sizeof(args.out_sep_ch), 1, out); + + fprint_i64(out, (uint64_t)res); + fwrite(&args.out_sep_ch, sizeof(args.out_sep_ch), 1, out); + + if (event->sc_id >= 0) + fwrite(sc_num2str(event->sc_id), + strlen(sc_num2str(event->sc_id)), + 1, out); + else + fwrite(event->sc_name + 4, + strlen(event->sc_name + 4), + 1, out); + fwrite(&args.out_sep_ch, sizeof(args.out_sep_ch), 1, out); + + /* "ARG1" */ + switch (event->sc_id) { + case -2: + fprint_i64(out, (uint64_t)event->arg_1); + break; + + case -1: + /* + * XXX Something unexpected happened. Ma be we should issue a + * warning or do something better + */ + break; + + default: + if (EM_file == (EM_file & sc_tbl[event->sc_id].masks)) + fwrite(event->fl_nm, strlen(event->fl_nm), 1, out); + else if (EM_desc == (EM_desc & sc_tbl[event->sc_id].masks)) + fprint_i64(out, (uint64_t)event->arg_1); + else if (EM_fileat == (EM_fileat & sc_tbl[event->sc_id].masks)) + fprint_i64(out, (uint64_t)event->arg_1); + else { + /* + * XXX We don't have any idea about this syscall args. + * May be we should expand our table with additional + * syscall descriptions. + */ + } + break; + } + fwrite(&args.out_sep_ch, sizeof(args.out_sep_ch), 1, out); + + /* "ARG2" */ + switch (event->sc_id) { + case -2: + fprint_i64(out, (uint64_t)event->arg_2); + break; + + case -1: + /* + * XXX Something unexpected happened. 
Ma be we should issue a + * warning or do something better + */ + break; + + default: + if (EM_fileat == (EM_fileat & sc_tbl[event->sc_id].masks)) + fwrite(event->fl_nm, strlen(event->fl_nm), 1, out); + break; + } + fwrite(&args.out_sep_ch, sizeof(args.out_sep_ch), 1, out); + + /* "ARG3" */ + switch (event->sc_id) { + case -2: + fprint_i64(out, (uint64_t)event->arg_3); + break; + + case -1: + /* + * XXX Something unexpected happened. Ma be we should issue a + * warning or do something better + */ + break; + + default: + break; + } + fwrite(&args.out_sep_ch, sizeof(args.out_sep_ch), 1, out); + + /* "ARG4" */ + switch (event->sc_id) { + case -2: + fprint_i64(out, (uint64_t)event->arg_4); + break; + + case -1: + /* + * XXX Something unexpected happened. Ma be we should issue a + * warning or do something better + */ + break; + + default: + break; + } + fwrite(&args.out_sep_ch, sizeof(args.out_sep_ch), 1, out); + + /* "ARG5" */ + switch (event->sc_id) { + case -2: + fprint_i64(out, (uint64_t)event->arg_5); + break; + + case -1: + /* + * XXX Something unexpected happened. Ma be we should issue a + * warning or do something better + */ + break; + + default: + break; + } + fwrite(&args.out_sep_ch, sizeof(args.out_sep_ch), 1, out); + + /* "ARG6" */ + switch (event->sc_id) { + case -2: + fprint_i64(out, (uint64_t)event->arg_6); + break; + + case -1: + /* + * XXX Something unexpected happened. Ma be we should issue a + * warning or do something better + */ + break; + + default: + break; + } + fwrite(&args.out_sep_ch, sizeof(args.out_sep_ch), 1, out); + + /* "AUX_DATA". For COMM and like. XXX */ + /* fwrite(event->comm, strlen(event->comm), 1, out); */ + fwrite("\n", 1, 1, out); + + (void) cb_cookie; +} + +/* ** Binary logs ** */ + +/* + * This function writes header in stream. 
+ */ +static void +print_header_bin(int argc, char *argv[]) +{ + size_t argv_size = 0; + + struct ev_dt_t d = { .sc_id = -1 }; + + const size_t d_size = sizeof(d); + d.header.argc = argc; + + /* + * here we assume that our command line will not be longer + * than 255 bytes + */ + for (int i = 0; i < argc; i++) { + strcpy(d.header.argv + argv_size, argv[i]); + argv_size += strlen(argv[i]) + 1; + } + + if (1 != fwrite(&d_size, sizeof(d_size), 1, out)) { + /* ERROR */ + cont = false; + } + + if (1 != fwrite(&d, sizeof(d), 1, out)) { + /* ERROR */ + cont = false; + } +} + +/* + * This function writes syscall's log entry in stream + */ +static void +print_event_bin(void *cb_cookie, void *data, int size) +{ + struct ev_dt_t *const event = data; + + /* XXX Check size arg */ + + if (args.failed && (event->ret >= 0)) + return; + + if (1 != fwrite(data, (size_t)size, 1, out)) { + /* ERROR */ + cont = false; + } + + (void) cb_cookie; +} + +/* + * This function parsess log's type + */ +enum out_fmt +out_fmt_str2enum(const char *str) +{ + if (!strcasecmp("bin", str) || !strcasecmp("binary", str)) + return EOF_BIN; + + if (!strcasecmp("strace", str)) + return EOF_STRACE; + + if (!strcasecmp("hex", str)) + return EOF_HEX; + + return EOF_HEX; +} + +perf_reader_raw_cb print_event_cb[EOF_QTY + 1] = { + [EOF_HEX] = print_event_hex, + [EOF_BIN] = print_event_bin, + [EOF_STRACE] = print_event_strace, +}; + +print_header_t print_header[EOF_QTY + 1] = { + [EOF_HEX] = print_header_hex, + [EOF_BIN] = print_header_bin, + [EOF_STRACE] = print_header_strace, +}; diff --git a/src/libstrace/print_event_cb.h b/src/libstrace/print_event_cb.h new file mode 100644 index 000000000..a44ea145d --- /dev/null +++ b/src/libstrace/print_event_cb.h @@ -0,0 +1,54 @@ +/* + * Copyright 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code 
must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * print_event_cb.h -- print_event_cb() function + */ + +#ifndef PRINT_EVENT_CB_H +#define PRINT_EVENT_CB_H + +#include + +#include + +#include "main.h" + +/* process event */ +extern perf_reader_raw_cb print_event_cb[EOF_QTY + 1]; + +typedef void (*print_header_t)(int argc, char *argv[]); +extern print_header_t print_header[EOF_QTY + 1]; + +enum out_fmt out_fmt_str2enum(const char *str); + +#endif /* PRINT_EVENT_CB_H */ diff --git a/src/libstrace/utils.c b/src/libstrace/utils.c new file mode 100644 index 000000000..616413d53 --- /dev/null +++ b/src/libstrace/utils.c @@ -0,0 +1,365 @@ +/* + * Copyright 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * utils.c -- utility functions + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "main.h" +#include "utils.h" +#include "generate_ebpf.h" + +/* + * This function loads text file from disk and return malloc-ed, + * null-terminated string + */ +char * +load_file_from_disk(const char *const fn) +{ + int fd; + long res; + char *buf = NULL; + struct stat st; + + fd = open(fn, O_RDONLY); + + if (fd == -1) + return buf; + + res = fstat(fd, &st); + + if (res == -1) + goto out; + + buf = calloc(1, (size_t)st.st_size + 1); + + res = read(fd, buf, (size_t)st.st_size); + + if (st.st_size != res) { + free(buf); + buf = NULL; + } + +out: + close(fd); + + return buf; +} + +/* + * Export embedded trace.h to file + */ +void save_trace_h(void) { + int fd; + + long res = access(ebpf_trace_h_file, R_OK); + + if (res == 0) + return; + + fd = open(ebpf_trace_h_file, O_WRONLY | O_CREAT, 0666); + + if (fd == -1) + return; + + res = write(fd, _binary_trace_h_start, (size_t)_binary_trace_h_size); + + close(fd); +} + +/* + * This function loads 'virtual' file. 
+ */ +char * +load_file(const char *const fn) +{ + char *f = load_file_from_disk(fn); + + if (NULL != f) + return f; + + /* fallback to embedded ones */ + if (0 == strcmp(ebpf_head_file, fn)) { + return strndup(_binary_trace_head_c_start, + (size_t)_binary_trace_head_c_size); + } else if (0 == strcmp(ebpf_libc_tmpl_file, fn)) { + return strndup(_binary_trace_libc_tmpl_c_start, + (size_t)_binary_trace_libc_tmpl_c_size); + } else if (0 == strcmp(ebpf_file_tmpl_file, fn)) { + return strndup(_binary_trace_file_tmpl_c_start, + (size_t)_binary_trace_file_tmpl_c_size); + } else if (0 == strcmp(ebpf_fileat_tmpl_file, fn)) { + return strndup(_binary_trace_fileat_tmpl_c_start, + (size_t)_binary_trace_fileat_tmpl_c_size); + } else if (0 == strcmp(ebpf_kern_tmpl_file, fn)) { + return strndup(_binary_trace_kern_tmpl_c_start, + (size_t)_binary_trace_kern_tmpl_c_size); + } else if (0 == strcmp(ebpf_tp_all_file, fn)) { + return strndup(_binary_trace_tp_all_c_start, + (size_t)_binary_trace_tp_all_c_size); + } else if (0 == strcmp(ebpf_trace_h_file, fn)) { + return strndup(_binary_trace_h_start, + (size_t)_binary_trace_h_size); + } + + return NULL; +} + +/* + * This function reads status of eBPF JIT compiler. + */ +static int +load_bpf_jit_status(void) +{ + int fd, err_no; + long res; + char buf[16]; + + fd = open("/proc/sys/net/core/bpf_jit_enable", O_RDONLY); + + if (fd == -1) + return -1; + + errno = 0; + res = read(fd, buf, sizeof(buf)); + + err_no = errno; + close(fd); + errno = err_no; + + if (res <= 0) + return -1; + + return atoi(buf); +} + +/* + * This function checks status of eBPF JIT compiler and prints appropriate + * message. 
+ */
+void
+check_bpf_jit_status(FILE *file)
+{
+	int status = load_bpf_jit_status();
+
+	/* The cases mirror the values load_bpf_jit_status() can hand back. */
+	switch (status) {
+	case -1:
+		/* errno was left in place by load_bpf_jit_status() for '%m' */
+		fprintf(file,
+			"ERROR:%s: could not read bpf_jit status: '%m'\n",
+			__func__);
+		return;
+
+	case 0:
+		fprintf(file,
+			"WARNING:%s: DISABLED.\n"
+			"\tPlease reffer to `man strace.ebpf`,"
+			" section 'Configuration'.\n"
+			"\tIt will allow to improve performance significantly\n"
+			"\tand drop appropriate problems.\n",
+			__func__);
+		return;
+
+	case 1:
+		fprintf(file, "INFO:%s: ENABLED.\n", __func__);
+		return;
+
+	case 2:
+		fprintf(file, "INFO:%s: DEBUG.\n", __func__);
+		return;
+
+	default:
+		fprintf(file,
+			"WARNING:%s: UNKNOWN. Please notify the author.\n",
+			__func__);
+		return;
+	}
+}
+
+
+/*
+ * This function recognises syscalls among in-kernel functions.
+ *
+ * 'line' holds 'size' characters (the caller passes the length without
+ * the trailing newline). A line is accepted when it is longer than the
+ * "sys_" prefix, starts with that prefix (case-insensitively) and does
+ * not end with ']'.
+ *
+ * NOTE(review): the ']' test presumably drops entries that carry a
+ * "[module]" suffix -- confirm against the format of the list being read.
+ */
+bool
+is_a_sc(const char *const line, const ssize_t size)
+{
+	static const char template[] = "sys_";
+
+	const size_t template_len = strlen(template);
+
+	if (size <= (ssize_t)template_len)
+		return false;
+
+	if (strncasecmp(line, template, template_len))
+		return false;
+
+	if (line[size - 1] == ']')
+		return false;
+
+	return true;
+}
+
+const char debug_tracing[] = DEBUG_TRACING;
+const char debug_tracing_aff[] = DEBUG_TRACING DT_AFF;
+
+/*
+ * This function fetch syscall's list from running kernel
+ *
+ * Every line of the file named by debug_tracing_aff is copied to 'f'.
+ * When 'template' is not NULL only the lines it accepts are copied; the
+ * filter gets the line length without the trailing newline. If the list
+ * cannot be opened an error is printed to stderr and nothing is written.
+ */
+void
+get_sc_list(FILE *f, template_t template)
+{
+	char *line = NULL;
+	size_t len = 0;
+	ssize_t read;
+
+	FILE *in = fopen(debug_tracing_aff, "r");
+
+	if (NULL == in) {
+		fprintf(stderr, "%s: ERROR: '%m'\n", __func__);
+		return;
+	}
+
+	while ((read = getline(&line, &len, in)) != -1) {
+		if (NULL != template) {
+			if (!template(line, read - 1))
+				continue;
+		}
+
+		/* the line is written as read, newline included */
+		fwrite(line, (size_t)read, 1, f);
+	}
+
+	/* getline() allocated (and possibly re-allocated) the buffer */
+	free(line);
+	fclose(in);
+	fflush(f);
+}
+
+/*
+ * Replace all occurrence of 'templt' in 'text' with 'str'
+ *
+ * '*text' must be a heap-allocated string. When at least one occurrence is
+ * found it is freed and replaced by a newly allocated string, which the
+ * caller owns.
+ *
+ * The text is scanned once from left to right and inserted copies of 'str'
+ * are never rescanned, so a replacement that itself contains 'templt'
+ * cannot cause an endless loop.
+ *
+ * Nothing is done if any argument (or '*text') is NULL or 'templt' is
+ * empty. If memory cannot be obtained an error is printed to stderr and
+ * '*text' is left untouched.
+ */
+void
+str_replace_all(char **const text, const char *templt, const char *str)
+{
+	if (NULL == text || NULL == *text || NULL == templt || NULL == str)
+		return;
+
+	const size_t templt_len = strlen(templt);
+	const size_t str_len = strlen(str);
+
+	/* An empty pattern would match at every position. */
+	if (0 == templt_len)
+		return;
+
+	/* First pass: count the matches to size the result exactly. */
+	size_t qty = 0;
+
+	for (const char *p = *text; NULL != (p = strstr(p, templt));
+			p += templt_len)
+		qty++;
+
+	if (0 == qty)
+		return;
+
+	const size_t text_len = strlen(*text);
+	char *res = malloc(text_len - qty * templt_len + qty * str_len + 1);
+
+	if (NULL == res) {
+		fprintf(stderr, "%s: ERROR: '%m'\n", __func__);
+		return;
+	}
+
+	/* Second pass: copy the gaps between matches and the replacements. */
+	char *dst = res;
+	const char *src = *text;
+	const char *occ;
+
+	while (NULL != (occ = strstr(src, templt))) {
+		const size_t gap = (size_t)(occ - src);
+
+		memcpy(dst, src, gap);
+		dst += gap;
+
+		memcpy(dst, str, str_len);
+		dst += str_len;
+
+		src = occ + templt_len;
+	}
+
+	/* the tail after the last match, terminating NUL included */
+	strcpy(dst, src);
+
+	free(*text);
+	*text = res;
+}
+
+/*
+ * This function runs traced command passed through command line.
+ *
+ * Returns the pid of the forked child, or -1 if fork() failed. The child
+ * stops itself with SIGSTOP before exec'ing argv[0], so it stays stopped
+ * until it is sent SIGCONT. 'argc' is not used.
+ */
+pid_t
+start_command(int argc, char *argv[])
+{
+	pid_t pid = -1;
+
+	pid = fork();
+
+	switch (pid) {
+	case -1:
+		break;
+
+	case 0:
+		/* Wait until parent will be ready */
+		/*
+		 * for unknown reason sigwait(SIGCONT) and pause()
+		 * do not success with any signal.
+		 */
+		raise(SIGSTOP);
+
+		execvp(argv[0], argv);
+
+		/*
+		 * Only reached when execvp() failed. Use _exit() so that the
+		 * child neither runs the parent's atexit() handlers nor
+		 * flushes stdio buffers inherited across fork().
+		 */
+		_exit(errno);
+
+	default:
+		break;
+	}
+
+	(void) argc;
+	return pid;
+}
+
+/*
+ * SIGCHLD handler. Is used if "command" was provided on command line.
+ *
+ * Clears 'cont' once the child whose pid is stored in args.pid has exited.
+ */
+void
+sig_chld_handler(int sig, siginfo_t *si, void *unused)
+{
+	if (si->si_code == CLD_EXITED && args.pid == si->si_pid) {
+		cont = false;
+	}
+
+	(void) sig;
+	(void) unused;
+}
+
+/*
+ * Generic signal hendler. Is used for notification of traced process about
+ * parent's death.
+ *
+ * The received signal is forwarded to the process in args.pid (SIGSEGV is
+ * forwarded as SIGHUP) and 'cont' is cleared.
+ */
+void
+sig_transmit_handler(int sig, siginfo_t *si, void *unused)
+{
+	kill(args.pid, SIGSEGV == sig ? SIGHUP : sig);
+
+	cont = false;
+
+	(void) si;
+	(void) unused;
+}
diff --git a/src/libstrace/utils.h b/src/libstrace/utils.h
new file mode 100644
index 000000000..a40433546
--- /dev/null
+++ b/src/libstrace/utils.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+/*
+ * utils.h -- utility functions
+ */
+
+#ifndef UTILS_H
+#define UTILS_H
+
+/* NOTE(review): the header names of the two includes below are missing. */
+#include
+#include
+
+/*
+ * File loading: load_file() tries load_file_from_disk() first and then
+ * falls back to the copies embedded in the binary. Returns NULL if the
+ * name is not found.
+ */
+char *load_file(const char *fn);
+char *load_file_from_disk(const char *const fn);
+/* Print the state of the eBPF JIT compiler to 'file'. */
+void check_bpf_jit_status(FILE *file);
+
+void save_trace_h(void);
+
+/*
+ * Line filter for get_sc_list(): gets a line and its length, returns true
+ * if the line should be printed. is_a_sc() is the filter that keeps
+ * syscalls only.
+ */
+typedef bool (*template_t)(const char *line, ssize_t size);
+bool is_a_sc(const char *const line, const ssize_t size);
+void get_sc_list(FILE *f, template_t template);
+
+/* Replace every 'templt' in the heap-allocated '*text' with 'str'. */
+void str_replace_all(char **text, const char *templt, const char *str);
+
+/* Fork and exec argv[]; returns the child's pid or -1. */
+pid_t start_command(int argc, char *argv[]);
+
+/* Signal handlers with the three-argument sa_sigaction signature. */
+void sig_chld_handler(int sig, siginfo_t *si, void *unused);
+void sig_transmit_handler(int sig, siginfo_t *si, void *unused);
+
+/* Path of the tracing directory and of the function list inside it. */
+#define DEBUG_TRACING "/sys/kernel/debug/tracing"
+#define DT_AFF "/available_filter_functions"
+
+/* DEBUG_TRACING and DEBUG_TRACING DT_AFF as strings (see utils.c). */
+extern const char debug_tracing[];
+extern const char debug_tracing_aff[];
+
+#endif /* UTILS_H */
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 000000000..459f8d316
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,497 @@
+/*
+ * Copyright 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * main.c -- Trace syscalls. For Linux, uses BCC, ebpf.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+
+/* from bcc import BPF */
+#include
+#include
+#include
+
+#include "bpf.h"
+
+#include "main.h"
+#include "utils.h"
+#include "attach_probes.h"
+#include "ebpf_syscalls.h"
+#include "generate_ebpf.h"
+#include "print_event_cb.h"
+
+/* Usage text; written verbatim with fwrite(), not via printf(). */
+static const char help_text[] = "\
+\n\
+Run the specified command and trace syscalls.\n\
+\n\
+USAGE:\n\
+\tstrace.ebpf [-h] [-t] [-X] [-p PID] [command [arg ...]]\n\
+\n\
+\t-t, --timestamp include timestamp in output\n\
+\t-X, --failed only show failed syscalls\n\
+\t-d, --debug enable debug output\n\
+\t-p, --pid trace this PID only. Command arg should be missing\n\
+\t-o, --output filename\n\
+\t-l, --format output logs format. Possible values:\n\
+\t 'bin', 'binary', 'hex', 'strace', 'list' & 'help'.\n\
+\t 'bin'/'binary' file format is described in trace.h.\n\
+\t Default: 'hex'\n\
+\t-K, --hex-separator\n\
+\t set field separator for hex logs. Default is '\\t'.\n\
+\t-e, --expr expression, 'help' or 'list' for supported list.\n\
+\t Default: trace=kp-kern-all.\n\
+\t-L, --list Print a list of all traceable syscalls\n\
+\t of the running kernel.\n\
+\t-R, --ll-list Print a list of all traceable low-level funcs\n\
+\t of the running kernel.\n\
+\t WARNING: really long. ~45000 functions.\n\
+\t-b, --builtin-list\n\
+\t Print a list of all syscalls known by glibc.\n\
+\t-h, --help print help\n\
+\n\
+examples:\n\
+ ./strace.ebpf -l hex # trace all syscalls in the system\n\
+ ./strace.ebpf -l hex ls # trace syscalls of ls command\n\
+ ./strace.ebpf -l hex -t ls # include timestamps\n\
+ ./strace.ebpf -l hex -X ls # only show failed syscalls\n\
+ ./strace.ebpf -l hex -p 342 # only trace PID 342\n\
+\n\
+WARNING: System-wide tracing can fillout your disk really fast.\n\
+";
+
+/*
+ * Description of the sets accepted by '-e trace=...'.
+ *
+ * This text is written verbatim with fwrite(), not via printf(), so a
+ * percent sign must be written as a single '%'.
+ */
+static const char trace_list_text[] = "\
+List of supported sets:\n"
+	" * Help:\n"
+	"\t - 'help', 'list' This list.\n"
+	"\n"
+	" * Intercepting using KProbe:\n"
+	"\t - 'kp-pmemfile' PMemFile - actual SCs\n"
+	"\t - 'kp-file' SCs with path in args\n"
+	"\t - 'kp-desc' SCs with fdesd in args\n"
+	"\t - 'kp-kern-all' All syscalls provided by kernel.\n"
+	"\t - A bit slower.\n"
+	"\t - 'kp-libc-all' All syscalls provided by glibc.\n"
+	"\t This list is 36% shorter\n"
+	"\t than previous and loads faster.\n"
+	"\t - 'kp-sc_glob:*' Choose SCs by glob pattern, such as 'set*'\n"
+	"\t - 'kp-sc_re:.*' Choose SCs by re pattern, such as 'set.*'\n"
+	"\t - 'kp-raw_glob:*' Choose low-level funcs by glob pattern,\n"
+	"\t such as 'raw_glob:ext4_*'\n"
+	"\t - 'kp-raw_re:.*' Choose low-level funcs by re pattern,\n"
+	"\t such as 'raw_glob:ext4_*'\n"
+	"\t - 'kp-XXXX' Choose exact single SC by name,\n"
+	"\t such as 'open'\n"
+	"\t - 'kp-raw:XXXX' Choose exact single low-level func by\n"
+	"\t name, such as 'raw:ext4_mkdir'\n"
+	"\n"
+	" * Intercepting using TracePoints:\n"
+	" Currently malfunctions because of this bug:\n"
+	" https://github.com/iovisor/bcc/issues/748\n"
+	"\t - 'tp-all' All syscalls provided by kernel.\n"
+	"\t This option starts many times faster than\n"
+	"\t corresponding kprobe ones, but can eat\n"
+	"\t more of CPU resource.\n"
+	"\n";
+
+/*
+ * This function prints help message in stream.
+ */
+static void
+fprint_help(FILE *f)
+{
+	/* sizeof() counts the terminating NUL, which must not be written */
+	fwrite(help_text, sizeof(help_text)-1, 1, f);
+}
+
+/*
+ * This function prints description of expressions in stream.
+ */
+static void
+fprint_trace_list(FILE *f)
+{
+	/* sizeof() counts the terminating NUL, which must not be written */
+	fwrite(trace_list_text, sizeof(trace_list_text)-1, 1, f);
+}
+
+/* Parsed command line; 'pid' is also read by the signal handlers. */
+struct args_t args;
+/* Main loop keeps polling while this is true; cleared by signal handlers. */
+bool cont = true;
+/* Stream the trace is written to: the '-o' file or stdout. */
+FILE *out;
+/* Selected output format; indexes print_header[] and print_event_cb[]. */
+enum out_fmt out_fmt;
+
+/* HACK Should be fixed in libbcc */
+extern int perf_reader_page_cnt;
+
+/* 8 Megabytes should be something close to reasonable */
+static unsigned out_buf_size = 8 * 1024 * 1024;
+
+/*
+ * Tool's entry point
+ *
+ * Parses the command line, optionally starts the traced command (which
+ * stays stopped until the probes are in place), generates and compiles the
+ * eBPF program, attaches probes and the perf output callback, and then
+ * polls the perf readers until 'cont' is cleared.
+ *
+ * Returns EXIT_SUCCESS after a normal tracing session and EXIT_FAILURE
+ * when the eBPF program could not be prepared or attached. A few early
+ * failures exit with the errno of the failing call.
+ */
+int
+main(int argc, char *argv[])
+{
+	args.pid = -1;
+	args.out_sep_ch = '\t';
+
+	/*
+	 * XXX Should be set by cl options
+	 * if we need something over syscalls
+	 */
+	args.pr_arr_max = 1000;
+
+	/*
+	 * XXX Let's enlarge ring buffers. It's really improve situation
+	 * with lost events. In the future we should do it via cl options.
+	 */
+	perf_reader_page_cnt *= perf_reader_page_cnt;
+	perf_reader_page_cnt *= perf_reader_page_cnt;
+
+	while (1) {
+		int c;
+		int option_index = 0;
+
+		static struct option long_options[] = {
+			{"timestamp", no_argument, 0, 't'},
+			{"failed", no_argument, 0, 'X'},
+			{"help", no_argument, 0, 'h'},
+			{"debug", no_argument, 0, 'd'},
+			{"list", no_argument, 0, 'L'},
+			{"ll-list", no_argument, 0, 'R'},
+			{"builtin-list", no_argument, 0, 'b'},
+
+			{"pid", required_argument, 0, 'p'},
+			{"format", required_argument, 0, 'l'},
+			{"expr", required_argument, 0, 'e'},
+			{"output", required_argument, 0, 'o'},
+			{"hex-separator", required_argument, 0, 'K'},
+			{0, 0, 0, 0 }
+		};
+
+		/*
+		 * The leading '+' stops option parsing at the first
+		 * non-option, so the options of the traced command are
+		 * left alone.
+		 */
+		c = getopt_long(argc, argv, "+tXhdp:o:l:K:e:LRb",
+				long_options, &option_index);
+
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 't':
+			args.timestamp = true;
+			break;
+
+		case 'X':
+			args.failed = true;
+			break;
+
+		case 'h':
+			fprint_help(stdout);
+			exit(EXIT_SUCCESS);
+
+		case 'd':
+			args.debug = true;
+			break;
+
+		case 'p':
+			args.pid = atoi(optarg);
+			break;
+
+		case 'o':
+			args.out_fn = optarg;
+			break;
+
+		case 'K':
+			args.out_sep_ch = *optarg;
+			break;
+
+		case 'e':
+			if (!strcasecmp(optarg, "list") ||
+					!strcasecmp(optarg, "help")) {
+				fprintf(stderr,
+					"List of supported expressions:"
+					" 'help', 'list', 'trace=set'"
+					"\n");
+				exit(EXIT_SUCCESS);
+			} else if (!strcasecmp(optarg, "trace=help") ||
+					!strcasecmp(optarg,
+						"trace=list")) {
+				fprint_trace_list(stderr);
+				fprintf(stderr,
+					"You can combine sets"
+					" by using comma.\n");
+				exit(EXIT_SUCCESS);
+			}
+			args.expr = optarg;
+			break;
+
+		case 'l':
+			if (!strcasecmp(optarg, "list") ||
+					!strcasecmp(optarg, "help")) {
+				fprintf(stderr,
+					"List of supported expressions:"
+					"'bin', 'binary', 'hex', "
+					"'strace', 'list' & 'help'\n");
+				exit(EXIT_SUCCESS);
+			}
+			args.out_fmt_str = optarg;
+			out_fmt = out_fmt_str2enum(args.out_fmt_str);
+			break;
+
+		case 'L':
+			get_sc_list(stdout, is_a_sc);
+			exit(EXIT_SUCCESS);
+
+		case 'R':
+			get_sc_list(stdout, NULL);
+			exit(EXIT_SUCCESS);
+
+		case 'b':
+			for (unsigned i = 0; i < SC_TBL_SIZE; i++)
+				if (NULL != sc_tbl[i].hlr_name)
+					fprintf(stdout,
+						"%03d: %-20s\t %s\n",
+						sc_tbl[i].num,
+						sc_tbl[i].num_name,
+						sc_tbl[i].hlr_name);
+			exit(EXIT_SUCCESS);
+
+		case ':':
+			fprintf(stderr, "ERROR: "
+				"Missing mandatory option's "
+				"argument\n");
+			fprint_help(stderr);
+			exit(EXIT_FAILURE);
+
+		default:
+			fprintf(stderr, "ERROR: "
+				"Unknown option: '-%c'\n", c);
+			/* fallthrough */
+		case '?':
+			fprint_help(stderr);
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	/* Anything left after the options is the command to run and trace. */
+	if (optind < argc)
+		args.command = true;
+
+	/* Check for JIT acceleration of BPF */
+	check_bpf_jit_status(stderr);
+
+	if (NULL != args.out_fn) {
+		out = fopen(args.out_fn, "w");
+
+		if (NULL == out) {
+			/* fprintf() may change errno, so keep a copy */
+			int err_no = errno;
+
+			fprintf(stderr, "ERROR: "
+				"Failed to open '%s' for writing: '%m'\n",
+				args.out_fn);
+
+			exit(err_no);
+		}
+	} else {
+		out = stdout;
+	}
+
+	/* XXX We should improve it. May be we should use fd directly */
+	/* setbuffer(out, NULL, out_buf_size); */
+	(void) out_buf_size;
+
+	if (args.pid != -1 && args.command) {
+		fprintf(stderr, "ERROR: "
+			"It is currently unsupported to watch for PID"
+			" and command simultaneously.\n");
+		fprint_help(stderr);
+		exit(EXIT_FAILURE);
+	}
+
+	if (args.command) {
+		struct sigaction sa;
+
+		memset(&sa, 0, sizeof(sa));
+
+		/* The child stops itself and waits for SIGCONT (see below). */
+		args.pid = start_command(argc - optind, argv + optind);
+
+		if (args.pid == -1) {
+			int err_no = errno;
+
+			fprintf(stderr, "ERROR: "
+				"Failed to run: '%s': %m. Exiting.\n",
+				argv[optind]);
+			exit(err_no);
+		}
+
+		sa.sa_sigaction = sig_chld_handler;
+		sigemptyset(&sa.sa_mask);
+		sa.sa_flags = SA_RESTART | SA_SIGINFO |
+			SA_NOCLDSTOP | SA_NOCLDWAIT;
+
+		(void) sigaction(SIGCHLD, &sa, NULL);
+
+		/*
+		 * The handler is installed through sa_sigaction and has the
+		 * three-argument signature, so SA_SIGINFO is required.
+		 */
+		sa.sa_sigaction = sig_transmit_handler;
+		sa.sa_flags = SA_RESTART | SA_SIGINFO;
+
+		(void) sigaction(SIGINT, &sa, NULL);
+		(void) sigaction(SIGHUP, &sa, NULL);
+		(void) sigaction(SIGQUIT, &sa, NULL);
+		(void) sigaction(SIGTERM, &sa, NULL);
+
+		/* SA_RESETHAND: a second SIGSEGV gets the default action */
+		sa.sa_flags = (int)(SA_RESTART | SA_SIGINFO | SA_RESETHAND);
+		(void) sigaction(SIGSEGV, &sa, NULL);
+	}
+
+	/* define BPF program */
+	char *bpf_str = generate_ebpf();
+
+	if (NULL == bpf_str) {
+		fprintf(stderr,
+			"ERROR: Failed to generate eBPF code. Exiting.\n");
+
+		if (args.command) {
+			/* let's KILL child */
+			kill(args.pid, SIGKILL);
+		}
+
+		return EXIT_FAILURE;
+	}
+
+	if (0 < args.pid) {
+		char str[128];
+
+		snprintf(str, sizeof(str),
+			"if ((pid_tid >> 32) != %d) { return 0; }",
+			args.pid);
+
+		str_replace_all(&bpf_str, "PID_CHECK_HOOK", str);
+
+		if (!args.command) {
+			/* signal 0 only checks that the process exists */
+			if (kill(args.pid, 0) == -1) {
+				int err_no = errno;
+
+				fprintf(stderr,
+					"ERROR: Process with pid '%d'"
+					" does not exist: '%m'.\n", args.pid);
+
+				exit(err_no);
+			}
+		}
+	} else {
+		str_replace_all(&bpf_str, "PID_CHECK_HOOK", "");
+	}
+
+	/* The include directive is replaced by the header's text. */
+	char *trace_h = load_file(ebpf_trace_h_file);
+
+	if (NULL == trace_h) {
+		fprintf(stderr,
+			"ERROR: Failed to load '%s'. Exiting.\n",
+			ebpf_trace_h_file);
+
+		free(bpf_str);
+
+		if (args.command) {
+			/* let's KILL child */
+			kill(args.pid, SIGKILL);
+		}
+
+		return EXIT_FAILURE;
+	}
+
+	str_replace_all(&bpf_str, "#include \"trace.h\"\n", trace_h);
+
+	free(trace_h);
+
+	if (args.debug) {
+		fprintf(stderr, "\t>>>>> Generated eBPF code <<<<<\n");
+
+		if (bpf_str)
+			fwrite(bpf_str, strlen(bpf_str), 1, stderr);
+
+		fprintf(stderr, "\t>>>>> EndOf generated eBPF code <<<<<<\n");
+	}
+
+	save_trace_h();
+
+	/* initialize BPF */
+	struct bpf_ctx *b = calloc(1, sizeof(*b));
+
+	if (NULL == b) {
+		fprintf(stderr,
+			"ERROR: Failed to allocate BPF context: '%m'."
+			" Exiting.\n");
+
+		free(bpf_str);
+
+		if (args.command) {
+			/* let's KILL child */
+			kill(args.pid, SIGKILL);
+		}
+
+		return EXIT_FAILURE;
+	}
+
+	/* Compiling of generated eBPF code */
+	b->module = bpf_module_create_c_from_string(bpf_str, 0, NULL, 0);
+	b->debug = args.debug;
+
+	free(bpf_str);
+
+	if (!attach_probes(b)) {
+		/* No probes were attached */
+		fprintf(stderr,
+			"ERROR: No probes were attached. Exiting.\n");
+
+		if (args.command) {
+			/* let's KILL child */
+			kill(args.pid, SIGKILL);
+		}
+
+		return EXIT_FAILURE;
+	}
+
+	/* header */
+	print_header[out_fmt](argc, argv);
+
+	/*
+	 * Attach callback to perf output. "events" is a name of class declared
+	 * with BPF_PERF_OUTPUT() in trace.c.
+	 *
+	 * XXX Most likely we should utilise here str_replace for consistence
+	 * increasing.
+	 */
+#define PERF_OUTPUT_NAME "events"
+	int res = attach_callback_to_perf_output(b,
+			PERF_OUTPUT_NAME, print_event_cb[out_fmt]);
+
+	/* zero means success here */
+	if (!res) {
+		if (args.command) {
+			/* let's child go */
+			kill(args.pid, SIGCONT);
+		}
+	} else {
+		fprintf(stderr,
+			"ERROR: Can't attach to perf output '%s'. Exiting.\n",
+			PERF_OUTPUT_NAME);
+
+		if (args.command) {
+			/* let's KILL child */
+			kill(args.pid, SIGKILL);
+		}
+
+		detach_all(b);
+		return EXIT_FAILURE;
+	}
+
+	/* perf_reader_poll() wants a plain array of reader pointers */
+	struct perf_reader *readers[b->pr_arr_qty];
+
+	for (unsigned i = 0; i < b->pr_arr_qty; i++)
+		readers[i] = b->pr_arr[i]->pr;
+
+	while (cont) {
+		(void) perf_reader_poll((int)b->pr_arr_qty, readers, -1);
+
+		/* With '-p' nobody signals us when the process goes away. */
+		if (!args.command && 0 < args.pid) {
+			if (kill(args.pid, 0) == -1) {
+				cont = false;
+
+				fprintf(stderr,
+					"ERROR: Process with pid '%d'"
+					" has disappeared : '%m'.\n",
+					args.pid);
+
+				fprintf(stderr, "Exit.\n");
+			}
+		}
+	}
+
+
+	detach_all(b);
+	return EXIT_SUCCESS;
+}
diff --git a/src/make-redis.sh b/src/make-redis.sh
new file mode 100755
index 000000000..d7bf2ece0
--- /dev/null
+++ b/src/make-redis.sh
@@ -0,0 +1,55 @@
+#!/bin/bash -x
+#
+# Copyright 2014-2016, Intel Corporation
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#
+# make-redis.sh - Script for running redis-benchmark while redis-server
+# is traced.
+#
+# The script's arguments are placed in front of the redis-server command
+# line, i.e. they are the command (and its options) the server runs under.
+#
+
+# Enable job control so that 'fg' below works in a non-interactive shell.
+set -m
+
+# Should be same as in redis.conf
+# (the redirection truncates the file, or creates it empty)
+echo -n > /tmp/appendonly.aof
+
+# Start the server in the background, wrapped by the external 'time' binary
+# ('which' bypasses the shell keyword) and by the command given in "$@".
+`which time` -p -v "$@" redis-server redis.conf >> redis-server.log &
+
+# Give the wrapped server time to start before the benchmark connects.
+sleep 13
+
+redis-benchmark -q -n 100000
+
+sleep 3
+
+redis-cli shutdown
+
+# Bring the background job to the foreground, i.e. wait until it finishes.
+fg
+
+exit 0
diff --git a/src/redis.conf b/src/redis.conf
new file mode 100644
index 000000000..750799001
--- /dev/null
+++ b/src/redis.conf
@@ -0,0 +1,943 @@
+# Redis configuration file example.
+# +# Note that in order to read the configuration file, Redis must be +# started with the file path as first argument: +# +# ./redis-server /path/to/redis.conf + +# Note on units: when memory size is needed, it is possible to specify +# it in the usual form of 1k 5GB 4M and so forth: +# +# 1k => 1000 bytes +# 1kb => 1024 bytes +# 1m => 1000000 bytes +# 1mb => 1024*1024 bytes +# 1g => 1000000000 bytes +# 1gb => 1024*1024*1024 bytes +# +# units are case insensitive so 1GB 1Gb 1gB are all the same. + +################################## INCLUDES ################################### + +# Include one or more other config files here. This is useful if you +# have a standard template that goes to all Redis servers but also need +# to customize a few per-server settings. Include files can include +# other files, so use this wisely. +# +# Notice option "include" won't be rewritten by command "CONFIG REWRITE" +# from admin or Redis Sentinel. Since Redis always uses the last processed +# line as value of a configuration directive, you'd better put includes +# at the beginning of this file to avoid overwriting config change at runtime. +# +# If instead you are interested in using includes to override configuration +# options, it is better to use include as the last line. +# +# include /path/to/local.conf +# include /path/to/other.conf + +################################ GENERAL ##################################### + +# By default Redis does not run as a daemon. Use 'yes' if you need it. +# Note that Redis will write a pid file in /var/run/redis.pid when daemonized. +daemonize no + +# When running daemonized, Redis writes a pid file in /var/run/redis.pid by +# default. You can specify a custom pid file location here. +pidfile /var/run/redis/redis-server.pid + +# Accept connections on the specified port, default is 6379. +# If port 0 is specified Redis will not listen on a TCP socket. +port 6379 + +# TCP listen() backlog. 
+# +# In high requests-per-second environments you need an high backlog in order +# to avoid slow clients connections issues. Note that the Linux kernel +# will silently truncate it to the value of /proc/sys/net/core/somaxconn so +# make sure to raise both the value of somaxconn and tcp_max_syn_backlog +# in order to get the desired effect. +tcp-backlog 511 + +# By default Redis listens for connections from all the network interfaces +# available on the server. It is possible to listen to just one or multiple +# interfaces using the "bind" configuration directive, followed by one or +# more IP addresses. +# +# Examples: +# +# bind 192.168.1.100 10.0.0.1 +bind 127.0.0.1 + +# Specify the path for the Unix socket that will be used to listen for +# incoming connections. There is no default, so Redis will not listen +# on a unix socket when not specified. +# +# unixsocket /var/run/redis/redis.sock +# unixsocketperm 700 + +# Close the connection after a client is idle for N seconds (0 to disable) +timeout 0 + +# TCP keepalive. +# +# If non-zero, use SO_KEEPALIVE to send TCP ACKs to clients in absence +# of communication. This is useful for two reasons: +# +# 1) Detect dead peers. +# 2) Take the connection alive from the point of view of network +# equipment in the middle. +# +# On Linux, the specified value (in seconds) is the period used to send ACKs. +# Note that to close the connection the double of the time is needed. +# On other kernels the period depends on the kernel configuration. +# +# A reasonable value for this option is 60 seconds. +tcp-keepalive 0 + +# Specify the server verbosity level. +# This can be one of: +# debug (a lot of information, useful for development/testing) +# verbose (many rarely useful info, but not a mess like the debug level) +# notice (moderately verbose, what you want in production probably) +# warning (only very important / critical messages are logged) +loglevel notice + +# Specify the log file name. 
Also the empty string can be used to force +# Redis to log on the standard output. Note that if you use standard +# output for logging but daemonize, logs will be sent to /dev/null +#logfile /var/log/redis/redis-server.log + +# To enable logging to the system logger, just set 'syslog-enabled' to yes, +# and optionally update the other syslog parameters to suit your needs. +# syslog-enabled no + +# Specify the syslog identity. +# syslog-ident redis + +# Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7. +# syslog-facility local0 + +# Set the number of databases. The default database is DB 0, you can select +# a different one on a per-connection basis using SELECT <dbid> where +# dbid is a number between 0 and 'databases'-1 +databases 16 + +################################ SNAPSHOTTING ################################ +# +# Save the DB on disk: +# +# save <seconds> <changes> +# +# Will save the DB if both the given number of seconds and the given +# number of write operations against the DB occurred. +# +# In the example below the behaviour will be to save: +# after 900 sec (15 min) if at least 1 key changed +# after 300 sec (5 min) if at least 10 keys changed +# after 60 sec if at least 10000 keys changed +# +# Note: you can disable saving completely by commenting out all "save" lines. +# +# It is also possible to remove all the previously configured save +# points by adding a save directive with a single empty string argument +# like in the following example: +# +save "" + +#save 900 1 +#save 300 10 +#save 60 10000 + +# By default Redis will stop accepting writes if RDB snapshots are enabled +# (at least one save point) and the latest background save failed. +# This will make the user aware (in a hard way) that data is not persisting +# on disk properly, otherwise chances are that no one will notice and some +# disaster will happen. +# +# If the background saving process will start working again Redis will +# automatically allow writes again. 
+# +# However if you have setup your proper monitoring of the Redis server +# and persistence, you may want to disable this feature so that Redis will +# continue to work as usual even if there are problems with disk, +# permissions, and so forth. +stop-writes-on-bgsave-error yes + +# Compress string objects using LZF when dump .rdb databases? +# For default that's set to 'yes' as it's almost always a win. +# If you want to save some CPU in the saving child set it to 'no' but +# the dataset will likely be bigger if you have compressible values or keys. +rdbcompression yes + +# Since version 5 of RDB a CRC64 checksum is placed at the end of the file. +# This makes the format more resistant to corruption but there is a performance +# hit to pay (around 10%) when saving and loading RDB files, so you can disable it +# for maximum performances. +# +# RDB files created with checksum disabled have a checksum of zero that will +# tell the loading code to skip the check. +rdbchecksum yes + +# The filename where to dump the DB +dbfilename dump.rdb + +# The working directory. +# +# The DB will be written inside this directory, with the filename specified +# above using the 'dbfilename' configuration directive. +# +# The Append Only File will also be created inside this directory. +# +# Note that you must specify a directory here, not a file name. +dir /tmp + +################################# REPLICATION ################################# + +# Master-Slave replication. Use slaveof to make a Redis instance a copy of +# another Redis server. A few things to understand ASAP about Redis replication. +# +# 1) Redis replication is asynchronous, but you can configure a master to +# stop accepting writes if it appears to be not connected with at least +# a given number of slaves. +# 2) Redis slaves are able to perform a partial resynchronization with the +# master if the replication link is lost for a relatively small amount of +# time. 
You may want to configure the replication backlog size (see the next +# sections of this file) with a sensible value depending on your needs. +# 3) Replication is automatic and does not need user intervention. After a +# network partition slaves automatically try to reconnect to masters +# and resynchronize with them. +# +# slaveof <masterip> <masterport> + +# If the master is password protected (using the "requirepass" configuration +# directive below) it is possible to tell the slave to authenticate before +# starting the replication synchronization process, otherwise the master will +# refuse the slave request. +# +# masterauth <master-password> + +# When a slave loses its connection with the master, or when the replication +# is still in progress, the slave can act in two different ways: +# +# 1) if slave-serve-stale-data is set to 'yes' (the default) the slave will +# still reply to client requests, possibly with out of date data, or the +# data set may just be empty if this is the first synchronization. +# +# 2) if slave-serve-stale-data is set to 'no' the slave will reply with +# an error "SYNC with master in progress" to all the kind of commands +# but to INFO and SLAVEOF. +# +slave-serve-stale-data yes + +# You can configure a slave instance to accept writes or not. Writing against +# a slave instance may be useful to store some ephemeral data (because data +# written on a slave will be easily deleted after resync with the master) but +# may also cause problems if clients are writing to it because of a +# misconfiguration. +# +# Since Redis 2.6 by default slaves are read-only. +# +# Note: read only slaves are not designed to be exposed to untrusted clients +# on the internet. It's just a protection layer against misuse of the instance. +# Still a read only slave exports by default all the administrative commands +# such as CONFIG, DEBUG, and so forth. To a limited extent you can improve +# security of read only slaves using 'rename-command' to shadow all the +# administrative / dangerous commands. 
+slave-read-only yes + +# Replication SYNC strategy: disk or socket. +# +# ------------------------------------------------------- +# WARNING: DISKLESS REPLICATION IS EXPERIMENTAL CURRENTLY +# ------------------------------------------------------- +# +# New slaves and reconnecting slaves that are not able to continue the replication +# process just receiving differences, need to do what is called a "full +# synchronization". An RDB file is transmitted from the master to the slaves. +# The transmission can happen in two different ways: +# +# 1) Disk-backed: The Redis master creates a new process that writes the RDB +# file on disk. Later the file is transferred by the parent +# process to the slaves incrementally. +# 2) Diskless: The Redis master creates a new process that directly writes the +# RDB file to slave sockets, without touching the disk at all. +# +# With disk-backed replication, while the RDB file is generated, more slaves +# can be queued and served with the RDB file as soon as the current child producing +# the RDB file finishes its work. With diskless replication instead once +# the transfer starts, new slaves arriving will be queued and a new transfer +# will start when the current one terminates. +# +# When diskless replication is used, the master waits a configurable amount of +# time (in seconds) before starting the transfer in the hope that multiple slaves +# will arrive and the transfer can be parallelized. +# +# With slow disks and fast (large bandwidth) networks, diskless replication +# works better. +repl-diskless-sync no + +# When diskless replication is enabled, it is possible to configure the delay +# the server waits in order to spawn the child that transfers the RDB via socket +# to the slaves. +# +# This is important since once the transfer starts, it is not possible to serve +# new slaves arriving, that will be queued for the next RDB transfer, so the server +# waits a delay in order to let more slaves arrive. 
+# +# The delay is specified in seconds, and by default is 5 seconds. To disable +# it entirely just set it to 0 seconds and the transfer will start ASAP. +repl-diskless-sync-delay 5 + +# Slaves send PINGs to server in a predefined interval. It's possible to change +# this interval with the repl_ping_slave_period option. The default value is 10 +# seconds. +# +# repl-ping-slave-period 10 + +# The following option sets the replication timeout for: +# +# 1) Bulk transfer I/O during SYNC, from the point of view of slave. +# 2) Master timeout from the point of view of slaves (data, pings). +# 3) Slave timeout from the point of view of masters (REPLCONF ACK pings). +# +# It is important to make sure that this value is greater than the value +# specified for repl-ping-slave-period otherwise a timeout will be detected +# every time there is low traffic between the master and the slave. +# +# repl-timeout 60 + +# Disable TCP_NODELAY on the slave socket after SYNC? +# +# If you select "yes" Redis will use a smaller number of TCP packets and +# less bandwidth to send data to slaves. But this can add a delay for +# the data to appear on the slave side, up to 40 milliseconds with +# Linux kernels using a default configuration. +# +# If you select "no" the delay for data to appear on the slave side will +# be reduced but more bandwidth will be used for replication. +# +# By default we optimize for low latency, but in very high traffic conditions +# or when the master and slaves are many hops away, turning this to "yes" may +# be a good idea. +repl-disable-tcp-nodelay no + +# Set the replication backlog size. The backlog is a buffer that accumulates +# slave data when slaves are disconnected for some time, so that when a slave +# wants to reconnect again, often a full resync is not needed, but a partial +# resync is enough, just passing the portion of data the slave missed while +# disconnected. 
+# +# The bigger the replication backlog, the longer the time the slave can be +# disconnected and later be able to perform a partial resynchronization. +# +# The backlog is only allocated once there is at least a slave connected. +# +# repl-backlog-size 1mb + +# After a master has no longer connected slaves for some time, the backlog +# will be freed. The following option configures the amount of seconds that +# need to elapse, starting from the time the last slave disconnected, for +# the backlog buffer to be freed. +# +# A value of 0 means to never release the backlog. +# +# repl-backlog-ttl 3600 + +# The slave priority is an integer number published by Redis in the INFO output. +# It is used by Redis Sentinel in order to select a slave to promote into a +# master if the master is no longer working correctly. +# +# A slave with a low priority number is considered better for promotion, so +# for instance if there are three slaves with priority 10, 100, 25 Sentinel will +# pick the one with priority 10, that is the lowest. +# +# However a special priority of 0 marks the slave as not able to perform the +# role of master, so a slave with priority of 0 will never be selected by +# Redis Sentinel for promotion. +# +# By default the priority is 100. +slave-priority 100 + +# It is possible for a master to stop accepting writes if there are less than +# N slaves connected, having a lag less or equal than M seconds. +# +# The N slaves need to be in "online" state. +# +# The lag in seconds, that must be <= the specified value, is calculated from +# the last ping received from the slave, that is usually sent every second. +# +# This option does not GUARANTEE that N replicas will accept the write, but +# will limit the window of exposure for lost writes in case not enough slaves +# are available, to the specified number of seconds. 
+# +# For example to require at least 3 slaves with a lag <= 10 seconds use: +# +# min-slaves-to-write 3 +# min-slaves-max-lag 10 +# +# Setting one or the other to 0 disables the feature. +# +# By default min-slaves-to-write is set to 0 (feature disabled) and +# min-slaves-max-lag is set to 10. + +################################## SECURITY ################################### + +# Require clients to issue AUTH before processing any other +# commands. This might be useful in environments in which you do not trust +# others with access to the host running redis-server. +# +# This should stay commented out for backward compatibility and because most +# people do not need auth (e.g. they run their own servers). +# +# Warning: since Redis is pretty fast an outside user can try up to +# 150k passwords per second against a good box. This means that you should +# use a very strong password otherwise it will be very easy to break. +# +# requirepass foobared + +# Command renaming. +# +# It is possible to change the name of dangerous commands in a shared +# environment. For instance the CONFIG command may be renamed into something +# hard to guess so that it will still be available for internal-use tools +# but not available for general clients. +# +# Example: +# +# rename-command CONFIG b840fc02d524045429941cc15f59e41cb7be6c52 +# +# It is also possible to completely kill a command by renaming it into +# an empty string: +# +# rename-command CONFIG "" +# +# Please note that changing the name of commands that are logged into the +# AOF file or transmitted to slaves may cause problems. + +################################### LIMITS #################################### + +# Set the max number of connected clients at the same time. 
By default +# this limit is set to 10000 clients, however if the Redis server is not +# able to configure the process file limit to allow for the specified limit +# the max number of allowed clients is set to the current file limit +# minus 32 (as Redis reserves a few file descriptors for internal uses). +# +# Once the limit is reached Redis will close all the new connections sending +# an error 'max number of clients reached'. +# +# maxclients 10000 + +# Don't use more memory than the specified amount of bytes. +# When the memory limit is reached Redis will try to remove keys +# according to the eviction policy selected (see maxmemory-policy). +# +# If Redis can't remove keys according to the policy, or if the policy is +# set to 'noeviction', Redis will start to reply with errors to commands +# that would use more memory, like SET, LPUSH, and so on, and will continue +# to reply to read-only commands like GET. +# +# This option is usually useful when using Redis as an LRU cache, or to set +# a hard memory limit for an instance (using the 'noeviction' policy). +# +# WARNING: If you have slaves attached to an instance with maxmemory on, +# the size of the output buffers needed to feed the slaves are subtracted +# from the used memory count, so that network problems / resyncs will +# not trigger a loop where keys are evicted, and in turn the output +# buffer of slaves is full with DELs of keys evicted triggering the deletion +# of more keys, and so forth until the database is completely emptied. +# +# In short... if you have slaves attached it is suggested that you set a lower +# limit for maxmemory so that there is some free RAM on the system for slave +# output buffers (but this is not needed if the policy is 'noeviction'). +# +# maxmemory <bytes> + +# MAXMEMORY POLICY: how Redis will select what to remove when maxmemory +# is reached.
You can select among six behaviors: +# +# volatile-lru -> remove the key with an expire set using an LRU algorithm +# allkeys-lru -> remove any key according to the LRU algorithm +# volatile-random -> remove a random key with an expire set +# allkeys-random -> remove a random key, any key +# volatile-ttl -> remove the key with the nearest expire time (minor TTL) +# noeviction -> don't evict at all, just return an error on write operations +# +# Note: with any of the above policies, Redis will return an error on write +# operations, when there are no suitable keys for eviction. +# +# At the date of writing these commands are: set setnx setex append +# incr decr rpush lpush rpushx lpushx linsert lset rpoplpush sadd +# sinter sinterstore sunion sunionstore sdiff sdiffstore zadd zincrby +# zunionstore zinterstore hset hsetnx hmset hincrby incrby decrby +# getset mset msetnx exec sort +# +# The default is: +# +# maxmemory-policy noeviction + +# LRU and minimal TTL algorithms are not precise algorithms but approximated +# algorithms (in order to save memory), so you can tune it for speed or +# accuracy. By default Redis will check five keys and pick the one that was +# used less recently, you can change the sample size using the following +# configuration directive. +# +# The default of 5 produces good enough results. 10 Approximates very closely +# true LRU but costs a bit more CPU. 3 is very fast but not very accurate. +# +# maxmemory-samples 5 + +############################## APPEND ONLY MODE ############################### + +# By default Redis asynchronously dumps the dataset on disk. This mode is +# good enough in many applications, but an issue with the Redis process or +# a power outage may result in a few minutes of writes lost (depending on +# the configured save points). +# +# The Append Only File is an alternative persistence mode that provides +# much better durability.
For instance using the default data fsync policy +# (see later in the config file) Redis can lose just one second of writes in a +# dramatic event like a server power outage, or a single write if something +# wrong with the Redis process itself happens, but the operating system is +# still running correctly. +# +# AOF and RDB persistence can be enabled at the same time without problems. +# If the AOF is enabled on startup Redis will load the AOF, that is the file +# with the better durability guarantees. +# +# Please check http://redis.io/topics/persistence for more information. + +appendonly yes + +# The name of the append only file (default: "appendonly.aof") + +appendfilename "appendonly.aof" + +# The fsync() call tells the Operating System to actually write data on disk +# instead of waiting for more data in the output buffer. Some OS will really flush +# data on disk, some other OS will just try to do it ASAP. +# +# Redis supports three different modes: +# +# no: don't fsync, just let the OS flush the data when it wants. Faster. +# always: fsync after every write to the append only log. Slow, Safest. +# everysec: fsync only one time every second. Compromise. +# +# The default is "everysec", as that's usually the right compromise between +# speed and data safety. It's up to you to understand if you can relax this to +# "no" that will let the operating system flush the output buffer when +# it wants, for better performances (but if you can live with the idea of +# some data loss consider the default persistence mode that's snapshotting), +# or on the contrary, use "always" that's very slow but a bit safer than +# everysec. +# +# More details please check the following article: +# http://antirez.com/post/redis-persistence-demystified.html +# +# If unsure, use "everysec". 
+ +# appendfsync always +appendfsync everysec +# appendfsync no + +# When the AOF fsync policy is set to always or everysec, and a background +# saving process (a background save or AOF log background rewriting) is +# performing a lot of I/O against the disk, in some Linux configurations +# Redis may block too long on the fsync() call. Note that there is no fix for +# this currently, as even performing fsync in a different thread will block +# our synchronous write(2) call. +# +# In order to mitigate this problem it's possible to use the following option +# that will prevent fsync() from being called in the main process while a +# BGSAVE or BGREWRITEAOF is in progress. +# +# This means that while another child is saving, the durability of Redis is +# the same as "appendfsync no". In practical terms, this means that it is +# possible to lose up to 30 seconds of log in the worst scenario (with the +# default Linux settings). +# +# If you have latency problems turn this to "yes". Otherwise leave it as +# "no" that is the safest pick from the point of view of durability. + +no-appendfsync-on-rewrite no + +# Automatic rewrite of the append only file. +# Redis is able to automatically rewrite the log file implicitly calling +# BGREWRITEAOF when the AOF log size grows by the specified percentage. +# +# This is how it works: Redis remembers the size of the AOF file after the +# latest rewrite (if no rewrite has happened since the restart, the size of +# the AOF at startup is used). +# +# This base size is compared to the current size. If the current size is +# bigger than the specified percentage, the rewrite is triggered. Also +# you need to specify a minimal size for the AOF file to be rewritten, this +# is useful to avoid rewriting the AOF file even if the percentage increase +# is reached but it is still pretty small. +# +# Specify a percentage of zero in order to disable the automatic AOF +# rewrite feature.
+ +auto-aof-rewrite-percentage 100 +auto-aof-rewrite-min-size 4mb + +# An AOF file may be found to be truncated at the end during the Redis +# startup process, when the AOF data gets loaded back into memory. +# This may happen when the system where Redis is running +# crashes, especially when an ext4 filesystem is mounted without the +# data=ordered option (however this can't happen when Redis itself +# crashes or aborts but the operating system still works correctly). +# +# Redis can either exit with an error when this happens, or load as much +# data as possible (the default now) and start if the AOF file is found +# to be truncated at the end. The following option controls this behavior. +# +# If aof-load-truncated is set to yes, a truncated AOF file is loaded and +# the Redis server starts emitting a log to inform the user of the event. +# Otherwise if the option is set to no, the server aborts with an error +# and refuses to start. When the option is set to no, the user needs +# to fix the AOF file using the "redis-check-aof" utility before restarting +# the server. +# +# Note that if the AOF file will be found to be corrupted in the middle +# the server will still exit with an error. This option only applies when +# Redis will try to read more data from the AOF file but not enough bytes +# will be found. +aof-load-truncated yes + +################################ LUA SCRIPTING ############################### + +# Max execution time of a Lua script in milliseconds. +# +# If the maximum execution time is reached Redis will log that a script is +# still in execution after the maximum allowed time and will start to +# reply to queries with an error. +# +# When a long running script exceeds the maximum execution time only the +# SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be +# used to stop a script that did not yet call write commands.
The second +# is the only way to shut down the server in the case a write command was +# already issued by the script but the user doesn't want to wait for the natural +# termination of the script. +# +# Set it to 0 or a negative value for unlimited execution without warnings. +lua-time-limit 5000 + +################################ REDIS CLUSTER ############################### +# +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +# WARNING EXPERIMENTAL: Redis Cluster is considered to be stable code, however +# in order to mark it as "mature" we need to wait for a non trivial percentage +# of users to deploy it in production. +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +# +# Normal Redis instances can't be part of a Redis Cluster; only nodes that are +# started as cluster nodes can. In order to start a Redis instance as a +# cluster node enable the cluster support uncommenting the following: +# +# cluster-enabled yes + +# Every cluster node has a cluster configuration file. This file is not +# intended to be edited by hand. It is created and updated by Redis nodes. +# Every Redis Cluster node requires a different cluster configuration file. +# Make sure that instances running in the same system do not have +# overlapping cluster configuration file names. +# +# cluster-config-file nodes-6379.conf + +# Cluster node timeout is the amount of milliseconds a node must be unreachable +# for it to be considered in failure state. +# Most other internal time limits are multiple of the node timeout. +# +# cluster-node-timeout 15000 + +# A slave of a failing master will avoid to start a failover if its data +# looks too old. 
+# +# There is no simple way for a slave to actually have an exact measure of +# its "data age", so the following two checks are performed: +# +# 1) If there are multiple slaves able to failover, they exchange messages +# in order to try to give an advantage to the slave with the best +# replication offset (more data from the master processed). +# Slaves will try to get their rank by offset, and apply to the start +# of the failover a delay proportional to their rank. +# +# 2) Every single slave computes the time of the last interaction with +# its master. This can be the last ping or command received (if the master +# is still in the "connected" state), or the time that elapsed since the +# disconnection with the master (if the replication link is currently down). +# If the last interaction is too old, the slave will not try to failover +# at all. +# +# The point "2" can be tuned by user. Specifically a slave will not perform +# the failover if, since the last interaction with the master, the time +# elapsed is greater than: +# +# (node-timeout * slave-validity-factor) + repl-ping-slave-period +# +# So for example if node-timeout is 30 seconds, and the slave-validity-factor +# is 10, and assuming a default repl-ping-slave-period of 10 seconds, the +# slave will not try to failover if it was not able to talk with the master +# for longer than 310 seconds. +# +# A large slave-validity-factor may allow slaves with too old data to failover +# a master, while a too small value may prevent the cluster from being able to +# elect a slave at all. +# +# For maximum availability, it is possible to set the slave-validity-factor +# to a value of 0, which means that slaves will always try to failover the +# master regardless of the last time they interacted with the master. +# (However they'll always try to apply a delay proportional to their +# offset rank).
+# +# Zero is the only value able to guarantee that when all the partitions heal +# the cluster will always be able to continue. +# +# cluster-slave-validity-factor 10 + +# Cluster slaves are able to migrate to orphaned masters, that are masters +# that are left without working slaves. This improves the cluster ability +# to resist failures as otherwise an orphaned master can't be failed over +# in case of failure if it has no working slaves. +# +# Slaves migrate to orphaned masters only if there are still at least a +# given number of other working slaves for their old master. This number +# is the "migration barrier". A migration barrier of 1 means that a slave +# will migrate only if there is at least 1 other working slave for its master +# and so forth. It usually reflects the number of slaves you want for every +# master in your cluster. +# +# Default is 1 (slaves migrate only if their masters remain with at least +# one slave). To disable migration just set it to a very large value. +# A value of 0 can be set but is useful only for debugging and dangerous +# in production. +# +# cluster-migration-barrier 1 + +# By default Redis Cluster nodes stop accepting queries if they detect there +# is at least one hash slot uncovered (no available node is serving it). +# This way if the cluster is partially down (for example a range of hash slots +# are no longer covered) all the cluster becomes, eventually, unavailable. +# It automatically returns available as soon as all the slots are covered again. +# +# However sometimes you want the subset of the cluster which is working, +# to continue to accept queries for the part of the key space that is still +# covered. In order to do so, just set the cluster-require-full-coverage +# option to no. +# +# cluster-require-full-coverage yes + +# In order to setup your cluster make sure to read the documentation +# available at http://redis.io web site.
+ +################################## SLOW LOG ################################### + +# The Redis Slow Log is a system to log queries that exceeded a specified +# execution time. The execution time does not include the I/O operations +# like talking with the client, sending the reply and so forth, +# but just the time needed to actually execute the command (this is the only +# stage of command execution where the thread is blocked and can not serve +# other requests in the meantime). +# +# You can configure the slow log with two parameters: one tells Redis +# what is the execution time, in microseconds, to exceed in order for the +# command to get logged, and the other parameter is the length of the +# slow log. When a new command is logged the oldest one is removed from the +# queue of logged commands. + +# The following time is expressed in microseconds, so 1000000 is equivalent +# to one second. Note that a negative number disables the slow log, while +# a value of zero forces the logging of every command. +slowlog-log-slower-than 10000 + +# There is no limit to this length. Just be aware that it will consume memory. +# You can reclaim memory used by the slow log with SLOWLOG RESET. +slowlog-max-len 128 + +################################ LATENCY MONITOR ############################## + +# The Redis latency monitoring subsystem samples different operations +# at runtime in order to collect data related to possible sources of +# latency of a Redis instance. +# +# Via the LATENCY command this information is available to the user that can +# print graphs and obtain reports. +# +# The system only logs operations that were performed in a time equal or +# greater than the amount of milliseconds specified via the +# latency-monitor-threshold configuration directive. When its value is set +# to zero, the latency monitor is turned off. 
+# +# By default latency monitoring is disabled since it is mostly not needed +# if you don't have latency issues, and collecting data has a performance +# impact, that while very small, can be measured under big load. Latency +# monitoring can easily be enabled at runtime using the command +# "CONFIG SET latency-monitor-threshold <milliseconds>" if needed. +latency-monitor-threshold 0 + +############################# EVENT NOTIFICATION ############################## + +# Redis can notify Pub/Sub clients about events happening in the key space. +# This feature is documented at http://redis.io/topics/notifications +# +# For instance if keyspace events notification is enabled, and a client +# performs a DEL operation on key "foo" stored in the Database 0, two +# messages will be published via Pub/Sub: +# +# PUBLISH __keyspace@0__:foo del +# PUBLISH __keyevent@0__:del foo +# +# It is possible to select the events that Redis will notify among a set +# of classes. Every class is identified by a single character: +# +# K Keyspace events, published with __keyspace@<db>__ prefix. +# E Keyevent events, published with __keyevent@<db>__ prefix. +# g Generic commands (non-type specific) like DEL, EXPIRE, RENAME, ... +# $ String commands +# l List commands +# s Set commands +# h Hash commands +# z Sorted set commands +# x Expired events (events generated every time a key expires) +# e Evicted events (events generated when a key is evicted for maxmemory) +# A Alias for g$lshzxe, so that the "AKE" string means all the events. +# +# The "notify-keyspace-events" takes as argument a string that is composed +# of zero or multiple characters. The empty string means that notifications +# are disabled.
+# +# Example: to enable list and generic events, from the point of view of the +# event name, use: +# +# notify-keyspace-events Elg +# +# Example 2: to get the stream of the expired keys subscribing to channel +# name __keyevent@0__:expired use: +# +# notify-keyspace-events Ex +# +# By default all notifications are disabled because most users don't need +# this feature and the feature has some overhead. Note that if you don't +# specify at least one of K or E, no events will be delivered. +notify-keyspace-events "" + +############################### ADVANCED CONFIG ############################### + +# Hashes are encoded using a memory efficient data structure when they have a +# small number of entries, and the biggest entry does not exceed a given +# threshold. These thresholds can be configured using the following directives. +hash-max-ziplist-entries 512 +hash-max-ziplist-value 64 + +# Similarly to hashes, small lists are also encoded in a special way in order +# to save a lot of space. The special representation is only used when +# you are under the following limits: +list-max-ziplist-entries 512 +list-max-ziplist-value 64 + +# Sets have a special encoding in just one case: when a set is composed +# of just strings that happen to be integers in radix 10 in the range +# of 64 bit signed integers. +# The following configuration setting sets the limit in the size of the +# set in order to use this special memory saving encoding. +set-max-intset-entries 512 + +# Similarly to hashes and lists, sorted sets are also specially encoded in +# order to save a lot of space. This encoding is only used when the length and +# elements of a sorted set are below the following limits: +zset-max-ziplist-entries 128 +zset-max-ziplist-value 64 + +# HyperLogLog sparse representation bytes limit. The limit includes the +# 16 bytes header. When an HyperLogLog using the sparse representation crosses +# this limit, it is converted into the dense representation. 
+# +# A value greater than 16000 is totally useless, since at that point the +# dense representation is more memory efficient. +# +# The suggested value is ~ 3000 in order to have the benefits of +# the space efficient encoding without slowing down too much PFADD, +# which is O(N) with the sparse encoding. The value can be raised to +# ~ 10000 when CPU is not a concern, but space is, and the data set is +# composed of many HyperLogLogs with cardinality in the 0 - 15000 range. +hll-sparse-max-bytes 3000 + +# Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in +# order to help rehashing the main Redis hash table (the one mapping top-level +# keys to values). The hash table implementation Redis uses (see dict.c) +# performs a lazy rehashing: the more operations you run on a hash table +# that is rehashing, the more rehashing "steps" are performed, so if the +# server is idle the rehashing is never complete and some more memory is used +# by the hash table. +# +# The default is to use this millisecond 10 times every second in order to +# actively rehash the main dictionaries, freeing memory when possible. +# +# If unsure: +# use "activerehashing no" if you have hard latency requirements and it is +# not a good thing in your environment that Redis can reply from time to time +# to queries with 2 milliseconds delay. +# +# use "activerehashing yes" if you don't have such hard requirements but +# want to free memory asap when possible. +activerehashing yes + +# The client output buffer limits can be used to force disconnection of clients +# that are not reading data from the server fast enough for some reason (a +# common reason is that a Pub/Sub client can't consume messages as fast as the +# publisher can produce them).
+# +# The limit can be set differently for the three different classes of clients: +# +# normal -> normal clients including MONITOR clients +# slave -> slave clients +# pubsub -> clients subscribed to at least one pubsub channel or pattern +# +# The syntax of every client-output-buffer-limit directive is the following: +# +# client-output-buffer-limit <class> <hard limit> <soft limit> <soft seconds> +# +# A client is immediately disconnected once the hard limit is reached, or if +# the soft limit is reached and remains reached for the specified number of +# seconds (continuously). +# So for instance if the hard limit is 32 megabytes and the soft limit is +# 16 megabytes / 10 seconds, the client will get disconnected immediately +# if the size of the output buffers reach 32 megabytes, but will also get +# disconnected if the client reaches 16 megabytes and continuously overcomes +# the limit for 10 seconds. +# +# By default normal clients are not limited because they don't receive data +# without asking (in a push way), but just after a request, so only +# asynchronous clients may create a scenario where data is requested faster +# than it can read. +# +# Instead there is a default limit for pubsub and slave clients, since +# subscribers and slaves receive data in a push fashion. +# +# Both the hard or the soft limit can be disabled by setting them to zero. +client-output-buffer-limit normal 0 0 0 +client-output-buffer-limit slave 256mb 64mb 60 +client-output-buffer-limit pubsub 32mb 8mb 60 + +# Redis calls an internal function to perform many background tasks, like +# closing connections of clients in timeout, purging expired keys that are +# never requested, and so forth. +# +# Not all tasks are performed with the same frequency, but Redis checks for +# tasks to perform according to the specified "hz" value. +# +# By default "hz" is set to 10.
Raising the value will use more CPU when +# Redis is idle, but at the same time will make Redis more responsive when +# there are many keys expiring at the same time, and timeouts may be +# handled with more precision. +# +# The range is between 1 and 500, however a value over 100 is usually not +# a good idea. Most users should use the default of 10 and raise this up to +# 100 only in environments where very low latency is required. +hz 10 + +# When a child rewrites the AOF file, if the following option is enabled +# the file will be fsync-ed every 32 MB of data generated. This is useful +# in order to commit the file to the disk more incrementally and avoid +# big latency spikes. +aof-rewrite-incremental-fsync yes diff --git a/utils/md2man.sh b/utils/md2man.sh new file mode 100755 index 000000000..8100fc4ed --- /dev/null +++ b/utils/md2man.sh @@ -0,0 +1,66 @@ +#!/bin/bash -e +# +# Copyright 2016, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# +# md2man.sh -- convert markdown to groff man pages +# +# usage: md2man.sh file template outfile +# +# This script converts markdown file into groff man page using pandoc. +# It performs some pre- and post-processing for better results: +# - parse input file for YAML metadata block and read man page title, +# section and version +# - cut-off metadata block and license +# - unindent code blocks (in-place sed pass over outfile once pandoc is done) +# + +set -o pipefail + +filename=$1 +template=$2 +outfile=$3 +title=`sed -n 's/^title:\ *\([a-z]*\).*$/\1/p' $filename` +section=`sed -n 's/^title:.*(\([0-9]\)).*$/\1/p' $filename` +version=`sed -n 's/^date:\ *\(.*\)$/\1/p' $filename` + +cat $filename | sed -n -e '/# NAME #/,$p' |\ +pandoc -s -t man -o $outfile --template=$template \ + -V title=$title -V section=$section \ + -V description='"NVM Library"' -V version="$version" \ + -V year=$(date +"%Y") +sed -i '/^\.IP/{ +N +/\n\.nf/{ + s/IP/PP/ + } +}' $outfile