DocDB compaction filter

Summary: Adding a DocDB compaction filter that garbage-collects key/value pairs in RocksDB that are not visible at a specified "history cut-off timestamp". We are using the `CompactionFilter` / `CompactionFilterFactory` framework available in RocksDB to run custom logic on compactions. `CompactionFilterFactory` allows us to parameterize a `DocDBCompactionFilter` with a history cutoff timestamp at compaction start time and not worry about its thread safety, because a new `CompactionFilter` is used for each compaction. Also adding extensive randomized tests for reading at old timestamps and verifying that the state of the DB is as expected. Currently the retention policy is simply to retain history for a fixed time interval, e.g. 60 seconds or 10 seconds. Future work: - We should start tracking the read point and make sure we don't collect history at timestamps that data is still being read at, but that is not part of this revision. - Only garbage-collecting old versions on "full" (or "major") compactions. We should be able to do the same on other types of compactions in the absence of cross-tablet transactions, but that requires separate tests and is thus left to a further revision. Test Plan: Jenkins Reviewers: kannan, akashnil Reviewed By: kannan, akashnil Subscribers: jenkins-bot, eng Differential Revision: https://phabricator.dev.yugabyte.com/D810
norachoukri · Dec 8, 2016 · 6a7aba5 · 6a7aba5
1 parent b094f16
commit 6a7aba5
Show file tree

Hide file tree

Showing 60 changed files with 2,674 additions and 610 deletions.
diff --git a/.arclint b/.arclint
@@ -8,7 +8,7 @@
     "pycodestyle": {
       "type": "pep8",
       "include": "(\\.py$)",
-      "exclude": "(^thirdparty/)",
+      "exclude": "(^thirdparty/|^[.]ycm_extra_conf[.]py$)",
       "bin": "pycodestyle"
     },
     "googleccplint": {

diff --git a/.gitignore b/.gitignore
@@ -17,7 +17,8 @@ oprofile_data
 # VIM/emacs stuff
 *.swp
 *~
-core*
+# These files could be used to auto-save vim sessions.
+.session.vim
 
 # IDE cruft
 .settings/
@@ -34,3 +35,15 @@ www/tracing.*
 python/kudu/*.so
 python/kudu/*.cpp
 *.py[ocd]
+
+# Backup files
+*.bak
+
+# Core dumps
+core*
+
+# CLion directories
+/cmake-build-debug
+
+# screen sometimes makes these, I think
+/hardcopy.*
diff --git a/.ycm_extra_conf.py b/.ycm_extra_conf.py
@@ -64,6 +64,8 @@
 '-I',
 'src',
 '-I',
+'src/rocksdb/include',
+'-I',
 'build/latest/src',
 '-isystem',
 'thirdparty/installed/include',

diff --git a/bin/repeat_unit_test.sh b/bin/repeat_unit_test.sh
@@ -163,13 +163,22 @@ if [[ $iteration -gt 0 ]]; then
   set +e
   export TEST_TMPDIR=/tmp/yb__${0##*/}__$RANDOM.$RANDOM.$RANDOM.$$
   mkdir -p "$TEST_TMPDIR"
-  core_dir=$TEST_TMPDIR
+  set_expected_core_dir "$TEST_TMPDIR"
+  determine_test_timeout
+
+  # TODO: deduplicate the setup here against run_one_test() in common-test-env.sh.
+  test_cmd_line=( "$abs_test_binary_path" --gtest_filter="$test_filter" $more_test_args )
+  test_wrapper_cmd_line=(
+    "$BUILD_ROOT"/bin/run-with-timeout $(( $timeout_sec + 1 )) "${test_cmd_line[@]}"
+  )
+
   (
     cd "$TEST_TMPDIR"
     if [[ $verbose_level -gt 0 ]]; then
       set -x
     fi
-    "$abs_test_binary_path" --gtest_filter="$test_filter" $more_test_args &>"$raw_test_log_path"
+    ulimit -c unlimited
+    "${test_wrapper_cmd_line[@]}" &>"$raw_test_log_path"
   )
   exit_code=$?
   set -e

diff --git a/build-support/common-test-env.sh b/build-support/common-test-env.sh
@@ -42,8 +42,6 @@ VALID_TEST_BINARY_DIRS_RE=$( regex_from_list "${VALID_TEST_BINARY_DIRS[@]}" )
 # gdb command to print a backtrace from a core dump. Taken from:
 # http://www.commandlinefu.com/commands/view/6004/print-stack-trace-of-a-core-file-without-needing-to-enter-gdb-interactively
 
-GDB_CORE_BACKTRACE_CMD_PREFIX=( gdb -q -n -ex bt -batch )
-
 DEFAULT_TEST_TIMEOUT_SEC=600
 INCREASED_TEST_TIMEOUT_SEC=1200
 
@@ -52,6 +50,10 @@ INCREASED_TEST_TIMEOUT_SEC=1200
 readonly RELEVANT_LOG_LINES_RE=\
 '^[[:space:]]*(Value of|Actual|Expected):|^Expected|^Failed|^Which is:|: Failure$'
 
+# We put log messages about tests skipped due to YB_GTEST_REGEX here so that we can log them only in
+# case we end up filtering out all the tests.
+skipped_test_log_msgs=()
+
 # -------------------------------------------------------------------------------------------------
 # Functions
 # -------------------------------------------------------------------------------------------------
@@ -185,6 +187,7 @@ is_known_non_gtest_test_by_rel_path() {
 #     This variable is incremented if it exists.
 #   num_tests
 #     Total number of tests (test functions) collected. This variable is incremented if it exists.
+# Also, this function unsets test_log_path if it was previously set.
 collect_gtest_tests() {
   expect_num_args 0 "$@"
 
@@ -223,8 +226,10 @@ collect_gtest_tests() {
   popd >/dev/null
   set -e
 
-  core_dir="$gtest_list_tests_tmp_dir"
+  set_expected_core_dir "$gtest_list_tests_tmp_dir"
+  test_log_path="$gtest_list_stderr_path"
   process_core_file
+  unset test_log_path
   rm -rf "$gtest_list_tests_tmp_dir"
 
   if [[ -z $gtest_list_tests_result ]]; then
@@ -283,8 +288,9 @@ collect_gtest_tests() {
         let total_num_tests+=1
       fi
       if [[ -n ${YB_GTEST_REGEX:-} && ! $full_test_name =~ .*${YB_GTEST_REGEX:-}.* ]]; then
-        log "Skipping test '$full_test_name' in ${rel_test_binary}: does not match the" \
-            "regular expression '${YB_GTEST_REGEX}' (specified with YB_GTEST_REGEX)."
+        local log_msg="Skipping test '$full_test_name' in ${rel_test_binary}: does not match the"
+        log_msg+=" regular expression '${YB_GTEST_REGEX}' (specified with YB_GTEST_REGEX)."
+        skipped_test_log_msgs+=( "$log_msg" )
         if [[ -n ${num_tests_skipped:-} ]]; then
           let num_tests_skipped+=1
         fi
@@ -391,7 +397,7 @@ prepare_for_running_test() {
   # directory.
 
   local abs_test_binary_path=$( create_abs_test_binary_path "$rel_test_binary" )
-  test_cmd_line=( "$abs_test_binary_path" )
+  test_cmd_line=( "$abs_test_binary_path" ${YB_EXTRA_GTEST_FLAGS:-} )
 
   test_log_path_prefix="$YB_TEST_LOG_ROOT_DIR/$rel_test_log_path_prefix"
   xml_output_file="$test_log_path_prefix.xml"
@@ -426,6 +432,18 @@ prepare_for_running_test() {
   fi
 }
 
+# Set the directory to look for core files in to the given path. On Mac OS X, the input argument is
+# ignored and the standard location is used instead.
+set_expected_core_dir() {
+  expect_num_args 1 "$@"
+  local new_core_dir=$1
+  if is_mac; then
+    core_dir=/cores
+  else
+    core_dir="$new_core_dir"
+  fi
+}
+
 # Looks for a core dump file in the specified directory, shows a stack trace using gdb, and removes
 # the core dump file to save space.
 # Inputs:
@@ -437,10 +455,7 @@ prepare_for_running_test() {
 #     If this is defined and is not empty, any output generated by this function is appended to the
 #     file at this path.
 process_core_file() {
-  if [[ $# -ne 0 ]]; then
-    echo "$FUNCNAME does not take any arguments" >&2
-    exit 1
-  fi
+  expect_num_args 0 "$@"
   expect_vars_to_be_set \
     core_dir \
     rel_test_binary
@@ -451,30 +466,100 @@ process_core_file() {
     append_output_to=$test_log_path
   fi
 
-  if [[ ! -d $core_dir ]]; then
-    echo "$FUNCNAME: directory '$core_dir' does not exist" >&2
-    exit 1
+  if is_mac; then
+    # Finding core files on Mac OS X.
+    if [[ -f ${test_log_path:-} ]]; then
+      set +e
+      local signal_received_line=$(
+        egrep 'SIG[A-Z]+.*received by PID [0-9]+' "$test_log_path" | head -1
+      )
+      set -e
+      local core_pid=""
+      local thread_for_backtrace="all"
+      # Parsing a line like this:
+      # *** SIGSEGV (@0x0) received by PID 46158 (TID 0x70000028d000) stack trace: ***
+      if [[ $signal_received_line =~ received\ by\ PID\ ([0-9]+)\ \(TID\ (0x[0-9a-fA-F]+)\) ]]; then
+        core_pid=${BASH_REMATCH[1]}
+        # TODO: find a way to only show the backtrace of the relevant thread.
+        # We can't just pass the thread id to "thread backtrace", apparently it needs the thread
+        # index or something similar.
+        # thread_for_backtrace=${BASH_REMATCH[2]}
+      fi
+      if [[ -n $core_pid ]]; then
+        local core_path=$core_dir/core.$core_pid
+        log "Determined core_path=$core_path"
+        if [[ ! -f $core_path ]]; then
+          log "Core file not found at presumed location '$core_path'."
+        fi
+      else
+        if egrep -q 'SIG[A-Z]+.*received by PID' "$test_log_path"; then
+          log "Warning: could not determine test pid from '$test_log_path'" \
+              "(matched log line: $signal_received_line)"
+        fi
+        return
+      fi
+    else
+      log "Warning: test_log_path is not set, or file does not exist ('${test_log_path:-}')," \
+          "cannot look for core files on Mac OS X"
+      return
+    fi
+  else
+    # Finding core files on Linux.
+    if [[ ! -d $core_dir ]]; then
+      echo "$FUNCNAME: directory '$core_dir' does not exist" >&2
+      exit 1
+    fi
+    local core_path=$core_dir/core
+    if [[ ! -f $core_path ]]; then
+      # If there is just one file named "core.[0-9]+" in the core directory, pick it up and assume
+      # it belongs to the test. This is necessary on some systems, e.g. CentOS workstations with no
+      # special core location overrides.
+      local core_candidates=()
+      for core_path in "$core_dir"/core.*; do
+        if [[ -f $core_path && $core_path =~ /core[.][0-9]+$ ]]; then
+          core_candidates+=( "$core_path" )
+        fi
+      done
+      if [[ ${#core_candidates[@]} -eq 1 ]]; then
+        core_path=${core_candidates[0]}
+      elif [[ ${#core_candidates[@]} -gt 1 ]]; then
+        log "Found too many core files in '$core_dir', not analyzing: ${core_candidates[@]}"
+        return
+      fi
+    fi
   fi
-  local core_path=$core_dir/core
+
   if [[ -f $core_path ]]; then
     ( echo "Found a core file at '$core_path', backtrace:" | tee -a "$append_output_to" ) >&2
     local core_binary_path=$abs_test_binary_path
+    if [[ $rel_test_binary =~ /client_samples-test([.]sh)?$ ]]; then
+      core_binary_path=$TEST_TMPDIR/sample
+    fi
+
     # The core might have been generated by yb-master or yb-tserver launched as part of an
     # ExternalMiniCluster.
-    # TODO(mbautin): don't run gdb the second time in case we get the binary name right the first
-    #                time around.
-    local gdb_output=$(
-      "${GDB_CORE_BACKTRACE_CMD_PREFIX[@]}" "$abs_test_binary_path" "$core_path" 2>&1
-    )
-    if [[ "$gdb_output" =~ Core\ was\ generated\ by\ .(yb-master|yb-tserver) ]]; then
+    # TODO(mbautin): don't run gdb/lldb the second time in case we get the binary name right the
+    #                first time around.
+
+    local debugger_cmd debugger_input
+    if is_mac; then
+      debugger_cmd=( lldb "$core_binary_path" -c "$core_path" )
+      debugger_input="thread backtrace $thread_for_backtrace"  # backtrace
+    else
+      debugger_cmd=( gdb -q -n -ex bt -batch "$core_binary_path" "$core_path" )
+      debugger_input=""
+    fi
+
+    local debugger_output=$( echo "$debugger_input" | "${debugger_cmd[@]}" 2>&1 )
+    if [[ "$debugger_output" =~ Core\ was\ generated\ by\ .(yb-master|yb-tserver) ]]; then
       # The test program may be launching masters and tablet servers through an ExternalMiniCluster.
       core_binary_path="$BUILD_ROOT/bin/${BASH_REMATCH[1]}"
     fi
 
     set +e
     (
       set -x
-      "${GDB_CORE_BACKTRACE_CMD_PREFIX[@]}" "$core_binary_path" "$core_path" 2>&1 | \
+      echo "$debugger_input" | "${debugger_cmd[@]}" 2>&1 | \
         egrep -v "^\[New LWP [0-9]+\]$" | tee -a "$append_output_to"
     ) >&2
     set -e
@@ -578,6 +663,22 @@ set_test_log_url_prefix() {
   fi
 }
 
+determine_test_timeout() {
+  expect_num_args 0 "$@"
+  expect_vars_to_be_set rel_test_binary
+  if [[ -n ${YB_TEST_TIMEOUT:-} ]]; then
+    timeout_sec=$YB_TEST_TIMEOUT
+  else
+    if [[ $rel_test_binary == "rocksdb-build/compact_on_deletion_collector_test" ]]; then
+      # This test is particularly slow on TSAN, and it has to be run all at once (we cannot use
+      # --gtest_filter) because of dependencies between tests.
+      timeout_sec=$INCREASED_TEST_TIMEOUT_SEC
+    else
+      timeout_sec=$DEFAULT_TEST_TIMEOUT_SEC
+    fi
+  fi
+}
+
 run_one_test() {
   expect_num_args 0 "$@"
   expect_vars_to_be_set \
@@ -592,28 +693,52 @@ run_one_test() {
     fatal "Expected test_failed to be false before running test, found: $test_failed"
   fi
 
-  local -i timeout_sec
-  if [[ -n ${YB_TEST_TIMEOUT:-} ]]; then
-    timeout_sec=$YB_TEST_TIMEOUT
-  else
-    if [[ $rel_test_binary == "rocksdb-build/compact_on_deletion_collector_test" ]]; then
-      # This test is particularly slow on TSAN, and it has to be run all at once (we cannot use
-      # --gtest_filter) because of dependencies between tests.
-      timeout_sec=$INCREASED_TEST_TIMEOUT_SEC
-    else
-      timeout_sec=$DEFAULT_TEST_TIMEOUT_SEC
-    fi
-  fi
+  determine_test_timeout
 
+  local test_wrapper_cmd_line=(
+    "$BUILD_ROOT"/bin/run-with-timeout $(( $timeout_sec + 1 )) "${test_cmd_line[@]}"
+  )
   pushd "$TEST_TMPDIR" >/dev/null
   set +e
   (
-    # This may fail with an error message, and that's what we want. We will still run the test.
-    # In case we do manage to set the core file size limit, we will restore the previous limit after
-    # we exit the subshell.
+    # Setting the ulimit may fail with an error message, and that's what we want. We will still run
+    # the test.  In case we do manage to set the core file size limit, we will restore the previous
+    # limit after we exit the subshell.
     ulimit -c unlimited
-    "$BUILD_ROOT"/bin/run-with-timeout $(( $timeout_sec + 1 )) "${test_cmd_line[@]}" \
-      &>"$test_log_path"
+    if [[ $? -ne 0 ]]; then
+      # Print some diagnostics if we fail to set core file size limit.
+      log "Command 'ulimit -c unlimited' failed. Current 'ulimit -c' output: $( ulimit -c )"
+    fi
+
+    # Print the load address of every dynamic library on test startup, so we can add file names and
+    # line numbers to stack traces. This should be unnecessary if we switch to libbacktrace-based
+    # stack traces that would come with line numbers already. Also, one potential problem with this
+    # approach based on dumping load offsets of dynamic libraries is that stacktrace_addr2line.pl
+    # won't be able to match lines specifying library load offsets to processes in multi-process
+    # tests (e.g. mini-cluster based tests), potentially resulting in incorrect file names / line
+    # numbers.
+    #
+    # See also https://yugabyte.atlassian.net/browse/ENG-617 for tracking the libbacktrace work.
+    export YB_LIST_LOADED_DYNAMIC_LIBS=1
+
+    # YB_CTEST_VERBOSE makes test output go to stderr, and then we separately make it show up on the
+    # console by giving ctest the --verbose option. This is intended for development. When we run
+    # tests on Jenkins or when running all tests using "ctest -j8" from the build root, one should
+    # leave YB_CTEST_VERBOSE unset.
+    if [[ -n ${YB_CTEST_VERBOSE:-} ]]; then
+      # In the verbose mode we have to perform stack trace filtering / symbolization right away.
+      local stack_trace_filter_cmd=( "$STACK_TRACE_FILTER" )
+      if [[ $STACK_TRACE_FILTER != "cat" ]]; then
+        stack_trace_filter_cmd+=( "$abs_test_binary_path" )
+      fi
+      ( set -x; "${test_wrapper_cmd_line[@]}" 2>&1 ) | \
+        "${stack_trace_filter_cmd[@]}" | \
+        tee "$test_log_path"
+      # Propagate the exit code of the test process, not any of the filters.
+      exit ${PIPESTATUS[0]}
+    else
+      "${test_wrapper_cmd_line[@]}" &>"$test_log_path"
+    fi
   )
   test_exit_code=$?
   set -e
@@ -661,7 +786,7 @@ handle_test_failure() {
         fi
       fi
     ) >&2
-    core_dir=$TEST_TMPDIR
+    set_expected_core_dir "$TEST_TMPDIR"
     process_core_file
     unset core_dir
     global_exit_code=1
@@ -809,6 +934,11 @@ did_test_succeed() {
     return 1
   fi
 
+  if egrep -q '^\[  FAILED  \]' "$log_path"; then
+    log 'GTest failures'
+    return 1
+  fi
+
   # When signals show up in the test log, that's usually not good. We can see how many of these we
   # get and gradually prune false positives.
   local signal_str