Skip to content

Commit d561f39

Browse files
authored
Add XUnitLogChecker to log libraries dumps (#93906)
Libraries CI failures that generate dumps will now show the dump output in the console log itself.
1 parent 2a9c80b commit d561f39

File tree

9 files changed

+168
-124
lines changed

9 files changed

+168
-124
lines changed

Directory.Build.props

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,6 @@
180180
<TestExclusionListTasksAssemblyPath>$([MSBuild]::NormalizePath('$(TestExclusionListTasksDir)', 'TestExclusionListTasks.dll'))</TestExclusionListTasksAssemblyPath>
181181
<CoreCLRToolPath>$([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'coreclr', '$(TargetOS).$(TargetArchitecture).$(RuntimeConfiguration)'))</CoreCLRToolPath>
182182
<ILAsmToolPath Condition="'$(DotNetBuildFromSource)' == 'true' or '$(BuildArchitecture)' == 's390x' or '$(BuildArchitecture)' == 'ppc64le'">$(CoreCLRToolPath)</ILAsmToolPath>
183-
184183
<WasmtimeDir Condition="'$(WasmtimeDir)' == '' and '$(WASMTIME_PATH)' != '' and Exists($(WASMTIME_PATH))">$(WASMTIME_PATH)</WasmtimeDir>
185184
<WasmtimeDir Condition="'$(WasmtimeDir)' == ''">$([MSBuild]::NormalizeDirectory($(ArtifactsObjDir), 'wasmtime'))</WasmtimeDir>
186185
<InstallWasmtimeForTests Condition="'$(InstallWasmtimeForTests)' == '' and !Exists($(WasmtimeDir))">true</InstallWasmtimeForTests>
@@ -332,6 +331,11 @@
332331

333332
<!-- this property is used by the SDK to pull in mono-based runtime packs -->
334333
<UseMonoRuntime Condition="'$(UseMonoRuntime)' == '' and '$(RuntimeFlavor)' == 'Mono'">true</UseMonoRuntime>
334+
335+
<!-- For enabling the use of XUnitLogChecker in coreclr and libraries test runs. -->
336+
<IsXUnitLogCheckerSupported Condition="'$(IsXUnitLogCheckerSupported)' == ''">false</IsXUnitLogCheckerSupported>
337+
<IsXUnitLogCheckerSupported Condition="'$(RuntimeFlavor)' == 'CoreCLR' and '$(TestNativeAot)' != 'true' and '$(TestRunNamePrefixSuffix)' != 'NativeAOT_Release' and '$(TargetOS)' != 'browser' and '$(TargetOS)' != 'wasi' and '$(TargetOS)' != 'ios' and '$(TargetOS)' != 'iossimulator' and '$(TargetOS)' != 'tvos' and '$(TargetOS)' != 'tvossimulator' and '$(TargetOS)' != 'maccatalyst' and '$(TargetOS)' != 'android'">true</IsXUnitLogCheckerSupported>
338+
<XUnitLogCheckerLibrariesOutDir>$([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'XUnitLogChecker'))</XUnitLogCheckerLibrariesOutDir>
335339
</PropertyGroup>
336340

337341
<!-- Packaging -->

eng/pipelines/libraries/prepare-for-bin-publish.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ steps:
1414
ref/**
1515
runtime/**
1616
testhost/**
17+
XUnitLogChecker/**
1718
1819
- task: CopyFiles@2
1920
displayName: Prepare bin folders to publish

eng/testing/RunnerTemplate.cmd

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,41 @@ if %_exit_code%==1 (
9292
)
9393
)
9494
)
95+
96+
if NOT "%__IsXUnitLogCheckerSupported%"=="1" (
97+
echo XUnitLogChecker not supported for this test case. Skipping.
98+
GOTO SKIP_XUNITLOGCHECKER
99+
)
100+
101+
echo ----- start =============== XUnitLogChecker Output =====================================================
102+
103+
set DOTNET_EXE=%RUNTIME_PATH%\dotnet.exe
104+
set XUNITLOGCHECKER_DLL=%HELIX_CORRELATION_PAYLOAD%\XUnitLogChecker.dll
105+
set XUNITLOGCHECKER_COMMAND=%DOTNET_EXE% --roll-forward Major %XUNITLOGCHECKER_DLL% --dumps-path %HELIX_DUMP_FOLDER%
106+
set XUNITLOGCHECKER_EXIT_CODE=1
107+
108+
if NOT EXIST %DOTNET_EXE% (
109+
echo dotnet.exe does not exist in the expected location: %DOTNET_EXE%
110+
GOTO XUNITLOGCHECKER_END
111+
) else if NOT EXIST %XUNITLOGCHECKER_DLL% (
112+
echo XUnitLogChecker.dll does not exist in the expected location: %XUNITLOGCHECKER_DLL%
113+
GOTO XUNITLOGCHECKER_END
114+
)
115+
116+
echo %XUNITLOGCHECKER_COMMAND%
117+
%XUNITLOGCHECKER_COMMAND%
118+
set XUNITLOGCHECKER_EXIT_CODE=%ERRORLEVEL%
119+
120+
:XUNITLOGCHECKER_END
121+
122+
if %XUNITLOGCHECKER_EXIT_CODE% NEQ 0 (
123+
set _exit_code=%XUNITLOGCHECKER_EXIT_CODE%
124+
)
125+
126+
echo ----- end =============== XUnitLogChecker Output - exit code %XUNITLOGCHECKER_EXIT_CODE% ===============
127+
128+
:SKIP_XUNITLOGCHECKER
129+
95130
exit /b %_exit_code%
96131
:: ========================= END Test Execution =================================
97132

eng/testing/RunnerTemplate.sh

Lines changed: 78 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -54,80 +54,60 @@ exitcode_list[131]="SIGQUIT Ctrl-\ occurred. Core dumped."
5454
exitcode_list[132]="SIGILL Illegal Instruction. Core dumped. Likely codegen issue."
5555
exitcode_list[133]="SIGTRAP Breakpoint hit. Core dumped."
5656
exitcode_list[134]="SIGABRT Abort. Managed or native assert, or runtime check such as heap corruption, caused call to abort(). Core dumped."
57-
exitcode_list[135]="IGBUS Unaligned memory access. Core dumped."
57+
# Bus error: the signal name is SIGBUS.
exitcode_list[135]="SIGBUS Unaligned memory access. Core dumped."
5858
exitcode_list[136]="SIGFPE Bad floating point arguments. Core dumped."
5959
exitcode_list[137]="SIGKILL Killed eg by kill"
6060
exitcode_list[139]="SIGSEGV Illegal memory access. Deref invalid pointer, overrunning buffer, stack overflow etc. Core dumped."
6161
exitcode_list[143]="SIGTERM Terminated. Usually before SIGKILL."
6262
exitcode_list[159]="SIGSYS Bad System Call."
6363

64-
function print_info_from_core_file_using_lldb {
64+
function move_core_file_to_temp_location {
6565
local core_file_name=$1
66-
local executable_name=$2
67-
local plugin_path_name="$RUNTIME_PATH/shared/Microsoft.NETCore.App/9.9.9/libsosplugin.so"
6866

69-
# check for existence of lldb on the path
70-
hash lldb 2>/dev/null || { echo >&2 "lldb was not found. Unable to print core file."; return; }
67+
# Append the dmp extension to ensure XUnitLogChecker finds it
68+
local new_location=$HELIX_DUMP_FOLDER/$core_file_name.dmp
7169

72-
# pe, clrstack, and dumpasync are defined in libsosplugin.so
73-
if [ ! -f $plugin_path_name ]; then
74-
echo $plugin_path_name cannot be found.
75-
return
76-
fi
77-
78-
echo ----- start =============== lldb Output =====================================================
79-
echo Printing managed exceptions, managed call stacks, and async state machines.
80-
lldb -O "settings set target.exec-search-paths $RUNTIME_PATH" -o "plugin load $plugin_path_name" -o "clrthreads -managedexception" -o "pe -nested" -o "clrstack -all -a -f" -o "dumpasync -fields -stacks -roots" -o "quit" --core $core_file_name $executable_name
81-
echo ----- end =============== lldb Output =======================================================
82-
}
83-
84-
function print_info_from_core_file_using_gdb {
85-
local core_file_name=$1
86-
local executable_name=$2
87-
88-
# Check for the existence of GDB on the path
89-
hash gdb 2>/dev/null || { echo >&2 "GDB was not found. Unable to print core file."; return; }
70+
echo "Copying dump file '$core_file_name' to '$new_location'"
71+
cp $core_file_name $new_location
9072

91-
echo ----- start =============== GDB Output =====================================================
92-
# Open the dump in GDB and print the stack from each thread. We can add more
93-
# commands here if desired.
94-
echo printing native stack.
95-
gdb --batch -ex "thread apply all bt full" -ex "quit" $executable_name $core_file_name
96-
echo ----- end =============== GDB Output =======================================================
73+
# Delete the old one, but only once the copy is present at the new location,
# so that a failed copy does not destroy the only instance of the dump.
if [[ -e "$new_location" ]]; then
  rm -- "$core_file_name"
fi
9775
}
9876

99-
function print_info_from_core_file {
100-
local core_file_name=$1
101-
local executable_name=$RUNTIME_PATH/$2
102-
103-
if ! [ -e $executable_name ]; then
104-
echo "Unable to find executable $executable_name"
105-
return
106-
elif ! [ -e $core_file_name ]; then
107-
echo "Unable to find core file $core_file_name"
108-
return
77+
xunitlogchecker_exit_code=0
78+
function invoke_xunitlogchecker {
79+
local dump_folder=$1
80+
81+
# Count the dump files (*.dmp) found under the dump folder.
total_dumps=$(find "$dump_folder" -name "*.dmp" | wc -l)

# Compare numerically with -gt: inside [[ ]] the '>' operator is a string
# comparison, which gives wrong answers when the count is padded with leading
# blanks (some 'wc' implementations do this).
if [[ $total_dumps -gt 0 ]]; then
  echo "Total dumps found in $dump_folder: $total_dumps"
  xunitlogchecker_file_name="$HELIX_CORRELATION_PAYLOAD/XUnitLogChecker.dll"
  dotnet_file_name="$RUNTIME_PATH/dotnet"

  # xunitlogchecker_exit_code is set to:
  #   1 when the dotnet host is missing,
  #   2 when XUnitLogChecker.dll is missing,
  #   the tool's own exit status when it is actually run.
  # A missing dump directory only prints a message and leaves the code as is.
  if [[ ! -f "$dotnet_file_name" ]]; then
    echo "'$dotnet_file_name' was not found. Unable to run XUnitLogChecker."
    xunitlogchecker_exit_code=1
  elif [[ ! -f "$xunitlogchecker_file_name" ]]; then
    echo "'$xunitlogchecker_file_name' was not found. Unable to print dump file contents."
    xunitlogchecker_exit_code=2
  elif [[ ! -d "$dump_folder" ]]; then
    echo "The dump directory '$dump_folder' does not exist."
  else
    echo "Executing XUnitLogChecker in $dump_folder..."
    cmd="$dotnet_file_name --roll-forward Major $xunitlogchecker_file_name --dumps-path $dump_folder"
    echo "$cmd"
    # Intentionally unquoted: the command string is split into words here, so
    # none of the paths above may contain whitespace.
    $cmd
    xunitlogchecker_exit_code=$?
  fi
else
  echo "No dumps found in $dump_folder."
fi
110-
echo "Printing info from core file $core_file_name"
111-
print_info_from_core_file_using_gdb $core_file_name $executable_name
112-
print_info_from_core_file_using_lldb $core_file_name $executable_name
113-
}
114-
115-
function copy_core_file_to_temp_location {
116-
local core_file_name=$1
117-
118-
local storage_location="/tmp/coredumps"
119-
120-
# Create the directory (this shouldn't fail even if it already exists).
121-
mkdir -p $storage_location
122-
123-
local new_location=$storage_location/core.$RANDOM
124-
125-
echo "Copying core file $core_file_name to $new_location in case you need it."
126-
cp $core_file_name $new_location
127106
}
128107

129108
# ========================= BEGIN Core File Setup ============================
130-
if [[ "$(uname -s)" == "Darwin" ]]; then
109+
system_name="$(uname -s)"
110+
if [[ $system_name == "Darwin" ]]; then
131111
# On OS X, we will enable core dump generation only if there are no core
132112
# files already in /cores/ at this point. This is being done to prevent
133113
# inadvertently flooding the CI machines with dumps.
@@ -140,21 +120,11 @@ if [[ "$(uname -s)" == "Darwin" ]]; then
140120
# https://github.com/dotnet/core-eng/issues/15597
141121
ulimit -c 0
142122
fi
143-
144-
elif [[ "$(uname -s)" == "Linux" ]]; then
145-
# On Linux, we'll enable core file generation unconditionally, and if a dump
146-
# is generated, we will print some useful information from it and delete the
147-
# dump immediately.
148-
149-
if [ -e /proc/self/coredump_filter ]; then
150-
# Include memory in private and shared file-backed mappings in the dump.
151-
# This ensures that we can see disassembly from our shared libraries when
152-
# inspecting the contents of the dump. See 'man core' for details.
153-
echo -n 0x3F > /proc/self/coredump_filter
154-
fi
155-
156-
ulimit -c unlimited
157123
fi
124+
125+
export DOTNET_DbgEnableMiniDump=1
126+
export DOTNET_EnableCrashReport=1
127+
export DOTNET_DbgMiniDumpName=$HELIX_DUMP_FOLDER/coredump.%d.dmp
158128
# ========================= END Core File Setup ==============================
159129

160130
# ========================= BEGIN support for SuperPMI collection ==============================
@@ -171,7 +141,7 @@ if [ ! -z $spmi_enable_collection ]; then
171141
fi
172142
mkdir -p $spmi_collect_dir
173143
export spmi_file_extension=so
174-
if [[ "$(uname -s)" == "Darwin" ]]; then
144+
if [[ $system_name == "Darwin" ]]; then
175145
export spmi_file_extension=dylib
176146
fi
177147
export SuperPMIShimLogPath=$spmi_collect_dir
@@ -220,51 +190,59 @@ if [[ $test_exitcode -ne 0 ]]; then
220190
echo ulimit -c value: $(ulimit -c)
221191
fi
222192

223-
if [[ "$(uname -s)" == "Linux" && $test_exitcode -ne 0 ]]; then
224-
if [ -n "$HELIX_WORKITEM_PAYLOAD" ]; then
225-
226-
# For abrupt failures, in Helix, dump some of the kernel log, in case there is a hint
227-
if [[ $test_exitcode -ne 1 ]]; then
228-
dmesg | tail -50
229-
fi
230-
231-
have_sleep=$(which sleep)
232-
if [ -x "$have_sleep" ]; then
233-
echo Waiting a few seconds for any dump to be written..
234-
sleep 10s
235-
fi
236-
fi
237-
193+
if [[ $system_name == "Linux" && $test_exitcode -ne 0 ]]; then
238194
echo cat /proc/sys/kernel/core_pattern: $(cat /proc/sys/kernel/core_pattern)
239195
echo cat /proc/sys/kernel/core_uses_pid: $(cat /proc/sys/kernel/core_uses_pid)
240196
echo cat /proc/sys/kernel/coredump_filter: $(cat /proc/sys/kernel/coredump_filter)
241197

242-
echo Looking around for any Linux dump..
243-
244198
# Depending on distro/configuration, the core files may either be named "core"
245199
# or "core.<PID>" by default. We read /proc/sys/kernel/core_uses_pid to
246200
# determine which it is.
247201
core_name_uses_pid=0
248202
if [[ -e /proc/sys/kernel/core_uses_pid && "1" == $(cat /proc/sys/kernel/core_uses_pid) ]]; then
249203
core_name_uses_pid=1
250204
fi
205+
206+
# Only Linux core files are collected here; the macOS (OS X) dumps are too large to egress the machine.
207+
echo Looking around for any Linux dumps...
251208

252209
if [[ "$core_name_uses_pid" == "1" ]]; then
253210
# We don't know what the PID of the process was, so let's look at all core
254211
# files whose name matches core.NUMBER
255-
echo Looking for files matching core.* ...
256-
for f in core.*; do
257-
[[ $f =~ core.[0-9]+ ]] && print_info_from_core_file "$f" "dotnet" && copy_core_file_to_temp_location "$f" && rm "$f"
212+
echo "Looking for files matching core.* ..."
213+
# Collect the candidates first, NUL-delimited, so that paths containing
# whitespace or glob characters survive intact. The search is finished before
# any file is moved, so files created by the move are never picked up.
core_files=()
while IFS= read -r -d '' f; do
  core_files+=("$f")
done < <(find . -name "core.*" -print0)

# Only names of the form core.<digits> are treated as core dumps.
for f in "${core_files[@]}"; do
  [[ $f =~ core.[0-9]+ ]] && move_core_file_to_temp_location "$f"
done
259-
elif [ -f core ]; then
260-
echo found a dump named core in $EXECUTION_DIR !
261-
print_info_from_core_file "core" "dotnet"
262-
copy_core_file_to_temp_location "core"
263-
rm "core"
264-
else
265-
echo ... found no dump in $PWD
266216
fi
217+
218+
if [ -f core ]; then
219+
move_core_file_to_temp_location "core"
220+
fi
221+
fi
222+
223+
if [ -n "$HELIX_WORKITEM_PAYLOAD" ]; then
224+
# For abrupt failures, in Helix, dump some of the kernel log, in case there is a hint
225+
if [[ $test_exitcode -ne 1 ]]; then
226+
dmesg | tail -50
227+
fi
228+
267229
fi
230+
231+
if [[ -z "$__IsXUnitLogCheckerSupported" ]]; then
232+
echo "The '__IsXUnitLogCheckerSupported' env var is not set."
233+
elif [[ "$__IsXUnitLogCheckerSupported" != "1" ]]; then
234+
echo "XUnitLogChecker not supported for this test case. Skipping."
235+
else
236+
echo ----- start =============== XUnitLogChecker Output =====================================================
237+
238+
invoke_xunitlogchecker "$HELIX_DUMP_FOLDER"
239+
240+
if [[ $xunitlogchecker_exit_code -ne 0 ]]; then
241+
test_exitcode=$xunitlogchecker_exit_code
242+
fi
243+
echo ----- end =============== XUnitLogChecker Output - exit code $xunitlogchecker_exit_code ===========================
244+
fi
245+
268246
popd >/dev/null
269247
# ======================== END Core File Inspection ==========================
270248
# The helix work item should not exit with non-zero if tests ran and produced results

0 commit comments

Comments
 (0)