Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for LLVM objdump #534

Merged
merged 8 commits into from
Jun 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 83 additions & 5 deletions internal/binutils/binutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@ import (
"debug/elf"
"debug/macho"
"encoding/binary"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"regexp"
"runtime"
"strconv"
"strings"
"sync"

Expand All @@ -39,6 +41,8 @@ type Binutils struct {
rep *binrep
}

// objdumpLLVMVerRE extracts the version from an LLVM objdump version banner.
// The pattern is unanchored, so any text may precede "LLVM version ".
// Either groups 1-3 capture the dotted major, minor and patch components
// (each is `\d*`, so a component may be empty), or group 4 captures the
// literal "trunk" found anywhere in the rest of the line.
var objdumpLLVMVerRE = regexp.MustCompile(`LLVM version (?:(\d*)\.(\d*)\.(\d*)|.*(trunk).*)`)

// binrep is an immutable representation for Binutils. It is atomically
// replaced on every mutation to provide thread-safe access.
type binrep struct {
Expand All @@ -51,6 +55,7 @@ type binrep struct {
nmFound bool
objdump string
objdumpFound bool
isLLVMObjdump bool

// if fast, perform symbolization using nm (symbol names only),
// instead of file-line detail from the slower addr2line.
Expand Down Expand Up @@ -140,7 +145,77 @@ func initTools(b *binrep, config string) {
b.addr2line, b.addr2lineFound = findExe("gaddr2line", append(paths["addr2line"], defaultPath...))
}
b.nm, b.nmFound = findExe("nm", append(paths["nm"], defaultPath...))
b.objdump, b.objdumpFound = findExe("objdump", append(paths["objdump"], defaultPath...))
b.objdump, b.objdumpFound, b.isLLVMObjdump = findObjdump(append(paths["objdump"], defaultPath...))
}

// findObjdump finds and returns path to preferred objdump binary.
// Order of preference is: llvm-objdump, objdump.
// On MacOS only, also looks for gobjdump with least preference.
// Accepts a list of paths and returns:
// a string with path to the preferred objdump binary if found,
// or an empty string if not found;
// a boolean if any acceptable objdump was found;
// a boolen indicating if it is an LLVM objdump.
func findObjdump(paths []string) (string, bool, bool) {
objdumpNames := []string{"llvm-objdump", "objdump"}
if runtime.GOOS == "darwin" {
objdumpNames = append(objdumpNames, "gobjdump")
}

for _, objdumpName := range objdumpNames {
if objdump, objdumpFound := findExe(objdumpName, paths); objdumpFound {
cmdOut, err := exec.Command(objdump, "--version").Output()
if err != nil {
kalyanac marked this conversation as resolved.
Show resolved Hide resolved
continue
}
if isLLVMObjdump(string(cmdOut)) {
return objdump, true, true
}
if isBuObjdump(string(cmdOut)) {
return objdump, true, false
}
}
}
return "", false, false
}

// isLLVMObjdump accepts a string with path to an objdump binary,
// and returns a boolean indicating if the given binary is an LLVM
// objdump binary of an acceptable version.
func isLLVMObjdump(output string) bool {
fields := objdumpLLVMVerRE.FindStringSubmatch(output)
if len(fields) != 5 {
return false
}
if fields[4] == "trunk" {
return true
}
verMajor, err := strconv.Atoi(fields[1])
if err != nil {
return false
}
verPatch, err := strconv.Atoi(fields[3])
if err != nil {
return false
}
if runtime.GOOS == "linux" && verMajor >= 8 {
// Ensure LLVM objdump is at least version 8.0 on Linux.
// Some flags, like --demangle, and double dashes for options are
// not supported by previous versions.
return true
}
if runtime.GOOS == "darwin" {
// Ensure LLVM objdump is at least version 10.0.1 on MacOS.
kalyanac marked this conversation as resolved.
Show resolved Hide resolved
return verMajor > 10 || (verMajor == 10 && verPatch >= 1)
}
return false
}

// isBuObjdump reports whether output, the text printed by an objdump binary
// when asked for its version, identifies a GNU binutils objdump. Both the
// "GNU objdump" and "Binutils" markers must be present. No version check is
// performed.
func isBuObjdump(output string) bool {
	if !strings.Contains(output, "GNU objdump") {
		return false
	}
	return strings.Contains(output, "Binutils")
}

// findExe looks for an executable command on a set of paths.
Expand All @@ -159,13 +234,16 @@ func findExe(cmd string, paths []string) (string, bool) {
// of a binary.
func (bu *Binutils) Disasm(file string, start, end uint64, intelSyntax bool) ([]plugin.Inst, error) {
b := bu.get()
args := []string{"-d", "-C", "--no-show-raw-insn", "-l",
fmt.Sprintf("--start-address=%#x", start),
if !b.objdumpFound {
return nil, errors.New("cannot disasm: no objdump tool available")
}
args := []string{"--disassemble-all", "--demangle", "--no-show-raw-insn",
"--line-numbers", fmt.Sprintf("--start-address=%#x", start),
fmt.Sprintf("--stop-address=%#x", end)}

if intelSyntax {
if runtime.GOOS == "darwin" {
args = append(args, "-x86-asm-syntax=intel")
if b.isLLVMObjdump {
args = append(args, "--x86-asm-syntax=intel")
} else {
args = append(args, "-M", "intel")
}
Expand Down
108 changes: 105 additions & 3 deletions internal/binutils/binutils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,14 +189,25 @@ func skipUnlessDarwinAmd64(t *testing.T) {
}

func testDisasm(t *testing.T, intelSyntax bool) {
_, llvmObjdump, buObjdump := findObjdump([]string{""})
if !(llvmObjdump || buObjdump) {
t.Skip("cannot disasm: no objdump tool available")
}

bu := &Binutils{}
insts, err := bu.Disasm(filepath.Join("testdata", "exe_linux_64"), 0, math.MaxUint64, intelSyntax)
testexe := "exe_linux_64"
if runtime.GOOS == "darwin" {
testexe = "exe_mac_64"
}

insts, err := bu.Disasm(filepath.Join("testdata", testexe), 0, math.MaxUint64, intelSyntax)
if err != nil {
t.Fatalf("Disasm: unexpected error %v", err)
}
mainCount := 0
for _, x := range insts {
if x.Function == "main" {
// Mac symbols have a leading underscore.
if x.Function == "main" || x.Function == "_main" {
kalyanac marked this conversation as resolved.
Show resolved Hide resolved
mainCount++
}
}
Expand All @@ -206,7 +217,9 @@ func testDisasm(t *testing.T, intelSyntax bool) {
}

func TestDisasm(t *testing.T) {
skipUnlessLinuxAmd64(t)
if runtime.GOOS != "linux" && runtime.GOOS != "darwin" {
t.Skip("This test only works on Linux or Mac")
}
testDisasm(t, true)
testDisasm(t, false)
}
Expand Down Expand Up @@ -401,3 +414,92 @@ func TestOpenMalformedMachO(t *testing.T) {
t.Errorf("Open: got %v, want error containing 'Mach-O'", err)
}
}

// TestObjdumpVersionChecks verifies that objdump version banners are
// classified correctly by isLLVMObjdump and isBuObjdump.
func TestObjdumpVersionChecks(t *testing.T) {
	type testcase struct {
		desc string
		os   string
		ver  string
		want bool
	}

	// LLVM banners. The accepted minimum version depends on the host OS, so
	// each case names the OS it applies to and is ignored everywhere else.
	// Fields are: desc, os, ver, want.
	llvmCases := []testcase{
		{"Valid Apple LLVM version string with usable version", "darwin",
			"Apple LLVM version 11.0.3 (clang-1103.0.32.62)\nOptimized build.", true},
		{"Valid Apple LLVM version string with unusable version", "darwin",
			"Apple LLVM version 10.0.0 (clang-1000.11.45.5)\nOptimized build.", false},
		{"Invalid Apple LLVM version string with usable version", "darwin",
			"Apple LLVM versions 11.0.3 (clang-1103.0.32.62)\nOptimized build.", false},
		{"Valid LLVM version string with usable version", "linux",
			"LLVM (http://llvm.org/):\nLLVM version 9.0.1\n\nOptimized build.", true},
		{"Valid LLVM version string with unusable version", "linux",
			"LLVM (http://llvm.org/):\nLLVM version 6.0.1\n\nOptimized build.", false},
		{"Invalid LLVM version string with usable version", "linux",
			"LLVM (http://llvm.org/):\nLLVM versions 9.0.1\n\nOptimized build.", false},
		{"Valid LLVM objdump version string with trunk", runtime.GOOS,
			"LLVM (http://llvm.org/):\nLLVM version custom-trunk 124ffeb592a00bfe\nOptimized build.", true},
		{"Invalid LLVM objdump version string with trunk", runtime.GOOS,
			"LLVM (http://llvm.org/):\nLLVM version custom-trank 124ffeb592a00bfe\nOptimized build.", false},
		{"Invalid LLVM objdump version string with trunk", runtime.GOOS,
			"LLVM (http://llvm.org/):\nllvm version custom-trunk 124ffeb592a00bfe\nOptimized build.", false},
	}
	for _, c := range llvmCases {
		if c.os != runtime.GOOS {
			continue
		}
		got := isLLVMObjdump(c.ver)
		if got != c.want {
			t.Errorf("%v: got %v, want %v", c.desc, got, c.want)
		}
	}

	// GNU binutils banners are classified the same way on every OS, so the
	// os field is left empty.
	gnuCases := []testcase{
		{"Valid GNU objdump version string", "",
			"GNU objdump (GNU Binutils) 2.34\nCopyright (C) 2020 Free Software Foundation, Inc.", true},
		{"Invalid GNU objdump version string", "",
			"GNU objdump (GNU Banutils) 2.34\nCopyright (C) 2020 Free Software Foundation, Inc.", false},
	}
	for _, c := range gnuCases {
		got := isBuObjdump(c.ver)
		if got != c.want {
			t.Errorf("%v: got %v, want %v", c.desc, got, c.want)
		}
	}
}
14 changes: 10 additions & 4 deletions internal/binutils/disasm.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ import (
)

// Regular expressions used to parse the textual output of nm and objdump.
var (
	// nmOutputRE splits a line into a hex address, a single-character field
	// (presumably the nm symbol type) and the remainder of the line.
	nmOutputRE = regexp.MustCompile(`^\s*([[:xdigit:]]+)\s+(.)\s+(.*)`)

	// objdumpAsmOutputRE matches an instruction line: a hex address followed
	// by a colon, whitespace, and the instruction text.
	objdumpAsmOutputRE = regexp.MustCompile(`^\s*([[:xdigit:]]+):\s+(.*)`)

	// objdumpOutputFileLine matches a "file:line" annotation, optionally
	// preceded by ";" and one whitespace character.
	objdumpOutputFileLine = regexp.MustCompile(`^;?\s?(.*):([0-9]+)`)

	// objdumpOutputFunction matches a "name():" function header, optionally
	// preceded by ";" and one whitespace character.
	objdumpOutputFunction = regexp.MustCompile(`^;?\s?(\S.*)\(\):`)

	// objdumpOutputFunctionLLVM matches a "name:" function header with an
	// optional leading hex address; the name is capture group 2.
	// NOTE(review): a name made up only of hex digits (e.g. "add:") is
	// consumed by the optional address group, leaving group 2 empty.
	objdumpOutputFunctionLLVM = regexp.MustCompile(`^([[:xdigit:]]+)?\s?(.*):`)
)

func findSymbols(syms []byte, file string, r *regexp.Regexp, address uint64) ([]*plugin.Sym, error) {
Expand Down Expand Up @@ -143,6 +144,11 @@ func disassemble(asm []byte) ([]plugin.Inst, error) {
if fields := objdumpOutputFunction.FindStringSubmatch(input); len(fields) == 2 {
function = fields[1]
continue
} else {
if fields := objdumpOutputFunctionLLVM.FindStringSubmatch(input); len(fields) == 3 {
function = fields[2]
continue
}
}
// Reset on unrecognized lines.
function, file, line = "", "", 0
Expand Down
24 changes: 16 additions & 8 deletions internal/binutils/disasm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,7 @@ func TestFunctionAssembly(t *testing.T) {
testcases := []testcase{
{
plugin.Sym{Name: []string{"symbol1"}, Start: 0x1000, End: 0x1FFF},
` 1000: instruction one
1001: instruction two
1002: instruction three
1003: instruction four
`,
" 1000: instruction one\n 1001: instruction two\n 1002: instruction three\n 1003: instruction four",
[]plugin.Inst{
{Addr: 0x1000, Text: "instruction one"},
{Addr: 0x1001, Text: "instruction two"},
Expand All @@ -124,14 +120,26 @@ func TestFunctionAssembly(t *testing.T) {
},
{
plugin.Sym{Name: []string{"symbol2"}, Start: 0x2000, End: 0x2FFF},
` 2000: instruction one
2001: instruction two
`,
" 2000: instruction one\n 2001: instruction two",
[]plugin.Inst{
{Addr: 0x2000, Text: "instruction one"},
{Addr: 0x2001, Text: "instruction two"},
},
},
{
plugin.Sym{Name: []string{"_main"}, Start: 0x30000, End: 0x3FFF},
"_main:\n; /tmp/hello.c:3\n30001: push %rbp",
[]plugin.Inst{
{Addr: 0x30001, Text: "push %rbp", Function: "_main", File: "/tmp/hello.c", Line: 3},
},
},
{
plugin.Sym{Name: []string{"main"}, Start: 0x4000, End: 0x4FFF},
"000000000040052d <main>:\nmain():\n/tmp/hello.c:3\n40001: push %rbp",
[]plugin.Inst{
{Addr: 0x40001, Text: "push %rbp", Function: "main", File: "/tmp/hello.c", Line: 3},
},
},
}

for _, tc := range testcases {
Expand Down