Skip to content

Commit

Permalink
Add support for LLVM objdump (google#534)
Browse files Browse the repository at this point in the history
Add support for LLVM objdump.
  • Loading branch information
kalyanac authored and Gabriel Marin committed Dec 17, 2020
1 parent d4d9798 commit a750844
Show file tree
Hide file tree
Showing 4 changed files with 214 additions and 20 deletions.
88 changes: 83 additions & 5 deletions internal/binutils/binutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@ import (
"debug/elf"
"debug/macho"
"encoding/binary"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"regexp"
"runtime"
"strconv"
"strings"
"sync"

Expand All @@ -39,6 +41,8 @@ type Binutils struct {
rep *binrep
}

var objdumpLLVMVerRE = regexp.MustCompile(`LLVM version (?:(\d*)\.(\d*)\.(\d*)|.*(trunk).*)`)

// binrep is an immutable representation for Binutils. It is atomically
// replaced on every mutation to provide thread-safe access.
type binrep struct {
Expand All @@ -51,6 +55,7 @@ type binrep struct {
nmFound bool
objdump string
objdumpFound bool
isLLVMObjdump bool

// if fast, perform symbolization using nm (symbol names only),
// instead of file-line detail from the slower addr2line.
Expand Down Expand Up @@ -140,7 +145,77 @@ func initTools(b *binrep, config string) {
b.addr2line, b.addr2lineFound = findExe("gaddr2line", append(paths["addr2line"], defaultPath...))
}
b.nm, b.nmFound = findExe("nm", append(paths["nm"], defaultPath...))
b.objdump, b.objdumpFound = findExe("objdump", append(paths["objdump"], defaultPath...))
b.objdump, b.objdumpFound, b.isLLVMObjdump = findObjdump(append(paths["objdump"], defaultPath...))
}

// findObjdump searches paths for a usable objdump binary.
// Candidates are tried in order of preference: llvm-objdump, then objdump,
// and, on MacOS only, gobjdump as the last resort.
// Each candidate found is run with --version; it is accepted if the output
// is recognized by isLLVMObjdump or isBuObjdump, and skipped otherwise
// (including when the command itself fails).
// It returns:
//   - the path to the selected objdump binary, or "" if none was accepted;
//   - a boolean indicating whether an acceptable objdump was found;
//   - a boolean indicating whether the selected binary is an LLVM objdump.
func findObjdump(paths []string) (string, bool, bool) {
	candidates := []string{"llvm-objdump", "objdump"}
	if runtime.GOOS == "darwin" {
		candidates = append(candidates, "gobjdump")
	}

	for _, name := range candidates {
		path, ok := findExe(name, paths)
		if !ok {
			continue
		}
		out, err := exec.Command(path, "--version").Output()
		if err != nil {
			// A candidate that cannot report its version is not usable.
			continue
		}
		version := string(out)
		switch {
		case isLLVMObjdump(version):
			return path, true, true
		case isBuObjdump(version):
			return path, true, false
		}
	}
	return "", false, false
}

// isLLVMObjdump reports whether output, the text printed by an objdump
// binary when run with --version, identifies an LLVM objdump of an
// acceptable version for the current operating system.
//
// A version string containing "trunk" is always accepted. Otherwise the
// version must have the form major.minor.patch with every component
// numeric, and must be at least 8.x on Linux or at least 10.0.1 on MacOS.
// On any other operating system a numbered LLVM objdump is rejected.
func isLLVMObjdump(output string) bool {
	// fields holds the whole match followed by the four capture groups:
	// major, minor, patch and the literal "trunk".
	fields := objdumpLLVMVerRE.FindStringSubmatch(output)
	if len(fields) != 5 {
		return false
	}
	if fields[4] == "trunk" {
		return true
	}

	// The pattern uses \d*, so a component may be empty; require each
	// one to parse as an integer before comparing versions.
	var ver [3]int
	for i := range ver {
		n, err := strconv.Atoi(fields[i+1])
		if err != nil {
			return false
		}
		ver[i] = n
	}
	major, minor, patch := ver[0], ver[1], ver[2]

	switch runtime.GOOS {
	case "linux":
		// Ensure LLVM objdump is at least version 8.0 on Linux.
		// Some flags, like --demangle, and double dashes for options are
		// not supported by previous versions.
		return major >= 8
	case "darwin":
		// Ensure LLVM objdump is at least version 10.0.1 on MacOS.
		// Compare the full triple so that a 10.x release with x > 0
		// (e.g. 10.1.0) is not rejected for having a zero patch level.
		return major > 10 || (major == 10 && (minor > 0 || patch >= 1))
	}
	return false
}

// isBuObjdump reports whether output, the version text printed by an
// objdump binary, identifies a GNU binutils objdump. The text must
// contain both "GNU objdump" and "Binutils"; the version number itself
// is not inspected.
func isBuObjdump(output string) bool {
	for _, marker := range []string{"GNU objdump", "Binutils"} {
		if !strings.Contains(output, marker) {
			return false
		}
	}
	return true
}

// findExe looks for an executable command on a set of paths.
Expand All @@ -159,13 +234,16 @@ func findExe(cmd string, paths []string) (string, bool) {
// of a binary.
func (bu *Binutils) Disasm(file string, start, end uint64, intelSyntax bool) ([]plugin.Inst, error) {
b := bu.get()
args := []string{"-d", "-C", "--no-show-raw-insn", "-l",
fmt.Sprintf("--start-address=%#x", start),
if !b.objdumpFound {
return nil, errors.New("cannot disasm: no objdump tool available")
}
args := []string{"--disassemble-all", "--demangle", "--no-show-raw-insn",
"--line-numbers", fmt.Sprintf("--start-address=%#x", start),
fmt.Sprintf("--stop-address=%#x", end)}

if intelSyntax {
if runtime.GOOS == "darwin" {
args = append(args, "-x86-asm-syntax=intel")
if b.isLLVMObjdump {
args = append(args, "--x86-asm-syntax=intel")
} else {
args = append(args, "-M", "intel")
}
Expand Down
108 changes: 105 additions & 3 deletions internal/binutils/binutils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,14 +189,25 @@ func skipUnlessDarwinAmd64(t *testing.T) {
}

func testDisasm(t *testing.T, intelSyntax bool) {
_, llvmObjdump, buObjdump := findObjdump([]string{""})
if !(llvmObjdump || buObjdump) {
t.Skip("cannot disasm: no objdump tool available")
}

bu := &Binutils{}
insts, err := bu.Disasm(filepath.Join("testdata", "exe_linux_64"), 0, math.MaxUint64, intelSyntax)
testexe := "exe_linux_64"
if runtime.GOOS == "darwin" {
testexe = "exe_mac_64"
}

insts, err := bu.Disasm(filepath.Join("testdata", testexe), 0, math.MaxUint64, intelSyntax)
if err != nil {
t.Fatalf("Disasm: unexpected error %v", err)
}
mainCount := 0
for _, x := range insts {
if x.Function == "main" {
// Mac symbols have a leading underscore.
if x.Function == "main" || x.Function == "_main" {
mainCount++
}
}
Expand All @@ -206,7 +217,9 @@ func testDisasm(t *testing.T, intelSyntax bool) {
}

func TestDisasm(t *testing.T) {
skipUnlessLinuxAmd64(t)
if runtime.GOOS != "linux" && runtime.GOOS != "darwin" {
t.Skip("This test only works on Linux or Mac")
}
testDisasm(t, true)
testDisasm(t, false)
}
Expand Down Expand Up @@ -401,3 +414,92 @@ func TestOpenMalformedMachO(t *testing.T) {
t.Errorf("Open: got %v, want error containing 'Mach-O'", err)
}
}

// TestObjdumpVersionChecks verifies that objdump version strings are
// classified correctly by isLLVMObjdump and isBuObjdump.
func TestObjdumpVersionChecks(t *testing.T) {
	type testcase struct {
		desc string // unique description, used to identify a failing case
		os   string // GOOS the case applies to; unused for GNU objdump cases
		ver  string // version text passed to the function under test
		want bool
	}

	// isLLVMObjdump consults runtime.GOOS, so each case is only meaningful
	// on the operating system it names; cases for other systems are skipped.
	for _, tc := range []testcase{
		{
			desc: "Valid Apple LLVM version string with usable version",
			os:   "darwin",
			ver:  "Apple LLVM version 11.0.3 (clang-1103.0.32.62)\nOptimized build.",
			want: true,
		},
		{
			desc: "Valid Apple LLVM version string with unusable version",
			os:   "darwin",
			ver:  "Apple LLVM version 10.0.0 (clang-1000.11.45.5)\nOptimized build.",
			want: false,
		},
		{
			desc: "Invalid Apple LLVM version string with usable version",
			os:   "darwin",
			ver:  "Apple LLVM versions 11.0.3 (clang-1103.0.32.62)\nOptimized build.",
			want: false,
		},
		{
			desc: "Valid LLVM version string with usable version",
			os:   "linux",
			ver:  "LLVM (http://llvm.org/):\nLLVM version 9.0.1\n\nOptimized build.",
			want: true,
		},
		{
			desc: "Valid LLVM version string with unusable version",
			os:   "linux",
			ver:  "LLVM (http://llvm.org/):\nLLVM version 6.0.1\n\nOptimized build.",
			want: false,
		},
		{
			desc: "Invalid LLVM version string with usable version",
			os:   "linux",
			ver:  "LLVM (http://llvm.org/):\nLLVM versions 9.0.1\n\nOptimized build.",
			want: false,
		},
		{
			desc: "Valid LLVM objdump version string with trunk",
			os:   runtime.GOOS,
			ver:  "LLVM (http://llvm.org/):\nLLVM version custom-trunk 124ffeb592a00bfe\nOptimized build.",
			want: true,
		},
		{
			desc: "Invalid LLVM objdump version string with misspelled trunk",
			os:   runtime.GOOS,
			ver:  "LLVM (http://llvm.org/):\nLLVM version custom-trank 124ffeb592a00bfe\nOptimized build.",
			want: false,
		},
		{
			desc: "Invalid LLVM objdump version string with trunk and lowercase llvm",
			os:   runtime.GOOS,
			ver:  "LLVM (http://llvm.org/):\nllvm version custom-trunk 124ffeb592a00bfe\nOptimized build.",
			want: false,
		},
	} {
		if runtime.GOOS != tc.os {
			continue
		}
		if got := isLLVMObjdump(tc.ver); got != tc.want {
			t.Errorf("%v: got %v, want %v", tc.desc, got, tc.want)
		}
	}

	// GNU objdump detection does not depend on the operating system.
	for _, tc := range []testcase{
		{
			desc: "Valid GNU objdump version string",
			ver:  "GNU objdump (GNU Binutils) 2.34\nCopyright (C) 2020 Free Software Foundation, Inc.",
			want: true,
		},
		{
			desc: "Invalid GNU objdump version string",
			ver:  "GNU objdump (GNU Banutils) 2.34\nCopyright (C) 2020 Free Software Foundation, Inc.",
			want: false,
		},
	} {
		if got := isBuObjdump(tc.ver); got != tc.want {
			t.Errorf("%v: got %v, want %v", tc.desc, got, tc.want)
		}
	}
}
14 changes: 10 additions & 4 deletions internal/binutils/disasm.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ import (
)

var (
nmOutputRE = regexp.MustCompile(`^\s*([[:xdigit:]]+)\s+(.)\s+(.*)`)
objdumpAsmOutputRE = regexp.MustCompile(`^\s*([[:xdigit:]]+):\s+(.*)`)
objdumpOutputFileLine = regexp.MustCompile(`^(.*):([0-9]+)`)
objdumpOutputFunction = regexp.MustCompile(`^(\S.*)\(\):`)
nmOutputRE = regexp.MustCompile(`^\s*([[:xdigit:]]+)\s+(.)\s+(.*)`)
objdumpAsmOutputRE = regexp.MustCompile(`^\s*([[:xdigit:]]+):\s+(.*)`)
objdumpOutputFileLine = regexp.MustCompile(`^;?\s?(.*):([0-9]+)`)
objdumpOutputFunction = regexp.MustCompile(`^;?\s?(\S.*)\(\):`)
objdumpOutputFunctionLLVM = regexp.MustCompile(`^([[:xdigit:]]+)?\s?(.*):`)
)

func findSymbols(syms []byte, file string, r *regexp.Regexp, address uint64) ([]*plugin.Sym, error) {
Expand Down Expand Up @@ -143,6 +144,11 @@ func disassemble(asm []byte) ([]plugin.Inst, error) {
if fields := objdumpOutputFunction.FindStringSubmatch(input); len(fields) == 2 {
function = fields[1]
continue
} else {
if fields := objdumpOutputFunctionLLVM.FindStringSubmatch(input); len(fields) == 3 {
function = fields[2]
continue
}
}
// Reset on unrecognized lines.
function, file, line = "", "", 0
Expand Down
24 changes: 16 additions & 8 deletions internal/binutils/disasm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,7 @@ func TestFunctionAssembly(t *testing.T) {
testcases := []testcase{
{
plugin.Sym{Name: []string{"symbol1"}, Start: 0x1000, End: 0x1FFF},
` 1000: instruction one
1001: instruction two
1002: instruction three
1003: instruction four
`,
" 1000: instruction one\n 1001: instruction two\n 1002: instruction three\n 1003: instruction four",
[]plugin.Inst{
{Addr: 0x1000, Text: "instruction one"},
{Addr: 0x1001, Text: "instruction two"},
Expand All @@ -124,14 +120,26 @@ func TestFunctionAssembly(t *testing.T) {
},
{
plugin.Sym{Name: []string{"symbol2"}, Start: 0x2000, End: 0x2FFF},
` 2000: instruction one
2001: instruction two
`,
" 2000: instruction one\n 2001: instruction two",
[]plugin.Inst{
{Addr: 0x2000, Text: "instruction one"},
{Addr: 0x2001, Text: "instruction two"},
},
},
{
plugin.Sym{Name: []string{"_main"}, Start: 0x30000, End: 0x3FFF},
"_main:\n; /tmp/hello.c:3\n30001: push %rbp",
[]plugin.Inst{
{Addr: 0x30001, Text: "push %rbp", Function: "_main", File: "/tmp/hello.c", Line: 3},
},
},
{
plugin.Sym{Name: []string{"main"}, Start: 0x4000, End: 0x4FFF},
"000000000040052d <main>:\nmain():\n/tmp/hello.c:3\n40001: push %rbp",
[]plugin.Inst{
{Addr: 0x40001, Text: "push %rbp", Function: "main", File: "/tmp/hello.c", Line: 3},
},
},
}

for _, tc := range testcases {
Expand Down

0 comments on commit a750844

Please sign in to comment.