aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/cover/backend
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2020-12-06 10:08:26 +0100
committerDmitry Vyukov <dvyukov@google.com>2020-12-13 18:56:36 +0100
commit3e671cc5ce6612d8a67495a107df5ff8091113ea (patch)
tree2cd9b52eb5a63d3672ae5276c8456492430d687f /pkg/cover/backend
parentbca53db974f570410921f59b8c2c59a3d263cb44 (diff)
pkg/cover: split into ELF-dependent/independent parts
Diffstat (limited to 'pkg/cover/backend')
-rw-r--r--pkg/cover/backend/backend.go35
-rw-r--r--pkg/cover/backend/elf.go434
2 files changed, 469 insertions, 0 deletions
diff --git a/pkg/cover/backend/backend.go b/pkg/cover/backend/backend.go
new file mode 100644
index 000000000..c99e40893
--- /dev/null
+++ b/pkg/cover/backend/backend.go
@@ -0,0 +1,35 @@
+// Copyright 2020 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package backend
+
+import (
+ "github.com/google/syzkaller/pkg/symbolizer"
+ "github.com/google/syzkaller/sys/targets"
+)
+
// Impl is the parsed coverage information of a kernel object file:
// compile units and symbols annotated with coverage point PCs,
// plus a callback that symbolizes PCs into source-level frames.
type Impl struct {
	Units   []*CompileUnit
	Symbols []*Symbol
	// Frames is not filled by this package — presumably populated by
	// callers that cache symbolization results; TODO(review): confirm.
	Frames []symbolizer.Frame
	// Symbolize maps coverage point PCs to source file/line frames.
	Symbolize func(pcs []uint64) ([]symbolizer.Frame, error)
}
+
// CompileUnit represents a single DWARF compile unit (roughly, one source
// file) together with all coverage point PCs attributed to it.
type CompileUnit struct {
	Name string
	// Path is not set anywhere in this package — presumably resolved
	// by callers from Name and srcDir/buildDir; TODO(review): confirm.
	Path string
	PCs  []uint64
}
+
// Symbol is a function symbol from the object's .text section, attributed
// to its compile unit, with the coverage point PCs that fall into its
// [Start, End) address range.
type Symbol struct {
	Unit  *CompileUnit
	Name  string
	Start uint64
	End   uint64
	PCs   []uint64
	// Symbolized is not set in this package — presumably tracks lazy
	// symbolization done by callers; TODO(review): confirm.
	Symbolized bool
}
+
// Make parses the given kernel object file and returns its coverage
// information. Currently only ELF objects are supported.
// srcDir and buildDir are passed through but not yet used by the ELF
// backend — presumably reserved for source path resolution; TODO(review): confirm.
func Make(target *targets.Target, kernelObject, srcDir, buildDir string) (*Impl, error) {
	return makeELF(target, kernelObject, srcDir, buildDir)
}
diff --git a/pkg/cover/backend/elf.go b/pkg/cover/backend/elf.go
new file mode 100644
index 000000000..1e46578ed
--- /dev/null
+++ b/pkg/cover/backend/elf.go
@@ -0,0 +1,434 @@
+// Copyright 2020 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package backend
+
+import (
+ "bufio"
+ "bytes"
+ "debug/dwarf"
+ "debug/elf"
+ "encoding/binary"
+ "fmt"
+ "io/ioutil"
+ "runtime"
+ "sort"
+ "strconv"
+
+ "github.com/google/syzkaller/pkg/osutil"
+ "github.com/google/syzkaller/pkg/symbolizer"
+ "github.com/google/syzkaller/sys/targets"
+)
+
+func makeELF(target *targets.Target, kernelObject, srcDir, buildDir string) (*Impl, error) {
+ file, err := elf.Open(kernelObject)
+ if err != nil {
+ return nil, err
+ }
+ var coverPoints []uint64
+ var symbols []*Symbol
+ errc := make(chan error, 1)
+ go func() {
+ var err error
+ var tracePC uint64
+ symbols, tracePC, err = readSymbols(file)
+ if err != nil {
+ errc <- err
+ return
+ }
+ if target.Arch == targets.AMD64 {
+ coverPoints, err = readCoverPoints(file, tracePC)
+ } else {
+ coverPoints, err = objdump(target, kernelObject)
+ }
+ errc <- err
+ }()
+ ranges, units, err := readTextRanges(file)
+ if err != nil {
+ return nil, err
+ }
+ if err := <-errc; err != nil {
+ return nil, err
+ }
+ if len(coverPoints) == 0 {
+ return nil, fmt.Errorf("%v doesn't contain coverage callbacks (set CONFIG_KCOV=y)", kernelObject)
+ }
+ symbols = buildSymbols(symbols, ranges, coverPoints)
+ nunit := 0
+ for _, unit := range units {
+ if len(unit.PCs) == 0 {
+ continue // drop the unit
+ }
+ units[nunit] = unit
+ nunit++
+ }
+ units = units[:nunit]
+ if len(symbols) == 0 || len(units) == 0 {
+ return nil, fmt.Errorf("failed to parse DWARF (set CONFIG_DEBUG_INFO=y?)")
+ }
+ impl := &Impl{
+ Units: units,
+ Symbols: symbols,
+ Symbolize: func(pcs []uint64) ([]symbolizer.Frame, error) {
+ return symbolize(target, kernelObject, pcs)
+ },
+ }
+ return impl, nil
+}
+
// pcRange is a [start, end) PC range that a compile unit contributes
// to the .text section.
type pcRange struct {
	start uint64
	end   uint64
	unit  *CompileUnit
}
+
// buildSymbols attributes coverage point PCs to symbols and symbols to
// compile units, and drops symbols that got no PCs or no unit.
// Inputs must be sorted: symbols and ranges by start address,
// coverPoints ascending (callers sort them). Returns the filtered symbols.
func buildSymbols(symbols []*Symbol, ranges []pcRange, coverPoints []uint64) []*Symbol {
	// Assign coverage point PCs to symbols.
	// Both symbols and coverage points are sorted, so we do it one pass over both.
	var curSymbol *Symbol
	// firstSymbolPC is the index in coverPoints of the first PC belonging
	// to curSymbol, or -1 while we are not inside any symbol.
	firstSymbolPC, symbolIdx := -1, 0
	for i := 0; i < len(coverPoints); i++ {
		pc := coverPoints[i]
		// Advance past symbols that end at or before pc.
		for ; symbolIdx < len(symbols) && pc >= symbols[symbolIdx].End; symbolIdx++ {
		}
		var symb *Symbol
		if symbolIdx < len(symbols) && pc >= symbols[symbolIdx].Start && pc < symbols[symbolIdx].End {
			symb = symbols[symbolIdx]
		}
		// On leaving a symbol, flush the accumulated run of PCs to it
		// as a subslice of coverPoints (no copying).
		if curSymbol != nil && curSymbol != symb {
			curSymbol.PCs = coverPoints[firstSymbolPC:i]
			firstSymbolPC = -1
		}
		curSymbol = symb
		if symb != nil && firstSymbolPC == -1 {
			firstSymbolPC = i
		}
	}
	// Flush the trailing run for the last symbol, if any.
	if curSymbol != nil {
		curSymbol.PCs = coverPoints[firstSymbolPC:]
	}
	// Assign compile units to symbols based on unit pc ranges.
	// Do it one pass as both are sorted.
	nsymbol := 0
	rangeIndex := 0
	for _, s := range symbols {
		// Advance past ranges that end before this symbol starts.
		for ; rangeIndex < len(ranges) && ranges[rangeIndex].end <= s.Start; rangeIndex++ {
		}
		if rangeIndex == len(ranges) || s.Start < ranges[rangeIndex].start || len(s.PCs) == 0 {
			continue // drop the symbol
		}
		unit := ranges[rangeIndex].unit
		s.Unit = unit
		// Compact kept symbols in place.
		symbols[nsymbol] = s
		nsymbol++
	}
	symbols = symbols[:nsymbol]

	// Accumulate each unit's PCs from its symbols and re-point each
	// symbol's PCs into the unit's slice so they share backing storage.
	for _, s := range symbols {
		pos := len(s.Unit.PCs)
		s.Unit.PCs = append(s.Unit.PCs, s.PCs...)
		s.PCs = s.Unit.PCs[pos:]
	}
	return symbols
}
+
+func readSymbols(file *elf.File) ([]*Symbol, uint64, error) {
+ text := file.Section(".text")
+ if text == nil {
+ return nil, 0, fmt.Errorf("no .text section in the object file")
+ }
+ allSymbols, err := file.Symbols()
+ if err != nil {
+ return nil, 0, fmt.Errorf("failed to read ELF symbols: %v", err)
+ }
+ var tracePC uint64
+ var symbols []*Symbol
+ for _, symb := range allSymbols {
+ if symb.Value < text.Addr || symb.Value+symb.Size > text.Addr+text.Size {
+ continue
+ }
+ symbols = append(symbols, &Symbol{
+ Name: symb.Name,
+ Start: symb.Value,
+ End: symb.Value + symb.Size,
+ })
+ if tracePC == 0 && symb.Name == "__sanitizer_cov_trace_pc" {
+ tracePC = symb.Value
+ }
+ }
+ if tracePC == 0 {
+ return nil, 0, fmt.Errorf("no __sanitizer_cov_trace_pc symbol in the object file")
+ }
+ sort.Slice(symbols, func(i, j int) bool {
+ return symbols[i].Start < symbols[j].Start
+ })
+ return symbols, tracePC, nil
+}
+
// readTextRanges parses DWARF debug info and returns, for every compile
// unit, the PC ranges it contributes to the .text section, plus the list
// of units. The returned ranges are sorted by start address.
func readTextRanges(file *elf.File) ([]pcRange, []*CompileUnit, error) {
	text := file.Section(".text")
	if text == nil {
		return nil, nil, fmt.Errorf("no .text section in the object file")
	}
	// Presence of .text relocations is used as a sign of a
	// CONFIG_RANDOMIZE_BASE (KASLR) build, see the comment below.
	kaslr := file.Section(".rela.text") != nil
	debugInfo, err := file.DWARF()
	if err != nil {
		return nil, nil, fmt.Errorf("failed to parse DWARF: %v (set CONFIG_DEBUG_INFO=y?)", err)
	}
	var ranges []pcRange
	var units []*CompileUnit
	for r := debugInfo.Reader(); ; {
		ent, err := r.Next()
		if err != nil {
			return nil, nil, err
		}
		if ent == nil {
			break // end of DWARF data
		}
		// We skip children below, so only compile units should appear
		// at the top level.
		if ent.Tag != dwarf.TagCompileUnit {
			return nil, nil, fmt.Errorf("found unexpected tag %v on top level", ent.Tag)
		}
		attrName := ent.Val(dwarf.AttrName)
		if attrName == nil {
			continue
		}
		unit := &CompileUnit{
			Name: attrName.(string),
		}
		units = append(units, unit)
		ranges1, err := debugInfo.Ranges(ent)
		if err != nil {
			return nil, nil, err
		}
		for _, r := range ranges1 {
			// Sanity-check that the range is non-empty and inside .text.
			// NOTE(review): when kaslr is false, an out-of-.text range
			// falls through and is still appended below — confirm this
			// is intended rather than a missing `continue`.
			if r[0] >= r[1] || r[0] < text.Addr || r[1] > text.Addr+text.Size {
				if kaslr {
					// Linux kernel binaries with CONFIG_RANDOMIZE_BASE=y are strange.
					// .text starts at 0xffffffff81000000 and symbols point there as well,
					// but PC ranges point to addresses around 0.
					// So try to add text offset and retry the check.
					// It's unclear if we also need some offset on top of text.Addr,
					// it gives approximately correct addresses, but not necessary precisely
					// correct addresses.
					r[0] += text.Addr
					r[1] += text.Addr
					if r[0] >= r[1] || r[0] < text.Addr || r[1] > text.Addr+text.Size {
						continue
					}
				}
			}
			ranges = append(ranges, pcRange{r[0], r[1], unit})
		}
		r.SkipChildren()
	}
	sort.Slice(ranges, func(i, j int) bool {
		return ranges[i].start < ranges[j].start
	})
	return ranges, units, nil
}
+
+func symbolize(target *targets.Target, obj string, pcs []uint64) ([]symbolizer.Frame, error) {
+ procs := runtime.GOMAXPROCS(0) / 2
+ if need := len(pcs) / 1000; procs > need {
+ procs = need
+ }
+ const (
+ minProcs = 1
+ maxProcs = 4
+ )
+ // addr2line on a beefy vmlinux takes up to 1.6GB of RAM, so don't create too many of them.
+ if procs > maxProcs {
+ procs = maxProcs
+ }
+ if procs < minProcs {
+ procs = minProcs
+ }
+ type symbolizerResult struct {
+ frames []symbolizer.Frame
+ err error
+ }
+ symbolizerC := make(chan symbolizerResult, procs)
+ pcchan := make(chan []uint64, procs)
+ for p := 0; p < procs; p++ {
+ go func() {
+ symb := symbolizer.NewSymbolizer(target)
+ defer symb.Close()
+ var res symbolizerResult
+ for pcs := range pcchan {
+ frames, err := symb.SymbolizeArray(obj, pcs)
+ if err != nil {
+ res.err = fmt.Errorf("failed to symbolize: %v", err)
+ }
+ res.frames = append(res.frames, frames...)
+ }
+ symbolizerC <- res
+ }()
+ }
+ for i := 0; i < len(pcs); {
+ end := i + 100
+ if end > len(pcs) {
+ end = len(pcs)
+ }
+ pcchan <- pcs[i:end]
+ i = end
+ }
+ close(pcchan)
+ var err0 error
+ var frames []symbolizer.Frame
+ for p := 0; p < procs; p++ {
+ res := <-symbolizerC
+ if res.err != nil {
+ err0 = res.err
+ }
+ frames = append(frames, res.frames...)
+ }
+ if err0 != nil {
+ return nil, err0
+ }
+ return frames, nil
+}
+
// readCoverPoints finds all coverage points (calls of __sanitizer_cov_trace_pc)
// in the object file. Currently it is amd64-specific: it scans .text for the
// e8 (call rel32) opcode and checks that the signed displacement targets
// tracePC. Running objdump on the whole object file is too slow.
func readCoverPoints(file *elf.File, tracePC uint64) ([]uint64, error) {
	text := file.Section(".text")
	if text == nil {
		return nil, fmt.Errorf("no .text section in the object file")
	}
	data, err := text.Data()
	if err != nil {
		return nil, fmt.Errorf("failed to read .text: %v", err)
	}
	const callLen = 5 // e8 opcode + 4-byte rel32
	var pcs []uint64
	// Only positions where a complete 5-byte call fits are candidates.
	limit := len(data) - callLen + 1
	pos := 0
	for pos < limit {
		rel := bytes.IndexByte(data[pos:limit], 0xe8)
		if rel < 0 {
			break
		}
		pos += rel
		// rel32 is a signed displacement from the end of the call insn;
		// sign-extend it before adding to the 64-bit PC.
		disp := uint64(int64(int32(binary.LittleEndian.Uint32(data[pos+1:]))))
		callPC := text.Addr + uint64(pos)
		if callPC+disp+callLen == tracePC {
			pcs = append(pcs, callPC)
		}
		pos++
	}
	return pcs, nil
}
+
// objdump is an old, slow way of finding coverage points.
// amd64 uses faster option of parsing binary directly (readCoverPoints).
// TODO: use the faster approach for all other arches and drop this.
// It disassembles the object with the target's objdump and extracts the PC
// of every call instruction that targets a coverage trace function.
func objdump(target *targets.Target, obj string) ([]uint64, error) {
	cmd := osutil.Command(target.Objdump, "-d", "--no-show-raw-insn", obj)
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return nil, err
	}
	defer stdout.Close()
	stderr, err := cmd.StderrPipe()
	if err != nil {
		return nil, err
	}
	defer stderr.Close()
	if err := cmd.Start(); err != nil {
		return nil, fmt.Errorf("failed to run objdump on %v: %v", obj, err)
	}
	// Make sure the child is killed and reaped on every return path
	// (the second Wait after a successful Wait below is a harmless no-op).
	defer func() {
		cmd.Process.Kill()
		cmd.Wait()
	}()
	s := bufio.NewScanner(stdout)
	callInsns, traceFuncs := archCallInsn(target)
	var pcs []uint64
	for s.Scan() {
		// Each matching disassembly line yields the PC of one coverage call.
		if pc := parseLine(callInsns, traceFuncs, s.Bytes()); pc != 0 {
			pcs = append(pcs, pc)
		}
	}
	// Drain stderr before Wait so we can include it in error messages.
	stderrOut, _ := ioutil.ReadAll(stderr)
	if err := cmd.Wait(); err != nil {
		return nil, fmt.Errorf("failed to run objdump on %v: %v\n%s", obj, err, stderrOut)
	}
	if err := s.Err(); err != nil {
		return nil, fmt.Errorf("failed to run objdump on %v: %v\n%s", obj, err, stderrOut)
	}
	return pcs, nil
}
+
// parseLine extracts the PC from one line of objdump disassembly if the
// line is a call to one of the coverage trace functions, and returns 0
// otherwise. Expected line shape (leading spaces allowed):
//
//	<hex-pc>:	<call-insn> <addr> <trace-func-name>
func parseLine(callInsns, traceFuncs [][]byte, ln []byte) uint64 {
	// Locate this arch's call/branch mnemonic on the line.
	callPos := -1
	for _, insn := range callInsns {
		if callPos = bytes.Index(ln, insn); callPos >= 0 {
			break
		}
	}
	if callPos < 0 {
		return 0
	}
	// The call must target one of the coverage trace functions.
	found := false
	for _, fn := range traceFuncs {
		if bytes.Contains(ln[callPos:], fn) {
			found = true
			break
		}
	}
	if !found {
		return 0
	}
	// The PC is the leading hex number terminated by ':'.
	addr := bytes.TrimLeft(ln, " ")
	colon := bytes.IndexByte(addr, ':')
	if colon < 0 {
		return 0
	}
	pc, err := strconv.ParseUint(string(addr[:colon]), 16, 64)
	if err != nil {
		return 0
	}
	return pc
}
+
// archCallInsn returns, for the given arch, (1) the byte patterns of the
// call/branch mnemonics as objdump prints them and (2) the acceptable
// coverage trace function name suffixes such a call may target.
// Used by parseLine to recognize coverage callback calls in disassembly.
// Panics on an unsupported arch (programmer error).
func archCallInsn(target *targets.Target) ([][]byte, [][]byte) {
	callName := [][]byte{[]byte(" <__sanitizer_cov_trace_pc>")}
	switch target.Arch {
	case targets.I386:
		// c1000102: call c10001f0 <__sanitizer_cov_trace_pc>
		return [][]byte{[]byte("\tcall ")}, callName
	case targets.ARM64:
		// ffff0000080d9cc0: bl ffff00000820f478 <__sanitizer_cov_trace_pc>
		return [][]byte{[]byte("\tbl\t")}, callName
	case targets.ARM:
		// 8010252c: bl 801c3280 <__sanitizer_cov_trace_pc>
		return [][]byte{[]byte("\tbl\t")}, callName
	case targets.PPC64LE:
		// c00000000006d904: bl c000000000350780 <.__sanitizer_cov_trace_pc>
		// This is only known to occur in the test:
		// 838: bl 824 <__sanitizer_cov_trace_pc+0x8>
		// This occurs on PPC64LE:
		// c0000000001c21a8: bl c0000000002df4a0 <__sanitizer_cov_trace_pc>
		return [][]byte{[]byte("\tbl ")}, [][]byte{
			[]byte("<__sanitizer_cov_trace_pc>"),
			[]byte("<__sanitizer_cov_trace_pc+0x8>"),
			[]byte(" <.__sanitizer_cov_trace_pc>"),
		}
	case targets.MIPS64LE:
		// ffffffff80100420: jal ffffffff80205880 <__sanitizer_cov_trace_pc>
		// This is only known to occur in the test:
		// b58: bal b30 <__sanitizer_cov_trace_pc>
		return [][]byte{[]byte("\tjal\t"), []byte("\tbal\t")}, callName
	case targets.S390x:
		// 1001de: brasl %r14,2bc090 <__sanitizer_cov_trace_pc>
		return [][]byte{[]byte("\tbrasl\t")}, callName
	case targets.RiscV64:
		// ffffffe000200018: jal ra,ffffffe0002935b0 <__sanitizer_cov_trace_pc>
		// ffffffe0000010da: jalr 1242(ra) # ffffffe0002935b0 <__sanitizer_cov_trace_pc>
		return [][]byte{[]byte("\tjal\t"), []byte("\tjalr\t")}, callName
	default:
		panic(fmt.Sprintf("unknown arch %q", target.Arch))
	}
}