From 3e671cc5ce6612d8a67495a107df5ff8091113ea Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Sun, 6 Dec 2020 10:08:26 +0100 Subject: pkg/cover: split into ELF-dependent/independent parts --- pkg/cover/backend/backend.go | 35 ++++ pkg/cover/backend/elf.go | 434 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 469 insertions(+) create mode 100644 pkg/cover/backend/backend.go create mode 100644 pkg/cover/backend/elf.go (limited to 'pkg/cover/backend') diff --git a/pkg/cover/backend/backend.go b/pkg/cover/backend/backend.go new file mode 100644 index 000000000..c99e40893 --- /dev/null +++ b/pkg/cover/backend/backend.go @@ -0,0 +1,35 @@ +// Copyright 2020 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package backend + +import ( + "github.com/google/syzkaller/pkg/symbolizer" + "github.com/google/syzkaller/sys/targets" +) + +type Impl struct { + Units []*CompileUnit + Symbols []*Symbol + Frames []symbolizer.Frame + Symbolize func(pcs []uint64) ([]symbolizer.Frame, error) +} + +type CompileUnit struct { + Name string + Path string + PCs []uint64 +} + +type Symbol struct { + Unit *CompileUnit + Name string + Start uint64 + End uint64 + PCs []uint64 + Symbolized bool +} + +func Make(target *targets.Target, kernelObject, srcDir, buildDir string) (*Impl, error) { + return makeELF(target, kernelObject, srcDir, buildDir) +} diff --git a/pkg/cover/backend/elf.go b/pkg/cover/backend/elf.go new file mode 100644 index 000000000..1e46578ed --- /dev/null +++ b/pkg/cover/backend/elf.go @@ -0,0 +1,434 @@ +// Copyright 2020 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +package backend + +import ( + "bufio" + "bytes" + "debug/dwarf" + "debug/elf" + "encoding/binary" + "fmt" + "io/ioutil" + "runtime" + "sort" + "strconv" + + "github.com/google/syzkaller/pkg/osutil" + "github.com/google/syzkaller/pkg/symbolizer" + "github.com/google/syzkaller/sys/targets" +) + +func makeELF(target *targets.Target, kernelObject, srcDir, buildDir string) (*Impl, error) { + file, err := elf.Open(kernelObject) + if err != nil { + return nil, err + } + var coverPoints []uint64 + var symbols []*Symbol + errc := make(chan error, 1) + go func() { + var err error + var tracePC uint64 + symbols, tracePC, err = readSymbols(file) + if err != nil { + errc <- err + return + } + if target.Arch == targets.AMD64 { + coverPoints, err = readCoverPoints(file, tracePC) + } else { + coverPoints, err = objdump(target, kernelObject) + } + errc <- err + }() + ranges, units, err := readTextRanges(file) + if err != nil { + return nil, err + } + if err := <-errc; err != nil { + return nil, err + } + if len(coverPoints) == 0 { + return nil, fmt.Errorf("%v doesn't contain coverage callbacks (set CONFIG_KCOV=y)", kernelObject) + } + symbols = buildSymbols(symbols, ranges, coverPoints) + nunit := 0 + for _, unit := range units { + if len(unit.PCs) == 0 { + continue // drop the unit + } + units[nunit] = unit + nunit++ + } + units = units[:nunit] + if len(symbols) == 0 || len(units) == 0 { + return nil, fmt.Errorf("failed to parse DWARF (set CONFIG_DEBUG_INFO=y?)") + } + impl := &Impl{ + Units: units, + Symbols: symbols, + Symbolize: func(pcs []uint64) ([]symbolizer.Frame, error) { + return symbolize(target, kernelObject, pcs) + }, + } + return impl, nil +} + +type pcRange struct { + start uint64 + end uint64 + unit *CompileUnit +} + +func buildSymbols(symbols []*Symbol, ranges []pcRange, coverPoints []uint64) []*Symbol { + // Assign coverage point PCs to symbols. + // Both symbols and coverage points are sorted, so we do it one pass over both. 
+ var curSymbol *Symbol + firstSymbolPC, symbolIdx := -1, 0 + for i := 0; i < len(coverPoints); i++ { + pc := coverPoints[i] + for ; symbolIdx < len(symbols) && pc >= symbols[symbolIdx].End; symbolIdx++ { + } + var symb *Symbol + if symbolIdx < len(symbols) && pc >= symbols[symbolIdx].Start && pc < symbols[symbolIdx].End { + symb = symbols[symbolIdx] + } + if curSymbol != nil && curSymbol != symb { + curSymbol.PCs = coverPoints[firstSymbolPC:i] + firstSymbolPC = -1 + } + curSymbol = symb + if symb != nil && firstSymbolPC == -1 { + firstSymbolPC = i + } + } + if curSymbol != nil { + curSymbol.PCs = coverPoints[firstSymbolPC:] + } + // Assign compile units to symbols based on unit pc ranges. + // Do it one pass as both are sorted. + nsymbol := 0 + rangeIndex := 0 + for _, s := range symbols { + for ; rangeIndex < len(ranges) && ranges[rangeIndex].end <= s.Start; rangeIndex++ { + } + if rangeIndex == len(ranges) || s.Start < ranges[rangeIndex].start || len(s.PCs) == 0 { + continue // drop the symbol + } + unit := ranges[rangeIndex].unit + s.Unit = unit + symbols[nsymbol] = s + nsymbol++ + } + symbols = symbols[:nsymbol] + + for _, s := range symbols { + pos := len(s.Unit.PCs) + s.Unit.PCs = append(s.Unit.PCs, s.PCs...) 
+ s.PCs = s.Unit.PCs[pos:] + } + return symbols +} + +func readSymbols(file *elf.File) ([]*Symbol, uint64, error) { + text := file.Section(".text") + if text == nil { + return nil, 0, fmt.Errorf("no .text section in the object file") + } + allSymbols, err := file.Symbols() + if err != nil { + return nil, 0, fmt.Errorf("failed to read ELF symbols: %v", err) + } + var tracePC uint64 + var symbols []*Symbol + for _, symb := range allSymbols { + if symb.Value < text.Addr || symb.Value+symb.Size > text.Addr+text.Size { + continue + } + symbols = append(symbols, &Symbol{ + Name: symb.Name, + Start: symb.Value, + End: symb.Value + symb.Size, + }) + if tracePC == 0 && symb.Name == "__sanitizer_cov_trace_pc" { + tracePC = symb.Value + } + } + if tracePC == 0 { + return nil, 0, fmt.Errorf("no __sanitizer_cov_trace_pc symbol in the object file") + } + sort.Slice(symbols, func(i, j int) bool { + return symbols[i].Start < symbols[j].Start + }) + return symbols, tracePC, nil +} + +func readTextRanges(file *elf.File) ([]pcRange, []*CompileUnit, error) { + text := file.Section(".text") + if text == nil { + return nil, nil, fmt.Errorf("no .text section in the object file") + } + kaslr := file.Section(".rela.text") != nil + debugInfo, err := file.DWARF() + if err != nil { + return nil, nil, fmt.Errorf("failed to parse DWARF: %v (set CONFIG_DEBUG_INFO=y?)", err) + } + var ranges []pcRange + var units []*CompileUnit + for r := debugInfo.Reader(); ; { + ent, err := r.Next() + if err != nil { + return nil, nil, err + } + if ent == nil { + break + } + if ent.Tag != dwarf.TagCompileUnit { + return nil, nil, fmt.Errorf("found unexpected tag %v on top level", ent.Tag) + } + attrName := ent.Val(dwarf.AttrName) + if attrName == nil { + continue + } + unit := &CompileUnit{ + Name: attrName.(string), + } + units = append(units, unit) + ranges1, err := debugInfo.Ranges(ent) + if err != nil { + return nil, nil, err + } + for _, r := range ranges1 { + if r[0] >= r[1] || r[0] < text.Addr || r[1] > 
text.Addr+text.Size {
+ if kaslr {
+ // Linux kernel binaries with CONFIG_RANDOMIZE_BASE=y are strange.
+ // .text starts at 0xffffffff81000000 and symbols point there as well,
+ // but PC ranges point to addresses around 0.
+ // So try to add text offset and retry the check.
+ // It's unclear if we also need some offset on top of text.Addr,
+ // it gives approximately correct addresses, but not necessarily precisely
+ // correct addresses.
+ r[0] += text.Addr
+ r[1] += text.Addr
+ if r[0] >= r[1] || r[0] < text.Addr || r[1] > text.Addr+text.Size {
+ continue
+ }
+ }
+ }
+ ranges = append(ranges, pcRange{r[0], r[1], unit})
+ }
+ r.SkipChildren()
+ }
+ sort.Slice(ranges, func(i, j int) bool {
+ return ranges[i].start < ranges[j].start
+ })
+ return ranges, units, nil
+}
+
+func symbolize(target *targets.Target, obj string, pcs []uint64) ([]symbolizer.Frame, error) {
+ procs := runtime.GOMAXPROCS(0) / 2
+ if need := len(pcs) / 1000; procs > need {
+ procs = need
+ }
+ const (
+ minProcs = 1
+ maxProcs = 4
+ )
+ // addr2line on a beefy vmlinux takes up to 1.6GB of RAM, so don't create too many of them.
+ if procs > maxProcs {
+ procs = maxProcs
+ }
+ if procs < minProcs {
+ procs = minProcs
+ }
+ type symbolizerResult struct {
+ frames []symbolizer.Frame
+ err error
+ }
+ symbolizerC := make(chan symbolizerResult, procs)
+ pcchan := make(chan []uint64, procs)
+ for p := 0; p < procs; p++ {
+ go func() {
+ symb := symbolizer.NewSymbolizer(target)
+ defer symb.Close()
+ var res symbolizerResult
+ for pcs := range pcchan {
+ frames, err := symb.SymbolizeArray(obj, pcs)
+ if err != nil {
+ res.err = fmt.Errorf("failed to symbolize: %v", err)
+ }
+ res.frames = append(res.frames, frames...) 
+ } + symbolizerC <- res + }() + } + for i := 0; i < len(pcs); { + end := i + 100 + if end > len(pcs) { + end = len(pcs) + } + pcchan <- pcs[i:end] + i = end + } + close(pcchan) + var err0 error + var frames []symbolizer.Frame + for p := 0; p < procs; p++ { + res := <-symbolizerC + if res.err != nil { + err0 = res.err + } + frames = append(frames, res.frames...) + } + if err0 != nil { + return nil, err0 + } + return frames, nil +} + +// readCoverPoints finds all coverage points (calls of __sanitizer_cov_trace_pc) in the object file. +// Currently it is amd64-specific: looks for e8 opcode and correct offset. +// Running objdump on the whole object file is too slow. +func readCoverPoints(file *elf.File, tracePC uint64) ([]uint64, error) { + text := file.Section(".text") + if text == nil { + return nil, fmt.Errorf("no .text section in the object file") + } + data, err := text.Data() + if err != nil { + return nil, fmt.Errorf("failed to read .text: %v", err) + } + var pcs []uint64 + const callLen = 5 + end := len(data) - callLen + 1 + for i := 0; i < end; i++ { + pos := bytes.IndexByte(data[i:end], 0xe8) + if pos == -1 { + break + } + pos += i + i = pos + off := uint64(int64(int32(binary.LittleEndian.Uint32(data[pos+1:])))) + pc := text.Addr + uint64(pos) + target := pc + off + callLen + if target == tracePC { + pcs = append(pcs, pc) + } + } + return pcs, nil +} + +// objdump is an old, slow way of finding coverage points. +// amd64 uses faster option of parsing binary directly (readCoverPoints). +// TODO: use the faster approach for all other arches and drop this. 
+func objdump(target *targets.Target, obj string) ([]uint64, error) { + cmd := osutil.Command(target.Objdump, "-d", "--no-show-raw-insn", obj) + stdout, err := cmd.StdoutPipe() + if err != nil { + return nil, err + } + defer stdout.Close() + stderr, err := cmd.StderrPipe() + if err != nil { + return nil, err + } + defer stderr.Close() + if err := cmd.Start(); err != nil { + return nil, fmt.Errorf("failed to run objdump on %v: %v", obj, err) + } + defer func() { + cmd.Process.Kill() + cmd.Wait() + }() + s := bufio.NewScanner(stdout) + callInsns, traceFuncs := archCallInsn(target) + var pcs []uint64 + for s.Scan() { + if pc := parseLine(callInsns, traceFuncs, s.Bytes()); pc != 0 { + pcs = append(pcs, pc) + } + } + stderrOut, _ := ioutil.ReadAll(stderr) + if err := cmd.Wait(); err != nil { + return nil, fmt.Errorf("failed to run objdump on %v: %v\n%s", obj, err, stderrOut) + } + if err := s.Err(); err != nil { + return nil, fmt.Errorf("failed to run objdump on %v: %v\n%s", obj, err, stderrOut) + } + return pcs, nil +} + +func parseLine(callInsns, traceFuncs [][]byte, ln []byte) uint64 { + pos := -1 + for _, callInsn := range callInsns { + if pos = bytes.Index(ln, callInsn); pos != -1 { + break + } + } + if pos == -1 { + return 0 + } + hasCall := false + for _, traceFunc := range traceFuncs { + if hasCall = bytes.Contains(ln[pos:], traceFunc); hasCall { + break + } + } + if !hasCall { + return 0 + } + for len(ln) != 0 && ln[0] == ' ' { + ln = ln[1:] + } + colon := bytes.IndexByte(ln, ':') + if colon == -1 { + return 0 + } + pc, err := strconv.ParseUint(string(ln[:colon]), 16, 64) + if err != nil { + return 0 + } + return pc +} + +func archCallInsn(target *targets.Target) ([][]byte, [][]byte) { + callName := [][]byte{[]byte(" <__sanitizer_cov_trace_pc>")} + switch target.Arch { + case targets.I386: + // c1000102: call c10001f0 <__sanitizer_cov_trace_pc> + return [][]byte{[]byte("\tcall ")}, callName + case targets.ARM64: + // ffff0000080d9cc0: bl ffff00000820f478 
<__sanitizer_cov_trace_pc> + return [][]byte{[]byte("\tbl\t")}, callName + case targets.ARM: + // 8010252c: bl 801c3280 <__sanitizer_cov_trace_pc> + return [][]byte{[]byte("\tbl\t")}, callName + case targets.PPC64LE: + // c00000000006d904: bl c000000000350780 <.__sanitizer_cov_trace_pc> + // This is only known to occur in the test: + // 838: bl 824 <__sanitizer_cov_trace_pc+0x8> + // This occurs on PPC64LE: + // c0000000001c21a8: bl c0000000002df4a0 <__sanitizer_cov_trace_pc> + return [][]byte{[]byte("\tbl ")}, [][]byte{ + []byte("<__sanitizer_cov_trace_pc>"), + []byte("<__sanitizer_cov_trace_pc+0x8>"), + []byte(" <.__sanitizer_cov_trace_pc>"), + } + case targets.MIPS64LE: + // ffffffff80100420: jal ffffffff80205880 <__sanitizer_cov_trace_pc> + // This is only known to occur in the test: + // b58: bal b30 <__sanitizer_cov_trace_pc> + return [][]byte{[]byte("\tjal\t"), []byte("\tbal\t")}, callName + case targets.S390x: + // 1001de: brasl %r14,2bc090 <__sanitizer_cov_trace_pc> + return [][]byte{[]byte("\tbrasl\t")}, callName + case targets.RiscV64: + // ffffffe000200018: jal ra,ffffffe0002935b0 <__sanitizer_cov_trace_pc> + // ffffffe0000010da: jalr 1242(ra) # ffffffe0002935b0 <__sanitizer_cov_trace_pc> + return [][]byte{[]byte("\tjal\t"), []byte("\tjalr\t")}, callName + default: + panic(fmt.Sprintf("unknown arch %q", target.Arch)) + } +} -- cgit mrf-deployment