From 7d74efd0c7990eea8684b6150a007071b23631dd Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 9 Apr 2024 16:43:42 +0200 Subject: pkg/symbolizer: intern file/func strings Intern/deduplicate file/func strings created during symbolization. There are lots and lots of duplicates. In my local run syz-manager heap size jumps from 1.9G to 4.0G after requesting /cover?jsonl=1 without this change, and from 1.9G to 2.9G with this change. --- pkg/cover/backend/dwarf.go | 17 +++++++++-------- pkg/symbolizer/cache.go | 23 +++++++++++++++++++++++ pkg/symbolizer/symbolizer.go | 13 +++++++------ pkg/symbolizer/symbolizer_test.go | 9 +++++---- 4 files changed, 44 insertions(+), 18 deletions(-) (limited to 'pkg') diff --git a/pkg/cover/backend/dwarf.go b/pkg/cover/backend/dwarf.go index 331cab9e3..c99bd3265 100644 --- a/pkg/cover/backend/dwarf.go +++ b/pkg/cover/backend/dwarf.go @@ -225,11 +225,12 @@ func makeDWARFUnsafe(params *dwarfParams) (*Impl, error) { // On FreeBSD .text address in ELF is 0, but .text is actually mapped at 0xffffffff. 
pcBase = ^uint64(0) } + var interner symbolizer.Interner impl := &Impl{ Units: allUnits, Symbols: allSymbols, Symbolize: func(pcs map[*Module][]uint64) ([]Frame, error) { - return symbolize(target, objDir, srcDir, buildDir, splitBuildDelimiters, pcs) + return symbolize(target, &interner, objDir, srcDir, buildDir, splitBuildDelimiters, pcs) }, RestorePC: makeRestorePC(params, pcBase), CallbackPoints: allCoverPoints[0], @@ -389,8 +390,8 @@ func readTextRanges(debugInfo *dwarf.Data, module *Module, pcFix pcFixFn) ( return ranges, units, nil } -func symbolizeModule(target *targets.Target, objDir, srcDir, buildDir string, splitBuildDelimiters []string, - mod *Module, pcs []uint64) ([]Frame, error) { +func symbolizeModule(target *targets.Target, interner *symbolizer.Interner, objDir, srcDir, buildDir string, + splitBuildDelimiters []string, mod *Module, pcs []uint64) ([]Frame, error) { procs := runtime.GOMAXPROCS(0) / 2 if need := len(pcs) / 1000; procs > need { procs = need @@ -451,9 +452,9 @@ func symbolizeModule(target *targets.Target, objDir, srcDir, buildDir string, sp frames = append(frames, Frame{ Module: mod, PC: frame.PC + mod.Addr, - Name: name, + Name: interner.Do(name), FuncName: frame.Func, - Path: path, + Path: interner.Do(path), Inline: frame.Inline, Range: Range{ StartLine: frame.Line, @@ -470,11 +471,11 @@ func symbolizeModule(target *targets.Target, objDir, srcDir, buildDir string, sp return frames, nil } -func symbolize(target *targets.Target, objDir, srcDir, buildDir string, splitBuildDelimiters []string, - pcs map[*Module][]uint64) ([]Frame, error) { +func symbolize(target *targets.Target, interner *symbolizer.Interner, objDir, srcDir, buildDir string, + splitBuildDelimiters []string, pcs map[*Module][]uint64) ([]Frame, error) { var frames []Frame for mod, pcs1 := range pcs { - frames1, err := symbolizeModule(target, objDir, srcDir, buildDir, splitBuildDelimiters, mod, pcs1) + frames1, err := symbolizeModule(target, interner, objDir, srcDir, 
buildDir, splitBuildDelimiters, mod, pcs1) if err != nil { return nil, err } diff --git a/pkg/symbolizer/cache.go b/pkg/symbolizer/cache.go index dcade3929..370833953 100644 --- a/pkg/symbolizer/cache.go +++ b/pkg/symbolizer/cache.go @@ -4,6 +4,7 @@ package symbolizer import ( + "strings" "sync" ) @@ -40,3 +41,25 @@ func (c *Cache) Symbolize(inner func(string, uint64) ([]Frame, error), bin strin c.mu.Unlock() return frames, err } + +// Interner allows to intern/deduplicate strings. +// Interner.Do semantically returns the same string, but physically it will point +// to an existing string with the same contents (if there was one passed to Do in the past). +// Interned strings are also "cloned", that is, if the passed string points to a large +// buffer, it won't after interning (and won't prevent GC'ing of the large buffer). +// The type is not thread-safe. +type Interner struct { + m map[string]string +} + +func (in *Interner) Do(s string) string { + if in.m == nil { + in.m = make(map[string]string) + } + if interned, ok := in.m[s]; ok { + return interned + } + s = strings.Clone(s) + in.m[s] = s + return s +} diff --git a/pkg/symbolizer/symbolizer.go b/pkg/symbolizer/symbolizer.go index ad57f005e..02e6b7693 100644 --- a/pkg/symbolizer/symbolizer.go +++ b/pkg/symbolizer/symbolizer.go @@ -20,6 +20,7 @@ import ( type Symbolizer struct { target *targets.Target subprocs map[string]*subprocess + interner Interner } type Frame struct { @@ -51,7 +52,7 @@ func (s *Symbolizer) SymbolizeArray(bin string, pcs []uint64) ([]Frame, error) { if err != nil { return nil, err } - return symbolize(sub.input, sub.scanner, pcs) + return symbolize(&s.interner, sub.input, sub.scanner, pcs) } func (s *Symbolizer) Close() { @@ -100,7 +101,7 @@ func (s *Symbolizer) getSubprocess(bin string) (*subprocess, error) { return sub, nil } -func symbolize(input *bufio.Writer, scanner *bufio.Scanner, pcs []uint64) ([]Frame, error) { +func symbolize(interner *Interner, input *bufio.Writer, scanner 
*bufio.Scanner, pcs []uint64) ([]Frame, error) { var frames []Frame done := make(chan error, 1) go func() { @@ -116,7 +117,7 @@ func symbolize(input *bufio.Writer, scanner *bufio.Scanner, pcs []uint64) ([]Fra } for range pcs { var frames1 []Frame - frames1, err = parse(scanner) + frames1, err = parse(interner, scanner) if err != nil { return } @@ -145,7 +146,7 @@ func symbolize(input *bufio.Writer, scanner *bufio.Scanner, pcs []uint64) ([]Fra return frames, nil } -func parse(s *bufio.Scanner) ([]Frame, error) { +func parse(interner *Interner, s *bufio.Scanner) ([]Frame, error) { pc, err := strconv.ParseUint(s.Text(), 0, 64) if err != nil { return nil, fmt.Errorf("failed to parse pc '%v' in addr2line output: %w", s.Text(), err) @@ -183,8 +184,8 @@ func parse(s *bufio.Scanner) ([]Frame, error) { } frames = append(frames, Frame{ PC: pc, - Func: fn, - File: file, + Func: interner.Do(fn), + File: interner.Do(file), Line: line, Inline: true, }) diff --git a/pkg/symbolizer/symbolizer_test.go b/pkg/symbolizer/symbolizer_test.go index 0bddb2f71..d7e996bb0 100644 --- a/pkg/symbolizer/symbolizer_test.go +++ b/pkg/symbolizer/symbolizer_test.go @@ -150,10 +150,11 @@ func TestParse(t *testing.T) { // First, symbolize all PCs one-by-one. input := bufio.NewWriter(inputw) scanner := bufio.NewScanner(outputr) + var interner Interner var allPCs []uint64 var allFrames []Frame for _, addr := range addresses { - frames, err := symbolize(input, scanner, []uint64{addr.pc}) + frames, err := symbolize(&interner, input, scanner, []uint64{addr.pc}) if err != nil { t.Fatalf("got error: %v", err) } @@ -166,11 +167,11 @@ func TestParse(t *testing.T) { // Symbolize PCs in 2 groups. 
for i := 0; i <= len(addresses); i++ { - frames, err := symbolize(input, scanner, allPCs[:i]) + frames, err := symbolize(&interner, input, scanner, allPCs[:i]) if err != nil { t.Fatalf("got error: %v", err) } - frames2, err := symbolize(input, scanner, allPCs[i:]) + frames2, err := symbolize(&interner, input, scanner, allPCs[i:]) if err != nil { t.Fatalf("got error: %v", err) } @@ -185,7 +186,7 @@ func TestParse(t *testing.T) { for i := range lots { lots[i] = addresses[0].pc } - frames, err := symbolize(input, scanner, lots) + frames, err := symbolize(&interner, input, scanner, lots) if err != nil { t.Fatalf("got error: %v", err) } -- cgit mrf-deployment