diff options
| author | Dmitry Vyukov <dvyukov@google.com> | 2024-04-09 16:43:42 +0200 |
|---|---|---|
| committer | Dmitry Vyukov <dvyukov@google.com> | 2024-04-10 09:08:41 +0000 |
| commit | 7d74efd0c7990eea8684b6150a007071b23631dd (patch) | |
| tree | f2d81a026859a75da85ab2e1f4d7e6a9ab4d73f0 /pkg/symbolizer | |
| parent | bb5e6c0f355e952e91e8aef0ffb51b7b153b2e25 (diff) | |
pkg/symbolizer: intern file/func strings
Intern/deduplicate file/func strings created during symbolization.
There are lots and lots of duplicates.
In my local run, syz-manager heap size jumps from 1.9G to 4.0G
after requesting /cover?jsonl=1 without this change, and from
1.9G to 2.9G with this change.
Diffstat (limited to 'pkg/symbolizer')
| -rw-r--r-- | pkg/symbolizer/cache.go | 23 | ||||
| -rw-r--r-- | pkg/symbolizer/symbolizer.go | 13 | ||||
| -rw-r--r-- | pkg/symbolizer/symbolizer_test.go | 9 |
3 files changed, 35 insertions, 10 deletions
diff --git a/pkg/symbolizer/cache.go b/pkg/symbolizer/cache.go index dcade3929..370833953 100644 --- a/pkg/symbolizer/cache.go +++ b/pkg/symbolizer/cache.go @@ -4,6 +4,7 @@ package symbolizer import ( + "strings" "sync" ) @@ -40,3 +41,25 @@ func (c *Cache) Symbolize(inner func(string, uint64) ([]Frame, error), bin strin c.mu.Unlock() return frames, err } + +// Interner allows to intern/deduplicate strings. +// Interner.Do semantically returns the same string, but physically it will point +// to an existing string with the same contents (if there was one passed to Do in the past). +// Interned strings are also "cloned", that is, if the passed string points to a large +// buffer, it won't after interning (and won't prevent GC'ing of the large buffer). +// The type is not thread-safe. +type Interner struct { + m map[string]string +} + +func (in *Interner) Do(s string) string { + if in.m == nil { + in.m = make(map[string]string) + } + if interned, ok := in.m[s]; ok { + return interned + } + s = strings.Clone(s) + in.m[s] = s + return s +} diff --git a/pkg/symbolizer/symbolizer.go b/pkg/symbolizer/symbolizer.go index ad57f005e..02e6b7693 100644 --- a/pkg/symbolizer/symbolizer.go +++ b/pkg/symbolizer/symbolizer.go @@ -20,6 +20,7 @@ import ( type Symbolizer struct { target *targets.Target subprocs map[string]*subprocess + interner Interner } type Frame struct { @@ -51,7 +52,7 @@ func (s *Symbolizer) SymbolizeArray(bin string, pcs []uint64) ([]Frame, error) { if err != nil { return nil, err } - return symbolize(sub.input, sub.scanner, pcs) + return symbolize(&s.interner, sub.input, sub.scanner, pcs) } func (s *Symbolizer) Close() { @@ -100,7 +101,7 @@ func (s *Symbolizer) getSubprocess(bin string) (*subprocess, error) { return sub, nil } -func symbolize(input *bufio.Writer, scanner *bufio.Scanner, pcs []uint64) ([]Frame, error) { +func symbolize(interner *Interner, input *bufio.Writer, scanner *bufio.Scanner, pcs []uint64) ([]Frame, error) { var frames []Frame done := 
make(chan error, 1) go func() { @@ -116,7 +117,7 @@ func symbolize(input *bufio.Writer, scanner *bufio.Scanner, pcs []uint64) ([]Fra } for range pcs { var frames1 []Frame - frames1, err = parse(scanner) + frames1, err = parse(interner, scanner) if err != nil { return } @@ -145,7 +146,7 @@ func symbolize(input *bufio.Writer, scanner *bufio.Scanner, pcs []uint64) ([]Fra return frames, nil } -func parse(s *bufio.Scanner) ([]Frame, error) { +func parse(interner *Interner, s *bufio.Scanner) ([]Frame, error) { pc, err := strconv.ParseUint(s.Text(), 0, 64) if err != nil { return nil, fmt.Errorf("failed to parse pc '%v' in addr2line output: %w", s.Text(), err) @@ -183,8 +184,8 @@ func parse(s *bufio.Scanner) ([]Frame, error) { } frames = append(frames, Frame{ PC: pc, - Func: fn, - File: file, + Func: interner.Do(fn), + File: interner.Do(file), Line: line, Inline: true, }) diff --git a/pkg/symbolizer/symbolizer_test.go b/pkg/symbolizer/symbolizer_test.go index 0bddb2f71..d7e996bb0 100644 --- a/pkg/symbolizer/symbolizer_test.go +++ b/pkg/symbolizer/symbolizer_test.go @@ -150,10 +150,11 @@ func TestParse(t *testing.T) { // First, symbolize all PCs one-by-one. input := bufio.NewWriter(inputw) scanner := bufio.NewScanner(outputr) + var interner Interner var allPCs []uint64 var allFrames []Frame for _, addr := range addresses { - frames, err := symbolize(input, scanner, []uint64{addr.pc}) + frames, err := symbolize(&interner, input, scanner, []uint64{addr.pc}) if err != nil { t.Fatalf("got error: %v", err) } @@ -166,11 +167,11 @@ func TestParse(t *testing.T) { // Symbolize PCs in 2 groups. 
for i := 0; i <= len(addresses); i++ { - frames, err := symbolize(input, scanner, allPCs[:i]) + frames, err := symbolize(&interner, input, scanner, allPCs[:i]) if err != nil { t.Fatalf("got error: %v", err) } - frames2, err := symbolize(input, scanner, allPCs[i:]) + frames2, err := symbolize(&interner, input, scanner, allPCs[i:]) if err != nil { t.Fatalf("got error: %v", err) } @@ -185,7 +186,7 @@ func TestParse(t *testing.T) { for i := range lots { lots[i] = addresses[0].pc } - frames, err := symbolize(input, scanner, lots) + frames, err := symbolize(&interner, input, scanner, lots) if err != nil { t.Fatalf("got error: %v", err) } |
