aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/symbolizer
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2024-04-09 16:43:42 +0200
committerDmitry Vyukov <dvyukov@google.com>2024-04-10 09:08:41 +0000
commit7d74efd0c7990eea8684b6150a007071b23631dd (patch)
treef2d81a026859a75da85ab2e1f4d7e6a9ab4d73f0 /pkg/symbolizer
parentbb5e6c0f355e952e91e8aef0ffb51b7b153b2e25 (diff)
pkg/symbolizer: intern file/func strings
Intern/deduplicate file/func strings created during symbolization. There are lots and lots of duplicates. In my local run, syz-manager heap size jumps from 1.9G to 4.0G after requesting /cover?jsonl=1 without this change, and from 1.9G to 2.9G with this change.
Diffstat (limited to 'pkg/symbolizer')
-rw-r--r--pkg/symbolizer/cache.go23
-rw-r--r--pkg/symbolizer/symbolizer.go13
-rw-r--r--pkg/symbolizer/symbolizer_test.go9
3 files changed, 35 insertions, 10 deletions
diff --git a/pkg/symbolizer/cache.go b/pkg/symbolizer/cache.go
index dcade3929..370833953 100644
--- a/pkg/symbolizer/cache.go
+++ b/pkg/symbolizer/cache.go
@@ -4,6 +4,7 @@
package symbolizer
import (
+ "strings"
"sync"
)
@@ -40,3 +41,25 @@ func (c *Cache) Symbolize(inner func(string, uint64) ([]Frame, error), bin strin
c.mu.Unlock()
return frames, err
}
+
+// Interner allows to intern/deduplicate strings.
+// Interner.Do semantically returns the same string, but physically it will point
+// to an existing string with the same contents (if there was one passed to Do in the past).
+// Interned strings are also "cloned", that is, if the passed string points to a large
+// buffer, it won't after interning (and won't prevent GC'ing of the large buffer).
+// The type is not thread-safe.
+type Interner struct {
+ m map[string]string
+}
+
+func (in *Interner) Do(s string) string {
+ if in.m == nil {
+ in.m = make(map[string]string)
+ }
+ if interned, ok := in.m[s]; ok {
+ return interned
+ }
+ s = strings.Clone(s)
+ in.m[s] = s
+ return s
+}
diff --git a/pkg/symbolizer/symbolizer.go b/pkg/symbolizer/symbolizer.go
index ad57f005e..02e6b7693 100644
--- a/pkg/symbolizer/symbolizer.go
+++ b/pkg/symbolizer/symbolizer.go
@@ -20,6 +20,7 @@ import (
type Symbolizer struct {
target *targets.Target
subprocs map[string]*subprocess
+ interner Interner
}
type Frame struct {
@@ -51,7 +52,7 @@ func (s *Symbolizer) SymbolizeArray(bin string, pcs []uint64) ([]Frame, error) {
if err != nil {
return nil, err
}
- return symbolize(sub.input, sub.scanner, pcs)
+ return symbolize(&s.interner, sub.input, sub.scanner, pcs)
}
func (s *Symbolizer) Close() {
@@ -100,7 +101,7 @@ func (s *Symbolizer) getSubprocess(bin string) (*subprocess, error) {
return sub, nil
}
-func symbolize(input *bufio.Writer, scanner *bufio.Scanner, pcs []uint64) ([]Frame, error) {
+func symbolize(interner *Interner, input *bufio.Writer, scanner *bufio.Scanner, pcs []uint64) ([]Frame, error) {
var frames []Frame
done := make(chan error, 1)
go func() {
@@ -116,7 +117,7 @@ func symbolize(input *bufio.Writer, scanner *bufio.Scanner, pcs []uint64) ([]Fra
}
for range pcs {
var frames1 []Frame
- frames1, err = parse(scanner)
+ frames1, err = parse(interner, scanner)
if err != nil {
return
}
@@ -145,7 +146,7 @@ func symbolize(input *bufio.Writer, scanner *bufio.Scanner, pcs []uint64) ([]Fra
return frames, nil
}
-func parse(s *bufio.Scanner) ([]Frame, error) {
+func parse(interner *Interner, s *bufio.Scanner) ([]Frame, error) {
pc, err := strconv.ParseUint(s.Text(), 0, 64)
if err != nil {
return nil, fmt.Errorf("failed to parse pc '%v' in addr2line output: %w", s.Text(), err)
@@ -183,8 +184,8 @@ func parse(s *bufio.Scanner) ([]Frame, error) {
}
frames = append(frames, Frame{
PC: pc,
- Func: fn,
- File: file,
+ Func: interner.Do(fn),
+ File: interner.Do(file),
Line: line,
Inline: true,
})
diff --git a/pkg/symbolizer/symbolizer_test.go b/pkg/symbolizer/symbolizer_test.go
index 0bddb2f71..d7e996bb0 100644
--- a/pkg/symbolizer/symbolizer_test.go
+++ b/pkg/symbolizer/symbolizer_test.go
@@ -150,10 +150,11 @@ func TestParse(t *testing.T) {
// First, symbolize all PCs one-by-one.
input := bufio.NewWriter(inputw)
scanner := bufio.NewScanner(outputr)
+ var interner Interner
var allPCs []uint64
var allFrames []Frame
for _, addr := range addresses {
- frames, err := symbolize(input, scanner, []uint64{addr.pc})
+ frames, err := symbolize(&interner, input, scanner, []uint64{addr.pc})
if err != nil {
t.Fatalf("got error: %v", err)
}
@@ -166,11 +167,11 @@ func TestParse(t *testing.T) {
// Symbolize PCs in 2 groups.
for i := 0; i <= len(addresses); i++ {
- frames, err := symbolize(input, scanner, allPCs[:i])
+ frames, err := symbolize(&interner, input, scanner, allPCs[:i])
if err != nil {
t.Fatalf("got error: %v", err)
}
- frames2, err := symbolize(input, scanner, allPCs[i:])
+ frames2, err := symbolize(&interner, input, scanner, allPCs[i:])
if err != nil {
t.Fatalf("got error: %v", err)
}
@@ -185,7 +186,7 @@ func TestParse(t *testing.T) {
for i := range lots {
lots[i] = addresses[0].pc
}
- frames, err := symbolize(input, scanner, lots)
+ frames, err := symbolize(&interner, input, scanner, lots)
if err != nil {
t.Fatalf("got error: %v", err)
}