From a7a1cea58a31ffafc3d6473559349a52d46c33a0 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Wed, 3 Jul 2024 17:36:57 +0200 Subject: pkg/corpus: don't keep serialized programs in memory We only need serialized representation on some rare operations (some web UI pages, and first hub connect). Don't keep them in memory. In my instance this saves 503MB (15.5%) of heap, which reduces RSS by 1GB (2x due to GC). --- pkg/corpus/corpus.go | 45 +++++++++++++++++++++------------------------ syz-manager/http.go | 10 +++++----- syz-manager/hub.go | 29 ++++++++++++++--------------- syz-manager/manager.go | 8 ++------ 4 files changed, 42 insertions(+), 50 deletions(-) diff --git a/pkg/corpus/corpus.go b/pkg/corpus/corpus.go index 5b2b9983f..4ad4f6f27 100644 --- a/pkg/corpus/corpus.go +++ b/pkg/corpus/corpus.go @@ -60,14 +60,13 @@ type ItemUpdate struct { // too hard to synchonize accesses to them across the whole project. // When Corpus updates one of its items, it saves a copy of it. type Item struct { - Sig string - Call int - Prog *prog.Prog - ProgData []byte // to save some Serialize() calls - HasAny bool // whether the prog contains squashed arguments - Signal signal.Signal - Cover []uint64 - Updates []ItemUpdate + Sig string + Call int + Prog *prog.Prog + HasAny bool // whether the prog contains squashed arguments + Signal signal.Signal + Cover []uint64 + Updates []ItemUpdate } func (item Item) StringCall() string { @@ -109,14 +108,13 @@ func (corpus *Corpus) Save(inp NewInput) { newCover.Merge(old.Cover) newCover.Merge(inp.Cover) newItem := &Item{ - Sig: sig, - Prog: old.Prog, - ProgData: progData, - Call: old.Call, - HasAny: old.HasAny, - Signal: newSignal, - Cover: newCover.Serialize(), - Updates: append([]ItemUpdate{}, old.Updates...), + Sig: sig, + Prog: old.Prog, + Call: old.Call, + HasAny: old.HasAny, + Signal: newSignal, + Cover: newCover.Serialize(), + Updates: append([]ItemUpdate{}, old.Updates...), } const maxUpdates = 32 if len(newItem.Updates) < maxUpdates { @@ -125,14 +123,13 @@ func (corpus *Corpus) Save(inp NewInput) { corpus.progs[sig] = newItem } else { corpus.progs[sig] = &Item{ - Sig: sig, - Call: inp.Call, - Prog: inp.Prog, - ProgData: progData, - HasAny: inp.Prog.ContainsAny(), - Signal: inp.Signal, - Cover: inp.Cover, - Updates: []ItemUpdate{update}, + Sig: sig, + Call: inp.Call, + Prog: inp.Prog, + HasAny: inp.Prog.ContainsAny(), + Signal: inp.Signal, + Cover: inp.Cover, + Updates: []ItemUpdate{update}, } corpus.saveProgram(inp.Prog, inp.Signal) } diff --git a/syz-manager/http.go b/syz-manager/http.go index fc11e26c1..ab54f86e5 100644 --- a/syz-manager/http.go +++ b/syz-manager/http.go @@ -350,13 +350,13 @@ func (mgr *Manager) httpCoverCover(w http.ResponseWriter, r *http.Request, funcF } progs = append(progs, cover.Prog{ Sig: sig, - Data: string(inp.ProgData), + Data: string(inp.Prog.Serialize()), PCs: coverToPCs(mgr.cfg, inp.Updates[updateID].RawCover), }) } else { progs = append(progs, cover.Prog{ Sig: sig, - Data: string(inp.ProgData), + Data: string(inp.Prog.Serialize()), PCs: coverToPCs(mgr.cfg, inp.Cover), }) } @@ -368,7 +368,7 @@ func (mgr *Manager) httpCoverCover(w http.ResponseWriter, r *http.Request, funcF } progs = append(progs, cover.Prog{ Sig: inp.Sig, - Data: string(inp.ProgData), + Data: string(inp.Prog.Serialize()), PCs: coverToPCs(mgr.cfg, inp.Cover), }) } @@ -507,7 +507,7 @@ func (mgr *Manager) httpInput(w http.ResponseWriter, r *http.Request) { return } w.Header().Set("Content-Type", "text/plain; charset=utf-8") - w.Write(inp.ProgData) + w.Write(inp.Prog.Serialize()) } func (mgr *Manager) httpDebugInput(w http.ResponseWriter, r *http.Request) { @@ -528,7 +528,7 @@ func (mgr *Manager) httpDebugInput(w http.ResponseWriter, r *http.Request) { return ret } data := []UIRawCallCover{} - for pos, line := range strings.Split(string(inp.ProgData), "\n") { + for pos, line := range strings.Split(string(inp.Prog.Serialize()), "\n") { line = strings.TrimSpace(line) if line == "" { continue diff --git a/syz-manager/hub.go b/syz-manager/hub.go index 7d798fdd2..218617060 100644 --- a/syz-manager/hub.go +++ b/syz-manager/hub.go @@ -10,9 +10,9 @@ import ( "time" "github.com/google/syzkaller/pkg/auth" + "github.com/google/syzkaller/pkg/corpus" "github.com/google/syzkaller/pkg/flatrpc" "github.com/google/syzkaller/pkg/fuzzer" - "github.com/google/syzkaller/pkg/hash" "github.com/google/syzkaller/pkg/log" "github.com/google/syzkaller/pkg/mgrconfig" "github.com/google/syzkaller/pkg/report" @@ -72,7 +72,7 @@ type HubConnector struct { enabledCalls map[*prog.Syscall]bool leak bool fresh bool - hubCorpus map[hash.Sig]bool + hubCorpus map[string]bool newRepros [][]byte hubReproQueue chan *Crash needMoreRepros chan chan bool @@ -89,7 +89,7 @@ type HubConnector struct { // HubManagerView restricts interface between HubConnector and Manager. type HubManagerView interface { - getMinimizedCorpus() (corpus, repros [][]byte) + getMinimizedCorpus() (corpus []*corpus.Item, repros [][]byte) addNewCandidates(candidates []fuzzer.Candidate) hubIsUnreachable() } @@ -125,7 +125,7 @@ func (hc *HubConnector) loop() { } } -func (hc *HubConnector) connect(corpus [][]byte) (*rpctype.RPCClient, error) { +func (hc *HubConnector) connect(corpus []*corpus.Item) (*rpctype.RPCClient, error) { key, err := hc.keyGet() if err != nil { return nil, err @@ -144,10 +144,10 @@ func (hc *HubConnector) connect(corpus [][]byte) (*rpctype.RPCClient, error) { for call := range hc.enabledCalls { a.Calls = append(a.Calls, call.Name) } - hubCorpus := make(map[hash.Sig]bool) + hubCorpus := make(map[string]bool) for _, inp := range corpus { - hubCorpus[hash.Hash(inp)] = true - a.Corpus = append(a.Corpus, inp) + hubCorpus[inp.Sig] = true + a.Corpus = append(a.Corpus, inp.Prog.Serialize()) } // Never send more than this, this is never healthy but happens episodically // due to various reasons: problems with fallback coverage, bugs in kcov, @@ -172,7 +172,7 @@ func (hc *HubConnector) connect(corpus [][]byte) (*rpctype.RPCClient, error) { return hub, nil } -func (hc *HubConnector) sync(hub *rpctype.RPCClient, corpus [][]byte) error { +func (hc *HubConnector) sync(hub *rpctype.RPCClient, corpus []*corpus.Item) error { key, err := hc.keyGet() if err != nil { return err @@ -182,22 +182,21 @@ func (hc *HubConnector) sync(hub *rpctype.RPCClient, corpus [][]byte) error { Key: key, Manager: hc.cfg.Name, } - sigs := make(map[hash.Sig]bool) + sigs := make(map[string]bool) for _, inp := range corpus { - sig := hash.Hash(inp) - sigs[sig] = true - if hc.hubCorpus[sig] { + sigs[inp.Sig] = true + if hc.hubCorpus[inp.Sig] { continue } - hc.hubCorpus[sig] = true - a.Add = append(a.Add, inp) + hc.hubCorpus[inp.Sig] = true + a.Add = append(a.Add, inp.Prog.Serialize()) } for sig := range hc.hubCorpus { if sigs[sig] { continue } delete(hc.hubCorpus, sig) - a.Del = append(a.Del, sig.String()) + a.Del = append(a.Del, sig) } if hc.needMoreRepros != nil { needReproReply := make(chan bool) diff --git a/syz-manager/manager.go b/syz-manager/manager.go index a20a8f336..69a285d42 100644 --- a/syz-manager/manager.go +++ b/syz-manager/manager.go @@ -1366,15 +1366,11 @@ func (mgr *Manager) corpusInputHandler(updates <-chan corpus.NewItemEvent) { } } -func (mgr *Manager) getMinimizedCorpus() (corpus, repros [][]byte) { +func (mgr *Manager) getMinimizedCorpus() (corpus []*corpus.Item, repros [][]byte) { mgr.mu.Lock() defer mgr.mu.Unlock() mgr.minimizeCorpusLocked() - items := mgr.corpus.Items() - corpus = make([][]byte, 0, len(items)) - for _, inp := range items { - corpus = append(corpus, inp.ProgData) - } + corpus = mgr.corpus.Items() repros = mgr.newRepros mgr.newRepros = nil return -- cgit mrf-deployment