From 6514729552c7761911858dec87f4c95b8aa8ab45 Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Fri, 7 Mar 2025 23:27:53 +0100 Subject: syz-cluster: download latest corpuses from syzbot --- syz-cluster/pkg/api/api.go | 21 ++++++++++-- syz-cluster/pkg/workflow/template.yaml | 2 ++ syz-cluster/workflow/fuzz-step/main.go | 37 ++++++++++++++++++++-- .../workflow/fuzz-step/workflow-template.yaml | 3 ++ syz-cluster/workflow/triage-step/main.go | 7 ++-- 5 files changed, 62 insertions(+), 8 deletions(-) diff --git a/syz-cluster/pkg/api/api.go b/syz-cluster/pkg/api/api.go index 1b2b8f79f..1644b0ff0 100644 --- a/syz-cluster/pkg/api/api.go +++ b/syz-cluster/pkg/api/api.go @@ -18,9 +18,10 @@ type SkipRequest struct { // The data layout faclitates the simplicity of the workflow definition. type FuzzConfig struct { - Base BuildRequest `json:"base"` - Patched BuildRequest `json:"patched"` - Config string `json:"config"` // Refers to workflow/configs/{}. + Base BuildRequest `json:"base"` + Patched BuildRequest `json:"patched"` + Config string `json:"config"` // Refers to workflow/configs/{}. + CorpusURL string `json:"corpus_url"` } // The triage step of the workflow will request these from controller. @@ -166,3 +167,17 @@ var DefaultTrees = []*Tree{ FuzzConfig: `net`, }, } + +// TODO: find a better place for it. 
+func (tree *Tree) CorpusURL() string { + if url, ok := fuzzToCorpus[tree.FuzzConfig]; ok { + return url + } + return corpusFallbackURL +} + +var fuzzToCorpus = map[string]string{ + `net`: `https://storage.googleapis.com/syzkaller/corpus/ci-upstream-net-kasan-gce-corpus.db`, +} + +const corpusFallbackURL = `https://storage.googleapis.com/syzkaller/corpus/ci-upstream-kasan-gce-root-corpus.db` diff --git a/syz-cluster/pkg/workflow/template.yaml b/syz-cluster/pkg/workflow/template.yaml index 0ffa06725..53752787e 100644 --- a/syz-cluster/pkg/workflow/template.yaml +++ b/syz-cluster/pkg/workflow/template.yaml @@ -144,6 +144,8 @@ spec: value: "{{=jsonpath(steps['patched-build'].outputs.parameters.result, '$.build_id')}}" - name: base-build-id value: "{{=jsonpath(steps['base-build'].outputs.parameters.result, '$.build_id')}}" + - name: corpus-url + value: "{{=jsonpath(inputs.parameters.element, '$.corpus_url')}}" artifacts: - name: base-kernel from: "{{steps.base-build.outputs.artifacts.kernel}}" diff --git a/syz-cluster/workflow/fuzz-step/main.go b/syz-cluster/workflow/fuzz-step/main.go index 6acddc55c..0782b49db 100644 --- a/syz-cluster/workflow/fuzz-step/main.go +++ b/syz-cluster/workflow/fuzz-step/main.go @@ -9,6 +9,9 @@ import ( "errors" "flag" "fmt" + "io" + "net/http" + "os" "path/filepath" "time" @@ -30,6 +33,7 @@ var ( flagPatchedBuild = flag.String("patched_build", "", "patched build ID") flagTime = flag.String("time", "1h", "how long to fuzz") flagWorkdir = flag.String("workdir", "/workdir", "base workdir path") + flagCorpusURL = flag.String("corpus_url", "", "an URL to download corpus from") ) const testName = "Fuzzing" @@ -39,8 +43,6 @@ func main() { if *flagConfig == "" || *flagSession == "" || *flagTime == "" { app.Fatalf("--config, --session and --time must be set") } - // TODO: download the corpus from somewhere. Should that be a mgrconfig option? 
-
 	client := app.DefaultClient()
 	d, err := time.ParseDuration(*flagTime)
 	if err != nil {
@@ -85,6 +87,14 @@ func run(baseCtx context.Context, client *api.Client) error {
 	}
 	manager.PatchFocusAreas(patched, series.PatchBodies())
 
+	if *flagCorpusURL != "" {
+		err := downloadCorpus(baseCtx, patched.Workdir, *flagCorpusURL)
+		if err != nil {
+			return fmt.Errorf("failed to download the corpus: %w", err)
+		}
+		log.Logf(0, "downloaded the corpus from %s", *flagCorpusURL)
+	}
+
 	eg, ctx := errgroup.WithContext(baseCtx)
 	bugs := make(chan *manager.UniqueBug)
 	eg.Go(func() error {
@@ -128,6 +138,40 @@ func run(baseCtx context.Context, client *api.Client) error {
 	return eg.Wait()
 }
 
+// downloadCorpus fetches the corpus database from url and saves it as corpus.db in workdir.
+// The data is written to a temporary file that is renamed into place only after the whole
+// download succeeds, so a failed transfer never leaves a truncated corpus.db behind.
+func downloadCorpus(ctx context.Context, workdir, url string) error {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+	if err != nil {
+		return err
+	}
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("status is not 200: %s", resp.Status)
+	}
+	target := filepath.Join(workdir, "corpus.db")
+	tmpPath := target + ".tmp"
+	out, err := os.Create(tmpPath)
+	if err != nil {
+		return err
+	}
+	_, err = io.Copy(out, resp.Body)
+	// A close error on a freshly written file may mean lost data, so do not ignore it.
+	if closeErr := out.Close(); err == nil {
+		err = closeErr
+	}
+	if err != nil {
+		os.Remove(tmpPath) // Best-effort cleanup; the copy/close error is the one to report.
+		return err
+	}
+	return os.Rename(tmpPath, target)
+}
+
 // To reduce duplication, patched configs are stored as a delta to their corresponding base.cfg version.
 // loadConfigs performs all the necessary merging and parsing and returns two ready to use configs.
func loadConfigs(configFolder, configName string, complete bool) (*mgrconfig.Config, *mgrconfig.Config, error) { diff --git a/syz-cluster/workflow/fuzz-step/workflow-template.yaml b/syz-cluster/workflow/fuzz-step/workflow-template.yaml index 9fd2abecd..31a43df19 100644 --- a/syz-cluster/workflow/fuzz-step/workflow-template.yaml +++ b/syz-cluster/workflow/fuzz-step/workflow-template.yaml @@ -16,6 +16,8 @@ spec: value: "" - name: patched-build-id value: "" + - name: corpus-url + value: "" artifacts: - name: base-kernel path: /base @@ -31,6 +33,7 @@ spec: "--session", "{{workflow.parameters.session-id}}", "--base_build", "{{inputs.parameters.base-build-id}}", "--patched_build", "{{inputs.parameters.patched-build-id}}", + "--corpus_url", "{{inputs.parameters.corpus-url}}", "--time", "3h", "--workdir", "/workdir", "--vv", "1" diff --git a/syz-cluster/workflow/triage-step/main.go b/syz-cluster/workflow/triage-step/main.go index 73cf67fd2..d284c24fd 100644 --- a/syz-cluster/workflow/triage-step/main.go +++ b/syz-cluster/workflow/triage-step/main.go @@ -100,9 +100,10 @@ func getVerdict(ctx context.Context, client *api.Client, ops triage.TreeOps) (*a } ret := &api.TriageResult{ Fuzz: &api.FuzzConfig{ - Base: base, - Patched: base, - Config: tree.FuzzConfig, + Base: base, + Patched: base, + Config: tree.FuzzConfig, + CorpusURL: tree.CorpusURL(), }, } ret.Fuzz.Patched.SeriesID = series.ID -- cgit mrf-deployment