diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2024-12-17 16:10:02 +0100 |
|---|---|---|
| committer | Aleksandr Nogikh <nogikh@google.com> | 2025-01-22 13:17:53 +0000 |
| commit | 44f2ad31190603135f4ac758273f26111ca6003c (patch) | |
| tree | 4f6190f27654e45bfb3bcd71d4c53adc533909a1 /syz-cluster/series-tracker | |
| parent | da72ac06e38cf1dd2ecbddd5502225ff7589542d (diff) | |
syz-cluster: initial code
The basic code of a K8S-based cluster that:
* Aggregates new LKML patch series.
* Determines the kernel trees to apply them to.
* Builds both the base and the patched kernel.
* Displays the results on a web dashboard.
This is a very rudimentary version with a lot of TODOs; it
provides a skeleton for further work.
The project makes use of Argo workflows and Spanner DB.
Bootstrap is used for the web interface.
Overall structure:
* syz-cluster/dashboard: a web dashboard listing patch series
and their test results.
* syz-cluster/series-tracker: polls Lore archives and submits
the new patch series to the DB.
* syz-cluster/controller: schedules workflows and provides API for them.
* syz-cluster/kernel-disk: a cron job that keeps a kernel checkout up to date.
* syz-cluster/workflow/*: workflow steps.
For the DB structure see syz-cluster/pkg/db/migrations/*.
Diffstat (limited to 'syz-cluster/series-tracker')
| -rw-r--r-- | syz-cluster/series-tracker/Dockerfile | 30 | ||||
| -rw-r--r-- | syz-cluster/series-tracker/Dockerfile.test | 39 | ||||
| -rw-r--r-- | syz-cluster/series-tracker/deployment.yaml | 35 | ||||
| -rw-r--r-- | syz-cluster/series-tracker/git-pvc.yaml | 14 | ||||
| -rw-r--r-- | syz-cluster/series-tracker/kustomization.yaml | 6 | ||||
| -rw-r--r-- | syz-cluster/series-tracker/main.go | 198 | ||||
| -rw-r--r-- | syz-cluster/series-tracker/manifest.go | 139 | ||||
| -rw-r--r-- | syz-cluster/series-tracker/manifest_test.go | 44 |
8 files changed, 505 insertions, 0 deletions
diff --git a/syz-cluster/series-tracker/Dockerfile b/syz-cluster/series-tracker/Dockerfile new file mode 100644 index 000000000..4b7c49cd9 --- /dev/null +++ b/syz-cluster/series-tracker/Dockerfile @@ -0,0 +1,30 @@ +# Copyright 2024 syzkaller project authors. All rights reserved. +# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +FROM golang:1.23-alpine AS series-tracker-builder + +WORKDIR /build + +# Prepare the dependencies. +COPY go.mod go.sum ./ +RUN go mod download + +# Build the tool. +COPY pkg/ pkg/ +# TODO: get rid of this dependency. +COPY prog/ prog/ +COPY dashboard/dashapi/ dashboard/dashapi/ +COPY sys/targets/ sys/targets/ +COPY syz-cluster/series-tracker/*.go syz-cluster/series-tracker/ +COPY syz-cluster/pkg/ syz-cluster/pkg/ + +RUN go build -o /build/series-tracker-bin /build/syz-cluster/series-tracker + +FROM ubuntu:latest + +RUN apt-get update && \ + apt-get install -y git + +COPY --from=series-tracker-builder /build/series-tracker-bin /bin/series-tracker + +ENTRYPOINT ["/bin/series-tracker"] diff --git a/syz-cluster/series-tracker/Dockerfile.test b/syz-cluster/series-tracker/Dockerfile.test new file mode 100644 index 000000000..1f9ca22de --- /dev/null +++ b/syz-cluster/series-tracker/Dockerfile.test @@ -0,0 +1,39 @@ +# I. Checkout the git repository. +FROM ubuntu:latest AS git-source + +RUN apt-get update && \ + apt-get install -y git + +WORKDIR /git-repo + +RUN git init +RUN git remote add docs-0 http://lore.kernel.org/linux-doc/0 +RUN git fetch docs-0 +RUN git checkout docs-0/master + +# II. Build the tool. +FROM golang:1.23-alpine AS series-tracker-builder + +WORKDIR /build + +# Prepare the dependencies. +COPY go.mod go.sum ./ +RUN go mod download + +# Build the tool. +COPY pkg/ pkg/ +# TODO: get rid of this dependency. 
+COPY prog/ prog/ +COPY dashboard/dashapi/ dashboard/dashapi/ +COPY sys/targets/ sys/targets/ +COPY syz-cluster/series-tracker/*.go syz-cluster/series-tracker/ +COPY syz-cluster/pkg/ syz-cluster/pkg/ + +RUN go build -o /build/series-tracker-bin /build/syz-cluster/series-tracker + +# III. Create the actual container. +FROM git-source + +COPY --from=series-tracker-builder /build/series-tracker-bin /bin/series-tracker + +ENTRYPOINT ["/bin/series-tracker"] diff --git a/syz-cluster/series-tracker/deployment.yaml b/syz-cluster/series-tracker/deployment.yaml new file mode 100644 index 000000000..2a84f0722 --- /dev/null +++ b/syz-cluster/series-tracker/deployment.yaml @@ -0,0 +1,35 @@ +# Copyright 2025 syzkaller project authors. All rights reserved. +# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: series-tracker +spec: + replicas: 1 + selector: + matchLabels: + app: series-tracker + template: + metadata: + labels: + app: series-tracker + spec: + containers: + - name: series-tracker-image + image: series-tracker-image + envFrom: + - configMapRef: + name: global-config + volumeMounts: + - name: series-tracker-repo-disk + mountPath: /git-repo + - name: blobs-storage-disk + mountPath: /blob-storage + volumes: + - name: series-tracker-repo-disk + persistentVolumeClaim: + claimName: series-tracker-repo-disk-claim + - name: blobs-storage-disk + persistentVolumeClaim: + claimName: blob-storage-disk-claim diff --git a/syz-cluster/series-tracker/git-pvc.yaml b/syz-cluster/series-tracker/git-pvc.yaml new file mode 100644 index 000000000..b9f61f39a --- /dev/null +++ b/syz-cluster/series-tracker/git-pvc.yaml @@ -0,0 +1,14 @@ +# Copyright 2025 syzkaller project authors. All rights reserved. +# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: series-tracker-repo-disk-claim +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 25Gi + storageClassName: standard diff --git a/syz-cluster/series-tracker/kustomization.yaml b/syz-cluster/series-tracker/kustomization.yaml new file mode 100644 index 000000000..e949daf9c --- /dev/null +++ b/syz-cluster/series-tracker/kustomization.yaml @@ -0,0 +1,6 @@ +# Copyright 2025 syzkaller project authors. All rights reserved. +# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +resources: +- deployment.yaml +- git-pvc.yaml diff --git a/syz-cluster/series-tracker/main.go b/syz-cluster/series-tracker/main.go new file mode 100644 index 000000000..a8121bf92 --- /dev/null +++ b/syz-cluster/series-tracker/main.go @@ -0,0 +1,198 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package main + +import ( + "bytes" + "context" + "flag" + "fmt" + "log" + "path/filepath" + "regexp" + "time" + + "github.com/google/syzkaller/pkg/email" + "github.com/google/syzkaller/pkg/email/lore" + "github.com/google/syzkaller/pkg/vcs" + "github.com/google/syzkaller/syz-cluster/pkg/app" + "github.com/google/syzkaller/syz-cluster/pkg/blob" + "github.com/google/syzkaller/syz-cluster/pkg/db" +) + +var flagVerbose = flag.Bool("verbose", false, "enable verbose output") + +// TODO: add more. +var archivesToQuery = []string{"linux-wireless", "netfilter-devel"} + +func main() { + flag.Parse() + ctx := context.Background() + env, err := app.Environment(ctx) + if err != nil { + app.Fatalf("failed to set up environment: %v", err) + } + manifest := NewManifestSource(`https://lore.kernel.org`) + fetcher := &SeriesFetcher{ + gitRepoFolder: `/git-repo`, // Set in deployment.yaml. 
+ seriesRepo: db.NewSeriesRepository(env.Spanner), + blobStorage: env.BlobStorage, + manifest: manifest, + } + go manifest.Loop(ctx) + + // On start, look at the last week of messages. + nextFrom := time.Now().Add(-time.Hour * 24 * 7) + for { + oldFrom := nextFrom + // Then, parse last 30 minutes every 15 minutes. + nextFrom = time.Now().Add(-time.Minute * 15) + err := fetcher.Update(ctx, oldFrom) + if err != nil { + // TODO: make sure these are alerted. + log.Print(err) + } + time.Sleep(15 * time.Minute) + } +} + +type SeriesFetcher struct { + gitRepoFolder string + seriesRepo *db.SeriesRepository + blobStorage blob.Storage + manifest *ManifestSource +} + +func (sf *SeriesFetcher) Update(ctx context.Context, from time.Time) error { + log.Printf("querying email threads since %v", from) + + manifest := sf.manifest.Get(ctx) + if manifest == nil { + return fmt.Errorf("failed to query the manifest data") + } + var list []lore.EmailReader + for _, name := range archivesToQuery { + info, ok := manifest[name] + if !ok { + return fmt.Errorf("manifest has no info for %q", name) + } + url := info.LastEpochURL() + log.Printf("polling %s", url) + + folderName := sanitizeName(name) + if folderName == "" { + return fmt.Errorf("invalid archive name: %q", name) + } + gitRepo := vcs.NewLKMLRepo(filepath.Join(sf.gitRepoFolder, folderName)) + // TODO: by querying only the last archive, we risk losing the series that are split between both. + // But for now let's ignore this possibility. + _, err := gitRepo.Poll(url, "master") + if err != nil { + return fmt.Errorf("failed to poll %q: %w", url, err) + } + repoList, err := lore.ReadArchive(gitRepo, from) + if err != nil { + return err + } + log.Printf("queried %d emails", len(repoList)) + list = append(list, repoList...) + } + + var emails []*email.Email + idToReader := map[string]lore.EmailReader{} + for _, item := range list { + // TODO: this could be done in several threads. 
+ email, err := item.Parse(nil, nil) + if err != nil { + log.Printf("failed to parse email: %v", err) + continue + } + idToReader[email.MessageID] = item + emails = append(emails, email) + } + log.Printf("extracted: %d", len(list)) + + allSeries := lore.PatchSeries(emails) + log.Printf("collected %d series", len(allSeries)) + + for _, series := range allSeries { + if *flagVerbose { + logSeries(series) + } + err := sf.handleSeries(ctx, series, idToReader) + if err != nil { + return err + } + } + return nil +} + +func (sf *SeriesFetcher) handleSeries(ctx context.Context, series *lore.Series, + idToReader map[string]lore.EmailReader) error { + if series.Corrupted != "" { + log.Printf("skipping %s because of %q", series.MessageID, series.Corrupted) + return nil + } + first := series.Patches[0] + date := first.Date + if date.IsZero() || date.After(time.Now()) { + // We cannot fully trust dates from the mailing list as some of them are very weird, e.g. + // https://lore.kernel.org/all/20770915-nolibc-run-user-v1-1-3caec61726dc@weissschuh.net/raw. + date = time.Now() + } + err := sf.seriesRepo.Insert(ctx, &db.Series{ + ExtID: series.MessageID, + // TODO: set AuthorName? + AuthorEmail: first.Author, + Title: series.Subject, + Version: int64(series.Version), + Link: "https://lore.kernel.org/all/" + series.MessageID, + PublishedAt: date, + // TODO: set Cc. + }, func() ([]*db.Patch, error) { + var ret []*db.Patch + for _, patch := range series.Patches { + body, err := idToReader[patch.MessageID].Read() + if err != nil { + return nil, fmt.Errorf("failed to extract %q: %w", patch.MessageID, err) + } + // In case of errors, we will waste some space, but let's ignore it for simplicity. + // Patches are not super big. 
+ uri, err := sf.blobStorage.Store(bytes.NewReader(body)) + if err != nil { + return nil, fmt.Errorf("failed to upload patch body: %w", err) + } + ret = append(ret, &db.Patch{ + Seq: int64(patch.Seq), + Title: patch.Subject, + Link: "https://lore.kernel.org/all/" + patch.MessageID, + BodyURI: uri, + }) + } + return ret, nil + }) + if err == db.ErrSeriesExists { + log.Printf("series %s already exists in the DB", series.MessageID) + return nil + } + log.Printf("series %s saved to the DB", series.MessageID) + return nil +} + +func logSeries(series *lore.Series) { + log.Printf("series ID=%s Subject=%s Patches=%d Version=%d Corrupted=%q", + series.MessageID, series.Subject, len(series.Patches), series.Version, + series.Corrupted) + for _, m := range series.Patches { + log.Printf(" #%d ID=%s Subject=%s", m.Seq, m.MessageID, m.Subject) + } +} + +func sanitizeName(str string) string { + reg, err := regexp.Compile("[^a-zA-Z0-9]+") + if err != nil { + return "" + } + return reg.ReplaceAllString(str, "") +} diff --git a/syz-cluster/series-tracker/manifest.go b/syz-cluster/series-tracker/manifest.go new file mode 100644 index 000000000..7bb256ebb --- /dev/null +++ b/syz-cluster/series-tracker/manifest.go @@ -0,0 +1,139 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +package main + +import ( + "bytes" + "compress/gzip" + "context" + "encoding/json" + "fmt" + "io" + "log" + "net/http" + "regexp" + "strconv" + "sync" + "time" +) + +type InboxInfo struct { + Prefix string + Epochs int +} + +func (ii InboxInfo) EpochURL(id int) string { + return fmt.Sprintf("%s/git/%d.git", ii.Prefix, id) +} + +func (ii InboxInfo) LastEpochURL() string { + return ii.EpochURL(ii.Epochs - 1) +} + +var archiveRe = regexp.MustCompile(`/([\w-]+)/git/(\d+)\.git`) + +func ParseManifest(baseURL string, jsonData []byte) (map[string]*InboxInfo, error) { + var rawMap map[string]json.RawMessage + err := json.Unmarshal(jsonData, &rawMap) + if err != nil { + return nil, err + } + ret := map[string]*InboxInfo{} + for url := range rawMap { + groups := archiveRe.FindStringSubmatch(url) + if len(groups) == 0 { + // TODO: monitor these. + log.Printf("unexpected manifest.js key: %q", url) + continue + } + epoch, err := strconv.Atoi(groups[2]) + if err != nil { + log.Printf("invalid manifest.js key: %q", url) + continue + } + inbox := ret[groups[1]] + if inbox == nil { + inbox = &InboxInfo{Prefix: fmt.Sprintf("%s/%s", baseURL, groups[1])} + ret[groups[1]] = inbox + } + inbox.Epochs = max(inbox.Epochs, epoch+1) + } + return ret, nil +} + +func QueryManifest(baseURL string) (map[string]*InboxInfo, error) { + resp, err := http.Get(baseURL + "/manifest.js.gz") + if err != nil { + return nil, err + } + defer resp.Body.Close() + + gzReader, err := gzip.NewReader(resp.Body) + if err != nil { + return nil, err + } + defer gzReader.Close() + + var buf bytes.Buffer + _, err = io.Copy(&buf, gzReader) + if err != nil { + return nil, err + } + return ParseManifest(baseURL, buf.Bytes()) +} + +// ManifestSource keeps an up to date version of the manifest. +type ManifestSource struct { + mu sync.Mutex + url string + latestOk map[string]*InboxInfo + firstLoaded chan struct{} // The channel will be closed on the first successful load. 
+} + +func NewManifestSource(baseURL string) *ManifestSource { + return &ManifestSource{ + url: baseURL, + firstLoaded: make(chan struct{}), + } +} + +func (ms *ManifestSource) Loop(ctx context.Context) { + // When we try to load for the first time, retry more frequently. + const backOffPeriod = time.Minute * 15 + // Then, update rarely. New epochs are very infrequent. + const refreshPeriod = time.Hour * 12 + + alreadyLoaded := false + nextAttemptIn := backOffPeriod + for { + info, err := QueryManifest(ms.url) + log.Printf("loaded manifest: %v", err) + if err == nil { + ms.mu.Lock() + ms.latestOk = info + ms.mu.Unlock() + if !alreadyLoaded { + alreadyLoaded = true + nextAttemptIn = refreshPeriod + close(ms.firstLoaded) + } + } + select { + case <-ctx.Done(): + return + case <-time.After(nextAttemptIn): + } + } +} + +func (ms *ManifestSource) Get(ctx context.Context) map[string]*InboxInfo { + select { + case <-ms.firstLoaded: + ms.mu.Lock() + defer ms.mu.Unlock() + return ms.latestOk + case <-ctx.Done(): + return nil + } +} diff --git a/syz-cluster/series-tracker/manifest_test.go b/syz-cluster/series-tracker/manifest_test.go new file mode 100644 index 000000000..2e3bbde5a --- /dev/null +++ b/syz-cluster/series-tracker/manifest_test.go @@ -0,0 +1,44 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +package main + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParseManifest(t *testing.T) { + info, err := ParseManifest("http://localhost", []byte(testManifest)) + assert.NoError(t, err) + assert.Len(t, info, 2) + assert.Equal(t, 1, info["name"].Epochs) + second := info["name2"] + assert.Equal(t, 2, second.Epochs) + assert.Equal(t, "http://localhost/name2/git/1.git", second.EpochURL(1)) +} + +const testManifest = `{ + "/name2/git/1.git": { + "modified": 1638806983, + "owner": null, + "reference": null, + "description": "Another repo", + "fingerprint": "788f666601f9641375e11e167b5e6b1eeb549cbb" + }, + "/name/git/0.git": { + "modified": 1638806983, + "owner": null, + "reference": null, + "description": "Some repo", + "fingerprint": "788f666601f9641375e11e167b5e6b1eeb549cbb" + }, + "/name2/git/0.git": { + "modified": 1638806983, + "owner": null, + "reference": null, + "description": "Another repo", + "fingerprint": "788f666601f9641375e11e167b5e6b1eeb549cbb" + } +}` |
