aboutsummaryrefslogtreecommitdiffstats
path: root/syz-cluster/series-tracker
diff options
context:
space:
mode:
authorAleksandr Nogikh <nogikh@google.com>2024-12-17 16:10:02 +0100
committerAleksandr Nogikh <nogikh@google.com>2025-01-22 13:17:53 +0000
commit44f2ad31190603135f4ac758273f26111ca6003c (patch)
tree4f6190f27654e45bfb3bcd71d4c53adc533909a1 /syz-cluster/series-tracker
parentda72ac06e38cf1dd2ecbddd5502225ff7589542d (diff)
syz-cluster: initial code
The basic code of a K8S-based cluster that:
* Aggregates new LKML patch series.
* Determines the kernel trees to apply them to.
* Builds the basic and the patched kernel.
* Displays the results on a web dashboard.

This is a very rudimentary version with a lot of TODOs that provides a skeleton for further work.

The project makes use of Argo workflows and Spanner DB. Bootstrap is used for the web interface.

Overall structure:
* syz-cluster/dashboard: a web dashboard listing patch series and their test results.
* syz-cluster/series-tracker: polls Lore archives and submits the new patch series to the DB.
* syz-cluster/controller: schedules workflows and provides API for them.
* syz-cluster/kernel-disk: a cron job that keeps a kernel checkout up to date.
* syz-cluster/workflow/*: workflow steps.

For the DB structure see syz-cluster/pkg/db/migrations/*.
Diffstat (limited to 'syz-cluster/series-tracker')
-rw-r--r--syz-cluster/series-tracker/Dockerfile30
-rw-r--r--syz-cluster/series-tracker/Dockerfile.test39
-rw-r--r--syz-cluster/series-tracker/deployment.yaml35
-rw-r--r--syz-cluster/series-tracker/git-pvc.yaml14
-rw-r--r--syz-cluster/series-tracker/kustomization.yaml6
-rw-r--r--syz-cluster/series-tracker/main.go198
-rw-r--r--syz-cluster/series-tracker/manifest.go139
-rw-r--r--syz-cluster/series-tracker/manifest_test.go44
8 files changed, 505 insertions, 0 deletions
diff --git a/syz-cluster/series-tracker/Dockerfile b/syz-cluster/series-tracker/Dockerfile
new file mode 100644
index 000000000..4b7c49cd9
--- /dev/null
+++ b/syz-cluster/series-tracker/Dockerfile
@@ -0,0 +1,30 @@
+# Copyright 2024 syzkaller project authors. All rights reserved.
+# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+FROM golang:1.23-alpine AS series-tracker-builder
+
+WORKDIR /build
+
+# Prepare the dependencies.
+COPY go.mod go.sum ./
+RUN go mod download
+
+# Build the tool.
+COPY pkg/ pkg/
+# TODO: get rid of this dependency.
+COPY prog/ prog/
+COPY dashboard/dashapi/ dashboard/dashapi/
+COPY sys/targets/ sys/targets/
+COPY syz-cluster/series-tracker/*.go syz-cluster/series-tracker/
+COPY syz-cluster/pkg/ syz-cluster/pkg/
+
+RUN go build -o /build/series-tracker-bin /build/syz-cluster/series-tracker
+
+# Runtime image: only git and the prebuilt series-tracker binary.
+FROM ubuntu:latest
+
+# Drop the apt package lists in the same layer so they do not bloat the image.
+RUN apt-get update && \
+    apt-get install -y git && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY --from=series-tracker-builder /build/series-tracker-bin /bin/series-tracker
+
+ENTRYPOINT ["/bin/series-tracker"]
diff --git a/syz-cluster/series-tracker/Dockerfile.test b/syz-cluster/series-tracker/Dockerfile.test
new file mode 100644
index 000000000..1f9ca22de
--- /dev/null
+++ b/syz-cluster/series-tracker/Dockerfile.test
@@ -0,0 +1,39 @@
+# I. Checkout the git repository.
+FROM ubuntu:latest AS git-source
+
+# Drop the apt package lists in the same layer so they do not bloat the image.
+RUN apt-get update && \
+    apt-get install -y git && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /git-repo
+
+RUN git init
+# Fetch over HTTPS, like the tool itself does, so the archive cannot be tampered with in transit.
+RUN git remote add docs-0 https://lore.kernel.org/linux-doc/0
+RUN git fetch docs-0
+RUN git checkout docs-0/master
+
+# II. Build the tool.
+FROM golang:1.23-alpine AS series-tracker-builder
+
+WORKDIR /build
+
+# Prepare the dependencies.
+COPY go.mod go.sum ./
+RUN go mod download
+
+# Build the tool.
+COPY pkg/ pkg/
+# TODO: get rid of this dependency.
+COPY prog/ prog/
+COPY dashboard/dashapi/ dashboard/dashapi/
+COPY sys/targets/ sys/targets/
+COPY syz-cluster/series-tracker/*.go syz-cluster/series-tracker/
+COPY syz-cluster/pkg/ syz-cluster/pkg/
+
+RUN go build -o /build/series-tracker-bin /build/syz-cluster/series-tracker
+
+# III. Create the actual container.
+FROM git-source
+
+COPY --from=series-tracker-builder /build/series-tracker-bin /bin/series-tracker
+
+ENTRYPOINT ["/bin/series-tracker"]
diff --git a/syz-cluster/series-tracker/deployment.yaml b/syz-cluster/series-tracker/deployment.yaml
new file mode 100644
index 000000000..2a84f0722
--- /dev/null
+++ b/syz-cluster/series-tracker/deployment.yaml
@@ -0,0 +1,35 @@
+# Copyright 2025 syzkaller project authors. All rights reserved.
+# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: series-tracker
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app: series-tracker
+ template:
+ metadata:
+ labels:
+ app: series-tracker
+ spec:
+ containers:
+ - name: series-tracker-image
+ image: series-tracker-image
+ envFrom:
+ - configMapRef:
+ name: global-config
+ volumeMounts:
+ - name: series-tracker-repo-disk
+ mountPath: /git-repo
+ - name: blobs-storage-disk
+ mountPath: /blob-storage
+ volumes:
+ - name: series-tracker-repo-disk
+ persistentVolumeClaim:
+ claimName: series-tracker-repo-disk-claim
+ - name: blobs-storage-disk
+ persistentVolumeClaim:
+ claimName: blob-storage-disk-claim
diff --git a/syz-cluster/series-tracker/git-pvc.yaml b/syz-cluster/series-tracker/git-pvc.yaml
new file mode 100644
index 000000000..b9f61f39a
--- /dev/null
+++ b/syz-cluster/series-tracker/git-pvc.yaml
@@ -0,0 +1,14 @@
+# Copyright 2025 syzkaller project authors. All rights reserved.
+# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+ name: series-tracker-repo-disk-claim
+spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 25Gi
+ storageClassName: standard
diff --git a/syz-cluster/series-tracker/kustomization.yaml b/syz-cluster/series-tracker/kustomization.yaml
new file mode 100644
index 000000000..e949daf9c
--- /dev/null
+++ b/syz-cluster/series-tracker/kustomization.yaml
@@ -0,0 +1,6 @@
+# Copyright 2025 syzkaller project authors. All rights reserved.
+# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+resources:
+- deployment.yaml
+- git-pvc.yaml
diff --git a/syz-cluster/series-tracker/main.go b/syz-cluster/series-tracker/main.go
new file mode 100644
index 000000000..a8121bf92
--- /dev/null
+++ b/syz-cluster/series-tracker/main.go
@@ -0,0 +1,198 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package main
+
+import (
+ "bytes"
+ "context"
+ "flag"
+ "fmt"
+ "log"
+ "path/filepath"
+ "regexp"
+ "time"
+
+ "github.com/google/syzkaller/pkg/email"
+ "github.com/google/syzkaller/pkg/email/lore"
+ "github.com/google/syzkaller/pkg/vcs"
+ "github.com/google/syzkaller/syz-cluster/pkg/app"
+ "github.com/google/syzkaller/syz-cluster/pkg/blob"
+ "github.com/google/syzkaller/syz-cluster/pkg/db"
+)
+
+var flagVerbose = flag.Bool("verbose", false, "enable verbose output")
+
+// TODO: add more.
+var archivesToQuery = []string{"linux-wireless", "netfilter-devel"}
+
+func main() {
+ flag.Parse()
+ ctx := context.Background()
+ env, err := app.Environment(ctx)
+ if err != nil {
+ app.Fatalf("failed to set up environment: %v", err)
+ }
+ manifest := NewManifestSource(`https://lore.kernel.org`)
+ fetcher := &SeriesFetcher{
+ gitRepoFolder: `/git-repo`, // Set in deployment.yaml.
+ seriesRepo: db.NewSeriesRepository(env.Spanner),
+ blobStorage: env.BlobStorage,
+ manifest: manifest,
+ }
+ go manifest.Loop(ctx)
+
+ // On start, look at the last week of messages.
+ nextFrom := time.Now().Add(-time.Hour * 24 * 7)
+ for {
+ oldFrom := nextFrom
+ // Then, parse last 30 minutes every 15 minutes.
+ nextFrom = time.Now().Add(-time.Minute * 15)
+ err := fetcher.Update(ctx, oldFrom)
+ if err != nil {
+ // TODO: make sure these are alerted.
+ log.Print(err)
+ }
+ time.Sleep(15 * time.Minute)
+ }
+}
+
+// SeriesFetcher polls mailing list archives and saves the patch series it finds.
+type SeriesFetcher struct {
+ // gitRepoFolder is the parent directory of the per-archive git checkouts.
+ gitRepoFolder string
+ // seriesRepo is where the discovered series are inserted.
+ seriesRepo *db.SeriesRepository
+ // blobStorage receives the raw bodies of the individual patches.
+ blobStorage blob.Storage
+ // manifest provides the URLs of the archives to poll.
+ manifest *ManifestSource
+}
+
+// Update polls the latest epoch of every archive listed in archivesToQuery, parses the
+// emails that lore.ReadArchive returns for the given from time, groups them into patch
+// series and hands every series over to handleSeries.
+//
+// It returns an error if the manifest is unavailable, if an archive cannot be polled or
+// read, or if handleSeries fails. Emails that cannot be parsed are logged and skipped.
+func (sf *SeriesFetcher) Update(ctx context.Context, from time.Time) error {
+	log.Printf("querying email threads since %v", from)
+
+	manifest := sf.manifest.Get(ctx)
+	if manifest == nil {
+		return fmt.Errorf("failed to query the manifest data")
+	}
+	var list []lore.EmailReader
+	for _, name := range archivesToQuery {
+		info, ok := manifest[name]
+		if !ok {
+			return fmt.Errorf("manifest has no info for %q", name)
+		}
+		url := info.LastEpochURL()
+		log.Printf("polling %s", url)
+
+		folderName := sanitizeName(name)
+		if folderName == "" {
+			return fmt.Errorf("invalid archive name: %q", name)
+		}
+		gitRepo := vcs.NewLKMLRepo(filepath.Join(sf.gitRepoFolder, folderName))
+		// TODO: by querying only the last archive, we risk losing the series that are split between both.
+		// But for now let's ignore this possibility.
+		_, err := gitRepo.Poll(url, "master")
+		if err != nil {
+			return fmt.Errorf("failed to poll %q: %w", url, err)
+		}
+		repoList, err := lore.ReadArchive(gitRepo, from)
+		if err != nil {
+			return fmt.Errorf("failed to read the archive %q: %w", name, err)
+		}
+		log.Printf("queried %d emails", len(repoList))
+		list = append(list, repoList...)
+	}
+
+	emails := make([]*email.Email, 0, len(list))
+	idToReader := map[string]lore.EmailReader{}
+	for _, item := range list {
+		// TODO: this could be done in several threads.
+		// The variable is not called "email" to avoid shadowing the imported package.
+		msg, err := item.Parse(nil, nil)
+		if err != nil {
+			log.Printf("failed to parse email: %v", err)
+			continue
+		}
+		idToReader[msg.MessageID] = item
+		emails = append(emails, msg)
+	}
+	// Report the emails that were actually parsed, not the ones that were queried.
+	log.Printf("extracted: %d", len(emails))
+
+	allSeries := lore.PatchSeries(emails)
+	log.Printf("collected %d series", len(allSeries))
+
+	for _, series := range allSeries {
+		if *flagVerbose {
+			logSeries(series)
+		}
+		if err := sf.handleSeries(ctx, series, idToReader); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// handleSeries saves one patch series via sf.seriesRepo and uploads the bodies of its
+// patches to sf.blobStorage.
+//
+// idToReader maps a message ID to the reader that returns the raw email for it.
+// Series that are marked as corrupted or that have no patches are logged and skipped.
+// A series that already exists in the DB (db.ErrSeriesExists) is not treated as an
+// error; every other failure is returned to the caller.
+func (sf *SeriesFetcher) handleSeries(ctx context.Context, series *lore.Series,
+	idToReader map[string]lore.EmailReader) error {
+	if series.Corrupted != "" {
+		log.Printf("skipping %s because of %q", series.MessageID, series.Corrupted)
+		return nil
+	}
+	if len(series.Patches) == 0 {
+		// Indexing Patches[0] below would panic.
+		log.Printf("skipping %s because it has no patches", series.MessageID)
+		return nil
+	}
+	first := series.Patches[0]
+	date := first.Date
+	if date.IsZero() || date.After(time.Now()) {
+		// We cannot fully trust dates from the mailing list as some of them are very weird, e.g.
+		// https://lore.kernel.org/all/20770915-nolibc-run-user-v1-1-3caec61726dc@weissschuh.net/raw.
+		date = time.Now()
+	}
+	err := sf.seriesRepo.Insert(ctx, &db.Series{
+		ExtID: series.MessageID,
+		// TODO: set AuthorName?
+		AuthorEmail: first.Author,
+		Title:       series.Subject,
+		Version:     int64(series.Version),
+		Link:        "https://lore.kernel.org/all/" + series.MessageID,
+		PublishedAt: date,
+		// TODO: set Cc.
+	}, func() ([]*db.Patch, error) {
+		// Builds the patch records for Insert, uploading every patch body on the way.
+		var ret []*db.Patch
+		for _, patch := range series.Patches {
+			reader, ok := idToReader[patch.MessageID]
+			if !ok {
+				// Calling Read() on the zero interface value would panic.
+				return nil, fmt.Errorf("no email reader for %q", patch.MessageID)
+			}
+			body, err := reader.Read()
+			if err != nil {
+				return nil, fmt.Errorf("failed to extract %q: %w", patch.MessageID, err)
+			}
+			// In case of errors, we will waste some space, but let's ignore it for simplicity.
+			// Patches are not super big.
+			uri, err := sf.blobStorage.Store(bytes.NewReader(body))
+			if err != nil {
+				return nil, fmt.Errorf("failed to upload patch body: %w", err)
+			}
+			ret = append(ret, &db.Patch{
+				Seq:     int64(patch.Seq),
+				Title:   patch.Subject,
+				Link:    "https://lore.kernel.org/all/" + patch.MessageID,
+				BodyURI: uri,
+			})
+		}
+		return ret, nil
+	})
+	switch {
+	case err == db.ErrSeriesExists:
+		log.Printf("series %s already exists in the DB", series.MessageID)
+		return nil
+	case err != nil:
+		// Do not report a failed insert as a success.
+		return fmt.Errorf("failed to save series %q: %w", series.MessageID, err)
+	}
+	log.Printf("series %s saved to the DB", series.MessageID)
+	return nil
+}
+
+// logSeries writes a summary line for the series followed by one line per patch.
+// It is only called when verbose output is enabled.
+func logSeries(series *lore.Series) {
+	patches := series.Patches
+	log.Printf("series ID=%s Subject=%s Patches=%d Version=%d Corrupted=%q",
+		series.MessageID, series.Subject, len(patches), series.Version,
+		series.Corrupted)
+	for i := range patches {
+		patch := patches[i]
+		log.Printf(" #%d ID=%s Subject=%s", patch.Seq, patch.MessageID, patch.Subject)
+	}
+}
+
+func sanitizeName(str string) string {
+ reg, err := regexp.Compile("[^a-zA-Z0-9]+")
+ if err != nil {
+ return ""
+ }
+ return reg.ReplaceAllString(str, "")
+}
diff --git a/syz-cluster/series-tracker/manifest.go b/syz-cluster/series-tracker/manifest.go
new file mode 100644
index 000000000..7bb256ebb
--- /dev/null
+++ b/syz-cluster/series-tracker/manifest.go
@@ -0,0 +1,139 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package main
+
+import (
+ "bytes"
+ "compress/gzip"
+ "context"
+ "encoding/json"
+ "fmt"
+ "io"
+ "log"
+ "net/http"
+ "regexp"
+ "strconv"
+ "sync"
+ "time"
+)
+
+// InboxInfo describes one mailing list archive from the manifest.
+type InboxInfo struct {
+	// Prefix is the URL of the archive without the "/git/N.git" part.
+	Prefix string
+	// Epochs is the number of epochs, i.e. the highest seen epoch number plus one.
+	Epochs int
+}
+
+// EpochURL returns the URL of the git repository that holds the epoch number id.
+func (ii InboxInfo) EpochURL(id int) string {
+	repoName := fmt.Sprintf("%d.git", id)
+	return fmt.Sprintf("%s/git/%s", ii.Prefix, repoName)
+}
+
+// LastEpochURL returns the URL of the git repository of the most recent epoch.
+func (ii InboxInfo) LastEpochURL() string {
+	lastID := ii.Epochs - 1
+	return ii.EpochURL(lastID)
+}
+
+// archiveRe extracts the archive name and the epoch number from a manifest key
+// such as "/name/git/0.git".
+var archiveRe = regexp.MustCompile(`/([\w-]+)/git/(\d+)\.git`)
+
+// ParseManifest parses the JSON manifest and returns, per archive name, its URL prefix
+// (baseURL + "/" + name) and the number of its epochs. Only the keys of the JSON
+// object are inspected; keys that do not look like an archive path are logged and skipped.
+// An error is returned only if jsonData is not valid JSON for an object.
+func ParseManifest(baseURL string, jsonData []byte) (map[string]*InboxInfo, error) {
+	var entries map[string]json.RawMessage
+	if err := json.Unmarshal(jsonData, &entries); err != nil {
+		return nil, err
+	}
+	inboxes := make(map[string]*InboxInfo)
+	for key := range entries {
+		match := archiveRe.FindStringSubmatch(key)
+		if match == nil {
+			// TODO: monitor these.
+			log.Printf("unexpected manifest.js key: %q", key)
+			continue
+		}
+		name, epochStr := match[1], match[2]
+		epoch, err := strconv.Atoi(epochStr)
+		if err != nil {
+			log.Printf("invalid manifest.js key: %q", key)
+			continue
+		}
+		info, known := inboxes[name]
+		if !known {
+			info = &InboxInfo{Prefix: fmt.Sprintf("%s/%s", baseURL, name)}
+			inboxes[name] = info
+		}
+		// Epochs are numbered from 0, so the count is the highest number plus one.
+		if count := epoch + 1; count > info.Epochs {
+			info.Epochs = count
+		}
+	}
+	return inboxes, nil
+}
+
+// QueryManifest downloads baseURL + "/manifest.js.gz", decompresses it and parses it
+// with ParseManifest.
+//
+// An error is returned if the request fails or times out, if the server replies with
+// a status other than 200 OK, or if the body is not a valid gzip stream or manifest.
+func QueryManifest(baseURL string) (map[string]*InboxInfo, error) {
+	// The default HTTP client has no timeout, so a stalled connection would block
+	// the caller forever.
+	const fetchTimeout = 5 * time.Minute
+	client := &http.Client{Timeout: fetchTimeout}
+
+	resp, err := client.Get(baseURL + "/manifest.js.gz")
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		// Otherwise an error page would be reported as an obscure gzip failure.
+		return nil, fmt.Errorf("unexpected HTTP status when querying the manifest: %s", resp.Status)
+	}
+
+	gzReader, err := gzip.NewReader(resp.Body)
+	if err != nil {
+		return nil, err
+	}
+	defer gzReader.Close()
+
+	var buf bytes.Buffer
+	_, err = io.Copy(&buf, gzReader)
+	if err != nil {
+		return nil, err
+	}
+	return ParseManifest(baseURL, buf.Bytes())
+}
+
+// ManifestSource keeps an up to date version of the manifest.
+// Loop refreshes the stored manifest and Get returns the latest stored one.
+type ManifestSource struct {
+ // mu guards latestOk.
+ mu sync.Mutex
+ // url is the base URL the manifest is queried from.
+ url string
+ // latestOk is the result of the most recent successful manifest query.
+ latestOk map[string]*InboxInfo
+ firstLoaded chan struct{} // The channel will be closed on the first successful load.
+}
+
+// NewManifestSource creates a ManifestSource for the given base URL.
+// Nothing is loaded until Loop is run.
+func NewManifestSource(baseURL string) *ManifestSource {
+ return &ManifestSource{
+ url: baseURL,
+ firstLoaded: make(chan struct{}),
+ }
+}
+
+// Loop periodically queries the manifest and stores every successfully loaded version.
+// It blocks until ctx is cancelled, so it is meant to be run in its own goroutine.
+func (ms *ManifestSource) Loop(ctx context.Context) {
+	// When we try to load for the first time, retry more frequently.
+	const backOffPeriod = time.Minute * 15
+	// Then, update rarely. New epochs are very infrequent.
+	const refreshPeriod = time.Hour * 12
+
+	alreadyLoaded := false
+	nextAttemptIn := backOffPeriod
+	for {
+		info, err := QueryManifest(ms.url)
+		if err != nil {
+			// Keep serving the previous good version, if there is one.
+			log.Printf("failed to load manifest: %v", err)
+		} else {
+			log.Printf("loaded manifest: %d inboxes", len(info))
+			ms.mu.Lock()
+			ms.latestOk = info
+			ms.mu.Unlock()
+			if !alreadyLoaded {
+				alreadyLoaded = true
+				nextAttemptIn = refreshPeriod
+				// Unblock the Get() callers; the channel must be closed only once.
+				close(ms.firstLoaded)
+			}
+		}
+		select {
+		case <-ctx.Done():
+			return
+		case <-time.After(nextAttemptIn):
+		}
+	}
+}
+
+// Get waits until the manifest has been loaded at least once and returns the latest
+// successfully loaded version. It returns nil if ctx is done before that.
+func (ms *ManifestSource) Get(ctx context.Context) map[string]*InboxInfo {
+	select {
+	case <-ctx.Done():
+		return nil
+	case <-ms.firstLoaded:
+	}
+	ms.mu.Lock()
+	latest := ms.latestOk
+	ms.mu.Unlock()
+	return latest
+}
diff --git a/syz-cluster/series-tracker/manifest_test.go b/syz-cluster/series-tracker/manifest_test.go
new file mode 100644
index 000000000..2e3bbde5a
--- /dev/null
+++ b/syz-cluster/series-tracker/manifest_test.go
@@ -0,0 +1,44 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package main
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+// TestParseManifest checks that ParseManifest groups the keys of testManifest by
+// archive name and derives the epoch count and the epoch URLs from them.
+func TestParseManifest(t *testing.T) {
+ info, err := ParseManifest("http://localhost", []byte(testManifest))
+ assert.NoError(t, err)
+ // testManifest mentions two archives: "name" and "name2".
+ assert.Len(t, info, 2)
+ // "name" only has epoch 0.
+ assert.Equal(t, 1, info["name"].Epochs)
+ // "name2" has epochs 0 and 1.
+ second := info["name2"]
+ assert.Equal(t, 2, second.Epochs)
+ assert.Equal(t, "http://localhost/name2/git/1.git", second.EpochURL(1))
+}
+
+const testManifest = `{
+ "/name2/git/1.git": {
+ "modified": 1638806983,
+ "owner": null,
+ "reference": null,
+ "description": "Another repo",
+ "fingerprint": "788f666601f9641375e11e167b5e6b1eeb549cbb"
+ },
+ "/name/git/0.git": {
+ "modified": 1638806983,
+ "owner": null,
+ "reference": null,
+ "description": "Some repo",
+ "fingerprint": "788f666601f9641375e11e167b5e6b1eeb549cbb"
+ },
+ "/name2/git/0.git": {
+ "modified": 1638806983,
+ "owner": null,
+ "reference": null,
+ "description": "Another repo",
+ "fingerprint": "788f666601f9641375e11e167b5e6b1eeb549cbb"
+ }
+}`