From 09706c899ce54513472d621bf9d84c0e8e5eaea8 Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Mon, 12 May 2025 15:29:02 +0200 Subject: pkg/vcs: extend ListCommitHashes Rename the method to LatestCommit and make it more flexible: 1) Return the commit date alongside the commit hash. 2) Rename the time filter to highlight that it's non-inclusive. 3) Make it possible to query the commits newer than the specified commit hash. It will let us poll lore archives more efficiently. --- pkg/email/lore/read.go | 8 ++++--- pkg/vcs/fuchsia.go | 4 ++-- pkg/vcs/git.go | 31 +++++++++++++++++++-------- pkg/vcs/git_test.go | 43 +++++++++++++++----------------------- pkg/vcs/vcs.go | 11 ++++++++-- syz-cluster/series-tracker/main.go | 5 ++++- tools/syz-lore/query_lkml.go | 2 +- 7 files changed, 60 insertions(+), 44 deletions(-) diff --git a/pkg/email/lore/read.go b/pkg/email/lore/read.go index ef0a0faa0..d4e68cf33 100644 --- a/pkg/email/lore/read.go +++ b/pkg/email/lore/read.go @@ -13,12 +13,13 @@ import ( ) type EmailReader struct { + vcs.CommitShort Read func() ([]byte, error) } // ReadArchive queries the parsed messages from a single LKML message archive. -func ReadArchive(repo vcs.Repo, fromTime time.Time) ([]EmailReader, error) { - commits, err := repo.ListCommitHashes("HEAD", fromTime) +func ReadArchive(repo vcs.Repo, afterCommit string, afterTime time.Time) ([]EmailReader, error) { + commits, err := repo.LatestCommits(afterCommit, afterTime) if err != nil { return nil, fmt.Errorf("failed to get recent commits: %w", err) } @@ -26,8 +27,9 @@ func ReadArchive(repo vcs.Repo, fromTime time.Time) ([]EmailReader, error) { for _, iterCommit := range commits { commit := iterCommit ret = append(ret, EmailReader{ + CommitShort: commit, Read: func() ([]byte, error) { - return repo.Object("m", commit) + return repo.Object("m", commit.Hash) }, }) } diff --git a/pkg/vcs/fuchsia.go b/pkg/vcs/fuchsia.go index ff5f5cd57..4f7948c51 100644 --- a/pkg/vcs/fuchsia.go +++ b/pkg/vcs/fuchsia.go @@ -106,8 +106,8 @@ func (ctx *fuchsia) Contains(commit string) (bool, error) { return ctx.repo.Contains(commit) } -func (ctx *fuchsia) ListCommitHashes(baseCommit string, from time.Time) ([]string, error) { - return ctx.repo.ListCommitHashes(baseCommit, from) +func (ctx *fuchsia) LatestCommits(afterCommit string, afterDate time.Time) ([]CommitShort, error) { + return ctx.repo.LatestCommits(afterCommit, afterDate) } func (ctx *fuchsia) Object(name, commit string) ([]byte, error) { diff --git a/pkg/vcs/git.go b/pkg/vcs/git.go index fd71b8d36..ef46259f3 100644 --- a/pkg/vcs/git.go +++ b/pkg/vcs/git.go @@ -228,17 +228,18 @@ func (git *gitRepo) Contains(commit string) (bool, error) { return err == nil, nil } +const gitDateFormat = "Mon Jan 2 15:04:05 2006 -0700" + func gitParseCommit(output, user, domain []byte, ignoreCC map[string]bool) (*Commit, error) { lines := bytes.Split(output, []byte{'\n'}) if len(lines) < 8 || len(lines[0]) != 40 { return nil, fmt.Errorf("unexpected git log output: %q", output) } - const dateFormat = "Mon Jan 2 15:04:05 2006 -0700" - date, err := time.Parse(dateFormat, string(lines[4])) + date, err := time.Parse(gitDateFormat, string(lines[4])) if err != nil { return nil, fmt.Errorf("failed to parse date in git log output: %w\n%q", err, output) } - commitDate, err := time.Parse(dateFormat, string(lines[6])) + commitDate, err := time.Parse(gitDateFormat, string(lines[6])) if err != nil { return nil, fmt.Errorf("failed to parse date in git log output: %w\n%q", err, output) } @@ -346,19 +347,31 @@ func (git *gitRepo) GetCommitsByTitles(titles []string) ([]*Commit, []string, er return results, missing, nil } -func (git *gitRepo) ListCommitHashes(baseCommit string, from time.Time) ([]string, error) { - args := []string{"log", "--pretty=format:%h"} - if !from.IsZero() { - args = append(args, "--since", from.Format(time.RFC3339)) +func (git *gitRepo) LatestCommits(afterCommit string, afterDate time.Time) ([]CommitShort, error) { + args := []string{"log", "--pretty=format:%h:%cd"} + if !afterDate.IsZero() { + args = append(args, "--since", afterDate.Format(time.RFC3339)) + } + if afterCommit != "" { + args = append(args, afterCommit+"..") } - output, err := git.Run(append(args, baseCommit)...) + output, err := git.Run(args...) if err != nil { return nil, err } if len(output) == 0 { return nil, nil } - return strings.Split(string(output), "\n"), nil + var ret []CommitShort + for _, line := range strings.Split(string(output), "\n") { + hash, date, _ := strings.Cut(line, ":") + commitDate, err := time.Parse(gitDateFormat, date) + if err != nil { + return nil, fmt.Errorf("failed to parse date in %q: %w", line, err) + } + ret = append(ret, CommitShort{Hash: hash, CommitDate: commitDate}) + } + return ret, nil } func (git *gitRepo) ExtractFixTagsFromCommits(baseCommit, email string) ([]*Commit, error) { diff --git a/pkg/vcs/git_test.go b/pkg/vcs/git_test.go index 1c1320364..40207898c 100644 --- a/pkg/vcs/git_test.go +++ b/pkg/vcs/git_test.go @@ -240,39 +240,30 @@ func TestContains(t *testing.T) { } } -func TestCommitHashes(t *testing.T) { +func TestLatestCommits(t *testing.T) { baseDir := t.TempDir() repo := MakeTestRepo(t, baseDir) repo.Git("checkout", "-b", "branch-a") repo.Git("commit", "--no-edit", "--allow-empty", "-m", "target") - repo.Git("checkout", "-b", "branch-b") repo.Git("commit", "--no-edit", "--allow-empty", "-m", "target") - got, err := repo.repo.ListCommitHashes("HEAD", time.Time{}) - if err != nil { - t.Fatal(err) - } - if len(got) != 2 { - t.Fatalf("expected 2 commits") - } + got, err := repo.repo.LatestCommits("", time.Time{}) + assert.NoError(t, err) + assert.Len(t, got, 2, "expected 2 commits") for i, commit := range got { - if contained, _ := repo.repo.Contains(commit); !contained { + if contained, _ := repo.repo.Contains(commit.Hash); !contained { t.Fatalf("commit %d is not contained", i) } } - // Now change HEAD. - repo.Git("checkout", "branch-a") - got, err = repo.repo.ListCommitHashes("HEAD", time.Time{}) - if err != nil { - t.Fatal(err) - } - if len(got) != 1 { - t.Fatalf("expected 1 commit, got %d", len(got)) - } - if contained, _ := repo.repo.Contains(got[0]); !contained { - t.Fatalf("commit in branch-b is not contained") - } + // Now ignore the first commit. + got2, err := repo.repo.LatestCommits(got[1].Hash, time.Time{}) + assert.NoError(t, err) + assert.Len(t, got2, 1, "expected 1 commit") + assert.Equal(t, got2[0].Hash, got[0].Hash, "expected to see the HEAD commit") + + // TODO: test the afterDate argument. + // It will require setting the GIT_COMMITTER_DATE env variable. } func TestObject(t *testing.T) { @@ -293,7 +284,7 @@ func TestObject(t *testing.T) { repo.Git("add", "object.txt") repo.Git("commit", "--no-edit", "--allow-empty", "-m", "target") - commits, err := repo.repo.ListCommitHashes("HEAD", time.Time{}) + commits, err := repo.repo.LatestCommits("", time.Time{}) if err != nil { t.Fatal(err) } @@ -301,7 +292,7 @@ func TestObject(t *testing.T) { t.Fatalf("expected 2 commits, got %d", len(commits)) } // Verify file's contents at the first revision. - data, err := repo.repo.Object("object.txt", commits[1]) + data, err := repo.repo.Object("object.txt", commits[1].Hash) if err != nil { t.Fatal(err) } @@ -309,14 +300,14 @@ func TestObject(t *testing.T) { t.Fatal(diff) } // And at the second one. - data, err = repo.repo.Object("object.txt", commits[0]) + data, err = repo.repo.Object("object.txt", commits[0].Hash) if err != nil { t.Fatal(err) } if diff := cmp.Diff(data, secondRev); diff != "" { t.Fatal(diff) } - com, err := repo.repo.Commit(commits[0]) + com, err := repo.repo.Commit(commits[0].Hash) if err != nil { t.Fatal(err.Error()) } diff --git a/pkg/vcs/vcs.go b/pkg/vcs/vcs.go index c9a440ed1..06fd62671 100644 --- a/pkg/vcs/vcs.go +++ b/pkg/vcs/vcs.go @@ -62,8 +62,10 @@ type Repo interface { // (e.g. do CheckoutBranch before). Contains(commit string) (bool, error) - // ListCommitHashes lists all commit hashes reachable from baseCommit. - ListCommitHashes(baseCommit string, from time.Time) ([]string, error) + // LatestCommits lists all latest commit hashes well as their commit dates. + // If afterCommit is specified, the output only includes the commits from which afterCommit is reachable. + // If afterDate is specified, the output only includes the newe commits. + LatestCommits(afterCommit string, afterDate time.Time) ([]CommitShort, error) // Object returns the contents of a git repository object at the particular moment in history. Object(name, commit string) ([]byte, error) @@ -120,6 +122,11 @@ type Commit struct { Patch []byte } +type CommitShort struct { + Hash string + CommitDate time.Time +} + type RecipientType int const ( diff --git a/syz-cluster/series-tracker/main.go b/syz-cluster/series-tracker/main.go index 2b6611798..b8638d897 100644 --- a/syz-cluster/series-tracker/main.go +++ b/syz-cluster/series-tracker/main.go @@ -99,7 +99,10 @@ func (sf *SeriesFetcher) Update(ctx context.Context, from time.Time) error { if err != nil { return fmt.Errorf("failed to poll %q: %w", url, err) } - repoList, err := lore.ReadArchive(gitRepo, from) + // We could have been fetching the emails precisely starting from the last Update() attempt, + // but since we may only save it once the whole series is there, it's easier to just look at all + // the recent messages. + repoList, err := lore.ReadArchive(gitRepo, "", from) if err != nil { return err } diff --git a/tools/syz-lore/query_lkml.go b/tools/syz-lore/query_lkml.go index ff1dbe200..10a0c1ecd 100644 --- a/tools/syz-lore/query_lkml.go +++ b/tools/syz-lore/query_lkml.go @@ -123,7 +123,7 @@ func processArchives(dir string, emails, domains []string) []*lore.Thread { g.Go(func() error { defer wg.Done() repo := vcs.NewLKMLRepo(path) - list, err := lore.ReadArchive(repo, time.Time{}) + list, err := lore.ReadArchive(repo, "", time.Time{}) if err != nil { return err } -- cgit mrf-deployment