diff options
| author | Dmitry Vyukov <dvyukov@google.com> | 2018-06-27 13:07:03 +0200 |
|---|---|---|
| committer | Dmitry Vyukov <dvyukov@google.com> | 2018-06-27 13:07:03 +0200 |
| commit | 43da5e3a1baae2b2fa4f00e2218632e882654517 (patch) | |
| tree | ffe4f2db60cb9e4b0ca7696d118ddebbb907cb31 /pkg/vcs/git.go | |
| parent | 43e60f7e0961a7b3bdb6813703c41cee2680a0dd (diff) | |
pkg/vcs: move from pkg/git
Rename pkg/git to pkg/vcs because we need to support not only git.
Diffstat (limited to 'pkg/vcs/git.go')
| -rw-r--r-- | pkg/vcs/git.go | 496 |
1 files changed, 496 insertions, 0 deletions
diff --git a/pkg/vcs/git.go b/pkg/vcs/git.go new file mode 100644 index 000000000..e942b3c39 --- /dev/null +++ b/pkg/vcs/git.go @@ -0,0 +1,496 @@ +// Copyright 2017 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +// Package vcs provides helper functions for working with git repositories. +package vcs + +import ( + "bufio" + "bytes" + "fmt" + "io" + "net/mail" + "os" + "os/exec" + "regexp" + "sort" + "strconv" + "strings" + "time" + + "github.com/google/syzkaller/pkg/osutil" +) + +const ( + DateFormat = "Mon Jan 2 15:04:05 2006 -0700" + timeout = time.Hour // timeout for all git invocations +) + +// Poll checkouts the specified repository/branch in dir. +// This involves fetching/resetting/cloning as necessary to recover from all possible problems. +// Returns hash of the HEAD commit in the specified branch. +func Poll(dir, repo, branch string) (*Commit, error) { + runSandboxed(dir, "git", "bisect", "reset") + runSandboxed(dir, "git", "reset", "--hard") + origin, err := runSandboxed(dir, "git", "remote", "get-url", "origin") + if err != nil || strings.TrimSpace(string(origin)) != repo { + // The repo is here, but it has wrong origin (e.g. repo in config has changed), re-clone. + if err := clone(dir, repo, branch); err != nil { + return nil, err + } + } + // Use origin/branch for the case the branch was force-pushed, + // in such case branch is not the same is origin/branch and we will + // stuck with the local version forever (git checkout won't fail). + if _, err := runSandboxed(dir, "git", "checkout", "origin/"+branch); err != nil { + // No such branch (e.g. branch in config has changed), re-clone. + if err := clone(dir, repo, branch); err != nil { + return nil, err + } + } + if _, err := runSandboxed(dir, "git", "fetch", "--no-tags"); err != nil { + // Something else is wrong, re-clone. 
+ if err := clone(dir, repo, branch); err != nil { + return nil, err + } + } + if _, err := runSandboxed(dir, "git", "checkout", "origin/"+branch); err != nil { + return nil, err + } + return HeadCommit(dir) +} + +// CheckoutBranch checkouts the specified repository/branch in dir. +func CheckoutBranch(dir, repo, branch string) (*Commit, error) { + runSandboxed(dir, "git", "bisect", "reset") + if _, err := runSandboxed(dir, "git", "reset", "--hard"); err != nil { + if err := initRepo(dir); err != nil { + return nil, err + } + } + _, err := runSandboxed(dir, "git", "fetch", repo, branch) + if err != nil { + return nil, err + } + if _, err := runSandboxed(dir, "git", "checkout", "FETCH_HEAD"); err != nil { + return nil, err + } + return HeadCommit(dir) +} + +// CheckoutCommit checkouts the specified repository on the specified commit in dir. +func CheckoutCommit(dir, repo, commit string) (*Commit, error) { + runSandboxed(dir, "git", "bisect", "reset") + if _, err := runSandboxed(dir, "git", "reset", "--hard"); err != nil { + if err := initRepo(dir); err != nil { + return nil, err + } + } + _, err := runSandboxed(dir, "git", "fetch", repo) + if err != nil { + return nil, err + } + return SwitchCommit(dir, commit) +} + +// SwitchCommit checkouts the specified commit without fetching. 
+func SwitchCommit(dir, commit string) (*Commit, error) { + if _, err := runSandboxed(dir, "git", "checkout", commit); err != nil { + return nil, err + } + return HeadCommit(dir) +} + +func clone(dir, repo, branch string) error { + if err := initRepo(dir); err != nil { + return err + } + if _, err := runSandboxed(dir, "git", "remote", "add", "origin", repo); err != nil { + return err + } + if _, err := runSandboxed(dir, "git", "fetch", "origin", branch); err != nil { + return err + } + return nil +} + +func initRepo(dir string) error { + if err := os.RemoveAll(dir); err != nil { + return fmt.Errorf("failed to remove repo dir: %v", err) + } + if err := osutil.MkdirAll(dir); err != nil { + return fmt.Errorf("failed to create repo dir: %v", err) + } + if err := osutil.SandboxChown(dir); err != nil { + return err + } + if _, err := runSandboxed(dir, "git", "init"); err != nil { + return err + } + return nil +} + +type Commit struct { + Hash string + Title string + Author string + CC []string + Date time.Time +} + +// HeadCommit returns info about the HEAD commit of the current branch of git repository in dir. 
+func HeadCommit(dir string) (*Commit, error) { + return GetCommit(dir, "HEAD") +} + +func GetCommit(dir, commit string) (*Commit, error) { + output, err := runSandboxed(dir, "git", "log", "--format=%H%n%s%n%ae%n%ad%n%b", "-n", "1", commit) + if err != nil { + return nil, err + } + return parseCommit(output) +} + +func parseCommit(output []byte) (*Commit, error) { + lines := bytes.Split(output, []byte{'\n'}) + if len(lines) < 4 || len(lines[0]) != 40 { + return nil, fmt.Errorf("unexpected git log output: %q", output) + } + date, err := time.Parse(DateFormat, string(lines[3])) + if err != nil { + return nil, fmt.Errorf("failed to parse date in git log output: %v\n%q", err, output) + } + cc := make(map[string]bool) + cc[strings.ToLower(string(lines[2]))] = true + for _, line := range lines[4:] { + for _, re := range ccRes { + matches := re.FindSubmatchIndex(line) + if matches == nil { + continue + } + addr, err := mail.ParseAddress(string(line[matches[2]:matches[3]])) + if err != nil { + break + } + cc[strings.ToLower(addr.Address)] = true + break + } + } + sortedCC := make([]string, 0, len(cc)) + for addr := range cc { + sortedCC = append(sortedCC, addr) + } + sort.Strings(sortedCC) + com := &Commit{ + Hash: string(lines[0]), + Title: string(lines[1]), + Author: string(lines[2]), + CC: sortedCC, + Date: date, + } + return com, nil +} + +// ListRecentCommits returns list of recent commit titles starting from baseCommit. +func ListRecentCommits(dir, baseCommit string) ([]string, error) { + // On upstream kernel this produces ~11MB of output. + // Somewhat inefficient to collect whole output in a slice + // and then convert to string, but should be bearable. 
+ output, err := runSandboxed(dir, "git", "log", + "--pretty=format:%s", "--no-merges", "-n", "200000", baseCommit) + if err != nil { + return nil, err + } + return strings.Split(string(output), "\n"), nil +} + +type FixCommit struct { + Tag string + Title string +} + +// ExtractFixTagsFromCommits extracts fixing tags for bugs from git log. +// Given email = "user@domain.com", it searches for tags of the form "user+tag@domain.com" +// and return pairs {tag, commit title}. +func ExtractFixTagsFromCommits(dir, baseCommit, email string) ([]FixCommit, error) { + since := time.Now().Add(-time.Hour * 24 * 365).Format("01-02-2006") + cmd := exec.Command("git", "log", "--no-merges", "--since", since, baseCommit) + cmd.Dir = dir + stdout, err := cmd.StdoutPipe() + if err != nil { + return nil, err + } + if err := cmd.Start(); err != nil { + return nil, err + } + defer cmd.Wait() + defer cmd.Process.Kill() + return extractFixTags(stdout, email) +} + +func extractFixTags(r io.Reader, email string) ([]FixCommit, error) { + user, domain, err := splitEmail(email) + if err != nil { + return nil, fmt.Errorf("failed to parse email %q: %v", email, err) + } + var ( + s = bufio.NewScanner(r) + commits []FixCommit + commitTitle = "" + commitStart = []byte("commit ") + bodyPrefix = []byte(" ") + userBytes = []byte(user + "+") + domainBytes = []byte(domain) + ) + for s.Scan() { + ln := s.Bytes() + if bytes.HasPrefix(ln, commitStart) { + commitTitle = "" + continue + } + if !bytes.HasPrefix(ln, bodyPrefix) { + continue + } + ln = ln[len(bodyPrefix):] + if len(ln) == 0 { + continue + } + if commitTitle == "" { + commitTitle = string(ln) + continue + } + userPos := bytes.Index(ln, userBytes) + if userPos == -1 { + continue + } + domainPos := bytes.Index(ln[userPos+len(userBytes)+1:], domainBytes) + if domainPos == -1 { + continue + } + startPos := userPos + len(userBytes) + endPos := userPos + len(userBytes) + domainPos + 1 + tag := string(ln[startPos:endPos]) + commits = append(commits, 
FixCommit{tag, commitTitle}) + } + return commits, s.Err() +} + +func splitEmail(email string) (user, domain string, err error) { + addr, err := mail.ParseAddress(email) + if err != nil { + return "", "", err + } + at := strings.IndexByte(addr.Address, '@') + if at == -1 { + return "", "", fmt.Errorf("no @ in email address") + } + user = addr.Address[:at] + domain = addr.Address[at:] + if plus := strings.IndexByte(user, '+'); plus != -1 { + user = user[:plus] + } + return +} + +// CanonicalizeCommit returns commit title that can be used when checking +// if a particular commit is present in a git tree. +// Some trees add prefixes to commit titles during backporting, +// so we want e.g. commit "foo bar" match "BACKPORT: foo bar". +func CanonicalizeCommit(title string) string { + for _, prefix := range commitPrefixes { + if strings.HasPrefix(title, prefix) { + title = title[len(prefix):] + break + } + } + return strings.TrimSpace(title) +} + +var commitPrefixes = []string{ + "UPSTREAM:", + "CHROMIUM:", + "FROMLIST:", + "BACKPORT:", + "FROMGIT:", + "net-backports:", +} + +func Patch(dir string, patch []byte) error { + // Do --dry-run first to not mess with partially consistent state. + cmd := osutil.Command("patch", "-p1", "--force", "--ignore-whitespace", "--dry-run") + if err := osutil.Sandbox(cmd, true, true); err != nil { + return err + } + cmd.Stdin = bytes.NewReader(patch) + cmd.Dir = dir + if output, err := cmd.CombinedOutput(); err != nil { + // If it reverses clean, then it's already applied + // (seems to be the easiest way to detect it). + cmd = osutil.Command("patch", "-p1", "--force", "--ignore-whitespace", "--reverse", "--dry-run") + if err := osutil.Sandbox(cmd, true, true); err != nil { + return err + } + cmd.Stdin = bytes.NewReader(patch) + cmd.Dir = dir + if _, err := cmd.CombinedOutput(); err == nil { + return fmt.Errorf("patch is already applied") + } + return fmt.Errorf("failed to apply patch:\n%s", output) + } + // Now apply for real. 
+ cmd = osutil.Command("patch", "-p1", "--force", "--ignore-whitespace") + if err := osutil.Sandbox(cmd, true, true); err != nil { + return err + } + cmd.Stdin = bytes.NewReader(patch) + cmd.Dir = dir + if output, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("failed to apply patch after dry run:\n%s", output) + } + return nil +} + +type BisectResult int + +const ( + BisectBad BisectResult = iota + BisectGood + BisectSkip +) + +// Bisect bisects good..bad commit range against the provided predicate (wrapper around git bisect). +// The predicate should return an error only if there is no way to proceed +// (it will abort the process), if possible it should prefer to return BisectSkip. +// Progress of the process is streamed to the provided trace. +// Returns the first commit on which the predicate returns BisectBad. +func Bisect(dir, bad, good string, trace io.Writer, pred func() (BisectResult, error)) (*Commit, error) { + runSandboxed(dir, "git", "bisect", "reset") + runSandboxed(dir, "git", "reset", "--hard") + firstBad, err := GetCommit(dir, bad) + if err != nil { + return nil, err + } + output, err := runSandboxed(dir, "git", "bisect", "start", bad, good) + if err != nil { + return nil, err + } + defer runSandboxed(dir, "git", "bisect", "reset") + fmt.Fprintf(trace, "# git bisect start %v %v\n%s", bad, good, output) + current, err := HeadCommit(dir) + if err != nil { + return nil, err + } + var bisectTerms = [...]string{ + BisectBad: "bad", + BisectGood: "good", + BisectSkip: "skip", + } + for { + res, err := pred() + if err != nil { + return nil, err + } + if res == BisectBad { + firstBad = current + } + output, err = runSandboxed(dir, "git", "bisect", bisectTerms[res]) + if err != nil { + return nil, err + } + fmt.Fprintf(trace, "# git bisect %v %v\n%s", bisectTerms[res], current.Hash, output) + next, err := HeadCommit(dir) + if err != nil { + return nil, err + } + if current.Hash == next.Hash { + return firstBad, nil + } + current = next + } 
+} + +// PreviousReleaseTags returns list of preceding release tags that are reachable from the given commit. +// Note: linux-specific. +func PreviousReleaseTags(dir, commit string) ([]string, error) { + output, err := runSandboxed(dir, "git", "tag", "--no-contains", commit, "--merged", commit, "v*.*") + if err != nil { + return nil, err + } + return parseReleaseTags(output) +} + +func parseReleaseTags(output []byte) ([]string, error) { + var tags []string + for _, tag := range bytes.Split(output, []byte{'\n'}) { + if releaseTagRe.Match(tag) && releaseTagToInt(string(tag)) != 0 { + tags = append(tags, string(tag)) + } + } + sort.Slice(tags, func(i, j int) bool { + return releaseTagToInt(tags[i]) > releaseTagToInt(tags[j]) + }) + return tags, nil +} + +func releaseTagToInt(tag string) uint64 { + matches := releaseTagRe.FindStringSubmatchIndex(tag) + v1, err := strconv.ParseUint(tag[matches[2]:matches[3]], 10, 64) + if err != nil { + return 0 + } + v2, err := strconv.ParseUint(tag[matches[4]:matches[5]], 10, 64) + if err != nil { + return 0 + } + var v3 uint64 + if matches[6] != -1 { + v3, err = strconv.ParseUint(tag[matches[6]:matches[7]], 10, 64) + if err != nil { + return 0 + } + } + return v1*1e6 + v2*1e3 + v3 +} + +func runSandboxed(dir, command string, args ...string) ([]byte, error) { + cmd := osutil.Command(command, args...) + cmd.Dir = dir + if err := osutil.Sandbox(cmd, true, false); err != nil { + return nil, err + } + return osutil.Run(timeout, cmd) +} + +// CheckRepoAddress does a best-effort approximate check of a git repo address. +func CheckRepoAddress(repo string) bool { + return gitRepoRe.MatchString(repo) +} + +// CheckBranch does a best-effort approximate check of a git branch name. 
func CheckBranch(branch string) bool {
	return gitBranchRe.MatchString(branch)
}

// CheckCommitHash does a best-effort approximate check of a git commit hash:
// it must consist of lower-case hex digits only and have one of the
// accepted (abbreviated or full) lengths.
func CheckCommitHash(hash string) bool {
	if !gitHashRe.MatchString(hash) {
		return false
	}
	switch len(hash) {
	case 8, 10, 12, 16, 20, 40:
		return true
	}
	return false
}

var (
	// nolint: lll
	gitRepoRe = regexp.MustCompile(`^(git|ssh|http|https|ftp|ftps)://[a-zA-Z0-9-_]+(\.[a-zA-Z0-9-_]+)+(:[0-9]+)?/[a-zA-Z0-9-_./]+\.git(/)?$`)

	gitBranchRe = regexp.MustCompile("^[a-zA-Z0-9-_/.]{2,200}$")

	gitHashRe = regexp.MustCompile("^[a-f0-9]+$")

	// releaseTagRe matches release tags of the form vA.B or vA.B.C and captures
	// A, B and (optionally) C. The dot between A and B is escaped so that
	// only a literal '.' is accepted there (not e.g. "v417" or "v4x17").
	releaseTagRe = regexp.MustCompile(`^v([0-9]+)\.([0-9]+)(?:\.([0-9]+))?$`)

	// ccRes match commit message lines with review/ack/test tags
	// and capture the part after ": " (name and email of the person).
	ccRes = []*regexp.Regexp{
		regexp.MustCompile(`^Reviewed\-.*: (.*)$`),
		regexp.MustCompile(`^[A-Za-z-]+\-and\-[Rr]eviewed\-.*: (.*)$`),
		regexp.MustCompile(`^Acked\-.*: (.*)$`),
		regexp.MustCompile(`^[A-Za-z-]+\-and\-[Aa]cked\-.*: (.*)$`),
		regexp.MustCompile(`^Tested\-.*: (.*)$`),
		regexp.MustCompile(`^[A-Za-z-]+\-and\-[Tt]ested\-.*: (.*)$`),
	}
)
