about summary refs log tree commit diff stats
path: root/pkg/vcs/git.go
diff options
context:
space:
mode:
author Dmitry Vyukov <dvyukov@google.com> 2018-06-27 13:07:03 +0200
committer Dmitry Vyukov <dvyukov@google.com> 2018-06-27 13:07:03 +0200
commit 43da5e3a1baae2b2fa4f00e2218632e882654517 (patch)
tree ffe4f2db60cb9e4b0ca7696d118ddebbb907cb31 /pkg/vcs/git.go
parent 43e60f7e0961a7b3bdb6813703c41cee2680a0dd (diff)
pkg/vcs: move from pkg/git
Rename pkg/git to pkg/vcs because we need to support not only git.
Diffstat (limited to 'pkg/vcs/git.go')
-rw-r--r-- pkg/vcs/git.go 496
1 files changed, 496 insertions, 0 deletions
diff --git a/pkg/vcs/git.go b/pkg/vcs/git.go
new file mode 100644
index 000000000..e942b3c39
--- /dev/null
+++ b/pkg/vcs/git.go
@@ -0,0 +1,496 @@
+// Copyright 2017 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+// Package vcs provides helper functions for working with git repositories.
+package vcs
+
+import (
+ "bufio"
+ "bytes"
+ "fmt"
+ "io"
+ "net/mail"
+ "os"
+ "os/exec"
+ "regexp"
+ "sort"
+ "strconv"
+ "strings"
+ "time"
+
+ "github.com/google/syzkaller/pkg/osutil"
+)
+
+const (
+ // DateFormat is the time.Parse layout that parseCommit uses for the
+ // author date line (%ad) of git log output.
+ DateFormat = "Mon Jan 2 15:04:05 2006 -0700"
+ timeout = time.Hour // timeout for all git invocations
+)
+
+// Poll checks out the specified repository/branch in dir.
+// It fetches, resets or re-clones as necessary to recover from a stale or broken checkout.
+// Returns info about the HEAD commit of the specified branch.
+func Poll(dir, repo, branch string) (*Commit, error) {
+	remoteBranch := "origin/" + branch
+	// Best-effort cleanup of whatever a previous run left behind;
+	// failures are deliberately ignored and handled by the steps below.
+	runSandboxed(dir, "git", "bisect", "reset")
+	runSandboxed(dir, "git", "reset", "--hard")
+	url, err := runSandboxed(dir, "git", "remote", "get-url", "origin")
+	originOK := err == nil && strings.TrimSpace(string(url)) == repo
+	if !originOK {
+		// Either there is no usable repo in dir, or it has a wrong origin
+		// (e.g. repo in config has changed), re-clone.
+		if err := clone(dir, repo, branch); err != nil {
+			return nil, err
+		}
+	}
+	// Use origin/branch rather than branch: if the branch was force-pushed,
+	// the local branch is not the same as origin/branch and we would be
+	// stuck with the local version forever (git checkout won't fail).
+	_, err = runSandboxed(dir, "git", "checkout", remoteBranch)
+	if err != nil {
+		// No such branch (e.g. branch in config has changed), re-clone.
+		if err := clone(dir, repo, branch); err != nil {
+			return nil, err
+		}
+	}
+	_, err = runSandboxed(dir, "git", "fetch", "--no-tags")
+	if err != nil {
+		// Something else is wrong, re-clone.
+		if err := clone(dir, repo, branch); err != nil {
+			return nil, err
+		}
+	}
+	_, err = runSandboxed(dir, "git", "checkout", remoteBranch)
+	if err != nil {
+		return nil, err
+	}
+	return HeadCommit(dir)
+}
+
+// CheckoutBranch checks out the specified repository/branch in dir.
+// If dir cannot be hard-reset, it is removed and re-initialized as an
+// empty git repository before fetching.
+func CheckoutBranch(dir, repo, branch string) (*Commit, error) {
+	// Best-effort: the result of resetting a possible bisection is ignored.
+	runSandboxed(dir, "git", "bisect", "reset")
+	_, resetErr := runSandboxed(dir, "git", "reset", "--hard")
+	if resetErr != nil {
+		if err := initRepo(dir); err != nil {
+			return nil, err
+		}
+	}
+	if _, err := runSandboxed(dir, "git", "fetch", repo, branch); err != nil {
+		return nil, err
+	}
+	_, err := runSandboxed(dir, "git", "checkout", "FETCH_HEAD")
+	if err != nil {
+		return nil, err
+	}
+	return HeadCommit(dir)
+}
+
+// CheckoutCommit checks out the specified commit of the specified repository in dir.
+// If dir cannot be hard-reset, it is removed and re-initialized as an
+// empty git repository before fetching.
+func CheckoutCommit(dir, repo, commit string) (*Commit, error) {
+	// Best-effort: the result of resetting a possible bisection is ignored.
+	runSandboxed(dir, "git", "bisect", "reset")
+	_, resetErr := runSandboxed(dir, "git", "reset", "--hard")
+	if resetErr != nil {
+		if err := initRepo(dir); err != nil {
+			return nil, err
+		}
+	}
+	if _, err := runSandboxed(dir, "git", "fetch", repo); err != nil {
+		return nil, err
+	}
+	return SwitchCommit(dir, commit)
+}
+
+// SwitchCommit checks out the specified commit in dir without fetching
+// and returns info about the resulting HEAD commit.
+func SwitchCommit(dir, commit string) (*Commit, error) {
+	_, err := runSandboxed(dir, "git", "checkout", commit)
+	if err != nil {
+		return nil, err
+	}
+	return HeadCommit(dir)
+}
+
+// clone recreates dir as a fresh git repository with repo as its origin remote
+// and fetches branch from it. Nothing is checked out.
+func clone(dir, repo, branch string) error {
+	if err := initRepo(dir); err != nil {
+		return err
+	}
+	_, err := runSandboxed(dir, "git", "remote", "add", "origin", repo)
+	if err == nil {
+		_, err = runSandboxed(dir, "git", "fetch", "origin", branch)
+	}
+	return err
+}
+
+// initRepo removes dir with all of its contents, creates it anew,
+// passes it to osutil.SandboxChown and runs "git init" in it.
+func initRepo(dir string) error {
+	if rmErr := os.RemoveAll(dir); rmErr != nil {
+		return fmt.Errorf("failed to remove repo dir: %v", rmErr)
+	}
+	if mkErr := osutil.MkdirAll(dir); mkErr != nil {
+		return fmt.Errorf("failed to create repo dir: %v", mkErr)
+	}
+	if chownErr := osutil.SandboxChown(dir); chownErr != nil {
+		return chownErr
+	}
+	_, err := runSandboxed(dir, "git", "init")
+	return err
+}
+
+// Commit describes a single git commit as parsed from git log output by parseCommit.
+type Commit struct {
+ Hash string // commit hash (parseCommit accepts only 40-character hashes)
+ Title string // subject line of the commit message
+ Author string // author email as printed by git log
+ CC []string // sorted lower-cased emails: the author plus addresses from Reviewed-/Acked-/Tested- style tags
+ Date time.Time // author date
+}
+
+// HeadCommit returns info about the HEAD commit of the current branch of git repository in dir.
+// It is a shorthand for GetCommit(dir, "HEAD").
+func HeadCommit(dir string) (*Commit, error) {
+ return GetCommit(dir, "HEAD")
+}
+
+// GetCommit returns info about the specified commit of git repository in dir.
+func GetCommit(dir, commit string) (*Commit, error) {
+	// One field per line, in the order parseCommit expects:
+	// hash, subject, author email, author date, then the message body.
+	const format = "--format=%H%n%s%n%ae%n%ad%n%b"
+	out, err := runSandboxed(dir, "git", "log", format, "-n", "1", commit)
+	if err != nil {
+		return nil, err
+	}
+	return parseCommit(out)
+}
+
+// parseCommit parses git log output produced with the format used by GetCommit:
+// hash, title, author email and author date on the first four lines, followed
+// by the commit message body. The CC list is built from the author email plus
+// addresses found in body lines matching one of ccRes.
+func parseCommit(output []byte) (*Commit, error) {
+	fields := bytes.Split(output, []byte{'\n'})
+	if len(fields) < 4 || len(fields[0]) != 40 {
+		return nil, fmt.Errorf("unexpected git log output: %q", output)
+	}
+	hash, title, author := string(fields[0]), string(fields[1]), string(fields[2])
+	date, err := time.Parse(DateFormat, string(fields[3]))
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse date in git log output: %v\n%q", err, output)
+	}
+	// Addresses are collected in a set (lower-cased) to drop duplicates.
+	seen := map[string]bool{strings.ToLower(author): true}
+	for _, line := range fields[4:] {
+		for _, re := range ccRes {
+			loc := re.FindSubmatchIndex(line)
+			if loc == nil {
+				continue
+			}
+			// Only the first matching pattern is considered for a line;
+			// an address that fails to parse is silently dropped.
+			if addr, err := mail.ParseAddress(string(line[loc[2]:loc[3]])); err == nil {
+				seen[strings.ToLower(addr.Address)] = true
+			}
+			break
+		}
+	}
+	cc := make([]string, 0, len(seen))
+	for addr := range seen {
+		cc = append(cc, addr)
+	}
+	sort.Strings(cc)
+	return &Commit{
+		Hash:   hash,
+		Title:  title,
+		Author: author,
+		CC:     cc,
+		Date:   date,
+	}, nil
+}
+
+// ListRecentCommits returns titles of recent non-merge commits starting from baseCommit
+// (at most 200000 of them), one entry per line of git log output.
+func ListRecentCommits(dir, baseCommit string) ([]string, error) {
+	// On upstream kernel this produces ~11MB of output.
+	// Collecting the whole output in a slice and then converting it
+	// to a string is somewhat inefficient, but should be bearable.
+	args := []string{"log", "--pretty=format:%s", "--no-merges", "-n", "200000", baseCommit}
+	out, err := runSandboxed(dir, "git", args...)
+	if err != nil {
+		return nil, err
+	}
+	titles := strings.Split(string(out), "\n")
+	return titles, nil
+}
+
+// FixCommit is a {tag, commit title} pair produced by extractFixTags.
+type FixCommit struct {
+ Tag string // text found between "user+" and "@domain" in a commit message line
+ Title string // first non-empty message line of the commit containing the tag
+}
+
+// ExtractFixTagsFromCommits extracts fixing tags for bugs from git log.
+// Given email = "user@domain.com", it searches commit messages for addresses
+// of the form "user+tag@domain.com" and returns pairs {tag, commit title}.
+// Only non-merge commits reachable from baseCommit and made during
+// the last 365 days are examined.
+func ExtractFixTagsFromCommits(dir, baseCommit, email string) ([]FixCommit, error) {
+	yearAgo := time.Now().Add(-365 * 24 * time.Hour)
+	cmd := exec.Command("git", "log", "--no-merges", "--since", yearAgo.Format("01-02-2006"), baseCommit)
+	cmd.Dir = dir
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		return nil, err
+	}
+	if err = cmd.Start(); err != nil {
+		return nil, err
+	}
+	// On the way out kill git first and only then wait for it to exit.
+	defer func() {
+		cmd.Process.Kill()
+		cmd.Wait()
+	}()
+	return extractFixTags(stdout, email)
+}
+
+// extractFixTags scans git log output (default format) from r and returns
+// a {tag, commit title} pair for every commit message line that contains
+// "user+tag@domain", where user and domain are taken from email.
+//
+// Lines starting with "commit " begin a new commit. Commit message lines
+// are recognized by the four-space indentation git log gives them; the first
+// non-empty message line of a commit is its title. Only the first occurrence
+// of "user+" on a line is examined, and the tag must be at least one
+// character long.
+//
+// It returns an error if email cannot be parsed or if reading from r fails.
+func extractFixTags(r io.Reader, email string) ([]FixCommit, error) {
+	user, domain, err := splitEmail(email)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse email %q: %v", email, err)
+	}
+	var (
+		s           = bufio.NewScanner(r)
+		commits     []FixCommit
+		commitTitle = ""
+		commitStart = []byte("commit ")
+		bodyPrefix  = []byte("    ")
+		userBytes   = []byte(user + "+")
+		domainBytes = []byte(domain)
+	)
+	for s.Scan() {
+		ln := s.Bytes()
+		if bytes.HasPrefix(ln, commitStart) {
+			commitTitle = ""
+			continue
+		}
+		if !bytes.HasPrefix(ln, bodyPrefix) {
+			// Header lines (Author:, Date:, ...) and blank separators.
+			continue
+		}
+		ln = ln[len(bodyPrefix):]
+		if len(ln) == 0 {
+			continue
+		}
+		if commitTitle == "" {
+			commitTitle = string(ln)
+			continue
+		}
+		userPos := bytes.Index(ln, userBytes)
+		if userPos == -1 {
+			continue
+		}
+		tagStart := userPos + len(userBytes)
+		if tagStart >= len(ln) {
+			// The line ends right after "user+": there is no room for a tag,
+			// and slicing past the end of the line below would panic.
+			continue
+		}
+		// The tag is non-empty, so the domain is searched for starting
+		// one byte after the beginning of the tag.
+		domainPos := bytes.Index(ln[tagStart+1:], domainBytes)
+		if domainPos == -1 {
+			continue
+		}
+		tagEnd := tagStart + 1 + domainPos
+		commits = append(commits, FixCommit{Tag: string(ln[tagStart:tagEnd]), Title: commitTitle})
+	}
+	return commits, s.Err()
+}
+
+// splitEmail parses email and splits its address into the user part, with
+// any "+suffix" removed, and the domain part, which keeps the leading '@'.
+// E.g. "bot+tag@domain.com" yields ("bot", "@domain.com").
+func splitEmail(email string) (user, domain string, err error) {
+	parsed, err := mail.ParseAddress(email)
+	if err != nil {
+		return "", "", err
+	}
+	address := parsed.Address
+	at := strings.IndexByte(address, '@')
+	if at < 0 {
+		return "", "", fmt.Errorf("no @ in email address")
+	}
+	user, domain = address[:at], address[at:]
+	if plus := strings.IndexByte(user, '+'); plus >= 0 {
+		user = user[:plus]
+	}
+	return user, domain, nil
+}
+
+// CanonicalizeCommit returns commit title that can be used when checking
+// if a particular commit is present in a git tree.
+// Some trees add prefixes to commit titles during backporting,
+// so we want e.g. commit "foo bar" to match "BACKPORT: foo bar".
+// At most one prefix from commitPrefixes is removed (the first one in the
+// list that matches), then surrounding whitespace is trimmed.
+func CanonicalizeCommit(title string) string {
+	for _, prefix := range commitPrefixes {
+		if strings.HasPrefix(title, prefix) {
+			return strings.TrimSpace(title[len(prefix):])
+		}
+	}
+	return strings.TrimSpace(title)
+}
+
+// commitPrefixes lists the title prefixes that CanonicalizeCommit strips.
+// Matching is case-sensitive.
+var commitPrefixes = []string{
+	"UPSTREAM:",
+	"CHROMIUM:",
+	"FROMLIST:",
+	"BACKPORT:",
+	"FROMGIT:",
+	"net-backports:",
+}
+
+// Patch applies patch to the source tree in dir using patch(1) with -p1.
+// The patch is dry-run first, so the tree is not modified unless the patch
+// is expected to apply. An error is returned if the patch does not apply,
+// including the case when it already appears to be applied.
+func Patch(dir string, patch []byte) error {
+	// run executes sandboxed "patch" in dir with the common flags plus extra,
+	// feeding it the patch on stdin. It returns the combined output, whether
+	// the command succeeded, and a non-nil error only if sandboxing failed.
+	run := func(extra ...string) ([]byte, bool, error) {
+		args := append([]string{"-p1", "--force", "--ignore-whitespace"}, extra...)
+		cmd := osutil.Command("patch", args...)
+		if err := osutil.Sandbox(cmd, true, true); err != nil {
+			return nil, false, err
+		}
+		cmd.Stdin = bytes.NewReader(patch)
+		cmd.Dir = dir
+		out, runErr := cmd.CombinedOutput()
+		return out, runErr == nil, nil
+	}
+
+	// Do --dry-run first to not mess with partially consistent state.
+	dryOutput, applies, err := run("--dry-run")
+	if err != nil {
+		return err
+	}
+	if !applies {
+		// If it reverses clean, then it's already applied
+		// (seems to be the easiest way to detect it).
+		_, reverses, err := run("--reverse", "--dry-run")
+		if err != nil {
+			return err
+		}
+		if reverses {
+			return fmt.Errorf("patch is already applied")
+		}
+		return fmt.Errorf("failed to apply patch:\n%s", dryOutput)
+	}
+	// Now apply for real.
+	output, applied, err := run()
+	if err != nil {
+		return err
+	}
+	if !applied {
+		return fmt.Errorf("failed to apply patch after dry run:\n%s", output)
+	}
+	return nil
+}
+
+// BisectResult is the verdict returned by the predicate passed to Bisect.
+type BisectResult int
+
+// Bisect reports these verdicts to git as "git bisect bad", "git bisect good"
+// and "git bisect skip" respectively.
+const (
+ BisectBad BisectResult = iota
+ BisectGood
+ BisectSkip
+)
+
+// Bisect bisects good..bad commit range against the provided predicate (wrapper around git bisect).
+// The predicate is called for the commit that git bisect has checked out in dir.
+// It should return an error only if there is no way to proceed (it will abort
+// the process); if possible it should prefer to return BisectSkip.
+// A predicate result other than BisectBad/BisectGood/BisectSkip also aborts
+// the process with an error.
+// Progress of the process is streamed to the provided trace.
+// Returns the last commit on which the predicate returned BisectBad,
+// or the bad commit itself if the predicate never returned BisectBad.
+func Bisect(dir, bad, good string, trace io.Writer, pred func() (BisectResult, error)) (*Commit, error) {
+	// Best-effort cleanup of a previous bisection or local changes; errors are ignored.
+	runSandboxed(dir, "git", "bisect", "reset")
+	runSandboxed(dir, "git", "reset", "--hard")
+	firstBad, err := GetCommit(dir, bad)
+	if err != nil {
+		return nil, err
+	}
+	output, err := runSandboxed(dir, "git", "bisect", "start", bad, good)
+	if err != nil {
+		return nil, err
+	}
+	defer runSandboxed(dir, "git", "bisect", "reset")
+	fmt.Fprintf(trace, "# git bisect start %v %v\n%s", bad, good, output)
+	current, err := HeadCommit(dir)
+	if err != nil {
+		return nil, err
+	}
+	var bisectTerms = [...]string{
+		BisectBad:  "bad",
+		BisectGood: "good",
+		BisectSkip: "skip",
+	}
+	for {
+		res, err := pred()
+		if err != nil {
+			return nil, err
+		}
+		// res comes from caller-provided code; indexing bisectTerms with
+		// an out-of-range value would panic.
+		if res < 0 || int(res) >= len(bisectTerms) {
+			return nil, fmt.Errorf("unknown bisect result %v", int(res))
+		}
+		if res == BisectBad {
+			firstBad = current
+		}
+		output, err = runSandboxed(dir, "git", "bisect", bisectTerms[res])
+		if err != nil {
+			return nil, err
+		}
+		fmt.Fprintf(trace, "# git bisect %v %v\n%s", bisectTerms[res], current.Hash, output)
+		next, err := HeadCommit(dir)
+		if err != nil {
+			return nil, err
+		}
+		// git bisect did not move HEAD: there is nothing left to test.
+		if current.Hash == next.Hash {
+			return firstBad, nil
+		}
+		current = next
+	}
+}
+
+// PreviousReleaseTags returns list of preceding release tags that are reachable from the given commit.
+// Tags that contain the commit itself are excluded; the result is ordered
+// from the newest release to the oldest.
+// Note: linux-specific.
+func PreviousReleaseTags(dir, commit string) ([]string, error) {
+	args := []string{"tag", "--no-contains", commit, "--merged", commit, "v*.*"}
+	out, err := runSandboxed(dir, "git", args...)
+	if err != nil {
+		return nil, err
+	}
+	return parseReleaseTags(out)
+}
+
+// parseReleaseTags picks release tags out of newline-separated git tag output
+// and returns them sorted from the highest version to the lowest.
+// Lines that do not match releaseTagRe, or for which releaseTagToInt yields 0,
+// are dropped. The returned error is always nil.
+func parseReleaseTags(output []byte) ([]string, error) {
+	var tags []string
+	for _, raw := range bytes.Split(output, []byte{'\n'}) {
+		if !releaseTagRe.Match(raw) {
+			continue
+		}
+		tag := string(raw)
+		if releaseTagToInt(tag) == 0 {
+			continue
+		}
+		tags = append(tags, tag)
+	}
+	sort.Slice(tags, func(a, b int) bool {
+		return releaseTagToInt(tags[b]) < releaseTagToInt(tags[a])
+	})
+	return tags, nil
+}
+
+// releaseTagToInt converts a release tag of the form vX.Y or vX.Y.Z into
+// a number that orders releases: X*1e6 + Y*1e3 + Z (Z is 0 when absent),
+// e.g. "v4.17" yields 4017000.
+// It returns 0 if tag does not match releaseTagRe or if one of the
+// components does not fit into uint64.
+func releaseTagToInt(tag string) uint64 {
+	matches := releaseTagRe.FindStringSubmatchIndex(tag)
+	if matches == nil {
+		// Not a release tag; indexing matches below would panic.
+		return 0
+	}
+	v1, err := strconv.ParseUint(tag[matches[2]:matches[3]], 10, 64)
+	if err != nil {
+		return 0
+	}
+	v2, err := strconv.ParseUint(tag[matches[4]:matches[5]], 10, 64)
+	if err != nil {
+		return 0
+	}
+	var v3 uint64
+	// The third component is optional; its indices are -1 when it is absent.
+	if matches[6] != -1 {
+		v3, err = strconv.ParseUint(tag[matches[6]:matches[7]], 10, 64)
+		if err != nil {
+			return 0
+		}
+	}
+	return v1*1e6 + v2*1e3 + v3
+}
+
+// runSandboxed runs command with args in dir after passing it through
+// osutil.Sandbox, and returns what osutil.Run returns for it.
+// The run is bounded by the package-wide timeout.
+func runSandboxed(dir, command string, args ...string) ([]byte, error) {
+	c := osutil.Command(command, args...)
+	c.Dir = dir
+	err := osutil.Sandbox(c, true, false)
+	if err != nil {
+		return nil, err
+	}
+	return osutil.Run(timeout, c)
+}
+
+// CheckRepoAddress does a best-effort approximate check of a git repo address.
+// It reports whether repo matches gitRepoRe.
+func CheckRepoAddress(repo string) bool {
+ return gitRepoRe.MatchString(repo)
+}
+
+// CheckBranch does a best-effort approximate check of a git branch name.
+// It reports whether branch matches gitBranchRe.
+func CheckBranch(branch string) bool {
+ return gitBranchRe.MatchString(branch)
+}
+
+// CheckCommitHash reports whether hash looks like a git commit hash:
+// it must match gitHashRe and be 8, 10, 12, 16, 20 or 40 characters long.
+func CheckCommitHash(hash string) bool {
+	switch len(hash) {
+	case 8, 10, 12, 16, 20, 40:
+		return gitHashRe.MatchString(hash)
+	default:
+		return false
+	}
+}
+
+var (
+	// gitRepoRe matches scheme://host[:port]/path.git with an optional trailing slash,
+	// where scheme is one of git, ssh, http, https, ftp, ftps.
+	// nolint: lll
+	gitRepoRe = regexp.MustCompile(`^(git|ssh|http|https|ftp|ftps)://[a-zA-Z0-9-_]+(\.[a-zA-Z0-9-_]+)+(:[0-9]+)?/[a-zA-Z0-9-_./]+\.git(/)?$`)
+	// gitBranchRe matches 2 to 200 characters from the set of letters, digits, '-', '_', '/' and '.'.
+	gitBranchRe = regexp.MustCompile("^[a-zA-Z0-9-_/.]{2,200}$")
+	// gitHashRe matches a non-empty string of lower-case hex digits.
+	gitHashRe = regexp.MustCompile("^[a-f0-9]+$")
+	// releaseTagRe matches release tags of the form vX.Y or vX.Y.Z and captures
+	// X, Y and the optional Z. The dot between X and Y is escaped so that it
+	// matches only a literal '.', not an arbitrary character.
+	releaseTagRe = regexp.MustCompile(`^v([0-9]+)\.([0-9]+)(?:\.([0-9]+))?$`)
+	// ccRes match commit message lines with review/ack/test tags
+	// (e.g. "Reviewed-by: ...") and capture the part after ": ".
+	ccRes = []*regexp.Regexp{
+		regexp.MustCompile(`^Reviewed\-.*: (.*)$`),
+		regexp.MustCompile(`^[A-Za-z-]+\-and\-[Rr]eviewed\-.*: (.*)$`),
+		regexp.MustCompile(`^Acked\-.*: (.*)$`),
+		regexp.MustCompile(`^[A-Za-z-]+\-and\-[Aa]cked\-.*: (.*)$`),
+		regexp.MustCompile(`^Tested\-.*: (.*)$`),
+		regexp.MustCompile(`^[A-Za-z-]+\-and\-[Tt]ested\-.*: (.*)$`),
+	}
+)