diff options
| author | Dmitry Vyukov <dvyukov@google.com> | 2024-11-07 15:50:59 +0100 |
|---|---|---|
| committer | Dmitry Vyukov <dvyukov@google.com> | 2024-11-11 15:41:26 +0000 |
| commit | e469b3e4e3c02383df0b1d72e960dfa0f11e9602 (patch) | |
| tree | 5b44c010ef34c3c49d9ab317b9ba4c250025c5b7 /tools | |
| parent | eb96afce744b5fa3e48bec422494c8c5e1b6fbab (diff) | |
tools/syz-declextract: fix non-determinism and syscall selection
Currently syscall selection is non-deterministic and we frequently
choose wrong ones. This leads to flaky argument names/types,
and wrong argument types (e.g. int16 instead of uid,
old_utimbuf32 instead of utimbuf, etc).
Make syscall selection robust and correct.
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/syz-declextract/run.go | 100 |
1 files changed, 60 insertions, 40 deletions
diff --git a/tools/syz-declextract/run.go b/tools/syz-declextract/run.go index 54fbf1277..edb8d55a0 100644 --- a/tools/syz-declextract/run.go +++ b/tools/syz-declextract/run.go @@ -82,7 +82,7 @@ func main() { close(files) var nodes []ast.Node - syscallNames := readSyscallNames(filepath.Join(*sourceDir, "arch")) + syscallNames := readSyscallMap(*sourceDir) var interfaces []Interface eh := ast.LoggingHandler @@ -167,7 +167,7 @@ func makeOutput(nodes []ast.Node) []byte { syscalls = append(syscalls, node) case *ast.Struct: // Special case for unsued struct. TODO: handle unused structs. - if node.Name.Name == "utimbuf$auto_record" { // NOTE: Causes side effect when truncating nodes after the loop + if node.Name.Name == "old_utimbuf32$auto_record" { continue } structs = append(structs, node) @@ -281,65 +281,85 @@ func renameSyscall(syscall *ast.Call, rename map[string][]string) []ast.Node { return renamed } -func readSyscallNames(kernelDir string) map[string][]string { - rename := map[string][]string{ - "syz_genetlink_get_family_id": {"syz_genetlink_get_family_id"}, +func readSyscallMap(sourceDir string) map[string][]string { + // Parse arch/*/*.tbl files that map functions defined with SYSCALL_DEFINE macros to actual syscall names. + // Lines in the files look as follows: + // 288 common accept4 sys_accept4 + // Total mapping is many-to-many, so we give preference to x86 arch, then to 64-bit syscalls, + // and then just order arches by name to have deterministic result. 
+ type desc struct { + fn string + arch string + is64bit bool } + syscalls := make(map[string][]desc) for _, arch := range targets.List[targets.Linux] { - filepath.Walk(filepath.Join(kernelDir, arch.KernelHeaderArch), + filepath.Walk(filepath.Join(sourceDir, "arch", arch.KernelHeaderArch), func(path string, info fs.FileInfo, err error) error { - if err != nil { + if err != nil || !strings.HasSuffix(path, ".tbl") { return err } - if !strings.HasSuffix(path, ".tbl") { - return nil - } - fi, err := os.Lstat(path) - if err != nil { - tool.Fail(err) - } - if fi.Mode()&fs.ModeSymlink != 0 { // Some symlinks link to files outside of arch directory. - return nil - } f, err := os.Open(path) if err != nil { tool.Fail(err) } - s := bufio.NewScanner(f) - for s.Scan() { + defer f.Close() + for s := bufio.NewScanner(f); s.Scan(); { fields := strings.Fields(s.Text()) - if len(fields) < 4 { + if len(fields) < 4 || fields[0] == "#" { continue } - key := strings.TrimPrefix(fields[3], "sys_") - val := fields[2] - if fields[0] == "#" || strings.HasPrefix(fields[2], "unused") || key == "-" || - strings.HasPrefix(key, "compat") || strings.HasPrefix(key, "ia32") || - key == "ni_syscall" || isProhibited(val) { - // System calls prefixed with ia32 are ignored due to conflicting system calls for 64 bit and 32 bit. + group := fields[1] + syscall := fields[2] + fn := strings.TrimPrefix(fields[3], "sys_") + if strings.HasPrefix(syscall, "unused") || fn == "-" || + // Powerpc spu group defines some syscalls (utimesat) + // that are not present on any of our arches. + group == "spu" || + // llseek does not exist, it comes from: + // arch/arm64/tools/syscall_64.tbl -> scripts/syscall.tbl + // 62 32 llseek sys_llseek + // So scripts/syscall.tbl is pulled for 64-bit arch, but the syscall + // is defined only for 32-bit arch in that file. + syscall == "llseek" || + // Don't want to test it (see issue 5308). 
+ syscall == "reboot" { continue } - rename[key] = append(rename[key], val) + syscalls[syscall] = append(syscalls[syscall], desc{ + fn: fn, + arch: arch.VMArch, + is64bit: group == "common" || strings.Contains(group, "64"), + }) } return nil }) } - for k := range rename { - slices.Sort(rename[k]) - rename[k] = slices.Compact(rename[k]) + rename := map[string][]string{ + "syz_genetlink_get_family_id": {"syz_genetlink_get_family_id"}, } - - return rename -} - -func isProhibited(syscall string) bool { - switch syscall { - case "reboot", "utimesat": // `utimesat` is not defined for all arches. - return true - default: - return false + const mainArch = targets.AMD64 + for syscall, descs := range syscalls { + slices.SortFunc(descs, func(a, b desc) int { + if (a.arch == mainArch) != (b.arch == mainArch) { + if a.arch == mainArch { + return -1 + } + return 1 + } + if a.is64bit != b.is64bit { + if a.is64bit { + return -1 + } + return 1 + } + return strings.Compare(a.arch, b.arch) + }) + fn := descs[0].fn + rename[fn] = append(rename[fn], syscall) } + return rename } func appendNodes(slice *[]ast.Node, interfaces *[]Interface, nodes []ast.Node, |
