aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2024-11-11 10:26:41 +0100
committerDmitry Vyukov <dvyukov@google.com>2024-11-11 15:41:26 +0000
commitd81ee624aa8da9b9d6360798bab72156e1abe4ee (patch)
tree272b84c0a02d4c8183011d2ded623cddaf9e8585 /tools
parent616b8eb3df4178dd9fcd28ada8ed34ab61528ccf (diff)
tools/syz-declextract: properly remove unused declarations
Currently we have a number of hacks to deal with unused bits: - remove some netlink syscalls if no policy union present - generate the huge union for unused policies - manually remove 1 struct But we still got more unused structs after recent changes. Properly remove all unused bits using the compiler knowledge. This is both simpler and more reliable.
Diffstat (limited to 'tools')
-rw-r--r--tools/syz-declextract/run.go152
-rw-r--r--tools/syz-declextract/syz-declextract.cpp8
2 files changed, 66 insertions, 94 deletions
diff --git a/tools/syz-declextract/run.go b/tools/syz-declextract/run.go
index 8b37cc4a6..5794c2a45 100644
--- a/tools/syz-declextract/run.go
+++ b/tools/syz-declextract/run.go
@@ -29,10 +29,14 @@ import (
"github.com/google/syzkaller/sys/targets"
)
+var (
+ autoFile = filepath.FromSlash("sys/linux/auto.txt")
+ target = targets.Get(targets.Linux, targets.AMD64)
+)
+
func main() {
var (
binary = flag.String("binary", "syz-declextract", "path to binary")
- outFile = flag.String("output", "sys/linux/auto.txt", "output file")
sourceDir = flag.String("sourcedir", "", "kernel source directory")
buildDir = flag.String("builddir", "", "kernel build directory (defaults to source directory)")
)
@@ -52,7 +56,7 @@ func main() {
tool.Fail(err)
}
- extractor := subsystem.MakeExtractor(subsystem.GetList(targets.Linux))
+ extractor := subsystem.MakeExtractor(subsystem.GetList(target.OS))
var cmds []compileCommand
if err := json.Unmarshal(fileData, &cmds); err != nil {
@@ -106,20 +110,20 @@ func main() {
appendNodes(&nodes, interfaces, parse.Nodes, syscallNames, *sourceDir, *buildDir, file)
}
- // New lines are added in the parsing step. This is why we need to Format (serialize the description),
- // Parse, then Format again.
desc := finishDescriptions(nodes)
- output := ast.Format(ast.Parse(ast.Format(desc), "", ast.LoggingHandler))
- if err := osutil.WriteFile(*outFile, output); err != nil {
- tool.Fail(err)
- }
-
- ifaces := finishInterfaces(interfaces, extractor, *outFile)
+ writeDescriptions(desc)
+ // In order to remove unused bits of the descriptions, we need to write them out first,
+ // and then parse all descriptions back b/c auto descriptions use some types defined
+ // by manual descriptions (compiler.CollectUnused requires complete descriptions).
+ removeUnused(desc)
+ writeDescriptions(desc)
+
+ ifaces := finishInterfaces(interfaces, extractor)
data, err := json.MarshalIndent(ifaces, "", "\t")
if err != nil {
tool.Failf("failed to marshal interfaces: %v", err)
}
- if err := osutil.WriteFile(*outFile+".json", data); err != nil {
+ if err := osutil.WriteFile(autoFile+".json", data); err != nil {
tool.Fail(err)
}
}
@@ -151,7 +155,7 @@ func (iface *Interface) ID() string {
return fmt.Sprintf("%v/%v", iface.Type, iface.Name)
}
-func finishInterfaces(m map[string]Interface, extractor *subsystem.Extractor, autoFile string) []Interface {
+func finishInterfaces(m map[string]Interface, extractor *subsystem.Extractor) []Interface {
var interfaces []Interface
for _, iface := range m {
slices.Sort(iface.Files)
@@ -186,11 +190,11 @@ func mergeInterface(interfaces map[string]Interface, iface Interface) {
}
func checkDescriptionPresence(interfaces []Interface, autoFile string) {
- desc := ast.ParseGlob(filepath.Join("sys", targets.Linux, "*.txt"), nil)
+ desc := ast.ParseGlob(filepath.Join("sys", target.OS, "*.txt"), nil)
if desc == nil {
tool.Failf("failed to parse descriptions")
}
- consts := compiler.ExtractConsts(desc, targets.List[targets.Linux][targets.AMD64], nil)
+ consts := compiler.ExtractConsts(desc, target, nil)
auto := make(map[string]bool)
manual := make(map[string]bool)
for file, desc := range consts {
@@ -213,7 +217,14 @@ func checkDescriptionPresence(interfaces []Interface, autoFile string) {
}
}
-const sendmsg = "sendmsg"
+func writeDescriptions(desc *ast.Description) {
+ // New lines are added in the parsing step. This is why we need to Format (serialize the description),
+ // Parse, then Format again.
+ output := ast.Format(ast.Parse(ast.Format(desc), "", ast.LoggingHandler))
+ if err := osutil.WriteFile(autoFile, output); err != nil {
+ tool.Fail(err)
+ }
+}
func finishDescriptions(nodes []ast.Node) *ast.Description {
slices.SortFunc(nodes, func(a, b ast.Node) int {
@@ -226,80 +237,19 @@ func finishDescriptions(nodes []ast.Node) *ast.Description {
return getTypeOrder(a) - getTypeOrder(b)
})
- var syscalls []*ast.Call
- var structs []*ast.Struct
+ prevCall, prevCallIndex := "", 0
for _, node := range nodes {
- switch node := node.(type) {
+ switch n := node.(type) {
case *ast.Call:
- syscalls = append(syscalls, node)
- case *ast.Struct:
- // Special case for unsued struct. TODO: handle unused structs.
- if node.Name.Name == "old_utimbuf32$auto_record" {
- continue
- }
- structs = append(structs, node)
- case *ast.Include, *ast.TypeDef, *ast.Resource, *ast.IntFlags, *ast.NewLine, *ast.Comment:
- continue
- default:
- _, typ, _ := node.Info()
- tool.Failf("unhandled Node type: %v", typ)
- }
- }
- // NOTE: The -2 at the end is to account for one unused struct and one newline
- nodes = nodes[:len(nodes)-len(structs)-len(syscalls)-2]
-
- sendmsgNo := 0
- // Some commands are executed for multiple policies. Ensure that they don't get deleted by the following compact call.
- for i := 1; i < len(syscalls); i++ {
- if syscalls[i].CallName == sendmsg && syscalls[i].Name.Name == syscalls[i-1].Name.Name {
- syscalls[i].Name.Name += strconv.Itoa(sendmsgNo)
- sendmsgNo++
- }
- }
- syscalls = slices.CompactFunc(syscalls, func(a, b *ast.Call) bool {
- // We only compare the the system call names for cases where the same system call has different parameter names,
- // but share the same syzkaller type. NOTE:Change when we have better type extraction.
- return a.Name.Name == b.Name.Name
- })
-
- usedNetlink := make(map[string]bool)
- for _, node := range syscalls {
- if node.CallName == sendmsg && len(node.Args[1].Type.Args) == 2 && len(node.Args[1].Type.Args[1].Args) > 1 {
- policy := node.Args[1].Type.Args[1].Args[1].Ident
- usedNetlink[policy] = true
- _, isDefined := slices.BinarySearchFunc(structs, policy, func(a *ast.Struct, b string) int {
- return strings.Compare(a.Name.Name, b)
- })
- if !isDefined {
- continue
+ if n.Name.Name == prevCall {
+ n.Name.Name += strconv.Itoa(prevCallIndex)
+ prevCallIndex++
+ } else {
+ prevCall = n.Name.Name
+ prevCallIndex = 0
}
}
- nodes = append(nodes, node)
- }
- var netlinkNames []string
- for _, node := range structs {
- nodes = append(nodes, node)
- name := node.Name.Name
- if !usedNetlink[name] && !strings.HasSuffix(name, "$auto_record") {
- netlinkNames = append(netlinkNames, name)
- }
}
- for i, structName := range netlinkNames {
- netlinkNames[i] = fmt.Sprintf("\tpolicy%v msghdr_auto[%v]\n", i, structName)
- }
- netlinkUnion := `
-type msghdr_auto[POLICY] msghdr_netlink[netlink_msg_t[autogenerated_netlink, genlmsghdr, POLICY]]
-resource autogenerated_netlink[int16]
-syz_genetlink_get_family_id$auto(name ptr[in, string], fd sock_nl_generic) autogenerated_netlink
-sendmsg$autorun(fd sock_nl_generic, msg ptr[in, auto_union], f flags[send_flags])
-auto_union [
-` + strings.Join(netlinkNames, "") + "]"
- eh := ast.LoggingHandler
- netlinkUnionParsed := ast.Parse([]byte(netlinkUnion), "", eh)
- if netlinkUnionParsed == nil {
- tool.Failf("parsing error")
- }
- nodes = append(nodes, netlinkUnionParsed.Nodes...)
// These additional includes must be at the top (added after sorting), because other kernel headers
// are broken and won't compile without these additional ones included first.
@@ -308,9 +258,30 @@ auto_union [
include <include/vdso/bits.h>
include <include/linux/types.h>
`
- desc := ast.Parse([]byte(header), "", eh)
- nodes = append(desc.Nodes, nodes...)
- return &ast.Description{Nodes: nodes}
+ desc := ast.Parse([]byte(header), "", nil)
+ desc.Nodes = append(desc.Nodes, nodes...)
+ return desc
+}
+
+func removeUnused(desc *ast.Description) {
+ all := ast.ParseGlob(filepath.Join("sys", target.OS, "*.txt"), nil)
+ if all == nil {
+ tool.Failf("failed to parse descriptions")
+ }
+ unusedNodes, err := compiler.CollectUnused(all, target, nil)
+ if err != nil {
+ tool.Failf("failed to typecheck descriptions: %v", err)
+ }
+ unused := make(map[string]bool)
+ for _, n := range unusedNodes {
+ if pos, typ, name := n.Info(); pos.File == autoFile {
+ unused[fmt.Sprintf("%v/%v", typ, name)] = true
+ }
+ }
+ desc.Nodes = slices.DeleteFunc(desc.Nodes, func(n ast.Node) bool {
+ _, typ, name := n.Info()
+ return unused[fmt.Sprintf("%v/%v", typ, name)]
+ })
}
func worker(outputs chan *output, files chan string, binary, compilationDatabase string) {
@@ -357,7 +328,7 @@ func readSyscallMap(sourceDir string) map[string][]string {
is64bit bool
}
syscalls := make(map[string][]desc)
- for _, arch := range targets.List[targets.Linux] {
+ for _, arch := range targets.List[target.OS] {
filepath.Walk(filepath.Join(sourceDir, "arch", arch.KernelHeaderArch),
func(path string, info fs.FileInfo, err error) error {
if err != nil || !strings.HasSuffix(path, ".tbl") {
@@ -403,11 +374,10 @@ func readSyscallMap(sourceDir string) map[string][]string {
rename := map[string][]string{
"syz_genetlink_get_family_id": {"syz_genetlink_get_family_id"},
}
- const mainArch = targets.AMD64
for syscall, descs := range syscalls {
slices.SortFunc(descs, func(a, b desc) int {
- if (a.arch == mainArch) != (b.arch == mainArch) {
- if a.arch == mainArch {
+ if (a.arch == target.Arch) != (b.arch == target.Arch) {
+ if a.arch == target.Arch {
return -1
}
return 1
diff --git a/tools/syz-declextract/syz-declextract.cpp b/tools/syz-declextract/syz-declextract.cpp
index f9b08eef5..0a24c6418 100644
--- a/tools/syz-declextract/syz-declextract.cpp
+++ b/tools/syz-declextract/syz-declextract.cpp
@@ -704,12 +704,12 @@ private:
if (!netlinkDecl) {
return;
}
- std::vector<std::vector<Expr *>> fields;
const auto *init = netlinkDecl->getInit();
if (!init) {
return;
}
+ std::vector<std::vector<Expr *>> fields;
for (const auto &policy : *llvm::dyn_cast<InitListExpr>(init)) {
fields.push_back(std::vector<Expr *>());
for (const auto &member : policy->children()) {
@@ -718,8 +718,10 @@ private:
}
auto enumData = extractDesignatedInitConsts(*context, *netlinkDecl);
- // TODO: generate an empty message for these or something.
if (enumData.empty()) {
+ // We need to emit at least some type for it.
+ // Ideally it should be void, but typedef to void currently does not work.
+ printf("type %s auto_todo\n", getPolicyName(Result, netlinkDecl)->c_str());
return;
}
for (const auto &[_, item] : enumData) {
@@ -730,7 +732,7 @@ private:
}
RecordExtractor recordExtractor(Result.SourceManager);
- printf("%s[\n", getPolicyName(Result, netlinkDecl)->c_str());
+ printf("%s [\n", getPolicyName(Result, netlinkDecl)->c_str());
for (size_t i = 0; i < fields.size(); ++i) {
// The array could have an implicitly initialized policy (i.e. empty) or an unnamed attribute
if (fields[i].empty() || enumData[i].name.empty()) {