From fb714834adfb0e1e36c4cfc7ca288391cfc18986 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Mon, 19 Jan 2026 15:15:15 +0100 Subject: pkg/codesearch: add dir-index command dir-index provides a list of subdirectories and files in the given directory in the source tree. --- pkg/aflow/tool/codesearcher/codesearcher.go | 24 +++++++ pkg/codesearch/codesearch.go | 80 ++++++++++++++++++++++++ pkg/codesearch/testdata/mm/.slub.c | 0 pkg/codesearch/testdata/mm/slub.c | 0 pkg/codesearch/testdata/mm/slub.h | 0 pkg/codesearch/testdata/mm/slub.o | 0 pkg/codesearch/testdata/query-dir-index-escaping | 3 + pkg/codesearch/testdata/query-dir-index-file | 3 + pkg/codesearch/testdata/query-dir-index-missing | 3 + pkg/codesearch/testdata/query-dir-index-mm | 7 +++ pkg/codesearch/testdata/query-dir-index-root | 10 +++ pkg/codesearch/testdata/query-dir-index-root2 | 10 +++ 12 files changed, 140 insertions(+) create mode 100644 pkg/codesearch/testdata/mm/.slub.c create mode 100644 pkg/codesearch/testdata/mm/slub.c create mode 100644 pkg/codesearch/testdata/mm/slub.h create mode 100644 pkg/codesearch/testdata/mm/slub.o create mode 100644 pkg/codesearch/testdata/query-dir-index-escaping create mode 100644 pkg/codesearch/testdata/query-dir-index-file create mode 100644 pkg/codesearch/testdata/query-dir-index-missing create mode 100644 pkg/codesearch/testdata/query-dir-index-mm create mode 100644 pkg/codesearch/testdata/query-dir-index-root create mode 100644 pkg/codesearch/testdata/query-dir-index-root2 diff --git a/pkg/aflow/tool/codesearcher/codesearcher.go b/pkg/aflow/tool/codesearcher/codesearcher.go index 79827d269..c336a0ca4 100644 --- a/pkg/aflow/tool/codesearcher/codesearcher.go +++ b/pkg/aflow/tool/codesearcher/codesearcher.go @@ -14,6 +14,9 @@ import ( ) var Tools = []aflow.Tool{ + aflow.NewFuncTool("codesearch-dir-index", dirIndex, ` +Tool provides list of source files and subdirectories in the given directory in the source tree. +`), aflow.NewFuncTool("codesearch-file-index", fileIndex, ` Tool provides list of entities defined in the given source file. Entity can be function, struct, or global variable. @@ -54,6 +57,17 @@ type prepareResult struct { Index index } +// nolint: lll +type dirIndexArgs struct { + Dir string `jsonschema:"Relative directory in the source tree. Use an empty string for the root of the tree, or paths like 'net/ipv4/' for subdirs."` +} + +type dirIndexResult struct { + Missing bool `jsonschema:"Set to true if the requested directory does not exist."` + Subdirs []string `jsonschema:"List of direct subdirectories."` + Files []string `jsonschema:"List of source files."` +} + type fileIndexArgs struct { SourceFile string `jsonschema:"Source file path."` } @@ -130,6 +144,16 @@ func prepare(ctx *aflow.Context, args prepareArgs) (prepareResult, error) { return prepareResult{index{csIndex}}, err } +func dirIndex(ctx *aflow.Context, state prepareResult, args dirIndexArgs) (dirIndexResult, error) { + ok, subdirs, files, err := state.Index.DirIndex(args.Dir) + res := dirIndexResult{ + Missing: !ok, + Subdirs: subdirs, + Files: files, + } + return res, err +} + func fileIndex(ctx *aflow.Context, state prepareResult, args fileIndexArgs) (fileIndexResult, error) { ok, entities, err := state.Index.FileIndex(args.SourceFile) res := fileIndexResult{ diff --git a/pkg/codesearch/codesearch.go b/pkg/codesearch/codesearch.go index c1e99a174..8e0259af7 100644 --- a/pkg/codesearch/codesearch.go +++ b/pkg/codesearch/codesearch.go @@ -5,10 +5,13 @@ package codesearch import ( "bytes" + "errors" "fmt" "os" "path/filepath" + "slices" "strings" + "syscall" "github.com/google/syzkaller/pkg/osutil" ) @@ -26,6 +29,22 @@ type Command struct { // Commands are used to run unit tests and for the syz-codesearch tool. var Commands = []Command{ + {"dir-index", 1, func(index *Index, args []string) (string, error) { + ok, subdirs, files, err := index.DirIndex(args[0]) + if err != nil || !ok { + return notFound, err + } + b := new(strings.Builder) + fmt.Fprintf(b, "directory %v subdirs:\n", args[0]) + for _, subdir := range subdirs { + fmt.Fprintf(b, " - %v\n", subdir) + } + fmt.Fprintf(b, "\ndirectory %v files:\n", args[0]) + for _, file := range files { + fmt.Fprintf(b, " - %v\n", file) + } + return b.String(), nil + }}, {"file-index", 1, func(index *Index, args []string) (string, error) { ok, entities, err := index.FileIndex(args[0]) if err != nil || !ok { @@ -59,6 +78,8 @@ var Commands = []Command{ }}, } +var SourceExtensions = map[string]bool{".c": true, ".h": true, ".S": true, ".rs": true} + const notFound = "not found\n" func NewIndex(databaseFile string, srcDirs []string) (*Index, error) { @@ -90,6 +111,32 @@ type Entity struct { Name string } +func (index *Index) DirIndex(dir string) (bool, []string, []string, error) { + if err := escaping(dir); err != nil { + return false, nil, nil, nil + } + exists := false + var subdirs, files []string + for _, root := range index.srcDirs { + exists1, subdirs1, files1, err := dirIndex(root, dir) + if err != nil { + return false, nil, nil, err + } + if exists1 { + exists = true + } + subdirs = append(subdirs, subdirs1...) + files = append(files, files1...) + } + slices.Sort(subdirs) + slices.Sort(files) + // Dedup dirs across src/build trees, + // also dedup files, but hopefully there are no duplicates. + subdirs = slices.Compact(subdirs) + files = slices.Compact(files) + return exists, subdirs, files, nil +} + func (index *Index) FileIndex(file string) (bool, []Entity, error) { var entities []Entity for _, def := range index.db.Definitions { @@ -188,3 +235,36 @@ func formatSourceFile(file string, start, end int, includeLines bool) (string, e } return b.String(), nil } + +func escaping(path string) error { + if strings.Contains(filepath.Clean(path), "..") { + return errors.New("path is outside of the source tree") + } + return nil +} + +func dirIndex(root, subdir string) (bool, []string, []string, error) { + dir := filepath.Join(root, subdir) + entries, err := os.ReadDir(dir) + if err != nil { + if os.IsNotExist(err) { + err = nil + } + var errno syscall.Errno + if errors.As(err, &errno) && errno == syscall.ENOTDIR { + err = nil + } + return false, nil, nil, err + } + var subdirs, files []string + for _, entry := range entries { + if strings.HasPrefix(entry.Name(), ".") { + // These are internal things like .git, etc. + } else if entry.IsDir() { + subdirs = append(subdirs, entry.Name()) + } else if SourceExtensions[filepath.Ext(entry.Name())] { + files = append(files, entry.Name()) + } + } + return true, subdirs, files, err +} diff --git a/pkg/codesearch/testdata/mm/.slub.c b/pkg/codesearch/testdata/mm/.slub.c new file mode 100644 index 000000000..e69de29bb diff --git a/pkg/codesearch/testdata/mm/slub.c b/pkg/codesearch/testdata/mm/slub.c new file mode 100644 index 000000000..e69de29bb diff --git a/pkg/codesearch/testdata/mm/slub.h b/pkg/codesearch/testdata/mm/slub.h new file mode 100644 index 000000000..e69de29bb diff --git a/pkg/codesearch/testdata/mm/slub.o b/pkg/codesearch/testdata/mm/slub.o new file mode 100644 index 000000000..e69de29bb diff --git a/pkg/codesearch/testdata/query-dir-index-escaping b/pkg/codesearch/testdata/query-dir-index-escaping new file mode 100644 index 000000000..fd7b55ff0 --- /dev/null +++ b/pkg/codesearch/testdata/query-dir-index-escaping @@ -0,0 +1,3 @@ +dir-index mm/../../ + +not found diff --git a/pkg/codesearch/testdata/query-dir-index-file b/pkg/codesearch/testdata/query-dir-index-file new file mode 100644 index 000000000..eecd67d67 --- /dev/null +++ b/pkg/codesearch/testdata/query-dir-index-file @@ -0,0 +1,3 @@ +dir-index source0.c + +not found diff --git a/pkg/codesearch/testdata/query-dir-index-missing b/pkg/codesearch/testdata/query-dir-index-missing new file mode 100644 index 000000000..e028d1be1 --- /dev/null +++ b/pkg/codesearch/testdata/query-dir-index-missing @@ -0,0 +1,3 @@ +dir-index mm/foobar + +not found diff --git a/pkg/codesearch/testdata/query-dir-index-mm b/pkg/codesearch/testdata/query-dir-index-mm new file mode 100644 index 000000000..554270e4e --- /dev/null +++ b/pkg/codesearch/testdata/query-dir-index-mm @@ -0,0 +1,7 @@ +dir-index /mm/ + +directory /mm/ subdirs: + +directory /mm/ files: + - slub.c + - slub.h diff --git a/pkg/codesearch/testdata/query-dir-index-root b/pkg/codesearch/testdata/query-dir-index-root new file mode 100644 index 000000000..d14d37392 --- /dev/null +++ b/pkg/codesearch/testdata/query-dir-index-root @@ -0,0 +1,10 @@ +dir-index / + +directory / subdirs: + - mm + +directory / files: + - source0.c + - source0.h + - source1.c + - source2.c diff --git a/pkg/codesearch/testdata/query-dir-index-root2 b/pkg/codesearch/testdata/query-dir-index-root2 new file mode 100644 index 000000000..c82c79609 --- /dev/null +++ b/pkg/codesearch/testdata/query-dir-index-root2 @@ -0,0 +1,10 @@ +dir-index /mm/.. + +directory /mm/.. subdirs: + - mm + +directory /mm/.. files: + - source0.c + - source0.h + - source1.c + - source2.c -- cgit mrf-deployment