From 280ea308c321115445df610f1a75b05bbadca5f3 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Mon, 17 Nov 2025 11:17:23 +0100 Subject: pkg/codesearch: add skeleton for code searching tool Add a clang tool that is used for code indexing (tools/clang/codesearch/). It follows the conventions and build procedure of the declextract tool. Add pkg/codesearch package that aggregates the info exposed by the clang tools, and allows doing simple queries: - show source code of an entity (function, struct, etc) - show entity comment - show all entities defined in a source file Add tools/syz-codesearch wrapper tool that allows creating an index for a kernel build, and then running code queries on it. --- pkg/codesearch/testdata/query-def-comment-close | 3 ++ pkg/codesearch/testdata/query-def-comment-header | 3 ++ pkg/codesearch/testdata/query-def-comment-open | 7 ++++ pkg/codesearch/testdata/query-def-source-close | 8 +++++ pkg/codesearch/testdata/query-def-source-header | 8 +++++ pkg/codesearch/testdata/query-def-source-missing | 3 ++ pkg/codesearch/testdata/query-def-source-open | 11 ++++++ .../testdata/query-def-source-same-name-non-static | 8 +++++ .../testdata/query-def-source-same-name-static | 8 +++++ pkg/codesearch/testdata/query-file-index-missing | 3 ++ pkg/codesearch/testdata/query-file-index-source | 7 ++++ pkg/codesearch/testdata/source0.c | 22 ++++++++++++ pkg/codesearch/testdata/source0.c.json | 41 ++++++++++++++++++++++ pkg/codesearch/testdata/source0.h | 10 ++++++ pkg/codesearch/testdata/source1.c | 7 ++++ pkg/codesearch/testdata/source1.c.json | 20 +++++++++++ pkg/codesearch/testdata/source2.c | 7 ++++ pkg/codesearch/testdata/source2.c.json | 19 ++++++++++ 18 files changed, 195 insertions(+) create mode 100644 pkg/codesearch/testdata/query-def-comment-close create mode 100644 pkg/codesearch/testdata/query-def-comment-header create mode 100644 pkg/codesearch/testdata/query-def-comment-open create mode 100644 pkg/codesearch/testdata/query-def-source-close create mode 
100644 pkg/codesearch/testdata/query-def-source-header create mode 100644 pkg/codesearch/testdata/query-def-source-missing create mode 100644 pkg/codesearch/testdata/query-def-source-open create mode 100644 pkg/codesearch/testdata/query-def-source-same-name-non-static create mode 100644 pkg/codesearch/testdata/query-def-source-same-name-static create mode 100644 pkg/codesearch/testdata/query-file-index-missing create mode 100644 pkg/codesearch/testdata/query-file-index-source create mode 100644 pkg/codesearch/testdata/source0.c create mode 100644 pkg/codesearch/testdata/source0.c.json create mode 100644 pkg/codesearch/testdata/source0.h create mode 100644 pkg/codesearch/testdata/source1.c create mode 100644 pkg/codesearch/testdata/source1.c.json create mode 100644 pkg/codesearch/testdata/source2.c create mode 100644 pkg/codesearch/testdata/source2.c.json (limited to 'pkg/codesearch/testdata') diff --git a/pkg/codesearch/testdata/query-def-comment-close b/pkg/codesearch/testdata/query-def-comment-close new file mode 100644 index 000000000..df6c1c2af --- /dev/null +++ b/pkg/codesearch/testdata/query-def-comment-close @@ -0,0 +1,3 @@ +def-comment source0.c close + +function close is defined in source0.c and is not commented diff --git a/pkg/codesearch/testdata/query-def-comment-header b/pkg/codesearch/testdata/query-def-comment-header new file mode 100644 index 000000000..a940938b8 --- /dev/null +++ b/pkg/codesearch/testdata/query-def-comment-header @@ -0,0 +1,3 @@ +def-comment source0.c function_with_comment_in_header + +function function_with_comment_in_header is defined in source0.c and is not commented diff --git a/pkg/codesearch/testdata/query-def-comment-open b/pkg/codesearch/testdata/query-def-comment-open new file mode 100644 index 000000000..64bd21812 --- /dev/null +++ b/pkg/codesearch/testdata/query-def-comment-open @@ -0,0 +1,7 @@ +def-comment source0.c open + +function open is defined in source0.c and commented as: + +/* + * Comment about open. 
+ */ diff --git a/pkg/codesearch/testdata/query-def-source-close b/pkg/codesearch/testdata/query-def-source-close new file mode 100644 index 000000000..2a9dcefad --- /dev/null +++ b/pkg/codesearch/testdata/query-def-source-close @@ -0,0 +1,8 @@ +def-source source0.c close no + +function close is defined in source0.c: + +int close() +{ + return 0; +} diff --git a/pkg/codesearch/testdata/query-def-source-header b/pkg/codesearch/testdata/query-def-source-header new file mode 100644 index 000000000..fd3ba300b --- /dev/null +++ b/pkg/codesearch/testdata/query-def-source-header @@ -0,0 +1,8 @@ +def-source source0.c function_with_comment_in_header yes + +function function_with_comment_in_header is defined in source0.c: + + 18: void function_with_comment_in_header() + 19: { + 20: same_name_in_several_files(); + 21: } diff --git a/pkg/codesearch/testdata/query-def-source-missing b/pkg/codesearch/testdata/query-def-source-missing new file mode 100644 index 000000000..0b60003c7 --- /dev/null +++ b/pkg/codesearch/testdata/query-def-source-missing @@ -0,0 +1,3 @@ +def-source source0.c some_non_existent_function no + +not found diff --git a/pkg/codesearch/testdata/query-def-source-open b/pkg/codesearch/testdata/query-def-source-open new file mode 100644 index 000000000..bdcec72fd --- /dev/null +++ b/pkg/codesearch/testdata/query-def-source-open @@ -0,0 +1,11 @@ +def-source source0.c open yes + +function open is defined in source0.c: + + 5: /* + 6: * Comment about open. 
+ 7: */ + 8: int open() + 9: { + 10: return 0; + 11: } diff --git a/pkg/codesearch/testdata/query-def-source-same-name-non-static b/pkg/codesearch/testdata/query-def-source-same-name-non-static new file mode 100644 index 000000000..ae09d3313 --- /dev/null +++ b/pkg/codesearch/testdata/query-def-source-same-name-non-static @@ -0,0 +1,8 @@ +def-source source0.c same_name_in_several_files no + +function same_name_in_several_files is defined in source2.c: + +void same_name_in_several_files() +{ + // This is non-static version in in source2.c. +} diff --git a/pkg/codesearch/testdata/query-def-source-same-name-static b/pkg/codesearch/testdata/query-def-source-same-name-static new file mode 100644 index 000000000..3d87c010c --- /dev/null +++ b/pkg/codesearch/testdata/query-def-source-same-name-static @@ -0,0 +1,8 @@ +def-source source1.c same_name_in_several_files yes + +function same_name_in_several_files is defined in source1.c: + + 3: static void same_name_in_several_files() + 4: { + 5: // This is static version in source1.c. + 6: } diff --git a/pkg/codesearch/testdata/query-file-index-missing b/pkg/codesearch/testdata/query-file-index-missing new file mode 100644 index 000000000..1be486378 --- /dev/null +++ b/pkg/codesearch/testdata/query-file-index-missing @@ -0,0 +1,3 @@ +file-index some-non-existent-file.c + +not found diff --git a/pkg/codesearch/testdata/query-file-index-source b/pkg/codesearch/testdata/query-file-index-source new file mode 100644 index 000000000..c238079d0 --- /dev/null +++ b/pkg/codesearch/testdata/query-file-index-source @@ -0,0 +1,7 @@ +file-index source0.c + +file source0.c defines the following entities: + +function close +function function_with_comment_in_header +function open diff --git a/pkg/codesearch/testdata/source0.c b/pkg/codesearch/testdata/source0.c new file mode 100644 index 000000000..384c4c119 --- /dev/null +++ b/pkg/codesearch/testdata/source0.c @@ -0,0 +1,22 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. 
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +#include "source0.h" + +/* + * Comment about open. + */ +int open() +{ + return 0; +} + +int close() +{ + return 0; +} + +void function_with_comment_in_header() +{ + same_name_in_several_files(); +} diff --git a/pkg/codesearch/testdata/source0.c.json b/pkg/codesearch/testdata/source0.c.json new file mode 100644 index 000000000..d33aa360c --- /dev/null +++ b/pkg/codesearch/testdata/source0.c.json @@ -0,0 +1,41 @@ +{ + "definitions": [ + { + "kind": "function", + "name": "close", + "type": "int ()", + "body": { + "file": "source0.c", + "start_line": 14, + "end_line": 17 + }, + "comment": {} + }, + { + "kind": "function", + "name": "function_with_comment_in_header", + "type": "void ()", + "body": { + "file": "source0.c", + "start_line": 19, + "end_line": 22 + }, + "comment": {} + }, + { + "kind": "function", + "name": "open", + "type": "int ()", + "body": { + "file": "source0.c", + "start_line": 6, + "end_line": 12 + }, + "comment": { + "file": "source0.c", + "start_line": 6, + "end_line": 8 + } + } + ] +} \ No newline at end of file diff --git a/pkg/codesearch/testdata/source0.h b/pkg/codesearch/testdata/source0.h new file mode 100644 index 000000000..339975b2e --- /dev/null +++ b/pkg/codesearch/testdata/source0.h @@ -0,0 +1,10 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +/* + * Comment about the function in header. + * Multi-line just in case. + */ +void function_with_comment_in_header(); + +void same_name_in_several_files(); diff --git a/pkg/codesearch/testdata/source1.c b/pkg/codesearch/testdata/source1.c new file mode 100644 index 000000000..ad7d5792c --- /dev/null +++ b/pkg/codesearch/testdata/source1.c @@ -0,0 +1,7 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. 
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +static void same_name_in_several_files() +{ + // This is static version in source1.c. +} diff --git a/pkg/codesearch/testdata/source1.c.json b/pkg/codesearch/testdata/source1.c.json new file mode 100644 index 000000000..72278a191 --- /dev/null +++ b/pkg/codesearch/testdata/source1.c.json @@ -0,0 +1,20 @@ +{ + "definitions": [ + { + "kind": "function", + "name": "same_name_in_several_files", + "type": "void ()", + "is_static": true, + "body": { + "file": "source1.c", + "start_line": 4, + "end_line": 7 + }, + "comment": { + "file": "source1.c", + "start_line": 1, + "end_line": 2 + } + } + ] +} \ No newline at end of file diff --git a/pkg/codesearch/testdata/source2.c b/pkg/codesearch/testdata/source2.c new file mode 100644 index 000000000..f7ef3d810 --- /dev/null +++ b/pkg/codesearch/testdata/source2.c @@ -0,0 +1,7 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +void same_name_in_several_files() +{ + // This is non-static version in in source2.c. +} diff --git a/pkg/codesearch/testdata/source2.c.json b/pkg/codesearch/testdata/source2.c.json new file mode 100644 index 000000000..4407152db --- /dev/null +++ b/pkg/codesearch/testdata/source2.c.json @@ -0,0 +1,19 @@ +{ + "definitions": [ + { + "kind": "function", + "name": "same_name_in_several_files", + "type": "void ()", + "body": { + "file": "source2.c", + "start_line": 4, + "end_line": 7 + }, + "comment": { + "file": "source2.c", + "start_line": 1, + "end_line": 2 + } + } + ] +} \ No newline at end of file -- cgit mrf-deployment