aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Dmitry Vyukov <dvyukov@google.com> 2025-01-17 10:39:46 +0100
committer: Dmitry Vyukov <dvyukov@google.com> 2025-01-17 18:09:32 +0000
commit: 38ee454540b9b41d5cc173871dfbf7dd140e8abc (patch)
tree: a7f5f04a7a9286f9eeca01520b51012e3606bb99
parent: 953d1c45a16b7284725e337b47369a8ab111bab4 (diff)
pkg/declextract: move const handling logic from the clang tool
Export raw info about consts from the clang tool, and let the Go part handle it. The less logic there is in the clang tool, the better. This will also allow us to remove unused includes once we know which consts we ended up using. The more headers we include, the higher the chance that we include something that's broken.
-rw-r--r-- pkg/declextract/declextract.go 53
-rw-r--r-- pkg/declextract/entity.go 20
-rw-r--r-- pkg/declextract/serialization.go 4
-rw-r--r-- tools/syz-declextract/clangtool/declextract.cpp 28
-rw-r--r-- tools/syz-declextract/clangtool/output.h 26
-rw-r--r-- tools/syz-declextract/testdata/file_operations.c.json 28
-rw-r--r-- tools/syz-declextract/testdata/io_uring.c.json 23
-rw-r--r-- tools/syz-declextract/testdata/netlink.c.json 62
-rw-r--r-- tools/syz-declextract/testdata/types.c.json 11
9 files changed, 178 insertions, 77 deletions
diff --git a/pkg/declextract/declextract.go b/pkg/declextract/declextract.go
index 1f4592523..fdf06b373 100644
--- a/pkg/declextract/declextract.go
+++ b/pkg/declextract/declextract.go
@@ -29,7 +29,7 @@ func Run(out *Output, probe *ifaceprobe.Info, syscallRename map[string][]string,
}
ctx.processFunctions()
ctx.processTypingFacts()
- ctx.processIncludes()
+ ctx.processConsts()
ctx.processEnums()
ctx.processStructs()
ctx.processSyscalls()
@@ -47,6 +47,8 @@ type context struct {
structs map[string]*Struct
funcs map[string]*Function
facts map[string]*typingNode
+ includes []string
+ defines []define
uniqualizer map[string]int
interfaces []*Interface
descriptions *bytes.Buffer
@@ -54,6 +56,11 @@ type context struct {
errs []error
}
+type define struct {
+ Name string
+ Value string
+}
+
func (ctx *context) error(msg string, args ...any) {
ctx.errs = append(ctx.errs, fmt.Errorf(msg, args...))
}
@@ -68,14 +75,7 @@ func (ctx *context) trace(msg string, args ...any) {
}
}
-func (ctx *context) processIncludes() {
- // These additional includes must be at the top, because other kernel headers
- // are broken and won't compile without these additional ones included first.
- ctx.Includes = append([]string{
- "vdso/bits.h",
- "linux/types.h",
- "net/netlink.h",
- }, ctx.Includes...)
+func (ctx *context) processConsts() {
replaces := map[string]string{
// Arches may use some includes from asm-generic and some from arch/arm.
// If the arch used for extract used asm-generic for a header,
@@ -84,11 +84,40 @@ func (ctx *context) processIncludes() {
"include/uapi/asm-generic/ioctls.h": "asm/ioctls.h",
"include/uapi/asm-generic/sockios.h": "asm/sockios.h",
}
- for i, inc := range ctx.Includes {
- if replace := replaces[inc]; replace != "" {
- ctx.Includes[i] = replace
+ defineDedup := make(map[string]bool)
+ for _, ci := range ctx.Consts {
+ if strings.Contains(ci.Filename, "/uapi/") && !strings.Contains(ci.Filename, "arch/x86/") &&
+ strings.HasSuffix(ci.Filename, ".h") {
+ filename := ci.Filename
+ if replace := replaces[filename]; replace != "" {
+ filename = replace
+ }
+ ctx.includes = append(ctx.includes, filename)
+ continue
}
+ // Remove duplicate defines (even with different values). Unfortunately we get a few of these.
+ // There are some syscall numbers (presumably for 32/64 bits), and some macros that
+ // are defined in different files to different values (e.g. WMI_DATA_BE_SVC).
+ // Ideally we would somehow rename the defines (choosing one random value is never correct).
+ // But for now this helps to prevent compilation errors.
+ if defineDedup[ci.Name] {
+ continue
+ }
+ defineDedup[ci.Name] = true
+ ctx.defines = append(ctx.defines, define{
+ Name: ci.Name,
+ Value: fmt.Sprint(ci.Value),
+ })
}
+ ctx.includes = sortAndDedupSlice(ctx.includes)
+ ctx.defines = sortAndDedupSlice(ctx.defines)
+ // These additional includes must be at the top, because other kernel headers
+ // are broken and won't compile without these additional ones included first.
+ ctx.includes = append([]string{
+ "vdso/bits.h",
+ "linux/types.h",
+ "net/netlink.h",
+ }, ctx.includes...)
}
func (ctx *context) processEnums() {
diff --git a/pkg/declextract/entity.go b/pkg/declextract/entity.go
index 266647ed8..8167d8b99 100644
--- a/pkg/declextract/entity.go
+++ b/pkg/declextract/entity.go
@@ -12,8 +12,7 @@ import (
type Output struct {
Functions []*Function `json:"functions,omitempty"`
- Includes []string `json:"includes,omitempty"`
- Defines []*Define `json:"defines,omitempty"`
+ Consts []*ConstInfo `json:"consts,omitempty"`
Enums []*Enum `json:"enums,omitempty"`
Structs []*Struct `json:"structs,omitempty"`
Syscalls []*Syscall `json:"syscalls,omitempty"`
@@ -36,9 +35,10 @@ type Function struct {
facts map[string]*typingNode
}
-type Define struct {
- Name string `json:"name,omitempty"`
- Value string `json:"value,omitempty"`
+type ConstInfo struct {
+ Name string `json:"name"`
+ Filename string `json:"filename"`
+ Value int64 `json:"value"`
}
type Field struct {
@@ -199,8 +199,7 @@ type EntityGlobalAddr struct {
func (out *Output) Merge(other *Output) {
out.Functions = append(out.Functions, other.Functions...)
- out.Includes = append(out.Includes, other.Includes...)
- out.Defines = append(out.Defines, other.Defines...)
+ out.Consts = append(out.Consts, other.Consts...)
out.Enums = append(out.Enums, other.Enums...)
out.Structs = append(out.Structs, other.Structs...)
out.Syscalls = append(out.Syscalls, other.Syscalls...)
@@ -212,8 +211,7 @@ func (out *Output) Merge(other *Output) {
func (out *Output) SortAndDedup() {
out.Functions = sortAndDedupSlice(out.Functions)
- out.Includes = sortAndDedupSlice(out.Includes)
- out.Defines = sortAndDedupSlice(out.Defines)
+ out.Consts = sortAndDedupSlice(out.Consts)
out.Enums = sortAndDedupSlice(out.Enums)
out.Structs = sortAndDedupSlice(out.Structs)
out.Syscalls = sortAndDedupSlice(out.Syscalls)
@@ -229,8 +227,8 @@ func (out *Output) SetSourceFile(file string, updatePath func(string) string) {
for _, fn := range out.Functions {
fn.File = updatePath(fn.File)
}
- for i, inc := range out.Includes {
- out.Includes[i] = updatePath(inc)
+ for _, ci := range out.Consts {
+ ci.Filename = updatePath(ci.Filename)
}
for _, call := range out.Syscalls {
call.SourceFile = file
diff --git a/pkg/declextract/serialization.go b/pkg/declextract/serialization.go
index d69358679..571336097 100644
--- a/pkg/declextract/serialization.go
+++ b/pkg/declextract/serialization.go
@@ -39,14 +39,14 @@ func (ctx *context) fmt(msg string, args ...any) {
}
func (ctx *context) serializeIncludes() {
- for _, inc := range ctx.Includes {
+ for _, inc := range ctx.includes {
ctx.fmt("include <%s>\n", inc)
}
ctx.fmt("\n")
}
func (ctx *context) serializeDefines() {
- for _, def := range ctx.Defines {
+ for _, def := range ctx.defines {
ctx.fmt("define %v %v\n", def.Name, def.Value)
}
ctx.fmt("\n")
diff --git a/tools/syz-declextract/clangtool/declextract.cpp b/tools/syz-declextract/clangtool/declextract.cpp
index c41904d8b..4012900d6 100644
--- a/tools/syz-declextract/clangtool/declextract.cpp
+++ b/tools/syz-declextract/clangtool/declextract.cpp
@@ -113,7 +113,7 @@ private:
template <typename T> const T* getResult(StringRef ID) const;
FieldType extractRecord(QualType QT, const RecordType* Typ, const std::string& BackupName);
std::string extractEnum(const EnumDecl* Decl);
- void noteConstUse(const std::string& Name, int64_t Val, const SourceRange& Range);
+ void emitConst(const std::string& Name, int64_t Val, SourceLocation Loc);
std::string getDeclName(const Expr* Expr);
const ValueDecl* getValueDecl(const Expr* Expr);
std::string getDeclFileID(const Decl* Decl);
@@ -304,7 +304,7 @@ std::string Extractor::extractEnum(const EnumDecl* Decl) {
std::vector<std::string> Values;
for (const auto* Enumerator : Decl->enumerators()) {
const std::string& Name = Enumerator->getNameAsString();
- noteConstUse(Name, Enumerator->getInitVal().getExtValue(), Decl->getSourceRange());
+ emitConst(Name, Enumerator->getInitVal().getExtValue(), Decl->getBeginLoc());
Values.push_back(Name);
}
Output.emit(Enum{
@@ -314,19 +314,11 @@ std::string Extractor::extractEnum(const EnumDecl* Decl) {
return Name;
}
-void Extractor::noteConstUse(const std::string& Name, int64_t Val, const SourceRange& Range) {
- const std::string& Filename = std::filesystem::relative(SourceManager->getFilename(Range.getBegin()).str());
- // Include only uapi headers. Some ioctl commands defined in internal headers, or even in .c files.
- // They have high chances of breaking compilation during const extract.
- // If it's not defined in uapi, emit define with concrete value.
- // Note: the value may be wrong for other arches.
- if (Filename.find("/uapi/") != std::string::npos && Filename.back() == 'h') {
- Output.emit(Include{Filename});
- return;
- }
- Output.emit(Define{
+void Extractor::emitConst(const std::string& Name, int64_t Val, SourceLocation Loc) {
+ Output.emit(ConstInfo{
.Name = Name,
- .Value = std::to_string(Val),
+ .Filename = std::filesystem::relative(SourceManager->getFilename(Loc).str()),
+ .Value = Val,
});
}
@@ -445,8 +437,8 @@ std::vector<std::pair<int, std::string>> Extractor::extractDesignatedInitConsts(
for (auto* Match : Matches) {
const int64_t Val = *Match->getAPValueResult().getInt().getRawData();
const auto& Name = Match->getEnumConstantDecl()->getNameAsString();
- const auto& SR = Match->getEnumConstantDecl()->getSourceRange();
- noteConstUse(Name, Val, SR);
+ const auto& Loc = Match->getEnumConstantDecl()->getBeginLoc();
+ emitConst(Name, Val, Loc);
Inits.emplace_back(Val, Name);
}
return Inits;
@@ -523,7 +515,7 @@ void Extractor::matchNetlinkFamily() {
if (!CmdInit)
continue;
const std::string& OpName = CmdInit->getNameAsString();
- noteConstUse(OpName, CmdInit->getInitVal().getExtValue(), CmdInit->getSourceRange());
+ emitConst(OpName, CmdInit->getInitVal().getExtValue(), CmdInit->getBeginLoc());
std::string Policy;
if (OpsFields.count("policy") != 0) {
if (const auto* PolicyDecl = OpInit->getInit(OpsFields["policy"])->getAsBuiltinConstantDeclRef(*Context))
@@ -818,7 +810,7 @@ std::vector<IoctlCmd> Extractor::extractIoctlCommands(const std::string& Ioctl)
if (MacroDef == Macros.end())
continue;
int64_t CmdVal = evaluate(Cmd);
- noteConstUse(CmdStr, CmdVal, MacroDef->second.SourceRange);
+ emitConst(CmdStr, CmdVal, MacroDef->second.SourceRange.getBegin());
FieldType CmdType;
const auto Dir = _IOC_DIR(CmdVal);
if (Dir == _IOC_NONE) {
diff --git a/tools/syz-declextract/clangtool/output.h b/tools/syz-declextract/clangtool/output.h
index 24ab82f61..d89d64afb 100644
--- a/tools/syz-declextract/clangtool/output.h
+++ b/tools/syz-declextract/clangtool/output.h
@@ -62,13 +62,10 @@ struct BufferType {
bool IsNonTerminated = false;
};
-struct Include {
- std::string Filename;
-};
-
-struct Define {
+struct ConstInfo {
std::string Name;
- std::string Value;
+ std::string Filename;
+ int64_t Value;
};
struct Field {
@@ -201,9 +198,10 @@ struct NetlinkPolicy {
std::vector<NetlinkAttr> Attrs;
};
-inline void print(JSONPrinter& Printer, const Define& V) {
+inline void print(JSONPrinter& Printer, const ConstInfo& V) {
JSONPrinter::Scope Scope(Printer);
Printer.Field("name", V.Name);
+ Printer.Field("filename", V.Filename);
Printer.Field("value", V.Value, true);
}
@@ -412,13 +410,8 @@ inline FieldType TodoType() {
class Output {
public:
- void emit(Include&& Inc) {
- if (IncludesDedup.insert(Inc.Filename).second)
- Includes.push_back(Inc.Filename);
- }
-
void emit(Function&& V) { Functions.push_back(std::move(V)); }
- void emit(Define&& V) { Defines.push_back(std::move(V)); }
+ void emit(ConstInfo&& V) { Consts.push_back(std::move(V)); }
void emit(Struct&& V) { Structs.push_back(std::move(V)); }
void emit(Enum&& V) { Enums.push_back(std::move(V)); }
void emit(Syscall&& V) { Syscalls.push_back(std::move(V)); }
@@ -430,8 +423,7 @@ public:
void print() const {
JSONPrinter Printer;
Printer.Field("functions", Functions);
- Printer.Field("includes", Includes);
- Printer.Field("defines", Defines);
+ Printer.Field("consts", Consts);
Printer.Field("enums", Enums);
Printer.Field("structs", Structs);
Printer.Field("syscalls", Syscalls);
@@ -443,9 +435,7 @@ public:
private:
std::vector<Function> Functions;
- std::vector<std::string> Includes;
- std::unordered_set<std::string> IncludesDedup;
- std::vector<Define> Defines;
+ std::vector<ConstInfo> Consts;
std::vector<Enum> Enums;
std::vector<Struct> Structs;
std::vector<Syscall> Syscalls;
diff --git a/tools/syz-declextract/testdata/file_operations.c.json b/tools/syz-declextract/testdata/file_operations.c.json
index 94ad7bef5..df1c9a20f 100644
--- a/tools/syz-declextract/testdata/file_operations.c.json
+++ b/tools/syz-declextract/testdata/file_operations.c.json
@@ -47,8 +47,32 @@
"is_static": true
}
],
- "includes": [
- "include/uapi/file_operations.h"
+ "consts": [
+ {
+ "name": "FOO_IOCTL1",
+ "filename": "include/uapi/file_operations.h",
+ "value": 25345
+ },
+ {
+ "name": "FOO_IOCTL2",
+ "filename": "include/uapi/file_operations.h",
+ "value": 2147771138
+ },
+ {
+ "name": "FOO_IOCTL3",
+ "filename": "include/uapi/file_operations.h",
+ "value": 2148033283
+ },
+ {
+ "name": "FOO_IOCTL4",
+ "filename": "include/uapi/file_operations.h",
+ "value": 1074291460
+ },
+ {
+ "name": "FOO_IOCTL5",
+ "filename": "include/uapi/file_operations.h",
+ "value": 3221775109
+ }
],
"structs": [
{
diff --git a/tools/syz-declextract/testdata/io_uring.c.json b/tools/syz-declextract/testdata/io_uring.c.json
index 3f0a74a95..da91ce1b6 100644
--- a/tools/syz-declextract/testdata/io_uring.c.json
+++ b/tools/syz-declextract/testdata/io_uring.c.json
@@ -29,8 +29,27 @@
"file": "io_uring.c"
}
],
- "includes": [
- "include/uapi/io_uring.h"
+ "consts": [
+ {
+ "name": "IORING_OP_NOP",
+ "filename": "include/uapi/io_uring.h",
+ "value": 0
+ },
+ {
+ "name": "IORING_OP_NOT_SUPPORTED",
+ "filename": "include/uapi/io_uring.h",
+ "value": 3
+ },
+ {
+ "name": "IORING_OP_READV",
+ "filename": "include/uapi/io_uring.h",
+ "value": 1
+ },
+ {
+ "name": "IORING_OP_WRITEV",
+ "filename": "include/uapi/io_uring.h",
+ "value": 2
+ }
],
"iouring_ops": [
{
diff --git a/tools/syz-declextract/testdata/netlink.c.json b/tools/syz-declextract/testdata/netlink.c.json
index 4233eab7d..e1c2754b1 100644
--- a/tools/syz-declextract/testdata/netlink.c.json
+++ b/tools/syz-declextract/testdata/netlink.c.json
@@ -37,25 +37,71 @@
"is_static": true
}
],
- "includes": [
- "include/uapi/netlink_family.h"
- ],
- "defines": [
+ "consts": [
{
"name": "NETLINK_BAR_CMD_FOO",
- "value": "0"
+ "filename": "netlink.c",
+ "value": 0
+ },
+ {
+ "name": "NETLINK_FOO_ATTR1",
+ "filename": "include/uapi/netlink_family.h",
+ "value": 0
+ },
+ {
+ "name": "NETLINK_FOO_ATTR2",
+ "filename": "include/uapi/netlink_family.h",
+ "value": 1
+ },
+ {
+ "name": "NETLINK_FOO_ATTR3",
+ "filename": "include/uapi/netlink_family.h",
+ "value": 4
+ },
+ {
+ "name": "NETLINK_FOO_ATTR4",
+ "filename": "include/uapi/netlink_family.h",
+ "value": 5
+ },
+ {
+ "name": "NETLINK_FOO_ATTR5",
+ "filename": "include/uapi/netlink_family.h",
+ "value": 6
+ },
+ {
+ "name": "NETLINK_FOO_ATTR6",
+ "filename": "include/uapi/netlink_family.h",
+ "value": 7
+ },
+ {
+ "name": "NETLINK_FOO_ATTR7",
+ "filename": "include/uapi/netlink_family.h",
+ "value": 8
+ },
+ {
+ "name": "NETLINK_FOO_CMD_BAR",
+ "filename": "include/uapi/netlink_family.h",
+ "value": 1
+ },
+ {
+ "name": "NETLINK_FOO_CMD_FOO",
+ "filename": "include/uapi/netlink_family.h",
+ "value": 0
},
{
"name": "NETLINK_FOO_NESTED_ATTR1",
- "value": "0"
+ "filename": "netlink.c",
+ "value": 0
},
{
"name": "NETLINK_FOO_NESTED_ATTR2",
- "value": "1"
+ "filename": "netlink.c",
+ "value": 1
},
{
"name": "NETLINK_NOPOLICY_CMD",
- "value": "0"
+ "filename": "netlink.c",
+ "value": 0
}
],
"structs": [
diff --git a/tools/syz-declextract/testdata/types.c.json b/tools/syz-declextract/testdata/types.c.json
index a5a7088db..82d45e3fe 100644
--- a/tools/syz-declextract/testdata/types.c.json
+++ b/tools/syz-declextract/testdata/types.c.json
@@ -111,18 +111,21 @@
]
}
],
- "defines": [
+ "consts": [
{
"name": "a",
- "value": "0"
+ "filename": "types.c",
+ "value": 0
},
{
"name": "b",
- "value": "1"
+ "filename": "types.c",
+ "value": 1
},
{
"name": "c",
- "value": "2"
+ "filename": "types.c",
+ "value": 2
}
],
"enums": [