From 9a3002038e891237ff5b561f756a0ff6e32d1d2f Mon Sep 17 00:00:00 2001 From: Ethan Graham Date: Mon, 15 Sep 2025 12:56:38 +0000 Subject: pkg/kfuzztest: add pkg/kfuzztest Add a new package, pkg/kfuzztest, that implements dynamic discovery of KFuzzTest targets by parsing a vmlinux kernel binary. Signed-off-by: Ethan Graham --- pkg/kfuzztest/builder.go | 254 ++++++++++++++++ pkg/kfuzztest/description_generation_test.go | 103 +++++++ pkg/kfuzztest/extractor.go | 435 +++++++++++++++++++++++++++ pkg/kfuzztest/kfuzztest.go | 207 +++++++++++++ pkg/kfuzztest/testdata/.gitignore | 1 + pkg/kfuzztest/testdata/1/desc.txt | 7 + pkg/kfuzztest/testdata/1/prog.c | 24 ++ pkg/kfuzztest/testdata/2/desc.txt | 15 + pkg/kfuzztest/testdata/2/prog.c | 39 +++ pkg/kfuzztest/testdata/common.h | 81 +++++ pkg/kfuzztest/testdata/linker.ld | 39 +++ pkg/kfuzztest/types.go | 135 +++++++++ 12 files changed, 1340 insertions(+) create mode 100644 pkg/kfuzztest/builder.go create mode 100644 pkg/kfuzztest/description_generation_test.go create mode 100644 pkg/kfuzztest/extractor.go create mode 100644 pkg/kfuzztest/kfuzztest.go create mode 100644 pkg/kfuzztest/testdata/.gitignore create mode 100644 pkg/kfuzztest/testdata/1/desc.txt create mode 100644 pkg/kfuzztest/testdata/1/prog.c create mode 100644 pkg/kfuzztest/testdata/2/desc.txt create mode 100644 pkg/kfuzztest/testdata/2/prog.c create mode 100644 pkg/kfuzztest/testdata/common.h create mode 100644 pkg/kfuzztest/testdata/linker.ld create mode 100644 pkg/kfuzztest/types.go (limited to 'pkg') diff --git a/pkg/kfuzztest/builder.go b/pkg/kfuzztest/builder.go new file mode 100644 index 000000000..1c62e1093 --- /dev/null +++ b/pkg/kfuzztest/builder.go @@ -0,0 +1,254 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+package kfuzztest + +import ( + "debug/dwarf" + "fmt" + "strings" + + "github.com/google/syzkaller/pkg/ast" +) + +type Builder struct { + funcs []SyzFunc + structs []SyzStruct + constraints []SyzConstraint + annotations []SyzAnnotation +} + +func NewBuilder( + funcs []SyzFunc, + structs []SyzStruct, + constraints []SyzConstraint, + annotations []SyzAnnotation, +) *Builder { + return &Builder{funcs, structs, constraints, annotations} +} + +func (b *Builder) AddStruct(s SyzStruct) { + b.structs = append(b.structs, s) +} + +func (b *Builder) AddFunc(f SyzFunc) { + b.funcs = append(b.funcs, f) +} + +func (b *Builder) EmitSyzlangDescription() (string, error) { + constraintMap := make(map[string]map[string]SyzConstraint) + for _, constraint := range b.constraints { + if _, contains := constraintMap[constraint.InputType]; !contains { + constraintMap[constraint.InputType] = make(map[string]SyzConstraint) + } + constraintMap[constraint.InputType][constraint.FieldName] = constraint + } + annotationMap := make(map[string]map[string]SyzAnnotation) + for _, annotation := range b.annotations { + if _, contains := annotationMap[annotation.InputType]; !contains { + annotationMap[annotation.InputType] = make(map[string]SyzAnnotation) + } + annotationMap[annotation.InputType][annotation.FieldName] = annotation + } + + var descBuilder strings.Builder + descBuilder.WriteString("# This description was automatically generated with tools/kfuzztest-gen\n") + for _, s := range b.structs { + structDesc, err := syzStructToSyzlang(s, constraintMap, annotationMap) + if err != nil { + return "", err + } + descBuilder.WriteString(structDesc) + descBuilder.WriteString("\n\n") + } + + for i, fn := range b.funcs { + descBuilder.WriteString(syzFuncToSyzlang(fn)) + if i < len(b.funcs)-1 { + descBuilder.WriteString("\n") + } + } + + // Format the output syzlang descriptions for consistency. 
+ var astError error + eh := func(pos ast.Pos, msg string) { + astError = fmt.Errorf("ast failure: %v: %v", pos, msg) + } + descAst := ast.Parse([]byte(descBuilder.String()), "", eh) + if astError != nil { + return "", astError + } + if descAst == nil { + return "", fmt.Errorf("failed to format generated syzkaller description - is it well-formed?") + } + return string(ast.Format(descAst)), nil +} + +func syzStructToSyzlang(s SyzStruct, constraintMap map[string]map[string]SyzConstraint, + annotationMap map[string]map[string]SyzAnnotation) (string, error) { + var builder strings.Builder + + fmt.Fprintf(&builder, "%s {\n", s.Name) + structAnnotations := annotationMap["struct "+s.Name] + structConstraints := constraintMap["struct "+s.Name] + for _, field := range s.Fields { + line, err := syzFieldToSyzLang(field, structConstraints, structAnnotations) + if err != nil { + return "", err + } + fmt.Fprintf(&builder, "\t%s\n", line) + } + fmt.Fprint(&builder, "}") + return builder.String(), nil +} + +func syzFieldToSyzLang(field SyzField, constraintMap map[string]SyzConstraint, + annotationMap map[string]SyzAnnotation) (string, error) { + constraint, hasConstraint := constraintMap[field.Name] + annotation, hasAnnotation := annotationMap[field.Name] + + var typeDesc string + var err error + if hasAnnotation { + // Annotations override the existing type definitions. + typeDesc, err = processAnnotation(field, annotation) + } else { + typeDesc, err = dwarfToSyzlangType(field.dwarfType) + } + if err != nil { + return "", err + } + + // Process constraints only if unannotated. + // TODO: is there a situation where we would want to process both? 
+ if hasConstraint && !hasAnnotation { + constraint, err := processConstraint(constraint) + if err != nil { + return "", err + } + typeDesc += constraint + } + return fmt.Sprintf("%s %s", field.Name, typeDesc), nil +} + +func processConstraint(c SyzConstraint) (string, error) { + switch c.ConstraintType { + case ExpectEq: + return fmt.Sprintf("[%d]", c.Value1), nil + case ExpectNe: + // syzkaller does not have a built-in way to support an inequality + // constraint AFAIK. + return "", nil + case ExpectLt: + return fmt.Sprintf("[0:%d]", c.Value1-1), nil + case ExpectLe: + return fmt.Sprintf("[0:%d]", c.Value1), nil + case ExpectGt: + return fmt.Sprintf("[%d]", c.Value1+1), nil + case ExpectGe: + return fmt.Sprintf("[%d]", c.Value1), nil + case ExpectInRange: + return fmt.Sprintf("[%d:%d]", c.Value1, c.Value2), nil + default: + fmt.Printf("c = %d\n", c.ConstraintType) + return "", fmt.Errorf("unsupported constraint type") + } +} + +func processAnnotation(field SyzField, annotation SyzAnnotation) (string, error) { + switch annotation.Attribute { + case AttributeLen: + underlyingType, err := dwarfToSyzlangType(field.dwarfType) + if err != nil { + return "", err + } + return fmt.Sprintf("len[%s, %s]", annotation.LinkedFieldName, underlyingType), nil + case AttributeString: + return "ptr[in, string]", nil + case AttributeArray: + pointeeType, isPtr := resolvesToPtr(field.dwarfType) + if !isPtr { + return "", fmt.Errorf("can only annotate pointer fields are arrays") + } + // TODO: discards const qualifier. + typeDesc, err := dwarfToSyzlangType(pointeeType) + if err != nil { + return "", err + } + return fmt.Sprintf("ptr[in, array[%s]]", typeDesc), nil + default: + return "", fmt.Errorf("unsupported attribute type") + } +} + +// Returns true iff `dwarfType` resolved down to a pointer. For example, +// a `const *void` which isn't directly a pointer. 
// dwarfToSyzlangType returns the syzlang spelling of a DWARF type.
//
// Pointers become "ptr[in, T]", arrays become "array[T]" or "array[T, N]",
// const qualifiers and typedefs are looked through, integers become intN,
// characters and void become int8, and structs are referred to by their name
// without the "struct " prefix. Any other type, any qualifier other than
// const, and integers whose width is not 8, 16, 32 or 64 bits are rejected
// with an error.
func dwarfToSyzlangType(dwarfType dwarf.Type) (string, error) {
	switch t := dwarfType.(type) {
	case nil:
		// Guard the default branch below, which calls a method on the value.
		return "", fmt.Errorf("missing type information")
	case *dwarf.PtrType:
		underlyingType, err := dwarfToSyzlangType(t.Type)
		if err != nil {
			return "", err
		}
		return fmt.Sprintf("ptr[in, %s]", underlyingType), nil
	case *dwarf.QualType:
		if t.Qual != "const" {
			return "", fmt.Errorf("no support for %s qualifier", t.Qual)
		}
		return dwarfToSyzlangType(t.Type)
	case *dwarf.ArrayType:
		underlyingType, err := dwarfToSyzlangType(t.Type)
		if err != nil {
			return "", err
		}
		// If t.Count == -1 then this is a varlen array as per debug/dwarf
		// documentation.
		if t.Count == -1 {
			return fmt.Sprintf("array[%s]", underlyingType), nil
		}
		return fmt.Sprintf("array[%s, %d]", underlyingType, t.Count), nil
	case *dwarf.TypedefType:
		return dwarfToSyzlangType(t.Type)
	case *dwarf.IntType, *dwarf.UintType:
		// Only emit integer type names that syzlang defines; anything else
		// (e.g. a 16-byte integer or an unknown size) would produce a
		// description that fails to compile later with a less useful message.
		switch numBits := t.Size() * 8; numBits {
		case 8, 16, 32, 64:
			return fmt.Sprintf("int%d", numBits), nil
		default:
			return "", fmt.Errorf("unsupported integer type %s of size %d bytes", t.String(), t.Size())
		}
	case *dwarf.CharType, *dwarf.UcharType:
		return "int8", nil
	// `void` isn't a valid type by itself, so we know that it would have
	// been wrapped in a pointer, e.g., `void *`. For this reason, we can
	// just interpret it as a byte, i.e., int8.
	case *dwarf.VoidType:
		return "int8", nil
	case *dwarf.StructType:
		return strings.TrimPrefix(t.StructName, "struct "), nil
	default:
		return "", fmt.Errorf("unsupported type %s", dwarfType.String())
	}
}
+func runTest(t *testing.T, target *targets.Target, tc testData) { + // Compile the C binary containing the metadata. + cmd := flags(tc.dir) + out, err := osutil.RunCmd(time.Hour, "", target.CCompiler, cmd...) + require.NoErrorf(t, err, "Failed to compile: %s", string(out)) + // Cleanup the compiled binary. + defer func() { + out, err := osutil.RunCmd(time.Hour, "", "rm", path.Join(tc.dir, "bin")) + if err != nil { + require.NoErrorf(t, err, "Failed to cleanup: %s", string(out)) + } + }() + + binaryPath := path.Join(tc.dir, "bin") + desc, err := ExtractDescription(binaryPath) + require.NoError(t, err) + + if diffDesc := cmp.Diff(tc.desc, desc); diffDesc != "" { + fmt.Print(diffDesc) + t.Fail() + return + } +} + +func flags(testDir string) []string { + return []string{ + "-g", + "-T", + path.Join(testDir, "..", "linker.ld"), + "-o", + path.Join(testDir, "bin"), + path.Join(testDir, "prog.c"), + } +} + +func readTestCases(dir string) ([]testData, error) { + var testCases []testData + testDirs, err := os.ReadDir(dir) + if err != nil { + return nil, err + } + + for _, subDir := range testDirs { + if !subDir.IsDir() { + continue + } + testData, err := readTestdata(path.Join(dir, subDir.Name())) + if err != nil { + return nil, err + } + testCases = append(testCases, testData) + } + + return testCases, nil +} + +func readTestdata(testDir string) (testData, error) { + content, err := os.ReadFile(path.Join(testDir, "desc.txt")) + if err != nil { + return testData{}, err + } + + return testData{ + dir: testDir, + desc: string(content), + }, nil +} diff --git a/pkg/kfuzztest/extractor.go b/pkg/kfuzztest/extractor.go new file mode 100644 index 000000000..e13ea4662 --- /dev/null +++ b/pkg/kfuzztest/extractor.go @@ -0,0 +1,435 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+package kfuzztest + +import ( + "debug/dwarf" + "debug/elf" + "fmt" + "strings" +) + +// Extractor's job is to extract all information relevant to KFuzzTest from a +// VMlinux binary. +type Extractor struct { + // Path to the `vmlinux` being parsed. + vmlinuxPath string + elfFile *elf.File + dwarfData *dwarf.Data + + // We use an index to avoid repeated sequential scans of the whole binary, + // as this is by far the most expensive operation. + symbolsIndexInitialized bool + symbolsIndex map[string]elf.Symbol +} + +func NewExtractor(vmlinuxPath string) (*Extractor, error) { + elfFile, err := elf.Open(vmlinuxPath) + if err != nil { + return nil, err + } + dwarfData, err := elfFile.DWARF() + if err != nil { + elfFile.Close() + return nil, err + } + return &Extractor{vmlinuxPath, elfFile, dwarfData, false, make(map[string]elf.Symbol)}, nil +} + +type ExtractAllResult struct { + VMLinuxPath string + Funcs []SyzFunc + Structs []SyzStruct + Constraints []SyzConstraint + Annotations []SyzAnnotation +} + +func (e *Extractor) ExtractAll() (ExtractAllResult, error) { + funcs, err := e.extractFuncs() + if err != nil { + return ExtractAllResult{}, err + } + structs, err := e.extractStructs(funcs) + if err != nil { + return ExtractAllResult{}, err + } + constraints, err := e.extractDomainConstraints() + if err != nil { + return ExtractAllResult{}, err + } + annotations, err := e.extractAnnotations() + if err != nil { + return ExtractAllResult{}, err + } + + if len(structs) < len(funcs) { + return ExtractAllResult{}, fmt.Errorf("inconsistent KFuzzTest metadata found in vmlinux") + } + if len(funcs) == 0 { + return ExtractAllResult{}, nil + } + + return ExtractAllResult{ + VMLinuxPath: e.vmlinuxPath, + Funcs: funcs, + Structs: structs, + Constraints: constraints, + Annotations: annotations, + }, nil +} + +func (e *Extractor) Close() { + e.elfFile.Close() +} + +func (e *ExtractAllResult) String() string { + var builder strings.Builder + + fmt.Fprint(&builder, "extraction 
result:\n") + fmt.Fprintf(&builder, "\tVMLinux image: %s\n", e.VMLinuxPath) + fmt.Fprintf(&builder, "\tnum targets: %d\n", len(e.Funcs)) + fmt.Fprintf(&builder, "\tnum struct: %d\n", len(e.Structs)) + fmt.Fprintf(&builder, "\tnum constraints: %d\n", len(e.Constraints)) + fmt.Fprintf(&builder, "\tnum annotations: %d\n", len(e.Annotations)) + + return builder.String() +} + +// Given an address, returns the elf section that this address belongs to in +// the Extractor's elf file. +func (e *Extractor) elfSection(addr uint64) *elf.Section { + for _, section := range e.elfFile.Sections { + if addr >= section.Addr && addr < section.Addr+section.Size { + return section + } + } + return nil +} + +// Reads a string of length at most 128 bytes from the Extractor's elf file. +func (e *Extractor) readElfString(offset uint64) (string, error) { + strSection := e.elfSection(offset) + if strSection == nil { + return "", fmt.Errorf("unable to find section for offset 0x%X", offset) + } + + // 128 bytes is longer than we expect to see in KFuzzTest metadata. 
+ buffer := make([]byte, 128) + _, err := strSection.ReadAt(buffer, int64(offset-strSection.Addr)) + if err != nil { + return "", err + } + + var builder strings.Builder + for _, chr := range buffer { + if chr == 0 { + return builder.String(), nil + } + builder.WriteByte(chr) + } + + return "", fmt.Errorf("could not find null-terminated string with length < 128") +} + +func (e *Extractor) buildSymbolIndex() error { + symbols, err := e.elfFile.Symbols() + if err != nil { + return err + } + for _, sym := range symbols { + e.symbolsIndex[sym.Name] = sym + } + return nil +} + +func (e *Extractor) getSymbol(symbolName string) (elf.Symbol, error) { + if !e.symbolsIndexInitialized { + err := e.buildSymbolIndex() + e.symbolsIndexInitialized = true + if err != nil { + return elf.Symbol{}, err + } + } + + symbol, contains := e.symbolsIndex[symbolName] + if !contains { + return elf.Symbol{}, fmt.Errorf("symbol %s not found in %s", symbolName, e.vmlinuxPath) + } + return symbol, nil +} + +func (e *Extractor) extractFuncs() ([]SyzFunc, error) { + var rawFuncs []*kfuzztestTarget + var err error + + rawFuncs, err = parseKftfObjects[*kfuzztestTarget](e) + if err != nil { + return nil, err + } + + fuzzTargets := make([]SyzFunc, len(rawFuncs)) + for i, raw := range rawFuncs { + name, err := e.readElfString(raw.name) + if err != nil { + return []SyzFunc{}, err + } + argType, err := e.readElfString(raw.argType) + if err != nil { + return []SyzFunc{}, err + } + fuzzTargets[i] = SyzFunc{ + Name: name, + InputStructName: argType, + } + } + + return fuzzTargets, nil +} + +func (e *Extractor) extractDomainConstraints() ([]SyzConstraint, error) { + var rawConstraints []*kfuzztestConstraint + var err error + + rawConstraints, err = parseKftfObjects[*kfuzztestConstraint](e) + if err != nil { + return nil, err + } + + constraints := make([]SyzConstraint, len(rawConstraints)) + for i, raw := range rawConstraints { + typeName, err := e.readElfString(raw.inputType) + if err != nil { + return 
[]SyzConstraint{}, err + } + fieldName, err := e.readElfString(raw.fieldName) + if err != nil { + return []SyzConstraint{}, err + } + + constraints[i] = SyzConstraint{ + InputType: typeName, + FieldName: fieldName, + Value1: raw.value1, + Value2: raw.value2, + ConstraintType: ConstraintType(raw.constraintType), + } + } + + return constraints, nil +} + +func (e *Extractor) extractAnnotations() ([]SyzAnnotation, error) { + var rawAnnotations []*kfuzztestAnnotation + var err error + + rawAnnotations, err = parseKftfObjects[*kfuzztestAnnotation](e) + if err != nil { + return nil, err + } + + annotations := make([]SyzAnnotation, len(rawAnnotations)) + for i, raw := range rawAnnotations { + typeName, err := e.readElfString(raw.inputType) + if err != nil { + return nil, err + } + fieldName, err := e.readElfString(raw.fieldName) + if err != nil { + return nil, err + } + linkedFieldName, err := e.readElfString(raw.linkedFieldName) + if err != nil { + return nil, err + } + + annotations[i] = SyzAnnotation{ + InputType: typeName, + FieldName: fieldName, + LinkedFieldName: linkedFieldName, + Attribute: AnnotationAttribute(raw.annotationAttribute), + } + } + + return annotations, nil +} + +func (e *Extractor) dwarfGetType(entry *dwarf.Entry) (dwarf.Type, error) { + // Case 1: The entry is itself a type definition (e.g., TagStructType, TagBaseType). + // We use its own offset to get the dwarf.Type object. + switch entry.Tag { + case dwarf.TagStructType, dwarf.TagBaseType, dwarf.TagTypedef, dwarf.TagPointerType, dwarf.TagArrayType: + return e.dwarfData.Type(entry.Offset) + } + + // Case 2: The entry refers to a type (e.g., TagMember, TagVariable). + // We use its AttrType field to find the offset of the type definition. 
+ typeOffset, ok := entry.Val(dwarf.AttrType).(dwarf.Offset) + if !ok { + return nil, fmt.Errorf("entry (Tag: %s) has no AttrType field", entry.Tag) + } + + return e.dwarfData.Type(typeOffset) +} + +// extractStructs extracts input structure metadata from discovered KFuzzTest +// targets (funcs). +// Performs a tree-traversal as all struct types need to be defined in the +// resulting description that is emitted by the builder. +func (e *Extractor) extractStructs(funcs []SyzFunc) ([]SyzStruct, error) { + // Set of input map names so that we can skip over entries that aren't + // interesting. + inputStructs := make(map[string]bool) + for _, fn := range funcs { + inputStructs[fn.InputStructName] = true + } + // Unpacks nested types to find an underlying struct type, or return nil + // if nothing is found. For example, when called on `struct myStruct **` + // we return `struct myStruct`. + unpackNested := func(t dwarf.Type) *dwarf.StructType { + for { + switch concreteType := t.(type) { + case *dwarf.StructType: + return concreteType + case *dwarf.PtrType: + t = concreteType.Type + case *dwarf.QualType: + t = concreteType.Type + default: + return nil + } + } + } + + structs := make([]SyzStruct, 0) + + // Perform a DFS on discovered struct types in order to discover nested + // struct types that may be contained within them. 
+ visited := make(map[string]bool) + var visitRecur func(*dwarf.StructType) + visitRecur = func(start *dwarf.StructType) { + newStruct := SyzStruct{dwarfType: start, Name: start.StructName, Fields: make([]SyzField, 0)} + for _, child := range start.Field { + newField := SyzField{Name: child.Name, dwarfType: child.Type} + newStruct.Fields = append(newStruct.Fields, newField) + switch childType := child.Type.(type) { + case *dwarf.StructType: + if _, contains := visited[childType.StructName]; !contains { + visited[childType.StructName] = true + visitRecur(childType) + } + case *dwarf.PtrType, *dwarf.QualType: + // If we hit a pointer or a qualifier, we unpack to see if we + // find a nested struct type so that we can visit it. + maybeStructType := unpackNested(childType) + if maybeStructType != nil { + if _, contains := visited[maybeStructType.StructName]; !contains { + visited[maybeStructType.StructName] = true + visitRecur(maybeStructType) + } + } + default: + continue + } + } + structs = append(structs, newStruct) + } + + dwarfReader := e.dwarfData.Reader() + for { + entry, err := dwarfReader.Next() + if err != nil { + return nil, err + } + // EOF. + if entry == nil { + break + } + if entry.Tag != dwarf.TagStructType { + continue + } + // Skip over unnamed structures. + nameField := entry.AttrField(dwarf.AttrName) + if nameField == nil { + continue + } + name, ok := nameField.Val.(string) + if !ok { + fmt.Printf("unable to get name field\n") + continue + } + // Dwarf file prefixes structures with `struct` so we must prepend + // before lookup. + structName := "struct " + name + // Check whether or not this type is one that we parsed previously + // while traversing the .kftf section of the vmlinux binary, discarding + // if this is not the case. 
+ if _, ok := inputStructs[structName]; !ok { + continue + } + + t, err := e.dwarfGetType(entry) + if err != nil { + return nil, err + } + + switch entryType := t.(type) { + case *dwarf.StructType: + visitRecur(entryType) + default: + // We shouldn't hit this branch if everything before this is + // correct. + panic("Error parsing dwarf - well-formed?") + } + } + + return structs, nil +} + +// Parses a slice of kftf objects contained within a dedicated section. This +// function assumes that all entries are tightly packed, and that each section +// contains only one type of statically-sized entry types. +func parseKftfObjects[T interface { + *P + parsableFromBytes +}, P any](e *Extractor) ([]T, error) { + var typeinfo T + + startSymbol, err := e.getSymbol(typeinfo.startSymbol()) + if err != nil { + return nil, err + } else if startSymbol.Value == 0 { + return nil, fmt.Errorf("failed to resolve %s", typeinfo.startSymbol()) + } + + endSymbol, err := e.getSymbol(typeinfo.endSymbol()) + if err != nil { + return nil, err + } else if endSymbol.Value == 0 { + return nil, fmt.Errorf("failed to resolve %s", typeinfo.endSymbol()) + } + + out := make([]T, 0) + data := make([]byte, typeinfo.size()) + for addr := startSymbol.Value; addr < endSymbol.Value; addr += typeinfo.size() { + section := e.elfSection(addr) + if section == nil { + return nil, fmt.Errorf("failed to locate section for addr=0x%x", addr) + } + + n, err := section.ReadAt(data, int64(addr-section.Addr)) + if err != nil || n < int(typeinfo.size()) { + // If n < sizeof(T), then err is non-nil as per the documentation + // of section.ReadAt. 
// ConstraintType identifies the kind of domain constraint attached to a field
// of a KFuzzTest input struct.
type ConstraintType uint8

const (
	ExpectEq ConstraintType = iota
	ExpectNe
	ExpectLt
	ExpectLe
	ExpectGt
	ExpectGe
	ExpectInRange
)

// String returns the KFuzzTest name of the constraint type, e.g. "EXPECT_EQ".
// Values outside the known set are rendered as "ConstraintType(N)" rather than
// causing an out-of-range panic; such values can occur because constraint
// types are converted from raw numbers read out of the binary.
func (c ConstraintType) String() string {
	names := [...]string{
		"EXPECT_EQ",
		"EXPECT_NE",
		"EXPECT_LT",
		"EXPECT_LE",
		"EXPECT_GT",
		"EXPECT_GE",
		"EXPECT_IN_RANGE",
	}
	if int(c) >= len(names) {
		return fmt.Sprintf("ConstraintType(%d)", uint8(c))
	}
	return names[c]
}
String() string { + return [...]string{"ATTRIBUTE_LEN", "ATTRIBUTE_STRING", "ATTRIBUTE_ARRAY"}[a] +} + +type SyzAnnotation struct { + InputType string + FieldName string + LinkedFieldName string + Attribute AnnotationAttribute +} + +// ExtractDescription returns a syzlang description of all discovered KFuzzTest +// targets, or an error on failure. +func ExtractDescription(vmlinuxPath string) (string, error) { + extractor, err := NewExtractor(vmlinuxPath) + if err != nil { + return "", err + } + defer extractor.Close() + eRes, err := extractor.ExtractAll() + if err != nil { + return "", err + } + builder := NewBuilder(eRes.Funcs, eRes.Structs, eRes.Constraints, eRes.Annotations) + return builder.EmitSyzlangDescription() +} + +type KFuzzTestData struct { + Description string + Calls []*prog.Syscall + Resources []*prog.ResourceDesc + Types []prog.Type +} + +func extractData(vmlinuxPath string) (KFuzzTestData, error) { + desc, err := ExtractDescription(vmlinuxPath) + if err != nil { + return KFuzzTestData{}, err + } + + var astError error + eh := func(pos ast.Pos, msg string) { + astError = fmt.Errorf("ast error: %v: %v", pos, msg) + } + descAst := ast.Parse([]byte(desc), "kfuzztest-autogen", eh) + if astError != nil { + return KFuzzTestData{}, astError + } + if descAst == nil { + return KFuzzTestData{}, fmt.Errorf("failed to build AST for program") + } + + // TODO: this assumes x86_64, but KFuzzTest supports (in theory) any + // architecture. + target := targets.Get(targets.Linux, targets.AMD64) + program := compiler.Compile(descAst, make(map[string]uint64), target, eh) + if astError != nil { + return KFuzzTestData{}, fmt.Errorf("failed to compile extracted KFuzzTest target: %w", astError) + } + + kFuzzTestCalls := []*prog.Syscall{} + for _, call := range program.Syscalls { + // The generated descriptions contain some number of built-ins, which + // we want to filter out. 
+ if call.Attrs.KFuzzTest { + kFuzzTestCalls = append(kFuzzTestCalls, call) + } + } + + // We restore links on all generated system calls for completeness, but we + // only return the filtered slice. + prog.RestoreLinks(program.Syscalls, program.Resources, program.Types) + + return KFuzzTestData{ + Description: desc, + Calls: kFuzzTestCalls, + Resources: program.Resources, + Types: program.Types, + }, nil +} + +type extractKFuzzTestDataState struct { + once sync.Once + data KFuzzTestData + err error +} + +var extractState extractKFuzzTestDataState + +// ExtractData extracts KFuzzTest data from a vmlinux binary. The return value +// of this call is cached so that it can be safely called multiple times +// without incurring a new scan of a vmlinux image. +// NOTE: the implementation assumes the existence of only one vmlinux image +// per process, i.e. no attempt is made to distinguish different vmlinux images +// based on their path. +func ExtractData(vmlinuxPath string) (KFuzzTestData, error) { + extractState.once.Do(func() { + extractState.data, extractState.err = extractData(vmlinuxPath) + }) + + return extractState.data, extractState.err +} + +// ActivateKFuzzTargets extracts all KFuzzTest targets from a vmlinux binary +// and extends a target with the discovered pseudo-syscalls. +func ActivateKFuzzTargets(target *prog.Target, vmlinuxPath string) ([]*prog.Syscall, error) { + data, err := ExtractData(vmlinuxPath) + if err != nil { + return nil, err + } + // TODO: comment this properly. It's important to note here that despite + // extending the target, correct encoding relies on syz_kfuzztest_run being + // compiled into the target, and its ID being available. + target.Extend(data.Calls, data.Types, data.Resources) + return data.Calls, nil +} + +const syzKfuzzTestRun string = "syz_kfuzztest_run" + +// Common prefix that all discriminated syz_kfuzztest_run pseudo-syscalls share. 
const (
	// kFuzzTestDir is the directory under which each KFuzzTest target has its
	// own subdirectory.
	kFuzzTestDir string = "/sys/kernel/debug/kfuzztest"
	// inputFile is the name of the file inside a target's directory.
	inputFile string = "input"
)

// GetInputFilepath returns the path of the input file of the KFuzzTest target
// named testName, i.e. <kFuzzTestDir>/<testName>/<inputFile>.
func GetInputFilepath(testName string) string {
	elems := []string{kFuzzTestDir, testName, inputFile}
	return path.Join(elems...)
}
*/ +DEFINE_ANNOTATION(pkcs7_parse_message_arg, data, , ATTRIBUTE_ARRAY); + +/* Define a main function, otherwise the compiler complains. */ +int main(void) +{ +} diff --git a/pkg/kfuzztest/testdata/2/desc.txt b/pkg/kfuzztest/testdata/2/desc.txt new file mode 100644 index 000000000..2705252dd --- /dev/null +++ b/pkg/kfuzztest/testdata/2/desc.txt @@ -0,0 +1,15 @@ +# This description was automatically generated with tools/kfuzztest-gen +bar { + a int32 + b int32 +} + +foo { + b ptr[in, bar] + str ptr[in, string] + data ptr[in, array[int8]] + datalen len[data, int64] + numbers ptr[in, array[int64]] +} + +syz_kfuzztest_run$some_target(name ptr[in, string["some_target"]], data ptr[in, foo], len bytesize[data]) (kfuzz_test) diff --git a/pkg/kfuzztest/testdata/2/prog.c b/pkg/kfuzztest/testdata/2/prog.c new file mode 100644 index 000000000..908ccd271 --- /dev/null +++ b/pkg/kfuzztest/testdata/2/prog.c @@ -0,0 +1,39 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. +#include "../common.h" + +#include + +struct bar { + int a; + int b; +}; + +struct foo { + struct bar* b; + const char* str; + const char* data; + size_t datalen; + uint64_t* numbers; +}; + +DEFINE_FUZZ_TARGET(some_target, struct foo); +/* Expect foo.bar != NULL. */ +DEFINE_CONSTRAINT(foo, bar, NULL, NULL, EXPECT_NE); +/* Expect foo.str != NULL. */ +DEFINE_CONSTRAINT(foo, str, NULL, NULL, EXPECT_NE); +/* Annotate foo.str as a string. */ +DEFINE_ANNOTATION(foo, str, , ATTRIBUTE_STRING); +/* Expect foo.data != NULL. */ +DEFINE_CONSTRAINT(foo, data, NULL, NULL, EXPECT_NE); +/* Annotate foo.data as an array. */ +DEFINE_ANNOTATION(foo, data, , ATTRIBUTE_ARRAY); +/* Annotate foo.datalen == len(foo.data). */ +DEFINE_ANNOTATION(foo, datalen, data, ATTRIBUTE_LEN); +/* Annotate foo.numbers as an array. 
*/ +DEFINE_ANNOTATION(foo, numbers, , ATTRIBUTE_ARRAY); + +/* Define a main function, otherwise the compiler complains. */ +int main(void) +{ +} diff --git a/pkg/kfuzztest/testdata/common.h b/pkg/kfuzztest/testdata/common.h new file mode 100644 index 000000000..29e8b193e --- /dev/null +++ b/pkg/kfuzztest/testdata/common.h @@ -0,0 +1,81 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +// Common struct definitions that resemble those found in the kernel source +// under include/linux/kfuzztest.h. For testing purposes, it is only required +// that these have the same sizes and emitted metadata as the kernel +// definitions, and therefore there is no strict requirement that their fields +// match one-to-one. +#ifndef COMMON_H +#define COMMON_H + +#include <stdint.h> + +struct kfuzztest_target { + const char *name; + const char *arg_type_name; + uintptr_t write_input_cb; +} __attribute__((aligned(32))); + +enum kfuzztest_constraint_type { + EXPECT_EQ, + EXPECT_NE, + EXPECT_LT, + EXPECT_LE, + EXPECT_GT, + EXPECT_GE, + EXPECT_IN_RANGE, +}; + +struct kfuzztest_constraint { + const char *input_type; + const char *field_name; + uintptr_t value1; + uintptr_t value2; + enum kfuzztest_constraint_type type; +} __attribute__((aligned(64))); + +enum kfuzztest_annotation_attribute { + ATTRIBUTE_LEN, + ATTRIBUTE_STRING, + ATTRIBUTE_ARRAY, +}; + +struct kfuzztest_annotation { + const char *input_type; + const char *field_name; + const char *linked_field_name; + enum kfuzztest_annotation_attribute attrib; +} __attribute__((aligned(32))); + +#define DEFINE_FUZZ_TARGET(test_name, test_arg_type) \ + struct kfuzztest_target __fuzz_test__##test_name \ + __attribute__((section(".kfuzztest_target"), __used__)) = { \ + .name = #test_name, \ + .arg_type_name = #test_arg_type, \ + }; \ + /* Avoid the compiler optimizing out the struct definition. 
*/ \ + static test_arg_type arg; + +#define DEFINE_CONSTRAINT(arg_type, field, val1, val2, tpe) \ + static struct kfuzztest_constraint __constraint_##arg_type##_##field \ + __attribute__((section(".kfuzztest_constraint"), \ + __used__)) = { \ + .input_type = "struct " #arg_type, \ + .field_name = #field, \ + .value1 = (uintptr_t)val1, \ + .value2 = (uintptr_t)val2, \ + .type = tpe, \ + } + +#define DEFINE_ANNOTATION(arg_type, field, linked_field, attribute) \ + static struct kfuzztest_annotation __annotation_##arg_type##_##field \ + __attribute__((section(".kfuzztest_annotation"), \ + __used__)) = { \ + .input_type = "struct " #arg_type, \ + .field_name = #field, \ + .linked_field_name = #linked_field, \ + .attrib = attribute, \ + } + +#endif /* COMMON_H */ diff --git a/pkg/kfuzztest/testdata/linker.ld b/pkg/kfuzztest/testdata/linker.ld new file mode 100644 index 000000000..345c02128 --- /dev/null +++ b/pkg/kfuzztest/testdata/linker.ld @@ -0,0 +1,39 @@ +/* Copyright 2025 syzkaller project authors. All rights reserved. */ +/* Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. */ + +/* Defines a basic linkage script for building kernel-like KFuzzTest metadata into a binary. */ +PAGE_SIZE = 0x1000; + +PHDRS +{ + text PT_LOAD FLAGS(5); /* R, X */ + data PT_LOAD FLAGS(6); /* R, W */ +} + +SECTIONS +{ + .text : { *(.text) } :text + + .rodata : { + *(.rodata*) + + . = ALIGN(PAGE_SIZE); + __kfuzztest_targets_start = .; + KEEP(*(.kfuzztest_target)); + __kfuzztest_targets_end = .; + + . = ALIGN(PAGE_SIZE); + __kfuzztest_constraints_start = .; + KEEP(*(.kfuzztest_constraint)); + __kfuzztest_constraints_end = .; + + . 
= ALIGN(PAGE_SIZE); + __kfuzztest_annotations_start = .; + KEEP(*(.kfuzztest_annotation)); + __kfuzztest_annotations_end = .; + + } :text + + .data : { *(.data) } :data + .bss : { *(.bss) } :data +} diff --git a/pkg/kfuzztest/types.go b/pkg/kfuzztest/types.go new file mode 100644 index 000000000..b533f95c3 --- /dev/null +++ b/pkg/kfuzztest/types.go @@ -0,0 +1,135 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. +package kfuzztest + +import ( + "debug/elf" + "fmt" +) + +// The parsableFromBytes interface describes a KFuzzTest object that can be +// parsed from a vmlinux binary. All objects are expected to satisfy the +// following constraints: +// - Must be statically sized. I.e. the size() function should return some +// fixed value. +// - Densely packed: size must exactly describe the number of bytes between +// the start address of instance i and that of instance i+1. +// +// No further assumptions are made about the semantics of the object. For +// example, if some field is a pointer to a string (const char *), the string +// itself will not be read from the binary. This responsibility is offloaded to +// the caller. 
type parsableFromBytes interface {
	// fromBytes fills the receiver from data, which must be exactly size()
	// bytes long. Multi-byte fields are decoded with elfFile.ByteOrder.
	fromBytes(elfFile *elf.File, data []byte) error
	// size returns the fixed number of bytes occupied by one instance.
	size() uint64
	// startSymbol returns the name of the symbol that marks the beginning of
	// the region holding instances of this object.
	startSymbol() string
	// endSymbol returns the name of the symbol that marks the end of the
	// region holding instances of this object.
	endSymbol() string
}

// Compile-time checks that every metadata record implements parsableFromBytes.
var (
	_ parsableFromBytes = (*kfuzztestTarget)(nil)
	_ parsableFromBytes = (*kfuzztestConstraint)(nil)
	_ parsableFromBytes = (*kfuzztestAnnotation)(nil)
)

// kfuzztestTarget holds the four raw 64-bit words of one fuzz-target record.
// The words are stored undecoded; in particular name and argType are not
// dereferenced here.
type kfuzztestTarget struct {
	name    uint64
	argType uint64
	writeCb uint64
	readCb  uint64
}

const kfuzztestTargetStart string = "__kfuzztest_targets_start"
const kfuzztestTargetEnd string = "__kfuzztest_targets_end"
const kfuzztestTargetSize uint64 = 32

// incorrectByteSizeErr reports that a record was handed a buffer whose length
// differs from the record's fixed size.
func incorrectByteSizeErr(expected, actual uint64) error {
	return fmt.Errorf("incorrect number of bytes: expected %d, got %d", expected, actual)
}

// fromBytes decodes a target record from exactly kfuzztestTargetSize bytes.
// It returns an error, without touching targ or elfFile, if data has any
// other length.
func (targ *kfuzztestTarget) fromBytes(elfFile *elf.File, data []byte) error {
	if targ.size() != uint64(len(data)) {
		return incorrectByteSizeErr(targ.size(), uint64(len(data)))
	}
	order := elfFile.ByteOrder
	targ.name = order.Uint64(data[0:8])
	targ.argType = order.Uint64(data[8:16])
	targ.writeCb = order.Uint64(data[16:24])
	targ.readCb = order.Uint64(data[24:32])
	return nil
}

// size returns the fixed size of a target record in bytes.
func (targ *kfuzztestTarget) size() uint64 {
	return kfuzztestTargetSize
}

// startSymbol returns the symbol marking the start of the target records.
func (targ *kfuzztestTarget) startSymbol() string {
	return kfuzztestTargetStart
}

// endSymbol returns the symbol marking the end of the target records.
func (targ *kfuzztestTarget) endSymbol() string {
	return kfuzztestTargetEnd
}

// kfuzztestConstraint holds the raw fields of one constraint record.
// inputType and fieldName are stored as undecoded 64-bit words.
type kfuzztestConstraint struct {
	inputType      uint64
	fieldName      uint64
	value1         uintptr
	value2         uintptr
	constraintType uint8
}

const kfuzztestConstraintStart string = "__kfuzztest_constraints_start"
const kfuzztestConstraintEnd string = "__kfuzztest_constraints_end"
const kfuzztestConstraintSize uint64 = 64

// fromBytes decodes a constraint record from exactly kfuzztestConstraintSize
// bytes. It returns an error, without touching c or elfFile, if data has any
// other length.
func (c *kfuzztestConstraint) fromBytes(elfFile *elf.File, data []byte) error {
	if c.size() != uint64(len(data)) {
		return incorrectByteSizeErr(c.size(), uint64(len(data)))
	}
	order := elfFile.ByteOrder
	c.inputType = order.Uint64(data[0:8])
	c.fieldName = order.Uint64(data[8:16])
	c.value1 = uintptr(order.Uint64(data[16:24]))
	c.value2 = uintptr(order.Uint64(data[24:32]))
	// The constraint type is a C enum at offset 32. It is decoded as a 32-bit
	// value in the file's byte order so that the result is the same for
	// little- and big-endian images; the bytes after it are padding.
	// NOTE(review): assumes the enum is 4 bytes wide in the target ABI.
	c.constraintType = uint8(order.Uint32(data[32:36]))
	return nil
}

// size returns the fixed size of a constraint record in bytes.
func (c *kfuzztestConstraint) size() uint64 {
	return kfuzztestConstraintSize
}

// startSymbol returns the symbol marking the start of the constraint records.
func (c *kfuzztestConstraint) startSymbol() string {
	return kfuzztestConstraintStart
}

// endSymbol returns the symbol marking the end of the constraint records.
func (c *kfuzztestConstraint) endSymbol() string {
	return kfuzztestConstraintEnd
}

// kfuzztestAnnotation holds the raw fields of one annotation record.
// inputType, fieldName and linkedFieldName are stored as undecoded 64-bit
// words.
type kfuzztestAnnotation struct {
	inputType           uint64
	fieldName           uint64
	linkedFieldName     uint64
	annotationAttribute uint8
}

// fromBytes decodes an annotation record from exactly kftfAnnotationSize
// bytes. It returns an error, without touching a or elfFile, if data has any
// other length.
func (a *kfuzztestAnnotation) fromBytes(elfFile *elf.File, data []byte) error {
	if a.size() != uint64(len(data)) {
		return incorrectByteSizeErr(a.size(), uint64(len(data)))
	}
	order := elfFile.ByteOrder
	a.inputType = order.Uint64(data[0:8])
	a.fieldName = order.Uint64(data[8:16])
	a.linkedFieldName = order.Uint64(data[16:24])
	// The attribute is a C enum at offset 24. Reading only data[24] yields the
	// enum's high byte on big-endian images, so decode the 32-bit value in the
	// file's byte order instead.
	// NOTE(review): assumes the enum is 4 bytes wide in the target ABI.
	a.annotationAttribute = uint8(order.Uint32(data[24:28]))
	return nil
}

const kftfAnnotationStart string = "__kfuzztest_annotations_start"
const kftfAnnotationEnd string = "__kfuzztest_annotations_end"
const kftfAnnotationSize uint64 = 32

// size returns the fixed size of an annotation record in bytes.
func (a *kfuzztestAnnotation) size() uint64 {
	return kftfAnnotationSize
}

// startSymbol returns the symbol marking the start of the annotation records.
func (a *kfuzztestAnnotation) startSymbol() string {
	return kftfAnnotationStart
}

// endSymbol returns the symbol marking the end of the annotation records.
func (a *kfuzztestAnnotation) endSymbol() string {
	return kftfAnnotationEnd
}