diff options
| author | Ethan Graham <ethangraham@google.com> | 2025-09-15 12:56:38 +0000 |
|---|---|---|
| committer | Aleksandr Nogikh <nogikh@google.com> | 2025-09-22 09:11:54 +0000 |
| commit | 9a3002038e891237ff5b561f756a0ff6e32d1d2f (patch) | |
| tree | de544a42881a86c3d5e60f31bd83a1d5dd52425c /pkg/kfuzztest/extractor.go | |
| parent | 91601ce4199e5c85a51aa48270101c7fa5bff51a (diff) | |
pkg/kfuzztest: add pkg/kfuzztest
Add a new package, pkg/kfuzztest, that implements dynamic discovery of
KFuzzTest targets by parsing a vmlinux kernel binary.
Signed-off-by: Ethan Graham <ethangraham@google.com>
Diffstat (limited to 'pkg/kfuzztest/extractor.go')
| -rw-r--r-- | pkg/kfuzztest/extractor.go | 435 |
1 files changed, 435 insertions, 0 deletions
diff --git a/pkg/kfuzztest/extractor.go b/pkg/kfuzztest/extractor.go new file mode 100644 index 000000000..e13ea4662 --- /dev/null +++ b/pkg/kfuzztest/extractor.go @@ -0,0 +1,435 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. +package kfuzztest + +import ( + "debug/dwarf" + "debug/elf" + "fmt" + "strings" +) + +// Extractor's job is to extract all information relevant to KFuzzTest from a +// VMlinux binary. +type Extractor struct { + // Path to the `vmlinux` being parsed. + vmlinuxPath string + elfFile *elf.File + dwarfData *dwarf.Data + + // We use an index to avoid repeated sequential scans of the whole binary, + // as this is by far the most expensive operation. + symbolsIndexInitialized bool + symbolsIndex map[string]elf.Symbol +} + +func NewExtractor(vmlinuxPath string) (*Extractor, error) { + elfFile, err := elf.Open(vmlinuxPath) + if err != nil { + return nil, err + } + dwarfData, err := elfFile.DWARF() + if err != nil { + elfFile.Close() + return nil, err + } + return &Extractor{vmlinuxPath, elfFile, dwarfData, false, make(map[string]elf.Symbol)}, nil +} + +type ExtractAllResult struct { + VMLinuxPath string + Funcs []SyzFunc + Structs []SyzStruct + Constraints []SyzConstraint + Annotations []SyzAnnotation +} + +func (e *Extractor) ExtractAll() (ExtractAllResult, error) { + funcs, err := e.extractFuncs() + if err != nil { + return ExtractAllResult{}, err + } + structs, err := e.extractStructs(funcs) + if err != nil { + return ExtractAllResult{}, err + } + constraints, err := e.extractDomainConstraints() + if err != nil { + return ExtractAllResult{}, err + } + annotations, err := e.extractAnnotations() + if err != nil { + return ExtractAllResult{}, err + } + + if len(structs) < len(funcs) { + return ExtractAllResult{}, fmt.Errorf("inconsistent KFuzzTest metadata found in vmlinux") + } + if len(funcs) == 0 { + return ExtractAllResult{}, nil + } + + return ExtractAllResult{ + VMLinuxPath: e.vmlinuxPath, + Funcs: funcs, + Structs: structs, + Constraints: constraints, + Annotations: annotations, + }, nil +} + +func (e *Extractor) Close() { + e.elfFile.Close() +} + +func (e *ExtractAllResult) String() string { + var builder strings.Builder + + fmt.Fprint(&builder, "extraction result:\n") + fmt.Fprintf(&builder, "\tVMLinux image: %s\n", e.VMLinuxPath) + fmt.Fprintf(&builder, "\tnum targets: %d\n", len(e.Funcs)) + fmt.Fprintf(&builder, "\tnum struct: %d\n", len(e.Structs)) + fmt.Fprintf(&builder, "\tnum constraints: %d\n", len(e.Constraints)) + fmt.Fprintf(&builder, "\tnum annotations: %d\n", len(e.Annotations)) + + return builder.String() +} + +// Given an address, returns the elf section that this address belongs to in +// the Extractor's elf file. +func (e *Extractor) elfSection(addr uint64) *elf.Section { + for _, section := range e.elfFile.Sections { + if addr >= section.Addr && addr < section.Addr+section.Size { + return section + } + } + return nil +} + +// Reads a string of length at most 128 bytes from the Extractor's elf file. +func (e *Extractor) readElfString(offset uint64) (string, error) { + strSection := e.elfSection(offset) + if strSection == nil { + return "", fmt.Errorf("unable to find section for offset 0x%X", offset) + } + + // 128 bytes is longer than we expect to see in KFuzzTest metadata. + buffer := make([]byte, 128) + _, err := strSection.ReadAt(buffer, int64(offset-strSection.Addr)) + if err != nil { + return "", err + } + + var builder strings.Builder + for _, chr := range buffer { + if chr == 0 { + return builder.String(), nil + } + builder.WriteByte(chr) + } + + return "", fmt.Errorf("could not find null-terminated string with length < 128") +} + +func (e *Extractor) buildSymbolIndex() error { + symbols, err := e.elfFile.Symbols() + if err != nil { + return err + } + for _, sym := range symbols { + e.symbolsIndex[sym.Name] = sym + } + return nil +} + +func (e *Extractor) getSymbol(symbolName string) (elf.Symbol, error) { + if !e.symbolsIndexInitialized { + err := e.buildSymbolIndex() + e.symbolsIndexInitialized = true + if err != nil { + return elf.Symbol{}, err + } + } + + symbol, contains := e.symbolsIndex[symbolName] + if !contains { + return elf.Symbol{}, fmt.Errorf("symbol %s not found in %s", symbolName, e.vmlinuxPath) + } + return symbol, nil +} + +func (e *Extractor) extractFuncs() ([]SyzFunc, error) { + var rawFuncs []*kfuzztestTarget + var err error + + rawFuncs, err = parseKftfObjects[*kfuzztestTarget](e) + if err != nil { + return nil, err + } + + fuzzTargets := make([]SyzFunc, len(rawFuncs)) + for i, raw := range rawFuncs { + name, err := e.readElfString(raw.name) + if err != nil { + return []SyzFunc{}, err + } + argType, err := e.readElfString(raw.argType) + if err != nil { + return []SyzFunc{}, err + } + fuzzTargets[i] = SyzFunc{ + Name: name, + InputStructName: argType, + } + } + + return fuzzTargets, nil +} + +func (e *Extractor) extractDomainConstraints() ([]SyzConstraint, error) { + var rawConstraints []*kfuzztestConstraint + var err error + + rawConstraints, err = parseKftfObjects[*kfuzztestConstraint](e) + if err != nil { + return nil, err + } + + constraints := make([]SyzConstraint, len(rawConstraints)) + for i, raw := range rawConstraints { + typeName, err := e.readElfString(raw.inputType) + if err != nil { + return []SyzConstraint{}, err + } + fieldName, err := e.readElfString(raw.fieldName) + if err != nil { + return []SyzConstraint{}, err + } + + constraints[i] = SyzConstraint{ + InputType: typeName, + FieldName: fieldName, + Value1: raw.value1, + Value2: raw.value2, + ConstraintType: ConstraintType(raw.constraintType), + } + } + + return constraints, nil +} + +func (e *Extractor) extractAnnotations() ([]SyzAnnotation, error) { + var rawAnnotations []*kfuzztestAnnotation + var err error + + rawAnnotations, err = parseKftfObjects[*kfuzztestAnnotation](e) + if err != nil { + return nil, err + } + + annotations := make([]SyzAnnotation, len(rawAnnotations)) + for i, raw := range rawAnnotations { + typeName, err := e.readElfString(raw.inputType) + if err != nil { + return nil, err + } + fieldName, err := e.readElfString(raw.fieldName) + if err != nil { + return nil, err + } + linkedFieldName, err := e.readElfString(raw.linkedFieldName) + if err != nil { + return nil, err + } + + annotations[i] = SyzAnnotation{ + InputType: typeName, + FieldName: fieldName, + LinkedFieldName: linkedFieldName, + Attribute: AnnotationAttribute(raw.annotationAttribute), + } + } + + return annotations, nil +} + +func (e *Extractor) dwarfGetType(entry *dwarf.Entry) (dwarf.Type, error) { + // Case 1: The entry is itself a type definition (e.g., TagStructType, TagBaseType). + // We use its own offset to get the dwarf.Type object. + switch entry.Tag { + case dwarf.TagStructType, dwarf.TagBaseType, dwarf.TagTypedef, dwarf.TagPointerType, dwarf.TagArrayType: + return e.dwarfData.Type(entry.Offset) + } + + // Case 2: The entry refers to a type (e.g., TagMember, TagVariable). + // We use its AttrType field to find the offset of the type definition. + typeOffset, ok := entry.Val(dwarf.AttrType).(dwarf.Offset) + if !ok { + return nil, fmt.Errorf("entry (Tag: %s) has no AttrType field", entry.Tag) + } + + return e.dwarfData.Type(typeOffset) +} + +// extractStructs extracts input structure metadata from discovered KFuzzTest +// targets (funcs). +// Performs a tree-traversal as all struct types need to be defined in the +// resulting description that is emitted by the builder. +func (e *Extractor) extractStructs(funcs []SyzFunc) ([]SyzStruct, error) { + // Set of input map names so that we can skip over entries that aren't + // interesting. + inputStructs := make(map[string]bool) + for _, fn := range funcs { + inputStructs[fn.InputStructName] = true + } + // Unpacks nested types to find an underlying struct type, or return nil + // if nothing is found. For example, when called on `struct myStruct **` + // we return `struct myStruct`. + unpackNested := func(t dwarf.Type) *dwarf.StructType { + for { + switch concreteType := t.(type) { + case *dwarf.StructType: + return concreteType + case *dwarf.PtrType: + t = concreteType.Type + case *dwarf.QualType: + t = concreteType.Type + default: + return nil + } + } + } + + structs := make([]SyzStruct, 0) + + // Perform a DFS on discovered struct types in order to discover nested + // struct types that may be contained within them. + visited := make(map[string]bool) + var visitRecur func(*dwarf.StructType) + visitRecur = func(start *dwarf.StructType) { + newStruct := SyzStruct{dwarfType: start, Name: start.StructName, Fields: make([]SyzField, 0)} + for _, child := range start.Field { + newField := SyzField{Name: child.Name, dwarfType: child.Type} + newStruct.Fields = append(newStruct.Fields, newField) + switch childType := child.Type.(type) { + case *dwarf.StructType: + if _, contains := visited[childType.StructName]; !contains { + visited[childType.StructName] = true + visitRecur(childType) + } + case *dwarf.PtrType, *dwarf.QualType: + // If we hit a pointer or a qualifier, we unpack to see if we + // find a nested struct type so that we can visit it. + maybeStructType := unpackNested(childType) + if maybeStructType != nil { + if _, contains := visited[maybeStructType.StructName]; !contains { + visited[maybeStructType.StructName] = true + visitRecur(maybeStructType) + } + } + default: + continue + } + } + structs = append(structs, newStruct) + } + + dwarfReader := e.dwarfData.Reader() + for { + entry, err := dwarfReader.Next() + if err != nil { + return nil, err + } + // EOF. + if entry == nil { + break + } + if entry.Tag != dwarf.TagStructType { + continue + } + // Skip over unnamed structures. + nameField := entry.AttrField(dwarf.AttrName) + if nameField == nil { + continue + } + name, ok := nameField.Val.(string) + if !ok { + fmt.Printf("unable to get name field\n") + continue + } + // Dwarf file prefixes structures with `struct` so we must prepend + // before lookup. + structName := "struct " + name + // Check whether or not this type is one that we parsed previously + // while traversing the .kftf section of the vmlinux binary, discarding + // if this is not the case. + if _, ok := inputStructs[structName]; !ok { + continue + } + + t, err := e.dwarfGetType(entry) + if err != nil { + return nil, err + } + + switch entryType := t.(type) { + case *dwarf.StructType: + visitRecur(entryType) + default: + // We shouldn't hit this branch if everything before this is + // correct. + panic("Error parsing dwarf - well-formed?") + } + } + + return structs, nil +} + +// Parses a slice of kftf objects contained within a dedicated section. This +// function assumes that all entries are tightly packed, and that each section +// contains only one type of statically-sized entry types. +func parseKftfObjects[T interface { + *P + parsableFromBytes +}, P any](e *Extractor) ([]T, error) { + var typeinfo T + + startSymbol, err := e.getSymbol(typeinfo.startSymbol()) + if err != nil { + return nil, err + } else if startSymbol.Value == 0 { + return nil, fmt.Errorf("failed to resolve %s", typeinfo.startSymbol()) + } + + endSymbol, err := e.getSymbol(typeinfo.endSymbol()) + if err != nil { + return nil, err + } else if endSymbol.Value == 0 { + return nil, fmt.Errorf("failed to resolve %s", typeinfo.endSymbol()) + } + + out := make([]T, 0) + data := make([]byte, typeinfo.size()) + for addr := startSymbol.Value; addr < endSymbol.Value; addr += typeinfo.size() { + section := e.elfSection(addr) + if section == nil { + return nil, fmt.Errorf("failed to locate section for addr=0x%x", addr) + } + + n, err := section.ReadAt(data, int64(addr-section.Addr)) + if err != nil || n < int(typeinfo.size()) { + // If n < sizeof(T), then err is non-nil as per the documentation + // of section.ReadAt. + return nil, err + } + + obj := T(new(P)) + err = obj.fromBytes(e.elfFile, data) + if err != nil { + return nil, err + } + out = append(out, obj) + } + + return out, nil +} |
