aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/kfuzztest/extractor.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/kfuzztest/extractor.go')
-rw-r--r--pkg/kfuzztest/extractor.go435
1 files changed, 435 insertions, 0 deletions
diff --git a/pkg/kfuzztest/extractor.go b/pkg/kfuzztest/extractor.go
new file mode 100644
index 000000000..e13ea4662
--- /dev/null
+++ b/pkg/kfuzztest/extractor.go
@@ -0,0 +1,435 @@
+// Copyright 2025 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+package kfuzztest
+
+import (
+	"debug/dwarf"
+	"debug/elf"
+	"fmt"
+	"io"
+	"strings"
+)
+
+// Extractor extracts all information relevant to KFuzzTest from a vmlinux
+// binary: fuzz targets, their input structures, domain constraints and
+// annotations. It is created by NewExtractor and released with Close.
+type Extractor struct {
+ // Path to the `vmlinux` being parsed; also quoted in error messages.
+ vmlinuxPath string
+ // Open handle to the vmlinux ELF image; closed by Close.
+ elfFile *elf.File
+ // DWARF debug information loaded from elfFile.
+ dwarfData *dwarf.Data
+
+ // We use an index to avoid repeated sequential scans of the whole binary,
+ // as this is by far the most expensive operation. The index maps a symbol
+ // name to its ELF symbol and is filled lazily on the first symbol lookup;
+ // symbolsIndexInitialized tells getSymbol not to build it again.
+ symbolsIndexInitialized bool
+ symbolsIndex map[string]elf.Symbol
+}
+
+// NewExtractor opens the vmlinux image at vmlinuxPath and loads its DWARF
+// data. The returned Extractor keeps the ELF file open; release it with
+// Close. The symbol index starts out empty and uninitialized.
+func NewExtractor(vmlinuxPath string) (*Extractor, error) {
+	image, err := elf.Open(vmlinuxPath)
+	if err != nil {
+		return nil, err
+	}
+	debugInfo, err := image.DWARF()
+	if err != nil {
+		// Do not leak the file handle when the DWARF data cannot be loaded.
+		image.Close()
+		return nil, err
+	}
+	extractor := &Extractor{
+		vmlinuxPath:             vmlinuxPath,
+		elfFile:                 image,
+		dwarfData:               debugInfo,
+		symbolsIndexInitialized: false,
+		symbolsIndex:            make(map[string]elf.Symbol),
+	}
+	return extractor, nil
+}
+
+// ExtractAllResult bundles everything that Extractor.ExtractAll recovered
+// from one vmlinux image. ExtractAll returns the zero value when the image
+// contains no KFuzzTest targets.
+type ExtractAllResult struct {
+ // Path of the vmlinux image the data was extracted from.
+ VMLinuxPath string
+ // Fuzz targets, each with the name of its input structure.
+ Funcs []SyzFunc
+ // Structures discovered in the DWARF data, starting from the targets'
+ // input structures.
+ Structs []SyzStruct
+ // Domain constraints attached to fields of input types.
+ Constraints []SyzConstraint
+ // Annotations attached to fields of input types.
+ Annotations []SyzAnnotation
+}
+
+func (e *Extractor) ExtractAll() (ExtractAllResult, error) {
+ funcs, err := e.extractFuncs()
+ if err != nil {
+ return ExtractAllResult{}, err
+ }
+ structs, err := e.extractStructs(funcs)
+ if err != nil {
+ return ExtractAllResult{}, err
+ }
+ constraints, err := e.extractDomainConstraints()
+ if err != nil {
+ return ExtractAllResult{}, err
+ }
+ annotations, err := e.extractAnnotations()
+ if err != nil {
+ return ExtractAllResult{}, err
+ }
+
+ if len(structs) < len(funcs) {
+ return ExtractAllResult{}, fmt.Errorf("inconsistent KFuzzTest metadata found in vmlinux")
+ }
+ if len(funcs) == 0 {
+ return ExtractAllResult{}, nil
+ }
+
+ return ExtractAllResult{
+ VMLinuxPath: e.vmlinuxPath,
+ Funcs: funcs,
+ Structs: structs,
+ Constraints: constraints,
+ Annotations: annotations,
+ }, nil
+}
+
+// Close releases the vmlinux ELF file held by the Extractor. An error
+// reported while closing the file is discarded.
+func (e *Extractor) Close() {
+	_ = e.elfFile.Close()
+}
+
+// String renders a multi-line, human-readable summary of the result: the
+// vmlinux path followed by the number of targets, structs, constraints and
+// annotations, one per tab-indented line.
+func (e *ExtractAllResult) String() string {
+	var summary strings.Builder
+
+	fmt.Fprintf(&summary,
+		"extraction result:\n"+
+			"\tVMLinux image: %s\n"+
+			"\tnum targets: %d\n"+
+			"\tnum struct: %d\n"+
+			"\tnum constraints: %d\n"+
+			"\tnum annotations: %d\n",
+		e.VMLinuxPath,
+		len(e.Funcs),
+		len(e.Structs),
+		len(e.Constraints),
+		len(e.Annotations),
+	)
+
+	return summary.String()
+}
+
+// elfSection returns the section of the Extractor's ELF file whose address
+// range contains addr, or nil if there is no such section.
+//
+// Only sections that occupy memory (SHF_ALLOC) are considered. Sections that
+// are not part of the memory image, such as debug info or the symbol table,
+// have an address of zero and would otherwise match small addresses like a
+// NULL pointer.
+func (e *Extractor) elfSection(addr uint64) *elf.Section {
+	for _, section := range e.elfFile.Sections {
+		if section.Flags&elf.SHF_ALLOC == 0 {
+			continue
+		}
+		// Compare the offset into the section rather than computing
+		// Addr+Size, which wraps around for a section that ends at the top
+		// of the address space.
+		if addr >= section.Addr && addr-section.Addr < section.Size {
+			return section
+		}
+	}
+	return nil
+}
+
+// Reads a string of length at most 128 bytes from the Extractor's elf file.
+func (e *Extractor) readElfString(offset uint64) (string, error) {
+ strSection := e.elfSection(offset)
+ if strSection == nil {
+ return "", fmt.Errorf("unable to find section for offset 0x%X", offset)
+ }
+
+ // 128 bytes is longer than we expect to see in KFuzzTest metadata.
+ buffer := make([]byte, 128)
+ _, err := strSection.ReadAt(buffer, int64(offset-strSection.Addr))
+ if err != nil {
+ return "", err
+ }
+
+ var builder strings.Builder
+ for _, chr := range buffer {
+ if chr == 0 {
+ return builder.String(), nil
+ }
+ builder.WriteByte(chr)
+ }
+
+ return "", fmt.Errorf("could not find null-terminated string with length < 128")
+}
+
+// buildSymbolIndex loads the ELF symbol table and records every symbol in
+// symbolsIndex under its name. When several symbols share a name, the one
+// that appears last in the table wins. The index is left untouched if the
+// symbol table cannot be read.
+func (e *Extractor) buildSymbolIndex() error {
+	table, err := e.elfFile.Symbols()
+	if err != nil {
+		return err
+	}
+	for i := range table {
+		e.symbolsIndex[table[i].Name] = table[i]
+	}
+	return nil
+}
+
+// getSymbol looks up symbolName in the ELF symbol table, building the symbol
+// index on first use.
+//
+// It returns the error from building the index if that fails, and a
+// "not found" error if the index holds no symbol with that name.
+func (e *Extractor) getSymbol(symbolName string) (elf.Symbol, error) {
+	if !e.symbolsIndexInitialized {
+		if err := e.buildSymbolIndex(); err != nil {
+			return elf.Symbol{}, err
+		}
+		// Only mark the index as ready once it has really been built.
+		// Otherwise later lookups would consult an empty index and report a
+		// misleading "symbol not found" instead of the underlying failure.
+		e.symbolsIndexInitialized = true
+	}
+
+	symbol, contains := e.symbolsIndex[symbolName]
+	if !contains {
+		return elf.Symbol{}, fmt.Errorf("symbol %s not found in %s", symbolName, e.vmlinuxPath)
+	}
+	return symbol, nil
+}
+
+func (e *Extractor) extractFuncs() ([]SyzFunc, error) {
+ var rawFuncs []*kfuzztestTarget
+ var err error
+
+ rawFuncs, err = parseKftfObjects[*kfuzztestTarget](e)
+ if err != nil {
+ return nil, err
+ }
+
+ fuzzTargets := make([]SyzFunc, len(rawFuncs))
+ for i, raw := range rawFuncs {
+ name, err := e.readElfString(raw.name)
+ if err != nil {
+ return []SyzFunc{}, err
+ }
+ argType, err := e.readElfString(raw.argType)
+ if err != nil {
+ return []SyzFunc{}, err
+ }
+ fuzzTargets[i] = SyzFunc{
+ Name: name,
+ InputStructName: argType,
+ }
+ }
+
+ return fuzzTargets, nil
+}
+
+func (e *Extractor) extractDomainConstraints() ([]SyzConstraint, error) {
+ var rawConstraints []*kfuzztestConstraint
+ var err error
+
+ rawConstraints, err = parseKftfObjects[*kfuzztestConstraint](e)
+ if err != nil {
+ return nil, err
+ }
+
+ constraints := make([]SyzConstraint, len(rawConstraints))
+ for i, raw := range rawConstraints {
+ typeName, err := e.readElfString(raw.inputType)
+ if err != nil {
+ return []SyzConstraint{}, err
+ }
+ fieldName, err := e.readElfString(raw.fieldName)
+ if err != nil {
+ return []SyzConstraint{}, err
+ }
+
+ constraints[i] = SyzConstraint{
+ InputType: typeName,
+ FieldName: fieldName,
+ Value1: raw.value1,
+ Value2: raw.value2,
+ ConstraintType: ConstraintType(raw.constraintType),
+ }
+ }
+
+ return constraints, nil
+}
+
+func (e *Extractor) extractAnnotations() ([]SyzAnnotation, error) {
+ var rawAnnotations []*kfuzztestAnnotation
+ var err error
+
+ rawAnnotations, err = parseKftfObjects[*kfuzztestAnnotation](e)
+ if err != nil {
+ return nil, err
+ }
+
+ annotations := make([]SyzAnnotation, len(rawAnnotations))
+ for i, raw := range rawAnnotations {
+ typeName, err := e.readElfString(raw.inputType)
+ if err != nil {
+ return nil, err
+ }
+ fieldName, err := e.readElfString(raw.fieldName)
+ if err != nil {
+ return nil, err
+ }
+ linkedFieldName, err := e.readElfString(raw.linkedFieldName)
+ if err != nil {
+ return nil, err
+ }
+
+ annotations[i] = SyzAnnotation{
+ InputType: typeName,
+ FieldName: fieldName,
+ LinkedFieldName: linkedFieldName,
+ Attribute: AnnotationAttribute(raw.annotationAttribute),
+ }
+ }
+
+ return annotations, nil
+}
+
+// dwarfGetType resolves the dwarf.Type that a DWARF entry describes.
+//
+// Struct, base, typedef, pointer and array entries are type definitions and
+// are resolved from their own offset. Any other entry (for example a member
+// or a variable) is expected to reference its type through AttrType; an
+// error is returned when that attribute is missing or is not an offset.
+func (e *Extractor) dwarfGetType(entry *dwarf.Entry) (dwarf.Type, error) {
+	typeOffset := entry.Offset
+
+	switch entry.Tag {
+	case dwarf.TagStructType, dwarf.TagBaseType, dwarf.TagTypedef, dwarf.TagPointerType, dwarf.TagArrayType:
+		// The entry is the type definition; its own offset is the one to use.
+	default:
+		// The entry only refers to a type; follow the reference.
+		referenced, ok := entry.Val(dwarf.AttrType).(dwarf.Offset)
+		if !ok {
+			return nil, fmt.Errorf("entry (Tag: %s) has no AttrType field", entry.Tag)
+		}
+		typeOffset = referenced
+	}
+
+	return e.dwarfData.Type(typeOffset)
+}
+
+// extractStructs extracts input structure metadata from discovered KFuzzTest
+// targets (funcs).
+//
+// It scans every named struct entry in the DWARF data and, for each one whose
+// name matches the input type of a target, records that struct together with
+// every struct reachable from it through fields, pointers and qualifiers.
+// This traversal is needed because all struct types have to be defined in the
+// description that is later emitted by the builder. A nested struct is
+// appended before the struct that contains it.
+//
+// The returned slice is never nil on success. An error is returned when the
+// DWARF data cannot be read or a matching entry does not resolve to a struct.
+func (e *Extractor) extractStructs(funcs []SyzFunc) ([]SyzStruct, error) {
+	// Set of input struct names so that we can skip over entries that aren't
+	// interesting.
+	inputStructs := make(map[string]bool)
+	for _, fn := range funcs {
+		inputStructs[fn.InputStructName] = true
+	}
+	// Unpacks nested types to find an underlying struct type, or return nil
+	// if nothing is found. For example, when called on `struct myStruct **`
+	// we return `struct myStruct`. Only pointers and qualifiers are looked
+	// through; any other kind of type yields nil.
+	unpackNested := func(t dwarf.Type) *dwarf.StructType {
+		for {
+			switch concreteType := t.(type) {
+			case *dwarf.StructType:
+				return concreteType
+			case *dwarf.PtrType:
+				t = concreteType.Type
+			case *dwarf.QualType:
+				t = concreteType.Type
+			default:
+				return nil
+			}
+		}
+	}
+
+	structs := make([]SyzStruct, 0)
+
+	// Perform a DFS on discovered struct types in order to discover nested
+	// struct types that may be contained within them.
+	//
+	// NOTE(review): visited is keyed by struct name, so all unnamed nested
+	// structs share the empty key and only the first one is descended into.
+	visited := make(map[string]bool)
+	var visitRecur func(*dwarf.StructType)
+	visitRecur = func(start *dwarf.StructType) {
+		// Mark the struct before descending into its fields. Without this, a
+		// struct that refers back to the root of the traversal (for example
+		// through a `next` pointer) would be visited and emitted a second
+		// time.
+		visited[start.StructName] = true
+
+		newStruct := SyzStruct{
+			dwarfType: start,
+			Name:      start.StructName,
+			Fields:    make([]SyzField, 0, len(start.Field)),
+		}
+		for _, child := range start.Field {
+			newStruct.Fields = append(newStruct.Fields, SyzField{Name: child.Name, dwarfType: child.Type})
+			// Look through pointers and qualifiers for a struct that still
+			// has to be described.
+			nested := unpackNested(child.Type)
+			if nested == nil || visited[nested.StructName] {
+				continue
+			}
+			visitRecur(nested)
+		}
+		structs = append(structs, newStruct)
+	}
+
+	dwarfReader := e.dwarfData.Reader()
+	for {
+		entry, err := dwarfReader.Next()
+		if err != nil {
+			return nil, err
+		}
+		// EOF.
+		if entry == nil {
+			break
+		}
+		if entry.Tag != dwarf.TagStructType {
+			continue
+		}
+		// Skip over unnamed structures.
+		nameField := entry.AttrField(dwarf.AttrName)
+		if nameField == nil {
+			continue
+		}
+		name, ok := nameField.Val.(string)
+		if !ok {
+			fmt.Printf("unable to get name field\n")
+			continue
+		}
+		// The input type names of the targets carry a `struct ` prefix while
+		// the DWARF name attribute holds the bare name, so we must prepend
+		// the prefix before the lookup.
+		structName := "struct " + name
+		// Check whether or not this type is one that we parsed previously
+		// while traversing the .kftf section of the vmlinux binary, discarding
+		// if this is not the case.
+		if !inputStructs[structName] {
+			continue
+		}
+
+		t, err := e.dwarfGetType(entry)
+		if err != nil {
+			return nil, err
+		}
+
+		// We shouldn't fail here if everything before this is correct, but
+		// malformed debug info must not crash the caller.
+		structType, ok := t.(*dwarf.StructType)
+		if !ok {
+			return nil, fmt.Errorf("DWARF entry for %s did not resolve to a struct type", structName)
+		}
+		// A matching entry is always visited, even if a struct with the same
+		// name was already recorded.
+		visitRecur(structType)
+	}
+
+	return structs, nil
+}
+
+// parseKftfObjects parses a slice of kftf objects contained within a
+// dedicated section. This function assumes that all entries are tightly
+// packed, and that each section contains only one type of statically-sized
+// entry.
+//
+// The region to parse is delimited by the start and end symbols reported by
+// T. Entries are read one at a time from whichever ELF section contains their
+// address, and decoded through T's fromBytes method into a freshly allocated
+// P.
+//
+// An error is returned when either symbol is missing or has a zero value,
+// when T reports a zero entry size, when an entry's address is not covered by
+// any section, when an entry cannot be read in full, or when decoding fails.
+func parseKftfObjects[T interface {
+	*P
+	parsableFromBytes
+}, P any](e *Extractor) ([]T, error) {
+	// typeinfo is a nil pointer that is only used to query per-type metadata
+	// (symbol names and entry size).
+	var typeinfo T
+
+	startSymbol, err := e.getSymbol(typeinfo.startSymbol())
+	if err != nil {
+		return nil, err
+	}
+	if startSymbol.Value == 0 {
+		return nil, fmt.Errorf("failed to resolve %s", typeinfo.startSymbol())
+	}
+
+	endSymbol, err := e.getSymbol(typeinfo.endSymbol())
+	if err != nil {
+		return nil, err
+	}
+	if endSymbol.Value == 0 {
+		return nil, fmt.Errorf("failed to resolve %s", typeinfo.endSymbol())
+	}
+
+	entrySize := typeinfo.size()
+	if entrySize == 0 {
+		// A zero stride would never advance the loop below.
+		return nil, fmt.Errorf("entries between %s and %s have zero size",
+			typeinfo.startSymbol(), typeinfo.endSymbol())
+	}
+
+	out := make([]T, 0)
+	// The buffer is reused for every entry; fromBytes is handed the same
+	// backing array each time.
+	data := make([]byte, entrySize)
+	for addr := startSymbol.Value; addr < endSymbol.Value; addr += entrySize {
+		section := e.elfSection(addr)
+		if section == nil {
+			return nil, fmt.Errorf("failed to locate section for addr=0x%x", addr)
+		}
+
+		n, err := section.ReadAt(data, int64(addr-section.Addr))
+		if err != nil {
+			return nil, err
+		}
+		if uint64(n) < entrySize {
+			// ReadAt is documented to return an error on a short read, but
+			// never hand back (nil, nil) if a reader gets that wrong.
+			return nil, fmt.Errorf("short read at addr=0x%x: got %d of %d bytes", addr, n, entrySize)
+		}
+
+		obj := T(new(P))
+		if err := obj.fromBytes(e.elfFile, data); err != nil {
+			return nil, err
+		}
+		out = append(out, obj)
+	}
+
+	return out, nil
+}