// Copyright 2025 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
package kfuzztest

import (
	"debug/dwarf"
	"debug/elf"
	"fmt"
	"strings"
)

// Extractor extracts all information relevant to KFuzzTest from a vmlinux
// binary. It wraps the opened ELF file together with its DWARF data, both of
// which are set up by NewExtractor; the file is released by Close.
type Extractor struct {
	// Path to the `vmlinux` being parsed.
	vmlinuxPath string
	elfFile     *elf.File
	dwarfData   *dwarf.Data

	// We use an index to avoid repeated sequential scans of the whole binary,
	// as this is by far the most expensive operation. The index maps a symbol
	// name to its ELF symbol and is filled lazily by getSymbol on first use;
	// symbolsIndexInitialized guards that one-time fill.
	symbolsIndexInitialized bool
	symbolsIndex            map[string]elf.Symbol
}

// NewExtractor opens the vmlinux image at vmlinuxPath and loads its DWARF
// debug information.
//
// On success the returned Extractor owns the opened ELF file and the caller
// is expected to call Close when done with it. On failure no file handle is
// left open, and the returned error wraps the underlying cause together with
// the path and the step that failed.
func NewExtractor(vmlinuxPath string) (*Extractor, error) {
	elfFile, err := elf.Open(vmlinuxPath)
	if err != nil {
		return nil, fmt.Errorf("opening %q as ELF: %w", vmlinuxPath, err)
	}
	dwarfData, err := elfFile.DWARF()
	if err != nil {
		// The DWARF failure is the error worth reporting; a secondary failure
		// to close the handle is deliberately dropped.
		_ = elfFile.Close()
		return nil, fmt.Errorf("loading DWARF data from %q: %w", vmlinuxPath, err)
	}
	// Keyed fields keep the construction correct even if the struct's field
	// order changes. symbolsIndexInitialized is left at its zero value so the
	// symbol index is built on the first lookup.
	return &Extractor{
		vmlinuxPath:  vmlinuxPath,
		elfFile:      elfFile,
		dwarfData:    dwarfData,
		symbolsIndex: make(map[string]elf.Symbol),
	}, nil
}

// ExtractAllResult bundles the output of Extractor.ExtractAll: the path of
// the vmlinux image that was processed plus the fuzz targets (Funcs), struct
// descriptions (Structs), domain constraints (Constraints) and annotations
// (Annotations) recovered from it.
type ExtractAllResult struct {
	VMLinuxPath string
	Funcs       []SyzFunc
	Structs     []SyzStruct
	Constraints []SyzConstraint
	Annotations []SyzAnnotation
}

// ExtractAll runs every extraction pass over the vmlinux image, in order:
// fuzz targets, the structs reachable from their input types, domain
// constraints and annotations. The first pass that fails aborts the whole
// extraction and its error is returned unchanged.
//
// After all passes succeeded, an error is reported when fewer structs than
// targets were found. When no targets were found at all, the zero-value
// result is returned together with a nil error.
func (e *Extractor) ExtractAll() (ExtractAllResult, error) {
	var none ExtractAllResult

	targets, err := e.extractFuncs()
	if err != nil {
		return none, err
	}
	inputTypes, err := e.extractStructs(targets)
	if err != nil {
		return none, err
	}
	domainConstraints, err := e.extractDomainConstraints()
	if err != nil {
		return none, err
	}
	fieldAnnotations, err := e.extractAnnotations()
	if err != nil {
		return none, err
	}

	switch {
	case len(inputTypes) < len(targets):
		return none, fmt.Errorf("inconsistent KFuzzTest metadata found in vmlinux")
	case len(targets) == 0:
		return none, nil
	}

	result := ExtractAllResult{
		VMLinuxPath: e.vmlinuxPath,
		Funcs:       targets,
		Structs:     inputTypes,
		Constraints: domainConstraints,
		Annotations: fieldAnnotations,
	}
	return result, nil
}

// Close closes the ELF file held by the Extractor. An error reported by the
// underlying close is discarded.
func (e *Extractor) Close() {
	_ = e.elfFile.Close()
}

// String renders a short multi-line summary of the result: the vmlinux path
// followed by the number of targets, structs, constraints and annotations.
func (e *ExtractAllResult) String() string {
	// One row per counter, printed in this order after the image path.
	counters := []struct {
		format string
		count  int
	}{
		{"\tnum targets:     %d\n", len(e.Funcs)},
		{"\tnum struct:      %d\n", len(e.Structs)},
		{"\tnum constraints: %d\n", len(e.Constraints)},
		{"\tnum annotations: %d\n", len(e.Annotations)},
	}

	var out strings.Builder
	out.WriteString("extraction result:\n")
	fmt.Fprintf(&out, "\tVMLinux image:   %s\n", e.VMLinuxPath)
	for _, row := range counters {
		fmt.Fprintf(&out, row.format, row.count)
	}
	return out.String()
}

// elfSection returns the section of the Extractor's ELF file whose address
// range [Addr, Addr+Size) contains addr, or nil if no such section exists.
//
// Only sections flagged SHF_ALLOC are considered. Sections that are not part
// of the memory image carry an address of 0, so without this filter a small
// or NULL address would be matched against unrelated file contents (debug
// info, comments, ...) instead of being reported as unresolvable.
func (e *Extractor) elfSection(addr uint64) *elf.Section {
	for _, section := range e.elfFile.Sections {
		if section.Flags&elf.SHF_ALLOC == 0 {
			continue
		}
		// Compare the offset into the section against its size rather than
		// computing Addr+Size, which would wrap around for a section that
		// ends at the very top of the 64-bit address space.
		if addr >= section.Addr && addr-section.Addr < section.Size {
			return section
		}
	}
	return nil
}

// readElfString reads the NUL-terminated string located at address offset in
// the Extractor's ELF file and returns it without the terminator.
//
// At most 128 bytes are examined. An error is returned when no section
// contains offset, when the section data cannot be read, or when no NUL byte
// is found within the examined bytes.
func (e *Extractor) readElfString(offset uint64) (string, error) {
	strSection := e.elfSection(offset)
	if strSection == nil {
		return "", fmt.Errorf("unable to find section for offset 0x%X", offset)
	}

	// 128 bytes is longer than we expect to see in KFuzzTest metadata.
	const maxLen = 128
	start := offset - strSection.Addr
	readLen := uint64(maxLen)
	// Never read past the end of the section: a full-size read of a string
	// that sits less than maxLen bytes before the section end would come back
	// short with io.EOF and be rejected even though the string is intact.
	if remaining := strSection.Size - start; remaining < readLen {
		readLen = remaining
	}

	buffer := make([]byte, readLen)
	if _, err := strSection.ReadAt(buffer, int64(start)); err != nil {
		return "", err
	}

	if end := strings.IndexByte(string(buffer), 0); end >= 0 {
		return string(buffer[:end]), nil
	}
	return "", fmt.Errorf("could not find null-terminated string with length < 128")
}

// buildSymbolIndex loads the symbol table of the ELF file and stores every
// symbol in e.symbolsIndex under its name. When several symbols share a name,
// the one that comes last in the table is the one that remains in the index.
// The error from reading the symbol table is returned as is.
func (e *Extractor) buildSymbolIndex() error {
	table, err := e.elfFile.Symbols()
	if err != nil {
		return err
	}
	for i := range table {
		e.symbolsIndex[table[i].Name] = table[i]
	}
	return nil
}

// getSymbol looks up symbolName in the ELF symbol table, building the symbol
// index on first use.
//
// It returns the error from building the index if that fails, and a
// "not found" error if the index holds no symbol with that name.
func (e *Extractor) getSymbol(symbolName string) (elf.Symbol, error) {
	if !e.symbolsIndexInitialized {
		if err := e.buildSymbolIndex(); err != nil {
			return elf.Symbol{}, err
		}
		// Only a successfully built index is marked as ready. Marking it on
		// failure would make every later lookup hit the empty index and
		// report "symbol not found", hiding the real cause.
		e.symbolsIndexInitialized = true
	}

	symbol, found := e.symbolsIndex[symbolName]
	if !found {
		return elf.Symbol{}, fmt.Errorf("symbol %s not found in %s", symbolName, e.vmlinuxPath)
	}
	return symbol, nil
}

// extractFuncs parses the raw KFuzzTest target records from the binary and
// resolves the name and argument type strings they point to.
//
// It returns one SyzFunc per record, in record order. On any failure the
// returned slice is nil and the error says which target and which string
// could not be read.
func (e *Extractor) extractFuncs() ([]SyzFunc, error) {
	rawFuncs, err := parseKftfObjects[*kfuzztestTarget](e)
	if err != nil {
		return nil, err
	}

	fuzzTargets := make([]SyzFunc, len(rawFuncs))
	for i, raw := range rawFuncs {
		// The raw record only stores addresses; the strings themselves live
		// elsewhere in the image.
		name, err := e.readElfString(raw.name)
		if err != nil {
			return nil, fmt.Errorf("reading name of KFuzzTest target %d: %w", i, err)
		}
		argType, err := e.readElfString(raw.argType)
		if err != nil {
			return nil, fmt.Errorf("reading argument type of KFuzzTest target %q: %w", name, err)
		}
		fuzzTargets[i] = SyzFunc{
			Name:            name,
			InputStructName: argType,
		}
	}

	return fuzzTargets, nil
}

// extractDomainConstraints parses the raw KFuzzTest constraint records from
// the binary and resolves the input type and field name strings they point
// to. The two values and the constraint type are copied from the raw record.
//
// It returns one SyzConstraint per record, in record order. On any failure
// the returned slice is nil and the error says which constraint and which
// string could not be read.
func (e *Extractor) extractDomainConstraints() ([]SyzConstraint, error) {
	rawConstraints, err := parseKftfObjects[*kfuzztestConstraint](e)
	if err != nil {
		return nil, err
	}

	constraints := make([]SyzConstraint, len(rawConstraints))
	for i, raw := range rawConstraints {
		typeName, err := e.readElfString(raw.inputType)
		if err != nil {
			return nil, fmt.Errorf("reading input type of KFuzzTest constraint %d: %w", i, err)
		}
		fieldName, err := e.readElfString(raw.fieldName)
		if err != nil {
			return nil, fmt.Errorf("reading field name of KFuzzTest constraint %d: %w", i, err)
		}

		constraints[i] = SyzConstraint{
			InputType:      typeName,
			FieldName:      fieldName,
			Value1:         raw.value1,
			Value2:         raw.value2,
			ConstraintType: ConstraintType(raw.constraintType),
		}
	}

	return constraints, nil
}

// extractAnnotations parses the raw KFuzzTest annotation records from the
// binary and resolves the three strings each of them points to (input type,
// field name, linked field name). The attribute is copied from the raw
// record.
//
// It returns one SyzAnnotation per record, in record order, or nil and the
// first error encountered.
func (e *Extractor) extractAnnotations() ([]SyzAnnotation, error) {
	parsed, err := parseKftfObjects[*kfuzztestAnnotation](e)
	if err != nil {
		return nil, err
	}

	result := make([]SyzAnnotation, len(parsed))
	for idx, record := range parsed {
		annotation := SyzAnnotation{
			Attribute: AnnotationAttribute(record.annotationAttribute),
		}
		// Resolve the string fields in the same order as they are declared;
		// the first unreadable one aborts the extraction.
		if annotation.InputType, err = e.readElfString(record.inputType); err != nil {
			return nil, err
		}
		if annotation.FieldName, err = e.readElfString(record.fieldName); err != nil {
			return nil, err
		}
		if annotation.LinkedFieldName, err = e.readElfString(record.linkedFieldName); err != nil {
			return nil, err
		}
		result[idx] = annotation
	}

	return result, nil
}

// dwarfGetType resolves the DWARF type associated with entry.
//
// For entries that are themselves type definitions (struct, base, typedef,
// pointer and array tags) the entry's own offset is looked up. For any other
// entry (e.g. a member or a variable) the offset stored in its AttrType
// attribute is looked up instead; an error is returned if that attribute is
// missing or is not an offset.
func (e *Extractor) dwarfGetType(entry *dwarf.Entry) (dwarf.Type, error) {
	typeOffset := entry.Offset
	switch entry.Tag {
	case dwarf.TagStructType, dwarf.TagBaseType, dwarf.TagTypedef, dwarf.TagPointerType, dwarf.TagArrayType:
		// The entry is the type definition: keep its own offset.
	default:
		// The entry merely refers to a type: follow the reference.
		referenced, ok := entry.Val(dwarf.AttrType).(dwarf.Offset)
		if !ok {
			return nil, fmt.Errorf("entry (Tag: %s) has no AttrType field", entry.Tag)
		}
		typeOffset = referenced
	}
	return e.dwarfData.Type(typeOffset)
}

// extractStructs extracts input structure metadata from discovered KFuzzTest
// targets (funcs).
//
// It scans every DWARF entry, selects the named struct types whose name
// (prefixed with "struct ") is the input type of one of funcs, and walks each
// of them depth-first so that every struct reachable through struct-typed
// fields, pointers or qualifiers is described as well: all struct types need
// to be defined in the resulting description that is emitted by the builder.
// A nested struct is appended to the result before the struct containing it.
//
// Returns an error if the DWARF data cannot be read or if a selected entry
// does not resolve to a struct type.
func (e *Extractor) extractStructs(funcs []SyzFunc) ([]SyzStruct, error) {
	// Set of input struct names so that we can skip over entries that aren't
	// interesting.
	inputStructs := make(map[string]bool)
	for _, fn := range funcs {
		inputStructs[fn.InputStructName] = true
	}
	// Unpacks nested types to find an underlying struct type, or return nil
	// if nothing is found. For example, when called on `struct myStruct **`
	// we return `struct myStruct`.
	unpackNested := func(t dwarf.Type) *dwarf.StructType {
		for {
			switch concreteType := t.(type) {
			case *dwarf.StructType:
				return concreteType
			case *dwarf.PtrType:
				t = concreteType.Type
			case *dwarf.QualType:
				t = concreteType.Type
			default:
				return nil
			}
		}
	}

	structs := make([]SyzStruct, 0)

	// Perform a DFS on discovered struct types in order to discover nested
	// struct types that may be contained within them.
	visited := make(map[string]bool)
	var visitRecur func(*dwarf.StructType)
	visitRecur = func(start *dwarf.StructType) {
		// Mark the struct before descending into its fields. Otherwise a
		// struct that refers back to itself, directly (`struct node *next`)
		// or through other structs, is reached again as its own descendant
		// and ends up in the result twice.
		visited[start.StructName] = true

		newStruct := SyzStruct{dwarfType: start, Name: start.StructName, Fields: make([]SyzField, 0, len(start.Field))}
		for _, child := range start.Field {
			newStruct.Fields = append(newStruct.Fields, SyzField{Name: child.Name, dwarfType: child.Type})

			var nested *dwarf.StructType
			switch childType := child.Type.(type) {
			case *dwarf.StructType:
				nested = childType
			case *dwarf.PtrType, *dwarf.QualType:
				// If we hit a pointer or a qualifier, we unpack to see if we
				// find a nested struct type so that we can visit it.
				nested = unpackNested(childType)
			}
			if nested != nil && !visited[nested.StructName] {
				visitRecur(nested)
			}
		}
		structs = append(structs, newStruct)
	}

	dwarfReader := e.dwarfData.Reader()
	for {
		entry, err := dwarfReader.Next()
		if err != nil {
			return nil, err
		}
		// EOF.
		if entry == nil {
			break
		}
		if entry.Tag != dwarf.TagStructType {
			continue
		}
		// Skip over unnamed structures.
		nameField := entry.AttrField(dwarf.AttrName)
		if nameField == nil {
			continue
		}
		name, ok := nameField.Val.(string)
		if !ok {
			fmt.Printf("unable to get name field\n")
			continue
		}
		// Dwarf file prefixes structures with `struct` so we must prepend
		// before lookup.
		structName := "struct " + name
		// Check whether or not this type is one that we parsed previously
		// while traversing the .kftf section of the vmlinux binary, discarding
		// if this is not the case.
		if !inputStructs[structName] {
			continue
		}

		t, err := e.dwarfGetType(entry)
		if err != nil {
			return nil, err
		}

		// We shouldn't get anything but a struct type here if everything
		// before this is correct; report it to the caller instead of
		// panicking, since this function already has an error return.
		structType, ok := t.(*dwarf.StructType)
		if !ok {
			return nil, fmt.Errorf("DWARF entry for %s resolved to %T, expected a struct type", structName, t)
		}
		// Matching top-level entries are always traversed, as before; only
		// references reached from within a traversal are deduplicated.
		visitRecur(structType)
	}

	return structs, nil
}

// parseKftfObjects parses the kftf objects of type T stored between the
// start and end symbols that T names. This function assumes that all entries
// are tightly packed, and that the region contains only one type of
// statically-sized entry.
//
// The region is walked in steps of T's size; each step reads one entry from
// the section containing its address and decodes it with fromBytes. An error
// is returned if either boundary symbol is missing or has a zero value, if an
// address belongs to no section, or if reading or decoding an entry fails.
func parseKftfObjects[T interface {
	*P
	parsableFromBytes
}, P any](e *Extractor) ([]T, error) {
	// typeinfo is a nil T and only serves to reach the per-type accessors
	// (startSymbol, endSymbol, size).
	var typeinfo T

	// boundary resolves a boundary symbol to its address, rejecting symbols
	// whose value is zero.
	boundary := func(symbolName string) (uint64, error) {
		symbol, err := e.getSymbol(symbolName)
		if err != nil {
			return 0, err
		}
		if symbol.Value == 0 {
			return 0, fmt.Errorf("failed to resolve %s", symbolName)
		}
		return symbol.Value, nil
	}

	begin, err := boundary(typeinfo.startSymbol())
	if err != nil {
		return nil, err
	}
	end, err := boundary(typeinfo.endSymbol())
	if err != nil {
		return nil, err
	}

	entrySize := typeinfo.size()
	objects := make([]T, 0)
	// One scratch buffer is reused for every entry.
	// NOTE(review): this presumes fromBytes does not retain the slice it is
	// given - confirm against the implementations.
	scratch := make([]byte, entrySize)
	for addr := begin; addr < end; addr += entrySize {
		section := e.elfSection(addr)
		if section == nil {
			return nil, fmt.Errorf("failed to locate section for addr=0x%x", addr)
		}

		// If fewer than entrySize bytes were read, then err is non-nil as per
		// the documentation of section.ReadAt.
		read, err := section.ReadAt(scratch, int64(addr-section.Addr))
		if err != nil || read < int(entrySize) {
			return nil, err
		}

		object := T(new(P))
		if err := object.fromBytes(e.elfFile, scratch); err != nil {
			return nil, err
		}
		objects = append(objects, object)
	}

	return objects, nil
}