// Copyright 2015 syzkaller project authors. All rights reserved. // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. package prog import ( "bufio" "bytes" "encoding/hex" "fmt" "strconv" ) // String generates a very compact program description (mostly for debug output). func (p *Prog) String() string { buf := new(bytes.Buffer) for i, c := range p.Calls { if i != 0 { fmt.Fprintf(buf, "-") } fmt.Fprintf(buf, "%v", c.Meta.Name) } return buf.String() } func (p *Prog) Serialize() []byte { if debug { if err := p.validate(); err != nil { panic("serializing invalid program") } } buf := new(bytes.Buffer) vars := make(map[Arg]int) varSeq := 0 for _, c := range p.Calls { if isUsed(c.Ret) { fmt.Fprintf(buf, "r%v = ", varSeq) vars[c.Ret] = varSeq varSeq++ } fmt.Fprintf(buf, "%v(", c.Meta.Name) for i, a := range c.Args { if IsPad(a.Type()) { continue } if i != 0 { fmt.Fprintf(buf, ", ") } serialize(a, buf, vars, &varSeq) } fmt.Fprintf(buf, ")\n") } return buf.Bytes() } func serialize(arg Arg, buf *bytes.Buffer, vars map[Arg]int, varSeq *int) { if arg == nil { fmt.Fprintf(buf, "nil") return } if isUsed(arg) { fmt.Fprintf(buf, "", *varSeq) vars[arg] = *varSeq *varSeq++ } switch a := arg.(type) { case *ConstArg: fmt.Fprintf(buf, "0x%x", a.Val) case *PointerArg: if a.Res == nil && a.PagesNum == 0 { fmt.Fprintf(buf, "0x0") break } fmt.Fprintf(buf, "&%v=", serializeAddr(arg)) serialize(a.Res, buf, vars, varSeq) case *DataArg: if a.Type().Dir() == DirOut { fmt.Fprintf(buf, "\"\"/%v", a.Size()) } else { data := a.Data() if !arg.Type().Varlen() { // Statically typed data will be padded with 0s during // deserialization, so we can strip them here for readability. for len(data) >= 2 && data[len(data)-1] == 0 && data[len(data)-2] == 0 { data = data[:len(data)-1] } } serializeData(buf, data) } case *GroupArg: var delims []byte switch arg.Type().(type) { case *StructType: delims = []byte{'{', '}'} case *ArrayType: delims = []byte{'[', ']'} default: panic("unknown group type") } buf.Write([]byte{delims[0]}) for i, arg1 := range a.Inner { if arg1 != nil && IsPad(arg1.Type()) { continue } if i != 0 { fmt.Fprintf(buf, ", ") } serialize(arg1, buf, vars, varSeq) } buf.Write([]byte{delims[1]}) case *UnionArg: fmt.Fprintf(buf, "@%v=", a.OptionType.FieldName()) serialize(a.Option, buf, vars, varSeq) case *ResultArg: if a.Res == nil { fmt.Fprintf(buf, "0x%x", a.Val) break } id, ok := vars[a.Res] if !ok { panic("no result") } fmt.Fprintf(buf, "r%v", id) if a.OpDiv != 0 { fmt.Fprintf(buf, "/%v", a.OpDiv) } if a.OpAdd != 0 { fmt.Fprintf(buf, "+%v", a.OpAdd) } default: panic("unknown arg kind") } } func (target *Target) Deserialize(data []byte) (prog *Prog, err error) { prog = &Prog{ Target: target, } p := newParser(data) vars := make(map[string]Arg) for p.Scan() { if p.EOF() || p.Char() == '#' { continue } name := p.Ident() r := "" if p.Char() == '=' { r = name p.Parse('=') name = p.Ident() } meta := target.SyscallMap[name] if meta == nil { return nil, fmt.Errorf("unknown syscall %v", name) } c := &Call{ Meta: meta, Ret: MakeReturnArg(meta.Ret), } prog.Calls = append(prog.Calls, c) p.Parse('(') for i := 0; p.Char() != ')'; i++ { if i >= len(meta.Args) { return nil, fmt.Errorf("wrong call arg count: %v, want %v", i+1, len(meta.Args)) } typ := meta.Args[i] if IsPad(typ) { return nil, fmt.Errorf("padding in syscall %v arguments", name) } arg, err := target.parseArg(typ, p, vars) if err != nil { return nil, err } c.Args = append(c.Args, arg) if p.Char() != ')' { p.Parse(',') } } p.Parse(')') if !p.EOF() { return nil, fmt.Errorf("tailing data (line #%v)", p.l) } if len(c.Args) < len(meta.Args) { for i := len(c.Args); i < len(meta.Args); i++ { c.Args = append(c.Args, defaultArg(meta.Args[i])) } } if len(c.Args) != len(meta.Args) { return nil, fmt.Errorf("wrong call arg count: %v, want %v", len(c.Args), len(meta.Args)) } if r != "" { vars[r] = c.Ret } } if err := p.Err(); err != nil { return nil, err } // This validation is done even in non-debug mode because deserialization // procedure does not catch all bugs (e.g. mismatched types). // And we can receive bad programs from corpus and hub. if err := prog.validate(); err != nil { return nil, err } return } func (target *Target) parseArg(typ Type, p *parser, vars map[string]Arg) (Arg, error) { r := "" if p.Char() == '<' { p.Parse('<') r = p.Ident() p.Parse('=') p.Parse('>') } var arg Arg switch p.Char() { case '0': val := p.Ident() v, err := strconv.ParseUint(val, 0, 64) if err != nil { return nil, fmt.Errorf("wrong arg value '%v': %v", val, err) } switch typ.(type) { case *ConstType, *IntType, *FlagsType, *ProcType, *LenType, *CsumType: arg = MakeConstArg(typ, v) case *ResourceType: arg = MakeResultArg(typ, nil, v) case *PtrType: arg = MakePointerArg(typ, 0, 0, 0, nil) case *VmaType: arg = MakePointerArg(typ, 0, 0, 0, nil) default: return nil, fmt.Errorf("bad const type %+v", typ) } case 'r': id := p.Ident() v, ok := vars[id] if !ok || v == nil { return nil, fmt.Errorf("result %v references unknown variable (vars=%+v)", id, vars) } if _, ok := v.(ArgUsed); !ok { // TODO(dvyukov): this happens during loading of old programs. // Figure out when exactly this happens and if it is repairable. return nil, fmt.Errorf("result %v references vad type %#v", id, v) } arg = MakeResultArg(typ, v, 0) if p.Char() == '/' { p.Parse('/') op := p.Ident() v, err := strconv.ParseUint(op, 0, 64) if err != nil { return nil, fmt.Errorf("wrong result div op: '%v'", op) } arg.(*ResultArg).OpDiv = v } if p.Char() == '+' { p.Parse('+') op := p.Ident() v, err := strconv.ParseUint(op, 0, 64) if err != nil { return nil, fmt.Errorf("wrong result add op: '%v'", op) } arg.(*ResultArg).OpAdd = v } case '&': var typ1 Type switch t1 := typ.(type) { case *PtrType: typ1 = t1.Type case *VmaType: default: return nil, fmt.Errorf("& arg is not a pointer: %#v", typ) } p.Parse('&') page, off, size, err := parseAddr(p, true) if err != nil { return nil, err } p.Parse('=') inner, err := target.parseArg(typ1, p, vars) if err != nil { return nil, err } arg = MakePointerArg(typ, page, off, size, inner) case '(': // This used to parse length of VmaType and return ArgPageSize, which is now removed. // Leaving this for now for backwards compatibility. pages, _, _, err := parseAddr(p, false) if err != nil { return nil, err } arg = MakeConstArg(typ, pages*target.PageSize) case '"', '\'': data, err := deserializeData(p) if err != nil { return nil, err } size := ^uint64(0) if p.Char() == '/' { p.Parse('/') sizeStr := p.Ident() size, err = strconv.ParseUint(sizeStr, 0, 64) if err != nil { return nil, fmt.Errorf("failed to parse buffer size: %q", sizeStr) } } if !typ.Varlen() { size = typ.Size() } else if size == ^uint64(0) { size = uint64(len(data)) } if typ.Dir() == DirOut { arg = MakeOutDataArg(typ, size) } else { if diff := int(size) - len(data); diff > 0 { data = append(data, make([]byte, diff)...) } data = data[:size] arg = MakeDataArg(typ, data) } case '{': t1, ok := typ.(*StructType) if !ok { return nil, fmt.Errorf("'{' arg is not a struct: %#v", typ) } p.Parse('{') var inner []Arg for i := 0; p.Char() != '}'; i++ { if i >= len(t1.Fields) { return nil, fmt.Errorf("wrong struct arg count: %v, want %v", i+1, len(t1.Fields)) } fld := t1.Fields[i] if IsPad(fld) { inner = append(inner, MakeConstArg(fld, 0)) } else { arg, err := target.parseArg(fld, p, vars) if err != nil { return nil, err } inner = append(inner, arg) if p.Char() != '}' { p.Parse(',') } } } p.Parse('}') for len(inner) < len(t1.Fields) { inner = append(inner, defaultArg(t1.Fields[len(inner)])) } arg = MakeGroupArg(typ, inner) case '[': t1, ok := typ.(*ArrayType) if !ok { return nil, fmt.Errorf("'[' arg is not an array: %#v", typ) } p.Parse('[') var inner []Arg for i := 0; p.Char() != ']'; i++ { arg, err := target.parseArg(t1.Type, p, vars) if err != nil { return nil, err } inner = append(inner, arg) if p.Char() != ']' { p.Parse(',') } } p.Parse(']') if t1.Kind == ArrayRangeLen && t1.RangeBegin == t1.RangeEnd { for uint64(len(inner)) < t1.RangeBegin { inner = append(inner, defaultArg(t1.Type)) } inner = inner[:t1.RangeBegin] } arg = MakeGroupArg(typ, inner) case '@': t1, ok := typ.(*UnionType) if !ok { return nil, fmt.Errorf("'@' arg is not a union: %#v", typ) } p.Parse('@') name := p.Ident() p.Parse('=') var optType Type for _, t2 := range t1.Fields { if name == t2.FieldName() { optType = t2 break } } if optType == nil { return nil, fmt.Errorf("union arg %v has unknown option: %v", typ.Name(), name) } opt, err := target.parseArg(optType, p, vars) if err != nil { return nil, err } arg = MakeUnionArg(typ, opt, optType) case 'n': p.Parse('n') p.Parse('i') p.Parse('l') if r != "" { return nil, fmt.Errorf("named nil argument") } default: return nil, fmt.Errorf("failed to parse argument at %v (line #%v/%v: %v)", int(p.Char()), p.l, p.i, p.s) } if r != "" { vars[r] = arg } return arg, nil } const ( encodingAddrBase = 0x7f0000000000 encodingPageSize = 4 << 10 maxLineLen = 256 << 10 ) func serializeAddr(arg Arg) string { var pageIndex, pagesNum uint64 var pageOffset int switch a := arg.(type) { case *PointerArg: pageIndex = a.PageIndex pageOffset = a.PageOffset pagesNum = a.PagesNum default: panic("bad addr arg") } page := pageIndex * encodingPageSize page += encodingAddrBase soff := "" if off := pageOffset; off != 0 { sign := "+" if off < 0 { sign = "-" off = -off page += encodingPageSize } soff = fmt.Sprintf("%v0x%x", sign, off) } ssize := "" if size := pagesNum; size != 0 { size *= encodingPageSize ssize = fmt.Sprintf("/0x%x", size) } return fmt.Sprintf("(0x%x%v%v)", page, soff, ssize) } func parseAddr(p *parser, base bool) (uint64, int, uint64, error) { p.Parse('(') pstr := p.Ident() page, err := strconv.ParseUint(pstr, 0, 64) if err != nil { return 0, 0, 0, fmt.Errorf("failed to parse addr page: '%v'", pstr) } if page%encodingPageSize != 0 { return 0, 0, 0, fmt.Errorf("address base is not page size aligned: '%v'", pstr) } if base { if page < encodingAddrBase { return 0, 0, 0, fmt.Errorf("address without base offset: '%v'", pstr) } page -= encodingAddrBase } var off int64 if p.Char() == '+' || p.Char() == '-' { minus := false if p.Char() == '-' { minus = true p.Parse('-') } else { p.Parse('+') } ostr := p.Ident() off, err = strconv.ParseInt(ostr, 0, 64) if err != nil { return 0, 0, 0, fmt.Errorf("failed to parse addr offset: '%v'", ostr) } if minus { page -= encodingPageSize off = -off } } var size uint64 if p.Char() == '/' { p.Parse('/') pstr := p.Ident() size, err = strconv.ParseUint(pstr, 0, 64) if err != nil { return 0, 0, 0, fmt.Errorf("failed to parse addr size: '%v'", pstr) } } p.Parse(')') page /= encodingPageSize size /= encodingPageSize return page, int(off), size, nil } func serializeData(buf *bytes.Buffer, data []byte) { readable := true for _, v := range data { if v >= 0x20 && v < 0x7f { continue } switch v { case 0, '\a', '\b', '\f', '\n', '\r', '\t', '\v': continue } readable = false break } if !readable || len(data) == 0 { fmt.Fprintf(buf, "\"%v\"", hex.EncodeToString(data)) return } buf.WriteByte('\'') for _, v := range data { switch v { case 0: buf.Write([]byte{'\\', 'x', '0', '0'}) case '\a': buf.Write([]byte{'\\', 'a'}) case '\b': buf.Write([]byte{'\\', 'b'}) case '\f': buf.Write([]byte{'\\', 'f'}) case '\n': buf.Write([]byte{'\\', 'n'}) case '\r': buf.Write([]byte{'\\', 'r'}) case '\t': buf.Write([]byte{'\\', 't'}) case '\v': buf.Write([]byte{'\\', 'v'}) case '\'': buf.Write([]byte{'\\', '\''}) case '\\': buf.Write([]byte{'\\', '\\'}) default: buf.WriteByte(v) } } buf.WriteByte('\'') } func deserializeData(p *parser) ([]byte, error) { var data []byte if p.Char() == '"' { p.Parse('"') val := "" if p.Char() != '"' { val = p.Ident() } p.Parse('"') var err error data, err = hex.DecodeString(val) if err != nil { return nil, fmt.Errorf("data arg has bad value %q", val) } } else { if p.consume() != '\'' { return nil, fmt.Errorf("data arg does not start with \" nor with '") } for p.Char() != '\'' && p.Char() != 0 { v := p.consume() if v != '\\' { data = append(data, v) continue } v = p.consume() switch v { case 'x': hi := p.consume() lo := p.consume() if lo != '0' || hi != '0' { return nil, fmt.Errorf( "invalid \\x%c%c escape sequence in data arg", hi, lo) } data = append(data, 0) case 'a': data = append(data, '\a') case 'b': data = append(data, '\b') case 'f': data = append(data, '\f') case 'n': data = append(data, '\n') case 'r': data = append(data, '\r') case 't': data = append(data, '\t') case 'v': data = append(data, '\v') case '\'': data = append(data, '\'') case '\\': data = append(data, '\\') default: return nil, fmt.Errorf("invalid \\%c escape sequence in data arg", v) } } p.Parse('\'') } return data, nil } type parser struct { r *bufio.Scanner s string i int l int e error } func newParser(data []byte) *parser { p := &parser{r: bufio.NewScanner(bytes.NewReader(data))} p.r.Buffer(nil, maxLineLen) return p } func (p *parser) Scan() bool { if p.e != nil { return false } if !p.r.Scan() { p.e = p.r.Err() return false } p.s = p.r.Text() p.i = 0 p.l++ return true } func (p *parser) Err() error { return p.e } func (p *parser) Str() string { return p.s } func (p *parser) EOF() bool { return p.i == len(p.s) } func (p *parser) Char() byte { if p.e != nil { return 0 } if p.EOF() { p.failf("unexpected eof") return 0 } return p.s[p.i] } func (p *parser) Parse(ch byte) { if p.e != nil { return } if p.EOF() { p.failf("want %s, got EOF", string(ch)) return } if p.s[p.i] != ch { p.failf("want '%v', got '%v'", string(ch), string(p.s[p.i])) return } p.i++ p.SkipWs() } func (p *parser) consume() byte { if p.e != nil { return 0 } if p.EOF() { p.failf("unexpected eof") return 0 } v := p.s[p.i] p.i++ return v } func (p *parser) SkipWs() { for p.i < len(p.s) && (p.s[p.i] == ' ' || p.s[p.i] == '\t') { p.i++ } } func (p *parser) Ident() string { i := p.i for p.i < len(p.s) && (p.s[p.i] >= 'a' && p.s[p.i] <= 'z' || p.s[p.i] >= 'A' && p.s[p.i] <= 'Z' || p.s[p.i] >= '0' && p.s[p.i] <= '9' || p.s[p.i] == '_' || p.s[p.i] == '$') { p.i++ } if i == p.i { p.failf("failed to parse identifier at pos %v", i) return "" } if ch := p.s[i]; ch >= '0' && ch <= '9' { } s := p.s[i:p.i] p.SkipWs() return s } func (p *parser) failf(msg string, args ...interface{}) { p.e = fmt.Errorf("%v\nline #%v: %v", fmt.Sprintf(msg, args...), p.l, p.s) } // CallSet returns a set of all calls in the program. // It does very conservative parsing and is intended to parse paste/future serialization formats. func CallSet(data []byte) (map[string]struct{}, error) { calls := make(map[string]struct{}) s := bufio.NewScanner(bytes.NewReader(data)) s.Buffer(nil, maxLineLen) for s.Scan() { ln := s.Bytes() if len(ln) == 0 || ln[0] == '#' { continue } bracket := bytes.IndexByte(ln, '(') if bracket == -1 { return nil, fmt.Errorf("line does not contain opening bracket") } call := ln[:bracket] if eq := bytes.IndexByte(call, '='); eq != -1 { eq++ for eq < len(call) && call[eq] == ' ' { eq++ } call = call[eq:] } if len(call) == 0 { return nil, fmt.Errorf("call name is empty") } calls[string(call)] = struct{}{} } if err := s.Err(); err != nil { return nil, err } if len(calls) == 0 { return nil, fmt.Errorf("program does not contain any calls") } return calls, nil }