From 0d0fbbe73f5b02bfeac0aedd0b6b9e8417ab0b0f Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Fri, 26 Aug 2016 07:09:25 +0200 Subject: overhaul syscall description generation process This splits the generation process into two phases: 1. Extract values of constants from Linux kernel sources. 2. Generate Go code. Constant values are checked in. The advantage is that the second phase is now completely independent of Linux source files, kernel version, presence of headers for particular drivers, etc. This allows us to change what Go code we generate at any time without access to all kernel headers (which in the future won't be limited to only upstream headers). The constant extraction process does require proper kernel sources, but it needs to be done only once, by the person who added the driver and has access to the required sources. Then the constant values are checked in for others to use. The constant extraction process is per-file/per-arch. That is, if I am adding a driver that is not present upstream and that works only on a single arch, I will check in constants only for that driver and for that arch. --- sysparser/lexer.go | 220 ++++++++++++++++++++++++++++++++++++++++++++++++++++ sysparser/parser.go | 101 ++++++++++++++++++++++++ 2 files changed, 321 insertions(+) create mode 100644 sysparser/lexer.go create mode 100644 sysparser/parser.go (limited to 'sysparser') diff --git a/sysparser/lexer.go b/sysparser/lexer.go new file mode 100644 index 000000000..09fd3a911 --- /dev/null +++ b/sysparser/lexer.go @@ -0,0 +1,220 @@ +// Copyright 2015/2016 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
// Description-file parser (sysparser/lexer.go in the patch).

// Syscall is a single parsed syscall description.
type Syscall struct {
	Name     string     // name as written, including any "$variant" suffix
	CallName string     // Name with everything from the first '$' removed
	Args     [][]string // per argument: its name followed by its type tokens
	Ret      []string   // type tokens of the return value, nil if none was given
}

// Struct is a single parsed struct or union description.
type Struct struct {
	Name    string
	Flds    [][]string // per field: its name followed by its type tokens
	IsUnion bool       // the definition was opened with '[' rather than '{'
	Packed  bool       // "packed" attribute (structs only)
	Varlen  bool       // "varlen" attribute (unions only)
	Align   int        // from "align_1", "align_2", "align_4", "align_8" (structs only); 0 if not given
}

// Parse reads a description file from in, one item per line, and returns
// everything it found:
//
//	includes - paths of "include <path>" lines, in input order
//	defines  - "define NAME value" lines; every value is wrapped in parentheses
//	syscalls - "name(arg type, ...) ret" lines, sorted by Name
//	structs  - "name {" ... "}" structs and "name [" ... "]" unions, keyed by name
//	unnamed  - nested bracketed types hoisted out under generated "unnamedN" names
//	flags    - "name = v1, v2" lines plus the generated "const_flag_N" entries
//
// Empty lines and lines whose first non-blank character is '#' are ignored.
// Any syntax error is printed to stderr and terminates the process (see failf).
func Parse(in io.Reader) (includes []string, defines map[string]string, syscalls []Syscall, structs map[string]Struct, unnamed map[string][]string, flags map[string][]string) {
	p := newParser(in)
	defines = make(map[string]string)
	structs = make(map[string]Struct)
	unnamed = make(map[string][]string)
	flags = make(map[string][]string)
	var str *Struct // non-nil while the lines of a struct/union body are being read
	for p.Scan() {
		// Skip indentation first, so that whitespace-only lines, indented
		// comments and indented closing brackets are classified correctly
		// instead of being rejected as malformed identifiers.
		p.SkipWs()
		if p.EOF() || p.Char() == '#' {
			continue
		}
		switch {
		case str != nil && (p.Char() == '}' || p.Char() == ']'):
			// End of the struct/union body, optionally followed by "[attr, ...]".
			p.Parse(p.Char())
			applyStructAttrs(str, parseType1(p, unnamed, flags, "")[1:])
			structs[str.Name] = *str
			str = nil
		case str != nil:
			// A field: "name type".
			fld := []string{p.Ident()}
			fld = append(fld, parseType(p, unnamed, flags)...)
			str.Flds = append(str.Flds, fld)
		default:
			switch name := p.Ident(); name {
			case "include":
				includes = append(includes, parseInclude(p))
			case "define":
				key := p.Ident()
				// Keep the value verbatim: consuming it through Parse would
				// drop inner whitespace and glue tokens together
				// ("struct bar" would become "structbar").
				val := strings.TrimRight(p.Rest(), " \t")
				if _, ok := defines[key]; ok {
					failf("%v define is defined multiple times", key)
				}
				defines[key] = "(" + val + ")"
			default:
				switch ch := p.Char(); ch {
				case '(':
					syscalls = append(syscalls, parseCall(p, name, unnamed, flags))
				case '=':
					flags[name] = parseFlagValues(p)
				case '{', '[':
					p.Parse(ch)
					if _, ok := structs[name]; ok {
						failf("%v struct is defined multiple times", name)
					}
					str = &Struct{Name: name, IsUnion: ch == '['}
				default:
					failf("bad line (%v)", p.Str())
				}
			}
		}
		if !p.EOF() {
			failf("trailing data (%v)", p.Str())
		}
	}
	if str != nil {
		// Without this check the unfinished definition would silently vanish.
		failf("%v definition is not terminated", str.Name)
	}
	sort.Sort(syscallArray(syscalls))
	return
}

// applyStructAttrs records the attributes that follow a closing bracket on
// str. Unions accept only "varlen"; structs accept "packed" and "align_N".
// An unknown attribute is fatal.
func applyStructAttrs(str *Struct, attrs []string) {
	for _, attr := range attrs {
		if str.IsUnion {
			switch attr {
			case "varlen":
				str.Varlen = true
			default:
				failf("unknown union %v attribute: %v", str.Name, attr)
			}
			continue
		}
		switch attr {
		case "packed":
			str.Packed = true
		case "align_1":
			str.Align = 1
		case "align_2":
			str.Align = 2
		case "align_4":
			str.Align = 4
		case "align_8":
			str.Align = 8
		default:
			failf("unknown struct %v attribute: %v", str.Name, attr)
		}
	}
}

// parseInclude parses "<path>" and returns path. Characters are consumed
// with Parse, so blanks inside the angle brackets are discarded.
func parseInclude(p *parser) string {
	p.Parse('<')
	var path []byte
	for p.Char() != '>' {
		ch := p.Char()
		p.Parse(ch)
		path = append(path, ch)
	}
	p.Parse('>')
	return string(path)
}

// parseCall parses "(arg type, ...)" and the optional return type that follow
// the already consumed syscall name.
func parseCall(p *parser, name string, unnamed map[string][]string, flags map[string][]string) Syscall {
	p.Parse('(')
	var args [][]string
	for p.Char() != ')' {
		arg := []string{p.Ident()}
		arg = append(arg, parseType(p, unnamed, flags)...)
		args = append(args, arg)
		if p.Char() != ')' {
			p.Parse(',')
		}
	}
	p.Parse(')')
	var ret []string
	if !p.EOF() {
		ret = parseType(p, unnamed, flags)
	}
	callName := name
	if idx := strings.IndexByte(callName, '$'); idx != -1 {
		callName = callName[:idx]
	}
	return Syscall{Name: name, CallName: callName, Args: args, Ret: ret}
}

// parseFlagValues parses "= v1, v2, ..." up to the end of the line.
func parseFlagValues(p *parser) []string {
	p.Parse('=')
	vals := []string{p.Ident()}
	for !p.EOF() {
		p.Parse(',')
		vals = append(vals, p.Ident())
	}
	return vals
}

// isIdentifier reports whether s is non-empty, consists only of ASCII
// letters, digits and underscores, and does not start with a digit.
func isIdentifier(s string) bool {
	if s == "" {
		return false
	}
	for i, c := range s {
		if c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || i > 0 && (c >= '0' && c <= '9') {
			continue
		}
		return false
	}
	return true
}

// parseType reads a type name and its optional "[...]" argument list.
func parseType(p *parser, unnamed map[string][]string, flags map[string][]string) []string {
	return parseType1(p, unnamed, flags, p.Ident())
}

// Counters behind the generated "unnamedN" and "const_flag_N" names. They are
// package-level, so numbering continues across calls to Parse.
var (
	unnamedSeq int
	constSeq   int
)

// parseType1 parses the optional "[a, b, ...]" list following the already
// consumed type name and returns name followed by the list elements.
// An element that carries its own bracketed list is parsed recursively,
// stored in unnamed under a fresh "unnamedN" key and replaced by that key.
// For "const" the first element, and for "array" the second element, is also
// registered in flags under a fresh "const_flag_N" key.
func parseType1(p *parser, unnamed map[string][]string, flags map[string][]string, name string) []string {
	typ := []string{name}
	if !p.EOF() && p.Char() == '[' {
		p.Parse('[')
		for {
			id := p.Ident()
			if p.Char() == '[' {
				inner := parseType1(p, unnamed, flags, id)
				id = fmt.Sprintf("unnamed%v", unnamedSeq)
				unnamedSeq++
				unnamed[id] = inner
			}
			typ = append(typ, id)
			if p.Char() == ']' {
				break
			}
			p.Parse(',')
		}
		p.Parse(']')
	}
	// The full slice expressions cap the capacity, so the one-element flag
	// slices cannot reach the rest of typ's backing array through append.
	if name == "const" && len(typ) > 1 {
		// Create a fake flag with the const value.
		id := fmt.Sprintf("const_flag_%v", constSeq)
		constSeq++
		flags[id] = typ[1:2:2]
	}
	if name == "array" && len(typ) > 2 {
		// Create a fake flag with the const value.
		id := fmt.Sprintf("const_flag_%v", constSeq)
		constSeq++
		flags[id] = typ[2:3:3]
	}
	return typ
}

// syscallArray implements sort.Interface, ordering syscalls by Name.
type syscallArray []Syscall

func (a syscallArray) Len() int           { return len(a) }
func (a syscallArray) Less(i, j int) bool { return a[i].Name < a[j].Name }
func (a syscallArray) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }

// failf prints the formatted message to stderr and exits with status 1.
func failf(msg string, args ...interface{}) {
	fmt.Fprintf(os.Stderr, msg+"\n", args...)
	os.Exit(1)
}

// Line scanner (sysparser/parser.go in the patch).

// parser walks the input one line at a time and keeps a cursor into the
// current line.
type parser struct {
	r *bufio.Scanner
	s string // current line
	i int    // byte offset of the cursor within s
	l int    // 1-based number of the current line, used in error messages
}

// newParser returns a parser reading lines from r.
func newParser(r io.Reader) *parser {
	return &parser{r: bufio.NewScanner(r)}
}

// Scan loads the next line and resets the cursor to its start. It returns
// false once the input is exhausted; a read error is fatal.
func (p *parser) Scan() bool {
	if !p.r.Scan() {
		if err := p.r.Err(); err != nil {
			failf("failed to read input file: %v", err)
		}
		return false
	}
	p.s = p.r.Text()
	p.i = 0
	p.l++
	return true
}

// Str returns the whole current line.
func (p *parser) Str() string {
	return p.s
}

// EOF reports whether the cursor is at the end of the current line.
func (p *parser) EOF() bool {
	return p.i == len(p.s)
}

// Char returns the byte under the cursor without consuming it. Calling it at
// the end of the line is fatal.
func (p *parser) Char() byte {
	if p.EOF() {
		p.failf("unexpected eof")
	}
	return p.s[p.i]
}

// Parse consumes the byte ch, which must be under the cursor, and then skips
// any blanks that follow it. A mismatch or the end of the line is fatal.
func (p *parser) Parse(ch byte) {
	if p.EOF() {
		p.failf("want %s, got EOF", string(ch))
	}
	if p.s[p.i] != ch {
		p.failf("want '%v', got '%v'", string(ch), string(p.s[p.i]))
	}
	p.i++
	p.SkipWs()
}

// SkipWs advances the cursor past spaces and tabs.
func (p *parser) SkipWs() {
	for p.i < len(p.s) && (p.s[p.i] == ' ' || p.s[p.i] == '\t') {
		p.i++
	}
}

// Rest consumes and returns the remainder of the current line verbatim,
// leaving the cursor at the end of the line.
func (p *parser) Rest() string {
	s := p.s[p.i:]
	p.i = len(p.s)
	return s
}

// Ident consumes one token and the blanks after it. A token is either a
// double-quoted string, returned without the quotes, or a non-empty run of
// letters, digits, '_', '$', '-' and ':'. Anything else is fatal.
func (p *parser) Ident() string {
	start, end := p.i, 0
	if p.Char() == '"' {
		p.Parse('"')
		// start still points at the quote; the content begins right after it,
		// including any blanks that Parse has already skipped over.
		start++
		for p.Char() != '"' {
			p.i++
		}
		end = p.i
		p.Parse('"')
	} else {
		for p.i < len(p.s) &&
			(p.s[p.i] >= 'a' && p.s[p.i] <= 'z' ||
				p.s[p.i] >= 'A' && p.s[p.i] <= 'Z' ||
				p.s[p.i] >= '0' && p.s[p.i] <= '9' ||
				p.s[p.i] == '_' || p.s[p.i] == '$' || // $ is for n-way syscalls (like ptrace$peek)
				p.s[p.i] == '-' || p.s[p.i] == ':') { // : is for ranged int (like int32[-3:10])
			p.i++
		}
		if start == p.i {
			p.failf("failed to parse identifier at pos %v", start)
		}
		end = p.i
	}
	s := p.s[start:end]
	p.SkipWs()
	return s
}

// failf prints the number and text of the current line to stderr and then
// reports the formatted message through the package-level failf, which exits.
func (p *parser) failf(msg string, args ...interface{}) {
	fmt.Fprintf(os.Stderr, "line #%v: %v\n", p.l, p.s)
	failf(msg, args...)
}