From 18847f55bb3fe9db41e46a2e9e49a9f7c28143af Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Mon, 10 Feb 2020 14:45:20 +0100 Subject: pkg/ast: introduce hex-encoded string literals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The stringnozescapes type does not make sense with filename; also, we may need similar escaping for string flags. Handle escaped strings at the AST level instead. This avoids introducing a new type and works seamlessly with flags. As an alternative I've also tried using strconv.Quote/Unquote but it leads to ugly half-escaped strings: "\xb0\x80s\xe8\xd4N\x91\xe3ڒ,\"C\x82D\xbb\x88\\i\xe2i\xc8\xe9\xd85\xb1\x14):M\xdcn" Make hex-encoded strings a separate string format instead. --- pkg/ast/ast.go | 9 ++++++ pkg/ast/clone.go | 2 ++ pkg/ast/format.go | 15 ++++++++-- pkg/ast/parser.go | 19 +++++++++++-- pkg/ast/scanner.go | 59 ++++++++++++++++++--------------------- pkg/ast/testdata/all.txt | 7 ++--- pkg/compiler/testdata/all.txt | 3 ++ pkg/compiler/testdata/errors2.txt | 1 + pkg/compiler/types.go | 21 ++++---------- 9 files changed, 79 insertions(+), 57 deletions(-) (limited to 'pkg') diff --git a/pkg/ast/ast.go b/pkg/ast/ast.go index 13ff2b40e..f1f74e61b 100644 --- a/pkg/ast/ast.go +++ b/pkg/ast/ast.go @@ -163,6 +163,7 @@ func (n *Ident) Info() (Pos, string, string) { type String struct { Pos Pos Value string + Fmt StrFmt } func (n *String) Info() (Pos, string, string) { @@ -178,6 +179,13 @@ const ( IntFmtChar ) +type StrFmt int + +const ( + StrFmtRaw StrFmt = iota + StrFmtHex +) + type Int struct { Pos Pos // Only one of Value, Ident, CExpr is filled. @@ -198,6 +206,7 @@ type Type struct { ValueFmt IntFmt Ident string String string + StringFmt StrFmt HasString bool // Parts after COLON (for ranges and bitfields). 
Colon []*Type diff --git a/pkg/ast/clone.go b/pkg/ast/clone.go index a594e41f6..54bf8250c 100644 --- a/pkg/ast/clone.go +++ b/pkg/ast/clone.go @@ -134,6 +134,7 @@ func (n *String) Clone() Node { return &String{ Pos: n.Pos, Value: n.Value, + Fmt: n.Fmt, } } @@ -154,6 +155,7 @@ func (n *Type) Clone() Node { ValueFmt: n.ValueFmt, Ident: n.Ident, String: n.String, + StringFmt: n.StringFmt, HasString: n.HasString, Colon: cloneTypes(n.Colon), Args: cloneTypes(n.Args), diff --git a/pkg/ast/format.go b/pkg/ast/format.go index c3d931706..a2ead06f2 100644 --- a/pkg/ast/format.go +++ b/pkg/ast/format.go @@ -50,6 +50,17 @@ func FormatInt(v uint64, format IntFmt) string { } } +func FormatStr(v string, format StrFmt) string { + switch format { + case StrFmtRaw: + return fmt.Sprintf(`"%v"`, v) + case StrFmtHex: + return fmt.Sprintf("`%x`", v) + default: + panic(fmt.Sprintf("unknown str format %v", format)) + } +} + type serializer interface { serialize(w io.Writer) } @@ -153,7 +164,7 @@ func (flags *IntFlags) serialize(w io.Writer) { func (flags *StrFlags) serialize(w io.Writer) { fmt.Fprintf(w, "%v = ", flags.Name.Name) for i, v := range flags.Values { - fmt.Fprintf(w, "%v\"%v\"", comma(i, ""), v.Value) + fmt.Fprintf(w, "%v%v", comma(i, ""), FormatStr(v.Value, v.Fmt)) } fmt.Fprintf(w, "\n") } @@ -172,7 +183,7 @@ func fmtType(t *Type) string { case t.Ident != "": v = t.Ident case t.HasString: - v = fmt.Sprintf("\"%v\"", t.String) + v = FormatStr(t.String, t.StringFmt) default: v = FormatInt(t.Value, t.ValueFmt) } diff --git a/pkg/ast/parser.go b/pkg/ast/parser.go index b8d22fd88..7b46f6611 100644 --- a/pkg/ast/parser.go +++ b/pkg/ast/parser.go @@ -314,7 +314,7 @@ func (p *parser) parseFlags(name *Ident) Node { switch p.tok { case tokInt, tokIdent: return p.parseIntFlags(name) - case tokString: + case tokString, tokStringHex: return p.parseStrFlags(name) default: p.expect(tokInt, tokIdent, tokString) @@ -417,9 +417,10 @@ func (p *parser) parseType() *Type { case tokIdent: 
allowColon = true arg.Ident = p.lit - case tokString: + case tokString, tokStringHex: arg.String = p.lit arg.HasString = true + arg.StringFmt = strTokToFmt(p.tok) default: p.expect(tokInt, tokIdent, tokString) } @@ -468,15 +469,27 @@ func (p *parser) parseIdent() *Ident { } func (p *parser) parseString() *String { - p.expect(tokString) + p.expect(tokString, tokStringHex) str := &String{ Pos: p.pos, Value: p.lit, + Fmt: strTokToFmt(p.tok), } p.next() return str } +func strTokToFmt(tok token) StrFmt { + switch tok { + case tokString: + return StrFmtRaw + case tokStringHex: + return StrFmtHex + default: + panic("bad string token") + } +} + func (p *parser) parseInt() *Int { i := &Int{ Pos: p.pos, diff --git a/pkg/ast/scanner.go b/pkg/ast/scanner.go index a9448b34f..3a6ba9d98 100644 --- a/pkg/ast/scanner.go +++ b/pkg/ast/scanner.go @@ -4,6 +4,7 @@ package ast import ( + "encoding/hex" "fmt" "os" "strconv" @@ -20,6 +21,7 @@ const ( tokDefine tokResource tokString + tokStringHex tokCExpr tokInt @@ -51,18 +53,19 @@ var punctuation = [256]token{ } var tok2str = [...]string{ - tokIllegal: "ILLEGAL", - tokComment: "comment", - tokIdent: "identifier", - tokInclude: "include", - tokIncdir: "incdir", - tokDefine: "define", - tokResource: "resource", - tokString: "string", - tokCExpr: "CEXPR", - tokInt: "int", - tokNewLine: "NEWLINE", - tokEOF: "EOF", + tokIllegal: "ILLEGAL", + tokComment: "comment", + tokIdent: "identifier", + tokInclude: "include", + tokIncdir: "incdir", + tokDefine: "define", + tokResource: "resource", + tokString: "string", + tokStringHex: "hex string", + tokCExpr: "CEXPR", + tokInt: "int", + tokNewLine: "NEWLINE", + tokEOF: "EOF", } func init() { @@ -132,12 +135,7 @@ func (s *scanner) Scan() (tok token, lit string, pos Pos) { case s.ch == 0: tok = tokEOF s.next() - case s.ch == '`': - tok = tokCExpr - lit = s.scanCExpr(pos) case s.prev2 == tokDefine && s.prev1 == tokIdent: - // Note: the old form for C expressions, not really lexable. 
- // TODO(dvyukov): get rid of this eventually. tok = tokCExpr for ; s.ch != '\n'; s.next() { } @@ -150,6 +148,9 @@ func (s *scanner) Scan() (tok token, lit string, pos Pos) { case s.ch == '"' || s.ch == '<': tok = tokString lit = s.scanStr(pos) + case s.ch == '`': + tok = tokStringHex + lit = s.scanStr(pos) case s.ch >= '0' && s.ch <= '9' || s.ch == '-': tok = tokInt lit = s.scanInt(pos) @@ -170,21 +171,9 @@ func (s *scanner) Scan() (tok token, lit string, pos Pos) { return } -func (s *scanner) scanCExpr(pos Pos) string { - for s.next(); s.ch != '`' && s.ch != '\n'; s.next() { - } - if s.ch == '\n' { - s.Error(pos, "C expression is not terminated") - return "" - } - lit := string(s.data[pos.Off+1 : s.off]) - s.next() - return lit -} - func (s *scanner) scanStr(pos Pos) string { // TODO(dvyukov): get rid of <...> strings, that's only includes - closing := byte('"') + closing := s.ch if s.ch == '<' { closing = '>' } @@ -196,7 +185,6 @@ func (s *scanner) scanStr(pos Pos) string { } lit := string(s.data[pos.Off+1 : s.off]) for i := 0; i < len(lit); i++ { - //lit[i] if lit[i] < 0x20 || lit[i] >= 0x80 { pos1 := pos pos1.Col += i + 1 @@ -206,7 +194,14 @@ func (s *scanner) scanStr(pos Pos) string { } } s.next() - return lit + if closing != '`' { + return lit + } + decoded, err := hex.DecodeString(lit) + if err != nil { + s.Error(pos, "bad hex string literal: %v", err) + } + return string(decoded) } func (s *scanner) scanInt(pos Pos) string { diff --git a/pkg/ast/testdata/all.txt b/pkg/ast/testdata/all.txt index 392796254..122fa514b 100644 --- a/pkg/ast/testdata/all.txt +++ b/pkg/ast/testdata/all.txt @@ -17,15 +17,14 @@ int_flags4 = 1, -2- ### bad integer "-2-" str_flags0 = "foo", "bar" str_flags1 = "non terminated ### string literal is not terminated str_flags2 = "bad chars здесь" ### illegal character U+00D0 'Ð' in string literal -str_flags3 = "string", not a string ### unexpected identifier, expecting string -str_flags4 = "string", 42 ### unexpected int, expecting 
string +str_flags3 = "string", not a string ### unexpected identifier, expecting string, hex string +str_flags4 = "string", 42 ### unexpected int, expecting string, hex string call(foo ,int32 , bar int32) ### unexpected ',', expecting int, identifier, string call(foo int32:"bar") ### unexpected string, expecting int, identifier call(a int32, b len[a:"bar"]) ### unexpected string, expecting int, identifier -define FOO `bar` -define FOO `bar ### C expression is not terminated +define FOO bar foo(x int32[1:2:3, opt]) foo2(x int32[1[2]:2]) ### unexpected ':', expecting ']' diff --git a/pkg/compiler/testdata/all.txt b/pkg/compiler/testdata/all.txt index 9d94a81a1..789071964 100644 --- a/pkg/compiler/testdata/all.txt +++ b/pkg/compiler/testdata/all.txt @@ -40,10 +40,13 @@ strings { f11 stringnoz[string_flags1] f12 string[string_flags2] f13 stringnoz[string_flags2] + f14 string[`abcdef`, 4] + f15 string[string_flags3, 4] } [packed] string_flags1 = "foo", "barbaz" string_flags2 = "" +string_flags3 = "ab", `010203`, `de` int_flags = 0, 1, 0xabc, 'x', -11 _ = 1, 2 _ = C1, C2 diff --git a/pkg/compiler/testdata/errors2.txt b/pkg/compiler/testdata/errors2.txt index 47d76ab89..b5ab19ebf 100644 --- a/pkg/compiler/testdata/errors2.txt +++ b/pkg/compiler/testdata/errors2.txt @@ -267,6 +267,7 @@ foo$525(a int8[-256:256]) ### int range [18446744073709551360:256] is too large foo$526(a int8[-255:255]) ### int range [18446744073709551361:255] is too large for base type of size 8 foo$527(a int16[-40000:40000]) ### int range [18446744073709511616:40000] is too large for base type of size 16 foo$528(a ptr[in, s405]) +foo$529(a ptr[in, string[`abcdde`, 3]]) ### string value "\xab\xcd\xde\x00" exceeds buffer length 3 type type500 proc[C1, 8, int8] ### values starting from 1 with step 8 overflow base type for 32 procs type type501 int8 ### unused type type501 diff --git a/pkg/compiler/types.go b/pkg/compiler/types.go index 251e0fcaa..43efee202 100644 --- a/pkg/compiler/types.go +++ 
b/pkg/compiler/types.go @@ -514,12 +514,11 @@ func genTextType(t *ast.Type) prog.TextKind { } const ( - stringnoz = "stringnoz" - stringnozescapes = "stringnozescapes" + stringnoz = "stringnoz" ) var typeString = &typeDesc{ - Names: []string{"string", stringnoz, stringnozescapes}, + Names: []string{"string", stringnoz}, CanBeTypedef: true, OptArgs: 2, Args: []namedArg{ @@ -527,7 +526,7 @@ var typeString = &typeDesc{ {Name: "size", Type: typeArgInt}, }, Check: func(comp *compiler, t *ast.Type, args []*ast.Type, base prog.IntTypeCommon) { - if (t.Ident == stringnoz || t.Ident == stringnozescapes) && len(args) > 1 { + if t.Ident == stringnoz && len(args) > 1 { comp.error(args[0].Pos, "fixed-size string can't be non-zero-terminated") } }, @@ -559,7 +558,7 @@ var typeString = &typeDesc{ return &prog.BufferType{ TypeCommon: base.TypeCommon, Kind: prog.BufferFilename, - NoZ: t.Ident == stringnoz || t.Ident == stringnozescapes, + NoZ: t.Ident == stringnoz, } } subkind := "" @@ -576,7 +575,7 @@ var typeString = &typeDesc{ Kind: prog.BufferString, SubKind: subkind, Values: vals, - NoZ: t.Ident == stringnoz || t.Ident == stringnozescapes, + NoZ: t.Ident == stringnoz, } }, } @@ -592,16 +591,6 @@ func (comp *compiler) genStrings(t *ast.Type, args []*ast.Type) []string { } if t.Ident == stringnoz { return vals - } else if t.Ident == stringnozescapes { - for i := range vals { - unquote, err := strconv.Unquote(`"` + vals[i] + `"`) - if err != nil { - comp.error(args[0].Pos, fmt.Sprintf("unable to unquote stringnozescapes %q: %v", vals[i], err)) - } else { - vals[i] = unquote - } - } - return vals } var size uint64 if len(args) > 1 { -- cgit mrf-deployment