aboutsummaryrefslogtreecommitdiffstats
path: root/pkg
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2020-02-10 14:45:20 +0100
committerDmitry Vyukov <dvyukov@google.com>2020-02-10 14:45:20 +0100
commit18847f55bb3fe9db41e46a2e9e49a9f7c28143af (patch)
treee181c33c75f5851a5042290ff59dd44cce353c66 /pkg
parentd0da558cb16f3fb7cf26f20d24ee89eeb49a6f30 (diff)
pkg/ast: introduce hex-encoded string literals
The stringnozescapes type does not make sense with filename; also, we may need similar escaping for string flags. Handle escaped strings at the AST level instead. This avoids introducing a new type and works seamlessly with flags. As an alternative, I've also tried using strconv.Quote/Unquote, but it leads to ugly half-escaped strings: "\xb0\x80s\xe8\xd4N\x91\xe3ڒ,\"C\x82D\xbb\x88\\i\xe2i\xc8\xe9\xd85\xb1\x14):M\xdcn" Make hex-encoded strings a separate string format instead.
Diffstat (limited to 'pkg')
-rw-r--r--pkg/ast/ast.go9
-rw-r--r--pkg/ast/clone.go2
-rw-r--r--pkg/ast/format.go15
-rw-r--r--pkg/ast/parser.go19
-rw-r--r--pkg/ast/scanner.go59
-rw-r--r--pkg/ast/testdata/all.txt7
-rw-r--r--pkg/compiler/testdata/all.txt3
-rw-r--r--pkg/compiler/testdata/errors2.txt1
-rw-r--r--pkg/compiler/types.go21
9 files changed, 79 insertions, 57 deletions
diff --git a/pkg/ast/ast.go b/pkg/ast/ast.go
index 13ff2b40e..f1f74e61b 100644
--- a/pkg/ast/ast.go
+++ b/pkg/ast/ast.go
@@ -163,6 +163,7 @@ func (n *Ident) Info() (Pos, string, string) {
type String struct {
Pos Pos
Value string
+ Fmt StrFmt
}
func (n *String) Info() (Pos, string, string) {
@@ -178,6 +179,13 @@ const (
IntFmtChar
)
+type StrFmt int
+
+const (
+ StrFmtRaw StrFmt = iota
+ StrFmtHex
+)
+
type Int struct {
Pos Pos
// Only one of Value, Ident, CExpr is filled.
@@ -198,6 +206,7 @@ type Type struct {
ValueFmt IntFmt
Ident string
String string
+ StringFmt StrFmt
HasString bool
// Parts after COLON (for ranges and bitfields).
Colon []*Type
diff --git a/pkg/ast/clone.go b/pkg/ast/clone.go
index a594e41f6..54bf8250c 100644
--- a/pkg/ast/clone.go
+++ b/pkg/ast/clone.go
@@ -134,6 +134,7 @@ func (n *String) Clone() Node {
return &String{
Pos: n.Pos,
Value: n.Value,
+ Fmt: n.Fmt,
}
}
@@ -154,6 +155,7 @@ func (n *Type) Clone() Node {
ValueFmt: n.ValueFmt,
Ident: n.Ident,
String: n.String,
+ StringFmt: n.StringFmt,
HasString: n.HasString,
Colon: cloneTypes(n.Colon),
Args: cloneTypes(n.Args),
diff --git a/pkg/ast/format.go b/pkg/ast/format.go
index c3d931706..a2ead06f2 100644
--- a/pkg/ast/format.go
+++ b/pkg/ast/format.go
@@ -50,6 +50,17 @@ func FormatInt(v uint64, format IntFmt) string {
}
}
+func FormatStr(v string, format StrFmt) string {
+ switch format {
+ case StrFmtRaw:
+ return fmt.Sprintf(`"%v"`, v)
+ case StrFmtHex:
+ return fmt.Sprintf("`%x`", v)
+ default:
+ panic(fmt.Sprintf("unknown str format %v", format))
+ }
+}
+
type serializer interface {
serialize(w io.Writer)
}
@@ -153,7 +164,7 @@ func (flags *IntFlags) serialize(w io.Writer) {
func (flags *StrFlags) serialize(w io.Writer) {
fmt.Fprintf(w, "%v = ", flags.Name.Name)
for i, v := range flags.Values {
- fmt.Fprintf(w, "%v\"%v\"", comma(i, ""), v.Value)
+ fmt.Fprintf(w, "%v%v", comma(i, ""), FormatStr(v.Value, v.Fmt))
}
fmt.Fprintf(w, "\n")
}
@@ -172,7 +183,7 @@ func fmtType(t *Type) string {
case t.Ident != "":
v = t.Ident
case t.HasString:
- v = fmt.Sprintf("\"%v\"", t.String)
+ v = FormatStr(t.String, t.StringFmt)
default:
v = FormatInt(t.Value, t.ValueFmt)
}
diff --git a/pkg/ast/parser.go b/pkg/ast/parser.go
index b8d22fd88..7b46f6611 100644
--- a/pkg/ast/parser.go
+++ b/pkg/ast/parser.go
@@ -314,7 +314,7 @@ func (p *parser) parseFlags(name *Ident) Node {
switch p.tok {
case tokInt, tokIdent:
return p.parseIntFlags(name)
- case tokString:
+ case tokString, tokStringHex:
return p.parseStrFlags(name)
default:
p.expect(tokInt, tokIdent, tokString)
@@ -417,9 +417,10 @@ func (p *parser) parseType() *Type {
case tokIdent:
allowColon = true
arg.Ident = p.lit
- case tokString:
+ case tokString, tokStringHex:
arg.String = p.lit
arg.HasString = true
+ arg.StringFmt = strTokToFmt(p.tok)
default:
p.expect(tokInt, tokIdent, tokString)
}
@@ -468,15 +469,27 @@ func (p *parser) parseIdent() *Ident {
}
func (p *parser) parseString() *String {
- p.expect(tokString)
+ p.expect(tokString, tokStringHex)
str := &String{
Pos: p.pos,
Value: p.lit,
+ Fmt: strTokToFmt(p.tok),
}
p.next()
return str
}
+func strTokToFmt(tok token) StrFmt {
+ switch tok {
+ case tokString:
+ return StrFmtRaw
+ case tokStringHex:
+ return StrFmtHex
+ default:
+ panic("bad string token")
+ }
+}
+
func (p *parser) parseInt() *Int {
i := &Int{
Pos: p.pos,
diff --git a/pkg/ast/scanner.go b/pkg/ast/scanner.go
index a9448b34f..3a6ba9d98 100644
--- a/pkg/ast/scanner.go
+++ b/pkg/ast/scanner.go
@@ -4,6 +4,7 @@
package ast
import (
+ "encoding/hex"
"fmt"
"os"
"strconv"
@@ -20,6 +21,7 @@ const (
tokDefine
tokResource
tokString
+ tokStringHex
tokCExpr
tokInt
@@ -51,18 +53,19 @@ var punctuation = [256]token{
}
var tok2str = [...]string{
- tokIllegal: "ILLEGAL",
- tokComment: "comment",
- tokIdent: "identifier",
- tokInclude: "include",
- tokIncdir: "incdir",
- tokDefine: "define",
- tokResource: "resource",
- tokString: "string",
- tokCExpr: "CEXPR",
- tokInt: "int",
- tokNewLine: "NEWLINE",
- tokEOF: "EOF",
+ tokIllegal: "ILLEGAL",
+ tokComment: "comment",
+ tokIdent: "identifier",
+ tokInclude: "include",
+ tokIncdir: "incdir",
+ tokDefine: "define",
+ tokResource: "resource",
+ tokString: "string",
+ tokStringHex: "hex string",
+ tokCExpr: "CEXPR",
+ tokInt: "int",
+ tokNewLine: "NEWLINE",
+ tokEOF: "EOF",
}
func init() {
@@ -132,12 +135,7 @@ func (s *scanner) Scan() (tok token, lit string, pos Pos) {
case s.ch == 0:
tok = tokEOF
s.next()
- case s.ch == '`':
- tok = tokCExpr
- lit = s.scanCExpr(pos)
case s.prev2 == tokDefine && s.prev1 == tokIdent:
- // Note: the old form for C expressions, not really lexable.
- // TODO(dvyukov): get rid of this eventually.
tok = tokCExpr
for ; s.ch != '\n'; s.next() {
}
@@ -150,6 +148,9 @@ func (s *scanner) Scan() (tok token, lit string, pos Pos) {
case s.ch == '"' || s.ch == '<':
tok = tokString
lit = s.scanStr(pos)
+ case s.ch == '`':
+ tok = tokStringHex
+ lit = s.scanStr(pos)
case s.ch >= '0' && s.ch <= '9' || s.ch == '-':
tok = tokInt
lit = s.scanInt(pos)
@@ -170,21 +171,9 @@ func (s *scanner) Scan() (tok token, lit string, pos Pos) {
return
}
-func (s *scanner) scanCExpr(pos Pos) string {
- for s.next(); s.ch != '`' && s.ch != '\n'; s.next() {
- }
- if s.ch == '\n' {
- s.Error(pos, "C expression is not terminated")
- return ""
- }
- lit := string(s.data[pos.Off+1 : s.off])
- s.next()
- return lit
-}
-
func (s *scanner) scanStr(pos Pos) string {
// TODO(dvyukov): get rid of <...> strings, that's only includes
- closing := byte('"')
+ closing := s.ch
if s.ch == '<' {
closing = '>'
}
@@ -196,7 +185,6 @@ func (s *scanner) scanStr(pos Pos) string {
}
lit := string(s.data[pos.Off+1 : s.off])
for i := 0; i < len(lit); i++ {
- //lit[i]
if lit[i] < 0x20 || lit[i] >= 0x80 {
pos1 := pos
pos1.Col += i + 1
@@ -206,7 +194,14 @@ func (s *scanner) scanStr(pos Pos) string {
}
}
s.next()
- return lit
+ if closing != '`' {
+ return lit
+ }
+ decoded, err := hex.DecodeString(lit)
+ if err != nil {
+ s.Error(pos, "bad hex string literal: %v", err)
+ }
+ return string(decoded)
}
func (s *scanner) scanInt(pos Pos) string {
diff --git a/pkg/ast/testdata/all.txt b/pkg/ast/testdata/all.txt
index 392796254..122fa514b 100644
--- a/pkg/ast/testdata/all.txt
+++ b/pkg/ast/testdata/all.txt
@@ -17,15 +17,14 @@ int_flags4 = 1, -2- ### bad integer "-2-"
str_flags0 = "foo", "bar"
str_flags1 = "non terminated ### string literal is not terminated
str_flags2 = "bad chars здесь" ### illegal character U+00D0 'Ð' in string literal
-str_flags3 = "string", not a string ### unexpected identifier, expecting string
-str_flags4 = "string", 42 ### unexpected int, expecting string
+str_flags3 = "string", not a string ### unexpected identifier, expecting string, hex string
+str_flags4 = "string", 42 ### unexpected int, expecting string, hex string
call(foo ,int32 , bar int32) ### unexpected ',', expecting int, identifier, string
call(foo int32:"bar") ### unexpected string, expecting int, identifier
call(a int32, b len[a:"bar"]) ### unexpected string, expecting int, identifier
-define FOO `bar`
-define FOO `bar ### C expression is not terminated
+define FOO bar
foo(x int32[1:2:3, opt])
foo2(x int32[1[2]:2]) ### unexpected ':', expecting ']'
diff --git a/pkg/compiler/testdata/all.txt b/pkg/compiler/testdata/all.txt
index 9d94a81a1..789071964 100644
--- a/pkg/compiler/testdata/all.txt
+++ b/pkg/compiler/testdata/all.txt
@@ -40,10 +40,13 @@ strings {
f11 stringnoz[string_flags1]
f12 string[string_flags2]
f13 stringnoz[string_flags2]
+ f14 string[`abcdef`, 4]
+ f15 string[string_flags3, 4]
} [packed]
string_flags1 = "foo", "barbaz"
string_flags2 = ""
+string_flags3 = "ab", `010203`, `de`
int_flags = 0, 1, 0xabc, 'x', -11
_ = 1, 2
_ = C1, C2
diff --git a/pkg/compiler/testdata/errors2.txt b/pkg/compiler/testdata/errors2.txt
index 47d76ab89..b5ab19ebf 100644
--- a/pkg/compiler/testdata/errors2.txt
+++ b/pkg/compiler/testdata/errors2.txt
@@ -267,6 +267,7 @@ foo$525(a int8[-256:256]) ### int range [18446744073709551360:256] is too large
foo$526(a int8[-255:255]) ### int range [18446744073709551361:255] is too large for base type of size 8
foo$527(a int16[-40000:40000]) ### int range [18446744073709511616:40000] is too large for base type of size 16
foo$528(a ptr[in, s405])
+foo$529(a ptr[in, string[`abcdde`, 3]]) ### string value "\xab\xcd\xde\x00" exceeds buffer length 3
type type500 proc[C1, 8, int8] ### values starting from 1 with step 8 overflow base type for 32 procs
type type501 int8 ### unused type type501
diff --git a/pkg/compiler/types.go b/pkg/compiler/types.go
index 251e0fcaa..43efee202 100644
--- a/pkg/compiler/types.go
+++ b/pkg/compiler/types.go
@@ -514,12 +514,11 @@ func genTextType(t *ast.Type) prog.TextKind {
}
const (
- stringnoz = "stringnoz"
- stringnozescapes = "stringnozescapes"
+ stringnoz = "stringnoz"
)
var typeString = &typeDesc{
- Names: []string{"string", stringnoz, stringnozescapes},
+ Names: []string{"string", stringnoz},
CanBeTypedef: true,
OptArgs: 2,
Args: []namedArg{
@@ -527,7 +526,7 @@ var typeString = &typeDesc{
{Name: "size", Type: typeArgInt},
},
Check: func(comp *compiler, t *ast.Type, args []*ast.Type, base prog.IntTypeCommon) {
- if (t.Ident == stringnoz || t.Ident == stringnozescapes) && len(args) > 1 {
+ if t.Ident == stringnoz && len(args) > 1 {
comp.error(args[0].Pos, "fixed-size string can't be non-zero-terminated")
}
},
@@ -559,7 +558,7 @@ var typeString = &typeDesc{
return &prog.BufferType{
TypeCommon: base.TypeCommon,
Kind: prog.BufferFilename,
- NoZ: t.Ident == stringnoz || t.Ident == stringnozescapes,
+ NoZ: t.Ident == stringnoz,
}
}
subkind := ""
@@ -576,7 +575,7 @@ var typeString = &typeDesc{
Kind: prog.BufferString,
SubKind: subkind,
Values: vals,
- NoZ: t.Ident == stringnoz || t.Ident == stringnozescapes,
+ NoZ: t.Ident == stringnoz,
}
},
}
@@ -592,16 +591,6 @@ func (comp *compiler) genStrings(t *ast.Type, args []*ast.Type) []string {
}
if t.Ident == stringnoz {
return vals
- } else if t.Ident == stringnozescapes {
- for i := range vals {
- unquote, err := strconv.Unquote(`"` + vals[i] + `"`)
- if err != nil {
- comp.error(args[0].Pos, fmt.Sprintf("unable to unquote stringnozescapes %q: %v", vals[i], err))
- } else {
- vals[i] = unquote
- }
- }
- return vals
}
var size uint64
if len(args) > 1 {