diff options
Diffstat (limited to 'vendor/github.com/google/safehtml/internal/safehtmlutil/safehtmlutil.go')
| -rw-r--r-- | vendor/github.com/google/safehtml/internal/safehtmlutil/safehtmlutil.go | 180 |
1 files changed, 180 insertions, 0 deletions
diff --git a/vendor/github.com/google/safehtml/internal/safehtmlutil/safehtmlutil.go b/vendor/github.com/google/safehtml/internal/safehtmlutil/safehtmlutil.go new file mode 100644 index 000000000..dd8e7fe36 --- /dev/null +++ b/vendor/github.com/google/safehtml/internal/safehtmlutil/safehtmlutil.go @@ -0,0 +1,180 @@ +// Copyright (c) 2017 The Go Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +// Package safehtmlutil contains functions shared by package safehtml and safehtml/template. +package safehtmlutil + +import ( + "bytes" + "fmt" + "reflect" + "regexp" +) + +// IsSafeTrustedResourceURLPrefix returns whether the given prefix is safe to use as a +// TrustedResourceURL prefix. +// +// TrustedResourceURL prefixes must start with one of the following: +// * `https://<origin>/` +// * `//<origin>/` +// * `/<pathStart>` +// * `about:blank#` +// +// `<origin>` must contain only alphanumerics, '.', ':', '[', ']', or '-'. +// These restrictions do not enforce a well-formed domain name, so '.' and '1.2' are valid. +// +// `<pathStart>` is any character except `/` and `\`. Based on +// https://url.spec.whatwg.org/commit-snapshots/56b74ce7cca8883eab62e9a12666e2fac665d03d/#url-parsing, +// an initial / which is not followed by another / or \ will end up in the "path state" and from there +// it can only go to the "fragment state" and "query state". +func IsSafeTrustedResourceURLPrefix(prefix string) bool { + return safeTrustedResourceURLPrefixPattern.MatchString(prefix) +} + +var safeTrustedResourceURLPrefixPattern = regexp.MustCompile(`(?i)^(?:` + + `(?:https:)?//[0-9a-z.:\[\]-]+/|` + + `/[^/\\]|` + + `about:blank#)`) + +// URLContainsDoubleDotSegment returns whether the given URL or URL substring +// contains the double dot-segment ".." (RFC3986 3.3) in its percent-encoded or +// unencoded form. +func URLContainsDoubleDotSegment(url string) bool { + return urlDoubleDotSegmentPattern.MatchString(url) +} + +var urlDoubleDotSegmentPattern = regexp.MustCompile(`(?i)(?:\.|%2e)(?:\.|%2e)`) + +// QueryEscapeURL produces an output that can be embedded in a URL query. +// The output can be embedded in an HTML attribute without further escaping. +func QueryEscapeURL(args ...interface{}) string { + return urlProcessor(false, Stringify(args...)) +} + +// NormalizeURL normalizes URL content so it can be embedded in a quote-delimited +// string or parenthesis delimited url(...). +// The normalizer does not encode all HTML specials. Specifically, it does not +// encode '&' so correct embedding in an HTML attribute requires escaping of +// '&' to '&'. +func NormalizeURL(args ...interface{}) string { + return urlProcessor(true, Stringify(args...)) +} + +// urlProcessor normalizes (when norm is true) or escapes its input to produce +// a valid hierarchical or opaque URL part. +func urlProcessor(norm bool, s string) string { + var b bytes.Buffer + written := 0 + // The byte loop below assumes that all URLs use UTF-8 as the + // content-encoding. This is similar to the URI to IRI encoding scheme + // defined in section 3.1 of RFC 3987, and behaves the same as the + // EcmaScript builtin encodeURIComponent. + // It should not cause any misencoding of URLs in pages with + // Content-type: text/html;charset=UTF-8. + for i, n := 0, len(s); i < n; i++ { + c := s[i] + switch c { + // Single quote and parens are sub-delims in RFC 3986, but we + // escape them so the output can be embedded in single + // quoted attributes and unquoted CSS url(...) constructs. + // Single quotes are reserved in URLs, but are only used in + // the obsolete "mark" rule in an appendix in RFC 3986 + // so can be safely encoded. + case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']': + if norm { + continue + } + // Unreserved according to RFC 3986 sec 2.3 + // "For consistency, percent-encoded octets in the ranges of + // ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), + // period (%2E), underscore (%5F), or tilde (%7E) should not be + // created by URI producers + case '-', '.', '_', '~': + continue + case '%': + // When normalizing do not re-encode valid escapes. + if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) { + continue + } + default: + // Unreserved according to RFC 3986 sec 2.3 + if 'a' <= c && c <= 'z' { + continue + } + if 'A' <= c && c <= 'Z' { + continue + } + if '0' <= c && c <= '9' { + continue + } + } + b.WriteString(s[written:i]) + fmt.Fprintf(&b, "%%%02x", c) + written = i + 1 + } + if written == 0 { + return s + } + b.WriteString(s[written:]) + return b.String() +} + +// isHex reports whether the given character is a hex digit. +func isHex(c byte) bool { + return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' +} + +// Stringify converts its arguments to a string. It is equivalent to +// fmt.Sprint(args...), except that it deferences all pointers. +func Stringify(args ...interface{}) string { + // Optimization for simple common case of a single string argument. + if len(args) == 1 { + if s, ok := args[0].(string); ok { + return s + } + } + for i, arg := range args { + args[i] = indirectToStringerOrError(arg) + } + return fmt.Sprint(args...) +} + +var ( + errorType = reflect.TypeOf((*error)(nil)).Elem() + fmtStringerType = reflect.TypeOf((*fmt.Stringer)(nil)).Elem() +) + +// indirectToStringerOrError dereferences a as many times +// as necessary to reach the base type, an implementation of fmt.Stringer, +// or an implementation of error, and returns a value of that type. It returns +// nil if a is nil. +func indirectToStringerOrError(a interface{}) interface{} { + if a == nil { + return nil + } + v := reflect.ValueOf(a) + for !v.Type().Implements(fmtStringerType) && !v.Type().Implements(errorType) && v.Kind() == reflect.Ptr && !v.IsNil() { + v = v.Elem() + } + return v.Interface() +} + +// Indirect returns the value, after dereferencing as many times +// as necessary to reach the base type (or nil). +func Indirect(a interface{}) interface{} { + if a == nil { + return nil + } + if t := reflect.TypeOf(a); t.Kind() != reflect.Ptr { + // Avoid creating a reflect.Value if it's not a pointer. + return a + } + v := reflect.ValueOf(a) + for v.Kind() == reflect.Ptr && !v.IsNil() { + v = v.Elem() + } + return v.Interface() +} |
