// Copyright (c) 2017 The Go Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd

// Package safehtmlutil contains functions shared by package safehtml and safehtml/template.
package safehtmlutil

import (
	"bytes"
	"fmt"
	"reflect"
	"regexp"
)

// IsSafeTrustedResourceURLPrefix returns whether the given prefix is safe to use as a
// TrustedResourceURL prefix.
//
// TrustedResourceURL prefixes must start with one of the following:
//   * `https://<origin>/`
//   * `//<origin>/`
//   * `/<pathStart>`
//   * `about:blank#`
//
// `<origin>` must contain only alphanumerics, '.', ':', '[', ']', or '-'.
// These restrictions do not enforce a well-formed domain name, so '.' and '1.2' are valid.
//
// `<pathStart>` is any character except `/` and `\`. Based on
// https://url.spec.whatwg.org/commit-snapshots/56b74ce7cca8883eab62e9a12666e2fac665d03d/#url-parsing,
// an initial / which is not followed by another / or \ will end up in the "path state" and from there
// it can only go to the "fragment state" and "query state".
//
// Matching is case-insensitive and anchored only at the start of prefix, so
// anything may follow the portion described above.
func IsSafeTrustedResourceURLPrefix(prefix string) bool {
	return safeTrustedResourceURLPrefixPattern.MatchString(prefix)
}

// safeTrustedResourceURLPrefixPattern implements the rules documented on
// IsSafeTrustedResourceURLPrefix. Its three alternatives are, in order:
// an optional "https:" scheme followed by "//<origin>/", a single "/" followed
// by one character that is neither "/" nor "\", and the literal "about:blank#".
var safeTrustedResourceURLPrefixPattern = regexp.MustCompile(`(?i)^(?:` +
	`(?:https:)?//[0-9a-z.:\[\]-]+/|` +
	`/[^/\\]|` +
	`about:blank#)`)

// URLContainsDoubleDotSegment returns whether the given URL or URL substring
// contains the double dot-segment ".." (RFC3986 3.3) in its percent-encoded or
// unencoded form.
//
// The check is a plain substring match: any two consecutive occurrences of
// "." or "%2e" (in either letter case, and in any mix such as ".%2E") are
// reported, regardless of whether they are delimited by "/" on either side.
func URLContainsDoubleDotSegment(url string) bool {
	return urlDoubleDotSegmentPattern.MatchString(url)
}

// urlDoubleDotSegmentPattern matches two adjacent dots, each written either
// literally or as the percent-escape "%2e" (case-insensitive).
var urlDoubleDotSegmentPattern = regexp.MustCompile(`(?i)(?:\.|%2e)(?:\.|%2e)`)

// QueryEscapeURL produces an output that can be embedded in a URL query.
// The output can be embedded in an HTML attribute without further escaping.
func QueryEscapeURL(args ...interface{}) string {
	return urlProcessor(false, Stringify(args...))
}

// NormalizeURL normalizes URL content so it can be embedded in a quote-delimited
// string or parenthesis delimited url(...).
// The normalizer does not encode all HTML specials. Specifically, it does not
// encode '&' so correct embedding in an HTML attribute requires escaping of
// '&' to '&amp;'.
func NormalizeURL(args ...interface{}) string {
	return urlProcessor(true, Stringify(args...))
}

// urlProcessor normalizes (when norm is true) or escapes its input to produce
// a valid hierarchical or opaque URL part.
//
// Bytes that are left untouched are copied through verbatim; every other byte
// is replaced by "%" followed by its two-digit lowercase hex value. When no
// byte needs escaping, s itself is returned without allocating.
func urlProcessor(norm bool, s string) string {
	// Lowercase hex alphabet used for percent-escapes.
	const hexDigits = "0123456789abcdef"

	var b bytes.Buffer
	// written is the index in s up to which input has been flushed to b.
	written := 0
	// The byte loop below assumes that all URLs use UTF-8 as the
	// content-encoding. This is similar to the URI to IRI encoding scheme
	// defined in section 3.1 of RFC 3987, and behaves the same as the
	// EcmaScript builtin encodeURIComponent.
	// It should not cause any misencoding of URLs in pages with
	// Content-type: text/html;charset=UTF-8.
	for i, n := 0, len(s); i < n; i++ {
		c := s[i]
		switch c {
		// Single quote and parens are sub-delims in RFC 3986, but we
		// escape them so the output can be embedded in single
		// quoted attributes and unquoted CSS url(...) constructs.
		// Single quotes are reserved in URLs, but are only used in
		// the obsolete "mark" rule in an appendix in RFC 3986
		// so can be safely encoded.
		case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']':
			// Reserved characters survive normalization but are
			// escaped when producing a query component.
			if norm {
				continue
			}
		// Unreserved according to RFC 3986 sec 2.3
		// "For consistency, percent-encoded octets in the ranges of
		// ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
		// period (%2E), underscore (%5F), or tilde (%7E) should not be
		// created by URI producers
		case '-', '.', '_', '~':
			continue
		case '%':
			// When normalizing do not re-encode valid escapes.
			if norm && i+2 < n && isHex(s[i+1]) && isHex(s[i+2]) {
				continue
			}
		default:
			// Unreserved according to RFC 3986 sec 2.3
			if 'a' <= c && c <= 'z' {
				continue
			}
			if 'A' <= c && c <= 'Z' {
				continue
			}
			if '0' <= c && c <= '9' {
				continue
			}
		}
		// c must be escaped: flush the pending safe run, then emit %xx.
		b.WriteString(s[written:i])
		b.WriteByte('%')
		b.WriteByte(hexDigits[c>>4])
		b.WriteByte(hexDigits[c&0x0f])
		written = i + 1
	}
	if written == 0 {
		// Nothing was escaped, so the input can be returned as is.
		return s
	}
	b.WriteString(s[written:])
	return b.String()
}

// isHex reports whether the given character is a hex digit.
func isHex(c byte) bool {
	return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
}

// Stringify converts its arguments to a string. It is equivalent to
// fmt.Sprint(args...), except that it dereferences all pointers.
//
// The args slice is never modified, so it is safe to call Stringify with a
// spread slice (Stringify(vals...)) that the caller continues to use.
func Stringify(args ...interface{}) string {
	// Optimization for simple common case of a single string argument.
	if len(args) == 1 {
		if s, ok := args[0].(string); ok {
			return s
		}
	}
	// Dereference into a fresh slice: when the caller spreads an existing
	// slice, args aliases the caller's backing array and must not be
	// overwritten.
	derefed := make([]interface{}, len(args))
	for i, arg := range args {
		derefed[i] = indirectToStringerOrError(arg)
	}
	return fmt.Sprint(derefed...)
}

var (
	errorType       = reflect.TypeOf((*error)(nil)).Elem()
	fmtStringerType = reflect.TypeOf((*fmt.Stringer)(nil)).Elem()
)

// indirectToStringerOrError dereferences a as many times
// as necessary to reach the base type, an implementation of fmt.Stringer,
// or an implementation of error, and returns a value of that type. It returns
// nil if a is nil.
func indirectToStringerOrError(a interface{}) interface{} {
	if a == nil {
		return nil
	}
	v := reflect.ValueOf(a)
	// Stop as soon as the current value can format itself (Stringer or
	// error), is not a pointer, or is a nil pointer.
	for !v.Type().Implements(fmtStringerType) && !v.Type().Implements(errorType) && v.Kind() == reflect.Ptr && !v.IsNil() {
		v = v.Elem()
	}
	return v.Interface()
}

// Indirect returns the value, after dereferencing as many times
// as necessary to reach the base type (or nil).
func Indirect(a interface{}) interface{} {
	if a == nil {
		return nil
	}
	if t := reflect.TypeOf(a); t.Kind() != reflect.Ptr {
		// Avoid creating a reflect.Value if it's not a pointer.
		return a
	}
	v := reflect.ValueOf(a)
	for v.Kind() == reflect.Ptr && !v.IsNil() {
		v = v.Elem()
	}
	return v.Interface()
}