From 4165372ec8fd142475a4e35fd0cf4f8042132208 Mon Sep 17 00:00:00 2001 From: Taras Madan Date: Wed, 22 Feb 2023 22:16:50 +0100 Subject: dependencies: update set go min requirements to 1.19 update dependencies update vendor --- vendor/github.com/google/safehtml/CONTRIBUTING.md | 29 + vendor/github.com/google/safehtml/LICENSE | 27 + vendor/github.com/google/safehtml/README.md | 17 + vendor/github.com/google/safehtml/doc.go | 11 + vendor/github.com/google/safehtml/html.go | 117 +++ vendor/github.com/google/safehtml/identifier.go | 83 ++ vendor/github.com/google/safehtml/init.go | 58 ++ .../github.com/google/safehtml/internal/raw/raw.go | 31 + .../safehtml/internal/safehtmlutil/safehtmlutil.go | 180 +++++ .../google/safehtml/internal/template/raw/raw.go | 16 + vendor/github.com/google/safehtml/script.go | 90 +++ vendor/github.com/google/safehtml/style.go | 304 +++++++ vendor/github.com/google/safehtml/stylesheet.go | 111 +++ .../github.com/google/safehtml/template/context.go | 183 +++++ .../google/safehtml/template/delim_string.go | 16 + vendor/github.com/google/safehtml/template/doc.go | 291 +++++++ .../github.com/google/safehtml/template/error.go | 280 +++++++ .../github.com/google/safehtml/template/escape.go | 884 +++++++++++++++++++++ vendor/github.com/google/safehtml/template/init.go | 28 + .../google/safehtml/template/sanitize.go | 258 ++++++ .../google/safehtml/template/sanitizers.go | 599 ++++++++++++++ .../google/safehtml/template/state_string.go | 16 + .../google/safehtml/template/template.go | 651 +++++++++++++++ .../template/testdata/dir1/parsefiles_t1.tmpl | 1 + .../template/testdata/dir2/parsefiles_t2.tmpl | 1 + .../google/safehtml/template/testdata/glob_t0.tmpl | 1 + .../google/safehtml/template/testdata/glob_t1.tmpl | 1 + .../google/safehtml/template/testdata/glob_t2.tmpl | 1 + .../safehtml/template/testdata/helpers_t1.tmpl | 1 + .../safehtml/template/testdata/helpers_t2.tmpl | 1 + .../safehtml/template/testdata/share_t0.tmpl | 1 + 
.../safehtml/template/testdata/share_t1.tmpl | 1 + .../google/safehtml/template/transition.go | 312 ++++++++ .../google/safehtml/template/trustedfs.go | 98 +++ .../google/safehtml/template/trustedsource.go | 105 +++ .../google/safehtml/template/trustedtemplate.go | 36 + vendor/github.com/google/safehtml/template/url.go | 122 +++ .../google/safehtml/trustedresourceurl.go | 195 +++++ .../uncheckedconversions/uncheckedconversions.go | 131 +++ vendor/github.com/google/safehtml/url.go | 127 +++ vendor/github.com/google/safehtml/urlset.go | 167 ++++ 41 files changed, 5582 insertions(+) create mode 100644 vendor/github.com/google/safehtml/CONTRIBUTING.md create mode 100644 vendor/github.com/google/safehtml/LICENSE create mode 100644 vendor/github.com/google/safehtml/README.md create mode 100644 vendor/github.com/google/safehtml/doc.go create mode 100644 vendor/github.com/google/safehtml/html.go create mode 100644 vendor/github.com/google/safehtml/identifier.go create mode 100644 vendor/github.com/google/safehtml/init.go create mode 100644 vendor/github.com/google/safehtml/internal/raw/raw.go create mode 100644 vendor/github.com/google/safehtml/internal/safehtmlutil/safehtmlutil.go create mode 100644 vendor/github.com/google/safehtml/internal/template/raw/raw.go create mode 100644 vendor/github.com/google/safehtml/script.go create mode 100644 vendor/github.com/google/safehtml/style.go create mode 100644 vendor/github.com/google/safehtml/stylesheet.go create mode 100644 vendor/github.com/google/safehtml/template/context.go create mode 100644 vendor/github.com/google/safehtml/template/delim_string.go create mode 100644 vendor/github.com/google/safehtml/template/doc.go create mode 100644 vendor/github.com/google/safehtml/template/error.go create mode 100644 vendor/github.com/google/safehtml/template/escape.go create mode 100644 vendor/github.com/google/safehtml/template/init.go create mode 100644 vendor/github.com/google/safehtml/template/sanitize.go create mode 100644 
vendor/github.com/google/safehtml/template/sanitizers.go create mode 100644 vendor/github.com/google/safehtml/template/state_string.go create mode 100644 vendor/github.com/google/safehtml/template/template.go create mode 100644 vendor/github.com/google/safehtml/template/testdata/dir1/parsefiles_t1.tmpl create mode 100644 vendor/github.com/google/safehtml/template/testdata/dir2/parsefiles_t2.tmpl create mode 100644 vendor/github.com/google/safehtml/template/testdata/glob_t0.tmpl create mode 100644 vendor/github.com/google/safehtml/template/testdata/glob_t1.tmpl create mode 100644 vendor/github.com/google/safehtml/template/testdata/glob_t2.tmpl create mode 100644 vendor/github.com/google/safehtml/template/testdata/helpers_t1.tmpl create mode 100644 vendor/github.com/google/safehtml/template/testdata/helpers_t2.tmpl create mode 100644 vendor/github.com/google/safehtml/template/testdata/share_t0.tmpl create mode 100644 vendor/github.com/google/safehtml/template/testdata/share_t1.tmpl create mode 100644 vendor/github.com/google/safehtml/template/transition.go create mode 100644 vendor/github.com/google/safehtml/template/trustedfs.go create mode 100644 vendor/github.com/google/safehtml/template/trustedsource.go create mode 100644 vendor/github.com/google/safehtml/template/trustedtemplate.go create mode 100644 vendor/github.com/google/safehtml/template/url.go create mode 100644 vendor/github.com/google/safehtml/trustedresourceurl.go create mode 100644 vendor/github.com/google/safehtml/uncheckedconversions/uncheckedconversions.go create mode 100644 vendor/github.com/google/safehtml/url.go create mode 100644 vendor/github.com/google/safehtml/urlset.go (limited to 'vendor/github.com/google/safehtml') diff --git a/vendor/github.com/google/safehtml/CONTRIBUTING.md b/vendor/github.com/google/safehtml/CONTRIBUTING.md new file mode 100644 index 000000000..22b241cb7 --- /dev/null +++ b/vendor/github.com/google/safehtml/CONTRIBUTING.md @@ -0,0 +1,29 @@ +# How to Contribute + +We'd 
love to accept your patches and contributions to this project. There are +just a few small guidelines you need to follow. + +## Contributor License Agreement + +Contributions to this project must be accompanied by a Contributor License +Agreement (CLA). You (or your employer) retain the copyright to your +contribution; this simply gives us permission to use and redistribute your +contributions as part of the project. Head over to +<https://cla.developers.google.com/> to see your current agreements on file or +to sign a new one. + +You generally only need to submit a CLA once, so if you've already submitted one +(even if it was for a different project), you probably don't need to do it +again. + +## Code reviews + +All submissions, including submissions by project members, require review. We +use GitHub pull requests for this purpose. Consult +[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more +information on using pull requests. + +## Community Guidelines + +This project follows +[Google's Open Source Community Guidelines](https://opensource.google/conduct/). diff --git a/vendor/github.com/google/safehtml/LICENSE b/vendor/github.com/google/safehtml/LICENSE new file mode 100644 index 000000000..dec93b16e --- /dev/null +++ b/vendor/github.com/google/safehtml/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2017 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution.
+ * Neither the name of Google LLC nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/vendor/github.com/google/safehtml/README.md b/vendor/github.com/google/safehtml/README.md new file mode 100644 index 000000000..d3c9676d1 --- /dev/null +++ b/vendor/github.com/google/safehtml/README.md @@ -0,0 +1,17 @@ +# Safe HTML for Go + +`safehtml` provides immutable string-like types that wrap web types such as +HTML, JavaScript and CSS. These wrappers are safe by construction against XSS +and similar web vulnerabilities, and they can only be interpolated in safe ways. +You can read more about our approach to web security in our +[whitepaper](https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/42934.pdf), +or this [OWASP talk](https://www.youtube.com/watch?v=ccfEu-Jj0as). + +Additional subpackages provide APIs for managing exceptions to the +safety rules, and a template engine with a syntax and interface that closely +matches [`html/template`](https://golang.org/pkg/html/template/). 
You can refer +to the [godoc](https://pkg.go.dev/github.com/google/safehtml?tab=doc) +for each (sub)package for the API documentation and code examples. +More end-to-end demos are available in `example_test.go`. + +This is not an officially supported Google product. diff --git a/vendor/github.com/google/safehtml/doc.go b/vendor/github.com/google/safehtml/doc.go new file mode 100644 index 000000000..4c5c1bf78 --- /dev/null +++ b/vendor/github.com/google/safehtml/doc.go @@ -0,0 +1,11 @@ +// Copyright (c) 2017 The Go Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +// Package safehtml provides immutable string-like types which represent values that +// are guaranteed to be safe, by construction or by escaping or sanitization, to use +// in various HTML contexts and with various DOM APIs. +// +package safehtml diff --git a/vendor/github.com/google/safehtml/html.go b/vendor/github.com/google/safehtml/html.go new file mode 100644 index 000000000..27c0f337d --- /dev/null +++ b/vendor/github.com/google/safehtml/html.go @@ -0,0 +1,117 @@ +// Copyright (c) 2017 The Go Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +package safehtml + +import ( + "bytes" + "html" + "unicode" + + "golang.org/x/text/unicode/rangetable" +) + +// An HTML is an immutable string-like type that is safe to use in HTML +// contexts in DOM APIs and HTML documents. +// +// HTML guarantees that its value as a string will not cause untrusted script +// execution when evaluated as HTML in a browser. 
+// +// Values of this type are guaranteed to be safe to use in HTML contexts, +// such as assignment to the innerHTML DOM property, or interpolation into an +// HTML template in HTML PC_DATA context, in the sense that the use will not +// result in a Cross-site Scripting (XSS) vulnerability. +type HTML struct { + // We declare an HTML not as a string but as a struct wrapping a string + // to prevent construction of HTML values through string conversion. + str string +} + +// HTMLer is implemented by any value that has an HTML method, which defines the +// safe HTML format for that value. +type HTMLer interface { + HTML() HTML +} + +// HTMLEscaped returns an HTML whose value is text, with the characters [&<>"'] escaped. +// +// text is coerced to interchange valid, so the resulting HTML contains only +// valid UTF-8 characters which are legal in HTML and XML. +// +func HTMLEscaped(text string) HTML { + return HTML{escapeAndCoerceToInterchangeValid(text)} +} + +// HTMLConcat returns an HTML which contains, in order, the string representations +// of the given htmls. +func HTMLConcat(htmls ...HTML) HTML { + var b bytes.Buffer + for _, html := range htmls { + b.WriteString(html.String()) + } + return HTML{b.String()} +} + +// String returns the string form of the HTML. +func (h HTML) String() string { + return h.str +} + +// escapeAndCoerceToInterchangeValid coerces the string to interchange-valid +// UTF-8 and then HTML-escapes it. +func escapeAndCoerceToInterchangeValid(str string) string { + return html.EscapeString(coerceToUTF8InterchangeValid(str)) +} + +// coerceToUTF8InterchangeValid coerces a string to interchange-valid UTF-8. +// Illegal UTF-8 bytes are replaced with the Unicode replacement character +// ('\uFFFD'). C0 and C1 control codes (other than CR LF HT FF) and +// non-characters are also replaced with the Unicode replacement character. 
+func coerceToUTF8InterchangeValid(s string) string { + // TODO: Replace this entire function with stdlib function if https://golang.org/issue/25805 gets addressed. + runes := make([]rune, 0, len(s)) + // If s contains any invalid UTF-8 byte sequences, range will have rune + // contain the Unicode replacement character and there's no need to call + // utf8.ValidRune. I.e. iteration over the string implements + // CoerceToStructurallyValid() from C++/Java. + // See https://blog.golang.org/strings. + for _, rune := range s { + if unicode.Is(controlAndNonCharacter, rune) { + runes = append(runes, unicode.ReplacementChar) + } else { + runes = append(runes, rune) + } + } + return string(runes) +} + +// controlAndNonCharacters contains the non-interchange-valid codepoints. +// +// See http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream +// +// safehtml functions do a lot of lookups on these tables, so merging them is probably +// worth it to avoid comparing against both tables each time. +var controlAndNonCharacter = rangetable.Merge(unicode.Noncharacter_Code_Point, controlChar) + +// controlChar contains Unicode control characters disallowed in interchange +// valid UTF-8. This table is slightly different from unicode.Cc: +// - Disallows null. +// - Allows LF, CR, HT, and FF. +// +// unicode.C is mentioned in unicode.IsControl; it contains "special" characters +// which includes at least control characters, surrogate code points, and +// formatting codepoints (e.g. word joiner). We don't need to exclude all of +// those. In particular, surrogates are handled by the for loop converting +// invalid UTF-8 byte sequences to the Unicode replacement character. 
+var controlChar = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x0000, 0x0008, 1}, + {0x000B, 0x000B, 1}, + {0x000E, 0x001F, 1}, + {0x007F, 0x009F, 1}, + }, + LatinOffset: 4, +} diff --git a/vendor/github.com/google/safehtml/identifier.go b/vendor/github.com/google/safehtml/identifier.go new file mode 100644 index 000000000..ffad26423 --- /dev/null +++ b/vendor/github.com/google/safehtml/identifier.go @@ -0,0 +1,83 @@ +// Copyright (c) 2017 The Go Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +package safehtml + +import ( + "fmt" + "regexp" +) + +// An Identifier is an immutable string-like type that is safe to use in HTML +// contexts as an identifier for HTML elements. For example, it is unsafe to +// insert an untrusted string into a +// +// <img name="..."></img> +// +// context since the string may be controlled by an attacker who can assign it +// a value that masks existing DOM properties (i.e. DOM Clobbering). An +// attacker may also be able to force legitimate Javascript code, which uses +// document.getElementsByName(...) to read DOM elements, to refer to this +// element. This may lead to unintended side effects, particularly if that +// element contains attacker-controlled data. It is, however, safe to use an +// Identifier in this context since its value is known to be partially or fully +// under application control. +// +// In order to ensure that an attacker cannot influence the Identifier value, +// an Identifier can only be instantiated from a compile-time constant string +// literal prefix. +// +// Note that Identifier is Go-specific and therefore does not have a Proto form +// for cross-language use. +type Identifier struct { + // We declare an Identifier not as a string but as a struct wrapping a string + // to prevent construction of Identifier values through string conversion.
+ str string +} + +// To minimize the risk of parsing errors, Identifier values must start with an +// alphabetical rune, and consist only of alphanumeric, '-', and '_' runes. + +// startsWithAlphabetPattern matches strings that start with an alphabetical rune. +var startsWithAlphabetPattern = regexp.MustCompile(`^[a-zA-Z]`) + +// onlyAlphanumericsOrHyphenPattern matches strings that only contain alphanumeric, +// '-' and '_' runes. +var onlyAlphanumericsOrHyphenPattern = regexp.MustCompile(`^[-_a-zA-Z0-9]*$`) + +// IdentifierFromConstant constructs an Identifier with its underlying identifier +// set to the given string value, which must be an untyped string constant. It +// panics if value does not start with an alphabetic rune or contains any +// non-alphanumeric runes other than '-' and '_'. +func IdentifierFromConstant(value stringConstant) Identifier { + if !startsWithAlphabetPattern.MatchString(string(value)) || + !onlyAlphanumericsOrHyphenPattern.MatchString(string(value)) { + panic(fmt.Sprintf("invalid identifier %q", string(value))) + } + return Identifier{string(value)} +} + +// IdentifierFromConstantPrefix constructs an Identifier with its underlying string +// set to the string formed by joining prefix, which must be an untyped string +// constant, and value with a hyphen. It panics if prefix or value contain any +// non-alphanumeric runes other than '-' and '_', or if prefix does not start with +// an alphabetic rune.
+func IdentifierFromConstantPrefix(prefix stringConstant, value string) Identifier { + prefixString := string(prefix) + if !startsWithAlphabetPattern.MatchString(string(prefix)) || + !onlyAlphanumericsOrHyphenPattern.MatchString(string(prefix)) { + panic(fmt.Sprintf("invalid prefix %q", string(prefix))) + } + if !onlyAlphanumericsOrHyphenPattern.MatchString(value) { + panic(fmt.Sprintf("value %q contains non-alphanumeric runes", value)) + } + return Identifier{prefixString + "-" + value} +} + +// String returns the string form of the Identifier. +func (i Identifier) String() string { + return i.str +} diff --git a/vendor/github.com/google/safehtml/init.go b/vendor/github.com/google/safehtml/init.go new file mode 100644 index 000000000..d37547d72 --- /dev/null +++ b/vendor/github.com/google/safehtml/init.go @@ -0,0 +1,58 @@ +// Copyright (c) 2017 The Go Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +package safehtml + +import ( + "github.com/google/safehtml/internal/raw" +) + +// stringConstant is an unexported string type. Users of this package cannot +// create values of this type except by passing an untyped string constant to +// functions which expect a stringConstant. This type should only be used in +// function and method parameters. +type stringConstant string + +// The following functions are used by package uncheckedconversions +// (via package raw) to create safe HTML types from plain strings. 
+ +func htmlRaw(s string) HTML { + return HTML{s} +} + +func scriptRaw(s string) Script { + return Script{s} +} + +func style(s string) Style { + return Style{s} +} + +func styleSheetRaw(s string) StyleSheet { + return StyleSheet{s} +} + +func urlRaw(s string) URL { + return URL{s} +} + +func trustedResourceURLRaw(s string) TrustedResourceURL { + return TrustedResourceURL{s} +} + +func identifierRaw(s string) Identifier { + return Identifier{s} +} + +func init() { + raw.HTML = htmlRaw + raw.Script = scriptRaw + raw.Style = style + raw.StyleSheet = styleSheetRaw + raw.URL = urlRaw + raw.TrustedResourceURL = trustedResourceURLRaw + raw.Identifier = identifierRaw +} diff --git a/vendor/github.com/google/safehtml/internal/raw/raw.go b/vendor/github.com/google/safehtml/internal/raw/raw.go new file mode 100644 index 000000000..3bedb6a6d --- /dev/null +++ b/vendor/github.com/google/safehtml/internal/raw/raw.go @@ -0,0 +1,31 @@ +// Copyright (c) 2017 The Go Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +// Package raw provides a coordination point for package safehtml, package +// uncheckedconversions, package legacyconversions, and package testconversions. +// raw must only be imported by these four packages. +package raw + +// HTML is the raw constructor for a safehtml.HTML. +var HTML interface{} + +// Script is the raw constructor for a safehtml.Script. +var Script interface{} + +// Style is the raw constructor for a safehtml.Style. +var Style interface{} + +// StyleSheet is the raw constructor for a safehtml.StyleSheet. +var StyleSheet interface{} + +// URL is the raw constructor for a safehtml.URL. +var URL interface{} + +// TrustedResourceURL is the raw constructor for a safehtml.TrustedResourceURL. +var TrustedResourceURL interface{} + +// Identifier is the raw constructor for a safehtml.Identifier. 
+var Identifier interface{} diff --git a/vendor/github.com/google/safehtml/internal/safehtmlutil/safehtmlutil.go b/vendor/github.com/google/safehtml/internal/safehtmlutil/safehtmlutil.go new file mode 100644 index 000000000..dd8e7fe36 --- /dev/null +++ b/vendor/github.com/google/safehtml/internal/safehtmlutil/safehtmlutil.go @@ -0,0 +1,180 @@ +// Copyright (c) 2017 The Go Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +// Package safehtmlutil contains functions shared by package safehtml and safehtml/template. +package safehtmlutil + +import ( + "bytes" + "fmt" + "reflect" + "regexp" +) + +// IsSafeTrustedResourceURLPrefix returns whether the given prefix is safe to use as a +// TrustedResourceURL prefix. +// +// TrustedResourceURL prefixes must start with one of the following: +// * `https://<origin>/` +// * `//<origin>/` +// * `/<pathStart>` +// * `about:blank#` +// +// `<origin>` must contain only alphanumerics, '.', ':', '[', ']', or '-'. +// These restrictions do not enforce a well-formed domain name, so '.' and '1.2' are valid. +// +// `<pathStart>` is any character except `/` and `\`. Based on +// https://url.spec.whatwg.org/commit-snapshots/56b74ce7cca8883eab62e9a12666e2fac665d03d/#url-parsing, +// an initial / which is not followed by another / or \ will end up in the "path state" and from there +// it can only go to the "fragment state" and "query state". +func IsSafeTrustedResourceURLPrefix(prefix string) bool { + return safeTrustedResourceURLPrefixPattern.MatchString(prefix) +} + +var safeTrustedResourceURLPrefixPattern = regexp.MustCompile(`(?i)^(?:` + + `(?:https:)?//[0-9a-z.:\[\]-]+/|` + + `/[^/\\]|` + + `about:blank#)`) + +// URLContainsDoubleDotSegment returns whether the given URL or URL substring +// contains the double dot-segment ".." (RFC3986 3.3) in its percent-encoded or +// unencoded form.
+func URLContainsDoubleDotSegment(url string) bool { + return urlDoubleDotSegmentPattern.MatchString(url) +} + +var urlDoubleDotSegmentPattern = regexp.MustCompile(`(?i)(?:\.|%2e)(?:\.|%2e)`) + +// QueryEscapeURL produces an output that can be embedded in a URL query. +// The output can be embedded in an HTML attribute without further escaping. +func QueryEscapeURL(args ...interface{}) string { + return urlProcessor(false, Stringify(args...)) +} + +// NormalizeURL normalizes URL content so it can be embedded in a quote-delimited +// string or parenthesis delimited url(...). +// The normalizer does not encode all HTML specials. Specifically, it does not +// encode '&' so correct embedding in an HTML attribute requires escaping of +// '&' to '&amp;'. +func NormalizeURL(args ...interface{}) string { + return urlProcessor(true, Stringify(args...)) +} + +// urlProcessor normalizes (when norm is true) or escapes its input to produce +// a valid hierarchical or opaque URL part. +func urlProcessor(norm bool, s string) string { + var b bytes.Buffer + written := 0 + // The byte loop below assumes that all URLs use UTF-8 as the + // content-encoding. This is similar to the URI to IRI encoding scheme + // defined in section 3.1 of RFC 3987, and behaves the same as the + // EcmaScript builtin encodeURIComponent. + // It should not cause any misencoding of URLs in pages with + // Content-type: text/html;charset=UTF-8. + for i, n := 0, len(s); i < n; i++ { + c := s[i] + switch c { + // Single quote and parens are sub-delims in RFC 3986, but we + // escape them so the output can be embedded in single + // quoted attributes and unquoted CSS url(...) constructs. + // Single quotes are reserved in URLs, but are only used in + // the obsolete "mark" rule in an appendix in RFC 3986 + // so can be safely encoded.
+ case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']': + if norm { + continue + } + // Unreserved according to RFC 3986 sec 2.3 + // "For consistency, percent-encoded octets in the ranges of + // ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), + // period (%2E), underscore (%5F), or tilde (%7E) should not be + // created by URI producers + case '-', '.', '_', '~': + continue + case '%': + // When normalizing do not re-encode valid escapes. + if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) { + continue + } + default: + // Unreserved according to RFC 3986 sec 2.3 + if 'a' <= c && c <= 'z' { + continue + } + if 'A' <= c && c <= 'Z' { + continue + } + if '0' <= c && c <= '9' { + continue + } + } + b.WriteString(s[written:i]) + fmt.Fprintf(&b, "%%%02x", c) + written = i + 1 + } + if written == 0 { + return s + } + b.WriteString(s[written:]) + return b.String() +} + +// isHex reports whether the given character is a hex digit. +func isHex(c byte) bool { + return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' +} + +// Stringify converts its arguments to a string. It is equivalent to +// fmt.Sprint(args...), except that it dereferences all pointers. +func Stringify(args ...interface{}) string { + // Optimization for simple common case of a single string argument. + if len(args) == 1 { + if s, ok := args[0].(string); ok { + return s + } + } + for i, arg := range args { + args[i] = indirectToStringerOrError(arg) + } + return fmt.Sprint(args...) +} + +var ( + errorType = reflect.TypeOf((*error)(nil)).Elem() + fmtStringerType = reflect.TypeOf((*fmt.Stringer)(nil)).Elem() +) + +// indirectToStringerOrError dereferences a as many times +// as necessary to reach the base type, an implementation of fmt.Stringer, +// or an implementation of error, and returns a value of that type. It returns +// nil if a is nil.
+func indirectToStringerOrError(a interface{}) interface{} { + if a == nil { + return nil + } + v := reflect.ValueOf(a) + for !v.Type().Implements(fmtStringerType) && !v.Type().Implements(errorType) && v.Kind() == reflect.Ptr && !v.IsNil() { + v = v.Elem() + } + return v.Interface() +} + +// Indirect returns the value, after dereferencing as many times +// as necessary to reach the base type (or nil). +func Indirect(a interface{}) interface{} { + if a == nil { + return nil + } + if t := reflect.TypeOf(a); t.Kind() != reflect.Ptr { + // Avoid creating a reflect.Value if it's not a pointer. + return a + } + v := reflect.ValueOf(a) + for v.Kind() == reflect.Ptr && !v.IsNil() { + v = v.Elem() + } + return v.Interface() +} diff --git a/vendor/github.com/google/safehtml/internal/template/raw/raw.go b/vendor/github.com/google/safehtml/internal/template/raw/raw.go new file mode 100644 index 000000000..b69599bd2 --- /dev/null +++ b/vendor/github.com/google/safehtml/internal/template/raw/raw.go @@ -0,0 +1,16 @@ +// Copyright (c) 2017 The Go Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +// Package raw provides a coordination point for package safehtml/template and +// package safehtml/template/uncheckedconversions. raw must be imported only by +// these two packages. +package raw + +// TrustedSource is the raw constructor for a template.TrustedSource. +var TrustedSource interface{} + +// TrustedTemplate is the raw constructor for a template.TrustedTemplate. +var TrustedTemplate interface{} diff --git a/vendor/github.com/google/safehtml/script.go b/vendor/github.com/google/safehtml/script.go new file mode 100644 index 000000000..c9e0fd298 --- /dev/null +++ b/vendor/github.com/google/safehtml/script.go @@ -0,0 +1,90 @@ +// Copyright (c) 2017 The Go Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +package safehtml + +import ( + "encoding/json" + "fmt" + "regexp" +) + +// A Script is an immutable string-like type which represents JavaScript +// code and guarantees that its value, as a string, will not cause execution +// of unconstrained attacker controlled code (cross-site scripting) when +// evaluated as JavaScript in a browser. +// +// Script's string representation can safely be interpolated as the +// content of a script element within HTML, and can safely be passed to DOM +// properties and functions which expect JavaScript. In these cases, the Script +// string should not be escaped. Script's string representation can also be safely +// used as the value for on* attribute handlers in HTML, though the Script string +// must be escaped before such use. +// +// Note that the Script might contain text that is attacker-controlled but +// that text should have been interpolated with appropriate escaping, +// sanitization and/or validation into the right location in the script, such +// that it is highly constrained in its effect (for example, it had to match a +// set of allowed words). +// +// In order to ensure that an attacker cannot influence the Script +// value, a Script can only be instantiated from compile-time +// constant string literals or security-reviewed unchecked conversions, +// but never from arbitrary string values potentially representing untrusted +// user input. +type Script struct { + // We declare a Script not as a string but as a struct wrapping a string + // to prevent construction of Script values through string conversion. + str string +} + +// ScriptFromConstant constructs a Script with its underlying script set +// to the given script, which must be an untyped string constant. 
+// +// No runtime validation or sanitization is performed on script; being under +// application control, it is simply assumed to comply with the Script +// contract. +func ScriptFromConstant(script stringConstant) Script { + return Script{string(script)} +} + +// ScriptFromDataAndConstant constructs a Script of the form +// +// var name = data; script +// +// where name is the supplied variable name, data is the supplied data value +// encoded as JSON using encoding/json.Marshal, and script is the supplied +// JavaScript statement or sequence of statements. The supplied name and script +// must both be untyped string constants. It returns an error if name is not a +// valid Javascript identifier or JSON encoding fails. +// +// No runtime validation or sanitization is performed on script; being under +// application control, it is simply assumed to comply with the Script +// contract. +func ScriptFromDataAndConstant(name stringConstant, data interface{}, script stringConstant) (Script, error) { + if !jsIdentifierPattern.MatchString(string(name)) { + return Script{}, fmt.Errorf("variable name %q is an invalid Javascript identifier", string(name)) + } + json, err := json.Marshal(data) + if err != nil { + return Script{}, err + } + return Script{fmt.Sprintf("var %s = %s;\n%s", name, json, string(script))}, nil +} + +// jsIdentifierPattern matches strings that are valid Javascript identifiers. +// +// This pattern accepts only a subset of valid identifiers defined in +// https://tc39.github.io/ecma262/#sec-names-and-keywords. In particular, +// it does not match single-character identifiers (the trailing class is quantified with '+'), identifiers that contain non-ASCII letters, Unicode +// escape sequences, and the Unicode format-control characters +// \u200C (zero-width non-joiner) and \u200D (zero-width joiner). +var jsIdentifierPattern = regexp.MustCompile(`^[$_a-zA-Z][$_a-zA-Z0-9]+$`) + +// String returns the string form of the Script.
+func (s Script) String() string { + return s.str +} diff --git a/vendor/github.com/google/safehtml/style.go b/vendor/github.com/google/safehtml/style.go new file mode 100644 index 000000000..c11ac9d96 --- /dev/null +++ b/vendor/github.com/google/safehtml/style.go @@ -0,0 +1,304 @@ +// Copyright (c) 2017 The Go Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +package safehtml + +import ( + "bytes" + "fmt" + "regexp" + "strings" +) + +// A Style is an immutable string-like type which represents a sequence of CSS +// declarations (property_name1: property_value1; property_name2: property_value2; ...) +// and guarantees that its value will not cause untrusted script execution +// (cross-site scripting) when evaluated as CSS in a browser. +// +// Style's string representation can safely be: +// * Interpolated as the content of a quoted HTML style attribute. However, the +// Style string must be HTML-attribute-escaped before interpolation. +// * Interpolated as the content of a {}-wrapped block within a StyleSheet. +// '<' runes in the Style string must be CSS-escaped before interpolation. +// The Style string is also guaranteed not to be able to introduce new +// properties or elide existing ones. +// * Interpolated as the content of a {}-wrapped block within an HTML `. Escape this in case the Style user forgets to. + c == '"', c == '\\', // Must be CSS-escaped in . U+000A line feed is handled in the next case. + c <= '\u001F', c == '\u007F', // C0 control codes + c >= '\u0080' && c <= '\u009F', // C1 control codes + c == '\u2028', c == '\u2029': // Unicode newline characters + // See CSS escape sequence syntax at https://www.w3.org/TR/css-syntax-3/#escape-diagram. 
+			fmt.Fprintf(&b, "\\%06X", c)
+		default:
+			b.WriteRune(c)
+		}
+	}
+	return b.String()
+}
diff --git a/vendor/github.com/google/safehtml/stylesheet.go b/vendor/github.com/google/safehtml/stylesheet.go
new file mode 100644
index 000000000..17de8a517
--- /dev/null
+++ b/vendor/github.com/google/safehtml/stylesheet.go
@@ -0,0 +1,111 @@
+// Copyright (c) 2017 The Go Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+package safehtml
+
+import (
+	"container/list"
+	"fmt"
+	"regexp"
+	"strings"
+)
+
+// A StyleSheet is an immutable string-like type which represents a CSS
+// style sheet and guarantees that its value, as a string, will not cause
+// untrusted script execution (cross-site scripting) when evaluated as CSS
+// in a browser.
+//
+// StyleSheet's string representation can safely be interpolated as the
+// content of a style element within HTML. The StyleSheet string should
+// not be escaped before interpolation.
+type StyleSheet struct {
+	// A StyleSheet is a struct wrapping a string rather than a string type,
+	// so that StyleSheet values cannot be built through string conversion.
+	str string
+}
+
+// StyleSheetFromConstant constructs a StyleSheet with the
+// underlying stylesheet set to the given styleSheet, which must be an untyped string
+// constant.
+//
+// No runtime validation or sanitization is performed on styleSheet; being under
+// application control, it is simply assumed to comply with the StyleSheet
+// contract.
+func StyleSheetFromConstant(styleSheet stringConstant) StyleSheet {
+	return StyleSheet{str: string(styleSheet)}
+}
+
+// CSSRule constructs a StyleSheet containing a CSS rule of the form:
+//   selector{style}
+// It returns an error if selector contains disallowed characters or unbalanced
+// brackets.
+//
+// The constructed StyleSheet value is guaranteed to fulfill its type contract,
+// but is not guaranteed to be semantically valid CSS.
+func CSSRule(selector string, style Style) (StyleSheet, error) {
+	if strings.Contains(selector, "<") {
+		return StyleSheet{}, fmt.Errorf("selector %q contains '<'", selector)
+	}
+	// Quoted CSS strings are dropped first; the checks below apply to the rest.
+	bare := cssStringPattern.ReplaceAllString(selector, "")
+	if bad := invalidCSSSelectorRune.FindString(bare); bad != "" {
+		return StyleSheet{}, fmt.Errorf("selector %q contains %q, which is disallowed outside of CSS strings", selector, bad)
+	}
+	if !hasBalancedBrackets(bare) {
+		return StyleSheet{}, fmt.Errorf("selector %q contains unbalanced () or [] brackets", selector)
+	}
+	return StyleSheet{selector + "{" + style.String() + "}"}, nil
+}
+
+var (
+	// cssStringPattern matches one CSS string literal in either quote style.
+	cssStringPattern = regexp.MustCompile(
+		`"([^"\r\n\f\\]|\\[\s\S])*"|` + // Double-quoted string literal
+			`'([^'\r\n\f\\]|\\[\s\S])*'`) // Single-quoted string literal
+
+	// invalidCSSSelectorRune matches a single rune that may not appear in a
+	// CSS3 selector once its string literals have been removed.
+	// See https://w3.org/TR/css3-selectors/#selectors.
+	invalidCSSSelectorRune = regexp.MustCompile(`[^-_a-zA-Z0-9#.:* ,>+~[\]()=^$|]`)
+)
+
+// hasBalancedBrackets reports whether the () and [] brackets in s are balanced:
+// each closer must match the nearest unclosed opener, and none may stay open.
+func hasBalancedBrackets(s string) bool {
+	open := list.New()
+	for _, b := range []byte(s) {
+		want, closes := matchingBrackets[b]
+		if !closes {
+			// Not a closer: remember b only if it is one of the openers.
+			for _, opener := range matchingBrackets {
+				if b == opener {
+					open.PushBack(b)
+					break
+				}
+			}
+			continue
+		}
+		top := open.Back()
+		// Only bytes are ever pushed onto the list, so the type assertion in
+		// the condition below cannot fail.
+		if top == nil || top.Value.(byte) != want {
+			return false
+		}
+		open.Remove(top)
+	}
+	return open.Len() == 0
+}
+
+// matchingBrackets maps each closing bracket to the opening bracket it pairs with.
+var matchingBrackets = map[byte]byte{
+	')': '(',
+	']': '[',
+}
+
+// String returns the StyleSheet's underlying string.
+func (s StyleSheet) String() string {
+	return s.str
+}
diff --git a/vendor/github.com/google/safehtml/template/context.go b/vendor/github.com/google/safehtml/template/context.go
new file mode 100644
index 000000000..dd7886dc6
--- /dev/null
+++ b/vendor/github.com/google/safehtml/template/context.go
@@ -0,0 +1,183 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+	"strings"
+)
+
+// context describes the state an HTML parser must be in when it reaches the
+// portion of HTML produced by evaluating a particular template node.
+//
+// The zero value of type Context is the start context for a template that
+// produces an HTML fragment as defined at
+// http://www.w3.org/TR/html5/syntax.html#the-end
+// where the context element is null.
+type context struct {
+	state   state
+	delim   delim
+	element element
+	attr    attr
+	err     *Error
+	// scriptType is the lowercase value of the "type" attribute inside the current "script"
+	// element (see https://dev.w3.org/html5/spec-preview/the-script-element.html#attr-script-type).
+	// This field will be empty if the parser is currently not in a script element,
+	// the type attribute has not already been parsed in the current element, or if the
+	// value of the type attribute cannot be determined at parse time.
+	scriptType string
+	// linkRel is the value of the "rel" attribute inside the current "link"
+	// element (see https://html.spec.whatwg.org/multipage/semantics.html#attr-link-rel).
+	// This value has been normalized to lowercase with exactly one space between tokens
+	// and exactly one space at start and end, so that a lookup of any token foo can
+	// be performed by searching for the substring " foo ".
+	// This field will be empty if the parser is currently not in a link element,
+	// the rel attribute has not already been parsed in the current element, or if the
+	// value of the rel attribute cannot be determined at parse time.
+	linkRel string
+}
+
+// eq reports whether c equals d; element and attr are compared via their eq methods.
+func (c context) eq(d context) bool {
+	if c.state != d.state || c.delim != d.delim {
+		return false
+	}
+	if !c.element.eq(d.element) || !c.attr.eq(d.attr) || c.err != d.err {
+		return false
+	}
+	return c.scriptType == d.scriptType && c.linkRel == d.linkRel
+}
+
+// state describes a high-level HTML parser state.
+//
+// It bounds the top of the element stack, and by extension the HTML insertion
+// mode, but also contains state that does not correspond to anything in the
+// HTML5 parsing algorithm because a single token production in the HTML
+// grammar may contain embedded actions in a template. For instance, the quoted
+// HTML attribute produced by
+//     <div title="Hello {{.World}}">
+// is a single token in HTML's grammar but in a template spans several nodes.
+type state uint8
+
+//go:generate stringer -type state
+
+const (
+	// stateText is parsed character data. An HTML parser is in
+	// this state when its parse position is outside an HTML tag,
+	// directive, comment, and special element body.
+	stateText state = iota
+	// stateSpecialElementBody occurs inside a special HTML element body.
+	stateSpecialElementBody
+	// stateTag occurs before an HTML attribute or the end of a tag.
+	stateTag
+	// stateAttrName occurs inside an attribute name.
+	// It occurs between the ^'s in ` ^name^ = value`.
+	stateAttrName
+	// stateAfterName occurs after an attr name has ended but before any
+	// equals sign. It occurs between the ^'s in ` name^ ^= value`.
+	stateAfterName
+	// stateBeforeValue occurs after the equals sign but before the value.
+	// It occurs between the ^'s in ` name =^ ^value`.
+	stateBeforeValue
+	// stateHTMLCmt occurs inside an <!-- HTML comment -->.
+	stateHTMLCmt
+	// stateAttr occurs inside an HTML attribute whose content is text.
+	stateAttr
+	// stateError is an infectious error state outside any valid
+	// HTML/CSS/JS construct.
+	stateError
+)
+
+// isComment reports whether a state contains content meant for template
+// authors & maintainers, not for end-users or machines.
+func isComment(s state) bool {
+	// Only stateHTMLCmt holds content addressed to the people who read the
+	// template source rather than to the consumers of its output. Should
+	// another comment state ever be declared, it has to be added to this
+	// comparison.
+	return s == stateHTMLCmt
+}
+
+// isInTag reports whether s occurs solely inside an HTML tag.
+func isInTag(s state) bool {
+	// The five states listed here are the ones the parser can be in while
+	// it is positioned inside a tag.
+	inside := s == stateTag || s == stateAttrName || s == stateAfterName ||
+		s == stateBeforeValue || s == stateAttr
+	return inside
+}
+
+// delim is the delimiter that will end the current HTML attribute.
+type delim uint8
+
+//go:generate stringer -type delim
+
+const (
+	// delimNone occurs outside any attribute.
+	delimNone delim = iota
+	// delimDoubleQuote occurs when a double quote (") closes the attribute.
+ delimDoubleQuote + // delimSingleQuote occurs when a single quote (') closes the attribute. + delimSingleQuote + // delimSpaceOrTagEnd occurs when a space or right angle bracket (>) + // closes the attribute. + delimSpaceOrTagEnd +) + +type element struct { + // name is the lowercase name of the element. If context joining has occurred, name + // will be arbitrarily assigned the element name from one of the joined contexts. + name string + // names contains all possible names the element could assume because of context joining. + // For example, after joining the contexts in the "if" and "else" branches of + // {{if .C}}`, + // names will contain "img" and "audio". + // names can also contain empty strings, which represent joined contexts with no element name. + // names will be empty if no context joining occurred. + names []string +} + +// eq reports whether a and b have the same name. All other fields are ignored. +func (e element) eq(d element) bool { + return e.name == d.name +} + +// String returns the string representation of the element. +func (e element) String() string { + return "element" + strings.Title(e.name) +} + +// attr represents the attribute that the parser is in, that is, +// starting from stateAttrName until stateTag/stateText (exclusive). +type attr struct { + // name is the lowercase name of the attribute. If context joining has occurred, name + // will be arbitrarily assigned the attribute name from one of the joined contexts. + name string + // value is the value of the attribute. If context joining has occurred, value + // will be arbitrarily assigned the attribute value from one of the joined contexts. + // If there are multiple actions in the attribute value, value will contain the + // concatenation of all values seen so far. For example, in + // + // value is "foo" at "{{.X}}" and "foobar" at "{{.Y}}". + value string + // ambiguousValue indicates whether value contains an ambiguous value due to context-joining. 
+ ambiguousValue bool + // names contains all possible names the attribute could assume because of context joining. + // For example, after joining the contexts in the "if" and "else" branches of + // + // names will contain "title" and "name". + // names can also contain empty strings, which represent joined contexts with no attribute name. + // names will be empty if no context joining occurred. + names []string +} + +// eq reports whether a and b have the same name. All other fields are ignored. +func (a attr) eq(b attr) bool { + return a.name == b.name +} + +// String returns the string representation of the attr. +func (a attr) String() string { + return "attr" + strings.Title(a.name) +} diff --git a/vendor/github.com/google/safehtml/template/delim_string.go b/vendor/github.com/google/safehtml/template/delim_string.go new file mode 100644 index 000000000..0ef2c2510 --- /dev/null +++ b/vendor/github.com/google/safehtml/template/delim_string.go @@ -0,0 +1,16 @@ +// Code generated by "stringer -type Delim"; DO NOT EDIT + +package template + +import "fmt" + +const _Delim_name = "DelimNoneDelimDoubleQuoteDelimSingleQuoteDelimSpaceOrTagEnd" + +var _Delim_index = [...]uint8{0, 9, 25, 41, 59} + +func (i delim) String() string { + if i >= delim(len(_Delim_index)-1) { + return fmt.Sprintf("delim(%d)", i) + } + return _Delim_name[_Delim_index[i]:_Delim_index[i+1]] +} diff --git a/vendor/github.com/google/safehtml/template/doc.go b/vendor/github.com/google/safehtml/template/doc.go new file mode 100644 index 000000000..fab552b25 --- /dev/null +++ b/vendor/github.com/google/safehtml/template/doc.go @@ -0,0 +1,291 @@ +// Copyright (c) 2017 The Go Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +/* +Package template (safehtml/template) implements data-driven templates for +generating HTML output safe against code injection. It provides an interface +similar to that of package html/template, but produces HTML output that is more +secure. Therefore, it should be used instead of html/template to render HTML. + +The documentation here focuses on the security features of the package. For +information about how to program the templates themselves, see the +documentation for text/template. + + +Basic usage + +This package provides an API almost identical to that of text/template and +html/template to parse and execute HTML templates safely. + + tmpl := template.Must(template.New("name").Parse(`
Hello {{.}}
`)) + err := tmpl.Execute(out, data) + +If successful, out will contain code-injection-safe HTML. Otherwise, err's +string representation will describe the error that occurred. + +Elements of data might be modified at run time before being included in out, or +rejected completely if such a conversion is not possible. Pass values of +appropriate types from package safehtml to ensure that they are included in the +template's HTML output in their expected form. More details are provided below +in "Contextual autosanitization" and "Sanitization contexts". + + +Security improvements + +safehtml/template produces HTML more resistant to code injection than +html/template because it: + * Allows values of types only from package safehtml to bypass run-time + sanitization. These types represent values that are known---by construction + or by run-time sanitization---to be safe for use in various HTML contexts + without being processed by certain sanitization functions. + * Does not attempt to escape CSS or JavaScript. Instead of attempting to + parse and escape these complex languages, safehtml/template allows values + of only the appropriate types from package safehtml (e.g. safehtml.Style, + safehtml.Script) to be used in these contexts, since they are already + guaranteed to be safe. + * Emits an error if user data is interpolated in unsafe contexts, such as + within disallowed elements or unquoted attribute values. + * Only loads templates from trusted sources. This ensures that the contents + of the template are always under programmer control. More details are + provided below in "Trusted template sources". + * Differentiates between URLs that load code and those that do not. URLs in + the former category must be supplied to the template as values of type + safehtml.TrustedResourceURL, whose type contract promises that the URL + identifies a trustworthy resource. URLs in the latter category can be + sanitized at run time. 
+ + +Threat model + +safehtml/template assumes that programmers are trustworthy. Therefore, data +fully under programmer control, such as string literals, are considered safe. +The types from package safehtml are designed around this same assumption, so +their type contracts are trusted by this package. + +safehtml/template considers all other data values untrustworthy and +conservatively assumes that such values could result in a code-injection +vulnerability if included verbatim in HTML. + + +Trusted template sources + +safehtml/template loads templates only from trusted sources. Therefore, template +text, file paths, and file patterns passed to Parse* functions and methods must +be entirely under programmer control. + +This constraint is enforced by using unexported string types for the parameters +of Parse* functions and methods, such as trustedFilePattern for ParseGlob. +The only values that may be assigned to these types (and thus provided as +arguments) are untyped string constants such as string literals, which are +always under programmer control. + + +Contextual autosanitization + +Code injection vulnerabilities, such as cross-site scripting (XSS), occur when +untrusted data values are embedded in a HTML document. For example, + + import "text/template" + ... + var t = template.Must(template.New("foo").Parse(`
{{ .Y }}`)) + func renderHTML(x, y string) string { + var out bytes.Buffer + err := t.Execute(&out, struct{ X, Y string }{x, y}) + // Error checking elided + return out.String() + } + +If x and y originate from user-provided data, an attacker who controls these +strings could arrange for them to contain the following values: + + x = "javascript:evil()" + y = "" + +which will cause renderHTML to return the following unsafe HTML: + + + +To prevent such vulnerabilities, untrusted data must be sanitized before being +included in HTML. A sanitization function takes untrusted data and returns a +string that will not create a code-injection vulnerability in the destination +context. The function might return the input unchanged if it deems it safe, +escape special runes in the input's string representation to prevent them from +triggering undesired state changes in the HTML parser, or entirely replace the +input by an innocuous string (also known as "filtering"). If none of these +conversions are possible, the sanitization function aborts template processing. + +safehtml/template contextually autosanitizes untrusted data by adding +appropriate sanitization functions to template actions to ensure that the +action output is safe to include in the HTML context in which the action +appears. For example, in + + import "safehtml/template" + ... + var t = template.Must(template.New("foo").Parse(`{{ .Y }}`)) + func renderHTML(x, y string) string { + var out bytes.Buffer + err := t.Execute(&out, struct{ X, Y string }{x, y}) + // Error checking elided + return out.String() + } + +the contextual autosanitizer rewrites the template to + + {{ .Y | _sanitizeHTML }} + +so that the template produces the following safe, sanitized HTML output (split +across multiple lines for clarity): + + + </a><script>alert('pwned')</script><a> + + +Similar template systems such as html/template, Soy, and Angular, refer to this +functionality as "contextual autoescaping". 
safehtml/template uses the term +"autosanitization" instead of "autoescaping" since "sanitization" broadly +captures the operations of escaping and filtering. + + +Sanitization contexts + +The types of sanitization functions inserted into an action depend on the +action's sanitization context, which is determined by its surrounding text. +The following table describes these sanitization contexts. + + +--------------------+----------------------------------+------------------------------+-----------------------+ + | Context | Examples | Safe types | Run-time sanitizer | + |--------------------+----------------------------------+------------------------------+-----------------------+ + | HTMLContent | Hello {{.}} | safehtml.HTML | safehtml.HTMLEscaped | + | | {{.}} | | | + +--------------------------------------------------------------------------------------------------------------+ + | HTMLValOnly | | safehtml.HTML* | N/A | + +--------------------------------------------------------------------------------------------------------------+ + | URL | Cite | safehtml.URL | safehtml.URLSanitized | + +--------------------------------------------------------------------------------------------------------------+ + | URL or | Link | safehtml.URL | safehtml.URLSanitized | + | TrustedResourceURL | | safehtml.TrustedResourceURL | | + +--------------------------------------------------------------------------------------------------------------+ + | TrustedResourceURL | | safehtml.TrustedResourceURL† | N/A | + +--------------------------------------------------------------------------------------------------------------+ + | Script | | safehtml.Script* | N/A | + +--------------------------------------------------------------------------------------------------------------+ + | Style |

Paragraph

| safehtml.Style* | N/A | + +--------------------------------------------------------------------------------------------------------------+ + | Stylesheet | | safehtml.StyleSheet* | N/A | + +--------------------------------------------------------------------------------------------------------------+ + | Identifier |

Hello

| safehtml.Identifier* | N/A | + +--------------------------------------------------------------------------------------------------------------+ + | Enumerated value | Link | Allowed string values | N/A | + | | | ("_self" or "_blank" for | | + | | | the given example) | | + +--------------------------------------------------------------------------------------------------------------+ + | None |

Hello

| N/A (any type allowed) | N/A (any type | + | | | | allowed) | + +--------------------+----------------------------------+------------------------------+-----------------------+ + *: Values only of this type are allowed in this context. Other values will trigger a run-time error. + †: If the action is a prefix of the attribute value, values only of this type are allowed. + Otherwise, values of any type are allowed. See "Substitutions in URLs" for more details. + +For each context, the function named in "Run-time sanitizer" is called to +sanitize the output of the action. However, if the action outputs a value of +any of the types listed in "Safe types", the run-time sanitizer is not called. +For example, in + + {{ .X }} + +if X is a string value, a HTML sanitizer that calls safehtml.HTMLEscaped will be +added to the action to sanitize X. + + // _sanitizeHTML calls safehtml.HTMLEscaped. + {{ .X | _sanitizeHTML }} + +However, if X is a safehtml.HTML value, _sanitizeHTML will not change its +value, since safehtml.HTML values are already safe to use in HTML contexts. +Therefore, the string contents of X will bypass context-specific +sanitization (in this case, HTML escaping) and appear unchanged in the +template's HTML output. Note that in attribute value contexts, HTML escaping +will always take place, whether or not context-specific sanitization is +performed. More details can be found at the end of this section. + +In certain contexts, the autosanitizer allows values only of that context's +"Safe types". Any other values will trigger an error and abort template +processing. For example, the template + + + +triggers a run-time error if X is not a safehtml.StyleSheet. Otherwise, the +string form of X will appear unchanged in the output. The only exception to +this behavior is in TrustedResourceURL sanitization contexts, where actions may +output data of any type if the action occurs after a safe attribute value prefix. 
+More details can be found below in "Substitutions in URLs". + + +Unconditional sanitization + +In attribute value contexts, action outputs are always HTML-escaped after +context-specific sanitization to ensure that the attribute values cannot +change the structure of the surrounding HTML tag. In URL or TrustedResourceURL +sanitization contexts, action outputs are additionally URL-normalized to reduce +the likelihood of downstream URL-parsing bugs. For example, the template + + Link +

Text

+ +is rewritten by the autosanitizer into + + // _sanitizeHTML calls safehtml.HTMLEscaped. + Link +

Text

+ +Even if X is a safehtml.URL or safehtml.TrustedResourceURL value, which +remains unchanged after _sanitizeTrustedResourceURLOrURL, X will still be +URL-normalized and HTML-escaped. Likewise, Y will still be HTML-escaped even if +its string form is left unchanged by _sanitizeIdentifier. + + +Substitutions in URLs + +Values of any type may be substituted into attribute values in URL and +TrustedResourceURL sanitization contexts only if the action is preceded by a +safe URL prefix. For example, in + + foo + +Since "http://www.foo.com/" is a safe URL prefix, PathComponent can safely be +interpolated into this URL sanitization context after URL normalization. +Similarly, in + + + +Since "https://www.bar.com/" is a safe TrustedResourceURL prefix, PathComponent +can safely be interpolated into this TrustedResourceURL sanitization context +after URL escaping. Substitutions after a safe TrustedResourceURL prefix are +escaped instead of normalized to prevent the injection of any new URL +components, including additional path components. URL escaping also takes place +in URL sanitization contexts where the substitutions occur in the query or +fragment part of the URL, such as in: + + Link + +A URL prefix is considered safe in a URL sanitization context if it does +not end in an incomplete HTML character reference (e.g. https) or incomplete +percent-encoding character triplet (e.g. /fo%6), does not contain whitespace or control +characters, and one of the following is true: + * The prefix has a safe scheme (i.e. http, https, mailto, or ftp). + * The prefix has the data scheme with base64 encoding and an allowed audio, image, + or video MIME type (e.g. data:img/jpeg;base64, data:video/mp4;base64). + * The prefix has no scheme at all, and cannot be interpreted as a scheme prefix (e.g. /path). + +A URL prefix is considered safe in a TrustedResourceURL sanitization context if it does +not end in an incomplete HTML character reference (e.g. 
https) or incomplete +percent-encoding character triplet (e.g. /fo%6), does not contain white space or control +characters, and one of the following is true: + * The prefix has the https scheme and contains a domain name (e.g. https://www.foo.com). + * The prefix is scheme-relative and contains a domain name (e.g. //www.foo.com/). + * The prefix is path-absolute and contains a path (e.g. /path). + * The prefix is "about:blank". +*/ +package template diff --git a/vendor/github.com/google/safehtml/template/error.go b/vendor/github.com/google/safehtml/template/error.go new file mode 100644 index 000000000..fe7821433 --- /dev/null +++ b/vendor/github.com/google/safehtml/template/error.go @@ -0,0 +1,280 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package template + +import ( + "fmt" + "text/template/parse" +) + +// Error describes a problem encountered during template Escaping. +type Error struct { + // ErrorCode describes the kind of error. + ErrorCode ErrorCode + // Node is the node that caused the problem, if known. + // If not nil, it overrides Name and Line. + Node parse.Node + // Name is the name of the template in which the error was encountered. + Name string + // Line is the line number of the error in the template source or 0. + Line int + // Description is a human-readable description of the problem. + Description string +} + +// ErrorCode is a code for a kind of error. +type ErrorCode int + +// We define codes for each error that manifests while escaping templates, but +// escaped templates may also fail at runtime. +// +// Output: "ZgotmplZ" +// Example: +// +// where {{.X}} evaluates to `javascript:...` +// Discussion: +// "ZgotmplZ" is a special value that indicates that unsafe content reached a +// CSS or URL context at runtime. 
The output of the example will be +// +// If the data comes from a trusted source, use content types to exempt it +// from filtering: URL(`javascript:...`). +const ( + // OK indicates the lack of an error. + OK ErrorCode = iota + + // ErrAmbigContext: "... appears in an ambiguous context within a URL" + // Example: + // + // Discussion: + // {{.X}} is in an ambiguous URL context since, depending on {{.C}}, + // it may be either a URL suffix or a query parameter. + // Moving {{.X}} into the condition removes the ambiguity: + // + ErrAmbigContext + + // ErrBadHTML: "expected space, attr name, or end of tag, but got ...", + // "... in unquoted attr", "... in attribute name" + // Example: + // + // + //
+ //
{{end}} + // {{define "attrs"}}href="{{.URL}}"{{end}} + // Discussion: + // Package html/template looks through template calls to compute the + // context. + // Here the {{.URL}} in "attrs" must be treated as a URL when called + // from "main", but you will get this error if "attrs" is not defined + // when "main" is parsed. + ErrNoSuchTemplate + + // ErrOutputContext: "cannot compute output context for template ..." + // Examples: + // {{define "t"}}{{if .T}}{{template "t" .T}}{{end}}{{.H}}",{{end}} + // Discussion: + // A recursive template does not end in the same context in which it + // starts, and a reliable output context cannot be computed. + // Look for typos in the named template. + // If the template should not be called in the named start context, + // look for calls to that template in unexpected contexts. + // Maybe refactor recursive templates to not be recursive. + ErrOutputContext + + // ErrPartialCharset: "unfinished JS regexp charset in ..." + // Example: + // + // Discussion: + // Package html/template does not support interpolation into regular + // expression literal character sets. + ErrPartialCharset + + // ErrPartialEscape: "unfinished escape sequence in ..." + // Example: + // + // Discussion: + // Package html/template does not support actions following a + // backslash. + // This is usually an error and there are better solutions; for + // example + // + // should work, and if {{.X}} is a partial escape sequence such as + // "xA0", mark the whole sequence as safe content: JSStr(`\xA0`) + ErrPartialEscape + + // ErrRangeLoopReentry: "on range loop re-entry: ..." + // Example: + // + // Discussion: + // If an iteration through a range would cause it to end in a + // different context than an earlier pass, there is no single context. + // In the example, there is missing a quote, so it is not clear + // whether {{.}} is meant to be inside a JS string or in a JS value + // context. 
The second iteration would produce something like + // + // + ErrRangeLoopReentry + + // ErrSlashAmbig: '/' could start a division or regexp. + // Example: + // + // Discussion: + // The example above could produce `var x = 1/-2/i.test(s)...` + // in which the first '/' is a mathematical division operator or it + // could produce `/-2/i.test(s)` in which the first '/' starts a + // regexp literal. + // Look for missing semicolons inside branches, and maybe add + // parentheses to make it clear which interpretation you intend. + ErrSlashAmbig + + // ErrPredefinedEscaper: "predefined escaper ... disallowed in template" + // Example: + //
Hello
+ // Discussion: + // Package html/template already contextually escapes all pipelines to + // produce HTML output safe against code injection. Manually escaping + // pipeline output using the predefined escapers "html" or "urlquery" is + // unnecessary, and may affect the correctness or safety of the escaped + // pipeline output in Go 1.8 and earlier. + // + // In most cases, such as the given example, this error can be resolved by + // simply removing the predefined escaper from the pipeline and letting the + // contextual autoescaper handle the escaping of the pipeline. In other + // instances, where the predefined escaper occurs in the middle of a + // pipeline where subsequent commands expect escaped input, e.g. + // {{.X | html | makeALink}} + // where makeALink does + // return `link` + // consider refactoring the surrounding template to make use of the + // contextual autoescaper, i.e. + // link + // + // To ease migration to Go 1.9 and beyond, "html" and "urlquery" will + // continue to be allowed as the last command in a pipeline. However, if the + // pipeline occurs in an unquoted attribute value context, "html" is + // disallowed. Avoid using "html" and "urlquery" entirely in new templates. + ErrPredefinedEscaper + + // ErrEscapeAction: "cannot escape action ..." + // Discussion: + // Error returned while escaping an action using EscaperForContext. + // Refer to error message for more details. + // TODO: remove this error type and replace it with more informative sanitization errors. + ErrEscapeAction + + // ErrCSPCompatibility: `"javascript:" URI disallowed for CSP compatibility`, + // "inline event handler ... is disallowed for CSP compatibility + // Examples: + // A thing. + // foo + // Discussion: + // Inline event handlers (onclick="...", onerror="...") and + // links can be used to run scripts, + // so an attacker who finds an XSS bug could inject such HTML + // and execute malicious JavaScript. 
These patterns must be + // refactored into safer alternatives for compatibility with + // Content Security Policy (CSP). + // + // For example, the following HTML that contains an inline event handler: + // + // A thing. + // can be refactored into: + // A thing. + // + // + // Likewise, the following HTML containng a javascript: URI: + // foo + // can be refactored into: + // foo + // + ErrCSPCompatibility + // All JS templates inside script literals have to be balanced; otherwise a concatenation such as + // can contain XSS if data contains user-controlled escaped strings (e.g. as JSON). + ErrUnbalancedJsTemplate +) + +func (e *Error) Error() string { + switch { + case e.Node != nil: + loc, _ := (*parse.Tree)(nil).ErrorContext(e.Node) + return fmt.Sprintf("html/template:%s: %s", loc, e.Description) + case e.Line != 0: + return fmt.Sprintf("html/template:%s:%d: %s", e.Name, e.Line, e.Description) + case e.Name != "": + return fmt.Sprintf("html/template:%s: %s", e.Name, e.Description) + } + return "html/template: " + e.Description +} + +// errorf creates an error given a format string f and args. +// The template Name still needs to be supplied. +func errorf(k ErrorCode, node parse.Node, line int, f string, args ...interface{}) *Error { + return &Error{k, node, "", line, fmt.Sprintf(f, args...)} +} diff --git a/vendor/github.com/google/safehtml/template/escape.go b/vendor/github.com/google/safehtml/template/escape.go new file mode 100644 index 000000000..8a9d53dd5 --- /dev/null +++ b/vendor/github.com/google/safehtml/template/escape.go @@ -0,0 +1,884 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package template + +import ( + "bytes" + "fmt" + "html" + "reflect" + "strings" + "text/template" + "text/template/parse" +) + +// TODO: remove all unused escaping logic inherited from html/template. 
+// TODO: replace "escape" with "sanitize" in file names and contents to maintain consistency with safehtml/template docs. + +// escapeTemplate rewrites the named template, which must be +// associated with t, to guarantee that the output of any of the named +// templates is properly escaped. If no error is returned, then the named templates have +// been modified. Otherwise the named templates have been rendered +// unusable. +func escapeTemplate(tmpl *Template, node parse.Node, name string) error { + c, _ := tmpl.esc.escapeTree(context{}, node, name, 0) + var err error + if c.err != nil { + err, c.err.Name = c.err, name + } else if c.state != stateText { + err = &Error{ErrEndContext, nil, name, 0, fmt.Sprintf("ends in a non-text context: %+v", c)} + } + if err != nil { + // Prevent execution of unsafe templates. + if t := tmpl.set[name]; t != nil { + t.escapeErr = err + t.text.Tree = nil + t.Tree = nil + } + return err + } + tmpl.esc.commit() + if t := tmpl.set[name]; t != nil { + t.escapeErr = errEscapeOK + t.Tree = t.text.Tree + } + return nil +} + +// evalArgs formats the list of arguments into a string. It is equivalent to +// fmt.Sprint(args...), except that it deferences all pointers. +func evalArgs(args ...interface{}) string { + // Optimization for simple common case of a single string argument. + if len(args) == 1 { + if s, ok := args[0].(string); ok { + return s + } + } + for i, arg := range args { + args[i] = indirectToStringerOrError(arg) + } + return fmt.Sprint(args...) +} + +// escaper collects type inferences about templates and changes needed to make +// templates injection safe. +type escaper struct { + // ns is the nameSpace that this escaper is associated with. + ns *nameSpace + // output[templateName] is the output context for a templateName that + // has been mangled to include its input context. 
+ output map[string]context + // derived[c.mangle(name)] maps to a template derived from the template + // named name templateName for the start context c. + derived map[string]*template.Template + // called[templateName] is a set of called mangled template names. + called map[string]bool + // xxxNodeEdits are the accumulated edits to apply during commit. + // Such edits are not applied immediately in case a template set + // executes a given template in different escaping contexts. + actionNodeEdits map[*parse.ActionNode][]string + templateNodeEdits map[*parse.TemplateNode]string + textNodeEdits map[*parse.TextNode][]byte +} + +// makeEscaper creates a blank escaper for the given set. +func makeEscaper(n *nameSpace) escaper { + return escaper{ + n, + map[string]context{}, + map[string]*template.Template{}, + map[string]bool{}, + map[*parse.ActionNode][]string{}, + map[*parse.TemplateNode]string{}, + map[*parse.TextNode][]byte{}, + } +} + +// escape escapes a template node. +func (e *escaper) escape(c context, n parse.Node) context { + switch n := n.(type) { + case *parse.ActionNode: + return e.escapeAction(c, n) + case *parse.IfNode: + return e.escapeBranch(c, &n.BranchNode, "if") + case *parse.ListNode: + return e.escapeList(c, n) + case *parse.RangeNode: + return e.escapeBranch(c, &n.BranchNode, "range") + case *parse.TemplateNode: + return e.escapeTemplate(c, n) + case *parse.TextNode: + return e.escapeText(c, n) + case *parse.WithNode: + return e.escapeBranch(c, &n.BranchNode, "with") + } + panic("escaping " + n.String() + " is unimplemented") +} + +// escapeAction escapes an action template node. +func (e *escaper) escapeAction(c context, n *parse.ActionNode) context { + if len(n.Pipe.Decl) != 0 { + // A local variable assignment, not an interpolation. + return c + } + c = nudge(c) + // Check for disallowed use of predefined escapers in the pipeline. 
+ for pos, idNode := range n.Pipe.Cmds { + node, ok := idNode.Args[0].(*parse.IdentifierNode) + if !ok { + // A predefined escaper "esc" will never be found as an identifier in a + // Chain or Field node, since: + // - "esc.x ..." is invalid, since predefined escapers return strings, and + // strings do not have methods, keys or fields. + // - "... .esc" is invalid, since predefined escapers are global functions, + // not methods or fields of any types. + // Therefore, it is safe to ignore these two node types. + continue + } + ident := node.Ident + if _, ok := predefinedEscapers[ident]; ok { + if pos < len(n.Pipe.Cmds)-1 || + c.state == stateAttr && c.delim == delimSpaceOrTagEnd && ident == "html" { + return context{ + state: stateError, + err: errorf(ErrPredefinedEscaper, n, n.Line, "predefined escaper %q disallowed in template", ident), + } + } + } + } + switch c.state { + case stateError: + return c + case stateAttrName, stateTag: + c.state = stateAttrName + } + // TODO: integrate sanitizerForContext into escapeAction. + s, err := sanitizerForContext(c) + if err != nil { + return context{ + state: stateError, + // TODO: return sanitization-specific errors. + err: errorf(ErrEscapeAction, n, n.Line, "cannot escape action %v: %s", n, err), + } + } + e.editActionNode(n, s) + return c +} + +// ensurePipelineContains ensures that the pipeline ends with the commands with +// the identifiers in s in order. If the pipeline ends with a predefined escaper +// (i.e. "html" or "urlquery"), merge it with the identifiers in s.c +func ensurePipelineContains(p *parse.PipeNode, s []string) { + if len(s) == 0 { + // Do not rewrite pipeline if we have no escapers to insert. + return + } + // Precondition: p.Cmds contains at most one predefined escaper and the + // escaper will be present at p.Cmds[len(p.Cmds)-1]. This precondition is + // always true because of the checks in escapeAction. 
+ pipelineLen := len(p.Cmds) + if pipelineLen > 0 { + lastCmd := p.Cmds[pipelineLen-1] + if idNode, ok := lastCmd.Args[0].(*parse.IdentifierNode); ok { + if esc := idNode.Ident; predefinedEscapers[esc] { + // Pipeline ends with a predefined escaper. + if len(p.Cmds) == 1 && len(lastCmd.Args) > 1 { + // Special case: pipeline is of the form {{ esc arg1 arg2 ... argN }}, + // where esc is the predefined escaper, and arg1...argN are its arguments. + // Convert this into the equivalent form + // {{ _eval_args_ arg1 arg2 ... argN | esc }}, so that esc can be easily + // merged with the escapers in s. + lastCmd.Args[0] = parse.NewIdentifier(evalArgsFuncName).SetTree(nil).SetPos(lastCmd.Args[0].Position()) + p.Cmds = append(p.Cmds, newIdentCmd(esc, p.Position())) + pipelineLen++ + } + // If any of the commands in s that we are about to insert is equivalent + // to the predefined escaper, use the predefined escaper instead. + dup := false + for i, escaper := range s { + if escFnsEq(esc, escaper) { + s[i] = idNode.Ident + dup = true + } + } + if dup { + // The predefined escaper will already be inserted along with the + // escapers in s, so do not copy it to the rewritten pipeline. + pipelineLen-- + } + } + } + } + // Rewrite the pipeline, creating the escapers in s at the end of the pipeline. + newCmds := make([]*parse.CommandNode, pipelineLen, pipelineLen+len(s)) + copy(newCmds, p.Cmds) + for _, name := range s { + newCmds = append(newCmds, newIdentCmd(name, p.Position())) + } + p.Cmds = newCmds +} + +// predefinedEscapers contains template predefined escapers that are equivalent +// to some contextual escapers. Keep in sync with equivEscapers. +var predefinedEscapers = map[string]bool{ + "html": true, + "urlquery": true, +} + +// equivEscapers matches contextual escapers to equivalent predefined +// template escapers. 
+var equivEscapers = map[string]string{ + // The following pairs of HTML escapers provide equivalent security + // guarantees, since they all escape '\000', '\'', '"', '&', '<', and '>'. + sanitizeHTMLFuncName: "html", + sanitizeRCDATAFuncName: "html", + // These two URL escapers produce URLs safe for embedding in a URL query by + // percent-encoding all the reserved characters specified in RFC 3986 Section + // 2.2 + queryEscapeURLFuncName: "urlquery", + // The normalizer function is not actually equivalent to urlquery; urlquery is + // stricter as it escapes reserved characters (e.g. '#'), while the normalizer + // function does not. It is therefore only safe to replace the normalizer with + // with urlquery (this happens in ensurePipelineContains), but not the other + // way around. We keep this entry around to preserve the behavior of templates + // written before Go 1.9, which might depend on this substitution taking place. + normalizeURLFuncName: "urlquery", +} + +// escFnsEq reports whether the two escaping functions are equivalent. +func escFnsEq(a, b string) bool { + return normalizeEscFn(a) == normalizeEscFn(b) +} + +// normalizeEscFn(a) is equal to normalizeEscFn(b) for any pair of names of +// escaper functions a and b that are equivalent. +func normalizeEscFn(e string) string { + if norm := equivEscapers[e]; norm != "" { + return norm + } + return e +} + +// newIdentCmd produces a command containing a single identifier node. +func newIdentCmd(identifier string, pos parse.Pos) *parse.CommandNode { + return &parse.CommandNode{ + NodeType: parse.NodeCommand, + Args: []parse.Node{parse.NewIdentifier(identifier).SetTree(nil).SetPos(pos)}, // TODO: SetTree. + Pos: pos, + } +} + +// nudge returns the context that would result from following empty string +// transitions from the input context. +// For example, parsing: +// `90% of the time. 
+ e.output[t.Name()] = c + return e.escapeListConditionally(c, t.Tree.Root, filter) +} + +// delimEnds maps each delim to a string of characters that terminate it. +var delimEnds = [...]string{ + delimDoubleQuote: `"`, + delimSingleQuote: "'", + // Determined empirically by running the below in various browsers. + // var div = document.createElement("DIV"); + // for (var i = 0; i < 0x10000; ++i) { + // div.innerHTML = ""; + // if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0) + // document.write("

U+" + i.toString(16)); + // } + delimSpaceOrTagEnd: " \t\n\f\r>", +} + +var doctypeBytes = []byte("= i; j-- { + if s[j] == '<' { + end = j + break + } + } + } + for j := i; j < end; j++ { + if s[j] == '<' && !bytes.HasPrefix(bytes.ToUpper(s[j:]), doctypeBytes) { + b.Write(s[written:j]) + b.WriteString("<") + written = j + 1 + } + } + } else if isComment(c.state) && c.delim == delimNone { + written = i1 + } + if c.state == stateSpecialElementBody && c.element.name == "script" { + if err := isJsTemplateBalanced(bytes.NewBuffer(s)); err != nil { + return context{ + state: stateError, + err: errorf(ErrUnbalancedJsTemplate, n, 0, "Mixing template systems can cause security vulnerabilites. Therefore, there can be no safehtml/template insertion points or actions inside an ES6 template, and all ES6 templates must be closed: %v", err.Error()), + } + } + } + + if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone { + // Preserve the portion between written and the comment start. + cs := i1 - 2 + if c1.state == stateHTMLCmt { + // "") + +// tText is the context transition function for the text state. +func tText(c context, s []byte) (context, int) { + k := 0 + for { + i := k + bytes.IndexByte(s[k:], '<') + if i < k || i+1 == len(s) { + return c, len(s) + } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) { + return context{state: stateHTMLCmt}, i + 4 + } + i++ + end := false + if s[i] == '/' { + if i+1 == len(s) { + return c, len(s) + } + end, i = true, i+1 + } + j, e := eatTagName(s, i) + if j != i { + // We've found an HTML tag. + ret := context{state: stateTag} + // Element name not needed if we are at the end of the element. + if !end { + ret.element = e + } + return ret, j + } + k = j + } +} + +// specialElements contains the names of elements whose bodies are treated +// differently by the parser and escaper from stateText. 
+var specialElements = map[string]bool{ + "script": true, + "style": true, + "textarea": true, + "title": true, +} + +// voidElements contains the names of all void elements. +// https://www.w3.org/TR/html5/syntax.html#void-elements +var voidElements = map[string]bool{ + "area": true, + "base": true, + "br": true, + "col": true, + "embed": true, + "hr": true, + "img": true, + "input": true, + "keygen": true, + "link": true, + "meta": true, + "param": true, + "source": true, + "track": true, + "wbr": true, +} + +// tTag is the context transition function for the tag state. +func tTag(c context, s []byte) (context, int) { + // Find the attribute name. + i := eatWhiteSpace(s, 0) + if i == len(s) { + return c, len(s) + } + if s[i] == '>' { + ret := context{ + state: stateText, + element: c.element, + scriptType: c.scriptType, + linkRel: c.linkRel, + } + if specialElements[c.element.name] { + ret.state = stateSpecialElementBody + } + if c.element.name != "" && voidElements[c.element.name] { + // Special case: end of start tag of a void element. + // Discard unnecessary state, since this element have no content. + ret.element = element{} + ret.scriptType = "" + ret.linkRel = "" + } + return ret, i + 1 + } + j, err := eatAttrName(s, i) + if err != nil { + return context{state: stateError, err: err}, len(s) + } + state := stateTag + if i == j { + return context{ + state: stateError, + err: errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]), + }, len(s) + } + + if j == len(s) { + state = stateAttrName + } else { + state = stateAfterName + } + return context{ + state: state, + element: c.element, + attr: attr{name: strings.ToLower(string(s[i:j]))}, + linkRel: c.linkRel, + }, j +} + +// tAttrName is the context transition function for stateAttrName. 
+func tAttrName(c context, s []byte) (context, int) { + i, err := eatAttrName(s, 0) + if err != nil { + return context{state: stateError, err: err}, len(s) + } else if i != len(s) { + c.state = stateAfterName + } + return c, i +} + +// tAfterName is the context transition function for stateAfterName. +func tAfterName(c context, s []byte) (context, int) { + // Look for the start of the value. + i := eatWhiteSpace(s, 0) + if i == len(s) { + return c, len(s) + } else if s[i] != '=' { + // Occurs due to tag ending '>', and valueless attribute. + c.state = stateTag + return c, i + } + c.state = stateBeforeValue + // Consume the "=". + return c, i + 1 +} + +// tBeforeValue is the context transition function for stateBeforeValue. +func tBeforeValue(c context, s []byte) (context, int) { + i := eatWhiteSpace(s, 0) + if i == len(s) { + return c, len(s) + } + // Find the attribute delimiter. + // TODO: consider disallowing single-quoted or unquoted attribute values completely, even in hardcoded template text. + delim := delimSpaceOrTagEnd + switch s[i] { + case '\'': + delim, i = delimSingleQuote, i+1 + case '"': + delim, i = delimDoubleQuote, i+1 + } + c.state, c.delim = stateAttr, delim + return c, i +} + +// tHTMLCmt is the context transition function for stateHTMLCmt. +func tHTMLCmt(c context, s []byte) (context, int) { + if i := bytes.Index(s, commentEnd); i != -1 { + return context{}, i + 3 + } + return c, len(s) +} + +var ( + specialTagEndPrefix = []byte(" \t\n\f/") +) + +// tSpecialTagEnd is the context transition function for raw text, RCDATA +// script data, and stylesheet element states. 
+func tSpecialTagEnd(c context, s []byte) (context, int) { + if specialElements[c.element.name] { + if i := indexTagEnd(s, []byte(c.element.name)); i != -1 { + return context{}, i + } + } + return c, len(s) +} + +// indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1 +func indexTagEnd(s []byte, tag []byte) int { + res := 0 + plen := len(specialTagEndPrefix) + for len(s) > 0 { + // Try to find the tag end prefix first + i := bytes.Index(s, specialTagEndPrefix) + if i == -1 { + return i + } + s = s[i+plen:] + // Try to match the actual tag if there is still space for it + if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) { + s = s[len(tag):] + // Check the tag is followed by a proper separator + if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 { + return res + i + } + res += len(tag) + } + res += i + plen + } + return -1 +} + +// tAttr is the context transition function for the attribute state. +func tAttr(c context, s []byte) (context, int) { + return c, len(s) +} + +// tError is the context transition function for the error state. +func tError(c context, s []byte) (context, int) { + return c, len(s) +} + +// eatAttrName returns the largest j such that s[i:j] is an attribute name. +// It returns an error if s[i:] does not look like it begins with an +// attribute name, such as encountering a quote mark without a preceding +// equals sign. +func eatAttrName(s []byte, i int) (int, *Error) { + for j := i; j < len(s); j++ { + switch s[j] { + case ' ', '\t', '\n', '\f', '\r', '=', '>': + return j, nil + case '\'', '"', '<': + // These result in a parse warning in HTML5 and are + // indicative of serious problems if seen in an attr + // name in a template. + return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s) + default: + // No-op. + } + } + return len(s), nil +} + +// asciiAlpha reports whether c is an ASCII letter. 
+func asciiAlpha(c byte) bool { + return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' +} + +// asciiAlphaNum reports whether c is an ASCII letter or digit. +func asciiAlphaNum(c byte) bool { + return asciiAlpha(c) || '0' <= c && c <= '9' +} + +// eatTagName returns the largest j such that s[i:j] is a tag name, together with +// an element holding that name in lower case. If s[i:] does not start with an +// ASCII letter it returns i and an empty element. +func eatTagName(s []byte, i int) (int, element) { + if i == len(s) || !asciiAlpha(s[i]) { + return i, element{} + } + j := i + 1 + for j < len(s) { + x := s[j] + if asciiAlphaNum(x) { + j++ + continue + } + // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y". + if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) { + j += 2 + continue + } + break + } + return j, element{name: strings.ToLower(string(s[i:j]))} +} + +// eatWhiteSpace returns the largest j such that s[i:j] is white space. +func eatWhiteSpace(s []byte, i int) int { + for j := i; j < len(s); j++ { + switch s[j] { + case ' ', '\t', '\n', '\f', '\r': + // No-op. + default: + return j + } + } + return len(s) +} diff --git a/vendor/github.com/google/safehtml/template/trustedfs.go b/vendor/github.com/google/safehtml/template/trustedfs.go new file mode 100644 index 000000000..80db11824 --- /dev/null +++ b/vendor/github.com/google/safehtml/template/trustedfs.go @@ -0,0 +1,98 @@ +// Copyright (c) 2021 The Go Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +//go:build go1.16 +// +build go1.16 + +package template + +import ( + "embed" + "fmt" + "io/fs" + "os" + "path" +) + +// A TrustedFS is an immutable type referencing a filesystem (fs.FS) +// under application control. +// +// In order to ensure that an attacker cannot influence the TrustedFS value, a +// TrustedFS can be instantiated in only two ways. One way is from an embed.FS +// with TrustedFSFromEmbed.
It is assumed that embedded filesystems are under +// the programmer's control. The other way is from a TrustedSource using +// TrustedFSFromTrustedSource, in which case the guarantees and caveats of +// TrustedSource apply. +type TrustedFS struct { + fsys fs.FS +} + +// TrustedFSFromEmbed constructs a TrustedFS from an embed.FS. +func TrustedFSFromEmbed(fsys embed.FS) TrustedFS { + return TrustedFS{fsys: fsys} +} + +// TrustedFSFromTrustedSource constructs a TrustedFS from the string in the +// TrustedSource, which should refer to a directory. +func TrustedFSFromTrustedSource(ts TrustedSource) TrustedFS { + return TrustedFS{fsys: os.DirFS(ts.src)} +} + +// Sub returns a TrustedFS at a subdirectory of the receiver. +// It works by calling fs.Sub on the receiver's fs.FS. +func (tf TrustedFS) Sub(dir TrustedSource) (TrustedFS, error) { + subfs, err := fs.Sub(tf.fsys, dir.String()) + return TrustedFS{fsys: subfs}, err +} + +// ParseFS is like ParseFiles or ParseGlob but reads from the TrustedFS +// instead of the host operating system's file system. +// It accepts a list of glob patterns. +// (Note that most file names serve as glob patterns matching only themselves.) +// +// The same behaviors listed for ParseFiles() apply to ParseFS too (e.g. using the base name +// of the file as the template name). +func ParseFS(tfs TrustedFS, patterns ...string) (*Template, error) { + return parseFS(nil, tfs.fsys, patterns) +} + +// ParseFS is like ParseFiles or ParseGlob but reads from the TrustedFS +// instead of the host operating system's file system. +// It accepts a list of glob patterns. +// (Note that most file names serve as glob patterns matching only themselves.) +// +// The same behaviors listed for ParseFiles() apply to ParseFS too (e.g. using the base name +// of the file as the template name). 
+func (t *Template) ParseFS(tfs TrustedFS, patterns ...string) (*Template, error) { + return parseFS(t, tfs.fsys, patterns) +} + +// parseFS expands each glob pattern against fsys and hands every matching +// file name to parseFiles together with t. It fails if a pattern cannot be +// globbed or matches no files. +// +// Copied from +// https://go.googlesource.com/go/+/refs/tags/go1.17.1/src/text/template/helper.go. +func parseFS(t *Template, fsys fs.FS, patterns []string) (*Template, error) { + var filenames []string + for _, pattern := range patterns { + list, err := fs.Glob(fsys, pattern) + if err != nil { + return nil, err + } + if len(list) == 0 { + return nil, fmt.Errorf("template: pattern matches no files: %#q", pattern) + } + filenames = append(filenames, list...) + } + return parseFiles(t, readFileFS(fsys), filenames...) +} + +// readFileFS returns a function that reads the named file from fsys and +// reports the file's base name along with its contents. +// +// Copied with minor changes from +// https://go.googlesource.com/go/+/refs/tags/go1.17.1/src/text/template/helper.go. +func readFileFS(fsys fs.FS) func(string) (string, []byte, error) { + return func(file string) (string, []byte, error) { + name := path.Base(file) + b, err := fs.ReadFile(fsys, file) + return name, b, err + } +} diff --git a/vendor/github.com/google/safehtml/template/trustedsource.go b/vendor/github.com/google/safehtml/template/trustedsource.go new file mode 100644 index 000000000..f64263948 --- /dev/null +++ b/vendor/github.com/google/safehtml/template/trustedsource.go @@ -0,0 +1,105 @@ +// Copyright (c) 2017 The Go Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +package template + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "flag" +) + +// A TrustedSource is an immutable string-like type referencing +// trusted template files under application control. It can be passed to +// template-parsing functions and methods to safely load templates +// without the risk of untrusted template execution.
+// +// In order to ensure that an attacker cannot influence the TrustedSource +// value, a TrustedSource can be instantiated only from untyped string +// constants, command-line flags, and other application-controlled strings, but +// never from arbitrary string values potentially representing untrusted user input. +// +// Note that TrustedSource's constructors cannot truly guarantee that the +// templates it references are not attacker-controlled; it can guarantee only that +// the path to the template itself is under application control. Users of these +// constructors must ensure themselves that TrustedSource never references +// attacker-controlled files or directories that contain such files. +type TrustedSource struct { + // We declare a TrustedSource not as a string but as a struct wrapping a string + // to prevent construction of TrustedSource values through string conversion. + src string +} + +// TrustedSourceFromConstant constructs a TrustedSource with its underlying +// src set to the given src, which must be an untyped string constant. +// +// No runtime validation or sanitization is performed on src; being under +// application control, it is simply assumed to comply with the TrustedSource type +// contract. +func TrustedSourceFromConstant(src stringConstant) TrustedSource { + return TrustedSource{string(src)} +} + +// TrustedSourceFromConstantDir constructs a TrustedSource calling path/filepath.Join on +// an application-controlled directory path, which must be an untyped string constant, +// a TrustedSource, and a dynamic filename. It returns an error if filename contains +// filepath or list separators, since this might cause the resulting path to reference a +// file outside of the given directory. +// +// dir or src may be empty if either of these path segments are not required. 
+func TrustedSourceFromConstantDir(dir stringConstant, src TrustedSource, filename string) (TrustedSource, error) { + if i := strings.IndexAny(filename, string([]rune{filepath.Separator, filepath.ListSeparator})); i != -1 { + return TrustedSource{}, fmt.Errorf("filename %q must not contain the separator %q", filename, filename[i]) + } + if filename == ".." { + return TrustedSource{}, fmt.Errorf("filename must not be the special name %q", filename) + } + return TrustedSource{filepath.Join(string(dir), src.String(), filename)}, nil +} + +// TrustedSourceJoin is a wrapper around path/filepath.Join that returns a +// TrustedSource formed by joining the given path elements into a single path, +// adding an OS-specific path separator if necessary. +func TrustedSourceJoin(elem ...TrustedSource) TrustedSource { + return TrustedSource{filepath.Join(trustedSourcesToStrings(elem)...)} +} + +// TrustedSourceFromFlag returns a TrustedSource containing the string +// representation of the retrieved value of the flag. +// +// In a server setting, flags are part of the application's deployment +// configuration and are hence considered application-controlled. +func TrustedSourceFromFlag(value flag.Value) TrustedSource { + return TrustedSource{fmt.Sprint(value.String())} +} + +// TrustedSourceFromEnvVar is a wrapper around os.Getenv that +// returns a TrustedSource containing the value of the environment variable +// named by the key. It returns the value, which will be empty if the variable +// is not present. To distinguish between an empty value and an unset value, +// use os.LookupEnv. +// +// In a server setting, environment variables are part of the application's +// deployment configuration and are hence considered application-controlled. +func TrustedSourceFromEnvVar(key stringConstant) TrustedSource { + return TrustedSource{os.Getenv(string(key))} +} + +// String returns the string form of the TrustedSource. 
+func (t TrustedSource) String() string { + return t.src +} + +func trustedSourcesToStrings(paths []TrustedSource) []string { + ret := make([]string, 0, len(paths)) + for _, p := range paths { + ret = append(ret, p.String()) + } + return ret +} diff --git a/vendor/github.com/google/safehtml/template/trustedtemplate.go b/vendor/github.com/google/safehtml/template/trustedtemplate.go new file mode 100644 index 000000000..bd3b1b46a --- /dev/null +++ b/vendor/github.com/google/safehtml/template/trustedtemplate.go @@ -0,0 +1,36 @@ +// Copyright (c) 2017 The Go Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +package template + +// A TrustedTemplate is an immutable string-like type containing a +// safehtml/template template body. It can be safely loaded as template +// text without the risk of untrusted template execution. +// +// In order to ensure that an attacker cannot influence the TrustedTemplate +// value, a TrustedTemplate can be instantiated only from untyped string constants, +// and never from arbitrary string values potentially representing untrusted user input. +type TrustedTemplate struct { + // tmpl is the template body. We declare a TrustedTemplate not as a string but as a struct wrapping a string + // to prevent construction of TrustedTemplate values through string conversion. + tmpl string +} + +// MakeTrustedTemplate constructs a TrustedTemplate with its underlying +// tmpl set to the given tmpl, which must be an untyped string constant. +// +// No runtime validation or sanitization is performed on tmpl; being under +// application control, it is simply assumed to comply with the TrustedTemplate type +// contract. +func MakeTrustedTemplate(tmpl stringConstant) TrustedTemplate { + return TrustedTemplate{string(tmpl)} +} + +// String returns the string form of the TrustedTemplate.
+func (t TrustedTemplate) String() string {
+	return t.tmpl
+}
diff --git a/vendor/github.com/google/safehtml/template/url.go b/vendor/github.com/google/safehtml/template/url.go
new file mode 100644
index 000000000..f63475fcf
--- /dev/null
+++ b/vendor/github.com/google/safehtml/template/url.go
@@ -0,0 +1,122 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+	"fmt"
+	"html"
+	"regexp"
+	"strings"
+
+	"github.com/google/safehtml/internal/safehtmlutil"
+	"github.com/google/safehtml"
+)
+
+// urlPrefixValidators maps URL and TrustedResourceURL sanitization contexts to functions that return an error
+// if the given string is unsafe to use as a URL prefix in that sanitization context.
+var urlPrefixValidators = map[sanitizationContext]func(string) error{
+	sanitizationContextURL:                     validateURLPrefix,
+	sanitizationContextTrustedResourceURLOrURL: validateURLPrefix,
+	sanitizationContextTrustedResourceURL:      validateTrustedResourceURLPrefix,
+}
+
+// startsWithFullySpecifiedSchemePattern matches strings that have a fully-specified scheme component.
+// See RFC 3986 Section 3.
+var startsWithFullySpecifiedSchemePattern = regexp.MustCompile(
+	`^[[:alpha:]](?:[[:alnum:]]|[+.-])*:`)
+
+// validateURLPrefix validates if the given non-empty prefix is a safe safehtml.URL prefix.
+//
+// Prefixes are considered unsafe if they end in an incomplete HTML character reference
+// or percent-encoding character triplet.
+//
+// If the prefix contains a fully-specified scheme component, it is considered safe only if
+// it starts with an allowed scheme. See safehtml.URLSanitized for more details.
+//
+// Otherwise, the prefix is safe only if it contains '/', '?', or '#', since the presence of any
+// of these runes ensures that this prefix, when combined with some arbitrary suffix, cannot be
+// interpreted as a part of a scheme.
+func validateURLPrefix(prefix string) error {
+	decoded, err := decodeURLPrefix(prefix)
+	if err != nil {
+		return err
+	}
+	switch {
+	case startsWithFullySpecifiedSchemePattern.MatchString(decoded):
+		if safehtml.URLSanitized(decoded).String() != decoded {
+			return fmt.Errorf("URL prefix %q contains an unsafe scheme", prefix)
+		}
+	case !strings.ContainsAny(decoded, "/?#"):
+		// If the URL prefix does not already have a ':' scheme delimiter, and does not contain
+		// '/', '?', or '#', any ':' following this prefix will be interpreted as a scheme
+		// delimiter, causing this URL prefix to be interpreted as being part of a scheme.
+		// e.g. `<a href="java{{ "script:" }}alert(1)">`
+		return fmt.Errorf("URL prefix %q is unsafe; it might be interpreted as part of a scheme", prefix)
+	}
+	return nil
+}
+
+// validateTrustedResourceURLPrefix validates if the given non-empty prefix is a safe
+// safehtml.TrustedResourceURL prefix.
+//
+// Prefixes are considered unsafe if they end in an incomplete HTML character reference
+// or percent-encoding character triplet.
+//
+// See safehtmlutil.IsSafeTrustedResourceURLPrefix for details on how the prefix is validated.
+func validateTrustedResourceURLPrefix(prefix string) error {
+	decoded, err := decodeURLPrefix(prefix)
+	if err != nil {
+		return err
+	}
+	if !safehtmlutil.IsSafeTrustedResourceURLPrefix(decoded) {
+		return fmt.Errorf("%q is a disallowed TrustedResourceURL prefix", prefix)
+	}
+	return nil
+}
+
+// endsWithPercentEncodingPrefixPattern matches strings that end in an incomplete
+// URL percent encoding triplet.
+//
+// See https://tools.ietf.org/html/rfc3986#section-2.1.
+var endsWithPercentEncodingPrefixPattern = regexp.MustCompile(
+	`%[[:xdigit:]]?$`)
+
+// containsWhitespaceOrControlPattern matches strings that contain ASCII whitespace
+// or control characters.
+var containsWhitespaceOrControlPattern = regexp.MustCompile(`[[:space:]]|[[:cntrl:]]`)
+
+// decodeURLPrefix returns the given prefix after it has been HTML-unescaped.
+// It returns an error if the prefix:
+//   * ends in an incomplete HTML character reference before HTML-unescaping,
+//   * ends in an incomplete percent-encoding character triplet after HTML-unescaping, or
+//   * contains whitespace or control characters before or after HTML-unescaping.
+func decodeURLPrefix(prefix string) (string, error) {
+	if containsWhitespaceOrControlPattern.MatchString(prefix) {
+		return "", fmt.Errorf("URL prefix %q contains whitespace or control characters", prefix)
+	}
+	if err := validateDoesNotEndsWithCharRefPrefix(prefix); err != nil {
+		return "", fmt.Errorf("URL %s", err)
+	}
+	decoded := html.UnescapeString(prefix)
+	// Check again for whitespace that might have previously been masked by a HTML reference,
+	// such as in "javascript&NewLine;".
+	if containsWhitespaceOrControlPattern.MatchString(decoded) {
+		return "", fmt.Errorf("URL prefix %q contains whitespace or control characters", prefix)
+	}
+	if endsWithPercentEncodingPrefixPattern.MatchString(decoded) {
+		return "", fmt.Errorf("URL prefix %q ends with an incomplete percent-encoding character triplet", prefix)
+	}
+	return decoded, nil
+}
+
+func validateTrustedResourceURLSubstitution(args ...interface{}) (string, error) { // returns the stringified args unless they contain ".."
+	input := safehtmlutil.Stringify(args...)
+	if safehtmlutil.URLContainsDoubleDotSegment(input) {
+		// Reject substitutions containing the ".." dot-segment to prevent the final TrustedResourceURL from referencing
+		// a resource higher up in the path name hierarchy than the path specified in the prefix.
+		return "", fmt.Errorf(`cannot substitute %q after TrustedResourceURL prefix: ".." is disallowed`, input)
+	}
+	return input, nil
+}
diff --git a/vendor/github.com/google/safehtml/trustedresourceurl.go b/vendor/github.com/google/safehtml/trustedresourceurl.go
new file mode 100644
index 000000000..e31a2fd56
--- /dev/null
+++ b/vendor/github.com/google/safehtml/trustedresourceurl.go
@@ -0,0 +1,195 @@
+// Copyright (c) 2017 The Go Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+package safehtml
+
+import (
+	"fmt"
+	"regexp"
+	"sort"
+	"strings"
+
+	"flag"
+	"github.com/google/safehtml/internal/safehtmlutil"
+)
+
+// A TrustedResourceURL is an immutable string-like type referencing the
+// application’s own, trusted resources. It can be used to safely load scripts,
+// CSS and other sensitive resources without the risk of untrusted code execution.
+// For example, it is unsafe to insert a plain string in a
+//
+//	<script src="..."></script>
+//
+// context since the URL may originate from untrusted user input and the
+// script it is pointing to may thus be controlled by an attacker. It is,
+// however, safe to use a TrustedResourceURL since its value is known to never
+// have left application control.
+//
+// In order to ensure that an attacker cannot influence the TrustedResourceURL
+// value, a TrustedResourceURL can only be instantiated from compile-time
+// constant string literals, command-line flags or a combination of the two,
+// but never from arbitrary string values potentially representing untrusted user input.
+//
+// Additionally, TrustedResourceURLs can be serialized and passed along within
+// the application via protocol buffers. It is the application’s responsibility
+// to ensure that the protocol buffers originate from within the application
+// itself and not from an external entity outside its trust domain.
+//
+// Note that TrustedResourceURLs can also use absolute paths (starting with '/')
+// and relative paths. This allows the same binary to be used for different
+// hosts without hard-coding the hostname in a string literal.
+type TrustedResourceURL struct {
+	// We declare a TrustedResourceURL not as a string but as a struct wrapping a string
+	// to prevent construction of TrustedResourceURL values through string conversion.
+	str string
+}
+
+// TrustedResourceURLWithParams constructs a new TrustedResourceURL with the
+// given key-value pairs added as query parameters.
+//
+// Map entries with empty keys or values are ignored. The order of appended
+// keys is guaranteed to be stable but may differ from the order in input.
+func TrustedResourceURLWithParams(t TrustedResourceURL, params map[string]string) TrustedResourceURL {
+	url := t.str
+	var fragment string
+	if i := strings.IndexByte(url, '#'); i != -1 {
+		// The fragment identifier component will always appear at the end
+		// of the URL after the query segment. It is therefore safe to
+		// trim the fragment from the tail of the URL and re-append it after
+		// all query parameters have been added.
+		// See https://tools.ietf.org/html/rfc3986#appendix-A.
+		fragment = url[i:]
+		url = url[:i]
+	}
+	sep := "?"
+	if i := strings.IndexRune(url, '?'); i != -1 {
+		// The first "?" in a URL indicates the start of the query component.
+		// See https://tools.ietf.org/html/rfc3986#section-3.4
+		if i == len(url)-1 {
+			sep = "" // URL already ends in "?"; append the parameters directly
+		} else {
+			sep = "&" // a query is already present; extend it
+		}
+	}
+	stringParams := make([]string, 0, len(params))
+	for k, v := range params {
+		if k == "" || v == "" {
+			continue
+		}
+		stringParam := safehtmlutil.QueryEscapeURL(k) + "=" + safehtmlutil.QueryEscapeURL(v)
+		stringParams = append(stringParams, stringParam)
+	}
+	if len(stringParams) > 0 {
+		sort.Strings(stringParams) // map iteration order is unspecified; sort for stable output
+		url += sep + strings.Join(stringParams, "&")
+	}
+	return TrustedResourceURL{url + fragment}
+}
+
+// TrustedResourceURLFromConstant constructs a TrustedResourceURL with its underlying
+// URL set to the given url, which must be an untyped string constant.
+//
+// No runtime validation or sanitization is performed on url; being under
+// application control, it is simply assumed to comply with the TrustedResourceURL type
+// contract.
+func TrustedResourceURLFromConstant(url stringConstant) TrustedResourceURL {
+	return TrustedResourceURL{string(url)}
+}
+
+// TrustedResourceURLFormatFromConstant constructs a TrustedResourceURL from a
+// format string, which must be an untyped string constant, and string arguments.
+//
+// Arguments are specified as a map of labels, which must contain only alphanumeric
+// and '_' runes, to string values. Each `%{