aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/github.com/google/safehtml/template/sanitize.go
diff options
context:
space:
mode:
authorTaras Madan <tarasmadan@google.com>2023-02-22 22:16:50 +0100
committerTaras Madan <tarasmadan@google.com>2023-02-24 12:47:23 +0100
commit4165372ec8fd142475a4e35fd0cf4f8042132208 (patch)
tree21cd62211b4dd80bee469054c5b65db77342333c /vendor/github.com/google/safehtml/template/sanitize.go
parent2b3ed821a493b8936c8bacfa6f8b4f1c90a00855 (diff)
dependencies: update
set go min requirements to 1.19 update dependencies update vendor
Diffstat (limited to 'vendor/github.com/google/safehtml/template/sanitize.go')
-rw-r--r--vendor/github.com/google/safehtml/template/sanitize.go258
1 files changed, 258 insertions, 0 deletions
diff --git a/vendor/github.com/google/safehtml/template/sanitize.go b/vendor/github.com/google/safehtml/template/sanitize.go
new file mode 100644
index 000000000..c75e345e1
--- /dev/null
+++ b/vendor/github.com/google/safehtml/template/sanitize.go
@@ -0,0 +1,258 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "fmt"
+ "regexp"
+ "strings"
+)
+
+// sanitizerForContext maps the HTML context c to the ordered names of the
+// functions that must be applied to a data value interpolated at that point.
+//
+// It returns an error when an action appears where it could change an element
+// or attribute name, or inside an attribute value that is not enclosed in
+// single or double quotes.
+func sanitizerForContext(c context) ([]string, error) {
+	switch c.state {
+	case stateHTMLCmt:
+		return []string{sanitizeHTMLCommentFuncName}, nil
+	case stateTag, stateAttrName, stateAfterName:
+		return nil, fmt.Errorf("actions must not affect element or attribute names")
+	}
+
+	insideElement := c.element.name != "" || len(c.element.names) > 0
+	if !insideElement && c.state == stateText {
+		// Plain text outside of any HTML element.
+		return []string{sanitizeHTMLFuncName}, nil
+	}
+
+	insideAttr := c.attr.name != "" || len(c.attr.names) > 0
+	if !insideAttr {
+		// Element content context: at most one sanitizer applies.
+		name, err := sanitizerForElementContent(c)
+		return appendIfNotEmpty([]string{}, name), err
+	}
+
+	// Attribute value context: only quoted values are accepted.
+	// TODO: consider disallowing single-quoted or unquoted attribute values completely, even in hardcoded template text.
+	quoted := c.delim == delimDoubleQuote || c.delim == delimSingleQuote
+	if !quoted {
+		return nil, fmt.Errorf("unquoted attribute values disallowed")
+	}
+	return sanitizersForAttributeValue(c)
+}
+
+// appendIfNotEmpty appends every non-empty string in values to slice, keeping
+// their relative order, and returns the resulting slice. Empty strings are
+// skipped. When nothing is appended, slice is returned unchanged (a nil slice
+// stays nil).
+//
+// The variadic parameter is deliberately not called "strings" so that it does
+// not shadow the imported strings package inside this function.
+func appendIfNotEmpty(slice []string, values ...string) []string {
+	for _, v := range values {
+		if v != "" {
+			slice = append(slice, v)
+		}
+	}
+	return slice
+}
+
+// sanitizersForAttributeValue returns a list of names of functions that will be
+// called in order to sanitize data values found in the HTML attribute value context c.
+//
+// c may list several candidate element and attribute names (the error messages
+// below refer to these as conditional branches). Every element/attribute
+// combination must resolve to the same sanitization context; otherwise an
+// error is returned. Errors produced by helper calls are wrapped with %w so
+// that the underlying cause stays reachable through errors.Is / errors.As.
+func sanitizersForAttributeValue(c context) ([]string, error) {
+	// Ensure that all combinations of element and attribute names for this context result
+	// in the same attribute value sanitization context.
+	var elems, attrs []string
+	if len(c.element.names) == 0 {
+		elems = []string{c.element.name}
+	} else {
+		elems = c.element.names
+	}
+	if len(c.attr.names) == 0 {
+		attrs = []string{c.attr.name}
+	} else {
+		attrs = c.attr.names
+	}
+	// sc0, elem0 and attr0 remember the first combination; all later
+	// combinations are compared against it.
+	var sc0 sanitizationContext
+	var elem0, attr0 string
+	for i, elem := range elems {
+		for j, attr := range attrs {
+			sc, err := sanitizationContextForAttrVal(elem, attr, c.linkRel)
+			if err != nil {
+				if len(elems) == 1 && len(attrs) == 1 {
+					// Only one combination exists, so the plain error is already precise.
+					return nil, err
+				}
+				return nil, fmt.Errorf(`conditional branch with {element=%q, attribute=%q} results in sanitization error: %w`, elem, attr, err)
+			}
+			if i == 0 && j == 0 {
+				sc0, elem0, attr0 = sc, elem, attr
+				continue
+			}
+			if sc != sc0 {
+				return nil, fmt.Errorf(
+					`conditional branches end in different attribute value sanitization contexts: {element=%q, attribute=%q} has sanitization context %q, {element=%q, attribute=%q} has sanitization context %q`,
+					elem0, attr0, sc0, elem, attr, sc)
+			}
+		}
+	}
+	if sc0.isEnum() && c.attr.value != "" {
+		return nil, fmt.Errorf("partial substitutions are disallowed in the %q attribute value context of a %q element", c.attr.name, c.element.name)
+	}
+	if sc0 == sanitizationContextStyle && c.attr.value != "" {
+		if err := validateDoesNotEndsWithCharRefPrefix(c.attr.value); err != nil {
+			return nil, fmt.Errorf("action cannot be interpolated into the %q attribute value of this %q element: %w", c.attr.name, c.element.name, err)
+		}
+	}
+	// ret is a stack of sanitizer names that will be built in reverse.
+	var ret []string
+	// All attribute values must be HTML-escaped at run time by sanitizeHTML to eliminate
+	// any HTML markup that can cause the HTML parser to transition out of the attribute value state.
+	// These attribute values will later be HTML-unescaped by the HTML parser in the browser.
+	ret = append(ret, sanitizeHTMLFuncName)
+	sanitizer := sc0.sanitizerName()
+	if !sc0.isURLorTrustedResourceURL() {
+		return reverse(appendIfNotEmpty(ret, sanitizer)), nil
+	}
+	urlAttrValPrefix := c.attr.value
+	if urlAttrValPrefix == "" {
+		// With no hardcoded prefix, the whole value in a URL or TrustedResourceURL
+		// sanitization context must be sanitized and normalized.
+		return reverse(appendIfNotEmpty(ret, normalizeURLFuncName, sanitizer)), nil
+	}
+	// Action occurs after a URL or TrustedResourceURL prefix.
+	if c.attr.ambiguousValue {
+		return nil, fmt.Errorf("actions must not occur after an ambiguous URL prefix in the %q attribute value context of a %q element", c.attr.name, c.element.name)
+	}
+	validator, ok := urlPrefixValidators[sc0]
+	if !ok {
+		return nil, fmt.Errorf("cannot validate attribute value prefix %q in the %q sanitization context", c.attr.value, sc0)
+	}
+	if err := validator(c.attr.value); err != nil {
+		return nil, fmt.Errorf("action cannot be interpolated into the %q URL attribute value of this %q element: %w", c.attr.name, c.element.name, err)
+	}
+	switch {
+	case sc0 == sanitizationContextTrustedResourceURL:
+		// Untrusted data that occurs anywhere after TrustedResourceURL prefix must be query-escaped
+		// to prevent the injection of any new path segments or URL components. Moreover, they must
+		// not contain any ".." dot-segments.
+		ret = append(ret, queryEscapeURLFuncName, validateTrustedResourceURLSubstitutionFuncName)
+	case strings.ContainsAny(urlAttrValPrefix, "#?"):
+		// For URLs, we only escape in the query or fragment part to prevent the injection of new query
+		// parameters or fragments.
+		ret = append(ret, queryEscapeURLFuncName)
+	default:
+		ret = append(ret, normalizeURLFuncName)
+	}
+	return reverse(ret), nil
+}
+
+// reverse flips the order of the elements of s in place and returns the same
+// slice for convenient chaining.
+func reverse(s []string) []string {
+	last := len(s) - 1
+	for i := 0; i < len(s)/2; i++ {
+		s[i], s[last-i] = s[last-i], s[i]
+	}
+	return s
+}
+
+// sanitizationContextForAttrVal determines which sanitization context applies
+// to the value of attribute attr when it appears on element. linkRel is the
+// value of the rel attribute and is only consulted for the href attribute of a
+// link element. An error is returned when no rule covers the combination.
+func sanitizationContextForAttrVal(element, attr, linkRel string) (sanitizationContext, error) {
+	if attr == "href" && element == "link" {
+		// Special case: safehtml.URL values are allowed in a link element's href
+		// attribute if that element's rel attribute possesses certain values.
+		for _, rel := range strings.Fields(linkRel) {
+			if urlLinkRelVals[rel] {
+				return sanitizationContextTrustedResourceURLOrURL, nil
+			}
+		}
+	}
+	if dataAttributeNamePattern.MatchString(attr) {
+		// Special case: data-* attributes are specified by HTML5 to hold custom
+		// data private to the page or application; they should not be
+		// interpreted by browsers, so their values need no sanitization.
+		return sanitizationContextNone, nil
+	}
+	if specific, found := elementSpecificAttrValSanitizationContext[attr][element]; found {
+		return specific, nil
+	}
+	if global, attrKnown := globalAttrValSanitizationContext[attr]; attrKnown {
+		// Only sanitize attributes that appear in elements whose semantics are
+		// known. These attributes might have different semantics in other
+		// standard or custom elements that our sanitization policy does not
+		// handle correctly.
+		if _, elemKnown := elementContentSanitizationContext[element]; elemKnown || allowedVoidElements[element] {
+			return global, nil
+		}
+	}
+	return 0, fmt.Errorf("actions must not occur in the %q attribute value context of a %q element", attr, element)
+}
+
+// dataAttributeNamePattern matches valid data attribute names: the literal prefix
+// "data-", then a lowercase ASCII letter or underscore, then any number of lowercase
+// ASCII letters, digits, hyphens or underscores. The whole name must match.
+// This pattern is conservative and matches only a subset of the valid names defined in
+// https://html.spec.whatwg.org/multipage/dom.html#embedding-custom-non-visible-data-with-the-data-*-attributes
+var dataAttributeNamePattern = regexp.MustCompile(`^data-[a-z_][-a-z0-9_]*$`)
+
+// endsWithCharRefPrefixPattern recognizes text whose tail is an unterminated
+// HTML character reference: an "&" optionally followed by the beginning of a
+// named, decimal or hexadecimal reference, with no closing ";".
+//
+// See https://html.spec.whatwg.org/multipage/syntax.html#character-references.
+var endsWithCharRefPrefixPattern = regexp.MustCompile(
+	`&(?:[[:alpha:]][[:alnum:]]*|#(?:[xX][[:xdigit:]]*|[[:digit:]]*))?$`)
+
+// validateDoesNotEndsWithCharRefPrefix reports an error when prefix stops in
+// the middle of an HTML character reference, and returns nil otherwise.
+func validateDoesNotEndsWithCharRefPrefix(prefix string) error {
+	if !endsWithCharRefPrefixPattern.MatchString(prefix) {
+		return nil
+	}
+	return fmt.Errorf(`prefix %q ends with an incomplete HTML character reference; did you mean "&amp;" instead of "&"?`, prefix)
+}
+
+// sanitizerForElementContent returns the name of the function that will be called
+// to sanitize data values found in the HTML element content context c.
+//
+// c may list several candidate element names (the error messages below refer
+// to these as conditional branches). All of them must resolve to the same
+// element content sanitization context; otherwise an error is returned. An
+// error from the per-element lookup is wrapped with %w so that the underlying
+// cause stays reachable through errors.Is / errors.As.
+func sanitizerForElementContent(c context) (string, error) {
+	// Ensure that all other possible element names for this context result in the same
+	// element content sanitization context.
+	var elems []string
+	if len(c.element.names) == 0 {
+		elems = []string{c.element.name}
+	} else {
+		elems = c.element.names
+	}
+	// sc0 and elem0 remember the first element; later elements are compared against it.
+	var sc0 sanitizationContext
+	var elem0 string
+	for i, elem := range elems {
+		var sc sanitizationContext
+		var err error
+		if elem == "" {
+			// Special case: an empty element name represents a context outside of a HTML element.
+			sc = sanitizationContextHTML
+		} else {
+			sc, err = sanitizationContextForElementContent(elem)
+		}
+		if err != nil {
+			if len(elems) == 1 {
+				// Only one element is possible, so the plain error is already precise.
+				return "", err
+			}
+			return "", fmt.Errorf(`conditional branch with element %q results in sanitization error: %w`, elem, err)
+		}
+		if i == 0 {
+			sc0, elem0 = sc, elem
+			continue
+		}
+		if sc != sc0 {
+			return "",
+				fmt.Errorf(`conditional branches end in different element content sanitization contexts: element %q has sanitization context %q, element %q has sanitization context %q`,
+					elem0, sc0, elem, sc)
+		}
+	}
+	return sc0.sanitizerName(), nil
+}
+
+// sanitizationContextForElementContent looks up the sanitization context that
+// governs the content of the given element. Elements without an entry in
+// elementContentSanitizationContext yield an error.
+func sanitizationContextForElementContent(element string) (sanitizationContext, error) {
+	if found, known := elementContentSanitizationContext[element]; known {
+		return found, nil
+	}
+	return 0, fmt.Errorf("actions must not occur in the element content context of a %q element", element)
+}
+
+// sanitizeHTMLComment returns the empty string regardless of input.
+// Comment content does not correspond to any parsed structure or
+// human-readable content, so the simplest and most secure policy is to drop
+// content interpolated into comments.
+// This approach is equally valid whether or not static comment content is
+// removed from the template.
+func sanitizeHTMLComment(_ ...interface{}) string {
+ return ""
+}