From 4165372ec8fd142475a4e35fd0cf4f8042132208 Mon Sep 17 00:00:00 2001 From: Taras Madan Date: Wed, 22 Feb 2023 22:16:50 +0100 Subject: dependencies: update set go min requirements to 1.19 update dependencies update vendor --- .../google/safehtml/template/sanitize.go | 258 +++++++++++++++++++++ 1 file changed, 258 insertions(+) create mode 100644 vendor/github.com/google/safehtml/template/sanitize.go (limited to 'vendor/github.com/google/safehtml/template/sanitize.go') diff --git a/vendor/github.com/google/safehtml/template/sanitize.go b/vendor/github.com/google/safehtml/template/sanitize.go new file mode 100644 index 000000000..c75e345e1 --- /dev/null +++ b/vendor/github.com/google/safehtml/template/sanitize.go @@ -0,0 +1,258 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package template + +import ( + "fmt" + "regexp" + "strings" +) + +// sanitizerForContext returns an ordered list of function names that will be called to +// sanitize data values found in the HTML context defined by c. +func sanitizerForContext(c context) ([]string, error) { + switch c.state { + case stateTag, stateAttrName, stateAfterName: + return nil, fmt.Errorf("actions must not affect element or attribute names") + case stateHTMLCmt: + return []string{sanitizeHTMLCommentFuncName}, nil + } + if len(c.element.names) == 0 && c.element.name == "" && c.state == stateText { + // Not in an HTML element. + return []string{sanitizeHTMLFuncName}, nil + } + if c.attr.name != "" || len(c.attr.names) > 0 { + // We are in an attribute value context. + if c.delim != delimDoubleQuote && c.delim != delimSingleQuote { + // TODO: consider disallowing single-quoted or unquoted attribute values completely, even in hardcoded template text. + return nil, fmt.Errorf("unquoted attribute values disallowed") + } + return sanitizersForAttributeValue(c) + } + // Otherwise, we are in an element content context. + elementContentSanitizer, err := sanitizerForElementContent(c) + return appendIfNotEmpty([]string{}, elementContentSanitizer), err +} + +// appendIfNotEmpty appends the given strings that are non-empty to the given slice. +func appendIfNotEmpty(slice []string, strings ...string) []string { + for _, s := range strings { + if s != "" { + slice = append(slice, s) + } + } + return slice +} + +// sanitizersForAttributeValue returns a list of names of functions that will be +// called in order to sanitize data values found the HTML attribtue value context c. +func sanitizersForAttributeValue(c context) ([]string, error) { + // Ensure that all combinations of element and attribute names for this context results + // in the same attribute value sanitization context. + var elems, attrs []string + if len(c.element.names) == 0 { + elems = []string{c.element.name} + } else { + elems = c.element.names + } + if len(c.attr.names) == 0 { + attrs = []string{c.attr.name} + } else { + attrs = c.attr.names + } + var sc0 sanitizationContext + var elem0, attr0 string + for i, elem := range elems { + for j, attr := range attrs { + sc, err := sanitizationContextForAttrVal(elem, attr, c.linkRel) + if err != nil { + if len(elems) == 1 && len(attrs) == 1 { + return nil, err + } + return nil, fmt.Errorf(`conditional branch with {element=%q, attribute=%q} results in sanitization error: %s`, elem, attr, err) + } + if i == 0 && j == 0 { + sc0, elem0, attr0 = sc, elem, attr + continue + } + if sc != sc0 { + return nil, fmt.Errorf( + `conditional branches end in different attribute value sanitization contexts: {element=%q, attribute=%q} has sanitization context %q, {element=%q, attribute=%q} has sanitization context %q`, + elem0, attr0, sc0, elem, attr, sc) + } + } + } + if sc0.isEnum() && c.attr.value != "" { + return nil, fmt.Errorf("partial substitutions are disallowed in the %q attribute value context of a %q element", c.attr.name, c.element.name) + } + if sc0 == sanitizationContextStyle && c.attr.value != "" { + if err := validateDoesNotEndsWithCharRefPrefix(c.attr.value); err != nil { + return nil, fmt.Errorf("action cannot be interpolated into the %q attribute value of this %q element: %s", c.attr.name, c.element.name, err) + } + } + // ret is a stack of sanitizer names that will be built in reverse. + var ret []string + // All attribute values must be HTML-escaped at run time by sanitizeHTML to eliminate + // any HTML markup that can cause the HTML parser to transition out of the attribute value state. + // These attribute values will later be HTML-unescaped by the HTML parser in the browser. + ret = append(ret, sanitizeHTMLFuncName) + sanitizer := sc0.sanitizerName() + if !sc0.isURLorTrustedResourceURL() { + return reverse(appendIfNotEmpty(ret, sanitizer)), nil + } + urlAttrValPrefix := c.attr.value + if urlAttrValPrefix == "" { + // Attribute value prefixes in URL or TrustedResourceURL sanitization contexts + // must sanitized and normalized. + return reverse(appendIfNotEmpty(ret, normalizeURLFuncName, sanitizer)), nil + } + // Action occurs after a URL or TrustedResourceURL prefix. + if c.attr.ambiguousValue { + return nil, fmt.Errorf("actions must not occur after an ambiguous URL prefix in the %q attribute value context of a %q element", c.attr.name, c.element.name) + } + validator, ok := urlPrefixValidators[sc0] + if !ok { + return nil, fmt.Errorf("cannot validate attribute value prefix %q in the %q sanitization context", c.attr.value, sc0) + } + if err := validator(c.attr.value); err != nil { + return nil, fmt.Errorf("action cannot be interpolated into the %q URL attribute value of this %q element: %s", c.attr.name, c.element.name, err) + } + switch { + case sc0 == sanitizationContextTrustedResourceURL: + // Untrusted data that occurs anywhere after TrustedResourceURL prefix must be query-escaped + // to prevent the injection of any new path segments or URL components. Moreover, they must + // not contain any ".." dot-segments. + ret = append(ret, queryEscapeURLFuncName, validateTrustedResourceURLSubstitutionFuncName) + case strings.ContainsAny(urlAttrValPrefix, "#?"): + // For URLs, we only escape in the query or fragment part to prevent the injection of new query + // parameters or fragments. + ret = append(ret, queryEscapeURLFuncName) + default: + ret = append(ret, normalizeURLFuncName) + } + return reverse(ret), nil +} + +// reverse reverses s and returns it. +func reverse(s []string) []string { + for head, tail := 0, len(s)-1; head < tail; head, tail = head+1, tail-1 { + s[head], s[tail] = s[tail], s[head] + } + return s +} + +// sanitizationContextForAttrVal returns the sanitization context for attr when it +// appears within element. +func sanitizationContextForAttrVal(element, attr, linkRel string) (sanitizationContext, error) { + if element == "link" && attr == "href" { + // Special case: safehtml.URL values are allowed in a link element's href attribute if that element's + // rel attribute possesses certain values. + relVals := strings.Fields(linkRel) + for _, val := range relVals { + if urlLinkRelVals[val] { + return sanitizationContextTrustedResourceURLOrURL, nil + } + } + } + if dataAttributeNamePattern.MatchString(attr) { + // Special case: data-* attributes are specified by HTML5 to hold custom data private to + // the page or application; they should not be interpreted by browsers. Therefore, no + // sanitization is required for these attribute values. + return sanitizationContextNone, nil + } + if sc, ok := elementSpecificAttrValSanitizationContext[attr][element]; ok { + return sc, nil + } + sc, isAllowedAttr := globalAttrValSanitizationContext[attr] + _, isAllowedElement := elementContentSanitizationContext[element] + if isAllowedAttr && (isAllowedElement || allowedVoidElements[element]) { + // Only sanitize attributes that appear in elements whose semantics are known. + // Thes attributes might have different semantics in other standard or custom + // elements that our sanitization policy does not handle correctly. + return sc, nil + } + return 0, fmt.Errorf("actions must not occur in the %q attribute value context of a %q element", attr, element) +} + +// dataAttributeNamePattern matches valid data attribute names. +// This pattern is conservative and matches only a subset of the valid names defined in +// https://html.spec.whatwg.org/multipage/dom.html#embedding-custom-non-visible-data-with-the-data-*-attributes +var dataAttributeNamePattern = regexp.MustCompile(`^data-[a-z_][-a-z0-9_]*$`) + +// endsWithCharRefPrefixPattern matches strings that end in an incomplete +// HTML character reference. +// +// See https://html.spec.whatwg.org/multipage/syntax.html#character-references. +var endsWithCharRefPrefixPattern = regexp.MustCompile( + `&(?:[[:alpha:]][[:alnum:]]*|#(?:[xX][[:xdigit:]]*|[[:digit:]]*))?$`) + +// validateDoesNotEndsWithCharRefPrefix returns an error only if the given prefix ends +// with an incomplete HTML character reference. +func validateDoesNotEndsWithCharRefPrefix(prefix string) error { + if endsWithCharRefPrefixPattern.MatchString(prefix) { + return fmt.Errorf(`prefix %q ends with an incomplete HTML character reference; did you mean "&" instead of "&"?`, prefix) + } + return nil +} + +// sanitizerForElementContent returns the name of the function that will be called +// to sanitize data values found in the HTML element content context c. +func sanitizerForElementContent(c context) (string, error) { + // Ensure that all other possible element names for this context result in the same + // element content sanitization context. + var elems []string + if len(c.element.names) == 0 { + elems = []string{c.element.name} + } else { + elems = c.element.names + } + var sc0 sanitizationContext + var elem0 string + for i, elem := range elems { + var sc sanitizationContext + var err error + if elem == "" { + // Special case: an empty element name represents a context outside of a HTML element. + sc = sanitizationContextHTML + } else { + sc, err = sanitizationContextForElementContent(elem) + } + if err != nil { + if len(elems) == 1 { + return "", err + } + return "", fmt.Errorf(`conditional branch with element %q results in sanitization error: %s`, elem, err) + } + if i == 0 { + sc0, elem0 = sc, elem + continue + } + if sc != sc0 { + return "", + fmt.Errorf(`conditional branches end in different element content sanitization contexts: element %q has sanitization context %q, element %q has sanitization context %q`, + elem0, sc0, elem, sc) + } + } + return sc0.sanitizerName(), nil +} + +// sanitizationContextForElementContent returns the element content sanitization context for the given element. +func sanitizationContextForElementContent(element string) (sanitizationContext, error) { + sc, ok := elementContentSanitizationContext[element] + if !ok { + return 0, fmt.Errorf("actions must not occur in the element content context of a %q element", element) + } + return sc, nil +} + +// sanitizeHTMLComment returns the empty string regardless of input. +// Comment content does not correspond to any parsed structure or +// human-readable content, so the simplest and most secure policy is to drop +// content interpolated into comments. +// This approach is equally valid whether or not static comment content is +// removed from the template. +func sanitizeHTMLComment(_ ...interface{}) string { + return "" +} -- cgit mrf-deployment