// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package template import ( "fmt" "regexp" "strings" ) // sanitizerForContext returns an ordered list of function names that will be called to // sanitize data values found in the HTML context defined by c. func sanitizerForContext(c context) ([]string, error) { switch c.state { case stateTag, stateAttrName, stateAfterName: return nil, fmt.Errorf("actions must not affect element or attribute names") case stateHTMLCmt: return []string{sanitizeHTMLCommentFuncName}, nil } if len(c.element.names) == 0 && c.element.name == "" && c.state == stateText { // Not in an HTML element. return []string{sanitizeHTMLFuncName}, nil } if c.attr.name != "" || len(c.attr.names) > 0 { // We are in an attribute value context. if c.delim != delimDoubleQuote && c.delim != delimSingleQuote { // TODO: consider disallowing single-quoted or unquoted attribute values completely, even in hardcoded template text. return nil, fmt.Errorf("unquoted attribute values disallowed") } return sanitizersForAttributeValue(c) } // Otherwise, we are in an element content context. elementContentSanitizer, err := sanitizerForElementContent(c) return appendIfNotEmpty([]string{}, elementContentSanitizer), err } // appendIfNotEmpty appends the given strings that are non-empty to the given slice. func appendIfNotEmpty(slice []string, strings ...string) []string { for _, s := range strings { if s != "" { slice = append(slice, s) } } return slice } // sanitizersForAttributeValue returns a list of names of functions that will be // called in order to sanitize data values found the HTML attribtue value context c. func sanitizersForAttributeValue(c context) ([]string, error) { // Ensure that all combinations of element and attribute names for this context results // in the same attribute value sanitization context. var elems, attrs []string if len(c.element.names) == 0 { elems = []string{c.element.name} } else { elems = c.element.names } if len(c.attr.names) == 0 { attrs = []string{c.attr.name} } else { attrs = c.attr.names } var sc0 sanitizationContext var elem0, attr0 string for i, elem := range elems { for j, attr := range attrs { sc, err := sanitizationContextForAttrVal(elem, attr, c.linkRel) if err != nil { if len(elems) == 1 && len(attrs) == 1 { return nil, err } return nil, fmt.Errorf(`conditional branch with {element=%q, attribute=%q} results in sanitization error: %s`, elem, attr, err) } if i == 0 && j == 0 { sc0, elem0, attr0 = sc, elem, attr continue } if sc != sc0 { return nil, fmt.Errorf( `conditional branches end in different attribute value sanitization contexts: {element=%q, attribute=%q} has sanitization context %q, {element=%q, attribute=%q} has sanitization context %q`, elem0, attr0, sc0, elem, attr, sc) } } } if sc0.isEnum() && c.attr.value != "" { return nil, fmt.Errorf("partial substitutions are disallowed in the %q attribute value context of a %q element", c.attr.name, c.element.name) } if sc0 == sanitizationContextStyle && c.attr.value != "" { if err := validateDoesNotEndsWithCharRefPrefix(c.attr.value); err != nil { return nil, fmt.Errorf("action cannot be interpolated into the %q attribute value of this %q element: %s", c.attr.name, c.element.name, err) } } // ret is a stack of sanitizer names that will be built in reverse. var ret []string // All attribute values must be HTML-escaped at run time by sanitizeHTML to eliminate // any HTML markup that can cause the HTML parser to transition out of the attribute value state. // These attribute values will later be HTML-unescaped by the HTML parser in the browser. ret = append(ret, sanitizeHTMLFuncName) sanitizer := sc0.sanitizerName() if !sc0.isURLorTrustedResourceURL() { return reverse(appendIfNotEmpty(ret, sanitizer)), nil } urlAttrValPrefix := c.attr.value if urlAttrValPrefix == "" { // Attribute value prefixes in URL or TrustedResourceURL sanitization contexts // must sanitized and normalized. return reverse(appendIfNotEmpty(ret, normalizeURLFuncName, sanitizer)), nil } // Action occurs after a URL or TrustedResourceURL prefix. if c.attr.ambiguousValue { return nil, fmt.Errorf("actions must not occur after an ambiguous URL prefix in the %q attribute value context of a %q element", c.attr.name, c.element.name) } validator, ok := urlPrefixValidators[sc0] if !ok { return nil, fmt.Errorf("cannot validate attribute value prefix %q in the %q sanitization context", c.attr.value, sc0) } if err := validator(c.attr.value); err != nil { return nil, fmt.Errorf("action cannot be interpolated into the %q URL attribute value of this %q element: %s", c.attr.name, c.element.name, err) } switch { case sc0 == sanitizationContextTrustedResourceURL: // Untrusted data that occurs anywhere after TrustedResourceURL prefix must be query-escaped // to prevent the injection of any new path segments or URL components. Moreover, they must // not contain any ".." dot-segments. ret = append(ret, queryEscapeURLFuncName, validateTrustedResourceURLSubstitutionFuncName) case strings.ContainsAny(urlAttrValPrefix, "#?"): // For URLs, we only escape in the query or fragment part to prevent the injection of new query // parameters or fragments. ret = append(ret, queryEscapeURLFuncName) default: ret = append(ret, normalizeURLFuncName) } return reverse(ret), nil } // reverse reverses s and returns it. func reverse(s []string) []string { for head, tail := 0, len(s)-1; head < tail; head, tail = head+1, tail-1 { s[head], s[tail] = s[tail], s[head] } return s } // sanitizationContextForAttrVal returns the sanitization context for attr when it // appears within element. func sanitizationContextForAttrVal(element, attr, linkRel string) (sanitizationContext, error) { if element == "link" && attr == "href" { // Special case: safehtml.URL values are allowed in a link element's href attribute if that element's // rel attribute possesses certain values. relVals := strings.Fields(linkRel) for _, val := range relVals { if urlLinkRelVals[val] { return sanitizationContextTrustedResourceURLOrURL, nil } } } if dataAttributeNamePattern.MatchString(attr) { // Special case: data-* attributes are specified by HTML5 to hold custom data private to // the page or application; they should not be interpreted by browsers. Therefore, no // sanitization is required for these attribute values. return sanitizationContextNone, nil } if sc, ok := elementSpecificAttrValSanitizationContext[attr][element]; ok { return sc, nil } sc, isAllowedAttr := globalAttrValSanitizationContext[attr] _, isAllowedElement := elementContentSanitizationContext[element] if isAllowedAttr && (isAllowedElement || allowedVoidElements[element]) { // Only sanitize attributes that appear in elements whose semantics are known. // Thes attributes might have different semantics in other standard or custom // elements that our sanitization policy does not handle correctly. return sc, nil } return 0, fmt.Errorf("actions must not occur in the %q attribute value context of a %q element", attr, element) } // dataAttributeNamePattern matches valid data attribute names. // This pattern is conservative and matches only a subset of the valid names defined in // https://html.spec.whatwg.org/multipage/dom.html#embedding-custom-non-visible-data-with-the-data-*-attributes var dataAttributeNamePattern = regexp.MustCompile(`^data-[a-z_][-a-z0-9_]*$`) // endsWithCharRefPrefixPattern matches strings that end in an incomplete // HTML character reference. // // See https://html.spec.whatwg.org/multipage/syntax.html#character-references. var endsWithCharRefPrefixPattern = regexp.MustCompile( `&(?:[[:alpha:]][[:alnum:]]*|#(?:[xX][[:xdigit:]]*|[[:digit:]]*))?$`) // validateDoesNotEndsWithCharRefPrefix returns an error only if the given prefix ends // with an incomplete HTML character reference. func validateDoesNotEndsWithCharRefPrefix(prefix string) error { if endsWithCharRefPrefixPattern.MatchString(prefix) { return fmt.Errorf(`prefix %q ends with an incomplete HTML character reference; did you mean "&" instead of "&"?`, prefix) } return nil } // sanitizerForElementContent returns the name of the function that will be called // to sanitize data values found in the HTML element content context c. func sanitizerForElementContent(c context) (string, error) { // Ensure that all other possible element names for this context result in the same // element content sanitization context. var elems []string if len(c.element.names) == 0 { elems = []string{c.element.name} } else { elems = c.element.names } var sc0 sanitizationContext var elem0 string for i, elem := range elems { var sc sanitizationContext var err error if elem == "" { // Special case: an empty element name represents a context outside of a HTML element. sc = sanitizationContextHTML } else { sc, err = sanitizationContextForElementContent(elem) } if err != nil { if len(elems) == 1 { return "", err } return "", fmt.Errorf(`conditional branch with element %q results in sanitization error: %s`, elem, err) } if i == 0 { sc0, elem0 = sc, elem continue } if sc != sc0 { return "", fmt.Errorf(`conditional branches end in different element content sanitization contexts: element %q has sanitization context %q, element %q has sanitization context %q`, elem0, sc0, elem, sc) } } return sc0.sanitizerName(), nil } // sanitizationContextForElementContent returns the element content sanitization context for the given element. func sanitizationContextForElementContent(element string) (sanitizationContext, error) { sc, ok := elementContentSanitizationContext[element] if !ok { return 0, fmt.Errorf("actions must not occur in the element content context of a %q element", element) } return sc, nil } // sanitizeHTMLComment returns the empty string regardless of input. // Comment content does not correspond to any parsed structure or // human-readable content, so the simplest and most secure policy is to drop // content interpolated into comments. // This approach is equally valid whether or not static comment content is // removed from the template. func sanitizeHTMLComment(_ ...interface{}) string { return "" }