aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/github.com/google/safehtml/urlset.go
diff options
context:
space:
mode:
author Taras Madan <tarasmadan@google.com> 2023-02-22 22:16:50 +0100
committer Taras Madan <tarasmadan@google.com> 2023-02-24 12:47:23 +0100
commit 4165372ec8fd142475a4e35fd0cf4f8042132208 (patch)
tree 21cd62211b4dd80bee469054c5b65db77342333c /vendor/github.com/google/safehtml/urlset.go
parent 2b3ed821a493b8936c8bacfa6f8b4f1c90a00855 (diff)
dependencies: update
set go min requirements to 1.19; update dependencies; update vendor
Diffstat (limited to 'vendor/github.com/google/safehtml/urlset.go')
-rw-r--r-- vendor/github.com/google/safehtml/urlset.go 167
1 files changed, 167 insertions, 0 deletions
diff --git a/vendor/github.com/google/safehtml/urlset.go b/vendor/github.com/google/safehtml/urlset.go
new file mode 100644
index 000000000..8d74a7732
--- /dev/null
+++ b/vendor/github.com/google/safehtml/urlset.go
@@ -0,0 +1,167 @@
+// Copyright (c) 2017 The Go Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+package safehtml
+
+import (
+ "bytes"
+ "strconv"
+)
+
+// asciiWhitespace flags every byte that is ASCII whitespace as defined by
+// https://infra.spec.whatwg.org/#ascii-whitespace, i.e.
+// U+0009 TAB, U+000A LF, U+000C FF, U+000D CR, or U+0020 SPACE.
+var asciiWhitespace [256]bool
+
+// srcsetMetachars flags every byte that affects parsing of srcset values:
+// the ASCII whitespace bytes plus the comma.
+var srcsetMetachars [256]bool
+
+// init populates the two byte-indexed lookup tables above.
+func init() {
+	// Every whitespace byte is also a srcset metacharacter.
+	for _, c := range []byte("\t \n\f\r") {
+		asciiWhitespace[c] = true
+		srcsetMetachars[c] = true
+	}
+	// The comma is a metacharacter only.
+	srcsetMetachars[','] = true
+}
+
+// URLSetSanitized builds a safe srcset value from str by vetting each
+// image candidate on its own.
+//
+// A candidate is kept only when its URL is non-empty, isSafeURL accepts it,
+// and its optional descriptor passes isOptionalSrcMetadataWellFormed;
+// any other candidate is dropped. Scanning stops at the first candidate
+// that is not followed by a comma. If no candidate is kept, the result
+// wraps InnocuousURL.
+//
+// https://html.spec.whatwg.org/multipage/images.html#srcset-attributes
+func URLSetSanitized(str string) URLSet {
+	var sanitized bytes.Buffer
+
+	remaining := str
+	for remaining != "" {
+		// One image candidate: [ws] URL [ws] [descriptor] [ws].
+		var candidate, descriptor string
+		_, remaining = consumeIn(remaining, asciiWhitespace)
+		candidate, remaining = consumeNotIn(remaining, asciiWhitespace)
+		_, remaining = consumeIn(remaining, asciiWhitespace)
+		descriptor, remaining = consumeNotIn(remaining, srcsetMetachars)
+		_, remaining = consumeIn(remaining, asciiWhitespace)
+
+		if candidate != "" && isSafeURL(candidate) && isOptionalSrcMetadataWellFormed(descriptor) {
+			if sanitized.Len() > 0 {
+				// Keep a space in front of the separator: a comma that
+				// directly follows a URL would be parsed as part of it.
+				sanitized.WriteString(" , ")
+			}
+			// The URL itself may begin or end with a comma; escape those.
+			appendURLToSet(candidate, &sanitized)
+			if descriptor != "" {
+				sanitized.WriteByte(' ')
+				sanitized.WriteString(descriptor)
+			}
+		}
+
+		// Another candidate can only follow a separating comma.
+		if len(remaining) > 0 && remaining[0] == ',' {
+			remaining = remaining[1:]
+			continue
+		}
+		break
+	}
+
+	if sanitized.Len() == 0 {
+		return URLSet{InnocuousURL}
+	}
+	return URLSet{sanitized.String()}
+}
+
+// appendURLToSet appends a URL so that it does not start or end with a comma
+//
+// https://html.spec.whatwg.org/multipage/images.html#srcset-attributes
+// parsing step 2 which says:
+// """
+// A valid non-empty URL that does not start or end with a U+002C COMMA character (,),
+// referencing a non-interactive, optionally animated, image resource that is neither
+// paged nor scripted
+// """
+//
+// Simply replacing all commas would break data:image/png;base64,IMAGECONTENT
+// Note: This breaks data URLs with empty content since they end with a comma.
+// We could handle that case by appending a '#'.
+func appendURLToSet(url string, buffer *bytes.Buffer) {
+ n := len(url)
+ left, right := 0, n
+ if url[left] == ',' {
+ buffer.WriteString("%2c")
+ left++
+ }
+ commaAtEnd := false
+ if left < right && url[right-1] == ',' {
+ commaAtEnd = true
+ right--
+ }
+ buffer.WriteString(url[left:right])
+ if commaAtEnd {
+ buffer.WriteString("%2c")
+ }
+}
+
+// consumeNotIn splits str at the first byte b for which mask[b] is true.
+// consumed is the (possibly empty) prefix of bytes not flagged in mask and
+// rest is the remainder, starting at the flagged byte. When no byte of str
+// is flagged, consumed is all of str and rest is empty.
+func consumeNotIn(str string, mask [256]bool) (consumed, rest string) {
+	end := 0
+	for end < len(str) && !mask[str[end]] {
+		end++
+	}
+	return str[:end], str[end:]
+}
+
+// consumeIn splits str at the first byte b for which mask[b] is false.
+// consumed is the (possibly empty) prefix made only of bytes flagged in
+// mask and rest is the remainder. When every byte of str is flagged,
+// consumed is all of str and rest is empty.
+func consumeIn(str string, mask [256]bool) (consumed, rest string) {
+	end := 0
+	for end < len(str) && mask[str[end]] {
+		end++
+	}
+	return str[:end], str[end:]
+}
+
+// isOptionalSrcMetadataWellFormed reports whether metadata is an acceptable
+// srcset descriptor: either empty (the descriptor is optional) or a number,
+// as parsed by strconv.ParseFloat, optionally followed by a single ASCII
+// letter.
+//
+// srcset for both image candidates (<img srcset>) and the proposal for
+// script allow a number and an optional letter afterwards.
+func isOptionalSrcMetadataWellFormed(metadata string) bool {
+	if metadata == "" {
+		return true
+	}
+	// Strip one trailing ASCII letter, if there is one.
+	number := metadata
+	switch last := metadata[len(metadata)-1]; {
+	case 'a' <= last && last <= 'z', 'A' <= last && last <= 'Z':
+		number = metadata[:len(metadata)-1]
+	}
+	// ParseFloat accepts more than
+	// html.spec.whatwg.org/multipage/common-microsyntaxes.html#valid-floating-point-number
+	// allows, but the overmatch is sufficient here.
+	if _, err := strconv.ParseFloat(number, 64); err != nil {
+		return false
+	}
+	return true
+}
+
+// URLSet corresponds to the value of a srcset attribute outside a
+// TrustedResourceURL context.
+//
+// In this file, URLSet values are produced by URLSetSanitized.
+type URLSet struct {
+ // We declare a URLSet not as a string but as a struct wrapping a string
+ // to prevent construction of URLSet values through string conversion.
+ // The field is unexported, so code outside this package cannot set it
+ // directly.
+ str string
+}
+
+// String returns the string content of a URLSet, i.e. the wrapped srcset
+// value, unchanged.
+func (s URLSet) String() string {
+ return s.str
+}