aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAleksandr Nogikh <nogikh@google.com>2023-05-12 15:26:38 +0200
committerAleksandr Nogikh <wp32pw@gmail.com>2023-05-15 10:58:51 +0200
commitf174396014c87ad0b8e4b03465b418e3394d0bd1 (patch)
treea42dcb5ce53529fab101cf531929dcf90a89baf2
parente9f324cf6d085e7e9b87302e5f9df84199b240b0 (diff)
pkg/asset: support deprecation in the many buckets case
If several syz-cis with different GCS buckets for assets are connected to a single dashboard, we currently face problems during the asset deprecation process. If we receive from the dashboard a valid GCS URL that belongs to an unknown bucket, don't abort the process. Just ignore the URL. Test this behavior.
-rw-r--r--pkg/asset/backend_dummy.go8
-rw-r--r--pkg/asset/backend_gcs.go14
-rw-r--r--pkg/asset/backend_gcs_test.go47
-rw-r--r--pkg/asset/storage.go7
-rw-r--r--pkg/asset/storage_test.go53
5 files changed, 122 insertions, 7 deletions
diff --git a/pkg/asset/backend_dummy.go b/pkg/asset/backend_dummy.go
index 6df86809d..c6a6aac1d 100644
--- a/pkg/asset/backend_dummy.go
+++ b/pkg/asset/backend_dummy.go
@@ -60,7 +60,13 @@ func (be *dummyStorageBackend) downloadURL(path string, publicURL bool) (string,
}
func (be *dummyStorageBackend) getPath(url string) (string, error) {
- return strings.TrimPrefix(url, "http://download/"), nil
+ if strings.HasPrefix(url, "http://unknown-bucket/") {
+ return "", ErrUnknownBucket
+ }
+ if strings.HasPrefix(url, "http://download/") {
+ return strings.TrimPrefix(url, "http://download/"), nil
+ }
+ return "", fmt.Errorf("unknown URL format")
}
func (be *dummyStorageBackend) list() ([]storedObject, error) {
diff --git a/pkg/asset/backend_gcs.go b/pkg/asset/backend_gcs.go
index 0a14781fc..2706fb426 100644
--- a/pkg/asset/backend_gcs.go
+++ b/pkg/asset/backend_gcs.go
@@ -7,6 +7,7 @@ import (
"fmt"
"io"
"net/url"
+ "regexp"
"strings"
"github.com/google/syzkaller/pkg/debugtracer"
@@ -85,16 +86,21 @@ func (csb *cloudStorageBackend) downloadURL(path string, publicURL bool) (string
return csb.client.GetDownloadURL(fmt.Sprintf("%s/%s", csb.bucket, path), publicURL), nil
}
+var allowedDomainsRe = regexp.MustCompile(`^(storage\.googleapis\.com|storage\.cloud\.google\.com)$`)
+
func (csb *cloudStorageBackend) getPath(downloadURL string) (string, error) {
u, err := url.Parse(downloadURL)
if err != nil {
return "", fmt.Errorf("failed to parse the URL: %w", err)
}
- parts := strings.SplitN(u.Path, csb.bucket+"/", 2)
- if len(parts) != 2 {
- return "", fmt.Errorf("%s/ is not present in the path %s", csb.bucket, u.Path)
+ if !allowedDomainsRe.MatchString(u.Host) {
+ return "", fmt.Errorf("not allowed host: %s", u.Host)
+ }
+ prefix := "/" + csb.bucket + "/"
+ if !strings.HasPrefix(u.Path, prefix) {
+ return "", ErrUnknownBucket
}
- return parts[1], nil
+ return u.Path[len(prefix):], nil
}
func (csb *cloudStorageBackend) list() ([]storedObject, error) {
diff --git a/pkg/asset/backend_gcs_test.go b/pkg/asset/backend_gcs_test.go
new file mode 100644
index 000000000..200881e96
--- /dev/null
+++ b/pkg/asset/backend_gcs_test.go
@@ -0,0 +1,47 @@
+// Copyright 2023 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package asset
+
+import (
+ "testing"
+
+ "github.com/google/syzkaller/pkg/debugtracer"
+ "github.com/stretchr/testify/assert"
+)
+
+func TestCloudGetPaths(t *testing.T) {
+ obj := &cloudStorageBackend{
+ client: nil, // we won't need it
+ bucket: "my_bucket",
+ tracer: &debugtracer.NullTracer{},
+ }
+ // Test a public download URL.
+ url, _ := obj.downloadURL("folder/file.txt", true)
+ assert.Equal(t, `https://storage.googleapis.com/my_bucket/folder/file.txt`, url)
+ // Test a non-public download URL.
+ url, _ = obj.downloadURL("folder/file.txt", false)
+ assert.Equal(t, `https://storage.cloud.google.com/my_bucket/folder/file.txt`, url)
+}
+
+func TestCloudParsePaths(t *testing.T) {
+ obj := &cloudStorageBackend{
+ client: nil, // we won't need it
+ bucket: `my_bucket`,
+ tracer: &debugtracer.NullTracer{},
+ }
+ // Parse a public download URL.
+ path, err := obj.getPath(`https://storage.googleapis.com/my_bucket/folder/file.txt`)
+ assert.NoError(t, err)
+ assert.Equal(t, `folder/file.txt`, path)
+ // Parse a non-public download URL.
+ path, err = obj.getPath(`https://storage.cloud.google.com/my_bucket/folder/file.txt`)
+ assert.NoError(t, err)
+ assert.Equal(t, `folder/file.txt`, path)
+ // Error: unknown domain.
+ _, err = obj.getPath(`https://unknown-host.com/my_bucket/folder/file.txt`)
+ assert.ErrorContains(t, err, `not allowed host: unknown-host.com`)
+ // Error: unknown bucket.
+ _, err = obj.getPath(`https://storage.cloud.google.com/not_my_bucket/folder/file.txt`)
+ assert.ErrorIs(t, err, ErrUnknownBucket)
+}
diff --git a/pkg/asset/storage.go b/pkg/asset/storage.go
index 6905b0a07..1debdaa4a 100644
--- a/pkg/asset/storage.go
+++ b/pkg/asset/storage.go
@@ -205,6 +205,8 @@ func (e *FileExistsError) Error() string {
return fmt.Sprintf("asset exists: %s", e.Path)
}
+var ErrUnknownBucket = errors.New("the asset is not in the currently managed bucket")
+
const deletionEmbargo = time.Hour * 24 * 7
// Best way: convert download URLs to paths.
@@ -217,7 +219,10 @@ func (storage *Storage) DeprecateAssets() error {
needed := map[string]bool{}
for _, url := range resp.DownloadURLs {
path, err := storage.backend.getPath(url)
- if err != nil {
+ if errors.Is(err, ErrUnknownBucket) {
+ // The asset is not managed by the particular instance.
+ continue
+ } else if err != nil {
// If we failed to parse just one URL, let's stop the entire process.
// Otherwise we'll start deleting still needed files we couldn't recognize.
return fmt.Errorf("failed to parse '%s': %w", url, err)
diff --git a/pkg/asset/storage_test.go b/pkg/asset/storage_test.go
index fb45f5fad..83088ef9c 100644
--- a/pkg/asset/storage_test.go
+++ b/pkg/asset/storage_test.go
@@ -16,6 +16,7 @@ import (
"github.com/google/syzkaller/dashboard/dashapi"
"github.com/google/syzkaller/pkg/debugtracer"
+ "github.com/stretchr/testify/assert"
"github.com/ulikunitz/xz"
)
@@ -184,7 +185,7 @@ func TestUploadBuildAsset(t *testing.T) {
t.Fatalf("invalid dashMock state: expected 3 assets, got %d", len(allUrls))
}
// First try to remove two assets.
- dashMock.downloadURLs = map[string]bool{allUrls[2]: true, "http://non-related-asset.com/abcd": true}
+ dashMock.downloadURLs = map[string]bool{allUrls[2]: true, "http://download/unrelated.txt": true}
// Pretend there's an asset deletion error.
be.objectRemove = func(string) error { return fmt.Errorf("not now") }
@@ -351,3 +352,53 @@ func TestUploadSameContent(t *testing.T) {
asset.DownloadURL, assetTwo.DownloadURL)
}
}
+
+// Test that we adequately handle the case when several syz-cis with separate buckets
+// are connected to a single dashboard.
+// nolint: dupl
+func TestTwoBucketDeprecation(t *testing.T) {
+ dash := newDashMock()
+ storage, dummy := makeStorage(t, dash.getDashapi())
+
+ // "Upload" an asset from this instance.
+ resp, _ := dummy.upload(&uploadRequest{
+ savePath: `folder/file.txt`,
+ })
+ url, _ := dummy.downloadURL(resp.path, true)
+
+ // Dashboard returns two asset URLs.
+ dash.downloadURLs = map[string]bool{
+ "http://unknown-bucket/other-folder/other-file.txt": true, // will cause ErrUnknownBucket
+ url: true,
+ }
+ dummy.objectRemove = func(url string) error {
+ t.Fatalf("Unexpected removal")
+ return nil
+ }
+ err := storage.DeprecateAssets()
+ assert.NoError(t, err)
+}
+
+// nolint: dupl
+func TestInvalidAssetURLs(t *testing.T) {
+ dash := newDashMock()
+ storage, dummy := makeStorage(t, dash.getDashapi())
+
+ // "Upload" an asset from this instance.
+ resp, _ := dummy.upload(&uploadRequest{
+ savePath: `folder/file.txt`,
+ })
+ url, _ := dummy.downloadURL(resp.path, true)
+
+ // Dashboard returns two asset URLs.
+ dash.downloadURLs = map[string]bool{
+ "http://totally-unknown-bucket/other-folder/other-file.txt": true,
+ url: true,
+ }
+ dummy.objectRemove = func(url string) error {
+ t.Fatalf("Unexpected removal")
+ return nil
+ }
+ err := storage.DeprecateAssets()
+ assert.Error(t, err)
+}