From f174396014c87ad0b8e4b03465b418e3394d0bd1 Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Fri, 12 May 2023 15:26:38 +0200 Subject: pkg/asset: support deprecation in the many buckets case If several syz-cis with different GCS buckets for assets are connected to a single dashboard, we currently face problems during the asset deprecation process. If we receive from the dashboard a valid GCS URL that belong to an unknown bucket, don't abort the process. Just ignore the URL. Test this behavior. --- pkg/asset/backend_dummy.go | 8 ++++++- pkg/asset/backend_gcs.go | 14 ++++++++---- pkg/asset/backend_gcs_test.go | 47 ++++++++++++++++++++++++++++++++++++++ pkg/asset/storage.go | 7 +++++- pkg/asset/storage_test.go | 53 ++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 122 insertions(+), 7 deletions(-) create mode 100644 pkg/asset/backend_gcs_test.go (limited to 'pkg') diff --git a/pkg/asset/backend_dummy.go b/pkg/asset/backend_dummy.go index 6df86809d..c6a6aac1d 100644 --- a/pkg/asset/backend_dummy.go +++ b/pkg/asset/backend_dummy.go @@ -60,7 +60,13 @@ func (be *dummyStorageBackend) downloadURL(path string, publicURL bool) (string, } func (be *dummyStorageBackend) getPath(url string) (string, error) { - return strings.TrimPrefix(url, "http://download/"), nil + if strings.HasPrefix(url, "http://unknown-bucket/") { + return "", ErrUnknownBucket + } + if strings.HasPrefix(url, "http://download/") { + return strings.TrimPrefix(url, "http://download/"), nil + } + return "", fmt.Errorf("unknown URL format") } func (be *dummyStorageBackend) list() ([]storedObject, error) { diff --git a/pkg/asset/backend_gcs.go b/pkg/asset/backend_gcs.go index 0a14781fc..2706fb426 100644 --- a/pkg/asset/backend_gcs.go +++ b/pkg/asset/backend_gcs.go @@ -7,6 +7,7 @@ import ( "fmt" "io" "net/url" + "regexp" "strings" "github.com/google/syzkaller/pkg/debugtracer" @@ -85,16 +86,21 @@ func (csb *cloudStorageBackend) downloadURL(path string, publicURL bool) (string return csb.client.GetDownloadURL(fmt.Sprintf("%s/%s", csb.bucket, path), publicURL), nil } +var allowedDomainsRe = regexp.MustCompile(`^storage\.googleapis\.com|storage\.cloud\.google\.com$`) + func (csb *cloudStorageBackend) getPath(downloadURL string) (string, error) { u, err := url.Parse(downloadURL) if err != nil { return "", fmt.Errorf("failed to parse the URL: %w", err) } - parts := strings.SplitN(u.Path, csb.bucket+"/", 2) - if len(parts) != 2 { - return "", fmt.Errorf("%s/ is not present in the path %s", csb.bucket, u.Path) + if !allowedDomainsRe.MatchString(u.Host) { + return "", fmt.Errorf("not allowed host: %s", u.Host) + } + prefix := "/" + csb.bucket + "/" + if !strings.HasPrefix(u.Path, prefix) { + return "", ErrUnknownBucket } - return parts[1], nil + return u.Path[len(prefix):], nil } func (csb *cloudStorageBackend) list() ([]storedObject, error) { diff --git a/pkg/asset/backend_gcs_test.go b/pkg/asset/backend_gcs_test.go new file mode 100644 index 000000000..200881e96 --- /dev/null +++ b/pkg/asset/backend_gcs_test.go @@ -0,0 +1,47 @@ +// Copyright 2023 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package asset + +import ( + "testing" + + "github.com/google/syzkaller/pkg/debugtracer" + "github.com/stretchr/testify/assert" +) + +func TestCloudGetPaths(t *testing.T) { + obj := &cloudStorageBackend{ + client: nil, // we won't need it + bucket: "my_bucket", + tracer: &debugtracer.NullTracer{}, + } + // Test a public download URL. + url, _ := obj.downloadURL("folder/file.txt", true) + assert.Equal(t, `https://storage.googleapis.com/my_bucket/folder/file.txt`, url) + // Test a non-public download URL. + url, _ = obj.downloadURL("folder/file.txt", false) + assert.Equal(t, `https://storage.cloud.google.com/my_bucket/folder/file.txt`, url) +} + +func TestCloudParsePaths(t *testing.T) { + obj := &cloudStorageBackend{ + client: nil, // we won't need it + bucket: `my_bucket`, + tracer: &debugtracer.NullTracer{}, + } + // Parse a public download URL. + path, err := obj.getPath(`https://storage.googleapis.com/my_bucket/folder/file.txt`) + assert.NoError(t, err) + assert.Equal(t, `folder/file.txt`, path) + // Parse a non-public download URL. + path, err = obj.getPath(`https://storage.cloud.google.com/my_bucket/folder/file.txt`) + assert.NoError(t, err) + assert.Equal(t, `folder/file.txt`, path) + // Error: unknown domain. + _, err = obj.getPath(`https://unknown-host.com/my_bucket/folder/file.txt`) + assert.ErrorContains(t, err, `not allowed host: unknown-host.com`) + // Error: unknown bucket. + _, err = obj.getPath(`https://storage.cloud.google.com/not_my_bucket/folder/file.txt`) + assert.ErrorIs(t, err, ErrUnknownBucket) +} diff --git a/pkg/asset/storage.go b/pkg/asset/storage.go index 6905b0a07..1debdaa4a 100644 --- a/pkg/asset/storage.go +++ b/pkg/asset/storage.go @@ -205,6 +205,8 @@ func (e *FileExistsError) Error() string { return fmt.Sprintf("asset exists: %s", e.Path) } +var ErrUnknownBucket = errors.New("the asset is not in the currently managed bucket") + const deletionEmbargo = time.Hour * 24 * 7 // Best way: convert download URLs to paths. @@ -217,7 +219,10 @@ func (storage *Storage) DeprecateAssets() error { needed := map[string]bool{} for _, url := range resp.DownloadURLs { path, err := storage.backend.getPath(url) - if err != nil { + if err == ErrUnknownBucket { + // The asset is not managed by the particular instance. + continue + } else if err != nil { // If we failed to parse just one URL, let's stop the entire process. // Otherwise we'll start deleting still needed files we couldn't recognize. return fmt.Errorf("failed to parse '%s': %w", url, err) diff --git a/pkg/asset/storage_test.go b/pkg/asset/storage_test.go index fb45f5fad..83088ef9c 100644 --- a/pkg/asset/storage_test.go +++ b/pkg/asset/storage_test.go @@ -16,6 +16,7 @@ import ( "github.com/google/syzkaller/dashboard/dashapi" "github.com/google/syzkaller/pkg/debugtracer" + "github.com/stretchr/testify/assert" "github.com/ulikunitz/xz" ) @@ -184,7 +185,7 @@ func TestUploadBuildAsset(t *testing.T) { t.Fatalf("invalid dashMock state: expected 3 assets, got %d", len(allUrls)) } // First try to remove two assets. - dashMock.downloadURLs = map[string]bool{allUrls[2]: true, "http://non-related-asset.com/abcd": true} + dashMock.downloadURLs = map[string]bool{allUrls[2]: true, "http://download/unrelated.txt": true} // Pretend there's an asset deletion error. be.objectRemove = func(string) error { return fmt.Errorf("not now") } @@ -351,3 +352,53 @@ func TestUploadSameContent(t *testing.T) { asset.DownloadURL, assetTwo.DownloadURL) } } + +// Test that we adequately handle the case when several syz-cis with separate buckets +// are connected to a single dashboard. +// nolint: dupl +func TestTwoBucketDeprecation(t *testing.T) { + dash := newDashMock() + storage, dummy := makeStorage(t, dash.getDashapi()) + + // "Upload" an asset from this instance. + resp, _ := dummy.upload(&uploadRequest{ + savePath: `folder/file.txt`, + }) + url, _ := dummy.downloadURL(resp.path, true) + + // Dashboard returns two asset URLs. + dash.downloadURLs = map[string]bool{ + "http://unknown-bucket/other-folder/other-file.txt": true, // will cause ErrUnknownBucket + url: true, + } + dummy.objectRemove = func(url string) error { + t.Fatalf("Unexpected removal") + return nil + } + err := storage.DeprecateAssets() + assert.NoError(t, err) +} + +// nolint: dupl +func TestInvalidAssetURLs(t *testing.T) { + dash := newDashMock() + storage, dummy := makeStorage(t, dash.getDashapi()) + + // "Upload" an asset from this instance. + resp, _ := dummy.upload(&uploadRequest{ + savePath: `folder/file.txt`, + }) + url, _ := dummy.downloadURL(resp.path, true) + + // Dashboard returns two asset URLs. + dash.downloadURLs = map[string]bool{ + "http://totally-unknown-bucket/other-folder/other-file.txt": true, + url: true, + } + dummy.objectRemove = func(url string) error { + t.Fatalf("Unexpected removal") + return nil + } + err := storage.DeprecateAssets() + assert.Error(t, err) +} -- cgit mrf-deployment