diff options
| author | Taras Madan <tarasmadan@google.com> | 2024-12-18 13:33:01 +0100 |
|---|---|---|
| committer | Taras Madan <tarasmadan@google.com> | 2024-12-19 14:38:21 +0000 |
| commit | fef5bb5152a5013d0c4571cd0601f46824a9664b (patch) | |
| tree | c8fc887f6bfe48ae314a55f439ca1e7cbea370c7 /dashboard/dashapi/dashapi.go | |
| parent | f1c188dc6963bf016692df095d783e4945d84f7b (diff) | |
tools/syz-covermerger: upload coverage as jsonl
The previous implementation stored only the summary of processed records.
The summary was <1GB, and a single processing node was able to manipulate the data.
The current implementation stores all the details about the records read, to make post-processing more flexible.
This change was needed to get access to the source manager name and will help to analyze other details.
This new implementation requires 20GB of memory to process a single day's records.
A CSV log interning experiment allowed merging using 10GB.
Quarter data aggregation will cost ~100 times more.
The alternative is to use stream processing. We can process data kernel-file-by-file.
It reduces memory consumption by a factor of ~15000.
This approach is implemented here.
We're batching coverage signals by file and storing the per-file results in a GCS JSONL file.
See https://jsonlines.org/ to learn about jsonl.
Diffstat (limited to 'dashboard/dashapi/dashapi.go')
| -rw-r--r-- | dashboard/dashapi/dashapi.go | 44 |
1 files changed, 8 insertions, 36 deletions
diff --git a/dashboard/dashapi/dashapi.go b/dashboard/dashapi/dashapi.go index 0fc3651cc..0d4bee371 100644 --- a/dashboard/dashapi/dashapi.go +++ b/dashboard/dashapi/dashapi.go @@ -8,7 +8,6 @@ package dashapi import ( "bytes" "compress/gzip" - "context" "encoding/json" "fmt" "io" @@ -18,10 +17,7 @@ import ( "reflect" "time" - "cloud.google.com/go/civil" "github.com/google/syzkaller/pkg/auth" - "github.com/google/syzkaller/pkg/coveragedb" - "github.com/google/syzkaller/pkg/gcs" ) type Dashboard struct { @@ -692,42 +688,18 @@ func (dash *Dashboard) SaveDiscussion(req *SaveDiscussionReq) error { return dash.Query("save_discussion", req, nil) } -type MergedCoverage struct { - Namespace string - Repo string - Commit string - Duration int64 - DateTo civil.Date - TotalRows int64 - FileData coveragedb.ManagersCoverage -} - -type SaveCoverageReq struct { - Coverage *MergedCoverage -} - -// SaveCoverage returns amount of records created in db. -func (dash *Dashboard) SaveCoverage(req *SaveCoverageReq) (int, error) { +func (dash *Dashboard) CreateUploadURL() (string, error) { uploadURL := new(string) - if err := dash.Query("create_upload_url", req, uploadURL); err != nil { - return 0, fmt.Errorf("create_upload_url: %w", err) - } - - gcsClient, err := gcs.NewClient(context.Background()) - if err != nil { - return 0, fmt.Errorf("gcs.NewClient: %w", err) - } - w, err := gcsClient.FileWriter(*uploadURL) - if err != nil { - return 0, fmt.Errorf("gcsClient.FileWriter: %w", err) - } - defer w.Close() - if err := json.NewEncoder(gzip.NewWriter(w)).Encode(req); err != nil { - return 0, fmt.Errorf("json.NewEncoder(gzip.NewWriter(w)).Encode: %w", err) + if err := dash.Query("create_upload_url", nil, uploadURL); err != nil { + return "", fmt.Errorf("create_upload_url: %w", err) } + return *uploadURL, nil +} +// SaveCoverage returns amount of records created in db. 
+func (dash *Dashboard) SaveCoverage(gcpURL string) (int, error) { rowsWritten := new(int) - if err := dash.Query("save_coverage", *uploadURL, rowsWritten); err != nil { + if err := dash.Query("save_coverage", gcpURL, rowsWritten); err != nil { return 0, fmt.Errorf("save_coverage: %w", err) } return *rowsWritten, nil |
