From ba28e0a86a1747112e1216d1f602ca4254bdf1f7 Mon Sep 17 00:00:00 2001 From: Taras Madan Date: Mon, 28 Jul 2025 10:54:22 +0200 Subject: pkg/coveragedb: document how to create the BQ table and data transfer --- pkg/coveragedb/README.md | 61 +++++++++++++++++++++++++ pkg/coveragedb/bq-schema.json | 104 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 165 insertions(+) create mode 100644 pkg/coveragedb/README.md create mode 100644 pkg/coveragedb/bq-schema.json (limited to 'pkg/coveragedb') diff --git a/pkg/coveragedb/README.md b/pkg/coveragedb/README.md new file mode 100644 index 000000000..834ffca1d --- /dev/null +++ b/pkg/coveragedb/README.md @@ -0,0 +1,61 @@ +# Prepare coverage aggregation pipeline + +Assuming you have the coverage `*.jsonl` files in some bucket: +1. Create BigQuery table. +2. Start data transfers from the bucket to BigQuery table. + +The coverage merger job consumes data from the BQ table and stores aggregations +in the Spanner DB. + +Dots in BQ table names are not supported, thus: +1. For the namespace "upstream" the expected BQ table name is "upstream". +2. For the namespace "android-6.12" the expected BQ table name is "android-6-12". 
+ + +### Create new BigQuery table for coverage data +```bash +bq mk \ + --table \ + --description "android 6.12" \ + --time_partitioning_field timestamp \ + --time_partitioning_type DAY \ + --require_partition_filter=true \ + --clustering_fields file_path,kernel_commit,hit_count \ + syzkaller:syzbot_coverage.android-6-12 \ + ./pkg/coveragedb/bq-schema.json +``` + +### Add new data transfer +```bash +bq mk \ + --transfer_config \ + --display_name=ci-android-6-12-bucket-to-syzbot_coverage \ + --params='{"destination_table_name_template":"android-6-12", + "data_path_template": "gs://$COVERAGE_STREAM_BUCKET/ci-android-6.12/*.jsonl", + "allow_jagged_rows": false, + "allow_quoted_newlines": false, + "delete_source_files": true, + "encoding": "UTF8", + "field_delimiter": ",", + "file_format": "JSON", + "ignore_unknown_values": false, + "max_bad_records": "0", + "parquet_enable_list_inference": false, + "parquet_enum_as_string": false, + "preserve_ascii_control_characters": false, + "skip_leading_rows": "0", + "use_avro_logical_types": false, + "write_disposition": "APPEND" + }' \ + --project_id=syzkaller \ + --data_source=google_cloud_storage \ + --target_dataset=syzbot_coverage +``` + +### List BigQuery data transfers +```bash +bq ls \ + --transfer_config \ + --transfer_location=us-central1 \ + --project_id=syzkaller +``` \ No newline at end of file diff --git a/pkg/coveragedb/bq-schema.json b/pkg/coveragedb/bq-schema.json new file mode 100644 index 000000000..bb757a503 --- /dev/null +++ b/pkg/coveragedb/bq-schema.json @@ -0,0 +1,104 @@ +[ + { + "name": "timestamp", + "type": "TIMESTAMP", + "mode": "REQUIRED", + "description": "the time the following data was captured" + }, + { + "name": "version", + "type": "INTEGER", + "mode": "REQUIRED", + "description": "the record version to simplify future changes" + }, + { + "name": "fuzzing_minutes", + "type": "INTEGER", + "mode": "REQUIRED", + "description": "how many fuzzing minutes are represented in the record" + }, + { + "name": 
"arch", + "type": "STRING", + "mode": "REQUIRED" + }, + { + "name": "build_id", + "type": "STRING", + "mode": "REQUIRED" + }, + { + "name": "manager", + "type": "STRING", + "mode": "REQUIRED" + }, + { + "name": "kernel_repo", + "type": "STRING", + "mode": "REQUIRED" + }, + { + "name": "kernel_branch", + "type": "STRING", + "mode": "REQUIRED" + }, + { + "name": "kernel_commit", + "type": "STRING", + "mode": "REQUIRED" + }, + { + "name": "file_path", + "type": "STRING", + "mode": "REQUIRED" + }, + { + "name": "func_name", + "type": "STRING", + "mode": "REQUIRED" + }, + { + "name": "sl", + "type": "INTEGER", + "mode": "REQUIRED", + "description": "Callback position frame StartLine." + }, + { + "name": "sc", + "type": "INTEGER", + "mode": "REQUIRED", + "description": "Callback position frame StartCol." + }, + { + "name": "el", + "type": "INTEGER", + "mode": "REQUIRED", + "description": "Callback position frame EndLine." + }, + { + "name": "ec", + "type": "INTEGER", + "mode": "REQUIRED", + "description": "Callback position frame EndCol." + }, + { + "name": "hit_count", + "type": "INTEGER", + "mode": "REQUIRED", + "description": "Coverage point hit count." + }, + { + "name": "inline", + "type": "BOOLEAN", + "mode": "REQUIRED", + "description": "TRUE if the coverage signal was received from inline function" + }, + { + "name": "pc", + "type": "NUMERIC", + "mode": "REQUIRED", + "description": "single callback generates many coverage signals if inlined", + "precision": "20", + "scale": "0" + } +] -- cgit mrf-deployment