aboutsummaryrefslogtreecommitdiffstats
path: root/pkg
diff options
context:
space:
mode:
authorTaras Madan <tarasmadan@google.com>2025-07-28 10:54:22 +0200
committerTaras Madan <tarasmadan@google.com>2025-07-29 12:25:45 +0000
commitba28e0a86a1747112e1216d1f602ca4254bdf1f7 (patch)
tree4153e193ac2649a3508966481ee6df02de305df3 /pkg
parentea52e6c494b8bb65114b8f63180336dde1f98ef0 (diff)
pkg/coveragedb: document how to create the BQ table and data transfer
Diffstat (limited to 'pkg')
-rw-r--r--pkg/coveragedb/README.md61
-rw-r--r--pkg/coveragedb/bq-schema.json104
2 files changed, 165 insertions, 0 deletions
diff --git a/pkg/coveragedb/README.md b/pkg/coveragedb/README.md
new file mode 100644
index 000000000..834ffca1d
--- /dev/null
+++ b/pkg/coveragedb/README.md
@@ -0,0 +1,61 @@
+# Prepare coverage aggregation pipeline
+
+Assuming you have the coverage `*.jsonl` files in some bucket:
+1. Create BigQuery table.
+2. Start data transfers from the bucket to BigQuery table.
+
+The coverage merger job consumes data from the BQ table and stores aggregations
+in the Spanner DB.
+
+Dots are not supported in BQ table names, thus:
+1. For the namespace "upstream" the expected BQ table name is "upstream".
+2. For the namespace "android-6.12" the expected BQ table name is "android-6-12".
+
+
+### Create new BigQuery table for coverage data
+```bash
+bq mk \
+ --table \
+ --description "android 6.12" \
+ --time_partitioning_field timestamp \
+ --time_partitioning_type DAY \
+ --require_partition_filter=true \
+ --clustering_fields file_path,kernel_commit,hit_count \
+ syzkaller:syzbot_coverage.android-6-12 \
+ ./pkg/coveragedb/bq-schema.json
+```
+
+### Add new data transfer
+```bash
+bq mk \
+ --transfer_config \
+ --display_name=ci-android-6-12-bucket-to-syzbot_coverage \
+ --params='{"destination_table_name_template":"android-6-12",
+ "data_path_template": "gs://'"${COVERAGE_STREAM_BUCKET}"'/ci-android-6.12/*.jsonl",
+ "allow_jagged_rows": false,
+ "allow_quoted_newlines": false,
+ "delete_source_files": true,
+ "encoding": "UTF8",
+ "field_delimiter": ",",
+ "file_format": "JSON",
+ "ignore_unknown_values": false,
+ "max_bad_records": "0",
+ "parquet_enable_list_inference": false,
+ "parquet_enum_as_string": false,
+ "preserve_ascii_control_characters": false,
+ "skip_leading_rows": "0",
+ "use_avro_logical_types": false,
+ "write_disposition": "APPEND"
+ }' \
+ --project_id=syzkaller \
+ --data_source=google_cloud_storage \
+ --target_dataset=syzbot_coverage
+```
+
+### List BigQuery data transfers
+```bash
+bq ls \
+ --transfer_config \
+ --transfer_location=us-central1 \
+ --project_id=syzkaller
+``` \ No newline at end of file
diff --git a/pkg/coveragedb/bq-schema.json b/pkg/coveragedb/bq-schema.json
new file mode 100644
index 000000000..bb757a503
--- /dev/null
+++ b/pkg/coveragedb/bq-schema.json
@@ -0,0 +1,104 @@
+[
+ {
+ "name": "timestamp",
+ "type": "TIMESTAMP",
+ "mode": "REQUIRED",
+ "description": "the time the following data was captured"
+ },
+ {
+ "name": "version",
+ "type": "INTEGER",
+ "mode": "REQUIRED",
+ "description": "the record version to simplify future changes"
+ },
+ {
+ "name": "fuzzing_minutes",
+ "type": "INTEGER",
+ "mode": "REQUIRED",
+ "description": "how many fuzzing minutes are represented in the record"
+ },
+ {
+ "name": "arch",
+ "type": "STRING",
+ "mode": "REQUIRED"
+ },
+ {
+ "name": "build_id",
+ "type": "STRING",
+ "mode": "REQUIRED"
+ },
+ {
+ "name": "manager",
+ "type": "STRING",
+ "mode": "REQUIRED"
+ },
+ {
+ "name": "kernel_repo",
+ "type": "STRING",
+ "mode": "REQUIRED"
+ },
+ {
+ "name": "kernel_branch",
+ "type": "STRING",
+ "mode": "REQUIRED"
+ },
+ {
+ "name": "kernel_commit",
+ "type": "STRING",
+ "mode": "REQUIRED"
+ },
+ {
+ "name": "file_path",
+ "type": "STRING",
+ "mode": "REQUIRED"
+ },
+ {
+ "name": "func_name",
+ "type": "STRING",
+ "mode": "REQUIRED"
+ },
+ {
+ "name": "sl",
+ "type": "INTEGER",
+ "mode": "REQUIRED",
+ "description": "Callback position frame StartLine."
+ },
+ {
+ "name": "sc",
+ "type": "INTEGER",
+ "mode": "REQUIRED",
+ "description": "Callback position frame StartCol."
+ },
+ {
+ "name": "el",
+ "type": "INTEGER",
+ "mode": "REQUIRED",
+ "description": "Callback position frame EndLine."
+ },
+ {
+ "name": "ec",
+ "type": "INTEGER",
+ "mode": "REQUIRED",
+ "description": "Callback position frame EndCol."
+ },
+ {
+ "name": "hit_count",
+ "type": "INTEGER",
+ "mode": "REQUIRED",
+ "description": "Coverage point hit count."
+ },
+ {
+ "name": "inline",
+ "type": "BOOLEAN",
+ "mode": "REQUIRED",
+ "description": "TRUE if the coverage signal was received from an inlined function"
+ },
+ {
+ "name": "pc",
+ "type": "NUMERIC",
+ "mode": "REQUIRED",
+ "description": "single callback generates many coverage signals if inlined",
+ "precision": "20",
+ "scale": "0"
+ }
+]