From 7589012970c80e2228f45f9e3be30bd3b751d3ea Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Wed, 3 Aug 2022 13:35:47 +0000 Subject: dashboard/config: enable debug options for the arm64-full config Also, factor emulation-dependent consts into a separate file. --- dashboard/config/linux/bits/arm.yml | 7 +----- dashboard/config/linux/bits/arm64.yml | 7 +----- dashboard/config/linux/bits/lockdep.yml | 5 +++- dashboard/config/linux/bits/riscv64.yml | 4 ---- dashboard/config/linux/bits/timeouts_emu.yml | 10 ++++++++ dashboard/config/linux/bits/timeouts_native.yml | 32 +++++++++++++++++++++++++ dashboard/config/linux/bits/x86_64.yml | 30 +---------------------- 7 files changed, 49 insertions(+), 46 deletions(-) create mode 100644 dashboard/config/linux/bits/timeouts_emu.yml create mode 100644 dashboard/config/linux/bits/timeouts_native.yml (limited to 'dashboard/config/linux/bits') diff --git a/dashboard/config/linux/bits/arm.yml b/dashboard/config/linux/bits/arm.yml index da9bda68b..0557ddf2a 100644 --- a/dashboard/config/linux/bits/arm.yml +++ b/dashboard/config/linux/bits/arm.yml @@ -6,13 +6,8 @@ shell: - make kvm_guest.config config: - # We reduce number of vivid/netrom/rose/hcd devices to 2 because we use at most 2 procs - # with emulation and creating these devices takes time during boot. - - CMDLINE: [append, "root=/dev/vda console=ttyAMA0 vmalloc=512M smp.csd_lock_timeout=300000 watchdog_thresh=165 workqueue.watchdog_thresh=420 sysctl.net.core.netdev_unregister_timeout_secs=420 dummy_hcd.num=2"] + - CMDLINE: [append, "root=/dev/vda console=ttyAMA0 vmalloc=512M"] - CMDLINE_EXTEND - # See the comment in x86_64.yml re these numbers. - - RCU_CPU_STALL_TIMEOUT: 300 - - DEFAULT_HUNG_TASK_TIMEOUT: 420 # If we enable KASAN, we may want the following configs as well. 
# KASAN_INLINE leads to a too large kernel image which is mishandled by qemu or kernel and does not boot: diff --git a/dashboard/config/linux/bits/arm64.yml b/dashboard/config/linux/bits/arm64.yml index 08ddfa04c..5b2395ef2 100644 --- a/dashboard/config/linux/bits/arm64.yml +++ b/dashboard/config/linux/bits/arm64.yml @@ -7,14 +7,9 @@ shell: - make kvm_guest.config: [-android] config: - # We reduce number of vivid/netrom/rose/hcd devices to 2 because we use at most 2 procs - # with emulation and creating these devices takes time during boot. - - CMDLINE: [append, "root=/dev/vda console=ttyAMA0 smp.csd_lock_timeout=300000 watchdog_thresh=165 workqueue.watchdog_thresh=420 sysctl.net.core.netdev_unregister_timeout_secs=420 dummy_hcd.num=2"] + - CMDLINE: [append, "root=/dev/vda console=ttyAMA0"] # Was dropped in "arm64: Drop support for CMDLINE_EXTEND". - CMDLINE_EXTEND: [-v5.12] - # See the comment in x86_64.yml re these numbers. - - RCU_CPU_STALL_TIMEOUT: 300 - - DEFAULT_HUNG_TASK_TIMEOUT: [420, -nonoise, -kmsan, -kcsan, -kmemleak] - ARM64_TAGGED_ADDR_ABI - ARM64_PMEM diff --git a/dashboard/config/linux/bits/lockdep.yml b/dashboard/config/linux/bits/lockdep.yml index ca49ff134..03aa31ba6 100644 --- a/dashboard/config/linux/bits/lockdep.yml +++ b/dashboard/config/linux/bits/lockdep.yml @@ -7,7 +7,10 @@ config: - LOCKDEP_BITS: [17, v5.13] # This helps to prevent "BUG: MAX_LOCKDEP_CHAINS too low!" # and "BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!" - - LOCKDEP_CHAINS_BITS: [18, v5.13] + # We don't set it to 18 on arm64, because in that case the -full config does not boot on GCE. + # It boots with 17, but the kconf syntax currently does not let us set it to 17 for some arches + # and to 18 for others. 
+ - LOCKDEP_CHAINS_BITS: [18, v5.13, -arm64] - LOCKDEP_STACK_TRACE_BITS: [20, v5.13] - LOCKDEP_STACK_TRACE_HASH_BITS: [14, v5.13] - LOCKDEP_CIRCULAR_QUEUE_BITS: [12, v5.13] diff --git a/dashboard/config/linux/bits/riscv64.yml b/dashboard/config/linux/bits/riscv64.yml index 6501b02fa..3352e0503 100644 --- a/dashboard/config/linux/bits/riscv64.yml +++ b/dashboard/config/linux/bits/riscv64.yml @@ -6,11 +6,7 @@ shell: - make kvm_guest.config config: - - CMDLINE: [append, "dummy_hcd.num=2 smp.csd_lock_timeout=300000 watchdog_thresh=165 workqueue.watchdog_thresh=420 sysctl.net.core.netdev_unregister_timeout_secs=420"] - CMDLINE_EXTEND - # See the comment in x86_64.yml re these numbers. - - RCU_CPU_STALL_TIMEOUT: 300 - - DEFAULT_HUNG_TASK_TIMEOUT: 420 # Presumably this gives more interesting coverage. - BINFMT_FLAT diff --git a/dashboard/config/linux/bits/timeouts_emu.yml b/dashboard/config/linux/bits/timeouts_emu.yml new file mode 100644 index 000000000..b8058a3c7 --- /dev/null +++ b/dashboard/config/linux/bits/timeouts_emu.yml @@ -0,0 +1,10 @@ +# Copyright 2022 syzkaller project authors. All rights reserved. +# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +config: + # We reduce number of vivid/netrom/rose/hcd devices to 2 because we use at most 2 procs + # with emulation and creating these devices takes time during boot. + - CMDLINE: [append, "smp.csd_lock_timeout=300000 watchdog_thresh=165 workqueue.watchdog_thresh=420 sysctl.net.core.netdev_unregister_timeout_secs=420 dummy_hcd.num=2"] + # See the comment in x86_64.yml re these numbers. 
+ - RCU_CPU_STALL_TIMEOUT: 300 + - DEFAULT_HUNG_TASK_TIMEOUT: [420, -nonoise, -kmsan, -kcsan, -kmemleak] diff --git a/dashboard/config/linux/bits/timeouts_native.yml b/dashboard/config/linux/bits/timeouts_native.yml new file mode 100644 index 000000000..7bc8d7551 --- /dev/null +++ b/dashboard/config/linux/bits/timeouts_native.yml @@ -0,0 +1,32 @@ +# Copyright 2022 syzkaller project authors. All rights reserved. +# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +config: + - CMDLINE: [append, "smp.csd_lock_timeout=100000 watchdog_thresh=55 workqueue.watchdog_thresh=140 sysctl.net.core.netdev_unregister_timeout_secs=140 dummy_hcd.num=8"] + # Deterministic hang/stall detection. + # Without careful tuning the same hang/stall can be detected in multiple, radically-different ways. + # For example, an infinite loop can be detected as CPU stall, RCU stall, hung task or workqueue stall. + # This may lead to lots of duplicate bugs. In order to prevent this we strictly order hang/stall detection + # timeouts based on their causality (CPU stall can cause hung task, but not vice versa). See #516 for details. + # + # 1. RCU stalls have the highest priority. + # RCU_CPU_STALL_TIMEOUT=100 results in stalls detected after 100-101 secs. + # This also includes CSD lock timeout (CONFIG_CSD_LOCK_WAIT_DEBUG + smp.csd_lock_timeout=100000(ms)). + # + # 2. Then softlockup detector. + # kernel.watchdog_thresh = 55 detects stalls after 110-132 secs. + # + # 3. Then hung tasks, workqueue stalls and netdev unregister hangs. + # Unfortunately we can't separate them because that would require setting "no output" timeout to 10+ minutes. + # workqueue.watchdog_thresh=140 and DEFAULT_HUNG_TASK_TIMEOUT=140 both result in detection after 140-280 secs. + # + # 4. Finally, "no output" crashes. + # sys/targets.Timeouts.NoOutput is set to 300 secs. + # + # Don't change these values without considering all other values as well. 
+ # Note: executor also sets hung_task_check_interval_secs=20 sysctl. + # Note: gVisor watchdog timeout is 3 mins + 1/4 of that for checking period = 3m45s. + # Note: other arches that use qemu emulation scale all these timeouts by 3 (sys/targets.Timeouts.Scale). + - RCU_CPU_STALL_TIMEOUT: 100 + # The constraints repeat constraints on inclusion of debug.yml which enables hung task detection. + - DEFAULT_HUNG_TASK_TIMEOUT: [140, -nonoise, -kmsan, -kcsan, -kmemleak] diff --git a/dashboard/config/linux/bits/x86_64.yml b/dashboard/config/linux/bits/x86_64.yml index 0d368fe55..225ef76c5 100644 --- a/dashboard/config/linux/bits/x86_64.yml +++ b/dashboard/config/linux/bits/x86_64.yml @@ -9,37 +9,9 @@ config: # root=/dev/sda is not necessary correct, but it's a common setting and user arguments are appended # to the builtin command line, so user can always override this. # vsyscall=native required to run x86_64 executables on android kernels (it disables VDSO by default). - - CMDLINE: [append, "root=/dev/sda console=ttyS0 vsyscall=native numa=fake=2 kvm-intel.nested=1 spec_store_bypass_disable=prctl nopcid vivid.n_devs=16 vivid.multiplanar=1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2 netrom.nr_ndevs=16 rose.rose_ndevs=16 dummy_hcd.num=8 smp.csd_lock_timeout=100000 watchdog_thresh=55 workqueue.watchdog_thresh=140 sysctl.net.core.netdev_unregister_timeout_secs=140"] + - CMDLINE: [append, "root=/dev/sda console=ttyS0 vsyscall=native numa=fake=2 kvm-intel.nested=1 spec_store_bypass_disable=prctl nopcid vivid.n_devs=16 vivid.multiplanar=1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2 netrom.nr_ndevs=16 rose.rose_ndevs=16"] - CMDLINE_BOOL - # Deterministic hang/stall detection. - # Without careful tuning the same hang/stall can be detected in multiple, radically-different ways. - # For example, an infinite loop can be detected as CPU stall, RCU stall, hung task or workqueue stall. - # This may lead to lots of duplicate bugs. 
In order to prevent this we strictly order hang/stall detection - # timeouts based on their causality (CPU stall can cause hung task, but not vise versa). See #516 for details. - # - # 1. RCU stalls has the highest priority. - # RCU_CPU_STALL_TIMEOUT=100 results in stalls detected after 100-101 secs. - # This also includes CSD lock timeout (CONFIG_CSD_LOCK_WAIT_DEBUG + smp.csd_lock_timeout=100000(ms)). - # - # 2. Then softlockup detector. - # kernel.watchdog_thresh = 55 detects stalls after 110-132 secs. - # - # 3. Then hung tasks, workqueue stalls and netdev unregister hangs. - # Unfortunately we can't separate them because that would require setting "no output" timeout to 10+ minutes. - # watchdog_thresh=140 and DEFAULT_HUNG_TASK_TIMEOUT=140 both result in detection after 140-280 secs. - # - # 4. Finally, "no output" crashes. - # sys/targets.Timeouts.NoOutput is set to 300 secs. - # - # Don't change these values without considering all other values as well. - # Note: executor also sets hung_task_check_interval_secs=20 sysctl. - # Note: gVisor watchdog timeout is 3 mins + 1/4 of that for checking period = 3m45s. - # Note: other arches that use qemu emulation scale all these timeouts by 3 (sys/targets.Timeouts.Scale). - - RCU_CPU_STALL_TIMEOUT: 100 - # The constraints repeat constraints on inclusion of debug.yml which enables hung task detection. - - DEFAULT_HUNG_TASK_TIMEOUT: [140, -nonoise, -kmsan, -kcsan, -kmemleak] - # Should be faster this way. - MCORE2 - BASE_FULL -- cgit mrf-deployment