From fd8caa5462e64f37cb9eebd75ffca1737dde447d Mon Sep 17 00:00:00 2001
From: Aleksandr Nogikh <nogikh@google.com>
Date: Thu, 23 Sep 2021 16:15:41 +0000
Subject: all: replace collide mode by `async` call property

Replace the currently existing straightforward approach to race triggering
(that was almost entirely implemented inside syz-executor) with a more
flexible one.

The `async` call property instructs syz-executor not to wait for the
call to complete, but to proceed immediately to the next call.
The decision on which calls to mark with `async` is made by syz-fuzzer.

Ultimately this should let us implement more intelligent race-provoking
strategies as well as make more fine-grained reproducers.
---
 executor/common.h    |  14 +---
 executor/executor.cc | 182 ++++++++++++++++++++++++---------------------------
 2 files changed, 86 insertions(+), 110 deletions(-)

(limited to 'executor')

diff --git a/executor/common.h b/executor/common.h
index 1e6eca5ae..95888de80 100644
--- a/executor/common.h
+++ b/executor/common.h
@@ -514,10 +514,6 @@ static void loop(void)
 	fprintf(stderr, "### start\n");
 #endif
 	int i, call, thread;
-#if SYZ_COLLIDE
-	int collide = 0;
-again:
-#endif
 	for (call = 0; call < /*{{{NUM_CALLS}}}*/; call++) {
 		for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0])); thread++) {
 			struct thread_t* th = &threads[thread];
@@ -534,8 +530,8 @@ again:
 			th->call = call;
 			__atomic_fetch_add(&running, 1, __ATOMIC_RELAXED);
 			event_set(&th->ready);
-#if SYZ_COLLIDE
-			if (collide && (call % 2) == 0)
+#if SYZ_ASYNC
+			if (/*{{{ASYNC_CONDITIONS}}}*/)
 				break;
 #endif
 			event_timedwait(&th->done, /*{{{CALL_TIMEOUT_MS}}}*/);
@@ -547,12 +543,6 @@ again:
 #if SYZ_HAVE_CLOSE_FDS
 	close_fds();
 #endif
-#if SYZ_COLLIDE
-	if (!collide) {
-		collide = 1;
-		goto again;
-	}
-#endif
 }
 #endif
 
diff --git a/executor/executor.cc b/executor/executor.cc
index 3b4d93eba..01b19b81e 100644
--- a/executor/executor.cc
+++ b/executor/executor.cc
@@ -131,8 +131,9 @@ const uint64 kOutputBase = 0x1b2bc20000ull;
 // the amount we might possibly need for the specific received prog.
 const int kMaxOutputComparisons = 14 << 20; // executions with comparsions enabled are usually < 1% of all executions
 const int kMaxOutputCoverage = 6 << 20; // coverage is needed in ~ up to 1/3 of all executions (depending on corpus rotation)
-const int kMaxOutputSignal = 4 << 20; // signal collection is always required
-const int kInitialOutput = kMaxOutputSignal; // the minimal size to be allocated in the parent process
+const int kMaxOutputSignal = 4 << 20;
+const int kMinOutput = 256 << 10; // if we don't need to send signal, the output is rather short.
+const int kInitialOutput = kMinOutput; // the minimal size to be allocated in the parent process
 #else
 // We don't fork and allocate the memory only once, so prepare for the worst case.
 const int kInitialOutput = 14 << 20;
@@ -174,9 +175,9 @@ static bool flag_wifi;
 static bool flag_delay_kcov_mmap;
 
 static bool flag_collect_cover;
+static bool flag_collect_signal;
 static bool flag_dedup_cover;
 static bool flag_threaded;
-static bool flag_collide;
 static bool flag_coverage_filter;
 
 // If true, then executor should write the comparisons data to fuzzer.
@@ -212,7 +213,6 @@ const uint64 binary_format_stroct = 4;
 const uint64 no_copyout = -1;
 
 static int running;
-static bool collide;
 uint32 completed;
 bool is_kernel_64_bit = true;
 
@@ -263,7 +263,6 @@ struct thread_t {
 	event_t done;
 	uint64* copyout_pos;
 	uint64 copyout_index;
-	bool colliding;
 	bool executing;
 	int call_index;
 	int call_num;
@@ -368,7 +367,7 @@ struct feature_t {
 	void (*setup)();
 };
 
-static thread_t* schedule_call(int call_index, int call_num, bool colliding, uint64 copyout_index, uint64 num_args, uint64* args, uint64* pos, call_props_t call_props);
+static thread_t* schedule_call(int call_index, int call_num, uint64 copyout_index, uint64 num_args, uint64* args, uint64* pos, call_props_t call_props);
 static void handle_completion(thread_t* th);
 static void copyout_call_results(thread_t* th);
 static void write_call_output(thread_t* th, bool finished);
@@ -653,18 +652,17 @@ void receive_execute()
 	syscall_timeout_ms = req.syscall_timeout_ms;
 	program_timeout_ms = req.program_timeout_ms;
 	slowdown_scale = req.slowdown_scale;
-	flag_collect_cover = req.exec_flags & (1 << 0);
-	flag_dedup_cover = req.exec_flags & (1 << 1);
-	flag_comparisons = req.exec_flags & (1 << 2);
-	flag_threaded = req.exec_flags & (1 << 3);
-	flag_collide = req.exec_flags & (1 << 4);
+	flag_collect_signal = req.exec_flags & (1 << 0);
+	flag_collect_cover = req.exec_flags & (1 << 1);
+	flag_dedup_cover = req.exec_flags & (1 << 2);
+	flag_comparisons = req.exec_flags & (1 << 3);
+	flag_threaded = req.exec_flags & (1 << 4);
 	flag_coverage_filter = req.exec_flags & (1 << 5);
-	if (!flag_threaded)
-		flag_collide = false;
-	debug("[%llums] exec opts: procid=%llu threaded=%d collide=%d cover=%d comps=%d dedup=%d"
+
+	debug("[%llums] exec opts: procid=%llu threaded=%d cover=%d comps=%d dedup=%d signal=%d"
 	      " timeouts=%llu/%llu/%llu prog=%llu filter=%d\n",
-	      current_time_ms() - start_time_ms, procid, flag_threaded, flag_collide,
-	      flag_collect_cover, flag_comparisons, flag_dedup_cover, syscall_timeout_ms,
+	      current_time_ms() - start_time_ms, procid, flag_threaded, flag_collect_cover,
+	      flag_comparisons, flag_dedup_cover, flag_collect_signal, syscall_timeout_ms,
 	      program_timeout_ms, slowdown_scale, req.prog_size, flag_coverage_filter);
 	if (syscall_timeout_ms == 0 || program_timeout_ms <= syscall_timeout_ms || slowdown_scale == 0)
 		failmsg("bad timeouts", "syscall=%llu, program=%llu, scale=%llu",
@@ -689,6 +687,11 @@ void receive_execute()
 		failmsg("bad input size", "size=%lld, want=%lld", pos, req.prog_size);
 }
 
+bool cover_collection_required()
+{
+	return flag_coverage && (flag_collect_signal || flag_collect_cover || flag_comparisons);
+}
+
 #if GOOS_akaros
 void resend_execute(int fd)
 {
@@ -718,7 +721,7 @@ void realloc_output_data()
 		mmap_output(kMaxOutputComparisons);
 	else if (flag_collect_cover)
 		mmap_output(kMaxOutputCoverage);
-	else if (flag_coverage)
+	else if (flag_collect_signal)
 		mmap_output(kMaxOutputSignal);
 	if (close(kOutFd) < 0)
 		fail("failed to close kOutFd");
@@ -729,21 +732,15 @@ void realloc_output_data()
 // execute_one executes program stored in input_data.
 void execute_one()
 {
-	// Duplicate global collide variable on stack.
-	// Fuzzer once come up with ioctl(fd, FIONREAD, 0x920000),
-	// where 0x920000 was exactly collide address, so every iteration reset collide to 0.
-	bool colliding = false;
 #if SYZ_EXECUTOR_USES_SHMEM
 	realloc_output_data();
 	output_pos = output_data;
 	write_output(0); // Number of executed syscalls (updated later).
 #endif
 	uint64 start = current_time_ms();
-
-retry:
 	uint64* input_pos = (uint64*)input_data;
 
-	if (flag_coverage && !colliding) {
+	if (cover_collection_required()) {
 		if (!flag_threaded)
 			cover_enable(&threads[0].cov, flag_comparisons, false);
 		if (flag_extra_coverage)
@@ -753,7 +750,6 @@ retry:
 	int call_index = 0;
 	uint64 prog_extra_timeout = 0;
 	uint64 prog_extra_cover_timeout = 0;
-	bool has_fault_injection = false;
 	call_props_t call_props;
 	memset(&call_props, 0, sizeof(call_props));
 
@@ -863,7 +859,6 @@ retry:
 			prog_extra_cover_timeout = std::max(prog_extra_cover_timeout, 500 * slowdown_scale);
 		if (strncmp(syscalls[call_num].name, "syz_80211_inject_frame", strlen("syz_80211_inject_frame")) == 0)
 			prog_extra_cover_timeout = std::max(prog_extra_cover_timeout, 300 * slowdown_scale);
-		has_fault_injection |= (call_props.fail_nth > 0);
 		uint64 copyout_index = read_input(&input_pos);
 		uint64 num_args = read_input(&input_pos);
 		if (num_args > kMaxArgs)
@@ -873,12 +868,13 @@ retry:
 			args[i] = read_arg(&input_pos);
 		for (uint64 i = num_args; i < kMaxArgs; i++)
 			args[i] = 0;
-		thread_t* th = schedule_call(call_index++, call_num, colliding, copyout_index,
+		thread_t* th = schedule_call(call_index++, call_num, copyout_index,
 					     num_args, args, input_pos, call_props);
 
-		if (colliding && (call_index % 2) == 0) {
-			// Don't wait for every other call.
-			// We already have results from the previous execution.
+		if (call_props.async) {
+			if (!flag_threaded)
+				fail("SYZFAIL: unable to do an async call in a non-threaded mode");
+			// Don't wait for an async call to finish. We'll wait at the end.
 		} else if (flag_threaded) {
 			// Wait for call completion.
 			uint64 timeout_ms = syscall_timeout_ms + call->attrs.timeout * slowdown_scale;
@@ -907,7 +903,7 @@ retry:
 		memset(&call_props, 0, sizeof(call_props));
 	}
 
-	if (!colliding && !collide && running > 0) {
+	if (running > 0) {
 		// Give unfinished syscalls some additional time.
 		last_scheduled = 0;
 		uint64 wait_start = current_time_ms();
@@ -927,7 +923,7 @@ retry:
 			for (int i = 0; i < kMaxThreads; i++) {
 				thread_t* th = &threads[i];
 				if (th->executing) {
-					if (flag_coverage)
+					if (cover_collection_required())
 						cover_collect(&th->cov);
 					write_call_output(th, false);
 				}
@@ -939,33 +935,25 @@ retry:
 	close_fds();
 #endif
 
-	if (!colliding && !collide) {
+	write_extra_output();
+	// Check for new extra coverage in small intervals to avoid situation
+	// that we were killed on timeout before we write any.
+	// Check for extra coverage is very cheap, effectively a memory load.
+	const uint64 kSleepMs = 100;
+	for (uint64 i = 0; i < prog_extra_cover_timeout / kSleepMs; i++) {
+		sleep_ms(kSleepMs);
 		write_extra_output();
-		// Check for new extra coverage in small intervals to avoid situation
-		// that we were killed on timeout before we write any.
-		// Check for extra coverage is very cheap, effectively a memory load.
-		const uint64 kSleepMs = 100;
-		for (uint64 i = 0; i < prog_extra_cover_timeout / kSleepMs; i++) {
-			sleep_ms(kSleepMs);
-			write_extra_output();
-		}
-	}
-
-	if (flag_collide && !colliding && !has_fault_injection && !collide) {
-		debug("enabling collider\n");
-		collide = colliding = true;
-		goto retry;
 	}
 }
 
-thread_t* schedule_call(int call_index, int call_num, bool colliding, uint64 copyout_index, uint64 num_args, uint64* args, uint64* pos, call_props_t call_props)
+thread_t* schedule_call(int call_index, int call_num, uint64 copyout_index, uint64 num_args, uint64* args, uint64* pos, call_props_t call_props)
 {
 	// Find a spare thread to execute the call.
 	int i = 0;
 	for (; i < kMaxThreads; i++) {
 		thread_t* th = &threads[i];
 		if (!th->created)
-			thread_create(th, i, flag_coverage && !colliding);
+			thread_create(th, i, cover_collection_required());
 		if (event_isset(&th->done)) {
 			if (th->executing)
 				handle_completion(th);
@@ -979,7 +967,6 @@ thread_t* schedule_call(int call_index, int call_num, bool colliding, uint64 cop
 		failmsg("bad thread state in schedule", "ready=%d done=%d executing=%d",
 			event_isset(&th->ready), event_isset(&th->done), th->executing);
 	last_scheduled = th;
-	th->colliding = colliding;
 	th->copyout_pos = pos;
 	th->copyout_index = copyout_index;
 	event_reset(&th->done);
@@ -1002,44 +989,46 @@ void write_coverage_signal(cover_t* cov, uint32* signal_count_pos, uint32* cover
 	// Write out feedback signals.
 	// Currently it is code edges computed as xor of two subsequent basic block PCs.
 	cover_data_t* cover_data = (cover_data_t*)(cov->data + cov->data_offset);
-	uint32 nsig = 0;
-	cover_data_t prev_pc = 0;
-	bool prev_filter = true;
-	for (uint32 i = 0; i < cov->size; i++) {
-		cover_data_t pc = cover_data[i] + cov->pc_offset;
-		uint32 sig = pc;
-		if (use_cover_edges(pc))
-			sig ^= hash(prev_pc);
-		bool filter = coverage_filter(pc);
-		// Ignore the edge only if both current and previous PCs are filtered out
-		// to capture all incoming and outcoming edges into the interesting code.
-		bool ignore = !filter && !prev_filter;
-		prev_pc = pc;
-		prev_filter = filter;
-		if (ignore || dedup(sig))
-			continue;
-		write_output(sig);
-		nsig++;
+	if (flag_collect_signal) {
+		uint32 nsig = 0;
+		cover_data_t prev_pc = 0;
+		bool prev_filter = true;
+		for (uint32 i = 0; i < cov->size; i++) {
+			cover_data_t pc = cover_data[i] + cov->pc_offset;
+			uint32 sig = pc;
+			if (use_cover_edges(pc))
+				sig ^= hash(prev_pc);
+			bool filter = coverage_filter(pc);
+			// Ignore the edge only if both current and previous PCs are filtered out
+			// to capture all incoming and outcoming edges into the interesting code.
+			bool ignore = !filter && !prev_filter;
+			prev_pc = pc;
+			prev_filter = filter;
+			if (ignore || dedup(sig))
+				continue;
+			write_output(sig);
+			nsig++;
+		}
+		// Write out number of signals.
+		*signal_count_pos = nsig;
 	}
-	// Write out number of signals.
-	*signal_count_pos = nsig;
 
-	if (!flag_collect_cover)
-		return;
-	// Write out real coverage (basic block PCs).
-	uint32 cover_size = cov->size;
-	if (flag_dedup_cover) {
-		cover_data_t* end = cover_data + cover_size;
-		cover_unprotect(cov);
-		std::sort(cover_data, end);
-		cover_size = std::unique(cover_data, end) - cover_data;
-		cover_protect(cov);
+	if (flag_collect_cover) {
+		// Write out real coverage (basic block PCs).
+		uint32 cover_size = cov->size;
+		if (flag_dedup_cover) {
+			cover_data_t* end = cover_data + cover_size;
+			cover_unprotect(cov);
+			std::sort(cover_data, end);
+			cover_size = std::unique(cover_data, end) - cover_data;
+			cover_protect(cov);
+		}
+		// Truncate PCs to uint32 assuming that they fit into 32-bits.
+		// True for x86_64 and arm64 without KASLR.
+		for (uint32 i = 0; i < cover_size; i++)
+			write_output(cover_data[i] + cov->pc_offset);
+		*cover_count_pos = cover_size;
 	}
-	// Truncate PCs to uint32 assuming that they fit into 32-bits.
-	// True for x86_64 and arm64 without KASLR.
-	for (uint32 i = 0; i < cover_size; i++)
-		write_output(cover_data[i] + cov->pc_offset);
-	*cover_count_pos = cover_size;
 }
 #endif
 
@@ -1050,21 +1039,20 @@ void handle_completion(thread_t* th)
 			event_isset(&th->ready), event_isset(&th->done), th->executing);
 	if (th->res != (intptr_t)-1)
 		copyout_call_results(th);
-	if (!collide && !th->colliding) {
-		write_call_output(th, true);
-		write_extra_output();
-	}
+
+	write_call_output(th, true);
+	write_extra_output();
 	th->executing = false;
 	running--;
 	if (running < 0) {
 		// This fires periodically for the past 2 years (see issue #502).
-		fprintf(stderr, "running=%d collide=%d completed=%d flag_threaded=%d flag_collide=%d current=%d\n",
-			running, collide, completed, flag_threaded, flag_collide, th->id);
+		fprintf(stderr, "running=%d completed=%d flag_threaded=%d current=%d\n",
+			running, completed, flag_threaded, th->id);
 		for (int i = 0; i < kMaxThreads; i++) {
 			thread_t* th1 = &threads[i];
-			fprintf(stderr, "th #%2d: created=%d executing=%d colliding=%d"
+			fprintf(stderr, "th #%2d: created=%d executing=%d"
 					" ready=%d done=%d call_index=%d res=%lld reserrno=%d\n",
-				i, th1->created, th1->executing, th1->colliding,
+				i, th1->created, th1->executing,
 				event_isset(&th1->ready), event_isset(&th1->done),
 				th1->call_index, (uint64)th1->res, th1->reserrno);
 		}
@@ -1143,7 +1131,7 @@ void write_call_output(thread_t* th, bool finished)
 		}
 		// Write out number of comparisons.
 		*comps_count_pos = comps_size;
-	} else if (flag_coverage) {
+	} else if (flag_collect_signal || flag_collect_cover) {
 		if (is_kernel_64_bit)
 			write_coverage_signal<uint64>(&th->cov, signal_count_pos, cover_count_pos);
 		else
@@ -1176,7 +1164,7 @@ void write_call_output(thread_t* th, bool finished)
 void write_extra_output()
 {
 #if SYZ_EXECUTOR_USES_SHMEM
-	if (!flag_coverage || !flag_extra_coverage || flag_comparisons)
+	if (!cover_collection_required() || !flag_extra_coverage || flag_comparisons)
 		return;
 	cover_collect(&extra_cov);
 	if (!extra_cov.size)
@@ -1230,7 +1218,7 @@ void* worker_thread(void* arg)
 {
 	thread_t* th = (thread_t*)arg;
 	current_thread = th;
-	if (flag_coverage)
+	if (cover_collection_required())
 		cover_enable(&th->cov, flag_comparisons, false);
 	for (;;) {
 		event_wait(&th->ready);
@@ -1256,8 +1244,6 @@ void execute_call(thread_t* th)
 	int fail_fd = -1;
 	th->soft_fail_state = false;
 	if (th->call_props.fail_nth > 0) {
-		if (collide)
-			fail("both collide and fault injection are enabled");
 		fail_fd = inject_fault(th->call_props.fail_nth);
 		th->soft_fail_state = true;
 	}
-- 
cgit mrf-deployment