From 07c84b670b4a25a7795e9fb8d47abe0922d2976b Mon Sep 17 00:00:00 2001 From: Victor Chibotaru Date: Wed, 16 Aug 2017 21:18:04 +0200 Subject: executor, ipc: modify the IO between KCOV<->executor<->fuzzer Now executor is able to read comparisons data from KCOV and write them to fuzzer. --- executor/executor.cc | 153 ++++++++++++++++++++++++++++++++++++++------------- pkg/ipc/ipc.go | 95 ++++++++++++++++++++++++++++++-- prog/hints.go | 50 +++++++++++++++++ 3 files changed, 255 insertions(+), 43 deletions(-) create mode 100644 prog/hints.go diff --git a/executor/executor.cc b/executor/executor.cc index fdd44e246..8f5e221ac 100644 --- a/executor/executor.cc +++ b/executor/executor.cc @@ -106,6 +106,75 @@ struct res_t { res_t results[kMaxCommands]; +enum { + KCOV_CMP_CONST = 1, + KCOV_CMP_SIZE1 = 0, + KCOV_CMP_SIZE2 = 2, + KCOV_CMP_SIZE4 = 4, + KCOV_CMP_SIZE8 = 6, + KCOV_CMP_SIZE_MASK = 6, +}; + +struct kcov_comparison_t { + uint64_t type; + uint64_t arg1; + uint64_t arg2; + + bool operator==(const struct kcov_comparison_t& other) const + { + return type == other.type && arg1 == other.arg1 && arg2 == other.arg2; + } + + bool operator<(const struct kcov_comparison_t& other) const + { + if (type != other.type) + return type < other.type; + if (arg1 != other.arg1) + return arg1 < other.arg1; + return arg2 < other.arg2; + } + + // Writes the structure using the write_one function for each field. + // Inspired by write_output() function. + void write(uint32_t* (*write_one)(uint32_t)) + { + // Write order: type arg1 arg2. + write_one((uint32_t)type); + + // KCOV converts all arguments of size x first to uintx_t and then to + // uint64_t. We want to properly extend signed values, e.g we want + // int8_t c = 0xfe to be represented as 0xfffffffffffffffe. + // Note that uint8_t c = 0xfe will be represented the same way. + // This is ok because during hints processing we will anyways try + // the value 0x00000000000000fe. 
+ switch (type & KCOV_CMP_SIZE_MASK) { + case KCOV_CMP_SIZE1: + arg1 = (uint64_t)(int64_t)(int8_t)arg1; + arg2 = (uint64_t)(int64_t)(int8_t)arg2; + break; + case KCOV_CMP_SIZE2: + arg1 = (uint64_t)(int64_t)(int16_t)arg1; + arg2 = (uint64_t)(int64_t)(int16_t)arg2; + break; + case KCOV_CMP_SIZE4: + arg1 = (uint64_t)(int64_t)(int32_t)arg1; + arg2 = (uint64_t)(int64_t)(int32_t)arg2; + break; + } + bool is_size_8 = (type & KCOV_CMP_SIZE_MASK) == KCOV_CMP_SIZE8; + if (!is_size_8) { + write_one((uint32_t)arg1); + write_one((uint32_t)arg2); + return; + } + // If we have 64 bits arguments then write them in Little-endian. + write_one((uint32_t)(arg1 & 0xFFFFFFFF)); + write_one((uint32_t)(arg1 >> 32)); + write_one((uint32_t)(arg2 & 0xFFFFFFFF)); + write_one((uint32_t)(arg2 >> 32)); + } +}; + struct thread_t { bool created; int id; @@ -599,46 +668,56 @@ void handle_completion(thread_t* th) write_output(th->fault_injected); uint32_t* signal_count_pos = write_output(0); // filled in later uint32_t* cover_count_pos = write_output(0); // filled in later - - // Write out feedback signals. - // Currently it is code edges computed as xor of two subsequent basic block PCs. - uint64_t* cover_data = th->cover_data + 1; - uint32_t cover_size = th->cover_size; - uint32_t prev = 0; - uint32_t nsig = 0; - for (uint32_t i = 0; i < cover_size; i++) { - uint32_t pc = cover_data[i]; - uint32_t sig = pc ^ prev; - prev = hash(pc); - if (dedup(sig)) - continue; - write_output(sig); - nsig++; - } - *signal_count_pos = nsig; - if (flag_collect_cover) { - // Write out real coverage (basic block PCs). 
- if (flag_dedup_cover) { - std::sort(cover_data, cover_data + cover_size); - uint64_t w = 0; - uint64_t last = 0; - for (uint32_t i = 0; i < cover_size; i++) { - uint64_t pc = cover_data[i]; - if (pc == last) - continue; - cover_data[w++] = last = pc; + uint32_t* comps_count_pos = write_output(0); // filled in later + uint32_t nsig = 0, cover_size = 0, comps_size = 0; + + if (flag_collect_comps) { + // Collect only the comparisons + comps_size = th->cover_size; + kcov_comparison_t* start = (kcov_comparison_t*)th->cover_data; + kcov_comparison_t* end = start + comps_size; + std::sort(start, end); + comps_size = std::unique(start, end) - start; + for (uint32_t i = 0; i < comps_size; ++i) + start[i].write(write_output); + } else { + // Write out feedback signals. + // Currently it is code edges computed as xor of + // two subsequent basic block PCs. + uint32_t prev = 0; + for (uint32_t i = 0; i < th->cover_size; i++) { + uint32_t pc = (uint32_t)th->cover_data[i]; + uint32_t sig = pc ^ prev; + prev = hash(pc); + if (dedup(sig)) + continue; + write_output(sig); + nsig++; + } + if (flag_collect_cover) { + // Write out real coverage (basic block PCs). + cover_size = th->cover_size; + if (flag_dedup_cover) { + uint64_t* start = (uint64_t*)th->cover_data; + uint64_t* end = start + cover_size; + std::sort(start, end); + cover_size = std::unique(start, end) - start; } - cover_size = w; + // Truncate PCs to uint32_t assuming that they fit into 32-bits. + // True for x86_64 and arm64 without KASLR. + for (uint32_t i = 0; i < cover_size; i++) + write_output((uint32_t)th->cover_data[i]); } - // Truncate PCs to uint32_t assuming that they fit into 32-bits. - // True for x86_64 and arm64 without KASLR. 
- for (uint32_t i = 0; i < cover_size; i++) - write_output((uint32_t)cover_data[i]); - *cover_count_pos = cover_size; } - debug("out #%u: index=%u num=%u errno=%d sig=%u cover=%u\n", - completed, th->call_index, th->call_num, reserrno, nsig, cover_size); - + // Write out real coverage (basic block PCs). + *cover_count_pos = cover_size; + // Write out number of comparisons + *comps_count_pos = comps_size; + // Write out number of signals + *signal_count_pos = nsig; + debug("out #%u: index=%u num=%u errno=%d sig=%u cover=%u comps=%u\n", + completed, th->call_index, th->call_num, reserrno, nsig, + cover_size, comps_size); completed++; __atomic_store_n(output_data, completed, __ATOMIC_RELEASE); } diff --git a/pkg/ipc/ipc.go b/pkg/ipc/ipc.go index 674596d60..af0173c2e 100644 --- a/pkg/ipc/ipc.go +++ b/pkg/ipc/ipc.go @@ -64,6 +64,11 @@ const ( statusFail = 67 statusError = 68 statusRetry = 69 + + // Comparison types masks taken from KCOV headers. + compSizeMask = 6 + compSize8 = 6 + compConstMask = 1 ) var ( @@ -222,10 +227,19 @@ type CallInfo struct { Signal []uint32 // feedback signal, filled if FlagSignal is set Cover []uint32 // per-call coverage, filled if FlagSignal is set and cover == true, //if dedup == false, then cov effectively contains a trace, otherwise duplicates are removed - Errno int // call errno (0 if the call was successful) + Comps prog.CompMap // per-call comparison operands + Errno int // call errno (0 if the call was successful) FaultInjected bool } +func GetCompMaps(info []CallInfo) []prog.CompMap { + compMaps := make([]prog.CompMap, len(info)) + for i, inf := range info { + compMaps[i] = inf.Comps + } + return compMaps +} + // Exec starts executor binary to execute program p and returns information about the execution: // output: process output // info: per-call info @@ -264,7 +278,8 @@ func (env *Env) Exec(opts *ExecOpts, p *prog.Prog) (output []byte, info []CallIn return } - if env.config.Flags&FlagSignal == 0 || p == nil { + if p == nil || 
env.config.Flags&FlagSignal == 0 && + env.config.Flags&FlagCollectComps == 0 { return } info, err0 = env.readOutCoverage(p) @@ -282,9 +297,27 @@ func (env *Env) readOutCoverage(p *prog.Prog) (info []CallInfo, err0 error) { return true } + readOutAndSetErr := func(v *uint32, msg string, args ...interface{}) bool { + if !readOut(v) { + err0 = fmt.Errorf(msg, args...) + return false + } + return true + } + + // Reads out a 64-bit int in Little-endian as two blocks of 32 bits. + readOut64 := func(v *uint64, msg string, args ...interface{}) bool { + var a, b uint32 + if !(readOutAndSetErr(&a, msg, args...) && readOutAndSetErr(&b, msg, args...)) { + return false + } + *v = uint64(a) + uint64(b)<<32 + return true + } + var ncmd uint32 - if !readOut(&ncmd) { - err0 = fmt.Errorf("executor %v: failed to read output coverage", env.pid) + if !readOutAndSetErr(&ncmd, + "executor %v: failed to read output coverage", env.pid) { return } info = make([]CallInfo, len(p.Calls)) @@ -303,8 +336,8 @@ func (env *Env) readOutCoverage(p *prog.Prog) (info []CallInfo, err0 error) { return buf.String() } for i := uint32(0); i < ncmd; i++ { - var callIndex, callNum, errno, faultInjected, signalSize, coverSize uint32 - if !readOut(&callIndex) || !readOut(&callNum) || !readOut(&errno) || !readOut(&faultInjected) || !readOut(&signalSize) || !readOut(&coverSize) { + var callIndex, callNum, errno, faultInjected, signalSize, coverSize, compsSize uint32 + if !readOut(&callIndex) || !readOut(&callNum) || !readOut(&errno) || !readOut(&faultInjected) || !readOut(&signalSize) || !readOut(&coverSize) || !readOut(&compsSize) { err0 = fmt.Errorf("executor %v: failed to read output coverage", env.pid) return } @@ -331,8 +364,10 @@ func (env *Env) readOutCoverage(p *prog.Prog) (info []CallInfo, err0 error) { env.pid, i, callIndex, signalSize, coverSize) return } + // Read out signals. info[callIndex].Signal = out[:signalSize:signalSize] out = out[signalSize:] + // Read out coverage. 
if coverSize > uint32(len(out)) { err0 = fmt.Errorf("executor %v: failed to read output coverage: record %v, call %v, signalsize=%v coversize=%v", env.pid, i, callIndex, signalSize, coverSize) @@ -340,6 +375,54 @@ } info[callIndex].Cover = out[:coverSize:coverSize] out = out[coverSize:] + // Read out comparisons. + compMap := make(prog.CompMap) + for j := uint32(0); j < compsSize; j++ { + var typ uint32 + var op1, op2 uint64 + if !readOutAndSetErr(&typ, + "executor %v: failed while reading type of comparison %v", env.pid, j) { + return + } + if typ > compConstMask|compSizeMask { + err0 = fmt.Errorf("executor %v: got wrong value (%v) while reading type of comparison %v", + env.pid, typ, j) + return + } + + isSize8 := (typ & compSizeMask) == compSize8 + isConst := (typ & compConstMask) != 0 + arg1ErrString := "executor %v: failed while reading op1 of comparison %v" + arg2ErrString := "executor %v: failed while reading op2 of comparison %v" + if !isSize8 { + var tmp1, tmp2 uint32 + if !readOutAndSetErr(&tmp1, arg1ErrString, env.pid, j) || + !readOutAndSetErr(&tmp2, arg2ErrString, env.pid, j) { + return + } + op1 = uint64(tmp1) + op2 = uint64(tmp2) + } else { + if !readOut64(&op1, arg1ErrString, env.pid, j) || + !readOut64(&op2, arg2ErrString, env.pid, j) { + return + } + } + if op1 == op2 { + // It's useless to store such comparisons. + continue + } + compMap.AddComp(op2, op1) + if isConst { + // If one of the operands was const, then this operand is always + // placed first in the instrumented callbacks. Such an operand + // could not be an argument of our syscalls (because otherwise + // it wouldn't be const), thus we simply ignore it. 
+ continue + } + compMap.AddComp(op1, op2) + } + info[callIndex].Comps = compMap } return } diff --git a/prog/hints.go b/prog/hints.go new file mode 100644 index 000000000..50efc6b8a --- /dev/null +++ b/prog/hints.go @@ -0,0 +1,50 @@ +// Copyright 2017 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package prog + +// A hint is basically a tuple consisting of a pointer to an argument +// in one of the syscalls of a program and a value, which should be +// assigned to that argument (we call it a replacer). + +// A simplified version of hints workflow looks like this: +// 1. Fuzzer launches a program (we call it a hint seed) and collects all +// the comparisons' data for every syscall in the program. +// 2. Next it tries to match the obtained comparison operands' values +// vs. the input arguments' values. +// 3. For every such match the fuzzer mutates the program by +// replacing the pointed argument with the saved value. +// 4. If a valid program is obtained, then fuzzer launches it and +// checks if new coverage is obtained. +// For more insights on particular mutations please see prog/hints_test.go. + +type uint64Set map[uint64]bool + +// Example: for comparisons {(op1, op2), (op1, op3), (op1, op4), (op2, op1)} +// this map will store the following: +// m = { +// op1: {map[op2]: true, map[op3]: true, map[op4]: true}, +// op2: {map[op1]: true} +// }. +type CompMap map[uint64]uint64Set + +var specialIntsSet uint64Set + +func (m CompMap) AddComp(arg1, arg2 uint64) { + if _, ok := specialIntsSet[arg2]; ok { + // We don't want to add arg2 because it's in the set of + // "special" values, which the fuzzer will try anyways. + return + } + if _, ok := m[arg1]; !ok { + m[arg1] = make(uint64Set) + } + m[arg1][arg2] = true +} + +func init() { + specialIntsSet = make(uint64Set) + for _, v := range specialInts { + specialIntsSet[v] = true + } +} -- cgit mrf-deployment