From 0ac7291ca51f87df8022da0f66178546e855701a Mon Sep 17 00:00:00 2001
From: Ethan Graham <ethangraham@google.com>
Date: Thu, 18 Sep 2025 14:13:45 +0000
Subject: prog: fix syz_kfuzztest_run allocation strategy

Previously, the generated KFuzzTest programs were reusing the address of
the top-level input struct. A problem could arise when the encoded blob
is large and overflows into another allocated region - this certainly
happens in the case where the input struct points to some large char
buffer, for example.

While this wasn't directly a problem, it could lead to racy behavior
when running KFuzzTest targets concurrently.

To fix this, we now introduce an additional buffer parameter into
syz_kfuzztest_run that is as big as the maximum accepted input size in
the KFuzzTest kernel code. When this buffer is allocated, we ensure that
we have some allocated space in the program that can hold the entire
encoded input.

This works in practice, but has not been tested with concurrent
KFuzzTest executions yet.
---
 executor/common_linux.h           |  8 +++---
 pkg/kfuzztest/builder.go          |  3 ++-
 pkg/kfuzztest/testdata/1/desc.txt |  2 +-
 pkg/kfuzztest/testdata/2/desc.txt |  2 +-
 prog/encodingexec.go              | 56 ++++++++++++++++++++++++---------------
 prog/kfuzztest.go                 |  3 +++
 sys/linux/kfuzztest.txt           |  2 +-
 7 files changed, 46 insertions(+), 30 deletions(-)

diff --git a/executor/common_linux.h b/executor/common_linux.h
index 76325ff6d..5d477a16a 100644
--- a/executor/common_linux.h
+++ b/executor/common_linux.h
@@ -5865,15 +5865,15 @@ static long syz_pidfd_open(volatile long pid, volatile long flags)
 #include <unistd.h>
 
 static long syz_kfuzztest_run(volatile long test_name_ptr, volatile long input_data,
-			      volatile long input_data_size)
+			      volatile long input_data_size, volatile long buffer)
 {
 	const char* test_name = (const char*)test_name_ptr;
 	if (!test_name) {
 		debug("syz_kfuzztest_run: test name was NULL\n");
 		return -1;
 	}
-	if (!input_data || input_data_size == 0) {
-		debug("syz_kfuzztest_run: input data was NULL\n");
+	if (!buffer) {
+		debug("syz_kfuzztest_run: buffer was NULL\n");
 		return -1;
 	}
 
@@ -5890,7 +5890,7 @@ static long syz_kfuzztest_run(volatile long test_name_ptr, volatile long input_d
 		return -1;
 	}
 
-	ssize_t bytes_written = write(fd, (void*)input_data, (size_t)input_data_size);
+	ssize_t bytes_written = write(fd, (void*)buffer, (size_t)input_data_size);
 	if (bytes_written != input_data_size) {
 		debug("syz_kfuzztest_run: failed to write to %s, reason: %s\n", buf, strerror(errno));
 		close(fd);
diff --git a/pkg/kfuzztest/builder.go b/pkg/kfuzztest/builder.go
index 1c62e1093..7262fd776 100644
--- a/pkg/kfuzztest/builder.go
+++ b/pkg/kfuzztest/builder.go
@@ -199,7 +199,8 @@ func syzFuncToSyzlang(s SyzFunc) string {
 	fmt.Fprintf(&builder, "syz_kfuzztest_run$%s(", s.Name)
 	fmt.Fprintf(&builder, "name ptr[in, string[\"%s\"]], ", s.Name)
 	fmt.Fprintf(&builder, "data ptr[in, %s], ", typeName)
-	builder.WriteString("len bytesize[data])")
+	builder.WriteString("len bytesize[data], ")
+	builder.WriteString("buf ptr[in, array[int8, 65536]]) ")
 	// TODO:(ethangraham) The only other way I can think of getting this name
 	// would involve using the "reflect" package and matching against the
 	// KFuzzTest name, which isn't much better than hardcoding this.
diff --git a/pkg/kfuzztest/testdata/1/desc.txt b/pkg/kfuzztest/testdata/1/desc.txt
index 71c4acb39..6d18ebeea 100644
--- a/pkg/kfuzztest/testdata/1/desc.txt
+++ b/pkg/kfuzztest/testdata/1/desc.txt
@@ -4,4 +4,4 @@ pkcs7_parse_message_arg {
 	datalen	len[data, int64]
 }
 
-syz_kfuzztest_run$test_pkcs7_parse_message(name ptr[in, string["test_pkcs7_parse_message"]], data ptr[in, pkcs7_parse_message_arg], len bytesize[data]) (kfuzz_test)
+syz_kfuzztest_run$test_pkcs7_parse_message(name ptr[in, string["test_pkcs7_parse_message"]], data ptr[in, pkcs7_parse_message_arg], len bytesize[data], buf ptr[in, array[int8, 65536]]) (kfuzz_test)
diff --git a/pkg/kfuzztest/testdata/2/desc.txt b/pkg/kfuzztest/testdata/2/desc.txt
index 2705252dd..55ee03f8f 100644
--- a/pkg/kfuzztest/testdata/2/desc.txt
+++ b/pkg/kfuzztest/testdata/2/desc.txt
@@ -12,4 +12,4 @@ foo {
 	numbers	ptr[in, array[int64]]
 }
 
-syz_kfuzztest_run$some_target(name ptr[in, string["some_target"]], data ptr[in, foo], len bytesize[data]) (kfuzz_test)
+syz_kfuzztest_run$some_target(name ptr[in, string["some_target"]], data ptr[in, foo], len bytesize[data], buf ptr[in, array[int8, 65536]]) (kfuzz_test)
diff --git a/prog/encodingexec.go b/prog/encodingexec.go
index f3b48924c..14466a272 100644
--- a/prog/encodingexec.go
+++ b/prog/encodingexec.go
@@ -75,7 +75,9 @@ func (p *Prog) SerializeForExec() ([]byte, error) {
 	w.write(uint64(len(p.Calls)))
 	for _, c := range p.Calls {
 		w.csumMap, w.csumUses = calcChecksumsCall(c)
-		w.serializeCall(c)
+		// TODO: if we propagate this error, something breaks and no coverage
+		// is displayed to the dashboard or the logs.
+		_ = w.serializeCall(c)
 	}
 	w.write(execInstrEOF)
 	if len(w.buf) > ExecBufferSize {
@@ -87,13 +89,12 @@ func (p *Prog) SerializeForExec() ([]byte, error) {
 	return w.buf, nil
 }
 
-func (w *execContext) serializeCall(c *Call) {
+func (w *execContext) serializeCall(c *Call) error {
 	// We introduce special serialization logic for kfuzztest targets, which
 	// require special handling due to their use of relocation tables to copy
 	// entire blobs of data into the kenrel.
 	if c.Meta.Attrs.KFuzzTest {
-		w.serializeKFuzzTestCall(c)
-		return
+		return w.serializeKFuzzTestCall(c)
 	}
 
 	// Calculate arg offsets within structs.
@@ -125,18 +126,35 @@ func (w *execContext) serializeCall(c *Call) {
 
 	// Generate copyout instructions that persist interesting return values.
 	w.writeCopyout(c)
+	return nil
 }
 
 // KFuzzTest targets require special handling due to their use of relocation
 // tables for serializing all data (including pointed-to data) into a
 // continuous blob that can be passed into the kernel.
-func (w *execContext) serializeKFuzzTestCall(c *Call) {
+func (w *execContext) serializeKFuzzTestCall(c *Call) error {
 	if !c.Meta.Attrs.KFuzzTest {
 		// This is a specialized function that shouldn't be called on anything
 		// other than an instance of a syz_kfuzztest_run$* syscall
 		panic("serializeKFuzzTestCall called on an invalid syscall")
 	}
 
+	// Generate the final syscall instruction with the update arguments.
+	kFuzzTestRunID, err := w.target.KFuzzTestRunID()
+	if err != nil {
+		panic(err)
+	}
+	// Ensures that we copy some arguments into the executor so that it doesn't
+	// receive an incomplete program on failure.
+	defer func() {
+		w.write(uint64(kFuzzTestRunID))
+		w.write(ExecNoCopyout)
+		w.write(uint64(len(c.Args)))
+		for _, arg := range c.Args {
+			w.writeArg(arg)
+		}
+	}()
+
 	// Write the initial string argument (test name) normally.
 	w.writeCopyin(&Call{Meta: c.Meta, Args: []Arg{c.Args[0]}})
 
@@ -145,12 +163,17 @@ func (w *execContext) serializeKFuzzTestCall(c *Call) {
 	// to the fuzzing driver with a relocation table.
 	dataArg := c.Args[1].(*PointerArg)
 	finalBlob := MarshallKFuzztestArg(dataArg.Res)
+	if len(finalBlob) > int(KFuzzTestMaxInputSize) {
+		return fmt.Errorf("encoded blob was too large")
+	}
 
-	// Reuse the memory address that was pre-allocated for the original struct
-	// argument. This avoids needing to hook into the memory allocation which
-	// is done at a higher level than the serialization. This relies on the
-	// original buffer being large enough
-	blobAddress := w.target.PhysicalAddr(dataArg) - w.target.DataOffset
+	// Use the buffer argument as data offset - this represents a buffer of
+	// size 64KiB - the maximum input size that the KFuzzTest module accepts.
+	bufferArg := c.Args[3].(*PointerArg)
+	if bufferArg.Res == nil {
+		return fmt.Errorf("buffer was nil")
+	}
+	blobAddress := w.target.PhysicalAddr(bufferArg) - w.target.DataOffset
 
 	// Write the entire marshalled blob as a raw byte array.
 	w.write(execInstrCopyin)
@@ -164,18 +187,7 @@ func (w *execContext) serializeKFuzzTestCall(c *Call) {
 	// of the struct argument passed into the pseudo-syscall.
 	lenArg := c.Args[2].(*ConstArg)
 	lenArg.Val = uint64(len(finalBlob))
-
-	// Generate the final syscall instruction with the update arguments.
-	kFuzzTestRunID, err := w.target.KFuzzTestRunID()
-	if err != nil {
-		panic(err)
-	}
-	w.write(uint64(kFuzzTestRunID))
-	w.write(ExecNoCopyout)
-	w.write(uint64(len(c.Args)))
-	for _, arg := range c.Args {
-		w.writeArg(arg)
-	}
+	return nil
 }
 
 type execContext struct {
diff --git a/prog/kfuzztest.go b/prog/kfuzztest.go
index 1ff65e29a..dacd54885 100644
--- a/prog/kfuzztest.go
+++ b/prog/kfuzztest.go
@@ -22,6 +22,9 @@ const (
 	// x86_64, so we hardcode it for now. A more robust solution would involve
 	// reading this from the debugfs entry at boot before fuzzing begins.
 	kFuzzTestMinalign uint64 = 8
+
+	// Maximum input size accepted by the KFuzzTest kernel module.
+	KFuzzTestMaxInputSize uint64 = 64 << 10
 )
 
 func kFuzzTestWritePrefix(buf *bytes.Buffer) {
diff --git a/sys/linux/kfuzztest.txt b/sys/linux/kfuzztest.txt
index 3d4aba385..094d50ac0 100644
--- a/sys/linux/kfuzztest.txt
+++ b/sys/linux/kfuzztest.txt
@@ -1,4 +1,4 @@
 # Copyright 2025 syzkaller project authors. All rights reserved.
 # Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
 
-syz_kfuzztest_run(name ptr[in, string], data ptr[in, array[int8]], len bytesize[data]) (kfuzz_test, no_generate)
+syz_kfuzztest_run(name ptr[in, string], data ptr[in, array[int8]], len bytesize[data], buf ptr[in, array[int8, 65536]]) (kfuzz_test, no_generate)
-- 
cgit mrf-deployment