Diffstat (limited to 'executor/common_linux.h')
| -rw-r--r-- | executor/common_linux.h | 147 |
1 file changed, 147 insertions, 0 deletions
diff --git a/executor/common_linux.h b/executor/common_linux.h
index 324391196..546e60b79 100644
--- a/executor/common_linux.h
+++ b/executor/common_linux.h
@@ -1355,6 +1355,153 @@ static long syz_emit_ethernet(volatile long a0, volatile long a1, volatile long
 }
 #endif
 
+#if SYZ_EXECUTOR || __NR_syz_io_uring_submit || __NR_syz_io_uring_complete
+
+#define SIZEOF_IO_URING_SQE 64
+#define SIZEOF_IO_URING_CQE 16
+
+// Once an io_uring is set up by calling io_uring_setup, the offsets to the member fields
+// to be used on the mmap'ed area are set in structs io_sqring_offsets and io_cqring_offsets.
+// Except for io_sqring_offsets.array, the offsets are static, although they all depend on how
+// struct io_rings is organized in code. The offsets could be marked as resources in syzkaller
+// descriptions, but that makes it difficult for the fuzzer to generate correct programs. Thus,
+// the offsets are hard-coded here (and in the descriptions), and the array offset is computed
+// later, once the number of entries is available. Another way to obtain the offsets is to set up
+// another io_uring here and use what it returns. That is slower but might be more maintainable.
+#define SQ_HEAD_OFFSET 0
+#define SQ_TAIL_OFFSET 64
+#define SQ_RING_MASK_OFFSET 256
+#define SQ_RING_ENTRIES_OFFSET 264
+#define SQ_FLAGS_OFFSET 276
+#define SQ_DROPPED_OFFSET 272
+#define CQ_HEAD_OFFSET 128
+#define CQ_TAIL_OFFSET 192
+#define CQ_RING_MASK_OFFSET 260
+#define CQ_RING_ENTRIES_OFFSET 268
+#define CQ_RING_OVERFLOW_OFFSET 284
+#define CQ_FLAGS_OFFSET 280
+#define CQ_CQES_OFFSET 320
+#define SQ_ARRAY_OFFSET(sq_entries, cq_entries) (round_up(CQ_CQES_OFFSET + cq_entries * SIZEOF_IO_URING_CQE, 64))
+
+uint32 round_up(uint32 x, uint32 a)
+{
+	return (x + a - 1) & ~(a - 1);
+}
+
+#if SYZ_EXECUTOR || __NR_syz_io_uring_complete
+
+// From linux/io_uring.h
+struct io_uring_cqe {
+	uint64 user_data;
+	uint32 res;
+	uint32 flags;
+};
+
+static long syz_io_uring_complete(volatile long a0)
+{
+	// syzlang: syz_io_uring_complete(ring_ptr ring_ptr)
+	// C: syz_io_uring_complete(char* ring_ptr)
+
+	// It is not checked if the ring is empty
+
+	// Cast to original
+	char* ring_ptr = (char*)a0;
+
+	// Compute the head index and the next head value
+	uint32 cq_ring_mask = *(uint32*)(ring_ptr + CQ_RING_MASK_OFFSET);
+	uint32* cq_head_ptr = (uint32*)(ring_ptr + CQ_HEAD_OFFSET);
+	uint32 cq_head = *cq_head_ptr & cq_ring_mask;
+	uint32 cq_head_next = *cq_head_ptr + 1;
+
+	// Compute the ptr to the src cq entry on the ring
+	char* cqe_src = ring_ptr + CQ_CQES_OFFSET + cq_head * SIZEOF_IO_URING_CQE;
+
+	// Get the cq entry from the ring
+	struct io_uring_cqe cqe;
+	memcpy(&cqe, cqe_src, sizeof(cqe));
+
+	// Advance the head. Head is a free-flowing integer and relies on natural wrapping.
+	// Ensure that the kernel will never see a head update without the preceding CQE
+	// stores being done.
+	__atomic_store_n(cq_head_ptr, cq_head_next, __ATOMIC_RELEASE);
+
+	// In the descriptions (sys/linux/io_uring.txt), openat and openat2 are passed
+	// unique sqe.user_data values (0x12345 and 0x23456) to identify the operations
+	// that produce an fd instance. Check cqe.user_data, which should be the same
+	// as sqe.user_data for that operation. If it matches one of those values, return
+	// cqe.res as the fd. Otherwise, just return an invalid fd.
+	return (cqe.user_data == 0x12345 || cqe.user_data == 0x23456) ? (long)cqe.res : (long)-1;
+}
+
+#endif
+
+#if SYZ_EXECUTOR || __NR_syz_io_uring_submit
+
+static long syz_io_uring_submit(volatile long a0, volatile long a1, volatile long a2, volatile long a3)
+{
+	// syzlang: syz_io_uring_submit(ring_ptr ring_ptr, sqes_ptr sqes_ptr, sqe ptr[in, io_uring_sqe], sqes_index int32)
+	// C: syz_io_uring_submit(char* ring_ptr, io_uring_sqe* sqes_ptr, io_uring_sqe* sqe, uint32 sqes_index)
+
+	// It is not checked if the ring is full
+
+	// Cast to original
+	char* ring_ptr = (char*)a0; // This will be exposed to offsets in bytes
+	char* sqes_ptr = (char*)a1;
+	char* sqe = (char*)a2;
+	uint32 sqes_index = (uint32)a3;
+
+	uint32 sq_ring_entries = *(uint32*)(ring_ptr + SQ_RING_ENTRIES_OFFSET);
+	uint32 cq_ring_entries = *(uint32*)(ring_ptr + CQ_RING_ENTRIES_OFFSET);
+
+	// Compute the sq_array offset
+	uint32 sq_array_off = SQ_ARRAY_OFFSET(sq_ring_entries, cq_ring_entries);
+
+	// Get the ptr to the destination for the sqe
+	if (sq_ring_entries)
+		sqes_index %= sq_ring_entries;
+	char* sqe_dest = sqes_ptr + sqes_index * SIZEOF_IO_URING_SQE;
+
+	// Write the sqe entry to its destination in sqes
+	memcpy(sqe_dest, sqe, SIZEOF_IO_URING_SQE);
+
+	// Write the index to the sqe array
+	uint32 sq_ring_mask = *(uint32*)(ring_ptr + SQ_RING_MASK_OFFSET);
+	uint32* sq_tail_ptr = (uint32*)(ring_ptr + SQ_TAIL_OFFSET);
+	uint32 sq_tail = *sq_tail_ptr & sq_ring_mask;
+	uint32 sq_tail_next = *sq_tail_ptr + 1;
+	uint32* sq_array = (uint32*)(ring_ptr + sq_array_off);
+	*(sq_array + sq_tail) = sqes_index;
+
+	// Advance the tail. Tail is a free-flowing integer and relies on natural wrapping.
+	// Ensure that the kernel will never see a tail update without the preceding SQE
+	// stores being done.
+	__atomic_store_n(sq_tail_ptr, sq_tail_next, __ATOMIC_RELEASE);
+
+	// Now the application is free to call io_uring_enter() to submit the sqe
+	return 0;
+}
+
+#endif
+
+#endif
+
+// Same as memcpy except that it accepts offsets to dest and src.
+#if SYZ_EXECUTOR || __NR_syz_memcpy_off
+static long syz_memcpy_off(volatile long a0, volatile long a1, volatile long a2, volatile long a3, volatile long a4)
+{
+	// C: syz_memcpy_off(void* dest, uint32 dest_off, void* src, uint32 src_off, size_t n)
+
+	// Cast to original
+	char* dest = (char*)a0;
+	uint32 dest_off = (uint32)a1;
+	char* src = (char*)a2;
+	uint32 src_off = (uint32)a3;
+	size_t n = (size_t)a4;
+
+	return (long)memcpy(dest + dest_off, src + src_off, n);
+}
+#endif
+
 #if SYZ_EXECUTOR || SYZ_REPEAT && SYZ_NET_INJECTION
 static void flush_tun()
 {
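For context beyond the patch itself, below is a minimal sketch of how a generated program might drive the two pseudo-syscalls added here: set up a ring, mmap it at the layout the hard-coded offsets assume, submit one SQE, and reap its CQE. It is an illustration, not part of the change: the function name example_io_uring_usage, the entry count (4), the 4KB ring mapping, and the use of an all-zero SQE (IORING_OP_NOP) are assumptions made for the example, and it presumes a kernel with IORING_FEAT_SINGLE_MMAP (which the fixed offsets above rely on) plus the syz_io_uring_submit/syz_io_uring_complete helpers from this patch being compiled in.

#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

// Constants mirroring the io_uring UAPI (linux/io_uring.h); defined locally because
// common_linux.h carries its own minimal struct io_uring_cqe and avoids that header.
#ifndef IORING_OFF_SQ_RING
#define IORING_OFF_SQ_RING 0ULL
#define IORING_OFF_SQES 0x10000000ULL
#define IORING_ENTER_GETEVENTS 1u
#endif

static void example_io_uring_usage(void)
{
	// io_uring_setup() fills a 120-byte struct io_uring_params; this sketch only needs
	// the returned fd and the fixed offsets above, so an opaque zeroed buffer stands in
	// for the struct.
	char params[120];
	memset(params, 0, sizeof(params));
	int fd = syscall(__NR_io_uring_setup, 4 /* entries */, params);

	// One shared mapping for the SQ/CQ ring headers (IORING_FEAT_SINGLE_MMAP assumed)
	// plus a separate mapping for the SQE array. One page is assumed to be enough for
	// 4 SQ entries and the default 8 CQ entries.
	char* ring_ptr = (char*)mmap(0, 0x1000, PROT_READ | PROT_WRITE,
				     MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
	char* sqes_ptr = (char*)mmap(0, 4 * 64 /* SIZEOF_IO_URING_SQE */, PROT_READ | PROT_WRITE,
				     MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);

	// Build an SQE locally; all zeroes means opcode IORING_OP_NOP with user_data 0.
	char sqe[64];
	memset(sqe, 0, sizeof(sqe));

	// Copy the SQE into slot 0 of the SQE array and publish index 0 on the SQ ring.
	syz_io_uring_submit((long)ring_ptr, (long)sqes_ptr, (long)sqe, 0);

	// Let the kernel consume the SQE and wait for its completion.
	syscall(__NR_io_uring_enter, fd, 1, 1, IORING_ENTER_GETEVENTS, 0, 0);

	// Reap one CQE; user_data is 0 (not 0x12345/0x23456), so this returns -1.
	syz_io_uring_complete((long)ring_ptr);
}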
