Diffstat (limited to 'executor/common_linux.h')
| -rw-r--r-- | executor/common_linux.h | 147 |
1 file changed, 147 insertions, 0 deletions
diff --git a/executor/common_linux.h b/executor/common_linux.h
index 324391196..546e60b79 100644
--- a/executor/common_linux.h
+++ b/executor/common_linux.h
@@ -1355,6 +1355,153 @@ static long syz_emit_ethernet(volatile long a0, volatile long a1, volatile long
 }
 #endif
 
+#if SYZ_EXECUTOR || __NR_syz_io_uring_submit || __NR_syz_io_uring_complete
+
+#define SIZEOF_IO_URING_SQE 64
+#define SIZEOF_IO_URING_CQE 16
+
+// Once an io_uring is set up by calling io_uring_setup, the offsets to the member fields
+// to be used on the mmap'ed area are set in structs io_sqring_offsets and io_cqring_offsets.
+// Except for io_sqring_offsets.array, the offsets are static, although they all depend on how
+// struct io_rings is organized in code. The offsets could be marked as resources in syzkaller
+// descriptions, but that makes it difficult for the fuzzer to generate correct programs. Thus,
+// the offsets are hard-coded here (and in the descriptions), and the array offset is computed
+// later, once the number of entries is available. Another way to obtain the offsets is to set up
+// another io_uring here and use what it returns. That is slower but might be more maintainable.
+#define SQ_HEAD_OFFSET 0
+#define SQ_TAIL_OFFSET 64
+#define SQ_RING_MASK_OFFSET 256
+#define SQ_RING_ENTRIES_OFFSET 264
+#define SQ_FLAGS_OFFSET 276
+#define SQ_DROPPED_OFFSET 272
+#define CQ_HEAD_OFFSET 128
+#define CQ_TAIL_OFFSET 192
+#define CQ_RING_MASK_OFFSET 260
+#define CQ_RING_ENTRIES_OFFSET 268
+#define CQ_RING_OVERFLOW_OFFSET 284
+#define CQ_FLAGS_OFFSET 280
+#define CQ_CQES_OFFSET 320
+#define SQ_ARRAY_OFFSET(sq_entries, cq_entries) (round_up(CQ_CQES_OFFSET + cq_entries * SIZEOF_IO_URING_CQE, 64))
+
+uint32 round_up(uint32 x, uint32 a)
+{
+	return (x + a - 1) & ~(a - 1);
+}
+
+#if SYZ_EXECUTOR || __NR_syz_io_uring_complete
+
+// From linux/io_uring.h
+struct io_uring_cqe {
+	uint64 user_data;
+	uint32 res;
+	uint32 flags;
+};
+
+static long syz_io_uring_complete(volatile long a0)
+{
+	// syzlang: syz_io_uring_complete(ring_ptr ring_ptr)
+	// C: syz_io_uring_complete(char* ring_ptr)
+
+	// It is not checked if the ring is empty
+
+	// Cast to original
+	char* ring_ptr = (char*)a0;
+
+	// Compute the head index and the next head value
+	uint32 cq_ring_mask = *(uint32*)(ring_ptr + CQ_RING_MASK_OFFSET);
+	uint32* cq_head_ptr = (uint32*)(ring_ptr + CQ_HEAD_OFFSET);
+	uint32 cq_head = *cq_head_ptr & cq_ring_mask;
+	uint32 cq_head_next = *cq_head_ptr + 1;
+
+	// Compute the ptr to the src cq entry on the ring
+	char* cqe_src = ring_ptr + CQ_CQES_OFFSET + cq_head * SIZEOF_IO_URING_CQE;
+
+	// Get the cq entry from the ring
+	struct io_uring_cqe cqe;
+	memcpy(&cqe, cqe_src, sizeof(cqe));
+
+	// Advance the head. Head is a free-flowing integer and relies on natural wrapping.
+	// Ensure that the kernel will never see a head update without the preceding CQE
+	// stores being done.
+	__atomic_store_n(cq_head_ptr, cq_head_next, __ATOMIC_RELEASE);
+
+	// In the descriptions (sys/linux/io_uring.txt), openat and openat2 are passed
+	// unique sqe.user_data values (0x12345 and 0x23456) to identify the operations
+	// that produce an fd instance. Check cqe.user_data, which should be the same
+	// as sqe.user_data for that operation. If it matches one of those values, return
+	// cqe.res as the fd. Otherwise, just return an invalid fd.
+	return (cqe.user_data == 0x12345 || cqe.user_data == 0x23456) ? (long)cqe.res : (long)-1;
+}
+
+#endif
+
+#if SYZ_EXECUTOR || __NR_syz_io_uring_submit
+
+static long syz_io_uring_submit(volatile long a0, volatile long a1, volatile long a2, volatile long a3)
+{
+	// syzlang: syz_io_uring_submit(ring_ptr ring_ptr, sqes_ptr sqes_ptr, sqe ptr[in, io_uring_sqe], sqes_index int32)
+	// C: syz_io_uring_submit(char* ring_ptr, io_uring_sqe* sqes_ptr, io_uring_sqe* sqe, uint32 sqes_index)
+
+	// It is not checked if the ring is full
+
+	// Cast to original
+	char* ring_ptr = (char*)a0; // This will be exposed to offsets in bytes
+	char* sqes_ptr = (char*)a1;
+	char* sqe = (char*)a2;
+	uint32 sqes_index = (uint32)a3;
+
+	uint32 sq_ring_entries = *(uint32*)(ring_ptr + SQ_RING_ENTRIES_OFFSET);
+	uint32 cq_ring_entries = *(uint32*)(ring_ptr + CQ_RING_ENTRIES_OFFSET);
+
+	// Compute the sq_array offset
+	uint32 sq_array_off = SQ_ARRAY_OFFSET(sq_ring_entries, cq_ring_entries);
+
+	// Get the ptr to the destination for the sqe
+	if (sq_ring_entries)
+		sqes_index %= sq_ring_entries;
+	char* sqe_dest = sqes_ptr + sqes_index * SIZEOF_IO_URING_SQE;
+
+	// Write the sqe entry to its destination in sqes
+	memcpy(sqe_dest, sqe, SIZEOF_IO_URING_SQE);
+
+	// Write the index to the sqe array
+	uint32 sq_ring_mask = *(uint32*)(ring_ptr + SQ_RING_MASK_OFFSET);
+	uint32* sq_tail_ptr = (uint32*)(ring_ptr + SQ_TAIL_OFFSET);
+	uint32 sq_tail = *sq_tail_ptr & sq_ring_mask;
+	uint32 sq_tail_next = *sq_tail_ptr + 1;
+	uint32* sq_array = (uint32*)(ring_ptr + sq_array_off);
+	*(sq_array + sq_tail) = sqes_index;
+
+	// Advance the tail. Tail is a free-flowing integer and relies on natural wrapping.
+	// Ensure that the kernel will never see a tail update without the preceding SQE
+	// stores being done.
+	__atomic_store_n(sq_tail_ptr, sq_tail_next, __ATOMIC_RELEASE);
+
+	// Now the application is free to call io_uring_enter() to submit the sqe
+	return 0;
+}
+
+#endif
+
+#endif
+
+// Same as memcpy except that it accepts offsets to dest and src.
+#if SYZ_EXECUTOR || __NR_syz_memcpy_off
+static long syz_memcpy_off(volatile long a0, volatile long a1, volatile long a2, volatile long a3, volatile long a4)
+{
+	// C: syz_memcpy_off(void* dest, uint32 dest_off, void* src, uint32 src_off, size_t n)
+
+	// Cast to original
+	char* dest = (char*)a0;
+	uint32 dest_off = (uint32)a1;
+	char* src = (char*)a2;
+	uint32 src_off = (uint32)a3;
+	size_t n = (size_t)a4;
+
+	return (long)memcpy(dest + dest_off, src + src_off, n);
+}
+#endif
+
 #if SYZ_EXECUTOR || SYZ_REPEAT && SYZ_NET_INJECTION
 static void flush_tun()
 {
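For context beyond the patch itself, below is a minimal sketch of how a generated program might drive the two pseudo-syscalls added here: set up a ring, mmap it at the layout the hard-coded offsets assume, submit one SQE, and reap its CQE. It is an illustration, not part of the change: the function name example_io_uring_usage, the entry count (4), the 4KB ring mapping, and the use of an all-zero SQE (IORING_OP_NOP) are assumptions made for the example, and it presumes a kernel with IORING_FEAT_SINGLE_MMAP (which the fixed offsets above rely on) plus the syz_io_uring_submit/syz_io_uring_complete helpers from this patch being compiled in.

#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

// Constants mirroring the io_uring UAPI (linux/io_uring.h); defined locally because
// common_linux.h carries its own minimal struct io_uring_cqe and avoids that header.
#ifndef IORING_OFF_SQ_RING
#define IORING_OFF_SQ_RING 0ULL
#define IORING_OFF_SQES 0x10000000ULL
#define IORING_ENTER_GETEVENTS 1u
#endif

static void example_io_uring_usage(void)
{
	// io_uring_setup() fills a 120-byte struct io_uring_params; this sketch only needs
	// the returned fd and the fixed offsets above, so an opaque zeroed buffer stands in
	// for the struct.
	char params[120];
	memset(params, 0, sizeof(params));
	int fd = syscall(__NR_io_uring_setup, 4 /* entries */, params);

	// One shared mapping for the SQ/CQ ring headers (IORING_FEAT_SINGLE_MMAP assumed)
	// plus a separate mapping for the SQE array. One page is assumed to be enough for
	// 4 SQ entries and the default 8 CQ entries.
	char* ring_ptr = (char*)mmap(0, 0x1000, PROT_READ | PROT_WRITE,
				     MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
	char* sqes_ptr = (char*)mmap(0, 4 * 64 /* SIZEOF_IO_URING_SQE */, PROT_READ | PROT_WRITE,
				     MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);

	// Build an SQE locally; all zeroes means opcode IORING_OP_NOP with user_data 0.
	char sqe[64];
	memset(sqe, 0, sizeof(sqe));

	// Copy the SQE into slot 0 of the SQE array and publish index 0 on the SQ ring.
	syz_io_uring_submit((long)ring_ptr, (long)sqes_ptr, (long)sqe, 0);

	// Let the kernel consume the SQE and wait for its completion.
	syscall(__NR_io_uring_enter, fd, 1, 1, IORING_ENTER_GETEVENTS, 0, 0);

	// Reap one CQE; user_data is 0 (not 0x12345/0x23456), so this returns -1.
	syz_io_uring_complete((long)ring_ptr);
}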
