From 90d67044dab68568e8f35bc14b68055dbd166eff Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 4 Jun 2024 12:55:40 +0200 Subject: executor: refactor coverage filter --- executor/cov_filter.h | 53 ------------------- executor/cover_filter.h | 137 ++++++++++++++++++++++++++++++++++++++++++++++++ executor/executor.cc | 51 +++++++++++------- executor/shmem.h | 75 ++++++++++++++++++++++++++ executor/test.h | 97 +++++++++++++++++++++++++--------- 5 files changed, 315 insertions(+), 98 deletions(-) delete mode 100644 executor/cov_filter.h create mode 100644 executor/cover_filter.h create mode 100644 executor/shmem.h (limited to 'executor') diff --git a/executor/cov_filter.h b/executor/cov_filter.h deleted file mode 100644 index 1119a837a..000000000 --- a/executor/cov_filter.h +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2020 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -#include -#include -#include - -struct cov_filter_t { - uint64 pcstart; - uint64 pcsize; - uint8 bitmap[]; -}; - -static cov_filter_t* cov_filter; - -static void init_coverage_filter(char* filename) -{ - int f = open(filename, O_RDONLY); - if (f < 0) { - // We don't fail here because we don't know yet if we should use coverage filter or not. - // We will receive the flag only in execute flags and will fail in coverage_filter if necessary. - debug("bitmap is not found, coverage filter disabled\n"); - return; - } - struct stat st; - if (fstat(f, &st)) - fail("faied to stat coverage filter"); - // A random address for bitmap. Don't corrupt output_data. - void* preferred = (void*)0x110f230000ull; - cov_filter = (cov_filter_t*)mmap(preferred, st.st_size, PROT_READ, MAP_PRIVATE, f, 0); - if (cov_filter != preferred) - failmsg("failed to mmap coverage filter bitmap", "want=%p, got=%p", preferred, cov_filter); - if ((uint32)st.st_size != sizeof(uint64) * 2 + ((cov_filter->pcsize >> 4) / 8 + 2)) - fail("bad coverage filter bitmap size"); - close(f); -} - -static bool coverage_filter(uint64 pc) -{ - if (!flag_coverage_filter) - return true; - if (cov_filter == NULL) - fail("coverage filter was enabled but bitmap initialization failed"); - // Prevent out of bound while searching bitmap. - if (pc < cov_filter->pcstart || pc > cov_filter->pcstart + cov_filter->pcsize) - return false; - // For minimizing the size of bitmap, the lowest 4-bit will be dropped. - pc -= cov_filter->pcstart; - pc = pc >> 4; - uint64 idx = pc / 8; - uint64 shift = pc % 8; - return (cov_filter->bitmap[idx] & (1 << shift)) > 0; -} diff --git a/executor/cover_filter.h b/executor/cover_filter.h new file mode 100644 index 000000000..672e9fbec --- /dev/null +++ b/executor/cover_filter.h @@ -0,0 +1,137 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +// CoverFilter is PC hash set that can be placed in shared memory. +// +// The set can cover up to 4 distinct 1GB regions of PCs. +// This restriction allows for efficient, simple and shared memory compatible representation, +// but should be enough to cover any reasonable combination of kernel/modules mapping. +// +// Low 3 bits of PCs are discarded. This reduces memory consumption 8x, but allows for some false positives. +// However, in practice false positives should be very rare. A typical coverage call instruction is 4/5 bytes, +// and there must be at least 1 other instruction in between them to make them different basic blocks, +// so it's practically impossible to place 2 of them in the same 8-byte region. +// For signal with hashed low 12 bits the probability is also low b/c overall density of coverage callbacks +// is relatively low, a KASAN Linux kernel contains 1 callback per 88 bytes of code on average. +// So even if we discard low 3 bits, average densitiy is still 1/11. +// For gVisor with dense coverage IDs special care must be taken to avoid collisions. +// +// The set is organized as a 3 level table. +// The top "region" level is linear lookup, but contains at most 4 entries, each covering 1GB. +// Most likely the first entry is the right one. This level allows to cover unconnected regions of PCs. +// The next "L1" level splits 1GB chunks into 1MB chunks, and allows to allocate memory only +// for a subset of these 1MB chunks. +// The last "L2" level covers 1MB chunks with 16KB bitmaps (1MB divided by 8 for 3 discarded PC bits, +// and divided by 8 again for 8 bits in a byte). +class CoverFilter +{ +public: + CoverFilter(const char* file, void* preferred = nullptr) + : shmem_(file, preferred, kMemSize), tab_(static_cast(shmem_.Mem())) + { + } + + CoverFilter(int fd, void* preferred = nullptr) + : shmem_(fd, preferred, kMemSize, false), tab_(static_cast(shmem_.Mem())) + { + } + + void Insert(uint64 pc) + { + auto [byte, bit] = FindByte(pc, true); + byte |= bit; + } + + void Remove(uint64 pc) + { + auto [byte, bit] = FindByte(pc, true); + byte &= ~bit; + } + + bool Contains(uint64 pc) + { + auto [byte, bit] = FindByte(pc, false); + return byte & bit; + } + + // Prevents any future modifications to the filter. + void Seal() + { + shmem_.Seal(); + } + + int FD() const + { + return shmem_.FD(); + } + +private: + static constexpr size_t kNumRegions = 4; + static constexpr size_t kL1Size = 1 << 30; + static constexpr size_t kL2Size = 1 << 20; + static constexpr size_t kPCDivider = 8; + static constexpr size_t kByteBits = 8; + // Approximately how much .text we can cover (2GB of PCs require 32MB shmem region). + static constexpr size_t kMaxCovered = 2ull << 30; + static constexpr size_t kCompression = kPCDivider * kByteBits; + static constexpr size_t kMemSize = kMaxCovered / kCompression; + static constexpr size_t kNoRegion = static_cast(-1); + + struct Table { + uint64 regions[kNumRegions]; + uint16 l1[kNumRegions][kL1Size / kL2Size]; + uint8 l2[][kL2Size / kCompression]; + }; + + ShmemFile shmem_; + Table* tab_ = nullptr; + uint16 alloc_ = 0; + + std::pair FindByte(uint64 pc, bool add = false) + { + static const uint8 empty = 0; + size_t reg = FindRegion(pc, add); + if (reg == kNoRegion) + return {const_cast(empty), 0}; + size_t l1 = (pc % kL1Size) / kL2Size; + size_t l2 = tab_->l1[reg][l1]; + if (l2 == 0) { + if (!add) + return {const_cast(empty), 0}; + l2 = ++alloc_; + tab_->l1[reg][l1] = l2; + if ((tab_->l2[l2 - 1] + 1) > reinterpret_cast(tab_) + kMemSize) + Overflow(pc); + } + size_t off = (pc % kL2Size) / kCompression; + size_t shift = (pc / kPCDivider) % kByteBits; + return {tab_->l2[l2 - 1][off], 1 << shift}; + } + + size_t FindRegion(uint64 pc, bool add = false) + { + const uint64 reg = pc | (kL1Size - 1); + for (size_t r = 0; r < kNumRegions; r++) { + if (tab_->regions[r] == reg) + return r; + } + if (!add) + return kNoRegion; + for (size_t r = 0; r < kNumRegions; r++) { + if (tab_->regions[r] == 0) { + tab_->regions[r] = reg; + return r; + } + } + Overflow(pc); + } + + NORETURN void Overflow(uint64 pc) + { + failmsg("coverage filter is full", "pc=0x%llx regions=[0x%llx 0x%llx 0x%llx 0x%llx] alloc=%u", + pc, tab_->regions[0], tab_->regions[1], tab_->regions[2], tab_->regions[3], alloc_); + } + + CoverFilter(const CoverFilter&) = delete; + CoverFilter& operator=(const CoverFilter&) = delete; +}; diff --git a/executor/executor.cc b/executor/executor.cc index cb2245c69..ca728a6aa 100644 --- a/executor/executor.cc +++ b/executor/executor.cc @@ -177,7 +177,6 @@ static bool flag_collect_cover; static bool flag_collect_signal; static bool flag_dedup_cover; static bool flag_threaded; -static bool flag_coverage_filter; // If true, then executor should write the comparisons data to fuzzer. static bool flag_comparisons; @@ -302,6 +301,8 @@ struct handshake_req { uint64 flags; // env flags uint64 pid; uint64 sandbox_arg; + uint64 cover_filter_size; + // Followed by uint64[cover_filter_size] filter. }; struct handshake_reply { @@ -390,6 +391,7 @@ static void copyin(char* addr, uint64 val, uint64 size, uint64 bf, uint64 bf_off static bool copyout(char* addr, uint64 size, uint64* res); static void setup_control_pipes(); static void setup_features(char** enable, int n); +static bool coverage_filter(uint64 pc); #include "syscalls.h" @@ -413,10 +415,14 @@ static void setup_features(char** enable, int n); static feature_t features[] = {}; #endif -#include "cov_filter.h" +#include "shmem.h" + +#include "cover_filter.h" #include "test.h" +static std::optional cover_filter; + #if SYZ_HAVE_SANDBOX_ANDROID static uint64 sandbox_arg = 0; #endif @@ -509,17 +515,6 @@ int main(int argc, char** argv) // Don't enable comps because we don't use them in the fuzzer yet. cover_enable(&extra_cov, false, true); } - char sep = '/'; -#if GOOS_windows - sep = '\\'; -#endif - char filename[1024] = {0}; - char* end = strrchr(argv[0], sep); - size_t len = end - argv[0]; - strncpy(filename, argv[0], len + 1); - strncat(filename, "syz-cover-bitmap", 17); - filename[sizeof(filename) - 1] = '\0'; - init_coverage_filter(filename); } int status = 0; @@ -644,9 +639,9 @@ void parse_env_flags(uint64 flags) void receive_handshake() { handshake_req req = {}; - int n = read(kInPipeFd, &req, sizeof(req)); + ssize_t n = read(kInPipeFd, &req, sizeof(req)); if (n != sizeof(req)) - failmsg("handshake read failed", "read=%d", n); + failmsg("handshake read failed", "read=%zu", n); if (req.magic != kInMagic) failmsg("bad handshake magic", "magic=0x%llx", req.magic); #if SYZ_HAVE_SANDBOX_ANDROID @@ -654,6 +649,18 @@ void receive_handshake() #endif parse_env_flags(req.flags); procid = req.pid; + if (!req.cover_filter_size) + return; + // A random address for bitmap. Don't corrupt output_data. + cover_filter.emplace("syz-cover-filer", reinterpret_cast(0x110f230000ull)); + std::vector pcs(req.cover_filter_size); + const ssize_t filter_size = req.cover_filter_size * sizeof(uint64); + n = read(kInPipeFd, &pcs[0], filter_size); + if (n != filter_size) + failmsg("failed to read cover filter", "read=%zu", n); + for (auto pc : pcs) + cover_filter->Insert(pc); + cover_filter->Seal(); } void reply_handshake() @@ -684,13 +691,12 @@ void receive_execute() flag_dedup_cover = req.exec_flags & (1 << 2); flag_comparisons = req.exec_flags & (1 << 3); flag_threaded = req.exec_flags & (1 << 4); - flag_coverage_filter = req.exec_flags & (1 << 5); - debug("[%llums] exec opts: procid=%llu threaded=%d cover=%d comps=%d dedup=%d signal=%d" - " timeouts=%llu/%llu/%llu filter=%d\n", + debug("[%llums] exec opts: procid=%llu threaded=%d cover=%d comps=%d dedup=%d signal=%d " + " timeouts=%llu/%llu/%llu\n", current_time_ms() - start_time_ms, procid, flag_threaded, flag_collect_cover, flag_comparisons, flag_dedup_cover, flag_collect_signal, syscall_timeout_ms, - program_timeout_ms, slowdown_scale, flag_coverage_filter); + program_timeout_ms, slowdown_scale); if (syscall_timeout_ms == 0 || program_timeout_ms <= syscall_timeout_ms || slowdown_scale == 0) failmsg("bad timeouts", "syscall=%llu, program=%llu, scale=%llu", syscall_timeout_ms, program_timeout_ms, slowdown_scale); @@ -1036,6 +1042,13 @@ void write_coverage_signal(cover_t* cov, uint32* signal_count_pos, uint32* cover } } +bool coverage_filter(uint64 pc) +{ + if (!cover_filter) + return true; + return cover_filter->Contains(pc); +} + void handle_completion(thread_t* th) { if (event_isset(&th->ready) || !event_isset(&th->done) || !th->executing) diff --git a/executor/shmem.h b/executor/shmem.h new file mode 100644 index 000000000..ab9d17300 --- /dev/null +++ b/executor/shmem.h @@ -0,0 +1,75 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +#include +#include +#include +#include + +// ShmemFile is shared memory region wrapper. +class ShmemFile +{ +public: + // Maps shared memory region of size 'size' from a new file 'file', preferably at the address 'preferred'. + ShmemFile(const char* file, void* preferred, size_t size) + { + fd_ = open(file, O_RDWR | O_CREAT | O_TRUNC, 0600); + if (fd_ == -1) + failmsg("shmem open failed", "file=%s", file); + if (fallocate(fd_, 0, 0, size)) + failmsg("shmem fallocate failed", "size=%zu", size); + Mmap(fd_, preferred, size, true); + if (unlink(file)) + fail("shmem unlink failed"); + } + + // Maps shared memory region from the file 'fd' in read/write or write-only mode. + ShmemFile(int fd, void* preferred, size_t size, bool write) + { + Mmap(fd, preferred, size, write); + } + + ~ShmemFile() + { + if (munmap(mem_, size_)) + fail("shmem munmap failed"); + if (fd_ != -1) + close(fd_); + } + + // Prevents any future modifications to the region. + void Seal() + { + if (mprotect(mem_, size_, PROT_READ)) + fail("shmem mprotect failed"); + if (fd_ != -1) + close(fd_); + fd_ = -1; + } + + int FD() const + { + return fd_; + } + + void* Mem() const + { + return mem_; + } + +private: + void* mem_ = nullptr; + size_t size_ = 0; + int fd_ = -1; + + void Mmap(int fd, void* preferred, size_t size, bool write) + { + size_ = size; + mem_ = mmap(preferred, size, PROT_READ | (write ? PROT_WRITE : 0), MAP_SHARED, fd, 0); + if (mem_ == MAP_FAILED) + failmsg("shmem mmap failed", "size=%zu", size); + } + + ShmemFile(const ShmemFile&) = delete; + ShmemFile& operator=(const ShmemFile&) = delete; +}; diff --git a/executor/test.h b/executor/test.h index bb1291f97..c49459033 100644 --- a/executor/test.h +++ b/executor/test.h @@ -201,37 +201,82 @@ static int test_csum_inet_acc() return 0; } -static int test_coverage_filter() +static int test_cover_filter() { - struct tmp_cov_filter_t { - uint64 pcstart; - uint64 pcsize; - uint8 bitmap[((0x1000 >> 4) + 7) / 8]; + char* tmp = tempnam(nullptr, "syz-test-cover-filter"); + CoverFilter filter(tmp); + CoverFilter child(filter.FD()); + free(tmp); + + std::vector pcs = { + 100, + 111, + 200, + (1 << 20) - 1, + 1 << 20, + (1 << 30) - 1, + 100ull << 30, + (100ull << 30) + 100, + 200ull << 30, + (1ull << 62) + 100, }; - static struct tmp_cov_filter_t tmp_cov_filter; - tmp_cov_filter.pcstart = 0xffffffff81000000; - tmp_cov_filter.pcsize = 0x1000; - cov_filter = (cov_filter_t*)&tmp_cov_filter; - flag_coverage_filter = true; - uint64 enable_pc = 0xffffffff81000765; - uint64 disable_pc = 0xffffffff81000627; - uint64 out_pc = 0xffffffff82000000; + // These we don't insert, but they are also present due to truncation of low 3 bits. + std::vector also_contain = { + 96, + 103, + 104, + 207, + (1 << 20) - 7, + (1 << 20) + 7, + (1ull << 62) + 96, + (1ull << 62) + 103, + }; - uint32 idx = ((enable_pc - cov_filter->pcstart) >> 4) / 8; - uint32 shift = ((enable_pc - cov_filter->pcstart) >> 4) % 8; - cov_filter->bitmap[idx] |= (1 << shift); + std::vector dont_contain = { + 0, + 1, + 95, + 112, + 199, + 208, + 100 << 10, + (1 << 20) - 9, + (1 << 20) + 8, + (2ull << 30) - 1, + 2ull << 30, + (2ull << 30) + 1, + (100ull << 30) + 108, + 150ull << 30, + 1ull << 40, + 1ull << 63, + ~0ull, + }; - if (!coverage_filter(enable_pc)) - return 1; - if (coverage_filter(disable_pc)) - return 1; - if (coverage_filter(out_pc)) - return 1; + int ret = 0; + for (auto pc : pcs) + filter.Insert(pc); + pcs.insert(pcs.end(), also_contain.begin(), also_contain.end()); + for (auto pc : pcs) { + if (!filter.Contains(pc) || !child.Contains(pc)) { + printf("filter doesn't contain %llu (0x%llx)\n", pc, pc); + ret = 1; + } + } + for (auto pc : dont_contain) { + if (filter.Contains(pc) || child.Contains(pc)) { + printf("filter contains %llu (0x%llx)\n", pc, pc); + ret = 1; + } + } - cov_filter = NULL; - flag_coverage_filter = false; - return 0; + filter.Remove(105); + if (filter.Contains(104) || filter.Contains(105) || filter.Contains(111)) + printf("filter contains 105 after removal\n"); + if (!filter.Contains(103)) + printf("filter doesn't contains 103 after 105 removal\n"); + + return ret; } static struct { @@ -244,7 +289,7 @@ static struct { #if GOOS_linux && (GOARCH_amd64 || GOARCH_ppc64 || GOARCH_ppc64le) {"test_kvm", test_kvm}, #endif - {"test_coverage_filter", test_coverage_filter}, + {"test_cover_filter", test_cover_filter}, }; static int run_tests(const char* test) -- cgit mrf-deployment