diff options
Diffstat (limited to 'executor')
| -rw-r--r-- | executor/executor.cc | 7 | ||||
| -rw-r--r-- | executor/executor_runner.h | 82 | ||||
| -rw-r--r-- | executor/snapshot.h | 2 |
3 files changed, 75 insertions, 16 deletions
diff --git a/executor/executor.cc b/executor/executor.cc index 305675bce..5b5e06422 100644 --- a/executor/executor.cc +++ b/executor/executor.cc @@ -470,7 +470,8 @@ static void setup_control_pipes(); static bool coverage_filter(uint64 pc); static rpc::ComparisonRaw convert(const kcov_comparison_t& cmp); static flatbuffers::span<uint8_t> finish_output(OutputData* output, int proc_id, uint64 req_id, uint32 num_calls, - uint64 elapsed, uint64 freshness, uint32 status, const std::vector<uint8_t>* process_output); + uint64 elapsed, uint64 freshness, uint32 status, bool hanged, + const std::vector<uint8_t>* process_output); static void parse_execute(const execute_req& req); static void parse_handshake(const handshake_req& req); @@ -1343,7 +1344,7 @@ void write_extra_output() } flatbuffers::span<uint8_t> finish_output(OutputData* output, int proc_id, uint64 req_id, uint32 num_calls, uint64 elapsed, - uint64 freshness, uint32 status, const std::vector<uint8_t>* process_output) + uint64 freshness, uint32 status, bool hanged, const std::vector<uint8_t>* process_output) { // In snapshot mode the output size is fixed and output_size is always initialized, so use it. int out_size = flag_snapshot ? output_size : output->size.load(std::memory_order_relaxed) ? @@ -1376,7 +1377,7 @@ flatbuffers::span<uint8_t> finish_output(OutputData* output, int proc_id, uint64 flatbuffers::Offset<flatbuffers::Vector<uint8_t>> output_off = 0; if (process_output) output_off = fbb.CreateVector(*process_output); - auto exec_off = rpc::CreateExecResultRaw(fbb, req_id, proc_id, output_off, error_off, prog_info_off); + auto exec_off = rpc::CreateExecResultRaw(fbb, req_id, proc_id, output_off, hanged, error_off, prog_info_off); auto msg_off = rpc::CreateExecutorMessageRaw(fbb, rpc::ExecutorMessagesRaw::ExecResult, flatbuffers::Offset<void>(exec_off.o)); fbb.FinishSizePrefixed(msg_off); diff --git a/executor/executor_runner.h b/executor/executor_runner.h index 30a5f3a6d..5680f8fc2 100644 --- a/executor/executor_runner.h +++ b/executor/executor_runner.h @@ -27,16 +27,62 @@ inline std::ostream& operator<<(std::ostream& ss, const rpc::ExecRequestRawT& re << "\n"; } +// ProcIDPool allows to reuse a set of unique proc IDs across a set of subprocesses. +// +// When a subprocess hangs, it's a bit unclear what to do (we don't have means to kill +// the whole tree of its children, and waiting for all them will presumably hang as well). +// Later there may appear a "task hung" report from the kernel, so we don't want to terminate +// the VM immidiatly. But the "task hung" report may also not appear at all, so we can't +// just wait for a hanged subprocesses forever. +// +// So in that case we kill/wait just the top subprocesses, and give it a new proc ID +// (since some resources associated with the old proc ID may still be used by the old +// unterminated test processes). However, we don't have infinite number of proc IDs, +// so we recycle them in FIFO order. This is not ideal, but it looks like the best +// practical solution. +class ProcIDPool +{ +public: + ProcIDPool(int num_procs) + { + // Theoretically we have 32 procs (prog.MaxPids), but there are some limitations in descriptions + // that make them work well only for up to 10 procs. For example, we form /dev/loopN + // device name using proc['0', 1, int8]. When these limitations are fixed, + // we can use all 32 here (prog.MaxPids) + constexpr int kNumGoodProcs = 10; + for (int i = 0; i < std::max(num_procs, kNumGoodProcs); i++) + ids_.push_back(i); + } + + int Alloc(int old = -1) + { + if (old >= 0) + ids_.push_back(old); + if (ids_.empty()) + fail("out of proc ids"); + int id = ids_.front(); + ids_.pop_front(); + return id; + } + +private: + std::deque<int> ids_; + + ProcIDPool(const ProcIDPool&) = delete; + ProcIDPool& operator=(const ProcIDPool&) = delete; +}; + // Proc represents one subprocess that runs tests (re-execed syz-executor with 'exec' argument). // The object is persistent and re-starts subprocess when it crashes. class Proc { public: - Proc(Connection& conn, const char* bin, int id, int& restarting, const bool& corpus_triaged, int max_signal_fd, int cover_filter_fd, + Proc(Connection& conn, const char* bin, int id, ProcIDPool& proc_id_pool, int& restarting, const bool& corpus_triaged, int max_signal_fd, int cover_filter_fd, bool use_cover_edges, bool is_kernel_64_bit, uint32 slowdown, uint32 syscall_timeout_ms, uint32 program_timeout_ms) : conn_(conn), bin_(bin), - id_(id), + proc_id_pool_(proc_id_pool), + id_(proc_id_pool.Alloc()), restarting_(restarting), corpus_triaged_(corpus_triaged), max_signal_fd_(max_signal_fd), @@ -92,7 +138,7 @@ public: // fork server is enabled, so we use quite large timeout. Child process can be slow // due to global locks in namespaces and other things, so let's better wait than // report false misleading crashes. - uint64 timeout = 2 * program_timeout_ms_; + uint64 timeout = 3 * program_timeout_ms_; #else uint64 timeout = program_timeout_ms_; #endif @@ -134,7 +180,8 @@ private: Connection& conn_; const char* const bin_; - const int id_; + ProcIDPool& proc_id_pool_; + int id_; int& restarting_; const bool& corpus_triaged_; const int max_signal_fd_; @@ -215,7 +262,15 @@ private: while (ReadOutput()) ; } - HandleCompletion(status); + bool hanged = SYZ_EXECUTOR_USES_FORK_SERVER && state_ == State::Executing; + HandleCompletion(status, hanged); + if (hanged) { + // If the process has hanged, it may still be using per-proc resources, + // so allocate a fresh proc id. + int new_id = proc_id_pool_.Alloc(id_); + debug("proc %d: changing proc id to %d\n", id_, new_id); + id_ = new_id; + } } else if (attempts_ > 3) sleep_ms(100 * attempts_); Start(); @@ -350,7 +405,7 @@ private: } } - void HandleCompletion(uint32 status) + void HandleCompletion(uint32 status, bool hanged = false) { if (!msg_) fail("don't have executed msg"); @@ -370,7 +425,7 @@ private: } } uint32 num_calls = read_input(&prog_data); - auto data = finish_output(resp_mem_, id_, msg_->id, num_calls, elapsed, freshness_++, status, output); + auto data = finish_output(resp_mem_, id_, msg_->id, num_calls, elapsed, freshness_++, status, hanged, output); conn_.Send(data.data(), data.size()); resp_mem_->Reset(); @@ -458,12 +513,14 @@ public: : conn_(conn), vm_index_(vm_index) { - size_t num_procs = Handshake(); + int num_procs = Handshake(); + proc_id_pool_.emplace(num_procs); int max_signal_fd = max_signal_ ? max_signal_->FD() : -1; int cover_filter_fd = cover_filter_ ? cover_filter_->FD() : -1; - for (size_t i = 0; i < num_procs; i++) - procs_.emplace_back(new Proc(conn, bin, i, restarting_, corpus_triaged_, max_signal_fd, cover_filter_fd, - use_cover_edges_, is_kernel_64_bit_, slowdown_, syscall_timeout_ms_, program_timeout_ms_)); + for (int i = 0; i < num_procs; i++) + procs_.emplace_back(new Proc(conn, bin, i, *proc_id_pool_, restarting_, corpus_triaged_, + max_signal_fd, cover_filter_fd, use_cover_edges_, is_kernel_64_bit_, slowdown_, + syscall_timeout_ms_, program_timeout_ms_)); for (;;) Loop(); @@ -474,6 +531,7 @@ private: const int vm_index_; std::optional<CoverFilter> max_signal_; std::optional<CoverFilter> cover_filter_; + std::optional<ProcIDPool> proc_id_pool_; std::vector<std::unique_ptr<Proc>> procs_; std::deque<rpc::ExecRequestRawT> requests_; std::vector<std::string> leak_frames_; @@ -545,7 +603,7 @@ private: failmsg("bad restarting", "restarting=%d", restarting_); } - size_t Handshake() + int Handshake() { rpc::ConnectRequestRawT conn_req; conn_req.id = vm_index_; diff --git a/executor/snapshot.h b/executor/snapshot.h index ce3e355b0..462fb2c56 100644 --- a/executor/snapshot.h +++ b/executor/snapshot.h @@ -251,7 +251,7 @@ NORETURN static void SnapshotDone(bool failed) debug("SnapshotDone\n"); CoverAccessScope scope(nullptr); uint32 num_calls = output_data->num_calls.load(std::memory_order_relaxed); - auto data = finish_output(output_data, 0, 0, num_calls, 0, 0, failed ? kFailStatus : 0, nullptr); + auto data = finish_output(output_data, 0, 0, num_calls, 0, 0, failed ? kFailStatus : 0, false, nullptr); ivs.hdr->output_offset = data.data() - reinterpret_cast<volatile uint8_t*>(ivs.hdr); ivs.hdr->output_size = data.size(); SnapshotSetState(failed ? rpc::SnapshotState::Failed : rpc::SnapshotState::Executed); |
