aboutsummaryrefslogtreecommitdiffstats
path: root/executor
diff options
context:
space:
mode:
Diffstat (limited to 'executor')
-rw-r--r--executor/executor.cc7
-rw-r--r--executor/executor_runner.h82
-rw-r--r--executor/snapshot.h2
3 files changed, 75 insertions, 16 deletions
diff --git a/executor/executor.cc b/executor/executor.cc
index 305675bce..5b5e06422 100644
--- a/executor/executor.cc
+++ b/executor/executor.cc
@@ -470,7 +470,8 @@ static void setup_control_pipes();
static bool coverage_filter(uint64 pc);
static rpc::ComparisonRaw convert(const kcov_comparison_t& cmp);
static flatbuffers::span<uint8_t> finish_output(OutputData* output, int proc_id, uint64 req_id, uint32 num_calls,
- uint64 elapsed, uint64 freshness, uint32 status, const std::vector<uint8_t>* process_output);
+ uint64 elapsed, uint64 freshness, uint32 status, bool hanged,
+ const std::vector<uint8_t>* process_output);
static void parse_execute(const execute_req& req);
static void parse_handshake(const handshake_req& req);
@@ -1343,7 +1344,7 @@ void write_extra_output()
}
flatbuffers::span<uint8_t> finish_output(OutputData* output, int proc_id, uint64 req_id, uint32 num_calls, uint64 elapsed,
- uint64 freshness, uint32 status, const std::vector<uint8_t>* process_output)
+ uint64 freshness, uint32 status, bool hanged, const std::vector<uint8_t>* process_output)
{
// In snapshot mode the output size is fixed and output_size is always initialized, so use it.
int out_size = flag_snapshot ? output_size : output->size.load(std::memory_order_relaxed) ?
@@ -1376,7 +1377,7 @@ flatbuffers::span<uint8_t> finish_output(OutputData* output, int proc_id, uint64
flatbuffers::Offset<flatbuffers::Vector<uint8_t>> output_off = 0;
if (process_output)
output_off = fbb.CreateVector(*process_output);
- auto exec_off = rpc::CreateExecResultRaw(fbb, req_id, proc_id, output_off, error_off, prog_info_off);
+ auto exec_off = rpc::CreateExecResultRaw(fbb, req_id, proc_id, output_off, hanged, error_off, prog_info_off);
auto msg_off = rpc::CreateExecutorMessageRaw(fbb, rpc::ExecutorMessagesRaw::ExecResult,
flatbuffers::Offset<void>(exec_off.o));
fbb.FinishSizePrefixed(msg_off);
diff --git a/executor/executor_runner.h b/executor/executor_runner.h
index 30a5f3a6d..5680f8fc2 100644
--- a/executor/executor_runner.h
+++ b/executor/executor_runner.h
@@ -27,16 +27,62 @@ inline std::ostream& operator<<(std::ostream& ss, const rpc::ExecRequestRawT& re
<< "\n";
}
+// ProcIDPool allows to reuse a set of unique proc IDs across a set of subprocesses.
+//
+// When a subprocess hangs, it's a bit unclear what to do (we don't have the means to kill
+// the whole tree of its children, and waiting for all of them will presumably hang as well).
+// Later a "task hung" report may appear from the kernel, so we don't want to terminate
+// the VM immediately. But the "task hung" report may also not appear at all, so we can't
+// just wait for a hanged subprocess forever.
+//
+// So in that case we kill/wait just the top subprocess, and give it a new proc ID
+// (since some resources associated with the old proc ID may still be used by the old
+// unterminated test processes). However, we don't have an infinite number of proc IDs,
+// so we recycle them in FIFO order. This is not ideal, but it looks like the best
+// practical solution.
+class ProcIDPool
+{
+public:
+ ProcIDPool(int num_procs)
+ {
+ // Theoretically we have 32 procs (prog.MaxPids), but there are some limitations in descriptions
+ // that make them work well only for up to 10 procs. For example, we form the /dev/loopN
+ // device name using proc['0', 1, int8]. When these limitations are fixed,
+ // we can use all 32 here (prog.MaxPids).
+ constexpr int kNumGoodProcs = 10;
+ for (int i = 0; i < std::max(num_procs, kNumGoodProcs); i++)
+ ids_.push_back(i);
+ }
+
+ int Alloc(int old = -1)
+ {
+ if (old >= 0)
+ ids_.push_back(old);
+ if (ids_.empty())
+ fail("out of proc ids");
+ int id = ids_.front();
+ ids_.pop_front();
+ return id;
+ }
+
+private:
+ std::deque<int> ids_;
+
+ ProcIDPool(const ProcIDPool&) = delete;
+ ProcIDPool& operator=(const ProcIDPool&) = delete;
+};
+
// Proc represents one subprocess that runs tests (re-execed syz-executor with 'exec' argument).
// The object is persistent and re-starts subprocess when it crashes.
class Proc
{
public:
- Proc(Connection& conn, const char* bin, int id, int& restarting, const bool& corpus_triaged, int max_signal_fd, int cover_filter_fd,
+ Proc(Connection& conn, const char* bin, int id, ProcIDPool& proc_id_pool, int& restarting, const bool& corpus_triaged, int max_signal_fd, int cover_filter_fd,
bool use_cover_edges, bool is_kernel_64_bit, uint32 slowdown, uint32 syscall_timeout_ms, uint32 program_timeout_ms)
: conn_(conn),
bin_(bin),
- id_(id),
+ proc_id_pool_(proc_id_pool),
+ id_(proc_id_pool.Alloc()),
restarting_(restarting),
corpus_triaged_(corpus_triaged),
max_signal_fd_(max_signal_fd),
@@ -92,7 +138,7 @@ public:
// fork server is enabled, so we use quite large timeout. Child process can be slow
// due to global locks in namespaces and other things, so let's better wait than
// report false misleading crashes.
- uint64 timeout = 2 * program_timeout_ms_;
+ uint64 timeout = 3 * program_timeout_ms_;
#else
uint64 timeout = program_timeout_ms_;
#endif
@@ -134,7 +180,8 @@ private:
Connection& conn_;
const char* const bin_;
- const int id_;
+ ProcIDPool& proc_id_pool_;
+ int id_;
int& restarting_;
const bool& corpus_triaged_;
const int max_signal_fd_;
@@ -215,7 +262,15 @@ private:
while (ReadOutput())
;
}
- HandleCompletion(status);
+ bool hanged = SYZ_EXECUTOR_USES_FORK_SERVER && state_ == State::Executing;
+ HandleCompletion(status, hanged);
+ if (hanged) {
+ // If the process has hung, it may still be using per-proc resources,
+ // so allocate a fresh proc id.
+ int new_id = proc_id_pool_.Alloc(id_);
+ debug("proc %d: changing proc id to %d\n", id_, new_id);
+ id_ = new_id;
+ }
} else if (attempts_ > 3)
sleep_ms(100 * attempts_);
Start();
@@ -350,7 +405,7 @@ private:
}
}
- void HandleCompletion(uint32 status)
+ void HandleCompletion(uint32 status, bool hanged = false)
{
if (!msg_)
fail("don't have executed msg");
@@ -370,7 +425,7 @@ private:
}
}
uint32 num_calls = read_input(&prog_data);
- auto data = finish_output(resp_mem_, id_, msg_->id, num_calls, elapsed, freshness_++, status, output);
+ auto data = finish_output(resp_mem_, id_, msg_->id, num_calls, elapsed, freshness_++, status, hanged, output);
conn_.Send(data.data(), data.size());
resp_mem_->Reset();
@@ -458,12 +513,14 @@ public:
: conn_(conn),
vm_index_(vm_index)
{
- size_t num_procs = Handshake();
+ int num_procs = Handshake();
+ proc_id_pool_.emplace(num_procs);
int max_signal_fd = max_signal_ ? max_signal_->FD() : -1;
int cover_filter_fd = cover_filter_ ? cover_filter_->FD() : -1;
- for (size_t i = 0; i < num_procs; i++)
- procs_.emplace_back(new Proc(conn, bin, i, restarting_, corpus_triaged_, max_signal_fd, cover_filter_fd,
- use_cover_edges_, is_kernel_64_bit_, slowdown_, syscall_timeout_ms_, program_timeout_ms_));
+ for (int i = 0; i < num_procs; i++)
+ procs_.emplace_back(new Proc(conn, bin, i, *proc_id_pool_, restarting_, corpus_triaged_,
+ max_signal_fd, cover_filter_fd, use_cover_edges_, is_kernel_64_bit_, slowdown_,
+ syscall_timeout_ms_, program_timeout_ms_));
for (;;)
Loop();
@@ -474,6 +531,7 @@ private:
const int vm_index_;
std::optional<CoverFilter> max_signal_;
std::optional<CoverFilter> cover_filter_;
+ std::optional<ProcIDPool> proc_id_pool_;
std::vector<std::unique_ptr<Proc>> procs_;
std::deque<rpc::ExecRequestRawT> requests_;
std::vector<std::string> leak_frames_;
@@ -545,7 +603,7 @@ private:
failmsg("bad restarting", "restarting=%d", restarting_);
}
- size_t Handshake()
+ int Handshake()
{
rpc::ConnectRequestRawT conn_req;
conn_req.id = vm_index_;
diff --git a/executor/snapshot.h b/executor/snapshot.h
index ce3e355b0..462fb2c56 100644
--- a/executor/snapshot.h
+++ b/executor/snapshot.h
@@ -251,7 +251,7 @@ NORETURN static void SnapshotDone(bool failed)
debug("SnapshotDone\n");
CoverAccessScope scope(nullptr);
uint32 num_calls = output_data->num_calls.load(std::memory_order_relaxed);
- auto data = finish_output(output_data, 0, 0, num_calls, 0, 0, failed ? kFailStatus : 0, nullptr);
+ auto data = finish_output(output_data, 0, 0, num_calls, 0, 0, failed ? kFailStatus : 0, false, nullptr);
ivs.hdr->output_offset = data.data() - reinterpret_cast<volatile uint8_t*>(ivs.hdr);
ivs.hdr->output_size = data.size();
SnapshotSetState(failed ? rpc::SnapshotState::Failed : rpc::SnapshotState::Executed);