diff options
| author | Dmitry Vyukov <dvyukov@google.com> | 2024-11-27 17:23:09 +0100 |
|---|---|---|
| committer | Dmitry Vyukov <dvyukov@google.com> | 2024-12-11 15:22:17 +0000 |
| commit | 299ee674e6c124a35f1cf258df4f0f3c6e1db1f3 (patch) | |
| tree | 416b515e959a1d0a64a9516b1524a062ae63ba7d /executor | |
| parent | ff949d2512c5ac33d0407d26d80f1df77b2de0e7 (diff) | |
executor: query globs in the test program context
We query globs for 2 reasons:
1. Expand glob types in syscall descriptions.
2. Dynamic file probing for automatic descriptions generation.
In both of these contexts we are interested in files
that will be present during test program execution
(rather than normal unsandboxed execution).
For example, some files may not be accessible to test programs
after pivot root. On the other hand, we create and link
some additional files for the test program that don't
normally exist.
Add a new request type for querying of globs that are
executed in the test program context.
Diffstat (limited to 'executor')
| -rw-r--r-- | executor/common.h | 16 | ||||
| -rw-r--r-- | executor/executor.cc | 40 | ||||
| -rw-r--r-- | executor/executor_runner.h | 46 | ||||
| -rw-r--r-- | executor/files.h | 23 | ||||
| -rw-r--r-- | executor/snapshot.h | 1 |
5 files changed, 89 insertions, 37 deletions
diff --git a/executor/common.h b/executor/common.h index 123723e5a..7425a8bff 100644 --- a/executor/common.h +++ b/executor/common.h @@ -705,9 +705,21 @@ static void loop(void) last_executed = now; } // TODO: adjust timeout for progs with syz_usb_connect call. - if ((now - start < program_timeout_ms) && - (now - start < min_timeout_ms || now - last_executed < inactive_timeout_ms)) + // If the max program timeout is exceeded, kill unconditionally. + if (now - start > program_timeout_ms) + goto kill_test; + // If the request type is not a normal test program (currently, glob expansion request), + // then wait for the full timeout (these requests don't update number of completed calls + // + they are more important and we don't want timing flakes). + if (request_type != rpc::RequestType::Program) continue; + // Always wait at least the min timeout for each program. + if (now - start < min_timeout_ms) + continue; + // If it keeps completing syscalls, then don't kill it. + if (now - last_executed < inactive_timeout_ms) + continue; + kill_test: #else if (current_time_ms() - start < /*{{{PROGRAM_TIMEOUT_MS}}}*/) continue; diff --git a/executor/executor.cc b/executor/executor.cc index e8e1cb000..702f8c3b4 100644 --- a/executor/executor.cc +++ b/executor/executor.cc @@ -146,6 +146,7 @@ struct alignas(8) OutputData { std::atomic<uint32> consumed; std::atomic<uint32> completed; std::atomic<uint32> num_calls; + std::atomic<flatbuffers::Offset<flatbuffers::Vector<uint8_t>>> result_offset; struct { // Call index in the test program (they may be out-of-order is some syscalls block). 
int index; @@ -159,6 +160,7 @@ struct alignas(8) OutputData { consumed.store(0, std::memory_order_relaxed); completed.store(0, std::memory_order_relaxed); num_calls.store(0, std::memory_order_relaxed); + result_offset.store(0, std::memory_order_relaxed); } }; @@ -280,6 +282,7 @@ static bool flag_threaded; static bool flag_comparisons; static uint64 request_id; +static rpc::RequestType request_type; static uint64 all_call_signal; static bool all_extra_signal; @@ -417,6 +420,7 @@ struct handshake_req { struct execute_req { uint64 magic; uint64 id; + rpc::RequestType type; uint64 exec_flags; uint64 all_call_signal; bool all_extra_signal; @@ -791,6 +795,7 @@ void receive_execute() void parse_execute(const execute_req& req) { request_id = req.id; + request_type = req.type; flag_collect_signal = req.exec_flags & (1 << 0); flag_collect_cover = req.exec_flags & (1 << 1); flag_dedup_cover = req.exec_flags & (1 << 2); @@ -799,9 +804,9 @@ void parse_execute(const execute_req& req) all_call_signal = req.all_call_signal; all_extra_signal = req.all_extra_signal; - debug("[%llums] exec opts: procid=%llu threaded=%d cover=%d comps=%d dedup=%d signal=%d " + debug("[%llums] exec opts: reqid=%llu type=%llu procid=%llu threaded=%d cover=%d comps=%d dedup=%d signal=%d " " sandbox=%d/%d/%d/%d timeouts=%llu/%llu/%llu kernel_64_bit=%d\n", - current_time_ms() - start_time_ms, procid, flag_threaded, flag_collect_cover, + current_time_ms() - start_time_ms, request_id, (uint64)request_type, procid, flag_threaded, flag_collect_cover, flag_comparisons, flag_dedup_cover, flag_collect_signal, flag_sandbox_none, flag_sandbox_setuid, flag_sandbox_namespace, flag_sandbox_android, syscall_timeout_ms, program_timeout_ms, slowdown_scale, is_kernel_64_bit); @@ -837,9 +842,35 @@ void realloc_output_data() #endif } +void execute_glob() +{ + const char* pattern = (const char*)input_data; + const auto& files = Glob(pattern); + size_t size = 0; + for (const auto& file : files) + size += file.size() + 1; + 
mmap_output(kMaxOutput); + ShmemBuilder fbb(output_data, kMaxOutput, true); + uint8_t* pos = nullptr; + auto off = fbb.CreateUninitializedVector(size, &pos); + for (const auto& file : files) { + memcpy(pos, file.c_str(), file.size() + 1); + pos += file.size() + 1; + } + output_data->consumed.store(fbb.GetSize(), std::memory_order_release); + output_data->result_offset.store(off, std::memory_order_release); +} + // execute_one executes program stored in input_data. void execute_one() { + if (request_type == rpc::RequestType::Glob) { + execute_glob(); + return; + } + if (request_type != rpc::RequestType::Program) + failmsg("bad request type", "type=%llu", (uint64)request_type); + in_execute_one = true; #if GOOS_linux char buf[64]; @@ -1382,8 +1413,9 @@ flatbuffers::span<uint8_t> finish_output(OutputData* output, int proc_id, uint64 flatbuffers::Offset<flatbuffers::String> error_off = 0; if (status == kFailStatus) error_off = fbb.CreateString("process failed"); - flatbuffers::Offset<flatbuffers::Vector<uint8_t>> output_off = 0; - if (process_output) + // If the request wrote binary result (currently glob requests do this), use it instead of the output. 
+ auto output_off = output->result_offset.load(std::memory_order_relaxed); + if (output_off.IsNull() && process_output) output_off = fbb.CreateVector(*process_output); auto exec_off = rpc::CreateExecResultRaw(fbb, req_id, proc_id, output_off, hanged, error_off, prog_info_off); auto msg_off = rpc::CreateExecutorMessageRaw(fbb, rpc::ExecutorMessagesRaw::ExecResult, diff --git a/executor/executor_runner.h b/executor/executor_runner.h index c24886b10..a3b668893 100644 --- a/executor/executor_runner.h +++ b/executor/executor_runner.h @@ -23,7 +23,7 @@ inline std::ostream& operator<<(std::ostream& ss, const rpc::ExecRequestRawT& re << " flags=0x" << std::hex << static_cast<uint64>(req.flags) << " env_flags=0x" << std::hex << static_cast<uint64>(req.exec_opts->env_flags()) << " exec_flags=0x" << std::hex << static_cast<uint64>(req.exec_opts->exec_flags()) - << " prod_data=" << std::dec << req.prog_data.size() + << " data_size=" << std::dec << req.data.size() << "\n"; } @@ -87,7 +87,7 @@ private: class Proc { public: - Proc(Connection& conn, const char* bin, int id, ProcIDPool& proc_id_pool, int& restarting, const bool& corpus_triaged, int max_signal_fd, int cover_filter_fd, + Proc(Connection& conn, const char* bin, ProcIDPool& proc_id_pool, int& restarting, const bool& corpus_triaged, int max_signal_fd, int cover_filter_fd, bool use_cover_edges, bool is_kernel_64_bit, uint32 slowdown, uint32 syscall_timeout_ms, uint32 program_timeout_ms) : conn_(conn), bin_(bin), @@ -122,8 +122,10 @@ public: if (wait_start_) wait_end_ = current_time_ms(); // Restart every once in a while to not let too much state accumulate. + // Also request if request type differs as it affects program timeout. 
constexpr uint64 kRestartEvery = 600; if (state_ == State::Idle && ((corpus_triaged_ && restarting_ == 0 && freshness_ >= kRestartEvery) || + req_type_ != msg.type || exec_env_ != msg.exec_opts->env_flags() || sandbox_arg_ != msg.exec_opts->sandbox_arg())) Restart(); attempts_ = 0; @@ -150,9 +152,9 @@ public: // fork server is enabled, so we use quite large timeout. Child process can be slow // due to global locks in namespaces and other things, so let's better wait than // report false misleading crashes. - uint64 timeout = 3 * program_timeout_ms_; + uint64 timeout = 3 * ProgramTimeoutMs(); #else - uint64 timeout = program_timeout_ms_; + uint64 timeout = ProgramTimeoutMs(); #endif // Sandbox setup can take significant time. if (state_ == State::Handshaking) @@ -211,6 +213,7 @@ private: int req_pipe_ = -1; int resp_pipe_ = -1; int stdout_pipe_ = -1; + rpc::RequestType req_type_ = rpc::RequestType::Program; rpc::ExecEnv exec_env_ = rpc::ExecEnv::NONE; int64_t sandbox_arg_ = 0; std::optional<rpc::ExecRequestRawT> msg_; @@ -349,6 +352,7 @@ private: debug("proc %d: handshaking to execute request %llu\n", id_, static_cast<uint64>(msg_->id)); ChangeState(State::Handshaking); exec_start_ = current_time_ms(); + req_type_ = msg_->type; exec_env_ = msg_->exec_opts->env_flags() & ~rpc::ExecEnv::ResetState; sandbox_arg_ = msg_->exec_opts->sandbox_arg(); handshake_req req = { @@ -359,7 +363,7 @@ private: .pid = static_cast<uint64>(id_), .sandbox_arg = static_cast<uint64>(sandbox_arg_), .syscall_timeout_ms = syscall_timeout_ms_, - .program_timeout_ms = program_timeout_ms_, + .program_timeout_ms = ProgramTimeoutMs(), .slowdown_scale = slowdown_, }; if (write(req_pipe_, &req, sizeof(req)) != sizeof(req)) { @@ -401,10 +405,11 @@ private: else all_call_signal |= 1ull << call; } - memcpy(req_shmem_.Mem(), msg_->prog_data.data(), std::min(msg_->prog_data.size(), kMaxInput)); + memcpy(req_shmem_.Mem(), msg_->data.data(), std::min(msg_->data.size(), kMaxInput)); execute_req req{ .magic 
= kInMagic, .id = static_cast<uint64>(msg_->id), + .type = msg_->type, .exec_flags = static_cast<uint64>(msg_->exec_opts->exec_flags()), .all_call_signal = all_call_signal, .all_extra_signal = all_extra_signal, @@ -425,7 +430,7 @@ private: // Note: if the child process crashed during handshake and the request has ReturnError flag, // we have not started executing the request yet. uint64 elapsed = (current_time_ms() - exec_start_) * 1000 * 1000; - uint8* prog_data = msg_->prog_data.data(); + uint8* prog_data = msg_->data.data(); input_data = prog_data; std::vector<uint8_t>* output = nullptr; if (IsSet(msg_->flags, rpc::RequestFlag::ReturnOutput)) { @@ -436,7 +441,9 @@ private: output_.insert(output_.end(), tmp, tmp + strlen(tmp)); } } - uint32 num_calls = read_input(&prog_data); + uint32 num_calls = 0; + if (msg_->type == rpc::RequestType::Program) + num_calls = read_input(&prog_data); auto data = finish_output(resp_mem_, id_, msg_->id, num_calls, elapsed, freshness_++, status, hanged, output); conn_.Send(data.data(), data.size()); @@ -497,6 +504,7 @@ private: return false; } if (flag_debug) { + const bool has_nl = output_.back() == '\n'; output_.resize(output_.size() + 1); char* output = reinterpret_cast<char*>(output_.data()) + debug_output_pos_; // During machine check we can execute some requests that legitimately fail. @@ -508,12 +516,18 @@ private: if (syzfail) memcpy(syzfail, "NOTFAIL", strlen("NOTFAIL")); } - debug("proc %d: got output: %s\n", id_, output); + debug("proc %d: got output: %s%s", id_, output, has_nl ? "" : "\n"); output_.resize(output_.size() - 1); debug_output_pos_ = output_.size(); } return true; } + + uint32 ProgramTimeoutMs() const + { + // Glob requests can expand to >10K files and can take a while to run. + return program_timeout_ms_ * (req_type_ == rpc::RequestType::Program ? 
1 : 10); + } }; // Runner manages a set of test subprocesses (Proc's), receives new test requests from the manager, @@ -530,7 +544,7 @@ public: int max_signal_fd = max_signal_ ? max_signal_->FD() : -1; int cover_filter_fd = cover_filter_ ? cover_filter_->FD() : -1; for (int i = 0; i < num_procs; i++) - procs_.emplace_back(new Proc(conn, bin, i, *proc_id_pool_, restarting_, corpus_triaged_, + procs_.emplace_back(new Proc(conn, bin, *proc_id_pool_, restarting_, corpus_triaged_, max_signal_fd, cover_filter_fd, use_cover_edges_, is_kernel_64_bit_, slowdown_, syscall_timeout_ms_, program_timeout_ms_)); @@ -644,7 +658,6 @@ private: rpc::InfoRequestRawT info_req; info_req.files = ReadFiles(conn_reply.files); - info_req.globs = ReadGlobs(conn_reply.globs); // This does any one-time setup for the requested features on the machine. // Note: this can be called multiple times and must be idempotent. @@ -701,13 +714,14 @@ private: void Handle(rpc::ExecRequestRawT& msg) { - debug("recv exec request %llu: flags=0x%llx env=0x%llx exec=0x%llx size=%zu\n", + debug("recv exec request %llu: type=%llu flags=0x%llx env=0x%llx exec=0x%llx size=%zu\n", static_cast<uint64>(msg.id), + static_cast<uint64>(msg.type), static_cast<uint64>(msg.flags), static_cast<uint64>(msg.exec_opts->env_flags()), static_cast<uint64>(msg.exec_opts->exec_flags()), - msg.prog_data.size()); - if (IsSet(msg.flags, rpc::RequestFlag::IsBinary)) { + msg.data.size()); + if (msg.type == rpc::RequestType::Binary) { ExecuteBinary(msg); return; } @@ -783,9 +797,9 @@ private: int fd = open(file.c_str(), O_WRONLY | O_CLOEXEC | O_CREAT, 0755); if (fd == -1) return {"binary file creation failed", {}}; - ssize_t wrote = write(fd, msg.prog_data.data(), msg.prog_data.size()); + ssize_t wrote = write(fd, msg.data.data(), msg.data.size()); close(fd); - if (wrote != static_cast<ssize_t>(msg.prog_data.size())) + if (wrote != static_cast<ssize_t>(msg.data.size())) return {"binary file write failed", {}}; int stdin_pipe[2]; diff --git 
a/executor/files.h b/executor/files.h index 7be826d0a..f1d2a6104 100644 --- a/executor/files.h +++ b/executor/files.h @@ -24,20 +24,25 @@ static std::vector<std::string> Glob(const std::string& pattern) // because they cause recursion, or lead outside of the target glob // (e.g. /proc/self/{root,cwd}). // However, we want to keep few links: /proc/self, /proc/thread-self, - // /sys/kernel/slab/kmalloc-64 (may be a link with slab merging). + // /sys/kernel/slab/kmalloc-64 (may be a link with slab merging), + // and cgroup links created in the test dir. // This is a hacky way to do it b/c e.g. "self" will be matched in all paths, // not just /proc. A proper fix would require writing completly custom version of glob // that would support recursion and would allow using/not using links on demand. + buf.gl_readdir = [](void* dir) -> dirent* { for (;;) { struct dirent* ent = readdir(static_cast<DIR*>(dir)); if (!ent || ent->d_type != DT_LNK || !strcmp(ent->d_name, "self") || !strcmp(ent->d_name, "thread-self") || - !strcmp(ent->d_name, "kmalloc-64")) + !strcmp(ent->d_name, "kmalloc-64") || + !strcmp(ent->d_name, "cgroup") || + !strcmp(ent->d_name, "cgroup.cpu") || + !strcmp(ent->d_name, "cgroup.net")) return ent; } - }, + }; buf.gl_stat = stat; buf.gl_lstat = lstat; int res = glob(pattern.c_str(), GLOB_MARK | GLOB_NOSORT | GLOB_ALTDIRFUNC, nullptr, &buf); @@ -112,15 +117,3 @@ static std::vector<std::unique_ptr<rpc::FileInfoRawT>> ReadFiles(const std::vect } return results; } - -static std::vector<std::unique_ptr<rpc::GlobInfoRawT>> ReadGlobs(const std::vector<std::string>& patterns) -{ - std::vector<std::unique_ptr<rpc::GlobInfoRawT>> results; - for (const auto& pattern : patterns) { - auto info = std::make_unique<rpc::GlobInfoRawT>(); - info->name = pattern; - info->files = Glob(pattern); - results.push_back(std::move(info)); - } - return results; -} diff --git a/executor/snapshot.h b/executor/snapshot.h index 0cac33822..71c0b3940 100644 --- a/executor/snapshot.h +++ 
b/executor/snapshot.h @@ -245,6 +245,7 @@ static void SnapshotStart() execute_req req = { .magic = kInMagic, .id = 0, + .type = rpc::RequestType::Program, .exec_flags = static_cast<uint64>(msg->exec_flags()), .all_call_signal = msg->all_call_signal(), .all_extra_signal = msg->all_extra_signal(), |
