diff options
| author | Dmitry Vyukov <dvyukov@google.com> | 2024-10-21 11:53:44 +0200 |
|---|---|---|
| committer | Dmitry Vyukov <dvyukov@google.com> | 2024-10-24 09:34:38 +0000 |
| commit | 9fc8fe026baab9959459256f2d47f4bbf21d405a (patch) | |
| tree | 6d97a7ac2b8e69f5fa7a92a4b3824b1ad9e571c7 /pkg/rpcserver/last_executing.go | |
| parent | a85e9d5032fdf305457a6400bd3af4a8df6c45c4 (diff) | |
executor: better handling for hanged test processes
Currently we kill hanged processes and consider the corresponding test finished.
We don't kill/wait for the actual test subprocess (we don't know its pid to kill,
and waiting will presumably hang). This has 2 problems:
1. If the hanged process causes a "task hung" report, we can't reproduce it,
since the test finished too long ago (the manager thinks it's finished and
discards the request).
2. The test process still consumes per-pid resources.
Explicitly detect and handle such cases:
Manager keeps these hanged tests forever,
and we assign a new proc id for future processes
(don't reuse the hanged one).
Diffstat (limited to 'pkg/rpcserver/last_executing.go')
| -rw-r--r-- | pkg/rpcserver/last_executing.go | 21 |
1 files changed, 18 insertions, 3 deletions
diff --git a/pkg/rpcserver/last_executing.go b/pkg/rpcserver/last_executing.go index 29b854df2..6064cfe14 100644 --- a/pkg/rpcserver/last_executing.go +++ b/pkg/rpcserver/last_executing.go @@ -10,6 +10,7 @@ import ( "time" "github.com/google/syzkaller/pkg/report" + "github.com/google/syzkaller/prog" ) // LastExecuting keeps the given number of last executed programs @@ -17,6 +18,7 @@ import ( type LastExecuting struct { count int procs []ExecRecord + hanged []ExecRecord // hanged programs, kept forever positions []int } @@ -36,12 +38,12 @@ func MakeLastExecuting(procs, count int) *LastExecuting { } // Note execution of the 'prog' on 'proc' at time 'now'. -func (last *LastExecuting) Note(id, proc int, prog []byte, now time.Duration) { +func (last *LastExecuting) Note(id, proc int, progData []byte, now time.Duration) { pos := &last.positions[proc] last.procs[proc*last.count+*pos] = ExecRecord{ ID: id, Proc: proc, - Prog: prog, + Prog: progData, Time: now, } *pos++ @@ -50,13 +52,26 @@ func (last *LastExecuting) Note(id, proc int, prog []byte, now time.Duration) { } } +// Note a hanged program. +func (last *LastExecuting) Hanged(id, proc int, progData []byte, now time.Duration) { + last.hanged = append(last.hanged, ExecRecord{ + ID: id, + // Use unique proc for these programs b/c pkg/repro will either use the program with matching ID, + // of take the last program from each proc, and we want the hanged programs to be included. + Proc: prog.MaxPids + len(last.hanged), + Prog: progData, + Time: now, + }) +} + // Returns a sorted set of last executing programs. // The records are sorted by time in ascending order. // ExecRecord.Time is the difference in start executing time between this // program and the program that started executing last. func (last *LastExecuting) Collect() []ExecRecord { - procs := last.procs + procs := append(last.procs, last.hanged...) last.procs = nil // The type must not be used after this. 
+ last.hanged = nil sort.Slice(procs, func(i, j int) bool { return procs[i].Time < procs[j].Time }) |
