2011-10-28 Pedro Alves <pedro@codesourcery.com>

gdb/
	* linux-nat.c (linux_nat_filter_event): Remove `options'
	parameter, and dead code that used it.  If we're handling a
	PTRACE_EVENT_EXEC event, and the thread group leader is no longer
	in our lwp list, re-add it.
	(check_zombie_leaders): New.
	(linux_nat_wait_1): Remove `options' and `pid' locals.  Always
	wait for children with WNOHANG, and always wait for all children.
	Don't check for no resumed children upfront.  Simplify wait loop.
	Check for zombie thread group leaders after handling all wait
	statuses.  Return TARGET_WAITKIND_NO_RESUMED if there no
	unwaited-for children left.
	* infrun.c (fetch_inferior_event): Handle TARGET_WAITKIND_NO_RESUMED.
	(handle_inferior_event): Handle TARGET_WAITKIND_NO_RESUMED.
	(normal_stop): Handle TARGET_WAITKIND_NO_RESUMED.
	* target.h (enum target_waitkind) <TARGET_WAITKIND_NO_RESUMED>: New.

	gdb/testsuite/
	* gdb.threads/no-unwaited-for-left.c: New.
	* gdb.threads/no-unwaited-for-left.exp: New.
	* gdb.threads/non-ldr-exc-1.c: New.
	* gdb.threads/non-ldr-exc-1.exp: New.
	* gdb.threads/non-ldr-exc-2.c: New.
	* gdb.threads/non-ldr-exc-2.exp: New.
	* gdb.threads/non-ldr-exc-3.c: New.
	* gdb.threads/non-ldr-exc-3.exp: New.
	* gdb.threads/non-ldr-exc-4.c: New.
	* gdb.threads/non-ldr-exc-4.exp: New.
This commit is contained in:
Pedro Alves
2011-10-28 18:30:00 +00:00
parent b3f5b73ba4
commit 0e5bf2a8c9
15 changed files with 959 additions and 125 deletions

View File

@@ -3176,7 +3176,7 @@ stop_and_resume_callback (struct lwp_info *lp, void *data)
other lwp. In that case set *NEW_PENDING_P to true. */
static struct lwp_info *
linux_nat_filter_event (int lwpid, int status, int options, int *new_pending_p)
linux_nat_filter_event (int lwpid, int status, int *new_pending_p)
{
struct lwp_info *lp;
@@ -3191,7 +3191,27 @@ linux_nat_filter_event (int lwpid, int status, int options, int *new_pending_p)
fork, vfork, and clone events, then we'll just add the
new one to our list and go back to waiting for the event
to be reported - the stopped process might be returned
from waitpid before or after the event is. */
from waitpid before or after the event is.
But note the case of a non-leader thread exec'ing after the
leader having exited, and gone from our lists. The non-leader
thread changes its tid to the tgid. */
if (WIFSTOPPED (status) && lp == NULL
&& (WSTOPSIG (status) == SIGTRAP && status >> 16 == PTRACE_EVENT_EXEC))
{
/* A multi-thread exec after we had seen the leader exiting. */
if (debug_linux_nat)
fprintf_unfiltered (gdb_stdlog,
"LLW: Re-adding thread group leader LWP %d.\n",
lwpid);
lp = add_lwp (BUILD_LWP (lwpid, lwpid));
lp->stopped = 1;
lp->resumed = 1;
add_thread (lp->ptid);
}
if (WIFSTOPPED (status) && !lp)
{
add_to_pid_list (&stopped_pids, lwpid, status);
@@ -3205,33 +3225,6 @@ linux_nat_filter_event (int lwpid, int status, int options, int *new_pending_p)
if (!WIFSTOPPED (status) && !lp)
return NULL;
/* NOTE drow/2003-06-17: This code seems to be meant for debugging
CLONE_PTRACE processes which do not use the thread library -
otherwise we wouldn't find the new LWP this way. That doesn't
currently work, and the following code is currently unreachable
due to the two blocks above. If it's fixed some day, this code
should be broken out into a function so that we can also pick up
LWPs from the new interface. */
if (!lp)
{
lp = add_lwp (BUILD_LWP (lwpid, GET_PID (inferior_ptid)));
if (options & __WCLONE)
lp->cloned = 1;
gdb_assert (WIFSTOPPED (status)
&& WSTOPSIG (status) == SIGSTOP);
lp->signalled = 1;
if (!in_thread_list (inferior_ptid))
{
inferior_ptid = BUILD_LWP (GET_PID (inferior_ptid),
GET_PID (inferior_ptid));
add_thread (inferior_ptid);
}
add_thread (lp->ptid);
}
/* Handle GNU/Linux's syscall SIGTRAPs. */
if (WIFSTOPPED (status) && WSTOPSIG (status) == SYSCALL_SIGTRAP)
{
@@ -3392,6 +3385,71 @@ linux_nat_filter_event (int lwpid, int status, int options, int *new_pending_p)
return lp;
}
/* Detect zombie thread group leaders, and "exit" them. We can't reap
their exits until all other threads in the group have exited. */
static void
check_zombie_leaders (void)
{
struct inferior *inf;
ALL_INFERIORS (inf)
{
struct lwp_info *leader_lp;
if (inf->pid == 0)
continue;
leader_lp = find_lwp_pid (pid_to_ptid (inf->pid));
if (leader_lp != NULL
/* Check if there are other threads in the group, as we may
have raced with the inferior simply exiting. */
&& num_lwps (inf->pid) > 1
&& linux_lwp_is_zombie (inf->pid))
{
if (debug_linux_nat)
fprintf_unfiltered (gdb_stdlog,
"CZL: Thread group leader %d zombie "
"(it exited, or another thread execd).\n",
inf->pid);
/* A leader zombie can mean one of two things:
- It exited, and there's an exit status pending
available, or only the leader exited (not the whole
program). In the latter case, we can't waitpid the
leader's exit status until all other threads are gone.
- There are 3 or more threads in the group, and a thread
other than the leader exec'd. On an exec, the Linux
kernel destroys all other threads (except the execing
one) in the thread group, and resets the execing thread's
tid to the tgid. No exit notification is sent for the
execing thread -- from the ptracer's perspective, it
appears as though the execing thread just vanishes.
Until we reap all other threads except the leader and the
execing thread, the leader will be zombie, and the
execing thread will be in `D (disc sleep)'. As soon as
all other threads are reaped, the execing thread changes
it's tid to the tgid, and the previous (zombie) leader
vanishes, giving place to the "new" leader. We could try
distinguishing the exit and exec cases, by waiting once
more, and seeing if something comes out, but it doesn't
sound useful. The previous leader _does_ go away, and
we'll re-add the new one once we see the exec event
(which is just the same as what would happen if the
previous leader did exit voluntarily before some other
thread execs). */
if (debug_linux_nat)
fprintf_unfiltered (gdb_stdlog,
"CZL: Thread group leader %d vanished.\n",
inf->pid);
exit_lwp (leader_lp);
}
}
}
static ptid_t
linux_nat_wait_1 (struct target_ops *ops,
ptid_t ptid, struct target_waitstatus *ourstatus,
@@ -3400,9 +3458,7 @@ linux_nat_wait_1 (struct target_ops *ops,
static sigset_t prev_mask;
enum resume_kind last_resume_kind;
struct lwp_info *lp;
int options;
int status;
pid_t pid;
if (debug_linux_nat)
fprintf_unfiltered (gdb_stdlog, "LLW: enter\n");
@@ -3424,41 +3480,14 @@ linux_nat_wait_1 (struct target_ops *ops,
/* Make sure SIGCHLD is blocked. */
block_child_signals (&prev_mask);
if (ptid_equal (ptid, minus_one_ptid))
pid = -1;
else if (ptid_is_pid (ptid))
/* A request to wait for a specific tgid. This is not possible
with waitpid, so instead, we wait for any child, and leave
children we're not interested in right now with a pending
status to report later. */
pid = -1;
else
pid = GET_LWP (ptid);
retry:
lp = NULL;
status = 0;
options = 0;
/* Make sure that of those LWPs we want to get an event from, there
is at least one LWP that has been resumed. If there's none, just
bail out. The core may just be flushing asynchronously all
events. */
if (iterate_over_lwps (ptid, resumed_callback, NULL) == NULL)
{
ourstatus->kind = TARGET_WAITKIND_IGNORE;
if (debug_linux_nat)
fprintf_unfiltered (gdb_stdlog, "LLW: exit (no resumed LWP)\n");
restore_child_signals_mask (&prev_mask);
return minus_one_ptid;
}
/* First check if there is a LWP with a wait status pending. */
if (pid == -1)
if (ptid_equal (ptid, minus_one_ptid) || ptid_is_pid (ptid))
{
/* Any LWP that's been resumed will do. */
/* Any LWP in the PTID group that's been resumed will do. */
lp = iterate_over_lwps (ptid, status_callback, NULL);
if (lp)
{
@@ -3468,11 +3497,6 @@ retry:
status_to_str (lp->status),
target_pid_to_str (lp->ptid));
}
/* But if we don't find one, we'll have to wait, and check both
cloned and uncloned processes. We start with the cloned
processes. */
options = __WCLONE | WNOHANG;
}
else if (is_lwp (ptid))
{
@@ -3491,12 +3515,6 @@ retry:
status_to_str (lp->status),
target_pid_to_str (lp->ptid));
/* If we have to wait, take into account whether PID is a cloned
process or not. And we have to convert it to something that
the layer beneath us can understand. */
options = lp->cloned ? __WCLONE : 0;
pid = GET_LWP (ptid);
/* We check for lp->waitstatus in addition to lp->status,
because we can have pending process exits recorded in
lp->status and W_EXITCODE(0,0) == 0. We should probably have
@@ -3557,15 +3575,34 @@ retry:
set_sigint_trap ();
}
/* Translate generic target_wait options into waitpid options. */
if (target_options & TARGET_WNOHANG)
options |= WNOHANG;
/* But if we don't find a pending event, we'll have to wait. */
while (lp == NULL)
{
pid_t lwpid;
lwpid = my_waitpid (pid, &status, options);
/* Always use -1 and WNOHANG, due to couple of a kernel/ptrace
quirks:
- If the thread group leader exits while other threads in the
thread group still exist, waitpid(TGID, ...) hangs. That
waitpid won't return an exit status until the other threads
in the group are reapped.
- When a non-leader thread execs, that thread just vanishes
without reporting an exit (so we'd hang if we waited for it
explicitly in that case). The exec event is reported to
the TGID pid. */
errno = 0;
lwpid = my_waitpid (-1, &status, __WCLONE | WNOHANG);
if (lwpid == 0 || (lwpid == -1 && errno == ECHILD))
lwpid = my_waitpid (-1, &status, WNOHANG);
if (debug_linux_nat)
fprintf_unfiltered (gdb_stdlog,
"LNW: waitpid(-1, ...) returned %d, %s\n",
lwpid, errno ? safe_strerror (errno) : "ERRNO-OK");
if (lwpid > 0)
{
@@ -3573,8 +3610,6 @@ retry:
now have pending events to handle. */
int new_pending;
gdb_assert (pid == -1 || lwpid == pid);
if (debug_linux_nat)
{
fprintf_unfiltered (gdb_stdlog,
@@ -3582,14 +3617,12 @@ retry:
(long) lwpid, status_to_str (status));
}
lp = linux_nat_filter_event (lwpid, status, options, &new_pending);
lp = linux_nat_filter_event (lwpid, status, &new_pending);
/* STATUS is now no longer valid, use LP->STATUS instead. */
status = 0;
if (lp
&& ptid_is_pid (ptid)
&& ptid_get_pid (lp->ptid) != ptid_get_pid (ptid))
if (lp && !ptid_match (lp->ptid, ptid))
{
gdb_assert (lp->resumed);
@@ -3662,69 +3695,65 @@ retry:
store_waitstatus (&lp->waitstatus, lp->status);
}
if (new_pending)
goto retry;
/* Keep looking. */
lp = NULL;
continue;
}
if (new_pending)
{
/* Some LWP now has a pending event. Go all the way
back to check it. */
goto retry;
}
if (lp)
break;
else
{
if (new_pending)
goto retry;
if (pid == -1)
{
/* waitpid did return something. Restart over. */
options |= __WCLONE;
}
continue;
/* We got an event to report to the core. */
break;
}
/* Retry until nothing comes out of waitpid. A single
SIGCHLD can indicate more than one child stopped. */
continue;
}
if (pid == -1)
/* Check for zombie thread group leaders. Those can't be reaped
until all other threads in the thread group are. */
check_zombie_leaders ();
/* If there are no resumed children left, bail. We'd be stuck
forever in the sigsuspend call below otherwise. */
if (iterate_over_lwps (ptid, resumed_callback, NULL) == NULL)
{
/* Alternate between checking cloned and uncloned processes. */
options ^= __WCLONE;
if (debug_linux_nat)
fprintf_unfiltered (gdb_stdlog, "LLW: exit (no resumed LWP)\n");
/* And every time we have checked both:
In async mode, return to event loop;
In sync mode, suspend waiting for a SIGCHLD signal. */
if (options & __WCLONE)
{
if (target_options & TARGET_WNOHANG)
{
/* No interesting event. */
ourstatus->kind = TARGET_WAITKIND_IGNORE;
ourstatus->kind = TARGET_WAITKIND_NO_RESUMED;
if (debug_linux_nat)
fprintf_unfiltered (gdb_stdlog, "LLW: exit (ignore)\n");
if (!target_can_async_p ())
clear_sigint_trap ();
restore_child_signals_mask (&prev_mask);
return minus_one_ptid;
}
sigsuspend (&suspend_mask);
}
restore_child_signals_mask (&prev_mask);
return minus_one_ptid;
}
else if (target_options & TARGET_WNOHANG)
{
/* No interesting event for PID yet. */
ourstatus->kind = TARGET_WAITKIND_IGNORE;
/* No interesting event to report to the core. */
if (target_options & TARGET_WNOHANG)
{
if (debug_linux_nat)
fprintf_unfiltered (gdb_stdlog, "LLW: exit (ignore)\n");
ourstatus->kind = TARGET_WAITKIND_IGNORE;
restore_child_signals_mask (&prev_mask);
return minus_one_ptid;
}
/* We shouldn't end up here unless we want to try again. */
gdb_assert (lp == NULL);
/* Block until we get an event reported with SIGCHLD. */
sigsuspend (&suspend_mask);
}
if (!target_can_async_p ())
@@ -3813,7 +3842,7 @@ retry:
from among those that have had events. Giving equal priority
to all LWPs that have had events helps prevent
starvation. */
if (pid == -1)
if (ptid_equal (ptid, minus_one_ptid) || ptid_is_pid (ptid))
select_event_lwp (ptid, &lp, &status);
/* Now that we've selected our final event LWP, cancel any