Fix PR 10970, PR 12702.
	* linux-nat.c (linux_lwp_is_zombie): New function.
	(wait_lwp): Initialize status.  New variable prev_mask.  Block signals.
	Check for linux_lwp_is_zombie.  Use WNOHANG and sigsuspend.

gdb/testsuite/
	* gdb.threads/leader-exit.c: New file.
	* gdb.threads/leader-exit.exp: New file.
This commit is contained in:
Jan Kratochvil
2011-05-27 16:55:39 +00:00
parent b0f260d61e
commit 432b4d03ad
5 changed files with 185 additions and 10 deletions

View File

@@ -2356,6 +2356,33 @@ linux_handle_extended_wait (struct lwp_info *lp, int status,
_("unknown ptrace event %d"), event);
}
/* Return non-zero if LWP is a zombie, zero otherwise.

   The check opens /proc/LWP/status and scans it for the exact line
   "State:\tZ (zombie)\n".  If the status file cannot be opened, a
   warning is issued and zero is returned.  */

static int
linux_lwp_is_zombie (long lwp)
{
  char buffer[MAXPATHLEN];
  FILE *procfile;
  int retval = 0;

  /* Bound the write to BUFFER explicitly rather than relying on the
     formatted path happening to be short.  */
  snprintf (buffer, sizeof (buffer), "/proc/%ld/status", lwp);
  procfile = fopen (buffer, "r");
  if (procfile == NULL)
    {
      warning (_("unable to open /proc file '%s'"), buffer);
      return 0;
    }

  /* From here on BUFFER is reused as the line buffer; the path it held
     is no longer needed.  */
  while (fgets (buffer, sizeof (buffer), procfile) != NULL)
    {
      if (strcmp (buffer, "State:\tZ (zombie)\n") == 0)
        {
          /* Found the zombie state line; no need to read further.  */
          retval = 1;
          break;
        }
    }

  fclose (procfile);
  return retval;
}
/* Wait for LP to stop. Returns the wait status, or 0 if the LWP has
exited. */
@@ -2363,28 +2390,76 @@ static int
wait_lwp (struct lwp_info *lp)
{
pid_t pid;
int status;
int status = 0;
int thread_dead = 0;
sigset_t prev_mask;
gdb_assert (!lp->stopped);
gdb_assert (lp->status == 0);
pid = my_waitpid (GET_LWP (lp->ptid), &status, 0);
if (pid == -1 && errno == ECHILD)
/* Make sure SIGCHLD is blocked for sigsuspend avoiding a race below. */
block_child_signals (&prev_mask);
for (;;)
{
pid = my_waitpid (GET_LWP (lp->ptid), &status, __WCLONE);
/* If my_waitpid returns 0 it means the __WCLONE vs. non-__WCLONE kind
was right and we should just call sigsuspend. */
pid = my_waitpid (GET_LWP (lp->ptid), &status, WNOHANG);
if (pid == -1 && errno == ECHILD)
pid = my_waitpid (GET_LWP (lp->ptid), &status, __WCLONE | WNOHANG);
if (pid != 0)
break;
/* Bugs 10970, 12702.
Thread group leader may have exited in which case we'll lock up in
waitpid if there are other threads, even if they are all zombies too.
Basically, we're not supposed to use waitpid this way.
__WCLONE is not applicable for the leader so we can't use that.
LINUX_NAT_THREAD_ALIVE cannot be used here as it requires a STOPPED
process; it gets ESRCH both for the zombie and for running processes.
As a workaround, check if we're waiting for the thread group leader and
if it's a zombie, and avoid calling waitpid if it is.
This is racy: what if the thread group leader becomes a zombie right
after we check?  Therefore always use WNOHANG with sigsuspend - it is
equivalent to a blocking waitpid, but the linux_lwp_is_zombie check is safe this way. */
if (GET_PID (lp->ptid) == GET_LWP (lp->ptid)
&& linux_lwp_is_zombie (GET_LWP (lp->ptid)))
{
/* The thread has previously exited. We need to delete it
now because, for some vendor 2.4 kernels with NPTL
support backported, there won't be an exit event unless
it is the main thread. 2.6 kernels will report an exit
event for each thread that exits, as expected. */
thread_dead = 1;
if (debug_linux_nat)
fprintf_unfiltered (gdb_stdlog, "WL: %s vanished.\n",
fprintf_unfiltered (gdb_stdlog,
"WL: Thread group leader %s vanished.\n",
target_pid_to_str (lp->ptid));
break;
}
/* Wait for the next SIGCHLD and try again. This may let SIGCHLD handlers
get invoked even though our caller had intentionally blocked them with
block_child_signals. This matters only for the loop in
linux_nat_wait_1; there, if we get called, my_waitpid is called
again before it reaches sigsuspend, so we can safely let the handlers
run here. */
sigsuspend (&suspend_mask);
}
restore_child_signals_mask (&prev_mask);
if (pid == -1 && errno == ECHILD)
{
/* The thread has previously exited. We need to delete it
now because, for some vendor 2.4 kernels with NPTL
support backported, there won't be an exit event unless
it is the main thread. 2.6 kernels will report an exit
event for each thread that exits, as expected. */
thread_dead = 1;
if (debug_linux_nat)
fprintf_unfiltered (gdb_stdlog, "WL: %s vanished.\n",
target_pid_to_str (lp->ptid));
}
if (!thread_dead)