diff --git a/gdb/amd-dbgapi-target.c b/gdb/amd-dbgapi-target.c index 40f24b5fc2f..e90628c8183 100644 --- a/gdb/amd-dbgapi-target.c +++ b/gdb/amd-dbgapi-target.c @@ -148,7 +148,7 @@ struct amd_dbgapi_inferior_info }; static amd_dbgapi_event_id_t process_event_queue - (amd_dbgapi_process_id_t process_id = AMD_DBGAPI_PROCESS_NONE, + (amd_dbgapi_process_id_t process_id, amd_dbgapi_event_kind_t until_event_kind = AMD_DBGAPI_EVENT_KIND_NONE); static const target_info amd_dbgapi_target_info = { @@ -1255,8 +1255,10 @@ amd_dbgapi_target::wait (ptid_t ptid, struct target_waitstatus *ws, std::tie (event_ptid, gpu_waitstatus) = consume_one_event (ptid.pid ()); if (event_ptid == minus_one_ptid) { - /* Drain the events from the amd_dbgapi and preserve the ordering. */ - process_event_queue (); + /* Drain the events for the current inferior from the amd_dbgapi and + preserve the ordering. */ + auto info = get_amd_dbgapi_inferior_info (current_inferior ()); + process_event_queue (info->process_id, AMD_DBGAPI_EVENT_KIND_NONE); std::tie (event_ptid, gpu_waitstatus) = consume_one_event (ptid.pid ()); if (event_ptid == minus_one_ptid) diff --git a/gdb/testsuite/gdb.rocm/multi-inferior-gpu.cpp b/gdb/testsuite/gdb.rocm/multi-inferior-gpu.cpp new file mode 100644 index 00000000000..ca869233b58 --- /dev/null +++ b/gdb/testsuite/gdb.rocm/multi-inferior-gpu.cpp @@ -0,0 +1,113 @@ +/* This testcase is part of GDB, the GNU debugger. + + Copyright 2023 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include +#include +#include +#include + +#define CHECK(cmd) \ + { \ + hipError_t error = cmd; \ + if (error != hipSuccess) \ + { \ + fprintf (stderr, "error: '%s'(%d) at %s:%d\n", \ + hipGetErrorString (error), error, __FILE__, __LINE__); \ + exit (EXIT_FAILURE); \ + } \ + } + +__global__ void +kern () +{ + asm ("s_sleep 1"); +} + +/* Spawn one child process per detected GPU. */ + +static int +parent (int argc, char **argv) +{ + /* Identify how many GPUs we have, and spawn one child for each. */ + int num_devices; + CHECK (hipGetDeviceCount (&num_devices)); + + /* Break here. */ + + for (int i = 0; i < num_devices; i++) + { + char n[32] = {}; + snprintf (n, sizeof (n), "%d", i); + pid_t pid = fork (); + if (pid == -1) + { + perror ("Fork failed"); + return -1; + } + + if (pid == 0) + { + /* Exec to force the child to re-initialize the ROCm runtime. */ + if (execl (argv[0], argv[0], n) == -1) + { + perror ("Failed to exec"); + return -1; + } + } + } + + /* Wait for all children. */ + while (true) + { + int ws; + pid_t ret = waitpid (-1, &ws, 0); + if (ret == -1 && errno == ECHILD) + break; + } + + /* Last break here. */ + return 0; +} + +static int +child (int argc, char **argv) +{ + int dev_number; + if (sscanf (argv[1], "%d", &dev_number) != 1) + { + fprintf (stderr, "Invalid argument \"%s\"\n", argv[1]); + return -1; + } + + CHECK (hipSetDevice (dev_number)); + kern<<<1, 1>>> (); + hipDeviceSynchronize (); + return 0; +} + +/* When called with no argument, identify how many AMDGPU devices are + available on the system and spawn one worker process per GPU. If a + command-line argument is provided, it is the index of the GPU to use. */ + +int +main (int argc, char **argv) +{ + if (argc <= 1) + return parent (argc, argv); + else + return child (argc, argv); +} diff --git a/gdb/testsuite/gdb.rocm/multi-inferior-gpu.exp b/gdb/testsuite/gdb.rocm/multi-inferior-gpu.exp new file mode 100644 index 00000000000..18b4172ff09 --- /dev/null +++ b/gdb/testsuite/gdb.rocm/multi-inferior-gpu.exp @@ -0,0 +1,89 @@ +# Copyright 2023 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# This test checks that GDB can debug multiple inferior which uses all +# the ROCm runtime. + +load_lib rocm.exp + +standard_testfile .cpp + +require allow_hipcc_tests +require hip_devices_support_debug_multi_process + +if {[build_executable "failed to prepare" $testfile $srcfile {debug hip}]} { + return +} + +proc do_test {} { + clean_restart $::binfile + gdb_test_no_output "set non-stop on" + gdb_test_no_output "set detach-on-fork off" + gdb_test_no_output "set follow-fork parent" + + with_rocm_gpu_lock { + gdb_breakpoint [gdb_get_line_number "Break here"] + gdb_breakpoint kern allow-pending + gdb_breakpoint [gdb_get_line_number "Last break here"] + + # Run until we reach the first breakpoint where we can figure + # out how many children will be spawned. + gdb_test "run" "hit Breakpoint.*" + + set num_children [get_integer_valueof "num_devices" 0] + set bp_to_see $num_children + set stopped_gpu_threads [list] + + gdb_test_multiple "continue -a &" "continue to gpu breakpoints" { + -re "Continuing\.\r\n$::gdb_prompt " { + pass $gdb_test_name + } + } + + gdb_test_multiple "" "wait for gpu stops" { + -re "Thread ($::decimal\.$::decimal)\[^\r\n\]* hit Breakpoint\[^\r\n\]*, kern \(\)\[^\r\n\]*\r\n" { + lappend stopped_gpu_threads $expect_out(1,string) + incr bp_to_see -1 + if {$bp_to_see != 0} { + exp_continue + } else { + pass $gdb_test_name + } + } + } + + # Continue all the GPU kernels so all the children processes can reach exit. + foreach thread $stopped_gpu_threads { + set infnumber [lindex [split $thread .] 0] + gdb_test "thread $thread" "Switching to thread.*" + gdb_test_multiple "continue $thread" "" { + -re "\\\[Inferior $infnumber \[^\n\r\]* exited normally\\]\r\n$::gdb_prompt " { + pass $gdb_test_name + } + } + } + + gdb_test_multiple "" "reach breakpoint in main" { + -re "hit Breakpoint.*parent" { + pass $gdb_test_name + } + } + # Select main inferior + gdb_test "inferior 1" "Switching to inferior 1.*" + gdb_continue_to_end "" "continue -a" 1 + } +} + +do_test