diff --git a/gdb/Makefile.in b/gdb/Makefile.in index cdfbad4deed..9c0a0bff2cd 100644 --- a/gdb/Makefile.in +++ b/gdb/Makefile.in @@ -728,6 +728,7 @@ ALL_64_TARGET_OBS = \ arch/aarch64.o \ arch/aarch64-insn.o \ arch/aarch64-mte-linux.o \ + arch/aarch64-scalable-linux.o \ arch/amd64.o \ arch/riscv.o \ bpf-tdep.o \ @@ -1524,6 +1525,7 @@ HFILES_NO_SRCDIR = \ arch/aarch64.h \ arch/aarch64-insn.h \ arch/aarch64-mte-linux.h \ + arch/aarch64-scalable-linux.h \ arch/arc.h \ arch/arm.h \ arch/i386.h \ @@ -1562,6 +1564,7 @@ HFILES_NO_SRCDIR = \ nat/aarch64-linux-hw-point.h \ nat/aarch64-mte-linux-ptrace.h \ nat/aarch64-scalable-linux-ptrace.h \ + nat/aarch64-scalable-linux-sigcontext.h \ nat/amd64-linux-siginfo.h \ nat/gdb_ptrace.h \ nat/gdb_thread_db.h \ @@ -1627,6 +1630,7 @@ ALLDEPFILES = \ arch/aarch64.c \ arch/aarch64-insn.c \ arch/aarch64-mte-linux.c \ + arch/aarch64-scalable-linux.c \ arch/amd64.c \ arch/arc.c \ arch/arm.c \ diff --git a/gdb/aarch64-linux-nat.c b/gdb/aarch64-linux-nat.c index 267a1ca0e79..d7fcef5a0db 100644 --- a/gdb/aarch64-linux-nat.c +++ b/gdb/aarch64-linux-nat.c @@ -55,6 +55,7 @@ #include "arch/aarch64-mte-linux.h" #include "nat/aarch64-mte-linux-ptrace.h" +#include "arch/aarch64-scalable-linux.h" #include @@ -313,8 +314,11 @@ store_fpregs_to_thread (const struct regcache *regcache) } } -/* Fill GDB's register array with the sve register values - from the current thread. */ +/* Fill GDB's REGCACHE with the valid SVE register values from the thread + associated with REGCACHE. + + This function handles reading data from SVE or SSVE states, depending + on which state is active at the moment. */ static void fetch_sveregs_from_thread (struct regcache *regcache) @@ -323,8 +327,11 @@ fetch_sveregs_from_thread (struct regcache *regcache) aarch64_sve_regs_copy_to_reg_buf (regcache->ptid ().lwp (), regcache); } -/* Store to the current thread the valid sve register - values in the GDB's register array. */ +/* Store the valid SVE register values from GDB's REGCACHE to the thread + associated with REGCACHE. + + This function handles writing data to SVE or SSVE states, depending + on which state is active at the moment. */ static void store_sveregs_to_thread (struct regcache *regcache) @@ -334,6 +341,41 @@ store_sveregs_to_thread (struct regcache *regcache) aarch64_sve_regs_copy_from_reg_buf (regcache->ptid ().lwp (), regcache); } +/* Fill GDB's REGCACHE with the ZA register set contents from the + thread associated with REGCACHE. If there is no active ZA register state, + make the ZA register contents zero. */ + +static void +fetch_za_from_thread (struct regcache *regcache) +{ + aarch64_gdbarch_tdep *tdep + = gdbarch_tdep (regcache->arch ()); + + /* Read ZA state from the thread to the register cache. */ + aarch64_za_regs_copy_to_reg_buf (regcache->ptid ().lwp (), + regcache, + tdep->sme_za_regnum, + tdep->sme_svg_regnum, + tdep->sme_svcr_regnum); +} + +/* Store the NT_ARM_ZA register set contents from GDB's REGCACHE to the thread + associated with REGCACHE. */ + +static void +store_za_to_thread (struct regcache *regcache) +{ + aarch64_gdbarch_tdep *tdep + = gdbarch_tdep (regcache->arch ()); + + /* Write ZA state from the register cache to the thread. */ + aarch64_za_regs_copy_from_reg_buf (regcache->ptid ().lwp (), + regcache, + tdep->sme_za_regnum, + tdep->sme_svg_regnum, + tdep->sme_svcr_regnum); +} + /* Fill GDB's register array with the pointer authentication mask values from the current thread. 
*/ @@ -488,7 +530,10 @@ aarch64_fetch_registers (struct regcache *regcache, int regno) if (regno == -1) { fetch_gregs_from_thread (regcache); - if (tdep->has_sve ()) + + /* We attempt to fetch SVE registers if there is support for either + SVE or SME (due to the SSVE state of SME). */ + if (tdep->has_sve () || tdep->has_sme ()) fetch_sveregs_from_thread (regcache); else fetch_fpregs_from_thread (regcache); @@ -501,12 +546,16 @@ aarch64_fetch_registers (struct regcache *regcache, int regno) if (tdep->has_tls ()) fetch_tlsregs_from_thread (regcache); + + if (tdep->has_sme ()) + fetch_za_from_thread (regcache); } /* General purpose register? */ else if (regno < AARCH64_V0_REGNUM) fetch_gregs_from_thread (regcache); /* SVE register? */ - else if (tdep->has_sve () && regno <= AARCH64_SVE_VG_REGNUM) + else if ((tdep->has_sve () || tdep->has_sme ()) + && regno <= AARCH64_SVE_VG_REGNUM) fetch_sveregs_from_thread (regcache); /* FPSIMD register? */ else if (regno <= AARCH64_FPCR_REGNUM) @@ -516,6 +565,10 @@ aarch64_fetch_registers (struct regcache *regcache, int regno) && (regno == AARCH64_PAUTH_DMASK_REGNUM (tdep->pauth_reg_base) || regno == AARCH64_PAUTH_CMASK_REGNUM (tdep->pauth_reg_base))) fetch_pauth_masks_from_thread (regcache); + /* SME register? */ + else if (tdep->has_sme () && regno >= tdep->sme_reg_base + && regno < tdep->sme_reg_base + 3) + fetch_za_from_thread (regcache); /* MTE register? */ else if (tdep->has_mte () && (regno == tdep->mte_reg_base)) @@ -577,7 +630,10 @@ aarch64_store_registers (struct regcache *regcache, int regno) if (regno == -1) { store_gregs_to_thread (regcache); - if (tdep->has_sve ()) + + /* We attempt to store SVE registers if there is support for either + SVE or SME (due to the SSVE state of SME). */ + if (tdep->has_sve () || tdep->has_sme ()) store_sveregs_to_thread (regcache); else store_fpregs_to_thread (regcache); @@ -587,16 +643,24 @@ aarch64_store_registers (struct regcache *regcache, int regno) if (tdep->has_tls ()) store_tlsregs_to_thread (regcache); + + if (tdep->has_sme ()) + store_za_to_thread (regcache); } /* General purpose register? */ else if (regno < AARCH64_V0_REGNUM) store_gregs_to_thread (regcache); /* SVE register? */ - else if (tdep->has_sve () && regno <= AARCH64_SVE_VG_REGNUM) + else if ((tdep->has_sve () || tdep->has_sme ()) + && regno <= AARCH64_SVE_VG_REGNUM) store_sveregs_to_thread (regcache); /* FPSIMD register? */ else if (regno <= AARCH64_FPCR_REGNUM) store_fpregs_to_thread (regcache); + /* SME register? */ + else if (tdep->has_sme () && regno >= tdep->sme_reg_base + && regno < tdep->sme_reg_base + 3) + store_za_to_thread (regcache); /* MTE register? */ else if (tdep->has_mte () && (regno == tdep->mte_reg_base)) @@ -787,10 +851,15 @@ aarch64_linux_nat_target::read_description () CORE_ADDR hwcap2 = linux_get_hwcap2 (); aarch64_features features; + /* SVE/SSVE check. Reading VQ may return either the regular vector length + or the streaming vector length, depending on whether streaming mode is + active or not. */ features.vq = aarch64_sve_get_vq (tid); features.pauth = hwcap & AARCH64_HWCAP_PACA; features.mte = hwcap2 & HWCAP2_MTE; features.tls = aarch64_tls_register_count (tid); + /* SME feature check. */ + features.svq = aarch64_za_get_svq (tid); return aarch64_read_description (features); } @@ -893,21 +962,24 @@ aarch64_linux_nat_target::thread_architecture (ptid_t ptid) if (gdbarch_bfd_arch_info (inf->gdbarch)->bits_per_word == 32) return inf->gdbarch; - /* Only return it if the current vector length matches the one in the tdep. 
*/ + /* Only return the inferior's gdbarch if both vq and svq match the ones in + the tdep. */ aarch64_gdbarch_tdep *tdep = gdbarch_tdep (inf->gdbarch); uint64_t vq = aarch64_sve_get_vq (ptid.lwp ()); - if (vq == tdep->vq) + uint64_t svq = aarch64_za_get_svq (ptid.lwp ()); + if (vq == tdep->vq && svq == tdep->sme_svq) return inf->gdbarch; - /* We reach here if the vector length for the thread is different from its + /* We reach here if any vector length for the thread is different from its value at process start. Lookup gdbarch via info (potentially creating a - new one) by using a target description that corresponds to the new vq value - and the current architecture features. */ + new one) by using a target description that corresponds to the new vq/svq + value and the current architecture features. */ const struct target_desc *tdesc = gdbarch_target_desc (inf->gdbarch); aarch64_features features = aarch64_features_from_target_desc (tdesc); features.vq = vq; + features.svq = svq; struct gdbarch_info info; info.bfd_arch_info = bfd_lookup_arch (bfd_arch_aarch64, bfd_mach_aarch64); diff --git a/gdb/aarch64-tdep.c b/gdb/aarch64-tdep.c index b1d7da91754..8e7259a6f42 100644 --- a/gdb/aarch64-tdep.c +++ b/gdb/aarch64-tdep.c @@ -57,6 +57,8 @@ /* For inferior_ptid and current_inferior (). */ #include "inferior.h" +/* For std::sqrt and std::pow. */ +#include /* A Homogeneous Floating-Point or Short-Vector Aggregate may have at most four members. */ @@ -190,6 +192,43 @@ struct aarch64_prologue_cache trad_frame_saved_reg *saved_regs; }; +/* Holds information used to read/write from/to ZA + pseudo-registers. + + With this information, the read/write code can be simplified so it + deals only with the required information to map a ZA pseudo-register + to the exact bytes into the ZA contents buffer. Otherwise we'd need + to use a lot of conditionals. */ + +struct za_offsets +{ + /* Offset, into ZA, of the starting byte of the pseudo-register. */ + size_t starting_offset; + /* The size of the contiguous chunks of the pseudo-register. */ + size_t chunk_size; + /* The number of pseudo-register chunks contained in ZA. */ + size_t chunks; + /* The offset between each contiguous chunk. */ + size_t stride_size; +}; + +/* Holds data that is helpful to determine the individual fields that make + up the names of the ZA pseudo-registers. It is also very helpful to + determine offsets, stride and sizes for reading ZA tiles and tile + slices. */ + +struct za_pseudo_encoding +{ + /* The slice index (0 ~ svl). Only used for tile slices. */ + uint8_t slice_index; + /* The tile number (0 ~ 15). */ + uint8_t tile_index; + /* Direction (horizontal/vertical). Only used for tile slices. */ + bool horizontal; + /* Qualifier index (0 ~ 4). These map to B, H, S, D and Q. */ + uint8_t qualifier_index; +}; + static void show_aarch64_debug (struct ui_file *file, int from_tty, struct cmd_list_element *c, const char *value) @@ -2147,6 +2186,214 @@ aarch64_vnb_type (struct gdbarch *gdbarch) return tdep->vnb_type; } +/* Return TRUE if REGNUM is a ZA tile slice pseudo-register number. Return + FALSE otherwise. 
*/ + +static bool +is_sme_tile_slice_pseudo_register (struct gdbarch *gdbarch, int regnum) +{ + aarch64_gdbarch_tdep *tdep = gdbarch_tdep (gdbarch); + + gdb_assert (tdep->has_sme ()); + gdb_assert (tdep->sme_svq > 0); + gdb_assert (tdep->sme_pseudo_base <= regnum); + gdb_assert (regnum < tdep->sme_pseudo_base + tdep->sme_pseudo_count); + + if (tdep->sme_tile_slice_pseudo_base <= regnum + && regnum < tdep->sme_tile_slice_pseudo_base + + tdep->sme_tile_slice_pseudo_count) + return true; + + return false; +} + +/* Given REGNUM, a ZA pseudo-register number, return, in ENCODING, the + decoded fields that make up its name. */ + +static void +aarch64_za_decode_pseudos (struct gdbarch *gdbarch, int regnum, + struct za_pseudo_encoding &encoding) +{ + aarch64_gdbarch_tdep *tdep = gdbarch_tdep (gdbarch); + + gdb_assert (tdep->has_sme ()); + gdb_assert (tdep->sme_svq > 0); + gdb_assert (tdep->sme_pseudo_base <= regnum); + gdb_assert (regnum < tdep->sme_pseudo_base + tdep->sme_pseudo_count); + + if (is_sme_tile_slice_pseudo_register (gdbarch, regnum)) + { + /* Calculate the tile slice pseudo-register offset relative to the other + tile slice pseudo-registers. */ + int offset = regnum - tdep->sme_tile_slice_pseudo_base; + + /* Fetch the qualifier. We can have 160 to 2560 possible tile slice + pseudo-registers. Each qualifier (we have 5 of them: B, H, S, D + and Q) covers 32 * svq pseudo-registers, so we divide the offset by + that constant. */ + size_t qualifier = offset / (tdep->sme_svq * 32); + encoding.qualifier_index = qualifier; + + /* Prepare to fetch the direction (d), tile number (t) and slice + number (s). */ + int dts = offset % (tdep->sme_svq * 32); + + /* The direction is represented by the even/odd numbers. Even-numbered + pseudo-registers are horizontal tile slices and odd-numbered + pseudo-registers are vertical tile slices. */ + encoding.horizontal = !(dts & 1); + + /* Fetch the tile number. The tile number is closely related to the + qualifier. B has 1 tile, H has 2 tiles, S has 4 tiles, D has 8 tiles + and Q has 16 tiles. */ + encoding.tile_index = (dts >> 1) & ((1 << qualifier) - 1); + + /* Fetch the slice number. The slice number is closely related to the + qualifier and the svl. */ + encoding.slice_index = dts >> (qualifier + 1); + } + else + { + /* Calculate the tile pseudo-register offset relative to the other + tile pseudo-registers. */ + int offset = regnum - tdep->sme_tile_pseudo_base; + + encoding.qualifier_index = std::floor (std::log2 (offset + 1)); + /* Calculate the tile number. */ + encoding.tile_index = (offset + 1) - (1 << encoding.qualifier_index); + /* Direction and slice index don't get used for tiles. Set them to + 0/false values. */ + encoding.slice_index = 0; + encoding.horizontal = false; + } +} + +/* Return the type for a ZA tile slice pseudo-register based on ENCODING. */ + +static struct type * +aarch64_za_tile_slice_type (struct gdbarch *gdbarch, + const struct za_pseudo_encoding &encoding) +{ + aarch64_gdbarch_tdep *tdep = gdbarch_tdep (gdbarch); + + gdb_assert (tdep->has_sme ()); + gdb_assert (tdep->sme_svq > 0); + + if (tdep->sme_tile_slice_type_q == nullptr) + { + /* Q tile slice type. */ + tdep->sme_tile_slice_type_q + = init_vector_type (builtin_type (gdbarch)->builtin_uint128, + tdep->sme_svq); + /* D tile slice type. */ + tdep->sme_tile_slice_type_d + = init_vector_type (builtin_type (gdbarch)->builtin_uint64, + tdep->sme_svq * 2); + /* S tile slice type. 
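As a quick aside for review (not part of the patch): the tile branch of aarch64_za_decode_pseudos above maps tile pseudo-register offsets 0, 1~2, 3~6, 7~14 and 15~30 to the b, h, s, d and q qualifiers via qualifier = floor (log2 (offset + 1)) and tile = (offset + 1) - 2^qualifier. A minimal standalone sketch, with all names local to the example, that round-trips that mapping over the 31 tiles:

#include <cassert>
#include <cmath>
#include <cstdio>

struct tile_encoding
{
  unsigned qualifier_index;	/* 0..4 -> b, h, s, d, q.  */
  unsigned tile_index;		/* 0..(2^qualifier - 1).  */
};

static tile_encoding
decode_tile (unsigned offset)
{
  tile_encoding e;
  e.qualifier_index = std::floor (std::log2 (offset + 1));
  e.tile_index = (offset + 1) - (1u << e.qualifier_index);
  return e;
}

int
main ()
{
  static const char qualifiers[] = "bhsdq";

  for (unsigned offset = 0; offset < 31; offset++)
    {
      tile_encoding e = decode_tile (offset);
      assert (e.qualifier_index <= 4);
      assert (e.tile_index < (1u << e.qualifier_index));
      printf ("offset %2u -> za%u%c\n", offset, e.tile_index,
	      qualifiers[e.qualifier_index]);
    }
  return 0;
}

Offset 0 prints za0b, offsets 7~14 print za0d~za7d and offset 30 prints za15q, matching the distribution documented later in the patch.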
*/ + tdep->sme_tile_slice_type_s + = init_vector_type (builtin_type (gdbarch)->builtin_uint32, + tdep->sme_svq * 4); + /* H tile slice type. */ + tdep->sme_tile_slice_type_h + = init_vector_type (builtin_type (gdbarch)->builtin_uint16, + tdep->sme_svq * 8); + /* B tile slice type. */ + tdep->sme_tile_slice_type_b + = init_vector_type (builtin_type (gdbarch)->builtin_uint8, + tdep->sme_svq * 16); + } + + switch (encoding.qualifier_index) + { + case 4: + return tdep->sme_tile_slice_type_q; + case 3: + return tdep->sme_tile_slice_type_d; + case 2: + return tdep->sme_tile_slice_type_s; + case 1: + return tdep->sme_tile_slice_type_h; + case 0: + return tdep->sme_tile_slice_type_b; + default: + error (_("Invalid qualifier index %s for tile slice pseudo register."), + pulongest (encoding.qualifier_index)); + } + + gdb_assert_not_reached ("Unknown qualifier for ZA tile slice register"); +} + +/* Return the type for a ZA tile pseudo-register based on ENCODING. */ + +static struct type * +aarch64_za_tile_type (struct gdbarch *gdbarch, + const struct za_pseudo_encoding &encoding) +{ + aarch64_gdbarch_tdep *tdep = gdbarch_tdep (gdbarch); + + gdb_assert (tdep->has_sme ()); + gdb_assert (tdep->sme_svq > 0); + + if (tdep->sme_tile_type_q == nullptr) + { + struct type *inner_vectors_type; + + /* Q tile type. */ + inner_vectors_type + = init_vector_type (builtin_type (gdbarch)->builtin_uint128, + tdep->sme_svq); + tdep->sme_tile_type_q + = init_vector_type (inner_vectors_type, tdep->sme_svq); + + /* D tile type. */ + inner_vectors_type + = init_vector_type (builtin_type (gdbarch)->builtin_uint64, + tdep->sme_svq * 2); + tdep->sme_tile_type_d + = init_vector_type (inner_vectors_type, tdep->sme_svq * 2); + + /* S tile type. */ + inner_vectors_type + = init_vector_type (builtin_type (gdbarch)->builtin_uint32, + tdep->sme_svq * 4); + tdep->sme_tile_type_s + = init_vector_type (inner_vectors_type, tdep->sme_svq * 4); + + /* H tile type. */ + inner_vectors_type + = init_vector_type (builtin_type (gdbarch)->builtin_uint16, + tdep->sme_svq * 8); + tdep->sme_tile_type_h + = init_vector_type (inner_vectors_type, tdep->sme_svq * 8); + + /* B tile type. */ + inner_vectors_type + = init_vector_type (builtin_type (gdbarch)->builtin_uint8, + tdep->sme_svq * 16); + tdep->sme_tile_type_b + = init_vector_type (inner_vectors_type, tdep->sme_svq * 16); + } + + switch (encoding.qualifier_index) + { + case 4: + return tdep->sme_tile_type_q; + case 3: + return tdep->sme_tile_type_d; + case 2: + return tdep->sme_tile_type_s; + case 1: + return tdep->sme_tile_type_h; + case 0: + return tdep->sme_tile_type_b; + default: + error (_("Invalid qualifier index %s for ZA tile pseudo register."), + pulongest (encoding.qualifier_index)); + } + + gdb_assert_not_reached ("unknown qualifier for tile pseudo-register"); +} + /* Return the type for an AdvSISD V register. */ static struct type * @@ -2579,6 +2826,73 @@ is_w_pseudo_register (struct gdbarch *gdbarch, int regnum) return false; } +/* Return TRUE if REGNUM is a SME pseudo-register number. Return FALSE + otherwise. */ + +static bool +is_sme_pseudo_register (struct gdbarch *gdbarch, int regnum) +{ + aarch64_gdbarch_tdep *tdep = gdbarch_tdep (gdbarch); + + if (tdep->has_sme () && tdep->sme_pseudo_base <= regnum + && regnum < tdep->sme_pseudo_base + tdep->sme_pseudo_count) + return true; + + return false; +} + +/* Convert ENCODING into a ZA tile slice name. 
*/ + +static const std::string +aarch64_za_tile_slice_name (const struct za_pseudo_encoding &encoding) +{ + gdb_assert (encoding.qualifier_index >= 0); + gdb_assert (encoding.qualifier_index <= 4); + gdb_assert (encoding.tile_index >= 0); + gdb_assert (encoding.tile_index <= 15); + gdb_assert (encoding.slice_index >= 0); + gdb_assert (encoding.slice_index <= 255); + + const char orientation = encoding.horizontal ? 'h' : 'v'; + + const char qualifiers[6] = "bhsdq"; + const char qualifier = qualifiers [encoding.qualifier_index]; + return string_printf ("za%d%c%c%d", encoding.tile_index, orientation, + qualifier, encoding.slice_index); +} + +/* Convert ENCODING into a ZA tile name. */ + +static const std::string +aarch64_za_tile_name (const struct za_pseudo_encoding &encoding) +{ + /* Tiles don't use the slice number and the direction fields. */ + gdb_assert (encoding.qualifier_index >= 0); + gdb_assert (encoding.qualifier_index <= 4); + gdb_assert (encoding.tile_index >= 0); + gdb_assert (encoding.tile_index <= 15); + + const char qualifiers[6] = "bhsdq"; + const char qualifier = qualifiers [encoding.qualifier_index]; + return (string_printf ("za%d%c", encoding.tile_index, qualifier)); +} + +/* Given a SME pseudo-register REGNUM, return its type. */ + +static struct type * +aarch64_sme_pseudo_register_type (struct gdbarch *gdbarch, int regnum) +{ + struct za_pseudo_encoding encoding; + + /* Decode the SME pseudo-register number. */ + aarch64_za_decode_pseudos (gdbarch, regnum, encoding); + + if (is_sme_tile_slice_pseudo_register (gdbarch, regnum)) + return aarch64_za_tile_slice_type (gdbarch, encoding); + else + return aarch64_za_tile_type (gdbarch, encoding); +} + /* Return the pseudo register name corresponding to register regnum. */ static const char * @@ -2699,6 +3013,9 @@ aarch64_pseudo_register_name (struct gdbarch *gdbarch, int regnum) return sve_v_name[p_regnum - AARCH64_SVE_V0_REGNUM]; } + if (is_sme_pseudo_register (gdbarch, regnum)) + return tdep->sme_pseudo_names[regnum - tdep->sme_pseudo_base].c_str (); + /* RA_STATE is used for unwinding only. Do not assign it a name - this prevents it from being read by methods such as mi_cmd_trace_frame_collected. */ @@ -2741,6 +3058,9 @@ aarch64_pseudo_register_type (struct gdbarch *gdbarch, int regnum) if (is_w_pseudo_register (gdbarch, regnum)) return builtin_type (gdbarch)->builtin_uint32; + if (is_sme_pseudo_register (gdbarch, regnum)) + return aarch64_sme_pseudo_register_type (gdbarch, regnum); + if (tdep->has_pauth () && regnum == tdep->ra_sign_state_regnum) return builtin_type (gdbarch)->builtin_uint64; @@ -2773,6 +3093,8 @@ aarch64_pseudo_register_reggroup_p (struct gdbarch *gdbarch, int regnum, else if (tdep->has_sve () && p_regnum >= AARCH64_SVE_V0_REGNUM && p_regnum < AARCH64_SVE_V0_REGNUM + AARCH64_V_REGS_NUM) return group == all_reggroup || group == vector_reggroup; + else if (is_sme_pseudo_register (gdbarch, regnum)) + return group == all_reggroup || group == vector_reggroup; /* RA_STATE is used for unwinding only. Do not assign it to any groups. */ if (tdep->has_pauth () && regnum == tdep->ra_sign_state_regnum) return 0; @@ -2802,6 +3124,122 @@ aarch64_pseudo_read_value_1 (struct gdbarch *gdbarch, return result_value; } +/* Helper function for reading/writing ZA pseudo-registers. Given REGNUM, + a ZA pseudo-register number, return, in OFFSETS, the information on positioning + of the bytes that must be read from/written to. 
*/ + +static void +aarch64_za_offsets_from_regnum (struct gdbarch *gdbarch, int regnum, + struct za_offsets &offsets) +{ + aarch64_gdbarch_tdep *tdep = gdbarch_tdep (gdbarch); + + gdb_assert (tdep->has_sme ()); + gdb_assert (tdep->sme_svq > 0); + gdb_assert (tdep->sme_pseudo_base <= regnum); + gdb_assert (regnum < tdep->sme_pseudo_base + tdep->sme_pseudo_count); + + struct za_pseudo_encoding encoding; + + /* Decode the ZA pseudo-register number. */ + aarch64_za_decode_pseudos (gdbarch, regnum, encoding); + + /* Fetch the streaming vector length. */ + size_t svl = sve_vl_from_vq (tdep->sme_svq); + + if (is_sme_tile_slice_pseudo_register (gdbarch, regnum)) + { + if (encoding.horizontal) + { + /* Horizontal tile slices are contiguous ranges of svl bytes. */ + + /* The starting offset depends on the tile index (to locate the tile + in the ZA buffer), the slice index (to locate the slice within the + tile) and the qualifier. */ + offsets.starting_offset + = encoding.tile_index * svl + encoding.slice_index + * (svl >> encoding.qualifier_index); + /* Horizontal tile slice data is contiguous and thus doesn't have + a stride. */ + offsets.stride_size = 0; + /* Horizontal tile slice data is contiguous and thus only has 1 + chunk. */ + offsets.chunks = 1; + /* The chunk size is always svl bytes. */ + offsets.chunk_size = svl; + } + else + { + /* Vertical tile slices are non-contiguous ranges of + (1 << qualifier_index) bytes. */ + + /* The starting offset depends on the tile number (to locate the + tile in the ZA buffer), the slice index (to locate the element + within the tile slice) and the qualifier. */ + offsets.starting_offset + = encoding.tile_index * svl + encoding.slice_index + * (1 << encoding.qualifier_index); + /* The offset between vertical tile slices depends on the qualifier + and svl. */ + offsets.stride_size = svl << encoding.qualifier_index; + /* The number of chunks depends on svl and the qualifier size. */ + offsets.chunks = svl >> encoding.qualifier_index; + /* The chunk size depends on the qualifier. */ + offsets.chunk_size = 1 << encoding.qualifier_index; + } + } + else + { + /* ZA tile pseudo-register. */ + + /* Starting offset depends on the tile index and qualifier. */ + offsets.starting_offset = encoding.tile_index * svl; + /* The offset between tile slices depends on the qualifier and svl. */ + offsets.stride_size = svl << encoding.qualifier_index; + /* The number of chunks depends on the qualifier and svl. */ + offsets.chunks = svl >> encoding.qualifier_index; + /* The chunk size is always svl bytes. */ + offsets.chunk_size = svl; + } +} + +/* Given REGNUM, a SME pseudo-register number, return its value in RESULT. */ + +static struct value * +aarch64_sme_pseudo_register_read (struct gdbarch *gdbarch, + readable_regcache *regcache, int regnum, + struct value *result) +{ + aarch64_gdbarch_tdep *tdep = gdbarch_tdep (gdbarch); + + gdb_assert (tdep->has_sme ()); + gdb_assert (tdep->sme_svq > 0); + gdb_assert (tdep->sme_pseudo_base <= regnum); + gdb_assert (regnum < tdep->sme_pseudo_base + tdep->sme_pseudo_count); + + /* Fetch the offsets that we need in order to read from the correct blocks + of ZA. */ + struct za_offsets offsets; + aarch64_za_offsets_from_regnum (gdbarch, regnum, offsets); + + /* Fetch the contents of ZA. */ + size_t svl = sve_vl_from_vq (tdep->sme_svq); + gdb::byte_vector za (std::pow (svl, 2)); + regcache->raw_read (tdep->sme_za_regnum, za.data ()); + + /* Copy the requested data. 
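To make the offset/stride arithmetic above easier to review, here is a minimal standalone sketch (not part of the patch; all names are local to the example). It reproduces the three cases — horizontal slice, vertical slice and whole tile — and checks that the chunks described always add up to the pseudo-register size (svl bytes for a slice, (svl >> qualifier) * svl bytes for a tile):

#include <cassert>
#include <cstddef>

struct offsets { size_t start, chunk_size, chunks, stride; };

static offsets
slice_offsets (size_t svl, unsigned tile, unsigned slice, unsigned qual,
	       bool horizontal)
{
  if (horizontal)
    /* One contiguous run of svl bytes.  */
    return { tile * svl + slice * (svl >> qual), svl, 1, 0 };
  /* svl >> qual chunks of (1 << qual) bytes, svl << qual bytes apart.  */
  return { tile * svl + slice * ((size_t) 1 << qual), (size_t) 1 << qual,
	   svl >> qual, svl << qual };
}

static offsets
tile_offsets (size_t svl, unsigned tile, unsigned qual)
{
  return { tile * svl, svl, svl >> qual, svl << qual };
}

int
main ()
{
  const size_t svl = 32;	/* svq = 2, i.e. a 256-bit SVL.  */

  /* za1vd2: tile 1, vertical, 64-bit elements, slice 2.  */
  offsets v = slice_offsets (svl, 1, 2, 3, false);
  assert (v.chunks * v.chunk_size == svl);

  /* za1hd2: same slice, horizontal, hence a single contiguous chunk.  */
  offsets h = slice_offsets (svl, 1, 2, 3, true);
  assert (h.chunks == 1 && h.chunk_size == svl && h.stride == 0);

  /* za3s: the whole 32-bit-element tile number 3.  */
  offsets t = tile_offsets (svl, 3, 2);
  assert (t.chunks * t.chunk_size == (svl >> 2) * svl);

  return 0;
}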
*/ + for (int chunks = 0; chunks < offsets.chunks; chunks++) + { + const gdb_byte *source + = za.data () + offsets.starting_offset + chunks * offsets.stride_size; + gdb_byte *destination + = result->contents_raw ().data () + chunks * offsets.chunk_size; + + memcpy (destination, source, offsets.chunk_size); + } + return result; +} + /* Implement the "pseudo_register_read_value" gdbarch method. */ static struct value * @@ -2835,6 +3273,9 @@ aarch64_pseudo_read_value (struct gdbarch *gdbarch, readable_regcache *regcache, return result_value; } + else if (is_sme_pseudo_register (gdbarch, regnum)) + return aarch64_sme_pseudo_register_read (gdbarch, regcache, regnum, + result_value); regnum -= gdbarch_num_regs (gdbarch); @@ -2894,6 +3335,44 @@ aarch64_pseudo_write_1 (struct gdbarch *gdbarch, struct regcache *regcache, regcache->raw_write (v_regnum, reg_buf); } +/* Given REGNUM, a SME pseudo-register number, store the bytes from DATA to the + pseudo-register. */ + +static void +aarch64_sme_pseudo_register_write (struct gdbarch *gdbarch, + struct regcache *regcache, + int regnum, const gdb_byte *data) +{ + aarch64_gdbarch_tdep *tdep = gdbarch_tdep (gdbarch); + + gdb_assert (tdep->has_sme ()); + gdb_assert (tdep->sme_svq > 0); + gdb_assert (tdep->sme_pseudo_base <= regnum); + gdb_assert (regnum < tdep->sme_pseudo_base + tdep->sme_pseudo_count); + + /* Fetch the offsets that we need in order to write to the correct blocks + of ZA. */ + struct za_offsets offsets; + aarch64_za_offsets_from_regnum (gdbarch, regnum, offsets); + + /* Fetch the contents of ZA. */ + size_t svl = sve_vl_from_vq (tdep->sme_svq); + gdb::byte_vector za (std::pow (svl, 2)); + + /* Copy the requested data. */ + for (int chunks = 0; chunks < offsets.chunks; chunks++) + { + const gdb_byte *source = data + chunks * offsets.chunk_size; + gdb_byte *destination + = za.data () + offsets.starting_offset + chunks * offsets.stride_size; + + memcpy (destination, source, offsets.chunk_size); + } + + /* Write back to ZA. */ + regcache->raw_write (tdep->sme_za_regnum, za.data ()); +} + /* Implement the "pseudo_register_write" gdbarch method. */ static void @@ -2921,6 +3400,11 @@ aarch64_pseudo_write (struct gdbarch *gdbarch, struct regcache *regcache, regcache->raw_write_part (x_regnum, offset, 4, buf); return; } + else if (is_sme_pseudo_register (gdbarch, regnum)) + { + aarch64_sme_pseudo_register_write (gdbarch, regcache, regnum, buf); + return; + } regnum -= gdbarch_num_regs (gdbarch); @@ -3500,6 +3984,33 @@ aarch64_get_tdesc_vq (const struct target_desc *tdesc) return sve_vq_from_vl (vl); } + +/* Return the svq (streaming vector quotient) used when creating the target + description TDESC. */ + +static uint64_t +aarch64_get_tdesc_svq (const struct target_desc *tdesc) +{ + const struct tdesc_feature *feature_sme; + + if (!tdesc_has_registers (tdesc)) + return 0; + + feature_sme = tdesc_find_feature (tdesc, "org.gnu.gdb.aarch64.sme"); + + if (feature_sme == nullptr) + return 0; + + size_t svl_squared = tdesc_register_bitsize (feature_sme, "za"); + + /* We have the total size of the ZA matrix, in bits. Figure out the svl + value. */ + size_t svl = std::sqrt (svl_squared / 8); + + /* Now extract svq. */ + return sve_vq_from_vl (svl); +} + /* Get the AArch64 features present in the given target description. 
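The square-root recovery of svl from the tdesc bitsize in aarch64_get_tdesc_svq can be sanity-checked in isolation. A small sketch (not part of the patch) assuming the "za" register is exposed as an svl x svl byte matrix, and modelling sve_vq_from_vl simply as vl / 16:

#include <cassert>
#include <cmath>
#include <cstdint>

int
main ()
{
  for (uint64_t svq : { 1, 2, 4, 8, 16 })
    {
      uint64_t svl = svq * 16;			/* Bytes.  */
      uint64_t za_bitsize = svl * svl * 8;	/* As exposed in the tdesc.  */

      uint64_t recovered_svl = std::sqrt (za_bitsize / 8);
      assert (recovered_svl == svl);
      assert (recovered_svl / 16 == svq);	/* sve_vq_from_vl.  */
    }
  return 0;
}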
*/ aarch64_features @@ -3535,6 +4046,8 @@ aarch64_features_from_target_desc (const struct target_desc *tdesc) features.tls = 1; } + features.svq = aarch64_get_tdesc_svq (tdesc); + return features; } @@ -3654,6 +4167,35 @@ aarch64_remove_non_address_bits (struct gdbarch *gdbarch, CORE_ADDR pointer) return aarch64_remove_top_bits (pointer, mask); } +/* Given NAMES, a vector of strings, initialize it with all the SME + pseudo-register names for the current streaming vector length. */ + +static void +aarch64_initialize_sme_pseudo_names (struct gdbarch *gdbarch, + std::vector &names) +{ + aarch64_gdbarch_tdep *tdep = gdbarch_tdep (gdbarch); + + gdb_assert (tdep->has_sme ()); + gdb_assert (tdep->sme_tile_slice_pseudo_base > 0); + gdb_assert (tdep->sme_tile_pseudo_base > 0); + + for (int i = 0; i < tdep->sme_tile_slice_pseudo_count; i++) + { + int regnum = tdep->sme_tile_slice_pseudo_base + i; + struct za_pseudo_encoding encoding; + aarch64_za_decode_pseudos (gdbarch, regnum, encoding); + names.push_back (aarch64_za_tile_slice_name (encoding)); + } + for (int i = 0; i < AARCH64_ZA_TILES_NUM; i++) + { + int regnum = tdep->sme_tile_pseudo_base + i; + struct za_pseudo_encoding encoding; + aarch64_za_decode_pseudos (gdbarch, regnum, encoding); + names.push_back (aarch64_za_tile_name (encoding)); + } +} + /* Initialize the current architecture based on INFO. If possible, re-use an architecture from ARCHES, which is a list of architectures already created during this debugging session. @@ -3671,11 +4213,17 @@ aarch64_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches) int first_pauth_regnum = -1, ra_sign_state_offset = -1; int first_mte_regnum = -1, first_tls_regnum = -1; uint64_t vq = aarch64_get_tdesc_vq (info.target_desc); + uint64_t svq = aarch64_get_tdesc_svq (info.target_desc); if (vq > AARCH64_MAX_SVE_VQ) internal_error (_("VQ out of bounds: %s (max %d)"), pulongest (vq), AARCH64_MAX_SVE_VQ); + if (svq > AARCH64_MAX_SVE_VQ) + internal_error (_("Streaming vector quotient (svq) out of bounds: %s" + " (max %d)"), + pulongest (svq), AARCH64_MAX_SVE_VQ); + /* If there is already a candidate, use it. */ for (gdbarch_list *best_arch = gdbarch_list_lookup_by_info (arches, &info); best_arch != nullptr; @@ -3683,15 +4231,21 @@ aarch64_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches) { aarch64_gdbarch_tdep *tdep = gdbarch_tdep (best_arch->gdbarch); - if (tdep && tdep->vq == vq) + if (tdep && tdep->vq == vq && tdep->sme_svq == svq) return best_arch->gdbarch; } /* Ensure we always have a target descriptor, and that it is for the given VQ value. */ const struct target_desc *tdesc = info.target_desc; - if (!tdesc_has_registers (tdesc)) - tdesc = aarch64_read_description ({}); + if (!tdesc_has_registers (tdesc) || vq != aarch64_get_tdesc_vq (tdesc) + || svq != aarch64_get_tdesc_svq (tdesc)) + { + aarch64_features features; + features.vq = vq; + features.svq = svq; + tdesc = aarch64_read_description (features); + } gdb_assert (tdesc); feature_core = tdesc_find_feature (tdesc,"org.gnu.gdb.aarch64.core"); @@ -3755,6 +4309,35 @@ aarch64_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches) num_pseudo_regs += 32; /* add the Bn scalar register pseudos */ } + int first_sme_regnum = -1; + int first_sme_pseudo_regnum = -1; + const struct tdesc_feature *feature_sme + = tdesc_find_feature (tdesc, "org.gnu.gdb.aarch64.sme"); + if (feature_sme != nullptr) + { + /* Record the first SME register. 
*/ + first_sme_regnum = num_regs; + + valid_p &= tdesc_numbered_register (feature_sme, tdesc_data.get (), + num_regs++, "svg"); + + valid_p &= tdesc_numbered_register (feature_sme, tdesc_data.get (), + num_regs++, "svcr"); + + valid_p &= tdesc_numbered_register (feature_sme, tdesc_data.get (), + num_regs++, "za"); + + /* Record the first SME pseudo register. */ + first_sme_pseudo_regnum = num_pseudo_regs; + + /* Add the ZA tile slice pseudo registers. The number of tile slice + pseudo-registers depend on the svl, and is always a multiple of 5. */ + num_pseudo_regs += (svq << 5) * 5; + + /* Add the ZA tile pseudo registers. */ + num_pseudo_regs += AARCH64_ZA_TILES_NUM; + } + /* Add the TLS register. */ int tls_register_count = 0; if (feature_tls != nullptr) @@ -3868,6 +4451,14 @@ aarch64_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches) tdep->tls_regnum_base = first_tls_regnum; tdep->tls_register_count = tls_register_count; + /* Set the SME register set details. The pseudo-registers will be adjusted + later. */ + tdep->sme_reg_base = first_sme_regnum; + tdep->sme_svg_regnum = first_sme_regnum; + tdep->sme_svcr_regnum = first_sme_regnum + 1; + tdep->sme_za_regnum = first_sme_regnum + 2; + tdep->sme_svq = svq; + set_gdbarch_push_dummy_call (gdbarch, aarch64_push_dummy_call); set_gdbarch_frame_align (gdbarch, aarch64_frame_align); @@ -3984,6 +4575,86 @@ aarch64_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches) set_gdbarch_remove_non_address_bits (gdbarch, aarch64_remove_non_address_bits); + /* SME pseudo-registers. */ + if (tdep->has_sme ()) + { + tdep->sme_pseudo_base = num_regs + first_sme_pseudo_regnum; + tdep->sme_tile_slice_pseudo_base = tdep->sme_pseudo_base; + tdep->sme_tile_slice_pseudo_count = (svq * 32) * 5; + tdep->sme_tile_pseudo_base + = tdep->sme_pseudo_base + tdep->sme_tile_slice_pseudo_count; + tdep->sme_pseudo_count + = tdep->sme_tile_slice_pseudo_count + AARCH64_ZA_TILES_NUM; + + /* The SME ZA pseudo-registers are a set of 160 to 2560 pseudo-registers + depending on the value of svl. + + The tile pseudo-registers are organized around their qualifiers + (b, h, s, d and q). Their numbers are distributed as follows: + + b 0 + h 1~2 + s 3~6 + d 7~14 + q 15~30 + + The naming of the tile pseudo-registers follows the pattern za, + where: + + is the tile number, with the following possible values based on + the qualifiers: + + Qualifier - Allocated indexes + + b - 0 + h - 0~1 + s - 0~3 + d - 0~7 + q - 0~15 + + is the qualifier: b, h, s, d and q. + + The tile slice pseudo-registers are organized around their + qualifiers as well (b, h, s, d and q), but also around their + direction (h - horizontal and v - vertical). + + Even-numbered tile slice pseudo-registers are horizontally-oriented + and odd-numbered tile slice pseudo-registers are vertically-oriented. + + Their numbers are distributed as follows: + + Qualifier - Allocated indexes + + b tile slices - 0~511 + h tile slices - 512~1023 + s tile slices - 1024~1535 + d tile slices - 1536~2047 + q tile slices - 2048~2559 + + The naming of the tile slice pseudo-registers follows the pattern + za, where: + + is the tile number as described for the tile pseudo-registers. 
+ is the direction of the tile slice (h or v) + is the qualifier of the tile slice (b, h, s, d or q) + is the slice number, defined as follows: + + Qualifier - Allocated indexes + + b - 0~15 + h - 0~7 + s - 0~3 + d - 0~1 + q - 0 + + We have helper functions to translate to/from register index from/to + the set of fields that make the pseudo-register names. */ + + /* Build the array of pseudo-register names available for this + particular gdbarch configuration. */ + aarch64_initialize_sme_pseudo_names (gdbarch, tdep->sme_pseudo_names); + } + /* Add standard register aliases. */ for (i = 0; i < ARRAY_SIZE (aarch64_register_aliases); i++) user_reg_add (gdbarch, aarch64_register_aliases[i].name, @@ -4005,6 +4676,48 @@ aarch64_dump_tdep (struct gdbarch *gdbarch, struct ui_file *file) gdb_printf (file, _("aarch64_dump_tdep: Lowest pc = 0x%s\n"), paddress (gdbarch, tdep->lowest_pc)); + + /* SME fields. */ + gdb_printf (file, _("aarch64_dump_tdep: sme_tile_type_q = %s\n"), + host_address_to_string (tdep->sme_tile_type_q)); + gdb_printf (file, _("aarch64_dump_tdep: sme_tile_type_d = %s\n"), + host_address_to_string (tdep->sme_tile_type_d)); + gdb_printf (file, _("aarch64_dump_tdep: sme_tile_type_s = %s\n"), + host_address_to_string (tdep->sme_tile_type_s)); + gdb_printf (file, _("aarch64_dump_tdep: sme_tile_type_h = %s\n"), + host_address_to_string (tdep->sme_tile_type_h)); + gdb_printf (file, _("aarch64_dump_tdep: sme_tile_type_n = %s\n"), + host_address_to_string (tdep->sme_tile_type_b)); + gdb_printf (file, _("aarch64_dump_tdep: sme_tile_slice_type_q = %s\n"), + host_address_to_string (tdep->sme_tile_slice_type_q)); + gdb_printf (file, _("aarch64_dump_tdep: sme_tile_slice_type_d = %s\n"), + host_address_to_string (tdep->sme_tile_slice_type_d)); + gdb_printf (file, _("aarch64_dump_tdep: sme_tile_slice_type_s = %s\n"), + host_address_to_string (tdep->sme_tile_slice_type_s)); + gdb_printf (file, _("aarch64_dump_tdep: sme_tile_slice_type_h = %s\n"), + host_address_to_string (tdep->sme_tile_slice_type_h)); + gdb_printf (file, _("aarch64_dump_tdep: sme_tile_slice_type_b = %s\n"), + host_address_to_string (tdep->sme_tile_slice_type_b)); + gdb_printf (file, _("aarch64_dump_tdep: sme_reg_base = %s\n"), + pulongest (tdep->sme_reg_base)); + gdb_printf (file, _("aarch64_dump_tdep: sme_svg_regnum = %s\n"), + pulongest (tdep->sme_svg_regnum)); + gdb_printf (file, _("aarch64_dump_tdep: sme_svcr_regnum = %s\n"), + pulongest (tdep->sme_svcr_regnum)); + gdb_printf (file, _("aarch64_dump_tdep: sme_za_regnum = %s\n"), + pulongest (tdep->sme_za_regnum)); + gdb_printf (file, _("aarch64_dump_tdep: sme_pseudo_base = %s\n"), + pulongest (tdep->sme_pseudo_base)); + gdb_printf (file, _("aarch64_dump_tdep: sme_pseudo_count = %s\n"), + pulongest (tdep->sme_pseudo_count)); + gdb_printf (file, _("aarch64_dump_tdep: sme_tile_slice_pseudo_base = %s\n"), + pulongest (tdep->sme_tile_slice_pseudo_base)); + gdb_printf (file, _("aarch64_dump_tdep: sme_tile_slice_pseudo_count = %s\n"), + pulongest (tdep->sme_tile_slice_pseudo_count)); + gdb_printf (file, _("aarch64_dump_tdep: sme_tile_pseudo_base = %s\n"), + pulongest (tdep->sme_tile_pseudo_base)); + gdb_printf (file, _("aarch64_dump_tdep: sme_svq = %s\n"), + pulongest (tdep->sme_svq)); } #if GDB_SELF_TEST diff --git a/gdb/aarch64-tdep.h b/gdb/aarch64-tdep.h index 505e050ba48..9297487a584 100644 --- a/gdb/aarch64-tdep.h +++ b/gdb/aarch64-tdep.h @@ -80,6 +80,22 @@ struct aarch64_gdbarch_tdep : gdbarch_tdep_base struct type *vnb_type = nullptr; struct type *vnv_type = nullptr; + /* 
Types for SME ZA tiles and tile slices pseudo-registers. */ + struct type *sme_tile_type_q = nullptr; + struct type *sme_tile_type_d = nullptr; + struct type *sme_tile_type_s = nullptr; + struct type *sme_tile_type_h = nullptr; + struct type *sme_tile_type_b = nullptr; + struct type *sme_tile_slice_type_q = nullptr; + struct type *sme_tile_slice_type_d = nullptr; + struct type *sme_tile_slice_type_s = nullptr; + struct type *sme_tile_slice_type_h = nullptr; + struct type *sme_tile_slice_type_b = nullptr; + + /* Vector of names for SME pseudo-registers. The number of elements is + different for each distinct svl value. */ + std::vector sme_pseudo_names; + /* syscall record. */ int (*aarch64_syscall_record) (struct regcache *regcache, unsigned long svc_number) = nullptr; @@ -125,6 +141,37 @@ struct aarch64_gdbarch_tdep : gdbarch_tdep_base /* The W pseudo-registers. */ int w_pseudo_base = 0; int w_pseudo_count = 0; + + /* SME feature fields. */ + + /* Index of the first SME register. This is -1 if SME is not supported. */ + int sme_reg_base = 0; + /* svg register index. */ + int sme_svg_regnum = 0; + /* svcr register index. */ + int sme_svcr_regnum = 0; + /* ZA register index. */ + int sme_za_regnum = 0; + /* Index of the first SME pseudo-register. This is -1 if SME is not + supported. */ + int sme_pseudo_base = 0; + /* Total number of SME pseudo-registers. */ + int sme_pseudo_count = 0; + /* First tile slice pseudo-register index. */ + int sme_tile_slice_pseudo_base = 0; + /* Total number of tile slice pseudo-registers. */ + int sme_tile_slice_pseudo_count = 0; + /* First tile pseudo-register index. */ + int sme_tile_pseudo_base = 0; + /* The streaming vector quotient (svq) for SME, or zero if SME is not + supported. */ + size_t sme_svq = 0; + + /* Return true if the target supports SME, and false otherwise. */ + bool has_sme () const + { + return sme_svq != 0; + } }; const target_desc *aarch64_read_description (const aarch64_features &features); diff --git a/gdb/arch/aarch64-scalable-linux.c b/gdb/arch/aarch64-scalable-linux.c new file mode 100644 index 00000000000..3803acfd9a8 --- /dev/null +++ b/gdb/arch/aarch64-scalable-linux.c @@ -0,0 +1,21 @@ +/* Common Linux arch-specific functionality for AArch64 scalable + extensions: SVE and SME. + + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of GDB. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include "arch/aarch64-scalable-linux.h" diff --git a/gdb/arch/aarch64-scalable-linux.h b/gdb/arch/aarch64-scalable-linux.h new file mode 100644 index 00000000000..df1741004ed --- /dev/null +++ b/gdb/arch/aarch64-scalable-linux.h @@ -0,0 +1,38 @@ +/* Common AArch64 Linux arch-specific definitions for the scalable + extensions: SVE and SME. + + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of GDB. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef ARCH_AARCH64_SCALABLE_LINUX_H +#define ARCH_AARCH64_SCALABLE_LINUX_H + +#include "gdbsupport/common-defs.h" + +/* Feature check for Scalable Matrix Extension. */ +#ifndef HWCAP2_SME +#define HWCAP2_SME (1 << 23) +#endif + +/* Streaming mode enabled/disabled bit. */ +#define SVCR_SM_BIT (1 << 0) +/* ZA enabled/disabled bit. */ +#define SVCR_ZA_BIT (1 << 1) +/* Mask including all valid SVCR bits. */ +#define SVCR_BIT_MASK (SVCR_SM_BIT | SVCR_ZA_BIT) + +#endif /* ARCH_AARCH64_SCALABLE_LINUX_H */ diff --git a/gdb/arch/aarch64.c b/gdb/arch/aarch64.c index 8644b9afcef..e1f4948aa25 100644 --- a/gdb/arch/aarch64.c +++ b/gdb/arch/aarch64.c @@ -24,6 +24,7 @@ #include "../features/aarch64-sve.c" #include "../features/aarch64-pauth.c" #include "../features/aarch64-mte.c" +#include "../features/aarch64-sme.c" #include "../features/aarch64-tls.c" /* See arch/aarch64.h. */ @@ -57,6 +58,10 @@ aarch64_create_target_description (const aarch64_features &features) if (features.tls > 0) regnum = create_feature_aarch64_tls (tdesc.get (), regnum, features.tls); + if (features.svq) + regnum = create_feature_aarch64_sme (tdesc.get (), regnum, + sve_vl_from_vq (features.svq)); + return tdesc.release (); } diff --git a/gdb/arch/aarch64.h b/gdb/arch/aarch64.h index 4b3f1b8eff8..c1cd233c51e 100644 --- a/gdb/arch/aarch64.h +++ b/gdb/arch/aarch64.h @@ -27,15 +27,27 @@ struct aarch64_features { /* A non zero VQ value indicates both the presence of SVE and the - Vector Quotient - the number of 128bit chunks in an SVE Z - register. */ - uint64_t vq = 0; + Vector Quotient - the number of 128-bit chunks in an SVE Z + register. + The maximum value for VQ is 16 (5 bits). */ + uint64_t vq = 0; bool pauth = false; bool mte = false; /* A positive TLS value indicates the number of TLS registers available. */ uint8_t tls = 0; + /* The allowed values for SVQ are the following: + + 0 - SME is not supported/available. + 1 - SME is available, SVL is 16 bytes / 128-bit. + 2 - SME is available, SVL is 32 bytes / 256-bit. + 4 - SME is available, SVL is 64 bytes / 512-bit. + 8 - SME is available, SVL is 128 bytes / 1024-bit. + 16 - SME is available, SVL is 256 bytes / 2048-bit. + + These use at most 5 bits to represent. */ + uint8_t svq = 0; }; inline bool operator==(const aarch64_features &lhs, const aarch64_features &rhs) @@ -43,7 +55,8 @@ inline bool operator==(const aarch64_features &lhs, const aarch64_features &rhs) return lhs.vq == rhs.vq && lhs.pauth == rhs.pauth && lhs.mte == rhs.mte - && lhs.tls == rhs.tls; + && lhs.tls == rhs.tls + && lhs.svq == rhs.svq; } namespace std @@ -61,6 +74,11 @@ namespace std /* Shift by two bits for now. We may need to increase this in the future if more TLS registers get added. */ h = h << 2 | features.tls; + + /* Make sure the SVQ values are within the limits. 
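For review purposes, a tiny standalone illustration (not part of the patch) of how the two SVCR bits defined in this header are meant to be read: SM selects streaming mode (the SSVE state) and ZA enables the ZA storage. The constants are restated locally; everything else is specific to the example:

#include <cstdint>
#include <cstdio>

constexpr uint64_t SVCR_SM_BIT = 1 << 0;
constexpr uint64_t SVCR_ZA_BIT = 1 << 1;

static void
describe_svcr (uint64_t svcr)
{
  printf ("SVCR = %llu: streaming mode %s, ZA storage %s\n",
	  (unsigned long long) svcr,
	  (svcr & SVCR_SM_BIT) ? "on" : "off",
	  (svcr & SVCR_ZA_BIT) ? "on" : "off");
}

int
main ()
{
  for (uint64_t svcr = 0; svcr <= 3; svcr++)
    describe_svcr (svcr);
  return 0;
}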
*/ + gdb_assert (features.svq >= 0); + gdb_assert (features.svq <= 16); + h = h << 5 | (features.svq & 0x5); return h; } }; @@ -171,4 +189,35 @@ enum aarch64_regnum /* Maximum supported VQ value. Increase if required. */ #define AARCH64_MAX_SVE_VQ 16 +/* SME definitions + + Some of these definitions are not found in the Architecture Reference + Manual, but we use them so we can keep a similar standard compared to the + SVE definitions that the Linux Kernel uses. Otherwise it can get + confusing. + + SVL : Streaming Vector Length. + Although the documentation handles SVL in bits, we do it in + bytes to match what we do for SVE. + + The streaming vector length dictates the size of the ZA register and + the size of the SVE registers when in streaming mode. + + SVQ : Streaming Vector Quotient. + The number of 128-bit chunks in an SVE Z register or the size of + each dimension of the SME ZA matrix. + + SVG : Streaming Vector Granule. + The number of 64-bit chunks in an SVE Z register or the size of + half a SME ZA matrix dimension. The SVG definition was added so + we keep a familiar definition when dealing with SVE registers in + streaming mode. */ + +/* The total number of tiles. This is always fixed regardless of the + streaming vector length (svl). */ +#define AARCH64_ZA_TILES_NUM 31 +/* svl limits for SME. */ +#define AARCH64_SME_MIN_SVL 128 +#define AARCH64_SME_MAX_SVL 2048 + #endif /* ARCH_AARCH64_H */ diff --git a/gdb/configure.tgt b/gdb/configure.tgt index d5b7dd1e7d7..47a674201f9 100644 --- a/gdb/configure.tgt +++ b/gdb/configure.tgt @@ -146,6 +146,7 @@ aarch64*-*-linux*) # Target: AArch64 linux gdb_target_obs="aarch64-linux-tdep.o arch/aarch64.o\ arch/aarch64-mte-linux.o \ + arch/aarch64-scalable-linux.o \ arch/arm.o arch/arm-linux.o arch/arm-get-next-pcs.o \ arm-tdep.o arm-linux-tdep.o \ glibc-tdep.o linux-tdep.o solib-svr4.o \ diff --git a/gdb/features/aarch64-sme.c b/gdb/features/aarch64-sme.c new file mode 100644 index 00000000000..697a3014093 --- /dev/null +++ b/gdb/features/aarch64-sme.c @@ -0,0 +1,63 @@ +/* Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of GDB. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include "gdbsupport/tdesc.h" +#include + +/* This function is NOT auto generated from xml. Create the AArch64 SME + feature into RESULT. SVL is the streaming vector length in bytes. + + The ZA register has a total size of SVL x SVL. + + When in Streaming SVE mode, the effective SVE vector length, VL, is equal + to SVL. */ + +static int +create_feature_aarch64_sme (struct target_desc *result, long regnum, + size_t svl) +{ + struct tdesc_feature *feature; + tdesc_type *element_type; + + feature = tdesc_create_feature (result, "org.gnu.gdb.aarch64.sme"); + + /* The SVG register. */ + tdesc_create_reg (feature, "svg", regnum++, 1, nullptr, 64, "int"); + + /* SVCR flags type. 
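A short standalone sketch (not part of the patch) of the SVL/SVQ/SVG relationships described above, working in bytes as the comment suggests: svq is the number of 128-bit chunks (svl / 16), svg the number of 64-bit chunks (svl / 8), and the AARCH64_SME_MIN_SVL/MAX_SVL limits are given in bits:

#include <cassert>
#include <cstddef>

int
main ()
{
  for (size_t svl = 16; svl <= 256; svl *= 2)	/* Valid streaming VLs.  */
    {
      size_t svq = svl / 16;	/* 128-bit chunks.  */
      size_t svg = svl / 8;	/* 64-bit chunks.  */

      assert (svg == 2 * svq);
      assert (svl * 8 >= 128 && svl * 8 <= 2048);	/* SVL in bits.  */
    }
  return 0;
}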
*/ + tdesc_type_with_fields *type_with_fields + = tdesc_create_flags (feature, "svcr_flags", 8); + tdesc_add_flag (type_with_fields, 0, "SM"); + tdesc_add_flag (type_with_fields, 1, "ZA"); + + /* The SVCR register. */ + tdesc_create_reg (feature, "svcr", regnum++, 1, nullptr, 64, "svcr_flags"); + + /* Byte type. */ + element_type = tdesc_named_type (feature, "uint8"); + /* Vector of bytes. */ + element_type = tdesc_create_vector (feature, "sme_bv", element_type, + svl); + /* Vector of vector of bytes (Matrix). */ + element_type = tdesc_create_vector (feature, "sme_bvv", element_type, + svl); + + /* The following is the ZA register set. */ + tdesc_create_reg (feature, "za", regnum++, 1, nullptr, + std::pow (svl, 2) * 8, "sme_bvv"); + return regnum; +} diff --git a/gdb/nat/aarch64-scalable-linux-ptrace.c b/gdb/nat/aarch64-scalable-linux-ptrace.c index 6eea9104cef..d3a50ed0c81 100644 --- a/gdb/nat/aarch64-scalable-linux-ptrace.c +++ b/gdb/nat/aarch64-scalable-linux-ptrace.c @@ -1,5 +1,4 @@ -/* Common target dependent routines for AArch64 Scalable Extensions - (SVE/SME). +/* Common native Linux code for the AArch64 scalable extensions: SVE and SME. Copyright (C) 2018-2023 Free Software Foundation, Inc. @@ -28,6 +27,193 @@ #include "gdbsupport/common-regcache.h" #include "gdbsupport/byte-vector.h" #include +#include "arch/aarch64-scalable-linux.h" + +/* See nat/aarch64-scalable-linux-ptrace.h. */ + +bool +aarch64_has_sve_state (int tid) +{ + struct user_sve_header header; + + if (!read_sve_header (tid, header)) + return false; + + if ((header.flags & SVE_PT_REGS_SVE) == 0) + return false; + + if (sizeof (header) == header.size) + return false; + + return true; +} + +/* See nat/aarch64-scalable-linux-ptrace.h. */ + +bool +aarch64_has_ssve_state (int tid) +{ + struct user_sve_header header; + + if (!read_ssve_header (tid, header)) + return false; + + if ((header.flags & SVE_PT_REGS_SVE) == 0) + return false; + + if (sizeof (header) == header.size) + return false; + + return true; +} + +/* See nat/aarch64-scalable-linux-ptrace.h. */ + +bool +aarch64_has_za_state (int tid) +{ + struct user_za_header header; + + if (!read_za_header (tid, header)) + return false; + + if (sizeof (header) == header.size) + return false; + + return true; +} + +/* See nat/aarch64-scalable-linux-ptrace.h. */ + +bool +read_sve_header (int tid, struct user_sve_header &header) +{ + struct iovec iovec; + + iovec.iov_len = sizeof (header); + iovec.iov_base = &header; + + if (ptrace (PTRACE_GETREGSET, tid, NT_ARM_SVE, &iovec) < 0) + { + /* SVE is not supported. */ + return false; + } + return true; +} + +/* See nat/aarch64-scalable-linux-ptrace.h. */ + +bool +write_sve_header (int tid, const struct user_sve_header &header) +{ + struct iovec iovec; + + iovec.iov_len = sizeof (header); + iovec.iov_base = (void *) &header; + + if (ptrace (PTRACE_SETREGSET, tid, NT_ARM_SVE, &iovec) < 0) + { + /* SVE is not supported. */ + return false; + } + return true; +} + +/* See nat/aarch64-scalable-linux-ptrace.h. */ + +bool +read_ssve_header (int tid, struct user_sve_header &header) +{ + struct iovec iovec; + + iovec.iov_len = sizeof (header); + iovec.iov_base = &header; + + if (ptrace (PTRACE_GETREGSET, tid, NT_ARM_SSVE, &iovec) < 0) + { + /* SSVE is not supported. */ + return false; + } + return true; +} + +/* See nat/aarch64-scalable-linux-ptrace.h. 
*/ + +bool +write_ssve_header (int tid, const struct user_sve_header &header) +{ + struct iovec iovec; + + iovec.iov_len = sizeof (header); + iovec.iov_base = (void *) &header; + + if (ptrace (PTRACE_SETREGSET, tid, NT_ARM_SSVE, &iovec) < 0) + { + /* SSVE is not supported. */ + return false; + } + return true; +} + +/* See nat/aarch64-scalable-linux-ptrace.h. */ + +bool +read_za_header (int tid, struct user_za_header &header) +{ + struct iovec iovec; + + iovec.iov_len = sizeof (header); + iovec.iov_base = &header; + + if (ptrace (PTRACE_GETREGSET, tid, NT_ARM_ZA, &iovec) < 0) + { + /* ZA is not supported. */ + return false; + } + return true; +} + +/* See nat/aarch64-scalable-linux-ptrace.h. */ + +bool +write_za_header (int tid, const struct user_za_header &header) +{ + struct iovec iovec; + + iovec.iov_len = sizeof (header); + iovec.iov_base = (void *) &header; + + if (ptrace (PTRACE_SETREGSET, tid, NT_ARM_ZA, &iovec) < 0) + { + /* ZA is not supported. */ + return false; + } + return true; +} + +/* Given VL, the streaming vector length for SME, return true if it is valid + and false otherwise. */ + +static bool +aarch64_sme_vl_valid (size_t vl) +{ + return (vl == 16 || vl == 32 || vl == 64 || vl == 128 || vl == 256); +} + +/* Given VL, the vector length for SVE, return true if it is valid and false + otherwise. SVE_state is true when the check is for the SVE register set. + Otherwise the check is for the SSVE register set. */ + +static bool +aarch64_sve_vl_valid (const bool sve_state, size_t vl) +{ + if (sve_state) + return sve_vl_valid (vl); + + /* We have an active SSVE state, where the valid vector length values are + more restrictive. */ + return aarch64_sme_vl_valid (vl); +} /* See nat/aarch64-scalable-linux-ptrace.h. */ @@ -36,23 +222,25 @@ aarch64_sve_get_vq (int tid) { struct iovec iovec; struct user_sve_header header; - iovec.iov_len = sizeof (header); iovec.iov_base = &header; - /* Ptrace gives the vector length in bytes. Convert it to VQ, the number of - 128bit chunks in a Z register. We use VQ because 128bits is the minimum - a Z register can increase in size. */ - - if (ptrace (PTRACE_GETREGSET, tid, NT_ARM_SVE, &iovec) < 0) + /* Figure out which register set to use for the request. The vector length + for SVE can be different from the vector length for SSVE. */ + bool has_sve_state = !aarch64_has_ssve_state (tid); + if (ptrace (PTRACE_GETREGSET, tid, has_sve_state? NT_ARM_SVE : NT_ARM_SSVE, + &iovec) < 0) { /* SVE is not supported. */ return 0; } + /* Ptrace gives the vector length in bytes. Convert it to VQ, the number of + 128bit chunks in a Z register. We use VQ because 128 bits is the minimum + a Z register can increase in size. */ uint64_t vq = sve_vq_from_vl (header.vl); - if (!sve_vl_valid (header.vl)) + if (!aarch64_sve_vl_valid (has_sve_state, header.vl)) { warning (_("Invalid SVE state from kernel; SVE disabled.")); return 0; @@ -72,15 +260,20 @@ aarch64_sve_set_vq (int tid, uint64_t vq) iovec.iov_len = sizeof (header); iovec.iov_base = &header; - if (ptrace (PTRACE_GETREGSET, tid, NT_ARM_SVE, &iovec) < 0) + /* Figure out which register set to use for the request. The vector length + for SVE can be different from the vector length for SSVE. */ + bool has_sve_state = !aarch64_has_ssve_state (tid); + if (ptrace (PTRACE_GETREGSET, tid, has_sve_state? NT_ARM_SVE : NT_ARM_SSVE, + &iovec) < 0) { - /* SVE is not supported. */ + /* SVE/SSVE is not supported. 
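To contrast the two validity checks above, here is a standalone sketch (not part of the patch). The plain-SVE rule used below (any multiple of 16 bytes up to 256) is stated as an assumption mirroring the kernel's sve_vl_valid; the streaming rule matches aarch64_sme_vl_valid, which only accepts power-of-two lengths:

#include <cstddef>
#include <cstdio>

static bool
sve_vl_ok (size_t vl)
{
  return vl >= 16 && vl <= 256 && (vl % 16) == 0;
}

static bool
sme_vl_ok (size_t vl)
{
  return vl == 16 || vl == 32 || vl == 64 || vl == 128 || vl == 256;
}

int
main ()
{
  for (size_t vl = 16; vl <= 256; vl += 16)
    if (sve_vl_ok (vl) && !sme_vl_ok (vl))
      printf ("vl = %zu bytes: valid for SVE, not for streaming mode\n", vl);
  return 0;
}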
*/ return false; } header.vl = sve_vl_from_vq (vq); - if (ptrace (PTRACE_SETREGSET, tid, NT_ARM_SVE, &iovec) < 0) + if (ptrace (PTRACE_SETREGSET, tid, has_sve_state? NT_ARM_SVE : NT_ARM_SSVE, + &iovec) < 0) { /* Vector length change failed. */ return false; @@ -120,13 +313,108 @@ aarch64_sve_set_vq (int tid, struct reg_buffer_common *reg_buf) /* See nat/aarch64-scalable-linux-ptrace.h. */ +uint64_t +aarch64_za_get_svq (int tid) +{ + struct user_za_header header; + if (!read_za_header (tid, header)) + return 0; + + uint64_t vq = sve_vq_from_vl (header.vl); + + if (!aarch64_sve_vl_valid (false, header.vl)) + { + warning (_("Invalid ZA state from kernel; ZA disabled.")); + return 0; + } + + return vq; +} + +/* See nat/aarch64-scalable-linux-ptrace.h. */ + +bool +aarch64_za_set_svq (int tid, uint64_t vq) +{ + struct iovec iovec; + + /* Read the NT_ARM_ZA header. */ + struct user_za_header header; + if (!read_za_header (tid, header)) + { + /* ZA is not supported. */ + return false; + } + + /* If the size is the correct one already, don't update it. If we do + update the streaming vector length, we will invalidate the register + state for ZA, and we do not want that. */ + if (header.vl == sve_vl_from_vq (vq)) + return true; + + /* The streaming vector length is about to get updated. Set the new value + in the NT_ARM_ZA header and adjust the size as well. */ + + header.vl = sve_vl_from_vq (vq); + header.size = sizeof (struct user_za_header); + + /* Update the NT_ARM_ZA register set with the new streaming vector + length. */ + iovec.iov_len = sizeof (header); + iovec.iov_base = &header; + + if (ptrace (PTRACE_SETREGSET, tid, NT_ARM_ZA, &iovec) < 0) + { + /* Streaming vector length change failed. */ + return false; + } + + /* At this point we have successfully adjusted the streaming vector length + for the NT_ARM_ZA register set, and it should have no payload + (no ZA state). */ + + return true; +} + +/* See nat/aarch64-scalable-linux-ptrace.h. */ + +bool +aarch64_za_set_svq (int tid, const struct reg_buffer_common *reg_buf, + int svg_regnum) +{ + uint64_t reg_svg = 0; + + /* The svg register may not be valid if we've not collected any value yet. + This can happen, for example, if we're restoring the regcache after an + inferior function call, and the svg register comes after the Z + registers. */ + if (reg_buf->get_register_status (svg_regnum) != REG_VALID) + { + /* If svg is not available yet, fetch it from ptrace. The svg value from + ptrace is likely the correct one. */ + uint64_t svq = aarch64_za_get_svq (tid); + + /* If something went wrong, just bail out. */ + if (svq == 0) + return false; + + reg_svg = sve_vg_from_vq (svq); + } + else + reg_buf->raw_collect (svg_regnum, ®_svg); + + return aarch64_za_set_svq (tid, sve_vq_from_vg (reg_svg)); +} + +/* See nat/aarch64-scalable-linux-ptrace.h. 
+
 gdb::byte_vector
 aarch64_fetch_sve_regset (int tid)
 {
   uint64_t vq = aarch64_sve_get_vq (tid);
   if (vq == 0)
-    perror_with_name (_("Unable to fetch SVE vector length"));
+    perror_with_name (_("Unable to fetch SVE/SSVE vector length"));
 
   /* A ptrace call with NT_ARM_SVE will return a header followed by either a
      dump of all the SVE and FP registers, or an fpsimd structure (identical to
@@ -139,8 +427,10 @@ aarch64_fetch_sve_regset (int tid)
   iovec.iov_base = sve_state.data ();
   iovec.iov_len = sve_state.size ();
 
-  if (ptrace (PTRACE_GETREGSET, tid, NT_ARM_SVE, &iovec) < 0)
-    perror_with_name (_("Unable to fetch SVE registers"));
+  bool has_sve_state = !aarch64_has_ssve_state (tid);
+  if (ptrace (PTRACE_GETREGSET, tid, has_sve_state ? NT_ARM_SVE : NT_ARM_SSVE,
+	      &iovec) < 0)
+    perror_with_name (_("Unable to fetch SVE/SSVE registers"));
 
   return sve_state;
 }
@@ -155,8 +445,82 @@ aarch64_store_sve_regset (int tid, const gdb::byte_vector &sve_state)
   iovec.iov_base = (void *) sve_state.data ();
   iovec.iov_len = sve_state.size ();
 
-  if (ptrace (PTRACE_SETREGSET, tid, NT_ARM_SVE, &iovec) < 0)
-    perror_with_name (_("Unable to store SVE registers"));
+  bool has_sve_state = !aarch64_has_ssve_state (tid);
+  if (ptrace (PTRACE_SETREGSET, tid, has_sve_state ? NT_ARM_SVE : NT_ARM_SSVE,
+	      &iovec) < 0)
+    perror_with_name (_("Unable to store SVE/SSVE registers"));
+}
+
+/* See nat/aarch64-scalable-linux-ptrace.h.  */
+
+gdb::byte_vector
+aarch64_fetch_za_regset (int tid)
+{
+  struct user_za_header header;
+  if (!read_za_header (tid, header))
+    error (_("Failed to read NT_ARM_ZA header."));
+
+  if (!aarch64_sme_vl_valid (header.vl))
+    error (_("Found invalid vector length for NT_ARM_ZA."));
+
+  struct iovec iovec;
+  iovec.iov_len = header.size;
+  gdb::byte_vector za_state (header.size);
+  iovec.iov_base = za_state.data ();
+
+  if (ptrace (PTRACE_GETREGSET, tid, NT_ARM_ZA, &iovec) < 0)
+    perror_with_name (_("Failed to fetch NT_ARM_ZA register set."));
+
+  return za_state;
+}
+
+/* See nat/aarch64-scalable-linux-ptrace.h.  */
+
+void
+aarch64_store_za_regset (int tid, const gdb::byte_vector &za_state)
+{
+  struct iovec iovec;
+  /* We need to cast away const here, since ptrace expects a non-const
+     pointer.  */
+  iovec.iov_base = (void *) za_state.data ();
+  iovec.iov_len = za_state.size ();
+
+  if (ptrace (PTRACE_SETREGSET, tid, NT_ARM_ZA, &iovec) < 0)
+    perror_with_name (_("Failed to write to the NT_ARM_ZA register set."));
+}
+
+/* See nat/aarch64-scalable-linux-ptrace.h.  */
+
+void
+aarch64_initialize_za_regset (int tid)
+{
+  /* First fetch the NT_ARM_ZA header so we can fetch the streaming vector
+     length.  */
+  struct user_za_header header;
+  if (!read_za_header (tid, header))
+    error (_("Failed to read NT_ARM_ZA header."));
+
+  /* The vector should be default-initialized to zero, and we should account
+     for the payload as well.  */
+  std::vector<gdb_byte> za_new_state (ZA_PT_SIZE (sve_vq_from_vl (header.vl)));
+
+  /* Adjust the header size since we are adding the initialized ZA
+     payload.  */
+  header.size = ZA_PT_SIZE (sve_vq_from_vl (header.vl));
+
+  /* Overlay the modified header onto the new ZA state.  */
+  const gdb_byte *base = (gdb_byte *) &header;
+  memcpy (za_new_state.data (), base, sizeof (user_za_header));
+
+  /* Set the ptrace request up and update the NT_ARM_ZA register set.  */
+  struct iovec iovec;
+  iovec.iov_len = za_new_state.size ();
+  iovec.iov_base = za_new_state.data ();
+
+  if (ptrace (PTRACE_SETREGSET, tid, NT_ARM_ZA, &iovec) < 0)
+    perror_with_name (_("Failed to initialize the NT_ARM_ZA register set."));
+
+  /* The NT_ARM_ZA register set should now contain a zero-initialized ZA
+     payload.  */
 }
 
 /* If we are running in BE mode, byteswap the contents
@@ -451,3 +815,177 @@ aarch64_sve_regs_copy_from_reg_buf (int tid,
      passed on to ptrace.  */
   aarch64_store_sve_regset (tid, new_state);
 }
+
+/* See nat/aarch64-scalable-linux-ptrace.h.  */
+
+void
+aarch64_za_regs_copy_to_reg_buf (int tid, struct reg_buffer_common *reg_buf,
+				 int za_regnum, int svg_regnum,
+				 int svcr_regnum)
+{
+  /* Fetch the current ZA state from the thread.  */
+  gdb::byte_vector za_state = aarch64_fetch_za_regset (tid);
+
+  /* Sanity check.  */
+  gdb_assert (!za_state.empty ());
+
+  char *base = (char *) za_state.data ();
+  struct user_za_header *header = (struct user_za_header *) base;
+
+  /* If we have ZA state, read it.  Otherwise, make the contents of ZA
+     in the register cache all zeroes.  This is how we present the ZA
+     state when it is not initialized.  */
+  uint64_t svcr_value = 0;
+  if (aarch64_has_za_state (tid))
+    {
+      /* Sanity check the data in the header.  */
+      if (!sve_vl_valid (header->vl)
+	  || ZA_PT_SIZE (sve_vq_from_vl (header->vl)) != header->size)
+	{
+	  error (_("Found invalid streaming vector length in NT_ARM_ZA"
+		   " register set"));
+	}
+
+      reg_buf->raw_supply (za_regnum, base + ZA_PT_ZA_OFFSET);
+      svcr_value |= SVCR_ZA_BIT;
+    }
+  else
+    {
+      size_t za_bytes = header->vl * header->vl;
+      gdb_byte za_zeroed[za_bytes];
+      memset (za_zeroed, 0, za_bytes);
+      reg_buf->raw_supply (za_regnum, za_zeroed);
+    }
+
+  /* Handle the svg and svcr registers separately.  We need to calculate
+     their values manually, as the Linux kernel doesn't expose them
+     explicitly.  */
+  svcr_value |= aarch64_has_ssve_state (tid) ? SVCR_SM_BIT : 0;
+  uint64_t svg_value = sve_vg_from_vl (header->vl);
+
+  /* Update the contents of the svg and svcr registers.  */
+  reg_buf->raw_supply (svg_regnum, &svg_value);
+  reg_buf->raw_supply (svcr_regnum, &svcr_value);
+
+  /* The register buffer should now contain the updated copy of the NT_ARM_ZA
+     state.  */
+}
+
+/* See nat/aarch64-scalable-linux-ptrace.h.  */
+
+void
+aarch64_za_regs_copy_from_reg_buf (int tid,
+				   struct reg_buffer_common *reg_buf,
+				   int za_regnum, int svg_regnum,
+				   int svcr_regnum)
+{
+  /* REG_BUF contains the updated ZA state.  We need to extract that state
+     and write it to the thread TID.  */
+
+  /* First check if there is a change to the streaming vector length.  Two
+     outcomes are possible here:
+
+     1 - The streaming vector length in the register cache differs from the
+     one currently on the thread state.  This means that we will need to
+     update the NT_ARM_ZA register set to reflect the new streaming vector
+     length.
+
+     2 - The streaming vector length in the register cache is the same as in
+     the thread state.  This means we do not need to update the NT_ARM_ZA
+     register set for a new streaming vector length, and we only need to
+     deal with changes to za, svg and svcr.
+
+     Neither of the two possibilities above implies that the ZA state actually
+     exists.  They only determine what needs to be done with any ZA content
+     based on the state of the streaming vector length.  */
+
+  /* First fetch the NT_ARM_ZA header so we can fetch the streaming vector
+     length.  */
+  struct user_za_header header;
+  if (!read_za_header (tid, header))
+    error (_("Failed to read NT_ARM_ZA header."));
+
+  /* Fetch the current streaming vector length.  */
+  uint64_t old_svg = sve_vg_from_vl (header.vl);
+
+  /* Fetch the (potentially) new streaming vector length.  */
+  uint64_t new_svg;
+  reg_buf->raw_collect (svg_regnum, &new_svg);
+
+  /* Did the streaming vector length change?  */
+  bool svg_changed = new_svg != old_svg;
+
+  /* First store the streaming vector length to the thread.  This is done
+     first to ensure the ptrace buffers read from the kernel are the correct
+     size.  If the streaming vector length is the same as the current one, it
+     won't be updated.  */
+  if (!aarch64_za_set_svq (tid, reg_buf, svg_regnum))
+    error (_("Unable to set svg register"));
+
+  bool has_za_state = aarch64_has_za_state (tid);
+
+  size_t za_bytes = sve_vl_from_vg (old_svg) * sve_vl_from_vg (old_svg);
+  gdb_byte za_zeroed[za_bytes];
+  memset (za_zeroed, 0, za_bytes);
+
+  /* If the streaming vector length changed, zero out the contents of ZA in
+     the register cache; those contents were collected for a different vector
+     length and no longer match the thread's ZA size.  Otherwise we will need
+     to update the thread's ZA contents with the ZA contents from the register
+     cache.  */
+  if (svg_changed)
+    reg_buf->raw_supply (za_regnum, za_zeroed);
+
+  /* When we update svg, we don't automatically initialize the ZA buffer.  If
+     we have no ZA state and the ZA register contents in the register cache are
+     zero, just return and leave the ZA register cache contents as zero.  */
+  if (!has_za_state
+      && reg_buf->raw_compare (za_regnum, za_zeroed, 0))
+    {
+      /* No ZA state in the thread or in the register cache.  This was likely
+	 just an adjustment of the streaming vector length.  Let this fall
+	 through and update svcr and svg in the register cache.  */
+    }
+  else
+    {
+      /* If there is no ZA state but the register cache contains ZA data, we
	 need to initialize the ZA data through ptrace.  First we initialize
	 all the bytes of ZA to zero.  */
+      if (!has_za_state
+	  && !reg_buf->raw_compare (za_regnum, za_zeroed, 0))
+	aarch64_initialize_za_regset (tid);
+
+      /* From this point onwards, it is assumed we have a ZA payload in
	 the NT_ARM_ZA register set for this thread, and we need to update
	 such state based on the contents of the register cache.  */
+
+      /* Fetch the current ZA state from the thread.  */
+      gdb::byte_vector za_state = aarch64_fetch_za_regset (tid);
+
+      char *base = (char *) za_state.data ();
+      struct user_za_header *za_header = (struct user_za_header *) base;
+      uint64_t svq = sve_vq_from_vl (za_header->vl);
+
+      /* Sanity check the data in the header.  */
+      if (!sve_vl_valid (za_header->vl)
+	  || ZA_PT_SIZE (svq) != za_header->size)
+	error (_("Invalid vector length or payload size when reading ZA."));
+
+      /* Overwrite the ZA state contained in the thread with the ZA state from
	 the register cache.  */
+      if (REG_VALID == reg_buf->get_register_status (za_regnum))
+	reg_buf->raw_collect (za_regnum, base + ZA_PT_ZA_OFFSET);
+
+      /* Write back the ZA state to the thread's NT_ARM_ZA register set.  */
+      aarch64_store_za_regset (tid, za_state);
+    }
+
+  /* Update svcr and svg accordingly.  */
+  uint64_t svcr_value = 0;
+  svcr_value |= aarch64_has_ssve_state (tid) ? SVCR_SM_BIT : 0;
+  svcr_value |= aarch64_has_za_state (tid) ? SVCR_ZA_BIT : 0;
+  reg_buf->raw_supply (svcr_regnum, &svcr_value);
+
+  /* At this point we have written the data contained in the register cache to
+     the thread's NT_ARM_ZA register set.  */
+}
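
As an aside, the vector-length bookkeeping the native code above leans on reduces to simple arithmetic: VL is a vector length in bytes, VQ is the number of 128-bit quadwords (VL / 16), and VG is the number of 64-bit granules (VL / 8), which is what sve_vq_from_vl, sve_vg_from_vl and friends compute. The standalone sketch below only illustrates that arithmetic and the way the svcr pseudo register is composed; the EXAMPLE_* names are hypothetical, and the bit positions are assumed to mirror SVCR_SM_BIT and SVCR_ZA_BIT from arch/aarch64-scalable-linux.h (architecturally, SVCR.SM is bit 0 and SVCR.ZA is bit 1).

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins, assumed to mirror SVCR_SM_BIT/SVCR_ZA_BIT.  */
#define EXAMPLE_SVCR_SM_BIT (1ULL << 0)
#define EXAMPLE_SVCR_ZA_BIT (1ULL << 1)

/* VL is a vector length in bytes; VQ counts 128-bit quadwords and VG counts
   64-bit granules, as sve_vq_from_vl/sve_vg_from_vl do.  */
static uint64_t example_vq_from_vl (uint64_t vl) { return vl / 16; }
static uint64_t example_vg_from_vl (uint64_t vl) { return vl / 8; }

int
main (void)
{
  /* A streaming vector length of 32 bytes (256 bits).  */
  uint64_t svl = 32;
  printf ("svq = %" PRIu64 ", svg = %" PRIu64 "\n",
          example_vq_from_vl (svl), example_vg_from_vl (svl));

  /* Compose an svcr value the same way aarch64_za_regs_copy_to_reg_buf
     does: one bit for streaming mode, one bit for an active ZA state.  */
  bool streaming = true, za_active = false;
  uint64_t svcr = 0;
  svcr |= streaming ? EXAMPLE_SVCR_SM_BIT : 0;
  svcr |= za_active ? EXAMPLE_SVCR_ZA_BIT : 0;
  printf ("svcr = 0x%" PRIx64 "\n", svcr);
  return 0;
}
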
diff --git a/gdb/nat/aarch64-scalable-linux-ptrace.h b/gdb/nat/aarch64-scalable-linux-ptrace.h
index 167782c493d..d609933801d 100644
--- a/gdb/nat/aarch64-scalable-linux-ptrace.h
+++ b/gdb/nat/aarch64-scalable-linux-ptrace.h
@@ -1,5 +1,5 @@
-/* Common target dependent definitions for AArch64 Scalable Extensions
-   (SVE/SME).
+/* Common native Linux definitions for the AArch64 scalable
+   extensions: SVE and SME.
 
    Copyright (C) 2018-2023 Free Software Foundation, Inc.
 
@@ -31,19 +31,58 @@
    result when is included before . */
 #include
 #include
-
-#ifndef SVE_SIG_ZREGS_SIZE
+#include
+#include "aarch64-scalable-linux-ptrace.h"
 #include "aarch64-scalable-linux-sigcontext.h"
-#endif
 
 /* Indicates whether a SVE ptrace header is followed by SVE registers or a
    fpsimd structure.  */
-
 #define HAS_SVE_STATE(header) ((header).flags & SVE_PT_REGS_SVE)
 
+/* Return true if there is an active SVE state in TID.
+   Return false otherwise.  */
+bool aarch64_has_sve_state (int tid);
+
+/* Return true if there is an active SSVE state in TID.
+   Return false otherwise.  */
+bool aarch64_has_ssve_state (int tid);
+
+/* Return true if there is an active ZA state in TID.
+   Return false otherwise.  */
+bool aarch64_has_za_state (int tid);
+
+/* Given TID, read the SVE header into HEADER.
+
+   Return true if successful, false otherwise.  */
+bool read_sve_header (int tid, struct user_sve_header &header);
+
+/* Given TID, store the SVE HEADER.
+
+   Return true if successful, false otherwise.  */
+bool write_sve_header (int tid, const struct user_sve_header &header);
+
+/* Given TID, read the SSVE header into HEADER.
+
+   Return true if successful, false otherwise.  */
+bool read_ssve_header (int tid, struct user_sve_header &header);
+
+/* Given TID, store the SSVE HEADER.
+
+   Return true if successful, false otherwise.  */
+bool write_ssve_header (int tid, const struct user_sve_header &header);
+
+/* Given TID, read the ZA header into HEADER.
+
+   Return true if successful, false otherwise.  */
+bool read_za_header (int tid, struct user_za_header &header);
+
+/* Given TID, store the ZA HEADER.
+
+   Return true if successful, false otherwise.  */
+bool write_za_header (int tid, const struct user_za_header &header);
+
 /* Read VQ for the given tid using ptrace.  If SVE is not supported then zero
    is returned (on a system that supports SVE, then VQ cannot be zero).  */
-
 uint64_t aarch64_sve_get_vq (int tid);
 
 /* Set VQ in the kernel for the given tid, using either the value VQ or
@@ -52,27 +91,64 @@ uint64_t aarch64_sve_get_vq (int tid);
 bool aarch64_sve_set_vq (int tid, uint64_t vq);
 bool aarch64_sve_set_vq (int tid, struct reg_buffer_common *reg_buf);
 
-/* Read the current SVE register set from thread TID and return its data
-   through a byte vector.  */
+/* Read the streaming mode vq (svq) for the given TID.  If the ZA state is not
+   supported or active, return 0.  */
+uint64_t aarch64_za_get_svq (int tid);
 
+/* Set the vector quotient (vq) in the kernel for the given TID using the
+   value VQ.
+
+   Return true if successful, false otherwise.  */
+bool aarch64_za_set_svq (int tid, uint64_t vq);
+bool aarch64_za_set_svq (int tid, const struct reg_buffer_common *reg_buf,
+			 int svg_regnum);
+
+/* Given TID, return the SVE/SSVE data as a vector of bytes.  */
 extern gdb::byte_vector aarch64_fetch_sve_regset (int tid);
 
-/* Write the SVE contents from SVE_STATE to thread TID.  */
+/* Write the SVE/SSVE contents from SVE_STATE to TID.  */
+extern void aarch64_store_sve_regset (int tid,
+				      const gdb::byte_vector &sve_state);
 
-extern void
-aarch64_store_sve_regset (int tid, const gdb::byte_vector &sve_state);
+/* Given TID, return the ZA data as a vector of bytes.  */
+extern gdb::byte_vector aarch64_fetch_za_regset (int tid);
 
-/* Given a thread id TID and a register buffer REG_BUF, update the register
-   buffer with the SVE state from thread TID.  */
+/* Write ZA_STATE for TID.  */
+extern void aarch64_store_za_regset (int tid, const gdb::byte_vector &za_state);
 
+/* Given TID, initialize the ZA register set so the header contains the right
+   size.  The bytes of the ZA register are initialized to zero.  */
+extern void aarch64_initialize_za_regset (int tid);
+
+/* Given a thread id TID and a register buffer REG_BUF, update the register
+   buffer with the SVE/SSVE state from thread TID.  */
 extern void aarch64_sve_regs_copy_to_reg_buf (int tid,
					      struct reg_buffer_common *reg_buf);
 
-/* Given a thread id TID and a register buffer REG_BUF containing SVE
+/* Given a thread id TID and a register buffer REG_BUF containing SVE/SSVE
    register data, write the SVE data to thread TID.  */
-
 extern void aarch64_sve_regs_copy_from_reg_buf (int tid,
						struct reg_buffer_common *reg_buf);
 
+/* Given a thread id TID and a register buffer REG_BUF, update the register
+   buffer with the ZA state from thread TID.
+
+   ZA_REGNUM, SVG_REGNUM and SVCR_REGNUM are the register numbers for ZA,
+   SVG and SVCR registers.  */
+extern void aarch64_za_regs_copy_to_reg_buf (int tid,
+					     struct reg_buffer_common *reg_buf,
+					     int za_regnum, int svg_regnum,
+					     int svcr_regnum);
+
+/* Given a thread id TID and a register buffer REG_BUF containing ZA register
+   data, write the ZA data to thread TID.
+
+   ZA_REGNUM, SVG_REGNUM and SVCR_REGNUM are the register numbers for ZA,
+   SVG and SVCR registers.  */
+extern void
+aarch64_za_regs_copy_from_reg_buf (int tid,
+				   struct reg_buffer_common *reg_buf,
+				   int za_regnum, int svg_regnum,
+				   int svcr_regnum);
 
 #endif /* NAT_AARCH64_SCALABLE_LINUX_PTRACE_H */
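
For context on how the NT_ARM_ZA regset used by these declarations is laid out, the sketch below reads the ZA header with PTRACE_GETREGSET and derives the expected regset size the same way the ZA_PT_SIZE arithmetic does (a 16-byte-aligned header followed by an SVL/8 x SVL/8 byte matrix). It is only an illustration under assumptions: example_za_header is a local mirror of user_za_header, the helper names are made up, and the NT_ARM_ZA fallback value is an assumption for systems whose elf.h predates SME.

#include <elf.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>

#ifndef NT_ARM_ZA
#define NT_ARM_ZA 0x40c		/* Assumed value; recent elf.h provides it.  */
#endif

/* Local mirror of user_za_header, for illustration only.  */
struct example_za_header
{
  uint32_t size;
  uint32_t max_size;
  uint16_t vl;
  uint16_t max_vl;
  uint16_t flags;
  uint16_t reserved;
};

/* Read the NT_ARM_ZA header of a ptrace-stopped thread TID.
   Return 0 on success, -1 on failure.  */
static int
example_read_za_header (pid_t tid, struct example_za_header *header)
{
  struct iovec iov;

  memset (header, 0, sizeof (*header));
  iov.iov_base = header;
  iov.iov_len = sizeof (*header);
  return ptrace (PTRACE_GETREGSET, tid, NT_ARM_ZA, &iov) < 0 ? -1 : 0;
}

int
main (void)
{
  /* A real caller would pass a stopped thread to example_read_za_header;
     here we only show the size arithmetic for a 64-byte streaming VL.  */
  struct example_za_header header = { .vl = 64 };
  size_t header_bytes = (sizeof (header) + 15) / 16 * 16;  /* ZA_PT_ZA_OFFSET.  */
  size_t za_bytes = (size_t) header.vl * header.vl;	   /* ZA_PT_ZA_SIZE.  */

  printf ("expected NT_ARM_ZA size with ZA active: %zu bytes\n",
          header_bytes + za_bytes);
  (void) example_read_za_header;
  return 0;
}
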
diff --git a/gdb/nat/aarch64-scalable-linux-sigcontext.h b/gdb/nat/aarch64-scalable-linux-sigcontext.h
index e0120e093a0..74407bd266a 100644
--- a/gdb/nat/aarch64-scalable-linux-sigcontext.h
+++ b/gdb/nat/aarch64-scalable-linux-sigcontext.h
@@ -22,8 +22,11 @@
 #ifndef NAT_AARCH64_SCALABLE_LINUX_SIGCONTEXT_H
 #define NAT_AARCH64_SCALABLE_LINUX_SIGCONTEXT_H
 
+#ifndef SVE_SIG_ZREGS_SIZE
+
 #define SVE_MAGIC	0x53564501
+
 struct sve_context {
 	struct _aarch64_ctx head;
 	__u16 vl;
@@ -132,7 +135,7 @@ struct sve_context {
 #define SVE_SIG_CONTEXT_SIZE(vq) (SVE_SIG_REGS_OFFSET + SVE_SIG_REGS_SIZE(vq))
 
-/* SVE/FP/SIMD state (NT_ARM_SVE) */
+/* SVE/FP/SIMD state (NT_ARM_SVE and NT_ARM_SSVE) */
 
 struct user_sve_header {
 	__u32 size; /* total meaningful regset content in bytes */
@@ -242,6 +245,7 @@ struct user_sve_header {
 	(SVE_PT_SVE_PREG_OFFSET(vq, SVE_NUM_PREGS) - \
 	 SVE_PT_SVE_PREGS_OFFSET(vq))
 
+/* For streaming mode SVE (SSVE) FFR must be read and written as zero.  */
 #define SVE_PT_SVE_FFR_OFFSET(vq) \
 	__SVE_SIG_TO_PT(SVE_SIG_FFR_OFFSET(vq))
 
@@ -267,4 +271,55 @@ struct user_sve_header {
 	  SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags)	\
 	: SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags))
 
+#endif /* SVE_SIG_ZREGS_SIZE */
+
+/* Scalable Matrix Extensions (SME) definitions.  */
+
+/* Make sure we only define these if the kernel header doesn't.  */
+#ifndef ZA_PT_SIZE
+
+/* ZA state (NT_ARM_ZA) */
+struct user_za_header {
+	__u32 size;	/* total meaningful regset content in bytes */
+	__u32 max_size;	/* maximum possible size for this thread */
+	__u16 vl;	/* current vector length */
+	__u16 max_vl;	/* maximum possible vector length */
+	__u16 flags;
+	__u16 __reserved;
+};
+
+/* The remainder of the ZA state follows struct user_za_header.  The
+   total size of the ZA state (including header) depends on the
+   metadata in the header: ZA_PT_SIZE(vq) gives the total size
+   of the state in bytes, including the header.
+
+   Refer to arch/arm64/include/uapi/asm/sigcontext.h from the Linux kernel
+   for details of how to pass the correct "vq" argument to these macros.  */
+
+/* Offset from the start of struct user_za_header to the register data */
+#define ZA_PT_ZA_OFFSET						\
+	((sizeof (struct user_za_header) + (__SVE_VQ_BYTES - 1))	\
+	 / __SVE_VQ_BYTES * __SVE_VQ_BYTES)
+
+/* The payload starts at offset ZA_PT_ZA_OFFSET, and is of size
+   ZA_PT_ZA_SIZE(vq).
+
+   The ZA array is stored as a sequence of horizontal vectors ZAV of SVL/8
+   bytes each, starting from vector 0.
+
+   Additional data might be appended in the future.
+
+   The ZA matrix is represented in memory in an endianness-invariant layout
+   which differs from the layout used for the FPSIMD V-registers on big-endian
+   systems: see sigcontext.h for more explanation.  */
+
+#define ZA_PT_ZAV_OFFSET(vq, n) \
+	(ZA_PT_ZA_OFFSET + ((vq * __SVE_VQ_BYTES) * n))
+
+#define ZA_PT_ZA_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES))
+
+#define ZA_PT_SIZE(vq) \
+	(ZA_PT_ZA_OFFSET + ZA_PT_ZA_SIZE(vq))
+#endif /* ZA_PT_SIZE */
+
 #endif /* NAT_AARCH64_SCALABLE_LINUX_SIGCONTEXT_H */
diff --git a/gdbserver/configure.srv b/gdbserver/configure.srv
index 7a7575ac43b..9e861a75088 100644
--- a/gdbserver/configure.srv
+++ b/gdbserver/configure.srv
@@ -49,6 +49,7 @@ case "${gdbserver_host}" in
 			srv_tgtobj="$srv_tgtobj arch/aarch64-insn.o"
 			srv_tgtobj="$srv_tgtobj arch/aarch64.o"
 			srv_tgtobj="$srv_tgtobj arch/aarch64-mte-linux.o"
+			srv_tgtobj="$srv_tgtobj arch/aarch64-scalable-linux.o"
 			srv_tgtobj="$srv_tgtobj linux-aarch64-tdesc.o"
 			srv_tgtobj="$srv_tgtobj nat/aarch64-mte-linux-ptrace.o"
 			srv_tgtobj="$srv_tgtobj nat/aarch64-scalable-linux-ptrace.o"