AArch64 v9.7 extensions: FEAT_SVE_B16MM

This patch includes:
  - Feature flag for FEAT_SVE_B16MM
  - Instruction:
      - BFMMLA (non-widening) BFloat16 matrix multiply-accumulate.
This commit is contained in:
Sivan Shani
2025-12-17 18:10:35 +00:00
committed by Alice Carlotti
parent 3eb520ce6a
commit d8d024ad42
8 changed files with 44 additions and 4 deletions

View File

@@ -10924,6 +10924,7 @@ static const struct aarch64_option_cpu_value_table aarch64_features[] = {
{"f16f32dot", AARCH64_FEATURE (F16F32DOT), AARCH64_FEATURE (SIMD)},
{"f16f32mm", AARCH64_FEATURE (F16F32MM), AARCH64_FEATURES (2, SIMD, F16)},
{"f16mm", AARCH64_FEATURE (F16MM), AARCH64_FEATURES (2, SIMD, F16)},
{"sve-b16mm", AARCH64_FEATURE (SVE_B16MM), AARCH64_FEATURE (SVE)},
{NULL, AARCH64_NO_FEATURES, AARCH64_NO_FEATURES},
};

View File

@@ -353,6 +353,8 @@ automatically cause those extensions to be disabled.
@tab Enable the SVE2 BITPERM Extension.
@item @code{sve-b16b16} @tab
@tab Enable the SVE B16B16 extension. These instructions also require either @code{+sve2} or @code{+sme2}.
@item @code{sve-b16mm} @tab @code{sve}
@tab Enable the SVE B16MM Extension.
@item @code{sve-bfscale} @tab
@tab Enable the SVE BFSCALE extension. These instructions also require either @code{+sve2} or @code{+sme2}.
@item @code{sve-f16f32mm} @tab @code{sve}

View File

@@ -0,0 +1,12 @@
#as: -march=armv8-a+sve-b16mm
#objdump: -dr
.*: file format .*
Disassembly of section \.text:
0+ <\.text>:
*[0-9a-f]+: 64e0e000 bfmmla z0.h, z0.h, z0.h
*[0-9a-f]+: 64e0e01f bfmmla z31.h, z0.h, z0.h
*[0-9a-f]+: 64e0e3e0 bfmmla z0.h, z31.h, z0.h
*[0-9a-f]+: 64ffe000 bfmmla z0.h, z0.h, z31.h

View File

@@ -0,0 +1,4 @@
bfmmla z0.h, z0.h, z0.h
bfmmla z31.h, z0.h, z0.h
bfmmla z0.h, z31.h, z0.h
bfmmla z0.h, z0.h, z31.h

View File

@@ -277,6 +277,8 @@ enum aarch64_feature_bit {
AARCH64_FEATURE_F16F32MM,
/* F16MM instructions. */
AARCH64_FEATURE_F16MM,
/* SVE B16MM instructions. */
AARCH64_FEATURE_SVE_B16MM,
/* Virtual features. These are used to gate instructions that are enabled
by either of two (or more) sets of command line flags. */

View File

@@ -20326,10 +20326,20 @@ aarch64_opcode_lookup_1 (uint32_t word)
{
if (((word >> 31) & 0x1) == 0)
{
/* 33222222222211111111110000000000
10987654321098765432109876543210
011001x0111xxxxx111xxxxxxxxxxxxx. */
return A64_OPID_64e0e400_fmmla_SVE_Zd_SVE_Zn_SVE_Zm_16;
if (((word >> 10) & 0x1) == 0)
{
/* 33222222222211111111110000000000
10987654321098765432109876543210
011001x0111xxxxx111xx0xxxxxxxxxx. */
return A64_OPID_64e0e000_bfmmla_SVE_Zd_SVE_Zn_SVE_Zm_16;
}
else
{
/* 33222222222211111111110000000000
10987654321098765432109876543210
011001x0111xxxxx111xx1xxxxxxxxxx. */
return A64_OPID_64e0e400_fmmla_SVE_Zd_SVE_Zn_SVE_Zm_16;
}
}
else
{

View File

@@ -4043,5 +4043,6 @@ enum aarch64_opcode_idx
A64_OPID_4e40ec00_fmmla_Vd_Vn_Vm,
A64_OPID_4ec0ec00_fmmla_Vd_Vn_Vm,
A64_OPID_64a0e000_fmmla_SVE_Zd_SVE_Zn_SVE_Zm_16,
A64_OPID_64e0e000_bfmmla_SVE_Zd_SVE_Zn_SVE_Zm_16,
A64_OPID_MAX,
};

View File

@@ -3090,6 +3090,8 @@ static const aarch64_feature_set aarch64_feature_f16mm =
AARCH64_FEATURE (F16MM);
static const aarch64_feature_set aarch64_feature_f16mm_sve2p2 =
AARCH64_FEATURES (2, F16MM, SVE2p2);
/* Feature set containing only SVE_B16MM; referenced through the
   SVE_B16MM shorthand when gating opcode table entries.  */
static const aarch64_feature_set aarch64_feature_sve_b16mm =
AARCH64_FEATURE (SVE_B16MM);
#define CORE &aarch64_feature_v8
#define FP &aarch64_feature_fp
@@ -3223,6 +3225,7 @@ static const aarch64_feature_set aarch64_feature_f16mm_sve2p2 =
#define F16F32MM &aarch64_feature_f16f32mm
#define F16MM &aarch64_feature_f16mm
#define F16MM_SVE2p2 &aarch64_feature_f16mm_sve2p2
#define SVE_B16MM &aarch64_feature_sve_b16mm
#define CORE_INSN(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS) \
{ NAME, OPCODE, MASK, CLASS, OP, CORE, OPS, QUALS, FLAGS | F_INVALID_IMM_SYMS_1, 0, 0, NULL }
@@ -3575,6 +3578,8 @@ static const aarch64_feature_set aarch64_feature_f16mm_sve2p2 =
{ NAME, OPCODE, MASK, CLASS, 0, F16MM, OPS, QUALS, FLAGS, 0, 0, NULL }
/* Opcode table entry for an instruction that needs both F16MM and SVE2p2.
   The feature slot must be F16MM_SVE2p2 (the combined feature set), not
   plain F16MM, otherwise the SVE2p2 requirement is silently dropped.
   The OP slot is always 0 and F_STRICT is ORed into FLAGS.  */
#define F16MM_SVE2p2_INSN(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS) \
{ NAME, OPCODE, MASK, CLASS, 0, F16MM_SVE2p2, OPS, QUALS, FLAGS | F_STRICT, 0, 0, NULL }
/* Opcode table entry for an instruction gated on the SVE_B16MM feature set.
   The OP slot is always 0 and F_STRICT is ORed into the caller's FLAGS.  */
#define SVE_B16MM_INSN(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS) \
{ NAME, OPCODE, MASK, CLASS, 0, SVE_B16MM, OPS, QUALS, FLAGS | F_STRICT, 0, 0, NULL }
#define MOPS_CPY_OP1_OP2_PME_INSN(NAME, OPCODE, MASK, FLAGS, CONSTRAINTS) \
MOPS_INSN (NAME, OPCODE, MASK, 0, \
@@ -7878,6 +7883,9 @@ const struct aarch64_opcode aarch64_opcode_table[] =
F16MM_INSN ("fmmla", 0x4ec0ec00, 0xffe0fc00, asimdmisc, OP3 (Vd, Vn, Vm), QL_V3SAME8H, 0),
F16MM_SVE2p2_INSN ("fmmla", 0x64a0e000, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0),
/* SVE B16MM instructions: BFMMLA (non-widening), all operands .h.  */
SVE_B16MM_INSN ("bfmmla", 0x64e0e000, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0),
{0, 0, 0, 0, 0, 0, {}, {}, 0, 0, 0, NULL},
};