forked from Imagelibrary/binutils-gdb
[binutils][arm] BFloat16 enablement [4/X]
Hi,
This patch is part of a series that adds support for Armv8.6-A
(Matrix Multiply and BFloat16 extensions) to binutils.
This patch introduces BFloat16 instructions to the arm backend.
The following BFloat16 instructions are added: vdot, vfma{b/t},
vmmla, vfmal{t/b}, vcvt, vcvt{t/b}.
gas/ChangeLog:
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
2019-11-07 Matthew Malcomson <matthew.malcomson@arm.com>
* config/tc-arm.c (arm_archs): Add armv8.6-a option.
(cpu_arch_ver): Add TAG_CPU_ARCH_V8 tag for Armv8.6-a.
* doc/c-arm.texi (-march): New armv8.6-a arch.
* config/tc-arm.c (arm_ext_bf16): New feature set.
(enum neon_el_type): Add NT_bfloat value.
(B_MNEM_vfmat, B_MNEM_vfmab): New bfloat16 encoder
helpers.
(BAD_BF16): New message.
(parse_neon_type): Add bf16 type specifier.
(enum neon_type_mask): Add N_BF16 type.
(type_chk_of_el_type): Account for NT_bfloat.
(el_type_of_type_chk): Account for N_BF16.
(neon_three_args): Split out from neon_three_same.
(neon_three_same): Part split out into neon_three_args.
(CVT_FLAVOUR_VAR): Add bf16_f32 cvt flavour.
(do_neon_cvt_1): Account for vcvt.bf16.f32.
(do_bfloat_vmla): New.
(do_mve_vfma): New function to deal with the mnemonic clash between the BF16
vfmat and the MVE vfma in a VPT block with a 't'rue condition.
(do_neon_cvttb_1): Account for vcvt{t,b}.bf16.f32.
(do_vdot): New.
(do_vmmla): New.
(insns): Add vdot and vmmla mnemonics.
(arm_extensions): Add "bf16" extension.
* doc/c-arm.texi: Document "bf16" extension.
* testsuite/gas/arm/attr-march-armv8_6-a.d: New test.
* testsuite/gas/arm/bfloat16-bad.d: New test.
* testsuite/gas/arm/bfloat16-bad.l: New test.
* testsuite/gas/arm/bfloat16-bad.s: New test.
* testsuite/gas/arm/bfloat16-cmdline-bad-2.d: New test.
* testsuite/gas/arm/bfloat16-cmdline-bad-3.d: New test.
* testsuite/gas/arm/bfloat16-cmdline-bad.d: New test.
* testsuite/gas/arm/bfloat16-neon.s: New test.
* testsuite/gas/arm/bfloat16-non-neon.s: New test.
* testsuite/gas/arm/bfloat16-thumb-bad.d: New test.
* testsuite/gas/arm/bfloat16-thumb-bad.l: New test.
* testsuite/gas/arm/bfloat16-thumb.d: New test.
* testsuite/gas/arm/bfloat16-vfp.d: New test.
* testsuite/gas/arm/bfloat16.d: New test.
* testsuite/gas/arm/bfloat16.s: New test.
include/ChangeLog:
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
2019-11-07 Matthew Malcomson <matthew.malcomson@arm.com>
* opcode/arm.h (ARM_EXT2_V8_6A, ARM_AEXT2_V8_6A,
ARM_ARCH_V8_6A): New.
* opcode/arm.h (ARM_EXT2_BF16): New feature macro.
(ARM_AEXT2_V8_6A): Include above macro in definition.
opcodes/ChangeLog:
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
2019-11-07 Matthew Malcomson <matthew.malcomson@arm.com>
* arm-dis.c (select_arm_features): Update bfd_mach_arm_8 with
Armv8.6-A.
(coprocessor_opcodes): Add bfloat16 vcvt{t,b}.
(neon_opcodes): Add bfloat SIMD instructions.
(print_insn_coprocessor): Add new control character %b to print
condition code without checking cp_num.
(print_insn_neon): Account for BFloat16 instructions that have no
special top-byte handling.
Regression tested on arm-none-eabi.
Is it ok for trunk?
Regards,
Mihail
This commit is contained in:
@@ -396,6 +396,7 @@ struct opcode16
|
||||
%% %
|
||||
|
||||
%c print condition code (always bits 28-31 in ARM mode)
|
||||
%b print condition code like %c, but without flagging cp_num == 9 as UNPREDICTABLE
|
||||
%q print shifter argument
|
||||
%u print condition code (unconditional in ARM mode,
|
||||
UNPREDICTABLE if not AL in Thumb)
|
||||
@@ -1207,11 +1208,15 @@ static const struct sopcode32 coprocessor_opcodes[] =
|
||||
{ANY, ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_3A),
|
||||
0xfea00800, 0xffa00f10, "vcmla%c.f32\t%12-15,22V, %16-19,7V, %0-3,5D[0], #%20?21%20?780"},
|
||||
|
||||
/* BFloat16 instructions. */
|
||||
{ANY, ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
|
||||
0x0eb30940, 0x0fbf0f50, "vcvt%7?tb%b.bf16.f32\t%y1, %y0"},
|
||||
|
||||
/* Dot Product instructions in the space of coprocessor 13. */
|
||||
{ANY, ARM_FEATURE_COPROC (FPU_NEON_EXT_DOTPROD),
|
||||
0xfc200d00, 0xffb00f00, "v%4?usdot.%4?us8\t%12-15,22V, %16-19,7V, %0-3,5V"},
|
||||
{ANY, ARM_FEATURE_COPROC (FPU_NEON_EXT_DOTPROD),
|
||||
0xfe000d00, 0xff000f00, "v%4?usdot.%4?us8\t%12-15,22V, %16-19,7V, %0-3D[%5?10]"},
|
||||
0xfe200d00, 0xff200f00, "v%4?usdot.%4?us8\t%12-15,22V, %16-19,7V, %0-3D[%5?10]"},
|
||||
|
||||
/* ARMv8.2 FMAC Long instructions in the space of coprocessor 8. */
|
||||
{ANY, ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST | ARM_EXT2_V8_2A),
|
||||
@@ -1452,6 +1457,20 @@ static const struct opcode32 neon_opcodes[] =
|
||||
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
|
||||
0xf2300c10, 0xffb00f10, "vfms%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
|
||||
|
||||
/* BFloat16 instructions. */
|
||||
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
|
||||
0xfc000d00, 0xffb00f10, "vdot.bf16\t%12-15,22R, %16-19,7R, %0-3,5R"},
|
||||
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
|
||||
0xfe000d00, 0xffb00f10, "vdot.bf16\t%12-15,22R, %16-19,7R, d%0-3d[%5d]"},
|
||||
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
|
||||
0xfc000c40, 0xffb00f50, "vmmla.bf16\t%12-15,22R, %16-19,7R, %0-3,5R"},
|
||||
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
|
||||
0xf3b60640, 0xffbf0fd0, "vcvt%c.bf16.f32\t%12-15,22D, %0-3,5Q"},
|
||||
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
|
||||
0xfc300810, 0xffb00f10, "vfma%6?tb.bf16\t%12-15,22Q, %16-19,7Q, %0-3,5Q"},
|
||||
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
|
||||
0xfe300810, 0xffb00f10, "vfma%6?tb.bf16\t%12-15,22Q, %16-19,7Q, %0-2D[%3,5d]"},
|
||||
|
||||
/* Two registers, miscellaneous. */
|
||||
{ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8),
|
||||
0xf3ba0400, 0xffbf0c10, "vrint%7-9?p?m?zaxn%u.f32\t%12-15,22R, %0-3,5R"},
|
||||
@@ -8159,6 +8178,8 @@ print_insn_coprocessor_1 (const struct sopcode32 *opcodes,
|
||||
if (cond != COND_UNCOND && cp_num == 9)
|
||||
is_unpredictable = TRUE;
|
||||
|
||||
/* Fall through. */
|
||||
case 'b':
|
||||
func (stream, "%s", arm_conditional[cond]);
|
||||
break;
|
||||
|
||||
@@ -8772,6 +8793,10 @@ print_insn_neon (struct disassemble_info *info, long given, bfd_boolean thumb)
|
||||
}
|
||||
else if ((given & 0xff000000) == 0xf9000000)
|
||||
given ^= 0xf9000000 ^ 0xf4000000;
|
||||
/* BFloat16 neon instructions without special top byte handling. */
|
||||
else if ((given & 0xff000000) == 0xfe000000
|
||||
|| (given & 0xff000000) == 0xfc000000)
|
||||
;
|
||||
/* vdup is also a valid neon instruction. */
|
||||
else if ((given & 0xff910f5f) != 0xee800b10)
|
||||
return FALSE;
|
||||
@@ -11625,11 +11650,11 @@ select_arm_features (unsigned long mach,
|
||||
case bfd_mach_arm_7EM: ARM_SET_FEATURES (ARM_ARCH_V7EM); break;
|
||||
case bfd_mach_arm_8:
|
||||
{
|
||||
/* Add bits for extensions that Armv8.5-A recognizes. */
|
||||
arm_feature_set armv8_5_ext_fset
|
||||
/* Add bits for extensions that Armv8.6-A recognizes. */
|
||||
arm_feature_set armv8_6_ext_fset
|
||||
= ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST);
|
||||
ARM_SET_FEATURES (ARM_ARCH_V8_5A);
|
||||
ARM_MERGE_FEATURE_SETS (arch_fset, arch_fset, armv8_5_ext_fset);
|
||||
ARM_SET_FEATURES (ARM_ARCH_V8_6A);
|
||||
ARM_MERGE_FEATURE_SETS (arch_fset, arch_fset, armv8_6_ext_fset);
|
||||
break;
|
||||
}
|
||||
case bfd_mach_arm_8R: ARM_SET_FEATURES (ARM_ARCH_V8R); break;
|
||||
|
||||
Reference in New Issue
Block a user