[binutils][arm] BFloat16 enablement [4/X]

Hi,

This patch is part of a series that adds support for Armv8.6-A
(Matrix Multiply and BFloat16 extensions) to binutils.

This patch introduces BFloat16 instructions to the arm backend.
The following BFloat16 instructions are added: vdot, vfma{l/t},
vmmla, vfmal{t/b}, vcvt, vcvt{t/b}.

gas/ChangeLog:

2019-11-07  Mihail Ionescu  <mihail.ionescu@arm.com>
2019-11-07  Matthew Malcomson  <matthew.malcomson@arm.com>

	* config/tc-arm.c (arm_archs): Add armv8.6-a option.
	(cpu_arch_ver): Add TAG_CPU_ARCH_V8 tag for Armv8.6-a.
	* doc/c-arm.texi (-march): New armv8.6-a arch.
	* config/tc-arm.c (arm_ext_bf16): New feature set.
	(enum neon_el_type): Add NT_bfloat value.
	(B_MNEM_vfmat, B_MNEM_vfmab): New bfloat16 encoder
	helpers.
	(BAD_BF16): New message.
	(parse_neon_type): Add bf16 type specifier.
	(enum neon_type_mask): Add N_BF16 type.
	(type_chk_of_el_type): Account for NT_bfloat.
	(el_type_of_type_chk): Account for N_BF16.
	(neon_three_args): Split out from neon_three_same.
	(neon_three_same): Part split out into neon_three_args.
	(CVT_FLAVOUR_VAR): Add bf16_f32 cvt flavour.
	(do_neon_cvt_1): Account for vcvt.bf16.f32.
	(do_bfloat_vmla): New.
	(do_mve_vfma): New function to deal with the mnemonic clash between the BF16
	vfmat and the MVE vfma in a VPT block with a 't'rue condition.
	(do_neon_cvttb_1): Account for vcvt{t,b}.bf16.f32.
	(do_vdot): New
	(do_vmmla): New
	(insns): Add vdot and vmmla mnemonics.
	(arm_extensions): Add "bf16" extension.
	* doc/c-arm.texi: Document "bf16" extension.
	* testsuite/gas/arm/attr-march-armv8_6-a.d: New test.
	* testsuite/gas/arm/bfloat16-bad.d: New test.
	* testsuite/gas/arm/bfloat16-bad.l: New test.
	* testsuite/gas/arm/bfloat16-bad.s: New test.
	* testsuite/gas/arm/bfloat16-cmdline-bad-2.d: New test.
	* testsuite/gas/arm/bfloat16-cmdline-bad-3.d: New test.
	* testsuite/gas/arm/bfloat16-cmdline-bad.d: New test.
	* testsuite/gas/arm/bfloat16-neon.s: New test.
	* testsuite/gas/arm/bfloat16-non-neon.s: New test.
	* testsuite/gas/arm/bfloat16-thumb-bad.d: New test.
	* testsuite/gas/arm/bfloat16-thumb-bad.l: New test.
	* testsuite/gas/arm/bfloat16-thumb.d: New test.
	* testsuite/gas/arm/bfloat16-vfp.d: New test.
	* testsuite/gas/arm/bfloat16.d: New test.
	* testsuite/gas/arm/bfloat16.s: New test.

include/ChangeLog:

2019-11-07  Mihail Ionescu  <mihail.ionescu@arm.com>
2019-11-07  Matthew Malcomson  <matthew.malcomson@arm.com>

	* opcode/arm.h (ARM_EXT2_V8_6A, ARM_AEXT2_V8_6A,
	ARM_ARCH_V8_6A): New.
	* opcode/arm.h (ARM_EXT2_BF16): New feature macro.
	(ARM_AEXT2_V8_6A): Include above macro in definition.

opcodes/ChangeLog:

2019-11-07  Mihail Ionescu  <mihail.ionescu@arm.com>
2019-11-07  Matthew Malcomson  <matthew.malcomson@arm.com>

	* arm-dis.c (select_arm_features): Update bfd_march_arm_8 with
	Armv8.6-A.
	(coprocessor_opcodes): Add bfloat16 vcvt{t,b}.
	(neon_opcodes): Add bfloat SIMD instructions.
	(print_insn_coprocessor): Add new control character %b to print
	condition code without checking cp_num.
	(print_insn_neon): Account for BFloat16 instructions that have no
	special top-byte handling.

Regression tested on arm-none-eabi.

Is it ok for trunk?

Regards,
Mihail
This commit is contained in:
Matthew Malcomson
2019-11-07 16:56:12 +00:00
parent 33593eafc9
commit aab2c27d9f
22 changed files with 863 additions and 29 deletions

View File

@@ -396,6 +396,7 @@ struct opcode16
%% %
%c print condition code (always bits 28-31 in ARM mode)
%b print condition code allowing cp_num == 9
%q print shifter argument
%u print condition code (unconditional in ARM mode,
UNPREDICTABLE if not AL in Thumb)
@@ -1207,11 +1208,15 @@ static const struct sopcode32 coprocessor_opcodes[] =
{ANY, ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_3A),
0xfea00800, 0xffa00f10, "vcmla%c.f32\t%12-15,22V, %16-19,7V, %0-3,5D[0], #%20?21%20?780"},
/* BFloat16 instructions. */
{ANY, ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
0x0eb30940, 0x0fbf0f50, "vcvt%7?tb%b.bf16.f32\t%y1, %y0"},
/* Dot Product instructions in the space of coprocessor 13. */
{ANY, ARM_FEATURE_COPROC (FPU_NEON_EXT_DOTPROD),
0xfc200d00, 0xffb00f00, "v%4?usdot.%4?us8\t%12-15,22V, %16-19,7V, %0-3,5V"},
{ANY, ARM_FEATURE_COPROC (FPU_NEON_EXT_DOTPROD),
0xfe000d00, 0xff000f00, "v%4?usdot.%4?us8\t%12-15,22V, %16-19,7V, %0-3D[%5?10]"},
0xfe200d00, 0xff200f00, "v%4?usdot.%4?us8\t%12-15,22V, %16-19,7V, %0-3D[%5?10]"},
/* ARMv8.2 FMAC Long instructions in the space of coprocessor 8. */
{ANY, ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST | ARM_EXT2_V8_2A),
@@ -1452,6 +1457,20 @@ static const struct opcode32 neon_opcodes[] =
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
0xf2300c10, 0xffb00f10, "vfms%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
/* BFloat16 instructions. */
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
0xfc000d00, 0xffb00f10, "vdot.bf16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
0xfe000d00, 0xffb00f10, "vdot.bf16\t%12-15,22R, %16-19,7R, d%0-3d[%5d]"},
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
0xfc000c40, 0xffb00f50, "vmmla.bf16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
0xf3b60640, 0xffbf0fd0, "vcvt%c.bf16.f32\t%12-15,22D, %0-3,5Q"},
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
0xfc300810, 0xffb00f10, "vfma%6?tb.bf16\t%12-15,22Q, %16-19,7Q, %0-3,5Q"},
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
0xfe300810, 0xffb00f10, "vfma%6?tb.bf16\t%12-15,22Q, %16-19,7Q, %0-2D[%3,5d]"},
/* Two registers, miscellaneous. */
{ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8),
0xf3ba0400, 0xffbf0c10, "vrint%7-9?p?m?zaxn%u.f32\t%12-15,22R, %0-3,5R"},
@@ -8159,6 +8178,8 @@ print_insn_coprocessor_1 (const struct sopcode32 *opcodes,
if (cond != COND_UNCOND && cp_num == 9)
is_unpredictable = TRUE;
/* Fall through. */
case 'b':
func (stream, "%s", arm_conditional[cond]);
break;
@@ -8772,6 +8793,10 @@ print_insn_neon (struct disassemble_info *info, long given, bfd_boolean thumb)
}
else if ((given & 0xff000000) == 0xf9000000)
given ^= 0xf9000000 ^ 0xf4000000;
/* BFloat16 neon instructions without special top byte handling. */
else if ((given & 0xff000000) == 0xfe000000
|| (given & 0xff000000) == 0xfc000000)
;
/* vdup is also a valid neon instruction. */
else if ((given & 0xff910f5f) != 0xee800b10)
return FALSE;
@@ -11625,11 +11650,11 @@ select_arm_features (unsigned long mach,
case bfd_mach_arm_7EM: ARM_SET_FEATURES (ARM_ARCH_V7EM); break;
case bfd_mach_arm_8:
{
/* Add bits for extensions that Armv8.5-A recognizes. */
arm_feature_set armv8_5_ext_fset
/* Add bits for extensions that Armv8.6-A recognizes. */
arm_feature_set armv8_6_ext_fset
= ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST);
ARM_SET_FEATURES (ARM_ARCH_V8_5A);
ARM_MERGE_FEATURE_SETS (arch_fset, arch_fset, armv8_5_ext_fset);
ARM_SET_FEATURES (ARM_ARCH_V8_6A);
ARM_MERGE_FEATURE_SETS (arch_fset, arch_fset, armv8_6_ext_fset);
break;
}
case bfd_mach_arm_8R: ARM_SET_FEATURES (ARM_ARCH_V8R); break;