x86/APX: make .insn extended-EVEX capable

So far tricks had to be played to use .insn to encode extended-EVEX
insns; the X4 bit couldn't be controlled at all. Extend the syntax just
enough to cover all features, taking care to reject invalid feature
combinations (albeit aiming at being as lax there as possible, to offer
users as much flexibility as we can - we don't, after all, know what
the future will bring).

In a pre-existing testcase replace all but one .byte; the one that needs
to remain wants to have EVEX.U clear in a way that's neither
controllable via AVX10/256 embedded rounding (would otherwise also set
EVEX.ND), nor via the index register (EVEX.X4), as there's no memory
operand. For one of the converted instances ModR/M.mod needs correcting:
An 8-bit displacement requires that to be 1, not 2. Also adjust source
comments to better represent what the bad insns mimic.
This commit is contained in:
Jan Beulich
2025-02-14 09:32:35 +01:00
parent 539f21cf4a
commit 298a683397
6 changed files with 197 additions and 25 deletions

View File

@@ -2125,6 +2125,59 @@ check_Scc_OszcOperations (const char *l)
/* Skip '{'. */ /* Skip '{'. */
suffix_string++; suffix_string++;
/* For .insn require 'scc=' as the first element. */
if (dot_insn ())
{
char *copy;
valueT val;
while (is_whitespace (*suffix_string))
suffix_string++;
if (strncasecmp (suffix_string, "scc", 3) == 0)
suffix_string += 3;
else
{
as_bad (_("unrecognized pseudo-suffix"));
return -1;
}
while (is_whitespace (*suffix_string))
suffix_string++;
if (*suffix_string == '=')
suffix_string++;
else
{
as_bad (_("unrecognized pseudo-suffix"));
return -1;
}
copy = xstrdup (suffix_string);
/* No need to save/restore input_line_pointer; that's done in the
caller already. */
input_line_pointer = copy;
val = get_absolute_expression ();
suffix_string += input_line_pointer - copy;
free (copy);
if (val > 0xf)
{
as_bad (_("scc= value must be between 0 and 15 (decimal)"));
return -1;
}
i.scc = val;
/* Permit dfv= to be absent (implying all flag values being zero). */
if (*suffix_string == '}')
return suffix_string + 1 - l;
if (*suffix_string != ',')
goto bad;
suffix_string++;
}
/* Parse 'dfv='. */ /* Parse 'dfv='. */
while (is_whitespace (*suffix_string)) while (is_whitespace (*suffix_string))
suffix_string++; suffix_string++;
@@ -2197,6 +2250,7 @@ check_Scc_OszcOperations (const char *l)
suffix_string ++; suffix_string ++;
} }
bad:
as_bad (_("missing `}' or `,' in pseudo-suffix")); as_bad (_("missing `}' or `,' in pseudo-suffix"));
return -1; return -1;
} }
@@ -4573,7 +4627,7 @@ build_rex2_prefix (void)
| z| L'L | b | `v | aaa | | z| L'L | b | `v | aaa |
*/ */
static bool static bool
build_apx_evex_prefix (void) build_apx_evex_prefix (bool force_nd)
{ {
/* To mimic behavior for legacy insns, transform use of DATA16 and REX64 into /* To mimic behavior for legacy insns, transform use of DATA16 and REX64 into
their embedded-prefix representations. */ their embedded-prefix representations. */
@@ -4620,7 +4674,8 @@ build_apx_evex_prefix (void)
/* Encode the NDD bit of the instruction promoted from the legacy /* Encode the NDD bit of the instruction promoted from the legacy
space. ZU shares the same bit with NDD. */ space. ZU shares the same bit with NDD. */
if ((i.vex.register_specifier && i.tm.opcode_space == SPACE_MAP4) if ((i.vex.register_specifier && i.tm.opcode_space == SPACE_MAP4)
|| i.tm.opcode_modifier.operandconstraint == ZERO_UPPER) || i.tm.opcode_modifier.operandconstraint == ZERO_UPPER
|| force_nd)
i.vex.bytes[3] |= 0x10; i.vex.bytes[3] |= 0x10;
/* Encode SCC and oszc flags bits. */ /* Encode SCC and oszc flags bits. */
@@ -7462,7 +7517,7 @@ i386_assemble (char *line)
if (is_apx_evex_encoding ()) if (is_apx_evex_encoding ())
{ {
if (!build_apx_evex_prefix ()) if (!build_apx_evex_prefix (false))
return; return;
} }
else if (i.tm.opcode_modifier.vex) else if (i.tm.opcode_modifier.vex)
@@ -10972,7 +11027,8 @@ build_modrm_byte (void)
if (i.tm.operand_types[op].bitfield.baseindex) if (i.tm.operand_types[op].bitfield.baseindex)
break; break;
if (i.reg_operands + i.mem_operands + (i.tm.extension_opcode != None) == 4) if (i.reg_operands + i.mem_operands + (i.tm.extension_opcode != None)
+ (i.tm.opcode_modifier.operandconstraint == SCC) == 4)
{ {
expressionS *exp; expressionS *exp;
@@ -10984,10 +11040,12 @@ build_modrm_byte (void)
2. 4 operands: 4 register operands or 3 register operands 2. 4 operands: 4 register operands or 3 register operands
plus 1 memory operand, with VexXDS. plus 1 memory operand, with VexXDS.
3. Other equivalent combinations when coming from s_insn(). */ 3. Other equivalent combinations when coming from s_insn(). */
gas_assert (i.tm.opcode_modifier.vexvvvv if (!dot_insn ())
&& i.tm.opcode_modifier.vexw); {
gas_assert (dot_insn () gas_assert (i.tm.opcode_modifier.vexvvvv
|| i.tm.operand_types[dest].bitfield.class == RegSIMD); && i.tm.opcode_modifier.vexw);
gas_assert (i.tm.operand_types[dest].bitfield.class == RegSIMD);
}
/* Of the first two non-immediate operands the one with the template /* Of the first two non-immediate operands the one with the template
not allowing for a memory one is encoded in the immediate operand. */ not allowing for a memory one is encoded in the immediate operand. */
@@ -13275,7 +13333,8 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
const char *end; const char *end;
unsigned int j; unsigned int j;
valueT val; valueT val;
bool vex = false, xop = false, evex = false; bool vex = false, xop = false;
enum { evex_none, evex_basic, evex_nd } evex = evex_none;
struct last_insn *last_insn; struct last_insn *last_insn;
init_globals (); init_globals ();
@@ -13324,7 +13383,7 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
else if (startswith (line, "EVEX") else if (startswith (line, "EVEX")
&& (line[4] == '.' || is_whitespace (line[4]))) && (line[4] == '.' || is_whitespace (line[4])))
{ {
evex = true; evex = evex_basic;
line += 4; line += 4;
} }
@@ -13543,6 +13602,20 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
line += 3; line += 3;
} }
if (line > end && evex && *line == '.')
{
if (line[1] == 'N' && line[2] == 'D')
{
evex = evex_nd;
line += 3;
}
else if (line[1] == 'Z' && line[2] == 'U')
{
i.tm.opcode_modifier.operandconstraint = ZERO_UPPER;
line += 3;
}
}
if (line > end && *line && !is_whitespace (*line)) if (line > end && *line && !is_whitespace (*line))
{ {
/* Improve diagnostic a little. */ /* Improve diagnostic a little. */
@@ -13612,6 +13685,25 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
} }
} }
if (evex == evex_basic && *line == '{')
{
int length = check_Scc_OszcOperations (line);
if (length > 0)
{
line += length;
if (is_whitespace (*line))
++line;
if (i.tm.opcode_modifier.operandconstraint)
{
as_bad (_("SCC/OSZC specifier cannot be used here"));
goto bad;
}
i.tm.opcode_modifier.operandconstraint = SCC;
}
}
/* Parse operands, if any, before evaluating encoding space. */ /* Parse operands, if any, before evaluating encoding space. */
if (*line == ',') if (*line == ',')
{ {
@@ -13713,7 +13805,8 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
/* Enforce certain constraints on operands. */ /* Enforce certain constraints on operands. */
switch (i.reg_operands + i.mem_operands switch (i.reg_operands + i.mem_operands
+ (i.tm.extension_opcode != None)) + (i.tm.extension_opcode != None)
+ (i.tm.opcode_modifier.operandconstraint == SCC))
{ {
case 0: case 0:
if (i.short_form) if (i.short_form)
@@ -13728,9 +13821,13 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
as_bad (_("too few register/memory operands")); as_bad (_("too few register/memory operands"));
goto done; goto done;
} }
break; /* Fall through. */
case 2: case 2:
if (evex == evex_nd)
{
as_bad (_("too few register/memory operands"));
goto done;
}
break; break;
case 4: case 4:
@@ -13743,9 +13840,12 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
} }
/* Fall through. */ /* Fall through. */
case 3: case 3:
if (i.tm.opcode_modifier.operandconstraint == SCC)
break;
if (pp.encoding != encoding_default) if (pp.encoding != encoding_default)
{ {
i.tm.opcode_modifier.vexvvvv = i.tm.extension_opcode == None i.tm.opcode_modifier.vexvvvv = (i.tm.extension_opcode == None
&& evex != evex_nd)
? VexVVVV_SRC1 : VexVVVV_DST; ? VexVVVV_SRC1 : VexVVVV_DST;
break; break;
} }
@@ -14043,6 +14143,13 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
if (vex || xop) if (vex || xop)
{ {
if (is_apx_evex_encoding ())
{
as_bad (_("APX functionality cannot be used with %s encodings"),
vex ? "VEX" : "XOP");
goto done;
}
if (!i.tm.opcode_modifier.vex) if (!i.tm.opcode_modifier.vex)
i.tm.opcode_modifier.vex = VEXScalar; /* LIG */ i.tm.opcode_modifier.vex = VEXScalar; /* LIG */
@@ -14054,7 +14161,36 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
if (!i.tm.opcode_modifier.evex) if (!i.tm.opcode_modifier.evex)
i.tm.opcode_modifier.evex = EVEXLIG; i.tm.opcode_modifier.evex = EVEXLIG;
build_evex_prefix (); /* To keep earlier .insn uses working as far as possible, take the
legacy path when opcode space is 4 bits wide (impossible to encode in
extended EVEX), and when no "extended" syntax elements are used. */
if ((!is_apx_evex_encoding () || i.insn_opcode_space > 7)
&& evex == evex_basic
&& !i.tm.opcode_modifier.operandconstraint)
build_evex_prefix ();
else if (i.insn_opcode_space > 7)
{
as_bad (_("opcode space cannot be larger than 7"));
goto done;
}
else if (evex == evex_nd && (i.broadcast.type || i.broadcast.bytes))
{
as_bad (_("ND and broadcast cannot be used at the same time"));
goto done;
}
else if (pp.has_nf && i.mask.reg)
{
as_bad (_("{nf} and masking cannot be used at the same time"));
goto done;
}
else if (i.tm.opcode_modifier.operandconstraint == SCC
&& (pp.has_nf || i.mask.reg))
{
as_bad (_("SCC cannot be used at the same time {nf} / masking"));
goto done;
}
else if (!build_apx_evex_prefix (evex == evex_nd))
goto done;
i.rex &= REX_OPCODE; i.rex &= REX_OPCODE;
} }
else else

View File

@@ -702,7 +702,7 @@ operand, as long as there is one.
syntax tries to resemble that used in documentation: syntax tries to resemble that used in documentation:
@itemize @bullet @itemize @bullet
@item @code{VEX}[@code{.@var{len}}][@code{.@var{prefix}}][@code{.@var{space}}][@code{.@var{w}}] @item @code{VEX}[@code{.@var{len}}][@code{.@var{prefix}}][@code{.@var{space}}][@code{.@var{w}}]
@item @code{EVEX}[@code{.@var{len}}][@code{.@var{prefix}}][@code{.@var{space}}][@code{.@var{w}}] @item @code{EVEX}[@code{.@var{len}}][@code{.@var{prefix}}][@code{.@var{space}}][@code{.@var{w}}][@code{.@var{opt}}]
@item @code{XOP}@var{space}[@code{.@var{len}}][@code{.@var{prefix}}][@code{.@var{w}}] @item @code{XOP}@var{space}[@code{.@var{len}}][@code{.@var{prefix}}][@code{.@var{w}}]
@end itemize @end itemize
@@ -717,10 +717,11 @@ only) @code{512} as well as @code{L0} / @code{L1} for VEX / XOP and
@item @code{0f}, @code{0f38}, @code{0f3a}, or @code{M0}...@code{M31} @item @code{0f}, @code{0f38}, @code{0f3a}, or @code{M0}...@code{M31}
for VEX for VEX
@item @code{08}...@code{1f} for XOP @item @code{08}...@code{1f} for XOP
@item @code{0f}, @code{0f38}, @code{0f3a}, or @code{M0}...@code{M15} @item @code{0f}, @code{0f38}, @code{0f3a}, or @code{M0}...@code{M7}
for EVEX for EVEX
@end itemize @end itemize
@item @var{w} can be @code{WIG}, @code{W0}, or @code{W1} @item @var{w} can be @code{WIG}, @code{W0}, or @code{W1}
@item @var{opt} can be @code{ND} or @code{ZU}
@end itemize @end itemize
Defaults: Defaults:
@@ -808,6 +809,12 @@ be suffixed by @code{@{:d@var{n}@}} to specify the size (in bytes).
This can be combined with an embedded broadcast specifier: This can be combined with an embedded broadcast specifier:
@samp{8(%eax)@{1to8:d8@}}. @samp{8(%eax)@{1to8:d8@}}.
For SCC EVEX the @code{@{dfv=@}} specifier used by ordinary insns is
extended and immediately follows the opcode specifier. The extension
is that the SCC value needs to be specified and goes first, as in
@code{@{scc=@var{n},dfv=...@}}. Unlike for ordinary insns @code{dfv=}
may be omitted for brevity.
@cindex @code{noopt} directive @cindex @code{noopt} directive
@item .noopt @item .noopt
Disable instruction size optimization. Disable instruction size optimization.

View File

@@ -63,4 +63,13 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: 62 f5 fd 58 5a 40 01[ ]+vcvtpd2ph (0x)?8\(%rax\)\{1to8\},%xmm0 [ ]*[a-f0-9]+: 62 f5 fd 58 5a 40 01[ ]+vcvtpd2ph (0x)?8\(%rax\)\{1to8\},%xmm0
[ ]*[a-f0-9]+: 62 f5 7c 48 5a 40 01[ ]+vcvtph2pd 0x10\(%rax\),%zmm0 [ ]*[a-f0-9]+: 62 f5 7c 48 5a 40 01[ ]+vcvtph2pd 0x10\(%rax\),%zmm0
[ ]*[a-f0-9]+: 62 f5 7c 58 5a 40 01[ ]+vcvtph2pd (0x)?2\(%rax\)\{1to8\},%zmm0 [ ]*[a-f0-9]+: 62 f5 7c 58 5a 40 01[ ]+vcvtph2pd (0x)?2\(%rax\)\{1to8\},%zmm0
[ ]*[a-f0-9]+: 62 e4 7c 08 8b 00[ ]+movrs \(%rax\),%r16d
[ ]*[a-f0-9]+: 62 fc 7c 08 8b 00[ ]+movrs \(%r16\),%eax
[ ]*[a-f0-9]+: 62 f4 78 08 8b 04 00[ ]+movrs \(%rax,%r16(,1)?\),%eax
[ ]*[a-f0-9]+: 62 fc 7c 08 60 c0[ ]+movbe %r16d,%eax
[ ]*[a-f0-9]+: 62 f4 7c 0c 01 c0[ ]+\{nf\} add %eax,%eax
[ ]*[a-f0-9]+: 62 f4 7c 18 01 c0[ ]+add %eax,%eax,%eax
[ ]*[a-f0-9]+: 62 f4 ec 18 ff f1[ ]+push2p %rcx,%rdx
[ ]*[a-f0-9]+: 62 f4 7f 18 42 c0[ ]+setzub %al
[ ]*[a-f0-9]+: 62 f4 44 0b 39 c0[ ]+ccmpf \{dfv=of\} %eax,%eax
#pass #pass

View File

@@ -104,3 +104,24 @@ insn:
# vcvtph2pd # vcvtph2pd
.insn EVEX.M5.W0 0x5a, 16(%rax){:d16}, %zmm0 .insn EVEX.M5.W0 0x5a, 16(%rax){:d16}, %zmm0
.insn EVEX.M5.W0 0x5a, 2(%rax){1to8:d2}, %zmm0 .insn EVEX.M5.W0 0x5a, 2(%rax){1to8:d2}, %zmm0
# movrs (APX)
.insn EVEX.L0.NP.M4 0x8b, (%rax), %r16d
.insn EVEX.L0.NP.M4 0x8b, (%r16), %eax
.insn EVEX.L0.NP.M4 0x8b, (%rax,%r16), %eax
# movbe (APX)
.insn EVEX.L0.NP.M4 0x60, %r16d, %eax
# add (APX)
.insn {nf} EVEX.L0.NP.M4 0x01, %eax, %eax
.insn EVEX.L0.NP.M4.ND 0x01, %eax, %eax, %eax
# push2p
.insn EVEX.L0.NP.M4.W1.ND 0xff/6, %rcx, %rdx
# setzub
.insn EVEX.L0.F2.M4.ZU 0x42/0, %eax
# ccmpf
.insn EVEX.L0.NP.M4 0x39 {scc=0b1011,dfv=of}, %eax, %eax

View File

@@ -12,7 +12,7 @@ Disassembly of section .text:
[ ]*[a-f0-9]+:[ ]+c7[ ]+\(bad\) [ ]*[a-f0-9]+:[ ]+c7[ ]+\(bad\)
[ ]*[a-f0-9]+:[ ]+62 fc 7f 08 60[ ]+\(bad\) [ ]*[a-f0-9]+:[ ]+62 fc 7f 08 60[ ]+\(bad\)
[ ]*[a-f0-9]+:[ ]+c7[ ]+\(bad\) [ ]*[a-f0-9]+:[ ]+c7[ ]+\(bad\)
[ ]*[a-f0-9]+:[ ]+62 e2 f9 41 91 84[ ]+vpgatherqq \(bad\),%zmm16\{%k1\} [ ]*[a-f0-9]+:[ ]+62 e2 f9 41 91 44[ ]+vpgatherqq \(bad\),%zmm16\{%k1\}
[ ]*[a-f0-9]+:[ ]+cd ff[ ]+int \$0xff [ ]*[a-f0-9]+:[ ]+cd ff[ ]+int \$0xff
[ ]*[a-f0-9]+:[ ]+62 fd 7d 08 60[ ]+\(bad\) [ ]*[a-f0-9]+:[ ]+62 fd 7d 08 60[ ]+\(bad\)
[ ]*[a-f0-9]+:[ ]+c7[ ]+.* [ ]*[a-f0-9]+:[ ]+c7[ ]+.*

View File

@@ -9,9 +9,8 @@ _start:
#movbe %r23w,%ax set EVEX.pp = f2. #movbe %r23w,%ax set EVEX.pp = f2.
.insn EVEX.L0.f2.M12.W0 0x60, %di, %ax .insn EVEX.L0.f2.M12.W0 0x60, %di, %ax
#VSIB vpgatherqq (%rbp,%zmm17,8),%zmm16{%k1} set EVEX.P[10] == 0 #VSIB vpgatherqq -8(%rbp,%zmm17,8),%zmm16{%k1} set EVEX.U = 0.
.byte 0x62, 0xe2, 0xf9, 0x41, 0x91, 0x84, 0xcd .insn EVEX.512.66.0f38.W1 0x91, -8(%rbp,%r17,8){:d8}, %zmm16{%k1}, %r16
.byte 0xff
#EVEX_MAP4 movbe %r23w,%ax set EVEX.mm == 0b01. #EVEX_MAP4 movbe %r23w,%ax set EVEX.mm == 0b01.
.insn EVEX.L0.66.M13.W0 0x60, %di, %ax .insn EVEX.L0.66.M13.W0 0x60, %di, %ax
@@ -38,11 +37,11 @@ _start:
#EVEX from VEX bzhi %rax,(%rax,%rbx),%rcx EVEX.P[20](EVEX.b) == 0b1 #EVEX from VEX bzhi %rax,(%rax,%rbx),%rcx EVEX.P[20](EVEX.b) == 0b1
.insn EVEX.L0.NP.0f38.W1 0xf5, %rax, (%rax,%rbx){1to8}, %rcx .insn EVEX.L0.NP.0f38.W1 0xf5, %rax, (%rax,%rbx){1to8}, %rcx
#{evex} inc %rax %rbx EVEX.vvvv != 1111 && EVEX.ND = 0. #{evex} inc (%rax,%rcx), %rbx EVEX.vvvv != 1111 && EVEX.ND = 0.
.byte 0x62, 0xf4, 0xe4, 0x08, 0xff, 0x04, 0x08 .insn EVEX.L0.NP.M4 0xff/0, (%rax,%rcx), %rbx
# pop2 %rdi, %r8 set EVEX.ND=0. # pop2 %rdi, %r8 set EVEX.ND=0.
.byte 0x62, 0xf4, 0x3c, 0x08, 0x8f, 0xc7 .insn EVEX.L0.NP.M4.W0 0x8f/0, %rdi, %r8
# pop2 %rax, %rax # pop2 %rax, %rax
.insn EVEX.L0.NP.M4.W0 0x8f/0, %rax, {sae}, %rax .insn EVEX.L0.NP.M4.W0 0x8f/0, %rax, {sae}, %rax
@@ -59,7 +58,7 @@ _start:
#EVEX_MAP4 movbe %r18w,%ax set EVEX.nf = 1. #EVEX_MAP4 movbe %r18w,%ax set EVEX.nf = 1.
.insn EVEX.L0.66.M12.W0 0x60, %di, %ax {%k4} .insn EVEX.L0.66.M12.W0 0x60, %di, %ax {%k4}
# EVEX_MAP4 movbe %r23w,%ax set EVEX.P[10] = 0. # EVEX_MAP4 movbe %r23w,%ax set EVEX.U = 0.
.byte 0x62, 0xfc, 0x79, 0x08, 0x60, 0xc7 .byte 0x62, 0xfc, 0x79, 0x08, 0x60, 0xc7
# ccmps {dfv=of,sf,zf,cf} %r15, %rdx set EVEX.ND = 1. # ccmps {dfv=of,sf,zf,cf} %r15, %rdx set EVEX.ND = 1.