gas: consolidate whitespace recognition

Extend lex_type[] to also cover whitespace, and add a simple macro,
is_whitespace(), to uniformly recognize both blanks and tabs (and \r when
it's not EOL) as such.

In macro.c use sb_skip_white() as appropriate, instead of open-coding
it.
This commit is contained in:
Jan Beulich
2025-02-03 11:48:55 +01:00
parent a450dd002f
commit e8efdd32b5
7 changed files with 43 additions and 51 deletions

View File

@@ -250,7 +250,7 @@ get_mri_string (int terminator, int *len)
&& ! is_end_of_line[(unsigned char) *input_line_pointer]) && ! is_end_of_line[(unsigned char) *input_line_pointer])
++input_line_pointer; ++input_line_pointer;
s = input_line_pointer; s = input_line_pointer;
while (s > ret && (s[-1] == ' ' || s[-1] == '\t')) while (s > ret && is_whitespace (s[-1]))
--s; --s;
} }

View File

@@ -1432,7 +1432,7 @@ operand (expressionS *expressionP, enum expr_mode mode)
created. Doing it here saves lines of code. */ created. Doing it here saves lines of code. */
clean_up_expression (expressionP); clean_up_expression (expressionP);
SKIP_ALL_WHITESPACE (); /* -> 1st char after operand. */ SKIP_ALL_WHITESPACE (); /* -> 1st char after operand. */
know (*input_line_pointer != ' '); know (!is_whitespace (*input_line_pointer));
/* The PA port needs this information. */ /* The PA port needs this information. */
if (expressionP->X_add_symbol) if (expressionP->X_add_symbol)
@@ -1854,7 +1854,7 @@ expr (int rankarg, /* Larger # is higher rank. */
retval = operand (resultP, mode); retval = operand (resultP, mode);
/* operand () gobbles spaces. */ /* operand () gobbles spaces. */
know (*input_line_pointer != ' '); know (!is_whitespace (*input_line_pointer));
op_left = operatorf (&op_chars); op_left = operatorf (&op_chars);
while (op_left != O_illegal && op_rank[(int) op_left] > rank) while (op_left != O_illegal && op_rank[(int) op_left] > rank)
@@ -1876,7 +1876,7 @@ expr (int rankarg, /* Larger # is higher rank. */
right.X_op_symbol = NULL; right.X_op_symbol = NULL;
} }
know (*input_line_pointer != ' '); know (!is_whitespace (*input_line_pointer));
if (op_left == O_index) if (op_left == O_index)
{ {

View File

@@ -1152,7 +1152,7 @@ debugging_pseudo (list_info_type *list ATTRIBUTE_UNUSED, const char *line)
in_debug = false; in_debug = false;
#endif #endif
while (ISSPACE (*line)) while (is_whitespace (*line))
line++; line++;
if (*line != '.') if (*line != '.')

View File

@@ -29,10 +29,8 @@
/* The routines in this file handle macro definition and expansion. /* The routines in this file handle macro definition and expansion.
They are called by gas. */ They are called by gas. */
#define ISWHITE(x) ((x) == ' ' || (x) == '\t')
#define ISSEP(x) \ #define ISSEP(x) \
((x) == ' ' || (x) == '\t' || (x) == ',' || (x) == '"' || (x) == ';' \ (is_whitespace (x) || (x) == ',' || (x) == '"' || (x) == ';' \
|| (x) == ')' || (x) == '(' \ || (x) == ')' || (x) == '(' \
|| ((flag_macro_alternate || flag_mri) && ((x) == '<' || (x) == '>'))) || ((flag_macro_alternate || flag_mri) && ((x) == '<' || (x) == '>')))
@@ -139,8 +137,7 @@ buffer_and_nest (const char *from, const char *to, sb *ptr,
if (! LABELS_WITHOUT_COLONS) if (! LABELS_WITHOUT_COLONS)
{ {
/* Skip leading whitespace. */ /* Skip leading whitespace. */
while (i < ptr->len && ISWHITE (ptr->ptr[i])) i = sb_skip_white (i, ptr);
i++;
} }
for (;;) for (;;)
@@ -154,8 +151,7 @@ buffer_and_nest (const char *from, const char *to, sb *ptr,
if (i < ptr->len && is_name_ender (ptr->ptr[i])) if (i < ptr->len && is_name_ender (ptr->ptr[i]))
i++; i++;
/* Skip whitespace. */ /* Skip whitespace. */
while (i < ptr->len && ISWHITE (ptr->ptr[i])) i = sb_skip_white (i, ptr);
i++;
/* Check for the colon. */ /* Check for the colon. */
if (i >= ptr->len || ptr->ptr[i] != ':') if (i >= ptr->len || ptr->ptr[i] != ':')
{ {
@@ -174,8 +170,7 @@ buffer_and_nest (const char *from, const char *to, sb *ptr,
} }
/* Skip trailing whitespace. */ /* Skip trailing whitespace. */
while (i < ptr->len && ISWHITE (ptr->ptr[i])) i = sb_skip_white (i, ptr);
i++;
if (i < ptr->len && (ptr->ptr[i] == '.' if (i < ptr->len && (ptr->ptr[i] == '.'
|| NO_PSEUDO_DOT || NO_PSEUDO_DOT
@@ -424,9 +419,7 @@ get_any_string (size_t idx, sb *in, sb *out)
*in_br = '\0'; *in_br = '\0';
while (idx < in->len while (idx < in->len
&& (*in_br && (*in_br || !is_whitespace (in->ptr[idx]))
|| (in->ptr[idx] != ' '
&& in->ptr[idx] != '\t'))
&& in->ptr[idx] != ',' && in->ptr[idx] != ','
&& (in->ptr[idx] != '<' && (in->ptr[idx] != '<'
|| (! flag_macro_alternate && ! flag_mri))) || (! flag_macro_alternate && ! flag_mri)))
@@ -916,7 +909,7 @@ macro_expand_body (sb *in, sb *out, formal_entry *formals,
if (! macro if (! macro
|| src + 5 >= in->len || src + 5 >= in->len
|| strncasecmp (in->ptr + src, "LOCAL", 5) != 0 || strncasecmp (in->ptr + src, "LOCAL", 5) != 0
|| ! ISWHITE (in->ptr[src + 5]) || ! is_whitespace (in->ptr[src + 5])
/* PR 11507: Skip keyword LOCAL if it is found inside a quoted string. */ /* PR 11507: Skip keyword LOCAL if it is found inside a quoted string. */
|| inquote) || inquote)
{ {
@@ -1069,9 +1062,7 @@ macro_expand (size_t idx, sb *in, macro_entry *m, sb *out)
/* The Microtec assembler ignores this if followed by a white space. /* The Microtec assembler ignores this if followed by a white space.
(Macro invocation with empty extension) */ (Macro invocation with empty extension) */
idx++; idx++;
if ( idx < in->len if (idx < in->len && !is_whitespace (in->ptr[idx]))
&& in->ptr[idx] != ' '
&& in->ptr[idx] != '\t')
{ {
formal_entry *n = new_formal (); formal_entry *n = new_formal ();
@@ -1192,7 +1183,7 @@ macro_expand (size_t idx, sb *in, macro_entry *m, sb *out)
{ {
if (idx < in->len && in->ptr[idx] == ',') if (idx < in->len && in->ptr[idx] == ',')
++idx; ++idx;
if (idx < in->len && ISWHITE (in->ptr[idx])) if (idx < in->len && is_whitespace (in->ptr[idx]))
break; break;
} }
} }

View File

@@ -76,6 +76,12 @@ bool input_from_string = false;
die horribly; die horribly;
#endif #endif
#ifndef CR_EOL
#define LEX_CR LEX_WHITE
#else
#define LEX_CR 0
#endif
#ifndef LEX_AT #ifndef LEX_AT
#define LEX_AT 0 #define LEX_AT 0
#endif #endif
@@ -112,9 +118,9 @@ die horribly;
/* Used by is_... macros. our ctype[]. */ /* Used by is_... macros. our ctype[]. */
char lex_type[256] = { char lex_type[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* @ABCDEFGHIJKLMNO */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, LEX_CR, 0, 0, /* @ABCDEFGHIJKLMNO */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* PQRSTUVWXYZ[\]^_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* PQRSTUVWXYZ[\]^_ */
0, 0, 0, LEX_HASH, LEX_DOLLAR, LEX_PCT, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, /* _!"#$%&'()*+,-./ */ 8, 0, 0, LEX_HASH, LEX_DOLLAR, LEX_PCT, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, /* _!"#$%&'()*+,-./ */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, LEX_QM, /* 0123456789:;<=>? */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, LEX_QM, /* 0123456789:;<=>? */
LEX_AT, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* @ABCDEFGHIJKLMNO */ LEX_AT, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* @ABCDEFGHIJKLMNO */
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, LEX_BR, 0, LEX_BR, 0, 3, /* PQRSTUVWXYZ[\]^_ */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, LEX_BR, 0, LEX_BR, 0, 3, /* PQRSTUVWXYZ[\]^_ */
@@ -1068,11 +1074,11 @@ read_a_source_file (const char *name)
if (*rest == ':') if (*rest == ':')
++rest; ++rest;
if (*rest == ' ' || *rest == '\t') if (is_whitespace (*rest))
++rest; ++rest;
if ((strncasecmp (rest, "EQU", 3) == 0 if ((strncasecmp (rest, "EQU", 3) == 0
|| strncasecmp (rest, "SET", 3) == 0) || strncasecmp (rest, "SET", 3) == 0)
&& (rest[3] == ' ' || rest[3] == '\t')) && is_whitespace (rest[3]))
{ {
input_line_pointer = rest + 3; input_line_pointer = rest + 3;
equals (line_start, equals (line_start,
@@ -1080,8 +1086,7 @@ read_a_source_file (const char *name)
continue; continue;
} }
if (strncasecmp (rest, "MACRO", 5) == 0 if (strncasecmp (rest, "MACRO", 5) == 0
&& (rest[5] == ' ' && (is_whitespace (rest[5])
|| rest[5] == '\t'
|| is_end_of_line[(unsigned char) rest[5]])) || is_end_of_line[(unsigned char) rest[5]]))
mri_line_macro = 1; mri_line_macro = 1;
} }
@@ -1117,7 +1122,7 @@ read_a_source_file (const char *name)
level. */ level. */
do do
nul_char = next_char = *input_line_pointer++; nul_char = next_char = *input_line_pointer++;
while (next_char == '\t' || next_char == ' ' || next_char == '\f'); while (is_whitespace (next_char) || next_char == '\f');
/* C is the 1st significant character. /* C is the 1st significant character.
Input_line_pointer points after that character. */ Input_line_pointer points after that character. */
@@ -1146,12 +1151,12 @@ read_a_source_file (const char *name)
if (*rest == ':') if (*rest == ':')
++rest; ++rest;
if (*rest == ' ' || *rest == '\t') if (is_whitespace (*rest))
++rest; ++rest;
if ((strncasecmp (rest, "EQU", 3) == 0 if ((strncasecmp (rest, "EQU", 3) == 0
|| strncasecmp (rest, "SET", 3) == 0) || strncasecmp (rest, "SET", 3) == 0)
&& (rest[3] == ' ' || rest[3] == '\t')) && is_whitespace (rest[3]))
{ {
input_line_pointer = rest + 3; input_line_pointer = rest + 3;
equals (s, 1); equals (s, 1);
@@ -1169,7 +1174,7 @@ read_a_source_file (const char *name)
SKIP_WHITESPACE (); SKIP_WHITESPACE ();
} }
else if ((next_char == '=' && *rest == '=') else if ((next_char == '=' && *rest == '=')
|| ((next_char == ' ' || next_char == '\t') || (is_whitespace (next_char)
&& rest[0] == '=' && rest[0] == '='
&& rest[1] == '=')) && rest[1] == '='))
{ {
@@ -1177,7 +1182,7 @@ read_a_source_file (const char *name)
demand_empty_rest_of_line (); demand_empty_rest_of_line ();
} }
else if ((next_char == '=' else if ((next_char == '='
|| ((next_char == ' ' || next_char == '\t') || (is_whitespace (next_char)
&& *rest == '=')) && *rest == '='))
#ifdef TC_EQUAL_IN_INSN #ifdef TC_EQUAL_IN_INSN
&& !TC_EQUAL_IN_INSN (next_char, s) && !TC_EQUAL_IN_INSN (next_char, s)
@@ -1284,7 +1289,7 @@ read_a_source_file (const char *name)
/* The following skip of whitespace is compulsory. /* The following skip of whitespace is compulsory.
A well shaped space is sometimes all that separates A well shaped space is sometimes all that separates
keyword from operands. */ keyword from operands. */
if (next_char == ' ' || next_char == '\t') if (is_whitespace (next_char))
input_line_pointer++; input_line_pointer++;
/* Input_line is restored. /* Input_line is restored.
@@ -1497,7 +1502,7 @@ mri_comment_field (char *stopcp)
know (flag_m68k_mri); know (flag_m68k_mri);
for (s = input_line_pointer; for (s = input_line_pointer;
((!is_end_of_line[(unsigned char) *s] && *s != ' ' && *s != '\t') ((!is_end_of_line[(unsigned char) *s] && !is_whitespace (*s))
|| inquote); || inquote);
s++) s++)
{ {
@@ -6321,7 +6326,7 @@ equals (char *sym_name, int reassign)
if (reassign < 0 && *input_line_pointer == '=') if (reassign < 0 && *input_line_pointer == '=')
input_line_pointer++; input_line_pointer++;
while (*input_line_pointer == ' ' || *input_line_pointer == '\t') while (is_whitespace (*input_line_pointer))
input_line_pointer++; input_line_pointer++;
if (flag_mri) if (flag_mri)
@@ -6495,8 +6500,7 @@ s_include (int arg ATTRIBUTE_UNUSED)
SKIP_WHITESPACE (); SKIP_WHITESPACE ();
i = 0; i = 0;
while (!is_end_of_line[(unsigned char) *input_line_pointer] while (!is_end_of_line[(unsigned char) *input_line_pointer]
&& *input_line_pointer != ' ' && !is_whitespace (*input_line_pointer))
&& *input_line_pointer != '\t')
{ {
obstack_1grow (&notes, *input_line_pointer); obstack_1grow (&notes, *input_line_pointer);
++input_line_pointer; ++input_line_pointer;

View File

@@ -29,17 +29,18 @@ extern bool input_from_string;
#ifdef PERMIT_WHITESPACE #ifdef PERMIT_WHITESPACE
#define SKIP_WHITESPACE() \ #define SKIP_WHITESPACE() \
((*input_line_pointer == ' ') ? ++input_line_pointer : 0) (is_whitespace (*input_line_pointer) ? ++input_line_pointer : 0)
#define SKIP_ALL_WHITESPACE() \ #define SKIP_ALL_WHITESPACE() \
while (*input_line_pointer == ' ') ++input_line_pointer while (is_whitespace (*input_line_pointer)) ++input_line_pointer
#else #else
#define SKIP_WHITESPACE() know (*input_line_pointer != ' ' ) #define SKIP_WHITESPACE() know (!is_whitespace (*input_line_pointer))
#define SKIP_ALL_WHITESPACE() SKIP_WHITESPACE() #define SKIP_ALL_WHITESPACE() SKIP_WHITESPACE()
#endif #endif
#define LEX_NAME (1) /* may continue a name */ #define LEX_NAME (1) /* may continue a name */
#define LEX_BEGIN_NAME (2) /* may begin a name */ #define LEX_BEGIN_NAME (2) /* may begin a name */
#define LEX_END_NAME (4) /* ends a name */ #define LEX_END_NAME (4) /* ends a name */
#define LEX_WHITE (8) /* whitespace */
#define is_name_beginner(c) \ #define is_name_beginner(c) \
( lex_type[(unsigned char) (c)] & LEX_BEGIN_NAME ) ( lex_type[(unsigned char) (c)] & LEX_BEGIN_NAME )
@@ -47,6 +48,8 @@ extern bool input_from_string;
( lex_type[(unsigned char) (c)] & LEX_NAME ) ( lex_type[(unsigned char) (c)] & LEX_NAME )
#define is_name_ender(c) \ #define is_name_ender(c) \
( lex_type[(unsigned char) (c)] & LEX_END_NAME ) ( lex_type[(unsigned char) (c)] & LEX_END_NAME )
#define is_whitespace(c) \
( lex_type[(unsigned char) (c)] & LEX_WHITE )
/* The distinction of "line" and "statement" sadly is blurred by unhelpful /* The distinction of "line" and "statement" sadly is blurred by unhelpful
naming of e.g. the underlying array. Most users really mean "end of naming of e.g. the underlying array. Most users really mean "end of

View File

@@ -215,9 +215,7 @@ sb_terminate (sb *in)
size_t size_t
sb_skip_white (size_t idx, sb *ptr) sb_skip_white (size_t idx, sb *ptr)
{ {
while (idx < ptr->len while (idx < ptr->len && is_whitespace (ptr->ptr[idx]))
&& (ptr->ptr[idx] == ' '
|| ptr->ptr[idx] == '\t'))
idx++; idx++;
return idx; return idx;
} }
@@ -229,18 +227,14 @@ sb_skip_white (size_t idx, sb *ptr)
size_t size_t
sb_skip_comma (size_t idx, sb *ptr) sb_skip_comma (size_t idx, sb *ptr)
{ {
while (idx < ptr->len while (idx < ptr->len && is_whitespace (ptr->ptr[idx]))
&& (ptr->ptr[idx] == ' '
|| ptr->ptr[idx] == '\t'))
idx++; idx++;
if (idx < ptr->len if (idx < ptr->len
&& ptr->ptr[idx] == ',') && ptr->ptr[idx] == ',')
idx++; idx++;
while (idx < ptr->len while (idx < ptr->len && is_whitespace (ptr->ptr[idx]))
&& (ptr->ptr[idx] == ' '
|| ptr->ptr[idx] == '\t'))
idx++; idx++;
return idx; return idx;