Change ada_decode to preserve upper-case in some situations

This patch is needed to avoid regressions later in the series.

The issue here is that ada_decode, when called with wide=false, would
act as though the input needed verbatim quoting.  That would happen
because the 'W' character would be passed through; and then a later
loop would reject the result due to that character.

Similarly, with operators=false the upper-case-checking loop would be
skipped, but then some names that did need verbatim quoting would pass
through.

Furthermore, I noticed that there is no need to distinguish between
the "wide" and "operators" cases -- all callers pass identical values
to both.

This patch cleans up the above, consolidating the parameters and
changing how upper-case detection is handled, so that both the
operator and wide cases pass through without issue.  I've added new
unit tests for this.

Acked-By: Simon Marchi <simon.marchi@efficios.com>
This commit is contained in:
Tom Tromey
2025-02-22 14:07:57 -07:00
parent 27de7d7bc3
commit cfe3a766e6
4 changed files with 69 additions and 34 deletions

View File

@@ -1310,7 +1310,7 @@ convert_from_hex_encoded (std::string &out, const char *str, int n)
/* See ada-lang.h. */
std::string
ada_decode (const char *encoded, bool wrap, bool operators, bool wide)
ada_decode (const char *encoded, bool wrap, bool translate)
{
int i;
int len0;
@@ -1405,7 +1405,7 @@ ada_decode (const char *encoded, bool wrap, bool operators, bool wide)
while (i < len0)
{
/* Is this a symbol function? */
if (operators && at_start_name && encoded[i] == 'O')
if (at_start_name && encoded[i] == 'O')
{
int k;
@@ -1416,7 +1416,10 @@ ada_decode (const char *encoded, bool wrap, bool operators, bool wide)
op_len - 1) == 0)
&& !c_isalnum (encoded[i + op_len]))
{
decoded.append (ada_opname_table[k].decoded);
if (translate)
decoded.append (ada_opname_table[k].decoded);
else
decoded.append (ada_opname_table[k].encoded);
at_start_name = 0;
i += op_len;
break;
@@ -1504,28 +1507,60 @@ ada_decode (const char *encoded, bool wrap, bool operators, bool wide)
i++;
}
if (wide && i < len0 + 3 && encoded[i] == 'U' && c_isxdigit (encoded[i + 1]))
/* Handle wide characters while respecting the arguments to the
function: we may want to copy them verbatim, but in this case
we do not want to register that we've copied an upper-case
character. */
if (i < len0 + 3 && encoded[i] == 'U' && c_isxdigit (encoded[i + 1]))
{
if (convert_from_hex_encoded (decoded, &encoded[i + 1], 2))
if (translate)
{
i += 3;
if (convert_from_hex_encoded (decoded, &encoded[i + 1], 2))
{
i += 3;
continue;
}
}
else
{
decoded.push_back (encoded[i]);
++i;
continue;
}
}
else if (wide && i < len0 + 5 && encoded[i] == 'W' && c_isxdigit (encoded[i + 1]))
else if (i < len0 + 5 && encoded[i] == 'W'
&& c_isxdigit (encoded[i + 1]))
{
if (convert_from_hex_encoded (decoded, &encoded[i + 1], 4))
if (translate)
{
i += 5;
if (convert_from_hex_encoded (decoded, &encoded[i + 1], 4))
{
i += 5;
continue;
}
}
else
{
decoded.push_back (encoded[i]);
++i;
continue;
}
}
else if (wide && i < len0 + 10 && encoded[i] == 'W' && encoded[i + 1] == 'W'
else if (i < len0 + 10 && encoded[i] == 'W' && encoded[i + 1] == 'W'
&& c_isxdigit (encoded[i + 2]))
{
if (convert_from_hex_encoded (decoded, &encoded[i + 2], 8))
if (translate)
{
i += 10;
if (convert_from_hex_encoded (decoded, &encoded[i + 2], 8))
{
i += 10;
continue;
}
}
else
{
decoded.push_back (encoded[i]);
++i;
continue;
}
}
@@ -1552,6 +1587,12 @@ ada_decode (const char *encoded, bool wrap, bool operators, bool wide)
at_start_name = 1;
i += 2;
}
else if (isupper (encoded[i]) || encoded[i] == ' ')
{
/* Decoded names should never contain any uppercase
character. */
goto Suppress;
}
else
{
/* It's a character part of the decoded name, so just copy it
@@ -1561,16 +1602,6 @@ ada_decode (const char *encoded, bool wrap, bool operators, bool wide)
}
}
/* Decoded names should never contain any uppercase character.
Double-check this, and abort the decoding if we find one. */
if (operators)
{
for (i = 0; i < decoded.length(); ++i)
if (c_isupper (decoded[i]) || decoded[i] == ' ')
goto Suppress;
}
/* If the compiler added a suffix, append it now. */
if (suffix >= 0)
decoded = decoded + "[" + &encoded[suffix] + "]";
@@ -1596,6 +1627,13 @@ ada_decode_tests ()
/* This isn't valid, but used to cause a crash. PR gdb/30639. The
result does not really matter very much. */
SELF_CHECK (ada_decode ("44") == "44");
/* Check that the settings used by the DWARF reader have the desired
effect. */
SELF_CHECK (ada_decode ("symada__cS", false, false) == "");
SELF_CHECK (ada_decode ("pkg__Oxor", false, false) == "pkg.Oxor");
SELF_CHECK (ada_decode ("pack__func_W017b", false, false)
== "pack.func_W017b");
}
#endif
@@ -13313,7 +13351,7 @@ ada_lookup_name_info::ada_lookup_name_info (const lookup_name_info &lookup_name)
else
m_standard_p = false;
m_decoded_name = ada_decode (m_encoded_name.c_str (), true, false, false);
m_decoded_name = ada_decode (m_encoded_name.c_str (), true, false);
/* If the name contains a ".", then the user is entering a fully
qualified entity name, and the match must not be done in wild

View File

@@ -218,16 +218,13 @@ extern const char *ada_decode_symbol (const struct general_symbol_info *);
simply wrapped in <...>. If WRAP is false, then the empty string
will be returned.
When OPERATORS is false, operator names will not be decoded. By
default, they are decoded, e.g., 'Oadd' will be transformed to
'"+"'.
When WIDE is false, wide characters will be left as-is. By
default, they converted from their hex encoding to the host
charset. */
TRANSLATE has two effects. When true (the default), operator names
and wide characters will be decoded. E.g., 'Oadd' will be
transformed to '"+"', and wide characters converted from their hex
encoding to the host charset. When false, these will be left
alone. */
extern std::string ada_decode (const char *name, bool wrap = true,
bool operators = true,
bool wide = true);
bool translate = true);
extern std::vector<struct block_symbol> ada_lookup_symbol_list
(const char *, const struct block *, domain_search_flags);

View File

@@ -108,7 +108,7 @@ cooked_index_shard::handle_gnat_encoded_entry
characters are left as-is. This is done to make name matching a
bit simpler; and for wide characters, it means the choice of Ada
source charset does not affect the indexer directly. */
std::string canonical = ada_decode (entry->name, false, false, false);
std::string canonical = ada_decode (entry->name, false, false);
if (canonical.empty ())
{
entry->canonical = entry->name;

View File

@@ -145,7 +145,7 @@ private:
std::string m_encoded_name;
/* The decoded lookup name. This is formed by calling ada_decode
with both 'operators' and 'wide' set to false. */
with 'translate' set to false. */
std::string m_decoded_name;
/* Whether the user-provided lookup name was Ada encoded. If so,