mirror of
https://github.com/bminor/binutils-gdb.git
synced 2025-12-05 15:15:42 +00:00
This patch rewrites the .gdb_index reader to create the same data structures that are created by the cooked indexer and the .debug_names reader. This is done in support of this series; but also because, from what I can tell, the "templates.exp" change didn't really work properly with this reader. In addition to fixing that problem, this patch removes a lot of code. Implementing this required a couple of hacks, as .gdb_index does not contain all the information that's used by the cooked index implementation. * The index-searching code likes to differentiate between the various DWARF tags when matching, but .gdb_index lumps many things into a single "other" category. To handle this, we introduce a phony tag that's used so that the match method can match on multiple domains. * Similarly, .gdb_index doesn't distinguish between the type and struct domains, so another phony tag is used for this. * The reader must attempt to guess the language of various symbols. This is somewhat finicky. "Plain" (unqualified) symbols are marked as language_unknown and then a couple of hacks are used to handle these -- one in expand_symtabs_matching and another when recognizing "main". For what it's worth, I consider .gdb_index to be near the end of its life. While .debug_names is not perfect -- we found a number of bugs in the standard while implementing it -- it is better than .gdb_index and also better documented. After this patch, we could conceivably remove dwarf_scanner_base. However, I have not done this. Finally, this patch also changes this reader to dump the content of the index, as the other DWARF readers do. This can be handy when debugging gdb. Acked-By: Simon Marchi <simon.marchi@efficios.com> Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=33316
346 lines
11 KiB
C
346 lines
11 KiB
C
/* Shards for the cooked index
|
|
|
|
Copyright (C) 2022-2025 Free Software Foundation, Inc.
|
|
|
|
This file is part of GDB.
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
|
|
|
#include "dwarf2/cooked-index-shard.h"
|
|
#include "dwarf2/tag.h"
|
|
#include "dwarf2/index-common.h"
|
|
#include "cp-support.h"
|
|
#include "c-lang.h"
|
|
#include "ada-lang.h"
|
|
|
|
/* Return true if a plain "main" could be the main program for this
|
|
language. Languages that are known to use some other mechanism are
|
|
excluded here. */
|
|
|
|
static bool
|
|
language_may_use_plain_main (enum language lang)
|
|
{
|
|
/* No need to handle "unknown" here. */
|
|
return (lang == language_c
|
|
|| lang == language_objc
|
|
|| lang == language_cplus
|
|
|| lang == language_m2
|
|
|| lang == language_asm
|
|
|| lang == language_opencl
|
|
|| lang == language_minimal);
|
|
}
|
|
|
|
/* See cooked-index-shard.h. */
|
|
|
|
cooked_index_entry *
cooked_index_shard::create (sect_offset die_offset,
                            enum dwarf_tag tag,
                            cooked_index_flag flags,
                            enum language lang,
                            const char *name,
                            cooked_index_entry_ref parent_entry,
                            dwarf2_per_cu *per_cu)
{
  /* Adjust IS_STATIC in FLAGS according to TAG and LANG before the
     entry is constructed:

     - modules and namespaces always have IS_STATIC cleared;
     - for C++, the class-like and enumeration tags listed below also
       have it cleared;
     - any other tag accepted by tag_is_type has it set;
     - otherwise FLAGS is used exactly as passed in.  */
  const bool is_scope_tag = (tag == DW_TAG_module
                             || tag == DW_TAG_namespace);

  bool is_cplus_type_like = false;
  if (!is_scope_tag && lang == language_cplus)
    {
      switch (tag)
        {
        case DW_TAG_class_type:
        case DW_TAG_interface_type:
        case DW_TAG_structure_type:
        case DW_TAG_union_type:
        case DW_TAG_enumeration_type:
        case DW_TAG_enumerator:
          is_cplus_type_like = true;
          break;

        default:
          break;
        }
    }

  if (is_scope_tag || is_cplus_type_like)
    flags &= ~IS_STATIC;
  else if (tag_is_type (tag))
    flags |= IS_STATIC;

  /* The entry is constructed in place using M_STORAGE.  */
  return new (&m_storage) cooked_index_entry (die_offset, tag, flags,
                                              lang, name, parent_entry,
                                              per_cu);
}
|
|
|
|
/* See cooked-index-shard.h. */
|
|
|
|
cooked_index_entry *
cooked_index_shard::add (sect_offset die_offset, enum dwarf_tag tag,
                         cooked_index_flag flags, enum language lang,
                         const char *name, cooked_index_entry_ref parent_entry,
                         dwarf2_per_cu *per_cu)
{
  /* Build the entry and record it in this shard.  */
  cooked_index_entry *new_entry = create (die_offset, tag, flags, lang, name,
                                          parent_entry, per_cu);
  m_entries.push_back (new_entry);

  /* An entry explicitly flagged as the main program always wins, even
     if an implicit "main" was recorded earlier.  */
  const bool explicit_main = (flags & IS_MAIN) != 0;

  /* Otherwise, the first top-level symbol named "main" may be taken
     as the main program, provided its language is one that can use a
     plain "main".

     The language_unknown test is subtle: it is there only to work
     around a bug in .gdb_index.  That index does not record
     languages, yet it may still emit an entry for "main".  Treating
     such a "main" as the main program would be wrong -- for example,
     an Ada program has a C "main", but that is not what the "start"
     command should stop at.  Insisting on a known language avoids
     setting the main program too eagerly when .gdb_index is in use.
     If .gdb_index is ever removed (PR symtab/31363), this test can go
     as well.

     Strictly speaking the test is redundant, because
     language_may_use_plain_main rejects the unknown language too;
     it is kept as a convenient place for this explanation.  */
  bool implicit_main = false;
  if (!explicit_main
      && m_main == nullptr
      && lang != language_unknown
      && language_may_use_plain_main (lang))
    {
      /* PARENT_ENTRY.resolved is only consulted once the parent is
         known not to be deferred.  */
      const bool has_no_parent = ((flags & IS_PARENT_DEFERRED) == 0
                                  && parent_entry.resolved == nullptr);
      implicit_main = has_no_parent && strcmp (name, "main") == 0;
    }

  if (explicit_main || implicit_main)
    m_main = new_entry;

  return new_entry;
}
|
|
|
|
/* See cooked-index-shard.h. */
|
|
|
|
void
|
|
cooked_index_shard::handle_gnat_encoded_entry
|
|
(cooked_index_entry *entry,
|
|
htab_t gnat_entries,
|
|
std::vector<cooked_index_entry *> &new_entries)
|
|
{
|
|
/* We decode Ada names in a particular way: operators and wide
|
|
characters are left as-is. This is done to make name matching a
|
|
bit simpler; and for wide characters, it means the choice of Ada
|
|
source charset does not affect the indexer directly. */
|
|
std::string canonical = ada_decode (entry->name, false, false);
|
|
if (canonical.empty ())
|
|
{
|
|
entry->canonical = entry->name;
|
|
return;
|
|
}
|
|
std::vector<std::string_view> names = split_name (canonical.c_str (),
|
|
split_style::DOT_STYLE);
|
|
std::string_view tail = names.back ();
|
|
names.pop_back ();
|
|
|
|
const cooked_index_entry *parent = nullptr;
|
|
for (const auto &name : names)
|
|
{
|
|
uint32_t hashval = dwarf5_djb_hash (name);
|
|
void **slot = htab_find_slot_with_hash (gnat_entries, &name,
|
|
hashval, INSERT);
|
|
/* CUs are processed in order, so we only need to check the most
|
|
recent entry. */
|
|
cooked_index_entry *last = (cooked_index_entry *) *slot;
|
|
if (last == nullptr || last->per_cu != entry->per_cu)
|
|
{
|
|
const char *new_name = m_names.insert (name);
|
|
last = create (entry->die_offset, DW_TAG_module,
|
|
IS_SYNTHESIZED, language_ada, new_name, parent,
|
|
entry->per_cu);
|
|
last->canonical = last->name;
|
|
new_entries.push_back (last);
|
|
*slot = last;
|
|
}
|
|
|
|
parent = last;
|
|
}
|
|
|
|
entry->set_parent (parent);
|
|
entry->canonical = m_names.insert (tail);
|
|
}
|
|
|
|
/* Hash a cooked index entry by name pointer value.
|
|
|
|
We can use pointer equality here because names come from .debug_str, which
|
|
will normally be unique-ified by the linker. Also, duplicates are relatively
|
|
harmless -- they just mean a bit of extra memory is used. */
|
|
|
|
struct cooked_index_entry_name_ptr_hash
|
|
{
|
|
using is_avalanching = void;
|
|
|
|
std::uint64_t operator () (const cooked_index_entry *entry) const noexcept
|
|
{
|
|
return ankerl::unordered_dense::hash<const char *> () (entry->name);
|
|
}
|
|
};
|
|
|
|
/* Compare cooked index entries by name pointer value. */
|
|
|
|
struct cooked_index_entry_name_ptr_eq
|
|
{
|
|
bool operator () (const cooked_index_entry *a,
|
|
const cooked_index_entry *b) const noexcept
|
|
{
|
|
return a->name == b->name;
|
|
}
|
|
};
|
|
|
|
/* See cooked-index-shard.h. */
|
|
|
|
void
|
|
cooked_index_shard::finalize (const parent_map_map *parent_maps)
|
|
{
|
|
gdb::unordered_set<const cooked_index_entry *,
|
|
cooked_index_entry_name_ptr_hash,
|
|
cooked_index_entry_name_ptr_eq> seen_names;
|
|
|
|
auto hash_entry = [] (const void *e)
|
|
{
|
|
const cooked_index_entry *entry = (const cooked_index_entry *) e;
|
|
return dwarf5_djb_hash (entry->canonical);
|
|
};
|
|
|
|
auto eq_entry = [] (const void *a, const void *b) -> int
|
|
{
|
|
const cooked_index_entry *ae = (const cooked_index_entry *) a;
|
|
const std::string_view *sv = (const std::string_view *) b;
|
|
return (strlen (ae->canonical) == sv->length ()
|
|
&& strncasecmp (ae->canonical, sv->data (), sv->length ()) == 0);
|
|
};
|
|
|
|
htab_up gnat_entries (htab_create_alloc (10, hash_entry, eq_entry,
|
|
nullptr, xcalloc, xfree));
|
|
std::vector<cooked_index_entry *> new_gnat_entries;
|
|
|
|
for (cooked_index_entry *entry : m_entries)
|
|
{
|
|
if ((entry->flags & IS_PARENT_DEFERRED) != 0)
|
|
{
|
|
const cooked_index_entry *new_parent
|
|
= parent_maps->find (entry->get_deferred_parent ());
|
|
entry->resolve_parent (new_parent);
|
|
}
|
|
|
|
/* Note that this code must be kept in sync with
|
|
language_requires_canonicalization. */
|
|
gdb_assert (entry->canonical == nullptr);
|
|
if ((entry->flags & IS_LINKAGE) != 0)
|
|
entry->canonical = entry->name;
|
|
else if (entry->lang == language_ada)
|
|
{
|
|
/* Newer versions of GNAT emit DW_TAG_module and use a
|
|
hierarchical structure. In this case, we don't need to
|
|
do any extra work. This can be detected by looking for a
|
|
GNAT-encoded name. */
|
|
if (strstr (entry->name, "__") == nullptr)
|
|
{
|
|
entry->canonical = entry->name;
|
|
|
|
/* If the entry does not have a parent, then there's
|
|
nothing extra to do here -- the entry itself is
|
|
sufficient.
|
|
|
|
However, if it does have a parent, we have to
|
|
synthesize an entry with the full name. This is
|
|
unfortunate, but it's necessary due to how some of
|
|
the Ada name-lookup code currently works. For
|
|
example, without this, ada_get_tsd_type will
|
|
fail.
|
|
|
|
Eventually it would be good to change the Ada lookup
|
|
code, and then remove these entries (and supporting
|
|
code in cooked_index_entry::full_name). */
|
|
if (entry->get_parent () != nullptr)
|
|
{
|
|
const char *fullname
|
|
= entry->full_name (&m_storage, FOR_ADA_LINKAGE_NAME);
|
|
cooked_index_entry *linkage = create (entry->die_offset,
|
|
entry->tag,
|
|
(entry->flags
|
|
| IS_LINKAGE
|
|
| IS_SYNTHESIZED),
|
|
language_ada,
|
|
fullname,
|
|
nullptr,
|
|
entry->per_cu);
|
|
linkage->canonical = fullname;
|
|
new_gnat_entries.push_back (linkage);
|
|
}
|
|
}
|
|
else
|
|
handle_gnat_encoded_entry (entry, gnat_entries.get (),
|
|
new_gnat_entries);
|
|
}
|
|
else if (entry->lang == language_cplus || entry->lang == language_c)
|
|
{
|
|
auto [it, inserted] = seen_names.insert (entry);
|
|
|
|
if (inserted)
|
|
{
|
|
/* No entry with that name was present, compute the canonical
|
|
name. */
|
|
gdb::unique_xmalloc_ptr<char> canon_name
|
|
= (entry->lang == language_cplus
|
|
? cp_canonicalize_string (entry->name)
|
|
: c_canonicalize_name (entry->name));
|
|
if (canon_name == nullptr)
|
|
entry->canonical = entry->name;
|
|
else
|
|
entry->canonical = m_names.insert (std::move (canon_name));
|
|
}
|
|
else
|
|
{
|
|
/* An entry with that name was present, re-use its canonical
|
|
name. */
|
|
entry->canonical = (*it)->canonical;
|
|
}
|
|
}
|
|
else
|
|
entry->canonical = entry->name;
|
|
}
|
|
|
|
/* Make sure any new Ada entries end up in the results. This isn't
|
|
done when creating these new entries to avoid invalidating the
|
|
m_entries iterator used in the foreach above. */
|
|
m_entries.insert (m_entries.end (), new_gnat_entries.begin (),
|
|
new_gnat_entries.end ());
|
|
|
|
m_entries.shrink_to_fit ();
|
|
std::sort (m_entries.begin (), m_entries.end (),
|
|
[] (const cooked_index_entry *a, const cooked_index_entry *b)
|
|
{
|
|
return *a < *b;
|
|
});
|
|
}
|
|
|
|
/* See cooked-index-shard.h. */
|
|
|
|
cooked_index_shard::range
cooked_index_shard::find (const std::string &name, bool completing) const
{
  /* Heterogeneous ordering between an entry and a lookup string, as
     needed by std::equal_range.  An entry is compared through its
     canonical name, and MODE is passed on to
     cooked_index_entry::compare.  */
  struct canonical_name_order
  {
    cooked_index_entry::comparison_mode mode;

    /* True if ENTRY sorts before KEY.  */
    bool operator() (const cooked_index_entry *entry,
                     const char *key) const noexcept
    {
      const int cmp = cooked_index_entry::compare (entry->canonical, key,
                                                   mode);
      return cmp < 0;
    }

    /* True if KEY sorts before ENTRY.  */
    bool operator() (const char *key,
                     const cooked_index_entry *entry) const noexcept
    {
      const int cmp = cooked_index_entry::compare (entry->canonical, key,
                                                   mode);
      return cmp > 0;
    }
  };

  /* COMPLETING selects the COMPLETE comparison mode; otherwise MATCH
     is used.  */
  const cooked_index_entry::comparison_mode mode
    = (completing
       ? cooked_index_entry::COMPLETE
       : cooked_index_entry::MATCH);

  /* Find the run of entries that compare equal to NAME and wrap the
     resulting iterator pair in a range.  */
  auto bounds = std::equal_range (m_entries.cbegin (), m_entries.cend (),
                                  name.c_str (),
                                  canonical_name_order { mode });
  return std::make_from_tuple<range> (std::move (bounds));
}
|