Handle unaligned mapping of .gdb_index

The .gdb_index was designed such that all data would be aligned.
Unfortunately, we neglected to require this alignment in the objcopy
instructions in the manual.  As a result, in many cases, a .gdb_index
in the wild will not be properly aligned by mmap.  This yields
undefined behavior, which is PR gdb/23743.

This patch fixes the bug by always assuming that the mapping is
unaligned, and using extract_unsigned_integer when needed.  A new
helper class is introduced to make this less painful.

gdb/ChangeLog
2021-04-17  Tom Tromey  <tom@tromey.com>

	PR gdb/23743:
	* dwarf2/read.c (class offset_view): New.
	(struct symbol_table_slot): Remove.
	(struct mapped_index) <symbol_table, constant_pool>: Change type.
	<symbol_name_index, symbol_vec_index>: New methods.
	<symbol_name_slot_invalid, symbol_name_at, symbol_name_count>:
	Rewrite.
	(read_gdb_index_from_buffer): Update.
	(struct dw2_symtab_iterator) <vec>: Change type.
	(dw2_symtab_iter_init_common, dw2_symtab_iter_init)
	(dw2_symtab_iter_next, dw2_expand_marked_cus): Update.
	* dwarf2/index-write.c (class data_buf) <append_data>: Remove.
	<append_array, append_offset>: New methods.
	(write_hash_table, add_address_entry, write_gdbindex_1)
	(write_debug_names): Update.
	* dwarf2/index-common.h (byte_swap, MAYBE_SWAP): Remove.
This commit is contained in:
Tom Tromey
2021-04-17 13:56:36 -06:00
parent da314dd397
commit 42c2c69462
4 changed files with 132 additions and 81 deletions

View File

@@ -1,3 +1,22 @@
2021-04-17 Tom Tromey <tom@tromey.com>
PR gdb/23743:
* dwarf2/read.c (class offset_view): New.
(struct symbol_table_slot): Remove.
(struct mapped_index) <symbol_table, constant_pool>: Change type.
<symbol_name_index, symbol_vec_index>: New methods.
<symbol_name_slot_invalid, symbol_name_at, symbol_name_count>:
Rewrite.
(read_gdb_index_from_buffer): Update.
(struct dw2_symtab_iterator) <vec>: Change type.
(dw2_symtab_iter_init_common, dw2_symtab_iter_init)
(dw2_symtab_iter_next, dw2_expand_marked_cus): Update.
* dwarf2/index-write.c (class data_buf) <append_data>: Remove.
<append_array, append_offset>: New methods.
(write_hash_table, add_address_entry, write_gdbindex_1)
(write_debug_names): Update.
* dwarf2/index-common.h (byte_swap, MAYBE_SWAP): Remove.
2021-04-17 Tom Tromey <tom@tromey.com>
* dwarf2/index-write.c (write_psymtabs_to_index): Check

View File

@@ -29,28 +29,15 @@
architecture-independent. */
typedef uint32_t offset_type;
#if WORDS_BIGENDIAN
/* Convert VALUE between big- and little-endian. */
/* Unpack a 32-bit little-endian value. */
static inline offset_type
byte_swap (offset_type value)
gdb_index_unpack (const gdb_byte *value)
{
offset_type result;
result = (value & 0xff) << 24;
result |= (value & 0xff00) << 8;
result |= (value & 0xff0000) >> 8;
result |= (value & 0xff000000) >> 24;
return result;
return (offset_type) extract_unsigned_integer (value, sizeof (offset_type),
BFD_ENDIAN_LITTLE);
}
#define MAYBE_SWAP(V) byte_swap (V)
#else
#define MAYBE_SWAP(V) static_cast<offset_type> (V)
#endif /* WORDS_BIGENDIAN */
/* The hash function for strings in the mapped index. This is the same as
SYMBOL_HASH_NEXT, but we keep a separate copy to maintain control over the
implementation. This is necessary because the hash function is tied to the

View File

@@ -94,13 +94,10 @@ file_write (FILE *file, const std::vector<Elem, Alloc> &vec)
class data_buf
{
public:
/* Copy DATA to the end of the buffer. */
template<typename T>
void append_data (const T &data)
/* Copy ARRAY to the end of the buffer. */
void append_array (gdb::array_view<const gdb_byte> array)
{
std::copy (reinterpret_cast<const gdb_byte *> (&data),
reinterpret_cast<const gdb_byte *> (&data + 1),
grow (sizeof (data)));
std::copy (array.begin (), array.end (), grow (array.size ()));
}
/* Copy CSTR (a zero-terminated string) to the end of buffer. The
@@ -120,7 +117,7 @@ public:
input >>= 7;
if (input)
output |= 0x80;
append_data (output);
m_vec.push_back (output);
if (input == 0)
break;
}
@@ -133,6 +130,12 @@ public:
::store_unsigned_integer (grow (len), len, byte_order, val);
}
/* Copy VALUE to the end of the buffer, little-endian. */
void append_offset (offset_type value)
{
append_uint (sizeof (value), BFD_ENDIAN_LITTLE, value);
}
/* Return the size of the buffer. */
size_t size () const
{
@@ -371,9 +374,9 @@ write_hash_table (mapped_symtab *symtab, data_buf &output, data_buf &cpool)
symbol_hash_table.emplace (entry.cu_indices, cpool.size ());
entry.index_offset = cpool.size ();
cpool.append_data (MAYBE_SWAP (entry.cu_indices.size ()));
cpool.append_offset (entry.cu_indices.size ());
for (const auto index : entry.cu_indices)
cpool.append_data (MAYBE_SWAP (index));
cpool.append_offset (index);
}
}
@@ -399,8 +402,8 @@ write_hash_table (mapped_symtab *symtab, data_buf &output, data_buf &cpool)
vec_off = 0;
}
output.append_data (MAYBE_SWAP (str_off));
output.append_data (MAYBE_SWAP (vec_off));
output.append_offset (str_off);
output.append_offset (vec_off);
}
}
@@ -434,7 +437,7 @@ add_address_entry (data_buf &addr_vec,
{
addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, start);
addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, end);
addr_vec.append_data (MAYBE_SWAP (cu_index));
addr_vec.append_offset (cu_index);
}
/* Worker function for traversing an addrmap to build the address table. */
@@ -1374,26 +1377,26 @@ write_gdbindex_1 (FILE *out_file,
offset_type total_len = size_of_header;
/* The version number. */
contents.append_data (MAYBE_SWAP (8));
contents.append_offset (8);
/* The offset of the CU list from the start of the file. */
contents.append_data (MAYBE_SWAP (total_len));
contents.append_offset (total_len);
total_len += cu_list.size ();
/* The offset of the types CU list from the start of the file. */
contents.append_data (MAYBE_SWAP (total_len));
contents.append_offset (total_len);
total_len += types_cu_list.size ();
/* The offset of the address table from the start of the file. */
contents.append_data (MAYBE_SWAP (total_len));
contents.append_offset (total_len);
total_len += addr_vec.size ();
/* The offset of the symbol table from the start of the file. */
contents.append_data (MAYBE_SWAP (total_len));
contents.append_offset (total_len);
total_len += symtab_vec.size ();
/* The offset of the constant pool from the start of the file. */
contents.append_data (MAYBE_SWAP (total_len));
contents.append_offset (total_len);
total_len += constant_pool.size ();
gdb_assert (contents.size () == size_of_header);
@@ -1611,7 +1614,7 @@ write_debug_names (dwarf2_per_objfile *per_objfile,
string. This value is rounded up to a multiple of 4. */
static_assert (sizeof (dwarf5_gdb_augmentation) % 4 == 0, "");
header.append_uint (4, dwarf5_byte_order, sizeof (dwarf5_gdb_augmentation));
header.append_data (dwarf5_gdb_augmentation);
header.append_array (dwarf5_gdb_augmentation);
gdb_assert (header.size () == bytes_of_header);

View File

@@ -223,17 +223,50 @@ protected:
~mapped_index_base() = default;
};
/* This is a view into the index that converts from bytes to an
offset_type, and allows indexing. Unaligned bytes are specifically
allowed here, and handled via unpacking. */
class offset_view
{
public:
offset_view () = default;
explicit offset_view (gdb::array_view<const gdb_byte> bytes)
: m_bytes (bytes)
{
}
/* Extract the INDEXth offset_type from the array. */
offset_type operator[] (size_t index) const
{
const gdb_byte *bytes = &m_bytes[index * sizeof (offset_type)];
return (offset_type) extract_unsigned_integer (bytes,
sizeof (offset_type),
BFD_ENDIAN_LITTLE);
}
/* Return the number of offset_types in this array. */
size_t size () const
{
return m_bytes.size () / sizeof (offset_type);
}
/* Return true if this view is empty. */
bool empty () const
{
return m_bytes.empty ();
}
private:
/* The underlying bytes. */
gdb::array_view<const gdb_byte> m_bytes;
};
/* A description of the mapped index. The file format is described in
a comment by the code that writes the index. */
struct mapped_index final : public mapped_index_base
{
/* A slot/bucket in the symbol table hash. */
struct symbol_table_slot
{
const offset_type name;
const offset_type vec;
};
/* Index data format version. */
int version = 0;
@@ -241,25 +274,42 @@ struct mapped_index final : public mapped_index_base
gdb::array_view<const gdb_byte> address_table;
/* The symbol table, implemented as a hash table. */
gdb::array_view<symbol_table_slot> symbol_table;
offset_view symbol_table;
/* A pointer to the constant pool. */
const char *constant_pool = nullptr;
gdb::array_view<const gdb_byte> constant_pool;
/* Return the index into the constant pool of the name of the IDXth
symbol in the symbol table. */
offset_type symbol_name_index (offset_type idx) const
{
return symbol_table[2 * idx];
}
/* Return the index into the constant pool of the CU vector of the
IDXth symbol in the symbol table. */
offset_type symbol_vec_index (offset_type idx) const
{
return symbol_table[2 * idx + 1];
}
bool symbol_name_slot_invalid (offset_type idx) const override
{
const auto &bucket = this->symbol_table[idx];
return bucket.name == 0 && bucket.vec == 0;
return (symbol_name_index (idx) == 0
&& symbol_vec_index (idx) == 0);
}
/* Convenience method to get at the name of the symbol at IDX in the
symbol table. */
const char *symbol_name_at
(offset_type idx, dwarf2_per_objfile *per_objfile) const override
{ return this->constant_pool + MAYBE_SWAP (this->symbol_table[idx].name); }
{
return (const char *) (this->constant_pool.data ()
+ symbol_name_index (idx));
}
size_t symbol_name_count () const override
{ return this->symbol_table.size (); }
{ return this->symbol_table.size () / 2; }
};
/* A description of the mapped .debug_names.
@@ -2950,9 +3000,10 @@ read_gdb_index_from_buffer (const char *filename,
offset_type *types_list_elements)
{
const gdb_byte *addr = &buffer[0];
offset_view metadata (buffer);
/* Version check. */
offset_type version = MAYBE_SWAP (*(offset_type *) addr);
offset_type version = metadata[0];
/* Versions earlier than 3 emitted every copy of a psymbol. This
causes the index to behave very poorly for certain requests. Version 3
contained incomplete addrmap. So, it seems better to just ignore such
@@ -3005,35 +3056,29 @@ to use the section anyway."),
map->version = version;
offset_type *metadata = (offset_type *) (addr + sizeof (offset_type));
int i = 0;
*cu_list = addr + MAYBE_SWAP (metadata[i]);
*cu_list_elements = ((MAYBE_SWAP (metadata[i + 1]) - MAYBE_SWAP (metadata[i]))
/ 8);
int i = 1;
*cu_list = addr + metadata[i];
*cu_list_elements = (metadata[i + 1] - metadata[i]) / 8;
++i;
*types_list = addr + MAYBE_SWAP (metadata[i]);
*types_list_elements = ((MAYBE_SWAP (metadata[i + 1])
- MAYBE_SWAP (metadata[i]))
/ 8);
*types_list = addr + metadata[i];
*types_list_elements = (metadata[i + 1] - metadata[i]) / 8;
++i;
const gdb_byte *address_table = addr + MAYBE_SWAP (metadata[i]);
const gdb_byte *address_table_end = addr + MAYBE_SWAP (metadata[i + 1]);
const gdb_byte *address_table = addr + metadata[i];
const gdb_byte *address_table_end = addr + metadata[i + 1];
map->address_table
= gdb::array_view<const gdb_byte> (address_table, address_table_end);
++i;
const gdb_byte *symbol_table = addr + MAYBE_SWAP (metadata[i]);
const gdb_byte *symbol_table_end = addr + MAYBE_SWAP (metadata[i + 1]);
const gdb_byte *symbol_table = addr + metadata[i];
const gdb_byte *symbol_table_end = addr + metadata[i + 1];
map->symbol_table
= gdb::array_view<mapped_index::symbol_table_slot>
((mapped_index::symbol_table_slot *) symbol_table,
(mapped_index::symbol_table_slot *) symbol_table_end);
= offset_view (gdb::array_view<const gdb_byte> (symbol_table,
symbol_table_end));
++i;
map->constant_pool = (char *) (addr + MAYBE_SWAP (metadata[i]));
map->constant_pool = buffer.slice (metadata[i]);
return 1;
}
@@ -3317,7 +3362,7 @@ struct dw2_symtab_iterator
domain_enum domain;
/* The list of CUs from the index entry of the symbol,
or NULL if not found. */
offset_type *vec;
offset_view vec;
/* The next element in VEC to look at. */
int next;
/* The number of elements in VEC, or zero if there is no match. */
@@ -3342,7 +3387,7 @@ dw2_symtab_iter_init (struct dw2_symtab_iterator *iter,
iter->domain = domain;
iter->next = 0;
iter->global_seen = 0;
iter->vec = NULL;
iter->vec = {};
iter->length = 0;
mapped_index *index = per_objfile->per_bfd->index_table.get ();
@@ -3351,11 +3396,10 @@ dw2_symtab_iter_init (struct dw2_symtab_iterator *iter,
return;
gdb_assert (!index->symbol_name_slot_invalid (namei));
const auto &bucket = index->symbol_table[namei];
offset_type vec_idx = index->symbol_vec_index (namei);
iter->vec = (offset_type *) (index->constant_pool
+ MAYBE_SWAP (bucket.vec));
iter->length = MAYBE_SWAP (*iter->vec);
iter->vec = offset_view (index->constant_pool.slice (vec_idx));
iter->length = iter->vec[0];
}
/* Return the next matching CU or NULL if there are no more. */
@@ -3367,8 +3411,7 @@ dw2_symtab_iter_next (struct dw2_symtab_iterator *iter)
for ( ; iter->next < iter->length; ++iter->next)
{
offset_type cu_index_and_attrs =
MAYBE_SWAP (iter->vec[iter->next + 1]);
offset_type cu_index_and_attrs = iter->vec[iter->next + 1];
offset_type cu_index = GDB_INDEX_CU_VALUE (cu_index_and_attrs);
gdb_index_symbol_kind symbol_kind =
GDB_INDEX_SYMBOL_KIND_VALUE (cu_index_and_attrs);
@@ -4387,16 +4430,15 @@ dw2_expand_marked_cus
block_search_flags search_flags,
search_domain kind)
{
offset_type *vec, vec_len, vec_idx;
offset_type vec_len, vec_idx;
bool global_seen = false;
mapped_index &index = *per_objfile->per_bfd->index_table;
vec = (offset_type *) (index.constant_pool
+ MAYBE_SWAP (index.symbol_table[idx].vec));
vec_len = MAYBE_SWAP (vec[0]);
offset_view vec (index.constant_pool.slice (index.symbol_vec_index (idx)));
vec_len = vec[0];
for (vec_idx = 0; vec_idx < vec_len; ++vec_idx)
{
offset_type cu_index_and_attrs = MAYBE_SWAP (vec[vec_idx + 1]);
offset_type cu_index_and_attrs = vec[vec_idx + 1];
/* This value is only valid for index versions >= 7. */
int is_static = GDB_INDEX_SYMBOL_STATIC_VALUE (cu_index_and_attrs);
gdb_index_symbol_kind symbol_kind =