Handle unaligned mapping of .gdb_index

The .gdb_index was designed such that all data would be aligned.
Unfortunately, we neglected to require this alignment in the objcopy
instructions in the manual.  As a result, in many cases, a .gdb_index
in the wild will not be properly aligned by mmap.  This yields
undefined behavior, which is PR gdb/23743.

This patch fixes the bug by always assuming that the mapping is
unaligned, and using extract_unsigned_integer when needed.  A new
helper class is introduced to make this less painful.

gdb/ChangeLog
2021-04-17  Tom Tromey  <tom@tromey.com>

	PR gdb/23743:
	* dwarf2/read.c (class offset_view): New.
	(struct symbol_table_slot): Remove.
	(struct mapped_index) <symbol_table, constant_pool>: Change type.
	<symbol_name_index, symbol_vec_index>: New methods.
	<symbol_name_slot_invalid, symbol_name_at, symbol_name_count>:
	Rewrite.
	(read_gdb_index_from_buffer): Update.
	(struct dw2_symtab_iterator) <vec>: Change type.
	(dw2_symtab_iter_init_common, dw2_symtab_iter_init)
	(dw2_symtab_iter_next, dw2_expand_marked_cus): Update.
	* dwarf2/index-write.c (class data_buf) <append_data>: Remove.
	<append_array, append_offset>: New methods.
	(write_hash_table, add_address_entry, write_gdbindex_1)
	(write_debug_names): Update.
	* dwarf2/index-common.h (byte_swap, MAYBE_SWAP): Remove.
This commit is contained in:
Tom Tromey
2021-04-17 13:56:36 -06:00
parent da314dd397
commit 42c2c69462
4 changed files with 132 additions and 81 deletions

View File

@@ -1,3 +1,22 @@
2021-04-17 Tom Tromey <tom@tromey.com>
PR gdb/23743:
* dwarf2/read.c (class offset_view): New.
(struct symbol_table_slot): Remove.
(struct mapped_index) <symbol_table, constant_pool>: Change type.
<symbol_name_index, symbol_vec_index>: New methods.
<symbol_name_slot_invalid, symbol_name_at, symbol_name_count>:
Rewrite.
(read_gdb_index_from_buffer): Update.
(struct dw2_symtab_iterator) <vec>: Change type.
(dw2_symtab_iter_init_common, dw2_symtab_iter_init)
(dw2_symtab_iter_next, dw2_expand_marked_cus): Update.
* dwarf2/index-write.c (class data_buf) <append_data>: Remove.
<append_array, append_offset>: New methods.
(write_hash_table, add_address_entry, write_gdbindex_1)
(write_debug_names): Update.
* dwarf2/index-common.h (byte_swap, MAYBE_SWAP): Remove.
2021-04-17 Tom Tromey <tom@tromey.com> 2021-04-17 Tom Tromey <tom@tromey.com>
* dwarf2/index-write.c (write_psymtabs_to_index): Check * dwarf2/index-write.c (write_psymtabs_to_index): Check

View File

@@ -29,28 +29,15 @@
architecture-independent. */ architecture-independent. */
typedef uint32_t offset_type; typedef uint32_t offset_type;
#if WORDS_BIGENDIAN /* Unpack a 32-bit little-endian value. */
/* Convert VALUE between big- and little-endian. */
static inline offset_type static inline offset_type
byte_swap (offset_type value) gdb_index_unpack (const gdb_byte *value)
{ {
offset_type result; return (offset_type) extract_unsigned_integer (value, sizeof (offset_type),
BFD_ENDIAN_LITTLE);
result = (value & 0xff) << 24;
result |= (value & 0xff00) << 8;
result |= (value & 0xff0000) >> 8;
result |= (value & 0xff000000) >> 24;
return result;
} }
#define MAYBE_SWAP(V) byte_swap (V)
#else
#define MAYBE_SWAP(V) static_cast<offset_type> (V)
#endif /* WORDS_BIGENDIAN */
/* The hash function for strings in the mapped index. This is the same as /* The hash function for strings in the mapped index. This is the same as
SYMBOL_HASH_NEXT, but we keep a separate copy to maintain control over the SYMBOL_HASH_NEXT, but we keep a separate copy to maintain control over the
implementation. This is necessary because the hash function is tied to the implementation. This is necessary because the hash function is tied to the

View File

@@ -94,13 +94,10 @@ file_write (FILE *file, const std::vector<Elem, Alloc> &vec)
class data_buf class data_buf
{ {
public: public:
/* Copy DATA to the end of the buffer. */ /* Copy ARRAY to the end of the buffer. */
template<typename T> void append_array (gdb::array_view<const gdb_byte> array)
void append_data (const T &data)
{ {
std::copy (reinterpret_cast<const gdb_byte *> (&data), std::copy (array.begin (), array.end (), grow (array.size ()));
reinterpret_cast<const gdb_byte *> (&data + 1),
grow (sizeof (data)));
} }
/* Copy CSTR (a zero-terminated string) to the end of buffer. The /* Copy CSTR (a zero-terminated string) to the end of buffer. The
@@ -120,7 +117,7 @@ public:
input >>= 7; input >>= 7;
if (input) if (input)
output |= 0x80; output |= 0x80;
append_data (output); m_vec.push_back (output);
if (input == 0) if (input == 0)
break; break;
} }
@@ -133,6 +130,12 @@ public:
::store_unsigned_integer (grow (len), len, byte_order, val); ::store_unsigned_integer (grow (len), len, byte_order, val);
} }
/* Copy VALUE to the end of the buffer, little-endian. */
void append_offset (offset_type value)
{
append_uint (sizeof (value), BFD_ENDIAN_LITTLE, value);
}
/* Return the size of the buffer. */ /* Return the size of the buffer. */
size_t size () const size_t size () const
{ {
@@ -371,9 +374,9 @@ write_hash_table (mapped_symtab *symtab, data_buf &output, data_buf &cpool)
symbol_hash_table.emplace (entry.cu_indices, cpool.size ()); symbol_hash_table.emplace (entry.cu_indices, cpool.size ());
entry.index_offset = cpool.size (); entry.index_offset = cpool.size ();
cpool.append_data (MAYBE_SWAP (entry.cu_indices.size ())); cpool.append_offset (entry.cu_indices.size ());
for (const auto index : entry.cu_indices) for (const auto index : entry.cu_indices)
cpool.append_data (MAYBE_SWAP (index)); cpool.append_offset (index);
} }
} }
@@ -399,8 +402,8 @@ write_hash_table (mapped_symtab *symtab, data_buf &output, data_buf &cpool)
vec_off = 0; vec_off = 0;
} }
output.append_data (MAYBE_SWAP (str_off)); output.append_offset (str_off);
output.append_data (MAYBE_SWAP (vec_off)); output.append_offset (vec_off);
} }
} }
@@ -434,7 +437,7 @@ add_address_entry (data_buf &addr_vec,
{ {
addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, start); addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, start);
addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, end); addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, end);
addr_vec.append_data (MAYBE_SWAP (cu_index)); addr_vec.append_offset (cu_index);
} }
/* Worker function for traversing an addrmap to build the address table. */ /* Worker function for traversing an addrmap to build the address table. */
@@ -1374,26 +1377,26 @@ write_gdbindex_1 (FILE *out_file,
offset_type total_len = size_of_header; offset_type total_len = size_of_header;
/* The version number. */ /* The version number. */
contents.append_data (MAYBE_SWAP (8)); contents.append_offset (8);
/* The offset of the CU list from the start of the file. */ /* The offset of the CU list from the start of the file. */
contents.append_data (MAYBE_SWAP (total_len)); contents.append_offset (total_len);
total_len += cu_list.size (); total_len += cu_list.size ();
/* The offset of the types CU list from the start of the file. */ /* The offset of the types CU list from the start of the file. */
contents.append_data (MAYBE_SWAP (total_len)); contents.append_offset (total_len);
total_len += types_cu_list.size (); total_len += types_cu_list.size ();
/* The offset of the address table from the start of the file. */ /* The offset of the address table from the start of the file. */
contents.append_data (MAYBE_SWAP (total_len)); contents.append_offset (total_len);
total_len += addr_vec.size (); total_len += addr_vec.size ();
/* The offset of the symbol table from the start of the file. */ /* The offset of the symbol table from the start of the file. */
contents.append_data (MAYBE_SWAP (total_len)); contents.append_offset (total_len);
total_len += symtab_vec.size (); total_len += symtab_vec.size ();
/* The offset of the constant pool from the start of the file. */ /* The offset of the constant pool from the start of the file. */
contents.append_data (MAYBE_SWAP (total_len)); contents.append_offset (total_len);
total_len += constant_pool.size (); total_len += constant_pool.size ();
gdb_assert (contents.size () == size_of_header); gdb_assert (contents.size () == size_of_header);
@@ -1611,7 +1614,7 @@ write_debug_names (dwarf2_per_objfile *per_objfile,
string. This value is rounded up to a multiple of 4. */ string. This value is rounded up to a multiple of 4. */
static_assert (sizeof (dwarf5_gdb_augmentation) % 4 == 0, ""); static_assert (sizeof (dwarf5_gdb_augmentation) % 4 == 0, "");
header.append_uint (4, dwarf5_byte_order, sizeof (dwarf5_gdb_augmentation)); header.append_uint (4, dwarf5_byte_order, sizeof (dwarf5_gdb_augmentation));
header.append_data (dwarf5_gdb_augmentation); header.append_array (dwarf5_gdb_augmentation);
gdb_assert (header.size () == bytes_of_header); gdb_assert (header.size () == bytes_of_header);

View File

@@ -223,17 +223,50 @@ protected:
~mapped_index_base() = default; ~mapped_index_base() = default;
}; };
/* This is a view into the index that converts from bytes to an
offset_type, and allows indexing. Unaligned bytes are specifically
allowed here, and handled via unpacking. */
class offset_view
{
public:
offset_view () = default;
explicit offset_view (gdb::array_view<const gdb_byte> bytes)
: m_bytes (bytes)
{
}
/* Extract the INDEXth offset_type from the array. */
offset_type operator[] (size_t index) const
{
const gdb_byte *bytes = &m_bytes[index * sizeof (offset_type)];
return (offset_type) extract_unsigned_integer (bytes,
sizeof (offset_type),
BFD_ENDIAN_LITTLE);
}
/* Return the number of offset_types in this array. */
size_t size () const
{
return m_bytes.size () / sizeof (offset_type);
}
/* Return true if this view is empty. */
bool empty () const
{
return m_bytes.empty ();
}
private:
/* The underlying bytes. */
gdb::array_view<const gdb_byte> m_bytes;
};
/* A description of the mapped index. The file format is described in /* A description of the mapped index. The file format is described in
a comment by the code that writes the index. */ a comment by the code that writes the index. */
struct mapped_index final : public mapped_index_base struct mapped_index final : public mapped_index_base
{ {
/* A slot/bucket in the symbol table hash. */
struct symbol_table_slot
{
const offset_type name;
const offset_type vec;
};
/* Index data format version. */ /* Index data format version. */
int version = 0; int version = 0;
@@ -241,25 +274,42 @@ struct mapped_index final : public mapped_index_base
gdb::array_view<const gdb_byte> address_table; gdb::array_view<const gdb_byte> address_table;
/* The symbol table, implemented as a hash table. */ /* The symbol table, implemented as a hash table. */
gdb::array_view<symbol_table_slot> symbol_table; offset_view symbol_table;
/* A pointer to the constant pool. */ /* A pointer to the constant pool. */
const char *constant_pool = nullptr; gdb::array_view<const gdb_byte> constant_pool;
/* Return the index into the constant pool of the name of the IDXth
symbol in the symbol table. */
offset_type symbol_name_index (offset_type idx) const
{
return symbol_table[2 * idx];
}
/* Return the index into the constant pool of the CU vector of the
IDXth symbol in the symbol table. */
offset_type symbol_vec_index (offset_type idx) const
{
return symbol_table[2 * idx + 1];
}
bool symbol_name_slot_invalid (offset_type idx) const override bool symbol_name_slot_invalid (offset_type idx) const override
{ {
const auto &bucket = this->symbol_table[idx]; return (symbol_name_index (idx) == 0
return bucket.name == 0 && bucket.vec == 0; && symbol_vec_index (idx) == 0);
} }
/* Convenience method to get at the name of the symbol at IDX in the /* Convenience method to get at the name of the symbol at IDX in the
symbol table. */ symbol table. */
const char *symbol_name_at const char *symbol_name_at
(offset_type idx, dwarf2_per_objfile *per_objfile) const override (offset_type idx, dwarf2_per_objfile *per_objfile) const override
{ return this->constant_pool + MAYBE_SWAP (this->symbol_table[idx].name); } {
return (const char *) (this->constant_pool.data ()
+ symbol_name_index (idx));
}
size_t symbol_name_count () const override size_t symbol_name_count () const override
{ return this->symbol_table.size (); } { return this->symbol_table.size () / 2; }
}; };
/* A description of the mapped .debug_names. /* A description of the mapped .debug_names.
@@ -2950,9 +3000,10 @@ read_gdb_index_from_buffer (const char *filename,
offset_type *types_list_elements) offset_type *types_list_elements)
{ {
const gdb_byte *addr = &buffer[0]; const gdb_byte *addr = &buffer[0];
offset_view metadata (buffer);
/* Version check. */ /* Version check. */
offset_type version = MAYBE_SWAP (*(offset_type *) addr); offset_type version = metadata[0];
/* Versions earlier than 3 emitted every copy of a psymbol. This /* Versions earlier than 3 emitted every copy of a psymbol. This
causes the index to behave very poorly for certain requests. Version 3 causes the index to behave very poorly for certain requests. Version 3
contained incomplete addrmap. So, it seems better to just ignore such contained incomplete addrmap. So, it seems better to just ignore such
@@ -3005,35 +3056,29 @@ to use the section anyway."),
map->version = version; map->version = version;
offset_type *metadata = (offset_type *) (addr + sizeof (offset_type)); int i = 1;
*cu_list = addr + metadata[i];
int i = 0; *cu_list_elements = (metadata[i + 1] - metadata[i]) / 8;
*cu_list = addr + MAYBE_SWAP (metadata[i]);
*cu_list_elements = ((MAYBE_SWAP (metadata[i + 1]) - MAYBE_SWAP (metadata[i]))
/ 8);
++i; ++i;
*types_list = addr + MAYBE_SWAP (metadata[i]); *types_list = addr + metadata[i];
*types_list_elements = ((MAYBE_SWAP (metadata[i + 1]) *types_list_elements = (metadata[i + 1] - metadata[i]) / 8;
- MAYBE_SWAP (metadata[i]))
/ 8);
++i; ++i;
const gdb_byte *address_table = addr + MAYBE_SWAP (metadata[i]); const gdb_byte *address_table = addr + metadata[i];
const gdb_byte *address_table_end = addr + MAYBE_SWAP (metadata[i + 1]); const gdb_byte *address_table_end = addr + metadata[i + 1];
map->address_table map->address_table
= gdb::array_view<const gdb_byte> (address_table, address_table_end); = gdb::array_view<const gdb_byte> (address_table, address_table_end);
++i; ++i;
const gdb_byte *symbol_table = addr + MAYBE_SWAP (metadata[i]); const gdb_byte *symbol_table = addr + metadata[i];
const gdb_byte *symbol_table_end = addr + MAYBE_SWAP (metadata[i + 1]); const gdb_byte *symbol_table_end = addr + metadata[i + 1];
map->symbol_table map->symbol_table
= gdb::array_view<mapped_index::symbol_table_slot> = offset_view (gdb::array_view<const gdb_byte> (symbol_table,
((mapped_index::symbol_table_slot *) symbol_table, symbol_table_end));
(mapped_index::symbol_table_slot *) symbol_table_end);
++i; ++i;
map->constant_pool = (char *) (addr + MAYBE_SWAP (metadata[i])); map->constant_pool = buffer.slice (metadata[i]);
return 1; return 1;
} }
@@ -3317,7 +3362,7 @@ struct dw2_symtab_iterator
domain_enum domain; domain_enum domain;
/* The list of CUs from the index entry of the symbol, /* The list of CUs from the index entry of the symbol,
or NULL if not found. */ or NULL if not found. */
offset_type *vec; offset_view vec;
/* The next element in VEC to look at. */ /* The next element in VEC to look at. */
int next; int next;
/* The number of elements in VEC, or zero if there is no match. */ /* The number of elements in VEC, or zero if there is no match. */
@@ -3342,7 +3387,7 @@ dw2_symtab_iter_init (struct dw2_symtab_iterator *iter,
iter->domain = domain; iter->domain = domain;
iter->next = 0; iter->next = 0;
iter->global_seen = 0; iter->global_seen = 0;
iter->vec = NULL; iter->vec = {};
iter->length = 0; iter->length = 0;
mapped_index *index = per_objfile->per_bfd->index_table.get (); mapped_index *index = per_objfile->per_bfd->index_table.get ();
@@ -3351,11 +3396,10 @@ dw2_symtab_iter_init (struct dw2_symtab_iterator *iter,
return; return;
gdb_assert (!index->symbol_name_slot_invalid (namei)); gdb_assert (!index->symbol_name_slot_invalid (namei));
const auto &bucket = index->symbol_table[namei]; offset_type vec_idx = index->symbol_vec_index (namei);
iter->vec = (offset_type *) (index->constant_pool iter->vec = offset_view (index->constant_pool.slice (vec_idx));
+ MAYBE_SWAP (bucket.vec)); iter->length = iter->vec[0];
iter->length = MAYBE_SWAP (*iter->vec);
} }
/* Return the next matching CU or NULL if there are no more. */ /* Return the next matching CU or NULL if there are no more. */
@@ -3367,8 +3411,7 @@ dw2_symtab_iter_next (struct dw2_symtab_iterator *iter)
for ( ; iter->next < iter->length; ++iter->next) for ( ; iter->next < iter->length; ++iter->next)
{ {
offset_type cu_index_and_attrs = offset_type cu_index_and_attrs = iter->vec[iter->next + 1];
MAYBE_SWAP (iter->vec[iter->next + 1]);
offset_type cu_index = GDB_INDEX_CU_VALUE (cu_index_and_attrs); offset_type cu_index = GDB_INDEX_CU_VALUE (cu_index_and_attrs);
gdb_index_symbol_kind symbol_kind = gdb_index_symbol_kind symbol_kind =
GDB_INDEX_SYMBOL_KIND_VALUE (cu_index_and_attrs); GDB_INDEX_SYMBOL_KIND_VALUE (cu_index_and_attrs);
@@ -4387,16 +4430,15 @@ dw2_expand_marked_cus
block_search_flags search_flags, block_search_flags search_flags,
search_domain kind) search_domain kind)
{ {
offset_type *vec, vec_len, vec_idx; offset_type vec_len, vec_idx;
bool global_seen = false; bool global_seen = false;
mapped_index &index = *per_objfile->per_bfd->index_table; mapped_index &index = *per_objfile->per_bfd->index_table;
vec = (offset_type *) (index.constant_pool offset_view vec (index.constant_pool.slice (index.symbol_vec_index (idx)));
+ MAYBE_SWAP (index.symbol_table[idx].vec)); vec_len = vec[0];
vec_len = MAYBE_SWAP (vec[0]);
for (vec_idx = 0; vec_idx < vec_len; ++vec_idx) for (vec_idx = 0; vec_idx < vec_len; ++vec_idx)
{ {
offset_type cu_index_and_attrs = MAYBE_SWAP (vec[vec_idx + 1]); offset_type cu_index_and_attrs = vec[vec_idx + 1];
/* This value is only valid for index versions >= 7. */ /* This value is only valid for index versions >= 7. */
int is_static = GDB_INDEX_SYMBOL_STATIC_VALUE (cu_index_and_attrs); int is_static = GDB_INDEX_SYMBOL_STATIC_VALUE (cu_index_and_attrs);
gdb_index_symbol_kind symbol_kind = gdb_index_symbol_kind symbol_kind =