Files
binutils-gdb/libctf/ctf-util.c
Nick Alcock a480362d88 libctf: string: refs rework
This commit moves provisional (not-yet-serialized) string refs towards the
scheme to be used for CTF IDs in the future.  In particular

 - provisional string offsets now count downwards from just under the
   external string offset space (all bits on but the high bit).  This makes
   it possible to detect an overflowing strtab, and also makes it trivial to
   determine whether any string offset (ref) updates were missed -- where
   before we might get a slightly corrupted or incorrect string, we now get
   a huge high strtab offset corresponding to no string, and an error is
   emitted at read time.

 - refs are emitted at serialization time during the pass through the types.
   They are strictly associated with the newly-written-out buffer: the
   existing opened CTF dict is not changed, though it does still get the new
   strtab so that new refs to the same string can just refer directly to it.
   The provisional strtab hash table that contains these strings is not
   deleted after serialization (because we might serialize again).  Instead,
   we keep track in the parent of the lowest-yet-used ("latest") provisional
   strtab offset: any strtab offset above that which is not external
   (high-bit-on) is considered provisional.

   This is sort-of-enforced by moving most of the ref-addition function
   declarations (including ctf_str_add_ref) to a new ctf-ref.h, which is
   not included by ctf-create.c or ctf-open.c.

 - because we don't add refs when adding types, we don't need to handle the
   case where we add things to expanding vlens (enums, struct members) and
   have to realloc() them.  So the entire painful movable refs system can
   just be deleted, along with the ability to remove refs piecemeal at all
   (purging all of them is still possible).  Strings added during type
   addition are added via ctf_str_add(), which adds no refs: the strings are
   picked up at serialization time and refs to their final, serialized
   resting place added.  The DTDs never have any refs in them, and their
   provisional strtab offsets are never updated by the ref system.

This caused several bugs to fall out of the earlier work and get fixed.
In particular, attempts to look up a string in a child dict now search
the parent's provisional strtab too: we add some extra special casing
for the null string so we don't need to worry about deduplication
moving it somewhere other than offset zero.

Finally, the optimization that removes an unreferenced synthetic external
strtab (the record of the strings the linker has told us about, kept around
internally for lookup during late serialization) is faulty.  References to a
strtab entry will only produce CTF-level refs if their value might change,
and an external string's offset won't change, so it produces no refs.  Worse
yet, even if we did get a ref (say, if the string was originally believed
to be internal and only later were we told that the linker knew about it
too), when we serialize a strtab all its refs are dropped (since they've
been updated and can no longer change).  So if we serialized it a second
time, its synthetic external strtab would be considered empty and dropped,
even though the same external strings as before still exist, referencing
it.  We must keep the synthetic external strtab around as long as external
strings exist that reference it, i.e. for the life of the dict.

One benefit of all this: now we're emitting provisional string offsets at
a really high value, it's out of the way of the consecutive, deduplicated
string offsets in child dicts.  So we can drop the constraint that you
cannot add strings to a dict with children, which allows us to add types
freely to parent dicts again.  What you can't do is write that dict out
again: when we serialize, we currently update the dict being serialized
with the updated strtabs: when you write a dict out, its provisional
strings become real strings, and suddenly the offsets would overlap once
more.  But opening a dict and its children, adding to it, and then
writing it out again is rare indeed, and we have a workaround: anyone
wanting to do this can just use ctf_link instead.
2025-02-28 15:13:24 +00:00

265 lines
6.0 KiB
C

/* Simple data structure utilities and helpers.
Copyright (C) 2019-2025 Free Software Foundation, Inc.
This file is part of libctf.
libctf is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; see the file COPYING. If not see
<http://www.gnu.org/licenses/>. */
#include <ctf-impl.h>
#include <string.h>
#include "ctf-ref.h"
#include "ctf-endian.h"
/* Append NEWP to the tail of the doubly-linked list headed by LP.

   NEWP must point at a structure whose first member is an embedded
   ctf_list_t.  LP is a separate ctf_list_t used purely as the list header:
   its l_next is the head element and its l_prev is the tail element.  The
   head element's l_prev and the tail element's l_next are both NULL.  */

void
ctf_list_append (ctf_list_t *lp, void *newp)
{
  ctf_list_t *node = newp;
  ctf_list_t *old_tail = lp->l_prev;

  /* The new element becomes the tail.  */
  node->l_next = NULL;
  node->l_prev = old_tail;
  lp->l_prev = node;

  /* With no previous tail the list was empty, so the new element is also
     the head.  */
  if (old_tail == NULL)
    lp->l_next = node;
  else
    old_tail->l_next = node;
}
/* Insert NEWP at the head of the list headed by LP.  NEWP must point at a
   structure with an embedded ctf_list_t as its first member.  */

void
ctf_list_prepend (ctf_list_t * lp, void *newp)
{
  ctf_list_t *node = newp;
  ctf_list_t *old_head = lp->l_next;

  /* The new element becomes the head.  */
  node->l_prev = NULL;
  node->l_next = old_head;
  lp->l_next = node;

  /* With no previous head the list was empty, so the new element is also
     the tail.  */
  if (old_head == NULL)
    lp->l_prev = node;
  else
    old_head->l_prev = node;
}
/* Unlink EXISTING from the list headed by LP.  EXISTING must point at a
   structure with an embedded ctf_list_t as its first member.  The element
   itself is neither freed nor has its own link pointers reset: only its
   neighbours (or the list header) are updated.  */

void
ctf_list_delete (ctf_list_t *lp, void *existing)
{
  ctf_list_t *node = existing;
  ctf_list_t *before = node->l_prev;
  ctf_list_t *after = node->l_next;

  /* Fix up whatever pointed forwards at this element: the header if it was
     the head, otherwise its predecessor.  */
  if (before == NULL)
    lp->l_next = after;
  else
    before->l_next = after;

  /* Likewise for whatever pointed backwards at it.  */
  if (after == NULL)
    lp->l_prev = before;
  else
    after->l_prev = before;
}
/* Return 1 if the list headed by LP has neither a head nor a tail element,
   0 otherwise.  */

int
ctf_list_empty_p (ctf_list_t *lp)
{
  if (lp->l_next != NULL || lp->l_prev != NULL)
    return 0;

  return 1;
}
/* Move every element of the list headed by APPEND onto the end of the list
   headed by LP, preserving order.  On return APPEND is empty.  Does nothing
   if APPEND is already empty.  */

void
ctf_list_splice (ctf_list_t *lp, ctf_list_t *append)
{
  ctf_list_t *first = append->l_next;
  ctf_list_t *last = append->l_prev;
  ctf_list_t *tail = lp->l_prev;

  /* Nothing to move.  */
  if (first == NULL && last == NULL)
    return;

  /* Hook the first appended element on after the current tail, or make it
     the head if LP was empty.  */
  if (tail == NULL)
    lp->l_next = first;
  else
    tail->l_next = first;

  first->l_prev = tail;
  lp->l_prev = last;

  /* The elements now belong to LP alone.  */
  append->l_next = NULL;
  append->l_prev = NULL;
}
/* Append APPEND to the dynamically-allocated string S, growing S with
   realloc.  S may be NULL, in which case a new string is allocated.

   Returns the (possibly moved) resulting string.  If APPEND is NULL, S is
   returned unchanged.  Returns NULL on OOM: the buffer the caller passed in
   is not freed in that case.  */

char *
ctf_str_append (char *s, const char *append)
{
  size_t old_len;
  size_t extra_len;
  char *grown;

  if (append == NULL)
    return s;

  old_len = (s != NULL) ? strlen (s) : 0;
  extra_len = strlen (append);

  grown = realloc (s, old_len + extra_len + 1);
  if (grown == NULL)
    return NULL;

  /* Copy the terminating NUL along with the appended text.  */
  memcpy (grown + old_len, append, extra_len + 1);
  return grown;
}
/* Like ctf_str_append, but never returns NULL on OOM: if the append fails,
   the original string S is returned unmodified instead.  */

char *
ctf_str_append_noerr (char *s, const char *append)
{
  char *appended = ctf_str_append (s, append);

  return (appended != NULL) ? appended : s;
}
/* Allocate a new ref recording the location REF and append it to REFLIST.
   Nothing is written through REF: initializing the referenced value is the
   caller's job.  Returns the new ref, or NULL on OOM (in which case REFLIST
   is unchanged).  FP is unused.  */

ctf_ref_t *
ctf_create_ref (ctf_dict_t *fp _libctf_unused_, ctf_list_t *reflist,
                uint32_t *ref)
{
  ctf_ref_t *new_ref = malloc (sizeof (struct ctf_ref));

  if (new_ref == NULL)
    return NULL;

  new_ref->cre_ref = ref;
  ctf_list_append (reflist, new_ref);

  return new_ref;
}
/* Remove and free every ref on REFLIST, leaving the list empty.  The
   locations the refs pointed at are not touched.  FP is unused.  */

void
ctf_purge_ref_list (ctf_dict_t *fp _libctf_unused_, ctf_list_t *reflist)
{
  ctf_ref_t *cur = ctf_list_next (reflist);

  while (cur != NULL)
    {
      /* Fetch the successor before the current ref is freed.  */
      ctf_ref_t *following = ctf_list_next (cur);

      ctf_list_delete (reflist, cur);
      free (cur);
      cur = following;
    }
}
/* Store VALUE into the location recorded by every ref on REFLIST.  The list
   itself is not modified.  */

void
ctf_update_refs (ctf_list_t *reflist, uint32_t value)
{
  ctf_ref_t *cur = ctf_list_next (reflist);

  while (cur != NULL)
    {
      *(cur->cre_ref) = value;
      cur = ctf_list_next (cur);
    }
}
/* Allocate a new, zero-filled ctf_next_t.  Returns NULL on OOM.  */

ctf_next_t *
ctf_next_create (void)
{
  ctf_next_t *fresh = calloc (1, sizeof (struct ctf_next));

  return fresh;
}
/* Destroy a ctf_next_t, for early exit from iterators.  Frees the sorted
   key/value array if this is a sorted dynhash iterator, recursively destroys
   any nested iterators, then frees the iterator itself.  A NULL argument is
   accepted and ignored.  */

void
ctf_next_destroy (ctf_next_t *i)
{
  if (i != NULL)
    {
      /* Only sorted dynhash iteration owns an allocated array in the
         union.  */
      if (i->ctn_iter_fun == (void (*) (void)) ctf_dynhash_next_sorted)
        free (i->u.ctn_sorted_hkv);

      if (i->ctn_next != NULL)
        ctf_next_destroy (i->ctn_next);

      if (i->ctn_next_inner != NULL)
        ctf_next_destroy (i->ctn_next_inner);

      free (i);
    }
}
/* Copy a ctf_next_t.  Returns a newly-allocated deep copy of I, or NULL on
   OOM, in which case I is left entirely untouched.

   The nested iterators (ctn_next, ctn_next_inner) and, for sorted dynhash
   iteration, the sorted key/value array are duplicated, so that the copy
   owns everything that ctf_next_destroy will later free.

   All the duplicates are built before anything is copied into the new
   iterator.  This way a failure partway through never leaves a half-built
   copy holding pointers that alias I's own allocations (which destroying
   the copy would then free out from under I), and every allocation made
   here is released exactly once on the error path.  */

ctf_next_t *
ctf_next_copy (ctf_next_t *i)
{
  ctf_next_t *i2;
  ctf_next_t *next_copy = NULL;
  ctf_next_t *inner_copy = NULL;
  void *sorted_copy = NULL;
  int sorted = (i->ctn_iter_fun == (void (*) (void)) ctf_dynhash_next_sorted);

  if ((i2 = ctf_next_create ()) == NULL)
    return NULL;

  if (i->ctn_next)
    {
      if ((next_copy = ctf_next_copy (i->ctn_next)) == NULL)
        goto err;
    }

  if (i->ctn_next_inner)
    {
      if ((inner_copy = ctf_next_copy (i->ctn_next_inner)) == NULL)
        goto err;
    }

  if (sorted)
    {
      size_t els = ctf_dynhash_elements ((ctf_dynhash_t *) i->cu.ctn_h);

      if ((sorted_copy = calloc (els, sizeof (ctf_next_hkv_t))) == NULL)
        goto err;
      memcpy (sorted_copy, i->u.ctn_sorted_hkv,
              els * sizeof (ctf_next_hkv_t));
    }

  /* Nothing can fail from here on.  Take a shallow copy of everything, then
     replace each owned pointer with its private duplicate.  */
  memcpy (i2, i, sizeof (struct ctf_next));
  i2->ctn_next = next_copy;
  i2->ctn_next_inner = inner_copy;
  if (sorted)
    i2->u.ctn_sorted_hkv = sorted_copy;

  return i2;

 err:
  /* The sorted array is always the last thing allocated, so it can never
     need freeing here.  I2 is still all-zeroes, so destroying it releases
     only the iterator itself.  ctf_next_destroy ignores NULL.  */
  ctf_next_destroy (inner_copy);
  ctf_next_destroy (next_copy);
  ctf_next_destroy (i2);
  return NULL;
}