libctf: prohibit addition of enums with overlapping enumerator constants

libctf has long prohibited addition of enums with overlapping constants in a
single enum, but now that we are properly considering enums with overlapping
constants to be conflicting types, we can go further and prohibit addition
of enumeration constants to a dict if they already exist in any enum in that
dict: the same rules as C itself.

We do this in a fashion vaguely similar to what we just did in the
deduplicator, by considering enumeration constants as identifiers and adding
them to the core type/identifier namespace, ctf_dict_t.ctf_names.  This is a
little fiddly, because we do not want to prohibit opening of existing dicts
into which the deduplicator has stuffed enums with overlapping constants!
We just want to prohibit the addition of *new* enumerators that violate that
rule.  Even then, it's fine to add overlapping enumerator constants as long
as at least one of them is in a non-root type.  (This is essential for
proper deduplicator operation in cu-mapped mode, where multiple compilation
units can be smashed into one dict, with conflicting types marked as
hidden: these types may well contain overlapping enumerators.)

So, at open time, keep track of all enums observed, then do a third pass
through the enums alone, adding each enumerator either to the ctf_names
table as a mapping from the enumerator name to the enum it is part of (if
not already present), or to a new ctf_conflicting_enums hashtable that
tracks observed duplicates. (The latter is not used yet, but will be soon.)

(We need to do a third pass because it's quite possible to have an enum
containing an enumerator FOO followed by a type FOO: since they're processed
in order, the enumerator would be processed before the type, and at that
stage it seems nonconflicting.  The easiest fix is to run through the
enumerators after all type names are interned.)

At ctf_add_enumerator time, if the enum to which we are adding an enumerator
is root-visible, check for an already-present name and error out if found,
then intern the new name in the ctf_names table as is done at open time.

(We retain the existing code which scans the enum itself for duplicates
because it is still an error to add an enumerator twice to a
non-root-visible enum type; but we only need to do this if the enum is
non-root-visible, so the cost of enum addition is reduced.)

Tested in an upcoming commit.

libctf/
	* ctf-impl.h (ctf_dict_t) <ctf_names>: Augment comment.
        <ctf_conflicting_enums>: New.
	(ctf_dynset_elements): New.
	* ctf-hash.c (ctf_dynset_elements): Implement it.
	* ctf-open.c (init_static_types): Split body into...
        (init_static_types_internal): ... here.  Count enumerators;
        keep track of observed enums in pass 2; populate ctf_names and
        ctf_conflicting_enums with enumerators in a third pass.
	(ctf_dict_close): Free ctf_conflicting_enums.
	* ctf-create.c (ctf_add_enumerator): Prohibit addition of duplicate
        enumerators in root-visible enum types.

include/
	* ctf-api.h (CTF_ADD_NONROOT): Describe what non-rootness
        means for enumeration constants.
	(ctf_add_enumerator):  The name is not a misnomer.
        We now require that enumerators have unique names.
        Document the non-rootness of enumerators.
This commit is contained in:
Nick Alcock
2024-06-11 20:33:03 +01:00
parent 9f0fb75b8e
commit 6e09d4a6e6
5 changed files with 171 additions and 28 deletions

View File

@@ -268,9 +268,11 @@ _CTF_ERRORS
#endif #endif
/* Dynamic CTF containers can be created using ctf_create. The ctf_add_* /* Dynamic CTF containers can be created using ctf_create. The ctf_add_*
routines can be used to add new definitions to the dynamic container. routines can be used to add new definitions to the dynamic container. New
New types are labeled as root or non-root to determine whether they are types are labeled as root or non-root to determine whether they are visible
visible at the top-level program scope when subsequently doing a lookup. */ at the top-level program scope when subsequently doing a lookup.
(Identifiers contained within non-root types, like enumeration constants, are
also not visible.) */
#define CTF_ADD_NONROOT 0 /* Type only visible in nested scope. */ #define CTF_ADD_NONROOT 0 /* Type only visible in nested scope. */
#define CTF_ADD_ROOT 1 /* Type visible at top-level scope. */ #define CTF_ADD_ROOT 1 /* Type visible at top-level scope. */
@@ -785,9 +787,8 @@ extern ctf_id_t ctf_add_union_sized (ctf_dict_t *, uint32_t, const char *,
extern ctf_id_t ctf_add_unknown (ctf_dict_t *, uint32_t, const char *); extern ctf_id_t ctf_add_unknown (ctf_dict_t *, uint32_t, const char *);
extern ctf_id_t ctf_add_volatile (ctf_dict_t *, uint32_t, ctf_id_t); extern ctf_id_t ctf_add_volatile (ctf_dict_t *, uint32_t, ctf_id_t);
/* Add an enumerator to an enum (the name is a misnomer). We do not currently /* Add an enumerator to an enum. If the enum is non-root, so are all the
validate that enumerators have unique names, even though C requires it: in constants added to it by ctf_add_enumerator. */
future this may change. */
extern int ctf_add_enumerator (ctf_dict_t *, ctf_id_t, const char *, int); extern int ctf_add_enumerator (ctf_dict_t *, ctf_id_t, const char *, int);

View File

@@ -1048,7 +1048,6 @@ ctf_add_enumerator (ctf_dict_t *fp, ctf_id_t enid, const char *name,
ctf_dtdef_t *dtd = ctf_dtd_lookup (fp, enid); ctf_dtdef_t *dtd = ctf_dtd_lookup (fp, enid);
unsigned char *old_vlen; unsigned char *old_vlen;
ctf_enum_t *en; ctf_enum_t *en;
size_t i;
uint32_t kind, vlen, root; uint32_t kind, vlen, root;
@@ -1068,6 +1067,12 @@ ctf_add_enumerator (ctf_dict_t *fp, ctf_id_t enid, const char *name,
root = LCTF_INFO_ISROOT (fp, dtd->dtd_data.ctt_info); root = LCTF_INFO_ISROOT (fp, dtd->dtd_data.ctt_info);
vlen = LCTF_INFO_VLEN (fp, dtd->dtd_data.ctt_info); vlen = LCTF_INFO_VLEN (fp, dtd->dtd_data.ctt_info);
/* Enumeration constant names are only added, and only checked for duplicates,
if the enum they are part of is a root-visible type. */
if (root == CTF_ADD_ROOT && ctf_dynhash_lookup (fp->ctf_names, name))
return (ctf_set_errno (ofp, ECTF_DUPLICATE));
if (kind != CTF_K_ENUM) if (kind != CTF_K_ENUM)
return (ctf_set_errno (ofp, ECTF_NOTENUM)); return (ctf_set_errno (ofp, ECTF_NOTENUM));
@@ -1075,24 +1080,46 @@ ctf_add_enumerator (ctf_dict_t *fp, ctf_id_t enid, const char *name,
return (ctf_set_errno (ofp, ECTF_DTFULL)); return (ctf_set_errno (ofp, ECTF_DTFULL));
old_vlen = dtd->dtd_vlen; old_vlen = dtd->dtd_vlen;
if (ctf_grow_vlen (fp, dtd, sizeof (ctf_enum_t) * (vlen + 1)) < 0) if (ctf_grow_vlen (fp, dtd, sizeof (ctf_enum_t) * (vlen + 1)) < 0)
return -1; /* errno is set for us. */ return -1; /* errno is set for us. */
en = (ctf_enum_t *) dtd->dtd_vlen; en = (ctf_enum_t *) dtd->dtd_vlen;
/* Remove refs in the old vlen region and reapply them. */ /* Remove refs in the old vlen region and reapply them. */
ctf_str_move_refs (fp, old_vlen, sizeof (ctf_enum_t) * vlen, dtd->dtd_vlen); ctf_str_move_refs (fp, old_vlen, sizeof (ctf_enum_t) * vlen, dtd->dtd_vlen);
for (i = 0; i < vlen; i++) /* Check for constant duplication within any given enum: only needed for
if (strcmp (ctf_strptr (fp, en[i].cte_name), name) == 0) non-root-visible types, since the duplicate detection above does the job
return (ctf_set_errno (ofp, ECTF_DUPLICATE)); for root-visible types just fine. */
en[i].cte_name = ctf_str_add_movable_ref (fp, name, &en[i].cte_name); if (root == CTF_ADD_NONROOT)
en[i].cte_value = value; {
size_t i;
if (en[i].cte_name == 0 && name != NULL && name[0] != '\0') for (i = 0; i < vlen; i++)
if (strcmp (ctf_strptr (fp, en[i].cte_name), name) == 0)
return (ctf_set_errno (ofp, ECTF_DUPLICATE));
}
en[vlen].cte_name = ctf_str_add_movable_ref (fp, name, &en[vlen].cte_name);
en[vlen].cte_value = value;
if (en[vlen].cte_name == 0 && name != NULL && name[0] != '\0')
return (ctf_set_errno (ofp, ctf_errno (fp))); return (ctf_set_errno (ofp, ctf_errno (fp)));
/* Put the newly-added enumerator name into the name table if this type is
root-visible. */
if (root == CTF_ADD_ROOT)
{
if (ctf_dynhash_insert (fp->ctf_names,
(char *) ctf_strptr (fp, en[vlen].cte_name),
(void *) (uintptr_t) enid) < 0)
return ctf_set_errno (fp, ENOMEM);
}
dtd->dtd_data.ctt_info = CTF_TYPE_INFO (kind, root, vlen + 1); dtd->dtd_data.ctt_info = CTF_TYPE_INFO (kind, root, vlen + 1);
return 0; return 0;

View File

@@ -651,6 +651,12 @@ ctf_dynset_remove (ctf_dynset_t *hp, const void *key)
htab_remove_elt ((struct htab *) hp, key_to_internal (key)); htab_remove_elt ((struct htab *) hp, key_to_internal (key));
} }
/* Return the element count of the dynset HP, as reported by htab_elements
   on the underlying hash table (a ctf_dynset_t is handled here as a
   struct htab).  */
size_t
ctf_dynset_elements (ctf_dynset_t *hp)
{
return htab_elements ((struct htab *) hp);
}
void void
ctf_dynset_destroy (ctf_dynset_t *hp) ctf_dynset_destroy (ctf_dynset_t *hp)
{ {

View File

@@ -387,7 +387,8 @@ struct ctf_dict
ctf_dynhash_t *ctf_structs; /* Hash table of struct types. */ ctf_dynhash_t *ctf_structs; /* Hash table of struct types. */
ctf_dynhash_t *ctf_unions; /* Hash table of union types. */ ctf_dynhash_t *ctf_unions; /* Hash table of union types. */
ctf_dynhash_t *ctf_enums; /* Hash table of enum types. */ ctf_dynhash_t *ctf_enums; /* Hash table of enum types. */
ctf_dynhash_t *ctf_names; /* Hash table of remaining type names. */ ctf_dynhash_t *ctf_names; /* Hash table of remaining types, plus
enumeration constants. */
ctf_lookup_t ctf_lookups[5]; /* Pointers to nametabs for name lookup. */ ctf_lookup_t ctf_lookups[5]; /* Pointers to nametabs for name lookup. */
ctf_strs_t ctf_str[2]; /* Array of string table base and bounds. */ ctf_strs_t ctf_str[2]; /* Array of string table base and bounds. */
ctf_strs_writable_t *ctf_dynstrtab; /* Dynamically allocated string table, if any. */ ctf_strs_writable_t *ctf_dynstrtab; /* Dynamically allocated string table, if any. */
@@ -407,6 +408,7 @@ struct ctf_dict
uint32_t *ctf_pptrtab; /* Parent types pointed to by child dicts. */ uint32_t *ctf_pptrtab; /* Parent types pointed to by child dicts. */
size_t ctf_pptrtab_len; /* Num types storable in pptrtab currently. */ size_t ctf_pptrtab_len; /* Num types storable in pptrtab currently. */
uint32_t ctf_pptrtab_typemax; /* Max child type when pptrtab last updated. */ uint32_t ctf_pptrtab_typemax; /* Max child type when pptrtab last updated. */
ctf_dynset_t *ctf_conflicting_enums; /* Tracks enum constants that conflict. */
uint32_t *ctf_funcidx_names; /* Name of each function symbol in symtypetab uint32_t *ctf_funcidx_names; /* Name of each function symbol in symtypetab
(if indexed). */ (if indexed). */
uint32_t *ctf_objtidx_names; /* Likewise, for object symbols. */ uint32_t *ctf_objtidx_names; /* Likewise, for object symbols. */
@@ -669,6 +671,7 @@ extern int ctf_dynhash_next_sorted (ctf_dynhash_t *, ctf_next_t **,
extern ctf_dynset_t *ctf_dynset_create (htab_hash, htab_eq, ctf_hash_free_fun); extern ctf_dynset_t *ctf_dynset_create (htab_hash, htab_eq, ctf_hash_free_fun);
extern int ctf_dynset_insert (ctf_dynset_t *, void *); extern int ctf_dynset_insert (ctf_dynset_t *, void *);
extern void ctf_dynset_remove (ctf_dynset_t *, const void *); extern void ctf_dynset_remove (ctf_dynset_t *, const void *);
extern size_t ctf_dynset_elements (ctf_dynset_t *);
extern void ctf_dynset_destroy (ctf_dynset_t *); extern void ctf_dynset_destroy (ctf_dynset_t *);
extern void *ctf_dynset_lookup (ctf_dynset_t *, const void *); extern void *ctf_dynset_lookup (ctf_dynset_t *, const void *);
extern int ctf_dynset_exists (ctf_dynset_t *, const void *key, extern int ctf_dynset_exists (ctf_dynset_t *, const void *key,

View File

@@ -670,24 +670,50 @@ upgrade_types (ctf_dict_t *fp, ctf_header_t *cth)
return 0; return 0;
} }
static int
init_static_types_internal (ctf_dict_t *fp, ctf_header_t *cth,
ctf_dynset_t *all_enums);
/* Populate statically-defined types (those loaded from a saved buffer). /* Populate statically-defined types (those loaded from a saved buffer).
Initialize the type ID translation table with the byte offset of each type, Initialize the type ID translation table with the byte offset of each type,
and initialize the hash tables of each named type. Upgrade the type table to and initialize the hash tables of each named type. Upgrade the type table to
the latest supported representation in the process, if needed, and if this the latest supported representation in the process, if needed, and if this
recension of libctf supports upgrading. */ recension of libctf supports upgrading.
This is a wrapper to simplify memory allocation on error in the _internal
function that does all the actual work. */
static int static int
init_static_types (ctf_dict_t *fp, ctf_header_t *cth) init_static_types (ctf_dict_t *fp, ctf_header_t *cth)
{
ctf_dynset_t *all_enums;
int err;
if ((all_enums = ctf_dynset_create (htab_hash_pointer, htab_eq_pointer,
NULL)) == NULL)
return ENOMEM;
err = init_static_types_internal (fp, cth, all_enums);
ctf_dynset_destroy (all_enums);
return err;
}
static int
init_static_types_internal (ctf_dict_t *fp, ctf_header_t *cth,
ctf_dynset_t *all_enums)
{ {
const ctf_type_t *tbuf; const ctf_type_t *tbuf;
const ctf_type_t *tend; const ctf_type_t *tend;
unsigned long pop[CTF_K_MAX + 1] = { 0 }; unsigned long pop[CTF_K_MAX + 1] = { 0 };
int pop_enumerators = 0;
const ctf_type_t *tp; const ctf_type_t *tp;
uint32_t id; uint32_t id;
uint32_t *xp; uint32_t *xp;
unsigned long typemax = 0; unsigned long typemax = 0;
ctf_next_t *i = NULL;
void *k;
/* We determine whether the dict is a child or a parent based on the value of /* We determine whether the dict is a child or a parent based on the value of
cth_parname. */ cth_parname. */
@@ -706,8 +732,10 @@ init_static_types (ctf_dict_t *fp, ctf_header_t *cth)
tbuf = (ctf_type_t *) (fp->ctf_buf + cth->cth_typeoff); tbuf = (ctf_type_t *) (fp->ctf_buf + cth->cth_typeoff);
tend = (ctf_type_t *) (fp->ctf_buf + cth->cth_stroff); tend = (ctf_type_t *) (fp->ctf_buf + cth->cth_stroff);
/* We make two passes through the entire type section. In this first /* We make two passes through the entire type section, and one third pass
pass, we count the number of each type and the total number of types. */ through part of it. In this first pass, we count the number of each type
and type-like identifier (like enumerators) and the total number of
types. */
for (tp = tbuf; tp < tend; typemax++) for (tp = tbuf; tp < tend; typemax++)
{ {
@@ -728,6 +756,9 @@ init_static_types (ctf_dict_t *fp, ctf_header_t *cth)
tp = (ctf_type_t *) ((uintptr_t) tp + increment + vbytes); tp = (ctf_type_t *) ((uintptr_t) tp + increment + vbytes);
pop[kind]++; pop[kind]++;
if (kind == CTF_K_ENUM)
pop_enumerators += vlen;
} }
if (child) if (child)
@@ -765,11 +796,16 @@ init_static_types (ctf_dict_t *fp, ctf_header_t *cth)
pop[CTF_K_POINTER] + pop[CTF_K_POINTER] +
pop[CTF_K_VOLATILE] + pop[CTF_K_VOLATILE] +
pop[CTF_K_CONST] + pop[CTF_K_CONST] +
pop[CTF_K_RESTRICT], pop[CTF_K_RESTRICT] +
pop_enumerators,
ctf_hash_string, ctf_hash_string,
ctf_hash_eq_string, NULL, NULL)) == NULL) ctf_hash_eq_string, NULL, NULL)) == NULL)
return ENOMEM; return ENOMEM;
if ((fp->ctf_conflicting_enums
= ctf_dynset_create (htab_hash_string, htab_eq_string, NULL)) == NULL)
return ENOMEM;
/* The ptrtab and txlate can be appropriately sized for precisely this set /* The ptrtab and txlate can be appropriately sized for precisely this set
of types: the txlate because it is only used to look up static types, of types: the txlate because it is only used to look up static types,
so dynamic types added later will never go through it, and the ptrtab so dynamic types added later will never go through it, and the ptrtab
@@ -793,6 +829,8 @@ init_static_types (ctf_dict_t *fp, ctf_header_t *cth)
/* In the second pass through the types, we fill in each entry of the /* In the second pass through the types, we fill in each entry of the
type and pointer tables and add names to the appropriate hashes. type and pointer tables and add names to the appropriate hashes.
(Not all names are added in this pass, only type names. See below.)
Bump ctf_typemax as we go, but keep it one higher than normal, so that Bump ctf_typemax as we go, but keep it one higher than normal, so that
the type being read in is considered a valid type and it is at least the type being read in is considered a valid type and it is at least
barely possible to run simple lookups on it. */ barely possible to run simple lookups on it. */
@@ -902,16 +940,25 @@ init_static_types (ctf_dict_t *fp, ctf_header_t *cth)
break; break;
case CTF_K_ENUM: case CTF_K_ENUM:
if (!isroot) {
if (!isroot)
break;
err = ctf_dynhash_insert_type (fp, fp->ctf_enums,
LCTF_INDEX_TO_TYPE (fp, id, child),
tp->ctt_name);
if (err != 0)
return err;
/* Remember all enums for later rescanning. */
err = ctf_dynset_insert (all_enums, (void *) (ptrdiff_t)
LCTF_INDEX_TO_TYPE (fp, id, child));
if (err != 0)
return err;
break; break;
}
err = ctf_dynhash_insert_type (fp, fp->ctf_enums,
LCTF_INDEX_TO_TYPE (fp, id, child),
tp->ctt_name);
if (err != 0)
return err;
break;
case CTF_K_TYPEDEF: case CTF_K_TYPEDEF:
if (!isroot) if (!isroot)
@@ -976,13 +1023,71 @@ init_static_types (ctf_dict_t *fp, ctf_header_t *cth)
assert (fp->ctf_typemax == typemax); assert (fp->ctf_typemax == typemax);
ctf_dprintf ("%lu total types processed\n", fp->ctf_typemax); ctf_dprintf ("%lu total types processed\n", fp->ctf_typemax);
/* In the third pass, we traverse the enums we spotted earlier and add all
the enumeration constants therein either to the types table (if no
type exists with that name) or to ctf_conflciting_enums (otherwise).
Doing this in a third pass is necessary to avoid the case where an
enum appears with a constant FOO, then later a type named FOO appears,
too late to spot the conflict by checking the enum's constants. */
while ((err = ctf_dynset_next (all_enums, &i, &k)) == 0)
{
ctf_id_t enum_id = (uintptr_t) k;
ctf_next_t *i_constants = NULL;
const char *cte_name;
while ((cte_name = ctf_enum_next (fp, enum_id, &i_constants, NULL)) != NULL)
{
/* Add all the enumeration constants as identifiers. They all appear
as types that cite the original enum.
Constants that appear in more than one enum, or which are already
the names of types, appear in ctf_conflicting_enums as well. */
if (ctf_dynhash_lookup_type (fp->ctf_names, cte_name) == 0)
{
uint32_t name = ctf_str_add (fp, cte_name);
if (name == 0)
goto enum_err;
err = ctf_dynhash_insert_type (fp, fp->ctf_names, enum_id, name);
}
else
{
err = ctf_dynset_insert (fp->ctf_conflicting_enums, (void *)
cte_name);
if (err != 0)
goto enum_err;
}
continue;
enum_err:
ctf_next_destroy (i_constants);
ctf_next_destroy (i);
return ctf_errno (fp);
}
if (ctf_errno (fp) != ECTF_NEXT_END)
{
ctf_next_destroy (i);
return ctf_errno (fp);
}
}
if (err != ECTF_NEXT_END)
return err;
ctf_dprintf ("%zu enum names hashed\n", ctf_dprintf ("%zu enum names hashed\n",
ctf_dynhash_elements (fp->ctf_enums)); ctf_dynhash_elements (fp->ctf_enums));
ctf_dprintf ("%zu conflicting enumerators identified\n",
ctf_dynset_elements (fp->ctf_conflicting_enums));
ctf_dprintf ("%zu struct names hashed (%d long)\n", ctf_dprintf ("%zu struct names hashed (%d long)\n",
ctf_dynhash_elements (fp->ctf_structs), nlstructs); ctf_dynhash_elements (fp->ctf_structs), nlstructs);
ctf_dprintf ("%zu union names hashed (%d long)\n", ctf_dprintf ("%zu union names hashed (%d long)\n",
ctf_dynhash_elements (fp->ctf_unions), nlunions); ctf_dynhash_elements (fp->ctf_unions), nlunions);
ctf_dprintf ("%zu base type names hashed\n", ctf_dprintf ("%zu base type names and identifiers hashed\n",
ctf_dynhash_elements (fp->ctf_names)); ctf_dynhash_elements (fp->ctf_names));
return 0; return 0;
@@ -1786,6 +1891,7 @@ ctf_dict_close (ctf_dict_t *fp)
} }
ctf_dynhash_destroy (fp->ctf_dthash); ctf_dynhash_destroy (fp->ctf_dthash);
ctf_dynset_destroy (fp->ctf_conflicting_enums);
ctf_dynhash_destroy (fp->ctf_structs); ctf_dynhash_destroy (fp->ctf_structs);
ctf_dynhash_destroy (fp->ctf_unions); ctf_dynhash_destroy (fp->ctf_unions);
ctf_dynhash_destroy (fp->ctf_enums); ctf_dynhash_destroy (fp->ctf_enums);