libctf: create, types: variables and datasecs (REVIEW NEEDED)

This is an area of significant difference from CTFv3.  The API changes
significantly, with quite a few additions to allow creation and querying of
these new datasec entities:

-typedef int ctf_variable_f (const char *name, ctf_id_t type, void *arg);
+typedef int ctf_variable_f (ctf_dict_t *, const char *name, ctf_id_t type,
+			    void *arg);
+typedef int ctf_datasec_var_f (ctf_dict_t *fp, ctf_id_t type, size_t offset,
+			       size_t datasec_size, void *arg);

+/* Search a datasec for a variable covering a given offset.
+
+   Errors with ECTF_NODATASEC if not found.  */
+
+ctf_id_t ctf_datasec_var_offset (ctf_dict_t *fp, ctf_id_t datasec,
+				 uint32_t offset);
+
+/* Return the datasec that a given variable appears in, or ECTF_NODATASEC if
+   none.  */
+
+ctf_id_t ctf_variable_datasec (ctf_dict_t *fp, ctf_id_t var);

+int ctf_datasec_var_iter (ctf_dict_t *, ctf_id_t, ctf_datasec_var_f *,
+			  void *);
+ctf_id_t ctf_datasec_var_next (ctf_dict_t *, ctf_id_t, ctf_next_t **,
+			       size_t *size, size_t *offset);

-int ctf_add_variable (ctf_dict_t *, const char *, ctf_id_t);
+/* ctf_add_variable adds variables to no datasec at all;
+   ctf_add_section_variable adds them to the given datasec, or to no datasec at
+   all if the datasec is NULL.  */
+
+ctf_id_t ctf_add_variable (ctf_dict_t *, const char *, int linkage, ctf_id_t);
+ctf_id_t ctf_add_section_variable (ctf_dict_t *, uint32_t,
+				   const char *datasec, const char *name,
+				   int linkage, ctf_id_t type,
+				   size_t size, size_t offset);

We tie datasecs quite closely to variables at addition (and, as should
become clear later, dedup) time: you never create datasecs directly, you
only create variables *in* datasecs, and the datasec springs into existence
when you do so.  Datasecs are always found in the same dict as the variables
they contain (a variable is never in the parent while its datasec is in a
child, or vice versa).  We keep track of the variable->datasec mapping in
ctf_var_datasecs (populating it at addition and open time), to allow
ctf_variable_datasec to work at reasonable speed.  (But, as yet, there are
no tests of this function at all.)

The datasecs are created unsorted (to avoid variable addition becoming
O(n^2)); they are sorted at serialization time, and also whenever
ctf_datasec_var_offset is invoked.

We reuse the natural-alignment code from struct addition to get a plausible
offset in datasecs if an alignment of -1 is specified: maybe this is
unnecessary now (it was originally added when ctf_add_variable added
variables to a "default datasec", while now it just leaves them out of
all datasecs, like externs are).

One constraint of this is that we currently prohibit the addition of
nonrepresentable-typed variables, because we can't tell what their natural
alignment is: if we dropped the whole "align" and just required everyone
adding a variable to a datasec to specify an offset, we could drop that
restriction. WDYT?

One additional caveat: right now, ctf_lookup_variable() looks up the type of
a variable (because when it was invented, variables were not entities in
themselves that you could look up).  This name is confusing as hell as a
result.  It might be less confusing to make it return the CTF_K_VAR, but
that would be awful to adapt callers to, since both are represented with
ctf_id_t's, so the compiler wouldn't warn about the needed change at all...
I've vacillated on this three or four times now.
This commit is contained in:
Nick Alcock
2025-04-24 17:42:16 +01:00
parent 097ff012e4
commit ea21a1b2ae
7 changed files with 488 additions and 65 deletions

View File

@@ -608,7 +608,7 @@ ctf_variable_iter (ctf_dict_t *fp, ctf_variable_f *func, void *arg)
while ((type = ctf_variable_next (fp, &i, &name)) != CTF_ERR)
{
int rc;
if ((rc = func (name, type, arg)) != 0)
if ((rc = func (fp, name, type, arg)) != 0)
{
ctf_next_destroy (i);
return rc;
@@ -643,7 +643,7 @@ ctf_variable_next (ctf_dict_t *fp, ctf_next_t **it, const char **name)
i->cu.ctn_fp = fp;
i->ctn_iter_fun = (void (*) (void)) ctf_variable_next;
i->u.ctn_dvd = ctf_list_next (&fp->ctf_dvdefs);
i->ctn_next = NULL;
*it = i;
}
@@ -653,25 +653,110 @@ ctf_variable_next (ctf_dict_t *fp, ctf_next_t **it, const char **name)
if (fp != i->cu.ctn_fp)
return (ctf_set_typed_errno (fp, ECTF_NEXT_WRONGFP));
if (i->ctn_n < fp->ctf_nvars)
if ((id = ctf_type_kind_next (fp, &i->ctn_next, CTF_K_VAR)) == CTF_ERR)
{
*name = ctf_strptr (fp, fp->ctf_vars[i->ctn_n].ctv_name);
return fp->ctf_vars[i->ctn_n++].ctv_type;
if (ctf_errno (fp) == ECTF_NEXT_END)
ctf_next_destroy (i);
}
if (i->u.ctn_dvd == NULL)
if (name)
*name = ctf_type_name_raw (fp, id);
return id;
}
/* Call FUNC once for every variable in the given DATASEC, in arbitrary order.
   Each call receives the dict, the variable's type ID, its offset, its
   datasec-recorded size (usually 0), and ARG.

   A nonzero return from FUNC stops the walk at once and becomes our return
   value.  Otherwise we return 0 once every variable has been visited, or -1
   if the walk ended for any reason other than running out of variables.  */

int
ctf_datasec_var_iter (ctf_dict_t *fp, ctf_id_t datasec,
                      ctf_datasec_var_f *func, void *arg)
{
  ctf_next_t *it = NULL;
  size_t var_size, var_offset;

  for (;;)
    {
      ctf_id_t var;
      int ret;

      var = ctf_datasec_var_next (fp, datasec, &it, &var_size, &var_offset);
      if (var == CTF_ERR)
        break;

      ret = func (fp, var, var_offset, var_size, arg);
      if (ret != 0)
        {
          /* Stopping early: the iterator has not freed itself yet.  */
          ctf_next_destroy (it);
          return ret;
        }
    }

  /* ECTF_NEXT_END is the normal end of iteration; anything else is a real
     error, already recorded on FP.  */
  return (ctf_errno (fp) == ECTF_NEXT_END) ? 0 : -1;
}
/* Iterate over every variable in the given CTF datasec, in arbitrary order,
returning the type ID of each variable in turn, and storing its
datasec-recorded size and its offset in *SIZE and *OFFSET when those pointers
are non-NULL. Returns CTF_ERR on end of iteration or error.
(The order is arbitrary so we don't need to worry about sorting unsorted
datasecs.) */
ctf_id_t
ctf_datasec_var_next (ctf_dict_t *fp, ctf_id_t datasec, ctf_next_t **it,
size_t *size, size_t *offset)
{
ctf_next_t *i = *it;
ctf_id_t type;
/* First call (*IT is NULL): validate DATASEC and set up the iterator. */
if (!i)
{
const ctf_type_t *tp;
unsigned char *vlen;
/* Keep the caller's dict: ctf_lookup_by_id below is handed &fp and so may
repoint FP. */
ctf_dict_t *ofp = fp;
if ((datasec = ctf_type_resolve_unsliced (fp, datasec)) == CTF_ERR)
return CTF_ERR; /* errno is set for us. */
if (ctf_type_kind (fp, datasec) != CTF_K_DATASEC)
return (ctf_set_typed_errno (ofp, ECTF_NOTDATASEC));
if ((tp = ctf_lookup_by_id (&fp, datasec, NULL)) == NULL)
return CTF_ERR; /* errno is set for us. */
if ((i = ctf_next_create ()) == NULL)
return (ctf_set_typed_errno (ofp, ENOMEM));
i->cu.ctn_fp = ofp;
i->ctn_iter_fun = (void (*) (void)) ctf_datasec_var_next;
/* ctn_n receives the count filled in by ctf_vlen; it is counted down below as
entries are handed out. */
vlen = ctf_vlen (fp, datasec, tp, &i->ctn_n);
i->u.ctn_datasec = (const ctf_var_secinfo_t *) vlen;
*it = i;
}
/* Reject iterators that were created by a different iterator function or for a
different dict. */
if ((void (*) (void)) ctf_datasec_var_next != i->ctn_iter_fun)
return (ctf_set_typed_errno (fp, ECTF_NEXT_WRONGFUN));
/* NOTE(review): on the first call FP may have been repointed by
ctf_lookup_by_id above while ctn_fp holds the caller's original dict (OFP),
so this comparison looks like it could fail spuriously, leaving the new
iterator in *IT -- confirm. */
if (fp != i->cu.ctn_fp)
return (ctf_set_typed_errno (fp, ECTF_NEXT_WRONGFP));
/* No entries left: finish the iteration. */
if (i->ctn_n == 0)
goto end_iter;
/* NOTE(review): the next four lines use `name' and `id', neither of which is
declared in this function, and walk u.ctn_dvd rather than u.ctn_datasec.
They look like leftover lines from the old variable iterator; the `return id'
would also make the rest of the body unreachable -- confirm and drop. */
*name = i->u.ctn_dvd->dvd_name;
id = i->u.ctn_dvd->dvd_type;
i->u.ctn_dvd = ctf_list_next (i->u.ctn_dvd);
return id;
/* Hand back the current entry's size and offset (each only if the caller asked
for it), then step to the next entry. */
if (size)
*size = i->u.ctn_datasec->cvs_size;
if (offset)
*offset = i->u.ctn_datasec->cvs_offset;
type = i->u.ctn_datasec->cvs_type;
i->u.ctn_datasec++;
i->ctn_n--;
return type;
/* End of iteration: free the iterator and reset *IT to NULL. */
end_iter:
ctf_next_destroy (i);
*it = NULL;
/* NOTE(review): two consecutive returns of the same value; the second can
never be reached -- presumably one of them is an old line. */
return ctf_set_typed_errno (fp, ECTF_NEXT_END);
return (ctf_set_typed_errno (fp, ECTF_NEXT_END));
}
/* Follow a given type through the graph for TYPEDEF, VOLATILE, CONST, and
@@ -874,8 +959,10 @@ ctf_type_aname (ctf_dict_t *fp, ctf_id_t type)
case CTF_K_INTEGER:
case CTF_K_FLOAT:
case CTF_K_TYPEDEF:
/* Integers, floats, and typedefs must always be named types. */
case CTF_K_BTF_FLOAT:
case CTF_K_DATASEC:
/* Integers, floats, typedefs, and datasecs must always be named
types. */
if (name[0] == '\0')
{
@@ -884,7 +971,11 @@ ctf_type_aname (ctf_dict_t *fp, ctf_id_t type)
return NULL;
}
ctf_decl_sprintf (&cd, "%s", name);
if (cdp->cd_kind != CTF_K_DATASEC)
ctf_decl_sprintf (&cd, "%s", name);
else
ctf_decl_sprintf (&cd, "DATASEC (\"%s\", %i)", name,
LCTF_VLEN (rfp, tp));
break;
case CTF_K_POINTER:
ctf_decl_sprintf (&cd, "*");
@@ -1967,6 +2058,110 @@ ctf_func_type_args (ctf_dict_t *fp, ctf_id_t type, uint32_t argc, ctf_id_t *argv
return 0;
}
/* Binary-search SEC, an array of NVARS datasec entries sorted by ascending
   cvs_offset, for the last entry that starts at or before OFFSET.  Returns
   NULL if every entry starts after OFFSET (or the array is empty).

   An exact-match search is not enough here: a variable covers every offset
   from its start up to its start plus its size, so the entry of interest is
   the nearest one at or below the offset asked for.  */

static ctf_var_secinfo_t *
datasec_entry_at_or_before (ctf_var_secinfo_t *sec, size_t nvars,
                            uint32_t offset)
{
  size_t lo = 0;
  size_t hi = nvars;

  /* Invariant: every entry below LO starts at or before OFFSET; every entry
     at or above HI starts after it.  */
  while (lo < hi)
    {
      size_t mid = lo + (hi - lo) / 2;

      if (sec[mid].cvs_offset <= offset)
        lo = mid + 1;
      else
        hi = mid;
    }

  if (lo == 0)
    return NULL;

  return &sec[lo - 1];
}

/* Search a datasec for a variable covering a given offset: either one that
   starts exactly at OFFSET, or one that starts before it and whose type is
   large enough to reach it.  Returns the variable's type ID.

   Errors with ECTF_NOTDATASEC if DATASEC is not a datasec, and with
   ECTF_NODATASEC if no variable covers OFFSET.  Errors are recorded on the
   dict the caller passed in.  */

ctf_id_t
ctf_datasec_var_offset (ctf_dict_t *fp, ctf_id_t datasec, uint32_t offset)
{
  ctf_dict_t *ofp = fp;
  ctf_dtdef_t *dtd;
  const ctf_type_t *tp;
  unsigned char *vlen;
  size_t vlen_len;
  ctf_var_secinfo_t *el;
  ssize_t size;

  /* This may repoint FP, which is why the caller's dict is kept in OFP.  */
  if ((tp = ctf_lookup_by_id (&fp, datasec, NULL)) == NULL)
    return CTF_ERR;                     /* errno is set for us.  */

  if (ctf_type_kind (fp, datasec) != CTF_K_DATASEC)
    return ctf_set_typed_errno (ofp, ECTF_NOTDATASEC);

  /* Datasecs are built unsorted; the search below needs them in offset
     order.  */
  if ((dtd = ctf_dynamic_type (fp, datasec)) != NULL
      && (dtd->dtd_flags & DTD_F_UNSORTED))
    ctf_datasec_sort (fp, dtd);

  vlen = ctf_vlen (fp, datasec, tp, &vlen_len);

  el = datasec_entry_at_or_before ((ctf_var_secinfo_t *) vlen, vlen_len,
                                   offset);
  if (el == NULL)
    return ctf_set_typed_errno (ofp, ECTF_NODATASEC);

  if (el->cvs_offset == offset)
    return el->cvs_type;

  /* EL starts strictly before OFFSET: it covers OFFSET only if its type
     extends that far.  The subtraction cannot wrap because
     el->cvs_offset < offset here.  */
  if ((size = ctf_type_size (fp, el->cvs_type)) >= 0
      && (size_t) size > (size_t) (offset - el->cvs_offset))
    return el->cvs_type;

  return ctf_set_typed_errno (ofp, ECTF_NODATASEC);
}
/* Return the entry corresponding to a given component_idx in a datasec, or
   NULL, with the error set to EOVERFLOW, if COMPONENT_IDX is negative or not
   less than the number of entries in the datasec.

   Not currently public API.  */

ctf_var_secinfo_t *
ctf_datasec_entry (ctf_dict_t *fp, ctf_id_t datasec, int component_idx)
{
  const ctf_type_t *tp;
  unsigned char *vlen;
  size_t vlen_len;
  ctf_var_secinfo_t *sec;

  if ((tp = ctf_lookup_by_id (&fp, datasec, NULL)) == NULL)
    return NULL;                        /* errno is set for us.  */

  /* No type kind check: internal function.  */

  vlen = ctf_vlen (fp, datasec, tp, &vlen_len);
  sec = (ctf_var_secinfo_t *) vlen;

  /* Valid indexes are 0 .. vlen_len - 1: an index equal to vlen_len is one
     past the last entry and must be rejected too.  */
  if (component_idx < 0 || (size_t) component_idx >= vlen_len)
    {
      ctf_set_errno (fp, EOVERFLOW);
      return NULL;
    }

  return &sec[component_idx];
}
/* Look up which datasec the variable VAR was placed in and return that
   datasec's type ID.

   Errors with ECTF_NOTVAR if VAR is not a CTF_K_VAR, and with ECTF_NODATASEC
   if the variable is not recorded in FP's variable->datasec table.  */

ctf_id_t
ctf_variable_datasec (ctf_dict_t *fp, ctf_id_t var)
{
  /* The table is keyed by the variable's type ID, smuggled into a pointer,
     and the value slot carries the datasec's type ID the same way.  */
  void *key = (void *) (ptrdiff_t) var;
  void *datasec_id;

  if (ctf_type_kind (fp, var) != CTF_K_VAR)
    return ctf_set_typed_errno (fp, ECTF_NOTVAR);

  if (!ctf_dynhash_lookup_kv (fp->ctf_var_datasecs, key, NULL, &datasec_id))
    return ctf_set_typed_errno (fp, ECTF_NODATASEC);

  return (ctf_id_t) datasec_id;
}
/* Recursively visit the members of any type. This function is used as the
engine for ctf_type_visit, below. We resolve the input type, recursively
invoke ourself for each type member if the type is a struct or union, and