libctf: rethink strtab writeout

This commit finally adjusts strtab writeout so that repeated writeouts, or
writeouts of a dict that was read in earlier, only sort the portion of the
strtab that was newly added.

There are three intertwined changes here:

 - pull the contents of strtabs from newly ctf_bufopened dicts into the
   atoms table, so that future additions will reuse the existing offset etc
   rather than adding new identical strings
 - allow the internal ctf_bufopen done by serialization to contribute its
   existing atoms table, so that existing atoms can be used for the
   remainder of the open process (like name table construction): this atoms
   table currently gets thrown away in the mass reassignment done later in
   ctf_serialize in any case, but it needs to be there during the open.
 - rewrite ctf_str_write_strtab so that a) it uses iterators rather than
   ctf_*_iter, reducing pointless structures which serve no other purpose
   than to implement ordinary variable scope, but more clunkily, and b)
   retains the existing strtab on the front of the new one, with its sort
   retained, rather than resorting, so all existing already-written strtab
   offsets remain valid across the call.

This last change finally permits repeated serializations, and
reserializations of ctf_open()ed dicts, to work, but for now we keep the
code that prevents that because serialization is about to change again in a
way that will make it more obvious that doing such things is safe, and we
can take it out then.

(There are also some smaller changes like moving the purge of the refs table
into ctf_str_write_strtab(), since that's where the changes happen that
invalidate it, rather than doing it in ctf_serialize().  We also prohibit
something that has never worked, opening a dict and then reporting symbols
to it via ctf_link_add_strtab() et al: you must do that to newly-created
dicts which have had stuff ctf_link()ed into them.  This is very unlikely
ever to be a problem in practice: linkers just don't do that sort of thing.)

libctf/

	* ctf-create.c (ctf_create): Add (temporary) atoms arg.
	* ctf-impl.h (struct ctf_dict.ctf_dynstrtab): New.
	(ctf_str_create_atoms): Adjust.
	(ctf_str_write_strtab): Likewise.
	(ctf_simple_open_internal): Likewise.
	* ctf-open.c (ctf_simple_open_internal): Add atoms arg.
	(ctf_bufopen): Likewise.
	(ctf_bufopen_internal): Initialize just enough of an
	atoms table: pre-init from the atoms arg if supplied.
	(ctf_simple_open): Adjust.
	* ctf-serialize.c (ctf_serialize): Constify the strtab.
	Move ref list purging into ctf_str_write_strtab.
	Initialize the new dict with the old dict's atoms table.
	Accept the new strtab from ctf_str_write_strtab.
	Adjust for addition of ctf_dynstrtab.
	* ctf-string.c (ctf_strraw_explicit): Improve comments.
	(ctf_str_create_atoms): Prepopulate from an existing atoms table,
	or alternatively pull in all strings from the strtab and turn
	them into atoms.
	(ctf_str_free_atoms): Free the dynstrtab and its strtab.
	(struct ctf_strtab_write_state): Remove.
	(ctf_str_count_strtab): Fold this...
	(ctf_str_populate_sorttab): ... and this...
	(ctf_str_write_strtab): ... into this.  Prepend existing strings
	to the strtab rather than resorting them (and wrecking their
	offsets).  Keep the dynstrtab updated.  Update refs for all
	atoms with refs, whether or not they are strings newly added
	to the strtab.
This commit is contained in:
Nick Alcock
2024-03-25 19:07:43 +00:00
parent 149ce5c263
commit cf9da3b0b6
5 changed files with 294 additions and 152 deletions

View File

@@ -133,7 +133,7 @@ ctf_create (int *errp)
cts.cts_size = sizeof (hdr); cts.cts_size = sizeof (hdr);
cts.cts_entsize = 1; cts.cts_entsize = 1;
if ((fp = ctf_bufopen_internal (&cts, NULL, NULL, NULL, errp)) == NULL) if ((fp = ctf_bufopen_internal (&cts, NULL, NULL, NULL, NULL, errp)) == NULL)
goto err; goto err;
/* These hashes will have been initialized with a starting size of zero, /* These hashes will have been initialized with a starting size of zero,

View File

@@ -396,6 +396,7 @@ struct ctf_dict
ctf_dynhash_t *ctf_names; /* Hash table of remaining type names. */ ctf_dynhash_t *ctf_names; /* Hash table of remaining type names. */
ctf_lookup_t ctf_lookups[5]; /* Pointers to nametabs for name lookup. */ ctf_lookup_t ctf_lookups[5]; /* Pointers to nametabs for name lookup. */
ctf_strs_t ctf_str[2]; /* Array of string table base and bounds. */ ctf_strs_t ctf_str[2]; /* Array of string table base and bounds. */
ctf_strs_writable_t *ctf_dynstrtab; /* Dynamically allocated string table, if any. */
ctf_dynhash_t *ctf_str_atoms; /* Hash table of ctf_str_atoms_t. */ ctf_dynhash_t *ctf_str_atoms; /* Hash table of ctf_str_atoms_t. */
ctf_dynhash_t *ctf_str_movable_refs; /* Hash table of void * -> ctf_str_atom_ref_t. */ ctf_dynhash_t *ctf_str_movable_refs; /* Hash table of void * -> ctf_str_atom_ref_t. */
uint32_t ctf_str_prov_offset; /* Latest provisional offset assigned so far. */ uint32_t ctf_str_prov_offset; /* Latest provisional offset assigned so far. */
@@ -734,7 +735,7 @@ extern const char *ctf_strraw (ctf_dict_t *, uint32_t);
extern const char *ctf_strraw_explicit (ctf_dict_t *, uint32_t, extern const char *ctf_strraw_explicit (ctf_dict_t *, uint32_t,
ctf_strs_t *); ctf_strs_t *);
extern const char *ctf_strptr_validate (ctf_dict_t *, uint32_t); extern const char *ctf_strptr_validate (ctf_dict_t *, uint32_t);
extern int ctf_str_create_atoms (ctf_dict_t *); extern int ctf_str_create_atoms (ctf_dict_t *, ctf_dynhash_t *atoms);
extern void ctf_str_free_atoms (ctf_dict_t *); extern void ctf_str_free_atoms (ctf_dict_t *);
extern uint32_t ctf_str_add (ctf_dict_t *, const char *); extern uint32_t ctf_str_add (ctf_dict_t *, const char *);
extern uint32_t ctf_str_add_ref (ctf_dict_t *, const char *, uint32_t *ref); extern uint32_t ctf_str_add_ref (ctf_dict_t *, const char *, uint32_t *ref);
@@ -745,7 +746,7 @@ extern int ctf_str_add_external (ctf_dict_t *, const char *, uint32_t offset);
extern void ctf_str_remove_ref (ctf_dict_t *, const char *, uint32_t *ref); extern void ctf_str_remove_ref (ctf_dict_t *, const char *, uint32_t *ref);
extern void ctf_str_rollback (ctf_dict_t *, ctf_snapshot_id_t); extern void ctf_str_rollback (ctf_dict_t *, ctf_snapshot_id_t);
extern void ctf_str_purge_refs (ctf_dict_t *); extern void ctf_str_purge_refs (ctf_dict_t *);
extern ctf_strs_writable_t ctf_str_write_strtab (ctf_dict_t *); extern const ctf_strs_writable_t *ctf_str_write_strtab (ctf_dict_t *);
extern struct ctf_archive_internal * extern struct ctf_archive_internal *
ctf_new_archive_internal (int is_archive, int unmap_on_close, ctf_new_archive_internal (int is_archive, int unmap_on_close,
@@ -762,10 +763,10 @@ extern int ctf_flip (ctf_dict_t *, ctf_header_t *, unsigned char *, int);
extern ctf_dict_t *ctf_simple_open_internal (const char *, size_t, const char *, extern ctf_dict_t *ctf_simple_open_internal (const char *, size_t, const char *,
size_t, size_t, size_t, size_t,
const char *, size_t, const char *, size_t,
ctf_dynhash_t *, int *); ctf_dynhash_t *, ctf_dynhash_t *, int *);
extern ctf_dict_t *ctf_bufopen_internal (const ctf_sect_t *, const ctf_sect_t *, extern ctf_dict_t *ctf_bufopen_internal (const ctf_sect_t *, const ctf_sect_t *,
const ctf_sect_t *, ctf_dynhash_t *, const ctf_sect_t *, ctf_dynhash_t *,
int *); ctf_dynhash_t *, int *);
extern int ctf_import_unref (ctf_dict_t *fp, ctf_dict_t *pfp); extern int ctf_import_unref (ctf_dict_t *fp, ctf_dict_t *pfp);
extern int ctf_serialize (ctf_dict_t *); extern int ctf_serialize (ctf_dict_t *);

View File

@@ -1290,7 +1290,7 @@ ctf_dict_t *ctf_simple_open (const char *ctfsect, size_t ctfsect_size,
{ {
return ctf_simple_open_internal (ctfsect, ctfsect_size, symsect, symsect_size, return ctf_simple_open_internal (ctfsect, ctfsect_size, symsect, symsect_size,
symsect_entsize, strsect, strsect_size, NULL, symsect_entsize, strsect, strsect_size, NULL,
errp); NULL, errp);
} }
/* Open a CTF file, mocking up a suitable ctf_sect and overriding the external /* Open a CTF file, mocking up a suitable ctf_sect and overriding the external
@@ -1300,7 +1300,8 @@ ctf_dict_t *ctf_simple_open_internal (const char *ctfsect, size_t ctfsect_size,
const char *symsect, size_t symsect_size, const char *symsect, size_t symsect_size,
size_t symsect_entsize, size_t symsect_entsize,
const char *strsect, size_t strsect_size, const char *strsect, size_t strsect_size,
ctf_dynhash_t *syn_strtab, int *errp) ctf_dynhash_t *syn_strtab,
ctf_dynhash_t *atoms, int *errp)
{ {
ctf_sect_t skeleton; ctf_sect_t skeleton;
@@ -1338,7 +1339,7 @@ ctf_dict_t *ctf_simple_open_internal (const char *ctfsect, size_t ctfsect_size,
} }
return ctf_bufopen_internal (ctfsectp, symsectp, strsectp, syn_strtab, return ctf_bufopen_internal (ctfsectp, symsectp, strsectp, syn_strtab,
errp); atoms, errp);
} }
/* Decode the specified CTF buffer and optional symbol table, and create a new /* Decode the specified CTF buffer and optional symbol table, and create a new
@@ -1350,7 +1351,7 @@ ctf_dict_t *
ctf_bufopen (const ctf_sect_t *ctfsect, const ctf_sect_t *symsect, ctf_bufopen (const ctf_sect_t *ctfsect, const ctf_sect_t *symsect,
const ctf_sect_t *strsect, int *errp) const ctf_sect_t *strsect, int *errp)
{ {
return ctf_bufopen_internal (ctfsect, symsect, strsect, NULL, errp); return ctf_bufopen_internal (ctfsect, symsect, strsect, NULL, NULL, errp);
} }
/* Like ctf_bufopen, but overriding the external strtab with a synthetic one. */ /* Like ctf_bufopen, but overriding the external strtab with a synthetic one. */
@@ -1358,7 +1359,7 @@ ctf_bufopen (const ctf_sect_t *ctfsect, const ctf_sect_t *symsect,
ctf_dict_t * ctf_dict_t *
ctf_bufopen_internal (const ctf_sect_t *ctfsect, const ctf_sect_t *symsect, ctf_bufopen_internal (const ctf_sect_t *ctfsect, const ctf_sect_t *symsect,
const ctf_sect_t *strsect, ctf_dynhash_t *syn_strtab, const ctf_sect_t *strsect, ctf_dynhash_t *syn_strtab,
int *errp) ctf_dynhash_t *atoms, int *errp)
{ {
const ctf_preamble_t *pp; const ctf_preamble_t *pp;
size_t hdrsz = sizeof (ctf_header_t); size_t hdrsz = sizeof (ctf_header_t);
@@ -1615,7 +1616,14 @@ ctf_bufopen_internal (const ctf_sect_t *ctfsect, const ctf_sect_t *symsect,
ctf_set_base(). */ ctf_set_base(). */
ctf_set_version (fp, hp, hp->cth_version); ctf_set_version (fp, hp, hp->cth_version);
if (ctf_str_create_atoms (fp) < 0)
/* Temporary assignment, just enough to be able to initialize
the atoms table. */
fp->ctf_str[CTF_STRTAB_0].cts_strs = (const char *) fp->ctf_buf
+ hp->cth_stroff;
fp->ctf_str[CTF_STRTAB_0].cts_len = hp->cth_strlen;
if (ctf_str_create_atoms (fp, atoms) < 0)
{ {
err = ENOMEM; err = ENOMEM;
goto bad; goto bad;

View File

@@ -955,7 +955,7 @@ ctf_serialize (ctf_dict_t *fp)
ctf_header_t hdr, *hdrp; ctf_header_t hdr, *hdrp;
ctf_dvdef_t *dvd; ctf_dvdef_t *dvd;
ctf_varent_t *dvarents; ctf_varent_t *dvarents;
ctf_strs_writable_t strtab; const ctf_strs_writable_t *strtab;
int err; int err;
int sym_functions = 0; int sym_functions = 0;
@@ -1090,36 +1090,34 @@ ctf_serialize (ctf_dict_t *fp)
assert (t == (unsigned char *) buf + sizeof (ctf_header_t) + hdr.cth_stroff); assert (t == (unsigned char *) buf + sizeof (ctf_header_t) + hdr.cth_stroff);
/* Construct the final string table and fill out all the string refs with the /* Construct the final string table and fill out all the string refs with the
final offsets. Then purge the refs list, because we're about to move this final offsets. */
strtab onto the end of the buf, invalidating all the offsets. */
strtab = ctf_str_write_strtab (fp);
ctf_str_purge_refs (fp);
if (strtab.cts_strs == NULL) strtab = ctf_str_write_strtab (fp);
if (strtab == NULL)
goto oom; goto oom;
/* Now the string table is constructed, we can sort the buffer of /* Now the string table is constructed, we can sort the buffer of
ctf_varent_t's. */ ctf_varent_t's. */
ctf_sort_var_arg_cb_t sort_var_arg = { fp, (ctf_strs_t *) &strtab }; ctf_sort_var_arg_cb_t sort_var_arg = { fp, (ctf_strs_t *) strtab };
ctf_qsort_r (dvarents, nvars, sizeof (ctf_varent_t), ctf_sort_var, ctf_qsort_r (dvarents, nvars, sizeof (ctf_varent_t), ctf_sort_var,
&sort_var_arg); &sort_var_arg);
if ((newbuf = realloc (buf, buf_size + strtab.cts_len)) == NULL) if ((newbuf = realloc (buf, buf_size + strtab->cts_len)) == NULL)
goto oom; goto oom;
buf = newbuf; buf = newbuf;
memcpy (buf + buf_size, strtab.cts_strs, strtab.cts_len); memcpy (buf + buf_size, strtab->cts_strs, strtab->cts_len);
hdrp = (ctf_header_t *) buf; hdrp = (ctf_header_t *) buf;
hdrp->cth_strlen = strtab.cts_len; hdrp->cth_strlen = strtab->cts_len;
buf_size += hdrp->cth_strlen; buf_size += hdrp->cth_strlen;
free (strtab.cts_strs);
/* Finally, we are ready to ctf_simple_open() the new dict. If this is /* Finally, we are ready to ctf_simple_open() the new dict. If this is
successful, we then switch nfp and fp and free the old dict. */ successful, we then switch nfp and fp and free the old dict. */
if ((nfp = ctf_simple_open_internal ((char *) buf, buf_size, NULL, 0, if ((nfp = ctf_simple_open_internal ((char *) buf, buf_size, NULL, 0,
0, NULL, 0, fp->ctf_syn_ext_strtab, 0, NULL, 0, fp->ctf_syn_ext_strtab,
&err)) == NULL) fp->ctf_str_atoms, &err)) == NULL)
{ {
free (buf); free (buf);
return (ctf_set_errno (fp, err)); return (ctf_set_errno (fp, err));
@@ -1189,9 +1187,11 @@ ctf_serialize (ctf_dict_t *fp)
ctf_str_free_atoms (nfp); ctf_str_free_atoms (nfp);
nfp->ctf_str_atoms = fp->ctf_str_atoms; nfp->ctf_str_atoms = fp->ctf_str_atoms;
nfp->ctf_prov_strtab = fp->ctf_prov_strtab; nfp->ctf_prov_strtab = fp->ctf_prov_strtab;
nfp->ctf_dynstrtab = fp->ctf_dynstrtab;
nfp->ctf_str_movable_refs = fp->ctf_str_movable_refs; nfp->ctf_str_movable_refs = fp->ctf_str_movable_refs;
fp->ctf_str_atoms = NULL; fp->ctf_str_atoms = NULL;
fp->ctf_prov_strtab = NULL; fp->ctf_prov_strtab = NULL;
fp->ctf_dynstrtab = NULL;
fp->ctf_str_movable_refs = NULL; fp->ctf_str_movable_refs = NULL;
memset (&fp->ctf_dtdefs, 0, sizeof (ctf_list_t)); memset (&fp->ctf_dtdefs, 0, sizeof (ctf_list_t));
memset (&fp->ctf_errs_warnings, 0, sizeof (ctf_list_t)); memset (&fp->ctf_errs_warnings, 0, sizeof (ctf_list_t));

View File

@@ -20,10 +20,14 @@
#include <assert.h> #include <assert.h>
#include <ctf-impl.h> #include <ctf-impl.h>
#include <string.h> #include <string.h>
#include <assert.h>
/* Convert an encoded CTF string name into a pointer to a C string, using an static ctf_str_atom_t *
explicit internal strtab rather than the fp-based one. */ ctf_str_add_ref_internal (ctf_dict_t *fp, const char *str,
int flags, uint32_t *ref);
/* Convert an encoded CTF string name into a pointer to a C string, possibly
using an explicit internal provisional strtab rather than the fp-based
one. */
const char * const char *
ctf_strraw_explicit (ctf_dict_t *fp, uint32_t name, ctf_strs_t *strtab) ctf_strraw_explicit (ctf_dict_t *fp, uint32_t name, ctf_strs_t *strtab)
{ {
@@ -32,18 +36,20 @@ ctf_strraw_explicit (ctf_dict_t *fp, uint32_t name, ctf_strs_t *strtab)
if ((CTF_NAME_STID (name) == CTF_STRTAB_0) && (strtab != NULL)) if ((CTF_NAME_STID (name) == CTF_STRTAB_0) && (strtab != NULL))
ctsp = strtab; ctsp = strtab;
/* If this name is in the external strtab, and there is a synthetic strtab, /* If this name is in the external strtab, and there is a synthetic
use it in preference. */ strtab, use it in preference. (This is used to add the set of strings
-- symbol names, etc -- the linker knows about before the strtab is
written out.) */
if (CTF_NAME_STID (name) == CTF_STRTAB_1 if (CTF_NAME_STID (name) == CTF_STRTAB_1
&& fp->ctf_syn_ext_strtab != NULL) && fp->ctf_syn_ext_strtab != NULL)
return ctf_dynhash_lookup (fp->ctf_syn_ext_strtab, return ctf_dynhash_lookup (fp->ctf_syn_ext_strtab,
(void *) (uintptr_t) name); (void *) (uintptr_t) name);
/* If the name is in the internal strtab, and the offset is beyond the end of /* If the name is in the internal strtab, and the name offset is beyond
the ctsp->cts_len but below the ctf_str_prov_offset, this is a provisional the end of the ctsp->cts_len but below the ctf_str_prov_offset, this is
string added by ctf_str_add*() but not yet built into a real strtab: get a provisional string added by ctf_str_add*() but not yet built into a
the value out of the ctf_prov_strtab. */ real strtab: get the value out of the ctf_prov_strtab. */
if (CTF_NAME_STID (name) == CTF_STRTAB_0 if (CTF_NAME_STID (name) == CTF_STRTAB_0
&& name >= ctsp->cts_len && name < fp->ctf_str_prov_offset) && name >= ctsp->cts_len && name < fp->ctf_str_prov_offset)
@@ -134,13 +140,25 @@ ctf_str_free_atom (void *a)
} }
/* Create the atoms table. There is always at least one atom in it, the null /* Create the atoms table. There is always at least one atom in it, the null
string. */ string: but also pull in atoms from the internal strtab. (We rely on
calls to ctf_str_add_external to populate external strtab entries, since
these are often not quite the same as what appears in any external
strtab, and the external strtab is often huge and best not aggressively
pulled in.)
Alternatively, if passed, populate atoms from the passed-in table, but do
not propagate their flags or refs: they are all non-freeable and
non-movable. (This is used when serializing a dict: this entire atoms
table will be thrown away shortly, so it is important that we not create
any new strings.) */
int int
ctf_str_create_atoms (ctf_dict_t *fp) ctf_str_create_atoms (ctf_dict_t *fp, ctf_dynhash_t *atoms)
{ {
size_t i;
fp->ctf_str_atoms = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string, fp->ctf_str_atoms = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string,
free, ctf_str_free_atom); NULL, ctf_str_free_atom);
if (fp->ctf_str_atoms == NULL) if (!fp->ctf_str_atoms)
return -ENOMEM; return -ENOMEM;
if (!fp->ctf_prov_strtab) if (!fp->ctf_prov_strtab)
@@ -161,6 +179,63 @@ ctf_str_create_atoms (ctf_dict_t *fp)
if (errno == ENOMEM) if (errno == ENOMEM)
goto oom_str_add; goto oom_str_add;
/* Serializing. We have existing strings in an existing atoms table with
possibly-live pointers to them which must be used unchanged. Import
them into this atoms table. */
if (atoms)
{
ctf_next_t *it = NULL;
void *k, *v;
int err;
while ((err = ctf_dynhash_next (atoms, &it, &k, &v)) == 0)
{
ctf_str_atom_t *existing = v;
ctf_str_atom_t *atom;
if (existing->csa_str[0] == 0)
continue;
if ((atom = malloc (sizeof (struct ctf_str_atom))) == NULL)
goto oom_str_add;
memcpy (atom, existing, sizeof (struct ctf_str_atom));
memset (&atom->csa_refs, 0, sizeof(ctf_list_t));
atom->csa_flags = 0;
if (ctf_dynhash_insert (fp->ctf_str_atoms, atom->csa_str, atom) < 0)
{
free (atom);
goto oom_str_add;
}
}
}
else
{
/* Not serializing. Pull in all the strings in the strtab as new
atoms. The provisional strtab must be empty at this point, so
there is no need to populate atoms from it as well. Types in this
subset are frozen and readonly, so the refs list and movable refs
list need not be populated. */
for (i = 0; i < fp->ctf_str[CTF_STRTAB_0].cts_len;
i += strlen (&fp->ctf_str[CTF_STRTAB_0].cts_strs[i]) + 1)
{
ctf_str_atom_t *atom;
if (fp->ctf_str[CTF_STRTAB_0].cts_strs[i] == 0)
continue;
atom = ctf_str_add_ref_internal (fp, &fp->ctf_str[CTF_STRTAB_0].cts_strs[i],
0, 0);
if (!atom)
goto oom_str_add;
atom->csa_offset = i;
}
}
return 0; return 0;
oom_str_add: oom_str_add:
@@ -182,6 +257,11 @@ ctf_str_free_atoms (ctf_dict_t *fp)
ctf_dynhash_destroy (fp->ctf_prov_strtab); ctf_dynhash_destroy (fp->ctf_prov_strtab);
ctf_dynhash_destroy (fp->ctf_str_atoms); ctf_dynhash_destroy (fp->ctf_str_atoms);
ctf_dynhash_destroy (fp->ctf_str_movable_refs); ctf_dynhash_destroy (fp->ctf_str_movable_refs);
if (fp->ctf_dynstrtab)
{
free (fp->ctf_dynstrtab->cts_strs);
free (fp->ctf_dynstrtab);
}
} }
#define CTF_STR_ADD_REF 0x1 #define CTF_STR_ADD_REF 0x1
@@ -538,69 +618,6 @@ ctf_str_update_refs (ctf_str_atom_t *refs, uint32_t value)
*(ref->caf_ref) = value; *(ref->caf_ref) = value;
} }
/* State shared across the strtab write process. */
typedef struct ctf_strtab_write_state
{
/* Strtab we are writing, and the number of strings in it. */
ctf_strs_writable_t *strtab;
size_t strtab_count;
/* Pointers to (existing) atoms in the atoms table, for qsorting. */
ctf_str_atom_t **sorttab;
/* Loop counter for sorttab population. */
size_t i;
/* The null-string atom (skipped during population). */
ctf_str_atom_t *nullstr;
} ctf_strtab_write_state_t;
/* Count the number of entries in the strtab, and its length. */
static void
ctf_str_count_strtab (void *key _libctf_unused_, void *value,
void *arg)
{
ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
ctf_strtab_write_state_t *s = (ctf_strtab_write_state_t *) arg;
/* We only factor in the length of items that have no offset and have refs:
other items are in the external strtab, or will simply not be written out
at all. They still contribute to the total count, though, because we still
have to sort them. We add in the null string's length explicitly, outside
this function, since it is explicitly written out even if it has no refs at
all. */
if (s->nullstr == atom)
{
s->strtab_count++;
return;
}
if (!ctf_list_empty_p (&atom->csa_refs))
{
if (!atom->csa_external_offset)
s->strtab->cts_len += strlen (atom->csa_str) + 1;
s->strtab_count++;
}
}
/* Populate the sorttab with pointers to the strtab atoms. */
static void
ctf_str_populate_sorttab (void *key _libctf_unused_, void *value,
void *arg)
{
ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
ctf_strtab_write_state_t *s = (ctf_strtab_write_state_t *) arg;
/* Skip the null string. */
if (s->nullstr == atom)
return;
/* Skip atoms with no refs. */
if (!ctf_list_empty_p (&atom->csa_refs))
s->sorttab[s->i++] = atom;
}
/* Sort the strtab. */ /* Sort the strtab. */
static int static int
ctf_str_sort_strtab (const void *a, const void *b) ctf_str_sort_strtab (const void *a, const void *b)
@@ -612,79 +629,182 @@ ctf_str_sort_strtab (const void *a, const void *b)
} }
/* Write out and return a strtab containing all strings with recorded refs, /* Write out and return a strtab containing all strings with recorded refs,
adjusting the refs to refer to the corresponding string. The returned strtab adjusting the refs to refer to the corresponding string. The returned
may be NULL on error. Also populate the synthetic strtab with mappings from strtab is already assigned to strtab 0 in this dict, is owned by this
external strtab offsets to names, so we can look them up with ctf_strptr(). dict, and may be NULL on error. Also populate the synthetic strtab with
Only external strtab offsets with references are added. */ mappings from external strtab offsets to names, so we can look them up
ctf_strs_writable_t with ctf_strptr(). Only external strtab offsets with references are
added.
As a side effect, replaces the strtab of the current dict with the newly-
generated strtab. This is an exception to the general rule that
serialization does not change the dict passed in, because the alternative
is to copy the entire atoms table on every reserialization just to avoid
modifying the original, which is excessively costly for minimal gain.
We use the lazy man's approach and double memory costs by always storing
atoms as individually allocated entities whenever they come from anywhere
but a freshly-opened, mmapped dict, even though after serialization there
is another copy in the strtab; this ensures that ctf_strptr()-returned
pointers to them remain valid for the lifetime of the dict.
This is all rendered more complex because if a dict is ctf_open()ed it
will have a bunch of strings in its strtab already, and their strtab
offsets can never change (without piles of complexity to rescan the
entire dict just to get all the offsets to all of them into the atoms
table). Entries below the existing strtab limit are just copied into the
new dict: entries above it are new, and are sorted first, then
appended to it. The sorting is purely a compression-efficiency
improvement, and we get nearly as good an improvement from sorting big
chunks like this as we would from sorting the whole thing. */
const ctf_strs_writable_t *
ctf_str_write_strtab (ctf_dict_t *fp) ctf_str_write_strtab (ctf_dict_t *fp)
{ {
ctf_strs_writable_t strtab; ctf_strs_writable_t *strtab;
ctf_str_atom_t *nullstr; size_t strtab_count = 0;
uint32_t cur_stroff = 0; uint32_t cur_stroff = 0;
ctf_strtab_write_state_t s;
ctf_str_atom_t **sorttab; ctf_str_atom_t **sorttab;
ctf_next_t *it = NULL;
size_t i; size_t i;
void *v;
int err;
int new_strtab = 0;
int any_external = 0; int any_external = 0;
memset (&strtab, 0, sizeof (struct ctf_strs_writable)); strtab = calloc (1, sizeof (ctf_strs_writable_t));
memset (&s, 0, sizeof (struct ctf_strtab_write_state)); if (!strtab)
s.strtab = &strtab; return NULL;
nullstr = ctf_dynhash_lookup (fp->ctf_str_atoms, ""); /* The strtab contains the existing string table at its start: figure out
if (!nullstr) how many new strings we need to add. We only need to add new strings
that have no external offset, that have refs, and that are found in the
provisional strtab. If the existing strtab is empty we also need to
add the null string at its start. */
strtab->cts_len = fp->ctf_str[CTF_STRTAB_0].cts_len;
if (strtab->cts_len == 0)
{ {
ctf_err_warn (fp, 0, ECTF_INTERNAL, _("null string not found in strtab")); new_strtab = 1;
strtab.cts_strs = NULL; strtab->cts_len++; /* For the \0. */
return strtab;
} }
s.nullstr = nullstr; /* Count new entries in the strtab: i.e. entries in the provisional
ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_count_strtab, &s); strtab. Ignore any entry for \0, entries which ended up in the
strtab.cts_len++; /* For the null string. */ external strtab, and unreferenced entries. */
ctf_dprintf ("%lu bytes of strings in strtab.\n", while ((err = ctf_dynhash_next (fp->ctf_prov_strtab, &it, NULL, &v)) == 0)
(unsigned long) strtab.cts_len); {
const char *str = (const char *) v;
ctf_str_atom_t *atom;
/* Sort the strtab. Force the null string to be first. */ atom = ctf_dynhash_lookup (fp->ctf_str_atoms, str);
sorttab = calloc (s.strtab_count, sizeof (ctf_str_atom_t *)); if (!ctf_assert (fp, atom))
goto err_strtab;
if (atom->csa_str[0] == 0 || ctf_list_empty_p (&atom->csa_refs) ||
atom->csa_external_offset)
continue;
strtab->cts_len += strlen (atom->csa_str) + 1;
strtab_count++;
}
if (err != ECTF_NEXT_END)
{
ctf_dprintf ("ctf_str_write_strtab: error counting strtab entries: %s\n",
ctf_errmsg (err));
goto err_strtab;
}
ctf_dprintf ("%lu bytes of strings in strtab: %lu pre-existing.\n",
(unsigned long) strtab->cts_len,
(unsigned long) fp->ctf_str[CTF_STRTAB_0].cts_len);
/* Sort the new part of the strtab. */
sorttab = calloc (strtab_count, sizeof (ctf_str_atom_t *));
if (!sorttab) if (!sorttab)
goto oom; {
ctf_set_errno (fp, ENOMEM);
goto err_strtab;
}
sorttab[0] = nullstr; i = 0;
s.i = 1; while ((err = ctf_dynhash_next (fp->ctf_prov_strtab, &it, NULL, &v)) == 0)
s.sorttab = sorttab; {
ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_populate_sorttab, &s); ctf_str_atom_t *atom;
qsort (&sorttab[1], s.strtab_count - 1, sizeof (ctf_str_atom_t *), atom = ctf_dynhash_lookup (fp->ctf_str_atoms, v);
if (!ctf_assert (fp, atom))
goto err_sorttab;
if (atom->csa_str[0] == 0 || ctf_list_empty_p (&atom->csa_refs) ||
atom->csa_external_offset)
continue;
sorttab[i++] = atom;
}
qsort (sorttab, strtab_count, sizeof (ctf_str_atom_t *),
ctf_str_sort_strtab); ctf_str_sort_strtab);
if ((strtab.cts_strs = malloc (strtab.cts_len)) == NULL) if ((strtab->cts_strs = malloc (strtab->cts_len)) == NULL)
goto oom_sorttab; goto err_sorttab;
/* Update all refs: also update the strtab appropriately. */ cur_stroff = fp->ctf_str[CTF_STRTAB_0].cts_len;
for (i = 0; i < s.strtab_count; i++)
{
if (sorttab[i]->csa_external_offset)
{
/* External strtab entry. */
any_external = 1; if (new_strtab)
ctf_str_update_refs (sorttab[i], sorttab[i]->csa_external_offset); {
sorttab[i]->csa_offset = sorttab[i]->csa_external_offset; strtab->cts_strs[0] = 0;
cur_stroff++;
} }
else else
{ memcpy (strtab->cts_strs, fp->ctf_str[CTF_STRTAB_0].cts_strs,
/* Internal strtab entry with refs: actually add to the string fp->ctf_str[CTF_STRTAB_0].cts_len);
table. */
ctf_str_update_refs (sorttab[i], cur_stroff); /* Work over the sorttab, add its strings to the strtab, and remember
where they are in the csa_offset for the appropriate atom. No ref
updating is done at this point, because refs might well relate to
already-existing strings, or external strings, which do not need adding
to the strtab and may not be in the sorttab. */
for (i = 0; i < strtab_count; i++)
{
sorttab[i]->csa_offset = cur_stroff; sorttab[i]->csa_offset = cur_stroff;
strcpy (&strtab.cts_strs[cur_stroff], sorttab[i]->csa_str); strcpy (&strtab->cts_strs[cur_stroff], sorttab[i]->csa_str);
cur_stroff += strlen (sorttab[i]->csa_str) + 1; cur_stroff += strlen (sorttab[i]->csa_str) + 1;
} }
}
free (sorttab); free (sorttab);
sorttab = NULL;
/* Update all refs, then purge them as no longer necessary: also update
the strtab appropriately. */
while ((err = ctf_dynhash_next (fp->ctf_str_atoms, &it, NULL, &v)) == 0)
{
ctf_str_atom_t *atom = (ctf_str_atom_t *) v;
uint32_t offset;
if (ctf_list_empty_p (&atom->csa_refs))
continue;
if (atom->csa_external_offset)
{
any_external = 1;
offset = atom->csa_external_offset;
}
else
offset = atom->csa_offset;
ctf_str_update_refs (atom, offset);
}
if (err != ECTF_NEXT_END)
{
ctf_dprintf ("ctf_str_write_strtab: error iterating over atoms while updating refs: %s\n",
ctf_errmsg (err));
goto err_strtab;
}
ctf_str_purge_refs (fp);
if (!any_external) if (!any_external)
{ {
@@ -692,16 +812,29 @@ ctf_str_write_strtab (ctf_dict_t *fp)
fp->ctf_syn_ext_strtab = NULL; fp->ctf_syn_ext_strtab = NULL;
} }
/* Replace the old strtab with the new one in this dict. */
if (fp->ctf_dynstrtab)
{
free (fp->ctf_dynstrtab->cts_strs);
free (fp->ctf_dynstrtab);
}
fp->ctf_dynstrtab = strtab;
fp->ctf_str[CTF_STRTAB_0].cts_strs = strtab->cts_strs;
fp->ctf_str[CTF_STRTAB_0].cts_len = strtab->cts_len;
/* All the provisional strtab entries are now real strtab entries, and /* All the provisional strtab entries are now real strtab entries, and
ctf_strptr() will find them there. The provisional offset now starts right ctf_strptr() will find them there. The provisional offset now starts right
beyond the new end of the strtab. */ beyond the new end of the strtab. */
ctf_dynhash_empty (fp->ctf_prov_strtab); ctf_dynhash_empty (fp->ctf_prov_strtab);
fp->ctf_str_prov_offset = strtab.cts_len + 1; fp->ctf_str_prov_offset = strtab->cts_len + 1;
return strtab; return strtab;
oom_sorttab: err_sorttab:
free (sorttab); free (sorttab);
oom: err_strtab:
return strtab; free (strtab);
return NULL;
} }