libctf: move string deduplication into ctf-archive

This means that any archive containing dicts can get its strings deduplicated
together, rather than only those archives that are ctf_linked.

(For now, we are still constrained to ctf_linked archives, since fixing that
requires further changes to ctf_dedup_strings: but this gives us the first
half of what is necessary.)

libctf/
	* ctf-link.c (ctf_link_write): Move string dedup into...
	* ctf-archive.c (ctf_arc_preserialize): ... this new function.
	(ctf_arc_write_fd): Call it.
This commit is contained in:
Nick Alcock
2025-02-16 19:39:41 +00:00
parent 06f77d49f6
commit beccf36b88
4 changed files with 85 additions and 43 deletions

View File

@@ -152,7 +152,7 @@ static ctf_list_t open_errors;
open errors list if NULL): if ERR is nonzero it is the errno to report to the open errors list if NULL): if ERR is nonzero it is the errno to report to the
debug stream instead of that recorded on fp. */ debug stream instead of that recorded on fp. */
_libctf_printflike_ (4, 5) _libctf_printflike_ (4, 5)
extern void void
ctf_err_warn (ctf_dict_t *fp, int is_warning, int err, ctf_err_warn (ctf_dict_t *fp, int is_warning, int err,
const char *format, ...) const char *format, ...)
{ {
@@ -203,6 +203,18 @@ ctf_err_warn_to_open (ctf_dict_t *fp)
ctf_list_splice (&open_errors, &fp->ctf_errs_warnings); ctf_list_splice (&open_errors, &fp->ctf_errs_warnings);
} }
/* Copy all the errors/warnings from one fp to another one, and the error code
   as well.

   DEST receives a copy of every entry on SRC's error/warning list, in list
   order, followed by SRC's error code.  SRC's list is left untouched.

   The stored message text is already fully formatted, so it is passed as a
   "%s" argument rather than as the format itself: ctf_err_warn is
   printf-like, and any '%' in a recorded message would otherwise be
   reinterpreted as a conversion specification.

   If DEST and SRC are the same dict, the list is not walked.
   NOTE(review): ctf_err_warn presumably appends to DEST's list (its body is
   not visible here); if so, walking that same list while appending to it
   would never reach the end.  */

void
ctf_err_copy (ctf_dict_t *dest, ctf_dict_t *src)
{
  ctf_err_warning_t *cew;

  if (dest != src)
    {
      for (cew = ctf_list_next (&src->ctf_errs_warnings); cew != NULL;
           cew = ctf_list_next (cew))
        ctf_err_warn (dest, cew->cew_is_warning, 0, "%s", cew->cew_text);
    }

  ctf_set_errno (dest, ctf_errno (src));
}
/* Error-warning reporting: an 'iterator' that returns errors and warnings from /* Error-warning reporting: an 'iterator' that returns errors and warnings from
the error/warning list, in order of emission. Errors and warnings are popped the error/warning list, in order of emission. Errors and warnings are popped
after return: the caller must free the returned error-text pointer. after return: the caller must free the returned error-text pointer.

View File

@@ -51,6 +51,64 @@ static int ctf_arc_import_parent (const ctf_archive_t *arc, ctf_dict_t *fp,
and ctfi_symnamedicts. Never initialized. */ and ctfi_symnamedicts. Never initialized. */
static ctf_dict_t enosym; static ctf_dict_t enosym;
/* Prepare to serialize everything.  Members of archives have dependencies on
   each other, because the strtabs and type IDs of children depend on the
   parent: so we have to work over the archive as a whole to prepare for final
   serialization.

   CTF_DICTS is the array of archive members and CTF_DICT_CNT its length; the
   caller must pass at least one dict, since the error path reports into
   CTF_DICTS[0].

   Returns zero on success, or an errno, or an ECTF_* value.
   Updates the first dict in the archive with the errno value.

   On failure, every dict that was successfully preserialized is
   depreserialized again before returning.

   NOTE(review): the returned error codes come from ctf_errno, while the
   visible call site in ctf_arc_write_fd tests the result with "< 0".  Confirm
   that these codes can actually satisfy that test, or that the caller should
   be testing for nonzero instead.  */

static int
ctf_arc_preserialize (ctf_dict_t **ctf_dicts, ssize_t ctf_dict_cnt)
{
  uint64_t old_parent_strlen, all_strlens = 0;
  ssize_t i;
  ssize_t failed;		/* Index of the dict that reported the error.  */
  ssize_t preserialized;	/* Dicts [0, preserialized) need undoing.  */
  int err;

  ctf_dprintf ("Preserializing dicts.\n");

  /* Preserialize everything, doing everything but strtab generation and things
     that depend on that.  */
  for (i = 0; i < ctf_dict_cnt; i++)
    if (ctf_preserialize (ctf_dicts[i]) < 0)
      {
        /* The failing dict itself is not depreserialized: only the ones
           before it are.  */
        failed = i;
        preserialized = i;
        goto err;
      }

  ctf_dprintf ("Deduplicating strings.\n");

  for (i = 0; i < ctf_dict_cnt; i++)
    all_strlens += ctf_dicts[i]->ctf_str[0].cts_len
      + ctf_dicts[i]->ctf_str_prov_len;

  /* If linking, deduplicate strings against the children in every dict that has
     any.  (String deduplication is not yet implemented for non-linked dicts.)  */
  for (i = 0; i < ctf_dict_cnt; i++)
    if ((ctf_dicts[i]->ctf_flags & LCTF_LINKING)
        && ctf_dicts[i]->ctf_link_outputs)
      {
        old_parent_strlen = ctf_dicts[i]->ctf_str[0].cts_len
          + ctf_dicts[i]->ctf_str_prov_len;

        if (ctf_dedup_strings (ctf_dicts[i]) < 0)
          {
            /* By now *every* dict has been preserialized, not just those
               before the failing one, so all of them must be undone.  */
            failed = i;
            preserialized = ctf_dict_cnt;
            goto err;
          }

        ctf_dprintf ("Deduplicated strings in archive member %zi: "
                     "original parent strlen: %zu; original lengths: %zu; "
                     "final length: %zu.\n", i, (size_t) old_parent_strlen,
                     (size_t) all_strlens,
                     (size_t) ctf_dicts[i]->ctf_str_prov_len);
      }

  return 0;

 err:
  err = ctf_errno (ctf_dicts[failed]);

  /* If the first dict is the one that failed, its errors and error code are
     already where they need to be: copying a dict's error list onto itself
     is at best pointless.  */
  if (failed != 0)
    ctf_err_copy (ctf_dicts[0], ctf_dicts[failed]);

  for (i = preserialized - 1; i >= 0; i--)
    ctf_depreserialize (ctf_dicts[i]);

  return err;
}
/* Write out a CTF archive to the start of the file referenced by the passed-in /* Write out a CTF archive to the start of the file referenced by the passed-in
fd. The entries in CTF_DICTS are referenced by name: the names are passed in fd. The entries in CTF_DICTS are referenced by name: the names are passed in
the names array, which must have CTF_DICTS entries. the names array, which must have CTF_DICTS entries.
@@ -70,8 +128,15 @@ ctf_arc_write_fd (int fd, ctf_dict_t **ctf_dicts, size_t ctf_dict_cnt,
char *nametbl = NULL; /* The name table. */ char *nametbl = NULL; /* The name table. */
char *np; char *np;
off_t nameoffs; off_t nameoffs;
int err;
struct ctf_archive_modent *modent; struct ctf_archive_modent *modent;
/* Prepare by serializing everything. Done first because it allocates a lot
of space and thus is more likely to fail. */
if (ctf_dict_cnt > 0 &&
(err = ctf_arc_preserialize (ctf_dicts, ctf_dict_cnt)) < 0)
return err;
ctf_dprintf ("Writing CTF archive with %lu files\n", ctf_dprintf ("Writing CTF archive with %lu files\n",
(unsigned long) ctf_dict_cnt); (unsigned long) ctf_dict_cnt);

View File

@@ -792,6 +792,7 @@ _libctf_printflike_ (4, 5)
extern void ctf_err_warn (ctf_dict_t *, int is_warning, int err, extern void ctf_err_warn (ctf_dict_t *, int is_warning, int err,
const char *, ...); const char *, ...);
extern void ctf_err_warn_to_open (ctf_dict_t *); extern void ctf_err_warn_to_open (ctf_dict_t *);
extern void ctf_err_copy (ctf_dict_t *dest, ctf_dict_t *src);
extern void ctf_assert_fail_internal (ctf_dict_t *, const char *, extern void ctf_assert_fail_internal (ctf_dict_t *, const char *,
size_t, const char *); size_t, const char *);
extern const char *ctf_link_input_name (ctf_dict_t *); extern const char *ctf_link_input_name (ctf_dict_t *);

View File

@@ -2052,7 +2052,6 @@ ctf_link_write (ctf_dict_t *fp, size_t *size, size_t threshold)
long fsize; long fsize;
const char *errloc; const char *errloc;
unsigned char *buf = NULL; unsigned char *buf = NULL;
uint64_t old_parent_strlen, all_strlens = 0;
memset (&arg, 0, sizeof (ctf_name_list_accum_cb_arg_t)); memset (&arg, 0, sizeof (ctf_name_list_accum_cb_arg_t));
arg.fp = fp; arg.fp = fp;
@@ -2123,41 +2122,6 @@ ctf_link_write (ctf_dict_t *fp, size_t *size, size_t threshold)
memmove (&(arg.files[1]), arg.files, sizeof (ctf_dict_t *) * (arg.i)); memmove (&(arg.files[1]), arg.files, sizeof (ctf_dict_t *) * (arg.i));
arg.files[0] = fp; arg.files[0] = fp;
/* Preserialize everything, doing everything but strtab generation and things that
depend on that. */
for (i = 0; i < arg.i + 1; i++)
{
if (ctf_preserialize (arg.files[i]) < 0)
{
errno = ctf_errno (arg.files[i]);
for (i--; i >= 0; i--)
ctf_depreserialize (arg.files[i]);
errloc = "preserialization";
goto err_no;
}
}
ctf_dprintf ("Deduplicating strings.\n");
for (i = 0; i < arg.i; i++)
all_strlens += arg.files[i]->ctf_str[0].cts_len
+ arg.files[i]->ctf_str_prov_len;
old_parent_strlen = arg.files[0]->ctf_str[0].cts_len
+ arg.files[0]->ctf_str_prov_len;
if (ctf_dedup_strings (fp) < 0)
{
for (i = 0; i < arg.i + 1; i++)
ctf_depreserialize (arg.files[i]);
errloc = "string deduplication";
goto err_str_dedup;
}
ctf_dprintf ("Deduplicated strings: original parent strlen: %zu; "
"original lengths: %zu; final length: %zu.\n",
(size_t) old_parent_strlen, (size_t) all_strlens,
(size_t) arg.files[0]->ctf_str_prov_len);
if ((f = tmpfile ()) == NULL) if ((f = tmpfile ()) == NULL)
{ {
errloc = "tempfile creation"; errloc = "tempfile creation";
@@ -2168,9 +2132,8 @@ ctf_link_write (ctf_dict_t *fp, size_t *size, size_t threshold)
(const char **) arg.names, (const char **) arg.names,
threshold)) < 0) threshold)) < 0)
{ {
errloc = "archive writing"; errloc = NULL; /* errno is set for us. */
errno = err; goto err_set;
goto err_no;
} }
if (fseek (f, 0, SEEK_END) < 0) if (fseek (f, 0, SEEK_END) < 0)
@@ -2229,7 +2192,7 @@ ctf_link_write (ctf_dict_t *fp, size_t *size, size_t threshold)
err_no: err_no:
ctf_set_errno (fp, errno); ctf_set_errno (fp, errno);
err_str_dedup: err_set:
/* Turn off the is-linking flag on all the dicts in this link, as above. */ /* Turn off the is-linking flag on all the dicts in this link, as above. */
for (i = 0; i < arg.i; i++) for (i = 0; i < arg.i; i++)
{ {
@@ -2251,7 +2214,8 @@ ctf_link_write (ctf_dict_t *fp, size_t *size, size_t threshold)
free (arg.dynames[i]); free (arg.dynames[i]);
free (arg.dynames); free (arg.dynames);
} }
ctf_err_warn (fp, 0, 0, _("cannot write archive in link: %s failure"), if (errloc)
errloc); ctf_err_warn (fp, 0, 0, _("cannot write archive in link: %s failure"),
errloc);
return NULL; return NULL;
} }