strip: Properly handle LLVM IR bitcode

commit 717a38e9a0
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Sun May 4 05:12:46 2025 +0800

    strip: Add GCC LTO IR support

added "-R .gnu.lto_.*" to strip to remove all GCC LTO sections.  When
"-R .gnu.lto_.*" is used, the plugin target is ignored so that all LTO
sections are stripped as the regular sections.  It works for the slim
GCC LTO IR since the GCC LTO IR is stored in the regular sections.  When
the plugin target is ignored, the GCC LTO IR can be recognized as the
normal object files.  But it doesn't work for the slim LLVM IR which
is stored in a standalone file.

1. Add bfd_check_format_matches_lto and bfd_check_format_lto to take an
argument, lto_sections_removed, to indicate if all LTO sections should
be removed.
2. Update strip to always enable the plugin target so that the plugin
target is enabled when checking for bfd_archive.
3. Update strip to ignore the plugin target for bfd_object when all LTO
sections should be removed.  If the object is unknown, copy it as an
unknown file without any messages.
4. Treat the "-R .llvm.lto" strip option as removing all LTO sections.

bfd/

	PR binutils/33198
	* format.c (bfd_check_format_lto): New function.
	(bfd_check_format): Call bfd_check_format_matches_lto.
	(bfd_check_format_matches): Renamed to ...
	(bfd_check_format_matches_lto): This.  Add an argument,
	lto_sections_removed, to indicate if all LTO sections should be
	removed and don't match the plugin target if lto_sections_removed
	is true.
	(bfd_check_format_matches): Call bfd_check_format_matches_lto.
	* bfd-in2.h: Regenerated.

binutils/

	PR binutils/33198
	* objcopy.c (copy_archive): Call bfd_check_format_lto, instead
	of bfd_check_format, and pass lto_sections_removed.  Remove the
	non-fatal message on unknown element since it will be copied as
	an unknown file.
	(copy_file): Don't check lto_sections_removed when enabling LTO
	plugin in strip.
	(copy_file): Ignore the plugin target first if all LTO sections
	should be removed.  Try with the plugin target next if ignoring
	the plugin target failed to match the format.
	(strip_main): Also set lto_sections_removed for -R .llvm.lto.
	* testsuite/binutils-all/x86-64/pr33198.c: New file.
	* testsuite/binutils-all/x86-64/x86-64.exp (run_pr33198_test):
	New.
	Run binutils/33198 tests.
	* testsuite/lib/binutils-common.exp (llvm_plug_opt): New.
	(CLANG_FOR_TARGET): New.  Set to "clang" for native build if
	"clang -v" reports "clang version".

Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
(cherry picked from commit f752be8f91)
This commit is contained in:
H.J. Lu
2025-07-22 14:09:48 -07:00
parent 44cd1f92e5
commit a0d3507d10
6 changed files with 299 additions and 21 deletions

View File

@@ -2903,8 +2903,15 @@ bool generic_core_file_matches_executable_p
(bfd *core_bfd, bfd *exec_bfd);
/* Extracted from format.c. */
bool bfd_check_format_lto (bfd *abfd, bfd_format format,
bool lto_sections_removed);
bool bfd_check_format (bfd *abfd, bfd_format format);
bool bfd_check_format_matches_lto
(bfd *abfd, bfd_format format, char ***matching,
bool lto_sections_removed);
bool bfd_check_format_matches
(bfd *abfd, bfd_format format, char ***matching);

View File

@@ -56,16 +56,19 @@ extern const size_t _bfd_target_vector_entries;
/*
FUNCTION
bfd_check_format
bfd_check_format_lto
SYNOPSIS
bool bfd_check_format (bfd *abfd, bfd_format format);
bool bfd_check_format_lto (bfd *abfd, bfd_format format,
bool lto_sections_removed);
DESCRIPTION
Verify if the file attached to the BFD @var{abfd} is compatible
with the format @var{format} (i.e., one of <<bfd_object>>,
<<bfd_archive>> or <<bfd_core>>).
If LTO_SECTIONS_REMOVED is true, ignore the plugin target.
If the BFD has been set to a specific target before the
call, only the named target and format combination is
checked. If the target has not been set, or has been set to
@@ -99,10 +102,31 @@ DESCRIPTION
itself.
*/
bool
bfd_check_format_lto (bfd *abfd, bfd_format format,
		      bool lto_sections_removed)
{
  /* Thin wrapper around bfd_check_format_matches_lto: NULL is passed
     for MATCHING, so no list of ambiguous matches is requested.  */
  bool recognized;

  recognized = bfd_check_format_matches_lto (abfd, format, NULL,
					     lto_sections_removed);
  return recognized;
}
/*
FUNCTION
bfd_check_format
SYNOPSIS
bool bfd_check_format (bfd *abfd, bfd_format format);
DESCRIPTION
Similar to bfd_check_format_lto, except that the plugin target is
never ignored.
*/
bool
bfd_check_format (bfd *abfd, bfd_format format)
{
/* Check ABFD against FORMAT without asking for a list of ambiguous
   matches (MATCHING is NULL).
   NOTE(review): two return statements are visible here.  Going by the
   ChangeLog entry "(bfd_check_format): Call
   bfd_check_format_matches_lto", the first looks like the pre-change
   line kept by the diff rendering and the second like the line that
   is actually in format.c after this commit -- confirm against the
   real file before touching either.  */
return bfd_check_format_matches (abfd, format, NULL);
return bfd_check_format_matches_lto (abfd, format, NULL, false);
}
struct bfd_preserve
@@ -407,11 +431,12 @@ bfd_set_lto_type (bfd *abfd ATTRIBUTE_UNUSED)
/*
FUNCTION
bfd_check_format_matches
bfd_check_format_matches_lto
SYNOPSIS
bool bfd_check_format_matches
(bfd *abfd, bfd_format format, char ***matching);
bool bfd_check_format_matches_lto
(bfd *abfd, bfd_format format, char ***matching,
bool lto_sections_removed);
DESCRIPTION
Like <<bfd_check_format>>, except when it returns FALSE with
@@ -423,10 +448,14 @@ DESCRIPTION
When done with the list that @var{matching} points to, the caller
should free it.
If LTO_SECTIONS_REMOVED is true, ignore the plugin target.
*/
bool
bfd_check_format_matches (bfd *abfd, bfd_format format, char ***matching)
bfd_check_format_matches_lto (bfd *abfd, bfd_format format,
char ***matching,
bool lto_sections_removed ATTRIBUTE_UNUSED)
{
extern const bfd_target binary_vec;
const bfd_target * const *target;
@@ -495,8 +524,13 @@ bfd_check_format_matches (bfd *abfd, bfd_format format, char ***matching)
if (!bfd_preserve_save (abfd, &preserve, NULL))
goto err_ret;
/* If the target type was explicitly specified, just check that target. */
if (!abfd->target_defaulted)
/* If the target type was explicitly specified, just check that target.
If LTO_SECTIONS_REMOVED is true, don't match the plugin target. */
if (!abfd->target_defaulted
#if BFD_SUPPORTS_PLUGINS
&& (!lto_sections_removed || !bfd_plugin_target_p (abfd->xvec))
#endif
)
{
if (bfd_seek (abfd, 0, SEEK_SET) != 0) /* rewind! */
goto err_ret;
@@ -540,10 +574,12 @@ bfd_check_format_matches (bfd *abfd, bfd_format format, char ***matching)
searching. Don't match the plugin target if we have another
alternative since we want to properly set the input format
before allowing a plugin to claim the file. Also, don't
check the default target twice. */
check the default target twice. If LTO_SECTIONS_REMOVED is
true, don't match the plugin target. */
if (*target == &binary_vec
#if BFD_SUPPORTS_PLUGINS
|| (match_count != 0 && bfd_plugin_target_p (*target))
|| ((lto_sections_removed || match_count != 0)
&& bfd_plugin_target_p (*target))
#endif
|| (!abfd->target_defaulted && *target == save_targ))
continue;
@@ -795,6 +831,32 @@ bfd_check_format_matches (bfd *abfd, bfd_format format, char ***matching)
return false;
}
/*
FUNCTION
bfd_check_format_matches
SYNOPSIS
bool bfd_check_format_matches
(bfd *abfd, bfd_format format, char ***matching);
DESCRIPTION
Like <<bfd_check_format>>, except when it returns FALSE with
<<bfd_errno>> set to <<bfd_error_file_ambiguously_recognized>>. In that
case, if @var{matching} is not NULL, it will be filled in with
a NULL-terminated list of the names of the formats that matched,
allocated with <<malloc>>.
Then the user may choose a format and try again.
When done with the list that @var{matching} points to, the caller
should free it.
*/
bool
bfd_check_format_matches (bfd *abfd, bfd_format format, char ***matching)
{
  /* Same check as bfd_check_format_matches_lto, with
     lto_sections_removed fixed to false.  */
  const bool lto_sections_removed = false;

  return bfd_check_format_matches_lto (abfd, format, matching,
				       lto_sections_removed);
}
/*
FUNCTION
bfd_set_format

View File

@@ -3748,10 +3748,13 @@ copy_archive (bfd *ibfd, bfd *obfd, const char *output_target,
l->obfd = NULL;
list = l;
#if BFD_SUPPORTS_PLUGINS
/* Ignore plugin target if all LTO sections should be removed. */
ok_object = bfd_check_format_lto (this_element, bfd_object,
lto_sections_removed);
#else
ok_object = bfd_check_format (this_element, bfd_object);
if (!ok_object)
bfd_nonfatal_message (NULL, this_element, NULL,
_("Unable to recognise the format of file"));
#endif
/* PR binutils/3110: Cope with archives
containing multiple target types. */
@@ -3887,9 +3890,8 @@ copy_file (const char *input_filename, const char *output_filename, int ofd,
}
#if BFD_SUPPORTS_PLUGINS
/* Enable LTO plugin in strip unless all LTO sections should be
removed. */
if (is_strip && !target && !lto_sections_removed)
/* Enable LTO plugin in strip. */
if (is_strip && !target)
target = "plugin";
#endif
@@ -3987,7 +3989,21 @@ copy_file (const char *input_filename, const char *output_filename, int ofd,
input_arch))
status = 1;
}
else if (bfd_check_format_matches (ibfd, bfd_object, &obj_matching))
else if (
#if BFD_SUPPORTS_PLUGINS
/* Ignore plugin target first if all LTO sections should be
removed. Try with plugin target next if ignoring plugin
target fails to match the format. */
bfd_check_format_matches_lto (ibfd, bfd_object, &obj_matching,
lto_sections_removed)
|| (lto_sections_removed
&& bfd_check_format_matches_lto (ibfd, bfd_object,
&obj_matching, false))
#else
bfd_check_format_matches_lto (ibfd, bfd_object, &obj_matching,
false)
#endif
)
{
bfd *obfd;
do_copy:
@@ -5043,9 +5059,13 @@ strip_main (int argc, char *argv[])
#if BFD_SUPPORTS_PLUGINS
/* Check if all GCC LTO sections should be removed, assuming all LTO
sections will be removed with -R .gnu.lto_.*. Removing .gnu.lto_.*
sections will also remove .gnu.debuglto_. sections. */
lto_sections_removed = !!find_section_list (".gnu.lto_.*", false,
SECTION_CONTEXT_REMOVE);
sections will also remove .gnu.debuglto_. sections. LLVM IR
bitcode is stored in .llvm.lto section which will be removed with
-R .llvm.lto. */
lto_sections_removed = (!!find_section_list (".gnu.lto_.*", false,
SECTION_CONTEXT_REMOVE)
|| !!find_section_list (".llvm.lto", false,
SECTION_CONTEXT_REMOVE));
#endif
i = optind;

View File

@@ -0,0 +1,4 @@
/* Deliberately empty, externally visible function.  The pr33198 test
   only checks that nm still reports a defined text symbol "foo".  */
void
foo (void)
{
  return;
}

View File

@@ -96,3 +96,161 @@ if {[catch "system \"bzip2 -dc $t > $tempfile\""] != 0} {
}
}
}
# Log CMD, run it through exec and return the captured result: the
# command's output, or the error text when exec reports a failure.
proc pr33198_exec { cmd } {
    send_log "$cmd\n"
    verbose "$cmd" 1
    catch "exec $cmd" got
    return $got
}

# Log the unexpected output GOT and record a failure named TESTNAME.
proc pr33198_fail { testname got } {
    send_log "$got\n"
    verbose "$got" 1
    fail $testname
}

# Strip FILE into FILE.strip using STRIP_FLAGS, then check that:
#   * strip printed nothing,
#   * nm (with the LLVM plugin) still reports "T foo" for the result,
#   * for the "-fat" variant, readelf no longer lists a .llvm.lto
#     section.
# Return 1 after recording a pass, 0 after recording a failure.
proc pr33198_check_strip { testname fat strip_flags file } {
    global llvm_plug_opt
    global NM
    global READELF
    global STRIP

    set label "$testname (strip $file)"

    set got [pr33198_exec "$STRIP $strip_flags $file -o ${file}.strip"]
    if ![string match "" $got] then {
        pr33198_fail $label $got
        return 0
    }

    set got [pr33198_exec "$NM $llvm_plug_opt ${file}.strip"]
    if ![regexp {0+ T foo} $got] then {
        pr33198_fail $label $got
        return 0
    }

    if { "$fat" == "-fat" } {
        set got [pr33198_exec "$READELF -SW ${file}.strip"]
        # The pattern is brace-quoted so the backslashes reach regexp
        # and each "." is matched literally.  Inside double quotes Tcl
        # would drop them and "." would match any character.
        if [regexp {\.llvm\.lto *LLVM_LTO *} $got] then {
            pr33198_fail $label $got
            return 0
        }
    }

    pass $label
    return 1
}

# Run the PR binutils/33198 test.
#
# FAT is "" for a slim LLVM LTO object or "-fat" to compile with
# -ffat-lto-objects.  STRIP_FLAGS are the strip options under test;
# "--strip-unneeded" and the LLVM plugin option are appended to them.
#
# The test compiles pr33198.c with clang -flto, then checks the object
# and an archive built from it, both before and after stripping.  It
# stops at the first failing step.  It is reported as untested on x32,
# when CLANG_FOR_TARGET is not set, or when no LLVM plugin option is
# available.
proc run_pr33198_test { fat strip_flags } {
    global srcdir
    global subdir
    global llvm_plug_opt
    global AR
    global CLANG_FOR_TARGET
    global NM

    set test pr33198
    # The test name is built from the caller's flags, before the common
    # options are appended below.
    set testname "${test}${fat} with $strip_flags"

    if { [istarget "x86_64-*-linux*-gnux32"]
         || ![info exists CLANG_FOR_TARGET]
         || [string match "" $llvm_plug_opt] } then {
        untested $testname
        return
    }

    set src $srcdir/$subdir/${test}.c
    set obj tmpdir/${test}${fat}.o
    set archive tmpdir/${test}${fat}.a

    set CLANG_CFLAGS "-c -O2 -flto"
    if { "$fat" == "-fat" } {
        append CLANG_CFLAGS " -ffat-lto-objects"
    }

    append strip_flags " --strip-unneeded $llvm_plug_opt"

    # Compile the source; any compiler output counts as a failure.
    set got [pr33198_exec "$CLANG_FOR_TARGET $CLANG_CFLAGS -o $obj $src"]
    if ![string match "" $got] then {
        pr33198_fail "$testname ($obj)" $got
        return
    }

    # The unstripped object must expose foo through the plugin.
    set got [pr33198_exec "$NM $llvm_plug_opt $obj"]
    if ![regexp {0+ T foo} $got] then {
        pr33198_fail "$testname ($obj)" $got
        return
    }
    pass "$testname ($obj)"

    if ![pr33198_check_strip $testname $fat $strip_flags $obj] then {
        return
    }

    # Repeat the same checks on an archive holding the object.
    set got [pr33198_exec "$AR $llvm_plug_opt -s -r -c $archive $obj"]
    if ![string match "" $got] then {
        pr33198_fail "$testname ($archive)" $got
        return
    }

    set got [pr33198_exec "$NM $llvm_plug_opt $archive"]
    if ![regexp {0+ T foo} $got] then {
        pr33198_fail "$testname ($archive)" $got
        return
    }
    pass "$testname ($archive)"

    pr33198_check_strip $testname $fat $strip_flags $archive
    return
}
# Run every strip-flag set against both the slim ("") and the fat
# ("-fat") LTO object, slim first.
foreach pr33198_strip_flags [list \
        "-R .gnu.lto_* -R .gnu.debuglto_* -R .llvm.lto -N __gnu_lto_v1" \
        "-R .llvm.lto"] {
    foreach pr33198_fat [list "" "-fat"] {
        run_pr33198_test $pr33198_fat $pr33198_strip_flags
    }
}

View File

@@ -1813,3 +1813,30 @@ proc get_standard_section_names {} {
}
return
}
# llvm_plug_opt is the "--plugin <path>" option that loads LLVMgold.so
# into the binutils tools.  It stays empty when no plugin is found, or
# when the build is not native.
set llvm_plug_opt ""
if { [isnative] } then {
    # Default CLANG_FOR_TARGET to "clang" when "clang -v" identifies
    # itself as clang.
    if ![info exists CLANG_FOR_TARGET] then {
        catch "exec clang -v" got
        if [regexp "clang version" $got] then {
            set CLANG_FOR_TARGET clang
        }
    }

    if [info exists CLANG_FOR_TARGET] then {
        # Ask the compiler where LLVMgold.so lives.  The exec is
        # guarded so that a compiler that cannot be run, or that
        # reports an error, is treated as "plugin not found" instead of
        # aborting the load of this file.
        set llvm_plug_so ""
        if { [catch {exec $CLANG_FOR_TARGET -print-file-name=LLVMgold.so} got] == 0 } then {
            set llvm_plug_so [string trim $got]
        }
        # An answer equal to the bare name is not used (presumably the
        # compiler echoes the name back when it cannot resolve it).
        if { $llvm_plug_so ne "" && $llvm_plug_so ne "LLVMgold.so" } then {
            set llvm_plug_opt "--plugin $llvm_plug_so"
        }

        if { $llvm_plug_opt eq "" } then {
            # If it is still blank, try llvm-config --libdir.  Clang
            # searches CLANG_INSTALL_LIBDIR_BASENAME which corresponds
            # to this.
            catch "exec llvm-config --libdir" got
            if {[file isdirectory $got] \
                && [file isfile $got/LLVMgold.so]} then {
                set llvm_plug_opt "--plugin $got/LLVMgold.so"
            }
        }
    }
}