Compare commits

...

2 Commits

Author SHA1 Message Date
Pedro Alves
50991aaf22 all languages
Change-Id: I79ef914087dbf85e1cbc19263843a6034383afe7
2021-07-15 15:10:59 +01:00
Simon Marchi
67ea24cb99 gdb: consider null terminator for length arguments of value_cstring calls
This patch started when I looked at this bit in cli/cli-cmds.c:

    if (*(char **) cmd->var)
      return value_cstring (*(char **) cmd->var, strlen (*(char **) cmd->var),
			    builtin_type (gdbarch)->builtin_char);
    else
      return value_cstring ("", 1,
			    builtin_type (gdbarch)->builtin_char);

I found it odd that the first value_cstring call passes a length that
does not consider the null terminator of the C string, but second does.
I want to clarify the documentation of value_cstring and fix the one
that is wrong.

Debugging a little bit, I found that the first call is the wrong one.
Doing:

  (gdb) set args foo
  (gdb) print $_gdb_setting("args")
  $1 = "foo"

creates a struct value of code TYPE_CODE_ARRAY of size 3, which doesn't
have a null terminator.  That does not create a valid C string.  It is
however printed correctly by GDB, because the printing code makes sure
not to read past the value's length.

A way to expose the bug is to print each element of the created string,
including the null terminator.  Before:

    (gdb) set args foo
    (gdb) p $_gdb_setting("args")[3]
    no such vector element

After:

    (gdb) set args foo
    (gdb) p $_gdb_setting("args")[3]
    $1 = 0 '\000'

Another perhaps more convincing way of showing the bug is if the string
value is passed to an inferior function call;

    (gdb) print an_inferior_function($_gdb_setting("args))

The space allocate for the string in the inferior will not take into
account a null terminator (with the string "foo", 3 bytes will be
allocated).  If the inferior tries to read the string until the null
terminator, it will read past the allocated buffer.  Compiling the
inferior with AddressSanitizer makes that bad access obvious.

I found a few calls to value_cstring that were wrong, I fixed them
all, all exercised by the test.

The change in guile/scm-math.c maybe requires a bit of explanation.
According to the doc of gdbscm_scm_to_string, if don't pass a length
pointer, we get back a null-terminated string.  If we pass a length
pointer, we get a non-null-terminated string, but we get the length
separately.  Trying to pass "len + 1" to value_cstring would lead to GDB
reading past the allocated buffer, that is exactly of length `len`.  So
instead, don't pass a length pointer and use strlen on the result.

gdb.base/settings.exp and gdb.python/py-mi.exp required changes in some
expected outputs, because the array type created by $_gdb_setting_str
is now one element larger, to account for the null terminator.  I don't
think this change in user-visible behavior is a problem.

Change-Id: If8dd13cce55c70521e34e7c360139064b4e87496
2021-07-15 15:10:52 +01:00
16 changed files with 395 additions and 39 deletions

View File

@@ -12900,6 +12900,16 @@ public:
return language_defn::read_var_value (var, var_block, frame);
}
struct value *value_string (struct gdbarch *gdbarch,
const char *ptr, ssize_t len) const override
{
struct type *type = language_string_char_type (this, gdbarch);
value *val = ::value_string (ptr, len, type);
/* See ada_string_operation::evaluate. */
value_type (val)->set_code (TYPE_CODE_ARRAY);
return val;
}
/* See language.h. */
void language_arch_info (struct gdbarch *gdbarch,
struct language_arch_info *lai) const override

View File

@@ -653,16 +653,11 @@ c_string_operation::evaluate (struct type *expect_type,
}
else
{
int i;
/* Write the terminating character. */
for (i = 0; i < TYPE_LENGTH (type); ++i)
obstack_1grow (&output, 0);
int element_size = TYPE_LENGTH (type);
if (satisfy_expected)
{
LONGEST low_bound, high_bound;
int element_size = TYPE_LENGTH (type);
if (!get_discrete_bounds (expect_type->index_type (),
&low_bound, &high_bound))
@@ -677,10 +672,13 @@ c_string_operation::evaluate (struct type *expect_type,
result = allocate_value (expect_type);
memcpy (value_contents_raw (result), obstack_base (&output),
obstack_object_size (&output));
/* Write the terminating character. */
memset (value_contents_raw (result) + obstack_object_size (&output),
0, element_size);
}
else
result = value_cstring ((const char *) obstack_base (&output),
obstack_object_size (&output),
obstack_object_size (&output) / element_size,
type);
}
return result;

View File

@@ -2146,11 +2146,10 @@ value_from_setting (const cmd_list_element *cmd, struct gdbarch *gdbarch)
case var_filename:
case var_enum:
if (*(char **) cmd->var)
return value_cstring (*(char **) cmd->var, strlen (*(char **) cmd->var),
builtin_type (gdbarch)->builtin_char);
return current_language->value_string (gdbarch, *(char **) cmd->var,
strlen (*(char **) cmd->var));
else
return value_cstring ("", 1,
builtin_type (gdbarch)->builtin_char);
return current_language->value_string (gdbarch, "", 0);
default:
gdb_assert_not_reached ("bad var_type");
}
@@ -2198,8 +2197,9 @@ str_value_from_setting (const cmd_list_element *cmd, struct gdbarch *gdbarch)
{
std::string cmd_val = get_setshow_command_value_string (cmd);
return value_cstring (cmd_val.c_str (), cmd_val.size (),
builtin_type (gdbarch)->builtin_char);
return current_language->value_string (gdbarch,
cmd_val.data (),
cmd_val.size ());
}
case var_string:
@@ -2212,11 +2212,11 @@ str_value_from_setting (const cmd_list_element *cmd, struct gdbarch *gdbarch)
escaping quotes. So, we directly use the cmd->var string value,
similarly to the value_from_setting code for these cases. */
if (*(char **) cmd->var)
return value_cstring (*(char **) cmd->var, strlen (*(char **) cmd->var),
builtin_type (gdbarch)->builtin_char);
return current_language->value_string (gdbarch,
*(char **) cmd->var,
strlen (*(char **) cmd->var));
else
return value_cstring ("", 1,
builtin_type (gdbarch)->builtin_char);
return current_language->value_string (gdbarch, "", 0);
default:
gdb_assert_not_reached ("bad var_type");

View File

@@ -103,6 +103,14 @@ f_language::get_encoding (struct type *type)
struct value *
f_language::value_string (struct gdbarch *gdbarch,
const char *ptr, ssize_t len) const
{
struct type *type = language_string_char_type (this, gdbarch);
return ::value_string (ptr, len, type);
}
/* A helper function for the "bound" intrinsics that checks that TYPE
is an array. LBOUND_P is true for lower bound; this is used for
the error message, if any. */

View File

@@ -193,6 +193,9 @@ public:
&& TYPE_TARGET_TYPE (type)->code () == TYPE_CODE_CHAR));
}
struct value *value_string (struct gdbarch *gdbarch,
const char *ptr, ssize_t len) const override;
/* See language.h. */
const char *struct_too_deep_ellipsis () const override

View File

@@ -794,9 +794,7 @@ vlscm_convert_typed_value_from_scheme (const char *func_name,
0 /*non-strict*/,
&except_scm);
if (s != NULL)
value = value_cstring (s.get (), len,
language_string_char_type (language,
gdbarch));
value = language->value_string (gdbarch, s.get (), len);
else
value = NULL;
}

View File

@@ -47,6 +47,7 @@
#include <algorithm>
#include "gdbarch.h"
#include "compile/compile-internal.h"
#include "arch-utils.h"
static void set_range_case (void);
@@ -976,6 +977,14 @@ language_string_char_type (const struct language_defn *la,
return ld->arch_info[la->la_language].string_char_type ();
}
struct value *
language_defn::value_string (struct gdbarch *gdbarch,
const char *ptr, ssize_t len) const
{
struct type *type = language_string_char_type (this, gdbarch);
return value_cstring (ptr, len, type);
}
/* See language.h. */
struct type *

View File

@@ -579,6 +579,13 @@ struct language_defn
virtual char string_lower_bound () const
{ return c_style_arrays_p () ? 0 : 1; }
/* Return the LEN characters long string at STR as a value as
represented in this language. GDBARCH is used to infer the
character type. The default implementation returns a
null-terminated C string. */
virtual struct value *value_string (struct gdbarch *gdbarch,
const char *ptr, ssize_t len) const;
/* Returns true if the symbols names should be stored in GDB's data
structures for minimal/partial/full symbols using their linkage (aka
mangled) form; false if the symbol names should be demangled first.

View File

@@ -50,9 +50,6 @@
#define builtin_type_pybool \
language_bool_type (python_language, python_gdbarch)
#define builtin_type_pychar \
language_string_char_type (python_language, python_gdbarch)
struct value_object {
PyObject_HEAD
struct value_object *next;
@@ -1907,8 +1904,9 @@ convert_value_from_python (PyObject *obj)
gdb::unique_xmalloc_ptr<char> s
= python_string_to_target_string (obj);
if (s != NULL)
value = value_cstring (s.get (), strlen (s.get ()),
builtin_type_pychar);
value = python_language->value_string (python_gdbarch,
s.get (),
strlen (s.get ()));
}
else if (PyObject_TypeCheck (obj, &value_object_type))
value = value_copy (((value_object *) obj)->value);

View File

@@ -0,0 +1,32 @@
/* This testcase is part of GDB, the GNU debugger.
Copyright 2021 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
static void
trace_me (void)
{}
static void
end (void)
{}
int
main (void)
{
trace_me ();
end ();
return 0;
}

View File

@@ -0,0 +1,278 @@
# Copyright 2021 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Test that string values are correctly allocated inside GDB when doing
# various operations that yield strings.
#
# The issue that lead to this test was a missing NULL terminator in the C-string
# values. We verify that we can print the null terminator of these strings.
load_lib "trace-support.exp"
load_lib "gdb-guile.exp"
standard_testfile
if {[build_executable "failed to prepare" $testfile $srcfile ]} {
return
}
# Check that the string contained in the convenienced variable $v is
# EXPECTED_STR.
#
# In particular, check that the null terminator is there and that we can't
# access a character past the end of the string.
proc check_v_string { expected_str } {
set len [string length $expected_str]
for { set i 0 } { $i < $len } { incr i } {
set c [string index $expected_str $i]
gdb_test "print \$v\[$i\]" "= $::decimal '$c'"
}
# Check that the string ends with a null terminator.
gdb_test "print \$v\[$i\]" {= 0 '\\000'}
# Check that we can't access a character after the end of the string.
incr i
gdb_test "print \$v\[$i\]" "no such vector element"
}
# Test with string values made by $_gdb_setting & co.
proc_with_prefix test_setting { } {
clean_restart
# This is an internal GDB implementation detail, but the variable backing a
# string setting starts as nullptr (unless explicitly initialized at startup).
# When assigning an empty value, the variable then points to an empty string.
# Test both cases, as it triggers different code paths (in addition to a
# non-empty value).
#
# Use "set trace-user" and "maintenance set test-settings string" as they are
# both not initialized at startup.
with_test_prefix "user setting" {
with_test_prefix "not set" {
foreach_with_prefix conv_func {$_gdb_setting $_gdb_setting_str} {
gdb_test_no_output "set \$v = ${conv_func}(\"trace-user\")"
check_v_string ""
}
}
with_test_prefix "set to empty" {
gdb_test "set trace-user"
foreach_with_prefix conv_func {$_gdb_setting $_gdb_setting_str} {
gdb_test_no_output "set \$v = ${conv_func}(\"trace-user\")"
check_v_string ""
}
}
with_test_prefix "set" {
gdb_test "set trace-user poulet"
foreach_with_prefix conv_func {$_gdb_setting $_gdb_setting_str} {
gdb_test_no_output {set $v = $_gdb_setting("trace-user")}
check_v_string "poulet"
}
}
}
with_test_prefix "maintenance setting" {
with_test_prefix "not set" {
foreach_with_prefix conv_func {$_gdb_maint_setting $_gdb_maint_setting_str} {
gdb_test_no_output "set \$v = ${conv_func}(\"test-settings string\")"
check_v_string ""
}
}
with_test_prefix "set to empty" {
gdb_test "maintenance set test-settings string"
foreach_with_prefix conv_func {$_gdb_maint_setting $_gdb_maint_setting_str} {
gdb_test_no_output "set \$v = ${conv_func}(\"test-settings string\")"
check_v_string ""
}
}
with_test_prefix "set" {
gdb_test "maintenance set test-settings string perchaude"
foreach_with_prefix conv_func {$_gdb_maint_setting $_gdb_maint_setting_str} {
gdb_test_no_output "set \$v = ${conv_func}(\"test-settings string\")"
check_v_string "perchaude"
}
}
}
# Test with a non-string setting, this tests yet another code path.
with_test_prefix "integer setting" {
gdb_test_no_output {set $v = $_gdb_setting_str("remotetimeout")}
check_v_string "2"
}
# Test string values made by $_gdb_setting & co. in all languages.
with_test_prefix "all langs" {
# Get list of supported languages.
set langs {}
gdb_test_multiple "complete set language " "" {
-re "set language (\[^\r\n\]+)" {
lappend langs $expect_out(1,string)
exp_continue
}
-re "$::gdb_prompt $" {
pass $gdb_test_name
}
}
proc quote {str} {
upvar lang lang
if {$lang == "fortran"} {
return "'$str'"
} else {
return "\"$str\""
}
}
gdb_test "maintenance set test-settings string foo"
foreach_with_prefix lang $langs {
gdb_test_no_output "set language $lang"
if {$lang == "modula-2"} {
gdb_test "print \"foo\"" "strings are not implemented"
continue
}
if {$lang == "rust"} {
gdb_test "print \"foo\"" \
"evaluation of this expression requires the target program to be active"
# We could get the above working by starting the
# program, but how to make the below work?
gdb_test "print \$_gdb_maint_setting(\"test-settings string\")" \
"',' or '\\)' expected"
continue
}
if {$lang == "unknown"} {
# Skipped because of PR gdb/28093 -- GDB would crash.
continue
}
set print_output ""
set ptype_output ""
set foo_str [quote foo]
gdb_test_multiple "print $foo_str" "" {
-wrap -re " = (.*)" {
set print_output $expect_out(1,string)
pass $gdb_test_name
}
}
gdb_test_multiple "ptype $foo_str" "" {
-wrap -re " = (.*)" {
set ptype_output $expect_out(1,string)
pass $gdb_test_name
}
}
set cmd_str [quote "test-settings string"]
set ptype_output_re [string_to_regexp $ptype_output]
set print_output_re [string_to_regexp $print_output]
foreach_with_prefix conv_func {$_gdb_maint_setting $_gdb_maint_setting_str} {
gdb_test "print ${conv_func}($cmd_str)" \
" = $print_output_re"
gdb_test "ptype \$" \
" = $ptype_output_re"
}
}
}
}
# Test with a string value created by gdb.Value in Python.
proc_with_prefix test_python_value { } {
clean_restart
if {[skip_python_tests]} {
untested "skipping test_python_value"
return
}
gdb_test_no_output "python gdb.set_convenience_variable(\"v\", \"bar\")" \
"set convenience var"
check_v_string "bar"
}
# Test with a string value created by make-value in Guile.
proc_with_prefix test_guile_value { } {
clean_restart
if {[skip_guile_tests]} {
untested "skipping test_guile_value"
return
}
# We can't set a convenience var from Guile, but we can append to history.
# Do that, then transfer to a convenience var with a CLI command.
gdb_test_no_output "guile (use-modules (gdb))"
gdb_test_multiple "guile (history-append! (make-value \"foo\"))" "make value" {
-re -wrap "($::decimal)" {
set histnum $expect_out(1,string)
}
}
gdb_test_no_output "set \$v = \$$histnum"
check_v_string "foo"
}
# Test with a string value coming from a string internal var. The only internal
# vars of this type, at the time of writing, are $trace_func and $trace_file.
# They both require inspecting a trace frame. So if the target is capable start
# tracing, record one trace frame, and use $trace_func.
proc_with_prefix test_internal_var { } {
if {![gdb_trace_common_supports_arch]} {
unsupported "arch does not support trace"
return
}
clean_restart $::binfile
if {![runto_main]} {
fail "could not run to main"
return
}
if {![gdb_target_supports_trace]} {
unsupported "target does not support trace"
return
}
gdb_test "break end" "Breakpoint $::decimal at $::hex.*"
gdb_test "trace trace_me" "Tracepoint $::decimal at $::hex.*"
gdb_test_no_output "tstart"
gdb_test "continue" "Breakpoint $::decimal, end.*"
gdb_test_no_output "tstop"
gdb_test "tfind" "Found trace frame 0, tracepoint $::decimal.*"
gdb_test_no_output "set \$v = \$trace_func"
gdb_test "tfind none" "No longer looking at any trace frame.*"
check_v_string "trace_me"
}
test_setting
test_python_value
test_guile_value
test_internal_var

View File

@@ -498,7 +498,7 @@ proc_with_prefix test-enum {} {
gdb_test_no_output "$set_cmd zzz"
show_setting "$show_cmd" "zzz" 0 "yyy"
check_type "test-settings enum" "type = char \\\[3\\\]"
check_type "test-settings enum" "type = char \\\[4\\\]"
test_gdb_complete_multiple "$set_cmd " "" "" {
"xxx"

View File

@@ -289,7 +289,7 @@ mi_create_dynamic_varobj nstype2 nstype2 1 \
"create nstype2 varobj"
mi_list_varobj_children nstype2 {
{ {nstype2.<error at 0>} {<error at 0>} 6 {char \[6\]} }
{ {nstype2.<error at 0>} {<error at 0>} 7 {char \[7\]} }
} "list children after setting exception flag"
mi_create_varobj me me \

View File

@@ -1747,28 +1747,26 @@ value_array (int lowbound, int highbound, struct value **elemvec)
return val;
}
/* See value.h. */
struct value *
value_cstring (const char *ptr, ssize_t len, struct type *char_type)
{
struct value *val;
int lowbound = current_language->string_lower_bound ();
ssize_t highbound = len / TYPE_LENGTH (char_type);
ssize_t highbound = len + 1;
struct type *stringtype
= lookup_array_range_type (char_type, lowbound, highbound + lowbound - 1);
val = allocate_value (stringtype);
memcpy (value_contents_raw (val), ptr, len);
memcpy (value_contents_raw (val), ptr, len * TYPE_LENGTH (char_type));
/* Write the terminating character. */
memset (value_contents_raw (val) + len * TYPE_LENGTH (char_type),
0, TYPE_LENGTH (char_type));
return val;
}
/* Create a value for a string constant by allocating space in the
inferior, copying the data into that space, and returning the
address with type TYPE_CODE_STRING. PTR points to the string
constant data; LEN is number of characters.
Note that string types are like array of char types with a lower
bound of zero and an upper bound of LEN - 1. Also note that the
string may contain embedded null bytes. */
/* See value.h. */
struct value *
value_string (const char *ptr, ssize_t len, struct type *char_type)

View File

@@ -2188,8 +2188,9 @@ value_of_internalvar (struct gdbarch *gdbarch, struct internalvar *var)
break;
case INTERNALVAR_STRING:
val = value_cstring (var->u.string, strlen (var->u.string),
builtin_type (gdbarch)->builtin_char);
val = current_language->value_string (gdbarch,
var->u.string,
strlen (var->u.string));
break;
case INTERNALVAR_VALUE:

View File

@@ -782,8 +782,24 @@ class scoped_value_mark
const struct value *m_value;
};
/* Create not_lval value representing a NULL-terminated C string. The
resulting value has type TYPE_CODE_ARRAY.
PTR points to the string data; LEN is number of characters (does
not include the NULL terminator). CHAR_TYPE is the character
type. */
extern struct value *value_cstring (const char *ptr, ssize_t len,
struct type *char_type);
/* Create a not_lval value with type TYPE_CODE_STRING.
PTR points to the string data; LEN is number of characters.
Note that string types are like array of char types with a lower
bound of zero and an upper bound of LEN - 1. Also note that the
string may contain embedded null bytes. */
extern struct value *value_string (const char *ptr, ssize_t len,
struct type *char_type);