>From b6502d7a990cc834bdcf2154f563da68a2ccc2b4 Mon Sep 17 00:00:00 2001 From: Paul Eggert
Date: Sun, 3 May 2020 18:32:57 -0700 Subject: [PATCH] Fix integer overflow bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem reported by Bruno Haible in: https://lists.gnu.org/r/bug-m4/2020-05/msg00001.html I found some other problems while fixing this one. * src/builtin.c: Include intprops.h. (numeric_arg): Yield long, not int, so that string lengths need not fit in int. All callers changed. Don’t suppress overflow warning merely because the arg has leading whitespace. It’s possible to have numeric overflow and whitespace both. (m4_eval, m4_incr, m4_decr): Do not assume wrapv integer overflow. (m4_substr): Fix integer overflow bug. * src/eval.c: Include intprops.h (eval_lex): Do not assume wrapv integer overflow. --- src/builtin.c | 65 ++++++++++++++++++++++++++++++--------------------- src/eval.c | 8 +++++-- 2 files changed, 45 insertions(+), 28 deletions(-) diff --git a/src/builtin.c b/src/builtin.c index 82a92f68..20535bf3 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -25,6 +25,7 @@ #include "m4.h" #include "execute.h" +#include "intprops.h" #include "memchr2.h" #include "progname.h" #include "regex.h" @@ -407,7 +408,7 @@ bad_argc (token_data *name, int argc, int min, int max) `-----------------------------------------------------------------*/ static bool -numeric_arg (token_data *macro, const char *arg, int *valuep) +numeric_arg (token_data *macro, const char *arg, long int *valuep) { char *endp; @@ -429,13 +430,13 @@ numeric_arg (token_data *macro, const char *arg, int *valuep) TOKEN_DATA_TEXT (macro))); return false; } - if (isspace (to_uchar (*arg))) + if (errno == ERANGE) M4ERROR ((warning_status, 0, - "leading whitespace ignored in builtin `%s'", + "numeric overflow detected in builtin `%s'", TOKEN_DATA_TEXT (macro))); - else if (errno == ERANGE) + if (isspace (to_uchar (*arg))) M4ERROR ((warning_status, 0, - "numeric overflow detected in builtin `%s'", + "leading whitespace ignored in builtin `%s'", TOKEN_DATA_TEXT (macro))); } return true; @@ -1085,8 +1086,8 @@ static void m4_eval (struct obstack *obs, int argc, token_data **argv) { int32_t value = 0; - int radix = 10; - int min = 1; + long int radix = 10; + long int min = 1; const char *s; if (bad_argc (argv[0], argc, 2, 4)) @@ -1095,10 +1096,10 @@ m4_eval (struct obstack *obs, int argc, token_data **argv) if (*ARG (2) && !numeric_arg (argv[0], ARG (2), &radix)) return; - if (radix < 1 || radix > (int) strlen (digits)) + if (radix < 1 || (unsigned long int) radix > strlen (digits)) { M4ERROR ((warning_status, 0, - "radix %d in builtin `%s' out of range", + "radix %ld in builtin `%s' out of range", radix, ARG (0))); return; } @@ -1123,13 +1124,20 @@ m4_eval (struct obstack *obs, int argc, token_data **argv) if (value < 0) { obstack_1grow (obs, '-'); - value = -value; + while (-(min--) < value) + obstack_1grow (obs, '0'); + + do + obstack_1grow (obs, '1'); + while (++value != 0); + } + else + { + while (value < min--) + obstack_1grow (obs, '0'); + while (value-- != 0) + obstack_1grow (obs, '1'); } - /* This assumes 2's-complement for correctly handling INT_MIN. */ - while (min-- - value > 0) - obstack_1grow (obs, '0'); - while (value-- != 0) - obstack_1grow (obs, '1'); obstack_1grow (obs, '\0'); return; } @@ -1150,7 +1158,8 @@ m4_eval (struct obstack *obs, int argc, token_data **argv) static void m4_incr (struct obstack *obs, int argc, token_data **argv) { - int value; + long int value; + int32_t value32; if (bad_argc (argv[0], argc, 2, 2)) return; @@ -1158,13 +1167,15 @@ m4_incr (struct obstack *obs, int argc, token_data **argv) if (!numeric_arg (argv[0], ARG (1), &value)) return; - shipout_int (obs, value + 1); + INT_ADD_WRAPV (value, 1, &value32); + shipout_int (obs, value32); } static void m4_decr (struct obstack *obs, int argc, token_data **argv) { - int value; + long int value; + int32_t value32; if (bad_argc (argv[0], argc, 2, 2)) return; @@ -1172,7 +1183,8 @@ m4_decr (struct obstack *obs, int argc, token_data **argv) if (!numeric_arg (argv[0], ARG (1), &value)) return; - shipout_int (obs, value - 1); + INT_SUBTRACT_WRAPV (value, 1, &value32); + shipout_int (obs, value32); } /* This section contains the macros "divert", "undivert" and "divnum" for @@ -1186,12 +1198,13 @@ m4_decr (struct obstack *obs, int argc, token_data **argv) static void m4_divert (struct obstack *obs M4_GNUC_UNUSED, int argc, token_data **argv) { - int i = 0; + long int i = 0; if (bad_argc (argv[0], argc, 1, 2)) return; - if (argc >= 2 && !numeric_arg (argv[0], ARG (1), &i)) + if (argc >= 2 && ! (numeric_arg (argv[0], ARG (1), &i) + && INT_MIN <= i && i <= INT_MAX)) return; make_diversion (i); @@ -1538,7 +1551,7 @@ m4___program__ (struct obstack *obs, int argc, token_data **argv) static void M4_GNUC_NORETURN m4_m4exit (struct obstack *obs M4_GNUC_UNUSED, int argc, token_data **argv) { - int exit_code = EXIT_SUCCESS; + long int exit_code = EXIT_SUCCESS; /* Warn on bad arguments, but still exit. */ bad_argc (argv[0], argc, 1, 2); @@ -1547,7 +1560,7 @@ m4_m4exit (struct obstack *obs M4_GNUC_UNUSED, int argc, token_data **argv) if (exit_code < 0 || exit_code > 255) { M4ERROR ((warning_status, 0, - "exit status out of range: `%d'", exit_code)); + "exit status out of range: `%ld'", exit_code)); exit_code = EXIT_FAILURE; } /* Change debug stream back to stderr, to force flushing debug stream and @@ -1769,8 +1782,8 @@ m4_index (struct obstack *obs, int argc, token_data **argv) static void m4_substr (struct obstack *obs, int argc, token_data **argv) { - int start = 0; - int length, avail; + long int start = 0; + long int length, avail; if (bad_argc (argv[0], argc, 3, 4)) { @@ -1790,7 +1803,7 @@ m4_substr (struct obstack *obs, int argc, token_data **argv) if (start < 0 || length <= 0 || start >= avail) return; - if (start + length > avail) + if (avail - start < length) length = avail - start; obstack_grow (obs, ARG (1) + start, length); } diff --git a/src/eval.c b/src/eval.c index 7f286a10..c4fd2d36 100644 --- a/src/eval.c +++ b/src/eval.c @@ -25,6 +25,7 @@ is evaluate (). */ #include "m4.h" +#include "intprops.h" /* Evaluates token types. */ @@ -152,7 +153,7 @@ eval_lex (int32_t *val) else base = 10; - /* FIXME - this calculation can overflow. Consider xstrtol. */ + /* FIXME - this calculation can overflow, with no diagnostic. */ *val = 0; for (; *eval_text; eval_text++) { @@ -177,7 +178,10 @@ eval_lex (int32_t *val) else if (digit >= base) break; else - *val = *val * base + digit; + { + INT_MULTIPLY_WRAPV (*val, base, val); + INT_ADD_WRAPV (*val, digit, val); + } } return NUMBER; } -- 2.17.1