diff --git a/Makefile b/Makefile index b7eba509c6..3c88466932 100644 --- a/Makefile +++ b/Makefile @@ -1520,6 +1520,7 @@ CLAR_TEST_SUITES += u-mem-pool CLAR_TEST_SUITES += u-oid-array CLAR_TEST_SUITES += u-oidmap CLAR_TEST_SUITES += u-oidtree +CLAR_TEST_SUITES += u-parse-int CLAR_TEST_SUITES += u-prio-queue CLAR_TEST_SUITES += u-reftable-basics CLAR_TEST_SUITES += u-reftable-block diff --git a/cache-tree.c b/cache-tree.c index 2d8947b518..f8fb290443 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -16,6 +16,7 @@ #include "promisor-remote.h" #include "trace.h" #include "trace2.h" +#include "parse.h" #ifndef DEBUG_CACHE_TREE #define DEBUG_CACHE_TREE 0 @@ -550,32 +551,13 @@ void cache_tree_write(struct strbuf *sb, struct cache_tree *root) static int parse_int(const char **ptr, unsigned long *len_p, int *out) { - const char *s = *ptr; - unsigned long len = *len_p; - int ret = 0; - int sign = 1; + const char *ep; - while (len && *s == '-') { - sign *= -1; - s++; - len--; - } - - while (len) { - if (!isdigit(*s)) - break; - ret *= 10; - ret += *s - '0'; - s++; - len--; - } - - if (s == *ptr) + if (!parse_int_from_buf(*ptr, *len_p, &ep, out)) return -1; - *ptr = s; - *len_p = len; - *out = sign * ret; + *len_p -= ep - *ptr; + *ptr = ep; return 0; } diff --git a/compat/posix.h b/compat/posix.h index 245386fa4a..6992d20978 100644 --- a/compat/posix.h +++ b/compat/posix.h @@ -253,6 +253,8 @@ char *gitdirname(char *); typedef uintmax_t timestamp_t; #define PRItime PRIuMAX #define parse_timestamp strtoumax +#define parse_timestamp_from_buf(buf, len, ep, result) \ + parse_unsigned_from_buf((buf), (len), (ep), (result), TIME_MAX) #define TIME_MAX UINTMAX_MAX #define TIME_MIN 0 diff --git a/fsck.c b/fsck.c index fae18d8561..9e21f8d2d7 100644 --- a/fsck.c +++ b/fsck.c @@ -860,28 +860,13 @@ static int verify_headers(const void *data, unsigned long size, FSCK_MSG_UNTERMINATED_HEADER, "unterminated header"); } -static timestamp_t parse_timestamp_from_buf(const char **start, const char *end) -{ - const char *p = *start; - char buf[24]; /* big enough for 2^64 */ - size_t i = 0; - - while (p < end && isdigit(*p)) { - if (i >= ARRAY_SIZE(buf) - 1) - return TIME_MAX; - buf[i++] = *p++; - } - buf[i] = '\0'; - *start = p; - return parse_timestamp(buf, NULL, 10); -} - static int fsck_ident(const char **ident, const char *ident_end, const struct object_id *oid, enum object_type type, struct fsck_options *options) { const char *p = *ident; const char *nl; + timestamp_t timestamp; nl = memchr(p, '\n', ident_end - p); if (!nl) @@ -933,7 +918,8 @@ static int fsck_ident(const char **ident, const char *ident_end, "invalid author/committer line - bad date"); if (*p == '0' && p[1] != ' ') return report(options, oid, type, FSCK_MSG_ZERO_PADDED_DATE, "invalid author/committer line - zero-padded date"); - if (date_overflows(parse_timestamp_from_buf(&p, ident_end))) + if (!parse_timestamp_from_buf(p, ident_end - p, &p, ×tamp) || + date_overflows(timestamp)) return report(options, oid, type, FSCK_MSG_BAD_DATE_OVERFLOW, "invalid author/committer line - date causes integer overflow"); if (*p != ' ') return report(options, oid, type, FSCK_MSG_BAD_DATE, "invalid author/committer line - bad date"); diff --git a/parse.c b/parse.c index 48313571aa..1dcbcf64a1 100644 --- a/parse.c +++ b/parse.c @@ -15,7 +15,7 @@ static uintmax_t get_unit_factor(const char *end) return 0; } -int git_parse_signed(const char *value, intmax_t *ret, intmax_t max) +bool git_parse_signed(const char *value, intmax_t *ret, intmax_t max) { if (value && *value) { char *end; @@ -28,30 +28,30 @@ int git_parse_signed(const char *value, intmax_t *ret, intmax_t max) errno = 0; val = strtoimax(value, &end, 0); if (errno == ERANGE) - return 0; + return false; if (end == value) { errno = EINVAL; - return 0; + return false; } factor = get_unit_factor(end); if (!factor) { errno = EINVAL; - return 0; + return false; } if ((val < 0 && (-max - 1) / factor > val) || (val > 0 && max / factor < val)) { errno = ERANGE; - return 0; + return false; } val *= factor; *ret = val; - return 1; + return true; } errno = EINVAL; - return 0; + return false; } -int git_parse_unsigned(const char *value, uintmax_t *ret, uintmax_t max) +bool git_parse_unsigned(const char *value, uintmax_t *ret, uintmax_t max) { if (value && *value) { char *end; @@ -61,71 +61,71 @@ int git_parse_unsigned(const char *value, uintmax_t *ret, uintmax_t max) /* negative values would be accepted by strtoumax */ if (strchr(value, '-')) { errno = EINVAL; - return 0; + return false; } errno = 0; val = strtoumax(value, &end, 0); if (errno == ERANGE) - return 0; + return false; if (end == value) { errno = EINVAL; - return 0; + return false; } factor = get_unit_factor(end); if (!factor) { errno = EINVAL; - return 0; + return false; } if (unsigned_mult_overflows(factor, val) || factor * val > max) { errno = ERANGE; - return 0; + return false; } val *= factor; *ret = val; - return 1; + return true; } errno = EINVAL; - return 0; + return false; } -int git_parse_int(const char *value, int *ret) +bool git_parse_int(const char *value, int *ret) { intmax_t tmp; if (!git_parse_signed(value, &tmp, maximum_signed_value_of_type(int))) - return 0; + return false; *ret = tmp; - return 1; + return true; } -int git_parse_int64(const char *value, int64_t *ret) +bool git_parse_int64(const char *value, int64_t *ret) { intmax_t tmp; if (!git_parse_signed(value, &tmp, maximum_signed_value_of_type(int64_t))) - return 0; + return false; *ret = tmp; - return 1; + return true; } -int git_parse_ulong(const char *value, unsigned long *ret) +bool git_parse_ulong(const char *value, unsigned long *ret) { uintmax_t tmp; if (!git_parse_unsigned(value, &tmp, maximum_unsigned_value_of_type(long))) - return 0; + return false; *ret = tmp; - return 1; + return true; } -int git_parse_ssize_t(const char *value, ssize_t *ret) +bool git_parse_ssize_t(const char *value, ssize_t *ret) { intmax_t tmp; if (!git_parse_signed(value, &tmp, maximum_signed_value_of_type(ssize_t))) - return 0; + return false; *ret = tmp; - return 1; + return true; } -int git_parse_double(const char *value, double *ret) +bool git_parse_double(const char *value, double *ret) { char *end; double val; @@ -133,25 +133,25 @@ int git_parse_double(const char *value, double *ret) if (!value || !*value) { errno = EINVAL; - return 0; + return false; } errno = 0; val = strtod(value, &end); if (errno == ERANGE) - return 0; + return false; if (end == value) { errno = EINVAL; - return 0; + return false; } factor = get_unit_factor(end); if (!factor) { errno = EINVAL; - return 0; + return false; } val *= factor; *ret = val; - return 1; + return true; } int git_parse_maybe_bool_text(const char *value) @@ -209,3 +209,99 @@ unsigned long git_env_ulong(const char *k, unsigned long val) die(_("failed to parse %s"), k); return val; } + +/* + * Helper that handles both signed/unsigned cases. If "negate" is NULL, + * negative values are disallowed. If not NULL and the input is negative, + * the value is range-checked but the caller is responsible for actually doing + * the negatiion. You probably don't want to use this! Use one of + * parse_signed_from_buf() or parse_unsigned_from_buf() below. + */ +static bool parse_from_buf_internal(const char *buf, size_t len, + const char **ep, bool *negate, + uintmax_t *ret, uintmax_t max) +{ + const char *end = buf + len; + uintmax_t val = 0; + + while (buf < end && isspace(*buf)) + buf++; + + if (negate) + *negate = false; + if (buf < end && *buf == '-') { + if (!negate) { + errno = EINVAL; + return false; + } + buf++; + *negate = true; + /* Assume negative range is always one larger than positive. */ + max = max + 1; + } else if (buf < end && *buf == '+') { + buf++; + } + + if (buf == end || !isdigit(*buf)) { + errno = EINVAL; + return false; + } + + while (buf < end && isdigit(*buf)) { + int digit = *buf - '0'; + + if (val > max / 10) { + errno = ERANGE; + return false; + } + val *= 10; + if (val > max - digit) { + errno = ERANGE; + return false; + } + val += digit; + + buf++; + } + + *ep = buf; + *ret = val; + return true; +} + +bool parse_unsigned_from_buf(const char *buf, size_t len, const char **ep, + uintmax_t *ret, uintmax_t max) +{ + return parse_from_buf_internal(buf, len, ep, NULL, ret, max); +} + +bool parse_signed_from_buf(const char *buf, size_t len, const char **ep, + intmax_t *ret, intmax_t max) +{ + uintmax_t u_ret; + bool negate; + + if (!parse_from_buf_internal(buf, len, ep, &negate, &u_ret, max)) + return false; + /* + * Range already checked internally, but we must apply negation + * ourselves since only we have the signed integer type. + */ + if (negate) { + *ret = u_ret; + *ret = -*ret; + } else { + *ret = u_ret; + } + return true; +} + +bool parse_int_from_buf(const char *buf, size_t len, const char **ep, int *ret) +{ + intmax_t tmp; + if (!parse_signed_from_buf(buf, len, ep, &tmp, + maximum_signed_value_of_type(int))) + return false; + *ret = tmp; + return true; +} diff --git a/parse.h b/parse.h index ea32de9a91..53663c8939 100644 --- a/parse.h +++ b/parse.h @@ -1,13 +1,13 @@ #ifndef PARSE_H #define PARSE_H -int git_parse_signed(const char *value, intmax_t *ret, intmax_t max); -int git_parse_unsigned(const char *value, uintmax_t *ret, uintmax_t max); -int git_parse_ssize_t(const char *, ssize_t *); -int git_parse_ulong(const char *, unsigned long *); -int git_parse_int(const char *value, int *ret); -int git_parse_int64(const char *value, int64_t *ret); -int git_parse_double(const char *value, double *ret); +bool git_parse_signed(const char *value, intmax_t *ret, intmax_t max); +bool git_parse_unsigned(const char *value, uintmax_t *ret, uintmax_t max); +bool git_parse_ssize_t(const char *, ssize_t *); +bool git_parse_ulong(const char *, unsigned long *); +bool git_parse_int(const char *value, int *ret); +bool git_parse_int64(const char *value, int64_t *ret); +bool git_parse_double(const char *value, double *ret); /** * Same as `git_config_bool`, except that it returns -1 on error rather @@ -19,4 +19,21 @@ int git_parse_maybe_bool_text(const char *value); int git_env_bool(const char *, int); unsigned long git_env_ulong(const char *, unsigned long); +/* + * These functions parse an integer from a buffer that does not need to be + * NUL-terminated. They return true on success, or false if no integer is found + * (in which case errno is set to EINVAL) or if the integer is out of the + * allowable range (in which case errno is ERANGE). + * + * You must pass in a non-NULL value for "ep", which returns a pointer to the + * next character in the buf (similar to strtol(), etc). + * + * These functions always parse in base 10 (and do not allow input like "0xff" + * to switch to base 16). They do not allow unit suffixes like git_parse_int(), + * above. + */ +bool parse_unsigned_from_buf(const char *buf, size_t len, const char **ep, uintmax_t *ret, uintmax_t max); +bool parse_signed_from_buf(const char *buf, size_t len, const char **ep, intmax_t *ret, intmax_t max); +bool parse_int_from_buf(const char *buf, size_t len, const char **ep, int *ret); + #endif /* PARSE_H */ diff --git a/t/meson.build b/t/meson.build index 0b44b5e06d..eaa3349b22 100644 --- a/t/meson.build +++ b/t/meson.build @@ -8,6 +8,7 @@ clar_test_suites = [ 'unit-tests/u-oid-array.c', 'unit-tests/u-oidmap.c', 'unit-tests/u-oidtree.c', + 'unit-tests/u-parse-int.c', 'unit-tests/u-prio-queue.c', 'unit-tests/u-reftable-basics.c', 'unit-tests/u-reftable-block.c', diff --git a/t/unit-tests/u-parse-int.c b/t/unit-tests/u-parse-int.c new file mode 100644 index 0000000000..a1601bb16b --- /dev/null +++ b/t/unit-tests/u-parse-int.c @@ -0,0 +1,98 @@ +#include "unit-test.h" +#include "parse.h" + +static void check_int(const char *buf, size_t len, + size_t expect_ep_ofs, int expect_errno, + int expect_result) +{ + const char *ep; + int result; + bool ok = parse_int_from_buf(buf, len, &ep, &result); + + if (expect_errno) { + cl_assert(!ok); + cl_assert_equal_i(expect_errno, errno); + return; + } + + cl_assert(ok); + cl_assert_equal_i(expect_result, result); + cl_assert_equal_i(expect_ep_ofs, ep - buf); +} + +static void check_int_str(const char *buf, size_t ofs, int err, int res) +{ + check_int(buf, strlen(buf), ofs, err, res); +} + +static void check_int_full(const char *buf, int res) +{ + check_int_str(buf, strlen(buf), 0, res); +} + +static void check_int_err(const char *buf, int err) +{ + check_int(buf, strlen(buf), 0, err, 0); +} + +void test_parse_int__basic(void) +{ + cl_invoke(check_int_full("0", 0)); + cl_invoke(check_int_full("11", 11)); + cl_invoke(check_int_full("-23", -23)); + cl_invoke(check_int_full("+23", 23)); + + cl_invoke(check_int_str(" 31337 ", 7, 0, 31337)); + + cl_invoke(check_int_err(" garbage", EINVAL)); + cl_invoke(check_int_err("", EINVAL)); + cl_invoke(check_int_err("-", EINVAL)); + + cl_invoke(check_int("123", 2, 2, 0, 12)); +} + +void test_parse_int__range(void) +{ + /* + * These assume a 32-bit int. We could avoid that with some + * conditionals, but it's probably better for the test to + * fail noisily and we can decide how to handle it then. + */ + cl_invoke(check_int_full("2147483647", 2147483647)); + cl_invoke(check_int_err("2147483648", ERANGE)); + cl_invoke(check_int_full("-2147483647", -2147483647)); + cl_invoke(check_int_full("-2147483648", -2147483648)); + cl_invoke(check_int_err("-2147483649", ERANGE)); +} + +static void check_unsigned(const char *buf, uintmax_t max, + int expect_errno, uintmax_t expect_result) +{ + const char *ep; + uintmax_t result; + bool ok = parse_unsigned_from_buf(buf, strlen(buf), &ep, &result, max); + + if (expect_errno) { + cl_assert(!ok); + cl_assert_equal_i(expect_errno, errno); + return; + } + + cl_assert(ok); + cl_assert_equal_s(ep, ""); + /* + * Do not use cl_assert_equal_i_fmt(..., PRIuMAX) here. The macro + * casts to int under the hood, corrupting the values. + */ + clar__assert_equal(CLAR_CURRENT_FILE, CLAR_CURRENT_FUNC, + CLAR_CURRENT_LINE, + "expect_result != result", 1, + "%"PRIuMAX, expect_result, result); +} + +void test_parse_int__unsigned(void) +{ + cl_invoke(check_unsigned("4294967295", UINT_MAX, 0, 4294967295U)); + cl_invoke(check_unsigned("1053", 1000, ERANGE, 0)); + cl_invoke(check_unsigned("-17", UINT_MAX, EINVAL, 0)); +}