diff --git a/Makefile b/Makefile
index 237b56fc9d..751bd40a9f 100644
--- a/Makefile
+++ b/Makefile
@@ -1510,6 +1510,7 @@ CLAR_TEST_SUITES += u-mem-pool
 CLAR_TEST_SUITES += u-oid-array
 CLAR_TEST_SUITES += u-oidmap
 CLAR_TEST_SUITES += u-oidtree
+CLAR_TEST_SUITES += u-parse-int
 CLAR_TEST_SUITES += u-prio-queue
 CLAR_TEST_SUITES += u-reftable-basics
 CLAR_TEST_SUITES += u-reftable-block
diff --git a/parse.c b/parse.c
index f626846def..1dcbcf64a1 100644
--- a/parse.c
+++ b/parse.c
@@ -209,3 +209,104 @@ unsigned long git_env_ulong(const char *k, unsigned long val)
 		die(_("failed to parse %s"), k);
 	return val;
 }
+
+/*
+ * Helper that handles both signed/unsigned cases. If "negate" is NULL,
+ * negative values are disallowed. If not NULL and the input is negative,
+ * the value is range-checked but the caller is responsible for actually doing
+ * the negation. You probably don't want to use this! Use one of
+ * parse_signed_from_buf() or parse_unsigned_from_buf() below.
+ *
+ * On failure errno is set and neither "*ep" nor "*ret" is written.
+ */
+static bool parse_from_buf_internal(const char *buf, size_t len,
+				    const char **ep, bool *negate,
+				    uintmax_t *ret, uintmax_t max)
+{
+	const char *end = buf + len;
+	uintmax_t val = 0;
+
+	while (buf < end && isspace(*buf))
+		buf++;
+
+	if (negate)
+		*negate = false;
+	if (buf < end && *buf == '-') {
+		if (!negate) {
+			errno = EINVAL;
+			return false;
+		}
+		buf++;
+		*negate = true;
+		/* Assume negative range is always one larger than positive. */
+		max = max + 1;
+	} else if (buf < end && *buf == '+') {
+		buf++;
+	}
+
+	if (buf == end || !isdigit(*buf)) {
+		errno = EINVAL;
+		return false;
+	}
+
+	while (buf < end && isdigit(*buf)) {
+		int digit = *buf - '0';
+
+		if (val > max / 10) {
+			errno = ERANGE;
+			return false;
+		}
+		val *= 10;
+		if (val > max - digit) {
+			errno = ERANGE;
+			return false;
+		}
+		val += digit;
+
+		buf++;
+	}
+
+	*ep = buf;
+	*ret = val;
+	return true;
+}
+
+bool parse_unsigned_from_buf(const char *buf, size_t len, const char **ep,
+			     uintmax_t *ret, uintmax_t max)
+{
+	return parse_from_buf_internal(buf, len, ep, NULL, ret, max);
+}
+
+bool parse_signed_from_buf(const char *buf, size_t len, const char **ep,
+			   intmax_t *ret, intmax_t max)
+{
+	uintmax_t u_ret;
+	bool negate;
+
+	if (!parse_from_buf_internal(buf, len, ep, &negate, &u_ret, max))
+		return false;
+	/*
+	 * Range already checked internally, but we must apply negation
+	 * ourselves since only we have the signed integer type.
+	 *
+	 * The magnitude of a negative value may be "max + 1", which does
+	 * not fit in intmax_t when max is INTMAX_MAX; converting first and
+	 * negating afterwards would overflow. Negate "u_ret - 1" (which
+	 * always fits) and subtract the final 1 in the signed domain.
+	 */
+	if (negate && u_ret)
+		*ret = -(intmax_t)(u_ret - 1) - 1;
+	else
+		*ret = u_ret;
+	return true;
+}
+
+bool parse_int_from_buf(const char *buf, size_t len, const char **ep, int *ret)
+{
+	intmax_t tmp;
+	if (!parse_signed_from_buf(buf, len, ep, &tmp,
+				   maximum_signed_value_of_type(int)))
+		return false;
+	*ret = tmp;
+	return true;
+}
diff --git a/parse.h b/parse.h
index f80cc5b9fd..53663c8939 100644
--- a/parse.h
+++ b/parse.h
@@ -19,4 +19,21 @@ int git_parse_maybe_bool_text(const char *value);
 int git_env_bool(const char *, int);
 unsigned long git_env_ulong(const char *, unsigned long);
 
+/*
+ * These functions parse an integer from a buffer that does not need to be
+ * NUL-terminated. They return true on success, or false if no integer is found
+ * (in which case errno is set to EINVAL) or if the integer is out of the
+ * allowable range (in which case errno is ERANGE).
+ *
+ * You must pass in a non-NULL value for "ep", which returns a pointer to the
+ * next character in the buf (similar to strtol(), etc).
+ *
+ * These functions always parse in base 10 (and do not allow input like "0xff"
+ * to switch to base 16). They do not allow unit suffixes like git_parse_int(),
+ * above.
+ */
+bool parse_unsigned_from_buf(const char *buf, size_t len, const char **ep, uintmax_t *ret, uintmax_t max);
+bool parse_signed_from_buf(const char *buf, size_t len, const char **ep, intmax_t *ret, intmax_t max);
+bool parse_int_from_buf(const char *buf, size_t len, const char **ep, int *ret);
+
 #endif /* PARSE_H */
diff --git a/t/meson.build b/t/meson.build
index 7c994d4643..1289614545 100644
--- a/t/meson.build
+++ b/t/meson.build
@@ -8,6 +8,7 @@ clar_test_suites = [
   'unit-tests/u-oid-array.c',
   'unit-tests/u-oidmap.c',
   'unit-tests/u-oidtree.c',
+  'unit-tests/u-parse-int.c',
   'unit-tests/u-prio-queue.c',
   'unit-tests/u-reftable-basics.c',
   'unit-tests/u-reftable-block.c',
diff --git a/t/unit-tests/u-parse-int.c b/t/unit-tests/u-parse-int.c
new file mode 100644
index 0000000000..a1601bb16b
--- /dev/null
+++ b/t/unit-tests/u-parse-int.c
@@ -0,0 +1,98 @@
+#include "unit-test.h"
+#include "parse.h"
+
+static void check_int(const char *buf, size_t len,
+		      size_t expect_ep_ofs, int expect_errno,
+		      int expect_result)
+{
+	const char *ep;
+	int result;
+	bool ok = parse_int_from_buf(buf, len, &ep, &result);
+
+	if (expect_errno) {
+		cl_assert(!ok);
+		cl_assert_equal_i(expect_errno, errno);
+		return;
+	}
+
+	cl_assert(ok);
+	cl_assert_equal_i(expect_result, result);
+	cl_assert_equal_i(expect_ep_ofs, ep - buf);
+}
+
+static void check_int_str(const char *buf, size_t ofs, int err, int res)
+{
+	check_int(buf, strlen(buf), ofs, err, res);
+}
+
+static void check_int_full(const char *buf, int res)
+{
+	check_int_str(buf, strlen(buf), 0, res);
+}
+
+static void check_int_err(const char *buf, int err)
+{
+	check_int(buf, strlen(buf), 0, err, 0);
+}
+
+void test_parse_int__basic(void)
+{
+	cl_invoke(check_int_full("0", 0));
+	cl_invoke(check_int_full("11", 11));
+	cl_invoke(check_int_full("-23", -23));
+	cl_invoke(check_int_full("+23", 23));
+
+	cl_invoke(check_int_str("  31337  ", 7, 0, 31337));
+
+	cl_invoke(check_int_err(" garbage", EINVAL));
+	cl_invoke(check_int_err("", EINVAL));
+	cl_invoke(check_int_err("-", EINVAL));
+
+	cl_invoke(check_int("123", 2, 2, 0, 12));
+}
+
+void test_parse_int__range(void)
+{
+	/*
+	 * These assume a 32-bit int. We could avoid that with some
+	 * conditionals, but it's probably better for the test to
+	 * fail noisily and we can decide how to handle it then.
+	 */
+	cl_invoke(check_int_full("2147483647", 2147483647));
+	cl_invoke(check_int_err("2147483648", ERANGE));
+	cl_invoke(check_int_full("-2147483647", -2147483647));
+	cl_invoke(check_int_full("-2147483648", -2147483648));
+	cl_invoke(check_int_err("-2147483649", ERANGE));
+}
+
+static void check_unsigned(const char *buf, uintmax_t max,
+			   int expect_errno, uintmax_t expect_result)
+{
+	const char *ep;
+	uintmax_t result;
+	bool ok = parse_unsigned_from_buf(buf, strlen(buf), &ep, &result, max);
+
+	if (expect_errno) {
+		cl_assert(!ok);
+		cl_assert_equal_i(expect_errno, errno);
+		return;
+	}
+
+	cl_assert(ok);
+	cl_assert_equal_s(ep, "");
+	/*
+	 * Do not use cl_assert_equal_i_fmt(..., PRIuMAX) here. The macro
+	 * casts to int under the hood, corrupting the values.
+	 */
+	clar__assert_equal(CLAR_CURRENT_FILE, CLAR_CURRENT_FUNC,
+			   CLAR_CURRENT_LINE,
+			   "expect_result != result", 1,
+			   "%"PRIuMAX, expect_result, result);
+}
+
+void test_parse_int__unsigned(void)
+{
+	cl_invoke(check_unsigned("4294967295", UINT_MAX, 0, 4294967295U));
+	cl_invoke(check_unsigned("1053", 1000, ERANGE, 0));
+	cl_invoke(check_unsigned("-17", UINT_MAX, EINVAL, 0));
+}