Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[libc] Alternative algorithm for decimal FP printf #123643

Merged
merged 7 commits into from
Feb 4, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions libc/config/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@
"value": false,
"doc": "Use the same mode for double and long double in printf."
},
"LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_FLOAT320": {
"value": false,
"doc": "Use an alternative printf float implementation based on 320-bit floats"
},
"LIBC_CONF_PRINTF_DISABLE_FIXED_POINT": {
"value": false,
"doc": "Disable printing fixed point values in printf and friends."
Expand Down
1 change: 1 addition & 0 deletions libc/docs/configure.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ to learn about the defaults for your platform and target.
- ``LIBC_CONF_PRINTF_DISABLE_WRITE_INT``: Disable handling of %n in printf format string.
- ``LIBC_CONF_PRINTF_FLOAT_TO_STR_NO_SPECIALIZE_LD``: Use the same mode for double and long double in printf.
- ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_DYADIC_FLOAT``: Use dyadic float for faster and smaller but less accurate printf doubles.
- ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_FLOAT320``: Use an alternative printf float implementation based on 320-bit floats
- ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE``: Use large table for better printf long double performance.
* **"pthread" options**
- ``LIBC_CONF_RAW_MUTEX_DEFAULT_SPIN_COUNT``: Default number of spins before blocking if a mutex is in contention (default to 100).
Expand Down
2 changes: 2 additions & 0 deletions libc/src/__support/CPP/algorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ template <class T> LIBC_INLINE constexpr const T &min(const T &a, const T &b) {
return (a < b) ? a : b;
}

template <class T> LIBC_INLINE constexpr T abs(T a) { return a < 0 ? -a : a; }

template <class InputIt, class UnaryPred>
LIBC_INLINE constexpr InputIt find_if_not(InputIt first, InputIt last,
UnaryPred q) {
Expand Down
179 changes: 179 additions & 0 deletions libc/src/__support/FPUtil/dyadic_float.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,40 @@
namespace LIBC_NAMESPACE_DECL {
namespace fputil {

// Decide whether to round up a UInt at a given bit position, based on
// the current rounding mode. The assumption is that the caller is
// going to make the integer `value >> rshift`, and then might need to
// round it up by 1 depending on the value of the bits shifted off the
// bottom.
//
// `logical_sign` causes the behavior of FE_DOWNWARD and FE_UPWARD to
// be reversed, which is what you'd want if this is the mantissa of a
// negative floating-point number.
template <size_t Bits>
LIBC_INLINE constexpr bool
need_to_round_up(const LIBC_NAMESPACE::UInt<Bits> &value, size_t rshift,
Sign logical_sign) {
switch (quick_get_round()) {
case FE_TONEAREST:
if (rshift > 0 && rshift <= Bits && value.get_bit(rshift - 1)) {
// We round up, unless the value is an exact halfway case and
// the bit that will end up in the units place is 0, in which
// case tie-break-to-even says round down.
return value.get_bit(rshift) != 0 || (value << (Bits - rshift + 1)) != 0;
} else {
return false;
}
case FE_TOWARDZERO:
return false;
case FE_DOWNWARD:
return logical_sign.is_neg() && (value << (Bits - rshift)) != 0;
case FE_UPWARD:
return logical_sign.is_pos() && (value << (Bits - rshift)) != 0;
default:
__builtin_unreachable();
}
}

// A generic class to perform computations of high precision floating points.
// We store the value in dyadic format, including 3 fields:
// sign : boolean value - false means positive, true means negative
Expand Down Expand Up @@ -101,6 +135,27 @@ template <size_t Bits> struct DyadicFloat {
return exponent + (Bits - 1);
}

// Produce a correctly rounded DyadicFloat from a too-large mantissa,
// by shifting it down and rounding if necessary.
template <size_t MantissaBits>
LIBC_INLINE constexpr static DyadicFloat<Bits>
round(Sign result_sign, int result_exponent,
const LIBC_NAMESPACE::UInt<MantissaBits> &input_mantissa,
size_t rshift) {
MantissaType result_mantissa(input_mantissa >> rshift);
if (need_to_round_up(input_mantissa, rshift, result_sign)) {
++result_mantissa;
if (result_mantissa == 0) {
// Rounding up made the mantissa integer wrap round to 0,
// carrying a bit off the top. So we've rounded up to the next
// exponent.
result_mantissa.set_bit(Bits - 1);
++result_exponent;
}
}
return DyadicFloat(result_sign, result_exponent, result_mantissa);
}

#ifdef LIBC_TYPES_HAS_FLOAT16
template <typename T, bool ShouldSignalExceptions>
LIBC_INLINE constexpr cpp::enable_if_t<
Expand Down Expand Up @@ -374,6 +429,34 @@ template <size_t Bits> struct DyadicFloat {

return new_mant;
}

LIBC_INLINE constexpr MantissaType
as_mantissa_type_rounded(bool *overflowed = nullptr) const {
if (mantissa.is_zero())
return 0;

MantissaType new_mant = mantissa;
if (exponent > 0) {
new_mant <<= exponent;
if (overflowed)
*overflowed = (new_mant >> exponent) != mantissa;
} else if (exponent < 0) {
size_t shift = -exponent;
new_mant >>= shift;
if (need_to_round_up(mantissa, shift, sign))
++new_mant;
}

if (sign.is_neg()) {
new_mant = (~new_mant) + 1;
}

return new_mant;
}

LIBC_INLINE constexpr DyadicFloat operator-() const {
return DyadicFloat(sign.negate(), exponent, mantissa);
}
};

// Quick add - Add 2 dyadic floats with rounding toward 0 and then normalize the
Expand Down Expand Up @@ -433,6 +516,12 @@ LIBC_INLINE constexpr DyadicFloat<Bits> quick_add(DyadicFloat<Bits> a,
return result.normalize();
}

template <size_t Bits>
LIBC_INLINE constexpr DyadicFloat<Bits> quick_sub(DyadicFloat<Bits> a,
DyadicFloat<Bits> b) {
return quick_add(a, -b);
}

// Quick Mul - Slightly less accurate but efficient multiplication of 2 dyadic
// floats with rounding toward 0 and then normalize the output:
// result.exponent = a.exponent + b.exponent + Bits,
Expand Down Expand Up @@ -464,6 +553,96 @@ LIBC_INLINE constexpr DyadicFloat<Bits> quick_mul(const DyadicFloat<Bits> &a,
return result;
}

// Correctly rounded multiplication of 2 dyadic floats, assuming the
// exponent remains within range.
template <size_t Bits>
LIBC_INLINE constexpr DyadicFloat<Bits>
rounded_mul(const DyadicFloat<Bits> &a, const DyadicFloat<Bits> &b) {
using DblMant = LIBC_NAMESPACE::UInt<(2 * Bits)>;
Sign result_sign = (a.sign != b.sign) ? Sign::NEG : Sign::POS;
int result_exponent = a.exponent + b.exponent + static_cast<int>(Bits);
auto product = DblMant(a.mantissa) * DblMant(b.mantissa);
// As in quick_mul(), renormalize by 1 bit manually rather than countl_zero
if (product.get_bit(2 * Bits - 1) == 0) {
product <<= 1;
result_exponent -= 1;
}

return DyadicFloat<Bits>::round(result_sign, result_exponent, product, Bits);
}

// Approximate reciprocal - given a nonzero a, make a good approximation to 1/a.
// The method is Newton-Raphson iteration, based on quick_mul.
template <size_t Bits, typename = cpp::enable_if_t<(Bits >= 32)>>
LIBC_INLINE constexpr DyadicFloat<Bits>
approx_reciprocal(const DyadicFloat<Bits> &a) {
// Given an approximation x to 1/a, a better one is x' = x(2-ax).
//
// You can derive this by using the Newton-Raphson formula with the function
// f(x) = 1/x - a. But another way to see that it works is to say: suppose
// that ax = 1-e for some small error e. Then ax' = ax(2-ax) = (1-e)(1+e) =
// 1-e^2. So the error in x' is the square of the error in x, i.e. the number
// of correct bits in x' is double the number in x.

// An initial approximation to the reciprocal
DyadicFloat<Bits> x(Sign::POS, -32 - a.exponent - Bits,
uint64_t(0xFFFFFFFFFFFFFFFF) /
static_cast<uint64_t>(a.mantissa >> (Bits - 32)));

// The constant 2, which we'll need in every iteration
DyadicFloat<Bits> two(Sign::POS, 1, 1);

// We expect at least 31 correct bits from our 32-bit starting approximation
size_t ok_bits = 31;

// The number of good bits doubles in each iteration, except that rounding
// errors introduce a little extra each time. Subtract a bit from our
// accuracy assessment to account for that.
while (ok_bits < Bits) {
x = quick_mul(x, quick_sub(two, quick_mul(a, x)));
ok_bits = 2 * ok_bits - 1;
}

return x;
}

// Correctly rounded division of 2 dyadic floats, assuming the
// exponent remains within range.
template <size_t Bits>
LIBC_INLINE constexpr DyadicFloat<Bits>
rounded_div(const DyadicFloat<Bits> &af, const DyadicFloat<Bits> &bf) {
using DblMant = LIBC_NAMESPACE::UInt<(Bits * 2 + 64)>;

// Make an approximation to the quotient as a * (1/b). Both the
// multiplication and the reciprocal are a bit sloppy, which doesn't
// matter, because we're going to correct for that below.
auto qf = fputil::quick_mul(af, fputil::approx_reciprocal(bf));

// Switch to BigInt and stop using quick_add and quick_mul: now
// we're working in exact integers so as to get the true remainder.
DblMant a = af.mantissa, b = bf.mantissa, q = qf.mantissa;
q <<= 2; // leave room for a round bit, even if exponent decreases
a <<= af.exponent - bf.exponent - qf.exponent + 2;
DblMant qb = q * b;
if (qb < a) {
DblMant too_small = a - b;
while (qb <= too_small) {
qb += b;
++q;
}
} else {
while (qb > a) {
qb -= b;
--q;
}
}

DyadicFloat<(Bits * 2)> qbig(qf.sign, qf.exponent - 2, q);
auto qfinal = DyadicFloat<Bits>::round(qbig.sign, qbig.exponent + Bits,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: just return DyadicFloat<Bits>::round...

qbig.mantissa, Bits);
return qfinal;
}

// Simple polynomial approximation.
template <size_t Bits>
LIBC_INLINE constexpr DyadicFloat<Bits>
Expand Down
18 changes: 12 additions & 6 deletions libc/src/__support/big_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,18 @@ struct BigInt {
// Return the i-th word of the number.
LIBC_INLINE constexpr WordType &operator[](size_t i) { return val[i]; }

// Return the i-th bit of the number.
LIBC_INLINE constexpr bool get_bit(size_t i) const {
const size_t word_index = i / WORD_SIZE;
return 1 & (val[word_index] >> (i % WORD_SIZE));
}

// Set the i-th bit of the number.
LIBC_INLINE constexpr void set_bit(size_t i) {
const size_t word_index = i / WORD_SIZE;
val[word_index] |= WordType(1) << (i % WORD_SIZE);
}

private:
LIBC_INLINE friend constexpr int cmp(const BigInt &lhs, const BigInt &rhs) {
constexpr auto compare = [](WordType a, WordType b) {
Expand Down Expand Up @@ -989,12 +1001,6 @@ struct BigInt {
LIBC_INLINE constexpr void clear_msb() {
val.back() &= mask_trailing_ones<WordType, WORD_SIZE - 1>();
}

LIBC_INLINE constexpr void set_bit(size_t i) {
const size_t word_index = i / WORD_SIZE;
val[word_index] |= WordType(1) << (i % WORD_SIZE);
}

LIBC_INLINE constexpr static Division divide_unsigned(const BigInt &dividend,
const BigInt &divider) {
BigInt remainder = dividend;
Expand Down
2 changes: 2 additions & 0 deletions libc/src/__support/sign.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ struct Sign {
static const Sign POS;
static const Sign NEG;

LIBC_INLINE constexpr Sign negate() const { return Sign(!is_negative); }

private:
LIBC_INLINE constexpr explicit Sign(bool is_negative)
: is_negative(is_negative) {}
Expand Down
3 changes: 3 additions & 0 deletions libc/src/stdio/printf_core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ endif()
if(LIBC_CONF_PRINTF_FLOAT_TO_STR_NO_SPECIALIZE_LD)
list(APPEND printf_config_copts "-DLIBC_COPT_FLOAT_TO_STR_NO_SPECIALIZE_LD")
endif()
if(LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_FLOAT320)
list(APPEND printf_config_copts "-DLIBC_COPT_FLOAT_TO_STR_USE_FLOAT320")
endif()
if(LIBC_CONF_PRINTF_DISABLE_FIXED_POINT)
list(APPEND printf_config_copts "-DLIBC_COPT_PRINTF_DISABLE_FIXED_POINT")
endif()
Expand Down
4 changes: 4 additions & 0 deletions libc/src/stdio/printf_core/converter_atlas.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@
// defines convert_float_decimal
// defines convert_float_dec_exp
// defines convert_float_dec_auto
#ifdef LIBC_COPT_FLOAT_TO_STR_USE_FLOAT320
#include "src/stdio/printf_core/float_dec_converter_limited.h"
#else
#include "src/stdio/printf_core/float_dec_converter.h"
#endif
// defines convert_float_hex_exp
#include "src/stdio/printf_core/float_hex_converter.h"
#endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
Expand Down
Loading
Loading