Skip to content

Commit

Permalink
update erthink
Browse files Browse the repository at this point in the history
  • Loading branch information
erthink committed Apr 3, 2020
1 parent 604f0b5 commit 658b50f
Show file tree
Hide file tree
Showing 10 changed files with 209 additions and 141 deletions.
15 changes: 9 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,20 @@ This fork of the benchmark was created to demonstrate the performance superiorit

Briefly, about this double-to-string implementation:

1. it is fastest Grisu-based, but not exactly the Grisu3 nor Grisu2;
1. It is fastest Grisu-based, but not exactly the Grisu3 nor Grisu2;

2. compared to Ryū this implementation significantly less in code size and spends less clock cycles per digit, but may slightly inferior in a whole on a 16-17 digit values.
2. Compared to Ryū, this implementation is significantly smaller in code size and spends fewer clock cycles per digit, but may be slightly inferior overall on 16-17 digit values.

3. output string representation _always_ roundtrip convertible to the original value, i.e. `strtod()` for character string result will return the exactly original value.
3. Output string representation is _always_ roundtrip convertible to the original value, i.e. `strtod()` applied to the resulting character string will return exactly the original value.

4. generated string representation is shortest for more than 99.95% of IEEE-754 double values, i.e. one extra digit for less than 0.05% values.
4. Generated string representation is shortest for more than `99.963%` of
IEEE-754 double values, i.e. one extra digit for less than `0.037%` of values.
Moreover, for less than `0.06%` of double values, the last digit differs
from an ideal nearest by `±1`.

5. for now produces only a raw ASCII representation, e.g. `-22250738585072014e-324` without dot and `'\0'` at the end;
5. For now produces only a raw ASCII representation, e.g. `-22250738585072014e-324` without dot and `'\0'` at the end;

Now I would like to get feedback, assess how much this is in demand and collect suggestions for further improvements. For instance, I think that it is reasonable to implement conversion with a specified precision (i.e., with a specified number of digits), but not provide a printf-like interface.
Now I would like to get feedback, assess how much this is in demand and collect suggestions for further improvements. For instance, I think that it is reasonable to implement conversion with a specified precision (i.e., with a specified number of digits), but not provide a printf-like interface. For more info see [issue#1](https://github.com/erthink/erthink/issues/1).

Any suggestions are welcome!

Expand Down
7 changes: 6 additions & 1 deletion src/erthink/.circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ jobs:
build:
docker:
- image: circleci/buildpack-deps:bionic-browsers
environment:
GTEST_SHUFFLE: 1
GTEST_RUNTIME_LIMIT: 199
steps:
- run: sudo apt update -q && sudo apt install cmake libgtest-dev clang-format-6.0
- checkout
Expand All @@ -11,7 +14,9 @@ jobs:
- run: cmake --version
- run: cmake .
- run: make --keep-going all
- run: ulimit -c unlimited && make --keep-going test
- run: |
ulimit -c unlimited
make --keep-going test
- run:
command: |
mkdir -p /tmp/artifacts
Expand Down
5 changes: 5 additions & 0 deletions src/erthink/.travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@ language: cpp
sudo: false
dist: xenial

env:
global:
- GTEST_SHUFFLE=1
- GTEST_RUNTIME_LIMIT=199

addons:
apt:
# sources:
Expand Down
2 changes: 1 addition & 1 deletion src/erthink/appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ version: 0.0.0.{build}

environment:
GTEST_SHUFFLE: 1
GTEST_RUNTIME_LIMIT: 99
GTEST_RUNTIME_LIMIT: 199
matrix:
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019
CMAKE_GENERATOR: Visual Studio 16 2019
Expand Down
59 changes: 36 additions & 23 deletions src/erthink/erthink_d2a.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,20 @@

#pragma once

/* Double-to-string conversion based on Grisu algorithm by Florian Loitsch,
/* Double-to-string conversion based on Grisu algorithm by Florian Loitsch
* https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf
*
* 1. Generated string representation always roundtrip convertible to
* the original value, i.e. by strtod() function.
* Seems this is the fastest Grisu-based implementation,
* but it is not exactly Grisu3 nor Grisu2:
*
* 2. Generated string representation is shortest for more than 99.95% of
* IEEE-754 double values, i.e. one extra digit for less than 0.05% values.
* 1. Generated string representation is ALWAYS roundtrip convertible to
* the original value, i.e. any correct implementation of strtod()
* will always return EXACTLY the original double value.
*
* 2. Generated string representation is shortest for more than 99.963% of
* IEEE-754 double values, i.e. one extra digit for less than 0.037% of values.
* Moreover, for less than 0.06% of double values, the last digit differs
* from an ideal nearest by ±1.
*
* 3. Compared to Ryū algorithm (by Ulf Adams), this implementation
* significantly less in code size and spends less clock cycles per digit,
Expand Down Expand Up @@ -167,12 +173,11 @@ struct diy_fp {
return diy_fp(upper.f - uint64_t(diff >> 1), upper.e);
}

void scale(const diy_fp &factor, bool roundup) {
uint_fast32_t scale(const diy_fp &factor) {
const uint64_t l = mul_64x64_128(f, factor.f, &f);
assert(f < UINT64_MAX - INT32_MAX);
if (roundup)
f += l >> 63;
e += factor.e + 64;
return static_cast<uint_fast32_t>(l >> 63);
}

diy_fp operator-(const diy_fp &rhs) const {
Expand Down Expand Up @@ -274,12 +279,12 @@ static __always_inline void round(char *&end, uint64_t delta, uint64_t rest,
(rest + ten_kappa < upper ||
(rest < upper &&
/* closer */ upper - rest >= rest + ten_kappa - upper))) {
if (unlikely(end[-1] < '2')) {
end[-1] -= 1;
if (unlikely(end[-1] < '1')) {
inout_exp10 += 1;
end -= 1;
return;
}
end[-1] -= 1;
rest += ten_kappa;
}
}
Expand Down Expand Up @@ -464,17 +469,17 @@ static inline char *convert(const bool accurate, diy_fp v, char *const buffer,
}

const int lead_zeros = clz64(v.f);
#if 0 /* Given the remaining optimizations, on average it does not have a \
positive effect, although a little faster in a simplest cases. */
// LY: check to output as ordinal
if (unlikely(v.e >= -52 && v.e <= lead_zeros) &&
/* Check to output as ordinal.
* Given the remaining optimizations, on average it does not have a positive
* effect (although it is a little faster in the simplest cases).
* However, it reduces the number of inaccuracies and non-shortest strings. */
if (!accurate && unlikely(v.e >= -52 && v.e <= lead_zeros) &&
(v.e >= 0 || (v.f << (64 + v.e)) == 0)) {
uint64_t ordinal = (v.e < 0) ? v.f >> -v.e : v.f << v.e;
assert(v.f == ((v.e < 0) ? ordinal << -v.e : ordinal >> v.e));
out_exp10 = 0;
return u2a(ordinal, buffer);
}
#endif

// LY: normalize
assert(v.f <= UINT64_MAX / 2 && lead_zeros > 1);
Expand All @@ -483,12 +488,20 @@ static inline char *convert(const bool accurate, diy_fp v, char *const buffer,
const diy_fp dec_factor = cached_power(v.e, out_exp10);

// LY: get boundaries
const int mojo =
v.f >= UINT64_C(0x8000000000001000) ? lead_zeros : lead_zeros - 1;
const uint64_t delta = (dec_factor.f >> (64 - mojo)) - 3;
v.scale(dec_factor, true);
return make_digits(accurate, v.f + delta / 2, delta, buffer, out_exp10, v.f,
-v.e);
const int mojo = v.f > UINT64_C(0x80000000000007ff) ? 64 : 65;
const uint64_t delta = dec_factor.f >> (mojo - lead_zeros);
assert(delta >= 2);
const uint_fast32_t lsb = v.scale(dec_factor);
if (accurate)
// -1 -2 1 0 1: non-shortest 9522 for 25M probes, ratio 0.038088%
// shortest errors: +5727 -9156
// non-shortest errors: +3 -5
return make_digits(accurate, v.f + ((delta + lsb - 1) >> 1), delta - 2,
buffer, out_exp10, v.f + lsb, -v.e);
else
// -1 -2 1 0 0: non-shortest 9522 for 25M probes, ratio 0.038088%
return make_digits(accurate, v.f + ((delta + lsb - 1) >> 1), delta - 2,
buffer, out_exp10, v.f, -v.e);
}

double inline cast(int64_t i64) {
Expand Down Expand Up @@ -548,13 +561,13 @@ d2a(const double &value,
return ptr;
}

static __maybe_unused char *d2a_accurate(
static inline __maybe_unused char *d2a_accurate(
const double &value,
char *const buffer /* upto d2a_max_chars for -22250738585072014e-324 */) {
return d2a<true>(value, buffer);
}

static __maybe_unused char *d2a_fast(
static inline __maybe_unused char *d2a_fast(
const double &value,
char *const buffer /* upto d2a_max_chars for -22250738585072014e-324 */) {
return d2a<false>(value, buffer);
Expand Down
Loading

0 comments on commit 658b50f

Please sign in to comment.