miloyip · erthink · Mar 24, 2019 · Mar 24, 2019 · Apr 12, 2019 · Apr 12, 2019
diff --git a/.clang-format b/.clang-format
@@ -0,0 +1,2 @@
+BasedOnStyle: LLVM
+Standard: Cpp11
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,12 @@
-/bin/*
-/build/gmake
-/build/vs*/
-/intermediate
-result/result.csv
-result/result.html
+.le.ini
+CMakeCache.txt
+CMakeFiles/
+DartConfiguration.tcl
+Makefile
+CMakeLists.txt.user
+cmake_install.cmake
+src/CMakeFiles/
+src/Makefile
+src/cmake_install.cmake
+@*/
+results/*.csv
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -0,0 +1,181 @@
+cmake_minimum_required(VERSION 3.8.2)
+
+# Policy setup (must precede project(), which performs compiler detection):
+#  * CMP0054 - if() only dereferences unquoted arguments;
+#  * CMP0075 - include-file check macros honour CMAKE_REQUIRED_LIBRARIES;
+#  * CMP0025 - Apple's Clang is reported as "AppleClang".
+# The last two are probed first, so CMake releases that do not know a policy
+# simply skip it.
+cmake_policy(SET CMP0054 NEW)
+foreach(optional_policy CMP0075 CMP0025)
+  if(POLICY ${optional_policy})
+    cmake_policy(SET ${optional_policy} NEW)
+  endif()
+endforeach()
+
+project(d2a-benchmark LANGUAGES C CXX)
+
+#
+# Default to a Release build when the user did not choose a build type.
+# Multi-config generators (those that populate CMAKE_CONFIGURATION_TYPES,
+# e.g. Visual Studio or Xcode) pick the configuration at build time and do
+# not use CMAKE_BUILD_TYPE, so the cache is left untouched for them.
+if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Release CACHE STRING
+    "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel."
+    FORCE)
+endif()
+# Quoted on purpose: an empty build type must produce an empty string
+# instead of a "wrong number of arguments" error.
+string(TOUPPER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_UPPERCASE)
+
+# Everything below is built into the single d2a-benchmark executable.
+# Headers (and the fpconv license file) are listed next to the compiled
+# sources, grouped by the implementation they belong to.
+set(SOURCES
+  # stb_sprintf
+  src/stb_test.cpp src/stb/stb_sprintf.h
+
+  # double-conversion
+  src/double-conversion/bignum.cc
+  src/double-conversion/bignum-dtoa.cc
+  src/double-conversion/bignum-dtoa.h
+  src/double-conversion/bignum.h
+  src/double-conversion/cached-powers.cc
+  src/double-conversion/cached-powers.h
+  src/double-conversion/diy-fp.h
+  src/double-conversion/double-conversion.h
+  src/double-conversion/double-to-string.cc
+  src/double-conversion/double-to-string.h
+  src/double-conversion/fast-dtoa.cc
+  src/double-conversion/fast-dtoa.h
+  src/double-conversion/fixed-dtoa.cc
+  src/double-conversion/fixed-dtoa.h
+  src/double-conversion/ieee.h
+  src/double-conversion/string-to-double.cc
+  src/double-conversion/string-to-double.h
+  src/double-conversion/strtod.cc
+  src/double-conversion/strtod.h
+  src/double-conversion/utils.h
+  src/doubleconvtest.cpp
+
+  # emyg
+  src/emyg/emyg_dtoa.c
+  src/emyg/emyg_dtoa.h
+  src/emygtest.cpp
+
+  # erthink
+  src/erthink/erthink.h
+  src/erthink/erthink_arch.h
+  src/erthink/erthink_bswap.h
+  src/erthink/erthink_byteorder.h
+  src/erthink/erthink_carryadd.h
+  src/erthink/erthink_clz.h++
+  src/erthink/erthink_d2a.h++
+  src/erthink/erthink_defs.h
+  src/erthink/erthink_intrin.h
+  src/erthink/erthink_misc.h++
+  src/erthink/erthink_mul.h
+  src/erthink/erthink_rot.h
+  src/erthink/erthink_u2a.h++
+  src/erthink_test.cpp
+
+  # ryu
+  src/ryu/common.h
+  src/ryu/d2fixed.c
+  src/ryu/d2fixed_full_table.h
+  src/ryu/d2s.c
+  src/ryu/d2s_full_table.h
+  src/ryu/d2s_intrinsics.h
+  src/ryu/d2s_small_table.h
+  src/ryu/digit_table.h
+  src/ryu/f2s.c
+  src/ryu/f2s_full_table.h
+  src/ryu/generic_128.c
+  src/ryu/generic_128.h
+  src/ryu/ryu_generic_128.h
+  src/ryu/ryu.h
+  src/ryu/ryu_parse.h
+  src/ryu/s2d.c
+  src/ryu_test.cpp
+
+  # floaxie
+  src/floaxie/atof.h
+  src/floaxie/bit_ops.h
+  src/floaxie/cached_power.h
+  src/floaxie/conversion_status.h
+  src/floaxie/default_fallback.h
+  src/floaxie/diy_fp.h
+  src/floaxie/fraction.h
+  src/floaxie/ftoa.h
+  src/floaxie/grisu.h
+  src/floaxie/huge_val.h
+  src/floaxie/integer_of_size.h
+  src/floaxie/k_comp.h
+  src/floaxie/krosh.h
+  src/floaxie/memwrap.h
+  src/floaxie/powers_ten.h
+  src/floaxie/powers_ten_double.h
+  src/floaxie/powers_ten_single.h
+  src/floaxie/prettify.h
+  src/floaxie/print.h
+  src/floaxie/static_pow.h
+  src/floaxie/type_punning_cast.h
+  src/floaxietest.cpp
+
+  # fmt
+  src/fmt/chrono.h
+  src/fmt/color.h
+  src/fmt/compile.h
+  src/fmt/core.h
+  src/fmt/format.h
+  src/fmt/format-inl.h
+  src/fmt/locale.h
+  src/fmt/os.h
+  src/fmt/ostream.h
+  src/fmt/posix.h
+  src/fmt/printf.h
+  src/fmt/ranges.h
+  src/fmt/format.cc
+  src/fmt/os.cc
+  src/fmttest.cpp
+
+  # fpconv
+  src/fpconv/fpconv.c
+  src/fpconv/fpconv.h
+  src/fpconv/license
+  src/fpconv/powers.h
+  src/fpconvtest.cpp
+
+  # gay
+  src/gay/dtoa.c
+  src/gay/g_fmt.c
+  src/gaytest.cpp
+
+  # grisu
+  src/grisu/diy_fp.h
+  src/grisu/double.h
+  src/grisu/fast_exponent.h
+  src/grisu/grisu2.h
+  src/grisu/grisu2b_59_56.c
+  src/grisu/k_comp.h
+  src/grisu/powers.h
+  src/grisu/powers_ten_round64.h
+  src/grisu/prettify.h
+  src/grisu2btest.cpp
+
+  # milo and msinttypes
+  src/milo/dtoa_milo.h src/milotest.cpp
+  src/msinttypes/inttypes.h src/msinttypes/stdint.h
+
+  # standard-library baselines
+  src/ostringstream.cpp
+  src/ostrstream.cpp
+  src/sprintf.cpp
+
+  # benchmark harness
+  src/resultfilename.h
+  src/test.h
+  src/timer.h
+  src/null.cpp
+  src/main.cpp
+  )
+
+add_executable(d2a-benchmark ${SOURCES})
+
+# Put the directory of this CMakeLists.txt on the include path of the
+# benchmark target only, instead of on every target of the directory.
+target_include_directories(d2a-benchmark PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
+
+# Define _CRT_SECURE_NO_WARNINGS only when the C++ compiler is MSVC; for any
+# other compiler the generator expression expands to nothing.
+target_compile_definitions(d2a-benchmark PRIVATE
+  $<$<CXX_COMPILER_ID:MSVC>:_CRT_SECURE_NO_WARNINGS>)
+
+# Select the C++ standard for the benchmark.
+# A value supplied by the user (e.g. -DCXX_STANDARD=14) always wins;
+# otherwise the newest of C++17/14/11 that the compiler advertises through
+# CMAKE_CXX_COMPILE_FEATURES is chosen.
+if(NOT DEFINED CXX_STANDARD)
+  foreach(candidate_std 17 14 11)
+    if("cxx_std_${candidate_std}" IN_LIST CMAKE_CXX_COMPILE_FEATURES)
+      set(CXX_STANDARD ${candidate_std})
+      break()
+    endif()
+  endforeach()
+endif()
+if(DEFINED CXX_STANDARD)
+  message(STATUS "Use C++${CXX_STANDARD}")
+  # Request the standard that was actually selected, so the status message
+  # above matches what the target is compiled with.
+  target_compile_features(d2a-benchmark PRIVATE "cxx_std_${CXX_STANDARD}")
+endif()
diff --git a/license.txt → LICENSE b/license.txt → LICENSE
@@ -1,4 +1,5 @@
-Copyright (C) 2014 Milo Yip
+Copyright (C) 2019 Leonid Yuriev.
+Copyright (C) 2014 Milo Yip.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/README.md b/README.md
@@ -0,0 +1,140 @@
+<!-- Required extensions: pymdownx.betterem, pymdownx.tilde, pymdownx.emoji, pymdownx.tasklist, pymdownx.superfences -->
+
+# dtoa Benchmark
+
+Copyright(c) 2019-2020 Leonid Yuriev <[email protected]>,
+Copyright(c) 2014 Milo Yip <[email protected]>
+
+## Preamble
+
+This fork of the benchmark was created to demonstrate the performance superiority of my new `dtoa()` implementation over others.
+
+[Ryū algorithm](https://github.com/ulfjack/ryu) by Ulf Adams is known to be the fastest in many cases, especially for short string representations. However, I would like to draw attention to my own implementation, which competes on speed but is more compact.
+
+Briefly, about this double-to-string implementation:
+
+1. It is the fastest Grisu-based implementation, but it is neither exactly Grisu3 nor Grisu2;
+
+2. Compared to Ryū, this implementation is significantly smaller in code size and spends fewer clock cycles per digit, but may be slightly slower overall on 16-17 digit values.
+
+3. The output string representation is _always_ roundtrip convertible to the original value, i.e. `strtod()` applied to the resulting character string returns exactly the original value.
+
+4. The generated string representation is the shortest for more than `99.963%` of
+IEEE-754 double values, i.e. it has one extra digit for less than `0.037%` of values.
+Moreover, for less than `0.06%` of double values, the last digit differs
+from the ideal nearest one by `±1`.
+
+5. For now it produces only a raw ASCII representation, e.g. `-22250738585072014e-324`, without a dot and without a terminating `'\0'`;
+
+Now I would like to get feedback, assess how much demand there is for this, and collect suggestions for further improvements. For instance, I think that it is reasonable to implement conversion with a specified precision (i.e., with a specified number of digits), but not to provide a printf-like interface. For more info see [issue#1](https://github.com/erthink/erthink/issues/1).
+
+Any suggestions are welcome!
+
+## Introduction
+
+This benchmark evaluates the performance of conversion from double precision IEEE-754 floating point (`double`) to ASCII string. The function prototype is:
+
+~~~~~~~~cpp
+void dtoa(double value, char* buffer);
+~~~~~~~~
+
+The character string result **must** be convertible to the original value **exactly** via some correct implementation of `strtod()`, i.e. roundtrip convertible.
+
+Note that `dtoa()` is *not* a standard function in C and C++.
+
+## Procedure
+
+Firstly the program verifies the correctness of implementations.
+
+Then, **RandomDigit** case for benchmark is carried out:
+
+* Generate 2000 random `double` values, filtering out `+/-inf` and `nan`. Then convert them to limited precision (1 to 17 decimal digits in the significand).
+
+* Convert these generated numbers into ASCII.
+
+* Each digit group is run for 10000 times. The minimum time duration is measured for 42 trials.
+
+## Build and Run
+
+1. Obtain [cmake](https://cmake.org/download/)
+2. Configure build system by running `cmake .` and build benchmark by running `cmake --build .`
+3. On success, the `d2a-benchmark` executable is generated in `dtoa-benchmark/` or in a corresponding subdirectory (e.g. `Release` on Windows); run it.
+4. The results in CSV format will be written to `dtoa-benchmark/results`.
+5. Run GNU `make` in `dtoa-benchmark/results` to generate results in HTML.
+
+## Results
+
+The following are results measured by `RandomDigit` testcase on a PC (Core i7-7820HQ CPU @ 2.90GHz),
+where `dtoa()` is compiled by GNU C++ 9.2.1 for x86-64 Linux.
+The speedup is based on `sprintf`'s _Sum_ values.
+
+Function      |  Min ns |  RMS ns  |  Max ns |   Sum ns  | Speedup |
+:-------------|--------:|---------:|--------:|----------:|--------:|
+null          |     1.2 |    1.200 |     1.2 |      20.4 |   N/A   |
+erthink       |    21.4 |   33.381 |    43.9 |     558.1 | ×23.5   |
+ryu           |    35.3 |   50.011 |    65.7 |     835.4 | ×15.7   |
+emyg          |    38.1 |   58.310 |    69.7 |     983.2 | ×13.3   |
+milo          |    37.8 |   59.819 |    71.8 |    1007.7 | ×13.0   |
+floaxie       |    24.5 |   68.028 |    92.4 |    1089.8 | ×12.0   |
+fmt           |    75.3 |  101.532 |   130.8 |    1712.2 | ×7.7    |
+doubleconv    |    69.7 |  103.070 |   134.0 |    1731.4 | ×7.6    |
+grisu2        |   101.6 |  119.538 |   137.8 |    2024.3 | ×6.5    |
+fpconv        |   102.6 |  145.437 |   167.5 |    2454.2 | ×5.3    |
+stb           |   188.8 |  191.082 |   198.6 |    3248.1 | ×4.0    |
+sprintf       |   700.1 |  772.833 |   827.7 |   13119.8 | ×1.0    |
+ostrstream    |  1058.4 | 1133.081 |  1187.4 |   19250.5 | ×0.7    |
+ostringstream |  1110.9 | 1196.817 |  1267.8 |   20327.0 | ×0.6    |
+
+
+#### All implementations
+![[email protected]_linux-x86_64-gcc9.2_randomdigit_time.png](https://libmdbx.dqdkfa.ru/img/d2a-benchmark/[email protected]_linux-x86_64-gcc9.2_randomdigit_time.png)
+
+#### Fastest competitors
+![[email protected]_linux-x86_64-gcc9.2_randomdigit_time.png](https://libmdbx.dqdkfa.ru/img/d2a-benchmark/[email protected]_linux-x86_64-gcc9.2_randomdigit_time.png)
+
+![[email protected]_linux-x86_64-gcc9.2_randomdigit_timedigit.png](https://libmdbx.dqdkfa.ru/img/d2a-benchmark/[email protected]_linux-x86_64-gcc9.2_randomdigit_timedigit.png)
+
+## Implementations in descending order of speed
+
+Function      | Description
+--------------|-----------
+[erthink](https://gitflic.ru/project/erthink/erthink/blob?file=erthink_d2a.h%2B%2B&branch=c%2B%2B.master) | Leonid Yuriev's Grisu-based C++ implementation.
+[ryu](https://github.com/ulfjack/ryu) | Ulf Adams's [Ryū algorithm](https://dl.acm.org/citation.cfm?id=3192369).
+[emyg](https://github.com/miloyip/dtoa-benchmark/blob/master/src/emyg/emyg_dtoa.c) | C version of Milo Yip's Grisu2 implementation by Doug Currie.
+[milo](https://github.com/miloyip/dtoa-benchmark/blob/master/src/milo/dtoa_milo.h) | Milo Yip's Grisu2 C++ implementation for RapidJSON.
+[floaxie](https://github.com/aclex/floaxie) | Alexey Chernov's Grisu2 implementation.
+[fmt](https://github.com/fmtlib/fmt) | Victor Zverovich's Grisu2 implementation for `{fmt}` C++ library.
+[doubleconv](https://code.google.com/p/double-conversion/) |  C++ implementation extracted from Google's V8 JavaScript Engine with `EcmaScriptConverter().ToShortest()` (based on Grisu3, falling back to a slower bignum algorithm when Grisu3 fails to produce the shortest representation).
+[grisu2](http://florian.loitsch.com/publications/bench.tar.gz?attredirects=0) | Florian Loitsch's Grisu2 C implementation [1].
+[fpconv](https://github.com/night-shift/fpconv) | Andreas Samoljuk's Grisu2 C implementation.
+[stb](https://github.com/nothings/stb)         | Jeff Roberts's & Sean Barrett's snprintf() implementation.
+sprintf       | `sprintf()` in C standard library with `"%.17g"` format.
+ostringstream | traditional `std::ostringstream` in C++ standard library with `setprecision(17)`.
+ostrstream    | traditional `std::ostrstream` in C++ standard library with `setprecision(17)`.
+~~[gay](http://www.netlib.org/fp/)~~ | David M. Gay's `dtoa()` C implementation. **Disabled** because of invalid results and/or SIGSEGV.
+
+## FAQ
+
+1. How to add an implementation?
+
+   You may clone an existing implementation file, then modify it and add to `CMakeLists.txt`.
+   Re-run `cmake` to re-configure and re-build benchmark.
+   Note that it will automatically register to the benchmark by macro `REGISTER_TEST(name)`.
+
+   **Making pull request of new implementations is welcome.**
+
+2. Why not convert `double` to `std::string`?
+
+   It may introduce heap allocation, which is a big overhead. User can easily wrap these low-level functions to return `std::string`, if needed.
+
+3. Why are fast `dtoa()` functions needed?
+
+   They are very common operations when writing data in text format. The standard ways, `sprintf()` and `std::stringstream`, often provide poor performance. The author of this benchmark would optimize the `sprintf` implementation in [RapidJSON](https://github.com/miloyip/rapidjson/).
+
+## References
+
+[1] Loitsch, Florian. ["Printing floating-point numbers quickly and accurately with integers."](http://florian.loitsch.com/publications/dtoa-pldi2010.pdf) ACM Sigplan Notices 45.6 (2010): 233-243.
+
+## Related Benchmarks and Discussions
+
+* [Printing Floating-Point Numbers](http://www.ryanjuckett.com/programming/printing-floating-point-numbers/)
diff --git a/build/premake.bat b/build/premake.bat
diff --git a/build/premake.sh b/build/premake.sh