Skip to content

Commit

Permalink
Added optional third party dtoa library.
Browse files Browse the repository at this point in the history
Added the optional Milo Yip DTOA library (emyg_dtoa) to avoid
issues where the standard sprintf() dtoa function changes output
based on locale settings. It is also 40-50% faster than the
standard dtoa for raw numeric data.

If you wish to use this third party library you can compile
libxlsxwriter with it by passing `USE_DTOA_LIBRARY=1` to
make. The USE_DOUBLE_FUNCTION build variable is no longer used.

Imported source from https://github.com/miloyip/dtoa-benchmark

Feature request #272
  • Loading branch information
jmcnamara committed Jul 12, 2021
1 parent 393ded9 commit bda599d
Show file tree
Hide file tree
Showing 23 changed files with 711 additions and 62 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ env:
- NO_VALGRIND=1 USE_STANDARD_TMPFILE=1 CFLAGS='-Werror'
- NO_VALGRIND=1 CFLAGS='-Werror -m32'
- NO_VALGRIND=1 USE_SYSTEM_MINIZIP=1 CFLAGS='-Werror'
- NO_VALGRIND=1 USE_DOUBLE_FUNCTION=1 CFLAGS='-Werror'
- NO_VALGRIND=1 USE_DTOA_LIBRARY=1 CFLAGS='-Werror'
- NO_VALGRIND=1 USE_NO_MD5=1 CFLAGS='-Werror'
- NO_VALGRIND=1 USE_OPENSSL_MD5=1 CFLAGS='-Werror'
- NO_VALGRIND=1 USE_FMEMOPEN=1 CFLAGS='-Werror'
Expand Down
16 changes: 13 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@
# during configuration. This may produce bugs while cross-
# compiling or using MinGW/MSYS.
#
# USE_DTOA_LIBRARY
# Use the third party emyg_dtoa() library (default off). The
# emyg_dtoa() library is used to avoid sprintf double issues with
# different locale settings. To enable this library, pass
# `-DUSE_DTOA_LIBRARY=ON` during configuration.
#
# USE_NO_MD5
# Compile without third party MD5 support. This will turn off the
# functionality of avoiding duplicate image files in the output xlsx
Expand Down Expand Up @@ -127,7 +133,7 @@ option(USE_NO_MD5 "Build libxlsxwriter without third party MD5 lib" OFF)
option(USE_OPENSSL_MD5 "Build libxlsxwriter with the OpenSSL MD5 lib" OFF)
option(USE_FMEMOPEN "Use fmemopen() in place of some temporary files" OFF)
option(IOAPI_NO_64 "Disable 64-bit filesystem support" OFF)
option(USE_DOUBLE_FUNCTION "Build libxlsxwriter with locale independent double" OFF)
option(USE_DTOA_LIBRARY "Use the locale independent third party Milo Yip DTOA library" OFF)

if(MSVC)
option(USE_STATIC_MSVC_RUNTIME "Use the static runtime library" OFF)
Expand Down Expand Up @@ -163,8 +169,8 @@ if(USE_FMEMOPEN)
list(APPEND LXW_PRIVATE_COMPILE_DEFINITIONS USE_FMEMOPEN)
endif()

if(USE_DOUBLE_FUNCTION)
list(APPEND LXW_PRIVATE_COMPILE_DEFINITIONS USE_DOUBLE_FUNCTION)
if(USE_DTOA_LIBRARY)
list(APPEND LXW_PRIVATE_COMPILE_DEFINITIONS USE_DTOA_LIBRARY)
endif()

if(NOT BUILD_SHARED_LIBS)
Expand Down Expand Up @@ -260,6 +266,10 @@ if(USE_OPENSSL_MD5)
endif()
endif()

# Compile the bundled emyg_dtoa source into the library when the optional
# third party DTOA support has been enabled.
if(USE_DTOA_LIBRARY)
    list(APPEND LXW_SOURCES "third_party/dtoa/emyg_dtoa.c")
endif()

set(LXW_PROJECT_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
set(LXW_LIB_DIR "${LXW_PROJECT_DIR}/lib")
add_library(${PROJECT_NAME} "")
Expand Down
45 changes: 38 additions & 7 deletions License.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ Libxlsxwriter is released under a FreeBSD license:


Libxlsxwriter includes `queue.h` and `tree.h` from FreeBSD, the `minizip`
component of `zlib`, `tmpfileplus` and `md5` which have the following
licenses:
component of `zlib`. It also includes and uses the optional `tmpfileplus`,
`md5` and `emyg_dtoa` libraries. These components have the following licenses:


Queue.h from FreeBSD:
Expand Down Expand Up @@ -135,18 +135,49 @@ Note, it is possible to compile libxlsxwriter without statically linking the
[Tmpfileplus](http://www.di-mgt.com.au/c_function_to_create_temp_file.html)
has the following license:

This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.

Copyright (c) 2012-16 David Ireland, DI Management Services Pty Ltd
<http://www.di-mgt.com.au/contact/>.
Copyright (c) 2012-16 David Ireland, DI Management Services Pty Ltd
<http://www.di-mgt.com.au/contact/>.

See the [Mozilla Public License, v. 2.0](http://mozilla.org/MPL/2.0/).

Note, it is possible to compile libxlsxwriter using the standard library
`tmpfile()` function instead of `tmpfileplus`, see @ref gsg_tmpdir.

The [Milo Yip DTOA library](https://github.com/miloyip/dtoa-benchmark) for
converting doubles to strings. It has the following license:

Copyright (C) 2015 Doug Currie
based on dtoa_milo.h
Copyright (C) 2014 Milo Yip

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

This Milo Yip DTOA library (emyg_dtoa) is used to avoid issues where the
standard sprintf() dtoa function changes output based on locale settings. It
is also 40-50% faster than the standard dtoa for raw numeric data. The use of
this library is optional. If you wish to use it you can pass
`USE_DTOA_LIBRARY=1` to make when compiling.

[Openwall MD5](https://openwall.info/wiki/people/solar/software/public-domain-source-code/md5)
has the following licence:

Expand Down
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,14 @@ endif
ifndef USE_STANDARD_TMPFILE
$(Q)$(MAKE) -C third_party/tmpfileplus
endif

ifndef USE_NO_MD5
ifndef USE_OPENSSL_MD5
$(Q)$(MAKE) -C third_party/md5
endif
endif
ifdef USE_DTOA_LIBRARY
$(Q)$(MAKE) -C third_party/dtoa
endif

# Build a macOS universal binary.
universal_binary :
Expand Down Expand Up @@ -73,6 +75,7 @@ clean :
$(Q)$(MAKE) clean -C third_party/minizip
$(Q)$(MAKE) clean -C third_party/tmpfileplus
$(Q)$(MAKE) clean -C third_party/md5
$(Q)$(MAKE) clean -C third_party/dtoa

# Run the unit tests.
test : all test_unit test_functional
Expand Down
20 changes: 20 additions & 0 deletions docs/src/getting_started.dox
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,26 @@ Libxlsxwriter can be compiled on a big endian system as follows:

make USE_BIG_ENDIAN=1


@section gsg_dtoa Using a double formatting library

Excel uses IEEE 754 doubles for all numeric values. These values are stored
in standard `sprintf(...,"%.16G",...)` formatting as numbers like "1234.56" or
"456E+123". However in some locales, such as "de_DE" these numbers can be
stored with the locale specific decimal place like "1234,56" which causes
Excel to give an error when it loads the file.

In some cases this issue can be resolved by using the `setlocale()` or
`uselocale()` functions in your application. Alternatively you can compile
libxlsxwriter with support for a third party `dtoa()` (double to ASCII)
function. Currently libxlsxwriter uses the [Milo Yip DTOA
library](https://github.com/miloyip/dtoa-benchmark) as an optional
compilation. This avoids the locale sprintf issue and it is also 40-50% faster
than the standard dtoa for raw numeric data.

If you wish to use it you can pass "USE_DTOA_LIBRARY=1" to `make` or
"-DUSE_DTOA_LIBRARY=ON" to cmake.

@section gsg_next Next steps

If you got libxlsxwriter built and working successfully then the next sections
Expand Down
12 changes: 6 additions & 6 deletions docs/src/working_with_memory.dox
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,9 @@

@section ww_mem_constant Constant Memory Mode

By default libxlsxwriter holds all cell data in memory. This is to allow
non-sequential data storage and also to allow future features where formatting
is applied separately from the data.

The effect of this is that for large files libxlsxwriter can consume a lot of
memory.
By default libxlsxwriter holds all cell data in memory to allow non-sequential
data storage. The effect of this is that for large files libxlsxwriter can
consume a lot of memory.

Fortunately, this memory usage can be reduced almost completely by using
workbook_new_opt() and the lxw_workbook_options `constant_memory` property:
Expand Down Expand Up @@ -91,6 +88,9 @@ depending on the amount of repeated string data.
Currently the library is optimized but not highly optimized. Also, the library
is currently single threaded.

Compiling with the embedded but optional dtoa library is 40-50% faster for raw
numeric data. See @ref gsg_dtoa.

Next: @ref working_with_macros


Expand Down
3 changes: 3 additions & 0 deletions examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ $(LIBXLSXWRITER):
ifndef USE_STANDARD_TMPFILE
$(Q)$(MAKE) -C ../third_party/tmpfileplus
endif
# Only build the optional third party dtoa library when it has been requested
# with USE_DTOA_LIBRARY, matching the guard used by the other Makefiles.
ifdef USE_DTOA_LIBRARY
$(Q)$(MAKE) -C ../third_party/dtoa
endif
ifndef USE_NO_MD5
$(Q)$(MAKE) -C ../third_party/md5
endif
Expand Down
26 changes: 26 additions & 0 deletions include/xlsxwriter/third_party/emyg_dtoa.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/* emyg_dtoa.h
** Copyright (C) 2015 Doug Currie
** based on dtoa_milo.h
** Copyright (C) 2014 Milo Yip
**
** Permission is hereby granted, free of charge, to any person obtaining a copy
** of this software and associated documentation files (the "Software"), to deal
** in the Software without restriction, including without limitation the rights
** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
** copies of the Software, and to permit persons to whom the Software is
** furnished to do so, subject to the following conditions:
**
** The above copyright notice and this permission notice shall be included in
** all copies or substantial portions of the Software.
**
** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
** THE SOFTWARE.
*/

/* Source from https://github.com/miloyip/dtoa-benchmark */
#ifndef EMYG_DTOA_H
#define EMYG_DTOA_H

/* Allow the header to be included from C++ translation units. */
#ifdef __cplusplus
extern "C" {
#endif

/*
 * Convert the double `value` to a string and write it to `buffer`.
 *
 * NOTE(review): the required buffer size is not stated in this header; the
 * buffer must be supplied by the caller and be large enough for the
 * formatted number - confirm against the implementation.
 */
void emyg_dtoa (double value, char* buffer);

#ifdef __cplusplus
}
#endif

#endif /* EMYG_DTOA_H */
10 changes: 6 additions & 4 deletions include/xlsxwriter/utility.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,13 +235,15 @@ void lxw_str_tolower(char *str);
FILE *lxw_tmpfile(char *tmpdir);
FILE *lxw_fopen(const char *filename, const char *mode);

/* Use a user defined function to format doubles in sprintf or else a simple
* macro (the default). */
#ifdef USE_DOUBLE_FUNCTION
/* Use the third party dtoa function to avoid locale issues with sprintf
* double formatting. Otherwise we use a simple macro that falls back to the
* default c-lib sprintf.
*/
#ifdef USE_DTOA_LIBRARY
int lxw_sprintf_dbl(char *data, double number);
#else
#define lxw_sprintf_dbl(data, number) \
lxw_snprintf(data, LXW_ATTR_32, "%.16g", number)
lxw_snprintf(data, LXW_ATTR_32, "%.16G", number)
#endif

uint16_t lxw_hash_password(const char *password);
Expand Down
15 changes: 9 additions & 6 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,12 @@ ifdef USE_BIG_ENDIAN
CFLAGS += -DLXW_BIG_ENDIAN
endif

# Use a user-defined double number formatting function.
ifdef USE_DOUBLE_FUNCTION
CFLAGS += -DUSE_DOUBLE_FUNCTION
# Use a third party double number formatting function.
ifdef USE_DTOA_LIBRARY
CFLAGS += -DUSE_DTOA_LIBRARY
DTOA_LIB_DIR = ../third_party/dtoa
DTOA_LIB_OBJ = $(DTOA_LIB_DIR)/emyg_dtoa.o
DTOA_LIB_SO = $(DTOA_LIB_DIR)/emyg_dtoa.so
endif

# Use fmemopen() to avoid creating certain temporary files
Expand Down Expand Up @@ -152,19 +155,19 @@ test_lib : libxlsxwriter_test.a

# The static library.
$(LIBXLSXWRITER_A) : $(OBJS)
$(Q)$(AR) $(ARFLAGS) $@ $(MINIZIP_OBJ) $(TMPFILEPLUS_OBJ) $(MD5_OBJ) $^
$(Q)$(AR) $(ARFLAGS) $@ $(MINIZIP_OBJ) $(TMPFILEPLUS_OBJ) $(DTOA_LIB_OBJ) $(MD5_OBJ) $^

# The dynamic library.
ifeq ($(findstring m32,$(CFLAGS)),m32)
ARCH = -m32
endif

$(LIBXLSXWRITER_SO) : $(SOBJS)
$(Q)$(CC) $(LDFLAGS) $(SOFLAGS) $(ARCH) $(TARGET_ARCH) -o $@ $(MINIZIP_SO) $(TMPFILEPLUS_SO) $(MD5_SO) $^ $(LIBS)
$(Q)$(CC) $(LDFLAGS) $(SOFLAGS) $(ARCH) $(TARGET_ARCH) -o $@ $(MINIZIP_SO) $(TMPFILEPLUS_SO) $(MD5_SO) $(DTOA_LIB_SO) $^ $(LIBS)

# The test library.
$(LIBXLSXWRITER_TO) : $(TOBJS)
$(Q)$(AR) $(ARFLAGS) $@ $(MINIZIP_OBJ) $(TMPFILEPLUS_OBJ) $(MD5_OBJ) $^
# Archive the dtoa object file (not the shared object) into the static test library.
$(Q)$(AR) $(ARFLAGS) $@ $(MINIZIP_OBJ) $(TMPFILEPLUS_OBJ) $(DTOA_LIB_OBJ) $(MD5_OBJ) $^

# Minimal target for quick compile without creating the libs.
test_compile : $(OBJS)
Expand Down
25 changes: 8 additions & 17 deletions src/utility.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
#include "xlsxwriter/common.h"
#include "xlsxwriter/third_party/tmpfileplus.h"

#ifdef USE_DTOA_LIBRARY
#include "xlsxwriter/third_party/emyg_dtoa.h"
#endif

char *error_strings[LXW_MAX_ERRNO + 1] = {
"No error.",
"Memory error, failed to malloc() required memory.",
Expand Down Expand Up @@ -575,27 +579,14 @@ lxw_tmpfile(char *tmpdir)
}

/*
* Sample function to handle sprintf of doubles for locale portable code. This
* is usually handled by a lxw_sprintf_dbl() macro but it can be replaced with
* a function of the same name.
*
* The code below is a simplified example that changes numbers like 123,45 to
* 123.45. End-users can replace this with something more rigorous if
* required.
* Use third party function to handle sprintf of doubles for locale portable
* code.
*/
#ifdef USE_DOUBLE_FUNCTION
#ifdef USE_DTOA_LIBRARY
int
lxw_sprintf_dbl(char *data, double number)
{
char *tmp;

lxw_snprintf(data, LXW_ATTR_32, "%.16g", number);

/* Replace comma with decimal point. */
tmp = strchr(data, ',');
if (tmp)
*tmp = '.';

emyg_dtoa(number, data);
return 0;
}
#endif
Expand Down
6 changes: 3 additions & 3 deletions src/worksheet.c
Original file line number Diff line number Diff line change
Expand Up @@ -3643,7 +3643,7 @@ STATIC void
_write_number_cell(lxw_worksheet *self, char *range,
int32_t style_index, lxw_cell *cell)
{
#ifdef USE_DOUBLE_FUNCTION
#ifdef USE_DTOA_LIBRARY
char data[LXW_ATTR_32];

lxw_sprintf_dbl(data, cell->u.number);
Expand All @@ -3657,11 +3657,11 @@ _write_number_cell(lxw_worksheet *self, char *range,
#else
if (style_index)
fprintf(self->file,
"<c r=\"%s\" s=\"%d\"><v>%.16g</v></c>",
"<c r=\"%s\" s=\"%d\"><v>%.16G</v></c>",
range, style_index, cell->u.number);
else
fprintf(self->file,
"<c r=\"%s\"><v>%.16g</v></c>", range, cell->u.number);
"<c r=\"%s\"><v>%.16G</v></c>", range, cell->u.number);

#endif
}
Expand Down
6 changes: 3 additions & 3 deletions test/functional/src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ ifdef USE_OPENSSL_MD5
LIBS += -lcrypto
endif

# Use a user-defined double number formatting function.
ifdef USE_DOUBLE_FUNCTION
CFLAGS += -DUSE_DOUBLE_FUNCTION
# Use a third party double number formatting function.
ifdef USE_DTOA_LIBRARY
CFLAGS += -DUSE_DTOA_LIBRARY
endif

all : $(LIBXLSXWRITER) $(EXES)
Expand Down
2 changes: 1 addition & 1 deletion test/functional/src/test_data08.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
int main() {

/* Test that the module works if the locale is changed. */
#ifdef USE_DOUBLE_FUNCTION
#ifdef USE_DTOA_LIBRARY
setlocale(LC_NUMERIC, "de_DE");
#endif

Expand Down
Loading

0 comments on commit bda599d

Please sign in to comment.