Skip to content

Commit dfa4a98

Browse files
authored
Zstd compression (#324)
add zstd compression support
1 parent 61133bd commit dfa4a98

23 files changed

+361
-46
lines changed

CMakeLists.txt

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,19 @@ else ()
118118
set(_KEYVI_LINK_LIBRARIES_DYNAMIC "${_KEYVI_LINK_LIBRARIES_DYNAMIC} snappy")
119119
endif ()
120120

121+
# Zstd
# REQUIRED makes find_package() stop configuration with a fatal error when zstd
# cannot be located (the find module reports through
# find_package_handle_standard_args), so no separate not-found branch is needed.
find_package(ZSTD REQUIRED)
list(APPEND KEYVI_INCLUDES "${ZSTD_INCLUDE_DIRS}")

# Record zstd in the static link list on Darwin and in the dynamic list elsewhere.
# The variable is passed by name so if() dereferences it exactly once; the
# unquoted ${...} form could be dereferenced again if its value named a variable.
if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
  set(_KEYVI_LINK_LIBRARIES_STATIC "${_KEYVI_LINK_LIBRARIES_STATIC} zstd")
else ()
  set(_KEYVI_LINK_LIBRARIES_DYNAMIC "${_KEYVI_LINK_LIBRARIES_DYNAMIC} zstd")
endif ()
133+
121134
# rapidjson
122135
list(APPEND KEYVI_INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/keyvi/3rdparty/rapidjson/include")
123136

@@ -143,7 +156,7 @@ string(REPLACE " " ";" _KEYVI_COMPILE_DEFINITIONS_LIST "${_KEYVI_COMPILE_DEFINIT
143156

144157
# keyvicompiler
145158
add_executable(keyvicompiler keyvi/bin/keyvicompiler/keyvicompiler.cpp)
146-
target_link_libraries(keyvicompiler ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES})
159+
target_link_libraries(keyvicompiler ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES})
147160
target_compile_options(keyvicompiler PRIVATE ${_KEYVI_CXX_FLAGS_LIST})
148161
target_compile_definitions(keyvicompiler PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST})
149162
target_include_directories(keyvicompiler PRIVATE "$<BUILD_INTERFACE:${KEYVI_INCLUDES}>")
@@ -152,7 +165,7 @@ install (TARGETS keyvicompiler DESTINATION bin COMPONENT applications OPTIONAL)
152165

153166
# keyviinspector
154167
add_executable(keyviinspector keyvi/bin/keyviinspector/keyviinspector.cpp)
155-
target_link_libraries(keyviinspector ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES})
168+
target_link_libraries(keyviinspector ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES})
156169
target_compile_options(keyviinspector PRIVATE ${_KEYVI_CXX_FLAGS_LIST})
157170
target_compile_definitions(keyviinspector PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST})
158171
target_include_directories(keyviinspector PRIVATE "$<BUILD_INTERFACE:${KEYVI_INCLUDES}>")
@@ -161,7 +174,7 @@ install (TARGETS keyviinspector DESTINATION bin COMPONENT applications OPTIONAL)
161174

162175
# keyvimerger
163176
add_executable(keyvimerger keyvi/bin/keyvimerger/keyvimerger.cpp)
164-
target_link_libraries(keyvimerger ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES})
177+
target_link_libraries(keyvimerger ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES})
165178
target_compile_options(keyvimerger PRIVATE ${_KEYVI_CXX_FLAGS_LIST})
166179
target_compile_definitions(keyvimerger PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST})
167180
target_include_directories(keyvimerger PRIVATE "$<BUILD_INTERFACE:${KEYVI_INCLUDES}>")
@@ -170,15 +183,15 @@ install (TARGETS keyvimerger DESTINATION bin COMPONENT applications)
170183

171184
# keyvi_c
172185
add_library(keyvi_c SHARED keyvi/bin/keyvi_c/c_api.cpp)
173-
target_link_libraries(keyvi_c ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES})
186+
target_link_libraries(keyvi_c ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES})
174187
target_compile_options(keyvi_c PRIVATE ${_KEYVI_CXX_FLAGS_LIST})
175188
target_compile_definitions(keyvi_c PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST})
176189
target_include_directories(keyvi_c PRIVATE "$<BUILD_INTERFACE:${KEYVI_INCLUDES}>")
177190

178191
# unit tests
179192
FILE(GLOB_RECURSE UNIT_TEST_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} keyvi/tests/keyvi/*.cpp)
180193
add_executable(unit_test_all ${UNIT_TEST_SOURCES})
181-
target_link_libraries(unit_test_all ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES})
194+
target_link_libraries(unit_test_all ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES})
182195
target_compile_options(unit_test_all PRIVATE ${_KEYVI_CXX_FLAGS_LIST})
183196
target_compile_definitions(unit_test_all PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST})
184197
target_include_directories(unit_test_all PRIVATE "$<BUILD_INTERFACE:${KEYVI_INCLUDES}>")
@@ -224,7 +237,7 @@ add_library(keyvi INTERFACE)
224237

225238
target_include_directories(keyvi INTERFACE "$<BUILD_INTERFACE:${KEYVI_INCLUDES}>")
226239
target_compile_definitions(keyvi INTERFACE ${_KEYVI_COMPILE_DEFINITIONS_LIST})
227-
target_link_libraries(keyvi INTERFACE ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES})
240+
target_link_libraries(keyvi INTERFACE ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES})
228241

229242
### docs
230243

cmake_modules/FindZSTD.cmake

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Find Zstd, a compression library
#
# Result variables:
#   ZSTD_FOUND        - set by find_package_handle_standard_args when both
#                       variables below were located
#   ZSTD_INCLUDE_DIRS - directory containing zstd.h
#   ZSTD_LIBRARIES    - the zstd library to link against

# pkg-config only contributes search hints, so it is optional: look for it
# quietly and query it only when it is actually available.
find_package(PkgConfig QUIET)
if (PKG_CONFIG_FOUND)
  pkg_check_modules(ZSTD_PKGCONF QUIET libzstd)
endif ()

# HINTS are searched before the system default locations (PATHS are searched
# after them), so the installation reported by pkg-config takes precedence over
# an unrelated copy found in a default directory.
find_path(ZSTD_INCLUDE_DIRS
  NAMES zstd.h
  HINTS ${ZSTD_PKGCONF_INCLUDE_DIRS}
)

find_library(ZSTD_LIBRARIES
  NAMES zstd
  HINTS ${ZSTD_PKGCONF_LIBRARY_DIRS}
)

# Sets ZSTD_FOUND and honours the REQUIRED / QUIET arguments of find_package().
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(ZSTD DEFAULT_MSG ZSTD_INCLUDE_DIRS ZSTD_LIBRARIES)

mark_as_advanced(ZSTD_INCLUDE_DIRS ZSTD_LIBRARIES)

keyvi/include/keyvi/compression/compression_selector.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "keyvi/compression/compression_strategy.h"
3434
#include "keyvi/compression/snappy_compression_strategy.h"
3535
#include "keyvi/compression/zlib_compression_strategy.h"
36+
#include "keyvi/compression/zstd_compression_strategy.h"
3637

3738
// #define ENABLE_TRACING
3839
#include "keyvi/dictionary/util/trace.h"
@@ -51,6 +52,8 @@ inline CompressionStrategy* compression_strategy(const std::string& name = "") {
5152
return new ZlibCompressionStrategy(); // compression level?
5253
} else if (lower_name == "snappy") {
5354
return new SnappyCompressionStrategy();
55+
} else if (lower_name == "zstd") {
56+
return new ZstdCompressionStrategy();
5457
} else if (lower_name == "" || lower_name == "none" || lower_name == "raw") {
5558
return new RawCompressionStrategy();
5659
} else {
@@ -72,6 +75,9 @@ inline decompress_func_t decompressor_by_code(const std::string& s) {
7275
case SNAPPY_COMPRESSION:
7376
TRACE("unpack snappy compressed string");
7477
return SnappyCompressionStrategy::DoDecompress;
78+
case ZSTD_COMPRESSION:
79+
TRACE("unpack zstd compressed string");
80+
return ZstdCompressionStrategy::DoDecompress;
7581
default:
7682
throw std::invalid_argument("Invalid compression code " +
7783
boost::lexical_cast<std::string>(static_cast<int>(s[0])));

keyvi/include/keyvi/compression/compression_strategy.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,16 @@
2929
#include <string>
3030
#include <vector>
3131

32+
#include "keyvi/dictionary/fsa/internal/constants.h"
33+
3234
namespace keyvi {
3335
namespace compression {
3436

3537
enum CompressionCode {
3638
NO_COMPRESSION = 0,
3739
ZLIB_COMPRESSION = 1,
3840
SNAPPY_COMPRESSION = 2,
41+
ZSTD_COMPRESSION = 3,
3942
};
4043

4144
// buffer type which is realloc-able
@@ -69,6 +72,9 @@ struct CompressionStrategy {
6972

7073
/** The "name" of the compression strategy. */
7174
virtual std::string name() const = 0;
75+
76+
/** The minimum version this compressor requires */
77+
virtual uint64_t GetFileVersionMin() const = 0;
7278
};
7379

7480
/**
@@ -95,6 +101,8 @@ struct RawCompressionStrategy final : public CompressionStrategy {
95101
static inline std::string DoDecompress(const std::string& compressed) { return compressed.substr(1); }
96102

97103
std::string name() const { return "raw"; }
104+
105+
uint64_t GetFileVersionMin() const { return KEYVI_FILE_VERSION_MIN; }
98106
};
99107

100108
} /* namespace compression */

keyvi/include/keyvi/compression/snappy_compression_strategy.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include <string>
3131

3232
#include "keyvi/compression/compression_strategy.h"
33+
#include "keyvi/dictionary/fsa/internal/constants.h"
3334

3435
namespace keyvi {
3536
namespace compression {
@@ -61,6 +62,8 @@ struct SnappyCompressionStrategy final : public CompressionStrategy {
6162
}
6263

6364
std::string name() const { return "snappy"; }
65+
66+
uint64_t GetFileVersionMin() const { return KEYVI_FILE_VERSION_MIN; }
6467
};
6568

6669
} /* namespace compression */

keyvi/include/keyvi/compression/zlib_compression_strategy.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include <string>
3232

3333
#include "keyvi/compression/compression_strategy.h"
34+
#include "keyvi/dictionary/fsa/internal/constants.h"
3435

3536
// #define ENABLE_TRACING
3637
#include "keyvi/dictionary/util/trace.h"
@@ -131,6 +132,8 @@ struct ZlibCompressionStrategy final : public CompressionStrategy {
131132

132133
std::string name() const { return "zlib"; }
133134

135+
uint64_t GetFileVersionMin() const { return KEYVI_FILE_VERSION_MIN; }
136+
134137
private:
135138
z_stream zstream_compress_;
136139
};
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/* * keyvi - A key value store.
2+
*
3+
* Copyright 2015 Hendrik Muhs<[email protected]>
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
/*
19+
* zstd_compression_strategy.h
20+
*
21+
* Created on: September 10, 2016
22+
* Author: Hendrik Muhs<[email protected]>
23+
*/
24+
25+
#ifndef KEYVI_COMPRESSION_ZSTD_COMPRESSION_STRATEGY_H_
26+
#define KEYVI_COMPRESSION_ZSTD_COMPRESSION_STRATEGY_H_
27+
28+
#include <zstd.h>
29+
30+
#include <stdexcept>
#include <string>
31+
32+
#include "keyvi/dictionary/fsa/internal/constants.h"
33+
34+
#ifndef ZSTD_DEFAULT_CLEVEL
35+
36+
/*-===== Pre-defined compression levels =====-*/
37+
#define ZSTD_DEFAULT_CLEVEL 3
38+
#define ZSTD_MAX_CLEVEL 22
39+
#endif
40+
41+
#include "keyvi/compression/compression_strategy.h"
42+
43+
// #define ENABLE_TRACING
44+
#include "keyvi/dictionary/util/trace.h"
45+
46+
namespace keyvi {
47+
namespace compression {
48+
49+
/** A compression strategy that wraps zlib. */
50+
struct ZstdCompressionStrategy final : public CompressionStrategy {
51+
ZstdCompressionStrategy(int compression_level = ZSTD_DEFAULT_CLEVEL) : compression_level_(compression_level) {}
52+
53+
inline void Compress(buffer_t* buffer, const char* raw, size_t raw_size) { DoCompress(buffer, raw, raw_size); }
54+
55+
inline void DoCompress(buffer_t* buffer, const char* raw, size_t raw_size) {
56+
size_t output_length = ZSTD_compressBound(raw_size);
57+
buffer->resize(output_length + 1);
58+
buffer->data()[0] = static_cast<char>(ZSTD_COMPRESSION);
59+
60+
output_length = ZSTD_compress(buffer->data() + 1, output_length, raw, raw_size, compression_level_);
61+
buffer->resize(output_length + 1);
62+
}
63+
64+
inline std::string Decompress(const std::string& compressed) { return DoDecompress(compressed); }
65+
66+
static std::string DoDecompress(const std::string& compressed) {
67+
std::string uncompressed;
68+
69+
size_t dest_size = ZSTD_getFrameContentSize(&compressed.data()[1], compressed.size() - 1);
70+
uncompressed.resize(dest_size);
71+
ZSTD_decompress(&uncompressed[0], dest_size, &compressed.data()[1], compressed.size() - 1);
72+
73+
return uncompressed;
74+
}
75+
76+
std::string name() const { return "zstd"; }
77+
78+
uint64_t GetFileVersionMin() const { return 3; }
79+
80+
private:
81+
int compression_level_;
82+
};
83+
84+
} /* namespace compression */
85+
} /* namespace keyvi */
86+
87+
#endif // KEYVI_COMPRESSION_ZSTD_COMPRESSION_STRATEGY_H_

keyvi/include/keyvi/dictionary/dictionary.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ class Dictionary final {
7272

7373
uint64_t GetSize() const { return fsa_->GetNumberOfKeys(); }
7474

75+
uint64_t GetVersion() const { return fsa_->GetVersion(); }
76+
7577
/**
7678
* A simple Contains method to check whether a key is in the dictionary.
7779
*

keyvi/include/keyvi/dictionary/dictionary_merger.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,8 @@ class DictionaryMerger final {
204204
}
205205

206206
void CompleteMerge() {
207-
ValueStoreMergeT* value_store = new ValueStoreMergeT(params_);
207+
ValueStoreMergeT* value_store = new ValueStoreMergeT(inputFiles_, params_);
208+
208209
generator_ =
209210
GeneratorAdapter::template CreateGenerator<keyvi::dictionary::fsa::internal::SparseArrayPersistence<uint16_t>>(
210211
GetTotalSparseArraySize(), params_, value_store);

keyvi/include/keyvi/dictionary/dictionary_properties.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@ class DictionaryProperties {
147147

148148
const std::string& GetSpecializedDictionaryProperties() const { return specialized_dictionary_properties_; }
149149

150+
uint64_t GetVersion() const { return version_; }
151+
150152
std::string GetStatistics() const {
151153
rapidjson::StringBuffer string_buffer;
152154
rapidjson::Writer<rapidjson::StringBuffer> writer(string_buffer);

0 commit comments

Comments
 (0)