Skip to content

Commit f0579d4

Browse files
support data compression & delta-encoding of posting lists (microsoft#297)
* MVP version of data compression (microsoft#1) * Support delta-encoding (microsoft#2) * support configuring the compression level (microsoft#3) * integrate zstd with cmake (microsoft#4) * Bug fix: wrong listPageCount when listTotalBytes % pageSize == 0 (microsoft#5) * train & share dictionary (microsoft#6) * rearrange posting list (microsoft#7) * config minDictTraingBufferSize and dictBufferCapacity (microsoft#8) * cmake with locally installed zstd (microsoft#9) * refine Visual Studio config (microsoft#10) * parallelize computing the compressed size (microsoft#11) * fix check-truth bug (microsoft#12) * change zstd branch (microsoft#13) * remove verbose log in truth analysis; refine Dockerfile * check rvalue in Compressor.h * resolve backward compatibility by adding a search option: WithDataCompressionFeatures * remove redundant configs; change head info format only when compression is enabled * reuse buffer when decompressing * remove exit(1) from search index * bug fix: reuse compression buffer in workspace * code refine * optimize memory usage * replace vcpkg zstd with NuGet * config issue * fix solution build issue Co-authored-by: Philip Adams <[email protected]>
1 parent f061ca6 commit f0579d4

32 files changed

+648
-67
lines changed

.gitmodules

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
[submodule "ThirdParty/zstd"]
2+
path = ThirdParty/zstd
3+
url = https://github.com/facebook/zstd
4+
branch = release

AnnService/Aggregator.vcxproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@
165165
<Import Project="..\packages\boost_system-vc142.1.72.0.0\build\boost_system-vc142.targets" Condition="Exists('..\packages\boost_system-vc142.1.72.0.0\build\boost_system-vc142.targets')" />
166166
<Import Project="..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets" Condition="Exists('..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets')" />
167167
<Import Project="..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets" Condition="Exists('..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets')" />
168+
<Import Project="..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets" Condition="Exists('..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets')" />
168169
</ImportGroup>
169170
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
170171
<PropertyGroup>
@@ -177,5 +178,6 @@
177178
<Error Condition="!Exists('..\packages\boost_system-vc142.1.72.0.0\build\boost_system-vc142.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_system-vc142.1.72.0.0\build\boost_system-vc142.targets'))" />
178179
<Error Condition="!Exists('..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets'))" />
179180
<Error Condition="!Exists('..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets'))" />
181+
<Error Condition="!Exists('..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets'))" />
180182
</Target>
181183
</Project>

AnnService/CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22
# Licensed under the MIT License.
33

44
set(AnnService ${PROJECT_SOURCE_DIR}/AnnService)
5+
set(Zstd ${PROJECT_SOURCE_DIR}/ThirdParty/zstd)
56

67
include_directories(${AnnService})
8+
include_directories(${Zstd}/lib)
79

810
file(GLOB_RECURSE HDR_FILES ${AnnService}/inc/Core/*.h ${AnnService}/inc/Helper/*.h)
911
file(GLOB_RECURSE SRC_FILES ${AnnService}/src/Core/*.cpp ${AnnService}/src/Helper/*.cpp)
@@ -32,9 +34,9 @@ if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU")
3234
endif()
3335

3436
add_library (SPTAGLib SHARED ${SRC_FILES} ${HDR_FILES})
35-
target_link_libraries (SPTAGLib DistanceUtils)
37+
target_link_libraries (SPTAGLib DistanceUtils libzstd_shared)
3638
add_library (SPTAGLibStatic STATIC ${SRC_FILES} ${HDR_FILES})
37-
target_link_libraries (SPTAGLibStatic DistanceUtils)
39+
target_link_libraries (SPTAGLibStatic DistanceUtils libzstd_static)
3840
if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU")
3941
target_compile_options(SPTAGLibStatic PRIVATE -fPIC)
4042
endif()

AnnService/Client.vcxproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@
132132
<Import Project="..\packages\boost_system-vc142.1.72.0.0\build\boost_system-vc142.targets" Condition="Exists('..\packages\boost_system-vc142.1.72.0.0\build\boost_system-vc142.targets')" />
133133
<Import Project="..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets" Condition="Exists('..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets')" />
134134
<Import Project="..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets" Condition="Exists('..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets')" />
135+
<Import Project="..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets" Condition="Exists('..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets')" />
135136
</ImportGroup>
136137
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
137138
<PropertyGroup>
@@ -144,5 +145,6 @@
144145
<Error Condition="!Exists('..\packages\boost_system-vc142.1.72.0.0\build\boost_system-vc142.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_system-vc142.1.72.0.0\build\boost_system-vc142.targets'))" />
145146
<Error Condition="!Exists('..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets'))" />
146147
<Error Condition="!Exists('..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets'))" />
148+
<Error Condition="!Exists('..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets'))" />
147149
</Target>
148150
</Project>

AnnService/CoreLibrary.vcxproj

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@
160160
<ClInclude Include="inc\Core\MetadataSet.h" />
161161
<ClInclude Include="inc\Core\SearchQuery.h" />
162162
<ClInclude Include="inc\Core\SearchResult.h" />
163+
<ClInclude Include="inc\Core\SPANN\Compressor.h" />
163164
<ClInclude Include="inc\Core\SPANN\ExtraFullGraphSearcher.h" />
164165
<ClInclude Include="inc\Core\SPANN\IExtraSearcher.h" />
165166
<ClInclude Include="inc\Core\SPANN\Index.h" />
@@ -219,5 +220,12 @@
219220
</ItemGroup>
220221
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
221222
<ImportGroup Label="ExtensionTargets">
223+
<Import Project="..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets" Condition="Exists('..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets')" />
222224
</ImportGroup>
225+
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
226+
<PropertyGroup>
227+
<ErrorText>This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
228+
</PropertyGroup>
229+
<Error Condition="!Exists('..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets'))" />
230+
</Target>
223231
</Project>

AnnService/CoreLibrary.vcxproj.filters

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,9 @@
214214
<ClInclude Include="inc\Core\Common\OPQQuantizer.h">
215215
<Filter>Header Files\Core\Common</Filter>
216216
</ClInclude>
217+
<ClInclude Include="inc\Core\SPANN\Compressor.h">
218+
<Filter>Header Files\Core\SPANN</Filter>
219+
</ClInclude>
217220
</ItemGroup>
218221
<ItemGroup>
219222
<ClCompile Include="src\Core\VectorIndex.cpp">

AnnService/IndexBuilder.vcxproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@
155155
<Import Project="..\packages\boost_system-vc142.1.72.0.0\build\boost_system-vc142.targets" Condition="Exists('..\packages\boost_system-vc142.1.72.0.0\build\boost_system-vc142.targets')" />
156156
<Import Project="..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets" Condition="Exists('..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets')" />
157157
<Import Project="..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets" Condition="Exists('..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets')" />
158+
<Import Project="..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets" Condition="Exists('..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets')" />
158159
</ImportGroup>
159160
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
160161
<PropertyGroup>
@@ -167,5 +168,6 @@
167168
<Error Condition="!Exists('..\packages\boost_system-vc142.1.72.0.0\build\boost_system-vc142.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_system-vc142.1.72.0.0\build\boost_system-vc142.targets'))" />
168169
<Error Condition="!Exists('..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets'))" />
169170
<Error Condition="!Exists('..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets'))" />
171+
<Error Condition="!Exists('..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets'))" />
170172
</Target>
171173
</Project>

AnnService/IndexSearcher.vcxproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@
156156
<Import Project="..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets" Condition="Exists('..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets')" />
157157
<Import Project="..\packages\boost_regex-vc142.1.72.0.0\build\boost_regex-vc142.targets" Condition="Exists('..\packages\boost_regex-vc142.1.72.0.0\build\boost_regex-vc142.targets')" />
158158
<Import Project="..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets" Condition="Exists('..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets')" />
159+
<Import Project="..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets" Condition="Exists('..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets')" />
159160
</ImportGroup>
160161
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
161162
<PropertyGroup>
@@ -168,5 +169,6 @@
168169
<Error Condition="!Exists('..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets'))" />
169170
<Error Condition="!Exists('..\packages\boost_regex-vc142.1.72.0.0\build\boost_regex-vc142.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_regex-vc142.1.72.0.0\build\boost_regex-vc142.targets'))" />
170171
<Error Condition="!Exists('..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets'))" />
172+
<Error Condition="!Exists('..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets'))" />
171173
</Target>
172174
</Project>

AnnService/Quantizer.vcxproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@
171171
<Import Project="..\packages\boost_system-vc142.1.72.0.0\build\boost_system-vc142.targets" Condition="Exists('..\packages\boost_system-vc142.1.72.0.0\build\boost_system-vc142.targets')" />
172172
<Import Project="..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets" Condition="Exists('..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets')" />
173173
<Import Project="..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets" Condition="Exists('..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets')" />
174+
<Import Project="..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets" Condition="Exists('..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets')" />
174175
</ImportGroup>
175176
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
176177
<PropertyGroup>
@@ -183,5 +184,6 @@
183184
<Error Condition="!Exists('..\packages\boost_system-vc142.1.72.0.0\build\boost_system-vc142.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_system-vc142.1.72.0.0\build\boost_system-vc142.targets'))" />
184185
<Error Condition="!Exists('..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets'))" />
185186
<Error Condition="!Exists('..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets'))" />
187+
<Error Condition="!Exists('..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets'))" />
186188
</Target>
187189
</Project>

AnnService/SSDServing.vcxproj

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<?xml version="1.0" encoding="utf-8"?>
1+
<?xml version="1.0" encoding="utf-8"?>
22
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
33
<ItemGroup Label="ProjectConfigurations">
44
<ProjectConfiguration Include="Debug|Win32">
@@ -27,6 +27,9 @@
2727
<ClInclude Include="inc\SSDServing\SSDIndex.h" />
2828
<ClInclude Include="inc\SSDServing\Utils.h" />
2929
</ItemGroup>
30+
<ItemGroup>
31+
<None Include="packages.config" />
32+
</ItemGroup>
3033
<PropertyGroup Label="Globals">
3134
<VCProjectVersion>15.0</VCProjectVersion>
3235
<ProjectGuid>{217B42B7-8F2B-4323-804C-08992CA2F65E}</ProjectGuid>
@@ -177,6 +180,7 @@
177180
<Import Project="..\packages\boost_system-vc142.1.72.0.0\build\boost_system-vc142.targets" Condition="Exists('..\packages\boost_system-vc142.1.72.0.0\build\boost_system-vc142.targets')" />
178181
<Import Project="..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets" Condition="Exists('..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets')" />
179182
<Import Project="..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets" Condition="Exists('..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets')" />
183+
<Import Project="..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets" Condition="Exists('..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets')" />
180184
</ImportGroup>
181185
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
182186
<PropertyGroup>
@@ -189,5 +193,6 @@
189193
<Error Condition="!Exists('..\packages\boost_system-vc142.1.72.0.0\build\boost_system-vc142.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_system-vc142.1.72.0.0\build\boost_system-vc142.targets'))" />
190194
<Error Condition="!Exists('..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_thread-vc142.1.72.0.0\build\boost_thread-vc142.targets'))" />
191195
<Error Condition="!Exists('..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_wserialization-vc142.1.72.0.0\build\boost_wserialization-vc142.targets'))" />
196+
<Error Condition="!Exists('..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Zstandard.dyn.x64.1.4.0\build\native\Zstandard.dyn.x64.targets'))" />
192197
</Target>
193198
</Project>

0 commit comments

Comments
 (0)