|
1 | 1 | using System; |
| 2 | +using System.Collections.Generic; |
2 | 3 | using System.Linq; |
3 | 4 | using Apache.Arrow; |
4 | 5 | using NUnit.Framework; |
| 6 | +using ParquetSharp.Dataset.Filter; |
5 | 7 |
|
6 | 8 | namespace ParquetSharp.Dataset.Test.Filter; |
7 | 9 |
|
@@ -128,6 +130,33 @@ public void TestComputeIntRangeMask((long, long) filterRange) |
128 | 130 | TestComputeIntRangeMask<ulong, UInt64Array, UInt64Array.Builder>(rangeStart, rangeEnd, ULongValues, val => checked((long)val)); |
129 | 131 | } |
130 | 132 |
|
/// <summary>
/// Runs the integer equality row group inclusion check once for each of the
/// integer test data sets, using the supplied filter value.
/// </summary>
/// <param name="filterValue">Value the equality filter compares the column against.</param>
[Theory]
public void TestIntEqualityIncludeRowGroup(long filterValue)
{
    // Binds the filter value so each call below only names a data set and its conversion to long.
    void Check<T>(T[] values, Func<T, long> toLong)
        where T : IComparable<T>
    {
        TestIntEqualityIncludeRowGroup(filterValue, values, toLong);
    }

    Check(SByteValues, v => v);
    Check(ShortValues, v => v);
    Check(IntValues, v => v);
    Check(LongValues, v => v);
    Check(ByteValues, v => v);
    Check(UShortValues, v => v);
    Check(UIntValues, v => v);
    // The conversion for this data set is checked, so it throws OverflowException instead of wrapping.
    Check(ULongValues, v => checked((long)v));
}
| 145 | + |
/// <summary>
/// Runs the integer range row group inclusion check once for each of the
/// integer test data sets, using the supplied filter range.
/// </summary>
/// <param name="filterRange">Pair holding the start and end of the filter range, in that order.</param>
[Theory]
public void TestIntRangeIncludeRowGroup((long, long) filterRange)
{
    var rangeStart = filterRange.Item1;
    var rangeEnd = filterRange.Item2;

    // Binds the range bounds so each call below only names a data set and its conversion to long.
    void Check<T>(T[] values, Func<T, long> toLong)
        where T : IComparable<T>
    {
        TestIntRangeIncludeRowGroup(rangeStart, rangeEnd, values, toLong);
    }

    Check(SByteValues, v => v);
    Check(ShortValues, v => v);
    Check(IntValues, v => v);
    Check(LongValues, v => v);
    Check(ByteValues, v => v);
    Check(UShortValues, v => v);
    Check(UIntValues, v => v);
    // The conversion for this data set is checked, so it throws OverflowException instead of wrapping.
    Check(ULongValues, v => checked((long)v));
}
| 159 | + |
131 | 160 | private static void TestComputeIntEqualityMask<T, TArray, TBuilder>(long filterValue, T[] values, Func<T, long> checkedCast) |
132 | 161 | where T : struct |
133 | 162 | where TArray : PrimitiveArray<T> |
@@ -203,6 +232,104 @@ private static void TestComputeIntRangeMask<T, TArray, TBuilder>(long rangeStart |
203 | 232 | } |
204 | 233 | } |
205 | 234 |
|
/// <summary>
/// Asserts that an equality filter on column "x" includes a row group exactly when
/// the filter value lies within the row group's [min, max] statistics.
/// Every (min, max) pair drawn from <paramref name="values"/> with max &gt;= min is tested.
/// </summary>
/// <typeparam name="T">Element type of the column statistics.</typeparam>
/// <param name="filterValue">Value the filter compares the column against.</param>
/// <param name="values">Candidate values used for both the statistics minimum and maximum.</param>
/// <param name="checkedCast">
/// Converts a statistics value to <see cref="long"/>; an <see cref="OverflowException"/>
/// thrown by it is treated as "value does not fit in a long".
/// </param>
private static void TestIntEqualityIncludeRowGroup<T>(long filterValue, T[] values, Func<T, long> checkedCast)
    where T : IComparable<T>
{
    var filter = Col.Named("x").IsEqualTo(filterValue);

    foreach (var lower in values)
    {
        foreach (var upper in values)
        {
            // Only well-formed statistics, where the maximum is not below the minimum, are tested.
            if (upper.CompareTo(lower) < 0)
            {
                continue;
            }

            var rowGroupStats = new Dictionary<string, LogicalStatistics>
            {
                ["x"] = new LogicalStatistics<T>(lower, upper),
            };

            bool expected;
            try
            {
                expected = filterValue >= checkedCast(lower);
            }
            catch (OverflowException)
            {
                // The minimum could not be converted to a long, so no long filter value is expected to match.
                expected = false;
            }

            try
            {
                if (filterValue > checkedCast(upper))
                {
                    expected = false;
                }
            }
            catch (OverflowException)
            {
                // A maximum that cannot be converted to a long does not exclude the filter value.
            }

            Assert.That(
                filter.IncludeRowGroup(rowGroupStats),
                Is.EqualTo(expected),
                $"Expected {typeof(T)} stats range [{lower}, {upper}] inclusion to be {expected}");
        }
    }
}
| 283 | + |
/// <summary>
/// Asserts that a range filter on column "x" includes a row group exactly when
/// [<paramref name="rangeStart"/>, <paramref name="rangeEnd"/>] overlaps the row group's
/// [min, max] statistics. Every (min, max) pair drawn from <paramref name="values"/>
/// with max &gt;= min is tested.
/// </summary>
/// <typeparam name="T">Element type of the column statistics.</typeparam>
/// <param name="rangeStart">Start of the filter range.</param>
/// <param name="rangeEnd">End of the filter range.</param>
/// <param name="values">Candidate values used for both the statistics minimum and maximum.</param>
/// <param name="checkedCast">
/// Converts a statistics value to <see cref="long"/>; an <see cref="OverflowException"/>
/// thrown by it is treated as "value does not fit in a long".
/// </param>
private static void TestIntRangeIncludeRowGroup<T>(long rangeStart, long rangeEnd, T[] values, Func<T, long> checkedCast)
    where T : IComparable<T>
{
    var filter = Col.Named("x").IsInRange(rangeStart, rangeEnd);

    foreach (var lower in values)
    {
        foreach (var upper in values)
        {
            // Only well-formed statistics, where the maximum is not below the minimum, are tested.
            if (upper.CompareTo(lower) < 0)
            {
                continue;
            }

            var rowGroupStats = new Dictionary<string, LogicalStatistics>
            {
                ["x"] = new LogicalStatistics<T>(lower, upper),
            };

            bool expected;
            try
            {
                expected = checkedCast(lower) <= rangeEnd;
            }
            catch (OverflowException)
            {
                // The minimum could not be converted to a long, so no long range is expected to reach it.
                expected = false;
            }

            try
            {
                if (checkedCast(upper) < rangeStart)
                {
                    expected = false;
                }
            }
            catch (OverflowException)
            {
                // A maximum that cannot be converted to a long does not rule out an overlap.
            }

            Assert.That(
                filter.IncludeRowGroup(rowGroupStats),
                Is.EqualTo(expected),
                $"Expected {typeof(T)} stats range [{lower}, {upper}] inclusion to be {expected}");
        }
    }
}
| 332 | + |
206 | 333 | private static TArray BuildArray<T, TArray, TBuilder>(T[] values) |
207 | 334 | where T : struct |
208 | 335 | where TArray : IArrowArray |
|
0 commit comments