1
1
import std/ bitops
2
2
3
- import ../ nint128_bitwise, ../ nint128_cast, ../ nint128_comparisons, ../ nint128_types, ../ nint128_cint128
3
+ import ../ nint128_bitops, ../ nint128_bitwise, ../ nint128_cast, ../ nint128_comparisons, ../ nint128_types, ../ nint128_cint128
4
4
5
- import ./ minus, ./ multiplication, ./ subtraction
5
+ import ./ addition, ./ minus, ./ multiplication, ./ subtraction
6
6
7
7
include ../ vendor/ stint/ div2n1n
8
8
@@ -12,7 +12,7 @@ template deltaShiftLimit(x: int): int =
12
12
when defined(amd64): 13
13
13
elif defined(i386): 7
14
14
else : 8
15
-
15
+
16
16
# 128div128
17
17
elif x == 1 :
18
18
when defined(amd64): 16
@@ -49,7 +49,7 @@ func nimDivModImpl(x, y: UInt128, remainder: var UInt128): UInt128 {.inline.} =
49
49
return
50
50
51
51
let divisor_clz = countLeadingZeroBits(divisor.lo)
52
-
52
+
53
53
if dividend.hi < divisor.lo:
54
54
shift = 64 + divisor_clz - countLeadingZeroBits(dividend.hi)
55
55
@@ -208,8 +208,7 @@ func divmod*(x, y: Int128): tuple[q, r: Int128] =
208
208
else :
209
209
nimDivMod(x, y)
210
210
211
- func div128by64to64* (x: UInt128, y: uint64 , remainder: var uint64 ): uint64
212
- {.inline.} =
211
+ func div128by64to64* (x: UInt128, y: uint64 , remainder: var uint64 ): uint64 =
213
212
# Divides 128 by 64, if the high part of the dividend is less than the divisor
214
213
# asm divq is slower on my 4th generation i7
215
214
var
@@ -236,9 +235,9 @@ func cDiv(x, y: UInt128): UInt128 {.inline, used.} =
236
235
237
236
result = cast [UInt128](r)
238
237
239
- func `div` * (x, y: UInt128): UInt128 {.inline.} =
238
+ func `div` * (x, y: UInt128): UInt128 =
240
239
var remainder: UInt128 # Discarded
241
-
240
+
242
241
when nimvm :
243
242
result = nimDivModImpl(x, y, remainder)
244
243
else :
@@ -258,7 +257,7 @@ func cDiv(x, y: Int128): Int128 {.inline, used.} =
258
257
259
258
result = cast [Int128](r)
260
259
261
- func `div` * (x, y: Int128): Int128 {.inline.} =
260
+ func `div` * (x, y: Int128): Int128 =
262
261
when nimvm :
263
262
nimDivMod(x, y).q
264
263
else :
@@ -278,7 +277,7 @@ func cMod(x, y: UInt128): UInt128 {.inline, used.} =
278
277
279
278
result = cast [UInt128](r)
280
279
281
- func `mod` * (x, y: UInt128): UInt128 {.inline.} =
280
+ func `mod` * (x, y: UInt128): UInt128 =
282
281
when nimvm :
283
282
discard nimDivModImpl(x, y, result )
284
283
else :
@@ -298,11 +297,178 @@ func cMod(x, y: Int128): Int128 {.inline, used.} =
298
297
299
298
result = cast [Int128](r)
300
299
301
func `mod`*(x, y: Int128): Int128 =
  ## Remainder of the signed 128-bit division `x / y`.
  ##
  ## In the compile-time VM the pure Nim `nimDivMod` is always used. At
  ## run time `cMod` is used when `shouldUseCInt128("cmod")` holds,
  ## otherwise the pure Nim path is taken as well.
  when nimvm:
    result = nimDivMod(x, y).r
  else:
    when shouldUseCInt128("cmod"):
      result = cMod(x, y)
    else:
      result = nimDivMod(x, y).r
308
+
309
+ # Static div 128
310
+ # Based on libdivide https://github.com/ridiculousfish/libdivide
311
+
312
func div256by128to128(n_hi, n_lo, d: UInt128, clz: int, r: var UInt128): UInt128 =
  ## Divides the 256-bit value `n_hi * 2^128 + n_lo` by `d`.
  ##
  ## Returns the 128-bit quotient and stores the remainder in `r`.
  ##
  ## `clz` must be the number of leading zero bits of `d`; it is used to
  ## normalise the divisor (shift it left until its top bit is set) before
  ## the two half-width division steps, and to de-normalise the remainder.
  ##
  ## When `n_hi >= d` the quotient does not fit in 128 bits; in that case
  ## both the result and `r` are set to `high(UInt128)`.
  const
    size = 128
    halfSize = size div 2
    halfMask = (one(UInt128) shl halfSize) - one(UInt128)

  if n_hi >= d:
    r = high(UInt128)
    return high(UInt128)

  let
    # Normalise divisor and the whole 256-bit numerator by the same shift.
    norm_d = d shl clz
    norm_hi =
      if clz == 0: n_hi # avoid shifting the low limb by the full width
      else: (n_hi shl clz) or (n_lo shr (size - clz))
    norm_lo = n_lo shl clz

  template halfQR(num_hi, num_lo, dv, dv_hi, dv_lo: UInt128): tuple[q, r: UInt128] =
    # One half-width division step: estimates a quotient digit from the
    # high half of the divisor, then corrects it.
    var (q, r) = divmod(num_hi, dv_hi)
    let m = q * dv_lo
    r = (r shl halfSize) or num_lo

    # Fix the remainder, we're at most 2 iterations off
    if r < m:
      q = q - 1'u64
      r += dv
      if r >= dv and r < m:
        q = q - 1'u64
        r += dv
    r -= m
    (q, r)

  let
    d_hi = norm_d shr halfSize
    d_lo = norm_d and halfMask
    n_lohi = norm_lo shr halfSize
    n_lolo = norm_lo and halfMask

  # First half of the quotient
  let (q1, r1) = halfQR(norm_hi, n_lohi, norm_d, d_hi, d_lo)

  # Second half
  let (q2, r2) = halfQR(r1, n_lolo, norm_d, d_hi, d_lo)

  result = (q1 shl halfSize) or q2
  # Undo the normalisation shift on the remainder.
  r = r2 shr clz
354
+
355
func genStaticDivU128(d: UInt128): tuple[magic: UInt128, more: int] =
  ## Precomputes the `magic` multiplier and the `more` descriptor used by
  ## the static-divisor `div` to divide by `d` without a division.
  ##
  ## * Power-of-two divisor: `magic` is zero and `more` is `log2(d)`.
  ## * Otherwise `more` holds the shift amount in its low 7 bits, and bit
  ##   `0x80` is set when the wider magic number was chosen.
  ##
  ## Raises `DivByZeroDefect` when `d` is zero.
  if d == zero(UInt128):
    raise newException(DivByZeroDefect, "divider must be != 0")

  let
    leadingZeros = countLeadingZeroBits(d)
    log2Floor = 127 - leadingZeros

  if (d and (d - 1)) == zero(UInt128):
    # Exactly one bit set: a plain shift is enough.
    result.magic = zero(UInt128)
    result.more = log2Floor
  else:
    var
      remainder: UInt128
      candidate = div256by128to128(one(UInt128) shl log2Floor,
                                   zero(UInt128), d, leadingZeros, remainder)

    let gap = d - remainder

    if gap < (one(UInt128) shl log2Floor):
      # The candidate multiplier is already precise enough.
      result.more = log2Floor
    else:
      # Double the candidate and the remainder; add one to the candidate
      # when the doubled remainder reaches `d` or wraps around.
      candidate += candidate
      let doubled = remainder + remainder
      if doubled >= d or doubled < remainder:
        candidate += 1'u64

      result.more = log2Floor or 0x80

    result.magic = candidate + 1'u64
384
+
385
func `div`*(x: UInt128, y: static[UInt128]): UInt128 {.inline.} =
  ## Divides `x` by the compile-time constant `y`, using parameters
  ## precomputed by `genStaticDivU128` instead of a run-time division.
  const
    (magic, more) = genStaticDivU128(y)
    needsAdd = (more and 0x80) > 0
    shift = more and 0x7F

  when magic == zero(UInt128):
    # Zero magic: `more` is the plain shift amount.
    x shr more
  else:
    var hiProduct: UInt128

    # NOTE(review): presumably `hiProduct` receives the upper 128 bits of
    # `magic * x` and the discarded result is the lower half -- confirm
    # against `mul128by128ToTwo128`.
    discard mul128by128ToTwo128(magic, x, hiProduct)

    when needsAdd:
      # The 0x80 marker is set: extra add step, then the masked shift.
      (((x - hiProduct) shr 1) + hiProduct) shr shift
    else:
      hiProduct shr more
400
+
401
func `mod`*(x: UInt128, y: static[UInt128]): UInt128 {.inline.} =
  ## Remainder of `x` divided by the compile-time constant `y`.
  ##
  ## Values already smaller than `y` are returned unchanged; otherwise the
  ## remainder is computed as `x - y * (x div y)`.
  if x < y:
    x
  else:
    let quotient = x div y
    x - (y * quotient)
408
+
409
+ #[ Under construction
410
+ func genStaticDivS128(d: Int128): tuple[magic: Int128, more: int] =
411
+ if (d == zero(Int128)):
412
+ raise newException(DivByZeroDefect, "divider must be != 0")
413
+
414
+ let
415
+ negativeDivisor = if d.hi < 0: true else: false
416
+ absD = if negativeDivisor: -nint128Cast[UInt128](d)
417
+ else: nint128Cast[UInt128](d)
418
+ clz = countLeadingZeroBits(absD)
419
+ floor_log_2_d = 127 - clz
420
+
421
+ if (absD and (absD - 1)) == zero(UInt128):
422
+ result.magic = zero(Int128)
423
+ result.more = floor_log_2_d
424
+ else:
425
+ var rem, proposed_m: UInt128
426
+
427
+ proposed_m = div256by128to128(one(UInt128) shl (floor_log_2_d - 1), zero(UInt128), absD, clz, rem)
428
+
429
+ let e = absD - rem
430
+
431
+ if e < (one(UInt128) shl floor_log_2_d):
432
+ result.more = floor_log_2_d - 1
433
+ else:
434
+ proposed_m += proposed_m
435
+ let twice_rem = rem + rem
436
+ if twice_rem >= absD or twice_rem < rem:
437
+ proposed_m += 1'u64
438
+
439
+ result.more = floor_log_2_d or 0x80
440
+
441
+ result.magic = nint128Cast[Int128](proposed_m + 1'u64)
442
+
443
+ if negativeDivisor:
444
+ result.more = result.more or 0x100 # Mark if negative
445
+ result.magic = -result.magic
446
+
447
+ func `div`*(x: Int128, y: static[Int128]): Int128 {.inline.} =
448
+ const
449
+ (magic, more) = genStaticDivS128(y)
450
+ shift = more and 127
451
+ sign = if more and 0x100: one(Int128)
452
+ else: zero(Int128)
453
+
454
+ when magic == zero(Int128):
455
+ const mask = (one(UInt128) shl shift) - one(UInt128)
456
+
457
+ let uq = cast[UInt128](x) + (cast[UInt128](x shr 127) and mask)
458
+
459
+ result = cast[Int128](uq)
460
+
461
+ result = result shr shift
462
+ result = (result xor sign) - sign
463
+ #[ else:
464
+ var q: UInt128
465
+
466
+ discard mul128by128ToTwo128(magic, x, q)
467
+
468
+ when (more and 0x80) > 0:
469
+ let t = ((x - q) shr 1) + q
470
+ result = t shr (more and 0x7F)
471
+ else:
472
+ result = q shr more
473
+ ]#
474
+ ]#
0 commit comments