Skip to content

Commit 20ed7be

Browse files
committed
div and mod with static divisor for UInt128
1 parent f88047b commit 20ed7be

16 files changed

+399
-46
lines changed

nint128.nimble

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Package
22

3-
version = "0.2.1"
3+
version = "0.3.0"
44
author = "rockcavera"
55
description = "128-bit integers"
66
license = "MIT"

src/nint128/arithmetic/division.nim

Lines changed: 177 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import std/bitops
22

3-
import ../nint128_bitwise, ../nint128_cast, ../nint128_comparisons, ../nint128_types, ../nint128_cint128
3+
import ../nint128_bitops, ../nint128_bitwise, ../nint128_cast, ../nint128_comparisons, ../nint128_types, ../nint128_cint128
44

5-
import ./minus, ./multiplication, ./subtraction
5+
import ./addition, ./minus, ./multiplication, ./subtraction
66

77
include ../vendor/stint/div2n1n
88

@@ -12,7 +12,7 @@ template deltaShiftLimit(x: int): int =
1212
when defined(amd64): 13
1313
elif defined(i386): 7
1414
else: 8
15-
15+
1616
# 128div128
1717
elif x == 1:
1818
when defined(amd64): 16
@@ -49,7 +49,7 @@ func nimDivModImpl(x, y: UInt128, remainder: var UInt128): UInt128 {.inline.} =
4949
return
5050

5151
let divisor_clz = countLeadingZeroBits(divisor.lo)
52-
52+
5353
if dividend.hi < divisor.lo:
5454
shift = 64 + divisor_clz - countLeadingZeroBits(dividend.hi)
5555

@@ -208,8 +208,7 @@ func divmod*(x, y: Int128): tuple[q, r: Int128] =
208208
else:
209209
nimDivMod(x, y)
210210

211-
func div128by64to64*(x: UInt128, y: uint64, remainder: var uint64): uint64
212-
{.inline.} =
211+
func div128by64to64*(x: UInt128, y: uint64, remainder: var uint64): uint64 =
213212
# Divides 128 by 64, if the high part of the dividend is less than the divisor
214213
# asm divq is slower on my 4th generation i7
215214
var
@@ -236,9 +235,9 @@ func cDiv(x, y: UInt128): UInt128 {.inline, used.} =
236235

237236
result = cast[UInt128](r)
238237

239-
func `div`*(x, y: UInt128): UInt128 {.inline.} =
238+
func `div`*(x, y: UInt128): UInt128 =
240239
var remainder: UInt128 # Discarded
241-
240+
242241
when nimvm:
243242
result = nimDivModImpl(x, y, remainder)
244243
else:
@@ -258,7 +257,7 @@ func cDiv(x, y: Int128): Int128 {.inline, used.} =
258257

259258
result = cast[Int128](r)
260259

261-
func `div`*(x, y: Int128): Int128 {.inline.} =
260+
func `div`*(x, y: Int128): Int128 =
262261
when nimvm:
263262
nimDivMod(x, y).q
264263
else:
@@ -278,7 +277,7 @@ func cMod(x, y: UInt128): UInt128 {.inline, used.} =
278277

279278
result = cast[UInt128](r)
280279

281-
func `mod`*(x, y: UInt128): UInt128 {.inline.} =
280+
func `mod`*(x, y: UInt128): UInt128 =
282281
when nimvm:
283282
discard nimDivModImpl(x, y, result)
284283
else:
@@ -298,11 +297,178 @@ func cMod(x, y: Int128): Int128 {.inline, used.} =
298297

299298
result = cast[Int128](r)
300299

301-
func `mod`*(x, y: Int128): Int128 {.inline.} =
300+
func `mod`*(x, y: Int128): Int128 =
  ## Remainder of `x` divided by `y` for signed 128-bit integers.
  ##
  ## In the compile-time VM the pure-Nim `nimDivMod` is always used. At run
  ## time the C-backed `cMod` is used when `shouldUseCInt128("cmod")` allows
  ## it, and `nimDivMod` otherwise.
  when nimvm:
    result = nimDivMod(x, y).r
  else:
    when shouldUseCInt128("cmod"):
      result = cMod(x, y)
    else:
      result = nimDivMod(x, y).r
308+
309+
# Static div 128
310+
# Based on libdivide https://github.com/ridiculousfish/libdivide
311+
312+
func div256by128to128(n_hi, n_lo, d: UInt128, clz: int, r: var UInt128): UInt128 =
  ## Divides the 256-bit value `n_hi:n_lo` by the 128-bit divisor `d`.
  ##
  ## Returns the 128-bit quotient and stores the remainder in `r`.
  ##
  ## `clz` is the normalisation shift applied to the operands before the
  ## schoolbook division; it is expected to be `countLeadingZeroBits(d)` so
  ## that the shifted divisor has its most significant bit set.
  ##
  ## If `n_hi >= d` the quotient cannot be represented in 128 bits; in that
  ## case both the result and `r` are set to `high(UInt128)`.
  const
    size = 128
    halfSize = size div 2
    halfMask = (one(UInt128) shl halfSize) - one(UInt128)

  if n_hi >= d:
    r = high(UInt128)
    return high(UInt128)

  # Normalise: shift the whole 256-bit dividend and the divisor left by `clz`.
  # The bits shifted out of the low half must be carried into the high half.
  # The `clz > 0` guard avoids a full-width (128-bit) right shift.
  let
    spill = if clz > 0: n_lo shr (size - clz) else: zero(UInt128)
    n_hi = (n_hi shl clz) or spill
    n_lo = n_lo shl clz
    d = d shl clz

  template halfQR(n_hi, n_lo, d, d_hi, d_lo: UInt128): tuple[q, r: UInt128] =
    # One schoolbook step: estimates a half-width quotient digit from the top
    # half of the divisor, then corrects the estimate.

    var (q, r) = divmod(n_hi, d_hi)
    let m = q * d_lo
    r = (r shl halfSize) or n_lo

    # Fix the remainder, we're at most 2 iterations off
    if r < m:
      q = q - 1'u64
      r += d
      # `r >= d` is false when the addition above wrapped around, in which
      # case no second correction is needed.
      if r >= d and r < m:
        q = q - 1'u64
        r += d
    r -= m
    (q, r)

  let
    d_hi = d shr halfSize
    d_lo = d and halfMask
    n_lohi = n_lo shr halfSize
    n_lolo = n_lo and halfMask

  # First half of the quotient
  let (q1, r1) = halfQR(n_hi, n_lohi, d, d_hi, d_lo)

  # Second half
  let (q2, r2) = halfQR(r1, n_lolo, d, d_hi, d_lo)

  result = (q1 shl halfSize) or q2
  # Undo the normalisation shift on the remainder.
  r = r2 shr clz
354+
355+
func genStaticDivU128(d: UInt128): tuple[magic: UInt128, more: int] =
  ## Precomputes the constants used by the static-divisor `div` for `UInt128`.
  ##
  ## * `magic` is `zero(UInt128)` when `d` is a power of two, otherwise the
  ##   multiplier to use.
  ## * `more` holds the shift amount in its low 7 bits; bit `0x80` is set when
  ##   the divide step needs the extra add-and-halve correction.
  ##
  ## Raises `DivByZeroDefect` when `d` is zero.
  if d == zero(UInt128):
    raise newException(DivByZeroDefect, "divider must be != 0")

  let
    leadingZeros = countLeadingZeroBits(d)
    log2Floor = 127 - leadingZeros

  # A power of two has no bits in common with its predecessor: a plain shift
  # is enough, signalled by a zero magic number.
  if (d and (d - 1)) == zero(UInt128):
    result.magic = zero(UInt128)
    result.more = log2Floor
    return

  let powerOfTwo = one(UInt128) shl log2Floor
  var leftover: UInt128
  var candidate = div256by128to128(powerOfTwo, zero(UInt128), d, leadingZeros, leftover)

  if (d - leftover) < powerOfTwo:
    # The candidate multiplier is already precise enough.
    result.more = log2Floor
  else:
    # One more bit of precision is needed: double the candidate and round
    # using the doubled remainder (taking wrap-around into account).
    candidate += candidate
    let doubled = leftover + leftover
    if doubled >= d or doubled < leftover:
      candidate += 1'u64

    result.more = log2Floor or 0x80

  result.magic = candidate + 1'u64
384+
385+
func `div`*(x: UInt128, y: static[UInt128]): UInt128 {.inline.} =
  ## Integer division of `x` by the compile-time constant `y`.
  ##
  ## The constants produced by `genStaticDivU128(y)` are evaluated at compile
  ## time, so the run-time work is a shift (power-of-two divisor) or a
  ## 128x128 multiplication followed by shifts.
  const
    (magic, more) = genStaticDivU128(y)

  when magic == zero(UInt128):
    # Power-of-two divisor: `more` is the plain shift amount.
    result = x shr more
  else:
    # Only the high 128 bits of `magic * x` are needed.
    var upperHalf: UInt128
    discard mul128by128ToTwo128(magic, x, upperHalf)

    when (more and 0x80) != 0:
      # Correction variant flagged by bit 0x80; the shift is in the low 7 bits.
      const shift = more and 0x7F
      let averaged = ((x - upperHalf) shr 1) + upperHalf
      result = averaged shr shift
    else:
      result = upperHalf shr more
400+
401+
func `mod`*(x: UInt128, y: static[UInt128]): UInt128 {.inline.} =
  ## Remainder of `x` divided by the compile-time constant `y`.
  ##
  ## When `x` is already smaller than `y` it is returned unchanged; otherwise
  ## the remainder is derived from the quotient as `x - y * (x div y)`.
  result = x
  if not (x < y):
    let quotient = x div y
    result = x - (y * quotient)
408+
409+
#[ Under construction
410+
func genStaticDivS128(d: Int128): tuple[magic: Int128, more: int] =
411+
if (d == zero(Int128)):
412+
raise newException(DivByZeroDefect, "divider must be != 0")
413+
414+
let
415+
negativeDivisor = if d.hi < 0: true else: false
416+
absD = if negativeDivisor: -nint128Cast[UInt128](d)
417+
else: nint128Cast[UInt128](d)
418+
clz = countLeadingZeroBits(absD)
419+
floor_log_2_d = 127 - clz
420+
421+
if (absD and (absD - 1)) == zero(UInt128):
422+
result.magic = zero(Int128)
423+
result.more = floor_log_2_d
424+
else:
425+
var rem, proposed_m: UInt128
426+
427+
proposed_m = div256by128to128(one(UInt128) shl (floor_log_2_d - 1), zero(UInt128), absD, clz, rem)
428+
429+
let e = absD - rem
430+
431+
if e < (one(UInt128) shl floor_log_2_d):
432+
result.more = floor_log_2_d - 1
433+
else:
434+
proposed_m += proposed_m
435+
let twice_rem = rem + rem
436+
if twice_rem >= absD or twice_rem < rem:
437+
proposed_m += 1'u64
438+
439+
result.more = floor_log_2_d or 0x80
440+
441+
result.magic = nint128Cast[Int128](proposed_m + 1'u64)
442+
443+
if negativeDivisor:
444+
result.more = result.more or 0x100 # Mark if negative
445+
result.magic = -result.magic
446+
447+
func `div`*(x: Int128, y: static[Int128]): Int128 {.inline.} =
448+
const
449+
(magic, more) = genStaticDivS128(y)
450+
shift = more and 127
451+
sign = if more and 0x100: one(Int128)
452+
else: zero(Int128)
453+
454+
when magic == zero(Int128):
455+
const mask = (one(UInt128) shl shift) - one(UInt128)
456+
457+
let uq = cast[UInt128](x) + (cast[UInt128](x shr 127) and mask)
458+
459+
result = cast[Int128](uq)
460+
461+
result = result shr shift
462+
result = (result xor sign) - sign
463+
#[else:
464+
var q: UInt128
465+
466+
discard mul128by128ToTwo128(magic, x, q)
467+
468+
when (more and 0x80) > 0:
469+
let t = ((x - q) shr 1) + q
470+
result = t shr (more and 0x7F)
471+
else:
472+
result = q shr more
473+
]#
474+
]#

src/nint128/arithmetic/multiplication.nim

Lines changed: 68 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
import ../nint128_cast, ../nint128_comparisons, ../nint128_types, ../nint128_cint128
1+
import ../nint128_bitwise, ../nint128_cast, ../nint128_comparisons, ../nint128_types, ../nint128_cint128
22

3-
import ./minus
3+
import ./addition, ./minus
44

55
when defined(amd64) and defined(vcc):
66
func umul128(a, b: uint64, hi: var uint64): uint64 {.importc: "_umul128",
@@ -117,7 +117,7 @@ func `*`*(a, b: UInt128): UInt128 {.inline.} =
117117

118118
func `*`*(a, b: Int128): Int128 {.inline.} =
119119
when nimvm:
120-
nimMul128by128(a, b)
120+
nimMul128by128(a, b)
121121
else:
122122
when shouldUseCInt128("cmul"):
123123
cMul128by128(a, b)
@@ -138,3 +138,68 @@ func `*`*(a: UInt128, b: uint64): UInt128 {.inline.} =
138138

139139
template `*`*(a: uint64, b: UInt128): UInt128 =
  ## `uint64 * UInt128`: forwards to the `UInt128 * uint64` overload with the
  ## operands swapped.
  `*`(b, a)
141+
142+
func mul128by128ToTwo128*(a, b: UInt128, hi: var UInt128): UInt128 =
  ## Full 128x128 -> 256-bit multiplication of `a` and `b`.
  ##
  ## Returns the low 128 bits of the product and writes the high 128 bits
  ## (the overflow) to `hi`. `hi` is a pure output: whatever it held before
  ## the call is discarded.
  var tmp = mul64by64To128(a.lo, b.lo)
  result.lo = tmp.lo
  # Carry the upper word of `a.lo * b.lo` into the middle column.
  tmp.lo = tmp.hi
  tmp.hi = 0'u64
  tmp += mul64by64To128(a.hi, b.lo)
  # Overwrite BOTH words of `hi`: the additions below may carry into `hi.hi`,
  # so stale caller data there would corrupt the result.
  hi.hi = 0'u64
  hi.lo = tmp.hi
  tmp.hi = 0'u64
  tmp += mul64by64To128(b.hi, a.lo)
  result.hi = tmp.lo
  hi += tmp.hi
  hi += mul64by64To128(a.hi, b.hi)
155+
156+
func mul128by128ToTwo128*(a, b: UInt128, hi, lo: var UInt128): bool {.inline.} =
  ## Extended multiplication of two `UInt128` values.
  ##
  ## `lo` receives the product and `hi` receives the overflow. Returns `true`
  ## when overflow occurred, i.e. when `hi` is greater than zero.
  lo = mul128by128ToTwo128(a, b, hi)
  result = zero(UInt128) < hi
161+
162+
#[ Under construction
163+
func mul128by128ToTwo128*(a, b: Int128, hi, lo: var Int128): bool {.inline.} =
164+
## Extended multiplication between two `Int128` which returns `true` if overflow occurs. `lo` is
165+
## the result of multiplication and `hi` is the overflow.
166+
#[
167+
var uHi, uLo: UInt128
168+
result = mul128by128ToTwo128(nint128Cast[UInt128](a), nint128Cast[UInt128](b), uHi, uLo)
169+
170+
hi = nint128Cast[Int128](uHi)
171+
lo = nint128Cast[Int128](uLo)
172+
]#
173+
174+
var
175+
uHi, uLo: UInt128
176+
x = nint128Cast[UInt128](a)
177+
y = nint128Cast[UInt128](b)
178+
neg = false
179+
180+
if isNegative(a):
181+
x = -x
182+
neg = true
183+
184+
if isNegative(b):
185+
y = -y
186+
neg = true xor neg
187+
188+
result = mul128by128ToTwo128(x, y, uHi, uLo)
189+
lo = nint128Cast[Int128](uLo)
190+
hi = nint128Cast[Int128](uHi)
191+
192+
let loNegative = isNegative(lo)
193+
194+
result = result or loNegative
195+
lo.hi = lo.hi and 0x7FFFFFFFFFFFFFFF'i64 # clearBit(result.hi, 63)
196+
197+
if result:
198+
let bit = uint64(loNegative)
199+
200+
hi = hi shl 1
201+
hi.lo = hi.lo or bit
202+
203+
if neg:
204+
lo = -lo
205+
]#

src/nint128/arithmetic/subtraction.nim

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,21 @@ func `-`*(x, y: Int128): Int128 {.inline.} =
5757

5858
func `-=`*[T: SomeInt128](x: var T, y: T) {.inline.} =
  ## Subtracts `y` from `x` in place.
  x = `-`(x, y)
60+
61+
#
62+
# Possible addition to the package
63+
#
64+
65+
func `-`*(x: UInt128, y: uint64): UInt128 {.inline.} =
  ## Subtracts the 64-bit value `y` from `x`.
  # A borrow out of the low word happens exactly when `x.lo < y`.
  let borrow = uint64(x.lo < y)
  result.lo = x.lo - y
  result.hi = x.hi - borrow
68+
69+
func `-`*(x: Int128, y: uint64): Int128 {.inline.} =
  ## Subtracts the unsigned 64-bit value `y` from the signed `x`.
  # A borrow out of the low word happens exactly when `x.lo < y`.
  let borrow = int64(x.lo < y)
  result.lo = x.lo - y
  result.hi = x.hi - borrow
72+
73+
func `-=`*[T: SomeInt128](x: var T, y: uint64) {.inline.} =
  ## Subtracts the 64-bit value `y` from `x` in place.
  x = `-`(x, y)
75+
76+
template `-`*[T: SomeInt128](x: uint64, y: T): T =
  ## `uint64 - SomeInt128`: widens `x` to `T` and subtracts `y` from it.
  ##
  ## Subtraction is not commutative, so this must not forward to `y - x`
  ## (that would return the negated result).
  var minuend: T   # zero-initialised, so the high word starts at 0
  minuend.lo = x
  minuend - y

src/nint128/comparisons/equal.nim

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ func cEqual(x, y: UInt128): bool {.inline, used.} =
1111
let
1212
x = cast[CUInt128](x)
1313
y = cast[CUInt128](y)
14-
14+
1515
{.emit: """`result` = (NIM_BOOL)(`x` == `y`);""".}
1616

1717
func nimEqual(x, y: Int128): bool {.inline.} =
@@ -28,7 +28,7 @@ func cEqual(x, y: Int128): bool {.inline, used.} =
2828
let
2929
x = cast[CInt128](x)
3030
y = cast[CInt128](y)
31-
31+
3232
{.emit: """`result` = (NIM_BOOL)(`x` == `y`);""".}
3333

3434
func `==`*(x, y: UInt128): bool {.inline.} =

0 commit comments

Comments
 (0)