-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspecialMath.c
139 lines (132 loc) · 2.86 KB
/
specialMath.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#include "specialMath.h"
float fp2(FP* restrict real, FP* restrict imag)
{
FP2 cr = load2(real);
FP2 ci = load2(imag);
FP2 zr = 0;
FP2 zi = 0;
FP2 zr2, zi2, zri;
u64 four;
{
MAKE_STACK_FP_PREC(temp, 1);
loadValue(&temp, 4);
four = temp.value.val[0];
}
for(int i = 0; i < maxiter; i++)
{
zr2 = mul2(zr, zr);
zri = 2 * mul2(zr, zi);
zi2 = mul2(zi, zi);
if(((zr2 + zi2) >> 64) >= four)
return i;
zr = zr2 - zi2 + cr;
zi = zri + ci;
}
return -1;
}
float fp2s(FP* restrict real, FP* restrict imag)
{
FP2 cr = load2(real);
FP2 ci = load2(imag);
FP2 zr = 0;
FP2 zi = 0;
FP2 zr2, zi2, zri;
u64 four;
{
MAKE_STACK_FP_PREC(temp, 1);
loadValue(&temp, 4);
four = temp.value.val[0];
}
int i;
for(i = 0; i < maxiter; i++)
{
zr2 = mul2(zr, zr);
zri = 2 * mul2(zr, zi);
zi2 = mul2(zi, zi);
if(((zr2 + zi2) >> 64) >= four)
break;
zr = zr2 - zi2 + cr;
zi = zri + ci;
}
return smoothEscapeTime(i, getVal2(zr), getVal2(zi), getVal2(cr), getVal2(ci));
}
//Generic arb-precision kernel
#define IMPL_REG_KERNEL(n) \
float fp##n(FP* restrict real, FP* restrict imag) \
{ \
FP##n cr = load##n(real); \
FP##n ci = load##n(imag); \
FP##n zr = zero##n(); \
FP##n zi = zero##n(); \
FP##n zr2, zi2, zri, mag; \
u64 four; \
{ \
MAKE_STACK_FP_PREC(temp, 1); \
loadValue(&temp, 4); \
four = temp.value.val[0]; \
} \
for(int i = 0; i < maxiter; i++) \
{ \
zr2 = mul##n(zr, zr); \
zri = mul##n(zr, zi); \
zri = add##n(zri, zri); \
zi2 = mul##n(zi, zi); \
mag = add##n(zr2, zi2); \
if(mag.w[0] >= four) \
{ \
return i; \
} \
zr = add##n(sub##n(zr2, zi2), cr); \
zi = add##n(zri, ci); \
} \
return -1; \
}
#define IMPL_SMOOTH_KERNEL(n) \
float fp##n##s(FP* restrict real, FP* restrict imag) \
{ \
FP##n cr = load##n(real); \
FP##n ci = load##n(imag); \
FP##n zr = zero##n(); \
FP##n zi = zero##n(); \
FP##n zr2, zi2, zri, mag; \
u64 four; \
{ \
MAKE_STACK_FP_PREC(temp, 1); \
loadValue(&temp, 4); \
four = temp.value.val[0]; \
} \
int i; \
for(i = 0; i < maxiter; i++) \
{ \
zr2 = mul##n(zr, zr); \
zri = mul##n(zr, zi); \
zri = add##n(zri, zri); \
zi2 = mul##n(zi, zi); \
mag = add##n(zr2, zi2); \
if(mag.w[0] >= four) \
{ \
break; \
} \
zr = add##n(sub##n(zr2, zi2), cr); \
zi = add##n(zri, ci); \
} \
if(i == maxiter) \
i = -1; \
return smoothEscapeTime(i, getVal##n(zr), getVal##n(zi), getVal##n(cr), getVal##n(ci)); \
}
IMPL_REG_KERNEL(3)
IMPL_REG_KERNEL(4)
IMPL_REG_KERNEL(5)
IMPL_REG_KERNEL(6)
IMPL_REG_KERNEL(7)
IMPL_REG_KERNEL(8)
IMPL_REG_KERNEL(9)
IMPL_REG_KERNEL(10)
IMPL_SMOOTH_KERNEL(3)
IMPL_SMOOTH_KERNEL(4)
IMPL_SMOOTH_KERNEL(5)
IMPL_SMOOTH_KERNEL(6)
IMPL_SMOOTH_KERNEL(7)
IMPL_SMOOTH_KERNEL(8)
IMPL_SMOOTH_KERNEL(9)
IMPL_SMOOTH_KERNEL(10)