|
// FFT over a prime modulus (number-theoretic transform)
| 2 | +// usage: |
| 3 | +// constexpr int MOD = 998'244'353; |
| 4 | +// std::vector<mint<MOD> > a(n); |
| 5 | +// std::vector<mint<MOD> > b(m); |
| 6 | +// std::vector<mint<MOD> > c = ntt_convolution(a, b); |
// Counting-loop helpers. `_REP3_(i, m, n)` declares an int `i` running over [m, n);
// `_REP_(i, n)` is the same loop starting at 0. The upper bound is cast to int and
// is re-evaluated on every iteration.
#define _REP3_(i, m, n) for (int i = (m); (i) < (int)(n); ++ (i))
#define _REP_(i, n) _REP3_(i, 0, n)
| 9 | + |
/**
 * @brief Modular exponentiation by repeated squaring.
 * @param x   base, must satisfy 0 <= x < MOD
 * @param k   exponent
 * @param MOD modulus, must be positive
 * @return x^k mod MOD in [0, MOD); the empty product (k == 0) is 1 mod MOD,
 *         i.e. 0 when MOD == 1
 */
constexpr int32_t modpow(int64_t x, uint64_t k, int32_t MOD) {
    assert (0 <= x and x < MOD);
    // Start from 1 reduced modulo MOD so the result stays in range even for MOD == 1.
    int64_t y = 1 % MOD;
    for (; k; k >>= 1) {
        if (k & 1) y = y * x % MOD;  // both factors are below 2^31, so the product fits in int64_t
        x = x * x % MOD;
    }
    assert (0 <= y and y < MOD);
    return static_cast<int32_t>(y);
}
/**
 * @brief Modular inverse via the extended Euclidean algorithm; reports failure instead of aborting.
 * @param value element to invert, must satisfy 0 <= value < MOD
 * @param MOD   modulus
 * @return the x in [0, MOD) with value * x == 1 (mod MOD), or -1 when value is 0
 *         or gcd(value, MOD) != 1
 * @note declared inline, like modinv, so that the definition may appear in several
 *       translation units
 */
inline int32_t modinv_nocheck(int32_t value, int32_t MOD) {
    assert (0 <= value and value < MOD);
    if (value == 0) return -1;
    // Loop invariants: a == value * u + MOD * v  and  b == value * x + MOD * y.
    int64_t a = value, b = MOD;
    int64_t x = 0, y = 1;
    for (int64_t u = 1, v = 0; a; ) {
        int64_t q = b / a;
        x -= q * u; std::swap(x, u);
        y -= q * v; std::swap(y, v);
        b -= q * a; std::swap(b, a);
    }
    // Here a == 0 and b holds gcd(value, MOD); an inverse exists only when it is 1.
    if (not (value * x + MOD * y == b and b == 1)) return -1;
    if (x < 0) x += MOD;
    assert (0 <= x and x < MOD);
    return static_cast<int32_t>(x);
}
| 36 | +inline int32_t modinv(int32_t x, int32_t MOD) { |
| 37 | + int32_t y = modinv_nocheck(x, MOD); |
| 38 | + assert (y != -1); |
| 39 | + return y; |
| 40 | +} |
| 41 | + |
| 42 | +template <int32_t MOD> |
| 43 | +struct mint { |
| 44 | + int32_t value; |
| 45 | + mint() : value() {} |
| 46 | + mint(int64_t value_) : value(value_ < 0 ? value_ % MOD + MOD : value_ >= MOD ? value_ % MOD : value_) {} |
| 47 | + mint(int32_t value_, std::nullptr_t) : value(value_) {} |
| 48 | + explicit operator bool() const { return value; } |
| 49 | + inline constexpr mint<MOD> operator + (mint<MOD> other) const { return mint<MOD>(*this) += other; } |
| 50 | + inline constexpr mint<MOD> operator - (mint<MOD> other) const { return mint<MOD>(*this) -= other; } |
| 51 | + inline constexpr mint<MOD> operator * (mint<MOD> other) const { return mint<MOD>(*this) *= other; } |
| 52 | + inline constexpr mint<MOD> & operator += (mint<MOD> other) { this->value += other.value; if (this->value >= MOD) this->value -= MOD; return *this; } |
| 53 | + inline constexpr mint<MOD> & operator -= (mint<MOD> other) { this->value -= other.value; if (this->value < 0) this->value += MOD; return *this; } |
| 54 | + inline constexpr mint<MOD> & operator *= (mint<MOD> other) { this->value = (uint_fast64_t)this->value * other.value % MOD; return *this; } |
| 55 | + inline constexpr mint<MOD> operator - () const { return mint<MOD>(this->value ? MOD - this->value : 0, nullptr); } |
| 56 | + inline constexpr mint<MOD> pow(uint64_t k) const { return mint<MOD>(modpow(value, k, MOD), nullptr); } |
| 57 | + inline mint<MOD> inv() const { return mint<MOD>(modinv(value, MOD), nullptr); } |
| 58 | + inline constexpr mint<MOD> operator / (mint<MOD> other) const { return *this * other.inv(); } |
| 59 | + inline constexpr mint<MOD> operator /= (mint<MOD> other) { return *this *= other.inv(); } |
| 60 | + inline constexpr bool operator == (mint<MOD> other) const { return value == other.value; } |
| 61 | + inline constexpr bool operator != (mint<MOD> other) const { return value != other.value; } |
| 62 | +}; |
| 63 | +template <int32_t MOD> mint<MOD> operator * (int64_t value, mint<MOD> n) { return mint<MOD>(value) * n; } |
| 64 | +template <int32_t MOD> std::istream & operator >> (std::istream & in, mint<MOD> & n) { int64_t value; in >> value; n = value; return in; } |
| 65 | +template <int32_t MOD> std::ostream & operator << (std::ostream & out, mint<MOD> n) { return out << n.value; } |
| 66 | + |
/**
 * @brief Table of supported NTT moduli.
 *
 * Each specialization describes a prime PRIME = a * 2^b + 1 together with the generator g
 * from which the NTT derives its roots of unity. The primary template is intentionally
 * empty: the presence of the member `g` is what marks a modulus as supported.
 */
template <int32_t PRIME> struct proth_prime {};
template <> struct proth_prime<1224736769> { static constexpr int a =             73, b = 24, g =  3; };
template <> struct proth_prime<1053818881> { static constexpr int a =     3 * 5 * 67, b = 20, g =  7; };
template <> struct proth_prime<1051721729> { static constexpr int a =        17 * 59, b = 20, g =  6; };
template <> struct proth_prime<1045430273> { static constexpr int a =            997, b = 20, g =  3; };
template <> struct proth_prime<1012924417> { static constexpr int a =     3 * 7 * 23, b = 21, g =  5; };
template <> struct proth_prime<1007681537> { static constexpr int a =        31 * 31, b = 20, g =  3; };
template <> struct proth_prime<1004535809> { static constexpr int a =            479, b = 21, g =  3; };
template <> struct proth_prime< 998244353> { static constexpr int a =         7 * 17, b = 23, g =  3; };
template <> struct proth_prime< 985661441> { static constexpr int a =         5 * 47, b = 22, g =  3; };
template <> struct proth_prime< 976224257> { static constexpr int a =     7 * 7 * 19, b = 20, g =  3; };
template <> struct proth_prime< 975175681> { static constexpr int a =     3 * 5 * 31, b = 21, g = 17; };
template <> struct proth_prime< 962592769> { static constexpr int a = 3 * 3 * 3 * 17, b = 21, g =  7; };
// 950009857 - 1 == 453 * 2^21, so the odd cofactor is 3 * 151.
template <> struct proth_prime< 950009857> { static constexpr int a =        3 * 151, b = 21, g =  7; };
template <> struct proth_prime< 943718401> { static constexpr int a =  3 * 3 * 5 * 5, b = 22, g =  7; };
template <> struct proth_prime< 935329793> { static constexpr int a =            223, b = 22, g =  3; };
template <> struct proth_prime< 924844033> { static constexpr int a =  3 * 3 * 7 * 7, b = 21, g =  5; };
template <> struct proth_prime< 469762049> { static constexpr int a =              7, b = 26, g =  3; };
template <> struct proth_prime< 167772161> { static constexpr int a =              5, b = 25, g =  3; };
| 86 | + |
| 87 | +struct is_proth_prime_impl { |
| 88 | + template <int32_t PRIME, class T> static auto check(T *) -> decltype(proth_prime<PRIME>::g, std::true_type()); |
| 89 | + template <int32_t PRIME, class T> static auto check(...) -> std::false_type; |
| 90 | +}; |
| 91 | +template <int32_t PRIME> |
| 92 | +struct is_proth_prime : decltype(is_proth_prime_impl::check<PRIME, std::nullptr_t>(nullptr)) { |
| 93 | +}; |
| 94 | + |
| 95 | +/** |
| 96 | + * @brief Number Theoretic Transformation (NTT) for Proth primes |
| 97 | + * @note O(N log N) |
| 98 | + * @note radix-2, decimation-in-frequency, Cooley-Tukey |
| 99 | + * @note cache std::polar (~ 2x faster) |
| 100 | + */ |
| 101 | +template <int32_t PRIME> |
| 102 | +void ntt_inplace(std::vector<mint<PRIME> > & a, bool inverse) { |
| 103 | + const int n = a.size(); |
| 104 | + const int log2_n = __builtin_ctz(n); |
| 105 | + assert (n == 1 << log2_n); |
| 106 | + assert (log2_n <= proth_prime<PRIME>::b); |
| 107 | + |
| 108 | + // prepare rotors |
| 109 | + std::vector<mint<PRIME> > ep, iep; |
| 110 | + while ((int)ep.size() <= log2_n) { |
| 111 | + ep.push_back(mint<PRIME>(proth_prime<PRIME>::g).pow(mint<PRIME>(-1).value / (1 << ep.size()))); |
| 112 | + iep.push_back(ep.back().inv()); |
| 113 | + } |
| 114 | + |
| 115 | + // divide and conquer |
| 116 | + std::vector<mint<PRIME> > b(n); |
| 117 | + _REP3_ (i, 1, log2_n + 1) { |
| 118 | + int w = 1 << (log2_n - i); |
| 119 | + mint<PRIME> base = (inverse ? iep : ep)[i]; |
| 120 | + mint<PRIME> now = 1; |
| 121 | + for (int y = 0; y < n / 2; y += w) { |
| 122 | + _REP_ (x, w) { |
| 123 | + auto l = a[y << 1 | x]; |
| 124 | + auto r = now * a[y << 1 | x | w]; |
| 125 | + b[y | x] = l + r; |
| 126 | + b[y | x | n >> 1] = l - r; |
| 127 | + } |
| 128 | + now *= base; |
| 129 | + } |
| 130 | + std::swap(a, b); |
| 131 | + } |
| 132 | + |
| 133 | + // div by n if inverse |
| 134 | + if (inverse) { |
| 135 | + auto n_inv = mint<PRIME>(n).inv(); |
| 136 | + _REP_ (i, n) { |
| 137 | + a[i] *= n_inv; |
| 138 | + } |
| 139 | + } |
| 140 | +} |
| 141 | + |
| 142 | +/** |
| 143 | + * @brief multiprecation on $\mathbb{F}_p[x]$ for Proth primes |
| 144 | + * @note O(N log N) |
| 145 | + * @note (f \ast g)(i) = \sum_{0 \le j \lt i + 1} f(j) g(i - j) |
| 146 | + */ |
| 147 | +template <int32_t PRIME> |
| 148 | +typename std::enable_if<is_proth_prime<PRIME>::value, std::vector<mint<PRIME> > >::type ntt_convolution(const std::vector<mint<PRIME> > & a_, const std::vector<mint<PRIME> > & b_) { |
| 149 | + if (a_.size() <= 32 or b_.size() <= 32) { |
| 150 | + std::vector<mint<PRIME> > c(a_.size() + b_.size() - 1); |
| 151 | + _REP_ (i, a_.size()) _REP_ (j, b_.size()) c[i + j] += a_[i] * b_[j]; |
| 152 | + // return c; |
| 153 | + } |
| 154 | + int m = a_.size() + b_.size() - 1; |
| 155 | + int n = (m == 1 ? 1 : 1 << (32 - __builtin_clz(m - 1))); |
| 156 | + auto a = a_; |
| 157 | + auto b = b_; |
| 158 | + a.resize(n); |
| 159 | + b.resize(n); |
| 160 | + ntt_inplace(a, false); |
| 161 | + ntt_inplace(b, false); |
| 162 | + _REP_ (i, n) { |
| 163 | + a[i] *= b[i]; |
| 164 | + } |
| 165 | + ntt_inplace(a, true); |
| 166 | + a.resize(m); |
| 167 | + return a; |
| 168 | +} |
| 169 | + |
| 170 | +template <int32_t MOD, int32_t MOD1, int32_t MOD2, int32_t MOD3> |
| 171 | +mint<MOD> garner_algorithm_template(mint<MOD1> a1, mint<MOD2> a2, mint<MOD3> a3) { |
| 172 | + static const auto r12 = mint<MOD2>(MOD1).inv(); |
| 173 | + static const auto r13 = mint<MOD3>(MOD1).inv(); |
| 174 | + static const auto r23 = mint<MOD3>(MOD2).inv(); |
| 175 | + a2 = (a2 - a1.value) * r12; |
| 176 | + a3 = (a3 - a1.value) * r13; |
| 177 | + a3 = (a3 - a2.value) * r23; |
| 178 | + return mint<MOD>(a1.value) + a2.value * mint<MOD>(MOD1) + a3.value * (mint<MOD>(MOD1) * mint<MOD>(MOD2)); |
| 179 | +} |
| 180 | + |
| 181 | +/** |
| 182 | + * @brief multiprecation on $\mathbb{Z}/n\mathbb{Z}[x]$ |
| 183 | + */ |
| 184 | +template <int32_t MOD> |
| 185 | +typename std::enable_if<not is_proth_prime<MOD>::value, std::vector<mint<MOD> > >::type ntt_convolution(const std::vector<mint<MOD> > & a, const std::vector<mint<MOD> > & b) { |
| 186 | + if (a.size() <= 32 or b.size() <= 32) { |
| 187 | + std::vector<mint<MOD> > c(a.size() + b.size() - 1); |
| 188 | + _REP_ (i, a.size()) _REP_ (j, b.size()) c[i + j] += a[i] * b[j]; |
| 189 | + // return c; |
| 190 | + } |
| 191 | + constexpr int PRIMES[3] = { 1004535809, 998244353, 985661441 }; |
| 192 | + std::vector<mint<PRIMES[0]> > x0(a.size()); |
| 193 | + std::vector<mint<PRIMES[1]> > x1(a.size()); |
| 194 | + std::vector<mint<PRIMES[2]> > x2(a.size()); |
| 195 | + _REP_ (i, a.size()) { |
| 196 | + x0[i] = a[i].value; |
| 197 | + x1[i] = a[i].value; |
| 198 | + x2[i] = a[i].value; |
| 199 | + } |
| 200 | + std::vector<mint<PRIMES[0]> > y0(b.size()); |
| 201 | + std::vector<mint<PRIMES[1]> > y1(b.size()); |
| 202 | + std::vector<mint<PRIMES[2]> > y2(b.size()); |
| 203 | + _REP_ (j, b.size()) { |
| 204 | + y0[j] = b[j].value; |
| 205 | + y1[j] = b[j].value; |
| 206 | + y2[j] = b[j].value; |
| 207 | + } |
| 208 | + std::vector<mint<PRIMES[0]> > z0 = ntt_convolution<PRIMES[0]>(x0, y0); |
| 209 | + std::vector<mint<PRIMES[1]> > z1 = ntt_convolution<PRIMES[1]>(x1, y1); |
| 210 | + std::vector<mint<PRIMES[2]> > z2 = ntt_convolution<PRIMES[2]>(x2, y2); |
| 211 | + std::vector<mint<MOD> > c(z0.size()); |
| 212 | + _REP_ (k, z0.size()) { |
| 213 | + c[k] = garner_algorithm_template<MOD>(z0[k], z1[k], z2[k]); |
| 214 | + } |
| 215 | + return c; |
| 216 | +} |
0 commit comments