NTT copypasta e ajeitando 128bit

chinchila · chinchila · commit 69cec145898e · 2021-07-07T20:49:13.000-03:00
diff --git a/code/misc/128bit.cpp b/code/misc/128bit.cpp
@@ -1,9 +1,10 @@
 __int128 input(){
     string s;
     cin >> s;
-    ll fst = (s[0] == '-') ? 1 : 0;
+    int fst = (s[0] == '-') ? 1 : 0;
     __int128 v = 0;
-    f(i,fst,s.size()) v = v * 10 + s[i] - '0';
+    for(int i = fst ; i < s.size() ; ++i)
+        v = v * 10 + s[i] - '0';
     if(fst) v = -v;
     return v;
 }
@@ -12,8 +13,8 @@ ostream& operator << (ostream& os,const __int128& v) {
     string ret, sgn;
     __int128 n = v;
     if(v < 0) sgn = "-", n = -v;
-    while(n) ret.pb(n % 10 + '0'), n /= 10;
-    reverse(all(ret));
+    while(n) ret.push_back(n % 10 + '0'), n /= 10;
+    reverse(ret.begin(), ret.end());
     ret = sgn + ret;
     os << ret;
     return os;
diff --git a/code/numeric/ntt_copy_pasta.cpp b/code/numeric/ntt_copy_pasta.cpp
@@ -0,0 +1,216 @@
+// FFT com mod primo
+// usage:
+// constexpr int MOD = 998'244'353;
+// std::vector<mint<MOD> > a(n);
+// std::vector<mint<MOD> > b(m);
+// std::vector<mint<MOD> > c = ntt_convolution(a, b);
+#define _REP_(i, n) for (int i = 0; (i) < (int)(n); ++ (i))
+#define _REP3_(i, m, n) for (int i = (m); (i) < (int)(n); ++ (i))
+
+constexpr int32_t modpow(int64_t x, uint64_t k, int32_t MOD) {
+    assert (0 <= x and x < MOD);
+    int64_t y = 1;
+    for (; k; k >>= 1) {
+        if (k & 1) (y *= x) %= MOD;
+        (x *= x) %= MOD;
+    }
+    assert (0 <= y and y < MOD);
+    return y;
+}
+int32_t modinv_nocheck(int32_t value, int32_t MOD) {
+    assert (0 <= value and value < MOD);
+    if (value == 0) return -1;
+    int64_t a = value, b = MOD;
+    int64_t x = 0, y = 1;
+    for (int64_t u = 1, v = 0; a; ) {
+        int64_t q = b / a;
+        x -= q * u; std::swap(x, u);
+        y -= q * v; std::swap(y, v);
+        b -= q * a; std::swap(b, a);
+    }
+    if (not (value * x + MOD * y == b and b == 1)) return -1;
+    if (x < 0) x += MOD;
+    assert (0 <= x and x < MOD);
+    return x;
+}
+inline int32_t modinv(int32_t x, int32_t MOD) {
+    int32_t y = modinv_nocheck(x, MOD);
+    assert (y != -1);
+    return y;
+}
+
+template <int32_t MOD>
+struct mint {
+    int32_t value;
+    mint() : value() {}
+    mint(int64_t value_) : value(value_ < 0 ? value_ % MOD + MOD : value_ >= MOD ? value_ % MOD : value_) {}
+    mint(int32_t value_, std::nullptr_t) : value(value_) {}
+    explicit operator bool() const { return value; }
+    inline constexpr mint<MOD> operator + (mint<MOD> other) const { return mint<MOD>(*this) += other; }
+    inline constexpr mint<MOD> operator - (mint<MOD> other) const { return mint<MOD>(*this) -= other; }
+    inline constexpr mint<MOD> operator * (mint<MOD> other) const { return mint<MOD>(*this) *= other; }
+    inline constexpr mint<MOD> & operator += (mint<MOD> other) { this->value += other.value; if (this->value >= MOD) this->value -= MOD; return *this; }
+    inline constexpr mint<MOD> & operator -= (mint<MOD> other) { this->value -= other.value; if (this->value <    0) this->value += MOD; return *this; }
+    inline constexpr mint<MOD> & operator *= (mint<MOD> other) { this->value = (uint_fast64_t)this->value * other.value % MOD; return *this; }
+    inline constexpr mint<MOD> operator - () const { return mint<MOD>(this->value ? MOD - this->value : 0, nullptr); }
+    inline constexpr mint<MOD> pow(uint64_t k) const { return mint<MOD>(modpow(value, k, MOD), nullptr); }
+    inline mint<MOD> inv() const { return mint<MOD>(modinv(value, MOD), nullptr); }
+    inline constexpr mint<MOD> operator /  (mint<MOD> other) const { return *this *  other.inv(); }
+    inline constexpr mint<MOD> operator /= (mint<MOD> other)       { return *this *= other.inv(); }
+    inline constexpr bool operator == (mint<MOD> other) const { return value == other.value; }
+    inline constexpr bool operator != (mint<MOD> other) const { return value != other.value; }
+};
+template <int32_t MOD> mint<MOD> operator * (int64_t value, mint<MOD> n) { return mint<MOD>(value) * n; }
+template <int32_t MOD> std::istream & operator >> (std::istream & in, mint<MOD> & n) { int64_t value; in >> value; n = value; return in; }
+template <int32_t MOD> std::ostream & operator << (std::ostream & out, mint<MOD> n) { return out << n.value; }
+
+template <int32_t PRIME> struct proth_prime {};
+template <> struct proth_prime<1224736769> { static constexpr int a =             73, b = 24, g =  3; };
+template <> struct proth_prime<1053818881> { static constexpr int a =     3 * 5 * 67, b = 20, g =  7; };
+template <> struct proth_prime<1051721729> { static constexpr int a =        17 * 59, b = 20, g =  6; };
+template <> struct proth_prime<1045430273> { static constexpr int a =            997, b = 20, g =  3; };
+template <> struct proth_prime<1012924417> { static constexpr int a =     3 * 7 * 23, b = 21, g =  5; };
+template <> struct proth_prime<1007681537> { static constexpr int a =        31 * 31, b = 20, g =  3; };
+template <> struct proth_prime<1004535809> { static constexpr int a =            479, b = 21, g =  3; };
+template <> struct proth_prime< 998244353> { static constexpr int a =         7 * 17, b = 23, g =  3; };
+template <> struct proth_prime< 985661441> { static constexpr int a =         5 * 47, b = 22, g =  3; };
+template <> struct proth_prime< 976224257> { static constexpr int a =     7 * 7 * 19, b = 20, g =  3; };
+template <> struct proth_prime< 975175681> { static constexpr int a =     3 * 5 * 31, b = 21, g = 17; };
+template <> struct proth_prime< 962592769> { static constexpr int a = 3 * 3 * 3 * 17, b = 21, g =  7; };
+template <> struct proth_prime< 950009857> { static constexpr int a =        4 * 151, b = 21, g =  7; };
+template <> struct proth_prime< 943718401> { static constexpr int a =  3 * 3 * 5 * 5, b = 22, g =  7; };
+template <> struct proth_prime< 935329793> { static constexpr int a =            223, b = 22, g =  3; };
+template <> struct proth_prime< 924844033> { static constexpr int a =  3 * 3 * 7 * 7, b = 21, g =  5; };
+template <> struct proth_prime< 469762049> { static constexpr int a =              7, b = 26, g =  3; };
+template <> struct proth_prime< 167772161> { static constexpr int a =              5, b = 25, g =  3; };
+
+struct is_proth_prime_impl {
+    template <int32_t PRIME, class T> static auto check(T *) -> decltype(proth_prime<PRIME>::g, std::true_type());
+    template <int32_t PRIME, class T> static auto check(...) -> std::false_type;
+};
+template <int32_t PRIME>
+struct is_proth_prime : decltype(is_proth_prime_impl::check<PRIME, std::nullptr_t>(nullptr)) {
+};
+
+/**
+ * @brief Number Theoretic Transformation (NTT) for Proth primes
+ * @note O(N log N)
+ * @note radix-2, decimation-in-frequency, Cooley-Tukey
+ * @note cache std::polar (~ 2x faster)
+ */
+template <int32_t PRIME>
+void ntt_inplace(std::vector<mint<PRIME> > & a, bool inverse) {
+    const int n = a.size();
+    const int log2_n = __builtin_ctz(n);
+    assert (n == 1 << log2_n);
+    assert (log2_n <= proth_prime<PRIME>::b);
+
+    // prepare rotors
+    std::vector<mint<PRIME> > ep, iep;
+    while ((int)ep.size() <= log2_n) {
+        ep.push_back(mint<PRIME>(proth_prime<PRIME>::g).pow(mint<PRIME>(-1).value / (1 << ep.size())));
+        iep.push_back(ep.back().inv());
+    }
+
+    // divide and conquer
+    std::vector<mint<PRIME> > b(n);
+    _REP3_ (i, 1, log2_n + 1) {
+        int w = 1 << (log2_n - i);
+        mint<PRIME> base = (inverse ? iep : ep)[i];
+        mint<PRIME> now = 1;
+        for (int y = 0; y < n / 2; y += w) {
+            _REP_ (x, w) {
+                auto l = a[y << 1 | x];
+                auto r = now * a[y << 1 | x | w];
+                b[y | x] = l + r;
+                b[y | x | n >> 1] = l - r;
+            }
+            now *= base;
+        }
+        std::swap(a, b);
+    }
+
+    // div by n if inverse
+    if (inverse) {
+        auto n_inv = mint<PRIME>(n).inv();
+        _REP_ (i, n) {
+            a[i] *= n_inv;
+        }
+    }
+}
+
+/**
+ * @brief multiprecation on $\mathbb{F}_p[x]$ for Proth primes
+ * @note O(N log N)
+ * @note (f \ast g)(i) = \sum_{0 \le j \lt i + 1} f(j) g(i - j)
+ */
+template <int32_t PRIME>
+typename std::enable_if<is_proth_prime<PRIME>::value, std::vector<mint<PRIME> > >::type ntt_convolution(const std::vector<mint<PRIME> > & a_, const std::vector<mint<PRIME> > & b_) {
+    if (a_.size() <= 32 or b_.size() <= 32) {
+        std::vector<mint<PRIME> > c(a_.size() + b_.size() - 1);
+        _REP_ (i, a_.size()) _REP_ (j, b_.size()) c[i + j] += a_[i] * b_[j];
+        // return c;
+    }
+    int m = a_.size() + b_.size() - 1;
+    int n = (m == 1 ? 1 : 1 << (32 - __builtin_clz(m - 1)));
+    auto a = a_;
+    auto b = b_;
+    a.resize(n);
+    b.resize(n);
+    ntt_inplace(a, false);
+    ntt_inplace(b, false);
+    _REP_ (i, n) {
+        a[i] *= b[i];
+    }
+    ntt_inplace(a, true);
+    a.resize(m);
+    return a;
+}
+
+template <int32_t MOD, int32_t MOD1, int32_t MOD2, int32_t MOD3>
+mint<MOD> garner_algorithm_template(mint<MOD1> a1, mint<MOD2> a2, mint<MOD3> a3) {
+    static const auto r12 = mint<MOD2>(MOD1).inv();
+    static const auto r13 = mint<MOD3>(MOD1).inv();
+    static const auto r23 = mint<MOD3>(MOD2).inv();
+    a2 = (a2 - a1.value) * r12;
+    a3 = (a3 - a1.value) * r13;
+    a3 = (a3 - a2.value) * r23;
+    return mint<MOD>(a1.value) + a2.value * mint<MOD>(MOD1) + a3.value * (mint<MOD>(MOD1) * mint<MOD>(MOD2));
+}
+
+/**
+ * @brief multiprecation on $\mathbb{Z}/n\mathbb{Z}[x]$
+ */
+template <int32_t MOD>
+typename std::enable_if<not is_proth_prime<MOD>::value, std::vector<mint<MOD> > >::type ntt_convolution(const std::vector<mint<MOD> > & a, const std::vector<mint<MOD> > & b) {
+    if (a.size() <= 32 or b.size() <= 32) {
+        std::vector<mint<MOD> > c(a.size() + b.size() - 1);
+        _REP_ (i, a.size()) _REP_ (j, b.size()) c[i + j] += a[i] * b[j];
+        // return c;
+    }
+    constexpr int PRIMES[3] = { 1004535809, 998244353, 985661441 };
+    std::vector<mint<PRIMES[0]> > x0(a.size());
+    std::vector<mint<PRIMES[1]> > x1(a.size());
+    std::vector<mint<PRIMES[2]> > x2(a.size());
+    _REP_ (i, a.size()) {
+        x0[i] = a[i].value;
+        x1[i] = a[i].value;
+        x2[i] = a[i].value;
+    }
+    std::vector<mint<PRIMES[0]> > y0(b.size());
+    std::vector<mint<PRIMES[1]> > y1(b.size());
+    std::vector<mint<PRIMES[2]> > y2(b.size());
+    _REP_ (j, b.size()) {
+        y0[j] = b[j].value;
+        y1[j] = b[j].value;
+        y2[j] = b[j].value;
+    }
+    std::vector<mint<PRIMES[0]> > z0 = ntt_convolution<PRIMES[0]>(x0, y0);
+    std::vector<mint<PRIMES[1]> > z1 = ntt_convolution<PRIMES[1]>(x1, y1);
+    std::vector<mint<PRIMES[2]> > z2 = ntt_convolution<PRIMES[2]>(x2, y2);
+    std::vector<mint<MOD> > c(z0.size());
+    _REP_ (k, z0.size()) {
+        c[k] = garner_algorithm_template<MOD>(z0[k], z1[k], z2[k]);
+    }
+    return c;
+}