diff options
author | MITSUNARI Shigeo <herumi@nifty.com> | 2017-08-02 17:20:54 +0800 |
---|---|---|
committer | MITSUNARI Shigeo <herumi@nifty.com> | 2017-08-02 17:20:54 +0800 |
commit | 1099498eb474b7974e2a68b1d45acd21a48fb23b (patch) | |
tree | 3a0e89c4b413463c5ff3ff7c1cecb24b9b1e9f19 | |
parent | 43f36a1994e26188b21214659e99fd12d3e0e446 (diff) | |
download | dexon-mcl-1099498eb474b7974e2a68b1d45acd21a48fb23b.tar.gz dexon-mcl-1099498eb474b7974e2a68b1d45acd21a48fb23b.tar.zst dexon-mcl-1099498eb474b7974e2a68b1d45acd21a48fb23b.zip |
vint::divNM passed test
-rw-r--r-- | include/mcl/vint.hpp | 246 |
1 files changed, 153 insertions, 93 deletions
diff --git a/include/mcl/vint.hpp b/include/mcl/vint.hpp index 0f9e18a..fda212e 100644 --- a/include/mcl/vint.hpp +++ b/include/mcl/vint.hpp @@ -32,6 +32,20 @@ typedef uint64_t Unit; typedef uint32_t Unit; #endif +template<class T> +void dump(const T *x, size_t n) +{ + const size_t is4byteUnit = sizeof(*x) == 4; + for (size_t i = 0; i < n; i++) { + if (is4byteUnit) { + printf("%08x", (uint32_t)x[n - 1 - i]); + } else { + printf("%016llx", (unsigned long long)x[n - 1 - i]); + } + } + printf("\n"); +} + inline uint64_t make64(uint32_t H, uint32_t L) { return ((uint64_t)H << 32) | L; @@ -113,6 +127,15 @@ size_t getRealSize(const T *x, size_t xn) } template<class T> +size_t getBitSize(const T *x, size_t n) +{ + if (n == 1 && x[0] == 0) return 1; + T v = x[n - 1]; + assert(v); + return (n - 1) * sizeof(T) * 8 + 1 + cybozu::bsr<Unit>(v); +} + +template<class T> void divNM(T *q, size_t qn, T *r, const T *x, size_t xn, const T *y, size_t yn); /* @@ -345,8 +368,8 @@ EXIT_0: } /* - z[] = x[xn] + y[yn] - @note size of z must be max(xn, yn) + 1 + z[zn] = x[xn] + y[yn] + @note zn = max(xn, yn) */ template<class T> T addNM(T *z, const T *x, size_t xn, const T *y, size_t yn) @@ -428,7 +451,22 @@ EXIT_0: } /* - z[0..n) = x[0..n] * y + z[xn] = x[xn] - y[yn] + @note xn >= yn +*/ +template<class T> +T subNM(T *z, const T *x, size_t xn, const T *y, size_t yn) +{ + assert(xn >= yn); + T c = vint::subN(z, x, y, yn); + if (xn > yn) { + c = vint::subu1(z + yn, x + yn, xn - yn, c); + } + return c; +} + +/* + z[0..n) = x[0..n) * y return z[n] @note accept z == x */ @@ -518,6 +556,88 @@ T modu1(const T *x, size_t n, T y) } /* + y[] = x[] << bit + 0 < bit < sizeof(T) * 8 + accept y == x +*/ +template<class T> +T shlBit(T *y, const T *x, size_t xn, size_t bit) +{ + assert(0 < bit && bit < sizeof(T) * 8); + assert(xn > 0); + size_t rBit = sizeof(T) * 8 - bit; + T keep = x[xn - 1]; + T prev = keep; + for (size_t i = xn - 1; i > 0; i--) { + T t = x[i - 1]; + y[i] = (prev << bit) | (t >> rBit); + prev = t; + } + y[0] = prev << bit; + return keep >> rBit; +} + +/* + y[yn] = x[xn] << bit + yn = xn + (bit + unitBitBit - 1) / unitBitSize + accept y == x +*/ +template<class T> +void shlN(T *y, const T *x, size_t xn, size_t bit) +{ + assert(xn > 0); + const size_t unitBitSize = sizeof(T) * 8; + size_t q = bit / unitBitSize; + size_t r = bit % unitBitSize; + if (r == 0) { + // don't use copyN(y + q, x, xn); if overlaped + for (size_t i = 0; i < xn; i++) { + y[q + xn - 1 - i] = x[xn - 1 - i]; + } + } else { + y[q + xn] = shlBit(y + q, x, xn, r); + } + clearN(y, q); +} + +/* + y[] = x[] >> bit + 0 < bit < sizeof(T) * 8 +*/ +template<class T> +void shrBit(T *y, const T *x, size_t xn, size_t bit) +{ + assert(0 < bit && bit < sizeof(T) * 8); + assert(xn > 0); + size_t rBit = sizeof(T) * 8 - bit; + T prev = x[0]; + for (size_t i = 1; i < xn; i++) { + T t = x[i]; + y[i - 1] = (prev >> bit) | (t << rBit); + prev = t; + } + y[xn - 1] = prev >> bit; +} +/* + y[yn] = x[xn] >> bit + yn = xn - bit / unitBit +*/ +template<class T> +void shrN(T *y, const T *x, size_t xn, size_t bit) +{ + assert(xn > 0); + const size_t unitBitSize = sizeof(T) * 8; + size_t q = bit / unitBitSize; + size_t r = bit % unitBitSize; + if (r == 0) { + copyN(y, x + q, xn - q); + } else { + shrBit(y, x + q, xn - q, r); + } +} + + +/* get approximate value from x[xn - 1..] @param up [in] round up if true */ @@ -562,7 +682,6 @@ static inline double getApprox(const T *x, size_t xn, bool up) return t; } - /* q[qn] = x[xn] / y[yn] ; qn == xn - yn + 1 if xn >= yn if q r[rn] = x[xn] % y[yn] ; rn = yn before getRealSiz @@ -618,6 +737,34 @@ void divNM(T *q, size_t qn, T *r, const T *x, size_t xn, const T *y, size_t yn) } T *rr = (T*)CYBOZU_ALLOCA(sizeof(T) * xn); copyN(rr, x, xn); +#if 1 + T *t = (T*)CYBOZU_ALLOCA(sizeof(T) * (xn + 2)); + size_t yb = getBitSize(y, yn); + const size_t unitBitSize = sizeof(T) * 8; + while (vint::compareNM(rr, xn, y, yn) >= 0) { + size_t xb = getBitSize(rr, xn); + if (xb <= yb + 1) { + vint::subNM(rr, rr, xn, y, yn); + xn = getRealSize(rr, xn); + if (qq) vint::addu1<T>(qq, qq, qn, 1); + continue; + } + assert(xb > yb + 1); + size_t w = std::min(unitBitSize, xb - yb); + vint::shrN(t, rr, xn, xb - w); + T q0 = t[0]; + t[yn] = vint::mulu1(t, y, yn, q0); + vint::shlN(t, t, yn + 1, xb - w - yb); + vint::subN(rr, rr, t, xn); + xn = getRealSize(rr, xn); + if (q) { + clearN(t, qn); + t[0] = q0; + vint::shlN(t, t, 1, xb - w - yb); + vint::addN(q, q, t, qn); + } + } +#else T *t = (T*)CYBOZU_ALLOCA(sizeof(T) * (yn + 1)); double yt = getApprox(y, yn, true); while (vint::compareNM(rr, xn, y, yn) >= 0) { @@ -640,93 +787,13 @@ void divNM(T *q, size_t qn, T *r, const T *x, size_t xn, const T *y, size_t yn) xn--; } } +#endif copyN(r, rr, rn); if (q && q != qq) { copyN(q, qq, qn); } } -/* - y[] = x[] << bit - 0 < bit < sizeof(T) * 8 - accept y == x -*/ -template<class T> -T shlBit(T *y, const T *x, size_t xn, size_t bit) -{ - assert(0 < bit && bit < sizeof(T) * 8); - assert(xn > 0); - size_t rBit = sizeof(T) * 8 - bit; - T keep = x[xn - 1]; - T prev = keep; - for (size_t i = xn - 1; i > 0; i--) { - T t = x[i - 1]; - y[i] = (prev << bit) | (t >> rBit); - prev = t; - } - y[0] = prev << bit; - return keep >> rBit; -} - -/* - y[yn] = x[xn] << bit - yn = xn + (bit + unitBitBit - 1) / unitBitSize - accept y == x -*/ -template<class T> -void shlN(T *y, const T *x, size_t xn, size_t bit) -{ - assert(xn > 0); - const size_t unitBitSize = sizeof(T) * 8; - size_t q = bit / unitBitSize; - size_t r = bit % unitBitSize; - if (r == 0) { - // don't use copyN(y + q, x, xn); if overlaped - for (size_t i = 0; i < xn; i++) { - y[q + xn - 1 - i] = x[xn - 1 - i]; - } - } else { - y[q + xn] = shlBit(y + q, x, xn, r); - } - clearN(y, q); -} - -/* - y[] = x[] >> bit - 0 < bit < sizeof(T) * 8 -*/ -template<class T> -void shrBit(T *y, const T *x, size_t xn, size_t bit) -{ - assert(0 < bit && bit < sizeof(T) * 8); - assert(xn > 0); - size_t rBit = sizeof(T) * 8 - bit; - T prev = x[0]; - for (size_t i = 1; i < xn; i++) { - T t = x[i]; - y[i - 1] = (prev >> bit) | (t << rBit); - prev = t; - } - y[xn - 1] = prev >> bit; -} -/* - y[yn] = x[xn] >> bit - yn = xn - bit / unitBit -*/ -template<class T> -void shrN(T *y, const T *x, size_t xn, size_t bit) -{ - assert(xn > 0); - const size_t unitBitSize = sizeof(T) * 8; - size_t q = bit / unitBitSize; - size_t r = bit % unitBitSize; - if (r == 0) { - copyN(y, x + q, xn - q); - } else { - shrBit(y, x + q, xn - q, r); - } -} - template<class T> class VariableBuffer { std::vector<T> v_; @@ -1121,14 +1188,7 @@ public: void dump() const { printf("size_=%d ", (int)size_); - for (size_t i = 0; i < size_; i++) { -#if MCL_SIZEOF_UNIT == 4 - printf("%08x", (uint32_t)buf_[size_ - 1 - i]); -#else - printf("%016llx", (unsigned long long)buf_[size_ - 1 - i]); -#endif - } - printf("\n"); + vint::dump(buf_, size_); } /* set positive value |