diff options
author | MITSUNARI Shigeo <herumi@nifty.com> | 2016-11-01 10:54:54 +0800 |
---|---|---|
committer | MITSUNARI Shigeo <herumi@nifty.com> | 2016-11-01 10:54:54 +0800 |
commit | b49f25a6ccc0d04ac47069c8ed726e6648581bfc (patch) | |
tree | 75285b65123a6014028afe2e5a5c8f2412ea94e5 | |
parent | c8a13c7a28277be1c24276ab2d7380cb3b6e13bf (diff) | |
download | dexon-mcl-b49f25a6ccc0d04ac47069c8ed726e6648581bfc.tar.gz dexon-mcl-b49f25a6ccc0d04ac47069c8ed726e6648581bfc.tar.zst dexon-mcl-b49f25a6ccc0d04ac47069c8ed726e6648581bfc.zip |
use karatsuba for sqr if N >= 6
-rw-r--r-- | misc/karatsuba.cpp | 16 | ||||
-rw-r--r-- | src/fp_llvm.hpp | 3 | ||||
-rw-r--r-- | src/fp_proto.hpp | 51 |
3 files changed, 54 insertions, 16 deletions
diff --git a/misc/karatsuba.cpp b/misc/karatsuba.cpp index fb04ed9..e2156ff 100644 --- a/misc/karatsuba.cpp +++ b/misc/karatsuba.cpp @@ -24,9 +24,9 @@ void dump(const Unit *x, size_t N) printf("\n"); } -void gggKara(uint64_t *z, const uint64_t *x, const uint64_t *y) +void gggKara(uint64_t *z, const uint64_t *x, const uint64_t *) { - MulPre<8, Gtag>::f(z, x, y); + SqrPre<8, Gtag>::f(z, x); } void gggLLVM(uint64_t *z, const uint64_t *x, const uint64_t *y) { @@ -41,12 +41,16 @@ void benchKaratsuba() Unit x[N], z[N * 2]; rg.read(x, N); rg.read(z, N); - CYBOZU_BENCH("g:mulpre", (MulPreCore<N, Gtag>::f), z, z, x); - CYBOZU_BENCH("g:kara ", (MulPre<N, Gtag>::karatsuba), z, z, x); + CYBOZU_BENCH("g:mulPre ", (MulPreCore<N, Gtag>::f), z, z, x); + CYBOZU_BENCH("g:mulKara", (MulPre<N, Gtag>::karatsuba), z, z, x); + CYBOZU_BENCH("g:sqrPre ", (SqrPreCore<N, Gtag>::f), z, z); + CYBOZU_BENCH("g:sqrKara", (SqrPre<N, Gtag>::karatsuba), z, z); #ifdef MCL_USE_LLVM - CYBOZU_BENCH("l:mulpre", (MulPreCore<N, Ltag>::f), z, z, x); - CYBOZU_BENCH("l:kara ", (MulPre<N, Ltag>::karatsuba), z, z, x); + CYBOZU_BENCH("l:mulPre ", (MulPreCore<N, Ltag>::f), z, z, x); + CYBOZU_BENCH("l:mulKara", (MulPre<N, Ltag>::karatsuba), z, z, x); + CYBOZU_BENCH("l:sqrPre ", (SqrPreCore<N, Ltag>::f), z, z); + CYBOZU_BENCH("l:sqrKara", (SqrPre<N, Ltag>::karatsuba), z, z); #endif } diff --git a/src/fp_llvm.hpp b/src/fp_llvm.hpp index 7c295d3..13c5df7 100644 --- a/src/fp_llvm.hpp +++ b/src/fp_llvm.hpp @@ -4,7 +4,8 @@ namespace mcl { namespace fp { template<> struct EnableKaratsuba<Ltag> { - static const size_t minN = 8; /* use karatsuba if N >= 8 */ + static const size_t minMulN = 8; + static const size_t minSqrN = 6; }; #define MCL_DEF_LLVM_FUNC(n) \ diff --git a/src/fp_proto.hpp b/src/fp_proto.hpp index 6c54a74..7e19710 100644 --- a/src/fp_proto.hpp +++ b/src/fp_proto.hpp @@ -93,11 +93,6 @@ struct Neg { template<size_t N, class Tag> const void3u Neg<N, Tag>::f = Neg<N, Tag>::func; -static inline void mulPreGmp(Unit *z, const Unit *x, const Unit *y, size_t N) -{ - mpn_mul_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, (int)N); -} - // z[N * 2] <- x[N] * y[N] template<size_t N, class Tag = Gtag> struct MulPreCore { @@ -113,8 +108,11 @@ const void3u MulPreCore<N, Tag>::f = MulPreCore<N, Tag>::func; template<class Tag = Gtag> struct EnableKaratsuba { - static const size_t minN = 100; /* always use mpn_mul_n for Gtag */ + /* always use mpn* for Gtag */ + static const size_t minMulN = 100; + static const size_t minSqrN = 100; }; + template<size_t N, class Tag = Gtag> struct MulPre { /* @@ -154,7 +152,7 @@ struct MulPre { static inline void func(Unit *z, const Unit *x, const Unit *y) { #if 1 - if (N >= EnableKaratsuba<Tag>::minN && (N % 2) == 0) { + if (N >= EnableKaratsuba<Tag>::minMulN && (N % 2) == 0) { karatsuba(z, x, y); return; } @@ -184,15 +182,50 @@ struct MulPre<1, Tag> { // z[N * 2] <- x[N] * x[N] template<size_t N, class Tag = Gtag> -struct SqrPre { +struct SqrPreCore { static inline void func(Unit *y, const Unit *x) { - return mpn_sqr((mp_limb_t*)y, (const mp_limb_t*)x, N); + mpn_sqr((mp_limb_t*)y, (const mp_limb_t*)x, N); } static const void2u f; }; template<size_t N, class Tag> +const void2u SqrPreCore<N, Tag>::f = SqrPreCore<N, Tag>::func; + +template<size_t N, class Tag = Gtag> +struct SqrPre { + /* + W = 1 << H + x = aW + b + x^2 = aaW^2 + 2abW + bb + */ + static inline void karatsuba(Unit *z, const Unit *x) + { + const size_t H = N / 2; + SqrPre<H, Tag>::f(z, x); // b^2 + SqrPre<H, Tag>::f(z + N, x + H); // a^2 + Unit ab[N]; + MulPre<H, Tag>::f(ab, x, x + H); // ab + Unit c = AddPre<N, Tag>::f(ab, ab, ab); + c += AddPre<N, Tag>::f(z + H, z + H, ab); + if (c) { + AddUnitPre<Tag>::f(z + N + H, H, c); + } + } + static inline void func(Unit *y, const Unit *x) + { +#if 1 + if (N >= EnableKaratsuba<Tag>::minSqrN && (N % 2) == 0) { + karatsuba(y, x); + return; + } +#endif + SqrPreCore<N, Tag>::f(y, x); + } + static const void2u f; +}; +template<size_t N, class Tag> const void2u SqrPre<N, Tag>::f = SqrPre<N, Tag>::func; // z[N + 1] <- x[N] * y |