diff options
author | MITSUNARI Shigeo <herumi@nifty.com> | 2016-10-30 11:39:08 +0800 |
---|---|---|
committer | MITSUNARI Shigeo <herumi@nifty.com> | 2016-10-30 11:39:08 +0800 |
commit | f9026ec77bed29eb2f7b693a9a60d4467b93e35c (patch) | |
tree | c2f7c7dce10158dd738f983143493a1ec169cba8 | |
parent | d9fda466a986110c9fc57a6a6e9de822f4286686 (diff) | |
download | dexon-mcl-f9026ec77bed29eb2f7b693a9a60d4467b93e35c.tar.gz dexon-mcl-f9026ec77bed29eb2f7b693a9a60d4467b93e35c.tar.zst dexon-mcl-f9026ec77bed29eb2f7b693a9a60d4467b93e35c.zip |
split MulPre and MulPreCore
-rw-r--r-- | misc/karatsuba.cpp | 34 | ||||
-rw-r--r-- | src/fp_proto.hpp | 45 |
2 files changed, 40 insertions, 39 deletions
diff --git a/misc/karatsuba.cpp b/misc/karatsuba.cpp index 8c05378..7edd4a8 100644 --- a/misc/karatsuba.cpp +++ b/misc/karatsuba.cpp @@ -39,10 +39,11 @@ void benchKaratsuba() Unit x[N], z[N * 2]; rg.read(x, N); rg.read(z, N); - CYBOZU_BENCH("gmp ", (MulPre<N, Gtag>::f), z, z, x); - CYBOZU_BENCH("gmp-k ", (MulPre<N, Gtag>::karatsuba), z, z, x); - CYBOZU_BENCH("llvm ", (MulPre<N, Ltag>::f), z, z, x); - CYBOZU_BENCH("llvm-k", (MulPre<N, Ltag>::karatsuba), z, z, x); + CYBOZU_BENCH("g:mulpre", (MulPreCore<N, Gtag>::f), z, z, x); + CYBOZU_BENCH("g:kara ", (MulPre<N, Gtag>::karatsuba), z, z, x); + + CYBOZU_BENCH("l:mulpre", (MulPreCore<N, Ltag>::f), z, z, x); + CYBOZU_BENCH("l:kara ", (MulPre<N, Ltag>::karatsuba), z, z, x); } CYBOZU_TEST_AUTO(karatsuba) @@ -56,20 +57,23 @@ CYBOZU_TEST_AUTO(karatsuba) #endif } +#if 0 CYBOZU_TEST_AUTO(mulPre) { cybozu::XorShift rg; // const char *p = "0x2523648240000001ba344d80000000086121000000000013a700000000000013"; // const char *p = "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff"; -// Fp::init(p, mcl::fp::FP_LLVM); -// const mcl::fp::Op& op = Fp::getOp(); - const size_t N = 10; - Unit x[N], y[N], z[N * 2], w[N * 2]; - for (int i = 0; i < 10; i++) { - rg.read(x, N); - rg.read(y, N); - mpn_mul_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N); - MulPre<N, Gtag>::karatsuba(w, x, y); - CYBOZU_TEST_EQUAL_ARRAY(z, w, N * 2); - } +// const char *p = "4562440617622195218641171605700291324893228507248559930579192517899275167208677386505912811317371399778642309573594407310688704721375437998252661319722214188251994674360264950082874192246603471"; // 640 bit + const char *p = "1552518092300708935148979488462502555256886017116696611139052038026050952686376886330878408828646477950487730697131073206171580044114814391444287275041181139204454976020849905550265285631598444825262999193716468750892846853816057031"; // 768 bit + Fp::init(p, mcl::fp::FP_LLVM); + const mcl::fp::Op& op = Fp::getOp(); + const size_t N = 12; + Unit x[N], y[N]; + rg.read(x, N); + rg.read(y, N); + CYBOZU_BENCH("g:mul ", (Mul<N, Gtag>::f), y, y, x, op.p); + CYBOZU_BENCH("g:mont", (Mont<N, Gtag>::f), y, y, x, op.p); + CYBOZU_BENCH("l:mul ", (Mul<N, Ltag>::f), y, y, x, op.p); + CYBOZU_BENCH("l:mont", (Mont<N, Ltag>::f), y, y, x, op.p); } +#endif diff --git a/src/fp_proto.hpp b/src/fp_proto.hpp index 67faff3..82dd6b3 100644 --- a/src/fp_proto.hpp +++ b/src/fp_proto.hpp @@ -49,7 +49,7 @@ struct AddPre { static const u3u f; }; template<size_t N, class Tag> -const u3u AddPre<N, Tag>::f = &AddPre<N, Tag>::func; +const u3u AddPre<N, Tag>::f = AddPre<N, Tag>::func; // (carry, x[N]) <- x[N] + y template<class Tag = Gtag> @@ -61,7 +61,7 @@ struct AddUnitPre { static const u1uII f; }; template<class Tag> -const u1uII AddUnitPre<Tag>::f = &AddUnitPre<Tag>::func; +const u1uII AddUnitPre<Tag>::f = AddUnitPre<Tag>::func; // (carry, z[N]) <- x[N] - y[N] template<size_t N, class Tag = Gtag> @@ -74,7 +74,7 @@ struct SubPre { }; template<size_t N, class Tag> -const u3u SubPre<N, Tag>::f = &SubPre<N, Tag>::func; +const u3u SubPre<N, Tag>::f = SubPre<N, Tag>::func; // y[N] <- (-x[N]) % p[N] template<size_t N, class Tag = Gtag> @@ -97,8 +97,21 @@ static inline void mulPreGmp(Unit *z, const Unit *x, const Unit *y, size_t N) { mpn_mul_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, (int)N); } + // z[N * 2] <- x[N] * y[N] template<size_t N, class Tag = Gtag> +struct MulPreCore { + static inline void func(Unit *z, const Unit *x, const Unit *y) + { + mpn_mul_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, (int)N); + } + static const void3u f; +}; + +template<size_t N, class Tag> +const void3u MulPreCore<N, Tag>::f = MulPreCore<N, Tag>::func; + +template<size_t N, class Tag = Gtag> struct MulPre { /* W = 1 << H @@ -109,14 +122,14 @@ struct MulPre { static inline void karatsuba(Unit *z, const Unit *x, const Unit *y) { const size_t H = N / 2; - MulPre<H, Tag>::f(z, x, y); // bd - MulPre<H, Tag>::f(z + N, x + H, y + H); // ac + MulPreCore<H, Tag>::f(z, x, y); // bd + MulPreCore<H, Tag>::f(z + N, x + H, y + H); // ac Unit a_b[H]; Unit c_d[H]; Unit c1 = AddPre<H, Tag>::f(a_b, x, x + H); // a + b Unit c2 = AddPre<H, Tag>::f(c_d, y, y + H); // c + d Unit tmp[N]; - MulPre<H, Tag>::f(tmp, a_b, c_d); + MulPreCore<H, Tag>::f(tmp, a_b, c_d); Unit c = c1 & c2; if (c1) { c += AddPre<H, Tag>::f(tmp + H, tmp + H, c_d); @@ -134,13 +147,12 @@ struct MulPre { static inline void func(Unit *z, const Unit *x, const Unit *y) { #if 0 - if (N == 0) return; - if (N >= 6 && (N % 2) == 0) { + if (N >= 8 && (N % 2) == 0) { karatsuba(z, x, y); return; } #endif - mulPreGmp(z, x, y, N); + MulPreCore<N, Tag>::f(z, x, y); } static const void3u f; }; @@ -148,21 +160,6 @@ struct MulPre { template<size_t N, class Tag> const void3u MulPre<N, Tag>::f = MulPre<N, Tag>::func; -#if 0 -template<class Tag> -struct MulPre<0, Tag> { - static inline void f(Unit*, const Unit*, const Unit*){} -}; - -template<class Tag> -struct MulPre<1, Tag> { - static inline void f(Unit* z, const Unit* x, const Unit* y) - { - mulPreGmp(z, x, y, 1); - } -}; -#endif - // z[N * 2] <- x[N] * x[N] template<size_t N, class Tag = Gtag> struct SqrPre { |