diff options
author | MITSUNARI Shigeo <herumi@nifty.com> | 2016-10-30 13:38:37 +0800 |
---|---|---|
committer | MITSUNARI Shigeo <herumi@nifty.com> | 2016-10-30 13:38:37 +0800 |
commit | f21ac0e75b1b8531f804a4fd0567e9ff2354f4be (patch) | |
tree | dea4e84d0392f5a2e9f1b0a16a6d9e65d2b76bcf | |
parent | f9026ec77bed29eb2f7b693a9a60d4467b93e35c (diff) | |
download | dexon-mcl-f21ac0e75b1b8531f804a4fd0567e9ff2354f4be.tar.gz dexon-mcl-f21ac0e75b1b8531f804a4fd0567e9ff2354f4be.tar.zst dexon-mcl-f21ac0e75b1b8531f804a4fd0567e9ff2354f4be.zip |
use karatsuba for N >= 8
-rw-r--r-- | include/mcl/op.hpp | 2 | ||||
-rw-r--r-- | misc/karatsuba.cpp | 24 | ||||
-rw-r--r-- | misc/mul.cpp | 55 | ||||
-rw-r--r-- | src/fp_proto.hpp | 17 |
4 files changed, 76 insertions, 22 deletions
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp index 920c74c..b858ad9 100644 --- a/include/mcl/op.hpp +++ b/include/mcl/op.hpp @@ -61,7 +61,7 @@ enum Mode { enum PrimeMode { PM_GENERIC = 0, PM_NICT_P192, - PM_NICT_P521, + PM_NICT_P521 }; struct Op { diff --git a/misc/karatsuba.cpp b/misc/karatsuba.cpp index 7edd4a8..b62b3b6 100644 --- a/misc/karatsuba.cpp +++ b/misc/karatsuba.cpp @@ -6,7 +6,9 @@ #include <mcl/fp.hpp> #include <cybozu/xorshift.hpp> #include "../src/fp_proto.hpp" +#ifdef MCL_USE_LLVM #include "../src/fp_llvm.hpp" +#endif #include <cybozu/test.hpp> #include <cybozu/benchmark.hpp> @@ -42,8 +44,10 @@ void benchKaratsuba() CYBOZU_BENCH("g:mulpre", (MulPreCore<N, Gtag>::f), z, z, x); CYBOZU_BENCH("g:kara ", (MulPre<N, Gtag>::karatsuba), z, z, x); +#ifdef MCL_USE_LLVM CYBOZU_BENCH("l:mulpre", (MulPreCore<N, Ltag>::f), z, z, x); CYBOZU_BENCH("l:kara ", (MulPre<N, Ltag>::karatsuba), z, z, x); +#endif } CYBOZU_TEST_AUTO(karatsuba) @@ -57,23 +61,3 @@ CYBOZU_TEST_AUTO(karatsuba) #endif } -#if 0 -CYBOZU_TEST_AUTO(mulPre) -{ - cybozu::XorShift rg; -// const char *p = "0x2523648240000001ba344d80000000086121000000000013a700000000000013"; -// const char *p = "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff"; -// const char *p = "4562440617622195218641171605700291324893228507248559930579192517899275167208677386505912811317371399778642309573594407310688704721375437998252661319722214188251994674360264950082874192246603471"; // 640 bit - const char *p = "1552518092300708935148979488462502555256886017116696611139052038026050952686376886330878408828646477950487730697131073206171580044114814391444287275041181139204454976020849905550265285631598444825262999193716468750892846853816057031"; // 768 bit - Fp::init(p, mcl::fp::FP_LLVM); - const mcl::fp::Op& op = Fp::getOp(); - const size_t N = 12; - Unit x[N], y[N]; - rg.read(x, N); - rg.read(y, N); - CYBOZU_BENCH("g:mul ", (Mul<N, Gtag>::f), y, y, x, op.p); - CYBOZU_BENCH("g:mont", (Mont<N, Gtag>::f), y, y, x, op.p); - CYBOZU_BENCH("l:mul ", (Mul<N, Ltag>::f), y, y, x, op.p); - CYBOZU_BENCH("l:mont", (Mont<N, Ltag>::f), y, y, x, op.p); -} -#endif diff --git a/misc/mul.cpp b/misc/mul.cpp new file mode 100644 index 0000000..f43adb9 --- /dev/null +++ b/misc/mul.cpp @@ -0,0 +1,55 @@ +/* + sudo cpufreq-set -c 0 -g performance + mycl mul.cpp -DMCL_USE_LLVM=1 ../lib/libmcl.a && ./a.out +*/ +#include <stdio.h> +#include <mcl/fp.hpp> +#include <cybozu/xorshift.hpp> +#include <cybozu/test.hpp> +#include <cybozu/benchmark.hpp> + +typedef mcl::FpT<> Fp; + +using namespace mcl::fp; + +void dump(const Unit *x, size_t N) +{ + for (size_t i = 0; i < N; i++) { + printf("%016llx ", (long long)x[N - 1 - i]); + } + printf("\n"); +} + +CYBOZU_TEST_AUTO(mulPre) +{ + cybozu::XorShift rg; + const char *pTbl[] = { + "0x2523648240000001ba344d80000000086121000000000013a700000000000013", + "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff", + "4562440617622195218641171605700291324893228507248559930579192517899275167208677386505912811317371399778642309573594407310688704721375437998252661319722214188251994674360264950082874192246603471", // 640 bit + "1552518092300708935148979488462502555256886017116696611139052038026050952686376886330878408828646477950487730697131073206171580044114814391444287275041181139204454976020849905550265285631598444825262999193716468750892846853816057031", // 768 bit + }; + const size_t N = 12; + const Mode modeTbl[] = { + FP_GMP_MONT, +#ifdef MCL_USE_LLVM + FP_LLVM_MONT, +#endif + }; + for (size_t j = 0; j < CYBOZU_NUM_OF_ARRAY(modeTbl); j++) { + Mode mode = modeTbl[j]; + printf("%s\n", ModeToStr(mode)); + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(pTbl); i++) { + const char *p = pTbl[i]; + Fp::init(p, mode); + printf("bitSize=%d\n", (int)Fp::getBitSize()); + const Op& op = Fp::getOp(); + Unit x[N], y[N * 2]; + rg.read(x, N); + rg.read(y, N * 2); + CYBOZU_BENCH("mul ", op.fp_mul, y, y, x, op.p); + CYBOZU_BENCH("mulPre", op.fpDbl_mulPre, y, y, x); + CYBOZU_BENCH("mod ", op.fpDbl_mod, y, y, op.p); + } + } +} diff --git a/src/fp_proto.hpp b/src/fp_proto.hpp index 82dd6b3..2139b68 100644 --- a/src/fp_proto.hpp +++ b/src/fp_proto.hpp @@ -146,7 +146,7 @@ struct MulPre { } static inline void func(Unit *z, const Unit *x, const Unit *y) { -#if 0 +#if 1 if (N >= 8 && (N % 2) == 0) { karatsuba(z, x, y); return; @@ -160,6 +160,21 @@ struct MulPre { template<size_t N, class Tag> const void3u MulPre<N, Tag>::f = MulPre<N, Tag>::func; +static inline void MulPre0(Unit*, const Unit*, const Unit*) {} + +template<class Tag> +struct MulPre<0, Tag> { + static inline void f(Unit*, const Unit*, const Unit*) {} +}; + +template<class Tag> +struct MulPre<1, Tag> { + static inline void f(Unit* z, const Unit* x, const Unit* y) + { + MulPreCore<1, Tag>::f(z, x, y); + } +}; + // z[N * 2] <- x[N] * x[N] template<size_t N, class Tag = Gtag> struct SqrPre { |