diff options
author | MITSUNARI Shigeo <herumi@nifty.com> | 2018-10-31 15:43:22 +0800 |
---|---|---|
committer | MITSUNARI Shigeo <herumi@nifty.com> | 2018-10-31 15:43:22 +0800 |
commit | 8ac1d066e4c42d5bd8a9658c94a505204eae0ce8 (patch) | |
tree | b4ad65936137816bd23f9557422bf6dab5e83eb7 | |
parent | a1011661126e99f4f407956baccd2865a27c6f41 (diff) | |
download | tangerine-mcl-8ac1d066e4c42d5bd8a9658c94a505204eae0ce8.tar.gz tangerine-mcl-8ac1d066e4c42d5bd8a9658c94a505204eae0ce8.tar.zst tangerine-mcl-8ac1d066e4c42d5bd8a9658c94a505204eae0ce8.zip |
sqrPre4 + mod is slower than mul4
-rw-r--r-- | src/fp_generator.hpp | 16 | ||||
-rw-r--r-- | test/bench.hpp | 4 | ||||
-rw-r--r-- | test/fp_tower_test.cpp | 1 |
3 files changed, 15 insertions, 6 deletions
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp index d69c531..05ab384 100644 --- a/src/fp_generator.hpp +++ b/src/fp_generator.hpp @@ -1160,7 +1160,7 @@ private: void gen_sqr() { if (op_->primeMode == PM_NIST_P192) { - StackFrame sf(this, 3, 10 | UseRDX, 8 * 6); + StackFrame sf(this, 3, 10 | UseRDX, 6 * 8); Pack t = sf.t; t.append(sf.p[2]); sqrPre3(rsp, sf.p[1], t); @@ -1170,6 +1170,18 @@ private: gen_montSqr3(); return; } +#if 0 // (sqrPre + mod) is slower than mul + if (pn_ == 4 && useMulx_) { + StackFrame sf(this, 3, 10 | UseRDX, 8 * 8); + Pack t = sf.t; + t.append(sf.p[2]); + sqrPre4(rsp, sf.p[1], t); + mov(gp0, sf.p[0]); + mov(gp1, rsp); + call(fpDbl_modL); + return; + } +#endif // sqr(y, x) = mul(y, x, x) #ifdef XBYAK64_WIN mov(r8, rdx); @@ -2129,7 +2141,6 @@ private: sqrPre3(sf.p[0], sf.p[1], t); return func; } -#if 1 if (pn_ == 4 && useMulx_) { StackFrame sf(this, 3, 10 | UseRDX); Pack t = sf.t; @@ -2137,7 +2148,6 @@ private: sqrPre4(sf.p[0], sf.p[1], t); return func; } -#endif if (pn_ == 6 && useMulx_ && useAdx_) { StackFrame sf(this, 3, 10 | UseRDX, 6 * 8); Pack t = sf.t; diff --git a/test/bench.hpp b/test/bench.hpp index 1ca9e5c..84dd7ea 100644 --- a/test/bench.hpp +++ b/test/bench.hpp @@ -44,8 +44,8 @@ void testBench(const G1& P, const G2& Q) CYBOZU_BENCH_C("Fp::add ", C3, Fp::add, x, x, y); CYBOZU_BENCH_C("Fp::sub ", C3, Fp::sub, x, x, y); CYBOZU_BENCH_C("Fp::neg ", C3, Fp::neg, x, x); - CYBOZU_BENCH_C("Fp::mul ", C3, Fp::mul, x, x, y); - CYBOZU_BENCH_C("Fp::sqr ", C3, Fp::sqr, x, x); + CYBOZU_BENCH_C("Fp::mul ", 1000000, Fp::mul, x, x, y); + CYBOZU_BENCH_C("Fp::sqr ", 1000000, Fp::sqr, x, x); CYBOZU_BENCH_C("Fp::inv ", C3, Fp::inv, x, x); Fp2 xx, yy; xx.a = x; diff --git a/test/fp_tower_test.cpp b/test/fp_tower_test.cpp index 25f51e3..dc50a84 100644 --- a/test/fp_tower_test.cpp +++ b/test/fp_tower_test.cpp @@ -336,7 +336,6 @@ void testFpDbl() tx.getMpz(mtx); mo = mtx * mtx; } -std::cout << std::hex; CYBOZU_TEST_EQUAL(mz, mo); FpDbl::mod(z, 
d); |