aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author MITSUNARI Shigeo <herumi@nifty.com> 2018-10-31 15:43:22 +0800
committer MITSUNARI Shigeo <herumi@nifty.com> 2018-10-31 15:43:22 +0800
commit 8ac1d066e4c42d5bd8a9658c94a505204eae0ce8 (patch)
tree b4ad65936137816bd23f9557422bf6dab5e83eb7
parent a1011661126e99f4f407956baccd2865a27c6f41 (diff)
download tangerine-mcl-8ac1d066e4c42d5bd8a9658c94a505204eae0ce8.tar.gz
tangerine-mcl-8ac1d066e4c42d5bd8a9658c94a505204eae0ce8.tar.zst
tangerine-mcl-8ac1d066e4c42d5bd8a9658c94a505204eae0ce8.zip
sqrPre4 + mod is slower than mul4
-rw-r--r-- src/fp_generator.hpp 16
-rw-r--r-- test/bench.hpp 4
-rw-r--r-- test/fp_tower_test.cpp 1
3 files changed, 15 insertions, 6 deletions
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index d69c531..05ab384 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -1160,7 +1160,7 @@ private:
void gen_sqr()
{
if (op_->primeMode == PM_NIST_P192) {
- StackFrame sf(this, 3, 10 | UseRDX, 8 * 6);
+ StackFrame sf(this, 3, 10 | UseRDX, 6 * 8);
Pack t = sf.t;
t.append(sf.p[2]);
sqrPre3(rsp, sf.p[1], t);
@@ -1170,6 +1170,18 @@ private:
gen_montSqr3();
return;
}
+#if 0 // (sqrPre + mod) is slower than mul
+ if (pn_ == 4 && useMulx_) {
+ StackFrame sf(this, 3, 10 | UseRDX, 8 * 8);
+ Pack t = sf.t;
+ t.append(sf.p[2]);
+ sqrPre4(rsp, sf.p[1], t);
+ mov(gp0, sf.p[0]);
+ mov(gp1, rsp);
+ call(fpDbl_modL);
+ return;
+ }
+#endif
// sqr(y, x) = mul(y, x, x)
#ifdef XBYAK64_WIN
mov(r8, rdx);
@@ -2129,7 +2141,6 @@ private:
sqrPre3(sf.p[0], sf.p[1], t);
return func;
}
-#if 1
if (pn_ == 4 && useMulx_) {
StackFrame sf(this, 3, 10 | UseRDX);
Pack t = sf.t;
@@ -2137,7 +2148,6 @@ private:
sqrPre4(sf.p[0], sf.p[1], t);
return func;
}
-#endif
if (pn_ == 6 && useMulx_ && useAdx_) {
StackFrame sf(this, 3, 10 | UseRDX, 6 * 8);
Pack t = sf.t;
diff --git a/test/bench.hpp b/test/bench.hpp
index 1ca9e5c..84dd7ea 100644
--- a/test/bench.hpp
+++ b/test/bench.hpp
@@ -44,8 +44,8 @@ void testBench(const G1& P, const G2& Q)
CYBOZU_BENCH_C("Fp::add ", C3, Fp::add, x, x, y);
CYBOZU_BENCH_C("Fp::sub ", C3, Fp::sub, x, x, y);
CYBOZU_BENCH_C("Fp::neg ", C3, Fp::neg, x, x);
- CYBOZU_BENCH_C("Fp::mul ", C3, Fp::mul, x, x, y);
- CYBOZU_BENCH_C("Fp::sqr ", C3, Fp::sqr, x, x);
+ CYBOZU_BENCH_C("Fp::mul ", 1000000, Fp::mul, x, x, y);
+ CYBOZU_BENCH_C("Fp::sqr ", 1000000, Fp::sqr, x, x);
CYBOZU_BENCH_C("Fp::inv ", C3, Fp::inv, x, x);
Fp2 xx, yy;
xx.a = x;
diff --git a/test/fp_tower_test.cpp b/test/fp_tower_test.cpp
index 25f51e3..dc50a84 100644
--- a/test/fp_tower_test.cpp
+++ b/test/fp_tower_test.cpp
@@ -336,7 +336,6 @@ void testFpDbl()
tx.getMpz(mtx);
mo = mtx * mtx;
}
-std::cout << std::hex;
CYBOZU_TEST_EQUAL(mz, mo);
FpDbl::mod(z, d);