aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMITSUNARI Shigeo <herumi@nifty.com>2017-10-04 12:18:47 +0800
committerMITSUNARI Shigeo <herumi@nifty.com>2017-10-04 12:18:47 +0800
commitbade8efc64a69b040f51c44dbc1e0d495100498b (patch)
treeb3d4551c7efbaa777bc90c6c63b98779a51890d4
parent0bfdb500a0024c3bb1eab2af1ab38808a827d5e9 (diff)
downloaddexon-mcl-bade8efc64a69b040f51c44dbc1e0d495100498b.tar.gz
dexon-mcl-bade8efc64a69b040f51c44dbc1e0d495100498b.tar.zst
dexon-mcl-bade8efc64a69b040f51c44dbc1e0d495100498b.zip
[bn] mul_024 uses mulOpt
-rw-r--r--include/mcl/bn.hpp127
-rw-r--r--test/bn_test.cpp9
2 files changed, 117 insertions, 19 deletions
diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 8a7e421..b6ca557 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -577,6 +577,7 @@ struct BNT {
typedef mcl::EcT<Fp> G1;
typedef mcl::EcT<Fp2> G2;
typedef HaveFrobenius<G2> G2withF;
+ typedef mcl::FpDblT<Fp> FpDbl;
typedef mcl::Fp2DblT<Fp> Fp2Dbl;
typedef ParamT<Fp> Param;
static Param param;
@@ -788,6 +789,19 @@ struct BNT {
y.a.c = x.c;
y.b.b = x.b;
}
+ static void Fp2Dbl_mulOpt(Fp2Dbl& Z, const Fp2& x, const Fp2& y) // Z.a = x.a*y.a - x.b*y.b, Z.b = x.a*y.b + x.b*y.a (i.e. x*y if Fp2 is Fp[i] with i^2 = -1 -- assumed), using three FpDbl::mulPre calls instead of four
+ {
+ FpDbl D0; // holds x.b * y.b
+ Fp s, t; // operand sums for the combined cross-term multiplication
+ Fp::addPre(s, x.a, x.b); // s = x.a + x.b (presumably without modular reduction -- confirm addPre semantics and operand headroom)
+ Fp::addPre(t, y.a, y.b); // t = y.a + y.b
+ FpDbl::mulPre(D0, x.b, y.b); // D0 = x.b * y.b
+ FpDbl::mulPre(Z.a, x.a, y.a); // Z.a = x.a * y.a
+ FpDbl::mulPre(Z.b, s, t); // Z.b = (x.a + x.b)(y.a + y.b)
+ FpDbl::subPre(Z.b, Z.b, Z.a); // Z.b -= x.a * y.a
+ FpDbl::subPre(Z.b, Z.b, D0); // Z.b -= x.b * y.b, leaving x.a*y.b + x.b*y.a
+ FpDbl::sub(Z.a, Z.a, D0); // Z.a = x.a*y.a - x.b*y.b; sub rather than subPre, presumably because this difference can be negative -- confirm
+ }
/*
x = (x0 + x1v + x2v^2) + (x3 + x4v + x5v^2)w
y = (y0, y4, y2) -> (y0, 0, y2, 0, y4, 0)
@@ -798,6 +812,84 @@ struct BNT {
x2y2 + x5y4 = (x2 + x5)(y2 + y4) - x2y4 - x5y2
x0y2 + x3y4 = (x0 + x3)(y2 + y4) - x0y4 - x3y2
*/
+ // mul_024(z, x): in-place form of the former mul_024(z, z(=y), x); overwrites z with z times the sparse element whose slots are (x.a, 0, x.c, 0, x.b, 0)
+ static void mul_024(Fp12& z, const Fp6& x) // in the formulas below, z_i on a right-hand side means the value of z on entry
+ {
+ Fp2& z0 = z.a.a; // z0..z5 alias the six Fp2 slots of z; each is overwritten only after its last read below
+ Fp2& z1 = z.a.b;
+ Fp2& z2 = z.a.c;
+ Fp2& z3 = z.b.a;
+ Fp2& z4 = z.b.b;
+ Fp2& z5 = z.b.c;
+ const Fp2& x0 = x.a; // x's three coefficients, named after the slots (0, 2, 4) they fill in the sparse layout
+ const Fp2& x2 = x.c;
+ const Fp2& x4 = x.b;
+ Fp2 t0, t1, t2;
+ Fp2 s0;
+ Fp2Dbl T3, T4;
+ Fp2Dbl D0, D2, D4;
+ Fp2Dbl S1;
+ Fp2Dbl_mulOpt(D0, z0, x0); // D0 = z0*x0 (unreduced product as produced by Fp2Dbl_mulOpt)
+ Fp2Dbl_mulOpt(D2, z2, x2); // D2 = z2*x2
+ Fp2Dbl_mulOpt(D4, z4, x4); // D4 = z4*x4
+ Fp2::add(t2, z0, z4); // t2 = z0 + z4, saved for the z4 step
+ Fp2::add(t1, z0, z2); // t1 = z0 + z2, saved for the z2 step
+ Fp2::add(s0, z1, z3);
+ s0 += z5; // s0 = z1 + z3 + z5, saved for the z5 step
+ // For z.a.a = z0.
+ Fp2Dbl_mulOpt(S1, z1, x2); // S1 starts as z1*x2 and accumulates the six products that the z5 step subtracts
+ Fp2Dbl::add(T3, S1, D4);
+ Fp2Dbl::mul_xi(T4, T3); // T4 = mul_xi(z1*x2 + z4*x4); presumably multiplication by the tower constant xi -- confirm
+ T4 += D0;
+ Fp2Dbl::mod(z0, T4); // z0 = z0*x0 + xi*(z1*x2 + z4*x4)
+ // For z.a.b = z1.
+ Fp2Dbl_mulOpt(T3, z5, x4); // T3 = z5*x4
+ S1 += T3;
+ T3 += D2;
+ Fp2Dbl::mul_xi(T4, T3); // T4 = xi*(z5*x4 + z2*x2)
+ Fp2Dbl_mulOpt(T3, z1, x0); // T3 = z1*x0
+ S1 += T3;
+ T4 += T3;
+ Fp2Dbl::mod(z1, T4); // z1 = z1*x0 + xi*(z2*x2 + z5*x4)
+ // For z.a.c = z2.
+ Fp2::add(t0, x0, x2);
+ Fp2Dbl_mulOpt(T3, t1, t0); // T3 = (z0 + z2)(x0 + x2)
+ T3 -= D0;
+ T3 -= D2; // T3 = z0*x2 + z2*x0
+ Fp2Dbl_mulOpt(T4, z3, x4); // T4 = z3*x4
+ S1 += T4;
+ T3 += T4; // T3 = z0*x2 + z2*x0 + z3*x4; its reduction into z2 is deferred two statements
+ // The z3 step needs the old z2, so z2 + z4 is formed before z2 is overwritten.
+ // For z.b.a = z3.
+ Fp2::add(t0, z2, z4); // must precede the store to z2 on the next line
+ Fp2Dbl::mod(z2, T3); // z2 = z0*x2 + z2*x0 + z3*x4
+ Fp2::add(t1, x2, x4);
+ Fp2Dbl_mulOpt(T3, t0, t1); // T3 = (z2 + z4)(x2 + x4)
+ T3 -= D2;
+ T3 -= D4; // T3 = z2*x4 + z4*x2
+ Fp2Dbl::mul_xi(T4, T3);
+ Fp2Dbl_mulOpt(T3, z3, x0); // T3 = z3*x0
+ S1 += T3;
+ T4 += T3;
+ Fp2Dbl::mod(z3, T4); // z3 = z3*x0 + xi*(z2*x4 + z4*x2)
+ // For z.b.b = z4.
+ Fp2Dbl_mulOpt(T3, z5, x2); // T3 = z5*x2
+ S1 += T3; // S1 = z1*x2 + z5*x4 + z1*x0 + z3*x4 + z3*x0 + z5*x2
+ Fp2Dbl::mul_xi(T4, T3);
+ Fp2::add(t0, x0, x4);
+ Fp2Dbl_mulOpt(T3, t2, t0); // T3 = (z0 + z4)(x0 + x4)
+ T3 -= D0;
+ T3 -= D4; // T3 = z0*x4 + z4*x0
+ T4 += T3;
+ Fp2Dbl::mod(z4, T4); // z4 = z0*x4 + z4*x0 + xi*(z5*x2)
+ // For z.b.c = z5.
+ Fp2::add(t0, x0, x2);
+ t0 += x4; // t0 = x0 + x2 + x4
+ Fp2Dbl_mulOpt(T3, s0, t0); // T3 = (z1 + z3 + z5)(x0 + x2 + x4), all nine cross products
+ T3 -= S1; // removes the six unwanted products, leaving z1*x4 + z3*x2 + z5*x0
+ Fp2Dbl::mod(z5, T3); // z5 = z1*x4 + z3*x2 + z5*x0
+ }
+#if 0
static void mul_024(Fp12& z, const Fp12&x, const Fp6& y)
{
#if 1
@@ -828,42 +920,42 @@ struct BNT {
Fp2::add(x0_add_x3, x0, x3);
Fp2Dbl T1, T2;
Fp2Dbl::mulPre(T1, x1_add_x4, y2_add_y4);
- Fp2Dbl::sub(T1, T1, X1Y4);
- Fp2Dbl::sub(T1, T1, X4Y2);
+ T1 -= X1Y4;
+ T1 -= X4Y2;
Fp2Dbl::mul_xi(T1, T1);
Fp2Dbl::mulPre(T2, x0, y0);
- Fp2Dbl::add(T1, T1, T2);
+ T1 += T2;
Fp2Dbl::mod(z.a.a, T1);
Fp2Dbl::mulPre(T1, x2_add_x5, y2_add_y4);
- Fp2Dbl::sub(T1, T1, X2Y4);
- Fp2Dbl::sub(T1, T1, X5Y2);
+ T1 -= X2Y4;
+ T1 -= X5Y2;
Fp2Dbl::mul_xi(T1, T1);
Fp2Dbl::mulPre(T2, x1, y0);
Fp2Dbl::add(T1, T1, T2);
Fp2Dbl::mod(z.a.b, T1);
Fp2Dbl::mulPre(T1, x0_add_x3, y2_add_y4);
- Fp2Dbl::sub(T1, T1, X0Y4);
- Fp2Dbl::sub(T1, T1, X3Y2);
+ T1 -= X0Y4;
+ T1 -= X3Y2;
Fp2Dbl::mulPre(T2, x2, y0);
- Fp2Dbl::add(T1, T1, T2);
+ T1 += T2;
Fp2Dbl::mod(z.a.c, T1);
Fp2Dbl::add(T1, X2Y4, X4Y2);
Fp2Dbl::mul_xi(T1, T1);
Fp2Dbl::mulPre(T2, x3, y0);
- Fp2Dbl::add(T1, T1, T2);
+ T1 += T2;
Fp2Dbl::mod(z.b.a, T1);
Fp2Dbl::mul_xi(T1, X5Y2);
Fp2Dbl::mulPre(T2, x4, y0);
- Fp2Dbl::add(T2, T2, X0Y4);
- Fp2Dbl::add(T1, T1, T2);
+ T2 += X0Y4;
+ T1 += T2;
Fp2Dbl::mod(z.b.b, T1);
Fp2Dbl::mulPre(T1, x5, y0);
- Fp2Dbl::add(T1, T1, X3Y2);
- Fp2Dbl::add(T1, T1, X1Y4);
+ T1 += X3Y2;
+ T1 += X1Y4;
Fp2Dbl::mod(z.b.c, T1);
#else
const Fp2 x0 = x.a.a;
@@ -926,6 +1018,7 @@ struct BNT {
z.b.c += x1y4;
#endif
}
+#endif
static void mul_024_024(Fp12& z, const Fp6& x, const Fp6& y)
{
Fp12 x2, y2;
@@ -1323,14 +1416,14 @@ struct BNT {
for (size_t i = 2; i < param.siTbl.size(); i++) {
dblLine(l, T, P);
Fp12::sqr(f, f);
- mul_024(f, f, l);
+ mul_024(f, l);
if (param.siTbl[i]) {
if (param.siTbl[i] > 0) {
addLine(l, T, Q, P);
} else {
addLine(l, T, negQ, P);
}
- mul_024(f, f, l);
+ mul_024(f, l);
}
}
G2 Q1, Q2;
@@ -1427,11 +1520,11 @@ struct BNT {
mulFp6cb_by_G1xy(l, Qcoeff[idx], P);
idx++;
Fp12::sqr(f, f);
- mul_024(f, f, l);
+ mul_024(f, l);
if (param.siTbl[i]) {
mulFp6cb_by_G1xy(l, Qcoeff[idx], P);
idx++;
- mul_024(f, f, l);
+ mul_024(f, l);
}
}
if (param.z < 0) {
diff --git a/test/bn_test.cpp b/test/bn_test.cpp
index 4ce2e5a..4938662 100644
--- a/test/bn_test.cpp
+++ b/test/bn_test.cpp
@@ -245,8 +245,8 @@ void testPairing(const G1& P, const G2& Q, const char *eStr)
}
CYBOZU_TEST_EQUAL(e1, e2);
#ifdef ONLY_BENCH
-// for (int i = 0; i < 1000; i++) BN::pairing(e1, P, Q);
- CYBOZU_BENCH_C("pairing", 1000, BN::pairing, e1, P, Q); // 2.4Mclk
+ for (int i = 0; i < 1000; i++) BN::pairing(e1, P, Q);
+// CYBOZU_BENCH_C("pairing", 1000, BN::pairing, e1, P, Q); // 2.4Mclk
#else
{
Fp12 e = e1, ea;
@@ -360,6 +360,11 @@ CYBOZU_TEST_AUTO(naive)
initPairing(ts.cp, g_mode);
G1 P(ts.g1.a, ts.g1.b);
G2 Q(Fp2(ts.g2.aa, ts.g2.ab), Fp2(ts.g2.ba, ts.g2.bb));
+#if 0
+ testPairing(P, Q, ts.e);
+clk.put();
+return;
+#endif
testFp12pow(P, Q);
testIo(P, Q);
testTrivial(P, Q);