aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- include/mcl/fp_tower.hpp 21
-rw-r--r-- include/mcl/op.hpp 2
-rw-r--r-- src/fp_generator.hpp 17
3 files changed, 35 insertions, 5 deletions
diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index 052b550..3eec138 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -242,9 +242,9 @@ public:
static void (*neg)(Fp2T& y, const Fp2T& x);
static void (*mul)(Fp2T& z, const Fp2T& x, const Fp2T& y);
static void (*sqr)(Fp2T& y, const Fp2T& x);
+ static void (*mul_xi)(Fp2T& y, const Fp2T& x);
static void addPre(Fp2T& z, const Fp2T& x, const Fp2T& y) { Fp::addPre(z.a, x.a, y.a); Fp::addPre(z.b, x.b, y.b); }
static void inv(Fp2T& y, const Fp2T& x) { Fp::op_.fp2_inv(y.a.v_, x.a.v_); }
- static void mul_xi(Fp2T& y, const Fp2T& x) { Fp::op_.fp2_mul_xi(y.a.v_, x.a.v_); }
static void divBy2(Fp2T& y, const Fp2T& x)
{
Fp::divBy2(y.a, x.a);
@@ -405,11 +405,21 @@ public:
sqr = (void (*)(Fp2T& y, const Fp2T& x))op.fp2_sqrA_;
if (sqr == 0) sqr = fp2_sqrC;
op.fp2_inv = fp2_invW;
- if (xi_a == 1) {
- op.fp2_mul_xi = fp2_mul_xi_1_1i;
- } else {
- op.fp2_mul_xi = fp2_mul_xiW;
+ if (op.fp2_mul_xi == 0) {
+ if (xi_a == 1) {
+ /*
+ current fp_generator.hpp generates mul_xi for xi_a = 1
+ */
+ if (op.fp2_mul_xiA_) {
+ op.fp2_mul_xi = op.fp2_mul_xiA_;
+ } else {
+ op.fp2_mul_xi = fp2_mul_xi_1_1i;
+ }
+ } else {
+ op.fp2_mul_xi = fp2_mul_xiW;
+ }
}
+ mul_xi = (void (*)(Fp2T&, const Fp2T&))op.fp2_mul_xi;
const Fp2T xi(xi_a, 1);
const mpz_class& p = Fp::getOp().mp;
Fp2T::pow(g[0], xi, (p - 1) / 6); // g = xi^((p-1)/6)
@@ -621,6 +631,7 @@ template<class Fp_> void (*Fp2T<Fp_>::sub)(Fp2T& z, const Fp2T& x, const Fp2T& y
template<class Fp_> void (*Fp2T<Fp_>::neg)(Fp2T& y, const Fp2T& x);
template<class Fp_> void (*Fp2T<Fp_>::mul)(Fp2T& z, const Fp2T& x, const Fp2T& y);
template<class Fp_> void (*Fp2T<Fp_>::sqr)(Fp2T& y, const Fp2T& x);
+template<class Fp_> void (*Fp2T<Fp_>::mul_xi)(Fp2T& y, const Fp2T& x);
template<class Fp>
struct Fp2DblT {
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index df6dc4b..0c61643 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -227,6 +227,7 @@ struct Op {
void4u fp2_mulNF;
void2u fp2_inv;
void2u fp2_mul_xi;
+ void2u fp2_mul_xiA_;
uint32_t (*hash)(void *out, uint32_t maxOutSize, const void *msg, uint32_t msgSize);
PrimeMode primeMode;
@@ -306,6 +307,7 @@ struct Op {
fp2_mulNF = 0;
fp2_inv = 0;
fp2_mul_xi = 0;
+ fp2_mul_xiA_ = 0;
primeMode = PM_GENERIC;
isFullBit = false;
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 564391c..d1b48d2 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -386,6 +386,9 @@ struct Code : Xbyak::CodeGenerator {
align(16);
op.fp2_sqrA_ = getCurr<void2u>();
gen_fp2_sqr4();
+ align(16);
+ op.fp2_mul_xiA_ = getCurr<void2u>();
+ gen_fp2_mul_xi4();
}
}
void gen_addSubPre(bool isAdd, int n)
@@ -2873,6 +2876,20 @@ private:
gen_raw_fp_sub(sf.p[0], sf.p[1], sf.p[2], sf.t, false);
gen_raw_fp_sub(sf.p[0] + FpByte_, sf.p[1] + FpByte_, sf.p[2] + FpByte_, sf.t, false);
}
+ /*
+ for only xi_a = 1 (xi = 1 + i): emits y = x * xi, i.e. (a + bi)(1 + i) = (a - b) + (a + b)i; 4-word Fp, non-full-bit p only
+ */
+ void gen_fp2_mul_xi4()
+ {
+ assert(!isFullBit_);
+ StackFrame sf(this, 2, 8, 8 * 4); // 2 args (presumably p[0] = y, p[1] = x), 8 temp regs, 8 * 4 bytes of stack scratch
+ gen_raw_fp_add(rsp, sf.p[1], sf.p[1] + FpByte_, sf.t, false); // [rsp] = x.a + x.b (presumably); held on the stack, likely so that y may alias x
+ gen_raw_fp_sub(sf.p[0], sf.p[1], sf.p[1] + FpByte_, sf.t, false); // y.a = x.a - x.b (presumably); both inputs are still unmodified here
+ for (int i = 0; i < 4; i++) { // copy the four 8-byte words saved at [rsp] into the second component of y
+ mov(rax, ptr [rsp + i * 8]);
+ mov(ptr[sf.p[0] + FpByte_ + i * 8], rax);
+ }
+ }
void gen_fp2_neg4()
{
assert(!isFullBit_);