about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: MITSUNARI Shigeo <herumi@nifty.com>, 2018-11-05 16:27:50 +0800
committer: MITSUNARI Shigeo <herumi@nifty.com>, 2018-11-05 16:40:34 +0800
commit: c63c62889e05e88289793b2506bebf4636d0ac74 (patch)
tree: 2e8d6b618986a3475a1043e06cd151e37fea228a
parent: f4b4382433d66d89ced6712ab4edb60957fef009 (diff)
download: tangerine-mcl-c63c62889e05e88289793b2506bebf4636d0ac74.tar.gz
tangerine-mcl-c63c62889e05e88289793b2506bebf4636d0ac74.tar.zst
tangerine-mcl-c63c62889e05e88289793b2506bebf4636d0ac74.zip
refactor generator
-rw-r--r-- src/fp_generator.hpp 105
1 files changed, 79 insertions, 26 deletions
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 2dca191..c62ecab 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -330,30 +330,13 @@ private:
gen_preInv();
}
op.fp2_addA_ = gen_fp2_add();
-
- if (op.N == 4 && !isFullBit_) {
- align(16);
- op.fp2_subA_ = getCurr<void3u>();
- gen_fp2_sub4();
- align(16);
- op.fp2_negA_ = getCurr<void2u>();
- gen_fp2_neg4();
- align(16);
- op.fp2Dbl_mulPreA_ = getCurr<void3u>();
- gen_fp2Dbl_mulPre();
- align(16);
- op.fp2Dbl_sqrPreA_ = getCurr<void2u>();
- gen_fp2Dbl_sqrPre();
- align(16);
- op.fp2_mulA_ = getCurr<void3u>();
- gen_fp2_mul4();
- align(16);
- op.fp2_sqrA_ = getCurr<void2u>();
- gen_fp2_sqr4();
- align(16);
- op.fp2_mul_xiA_ = getCurr<void2u>();
- gen_fp2_mul_xi4();
- }
+ op.fp2_subA_ = gen_fp2_sub();
+ op.fp2_negA_ = gen_fp2_neg();
+ op.fp2Dbl_mulPreA_ = gen_fp2Dbl_mulPre();
+ op.fp2Dbl_sqrPreA_ = gen_fp2Dbl_sqrPre();
+ op.fp2_mulA_ = gen_fp2_mul();
+ op.fp2_sqrA_ = gen_fp2_sqr();
+ op.fp2_mul_xiA_ = gen_fp2_mul_xi();
}
u3u gen_addSubPre(bool isAdd, int n)
{
@@ -3405,7 +3388,17 @@ private:
}
}
}
- void gen_fp2Dbl_mulPre()
+ void3u gen_fp2Dbl_mulPre()
+ {
+ align(16);
+ void3u func = getCurr<void3u>();
+ if (pn_ == 4 && !isFullBit_) {
+ gen_fp2Dbl_mulPre4();
+ return func;
+ }
+ return 0;
+ }
+ void gen_fp2Dbl_mulPre4()
{
assert(!isFullBit_);
const RegExp z = rsp + 0 * 8;
@@ -3458,7 +3451,17 @@ private:
gen_raw_sub(gp0, gp1, gp2, rax, 4);
gen_raw_fp_sub(gp0 + 8 * 4, gp1 + 8 * 4, gp2 + 8 * 4, Pack(gt0, gt1, gt2, gt3, gt4, gt5, gt6, gt7), true);
}
- void gen_fp2Dbl_sqrPre()
+ void2u gen_fp2Dbl_sqrPre()
+ {
+ align(16);
+ void2u func = getCurr<void2u>();
+ if (pn_ == 4 && !isFullBit_) {
+ gen_fp2Dbl_sqrPre4();
+ return func;
+ }
+ return 0;
+ }
+ void gen_fp2Dbl_sqrPre4()
{
assert(!isFullBit_);
const RegExp y = rsp + 0 * 8;
@@ -3533,6 +3536,16 @@ private:
}
return 0;
}
+ void3u gen_fp2_sub()
+ {
+ align(16);
+ void3u func = getCurr<void3u>();
+ if (pn_ == 4 && !isFullBit_) {
+ gen_fp2_sub4();
+ return func;
+ }
+ return 0;
+ }
void gen_fp2_sub4()
{
assert(!isFullBit_);
@@ -3540,6 +3553,16 @@ private:
gen_raw_fp_sub(sf.p[0], sf.p[1], sf.p[2], sf.t, false);
gen_raw_fp_sub(sf.p[0] + FpByte_, sf.p[1] + FpByte_, sf.p[2] + FpByte_, sf.t, false);
}
+ void2u gen_fp2_mul_xi()
+ {
+ align(16);
+ void2u func = getCurr<void2u>();
+ if (pn_ == 4 && !isFullBit_) {
+ gen_fp2_mul_xi4();
+ return func;
+ }
+ return 0;
+ }
/*
for only xi_a = 1
y.a = a - b
@@ -3589,6 +3612,16 @@ private:
}
#endif
}
+ void2u gen_fp2_neg()
+ {
+ align(16);
+ void2u func = getCurr<void2u>();
+ if (pn_ == 4 && !isFullBit_) {
+ gen_fp2_neg4();
+ return func;
+ }
+ return 0;
+ }
void gen_fp2_neg4()
{
assert(!isFullBit_);
@@ -3596,6 +3629,16 @@ private:
gen_raw_neg(sf.p[0], sf.p[1], sf.t);
gen_raw_neg(sf.p[0] + FpByte_, sf.p[1] + FpByte_, sf.t);
}
+ void3u gen_fp2_mul()
+ {
+ align(16);
+ void3u func = getCurr<void3u>();
+ if (pn_ == 4 && !isFullBit_) {
+ gen_fp2_mul4();
+ return func;
+ }
+ return 0;
+ }
void gen_fp2_mul4()
{
assert(!isFullBit_);
@@ -3644,6 +3687,16 @@ private:
lea(gp1, ptr[d1]);
call(fpDbl_modL);
}
+ void2u gen_fp2_sqr()
+ {
+ align(16);
+ void2u func = getCurr<void2u>();
+ if (pn_ == 4 && !isFullBit_) {
+ gen_fp2_sqr4();
+ return func;
+ }
+ return 0;
+ }
void gen_fp2_sqr4()
{
assert(!isFullBit_);