diff options
author | MITSUNARI Shigeo <herumi@nifty.com> | 2018-11-06 14:47:59 +0800 |
---|---|---|
committer | MITSUNARI Shigeo <herumi@nifty.com> | 2018-11-06 14:47:59 +0800 |
commit | 2f68f703e4b7e4563a803c54a34623bc91f63161 (patch) | |
tree | 36e8412e31df6fa67f9905d4d2163ef39afc8f08 /src | |
parent | a2bdc82c97e4199f0bab7df95644fc99502aa6c7 (diff) | |
download | tangerine-mcl-2f68f703e4b7e4563a803c54a34623bc91f63161.tar.gz tangerine-mcl-2f68f703e4b7e4563a803c54a34623bc91f63161.tar.zst tangerine-mcl-2f68f703e4b7e4563a803c54a34623bc91f63161.zip |
add Fp2::sqr for bls12
Diffstat (limited to 'src')
-rw-r--r-- | src/fp_generator.hpp | 59 |
1 file changed, 37 insertions, 22 deletions
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp index 0efd60d..8c0c3b7 100644 --- a/src/fp_generator.hpp +++ b/src/fp_generator.hpp @@ -581,7 +581,7 @@ private: gen_raw_fp_sub(pz, px, py, sf.t, false); } /* - add(pz + offset, px + offset, py + offset); + add(pz, px, py); size of t1, t2 == 6 destroy t0, t1 */ @@ -723,7 +723,7 @@ private: } return 0; } - void gen_raw_fp_sub6(const RegExp& pz, const Reg64& px, const Reg64& py, int offset, const Pack& t, bool withCarry) + void gen_raw_fp_sub6(const RegExp& pz, const RegExp& px, const RegExp& py, int offset, const Pack& t, bool withCarry) { load_rm(t, px + offset); sub_rm(t, py + offset, withCarry); @@ -3654,7 +3654,6 @@ private: { if (isFullBit_) return 0; if (pn_ != 4 && !(pn_ == 6 && useMulx_ && useAdx_)) return 0; -// if (pn_ != 4) return 0; align(16); void3u func = getCurr<void3u>(); bool embedded = pn_ == 4; @@ -3730,17 +3729,11 @@ private: } void2u gen_fp2_sqr() { + if (isFullBit_) return 0; + if (pn_ != 4 && !(pn_ == 6 && useMulx_ && useAdx_)) return 0; align(16); void2u func = getCurr<void2u>(); - if (pn_ == 4 && !isFullBit_) { - gen_fp2_sqr4(); - return func; - } - return 0; - } - void gen_fp2_sqr4() - { - assert(!isFullBit_); + const RegExp y = rsp + 0 * 8; const RegExp x = rsp + 1 * 8; const Ext1 t1(FpByte_, rsp, 2 * 8); @@ -3753,7 +3746,7 @@ private: // t1 = b + b lea(gp0, ptr [t1]); if (nocarry) { - for (int i = 0; i < 4; i++) { + for (int i = 0; i < pn_; i++) { mov(rax, ptr [gp1 + FpByte_ + i * 8]); if (i == 0) { add(rax, rax); @@ -3763,7 +3756,15 @@ private: mov(ptr [gp0 + i * 8], rax); } } else { - gen_raw_fp_add(gp0, gp1 + FpByte_, gp1 + FpByte_, sf.t, false); + if (pn_ == 4) { + gen_raw_fp_add(gp0, gp1 + FpByte_, gp1 + FpByte_, sf.t, false); + } else { + assert(pn_ == 6); + Pack t = sf.t.sub(6, 4); + t.append(rax); + t.append(rdx); + gen_raw_fp_add6(gp0, gp1 + FpByte_, gp1 + FpByte_, sf.t.sub(0, 6), t, false); + } } // t1 = 2ab mov(gp1, gp0); @@ -3771,13 +3772,16 @@ private: call(fp_mulL); if 
(nocarry) { - Pack a = sf.t.sub(0, 4); - Pack b = sf.t.sub(4, 4); + Pack t = sf.t; + t.append(rdx); + t.append(gp1); + Pack a = t.sub(0, pn_); + Pack b = t.sub(pn_, pn_); mov(gp0, ptr [x]); load_rm(a, gp0); load_rm(b, gp0 + FpByte_); // t2 = a + b - for (int i = 0; i < 4; i++) { + for (int i = 0; i < pn_; i++) { mov(rax, a[i]); if (i == 0) { add(rax, b[i]); @@ -3787,14 +3791,24 @@ private: mov(ptr [(RegExp)t2 + i * 8], rax); } // t3 = a + p - b - mov(gp1, (size_t)p_); - add_rm(a, gp1); + mov(rax, pL_); + add_rm(a, rax); sub_rr(a, b); store_mr(t3, a); } else { mov(gp0, ptr [x]); - gen_raw_fp_add(t2, gp0, gp0 + FpByte_, sf.t, false); - gen_raw_fp_sub(t3, gp0, gp0 + FpByte_, sf.t, false); + if (pn_ == 4) { + gen_raw_fp_add(t2, gp0, gp0 + FpByte_, sf.t, false); + gen_raw_fp_sub(t3, gp0, gp0 + FpByte_, sf.t, false); + } else { + assert(pn_ == 6); + Pack p1 = sf.t.sub(0, 6); + Pack p2 = sf.t.sub(6, 4); + p2.append(rax); + p2.append(rdx); + gen_raw_fp_add6(t2, gp0, gp0 + FpByte_, p1, p2, false); + gen_raw_fp_sub6(t3, gp0, gp0 + FpByte_, 0, p1, false); + } } mov(gp0, ptr [y]); @@ -3802,10 +3816,11 @@ private: lea(gp2, ptr [t3]); call(fp_mulL); mov(gp0, ptr [y]); - for (int i = 0; i < 4; i++) { + for (int i = 0; i < pn_; i++) { mov(rax, ptr [(RegExp)t1 + i * 8]); mov(ptr [gp0 + FpByte_ + i * 8], rax); } + return func; } }; |