diff options
author | MITSUNARI Shigeo <herumi@nifty.com> | 2016-10-05 14:36:51 +0800 |
---|---|---|
committer | MITSUNARI Shigeo <herumi@nifty.com> | 2016-10-05 14:36:51 +0800 |
commit | cd1257d1097a8ac4ddd1789d06a69839e782e93c (patch) | |
tree | 2215e03a2916e8e8d9f0aa6705b7a9be9be861e1 | |
parent | 079617adf0c64742104c2a2f199e9002b0baea2c (diff) | |
download | dexon-mcl-cd1257d1097a8ac4ddd1789d06a69839e782e93c.tar.gz dexon-mcl-cd1257d1097a8ac4ddd1789d06a69839e782e93c.tar.zst dexon-mcl-cd1257d1097a8ac4ddd1789d06a69839e782e93c.zip |
refactoring setup
-rw-r--r-- | sample/large.cpp | 6 | ||||
-rw-r--r-- | sample/rawbench.cpp | 16 | ||||
-rw-r--r-- | src/fp.cpp | 227 | ||||
-rw-r--r-- | src/fp_proto.hpp | 130 | ||||
-rw-r--r-- | src/gen.cpp | 81 | ||||
-rw-r--r-- | src/low_gmp.hpp | 8 |
6 files changed, 281 insertions, 187 deletions
diff --git a/sample/large.cpp b/sample/large.cpp index 72de4a2..cd79412 100644 --- a/sample/large.cpp +++ b/sample/large.cpp @@ -108,11 +108,11 @@ void test(const std::string& pStr, mcl::fp::Mode mode) } CYBOZU_BENCH("mulPre", op.fpDbl_mulPre, ux, ux, uy); CYBOZU_BENCH("sqrPre", op.fpDbl_sqrPre, ux, ux); - CYBOZU_BENCH("add", op.fpDbl_add, ux, ux, ux); - CYBOZU_BENCH("sub", op.fpDbl_sub, ux, ux, ux); + CYBOZU_BENCH("add", op.fpDbl_add, ux, ux, ux, op.p); + CYBOZU_BENCH("sub", op.fpDbl_sub, ux, ux, ux, op.p); CYBOZU_BENCH("addNC", op.fpDbl_addNC, ux, ux, ux); CYBOZU_BENCH("subNC", op.fpDbl_subNC, ux, ux, ux); - CYBOZU_BENCH("mont", op.fpDbl_mod, ux, ux); + CYBOZU_BENCH("mont", op.fpDbl_mod, ux, ux, op.p); CYBOZU_BENCH("mul", Fp::mul, x, x, x); compareGmp(pStr); } diff --git a/sample/rawbench.cpp b/sample/rawbench.cpp index 083d0cf..ddfe733 100644 --- a/sample/rawbench.cpp +++ b/sample/rawbench.cpp @@ -36,19 +36,19 @@ void benchRaw(const char *p, mcl::fp::Mode mode) double fpDbl_addT, fpDbl_subT; double fpDbl_sqrPreT, fpDbl_mulPreT, fpDbl_modT; double fp2_sqrT, fp2_mulT; - CYBOZU_BENCH_T(fp_addT, op.fp_add, uz, ux, uy); - CYBOZU_BENCH_T(fp_subT, op.fp_sub, uz, uy, ux); + CYBOZU_BENCH_T(fp_addT, op.fp_add, uz, ux, uy, op.p); + CYBOZU_BENCH_T(fp_subT, op.fp_sub, uz, uy, ux, op.p); CYBOZU_BENCH_T(fp_addNCT, op.fp_addNC, uz, ux, uy); CYBOZU_BENCH_T(fp_subNCT, op.fp_subNC, uz, uy, ux); - CYBOZU_BENCH_T(fp_sqrT, op.fp_sqr, uz, ux); - CYBOZU_BENCH_T(fp_mulT, op.fp_mul, uz, ux, uy); - CYBOZU_BENCH_T(fp_mul_UnitT, op.fp_mul_Unit, uz, ux, 12345678); + CYBOZU_BENCH_T(fp_sqrT, op.fp_sqr, uz, ux, op.p); + CYBOZU_BENCH_T(fp_mulT, op.fp_mul, uz, ux, uy, op.p); + CYBOZU_BENCH_T(fp_mul_UnitT, op.fp_mul_Unit, uz, ux, 12345678, op.p); CYBOZU_BENCH_T(fp_mul_UnitPreT, op.fp_mul_UnitPre, ux, ux, 12345678); - CYBOZU_BENCH_T(fpDbl_addT, op.fpDbl_add, uz, ux, uy); - CYBOZU_BENCH_T(fpDbl_subT, op.fpDbl_sub, uz, uy, ux); + CYBOZU_BENCH_T(fpDbl_addT, op.fpDbl_add, uz, ux, uy, op.p); + CYBOZU_BENCH_T(fpDbl_subT, op.fpDbl_sub, uz, uy, ux, op.p); CYBOZU_BENCH_T(fpDbl_sqrPreT, op.fpDbl_sqrPre, uz, ux); CYBOZU_BENCH_T(fpDbl_mulPreT, op.fpDbl_mulPre, uz, ux, uy); - CYBOZU_BENCH_T(fpDbl_modT, op.fpDbl_mod, uz, ux); + CYBOZU_BENCH_T(fpDbl_modT, op.fpDbl_mod, uz, ux, op.p); Fp2 f2x, f2y; f2x.a = fx; f2x.b = fy; @@ -107,6 +107,70 @@ Mode StrToMode(const std::string& s) throw cybozu::Exception("StrToMode") << s; } +#ifdef MCL_USE_LLVM + +#define MCL_DEF_LLVM_FUNC(bit) \ +template<>const u3u AddNC<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_addNC ## bit ## L; \ +template<>const u3u SubNC<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_subNC ## bit ## L; \ +template<>const void3u MulPre<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fpDbl_mulPre ## bit ## L; \ +template<>const void2u SqrPre<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fpDbl_sqrPre ## bit ## L; \ +template<>const void2uI Mul_UnitPre<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_mul_UnitPre ## bit ## L; \ +template<>const void4u Add<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_add ## bit ## L; \ +template<>const void4u Sub<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_sub ## bit ## L; \ +template<>const void4u Mont<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_mont ## bit ## L; \ +template<>const void3u MontRed<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_montRed ## bit ## L; \ +template<>const void4u DblAdd<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fpDbl_add ## bit ## L; \ +template<>const void4u DblSub<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fpDbl_sub ## bit ## L; \ + +template<size_t N> +struct Mul<N, Ltag> { + static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p) + { + Unit xy[N * 2]; + MulPre<N, Ltag>::f(xy, x, y); + Dbl_Mod<N, Gtag>::f(z, xy, p); + } + static const void4u f; +}; + +template<size_t N> +const void4u Mul<N, Ltag>::f = Mul<N, Ltag>::func; + +template<size_t N> +struct Sqr<N, Ltag> { + static inline void func(Unit *y, const Unit *x, const Unit *p) + { + Unit xx[N * 2]; + SqrPre<N, Ltag>::f(xx, x); + Dbl_Mod<N, Gtag>::f(y, xx, p); + } + static const void3u f; +}; + +template<size_t N> +const void3u Sqr<N, Ltag>::f = Sqr<N, Ltag>::func; + +MCL_DEF_LLVM_FUNC(64) +MCL_DEF_LLVM_FUNC(128) +MCL_DEF_LLVM_FUNC(192) +MCL_DEF_LLVM_FUNC(256) +MCL_DEF_LLVM_FUNC(320) +MCL_DEF_LLVM_FUNC(384) +MCL_DEF_LLVM_FUNC(448) +MCL_DEF_LLVM_FUNC(512) +#if CYBOZU_OS_BIT == 32 +MCL_DEF_LLVM_FUNC(160) +MCL_DEF_LLVM_FUNC(224) +MCL_DEF_LLVM_FUNC(288) +MCL_DEF_LLVM_FUNC(352) +MCL_DEF_LLVM_FUNC(416) +MCL_DEF_LLVM_FUNC(480) +MCL_DEF_LLVM_FUNC(544) +#else +MCL_DEF_LLVM_FUNC(576) +#endif + +#endif template<size_t bitSize> struct OpeFunc { @@ -136,43 +200,6 @@ struct OpeFunc { { copyArray(y, x, N); } - static inline void fp_addC(Unit *z, const Unit *x, const Unit *y, const Unit *p) - { - if (AddPre<N, Gtag>::f(z, x, y)) { - SubPre<N, Gtag>::f(z, z, p); - return; - } - Unit tmp[N]; - if (SubPre<N, Gtag>::f(tmp, z, p) == 0) { - memcpy(z, tmp, sizeof(tmp)); - } - } - static inline void fp_subC(Unit *z, const Unit *x, const Unit *y, const Unit *p) - { - if (SubPre<N, Gtag>::f(z, x, y)) { - AddPre<N, Gtag>::f(z, z, p); - } - } - /* - z[N * 2] <- x[N * 2] + y[N * 2] mod p[N] << (N * UnitBitSize) - */ - static inline void fpDbl_addC(Unit *z, const Unit *x, const Unit *y, const Unit *p) - { - if (AddPre<N * 2, Gtag>::f(z, x, y)) { - SubPre<N, Gtag>::f(z + N, z + N, p); - return; - } - Unit tmp[N]; - if (SubPre<N, Gtag>::f(tmp, z + N, p) == 0) { - memcpy(z + N, tmp, sizeof(tmp)); - } - } - static inline void fpDbl_subC(Unit *z, const Unit *x, const Unit *y, const Unit *p) - { - if (SubPre<N * 2, Gtag>::f(z, x, y)) { - AddPre<N, Gtag>::f(z + N, z + N, p); - } - } // z[N] <- mont(x[N], y[N]) static inline void fp_mulMontC(Unit *z, const Unit *x, const Unit *y, const Unit *p) { @@ -189,20 +216,20 @@ struct OpeFunc { Unit t[N + 2]; Mul_UnitPre<N, Gtag>::f(t, p, q); // p * q t[N + 1] = 0; // always zero - c[N + 1] = AddPre<N + 1, Gtag>::f(c, c, t); + c[N + 1] = AddNC<N + 1, Gtag>::f(c, c, t); c++; for (size_t i = 1; i < N; i++) { Mul_UnitPre<N, Gtag>::f(t, x, y[i]); - c[N + 1] = AddPre<N + 1, Gtag>::f(c, c, t); + c[N + 1] = AddNC<N + 1, Gtag>::f(c, c, t); q = c[0] * rp; Mul_UnitPre<N, Gtag>::f(t, p, q); - AddPre<N + 2, Gtag>::f(c, c, t); + AddNC<N + 2, Gtag>::f(c, c, t); c++; } if (c[N]) { - SubPre<N, Gtag>::f(z, c, p); + SubNC<N, Gtag>::f(z, c, p); } else { - if (SubPre<N, Gtag>::f(z, c, p)) { + if (SubNC<N, Gtag>::f(z, c, p)) { memcpy(z, c, N * sizeof(Unit)); } } @@ -221,7 +248,7 @@ struct OpeFunc { Unit *c = buf; Unit q = xy[0] * rp; Mul_UnitPre<N, Gtag>::f(t, p, q); - buf[N * 2] = AddPre<N * 2, Gtag>::f(buf, xy, t); + buf[N * 2] = AddNC<N * 2, Gtag>::f(buf, xy, t); c++; for (size_t i = 1; i < N; i++) { q = c[0] * rp; @@ -231,9 +258,9 @@ struct OpeFunc { c++; } if (c[N]) { - SubPre<N, Gtag>::f(z, c, p); + SubNC<N, Gtag>::f(z, c, p); } else { - if (SubPre<N, Gtag>::f(z, c, p)) { + if (SubNC<N, Gtag>::f(z, c, p)) { memcpy(z, c, N * sizeof(Unit)); } } @@ -289,39 +316,48 @@ struct OpeFunc { if (x != y) fp_clearC(y); return; } - fp_subC(y, p, x, p); + SubNC<N, Gtag>::f(y, p, x); } }; #ifdef MCL_USE_LLVM - #define SET_OP_LLVM(bit) \ + #define SET_OP_LLVM /* assume n */ \ if (mode == FP_LLVM || mode == FP_LLVM_MONT) { \ - fp_add = mcl_fp_add ## bit ## L; \ - fp_sub = mcl_fp_sub ## bit ## L; \ - if (!isFullBit) { \ - fp_addNC = mcl_fp_addNC ## bit ## L; \ - fp_subNC = mcl_fp_subNC ## bit ## L; \ - } \ - fpDbl_mulPre = mcl_fpDbl_mulPre ## bit ## L; \ - fp_mul_UnitPre = mcl_fp_mul_UnitPre ## bit ## L; \ - fpDbl_sqrPre = mcl_fpDbl_sqrPre ## bit ## L; \ + fp_add = Add<n, Ltag>::f; \ + fp_sub = Sub<n, Ltag>::f; \ + fpDbl_add = DblAdd<n, Ltag>::f; \ + fpDbl_sub = DblSub<n, Ltag>::f; \ if (mode == FP_LLVM_MONT) { \ - fpDbl_mod = mcl_fp_montRed ## bit ## L; \ - fp_mul = mcl_fp_mont ## bit ## L; \ + fp_mul = Mont<n, Ltag>::f; \ + fp_sqr = SqrMont<n, Ltag>::f; \ + fpDbl_mod = MontRed<n, Ltag>::f; \ + } else { \ + fp_mul = Mul<n, Ltag>::f; \ + fp_sqr = Sqr<n, Ltag>::f; \ + } \ + fpDbl_mulPre = MulPre<n, Ltag>::f; \ + fpDbl_sqrPre = SqrPre<n, Ltag>::f; \ + fp_mul_UnitPre = Mul_UnitPre<n, Ltag>::f; \ + if (!isFullBit) { \ + fp_addNC = AddNC<n, Ltag>::f; \ + fp_subNC = SubNC<n, Ltag>::f; \ } \ } - #define SET_OP_DBL_LLVM(bit, n2) \ + +#define SET_OP_LLVM2(bit) \ + { \ + const int n = bit / UnitBitSize; \ if (mode == FP_LLVM || mode == FP_LLVM_MONT) { \ - fpDbl_add = mcl_fpDbl_add ## bit ## L; \ - fpDbl_sub = mcl_fpDbl_sub ## bit ## L; \ if (!isFullBit) { \ - fpDbl_addNC = mcl_fp_addNC ## n2 ## L; \ - fpDbl_subNC = mcl_fp_subNC ## n2 ## L; \ + fpDbl_addNC = AddNC<n * 2, Ltag>::f; \ + fpDbl_subNC = SubNC<n * 2, Ltag>::f; \ } \ - } + } \ + } + #else - #define SET_OP_LLVM(bit) - #define SET_OP_DBL_LLVM(bit, n2) + #define SET_OP_LLVM + #define SET_OP_LLVM2(bit) #endif #define SET_OP(bit) \ @@ -332,8 +368,8 @@ struct OpeFunc { fp_clear = OpeFunc<bit>::fp_clearC; \ fp_copy = OpeFunc<bit>::fp_copyC; \ fp_neg = OpeFunc<bit>::fp_negC; \ - fp_add = OpeFunc<bit>::fp_addC; \ - fp_sub = OpeFunc<bit>::fp_subC; \ + fp_add = Add<n, Gtag>::f; \ + fp_sub = Sub<n, Gtag>::f; \ if (isMont) { \ fp_mul = OpeFunc<bit>::fp_mulMontC; \ fp_sqr = OpeFunc<bit>::fp_sqrMontC; \ @@ -350,15 +386,15 @@ struct OpeFunc { fpDbl_sqrPre = SqrPre<n, Gtag>::f; \ fp_mul_UnitPre = Mul_UnitPre<n, Gtag>::f; \ fpN1_mod = N1_Mod<n, Gtag>::f; \ - fpDbl_add = OpeFunc<bit>::fpDbl_addC; \ - fpDbl_sub = OpeFunc<bit>::fpDbl_subC; \ + fpDbl_add = DblAdd<n, Gtag>::f; \ + fpDbl_sub = DblSub<n, Gtag>::f; \ if (!isFullBit) { \ - fp_addNC = AddPre<n, Gtag>::f; \ - fp_subNC = SubPre<n, Gtag>::f; \ - fpDbl_addNC = AddPre<n * 2, Gtag>::f; \ - fpDbl_subNC = SubPre<n * 2, Gtag>::f; \ + fp_addNC = AddNC<n, Gtag>::f; \ + fp_subNC = SubNC<n, Gtag>::f; \ + fpDbl_addNC = AddNC<n * 2, Gtag>::f; \ + fpDbl_subNC = SubNC<n * 2, Gtag>::f; \ } \ - SET_OP_LLVM(bit) \ + SET_OP_LLVM \ } #ifdef MCL_USE_XBYAK @@ -476,41 +512,26 @@ void Op::init(const std::string& mstr, size_t maxBitSize, Mode mode) } #endif switch (roundBit) { - case 64: SET_OP(64); SET_OP_DBL_LLVM(64, 128); break; - case 128: SET_OP(128); SET_OP_DBL_LLVM(128, 256); break; - case 192: SET_OP(192); SET_OP_DBL_LLVM(192, 384); break; - case 256: SET_OP(256); SET_OP_DBL_LLVM(256, 512); break; + case 64: SET_OP(64); SET_OP_LLVM2(64); break; + case 128: SET_OP(128); SET_OP_LLVM2(128); break; + case 192: SET_OP(192); SET_OP_LLVM2(192); break; + case 256: SET_OP(256); SET_OP_LLVM2(256); break; case 320: SET_OP(320); break; case 384: SET_OP(384); break; case 448: SET_OP(448); break; - case 512: SET_OP(512); - // QQQ : need refactor for large prime -#if MCL_MAX_OP_BIT_SIZE == 768 - SET_OP_DBL_LLVM(512, 1024); -#endif - break; + case 512: SET_OP(512); break; #if CYBOZU_OS_BIT == 64 - case 576: SET_OP(576); -#if MCL_MAX_OP_BIT_SIZE == 768 - SET_OP_DBL_LLVM(576, 1152); -#endif - break; + case 576: SET_OP(576); break; #if MCL_MAX_OP_BIT_SIZE == 768 - case 640: SET_OP(640); - SET_OP_DBL_LLVM(640, 1280); - break; - case 704: SET_OP(704); - SET_OP_DBL_LLVM(704, 1408); - break; - case 768: SET_OP(768); - SET_OP_DBL_LLVM(768, 1536); - break; + case 640: SET_OP(640); break; + case 704: SET_OP(704); break; + case 768: SET_OP(768); break; #endif #else - case 32: SET_OP(32); SET_OP_DBL_LLVM(32, 64); break; - case 96: SET_OP(96); SET_OP_DBL_LLVM(96, 192); break; - case 160: SET_OP(160); SET_OP_DBL_LLVM(160, 320); break; - case 224: SET_OP(224); SET_OP_DBL_LLVM(224, 448); break; + case 32: SET_OP(32); SET_OP_LLVM2(32); break; + case 96: SET_OP(96); SET_OP_LLVM2(96); break; + case 160: SET_OP(160); SET_OP_LLVM2(160); break; + case 224: SET_OP(224); SET_OP_LLVM2(224); break; case 288: SET_OP(288); break; case 352: SET_OP(352); break; case 416: SET_OP(416); break; diff --git a/src/fp_proto.hpp b/src/fp_proto.hpp index 99763e6..a30730b 100644 --- a/src/fp_proto.hpp +++ b/src/fp_proto.hpp @@ -10,32 +10,124 @@ namespace mcl { namespace fp { +struct Ltag; +struct Atag; + // (carry, z[N]) <- x[N] + y[N] -template<size_t N, class Tag>class AddPre { static const u3u f; }; +template<size_t N, class Tag>struct AddNC { static const u3u f; }; // (carry, z[N]) <- x[N] - y[N] -template<size_t N, class Tag>class SubPre { static const u3u f; }; +template<size_t N, class Tag>struct SubNC { static const u3u f; }; // z[N * 2] <- x[N] * y[N] -template<size_t N, class Tag>class MulPre { static const void3u f; }; +template<size_t N, class Tag>struct MulPre { static const void3u f; }; // z[N * 2] <- x[N] * x[N] -template<size_t N, class Tag>class SqrPre { static const void2u f; }; +template<size_t N, class Tag>struct SqrPre { static const void2u f; }; // z[N + 1] <- x[N] * y -template<size_t N, class Tag>class Mul_UnitPre { static const void2uI f; }; +template<size_t N, class Tag>struct Mul_UnitPre { static const void2uI f; }; // z[N] <- x[N + 1] % p[N] -template<size_t N, class Tag>class N1_Mod { static const void3u f; }; +template<size_t N, class Tag>struct N1_Mod { static const void3u f; }; // z[N] <- x[N * 2] % p[N] -template<size_t N, class Tag>class Dbl_Mod { static const void3u f; }; +template<size_t N, class Tag>struct Dbl_Mod { static const void3u f; }; +// z[N] <- Montgomery(x[N], y[N], p[N]) +template<size_t N, class Tag>struct Mont { static const void4u f; }; +// z[N] <- MontRed(xy[N], p[N]) +template<size_t N, class Tag>struct MontRed { static const void3u f; }; + +// z[N] <- (x[N] * y[N]) % p[N] +template<size_t N, class Tag>struct Mul { static const void4u f; }; +// z[N] <- (x[N] ^ 2) % p[N] +template<size_t N, class Tag>struct Sqr { static const void3u f; }; + +// z[N] <- Montgomery(x[N], x[N], p[N]) +template<size_t N, class Tag> +struct SqrMont { + static inline void func(Unit *y, const Unit *x, const Unit *p) + { + Mont<N, Tag>::f(y, x, x, p); + } + static const void3u f; +}; +template<size_t N, class Tag> +const void3u SqrMont<N, Tag>::f = SqrMont<N, Tag>::func; + +// z[N] <- (x[N] + y[N]) % p[N] +template<size_t N, class Tag> +struct Add { + static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p) + { + if (AddNC<N, Tag>::f(z, x, y)) { + SubNC<N, Tag>::f(z, z, p); + return; + } + Unit tmp[N]; + if (SubNC<N, Tag>::f(tmp, z, p) == 0) { + memcpy(z, tmp, sizeof(tmp)); + } + } + static const void4u f; +}; + +template<size_t N, class Tag> +const void4u Add<N, Tag>::f = Add<N, Tag>::func; + +// z[N] <- (x[N] - y[N]) % p[N] +template<size_t N, class Tag> +struct Sub { + static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p) + { + if (SubNC<N, Tag>::f(z, x, y)) { + AddNC<N, Tag>::f(z, z, p); + } + } + static const void4u f; +}; + +template<size_t N, class Tag> +const void4u Sub<N, Tag>::f = Sub<N, Tag>::func; + +// z[N * 2] <- (x[N * 2] + y[N * 2]) mod p[N] << (N * UnitBitSize) +template<size_t N, class Tag> +struct DblAdd { + static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p) + { + if (AddNC<N * 2, Tag>::f(z, x, y)) { + SubNC<N, Tag>::f(z + N, z + N, p); + return; + } + Unit tmp[N]; + if (SubNC<N, Tag>::f(tmp, z + N, p) == 0) { + memcpy(z + N, tmp, sizeof(tmp)); + } + } + static const void4u f; +}; + +template<size_t N, class Tag> +const void4u DblAdd<N, Tag>::f = DblAdd<N, Tag>::func; + +// z[N * 2] <- (x[N * 2] - y[N * 2]) mod p[N] << (N * UnitBitSize) +template<size_t N, class Tag> +struct DblSub { + static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p) + { + if (SubNC<N * 2, Tag>::f(z, x, y)) { + AddNC<N, Tag>::f(z + N, z + N, p); + } + } + static const void4u f; +}; + +template<size_t N, class Tag> +const void4u DblSub<N, Tag>::f = DblSub<N, Tag>::func; } } // mcl::fp #ifdef MCL_USE_LLVM -extern "C" { - #define MCL_FP_DEF_FUNC_SUB(len, suf) \ void mcl_fp_add ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \ void mcl_fp_sub ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \ -void mcl_fp_addNC ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ -void mcl_fp_subNC ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ +mcl::fp::Unit mcl_fp_addNC ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ +mcl::fp::Unit mcl_fp_subNC ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ void mcl_fp_mul_UnitPre ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, mcl::fp::Unit y); \ void mcl_fpDbl_mulPre ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ void mcl_fpDbl_sqrPre ## len ## suf(mcl::fp::Unit* y, const mcl::fp::Unit* x); \ @@ -45,15 +137,16 @@ void mcl_fpDbl_add ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const void mcl_fpDbl_sub ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); #define MCL_FP_DEF_FUNC(len) \ - MCL_FP_DEF_FUNC_SUB(len, G) \ MCL_FP_DEF_FUNC_SUB(len, L) \ MCL_FP_DEF_FUNC_SUB(len, A) #define MCL_FP_DEF_FUNC_SPECIAL(suf) \ - void mcl_fpDbl_mod_NIST_P192 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* /* dummy */); \ - void mcl_fp_mul_NIST_P192 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* /* dummy */); \ - void mcl_fp_sqr_NIST_P192 ## suf(mcl::fp::Unit* y, const mcl::fp::Unit* x, const mcl::fp::Unit* /* dummy */); \ - void mcl_fpDbl_mod_NIST_P521 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* /* dummy */); +void mcl_fpDbl_mod_NIST_P192 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* /* dummy */); \ +void mcl_fp_mul_NIST_P192 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* /* dummy */); \ +void mcl_fp_sqr_NIST_P192 ## suf(mcl::fp::Unit* y, const mcl::fp::Unit* x, const mcl::fp::Unit* /* dummy */); \ +void mcl_fpDbl_mod_NIST_P521 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* /* dummy */); + +extern "C" { MCL_FP_DEF_FUNC(64) MCL_FP_DEF_FUNC(128) @@ -83,14 +176,13 @@ MCL_FP_DEF_FUNC(1408) MCL_FP_DEF_FUNC(1536) #endif -MCL_FP_DEF_FUNC_SPECIAL(G) MCL_FP_DEF_FUNC_SPECIAL(L) MCL_FP_DEF_FUNC_SPECIAL(A) +} + #undef MCL_FP_DEF_FUNC_SUB #undef MCL_FP_DEF_FUNC -} - #endif diff --git a/src/gen.cpp b/src/gen.cpp index b7d9f9f..155a5b6 100644 --- a/src/gen.cpp +++ b/src/gen.cpp @@ -296,57 +296,65 @@ struct Code : public mcl::Generator { void gen_mcl_fp_addsubNC(bool isAdd) { resetGlobalIdx(); + Operand r(Int, unit); Operand pz(IntPtr, bit); Operand px(IntPtr, bit); Operand py(IntPtr, bit); std::string name; if (isAdd) { name = "mcl_fp_addNC" + cybozu::itoa(bit) + "L"; - mcl_fp_addNCM[bit] = Function(name, Void, pz, px, py); + mcl_fp_addNCM[bit] = Function(name, r, pz, px, py); verifyAndSetPrivate(mcl_fp_addNCM[bit]); beginFunc(mcl_fp_addNCM[bit]); } else { name = "mcl_fp_subNC" + cybozu::itoa(bit) + "L"; - mcl_fp_subNCM[bit] = Function(name, Void, pz, px, py); + mcl_fp_subNCM[bit] = Function(name, r, pz, px, py); verifyAndSetPrivate(mcl_fp_subNCM[bit]); beginFunc(mcl_fp_subNCM[bit]); } - Operand x = load(px); - Operand y = load(py); + Operand x = zext(load(px), bit + unit); + Operand y = zext(load(py), bit + unit); Operand z; if (isAdd) { z = add(x, y); + store(trunc(z, bit), pz); + r = trunc(lshr(z, bit), unit); } else { z = sub(x, y); + store(trunc(z, bit), pz); + r = _and(trunc(lshr(z, bit), unit), makeImm(unit, 1)); } - store(z, pz); - ret(Void); + ret(r); endFunc(); } -#if 0 - void gen_mcl_fp_addS() +#if 0 // void-return version + void gen_mcl_fp_addsubNC(bool isAdd) { resetGlobalIdx(); Operand pz(IntPtr, bit); Operand px(IntPtr, bit); Operand py(IntPtr, bit); - Operand pp(IntPtr, bit); - std::string name = "mcl_fp_add" + cybozu::itoa(bit) + "S"; - mcl_fp_addM[bit] = Function(name, Void, pz, px, py, pp); - beginFunc(mcl_fp_addM[bit]); + std::string name; + if (isAdd) { + name = "mcl_fp_addNC" + cybozu::itoa(bit) + "L"; + mcl_fp_addNCM[bit] = Function(name, Void, pz, px, py); + verifyAndSetPrivate(mcl_fp_addNCM[bit]); + beginFunc(mcl_fp_addNCM[bit]); + } else { + name = "mcl_fp_subNC" + cybozu::itoa(bit) + "L"; + mcl_fp_subNCM[bit] = Function(name, Void, pz, px, py); + verifyAndSetPrivate(mcl_fp_subNCM[bit]); + beginFunc(mcl_fp_subNCM[bit]); + } Operand x = load(px); Operand y = load(py); - Operand p = load(pp); - x = zext(x, bit + unit); - y = zext(y, bit + unit); - p = zext(p, bit + unit); - Operand t0 = add(x, y); - Operand t1 = sub(t0, p); - Operand t = lshr(t1, bit); - t = trunc(t, 1); - t = select(t, t0, t1); - t = trunc(t, bit); - store(t, pz); + Operand z; + if (isAdd) { + z = add(x, y); + } else { + z = sub(x, y); + } + store(z, pz); ret(Void); endFunc(); } @@ -385,33 +393,6 @@ struct Code : public mcl::Generator { ret(Void); endFunc(); } -#if 0 - void gen_mcl_fp_subS() - { - resetGlobalIdx(); - Operand pz(IntPtr, bit); - Operand px(IntPtr, bit); - Operand py(IntPtr, bit); - Operand pp(IntPtr, bit); - std::string name = "mcl_fp_sub" + cybozu::itoa(bit) + "S"; - mcl_fp_subM[bit] = Function(name, Void, pz, px, py, pp); - beginFunc(mcl_fp_subM[bit]); - Operand x = load(px); - Operand y = load(py); - x = zext(x, bit + unit); - y = zext(y, bit + unit); - Operand vc = sub(x, y); - Operand v = trunc(vc, bit); // v = x - y - Operand c = lshr(vc, bit); - c = trunc(c, 1); - Operand p = load(pp); - Operand z = select(c, p, makeImm(bit, 0)); - v = add(v, z); - store(v, pz); - ret(Void); - endFunc(); - } -#endif void gen_mcl_fp_sub() { resetGlobalIdx(); diff --git a/src/low_gmp.hpp b/src/low_gmp.hpp index 44477d5..d11a30f 100644 --- a/src/low_gmp.hpp +++ b/src/low_gmp.hpp @@ -7,7 +7,7 @@ namespace mcl { namespace fp { struct Gtag; template<size_t N> -struct AddPre<N, Gtag> { +struct AddNC<N, Gtag> { static inline Unit func(Unit *z, const Unit *x, const Unit *y) { return mpn_add_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N); @@ -16,10 +16,10 @@ struct AddPre<N, Gtag> { }; template<size_t N> -const u3u AddPre<N, Gtag>::f = &AddPre<N, Gtag>::func; +const u3u AddNC<N, Gtag>::f = &AddNC<N, Gtag>::func; template<size_t N> -struct SubPre<N, Gtag> { +struct SubNC<N, Gtag> { static inline Unit func(Unit *z, const Unit *x, const Unit *y) { return mpn_sub_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N); @@ -28,7 +28,7 @@ struct SubPre<N, Gtag> { }; template<size_t N> -const u3u SubPre<N, Gtag>::f = &SubPre<N, Gtag>::func; +const u3u SubNC<N, Gtag>::f = &SubNC<N, Gtag>::func; template<size_t N> struct MulPre<N, Gtag> { |