about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author MITSUNARI Shigeo <herumi@nifty.com> 2016-10-05 14:36:51 +0800
committer MITSUNARI Shigeo <herumi@nifty.com> 2016-10-05 14:36:51 +0800
commit cd1257d1097a8ac4ddd1789d06a69839e782e93c (patch)
tree 2215e03a2916e8e8d9f0aa6705b7a9be9be861e1
parent 079617adf0c64742104c2a2f199e9002b0baea2c (diff)
download dexon-mcl-cd1257d1097a8ac4ddd1789d06a69839e782e93c.tar.gz
dexon-mcl-cd1257d1097a8ac4ddd1789d06a69839e782e93c.tar.zst
dexon-mcl-cd1257d1097a8ac4ddd1789d06a69839e782e93c.zip
refactoring setup
-rw-r--r-- sample/large.cpp 6
-rw-r--r-- sample/rawbench.cpp 16
-rw-r--r-- src/fp.cpp 227
-rw-r--r-- src/fp_proto.hpp 130
-rw-r--r-- src/gen.cpp 81
-rw-r--r-- src/low_gmp.hpp 8
6 files changed, 281 insertions, 187 deletions
diff --git a/sample/large.cpp b/sample/large.cpp
index 72de4a2..cd79412 100644
--- a/sample/large.cpp
+++ b/sample/large.cpp
@@ -108,11 +108,11 @@ void test(const std::string& pStr, mcl::fp::Mode mode)
}
CYBOZU_BENCH("mulPre", op.fpDbl_mulPre, ux, ux, uy);
CYBOZU_BENCH("sqrPre", op.fpDbl_sqrPre, ux, ux);
- CYBOZU_BENCH("add", op.fpDbl_add, ux, ux, ux);
- CYBOZU_BENCH("sub", op.fpDbl_sub, ux, ux, ux);
+ CYBOZU_BENCH("add", op.fpDbl_add, ux, ux, ux, op.p);
+ CYBOZU_BENCH("sub", op.fpDbl_sub, ux, ux, ux, op.p);
CYBOZU_BENCH("addNC", op.fpDbl_addNC, ux, ux, ux);
CYBOZU_BENCH("subNC", op.fpDbl_subNC, ux, ux, ux);
- CYBOZU_BENCH("mont", op.fpDbl_mod, ux, ux);
+ CYBOZU_BENCH("mont", op.fpDbl_mod, ux, ux, op.p);
CYBOZU_BENCH("mul", Fp::mul, x, x, x);
compareGmp(pStr);
}
diff --git a/sample/rawbench.cpp b/sample/rawbench.cpp
index 083d0cf..ddfe733 100644
--- a/sample/rawbench.cpp
+++ b/sample/rawbench.cpp
@@ -36,19 +36,19 @@ void benchRaw(const char *p, mcl::fp::Mode mode)
double fpDbl_addT, fpDbl_subT;
double fpDbl_sqrPreT, fpDbl_mulPreT, fpDbl_modT;
double fp2_sqrT, fp2_mulT;
- CYBOZU_BENCH_T(fp_addT, op.fp_add, uz, ux, uy);
- CYBOZU_BENCH_T(fp_subT, op.fp_sub, uz, uy, ux);
+ CYBOZU_BENCH_T(fp_addT, op.fp_add, uz, ux, uy, op.p);
+ CYBOZU_BENCH_T(fp_subT, op.fp_sub, uz, uy, ux, op.p);
CYBOZU_BENCH_T(fp_addNCT, op.fp_addNC, uz, ux, uy);
CYBOZU_BENCH_T(fp_subNCT, op.fp_subNC, uz, uy, ux);
- CYBOZU_BENCH_T(fp_sqrT, op.fp_sqr, uz, ux);
- CYBOZU_BENCH_T(fp_mulT, op.fp_mul, uz, ux, uy);
- CYBOZU_BENCH_T(fp_mul_UnitT, op.fp_mul_Unit, uz, ux, 12345678);
+ CYBOZU_BENCH_T(fp_sqrT, op.fp_sqr, uz, ux, op.p);
+ CYBOZU_BENCH_T(fp_mulT, op.fp_mul, uz, ux, uy, op.p);
+ CYBOZU_BENCH_T(fp_mul_UnitT, op.fp_mul_Unit, uz, ux, 12345678, op.p);
CYBOZU_BENCH_T(fp_mul_UnitPreT, op.fp_mul_UnitPre, ux, ux, 12345678);
- CYBOZU_BENCH_T(fpDbl_addT, op.fpDbl_add, uz, ux, uy);
- CYBOZU_BENCH_T(fpDbl_subT, op.fpDbl_sub, uz, uy, ux);
+ CYBOZU_BENCH_T(fpDbl_addT, op.fpDbl_add, uz, ux, uy, op.p);
+ CYBOZU_BENCH_T(fpDbl_subT, op.fpDbl_sub, uz, uy, ux, op.p);
CYBOZU_BENCH_T(fpDbl_sqrPreT, op.fpDbl_sqrPre, uz, ux);
CYBOZU_BENCH_T(fpDbl_mulPreT, op.fpDbl_mulPre, uz, ux, uy);
- CYBOZU_BENCH_T(fpDbl_modT, op.fpDbl_mod, uz, ux);
+ CYBOZU_BENCH_T(fpDbl_modT, op.fpDbl_mod, uz, ux, op.p);
Fp2 f2x, f2y;
f2x.a = fx;
f2x.b = fy;
diff --git a/src/fp.cpp b/src/fp.cpp
index bcaa295..97648d2 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -107,6 +107,70 @@ Mode StrToMode(const std::string& s)
throw cybozu::Exception("StrToMode") << s;
}
+#ifdef MCL_USE_LLVM
+
+#define MCL_DEF_LLVM_FUNC(bit) \
+template<>const u3u AddNC<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_addNC ## bit ## L; \
+template<>const u3u SubNC<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_subNC ## bit ## L; \
+template<>const void3u MulPre<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fpDbl_mulPre ## bit ## L; \
+template<>const void2u SqrPre<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fpDbl_sqrPre ## bit ## L; \
+template<>const void2uI Mul_UnitPre<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_mul_UnitPre ## bit ## L; \
+template<>const void4u Add<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_add ## bit ## L; \
+template<>const void4u Sub<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_sub ## bit ## L; \
+template<>const void4u Mont<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_mont ## bit ## L; \
+template<>const void3u MontRed<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_montRed ## bit ## L; \
+template<>const void4u DblAdd<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fpDbl_add ## bit ## L; \
+template<>const void4u DblSub<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fpDbl_sub ## bit ## L; \
+
+template<size_t N>
+struct Mul<N, Ltag> {
+ static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p)
+ {
+ Unit xy[N * 2];
+ MulPre<N, Ltag>::f(xy, x, y);
+ Dbl_Mod<N, Gtag>::f(z, xy, p);
+ }
+ static const void4u f;
+};
+
+template<size_t N>
+const void4u Mul<N, Ltag>::f = Mul<N, Ltag>::func;
+
+template<size_t N>
+struct Sqr<N, Ltag> {
+ static inline void func(Unit *y, const Unit *x, const Unit *p)
+ {
+ Unit xx[N * 2];
+ SqrPre<N, Ltag>::f(xx, x);
+ Dbl_Mod<N, Gtag>::f(y, xx, p);
+ }
+ static const void3u f;
+};
+
+template<size_t N>
+const void3u Sqr<N, Ltag>::f = Sqr<N, Ltag>::func;
+
+MCL_DEF_LLVM_FUNC(64)
+MCL_DEF_LLVM_FUNC(128)
+MCL_DEF_LLVM_FUNC(192)
+MCL_DEF_LLVM_FUNC(256)
+MCL_DEF_LLVM_FUNC(320)
+MCL_DEF_LLVM_FUNC(384)
+MCL_DEF_LLVM_FUNC(448)
+MCL_DEF_LLVM_FUNC(512)
+#if CYBOZU_OS_BIT == 32
+MCL_DEF_LLVM_FUNC(160)
+MCL_DEF_LLVM_FUNC(224)
+MCL_DEF_LLVM_FUNC(288)
+MCL_DEF_LLVM_FUNC(352)
+MCL_DEF_LLVM_FUNC(416)
+MCL_DEF_LLVM_FUNC(480)
+MCL_DEF_LLVM_FUNC(544)
+#else
+MCL_DEF_LLVM_FUNC(576)
+#endif
+
+#endif
template<size_t bitSize>
struct OpeFunc {
@@ -136,43 +200,6 @@ struct OpeFunc {
{
copyArray(y, x, N);
}
- static inline void fp_addC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
- {
- if (AddPre<N, Gtag>::f(z, x, y)) {
- SubPre<N, Gtag>::f(z, z, p);
- return;
- }
- Unit tmp[N];
- if (SubPre<N, Gtag>::f(tmp, z, p) == 0) {
- memcpy(z, tmp, sizeof(tmp));
- }
- }
- static inline void fp_subC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
- {
- if (SubPre<N, Gtag>::f(z, x, y)) {
- AddPre<N, Gtag>::f(z, z, p);
- }
- }
- /*
- z[N * 2] <- x[N * 2] + y[N * 2] mod p[N] << (N * UnitBitSize)
- */
- static inline void fpDbl_addC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
- {
- if (AddPre<N * 2, Gtag>::f(z, x, y)) {
- SubPre<N, Gtag>::f(z + N, z + N, p);
- return;
- }
- Unit tmp[N];
- if (SubPre<N, Gtag>::f(tmp, z + N, p) == 0) {
- memcpy(z + N, tmp, sizeof(tmp));
- }
- }
- static inline void fpDbl_subC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
- {
- if (SubPre<N * 2, Gtag>::f(z, x, y)) {
- AddPre<N, Gtag>::f(z + N, z + N, p);
- }
- }
// z[N] <- mont(x[N], y[N])
static inline void fp_mulMontC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
{
@@ -189,20 +216,20 @@ struct OpeFunc {
Unit t[N + 2];
Mul_UnitPre<N, Gtag>::f(t, p, q); // p * q
t[N + 1] = 0; // always zero
- c[N + 1] = AddPre<N + 1, Gtag>::f(c, c, t);
+ c[N + 1] = AddNC<N + 1, Gtag>::f(c, c, t);
c++;
for (size_t i = 1; i < N; i++) {
Mul_UnitPre<N, Gtag>::f(t, x, y[i]);
- c[N + 1] = AddPre<N + 1, Gtag>::f(c, c, t);
+ c[N + 1] = AddNC<N + 1, Gtag>::f(c, c, t);
q = c[0] * rp;
Mul_UnitPre<N, Gtag>::f(t, p, q);
- AddPre<N + 2, Gtag>::f(c, c, t);
+ AddNC<N + 2, Gtag>::f(c, c, t);
c++;
}
if (c[N]) {
- SubPre<N, Gtag>::f(z, c, p);
+ SubNC<N, Gtag>::f(z, c, p);
} else {
- if (SubPre<N, Gtag>::f(z, c, p)) {
+ if (SubNC<N, Gtag>::f(z, c, p)) {
memcpy(z, c, N * sizeof(Unit));
}
}
@@ -221,7 +248,7 @@ struct OpeFunc {
Unit *c = buf;
Unit q = xy[0] * rp;
Mul_UnitPre<N, Gtag>::f(t, p, q);
- buf[N * 2] = AddPre<N * 2, Gtag>::f(buf, xy, t);
+ buf[N * 2] = AddNC<N * 2, Gtag>::f(buf, xy, t);
c++;
for (size_t i = 1; i < N; i++) {
q = c[0] * rp;
@@ -231,9 +258,9 @@ struct OpeFunc {
c++;
}
if (c[N]) {
- SubPre<N, Gtag>::f(z, c, p);
+ SubNC<N, Gtag>::f(z, c, p);
} else {
- if (SubPre<N, Gtag>::f(z, c, p)) {
+ if (SubNC<N, Gtag>::f(z, c, p)) {
memcpy(z, c, N * sizeof(Unit));
}
}
@@ -289,39 +316,48 @@ struct OpeFunc {
if (x != y) fp_clearC(y);
return;
}
- fp_subC(y, p, x, p);
+ SubNC<N, Gtag>::f(y, p, x);
}
};
#ifdef MCL_USE_LLVM
- #define SET_OP_LLVM(bit) \
+ #define SET_OP_LLVM /* assume n */ \
if (mode == FP_LLVM || mode == FP_LLVM_MONT) { \
- fp_add = mcl_fp_add ## bit ## L; \
- fp_sub = mcl_fp_sub ## bit ## L; \
- if (!isFullBit) { \
- fp_addNC = mcl_fp_addNC ## bit ## L; \
- fp_subNC = mcl_fp_subNC ## bit ## L; \
- } \
- fpDbl_mulPre = mcl_fpDbl_mulPre ## bit ## L; \
- fp_mul_UnitPre = mcl_fp_mul_UnitPre ## bit ## L; \
- fpDbl_sqrPre = mcl_fpDbl_sqrPre ## bit ## L; \
+ fp_add = Add<n, Ltag>::f; \
+ fp_sub = Sub<n, Ltag>::f; \
+ fpDbl_add = DblAdd<n, Ltag>::f; \
+ fpDbl_sub = DblSub<n, Ltag>::f; \
if (mode == FP_LLVM_MONT) { \
- fpDbl_mod = mcl_fp_montRed ## bit ## L; \
- fp_mul = mcl_fp_mont ## bit ## L; \
+ fp_mul = Mont<n, Ltag>::f; \
+ fp_sqr = SqrMont<n, Ltag>::f; \
+ fpDbl_mod = MontRed<n, Ltag>::f; \
+ } else { \
+ fp_mul = Mul<n, Ltag>::f; \
+ fp_sqr = Sqr<n, Ltag>::f; \
+ } \
+ fpDbl_mulPre = MulPre<n, Ltag>::f; \
+ fpDbl_sqrPre = SqrPre<n, Ltag>::f; \
+ fp_mul_UnitPre = Mul_UnitPre<n, Ltag>::f; \
+ if (!isFullBit) { \
+ fp_addNC = AddNC<n, Ltag>::f; \
+ fp_subNC = SubNC<n, Ltag>::f; \
} \
}
- #define SET_OP_DBL_LLVM(bit, n2) \
+
+#define SET_OP_LLVM2(bit) \
+ { \
+ const int n = bit / UnitBitSize; \
if (mode == FP_LLVM || mode == FP_LLVM_MONT) { \
- fpDbl_add = mcl_fpDbl_add ## bit ## L; \
- fpDbl_sub = mcl_fpDbl_sub ## bit ## L; \
if (!isFullBit) { \
- fpDbl_addNC = mcl_fp_addNC ## n2 ## L; \
- fpDbl_subNC = mcl_fp_subNC ## n2 ## L; \
+ fpDbl_addNC = AddNC<n * 2, Ltag>::f; \
+ fpDbl_subNC = SubNC<n * 2, Ltag>::f; \
} \
- }
+ } \
+ }
+
#else
- #define SET_OP_LLVM(bit)
- #define SET_OP_DBL_LLVM(bit, n2)
+ #define SET_OP_LLVM
+ #define SET_OP_LLVM2(bit)
#endif
#define SET_OP(bit) \
@@ -332,8 +368,8 @@ struct OpeFunc {
fp_clear = OpeFunc<bit>::fp_clearC; \
fp_copy = OpeFunc<bit>::fp_copyC; \
fp_neg = OpeFunc<bit>::fp_negC; \
- fp_add = OpeFunc<bit>::fp_addC; \
- fp_sub = OpeFunc<bit>::fp_subC; \
+ fp_add = Add<n, Gtag>::f; \
+ fp_sub = Sub<n, Gtag>::f; \
if (isMont) { \
fp_mul = OpeFunc<bit>::fp_mulMontC; \
fp_sqr = OpeFunc<bit>::fp_sqrMontC; \
@@ -350,15 +386,15 @@ struct OpeFunc {
fpDbl_sqrPre = SqrPre<n, Gtag>::f; \
fp_mul_UnitPre = Mul_UnitPre<n, Gtag>::f; \
fpN1_mod = N1_Mod<n, Gtag>::f; \
- fpDbl_add = OpeFunc<bit>::fpDbl_addC; \
- fpDbl_sub = OpeFunc<bit>::fpDbl_subC; \
+ fpDbl_add = DblAdd<n, Gtag>::f; \
+ fpDbl_sub = DblSub<n, Gtag>::f; \
if (!isFullBit) { \
- fp_addNC = AddPre<n, Gtag>::f; \
- fp_subNC = SubPre<n, Gtag>::f; \
- fpDbl_addNC = AddPre<n * 2, Gtag>::f; \
- fpDbl_subNC = SubPre<n * 2, Gtag>::f; \
+ fp_addNC = AddNC<n, Gtag>::f; \
+ fp_subNC = SubNC<n, Gtag>::f; \
+ fpDbl_addNC = AddNC<n * 2, Gtag>::f; \
+ fpDbl_subNC = SubNC<n * 2, Gtag>::f; \
} \
- SET_OP_LLVM(bit) \
+ SET_OP_LLVM \
}
#ifdef MCL_USE_XBYAK
@@ -476,41 +512,26 @@ void Op::init(const std::string& mstr, size_t maxBitSize, Mode mode)
}
#endif
switch (roundBit) {
- case 64: SET_OP(64); SET_OP_DBL_LLVM(64, 128); break;
- case 128: SET_OP(128); SET_OP_DBL_LLVM(128, 256); break;
- case 192: SET_OP(192); SET_OP_DBL_LLVM(192, 384); break;
- case 256: SET_OP(256); SET_OP_DBL_LLVM(256, 512); break;
+ case 64: SET_OP(64); SET_OP_LLVM2(64); break;
+ case 128: SET_OP(128); SET_OP_LLVM2(128); break;
+ case 192: SET_OP(192); SET_OP_LLVM2(192); break;
+ case 256: SET_OP(256); SET_OP_LLVM2(256); break;
case 320: SET_OP(320); break;
case 384: SET_OP(384); break;
case 448: SET_OP(448); break;
- case 512: SET_OP(512);
- // QQQ : need refactor for large prime
-#if MCL_MAX_OP_BIT_SIZE == 768
- SET_OP_DBL_LLVM(512, 1024);
-#endif
- break;
+ case 512: SET_OP(512); break;
#if CYBOZU_OS_BIT == 64
- case 576: SET_OP(576);
-#if MCL_MAX_OP_BIT_SIZE == 768
- SET_OP_DBL_LLVM(576, 1152);
-#endif
- break;
+ case 576: SET_OP(576); break;
#if MCL_MAX_OP_BIT_SIZE == 768
- case 640: SET_OP(640);
- SET_OP_DBL_LLVM(640, 1280);
- break;
- case 704: SET_OP(704);
- SET_OP_DBL_LLVM(704, 1408);
- break;
- case 768: SET_OP(768);
- SET_OP_DBL_LLVM(768, 1536);
- break;
+ case 640: SET_OP(640); break;
+ case 704: SET_OP(704); break;
+ case 768: SET_OP(768); break;
#endif
#else
- case 32: SET_OP(32); SET_OP_DBL_LLVM(32, 64); break;
- case 96: SET_OP(96); SET_OP_DBL_LLVM(96, 192); break;
- case 160: SET_OP(160); SET_OP_DBL_LLVM(160, 320); break;
- case 224: SET_OP(224); SET_OP_DBL_LLVM(224, 448); break;
+ case 32: SET_OP(32); SET_OP_LLVM2(32); break;
+ case 96: SET_OP(96); SET_OP_LLVM2(96); break;
+ case 160: SET_OP(160); SET_OP_LLVM2(160); break;
+ case 224: SET_OP(224); SET_OP_LLVM2(224); break;
case 288: SET_OP(288); break;
case 352: SET_OP(352); break;
case 416: SET_OP(416); break;
diff --git a/src/fp_proto.hpp b/src/fp_proto.hpp
index 99763e6..a30730b 100644
--- a/src/fp_proto.hpp
+++ b/src/fp_proto.hpp
@@ -10,32 +10,124 @@
namespace mcl { namespace fp {
+struct Ltag;
+struct Atag;
+
// (carry, z[N]) <- x[N] + y[N]
-template<size_t N, class Tag>class AddPre { static const u3u f; };
+template<size_t N, class Tag>struct AddNC { static const u3u f; };
// (carry, z[N]) <- x[N] - y[N]
-template<size_t N, class Tag>class SubPre { static const u3u f; };
+template<size_t N, class Tag>struct SubNC { static const u3u f; };
// z[N * 2] <- x[N] * y[N]
-template<size_t N, class Tag>class MulPre { static const void3u f; };
+template<size_t N, class Tag>struct MulPre { static const void3u f; };
// z[N * 2] <- x[N] * x[N]
-template<size_t N, class Tag>class SqrPre { static const void2u f; };
+template<size_t N, class Tag>struct SqrPre { static const void2u f; };
// z[N + 1] <- x[N] * y
-template<size_t N, class Tag>class Mul_UnitPre { static const void2uI f; };
+template<size_t N, class Tag>struct Mul_UnitPre { static const void2uI f; };
// z[N] <- x[N + 1] % p[N]
-template<size_t N, class Tag>class N1_Mod { static const void3u f; };
+template<size_t N, class Tag>struct N1_Mod { static const void3u f; };
// z[N] <- x[N * 2] % p[N]
-template<size_t N, class Tag>class Dbl_Mod { static const void3u f; };
+template<size_t N, class Tag>struct Dbl_Mod { static const void3u f; };
+// z[N] <- Montgomery(x[N], y[N], p[N])
+template<size_t N, class Tag>struct Mont { static const void4u f; };
+// z[N] <- MontRed(xy[N * 2], p[N])
+template<size_t N, class Tag>struct MontRed { static const void3u f; };
+
+// z[N] <- (x[N] * y[N]) % p[N]
+template<size_t N, class Tag>struct Mul { static const void4u f; };
+// z[N] <- (x[N] ^ 2) % p[N]
+template<size_t N, class Tag>struct Sqr { static const void3u f; };
+
+// z[N] <- Montgomery(x[N], x[N], p[N])
+template<size_t N, class Tag>
+struct SqrMont {
+ static inline void func(Unit *y, const Unit *x, const Unit *p)
+ {
+ Mont<N, Tag>::f(y, x, x, p);
+ }
+ static const void3u f;
+};
+template<size_t N, class Tag>
+const void3u SqrMont<N, Tag>::f = SqrMont<N, Tag>::func;
+
+// z[N] <- (x[N] + y[N]) % p[N]
+template<size_t N, class Tag>
+struct Add {
+ static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p)
+ {
+ if (AddNC<N, Tag>::f(z, x, y)) {
+ SubNC<N, Tag>::f(z, z, p);
+ return;
+ }
+ Unit tmp[N];
+ if (SubNC<N, Tag>::f(tmp, z, p) == 0) {
+ memcpy(z, tmp, sizeof(tmp));
+ }
+ }
+ static const void4u f;
+};
+
+template<size_t N, class Tag>
+const void4u Add<N, Tag>::f = Add<N, Tag>::func;
+
+// z[N] <- (x[N] - y[N]) % p[N]
+template<size_t N, class Tag>
+struct Sub {
+ static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p)
+ {
+ if (SubNC<N, Tag>::f(z, x, y)) {
+ AddNC<N, Tag>::f(z, z, p);
+ }
+ }
+ static const void4u f;
+};
+
+template<size_t N, class Tag>
+const void4u Sub<N, Tag>::f = Sub<N, Tag>::func;
+
+// z[N * 2] <- (x[N * 2] + y[N * 2]) mod p[N] << (N * UnitBitSize)
+template<size_t N, class Tag>
+struct DblAdd {
+ static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p)
+ {
+ if (AddNC<N * 2, Tag>::f(z, x, y)) {
+ SubNC<N, Tag>::f(z + N, z + N, p);
+ return;
+ }
+ Unit tmp[N];
+ if (SubNC<N, Tag>::f(tmp, z + N, p) == 0) {
+ memcpy(z + N, tmp, sizeof(tmp));
+ }
+ }
+ static const void4u f;
+};
+
+template<size_t N, class Tag>
+const void4u DblAdd<N, Tag>::f = DblAdd<N, Tag>::func;
+
+// z[N * 2] <- (x[N * 2] - y[N * 2]) mod p[N] << (N * UnitBitSize)
+template<size_t N, class Tag>
+struct DblSub {
+ static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p)
+ {
+ if (SubNC<N * 2, Tag>::f(z, x, y)) {
+ AddNC<N, Tag>::f(z + N, z + N, p);
+ }
+ }
+ static const void4u f;
+};
+
+template<size_t N, class Tag>
+const void4u DblSub<N, Tag>::f = DblSub<N, Tag>::func;
} } // mcl::fp
#ifdef MCL_USE_LLVM
-extern "C" {
-
#define MCL_FP_DEF_FUNC_SUB(len, suf) \
void mcl_fp_add ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
void mcl_fp_sub ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
-void mcl_fp_addNC ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
-void mcl_fp_subNC ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
+mcl::fp::Unit mcl_fp_addNC ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
+mcl::fp::Unit mcl_fp_subNC ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
void mcl_fp_mul_UnitPre ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, mcl::fp::Unit y); \
void mcl_fpDbl_mulPre ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
void mcl_fpDbl_sqrPre ## len ## suf(mcl::fp::Unit* y, const mcl::fp::Unit* x); \
@@ -45,15 +137,16 @@ void mcl_fpDbl_add ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const
void mcl_fpDbl_sub ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p);
#define MCL_FP_DEF_FUNC(len) \
- MCL_FP_DEF_FUNC_SUB(len, G) \
MCL_FP_DEF_FUNC_SUB(len, L) \
MCL_FP_DEF_FUNC_SUB(len, A)
#define MCL_FP_DEF_FUNC_SPECIAL(suf) \
- void mcl_fpDbl_mod_NIST_P192 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* /* dummy */); \
- void mcl_fp_mul_NIST_P192 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* /* dummy */); \
- void mcl_fp_sqr_NIST_P192 ## suf(mcl::fp::Unit* y, const mcl::fp::Unit* x, const mcl::fp::Unit* /* dummy */); \
- void mcl_fpDbl_mod_NIST_P521 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* /* dummy */);
+void mcl_fpDbl_mod_NIST_P192 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* /* dummy */); \
+void mcl_fp_mul_NIST_P192 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* /* dummy */); \
+void mcl_fp_sqr_NIST_P192 ## suf(mcl::fp::Unit* y, const mcl::fp::Unit* x, const mcl::fp::Unit* /* dummy */); \
+void mcl_fpDbl_mod_NIST_P521 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* /* dummy */);
+
+extern "C" {
MCL_FP_DEF_FUNC(64)
MCL_FP_DEF_FUNC(128)
@@ -83,14 +176,13 @@ MCL_FP_DEF_FUNC(1408)
MCL_FP_DEF_FUNC(1536)
#endif
-MCL_FP_DEF_FUNC_SPECIAL(G)
MCL_FP_DEF_FUNC_SPECIAL(L)
MCL_FP_DEF_FUNC_SPECIAL(A)
+}
+
#undef MCL_FP_DEF_FUNC_SUB
#undef MCL_FP_DEF_FUNC
-}
-
#endif
diff --git a/src/gen.cpp b/src/gen.cpp
index b7d9f9f..155a5b6 100644
--- a/src/gen.cpp
+++ b/src/gen.cpp
@@ -296,57 +296,65 @@ struct Code : public mcl::Generator {
void gen_mcl_fp_addsubNC(bool isAdd)
{
resetGlobalIdx();
+ Operand r(Int, unit);
Operand pz(IntPtr, bit);
Operand px(IntPtr, bit);
Operand py(IntPtr, bit);
std::string name;
if (isAdd) {
name = "mcl_fp_addNC" + cybozu::itoa(bit) + "L";
- mcl_fp_addNCM[bit] = Function(name, Void, pz, px, py);
+ mcl_fp_addNCM[bit] = Function(name, r, pz, px, py);
verifyAndSetPrivate(mcl_fp_addNCM[bit]);
beginFunc(mcl_fp_addNCM[bit]);
} else {
name = "mcl_fp_subNC" + cybozu::itoa(bit) + "L";
- mcl_fp_subNCM[bit] = Function(name, Void, pz, px, py);
+ mcl_fp_subNCM[bit] = Function(name, r, pz, px, py);
verifyAndSetPrivate(mcl_fp_subNCM[bit]);
beginFunc(mcl_fp_subNCM[bit]);
}
- Operand x = load(px);
- Operand y = load(py);
+ Operand x = zext(load(px), bit + unit);
+ Operand y = zext(load(py), bit + unit);
Operand z;
if (isAdd) {
z = add(x, y);
+ store(trunc(z, bit), pz);
+ r = trunc(lshr(z, bit), unit);
} else {
z = sub(x, y);
+ store(trunc(z, bit), pz);
+ r = _and(trunc(lshr(z, bit), unit), makeImm(unit, 1));
}
- store(z, pz);
- ret(Void);
+ ret(r);
endFunc();
}
-#if 0
- void gen_mcl_fp_addS()
+#if 0 // void-return version
+ void gen_mcl_fp_addsubNC(bool isAdd)
{
resetGlobalIdx();
Operand pz(IntPtr, bit);
Operand px(IntPtr, bit);
Operand py(IntPtr, bit);
- Operand pp(IntPtr, bit);
- std::string name = "mcl_fp_add" + cybozu::itoa(bit) + "S";
- mcl_fp_addM[bit] = Function(name, Void, pz, px, py, pp);
- beginFunc(mcl_fp_addM[bit]);
+ std::string name;
+ if (isAdd) {
+ name = "mcl_fp_addNC" + cybozu::itoa(bit) + "L";
+ mcl_fp_addNCM[bit] = Function(name, Void, pz, px, py);
+ verifyAndSetPrivate(mcl_fp_addNCM[bit]);
+ beginFunc(mcl_fp_addNCM[bit]);
+ } else {
+ name = "mcl_fp_subNC" + cybozu::itoa(bit) + "L";
+ mcl_fp_subNCM[bit] = Function(name, Void, pz, px, py);
+ verifyAndSetPrivate(mcl_fp_subNCM[bit]);
+ beginFunc(mcl_fp_subNCM[bit]);
+ }
Operand x = load(px);
Operand y = load(py);
- Operand p = load(pp);
- x = zext(x, bit + unit);
- y = zext(y, bit + unit);
- p = zext(p, bit + unit);
- Operand t0 = add(x, y);
- Operand t1 = sub(t0, p);
- Operand t = lshr(t1, bit);
- t = trunc(t, 1);
- t = select(t, t0, t1);
- t = trunc(t, bit);
- store(t, pz);
+ Operand z;
+ if (isAdd) {
+ z = add(x, y);
+ } else {
+ z = sub(x, y);
+ }
+ store(z, pz);
ret(Void);
endFunc();
}
@@ -385,33 +393,6 @@ struct Code : public mcl::Generator {
ret(Void);
endFunc();
}
-#if 0
- void gen_mcl_fp_subS()
- {
- resetGlobalIdx();
- Operand pz(IntPtr, bit);
- Operand px(IntPtr, bit);
- Operand py(IntPtr, bit);
- Operand pp(IntPtr, bit);
- std::string name = "mcl_fp_sub" + cybozu::itoa(bit) + "S";
- mcl_fp_subM[bit] = Function(name, Void, pz, px, py, pp);
- beginFunc(mcl_fp_subM[bit]);
- Operand x = load(px);
- Operand y = load(py);
- x = zext(x, bit + unit);
- y = zext(y, bit + unit);
- Operand vc = sub(x, y);
- Operand v = trunc(vc, bit); // v = x - y
- Operand c = lshr(vc, bit);
- c = trunc(c, 1);
- Operand p = load(pp);
- Operand z = select(c, p, makeImm(bit, 0));
- v = add(v, z);
- store(v, pz);
- ret(Void);
- endFunc();
- }
-#endif
void gen_mcl_fp_sub()
{
resetGlobalIdx();
diff --git a/src/low_gmp.hpp b/src/low_gmp.hpp
index 44477d5..d11a30f 100644
--- a/src/low_gmp.hpp
+++ b/src/low_gmp.hpp
@@ -7,7 +7,7 @@ namespace mcl { namespace fp {
struct Gtag;
template<size_t N>
-struct AddPre<N, Gtag> {
+struct AddNC<N, Gtag> {
static inline Unit func(Unit *z, const Unit *x, const Unit *y)
{
return mpn_add_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N);
@@ -16,10 +16,10 @@ struct AddPre<N, Gtag> {
};
template<size_t N>
-const u3u AddPre<N, Gtag>::f = &AddPre<N, Gtag>::func;
+const u3u AddNC<N, Gtag>::f = &AddNC<N, Gtag>::func;
template<size_t N>
-struct SubPre<N, Gtag> {
+struct SubNC<N, Gtag> {
static inline Unit func(Unit *z, const Unit *x, const Unit *y)
{
return mpn_sub_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N);
@@ -28,7 +28,7 @@ struct SubPre<N, Gtag> {
};
template<size_t N>
-const u3u SubPre<N, Gtag>::f = &SubPre<N, Gtag>::func;
+const u3u SubNC<N, Gtag>::f = &SubNC<N, Gtag>::func;
template<size_t N>
struct MulPre<N, Gtag> {