aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMITSUNARI Shigeo <herumi@nifty.com>2016-11-01 10:54:54 +0800
committerMITSUNARI Shigeo <herumi@nifty.com>2016-11-01 10:54:54 +0800
commitb49f25a6ccc0d04ac47069c8ed726e6648581bfc (patch)
tree75285b65123a6014028afe2e5a5c8f2412ea94e5
parentc8a13c7a28277be1c24276ab2d7380cb3b6e13bf (diff)
downloaddexon-mcl-b49f25a6ccc0d04ac47069c8ed726e6648581bfc.tar.gz
dexon-mcl-b49f25a6ccc0d04ac47069c8ed726e6648581bfc.tar.zst
dexon-mcl-b49f25a6ccc0d04ac47069c8ed726e6648581bfc.zip
use karatsuba for sqr if N >= 6
-rw-r--r--misc/karatsuba.cpp16
-rw-r--r--src/fp_llvm.hpp3
-rw-r--r--src/fp_proto.hpp51
3 files changed, 54 insertions, 16 deletions
diff --git a/misc/karatsuba.cpp b/misc/karatsuba.cpp
index fb04ed9..e2156ff 100644
--- a/misc/karatsuba.cpp
+++ b/misc/karatsuba.cpp
@@ -24,9 +24,9 @@ void dump(const Unit *x, size_t N)
printf("\n");
}
-void gggKara(uint64_t *z, const uint64_t *x, const uint64_t *y)
+void gggKara(uint64_t *z, const uint64_t *x, const uint64_t *)
{
- MulPre<8, Gtag>::f(z, x, y);
+ SqrPre<8, Gtag>::f(z, x);
}
void gggLLVM(uint64_t *z, const uint64_t *x, const uint64_t *y)
{
@@ -41,12 +41,16 @@ void benchKaratsuba()
Unit x[N], z[N * 2];
rg.read(x, N);
rg.read(z, N);
- CYBOZU_BENCH("g:mulpre", (MulPreCore<N, Gtag>::f), z, z, x);
- CYBOZU_BENCH("g:kara ", (MulPre<N, Gtag>::karatsuba), z, z, x);
+ CYBOZU_BENCH("g:mulPre ", (MulPreCore<N, Gtag>::f), z, z, x);
+ CYBOZU_BENCH("g:mulKara", (MulPre<N, Gtag>::karatsuba), z, z, x);
+ CYBOZU_BENCH("g:sqrPre ", (SqrPreCore<N, Gtag>::f), z, z);
+ CYBOZU_BENCH("g:sqrKara", (SqrPre<N, Gtag>::karatsuba), z, z);
#ifdef MCL_USE_LLVM
- CYBOZU_BENCH("l:mulpre", (MulPreCore<N, Ltag>::f), z, z, x);
- CYBOZU_BENCH("l:kara ", (MulPre<N, Ltag>::karatsuba), z, z, x);
+ CYBOZU_BENCH("l:mulPre ", (MulPreCore<N, Ltag>::f), z, z, x);
+ CYBOZU_BENCH("l:mulKara", (MulPre<N, Ltag>::karatsuba), z, z, x);
+ CYBOZU_BENCH("l:sqrPre ", (SqrPreCore<N, Ltag>::f), z, z);
+ CYBOZU_BENCH("l:sqrKara", (SqrPre<N, Ltag>::karatsuba), z, z);
#endif
}
diff --git a/src/fp_llvm.hpp b/src/fp_llvm.hpp
index 7c295d3..13c5df7 100644
--- a/src/fp_llvm.hpp
+++ b/src/fp_llvm.hpp
@@ -4,7 +4,8 @@ namespace mcl { namespace fp {
template<>
struct EnableKaratsuba<Ltag> {
- static const size_t minN = 8; /* use karatsuba if N >= 8 */
+ static const size_t minMulN = 8;
+ static const size_t minSqrN = 6;
};
#define MCL_DEF_LLVM_FUNC(n) \
diff --git a/src/fp_proto.hpp b/src/fp_proto.hpp
index 6c54a74..7e19710 100644
--- a/src/fp_proto.hpp
+++ b/src/fp_proto.hpp
@@ -93,11 +93,6 @@ struct Neg {
template<size_t N, class Tag>
const void3u Neg<N, Tag>::f = Neg<N, Tag>::func;
-static inline void mulPreGmp(Unit *z, const Unit *x, const Unit *y, size_t N)
-{
- mpn_mul_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, (int)N);
-}
-
// z[N * 2] <- x[N] * y[N]
template<size_t N, class Tag = Gtag>
struct MulPreCore {
@@ -113,8 +108,11 @@ const void3u MulPreCore<N, Tag>::f = MulPreCore<N, Tag>::func;
template<class Tag = Gtag>
struct EnableKaratsuba {
- static const size_t minN = 100; /* always use mpn_mul_n for Gtag */
+ /* always use mpn* for Gtag */
+ static const size_t minMulN = 100;
+ static const size_t minSqrN = 100;
};
+
template<size_t N, class Tag = Gtag>
struct MulPre {
/*
@@ -154,7 +152,7 @@ struct MulPre {
static inline void func(Unit *z, const Unit *x, const Unit *y)
{
#if 1
- if (N >= EnableKaratsuba<Tag>::minN && (N % 2) == 0) {
+ if (N >= EnableKaratsuba<Tag>::minMulN && (N % 2) == 0) {
karatsuba(z, x, y);
return;
}
@@ -184,15 +182,50 @@ struct MulPre<1, Tag> {
// z[N * 2] <- x[N] * x[N]
template<size_t N, class Tag = Gtag>
-struct SqrPre {
+struct SqrPreCore {
static inline void func(Unit *y, const Unit *x)
{
- return mpn_sqr((mp_limb_t*)y, (const mp_limb_t*)x, N);
+ mpn_sqr((mp_limb_t*)y, (const mp_limb_t*)x, N);
}
static const void2u f;
};
template<size_t N, class Tag>
+const void2u SqrPreCore<N, Tag>::f = SqrPreCore<N, Tag>::func;
+
+template<size_t N, class Tag = Gtag>
+struct SqrPre {
+ /*
+ W = 1 << H
+ x = aW + b
+ x^2 = aaW^2 + 2abW + bb
+ */
+ static inline void karatsuba(Unit *z, const Unit *x)
+ {
+ const size_t H = N / 2;
+ SqrPre<H, Tag>::f(z, x); // b^2
+ SqrPre<H, Tag>::f(z + N, x + H); // a^2
+ Unit ab[N];
+ MulPre<H, Tag>::f(ab, x, x + H); // ab
+ Unit c = AddPre<N, Tag>::f(ab, ab, ab);
+ c += AddPre<N, Tag>::f(z + H, z + H, ab);
+ if (c) {
+ AddUnitPre<Tag>::f(z + N + H, H, c);
+ }
+ }
+ static inline void func(Unit *y, const Unit *x)
+ {
+#if 1
+ if (N >= EnableKaratsuba<Tag>::minSqrN && (N % 2) == 0) {
+ karatsuba(y, x);
+ return;
+ }
+#endif
+ SqrPreCore<N, Tag>::f(y, x);
+ }
+ static const void2u f;
+};
+template<size_t N, class Tag>
const void2u SqrPre<N, Tag>::f = SqrPre<N, Tag>::func;
// z[N + 1] <- x[N] * y