aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMITSUNARI Shigeo <herumi@nifty.com>2016-10-30 13:38:37 +0800
committerMITSUNARI Shigeo <herumi@nifty.com>2016-10-30 13:38:37 +0800
commitf21ac0e75b1b8531f804a4fd0567e9ff2354f4be (patch)
treedea4e84d0392f5a2e9f1b0a16a6d9e65d2b76bcf
parentf9026ec77bed29eb2f7b693a9a60d4467b93e35c (diff)
downloaddexon-mcl-f21ac0e75b1b8531f804a4fd0567e9ff2354f4be.tar.gz
dexon-mcl-f21ac0e75b1b8531f804a4fd0567e9ff2354f4be.tar.zst
dexon-mcl-f21ac0e75b1b8531f804a4fd0567e9ff2354f4be.zip
use karatsuba for N >= 8
-rw-r--r--include/mcl/op.hpp2
-rw-r--r--misc/karatsuba.cpp24
-rw-r--r--misc/mul.cpp55
-rw-r--r--src/fp_proto.hpp17
4 files changed, 76 insertions, 22 deletions
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 920c74c..b858ad9 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -61,7 +61,7 @@ enum Mode {
enum PrimeMode {
PM_GENERIC = 0,
PM_NICT_P192,
- PM_NICT_P521,
+ PM_NICT_P521
};
struct Op {
diff --git a/misc/karatsuba.cpp b/misc/karatsuba.cpp
index 7edd4a8..b62b3b6 100644
--- a/misc/karatsuba.cpp
+++ b/misc/karatsuba.cpp
@@ -6,7 +6,9 @@
#include <mcl/fp.hpp>
#include <cybozu/xorshift.hpp>
#include "../src/fp_proto.hpp"
+#ifdef MCL_USE_LLVM
#include "../src/fp_llvm.hpp"
+#endif
#include <cybozu/test.hpp>
#include <cybozu/benchmark.hpp>
@@ -42,8 +44,10 @@ void benchKaratsuba()
CYBOZU_BENCH("g:mulpre", (MulPreCore<N, Gtag>::f), z, z, x);
CYBOZU_BENCH("g:kara ", (MulPre<N, Gtag>::karatsuba), z, z, x);
+#ifdef MCL_USE_LLVM
CYBOZU_BENCH("l:mulpre", (MulPreCore<N, Ltag>::f), z, z, x);
CYBOZU_BENCH("l:kara ", (MulPre<N, Ltag>::karatsuba), z, z, x);
+#endif
}
CYBOZU_TEST_AUTO(karatsuba)
@@ -57,23 +61,3 @@ CYBOZU_TEST_AUTO(karatsuba)
#endif
}
-#if 0
-CYBOZU_TEST_AUTO(mulPre)
-{
- cybozu::XorShift rg;
-// const char *p = "0x2523648240000001ba344d80000000086121000000000013a700000000000013";
-// const char *p = "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff";
-// const char *p = "4562440617622195218641171605700291324893228507248559930579192517899275167208677386505912811317371399778642309573594407310688704721375437998252661319722214188251994674360264950082874192246603471"; // 640 bit
- const char *p = "1552518092300708935148979488462502555256886017116696611139052038026050952686376886330878408828646477950487730697131073206171580044114814391444287275041181139204454976020849905550265285631598444825262999193716468750892846853816057031"; // 768 bit
- Fp::init(p, mcl::fp::FP_LLVM);
- const mcl::fp::Op& op = Fp::getOp();
- const size_t N = 12;
- Unit x[N], y[N];
- rg.read(x, N);
- rg.read(y, N);
- CYBOZU_BENCH("g:mul ", (Mul<N, Gtag>::f), y, y, x, op.p);
- CYBOZU_BENCH("g:mont", (Mont<N, Gtag>::f), y, y, x, op.p);
- CYBOZU_BENCH("l:mul ", (Mul<N, Ltag>::f), y, y, x, op.p);
- CYBOZU_BENCH("l:mont", (Mont<N, Ltag>::f), y, y, x, op.p);
-}
-#endif
diff --git a/misc/mul.cpp b/misc/mul.cpp
new file mode 100644
index 0000000..f43adb9
--- /dev/null
+++ b/misc/mul.cpp
@@ -0,0 +1,55 @@
+/*
+ sudo cpufreq-set -c 0 -g performance
+ mycl mul.cpp -DMCL_USE_LLVM=1 ../lib/libmcl.a && ./a.out
+*/
+#include <stdio.h>
+#include <mcl/fp.hpp>
+#include <cybozu/xorshift.hpp>
+#include <cybozu/test.hpp>
+#include <cybozu/benchmark.hpp>
+
+typedef mcl::FpT<> Fp;
+
+using namespace mcl::fp;
+
+void dump(const Unit *x, size_t N)
+{
+ for (size_t i = 0; i < N; i++) {
+ printf("%016llx ", (long long)x[N - 1 - i]);
+ }
+ printf("\n");
+}
+
+CYBOZU_TEST_AUTO(mulPre)
+{
+ cybozu::XorShift rg;
+ const char *pTbl[] = {
+ "0x2523648240000001ba344d80000000086121000000000013a700000000000013",
+ "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff",
+ "4562440617622195218641171605700291324893228507248559930579192517899275167208677386505912811317371399778642309573594407310688704721375437998252661319722214188251994674360264950082874192246603471", // 640 bit
+ "1552518092300708935148979488462502555256886017116696611139052038026050952686376886330878408828646477950487730697131073206171580044114814391444287275041181139204454976020849905550265285631598444825262999193716468750892846853816057031", // 768 bit
+ };
+ const size_t N = 12;
+ const Mode modeTbl[] = {
+ FP_GMP_MONT,
+#ifdef MCL_USE_LLVM
+ FP_LLVM_MONT,
+#endif
+ };
+ for (size_t j = 0; j < CYBOZU_NUM_OF_ARRAY(modeTbl); j++) {
+ Mode mode = modeTbl[j];
+ printf("%s\n", ModeToStr(mode));
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(pTbl); i++) {
+ const char *p = pTbl[i];
+ Fp::init(p, mode);
+ printf("bitSize=%d\n", (int)Fp::getBitSize());
+ const Op& op = Fp::getOp();
+ Unit x[N], y[N * 2];
+ rg.read(x, N);
+ rg.read(y, N * 2);
+ CYBOZU_BENCH("mul ", op.fp_mul, y, y, x, op.p);
+ CYBOZU_BENCH("mulPre", op.fpDbl_mulPre, y, y, x);
+ CYBOZU_BENCH("mod ", op.fpDbl_mod, y, y, op.p);
+ }
+ }
+}
diff --git a/src/fp_proto.hpp b/src/fp_proto.hpp
index 82dd6b3..2139b68 100644
--- a/src/fp_proto.hpp
+++ b/src/fp_proto.hpp
@@ -146,7 +146,7 @@ struct MulPre {
}
static inline void func(Unit *z, const Unit *x, const Unit *y)
{
-#if 0
+#if 1
if (N >= 8 && (N % 2) == 0) {
karatsuba(z, x, y);
return;
@@ -160,6 +160,21 @@ struct MulPre {
template<size_t N, class Tag>
const void3u MulPre<N, Tag>::f = MulPre<N, Tag>::func;
+static inline void MulPre0(Unit*, const Unit*, const Unit*) {}
+
+template<class Tag>
+struct MulPre<0, Tag> {
+ static inline void f(Unit*, const Unit*, const Unit*) {}
+};
+
+template<class Tag>
+struct MulPre<1, Tag> {
+ static inline void f(Unit* z, const Unit* x, const Unit* y)
+ {
+ MulPreCore<1, Tag>::f(z, x, y);
+ }
+};
+
// z[N * 2] <- x[N] * x[N]
template<size_t N, class Tag = Gtag>
struct SqrPre {