#include #include #ifdef MCL_DONT_USE_OPENSSL #include #else #include #endif #include #include #ifdef MCL_USE_XBYAK #include "fp_generator.hpp" #endif #include "low_func.hpp" #ifdef MCL_USE_LLVM #include "proto.hpp" #include "low_func_llvm.hpp" #endif #include #include #ifdef _MSC_VER #pragma warning(disable : 4127) #endif namespace mcl { namespace fp { #ifdef MCL_USE_XBYAK FpGenerator *Op::createFpGenerator() { return new FpGenerator(); } void Op::destroyFpGenerator(FpGenerator *fg) { delete fg; } #endif inline void setUnitAsLE(void *p, Unit x) { #if MCL_SIZEOF_UNIT == 4 cybozu::Set32bitAsLE(p, x); #else cybozu::Set64bitAsLE(p, x); #endif } inline Unit getUnitAsLE(const void *p) { #if MCL_SIZEOF_UNIT == 4 return cybozu::Get32bitAsLE(p); #else return cybozu::Get64bitAsLE(p); #endif } const char *ModeToStr(Mode mode) { switch (mode) { case FP_AUTO: return "auto"; case FP_GMP: return "gmp"; case FP_GMP_MONT: return "gmp_mont"; case FP_LLVM: return "llvm"; case FP_LLVM_MONT: return "llvm_mont"; case FP_XBYAK: return "xbyak"; default: assert(0); return 0; } } Mode StrToMode(const char *s) { static const struct { const char *s; Mode mode; } tbl[] = { { "auto", FP_AUTO }, { "gmp", FP_GMP }, { "gmp_mont", FP_GMP_MONT }, { "llvm", FP_LLVM }, { "llvm_mont", FP_LLVM_MONT }, { "xbyak", FP_XBYAK }, }; for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { if (strcmp(s, tbl[i].s) == 0) return tbl[i].mode; } return FP_AUTO; } bool isEnableJIT() { #if defined(MCL_USE_XBYAK) /* -1:not init, 0:disable, 1:enable */ static int status = -1; if (status == -1) { #ifndef _MSC_VER status = 1; FILE *fp = fopen("/sys/fs/selinux/enforce", "rb"); if (fp) { char c; if (fread(&c, 1, 1, fp) == 1 && c == '1') { status = 0; } fclose(fp); } #endif if (status != 0) { MIE_ALIGN(4096) char buf[4096]; bool ret = Xbyak::CodeArray::protect(buf, sizeof(buf), true); status = ret ? 1 : 0; if (ret) { Xbyak::CodeArray::protect(buf, sizeof(buf), false); } } } return status != 0; #else return false; #endif } void getRandVal(Unit *out, RandGen& rg, const Unit *in, size_t bitSize) { if (rg.isZero()) rg = RandGen::get(); const size_t n = (bitSize + UnitBitSize - 1) / UnitBitSize; const size_t rem = bitSize & (UnitBitSize - 1); assert(n > 0); for (;;) { rg.read(out, n * sizeof(Unit)); if (rem > 0) out[n - 1] &= (Unit(1) << rem) - 1; if (isLessArray(out, in, n)) return; } } uint32_t sha256(void *out, uint32_t maxOutSize, const void *msg, uint32_t msgSize) { const uint32_t hashSize = 256 / 8; if (maxOutSize < hashSize) return 0; #ifdef MCL_DONT_USE_OPENSSL cybozu::Sha256(msg, msgSize).get(out); #else cybozu::crypto::Hash::digest(out, cybozu::crypto::Hash::N_SHA256, msg, msgSize); #endif return hashSize; } uint32_t sha512(void *out, uint32_t maxOutSize, const void *msg, uint32_t msgSize) { const uint32_t hashSize = 512 / 8; if (maxOutSize < hashSize) return 0; #ifdef MCL_DONT_USE_OPENSSL cybozu::Sha512(msg, msgSize).get(out); #else cybozu::crypto::Hash::digest(out, cybozu::crypto::Hash::N_SHA512, msg, msgSize); #endif return hashSize; } #ifndef MCL_USE_VINT static inline void set_mpz_t(mpz_t& z, const Unit* p, int n) { int s = n; while (s > 0) { if (p[s - 1]) break; s--; } z->_mp_alloc = n; z->_mp_size = s; z->_mp_d = (mp_limb_t*)const_cast(p); } #endif /* y = (1/x) mod op.p */ static inline void fp_invOpC(Unit *y, const Unit *x, const Op& op) { const int N = (int)op.N; bool b = false; #ifdef MCL_USE_VINT Vint vx, vy, vp; vx.setArray(&b, x, N); assert(b); (void)b; vp.setArray(&b, op.p, N); assert(b); (void)b; Vint::invMod(vy, vx, vp); vy.getArray(&b, y, N); assert(b); (void)b; #else mpz_class my; mpz_t mx, mp; set_mpz_t(mx, x, N); set_mpz_t(mp, op.p, N); mpz_invert(my.get_mpz_t(), mx, mp); gmp::getArray(&b, y, N, my); assert(b); #endif } /* inv(xR) = (1/x)R^-1 -toMont-> 1/x -toMont-> (1/x)R */ static void fp_invMontOpC(Unit *y, const Unit *x, const Op& op) { fp_invOpC(y, x, op); op.fp_mul(y, y, op.R3, op.p); } /* large (N * 2) specification of AddPre, SubPre */ template struct SetFpDbl { static inline void exec(Op&) {} }; template struct SetFpDbl { static inline void exec(Op& op) { // if (!op.isFullBit) { op.fpDbl_addPre = AddPre::f; op.fpDbl_subPre = SubPre::f; // } } }; template void setOp2(Op& op) { op.fp_shr1 = Shr1::f; op.fp_neg = Neg::f; if (op.isFullBit) { op.fp_add = Add::f; op.fp_sub = Sub::f; } else { op.fp_add = Add::f; op.fp_sub = Sub::f; } if (op.isMont) { if (op.isFullBit) { op.fp_mul = Mont::f; op.fp_sqr = SqrMont::f; } else { op.fp_mul = Mont::f; op.fp_sqr = SqrMont::f; } op.fpDbl_mod = MontRed::f; } else { op.fp_mul = Mul::f; op.fp_sqr = Sqr::f; op.fpDbl_mod = Dbl_Mod::f; } op.fp_mulUnit = MulUnit::f; if (!gmpIsFasterThanLLVM) { op.fpDbl_mulPre = MulPre::f; op.fpDbl_sqrPre = SqrPre::f; } op.fp_mulUnitPre = MulUnitPre::f; op.fpN1_mod = N1_Mod::f; op.fpDbl_add = DblAdd::f; op.fpDbl_sub = DblSub::f; op.fp_addPre = AddPre::f; op.fp_subPre = SubPre::f; op.fp2_mulNF = Fp2MulNF::f; SetFpDbl::exec(op); } template void setOp(Op& op, Mode mode) { // generic setup op.fp_isZero = isZeroC; op.fp_clear = clearC; op.fp_copy = copyC; if (op.isMont) { op.fp_invOp = fp_invMontOpC; } else { op.fp_invOp = fp_invOpC; } setOp2(op); #ifdef MCL_USE_LLVM if (mode != fp::FP_GMP && mode != fp::FP_GMP_MONT) { #if defined(MCL_USE_XBYAK) && CYBOZU_HOST == CYBOZU_HOST_INTEL const bool gmpIsFasterThanLLVM = false;//(N == 8 && MCL_SIZEOF_UNIT == 8); Xbyak::util::Cpu cpu; if (cpu.has(Xbyak::util::Cpu::tBMI2)) { setOp2(op); } else #endif { setOp2(op); } } #else (void)mode; #endif } #ifdef MCL_USE_XBYAK inline void invOpForMontC(Unit *y, const Unit *x, const Op& op) { Unit r[maxUnitSize]; int k = op.fp_preInv(r, x); /* S = UnitBitSize xr = 2^k R = 2^(N * S) get r2^(-k)R^2 = r 2^(N * S * 2 - k) */ op.fp_mul(y, r, op.invTbl.data() + k * op.N, op.p); } static void initInvTbl(Op& op) { const size_t N = op.N; const Unit *p = op.p; const size_t invTblN = N * sizeof(Unit) * 8 * 2; op.invTbl.resize(invTblN * N); Unit *tbl = op.invTbl.data() + (invTblN - 1) * N; Unit t[maxUnitSize] = {}; t[0] = 2; op.toMont(tbl, t); for (size_t i = 0; i < invTblN - 1; i++) { op.fp_add(tbl - N, tbl, tbl, p); tbl -= N; } } #endif static bool initForMont(Op& op, const Unit *p, Mode mode) { const size_t N = op.N; bool b; { mpz_class t = 1, R; gmp::getArray(&b, op.one, N, t); if (!b) return false; R = (t << (N * UnitBitSize)) % op.mp; t = (R * R) % op.mp; gmp::getArray(&b, op.R2, N, t); if (!b) return false; t = (t * R) % op.mp; gmp::getArray(&b, op.R3, N, t); if (!b) return false; } op.rp = getMontgomeryCoeff(p[0]); if (mode != FP_XBYAK) return true; #ifdef MCL_USE_XBYAK if (op.fg == 0) op.fg = Op::createFpGenerator(); op.fg->init(op); if (op.isMont && N <= 4) { op.fp_invOp = &invOpForMontC; initInvTbl(op); } #endif return true; } bool Op::init(const mpz_class& _p, size_t maxBitSize, Mode mode, size_t mclMaxBitSize) { if (mclMaxBitSize != MCL_MAX_BIT_SIZE) return false; #ifdef MCL_USE_VINT assert(sizeof(mcl::vint::Unit) == sizeof(Unit)); #else assert(sizeof(mp_limb_t) == sizeof(Unit)); #endif if (maxBitSize > MCL_MAX_BIT_SIZE) return false; if (_p <= 0) return false; clear(); maxN = (maxBitSize + fp::UnitBitSize - 1) / fp::UnitBitSize; N = gmp::getUnitSize(_p); if (N > maxN) return false; { bool b; gmp::getArray(&b, p, N, _p); if (!b) return false; } mp = _p; bitSize = gmp::getBitSize(mp); pmod4 = gmp::getUnit(mp, 0) % 4; /* priority : MCL_USE_XBYAK > MCL_USE_LLVM > none Xbyak > llvm_mont > llvm > gmp_mont > gmp */ #ifdef MCL_USE_XBYAK if (mode == FP_AUTO) mode = FP_XBYAK; if (mode == FP_XBYAK && bitSize > 256) { mode = FP_AUTO; } if (!isEnableJIT()) { mode = FP_AUTO; } #else if (mode == FP_XBYAK) mode = FP_AUTO; #endif #ifdef MCL_USE_LLVM if (mode == FP_AUTO) mode = FP_LLVM_MONT; #else if (mode == FP_LLVM || mode == FP_LLVM_MONT) mode = FP_AUTO; #endif if (mode == FP_AUTO) mode = FP_GMP_MONT; isMont = mode == FP_GMP_MONT || mode == FP_LLVM_MONT || mode == FP_XBYAK; #if 0 fprintf(stderr, "mode=%s, isMont=%d, maxBitSize=%d" #ifdef MCL_USE_XBYAK " MCL_USE_XBYAK" #endif #ifdef MCL_USE_LLVM " MCL_USE_LLVM" #endif "\n", ModeToStr(mode), isMont, (int)maxBitSize); #endif isFullBit = (bitSize % UnitBitSize) == 0; #if defined(MCL_USE_LLVM) || defined(MCL_USE_XBYAK) if ((mode == FP_AUTO || mode == FP_LLVM || mode == FP_XBYAK) && mp == mpz_class("0xfffffffffffffffffffffffffffffffeffffffffffffffff")) { primeMode = PM_NIST_P192; isMont = false; isFastMod = true; } if ((mode == FP_AUTO || mode == FP_LLVM || mode == FP_XBYAK) && mp == mpz_class("0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff")) { primeMode = PM_NIST_P521; isMont = false; isFastMod = true; } #endif #if defined(MCL_USE_VINT) && MCL_SIZEOF_UNIT == 8 { const char *secp256k1Str = "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f"; bool b; mpz_class secp256k1; gmp::setStr(&b, secp256k1, secp256k1Str); if (b && mp == secp256k1) { primeMode = PM_SECP256K1; isMont = false; isFastMod = true; } } #endif switch (N) { case 1: setOp<1>(*this, mode); break; case 2: setOp<2>(*this, mode); break; case 3: setOp<3>(*this, mode); break; case 4: setOp<4>(*this, mode); break; // 256 if 64-bit #if MCL_MAX_UNIT_SIZE >= 6 case 5: setOp<5>(*this, mode); break; case 6: setOp<6>(*this, mode); break; #endif #if MCL_MAX_UNIT_SIZE >= 8 case 7: setOp<7>(*this, mode); break; case 8: setOp<8>(*this, mode); break; #endif #if MCL_MAX_UNIT_SIZE >= 9 case 9: setOp<9>(*this, mode); break; // 521 if 64-bit #endif #if MCL_MAX_UNIT_SIZE >= 10 case 10: setOp<10>(*this, mode); break; #endif #if MCL_MAX_UNIT_SIZE >= 12 case 11: setOp<11>(*this, mode); break; case 12: setOp<12>(*this, mode); break; // 768 if 64-bit #endif #if MCL_MAX_UNIT_SIZE >= 14 case 13: setOp<13>(*this, mode); break; case 14: setOp<14>(*this, mode); break; #endif #if MCL_MAX_UNIT_SIZE >= 16 case 15: setOp<15>(*this, mode); break; case 16: setOp<16>(*this, mode); break; // 1024 if 64-bit #endif #if MCL_MAX_UNIT_SIZE >= 17 case 17: setOp<17>(*this, mode); break; // 521 if 32-bit #endif default: return false; } #ifdef MCL_USE_LLVM if (primeMode == PM_NIST_P192) { fp_mul = &mcl_fp_mulNIST_P192L; fp_sqr = &mcl_fp_sqr_NIST_P192L; fpDbl_mod = &mcl_fpDbl_mod_NIST_P192L; } if (primeMode == PM_NIST_P521) { fpDbl_mod = &mcl_fpDbl_mod_NIST_P521L; } #endif #if defined(MCL_USE_VINT) && MCL_SIZEOF_UNIT == 8 if (primeMode == PM_SECP256K1) { fp_mul = &mcl::vint::mcl_fp_mul_SECP256K1; fp_sqr = &mcl::vint::mcl_fp_sqr_SECP256K1; fpDbl_mod = &mcl::vint::mcl_fpDbl_mod_SECP256K1; } #endif if (!fp::initForMont(*this, p, mode)) return false; { bool b; sq.set(&b, mp); if (!b) return false; } if (N * UnitBitSize <= 256) { hash = sha256; } else { hash = sha512; } return true; } void copyUnitToByteAsLE(uint8_t *dst, const Unit *src, size_t byteSize) { while (byteSize >= sizeof(Unit)) { setUnitAsLE(dst, *src++); dst += sizeof(Unit); byteSize -= sizeof(Unit); } if (byteSize == 0) return; Unit x = *src; while (byteSize) { *dst++ = static_cast(x); x >>= 8; byteSize--; } } void copyByteToUnitAsLE(Unit *dst, const uint8_t *src, size_t byteSize) { while (byteSize >= sizeof(Unit)) { *dst++ = getUnitAsLE(src); src += sizeof(Unit); byteSize -= sizeof(Unit); } if (byteSize == 0) return; Unit x = 0; for (size_t i = 0; i < byteSize; i++) { x |= Unit(src[i]) << (i * 8); } *dst = x; } #ifndef CYBOZU_DONT_USE_STRING int detectIoMode(int ioMode, const std::ios_base& ios) { if (ioMode & ~IoPrefix) return ioMode; // IoAuto or IoPrefix const std::ios_base::fmtflags f = ios.flags(); assert(!(f & std::ios_base::oct)); ioMode |= (f & std::ios_base::hex) ? IoHex : 0; if (f & std::ios_base::showbase) { ioMode |= IoPrefix; } return ioMode; } #endif bool copyAndMask(Unit *y, const void *x, size_t xByteSize, const Op& op, MaskMode maskMode) { const size_t fpByteSize = sizeof(Unit) * op.N; if (xByteSize > fpByteSize) { if (maskMode == NoMask) return false; xByteSize = fpByteSize; } // QQQ : fixed later for big endian copyByteToUnitAsLE(y, (const uint8_t*)x, xByteSize); for (size_t i = (xByteSize + sizeof(Unit) - 1) / sizeof(Unit); i < op.N; i++) { y[i] = 0; } if (maskMode == mcl::fp::SmallMask || maskMode == mcl::fp::MaskAndMod) { maskArray(y, op.N, op.bitSize); } if (isGreaterOrEqualArray(y, op.p, op.N)) { switch (maskMode) { case mcl::fp::NoMask: return false; case mcl::fp::SmallMask: maskArray(y, op.N, op.bitSize - 1); break; case mcl::fp::MaskAndMod: default: op.fp_subPre(y, y, op.p); break; } } assert(isLessArray(y, op.p, op.N)); return true; } static bool isInUint64(uint64_t *pv, const fp::Block& b) { assert(fp::UnitBitSize == 32 || fp::UnitBitSize == 64); const size_t start = 64 / fp::UnitBitSize; for (size_t i = start; i < b.n; i++) { if (b.p[i]) return false; } #if MCL_SIZEOF_UNIT == 4 *pv = b.p[0] | (uint64_t(b.p[1]) << 32); #else *pv = b.p[0]; #endif return true; } uint64_t getUint64(bool *pb, const fp::Block& b) { uint64_t v; if (isInUint64(&v, b)) { *pb = true; return v; } *pb = false; return 0; } #ifdef _MSC_VER #pragma warning(push) #pragma warning(disable : 4146) #endif int64_t getInt64(bool *pb, fp::Block& b, const fp::Op& op) { bool isNegative = false; if (fp::isGreaterOrEqualArray(b.p, op.half, op.N)) { op.fp_neg(b.v_, b.p, op.p); b.p = b.v_; isNegative = true; } uint64_t v; if (fp::isInUint64(&v, b)) { const uint64_t c = uint64_t(1) << 63; if (isNegative) { if (v <= c) { // include c *pb = true; // -1 << 63 if (v == c) return int64_t(-9223372036854775807ll - 1); return int64_t(-v); } } else { if (v < c) { // not include c *pb = true; return int64_t(v); } } } *pb = false; return 0; } #ifdef _MSC_VER #pragma warning(pop) #endif void Op::initFp2(int _xi_a) { this->xi_a = _xi_a; } } } // mcl::fp