diff options
| author | MITSUNARI Shigeo <herumi@nifty.com> | 2015-05-05 09:52:44 +0800 | 
|---|---|---|
| committer | MITSUNARI Shigeo <herumi@nifty.com> | 2015-05-05 09:52:44 +0800 | 
| commit | 14fd1d125d21f47c2539a0e820e2a417e0c715f8 (patch) | |
| tree | 4fccb395f51a3f9c1202a783665ce396b649eeb9 | |
| download | tangerine-mcl-14fd1d125d21f47c2539a0e820e2a417e0c715f8.tar.gz tangerine-mcl-14fd1d125d21f47c2539a0e820e2a417e0c715f8.tar.zst tangerine-mcl-14fd1d125d21f47c2539a0e820e2a417e0c715f8.zip | |
from mie
39 files changed, 8447 insertions, 0 deletions
| diff --git a/COPYRIGHT b/COPYRIGHT new file mode 100644 index 0000000..bfe54da --- /dev/null +++ b/COPYRIGHT @@ -0,0 +1,47 @@ +
 +Copyright (c) 2015 MITSUNARI Shigeo
 +All rights reserved.
 +
 +Redistribution and use in source and binary forms, with or without
 +modification, are permitted provided that the following conditions are met:
 +
 +Redistributions of source code must retain the above copyright notice, this
 +list of conditions and the following disclaimer.
 +Redistributions in binary form must reproduce the above copyright notice,
 +this list of conditions and the following disclaimer in the documentation
 +and/or other materials provided with the distribution.
 +Neither the name of the copyright owner nor the names of its contributors may
 +be used to endorse or promote products derived from this software without
 +specific prior written permission.
 +
 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 +THE POSSIBILITY OF SUCH DAMAGE.
 +-----------------------------------------------------------------------------
 +ソースコード形式かバイナリ形式か、変更するかしないかを問わず、以下の条件を満た
 +す場合に限り、再頒布および使用が許可されます。
 +
 +ソースコードを再頒布する場合、上記の著作権表示、本条件一覧、および下記免責条項
 +を含めること。
 +バイナリ形式で再頒布する場合、頒布物に付属のドキュメント等の資料に、上記の著作
 +権表示、本条件一覧、および下記免責条項を含めること。
 +書面による特別の許可なしに、本ソフトウェアから派生した製品の宣伝または販売促進
 +に、著作権者の名前またはコントリビューターの名前を使用してはならない。
 +本ソフトウェアは、著作権者およびコントリビューターによって「現状のまま」提供さ
 +れており、明示黙示を問わず、商業的な使用可能性、および特定の目的に対する適合性
 +に関する暗黙の保証も含め、またそれに限定されない、いかなる保証もありません。
 +著作権者もコントリビューターも、事由のいかんを問わず、 損害発生の原因いかんを
 +問わず、かつ責任の根拠が契約であるか厳格責任であるか(過失その他の)不法行為で
 +あるかを問わず、仮にそのような損害が発生する可能性を知らされていたとしても、
 +本ソフトウェアの使用によって発生した(代替品または代用サービスの調達、使用の
 +喪失、データの喪失、利益の喪失、業務の中断も含め、またそれに限定されない)直接
 +損害、間接損害、偶発的な損害、特別損害、懲罰的損害、または結果損害について、
 +一切責任を負わないものとします。
 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..383af2b --- /dev/null +++ b/Makefile @@ -0,0 +1,23 @@
 +include common.mk
 +
 +# Default goal: create bin/ and build everything under test/ and sample/.
 +all:
 +	$(MKDIR) bin
 +	$(MAKE) -C test
 +	$(MAKE) -C sample
 +
 +test:
 +	$(MAKE) -C test test
 +
 +sample:
 +	$(MAKE) -C sample test
 +
 +clean:
 +#	$(MAKE) -C src clean
 +	$(MAKE) -C test clean
 +	$(MAKE) -C sample clean
 +
 +# None of these goals is a file; test/ and sample/ are directories that
 +# $(MAKE) -C descends into, so without .PHONY they would look up to date.
 +.PHONY: all test sample clean
 +
 diff --git a/common.mk b/common.mk new file mode 100644 index 0000000..b861db1 --- /dev/null +++ b/common.mk @@ -0,0 +1,121 @@
 +# Shared settings and pattern rules for the Makefiles in this tree.
 +# SRC, TARGET and LIBS are referenced below but not defined here; the
 +# including Makefile is expected to provide them.
 +
 +# GCC_VER_NUM is the compiler version as major*10000 + minor*100 + patch.
 +# The dotted string cannot be fed to `expr` directly: it is compared as text,
 +# so "10.2.0" would rank below "4.6.0".
 +GCC_VER:=$(shell $(PRE)$(CC) -dumpversion)
 +GCC_VER_NUM:=$(shell echo "$(GCC_VER)" | awk -F. '{ printf "%d", $$1 * 10000 + $$2 * 100 + $$3 }')
 +UNAME_S:=$(shell uname -s)
 +ifeq ($(UNAME_S),Linux)
 +  OS=Linux
 +endif
 +ifneq ($(UNAME_S),Darwin)
 +  LDFLAGS += -lrt
 +endif
 +CP = cp -f
 +AR = ar r
 +MKDIR=mkdir -p
 +RM=rm -fr
 +CFLAGS_OPT+=-fomit-frame-pointer -DNDEBUG
 +ifeq ($(CXX),clang++)
 +  CFLAGS_OPT+=-O3
 +else
 +  ifeq ($(shell expr $(GCC_VER_NUM) \> 40600),1)
 +    CFLAGS_OPT+=-Ofast
 +  else
 +    CFLAGS_OPT+=-O3
 +  endif
 +endif
 +CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith
 +CFLAGS+= -g -D_FILE_OFFSET_BITS=64
 +CFLAGS+=$(CFLAGS_WARN)
 +# BIT selects the -m32/-m64 switch; BIT=0 passes no -m switch at all.
 +BIT?=64
 +ifeq ($(BIT),32)
 +  CPU?=x86
 +else
 +  ifeq ($(BIT),64)
 +    CPU?=x64
 +  endif
 +endif
 +ifeq ($(BIT),0)
 +  BIT_OPT=
 +else
 +  BIT_OPT=-m$(BIT)
 +endif
 +# MARCH, when set, is passed to the compiler verbatim instead of -march=native.
 +ifeq ($(MARCH),)
 +  ifeq ($(shell expr $(GCC_VER_NUM) \> 40201),1)
 +    CFLAGS+=-march=native
 +  endif
 +else
 +  CFLAGS+=$(MARCH)
 +endif
 +
 +# Debug build unless RELEASE=1 is given.
 +DEBUG=1
 +ifeq ($(RELEASE),1)
 +  DEBUG=0
 +endif
 +
 +ifeq ($(DEBUG),0)
 +  CFLAGS+=$(CFLAGS_OPT)
 +  OBJDIR=release
 +  OBJSUF=
 +else
 +  ifeq ($(OS),Linux)
 +    LDFLAGS+=-rdynamic
 +  endif
 +  OBJDIR=debug
 +  OBJSUF=d
 +endif
 +
 +####################################################
 +
 +# The -m switch is added once, through $(BIT_OPT), further down.
 +LDFLAGS += -lpthread -lgmp -lgmpxx
 +
 +####################################################
 +
 +TOPDIR:=$(realpath $(dir $(lastword $(MAKEFILE_LIST))))/
 +EXTDIR:=$(TOPDIR)../cybozulib_ext/
 +CFLAGS+= -I$(TOPDIR)include -I$(TOPDIR)../cybozulib/include/ -I$(TOPDIR)../xbyak/ $(BIT_OPT) $(INC_DIR)
 +LDFLAGS+= -L$(TOPDIR)lib $(BIT_OPT) -Wl,-rpath,'$$ORIGIN/../lib' $(LD_DIR)
 +
 +MKDEP = sh -ec '$(PRE)$(CC) -MM $(CFLAGS) $< | sed "s@\($*\)\.o[ :]*@$(OBJDIR)/\1.o $@ : @g" > $@; [ -s $@ ] || rm -f $@; touch $@'
 +
 +CLEAN=$(RM) $(TARGET) $(OBJDIR)
 +
 +# NOTE(review): the second line (grep ... || echo ...) exits 0 either way, so a
 +# failing unit test is printed but does not appear to fail the make run - confirm this is intended.
 +define UNIT_TEST
 +sh -ec 'for i in $(TARGET); do $$i|grep "ctest:name"; done' > result.txt
 +grep -v "ng=0, exception=0" result.txt || echo "all unit tests are ok"
 +endef
 +
 +define SAMPLE_TEST
 +sh -ec 'for i in $(TARGET); do $$i; done'
 +endef
 +
 +.SUFFIXES: .cpp .d .exe
 +
 +$(OBJDIR)/%.o: %.cpp
 +	$(PRE)$(CXX) -c $< -o $@ $(CFLAGS)
 +
 +# $(OBJDIR) is order-only: it must exist, but its timestamp (which changes
 +# whenever a file is written into it) must not force the .d files to rebuild.
 +$(OBJDIR)/%.d: %.cpp | $(OBJDIR)
 +	@$(MKDEP)
 +
 +$(TOPDIR)bin/%$(OBJSUF).exe: $(OBJDIR)/%.o $(LIBS)
 +	$(PRE)$(CXX) $< -o $@ $(LIBS) $(LDFLAGS)
 +
 +OBJS=$(addprefix $(OBJDIR)/,$(SRC:.cpp=.o))
 +
 +DEPEND_FILE=$(addprefix $(OBJDIR)/, $(SRC:.cpp=.d))
 +TEST_FILE=$(addprefix $(TOPDIR)bin/, $(SRC:.cpp=$(OBJSUF).exe))
 +
 +.PHONY: test
 +
 diff --git a/common.props b/common.props new file mode 100644 index 0000000..8ec3f67 --- /dev/null +++ b/common.props @@ -0,0 +1,26 @@
 +<?xml version="1.0" encoding="utf-8"?>
 +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 +  <ImportGroup Label="PropertySheets" />
 +  <PropertyGroup Label="UserMacros" />
 +  <PropertyGroup>
 +    <OutDir>$(SolutionDir)bin\</OutDir>
 +  </PropertyGroup>
 +  <ItemDefinitionGroup>
 +    <ClCompile>
 +      <AdditionalIncludeDirectories>$(SolutionDir)../cybozulib/include;$(SolutionDir)../cybozulib_ext/mpir/include;$(SolutionDir)include</AdditionalIncludeDirectories>
 +    </ClCompile>
 +  </ItemDefinitionGroup>
 +  <ItemDefinitionGroup>
 +    <ClCompile>
 +      <WarningLevel>Level4</WarningLevel>
 +      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
 +      <PrecompiledHeaderFile />
 +      <PrecompiledHeaderOutputFile />
 +      <PreprocessorDefinitions>_MBCS;%(PreprocessorDefinitions);NOMINMAX</PreprocessorDefinitions>
 +    </ClCompile>
 +    <Link>
 +      <AdditionalLibraryDirectories>$(SolutionDir)../cybozulib_ext/mpir/lib</AdditionalLibraryDirectories>
 +    </Link>
 +  </ItemDefinitionGroup>
 +  <ItemGroup />
 +</Project>
\ No newline at end of file diff --git a/debug.props b/debug.props new file mode 100644 index 0000000..d261c8d --- /dev/null +++ b/debug.props @@ -0,0 +1,14 @@
 +<?xml version="1.0" encoding="utf-8"?>
 +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 +  <ImportGroup Label="PropertySheets" />
 +  <PropertyGroup Label="UserMacros" />
 +  <PropertyGroup>
 +    <TargetName>$(ProjectName)d</TargetName>
 +  </PropertyGroup>
 +  <ItemDefinitionGroup>
 +    <ClCompile>
 +      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
 +    </ClCompile>
 +  </ItemDefinitionGroup>
 +  <ItemGroup />
 +</Project>
\ No newline at end of file diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp new file mode 100644 index 0000000..8b70b70 --- /dev/null +++ b/include/mcl/ec.hpp @@ -0,0 +1,585 @@ +#pragma once +/** +	@file +	@brief elliptic curve +	@author MITSUNARI Shigeo(@herumi) +	@license modified new BSD license +	http://opensource.org/licenses/BSD-3-Clause +*/ +#include <sstream> +#include <cybozu/exception.hpp> +#include <cybozu/bitvector.hpp> +#include <mcl/operator.hpp> +#include <mcl/power.hpp> +#include <mcl/gmp_util.hpp> + +namespace mcl { + +#define MCL_EC_USE_AFFINE 0 +#define MCL_EC_USE_PROJ 1 +#define MCL_EC_USE_JACOBI 2 + +//#define MCL_EC_COORD MCL_EC_USE_JACOBI +//#define MCL_EC_COORD MCL_EC_USE_PROJ +#ifndef MCL_EC_COORD +	#define MCL_EC_COORD MCL_EC_USE_PROJ +#endif +/* +	elliptic curve +	y^2 = x^3 + ax + b (affine) +	y^2 = x^3 + az^4 + bz^6 (Jacobi) x = X/Z^2, y = Y/Z^3 +*/ +template<class _Fp> +class EcT : public ope::addsub<EcT<_Fp>, +	ope::comparable<EcT<_Fp>, +	ope::hasNegative<EcT<_Fp> > > > { +	enum { +		zero, +		minus3, +		generic +	}; +public: +	typedef _Fp Fp; +	typedef typename Fp::BlockType BlockType; +#if MCL_EC_COORD == MCL_EC_USE_AFFINE +	Fp x, y; +	bool inf_; +#else +	mutable Fp x, y, z; +#endif +	static Fp a_; +	static Fp b_; +	static int specialA_; +	static bool compressedExpression_; +#if MCL_EC_COORD == MCL_EC_USE_AFFINE +	EcT() : inf_(true) {} +#else +	EcT() { z.clear(); } +#endif +	EcT(const Fp& _x, const Fp& _y) +	{ +		set(_x, _y); +	} +	void normalize() const +	{ +#if MCL_EC_COORD == MCL_EC_USE_JACOBI +		if (isZero() || z == 1) return; +		Fp rz, rz2; +		Fp::inv(rz, z); +		rz2 = rz * rz; +		x *= rz2; +		y *= rz2 * rz; +		z = 1; +#elif MCL_EC_COORD == MCL_EC_USE_PROJ +		if (isZero() || z == 1) return; +		Fp rz; +		Fp::inv(rz, z); +		x *= rz; +		y *= rz; +		z = 1; +#endif +	} + +	static inline void setParam(const std::string& astr, const std::string& bstr) +	{ +		a_.fromStr(astr); +		b_.fromStr(bstr); +		if (a_.isZero()) { +			specialA_ 
= zero; +		} else if (a_ == -3) { +			specialA_ = minus3; +		} else { +			specialA_ = generic; +		} +	} +	static inline bool isValid(const Fp& _x, const Fp& _y) +	{ +		return _y * _y == (_x * _x + a_) * _x + b_; +	} +	void set(const Fp& _x, const Fp& _y, bool verify = true) +	{ +		if (verify && !isValid(_x, _y)) throw cybozu::Exception("ec:EcT:set") << _x << _y; +		x = _x; y = _y; +#if MCL_EC_COORD == MCL_EC_USE_AFFINE +		inf_ = false; +#else +		z = 1; +#endif +	} +	void clear() +	{ +#if MCL_EC_COORD == MCL_EC_USE_AFFINE +		inf_ = true; +#else +		z = 0; +#endif +		x.clear(); +		y.clear(); +	} + +	static inline void dbl(EcT& R, const EcT& P, bool verifyInf = true) +	{ +		if (verifyInf) { +			if (P.isZero()) { +				R.clear(); return; +			} +		} +#if MCL_EC_COORD == MCL_EC_USE_JACOBI +		Fp S, M, t, y2; +		Fp::square(y2, P.y); +		Fp::mul(S, P.x, y2); +		S += S; +		S += S; +		Fp::square(M, P.x); +		switch (specialA_) { +		case zero: +			Fp::add(t, M, M); +			M += t; +			break; +		case minus3: +			Fp::square(t, P.z); +			Fp::square(t, t); +			M -= t; +			Fp::add(t, M, M); +			M += t; +			break; +		case generic: +		default: +			Fp::square(t, P.z); +			Fp::square(t, t); +			t *= a_; +			t += M; +			M += M; +			M += t; +			break; +		} +		Fp::square(R.x, M); +		R.x -= S; +		R.x -= S; +		Fp::mul(R.z, P.y, P.z); +		R.z += R.z; +		Fp::square(y2, y2); +		y2 += y2; +		y2 += y2; +		y2 += y2; +		Fp::sub(R.y, S, R.x); +		R.y *= M; +		R.y -= y2; +#elif MCL_EC_COORD == MCL_EC_USE_PROJ +		Fp w, t, h; +		switch (specialA_) { +		case zero: +			Fp::square(w, P.x); +			Fp::add(t, w, w); +			w += t; +			break; +		case minus3: +			Fp::square(w, P.x); +			Fp::square(t, P.z); +			w -= t; +			Fp::add(t, w, w); +			w += t; +			break; +		case generic: +		default: +			Fp::square(w, P.z); +			w *= a_; +			Fp::square(t, P.x); +			w += t; +			w += t; +			w += t; // w = a z^2 + 3x^2 +			break; +		} +		Fp::mul(R.z, P.y, P.z); // s = yz +		Fp::mul(t, R.z, P.x); +		t *= P.y; // xys +		t += t; +		t += t; 
// 4(xys) ; 4B +		Fp::square(h, w); +		h -= t; +		h -= t; // w^2 - 8B +		Fp::mul(R.x, h, R.z); +		t -= h; // h is free +		t *= w; +		Fp::square(w, P.y); +		R.x += R.x; +		R.z += R.z; +		Fp::square(h, R.z); +		w *= h; +		R.z *= h; +		Fp::sub(R.y, t, w); +		R.y -= w; +#else +		Fp t, s; +		Fp::square(t, P.x); +		Fp::add(s, t, t); +		t += s; +		t += a_; +		Fp::add(s, P.y, P.y); +		t /= s; +		Fp::square(s, t); +		s -= P.x; +		Fp x3; +		Fp::sub(x3, s, P.x); +		Fp::sub(s, P.x, x3); +		s *= t; +		Fp::sub(R.y, s, P.y); +		R.x = x3; +		R.inf_ = false; +#endif +	} +	static inline void add(EcT& R, const EcT& P, const EcT& Q) +	{ +		if (P.isZero()) { R = Q; return; } +		if (Q.isZero()) { R = P; return; } +#if MCL_EC_COORD == MCL_EC_USE_JACOBI +		Fp r, U1, S1, H, H3; +		Fp::square(r, P.z); +		Fp::square(S1, Q.z); +		Fp::mul(U1, P.x, S1); +		Fp::mul(H, Q.x, r); +		H -= U1; +		r *= P.z; +		S1 *= Q.z; +		S1 *= P.y; +		Fp::mul(r, Q.y, r); +		r -= S1; +		if (H.isZero()) { +			if (r.isZero()) { +				dbl(R, P, false); +			} else { +				R.clear(); +			} +			return; +		} +		Fp::mul(R.z, P.z, Q.z); +		R.z *= H; +		Fp::square(H3, H); // H^2 +		Fp::square(R.y, r); // r^2 +		U1 *= H3; // U1 H^2 +		H3 *= H; // H^3 +		R.y -= U1; +		R.y -= U1; +		Fp::sub(R.x, R.y, H3); +		U1 -= R.x; +		U1 *= r; +		H3 *= S1; +		Fp::sub(R.y, U1, H3); +#elif MCL_EC_COORD == MCL_EC_USE_PROJ +		Fp r, PyQz, v, A, vv; +		Fp::mul(r, P.x, Q.z); +		Fp::mul(PyQz, P.y, Q.z); +		Fp::mul(A, Q.y, P.z); +		Fp::mul(v, Q.x, P.z); +		v -= r; +		if (v.isZero()) { +			Fp::add(vv, A, PyQz); +			if (vv.isZero()) { +				R.clear(); +			} else { +				dbl(R, P, false); +			} +			return; +		} +		Fp::sub(R.y, A, PyQz); +		Fp::square(A, R.y); +		Fp::square(vv, v); +		r *= vv; +		vv *= v; +		Fp::mul(R.z, P.z, Q.z); +		A *= R.z; +		R.z *= vv; +		A -= vv; +		vv *= PyQz; +		A -= r; +		A -= r; +		Fp::mul(R.x, v, A); +		r -= A; +		R.y *= r; +		R.y -= vv; +#else +		Fp t; +		Fp::neg(t, Q.y); +		if (P.y == t) { R.clear(); return; } +		Fp::sub(t, 
Q.x, P.x); +		if (t.isZero()) { +			dbl(R, P, false); +			return; +		} +		Fp s; +		Fp::sub(s, Q.y, P.y); +		Fp::div(t, s, t); +		R.inf_ = false; +		Fp x3; +		Fp::square(x3, t); +		x3 -= P.x; +		x3 -= Q.x; +		Fp::sub(s, P.x, x3); +		s *= t; +		Fp::sub(R.y, s, P.y); +		R.x = x3; +#endif +	} +	static inline void sub(EcT& R, const EcT& P, const EcT& Q) +	{ +#if 0 +		if (P.inf_) { neg(R, Q); return; } +		if (Q.inf_) { R = P; return; } +		if (P.y == Q.y) { R.clear(); return; } +		Fp t; +		Fp::sub(t, Q.x, P.x); +		if (t.isZero()) { +			dbl(R, P, false); +			return; +		} +		Fp s; +		Fp::add(s, Q.y, P.y); +		Fp::neg(s, s); +		Fp::div(t, s, t); +		R.inf_ = false; +		Fp x3; +		Fp::mul(x3, t, t); +		x3 -= P.x; +		x3 -= Q.x; +		Fp::sub(s, P.x, x3); +		s *= t; +		Fp::sub(R.y, s, P.y); +		R.x = x3; +#else +		EcT nQ; +		neg(nQ, Q); +		add(R, P, nQ); +#endif +	} +	static inline void neg(EcT& R, const EcT& P) +	{ +		if (P.isZero()) { +			R.clear(); +			return; +		} +#if MCL_EC_COORD == MCL_EC_USE_AFFINE +		R.inf_ = false; +		R.x = P.x; +		Fp::neg(R.y, P.y); +#else +		R.x = P.x; +		Fp::neg(R.y, P.y); +		R.z = P.z; +#endif +	} +	template<class N> +	static inline void power(EcT& z, const EcT& x, const N& y) +	{ +		power_impl::power(z, x, y); +	} +	/* +		0 <= P for any P +		(Px, Py) <= (P'x, P'y) iff Px < P'x or Px == P'x and Py <= P'y +	*/ +	static inline int compare(const EcT& P, const EcT& Q) +	{ +		P.normalize(); +		Q.normalize(); +		if (P.isZero()) { +			if (Q.isZero()) return 0; +			return -1; +		} +		if (Q.isZero()) return 1; +		int c = _Fp::compare(P.x, Q.x); +		if (c > 0) return 1; +		if (c < 0) return -1; +		return _Fp::compare(P.y, Q.y); +	} +	bool isZero() const +	{ +#if MCL_EC_COORD == MCL_EC_USE_AFFINE +		return inf_; +#else +		return z.isZero(); +#endif +	} +	friend inline std::ostream& operator<<(std::ostream& os, const EcT& self) +	{ +		if (self.isZero()) { +			return os << '0'; +		} else { +			self.normalize(); +			os << self.x.toStr(16) << '_'; +			if 
(compressedExpression_) { +				return os << Fp::isYodd(self.y); +			} else { +				return os << self.y.toStr(16); +			} +		} +	} +	friend inline std::istream& operator>>(std::istream& is, EcT& self) +	{ +		std::string str; +		is >> str; +		if (str == "0") { +			self.clear(); +		} else { +#if MCL_EC_COORD == MCL_EC_USE_AFFINE +			self.inf_ = false; +#else +			self.z = 1; +#endif +			size_t pos = str.find('_'); +			if (pos == std::string::npos) throw cybozu::Exception("EcT:operator>>:bad format") << str; +			str[pos] = '\0'; +			self.x.fromStr(&str[0], 16); +			if (compressedExpression_) { +				const char c = str[pos + 1]; +				if ((c == '0' || c == '1') && str.size() == pos + 2) { +					getYfromX(self.y, self.x, c == '1'); +				} else { +					str[pos] = '_'; +					throw cybozu::Exception("EcT:operator>>:bad y") << str; +				} +			} else { +				self.y.fromStr(&str[pos + 1], 16); +			} +		} +		return is; +	} +	static inline void setCompressedExpression(bool compressedExpression) +	{ +		compressedExpression_ = compressedExpression; +	} +	/* +		append to bv(not clear bv) +	*/ +	void appendToBitVec(cybozu::BitVector& bv) const +	{ +#if MCL_EC_COORD == MCL_EC_USE_AFFINE +		#error "not implemented" +#else +		normalize(); +		const size_t bitLen = _Fp::getModBitLen(); +		/* +				elem |x|y|z| +				size  n n 1 if not compressed +				size  n 1 1 if compressed +		*/ +		const size_t maxBitLen = compressedExpression_ ? (bitLen + 1 + 1) : (bitLen * 2 + 1); +		if (isZero()) { +			bv.resize(bv.size() + maxBitLen); +			return; +		} +		x.appendToBitVec(bv); +		if (compressedExpression_) { +			bv.append(Fp::isYodd(y), 1); +		} else { +			y.appendToBitVec(bv); +		} +		bv.append(1, 1); // z = 1 +#endif +	} +	void fromBitVec(const cybozu::BitVector& bv) +	{ +#if MCL_EC_COORD == MCL_EC_USE_AFFINE +		#error "not implemented" +#else +		const size_t bitLen = _Fp::getModBitLen(); +		const size_t maxBitLen = compressedExpression_ ? 
(bitLen + 1 + 1) : (bitLen * 2 + 1); +		if (bv.size() != maxBitLen) { +			throw cybozu::Exception("EcT:fromBitVec:bad size") << bv.size() << maxBitLen; +		} +		if (!bv.get(maxBitLen - 1)) { // if z = 0 +			clear(); +			return; +		} +		cybozu::BitVector t; +		bv.extract(t, 0, bitLen); +		x.fromBitVec(t); +		if (compressedExpression_) { +			bool odd = bv.get(bitLen); // y +			getYfromX(y, x, odd); +		} else { +			bv.extract(t, bitLen, bitLen); +			y.fromBitVec(t); +		} +		z = 1; +#endif +	} +	static inline size_t getBitVecSize() +	{ +		const size_t bitLen = _Fp::getModBitLen(); +		if (compressedExpression_) { +			return bitLen + 2; +		} else { +			return bitLen * 2 + 1;; +		} +	} +	static inline void getYfromX(Fp& y, const Fp& x, bool isYodd) +	{ +		Fp t; +		Fp::square(t, x); +		t += a_; +		t *= x; +		t += b_; +		Fp::squareRoot(y, t); +		if (Fp::isYodd(y) ^ isYodd) { +			Fp::neg(y, y); +		} +	} +}; + +template<class T> +struct TagMultiGr<EcT<T> > { +	static void square(EcT<T>& z, const EcT<T>& x) +	{ +		EcT<T>::dbl(z, x); +	} +	static void mul(EcT<T>& z, const EcT<T>& x, const EcT<T>& y) +	{ +		EcT<T>::add(z, x, y); +	} +	static void inv(EcT<T>& z, const EcT<T>& x) +	{ +		EcT<T>::neg(z, x); +	} +	static void div(EcT<T>& z, const EcT<T>& x, const EcT<T>& y) +	{ +		EcT<T>::sub(z, x, y); +	} +	static void init(EcT<T>& x) +	{ +		x.clear(); +	} +}; + +template<class _Fp> _Fp EcT<_Fp>::a_; +template<class _Fp> _Fp EcT<_Fp>::b_; +template<class _Fp> int EcT<_Fp>::specialA_; +template<class _Fp> bool EcT<_Fp>::compressedExpression_; + +struct EcParam { +	const char *name; +	const char *p; +	const char *a; +	const char *b; +	const char *gx; +	const char *gy; +	const char *n; +	size_t bitLen; // bit length of p +}; + +} // mcl + +namespace std { CYBOZU_NAMESPACE_TR1_BEGIN +template<class T> struct hash; + +template<class _Fp> +struct hash<mcl::EcT<_Fp> > { +	size_t operator()(const mcl::EcT<_Fp>& P) const +	{ +		if (P.isZero()) return 0; +		P.normalize(); +		uint64_t v = 
hash<_Fp>()(P.x); +		v = hash<_Fp>()(P.y, v); +		return static_cast<size_t>(v); +	} +}; + +CYBOZU_NAMESPACE_TR1_END } // std diff --git a/include/mcl/ecparam.hpp b/include/mcl/ecparam.hpp new file mode 100644 index 0000000..a5206e9 --- /dev/null +++ b/include/mcl/ecparam.hpp @@ -0,0 +1,161 @@ +#pragma once +/** +	@file +	@brief Elliptic curve parameter +	@author MITSUNARI Shigeo(@herumi) +	@license modified new BSD license +	http://opensource.org/licenses/BSD-3-Clause +*/ +#include <mcl/ec.hpp> + +namespace mcl { namespace ecparam { + +const struct mcl::EcParam secp160k1 = { +	"secp160k1", +	"0xfffffffffffffffffffffffffffffffeffffac73", +	"0", +	"7", +	"0x3b4c382ce37aa192a4019e763036f4f5dd4d7ebb", +	"0x938cf935318fdced6bc28286531733c3f03c4fee", +	"0x100000000000000000001b8fa16dfab9aca16b6b3", +	160 +}; +// p=2^160 + 7 +const struct mcl::EcParam p160_1 = { +	"p160_1", +	"0x10000000000000000000000000000000000000007", +	"10", +	"1343632762150092499701637438970764818528075565078", +	"1", +	"1236612389951462151661156731535316138439983579284", +	"1461501637330902918203683518218126812711137002561", +	161 +}; +const struct mcl::EcParam secp192k1 = { +	"secp192k1", +	"0xfffffffffffffffffffffffffffffffffffffffeffffee37", +	"0", +	"3", +	"0xdb4ff10ec057e9ae26b07d0280b7f4341da5d1b1eae06c7d", +	"0x9b2f2f6d9c5628a7844163d015be86344082aa88d95e2f9d", +	"0xfffffffffffffffffffffffe26f2fc170f69466a74defd8d", +	192 +}; +const struct mcl::EcParam secp224k1 = { +	"secp224k1", +	"0xfffffffffffffffffffffffffffffffffffffffffffffffeffffe56d", +	"0", +	"5", +	"0xa1455b334df099df30fc28a169a467e9e47075a90f7e650eb6b7a45c", +	"0x7e089fed7fba344282cafbd6f7e319f7c0b0bd59e2ca4bdb556d61a5", +	"0x10000000000000000000000000001dce8d2ec6184caf0a971769fb1f7", +	224 +}; +const struct mcl::EcParam secp256k1 = { +	"secp256k1", +	"0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f", +	"0", +	"7", +	"0x79be667ef9dcbbac55a06295ce870b07029bfcdb2dce28d959f2815b16f81798", +	
"0x483ada7726a3c4655da4fbfc0e1108a8fd17b448a68554199c47d08ffb10d4b8", +	"0xfffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364141", +	256 +}; +const struct mcl::EcParam secp384r1 = { +	"secp384r1", +	"0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff", +	"-3", +	"0xb3312fa7e23ee7e4988e056be3f82d19181d9c6efe8141120314088f5013875ac656398d8a2ed19d2a85c8edd3ec2aef", +	"0xaa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741e082542a385502f25dbf55296c3a545e3872760ab7", +	"0x3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da3113b5f0b8c00a60b1ce1d7e819d7a431d7c90ea0e5f", +	"0xffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52973", +	384 +}; +const struct mcl::EcParam secp521r1 = { +	"secp521r1", +	"0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", +	"-3", +	"0x51953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef109e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00", +	"0xc6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66", +	"0x11839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650", +	"0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386409", +	521 +}; +const struct mcl::EcParam NIST_P192 = { +	"NIST_P192", +	"0xfffffffffffffffffffffffffffffffeffffffffffffffff", +	"-3", +	"0x64210519e59c80e70fa7e9ab72243049feb8deecc146b9b1", +	"0x188da80eb03090f67cbf20eb43a18800f4ff0afd82ff1012", +	"0x07192b95ffc8da78631011ed6b24cdd573f977a11e794811", +	"0xffffffffffffffffffffffff99def836146bc9b1b4d22831", +	192 +}; +const struct mcl::EcParam NIST_P224 = { +	"NIST_P224", +	"0xffffffffffffffffffffffffffffffff000000000000000000000001", +	"-3", +	
"0xb4050a850c04b3abf54132565044b0b7d7bfd8ba270b39432355ffb4", +	"0xb70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21", +	"0xbd376388b5f723fb4c22dfe6cd4375a05a07476444d5819985007e34", +	"0xffffffffffffffffffffffffffff16a2e0b8f03e13dd29455c5c2a3d", +	224 +}; +const struct mcl::EcParam NIST_P256 = { +	"NIST_P256", +	"0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff", +	"-3", +	"0x5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", +	"0x6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", +	"0x4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", +	"0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551", +	256 +}; +// same secp384r1 +const struct mcl::EcParam NIST_P384 = { +	"NIST_P384", +	"0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff", +	"-3", +	"0xb3312fa7e23ee7e4988e056be3f82d19181d9c6efe8141120314088f5013875ac656398d8a2ed19d2a85c8edd3ec2aef", +	"0xaa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741e082542a385502f25dbf55296c3a545e3872760ab7", +	"0x3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da3113b5f0b8c00a60b1ce1d7e819d7a431d7c90ea0e5f", +	"0xffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52973", +	384 +}; +// same secp521r1 +const struct mcl::EcParam NIST_P521 = { +	"NIST_P521", +	"0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", +	"-3", +	"0x051953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef109e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00", +	"0xc6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66", +	"0x11839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650", +	
"0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386409", +	521 +}; + +} // mcl::ecparam + +static inline const mcl::EcParam* getEcParam(const std::string& name) +{ +	static const mcl::EcParam *tbl[] = { +		&ecparam::secp160k1, +		&ecparam::secp192k1, +		&ecparam::secp224k1, +		&ecparam::secp256k1, +		&ecparam::secp384r1, +		&ecparam::secp521r1, + +		&ecparam::NIST_P192, +		&ecparam::NIST_P224, +		&ecparam::NIST_P256, +		&ecparam::NIST_P384, +		&ecparam::NIST_P521, +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		if (name == tbl[i]->name) return tbl[i]; +	} +	throw cybozu::Exception("mcl::getEcParam:not support name") << name; +} + +} // mcl diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp new file mode 100644 index 0000000..a8b980b --- /dev/null +++ b/include/mcl/fp.hpp @@ -0,0 +1,446 @@ +#pragma once +/** +	@file +	@brief finite field class +	@author MITSUNARI Shigeo(@herumi) +	@license modified new BSD license +	http://opensource.org/licenses/BSD-3-Clause +*/ +#include <sstream> +#include <vector> +#ifdef _MSC_VER +	#pragma warning(push) +	#pragma warning(disable : 4127) +	#ifndef NOMINMAX +		#define NOMINMAX +	#endif +#endif +#if defined(_WIN64) || defined(__x86_64__) +//	#define USE_MONT_FP +#endif +#include <cybozu/hash.hpp> +#include <cybozu/itoa.hpp> +#include <cybozu/atoi.hpp> +#include <cybozu/bitvector.hpp> +#include <mcl/fp_base.hpp> +#include <mcl/fp_util.hpp> +#include <mcl/gmp_util.hpp> +#include <mcl/power.hpp> + +#ifndef MCL_FP_BLOCK_MAX_BIT_N +	#define MCL_FP_BLOCK_MAX_BIT_N 521 +#endif + +namespace mcl { + +struct Block { +	typedef fp::Unit Unit; +	const Unit *p; // pointer to original FpT.v_ +	size_t n; +	static const size_t UnitByteN = sizeof(Unit); +	static const size_t maxUnitN = (MCL_FP_BLOCK_MAX_BIT_N + UnitByteN * 8 - 1) / (UnitByteN * 8); +	Unit v_[maxUnitN]; +}; + +template<class tag = fp::TagDefault, size_t maxBitN = 
MCL_FP_BLOCK_MAX_BIT_N> +class FpT { +	typedef fp::Unit Unit; +	static const size_t UnitByteN = sizeof(Unit); +	static const size_t maxUnitN = (maxBitN + UnitByteN * 8 - 1) / (UnitByteN * 8); +	static fp::Op op_; +	static mcl::SquareRoot sq_; +	static size_t pBitLen_; +	template<class tag2, size_t maxBitN2> friend class FpT; +	Unit v_[maxUnitN]; +public: +	// return pointer to array v_[] +	const Unit *getUnit() const { return v_; } +	size_t getUnitN() const { return op_.N; } +	typedef Unit BlockType; +	void dump() const +	{ +		const size_t N = op_.N; +		for (size_t i = 0; i < N; i++) { +			printf("%016llx ", (long long)v_[N - 1 - i]); +		} +		printf("\n"); +	} +	static inline void setModulo(const std::string& mstr, int base = 0) +	{ +		bool isMinus; +		mpz_class mp; +		inFromStr(mp, &isMinus, mstr, base); +		if (isMinus) throw cybozu::Exception("mcl:FpT:setModulo:mstr is not minus") << mstr; +		pBitLen_ = Gmp::getBitLen(mp); +		if (pBitLen_ > maxBitN) throw cybozu::Exception("mcl:FpT:setModulo:too large bitLen") << pBitLen_ << maxBitN; +		Unit p[maxUnitN] = {}; +		const size_t n = Gmp::getRaw(p, maxUnitN, mp); +		if (n == 0) throw cybozu::Exception("mcl:FpT:setModulo:bad mstr") << mstr; +#ifdef USE_MONT_FP +		if (pBitLen_ <= 128) {  op_ = fp::MontFp<tag, 128>::init(p); } +#if CYBOZU_OS_BIT == 32 +		else if (pBitLen_ <= 160) { static fp::MontFp<tag, 160> f; op_ = f.init(p); } +#endif +		else if (pBitLen_ <= 192) { static fp::MontFp<tag, 192> f; op_ = f.init(p); } +#if CYBOZU_OS_BIT == 32 +		else if (pBitLen_ <= 224) { static fp::MontFp<tag, 224> f; op_ = f.init(p); } +#endif +		else if (pBitLen_ <= 256) { static fp::MontFp<tag, 256> f; op_ = f.init(p); } +		else if (pBitLen_ <= 384) { static fp::MontFp<tag, 384> f; op_ = f.init(p); } +		else if (pBitLen_ <= 448) { static fp::MontFp<tag, 448> f; op_ = f.init(p); } +#if CYBOZU_OS_BIT == 32 +		else if (pBitLen_ <= 544) { static fp::MontFp<tag, 544> f; op_ = f.init(p); } +#else +		else if (pBitLen_ <= 576) { static 
fp::MontFp<tag, 576> f; op_ = f.init(p); } +#endif +		else { static fp::MontFp<tag, maxBitN> f; op_ = f.init(p); } +#else +		if (pBitLen_ <= 128) {  op_ = fp::FixedFp<tag, 128>::init(p); } +#if CYBOZU_OS_BIT == 32 +		else if (pBitLen_ <= 160) { static fp::FixedFp<tag, 160> f; op_ = f.init(p); } +#endif +		else if (pBitLen_ <= 192) { static fp::FixedFp<tag, 192> f; op_ = f.init(p); } +#if CYBOZU_OS_BIT == 32 +		else if (pBitLen_ <= 224) { static fp::FixedFp<tag, 224> f; op_ = f.init(p); } +#endif +		else if (pBitLen_ <= 256) { static fp::FixedFp<tag, 256> f; op_ = f.init(p); } +		else if (pBitLen_ <= 384) { static fp::FixedFp<tag, 384> f; op_ = f.init(p); } +		else if (pBitLen_ <= 448) { static fp::FixedFp<tag, 448> f; op_ = f.init(p); } +#if CYBOZU_OS_BIT == 32 +		else if (pBitLen_ <= 544) { static fp::FixedFp<tag, 544> f; op_ = f.init(p); } +#else +		else if (pBitLen_ <= 576) { static fp::FixedFp<tag, 576> f; op_ = f.init(p); } +#endif +		else { static fp::FixedFp<tag, maxBitN> f; op_ = f.init(p); } +#endif +		assert(op_.N <= maxUnitN); +		sq_.set(mp); +	} +	static inline void getModulo(std::string& pstr) +	{ +		Gmp::toStr(pstr, op_.mp); +	} +	static inline bool isYodd(const FpT& x) +	{ +		Block b; +		x.getBlock(b); +		return (b.p[0] & 1) == 1; +	} +	static inline bool squareRoot(FpT& y, const FpT& x) +	{ +		mpz_class mx, my; +		x.toGmp(mx); +		bool b = sq_.get(my, mx); +		if (!b) return false; +		y.fromGmp(my); +		return true; +	} +	FpT() {} +	FpT(const FpT& x) +	{ +		op_.copy(v_, x.v_); +	} +	FpT& operator=(const FpT& x) +	{ +		op_.copy(v_, x.v_); +		return *this; +	} +	void clear() +	{ +		op_.clear(v_); +	} +	FpT(int64_t x) { operator=(x); } +	explicit FpT(const std::string& str, int base = 0) +	{ +		fromStr(str, base); +	} +	FpT& operator=(int64_t x) +	{ +		clear(); +		if (x) { +			int64_t y = x < 0 ? 
-x : x; +			if (sizeof(Unit) == 8) { +				v_[0] = y; +			} else { +				v_[0] = (uint32_t)y; +				v_[1] = (uint32_t)(y >> 32); +			} +			if (x < 0) neg(*this, *this); +			toMont(*this, *this); +		} +		return *this; +	} +	void toMont(FpT& y, const FpT& x) +	{ +		if (op_.toMont) op_.toMont(y.v_, x.v_); +	} +	void fromMont(FpT& y, const FpT& x) +	{ +		if (op_.fromMont) op_.fromMont(y.v_, x.v_); +	} +	void fromStr(const std::string& str, int base = 0) +	{ +		bool isMinus; +		mpz_class x; +		inFromStr(x, &isMinus, str, base); +		if (x >= op_.mp) throw cybozu::Exception("fp:FpT:fromStr:large str") << str; +		fp::local::toArray(v_, op_.N, x.get_mpz_t()); +		if (isMinus) { +			neg(*this, *this); +		} +		toMont(*this, *this); +	} +	// alias of fromStr +	void set(const std::string& str, int base = 0) { fromStr(str, base); } +	template<class S> +	void setRaw(const S *inBuf, size_t n) +	{ +		const size_t byteN = sizeof(S) * n; +		const size_t fpByteN = sizeof(Unit) * op_.N; +		if (byteN > fpByteN) throw cybozu::Exception("setRaw:bad n") << n << fpByteN; +		assert(byteN <= fpByteN); +		memcpy(v_, inBuf, byteN); +		memset((char *)v_ + byteN, 0, fpByteN - byteN); +		if (!isValid()) throw cybozu::Exception("setRaw:large value"); +		toMont(*this, *this); +	} +	template<class S> +	size_t getRaw(S *outBuf, size_t n) const +	{ +		const size_t byteN = sizeof(S) * n; +		const size_t fpByteN = sizeof(Unit) * op_.N; +		if (byteN < fpByteN) throw cybozu::Exception("getRaw:bad n") << n << fpByteN; +		assert(byteN >= fpByteN); +		Block b; +		getBlock(b); +		memcpy(outBuf, b.p, fpByteN); +		const size_t writeN = (fpByteN + sizeof(S) - 1) / sizeof(S); +		memset((char *)outBuf + fpByteN, 0, writeN * sizeof(S) - fpByteN); +		return writeN; +	} +	void getBlock(Block& b) const +	{ +		assert(maxUnitN <= Block::maxUnitN); +		b.n = op_.N; +		if (op_.fromMont) { +			op_.fromMont(b.v_, v_); +			b.p = &b.v_[0]; +		} else { +			b.p = &v_[0]; +		} +	} +	template<class RG> +	void setRand(RG& rg) +	{ +		
fp::getRandVal(v_, rg, op_.p, pBitLen_); +		fromMont(*this, *this); +	} +	static inline void toStr(std::string& str, const Unit *x, size_t n, int base = 10, bool withPrefix = false) +	{ +		switch (base) { +		case 10: +			{ +				mpz_class t; +				Gmp::setRaw(t, x, n); +				Gmp::toStr(str, t, 10); +			} +			return; +		case 16: +			mcl::fp::toStr16(str, x, n, withPrefix); +			return; +		case 2: +			mcl::fp::toStr2(str, x, n, withPrefix); +			return; +		default: +			throw cybozu::Exception("fp:FpT:toStr:bad base") << base; +		} +	} +	void toStr(std::string& str, int base = 10, bool withPrefix = false) const +	{ +		Block b; +		getBlock(b); +		toStr(str, b.p, b.n, base, withPrefix); +	} +	std::string toStr(int base = 10, bool withPrefix = false) const +	{ +		std::string str; +		toStr(str, base, withPrefix); +		return str; +	} +	void toGmp(mpz_class& x) const +	{ +		Block b; +		getBlock(b); +		Gmp::setRaw(x, b.p, b.n); +	} +	mpz_class toGmp() const +	{ +		mpz_class x; +		toGmp(x); +		return x; +	} +	void fromGmp(const mpz_class& x) +	{ +		setRaw(Gmp::getBlock(x), Gmp::getBlockSize(x)); +	} +	static inline void add(FpT& z, const FpT& x, const FpT& y) { op_.add(z.v_, x.v_, y.v_); } +	static inline void sub(FpT& z, const FpT& x, const FpT& y) { op_.sub(z.v_, x.v_, y.v_); } +	static inline void mul(FpT& z, const FpT& x, const FpT& y) { op_.mul(z.v_, x.v_, y.v_); } +	static inline void inv(FpT& y, const FpT& x) { op_.inv(y.v_, x.v_); } +	static inline void neg(FpT& y, const FpT& x) { op_.neg(y.v_, x.v_); } +	static inline void square(FpT& y, const FpT& x) { op_.square(y.v_, x.v_); } +	static inline void div(FpT& z, const FpT& x, const FpT& y) +	{ +		FpT rev; +		inv(rev, y); +		mul(z, x, rev); +	} +	static inline void powerArray(FpT& z, const FpT& x, const Unit *y, size_t yn) +	{ +		FpT out(1); +		FpT t(x); +		for (size_t i = 0; i < yn; i++) { +			const Unit v = y[i]; +			int m = (int)sizeof(Unit) * 8; +			if (i == yn - 1) { +				while (m > 0 && (v & (Unit(1) << (m - 1))) == 0) 
{ +					m--; +				} +			} +			for (int j = 0; j < m; j++) { +				if (v & (Unit(1) << j)) { +					out *= t; +				} +				t *= t; +			} +		} +		z = out; +	} +	template<class tag2, size_t maxBitN2> +	static inline void power(FpT& z, const FpT& x, const FpT<tag2, maxBitN2>& y) +	{ +		Block b; +		y.getBlock(b); +		powerArray(z, x, b.p, b.n); +	} +	static inline void power(FpT& z, const FpT& x, int y) +	{ +		if (y < 0) throw cybozu::Exception("FpT:power with negative y is not support") << y; +		const Unit u = y; +		powerArray(z, x, &u, 1); +	} +	static inline void power(FpT& z, const FpT& x, const mpz_class& y) +	{ +		if (y < 0) throw cybozu::Exception("FpT:power with negative y is not support") << y; +		powerArray(z, x, Gmp::getBlock(y), Gmp::getBlockSize(x)); +	} +	bool isZero() const { return op_.isZero(v_); } +	/* +		append to bv(not clear bv) +	*/ +	void appendToBitVec(cybozu::BitVector& bv) const +	{ +		Block b; +		getBlock(b); +		bv.append(b.p, pBitLen_); +	} +	bool isValid() const +	{ +		return fp::local::compareArray(v_, op_.p, op_.N) < 0; +	} +	void fromBitVec(const cybozu::BitVector& bv) +	{ +		if (bv.size() != pBitLen_) throw cybozu::Exception("FpT:fromBitVec:bad size") << bv.size() << pBitLen_; +		setRaw(bv.getBlock(), bv.getBlockSize()); +	} +	static inline size_t getModBitLen() { return pBitLen_; } +	static inline size_t getBitVecSize() { return pBitLen_; } +	bool operator==(const FpT& rhs) const { return fp::local::isEqualArray(v_, rhs.v_, op_.N); } +	bool operator!=(const FpT& rhs) const { return !operator==(rhs); } +	inline friend FpT operator+(const FpT& x, const FpT& y) { FpT z; add(z, x, y); return z; } +	inline friend FpT operator-(const FpT& x, const FpT& y) { FpT z; sub(z, x, y); return z; } +	inline friend FpT operator*(const FpT& x, const FpT& y) { FpT z; mul(z, x, y); return z; } +	inline friend FpT operator/(const FpT& x, const FpT& y) { FpT z; div(z, x, y); return z; } +	FpT& operator+=(const FpT& x) { add(*this, *this, x); return *this; } +	
FpT& operator-=(const FpT& x) { sub(*this, *this, x); return *this; } +	FpT& operator*=(const FpT& x) { mul(*this, *this, x); return *this; } +	FpT& operator/=(const FpT& x) { div(*this, *this, x); return *this; } +	FpT operator-() const { FpT x; neg(x, *this); return x; } +	friend inline std::ostream& operator<<(std::ostream& os, const FpT& self) +	{ +		const std::ios_base::fmtflags f = os.flags(); +		if (f & std::ios_base::oct) throw cybozu::Exception("fpT:operator<<:oct is not supported"); +		const int base = (f & std::ios_base::hex) ? 16 : 10; +		const bool showBase = (f & std::ios_base::showbase) != 0; +		std::string str; +		self.toStr(str, base, showBase); +		return os << str; +	} +	friend inline std::istream& operator>>(std::istream& is, FpT& self) +	{ +		const std::ios_base::fmtflags f = is.flags(); +		if (f & std::ios_base::oct) throw cybozu::Exception("fpT:operator>>:oct is not supported"); +		const int base = (f & std::ios_base::hex) ? 16 : 0; +		std::string str; +		is >> str; +		self.fromStr(str, base); +		return is; +	} +	/* +		not support +		getBitLen, operator<, > +	*/ +	/* +		QQQ : should be removed +	*/ +	bool operator<(const FpT&) const { return false; } +	static inline int compare(const FpT& x, const FpT& y) +	{ +		Block xb, yb; +		x.getBlock(xb); +		y.getBlock(yb); +		return fp::local::compareArray(xb.p, yb.p, xb.n); +	} +private: +	static inline void inFromStr(mpz_class& x, bool *isMinus, const std::string& str, int base) +	{ +		const char *p = fp::verifyStr(isMinus, &base, str); +		if (!Gmp::fromStr(x, p, base)) { +			throw cybozu::Exception("fp:FpT:inFromStr") << str; +		} +	} +}; + +template<class tag, size_t maxBitN> fp::Op FpT<tag, maxBitN>::op_; +template<class tag, size_t maxBitN> mcl::SquareRoot FpT<tag, maxBitN>::sq_; +template<class tag, size_t maxBitN> size_t FpT<tag, maxBitN>::pBitLen_; + +namespace power_impl { + +template<class G, class tag, size_t bitN, template<class _tag, size_t _bitN>class FpT> +void power(G& z, const G& x, 
const FpT<tag, bitN>& y) +{ +	Block b; +	y.getBlock(b); +	mcl::power_impl::powerArray(z, x, b.p, b.n); +} + +} // mcl::power_impl +} // mcl + +namespace std { CYBOZU_NAMESPACE_TR1_BEGIN +template<class T> struct hash; + +template<class tag, size_t maxBitN> +struct hash<mcl::FpT<tag, maxBitN> > : public std::unary_function<mcl::FpT<tag, maxBitN>, size_t> { +	size_t operator()(const mcl::FpT<tag, maxBitN>& x, uint64_t v = 0) const +	{ +		return static_cast<size_t>(cybozu::hash64(x.getUnit(), x.getUnitN(), v)); +	} +}; + +CYBOZU_NAMESPACE_TR1_END } // std::tr1 + +#ifdef _WIN32 +	#pragma warning(pop) +#endif diff --git a/include/mcl/fp_base.hpp b/include/mcl/fp_base.hpp new file mode 100644 index 0000000..0fb174f --- /dev/null +++ b/include/mcl/fp_base.hpp @@ -0,0 +1,527 @@ +#pragma once +/** +	@file +	@brief basic operation +	@author MITSUNARI Shigeo(@herumi) +	@license modified new BSD license +	http://opensource.org/licenses/BSD-3-Clause +*/ +#ifdef _MSC_VER +	#pragma warning(push) +	#pragma warning(disable : 4616) +	#pragma warning(disable : 4800) +	#pragma warning(disable : 4244) +	#pragma warning(disable : 4127) +	#pragma warning(disable : 4512) +	#pragma warning(disable : 4146) +#endif +#include <stdint.h> +#include <assert.h> +#include <mcl/gmp_util.hpp> +#ifdef _MSC_VER +	#pragma warning(pop) +#endif +#include <cybozu/inttype.hpp> +#ifdef USE_MONT_FP +#include <mcl/fp_generator.hpp> +#endif + +namespace mcl { namespace fp { + +#if defined(CYBOZU_OS_BIT) && (CYBOZU_OS_BIT == 32) +typedef uint32_t Unit; +#else +typedef uint64_t Unit; +#endif + +typedef void (*void1op)(Unit*); +typedef void (*void2op)(Unit*, const Unit*); +typedef void (*void3op)(Unit*, const Unit*, const Unit*); +typedef void (*void4op)(Unit*, const Unit*, const Unit*, const Unit*); +typedef int (*int2op)(Unit*, const Unit*); +typedef void (*void4Iop)(Unit*, const Unit*, const Unit*, const Unit*, Unit); + +} } // mcl::fp + +#ifdef MCL_USE_LLVM + +extern "C" { + +#define MCL_FP_DEF_FUNC(len) \ 
+void mcl_fp_add ## len ## S(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \ +void mcl_fp_add ## len ## L(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \ +void mcl_fp_sub ## len ## S(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \ +void mcl_fp_sub ## len ## L(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \ +void mcl_fp_mul ## len ## pre(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \ +void mcl_fp_mont ## len(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, mcl::fp::Unit); + +MCL_FP_DEF_FUNC(128) +MCL_FP_DEF_FUNC(192) +MCL_FP_DEF_FUNC(256) +MCL_FP_DEF_FUNC(320) +MCL_FP_DEF_FUNC(384) +MCL_FP_DEF_FUNC(448) +MCL_FP_DEF_FUNC(512) +#if CYBOZU_OS_BIT == 32 +MCL_FP_DEF_FUNC(160) +MCL_FP_DEF_FUNC(224) +MCL_FP_DEF_FUNC(288) +MCL_FP_DEF_FUNC(352) +MCL_FP_DEF_FUNC(416) +MCL_FP_DEF_FUNC(480) +MCL_FP_DEF_FUNC(544) +#else +MCL_FP_DEF_FUNC(576) +#endif + +void mcl_fp_mul_NIST_P192(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); + +} + +#endif + +namespace mcl { namespace fp { + +namespace local { + +inline int compareArray(const Unit* x, const Unit* y, size_t n) +{ +	for (size_t i = n - 1; i != size_t(-1); i--) { +		if (x[i] < y[i]) return -1; +		if (x[i] > y[i]) return 1; +	} +	return 0; +} + +inline bool isEqualArray(const Unit* x, const Unit* y, size_t n) +{ +	for (size_t i = 0; i < n; i++) { +		if (x[i] != y[i]) return false; +	} +	return true; +} + +inline bool isZeroArray(const Unit *x, size_t n) +{ +	for (size_t i = 0; i < n; i++) { +		if (x[i]) return false; +	} +	return true; +} + +inline void clearArray(Unit *x, size_t begin, size_t end) +{ +	for (size_t i = begin; i < end; i++) x[i] = 0; +} + +inline void copyArray(Unit *y, const Unit *x, size_t n) +{ +	for (size_t i = 0; i < n; i++) y[i] = x[i]; +} + +inline void toArray(Unit *y, size_t yn, const mpz_srcptr x) +{ 
+	const int xn = x->_mp_size; +	assert(xn >= 0); +	const Unit* xp = (const Unit*)x->_mp_d; +	assert(xn <= (int)yn); +	copyArray(y, xp, xn); +	clearArray(y, xn, yn); +} + +} // mcl::fp +struct TagDefault; + +struct Op { +	mpz_class mp; +	const Unit* p; +	size_t N; +	bool (*isZero)(const Unit*); +	void1op clear; +	void2op neg; +	void2op inv; +	void2op square; +	void2op copy; +	void3op add; +	void3op sub; +	void3op mul; +	// for Montgomery +	void2op toMont; +	void2op fromMont; +	Op() +		: p(0), N(0), isZero(0), clear(0), neg(0), inv(0) +		, square(0), copy(0),add(0), sub(0), mul(0), toMont(0), fromMont(0) +	{ +	} +}; + +template<class tag, size_t bitN> +struct FixedFp { +	typedef fp::Unit Unit; +	static const size_t N = (bitN + sizeof(Unit) * 8 - 1) / (sizeof(Unit) * 8); +	static mpz_class mp_; +	static Unit p_[N]; +	static inline void setModulo(const Unit* p) +	{ +		assert(N >= 2); +		assert(sizeof(mp_limb_t) == sizeof(Unit)); +		copy(p_, p); +		Gmp::setRaw(mp_, p, N); +	} +	static inline void set_mpz_t(mpz_t& z, const Unit* p, int n = (int)N) +	{ +		z->_mp_alloc = n; +		int i = n; +		while (i > 0 && p[i - 1] == 0) { +			i--; +		} +		z->_mp_size = i; +		z->_mp_d = (mp_limb_t*)const_cast<Unit*>(p); +	} +	static inline void set_zero(mpz_t& z, Unit *p, size_t n) +	{ +		z->_mp_alloc = (int)n; +		z->_mp_size = 0; +		z->_mp_d = (mp_limb_t*)p; +	} +	static inline void clear(Unit *x) +	{ +		local::clearArray(x, 0, N); +	} +	static inline void copy(Unit *y, const Unit *x) +	{ +		local::copyArray(y, x, N); +	} +	static inline void add(Unit *z, const Unit *x, const Unit *y) +	{ +		Unit ret[N + 2]; // not N + 1 +		mpz_t mz, mx, my; +		set_zero(mz, ret, N + 2); +		set_mpz_t(mx, x); +		set_mpz_t(my, y); +		mpz_add(mz, mx, my); +		if (mpz_cmp(mz, mp_.get_mpz_t()) >= 0) { +			mpz_sub(mz, mz, mp_.get_mpz_t()); +		} +		local::toArray(z, N, mz); +	} +#ifdef MCL_USE_LLVM +#if CYBOZU_OS_BIT == 64 +	static inline void add128(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add128S(z, x, y, 
p_); } +	static inline void sub128(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub128S(z, x, y, p_); } +	static inline void add192(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add192S(z, x, y, p_); } +	static inline void sub192(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub192S(z, x, y, p_); } +	static inline void add256(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add256S(z, x, y, p_); } +	static inline void sub256(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub256S(z, x, y, p_); } +	static inline void add384(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add384L(z, x, y, p_); } +	static inline void sub384(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub384L(z, x, y, p_); } + +	static inline void add576(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add576L(z, x, y, p_); } +	static inline void sub576(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub576L(z, x, y, p_); } +#else +	static inline void add128(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add128S(z, x, y, p_); } +	static inline void sub128(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub128S(z, x, y, p_); } +	static inline void add192(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add192L(z, x, y, p_); } +	static inline void sub192(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub192L(z, x, y, p_); } +	static inline void add256(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add256L(z, x, y, p_); } +	static inline void sub256(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub256L(z, x, y, p_); } +	static inline void add384(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add384L(z, x, y, p_); } +	static inline void sub384(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub384L(z, x, y, p_); } + +	static inline void add160(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add160L(z, x, y, p_); } +	static inline void sub160(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub160L(z, x, y, p_); } +	static inline void add224(Unit *z, const Unit *x, const Unit *y) { 
mcl_fp_add224L(z, x, y, p_); } +	static inline void sub224(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub224L(z, x, y, p_); } +	static inline void add544(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add544L(z, x, y, p_); } +	static inline void sub544(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub544L(z, x, y, p_); } +#endif +#endif +	static inline void sub(Unit *z, const Unit *x, const Unit *y) +	{ +		Unit ret[N + 1]; +		mpz_t mz, mx, my; +		set_zero(mz, ret, N + 1); +		set_mpz_t(mx, x); +		set_mpz_t(my, y); +		mpz_sub(mz, mx, my); +		if (mpz_sgn(mz) < 0) { +			mpz_add(mz, mz, mp_.get_mpz_t()); +		} +		local::toArray(z, N, mz); +	} +	static inline void mul(Unit *z, const Unit *x, const Unit *y) +	{ +		Unit ret[N * 2]; +#ifdef MCL_USE_LLVM +#if CYBOZU_OS_BIT == 64 +		if (bitN <= 128) { mcl_fp_mul128pre(ret, x, y); mod(z, ret); return; } +		if (bitN <= 192) { mcl_fp_mul192pre(ret, x, y); mod(z, ret); return; } +		if (bitN <= 256) { mcl_fp_mul256pre(ret, x, y); mod(z, ret); return; } +		if (bitN <= 384) { mcl_fp_mul384pre(ret, x, y); mod(z, ret); return; } +//		if (bitN <= 576) { mcl_fp_mul576pre(ret, x, y); mod(z, ret); return; } +#else +		if (bitN <= 128) { mcl_fp_mul128pre(ret, x, y); mod(z, ret); return; } +		if (bitN <= 160) { mcl_fp_mul160pre(ret, x, y); mod(z, ret); return; } +		if (bitN <= 192) { mcl_fp_mul192pre(ret, x, y); mod(z, ret); return; } +		if (bitN <= 224) { mcl_fp_mul224pre(ret, x, y); mod(z, ret); return; } +//		if (bitN <= 256) { mcl_fp_mul256pre(ret, x, y); mod(z, ret); return; } +//		if (bitN <= 384) { mcl_fp_mul384pre(ret, x, y); mod(z, ret); return; } +//		if (bitN <= 544) { mcl_fp_mul544pre(ret, x, y); mod(z, ret); return; } +#endif +#endif +#if 0 +		pre_mul(ret, x, y); +		mod(z, ret); +#else +		mpz_t mx, my, mz; +		set_zero(mz, ret, N * 2); +		set_mpz_t(mx, x); +		set_mpz_t(my, y); +		mpz_mul(mz, mx, my); +		mpz_mod(mz, mz, mp_.get_mpz_t()); +		local::toArray(z, N, mz); +#endif +	} +	static inline void pre_mul(Unit *z, const 
Unit *x, const Unit *y) +	{ +		mpz_t mx, my, mz; +		set_zero(mz, z, N * 2); +		set_mpz_t(mx, x); +		set_mpz_t(my, y); +		mpz_mul(mz, mx, my); +		local::toArray(z, N * 2, mz); +	} +	// x[N * 2] -> y[N] +	static inline void mod(Unit *y, const Unit *x) +	{ +		mpz_t mx, my; +		set_mpz_t(mx, x, N * 2); +		set_mpz_t(my, y, N); +		mpz_mod(my, mx, mp_.get_mpz_t()); +		local::clearArray(y, my->_mp_size, N); +	} +	static inline void square(Unit *z, const Unit *x) +	{ +		mul(z, x, x); // QQQ : use powMod with 2? +	} +	static inline void inv(Unit *y, const Unit *x) +	{ +		mpz_class my; +		mpz_t mx; +		set_mpz_t(mx, x); +		mpz_invert(my.get_mpz_t(), mx, mp_.get_mpz_t()); +		local::toArray(y, N, my.get_mpz_t()); +	} +	static inline bool isZero(const Unit *x) +	{ +		return local::isZeroArray(x, N); +	} +	static inline void neg(Unit *y, const Unit *x) +	{ +		if (isZero(x)) { +			if (x != y) clear(y); +			return; +		} +		sub(y, p_, x); +	} +	static inline Op init(const Unit *p) +	{ +		setModulo(p); +		Op op; +		op.N = N; +		op.isZero = &isZero; +		op.clear = &clear; +		op.neg = &neg; +		op.inv = &inv; +		op.square = □ +		op.copy = © +#ifdef MCL_USE_LLVM +		printf("fp2 use llvm bitN=%zd\n", bitN); +		if (bitN <= 128) { +			op.add = &add128; +			op.sub = &sub128; +		} else +#if CYBOZU_OS_BIT == 32 +		if (bitN <= 160) { +			op.add = &add160; +			op.sub = &sub160; +		} else +#endif +		if (bitN <= 192) { +			op.add = &add192; +			op.sub = &sub192; +		} else +#if CYBOZU_OS_BIT == 32 +		if (bitN <= 224) { +			op.add = &add224; +			op.sub = &sub224; +		} else +#endif +		if (bitN <= 256) { +			op.add = &add256; +			op.sub = &sub256; +		} else +		if (bitN <= 384) { +			op.add = &add384; +			op.sub = &sub384; +		} else +#if CYBOZU_OS_BIT == 64 +		if (bitN <= 576) { +			op.add = &add576; +			op.sub = &sub576; +		} else +#else +		if (bitN <= 544) { +			op.add = &add544; +			op.sub = &sub544; +		} else +#endif +#endif +		{ +			op.add = &add; +			op.sub = ⊂ +		} +#ifdef MCL_USE_LLVM +		if (mp_ == 
mpz_class("0xfffffffffffffffffffffffffffffffeffffffffffffffff")) { +			op.mul = &mcl_fp_mul_NIST_P192; // slower than MontFp192 +		} else +#endif +		{ +			op.mul = &mul; +		} +		op.mp = mp_; +		op.p = &p_[0]; +		return op; +	} +}; + +template<class tag, size_t bitN> mpz_class FixedFp<tag, bitN>::mp_; +template<class tag, size_t bitN> fp::Unit FixedFp<tag, bitN>::p_[FixedFp<tag, bitN>::N]; + +#ifdef USE_MONT_FP +template<class tag, size_t bitN> +struct MontFp { +	typedef fp::Unit Unit; +	static const size_t N = (bitN + sizeof(Unit) * 8 - 1) / (sizeof(Unit) * 8); +	static const size_t invTblN = N * sizeof(Unit) * 8 * 2; +	static mpz_class mp_; +//	static mcl::SquareRoot sq_; +	static Unit p_[N]; +	static Unit one_[N]; +	static Unit R_[N]; // (1 << (N * 64)) % p +	static Unit RR_[N]; // (R * R) % p +	static Unit invTbl_[invTblN][N]; +	static size_t modBitLen_; +	static FpGenerator fg_; +	static void3op add_; +	static void3op mul_; + +	static inline void fromRawGmp(Unit *y, const mpz_class& x) +	{ +		local::toArray(y, N, x.get_mpz_t()); +	} +	static inline void setModulo(const Unit *p) +	{ +		copy(p_, p); +		Gmp::setRaw(mp_, p, N); +//		sq_.set(pOrg_); + +		mpz_class t = 1; +		fromRawGmp(one_, t); +		t = (t << (N * 64)) % mp_; +		fromRawGmp(R_, t); +		t = (t * t) % mp_; +		fromRawGmp(RR_, t); +		fg_.init(p_, N); + +		add_ = Xbyak::CastTo<void3op>(fg_.add_); +		mul_ = Xbyak::CastTo<void3op>(fg_.mul_); +	} +	static void initInvTbl(Unit invTbl[invTblN][N]) +	{ +		Unit t[N]; +		clear(t); +		t[0] = 2; +		toMont(t, t); +		for (int i = 0; i < invTblN; i++) { +			copy(invTbl[invTblN - 1 - i], t); +			add_(t, t, t); +		} +	} +	static inline void clear(Unit *x) +	{ +		local::clearArray(x, 0, N); +	} +	static inline void copy(Unit *y, const Unit *x) +	{ +		local::copyArray(y, x, N); +	} +	static inline bool isZero(const Unit *x) +	{ +		return local::isZeroArray(x, N); +	} +	static inline void invC(Unit *y, const Unit *x) +	{ +		const int2op preInv = 
Xbyak::CastTo<int2op>(fg_.preInv_); +		Unit r[N]; +		int k = preInv(r, x); +		/* +			xr = 2^k +			R = 2^(N * 64) +			get r2^(-k)R^2 = r 2^(N * 64 * 2 - k) +		*/ +		mul_(y, r, invTbl_[k]); +	} +	static inline void squareC(Unit *y, const Unit *x) +	{ +		mul_(y, x, x); +	} +	static inline void toMont(Unit *y, const Unit *x) +	{ +		mul_(y, x, RR_); +	} +	static inline void fromMont(Unit *y, const Unit *x) +	{ +		mul_(y, x, one_); +	} +	static inline Op init(const Unit *p) +	{ +puts("use MontFp2"); +		setModulo(p); +		Op op; +		op.N = N; +		op.isZero = &isZero; +		op.clear = &clear; +		op.neg = Xbyak::CastTo<void2op>(fg_.neg_); +		op.inv = &invC; +		op.square = Xbyak::CastTo<void2op>(fg_.sqr_); +		if (op.square == 0) op.square = &squareC; +		op.copy = © +		op.add = add_; +		op.sub = Xbyak::CastTo<void3op>(fg_.sub_); +		op.mul = mul_; +		op.mp = mp_; +		op.p = &p_[0]; +		op.toMont = &toMont; +		op.fromMont = &fromMont; + +//		shr1 = Xbyak::CastTo<void2op>(fg_.shr1_); +//		addNc = Xbyak::CastTo<bool3op>(fg_.addNc_); +//		subNc = Xbyak::CastTo<bool3op>(fg_.subNc_); +		initInvTbl(invTbl_); +		return op; +	} +}; +template<class tag, size_t bitN> mpz_class MontFp<tag, bitN>::mp_; +template<class tag, size_t bitN> fp::Unit MontFp<tag, bitN>::p_[MontFp<tag, bitN>::N]; +template<class tag, size_t bitN> fp::Unit MontFp<tag, bitN>::one_[MontFp<tag, bitN>::N]; +template<class tag, size_t bitN> fp::Unit MontFp<tag, bitN>::R_[MontFp<tag, bitN>::N]; +template<class tag, size_t bitN> fp::Unit MontFp<tag, bitN>::RR_[MontFp<tag, bitN>::N]; +template<class tag, size_t bitN> fp::Unit MontFp<tag, bitN>::invTbl_[MontFp<tag, bitN>::invTblN][MontFp<tag, bitN>::N]; +template<class tag, size_t bitN> size_t MontFp<tag, bitN>::modBitLen_; +template<class tag, size_t bitN> FpGenerator MontFp<tag, bitN>::fg_; +template<class tag, size_t bitN> void3op MontFp<tag, bitN>::add_; +template<class tag, size_t bitN> void3op MontFp<tag, bitN>::mul_; +#endif + +} } // mcl::fp diff --git 
a/include/mcl/fp_generator.hpp b/include/mcl/fp_generator.hpp new file mode 100644 index 0000000..9820ca9 --- /dev/null +++ b/include/mcl/fp_generator.hpp @@ -0,0 +1,1675 @@ +#pragma once +/** +	@file +	@brief Fp generator +	@author MITSUNARI Shigeo(@herumi) +	@license modified new BSD license +	http://opensource.org/licenses/BSD-3-Clause +*/ +#include <stdio.h> +#include <assert.h> +#include <cybozu/exception.hpp> + +namespace mcl { + +namespace montgomery { + +/* +	get pp such that p * pp = -1 mod M, +	where p is prime and M = 1 << 64(or 32). +	@param pLow [in] p mod M +	T is uint32_t or uint64_t +*/ +template<class T> +T getCoff(T pLow) +{ +	T ret = 0; +	T t = 0; +	T x = 1; + +	for (size_t i = 0; i < sizeof(T) * 8; i++) { +		if ((t & 1) == 0) { +			t += pLow; +			ret += x; +		} +		t >>= 1; +		x <<= 1; +	} +	return ret; +} + +} } // mcl::montgomery + +#if (CYBOZU_HOST == CYBOZU_HOST_INTEL) && (CYBOZU_OS_BIT == 64) + +#ifndef XBYAK_NO_OP_NAMES +	#define XBYAK_NO_OP_NAMES +#endif +#include <xbyak/xbyak.h> +#include <xbyak/xbyak_util.h> + +namespace mcl { + +namespace fp_gen_local { + +class MemReg { +	const Xbyak::Reg64 *r_; +	const Xbyak::RegExp *m_; +	size_t offset_; +public: +	MemReg(const Xbyak::Reg64 *r, const Xbyak::RegExp *m, size_t offset) : r_(r), m_(m), offset_(offset) {} +	bool isReg() const { return r_ != 0; } +	const Xbyak::Reg64& getReg() const { return *r_; } +	Xbyak::RegExp getMem() const { return *m_ + offset_ * sizeof(size_t); } +}; + +struct MixPack { +	static const size_t useAll = 100; +	Xbyak::util::Pack p; +	Xbyak::RegExp m; +	size_t mn; +	MixPack() : mn(0) {} +	MixPack(Xbyak::util::Pack& remain, size_t& rspPos, size_t n, size_t useRegNum = useAll) +	{ +		init(remain, rspPos, n, useRegNum); +	} +	void init(Xbyak::util::Pack& remain, size_t& rspPos, size_t n, size_t useRegNum = useAll) +	{ +		size_t pn = std::min(remain.size(), n); +		if (useRegNum != useAll && useRegNum < pn) pn = useRegNum; +		this->mn = n - pn; +		this->m = Xbyak::util::rsp 
+ rspPos; +		this->p = remain.sub(0, pn); +		remain = remain.sub(pn); +		rspPos += mn * 8; +	} +	size_t size() const { return p.size() + mn; } +	bool isReg(size_t n) const { return n < p.size(); } +	const Xbyak::Reg64& getReg(size_t n) const +	{ +		assert(n < p.size()); +		return p[n]; +	} +	Xbyak::RegExp getMem(size_t n) const +	{ +		const size_t pn = p.size(); +		assert(pn <= n && n < size()); +		return m + (int)((n - pn) * sizeof(size_t)); +	} +	MemReg operator[](size_t n) const +	{ +		const size_t pn = p.size(); +		return MemReg((n < pn) ? &p[n] : 0, (n < pn) ? 0 : &m, n - pn); +	} +	void removeLast() +	{ +		if (!size()) throw cybozu::Exception("MixPack:removeLast:empty"); +		if (mn > 0) { +			mn--; +		} else { +			p = p.sub(0, p.size() - 1); +		} +	} +	/* +		replace Mem with r if possible +	*/ +	bool replaceMemWith(Xbyak::CodeGenerator *code, const Xbyak::Reg64& r) +	{ +		if (mn == 0) return false; +		p.append(r); +		code->mov(r, code->ptr [m]); +		m = m + 8; +		mn--; +		return true; +	} +}; + +} // fp_gen_local + +/* +	op(r, rm); +	r  : reg +	rm : Reg/Mem +*/ +#define MCL_FP_GEN_OP_RM(op, r, rm) \ +if (rm.isReg()) { \ +	op(r, rm.getReg()); \ +} else { \ +	op(r, qword [rm.getMem()]); \ +} + +/* +	op(rm, r); +	rm : Reg/Mem +	r  : reg +*/ +#define MCL_FP_GEN_OP_MR(op, rm, r) \ +if (rm.isReg()) { \ +	op(rm.getReg(), r); \ +} else { \ +	op(qword [rm.getMem()], r); \ +} + +struct FpGenerator : Xbyak::CodeGenerator { +	typedef Xbyak::RegExp RegExp; +	typedef Xbyak::Reg64 Reg64; +	typedef Xbyak::Xmm Xmm; +	typedef Xbyak::Operand Operand; +	typedef Xbyak::util::StackFrame StackFrame; +	typedef Xbyak::util::Pack Pack; +	typedef fp_gen_local::MixPack MixPack; +	typedef fp_gen_local::MemReg MemReg; +	static const int UseRDX = Xbyak::util::UseRDX; +	static const int UseRCX = Xbyak::util::UseRCX; +	Xbyak::util::Cpu cpu_; +	bool useMulx_; +	const uint64_t *p_; +	uint64_t pp_; +	int pn_; +	bool isFullBit_; +	// add/sub without carry. 
return true if overflow
+	typedef bool (*bool3op)(uint64_t*, const uint64_t*, const uint64_t*);
+
+	// add/sub with mod
+	typedef void (*void3op)(uint64_t*, const uint64_t*, const uint64_t*);
+
+	// mul without carry. return top of z
+	typedef uint64_t (*uint3opI)(uint64_t*, const uint64_t*, uint64_t);
+
+	// neg
+	typedef void (*void2op)(uint64_t*, const uint64_t*);
+
+	// preInv
+	typedef int (*int2op)(uint64_t*, const uint64_t*);
+	bool3op addNc_;
+	bool3op subNc_;
+	void3op add_;
+	void3op sub_;
+	void3op mul_;
+	uint3opI mulI_;
+	void2op sqr_;
+	void2op neg_;
+	void2op shr1_;
+	int2op preInv_;
+	/*
+		reserve the code buffer and null every generated entry point;
+		the pointers become valid only after init()
+	*/
+	FpGenerator()
+		: CodeGenerator(4096 * 8)
+		, p_(0)
+		, pp_(0)
+		, pn_(0)
+		, isFullBit_(0)
+		, addNc_(0)
+		, subNc_(0)
+		, add_(0)
+		, sub_(0)
+		, mul_(0)
+		, mulI_(0)
+		, sqr_(0) // was left uninitialized; callers test it against 0
+		, neg_(0)
+		, shr1_(0)
+		, preInv_(0)
+	{
+		useMulx_ = cpu_.has(Xbyak::util::Cpu::tBMI2);
+	}
+	/*
+		@param p [in] pointer to prime
+		@param pn [in] length of prime
+	*/
+	void init(const uint64_t *p, int pn)
+	{
+		if (pn < 2) throw cybozu::Exception("mcl:FpGenerator:small pn") << pn;
+		p_ = p;
+		pp_ = montgomery::getCoff(p[0]);
+		pn_ = pn;
+		isFullBit_ = (p_[pn_ - 1] >> 63) != 0;
+//		printf("p=%p, pn_=%d, isFullBit_=%d\n", p_, pn_, isFullBit_);
+
+		setSize(0); // reset code
+		align(16);
+		addNc_ = getCurr<bool3op>();
+		gen_addSubNc(true);
+		align(16);
+		subNc_ = getCurr<bool3op>();
+		gen_addSubNc(false);
+		align(16);
+		add_ = getCurr<void3op>();
+		gen_addMod();
+		align(16);
+		sub_ = getCurr<void3op>();
+		gen_sub();
+		align(16);
+		neg_ = getCurr<void2op>();
+		gen_neg();
+		align(16);
+		mulI_ = getCurr<uint3opI>();
+		gen_mulI();
+		align(16);
+		mul_ = getCurr<void3op>();
+		gen_mul();
+		align(16);
+		sqr_ = getCurr<void2op>();
+		if (!gen_sqr()) {
+			sqr_ = 0;
+		}
+		align(16);
+		shr1_ = getCurr<void2op>();
+		gen_shr1();
+		preInv_ = getCurr<int2op>();
+		gen_preInv();
+	}
+	void gen_addSubNc(bool isAdd)
+	{
+		StackFrame sf(this, 3);
+		if (isAdd) {
+			
gen_raw_add(sf.p[0], sf.p[1], sf.p[2], rax); +		} else { +			gen_raw_sub(sf.p[0], sf.p[1], sf.p[2], rax); +		} +		setc(al); +		movzx(eax, al); +	} +	/* +		pz[] = px[] + py[] +	*/ +	void gen_raw_add(const RegExp& pz, const RegExp& px, const RegExp& py, const Reg64& t) +	{ +		mov(t, ptr [px]); +		add(t, ptr [py]); +		mov(ptr [pz], t); +		for (int i = 1; i < pn_; i++) { +			mov(t, ptr [px + i * 8]); +			adc(t, ptr [py + i * 8]); +			mov(ptr [pz + i * 8], t); +		} +	} +	/* +		pz[] = px[] - py[] +	*/ +	void gen_raw_sub(const RegExp& pz, const RegExp& px, const RegExp& py, const Reg64& t) +	{ +		mov(t, ptr [px]); +		sub(t, ptr [py]); +		mov(ptr [pz], t); +		for (int i = 1; i < pn_; i++) { +			mov(t, ptr [px + i * 8]); +			sbb(t, ptr [py + i * 8]); +			mov(ptr [pz + i * 8], t); +		} +	} +	/* +		pz[] = -px[] +	*/ +	void gen_raw_neg(const RegExp& pz, const RegExp& px, const Reg64& t0, const Reg64& t1) +	{ +		inLocalLabel(); +		mov(t0, ptr [px]); +		test(t0, t0); +		jnz(".neg"); +		if (pn_ > 1) { +			for (int i = 1; i < pn_; i++) { +				or_(t0, ptr [px + i * 8]); +			} +			jnz(".neg"); +		} +		// zero +		for (int i = 0; i < pn_; i++) { +			mov(ptr [pz + i * 8], t0); +		} +		jmp(".exit"); +	L(".neg"); +		mov(t1, (size_t)p_); +		gen_raw_sub(pz, t1, px, t0); +	L(".exit"); +		outLocalLabel(); +	} +	/* +		(rdx:pz[0..n-1]) = px[0..n-1] * y +		use t, rax, rdx +		if n > 2 +		use +		wk[0] if useMulx_ +		wk[0..n-2] otherwise +	*/ +	void gen_raw_mulI(const RegExp& pz, const RegExp& px, const Reg64& y, const MixPack& wk, const Reg64& t, size_t n) +	{ +		assert(n >= 2); +		if (n == 2) { +			mov(rax, ptr [px]); +			mul(y); +			mov(ptr [pz], rax); +			mov(t, rdx); +			mov(rax, ptr [px + 8]); +			mul(y); +			add(rax, t); +			adc(rdx, 0); +			mov(ptr [pz + 8], rax); +			return; +		} +		if (useMulx_) { +			assert(wk.size() > 0 && wk.isReg(0)); +			const Reg64& t1 = wk.getReg(0); +			// mulx(H, L, x) = [H:L] = x * rdx +			mov(rdx, y); +			mulx(t1, rax, ptr [px]); // [y:rax] = px * y +			
mov(ptr [pz], rax); +			const Reg64 *pt0 = &t; +			const Reg64 *pt1 = &t1; +			for (size_t i = 1; i < n - 1; i++) { +				mulx(*pt0, rax, ptr [px + i * 8]); +				if (i == 1) { +					add(rax, *pt1); +				} else { +					adc(rax, *pt1); +				} +				mov(ptr [pz + i * 8], rax); +				std::swap(pt0, pt1); +			} +			mulx(rdx, rax, ptr [px + (n - 1) * 8]); +			adc(rax, *pt1); +			mov(ptr [pz + (n - 1) * 8], rax); +			adc(rdx, 0); +			return; +		} +		assert(wk.size() >= n - 1); +		for (size_t i = 0; i < n; i++) { +			mov(rax, ptr [px + i * 8]); +			mul(y); +			if (i < n - 1) { +				mov(ptr [pz + i * 8], rax); +				g_mov(wk[i], rdx); +			} +		} +		for (size_t i = 1; i < n - 1; i++) { +			mov(t, ptr [pz + i * 8]); +			if (i == 1) { +				g_add(t, wk[i - 1]); +			} else { +				g_adc(t, wk[i - 1]); +			} +			mov(ptr [pz + i * 8], t); +		} +		g_adc(rax, wk[n - 2]); +		mov(ptr [pz + (n - 1) * 8], rax); +		adc(rdx, 0); +	} +	void gen_mulI() +	{ +		assert(pn_ >= 2); +		const int regNum = useMulx_ ? 2 : (1 + std::min(pn_ - 1, 8)); +		const int stackSize = useMulx_ ? 
0 : (pn_ - 1) * 8; +		StackFrame sf(this, 3, regNum | UseRDX, stackSize); +		const Reg64& pz = sf.p[0]; +		const Reg64& px = sf.p[1]; +		const Reg64& y = sf.p[2]; +		size_t rspPos = 0; +		Pack remain = sf.t.sub(1); +		MixPack wk(remain, rspPos, pn_ - 1); +		gen_raw_mulI(pz, px, y, wk, sf.t[0], pn_); +		mov(rax, rdx); +	} +	/* +		pz[] = px[] +	*/ +	void gen_mov(const RegExp& pz, const RegExp& px, const Reg64& t, int n) +	{ +		for (int i = 0; i < n; i++) { +			mov(t, ptr [px + i * 8]); +			mov(ptr [pz + i * 8], t); +		} +	} +	void gen_addMod3() +	{ +		StackFrame sf(this, 3, 7); +		const Reg64& pz = sf.p[0]; +		const Reg64& px = sf.p[1]; +		const Reg64& py = sf.p[2]; + +		const Reg64& t0 = sf.t[0]; +		const Reg64& t1 = sf.t[1]; +		const Reg64& t2 = sf.t[2]; +		const Reg64& t3 = sf.t[3]; +		const Reg64& t4 = sf.t[4]; +		const Reg64& t5 = sf.t[5]; +		const Reg64& t6 = sf.t[6]; + +		xor_(t6, t6); +		load_rm(Pack(t2, t1, t0), px); +		add_rm(Pack(t2, t1, t0), py); +		mov_rr(Pack(t5, t4, t3), Pack(t2, t1, t0)); +		adc(t6, 0); +		mov(rax, (size_t)p_); +		sub_rm(Pack(t5, t4, t3), rax); +		sbb(t6, 0); +		cmovc(t5, t2); +		cmovc(t4, t1); +		cmovc(t3, t0); +		store_mr(pz, Pack(t5, t4, t3)); +	} +	void gen_subMod_le4(int n) +	{ +		assert(2 <= n && n <= 4); +		StackFrame sf(this, 3, (n - 1) * 2); +		const Reg64& pz = sf.p[0]; +		const Reg64& px = sf.p[1]; +		const Reg64& py = sf.p[2]; + +		Pack rx = sf.t.sub(0, n - 1); +		rx.append(px); // rx = [px, t1, t0] +		Pack ry = sf.t.sub(n - 1, n - 1); +		ry.append(rax); // ry = [rax, t3, t2] + +		load_rm(rx, px); // destroy px +		sub_rm(rx, py); +#if 0 +		sbb(ry[0], ry[0]); // rx[0] = (x > y) ? 0 : -1 +		for (int i = 1; i < n; i++) mov(ry[i], ry[0]); +		mov(py, (size_t)p_); +		for (int i = 0; i < n; i++) and_(ry[i], qword [py + 8 * i]); +		add_rr(rx, ry); +#else +		// a little faster +		sbb(py, py); // py = (x > y) ? 
0 : -1 +		mov(rax, (size_t)p_); +		load_rm(ry, rax); // destroy rax +		for (size_t i = 0; i < ry.size(); i++) { +			and_(ry[i], py); +		} +		add_rr(rx, ry); +#endif +		store_mr(pz, rx); +	} +	void gen_addMod() +	{ +		if (pn_ == 3) { +			gen_addMod3(); +			return; +		} +		StackFrame sf(this, 3, 0, pn_ * 8); +		const Reg64& pz = sf.p[0]; +		const Reg64& px = sf.p[1]; +		const Reg64& py = sf.p[2]; +		const Xbyak::CodeGenerator::LabelType jmpMode = pn_ < 5 ? T_AUTO : T_NEAR; + +		inLocalLabel(); +		gen_raw_add(pz, px, py, rax); +		mov(px, (size_t)p_); // destroy px +		if (isFullBit_) { +			jc(".over", jmpMode); +		} +#ifdef MCL_USE_JMP +		for (int i = 0; i < pn_; i++) { +			mov(py, ptr [pz + (pn_ - 1 - i) * 8]); // destroy py +			cmp(py, ptr [px + (pn_ - 1 - i) * 8]); +			jc(".exit", jmpMode); +			jnz(".over", jmpMode); +		} +		L(".over"); +			gen_raw_sub(pz, pz, px, rax); +		L(".exit"); +#else +		gen_raw_sub(rsp, pz, px, rax); +		jc(".exit", jmpMode); +		gen_mov(pz, rsp, rax, pn_); +		if (isFullBit_) { +			jmp(".exit", jmpMode); +			L(".over"); +			gen_raw_sub(pz, pz, px, rax); +		} +		L(".exit"); +#endif +		outLocalLabel(); +	} +	void gen_sub() +	{ +		if (pn_ <= 4) { +			gen_subMod_le4(pn_); +			return; +		} +		StackFrame sf(this, 3); +		const Reg64& pz = sf.p[0]; +		const Reg64& px = sf.p[1]; +		const Reg64& py = sf.p[2]; +		const Xbyak::CodeGenerator::LabelType jmpMode = pn_ < 5 ? 
T_AUTO : T_NEAR; + +		inLocalLabel(); +		gen_raw_sub(pz, px, py, rax); +		jnc(".exit", jmpMode); +		mov(px, (size_t)p_); +		gen_raw_add(pz, pz, px, rax); +	L(".exit"); +		outLocalLabel(); +	} +	void gen_neg() +	{ +		StackFrame sf(this, 2, 2); +		const Reg64& pz = sf.p[0]; +		const Reg64& px = sf.p[1]; +		gen_raw_neg(pz, px, sf.t[0], sf.t[1]); +	} +	void gen_shr1() +	{ +		const int c = 1; +		StackFrame sf(this, 2, 1); +		const Reg64 *t0 = &rax; +		const Reg64 *t1 = &sf.t[0]; +		const Reg64& pz = sf.p[0]; +		const Reg64& px = sf.p[1]; +		mov(*t0, ptr [px]); +		for (int i = 0; i < pn_ - 1; i++) { +			mov(*t1, ptr [px + 8 * (i + 1)]); +			shrd(*t0, *t1, c); +			mov(ptr [pz + i * 8], *t0); +			std::swap(t0, t1); +		} +		shr(*t0, c); +		mov(ptr [pz + (pn_ - 1) * 8], *t0); +	} +	void gen_mul() +	{ +		if (pn_ == 3) { +			gen_montMul3(p_, pp_); +		} else if (pn_ == 4) { +			gen_montMul4(p_, pp_); +		} else if (pn_ <= 9) { +			gen_montMulN(p_, pp_, pn_); +		} else { +			throw cybozu::Exception("mcl:FpGenerator:gen_mul:not implemented for") << pn_; +		} +	} +	bool gen_sqr() +	{ +		if (pn_ == 3) { +			gen_montSqr3(p_, pp_); +			return true; +		} +		return false; +	} +	/* +		input (pz[], px[], py[]) +		z[] <- montgomery(x[], y[]) +	*/ +	void gen_montMulN(const uint64_t *p, uint64_t pp, int n) +	{ +		assert(2 <= pn_ && pn_ <= 9); +		const int regNum = useMulx_ ? 4 : 3 + std::min(n - 1, 7); +		const int stackSize = (n * 3 + (isFullBit_ ? 
2 : 1)) * 8; +		StackFrame sf(this, 3, regNum | UseRDX, stackSize); +		const Reg64& pz = sf.p[0]; +		const Reg64& px = sf.p[1]; +		const Reg64& py = sf.p[2]; +		const Reg64& y = sf.t[0]; +		const Reg64& pAddr = sf.t[1]; +		const Reg64& t = sf.t[2]; +		Pack remain = sf.t.sub(3); +		size_t rspPos = 0; + +		MixPack pw1(remain, rspPos, n - 1); +		const RegExp pw2 = rsp + rspPos; // pw2[0..n-1] +		const RegExp pc = pw2 + n * 8; // pc[0..n+1] +		mov(pAddr, (size_t)p); + +		for (int i = 0; i < n; i++) { +			mov(y, ptr [py + i * 8]); +			montgomeryN_1(pp, n, pc, px, y, pAddr, t, pw1, pw2, i == 0); +		} +		// pz[] = pc[] - p[] +		gen_raw_sub(pz, pc, pAddr, t); +		if (isFullBit_) sbb(qword[pc + n * 8], 0); +		jnc("@f"); +		for (int i = 0; i < n; i++) { +			mov(t, ptr [pc + i * 8]); +			mov(ptr [pz + i * 8], t); +		} +	L("@@"); +	} +	/* +		input (z, x, y) = (p0, p1, p2) +		z[0..3] <- montgomery(x[0..3], y[0..3]) +		destroy gt0, ..., gt9, xm0, xm1, p2 +	*/ +	void gen_montMul4(const uint64_t *p, uint64_t pp) +	{ +		StackFrame sf(this, 3, 10 | UseRDX); +		const Reg64& p0 = sf.p[0]; +		const Reg64& p1 = sf.p[1]; +		const Reg64& p2 = sf.p[2]; + +		const Reg64& t0 = sf.t[0]; +		const Reg64& t1 = sf.t[1]; +		const Reg64& t2 = sf.t[2]; +		const Reg64& t3 = sf.t[3]; +		const Reg64& t4 = sf.t[4]; +		const Reg64& t5 = sf.t[5]; +		const Reg64& t6 = sf.t[6]; +		const Reg64& t7 = sf.t[7]; +		const Reg64& t8 = sf.t[8]; +		const Reg64& t9 = sf.t[9]; + +		movq(xm0, p0); // save p0 +		mov(p0, (uint64_t)p); +		movq(xm1, p2); +		mov(p2, ptr [p2]); +		montgomery4_1(pp, t0, t7, t3, t2, t1, p1, p2, p0, t4, t5, t6, t8, t9, true, xm2); + +		movq(p2, xm1); +		mov(p2, ptr [p2 + 8]); +		montgomery4_1(pp, t1, t0, t7, t3, t2, p1, p2, p0, t4, t5, t6, t8, t9, false, xm2); + +		movq(p2, xm1); +		mov(p2, ptr [p2 + 16]); +		montgomery4_1(pp, t2, t1, t0, t7, t3, p1, p2, p0, t4, t5, t6, t8, t9, false, xm2); + +		movq(p2, xm1); +		mov(p2, ptr [p2 + 24]); +		montgomery4_1(pp, t3, t2, t1, t0, t7, p1, p2, p0, t4, 
t5, t6, t8, t9, false, xm2); +		// [t7:t3:t2:t1:t0] + +		mov(t4, t0); +		mov(t5, t1); +		mov(t6, t2); +		mov(rdx, t3); +		sub_rm(Pack(t3, t2, t1, t0), p0); +		if (isFullBit_) sbb(t7, 0); +		cmovc(t0, t4); +		cmovc(t1, t5); +		cmovc(t2, t6); +		cmovc(t3, rdx); + +		movq(p0, xm0); // load p0 +		store_mr(p0, Pack(t3, t2, t1, t0)); +	} +	/* +		input (z, x, y) = (p0, p1, p2) +		z[0..2] <- montgomery(x[0..2], y[0..2]) +		destroy gt0, ..., gt9, xm0, xm1, p2 +	*/ +	void gen_montMul3(const uint64_t *p, uint64_t pp) +	{ +		StackFrame sf(this, 3, 10 | UseRDX); +		const Reg64& p0 = sf.p[0]; +		const Reg64& p1 = sf.p[1]; +		const Reg64& p2 = sf.p[2]; + +		const Reg64& t0 = sf.t[0]; +		const Reg64& t1 = sf.t[1]; +		const Reg64& t2 = sf.t[2]; +		const Reg64& t3 = sf.t[3]; +		const Reg64& t4 = sf.t[4]; +		const Reg64& t5 = sf.t[5]; +		const Reg64& t6 = sf.t[6]; +		const Reg64& t7 = sf.t[7]; +		const Reg64& t8 = sf.t[8]; +		const Reg64& t9 = sf.t[9]; + +		movq(xm0, p0); // save p0 +		mov(t7, (uint64_t)p); +		mov(t9, ptr [p2]); +		//                c3, c2, c1, c0, px, y,  p, +		montgomery3_1(pp, t0, t3, t2, t1, p1, t9, t7, t4, t5, t6, t8, p0, true); +		mov(t9, ptr [p2 + 8]); +		montgomery3_1(pp, t1, t0, t3, t2, p1, t9, t7, t4, t5, t6, t8, p0, false); + +		mov(t9, ptr [p2 + 16]); +		montgomery3_1(pp, t2, t1, t0, t3, p1, t9, t7, t4, t5, t6, t8, p0, false); + +		// [(t3):t2:t1:t0] +		mov(t4, t0); +		mov(t5, t1); +		mov(t6, t2); +		sub_rm(Pack(t2, t1, t0), t7); +		if (isFullBit_) sbb(t3, 0); +		cmovc(t0, t4); +		cmovc(t1, t5); +		cmovc(t2, t6); +		movq(p0, xm0); +		store_mr(p0, Pack(t2, t1, t0)); +	} +	/* +		input (pz, px) +		z[0..2] <- montgomery(px[0..2], px[0..2]) +		destroy gt0, ..., gt9, xm0, xm1, p2 +	*/ +	void gen_montSqr3(const uint64_t *p, uint64_t pp) +	{ +		StackFrame sf(this, 3, 10 | UseRDX, 16 * 3); +		const Reg64& pz = sf.p[0]; +		const Reg64& px = sf.p[1]; +//		const Reg64& py = sf.p[2]; // not used + +		const Reg64& t0 = sf.t[0]; +		const Reg64& t1 = sf.t[1]; +		const 
Reg64& t2 = sf.t[2]; +		const Reg64& t3 = sf.t[3]; +		const Reg64& t4 = sf.t[4]; +		const Reg64& t5 = sf.t[5]; +		const Reg64& t6 = sf.t[6]; +		const Reg64& t7 = sf.t[7]; +		const Reg64& t8 = sf.t[8]; +		const Reg64& t9 = sf.t[9]; + +		movq(xm0, pz); // save pz +		mov(t7, (uint64_t)p); +		mov(t9, ptr [px]); +		mul3x1_sqr1(px, t9, t3, t2, t1, t0); +		mov(t0, rdx); +		montgomery3_sub(pp, t0, t9, t2, t1, px, t3, t7, t4, t5, t6, t8, pz, true); + +		mov(t3, ptr [px + 8]); +		mul3x1_sqr2(px, t3, t6, t5, t4); +		add_rr(Pack(t1, t0, t9, t2), Pack(rdx, rax, t5, t4)); +		if (isFullBit_) setc(pz.cvt8()); +		montgomery3_sub(pp, t1, t3, t9, t2, px, t0, t7, t4, t5, t6, t8, pz, false); + +		mov(t0, ptr [px + 16]); +		mul3x1_sqr3(t0, t5, t4); +		add_rr(Pack(t2, t1, t3, t9), Pack(rdx, rax, t5, t4)); +		if (isFullBit_) setc(pz.cvt8()); +		montgomery3_sub(pp, t2, t0, t3, t9, px, t1, t7, t4, t5, t6, t8, pz, false); + +		// [t9:t2:t0:t3] +		mov(t4, t3); +		mov(t5, t0); +		mov(t6, t2); +		sub_rm(Pack(t2, t0, t3), t7); +		if (isFullBit_) sbb(t9, 0); +		cmovc(t3, t4); +		cmovc(t0, t5); +		cmovc(t2, t6); +		movq(pz, xm0); +		store_mr(pz, Pack(t2, t0, t3)); +	} +	static inline void debug_put_inner(const uint64_t *ptr, int n) +	{ +		printf("debug "); +		for (int i = 0; i < n; i++) { +			printf("%016llx", (long long)ptr[n - 1 - i]); +		} +		printf("\n"); +	} +#ifdef _MSC_VER +	void debug_put(const RegExp& m, int n) +	{ +		assert(n <= 8); +		static uint64_t regBuf[7]; + +		push(rax); +		mov(rax, (size_t)regBuf); +		mov(ptr [rax + 8 * 0], rcx); +		mov(ptr [rax + 8 * 1], rdx); +		mov(ptr [rax + 8 * 2], r8); +		mov(ptr [rax + 8 * 3], r9); +		mov(ptr [rax + 8 * 4], r10); +		mov(ptr [rax + 8 * 5], r11); +		mov(rcx, ptr [rsp + 8]); // org rax +		mov(ptr [rax + 8 * 6], rcx); // save +		mov(rcx, ptr [rax + 8 * 0]); // org rcx +		pop(rax); + +		lea(rcx, ptr [m]); +		mov(rdx, n); +		mov(rax, (size_t)debug_put_inner); +		sub(rsp, 32); +		call(rax); +		add(rsp, 32); + +		push(rax); +		mov(rax, 
(size_t)regBuf); +		mov(rcx, ptr [rax + 8 * 0]); +		mov(rdx, ptr [rax + 8 * 1]); +		mov(r8, ptr [rax + 8 * 2]); +		mov(r9, ptr [rax + 8 * 3]); +		mov(r10, ptr [rax + 8 * 4]); +		mov(r11, ptr [rax + 8 * 5]); +		mov(rax, ptr [rax + 8 * 6]); +		add(rsp, 8); +	} +#endif +	/* +		z >>= c +		@note shrd(r/m, r, imm) +	*/ +	void shr_mp(const MixPack& z, uint8_t c, const Reg64& t) +	{ +		const size_t n = z.size(); +		for (size_t i = 0; i < n - 1; i++) { +			const Reg64 *p; +			if (z.isReg(i + 1)) { +				p = &z.getReg(i + 1); +			} else { +				mov(t, ptr [z.getMem(i + 1)]); +				p = &t; +			} +			if (z.isReg(i)) { +				shrd(z.getReg(i), *p, c); +			} else { +				shrd(qword [z.getMem(i)], *p, c); +			} +		} +		if (z.isReg(n - 1)) { +			shr(z.getReg(n - 1), c); +		} else { +			shr(qword [z.getMem(n - 1)], c); +		} +	} +	/* +		z *= 2 +	*/ +	void twice_mp(const MixPack& z, const Reg64& t) +	{ +		g_add(z[0], z[0], t); +		for (size_t i = 1, n = z.size(); i < n; i++) { +			g_adc(z[i], z[i], t); +		} +	} +	/* +		z += x +	*/ +	void add_mp(const MixPack& z, const MixPack& x, const Reg64& t) +	{ +		assert(z.size() == x.size()); +		g_add(z[0], x[0], t); +		for (size_t i = 1, n = z.size(); i < n; i++) { +			g_adc(z[i], x[i], t); +		} +	} +	void add_m_m(const RegExp& mz, const RegExp& mx, const Reg64& t, int n) +	{ +		for (int i = 0; i < n; i++) { +			mov(t, ptr [mx + i * 8]); +			if (i == 0) { +				add(ptr [mz + i * 8], t); +			} else { +				adc(ptr [mz + i * 8], t); +			} +		} +	} +	/* +		mz[] = mx[] - y +	*/ +	void sub_m_mp_m(const RegExp& mz, const RegExp& mx, const MixPack& y, const Reg64& t) +	{ +		for (size_t i = 0; i < y.size(); i++) { +			mov(t, ptr [mx + i * 8]); +			if (i == 0) { +				if (y.isReg(i)) { +					sub(t, y.getReg(i)); +				} else { +					sub(t, ptr [y.getMem(i)]); +				} +			} else { +				if (y.isReg(i)) { +					sbb(t, y.getReg(i)); +				} else { +					sbb(t, ptr [y.getMem(i)]); +				} +			} +			mov(ptr [mz + i * 8], t); +		} +	} +	/* +		z -= x +	*/ +	void 
sub_mp(const MixPack& z, const MixPack& x, const Reg64& t) +	{ +		assert(z.size() == x.size()); +		g_sub(z[0], x[0], t); +		for (size_t i = 1, n = z.size(); i < n; i++) { +			g_sbb(z[i], x[i], t); +		} +	} +	/* +		z -= px[] +	*/ +	void sub_mp_m(const MixPack& z, const RegExp& px, const Reg64& t) +	{ +		if (z.isReg(0)) { +			sub(z.getReg(0), ptr [px]); +		} else { +			mov(t, ptr [px]); +			sub(ptr [z.getMem(0)], t); +		} +		for (size_t i = 1, n = z.size(); i < n; i++) { +			if (z.isReg(i)) { +				sbb(z.getReg(i), ptr [px + i * 8]); +			} else { +				mov(t, ptr [px + i * 8]); +				sbb(ptr [z.getMem(i)], t); +			} +		} +	} +	void store_mp(const RegExp& m, const MixPack& z, const Reg64& t) +	{ +		for (size_t i = 0, n = z.size(); i < n; i++) { +			if (z.isReg(i)) { +				mov(ptr [m + i * 8], z.getReg(i)); +			} else { +				mov(t, ptr [z.getMem(i)]); +				mov(ptr [m + i * 8], t); +			} +		} +	} +	void load_mp(const MixPack& z, const RegExp& m, const Reg64& t) +	{ +		for (size_t i = 0, n = z.size(); i < n; i++) { +			if (z.isReg(i)) { +				mov(z.getReg(i), ptr [m + i * 8]); +			} else { +				mov(t, ptr [m + i * 8]); +				mov(ptr [z.getMem(i)], t); +			} +		} +	} +	void set_mp(const MixPack& z, const Reg64& t) +	{ +		for (size_t i = 0, n = z.size(); i < n; i++) { +			MCL_FP_GEN_OP_MR(mov, z[i], t) +		} +	} +	void mov_mp(const MixPack& z, const MixPack& x, const Reg64& t) +	{ +		for (size_t i = 0, n = z.size(); i < n; i++) { +			const MemReg zi = z[i], xi = x[i]; +			if (z.isReg(i)) { +				MCL_FP_GEN_OP_RM(mov, zi.getReg(), xi) +			} else { +				if (x.isReg(i)) { +					mov(ptr [z.getMem(i)], x.getReg(i)); +				} else { +					mov(t, ptr [x.getMem(i)]); +					mov(ptr [z.getMem(i)], t); +				} +			} +		} +	} +#ifdef _MSC_VER +	void debug_put_mp(const MixPack& mp, int n, const Reg64& t) +	{ +		if (n >= 10) exit(1); +		static uint64_t buf[10]; +		movq(xm0, rax); +		mov(rax, (size_t)buf); +		store_mp(rax, mp, t); +		movq(rax, xm0); +		push(rax); +		mov(rax, (size_t)buf); +		
debug_put(rax, n); +		pop(rax); +	} +#endif + +	std::string mkLabel(const char *label, int n) const +	{ +		return std::string(label) + Xbyak::Label::toStr(n); +	} +	/* +		int k = preInvC(pr, px) +	*/ +	void gen_preInv() +	{ +		assert(pn_ >= 2); +		const int freeRegNum = 13; +		if (pn_ > 9) { +			throw cybozu::Exception("mcl:FpGenerator:gen_preInv:large pn_") << pn_; +		} +		StackFrame sf(this, 2, 10 | UseRDX | UseRCX, (std::max<int>(0, pn_ * 5 - freeRegNum) + 1 + (isFullBit_ ? 1 : 0)) * 8); +		const Reg64& pr = sf.p[0]; +		const Reg64& px = sf.p[1]; +		const Reg64& t = rcx; +		/* +			k = rax, t = rcx : temporary +			use rdx, pr, px in main loop, so we can use 13 registers +			v = t[0, pn_) : all registers +		*/ +		size_t rspPos = 0; + +		assert(sf.t.size() >= (size_t)pn_); +		Pack remain = sf.t; + +		const MixPack rr(remain, rspPos, pn_); +		remain.append(rdx); +		const MixPack ss(remain, rspPos, pn_); +		remain.append(px); +		const int rSize = (int)remain.size(); +		MixPack vv(remain, rspPos, pn_, rSize > 0 ? 
rSize / 2 : -1); +		remain.append(pr); +		MixPack uu(remain, rspPos, pn_); + +		const RegExp keep_pr = rsp + rspPos; +		rspPos += 8; +		const RegExp rTop = rsp + rspPos; // used if isFullBit_ + +		inLocalLabel(); +		mov(ptr [keep_pr], pr); +		mov(rax, px); +		// px is free from here +		load_mp(vv, rax, t); // v = x +		mov(rax, (size_t)p_); +		load_mp(uu, rax, t); // u = p_ +		// k = 0 +		xor_(rax, rax); +		// rTop = 0 +		if (isFullBit_) { +			mov(ptr [rTop], rax); +		} +		// r = 0; +		set_mp(rr, rax); +		// s = 1 +		set_mp(ss, rax); +		if (ss.isReg(0)) { +			mov(ss.getReg(0), 1); +		} else { +			mov(qword [ss.getMem(0)], 1); +		} +#if 0 +	L(".lp"); +		or_mp(vv, t); +		jz(".exit", T_NEAR); + +		g_test(uu[0], 1); +		jz(".u_even", T_NEAR); +		g_test(vv[0], 1); +		jz(".v_even", T_NEAR); +		for (int i = pn_ - 1; i >= 0; i--) { +			g_cmp(vv[i], uu[i], t); +			jc(".v_lt_u", T_NEAR); +			if (i > 0) jnz(".v_ge_u", T_NEAR); +		} + +	L(".v_ge_u"); +		sub_mp(vv, uu, t); +		add_mp(ss, rr, t); +	L(".v_even"); +		shr_mp(vv, 1, t); +		twice_mp(rr, t); +		if (isFullBit_) { +			sbb(t, t); +			mov(ptr [rTop], t); +		} +		inc(rax); +		jmp(".lp", T_NEAR); +	L(".v_lt_u"); +		sub_mp(uu, vv, t); +		add_mp(rr, ss, t); +		if (isFullBit_) { +			sbb(t, t); +			mov(ptr [rTop], t); +		} +	L(".u_even"); +		shr_mp(uu, 1, t); +		twice_mp(ss, t); +		inc(rax); +		jmp(".lp", T_NEAR); +#else +		for (int cn = pn_; cn > 0; cn--) { +			const std::string _lp = mkLabel(".lp", cn); +			const std::string _u_v_odd = mkLabel(".u_v_odd", cn); +			const std::string _u_even = mkLabel(".u_even", cn); +			const std::string _v_even = mkLabel(".v_even", cn); +			const std::string _v_ge_u = mkLabel(".v_ge_u", cn); +			const std::string _v_lt_u = mkLabel(".v_lt_u", cn); +		L(_lp); +			or_mp(vv, t); +			jz(".exit", T_NEAR); + +			g_test(uu[0], 1); +			jz(_u_even, T_NEAR); +			g_test(vv[0], 1); +			jz(_v_even, T_NEAR); +		L(_u_v_odd); +			if (cn > 1) { +				isBothZero(vv[cn - 1], uu[cn - 1], t); +				
jz(mkLabel(".u_v_odd", cn - 1), T_NEAR); +			} +			for (int i = cn - 1; i >= 0; i--) { +				g_cmp(vv[i], uu[i], t); +				jc(_v_lt_u, T_NEAR); +				if (i > 0) jnz(_v_ge_u, T_NEAR); +			} + +		L(_v_ge_u); +			sub_mp(vv, uu, t); +			add_mp(ss, rr, t); +		L(_v_even); +			shr_mp(vv, 1, t); +			twice_mp(rr, t); +			if (isFullBit_) { +				sbb(t, t); +				mov(ptr [rTop], t); +			} +			inc(rax); +			jmp(_lp, T_NEAR); +		L(_v_lt_u); +			sub_mp(uu, vv, t); +			add_mp(rr, ss, t); +			if (isFullBit_) { +				sbb(t, t); +				mov(ptr [rTop], t); +			} +		L(_u_even); +			shr_mp(uu, 1, t); +			twice_mp(ss, t); +			inc(rax); +			jmp(_lp, T_NEAR); + +			if (cn > 0) { +				vv.removeLast(); +				uu.removeLast(); +			} +		} +#endif +	L(".exit"); +		assert(ss.isReg(0) && ss.isReg(1)); +		const Reg64& t2 = ss.getReg(0); +		const Reg64& t3 = ss.getReg(1); + +		mov(t2, (size_t)p_); +		if (isFullBit_) { +			mov(t, ptr [rTop]); +			test(t, t); +			jz("@f"); +			sub_mp_m(rr, t2, t); +		L("@@"); +		} +		mov(t3, ptr [keep_pr]); +		// pr[] = p[] - rr +		sub_m_mp_m(t3, t2, rr, t); +		jnc("@f"); +		// pr[] += p[] +		add_m_m(t3, t2, t, pn_); +	L("@@"); +		outLocalLabel(); +	} +	void mov32c(const Reg64& r, uint64_t c) +	{ +		if (c & 0xffffffff00000000ULL) { +			mov(r, c); +		} else { +			mov(Xbyak::Reg32(r.getIdx()), (uint32_t)c); +		} +	} +private: +	FpGenerator(const FpGenerator&); +	void operator=(const FpGenerator&); +	void make_op_rm(void (Xbyak::CodeGenerator::*op)(const Xbyak::Operand&, const Xbyak::Operand&), const Reg64& op1, const MemReg& op2) +	{ +		if (op2.isReg()) { +			(this->*op)(op1, op2.getReg()); +		} else { +			(this->*op)(op1, qword [op2.getMem()]); +		} +	} +	void make_op_mr(void (Xbyak::CodeGenerator::*op)(const Xbyak::Operand&, const Xbyak::Operand&), const MemReg& op1, const Reg64& op2) +	{ +		if (op1.isReg()) { +			(this->*op)(op1.getReg(), op2); +		} else { +			(this->*op)(qword [op1.getMem()], op2); +		} +	} +	void make_op(void (Xbyak::CodeGenerator::*op)(const 
Xbyak::Operand&, const Xbyak::Operand&), const MemReg& op1, const MemReg& op2, const Reg64& t) +	{ +		if (op1.isReg()) { +			make_op_rm(op, op1.getReg(), op2); +		} else if (op2.isReg()) { +			(this->*op)(ptr [op1.getMem()], op2.getReg()); +		} else { +			mov(t, ptr [op2.getMem()]); +			(this->*op)(ptr [op1.getMem()], t); +		} +	} +	void g_add(const MemReg& op1, const MemReg& op2, const Reg64& t) { make_op(&Xbyak::CodeGenerator::add, op1, op2, t); } +	void g_adc(const MemReg& op1, const MemReg& op2, const Reg64& t) { make_op(&Xbyak::CodeGenerator::adc, op1, op2, t); } +	void g_sub(const MemReg& op1, const MemReg& op2, const Reg64& t) { make_op(&Xbyak::CodeGenerator::sub, op1, op2, t); } +	void g_sbb(const MemReg& op1, const MemReg& op2, const Reg64& t) { make_op(&Xbyak::CodeGenerator::sbb, op1, op2, t); } +	void g_cmp(const MemReg& op1, const MemReg& op2, const Reg64& t) { make_op(&Xbyak::CodeGenerator::cmp, op1, op2, t); } +	void g_or(const Reg64& r, const MemReg& op) { make_op_rm(&Xbyak::CodeGenerator::or_, r, op); } +	void g_test(const MemReg& op1, const MemReg& op2, const Reg64& t) +	{ +		const MemReg *pop1 = &op1; +		const MemReg *pop2 = &op2; +		if (!pop1->isReg()) { +			std::swap(pop1, pop2); +		} +		// (M, M), (R, M), (R, R) +		if (pop1->isReg()) { +			MCL_FP_GEN_OP_MR(test, (*pop2), pop1->getReg()) +		} else { +			mov(t, ptr [pop1->getMem()]); +			test(ptr [pop2->getMem()], t); +		} +	} +	void g_mov(const MemReg& op, const Reg64& r) +	{ +		make_op_mr(&Xbyak::CodeGenerator::mov, op, r); +	} +	void g_mov(const Reg64& r, const MemReg& op) +	{ +		make_op_rm(&Xbyak::CodeGenerator::mov, r, op); +	} +	void g_add(const Reg64& r, const MemReg& mr) { MCL_FP_GEN_OP_RM(add, r, mr) } +	void g_adc(const Reg64& r, const MemReg& mr) { MCL_FP_GEN_OP_RM(adc, r, mr) } +	void isBothZero(const MemReg& op1, const MemReg& op2, const Reg64& t) +	{ +		g_mov(t, op1); +		g_or(t, op2); +	} +	void g_test(const MemReg& op, int imm) +	{ +		MCL_FP_GEN_OP_MR(test, op, imm) +	} +	/* +		z[] 
= x[] +	*/ +	void mov_rr(const Pack& z, const Pack& x) +	{ +		assert(z.size() == x.size()); +		for (int i = 0, n = (int)x.size(); i < n; i++) { +			mov(z[i], x[i]); +		} +	} +	/* +		m[] = x[] +	*/ +	void store_mr(const RegExp& m, const Pack& x) +	{ +		for (int i = 0, n = (int)x.size(); i < n; i++) { +			mov(ptr [m + 8 * i], x[i]); +		} +	} +	/* +		x[] = m[] +	*/ +	void load_rm(const Pack& z, const RegExp& m) +	{ +		for (int i = 0, n = (int)z.size(); i < n; i++) { +			mov(z[i], ptr [m + 8 * i]); +		} +	} +	/* +		z[] += x[] +	*/ +	void add_rr(const Pack& z, const Pack& x) +	{ +		add(z[0], x[0]); +		assert(z.size() == x.size()); +		for (size_t i = 1, n = z.size(); i < n; i++) { +			adc(z[i], x[i]); +		} +	} +	/* +		z[] -= x[] +	*/ +	void sub_rr(const Pack& z, const Pack& x) +	{ +		sub(z[0], x[0]); +		assert(z.size() == x.size()); +		for (size_t i = 1, n = z.size(); i < n; i++) { +			sbb(z[i], x[i]); +		} +	} +	/* +		z[] += m[] +	*/ +	void add_rm(const Pack& z, const RegExp& m) +	{ +		add(z[0], ptr [m + 8 * 0]); +		for (int i = 1, n = (int)z.size(); i < n; i++) { +			adc(z[i], ptr [m + 8 * i]); +		} +	} +	/* +		z[] -= m[] +	*/ +	void sub_rm(const Pack& z, const RegExp& m) +	{ +		sub(z[0], ptr [m + 8 * 0]); +		for (int i = 1, n = (int)z.size(); i < n; i++) { +			sbb(z[i], ptr [m + 8 * i]); +		} +	} +	/* +		t = all or z[i] +		ZF = z is zero +	*/ +	void or_mp(const MixPack& z, const Reg64& t) +	{ +		const size_t n = z.size(); +		if (n == 1) { +			if (z.isReg(0)) { +				test(z.getReg(0), z.getReg(0)); +			} else { +				mov(t, ptr [z.getMem(0)]); +				test(t, t); +			} +		} else { +			g_mov(t, z[0]); +			for (size_t i = 1; i < n; i++) { +				g_or(t, z[i]); +			} +		} +	} +	/* +		[rdx:x:t1:t0] <- py[2:1:0] * x +		destroy x, t +	*/ +	void mul3x1(const RegExp& py, const Reg64& x, const Reg64& t2, const Reg64& t1, const Reg64& t0, const Reg64& t) +	{ +		if (useMulx_) { +			// mulx(H, L, x) = [H:L] = x * rdx +			/* +				rdx:x +				    t:t1 +				      rax:t0 +			*/ +			mov(rdx, 
x); +			mulx(rax, t0, ptr [py]); // [rax:t0] = py[0] * x +			mulx(t, t1, ptr [py + 8]); // [t:t1] = py[1] * x +			add(t1, rax); +			mulx(rdx, x, ptr [py + 8 * 2]); +			adc(x, t); +			adc(rdx, 0); +		} else { +			mov(rax, ptr [py]); +			mul(x); +			mov(t0, rax); +			mov(t1, rdx); +			mov(rax, ptr [py + 8]); +			mul(x); +			mov(t, rax); +			mov(t2, rdx); +			mov(rax, ptr [py + 8 * 2]); +			mul(x); +			/* +				rdx:rax +				     t2:t +				        t1:t0 +			*/ +			add(t1, t); +			adc(rax, t2); +			adc(rdx, 0); +			mov(x, rax); +		} +	} +	/* +		[x2:x1:x0] * x0 +	*/ +	void mul3x1_sqr1(const RegExp& px, const Reg64& x0, const Reg64& t2, const Reg64& t1, const Reg64& t0, const Reg64& t) +	{ +		mov(rax, x0); +		mul(x0); +		mov(t0, rax); +		mov(t1, rdx); +		mov(rax, ptr [px + 8]); +		mul(x0); +		mov(ptr [rsp + 0 * 8], rax); // (x0 * x1)_L +		mov(ptr [rsp + 1 * 8], rdx); // (x0 * x1)_H +		mov(t, rax); +		mov(t2, rdx); +		mov(rax, ptr [px + 8 * 2]); +		mul(x0); +		mov(ptr [rsp + 2 * 8], rax); // (x0 * x2)_L +		mov(ptr [rsp + 3 * 8], rdx); // (x0 * x2)_H +		/* +			rdx:rax +			     t2:t +			        t1:t0 +		*/ +		add(t1, t); +		adc(t2, rax); +		adc(rdx, 0); +	} +	/* +		[x2:x1:x0] * x1 +	*/ +	void mul3x1_sqr2(const RegExp& px, const Reg64& x1, const Reg64& t2, const Reg64& t1, const Reg64& t0) +	{ +		mov(t0, ptr [rsp + 0 * 8]);// (x0 * x1)_L +		mov(rax, x1); +		mul(x1); +		mov(t1, rax); +		mov(t2, rdx); +		mov(rax, ptr [px + 8 * 2]); +		mul(x1); +		mov(ptr [rsp + 4 * 8], rax); // (x1 * x2)_L +		mov(ptr [rsp + 5 * 8], rdx); // (x1 * x2)_H +		/* +			rdx:rax +			     t2:t1 +			         t:t0 +		*/ +		add(t1, ptr [rsp + 1 * 8]); // (x0 * x1)_H +		adc(rax, t2); +		adc(rdx, 0); +	} +	/* +		[rdx:rax:t1:t0] = [x2:x1:x0] * x2 +	*/ +	void mul3x1_sqr3(const Reg64& x2, const Reg64& t1, const Reg64& t0) +	{ +		mov(rax, x2); +		mul(x2); +		/* +			rdx:rax +			     t2:t +			        t1:t0 +		*/ +		mov(t0, ptr [rsp + 2 * 8]); // (x0 * x2)_L +		mov(t1, ptr [rsp + 3 * 8]); // (x0 * x2)_H +		add(t1, 
ptr [rsp + 4 * 8]); // (x1 * x2)_L +		adc(rax, ptr [rsp + 5 * 8]); // (x1 * x2)_H +		adc(rdx, 0); +	} + +	/* +		c = [c3:y:c1:c0] = c + x[2..0] * y +		q = uint64_t(c0 * pp) +		c = (c + q * p) >> 64 +		input  [c3:c2:c1:c0], px, y, p +		output [c0:c3:c2:c1] ; c0 == 0 unless isFullBit_ + +		@note use rax, rdx, destroy y +	*/ +	void montgomery3_sub(uint64_t pp, const Reg64& c3, const Reg64& c2, const Reg64& c1, const Reg64& c0, +		const Reg64& /*px*/, const Reg64& y, const Reg64& p, +		const Reg64& t0, const Reg64& t1, const Reg64& t2, const Reg64& t3, const Reg64& t4, bool isFirst) +	{ +		// input [c3:y:c1:0] +		// [t4:c3:y:c1:c0] +		// t4 = 0 or 1 if isFullBit_, = 0 otherwise +		mov(rax, pp); +		mul(c0); // q = rax +		mov(c2, rax); +		mul3x1(p, c2, t2, t1, t0, t3); +		// [rdx:c2:t1:t0] = p * q +		add(c0, t0); // always c0 is zero +		adc(c1, t1); +		adc(c2, y); +		adc(c3, rdx); +		if (isFullBit_) { +			if (isFirst) { +				setc(c0.cvt8()); +			} else { +				adc(c0.cvt8(), t4.cvt8()); +			} +		} +	} +	/* +		c = [c3:c2:c1:c0] +		c += x[2..0] * y +		q = uint64_t(c0 * pp) +		c = (c + q * p) >> 64 +		input  [c3:c2:c1:c0], px, y, p +		output [c0:c3:c2:c1] ; c0 == 0 unless isFullBit_ + +		@note use rax, rdx, destroy y +	*/ +	void montgomery3_1(uint64_t pp, const Reg64& c3, const Reg64& c2, const Reg64& c1, const Reg64& c0, +		const Reg64& px, const Reg64& y, const Reg64& p, +		const Reg64& t0, const Reg64& t1, const Reg64& t2, const Reg64& t3, const Reg64& t4, bool isFirst) +	{ +		if (isFirst) { +			mul3x1(px, y, c2, c1, c0, c3); +			mov(c3, rdx); +			// [c3:y:c1:c0] = px[2..0] * y +		} else { +			mul3x1(px, y, t2, t1, t0, t3); +			// [rdx:y:t1:t0] = px[2..0] * y +			add_rr(Pack(c3, y, c1, c0), Pack(rdx, c2, t1, t0)); +			if (isFullBit_) setc(t4.cvt8()); +		} +		montgomery3_sub(pp, c3, c2, c1, c0, px, y, p, t0, t1, t2, t3, t4, isFirst); +	} +	/* +		pc[0..n] += x[0..n-1] * y ; pc[] = 0 if isFirst +		pc[n + 1] is temporary used if isFullBit_ +		q = uint64_t(pc[0] * pp) +		pc[] = 
(pc[] + q * p) >> 64 +		input : pc[], px[], y, p[], pw1[], pw2[] +		output : pc[0..n]   ; if isFullBit_ +		         pc[0..n-1] ; if !isFullBit_ +		destroy y +		use +		pw1[0] if useMulx_ +		pw1[0..n-2] otherwise +		pw2[0..n-1] +	*/ +	void montgomeryN_1(uint64_t pp, int n, const RegExp& pc, const RegExp& px, const Reg64& y, const Reg64& p, const Reg64& t, const MixPack& pw1, const RegExp& pw2, bool isFirst) +	{ +		// pc[] += x[] * y +		if (isFirst) { +			gen_raw_mulI(pc, px, y, pw1, t, n); +			mov(ptr [pc + n * 8], rdx); +		} else { +			gen_raw_mulI(pw2, px, y, pw1, t, n); +			mov(t, ptr [pw2 + 0 * 8]); +			add(ptr [pc + 0 * 8], t); +			for (int i = 1; i < n; i++) { +				mov(t, ptr [pw2 + i * 8]); +				adc(ptr [pc + i * 8], t); +			} +			adc(ptr [pc + n * 8], rdx); +			if (isFullBit_) { +				mov(t, 0); +				adc(t, 0); +				mov(qword [pc + (n + 1) * 8], t); +			} +		} +		mov(rax, pp); +		mul(qword [pc]); +		mov(y, rax); // y = q +		gen_raw_mulI(pw2, p, y, pw1, t, n); +		// c[] = (c[] + pw2[]) >> 64 +		mov(t, ptr [pw2 + 0 * 8]); +		add(t, ptr [pc + 0 * 8]); +		for (int i = 1; i < n; i++) { +			mov(t, ptr [pw2 + i * 8]); +			adc(t, ptr [pc + i * 8]); +			mov(ptr [pc + (i - 1) * 8], t); +		} +		adc(rdx, ptr [pc + n * 8]); +		mov(ptr [pc + (n - 1) * 8], rdx); +		if (isFullBit_) { +			if (isFirst) { +				mov(t, 0); +			} else { +				mov(t, ptr [pc + (n + 1) * 8]); +			} +			adc(t, 0); +			mov(qword [pc + n * 8], t); +		} else { +			xor_(eax, eax); +			mov(ptr [pc + n * 8], rax); +		} +	} +	/* +		[rdx:x:t2:t1:t0] <- py[3:2:1:0] * x +		destroy x, t +	*/ +	void mul4x1(const RegExp& py, const Reg64& x, const Reg64& t3, const Reg64& t2, const Reg64& t1, const Reg64& t0, const Reg64& t) +	{ +		if (useMulx_) { +			mov(rdx, x); +			mulx(t1, t0, ptr [py + 8 * 0]); +			mulx(t2, rax, ptr [py + 8 * 1]); +			add(t1, rax); +			mulx(x, rax, ptr [py + 8 * 2]); +			adc(t2, rax); +			mulx(rdx, rax, ptr [py + 8 * 3]); +			adc(x, rax); +			adc(rdx, 0); +		} else { +			mov(rax, ptr [py]); +			
mul(x); +			mov(t0, rax); +			mov(t1, rdx); +			mov(rax, ptr [py + 8]); +			mul(x); +			mov(t, rax); +			mov(t2, rdx); +			mov(rax, ptr [py + 8 * 2]); +			mul(x); +			mov(t3, rax); +			mov(rax, x); +			mov(x, rdx); +			mul(qword [py + 8 * 3]); +			add(t1, t); +			adc(t2, t3); +			adc(x, rax); +			adc(rdx, 0); +		} +	} + +	/* +		c = [c4:c3:c2:c1:c0] +		c += x[3..0] * y +		q = uint64_t(c0 * pp) +		c = (c + q * p) >> 64 +		input  [c4:c3:c2:c1:c0], px, y, p +		output [c0:c4:c3:c2:c1] + +		@note use rax, rdx, destroy y +		use xt if isFullBit_ +	*/ +	void montgomery4_1(uint64_t pp, const Reg64& c4, const Reg64& c3, const Reg64& c2, const Reg64& c1, const Reg64& c0, +		const Reg64& px, const Reg64& y, const Reg64& p, +		const Reg64& t0, const Reg64& t1, const Reg64& t2, const Reg64& t3, const Reg64& t4, bool isFirst, const Xmm& xt) +	{ +		if (isFirst) { +			mul4x1(px, y, c3, c2, c1, c0, c4); +			mov(c4, rdx); +			// [c4:y:c2:c1:c0] = px[3..0] * y +		} else { +			mul4x1(px, y, t3, t2, t1, t0, t4); +			// [rdx:y:t2:t1:t0] = px[3..0] * y +			if (isFullBit_) { +				movq(xt, px); +				xor_(px, px); +			} +			add_rr(Pack(c4, y, c2, c1, c0), Pack(rdx, c3, t2, t1, t0)); +			if (isFullBit_) { +				adc(px, 0); +			} +		} +		// [px:c4:y:c2:c1:c0] +		// px = 0 or 1 if isFullBit_, = 0 otherwise +		mov(rax, pp); +		mul(c0); // q = rax +		mov(c3, rax); +		mul4x1(p, c3, t3, t2, t1, t0, t4); +		add(c0, t0); // always c0 is zero +		adc(c1, t1); +		adc(c2, t2); +		adc(c3, y); +		adc(c4, rdx); +		if (isFullBit_) { +			if (isFirst) { +				adc(c0, 0); +			} else { +				adc(c0, px); +				movq(px, xt); +			} +		} +	} +}; + +} // mcl + +#endif diff --git a/include/mcl/fp_util.hpp b/include/mcl/fp_util.hpp new file mode 100644 index 0000000..7419672 --- /dev/null +++ b/include/mcl/fp_util.hpp @@ -0,0 +1,294 @@ +#pragma once +#include <vector> +#include <cybozu/itoa.hpp> +#include <cybozu/atoi.hpp> +#include <cybozu/bitvector.hpp> +/** +	@file +	@brief utility of Fp +	@author MITSUNARI 
Shigeo(@herumi) +	@license modified new BSD license +	http://opensource.org/licenses/BSD-3-Clause +*/ + +namespace mcl { namespace fp { + +#if defined(CYBOZU_OS_BIT) && (CYBOZU_OS_BIT == 32) +	typedef uint32_t BlockType; +#else +	typedef uint64_t BlockType; +#endif + +template<class S> +void setBlockBit(S *buf, size_t bitLen, bool b) +{ +	const size_t unitSize = sizeof(S) * 8; +	const size_t q = bitLen / unitSize; +	const size_t r = bitLen % unitSize; +	if (b) { +		buf[q] |= S(1) << r; +	} else { +		buf[q] &= ~(S(1) << r); +	} +} +template<class S> +bool getBlockBit(const S *buf, size_t bitLen) +{ +	const size_t unitSize = sizeof(S) * 8; +	const size_t q = bitLen / unitSize; +	const size_t r = bitLen % unitSize; +	return (buf[q] & (S(1) << r)) != 0; +} +/* +	convert x[0..n) to hex string +	start "0x" if withPrefix +*/ +template<class T> +void toStr16(std::string& str, const T *x, size_t n, bool withPrefix = false) +{ +	size_t fullN = 0; +	if (n > 1) { +		size_t pos = n - 1; +		while (pos > 0) { +			if (x[pos]) break; +			pos--; +		} +		if (pos > 0) fullN = pos; +	} +	const T v = n == 0 ? 0 : x[fullN]; +	const size_t topLen = cybozu::getHexLength(v); +	const size_t startPos = withPrefix ? 2 : 0; +	const size_t lenT = sizeof(T) * 2; +	str.resize(startPos + fullN * lenT + topLen); +	if (withPrefix) { +		str[0] = '0'; +		str[1] = 'x'; +	} +	cybozu::itohex(&str[startPos], topLen, v, false); +	for (size_t i = 0; i < fullN; i++) { +		cybozu::itohex(&str[startPos + topLen + i * lenT], lenT, x[fullN - 1 - i], false); +	} +} + +/* +	convert x[0..n) to bin string +	start "0b" if withPrefix +*/ +template<class T> +void toStr2(std::string& str, const T *x, size_t n, bool withPrefix) +{ +	size_t fullN = 0; +	if (n > 1) { +		size_t pos = n - 1; +		while (pos > 0) { +			if (x[pos]) break; +			pos--; +		} +		if (pos > 0) fullN = pos; +	} +	const T v = n == 0 ? 0 : x[fullN]; +	const size_t topLen = cybozu::getBinLength(v); +	const size_t startPos = withPrefix ? 
2 : 0; +	const size_t lenT = sizeof(T) * 8; +	str.resize(startPos + fullN * lenT + topLen); +	if (withPrefix) { +		str[0] = '0'; +		str[1] = 'b'; +	} +	cybozu::itobin(&str[startPos], topLen, v); +	for (size_t i = 0; i < fullN; i++) { +		cybozu::itobin(&str[startPos + topLen + i * lenT], lenT, x[fullN - 1 - i]); +	} +} + +/* +	convert hex string to x[0..xn) +	hex string = [0-9a-fA-F]+ +*/ +template<class T> +void fromStr16(T *x, size_t xn, const char *str, size_t strLen) +{ +	if (strLen == 0) throw cybozu::Exception("fp:fromStr16:strLen is zero"); +	const size_t unitLen = sizeof(T) * 2; +	const size_t q = strLen / unitLen; +	const size_t r = strLen % unitLen; +	const size_t requireSize = q + (r ? 1 : 0); +	if (xn < requireSize) throw cybozu::Exception("fp:fromStr16:short size") << xn << requireSize; +	for (size_t i = 0; i < q; i++) { +		bool b; +		x[i] = cybozu::hextoi(&b, &str[r + (q - 1 - i) * unitLen], unitLen); +		if (!b) throw cybozu::Exception("fp:fromStr16:bad char") << cybozu::exception::makeString(str, strLen); +	} +	if (r) { +		bool b; +		x[q] = cybozu::hextoi(&b, str, r); +		if (!b) throw cybozu::Exception("fp:fromStr16:bad char") << cybozu::exception::makeString(str, strLen); +	} +	for (size_t i = requireSize; i < xn; i++) x[i] = 0; +} + +/* +	@param base [inout] +*/ +inline const char *verifyStr(bool *isMinus, int *base, const std::string& str) +{ +	const char *p = str.c_str(); +	if (*p == '-') { +		*isMinus = true; +		p++; +	} else { +		*isMinus = false; +	} +	if (p[0] == '0') { +		if (p[1] == 'x') { +			if (*base != 0 && *base != 16) { +				throw cybozu::Exception("fp:verifyStr:bad base") << *base << str; +			} +			*base = 16; +			p += 2; +		} else if (p[1] == 'b') { +			if (*base != 0 && *base != 2) { +				throw cybozu::Exception("fp:verifyStr:bad base") << *base << str; +			} +			*base = 2; +			p += 2; +		} +	} +	if (*base == 0) *base = 10; +	if (*p == '\0') throw cybozu::Exception("fp:verifyStr:str is empty"); +	return p; +} + +template<class S> 
+size_t getRoundNum(size_t x) +{ +	const size_t size = sizeof(S) * 8; +	return (x + size - 1) / size; +} + +/* +	compare x[0, n) with y[0, n) +*/ +template<class S> +int compareArray(const S* x, const S* y, size_t n) +{ +	for (size_t i = 0; i < n; i++) { +		const S a = x[n - 1 - i]; +		const S b = y[n - 1 - i]; +		if (a > b) return 1; +		if (a < b) return -1; +	} +	return 0; +} + +/* +	get random value less than in[] +	n = (bitLen + sizeof(S) * 8) / (sizeof(S) * 8) +	input  in[0..n) +	output out[n..n) +	0 <= out < in +*/ +template<class RG, class S> +inline void getRandVal(S *out, RG& rg, const S *in, size_t bitLen) +{ +	const size_t unitBitSize = sizeof(S) * 8; +	const size_t n = getRoundNum<S>(bitLen); +	const size_t rem = bitLen & (unitBitSize - 1); +	for (;;) { +		rg.read(out, n); +		if (rem > 0) out[n - 1] &= (S(1) << rem) - 1; +		if (compareArray(out, in, n) < 0) return; +	} +} + +/* +	z[] = (x[] << shift) | y +	@param z [out] z[0..n) +	@param x [in] x[0..n) +	@param n [in] length of x, z +	@param shift [in] 0 <= shift < (sizeof(S) * 8) +	@param y [in] +	@return (x[] << shift)[n] +*/ +template<class S> +S shiftLeftOr(S* z, const S* x, size_t n, size_t shift, S y = 0) +{ +	if (n == 0) { +		throw cybozu::Exception("fp:shiftLeftOr:bad n"); +	} +	if (shift == 0) { +		for (size_t i = n - 1; i > 0; i--) { +			z[i] = x[i]; +		} +		z[0] = x[0] | y; +		return 0; +	} +	const size_t unitSize = sizeof(S) * 8; +	if (shift >= unitSize) { +		throw cybozu::Exception("fp:shiftLeftOr:large shift") << shift; +	} +	const size_t rev = unitSize - shift; +	S ret = x[n - 1] >> rev; +	for (size_t i = n - 1; i > 0; i--) { +		z[i] = (x[i] << shift) | (x[i - 1] >> rev); +	} +	z[0] = (x[0] << shift) | y; +	return ret; +} +template<class S> +void shiftRight(S* z, const S* x, size_t n, size_t shift) +{ +	if (n == 0) return; +	if (shift == 0) { +		for (size_t i = 0; i < n; i++) { +			z[i] = x[i]; +		} +		return; +	} +	const size_t unitSize = sizeof(S) * 8; +	if (shift >= unitSize) { +		
throw cybozu::Exception("fp:shiftRight:large shift") << shift; +	} +	const size_t rev = unitSize - shift; +	S prev = x[0]; +	for (size_t i = 0; i < n - 1; i++) { +		S t = x[i + 1]; +		z[i] = (prev >> shift) | (t << rev); +		prev = t; +	} +	z[n - 1] = prev >> shift; +} + +template<class Vec, class T> +size_t splitBitVec(Vec& v, const cybozu::BitVectorT<T>& bv, size_t width) +{ +	if (width > sizeof(typename Vec::value_type) * 8) { +		throw cybozu::Exception("fp:splitBitVec:bad width") << width; +	} +	const size_t q = bv.size() / width; +	const size_t r = bv.size() % width; +	for (size_t i = 0; i < q; i++) { +		v.push_back(bv.extract(i * width, width)); +	} +	if (r > 0) { +		v.push_back(bv.extract(q * width, r)); +	} +	return r ? r : width; +} + +template<class Vec, class T> +void concatBitVec(cybozu::BitVectorT<T>& bv, const Vec& v, size_t width, size_t lastWidth) +{ +	if (width > sizeof(typename Vec::value_type) * 8) { +		throw cybozu::Exception("fp:splitBitVec:bad width") << width; +	} +	bv.clear(); +	for (size_t i = 0; i < v.size() - 1; i++) { +		bv.append(v[i], width); +	} +	bv.append(v[v.size() - 1], lastWidth); +} + +} // mcl::fp + +} // fp diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp new file mode 100644 index 0000000..c29c870 --- /dev/null +++ b/include/mcl/gmp_util.hpp @@ -0,0 +1,378 @@ +#pragma once +/** +	@file +	@brief util function for gmp +	@author MITSUNARI Shigeo(@herumi) +	@license modified new BSD license +	http://opensource.org/licenses/BSD-3-Clause +*/ +#include <stdio.h> +#include <stdlib.h> +#include <vector> +#include <assert.h> +#ifdef _MSC_VER +	#pragma warning(push) +	#pragma warning(disable : 4616) +	#pragma warning(disable : 4800) +	#pragma warning(disable : 4244) +	#pragma warning(disable : 4127) +	#pragma warning(disable : 4512) +	#pragma warning(disable : 4146) +#endif +#include <gmpxx.h> +#include <stdint.h> +#ifdef _MSC_VER +	#pragma warning(pop) +#endif +#ifdef _MSC_VER +#if _MSC_VER == 1900 +#ifdef _DEBUG 
+#pragma comment(lib, "14/mpird.lib") +#pragma comment(lib, "14/mpirxxd.lib") +#else +#pragma comment(lib, "14/mpir.lib") +#pragma comment(lib, "14/mpirxx.lib") +#endif +#elif _MSC_VER == 1800 +#ifdef _DEBUG +#pragma comment(lib, "12/mpird.lib") +#pragma comment(lib, "12/mpirxxd.lib") +#else +#pragma comment(lib, "12/mpir.lib") +#pragma comment(lib, "12/mpirxx.lib") +#endif +#else +#ifdef _DEBUG +#pragma comment(lib, "mpird.lib") +#pragma comment(lib, "mpirxxd.lib") +#else +#pragma comment(lib, "mpir.lib") +#pragma comment(lib, "mpirxx.lib") +#endif +#endif +#endif +#include <mcl/operator.hpp> + +namespace mcl { + +struct Gmp { +	typedef mpz_class ImplType; +#if CYBOZU_OS_BIT == 64 +	typedef uint64_t BlockType; +#else +	typedef uint32_t BlockType; +#endif +	// z = [buf[n-1]:..:buf[1]:buf[0]] +	// eg. buf[] = {0x12345678, 0xaabbccdd}; => z = 0xaabbccdd12345678; +	template<class T> +	static void setRaw(mpz_class& z, const T *buf, size_t n) +	{ +		mpz_import(z.get_mpz_t(), n, -1, sizeof(*buf), 0, 0, buf); +	} +	/* +		return positive written size +		return 0 if failure +	*/ +	template<class T> +	static size_t getRaw(T *buf, size_t maxSize, const mpz_class& x) +	{ +		const size_t totalSize = sizeof(T) * maxSize; +		if (getBitLen(x) > totalSize * 8) return 0; +		memset(buf, 0, sizeof(*buf) * maxSize); +		size_t size; +		mpz_export(buf, &size, -1, sizeof(T), 0, 0, x.get_mpz_t()); +		// if x == 0, then size = 0 for gmp, size = 1 for mpir +		return size == 0 ? 
1 : size; +	} +	static inline void set(mpz_class& z, uint64_t x) +	{ +		setRaw(z, &x, 1); +	} +	static inline bool fromStr(mpz_class& z, const std::string& str, int base = 0) +	{ +		return z.set_str(str, base) == 0; +	} +	static inline void toStr(std::string& str, const mpz_class& z, int base = 10) +	{ +		str = z.get_str(base); +	} +	static inline void add(mpz_class& z, const mpz_class& x, const mpz_class& y) +	{ +		mpz_add(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t()); +	} +	static inline void add(mpz_class& z, const mpz_class& x, unsigned int y) +	{ +		mpz_add_ui(z.get_mpz_t(), x.get_mpz_t(), y); +	} +	static inline void sub(mpz_class& z, const mpz_class& x, const mpz_class& y) +	{ +		mpz_sub(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t()); +	} +	static inline void sub(mpz_class& z, const mpz_class& x, unsigned int y) +	{ +		mpz_sub_ui(z.get_mpz_t(), x.get_mpz_t(), y); +	} +	static inline void mul(mpz_class& z, const mpz_class& x, const mpz_class& y) +	{ +		mpz_mul(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t()); +	} +	static inline void square(mpz_class& z, const mpz_class& x) +	{ +		mpz_mul(z.get_mpz_t(), x.get_mpz_t(), x.get_mpz_t()); +	} +	static inline void mul(mpz_class& z, const mpz_class& x, unsigned int y) +	{ +		mpz_mul_ui(z.get_mpz_t(), x.get_mpz_t(), y); +	} +	static inline void divmod(mpz_class& q, mpz_class& r, const mpz_class& x, const mpz_class& y) +	{ +		mpz_divmod(q.get_mpz_t(), r.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t()); +	} +	static inline void div(mpz_class& q, const mpz_class& x, const mpz_class& y) +	{ +		mpz_div(q.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t()); +	} +	static inline void div(mpz_class& q, const mpz_class& x, unsigned int y) +	{ +		mpz_div_ui(q.get_mpz_t(), x.get_mpz_t(), y); +	} +	static inline void mod(mpz_class& r, const mpz_class& x, const mpz_class& m) +	{ +		mpz_mod(r.get_mpz_t(), x.get_mpz_t(), m.get_mpz_t()); +	} +	static inline void mod(mpz_class& r, const mpz_class& x, unsigned int m) +	{ +		mpz_mod_ui(r.get_mpz_t(), 
x.get_mpz_t(), m); +	} +	static inline void clear(mpz_class& z) +	{ +		mpz_set_ui(z.get_mpz_t(), 0); +	} +	static inline bool isZero(const mpz_class& z) +	{ +		return mpz_sgn(z.get_mpz_t()) == 0; +	} +	static inline bool isNegative(const mpz_class& z) +	{ +		return mpz_sgn(z.get_mpz_t()) < 0; +	} +	static inline void neg(mpz_class& z, const mpz_class& x) +	{ +		mpz_neg(z.get_mpz_t(), x.get_mpz_t()); +	} +	static inline int compare(const mpz_class& x, const mpz_class & y) +	{ +		return mpz_cmp(x.get_mpz_t(), y.get_mpz_t()); +	} +	static inline int compare(const mpz_class& x, int y) +	{ +		return mpz_cmp_si(x.get_mpz_t(), y); +	} +	template<class T> +	static inline void addMod(mpz_class& z, const mpz_class& x, const T& y, const mpz_class& m) +	{ +		add(z, x, y); +		if (compare(z, m) >= 0) { +			sub(z, z, m); +		} +	} +	template<class T> +	static inline void subMod(mpz_class& z, const mpz_class& x, const T& y, const mpz_class& m) +	{ +		sub(z, x, y); +		if (!isNegative(z)) return; +		add(z, z, m); +	} +	template<class T> +	static inline void mulMod(mpz_class& z, const mpz_class& x, const T& y, const mpz_class& m) +	{ +		mul(z, x, y); +		mod(z, z, m); +	} +	static inline void squareMod(mpz_class& z, const mpz_class& x, const mpz_class& m) +	{ +		square(z, x); +		mod(z, z, m); +	} +	// z = x^y (y >= 0) +	static inline void pow(mpz_class& z, const mpz_class& x, unsigned int y) +	{ +		mpz_pow_ui(z.get_mpz_t(), x.get_mpz_t(), y); +	} +	// z = x^y mod m (y >=0) +	static inline void powMod(mpz_class& z, const mpz_class& x, const mpz_class& y, const mpz_class& m) +	{ +		mpz_powm(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t(), m.get_mpz_t()); +	} +	// z = 1/x mod m +	static inline void invMod(mpz_class& z, const mpz_class& x, const mpz_class& m) +	{ +		mpz_invert(z.get_mpz_t(), x.get_mpz_t(), m.get_mpz_t()); +	} +	// z = lcm(x, y) +	static inline void lcm(mpz_class& z, const mpz_class& x, const mpz_class& y) +	{ +		mpz_lcm(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t()); +	} +	
static inline mpz_class lcm(const mpz_class& x, const mpz_class& y) +	{ +		mpz_class z; +		lcm(z, x, y); +		return z; +	} +	// z = gcd(x, y) +	static inline void gcd(mpz_class& z, const mpz_class& x, const mpz_class& y) +	{ +		mpz_gcd(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t()); +	} +	static inline mpz_class gcd(const mpz_class& x, const mpz_class& y) +	{ +		mpz_class z; +		gcd(z, x, y); +		return z; +	} +	/* +		assume p : odd prime +		return  1 if x^2 = a mod p for some x +		return -1 if x^2 != a mod p for any x +	*/ +	static inline int legendre(const mpz_class& a, const mpz_class& p) +	{ +		return mpz_legendre(a.get_mpz_t(), p.get_mpz_t()); +	} +	static inline bool isPrime(const mpz_class& x) +	{ +		return mpz_probab_prime_p(x.get_mpz_t(), 25) != 0; +	} +	static inline size_t getBitLen(const mpz_class& x) +	{ +		return mpz_sizeinbase(x.get_mpz_t(), 2); +	} +	static inline BlockType getBlock(const mpz_class& x, size_t i) +	{ +		return x.get_mpz_t()->_mp_d[i]; +	} +	static inline const BlockType *getBlock(const mpz_class& x) +	{ +		return reinterpret_cast<const BlockType*>(x.get_mpz_t()->_mp_d); +	} +	static inline size_t getBlockSize(const mpz_class& x) +	{ +		assert(x.get_mpz_t()->_mp_size >= 0); +		return x.get_mpz_t()->_mp_size; +	} +	template<class RG> +	static inline void getRand(mpz_class& z, size_t bitLen, RG& rg) +	{ +		assert(bitLen > 1); +		const size_t rem = bitLen & 31; +		const size_t n = (bitLen + 31) / 32; +		std::vector<uint32_t> buf(n); +		rg.read(buf.data(), n); +		uint32_t v = buf[n - 1]; +		if (rem == 0) { +			v |= 1U << 31; +		} else { +			v &= (1U << rem) - 1; +			v |= 1U << (rem - 1); +		} +		buf[n - 1] = v; +		Gmp::setRaw(z, &buf[0], n); +	} +	template<class RG> +	static void getRandPrime(mpz_class& z, size_t bitLen, RG& rg, bool setSecondBit = false, bool mustBe3mod4 = false) +	{ +		assert(bitLen > 2); +		do { +			getRand(z, bitLen, rg); +			if (setSecondBit) { +				z |= mpz_class(1) << (bitLen - 2); +			} +			if (mustBe3mod4) { +				z |= 
3; +			} +		} while (!(isPrime(z))); +	} +}; + +/* +	Tonelli-Shanks +*/ +class SquareRoot { +	bool isPrime; +	mpz_class p; +	mpz_class g; +	int r; +	mpz_class q; // p - 1 = 2^r q +	mpz_class s; // s = g^q +public: +	SquareRoot() : isPrime(false) {} +	void set(const mpz_class& p) +	{ +		if (p <= 2) throw cybozu::Exception("SquareRoot:bad p") << p; +		isPrime = Gmp::isPrime(p); +		if (!isPrime) return; // don't throw until get() is called +		this->p = p; +		// g is quadratic nonresidue +		g = 2; +		while (Gmp::legendre(g, p) > 0) { +			g++; +		} +		// p - 1 = 2^r q, q is odd +		r = 0; +		q = p - 1; +		while ((q & 1) == 0) { +			r++; +			q /= 2; +		} +		Gmp::powMod(s, g, q, p); +	} +	/* +		solve x^2 = a mod p +	*/ +	bool get(mpz_class& x, const mpz_class& a) const +	{ +		if (!isPrime) throw cybozu::Exception("SquareRoot:get:not prime") << p; +		if (Gmp::legendre(a, p) < 0) return false; +		if (r == 1) { +			Gmp::powMod(x, a, (p + 1) / 4, p); +			return true; +		} +		mpz_class c = s, d; +		int e = r; +		Gmp::powMod(d, a, q, p); +		Gmp::powMod(x, a, (q + 1) / 2, p); // destroy a if &x == &a +		while (d != 1) { +			int i = 1; +			mpz_class dd = (d * d) % p; +			while (dd != 1) { +				dd = (dd * dd) % p; +				i++; +			} +			mpz_class b = 1; +			b <<= e - i - 1; +			Gmp::powMod(b, c, b, p); +			x = (x * b) % p; +			c = (b * b) % p; +			d = (d * c) % p; +			e = i; +		} +		return true; +	} +}; + +namespace ope { + +template<> +struct Optimized<mpz_class> { +	void init(const mpz_class&) {} +	bool hasPowMod() const { return true; } +	static void powMod(mpz_class& z, const mpz_class& x, const mpz_class& y, const mpz_class& m) +	{ +		mpz_powm(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t(), m.get_mpz_t()); +	} +}; + +} // mcl::ope + +} // mcl diff --git a/include/mcl/mont_fp.hpp b/include/mcl/mont_fp.hpp new file mode 100644 index 0000000..1b539bf --- /dev/null +++ b/include/mcl/mont_fp.hpp @@ -0,0 +1,463 @@ +#pragma once +/** +	@file +	@brief Fp with montgomery(EXPERIMENTAL 
IMPLEMENTAION) +	@author MITSUNARI Shigeo(@herumi) +	@license modified new BSD license +	http://opensource.org/licenses/BSD-3-Clause + +	@note this class should be merged to FpT +*/ +#include <sstream> +#include <vector> +#include <mcl/gmp_util.hpp> +#include <mcl/fp.hpp> +#include <mcl/fp_generator.hpp> + +namespace mcl { + +template<size_t N, class tag = fp_local::TagDefault> +class MontFpT : public ope::addsub<MontFpT<N, tag>, +	ope::mulable<MontFpT<N, tag>, +	ope::invertible<MontFpT<N, tag>, +	ope::hasNegative<MontFpT<N, tag>, +	ope::hasIO<MontFpT<N, tag> > > > > > { + +	static mpz_class pOrg_; +	static mcl::SquareRoot sq_; +	static MontFpT p_; +	static MontFpT one_; +	static MontFpT R_; // (1 << (N * 64)) % p +	static MontFpT RR_; // (R * R) % p +	static MontFpT invTbl_[N * 64 * 2]; +	static size_t modBitLen_; +public: +	static FpGenerator fg_; +private: +	uint64_t v_[N]; +	void fromRawGmp(const mpz_class& x) +	{ +		if (Gmp::getRaw(v_, N, x) == 0) { +			throw cybozu::Exception("MontFpT:fromRawGmp") << x; +		} +	} +	template<class S> +	void setMaskMod(std::vector<S>& buf) +	{ +		assert(buf.size() * sizeof(S) * 8 <= modBitLen_); +		assert(!buf.empty()); +		fp::maskBuffer(&buf[0], buf.size(), modBitLen_); +		memcpy(v_, &buf[0], buf.size() * sizeof(S)); +		if (compare(*this, p_) >= 0) { +			subNc(v_, v_, p_.v_); +		} +		assert(compare(*this, p_) < 0); +	} +	static void initInvTbl(MontFpT *invTbl) +	{ +		MontFpT t(2); +		const int n = N * 64 * 2; +		for (int i = 0; i < n; i++) { +			invTbl[n - 1 - i] = t; +			t += t; +		} +	} +	typedef void (*void3op)(MontFpT&, const MontFpT&, const MontFpT&); +	typedef bool (*bool3op)(MontFpT&, const MontFpT&, const MontFpT&); +	typedef void (*void2op)(MontFpT&, const MontFpT&); +	typedef int (*int2op)(MontFpT&, const MontFpT&); +public: +	static const size_t BlockSize = N; +	typedef uint64_t BlockType; +	MontFpT() {} +	MontFpT(int x) { operator=(x); } +	MontFpT(uint64_t x) { operator=(x); } +	explicit MontFpT(const std::string& 
str, int base = 0) +	{ +		fromStr(str, base); +	} +	MontFpT& operator=(int x) +	{ +		if (x == 0) { +			clear(); +		} else { +			v_[0] = abs(x); +			for (size_t i = 1; i < N; i++) v_[i] = 0; +			mul(*this, *this, RR_); +			if (x < 0) { +				neg(*this, *this); +			} +		} +		return *this; +	} +	MontFpT& operator=(uint64_t x) +	{ +		v_[0] = x; +		for (size_t i = 1; i < N; i++) v_[i] = 0; +		mul(*this, *this, RR_); +		return *this; +	} +	void fromStr(const std::string& str, int base = 0) +	{ +		bool isMinus; +		const char *p = fp::verifyStr(&isMinus, &base, str); + +		if (base == 16) { +			MontFpT t; +			mcl::fp::fromStr16(t.v_, N, p, str.size() - (p - str.c_str())); +			if (compare(t, p_) >= 0) throw cybozu::Exception("fp:MontFpT:str is too large") << str; +			mul(*this, t, RR_); +		} else { +			mpz_class t; +			if (!Gmp::fromStr(t, p, base)) { +				throw cybozu::Exception("fp:MontFpT:fromStr") << str; +			} +			toMont(*this, t); +		} +		if (isMinus) { +			neg(*this, *this); +		} +	} +	void put() const +	{ +		for (int i = N - 1; i >= 0; i--) { +			printf("%016llx ", v_[i]); +		} +		printf("\n"); +	} +	void set(const std::string& str, int base = 0) { fromStr(str, base); } +	void toStr(std::string& str, int base = 10, bool withPrefix = false) const +	{ +		if (isZero()) { +			str = "0"; +			return; +		} +		if (base == 16 || base == 2) { +			MontFpT t; +			mul(t, *this, one_); +			if (base == 16) { +				mcl::fp::toStr16(str, t.v_, N, withPrefix); +			} else { +				mcl::fp::toStr2(str, t.v_, N, withPrefix); +			} +			return; +		} +		if (base != 10) throw cybozu::Exception("fp:MontFpT:toStr:bad base") << base; +		// QQQ : remove conversion to gmp +		mpz_class t; +		fromMont(t, *this); +		Gmp::toStr(str, t, base); +	} +	std::string toStr(int base = 10, bool withPrefix = false) const +	{ +		std::string str; +		toStr(str, base, withPrefix); +		return str; +	} +	void clear() +	{ +		for (size_t i = 0; i < N; i++) v_[i] = 0; +	} +	template<class RG> +	void setRand(RG& rg) +	{ +		
fp::getRandVal(v_, rg, p_.v_, modBitLen_); +	} +	template<class S> +	void setRaw(const S *inBuf, size_t n) +	{ +		n = std::min(n, fp::getRoundNum<S>(modBitLen_)); +		if (n == 0) { +			clear(); +			return; +		} +		std::vector<S> buf(inBuf, inBuf + n); +		setMaskMod(buf); +	} +	static inline void setModulo(const std::string& pstr, int base = 0) +	{ +		bool isMinus; +		const char *p = fp::verifyStr(&isMinus, &base, pstr); +		if (isMinus) throw cybozu::Exception("MontFp:setModulo:mstr is not pinus") << pstr; +		if (!Gmp::fromStr(pOrg_, p, base)) { +			throw cybozu::Exception("fp:MontFpT:setModulo") << pstr << base; +		} +		modBitLen_ = Gmp::getBitLen(pOrg_); +		if (fp::getRoundNum<uint64_t>(modBitLen_) > N) { +			throw cybozu::Exception("MontFp:setModulo:bad prime length") << pstr; +		} +		p_.fromRawGmp(pOrg_); +		sq_.set(pOrg_); + +		mpz_class t = 1; +		one_.fromRawGmp(t); +		t = (t << (N * 64)) % pOrg_; +		R_.fromRawGmp(t); +		t = (t * t) % pOrg_; +		RR_.fromRawGmp(t); +		fg_.init(p_.v_, N); +		add = Xbyak::CastTo<void3op>(fg_.add_); +		sub = Xbyak::CastTo<void3op>(fg_.sub_); +		mul = Xbyak::CastTo<void3op>(fg_.mul_); +		square = Xbyak::CastTo<void2op>(fg_.sqr_); +		if (square == 0) square = squareC; +		neg = Xbyak::CastTo<void2op>(fg_.neg_); +		shr1 = Xbyak::CastTo<void2op>(fg_.shr1_); +		addNc = Xbyak::CastTo<bool3op>(fg_.addNc_); +		subNc = Xbyak::CastTo<bool3op>(fg_.subNc_); +		preInv = Xbyak::CastTo<int2op>(fg_.preInv_); +		initInvTbl(invTbl_); +	} +	static inline void getModulo(std::string& pstr) +	{ +		Gmp::toStr(pstr, pOrg_); +	} +	static inline bool isYodd(const MontFpT& y) +	{ +#if 0 +		return (y.v_[0] & 1) == 1; +#else +		MontFpT t; // QQQ : is necessary? 
+		mul(t, y, one_); +		return (t.v_[0] & 1) == 1; +#endif +	} +	static inline bool squareRoot(MontFpT& y, const MontFpT& x) +	{ +		mpz_class t; +		fromMont(t, x); +		if (!sq_.get(t, t)) return false; +		toMont(y, t); +		return true; +	} +	static inline void fromMont(mpz_class& z, const MontFpT& x) +	{ +		MontFpT t; +		mul(t, x, one_); +		Gmp::setRaw(z, t.v_, N); +	} +	static inline void toMont(MontFpT& z, const mpz_class& x) +	{ +		if (x >= pOrg_) throw cybozu::Exception("fp:MontFpT:toMont:large x") << x; +		MontFpT t; +		t.fromRawGmp(x); +		mul(z, t, RR_); +	} +	static void3op add; +	static void3op sub; +	static void3op mul; +	static void2op square; +	static void2op neg; +	static void2op shr1; +	static bool3op addNc; +	static bool3op subNc; +	static int2op preInv; +	static inline void squareC(MontFpT& z, const MontFpT& x) +	{ +		mul(z, x, x); +	} +	static inline int preInvC(MontFpT& r, const MontFpT& x) +	{ +		MontFpT u, v, s; +		u = p_; +		v = x; +		r.clear(); +		s.clear(); s.v_[0] = 1; // s is real 1 +		int k = 0; +		// u, v : Pack, r, s : mem +		bool rTop = false; +	LP: +		if (v.isZero()) goto EXIT; +		if ((u.v_[0] & 1) == 0) { +			goto U_EVEN; +		} +		if ((v.v_[0] & 1) == 0) { +			goto V_EVEN; +		} +		if (compare(v, u) < 0) { +			goto V_LT_U; +		} +		subNc(v, v, u); // sub_rr +		addNc(s, s, r); // add_mm +	V_EVEN: +		shr1(v, v); // shr1_r +		rTop = addNc(r, r, r); // twice_m +		k++; +		goto LP; +	V_LT_U: +		subNc(u, u, v); // sub_rr +		rTop = addNc(r, r, s); // add_mm +	U_EVEN: +		shr1(u, u); // shr1_r +		addNc(s, s, s); // twice_m +		k++; +		goto LP; +	EXIT:; +		if (rTop) subNc(r, r, p_); +		if (subNc(r, p_, r)) { +			addNc(r, r, p_); +		} +		return k; +	} +	static inline void inv(MontFpT& z, const MontFpT& x) +	{ +#if 1 +		MontFpT r; +#if 1 +		int k = preInv(r, x); +#else +		MontFpT s; +		int h = preInvC(s, x); +		int k = preInv(r, x); +		if (r != s || k != h) { +			std::cout << std::hex; +			PUT(x); +			PUT(r); +			PUT(s); +			printf("k=%d, h=%d\n", k, h); 
+			exit(1); +		} +#endif +		/* +			xr = 2^k +			R = 2^(N * 64) +			get r2^(-k)R^2 = r 2^(N * 64 * 2 - k) +		*/ +		mul(z, r, invTbl_[k]); +#else +		mpz_class t; +		fromMont(t, x); +		Gmp::invMod(t, t, pOrg_); +		toMont(z, t); +#endif +	} +	static inline void div(MontFpT& z, const MontFpT& x, const MontFpT& y) +	{ +		MontFpT ry; +		inv(ry, y); +		mul(z, x, ry); +	} +#if 0 +	static inline BlockType getBlock(const MontFpT& x, size_t i) +	{ +		return Gmp::getBlock(x.v, i); +	} +	static inline const BlockType *getBlock(const MontFpT& x) +	{ +		return Gmp::getBlock(x.v); +	} +	static inline size_t getBlockSize(const MontFpT& x) +	{ +		return Gmp::getBlockSize(x.v); +	} +	static inline void shr(MontFpT& z, const MontFpT& x, size_t n) +	{ +		z.v = x.v >> n; +	} +#endif +	/* +		append to bv(not clear bv) +	*/ +	void appendToBitVec(cybozu::BitVector& bv) const +	{ +		MontFpT t; +		MontFpT::mul(t, *this, MontFpT::one_); +		bv.append(t.v_, modBitLen_); +	} +	void fromBitVec(const cybozu::BitVector& bv) +	{ +		const size_t bitLen = bv.size(); +		if (bitLen != modBitLen_) throw cybozu::Exception("MontFp:fromBitVec:bad size") << bitLen << modBitLen_; +		const size_t blockN = cybozu::RoundupBit<BlockType>(bitLen); +		const MontFpT* src; +		MontFpT t; +		if (blockN == N) { +			src = (const MontFpT*)bv.getBlock(); +		} else { +			cybozu::CopyBit(t.v_, bv.getBlock(), bitLen); +			for (size_t i = blockN; i < N; i++) t.v_[i] = 0; +			src = &t; +		} +		mul(*this, *src, RR_); +		if (compare(*this, p_) >= 0) { +			throw cybozu::Exception("MontFpT:fromBitVec:large x") << *this << p_; +		} +	} +	static inline size_t getBitVecSize() { return modBitLen_; } +	static inline int compare(const MontFpT& x, const MontFpT& y) +	{ +		return fp::compareArray(x.v_, y.v_, N); +	} +	static inline bool isZero(const MontFpT& x) +	{ +		if (x.v_[0]) return false; +		uint64_t r = 0; +		for (size_t i = 1; i < N; i++) { +			r |= x.v_[i]; +		} +		return r == 0; +	} +	bool isZero() const { return isZero(*this); } 
+	template<class Z> +	static void power(MontFpT& z, const MontFpT& x, const Z& y) +	{ +		power_impl::power(z, x, y); +	} +	const uint64_t* getInnerValue() const { return v_; } +	bool operator==(const MontFpT& rhs) const { return compare(*this, rhs) == 0; } +	bool operator!=(const MontFpT& rhs) const { return compare(*this, rhs) != 0; } +	static inline size_t getModBitLen() { return modBitLen_; } +	static inline uint64_t cvtInt(const MontFpT& x, bool *err = 0) +	{ +		MontFpT t; +		mul(t, x, one_); +		for (size_t i = 1; i < N; i++) { +			if (t.v_[i]) { +				if (err) { +					*err = true; +					return 0; +				} else { +					throw cybozu::Exception("MontFp:cvtInt:too large") << x; +				} +			} +		} +		if (err) *err = false; +		return t.v_[0]; +	} +	uint64_t cvtInt(bool *err = 0) const { return cvtInt(*this, err); } +}; + +template<size_t N, class tag>mpz_class MontFpT<N, tag>::pOrg_; +template<size_t N, class tag>mcl::SquareRoot MontFpT<N, tag>::sq_; +template<size_t N, class tag>MontFpT<N, tag> MontFpT<N, tag>::p_; +template<size_t N, class tag>MontFpT<N, tag> MontFpT<N, tag>::one_; +template<size_t N, class tag>MontFpT<N, tag> MontFpT<N, tag>::R_; +template<size_t N, class tag>MontFpT<N, tag> MontFpT<N, tag>::RR_; +template<size_t N, class tag>MontFpT<N, tag> MontFpT<N, tag>::invTbl_[N * 64 * 2]; +template<size_t N, class tag>FpGenerator MontFpT<N, tag>::fg_; +template<size_t N, class tag>size_t MontFpT<N, tag>::modBitLen_; + +template<size_t N, class tag>typename MontFpT<N, tag>::void3op MontFpT<N, tag>::add; +template<size_t N, class tag>typename MontFpT<N, tag>::void3op MontFpT<N, tag>::sub; +template<size_t N, class tag>typename MontFpT<N, tag>::void3op MontFpT<N, tag>::mul; +template<size_t N, class tag>typename MontFpT<N, tag>::void2op MontFpT<N, tag>::square; +template<size_t N, class tag>typename MontFpT<N, tag>::void2op MontFpT<N, tag>::neg; +template<size_t N, class tag>typename MontFpT<N, tag>::void2op MontFpT<N, tag>::shr1; +template<size_t N, class 
tag>typename MontFpT<N, tag>::bool3op MontFpT<N, tag>::addNc; +template<size_t N, class tag>typename MontFpT<N, tag>::bool3op MontFpT<N, tag>::subNc; +template<size_t N, class tag>typename MontFpT<N, tag>::int2op MontFpT<N, tag>::preInv; + +} // mcl + +namespace std { CYBOZU_NAMESPACE_TR1_BEGIN +template<class T> struct hash; + +template<size_t N, class tag> +struct hash<mcl::MontFpT<N, tag> > { +	size_t operator()(const mcl::MontFpT<N, tag>& x, uint64_t v = 0) const +	{ +		return static_cast<size_t>(cybozu::hash64(x.getInnerValue(), N, v)); +	} +}; + +CYBOZU_NAMESPACE_TR1_END } // std::tr1 diff --git a/include/mcl/operator.hpp b/include/mcl/operator.hpp new file mode 100644 index 0000000..f5d0df3 --- /dev/null +++ b/include/mcl/operator.hpp @@ -0,0 +1,118 @@ +#pragma once +/** +	@file +	@brief operator +	@author MITSUNARI Shigeo(@herumi) +	@license modified new BSD license +	http://opensource.org/licenses/BSD-3-Clause +*/ +#include <ios> +#include <cybozu/exception.hpp> + +#ifdef _WIN32 +	#ifndef MCL_FORCE_INLINE +		#define MCL_FORCE_INLINE __forceinline +	#endif +	#pragma warning(push) +	#pragma warning(disable : 4714) +#else +	#ifndef MCL_FORCE_INLINE +		#define MCL_FORCE_INLINE __attribute__((always_inline)) +	#endif +#endif + +namespace mcl { namespace ope { + +template<class T> +struct Empty {}; + +/* +	T must have compare +*/ +template<class T, class E = Empty<T> > +struct comparable : E { +	friend MCL_FORCE_INLINE bool operator<(const T& x, const T& y) { return T::compare(x, y) < 0; } +	friend MCL_FORCE_INLINE bool operator>=(const T& x, const T& y) { return !operator<(x, y); } + +	friend MCL_FORCE_INLINE bool operator>(const T& x, const T& y) { return T::compare(x, y) > 0; } +	friend MCL_FORCE_INLINE bool operator<=(const T& x, const T& y) { return !operator>(x, y); } +	friend MCL_FORCE_INLINE bool operator==(const T& x, const T& y) { return T::compare(x, y) == 0; } +	friend MCL_FORCE_INLINE bool operator!=(const T& x, const T& y) { return !operator==(x, 
y); } +}; + +/* +	T must have add, sub +*/ +template<class T, class E = Empty<T> > +struct addsub : E { +	template<class S> MCL_FORCE_INLINE T& operator+=(const S& rhs) { T::add(static_cast<T&>(*this), static_cast<const T&>(*this), rhs); return static_cast<T&>(*this); } +	template<class S> MCL_FORCE_INLINE T& operator-=(const S& rhs) { T::sub(static_cast<T&>(*this), static_cast<const T&>(*this), rhs); return static_cast<T&>(*this); } +	template<class S> friend MCL_FORCE_INLINE T operator+(const T& a, const S& b) { T c; T::add(c, a, b); return c; } +	template<class S> friend MCL_FORCE_INLINE T operator-(const T& a, const S& b) { T c; T::sub(c, a, b); return c; } +}; + +/* +	T must have mul +*/ +template<class T, class E = Empty<T> > +struct mulable : E { +	template<class S> MCL_FORCE_INLINE T& operator*=(const S& rhs) { T::mul(static_cast<T&>(*this), static_cast<const T&>(*this), rhs); return static_cast<T&>(*this); } +	template<class S> friend MCL_FORCE_INLINE T operator*(const T& a, const S& b) { T c; T::mul(c, a, b); return c; } +}; + +/* +	T must have inv, mul +*/ +template<class T, class E = Empty<T> > +struct invertible : E { +	MCL_FORCE_INLINE T& operator/=(const T& rhs) { T c; T::inv(c, rhs); T::mul(static_cast<T&>(*this), static_cast<const T&>(*this), c); return static_cast<T&>(*this); } +	friend MCL_FORCE_INLINE T operator/(const T& a, const T& b) { T c; T::inv(c, b); T::mul(c, c, a); return c; } +}; + +/* +	T must have neg +*/ +template<class T, class E = Empty<T> > +struct hasNegative : E { +	MCL_FORCE_INLINE T operator-() const { T c; T::neg(c, static_cast<const T&>(*this)); return c; } +}; + +template<class T, class E = Empty<T> > +struct hasIO : E { +	friend inline std::ostream& operator<<(std::ostream& os, const T& self) +	{ +		const std::ios_base::fmtflags f = os.flags(); +		if (f & std::ios_base::oct) throw cybozu::Exception("fpT:operator<<:oct is not supported"); +		const int base = (f & std::ios_base::hex) ? 
16 : 10; +		const bool showBase = (f & std::ios_base::showbase) != 0; +		std::string str; +		self.toStr(str, base, showBase); +		return os << str; +	} +	friend inline std::istream& operator>>(std::istream& is, T& self) +	{ +		const std::ios_base::fmtflags f = is.flags(); +		if (f & std::ios_base::oct) throw cybozu::Exception("fpT:operator>>:oct is not supported"); +		const int base = (f & std::ios_base::hex) ? 16 : 0; +		std::string str; +		is >> str; +		self.fromStr(str, base); +		return is; +	} +}; + +template<class T> +struct Optimized { +	bool hasMulMod() const { return false; } +	void init(const T&) {} +	static void mulMod(T&, const T&, const T&) {} +	static void mulMod(T&, const T&, unsigned int) {} +	bool hasPowMod() const { return false; } +	static void powMod(T&, const T&, const T&, const T&) {} +}; + +} } // mcl::ope + +#ifdef _WIN32 +//	#pragma warning(pop) +#endif diff --git a/include/mcl/power.hpp b/include/mcl/power.hpp new file mode 100644 index 0000000..27fd15e --- /dev/null +++ b/include/mcl/power.hpp @@ -0,0 +1,181 @@ +#pragma once +/** +	@file +	@brief power +	@author MITSUNARI Shigeo(@herumi) +	@license modified new BSD license +	http://opensource.org/licenses/BSD-3-Clause +*/ +#include <assert.h> +#include <cybozu/bit_operation.hpp> +#include <mcl/tagmultigr.hpp> +#ifdef _MSC_VER +	#pragma warning(push) +	#pragma warning(disable : 4616) +	#pragma warning(disable : 4800) +	#pragma warning(disable : 4244) +	#pragma warning(disable : 4127) +	#pragma warning(disable : 4512) +	#pragma warning(disable : 4146) +#endif +#include <gmpxx.h> +#ifdef _MSC_VER +	#pragma warning(pop) +#endif + +namespace mcl { + +namespace power_impl { + +template<class F> +struct TagInt { +	typedef typename F::BlockType BlockType; +	static size_t getBlockSize(const F& x) +	{ +		return F::getBlockSize(x); +	} +	static BlockType getBlock(const F& x, size_t i) +	{ +		return F::getBlock(x, i); +	} +	static const BlockType* getBlock(const F& x) +	{ +		return F::getBlock(x); +	} 
+	static size_t getBitLen(const F& x) +	{ +		return F::getBitLen(x); +	} +	static void shr(F& x, size_t n) +	{ +		F::shr(x, x, n); +	} +}; + +template<> +struct TagInt<int> { +	typedef int BlockType; +	static int getBlockSize(int) +	{ +		return 1; +	} +	static BlockType getBlock(int x, size_t i) +	{ +		assert(i == 0); +		cybozu::disable_warning_unused_variable(i); +		return x; +	} +	static const BlockType* getBlock(const int& x) +	{ +		return &x; +	} +	static size_t getBitLen(int x) +	{ +		return x == 0 ? 1 : cybozu::bsr(x) + 1; +	} +	static void shr(int& x, size_t n) +	{ +		x >>= n; +	} +}; + +template<> +struct TagInt<size_t> { +	typedef size_t BlockType; +	static size_t getBlockSize(size_t) +	{ +		return 1; +	} +	static BlockType getBlock(size_t x, size_t i) +	{ +		assert(i == 0); +		cybozu::disable_warning_unused_variable(i); +		return x; +	} +	static const BlockType* getBlock(const size_t& x) +	{ +		return &x; +	} +	static size_t getBitLen(size_t x) +	{ +		return x == 0 ? 1 : cybozu::bsr<size_t>(x) + 1; +	} +	static void shr(size_t& x, size_t n) +	{ +		x >>= n; +	} +}; + +template<> +struct TagInt<mpz_class> { +	typedef mp_limb_t BlockType; +	static size_t getBlockSize(const mpz_class& x) +	{ +		return x.get_mpz_t()->_mp_size; +	} +	static BlockType getBlock(const mpz_class& x, size_t i) +	{ +		return x.get_mpz_t()->_mp_d[i]; +	} +	static const BlockType* getBlock(const mpz_class& x) +	{ +		return x.get_mpz_t()->_mp_d; +	} +	static size_t getBitLen(const mpz_class& x) +	{ +		return mpz_sizeinbase(x.get_mpz_t(), 2); +	} +	static void shr(mpz_class& x, size_t n) +	{ +		x >>= n; +	} +}; + +template<class G, class BlockType> +void powerArray(G& z, const G& x, const BlockType *y, size_t n) +{ +	typedef TagMultiGr<G> TagG; +	G out; +	TagG::init(out); +	G t(x); +	for (size_t i = 0; i < n; i++) { +		BlockType v = y[i]; +		int m = (int)sizeof(BlockType) * 8; +		if (i == n - 1) { +			// avoid unused multiplication +			while (m > 0 && (v & (BlockType(1) << (m - 1))) == 
0) { +				m--; +			} +		} +		for (int j = 0; j < m; j++) { +			if (v & (BlockType(1) << j)) { +				TagG::mul(out, out, t); +			} +			TagG::square(t, t); +		} +	} +	z = out; +} + +template<class G, class F> +void power(G& z, const G& x, const F& _y) +{ +	typedef TagMultiGr<G> TagG; +	typedef power_impl::TagInt<F> TagI; +	if (_y == 0) { +		TagG::init(z); +		return; +	} +	if (_y == 1) { +		z = x; +		return; +	} +	bool isNegative = _y < 0; +	const F& y = isNegative ? -_y : _y; +	powerArray(z, x, TagI::getBlock(y), TagI::getBlockSize(y)); +	if (isNegative) { +		TagG::inv(z, z); +	} +} + +} } // mcl::power_impl diff --git a/include/mcl/tagmultigr.hpp b/include/mcl/tagmultigr.hpp new file mode 100644 index 0000000..51add36 --- /dev/null +++ b/include/mcl/tagmultigr.hpp @@ -0,0 +1,39 @@ +#pragma once +/** +	@file +	@brief TagMultiGr +	@author MITSUNARI Shigeo(@herumi) +	@license modified new BSD license +	http://opensource.org/licenses/BSD-3-Clause +*/ +#include <assert.h> + +namespace mcl { + +// default tag is for multiplicative group +template<class G> +struct TagMultiGr { +	static void square(G& z, const G& x) +	{ +		G::mul(z, x, x); +	} +	static void mul(G& z, const G& x, const G& y) +	{ +		G::mul(z, x, y); +	} +	static void inv(G& z, const G& x) +	{ +		G::inv(z, x); +	} +	static void div(G& z, const G& x, const G& y) +	{ +		G::div(z, x, y); +	} +	static void init(G& x) +	{ +		x = 1; +	} +}; + +} // mcl + @@ -0,0 +1,25 @@ +Microsoft Visual Studio Solution File, Format Version 12.00
 +# Visual Studio Express 2012 for Windows Desktop
 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fp_test", "test\proj\fp_test\fp_test.vcxproj", "{51266DE6-B57B-4AE3-B85C-282F170E1728}"
 +EndProject
 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ec_test", "test\proj\ec_test\ec_test.vcxproj", "{46B6E88E-739A-406B-9F68-BC46C5950FA3}"
 +EndProject
 +Global
 +	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 +		Debug|x64 = Debug|x64
 +		Release|x64 = Release|x64
 +	EndGlobalSection
 +	GlobalSection(ProjectConfigurationPlatforms) = postSolution
 +		{51266DE6-B57B-4AE3-B85C-282F170E1728}.Debug|x64.ActiveCfg = Debug|x64
 +		{51266DE6-B57B-4AE3-B85C-282F170E1728}.Debug|x64.Build.0 = Debug|x64
 +		{51266DE6-B57B-4AE3-B85C-282F170E1728}.Release|x64.ActiveCfg = Release|x64
 +		{51266DE6-B57B-4AE3-B85C-282F170E1728}.Release|x64.Build.0 = Release|x64
 +		{46B6E88E-739A-406B-9F68-BC46C5950FA3}.Debug|x64.ActiveCfg = Debug|x64
 +		{46B6E88E-739A-406B-9F68-BC46C5950FA3}.Debug|x64.Build.0 = Debug|x64
 +		{46B6E88E-739A-406B-9F68-BC46C5950FA3}.Release|x64.ActiveCfg = Release|x64
 +		{46B6E88E-739A-406B-9F68-BC46C5950FA3}.Release|x64.Build.0 = Release|x64
 +	EndGlobalSection
 +	GlobalSection(SolutionProperties) = preSolution
 +		HideSolutionNode = FALSE
 +	EndGlobalSection
 +EndGlobal
 diff --git a/release.props b/release.props new file mode 100644 index 0000000..88b0830 --- /dev/null +++ b/release.props @@ -0,0 +1,12 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> +  <ImportGroup Label="PropertySheets" /> +  <PropertyGroup Label="UserMacros" /> +  <PropertyGroup /> +  <ItemDefinitionGroup> +    <ClCompile> +      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary> +    </ClCompile> +  </ItemDefinitionGroup> +  <ItemGroup /> +</Project>
\ No newline at end of file diff --git a/sample/Makefile b/sample/Makefile new file mode 100644 index 0000000..96678a6 --- /dev/null +++ b/sample/Makefile @@ -0,0 +1,23 @@ +include ../common.mk + +TARGET=$(TEST_FILE) +LIBS= + +SRC=$(wildcard *.cpp) + +all: $(TARGET) + +test: $(TARGET) +	@$(SAMPLE_TEST) + +$(OBJDIR): +	@$(MKDIR) $(OBJDIR) + +clean: +	$(CLEAN) + +$(LIBS): +	$(MAKE) -C ../src + +-include $(DEPEND_FILE) + diff --git a/sample/ecdh_smpl.cpp b/sample/ecdh_smpl.cpp new file mode 100644 index 0000000..91bb9f6 --- /dev/null +++ b/sample/ecdh_smpl.cpp @@ -0,0 +1,69 @@ +/* +	sample of Elliptic Curve Diffie-Hellman key sharing +*/ +#include <iostream> +#include <fstream> +#include <cybozu/random_generator.hpp> +#include <mcl/fp.hpp> +#include <mcl/gmp_util.hpp> +#include <mcl/ecparam.hpp> +#include <mcl/ec.hpp> +#include <mcl/fp.hpp> +typedef mcl::FpT<> Fp; + +struct ZnTag; + +typedef mcl::EcT<Fp> Ec; +typedef mcl::FpT<ZnTag> Zn; + +int main() +{ +	cybozu::RandomGenerator rg; +	/* +		system setup with a parameter secp192k1 recommended by SECG +		Ec is an elliptic curve over Fp +		the cyclic group of <P> is isomorphic to Zn +	*/ +	const mcl::EcParam& para = mcl::ecparam::secp192k1; +	Zn::setModulo(para.n); +	Fp::setModulo(para.p); +	Ec::setParam(para.a, para.b); +	const Ec P(Fp(para.gx), Fp(para.gy)); + +	/* +		Alice sets up a private key a and a public key aP +	*/ +	Zn a; +	Ec aP; + +	a.setRand(rg); +	Ec::power(aP, P, a); // aP = a * P; + +	std::cout << "aP=" << aP << std::endl; + +	/* +		Bob sets up a private key b and a public key bP +	*/ +	Zn b; +	Ec bP; + +	b.setRand(rg); +	Ec::power(bP, P, b); // bP = b * P; + +	std::cout << "bP=" << bP << std::endl; + +	Ec abP, baP; + +	// Alice uses bP(B's public key) and a(A's private key) +	Ec::power(abP, bP, a); // abP = a * (bP) + +	// Bob uses aP(A's public key) and b(B's private key) +	Ec::power(baP, aP, b); // baP = b * (aP) + +	if (abP == baP) { +		std::cout << "key sharing succeed:" << abP << std::endl; +	} else { +		
std::cout << "ERR(not here)" << std::endl; +	} +} + diff --git a/sample/random_smpl.cpp b/sample/random_smpl.cpp new file mode 100644 index 0000000..19944de --- /dev/null +++ b/sample/random_smpl.cpp @@ -0,0 +1,29 @@ +#include <mcl/fp.hpp> +#include <mcl/gmp_util.hpp> +#include <mcl/ecparam.hpp> +#include <cybozu/random_generator.hpp> +#include <map> +#include <mcl/fp.hpp> +typedef mcl::FpT<> Fp; + +typedef std::map<std::string, int> Map; + +int main(int argc, char *argv[]) +{ +	cybozu::RandomGenerator rg; +	const char *p = mcl::ecparam::secp192k1.p; +	if (argc == 2) { +		p = argv[1]; +	} +	Fp::setModulo(p); +	Fp x; +	printf("p=%s\n", p); +	Map m; +	for (int i = 0; i < 10000; i++) { +		x.setRand(rg); +		m[x.toStr(16)]++; +	} +	for (Map::const_iterator i = m.begin(), ie = m.end(); i != ie; ++i) { +		printf("%s %d\n", i->first.c_str(), i->second); +	} +} diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..37c0873 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,42 @@ +VER=-3.5 +LLC=llc$(VER) +OPT=opt$(VER) +DIS=llvm-dis$(VER) +ASM=llvm-as$(VER) +OPT_LLC= $(OPT) -O3 -o - | $(LLC) -O3 -o - + +SRC = once.txt all.txt long.txt short.txt mul.txt +TARGET=x64.s x86.s arm.s arm64.s +AFLAGS=-mattr=bmi2 +all: $(TARGET) + +base64.ll: gen.py $(SRC) +	python gen.py 64 + +base32.ll: gen.py $(SRC) +	python gen.py 32 + +x64: base64.ll +	$(LLC) base64.ll -o - -x86-asm-syntax=intel +x86: base32.ll +	$(LLC) base32.ll -o - -x86-asm-syntax=intel -march=x86 +arm64: base64.ll +	$(LLC) base64.ll -o - -march=aarch64 + +arm: base32.ll +	$(LLC) base32.ll -o - -march=arm + +opt: base64.ll +	cat base64.ll|$(OPT_LLC) -x86-asm-syntax=intel $(AFLAGS) + +x64.s: base64.ll +	cat base64.ll|$(OPT_LLC) $(AFLAGS) > x64.s +x86.s: base32.ll +	cat base32.ll|$(OPT_LLC) $(AFLAGS) -march=x86 > x86.s +arm.s: base32.ll +	cat base32.ll|$(OPT_LLC) -march=arm > arm.s +arm64.s: base64.ll # the recipe reads base64.ll, so that is the file this target must depend on +	cat base64.ll|$(OPT_LLC) -march=aarch64 > arm64.s +clean: +	rm -rf base*.ll *.s + diff --git a/src/all.txt 
b/src/all.txt new file mode 100644 index 0000000..0dbd4f9 --- /dev/null +++ b/src/all.txt @@ -0,0 +1,7 @@ +declare { i$(bit), i1 } @llvm.usub.with.overflow.i$(bit)(i$(bit) %x, i$(bit) %y) + +define i$(unit) @extract$(bit+unit)(i$(bit+unit) %x, i$(bit+unit) %shift) { +	%t0 = lshr i$(bit+unit) %x, %shift +	%t1 = trunc i$(bit+unit) %t0 to i$(unit) +	ret i$(unit) %t1 +} diff --git a/src/gen.py b/src/gen.py new file mode 100644 index 0000000..acdd1ab --- /dev/null +++ b/src/gen.py @@ -0,0 +1,187 @@ +import sys, re + +# @for <var>, <begin>, <end> +RE_FOR = re.compile(r'@for\s+(\w+)\s*,\s*([^ ]+)\s*,\s*([^ ]+)') +# $(<exp>) +RE_VAL = re.compile(r'\$\(([^)]+)\)') +# @define <var>=<exp> +RE_DEFINE = re.compile(r'@define\s+(\w+)\s*=(.*)') +# @if <exp> +RE_IF = re.compile(r'@if\s+(.*)') +# @elif <exp> +RE_ELIF = re.compile(r'@elif\s+(.*)') + +def evalStr(s, envG, envL={}): +	def eval2str(x): +		s = x.group(1) +		v = eval(s, envG, envL) +		return str(v) +	s = RE_VAL.sub(eval2str, s) +	return s + +def parseDefine(s, envG, envL): +	""" +	if s is @define statement, then update envL and return True +	otherwise return False +	""" +	p = RE_DEFINE.match(s) +	if not p: +		return False +	lhs = p.group(1).strip() +	rhs = p.group(2).strip() +	envL[lhs] = eval(rhs, envG, envL) +	return True + +def parseFor(s, envG): +	""" +	@for i, 0, 3 +	<exp> +	@endfor + +	| +	v +	@define i = 0 +	<exp> +	exp +	@define i = 1 +	<exp> +	@define i = 2 +	<exp> + +	""" +	out = "" +	inFor = False +	envL = {} +	for line in s.split('\n'): +		stripped = line.strip() +		# save @define for parseIf +		parseDefine(stripped, envG, envL) +		if inFor: +			if line.strip() == '@endfor': +				inFor = False +				for i in xrange(b, e): +					out += "@define %s = %d\n" % (v, i) +					out += sub +			else: +				sub += line + '\n' +		else: +			p = RE_FOR.search(stripped) +			if p: +				v = p.group(1).strip() +				b = eval(p.group(2), envG) +				e = eval(p.group(3), envG) +				sub = "" +				inFor = True +			else: +				out += line 
+ '\n' +	return out + +def parseIf(s, envG): +	out = "" +	IF_INIT = 0 +	IF_IF = 1 +	IF_ELSE = 2 +	ifState = IF_INIT +	ifVar = False +	# available variables in $(<expr>) +	envL = {} +	def evalIntLoc(s): +		return eval(s, envG, envL) +	for line in s.split('\n'): +		stripped = line.strip() +		# remove @define +		if parseDefine(stripped, envG, envL): +			continue +		if ifState == IF_INIT: +			p = RE_IF.match(stripped) +			if p: +				ifState = IF_IF +				ifVar = evalIntLoc(p.group(1)) +				continue +		elif ifState == IF_IF: +			if stripped == '@endif': +				ifState = IF_INIT +				continue +			elif stripped == '@else': +				ifState = IF_ELSE +				ifVar = not ifVar +				continue +			p = RE_ELIF.match(stripped) +			if p: +				ifVar = evalIntLoc(p.group(1)) +				continue +			if not ifVar: +				continue +		elif ifState == IF_ELSE: +			if stripped == '@endif': +				ifState = IF_INIT +				continue +			if not ifVar: +				continue +		else: +			raise Exception('bad state', ifState) +		out += evalStr(line, envG, envL) + '\n' +	return out + +def parse(s, unitL, bitL): +	""" +		replace "$(<expr>)" with the evaluated value of <expr> + +		@for <var>, <begin>, <end> +		... 
+		@endfor + +		REMARK : @for is not nestable + +		@define <var> = <exp> +		REMARK : var is global + +		@if <exp> +		@elif <exp> +		@endif + +		REMARK : @if is not nestable +	""" +	# available variables in $(<expr>) +	envG = { +		'unit' : unitL, +		'bit' : bitL, +		'N' : bitL / unitL, +	} +	s = parseFor(s, envG) +	s = parseIf(s, envG) +	return s + +def gen(fo, inLame, unitL, bitLL): +	fi = open(inLame, 'r') +	s = fi.read() +	fi.close() +	for bitL in bitLL: +		t = parse(s, unitL, bitL) +		fo.write(t) + +def main(): +	argv = sys.argv +	args = len(argv) +	unitL = 64 +	if args == 2: +		unitL = int(argv[1]) +	if unitL not in [32, 64]: +		print "bad unitL", unitL +		exit(1) + +	outLame = 'base%d.ll' % unitL +	fo = open(outLame, 'w') +#	gen(fo, 't.txt', unitL, [unitL * 4]) +#	exit(1) +	gen(fo, 'once.txt', unitL, [unitL * 2]) + +	bitLL = range(unitL, 576 + 1, unitL) +	gen(fo, 'all.txt', unitL, bitLL) +	gen(fo, 'short.txt', unitL, bitLL) +	gen(fo, 'long.txt', unitL, bitLL) +	gen(fo, 'mul.txt', unitL, bitLL[1:]) +	fo.close() + +if __name__ == "__main__": +    main() diff --git a/src/long.txt b/src/long.txt new file mode 100644 index 0000000..31082a7 --- /dev/null +++ b/src/long.txt @@ -0,0 +1,54 @@ +define void @mcl_fp_add$(bit)L(i$(bit)* %pz, i$(bit)* %px, i$(bit)* %py, i$(bit)* %pp) { +	%x = load i$(bit)* %px +	%y = load i$(bit)* %py +	%p = load i$(bit)* %pp +	%x1 = zext i$(bit) %x to i$(bit+unit) +	%y1 = zext i$(bit) %y to i$(bit+unit) +	%p1 = zext i$(bit) %p to i$(bit+unit) +	%t0 = add i$(bit+unit) %x1, %y1 ; x + y +	%t1 = trunc i$(bit+unit) %t0 to i$(bit) +	store i$(bit) %t1, i$(bit)* %pz +	%vc = sub i$(bit+unit) %t0, %p1 +	%c = lshr i$(bit+unit) %vc, $(bit+unit-1) +	%c1 = trunc i$(bit+unit) %c to i1 +	br i1 %c1, label %carry, label %nocarry +nocarry: +	%v = trunc i$(bit+unit) %vc to i$(bit) +	store i$(bit) %v, i$(bit)* %pz +	ret void +carry: +	ret void +} + +define internal { i$(bit), i$(unit) } @local_sbb$(bit)(i$(bit) %x, i$(bit) %y) { +	%x1 = zext i$(bit) %x to 
i$(bit+unit) +	%y1 = zext i$(bit) %y to i$(bit+unit) +	%v1 = sub i$(bit+unit) %x1, %y1 +	%v = trunc i$(bit+unit) %v1 to i$(bit) +	%c = lshr i$(bit+unit) %v1, $(bit) +	%c1 = trunc i$(bit+unit) %c to i$(unit) +	%r1 = insertvalue { i$(bit), i$(unit) } undef, i$(bit) %v, 0 +	%r2 = insertvalue { i$(bit), i$(unit) } %r1, i$(unit) %c1, 1 +	ret { i$(bit), i$(unit) } %r2 +} + +define void @mcl_fp_sub$(bit)L(i$(bit)* %pz, i$(bit)* %px, i$(bit)* %py, i$(bit)* %pp) { +	%x = load i$(bit)* %px +	%y = load i$(bit)* %py +	%x1 = zext i$(bit) %x to i$(bit+unit) +	%y1 = zext i$(bit) %y to i$(bit+unit) +	%vc = sub i$(bit+unit) %x1, %y1 +	%v = trunc i$(bit+unit) %vc to i$(bit) +	%c = lshr i$(bit+unit) %vc, $(bit+unit-1) +	%c1 = trunc i$(bit+unit) %c to i1 +	store i$(bit) %v, i$(bit)* %pz +	br i1 %c1, label %carry, label %nocarry +nocarry: +	ret void +carry: +	%p = load i$(bit)* %pp +	%t = add i$(bit) %v, %p ; x - y + p +	store i$(bit) %t, i$(bit)* %pz +	ret void +} + diff --git a/src/mul.txt b/src/mul.txt new file mode 100644 index 0000000..4621c7a --- /dev/null +++ b/src/mul.txt @@ -0,0 +1,81 @@ +@define bu = bit + unit +define private i$(bu) @mul$(bit)x$(unit)(i$(bit) %x, i$(unit) %y) +@if N > 4 +noinline +@endif +{ +@for i, 0, N +  %x$(i) = call i$(unit) @extract$(bit)(i$(bit) %x, i$(bit) $(unit*i)) +  %x$(i)y = call i$(unit*2) @mul$(unit)x$(unit)(i$(unit) %x$(i), i$(unit) %y) +  %x$(i)y0 = zext i$(unit*2) %x$(i)y to i$(bu) +@endfor +@for i, 1, N +  %x$(i)y1 = shl i$(bu) %x$(i)y0, $(unit*i) +@endfor +  %t0 = add i$(bu) %x0y0, %x1y1 +@for i, 1, N-1 +  %t$(i) = add i$(bu) %t$(i-1), %x$(i+1)y1 +@endfor +  ret i$(bu) %t$(N-2) +} +define void @mcl_fp_mul$(bit)pre(i$(unit)* %pz, i$(bit)* %px, i$(bit)* %py) { +  %x = load i$(bit)* %px +  %y = load i$(bit)* %py +@for i, 0, N +  %y$(i) = call i$(unit) @extract$(bit)(i$(bit) %y, i$(bit) $(unit*i)) +@endfor +  %sum0 = call i$(bu) @mul$(bit)x$(unit)(i$(bit) %x, i$(unit) %y0) +  %t0 = trunc i$(bu) %sum0 to i$(unit) +  store i$(unit) %t0, 
i$(unit)* %pz +@for i, 1, N + +  %s$(i-1) = lshr i$(bu) %sum$(i-1), $(unit) +  %xy$(i) = call i$(bu) @mul$(bit)x$(unit)(i$(bit) %x, i$(unit) %y$(i)) +  %sum$(i) = add i$(bu) %s$(i-1), %xy$(i) +  %z$(i) = getelementptr i$(unit)* %pz, i32 $(i) +  @if i < N - 1 +  %ts$(i) = trunc i$(bu) %sum$(i) to i$(unit) +  store i$(unit) %ts$(i), i$(unit)* %z$(i) +  @endif +@endfor +  %p = bitcast i$(unit)* %z$(N-1) to i$(bu)* +  store i$(bu) %sum$(N-1), i$(bu)* %p +  ret void +} + +@define bu = bit + unit +@define bu2 = bit + unit * 2 +define void @mcl_fp_mont$(bit)(i$(bit)* %pz, i$(bit)* %px, i$(unit)* %py, i$(bit)* %pp, i$(unit) %r) { +	%p = load i$(bit)* %pp +	%x = load i$(bit)* %px + +@for i, 0, N +	%py$(i) = getelementptr i$(unit)* %py, i$(unit) $(i) +	%y$(i) = load i$(unit)* %py$(i) +	%xy$(i) = call i$(bu) @mul$(bit)x$(unit)(i$(bit) %x, i$(unit) %y$(i)) +@if i == 0 +	%a0 = zext i$(bu) %xy0 to i$(bu2) + +	%at$(i) = trunc i$(bu) %xy$(i) to i$(unit) +@else +	%xye$(i) = zext i$(bu) %xy$(i) to i$(bu2) +	%a$(i) = add i$(bu2) %s$(i-1), %xye$(i) +	%at$(i) = trunc i$(bu2) %a$(i) to i$(unit) +@endif +	%q$(i) = mul i$(unit) %at$(i), %r +	%pq$(i) = call i$(bu) @mul$(bit)x$(unit)(i$(bit) %p, i$(unit) %q$(i)) +	%pqe$(i) = zext i$(bu) %pq$(i) to i$(bu2) +	%t$(i) = add i$(bu2) %a$(i), %pqe$(i) +	%s$(i) = lshr i$(bu2) %t$(i), $(unit) +@endfor +	%v = trunc i$(bu2) %s$(N-1) to i$(bu) +	%pe = zext i$(bit) %p to i$(bu) +	%vc = sub i$(bu) %v, %pe +	%c = lshr i$(bu) %vc, $(bit) +	%c1 = trunc i$(bu) %c to i1 +	%z = select i1 %c1, i$(bu) %v, i$(bu) %vc +	%zt = trunc i$(bu) %z to i$(bit) +	store i$(bit) %zt, i$(bit)* %pz +	ret void +} + diff --git a/src/once.txt b/src/once.txt new file mode 100644 index 0000000..501fb2b --- /dev/null +++ b/src/once.txt @@ -0,0 +1,74 @@ + +define i$(unit*2) @mul$(unit)x$(unit)(i$(unit) %x, i$(unit) %y) { +	%x0 = zext i$(unit) %x to i$(unit*2) +	%y0 = zext i$(unit) %y to i$(unit*2) +	%z = mul i$(unit*2) %x0, %y0 +	ret i$(unit*2) %z +} + +; NIST_P192 +; 
0xfffffffffffffffffffffffffffffffeffffffffffffffff +; +;       0                1                2 +; ffffffffffffffff fffffffffffffffe ffffffffffffffff +; +; p = (1 << 192) - (1 << 64) - 1 +; (1 << 192) % p = (1 << 64) + 1 +; +; L : 192bit +; Hi: 64bit +; x = [H:L] = [H2:H1:H0:L] +; mod p +;    x = L + H + (H << 64) +;      = L + H + [H1:H0:0] + H2 + (H2 << 64) +;[e:t] = L + H + [H1:H0:H2] + [H2:0] ; 2bit(e) over +;      = t + e + (e << 64) + +define internal i64 @extract192to64(i192 %x, i192 %shift) { +	%t0 = lshr i192 %x, %shift +	%t1 = trunc i192 %t0 to i64 +	ret i64 %t1 +} + +define internal void @modNIST_P192(i192* %out, i192* %px) { +	%L192 = load i192* %px +	%L = zext i192 %L192 to i256 + +	%pH = getelementptr i192* %px, i32 1 +	%H192 = load i192* %pH +	%H = zext i192 %H192 to i256 + +	%H10_ = shl i192 %H192, 64 +	%H10 = zext i192 %H10_ to i256 + +	%H2_ = call i64 @extract192to64(i192 %H192, i192 128) +	%H2 = zext i64 %H2_ to i256 +	%H102 = or i256 %H10, %H2 + +	%H2s = shl i256 %H2, 64 + +	%t0 = add i256 %L, %H +	%t1 = add i256 %t0, %H102 +	%t2 = add i256 %t1, %H2s + +	%e = lshr i256 %t2, 192 +	%t3 = trunc i256 %t2 to i192 +	%e1 = trunc i256 %e to i192 + + +	%t4 = add i192 %t3, %e1 +	%e2 = shl i192 %e1, 64 +	%t5 = add i192 %t4, %e2 + +	store i192 %t5, i192* %out + +	ret void +} + +define void @mcl_fp_mul_NIST_P192(i192* %pz, i192* %px, i192* %py) { +	%buf = alloca i192, i32 2 +	%p = bitcast i192* %buf to i$(unit)* +	call void @mcl_fp_mul192pre(i$(unit)* %p, i192* %px, i192* %py) +	call void @modNIST_P192(i192* %pz, i192* %buf) +	ret void +} diff --git a/src/short.txt b/src/short.txt new file mode 100644 index 0000000..931a63f --- /dev/null +++ b/src/short.txt @@ -0,0 +1,46 @@ +define void @mcl_fp_add$(bit)S(i$(bit)* %pz, i$(bit)* %px, i$(bit)* %py, i$(bit)* %pp) { +entry: +	%x = load i$(bit)* %px +	%y = load i$(bit)* %py +	%p = load i$(bit)* %pp +	%x1 = zext i$(bit) %x to i$(bit+unit) +	%y1 = zext i$(bit) %y to i$(bit+unit) +	%p1 = zext i$(bit) %p to 
i$(bit+unit) +	%t0 = add i$(bit+unit) %x1, %y1 ; x + y +	%t1 = sub i$(bit+unit) %t0, %p1 ; x + y - p +	%t2 = lshr i$(bit+unit) %t1, $(bit) +	%t3 = trunc i$(bit+unit) %t2 to i1 +	%t4 = select i1 %t3, i$(bit+unit) %t0, i$(bit+unit) %t1 +	%t5 = trunc i$(bit+unit) %t4 to i$(bit) +	store i$(bit) %t5, i$(bit)* %pz +	ret void +} + +define internal { i$(bit), i$(unit) } @mcl_local_sbb$(bit)(i$(bit) %x, i$(bit) %y) { +	%x1 = zext i$(bit) %x to i$(bit+unit) +	%y1 = zext i$(bit) %y to i$(bit+unit) +	%v1 = sub i$(bit+unit) %x1, %y1 +	%v = trunc i$(bit+unit) %v1 to i$(bit) +	%c = lshr i$(bit+unit) %v1, $(bit) +	%c1 = trunc i$(bit+unit) %c to i$(unit) +	%r1 = insertvalue { i$(bit), i$(unit) } undef, i$(bit) %v, 0 +	%r2 = insertvalue { i$(bit), i$(unit) } %r1, i$(unit) %c1, 1 +	ret { i$(bit), i$(unit) } %r2 +} + +define void @mcl_fp_sub$(bit)S(i$(bit)* %pz, i$(bit)* %px, i$(bit)* %py, i$(bit)* %pp) { +	%x = load i$(bit)* %px +	%y = load i$(bit)* %py +	%x1 = zext i$(bit) %x to i$(bit+unit) +	%y1 = zext i$(bit) %y to i$(bit+unit) +	%vc = sub i$(bit+unit) %x1, %y1 +	%v = trunc i$(bit+unit) %vc to i$(bit) +	%c = lshr i$(bit+unit) %vc, $(bit+unit-1) +	%c1 = trunc i$(bit+unit) %c to i1 +	%p = load i$(bit)* %pp +	%a = select i1 %c1, i$(bit) %p, i$(bit) 0 +	%v1 = add i$(bit) %v, %a +	store i$(bit) %v1, i$(bit)* %pz +	ret void +} + diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000..b1e01dd --- /dev/null +++ b/test/Makefile @@ -0,0 +1,42 @@ +include ../common.mk + +ifeq ($(USE_MONT_FP),1) +  CFLAGS += -DUSE_MONT_FP +endif + +ifeq ($(USE_LLVM),1) +  CFLAGS += -DMIE_USE_LLVM +  ASM_SRC=../src/$(CPU).s +  ASM_OBJ=$(ASM_SRC:.s=.o) +  SRC+=$(ASM_SRC) +  LDFLAGS+=$(ASM_OBJ) +endif + +TARGET=$(TEST_FILE) +LIBS= + +SRC=fp_test.cpp ec_test.cpp fp_util_test.cpp +ifeq ($(CPU),x64) +  SRC+=fp_generator_test.cpp mont_fp_test.cpp +endif + +all: $(TARGET) + +test: $(TARGET) $(ASM_OBJ) +	@$(UNIT_TEST) + +$(OBJDIR): +	@$(MKDIR) $(OBJDIR) + +clean: +	$(CLEAN) + +$(LIBS): +	
$(MAKE) -C ../src + +-include $(DEPEND_FILE) + +ifeq ($(USE_LLVM),1) +$(ASM_OBJ): $(ASM_SRC) +	$(CXX) $< -o $@ -c +endif diff --git a/test/base_test.cpp b/test/base_test.cpp new file mode 100644 index 0000000..29a177f --- /dev/null +++ b/test/base_test.cpp @@ -0,0 +1,392 @@ +#include <map> +#define MCL_USE_LLVM +#include <mcl/fp_base.hpp> +#include <cybozu/test.hpp> +#include <cybozu/benchmark.hpp> +#include <cybozu/xorshift.hpp> +#include <cybozu/bit_operation.hpp> +#include <mcl/fp_util.hpp> +#include <mcl/fp.hpp> + +#include <mcl/fp_generator.hpp> +#if (CYBOZU_HOST == CYBOZU_HOST_INTEL) && (CYBOZU_OS_BIT == 64) +	#define USE_XBYAK +	static mcl::FpGenerator fg; +#endif +#define PUT(x) std::cout << #x "=" << (x) << std::endl + +const size_t MAX_N = 32; +typedef mcl::fp::Unit Unit; + +size_t getUnitN(size_t bitLen) +{ +	return (bitLen + sizeof(Unit) * 8 - 1) / (sizeof(Unit) * 8); +} + +void setMpz(mpz_class& mx, const Unit *x, size_t n) +{ +	mcl::Gmp::setRaw(mx, x, n); +} +void getMpz(Unit *x, size_t n, const mpz_class& mx) +{ +	mcl::fp::local::toArray(x,  n, mx.get_mpz_t()); +} + +struct Montgomery { +	mpz_class p_; +	mpz_class R_; // (1 << (n_ * 64)) % p +	mpz_class RR_; // (R * R) % p +	Unit r_; // p * r = -1 mod M = 1 << 64 +	size_t n_; +	Montgomery() {} +	explicit Montgomery(const mpz_class& p) +	{ +		p_ = p; +		r_ = mcl::montgomery::getCoff(mcl::Gmp::getBlock(p, 0)); +		n_ = mcl::Gmp::getBlockSize(p); +		R_ = 1; +		R_ = (R_ << (n_ * 64)) % p_; +		RR_ = (R_ * R_) % p_; +	} + +	void toMont(mpz_class& x) const { mul(x, x, RR_); } +	void fromMont(mpz_class& x) const { mul(x, x, 1); } + +	void mont(Unit *z, const Unit *x, const Unit *y) const +	{ +		mpz_class mx, my; +		setMpz(mx, x, n_); +		setMpz(my, y, n_); +		mul(mx, mx, my); +		getMpz(z, n_, mx); +	} +	void mul(mpz_class& z, const mpz_class& x, const mpz_class& y) const +	{ +#if 1 +		const size_t ySize = mcl::Gmp::getBlockSize(y); +		mpz_class c = y == 0 ? 
mpz_class(0) : x * mcl::Gmp::getBlock(y, 0); +		Unit q = c == 0 ? 0 : mcl::Gmp::getBlock(c, 0) * r_; +		c += p_ * q; +		c >>= sizeof(Unit) * 8; +		for (size_t i = 1; i < n_; i++) { +			if (i < ySize) { +				c += x * mcl::Gmp::getBlock(y, i); +			} +			Unit q = c == 0 ? 0 : mcl::Gmp::getBlock(c, 0) * r_; +			c += p_ * q; +			c >>= sizeof(Unit) * 8; +		} +		if (c >= p_) { +			c -= p_; +		} +		z = c; +#else +		z = x * y; +		const size_t zSize = mcl::Gmp::getBlockSize(z); +		for (size_t i = 0; i < n_; i++) { +			if (i < zSize) { +				Unit q = mcl::Gmp::getBlock(z, 0) * r_; +				z += p_ * (mp_limb_t)q; +			} +			z >>= sizeof(Unit) * 8; +		} +		if (z >= p_) { +			z -= p_; +		} +#endif +	} +}; + +void put(const char *msg, const Unit *x, size_t n) +{ +	printf("%s ", msg); +	for (size_t i = 0; i < n; i++) printf("%016llx ", (long long)x[n - 1 - i]); +	printf("\n"); +} +void verifyEqual(const Unit *x, const Unit *y, size_t n, const char *file, int line) +{ +	bool ok = mcl::fp::local::isEqualArray(x, y, n); +	CYBOZU_TEST_ASSERT(ok); +	if (ok) return; +	printf("%s:%d\n", file, line); +	put("L", x, n); +	put("R", y, n); +	exit(1); +} +#define VERIFY_EQUAL(x, y, n) verifyEqual(x, y, n, __FILE__, __LINE__) + +void addC(Unit *z, const Unit *x, const Unit *y, const Unit *p, size_t n) +{ +	mpz_class mx, my, mp; +	setMpz(mx, x, n); +	setMpz(my, y, n); +	setMpz(mp, p, n); +	mx += my; +	if (mx >= mp) mx -= mp; +	getMpz(z, n, mx); +} +void subC(Unit *z, const Unit *x, const Unit *y, const Unit *p, size_t n) +{ +	mpz_class mx, my, mp; +	setMpz(mx, x, n); +	setMpz(my, y, n); +	setMpz(mp, p, n); +	mx -= my; +	if (mx < 0) mx += mp; +	getMpz(z, n, mx); +} +static inline void set_zero(mpz_t& z, Unit *p, size_t n) +{ +	z->_mp_alloc = (int)n; +	z->_mp_size = 0; +	z->_mp_d = (mp_limb_t*)p; +} +static inline void set_mpz_t(mpz_t& z, const Unit* p, int n) +{ +	z->_mp_alloc = n; +	int i = n; +	while (i > 0 && p[i - 1] == 0) { +		i--; +	} +	z->_mp_size = i; +	z->_mp_d = (mp_limb_t*)p; +} + +// z[2n] 
<- x[n] * y[n] +void mulPreC(Unit *z, const Unit *x, const Unit *y, size_t n) +{ +#if 1 +	mpz_t mx, my, mz; +	set_zero(mz, z, n * 2); +	set_mpz_t(mx, x, n); +	set_mpz_t(my, y, n); +	mpz_mul(mz, mx, my); +	mcl::fp::local::toArray(z, n * 2, mz); +#else +	mpz_class mx, my; +	setMpz(mx, x, n); +	setMpz(my, y, n); +	mx *= my; +	getMpz(z, n * 2, mx); +#endif +} + +void modC(Unit *y, const Unit *x, const Unit *p, size_t n) +{ +	mpz_t mx, my, mp; +	set_mpz_t(mx, x, n * 2); +	set_mpz_t(my, y, n); +	set_mpz_t(mp, p, n); +	mpz_mod(my, mx, mp); +	mcl::fp::local::clearArray(y, my->_mp_size, n); +} + +void mul(Unit *z, const Unit *x, const Unit *y, const Unit *p, size_t n) +{ +	Unit ret[MAX_N * 2]; +	mpz_t mx, my, mz, mp; +	set_zero(mz, ret, MAX_N * 2); +	set_mpz_t(mx, x, n); +	set_mpz_t(my, y, n); +	set_mpz_t(mp, p, n); +	mpz_mul(mz, mx, my); +	mpz_mod(mz, mz, mp); +	mcl::fp::local::toArray(z, n, mz); +} + +typedef mcl::fp::void3op void3op; +typedef mcl::fp::void4op void4op; +typedef mcl::fp::void4Iop void4Iop; + +const struct FuncOp { +	size_t bitLen; +	void4op addS; +	void4op addL; +	void4op subS; +	void4op subL; +	void3op mulPre; +	void4Iop mont; +} gFuncOpTbl[] = { +	{ 128, mcl_fp_add128S, mcl_fp_add128L, mcl_fp_sub128S, mcl_fp_sub128L, mcl_fp_mul128pre, mcl_fp_mont128 }, +	{ 192, mcl_fp_add192S, mcl_fp_add192L, mcl_fp_sub192S, mcl_fp_sub192L, mcl_fp_mul192pre, mcl_fp_mont192 }, +	{ 256, mcl_fp_add256S, mcl_fp_add256L, mcl_fp_sub256S, mcl_fp_sub256L, mcl_fp_mul256pre, mcl_fp_mont256 }, +	{ 320, mcl_fp_add320S, mcl_fp_add320L, mcl_fp_sub320S, mcl_fp_sub320L, mcl_fp_mul320pre, mcl_fp_mont320 }, +	{ 384, mcl_fp_add384S, mcl_fp_add384L, mcl_fp_sub384S, mcl_fp_sub384L, mcl_fp_mul384pre, mcl_fp_mont384 }, +	{ 448, mcl_fp_add448S, mcl_fp_add448L, mcl_fp_sub448S, mcl_fp_sub448L, mcl_fp_mul448pre, mcl_fp_mont448 }, +	{ 512, mcl_fp_add512S, mcl_fp_add512L, mcl_fp_sub512S, mcl_fp_sub512L, mcl_fp_mul512pre, mcl_fp_mont512 }, +#if CYBOZU_OS_BIT == 32 +	{ 160, mcl_fp_add160S, 
mcl_fp_add160L, mcl_fp_sub160S, mcl_fp_sub160L, mcl_fp_mul160pre, mcl_fp_mont160 }, +	{ 224, mcl_fp_add224S, mcl_fp_add224L, mcl_fp_sub224S, mcl_fp_sub224L, mcl_fp_mul224pre, mcl_fp_mont224 }, +	{ 288, mcl_fp_add288S, mcl_fp_add288L, mcl_fp_sub288S, mcl_fp_sub288L, mcl_fp_mul288pre, mcl_fp_mont288 }, +	{ 352, mcl_fp_add352S, mcl_fp_add352L, mcl_fp_sub352S, mcl_fp_sub352L, mcl_fp_mul352pre, mcl_fp_mont352 }, +	{ 416, mcl_fp_add416S, mcl_fp_add416L, mcl_fp_sub416S, mcl_fp_sub416L, mcl_fp_mul416pre, mcl_fp_mont416 }, +	{ 480, mcl_fp_add480S, mcl_fp_add480L, mcl_fp_sub480S, mcl_fp_sub480L, mcl_fp_mul480pre, mcl_fp_mont480 }, +	{ 544, mcl_fp_add544S, mcl_fp_add544L, mcl_fp_sub544S, mcl_fp_sub544L, mcl_fp_mul544pre, mcl_fp_mont544 }, +#else +	{ 576, mcl_fp_add576S, mcl_fp_add576L, mcl_fp_sub576S, mcl_fp_sub576L, mcl_fp_mul576pre, mcl_fp_mont576 }, +#endif +}; + +FuncOp getFuncOp(size_t bitLen) +{ +	typedef std::map<size_t, FuncOp> Map; +	static Map map; +	static bool init = false; +	if (!init) { +		init = true; +		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(gFuncOpTbl); i++) { +			map[gFuncOpTbl[i].bitLen] = gFuncOpTbl[i]; +		} +	} +	for (Map::const_iterator i = map.begin(), ie = map.end(); i != ie; ++i) { +		if (bitLen <= i->second.bitLen) { +			return i->second; +		} +	} +	printf("ERR bitLen=%d\n", (int)bitLen); +	exit(1); +} + +void test(const Unit *p, size_t bitLen) +{ +	printf("bitLen %d\n", (int)bitLen); +	const size_t n = getUnitN(bitLen); +#ifdef NDEBUG +	bool doBench = true; +#else +	bool doBench = false; +#endif +	const FuncOp funcOp = getFuncOp(bitLen); +	const void4op addS = funcOp.addS; +	const void4op addL = funcOp.addL; +	const void4op subS = funcOp.subS; +	const void4op subL = funcOp.subL; +	const void3op mulPre = funcOp.mulPre; +	const void4Iop mont = funcOp.mont; + +	mcl::fp::Unit x[MAX_N], y[MAX_N]; +	mcl::fp::Unit z[MAX_N], w[MAX_N]; +	mcl::fp::Unit z2[MAX_N * 2]; +	mcl::fp::Unit w2[MAX_N * 2]; +	cybozu::XorShift rg; +	mcl::fp::getRandVal(x, rg, p, 
bitLen); +	mcl::fp::getRandVal(y, rg, p, bitLen); +	const size_t C = 10; + +	addC(z, x, y, p, n); +	addS(w, x, y, p); +	VERIFY_EQUAL(z, w, n); +	for (size_t i = 0; i < C; i++) { +		addC(z, y, z, p, n); +		addS(w, y, w, p); +		VERIFY_EQUAL(z, w, n); +		addC(z, y, z, p, n); +		addL(w, y, w, p); +		VERIFY_EQUAL(z, w, n); +		subC(z, x, z, p, n); +		subS(w, x, w, p); +		VERIFY_EQUAL(z, w, n); +		subC(z, x, z, p, n); +		subL(w, x, w, p); +		VERIFY_EQUAL(z, w, n); +		mulPreC(z2, x, z, n); +		mulPre(w2, x, z); +		VERIFY_EQUAL(z2, w2, n * 2); +	} +	{ +		mpz_class mp; +		setMpz(mp, p, n); +		Montgomery m(mp); +#ifdef USE_XBYAK +		if (bitLen > 128) fg.init(p, n); +#endif +		/* +			real mont +			   0    0 +			   1    R^-1 +			   R    1 +			  -1    -R^-1 +			  -R    -1 +		*/ +		mpz_class t = 1; +		const mpz_class R = (t << (n * 64)) % mp; +		const mpz_class tbl[] = { +			0, 1, R, mp - 1, mp - R +		}; +		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +			const mpz_class& mx = tbl[i]; +			for (size_t j = i; j < CYBOZU_NUM_OF_ARRAY(tbl); j++) { +				const mpz_class& my = tbl[j]; +				getMpz(x, n, mx); +				getMpz(y, n, my); +				m.mont(z, x, y); +				mont(w, x, y, p, m.r_); +				VERIFY_EQUAL(z, w, n); +#ifdef USE_XBYAK +				if (bitLen > 128) { +					fg.mul_(w, x, y); +					VERIFY_EQUAL(z, w, n); +				} +#endif +			} +		} +		if (doBench) { +//			CYBOZU_BENCH("montC", m.mont, x, y, x); +			CYBOZU_BENCH("montA  ", mont, x, y, x, p, m.r_); +		} +	} +	if (doBench) { +//		CYBOZU_BENCH("addS", addS, x, y, x, p); // slow +//		CYBOZU_BENCH("subS", subS, x, y, x, p); +//		CYBOZU_BENCH("addL", addL, x, y, x, p); +//		CYBOZU_BENCH("subL", subL, x, y, x, p); +		CYBOZU_BENCH("mulPreA", mulPre, w2, y, x); +		CYBOZU_BENCH("mulPreC", mulPreC, w2, y, x, n); +		CYBOZU_BENCH("modC   ", modC, x, w2, p, n); +	} +#ifdef USE_XBYAK +	if (bitLen <= 128) return; +	if (doBench) { +		fg.init(p, n); +		CYBOZU_BENCH("addA   ", fg.add_, x, y, x); +		CYBOZU_BENCH("subA   ", fg.sub_, x, y, x); +//		
CYBOZU_BENCH("mulA", fg.mul_, x, y, x); +	} +#endif +	printf("mont test %d\n", (int)bitLen); +} + +CYBOZU_TEST_AUTO(all) +{ +	const struct { +		size_t n; +		const uint64_t p[9]; +	} tbl[] = { +//		{ 2, { 0xf000000000000001, 1, } }, +		{ 2, { 0x000000000000001d, 0x8000000000000000, } }, +		{ 3, { 0x000000000000012b, 0x0000000000000000, 0x0000000080000000, } }, +//		{ 3, { 0x0f69466a74defd8d, 0xfffffffe26f2fc17, 0x07ffffffffffffff, } }, +//		{ 3, { 0x7900342423332197, 0x1234567890123456, 0x1480948109481904, } }, +		{ 3, { 0x0f69466a74defd8d, 0xfffffffe26f2fc17, 0xffffffffffffffff, } }, +//		{ 4, { 0x7900342423332197, 0x4242342420123456, 0x1234567892342342, 0x1480948109481904, } }, +//		{ 4, { 0x0f69466a74defd8d, 0xfffffffe26f2fc17, 0x17ffffffffffffff, 0x1513423423423415, } }, +		{ 4, { 0xa700000000000013, 0x6121000000000013, 0xba344d8000000008, 0x2523648240000001, } }, +//		{ 5, { 0x0000000000000009, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, } }, +		{ 5, { 0xfffffffffffffc97, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, } }, +//		{ 6, { 0x4720422423332197, 0x0034230847204720, 0x3456789012345679, 0x4820984290482212, 0x9482094820948209, 0x0194810841094810, } }, +//		{ 6, { 0x7204224233321972, 0x0342308472047204, 0x4567890123456790, 0x0948204204243123, 0x2098420984209482, 0x2093482094810948, } }, +		{ 6, { 0x00000000ffffffff, 0xffffffff00000000, 0xfffffffffffffffe, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, } }, +//		{ 7, { 0x0000000000000063, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, } }, +		{ 7, { 0x000000000fffcff1, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, } }, +		{ 8, { 0xffffffffffffd0c9, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 
0xffffffffffffffff, } }, +		{ 9, { 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0x00000000000001ff, } }, +//		{ 9, { 0x4720422423332197, 0x0034230847204720, 0x3456789012345679, 0x2498540975555312, 0x9482904924029424, 0x0948209842098402, 0x1098410948109482, 0x0820958209582094, 0x0000000000000029, } }, +//		{ 9, { 0x0f69466a74defd8d, 0xfffffffe26f2fc17, 0x7fffffffffffffff, 0x8572938572398583, 0x5732057823857293, 0x9820948205872380, 0x3409238420492034, 0x9483842098340298, 0x0000000000000003, } }, +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		const size_t n = tbl[i].n; +		const size_t bitLen = (n - 1) * 64 + cybozu::bsr<uint64_t>(tbl[i].p[n - 1]) + 1; +		test((const Unit*)tbl[i].p, bitLen); +	} +} + diff --git a/test/ec_test.cpp b/test/ec_test.cpp new file mode 100644 index 0000000..1255a1d --- /dev/null +++ b/test/ec_test.cpp @@ -0,0 +1,397 @@ +#define PUT(x) std::cout << #x "=" << (x) << std::endl +#define CYBOZU_TEST_DISABLE_AUTO_RUN +#include <cybozu/test.hpp> +#include <cybozu/benchmark.hpp> +#include <mcl/gmp_util.hpp> + +#include <mcl/fp.hpp> +typedef mcl::FpT<> Fp_3; +typedef mcl::FpT<> Fp_4; +typedef mcl::FpT<> Fp_6; +typedef mcl::FpT<> Fp_9; +#include <mcl/ec.hpp> +#include <mcl/ecparam.hpp> +#include <time.h> + +struct tagZn; +typedef mcl::FpT<tagZn> Zn; + +template<class Fp> +struct Test { +	typedef mcl::EcT<Fp> Ec; +	const mcl::EcParam& para; +	Test(const mcl::EcParam& para) +		: para(para) +	{ +		Fp::setModulo(para.p); +		Zn::setModulo(para.n); +		Ec::setParam(para.a, para.b); +//		CYBOZU_TEST_EQUAL(para.bitLen, Fp(-1).getBitLen()); +	} +	void cstr() const +	{ +		Ec O; +		CYBOZU_TEST_ASSERT(O.isZero()); +		Ec P; +		Ec::neg(P, O); +		CYBOZU_TEST_EQUAL(P, O); +	} +	void ope() const +	{ +		Fp x(para.gx); +		Fp y(para.gy); +		Zn n = 0; +		CYBOZU_TEST_ASSERT(Ec::isValid(x, y)); +		Ec P(x, y), Q, R, O; +		{ +			Ec::neg(Q, P); +		
	CYBOZU_TEST_EQUAL(Q.x, P.x); +			CYBOZU_TEST_EQUAL(Q.y, -P.y); + +			R = P + Q; +			CYBOZU_TEST_ASSERT(R.isZero()); + +			R = P + O; +			CYBOZU_TEST_EQUAL(R, P); +			R = O + P; +			CYBOZU_TEST_EQUAL(R, P); +		} + +		{ +			Ec::dbl(R, P); +			Ec R2 = P + P; +			CYBOZU_TEST_EQUAL(R, R2); +			{ +				Ec P2 = P; +				Ec::dbl(P2, P2); +				CYBOZU_TEST_EQUAL(P2, R2); +			} +			Ec R3L = R2 + P; +			Ec R3R = P + R2; +			CYBOZU_TEST_EQUAL(R3L, R3R); +			{ +				Ec RR = R2; +				RR = RR + P; +				CYBOZU_TEST_EQUAL(RR, R3L); +				RR = R2; +				RR = P + RR; +				CYBOZU_TEST_EQUAL(RR, R3L); +				RR = P; +				RR = RR + RR; +				CYBOZU_TEST_EQUAL(RR, R2); +			} +			Ec::power(R, P, 2); +			CYBOZU_TEST_EQUAL(R, R2); +			Ec R4L = R3L + R2; +			Ec R4R = R2 + R3L; +			CYBOZU_TEST_EQUAL(R4L, R4R); +			Ec::power(R, P, 5); +			CYBOZU_TEST_EQUAL(R, R4L); +		} +		{ +			R = P; +			for (int i = 0; i < 10; i++) { +				R += P; +			} +			Ec R2; +			Ec::power(R2, P, 11); +			CYBOZU_TEST_EQUAL(R, R2); +		} +		Ec::power(R, P, n - 1); +		CYBOZU_TEST_EQUAL(R, -P); +		R += P; // Ec::power(R, P, n); +		CYBOZU_TEST_ASSERT(R.isZero()); +	} + +	void power() const +	{ +		Fp x(para.gx); +		Fp y(para.gy); +		Ec P(x, y); +		Ec Q; +		Ec R; +		for (int i = 0; i < 100; i++) { +			Ec::power(Q, P, i); +			CYBOZU_TEST_EQUAL(Q, R); +			R += P; +		} +	} + +	void neg_power() const +	{ +		Fp x(para.gx); +		Fp y(para.gy); +		Ec P(x, y); +		Ec Q; +		Ec R; +		for (int i = 0; i < 100; i++) { +			Ec::power(Q, P, -i); +			CYBOZU_TEST_EQUAL(Q, R); +			R -= P; +		} +	} +	void squareRoot() const +	{ +		Fp x(para.gx); +		Fp y(para.gy); +		bool odd = Fp::isYodd(y); +		Fp yy; +		Ec::getYfromX(yy, x, odd); +		CYBOZU_TEST_EQUAL(yy, y); +		Fp::neg(y, y); +		odd = Fp::isYodd(y); +		yy.clear(); +		Ec::getYfromX(yy, x, odd); +		CYBOZU_TEST_EQUAL(yy, y); +	} +	void power_fp() const +	{ +		Fp x(para.gx); +		Fp y(para.gy); +		Ec P(x, y); +		Ec Q; +		Ec R; +		for (int i = 0; i < 100; i++) { +			Ec::power(Q, P, Zn(i)); +			
CYBOZU_TEST_EQUAL(Q, R); +			R += P; +		} +	} +	void binaryExpression() const +	{ +		puts("test binaryExpression"); +		const Fp x(para.gx); +		const Fp y(para.gy); +		Ec P(x, y); +		Ec Q; +		// not compressed +		Ec::setCompressedExpression(false); +		{ +			cybozu::BitVector bv; +			P.appendToBitVec(bv); +			Q.fromBitVec(bv); +			CYBOZU_TEST_EQUAL(P, Q); +		} +		{ +			P = -P; +			cybozu::BitVector bv; +			P.appendToBitVec(bv); +			Q.fromBitVec(bv); +			CYBOZU_TEST_EQUAL(P, Q); +		} +		P.clear(); +		{ +			cybozu::BitVector bv; +			P.appendToBitVec(bv); +			Q.fromBitVec(bv); +			CYBOZU_TEST_EQUAL(P, Q); +		} +		// compressed +		Ec::setCompressedExpression(true); +		P.set(x, y); +		{ +			cybozu::BitVector bv; +			P.appendToBitVec(bv); +			Q.fromBitVec(bv); +			CYBOZU_TEST_EQUAL(P, Q); +		} +		{ +			P = -P; +			cybozu::BitVector bv; +			P.appendToBitVec(bv); +			Q.fromBitVec(bv); +			CYBOZU_TEST_EQUAL(P, Q); +		} +		P.clear(); +		{ +			cybozu::BitVector bv; +			P.appendToBitVec(bv); +			Q.fromBitVec(bv); +			CYBOZU_TEST_EQUAL(P, Q); +		} +	} +	void str() const +	{ +		puts("test str"); +		const Fp x(para.gx); +		const Fp y(para.gy); +		Ec P(x, y); +		Ec Q; +		// not compressed +		Ec::setCompressedExpression(false); +		{ +			std::stringstream ss; +			ss << P; +			ss >> Q; +			CYBOZU_TEST_EQUAL(P, Q); +		} +		{ +			P = -P; +			std::stringstream ss; +			ss << P; +			ss >> Q; +			CYBOZU_TEST_EQUAL(P, Q); +		} +		P.clear(); +		{ +			std::stringstream ss; +			ss << P; +			ss >> Q; +			CYBOZU_TEST_EQUAL(P, Q); +		} +		// compressed +		Ec::setCompressedExpression(true); +		P.set(x, y); +		{ +			std::stringstream ss; +			ss << P; +			ss >> Q; +			CYBOZU_TEST_EQUAL(P, Q); +		} +		{ +			P = -P; +			std::stringstream ss; +			ss << P; +			ss >> Q; +			CYBOZU_TEST_EQUAL(P, Q); +		} +		P.clear(); +		{ +			std::stringstream ss; +			ss << P; +			ss >> Q; +			CYBOZU_TEST_EQUAL(P, Q); +		} +	} + +	template<class F> +	void test(F f, const char *msg) const +	{ +		const int N = 300000; +		Fp 
x(para.gx); +		Fp y(para.gy); +		Ec P(x, y); +		Ec Q = P + P + P; +		clock_t begin = clock(); +		for (int i = 0; i < N; i++) { +			f(Q, P, Q); +		} +		clock_t end = clock(); +		printf("%s %.2fusec\n", msg, (end - begin) / double(CLOCKS_PER_SEC) / N * 1e6); +	} +	/* +		add 8.71usec -> 6.94 +		sub 6.80usec -> 4.84 +		dbl 9.59usec -> 7.75 +		pos 2730usec -> 2153 +	*/ +	void bench() const +	{ +		Fp x(para.gx); +		Fp y(para.gy); +		Ec P(x, y); +		Ec Q = P + P + P; +		CYBOZU_BENCH("add", Ec::add, Q, P, Q); +		CYBOZU_BENCH("sub", Ec::sub, Q, P, Q); +		CYBOZU_BENCH("dbl", Ec::dbl, P, P); +		Zn z("-3"); +		CYBOZU_BENCH("pow", Ec::power, P, P, z); +	} +/* +Affine : sandy-bridge +add 3.17usec +sub 2.43usec +dbl 3.32usec +pow 905.00usec +Jacobi +add 2.34usec +sub 2.65usec +dbl 1.56usec +pow 499.00usec +*/ +	void run() const +	{ +		cstr(); +		ope(); +		power(); +		neg_power(); +		power_fp(); +		binaryExpression(); +		squareRoot(); +		str(); +#ifdef NDEBUG +		bench(); +#endif +	} +private: +	Test(const Test&); +	void operator=(const Test&); +}; + +template<class Fp> +void test_sub(const mcl::EcParam *para, size_t paraNum) +{ +	for (size_t i = 0; i < paraNum; i++) { +		puts(para[i].name); +		Test<Fp>(para[i]).run(); +	} +} + +int g_partial = -1; + +CYBOZU_TEST_AUTO(all) +{ +#ifdef USE_MONT_FP +	puts("use MontFp"); +#else +	puts("use GMP"); +#endif +	if (g_partial & (1 << 3)) { +		const struct mcl::EcParam para3[] = { +	//		mcl::ecparam::p160_1, +			mcl::ecparam::secp160k1, +			mcl::ecparam::secp192k1, +			mcl::ecparam::NIST_P192, +		}; +		test_sub<Fp_3>(para3, CYBOZU_NUM_OF_ARRAY(para3)); +	} + +	if (g_partial & (1 << 4)) { +		const struct mcl::EcParam para4[] = { +			mcl::ecparam::secp224k1, +			mcl::ecparam::secp256k1, +			mcl::ecparam::NIST_P224, +			mcl::ecparam::NIST_P256, +		}; +		test_sub<Fp_4>(para4, CYBOZU_NUM_OF_ARRAY(para4)); +	} + +	if (g_partial & (1 << 6)) { +		const struct mcl::EcParam para6[] = { +	//		mcl::ecparam::secp384r1, +			mcl::ecparam::NIST_P384, +		}; +		
test_sub<Fp_6>(para6, CYBOZU_NUM_OF_ARRAY(para6)); +	} + +	if (g_partial & (1 << 9)) { +		const struct mcl::EcParam para9[] = { +	//		mcl::ecparam::secp521r1, +			mcl::ecparam::NIST_P521, +		}; +		test_sub<Fp_9>(para9, CYBOZU_NUM_OF_ARRAY(para9)); +	} +} + +int main(int argc, char *argv[]) +{ +	if (argc == 1) { +		g_partial = -1; +	} else { +		g_partial = 0; +		for (int i = 1; i < argc; i++) { +			g_partial |= 1 << atoi(argv[i]); +		} +	} +	return cybozu::test::autoRun.run(argc, argv); +} diff --git a/test/fp_generator_test.cpp b/test/fp_generator_test.cpp new file mode 100644 index 0000000..9a61ab2 --- /dev/null +++ b/test/fp_generator_test.cpp @@ -0,0 +1,222 @@ +#include <cybozu/test.hpp> +#if CYBOZU_OS_BIT == 32 +// not support +#else +#include <mcl/gmp_util.hpp> +#include <stdint.h> +#include <string> +#include <cybozu/itoa.hpp> +#include <mcl/fp_generator.hpp> +#include <mcl/fp.hpp> +#include <iostream> +#include <cybozu/xorshift.hpp> +#include <cybozu/benchmark.hpp> + +typedef mcl::FpT<> Fp; + +const int MAX_N = 4; + +const char *primeTable[] = { +	"7fffffffffffffffffffffffffffffff", // 127bit(not full) +	"ffffffffffffffffffffffffffffff61", // 128bit(full) +	"fffffffffffffffffffffffffffffffffffffffeffffee37", // 192bit(full) +	"2523648240000001ba344d80000000086121000000000013a700000000000013", // 254bit(not full) +}; + +/* +	p is output buffer +	pStr is hex +	return the size of p +*/ +int convertToArray(uint64_t *p, const mpz_class& x) +{ +	const int pn = int(sizeof(mp_limb_t) * x.get_mpz_t()->_mp_size / sizeof(*p)); +	if (pn > MAX_N) { +		printf("pn(%d) is too large\n", pn); +		exit(1); +	} +	const uint64_t *q = (const uint64_t*)x.get_mpz_t()->_mp_d; +	std::copy(q, q + pn, p); +	std::fill(p + pn, p + MAX_N, 0); +	return pn; +} +int convertToArray(uint64_t *p, const char *pStr) +{ +	mpz_class x; +	x.set_str(pStr, 16); +	return convertToArray(p, x); +} + +struct Int { +	int vn; +	uint64_t v[MAX_N]; +	Int() +		: vn(0) +	{ +	} +	explicit Int(int vn) +	{ +		if 
(vn > MAX_N) { +			printf("vn(%d) is too large\n", vn); +			exit(1); +		} +		this->vn = vn; +	} +	void set(const char *str) { fromStr(str); } +	void set(const Fp& rhs) +	{ +		convertToArray(v, rhs.toGmp()); +	} +	void set(const uint64_t* x) +	{ +		for (int i = 0; i < vn; i++) v[i] = x[i]; +	} +	void fromStr(const char *str) +	{ +		convertToArray(v, str); +	} +	std::string toStr() const +	{ +		std::string ret; +		for (int i = 0; i < vn; i++) { +			ret += cybozu::itohex(v[vn - 1 - i], false); +		} +		return ret; +	} +	void put(const char *msg = "") const +	{ +		if (msg) printf("%s=", msg); +		printf("%s\n", toStr().c_str()); +	} +	bool operator==(const Int& rhs) const +	{ +		if (vn != rhs.vn) return false; +		for (int i = 0; i < vn; i++) { +			if (v[i] != rhs.v[i]) return false; +		} +		return true; +	} +	bool operator!=(const Int& rhs) const { return !operator==(rhs); } +	bool operator==(const Fp& rhs) const +	{ +		Int t(vn); +		t.set(rhs); +		return operator==(t); +	} +	bool operator!=(const Fp& rhs) const { return !operator==(rhs); } +}; +static inline std::ostream& operator<<(std::ostream& os, const Int& x) +{ +	return os << x.toStr(); +} + +void testAddSub(const mcl::FpGenerator& fg, int pn) +{ +	Fp x, y; +	Int mx(pn), my(pn); +	x.fromStr("0x8811aabb23427cc"); +	y.fromStr("0x8811aabb23427cc11"); +	mx.set(x); +	my.set(y); +	for (int i = 0; i < 30; i++) { +		CYBOZU_TEST_EQUAL(mx, x); +		x += x; +		fg.add_(mx.v, mx.v, mx.v); +	} +	for (int i = 0; i < 30; i++) { +		CYBOZU_TEST_EQUAL(mx, x); +		x += y; +		fg.add_(mx.v, mx.v, my.v); +	} +	for (int i = 0; i < 30; i++) { +		CYBOZU_TEST_EQUAL(my, y); +		y -= x; +		fg.sub_(my.v, my.v, mx.v); +	} +} + +void testNeg(const mcl::FpGenerator& fg, int pn) +{ +	Fp x; +	Int mx(pn), my(pn); +	const char *tbl[] = { +		"0", +		"0x12346", +		"0x11223344556677881122334455667788", +		"0x0abbccddeeffaabb0000000000000000", +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		x.fromStr(tbl[i]); +		mx.set(x); +		x = -x; +		
fg.neg_(mx.v, mx.v); +		CYBOZU_TEST_EQUAL(mx, x); +	} +} + +void testMulI(const mcl::FpGenerator& fg, int pn) +{ +	cybozu::XorShift rg; +	for (int i = 0; i < 100; i++) { +		uint64_t x[MAX_N]; +		uint64_t z[MAX_N + 1]; +		rg.read(x, pn); +		uint64_t y = rg.get64(); +		mpz_class mx; +		mcl::Gmp::setRaw(mx, x, pn); +		mpz_class my; +		mcl::Gmp::set(my, y); +		mx *= my; +		uint64_t d = fg.mulI_(z, x, y); +		z[pn] = d; +		mcl::Gmp::setRaw(my, z, pn + 1); +		CYBOZU_TEST_EQUAL(mx, my); +	} +	{ +		uint64_t x[MAX_N]; +		uint64_t z[MAX_N + 1]; +		rg.read(x, pn); +		uint64_t y = rg.get64(); +		CYBOZU_BENCH_C("mulI", 10000000, fg.mulI_, z, x, y); +	} +} + +void testShr1(const mcl::FpGenerator& fg, int pn) +{ +	cybozu::XorShift rg; +	for (int i = 0; i < 100; i++) { +		uint64_t x[MAX_N]; +		uint64_t z[MAX_N]; +		rg.read(x, pn); +		mpz_class mx; +		mcl::Gmp::setRaw(mx, x, pn); +		mx >>= 1; +		fg.shr1_(z, x); +		mpz_class my; +		mcl::Gmp::setRaw(my, z, pn); +		CYBOZU_TEST_EQUAL(mx, my); +	} +} + +void test(const char *pStr) +{ +	Fp::setModulo(pStr, 16); +	uint64_t p[MAX_N]; +	const int pn = convertToArray(p, pStr); +	printf("pn=%d\n", pn); +	mcl::FpGenerator fg; +	fg.init(p, pn); +	testAddSub(fg, pn); +	testNeg(fg, pn); +	testMulI(fg, pn); +	testShr1(fg, pn); +} + +CYBOZU_TEST_AUTO(all) +{ +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(primeTable); i++) { +		printf("test prime i=%d\n", (int)i); +		test(primeTable[i]); +	} +} +#endif diff --git a/test/fp_test.cpp b/test/fp_test.cpp new file mode 100644 index 0000000..aac80a1 --- /dev/null +++ b/test/fp_test.cpp @@ -0,0 +1,465 @@ +#define PUT(x) std::cout << #x "=" << (x) << std::endl +#include <cybozu/test.hpp> +#include <mcl/fp.hpp> +#include <cybozu/benchmark.hpp> +#include <time.h> + +#ifdef _MSC_VER +	#pragma warning(disable: 4127) // const condition +#endif + +typedef mcl::FpT<> Fp; + +const int m = 65537; +struct Init { +	Init() +	{ +		std::ostringstream ms; +		ms << m; +		Fp::setModulo(ms.str()); +	} +}; + 
+CYBOZU_TEST_SETUP_FIXTURE(Init); + +#ifndef MCL_ONLY_BENCH +CYBOZU_TEST_AUTO(cstr) +{ +	const struct { +		const char *str; +		int val; +	} tbl[] = { +		{ "0", 0 }, +		{ "1", 1 }, +		{ "123", 123 }, +		{ "0x123", 0x123 }, +		{ "0b10101", 21 }, +		{ "-123", m - 123 }, +		{ "-0x123", m - 0x123 }, +		{ "-0b10101", m - 21 }, +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		// string cstr +		Fp x(tbl[i].str); +		CYBOZU_TEST_EQUAL(x, tbl[i].val); + +		// int cstr +		Fp y(tbl[i].val); +		CYBOZU_TEST_EQUAL(y, x); + +		// copy cstr +		Fp z(x); +		CYBOZU_TEST_EQUAL(z, x); + +		// assign int +		Fp w; +		w = tbl[i].val; +		CYBOZU_TEST_EQUAL(w, x); + +		// assign self +		Fp u; +		u = w; +		CYBOZU_TEST_EQUAL(u, x); + +		// conv +		std::ostringstream os; +		os << tbl[i].val; + +		std::string str; +		x.toStr(str); +		CYBOZU_TEST_EQUAL(str, os.str()); +	} +} + +CYBOZU_TEST_AUTO(fromStr) +{ +	const struct { +		const char *in; +		int out; +		int base; +	} tbl[] = { +		{ "100", 100, 0 }, // set base = 10 if base = 0 +		{ "100", 4, 2 }, +		{ "100", 256, 16 }, +		{ "0b100", 4, 0 }, +		{ "0b100", 4, 2 }, +		{ "0x100", 256, 0 }, +		{ "0x100", 256, 16 }, +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		Fp x; +		x.fromStr(tbl[i].in, tbl[i].base); +		CYBOZU_TEST_EQUAL(x, tbl[i].out); +	} +	// conflict prefix with base +	Fp x; +	CYBOZU_TEST_EXCEPTION(x.fromStr("0b100", 16), cybozu::Exception); +	CYBOZU_TEST_EXCEPTION(x.fromStr("0x100", 2), cybozu::Exception); +} + +CYBOZU_TEST_AUTO(stream) +{ +	const struct { +		const char *in; +		int out10; +		int out16; +	} tbl[] = { +		{ "100", 100, 256 }, // set base = 10 if base = 0 +		{ "0x100", 256, 256 }, +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		{ +			std::istringstream is(tbl[i].in); +			Fp x; +			is >> x; +			CYBOZU_TEST_EQUAL(x, tbl[i].out10); +		} +		{ +			std::istringstream is(tbl[i].in); +			Fp x; +			is >> std::hex >> x; +			CYBOZU_TEST_EQUAL(x, tbl[i].out16); +		} +	} +	
std::istringstream is("0b100"); +	Fp x; +	CYBOZU_TEST_EXCEPTION(is >> std::hex >> x, cybozu::Exception); +} + +CYBOZU_TEST_AUTO(conv) +{ +	const char *bin = "0b1001000110100"; +	const char *hex = "0x1234"; +	const char *dec = "4660"; +	Fp b(bin); +	Fp h(hex); +	Fp d(dec); +	CYBOZU_TEST_EQUAL(b, h); +	CYBOZU_TEST_EQUAL(b, d); + +	std::string str; +	b.toStr(str, 2, true); +	CYBOZU_TEST_EQUAL(str, bin); +	b.toStr(str); +	CYBOZU_TEST_EQUAL(str, dec); +	b.toStr(str, 16, true); +	CYBOZU_TEST_EQUAL(str, hex); +} + +CYBOZU_TEST_AUTO(compare) +{ +	const struct { +		int lhs; +		int rhs; +		int cmp; +	} tbl[] = { +		{ 0, 0, 0 }, +		{ 1, 0, 1 }, +		{ 0, 1, -1 }, +		{ -1, 0, 1 }, // m-1, 0 +		{ 0, -1, -1 }, // 0, m-1 +		{ 123, 456, -1 }, +		{ 456, 123, 1 }, +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		const Fp x(tbl[i].lhs); +		const Fp y(tbl[i].rhs); +		const int cmp = tbl[i].cmp; +		if (cmp == 0) { +			CYBOZU_TEST_EQUAL(x, y); +		} else { +			CYBOZU_TEST_ASSERT(x != y); +		} +	} +	{ +		Fp x(5); +		CYBOZU_TEST_ASSERT(x == 5); +	} +} + +CYBOZU_TEST_AUTO(modulo) +{ +	std::ostringstream ms; +	ms << m; + +	std::string str; +	Fp::getModulo(str); +	CYBOZU_TEST_EQUAL(str, ms.str()); +} + +CYBOZU_TEST_AUTO(ope) +{ +	const struct { +		int x; +		int y; +		int add; // x + y +		int sub; // x - y +		int mul; // x * y +		int sqr; // x^2 +	} tbl[] = { +		{ 0, 1, 1, m - 1, 0, 0 }, +		{ 9, 5, 14, 4, 45, 81 }, +		{ 10, 13, 23, m - 3, 130, 100 }, +		{ 2000, 1000, 3000, 1000, (2000 * 1000) % m, (2000 * 2000) % m }, +		{ 12345, 9999, 12345 + 9999, 12345 - 9999, (12345 * 9999) % m, (12345 * 12345) % m }, +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		const Fp x(tbl[i].x); +		const Fp y(tbl[i].y); +		Fp z; +		Fp::add(z, x, y); +		CYBOZU_TEST_EQUAL(z, tbl[i].add); +		Fp::sub(z, x, y); +		CYBOZU_TEST_EQUAL(z, tbl[i].sub); +		Fp::mul(z, x, y); +		CYBOZU_TEST_EQUAL(z, tbl[i].mul); + +		Fp r; +		Fp::inv(r, y); +		Fp::mul(z, z, r); +		CYBOZU_TEST_EQUAL(z, tbl[i].x); 
+		z = x + y; +		CYBOZU_TEST_EQUAL(z, tbl[i].add); +		z = x - y; +		CYBOZU_TEST_EQUAL(z, tbl[i].sub); +		z = x * y; +		CYBOZU_TEST_EQUAL(z, tbl[i].mul); + +		Fp::square(z, x); +		CYBOZU_TEST_EQUAL(z, tbl[i].sqr); + +		z = x / y; +		z *= y; +		CYBOZU_TEST_EQUAL(z, tbl[i].x); +	} +} + +struct tag2; + +CYBOZU_TEST_AUTO(power) +{ +	Fp x, y, z; +	x = 12345; +	z = 1; +	for (int i = 0; i < 100; i++) { +		Fp::power(y, x, i); +		CYBOZU_TEST_EQUAL(y, z); +		z *= x; +	} +	typedef mcl::FpT<tag2, 128> Fp2; +	Fp2::setModulo("1009"); +	x = 5; +	Fp2 n = 3; +	z = 3; +	Fp::power(x, x, z); +	CYBOZU_TEST_EQUAL(x, 125); +	x = 5; +	Fp::power(x, x, n); +	CYBOZU_TEST_EQUAL(x, 125); +} + +CYBOZU_TEST_AUTO(power_fp) +{ +	Fp x, y, z; +	x = 12345; +	z = 1; +	for (int i = 0; i < 100; i++) { +		Fp::power(y, x, Fp(i)); +		CYBOZU_TEST_EQUAL(y, z); +		z *= x; +	} +} + +struct TagAnother; + +CYBOZU_TEST_AUTO(another) +{ +	typedef mcl::FpT<TagAnother, 128> G; +	G::setModulo("13"); +	G a = 3; +	G b = 9; +	a *= b; +	CYBOZU_TEST_EQUAL(a, 1); +} + + +CYBOZU_TEST_AUTO(setRaw) +{ +	Fp::setModulo("1000000000000000000117"); +	char b1[] = { 0x56, 0x34, 0x12 }; +	Fp x; +	x.setRaw(b1, 3); +	CYBOZU_TEST_EQUAL(x, 0x123456); +	int b2[] = { 0x12, 0x34 }; +	x.setRaw(b2, 2); +	CYBOZU_TEST_EQUAL(x, Fp("0x3400000012")); +	x.fromStr("0xffffffffffff"); + +	Fp::setModulo("0x10000000000001234567a5"); +	const struct { +		uint32_t buf[3]; +		size_t bufN; +		const char *expected; +	} tbl[] = { +		{ { 0x234567a4, 0x00000001, 0x00100000}, 1, "0x234567a4" }, +		{ { 0x234567a4, 0x00000001, 0x00100000}, 2, "0x1234567a4" }, +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		x.setRaw(tbl[i].buf, tbl[i].bufN); +		CYBOZU_TEST_EQUAL(x, Fp(tbl[i].expected)); +	} +	uint32_t large[3] = { 0x234567a5, 0x00000001, 0x00100000}; +	CYBOZU_TEST_EXCEPTION(x.setRaw(large, 3), cybozu::Exception); +} + + +CYBOZU_TEST_AUTO(set64bit) +{ +	Fp::setModulo("0x1000000000000000000f"); +	const struct { +		const char *p; +		int64_t i; +	} 
tbl[] = { +		{ "0x1234567812345678", int64_t(0x1234567812345678ull) }, +		{ "0xfffedcba987edcba997", -int64_t(0x1234567812345678ull) }, +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		Fp x(tbl[i].p); +		Fp y(tbl[i].i); +		CYBOZU_TEST_EQUAL(x, y); +	} +} + +CYBOZU_TEST_AUTO(getRaw) +{ +	const struct { +		const char *s; +		uint32_t v[4]; +		size_t vn; +	} tbl[] = { +		{ "0", { 0, 0, 0, 0 }, 1 }, +		{ "1234", { 1234, 0, 0, 0 }, 1 }, +		{ "0xaabbccdd12345678", { 0x12345678, 0xaabbccdd, 0, 0 }, 2 }, +		{ "0x11112222333344445555666677778888", { 0x77778888, 0x55556666, 0x33334444, 0x11112222 }, 4 }, +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		mpz_class x(tbl[i].s); +		const size_t bufN = 8; +		uint32_t buf[bufN]; +		size_t n = mcl::Gmp::getRaw(buf, bufN, x); +		CYBOZU_TEST_EQUAL(n, tbl[i].vn); +		CYBOZU_TEST_EQUAL_ARRAY(buf, tbl[i].v, n); +	} +} + +CYBOZU_TEST_AUTO(toStr) +{ +	const char *tbl[] = { +		"0x0", +		"0x5", +		"0x123", +		"0x123456789012345679adbc", +		"0xffffffff26f2fc170f69466a74defd8d", +		"0x100000000000000000000000000000033", +		"0x11ee12312312940000000000000000000000000002342343" +	}; +	Fp::setModulo("0xfffffffffffffffffffffffe26f2fc170f69466a74defd8d"); +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		mpz_class x(tbl[i]); +		Fp y(tbl[i]); +		std::string xs, ys; +		mcl::Gmp::toStr(xs, x, 16); +		y.toStr(ys, 16); +		CYBOZU_TEST_EQUAL(xs, ys); +	} +} + +CYBOZU_TEST_AUTO(binaryRepl) +{ +	const struct { +		const char *s; +		size_t n; +		uint32_t v[6]; +	} tbl[] = { +		{ "0", 0, { 0, 0, 0, 0, 0 } }, +		{ "1234", 1, { 1234, 0, 0, 0, 0 } }, +		{ "0xaabbccdd12345678", 2, { 0x12345678, 0xaabbccdd, 0, 0, 0 } }, +		{ "0x11112222333344445555666677778888", 4, { 0x77778888, 0x55556666, 0x33334444, 0x11112222, 0 } }, +		{ "0x9911112222333344445555666677778888", 5, { 0x77778888, 0x55556666, 0x33334444, 0x11112222, 0x99, 0 } }, +	}; +	Fp::setModulo("0xfffffffffffffffffffffffe26f2fc170f69466a74defd8d"); +	for (size_t i = 
0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		Fp x(tbl[i].s); +		cybozu::BitVector bv; +		x.appendToBitVec(bv); +		CYBOZU_TEST_EQUAL(bv.size(), Fp::getModBitLen()); +		CYBOZU_TEST_EQUAL(bv.size(), Fp::getBitVecSize()); +		const Fp::BlockType *block = bv.getBlock(); +		if (sizeof(Fp::BlockType) == 4) { +			CYBOZU_TEST_EQUAL_ARRAY(block, tbl[i].v, tbl[i].n); +		} else { +			const size_t n = (tbl[i].n + 1) / 2; +			for (size_t j = 0; j < n; j++) { +				uint64_t v = (uint64_t(tbl[i].v[j * 2 + 1]) << 32) | tbl[i].v[j * 2]; +				CYBOZU_TEST_EQUAL(block[j], v); +			} +		} +		Fp y; +		y.fromBitVec(bv); +		CYBOZU_TEST_EQUAL(x, y); +	} +} +#endif + +#ifdef NDEBUG +void benchSub(const char *pStr, const char *xStr, const char *yStr) +	try +{ +	Fp::setModulo(pStr); +	Fp x(xStr); +	Fp y(yStr); + +	CYBOZU_BENCH("add", Fp::add, x, x, x); +	CYBOZU_BENCH("sub", Fp::sub, x, x, y); +	CYBOZU_BENCH("mul", Fp::mul, x, x, x); +	CYBOZU_BENCH("square", Fp::square, x, x); +	CYBOZU_BENCH("inv", x += y;Fp::inv, x, x); // avoid same jmp +	CYBOZU_BENCH("div", x += y;Fp::div, x, y, x); +	puts(""); +} catch (std::exception& e) { +	printf("ERR %s\n", e.what()); +} + +// square 76clk@sandy +CYBOZU_TEST_AUTO(bench3) +{ +	const char *pStr = "0xfffffffffffffffffffffffe26f2fc170f69466a74defd8d"; +	const char *xStr = "0x148094810948190412345678901234567900342423332197"; +	const char *yStr = "0x7fffffffffffffffffffffe26f2fc170f69466a74defd8d"; +	benchSub(pStr, xStr, yStr); +} + +CYBOZU_TEST_AUTO(bench4) +{ +	const char *pStr = "0x2523648240000001ba344d80000000086121000000000013a700000000000013"; +	const char *xStr = "0x1480948109481904123456789234234242423424201234567900342423332197"; +	const char *yStr = "0x151342342342341517fffffffffffffffffffffe26f2fc170f69466a74defd8d"; +	benchSub(pStr, xStr, yStr); +} + +CYBOZU_TEST_AUTO(bench6) +{ +	const char *pStr = "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff"; +	const char *xStr = 
"0x19481084109481094820948209482094820984290482212345678901234567900342308472047204720422423332197"; +	const char *yStr = "0x209348209481094820984209842094820948204204243123456789012345679003423084720472047204224233321972"; +	benchSub(pStr, xStr, yStr); +} + +CYBOZU_TEST_AUTO(bench9) +{ +	const char *pStr = "0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"; +	const char *xStr = "0x2908209582095820941098410948109482094820984209840294829049240294242498540975555312345678901234567900342308472047204720422423332197"; +	const char *yStr = "0x3948384209834029834092384204920349820948205872380573205782385729385729385723985837ffffffffffffffffffffffe26f2fc170f69466a74defd8d"; +	benchSub(pStr, xStr, yStr); +} +#endif diff --git a/test/fp_util_test.cpp b/test/fp_util_test.cpp new file mode 100644 index 0000000..28d94ed --- /dev/null +++ b/test/fp_util_test.cpp @@ -0,0 +1,191 @@ +#define PUT(x) std::cout << #x "=" << (x) << std::endl +#include <mcl/fp_util.hpp> +#include <cybozu/test.hpp> + +CYBOZU_TEST_AUTO(toStr16) +{ +	const struct { +		uint32_t x[4]; +		size_t n; +		const char *str; +	} tbl[] = { +		{ { 0, 0, 0, 0 }, 0, "0" }, +		{ { 0x123, 0, 0, 0 }, 1, "123" }, +		{ { 0x12345678, 0xaabbcc, 0, 0 }, 2, "aabbcc12345678" }, +		{ { 0, 0x12, 0x234a, 0 }, 3, "234a0000001200000000" }, +		{ { 1, 2, 0xffffffff, 0x123abc }, 4, "123abcffffffff0000000200000001" }, +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		std::string str; +		mcl::fp::toStr16(str, tbl[i].x, tbl[i].n, false); +		CYBOZU_TEST_EQUAL(str, tbl[i].str); +		mcl::fp::toStr16(str, tbl[i].x, tbl[i].n, true); +		CYBOZU_TEST_EQUAL(str, std::string("0x") + tbl[i].str); +	} +} + +// CYBOZU_TEST_AUTO(toStr2) // QQQ +// CYBOZU_TEST_AUTO(verifyStr) // QQQ + +CYBOZU_TEST_AUTO(fromStr16) +{ +	const struct { +		const char *str; +		uint64_t x[4]; +	} tbl[] = { +		{ "0", { 0, 0, 0, 0 } }, +		{ "5", { 5, 0, 0, 0 } }, +		{ "123", { 
0x123, 0, 0, 0 } }, +		{ "123456789012345679adbc", { uint64_t(0x789012345679adbcull), 0x123456, 0, 0 } }, +		{ "ffffffff26f2fc170f69466a74defd8d", { uint64_t(0x0f69466a74defd8dull), uint64_t(0xffffffff26f2fc17ull), 0, 0 } }, +		{ "100000000000000000000000000000033", { uint64_t(0x0000000000000033ull), 0, 1, 0 } }, +		{ "11ee12312312940000000000000000000000000002342343", { uint64_t(0x0000000002342343ull), uint64_t(0x0000000000000000ull), uint64_t(0x11ee123123129400ull), 0 } }, +		{ "1234567890abcdefABCDEF123456789aba32134723424242424", { uint64_t(0x2134723424242424ull), uint64_t(0xDEF123456789aba3ull), uint64_t(0x4567890abcdefABCull), 0x123 } }, +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		const size_t xN = 4; +		uint64_t x[xN]; +		mcl::fp::fromStr16(x, xN, tbl[i].str, strlen(tbl[i].str)); +		for (size_t j = 0; j < xN; j++) { +			CYBOZU_TEST_EQUAL(x[j], tbl[i].x[j]); +		} +	} +} + +CYBOZU_TEST_AUTO(compareArray) +{ +	const struct { +		uint32_t a[4]; +		uint32_t b[4]; +		size_t n; +		int expect; +	} tbl[] = { +		{ { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, 0, 0 }, +		{ { 1, 0, 0, 0 }, { 0, 0, 0, 0 }, 1, 1 }, +		{ { 0, 0, 0, 0 }, { 1, 0, 0, 0 }, 1, -1 }, +		{ { 1, 0, 0, 0 }, { 1, 0, 0, 0 }, 1, 0 }, +		{ { 3, 1, 1, 0 }, { 2, 1, 1, 0 }, 4, 1 }, +		{ { 9, 2, 1, 1 }, { 1, 3, 1, 1 }, 4, -1 }, +		{ { 1, 7, 8, 4 }, { 1, 7, 8, 9 }, 3, 0 }, +		{ { 1, 7, 8, 4 }, { 1, 7, 8, 9 }, 4, -1 }, +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		int e = mcl::fp::compareArray(tbl[i].a, tbl[i].b, tbl[i].n); +		CYBOZU_TEST_EQUAL(e, tbl[i].expect); +	} +} + +struct Rand { +	std::vector<uint32_t> v; +	size_t pos; +	int count; +	void read(uint32_t *x, size_t n) +	{ +		if (v.size() < pos + n) throw cybozu::Exception("Rand:get:bad n") << v.size() << pos << n; +		std::copy(v.begin() + pos, v.begin() + pos + n, x); +		pos += n; +		count++; +	} +	Rand(const uint32_t *x, size_t n) +		: pos(0) +		, count(0) +	{ +		for (size_t i = 0; i < n; i++) { +			v.push_back(x[i]); +		} +	
} +}; + +CYBOZU_TEST_AUTO(getRandVal) +{ +	const size_t rn = 8; +	const struct { +		uint32_t r[rn]; +		uint32_t mod[2]; +		size_t bitLen; +		int count; +		uint32_t expect[2]; +	} tbl[] = { +		{ { 1, 2, 3, 4, 5, 6, 7, 8 }, { 5, 6 }, 64, 1, { 1, 2 } }, +		{ { 0xfffffffc, 0x7, 3, 4, 5, 6, 7, 8 }, { 0xfffffffe, 0x3 }, 34, 1, { 0xfffffffc, 0x3 } }, +		{ { 0xfffffffc, 0x7, 3, 4, 5, 6, 7, 8 }, { 0xfffffffb, 0x3 }, 34, 2, { 3, 0 } }, +		{ { 2, 3, 5, 7, 4, 3, 0, 3 }, { 1, 0x3 }, 34, 4, { 0, 3 } }, +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		Rand rg(tbl[i].r, rn); +		uint32_t out[2]; +		mcl::fp::getRandVal(out, rg, tbl[i].mod, tbl[i].bitLen); +		CYBOZU_TEST_EQUAL(out[0], tbl[i].expect[0]); +		CYBOZU_TEST_EQUAL(out[1], tbl[i].expect[1]); +		CYBOZU_TEST_EQUAL(rg.count, tbl[i].count); +	} +} + +CYBOZU_TEST_AUTO(shiftLeftOr) +{ +	const struct { +		uint32_t x[4]; +		size_t n; +		size_t shift; +		uint32_t y; +		uint32_t z[4]; +		uint32_t ret; +	} tbl[] = { +		{ { 0x12345678, 0, 0, 0 }, 1, 0, 0, { 0x12345678, 0, 0, 0 }, 0 }, +		{ { 0x12345678, 0, 0, 0 }, 1, 1, 0, { 0x2468acf0, 0, 0, 0 }, 0 }, +		{ { 0xf2345678, 0, 0, 0 }, 1, 1, 5, { 0xe468acf5, 0, 0, 0 }, 1 }, +		{ { 0x12345678, 0x9abcdef0, 0x11112222, 0xffccaaee }, 4, 19, 0x1234, { 0xb3c01234, 0xf78091a2, 0x1114d5e6, 0x57708889 }, 0x7fe65 }, +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		uint32_t z[4]; +		uint32_t ret = mcl::fp::shiftLeftOr(z, tbl[i].x, tbl[i].n, tbl[i].shift, tbl[i].y); +		CYBOZU_TEST_EQUAL_ARRAY(z, tbl[i].z, tbl[i].n); +		CYBOZU_TEST_EQUAL(ret, tbl[i].ret); +	} +} + +CYBOZU_TEST_AUTO(shiftRight) +{ +	const struct { +		uint32_t x[4]; +		size_t n; +		size_t shift; +		uint32_t z[4]; +	} tbl[] = { +		{ { 0x12345678, 0, 0, 0 }, 4, 0, { 0x12345678, 0, 0, 0 } }, +		{ { 0x12345678, 0xaaaabbbb, 0xffeebbcc, 0xfeba9874 }, 4, 1, { 0x891a2b3c, 0x55555ddd, 0x7ff75de6, 0x7f5d4c3a } }, +		{ { 0x12345678, 0xaaaabbbb, 0xffeebbcc, 0xfeba9874 }, 4, 18, { 0xaeeec48d, 0xaef32aaa, 0xa61d3ffb, 
0x3fae } }, +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		uint32_t z[4]; +		mcl::fp::shiftRight(z, tbl[i].x, tbl[i].n, tbl[i].shift); +		CYBOZU_TEST_EQUAL_ARRAY(z, tbl[i].z, tbl[i].n); +	} +} + +CYBOZU_TEST_AUTO(splitBitVec) +{ +	uint32_t tbl[] = { 0x12345678, 0xaaaabbbb, 0xffeebbcc }; +	typedef cybozu::BitVectorT<uint32_t> BitVec; +	typedef std::vector<int> IntVec; +	BitVec bv; +	bv.append(tbl, sizeof(tbl) * 8); +	for (size_t len = bv.size(); len > 0; len--) { +		bv.resize(len); +		for (size_t w = 1; w < 18; w++) { +			IntVec iv; +			size_t last = mcl::fp::splitBitVec(iv, bv, w); +			size_t q = len / w; +			size_t r = len % w; +			if (r == 0) { +				r = w; +			} else { +				q++; +			} +			CYBOZU_TEST_EQUAL(iv.size(), q); +			BitVec bv2; +			mcl::fp::concatBitVec(bv2, iv, w, last); +			CYBOZU_TEST_ASSERT(bv == bv2); +		} +	} +} diff --git a/test/mk32.sh b/test/mk32.sh new file mode 100644 index 0000000..4d5f607 --- /dev/null +++ b/test/mk32.sh @@ -0,0 +1 @@ +g++ -O3 -march=native base_test.cpp ../src/x86.s -m32 -I ~/32/include/ -I ../include/ -I ../../xbyak/ -I ../../cybozulib/include ~/32/lib/libgmp.a ~/32/lib/libgmpxx.a -I ~/32/lib -DNDEBUG diff --git a/test/mont_fp_test.cpp b/test/mont_fp_test.cpp new file mode 100644 index 0000000..6e13e4d --- /dev/null +++ b/test/mont_fp_test.cpp @@ -0,0 +1,809 @@ +#define PUT(x) std::cout << #x "=" << (x) << std::endl +#include <cybozu/test.hpp> +#include <cybozu/benchmark.hpp> +#include <time.h> + +#define USE_MONT_FP +#include <mcl/fp.hpp> +typedef mcl::FpT<> Zn; +typedef mcl::FpT<> MontFp3; +typedef mcl::FpT<> MontFp4; +typedef mcl::FpT<> MontFp6; +typedef mcl::FpT<> MontFp9; + +struct Montgomery { +	typedef mcl::Gmp::BlockType BlockType; +	mpz_class p_; +	mpz_class R_; // (1 << (pn_ * 64)) % p +	mpz_class RR_; // (R * R) % p +	BlockType pp_; // p * pp = -1 mod M = 1 << 64 +	size_t pn_; +	Montgomery() {} +	explicit Montgomery(const mpz_class& p) +	{ +		p_ = p; +		pp_ = 
mcl::montgomery::getCoff(mcl::Gmp::getBlock(p, 0)); +		pn_ = mcl::Gmp::getBlockSize(p); +		R_ = 1; +		R_ = (R_ << (pn_ * 64)) % p_; +		RR_ = (R_ * R_) % p_; +	} + +	void toMont(mpz_class& x) const { mul(x, x, RR_); } +	void fromMont(mpz_class& x) const { mul(x, x, 1); } + +	void mul(mpz_class& z, const mpz_class& x, const mpz_class& y) const +	{ +#if 0 +		const size_t ySize = mcl::Gmp::getBlockSize(y); +		mpz_class c = x * mcl::Gmp::getBlock(y, 0); +		BlockType q = mcl::Gmp::getBlock(c, 0) * pp_; +		c += p_ * q; +		c >>= sizeof(BlockType) * 8; +		for (size_t i = 1; i < pn_; i++) { +			if (i < ySize) { +				c += x * mcl::Gmp::getBlock(y, i); +			} +			BlockType q = mcl::Gmp::getBlock(c, 0) * pp_; +			c += p_ * q; +			c >>= sizeof(BlockType) * 8; +		} +		if (c >= p_) { +			c -= p_; +		} +		z = c; +#else +		z = x * y; +		for (size_t i = 0; i < pn_; i++) { +			BlockType q = mcl::Gmp::getBlock(z, 0) * pp_; +			z += p_ * (mp_limb_t)q; +			z >>= sizeof(BlockType) * 8; +		} +		if (z >= p_) { +			z -= p_; +		} +#endif +	} +}; + +template<class T> +mpz_class toGmp(const T& x) +{ +	std::string str = x.toStr(); +	mpz_class t; +	mcl::Gmp::fromStr(t, str); +	return t; +} + +template<class T> +std::string toStr(const T& x) +{ +	std::ostringstream os; +	os << x; +	return os.str(); +} + +template<class T, class U> +T castTo(const U& x) +{ +	T t; +	t.fromStr(toStr(x)); +	return t; +} + +template<class T> +void putRaw(const T& x) +{ +	const uint64_t *p = x.getInnerValue(); +	for (size_t i = 0, n = T::BlockSize; i < n; i++) { +		printf("%016llx", p[n - 1 - i]); +	} +	printf("\n"); +} + +template<size_t N> +void put(const uint64_t (&x)[N]) +{ +	for (size_t i = 0; i < N; i++) { +		printf("%016llx", x[N - 1 - i]); +	} +	printf("\n"); +} + +template<size_t N> +struct Test { +	typedef mcl::FpT<> Fp; +	mpz_class m; +	void run(const char *p) +	{ +		Fp::setModulo(p); +		m = p; +		Zn::setModulo(p); +		edge(); +		cstr(); +		toStr(); +		fromStr(); +		stream(); +		conv(); +		compare(); +		
modulo(); +		ope(); +		cvtInt(); +		power(); +		neg_power(); +		power_Zn(); +		setRaw(); +		set64bit(); +		getRaw(); +		binaryExp(); +		bench(); +	} +	void cstr() +	{ +		const struct { +			const char *str; +			int val; +		} tbl[] = { +			{ "0", 0 }, +			{ "1", 1 }, +			{ "123", 123 }, +			{ "0x123", 0x123 }, +			{ "0b10101", 21 }, +		}; +		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +			// string cstr +			Fp x(tbl[i].str); +			CYBOZU_TEST_EQUAL(x, tbl[i].val); + +			// int cstr +			Fp y(tbl[i].val); +			CYBOZU_TEST_EQUAL(y, x); + +			// copy cstr +			Fp z(x); +			CYBOZU_TEST_EQUAL(z, x); + +			// assign int +			Fp w; +			w = tbl[i].val; +			CYBOZU_TEST_EQUAL(w, x); + +			// assign self +			Fp u; +			u = w; +			CYBOZU_TEST_EQUAL(u, x); + +			// conv +			std::ostringstream os; +			os << tbl[i].val; + +			std::string str; +			x.toStr(str); +			CYBOZU_TEST_EQUAL(str, os.str()); +		} +		const struct { +			const char *str; +			int val; +		} tbl2[] = { +			{ "-123", 123 }, +			{ "-0x123", 0x123 }, +			{ "-0b10101", 21 }, +		}; +		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl2); i++) { +			Fp x(tbl2[i].str); +			x = -x; +			CYBOZU_TEST_EQUAL(x, tbl2[i].val); +		} +	} +	void toStr() +	{ +		Fp x(0); +		std::string str; +		str = x.toStr(); +		CYBOZU_TEST_EQUAL(str, "0"); +		str = x.toStr(2, true); +		CYBOZU_TEST_EQUAL(str, "0"); +		str = x.toStr(2, false); +		CYBOZU_TEST_EQUAL(str, "0"); +		str = x.toStr(16, true); +		CYBOZU_TEST_EQUAL(str, "0"); +		str = x.toStr(16, false); +		CYBOZU_TEST_EQUAL(str, "0"); + +		x = 123; +		str = x.toStr(); +		CYBOZU_TEST_EQUAL(str, "123"); +		str = x.toStr(2, true); +		CYBOZU_TEST_EQUAL(str, "0b1111011"); +		str = x.toStr(2, false); +		CYBOZU_TEST_EQUAL(str, "1111011"); +		str = x.toStr(16, true); +		CYBOZU_TEST_EQUAL(str, "0x7b"); +		str = x.toStr(16, false); +		CYBOZU_TEST_EQUAL(str, "7b"); + +		{ +			std::ostringstream os; +			os << x; +			CYBOZU_TEST_EQUAL(os.str(), "123"); +		} +		{ +			std::ostringstream os; +			os << 
std::hex << std::showbase << x; +			CYBOZU_TEST_EQUAL(os.str(), "0x7b"); +		} +		{ +			std::ostringstream os; +			os << std::hex << x; +			CYBOZU_TEST_EQUAL(os.str(), "7b"); +		} +	} + +	void fromStr() +	{ +		const struct { +			const char *in; +			int out; +			int base; +		} tbl[] = { +			{ "100", 100, 0 }, // set base = 10 if base = 0 +			{ "100", 4, 2 }, +			{ "100", 256, 16 }, +			{ "0b100", 4, 0 }, +			{ "0b100", 4, 2 }, +			{ "0x100", 256, 0 }, +			{ "0x100", 256, 16 }, +		}; +		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +			Fp x; +			x.fromStr(tbl[i].in, tbl[i].base); +			CYBOZU_TEST_EQUAL(x, tbl[i].out); +		} +		// conflict prefix with base +		Fp x; +		CYBOZU_TEST_EXCEPTION(x.fromStr("0b100", 16), cybozu::Exception); +		CYBOZU_TEST_EXCEPTION(x.fromStr("0x100", 2), cybozu::Exception); +	} + +	void stream() +	{ +		const struct { +			const char *in; +			int out10; +			int out16; +		} tbl[] = { +			{ "100", 100, 256 }, // set base = 10 if base = 0 +			{ "0x100", 256, 256 }, +		}; +		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +			{ +				std::istringstream is(tbl[i].in); +				Fp x; +				is >> x; +				CYBOZU_TEST_EQUAL(x, tbl[i].out10); +			} +			{ +				std::istringstream is(tbl[i].in); +				Fp x; +				is >> std::hex >> x; +				CYBOZU_TEST_EQUAL(x, tbl[i].out16); +			} +		} +		std::istringstream is("0b100"); +		Fp x; +		CYBOZU_TEST_EXCEPTION(is >> std::hex >> x, cybozu::Exception); +	} +	void edge() +	{ +#if 0 +		std::cout << std::hex; +		/* +			real mont +			   0    0 +			   1    R^-1 +			   R    1 +			  -1    -R^-1 +			  -R    -1 +		*/ +		mpz_class t = 1; +		const mpz_class R = (t << (N * 64)) % m; +		const mpz_class tbl[] = { +			0, 1, R, m - 1, m - R +		}; +		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +			const mpz_class& x = tbl[i]; +			for (size_t j = i; j < CYBOZU_NUM_OF_ARRAY(tbl); j++) { +				const mpz_class& y = tbl[j]; +				mpz_class z = (x * y) % m; +				Fp xx, yy; +				Fp::toMont(xx, x); +				Fp::toMont(yy, y); +		
		Fp zz = xx * yy; +				mpz_class t; +				Fp::fromMont(t, zz); +				CYBOZU_TEST_EQUAL(z, t); +			} +		} +		std::cout << std::dec; +#endif +	} + +	void conv() +	{ +		const char *bin = "0b100100011010001010110011110001001000000010010001101000101011001111000100100000001001000110100010101100111100010010000"; +		const char *hex = "0x123456789012345678901234567890"; +		const char *dec = "94522879687365475552814062743484560"; +		Fp b(bin); +		Fp h(hex); +		Fp d(dec); +		CYBOZU_TEST_EQUAL(b, h); +		CYBOZU_TEST_EQUAL(b, d); + +		std::string str; +		b.toStr(str, 2, true); +		CYBOZU_TEST_EQUAL(str, bin); +		b.toStr(str); +		CYBOZU_TEST_EQUAL(str, dec); +		b.toStr(str, 16, true); +		CYBOZU_TEST_EQUAL(str, hex); +	} + +	void compare() +	{ +		const struct { +			int lhs; +			int rhs; +			int cmp; +		} tbl[] = { +			{ 0, 0, 0 }, +			{ 1, 0, 1 }, +			{ 0, 1, -1 }, +			{ -1, 0, 1 }, // m-1, 0 +			{ 0, -1, -1 }, // 0, m-1 +			{ 123, 456, -1 }, +			{ 456, 123, 1 }, +			{ 5, 5, 0 }, +		}; +		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +			const Fp x(tbl[i].lhs); +			const Fp y(tbl[i].rhs); +			const int cmp = tbl[i].cmp; +			if (cmp == 0) { +				CYBOZU_TEST_EQUAL(x, y); +			} else { +				CYBOZU_TEST_ASSERT(x != y); +			} +		} +	} + +	void modulo() +	{ +		std::ostringstream ms; +		ms << m; + +		std::string str; +		Fp::getModulo(str); +		CYBOZU_TEST_EQUAL(str, ms.str()); +	} + +	void ope() +	{ +		const struct { +			Zn x; +			Zn y; +			Zn add; // x + y +			Zn sub; // x - y +			Zn mul; // x * y +			Zn sqr; // x * x +		} tbl[] = { +			{ 0, 1, 1, -1, 0, 0 }, +			{ 9, 7, 16, 2, 63, 81 }, +			{ 10, 13, 23, -3, 130, 100 }, +			{ 2000, -1000, 1000, 3000, -2000000, 4000000 }, +			{ -12345, -9999, -(12345 + 9999), - 12345 + 9999, 12345 * 9999, 12345 * 12345 }, +		}; +		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +			const Fp x(castTo<Fp>(tbl[i].x)); +			const Fp y(castTo<Fp>(tbl[i].y)); +			Fp z; +			Fp::add(z, x, y); +			CYBOZU_TEST_EQUAL(z, castTo<Fp>(tbl[i].add)); +			
Fp::sub(z, x, y); +			CYBOZU_TEST_EQUAL(z, castTo<Fp>(tbl[i].sub)); +			Fp::mul(z, x, y); +			CYBOZU_TEST_EQUAL(z, castTo<Fp>(tbl[i].mul)); + +			Fp r; +			Fp::inv(r, y); +			Zn rr = 1 / tbl[i].y; +			CYBOZU_TEST_EQUAL(r, castTo<Fp>(rr)); +			Fp::mul(z, z, r); +			CYBOZU_TEST_EQUAL(z, castTo<Fp>(tbl[i].x)); + +			z = x + y; +			CYBOZU_TEST_EQUAL(z, castTo<Fp>(tbl[i].add)); +			z = x - y; +			CYBOZU_TEST_EQUAL(z, castTo<Fp>(tbl[i].sub)); +			z = x * y; +			CYBOZU_TEST_EQUAL(z, castTo<Fp>(tbl[i].mul)); +			Fp::square(z, x); +			CYBOZU_TEST_EQUAL(z, castTo<Fp>(tbl[i].sqr)); + +			z = x / y; +			z *= y; +			CYBOZU_TEST_EQUAL(z, castTo<Fp>(tbl[i].x)); +		} +	} +	void cvtInt() +	{ +#if 0 +		Fp x; +		x = 12345; +		uint64_t y = x.cvtInt(); +		CYBOZU_TEST_EQUAL(y, 12345u); +		x.fromStr("123456789012342342342342342"); +		CYBOZU_TEST_EXCEPTION(x.cvtInt(), cybozu::Exception); +		bool err = false; +		CYBOZU_TEST_NO_EXCEPTION(x.cvtInt(&err)); +		CYBOZU_TEST_ASSERT(err); +#endif +	} + +	void power() +	{ +		Fp x, y, z; +		x = 12345; +		z = 1; +		for (int i = 0; i < 100; i++) { +			Fp::power(y, x, i); +			CYBOZU_TEST_EQUAL(y, z); +			z *= x; +		} +	} + +	void neg_power() +	{ +		Fp x, y, z; +		x = 12345; +		z = 1; +		Fp rx = 1 / x; +		for (int i = 0; i < 100; i++) { +			Fp::power(y, x, -i); +			CYBOZU_TEST_EQUAL(y, z); +			z *= rx; +		} +	} + +	void power_Zn() +	{ +		Fp x, y, z; +		x = 12345; +		z = 1; +		for (int i = 0; i < 100; i++) { +			Fp::power(y, x, Zn(i)); +			CYBOZU_TEST_EQUAL(y, z); +			z *= x; +		} +	} + +	void setRaw() +	{ +		// QQQ +#if 0 +		char b1[] = { 0x56, 0x34, 0x12 }; +		Fp x; +		x.setRaw(b1, 3); +		CYBOZU_TEST_EQUAL(x, 0x123456); +		int b2[] = { 0x12, 0x34 }; +		x.setRaw(b2, 2); +		CYBOZU_TEST_EQUAL(x, Fp("0x3400000012")); +#endif +	} +	void binaryExp() +	{ +		puts("binaryExp"); +		for (int i = 2; i < 7; i++) { +			mpz_class g = m / i; +			Fp x, y; +//			Fp::toMont(x, g); +			x.fromGmp(g); +			cybozu::BitVector bv; +			x.appendToBitVec(bv); +			uint64_t buf[N]; 
+			mcl::Gmp::getRaw(buf, N, g); +			CYBOZU_TEST_EQUAL(bv.getBlockSize(), N); +			CYBOZU_TEST_EQUAL(bv.size(), Fp::getModBitLen()); +			CYBOZU_TEST_EQUAL(bv.size(), Fp::getBitVecSize()); +			const uint64_t *p = bv.getBlock(); +			CYBOZU_TEST_EQUAL_ARRAY(p, buf, N); +		} +		const mpz_class yy("0x1255556666777788881111222233334444"); +		if (yy > m) { +			return; +		} +		Fp y; +//		Fp::toMont(y, yy); +		y.fromGmp(yy); +		uint64_t b1[N] = { uint64_t(0x1111222233334444ull), uint64_t(0x5555666677778888ull), 0x12 }; +		Fp x; +		cybozu::BitVector bv; +		bv.append(b1, Fp::getModBitLen()); +		x.fromBitVec(bv); +		CYBOZU_TEST_EQUAL(x, y); +		bv.clear(); +		x.appendToBitVec(bv); +		const uint64_t *b2 = bv.getBlock(); +		CYBOZU_TEST_EQUAL_ARRAY(b1, b2, N); +	} + +	void set64bit() +	{ +		const struct { +			const char *p; +			uint64_t i; +		} tbl[] = { +			{ "0x1234567812345678", uint64_t(0x1234567812345678ull) }, +			{ "0xaaaaaaaaaaaaaaaa", uint64_t(0xaaaaaaaaaaaaaaaaull) }, +		}; +		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +			Fp x(tbl[i].p); +			Fp y(tbl[i].i); +			CYBOZU_TEST_EQUAL(x, y); +		} +	} + +	void getRaw() +	{ +		const struct { +			const char *s; +			uint32_t v[4]; +			size_t vn; +		} tbl[] = { +			{ "0", { 0, 0, 0, 0 }, 1 }, +			{ "1234", { 1234, 0, 0, 0 }, 1 }, +			{ "0xaabbccdd12345678", { 0x12345678, 0xaabbccdd, 0, 0 }, 2 }, +			{ "0x11112222333344445555666677778888", { 0x77778888, 0x55556666, 0x33334444, 0x11112222 }, 4 }, +		}; +		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +			mpz_class x(tbl[i].s); +			const size_t bufN = 8; +			uint32_t buf[bufN]; +			size_t n = mcl::Gmp::getRaw(buf, bufN, x); +			CYBOZU_TEST_EQUAL(n, tbl[i].vn); +			for (size_t j = 0; j < n; j++) { +				CYBOZU_TEST_EQUAL(buf[j], tbl[i].v[j]); +			} +		} +	} +	void bench() +	{ +		Fp x("-123456789"); +		Fp y("-0x7ffffffff"); +		CYBOZU_BENCH("add", operator+, x, x); +		CYBOZU_BENCH("sub", operator-, x, y); +		CYBOZU_BENCH("mul", operator*, x, x); +		
CYBOZU_BENCH("sqr", Fp::square, x, x); +		CYBOZU_BENCH("div", y += x; operator/, x, y); +	} +}; + +void customTest(const char *pStr, const char *xStr, const char *yStr) +{ +#if 0 +	{ +		pStr = "0xfffffffffffffffffffffffffffffffffffffffeffffee37", +		MontFp3::setModulo(pStr); +		static uint64_t x[3] = { 1, 0, 0 }; +		uint64_t z[3]; +std::cout<<std::hex; +		MontFp3::inv(*(MontFp3*)z, *(const MontFp3*)x); +put(z); +		exit(1); +	} +#endif +#if 0 +	std::cout << std::hex; +	uint64_t x[9] = { 0xff7fffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0x1ff }; +	uint64_t y[9] = { 0xff7fffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0x1ff }; +	uint64_t z1[9], z2[9]; +	MontFp9::setModulo(pStr); +	MontFp9::fg_.mul_(z2, x, y); +	put(z2); +	{ +		puts("C"); +		mpz_class p(pStr); +		Montgomery mont(p); +		mpz_class xx, yy; +		mcl::Gmp::setRaw(xx, x, CYBOZU_NUM_OF_ARRAY(x)); +		mcl::Gmp::setRaw(yy, y, CYBOZU_NUM_OF_ARRAY(y)); +		mpz_class z; +		mont.mul(z, xx, yy); +		std::cout << std::hex << z << std::endl; +	} +	exit(1); +#else +	std::string rOrg, rC, rAsm; +	Zn::setModulo(pStr); +	Zn s(xStr), t(yStr); +	s *= t; +	rOrg = toStr(s); +	{ +		puts("C"); +		mpz_class p(pStr); +		Montgomery mont(p); +		mpz_class x(xStr), y(yStr); +		mont.toMont(x); +		mont.toMont(y); +		mpz_class z; +		mont.mul(z, x, y); +		mont.fromMont(z); +		rC = toStr(z); +	} + +	puts("asm"); +	MontFp9::setModulo(pStr); +	MontFp9 x(xStr), y(yStr); +	x *= y; +	rAsm = toStr(x); +	CYBOZU_TEST_EQUAL(rOrg, rC); +	CYBOZU_TEST_EQUAL(rOrg, rAsm); +#endif +} + +CYBOZU_TEST_AUTO(customTest) +{ +	const struct { +		const char *p; +		const char *x; +		const char *y; +	} tbl[] = { +		{ +			"0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", 
+//			"0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff", +//			"0xfffffffffffffffffffffffffffffffffffffffeffffee37", +			"0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe", +			"0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe" +		}, +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		customTest(tbl[i].p, tbl[i].x, tbl[i].y); +	} +} + +CYBOZU_TEST_AUTO(test3) +{ +	Test<3> test; +	const char *tbl[] = { +		"0x000000000000000100000000000000000000000000000033", // min prime +		"0x00000000fffffffffffffffffffffffffffffffeffffac73", +		"0x0000000100000000000000000001b8fa16dfab9aca16b6b3", +		"0x000000010000000000000000000000000000000000000007", +		"0x30000000000000000000000000000000000000000000002b", +		"0x70000000000000000000000000000000000000000000001f", +		"0x800000000000000000000000000000000000000000000005", +		"0xfffffffffffffffffffffffffffffffffffffffeffffee37", +		"0xfffffffffffffffffffffffe26f2fc170f69466a74defd8d", +		"0xffffffffffffffffffffffffffffffffffffffffffffff13", // max prime +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		printf("prime=%s\n", tbl[i]); +		test.run(tbl[i]); +	} +} + +CYBOZU_TEST_AUTO(test4) +{ +	Test<4> test; +	const char *tbl[] = { +		"0x0000000000000001000000000000000000000000000000000000000000000085", // min prime +		"0x2523648240000001ba344d80000000086121000000000013a700000000000013", +		"0x7523648240000001ba344d80000000086121000000000013a700000000000017", +		"0x800000000000000000000000000000000000000000000000000000000000005f", +		"0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff43", // max prime +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		printf("prime=%s\n", tbl[i]); +		test.run(tbl[i]); +	} +} + +CYBOZU_TEST_AUTO(test6) +{ +	Test<6> test; +	const 
char *tbl[] = { +		"0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff", +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		printf("prime=%s\n", tbl[i]); +		test.run(tbl[i]); +	} +} + +CYBOZU_TEST_AUTO(test9) +{ +	Test<9> test; +	const char *tbl[] = { +		"0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", +	}; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		printf("prime=%s\n", tbl[i]); +		test.run(tbl[i]); +	} +} + +CYBOZU_TEST_AUTO(toStr16) +{ +	const char *tbl[] = { +		"0x0", +		"0x5", +		"0x123", +		"0x123456789012345679adbc", +		"0xffffffff26f2fc170f69466a74defd8d", +		"0x100000000000000000000000000000033", +		"0x11ee12312312940000000000000000000000000002342343" +	}; +	MontFp3::setModulo("0xffffffffffffffffffffffffffffffffffffffffffffff13"); +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		std::string str, str2; +		MontFp3 x(tbl[i]); +		x.toStr(str, 16); +		mpz_class y(tbl[i]); +		mcl::Gmp::toStr(str2, y, 16); +		CYBOZU_TEST_EQUAL(str, str2); +	} +} + +#if 0 +CYBOZU_TEST_AUTO(toStr16bench) +{ +	const char *tbl[] = { +		"0x0", +		"0x5", +		"0x123", +		"0x123456789012345679adbc", +		"0xffffffff26f2fc170f69466a74defd8d", +		"0x100000000000000000000000000000033", +		"0x11ee12312312940000000000000000000000000002342343" +	}; +	const int C = 500000; +	MontFp3::setModulo("0xffffffffffffffffffffffffffffffffffffffffffffff13"); +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		std::string str, str2; +		MontFp3 x(tbl[i]); +		CYBOZU_BENCH_C("Mont:toStr", C, x.toStr, str, 16); +		mpz_class y(tbl[i]); +		CYBOZU_BENCH_C("Gmp:toStr ", C, mcl::Gmp::toStr, str2, y, 16); +		str2.insert(0, "0x"); +		CYBOZU_TEST_EQUAL(str, str2); +	} +} + +CYBOZU_TEST_AUTO(fromStr16bench) +{ +	const char *tbl[] = { +		"0x0", +		"0x5", +		"0x123", +		"0x123456789012345679adbc", +		"0xffffffff26f2fc170f69466a74defd8d", 
+		"0x100000000000000000000000000000033", +		"0x11ee12312312940000000000000000000000000002342343" +	}; +	const int C = 500000; +	MontFp3::setModulo("0xffffffffffffffffffffffffffffffffffffffffffffff13"); +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		std::string str = tbl[i]; +		MontFp3 x; +		CYBOZU_BENCH_C("Mont:fromStr", C, x.fromStr, str); + +		mpz_class y; +		str.erase(0, 2); +		CYBOZU_BENCH_C("Gmp:fromStr ", C, mcl::Gmp::fromStr, y, str, 16); +		x.toStr(str, 16); +		std::string str2; +		mcl::Gmp::toStr(str2, y, 16); +		str2.insert(0, "0x"); +		CYBOZU_TEST_EQUAL(str, str2); +	} +} +#endif diff --git a/test/proj/ec_test/ec_test.vcxproj b/test/proj/ec_test/ec_test.vcxproj new file mode 100644 index 0000000..b141754 --- /dev/null +++ b/test/proj/ec_test/ec_test.vcxproj @@ -0,0 +1,88 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> +  <ItemGroup Label="ProjectConfigurations"> +    <ProjectConfiguration Include="Debug|x64"> +      <Configuration>Debug</Configuration> +      <Platform>x64</Platform> +    </ProjectConfiguration> +    <ProjectConfiguration Include="Release|x64"> +      <Configuration>Release</Configuration> +      <Platform>x64</Platform> +    </ProjectConfiguration> +  </ItemGroup> +  <PropertyGroup Label="Globals"> +    <ProjectGuid>{46B6E88E-739A-406B-9F68-BC46C5950FA3}</ProjectGuid> +    <Keyword>Win32Proj</Keyword> +    <RootNamespace>ec_test</RootNamespace> +  </PropertyGroup> +  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> +  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> +    <ConfigurationType>Application</ConfigurationType> +    <UseDebugLibraries>true</UseDebugLibraries> +    <PlatformToolset>v110</PlatformToolset> +    <CharacterSet>MultiByte</CharacterSet> +  </PropertyGroup> +  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" 
Label="Configuration"> +    <ConfigurationType>Application</ConfigurationType> +    <UseDebugLibraries>false</UseDebugLibraries> +    <PlatformToolset>v110</PlatformToolset> +    <WholeProgramOptimization>true</WholeProgramOptimization> +    <CharacterSet>MultiByte</CharacterSet> +  </PropertyGroup> +  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> +  <ImportGroup Label="ExtensionSettings"> +  </ImportGroup> +  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets"> +    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> +    <Import Project="$(SolutionDir)common.props" /> +    <Import Project="$(SolutionDir)debug.props" /> +  </ImportGroup> +  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets"> +    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> +    <Import Project="$(SolutionDir)common.props" /> +    <Import Project="$(SolutionDir)release.props" /> +  </ImportGroup> +  <PropertyGroup Label="UserMacros" /> +  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> +    <LinkIncremental>true</LinkIncremental> +  </PropertyGroup> +  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> +    <LinkIncremental>false</LinkIncremental> +  </PropertyGroup> +  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> +    <ClCompile> +      <PrecompiledHeader> +      </PrecompiledHeader> +      <WarningLevel>Level3</WarningLevel> +      <Optimization>Disabled</Optimization> +      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> +    </ClCompile> +    <Link> +      <SubSystem>Console</SubSystem> +      
<GenerateDebugInformation>true</GenerateDebugInformation> +    </Link> +  </ItemDefinitionGroup> +  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> +    <ClCompile> +      <WarningLevel>Level3</WarningLevel> +      <PrecompiledHeader> +      </PrecompiledHeader> +      <Optimization>MaxSpeed</Optimization> +      <FunctionLevelLinking>true</FunctionLevelLinking> +      <IntrinsicFunctions>true</IntrinsicFunctions> +      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> +    </ClCompile> +    <Link> +      <SubSystem>Console</SubSystem> +      <GenerateDebugInformation>true</GenerateDebugInformation> +      <EnableCOMDATFolding>true</EnableCOMDATFolding> +      <OptimizeReferences>true</OptimizeReferences> +    </Link> +  </ItemDefinitionGroup> +  <ItemGroup> +    <ClCompile Include="$(SolutionDir)test\ec_test.cpp" /> +  </ItemGroup> +  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> +  <ImportGroup Label="ExtensionTargets"> +  </ImportGroup> +</Project>
\ No newline at end of file diff --git a/test/proj/fp_test/fp_test.vcxproj b/test/proj/fp_test/fp_test.vcxproj new file mode 100644 index 0000000..a77dc21 --- /dev/null +++ b/test/proj/fp_test/fp_test.vcxproj @@ -0,0 +1,91 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> +  <ItemGroup Label="ProjectConfigurations"> +    <ProjectConfiguration Include="Debug|x64"> +      <Configuration>Debug</Configuration> +      <Platform>x64</Platform> +    </ProjectConfiguration> +    <ProjectConfiguration Include="Release|x64"> +      <Configuration>Release</Configuration> +      <Platform>x64</Platform> +    </ProjectConfiguration> +  </ItemGroup> +  <PropertyGroup Label="Globals"> +    <ProjectGuid>{51266DE6-B57B-4AE3-B85C-282F170E1728}</ProjectGuid> +    <Keyword>Win32Proj</Keyword> +    <RootNamespace>fp_test</RootNamespace> +  </PropertyGroup> +  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> +  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> +    <ConfigurationType>Application</ConfigurationType> +    <UseDebugLibraries>true</UseDebugLibraries> +    <PlatformToolset>v110</PlatformToolset> +    <CharacterSet>MultiByte</CharacterSet> +  </PropertyGroup> +  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> +    <ConfigurationType>Application</ConfigurationType> +    <UseDebugLibraries>false</UseDebugLibraries> +    <PlatformToolset>v110</PlatformToolset> +    <WholeProgramOptimization>true</WholeProgramOptimization> +    <CharacterSet>MultiByte</CharacterSet> +  </PropertyGroup> +  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> +  <ImportGroup Label="ExtensionSettings"> +  </ImportGroup> +  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets"> +    <Import 
Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> +    <Import Project="$(SolutionDir)common.props" /> +    <Import Project="$(SolutionDir)debug.props" /> +  </ImportGroup> +  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets"> +    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> +    <Import Project="$(SolutionDir)common.props" /> +    <Import Project="$(SolutionDir)release.props" /> +  </ImportGroup> +  <PropertyGroup Label="UserMacros" /> +  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> +    <LinkIncremental>true</LinkIncremental> +  </PropertyGroup> +  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> +    <LinkIncremental>false</LinkIncremental> +  </PropertyGroup> +  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> +    <ClCompile> +      <PrecompiledHeader> +      </PrecompiledHeader> +      <WarningLevel>Level3</WarningLevel> +      <Optimization>Disabled</Optimization> +      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> +      <AdditionalIncludeDirectories>$(SolutionDir)../xbyak/;$(SolutionDir)../cybozulib/include;$(SolutionDir)../cybozulib_ext/mpir/include;$(SolutionDir)include</AdditionalIncludeDirectories> +    </ClCompile> +    <Link> +      <SubSystem>Console</SubSystem> +      <GenerateDebugInformation>true</GenerateDebugInformation> +      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile> +    </Link> +  </ItemDefinitionGroup> +  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> +    <ClCompile> +      <WarningLevel>Level3</WarningLevel> +      <PrecompiledHeader> +      </PrecompiledHeader> +   
   <Optimization>MaxSpeed</Optimization> +      <FunctionLevelLinking>true</FunctionLevelLinking> +      <IntrinsicFunctions>true</IntrinsicFunctions> +      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> +      <AdditionalIncludeDirectories>$(SolutionDir)../xbyak/;$(SolutionDir)../cybozulib/include;$(SolutionDir)../cybozulib_ext/mpir/include;$(SolutionDir)include</AdditionalIncludeDirectories> +    </ClCompile> +    <Link> +      <SubSystem>Console</SubSystem> +      <GenerateDebugInformation>true</GenerateDebugInformation> +      <EnableCOMDATFolding>true</EnableCOMDATFolding> +      <OptimizeReferences>true</OptimizeReferences> +    </Link> +  </ItemDefinitionGroup> +  <ItemGroup> +    <ClCompile Include="$(SolutionDir)test\\fp_test.cpp" /> +  </ItemGroup> +  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> +  <ImportGroup Label="ExtensionTargets"> +  </ImportGroup> +</Project>
\ No newline at end of file diff --git a/test/sq_test.cpp b/test/sq_test.cpp new file mode 100644 index 0000000..6174be6 --- /dev/null +++ b/test/sq_test.cpp @@ -0,0 +1,21 @@ +#include <mcl/gmp_util.hpp> +#include <cybozu/test.hpp> +#include <iostream> + +CYBOZU_TEST_AUTO(sqrt) +{ +	const int tbl[] = { 3, 5, 7, 11, 13, 17, 19, 257, 997, 1031 }; +	mcl::SquareRoot sq; +	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { +		const mpz_class p = tbl[i]; +		sq.set(p); +		for (mpz_class a = 1; a < p; a++) { +			mpz_class x; +			if (sq.get(x, a)) { +				mpz_class y; +				y = (x * x) % p; +				CYBOZU_TEST_EQUAL(a, y); +			} +		} +	} +} | 
