diff options
author | MITSUNARI Shigeo <herumi@nifty.com> | 2016-09-05 16:11:03 +0800 |
---|---|---|
committer | MITSUNARI Shigeo <herumi@nifty.com> | 2016-09-05 16:11:03 +0800 |
commit | e312097f27bcdee838175a23085ddeb59e10a031 (patch) | |
tree | 76c229f9b3b37a2aa2b3dd895dd08bd3fe4c4094 | |
parent | 5d2d435b16e350597c38d0251deaddc1073b975c (diff) | |
download | tangerine-mcl-e312097f27bcdee838175a23085ddeb59e10a031.tar.gz tangerine-mcl-e312097f27bcdee838175a23085ddeb59e10a031.tar.zst tangerine-mcl-e312097f27bcdee838175a23085ddeb59e10a031.zip |
add mcl_fp_addNC for x86-64 by nasm
-rw-r--r-- | Makefile | 9 | ||||
-rw-r--r-- | common.mk | 1 | ||||
-rw-r--r-- | include/mcl/fp_tower.hpp | 11 | ||||
-rw-r--r-- | src/asm/low_x86-64.asm | 88 |
4 files changed, 104 insertions, 5 deletions
@@ -41,6 +41,11 @@ ifeq ($(HAS_BMI2),1) LLVM_FLAGS+=-mattr=bmi2 endif +ifneq ($(ASM),) + LOW_ASM_OBJ=$(LOW_ASM_SRC:.asm=.o) + LIB_OBJ+=$(LOW_ASM_OBJ) +endif + $(MCL_LIB): $(LIB_OBJ) -$(MKDIR) $(@D) $(AR) $@ $(LIB_OBJ) @@ -57,6 +62,7 @@ $(LLVM_SRC): $(GEN_EXE) $(FUNC_LIST) $(FUNC_LIST): $(LOW_ASM_SRC) $(shell awk '/global/ { print $$2}' $(LOW_ASM_SRC) > $(FUNC_LIST) || touch $(FUNC_LIST)) + $(shell awk '/proc/ { print $$2}' $(LOW_ASM_SRC) >> $(FUNC_LIST)) $(GEN_EXE): src/gen.cpp src/llvm_gen.hpp $(CXX) -o $@ $< $(CFLAGS) -O0 @@ -64,6 +70,9 @@ $(GEN_EXE): src/gen.cpp src/llvm_gen.hpp asm: $(LLVM_SRC) $(LLVM_OPT) -O3 -o - $(LLVM_SRC) | $(LLVM_LLC) -O3 $(LLVM_FLAGS) -x86-asm-syntax=intel +$(LOW_ASM_OBJ): $(LOW_ASM_SRC) + $(ASM) $< + ################################################################## VPATH=test sample src @@ -9,6 +9,7 @@ ifeq ($(ARCH),x86_64) BIT=64 BIT_OPT=-m64 LOW_ASM_SRC=src/asm/low_x86-64.asm + ASM=nasm -felf64 endif ifeq ($(ARCH),x86) CPU=x86 diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index a86894b..22618d0 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -63,6 +63,7 @@ class Fp2T : public fp::Operator<Fp2T<Fp> > { typedef FpDblT<Fp> FpDbl; static Fp xi_a_; public: + typedef typename Fp::BaseFp BaseFp; Fp a, b; Fp2T() { } Fp2T(int64_t a) : a(a), b(0) { } @@ -93,15 +94,15 @@ public: */ friend std::ostream& operator<<(std::ostream& os, const Fp2T& self) { - return os << self.a << mcl::getIoSeparator() << self.b; + return os << self.a << Fp::getIoSeparator() << self.b; } friend std::istream& operator>>(std::istream& is, Fp2T& self) { return is >> self.a >> self.b; } - std::string getStr(int base = 10, bool withPrefix = false) + std::string getStr(int ioMode) { - return a.getStr(base, withPrefix) + ' ' + b.getStr(base, withPrefix); + return a.getStr(ioMode) + fp::getIoSeparator(ioMode) + b.getStr(ioMode); } bool isZero() const { return a.isZero() && b.isZero(); } bool isOne() const { return a.isOne() && b.isZero(); } @@ -416,7 +417,7 @@ struct Fp6T : public fp::Operator<Fp6T<Fp> > { bool operator!=(const Fp6T& rhs) const { return !operator==(rhs); } friend std::ostream& operator<<(std::ostream& os, const Fp6T& x) { - const char *sep = mcl::getIoSeparator(); + const char *sep = Fp::getIoSeparator(); return os << x.a << sep << x.b << sep << x.c; } friend std::istream& operator>>(std::istream& is, Fp6T& x) @@ -682,7 +683,7 @@ struct Fp12T : public fp::Operator<Fp12T<Fp> > { } friend std::ostream& operator<<(std::ostream& os, const Fp12T& self) { - return os << self.a << mcl::getIoSeparator() << self.b; + return os << self.a << Fp::getIoSeparator() << self.b; } friend std::istream& operator>>(std::istream& is, Fp12T& self) { diff --git a/src/asm/low_x86-64.asm b/src/asm/low_x86-64.asm index e69de29..72faa69 100644 --- a/src/asm/low_x86-64.asm +++ b/src/asm/low_x86-64.asm @@ -0,0 +1,88 @@ + +; Linux rdi rsi rdx rcx +; Win rcx rdx r8 r9 + +%ifdef _WIN64 + %define p1org rcx + %define p2org rdx + %define p3org r8 + %define p4org r9 +%else + %define p1org rdi + %define p2org rsi + %define p3org rdx + %define p4org rcx +%endif + +%imacro proc 1 +global %1 +%1: +%endmacro + +segment .text + +%imacro addNC 1 + mov rax, [p2org] + add rax, [p3org] + mov [p1org], rax +%assign i 1 +%rep %1 + mov rax, [p2org + i * 8] + adc rax, [p3org + i * 8] + mov [p1org + i * 8], rax +%assign i (i+1) +%endrep + setc al + movzx eax, al + ret +%endmacro + +proc mcl_fp_addNC64 + addNC 0 +proc mcl_fp_addNC128 + addNC 1 +proc mcl_fp_addNC192 + addNC 2 +proc mcl_fp_addNC256 + addNC 3 +proc mcl_fp_addNC320 + addNC 4 +proc mcl_fp_addNC384 + addNC 5 +proc mcl_fp_addNC448 + addNC 6 +proc mcl_fp_addNC512 + addNC 7 +proc mcl_fp_addNC576 + addNC 8 +proc mcl_fp_addNC640 + addNC 9 +proc mcl_fp_addNC704 + addNC 10 +proc mcl_fp_addNC768 + addNC 11 +proc mcl_fp_addNC832 + addNC 12 +proc mcl_fp_addNC896 + addNC 13 +proc mcl_fp_addNC960 + addNC 14 +proc mcl_fp_addNC1024 + addNC 15 +proc mcl_fp_addNC1088 + addNC 16 +proc mcl_fp_addNC1152 + addNC 17 +proc mcl_fp_addNC1216 + addNC 18 +proc mcl_fp_addNC1280 + addNC 19 +proc mcl_fp_addNC1344 + addNC 20 +proc mcl_fp_addNC1408 + addNC 21 +proc mcl_fp_addNC1472 + addNC 22 +proc mcl_fp_addNC1536 + addNC 23 + |