Diffstat (limited to 'vendor/github.com/byzantine-lab/mcl/src/asm/x86-64mac.bmi2.s')
-rw-r--r--  vendor/github.com/byzantine-lab/mcl/src/asm/x86-64mac.bmi2.s | 13830
1 file changed, 13830 insertions, 0 deletions
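The patch below vendors LLVM-generated Mach-O assembly for the mcl library's fixed-width field arithmetic (note the __TEXT section and .macosx_version_min 10, 12 directives), built for CPUs with the BMI2 mulx instruction. The NLbmi2 suffix on each symbol gives the operand size in 64-bit limbs; the families are plain double-width products (mulUnitPre/mulPre/sqrPre), Montgomery multiplication and reduction (mont/montNF/montRed), carry-chain helpers (addPre/subPre/add/sub/addNF/subNF/shr1), and dedicated reductions for the NIST P-192 and P-521 primes. Short Python sketches of what these routine families compute, with hypothetical names of our own, follow the patch.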
diff --git a/vendor/github.com/byzantine-lab/mcl/src/asm/x86-64mac.bmi2.s b/vendor/github.com/byzantine-lab/mcl/src/asm/x86-64mac.bmi2.s
new file mode 100644
index 000000000..849c66649
--- /dev/null
+++ b/vendor/github.com/byzantine-lab/mcl/src/asm/x86-64mac.bmi2.s
@@ -0,0 +1,13830 @@
+ .section __TEXT,__text,regular,pure_instructions + .macosx_version_min 10, 12 + .globl _makeNIST_P192Lbmi2 + .p2align 4, 0x90 +_makeNIST_P192Lbmi2: ## @makeNIST_P192Lbmi2 +## BB#0: + movq $-1, %rax + movq $-2, %rdx + movq $-1, %rcx + retq + + .globl _mcl_fpDbl_mod_NIST_P192Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_mod_NIST_P192Lbmi2: ## @mcl_fpDbl_mod_NIST_P192Lbmi2 +## BB#0: + pushq %r14 + pushq %rbx + movq 16(%rsi), %r10 + movq 24(%rsi), %r8 + movq 40(%rsi), %r9 + movq 8(%rsi), %rax + addq %r9, %rax + adcq $0, %r10 + sbbq %rcx, %rcx + andl $1, %ecx + movq 32(%rsi), %r11 + movq (%rsi), %r14 + addq %r8, %r14 + adcq %r11, %rax + adcq %r9, %r10 + adcq $0, %rcx + addq %r9, %r14 + adcq %r8, %rax + adcq %r11, %r10 + adcq $0, %rcx + addq %rcx, %r14 + adcq %rax, %rcx + adcq $0, %r10 + sbbq %rax, %rax + andl $1, %eax + movq %r14, %rsi + addq $1, %rsi + movq %rcx, %rdx + adcq $1, %rdx + movq %r10, %rbx + adcq $0, %rbx + adcq $-1, %rax + andl $1, %eax + cmovneq %r14, %rsi + movq %rsi, (%rdi) + testb %al, %al + cmovneq %rcx, %rdx + movq %rdx, 8(%rdi) + cmovneq %r10, %rbx + movq %rbx, 16(%rdi) + popq %rbx + popq %r14 + retq + + .globl _mcl_fp_sqr_NIST_P192Lbmi2 + .p2align 4, 0x90 +_mcl_fp_sqr_NIST_P192Lbmi2: ## @mcl_fp_sqr_NIST_P192Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rdi, -8(%rsp) ## 8-byte Spill + movq 16(%rsi), %r8 + movq (%rsi), %rcx + movq 8(%rsi), %rsi + movq %r8, %rdx + mulxq %rsi, %r14, %rbx + movq %rbx, -16(%rsp) ## 8-byte Spill + movq %rsi, %rdx + mulxq %rsi, %r13, %r15 + mulxq %rcx, %r12, %rsi + addq %rsi, %r13 + adcq %r14, %r15 + adcq $0, %rbx + movq %rcx, %rdx + mulxq %rcx, %r9, %rax + addq %r12, %rax + movq %r8, %rdx + mulxq %rcx, %rbp, %r11 + adcq %rbp, %rsi + movq %r11, %r10 + adcq $0, %r10 + addq %r12, %rax + adcq %r13, %rsi + adcq %r15, %r10 + adcq $0, %rbx + mulxq %r8, %rcx, %rdi + addq %r14, %r11 + adcq -16(%rsp), %rcx ## 8-byte Folded Reload + adcq $0, %rdi + addq %rbp, %rsi + adcq %r10, %r11 + adcq %rbx, %rcx + adcq $0, %rdi + addq %rdi, %rax + adcq $0, %rsi + sbbq %rdx, %rdx + andl $1, %edx + addq %r11, %r9 + adcq %rcx, %rax + adcq %rdi, %rsi + adcq $0, %rdx + addq %rdi, %r9 + adcq %r11, %rax + adcq %rcx, %rsi + adcq $0, %rdx + addq %rdx, %r9 + adcq %rax, %rdx + adcq $0, %rsi + sbbq %rax, %rax + andl $1, %eax + movq %r9, %rcx + addq $1, %rcx + movq %rdx, %rdi + adcq $1, %rdi + movq %rsi, %rbp + adcq $0, %rbp + adcq $-1, %rax + andl $1, %eax + cmovneq %r9, %rcx + movq -8(%rsp), %rbx ## 8-byte Reload + movq %rcx, (%rbx) + testb %al, %al + cmovneq %rdx, %rdi + movq %rdi, 8(%rbx) + cmovneq %rsi, %rbp + movq %rbp, 16(%rbx) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_mulNIST_P192Lbmi2 + .p2align 4, 0x90 +_mcl_fp_mulNIST_P192Lbmi2: ## @mcl_fp_mulNIST_P192Lbmi2 +## BB#0: + pushq %r14 + pushq %rbx + subq $56, %rsp + movq %rdi, %r14 + leaq 8(%rsp), %rdi + callq _mcl_fpDbl_mulPre3Lbmi2 + movq 24(%rsp), %r9 + movq 32(%rsp), %r8 + movq 48(%rsp), %rdi + movq 16(%rsp), %rbx + addq %rdi, %rbx + adcq $0, %r9 + sbbq %rcx, %rcx + andl $1, %ecx + movq 40(%rsp), %rsi + movq 8(%rsp), %rdx + addq %r8, %rdx + adcq %rsi, %rbx + adcq %rdi, %r9
+ adcq $0, %rcx + addq %rcx, %rdx + adcq %rbx, %rcx + adcq $0, %r9 + sbbq %rsi, %rsi + andl $1, %esi + movq %rdx, %rdi + addq $1, %rdi + movq %rcx, %rbx + adcq $1, %rbx + movq %r9, %rax + adcq $0, %rax + adcq $-1, %rsi + andl $1, %esi + cmovneq %rdx, %rdi + movq %rdi, (%r14) + testb %sil, %sil + cmovneq %rcx, %rbx + movq %rbx, 8(%r14) + cmovneq %r9, %rax + movq %rax, 16(%r14) + addq $56, %rsp + popq %rbx + popq %r14 + retq + + .globl _mcl_fpDbl_mod_NIST_P521Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_mod_NIST_P521Lbmi2: ## @mcl_fpDbl_mod_NIST_P521Lbmi2 +## BB#0: + pushq %r15 + pushq %r14 + pushq %r12 + pushq %rbx + movq 120(%rsi), %r9 + movq 128(%rsi), %r14 + movq %r14, %r8 + shldq $55, %r9, %r8 + movq 112(%rsi), %r10 + shldq $55, %r10, %r9 + movq 104(%rsi), %r11 + shldq $55, %r11, %r10 + movq 96(%rsi), %r15 + shldq $55, %r15, %r11 + movq 88(%rsi), %r12 + shldq $55, %r12, %r15 + movq 80(%rsi), %rcx + shldq $55, %rcx, %r12 + movq 64(%rsi), %rbx + movq 72(%rsi), %rax + shldq $55, %rax, %rcx + shrq $9, %r14 + shldq $55, %rbx, %rax + ## kill: %EBX<def> %EBX<kill> %RBX<kill> %RBX<def> + andl $511, %ebx ## imm = 0x1FF + addq (%rsi), %rax + adcq 8(%rsi), %rcx + adcq 16(%rsi), %r12 + adcq 24(%rsi), %r15 + adcq 32(%rsi), %r11 + adcq 40(%rsi), %r10 + adcq 48(%rsi), %r9 + adcq 56(%rsi), %r8 + adcq %r14, %rbx + movl %ebx, %esi + shrl $9, %esi + andl $1, %esi + addq %rax, %rsi + adcq $0, %rcx + adcq $0, %r12 + adcq $0, %r15 + adcq $0, %r11 + adcq $0, %r10 + adcq $0, %r9 + adcq $0, %r8 + adcq $0, %rbx + movq %rsi, %rax + andq %r12, %rax + andq %r15, %rax + andq %r11, %rax + andq %r10, %rax + andq %r9, %rax + andq %r8, %rax + movq %rbx, %rdx + orq $-512, %rdx ## imm = 0xFE00 + andq %rax, %rdx + andq %rcx, %rdx + cmpq $-1, %rdx + je LBB4_1 +## BB#3: ## %nonzero + movq %rsi, (%rdi) + movq %rcx, 8(%rdi) + movq %r12, 16(%rdi) + movq %r15, 24(%rdi) + movq %r11, 32(%rdi) + movq %r10, 40(%rdi) + movq %r9, 48(%rdi) + movq %r8, 56(%rdi) + andl $511, %ebx ## imm = 0x1FF + movq %rbx, 64(%rdi) + jmp LBB4_2 +LBB4_1: ## %zero + movq $0, 64(%rdi) + movq $0, 56(%rdi) + movq $0, 48(%rdi) + movq $0, 40(%rdi) + movq $0, 32(%rdi) + movq $0, 24(%rdi) + movq $0, 16(%rdi) + movq $0, 8(%rdi) + movq $0, (%rdi) +LBB4_2: ## %zero + popq %rbx + popq %r12 + popq %r14 + popq %r15 + retq + + .globl _mcl_fp_mulUnitPre1Lbmi2 + .p2align 4, 0x90 +_mcl_fp_mulUnitPre1Lbmi2: ## @mcl_fp_mulUnitPre1Lbmi2 +## BB#0: + mulxq (%rsi), %rcx, %rax + movq %rcx, (%rdi) + movq %rax, 8(%rdi) + retq + + .globl _mcl_fpDbl_mulPre1Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_mulPre1Lbmi2: ## @mcl_fpDbl_mulPre1Lbmi2 +## BB#0: + movq (%rdx), %rdx + mulxq (%rsi), %rcx, %rax + movq %rcx, (%rdi) + movq %rax, 8(%rdi) + retq + + .globl _mcl_fpDbl_sqrPre1Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_sqrPre1Lbmi2: ## @mcl_fpDbl_sqrPre1Lbmi2 +## BB#0: + movq (%rsi), %rdx + mulxq %rdx, %rcx, %rax + movq %rcx, (%rdi) + movq %rax, 8(%rdi) + retq + + .globl _mcl_fp_mont1Lbmi2 + .p2align 4, 0x90 +_mcl_fp_mont1Lbmi2: ## @mcl_fp_mont1Lbmi2 +## BB#0: + movq %rdx, %rax + movq (%rsi), %rdx + mulxq (%rax), %rsi, %r8 + movq -8(%rcx), %rdx + imulq %rsi, %rdx + movq (%rcx), %rcx + mulxq %rcx, %rdx, %rax + addq %rsi, %rdx + adcq %r8, %rax + sbbq %rdx, %rdx + andl $1, %edx + movq %rax, %rsi + subq %rcx, %rsi + sbbq $0, %rdx + testb $1, %dl + cmovneq %rax, %rsi + movq %rsi, (%rdi) + retq + + .globl _mcl_fp_montNF1Lbmi2 + .p2align 4, 0x90 +_mcl_fp_montNF1Lbmi2: ## @mcl_fp_montNF1Lbmi2 +## BB#0: + movq %rdx, %rax + movq (%rsi), %rdx + mulxq (%rax), %rsi, %r8 + movq -8(%rcx), %rdx + imulq %rsi, %rdx + movq 
(%rcx), %rcx + mulxq %rcx, %rdx, %rax + addq %rsi, %rdx + adcq %r8, %rax + movq %rax, %rdx + subq %rcx, %rdx + cmovsq %rax, %rdx + movq %rdx, (%rdi) + retq + + .globl _mcl_fp_montRed1Lbmi2 + .p2align 4, 0x90 +_mcl_fp_montRed1Lbmi2: ## @mcl_fp_montRed1Lbmi2 +## BB#0: + movq (%rsi), %rcx + movq -8(%rdx), %rax + imulq %rcx, %rax + movq (%rdx), %r8 + movq %rax, %rdx + mulxq %r8, %rax, %rdx + addq %rcx, %rax + adcq 8(%rsi), %rdx + sbbq %rax, %rax + andl $1, %eax + movq %rdx, %rcx + subq %r8, %rcx + sbbq $0, %rax + testb $1, %al + cmovneq %rdx, %rcx + movq %rcx, (%rdi) + retq + + .globl _mcl_fp_addPre1Lbmi2 + .p2align 4, 0x90 +_mcl_fp_addPre1Lbmi2: ## @mcl_fp_addPre1Lbmi2 +## BB#0: + movq (%rdx), %rax + addq (%rsi), %rax + movq %rax, (%rdi) + sbbq %rax, %rax + andl $1, %eax + retq + + .globl _mcl_fp_subPre1Lbmi2 + .p2align 4, 0x90 +_mcl_fp_subPre1Lbmi2: ## @mcl_fp_subPre1Lbmi2 +## BB#0: + movq (%rsi), %rcx + xorl %eax, %eax + subq (%rdx), %rcx + movq %rcx, (%rdi) + sbbq $0, %rax + andl $1, %eax + retq + + .globl _mcl_fp_shr1_1Lbmi2 + .p2align 4, 0x90 +_mcl_fp_shr1_1Lbmi2: ## @mcl_fp_shr1_1Lbmi2 +## BB#0: + movq (%rsi), %rax + shrq %rax + movq %rax, (%rdi) + retq + + .globl _mcl_fp_add1Lbmi2 + .p2align 4, 0x90 +_mcl_fp_add1Lbmi2: ## @mcl_fp_add1Lbmi2 +## BB#0: + movq (%rdx), %rax + addq (%rsi), %rax + movq %rax, (%rdi) + sbbq %rdx, %rdx + andl $1, %edx + subq (%rcx), %rax + sbbq $0, %rdx + testb $1, %dl + jne LBB14_2 +## BB#1: ## %nocarry + movq %rax, (%rdi) +LBB14_2: ## %carry + retq + + .globl _mcl_fp_addNF1Lbmi2 + .p2align 4, 0x90 +_mcl_fp_addNF1Lbmi2: ## @mcl_fp_addNF1Lbmi2 +## BB#0: + movq (%rdx), %rax + addq (%rsi), %rax + movq %rax, %rdx + subq (%rcx), %rdx + cmovsq %rax, %rdx + movq %rdx, (%rdi) + retq + + .globl _mcl_fp_sub1Lbmi2 + .p2align 4, 0x90 +_mcl_fp_sub1Lbmi2: ## @mcl_fp_sub1Lbmi2 +## BB#0: + movq (%rsi), %rax + xorl %esi, %esi + subq (%rdx), %rax + movq %rax, (%rdi) + sbbq $0, %rsi + testb $1, %sil + jne LBB16_2 +## BB#1: ## %nocarry + retq +LBB16_2: ## %carry + addq (%rcx), %rax + movq %rax, (%rdi) + retq + + .globl _mcl_fp_subNF1Lbmi2 + .p2align 4, 0x90 +_mcl_fp_subNF1Lbmi2: ## @mcl_fp_subNF1Lbmi2 +## BB#0: + movq (%rsi), %rax + subq (%rdx), %rax + movq %rax, %rdx + sarq $63, %rdx + andq (%rcx), %rdx + addq %rax, %rdx + movq %rdx, (%rdi) + retq + + .globl _mcl_fpDbl_add1Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_add1Lbmi2: ## @mcl_fpDbl_add1Lbmi2 +## BB#0: + movq (%rdx), %rax + movq 8(%rdx), %rdx + addq (%rsi), %rax + adcq 8(%rsi), %rdx + movq %rax, (%rdi) + sbbq %rax, %rax + andl $1, %eax + movq %rdx, %rsi + subq (%rcx), %rsi + sbbq $0, %rax + testb $1, %al + cmovneq %rdx, %rsi + movq %rsi, 8(%rdi) + retq + + .globl _mcl_fpDbl_sub1Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_sub1Lbmi2: ## @mcl_fpDbl_sub1Lbmi2 +## BB#0: + movq (%rsi), %rax + movq 8(%rsi), %r8 + xorl %esi, %esi + subq (%rdx), %rax + sbbq 8(%rdx), %r8 + movq %rax, (%rdi) + movl $0, %eax + sbbq $0, %rax + testb $1, %al + cmovneq (%rcx), %rsi + addq %r8, %rsi + movq %rsi, 8(%rdi) + retq + + .globl _mcl_fp_mulUnitPre2Lbmi2 + .p2align 4, 0x90 +_mcl_fp_mulUnitPre2Lbmi2: ## @mcl_fp_mulUnitPre2Lbmi2 +## BB#0: + mulxq 8(%rsi), %rax, %rcx + mulxq (%rsi), %rdx, %rsi + movq %rdx, (%rdi) + addq %rax, %rsi + movq %rsi, 8(%rdi) + adcq $0, %rcx + movq %rcx, 16(%rdi) + retq + + .globl _mcl_fpDbl_mulPre2Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_mulPre2Lbmi2: ## @mcl_fpDbl_mulPre2Lbmi2 +## BB#0: + movq %rdx, %r10 + movq (%rsi), %r11 + movq 8(%rsi), %r8 + movq (%r10), %rsi + movq %r11, %rdx + mulxq %rsi, %rdx, %r9 + movq %rdx, (%rdi) + movq %r8, 
%rdx + mulxq %rsi, %rsi, %rax + addq %r9, %rsi + adcq $0, %rax + movq 8(%r10), %rcx + movq %r11, %rdx + mulxq %rcx, %rdx, %r9 + addq %rsi, %rdx + movq %rdx, 8(%rdi) + movq %r8, %rdx + mulxq %rcx, %rdx, %rcx + adcq %rax, %rdx + sbbq %rax, %rax + andl $1, %eax + addq %r9, %rdx + movq %rdx, 16(%rdi) + adcq %rcx, %rax + movq %rax, 24(%rdi) + retq + + .globl _mcl_fpDbl_sqrPre2Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_sqrPre2Lbmi2: ## @mcl_fpDbl_sqrPre2Lbmi2 +## BB#0: + movq (%rsi), %rax + movq 8(%rsi), %rcx + movq %rax, %rdx + mulxq %rax, %rdx, %rsi + movq %rdx, (%rdi) + movq %rcx, %rdx + mulxq %rax, %rdx, %r8 + addq %rdx, %rsi + movq %r8, %rax + adcq $0, %rax + addq %rdx, %rsi + movq %rsi, 8(%rdi) + movq %rcx, %rdx + mulxq %rcx, %rdx, %rcx + adcq %rax, %rdx + sbbq %rax, %rax + andl $1, %eax + addq %r8, %rdx + movq %rdx, 16(%rdi) + adcq %rcx, %rax + movq %rax, 24(%rdi) + retq + + .globl _mcl_fp_mont2Lbmi2 + .p2align 4, 0x90 +_mcl_fp_mont2Lbmi2: ## @mcl_fp_mont2Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq (%rdx), %rax + movq 8(%rdx), %r11 + movq %r9, %rdx + mulxq %rax, %r10, %r13 + movq %r8, %rdx + mulxq %rax, %r14, %rsi + addq %r10, %rsi + adcq $0, %r13 + movq -8(%rcx), %rbp + movq (%rcx), %r10 + movq %r14, %rdx + imulq %rbp, %rdx + movq 8(%rcx), %r15 + mulxq %r15, %r12, %rcx + mulxq %r10, %rdx, %rbx + addq %r12, %rbx + adcq $0, %rcx + addq %r14, %rdx + adcq %rsi, %rbx + adcq %r13, %rcx + sbbq %rsi, %rsi + andl $1, %esi + movq %r11, %rdx + mulxq %r9, %r9, %r14 + movq %r11, %rdx + mulxq %r8, %r8, %rax + addq %r9, %rax + adcq $0, %r14 + addq %rbx, %r8 + adcq %rcx, %rax + adcq %rsi, %r14 + sbbq %rsi, %rsi + andl $1, %esi + imulq %r8, %rbp + movq %rbp, %rdx + mulxq %r15, %rcx, %rbx + mulxq %r10, %rdx, %rbp + addq %rcx, %rbp + adcq $0, %rbx + addq %r8, %rdx + adcq %rax, %rbp + adcq %r14, %rbx + adcq $0, %rsi + movq %rbp, %rax + subq %r10, %rax + movq %rbx, %rcx + sbbq %r15, %rcx + sbbq $0, %rsi + andl $1, %esi + cmovneq %rbx, %rcx + testb %sil, %sil + cmovneq %rbp, %rax + movq %rax, (%rdi) + movq %rcx, 8(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_montNF2Lbmi2 + .p2align 4, 0x90 +_mcl_fp_montNF2Lbmi2: ## @mcl_fp_montNF2Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq (%rdx), %rax + movq 8(%rdx), %r11 + movq %r9, %rdx + mulxq %rax, %r10, %rsi + movq %r8, %rdx + mulxq %rax, %r15, %r13 + addq %r10, %r13 + adcq $0, %rsi + movq -8(%rcx), %rbp + movq (%rcx), %r10 + movq %r15, %rdx + imulq %rbp, %rdx + movq 8(%rcx), %r14 + mulxq %r10, %rcx, %r12 + addq %r15, %rcx + mulxq %r14, %rbx, %rcx + adcq %r13, %rbx + adcq $0, %rsi + addq %r12, %rbx + adcq %rcx, %rsi + movq %r11, %rdx + mulxq %r9, %r9, %rcx + movq %r11, %rdx + mulxq %r8, %r8, %rax + addq %r9, %rax + adcq $0, %rcx + addq %rbx, %r8 + adcq %rsi, %rax + adcq $0, %rcx + imulq %r8, %rbp + movq %rbp, %rdx + mulxq %r14, %rbx, %rsi + mulxq %r10, %rbp, %rdx + addq %r8, %rbp + adcq %rax, %rbx + adcq $0, %rcx + addq %rdx, %rbx + adcq %rsi, %rcx + movq %rbx, %rax + subq %r10, %rax + movq %rcx, %rdx + sbbq %r14, %rdx + cmovsq %rbx, %rax + movq %rax, (%rdi) + cmovsq %rcx, %rdx + movq %rdx, 8(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_montRed2Lbmi2 + .p2align 4, 0x90 +_mcl_fp_montRed2Lbmi2: ## @mcl_fp_montRed2Lbmi2 +## BB#0: + pushq %r15 + pushq %r14 + pushq %rbx + movq 
-8(%rdx), %r15 + movq (%rdx), %r8 + movq (%rsi), %r10 + movq %r10, %rcx + imulq %r15, %rcx + movq 8(%rdx), %r9 + movq %rcx, %rdx + mulxq %r9, %r11, %r14 + mulxq %r8, %rcx, %rax + addq %r11, %rax + adcq $0, %r14 + movq 24(%rsi), %r11 + addq %r10, %rcx + adcq 8(%rsi), %rax + adcq 16(%rsi), %r14 + adcq $0, %r11 + sbbq %rcx, %rcx + andl $1, %ecx + imulq %rax, %r15 + movq %r15, %rdx + mulxq %r9, %r10, %rbx + mulxq %r8, %rsi, %rdx + addq %r10, %rdx + adcq $0, %rbx + addq %rax, %rsi + adcq %r14, %rdx + adcq %r11, %rbx + adcq $0, %rcx + movq %rdx, %rax + subq %r8, %rax + movq %rbx, %rsi + sbbq %r9, %rsi + sbbq $0, %rcx + andl $1, %ecx + cmovneq %rbx, %rsi + testb %cl, %cl + cmovneq %rdx, %rax + movq %rax, (%rdi) + movq %rsi, 8(%rdi) + popq %rbx + popq %r14 + popq %r15 + retq + + .globl _mcl_fp_addPre2Lbmi2 + .p2align 4, 0x90 +_mcl_fp_addPre2Lbmi2: ## @mcl_fp_addPre2Lbmi2 +## BB#0: + movq (%rdx), %rax + movq 8(%rdx), %rcx + addq (%rsi), %rax + adcq 8(%rsi), %rcx + movq %rax, (%rdi) + movq %rcx, 8(%rdi) + sbbq %rax, %rax + andl $1, %eax + retq + + .globl _mcl_fp_subPre2Lbmi2 + .p2align 4, 0x90 +_mcl_fp_subPre2Lbmi2: ## @mcl_fp_subPre2Lbmi2 +## BB#0: + movq (%rsi), %rcx + movq 8(%rsi), %rsi + xorl %eax, %eax + subq (%rdx), %rcx + sbbq 8(%rdx), %rsi + movq %rcx, (%rdi) + movq %rsi, 8(%rdi) + sbbq $0, %rax + andl $1, %eax + retq + + .globl _mcl_fp_shr1_2Lbmi2 + .p2align 4, 0x90 +_mcl_fp_shr1_2Lbmi2: ## @mcl_fp_shr1_2Lbmi2 +## BB#0: + movq (%rsi), %rax + movq 8(%rsi), %rcx + shrdq $1, %rcx, %rax + movq %rax, (%rdi) + shrq %rcx + movq %rcx, 8(%rdi) + retq + + .globl _mcl_fp_add2Lbmi2 + .p2align 4, 0x90 +_mcl_fp_add2Lbmi2: ## @mcl_fp_add2Lbmi2 +## BB#0: + movq (%rdx), %rax + movq 8(%rdx), %rdx + addq (%rsi), %rax + adcq 8(%rsi), %rdx + movq %rax, (%rdi) + movq %rdx, 8(%rdi) + sbbq %rsi, %rsi + andl $1, %esi + subq (%rcx), %rax + sbbq 8(%rcx), %rdx + sbbq $0, %rsi + testb $1, %sil + jne LBB29_2 +## BB#1: ## %nocarry + movq %rax, (%rdi) + movq %rdx, 8(%rdi) +LBB29_2: ## %carry + retq + + .globl _mcl_fp_addNF2Lbmi2 + .p2align 4, 0x90 +_mcl_fp_addNF2Lbmi2: ## @mcl_fp_addNF2Lbmi2 +## BB#0: + movq (%rdx), %rax + movq 8(%rdx), %r8 + addq (%rsi), %rax + adcq 8(%rsi), %r8 + movq %rax, %rsi + subq (%rcx), %rsi + movq %r8, %rdx + sbbq 8(%rcx), %rdx + testq %rdx, %rdx + cmovsq %rax, %rsi + movq %rsi, (%rdi) + cmovsq %r8, %rdx + movq %rdx, 8(%rdi) + retq + + .globl _mcl_fp_sub2Lbmi2 + .p2align 4, 0x90 +_mcl_fp_sub2Lbmi2: ## @mcl_fp_sub2Lbmi2 +## BB#0: + movq (%rsi), %rax + movq 8(%rsi), %r8 + xorl %esi, %esi + subq (%rdx), %rax + sbbq 8(%rdx), %r8 + movq %rax, (%rdi) + movq %r8, 8(%rdi) + sbbq $0, %rsi + testb $1, %sil + jne LBB31_2 +## BB#1: ## %nocarry + retq +LBB31_2: ## %carry + movq 8(%rcx), %rdx + addq (%rcx), %rax + movq %rax, (%rdi) + adcq %r8, %rdx + movq %rdx, 8(%rdi) + retq + + .globl _mcl_fp_subNF2Lbmi2 + .p2align 4, 0x90 +_mcl_fp_subNF2Lbmi2: ## @mcl_fp_subNF2Lbmi2 +## BB#0: + movq (%rsi), %r8 + movq 8(%rsi), %rsi + subq (%rdx), %r8 + sbbq 8(%rdx), %rsi + movq %rsi, %rdx + sarq $63, %rdx + movq 8(%rcx), %rax + andq %rdx, %rax + andq (%rcx), %rdx + addq %r8, %rdx + movq %rdx, (%rdi) + adcq %rsi, %rax + movq %rax, 8(%rdi) + retq + + .globl _mcl_fpDbl_add2Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_add2Lbmi2: ## @mcl_fpDbl_add2Lbmi2 +## BB#0: + movq 24(%rdx), %r8 + movq 24(%rsi), %r9 + movq 16(%rdx), %r10 + movq (%rdx), %rax + movq 8(%rdx), %rdx + addq (%rsi), %rax + adcq 8(%rsi), %rdx + adcq 16(%rsi), %r10 + movq %rax, (%rdi) + movq %rdx, 8(%rdi) + adcq %r8, %r9 + sbbq %rax, %rax + andl $1, %eax + movq %r10, %rdx 
+ subq (%rcx), %rdx + movq %r9, %rsi + sbbq 8(%rcx), %rsi + sbbq $0, %rax + andl $1, %eax + cmovneq %r10, %rdx + movq %rdx, 16(%rdi) + testb %al, %al + cmovneq %r9, %rsi + movq %rsi, 24(%rdi) + retq + + .globl _mcl_fpDbl_sub2Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_sub2Lbmi2: ## @mcl_fpDbl_sub2Lbmi2 +## BB#0: + movq 24(%rdx), %r8 + movq 24(%rsi), %r9 + movq 16(%rsi), %r10 + movq (%rsi), %r11 + movq 8(%rsi), %rsi + xorl %eax, %eax + subq (%rdx), %r11 + sbbq 8(%rdx), %rsi + sbbq 16(%rdx), %r10 + movq %r11, (%rdi) + movq %rsi, 8(%rdi) + sbbq %r8, %r9 + movl $0, %edx + sbbq $0, %rdx + andl $1, %edx + movq (%rcx), %rsi + cmoveq %rax, %rsi + testb %dl, %dl + cmovneq 8(%rcx), %rax + addq %r10, %rsi + movq %rsi, 16(%rdi) + adcq %r9, %rax + movq %rax, 24(%rdi) + retq + + .globl _mcl_fp_mulUnitPre3Lbmi2 + .p2align 4, 0x90 +_mcl_fp_mulUnitPre3Lbmi2: ## @mcl_fp_mulUnitPre3Lbmi2 +## BB#0: + mulxq 16(%rsi), %r8, %rcx + mulxq 8(%rsi), %r9, %rax + mulxq (%rsi), %rdx, %rsi + movq %rdx, (%rdi) + addq %r9, %rsi + movq %rsi, 8(%rdi) + adcq %r8, %rax + movq %rax, 16(%rdi) + adcq $0, %rcx + movq %rcx, 24(%rdi) + retq + + .globl _mcl_fpDbl_mulPre3Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_mulPre3Lbmi2: ## @mcl_fpDbl_mulPre3Lbmi2 +## BB#0: + pushq %r15 + pushq %r14 + pushq %rbx + movq %rdx, %r9 + movq (%rsi), %r10 + movq 8(%rsi), %r8 + movq (%r9), %rax + movq %r10, %rdx + mulxq %rax, %rdx, %r14 + movq 16(%rsi), %r11 + movq %rdx, (%rdi) + movq %r11, %rdx + mulxq %rax, %rsi, %rbx + movq %r8, %rdx + mulxq %rax, %rax, %rcx + addq %r14, %rax + adcq %rsi, %rcx + adcq $0, %rbx + movq 8(%r9), %rsi + movq %r10, %rdx + mulxq %rsi, %rdx, %r14 + addq %rax, %rdx + movq %rdx, 8(%rdi) + movq %r11, %rdx + mulxq %rsi, %rax, %r15 + movq %r8, %rdx + mulxq %rsi, %rsi, %rdx + adcq %rcx, %rsi + adcq %rbx, %rax + sbbq %rcx, %rcx + andl $1, %ecx + addq %r14, %rsi + adcq %rdx, %rax + adcq %r15, %rcx + movq 16(%r9), %rbx + movq %r10, %rdx + mulxq %rbx, %rdx, %r9 + addq %rsi, %rdx + movq %rdx, 16(%rdi) + movq %r11, %rdx + mulxq %rbx, %rsi, %r10 + movq %r8, %rdx + mulxq %rbx, %rbx, %rdx + adcq %rax, %rbx + adcq %rcx, %rsi + sbbq %rax, %rax + andl $1, %eax + addq %r9, %rbx + movq %rbx, 24(%rdi) + adcq %rdx, %rsi + movq %rsi, 32(%rdi) + adcq %r10, %rax + movq %rax, 40(%rdi) + popq %rbx + popq %r14 + popq %r15 + retq + + .globl _mcl_fpDbl_sqrPre3Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_sqrPre3Lbmi2: ## @mcl_fpDbl_sqrPre3Lbmi2 +## BB#0: + pushq %r14 + pushq %rbx + movq 16(%rsi), %r10 + movq (%rsi), %rcx + movq 8(%rsi), %rsi + movq %rcx, %rdx + mulxq %rcx, %rdx, %rax + movq %rdx, (%rdi) + movq %r10, %rdx + mulxq %rcx, %r11, %r8 + movq %rsi, %rdx + mulxq %rcx, %rdx, %r14 + addq %rdx, %rax + movq %r14, %rbx + adcq %r11, %rbx + movq %r8, %rcx + adcq $0, %rcx + addq %rdx, %rax + movq %rax, 8(%rdi) + movq %r10, %rdx + mulxq %rsi, %rax, %r9 + movq %rsi, %rdx + mulxq %rsi, %rsi, %rdx + adcq %rbx, %rsi + adcq %rax, %rcx + sbbq %rbx, %rbx + andl $1, %ebx + addq %r14, %rsi + adcq %rdx, %rcx + adcq %r9, %rbx + addq %r11, %rsi + movq %rsi, 16(%rdi) + movq %r10, %rdx + mulxq %r10, %rsi, %rdx + adcq %rax, %rcx + adcq %rbx, %rsi + sbbq %rax, %rax + andl $1, %eax + addq %r8, %rcx + movq %rcx, 24(%rdi) + adcq %r9, %rsi + movq %rsi, 32(%rdi) + adcq %rdx, %rax + movq %rax, 40(%rdi) + popq %rbx + popq %r14 + retq + + .globl _mcl_fp_mont3Lbmi2 + .p2align 4, 0x90 +_mcl_fp_mont3Lbmi2: ## @mcl_fp_mont3Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rdx, %r14 + movq %rdi, -8(%rsp) ## 8-byte Spill + movq 16(%rsi), %r12 + movq 
(%r14), %rax + movq %r14, -16(%rsp) ## 8-byte Spill + movq %r12, %rdx + movq %r12, -24(%rsp) ## 8-byte Spill + mulxq %rax, %r11, %rbp + movq (%rsi), %r15 + movq 8(%rsi), %rdx + movq %rdx, -48(%rsp) ## 8-byte Spill + mulxq %rax, %rbx, %r8 + movq %r15, %rdx + movq %r15, -32(%rsp) ## 8-byte Spill + mulxq %rax, %r9, %rdi + addq %rbx, %rdi + adcq %r11, %r8 + adcq $0, %rbp + movq -8(%rcx), %r13 + movq %r9, %rdx + imulq %r13, %rdx + movq 8(%rcx), %rax + movq %rax, -56(%rsp) ## 8-byte Spill + mulxq %rax, %r11, %r10 + movq (%rcx), %rax + movq %rax, -64(%rsp) ## 8-byte Spill + mulxq %rax, %rsi, %rbx + addq %r11, %rbx + movq 16(%rcx), %rax + movq %rax, -40(%rsp) ## 8-byte Spill + mulxq %rax, %rcx, %rax + adcq %r10, %rcx + adcq $0, %rax + addq %r9, %rsi + adcq %rdi, %rbx + movq 8(%r14), %rdx + adcq %r8, %rcx + adcq %rbp, %rax + sbbq %r9, %r9 + andl $1, %r9d + mulxq %r12, %r11, %rdi + movq -48(%rsp), %r12 ## 8-byte Reload + mulxq %r12, %r10, %rsi + mulxq %r15, %r8, %rbp + addq %r10, %rbp + adcq %r11, %rsi + adcq $0, %rdi + addq %rbx, %r8 + adcq %rcx, %rbp + adcq %rax, %rsi + adcq %r9, %rdi + sbbq %r11, %r11 + andl $1, %r11d + movq %r8, %rdx + imulq %r13, %rdx + movq -40(%rsp), %r14 ## 8-byte Reload + mulxq %r14, %r9, %rcx + mulxq -56(%rsp), %r10, %rax ## 8-byte Folded Reload + mulxq -64(%rsp), %rdx, %rbx ## 8-byte Folded Reload + addq %r10, %rbx + adcq %r9, %rax + adcq $0, %rcx + addq %r8, %rdx + adcq %rbp, %rbx + adcq %rsi, %rax + adcq %rdi, %rcx + adcq $0, %r11 + movq -16(%rsp), %rdx ## 8-byte Reload + movq 16(%rdx), %rdx + mulxq -24(%rsp), %r9, %rsi ## 8-byte Folded Reload + mulxq %r12, %r10, %r15 + mulxq -32(%rsp), %r8, %rdi ## 8-byte Folded Reload + addq %r10, %rdi + adcq %r9, %r15 + adcq $0, %rsi + addq %rbx, %r8 + adcq %rax, %rdi + adcq %rcx, %r15 + adcq %r11, %rsi + sbbq %rbx, %rbx + andl $1, %ebx + imulq %r8, %r13 + movq %r13, %rdx + mulxq %r14, %r9, %rbp + movq %r14, %r12 + movq -56(%rsp), %r14 ## 8-byte Reload + mulxq %r14, %r10, %rax + movq -64(%rsp), %rcx ## 8-byte Reload + mulxq %rcx, %r11, %rdx + addq %r10, %rdx + adcq %r9, %rax + adcq $0, %rbp + addq %r8, %r11 + adcq %rdi, %rdx + adcq %r15, %rax + adcq %rsi, %rbp + adcq $0, %rbx + movq %rdx, %rsi + subq %rcx, %rsi + movq %rax, %rdi + sbbq %r14, %rdi + movq %rbp, %rcx + sbbq %r12, %rcx + sbbq $0, %rbx + andl $1, %ebx + cmovneq %rbp, %rcx + testb %bl, %bl + cmovneq %rdx, %rsi + movq -8(%rsp), %rdx ## 8-byte Reload + movq %rsi, (%rdx) + cmovneq %rax, %rdi + movq %rdi, 8(%rdx) + movq %rcx, 16(%rdx) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_montNF3Lbmi2 + .p2align 4, 0x90 +_mcl_fp_montNF3Lbmi2: ## @mcl_fp_montNF3Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rcx, %r8 + movq %rdx, %r10 + movq %rdi, -8(%rsp) ## 8-byte Spill + movq (%rsi), %rcx + movq 8(%rsi), %rdi + movq %rdi, -32(%rsp) ## 8-byte Spill + movq (%r10), %rax + movq %r10, -16(%rsp) ## 8-byte Spill + movq %rdi, %rdx + mulxq %rax, %rbx, %r14 + movq %rcx, %rdx + movq %rcx, -24(%rsp) ## 8-byte Spill + mulxq %rax, %r15, %r12 + movq 16(%rsi), %r11 + addq %rbx, %r12 + movq %r11, %rdx + mulxq %rax, %rsi, %rbx + adcq %r14, %rsi + adcq $0, %rbx + movq -8(%r8), %r9 + movq (%r8), %r14 + movq %r15, %rdx + imulq %r9, %rdx + mulxq %r14, %rbp, %r13 + addq %r15, %rbp + movq 8(%r8), %r15 + mulxq %r15, %rdi, %rbp + adcq %r12, %rdi + movq 16(%r8), %r12 + mulxq %r12, %rax, %r8 + adcq %rsi, %rax + adcq $0, %rbx + addq %r13, %rdi + movq 8(%r10), %rdx + adcq %rbp, %rax + adcq %r8, %rbx + movq 
-32(%rsp), %r10 ## 8-byte Reload + mulxq %r10, %rsi, %r8 + mulxq %rcx, %r13, %rbp + addq %rsi, %rbp + mulxq %r11, %rcx, %rsi + adcq %r8, %rcx + adcq $0, %rsi + addq %rdi, %r13 + adcq %rax, %rbp + adcq %rbx, %rcx + adcq $0, %rsi + movq %r13, %rdx + imulq %r9, %rdx + mulxq %r14, %rdi, %rbx + addq %r13, %rdi + mulxq %r15, %rax, %rdi + adcq %rbp, %rax + mulxq %r12, %rbp, %rdx + adcq %rcx, %rbp + adcq $0, %rsi + addq %rbx, %rax + adcq %rdi, %rbp + adcq %rdx, %rsi + movq -16(%rsp), %rcx ## 8-byte Reload + movq 16(%rcx), %rdx + mulxq %r10, %rbx, %r8 + mulxq -24(%rsp), %r10, %rdi ## 8-byte Folded Reload + addq %rbx, %rdi + mulxq %r11, %rcx, %rbx + adcq %r8, %rcx + adcq $0, %rbx + addq %rax, %r10 + adcq %rbp, %rdi + adcq %rsi, %rcx + adcq $0, %rbx + imulq %r10, %r9 + movq %r9, %rdx + mulxq %r14, %rdx, %r8 + addq %r10, %rdx + movq %r9, %rdx + mulxq %r12, %rbp, %rsi + mulxq %r15, %rax, %rdx + adcq %rdi, %rax + adcq %rcx, %rbp + adcq $0, %rbx + addq %r8, %rax + adcq %rdx, %rbp + adcq %rsi, %rbx + movq %rax, %rcx + subq %r14, %rcx + movq %rbp, %rdx + sbbq %r15, %rdx + movq %rbx, %rsi + sbbq %r12, %rsi + movq %rsi, %rdi + sarq $63, %rdi + cmovsq %rax, %rcx + movq -8(%rsp), %rax ## 8-byte Reload + movq %rcx, (%rax) + cmovsq %rbp, %rdx + movq %rdx, 8(%rax) + cmovsq %rbx, %rsi + movq %rsi, 16(%rax) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_montRed3Lbmi2 + .p2align 4, 0x90 +_mcl_fp_montRed3Lbmi2: ## @mcl_fp_montRed3Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rdx, %rcx + movq %rdi, -8(%rsp) ## 8-byte Spill + movq -8(%rcx), %r15 + movq (%rcx), %r9 + movq (%rsi), %rbx + movq %rbx, %rdx + imulq %r15, %rdx + movq 16(%rcx), %rax + mulxq %rax, %r14, %r11 + movq %rax, %rbp + movq %rbp, -16(%rsp) ## 8-byte Spill + movq 8(%rcx), %r10 + mulxq %r10, %rax, %r13 + mulxq %r9, %rdx, %rcx + addq %rax, %rcx + adcq %r14, %r13 + adcq $0, %r11 + movq 40(%rsi), %r14 + movq 32(%rsi), %r12 + addq %rbx, %rdx + adcq 8(%rsi), %rcx + adcq 16(%rsi), %r13 + adcq 24(%rsi), %r11 + adcq $0, %r12 + adcq $0, %r14 + sbbq %rsi, %rsi + andl $1, %esi + movq %rcx, %rdx + imulq %r15, %rdx + mulxq %rbp, %rbp, %rdi + mulxq %r10, %r8, %rbx + mulxq %r9, %rdx, %rax + addq %r8, %rax + adcq %rbp, %rbx + adcq $0, %rdi + addq %rcx, %rdx + adcq %r13, %rax + adcq %r11, %rbx + adcq %r12, %rdi + adcq $0, %r14 + adcq $0, %rsi + imulq %rax, %r15 + movq %r15, %rdx + movq -16(%rsp), %r13 ## 8-byte Reload + mulxq %r13, %r8, %rcx + movq %r15, %rdx + mulxq %r10, %r11, %r12 + mulxq %r9, %r15, %rdx + addq %r11, %rdx + adcq %r8, %r12 + adcq $0, %rcx + addq %rax, %r15 + adcq %rbx, %rdx + adcq %rdi, %r12 + adcq %r14, %rcx + adcq $0, %rsi + movq %rdx, %rax + subq %r9, %rax + movq %r12, %rdi + sbbq %r10, %rdi + movq %rcx, %rbp + sbbq %r13, %rbp + sbbq $0, %rsi + andl $1, %esi + cmovneq %rcx, %rbp + testb %sil, %sil + cmovneq %rdx, %rax + movq -8(%rsp), %rcx ## 8-byte Reload + movq %rax, (%rcx) + cmovneq %r12, %rdi + movq %rdi, 8(%rcx) + movq %rbp, 16(%rcx) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_addPre3Lbmi2 + .p2align 4, 0x90 +_mcl_fp_addPre3Lbmi2: ## @mcl_fp_addPre3Lbmi2 +## BB#0: + movq 16(%rdx), %rax + movq (%rdx), %rcx + movq 8(%rdx), %rdx + addq (%rsi), %rcx + adcq 8(%rsi), %rdx + adcq 16(%rsi), %rax + movq %rcx, (%rdi) + movq %rdx, 8(%rdi) + movq %rax, 16(%rdi) + sbbq %rax, %rax + andl $1, %eax + retq + + .globl _mcl_fp_subPre3Lbmi2 + .p2align 4, 0x90 +_mcl_fp_subPre3Lbmi2: ## @mcl_fp_subPre3Lbmi2 
+## BB#0: + movq 16(%rsi), %r8 + movq (%rsi), %rcx + movq 8(%rsi), %rsi + xorl %eax, %eax + subq (%rdx), %rcx + sbbq 8(%rdx), %rsi + sbbq 16(%rdx), %r8 + movq %rcx, (%rdi) + movq %rsi, 8(%rdi) + movq %r8, 16(%rdi) + sbbq $0, %rax + andl $1, %eax + retq + + .globl _mcl_fp_shr1_3Lbmi2 + .p2align 4, 0x90 +_mcl_fp_shr1_3Lbmi2: ## @mcl_fp_shr1_3Lbmi2 +## BB#0: + movq 16(%rsi), %rax + movq (%rsi), %rcx + movq 8(%rsi), %rdx + shrdq $1, %rdx, %rcx + movq %rcx, (%rdi) + shrdq $1, %rax, %rdx + movq %rdx, 8(%rdi) + shrq %rax + movq %rax, 16(%rdi) + retq + + .globl _mcl_fp_add3Lbmi2 + .p2align 4, 0x90 +_mcl_fp_add3Lbmi2: ## @mcl_fp_add3Lbmi2 +## BB#0: + movq 16(%rdx), %r8 + movq (%rdx), %rax + movq 8(%rdx), %rdx + addq (%rsi), %rax + adcq 8(%rsi), %rdx + adcq 16(%rsi), %r8 + movq %rax, (%rdi) + movq %rdx, 8(%rdi) + movq %r8, 16(%rdi) + sbbq %rsi, %rsi + andl $1, %esi + subq (%rcx), %rax + sbbq 8(%rcx), %rdx + sbbq 16(%rcx), %r8 + sbbq $0, %rsi + testb $1, %sil + jne LBB44_2 +## BB#1: ## %nocarry + movq %rax, (%rdi) + movq %rdx, 8(%rdi) + movq %r8, 16(%rdi) +LBB44_2: ## %carry + retq + + .globl _mcl_fp_addNF3Lbmi2 + .p2align 4, 0x90 +_mcl_fp_addNF3Lbmi2: ## @mcl_fp_addNF3Lbmi2 +## BB#0: + movq 16(%rdx), %r8 + movq (%rdx), %r10 + movq 8(%rdx), %r9 + addq (%rsi), %r10 + adcq 8(%rsi), %r9 + adcq 16(%rsi), %r8 + movq %r10, %rsi + subq (%rcx), %rsi + movq %r9, %rdx + sbbq 8(%rcx), %rdx + movq %r8, %rax + sbbq 16(%rcx), %rax + movq %rax, %rcx + sarq $63, %rcx + cmovsq %r10, %rsi + movq %rsi, (%rdi) + cmovsq %r9, %rdx + movq %rdx, 8(%rdi) + cmovsq %r8, %rax + movq %rax, 16(%rdi) + retq + + .globl _mcl_fp_sub3Lbmi2 + .p2align 4, 0x90 +_mcl_fp_sub3Lbmi2: ## @mcl_fp_sub3Lbmi2 +## BB#0: + movq 16(%rsi), %r8 + movq (%rsi), %rax + movq 8(%rsi), %r9 + xorl %esi, %esi + subq (%rdx), %rax + sbbq 8(%rdx), %r9 + sbbq 16(%rdx), %r8 + movq %rax, (%rdi) + movq %r9, 8(%rdi) + movq %r8, 16(%rdi) + sbbq $0, %rsi + testb $1, %sil + jne LBB46_2 +## BB#1: ## %nocarry + retq +LBB46_2: ## %carry + movq 8(%rcx), %rdx + movq 16(%rcx), %rsi + addq (%rcx), %rax + movq %rax, (%rdi) + adcq %r9, %rdx + movq %rdx, 8(%rdi) + adcq %r8, %rsi + movq %rsi, 16(%rdi) + retq + + .globl _mcl_fp_subNF3Lbmi2 + .p2align 4, 0x90 +_mcl_fp_subNF3Lbmi2: ## @mcl_fp_subNF3Lbmi2 +## BB#0: + movq 16(%rsi), %r10 + movq (%rsi), %r8 + movq 8(%rsi), %r9 + subq (%rdx), %r8 + sbbq 8(%rdx), %r9 + sbbq 16(%rdx), %r10 + movq %r10, %rdx + sarq $63, %rdx + movq %rdx, %rsi + shldq $1, %r10, %rsi + andq (%rcx), %rsi + movq 16(%rcx), %rax + andq %rdx, %rax + andq 8(%rcx), %rdx + addq %r8, %rsi + movq %rsi, (%rdi) + adcq %r9, %rdx + movq %rdx, 8(%rdi) + adcq %r10, %rax + movq %rax, 16(%rdi) + retq + + .globl _mcl_fpDbl_add3Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_add3Lbmi2: ## @mcl_fpDbl_add3Lbmi2 +## BB#0: + pushq %r15 + pushq %r14 + pushq %rbx + movq 40(%rdx), %r10 + movq 40(%rsi), %r8 + movq 32(%rdx), %r11 + movq 24(%rdx), %r14 + movq 24(%rsi), %r15 + movq 32(%rsi), %r9 + movq 16(%rdx), %rbx + movq (%rdx), %rax + movq 8(%rdx), %rdx + addq (%rsi), %rax + adcq 8(%rsi), %rdx + adcq 16(%rsi), %rbx + movq %rax, (%rdi) + movq %rdx, 8(%rdi) + movq %rbx, 16(%rdi) + adcq %r14, %r15 + adcq %r11, %r9 + adcq %r10, %r8 + sbbq %rax, %rax + andl $1, %eax + movq %r15, %rdx + subq (%rcx), %rdx + movq %r9, %rsi + sbbq 8(%rcx), %rsi + movq %r8, %rbx + sbbq 16(%rcx), %rbx + sbbq $0, %rax + andl $1, %eax + cmovneq %r15, %rdx + movq %rdx, 24(%rdi) + testb %al, %al + cmovneq %r9, %rsi + movq %rsi, 32(%rdi) + cmovneq %r8, %rbx + movq %rbx, 40(%rdi) + popq %rbx + popq %r14 + popq %r15 + retq + + 
.globl _mcl_fpDbl_sub3Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_sub3Lbmi2: ## @mcl_fpDbl_sub3Lbmi2 +## BB#0: + pushq %r15 + pushq %r14 + pushq %r12 + pushq %rbx + movq 40(%rdx), %r10 + movq 40(%rsi), %r8 + movq 32(%rsi), %r9 + movq 24(%rsi), %r11 + movq 16(%rsi), %r14 + movq (%rsi), %rbx + movq 8(%rsi), %rax + xorl %esi, %esi + subq (%rdx), %rbx + sbbq 8(%rdx), %rax + movq 24(%rdx), %r15 + movq 32(%rdx), %r12 + sbbq 16(%rdx), %r14 + movq %rbx, (%rdi) + movq %rax, 8(%rdi) + movq %r14, 16(%rdi) + sbbq %r15, %r11 + sbbq %r12, %r9 + sbbq %r10, %r8 + movl $0, %eax + sbbq $0, %rax + andl $1, %eax + movq (%rcx), %rdx + cmoveq %rsi, %rdx + testb %al, %al + movq 16(%rcx), %rax + cmoveq %rsi, %rax + cmovneq 8(%rcx), %rsi + addq %r11, %rdx + movq %rdx, 24(%rdi) + adcq %r9, %rsi + movq %rsi, 32(%rdi) + adcq %r8, %rax + movq %rax, 40(%rdi) + popq %rbx + popq %r12 + popq %r14 + popq %r15 + retq + + .globl _mcl_fp_mulUnitPre4Lbmi2 + .p2align 4, 0x90 +_mcl_fp_mulUnitPre4Lbmi2: ## @mcl_fp_mulUnitPre4Lbmi2 +## BB#0: + mulxq 24(%rsi), %r8, %r11 + mulxq 16(%rsi), %r9, %rax + mulxq 8(%rsi), %r10, %rcx + mulxq (%rsi), %rdx, %rsi + movq %rdx, (%rdi) + addq %r10, %rsi + movq %rsi, 8(%rdi) + adcq %r9, %rcx + movq %rcx, 16(%rdi) + adcq %r8, %rax + movq %rax, 24(%rdi) + adcq $0, %r11 + movq %r11, 32(%rdi) + retq + + .globl _mcl_fpDbl_mulPre4Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_mulPre4Lbmi2: ## @mcl_fpDbl_mulPre4Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq (%rsi), %r14 + movq 8(%rsi), %r10 + movq (%rdx), %rcx + movq %rdx, %rbp + movq %r14, %rdx + mulxq %rcx, %rdx, %r15 + movq 24(%rsi), %r11 + movq 16(%rsi), %r9 + movq %rdx, (%rdi) + movq %r10, %rdx + mulxq %rcx, %rbx, %r12 + addq %r15, %rbx + movq %r9, %rdx + mulxq %rcx, %r13, %r15 + adcq %r12, %r13 + movq %r11, %rdx + mulxq %rcx, %rcx, %r12 + adcq %r15, %rcx + adcq $0, %r12 + movq 8(%rbp), %rax + movq %r14, %rdx + mulxq %rax, %r8, %rdx + movq %rdx, -8(%rsp) ## 8-byte Spill + addq %rbx, %r8 + movq %r10, %rdx + mulxq %rax, %r15, %rdx + movq %rdx, -16(%rsp) ## 8-byte Spill + adcq %r13, %r15 + movq %r9, %rdx + mulxq %rax, %rbx, %r13 + adcq %rcx, %rbx + movq %r11, %rdx + mulxq %rax, %rcx, %rax + adcq %r12, %rcx + sbbq %r12, %r12 + andl $1, %r12d + addq -8(%rsp), %r15 ## 8-byte Folded Reload + adcq -16(%rsp), %rbx ## 8-byte Folded Reload + adcq %r13, %rcx + movq %r8, 8(%rdi) + adcq %rax, %r12 + movq %rbp, %r13 + movq 16(%r13), %rax + movq %r14, %rdx + mulxq %rax, %rdx, %r8 + addq %r15, %rdx + movq %rdx, 16(%rdi) + movq %r10, %rdx + mulxq %rax, %rbp, %r10 + adcq %rbx, %rbp + movq %r11, %rdx + mulxq %rax, %r14, %r11 + movq %r9, %rdx + mulxq %rax, %r15, %rdx + adcq %rcx, %r15 + adcq %r12, %r14 + sbbq %rcx, %rcx + andl $1, %ecx + addq %r8, %rbp + adcq %r10, %r15 + adcq %rdx, %r14 + adcq %r11, %rcx + movq 24(%r13), %rdx + mulxq 24(%rsi), %rbx, %r8 + mulxq (%rsi), %rax, %r9 + addq %rbp, %rax + movq %rax, 24(%rdi) + mulxq 16(%rsi), %rbp, %rax + mulxq 8(%rsi), %rsi, %rdx + adcq %r15, %rsi + adcq %r14, %rbp + adcq %rcx, %rbx + sbbq %rcx, %rcx + andl $1, %ecx + addq %r9, %rsi + movq %rsi, 32(%rdi) + adcq %rdx, %rbp + movq %rbp, 40(%rdi) + adcq %rax, %rbx + movq %rbx, 48(%rdi) + adcq %r8, %rcx + movq %rcx, 56(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fpDbl_sqrPre4Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_sqrPre4Lbmi2: ## @mcl_fpDbl_sqrPre4Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq 24(%rsi), %r8 + movq 16(%rsi), %r9 + movq 
(%rsi), %rcx + movq 8(%rsi), %rax + movq %rcx, %rdx + mulxq %rcx, %rdx, %r11 + movq %rdx, (%rdi) + movq %r9, %rdx + mulxq %rcx, %rbp, %r10 + movq %rbp, -16(%rsp) ## 8-byte Spill + movq %r10, -8(%rsp) ## 8-byte Spill + movq %rax, %rdx + mulxq %rcx, %r12, %r15 + addq %r12, %r11 + movq %r15, %rbx + adcq %rbp, %rbx + movq %r8, %rdx + mulxq %rcx, %rcx, %r13 + adcq %r10, %rcx + adcq $0, %r13 + addq %r12, %r11 + movq %rax, %rdx + mulxq %rax, %rbp, %r12 + adcq %rbx, %rbp + movq %r8, %rdx + mulxq %rax, %r10, %rbx + movq %r9, %rdx + mulxq %rax, %r14, %rdx + adcq %r14, %rcx + adcq %r13, %r10 + sbbq %rax, %rax + andl $1, %eax + addq %r15, %rbp + adcq %r12, %rcx + adcq %rdx, %r10 + movq %rdx, %r12 + adcq %rbx, %rax + movq %r11, 8(%rdi) + addq -16(%rsp), %rbp ## 8-byte Folded Reload + movq %rbp, 16(%rdi) + movq %r8, %rdx + mulxq %r9, %r11, %r8 + movq %r9, %rdx + mulxq %r9, %r15, %rdx + adcq %r14, %rcx + adcq %r10, %r15 + adcq %rax, %r11 + sbbq %rax, %rax + andl $1, %eax + addq -8(%rsp), %rcx ## 8-byte Folded Reload + adcq %r12, %r15 + adcq %rdx, %r11 + adcq %r8, %rax + movq 24(%rsi), %rdx + mulxq 16(%rsi), %rbx, %r8 + mulxq 8(%rsi), %rbp, %r9 + mulxq (%rsi), %rsi, %r10 + addq %rcx, %rsi + movq %rsi, 24(%rdi) + adcq %r15, %rbp + adcq %r11, %rbx + mulxq %rdx, %rdx, %rcx + adcq %rax, %rdx + sbbq %rax, %rax + andl $1, %eax + addq %r10, %rbp + movq %rbp, 32(%rdi) + adcq %r9, %rbx + movq %rbx, 40(%rdi) + adcq %r8, %rdx + movq %rdx, 48(%rdi) + adcq %rcx, %rax + movq %rax, 56(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_mont4Lbmi2 + .p2align 4, 0x90 +_mcl_fp_mont4Lbmi2: ## @mcl_fp_mont4Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rdx, %r13 + movq %rdi, -8(%rsp) ## 8-byte Spill + movq 24(%rsi), %rdi + movq %rdi, -32(%rsp) ## 8-byte Spill + movq (%r13), %rax + movq %r13, -16(%rsp) ## 8-byte Spill + movq %rdi, %rdx + mulxq %rax, %rdi, %r11 + movq 16(%rsi), %rdx + movq %rdx, -40(%rsp) ## 8-byte Spill + mulxq %rax, %rbx, %r10 + movq (%rsi), %rbp + movq %rbp, -48(%rsp) ## 8-byte Spill + movq 8(%rsi), %rdx + movq %rdx, -56(%rsp) ## 8-byte Spill + mulxq %rax, %rsi, %r12 + movq %rbp, %rdx + mulxq %rax, %r14, %r8 + addq %rsi, %r8 + adcq %rbx, %r12 + adcq %rdi, %r10 + adcq $0, %r11 + movq -8(%rcx), %rax + movq %rax, -88(%rsp) ## 8-byte Spill + movq %r14, %rdx + imulq %rax, %rdx + movq 24(%rcx), %rax + movq %rax, -64(%rsp) ## 8-byte Spill + mulxq %rax, %r15, %rax + movq 16(%rcx), %rsi + movq %rsi, -80(%rsp) ## 8-byte Spill + mulxq %rsi, %r9, %rsi + movq (%rcx), %rbp + movq %rbp, -24(%rsp) ## 8-byte Spill + movq 8(%rcx), %rcx + movq %rcx, -72(%rsp) ## 8-byte Spill + mulxq %rcx, %rdi, %rcx + mulxq %rbp, %rdx, %rbx + addq %rdi, %rbx + adcq %r9, %rcx + adcq %r15, %rsi + adcq $0, %rax + addq %r14, %rdx + adcq %r8, %rbx + adcq %r12, %rcx + adcq %r10, %rsi + adcq %r11, %rax + sbbq %rdi, %rdi + andl $1, %edi + movq 8(%r13), %rdx + mulxq -32(%rsp), %r12, %r10 ## 8-byte Folded Reload + mulxq -40(%rsp), %r15, %r11 ## 8-byte Folded Reload + mulxq -56(%rsp), %r14, %rbp ## 8-byte Folded Reload + mulxq -48(%rsp), %r8, %r9 ## 8-byte Folded Reload + addq %r14, %r9 + adcq %r15, %rbp + adcq %r12, %r11 + adcq $0, %r10 + addq %rbx, %r8 + adcq %rcx, %r9 + adcq %rsi, %rbp + adcq %rax, %r11 + adcq %rdi, %r10 + sbbq %rbx, %rbx + andl $1, %ebx + movq %r8, %rdx + imulq -88(%rsp), %rdx ## 8-byte Folded Reload + mulxq -64(%rsp), %r14, %rcx ## 8-byte Folded Reload + mulxq -80(%rsp), %r15, %rsi ## 8-byte Folded Reload + mulxq -72(%rsp), 
%r12, %rax ## 8-byte Folded Reload + movq -24(%rsp), %r13 ## 8-byte Reload + mulxq %r13, %rdx, %rdi + addq %r12, %rdi + adcq %r15, %rax + adcq %r14, %rsi + adcq $0, %rcx + addq %r8, %rdx + adcq %r9, %rdi + adcq %rbp, %rax + adcq %r11, %rsi + adcq %r10, %rcx + adcq $0, %rbx + movq -16(%rsp), %rdx ## 8-byte Reload + movq 16(%rdx), %rdx + mulxq -32(%rsp), %r14, %r11 ## 8-byte Folded Reload + mulxq -40(%rsp), %r15, %rbp ## 8-byte Folded Reload + mulxq -56(%rsp), %r12, %r8 ## 8-byte Folded Reload + mulxq -48(%rsp), %r9, %r10 ## 8-byte Folded Reload + addq %r12, %r10 + adcq %r15, %r8 + adcq %r14, %rbp + adcq $0, %r11 + addq %rdi, %r9 + adcq %rax, %r10 + adcq %rsi, %r8 + adcq %rcx, %rbp + adcq %rbx, %r11 + sbbq %rax, %rax + movq %r9, %rdx + imulq -88(%rsp), %rdx ## 8-byte Folded Reload + mulxq -72(%rsp), %rcx, %rsi ## 8-byte Folded Reload + mulxq %r13, %r14, %rdi + addq %rcx, %rdi + mulxq -80(%rsp), %rcx, %r15 ## 8-byte Folded Reload + adcq %rsi, %rcx + movq -64(%rsp), %r13 ## 8-byte Reload + mulxq %r13, %rbx, %rsi + adcq %r15, %rbx + adcq $0, %rsi + andl $1, %eax + addq %r9, %r14 + adcq %r10, %rdi + adcq %r8, %rcx + adcq %rbp, %rbx + adcq %r11, %rsi + adcq $0, %rax + movq -16(%rsp), %rdx ## 8-byte Reload + movq 24(%rdx), %rdx + mulxq -32(%rsp), %r11, %r8 ## 8-byte Folded Reload + mulxq -40(%rsp), %r15, %r9 ## 8-byte Folded Reload + mulxq -56(%rsp), %r12, %r14 ## 8-byte Folded Reload + mulxq -48(%rsp), %r10, %rbp ## 8-byte Folded Reload + addq %r12, %rbp + adcq %r15, %r14 + adcq %r11, %r9 + adcq $0, %r8 + addq %rdi, %r10 + adcq %rcx, %rbp + adcq %rbx, %r14 + adcq %rsi, %r9 + adcq %rax, %r8 + sbbq %rax, %rax + andl $1, %eax + movq -88(%rsp), %rdx ## 8-byte Reload + imulq %r10, %rdx + mulxq %r13, %rcx, %rdi + movq %rcx, -88(%rsp) ## 8-byte Spill + mulxq -80(%rsp), %r15, %rsi ## 8-byte Folded Reload + movq -72(%rsp), %rbx ## 8-byte Reload + mulxq %rbx, %r12, %rcx + movq -24(%rsp), %r11 ## 8-byte Reload + mulxq %r11, %rdx, %r13 + addq %r12, %r13 + adcq %r15, %rcx + adcq -88(%rsp), %rsi ## 8-byte Folded Reload + adcq $0, %rdi + addq %r10, %rdx + adcq %rbp, %r13 + adcq %r14, %rcx + adcq %r9, %rsi + adcq %r8, %rdi + adcq $0, %rax + movq %r13, %rdx + subq %r11, %rdx + movq %rcx, %rbp + sbbq %rbx, %rbp + movq %rsi, %r8 + sbbq -80(%rsp), %r8 ## 8-byte Folded Reload + movq %rdi, %rbx + sbbq -64(%rsp), %rbx ## 8-byte Folded Reload + sbbq $0, %rax + andl $1, %eax + cmovneq %rdi, %rbx + testb %al, %al + cmovneq %r13, %rdx + movq -8(%rsp), %rax ## 8-byte Reload + movq %rdx, (%rax) + cmovneq %rcx, %rbp + movq %rbp, 8(%rax) + cmovneq %rsi, %r8 + movq %r8, 16(%rax) + movq %rbx, 24(%rax) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_montNF4Lbmi2 + .p2align 4, 0x90 +_mcl_fp_montNF4Lbmi2: ## @mcl_fp_montNF4Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rdi, -8(%rsp) ## 8-byte Spill + movq (%rsi), %rdi + movq %rdi, -56(%rsp) ## 8-byte Spill + movq 8(%rsi), %rbp + movq %rbp, -64(%rsp) ## 8-byte Spill + movq (%rdx), %rax + movq %rdx, %r15 + movq %r15, -24(%rsp) ## 8-byte Spill + movq %rbp, %rdx + mulxq %rax, %rbp, %r9 + movq %rdi, %rdx + mulxq %rax, %r12, %rbx + movq 16(%rsi), %rdx + movq %rdx, -40(%rsp) ## 8-byte Spill + addq %rbp, %rbx + mulxq %rax, %r14, %rbp + adcq %r9, %r14 + movq 24(%rsi), %rdx + movq %rdx, -80(%rsp) ## 8-byte Spill + mulxq %rax, %r8, %rdi + adcq %rbp, %r8 + adcq $0, %rdi + movq -8(%rcx), %r13 + movq (%rcx), %rax + movq %rax, -48(%rsp) ## 8-byte Spill + movq %r12, %rdx + imulq %r13, %rdx + 
mulxq %rax, %rax, %r11 + addq %r12, %rax + movq 8(%rcx), %rax + movq %rax, -16(%rsp) ## 8-byte Spill + mulxq %rax, %rbp, %r10 + adcq %rbx, %rbp + movq 16(%rcx), %rax + movq %rax, -32(%rsp) ## 8-byte Spill + mulxq %rax, %rsi, %rbx + adcq %r14, %rsi + movq 24(%rcx), %rax + movq %rax, -72(%rsp) ## 8-byte Spill + mulxq %rax, %rcx, %rdx + adcq %r8, %rcx + adcq $0, %rdi + addq %r11, %rbp + adcq %r10, %rsi + adcq %rbx, %rcx + adcq %rdx, %rdi + movq 8(%r15), %rdx + movq -64(%rsp), %r12 ## 8-byte Reload + mulxq %r12, %rbx, %r9 + movq -56(%rsp), %r15 ## 8-byte Reload + mulxq %r15, %r10, %r11 + addq %rbx, %r11 + mulxq -40(%rsp), %rax, %r8 ## 8-byte Folded Reload + adcq %r9, %rax + mulxq -80(%rsp), %r9, %rbx ## 8-byte Folded Reload + adcq %r8, %r9 + adcq $0, %rbx + addq %rbp, %r10 + adcq %rsi, %r11 + adcq %rcx, %rax + adcq %rdi, %r9 + adcq $0, %rbx + movq %r10, %rdx + imulq %r13, %rdx + movq -48(%rsp), %r14 ## 8-byte Reload + mulxq %r14, %rcx, %r8 + addq %r10, %rcx + mulxq -16(%rsp), %r10, %rdi ## 8-byte Folded Reload + adcq %r11, %r10 + mulxq -32(%rsp), %rcx, %rsi ## 8-byte Folded Reload + adcq %rax, %rcx + mulxq -72(%rsp), %rax, %rdx ## 8-byte Folded Reload + adcq %r9, %rax + adcq $0, %rbx + addq %r8, %r10 + adcq %rdi, %rcx + adcq %rsi, %rax + adcq %rdx, %rbx + movq -24(%rsp), %rdx ## 8-byte Reload + movq 16(%rdx), %rdx + mulxq %r12, %rsi, %r8 + mulxq %r15, %r11, %rbp + addq %rsi, %rbp + movq -40(%rsp), %r12 ## 8-byte Reload + mulxq %r12, %rdi, %r9 + adcq %r8, %rdi + mulxq -80(%rsp), %r8, %rsi ## 8-byte Folded Reload + adcq %r9, %r8 + adcq $0, %rsi + addq %r10, %r11 + adcq %rcx, %rbp + adcq %rax, %rdi + adcq %rbx, %r8 + adcq $0, %rsi + movq %r11, %rdx + imulq %r13, %rdx + mulxq %r14, %rax, %r10 + addq %r11, %rax + movq -16(%rsp), %r14 ## 8-byte Reload + mulxq %r14, %r9, %rbx + adcq %rbp, %r9 + movq -32(%rsp), %r15 ## 8-byte Reload + mulxq %r15, %rax, %rbp + adcq %rdi, %rax + mulxq -72(%rsp), %rcx, %rdx ## 8-byte Folded Reload + adcq %r8, %rcx + adcq $0, %rsi + addq %r10, %r9 + adcq %rbx, %rax + adcq %rbp, %rcx + adcq %rdx, %rsi + movq -24(%rsp), %rdx ## 8-byte Reload + movq 24(%rdx), %rdx + mulxq -64(%rsp), %rbx, %r8 ## 8-byte Folded Reload + mulxq -56(%rsp), %r11, %rbp ## 8-byte Folded Reload + addq %rbx, %rbp + mulxq %r12, %rdi, %r10 + adcq %r8, %rdi + mulxq -80(%rsp), %r8, %rbx ## 8-byte Folded Reload + adcq %r10, %r8 + adcq $0, %rbx + addq %r9, %r11 + adcq %rax, %rbp + adcq %rcx, %rdi + adcq %rsi, %r8 + adcq $0, %rbx + imulq %r11, %r13 + movq %r13, %rdx + movq -48(%rsp), %r12 ## 8-byte Reload + mulxq %r12, %rcx, %r9 + addq %r11, %rcx + mulxq %r14, %r11, %r10 + adcq %rbp, %r11 + movq %r15, %rsi + mulxq %rsi, %rax, %rcx + adcq %rdi, %rax + movq -72(%rsp), %rbp ## 8-byte Reload + mulxq %rbp, %r15, %rdx + adcq %r8, %r15 + adcq $0, %rbx + addq %r9, %r11 + adcq %r10, %rax + adcq %rcx, %r15 + adcq %rdx, %rbx + movq %r11, %rcx + subq %r12, %rcx + movq %rax, %rdx + sbbq %r14, %rdx + movq %r15, %rdi + sbbq %rsi, %rdi + movq %rbx, %rsi + sbbq %rbp, %rsi + cmovsq %r11, %rcx + movq -8(%rsp), %rbp ## 8-byte Reload + movq %rcx, (%rbp) + cmovsq %rax, %rdx + movq %rdx, 8(%rbp) + cmovsq %r15, %rdi + movq %rdi, 16(%rbp) + cmovsq %rbx, %rsi + movq %rsi, 24(%rbp) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_montRed4Lbmi2 + .p2align 4, 0x90 +_mcl_fp_montRed4Lbmi2: ## @mcl_fp_montRed4Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rdx, %rcx + movq %rdi, -8(%rsp) ## 8-byte Spill + movq -8(%rcx), %r13 + movq 
(%rcx), %rax + movq %rax, -32(%rsp) ## 8-byte Spill + movq (%rsi), %r10 + movq %r10, %rdx + imulq %r13, %rdx + movq 24(%rcx), %rdi + mulxq %rdi, %r9, %r15 + movq %rdi, %r14 + movq %r14, -40(%rsp) ## 8-byte Spill + movq 16(%rcx), %rdi + movq %rdi, -48(%rsp) ## 8-byte Spill + mulxq %rdi, %rdi, %rbx + movq 8(%rcx), %rcx + movq %rcx, -56(%rsp) ## 8-byte Spill + mulxq %rcx, %rcx, %r8 + mulxq %rax, %rdx, %rbp + addq %rcx, %rbp + adcq %rdi, %r8 + adcq %r9, %rbx + adcq $0, %r15 + movq 56(%rsi), %r11 + movq 48(%rsi), %rcx + addq %r10, %rdx + movq 40(%rsi), %r12 + adcq 8(%rsi), %rbp + adcq 16(%rsi), %r8 + adcq 24(%rsi), %rbx + adcq 32(%rsi), %r15 + adcq $0, %r12 + adcq $0, %rcx + movq %rcx, -64(%rsp) ## 8-byte Spill + adcq $0, %r11 + sbbq %rsi, %rsi + andl $1, %esi + movq %rbp, %rdx + imulq %r13, %rdx + mulxq %r14, %rax, %r9 + movq %rax, -72(%rsp) ## 8-byte Spill + mulxq -48(%rsp), %r14, %rdi ## 8-byte Folded Reload + mulxq -56(%rsp), %r10, %rcx ## 8-byte Folded Reload + mulxq -32(%rsp), %rdx, %rax ## 8-byte Folded Reload + addq %r10, %rax + adcq %r14, %rcx + adcq -72(%rsp), %rdi ## 8-byte Folded Reload + adcq $0, %r9 + addq %rbp, %rdx + adcq %r8, %rax + adcq %rbx, %rcx + adcq %r15, %rdi + adcq %r12, %r9 + adcq $0, -64(%rsp) ## 8-byte Folded Spill + adcq $0, %r11 + movq %r11, -72(%rsp) ## 8-byte Spill + adcq $0, %rsi + movq %rax, %rdx + imulq %r13, %rdx + movq -40(%rsp), %r15 ## 8-byte Reload + mulxq %r15, %rbp, %r8 + movq %rbp, -16(%rsp) ## 8-byte Spill + movq -48(%rsp), %r11 ## 8-byte Reload + mulxq %r11, %rbx, %r10 + movq %rbx, -24(%rsp) ## 8-byte Spill + mulxq -56(%rsp), %r12, %rbp ## 8-byte Folded Reload + movq -32(%rsp), %r14 ## 8-byte Reload + mulxq %r14, %rdx, %rbx + addq %r12, %rbx + adcq -24(%rsp), %rbp ## 8-byte Folded Reload + adcq -16(%rsp), %r10 ## 8-byte Folded Reload + adcq $0, %r8 + addq %rax, %rdx + adcq %rcx, %rbx + adcq %rdi, %rbp + adcq %r9, %r10 + adcq -64(%rsp), %r8 ## 8-byte Folded Reload + adcq $0, -72(%rsp) ## 8-byte Folded Spill + adcq $0, %rsi + imulq %rbx, %r13 + movq %r13, %rdx + mulxq %r15, %rax, %rdi + movq %rax, -64(%rsp) ## 8-byte Spill + movq %r13, %rdx + mulxq %r11, %r9, %rax + movq -56(%rsp), %r11 ## 8-byte Reload + mulxq %r11, %r12, %rcx + mulxq %r14, %r15, %r13 + addq %r12, %r13 + adcq %r9, %rcx + adcq -64(%rsp), %rax ## 8-byte Folded Reload + adcq $0, %rdi + addq %rbx, %r15 + adcq %rbp, %r13 + adcq %r10, %rcx + adcq %r8, %rax + adcq -72(%rsp), %rdi ## 8-byte Folded Reload + adcq $0, %rsi + movq %r13, %rdx + subq %r14, %rdx + movq %rcx, %rbp + sbbq %r11, %rbp + movq %rax, %r8 + sbbq -48(%rsp), %r8 ## 8-byte Folded Reload + movq %rdi, %rbx + sbbq -40(%rsp), %rbx ## 8-byte Folded Reload + sbbq $0, %rsi + andl $1, %esi + cmovneq %rdi, %rbx + testb %sil, %sil + cmovneq %r13, %rdx + movq -8(%rsp), %rsi ## 8-byte Reload + movq %rdx, (%rsi) + cmovneq %rcx, %rbp + movq %rbp, 8(%rsi) + cmovneq %rax, %r8 + movq %r8, 16(%rsi) + movq %rbx, 24(%rsi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_addPre4Lbmi2 + .p2align 4, 0x90 +_mcl_fp_addPre4Lbmi2: ## @mcl_fp_addPre4Lbmi2 +## BB#0: + movq 24(%rdx), %r8 + movq 24(%rsi), %r9 + movq 16(%rdx), %rax + movq (%rdx), %rcx + movq 8(%rdx), %rdx + addq (%rsi), %rcx + adcq 8(%rsi), %rdx + adcq 16(%rsi), %rax + movq %rcx, (%rdi) + movq %rdx, 8(%rdi) + movq %rax, 16(%rdi) + adcq %r8, %r9 + movq %r9, 24(%rdi) + sbbq %rax, %rax + andl $1, %eax + retq + + .globl _mcl_fp_subPre4Lbmi2 + .p2align 4, 0x90 +_mcl_fp_subPre4Lbmi2: ## @mcl_fp_subPre4Lbmi2 +## BB#0: + movq 24(%rdx), %r8 + movq 
24(%rsi), %r9 + movq 16(%rsi), %r10 + movq (%rsi), %rcx + movq 8(%rsi), %rsi + xorl %eax, %eax + subq (%rdx), %rcx + sbbq 8(%rdx), %rsi + sbbq 16(%rdx), %r10 + movq %rcx, (%rdi) + movq %rsi, 8(%rdi) + movq %r10, 16(%rdi) + sbbq %r8, %r9 + movq %r9, 24(%rdi) + sbbq $0, %rax + andl $1, %eax + retq + + .globl _mcl_fp_shr1_4Lbmi2 + .p2align 4, 0x90 +_mcl_fp_shr1_4Lbmi2: ## @mcl_fp_shr1_4Lbmi2 +## BB#0: + movq 24(%rsi), %rax + movq 16(%rsi), %rcx + movq (%rsi), %rdx + movq 8(%rsi), %rsi + shrdq $1, %rsi, %rdx + movq %rdx, (%rdi) + shrdq $1, %rcx, %rsi + movq %rsi, 8(%rdi) + shrdq $1, %rax, %rcx + movq %rcx, 16(%rdi) + shrq %rax + movq %rax, 24(%rdi) + retq + + .globl _mcl_fp_add4Lbmi2 + .p2align 4, 0x90 +_mcl_fp_add4Lbmi2: ## @mcl_fp_add4Lbmi2 +## BB#0: + movq 24(%rdx), %r10 + movq 24(%rsi), %r8 + movq 16(%rdx), %r9 + movq (%rdx), %rax + movq 8(%rdx), %rdx + addq (%rsi), %rax + adcq 8(%rsi), %rdx + adcq 16(%rsi), %r9 + movq %rax, (%rdi) + movq %rdx, 8(%rdi) + movq %r9, 16(%rdi) + adcq %r10, %r8 + movq %r8, 24(%rdi) + sbbq %rsi, %rsi + andl $1, %esi + subq (%rcx), %rax + sbbq 8(%rcx), %rdx + sbbq 16(%rcx), %r9 + sbbq 24(%rcx), %r8 + sbbq $0, %rsi + testb $1, %sil + jne LBB59_2 +## BB#1: ## %nocarry + movq %rax, (%rdi) + movq %rdx, 8(%rdi) + movq %r9, 16(%rdi) + movq %r8, 24(%rdi) +LBB59_2: ## %carry + retq + + .globl _mcl_fp_addNF4Lbmi2 + .p2align 4, 0x90 +_mcl_fp_addNF4Lbmi2: ## @mcl_fp_addNF4Lbmi2 +## BB#0: + pushq %rbx + movq 24(%rdx), %r8 + movq 16(%rdx), %r9 + movq (%rdx), %r11 + movq 8(%rdx), %r10 + addq (%rsi), %r11 + adcq 8(%rsi), %r10 + adcq 16(%rsi), %r9 + adcq 24(%rsi), %r8 + movq %r11, %rsi + subq (%rcx), %rsi + movq %r10, %rdx + sbbq 8(%rcx), %rdx + movq %r9, %rax + sbbq 16(%rcx), %rax + movq %r8, %rbx + sbbq 24(%rcx), %rbx + testq %rbx, %rbx + cmovsq %r11, %rsi + movq %rsi, (%rdi) + cmovsq %r10, %rdx + movq %rdx, 8(%rdi) + cmovsq %r9, %rax + movq %rax, 16(%rdi) + cmovsq %r8, %rbx + movq %rbx, 24(%rdi) + popq %rbx + retq + + .globl _mcl_fp_sub4Lbmi2 + .p2align 4, 0x90 +_mcl_fp_sub4Lbmi2: ## @mcl_fp_sub4Lbmi2 +## BB#0: + movq 24(%rdx), %r10 + movq 24(%rsi), %r8 + movq 16(%rsi), %r9 + movq (%rsi), %rax + movq 8(%rsi), %r11 + xorl %esi, %esi + subq (%rdx), %rax + sbbq 8(%rdx), %r11 + sbbq 16(%rdx), %r9 + movq %rax, (%rdi) + movq %r11, 8(%rdi) + movq %r9, 16(%rdi) + sbbq %r10, %r8 + movq %r8, 24(%rdi) + sbbq $0, %rsi + testb $1, %sil + jne LBB61_2 +## BB#1: ## %nocarry + retq +LBB61_2: ## %carry + movq 24(%rcx), %r10 + movq 8(%rcx), %rsi + movq 16(%rcx), %rdx + addq (%rcx), %rax + movq %rax, (%rdi) + adcq %r11, %rsi + movq %rsi, 8(%rdi) + adcq %r9, %rdx + movq %rdx, 16(%rdi) + adcq %r8, %r10 + movq %r10, 24(%rdi) + retq + + .globl _mcl_fp_subNF4Lbmi2 + .p2align 4, 0x90 +_mcl_fp_subNF4Lbmi2: ## @mcl_fp_subNF4Lbmi2 +## BB#0: + pushq %r15 + pushq %r14 + pushq %r12 + pushq %rbx + movdqu (%rdx), %xmm0 + movdqu 16(%rdx), %xmm1 + pshufd $78, %xmm1, %xmm2 ## xmm2 = xmm1[2,3,0,1] + movd %xmm2, %r8 + movdqu (%rsi), %xmm2 + movdqu 16(%rsi), %xmm3 + pshufd $78, %xmm3, %xmm4 ## xmm4 = xmm3[2,3,0,1] + movd %xmm4, %r15 + movd %xmm1, %r9 + movd %xmm3, %r11 + pshufd $78, %xmm0, %xmm1 ## xmm1 = xmm0[2,3,0,1] + movd %xmm1, %r10 + pshufd $78, %xmm2, %xmm1 ## xmm1 = xmm2[2,3,0,1] + movd %xmm1, %r14 + movd %xmm0, %rdx + movd %xmm2, %r12 + subq %rdx, %r12 + sbbq %r10, %r14 + sbbq %r9, %r11 + sbbq %r8, %r15 + movq %r15, %rdx + sarq $63, %rdx + movq 24(%rcx), %rsi + andq %rdx, %rsi + movq 16(%rcx), %rax + andq %rdx, %rax + movq 8(%rcx), %rbx + andq %rdx, %rbx + andq (%rcx), %rdx + addq %r12, %rdx + movq %rdx, 
(%rdi) + adcq %r14, %rbx + movq %rbx, 8(%rdi) + adcq %r11, %rax + movq %rax, 16(%rdi) + adcq %r15, %rsi + movq %rsi, 24(%rdi) + popq %rbx + popq %r12 + popq %r14 + popq %r15 + retq + + .globl _mcl_fpDbl_add4Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_add4Lbmi2: ## @mcl_fpDbl_add4Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq 56(%rdx), %r9 + movq 56(%rsi), %r8 + movq 48(%rdx), %r10 + movq 48(%rsi), %r12 + movq 40(%rdx), %r11 + movq 32(%rdx), %r14 + movq 24(%rdx), %r15 + movq 16(%rdx), %rbx + movq (%rdx), %rax + movq 8(%rdx), %rdx + addq (%rsi), %rax + adcq 8(%rsi), %rdx + adcq 16(%rsi), %rbx + movq 40(%rsi), %r13 + movq 24(%rsi), %rbp + movq 32(%rsi), %rsi + movq %rax, (%rdi) + movq %rdx, 8(%rdi) + movq %rbx, 16(%rdi) + adcq %r15, %rbp + movq %rbp, 24(%rdi) + adcq %r14, %rsi + adcq %r11, %r13 + adcq %r10, %r12 + adcq %r9, %r8 + sbbq %rax, %rax + andl $1, %eax + movq %rsi, %rdx + subq (%rcx), %rdx + movq %r13, %rbp + sbbq 8(%rcx), %rbp + movq %r12, %rbx + sbbq 16(%rcx), %rbx + movq %r8, %r9 + sbbq 24(%rcx), %r9 + sbbq $0, %rax + andl $1, %eax + cmovneq %rsi, %rdx + movq %rdx, 32(%rdi) + testb %al, %al + cmovneq %r13, %rbp + movq %rbp, 40(%rdi) + cmovneq %r12, %rbx + movq %rbx, 48(%rdi) + cmovneq %r8, %r9 + movq %r9, 56(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fpDbl_sub4Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_sub4Lbmi2: ## @mcl_fpDbl_sub4Lbmi2 +## BB#0: + pushq %r15 + pushq %r14 + pushq %r12 + pushq %rbx + movq 56(%rdx), %r9 + movq 56(%rsi), %r8 + movq 48(%rdx), %r10 + movq 24(%rdx), %r11 + movq (%rsi), %rbx + xorl %eax, %eax + subq (%rdx), %rbx + movq %rbx, (%rdi) + movq 8(%rsi), %rbx + sbbq 8(%rdx), %rbx + movq %rbx, 8(%rdi) + movq 16(%rsi), %rbx + sbbq 16(%rdx), %rbx + movq %rbx, 16(%rdi) + movq 24(%rsi), %rbx + sbbq %r11, %rbx + movq 40(%rdx), %r11 + movq 32(%rdx), %rdx + movq %rbx, 24(%rdi) + movq 32(%rsi), %r12 + sbbq %rdx, %r12 + movq 48(%rsi), %r14 + movq 40(%rsi), %r15 + sbbq %r11, %r15 + sbbq %r10, %r14 + sbbq %r9, %r8 + movl $0, %edx + sbbq $0, %rdx + andl $1, %edx + movq (%rcx), %rsi + cmoveq %rax, %rsi + testb %dl, %dl + movq 16(%rcx), %rdx + cmoveq %rax, %rdx + movq 24(%rcx), %rbx + cmoveq %rax, %rbx + cmovneq 8(%rcx), %rax + addq %r12, %rsi + movq %rsi, 32(%rdi) + adcq %r15, %rax + movq %rax, 40(%rdi) + adcq %r14, %rdx + movq %rdx, 48(%rdi) + adcq %r8, %rbx + movq %rbx, 56(%rdi) + popq %rbx + popq %r12 + popq %r14 + popq %r15 + retq + + .globl _mcl_fp_mulUnitPre5Lbmi2 + .p2align 4, 0x90 +_mcl_fp_mulUnitPre5Lbmi2: ## @mcl_fp_mulUnitPre5Lbmi2 +## BB#0: + pushq %r14 + pushq %rbx + mulxq 32(%rsi), %r8, %r11 + mulxq 24(%rsi), %r9, %rax + mulxq 16(%rsi), %r10, %rcx + mulxq 8(%rsi), %r14, %rbx + mulxq (%rsi), %rdx, %rsi + movq %rdx, (%rdi) + addq %r14, %rsi + movq %rsi, 8(%rdi) + adcq %r10, %rbx + movq %rbx, 16(%rdi) + adcq %r9, %rcx + movq %rcx, 24(%rdi) + adcq %r8, %rax + movq %rax, 32(%rdi) + adcq $0, %r11 + movq %r11, 40(%rdi) + popq %rbx + popq %r14 + retq + + .globl _mcl_fpDbl_mulPre5Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_mulPre5Lbmi2: ## @mcl_fpDbl_mulPre5Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rdx, -24(%rsp) ## 8-byte Spill + movq %rdi, -40(%rsp) ## 8-byte Spill + movq (%rsi), %r11 + movq 8(%rsi), %r10 + movq (%rdx), %rcx + movq %r10, %rdx + mulxq %rcx, %rax, %r14 + movq %r11, %rdx + mulxq %rcx, %rdx, %rbx + movq %rdx, -56(%rsp) ## 8-byte Spill + movq 24(%rsi), %rbp + movq %rbp, -48(%rsp) ## 8-byte Spill + movq 
16(%rsi), %r15 + addq %rax, %rbx + movq %r15, %rdx + mulxq %rcx, %rax, %r13 + adcq %r14, %rax + movq %rbp, %rdx + mulxq %rcx, %r8, %r12 + adcq %r13, %r8 + movq 32(%rsi), %r14 + movq %r14, %rdx + mulxq %rcx, %r9, %r13 + adcq %r12, %r9 + movq -56(%rsp), %rcx ## 8-byte Reload + movq %rcx, (%rdi) + adcq $0, %r13 + movq -24(%rsp), %rdi ## 8-byte Reload + movq 8(%rdi), %rbp + movq %r11, %rdx + mulxq %rbp, %r12, %r11 + addq %rbx, %r12 + movq %r10, %rdx + mulxq %rbp, %rbx, %rcx + movq %rcx, -56(%rsp) ## 8-byte Spill + adcq %rax, %rbx + movq %r15, %rdx + mulxq %rbp, %rcx, %r10 + adcq %r8, %rcx + movq -48(%rsp), %rdx ## 8-byte Reload + mulxq %rbp, %rax, %r8 + adcq %r9, %rax + movq %r14, %rdx + mulxq %rbp, %r15, %rdx + adcq %r13, %r15 + sbbq %r14, %r14 + andl $1, %r14d + addq %r11, %rbx + movq -40(%rsp), %rbp ## 8-byte Reload + movq %r12, 8(%rbp) + adcq -56(%rsp), %rcx ## 8-byte Folded Reload + adcq %r10, %rax + adcq %r8, %r15 + adcq %rdx, %r14 + movq (%rsi), %rdx + movq %rdx, -56(%rsp) ## 8-byte Spill + movq 8(%rsi), %r8 + movq %r8, -48(%rsp) ## 8-byte Spill + movq 16(%rdi), %rbp + mulxq %rbp, %r12, %rdx + movq %rdx, -8(%rsp) ## 8-byte Spill + addq %rbx, %r12 + movq %r8, %rdx + mulxq %rbp, %rbx, %rdx + movq %rdx, -16(%rsp) ## 8-byte Spill + adcq %rcx, %rbx + movq 16(%rsi), %r11 + movq %r11, %rdx + mulxq %rbp, %rcx, %rdx + movq %rdx, -32(%rsp) ## 8-byte Spill + adcq %rax, %rcx + movq 24(%rsi), %r13 + movq %r13, %rdx + mulxq %rbp, %r9, %r10 + adcq %r15, %r9 + movq 32(%rsi), %r15 + movq %r15, %rdx + mulxq %rbp, %r8, %rdx + adcq %r14, %r8 + sbbq %r14, %r14 + andl $1, %r14d + addq -8(%rsp), %rbx ## 8-byte Folded Reload + adcq -16(%rsp), %rcx ## 8-byte Folded Reload + adcq -32(%rsp), %r9 ## 8-byte Folded Reload + adcq %r10, %r8 + adcq %rdx, %r14 + movq -40(%rsp), %r10 ## 8-byte Reload + movq %r12, 16(%r10) + movq %rdi, %rbp + movq 24(%rbp), %rax + movq -56(%rsp), %rdx ## 8-byte Reload + mulxq %rax, %r12, %rdi + addq %rbx, %r12 + movq -48(%rsp), %rdx ## 8-byte Reload + mulxq %rax, %rbx, %rdx + movq %rdx, -48(%rsp) ## 8-byte Spill + adcq %rcx, %rbx + movq %r11, %rdx + mulxq %rax, %rcx, %r11 + adcq %r9, %rcx + movq %r13, %rdx + mulxq %rax, %r13, %r9 + adcq %r8, %r13 + movq %r15, %rdx + mulxq %rax, %r8, %rdx + adcq %r14, %r8 + sbbq %r14, %r14 + andl $1, %r14d + addq %rdi, %rbx + movq %r12, 24(%r10) + movq %r10, %rdi + adcq -48(%rsp), %rcx ## 8-byte Folded Reload + adcq %r11, %r13 + adcq %r9, %r8 + adcq %rdx, %r14 + movq 32(%rbp), %rdx + mulxq 8(%rsi), %rax, %r9 + mulxq (%rsi), %rbp, %r10 + addq %rbx, %rbp + adcq %rcx, %rax + mulxq 16(%rsi), %rbx, %r11 + adcq %r13, %rbx + movq %rbp, 32(%rdi) + mulxq 32(%rsi), %rcx, %r15 + mulxq 24(%rsi), %rsi, %rdx + adcq %r8, %rsi + adcq %r14, %rcx + sbbq %rbp, %rbp + andl $1, %ebp + addq %r10, %rax + movq %rax, 40(%rdi) + adcq %r9, %rbx + movq %rbx, 48(%rdi) + adcq %r11, %rsi + movq %rsi, 56(%rdi) + adcq %rdx, %rcx + movq %rcx, 64(%rdi) + adcq %r15, %rbp + movq %rbp, 72(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fpDbl_sqrPre5Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_sqrPre5Lbmi2: ## @mcl_fpDbl_sqrPre5Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq 16(%rsi), %r11 + movq (%rsi), %rax + movq 8(%rsi), %rcx + movq %r11, %rdx + mulxq %rax, %rbx, %r15 + movq 32(%rsi), %r9 + movq 24(%rsi), %r13 + movq %rcx, %rdx + mulxq %rax, %r12, %rbp + movq %rbp, -16(%rsp) ## 8-byte Spill + movq %rax, %rdx + mulxq %rax, %rdx, %r14 + movq %rdx, -24(%rsp) ## 8-byte Spill + addq %r12, %r14 + 
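+## NOTE (reader annotation, not emitted by the generator): _mcl_fpDbl_sqrPre5Lbmi2 is the
+## squaring form of the schoolbook multiply above; symmetric cross products are reused
+## rather than recomputed -- e.g. %r12 (the low half of a0*a1) is accumulated a second
+## time in the next column instead of issuing another mulx for a1*a0.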
adcq %rbp, %rbx + movq %r13, %rdx + mulxq %rax, %r8, %r10 + adcq %r15, %r8 + movq %r9, %rdx + movq %r9, -8(%rsp) ## 8-byte Spill + mulxq %rax, %rbp, %r15 + adcq %r10, %rbp + movq -24(%rsp), %rax ## 8-byte Reload + movq %rax, (%rdi) + adcq $0, %r15 + addq %r12, %r14 + movq %rcx, %rdx + mulxq %rcx, %rax, %rdx + movq %rdx, -24(%rsp) ## 8-byte Spill + adcq %rbx, %rax + movq %r11, %rdx + mulxq %rcx, %rbx, %r10 + adcq %r8, %rbx + movq %r13, %rdx + mulxq %rcx, %r13, %r8 + adcq %rbp, %r13 + movq %r9, %rdx + mulxq %rcx, %r12, %rcx + adcq %r15, %r12 + sbbq %r15, %r15 + andl $1, %r15d + addq -16(%rsp), %rax ## 8-byte Folded Reload + movq %r14, 8(%rdi) + adcq -24(%rsp), %rbx ## 8-byte Folded Reload + adcq %r10, %r13 + adcq %r8, %r12 + adcq %rcx, %r15 + movq (%rsi), %r9 + movq 8(%rsi), %r10 + movq %r9, %rdx + mulxq %r11, %rbp, %rcx + movq %rcx, -16(%rsp) ## 8-byte Spill + addq %rax, %rbp + movq %r10, %rdx + mulxq %r11, %rax, %r8 + adcq %rbx, %rax + movq %r11, %rdx + mulxq %r11, %r14, %rcx + movq %rcx, -24(%rsp) ## 8-byte Spill + adcq %r13, %r14 + movq 24(%rsi), %rcx + movq %rcx, %rdx + mulxq %r11, %rbx, %r13 + adcq %r12, %rbx + movq -8(%rsp), %rdx ## 8-byte Reload + mulxq %r11, %r12, %rdx + adcq %r15, %r12 + sbbq %r15, %r15 + andl $1, %r15d + addq -16(%rsp), %rax ## 8-byte Folded Reload + adcq %r8, %r14 + movq %rbp, 16(%rdi) + adcq -24(%rsp), %rbx ## 8-byte Folded Reload + adcq %r13, %r12 + adcq %rdx, %r15 + movq %r10, %rdx + mulxq %rcx, %r10, %rdx + movq %rdx, -8(%rsp) ## 8-byte Spill + movq %r9, %rdx + mulxq %rcx, %r13, %rdx + movq %rdx, -16(%rsp) ## 8-byte Spill + addq %rax, %r13 + movq 16(%rsi), %r8 + movq 32(%rsi), %rax + adcq %r14, %r10 + movq %r8, %rdx + mulxq %rcx, %r9, %r14 + adcq %rbx, %r9 + movq %rcx, %rdx + mulxq %rcx, %r11, %rbp + adcq %r12, %r11 + movq %rax, %rdx + mulxq %rcx, %r12, %rdx + adcq %r15, %r12 + sbbq %rbx, %rbx + andl $1, %ebx + addq -16(%rsp), %r10 ## 8-byte Folded Reload + movq %r13, 24(%rdi) + adcq -8(%rsp), %r9 ## 8-byte Folded Reload + adcq %r14, %r11 + adcq %rbp, %r12 + adcq %rdx, %rbx + movq %rax, %rdx + mulxq 24(%rsi), %rbp, %r14 + mulxq (%rsi), %rdx, %r15 + addq %r10, %rdx + movq %rdx, 32(%rdi) + movq %rax, %rdx + mulxq 8(%rsi), %rsi, %r10 + adcq %r9, %rsi + movq %r8, %rdx + mulxq %rax, %rcx, %r8 + adcq %r11, %rcx + adcq %r12, %rbp + movq %rax, %rdx + mulxq %rax, %rdx, %rax + adcq %rbx, %rdx + sbbq %rbx, %rbx + andl $1, %ebx + addq %r15, %rsi + movq %rsi, 40(%rdi) + adcq %r10, %rcx + movq %rcx, 48(%rdi) + adcq %r8, %rbp + movq %rbp, 56(%rdi) + adcq %r14, %rdx + movq %rdx, 64(%rdi) + adcq %rax, %rbx + movq %rbx, 72(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_mont5Lbmi2 + .p2align 4, 0x90 +_mcl_fp_mont5Lbmi2: ## @mcl_fp_mont5Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rdx, -96(%rsp) ## 8-byte Spill + movq %rdi, -8(%rsp) ## 8-byte Spill + movq 32(%rsi), %rdi + movq %rdi, -104(%rsp) ## 8-byte Spill + movq (%rdx), %rax + movq %rdi, %rdx + mulxq %rax, %r10, %rbx + movq 24(%rsi), %rdx + movq %rdx, -24(%rsp) ## 8-byte Spill + mulxq %rax, %r12, %r14 + movq 16(%rsi), %rdx + movq %rdx, -32(%rsp) ## 8-byte Spill + mulxq %rax, %r13, %r11 + movq (%rsi), %rbp + movq %rbp, -40(%rsp) ## 8-byte Spill + movq 8(%rsi), %rdx + movq %rdx, -48(%rsp) ## 8-byte Spill + mulxq %rax, %rdi, %r9 + movq %rbp, %rdx + mulxq %rax, %r15, %r8 + addq %rdi, %r8 + adcq %r13, %r9 + adcq %r12, %r11 + adcq %r10, %r14 + adcq $0, %rbx + movq %rbx, -112(%rsp) ## 8-byte Spill + movq -8(%rcx), %rax 
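+## NOTE (reader annotation): the word at -8(%rcx) sits just below the modulus limbs;
+## judging by the imulq/mulxq pattern that follows, it holds the Montgomery factor
+## n' = -p^-1 mod 2^64, used each round to pick the quotient digit q so that the low
+## limb of acc + q*p cancels.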
+ movq %rax, -16(%rsp) ## 8-byte Spill + movq %r15, %rdx + imulq %rax, %rdx + movq 32(%rcx), %rax + movq %rax, -56(%rsp) ## 8-byte Spill + mulxq %rax, %rax, %r12 + movq %rax, -120(%rsp) ## 8-byte Spill + movq 24(%rcx), %rax + movq %rax, -64(%rsp) ## 8-byte Spill + mulxq %rax, %r13, %r10 + movq 8(%rcx), %rax + movq %rax, -72(%rsp) ## 8-byte Spill + mulxq %rax, %rdi, %rbp + movq (%rcx), %rax + movq %rax, -80(%rsp) ## 8-byte Spill + mulxq %rax, %rsi, %rbx + addq %rdi, %rbx + movq 16(%rcx), %rax + movq %rax, -88(%rsp) ## 8-byte Spill + mulxq %rax, %rdi, %rcx + adcq %rbp, %rdi + adcq %r13, %rcx + adcq -120(%rsp), %r10 ## 8-byte Folded Reload + adcq $0, %r12 + addq %r15, %rsi + adcq %r8, %rbx + adcq %r9, %rdi + adcq %r11, %rcx + adcq %r14, %r10 + adcq -112(%rsp), %r12 ## 8-byte Folded Reload + sbbq %rbp, %rbp + andl $1, %ebp + movq -96(%rsp), %rax ## 8-byte Reload + movq 8(%rax), %rdx + mulxq -104(%rsp), %rax, %r14 ## 8-byte Folded Reload + movq %rax, -112(%rsp) ## 8-byte Spill + mulxq -24(%rsp), %rax, %r15 ## 8-byte Folded Reload + movq %rax, -120(%rsp) ## 8-byte Spill + mulxq -32(%rsp), %r13, %r9 ## 8-byte Folded Reload + mulxq -48(%rsp), %r8, %rsi ## 8-byte Folded Reload + mulxq -40(%rsp), %r11, %rax ## 8-byte Folded Reload + addq %r8, %rax + adcq %r13, %rsi + adcq -120(%rsp), %r9 ## 8-byte Folded Reload + adcq -112(%rsp), %r15 ## 8-byte Folded Reload + adcq $0, %r14 + addq %rbx, %r11 + adcq %rdi, %rax + adcq %rcx, %rsi + adcq %r10, %r9 + adcq %r12, %r15 + adcq %rbp, %r14 + sbbq %r12, %r12 + andl $1, %r12d + movq %r11, %rdx + imulq -16(%rsp), %rdx ## 8-byte Folded Reload + mulxq -56(%rsp), %rcx, %r10 ## 8-byte Folded Reload + movq %rcx, -112(%rsp) ## 8-byte Spill + mulxq -64(%rsp), %rcx, %rdi ## 8-byte Folded Reload + movq %rcx, -120(%rsp) ## 8-byte Spill + mulxq -88(%rsp), %r13, %rcx ## 8-byte Folded Reload + mulxq -72(%rsp), %r8, %rbx ## 8-byte Folded Reload + mulxq -80(%rsp), %rdx, %rbp ## 8-byte Folded Reload + addq %r8, %rbp + adcq %r13, %rbx + adcq -120(%rsp), %rcx ## 8-byte Folded Reload + adcq -112(%rsp), %rdi ## 8-byte Folded Reload + adcq $0, %r10 + addq %r11, %rdx + adcq %rax, %rbp + adcq %rsi, %rbx + adcq %r9, %rcx + adcq %r15, %rdi + adcq %r14, %r10 + adcq $0, %r12 + movq -96(%rsp), %rax ## 8-byte Reload + movq 16(%rax), %rdx + mulxq -104(%rsp), %rax, %r15 ## 8-byte Folded Reload + movq %rax, -112(%rsp) ## 8-byte Spill + mulxq -24(%rsp), %rax, %r11 ## 8-byte Folded Reload + movq %rax, -120(%rsp) ## 8-byte Spill + mulxq -32(%rsp), %r13, %r9 ## 8-byte Folded Reload + mulxq -48(%rsp), %rsi, %r8 ## 8-byte Folded Reload + mulxq -40(%rsp), %r14, %rax ## 8-byte Folded Reload + addq %rsi, %rax + adcq %r13, %r8 + adcq -120(%rsp), %r9 ## 8-byte Folded Reload + adcq -112(%rsp), %r11 ## 8-byte Folded Reload + adcq $0, %r15 + addq %rbp, %r14 + adcq %rbx, %rax + adcq %rcx, %r8 + adcq %rdi, %r9 + adcq %r10, %r11 + adcq %r12, %r15 + sbbq %r13, %r13 + andl $1, %r13d + movq %r14, %rdx + imulq -16(%rsp), %rdx ## 8-byte Folded Reload + mulxq -56(%rsp), %rcx, %r12 ## 8-byte Folded Reload + movq %rcx, -112(%rsp) ## 8-byte Spill + mulxq -64(%rsp), %rcx, %r10 ## 8-byte Folded Reload + movq %rcx, -120(%rsp) ## 8-byte Spill + mulxq -88(%rsp), %rdi, %rsi ## 8-byte Folded Reload + mulxq -72(%rsp), %rcx, %rbx ## 8-byte Folded Reload + mulxq -80(%rsp), %rdx, %rbp ## 8-byte Folded Reload + addq %rcx, %rbp + adcq %rdi, %rbx + adcq -120(%rsp), %rsi ## 8-byte Folded Reload + adcq -112(%rsp), %r10 ## 8-byte Folded Reload + adcq $0, %r12 + addq %r14, %rdx + adcq %rax, %rbp + adcq %r8, %rbx + adcq %r9, %rsi + adcq 
%r11, %r10 + adcq %r15, %r12 + adcq $0, %r13 + movq -96(%rsp), %rax ## 8-byte Reload + movq 24(%rax), %rdx + mulxq -104(%rsp), %rcx, %rax ## 8-byte Folded Reload + movq %rcx, -120(%rsp) ## 8-byte Spill + movq %rax, -112(%rsp) ## 8-byte Spill + mulxq -24(%rsp), %r11, %r14 ## 8-byte Folded Reload + mulxq -32(%rsp), %r8, %r9 ## 8-byte Folded Reload + mulxq -48(%rsp), %rax, %rdi ## 8-byte Folded Reload + mulxq -40(%rsp), %r15, %rcx ## 8-byte Folded Reload + addq %rax, %rcx + adcq %r8, %rdi + adcq %r11, %r9 + adcq -120(%rsp), %r14 ## 8-byte Folded Reload + movq -112(%rsp), %rax ## 8-byte Reload + adcq $0, %rax + addq %rbp, %r15 + adcq %rbx, %rcx + adcq %rsi, %rdi + adcq %r10, %r9 + adcq %r12, %r14 + adcq %r13, %rax + movq %rax, -112(%rsp) ## 8-byte Spill + sbbq %r12, %r12 + andl $1, %r12d + movq %r15, %rdx + imulq -16(%rsp), %rdx ## 8-byte Folded Reload + mulxq -56(%rsp), %rax, %rbp ## 8-byte Folded Reload + movq %rax, -120(%rsp) ## 8-byte Spill + mulxq -64(%rsp), %r13, %r10 ## 8-byte Folded Reload + mulxq -88(%rsp), %rbx, %r8 ## 8-byte Folded Reload + mulxq -72(%rsp), %rsi, %r11 ## 8-byte Folded Reload + mulxq -80(%rsp), %rdx, %rax ## 8-byte Folded Reload + addq %rsi, %rax + adcq %rbx, %r11 + adcq %r13, %r8 + adcq -120(%rsp), %r10 ## 8-byte Folded Reload + adcq $0, %rbp + addq %r15, %rdx + adcq %rcx, %rax + adcq %rdi, %r11 + adcq %r9, %r8 + adcq %r14, %r10 + adcq -112(%rsp), %rbp ## 8-byte Folded Reload + adcq $0, %r12 + movq -96(%rsp), %rcx ## 8-byte Reload + movq 32(%rcx), %rdx + mulxq -104(%rsp), %rcx, %r14 ## 8-byte Folded Reload + movq %rcx, -96(%rsp) ## 8-byte Spill + mulxq -24(%rsp), %rcx, %rbx ## 8-byte Folded Reload + movq %rcx, -104(%rsp) ## 8-byte Spill + mulxq -32(%rsp), %rsi, %r15 ## 8-byte Folded Reload + mulxq -48(%rsp), %rcx, %r9 ## 8-byte Folded Reload + mulxq -40(%rsp), %r13, %rdi ## 8-byte Folded Reload + addq %rcx, %rdi + adcq %rsi, %r9 + adcq -104(%rsp), %r15 ## 8-byte Folded Reload + adcq -96(%rsp), %rbx ## 8-byte Folded Reload + adcq $0, %r14 + addq %rax, %r13 + adcq %r11, %rdi + adcq %r8, %r9 + adcq %r10, %r15 + adcq %rbp, %rbx + adcq %r12, %r14 + sbbq %rax, %rax + movq -16(%rsp), %rdx ## 8-byte Reload + imulq %r13, %rdx + mulxq -80(%rsp), %r10, %rcx ## 8-byte Folded Reload + mulxq -72(%rsp), %r8, %rsi ## 8-byte Folded Reload + addq %rcx, %r8 + mulxq -88(%rsp), %rbp, %r11 ## 8-byte Folded Reload + adcq %rsi, %rbp + mulxq -64(%rsp), %rcx, %r12 ## 8-byte Folded Reload + adcq %r11, %rcx + mulxq -56(%rsp), %rsi, %r11 ## 8-byte Folded Reload + adcq %r12, %rsi + adcq $0, %r11 + andl $1, %eax + addq %r13, %r10 + adcq %rdi, %r8 + adcq %r9, %rbp + adcq %r15, %rcx + adcq %rbx, %rsi + adcq %r14, %r11 + adcq $0, %rax + movq %r8, %rdi + subq -80(%rsp), %rdi ## 8-byte Folded Reload + movq %rbp, %rbx + sbbq -72(%rsp), %rbx ## 8-byte Folded Reload + movq %rcx, %r9 + sbbq -88(%rsp), %r9 ## 8-byte Folded Reload + movq %rsi, %rdx + sbbq -64(%rsp), %rdx ## 8-byte Folded Reload + movq %r11, %r10 + sbbq -56(%rsp), %r10 ## 8-byte Folded Reload + sbbq $0, %rax + andl $1, %eax + cmovneq %rsi, %rdx + testb %al, %al + cmovneq %r8, %rdi + movq -8(%rsp), %rax ## 8-byte Reload + movq %rdi, (%rax) + cmovneq %rbp, %rbx + movq %rbx, 8(%rax) + cmovneq %rcx, %r9 + movq %r9, 16(%rax) + movq %rdx, 24(%rax) + cmovneq %r11, %r10 + movq %r10, 32(%rax) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_montNF5Lbmi2 + .p2align 4, 0x90 +_mcl_fp_montNF5Lbmi2: ## @mcl_fp_montNF5Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq 
%rbx + movq %rdx, -96(%rsp) ## 8-byte Spill + movq %rdi, -8(%rsp) ## 8-byte Spill + movq (%rsi), %r13 + movq 8(%rsi), %rbp + movq %rbp, -104(%rsp) ## 8-byte Spill + movq (%rdx), %rax + movq %rbp, %rdx + mulxq %rax, %rbp, %r9 + movq %r13, %rdx + movq %r13, -24(%rsp) ## 8-byte Spill + mulxq %rax, %r8, %r10 + movq 16(%rsi), %rdx + movq %rdx, -64(%rsp) ## 8-byte Spill + addq %rbp, %r10 + mulxq %rax, %rbp, %rbx + adcq %r9, %rbp + movq 24(%rsi), %rdx + movq %rdx, -72(%rsp) ## 8-byte Spill + mulxq %rax, %r15, %r9 + adcq %rbx, %r15 + movq 32(%rsi), %rdx + movq %rdx, -80(%rsp) ## 8-byte Spill + mulxq %rax, %rax, %r11 + adcq %r9, %rax + adcq $0, %r11 + movq -8(%rcx), %rsi + movq %rsi, -32(%rsp) ## 8-byte Spill + movq %r8, %rdx + imulq %rsi, %rdx + movq (%rcx), %rsi + movq %rsi, -48(%rsp) ## 8-byte Spill + mulxq %rsi, %rbx, %r14 + addq %r8, %rbx + movq 8(%rcx), %rsi + movq %rsi, -40(%rsp) ## 8-byte Spill + mulxq %rsi, %rbx, %r12 + adcq %r10, %rbx + movq 16(%rcx), %rsi + movq %rsi, -16(%rsp) ## 8-byte Spill + mulxq %rsi, %r10, %rdi + adcq %rbp, %r10 + movq 24(%rcx), %rsi + movq %rsi, -88(%rsp) ## 8-byte Spill + mulxq %rsi, %r9, %rbp + adcq %r15, %r9 + movq 32(%rcx), %rcx + movq %rcx, -56(%rsp) ## 8-byte Spill + mulxq %rcx, %r8, %rcx + adcq %rax, %r8 + adcq $0, %r11 + addq %r14, %rbx + adcq %r12, %r10 + adcq %rdi, %r9 + adcq %rbp, %r8 + adcq %rcx, %r11 + movq -96(%rsp), %rax ## 8-byte Reload + movq 8(%rax), %rdx + mulxq -104(%rsp), %rcx, %rsi ## 8-byte Folded Reload + mulxq %r13, %r14, %rax + addq %rcx, %rax + mulxq -64(%rsp), %rcx, %rdi ## 8-byte Folded Reload + adcq %rsi, %rcx + mulxq -72(%rsp), %rsi, %r15 ## 8-byte Folded Reload + adcq %rdi, %rsi + mulxq -80(%rsp), %rdi, %rbp ## 8-byte Folded Reload + adcq %r15, %rdi + adcq $0, %rbp + addq %rbx, %r14 + adcq %r10, %rax + adcq %r9, %rcx + adcq %r8, %rsi + adcq %r11, %rdi + adcq $0, %rbp + movq %r14, %rdx + movq -32(%rsp), %r12 ## 8-byte Reload + imulq %r12, %rdx + mulxq -48(%rsp), %rbx, %r15 ## 8-byte Folded Reload + addq %r14, %rbx + movq -40(%rsp), %r13 ## 8-byte Reload + mulxq %r13, %r8, %rbx + adcq %rax, %r8 + mulxq -16(%rsp), %r9, %rax ## 8-byte Folded Reload + adcq %rcx, %r9 + mulxq -88(%rsp), %r10, %rcx ## 8-byte Folded Reload + adcq %rsi, %r10 + mulxq -56(%rsp), %r11, %rdx ## 8-byte Folded Reload + adcq %rdi, %r11 + adcq $0, %rbp + addq %r15, %r8 + adcq %rbx, %r9 + adcq %rax, %r10 + adcq %rcx, %r11 + adcq %rdx, %rbp + movq -96(%rsp), %rax ## 8-byte Reload + movq 16(%rax), %rdx + mulxq -104(%rsp), %rcx, %rax ## 8-byte Folded Reload + mulxq -24(%rsp), %r14, %rsi ## 8-byte Folded Reload + addq %rcx, %rsi + mulxq -64(%rsp), %rbx, %rcx ## 8-byte Folded Reload + adcq %rax, %rbx + mulxq -72(%rsp), %rdi, %r15 ## 8-byte Folded Reload + adcq %rcx, %rdi + mulxq -80(%rsp), %rcx, %rax ## 8-byte Folded Reload + adcq %r15, %rcx + adcq $0, %rax + addq %r8, %r14 + adcq %r9, %rsi + adcq %r10, %rbx + adcq %r11, %rdi + adcq %rbp, %rcx + adcq $0, %rax + movq %r14, %rdx + imulq %r12, %rdx + movq -48(%rsp), %r12 ## 8-byte Reload + mulxq %r12, %rbp, %r15 + addq %r14, %rbp + mulxq %r13, %r8, %rbp + adcq %rsi, %r8 + movq -16(%rsp), %r13 ## 8-byte Reload + mulxq %r13, %r9, %rsi + adcq %rbx, %r9 + mulxq -88(%rsp), %r10, %rbx ## 8-byte Folded Reload + adcq %rdi, %r10 + mulxq -56(%rsp), %r11, %rdx ## 8-byte Folded Reload + adcq %rcx, %r11 + adcq $0, %rax + addq %r15, %r8 + adcq %rbp, %r9 + adcq %rsi, %r10 + adcq %rbx, %r11 + adcq %rdx, %rax + movq -96(%rsp), %rcx ## 8-byte Reload + movq 24(%rcx), %rdx + mulxq -104(%rsp), %rdi, %rsi ## 8-byte Folded Reload + mulxq 
-24(%rsp), %r14, %rcx ## 8-byte Folded Reload + addq %rdi, %rcx + mulxq -64(%rsp), %rbx, %rdi ## 8-byte Folded Reload + adcq %rsi, %rbx + mulxq -72(%rsp), %rsi, %r15 ## 8-byte Folded Reload + adcq %rdi, %rsi + mulxq -80(%rsp), %rdi, %rbp ## 8-byte Folded Reload + adcq %r15, %rdi + adcq $0, %rbp + addq %r8, %r14 + adcq %r9, %rcx + adcq %r10, %rbx + adcq %r11, %rsi + adcq %rax, %rdi + adcq $0, %rbp + movq %r14, %rdx + imulq -32(%rsp), %rdx ## 8-byte Folded Reload + mulxq %r12, %rax, %r11 + addq %r14, %rax + mulxq -40(%rsp), %r8, %r14 ## 8-byte Folded Reload + adcq %rcx, %r8 + mulxq %r13, %r9, %rax + adcq %rbx, %r9 + movq -88(%rsp), %r12 ## 8-byte Reload + mulxq %r12, %r10, %rbx + adcq %rsi, %r10 + mulxq -56(%rsp), %rcx, %rdx ## 8-byte Folded Reload + adcq %rdi, %rcx + adcq $0, %rbp + addq %r11, %r8 + adcq %r14, %r9 + adcq %rax, %r10 + adcq %rbx, %rcx + adcq %rdx, %rbp + movq -96(%rsp), %rax ## 8-byte Reload + movq 32(%rax), %rdx + mulxq -104(%rsp), %rdi, %rbx ## 8-byte Folded Reload + mulxq -24(%rsp), %r14, %rsi ## 8-byte Folded Reload + addq %rdi, %rsi + mulxq -64(%rsp), %rdi, %rax ## 8-byte Folded Reload + adcq %rbx, %rdi + mulxq -72(%rsp), %rbx, %r15 ## 8-byte Folded Reload + adcq %rax, %rbx + mulxq -80(%rsp), %r11, %rax ## 8-byte Folded Reload + adcq %r15, %r11 + adcq $0, %rax + addq %r8, %r14 + adcq %r9, %rsi + adcq %r10, %rdi + adcq %rcx, %rbx + adcq %rbp, %r11 + adcq $0, %rax + movq -32(%rsp), %rdx ## 8-byte Reload + imulq %r14, %rdx + movq -48(%rsp), %r10 ## 8-byte Reload + mulxq %r10, %rcx, %rbp + movq %rbp, -96(%rsp) ## 8-byte Spill + addq %r14, %rcx + movq -40(%rsp), %r9 ## 8-byte Reload + mulxq %r9, %r14, %rcx + movq %rcx, -104(%rsp) ## 8-byte Spill + adcq %rsi, %r14 + movq %r13, %r8 + mulxq %r8, %r15, %r13 + adcq %rdi, %r15 + mulxq %r12, %rbp, %rcx + adcq %rbx, %rbp + movq -56(%rsp), %rbx ## 8-byte Reload + mulxq %rbx, %r12, %rdx + adcq %r11, %r12 + adcq $0, %rax + addq -96(%rsp), %r14 ## 8-byte Folded Reload + adcq -104(%rsp), %r15 ## 8-byte Folded Reload + adcq %r13, %rbp + adcq %rcx, %r12 + adcq %rdx, %rax + movq %r14, %rcx + subq %r10, %rcx + movq %r15, %rsi + sbbq %r9, %rsi + movq %rbp, %rdi + sbbq %r8, %rdi + movq %r12, %r8 + sbbq -88(%rsp), %r8 ## 8-byte Folded Reload + movq %rax, %rdx + sbbq %rbx, %rdx + movq %rdx, %rbx + sarq $63, %rbx + cmovsq %r14, %rcx + movq -8(%rsp), %rbx ## 8-byte Reload + movq %rcx, (%rbx) + cmovsq %r15, %rsi + movq %rsi, 8(%rbx) + cmovsq %rbp, %rdi + movq %rdi, 16(%rbx) + cmovsq %r12, %r8 + movq %r8, 24(%rbx) + cmovsq %rax, %rdx + movq %rdx, 32(%rbx) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_montRed5Lbmi2 + .p2align 4, 0x90 +_mcl_fp_montRed5Lbmi2: ## @mcl_fp_montRed5Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rdx, %rcx + movq %rdi, -8(%rsp) ## 8-byte Spill + movq -8(%rcx), %rax + movq %rax, -104(%rsp) ## 8-byte Spill + movq (%rsi), %r15 + movq %r15, %rdx + imulq %rax, %rdx + movq 32(%rcx), %rax + movq %rax, -72(%rsp) ## 8-byte Spill + mulxq %rax, %r8, %r14 + movq 24(%rcx), %r12 + mulxq %r12, %r10, %r13 + movq %r12, -56(%rsp) ## 8-byte Spill + movq 16(%rcx), %r9 + mulxq %r9, %rdi, %rbp + movq %r9, -64(%rsp) ## 8-byte Spill + movq (%rcx), %rbx + movq %rbx, -40(%rsp) ## 8-byte Spill + movq 8(%rcx), %rax + movq %rax, -80(%rsp) ## 8-byte Spill + mulxq %rax, %rax, %r11 + mulxq %rbx, %rdx, %rcx + addq %rax, %rcx + adcq %rdi, %r11 + adcq %r10, %rbp + adcq %r8, %r13 + adcq $0, %r14 + addq %r15, %rdx + movq 72(%rsi), %rax + movq 64(%rsi), %rdx 
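+## NOTE (reader annotation): montRed5L reduces a 10-limb value in place. Each round
+## multiplies the current low limb by the factor loaded from -8 of the modulus block
+## (spilled at -104(%rsp)) and adds q*p so that limb drops out; the upper input limbs
+## (48..72(%rsi)) loaded here are folded in through the carry chain.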
+ adcq 8(%rsi), %rcx + adcq 16(%rsi), %r11 + adcq 24(%rsi), %rbp + adcq 32(%rsi), %r13 + adcq 40(%rsi), %r14 + movq %r14, -112(%rsp) ## 8-byte Spill + movq 56(%rsi), %rdi + movq 48(%rsi), %rsi + adcq $0, %rsi + movq %rsi, -32(%rsp) ## 8-byte Spill + adcq $0, %rdi + movq %rdi, -88(%rsp) ## 8-byte Spill + adcq $0, %rdx + movq %rdx, -96(%rsp) ## 8-byte Spill + adcq $0, %rax + movq %rax, -48(%rsp) ## 8-byte Spill + sbbq %rsi, %rsi + andl $1, %esi + movq %rcx, %rdx + movq -104(%rsp), %r14 ## 8-byte Reload + imulq %r14, %rdx + mulxq -72(%rsp), %rax, %r15 ## 8-byte Folded Reload + movq %rax, -16(%rsp) ## 8-byte Spill + mulxq %r12, %rax, %r10 + movq %rax, -24(%rsp) ## 8-byte Spill + mulxq %r9, %rbx, %r8 + movq -80(%rsp), %r12 ## 8-byte Reload + mulxq %r12, %r9, %rdi + mulxq -40(%rsp), %rdx, %rax ## 8-byte Folded Reload + addq %r9, %rax + adcq %rbx, %rdi + adcq -24(%rsp), %r8 ## 8-byte Folded Reload + adcq -16(%rsp), %r10 ## 8-byte Folded Reload + adcq $0, %r15 + addq %rcx, %rdx + adcq %r11, %rax + adcq %rbp, %rdi + adcq %r13, %r8 + adcq -112(%rsp), %r10 ## 8-byte Folded Reload + adcq -32(%rsp), %r15 ## 8-byte Folded Reload + adcq $0, -88(%rsp) ## 8-byte Folded Spill + adcq $0, -96(%rsp) ## 8-byte Folded Spill + adcq $0, -48(%rsp) ## 8-byte Folded Spill + adcq $0, %rsi + movq %rax, %rdx + imulq %r14, %rdx + mulxq -72(%rsp), %rcx, %r13 ## 8-byte Folded Reload + movq %rcx, -112(%rsp) ## 8-byte Spill + mulxq -56(%rsp), %rcx, %r14 ## 8-byte Folded Reload + movq %rcx, -32(%rsp) ## 8-byte Spill + mulxq -64(%rsp), %r11, %rbx ## 8-byte Folded Reload + mulxq %r12, %r9, %rbp + mulxq -40(%rsp), %rdx, %rcx ## 8-byte Folded Reload + addq %r9, %rcx + adcq %r11, %rbp + adcq -32(%rsp), %rbx ## 8-byte Folded Reload + adcq -112(%rsp), %r14 ## 8-byte Folded Reload + adcq $0, %r13 + addq %rax, %rdx + adcq %rdi, %rcx + adcq %r8, %rbp + adcq %r10, %rbx + adcq %r15, %r14 + adcq -88(%rsp), %r13 ## 8-byte Folded Reload + adcq $0, -96(%rsp) ## 8-byte Folded Spill + adcq $0, -48(%rsp) ## 8-byte Folded Spill + adcq $0, %rsi + movq %rcx, %rdx + imulq -104(%rsp), %rdx ## 8-byte Folded Reload + movq -72(%rsp), %r9 ## 8-byte Reload + mulxq %r9, %rax, %r12 + movq %rax, -88(%rsp) ## 8-byte Spill + mulxq -56(%rsp), %rax, %r10 ## 8-byte Folded Reload + movq %rax, -112(%rsp) ## 8-byte Spill + mulxq -64(%rsp), %r8, %r11 ## 8-byte Folded Reload + mulxq -80(%rsp), %rdi, %r15 ## 8-byte Folded Reload + mulxq -40(%rsp), %rdx, %rax ## 8-byte Folded Reload + addq %rdi, %rax + adcq %r8, %r15 + adcq -112(%rsp), %r11 ## 8-byte Folded Reload + adcq -88(%rsp), %r10 ## 8-byte Folded Reload + adcq $0, %r12 + addq %rcx, %rdx + adcq %rbp, %rax + adcq %rbx, %r15 + adcq %r14, %r11 + adcq %r13, %r10 + adcq -96(%rsp), %r12 ## 8-byte Folded Reload + adcq $0, -48(%rsp) ## 8-byte Folded Spill + adcq $0, %rsi + movq -104(%rsp), %rdx ## 8-byte Reload + imulq %rax, %rdx + mulxq %r9, %rdi, %rcx + movq %rdi, -96(%rsp) ## 8-byte Spill + mulxq -56(%rsp), %rbp, %rdi ## 8-byte Folded Reload + movq %rbp, -104(%rsp) ## 8-byte Spill + mulxq -64(%rsp), %r13, %rbp ## 8-byte Folded Reload + movq -40(%rsp), %r14 ## 8-byte Reload + mulxq %r14, %r8, %r9 + mulxq -80(%rsp), %rbx, %rdx ## 8-byte Folded Reload + addq %r9, %rbx + adcq %r13, %rdx + adcq -104(%rsp), %rbp ## 8-byte Folded Reload + adcq -96(%rsp), %rdi ## 8-byte Folded Reload + adcq $0, %rcx + addq %rax, %r8 + adcq %r15, %rbx + adcq %r11, %rdx + adcq %r10, %rbp + adcq %r12, %rdi + adcq -48(%rsp), %rcx ## 8-byte Folded Reload + adcq $0, %rsi + movq %rbx, %rax + subq %r14, %rax + movq %rdx, %r8 + sbbq -80(%rsp), %r8 ## 
8-byte Folded Reload + movq %rbp, %r9 + sbbq -64(%rsp), %r9 ## 8-byte Folded Reload + movq %rdi, %r10 + sbbq -56(%rsp), %r10 ## 8-byte Folded Reload + movq %rcx, %r11 + sbbq -72(%rsp), %r11 ## 8-byte Folded Reload + sbbq $0, %rsi + andl $1, %esi + cmovneq %rcx, %r11 + testb %sil, %sil + cmovneq %rbx, %rax + movq -8(%rsp), %rcx ## 8-byte Reload + movq %rax, (%rcx) + cmovneq %rdx, %r8 + movq %r8, 8(%rcx) + cmovneq %rbp, %r9 + movq %r9, 16(%rcx) + cmovneq %rdi, %r10 + movq %r10, 24(%rcx) + movq %r11, 32(%rcx) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_addPre5Lbmi2 + .p2align 4, 0x90 +_mcl_fp_addPre5Lbmi2: ## @mcl_fp_addPre5Lbmi2 +## BB#0: + movq 32(%rdx), %r8 + movq 24(%rdx), %r9 + movq 24(%rsi), %r11 + movq 32(%rsi), %r10 + movq 16(%rdx), %rcx + movq (%rdx), %rax + movq 8(%rdx), %rdx + addq (%rsi), %rax + adcq 8(%rsi), %rdx + adcq 16(%rsi), %rcx + movq %rax, (%rdi) + movq %rdx, 8(%rdi) + movq %rcx, 16(%rdi) + adcq %r9, %r11 + movq %r11, 24(%rdi) + adcq %r8, %r10 + movq %r10, 32(%rdi) + sbbq %rax, %rax + andl $1, %eax + retq + + .globl _mcl_fp_subPre5Lbmi2 + .p2align 4, 0x90 +_mcl_fp_subPre5Lbmi2: ## @mcl_fp_subPre5Lbmi2 +## BB#0: + pushq %rbx + movq 32(%rsi), %r10 + movq 24(%rdx), %r8 + movq 32(%rdx), %r9 + movq 24(%rsi), %r11 + movq 16(%rsi), %rcx + movq (%rsi), %rbx + movq 8(%rsi), %rsi + xorl %eax, %eax + subq (%rdx), %rbx + sbbq 8(%rdx), %rsi + sbbq 16(%rdx), %rcx + movq %rbx, (%rdi) + movq %rsi, 8(%rdi) + movq %rcx, 16(%rdi) + sbbq %r8, %r11 + movq %r11, 24(%rdi) + sbbq %r9, %r10 + movq %r10, 32(%rdi) + sbbq $0, %rax + andl $1, %eax + popq %rbx + retq + + .globl _mcl_fp_shr1_5Lbmi2 + .p2align 4, 0x90 +_mcl_fp_shr1_5Lbmi2: ## @mcl_fp_shr1_5Lbmi2 +## BB#0: + movq 32(%rsi), %r8 + movq 24(%rsi), %rcx + movq 16(%rsi), %rdx + movq (%rsi), %rax + movq 8(%rsi), %rsi + shrdq $1, %rsi, %rax + movq %rax, (%rdi) + shrdq $1, %rdx, %rsi + movq %rsi, 8(%rdi) + shrdq $1, %rcx, %rdx + movq %rdx, 16(%rdi) + shrdq $1, %r8, %rcx + movq %rcx, 24(%rdi) + shrq %r8 + movq %r8, 32(%rdi) + retq + + .globl _mcl_fp_add5Lbmi2 + .p2align 4, 0x90 +_mcl_fp_add5Lbmi2: ## @mcl_fp_add5Lbmi2 +## BB#0: + pushq %rbx + movq 32(%rdx), %r11 + movq 24(%rdx), %rbx + movq 24(%rsi), %r9 + movq 32(%rsi), %r8 + movq 16(%rdx), %r10 + movq (%rdx), %rax + movq 8(%rdx), %rdx + addq (%rsi), %rax + adcq 8(%rsi), %rdx + adcq 16(%rsi), %r10 + movq %rax, (%rdi) + movq %rdx, 8(%rdi) + movq %r10, 16(%rdi) + adcq %rbx, %r9 + movq %r9, 24(%rdi) + adcq %r11, %r8 + movq %r8, 32(%rdi) + sbbq %rsi, %rsi + andl $1, %esi + subq (%rcx), %rax + sbbq 8(%rcx), %rdx + sbbq 16(%rcx), %r10 + sbbq 24(%rcx), %r9 + sbbq 32(%rcx), %r8 + sbbq $0, %rsi + testb $1, %sil + jne LBB74_2 +## BB#1: ## %nocarry + movq %rax, (%rdi) + movq %rdx, 8(%rdi) + movq %r10, 16(%rdi) + movq %r9, 24(%rdi) + movq %r8, 32(%rdi) +LBB74_2: ## %carry + popq %rbx + retq + + .globl _mcl_fp_addNF5Lbmi2 + .p2align 4, 0x90 +_mcl_fp_addNF5Lbmi2: ## @mcl_fp_addNF5Lbmi2 +## BB#0: + pushq %r15 + pushq %r14 + pushq %rbx + movq 32(%rdx), %r8 + movq 24(%rdx), %r9 + movq 16(%rdx), %r10 + movq (%rdx), %r14 + movq 8(%rdx), %r11 + addq (%rsi), %r14 + adcq 8(%rsi), %r11 + adcq 16(%rsi), %r10 + adcq 24(%rsi), %r9 + adcq 32(%rsi), %r8 + movq %r14, %rsi + subq (%rcx), %rsi + movq %r11, %rdx + sbbq 8(%rcx), %rdx + movq %r10, %rbx + sbbq 16(%rcx), %rbx + movq %r9, %r15 + sbbq 24(%rcx), %r15 + movq %r8, %rax + sbbq 32(%rcx), %rax + movq %rax, %rcx + sarq $63, %rcx + cmovsq %r14, %rsi + movq %rsi, (%rdi) + cmovsq %r11, %rdx + movq %rdx, 8(%rdi) + cmovsq 
%r10, %rbx + movq %rbx, 16(%rdi) + cmovsq %r9, %r15 + movq %r15, 24(%rdi) + cmovsq %r8, %rax + movq %rax, 32(%rdi) + popq %rbx + popq %r14 + popq %r15 + retq + + .globl _mcl_fp_sub5Lbmi2 + .p2align 4, 0x90 +_mcl_fp_sub5Lbmi2: ## @mcl_fp_sub5Lbmi2 +## BB#0: + pushq %r14 + pushq %rbx + movq 32(%rsi), %r8 + movq 24(%rdx), %r11 + movq 32(%rdx), %r14 + movq 24(%rsi), %r9 + movq 16(%rsi), %r10 + movq (%rsi), %rax + movq 8(%rsi), %rsi + xorl %ebx, %ebx + subq (%rdx), %rax + sbbq 8(%rdx), %rsi + sbbq 16(%rdx), %r10 + movq %rax, (%rdi) + movq %rsi, 8(%rdi) + movq %r10, 16(%rdi) + sbbq %r11, %r9 + movq %r9, 24(%rdi) + sbbq %r14, %r8 + movq %r8, 32(%rdi) + sbbq $0, %rbx + testb $1, %bl + je LBB76_2 +## BB#1: ## %carry + movq 32(%rcx), %r11 + movq 24(%rcx), %r14 + movq 8(%rcx), %rdx + movq 16(%rcx), %rbx + addq (%rcx), %rax + movq %rax, (%rdi) + adcq %rsi, %rdx + movq %rdx, 8(%rdi) + adcq %r10, %rbx + movq %rbx, 16(%rdi) + adcq %r9, %r14 + movq %r14, 24(%rdi) + adcq %r8, %r11 + movq %r11, 32(%rdi) +LBB76_2: ## %nocarry + popq %rbx + popq %r14 + retq + + .globl _mcl_fp_subNF5Lbmi2 + .p2align 4, 0x90 +_mcl_fp_subNF5Lbmi2: ## @mcl_fp_subNF5Lbmi2 +## BB#0: + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq 32(%rsi), %r12 + movdqu (%rdx), %xmm0 + movdqu 16(%rdx), %xmm1 + pshufd $78, %xmm1, %xmm2 ## xmm2 = xmm1[2,3,0,1] + movd %xmm2, %r9 + movdqu (%rsi), %xmm2 + movdqu 16(%rsi), %xmm3 + pshufd $78, %xmm3, %xmm4 ## xmm4 = xmm3[2,3,0,1] + movd %xmm4, %r8 + movd %xmm1, %r10 + movd %xmm3, %r14 + pshufd $78, %xmm0, %xmm1 ## xmm1 = xmm0[2,3,0,1] + movd %xmm1, %r11 + pshufd $78, %xmm2, %xmm1 ## xmm1 = xmm2[2,3,0,1] + movd %xmm1, %r15 + movd %xmm0, %rsi + movd %xmm2, %r13 + subq %rsi, %r13 + sbbq %r11, %r15 + sbbq %r10, %r14 + sbbq %r9, %r8 + sbbq 32(%rdx), %r12 + movq %r12, %rdx + sarq $63, %rdx + movq %rdx, %rsi + shldq $1, %r12, %rsi + movq 8(%rcx), %rax + andq %rsi, %rax + andq (%rcx), %rsi + movq 32(%rcx), %r9 + andq %rdx, %r9 + rorxq $63, %rdx, %rbx + andq 24(%rcx), %rdx + andq 16(%rcx), %rbx + addq %r13, %rsi + movq %rsi, (%rdi) + adcq %r15, %rax + movq %rax, 8(%rdi) + adcq %r14, %rbx + movq %rbx, 16(%rdi) + adcq %r8, %rdx + movq %rdx, 24(%rdi) + adcq %r12, %r9 + movq %r9, 32(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + retq + + .globl _mcl_fpDbl_add5Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_add5Lbmi2: ## @mcl_fpDbl_add5Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq 72(%rdx), %rax + movq %rax, -8(%rsp) ## 8-byte Spill + movq 64(%rdx), %r11 + movq 56(%rdx), %r14 + movq 48(%rdx), %r15 + movq 24(%rsi), %rbp + movq 32(%rsi), %r13 + movq 16(%rdx), %r12 + movq (%rdx), %rbx + movq 8(%rdx), %rax + addq (%rsi), %rbx + adcq 8(%rsi), %rax + adcq 16(%rsi), %r12 + adcq 24(%rdx), %rbp + adcq 32(%rdx), %r13 + movq 40(%rdx), %r9 + movq %rbx, (%rdi) + movq 72(%rsi), %r8 + movq %rax, 8(%rdi) + movq 64(%rsi), %r10 + movq %r12, 16(%rdi) + movq 56(%rsi), %r12 + movq %rbp, 24(%rdi) + movq 48(%rsi), %rbp + movq 40(%rsi), %rbx + movq %r13, 32(%rdi) + adcq %r9, %rbx + adcq %r15, %rbp + adcq %r14, %r12 + adcq %r11, %r10 + adcq -8(%rsp), %r8 ## 8-byte Folded Reload + sbbq %rsi, %rsi + andl $1, %esi + movq %rbx, %rax + subq (%rcx), %rax + movq %rbp, %rdx + sbbq 8(%rcx), %rdx + movq %r12, %r9 + sbbq 16(%rcx), %r9 + movq %r10, %r11 + sbbq 24(%rcx), %r11 + movq %r8, %r14 + sbbq 32(%rcx), %r14 + sbbq $0, %rsi + andl $1, %esi + cmovneq %rbx, %rax + movq %rax, 40(%rdi) + testb %sil, %sil + cmovneq %rbp, %rdx + movq %rdx, 48(%rdi) + cmovneq %r12, %r9 
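+## NOTE (reader annotation): in fpDbl_add5L the low five limbs of the sum are stored
+## unreduced; only the high half gets a trial subtraction of p, and these cmovne's
+## keep the unsubtracted values whenever the trial borrowed.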
+ movq %r9, 56(%rdi) + cmovneq %r10, %r11 + movq %r11, 64(%rdi) + cmovneq %r8, %r14 + movq %r14, 72(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fpDbl_sub5Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_sub5Lbmi2: ## @mcl_fpDbl_sub5Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq 72(%rdx), %r9 + movq 64(%rdx), %r10 + movq 56(%rdx), %r14 + movq 16(%rsi), %r8 + movq (%rsi), %r15 + movq 8(%rsi), %r11 + xorl %eax, %eax + subq (%rdx), %r15 + sbbq 8(%rdx), %r11 + sbbq 16(%rdx), %r8 + movq 24(%rsi), %r12 + sbbq 24(%rdx), %r12 + movq %r15, (%rdi) + movq 32(%rsi), %rbx + sbbq 32(%rdx), %rbx + movq %r11, 8(%rdi) + movq 48(%rdx), %r15 + movq 40(%rdx), %rdx + movq %r8, 16(%rdi) + movq 72(%rsi), %r8 + movq %r12, 24(%rdi) + movq 64(%rsi), %r11 + movq %rbx, 32(%rdi) + movq 40(%rsi), %rbp + sbbq %rdx, %rbp + movq 56(%rsi), %r12 + movq 48(%rsi), %r13 + sbbq %r15, %r13 + sbbq %r14, %r12 + sbbq %r10, %r11 + sbbq %r9, %r8 + movl $0, %edx + sbbq $0, %rdx + andl $1, %edx + movq (%rcx), %rsi + cmoveq %rax, %rsi + testb %dl, %dl + movq 16(%rcx), %rdx + cmoveq %rax, %rdx + movq 8(%rcx), %rbx + cmoveq %rax, %rbx + movq 32(%rcx), %r9 + cmoveq %rax, %r9 + cmovneq 24(%rcx), %rax + addq %rbp, %rsi + movq %rsi, 40(%rdi) + adcq %r13, %rbx + movq %rbx, 48(%rdi) + adcq %r12, %rdx + movq %rdx, 56(%rdi) + adcq %r11, %rax + movq %rax, 64(%rdi) + adcq %r8, %r9 + movq %r9, 72(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_mulUnitPre6Lbmi2 + .p2align 4, 0x90 +_mcl_fp_mulUnitPre6Lbmi2: ## @mcl_fp_mulUnitPre6Lbmi2 +## BB#0: + pushq %r15 + pushq %r14 + pushq %r12 + pushq %rbx + mulxq 40(%rsi), %r8, %r11 + mulxq 32(%rsi), %r9, %r12 + mulxq 24(%rsi), %r10, %rcx + mulxq 16(%rsi), %r14, %rbx + mulxq 8(%rsi), %r15, %rax + mulxq (%rsi), %rdx, %rsi + movq %rdx, (%rdi) + addq %r15, %rsi + movq %rsi, 8(%rdi) + adcq %r14, %rax + movq %rax, 16(%rdi) + adcq %r10, %rbx + movq %rbx, 24(%rdi) + adcq %r9, %rcx + movq %rcx, 32(%rdi) + adcq %r8, %r12 + movq %r12, 40(%rdi) + adcq $0, %r11 + movq %r11, 48(%rdi) + popq %rbx + popq %r12 + popq %r14 + popq %r15 + retq + + .globl _mcl_fpDbl_mulPre6Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_mulPre6Lbmi2: ## @mcl_fpDbl_mulPre6Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rdx, %r11 + movq %rdi, -48(%rsp) ## 8-byte Spill + movq (%rsi), %r15 + movq 8(%rsi), %rcx + movq %rcx, -80(%rsp) ## 8-byte Spill + movq (%r11), %rax + movq %r11, -56(%rsp) ## 8-byte Spill + movq %rcx, %rdx + mulxq %rax, %rcx, %r14 + movq %r15, %rdx + mulxq %rax, %rdx, %rbp + movq %rdx, -72(%rsp) ## 8-byte Spill + movq 24(%rsi), %rbx + movq %rbx, -88(%rsp) ## 8-byte Spill + movq 16(%rsi), %rdx + movq %rdx, -64(%rsp) ## 8-byte Spill + addq %rcx, %rbp + mulxq %rax, %rcx, %r12 + adcq %r14, %rcx + movq %rbx, %rdx + mulxq %rax, %rbx, %r14 + adcq %r12, %rbx + movq 32(%rsi), %r12 + movq %r12, %rdx + mulxq %rax, %r8, %r13 + adcq %r14, %r8 + movq 40(%rsi), %r14 + movq %r14, %rdx + mulxq %rax, %r9, %r10 + adcq %r13, %r9 + movq -72(%rsp), %rax ## 8-byte Reload + movq %rax, (%rdi) + adcq $0, %r10 + movq 8(%r11), %rdi + movq %r15, %rdx + mulxq %rdi, %r13, %rax + movq %rax, -72(%rsp) ## 8-byte Spill + addq %rbp, %r13 + movq -80(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %rbp, %rax + movq %rax, -80(%rsp) ## 8-byte Spill + adcq %rcx, %rbp + movq -64(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %rax, %r11 + adcq %rbx, %rax + movq -88(%rsp), %rdx ## 8-byte 
Reload + mulxq %rdi, %rbx, %rcx + movq %rcx, -88(%rsp) ## 8-byte Spill + adcq %r8, %rbx + movq %r12, %rdx + mulxq %rdi, %rcx, %r8 + adcq %r9, %rcx + movq %r14, %rdx + mulxq %rdi, %r12, %rdx + adcq %r10, %r12 + sbbq %r15, %r15 + andl $1, %r15d + addq -72(%rsp), %rbp ## 8-byte Folded Reload + adcq -80(%rsp), %rax ## 8-byte Folded Reload + adcq %r11, %rbx + movq -48(%rsp), %rdi ## 8-byte Reload + movq %r13, 8(%rdi) + adcq -88(%rsp), %rcx ## 8-byte Folded Reload + adcq %r8, %r12 + adcq %rdx, %r15 + movq (%rsi), %rdx + movq %rdx, -88(%rsp) ## 8-byte Spill + movq 8(%rsi), %r8 + movq %r8, -80(%rsp) ## 8-byte Spill + movq -56(%rsp), %r14 ## 8-byte Reload + movq 16(%r14), %rdi + mulxq %rdi, %r13, %rdx + movq %rdx, -8(%rsp) ## 8-byte Spill + addq %rbp, %r13 + movq %r8, %rdx + mulxq %rdi, %r8, %rdx + movq %rdx, -16(%rsp) ## 8-byte Spill + adcq %rax, %r8 + movq 16(%rsi), %rdx + movq %rdx, -64(%rsp) ## 8-byte Spill + mulxq %rdi, %r11, %rax + movq %rax, -24(%rsp) ## 8-byte Spill + adcq %rbx, %r11 + movq 24(%rsi), %rdx + movq %rdx, -72(%rsp) ## 8-byte Spill + mulxq %rdi, %rax, %rbx + adcq %rcx, %rax + movq 32(%rsi), %rdx + movq %rdx, -32(%rsp) ## 8-byte Spill + mulxq %rdi, %r10, %rcx + adcq %r12, %r10 + movq 40(%rsi), %rdx + movq %rdx, -40(%rsp) ## 8-byte Spill + mulxq %rdi, %r9, %rdx + adcq %r15, %r9 + sbbq %rbp, %rbp + andl $1, %ebp + addq -8(%rsp), %r8 ## 8-byte Folded Reload + adcq -16(%rsp), %r11 ## 8-byte Folded Reload + adcq -24(%rsp), %rax ## 8-byte Folded Reload + adcq %rbx, %r10 + adcq %rcx, %r9 + adcq %rdx, %rbp + movq -48(%rsp), %rcx ## 8-byte Reload + movq %r13, 16(%rcx) + movq 24(%r14), %rdi + movq -88(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %r12, %rcx + movq %rcx, -88(%rsp) ## 8-byte Spill + addq %r8, %r12 + movq -80(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %rbx, %rcx + movq %rcx, -80(%rsp) ## 8-byte Spill + adcq %r11, %rbx + movq -64(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %rcx, %r11 + adcq %rax, %rcx + movq -72(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %r14, %rax + movq %rax, -64(%rsp) ## 8-byte Spill + adcq %r10, %r14 + movq -32(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %r8, %rax + adcq %r9, %r8 + movq -40(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %r13, %rdx + adcq %rbp, %r13 + sbbq %r15, %r15 + andl $1, %r15d + addq -88(%rsp), %rbx ## 8-byte Folded Reload + adcq -80(%rsp), %rcx ## 8-byte Folded Reload + adcq %r11, %r14 + movq -48(%rsp), %rdi ## 8-byte Reload + movq %r12, 24(%rdi) + adcq -64(%rsp), %r8 ## 8-byte Folded Reload + adcq %rax, %r13 + adcq %rdx, %r15 + movq (%rsi), %rdx + movq %rdx, -88(%rsp) ## 8-byte Spill + movq 8(%rsi), %rbp + movq %rbp, -80(%rsp) ## 8-byte Spill + movq -56(%rsp), %rax ## 8-byte Reload + movq 32(%rax), %rdi + mulxq %rdi, %r12, %rax + movq %rax, -64(%rsp) ## 8-byte Spill + addq %rbx, %r12 + movq %rbp, %rdx + mulxq %rdi, %rbx, %rax + movq %rax, -72(%rsp) ## 8-byte Spill + adcq %rcx, %rbx + movq 16(%rsi), %r11 + movq %r11, %rdx + mulxq %rdi, %rax, %rcx + movq %rcx, -32(%rsp) ## 8-byte Spill + adcq %r14, %rax + movq 24(%rsi), %r14 + movq %r14, %rdx + mulxq %rdi, %rbp, %rcx + movq %rcx, -40(%rsp) ## 8-byte Spill + adcq %r8, %rbp + movq 32(%rsi), %r8 + movq %r8, %rdx + mulxq %rdi, %rcx, %r10 + adcq %r13, %rcx + movq 40(%rsi), %r13 + movq %r13, %rdx + mulxq %rdi, %r9, %rdx + adcq %r15, %r9 + sbbq %rsi, %rsi + andl $1, %esi + addq -64(%rsp), %rbx ## 8-byte Folded Reload + adcq -72(%rsp), %rax ## 8-byte Folded Reload + adcq -32(%rsp), %rbp ## 8-byte Folded Reload + adcq -40(%rsp), %rcx ## 8-byte Folded Reload + adcq %r10, %r9 + adcq %rdx, %rsi + movq 
-48(%rsp), %r10 ## 8-byte Reload + movq %r12, 32(%r10) + movq -56(%rsp), %rdx ## 8-byte Reload + movq 40(%rdx), %rdi + movq -88(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %r15, %rdx + movq %rdx, -56(%rsp) ## 8-byte Spill + addq %rbx, %r15 + movq -80(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %rbx, %r12 + adcq %rax, %rbx + movq %r11, %rdx + mulxq %rdi, %rax, %r11 + adcq %rbp, %rax + movq %r14, %rdx + mulxq %rdi, %rbp, %r14 + adcq %rcx, %rbp + movq %r8, %rdx + mulxq %rdi, %rcx, %r8 + adcq %r9, %rcx + movq %r13, %rdx + mulxq %rdi, %rdi, %r9 + adcq %rsi, %rdi + sbbq %rsi, %rsi + andl $1, %esi + addq -56(%rsp), %rbx ## 8-byte Folded Reload + movq %r15, 40(%r10) + movq %rbx, 48(%r10) + adcq %r12, %rax + movq %rax, 56(%r10) + adcq %r11, %rbp + movq %rbp, 64(%r10) + adcq %r14, %rcx + movq %rcx, 72(%r10) + adcq %r8, %rdi + movq %rdi, 80(%r10) + adcq %r9, %rsi + movq %rsi, 88(%r10) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fpDbl_sqrPre6Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_sqrPre6Lbmi2: ## @mcl_fpDbl_sqrPre6Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rdi, %r9 + movq 16(%rsi), %rdx + movq %rdx, -64(%rsp) ## 8-byte Spill + movq (%rsi), %rcx + movq 8(%rsi), %rax + mulxq %rcx, %r10, %r8 + movq 24(%rsi), %rbp + movq %rbp, -48(%rsp) ## 8-byte Spill + movq %rax, %rdx + mulxq %rcx, %r11, %rbx + movq %rbx, -40(%rsp) ## 8-byte Spill + movq %rcx, %rdx + mulxq %rcx, %rdx, %r14 + movq %rdx, -56(%rsp) ## 8-byte Spill + addq %r11, %r14 + adcq %rbx, %r10 + movq %rbp, %rdx + mulxq %rcx, %r15, %rbp + adcq %r8, %r15 + movq 32(%rsi), %rbx + movq %rbx, %rdx + mulxq %rcx, %r8, %r13 + adcq %rbp, %r8 + movq 40(%rsi), %rdi + movq %rdi, %rdx + mulxq %rcx, %rcx, %r12 + adcq %r13, %rcx + movq %r9, -24(%rsp) ## 8-byte Spill + movq -56(%rsp), %rdx ## 8-byte Reload + movq %rdx, (%r9) + adcq $0, %r12 + addq %r11, %r14 + movq %rax, %rdx + mulxq %rax, %rbp, %rdx + movq %rdx, -56(%rsp) ## 8-byte Spill + adcq %r10, %rbp + movq -64(%rsp), %rdx ## 8-byte Reload + mulxq %rax, %r13, %r10 + adcq %r15, %r13 + movq -48(%rsp), %rdx ## 8-byte Reload + mulxq %rax, %r15, %rdx + movq %rdx, -64(%rsp) ## 8-byte Spill + adcq %r8, %r15 + movq %rbx, %rdx + mulxq %rax, %rbx, %r8 + adcq %rcx, %rbx + movq %rdi, %rdx + mulxq %rax, %r11, %rax + adcq %r12, %r11 + sbbq %r12, %r12 + andl $1, %r12d + addq -40(%rsp), %rbp ## 8-byte Folded Reload + adcq -56(%rsp), %r13 ## 8-byte Folded Reload + movq %r14, 8(%r9) + adcq %r10, %r15 + adcq -64(%rsp), %rbx ## 8-byte Folded Reload + adcq %r8, %r11 + adcq %rax, %r12 + movq (%rsi), %rdx + movq %rdx, -48(%rsp) ## 8-byte Spill + movq 8(%rsi), %rdi + movq %rdi, -64(%rsp) ## 8-byte Spill + movq 16(%rsi), %rcx + mulxq %rcx, %rax, %rdx + movq %rdx, -32(%rsp) ## 8-byte Spill + addq %rbp, %rax + movq %rax, -40(%rsp) ## 8-byte Spill + movq %rdi, %rdx + mulxq %rcx, %rbp, %rax + movq %rax, -8(%rsp) ## 8-byte Spill + adcq %r13, %rbp + movq %rcx, %rdx + mulxq %rcx, %r13, %rax + movq %rax, -16(%rsp) ## 8-byte Spill + adcq %r15, %r13 + movq 24(%rsi), %rax + movq %rax, %rdx + mulxq %rcx, %r8, %rdi + movq %rdi, -56(%rsp) ## 8-byte Spill + adcq %r8, %rbx + movq 32(%rsi), %r10 + movq %r10, %rdx + mulxq %rcx, %r14, %r15 + adcq %r11, %r14 + movq 40(%rsi), %r11 + movq %r11, %rdx + mulxq %rcx, %r9, %rdx + adcq %r12, %r9 + sbbq %rcx, %rcx + andl $1, %ecx + addq -32(%rsp), %rbp ## 8-byte Folded Reload + adcq -8(%rsp), %r13 ## 8-byte Folded Reload + adcq -16(%rsp), %rbx ## 8-byte Folded Reload + adcq %rdi, %r14 + adcq %r15, %r9 + adcq 
%rdx, %rcx + movq -48(%rsp), %rdx ## 8-byte Reload + mulxq %rax, %rdi, %rdx + movq %rdx, -48(%rsp) ## 8-byte Spill + addq %rbp, %rdi + movq -64(%rsp), %rdx ## 8-byte Reload + mulxq %rax, %r15, %rbp + adcq %r13, %r15 + adcq %r8, %rbx + movq %rax, %rdx + mulxq %rax, %r8, %rdx + movq %rdx, -64(%rsp) ## 8-byte Spill + adcq %r14, %r8 + movq %r10, %rdx + mulxq %rax, %r12, %r10 + adcq %r9, %r12 + movq %r11, %rdx + mulxq %rax, %r13, %rax + adcq %rcx, %r13 + sbbq %r9, %r9 + andl $1, %r9d + addq -48(%rsp), %r15 ## 8-byte Folded Reload + adcq %rbp, %rbx + movq -24(%rsp), %rdx ## 8-byte Reload + movq -40(%rsp), %rbp ## 8-byte Reload + movq %rbp, 16(%rdx) + movq %rdi, 24(%rdx) + adcq -56(%rsp), %r8 ## 8-byte Folded Reload + adcq -64(%rsp), %r12 ## 8-byte Folded Reload + adcq %r10, %r13 + adcq %rax, %r9 + movq (%rsi), %rcx + movq 8(%rsi), %rdi + movq %rdi, -64(%rsp) ## 8-byte Spill + movq 32(%rsi), %rax + movq %rcx, %rdx + mulxq %rax, %rdx, %rbp + movq %rbp, -56(%rsp) ## 8-byte Spill + addq %r15, %rdx + movq %rdx, -40(%rsp) ## 8-byte Spill + movq %rdi, %rdx + mulxq %rax, %r15, %rdx + movq %rdx, -32(%rsp) ## 8-byte Spill + adcq %rbx, %r15 + movq 16(%rsi), %r10 + movq %r10, %rdx + mulxq %rax, %r14, %rbx + adcq %r8, %r14 + movq 24(%rsi), %r8 + movq %r8, %rdx + mulxq %rax, %rbp, %rdi + adcq %r12, %rbp + movq %rax, %rdx + mulxq %rax, %r11, %r12 + adcq %r13, %r11 + movq 40(%rsi), %rsi + movq %rsi, %rdx + mulxq %rax, %r13, %rdx + movq %rdx, -48(%rsp) ## 8-byte Spill + adcq %r13, %r9 + sbbq %rax, %rax + andl $1, %eax + addq -56(%rsp), %r15 ## 8-byte Folded Reload + adcq -32(%rsp), %r14 ## 8-byte Folded Reload + adcq %rbx, %rbp + adcq %rdi, %r11 + adcq %r12, %r9 + adcq %rdx, %rax + movq %rcx, %rdx + mulxq %rsi, %r12, %rcx + addq %r15, %r12 + movq -64(%rsp), %rdx ## 8-byte Reload + mulxq %rsi, %rdi, %r15 + adcq %r14, %rdi + movq %r10, %rdx + mulxq %rsi, %rbx, %r10 + adcq %rbp, %rbx + movq %r8, %rdx + mulxq %rsi, %rbp, %r8 + adcq %r11, %rbp + adcq %r13, %r9 + movq %rsi, %rdx + mulxq %rsi, %rsi, %r11 + adcq %rax, %rsi + sbbq %rax, %rax + andl $1, %eax + addq %rcx, %rdi + movq -24(%rsp), %rdx ## 8-byte Reload + movq -40(%rsp), %rcx ## 8-byte Reload + movq %rcx, 32(%rdx) + movq %r12, 40(%rdx) + movq %rdi, 48(%rdx) + adcq %r15, %rbx + movq %rbx, 56(%rdx) + adcq %r10, %rbp + movq %rbp, 64(%rdx) + adcq %r8, %r9 + movq %r9, 72(%rdx) + adcq -48(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, 80(%rdx) + adcq %r11, %rax + movq %rax, 88(%rdx) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_mont6Lbmi2 + .p2align 4, 0x90 +_mcl_fp_mont6Lbmi2: ## @mcl_fp_mont6Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + subq $32, %rsp + movq %rdx, -88(%rsp) ## 8-byte Spill + movq %rdi, 24(%rsp) ## 8-byte Spill + movq 40(%rsi), %rdi + movq %rdi, -96(%rsp) ## 8-byte Spill + movq (%rdx), %rax + movq %rdi, %rdx + mulxq %rax, %r11, %rbx + movq 32(%rsi), %rdx + movq %rdx, (%rsp) ## 8-byte Spill + mulxq %rax, %r14, %r12 + movq 24(%rsi), %rdx + movq %rdx, -8(%rsp) ## 8-byte Spill + mulxq %rax, %r15, %r13 + movq 16(%rsi), %rdx + movq %rdx, -16(%rsp) ## 8-byte Spill + mulxq %rax, %r8, %r10 + movq (%rsi), %rbp + movq %rbp, -24(%rsp) ## 8-byte Spill + movq 8(%rsi), %rdx + movq %rdx, -32(%rsp) ## 8-byte Spill + mulxq %rax, %rdi, %r9 + movq %rbp, %rdx + mulxq %rax, %rdx, %rbp + movq %rdx, -128(%rsp) ## 8-byte Spill + addq %rdi, %rbp + adcq %r8, %r9 + adcq %r15, %r10 + adcq %r14, %r13 + adcq %r11, %r12 + adcq $0, %rbx + movq %rbx, -120(%rsp) ## 8-byte Spill + 
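+## NOTE (reader annotation): mont6L repeats the interleaved multiply/reduce rounds of
+## mont5L with one extra limb; the load that follows picks up the same Montgomery
+## factor stored at -8(%rcx).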
movq -8(%rcx), %rax + movq %rax, 8(%rsp) ## 8-byte Spill + imulq %rax, %rdx + movq 40(%rcx), %rax + movq %rax, -40(%rsp) ## 8-byte Spill + mulxq %rax, %rax, %r15 + movq %rax, -112(%rsp) ## 8-byte Spill + movq 16(%rcx), %rax + movq %rax, -48(%rsp) ## 8-byte Spill + mulxq %rax, %r8, %rax + movq 8(%rcx), %rsi + movq %rsi, -56(%rsp) ## 8-byte Spill + mulxq %rsi, %rbx, %r11 + movq (%rcx), %rsi + movq %rsi, -64(%rsp) ## 8-byte Spill + mulxq %rsi, %rsi, %r14 + addq %rbx, %r14 + adcq %r8, %r11 + movq 24(%rcx), %rdi + movq %rdi, -72(%rsp) ## 8-byte Spill + mulxq %rdi, %rdi, %r8 + adcq %rax, %rdi + movq 32(%rcx), %rax + movq %rax, -80(%rsp) ## 8-byte Spill + mulxq %rax, %rbx, %rax + adcq %r8, %rbx + adcq -112(%rsp), %rax ## 8-byte Folded Reload + adcq $0, %r15 + addq -128(%rsp), %rsi ## 8-byte Folded Reload + adcq %rbp, %r14 + adcq %r9, %r11 + adcq %r10, %rdi + adcq %r13, %rbx + adcq %r12, %rax + adcq -120(%rsp), %r15 ## 8-byte Folded Reload + sbbq %r10, %r10 + andl $1, %r10d + movq -88(%rsp), %rcx ## 8-byte Reload + movq 8(%rcx), %rdx + mulxq -96(%rsp), %rsi, %rcx ## 8-byte Folded Reload + movq %rsi, -112(%rsp) ## 8-byte Spill + movq %rcx, -120(%rsp) ## 8-byte Spill + mulxq (%rsp), %rcx, %r13 ## 8-byte Folded Reload + movq %rcx, -104(%rsp) ## 8-byte Spill + mulxq -8(%rsp), %r12, %rcx ## 8-byte Folded Reload + movq %rcx, -128(%rsp) ## 8-byte Spill + mulxq -32(%rsp), %rbp, %rcx ## 8-byte Folded Reload + mulxq -24(%rsp), %rsi, %r9 ## 8-byte Folded Reload + addq %rbp, %r9 + mulxq -16(%rsp), %rbp, %r8 ## 8-byte Folded Reload + adcq %rcx, %rbp + adcq %r12, %r8 + movq -128(%rsp), %rdx ## 8-byte Reload + adcq -104(%rsp), %rdx ## 8-byte Folded Reload + adcq -112(%rsp), %r13 ## 8-byte Folded Reload + movq -120(%rsp), %rcx ## 8-byte Reload + adcq $0, %rcx + addq %r14, %rsi + adcq %r11, %r9 + adcq %rdi, %rbp + adcq %rbx, %r8 + adcq %rax, %rdx + movq %rdx, -128(%rsp) ## 8-byte Spill + adcq %r15, %r13 + adcq %r10, %rcx + movq %rcx, -120(%rsp) ## 8-byte Spill + sbbq %rax, %rax + andl $1, %eax + movq %rax, -112(%rsp) ## 8-byte Spill + movq %rsi, %rbx + movq %rbx, %rdx + imulq 8(%rsp), %rdx ## 8-byte Folded Reload + mulxq -40(%rsp), %rax, %r12 ## 8-byte Folded Reload + movq %rax, -104(%rsp) ## 8-byte Spill + mulxq -80(%rsp), %r14, %r11 ## 8-byte Folded Reload + mulxq -56(%rsp), %rcx, %rax ## 8-byte Folded Reload + mulxq -64(%rsp), %rdi, %rsi ## 8-byte Folded Reload + addq %rcx, %rsi + mulxq -48(%rsp), %rcx, %r10 ## 8-byte Folded Reload + adcq %rax, %rcx + mulxq -72(%rsp), %rax, %r15 ## 8-byte Folded Reload + adcq %r10, %rax + adcq %r14, %r15 + adcq -104(%rsp), %r11 ## 8-byte Folded Reload + adcq $0, %r12 + addq %rbx, %rdi + adcq %r9, %rsi + adcq %rbp, %rcx + adcq %r8, %rax + adcq -128(%rsp), %r15 ## 8-byte Folded Reload + adcq %r13, %r11 + adcq -120(%rsp), %r12 ## 8-byte Folded Reload + movq -112(%rsp), %r10 ## 8-byte Reload + adcq $0, %r10 + movq -88(%rsp), %rdx ## 8-byte Reload + movq 16(%rdx), %rdx + mulxq -96(%rsp), %rbp, %rdi ## 8-byte Folded Reload + movq %rbp, -112(%rsp) ## 8-byte Spill + movq %rdi, -120(%rsp) ## 8-byte Spill + mulxq (%rsp), %rdi, %rbp ## 8-byte Folded Reload + movq %rdi, -104(%rsp) ## 8-byte Spill + mulxq -8(%rsp), %rdi, %r13 ## 8-byte Folded Reload + movq %rdi, 16(%rsp) ## 8-byte Spill + mulxq -32(%rsp), %rdi, %r14 ## 8-byte Folded Reload + mulxq -24(%rsp), %rbx, %r9 ## 8-byte Folded Reload + movq %rbx, -128(%rsp) ## 8-byte Spill + addq %rdi, %r9 + mulxq -16(%rsp), %rbx, %r8 ## 8-byte Folded Reload + adcq %r14, %rbx + adcq 16(%rsp), %r8 ## 8-byte Folded Reload + adcq -104(%rsp), %r13 ## 
8-byte Folded Reload + adcq -112(%rsp), %rbp ## 8-byte Folded Reload + movq -120(%rsp), %rdx ## 8-byte Reload + adcq $0, %rdx + movq -128(%rsp), %rdi ## 8-byte Reload + addq %rsi, %rdi + movq %rdi, -128(%rsp) ## 8-byte Spill + adcq %rcx, %r9 + adcq %rax, %rbx + adcq %r15, %r8 + adcq %r11, %r13 + adcq %r12, %rbp + adcq %r10, %rdx + movq %rdx, -120(%rsp) ## 8-byte Spill + sbbq %rax, %rax + andl $1, %eax + movq %rax, -112(%rsp) ## 8-byte Spill + movq %rdi, %rdx + imulq 8(%rsp), %rdx ## 8-byte Folded Reload + mulxq -40(%rsp), %rax, %r11 ## 8-byte Folded Reload + movq %rax, -104(%rsp) ## 8-byte Spill + mulxq -80(%rsp), %r15, %r12 ## 8-byte Folded Reload + mulxq -56(%rsp), %rax, %rcx ## 8-byte Folded Reload + mulxq -64(%rsp), %rdi, %r14 ## 8-byte Folded Reload + addq %rax, %r14 + mulxq -48(%rsp), %rax, %r10 ## 8-byte Folded Reload + adcq %rcx, %rax + mulxq -72(%rsp), %rsi, %rcx ## 8-byte Folded Reload + adcq %r10, %rsi + adcq %r15, %rcx + adcq -104(%rsp), %r12 ## 8-byte Folded Reload + adcq $0, %r11 + addq -128(%rsp), %rdi ## 8-byte Folded Reload + adcq %r9, %r14 + adcq %rbx, %rax + adcq %r8, %rsi + adcq %r13, %rcx + adcq %rbp, %r12 + adcq -120(%rsp), %r11 ## 8-byte Folded Reload + adcq $0, -112(%rsp) ## 8-byte Folded Spill + movq -88(%rsp), %rdx ## 8-byte Reload + movq 24(%rdx), %rdx + mulxq -96(%rsp), %rbp, %rdi ## 8-byte Folded Reload + movq %rbp, -128(%rsp) ## 8-byte Spill + movq %rdi, -120(%rsp) ## 8-byte Spill + mulxq (%rsp), %rdi, %r15 ## 8-byte Folded Reload + movq %rdi, -104(%rsp) ## 8-byte Spill + mulxq -8(%rsp), %r10, %rbp ## 8-byte Folded Reload + mulxq -32(%rsp), %rbx, %r9 ## 8-byte Folded Reload + mulxq -24(%rsp), %r13, %rdi ## 8-byte Folded Reload + addq %rbx, %rdi + mulxq -16(%rsp), %rbx, %r8 ## 8-byte Folded Reload + adcq %r9, %rbx + adcq %r10, %r8 + adcq -104(%rsp), %rbp ## 8-byte Folded Reload + adcq -128(%rsp), %r15 ## 8-byte Folded Reload + movq -120(%rsp), %rdx ## 8-byte Reload + adcq $0, %rdx + addq %r14, %r13 + adcq %rax, %rdi + adcq %rsi, %rbx + adcq %rcx, %r8 + adcq %r12, %rbp + adcq %r11, %r15 + adcq -112(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, -120(%rsp) ## 8-byte Spill + sbbq %rax, %rax + andl $1, %eax + movq %rax, -128(%rsp) ## 8-byte Spill + movq %r13, %rdx + imulq 8(%rsp), %rdx ## 8-byte Folded Reload + mulxq -40(%rsp), %rax, %r10 ## 8-byte Folded Reload + movq %rax, -112(%rsp) ## 8-byte Spill + mulxq -80(%rsp), %rax, %r12 ## 8-byte Folded Reload + movq %rax, -104(%rsp) ## 8-byte Spill + mulxq -56(%rsp), %rax, %r11 ## 8-byte Folded Reload + mulxq -64(%rsp), %rcx, %rsi ## 8-byte Folded Reload + addq %rax, %rsi + mulxq -48(%rsp), %r14, %r9 ## 8-byte Folded Reload + adcq %r11, %r14 + mulxq -72(%rsp), %rax, %r11 ## 8-byte Folded Reload + adcq %r9, %rax + adcq -104(%rsp), %r11 ## 8-byte Folded Reload + adcq -112(%rsp), %r12 ## 8-byte Folded Reload + adcq $0, %r10 + addq %r13, %rcx + adcq %rdi, %rsi + adcq %rbx, %r14 + adcq %r8, %rax + adcq %rbp, %r11 + adcq %r15, %r12 + adcq -120(%rsp), %r10 ## 8-byte Folded Reload + movq -128(%rsp), %r15 ## 8-byte Reload + adcq $0, %r15 + movq -88(%rsp), %rcx ## 8-byte Reload + movq 32(%rcx), %rdx + mulxq -96(%rsp), %rdi, %rcx ## 8-byte Folded Reload + movq %rdi, -112(%rsp) ## 8-byte Spill + movq %rcx, -120(%rsp) ## 8-byte Spill + mulxq (%rsp), %rdi, %rcx ## 8-byte Folded Reload + movq %rdi, 16(%rsp) ## 8-byte Spill + movq %rcx, -128(%rsp) ## 8-byte Spill + mulxq -8(%rsp), %r13, %rbp ## 8-byte Folded Reload + mulxq -32(%rsp), %rdi, %rcx ## 8-byte Folded Reload + mulxq -24(%rsp), %rbx, %r8 ## 8-byte Folded Reload + movq %rbx, 
-104(%rsp) ## 8-byte Spill + addq %rdi, %r8 + mulxq -16(%rsp), %rbx, %r9 ## 8-byte Folded Reload + adcq %rcx, %rbx + adcq %r13, %r9 + adcq 16(%rsp), %rbp ## 8-byte Folded Reload + movq -128(%rsp), %rdx ## 8-byte Reload + adcq -112(%rsp), %rdx ## 8-byte Folded Reload + movq -120(%rsp), %rcx ## 8-byte Reload + adcq $0, %rcx + movq -104(%rsp), %rdi ## 8-byte Reload + addq %rsi, %rdi + movq %rdi, -104(%rsp) ## 8-byte Spill + adcq %r14, %r8 + adcq %rax, %rbx + adcq %r11, %r9 + adcq %r12, %rbp + adcq %r10, %rdx + movq %rdx, -128(%rsp) ## 8-byte Spill + adcq %r15, %rcx + movq %rcx, -120(%rsp) ## 8-byte Spill + sbbq %rax, %rax + andl $1, %eax + movq %rax, %r13 + movq %rdi, %rdx + imulq 8(%rsp), %rdx ## 8-byte Folded Reload + mulxq -40(%rsp), %r14, %rax ## 8-byte Folded Reload + movq %rax, -112(%rsp) ## 8-byte Spill + mulxq -80(%rsp), %r12, %r15 ## 8-byte Folded Reload + mulxq -56(%rsp), %rcx, %rax ## 8-byte Folded Reload + mulxq -64(%rsp), %rdi, %rsi ## 8-byte Folded Reload + addq %rcx, %rsi + mulxq -48(%rsp), %r11, %r10 ## 8-byte Folded Reload + adcq %rax, %r11 + mulxq -72(%rsp), %rax, %rcx ## 8-byte Folded Reload + adcq %r10, %rax + adcq %r12, %rcx + adcq %r14, %r15 + movq -112(%rsp), %rdx ## 8-byte Reload + adcq $0, %rdx + addq -104(%rsp), %rdi ## 8-byte Folded Reload + adcq %r8, %rsi + adcq %rbx, %r11 + adcq %r9, %rax + adcq %rbp, %rcx + adcq -128(%rsp), %r15 ## 8-byte Folded Reload + adcq -120(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, -112(%rsp) ## 8-byte Spill + adcq $0, %r13 + movq %r13, -120(%rsp) ## 8-byte Spill + movq -88(%rsp), %rdx ## 8-byte Reload + movq 40(%rdx), %rdx + mulxq -96(%rsp), %rbp, %rdi ## 8-byte Folded Reload + movq %rbp, -128(%rsp) ## 8-byte Spill + movq %rdi, -88(%rsp) ## 8-byte Spill + mulxq (%rsp), %rbx, %rdi ## 8-byte Folded Reload + movq %rdi, -96(%rsp) ## 8-byte Spill + mulxq -8(%rsp), %r10, %rbp ## 8-byte Folded Reload + mulxq -16(%rsp), %r8, %r12 ## 8-byte Folded Reload + mulxq -32(%rsp), %rdi, %r14 ## 8-byte Folded Reload + mulxq -24(%rsp), %r13, %r9 ## 8-byte Folded Reload + addq %rdi, %r9 + adcq %r8, %r14 + adcq %r10, %r12 + adcq %rbx, %rbp + movq -96(%rsp), %rdi ## 8-byte Reload + adcq -128(%rsp), %rdi ## 8-byte Folded Reload + movq -88(%rsp), %rdx ## 8-byte Reload + adcq $0, %rdx + addq %rsi, %r13 + adcq %r11, %r9 + adcq %rax, %r14 + adcq %rcx, %r12 + adcq %r15, %rbp + adcq -112(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, -96(%rsp) ## 8-byte Spill + adcq -120(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, -88(%rsp) ## 8-byte Spill + sbbq %rcx, %rcx + movq 8(%rsp), %rdx ## 8-byte Reload + imulq %r13, %rdx + mulxq -64(%rsp), %r8, %rax ## 8-byte Folded Reload + mulxq -56(%rsp), %r10, %rdi ## 8-byte Folded Reload + addq %rax, %r10 + mulxq -48(%rsp), %rsi, %rax ## 8-byte Folded Reload + adcq %rdi, %rsi + mulxq -72(%rsp), %rbx, %r11 ## 8-byte Folded Reload + adcq %rax, %rbx + mulxq -80(%rsp), %rdi, %r15 ## 8-byte Folded Reload + adcq %r11, %rdi + mulxq -40(%rsp), %rax, %r11 ## 8-byte Folded Reload + adcq %r15, %rax + adcq $0, %r11 + andl $1, %ecx + addq %r13, %r8 + adcq %r9, %r10 + adcq %r14, %rsi + adcq %r12, %rbx + adcq %rbp, %rdi + adcq -96(%rsp), %rax ## 8-byte Folded Reload + adcq -88(%rsp), %r11 ## 8-byte Folded Reload + adcq $0, %rcx + movq %r10, %rbp + subq -64(%rsp), %rbp ## 8-byte Folded Reload + movq %rsi, %rdx + sbbq -56(%rsp), %rdx ## 8-byte Folded Reload + movq %rbx, %r8 + sbbq -48(%rsp), %r8 ## 8-byte Folded Reload + movq %rdi, %r9 + sbbq -72(%rsp), %r9 ## 8-byte Folded Reload + movq %rax, %r14 + sbbq -80(%rsp), %r14 ## 8-byte Folded 
Reload + movq %r11, %r15 + sbbq -40(%rsp), %r15 ## 8-byte Folded Reload + sbbq $0, %rcx + andl $1, %ecx + cmovneq %rdi, %r9 + testb %cl, %cl + cmovneq %r10, %rbp + movq 24(%rsp), %rcx ## 8-byte Reload + movq %rbp, (%rcx) + cmovneq %rsi, %rdx + movq %rdx, 8(%rcx) + cmovneq %rbx, %r8 + movq %r8, 16(%rcx) + movq %r9, 24(%rcx) + cmovneq %rax, %r14 + movq %r14, 32(%rcx) + cmovneq %r11, %r15 + movq %r15, 40(%rcx) + addq $32, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_montNF6Lbmi2 + .p2align 4, 0x90 +_mcl_fp_montNF6Lbmi2: ## @mcl_fp_montNF6Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rdx, -120(%rsp) ## 8-byte Spill + movq %rdi, -8(%rsp) ## 8-byte Spill + movq (%rsi), %rax + movq %rax, -112(%rsp) ## 8-byte Spill + movq 8(%rsi), %rdi + movq %rdi, -128(%rsp) ## 8-byte Spill + movq (%rdx), %rbp + movq %rdi, %rdx + mulxq %rbp, %rdi, %rbx + movq %rax, %rdx + mulxq %rbp, %r9, %r14 + movq 16(%rsi), %rdx + movq %rdx, -56(%rsp) ## 8-byte Spill + addq %rdi, %r14 + mulxq %rbp, %rdi, %r8 + adcq %rbx, %rdi + movq 24(%rsi), %rdx + movq %rdx, -64(%rsp) ## 8-byte Spill + mulxq %rbp, %rbx, %r10 + adcq %r8, %rbx + movq 32(%rsi), %rdx + movq %rdx, -72(%rsp) ## 8-byte Spill + mulxq %rbp, %r8, %r11 + adcq %r10, %r8 + movq 40(%rsi), %rdx + movq %rdx, -80(%rsp) ## 8-byte Spill + mulxq %rbp, %rsi, %r15 + adcq %r11, %rsi + adcq $0, %r15 + movq -8(%rcx), %rax + movq %rax, -104(%rsp) ## 8-byte Spill + movq %r9, %rdx + imulq %rax, %rdx + movq (%rcx), %rax + movq %rax, -96(%rsp) ## 8-byte Spill + mulxq %rax, %rbp, %rax + movq %rax, -88(%rsp) ## 8-byte Spill + addq %r9, %rbp + movq 8(%rcx), %rax + movq %rax, -16(%rsp) ## 8-byte Spill + mulxq %rax, %r12, %r9 + adcq %r14, %r12 + movq 16(%rcx), %rax + movq %rax, -24(%rsp) ## 8-byte Spill + mulxq %rax, %r14, %rax + adcq %rdi, %r14 + movq 24(%rcx), %rdi + movq %rdi, -32(%rsp) ## 8-byte Spill + mulxq %rdi, %r13, %rdi + adcq %rbx, %r13 + movq 32(%rcx), %rbp + movq %rbp, -40(%rsp) ## 8-byte Spill + mulxq %rbp, %r11, %rbx + adcq %r8, %r11 + movq 40(%rcx), %rcx + movq %rcx, -48(%rsp) ## 8-byte Spill + mulxq %rcx, %r10, %rcx + adcq %rsi, %r10 + adcq $0, %r15 + addq -88(%rsp), %r12 ## 8-byte Folded Reload + adcq %r9, %r14 + adcq %rax, %r13 + adcq %rdi, %r11 + adcq %rbx, %r10 + adcq %rcx, %r15 + movq -120(%rsp), %rax ## 8-byte Reload + movq 8(%rax), %rdx + mulxq -128(%rsp), %rcx, %rsi ## 8-byte Folded Reload + mulxq -112(%rsp), %rbx, %rax ## 8-byte Folded Reload + addq %rcx, %rax + mulxq -56(%rsp), %rcx, %rdi ## 8-byte Folded Reload + adcq %rsi, %rcx + mulxq -64(%rsp), %rsi, %r8 ## 8-byte Folded Reload + adcq %rdi, %rsi + mulxq -72(%rsp), %rdi, %rbp ## 8-byte Folded Reload + movq %rbp, -88(%rsp) ## 8-byte Spill + adcq %r8, %rdi + mulxq -80(%rsp), %r8, %r9 ## 8-byte Folded Reload + adcq -88(%rsp), %r8 ## 8-byte Folded Reload + adcq $0, %r9 + addq %r12, %rbx + adcq %r14, %rax + adcq %r13, %rcx + adcq %r11, %rsi + adcq %r10, %rdi + adcq %r15, %r8 + adcq $0, %r9 + movq %rbx, %rdx + imulq -104(%rsp), %rdx ## 8-byte Folded Reload + mulxq -96(%rsp), %rbp, %r13 ## 8-byte Folded Reload + addq %rbx, %rbp + mulxq -16(%rsp), %r11, %rbx ## 8-byte Folded Reload + adcq %rax, %r11 + mulxq -24(%rsp), %r14, %rax ## 8-byte Folded Reload + adcq %rcx, %r14 + mulxq -32(%rsp), %r10, %rcx ## 8-byte Folded Reload + adcq %rsi, %r10 + mulxq -40(%rsp), %r15, %rsi ## 8-byte Folded Reload + adcq %rdi, %r15 + mulxq -48(%rsp), %r12, %rdx ## 8-byte Folded Reload + adcq %r8, %r12 + adcq $0, %r9 + addq %r13, 
%r11 + adcq %rbx, %r14 + adcq %rax, %r10 + adcq %rcx, %r15 + adcq %rsi, %r12 + adcq %rdx, %r9 + movq -120(%rsp), %rax ## 8-byte Reload + movq 16(%rax), %rdx + mulxq -128(%rsp), %rcx, %rax ## 8-byte Folded Reload + mulxq -112(%rsp), %r13, %rdi ## 8-byte Folded Reload + addq %rcx, %rdi + mulxq -56(%rsp), %rbx, %rcx ## 8-byte Folded Reload + adcq %rax, %rbx + mulxq -64(%rsp), %rsi, %rbp ## 8-byte Folded Reload + adcq %rcx, %rsi + mulxq -72(%rsp), %rax, %rcx ## 8-byte Folded Reload + movq %rcx, -88(%rsp) ## 8-byte Spill + adcq %rbp, %rax + mulxq -80(%rsp), %r8, %rcx ## 8-byte Folded Reload + adcq -88(%rsp), %r8 ## 8-byte Folded Reload + adcq $0, %rcx + addq %r11, %r13 + adcq %r14, %rdi + adcq %r10, %rbx + adcq %r15, %rsi + adcq %r12, %rax + adcq %r9, %r8 + adcq $0, %rcx + movq %r13, %rdx + imulq -104(%rsp), %rdx ## 8-byte Folded Reload + mulxq -96(%rsp), %rbp, %r12 ## 8-byte Folded Reload + addq %r13, %rbp + mulxq -16(%rsp), %r11, %rbp ## 8-byte Folded Reload + adcq %rdi, %r11 + mulxq -24(%rsp), %r9, %rdi ## 8-byte Folded Reload + adcq %rbx, %r9 + mulxq -32(%rsp), %r10, %rbx ## 8-byte Folded Reload + adcq %rsi, %r10 + mulxq -40(%rsp), %r14, %rsi ## 8-byte Folded Reload + adcq %rax, %r14 + mulxq -48(%rsp), %r15, %rax ## 8-byte Folded Reload + adcq %r8, %r15 + adcq $0, %rcx + addq %r12, %r11 + adcq %rbp, %r9 + adcq %rdi, %r10 + adcq %rbx, %r14 + adcq %rsi, %r15 + adcq %rax, %rcx + movq -120(%rsp), %rax ## 8-byte Reload + movq 24(%rax), %rdx + mulxq -128(%rsp), %rsi, %rax ## 8-byte Folded Reload + mulxq -112(%rsp), %r13, %rbx ## 8-byte Folded Reload + addq %rsi, %rbx + mulxq -56(%rsp), %rdi, %rbp ## 8-byte Folded Reload + adcq %rax, %rdi + mulxq -64(%rsp), %rsi, %r8 ## 8-byte Folded Reload + adcq %rbp, %rsi + mulxq -72(%rsp), %rax, %rbp ## 8-byte Folded Reload + adcq %r8, %rax + mulxq -80(%rsp), %r8, %r12 ## 8-byte Folded Reload + adcq %rbp, %r8 + adcq $0, %r12 + addq %r11, %r13 + adcq %r9, %rbx + adcq %r10, %rdi + adcq %r14, %rsi + adcq %r15, %rax + adcq %rcx, %r8 + adcq $0, %r12 + movq %r13, %rdx + imulq -104(%rsp), %rdx ## 8-byte Folded Reload + mulxq -96(%rsp), %rbp, %rcx ## 8-byte Folded Reload + addq %r13, %rbp + mulxq -16(%rsp), %r11, %rbp ## 8-byte Folded Reload + adcq %rbx, %r11 + mulxq -24(%rsp), %r9, %rbx ## 8-byte Folded Reload + adcq %rdi, %r9 + mulxq -32(%rsp), %r10, %rdi ## 8-byte Folded Reload + adcq %rsi, %r10 + mulxq -40(%rsp), %r14, %rsi ## 8-byte Folded Reload + adcq %rax, %r14 + mulxq -48(%rsp), %r15, %rax ## 8-byte Folded Reload + adcq %r8, %r15 + adcq $0, %r12 + addq %rcx, %r11 + adcq %rbp, %r9 + adcq %rbx, %r10 + adcq %rdi, %r14 + adcq %rsi, %r15 + adcq %rax, %r12 + movq -120(%rsp), %rax ## 8-byte Reload + movq 32(%rax), %rdx + mulxq -128(%rsp), %rsi, %rcx ## 8-byte Folded Reload + mulxq -112(%rsp), %r13, %rax ## 8-byte Folded Reload + addq %rsi, %rax + mulxq -56(%rsp), %rbx, %rsi ## 8-byte Folded Reload + adcq %rcx, %rbx + mulxq -64(%rsp), %rdi, %rcx ## 8-byte Folded Reload + adcq %rsi, %rdi + mulxq -72(%rsp), %rsi, %rbp ## 8-byte Folded Reload + adcq %rcx, %rsi + mulxq -80(%rsp), %r8, %rcx ## 8-byte Folded Reload + adcq %rbp, %r8 + adcq $0, %rcx + addq %r11, %r13 + adcq %r9, %rax + adcq %r10, %rbx + adcq %r14, %rdi + adcq %r15, %rsi + adcq %r12, %r8 + adcq $0, %rcx + movq %r13, %rdx + imulq -104(%rsp), %rdx ## 8-byte Folded Reload + mulxq -96(%rsp), %rbp, %r9 ## 8-byte Folded Reload + addq %r13, %rbp + mulxq -16(%rsp), %r13, %rbp ## 8-byte Folded Reload + adcq %rax, %r13 + mulxq -24(%rsp), %r11, %rax ## 8-byte Folded Reload + adcq %rbx, %r11 + mulxq -32(%rsp), %r10, %rbx 
## 8-byte Folded Reload + adcq %rdi, %r10 + mulxq -40(%rsp), %r14, %rdi ## 8-byte Folded Reload + adcq %rsi, %r14 + mulxq -48(%rsp), %rsi, %rdx ## 8-byte Folded Reload + adcq %r8, %rsi + adcq $0, %rcx + addq %r9, %r13 + adcq %rbp, %r11 + adcq %rax, %r10 + adcq %rbx, %r14 + adcq %rdi, %rsi + adcq %rdx, %rcx + movq -120(%rsp), %rax ## 8-byte Reload + movq 40(%rax), %rdx + mulxq -128(%rsp), %rdi, %rax ## 8-byte Folded Reload + mulxq -112(%rsp), %r8, %rbx ## 8-byte Folded Reload + addq %rdi, %rbx + mulxq -56(%rsp), %rdi, %rbp ## 8-byte Folded Reload + adcq %rax, %rdi + mulxq -64(%rsp), %r15, %rax ## 8-byte Folded Reload + adcq %rbp, %r15 + mulxq -72(%rsp), %r12, %rbp ## 8-byte Folded Reload + adcq %rax, %r12 + mulxq -80(%rsp), %r9, %rax ## 8-byte Folded Reload + adcq %rbp, %r9 + adcq $0, %rax + addq %r13, %r8 + adcq %r11, %rbx + adcq %r10, %rdi + adcq %r14, %r15 + adcq %rsi, %r12 + adcq %rcx, %r9 + adcq $0, %rax + movq -104(%rsp), %rdx ## 8-byte Reload + imulq %r8, %rdx + mulxq -96(%rsp), %rcx, %rsi ## 8-byte Folded Reload + movq %rsi, -104(%rsp) ## 8-byte Spill + addq %r8, %rcx + movq -16(%rsp), %r11 ## 8-byte Reload + mulxq %r11, %r8, %rcx + movq %rcx, -112(%rsp) ## 8-byte Spill + adcq %rbx, %r8 + movq -24(%rsp), %r10 ## 8-byte Reload + mulxq %r10, %rsi, %rcx + movq %rcx, -120(%rsp) ## 8-byte Spill + adcq %rdi, %rsi + movq -32(%rsp), %r13 ## 8-byte Reload + mulxq %r13, %rdi, %rcx + movq %rcx, -128(%rsp) ## 8-byte Spill + adcq %r15, %rdi + movq -40(%rsp), %rcx ## 8-byte Reload + mulxq %rcx, %r15, %rbx + adcq %r12, %r15 + movq -48(%rsp), %r14 ## 8-byte Reload + mulxq %r14, %r12, %rbp + adcq %r9, %r12 + adcq $0, %rax + addq -104(%rsp), %r8 ## 8-byte Folded Reload + adcq -112(%rsp), %rsi ## 8-byte Folded Reload + adcq -120(%rsp), %rdi ## 8-byte Folded Reload + adcq -128(%rsp), %r15 ## 8-byte Folded Reload + adcq %rbx, %r12 + adcq %rbp, %rax + movq %r8, %rbp + subq -96(%rsp), %rbp ## 8-byte Folded Reload + movq %rsi, %rbx + sbbq %r11, %rbx + movq %rdi, %r11 + sbbq %r10, %r11 + movq %r15, %r10 + sbbq %r13, %r10 + movq %r12, %r9 + sbbq %rcx, %r9 + movq %rax, %rcx + sbbq %r14, %rcx + movq %rcx, %rdx + sarq $63, %rdx + cmovsq %r8, %rbp + movq -8(%rsp), %rdx ## 8-byte Reload + movq %rbp, (%rdx) + cmovsq %rsi, %rbx + movq %rbx, 8(%rdx) + cmovsq %rdi, %r11 + movq %r11, 16(%rdx) + cmovsq %r15, %r10 + movq %r10, 24(%rdx) + cmovsq %r12, %r9 + movq %r9, 32(%rdx) + cmovsq %rax, %rcx + movq %rcx, 40(%rdx) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_montRed6Lbmi2 + .p2align 4, 0x90 +_mcl_fp_montRed6Lbmi2: ## @mcl_fp_montRed6Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + pushq %rax + movq %rdx, %rcx + movq %rdi, (%rsp) ## 8-byte Spill + movq -8(%rcx), %rax + movq %rax, -8(%rsp) ## 8-byte Spill + movq (%rsi), %r9 + movq %r9, %rdx + imulq %rax, %rdx + movq 40(%rcx), %rax + movq %rax, -72(%rsp) ## 8-byte Spill + mulxq %rax, %r12, %r13 + movq 32(%rcx), %rax + movq %rax, -16(%rsp) ## 8-byte Spill + mulxq %rax, %r10, %r8 + movq 24(%rcx), %rax + movq %rax, -48(%rsp) ## 8-byte Spill + mulxq %rax, %r14, %r15 + movq 16(%rcx), %rax + movq %rax, -32(%rsp) ## 8-byte Spill + mulxq %rax, %rbp, %r11 + movq (%rcx), %rdi + movq %rdi, -40(%rsp) ## 8-byte Spill + movq 8(%rcx), %rax + movq %rax, -64(%rsp) ## 8-byte Spill + mulxq %rax, %rax, %rbx + mulxq %rdi, %rdx, %rcx + addq %rax, %rcx + adcq %rbp, %rbx + adcq %r14, %r11 + adcq %r10, %r15 + adcq %r12, %r8 + adcq $0, %r13 + addq %r9, %rdx + adcq 8(%rsi), %rcx + adcq 16(%rsi), 
%rbx + adcq 24(%rsi), %r11 + adcq 32(%rsi), %r15 + adcq 40(%rsi), %r8 + movq %r8, -112(%rsp) ## 8-byte Spill + adcq 48(%rsi), %r13 + movq %r13, -104(%rsp) ## 8-byte Spill + movq 88(%rsi), %r8 + movq 80(%rsi), %rdx + movq 72(%rsi), %rdi + movq 64(%rsi), %rax + movq 56(%rsi), %r14 + adcq $0, %r14 + adcq $0, %rax + movq %rax, -88(%rsp) ## 8-byte Spill + adcq $0, %rdi + movq %rdi, -96(%rsp) ## 8-byte Spill + adcq $0, %rdx + movq %rdx, -80(%rsp) ## 8-byte Spill + adcq $0, %r8 + movq %r8, -24(%rsp) ## 8-byte Spill + sbbq %r12, %r12 + andl $1, %r12d + movq %rcx, %rdx + imulq -8(%rsp), %rdx ## 8-byte Folded Reload + mulxq -72(%rsp), %rsi, %rax ## 8-byte Folded Reload + movq %rsi, -120(%rsp) ## 8-byte Spill + movq %rax, -128(%rsp) ## 8-byte Spill + mulxq -16(%rsp), %rax, %r13 ## 8-byte Folded Reload + movq %rax, -56(%rsp) ## 8-byte Spill + mulxq -48(%rsp), %rbp, %r10 ## 8-byte Folded Reload + mulxq -32(%rsp), %r9, %r8 ## 8-byte Folded Reload + mulxq -64(%rsp), %rsi, %rdi ## 8-byte Folded Reload + mulxq -40(%rsp), %rdx, %rax ## 8-byte Folded Reload + addq %rsi, %rax + adcq %r9, %rdi + adcq %rbp, %r8 + adcq -56(%rsp), %r10 ## 8-byte Folded Reload + adcq -120(%rsp), %r13 ## 8-byte Folded Reload + movq -128(%rsp), %rsi ## 8-byte Reload + adcq $0, %rsi + addq %rcx, %rdx + adcq %rbx, %rax + adcq %r11, %rdi + adcq %r15, %r8 + adcq -112(%rsp), %r10 ## 8-byte Folded Reload + adcq -104(%rsp), %r13 ## 8-byte Folded Reload + adcq %r14, %rsi + movq %rsi, -128(%rsp) ## 8-byte Spill + adcq $0, -88(%rsp) ## 8-byte Folded Spill + adcq $0, -96(%rsp) ## 8-byte Folded Spill + adcq $0, -80(%rsp) ## 8-byte Folded Spill + adcq $0, -24(%rsp) ## 8-byte Folded Spill + adcq $0, %r12 + movq %rax, %rdx + imulq -8(%rsp), %rdx ## 8-byte Folded Reload + mulxq -72(%rsp), %rsi, %rcx ## 8-byte Folded Reload + movq %rsi, -112(%rsp) ## 8-byte Spill + movq %rcx, -104(%rsp) ## 8-byte Spill + movq -16(%rsp), %rbx ## 8-byte Reload + mulxq %rbx, %rcx, %r14 + movq %rcx, -120(%rsp) ## 8-byte Spill + mulxq -48(%rsp), %rcx, %r15 ## 8-byte Folded Reload + movq %rcx, -56(%rsp) ## 8-byte Spill + mulxq -32(%rsp), %r11, %rbp ## 8-byte Folded Reload + mulxq -64(%rsp), %rsi, %r9 ## 8-byte Folded Reload + mulxq -40(%rsp), %rdx, %rcx ## 8-byte Folded Reload + addq %rsi, %rcx + adcq %r11, %r9 + adcq -56(%rsp), %rbp ## 8-byte Folded Reload + adcq -120(%rsp), %r15 ## 8-byte Folded Reload + adcq -112(%rsp), %r14 ## 8-byte Folded Reload + movq -104(%rsp), %rsi ## 8-byte Reload + adcq $0, %rsi + addq %rax, %rdx + adcq %rdi, %rcx + adcq %r8, %r9 + adcq %r10, %rbp + adcq %r13, %r15 + adcq -128(%rsp), %r14 ## 8-byte Folded Reload + adcq -88(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, -104(%rsp) ## 8-byte Spill + adcq $0, -96(%rsp) ## 8-byte Folded Spill + adcq $0, -80(%rsp) ## 8-byte Folded Spill + adcq $0, -24(%rsp) ## 8-byte Folded Spill + adcq $0, %r12 + movq %rcx, %rdx + movq -8(%rsp), %r13 ## 8-byte Reload + imulq %r13, %rdx + mulxq -72(%rsp), %rsi, %rax ## 8-byte Folded Reload + movq %rsi, -112(%rsp) ## 8-byte Spill + movq %rax, -128(%rsp) ## 8-byte Spill + mulxq %rbx, %rsi, %rax + movq %rsi, -120(%rsp) ## 8-byte Spill + movq %rax, -88(%rsp) ## 8-byte Spill + movq -48(%rsp), %r11 ## 8-byte Reload + mulxq %r11, %rax, %rbx + movq %rax, -56(%rsp) ## 8-byte Spill + mulxq -32(%rsp), %r10, %r8 ## 8-byte Folded Reload + mulxq -64(%rsp), %rsi, %rdi ## 8-byte Folded Reload + mulxq -40(%rsp), %rdx, %rax ## 8-byte Folded Reload + addq %rsi, %rax + adcq %r10, %rdi + adcq -56(%rsp), %r8 ## 8-byte Folded Reload + adcq -120(%rsp), %rbx ## 8-byte Folded Reload + 
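+## Note on the rounds around this point: each one follows the usual
+## word-by-word Montgomery reduction pattern (a sketch, assuming p is
+## the modulus argument moved into %rcx and nprime = -p^{-1} mod 2^64
+## is the value loaded from -8(p) and spilled to -8(%rsp)):
+##   m <- t[0] * nprime            (imulq; low 64 bits only)
+##   t <- (t + m * p) >> 64        (mulxq fan-out + adcq carry chain;
+##                                  the low word cancels by choice of m)
+## Six rounds clear the six low words; the subq/sbbq/cmovneq block at
+## the end then conditionally subtracts p to land in [0, p).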
movq -88(%rsp), %r10 ## 8-byte Reload + adcq -112(%rsp), %r10 ## 8-byte Folded Reload + movq -128(%rsp), %rsi ## 8-byte Reload + adcq $0, %rsi + addq %rcx, %rdx + adcq %r9, %rax + adcq %rbp, %rdi + adcq %r15, %r8 + adcq %r14, %rbx + adcq -104(%rsp), %r10 ## 8-byte Folded Reload + movq %r10, -88(%rsp) ## 8-byte Spill + adcq -96(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, -128(%rsp) ## 8-byte Spill + adcq $0, -80(%rsp) ## 8-byte Folded Spill + adcq $0, -24(%rsp) ## 8-byte Folded Spill + adcq $0, %r12 + movq %rax, %rdx + imulq %r13, %rdx + mulxq -72(%rsp), %rsi, %rcx ## 8-byte Folded Reload + movq %rsi, -104(%rsp) ## 8-byte Spill + movq %rcx, -96(%rsp) ## 8-byte Spill + mulxq -16(%rsp), %rsi, %rcx ## 8-byte Folded Reload + movq %rsi, -112(%rsp) ## 8-byte Spill + mulxq %r11, %rsi, %r13 + movq %rsi, -120(%rsp) ## 8-byte Spill + movq -32(%rsp), %r10 ## 8-byte Reload + mulxq %r10, %r15, %r14 + mulxq -64(%rsp), %rsi, %r9 ## 8-byte Folded Reload + movq -40(%rsp), %r11 ## 8-byte Reload + mulxq %r11, %rdx, %rbp + addq %rsi, %rbp + adcq %r15, %r9 + adcq -120(%rsp), %r14 ## 8-byte Folded Reload + adcq -112(%rsp), %r13 ## 8-byte Folded Reload + adcq -104(%rsp), %rcx ## 8-byte Folded Reload + movq -96(%rsp), %rsi ## 8-byte Reload + adcq $0, %rsi + addq %rax, %rdx + adcq %rdi, %rbp + adcq %r8, %r9 + adcq %rbx, %r14 + adcq -88(%rsp), %r13 ## 8-byte Folded Reload + adcq -128(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, -128(%rsp) ## 8-byte Spill + adcq -80(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, -96(%rsp) ## 8-byte Spill + adcq $0, -24(%rsp) ## 8-byte Folded Spill + adcq $0, %r12 + movq -8(%rsp), %rdx ## 8-byte Reload + imulq %rbp, %rdx + mulxq -72(%rsp), %rax, %rsi ## 8-byte Folded Reload + movq %rax, -80(%rsp) ## 8-byte Spill + mulxq %r10, %rax, %r15 + mulxq %r11, %r10, %rdi + mulxq -64(%rsp), %rbx, %r8 ## 8-byte Folded Reload + addq %rdi, %rbx + adcq %rax, %r8 + mulxq -48(%rsp), %rax, %rdi ## 8-byte Folded Reload + adcq %r15, %rax + movq -16(%rsp), %r15 ## 8-byte Reload + mulxq %r15, %rdx, %r11 + adcq %rdi, %rdx + adcq -80(%rsp), %r11 ## 8-byte Folded Reload + adcq $0, %rsi + addq %rbp, %r10 + adcq %r9, %rbx + adcq %r14, %r8 + adcq %r13, %rax + adcq -128(%rsp), %rdx ## 8-byte Folded Reload + adcq -96(%rsp), %r11 ## 8-byte Folded Reload + adcq -24(%rsp), %rsi ## 8-byte Folded Reload + adcq $0, %r12 + movq %rbx, %rcx + subq -40(%rsp), %rcx ## 8-byte Folded Reload + movq %r8, %rdi + sbbq -64(%rsp), %rdi ## 8-byte Folded Reload + movq %rax, %rbp + sbbq -32(%rsp), %rbp ## 8-byte Folded Reload + movq %rdx, %r9 + sbbq -48(%rsp), %r9 ## 8-byte Folded Reload + movq %r11, %r10 + sbbq %r15, %r10 + movq %rsi, %r15 + sbbq -72(%rsp), %r15 ## 8-byte Folded Reload + sbbq $0, %r12 + andl $1, %r12d + cmovneq %rsi, %r15 + testb %r12b, %r12b + cmovneq %rbx, %rcx + movq (%rsp), %rsi ## 8-byte Reload + movq %rcx, (%rsi) + cmovneq %r8, %rdi + movq %rdi, 8(%rsi) + cmovneq %rax, %rbp + movq %rbp, 16(%rsi) + cmovneq %rdx, %r9 + movq %r9, 24(%rsi) + cmovneq %r11, %r10 + movq %r10, 32(%rsi) + movq %r15, 40(%rsi) + addq $8, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_addPre6Lbmi2 + .p2align 4, 0x90 +_mcl_fp_addPre6Lbmi2: ## @mcl_fp_addPre6Lbmi2 +## BB#0: + pushq %r14 + pushq %rbx + movq 40(%rdx), %r8 + movq 40(%rsi), %r11 + movq 32(%rdx), %r9 + movq 24(%rdx), %r10 + movq 24(%rsi), %rax + movq 32(%rsi), %r14 + movq 16(%rdx), %rbx + movq (%rdx), %rcx + movq 8(%rdx), %rdx + addq (%rsi), %rcx + adcq 8(%rsi), %rdx + adcq 16(%rsi), %rbx + movq %rcx, (%rdi) + movq 
%rdx, 8(%rdi)
+ movq %rbx, 16(%rdi)
+ adcq %r10, %rax
+ movq %rax, 24(%rdi)
+ adcq %r9, %r14
+ movq %r14, 32(%rdi)
+ adcq %r8, %r11
+ movq %r11, 40(%rdi)
+ sbbq %rax, %rax
+ andl $1, %eax
+ popq %rbx
+ popq %r14
+ retq
+
+ .globl _mcl_fp_subPre6Lbmi2
+ .p2align 4, 0x90
+_mcl_fp_subPre6Lbmi2: ## @mcl_fp_subPre6Lbmi2
+## BB#0:
+ pushq %r15
+ pushq %r14
+ pushq %rbx
+ movq 40(%rdx), %r8
+ movq 40(%rsi), %r9
+ movq 32(%rsi), %r10
+ movq 24(%rsi), %r11
+ movq 16(%rsi), %rcx
+ movq (%rsi), %rbx
+ movq 8(%rsi), %rsi
+ xorl %eax, %eax
+ subq (%rdx), %rbx
+ sbbq 8(%rdx), %rsi
+ movq 24(%rdx), %r14
+ movq 32(%rdx), %r15
+ sbbq 16(%rdx), %rcx
+ movq %rbx, (%rdi)
+ movq %rsi, 8(%rdi)
+ movq %rcx, 16(%rdi)
+ sbbq %r14, %r11
+ movq %r11, 24(%rdi)
+ sbbq %r15, %r10
+ movq %r10, 32(%rdi)
+ sbbq %r8, %r9
+ movq %r9, 40(%rdi)
+ sbbq $0, %rax
+ andl $1, %eax
+ popq %rbx
+ popq %r14
+ popq %r15
+ retq
+
+ .globl _mcl_fp_shr1_6Lbmi2
+ .p2align 4, 0x90
+_mcl_fp_shr1_6Lbmi2: ## @mcl_fp_shr1_6Lbmi2
+## BB#0:
+ movq 40(%rsi), %r8
+ movq 32(%rsi), %r9
+ movq 24(%rsi), %rdx
+ movq 16(%rsi), %rax
+ movq (%rsi), %rcx
+ movq 8(%rsi), %rsi
+ shrdq $1, %rsi, %rcx
+ movq %rcx, (%rdi)
+ shrdq $1, %rax, %rsi
+ movq %rsi, 8(%rdi)
+ shrdq $1, %rdx, %rax
+ movq %rax, 16(%rdi)
+ shrdq $1, %r9, %rdx
+ movq %rdx, 24(%rdi)
+ shrdq $1, %r8, %r9
+ movq %r9, 32(%rdi)
+ shrq %r8
+ movq %r8, 40(%rdi)
+ retq
+
+ .globl _mcl_fp_add6Lbmi2
+ .p2align 4, 0x90
+_mcl_fp_add6Lbmi2: ## @mcl_fp_add6Lbmi2
+## BB#0:
+ pushq %r15
+ pushq %r14
+ pushq %rbx
+ movq 40(%rdx), %r14
+ movq 40(%rsi), %r8
+ movq 32(%rdx), %r15
+ movq 24(%rdx), %rbx
+ movq 24(%rsi), %r10
+ movq 32(%rsi), %r9
+ movq 16(%rdx), %r11
+ movq (%rdx), %rax
+ movq 8(%rdx), %rdx
+ addq (%rsi), %rax
+ adcq 8(%rsi), %rdx
+ adcq 16(%rsi), %r11
+ movq %rax, (%rdi)
+ movq %rdx, 8(%rdi)
+ movq %r11, 16(%rdi)
+ adcq %rbx, %r10
+ movq %r10, 24(%rdi)
+ adcq %r15, %r9
+ movq %r9, 32(%rdi)
+ adcq %r14, %r8
+ movq %r8, 40(%rdi)
+ sbbq %rsi, %rsi
+ andl $1, %esi
+ subq (%rcx), %rax
+ sbbq 8(%rcx), %rdx
+ sbbq 16(%rcx), %r11
+ sbbq 24(%rcx), %r10
+ sbbq 32(%rcx), %r9
+ sbbq 40(%rcx), %r8
+ sbbq $0, %rsi
+ testb $1, %sil
+ jne LBB89_2
+## BB#1: ## %nocarry
+ movq %rax, (%rdi)
+ movq %rdx, 8(%rdi)
+ movq %r11, 16(%rdi)
+ movq %r10, 24(%rdi)
+ movq %r9, 32(%rdi)
+ movq %r8, 40(%rdi)
+LBB89_2: ## %carry
+ popq %rbx
+ popq %r14
+ popq %r15
+ retq
+
+ .globl _mcl_fp_addNF6Lbmi2
+ .p2align 4, 0x90
+_mcl_fp_addNF6Lbmi2: ## @mcl_fp_addNF6Lbmi2
+## BB#0:
+ pushq %r15
+ pushq %r14
+ pushq %r13
+ pushq %r12
+ pushq %rbx
+ movq 40(%rdx), %r8
+ movq 32(%rdx), %r9
+ movq 24(%rdx), %r10
+ movq 16(%rdx), %r11
+ movq (%rdx), %r15
+ movq 8(%rdx), %r14
+ addq (%rsi), %r15
+ adcq 8(%rsi), %r14
+ adcq 16(%rsi), %r11
+ adcq 24(%rsi), %r10
+ adcq 32(%rsi), %r9
+ adcq 40(%rsi), %r8
+ movq %r15, %rsi
+ subq (%rcx), %rsi
+ movq %r14, %rbx
+ sbbq 8(%rcx), %rbx
+ movq %r11, %rdx
+ sbbq 16(%rcx), %rdx
+ movq %r10, %r13
+ sbbq 24(%rcx), %r13
+ movq %r9, %r12
+ sbbq 32(%rcx), %r12
+ movq %r8, %rax
+ sbbq 40(%rcx), %rax
+ movq %rax, %rcx
+ sarq $63, %rcx
+ cmovsq %r15, %rsi
+ movq %rsi, (%rdi)
+ cmovsq %r14, %rbx
+ movq %rbx, 8(%rdi)
+ cmovsq %r11, %rdx
+ movq %rdx, 16(%rdi)
+ cmovsq %r10, %r13
+ movq %r13, 24(%rdi)
+ cmovsq %r9, %r12
+ movq %r12, 32(%rdi)
+ cmovsq %r8, %rax
+ movq %rax, 40(%rdi)
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ popq %r15
+ retq
+
+ .globl _mcl_fp_sub6Lbmi2
+ .p2align 4, 0x90
+_mcl_fp_sub6Lbmi2: ## @mcl_fp_sub6Lbmi2
+## BB#0:
+ pushq %r15
+ pushq %r14
+ pushq %r12
+ pushq %rbx
+ movq 40(%rdx),
%r14 + movq 40(%rsi), %r8 + movq 32(%rsi), %r9 + movq 24(%rsi), %r10 + movq 16(%rsi), %r11 + movq (%rsi), %rax + movq 8(%rsi), %rsi + xorl %ebx, %ebx + subq (%rdx), %rax + sbbq 8(%rdx), %rsi + movq 24(%rdx), %r15 + movq 32(%rdx), %r12 + sbbq 16(%rdx), %r11 + movq %rax, (%rdi) + movq %rsi, 8(%rdi) + movq %r11, 16(%rdi) + sbbq %r15, %r10 + movq %r10, 24(%rdi) + sbbq %r12, %r9 + movq %r9, 32(%rdi) + sbbq %r14, %r8 + movq %r8, 40(%rdi) + sbbq $0, %rbx + testb $1, %bl + je LBB91_2 +## BB#1: ## %carry + movq 40(%rcx), %r14 + movq 32(%rcx), %r15 + movq 24(%rcx), %r12 + movq 8(%rcx), %rbx + movq 16(%rcx), %rdx + addq (%rcx), %rax + movq %rax, (%rdi) + adcq %rsi, %rbx + movq %rbx, 8(%rdi) + adcq %r11, %rdx + movq %rdx, 16(%rdi) + adcq %r10, %r12 + movq %r12, 24(%rdi) + adcq %r9, %r15 + movq %r15, 32(%rdi) + adcq %r8, %r14 + movq %r14, 40(%rdi) +LBB91_2: ## %nocarry + popq %rbx + popq %r12 + popq %r14 + popq %r15 + retq + + .globl _mcl_fp_subNF6Lbmi2 + .p2align 4, 0x90 +_mcl_fp_subNF6Lbmi2: ## @mcl_fp_subNF6Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movdqu (%rdx), %xmm0 + movdqu 16(%rdx), %xmm1 + movdqu 32(%rdx), %xmm2 + pshufd $78, %xmm2, %xmm3 ## xmm3 = xmm2[2,3,0,1] + movd %xmm3, %r10 + movdqu (%rsi), %xmm3 + movdqu 16(%rsi), %xmm4 + movdqu 32(%rsi), %xmm5 + pshufd $78, %xmm5, %xmm6 ## xmm6 = xmm5[2,3,0,1] + movd %xmm6, %rax + movd %xmm2, %r11 + movd %xmm5, %r8 + pshufd $78, %xmm1, %xmm2 ## xmm2 = xmm1[2,3,0,1] + movd %xmm2, %r14 + pshufd $78, %xmm4, %xmm2 ## xmm2 = xmm4[2,3,0,1] + movd %xmm2, %r9 + movd %xmm1, %r15 + movd %xmm4, %r12 + pshufd $78, %xmm0, %xmm1 ## xmm1 = xmm0[2,3,0,1] + movd %xmm1, %r13 + pshufd $78, %xmm3, %xmm1 ## xmm1 = xmm3[2,3,0,1] + movd %xmm1, %rbp + movd %xmm0, %rdx + movd %xmm3, %rbx + subq %rdx, %rbx + sbbq %r13, %rbp + sbbq %r15, %r12 + sbbq %r14, %r9 + sbbq %r11, %r8 + sbbq %r10, %rax + movq %rax, %rdx + sarq $63, %rdx + movq %rdx, %rsi + shldq $1, %rax, %rsi + andq (%rcx), %rsi + movq 40(%rcx), %r10 + andq %rdx, %r10 + movq 32(%rcx), %r11 + andq %rdx, %r11 + movq 24(%rcx), %r14 + andq %rdx, %r14 + rorxq $63, %rdx, %r15 + andq 16(%rcx), %rdx + andq 8(%rcx), %r15 + addq %rbx, %rsi + movq %rsi, (%rdi) + adcq %rbp, %r15 + movq %r15, 8(%rdi) + adcq %r12, %rdx + movq %rdx, 16(%rdi) + adcq %r9, %r14 + movq %r14, 24(%rdi) + adcq %r8, %r11 + movq %r11, 32(%rdi) + adcq %rax, %r10 + movq %r10, 40(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fpDbl_add6Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_add6Lbmi2: ## @mcl_fpDbl_add6Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq 88(%rdx), %rax + movq %rax, -8(%rsp) ## 8-byte Spill + movq 80(%rdx), %rax + movq %rax, -16(%rsp) ## 8-byte Spill + movq 72(%rdx), %r14 + movq 64(%rdx), %r15 + movq 24(%rsi), %rbp + movq 32(%rsi), %r13 + movq 16(%rdx), %r12 + movq (%rdx), %rbx + movq 8(%rdx), %rax + addq (%rsi), %rbx + adcq 8(%rsi), %rax + adcq 16(%rsi), %r12 + adcq 24(%rdx), %rbp + adcq 32(%rdx), %r13 + movq 56(%rdx), %r11 + movq 48(%rdx), %r9 + movq 40(%rdx), %rdx + movq %rbx, (%rdi) + movq 88(%rsi), %r8 + movq %rax, 8(%rdi) + movq 80(%rsi), %r10 + movq %r12, 16(%rdi) + movq 72(%rsi), %r12 + movq %rbp, 24(%rdi) + movq 40(%rsi), %rax + adcq %rdx, %rax + movq 64(%rsi), %rdx + movq %r13, 32(%rdi) + movq 56(%rsi), %r13 + movq 48(%rsi), %rbp + adcq %r9, %rbp + movq %rax, 40(%rdi) + adcq %r11, %r13 + adcq %r15, %rdx + adcq %r14, %r12 + adcq -16(%rsp), %r10 ## 8-byte Folded Reload + adcq -8(%rsp), 
%r8 ## 8-byte Folded Reload + sbbq %rax, %rax + andl $1, %eax + movq %rbp, %rsi + subq (%rcx), %rsi + movq %r13, %rbx + sbbq 8(%rcx), %rbx + movq %rdx, %r9 + sbbq 16(%rcx), %r9 + movq %r12, %r11 + sbbq 24(%rcx), %r11 + movq %r10, %r14 + sbbq 32(%rcx), %r14 + movq %r8, %r15 + sbbq 40(%rcx), %r15 + sbbq $0, %rax + andl $1, %eax + cmovneq %rbp, %rsi + movq %rsi, 48(%rdi) + testb %al, %al + cmovneq %r13, %rbx + movq %rbx, 56(%rdi) + cmovneq %rdx, %r9 + movq %r9, 64(%rdi) + cmovneq %r12, %r11 + movq %r11, 72(%rdi) + cmovneq %r10, %r14 + movq %r14, 80(%rdi) + cmovneq %r8, %r15 + movq %r15, 88(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fpDbl_sub6Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_sub6Lbmi2: ## @mcl_fpDbl_sub6Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq 88(%rdx), %r9 + movq 80(%rdx), %r10 + movq 72(%rdx), %r14 + movq 16(%rsi), %r8 + movq (%rsi), %r15 + movq 8(%rsi), %r11 + xorl %eax, %eax + subq (%rdx), %r15 + sbbq 8(%rdx), %r11 + sbbq 16(%rdx), %r8 + movq 24(%rsi), %rbx + sbbq 24(%rdx), %rbx + movq 32(%rsi), %r12 + sbbq 32(%rdx), %r12 + movq 64(%rdx), %r13 + movq %r15, (%rdi) + movq 56(%rdx), %rbp + movq %r11, 8(%rdi) + movq 48(%rdx), %r15 + movq 40(%rdx), %rdx + movq %r8, 16(%rdi) + movq 88(%rsi), %r8 + movq %rbx, 24(%rdi) + movq 40(%rsi), %rbx + sbbq %rdx, %rbx + movq 80(%rsi), %r11 + movq %r12, 32(%rdi) + movq 48(%rsi), %rdx + sbbq %r15, %rdx + movq 72(%rsi), %r15 + movq %rbx, 40(%rdi) + movq 64(%rsi), %r12 + movq 56(%rsi), %rsi + sbbq %rbp, %rsi + sbbq %r13, %r12 + sbbq %r14, %r15 + sbbq %r10, %r11 + sbbq %r9, %r8 + movl $0, %ebp + sbbq $0, %rbp + andl $1, %ebp + movq (%rcx), %r14 + cmoveq %rax, %r14 + testb %bpl, %bpl + movq 16(%rcx), %r9 + cmoveq %rax, %r9 + movq 8(%rcx), %rbp + cmoveq %rax, %rbp + movq 40(%rcx), %r10 + cmoveq %rax, %r10 + movq 32(%rcx), %rbx + cmoveq %rax, %rbx + cmovneq 24(%rcx), %rax + addq %rdx, %r14 + movq %r14, 48(%rdi) + adcq %rsi, %rbp + movq %rbp, 56(%rdi) + adcq %r12, %r9 + movq %r9, 64(%rdi) + adcq %r15, %rax + movq %rax, 72(%rdi) + adcq %r11, %rbx + movq %rbx, 80(%rdi) + adcq %r8, %r10 + movq %r10, 88(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_mulUnitPre7Lbmi2 + .p2align 4, 0x90 +_mcl_fp_mulUnitPre7Lbmi2: ## @mcl_fp_mulUnitPre7Lbmi2 +## BB#0: + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + mulxq 48(%rsi), %r8, %r11 + mulxq 40(%rsi), %r9, %r13 + mulxq 32(%rsi), %r10, %rcx + mulxq 8(%rsi), %r12, %r14 + mulxq (%rsi), %r15, %rbx + addq %r12, %rbx + mulxq 24(%rsi), %r12, %rax + mulxq 16(%rsi), %rdx, %rsi + movq %r15, (%rdi) + movq %rbx, 8(%rdi) + adcq %r14, %rdx + movq %rdx, 16(%rdi) + adcq %r12, %rsi + movq %rsi, 24(%rdi) + adcq %r10, %rax + movq %rax, 32(%rdi) + adcq %r9, %rcx + movq %rcx, 40(%rdi) + adcq %r8, %r13 + movq %r13, 48(%rdi) + adcq $0, %r11 + movq %r11, 56(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + retq + + .globl _mcl_fpDbl_mulPre7Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_mulPre7Lbmi2: ## @mcl_fpDbl_mulPre7Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rdx, %r14 + movq %rsi, %r8 + movq %rdi, %r13 + movq %r13, -48(%rsp) ## 8-byte Spill + movq (%r8), %rcx + movq %rcx, -72(%rsp) ## 8-byte Spill + movq 8(%r8), %rax + movq %rax, -104(%rsp) ## 8-byte Spill + movq (%r14), %rsi + movq %r14, -64(%rsp) ## 8-byte Spill + movq %rax, %rdx + mulxq %rsi, %rbp, %rax + movq %rcx, %rdx + mulxq %rsi, 
%rdx, %rcx + movq %rdx, -56(%rsp) ## 8-byte Spill + movq 24(%r8), %rdi + movq %rdi, -88(%rsp) ## 8-byte Spill + movq 16(%r8), %rdx + movq %rdx, -96(%rsp) ## 8-byte Spill + addq %rbp, %rcx + mulxq %rsi, %rbx, %rbp + adcq %rax, %rbx + movq %rdi, %rdx + mulxq %rsi, %r12, %rax + adcq %rbp, %r12 + movq 32(%r8), %rdx + movq %rdx, -80(%rsp) ## 8-byte Spill + mulxq %rsi, %r9, %rbp + adcq %rax, %r9 + movq 40(%r8), %rdi + movq %rdi, %rdx + mulxq %rsi, %r10, %rax + adcq %rbp, %r10 + movq 48(%r8), %r15 + movq %r15, %rdx + mulxq %rsi, %rsi, %r11 + adcq %rax, %rsi + movq -56(%rsp), %rax ## 8-byte Reload + movq %rax, (%r13) + adcq $0, %r11 + movq 8(%r14), %r13 + movq -72(%rsp), %rdx ## 8-byte Reload + mulxq %r13, %r14, %rax + movq %rax, -72(%rsp) ## 8-byte Spill + addq %rcx, %r14 + movq -104(%rsp), %rdx ## 8-byte Reload + mulxq %r13, %rcx, %rax + movq %rax, -104(%rsp) ## 8-byte Spill + adcq %rbx, %rcx + movq -96(%rsp), %rdx ## 8-byte Reload + mulxq %r13, %rbx, %rax + movq %rax, -96(%rsp) ## 8-byte Spill + adcq %r12, %rbx + movq -88(%rsp), %rdx ## 8-byte Reload + mulxq %r13, %rbp, %rax + movq %rax, -88(%rsp) ## 8-byte Spill + adcq %r9, %rbp + movq -80(%rsp), %rdx ## 8-byte Reload + mulxq %r13, %rax, %r9 + adcq %r10, %rax + movq %rdi, %rdx + mulxq %r13, %r10, %rdi + adcq %rsi, %r10 + movq %r15, %rdx + mulxq %r13, %r13, %rdx + adcq %r11, %r13 + sbbq %r12, %r12 + andl $1, %r12d + addq -72(%rsp), %rcx ## 8-byte Folded Reload + adcq -104(%rsp), %rbx ## 8-byte Folded Reload + adcq -96(%rsp), %rbp ## 8-byte Folded Reload + adcq -88(%rsp), %rax ## 8-byte Folded Reload + adcq %r9, %r10 + movq -48(%rsp), %rsi ## 8-byte Reload + movq %r14, 8(%rsi) + adcq %rdi, %r13 + adcq %rdx, %r12 + movq (%r8), %rsi + movq %rsi, -88(%rsp) ## 8-byte Spill + movq 8(%r8), %r11 + movq %r11, -104(%rsp) ## 8-byte Spill + movq -64(%rsp), %rdx ## 8-byte Reload + movq 16(%rdx), %rdi + movq %rsi, %rdx + mulxq %rdi, %r9, %rdx + movq %rdx, -8(%rsp) ## 8-byte Spill + addq %rcx, %r9 + movq %r11, %rdx + mulxq %rdi, %r14, %rcx + movq %rcx, -16(%rsp) ## 8-byte Spill + adcq %rbx, %r14 + movq 16(%r8), %rdx + movq %rdx, -96(%rsp) ## 8-byte Spill + mulxq %rdi, %rsi, %rcx + movq %rcx, -24(%rsp) ## 8-byte Spill + adcq %rbp, %rsi + movq 24(%r8), %rdx + movq %rdx, -80(%rsp) ## 8-byte Spill + mulxq %rdi, %rbp, %rcx + movq %rcx, -32(%rsp) ## 8-byte Spill + adcq %rax, %rbp + movq 32(%r8), %rdx + movq %rdx, -72(%rsp) ## 8-byte Spill + mulxq %rdi, %r11, %rax + movq %rax, -40(%rsp) ## 8-byte Spill + adcq %r10, %r11 + movq 40(%r8), %rdx + movq %rdx, -56(%rsp) ## 8-byte Spill + mulxq %rdi, %r15, %rax + adcq %r13, %r15 + movq 48(%r8), %r13 + movq %r13, %rdx + mulxq %rdi, %rcx, %rdx + adcq %r12, %rcx + sbbq %rbx, %rbx + andl $1, %ebx + addq -8(%rsp), %r14 ## 8-byte Folded Reload + adcq -16(%rsp), %rsi ## 8-byte Folded Reload + adcq -24(%rsp), %rbp ## 8-byte Folded Reload + adcq -32(%rsp), %r11 ## 8-byte Folded Reload + adcq -40(%rsp), %r15 ## 8-byte Folded Reload + adcq %rax, %rcx + adcq %rdx, %rbx + movq -48(%rsp), %rax ## 8-byte Reload + movq %r9, 16(%rax) + movq -64(%rsp), %rax ## 8-byte Reload + movq 24(%rax), %rdi + movq -88(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %r9, %rax + movq %rax, -88(%rsp) ## 8-byte Spill + addq %r14, %r9 + movq -104(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %rax, %rdx + movq %rdx, -104(%rsp) ## 8-byte Spill + adcq %rsi, %rax + movq -96(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %r14, %rdx + movq %rdx, -96(%rsp) ## 8-byte Spill + adcq %rbp, %r14 + movq -80(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %r10, %rdx + movq %rdx, -80(%rsp) 
## 8-byte Spill + adcq %r11, %r10 + movq -72(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %rbp, %rsi + adcq %r15, %rbp + movq -56(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %r11, %r15 + adcq %rcx, %r11 + movq %r13, %rdx + mulxq %rdi, %r13, %rcx + adcq %rbx, %r13 + sbbq %r12, %r12 + andl $1, %r12d + addq -88(%rsp), %rax ## 8-byte Folded Reload + adcq -104(%rsp), %r14 ## 8-byte Folded Reload + adcq -96(%rsp), %r10 ## 8-byte Folded Reload + adcq -80(%rsp), %rbp ## 8-byte Folded Reload + adcq %rsi, %r11 + movq -48(%rsp), %rdi ## 8-byte Reload + movq %r9, 24(%rdi) + adcq %r15, %r13 + adcq %rcx, %r12 + movq (%r8), %rdx + movq %rdx, -88(%rsp) ## 8-byte Spill + movq 8(%r8), %rbx + movq %rbx, -104(%rsp) ## 8-byte Spill + movq -64(%rsp), %rcx ## 8-byte Reload + movq 32(%rcx), %rcx + mulxq %rcx, %rsi, %rdx + movq %rdx, -16(%rsp) ## 8-byte Spill + addq %rax, %rsi + movq %rbx, %rdx + mulxq %rcx, %r9, %rax + movq %rax, -24(%rsp) ## 8-byte Spill + adcq %r14, %r9 + movq 16(%r8), %rdx + movq %rdx, -96(%rsp) ## 8-byte Spill + mulxq %rcx, %rax, %rdx + movq %rdx, -32(%rsp) ## 8-byte Spill + adcq %r10, %rax + movq 24(%r8), %rdx + movq %rdx, -80(%rsp) ## 8-byte Spill + mulxq %rcx, %r15, %rdx + movq %rdx, -40(%rsp) ## 8-byte Spill + adcq %rbp, %r15 + movq 32(%r8), %rdx + movq %rdx, -72(%rsp) ## 8-byte Spill + mulxq %rcx, %r10, %rbp + adcq %r11, %r10 + movq 40(%r8), %rdx + movq %rdx, -56(%rsp) ## 8-byte Spill + mulxq %rcx, %r11, %rbx + adcq %r13, %r11 + movq 48(%r8), %rdx + movq %rdx, -8(%rsp) ## 8-byte Spill + mulxq %rcx, %r14, %rcx + adcq %r12, %r14 + sbbq %r12, %r12 + andl $1, %r12d + addq -16(%rsp), %r9 ## 8-byte Folded Reload + adcq -24(%rsp), %rax ## 8-byte Folded Reload + adcq -32(%rsp), %r15 ## 8-byte Folded Reload + adcq -40(%rsp), %r10 ## 8-byte Folded Reload + adcq %rbp, %r11 + adcq %rbx, %r14 + adcq %rcx, %r12 + movq %rsi, 32(%rdi) + movq -64(%rsp), %rsi ## 8-byte Reload + movq 40(%rsi), %rdi + movq -88(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %r13, %rcx + movq %rcx, -88(%rsp) ## 8-byte Spill + addq %r9, %r13 + movq -104(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %rcx, %rdx + movq %rdx, -104(%rsp) ## 8-byte Spill + adcq %rax, %rcx + movq -96(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %rax, %rdx + movq %rdx, -96(%rsp) ## 8-byte Spill + adcq %r15, %rax + movq -80(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %rbx, %rdx + movq %rdx, -80(%rsp) ## 8-byte Spill + adcq %r10, %rbx + movq -72(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %rbp, %r15 + adcq %r11, %rbp + movq -56(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %r9, %r11 + adcq %r14, %r9 + movq -8(%rsp), %rdx ## 8-byte Reload + mulxq %rdi, %r10, %rdx + adcq %r12, %r10 + sbbq %rdi, %rdi + andl $1, %edi + addq -88(%rsp), %rcx ## 8-byte Folded Reload + adcq -104(%rsp), %rax ## 8-byte Folded Reload + adcq -96(%rsp), %rbx ## 8-byte Folded Reload + adcq -80(%rsp), %rbp ## 8-byte Folded Reload + adcq %r15, %r9 + movq -48(%rsp), %r14 ## 8-byte Reload + movq %r13, 40(%r14) + adcq %r11, %r10 + adcq %rdx, %rdi + movq 48(%rsi), %rdx + mulxq (%r8), %r11, %rsi + movq %rsi, -64(%rsp) ## 8-byte Spill + addq %rcx, %r11 + mulxq 8(%r8), %rsi, %r15 + adcq %rax, %rsi + mulxq 16(%r8), %rcx, %rax + movq %rax, -104(%rsp) ## 8-byte Spill + adcq %rbx, %rcx + mulxq 24(%r8), %rbx, %r12 + adcq %rbp, %rbx + mulxq 32(%r8), %rbp, %r13 + adcq %r9, %rbp + mulxq 40(%r8), %rax, %r9 + adcq %r10, %rax + mulxq 48(%r8), %rdx, %r8 + adcq %rdi, %rdx + sbbq %r10, %r10 + andl $1, %r10d + addq -64(%rsp), %rsi ## 8-byte Folded Reload + adcq %r15, %rcx + movq %r11, 48(%r14) + movq %rsi, 56(%r14) + movq 
%rcx, 64(%r14) + adcq -104(%rsp), %rbx ## 8-byte Folded Reload + movq %rbx, 72(%r14) + adcq %r12, %rbp + movq %rbp, 80(%r14) + adcq %r13, %rax + movq %rax, 88(%r14) + adcq %r9, %rdx + movq %rdx, 96(%r14) + adcq %r8, %r10 + movq %r10, 104(%r14) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fpDbl_sqrPre7Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_sqrPre7Lbmi2: ## @mcl_fpDbl_sqrPre7Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rdi, -40(%rsp) ## 8-byte Spill + movq 16(%rsi), %rdx + movq %rdx, -88(%rsp) ## 8-byte Spill + movq (%rsi), %rcx + movq 8(%rsi), %rax + mulxq %rcx, %r8, %r10 + movq 24(%rsi), %rbx + movq %rbx, -96(%rsp) ## 8-byte Spill + movq %rax, %rdx + mulxq %rcx, %r12, %rbp + movq %rbp, -48(%rsp) ## 8-byte Spill + movq %rcx, %rdx + mulxq %rcx, %rdx, %rdi + movq %rdx, -80(%rsp) ## 8-byte Spill + addq %r12, %rdi + adcq %rbp, %r8 + movq %rbx, %rdx + mulxq %rcx, %rbp, %r9 + adcq %r10, %rbp + movq 32(%rsi), %rdx + movq %rdx, -72(%rsp) ## 8-byte Spill + mulxq %rcx, %r11, %r14 + adcq %r9, %r11 + movq 40(%rsi), %rdx + movq %rdx, -56(%rsp) ## 8-byte Spill + mulxq %rcx, %r10, %r15 + adcq %r14, %r10 + movq 48(%rsi), %r14 + movq %r14, %rdx + mulxq %rcx, %rcx, %r13 + adcq %r15, %rcx + movq -40(%rsp), %rdx ## 8-byte Reload + movq -80(%rsp), %rbx ## 8-byte Reload + movq %rbx, (%rdx) + adcq $0, %r13 + addq %r12, %rdi + movq %rax, %rdx + mulxq %rax, %r12, %rdx + movq %rdx, -80(%rsp) ## 8-byte Spill + adcq %r8, %r12 + movq -88(%rsp), %rdx ## 8-byte Reload + mulxq %rax, %r8, %rdx + movq %rdx, -88(%rsp) ## 8-byte Spill + adcq %rbp, %r8 + movq -96(%rsp), %rdx ## 8-byte Reload + mulxq %rax, %r9, %rbp + adcq %r11, %r9 + movq -72(%rsp), %rdx ## 8-byte Reload + mulxq %rax, %r15, %rdx + movq %rdx, -96(%rsp) ## 8-byte Spill + adcq %r10, %r15 + movq -56(%rsp), %rdx ## 8-byte Reload + mulxq %rax, %r11, %rbx + adcq %rcx, %r11 + movq %r14, %rdx + mulxq %rax, %r14, %rax + adcq %r13, %r14 + sbbq %r13, %r13 + andl $1, %r13d + addq -48(%rsp), %r12 ## 8-byte Folded Reload + adcq -80(%rsp), %r8 ## 8-byte Folded Reload + adcq -88(%rsp), %r9 ## 8-byte Folded Reload + adcq %rbp, %r15 + movq -40(%rsp), %rcx ## 8-byte Reload + movq %rdi, 8(%rcx) + adcq -96(%rsp), %r11 ## 8-byte Folded Reload + adcq %rbx, %r14 + adcq %rax, %r13 + movq (%rsi), %rdx + movq %rdx, -96(%rsp) ## 8-byte Spill + movq 8(%rsi), %rcx + movq %rcx, -88(%rsp) ## 8-byte Spill + movq 16(%rsi), %rbx + mulxq %rbx, %rax, %rdx + movq %rdx, -64(%rsp) ## 8-byte Spill + addq %r12, %rax + movq %rax, -48(%rsp) ## 8-byte Spill + movq %rcx, %rdx + mulxq %rbx, %r10, %rax + movq %rax, -16(%rsp) ## 8-byte Spill + adcq %r8, %r10 + movq %rbx, %rdx + mulxq %rbx, %r12, %rax + movq %rax, -24(%rsp) ## 8-byte Spill + adcq %r9, %r12 + movq 24(%rsi), %rax + movq %rax, %rdx + mulxq %rbx, %r8, %rdi + movq %rdi, -56(%rsp) ## 8-byte Spill + adcq %r8, %r15 + movq 32(%rsi), %rdx + movq %rdx, -72(%rsp) ## 8-byte Spill + mulxq %rbx, %rcx, %rdx + movq %rdx, -32(%rsp) ## 8-byte Spill + adcq %r11, %rcx + movq 40(%rsi), %rdx + movq %rdx, -80(%rsp) ## 8-byte Spill + mulxq %rbx, %rbp, %r11 + adcq %r14, %rbp + movq 48(%rsi), %r14 + movq %r14, %rdx + mulxq %rbx, %r9, %rdx + adcq %r13, %r9 + sbbq %rbx, %rbx + andl $1, %ebx + addq -64(%rsp), %r10 ## 8-byte Folded Reload + adcq -16(%rsp), %r12 ## 8-byte Folded Reload + adcq -24(%rsp), %r15 ## 8-byte Folded Reload + adcq %rdi, %rcx + adcq -32(%rsp), %rbp ## 8-byte Folded Reload + adcq %r11, %r9 + adcq %rdx, %rbx + movq -96(%rsp), %rdx ## 8-byte Reload + 
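+## Pass structure of this squaring routine, sketched from the code on
+## either side of this comment: each pass loads one source limb a[k]
+## into %rdx and walks a mulxq chain across all seven limbs, so the
+## k-th pass accumulates the column products a[j]*a[k] (j = 0..6) into
+## the running total and retires one low limb of the 14-word square.
+## Cross products appear to be recomputed in full rather than doubled,
+## which keeps every pass the same shape as a plain mulPre pass.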
mulxq %rax, %rdi, %rdx + movq %rdx, -96(%rsp) ## 8-byte Spill + addq %r10, %rdi + movq -88(%rsp), %rdx ## 8-byte Reload + mulxq %rax, %r11, %rdx + movq %rdx, -88(%rsp) ## 8-byte Spill + adcq %r12, %r11 + adcq %r8, %r15 + movq %rax, %rdx + mulxq %rax, %r8, %rdx + movq %rdx, -64(%rsp) ## 8-byte Spill + adcq %rcx, %r8 + movq -72(%rsp), %rdx ## 8-byte Reload + mulxq %rax, %r13, %rcx + movq %rcx, -72(%rsp) ## 8-byte Spill + adcq %rbp, %r13 + movq -80(%rsp), %rdx ## 8-byte Reload + mulxq %rax, %r12, %rbp + adcq %r9, %r12 + movq %r14, %rdx + mulxq %rax, %rcx, %rax + adcq %rbx, %rcx + sbbq %r10, %r10 + andl $1, %r10d + addq -96(%rsp), %r11 ## 8-byte Folded Reload + adcq -88(%rsp), %r15 ## 8-byte Folded Reload + adcq -56(%rsp), %r8 ## 8-byte Folded Reload + adcq -64(%rsp), %r13 ## 8-byte Folded Reload + movq -40(%rsp), %rdx ## 8-byte Reload + movq -48(%rsp), %rbx ## 8-byte Reload + movq %rbx, 16(%rdx) + movq %rdi, 24(%rdx) + adcq -72(%rsp), %r12 ## 8-byte Folded Reload + adcq %rbp, %rcx + adcq %rax, %r10 + movq (%rsi), %rdx + movq %rdx, -96(%rsp) ## 8-byte Spill + movq 8(%rsi), %rdi + movq %rdi, -88(%rsp) ## 8-byte Spill + movq 32(%rsi), %rbx + mulxq %rbx, %rax, %rdx + movq %rdx, -24(%rsp) ## 8-byte Spill + addq %r11, %rax + movq %rax, -48(%rsp) ## 8-byte Spill + movq %rdi, %rdx + mulxq %rbx, %r9, %rax + movq %rax, -32(%rsp) ## 8-byte Spill + adcq %r15, %r9 + movq 16(%rsi), %rdx + movq %rdx, -72(%rsp) ## 8-byte Spill + mulxq %rbx, %r15, %rax + movq %rax, -8(%rsp) ## 8-byte Spill + adcq %r8, %r15 + movq 24(%rsi), %rdx + movq %rdx, -80(%rsp) ## 8-byte Spill + mulxq %rbx, %r8, %rbp + adcq %r13, %r8 + movq %rbx, %rdx + mulxq %rbx, %r13, %r14 + adcq %r12, %r13 + movq 40(%rsi), %rax + movq %rax, %rdx + mulxq %rbx, %rdx, %rdi + movq %rdx, -16(%rsp) ## 8-byte Spill + movq %rdi, -56(%rsp) ## 8-byte Spill + adcq %rdx, %rcx + movq 48(%rsi), %rdx + movq %rdx, -64(%rsp) ## 8-byte Spill + mulxq %rbx, %r11, %rdx + adcq %r10, %r11 + sbbq %r12, %r12 + andl $1, %r12d + addq -24(%rsp), %r9 ## 8-byte Folded Reload + adcq -32(%rsp), %r15 ## 8-byte Folded Reload + adcq -8(%rsp), %r8 ## 8-byte Folded Reload + adcq %rbp, %r13 + adcq %r14, %rcx + adcq %rdi, %r11 + adcq %rdx, %r12 + movq -96(%rsp), %rdx ## 8-byte Reload + mulxq %rax, %r14, %rdi + addq %r9, %r14 + movq -88(%rsp), %rdx ## 8-byte Reload + mulxq %rax, %rbx, %rdx + movq %rdx, -88(%rsp) ## 8-byte Spill + adcq %r15, %rbx + movq -72(%rsp), %rdx ## 8-byte Reload + mulxq %rax, %rbp, %rdx + movq %rdx, -96(%rsp) ## 8-byte Spill + adcq %r8, %rbp + movq -80(%rsp), %rdx ## 8-byte Reload + mulxq %rax, %r10, %r15 + adcq %r13, %r10 + adcq -16(%rsp), %rcx ## 8-byte Folded Reload + movq %rax, %rdx + mulxq %rax, %r9, %r13 + adcq %r11, %r9 + movq -64(%rsp), %rdx ## 8-byte Reload + mulxq %rax, %rax, %r11 + adcq %r12, %rax + sbbq %r8, %r8 + andl $1, %r8d + addq %rdi, %rbx + adcq -88(%rsp), %rbp ## 8-byte Folded Reload + adcq -96(%rsp), %r10 ## 8-byte Folded Reload + adcq %r15, %rcx + movq -40(%rsp), %rdi ## 8-byte Reload + movq -48(%rsp), %rdx ## 8-byte Reload + movq %rdx, 32(%rdi) + movq %r14, 40(%rdi) + adcq -56(%rsp), %r9 ## 8-byte Folded Reload + adcq %r13, %rax + adcq %r11, %r8 + movq 48(%rsi), %rdx + mulxq (%rsi), %r12, %r11 + addq %rbx, %r12 + mulxq 8(%rsi), %rbx, %r14 + adcq %rbp, %rbx + mulxq 16(%rsi), %rbp, %r15 + adcq %r10, %rbp + mulxq 24(%rsi), %rdi, %r10 + adcq %rcx, %rdi + mulxq 32(%rsi), %rcx, %r13 + adcq %r9, %rcx + mulxq 40(%rsi), %rsi, %r9 + adcq %rax, %rsi + mulxq %rdx, %rdx, %rax + adcq %r8, %rdx + sbbq %r8, %r8 + andl $1, %r8d + addq %r11, %rbx + adcq %r14, 
%rbp + movq -40(%rsp), %r11 ## 8-byte Reload + movq %r12, 48(%r11) + movq %rbx, 56(%r11) + movq %rbp, 64(%r11) + adcq %r15, %rdi + movq %rdi, 72(%r11) + adcq %r10, %rcx + movq %rcx, 80(%r11) + adcq %r13, %rsi + movq %rsi, 88(%r11) + adcq %r9, %rdx + movq %rdx, 96(%r11) + adcq %rax, %r8 + movq %r8, 104(%r11) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_mont7Lbmi2 + .p2align 4, 0x90 +_mcl_fp_mont7Lbmi2: ## @mcl_fp_mont7Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + subq $56, %rsp + movq %rdx, -56(%rsp) ## 8-byte Spill + movq %rdi, 48(%rsp) ## 8-byte Spill + movq 48(%rsi), %rdi + movq %rdi, -64(%rsp) ## 8-byte Spill + movq (%rdx), %rax + movq %rdi, %rdx + mulxq %rax, %rdx, %r13 + movq %rdx, -40(%rsp) ## 8-byte Spill + movq 40(%rsi), %rdx + movq %rdx, -72(%rsp) ## 8-byte Spill + mulxq %rax, %rdx, %r8 + movq %rdx, -48(%rsp) ## 8-byte Spill + movq 32(%rsi), %rdx + movq %rdx, -80(%rsp) ## 8-byte Spill + mulxq %rax, %r10, %rdi + movq 24(%rsi), %rdx + movq %rdx, -32(%rsp) ## 8-byte Spill + mulxq %rax, %r14, %rbp + movq 16(%rsi), %rdx + movq %rdx, 32(%rsp) ## 8-byte Spill + mulxq %rax, %r12, %r15 + movq (%rsi), %rbx + movq %rbx, 24(%rsp) ## 8-byte Spill + movq 8(%rsi), %rdx + movq %rdx, 16(%rsp) ## 8-byte Spill + mulxq %rax, %rsi, %r11 + movq %rbx, %rdx + mulxq %rax, %rdx, %r9 + movq %rdx, -96(%rsp) ## 8-byte Spill + addq %rsi, %r9 + adcq %r12, %r11 + adcq %r14, %r15 + adcq %r10, %rbp + movq %rbp, -112(%rsp) ## 8-byte Spill + adcq -48(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, -104(%rsp) ## 8-byte Spill + adcq -40(%rsp), %r8 ## 8-byte Folded Reload + movq %r8, -128(%rsp) ## 8-byte Spill + adcq $0, %r13 + movq %r13, -120(%rsp) ## 8-byte Spill + movq -8(%rcx), %rax + movq %rax, 40(%rsp) ## 8-byte Spill + imulq %rax, %rdx + movq 32(%rcx), %rax + movq %rax, -40(%rsp) ## 8-byte Spill + mulxq %rax, %rbx, %r13 + movq 16(%rcx), %rsi + movq %rsi, -48(%rsp) ## 8-byte Spill + mulxq %rsi, %r14, %rbp + movq 8(%rcx), %rsi + movq %rsi, 8(%rsp) ## 8-byte Spill + mulxq %rsi, %rsi, %rax + movq (%rcx), %rdi + movq %rdi, (%rsp) ## 8-byte Spill + mulxq %rdi, %r8, %r12 + addq %rsi, %r12 + adcq %r14, %rax + movq %rax, %rdi + movq 24(%rcx), %rsi + movq %rsi, -8(%rsp) ## 8-byte Spill + mulxq %rsi, %r10, %r14 + adcq %rbp, %r10 + adcq %rbx, %r14 + movq 40(%rcx), %rsi + movq %rsi, -16(%rsp) ## 8-byte Spill + mulxq %rsi, %rbp, %rsi + adcq %r13, %rbp + movq 48(%rcx), %rax + movq %rax, -24(%rsp) ## 8-byte Spill + mulxq %rax, %rax, %rbx + adcq %rsi, %rax + adcq $0, %rbx + addq -96(%rsp), %r8 ## 8-byte Folded Reload + adcq %r9, %r12 + adcq %r11, %rdi + movq %rdi, -96(%rsp) ## 8-byte Spill + adcq %r15, %r10 + adcq -112(%rsp), %r14 ## 8-byte Folded Reload + adcq -104(%rsp), %rbp ## 8-byte Folded Reload + adcq -128(%rsp), %rax ## 8-byte Folded Reload + adcq -120(%rsp), %rbx ## 8-byte Folded Reload + sbbq %rsi, %rsi + andl $1, %esi + movq -56(%rsp), %rcx ## 8-byte Reload + movq 8(%rcx), %rdx + mulxq -64(%rsp), %rdi, %rcx ## 8-byte Folded Reload + movq %rdi, -104(%rsp) ## 8-byte Spill + movq %rcx, -120(%rsp) ## 8-byte Spill + mulxq -72(%rsp), %rdi, %rcx ## 8-byte Folded Reload + movq %rdi, -88(%rsp) ## 8-byte Spill + movq %rcx, -128(%rsp) ## 8-byte Spill + mulxq 16(%rsp), %r9, %r8 ## 8-byte Folded Reload + mulxq 24(%rsp), %rdi, %r11 ## 8-byte Folded Reload + movq %rdi, -112(%rsp) ## 8-byte Spill + addq %r9, %r11 + mulxq 32(%rsp), %rcx, %r9 ## 8-byte Folded Reload + adcq %r8, %rcx + movq %rcx, %rdi + mulxq -32(%rsp), %r13, %rcx ## 
8-byte Folded Reload + adcq %r9, %r13 + mulxq -80(%rsp), %r8, %r15 ## 8-byte Folded Reload + adcq %rcx, %r8 + adcq -88(%rsp), %r15 ## 8-byte Folded Reload + movq -128(%rsp), %rdx ## 8-byte Reload + adcq -104(%rsp), %rdx ## 8-byte Folded Reload + movq -120(%rsp), %rcx ## 8-byte Reload + adcq $0, %rcx + movq -112(%rsp), %r9 ## 8-byte Reload + addq %r12, %r9 + movq %r9, -112(%rsp) ## 8-byte Spill + movq %r11, %r12 + adcq -96(%rsp), %r12 ## 8-byte Folded Reload + adcq %r10, %rdi + movq %rdi, -88(%rsp) ## 8-byte Spill + adcq %r14, %r13 + adcq %rbp, %r8 + adcq %rax, %r15 + adcq %rbx, %rdx + movq %rdx, -128(%rsp) ## 8-byte Spill + adcq %rsi, %rcx + movq %rcx, -120(%rsp) ## 8-byte Spill + sbbq %rax, %rax + andl $1, %eax + movq %rax, -96(%rsp) ## 8-byte Spill + movq %r9, %rdx + imulq 40(%rsp), %rdx ## 8-byte Folded Reload + mulxq -24(%rsp), %r10, %rax ## 8-byte Folded Reload + movq %rax, -104(%rsp) ## 8-byte Spill + mulxq -8(%rsp), %rcx, %rax ## 8-byte Folded Reload + mulxq 8(%rsp), %rdi, %rbx ## 8-byte Folded Reload + mulxq (%rsp), %r14, %r9 ## 8-byte Folded Reload + addq %rdi, %r9 + mulxq -48(%rsp), %rbp, %r11 ## 8-byte Folded Reload + adcq %rbx, %rbp + adcq %rcx, %r11 + mulxq -40(%rsp), %rbx, %rsi ## 8-byte Folded Reload + adcq %rax, %rbx + mulxq -16(%rsp), %rax, %rcx ## 8-byte Folded Reload + adcq %rsi, %rax + adcq %r10, %rcx + movq -104(%rsp), %rdx ## 8-byte Reload + adcq $0, %rdx + addq -112(%rsp), %r14 ## 8-byte Folded Reload + adcq %r12, %r9 + adcq -88(%rsp), %rbp ## 8-byte Folded Reload + adcq %r13, %r11 + adcq %r8, %rbx + adcq %r15, %rax + adcq -128(%rsp), %rcx ## 8-byte Folded Reload + adcq -120(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, -104(%rsp) ## 8-byte Spill + adcq $0, -96(%rsp) ## 8-byte Folded Spill + movq -56(%rsp), %rdx ## 8-byte Reload + movq 16(%rdx), %rdx + mulxq -64(%rsp), %rdi, %rsi ## 8-byte Folded Reload + movq %rdi, -112(%rsp) ## 8-byte Spill + movq %rsi, -120(%rsp) ## 8-byte Spill + mulxq -72(%rsp), %rdi, %rsi ## 8-byte Folded Reload + movq %rdi, -88(%rsp) ## 8-byte Spill + movq %rsi, -128(%rsp) ## 8-byte Spill + mulxq 32(%rsp), %rdi, %r10 ## 8-byte Folded Reload + mulxq 16(%rsp), %rsi, %r13 ## 8-byte Folded Reload + mulxq 24(%rsp), %r8, %r15 ## 8-byte Folded Reload + addq %rsi, %r15 + adcq %rdi, %r13 + mulxq -32(%rsp), %r12, %rsi ## 8-byte Folded Reload + adcq %r10, %r12 + mulxq -80(%rsp), %r10, %r14 ## 8-byte Folded Reload + adcq %rsi, %r10 + adcq -88(%rsp), %r14 ## 8-byte Folded Reload + movq -128(%rsp), %rsi ## 8-byte Reload + adcq -112(%rsp), %rsi ## 8-byte Folded Reload + movq -120(%rsp), %rdx ## 8-byte Reload + adcq $0, %rdx + addq %r9, %r8 + movq %r8, -112(%rsp) ## 8-byte Spill + adcq %rbp, %r15 + adcq %r11, %r13 + adcq %rbx, %r12 + adcq %rax, %r10 + adcq %rcx, %r14 + adcq -104(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, -128(%rsp) ## 8-byte Spill + adcq -96(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, -120(%rsp) ## 8-byte Spill + sbbq %rax, %rax + andl $1, %eax + movq %rax, %rbx + movq %r8, %rdx + imulq 40(%rsp), %rdx ## 8-byte Folded Reload + mulxq -24(%rsp), %rcx, %rax ## 8-byte Folded Reload + movq %rcx, -96(%rsp) ## 8-byte Spill + movq %rax, -104(%rsp) ## 8-byte Spill + mulxq -8(%rsp), %rcx, %rax ## 8-byte Folded Reload + mulxq 8(%rsp), %rbp, %rsi ## 8-byte Folded Reload + mulxq (%rsp), %r11, %r8 ## 8-byte Folded Reload + addq %rbp, %r8 + mulxq -48(%rsp), %rbp, %r9 ## 8-byte Folded Reload + adcq %rsi, %rbp + adcq %rcx, %r9 + mulxq -40(%rsp), %rsi, %rdi ## 8-byte Folded Reload + adcq %rax, %rsi + mulxq -16(%rsp), %rax, %rcx ## 8-byte Folded 
Reload + adcq %rdi, %rax + adcq -96(%rsp), %rcx ## 8-byte Folded Reload + movq -104(%rsp), %rdx ## 8-byte Reload + adcq $0, %rdx + addq -112(%rsp), %r11 ## 8-byte Folded Reload + adcq %r15, %r8 + adcq %r13, %rbp + adcq %r12, %r9 + adcq %r10, %rsi + adcq %r14, %rax + adcq -128(%rsp), %rcx ## 8-byte Folded Reload + adcq -120(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, -104(%rsp) ## 8-byte Spill + adcq $0, %rbx + movq %rbx, -128(%rsp) ## 8-byte Spill + movq -56(%rsp), %rdx ## 8-byte Reload + movq 24(%rdx), %rdx + mulxq -64(%rsp), %rbx, %rdi ## 8-byte Folded Reload + movq %rbx, -96(%rsp) ## 8-byte Spill + movq %rdi, -120(%rsp) ## 8-byte Spill + mulxq -72(%rsp), %rdi, %r13 ## 8-byte Folded Reload + movq %rdi, -88(%rsp) ## 8-byte Spill + mulxq 32(%rsp), %r10, %r11 ## 8-byte Folded Reload + mulxq 16(%rsp), %rdi, %r15 ## 8-byte Folded Reload + mulxq 24(%rsp), %rbx, %r12 ## 8-byte Folded Reload + movq %rbx, -112(%rsp) ## 8-byte Spill + addq %rdi, %r12 + adcq %r10, %r15 + mulxq -32(%rsp), %rbx, %rdi ## 8-byte Folded Reload + adcq %r11, %rbx + mulxq -80(%rsp), %r10, %r14 ## 8-byte Folded Reload + adcq %rdi, %r10 + adcq -88(%rsp), %r14 ## 8-byte Folded Reload + adcq -96(%rsp), %r13 ## 8-byte Folded Reload + movq -120(%rsp), %rdx ## 8-byte Reload + adcq $0, %rdx + movq -112(%rsp), %rdi ## 8-byte Reload + addq %r8, %rdi + movq %rdi, -112(%rsp) ## 8-byte Spill + adcq %rbp, %r12 + adcq %r9, %r15 + adcq %rsi, %rbx + adcq %rax, %r10 + adcq %rcx, %r14 + adcq -104(%rsp), %r13 ## 8-byte Folded Reload + adcq -128(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, -120(%rsp) ## 8-byte Spill + sbbq %rax, %rax + andl $1, %eax + movq %rax, -104(%rsp) ## 8-byte Spill + movq %rdi, %rdx + imulq 40(%rsp), %rdx ## 8-byte Folded Reload + mulxq -24(%rsp), %rcx, %rax ## 8-byte Folded Reload + movq %rcx, -96(%rsp) ## 8-byte Spill + movq %rax, -128(%rsp) ## 8-byte Spill + mulxq -8(%rsp), %rcx, %rax ## 8-byte Folded Reload + mulxq 8(%rsp), %rbp, %rsi ## 8-byte Folded Reload + mulxq (%rsp), %r11, %r8 ## 8-byte Folded Reload + addq %rbp, %r8 + mulxq -48(%rsp), %rbp, %r9 ## 8-byte Folded Reload + adcq %rsi, %rbp + adcq %rcx, %r9 + mulxq -40(%rsp), %rsi, %rdi ## 8-byte Folded Reload + adcq %rax, %rsi + mulxq -16(%rsp), %rax, %rcx ## 8-byte Folded Reload + adcq %rdi, %rax + adcq -96(%rsp), %rcx ## 8-byte Folded Reload + movq -128(%rsp), %rdx ## 8-byte Reload + adcq $0, %rdx + addq -112(%rsp), %r11 ## 8-byte Folded Reload + adcq %r12, %r8 + adcq %r15, %rbp + adcq %rbx, %r9 + adcq %r10, %rsi + adcq %r14, %rax + adcq %r13, %rcx + adcq -120(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, -128(%rsp) ## 8-byte Spill + adcq $0, -104(%rsp) ## 8-byte Folded Spill + movq -56(%rsp), %rdx ## 8-byte Reload + movq 32(%rdx), %rdx + mulxq -64(%rsp), %rbx, %rdi ## 8-byte Folded Reload + movq %rbx, -112(%rsp) ## 8-byte Spill + movq %rdi, -120(%rsp) ## 8-byte Spill + mulxq -72(%rsp), %rdi, %r11 ## 8-byte Folded Reload + movq %rdi, -96(%rsp) ## 8-byte Spill + mulxq 32(%rsp), %r10, %r13 ## 8-byte Folded Reload + mulxq 16(%rsp), %rdi, %r15 ## 8-byte Folded Reload + mulxq 24(%rsp), %rbx, %r12 ## 8-byte Folded Reload + addq %rdi, %r12 + adcq %r10, %r15 + mulxq -32(%rsp), %r10, %rdi ## 8-byte Folded Reload + adcq %r13, %r10 + mulxq -80(%rsp), %r13, %r14 ## 8-byte Folded Reload + adcq %rdi, %r13 + adcq -96(%rsp), %r14 ## 8-byte Folded Reload + adcq -112(%rsp), %r11 ## 8-byte Folded Reload + movq -120(%rsp), %rdx ## 8-byte Reload + adcq $0, %rdx + addq %r8, %rbx + movq %rbx, -96(%rsp) ## 8-byte Spill + adcq %rbp, %r12 + adcq %r9, %r15 + adcq %rsi, %r10 
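+## Word-step sketch for this interleaved (CIOS-style) Montgomery
+## multiply, repeated once per 64-bit multiplier word b[i] (assuming
+## the spilled 40(%rsp) value is nprime = -p^{-1} mod 2^64, loaded
+## from -8(p) in the prologue):
+##   t <- t + a * b[i]             (mulxq fan-out + adcq carry chain)
+##   m <- t[0] * nprime mod 2^64   (imulq 40(%rsp))
+##   t <- (t + m * p) >> 64        (second mulxq fan-out; t[0] cancels)
+## Seven such steps are followed by a conditional subtract of p
+## (the subq/sbbq ... cmovneq block) to produce the reduced result.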
+ adcq %rax, %r13 + adcq %rcx, %r14 + adcq -128(%rsp), %r11 ## 8-byte Folded Reload + movq %r11, -88(%rsp) ## 8-byte Spill + adcq -104(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, -120(%rsp) ## 8-byte Spill + sbbq %rax, %rax + andl $1, %eax + movq %rax, -112(%rsp) ## 8-byte Spill + movq %rbx, %rdx + imulq 40(%rsp), %rdx ## 8-byte Folded Reload + mulxq -24(%rsp), %rcx, %rax ## 8-byte Folded Reload + movq %rcx, -128(%rsp) ## 8-byte Spill + movq %rax, -104(%rsp) ## 8-byte Spill + mulxq -8(%rsp), %rcx, %rax ## 8-byte Folded Reload + mulxq 8(%rsp), %rbp, %rsi ## 8-byte Folded Reload + mulxq (%rsp), %r9, %r11 ## 8-byte Folded Reload + addq %rbp, %r11 + mulxq -48(%rsp), %rbp, %r8 ## 8-byte Folded Reload + adcq %rsi, %rbp + adcq %rcx, %r8 + mulxq -40(%rsp), %rsi, %rdi ## 8-byte Folded Reload + adcq %rax, %rsi + mulxq -16(%rsp), %rax, %rcx ## 8-byte Folded Reload + adcq %rdi, %rax + adcq -128(%rsp), %rcx ## 8-byte Folded Reload + movq -104(%rsp), %rdx ## 8-byte Reload + adcq $0, %rdx + addq -96(%rsp), %r9 ## 8-byte Folded Reload + adcq %r12, %r11 + adcq %r15, %rbp + adcq %r10, %r8 + adcq %r13, %rsi + adcq %r14, %rax + adcq -88(%rsp), %rcx ## 8-byte Folded Reload + adcq -120(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, -104(%rsp) ## 8-byte Spill + adcq $0, -112(%rsp) ## 8-byte Folded Spill + movq -56(%rsp), %rdx ## 8-byte Reload + movq 40(%rdx), %rdx + mulxq -64(%rsp), %rbx, %rdi ## 8-byte Folded Reload + movq %rbx, -96(%rsp) ## 8-byte Spill + movq %rdi, -120(%rsp) ## 8-byte Spill + mulxq -72(%rsp), %rbx, %rdi ## 8-byte Folded Reload + movq %rbx, -88(%rsp) ## 8-byte Spill + movq %rdi, -128(%rsp) ## 8-byte Spill + mulxq 32(%rsp), %rbx, %r10 ## 8-byte Folded Reload + mulxq 16(%rsp), %rdi, %r13 ## 8-byte Folded Reload + mulxq 24(%rsp), %r9, %r12 ## 8-byte Folded Reload + addq %rdi, %r12 + adcq %rbx, %r13 + mulxq -32(%rsp), %r15, %rdi ## 8-byte Folded Reload + adcq %r10, %r15 + mulxq -80(%rsp), %r10, %r14 ## 8-byte Folded Reload + adcq %rdi, %r10 + adcq -88(%rsp), %r14 ## 8-byte Folded Reload + movq -128(%rsp), %rdi ## 8-byte Reload + adcq -96(%rsp), %rdi ## 8-byte Folded Reload + movq -120(%rsp), %rdx ## 8-byte Reload + adcq $0, %rdx + addq %r11, %r9 + movq %r9, -96(%rsp) ## 8-byte Spill + adcq %rbp, %r12 + adcq %r8, %r13 + adcq %rsi, %r15 + adcq %rax, %r10 + adcq %rcx, %r14 + adcq -104(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, -128(%rsp) ## 8-byte Spill + adcq -112(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, -120(%rsp) ## 8-byte Spill + sbbq %rax, %rax + andl $1, %eax + movq %rax, -112(%rsp) ## 8-byte Spill + movq %r9, %rdx + imulq 40(%rsp), %rdx ## 8-byte Folded Reload + mulxq -24(%rsp), %rcx, %rax ## 8-byte Folded Reload + movq %rcx, -88(%rsp) ## 8-byte Spill + movq %rax, -104(%rsp) ## 8-byte Spill + mulxq -8(%rsp), %rcx, %rax ## 8-byte Folded Reload + mulxq 8(%rsp), %rdi, %rsi ## 8-byte Folded Reload + mulxq (%rsp), %r11, %rbx ## 8-byte Folded Reload + addq %rdi, %rbx + mulxq -48(%rsp), %r8, %r9 ## 8-byte Folded Reload + adcq %rsi, %r8 + adcq %rcx, %r9 + mulxq -40(%rsp), %rdi, %rbp ## 8-byte Folded Reload + adcq %rax, %rdi + mulxq -16(%rsp), %rcx, %rsi ## 8-byte Folded Reload + adcq %rbp, %rcx + adcq -88(%rsp), %rsi ## 8-byte Folded Reload + movq -104(%rsp), %rax ## 8-byte Reload + adcq $0, %rax + addq -96(%rsp), %r11 ## 8-byte Folded Reload + adcq %r12, %rbx + adcq %r13, %r8 + adcq %r15, %r9 + adcq %r10, %rdi + adcq %r14, %rcx + adcq -128(%rsp), %rsi ## 8-byte Folded Reload + adcq -120(%rsp), %rax ## 8-byte Folded Reload + movq %rax, -104(%rsp) ## 8-byte Spill + movq 
-112(%rsp), %r12 ## 8-byte Reload + adcq $0, %r12 + movq -56(%rsp), %rax ## 8-byte Reload + movq 48(%rax), %rdx + mulxq -64(%rsp), %rbp, %rax ## 8-byte Folded Reload + movq %rbp, -120(%rsp) ## 8-byte Spill + movq %rax, -56(%rsp) ## 8-byte Spill + mulxq -72(%rsp), %rbp, %rax ## 8-byte Folded Reload + movq %rbp, -128(%rsp) ## 8-byte Spill + movq %rax, -64(%rsp) ## 8-byte Spill + mulxq -80(%rsp), %rbp, %rax ## 8-byte Folded Reload + movq %rbp, -112(%rsp) ## 8-byte Spill + movq %rax, -72(%rsp) ## 8-byte Spill + mulxq -32(%rsp), %r13, %rbp ## 8-byte Folded Reload + mulxq 32(%rsp), %r14, %r15 ## 8-byte Folded Reload + mulxq 16(%rsp), %rax, %r11 ## 8-byte Folded Reload + mulxq 24(%rsp), %rdx, %r10 ## 8-byte Folded Reload + movq %rdx, -80(%rsp) ## 8-byte Spill + addq %rax, %r10 + adcq %r14, %r11 + adcq %r13, %r15 + adcq -112(%rsp), %rbp ## 8-byte Folded Reload + movq -72(%rsp), %r14 ## 8-byte Reload + adcq -128(%rsp), %r14 ## 8-byte Folded Reload + movq -64(%rsp), %rdx ## 8-byte Reload + adcq -120(%rsp), %rdx ## 8-byte Folded Reload + movq -56(%rsp), %rax ## 8-byte Reload + adcq $0, %rax + movq -80(%rsp), %r13 ## 8-byte Reload + addq %rbx, %r13 + movq %r13, -80(%rsp) ## 8-byte Spill + adcq %r8, %r10 + adcq %r9, %r11 + adcq %rdi, %r15 + adcq %rcx, %rbp + movq %rbp, -32(%rsp) ## 8-byte Spill + adcq %rsi, %r14 + movq %r14, -72(%rsp) ## 8-byte Spill + adcq -104(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, -64(%rsp) ## 8-byte Spill + adcq %r12, %rax + movq %rax, -56(%rsp) ## 8-byte Spill + sbbq %rdi, %rdi + movq 40(%rsp), %rdx ## 8-byte Reload + imulq %r13, %rdx + mulxq -8(%rsp), %rbp, %rsi ## 8-byte Folded Reload + mulxq 8(%rsp), %rcx, %rbx ## 8-byte Folded Reload + mulxq (%rsp), %r13, %rax ## 8-byte Folded Reload + addq %rcx, %rax + mulxq -48(%rsp), %rcx, %r9 ## 8-byte Folded Reload + adcq %rbx, %rcx + adcq %rbp, %r9 + mulxq -40(%rsp), %rbp, %rbx ## 8-byte Folded Reload + adcq %rsi, %rbp + mulxq -16(%rsp), %rsi, %r14 ## 8-byte Folded Reload + adcq %rbx, %rsi + mulxq -24(%rsp), %rdx, %rbx ## 8-byte Folded Reload + adcq %r14, %rdx + adcq $0, %rbx + andl $1, %edi + addq -80(%rsp), %r13 ## 8-byte Folded Reload + adcq %r10, %rax + adcq %r11, %rcx + adcq %r15, %r9 + adcq -32(%rsp), %rbp ## 8-byte Folded Reload + adcq -72(%rsp), %rsi ## 8-byte Folded Reload + adcq -64(%rsp), %rdx ## 8-byte Folded Reload + adcq -56(%rsp), %rbx ## 8-byte Folded Reload + adcq $0, %rdi + movq %rax, %r8 + subq (%rsp), %r8 ## 8-byte Folded Reload + movq %rcx, %r10 + sbbq 8(%rsp), %r10 ## 8-byte Folded Reload + movq %r9, %r11 + sbbq -48(%rsp), %r11 ## 8-byte Folded Reload + movq %rbp, %r14 + sbbq -8(%rsp), %r14 ## 8-byte Folded Reload + movq %rsi, %r15 + sbbq -40(%rsp), %r15 ## 8-byte Folded Reload + movq %rdx, %r12 + sbbq -16(%rsp), %r12 ## 8-byte Folded Reload + movq %rbx, %r13 + sbbq -24(%rsp), %r13 ## 8-byte Folded Reload + sbbq $0, %rdi + andl $1, %edi + cmovneq %rbx, %r13 + testb %dil, %dil + cmovneq %rax, %r8 + movq 48(%rsp), %rax ## 8-byte Reload + movq %r8, (%rax) + cmovneq %rcx, %r10 + movq %r10, 8(%rax) + cmovneq %r9, %r11 + movq %r11, 16(%rax) + cmovneq %rbp, %r14 + movq %r14, 24(%rax) + cmovneq %rsi, %r15 + movq %r15, 32(%rax) + cmovneq %rdx, %r12 + movq %r12, 40(%rax) + movq %r13, 48(%rax) + addq $56, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_montNF7Lbmi2 + .p2align 4, 0x90 +_mcl_fp_montNF7Lbmi2: ## @mcl_fp_montNF7Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + subq $40, %rsp + movq %rdx, -88(%rsp) ## 
8-byte Spill + movq %rdi, 32(%rsp) ## 8-byte Spill + movq (%rsi), %rax + movq %rax, -112(%rsp) ## 8-byte Spill + movq 8(%rsi), %rdi + movq %rdi, -96(%rsp) ## 8-byte Spill + movq (%rdx), %rbp + movq %rdi, %rdx + mulxq %rbp, %rdi, %rbx + movq %rax, %rdx + mulxq %rbp, %r8, %r14 + movq 16(%rsi), %rdx + movq %rdx, -104(%rsp) ## 8-byte Spill + addq %rdi, %r14 + mulxq %rbp, %r15, %rax + adcq %rbx, %r15 + movq 24(%rsi), %rdx + movq %rdx, -24(%rsp) ## 8-byte Spill + mulxq %rbp, %rbx, %rdi + adcq %rax, %rbx + movq 32(%rsi), %rdx + movq %rdx, -32(%rsp) ## 8-byte Spill + mulxq %rbp, %r11, %rax + adcq %rdi, %r11 + movq 40(%rsi), %rdx + movq %rdx, -40(%rsp) ## 8-byte Spill + mulxq %rbp, %r9, %rdi + adcq %rax, %r9 + movq 48(%rsi), %rdx + movq %rdx, -48(%rsp) ## 8-byte Spill + mulxq %rbp, %r10, %rbp + adcq %rdi, %r10 + adcq $0, %rbp + movq -8(%rcx), %rax + movq %rax, -80(%rsp) ## 8-byte Spill + movq %r8, %rdx + imulq %rax, %rdx + movq (%rcx), %rax + movq %rax, -64(%rsp) ## 8-byte Spill + mulxq %rax, %rax, %rsi + movq %rsi, -128(%rsp) ## 8-byte Spill + addq %r8, %rax + movq 8(%rcx), %rax + movq %rax, -72(%rsp) ## 8-byte Spill + mulxq %rax, %r8, %rsi + movq %rsi, -120(%rsp) ## 8-byte Spill + adcq %r14, %r8 + movq 16(%rcx), %rax + movq %rax, 8(%rsp) ## 8-byte Spill + mulxq %rax, %rsi, %r13 + adcq %r15, %rsi + movq 24(%rcx), %rax + movq %rax, (%rsp) ## 8-byte Spill + mulxq %rax, %r12, %rax + adcq %rbx, %r12 + movq 32(%rcx), %rdi + movq %rdi, -8(%rsp) ## 8-byte Spill + mulxq %rdi, %r15, %rbx + adcq %r11, %r15 + movq 40(%rcx), %rdi + movq %rdi, -16(%rsp) ## 8-byte Spill + mulxq %rdi, %r14, %rdi + adcq %r9, %r14 + movq 48(%rcx), %rcx + movq %rcx, -56(%rsp) ## 8-byte Spill + mulxq %rcx, %r11, %rcx + adcq %r10, %r11 + adcq $0, %rbp + addq -128(%rsp), %r8 ## 8-byte Folded Reload + movq %r8, -128(%rsp) ## 8-byte Spill + adcq -120(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, -120(%rsp) ## 8-byte Spill + adcq %r13, %r12 + adcq %rax, %r15 + adcq %rbx, %r14 + adcq %rdi, %r11 + adcq %rcx, %rbp + movq -88(%rsp), %rax ## 8-byte Reload + movq 8(%rax), %rdx + mulxq -96(%rsp), %rcx, %rsi ## 8-byte Folded Reload + mulxq -112(%rsp), %r13, %rax ## 8-byte Folded Reload + addq %rcx, %rax + mulxq -104(%rsp), %rcx, %rdi ## 8-byte Folded Reload + adcq %rsi, %rcx + mulxq -24(%rsp), %rsi, %r8 ## 8-byte Folded Reload + adcq %rdi, %rsi + mulxq -32(%rsp), %rdi, %r9 ## 8-byte Folded Reload + adcq %r8, %rdi + mulxq -40(%rsp), %r8, %rbx ## 8-byte Folded Reload + adcq %r9, %r8 + mulxq -48(%rsp), %r9, %r10 ## 8-byte Folded Reload + adcq %rbx, %r9 + adcq $0, %r10 + addq -128(%rsp), %r13 ## 8-byte Folded Reload + adcq -120(%rsp), %rax ## 8-byte Folded Reload + adcq %r12, %rcx + adcq %r15, %rsi + adcq %r14, %rdi + adcq %r11, %r8 + adcq %rbp, %r9 + adcq $0, %r10 + movq %r13, %rdx + imulq -80(%rsp), %rdx ## 8-byte Folded Reload + mulxq -64(%rsp), %rbp, %rbx ## 8-byte Folded Reload + movq %rbx, -128(%rsp) ## 8-byte Spill + addq %r13, %rbp + mulxq -72(%rsp), %rbp, %r14 ## 8-byte Folded Reload + adcq %rax, %rbp + mulxq 8(%rsp), %rax, %r11 ## 8-byte Folded Reload + adcq %rcx, %rax + mulxq (%rsp), %r12, %rcx ## 8-byte Folded Reload + adcq %rsi, %r12 + mulxq -8(%rsp), %r15, %rbx ## 8-byte Folded Reload + adcq %rdi, %r15 + mulxq -16(%rsp), %r13, %rdi ## 8-byte Folded Reload + adcq %r8, %r13 + mulxq -56(%rsp), %rsi, %rdx ## 8-byte Folded Reload + adcq %r9, %rsi + adcq $0, %r10 + addq -128(%rsp), %rbp ## 8-byte Folded Reload + movq %rbp, -128(%rsp) ## 8-byte Spill + adcq %r14, %rax + movq %rax, -120(%rsp) ## 8-byte Spill + adcq %r11, %r12 + adcq %rcx, 
%r15 + adcq %rbx, %r13 + adcq %rdi, %rsi + adcq %rdx, %r10 + movq -88(%rsp), %rax ## 8-byte Reload + movq 16(%rax), %rdx + mulxq -96(%rsp), %rcx, %rax ## 8-byte Folded Reload + mulxq -112(%rsp), %r14, %rdi ## 8-byte Folded Reload + addq %rcx, %rdi + mulxq -104(%rsp), %rbp, %rcx ## 8-byte Folded Reload + adcq %rax, %rbp + mulxq -24(%rsp), %rbx, %r8 ## 8-byte Folded Reload + adcq %rcx, %rbx + mulxq -32(%rsp), %rax, %r9 ## 8-byte Folded Reload + adcq %r8, %rax + mulxq -40(%rsp), %r8, %rcx ## 8-byte Folded Reload + movq %rcx, 16(%rsp) ## 8-byte Spill + adcq %r9, %r8 + mulxq -48(%rsp), %r9, %r11 ## 8-byte Folded Reload + adcq 16(%rsp), %r9 ## 8-byte Folded Reload + adcq $0, %r11 + addq -128(%rsp), %r14 ## 8-byte Folded Reload + adcq -120(%rsp), %rdi ## 8-byte Folded Reload + adcq %r12, %rbp + adcq %r15, %rbx + adcq %r13, %rax + adcq %rsi, %r8 + adcq %r10, %r9 + adcq $0, %r11 + movq %r14, %rdx + imulq -80(%rsp), %rdx ## 8-byte Folded Reload + mulxq -64(%rsp), %rsi, %rcx ## 8-byte Folded Reload + movq %rcx, -128(%rsp) ## 8-byte Spill + addq %r14, %rsi + mulxq -72(%rsp), %rsi, %r13 ## 8-byte Folded Reload + adcq %rdi, %rsi + mulxq 8(%rsp), %rdi, %r15 ## 8-byte Folded Reload + adcq %rbp, %rdi + mulxq (%rsp), %rcx, %rbp ## 8-byte Folded Reload + adcq %rbx, %rcx + mulxq -8(%rsp), %r14, %rbx ## 8-byte Folded Reload + adcq %rax, %r14 + mulxq -16(%rsp), %r12, %rax ## 8-byte Folded Reload + adcq %r8, %r12 + mulxq -56(%rsp), %r10, %rdx ## 8-byte Folded Reload + adcq %r9, %r10 + adcq $0, %r11 + addq -128(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, -128(%rsp) ## 8-byte Spill + adcq %r13, %rdi + movq %rdi, -120(%rsp) ## 8-byte Spill + adcq %r15, %rcx + adcq %rbp, %r14 + adcq %rbx, %r12 + adcq %rax, %r10 + adcq %rdx, %r11 + movq -88(%rsp), %rax ## 8-byte Reload + movq 24(%rax), %rdx + mulxq -96(%rsp), %rsi, %rax ## 8-byte Folded Reload + mulxq -112(%rsp), %r15, %rbp ## 8-byte Folded Reload + addq %rsi, %rbp + mulxq -104(%rsp), %rbx, %rdi ## 8-byte Folded Reload + adcq %rax, %rbx + mulxq -24(%rsp), %rsi, %rax ## 8-byte Folded Reload + adcq %rdi, %rsi + mulxq -32(%rsp), %rdi, %r9 ## 8-byte Folded Reload + adcq %rax, %rdi + mulxq -40(%rsp), %r8, %rax ## 8-byte Folded Reload + adcq %r9, %r8 + mulxq -48(%rsp), %r9, %r13 ## 8-byte Folded Reload + adcq %rax, %r9 + adcq $0, %r13 + addq -128(%rsp), %r15 ## 8-byte Folded Reload + adcq -120(%rsp), %rbp ## 8-byte Folded Reload + adcq %rcx, %rbx + adcq %r14, %rsi + adcq %r12, %rdi + adcq %r10, %r8 + adcq %r11, %r9 + adcq $0, %r13 + movq %r15, %rdx + imulq -80(%rsp), %rdx ## 8-byte Folded Reload + mulxq -64(%rsp), %rcx, %rax ## 8-byte Folded Reload + movq %rax, -128(%rsp) ## 8-byte Spill + addq %r15, %rcx + mulxq -72(%rsp), %rcx, %r11 ## 8-byte Folded Reload + adcq %rbp, %rcx + mulxq 8(%rsp), %rbp, %r10 ## 8-byte Folded Reload + adcq %rbx, %rbp + mulxq (%rsp), %rax, %rbx ## 8-byte Folded Reload + adcq %rsi, %rax + mulxq -8(%rsp), %r14, %rsi ## 8-byte Folded Reload + adcq %rdi, %r14 + mulxq -16(%rsp), %r15, %rdi ## 8-byte Folded Reload + adcq %r8, %r15 + mulxq -56(%rsp), %r12, %rdx ## 8-byte Folded Reload + adcq %r9, %r12 + adcq $0, %r13 + addq -128(%rsp), %rcx ## 8-byte Folded Reload + adcq %r11, %rbp + movq %rbp, -128(%rsp) ## 8-byte Spill + adcq %r10, %rax + movq %rax, -120(%rsp) ## 8-byte Spill + adcq %rbx, %r14 + adcq %rsi, %r15 + adcq %rdi, %r12 + adcq %rdx, %r13 + movq -88(%rsp), %rax ## 8-byte Reload + movq 32(%rax), %rdx + mulxq -96(%rsp), %rsi, %rdi ## 8-byte Folded Reload + mulxq -112(%rsp), %r11, %r8 ## 8-byte Folded Reload + addq %rsi, %r8 + mulxq 
-104(%rsp), %rbx, %rsi ## 8-byte Folded Reload + adcq %rdi, %rbx + mulxq -24(%rsp), %rbp, %rdi ## 8-byte Folded Reload + adcq %rsi, %rbp + mulxq -32(%rsp), %rsi, %r9 ## 8-byte Folded Reload + adcq %rdi, %rsi + mulxq -40(%rsp), %rdi, %rax ## 8-byte Folded Reload + adcq %r9, %rdi + mulxq -48(%rsp), %r9, %r10 ## 8-byte Folded Reload + adcq %rax, %r9 + adcq $0, %r10 + addq %rcx, %r11 + adcq -128(%rsp), %r8 ## 8-byte Folded Reload + adcq -120(%rsp), %rbx ## 8-byte Folded Reload + adcq %r14, %rbp + adcq %r15, %rsi + adcq %r12, %rdi + adcq %r13, %r9 + adcq $0, %r10 + movq %r11, %rdx + imulq -80(%rsp), %rdx ## 8-byte Folded Reload + mulxq -64(%rsp), %rcx, %rax ## 8-byte Folded Reload + movq %rax, -128(%rsp) ## 8-byte Spill + addq %r11, %rcx + mulxq -72(%rsp), %rcx, %r13 ## 8-byte Folded Reload + adcq %r8, %rcx + mulxq 8(%rsp), %rax, %r8 ## 8-byte Folded Reload + adcq %rbx, %rax + mulxq (%rsp), %rbx, %r11 ## 8-byte Folded Reload + adcq %rbp, %rbx + mulxq -8(%rsp), %r14, %rbp ## 8-byte Folded Reload + adcq %rsi, %r14 + mulxq -16(%rsp), %r15, %rsi ## 8-byte Folded Reload + adcq %rdi, %r15 + mulxq -56(%rsp), %r12, %rdx ## 8-byte Folded Reload + adcq %r9, %r12 + adcq $0, %r10 + addq -128(%rsp), %rcx ## 8-byte Folded Reload + adcq %r13, %rax + movq %rax, -128(%rsp) ## 8-byte Spill + adcq %r8, %rbx + movq %rbx, -120(%rsp) ## 8-byte Spill + adcq %r11, %r14 + adcq %rbp, %r15 + adcq %rsi, %r12 + adcq %rdx, %r10 + movq -88(%rsp), %rax ## 8-byte Reload + movq 40(%rax), %rdx + mulxq -96(%rsp), %rsi, %rax ## 8-byte Folded Reload + mulxq -112(%rsp), %r11, %rbp ## 8-byte Folded Reload + addq %rsi, %rbp + mulxq -104(%rsp), %rbx, %rdi ## 8-byte Folded Reload + adcq %rax, %rbx + mulxq -24(%rsp), %rsi, %rax ## 8-byte Folded Reload + adcq %rdi, %rsi + mulxq -32(%rsp), %rdi, %r9 ## 8-byte Folded Reload + adcq %rax, %rdi + mulxq -40(%rsp), %r8, %rax ## 8-byte Folded Reload + adcq %r9, %r8 + mulxq -48(%rsp), %r9, %r13 ## 8-byte Folded Reload + adcq %rax, %r9 + adcq $0, %r13 + addq %rcx, %r11 + adcq -128(%rsp), %rbp ## 8-byte Folded Reload + adcq -120(%rsp), %rbx ## 8-byte Folded Reload + adcq %r14, %rsi + adcq %r15, %rdi + adcq %r12, %r8 + adcq %r10, %r9 + adcq $0, %r13 + movq %r11, %rdx + imulq -80(%rsp), %rdx ## 8-byte Folded Reload + mulxq -64(%rsp), %rcx, %rax ## 8-byte Folded Reload + movq %rax, -120(%rsp) ## 8-byte Spill + addq %r11, %rcx + mulxq -72(%rsp), %rcx, %rax ## 8-byte Folded Reload + movq %rax, 16(%rsp) ## 8-byte Spill + adcq %rbp, %rcx + mulxq 8(%rsp), %rax, %rbp ## 8-byte Folded Reload + movq %rbp, 24(%rsp) ## 8-byte Spill + adcq %rbx, %rax + movq %rax, -128(%rsp) ## 8-byte Spill + mulxq (%rsp), %r14, %rbp ## 8-byte Folded Reload + adcq %rsi, %r14 + mulxq -8(%rsp), %r11, %r12 ## 8-byte Folded Reload + adcq %rdi, %r11 + mulxq -16(%rsp), %r10, %rbx ## 8-byte Folded Reload + adcq %r8, %r10 + mulxq -56(%rsp), %rdi, %rax ## 8-byte Folded Reload + adcq %r9, %rdi + adcq $0, %r13 + addq -120(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, -120(%rsp) ## 8-byte Spill + movq -128(%rsp), %rcx ## 8-byte Reload + adcq 16(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, -128(%rsp) ## 8-byte Spill + adcq 24(%rsp), %r14 ## 8-byte Folded Reload + adcq %rbp, %r11 + adcq %r12, %r10 + adcq %rbx, %rdi + adcq %rax, %r13 + movq -88(%rsp), %rax ## 8-byte Reload + movq 48(%rax), %rdx + mulxq -96(%rsp), %rbp, %r9 ## 8-byte Folded Reload + mulxq -112(%rsp), %r8, %rax ## 8-byte Folded Reload + addq %rbp, %rax + mulxq -104(%rsp), %rbx, %rcx ## 8-byte Folded Reload + adcq %r9, %rbx + mulxq -24(%rsp), %rbp, %r9 ## 8-byte Folded 
Reload + adcq %rcx, %rbp + mulxq -32(%rsp), %rcx, %r12 ## 8-byte Folded Reload + adcq %r9, %rcx + mulxq -40(%rsp), %r15, %rsi ## 8-byte Folded Reload + movq %rsi, -112(%rsp) ## 8-byte Spill + adcq %r12, %r15 + mulxq -48(%rsp), %r12, %r9 ## 8-byte Folded Reload + adcq -112(%rsp), %r12 ## 8-byte Folded Reload + adcq $0, %r9 + addq -120(%rsp), %r8 ## 8-byte Folded Reload + adcq -128(%rsp), %rax ## 8-byte Folded Reload + adcq %r14, %rbx + adcq %r11, %rbp + adcq %r10, %rcx + adcq %rdi, %r15 + adcq %r13, %r12 + adcq $0, %r9 + movq -80(%rsp), %rdx ## 8-byte Reload + imulq %r8, %rdx + mulxq -64(%rsp), %rdi, %rsi ## 8-byte Folded Reload + movq %rsi, -80(%rsp) ## 8-byte Spill + addq %r8, %rdi + mulxq -72(%rsp), %r8, %rsi ## 8-byte Folded Reload + movq %rsi, -112(%rsp) ## 8-byte Spill + adcq %rax, %r8 + movq 8(%rsp), %r11 ## 8-byte Reload + mulxq %r11, %rsi, %rax + movq %rax, -88(%rsp) ## 8-byte Spill + adcq %rbx, %rsi + movq (%rsp), %r14 ## 8-byte Reload + mulxq %r14, %rdi, %rax + movq %rax, -96(%rsp) ## 8-byte Spill + adcq %rbp, %rdi + movq -8(%rsp), %rbp ## 8-byte Reload + mulxq %rbp, %rax, %rbx + movq %rbx, -104(%rsp) ## 8-byte Spill + adcq %rcx, %rax + movq -16(%rsp), %rbx ## 8-byte Reload + mulxq %rbx, %rcx, %r13 + adcq %r15, %rcx + mulxq -56(%rsp), %rdx, %r15 ## 8-byte Folded Reload + adcq %r12, %rdx + adcq $0, %r9 + addq -80(%rsp), %r8 ## 8-byte Folded Reload + adcq -112(%rsp), %rsi ## 8-byte Folded Reload + adcq -88(%rsp), %rdi ## 8-byte Folded Reload + adcq -96(%rsp), %rax ## 8-byte Folded Reload + adcq -104(%rsp), %rcx ## 8-byte Folded Reload + adcq %r13, %rdx + adcq %r15, %r9 + movq %r8, %r13 + subq -64(%rsp), %r13 ## 8-byte Folded Reload + movq %rsi, %r12 + sbbq -72(%rsp), %r12 ## 8-byte Folded Reload + movq %rdi, %r10 + sbbq %r11, %r10 + movq %rax, %r11 + sbbq %r14, %r11 + movq %rcx, %r14 + sbbq %rbp, %r14 + movq %rdx, %r15 + sbbq %rbx, %r15 + movq %r9, %rbp + sbbq -56(%rsp), %rbp ## 8-byte Folded Reload + movq %rbp, %rbx + sarq $63, %rbx + cmovsq %r8, %r13 + movq 32(%rsp), %rbx ## 8-byte Reload + movq %r13, (%rbx) + cmovsq %rsi, %r12 + movq %r12, 8(%rbx) + cmovsq %rdi, %r10 + movq %r10, 16(%rbx) + cmovsq %rax, %r11 + movq %r11, 24(%rbx) + cmovsq %rcx, %r14 + movq %r14, 32(%rbx) + cmovsq %rdx, %r15 + movq %r15, 40(%rbx) + cmovsq %r9, %rbp + movq %rbp, 48(%rbx) + addq $40, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_montRed7Lbmi2 + .p2align 4, 0x90 +_mcl_fp_montRed7Lbmi2: ## @mcl_fp_montRed7Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + subq $56, %rsp + movq %rdx, %rcx + movq %rdi, 48(%rsp) ## 8-byte Spill + movq -8(%rcx), %rax + movq %rax, -72(%rsp) ## 8-byte Spill + movq (%rsi), %r13 + movq %r13, %rdx + imulq %rax, %rdx + movq 48(%rcx), %rax + movq %rax, -16(%rsp) ## 8-byte Spill + mulxq %rax, %rdi, %rax + movq %rdi, -64(%rsp) ## 8-byte Spill + movq %rax, -120(%rsp) ## 8-byte Spill + movq 40(%rcx), %rax + movq %rax, -24(%rsp) ## 8-byte Spill + mulxq %rax, %r10, %rax + movq %rax, -128(%rsp) ## 8-byte Spill + movq 32(%rcx), %rax + movq %rax, -32(%rsp) ## 8-byte Spill + mulxq %rax, %r14, %r8 + movq 24(%rcx), %rax + movq %rax, 16(%rsp) ## 8-byte Spill + mulxq %rax, %r12, %r15 + movq 16(%rcx), %rax + movq %rax, -40(%rsp) ## 8-byte Spill + mulxq %rax, %rbp, %rbx + movq (%rcx), %rdi + movq %rdi, -48(%rsp) ## 8-byte Spill + movq 8(%rcx), %rax + movq %rax, -8(%rsp) ## 8-byte Spill + mulxq %rax, %rax, %r11 + mulxq %rdi, %rdx, %r9 + addq %rax, %r9 + adcq %rbp, %r11 + adcq %r12, %rbx + adcq 
%r14, %r15 + adcq %r10, %r8 + movq -128(%rsp), %rcx ## 8-byte Reload + adcq -64(%rsp), %rcx ## 8-byte Folded Reload + movq -120(%rsp), %rax ## 8-byte Reload + adcq $0, %rax + addq %r13, %rdx + adcq 8(%rsi), %r9 + adcq 16(%rsi), %r11 + adcq 24(%rsi), %rbx + adcq 32(%rsi), %r15 + adcq 40(%rsi), %r8 + movq %r8, -112(%rsp) ## 8-byte Spill + adcq 48(%rsi), %rcx + movq %rcx, -128(%rsp) ## 8-byte Spill + adcq 56(%rsi), %rax + movq %rax, -120(%rsp) ## 8-byte Spill + movq 104(%rsi), %r8 + movq 96(%rsi), %rdx + movq 88(%rsi), %rdi + movq 80(%rsi), %rbp + movq 72(%rsi), %rax + movq 64(%rsi), %rcx + adcq $0, %rcx + movq %rcx, 8(%rsp) ## 8-byte Spill + adcq $0, %rax + movq %rax, -88(%rsp) ## 8-byte Spill + adcq $0, %rbp + movq %rbp, -56(%rsp) ## 8-byte Spill + adcq $0, %rdi + movq %rdi, -80(%rsp) ## 8-byte Spill + adcq $0, %rdx + movq %rdx, 24(%rsp) ## 8-byte Spill + adcq $0, %r8 + movq %r8, -64(%rsp) ## 8-byte Spill + sbbq %rax, %rax + andl $1, %eax + movq %rax, (%rsp) ## 8-byte Spill + movq %r9, %rdx + imulq -72(%rsp), %rdx ## 8-byte Folded Reload + movq -16(%rsp), %r13 ## 8-byte Reload + mulxq %r13, %rcx, %rax + movq %rcx, 32(%rsp) ## 8-byte Spill + movq %rax, -96(%rsp) ## 8-byte Spill + mulxq -24(%rsp), %rcx, %rax ## 8-byte Folded Reload + movq %rcx, 40(%rsp) ## 8-byte Spill + movq %rax, -104(%rsp) ## 8-byte Spill + mulxq -32(%rsp), %r14, %r12 ## 8-byte Folded Reload + mulxq 16(%rsp), %r8, %rax ## 8-byte Folded Reload + mulxq -40(%rsp), %rsi, %r10 ## 8-byte Folded Reload + mulxq -8(%rsp), %rcx, %rdi ## 8-byte Folded Reload + mulxq -48(%rsp), %rdx, %rbp ## 8-byte Folded Reload + addq %rcx, %rbp + adcq %rsi, %rdi + adcq %r8, %r10 + adcq %r14, %rax + movq %rax, %rcx + adcq 40(%rsp), %r12 ## 8-byte Folded Reload + movq -104(%rsp), %rsi ## 8-byte Reload + adcq 32(%rsp), %rsi ## 8-byte Folded Reload + movq -96(%rsp), %rax ## 8-byte Reload + adcq $0, %rax + addq %r9, %rdx + adcq %r11, %rbp + adcq %rbx, %rdi + adcq %r15, %r10 + adcq -112(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, -112(%rsp) ## 8-byte Spill + adcq -128(%rsp), %r12 ## 8-byte Folded Reload + adcq -120(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, -104(%rsp) ## 8-byte Spill + adcq 8(%rsp), %rax ## 8-byte Folded Reload + movq %rax, -96(%rsp) ## 8-byte Spill + adcq $0, -88(%rsp) ## 8-byte Folded Spill + adcq $0, -56(%rsp) ## 8-byte Folded Spill + adcq $0, -80(%rsp) ## 8-byte Folded Spill + adcq $0, 24(%rsp) ## 8-byte Folded Spill + adcq $0, -64(%rsp) ## 8-byte Folded Spill + adcq $0, (%rsp) ## 8-byte Folded Spill + movq %rbp, %rdx + imulq -72(%rsp), %rdx ## 8-byte Folded Reload + mulxq %r13, %rcx, %rax + movq %rcx, 8(%rsp) ## 8-byte Spill + movq %rax, -120(%rsp) ## 8-byte Spill + movq -24(%rsp), %r15 ## 8-byte Reload + mulxq %r15, %rcx, %rax + movq %rcx, 32(%rsp) ## 8-byte Spill + movq %rax, -128(%rsp) ## 8-byte Spill + mulxq -32(%rsp), %r11, %r13 ## 8-byte Folded Reload + mulxq 16(%rsp), %r9, %r14 ## 8-byte Folded Reload + mulxq -40(%rsp), %rsi, %r8 ## 8-byte Folded Reload + mulxq -8(%rsp), %rax, %rbx ## 8-byte Folded Reload + mulxq -48(%rsp), %rdx, %rcx ## 8-byte Folded Reload + addq %rax, %rcx + adcq %rsi, %rbx + adcq %r9, %r8 + adcq %r11, %r14 + adcq 32(%rsp), %r13 ## 8-byte Folded Reload + movq -128(%rsp), %rsi ## 8-byte Reload + adcq 8(%rsp), %rsi ## 8-byte Folded Reload + movq -120(%rsp), %rax ## 8-byte Reload + adcq $0, %rax + addq %rbp, %rdx + adcq %rdi, %rcx + adcq %r10, %rbx + adcq -112(%rsp), %r8 ## 8-byte Folded Reload + adcq %r12, %r14 + adcq -104(%rsp), %r13 ## 8-byte Folded Reload + adcq -96(%rsp), %rsi ## 8-byte Folded 
Reload + movq %rsi, -128(%rsp) ## 8-byte Spill + adcq -88(%rsp), %rax ## 8-byte Folded Reload + movq %rax, -120(%rsp) ## 8-byte Spill + adcq $0, -56(%rsp) ## 8-byte Folded Spill + adcq $0, -80(%rsp) ## 8-byte Folded Spill + adcq $0, 24(%rsp) ## 8-byte Folded Spill + adcq $0, -64(%rsp) ## 8-byte Folded Spill + adcq $0, (%rsp) ## 8-byte Folded Spill + movq %rcx, %rdx + imulq -72(%rsp), %rdx ## 8-byte Folded Reload + mulxq -16(%rsp), %rsi, %rax ## 8-byte Folded Reload + movq %rsi, -88(%rsp) ## 8-byte Spill + movq %rax, -96(%rsp) ## 8-byte Spill + mulxq %r15, %rsi, %rax + movq %rsi, -112(%rsp) ## 8-byte Spill + movq %rax, -104(%rsp) ## 8-byte Spill + movq -32(%rsp), %r15 ## 8-byte Reload + mulxq %r15, %rax, %r12 + movq %rax, 8(%rsp) ## 8-byte Spill + mulxq 16(%rsp), %r9, %rbp ## 8-byte Folded Reload + mulxq -40(%rsp), %rdi, %r10 ## 8-byte Folded Reload + mulxq -8(%rsp), %rsi, %r11 ## 8-byte Folded Reload + mulxq -48(%rsp), %rdx, %rax ## 8-byte Folded Reload + addq %rsi, %rax + adcq %rdi, %r11 + adcq %r9, %r10 + adcq 8(%rsp), %rbp ## 8-byte Folded Reload + adcq -112(%rsp), %r12 ## 8-byte Folded Reload + movq -104(%rsp), %rdi ## 8-byte Reload + adcq -88(%rsp), %rdi ## 8-byte Folded Reload + movq -96(%rsp), %rsi ## 8-byte Reload + adcq $0, %rsi + addq %rcx, %rdx + adcq %rbx, %rax + adcq %r8, %r11 + adcq %r14, %r10 + adcq %r13, %rbp + adcq -128(%rsp), %r12 ## 8-byte Folded Reload + adcq -120(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, -104(%rsp) ## 8-byte Spill + adcq -56(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, -96(%rsp) ## 8-byte Spill + adcq $0, -80(%rsp) ## 8-byte Folded Spill + adcq $0, 24(%rsp) ## 8-byte Folded Spill + adcq $0, -64(%rsp) ## 8-byte Folded Spill + adcq $0, (%rsp) ## 8-byte Folded Spill + movq %rax, %rdx + imulq -72(%rsp), %rdx ## 8-byte Folded Reload + mulxq -16(%rsp), %rsi, %rcx ## 8-byte Folded Reload + movq %rsi, -128(%rsp) ## 8-byte Spill + movq %rcx, -56(%rsp) ## 8-byte Spill + mulxq -24(%rsp), %rsi, %rcx ## 8-byte Folded Reload + movq %rsi, -88(%rsp) ## 8-byte Spill + movq %rcx, -120(%rsp) ## 8-byte Spill + mulxq %r15, %rcx, %r13 + movq %rcx, -112(%rsp) ## 8-byte Spill + movq 16(%rsp), %r15 ## 8-byte Reload + mulxq %r15, %r9, %r14 + mulxq -40(%rsp), %rdi, %rbx ## 8-byte Folded Reload + mulxq -8(%rsp), %rsi, %r8 ## 8-byte Folded Reload + mulxq -48(%rsp), %rdx, %rcx ## 8-byte Folded Reload + addq %rsi, %rcx + adcq %rdi, %r8 + adcq %r9, %rbx + adcq -112(%rsp), %r14 ## 8-byte Folded Reload + adcq -88(%rsp), %r13 ## 8-byte Folded Reload + movq -120(%rsp), %rdi ## 8-byte Reload + adcq -128(%rsp), %rdi ## 8-byte Folded Reload + movq -56(%rsp), %rsi ## 8-byte Reload + adcq $0, %rsi + addq %rax, %rdx + adcq %r11, %rcx + adcq %r10, %r8 + adcq %rbp, %rbx + adcq %r12, %r14 + adcq -104(%rsp), %r13 ## 8-byte Folded Reload + adcq -96(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, -120(%rsp) ## 8-byte Spill + adcq -80(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, -56(%rsp) ## 8-byte Spill + adcq $0, 24(%rsp) ## 8-byte Folded Spill + adcq $0, -64(%rsp) ## 8-byte Folded Spill + adcq $0, (%rsp) ## 8-byte Folded Spill + movq %rcx, %rdx + imulq -72(%rsp), %rdx ## 8-byte Folded Reload + mulxq -16(%rsp), %rsi, %rax ## 8-byte Folded Reload + movq %rsi, -96(%rsp) ## 8-byte Spill + movq %rax, -80(%rsp) ## 8-byte Spill + mulxq -24(%rsp), %rsi, %rax ## 8-byte Folded Reload + movq %rsi, -104(%rsp) ## 8-byte Spill + movq %rax, -128(%rsp) ## 8-byte Spill + mulxq -32(%rsp), %rax, %r12 ## 8-byte Folded Reload + movq %rax, -88(%rsp) ## 8-byte Spill + movq %r15, %r11 + mulxq %r11, %rax, 
%r15 + movq %rax, -112(%rsp) ## 8-byte Spill + mulxq -40(%rsp), %rdi, %rbp ## 8-byte Folded Reload + movq -8(%rsp), %r9 ## 8-byte Reload + mulxq %r9, %rax, %r10 + mulxq -48(%rsp), %rdx, %rsi ## 8-byte Folded Reload + addq %rax, %rsi + adcq %rdi, %r10 + adcq -112(%rsp), %rbp ## 8-byte Folded Reload + adcq -88(%rsp), %r15 ## 8-byte Folded Reload + adcq -104(%rsp), %r12 ## 8-byte Folded Reload + movq -128(%rsp), %rdi ## 8-byte Reload + adcq -96(%rsp), %rdi ## 8-byte Folded Reload + movq -80(%rsp), %rax ## 8-byte Reload + adcq $0, %rax + addq %rcx, %rdx + adcq %r8, %rsi + adcq %rbx, %r10 + adcq %r14, %rbp + adcq %r13, %r15 + adcq -120(%rsp), %r12 ## 8-byte Folded Reload + adcq -56(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, -128(%rsp) ## 8-byte Spill + adcq 24(%rsp), %rax ## 8-byte Folded Reload + movq %rax, -80(%rsp) ## 8-byte Spill + adcq $0, -64(%rsp) ## 8-byte Folded Spill + adcq $0, (%rsp) ## 8-byte Folded Spill + movq -72(%rsp), %rdx ## 8-byte Reload + imulq %rsi, %rdx + mulxq %r11, %rcx, %rax + movq %rax, -72(%rsp) ## 8-byte Spill + mulxq %r9, %rbx, %rdi + mulxq -48(%rsp), %r11, %r14 ## 8-byte Folded Reload + addq %rbx, %r14 + mulxq -40(%rsp), %rbx, %r13 ## 8-byte Folded Reload + adcq %rdi, %rbx + adcq %rcx, %r13 + mulxq -32(%rsp), %r8, %rdi ## 8-byte Folded Reload + adcq -72(%rsp), %r8 ## 8-byte Folded Reload + mulxq -24(%rsp), %rcx, %r9 ## 8-byte Folded Reload + adcq %rdi, %rcx + mulxq -16(%rsp), %rdx, %rdi ## 8-byte Folded Reload + adcq %r9, %rdx + adcq $0, %rdi + addq %rsi, %r11 + adcq %r10, %r14 + adcq %rbp, %rbx + adcq %r15, %r13 + adcq %r12, %r8 + adcq -128(%rsp), %rcx ## 8-byte Folded Reload + adcq -80(%rsp), %rdx ## 8-byte Folded Reload + adcq -64(%rsp), %rdi ## 8-byte Folded Reload + movq (%rsp), %rax ## 8-byte Reload + adcq $0, %rax + movq %r14, %rsi + subq -48(%rsp), %rsi ## 8-byte Folded Reload + movq %rbx, %rbp + sbbq -8(%rsp), %rbp ## 8-byte Folded Reload + movq %r13, %r9 + sbbq -40(%rsp), %r9 ## 8-byte Folded Reload + movq %r8, %r10 + sbbq 16(%rsp), %r10 ## 8-byte Folded Reload + movq %rcx, %r11 + sbbq -32(%rsp), %r11 ## 8-byte Folded Reload + movq %rdx, %r15 + sbbq -24(%rsp), %r15 ## 8-byte Folded Reload + movq %rdi, %r12 + sbbq -16(%rsp), %r12 ## 8-byte Folded Reload + sbbq $0, %rax + andl $1, %eax + cmovneq %rdi, %r12 + testb %al, %al + cmovneq %r14, %rsi + movq 48(%rsp), %rdi ## 8-byte Reload + movq %rsi, (%rdi) + cmovneq %rbx, %rbp + movq %rbp, 8(%rdi) + cmovneq %r13, %r9 + movq %r9, 16(%rdi) + cmovneq %r8, %r10 + movq %r10, 24(%rdi) + cmovneq %rcx, %r11 + movq %r11, 32(%rdi) + cmovneq %rdx, %r15 + movq %r15, 40(%rdi) + movq %r12, 48(%rdi) + addq $56, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_addPre7Lbmi2 + .p2align 4, 0x90 +_mcl_fp_addPre7Lbmi2: ## @mcl_fp_addPre7Lbmi2 +## BB#0: + pushq %r15 + pushq %r14 + pushq %r12 + pushq %rbx + movq 48(%rdx), %r8 + movq 48(%rsi), %r14 + movq 40(%rdx), %r9 + movq 40(%rsi), %r15 + movq 32(%rdx), %r10 + movq 24(%rdx), %r11 + movq 16(%rdx), %r12 + movq (%rdx), %rcx + movq 8(%rdx), %rdx + addq (%rsi), %rcx + adcq 8(%rsi), %rdx + movq 24(%rsi), %rax + movq 32(%rsi), %rbx + adcq 16(%rsi), %r12 + movq %rcx, (%rdi) + movq %rdx, 8(%rdi) + movq %r12, 16(%rdi) + adcq %r11, %rax + movq %rax, 24(%rdi) + adcq %r10, %rbx + movq %rbx, 32(%rdi) + adcq %r9, %r15 + movq %r15, 40(%rdi) + adcq %r8, %r14 + movq %r14, 48(%rdi) + sbbq %rax, %rax + andl $1, %eax + popq %rbx + popq %r12 + popq %r14 + popq %r15 + retq + + .globl _mcl_fp_subPre7Lbmi2 + .p2align 4, 0x90 +_mcl_fp_subPre7Lbmi2: 
## @mcl_fp_subPre7Lbmi2 +## BB#0: + pushq %r15 + pushq %r14 + pushq %r12 + pushq %rbx + movq 48(%rdx), %r8 + movq 48(%rsi), %r10 + movq 40(%rdx), %r9 + movq 40(%rsi), %r15 + movq 24(%rdx), %r11 + movq 32(%rdx), %r14 + movq (%rsi), %rbx + movq 8(%rsi), %r12 + xorl %eax, %eax + subq (%rdx), %rbx + sbbq 8(%rdx), %r12 + movq 16(%rsi), %rcx + sbbq 16(%rdx), %rcx + movq 32(%rsi), %rdx + movq 24(%rsi), %rsi + movq %rbx, (%rdi) + movq %r12, 8(%rdi) + movq %rcx, 16(%rdi) + sbbq %r11, %rsi + movq %rsi, 24(%rdi) + sbbq %r14, %rdx + movq %rdx, 32(%rdi) + sbbq %r9, %r15 + movq %r15, 40(%rdi) + sbbq %r8, %r10 + movq %r10, 48(%rdi) + sbbq $0, %rax + andl $1, %eax + popq %rbx + popq %r12 + popq %r14 + popq %r15 + retq + + .globl _mcl_fp_shr1_7Lbmi2 + .p2align 4, 0x90 +_mcl_fp_shr1_7Lbmi2: ## @mcl_fp_shr1_7Lbmi2 +## BB#0: + movq 48(%rsi), %r8 + movq 40(%rsi), %r9 + movq 32(%rsi), %r10 + movq 24(%rsi), %rax + movq 16(%rsi), %rcx + movq (%rsi), %rdx + movq 8(%rsi), %rsi + shrdq $1, %rsi, %rdx + movq %rdx, (%rdi) + shrdq $1, %rcx, %rsi + movq %rsi, 8(%rdi) + shrdq $1, %rax, %rcx + movq %rcx, 16(%rdi) + shrdq $1, %r10, %rax + movq %rax, 24(%rdi) + shrdq $1, %r9, %r10 + movq %r10, 32(%rdi) + shrdq $1, %r8, %r9 + movq %r9, 40(%rdi) + shrq %r8 + movq %r8, 48(%rdi) + retq + + .globl _mcl_fp_add7Lbmi2 + .p2align 4, 0x90 +_mcl_fp_add7Lbmi2: ## @mcl_fp_add7Lbmi2 +## BB#0: + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq 48(%rdx), %r14 + movq 48(%rsi), %r8 + movq 40(%rdx), %r15 + movq 40(%rsi), %r9 + movq 32(%rdx), %r12 + movq 24(%rdx), %r13 + movq 16(%rdx), %r10 + movq (%rdx), %r11 + movq 8(%rdx), %rdx + addq (%rsi), %r11 + adcq 8(%rsi), %rdx + movq 24(%rsi), %rax + movq 32(%rsi), %rbx + adcq 16(%rsi), %r10 + movq %r11, (%rdi) + movq %rdx, 8(%rdi) + movq %r10, 16(%rdi) + adcq %r13, %rax + movq %rax, 24(%rdi) + adcq %r12, %rbx + movq %rbx, 32(%rdi) + adcq %r15, %r9 + movq %r9, 40(%rdi) + adcq %r14, %r8 + movq %r8, 48(%rdi) + sbbq %rsi, %rsi + andl $1, %esi + subq (%rcx), %r11 + sbbq 8(%rcx), %rdx + sbbq 16(%rcx), %r10 + sbbq 24(%rcx), %rax + sbbq 32(%rcx), %rbx + sbbq 40(%rcx), %r9 + sbbq 48(%rcx), %r8 + sbbq $0, %rsi + testb $1, %sil + jne LBB104_2 +## BB#1: ## %nocarry + movq %r11, (%rdi) + movq %rdx, 8(%rdi) + movq %r10, 16(%rdi) + movq %rax, 24(%rdi) + movq %rbx, 32(%rdi) + movq %r9, 40(%rdi) + movq %r8, 48(%rdi) +LBB104_2: ## %carry + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + retq + + .globl _mcl_fp_addNF7Lbmi2 + .p2align 4, 0x90 +_mcl_fp_addNF7Lbmi2: ## @mcl_fp_addNF7Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq 48(%rdx), %r9 + movq 40(%rdx), %rbp + movq 32(%rdx), %r10 + movq 24(%rdx), %r11 + movq 16(%rdx), %r14 + movq (%rdx), %r12 + movq 8(%rdx), %r15 + addq (%rsi), %r12 + adcq 8(%rsi), %r15 + adcq 16(%rsi), %r14 + adcq 24(%rsi), %r11 + adcq 32(%rsi), %r10 + adcq 40(%rsi), %rbp + movq %rbp, -8(%rsp) ## 8-byte Spill + adcq 48(%rsi), %r9 + movq %r12, %rsi + subq (%rcx), %rsi + movq %r15, %rdx + sbbq 8(%rcx), %rdx + movq %r14, %rax + sbbq 16(%rcx), %rax + movq %r11, %rbx + sbbq 24(%rcx), %rbx + movq %r10, %r13 + sbbq 32(%rcx), %r13 + sbbq 40(%rcx), %rbp + movq %r9, %r8 + sbbq 48(%rcx), %r8 + movq %r8, %rcx + sarq $63, %rcx + cmovsq %r12, %rsi + movq %rsi, (%rdi) + cmovsq %r15, %rdx + movq %rdx, 8(%rdi) + cmovsq %r14, %rax + movq %rax, 16(%rdi) + cmovsq %r11, %rbx + movq %rbx, 24(%rdi) + cmovsq %r10, %r13 + movq %r13, 32(%rdi) + cmovsq -8(%rsp), %rbp ## 8-byte Folded Reload + movq %rbp, 40(%rdi) + cmovsq %r9, %r8 + movq 
%r8, 48(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_sub7Lbmi2 + .p2align 4, 0x90 +_mcl_fp_sub7Lbmi2: ## @mcl_fp_sub7Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq 48(%rdx), %r14 + movq 48(%rsi), %r8 + movq 40(%rdx), %r15 + movq 40(%rsi), %r9 + movq 32(%rdx), %r12 + movq (%rsi), %rax + movq 8(%rsi), %r11 + xorl %ebx, %ebx + subq (%rdx), %rax + sbbq 8(%rdx), %r11 + movq 16(%rsi), %r13 + sbbq 16(%rdx), %r13 + movq 32(%rsi), %r10 + movq 24(%rsi), %rsi + sbbq 24(%rdx), %rsi + movq %rax, (%rdi) + movq %r11, 8(%rdi) + movq %r13, 16(%rdi) + movq %rsi, 24(%rdi) + sbbq %r12, %r10 + movq %r10, 32(%rdi) + sbbq %r15, %r9 + movq %r9, 40(%rdi) + sbbq %r14, %r8 + movq %r8, 48(%rdi) + sbbq $0, %rbx + testb $1, %bl + je LBB106_2 +## BB#1: ## %carry + movq 48(%rcx), %r14 + movq 40(%rcx), %r15 + movq 32(%rcx), %r12 + movq 24(%rcx), %rbx + movq 8(%rcx), %rdx + movq 16(%rcx), %rbp + addq (%rcx), %rax + movq %rax, (%rdi) + adcq %r11, %rdx + movq %rdx, 8(%rdi) + adcq %r13, %rbp + movq %rbp, 16(%rdi) + adcq %rsi, %rbx + movq %rbx, 24(%rdi) + adcq %r10, %r12 + movq %r12, 32(%rdi) + adcq %r9, %r15 + movq %r15, 40(%rdi) + adcq %r8, %r14 + movq %r14, 48(%rdi) +LBB106_2: ## %nocarry + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_subNF7Lbmi2 + .p2align 4, 0x90 +_mcl_fp_subNF7Lbmi2: ## @mcl_fp_subNF7Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rcx, %r8 + movq 48(%rsi), %r11 + movdqu (%rdx), %xmm0 + movdqu 16(%rdx), %xmm1 + movdqu 32(%rdx), %xmm2 + pshufd $78, %xmm2, %xmm3 ## xmm3 = xmm2[2,3,0,1] + movd %xmm3, %r14 + movdqu (%rsi), %xmm3 + movdqu 16(%rsi), %xmm4 + movdqu 32(%rsi), %xmm5 + pshufd $78, %xmm5, %xmm6 ## xmm6 = xmm5[2,3,0,1] + movd %xmm6, %rcx + movd %xmm2, %r15 + movd %xmm5, %r9 + pshufd $78, %xmm1, %xmm2 ## xmm2 = xmm1[2,3,0,1] + movd %xmm2, %r12 + pshufd $78, %xmm4, %xmm2 ## xmm2 = xmm4[2,3,0,1] + movd %xmm2, %r10 + movd %xmm1, %r13 + pshufd $78, %xmm0, %xmm1 ## xmm1 = xmm0[2,3,0,1] + movd %xmm1, %rax + pshufd $78, %xmm3, %xmm1 ## xmm1 = xmm3[2,3,0,1] + movd %xmm0, %rbx + movd %xmm3, %rsi + subq %rbx, %rsi + movd %xmm1, %rbx + sbbq %rax, %rbx + movd %xmm4, %rbp + sbbq %r13, %rbp + sbbq %r12, %r10 + sbbq %r15, %r9 + sbbq %r14, %rcx + movq %rcx, -8(%rsp) ## 8-byte Spill + sbbq 48(%rdx), %r11 + movq %r11, %rax + sarq $63, %rax + movq %rax, %rdx + shldq $1, %r11, %rdx + andq (%r8), %rdx + movq 48(%r8), %r14 + andq %rax, %r14 + movq 40(%r8), %r15 + andq %rax, %r15 + movq 32(%r8), %r12 + andq %rax, %r12 + movq 24(%r8), %r13 + andq %rax, %r13 + movq 16(%r8), %rcx + andq %rax, %rcx + andq 8(%r8), %rax + addq %rsi, %rdx + adcq %rbx, %rax + movq %rdx, (%rdi) + movq %rax, 8(%rdi) + adcq %rbp, %rcx + movq %rcx, 16(%rdi) + adcq %r10, %r13 + movq %r13, 24(%rdi) + adcq %r9, %r12 + movq %r12, 32(%rdi) + adcq -8(%rsp), %r15 ## 8-byte Folded Reload + movq %r15, 40(%rdi) + adcq %r11, %r14 + movq %r14, 48(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fpDbl_add7Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_add7Lbmi2: ## @mcl_fpDbl_add7Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rcx, %r8 + movq 104(%rdx), %rax + movq %rax, -8(%rsp) ## 8-byte Spill + movq 96(%rdx), %rax + movq %rax, -24(%rsp) ## 8-byte Spill + movq 88(%rdx), %r11 + movq 80(%rdx), %r14 + movq 24(%rsi), %r15 + movq 32(%rsi), %r12 + movq 
16(%rdx), %r9 + movq (%rdx), %rax + movq 8(%rdx), %rbx + addq (%rsi), %rax + adcq 8(%rsi), %rbx + adcq 16(%rsi), %r9 + adcq 24(%rdx), %r15 + adcq 32(%rdx), %r12 + movq 72(%rdx), %r13 + movq 64(%rdx), %rbp + movq %rax, (%rdi) + movq 56(%rdx), %r10 + movq %rbx, 8(%rdi) + movq 48(%rdx), %rcx + movq 40(%rdx), %rdx + movq %r9, 16(%rdi) + movq 104(%rsi), %r9 + movq %r15, 24(%rdi) + movq 40(%rsi), %rbx + adcq %rdx, %rbx + movq 96(%rsi), %r15 + movq %r12, 32(%rdi) + movq 48(%rsi), %rdx + adcq %rcx, %rdx + movq 88(%rsi), %rax + movq %rbx, 40(%rdi) + movq 56(%rsi), %rcx + adcq %r10, %rcx + movq 80(%rsi), %r12 + movq %rdx, 48(%rdi) + movq 72(%rsi), %rdx + movq 64(%rsi), %rsi + adcq %rbp, %rsi + adcq %r13, %rdx + adcq %r14, %r12 + adcq %r11, %rax + movq %rax, -16(%rsp) ## 8-byte Spill + adcq -24(%rsp), %r15 ## 8-byte Folded Reload + movq %r15, -24(%rsp) ## 8-byte Spill + adcq -8(%rsp), %r9 ## 8-byte Folded Reload + sbbq %rbp, %rbp + andl $1, %ebp + movq %rcx, %rbx + subq (%r8), %rbx + movq %rsi, %r10 + sbbq 8(%r8), %r10 + movq %rdx, %r11 + sbbq 16(%r8), %r11 + movq %r12, %r14 + sbbq 24(%r8), %r14 + movq -16(%rsp), %r13 ## 8-byte Reload + sbbq 32(%r8), %r13 + sbbq 40(%r8), %r15 + movq %r9, %rax + sbbq 48(%r8), %rax + sbbq $0, %rbp + andl $1, %ebp + cmovneq %rcx, %rbx + movq %rbx, 56(%rdi) + testb %bpl, %bpl + cmovneq %rsi, %r10 + movq %r10, 64(%rdi) + cmovneq %rdx, %r11 + movq %r11, 72(%rdi) + cmovneq %r12, %r14 + movq %r14, 80(%rdi) + cmovneq -16(%rsp), %r13 ## 8-byte Folded Reload + movq %r13, 88(%rdi) + cmovneq -24(%rsp), %r15 ## 8-byte Folded Reload + movq %r15, 96(%rdi) + cmovneq %r9, %rax + movq %rax, 104(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fpDbl_sub7Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_sub7Lbmi2: ## @mcl_fpDbl_sub7Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rcx, %r8 + movq 104(%rdx), %rax + movq %rax, -8(%rsp) ## 8-byte Spill + movq 96(%rdx), %r10 + movq 88(%rdx), %r14 + movq 16(%rsi), %rax + movq (%rsi), %r15 + movq 8(%rsi), %r11 + xorl %ecx, %ecx + subq (%rdx), %r15 + sbbq 8(%rdx), %r11 + sbbq 16(%rdx), %rax + movq 24(%rsi), %rbx + sbbq 24(%rdx), %rbx + movq 32(%rsi), %r12 + sbbq 32(%rdx), %r12 + movq 80(%rdx), %r13 + movq 72(%rdx), %rbp + movq %r15, (%rdi) + movq 64(%rdx), %r9 + movq %r11, 8(%rdi) + movq 56(%rdx), %r15 + movq %rax, 16(%rdi) + movq 48(%rdx), %r11 + movq 40(%rdx), %rdx + movq %rbx, 24(%rdi) + movq 40(%rsi), %rbx + sbbq %rdx, %rbx + movq 104(%rsi), %rax + movq %r12, 32(%rdi) + movq 48(%rsi), %r12 + sbbq %r11, %r12 + movq 96(%rsi), %r11 + movq %rbx, 40(%rdi) + movq 56(%rsi), %rdx + sbbq %r15, %rdx + movq 88(%rsi), %r15 + movq %r12, 48(%rdi) + movq 64(%rsi), %rbx + sbbq %r9, %rbx + movq 80(%rsi), %r12 + movq 72(%rsi), %r9 + sbbq %rbp, %r9 + sbbq %r13, %r12 + sbbq %r14, %r15 + sbbq %r10, %r11 + sbbq -8(%rsp), %rax ## 8-byte Folded Reload + movq %rax, -8(%rsp) ## 8-byte Spill + movl $0, %ebp + sbbq $0, %rbp + andl $1, %ebp + movq (%r8), %r10 + cmoveq %rcx, %r10 + testb %bpl, %bpl + movq 16(%r8), %rbp + cmoveq %rcx, %rbp + movq 8(%r8), %rsi + cmoveq %rcx, %rsi + movq 48(%r8), %r14 + cmoveq %rcx, %r14 + movq 40(%r8), %r13 + cmoveq %rcx, %r13 + movq 32(%r8), %rax + cmoveq %rcx, %rax + cmovneq 24(%r8), %rcx + addq %rdx, %r10 + adcq %rbx, %rsi + movq %r10, 56(%rdi) + movq %rsi, 64(%rdi) + adcq %r9, %rbp + movq %rbp, 72(%rdi) + adcq %r12, %rcx + movq %rcx, 80(%rdi) + adcq %r15, %rax + movq %rax, 88(%rdi) + adcq %r11, %r13 + movq %r13, 96(%rdi) + adcq -8(%rsp), %r14 ## 
8-byte Folded Reload + movq %r14, 104(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .p2align 4, 0x90 +l_mulPv512x64: ## @mulPv512x64 +## BB#0: + mulxq (%rsi), %rcx, %rax + movq %rcx, (%rdi) + mulxq 8(%rsi), %rcx, %r8 + addq %rax, %rcx + movq %rcx, 8(%rdi) + mulxq 16(%rsi), %rcx, %r9 + adcq %r8, %rcx + movq %rcx, 16(%rdi) + mulxq 24(%rsi), %rax, %rcx + adcq %r9, %rax + movq %rax, 24(%rdi) + mulxq 32(%rsi), %rax, %r8 + adcq %rcx, %rax + movq %rax, 32(%rdi) + mulxq 40(%rsi), %rcx, %r9 + adcq %r8, %rcx + movq %rcx, 40(%rdi) + mulxq 48(%rsi), %rax, %rcx + adcq %r9, %rax + movq %rax, 48(%rdi) + mulxq 56(%rsi), %rax, %rdx + adcq %rcx, %rax + movq %rax, 56(%rdi) + adcq $0, %rdx + movq %rdx, 64(%rdi) + movq %rdi, %rax + retq + + .globl _mcl_fp_mulUnitPre8Lbmi2 + .p2align 4, 0x90 +_mcl_fp_mulUnitPre8Lbmi2: ## @mcl_fp_mulUnitPre8Lbmi2 +## BB#0: + pushq %rbx + subq $80, %rsp + movq %rdi, %rbx + leaq 8(%rsp), %rdi + callq l_mulPv512x64 + movq 72(%rsp), %r8 + movq 64(%rsp), %r9 + movq 56(%rsp), %r10 + movq 48(%rsp), %r11 + movq 40(%rsp), %rdi + movq 32(%rsp), %rax + movq 24(%rsp), %rcx + movq 8(%rsp), %rdx + movq 16(%rsp), %rsi + movq %rdx, (%rbx) + movq %rsi, 8(%rbx) + movq %rcx, 16(%rbx) + movq %rax, 24(%rbx) + movq %rdi, 32(%rbx) + movq %r11, 40(%rbx) + movq %r10, 48(%rbx) + movq %r9, 56(%rbx) + movq %r8, 64(%rbx) + addq $80, %rsp + popq %rbx + retq + + .globl _mcl_fpDbl_mulPre8Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_mulPre8Lbmi2: ## @mcl_fpDbl_mulPre8Lbmi2 +## BB#0: + pushq %rbp + movq %rsp, %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + subq $200, %rsp + movq %rdx, %r15 + movq %rsi, %rbx + movq %rdi, %r14 + callq _mcl_fpDbl_mulPre4Lbmi2 + leaq 64(%r14), %rdi + leaq 32(%rbx), %rsi + leaq 32(%r15), %rdx + callq _mcl_fpDbl_mulPre4Lbmi2 + movq 56(%rbx), %r10 + movq 48(%rbx), %rdx + movq (%rbx), %rsi + movq 8(%rbx), %rdi + addq 32(%rbx), %rsi + adcq 40(%rbx), %rdi + adcq 16(%rbx), %rdx + adcq 24(%rbx), %r10 + pushfq + popq %r8 + xorl %r9d, %r9d + movq 56(%r15), %rcx + movq 48(%r15), %r13 + movq (%r15), %r12 + movq 8(%r15), %rbx + addq 32(%r15), %r12 + adcq 40(%r15), %rbx + adcq 16(%r15), %r13 + adcq 24(%r15), %rcx + movl $0, %eax + cmovbq %r10, %rax + movq %rax, -88(%rbp) ## 8-byte Spill + movl $0, %eax + cmovbq %rdx, %rax + movq %rax, -80(%rbp) ## 8-byte Spill + movl $0, %eax + cmovbq %rdi, %rax + movq %rax, -72(%rbp) ## 8-byte Spill + movl $0, %eax + cmovbq %rsi, %rax + movq %rax, -64(%rbp) ## 8-byte Spill + sbbq %r15, %r15 + movq %rsi, -168(%rbp) + movq %rdi, -160(%rbp) + movq %rdx, -152(%rbp) + movq %r10, -144(%rbp) + movq %r12, -136(%rbp) + movq %rbx, -128(%rbp) + movq %r13, -120(%rbp) + movq %rcx, -112(%rbp) + pushq %r8 + popfq + cmovaeq %r9, %rcx + movq %rcx, -48(%rbp) ## 8-byte Spill + cmovaeq %r9, %r13 + cmovaeq %r9, %rbx + cmovaeq %r9, %r12 + sbbq %rax, %rax + movq %rax, -56(%rbp) ## 8-byte Spill + leaq -232(%rbp), %rdi + leaq -168(%rbp), %rsi + leaq -136(%rbp), %rdx + callq _mcl_fpDbl_mulPre4Lbmi2 + addq -64(%rbp), %r12 ## 8-byte Folded Reload + adcq -72(%rbp), %rbx ## 8-byte Folded Reload + adcq -80(%rbp), %r13 ## 8-byte Folded Reload + movq -48(%rbp), %r10 ## 8-byte Reload + adcq -88(%rbp), %r10 ## 8-byte Folded Reload + sbbq %rax, %rax + andl $1, %eax + movq -56(%rbp), %rdx ## 8-byte Reload + andl %edx, %r15d + andl $1, %r15d + addq -200(%rbp), %r12 + adcq -192(%rbp), %rbx + adcq -184(%rbp), %r13 + adcq -176(%rbp), %r10 + adcq %rax, %r15 + movq -208(%rbp), %rax + movq -216(%rbp), %rcx + movq -232(%rbp), %rsi + movq 
-224(%rbp), %rdx + subq (%r14), %rsi + sbbq 8(%r14), %rdx + sbbq 16(%r14), %rcx + sbbq 24(%r14), %rax + movq 32(%r14), %rdi + movq %rdi, -80(%rbp) ## 8-byte Spill + movq 40(%r14), %r8 + movq %r8, -88(%rbp) ## 8-byte Spill + sbbq %rdi, %r12 + sbbq %r8, %rbx + movq 48(%r14), %rdi + movq %rdi, -72(%rbp) ## 8-byte Spill + sbbq %rdi, %r13 + movq 56(%r14), %rdi + movq %rdi, -64(%rbp) ## 8-byte Spill + sbbq %rdi, %r10 + sbbq $0, %r15 + movq 64(%r14), %r11 + subq %r11, %rsi + movq 72(%r14), %rdi + movq %rdi, -56(%rbp) ## 8-byte Spill + sbbq %rdi, %rdx + movq 80(%r14), %rdi + movq %rdi, -48(%rbp) ## 8-byte Spill + sbbq %rdi, %rcx + movq 88(%r14), %rdi + movq %rdi, -104(%rbp) ## 8-byte Spill + sbbq %rdi, %rax + movq 96(%r14), %rdi + movq %rdi, -96(%rbp) ## 8-byte Spill + sbbq %rdi, %r12 + movq 104(%r14), %rdi + sbbq %rdi, %rbx + movq 112(%r14), %r8 + sbbq %r8, %r13 + movq 120(%r14), %r9 + sbbq %r9, %r10 + sbbq $0, %r15 + addq -80(%rbp), %rsi ## 8-byte Folded Reload + adcq -88(%rbp), %rdx ## 8-byte Folded Reload + movq %rsi, 32(%r14) + adcq -72(%rbp), %rcx ## 8-byte Folded Reload + movq %rdx, 40(%r14) + adcq -64(%rbp), %rax ## 8-byte Folded Reload + movq %rcx, 48(%r14) + adcq %r11, %r12 + movq %rax, 56(%r14) + movq %r12, 64(%r14) + adcq -56(%rbp), %rbx ## 8-byte Folded Reload + movq %rbx, 72(%r14) + adcq -48(%rbp), %r13 ## 8-byte Folded Reload + movq %r13, 80(%r14) + adcq -104(%rbp), %r10 ## 8-byte Folded Reload + movq %r10, 88(%r14) + adcq -96(%rbp), %r15 ## 8-byte Folded Reload + movq %r15, 96(%r14) + adcq $0, %rdi + movq %rdi, 104(%r14) + adcq $0, %r8 + movq %r8, 112(%r14) + adcq $0, %r9 + movq %r9, 120(%r14) + addq $200, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fpDbl_sqrPre8Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_sqrPre8Lbmi2: ## @mcl_fpDbl_sqrPre8Lbmi2 +## BB#0: + pushq %rbp + movq %rsp, %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + subq $200, %rsp + movq %rsi, %rbx + movq %rdi, %r14 + movq %rbx, %rdx + callq _mcl_fpDbl_mulPre4Lbmi2 + leaq 64(%r14), %rdi + leaq 32(%rbx), %rsi + movq %rsi, %rdx + callq _mcl_fpDbl_mulPre4Lbmi2 + movq 56(%rbx), %r15 + movq 48(%rbx), %rax + movq (%rbx), %rcx + movq 8(%rbx), %rdx + addq 32(%rbx), %rcx + adcq 40(%rbx), %rdx + adcq 16(%rbx), %rax + adcq 24(%rbx), %r15 + pushfq + popq %r8 + pushfq + popq %r9 + pushfq + popq %r10 + pushfq + popq %rdi + pushfq + popq %rbx + sbbq %rsi, %rsi + movq %rsi, -56(%rbp) ## 8-byte Spill + leaq (%rcx,%rcx), %rsi + xorl %r11d, %r11d + pushq %rbx + popfq + cmovaeq %r11, %rsi + movq %rsi, -48(%rbp) ## 8-byte Spill + movq %rdx, %r13 + shldq $1, %rcx, %r13 + pushq %rdi + popfq + cmovaeq %r11, %r13 + movq %rax, %r12 + shldq $1, %rdx, %r12 + pushq %r10 + popfq + cmovaeq %r11, %r12 + movq %r15, %rbx + movq %rcx, -168(%rbp) + movq %rdx, -160(%rbp) + movq %rax, -152(%rbp) + movq %r15, -144(%rbp) + movq %rcx, -136(%rbp) + movq %rdx, -128(%rbp) + movq %rax, -120(%rbp) + movq %r15, -112(%rbp) + shldq $1, %rax, %r15 + pushq %r9 + popfq + cmovaeq %r11, %r15 + shrq $63, %rbx + pushq %r8 + popfq + cmovaeq %r11, %rbx + leaq -232(%rbp), %rdi + leaq -168(%rbp), %rsi + leaq -136(%rbp), %rdx + callq _mcl_fpDbl_mulPre4Lbmi2 + movq -56(%rbp), %rax ## 8-byte Reload + andl $1, %eax + movq -48(%rbp), %r10 ## 8-byte Reload + addq -200(%rbp), %r10 + adcq -192(%rbp), %r13 + adcq -184(%rbp), %r12 + adcq -176(%rbp), %r15 + adcq %rbx, %rax + movq %rax, %rbx + movq -208(%rbp), %rax + movq -216(%rbp), %rcx + movq -232(%rbp), %rsi + movq -224(%rbp), %rdx + subq (%r14), %rsi + sbbq 
8(%r14), %rdx + sbbq 16(%r14), %rcx + sbbq 24(%r14), %rax + movq 32(%r14), %r9 + movq %r9, -56(%rbp) ## 8-byte Spill + movq 40(%r14), %r8 + movq %r8, -48(%rbp) ## 8-byte Spill + sbbq %r9, %r10 + sbbq %r8, %r13 + movq 48(%r14), %rdi + movq %rdi, -104(%rbp) ## 8-byte Spill + sbbq %rdi, %r12 + movq 56(%r14), %rdi + movq %rdi, -96(%rbp) ## 8-byte Spill + sbbq %rdi, %r15 + sbbq $0, %rbx + movq 64(%r14), %r11 + subq %r11, %rsi + movq 72(%r14), %rdi + movq %rdi, -88(%rbp) ## 8-byte Spill + sbbq %rdi, %rdx + movq 80(%r14), %rdi + movq %rdi, -80(%rbp) ## 8-byte Spill + sbbq %rdi, %rcx + movq 88(%r14), %rdi + movq %rdi, -72(%rbp) ## 8-byte Spill + sbbq %rdi, %rax + movq 96(%r14), %rdi + movq %rdi, -64(%rbp) ## 8-byte Spill + sbbq %rdi, %r10 + movq 104(%r14), %rdi + sbbq %rdi, %r13 + movq 112(%r14), %r8 + sbbq %r8, %r12 + movq 120(%r14), %r9 + sbbq %r9, %r15 + sbbq $0, %rbx + addq -56(%rbp), %rsi ## 8-byte Folded Reload + adcq -48(%rbp), %rdx ## 8-byte Folded Reload + movq %rsi, 32(%r14) + adcq -104(%rbp), %rcx ## 8-byte Folded Reload + movq %rdx, 40(%r14) + adcq -96(%rbp), %rax ## 8-byte Folded Reload + movq %rcx, 48(%r14) + adcq %r11, %r10 + movq %rax, 56(%r14) + movq %r10, 64(%r14) + adcq -88(%rbp), %r13 ## 8-byte Folded Reload + movq %r13, 72(%r14) + adcq -80(%rbp), %r12 ## 8-byte Folded Reload + movq %r12, 80(%r14) + adcq -72(%rbp), %r15 ## 8-byte Folded Reload + movq %r15, 88(%r14) + movq %rbx, %rax + adcq -64(%rbp), %rax ## 8-byte Folded Reload + movq %rax, 96(%r14) + adcq $0, %rdi + movq %rdi, 104(%r14) + adcq $0, %r8 + movq %r8, 112(%r14) + adcq $0, %r9 + movq %r9, 120(%r14) + addq $200, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_mont8Lbmi2 + .p2align 4, 0x90 +_mcl_fp_mont8Lbmi2: ## @mcl_fp_mont8Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + subq $1256, %rsp ## imm = 0x4E8 + movq %rcx, %r13 + movq %rdx, 64(%rsp) ## 8-byte Spill + movq %rsi, 72(%rsp) ## 8-byte Spill + movq %rdi, 96(%rsp) ## 8-byte Spill + movq -8(%r13), %rbx + movq %rbx, 80(%rsp) ## 8-byte Spill + movq %r13, 56(%rsp) ## 8-byte Spill + movq (%rdx), %rdx + leaq 1184(%rsp), %rdi + callq l_mulPv512x64 + movq 1184(%rsp), %r15 + movq 1192(%rsp), %r14 + movq %r15, %rdx + imulq %rbx, %rdx + movq 1248(%rsp), %rax + movq %rax, 24(%rsp) ## 8-byte Spill + movq 1240(%rsp), %rax + movq %rax, 40(%rsp) ## 8-byte Spill + movq 1232(%rsp), %rax + movq %rax, (%rsp) ## 8-byte Spill + movq 1224(%rsp), %rax + movq %rax, 16(%rsp) ## 8-byte Spill + movq 1216(%rsp), %r12 + movq 1208(%rsp), %rbx + movq 1200(%rsp), %rbp + leaq 1112(%rsp), %rdi + movq %r13, %rsi + callq l_mulPv512x64 + addq 1112(%rsp), %r15 + adcq 1120(%rsp), %r14 + adcq 1128(%rsp), %rbp + movq %rbp, 88(%rsp) ## 8-byte Spill + adcq 1136(%rsp), %rbx + movq %rbx, 32(%rsp) ## 8-byte Spill + adcq 1144(%rsp), %r12 + movq %r12, 8(%rsp) ## 8-byte Spill + movq 16(%rsp), %r13 ## 8-byte Reload + adcq 1152(%rsp), %r13 + movq (%rsp), %rbx ## 8-byte Reload + adcq 1160(%rsp), %rbx + movq 40(%rsp), %rbp ## 8-byte Reload + adcq 1168(%rsp), %rbp + movq 24(%rsp), %rax ## 8-byte Reload + adcq 1176(%rsp), %rax + movq %rax, 24(%rsp) ## 8-byte Spill + sbbq %r15, %r15 + movq 64(%rsp), %rax ## 8-byte Reload + movq 8(%rax), %rdx + leaq 1040(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + andl $1, %r15d + addq 1040(%rsp), %r14 + movq 88(%rsp), %rax ## 8-byte Reload + adcq 1048(%rsp), %rax + movq %rax, 88(%rsp) ## 8-byte Spill + movq 32(%rsp), %rax ## 8-byte Reload + adcq 
1056(%rsp), %rax + movq %rax, %r12 + movq 8(%rsp), %rax ## 8-byte Reload + adcq 1064(%rsp), %rax + movq %rax, 8(%rsp) ## 8-byte Spill + adcq 1072(%rsp), %r13 + movq %r13, 16(%rsp) ## 8-byte Spill + adcq 1080(%rsp), %rbx + movq %rbx, (%rsp) ## 8-byte Spill + adcq 1088(%rsp), %rbp + movq 24(%rsp), %rax ## 8-byte Reload + adcq 1096(%rsp), %rax + movq %rax, 24(%rsp) ## 8-byte Spill + adcq 1104(%rsp), %r15 + movq %r15, 48(%rsp) ## 8-byte Spill + sbbq %r15, %r15 + movq %r14, %rdx + imulq 80(%rsp), %rdx ## 8-byte Folded Reload + leaq 968(%rsp), %rdi + movq 56(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + andl $1, %r15d + addq 968(%rsp), %r14 + movq 88(%rsp), %r13 ## 8-byte Reload + adcq 976(%rsp), %r13 + adcq 984(%rsp), %r12 + movq %r12, 32(%rsp) ## 8-byte Spill + movq 8(%rsp), %r14 ## 8-byte Reload + adcq 992(%rsp), %r14 + movq 16(%rsp), %rbx ## 8-byte Reload + adcq 1000(%rsp), %rbx + movq (%rsp), %rax ## 8-byte Reload + adcq 1008(%rsp), %rax + movq %rax, (%rsp) ## 8-byte Spill + adcq 1016(%rsp), %rbp + movq %rbp, %r12 + movq 24(%rsp), %rbp ## 8-byte Reload + adcq 1024(%rsp), %rbp + movq 48(%rsp), %rax ## 8-byte Reload + adcq 1032(%rsp), %rax + movq %rax, 48(%rsp) ## 8-byte Spill + adcq $0, %r15 + movq 64(%rsp), %rax ## 8-byte Reload + movq 16(%rax), %rdx + leaq 896(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + movq %r13, %rcx + addq 896(%rsp), %rcx + movq 32(%rsp), %r13 ## 8-byte Reload + adcq 904(%rsp), %r13 + adcq 912(%rsp), %r14 + adcq 920(%rsp), %rbx + movq %rbx, 16(%rsp) ## 8-byte Spill + movq (%rsp), %rax ## 8-byte Reload + adcq 928(%rsp), %rax + movq %rax, (%rsp) ## 8-byte Spill + adcq 936(%rsp), %r12 + movq %r12, 40(%rsp) ## 8-byte Spill + adcq 944(%rsp), %rbp + movq %rbp, 24(%rsp) ## 8-byte Spill + movq 48(%rsp), %r12 ## 8-byte Reload + adcq 952(%rsp), %r12 + adcq 960(%rsp), %r15 + sbbq %rbx, %rbx + movq %rcx, %rdx + movq %rcx, %rbp + imulq 80(%rsp), %rdx ## 8-byte Folded Reload + leaq 824(%rsp), %rdi + movq 56(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + andl $1, %ebx + addq 824(%rsp), %rbp + adcq 832(%rsp), %r13 + movq %r13, 32(%rsp) ## 8-byte Spill + adcq 840(%rsp), %r14 + movq %r14, 8(%rsp) ## 8-byte Spill + movq 16(%rsp), %r13 ## 8-byte Reload + adcq 848(%rsp), %r13 + movq (%rsp), %rbp ## 8-byte Reload + adcq 856(%rsp), %rbp + movq 40(%rsp), %r14 ## 8-byte Reload + adcq 864(%rsp), %r14 + movq 24(%rsp), %rax ## 8-byte Reload + adcq 872(%rsp), %rax + movq %rax, 24(%rsp) ## 8-byte Spill + adcq 880(%rsp), %r12 + adcq 888(%rsp), %r15 + adcq $0, %rbx + movq 64(%rsp), %rax ## 8-byte Reload + movq 24(%rax), %rdx + leaq 752(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + movq 32(%rsp), %rax ## 8-byte Reload + addq 752(%rsp), %rax + movq 8(%rsp), %rcx ## 8-byte Reload + adcq 760(%rsp), %rcx + movq %rcx, 8(%rsp) ## 8-byte Spill + adcq 768(%rsp), %r13 + movq %r13, 16(%rsp) ## 8-byte Spill + adcq 776(%rsp), %rbp + movq %rbp, (%rsp) ## 8-byte Spill + adcq 784(%rsp), %r14 + movq %r14, 40(%rsp) ## 8-byte Spill + movq 24(%rsp), %rbp ## 8-byte Reload + adcq 792(%rsp), %rbp + adcq 800(%rsp), %r12 + adcq 808(%rsp), %r15 + adcq 816(%rsp), %rbx + movq %rbx, 32(%rsp) ## 8-byte Spill + sbbq %r13, %r13 + movq %rax, %rdx + movq %rax, %rbx + imulq 80(%rsp), %rdx ## 8-byte Folded Reload + leaq 680(%rsp), %rdi + movq 56(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + movq %r13, %rax + andl $1, %eax + addq 680(%rsp), %rbx + movq 8(%rsp), %r14 ## 8-byte Reload + adcq 688(%rsp), %r14 + movq 16(%rsp), %rcx ## 8-byte Reload + adcq 
696(%rsp), %rcx + movq %rcx, 16(%rsp) ## 8-byte Spill + movq (%rsp), %r13 ## 8-byte Reload + adcq 704(%rsp), %r13 + movq 40(%rsp), %rbx ## 8-byte Reload + adcq 712(%rsp), %rbx + adcq 720(%rsp), %rbp + movq %rbp, 24(%rsp) ## 8-byte Spill + movq %r12, %rbp + adcq 728(%rsp), %rbp + adcq 736(%rsp), %r15 + movq 32(%rsp), %r12 ## 8-byte Reload + adcq 744(%rsp), %r12 + adcq $0, %rax + movq %rax, 8(%rsp) ## 8-byte Spill + movq 64(%rsp), %rax ## 8-byte Reload + movq 32(%rax), %rdx + leaq 608(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + movq %r14, %rax + addq 608(%rsp), %rax + movq 16(%rsp), %r14 ## 8-byte Reload + adcq 616(%rsp), %r14 + adcq 624(%rsp), %r13 + movq %r13, (%rsp) ## 8-byte Spill + adcq 632(%rsp), %rbx + movq %rbx, %r13 + movq 24(%rsp), %rcx ## 8-byte Reload + adcq 640(%rsp), %rcx + movq %rcx, 24(%rsp) ## 8-byte Spill + adcq 648(%rsp), %rbp + movq %rbp, 48(%rsp) ## 8-byte Spill + adcq 656(%rsp), %r15 + adcq 664(%rsp), %r12 + movq %r12, 32(%rsp) ## 8-byte Spill + movq 8(%rsp), %rcx ## 8-byte Reload + adcq 672(%rsp), %rcx + movq %rcx, 8(%rsp) ## 8-byte Spill + sbbq %rbp, %rbp + movq %rax, %rdx + movq %rax, %rbx + imulq 80(%rsp), %rdx ## 8-byte Folded Reload + leaq 536(%rsp), %rdi + movq 56(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + movq %rbp, %rax + andl $1, %eax + addq 536(%rsp), %rbx + adcq 544(%rsp), %r14 + movq %r14, 16(%rsp) ## 8-byte Spill + movq (%rsp), %rbx ## 8-byte Reload + adcq 552(%rsp), %rbx + adcq 560(%rsp), %r13 + movq 24(%rsp), %rbp ## 8-byte Reload + adcq 568(%rsp), %rbp + movq 48(%rsp), %r12 ## 8-byte Reload + adcq 576(%rsp), %r12 + adcq 584(%rsp), %r15 + movq 32(%rsp), %rcx ## 8-byte Reload + adcq 592(%rsp), %rcx + movq %rcx, 32(%rsp) ## 8-byte Spill + movq 8(%rsp), %r14 ## 8-byte Reload + adcq 600(%rsp), %r14 + adcq $0, %rax + movq %rax, (%rsp) ## 8-byte Spill + movq 64(%rsp), %rax ## 8-byte Reload + movq 40(%rax), %rdx + leaq 464(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + movq 16(%rsp), %rax ## 8-byte Reload + addq 464(%rsp), %rax + adcq 472(%rsp), %rbx + adcq 480(%rsp), %r13 + movq %r13, 40(%rsp) ## 8-byte Spill + adcq 488(%rsp), %rbp + movq %rbp, 24(%rsp) ## 8-byte Spill + adcq 496(%rsp), %r12 + adcq 504(%rsp), %r15 + movq %r15, 16(%rsp) ## 8-byte Spill + movq 32(%rsp), %r15 ## 8-byte Reload + adcq 512(%rsp), %r15 + adcq 520(%rsp), %r14 + movq %r14, 8(%rsp) ## 8-byte Spill + movq (%rsp), %r14 ## 8-byte Reload + adcq 528(%rsp), %r14 + sbbq %r13, %r13 + movq %rax, %rdx + movq %rax, %rbp + imulq 80(%rsp), %rdx ## 8-byte Folded Reload + leaq 392(%rsp), %rdi + movq 56(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + movq %r13, %rax + andl $1, %eax + addq 392(%rsp), %rbp + adcq 400(%rsp), %rbx + movq %rbx, (%rsp) ## 8-byte Spill + movq 40(%rsp), %rbp ## 8-byte Reload + adcq 408(%rsp), %rbp + movq 24(%rsp), %rbx ## 8-byte Reload + adcq 416(%rsp), %rbx + adcq 424(%rsp), %r12 + movq 16(%rsp), %r13 ## 8-byte Reload + adcq 432(%rsp), %r13 + adcq 440(%rsp), %r15 + movq %r15, 32(%rsp) ## 8-byte Spill + movq 8(%rsp), %r15 ## 8-byte Reload + adcq 448(%rsp), %r15 + adcq 456(%rsp), %r14 + adcq $0, %rax + movq %rax, 16(%rsp) ## 8-byte Spill + movq 64(%rsp), %rax ## 8-byte Reload + movq 48(%rax), %rdx + leaq 320(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + movq (%rsp), %rax ## 8-byte Reload + addq 320(%rsp), %rax + adcq 328(%rsp), %rbp + movq %rbp, 40(%rsp) ## 8-byte Spill + adcq 336(%rsp), %rbx + movq %rbx, 24(%rsp) ## 8-byte Spill + movq %r12, %rbp + adcq 344(%rsp), %rbp + adcq 
352(%rsp), %r13 + movq 32(%rsp), %r12 ## 8-byte Reload + adcq 360(%rsp), %r12 + adcq 368(%rsp), %r15 + movq %r15, 8(%rsp) ## 8-byte Spill + adcq 376(%rsp), %r14 + movq %r14, (%rsp) ## 8-byte Spill + movq 16(%rsp), %rcx ## 8-byte Reload + adcq 384(%rsp), %rcx + movq %rcx, 16(%rsp) ## 8-byte Spill + sbbq %r15, %r15 + movq %rax, %rdx + movq %rax, %rbx + imulq 80(%rsp), %rdx ## 8-byte Folded Reload + leaq 248(%rsp), %rdi + movq 56(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + andl $1, %r15d + addq 248(%rsp), %rbx + movq 40(%rsp), %rax ## 8-byte Reload + adcq 256(%rsp), %rax + movq %rax, 40(%rsp) ## 8-byte Spill + movq 24(%rsp), %r14 ## 8-byte Reload + adcq 264(%rsp), %r14 + adcq 272(%rsp), %rbp + movq %rbp, 48(%rsp) ## 8-byte Spill + movq %r13, %rbx + adcq 280(%rsp), %rbx + movq %r12, %rbp + adcq 288(%rsp), %rbp + movq 8(%rsp), %r13 ## 8-byte Reload + adcq 296(%rsp), %r13 + movq (%rsp), %rax ## 8-byte Reload + adcq 304(%rsp), %rax + movq %rax, (%rsp) ## 8-byte Spill + movq 16(%rsp), %r12 ## 8-byte Reload + adcq 312(%rsp), %r12 + adcq $0, %r15 + movq 64(%rsp), %rax ## 8-byte Reload + movq 56(%rax), %rdx + leaq 176(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + movq 40(%rsp), %rax ## 8-byte Reload + addq 176(%rsp), %rax + adcq 184(%rsp), %r14 + movq %r14, 24(%rsp) ## 8-byte Spill + movq 48(%rsp), %rcx ## 8-byte Reload + adcq 192(%rsp), %rcx + movq %rcx, 48(%rsp) ## 8-byte Spill + adcq 200(%rsp), %rbx + movq %rbx, 16(%rsp) ## 8-byte Spill + adcq 208(%rsp), %rbp + adcq 216(%rsp), %r13 + movq %r13, 8(%rsp) ## 8-byte Spill + movq (%rsp), %r14 ## 8-byte Reload + adcq 224(%rsp), %r14 + adcq 232(%rsp), %r12 + adcq 240(%rsp), %r15 + sbbq %rbx, %rbx + movq 80(%rsp), %rdx ## 8-byte Reload + imulq %rax, %rdx + movq %rax, %r13 + leaq 104(%rsp), %rdi + movq 56(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + andl $1, %ebx + addq 104(%rsp), %r13 + movq 24(%rsp), %rcx ## 8-byte Reload + adcq 112(%rsp), %rcx + movq 48(%rsp), %rdx ## 8-byte Reload + adcq 120(%rsp), %rdx + movq 16(%rsp), %rsi ## 8-byte Reload + adcq 128(%rsp), %rsi + movq %rbp, %rdi + adcq 136(%rsp), %rdi + movq %rdi, 32(%rsp) ## 8-byte Spill + movq 8(%rsp), %r8 ## 8-byte Reload + adcq 144(%rsp), %r8 + movq %r8, 8(%rsp) ## 8-byte Spill + movq %r14, %r9 + adcq 152(%rsp), %r9 + movq %r9, (%rsp) ## 8-byte Spill + adcq 160(%rsp), %r12 + adcq 168(%rsp), %r15 + adcq $0, %rbx + movq %rcx, %rax + movq %rcx, %r11 + movq 56(%rsp), %rbp ## 8-byte Reload + subq (%rbp), %rax + movq %rdx, %rcx + movq %rdx, %r14 + sbbq 8(%rbp), %rcx + movq %rsi, %rdx + movq %rsi, %r13 + sbbq 16(%rbp), %rdx + movq %rdi, %rsi + sbbq 24(%rbp), %rsi + movq %r8, %rdi + sbbq 32(%rbp), %rdi + movq %r9, %r10 + sbbq 40(%rbp), %r10 + movq %r12, %r8 + sbbq 48(%rbp), %r8 + movq %r15, %r9 + sbbq 56(%rbp), %r9 + sbbq $0, %rbx + andl $1, %ebx + cmovneq %r15, %r9 + testb %bl, %bl + cmovneq %r11, %rax + movq 96(%rsp), %rbx ## 8-byte Reload + movq %rax, (%rbx) + cmovneq %r14, %rcx + movq %rcx, 8(%rbx) + cmovneq %r13, %rdx + movq %rdx, 16(%rbx) + cmovneq 32(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, 24(%rbx) + cmovneq 8(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, 32(%rbx) + cmovneq (%rsp), %r10 ## 8-byte Folded Reload + movq %r10, 40(%rbx) + cmovneq %r12, %r8 + movq %r8, 48(%rbx) + movq %r9, 56(%rbx) + addq $1256, %rsp ## imm = 0x4E8 + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_montNF8Lbmi2 + .p2align 4, 0x90 +_mcl_fp_montNF8Lbmi2: ## @mcl_fp_montNF8Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + 
pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + subq $1240, %rsp ## imm = 0x4D8 + movq %rcx, 40(%rsp) ## 8-byte Spill + movq %rdx, 48(%rsp) ## 8-byte Spill + movq %rsi, 56(%rsp) ## 8-byte Spill + movq %rdi, 80(%rsp) ## 8-byte Spill + movq -8(%rcx), %rbx + movq %rbx, 64(%rsp) ## 8-byte Spill + movq (%rdx), %rdx + leaq 1168(%rsp), %rdi + callq l_mulPv512x64 + movq 1168(%rsp), %r15 + movq 1176(%rsp), %r12 + movq %r15, %rdx + imulq %rbx, %rdx + movq 1232(%rsp), %rax + movq %rax, 32(%rsp) ## 8-byte Spill + movq 1224(%rsp), %rax + movq %rax, (%rsp) ## 8-byte Spill + movq 1216(%rsp), %r13 + movq 1208(%rsp), %rax + movq %rax, 8(%rsp) ## 8-byte Spill + movq 1200(%rsp), %r14 + movq 1192(%rsp), %rbp + movq 1184(%rsp), %rbx + leaq 1096(%rsp), %rdi + movq 40(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + addq 1096(%rsp), %r15 + adcq 1104(%rsp), %r12 + movq %r12, 16(%rsp) ## 8-byte Spill + adcq 1112(%rsp), %rbx + adcq 1120(%rsp), %rbp + adcq 1128(%rsp), %r14 + movq %r14, %r12 + movq 8(%rsp), %r14 ## 8-byte Reload + adcq 1136(%rsp), %r14 + adcq 1144(%rsp), %r13 + movq (%rsp), %rax ## 8-byte Reload + adcq 1152(%rsp), %rax + movq %rax, (%rsp) ## 8-byte Spill + movq 32(%rsp), %rax ## 8-byte Reload + adcq 1160(%rsp), %rax + movq %rax, 32(%rsp) ## 8-byte Spill + movq 48(%rsp), %rax ## 8-byte Reload + movq 8(%rax), %rdx + leaq 1024(%rsp), %rdi + movq 56(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + movq 1088(%rsp), %r15 + movq 16(%rsp), %rax ## 8-byte Reload + addq 1024(%rsp), %rax + adcq 1032(%rsp), %rbx + movq %rbx, 72(%rsp) ## 8-byte Spill + movq %rbp, %rbx + adcq 1040(%rsp), %rbx + adcq 1048(%rsp), %r12 + adcq 1056(%rsp), %r14 + movq %r14, 8(%rsp) ## 8-byte Spill + movq %r13, %rbp + adcq 1064(%rsp), %rbp + movq (%rsp), %rcx ## 8-byte Reload + adcq 1072(%rsp), %rcx + movq %rcx, (%rsp) ## 8-byte Spill + movq 32(%rsp), %r14 ## 8-byte Reload + adcq 1080(%rsp), %r14 + adcq $0, %r15 + movq %rax, %rdx + movq %rax, %r13 + imulq 64(%rsp), %rdx ## 8-byte Folded Reload + leaq 952(%rsp), %rdi + movq 40(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + addq 952(%rsp), %r13 + movq 72(%rsp), %rax ## 8-byte Reload + adcq 960(%rsp), %rax + movq %rax, 72(%rsp) ## 8-byte Spill + adcq 968(%rsp), %rbx + movq %rbx, 16(%rsp) ## 8-byte Spill + movq %r12, %rbx + adcq 976(%rsp), %rbx + movq 8(%rsp), %r12 ## 8-byte Reload + adcq 984(%rsp), %r12 + adcq 992(%rsp), %rbp + movq %rbp, 24(%rsp) ## 8-byte Spill + movq (%rsp), %r13 ## 8-byte Reload + adcq 1000(%rsp), %r13 + movq %r14, %rbp + adcq 1008(%rsp), %rbp + adcq 1016(%rsp), %r15 + movq 48(%rsp), %rax ## 8-byte Reload + movq 16(%rax), %rdx + leaq 880(%rsp), %rdi + movq 56(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + movq 944(%rsp), %r14 + movq 72(%rsp), %rax ## 8-byte Reload + addq 880(%rsp), %rax + movq 16(%rsp), %rcx ## 8-byte Reload + adcq 888(%rsp), %rcx + movq %rcx, 16(%rsp) ## 8-byte Spill + adcq 896(%rsp), %rbx + adcq 904(%rsp), %r12 + movq %r12, 8(%rsp) ## 8-byte Spill + movq 24(%rsp), %rcx ## 8-byte Reload + adcq 912(%rsp), %rcx + movq %rcx, 24(%rsp) ## 8-byte Spill + adcq 920(%rsp), %r13 + movq %r13, (%rsp) ## 8-byte Spill + adcq 928(%rsp), %rbp + movq %rbp, 32(%rsp) ## 8-byte Spill + adcq 936(%rsp), %r15 + adcq $0, %r14 + movq %rax, %rdx + movq %rax, %rbp + imulq 64(%rsp), %rdx ## 8-byte Folded Reload + leaq 808(%rsp), %rdi + movq 40(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + addq 808(%rsp), %rbp + movq 16(%rsp), %r13 ## 8-byte Reload + adcq 816(%rsp), %r13 + movq %rbx, %r12 + adcq 824(%rsp), %r12 + movq 8(%rsp), %rbx ## 8-byte Reload 
+ adcq 832(%rsp), %rbx + movq 24(%rsp), %rbp ## 8-byte Reload + adcq 840(%rsp), %rbp + movq (%rsp), %rax ## 8-byte Reload + adcq 848(%rsp), %rax + movq %rax, (%rsp) ## 8-byte Spill + movq 32(%rsp), %rax ## 8-byte Reload + adcq 856(%rsp), %rax + movq %rax, 32(%rsp) ## 8-byte Spill + adcq 864(%rsp), %r15 + adcq 872(%rsp), %r14 + movq 48(%rsp), %rax ## 8-byte Reload + movq 24(%rax), %rdx + leaq 736(%rsp), %rdi + movq 56(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + movq 800(%rsp), %rax + movq %r13, %rcx + addq 736(%rsp), %rcx + adcq 744(%rsp), %r12 + movq %r12, 24(%rsp) ## 8-byte Spill + adcq 752(%rsp), %rbx + movq %rbx, 8(%rsp) ## 8-byte Spill + adcq 760(%rsp), %rbp + movq %rbp, %r13 + movq (%rsp), %rbp ## 8-byte Reload + adcq 768(%rsp), %rbp + movq 32(%rsp), %rbx ## 8-byte Reload + adcq 776(%rsp), %rbx + adcq 784(%rsp), %r15 + adcq 792(%rsp), %r14 + adcq $0, %rax + movq %rax, 16(%rsp) ## 8-byte Spill + movq %rcx, %rdx + movq %rcx, %r12 + imulq 64(%rsp), %rdx ## 8-byte Folded Reload + leaq 664(%rsp), %rdi + movq 40(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + addq 664(%rsp), %r12 + movq 24(%rsp), %rax ## 8-byte Reload + adcq 672(%rsp), %rax + movq %rax, 24(%rsp) ## 8-byte Spill + movq 8(%rsp), %rax ## 8-byte Reload + adcq 680(%rsp), %rax + movq %rax, 8(%rsp) ## 8-byte Spill + adcq 688(%rsp), %r13 + adcq 696(%rsp), %rbp + movq %rbp, (%rsp) ## 8-byte Spill + adcq 704(%rsp), %rbx + adcq 712(%rsp), %r15 + adcq 720(%rsp), %r14 + movq 16(%rsp), %r12 ## 8-byte Reload + adcq 728(%rsp), %r12 + movq 48(%rsp), %rax ## 8-byte Reload + movq 32(%rax), %rdx + leaq 592(%rsp), %rdi + movq 56(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + movq 656(%rsp), %rcx + movq 24(%rsp), %rax ## 8-byte Reload + addq 592(%rsp), %rax + movq 8(%rsp), %rbp ## 8-byte Reload + adcq 600(%rsp), %rbp + adcq 608(%rsp), %r13 + movq %r13, 24(%rsp) ## 8-byte Spill + movq (%rsp), %r13 ## 8-byte Reload + adcq 616(%rsp), %r13 + adcq 624(%rsp), %rbx + adcq 632(%rsp), %r15 + adcq 640(%rsp), %r14 + adcq 648(%rsp), %r12 + movq %r12, 16(%rsp) ## 8-byte Spill + adcq $0, %rcx + movq %rcx, (%rsp) ## 8-byte Spill + movq %rax, %rdx + movq %rax, %r12 + imulq 64(%rsp), %rdx ## 8-byte Folded Reload + leaq 520(%rsp), %rdi + movq 40(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + addq 520(%rsp), %r12 + adcq 528(%rsp), %rbp + movq %rbp, 8(%rsp) ## 8-byte Spill + movq 24(%rsp), %r12 ## 8-byte Reload + adcq 536(%rsp), %r12 + movq %r13, %rbp + adcq 544(%rsp), %rbp + adcq 552(%rsp), %rbx + adcq 560(%rsp), %r15 + adcq 568(%rsp), %r14 + movq 16(%rsp), %r13 ## 8-byte Reload + adcq 576(%rsp), %r13 + movq (%rsp), %rax ## 8-byte Reload + adcq 584(%rsp), %rax + movq %rax, (%rsp) ## 8-byte Spill + movq 48(%rsp), %rax ## 8-byte Reload + movq 40(%rax), %rdx + leaq 448(%rsp), %rdi + movq 56(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + movq 512(%rsp), %rcx + movq 8(%rsp), %rax ## 8-byte Reload + addq 448(%rsp), %rax + adcq 456(%rsp), %r12 + movq %r12, 24(%rsp) ## 8-byte Spill + adcq 464(%rsp), %rbp + adcq 472(%rsp), %rbx + adcq 480(%rsp), %r15 + adcq 488(%rsp), %r14 + adcq 496(%rsp), %r13 + movq %r13, 16(%rsp) ## 8-byte Spill + movq (%rsp), %r13 ## 8-byte Reload + adcq 504(%rsp), %r13 + adcq $0, %rcx + movq %rcx, 8(%rsp) ## 8-byte Spill + movq %rax, %rdx + movq %rax, %r12 + imulq 64(%rsp), %rdx ## 8-byte Folded Reload + leaq 376(%rsp), %rdi + movq 40(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + addq 376(%rsp), %r12 + movq 24(%rsp), %rax ## 8-byte Reload + adcq 384(%rsp), %rax + movq %rax, 24(%rsp) ## 8-byte Spill + adcq 
392(%rsp), %rbp + adcq 400(%rsp), %rbx + adcq 408(%rsp), %r15 + adcq 416(%rsp), %r14 + movq 16(%rsp), %r12 ## 8-byte Reload + adcq 424(%rsp), %r12 + adcq 432(%rsp), %r13 + movq 8(%rsp), %rax ## 8-byte Reload + adcq 440(%rsp), %rax + movq %rax, 8(%rsp) ## 8-byte Spill + movq 48(%rsp), %rax ## 8-byte Reload + movq 48(%rax), %rdx + leaq 304(%rsp), %rdi + movq 56(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + movq 368(%rsp), %rcx + movq 24(%rsp), %rax ## 8-byte Reload + addq 304(%rsp), %rax + adcq 312(%rsp), %rbp + movq %rbp, (%rsp) ## 8-byte Spill + adcq 320(%rsp), %rbx + adcq 328(%rsp), %r15 + adcq 336(%rsp), %r14 + adcq 344(%rsp), %r12 + movq %r12, 16(%rsp) ## 8-byte Spill + adcq 352(%rsp), %r13 + movq 8(%rsp), %rbp ## 8-byte Reload + adcq 360(%rsp), %rbp + adcq $0, %rcx + movq %rcx, 32(%rsp) ## 8-byte Spill + movq %rax, %rdx + movq %rax, %r12 + imulq 64(%rsp), %rdx ## 8-byte Folded Reload + leaq 232(%rsp), %rdi + movq 40(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + addq 232(%rsp), %r12 + movq (%rsp), %rax ## 8-byte Reload + adcq 240(%rsp), %rax + movq %rax, (%rsp) ## 8-byte Spill + adcq 248(%rsp), %rbx + adcq 256(%rsp), %r15 + adcq 264(%rsp), %r14 + movq 16(%rsp), %r12 ## 8-byte Reload + adcq 272(%rsp), %r12 + adcq 280(%rsp), %r13 + adcq 288(%rsp), %rbp + movq %rbp, 8(%rsp) ## 8-byte Spill + movq 32(%rsp), %rbp ## 8-byte Reload + adcq 296(%rsp), %rbp + movq 48(%rsp), %rax ## 8-byte Reload + movq 56(%rax), %rdx + leaq 160(%rsp), %rdi + movq 56(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + movq 224(%rsp), %rcx + movq (%rsp), %rax ## 8-byte Reload + addq 160(%rsp), %rax + adcq 168(%rsp), %rbx + movq %rbx, 32(%rsp) ## 8-byte Spill + adcq 176(%rsp), %r15 + adcq 184(%rsp), %r14 + adcq 192(%rsp), %r12 + movq %r12, 16(%rsp) ## 8-byte Spill + adcq 200(%rsp), %r13 + movq 8(%rsp), %rbx ## 8-byte Reload + adcq 208(%rsp), %rbx + adcq 216(%rsp), %rbp + movq %rbp, %r12 + adcq $0, %rcx + movq %rcx, (%rsp) ## 8-byte Spill + movq 64(%rsp), %rdx ## 8-byte Reload + imulq %rax, %rdx + movq %rax, %rbp + leaq 88(%rsp), %rdi + movq 40(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + addq 88(%rsp), %rbp + movq 32(%rsp), %r11 ## 8-byte Reload + adcq 96(%rsp), %r11 + adcq 104(%rsp), %r15 + adcq 112(%rsp), %r14 + movq 16(%rsp), %rsi ## 8-byte Reload + adcq 120(%rsp), %rsi + movq %rsi, 16(%rsp) ## 8-byte Spill + adcq 128(%rsp), %r13 + adcq 136(%rsp), %rbx + movq %rbx, 8(%rsp) ## 8-byte Spill + adcq 144(%rsp), %r12 + movq (%rsp), %r8 ## 8-byte Reload + adcq 152(%rsp), %r8 + movq %r11, %rax + movq 40(%rsp), %rbp ## 8-byte Reload + subq (%rbp), %rax + movq %r15, %rcx + sbbq 8(%rbp), %rcx + movq %r14, %rdx + sbbq 16(%rbp), %rdx + sbbq 24(%rbp), %rsi + movq %r13, %rdi + sbbq 32(%rbp), %rdi + movq %rbx, %r9 + sbbq 40(%rbp), %r9 + movq %r12, %r10 + sbbq 48(%rbp), %r10 + movq %rbp, %rbx + movq %r8, %rbp + sbbq 56(%rbx), %rbp + testq %rbp, %rbp + cmovsq %r11, %rax + movq 80(%rsp), %rbx ## 8-byte Reload + movq %rax, (%rbx) + cmovsq %r15, %rcx + movq %rcx, 8(%rbx) + cmovsq %r14, %rdx + movq %rdx, 16(%rbx) + cmovsq 16(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, 24(%rbx) + cmovsq %r13, %rdi + movq %rdi, 32(%rbx) + cmovsq 8(%rsp), %r9 ## 8-byte Folded Reload + movq %r9, 40(%rbx) + cmovsq %r12, %r10 + movq %r10, 48(%rbx) + cmovsq %r8, %rbp + movq %rbp, 56(%rbx) + addq $1240, %rsp ## imm = 0x4D8 + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_montRed8Lbmi2 + .p2align 4, 0x90 +_mcl_fp_montRed8Lbmi2: ## @mcl_fp_montRed8Lbmi2 +## BB#0: + pushq %rbp + pushq 
%r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + subq $776, %rsp ## imm = 0x308 + movq %rdx, %rax + movq %rdi, 192(%rsp) ## 8-byte Spill + movq -8(%rax), %rcx + movq %rcx, 104(%rsp) ## 8-byte Spill + movq (%rsi), %r15 + movq 8(%rsi), %rdx + movq %rdx, 8(%rsp) ## 8-byte Spill + movq %r15, %rdx + imulq %rcx, %rdx + movq 120(%rsi), %rcx + movq %rcx, 112(%rsp) ## 8-byte Spill + movq 112(%rsi), %rcx + movq %rcx, 56(%rsp) ## 8-byte Spill + movq 104(%rsi), %rcx + movq %rcx, 96(%rsp) ## 8-byte Spill + movq 96(%rsi), %rcx + movq %rcx, 48(%rsp) ## 8-byte Spill + movq 88(%rsi), %rcx + movq %rcx, 24(%rsp) ## 8-byte Spill + movq 80(%rsi), %rcx + movq %rcx, 40(%rsp) ## 8-byte Spill + movq 72(%rsi), %rcx + movq %rcx, 16(%rsp) ## 8-byte Spill + movq 64(%rsi), %r13 + movq 56(%rsi), %rcx + movq %rcx, 64(%rsp) ## 8-byte Spill + movq 48(%rsi), %r14 + movq 40(%rsi), %rcx + movq %rcx, 72(%rsp) ## 8-byte Spill + movq 32(%rsi), %r12 + movq 24(%rsi), %rbx + movq 16(%rsi), %rbp + movq %rax, %rcx + movq (%rcx), %rax + movq %rax, 136(%rsp) ## 8-byte Spill + movq 56(%rcx), %rax + movq %rax, 184(%rsp) ## 8-byte Spill + movq 48(%rcx), %rax + movq %rax, 176(%rsp) ## 8-byte Spill + movq 40(%rcx), %rax + movq %rax, 168(%rsp) ## 8-byte Spill + movq 32(%rcx), %rax + movq %rax, 160(%rsp) ## 8-byte Spill + movq 24(%rcx), %rax + movq %rax, 152(%rsp) ## 8-byte Spill + movq 16(%rcx), %rax + movq %rax, 144(%rsp) ## 8-byte Spill + movq 8(%rcx), %rax + movq %rax, 128(%rsp) ## 8-byte Spill + movq %rcx, %rsi + movq %rsi, 88(%rsp) ## 8-byte Spill + leaq 704(%rsp), %rdi + callq l_mulPv512x64 + addq 704(%rsp), %r15 + movq 8(%rsp), %rcx ## 8-byte Reload + adcq 712(%rsp), %rcx + adcq 720(%rsp), %rbp + movq %rbp, 80(%rsp) ## 8-byte Spill + adcq 728(%rsp), %rbx + movq %rbx, 32(%rsp) ## 8-byte Spill + adcq 736(%rsp), %r12 + movq %r12, 120(%rsp) ## 8-byte Spill + movq 72(%rsp), %rax ## 8-byte Reload + adcq 744(%rsp), %rax + movq %rax, 72(%rsp) ## 8-byte Spill + adcq 752(%rsp), %r14 + movq %r14, %r12 + movq 64(%rsp), %rax ## 8-byte Reload + adcq 760(%rsp), %rax + movq %rax, 64(%rsp) ## 8-byte Spill + adcq 768(%rsp), %r13 + movq %r13, 8(%rsp) ## 8-byte Spill + adcq $0, 16(%rsp) ## 8-byte Folded Spill + movq 40(%rsp), %r15 ## 8-byte Reload + adcq $0, %r15 + adcq $0, 24(%rsp) ## 8-byte Folded Spill + adcq $0, 48(%rsp) ## 8-byte Folded Spill + adcq $0, 96(%rsp) ## 8-byte Folded Spill + movq 56(%rsp), %r13 ## 8-byte Reload + adcq $0, %r13 + movq 112(%rsp), %r14 ## 8-byte Reload + adcq $0, %r14 + sbbq %rbx, %rbx + movq %rcx, %rbp + movq %rbp, %rdx + imulq 104(%rsp), %rdx ## 8-byte Folded Reload + leaq 632(%rsp), %rdi + movq 88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + andl $1, %ebx + movq %rbx, %rax + addq 632(%rsp), %rbp + movq 80(%rsp), %rsi ## 8-byte Reload + adcq 640(%rsp), %rsi + movq 32(%rsp), %rcx ## 8-byte Reload + adcq 648(%rsp), %rcx + movq %rcx, 32(%rsp) ## 8-byte Spill + movq 120(%rsp), %rcx ## 8-byte Reload + adcq 656(%rsp), %rcx + movq %rcx, 120(%rsp) ## 8-byte Spill + movq 72(%rsp), %rcx ## 8-byte Reload + adcq 664(%rsp), %rcx + movq %rcx, 72(%rsp) ## 8-byte Spill + adcq 672(%rsp), %r12 + movq 64(%rsp), %rcx ## 8-byte Reload + adcq 680(%rsp), %rcx + movq %rcx, 64(%rsp) ## 8-byte Spill + movq 8(%rsp), %rcx ## 8-byte Reload + adcq 688(%rsp), %rcx + movq %rcx, 8(%rsp) ## 8-byte Spill + movq 16(%rsp), %rcx ## 8-byte Reload + adcq 696(%rsp), %rcx + movq %rcx, 16(%rsp) ## 8-byte Spill + adcq $0, %r15 + movq %r15, 40(%rsp) ## 8-byte Spill + movq 24(%rsp), %rbx ## 8-byte Reload + adcq $0, %rbx + movq 48(%rsp), %r15 ## 
8-byte Reload + adcq $0, %r15 + adcq $0, 96(%rsp) ## 8-byte Folded Spill + adcq $0, %r13 + movq %r13, 56(%rsp) ## 8-byte Spill + adcq $0, %r14 + movq %r14, 112(%rsp) ## 8-byte Spill + movq %rax, %rbp + adcq $0, %rbp + movq %rsi, %rdx + movq %rsi, %r14 + imulq 104(%rsp), %rdx ## 8-byte Folded Reload + leaq 560(%rsp), %rdi + movq 88(%rsp), %r13 ## 8-byte Reload + movq %r13, %rsi + callq l_mulPv512x64 + addq 560(%rsp), %r14 + movq 32(%rsp), %rcx ## 8-byte Reload + adcq 568(%rsp), %rcx + movq 120(%rsp), %rax ## 8-byte Reload + adcq 576(%rsp), %rax + movq %rax, 120(%rsp) ## 8-byte Spill + movq 72(%rsp), %rax ## 8-byte Reload + adcq 584(%rsp), %rax + movq %rax, 72(%rsp) ## 8-byte Spill + adcq 592(%rsp), %r12 + movq %r12, 32(%rsp) ## 8-byte Spill + movq 64(%rsp), %r14 ## 8-byte Reload + adcq 600(%rsp), %r14 + movq 8(%rsp), %rax ## 8-byte Reload + adcq 608(%rsp), %rax + movq %rax, 8(%rsp) ## 8-byte Spill + movq 16(%rsp), %rax ## 8-byte Reload + adcq 616(%rsp), %rax + movq %rax, 16(%rsp) ## 8-byte Spill + movq 40(%rsp), %rax ## 8-byte Reload + adcq 624(%rsp), %rax + movq %rax, 40(%rsp) ## 8-byte Spill + adcq $0, %rbx + movq %rbx, 24(%rsp) ## 8-byte Spill + adcq $0, %r15 + movq %r15, 48(%rsp) ## 8-byte Spill + movq 96(%rsp), %rbx ## 8-byte Reload + adcq $0, %rbx + movq 56(%rsp), %r15 ## 8-byte Reload + adcq $0, %r15 + adcq $0, 112(%rsp) ## 8-byte Folded Spill + adcq $0, %rbp + movq %rbp, 80(%rsp) ## 8-byte Spill + movq %rcx, %rbp + movq %rbp, %rdx + movq 104(%rsp), %r12 ## 8-byte Reload + imulq %r12, %rdx + leaq 488(%rsp), %rdi + movq %r13, %rsi + callq l_mulPv512x64 + addq 488(%rsp), %rbp + movq 120(%rsp), %rax ## 8-byte Reload + adcq 496(%rsp), %rax + movq 72(%rsp), %rbp ## 8-byte Reload + adcq 504(%rsp), %rbp + movq 32(%rsp), %rcx ## 8-byte Reload + adcq 512(%rsp), %rcx + movq %rcx, 32(%rsp) ## 8-byte Spill + adcq 520(%rsp), %r14 + movq 8(%rsp), %rcx ## 8-byte Reload + adcq 528(%rsp), %rcx + movq %rcx, 8(%rsp) ## 8-byte Spill + movq 16(%rsp), %rcx ## 8-byte Reload + adcq 536(%rsp), %rcx + movq %rcx, 16(%rsp) ## 8-byte Spill + movq 40(%rsp), %r13 ## 8-byte Reload + adcq 544(%rsp), %r13 + movq 24(%rsp), %rcx ## 8-byte Reload + adcq 552(%rsp), %rcx + movq %rcx, 24(%rsp) ## 8-byte Spill + adcq $0, 48(%rsp) ## 8-byte Folded Spill + adcq $0, %rbx + movq %rbx, 96(%rsp) ## 8-byte Spill + movq %r15, %rbx + adcq $0, %rbx + adcq $0, 112(%rsp) ## 8-byte Folded Spill + adcq $0, 80(%rsp) ## 8-byte Folded Spill + movq %rax, %rdx + movq %rax, %r15 + imulq %r12, %rdx + leaq 416(%rsp), %rdi + movq 88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + addq 416(%rsp), %r15 + adcq 424(%rsp), %rbp + movq %rbp, %rax + movq 32(%rsp), %rcx ## 8-byte Reload + adcq 432(%rsp), %rcx + movq %rcx, 32(%rsp) ## 8-byte Spill + movq %r14, %r12 + adcq 440(%rsp), %r12 + movq 8(%rsp), %r14 ## 8-byte Reload + adcq 448(%rsp), %r14 + movq 16(%rsp), %rbp ## 8-byte Reload + adcq 456(%rsp), %rbp + adcq 464(%rsp), %r13 + movq 24(%rsp), %rcx ## 8-byte Reload + adcq 472(%rsp), %rcx + movq %rcx, 24(%rsp) ## 8-byte Spill + movq 48(%rsp), %rcx ## 8-byte Reload + adcq 480(%rsp), %rcx + movq %rcx, 48(%rsp) ## 8-byte Spill + adcq $0, 96(%rsp) ## 8-byte Folded Spill + adcq $0, %rbx + movq %rbx, 56(%rsp) ## 8-byte Spill + movq 112(%rsp), %r15 ## 8-byte Reload + adcq $0, %r15 + adcq $0, 80(%rsp) ## 8-byte Folded Spill + movq %rax, %rbx + movq %rbx, %rdx + imulq 104(%rsp), %rdx ## 8-byte Folded Reload + leaq 344(%rsp), %rdi + movq 88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + addq 344(%rsp), %rbx + movq 32(%rsp), %rax ## 8-byte Reload + 
adcq 352(%rsp), %rax + adcq 360(%rsp), %r12 + movq %r12, 64(%rsp) ## 8-byte Spill + adcq 368(%rsp), %r14 + movq %r14, 8(%rsp) ## 8-byte Spill + adcq 376(%rsp), %rbp + movq %rbp, 16(%rsp) ## 8-byte Spill + adcq 384(%rsp), %r13 + movq %r13, 40(%rsp) ## 8-byte Spill + movq 24(%rsp), %r13 ## 8-byte Reload + adcq 392(%rsp), %r13 + movq 48(%rsp), %r12 ## 8-byte Reload + adcq 400(%rsp), %r12 + movq 96(%rsp), %r14 ## 8-byte Reload + adcq 408(%rsp), %r14 + movq 56(%rsp), %rbp ## 8-byte Reload + adcq $0, %rbp + movq %r15, %rbx + adcq $0, %rbx + adcq $0, 80(%rsp) ## 8-byte Folded Spill + movq %rax, %rdx + movq %rax, %r15 + imulq 104(%rsp), %rdx ## 8-byte Folded Reload + leaq 272(%rsp), %rdi + movq 88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + addq 272(%rsp), %r15 + movq 64(%rsp), %rcx ## 8-byte Reload + adcq 280(%rsp), %rcx + movq 8(%rsp), %rax ## 8-byte Reload + adcq 288(%rsp), %rax + movq %rax, 8(%rsp) ## 8-byte Spill + movq 16(%rsp), %rax ## 8-byte Reload + adcq 296(%rsp), %rax + movq %rax, 16(%rsp) ## 8-byte Spill + movq 40(%rsp), %rax ## 8-byte Reload + adcq 304(%rsp), %rax + movq %rax, 40(%rsp) ## 8-byte Spill + adcq 312(%rsp), %r13 + movq %r13, 24(%rsp) ## 8-byte Spill + adcq 320(%rsp), %r12 + movq %r12, 48(%rsp) ## 8-byte Spill + adcq 328(%rsp), %r14 + movq %r14, %r13 + adcq 336(%rsp), %rbp + movq %rbp, %r12 + adcq $0, %rbx + movq %rbx, %r14 + movq 80(%rsp), %r15 ## 8-byte Reload + adcq $0, %r15 + movq 104(%rsp), %rdx ## 8-byte Reload + movq %rcx, %rbx + imulq %rbx, %rdx + leaq 200(%rsp), %rdi + movq 88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv512x64 + addq 200(%rsp), %rbx + movq 8(%rsp), %rax ## 8-byte Reload + adcq 208(%rsp), %rax + movq %rax, 8(%rsp) ## 8-byte Spill + movq 16(%rsp), %r8 ## 8-byte Reload + adcq 216(%rsp), %r8 + movq %r8, 16(%rsp) ## 8-byte Spill + movq 40(%rsp), %rdx ## 8-byte Reload + adcq 224(%rsp), %rdx + movq 24(%rsp), %rsi ## 8-byte Reload + adcq 232(%rsp), %rsi + movq 48(%rsp), %rdi ## 8-byte Reload + adcq 240(%rsp), %rdi + movq %r13, %rbp + adcq 248(%rsp), %rbp + movq %r12, %rbx + adcq 256(%rsp), %rbx + movq %rbx, 56(%rsp) ## 8-byte Spill + movq %r14, %r9 + adcq 264(%rsp), %r9 + adcq $0, %r15 + movq %r15, %r10 + subq 136(%rsp), %rax ## 8-byte Folded Reload + movq %r8, %rcx + sbbq 128(%rsp), %rcx ## 8-byte Folded Reload + movq %rdx, %r13 + sbbq 144(%rsp), %r13 ## 8-byte Folded Reload + movq %rsi, %r12 + sbbq 152(%rsp), %r12 ## 8-byte Folded Reload + movq %rdi, %r14 + sbbq 160(%rsp), %r14 ## 8-byte Folded Reload + movq %rbp, %r11 + sbbq 168(%rsp), %r11 ## 8-byte Folded Reload + movq %rbx, %r8 + sbbq 176(%rsp), %r8 ## 8-byte Folded Reload + movq %r9, %r15 + sbbq 184(%rsp), %r9 ## 8-byte Folded Reload + sbbq $0, %r10 + andl $1, %r10d + cmovneq %r15, %r9 + testb %r10b, %r10b + cmovneq 8(%rsp), %rax ## 8-byte Folded Reload + movq 192(%rsp), %rbx ## 8-byte Reload + movq %rax, (%rbx) + cmovneq 16(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, 8(%rbx) + cmovneq %rdx, %r13 + movq %r13, 16(%rbx) + cmovneq %rsi, %r12 + movq %r12, 24(%rbx) + cmovneq %rdi, %r14 + movq %r14, 32(%rbx) + cmovneq %rbp, %r11 + movq %r11, 40(%rbx) + cmovneq 56(%rsp), %r8 ## 8-byte Folded Reload + movq %r8, 48(%rbx) + movq %r9, 56(%rbx) + addq $776, %rsp ## imm = 0x308 + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_addPre8Lbmi2 + .p2align 4, 0x90 +_mcl_fp_addPre8Lbmi2: ## @mcl_fp_addPre8Lbmi2 +## BB#0: + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq 56(%rdx), %r8 + movq 56(%rsi), %r15 + movq 48(%rdx), %r9 + movq 
48(%rsi), %r12 + movq 40(%rdx), %r10 + movq 32(%rdx), %r11 + movq 24(%rdx), %r14 + movq 16(%rdx), %rbx + movq (%rdx), %rcx + movq 8(%rdx), %rdx + addq (%rsi), %rcx + adcq 8(%rsi), %rdx + adcq 16(%rsi), %rbx + movq 40(%rsi), %r13 + movq 24(%rsi), %rax + movq 32(%rsi), %rsi + movq %rcx, (%rdi) + movq %rdx, 8(%rdi) + movq %rbx, 16(%rdi) + adcq %r14, %rax + movq %rax, 24(%rdi) + adcq %r11, %rsi + movq %rsi, 32(%rdi) + adcq %r10, %r13 + movq %r13, 40(%rdi) + adcq %r9, %r12 + movq %r12, 48(%rdi) + adcq %r8, %r15 + movq %r15, 56(%rdi) + sbbq %rax, %rax + andl $1, %eax + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + retq + + .globl _mcl_fp_subPre8Lbmi2 + .p2align 4, 0x90 +_mcl_fp_subPre8Lbmi2: ## @mcl_fp_subPre8Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq 56(%rdx), %r8 + movq 56(%rsi), %r15 + movq 48(%rdx), %r9 + movq 40(%rdx), %r10 + movq 24(%rdx), %r11 + movq 32(%rdx), %r14 + movq (%rsi), %rbx + movq 8(%rsi), %r12 + xorl %eax, %eax + subq (%rdx), %rbx + sbbq 8(%rdx), %r12 + movq 16(%rsi), %rcx + sbbq 16(%rdx), %rcx + movq 48(%rsi), %r13 + movq 40(%rsi), %rdx + movq 32(%rsi), %rbp + movq 24(%rsi), %rsi + movq %rbx, (%rdi) + movq %r12, 8(%rdi) + movq %rcx, 16(%rdi) + sbbq %r11, %rsi + movq %rsi, 24(%rdi) + sbbq %r14, %rbp + movq %rbp, 32(%rdi) + sbbq %r10, %rdx + movq %rdx, 40(%rdi) + sbbq %r9, %r13 + movq %r13, 48(%rdi) + sbbq %r8, %r15 + movq %r15, 56(%rdi) + sbbq $0, %rax + andl $1, %eax + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_shr1_8Lbmi2 + .p2align 4, 0x90 +_mcl_fp_shr1_8Lbmi2: ## @mcl_fp_shr1_8Lbmi2 +## BB#0: + movq 56(%rsi), %r8 + movq 48(%rsi), %r9 + movq 40(%rsi), %r10 + movq 32(%rsi), %r11 + movq 24(%rsi), %rcx + movq 16(%rsi), %rdx + movq (%rsi), %rax + movq 8(%rsi), %rsi + shrdq $1, %rsi, %rax + movq %rax, (%rdi) + shrdq $1, %rdx, %rsi + movq %rsi, 8(%rdi) + shrdq $1, %rcx, %rdx + movq %rdx, 16(%rdi) + shrdq $1, %r11, %rcx + movq %rcx, 24(%rdi) + shrdq $1, %r10, %r11 + movq %r11, 32(%rdi) + shrdq $1, %r9, %r10 + movq %r10, 40(%rdi) + shrdq $1, %r8, %r9 + movq %r9, 48(%rdi) + shrq %r8 + movq %r8, 56(%rdi) + retq + + .globl _mcl_fp_add8Lbmi2 + .p2align 4, 0x90 +_mcl_fp_add8Lbmi2: ## @mcl_fp_add8Lbmi2 +## BB#0: + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq 56(%rdx), %r15 + movq 56(%rsi), %r8 + movq 48(%rdx), %r12 + movq 48(%rsi), %r9 + movq 40(%rsi), %r13 + movq 24(%rsi), %r11 + movq 32(%rsi), %r10 + movq (%rdx), %r14 + movq 8(%rdx), %rbx + addq (%rsi), %r14 + adcq 8(%rsi), %rbx + movq 16(%rdx), %rax + adcq 16(%rsi), %rax + adcq 24(%rdx), %r11 + movq 40(%rdx), %rsi + adcq 32(%rdx), %r10 + movq %r14, (%rdi) + movq %rbx, 8(%rdi) + movq %rax, 16(%rdi) + movq %r11, 24(%rdi) + movq %r10, 32(%rdi) + adcq %r13, %rsi + movq %rsi, 40(%rdi) + adcq %r12, %r9 + movq %r9, 48(%rdi) + adcq %r15, %r8 + movq %r8, 56(%rdi) + sbbq %rdx, %rdx + andl $1, %edx + subq (%rcx), %r14 + sbbq 8(%rcx), %rbx + sbbq 16(%rcx), %rax + sbbq 24(%rcx), %r11 + sbbq 32(%rcx), %r10 + sbbq 40(%rcx), %rsi + sbbq 48(%rcx), %r9 + sbbq 56(%rcx), %r8 + sbbq $0, %rdx + testb $1, %dl + jne LBB120_2 +## BB#1: ## %nocarry + movq %r14, (%rdi) + movq %rbx, 8(%rdi) + movq %rax, 16(%rdi) + movq %r11, 24(%rdi) + movq %r10, 32(%rdi) + movq %rsi, 40(%rdi) + movq %r9, 48(%rdi) + movq %r8, 56(%rdi) +LBB120_2: ## %carry + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + retq + + .globl _mcl_fp_addNF8Lbmi2 + .p2align 4, 0x90 +_mcl_fp_addNF8Lbmi2: ## @mcl_fp_addNF8Lbmi2 +## BB#0: + pushq 
%rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq 56(%rdx), %r8 + movq 48(%rdx), %rbp + movq 40(%rdx), %rbx + movq 32(%rdx), %rax + movq 24(%rdx), %r11 + movq 16(%rdx), %r15 + movq (%rdx), %r13 + movq 8(%rdx), %r12 + addq (%rsi), %r13 + adcq 8(%rsi), %r12 + adcq 16(%rsi), %r15 + adcq 24(%rsi), %r11 + adcq 32(%rsi), %rax + movq %rax, %r10 + movq %r10, -24(%rsp) ## 8-byte Spill + adcq 40(%rsi), %rbx + movq %rbx, %r9 + movq %r9, -16(%rsp) ## 8-byte Spill + adcq 48(%rsi), %rbp + movq %rbp, %rax + movq %rax, -8(%rsp) ## 8-byte Spill + adcq 56(%rsi), %r8 + movq %r13, %rsi + subq (%rcx), %rsi + movq %r12, %rdx + sbbq 8(%rcx), %rdx + movq %r15, %rbx + sbbq 16(%rcx), %rbx + movq %r11, %r14 + sbbq 24(%rcx), %r14 + movq %r10, %rbp + sbbq 32(%rcx), %rbp + movq %r9, %r10 + sbbq 40(%rcx), %r10 + movq %rax, %r9 + sbbq 48(%rcx), %r9 + movq %r8, %rax + sbbq 56(%rcx), %rax + testq %rax, %rax + cmovsq %r13, %rsi + movq %rsi, (%rdi) + cmovsq %r12, %rdx + movq %rdx, 8(%rdi) + cmovsq %r15, %rbx + movq %rbx, 16(%rdi) + cmovsq %r11, %r14 + movq %r14, 24(%rdi) + cmovsq -24(%rsp), %rbp ## 8-byte Folded Reload + movq %rbp, 32(%rdi) + cmovsq -16(%rsp), %r10 ## 8-byte Folded Reload + movq %r10, 40(%rdi) + cmovsq -8(%rsp), %r9 ## 8-byte Folded Reload + movq %r9, 48(%rdi) + cmovsq %r8, %rax + movq %rax, 56(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_sub8Lbmi2 + .p2align 4, 0x90 +_mcl_fp_sub8Lbmi2: ## @mcl_fp_sub8Lbmi2 +## BB#0: + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq 56(%rdx), %r12 + movq 56(%rsi), %r8 + movq 48(%rdx), %r13 + movq (%rsi), %rax + movq 8(%rsi), %r10 + xorl %ebx, %ebx + subq (%rdx), %rax + sbbq 8(%rdx), %r10 + movq 16(%rsi), %r11 + sbbq 16(%rdx), %r11 + movq 24(%rsi), %r15 + sbbq 24(%rdx), %r15 + movq 32(%rsi), %r14 + sbbq 32(%rdx), %r14 + movq 48(%rsi), %r9 + movq 40(%rsi), %rsi + sbbq 40(%rdx), %rsi + movq %rax, (%rdi) + movq %r10, 8(%rdi) + movq %r11, 16(%rdi) + movq %r15, 24(%rdi) + movq %r14, 32(%rdi) + movq %rsi, 40(%rdi) + sbbq %r13, %r9 + movq %r9, 48(%rdi) + sbbq %r12, %r8 + movq %r8, 56(%rdi) + sbbq $0, %rbx + testb $1, %bl + je LBB122_2 +## BB#1: ## %carry + addq (%rcx), %rax + movq %rax, (%rdi) + movq 8(%rcx), %rax + adcq %r10, %rax + movq %rax, 8(%rdi) + movq 16(%rcx), %rax + adcq %r11, %rax + movq %rax, 16(%rdi) + movq 24(%rcx), %rax + adcq %r15, %rax + movq %rax, 24(%rdi) + movq 32(%rcx), %rax + adcq %r14, %rax + movq %rax, 32(%rdi) + movq 40(%rcx), %rax + adcq %rsi, %rax + movq %rax, 40(%rdi) + movq 48(%rcx), %rax + adcq %r9, %rax + movq %rax, 48(%rdi) + movq 56(%rcx), %rax + adcq %r8, %rax + movq %rax, 56(%rdi) +LBB122_2: ## %nocarry + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + retq + + .globl _mcl_fp_subNF8Lbmi2 + .p2align 4, 0x90 +_mcl_fp_subNF8Lbmi2: ## @mcl_fp_subNF8Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rcx, %r8 + movq %rdi, %r9 + movdqu (%rdx), %xmm0 + movdqu 16(%rdx), %xmm1 + movdqu 32(%rdx), %xmm2 + movdqu 48(%rdx), %xmm3 + pshufd $78, %xmm3, %xmm4 ## xmm4 = xmm3[2,3,0,1] + movd %xmm4, %r12 + movdqu (%rsi), %xmm4 + movdqu 16(%rsi), %xmm5 + movdqu 32(%rsi), %xmm8 + movdqu 48(%rsi), %xmm7 + pshufd $78, %xmm7, %xmm6 ## xmm6 = xmm7[2,3,0,1] + movd %xmm6, %rcx + movd %xmm3, %r13 + movd %xmm7, %rdi + pshufd $78, %xmm2, %xmm3 ## xmm3 = xmm2[2,3,0,1] + movd %xmm3, %rbp + pshufd $78, %xmm8, %xmm3 ## xmm3 = xmm8[2,3,0,1] + movd %xmm3, %rdx + movd %xmm2, %rsi + pshufd $78, %xmm1, %xmm2 ## xmm2 = 
xmm1[2,3,0,1] + movd %xmm2, %r11 + pshufd $78, %xmm5, %xmm2 ## xmm2 = xmm5[2,3,0,1] + movd %xmm1, %r15 + pshufd $78, %xmm0, %xmm1 ## xmm1 = xmm0[2,3,0,1] + movd %xmm1, %rbx + pshufd $78, %xmm4, %xmm1 ## xmm1 = xmm4[2,3,0,1] + movd %xmm0, %rax + movd %xmm4, %r14 + subq %rax, %r14 + movd %xmm1, %r10 + sbbq %rbx, %r10 + movd %xmm5, %rbx + sbbq %r15, %rbx + movd %xmm2, %r15 + sbbq %r11, %r15 + movd %xmm8, %r11 + sbbq %rsi, %r11 + sbbq %rbp, %rdx + movq %rdx, -24(%rsp) ## 8-byte Spill + sbbq %r13, %rdi + movq %rdi, -16(%rsp) ## 8-byte Spill + sbbq %r12, %rcx + movq %rcx, -8(%rsp) ## 8-byte Spill + movq %rcx, %rbp + sarq $63, %rbp + movq 56(%r8), %r12 + andq %rbp, %r12 + movq 48(%r8), %r13 + andq %rbp, %r13 + movq 40(%r8), %rdi + andq %rbp, %rdi + movq 32(%r8), %rsi + andq %rbp, %rsi + movq 24(%r8), %rdx + andq %rbp, %rdx + movq 16(%r8), %rcx + andq %rbp, %rcx + movq 8(%r8), %rax + andq %rbp, %rax + andq (%r8), %rbp + addq %r14, %rbp + adcq %r10, %rax + movq %rbp, (%r9) + adcq %rbx, %rcx + movq %rax, 8(%r9) + movq %rcx, 16(%r9) + adcq %r15, %rdx + movq %rdx, 24(%r9) + adcq %r11, %rsi + movq %rsi, 32(%r9) + adcq -24(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, 40(%r9) + adcq -16(%rsp), %r13 ## 8-byte Folded Reload + movq %r13, 48(%r9) + adcq -8(%rsp), %r12 ## 8-byte Folded Reload + movq %r12, 56(%r9) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fpDbl_add8Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_add8Lbmi2: ## @mcl_fpDbl_add8Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rcx, %r8 + movq 120(%rdx), %rax + movq %rax, -32(%rsp) ## 8-byte Spill + movq 112(%rdx), %rax + movq %rax, -16(%rsp) ## 8-byte Spill + movq 104(%rdx), %rax + movq %rax, -24(%rsp) ## 8-byte Spill + movq 96(%rdx), %r14 + movq 24(%rsi), %r15 + movq 32(%rsi), %r11 + movq 16(%rdx), %r12 + movq (%rdx), %rbx + movq 8(%rdx), %rax + addq (%rsi), %rbx + adcq 8(%rsi), %rax + adcq 16(%rsi), %r12 + adcq 24(%rdx), %r15 + adcq 32(%rdx), %r11 + movq 88(%rdx), %rbp + movq 80(%rdx), %r13 + movq %rbx, (%rdi) + movq 72(%rdx), %r10 + movq %rax, 8(%rdi) + movq 64(%rdx), %r9 + movq %r12, 16(%rdi) + movq 40(%rdx), %r12 + movq %r15, 24(%rdi) + movq 40(%rsi), %rbx + adcq %r12, %rbx + movq 56(%rdx), %r15 + movq 48(%rdx), %r12 + movq %r11, 32(%rdi) + movq 48(%rsi), %rdx + adcq %r12, %rdx + movq 120(%rsi), %r12 + movq %rbx, 40(%rdi) + movq 56(%rsi), %rax + adcq %r15, %rax + movq 112(%rsi), %rcx + movq %rdx, 48(%rdi) + movq 64(%rsi), %rbx + adcq %r9, %rbx + movq 104(%rsi), %rdx + movq %rax, 56(%rdi) + movq 72(%rsi), %r9 + adcq %r10, %r9 + movq 80(%rsi), %r11 + adcq %r13, %r11 + movq 96(%rsi), %rax + movq 88(%rsi), %r15 + adcq %rbp, %r15 + adcq %r14, %rax + movq %rax, -8(%rsp) ## 8-byte Spill + movq %rdx, %rax + adcq -24(%rsp), %rax ## 8-byte Folded Reload + movq %rax, -24(%rsp) ## 8-byte Spill + adcq -16(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, -16(%rsp) ## 8-byte Spill + adcq -32(%rsp), %r12 ## 8-byte Folded Reload + movq %r12, -32(%rsp) ## 8-byte Spill + sbbq %rbp, %rbp + andl $1, %ebp + movq %rbx, %rsi + subq (%r8), %rsi + movq %r9, %rdx + sbbq 8(%r8), %rdx + movq %r11, %r10 + sbbq 16(%r8), %r10 + movq %r15, %r14 + sbbq 24(%r8), %r14 + movq -8(%rsp), %r13 ## 8-byte Reload + sbbq 32(%r8), %r13 + movq %rax, %r12 + sbbq 40(%r8), %r12 + movq %rcx, %rax + sbbq 48(%r8), %rax + movq -32(%rsp), %rcx ## 8-byte Reload + sbbq 56(%r8), %rcx + sbbq $0, %rbp + andl $1, %ebp + cmovneq %rbx, %rsi + movq %rsi, 64(%rdi) + testb %bpl, %bpl + cmovneq %r9, %rdx + movq 
%rdx, 72(%rdi) + cmovneq %r11, %r10 + movq %r10, 80(%rdi) + cmovneq %r15, %r14 + movq %r14, 88(%rdi) + cmovneq -8(%rsp), %r13 ## 8-byte Folded Reload + movq %r13, 96(%rdi) + cmovneq -24(%rsp), %r12 ## 8-byte Folded Reload + movq %r12, 104(%rdi) + cmovneq -16(%rsp), %rax ## 8-byte Folded Reload + movq %rax, 112(%rdi) + cmovneq -32(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, 120(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fpDbl_sub8Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_sub8Lbmi2: ## @mcl_fpDbl_sub8Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rcx, %r15 + movq 120(%rdx), %rax + movq %rax, -8(%rsp) ## 8-byte Spill + movq 112(%rdx), %rax + movq %rax, -16(%rsp) ## 8-byte Spill + movq 104(%rdx), %rax + movq %rax, -24(%rsp) ## 8-byte Spill + movq 16(%rsi), %r9 + movq (%rsi), %r12 + movq 8(%rsi), %r14 + xorl %r8d, %r8d + subq (%rdx), %r12 + sbbq 8(%rdx), %r14 + sbbq 16(%rdx), %r9 + movq 24(%rsi), %rbx + sbbq 24(%rdx), %rbx + movq 32(%rsi), %r13 + sbbq 32(%rdx), %r13 + movq 96(%rdx), %rbp + movq 88(%rdx), %r11 + movq %r12, (%rdi) + movq 80(%rdx), %r12 + movq %r14, 8(%rdi) + movq 72(%rdx), %r10 + movq %r9, 16(%rdi) + movq 40(%rdx), %r9 + movq %rbx, 24(%rdi) + movq 40(%rsi), %rbx + sbbq %r9, %rbx + movq 48(%rdx), %r9 + movq %r13, 32(%rdi) + movq 48(%rsi), %r14 + sbbq %r9, %r14 + movq 64(%rdx), %r13 + movq 56(%rdx), %r9 + movq %rbx, 40(%rdi) + movq 56(%rsi), %rdx + sbbq %r9, %rdx + movq 120(%rsi), %rcx + movq %r14, 48(%rdi) + movq 64(%rsi), %rbx + sbbq %r13, %rbx + movq 112(%rsi), %rax + movq %rdx, 56(%rdi) + movq 72(%rsi), %r9 + sbbq %r10, %r9 + movq 80(%rsi), %r13 + sbbq %r12, %r13 + movq 88(%rsi), %r12 + sbbq %r11, %r12 + movq 104(%rsi), %rdx + movq 96(%rsi), %r14 + sbbq %rbp, %r14 + sbbq -24(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, -24(%rsp) ## 8-byte Spill + sbbq -16(%rsp), %rax ## 8-byte Folded Reload + movq %rax, -16(%rsp) ## 8-byte Spill + sbbq -8(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, -8(%rsp) ## 8-byte Spill + movl $0, %ebp + sbbq $0, %rbp + andl $1, %ebp + movq (%r15), %r11 + cmoveq %r8, %r11 + testb %bpl, %bpl + movq 16(%r15), %rbp + cmoveq %r8, %rbp + movq 8(%r15), %rsi + cmoveq %r8, %rsi + movq 56(%r15), %r10 + cmoveq %r8, %r10 + movq 48(%r15), %rdx + cmoveq %r8, %rdx + movq 40(%r15), %rcx + cmoveq %r8, %rcx + movq 32(%r15), %rax + cmoveq %r8, %rax + cmovneq 24(%r15), %r8 + addq %rbx, %r11 + adcq %r9, %rsi + movq %r11, 64(%rdi) + adcq %r13, %rbp + movq %rsi, 72(%rdi) + movq %rbp, 80(%rdi) + adcq %r12, %r8 + movq %r8, 88(%rdi) + adcq %r14, %rax + movq %rax, 96(%rdi) + adcq -24(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, 104(%rdi) + adcq -16(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, 112(%rdi) + adcq -8(%rsp), %r10 ## 8-byte Folded Reload + movq %r10, 120(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .p2align 4, 0x90 +l_mulPv576x64: ## @mulPv576x64 +## BB#0: + mulxq (%rsi), %rcx, %rax + movq %rcx, (%rdi) + mulxq 8(%rsi), %rcx, %r8 + addq %rax, %rcx + movq %rcx, 8(%rdi) + mulxq 16(%rsi), %rcx, %r9 + adcq %r8, %rcx + movq %rcx, 16(%rdi) + mulxq 24(%rsi), %rax, %rcx + adcq %r9, %rax + movq %rax, 24(%rdi) + mulxq 32(%rsi), %rax, %r8 + adcq %rcx, %rax + movq %rax, 32(%rdi) + mulxq 40(%rsi), %rcx, %r9 + adcq %r8, %rcx + movq %rcx, 40(%rdi) + mulxq 48(%rsi), %rax, %rcx + adcq %r9, %rax + movq %rax, 48(%rdi) + mulxq 56(%rsi), %rax, %r8 + adcq %rcx, %rax + movq %rax, 56(%rdi) + mulxq 64(%rsi), %rax, %rcx + adcq 
%r8, %rax + movq %rax, 64(%rdi) + adcq $0, %rcx + movq %rcx, 72(%rdi) + movq %rdi, %rax + retq + + .globl _mcl_fp_mulUnitPre9Lbmi2 + .p2align 4, 0x90 +_mcl_fp_mulUnitPre9Lbmi2: ## @mcl_fp_mulUnitPre9Lbmi2 +## BB#0: + pushq %r14 + pushq %rbx + subq $88, %rsp + movq %rdi, %rbx + leaq 8(%rsp), %rdi + callq l_mulPv576x64 + movq 80(%rsp), %r8 + movq 72(%rsp), %r9 + movq 64(%rsp), %r10 + movq 56(%rsp), %r11 + movq 48(%rsp), %r14 + movq 40(%rsp), %rax + movq 32(%rsp), %rcx + movq 24(%rsp), %rdx + movq 8(%rsp), %rsi + movq 16(%rsp), %rdi + movq %rsi, (%rbx) + movq %rdi, 8(%rbx) + movq %rdx, 16(%rbx) + movq %rcx, 24(%rbx) + movq %rax, 32(%rbx) + movq %r14, 40(%rbx) + movq %r11, 48(%rbx) + movq %r10, 56(%rbx) + movq %r9, 64(%rbx) + movq %r8, 72(%rbx) + addq $88, %rsp + popq %rbx + popq %r14 + retq + + .globl _mcl_fpDbl_mulPre9Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_mulPre9Lbmi2: ## @mcl_fpDbl_mulPre9Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + subq $808, %rsp ## imm = 0x328 + movq %rdx, %rax + movq %rdi, %r12 + movq (%rax), %rdx + movq %rax, %rbx + movq %rbx, 80(%rsp) ## 8-byte Spill + leaq 728(%rsp), %rdi + movq %rsi, %rbp + movq %rbp, 72(%rsp) ## 8-byte Spill + callq l_mulPv576x64 + movq 800(%rsp), %r13 + movq 792(%rsp), %rax + movq %rax, 48(%rsp) ## 8-byte Spill + movq 784(%rsp), %rax + movq %rax, 16(%rsp) ## 8-byte Spill + movq 776(%rsp), %rax + movq %rax, 8(%rsp) ## 8-byte Spill + movq 768(%rsp), %rax + movq %rax, (%rsp) ## 8-byte Spill + movq 760(%rsp), %rax + movq %rax, 40(%rsp) ## 8-byte Spill + movq 752(%rsp), %rax + movq %rax, 32(%rsp) ## 8-byte Spill + movq 744(%rsp), %rax + movq %rax, 24(%rsp) ## 8-byte Spill + movq 728(%rsp), %rax + movq 736(%rsp), %r14 + movq %rax, (%r12) + movq %r12, 64(%rsp) ## 8-byte Spill + movq 8(%rbx), %rdx + leaq 648(%rsp), %rdi + movq %rbp, %rsi + callq l_mulPv576x64 + movq 720(%rsp), %r8 + movq 712(%rsp), %rcx + movq 704(%rsp), %rdx + movq 696(%rsp), %rsi + movq 688(%rsp), %rdi + movq 680(%rsp), %rbp + addq 648(%rsp), %r14 + movq 672(%rsp), %rax + movq 656(%rsp), %rbx + movq 664(%rsp), %r15 + movq %r14, 8(%r12) + adcq 24(%rsp), %rbx ## 8-byte Folded Reload + adcq 32(%rsp), %r15 ## 8-byte Folded Reload + adcq 40(%rsp), %rax ## 8-byte Folded Reload + movq %rax, %r14 + adcq (%rsp), %rbp ## 8-byte Folded Reload + movq %rbp, 24(%rsp) ## 8-byte Spill + adcq 8(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, 32(%rsp) ## 8-byte Spill + adcq 16(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, 40(%rsp) ## 8-byte Spill + adcq 48(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, (%rsp) ## 8-byte Spill + adcq %r13, %rcx + movq %rcx, 8(%rsp) ## 8-byte Spill + adcq $0, %r8 + movq %r8, 16(%rsp) ## 8-byte Spill + movq 80(%rsp), %r13 ## 8-byte Reload + movq 16(%r13), %rdx + leaq 568(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + movq 640(%rsp), %r8 + movq 632(%rsp), %r9 + movq 624(%rsp), %r10 + movq 616(%rsp), %rdi + movq 608(%rsp), %rbp + movq 600(%rsp), %rcx + addq 568(%rsp), %rbx + movq 592(%rsp), %rdx + movq 576(%rsp), %r12 + movq 584(%rsp), %rsi + movq 64(%rsp), %rax ## 8-byte Reload + movq %rbx, 16(%rax) + adcq %r15, %r12 + adcq %r14, %rsi + movq %rsi, 48(%rsp) ## 8-byte Spill + adcq 24(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, 56(%rsp) ## 8-byte Spill + adcq 32(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, 24(%rsp) ## 8-byte Spill + adcq 40(%rsp), %rbp ## 8-byte Folded Reload + movq %rbp, 32(%rsp) ## 8-byte Spill + adcq (%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, 40(%rsp) ## 8-byte 
Spill + adcq 8(%rsp), %r10 ## 8-byte Folded Reload + movq %r10, (%rsp) ## 8-byte Spill + adcq 16(%rsp), %r9 ## 8-byte Folded Reload + movq %r9, 8(%rsp) ## 8-byte Spill + adcq $0, %r8 + movq %r8, 16(%rsp) ## 8-byte Spill + movq 24(%r13), %rdx + leaq 488(%rsp), %rdi + movq 72(%rsp), %r15 ## 8-byte Reload + movq %r15, %rsi + callq l_mulPv576x64 + movq 560(%rsp), %r8 + movq 552(%rsp), %rcx + movq 544(%rsp), %rdx + movq 536(%rsp), %rsi + movq 528(%rsp), %rdi + movq 520(%rsp), %rbp + addq 488(%rsp), %r12 + movq 512(%rsp), %rax + movq 496(%rsp), %rbx + movq 504(%rsp), %r13 + movq 64(%rsp), %r14 ## 8-byte Reload + movq %r12, 24(%r14) + adcq 48(%rsp), %rbx ## 8-byte Folded Reload + adcq 56(%rsp), %r13 ## 8-byte Folded Reload + adcq 24(%rsp), %rax ## 8-byte Folded Reload + movq %rax, 24(%rsp) ## 8-byte Spill + adcq 32(%rsp), %rbp ## 8-byte Folded Reload + movq %rbp, 32(%rsp) ## 8-byte Spill + adcq 40(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, 40(%rsp) ## 8-byte Spill + adcq (%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, (%rsp) ## 8-byte Spill + adcq 8(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, 8(%rsp) ## 8-byte Spill + adcq 16(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, 16(%rsp) ## 8-byte Spill + adcq $0, %r8 + movq %r8, 48(%rsp) ## 8-byte Spill + movq 80(%rsp), %r12 ## 8-byte Reload + movq 32(%r12), %rdx + leaq 408(%rsp), %rdi + movq %r15, %rsi + callq l_mulPv576x64 + movq 480(%rsp), %r8 + movq 472(%rsp), %r9 + movq 464(%rsp), %rdx + movq 456(%rsp), %rsi + movq 448(%rsp), %rdi + movq 440(%rsp), %rbp + addq 408(%rsp), %rbx + movq 432(%rsp), %rax + movq 416(%rsp), %r15 + movq 424(%rsp), %rcx + movq %rbx, 32(%r14) + adcq %r13, %r15 + adcq 24(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, 56(%rsp) ## 8-byte Spill + adcq 32(%rsp), %rax ## 8-byte Folded Reload + movq %rax, 24(%rsp) ## 8-byte Spill + adcq 40(%rsp), %rbp ## 8-byte Folded Reload + movq %rbp, 32(%rsp) ## 8-byte Spill + adcq (%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, 40(%rsp) ## 8-byte Spill + adcq 8(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, (%rsp) ## 8-byte Spill + adcq 16(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, 8(%rsp) ## 8-byte Spill + adcq 48(%rsp), %r9 ## 8-byte Folded Reload + movq %r9, 16(%rsp) ## 8-byte Spill + adcq $0, %r8 + movq %r8, 48(%rsp) ## 8-byte Spill + movq %r12, %r14 + movq 40(%r14), %rdx + leaq 328(%rsp), %rdi + movq 72(%rsp), %r13 ## 8-byte Reload + movq %r13, %rsi + callq l_mulPv576x64 + movq 400(%rsp), %r8 + movq 392(%rsp), %r9 + movq 384(%rsp), %rsi + movq 376(%rsp), %rdi + movq 368(%rsp), %rbx + movq 360(%rsp), %rbp + addq 328(%rsp), %r15 + movq 352(%rsp), %rcx + movq 336(%rsp), %r12 + movq 344(%rsp), %rdx + movq 64(%rsp), %rax ## 8-byte Reload + movq %r15, 40(%rax) + adcq 56(%rsp), %r12 ## 8-byte Folded Reload + adcq 24(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, 56(%rsp) ## 8-byte Spill + adcq 32(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, 24(%rsp) ## 8-byte Spill + adcq 40(%rsp), %rbp ## 8-byte Folded Reload + movq %rbp, 32(%rsp) ## 8-byte Spill + adcq (%rsp), %rbx ## 8-byte Folded Reload + movq %rbx, 40(%rsp) ## 8-byte Spill + adcq 8(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, (%rsp) ## 8-byte Spill + adcq 16(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, 8(%rsp) ## 8-byte Spill + adcq 48(%rsp), %r9 ## 8-byte Folded Reload + movq %r9, 16(%rsp) ## 8-byte Spill + adcq $0, %r8 + movq %r8, 48(%rsp) ## 8-byte Spill + movq 48(%r14), %rdx + leaq 248(%rsp), %rdi + movq %r13, %rsi + movq %r13, %r15 + callq l_mulPv576x64 + movq 320(%rsp), %r8 + movq 312(%rsp), 
%r9 + movq 304(%rsp), %rsi + movq 296(%rsp), %rdi + movq 288(%rsp), %rbx + movq 280(%rsp), %rbp + addq 248(%rsp), %r12 + movq 272(%rsp), %rcx + movq 256(%rsp), %r13 + movq 264(%rsp), %rdx + movq 64(%rsp), %rax ## 8-byte Reload + movq %r12, 48(%rax) + adcq 56(%rsp), %r13 ## 8-byte Folded Reload + adcq 24(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, 56(%rsp) ## 8-byte Spill + adcq 32(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, 24(%rsp) ## 8-byte Spill + adcq 40(%rsp), %rbp ## 8-byte Folded Reload + movq %rbp, 32(%rsp) ## 8-byte Spill + adcq (%rsp), %rbx ## 8-byte Folded Reload + movq %rbx, 40(%rsp) ## 8-byte Spill + adcq 8(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, (%rsp) ## 8-byte Spill + adcq 16(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, 8(%rsp) ## 8-byte Spill + adcq 48(%rsp), %r9 ## 8-byte Folded Reload + movq %r9, 16(%rsp) ## 8-byte Spill + adcq $0, %r8 + movq %r8, 48(%rsp) ## 8-byte Spill + movq 56(%r14), %rdx + leaq 168(%rsp), %rdi + movq %r15, %rsi + callq l_mulPv576x64 + movq 240(%rsp), %rcx + movq 232(%rsp), %rdx + movq 224(%rsp), %rsi + movq 216(%rsp), %rdi + movq 208(%rsp), %rbx + addq 168(%rsp), %r13 + movq 200(%rsp), %r12 + movq 192(%rsp), %rbp + movq 176(%rsp), %r14 + movq 184(%rsp), %r15 + movq 64(%rsp), %rax ## 8-byte Reload + movq %r13, 56(%rax) + adcq 56(%rsp), %r14 ## 8-byte Folded Reload + adcq 24(%rsp), %r15 ## 8-byte Folded Reload + adcq 32(%rsp), %rbp ## 8-byte Folded Reload + adcq 40(%rsp), %r12 ## 8-byte Folded Reload + adcq (%rsp), %rbx ## 8-byte Folded Reload + movq %rbx, %r13 + adcq 8(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, (%rsp) ## 8-byte Spill + adcq 16(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, 8(%rsp) ## 8-byte Spill + adcq 48(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, 16(%rsp) ## 8-byte Spill + adcq $0, %rcx + movq %rcx, 48(%rsp) ## 8-byte Spill + movq 80(%rsp), %rax ## 8-byte Reload + movq 64(%rax), %rdx + leaq 88(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + addq 88(%rsp), %r14 + adcq 96(%rsp), %r15 + movq 160(%rsp), %r8 + adcq 104(%rsp), %rbp + movq 152(%rsp), %r9 + movq 144(%rsp), %rdx + movq 136(%rsp), %rsi + movq 128(%rsp), %rdi + movq 120(%rsp), %rbx + movq 112(%rsp), %rax + movq 64(%rsp), %rcx ## 8-byte Reload + movq %r14, 64(%rcx) + movq %r15, 72(%rcx) + adcq %r12, %rax + movq %rbp, 80(%rcx) + movq %rax, 88(%rcx) + adcq %r13, %rbx + movq %rbx, 96(%rcx) + adcq (%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, 104(%rcx) + adcq 8(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, 112(%rcx) + adcq 16(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, 120(%rcx) + adcq 48(%rsp), %r9 ## 8-byte Folded Reload + movq %r9, 128(%rcx) + adcq $0, %r8 + movq %r8, 136(%rcx) + addq $808, %rsp ## imm = 0x328 + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fpDbl_sqrPre9Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_sqrPre9Lbmi2: ## @mcl_fpDbl_sqrPre9Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + subq $808, %rsp ## imm = 0x328 + movq %rsi, %r15 + movq %rdi, %r14 + movq (%r15), %rdx + leaq 728(%rsp), %rdi + callq l_mulPv576x64 + movq 800(%rsp), %rax + movq %rax, 32(%rsp) ## 8-byte Spill + movq 792(%rsp), %rax + movq %rax, 24(%rsp) ## 8-byte Spill + movq 784(%rsp), %rax + movq %rax, 16(%rsp) ## 8-byte Spill + movq 776(%rsp), %rax + movq %rax, 8(%rsp) ## 8-byte Spill + movq 768(%rsp), %rax + movq %rax, 56(%rsp) ## 8-byte Spill + movq 760(%rsp), %rax + movq %rax, 48(%rsp) ## 8-byte Spill + movq 752(%rsp), %rax + movq 
%rax, 40(%rsp) ## 8-byte Spill + movq 744(%rsp), %rax + movq %rax, 80(%rsp) ## 8-byte Spill + movq 728(%rsp), %rax + movq 736(%rsp), %r12 + movq %rax, (%r14) + movq %r14, 72(%rsp) ## 8-byte Spill + movq 8(%r15), %rdx + leaq 648(%rsp), %rdi + movq %r15, %rsi + callq l_mulPv576x64 + movq 720(%rsp), %r8 + movq 712(%rsp), %rcx + movq 704(%rsp), %rdx + movq 696(%rsp), %rsi + movq 688(%rsp), %rdi + movq 680(%rsp), %rbp + addq 648(%rsp), %r12 + movq 672(%rsp), %rax + movq 656(%rsp), %rbx + movq 664(%rsp), %r13 + movq %r12, 8(%r14) + adcq 80(%rsp), %rbx ## 8-byte Folded Reload + adcq 40(%rsp), %r13 ## 8-byte Folded Reload + adcq 48(%rsp), %rax ## 8-byte Folded Reload + movq %rax, 40(%rsp) ## 8-byte Spill + adcq 56(%rsp), %rbp ## 8-byte Folded Reload + movq %rbp, 48(%rsp) ## 8-byte Spill + adcq 8(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, 56(%rsp) ## 8-byte Spill + adcq 16(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, 8(%rsp) ## 8-byte Spill + adcq 24(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, 16(%rsp) ## 8-byte Spill + adcq 32(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, 24(%rsp) ## 8-byte Spill + adcq $0, %r8 + movq %r8, 32(%rsp) ## 8-byte Spill + movq %r15, 64(%rsp) ## 8-byte Spill + movq 16(%r15), %rdx + leaq 568(%rsp), %rdi + movq %r15, %rsi + callq l_mulPv576x64 + movq 640(%rsp), %r8 + movq 632(%rsp), %rcx + movq 624(%rsp), %rdx + movq 616(%rsp), %rsi + movq 608(%rsp), %rdi + movq 600(%rsp), %rbp + addq 568(%rsp), %rbx + movq 592(%rsp), %rax + movq 576(%rsp), %r14 + movq 584(%rsp), %r12 + movq 72(%rsp), %r15 ## 8-byte Reload + movq %rbx, 16(%r15) + adcq %r13, %r14 + adcq 40(%rsp), %r12 ## 8-byte Folded Reload + adcq 48(%rsp), %rax ## 8-byte Folded Reload + movq %rax, 40(%rsp) ## 8-byte Spill + adcq 56(%rsp), %rbp ## 8-byte Folded Reload + movq %rbp, 48(%rsp) ## 8-byte Spill + adcq 8(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, 56(%rsp) ## 8-byte Spill + adcq 16(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, 8(%rsp) ## 8-byte Spill + adcq 24(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, 16(%rsp) ## 8-byte Spill + adcq 32(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, 24(%rsp) ## 8-byte Spill + adcq $0, %r8 + movq %r8, 32(%rsp) ## 8-byte Spill + movq 64(%rsp), %rsi ## 8-byte Reload + movq 24(%rsi), %rdx + leaq 488(%rsp), %rdi + callq l_mulPv576x64 + movq 560(%rsp), %r8 + movq 552(%rsp), %rcx + movq 544(%rsp), %rdx + movq 536(%rsp), %rsi + movq 528(%rsp), %rdi + movq 520(%rsp), %rbp + addq 488(%rsp), %r14 + movq 512(%rsp), %rax + movq 496(%rsp), %rbx + movq 504(%rsp), %r13 + movq %r14, 24(%r15) + adcq %r12, %rbx + adcq 40(%rsp), %r13 ## 8-byte Folded Reload + adcq 48(%rsp), %rax ## 8-byte Folded Reload + movq %rax, 40(%rsp) ## 8-byte Spill + adcq 56(%rsp), %rbp ## 8-byte Folded Reload + movq %rbp, 48(%rsp) ## 8-byte Spill + adcq 8(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, 56(%rsp) ## 8-byte Spill + adcq 16(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, 8(%rsp) ## 8-byte Spill + adcq 24(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, 16(%rsp) ## 8-byte Spill + adcq 32(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, 24(%rsp) ## 8-byte Spill + adcq $0, %r8 + movq %r8, 32(%rsp) ## 8-byte Spill + movq 64(%rsp), %rsi ## 8-byte Reload + movq 32(%rsi), %rdx + leaq 408(%rsp), %rdi + callq l_mulPv576x64 + movq 480(%rsp), %r8 + movq 472(%rsp), %rcx + movq 464(%rsp), %rdx + movq 456(%rsp), %rsi + movq 448(%rsp), %rdi + movq 440(%rsp), %rbp + addq 408(%rsp), %rbx + movq 432(%rsp), %rax + movq 416(%rsp), %r14 + movq 424(%rsp), %r12 + movq %rbx, 32(%r15) + adcq %r13, 
%r14 + adcq 40(%rsp), %r12 ## 8-byte Folded Reload + adcq 48(%rsp), %rax ## 8-byte Folded Reload + movq %rax, 40(%rsp) ## 8-byte Spill + adcq 56(%rsp), %rbp ## 8-byte Folded Reload + movq %rbp, 48(%rsp) ## 8-byte Spill + adcq 8(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, 56(%rsp) ## 8-byte Spill + adcq 16(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, 8(%rsp) ## 8-byte Spill + adcq 24(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, 16(%rsp) ## 8-byte Spill + adcq 32(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, 24(%rsp) ## 8-byte Spill + adcq $0, %r8 + movq %r8, 32(%rsp) ## 8-byte Spill + movq 64(%rsp), %rsi ## 8-byte Reload + movq 40(%rsi), %rdx + leaq 328(%rsp), %rdi + callq l_mulPv576x64 + movq 400(%rsp), %r8 + movq 392(%rsp), %rcx + movq 384(%rsp), %rdx + movq 376(%rsp), %rsi + movq 368(%rsp), %rdi + movq 360(%rsp), %rbp + addq 328(%rsp), %r14 + movq 352(%rsp), %rax + movq 336(%rsp), %rbx + movq 344(%rsp), %r13 + movq %r14, 40(%r15) + adcq %r12, %rbx + adcq 40(%rsp), %r13 ## 8-byte Folded Reload + adcq 48(%rsp), %rax ## 8-byte Folded Reload + movq %rax, 40(%rsp) ## 8-byte Spill + adcq 56(%rsp), %rbp ## 8-byte Folded Reload + movq %rbp, 48(%rsp) ## 8-byte Spill + adcq 8(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, 56(%rsp) ## 8-byte Spill + adcq 16(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, 8(%rsp) ## 8-byte Spill + adcq 24(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, 16(%rsp) ## 8-byte Spill + adcq 32(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, 24(%rsp) ## 8-byte Spill + adcq $0, %r8 + movq %r8, 32(%rsp) ## 8-byte Spill + movq 64(%rsp), %rsi ## 8-byte Reload + movq 48(%rsi), %rdx + leaq 248(%rsp), %rdi + callq l_mulPv576x64 + movq 320(%rsp), %r8 + movq 312(%rsp), %rcx + movq 304(%rsp), %rdx + movq 296(%rsp), %rsi + movq 288(%rsp), %rdi + movq 280(%rsp), %rbp + addq 248(%rsp), %rbx + movq 272(%rsp), %rax + movq 256(%rsp), %r12 + movq 264(%rsp), %r14 + movq %rbx, 48(%r15) + adcq %r13, %r12 + adcq 40(%rsp), %r14 ## 8-byte Folded Reload + adcq 48(%rsp), %rax ## 8-byte Folded Reload + movq %rax, 40(%rsp) ## 8-byte Spill + adcq 56(%rsp), %rbp ## 8-byte Folded Reload + movq %rbp, 48(%rsp) ## 8-byte Spill + adcq 8(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, 56(%rsp) ## 8-byte Spill + adcq 16(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, 8(%rsp) ## 8-byte Spill + adcq 24(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, 16(%rsp) ## 8-byte Spill + adcq 32(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, 24(%rsp) ## 8-byte Spill + adcq $0, %r8 + movq %r8, 32(%rsp) ## 8-byte Spill + movq 64(%rsp), %rsi ## 8-byte Reload + movq 56(%rsi), %rdx + leaq 168(%rsp), %rdi + callq l_mulPv576x64 + movq 240(%rsp), %r8 + movq 232(%rsp), %rdx + movq 224(%rsp), %rsi + movq 216(%rsp), %rdi + movq 208(%rsp), %rbx + movq 200(%rsp), %rcx + addq 168(%rsp), %r12 + movq 192(%rsp), %r15 + movq 176(%rsp), %r13 + movq 184(%rsp), %rbp + movq 72(%rsp), %rax ## 8-byte Reload + movq %r12, 56(%rax) + adcq %r14, %r13 + adcq 40(%rsp), %rbp ## 8-byte Folded Reload + adcq 48(%rsp), %r15 ## 8-byte Folded Reload + adcq 56(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, %r12 + adcq 8(%rsp), %rbx ## 8-byte Folded Reload + movq %rbx, %r14 + adcq 16(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, 8(%rsp) ## 8-byte Spill + adcq 24(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, 16(%rsp) ## 8-byte Spill + adcq 32(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, 24(%rsp) ## 8-byte Spill + adcq $0, %r8 + movq %r8, 32(%rsp) ## 8-byte Spill + movq 64(%rsp), %rsi ## 8-byte Reload + movq 64(%rsi), %rdx + 
leaq 88(%rsp), %rdi + callq l_mulPv576x64 + addq 88(%rsp), %r13 + adcq 96(%rsp), %rbp + movq 160(%rsp), %r8 + adcq 104(%rsp), %r15 + movq 152(%rsp), %r9 + movq 144(%rsp), %rdx + movq 136(%rsp), %rsi + movq 128(%rsp), %rdi + movq 120(%rsp), %rbx + movq 112(%rsp), %rax + movq 72(%rsp), %rcx ## 8-byte Reload + movq %r13, 64(%rcx) + movq %rbp, 72(%rcx) + adcq %r12, %rax + movq %r15, 80(%rcx) + movq %rax, 88(%rcx) + adcq %r14, %rbx + movq %rbx, 96(%rcx) + adcq 8(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, 104(%rcx) + adcq 16(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, 112(%rcx) + adcq 24(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, 120(%rcx) + adcq 32(%rsp), %r9 ## 8-byte Folded Reload + movq %r9, 128(%rcx) + adcq $0, %r8 + movq %r8, 136(%rcx) + addq $808, %rsp ## imm = 0x328 + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_mont9Lbmi2 + .p2align 4, 0x90 +_mcl_fp_mont9Lbmi2: ## @mcl_fp_mont9Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + subq $1560, %rsp ## imm = 0x618 + movq %rcx, 72(%rsp) ## 8-byte Spill + movq %rdx, 96(%rsp) ## 8-byte Spill + movq %rsi, 88(%rsp) ## 8-byte Spill + movq %rdi, 112(%rsp) ## 8-byte Spill + movq -8(%rcx), %rbx + movq %rbx, 80(%rsp) ## 8-byte Spill + movq (%rdx), %rdx + leaq 1480(%rsp), %rdi + callq l_mulPv576x64 + movq 1480(%rsp), %r14 + movq 1488(%rsp), %r15 + movq %r14, %rdx + imulq %rbx, %rdx + movq 1552(%rsp), %rax + movq %rax, 24(%rsp) ## 8-byte Spill + movq 1544(%rsp), %rax + movq %rax, 40(%rsp) ## 8-byte Spill + movq 1536(%rsp), %rax + movq %rax, 56(%rsp) ## 8-byte Spill + movq 1528(%rsp), %r12 + movq 1520(%rsp), %r13 + movq 1512(%rsp), %rbx + movq 1504(%rsp), %rax + movq %rax, (%rsp) ## 8-byte Spill + movq 1496(%rsp), %rbp + leaq 1400(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + addq 1400(%rsp), %r14 + adcq 1408(%rsp), %r15 + adcq 1416(%rsp), %rbp + movq %rbp, 8(%rsp) ## 8-byte Spill + movq (%rsp), %rax ## 8-byte Reload + adcq 1424(%rsp), %rax + movq %rax, (%rsp) ## 8-byte Spill + adcq 1432(%rsp), %rbx + movq %rbx, 32(%rsp) ## 8-byte Spill + adcq 1440(%rsp), %r13 + movq %r13, 16(%rsp) ## 8-byte Spill + adcq 1448(%rsp), %r12 + movq %r12, 48(%rsp) ## 8-byte Spill + movq 56(%rsp), %rbx ## 8-byte Reload + adcq 1456(%rsp), %rbx + movq 40(%rsp), %r14 ## 8-byte Reload + adcq 1464(%rsp), %r14 + movq 24(%rsp), %r13 ## 8-byte Reload + adcq 1472(%rsp), %r13 + sbbq %rbp, %rbp + movq 96(%rsp), %rax ## 8-byte Reload + movq 8(%rax), %rdx + leaq 1320(%rsp), %rdi + movq 88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + andl $1, %ebp + addq 1320(%rsp), %r15 + movq 8(%rsp), %rax ## 8-byte Reload + adcq 1328(%rsp), %rax + movq %rax, 8(%rsp) ## 8-byte Spill + movq (%rsp), %rax ## 8-byte Reload + adcq 1336(%rsp), %rax + movq %rax, (%rsp) ## 8-byte Spill + movq 32(%rsp), %r12 ## 8-byte Reload + adcq 1344(%rsp), %r12 + movq 16(%rsp), %rax ## 8-byte Reload + adcq 1352(%rsp), %rax + movq %rax, 16(%rsp) ## 8-byte Spill + movq 48(%rsp), %rax ## 8-byte Reload + adcq 1360(%rsp), %rax + movq %rax, 48(%rsp) ## 8-byte Spill + adcq 1368(%rsp), %rbx + adcq 1376(%rsp), %r14 + movq %r14, 40(%rsp) ## 8-byte Spill + adcq 1384(%rsp), %r13 + movq %r13, 24(%rsp) ## 8-byte Spill + adcq 1392(%rsp), %rbp + sbbq %r14, %r14 + movq %r15, %rdx + imulq 80(%rsp), %rdx ## 8-byte Folded Reload + leaq 1240(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + movq %r14, %rax + andl $1, %eax + addq 1240(%rsp), %r15 + movq 8(%rsp), %rcx ## 8-byte 
Reload + adcq 1248(%rsp), %rcx + movq %rcx, 8(%rsp) ## 8-byte Spill + movq (%rsp), %r14 ## 8-byte Reload + adcq 1256(%rsp), %r14 + adcq 1264(%rsp), %r12 + movq %r12, 32(%rsp) ## 8-byte Spill + movq 16(%rsp), %r12 ## 8-byte Reload + adcq 1272(%rsp), %r12 + movq 48(%rsp), %r13 ## 8-byte Reload + adcq 1280(%rsp), %r13 + adcq 1288(%rsp), %rbx + movq %rbx, 56(%rsp) ## 8-byte Spill + movq 40(%rsp), %r15 ## 8-byte Reload + adcq 1296(%rsp), %r15 + movq 24(%rsp), %rbx ## 8-byte Reload + adcq 1304(%rsp), %rbx + adcq 1312(%rsp), %rbp + adcq $0, %rax + movq %rax, 64(%rsp) ## 8-byte Spill + movq 96(%rsp), %rax ## 8-byte Reload + movq 16(%rax), %rdx + leaq 1160(%rsp), %rdi + movq 88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + movq 8(%rsp), %rax ## 8-byte Reload + addq 1160(%rsp), %rax + adcq 1168(%rsp), %r14 + movq %r14, (%rsp) ## 8-byte Spill + movq 32(%rsp), %r14 ## 8-byte Reload + adcq 1176(%rsp), %r14 + adcq 1184(%rsp), %r12 + movq %r12, 16(%rsp) ## 8-byte Spill + movq %r13, %r12 + adcq 1192(%rsp), %r12 + movq 56(%rsp), %rcx ## 8-byte Reload + adcq 1200(%rsp), %rcx + movq %rcx, 56(%rsp) ## 8-byte Spill + adcq 1208(%rsp), %r15 + movq %r15, %r13 + adcq 1216(%rsp), %rbx + movq %rbx, 24(%rsp) ## 8-byte Spill + adcq 1224(%rsp), %rbp + movq 64(%rsp), %rcx ## 8-byte Reload + adcq 1232(%rsp), %rcx + movq %rcx, 64(%rsp) ## 8-byte Spill + sbbq %r15, %r15 + movq %rax, %rdx + movq %rax, %rbx + imulq 80(%rsp), %rdx ## 8-byte Folded Reload + leaq 1080(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + movq %r15, %rax + andl $1, %eax + addq 1080(%rsp), %rbx + movq (%rsp), %rcx ## 8-byte Reload + adcq 1088(%rsp), %rcx + movq %rcx, (%rsp) ## 8-byte Spill + movq %r14, %r15 + adcq 1096(%rsp), %r15 + movq 16(%rsp), %r14 ## 8-byte Reload + adcq 1104(%rsp), %r14 + movq %r12, %rbx + adcq 1112(%rsp), %rbx + movq 56(%rsp), %rcx ## 8-byte Reload + adcq 1120(%rsp), %rcx + movq %rcx, 56(%rsp) ## 8-byte Spill + adcq 1128(%rsp), %r13 + movq %r13, 40(%rsp) ## 8-byte Spill + movq 24(%rsp), %r13 ## 8-byte Reload + adcq 1136(%rsp), %r13 + adcq 1144(%rsp), %rbp + movq 64(%rsp), %r12 ## 8-byte Reload + adcq 1152(%rsp), %r12 + adcq $0, %rax + movq %rax, 8(%rsp) ## 8-byte Spill + movq 96(%rsp), %rax ## 8-byte Reload + movq 24(%rax), %rdx + leaq 1000(%rsp), %rdi + movq 88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + movq (%rsp), %rax ## 8-byte Reload + addq 1000(%rsp), %rax + adcq 1008(%rsp), %r15 + movq %r15, 32(%rsp) ## 8-byte Spill + adcq 1016(%rsp), %r14 + movq %r14, %r15 + adcq 1024(%rsp), %rbx + movq %rbx, 48(%rsp) ## 8-byte Spill + movq 56(%rsp), %r14 ## 8-byte Reload + adcq 1032(%rsp), %r14 + movq 40(%rsp), %rcx ## 8-byte Reload + adcq 1040(%rsp), %rcx + movq %rcx, 40(%rsp) ## 8-byte Spill + adcq 1048(%rsp), %r13 + movq %r13, 24(%rsp) ## 8-byte Spill + adcq 1056(%rsp), %rbp + adcq 1064(%rsp), %r12 + movq 8(%rsp), %rcx ## 8-byte Reload + adcq 1072(%rsp), %rcx + movq %rcx, 8(%rsp) ## 8-byte Spill + sbbq %rbx, %rbx + movq %rax, %rdx + movq %rax, %r13 + imulq 80(%rsp), %rdx ## 8-byte Folded Reload + leaq 920(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + andl $1, %ebx + movq %rbx, %rax + addq 920(%rsp), %r13 + movq 32(%rsp), %rcx ## 8-byte Reload + adcq 928(%rsp), %rcx + movq %rcx, 32(%rsp) ## 8-byte Spill + adcq 936(%rsp), %r15 + movq %r15, 16(%rsp) ## 8-byte Spill + movq 48(%rsp), %r15 ## 8-byte Reload + adcq 944(%rsp), %r15 + movq %r14, %r13 + adcq 952(%rsp), %r13 + movq 40(%rsp), %r14 ## 8-byte Reload + adcq 960(%rsp), %r14 + movq 24(%rsp), %rbx ## 8-byte Reload 
+ adcq 968(%rsp), %rbx + adcq 976(%rsp), %rbp + adcq 984(%rsp), %r12 + movq 8(%rsp), %rcx ## 8-byte Reload + adcq 992(%rsp), %rcx + movq %rcx, 8(%rsp) ## 8-byte Spill + adcq $0, %rax + movq %rax, (%rsp) ## 8-byte Spill + movq 96(%rsp), %rax ## 8-byte Reload + movq 32(%rax), %rdx + leaq 840(%rsp), %rdi + movq 88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + movq 32(%rsp), %rax ## 8-byte Reload + addq 840(%rsp), %rax + movq 16(%rsp), %rcx ## 8-byte Reload + adcq 848(%rsp), %rcx + movq %rcx, 16(%rsp) ## 8-byte Spill + adcq 856(%rsp), %r15 + adcq 864(%rsp), %r13 + movq %r13, 56(%rsp) ## 8-byte Spill + adcq 872(%rsp), %r14 + movq %r14, 40(%rsp) ## 8-byte Spill + adcq 880(%rsp), %rbx + movq %rbx, 24(%rsp) ## 8-byte Spill + adcq 888(%rsp), %rbp + adcq 896(%rsp), %r12 + movq 8(%rsp), %r13 ## 8-byte Reload + adcq 904(%rsp), %r13 + movq (%rsp), %rcx ## 8-byte Reload + adcq 912(%rsp), %rcx + movq %rcx, (%rsp) ## 8-byte Spill + sbbq %rbx, %rbx + movq %rax, %rdx + movq %rax, %r14 + imulq 80(%rsp), %rdx ## 8-byte Folded Reload + leaq 760(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + andl $1, %ebx + movq %rbx, %rax + addq 760(%rsp), %r14 + movq 16(%rsp), %rcx ## 8-byte Reload + adcq 768(%rsp), %rcx + movq %rcx, 16(%rsp) ## 8-byte Spill + adcq 776(%rsp), %r15 + movq 56(%rsp), %r14 ## 8-byte Reload + adcq 784(%rsp), %r14 + movq 40(%rsp), %rcx ## 8-byte Reload + adcq 792(%rsp), %rcx + movq %rcx, 40(%rsp) ## 8-byte Spill + movq 24(%rsp), %rcx ## 8-byte Reload + adcq 800(%rsp), %rcx + movq %rcx, 24(%rsp) ## 8-byte Spill + adcq 808(%rsp), %rbp + movq %r12, %rbx + adcq 816(%rsp), %rbx + movq %r13, %r12 + adcq 824(%rsp), %r12 + movq (%rsp), %r13 ## 8-byte Reload + adcq 832(%rsp), %r13 + adcq $0, %rax + movq %rax, 32(%rsp) ## 8-byte Spill + movq 96(%rsp), %rax ## 8-byte Reload + movq 40(%rax), %rdx + leaq 680(%rsp), %rdi + movq 88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + movq 16(%rsp), %rax ## 8-byte Reload + addq 680(%rsp), %rax + adcq 688(%rsp), %r15 + movq %r15, 48(%rsp) ## 8-byte Spill + adcq 696(%rsp), %r14 + movq %r14, 56(%rsp) ## 8-byte Spill + movq 40(%rsp), %rcx ## 8-byte Reload + adcq 704(%rsp), %rcx + movq %rcx, 40(%rsp) ## 8-byte Spill + movq 24(%rsp), %r15 ## 8-byte Reload + adcq 712(%rsp), %r15 + adcq 720(%rsp), %rbp + adcq 728(%rsp), %rbx + movq %rbx, 64(%rsp) ## 8-byte Spill + adcq 736(%rsp), %r12 + movq %r12, 8(%rsp) ## 8-byte Spill + adcq 744(%rsp), %r13 + movq %r13, (%rsp) ## 8-byte Spill + movq 32(%rsp), %r13 ## 8-byte Reload + adcq 752(%rsp), %r13 + sbbq %r14, %r14 + movq %rax, %rdx + movq %rax, %rbx + imulq 80(%rsp), %rdx ## 8-byte Folded Reload + leaq 600(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + andl $1, %r14d + addq 600(%rsp), %rbx + movq 48(%rsp), %rax ## 8-byte Reload + adcq 608(%rsp), %rax + movq %rax, 48(%rsp) ## 8-byte Spill + movq 56(%rsp), %rax ## 8-byte Reload + adcq 616(%rsp), %rax + movq %rax, 56(%rsp) ## 8-byte Spill + movq 40(%rsp), %rbx ## 8-byte Reload + adcq 624(%rsp), %rbx + adcq 632(%rsp), %r15 + movq %r15, 24(%rsp) ## 8-byte Spill + adcq 640(%rsp), %rbp + movq 64(%rsp), %r12 ## 8-byte Reload + adcq 648(%rsp), %r12 + movq 8(%rsp), %rax ## 8-byte Reload + adcq 656(%rsp), %rax + movq %rax, 8(%rsp) ## 8-byte Spill + movq (%rsp), %r15 ## 8-byte Reload + adcq 664(%rsp), %r15 + adcq 672(%rsp), %r13 + adcq $0, %r14 + movq %r14, 16(%rsp) ## 8-byte Spill + movq 96(%rsp), %rax ## 8-byte Reload + movq 48(%rax), %rdx + leaq 520(%rsp), %rdi + movq 88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + 
movq 48(%rsp), %rax ## 8-byte Reload + addq 520(%rsp), %rax + movq 56(%rsp), %r14 ## 8-byte Reload + adcq 528(%rsp), %r14 + adcq 536(%rsp), %rbx + movq %rbx, 40(%rsp) ## 8-byte Spill + movq 24(%rsp), %rcx ## 8-byte Reload + adcq 544(%rsp), %rcx + movq %rcx, 24(%rsp) ## 8-byte Spill + adcq 552(%rsp), %rbp + adcq 560(%rsp), %r12 + movq %r12, 64(%rsp) ## 8-byte Spill + movq 8(%rsp), %r12 ## 8-byte Reload + adcq 568(%rsp), %r12 + adcq 576(%rsp), %r15 + movq %r15, (%rsp) ## 8-byte Spill + adcq 584(%rsp), %r13 + movq %r13, 32(%rsp) ## 8-byte Spill + movq 16(%rsp), %r15 ## 8-byte Reload + adcq 592(%rsp), %r15 + sbbq %rbx, %rbx + movq %rax, %rdx + movq %rax, %r13 + imulq 80(%rsp), %rdx ## 8-byte Folded Reload + leaq 440(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + andl $1, %ebx + movq %rbx, %rax + addq 440(%rsp), %r13 + adcq 448(%rsp), %r14 + movq %r14, 56(%rsp) ## 8-byte Spill + movq 40(%rsp), %r14 ## 8-byte Reload + adcq 456(%rsp), %r14 + movq 24(%rsp), %rbx ## 8-byte Reload + adcq 464(%rsp), %rbx + adcq 472(%rsp), %rbp + movq %rbp, 104(%rsp) ## 8-byte Spill + movq 64(%rsp), %rcx ## 8-byte Reload + adcq 480(%rsp), %rcx + movq %rcx, 64(%rsp) ## 8-byte Spill + adcq 488(%rsp), %r12 + movq %r12, 8(%rsp) ## 8-byte Spill + movq (%rsp), %rbp ## 8-byte Reload + adcq 496(%rsp), %rbp + movq 32(%rsp), %r12 ## 8-byte Reload + adcq 504(%rsp), %r12 + adcq 512(%rsp), %r15 + movq %r15, %r13 + adcq $0, %rax + movq %rax, 48(%rsp) ## 8-byte Spill + movq 96(%rsp), %rax ## 8-byte Reload + movq 56(%rax), %rdx + leaq 360(%rsp), %rdi + movq 88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + movq 56(%rsp), %rax ## 8-byte Reload + addq 360(%rsp), %rax + adcq 368(%rsp), %r14 + adcq 376(%rsp), %rbx + movq %rbx, 24(%rsp) ## 8-byte Spill + movq 104(%rsp), %rcx ## 8-byte Reload + adcq 384(%rsp), %rcx + movq %rcx, 104(%rsp) ## 8-byte Spill + movq 64(%rsp), %rbx ## 8-byte Reload + adcq 392(%rsp), %rbx + movq 8(%rsp), %r15 ## 8-byte Reload + adcq 400(%rsp), %r15 + adcq 408(%rsp), %rbp + movq %rbp, (%rsp) ## 8-byte Spill + adcq 416(%rsp), %r12 + movq %r12, %rbp + adcq 424(%rsp), %r13 + movq %r13, 16(%rsp) ## 8-byte Spill + movq 48(%rsp), %rcx ## 8-byte Reload + adcq 432(%rsp), %rcx + movq %rcx, 48(%rsp) ## 8-byte Spill + sbbq %r13, %r13 + movq %rax, %rdx + movq %rax, %r12 + imulq 80(%rsp), %rdx ## 8-byte Folded Reload + leaq 280(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + andl $1, %r13d + addq 280(%rsp), %r12 + adcq 288(%rsp), %r14 + movq %r14, 40(%rsp) ## 8-byte Spill + movq 24(%rsp), %rax ## 8-byte Reload + adcq 296(%rsp), %rax + movq %rax, 24(%rsp) ## 8-byte Spill + movq 104(%rsp), %r14 ## 8-byte Reload + adcq 304(%rsp), %r14 + adcq 312(%rsp), %rbx + movq %rbx, 64(%rsp) ## 8-byte Spill + adcq 320(%rsp), %r15 + movq %r15, 8(%rsp) ## 8-byte Spill + movq (%rsp), %rbx ## 8-byte Reload + adcq 328(%rsp), %rbx + adcq 336(%rsp), %rbp + movq %rbp, 32(%rsp) ## 8-byte Spill + movq 16(%rsp), %r12 ## 8-byte Reload + adcq 344(%rsp), %r12 + movq 48(%rsp), %rbp ## 8-byte Reload + adcq 352(%rsp), %rbp + adcq $0, %r13 + movq 96(%rsp), %rax ## 8-byte Reload + movq 64(%rax), %rdx + leaq 200(%rsp), %rdi + movq 88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + movq 40(%rsp), %rax ## 8-byte Reload + addq 200(%rsp), %rax + movq 24(%rsp), %r15 ## 8-byte Reload + adcq 208(%rsp), %r15 + adcq 216(%rsp), %r14 + movq %r14, 104(%rsp) ## 8-byte Spill + movq 64(%rsp), %r14 ## 8-byte Reload + adcq 224(%rsp), %r14 + movq 8(%rsp), %rcx ## 8-byte Reload + adcq 232(%rsp), %rcx + movq %rcx, 
8(%rsp) ## 8-byte Spill + adcq 240(%rsp), %rbx + movq %rbx, (%rsp) ## 8-byte Spill + movq 32(%rsp), %rcx ## 8-byte Reload + adcq 248(%rsp), %rcx + movq %rcx, 32(%rsp) ## 8-byte Spill + adcq 256(%rsp), %r12 + movq %r12, 16(%rsp) ## 8-byte Spill + adcq 264(%rsp), %rbp + movq %rbp, 48(%rsp) ## 8-byte Spill + adcq 272(%rsp), %r13 + sbbq %rbx, %rbx + movq 80(%rsp), %rdx ## 8-byte Reload + imulq %rax, %rdx + movq %rax, %r12 + leaq 120(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + andl $1, %ebx + addq 120(%rsp), %r12 + adcq 128(%rsp), %r15 + movq 104(%rsp), %rbp ## 8-byte Reload + adcq 136(%rsp), %rbp + movq %r14, %rcx + adcq 144(%rsp), %rcx + movq %rcx, 64(%rsp) ## 8-byte Spill + movq 8(%rsp), %r8 ## 8-byte Reload + adcq 152(%rsp), %r8 + movq %r8, 8(%rsp) ## 8-byte Spill + movq (%rsp), %r9 ## 8-byte Reload + adcq 160(%rsp), %r9 + movq %r9, (%rsp) ## 8-byte Spill + movq 32(%rsp), %r10 ## 8-byte Reload + adcq 168(%rsp), %r10 + movq %r10, 32(%rsp) ## 8-byte Spill + movq 16(%rsp), %rdi ## 8-byte Reload + adcq 176(%rsp), %rdi + movq %rdi, 16(%rsp) ## 8-byte Spill + movq 48(%rsp), %r14 ## 8-byte Reload + adcq 184(%rsp), %r14 + adcq 192(%rsp), %r13 + adcq $0, %rbx + movq %r15, %rsi + movq %r15, %r12 + movq 72(%rsp), %rdx ## 8-byte Reload + subq (%rdx), %rsi + movq %rbp, %rax + movq %rbp, %r15 + sbbq 8(%rdx), %rax + movq %rcx, %rbp + sbbq 16(%rdx), %rbp + movq %r8, %rcx + sbbq 24(%rdx), %rcx + movq %r9, %r8 + sbbq 32(%rdx), %r8 + movq %r10, %r11 + sbbq 40(%rdx), %r11 + movq %rdi, %r10 + sbbq 48(%rdx), %r10 + movq %r14, %rdi + sbbq 56(%rdx), %rdi + movq %r13, %r9 + sbbq 64(%rdx), %r9 + sbbq $0, %rbx + andl $1, %ebx + cmovneq %r13, %r9 + testb %bl, %bl + cmovneq %r12, %rsi + movq 112(%rsp), %rbx ## 8-byte Reload + movq %rsi, (%rbx) + cmovneq %r15, %rax + movq %rax, 8(%rbx) + cmovneq 64(%rsp), %rbp ## 8-byte Folded Reload + movq %rbp, 16(%rbx) + cmovneq 8(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, 24(%rbx) + cmovneq (%rsp), %r8 ## 8-byte Folded Reload + movq %r8, 32(%rbx) + cmovneq 32(%rsp), %r11 ## 8-byte Folded Reload + movq %r11, 40(%rbx) + cmovneq 16(%rsp), %r10 ## 8-byte Folded Reload + movq %r10, 48(%rbx) + cmovneq %r14, %rdi + movq %rdi, 56(%rbx) + movq %r9, 64(%rbx) + addq $1560, %rsp ## imm = 0x618 + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_montNF9Lbmi2 + .p2align 4, 0x90 +_mcl_fp_montNF9Lbmi2: ## @mcl_fp_montNF9Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + subq $1560, %rsp ## imm = 0x618 + movq %rcx, 72(%rsp) ## 8-byte Spill + movq %rdx, 80(%rsp) ## 8-byte Spill + movq %rsi, 88(%rsp) ## 8-byte Spill + movq %rdi, 112(%rsp) ## 8-byte Spill + movq -8(%rcx), %rbx + movq %rbx, 96(%rsp) ## 8-byte Spill + movq (%rdx), %rdx + leaq 1480(%rsp), %rdi + callq l_mulPv576x64 + movq 1480(%rsp), %r12 + movq 1488(%rsp), %rax + movq %rax, 16(%rsp) ## 8-byte Spill + movq %r12, %rdx + imulq %rbx, %rdx + movq 1552(%rsp), %rax + movq %rax, 24(%rsp) ## 8-byte Spill + movq 1544(%rsp), %r13 + movq 1536(%rsp), %rax + movq %rax, 48(%rsp) ## 8-byte Spill + movq 1528(%rsp), %rax + movq %rax, 64(%rsp) ## 8-byte Spill + movq 1520(%rsp), %r14 + movq 1512(%rsp), %r15 + movq 1504(%rsp), %rbx + movq 1496(%rsp), %rbp + leaq 1400(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + addq 1400(%rsp), %r12 + movq 16(%rsp), %rax ## 8-byte Reload + adcq 1408(%rsp), %rax + movq %rax, 16(%rsp) ## 8-byte Spill + adcq 1416(%rsp), %rbp + movq %rbp, 104(%rsp) ## 8-byte Spill + adcq 
1424(%rsp), %rbx + movq %rbx, (%rsp) ## 8-byte Spill + adcq 1432(%rsp), %r15 + movq %r15, 8(%rsp) ## 8-byte Spill + adcq 1440(%rsp), %r14 + movq %r14, 32(%rsp) ## 8-byte Spill + movq 64(%rsp), %rbx ## 8-byte Reload + adcq 1448(%rsp), %rbx + movq 48(%rsp), %r12 ## 8-byte Reload + adcq 1456(%rsp), %r12 + adcq 1464(%rsp), %r13 + movq %r13, 40(%rsp) ## 8-byte Spill + movq 24(%rsp), %rbp ## 8-byte Reload + adcq 1472(%rsp), %rbp + movq 80(%rsp), %rax ## 8-byte Reload + movq 8(%rax), %rdx + leaq 1320(%rsp), %rdi + movq 88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + movq 1392(%rsp), %rax + movq 16(%rsp), %rcx ## 8-byte Reload + addq 1320(%rsp), %rcx + movq 104(%rsp), %r15 ## 8-byte Reload + adcq 1328(%rsp), %r15 + movq (%rsp), %r14 ## 8-byte Reload + adcq 1336(%rsp), %r14 + movq 8(%rsp), %rdx ## 8-byte Reload + adcq 1344(%rsp), %rdx + movq %rdx, 8(%rsp) ## 8-byte Spill + movq 32(%rsp), %r13 ## 8-byte Reload + adcq 1352(%rsp), %r13 + adcq 1360(%rsp), %rbx + movq %rbx, 64(%rsp) ## 8-byte Spill + adcq 1368(%rsp), %r12 + movq %r12, 48(%rsp) ## 8-byte Spill + movq 40(%rsp), %rdx ## 8-byte Reload + adcq 1376(%rsp), %rdx + movq %rdx, 40(%rsp) ## 8-byte Spill + adcq 1384(%rsp), %rbp + movq %rbp, 24(%rsp) ## 8-byte Spill + adcq $0, %rax + movq %rax, %rbp + movq %rcx, %rdx + movq %rcx, %rbx + imulq 96(%rsp), %rdx ## 8-byte Folded Reload + leaq 1240(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + addq 1240(%rsp), %rbx + adcq 1248(%rsp), %r15 + movq %r15, 104(%rsp) ## 8-byte Spill + adcq 1256(%rsp), %r14 + movq %r14, (%rsp) ## 8-byte Spill + movq 8(%rsp), %r12 ## 8-byte Reload + adcq 1264(%rsp), %r12 + adcq 1272(%rsp), %r13 + movq %r13, %r14 + movq 64(%rsp), %r13 ## 8-byte Reload + adcq 1280(%rsp), %r13 + movq 48(%rsp), %rbx ## 8-byte Reload + adcq 1288(%rsp), %rbx + movq 40(%rsp), %r15 ## 8-byte Reload + adcq 1296(%rsp), %r15 + movq 24(%rsp), %rax ## 8-byte Reload + adcq 1304(%rsp), %rax + movq %rax, 24(%rsp) ## 8-byte Spill + adcq 1312(%rsp), %rbp + movq %rbp, 56(%rsp) ## 8-byte Spill + movq 80(%rsp), %rax ## 8-byte Reload + movq 16(%rax), %rdx + leaq 1160(%rsp), %rdi + movq 88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + movq 1232(%rsp), %rax + movq 104(%rsp), %rcx ## 8-byte Reload + addq 1160(%rsp), %rcx + movq (%rsp), %rbp ## 8-byte Reload + adcq 1168(%rsp), %rbp + adcq 1176(%rsp), %r12 + movq %r12, 8(%rsp) ## 8-byte Spill + adcq 1184(%rsp), %r14 + adcq 1192(%rsp), %r13 + movq %r13, %r12 + adcq 1200(%rsp), %rbx + movq %rbx, 48(%rsp) ## 8-byte Spill + adcq 1208(%rsp), %r15 + movq %r15, 40(%rsp) ## 8-byte Spill + movq 24(%rsp), %rbx ## 8-byte Reload + adcq 1216(%rsp), %rbx + movq 56(%rsp), %rdx ## 8-byte Reload + adcq 1224(%rsp), %rdx + movq %rdx, 56(%rsp) ## 8-byte Spill + movq %rax, %r15 + adcq $0, %r15 + movq %rcx, %rdx + movq %rcx, %r13 + imulq 96(%rsp), %rdx ## 8-byte Folded Reload + leaq 1080(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + addq 1080(%rsp), %r13 + adcq 1088(%rsp), %rbp + movq %rbp, (%rsp) ## 8-byte Spill + movq 8(%rsp), %r13 ## 8-byte Reload + adcq 1096(%rsp), %r13 + adcq 1104(%rsp), %r14 + adcq 1112(%rsp), %r12 + movq %r12, 64(%rsp) ## 8-byte Spill + movq 48(%rsp), %r12 ## 8-byte Reload + adcq 1120(%rsp), %r12 + movq 40(%rsp), %rbp ## 8-byte Reload + adcq 1128(%rsp), %rbp + adcq 1136(%rsp), %rbx + movq %rbx, 24(%rsp) ## 8-byte Spill + movq 56(%rsp), %rbx ## 8-byte Reload + adcq 1144(%rsp), %rbx + adcq 1152(%rsp), %r15 + movq 80(%rsp), %rax ## 8-byte Reload + movq 24(%rax), %rdx + leaq 1000(%rsp), %rdi + movq 
88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + movq 1072(%rsp), %rax + movq (%rsp), %rcx ## 8-byte Reload + addq 1000(%rsp), %rcx + adcq 1008(%rsp), %r13 + movq %r13, 8(%rsp) ## 8-byte Spill + adcq 1016(%rsp), %r14 + movq %r14, 32(%rsp) ## 8-byte Spill + movq 64(%rsp), %r14 ## 8-byte Reload + adcq 1024(%rsp), %r14 + adcq 1032(%rsp), %r12 + adcq 1040(%rsp), %rbp + movq %rbp, 40(%rsp) ## 8-byte Spill + movq 24(%rsp), %r13 ## 8-byte Reload + adcq 1048(%rsp), %r13 + adcq 1056(%rsp), %rbx + movq %rbx, 56(%rsp) ## 8-byte Spill + adcq 1064(%rsp), %r15 + movq %r15, 16(%rsp) ## 8-byte Spill + adcq $0, %rax + movq %rax, (%rsp) ## 8-byte Spill + movq %rcx, %rdx + movq %rcx, %rbx + imulq 96(%rsp), %rdx ## 8-byte Folded Reload + leaq 920(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + addq 920(%rsp), %rbx + movq 8(%rsp), %rax ## 8-byte Reload + adcq 928(%rsp), %rax + movq %rax, 8(%rsp) ## 8-byte Spill + movq 32(%rsp), %rbp ## 8-byte Reload + adcq 936(%rsp), %rbp + movq %r14, %rbx + adcq 944(%rsp), %rbx + adcq 952(%rsp), %r12 + movq 40(%rsp), %rax ## 8-byte Reload + adcq 960(%rsp), %rax + movq %rax, 40(%rsp) ## 8-byte Spill + adcq 968(%rsp), %r13 + movq %r13, %r15 + movq 56(%rsp), %r13 ## 8-byte Reload + adcq 976(%rsp), %r13 + movq 16(%rsp), %r14 ## 8-byte Reload + adcq 984(%rsp), %r14 + movq (%rsp), %rax ## 8-byte Reload + adcq 992(%rsp), %rax + movq %rax, (%rsp) ## 8-byte Spill + movq 80(%rsp), %rax ## 8-byte Reload + movq 32(%rax), %rdx + leaq 840(%rsp), %rdi + movq 88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + movq 912(%rsp), %rax + movq 8(%rsp), %rcx ## 8-byte Reload + addq 840(%rsp), %rcx + adcq 848(%rsp), %rbp + movq %rbp, 32(%rsp) ## 8-byte Spill + adcq 856(%rsp), %rbx + movq %rbx, 64(%rsp) ## 8-byte Spill + adcq 864(%rsp), %r12 + movq 40(%rsp), %rbp ## 8-byte Reload + adcq 872(%rsp), %rbp + adcq 880(%rsp), %r15 + movq %r15, 24(%rsp) ## 8-byte Spill + adcq 888(%rsp), %r13 + adcq 896(%rsp), %r14 + movq %r14, 16(%rsp) ## 8-byte Spill + movq (%rsp), %rdx ## 8-byte Reload + adcq 904(%rsp), %rdx + movq %rdx, (%rsp) ## 8-byte Spill + adcq $0, %rax + movq %rax, %r14 + movq %rcx, %rdx + movq %rcx, %rbx + imulq 96(%rsp), %rdx ## 8-byte Folded Reload + leaq 760(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + addq 760(%rsp), %rbx + movq 32(%rsp), %rax ## 8-byte Reload + adcq 768(%rsp), %rax + movq %rax, 32(%rsp) ## 8-byte Spill + movq 64(%rsp), %r15 ## 8-byte Reload + adcq 776(%rsp), %r15 + adcq 784(%rsp), %r12 + movq %r12, 48(%rsp) ## 8-byte Spill + movq %rbp, %rbx + adcq 792(%rsp), %rbx + movq 24(%rsp), %rbp ## 8-byte Reload + adcq 800(%rsp), %rbp + adcq 808(%rsp), %r13 + movq 16(%rsp), %rax ## 8-byte Reload + adcq 816(%rsp), %rax + movq %rax, 16(%rsp) ## 8-byte Spill + movq (%rsp), %r12 ## 8-byte Reload + adcq 824(%rsp), %r12 + adcq 832(%rsp), %r14 + movq 80(%rsp), %rax ## 8-byte Reload + movq 40(%rax), %rdx + leaq 680(%rsp), %rdi + movq 88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + movq 752(%rsp), %rcx + movq 32(%rsp), %rax ## 8-byte Reload + addq 680(%rsp), %rax + adcq 688(%rsp), %r15 + movq %r15, 64(%rsp) ## 8-byte Spill + movq 48(%rsp), %rdx ## 8-byte Reload + adcq 696(%rsp), %rdx + movq %rdx, 48(%rsp) ## 8-byte Spill + adcq 704(%rsp), %rbx + movq %rbx, 40(%rsp) ## 8-byte Spill + adcq 712(%rsp), %rbp + movq %rbp, 24(%rsp) ## 8-byte Spill + adcq 720(%rsp), %r13 + movq %r13, %r15 + movq 16(%rsp), %rbx ## 8-byte Reload + adcq 728(%rsp), %rbx + adcq 736(%rsp), %r12 + movq %r12, (%rsp) ## 8-byte Spill + adcq 744(%rsp), 
%r14 + movq %r14, 32(%rsp) ## 8-byte Spill + adcq $0, %rcx + movq %rcx, 8(%rsp) ## 8-byte Spill + movq %rax, %rdx + movq %rax, %r13 + imulq 96(%rsp), %rdx ## 8-byte Folded Reload + leaq 600(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + addq 600(%rsp), %r13 + movq 64(%rsp), %r13 ## 8-byte Reload + adcq 608(%rsp), %r13 + movq 48(%rsp), %r12 ## 8-byte Reload + adcq 616(%rsp), %r12 + movq 40(%rsp), %rbp ## 8-byte Reload + adcq 624(%rsp), %rbp + movq 24(%rsp), %rax ## 8-byte Reload + adcq 632(%rsp), %rax + movq %rax, 24(%rsp) ## 8-byte Spill + adcq 640(%rsp), %r15 + movq %r15, 56(%rsp) ## 8-byte Spill + adcq 648(%rsp), %rbx + movq %rbx, 16(%rsp) ## 8-byte Spill + movq (%rsp), %r14 ## 8-byte Reload + adcq 656(%rsp), %r14 + movq 32(%rsp), %rbx ## 8-byte Reload + adcq 664(%rsp), %rbx + movq 8(%rsp), %r15 ## 8-byte Reload + adcq 672(%rsp), %r15 + movq 80(%rsp), %rax ## 8-byte Reload + movq 48(%rax), %rdx + leaq 520(%rsp), %rdi + movq 88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + movq 592(%rsp), %rcx + movq %r13, %rax + addq 520(%rsp), %rax + adcq 528(%rsp), %r12 + movq %r12, 48(%rsp) ## 8-byte Spill + movq %rbp, %r12 + adcq 536(%rsp), %r12 + movq 24(%rsp), %rbp ## 8-byte Reload + adcq 544(%rsp), %rbp + movq 56(%rsp), %rdx ## 8-byte Reload + adcq 552(%rsp), %rdx + movq %rdx, 56(%rsp) ## 8-byte Spill + movq 16(%rsp), %rdx ## 8-byte Reload + adcq 560(%rsp), %rdx + movq %rdx, 16(%rsp) ## 8-byte Spill + adcq 568(%rsp), %r14 + movq %r14, (%rsp) ## 8-byte Spill + adcq 576(%rsp), %rbx + movq %rbx, 32(%rsp) ## 8-byte Spill + adcq 584(%rsp), %r15 + movq %r15, 8(%rsp) ## 8-byte Spill + adcq $0, %rcx + movq %rcx, %r13 + movq %rax, %rdx + movq %rax, %r14 + imulq 96(%rsp), %rdx ## 8-byte Folded Reload + leaq 440(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + addq 440(%rsp), %r14 + movq 48(%rsp), %rax ## 8-byte Reload + adcq 448(%rsp), %rax + movq %rax, 48(%rsp) ## 8-byte Spill + adcq 456(%rsp), %r12 + adcq 464(%rsp), %rbp + movq %rbp, 24(%rsp) ## 8-byte Spill + movq 56(%rsp), %r14 ## 8-byte Reload + adcq 472(%rsp), %r14 + movq 16(%rsp), %r15 ## 8-byte Reload + adcq 480(%rsp), %r15 + movq (%rsp), %rbp ## 8-byte Reload + adcq 488(%rsp), %rbp + movq 32(%rsp), %rbx ## 8-byte Reload + adcq 496(%rsp), %rbx + movq 8(%rsp), %rax ## 8-byte Reload + adcq 504(%rsp), %rax + movq %rax, 8(%rsp) ## 8-byte Spill + adcq 512(%rsp), %r13 + movq 80(%rsp), %rax ## 8-byte Reload + movq 56(%rax), %rdx + leaq 360(%rsp), %rdi + movq 88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + movq 432(%rsp), %rcx + movq 48(%rsp), %rax ## 8-byte Reload + addq 360(%rsp), %rax + adcq 368(%rsp), %r12 + movq %r12, 40(%rsp) ## 8-byte Spill + movq 24(%rsp), %rdx ## 8-byte Reload + adcq 376(%rsp), %rdx + movq %rdx, 24(%rsp) ## 8-byte Spill + adcq 384(%rsp), %r14 + movq %r14, 56(%rsp) ## 8-byte Spill + adcq 392(%rsp), %r15 + movq %r15, 16(%rsp) ## 8-byte Spill + adcq 400(%rsp), %rbp + movq %rbp, (%rsp) ## 8-byte Spill + adcq 408(%rsp), %rbx + movq %rbx, 32(%rsp) ## 8-byte Spill + movq 8(%rsp), %r14 ## 8-byte Reload + adcq 416(%rsp), %r14 + adcq 424(%rsp), %r13 + movq %r13, %r15 + adcq $0, %rcx + movq %rcx, 48(%rsp) ## 8-byte Spill + movq %rax, %rdx + movq %rax, %r12 + imulq 96(%rsp), %rdx ## 8-byte Folded Reload + leaq 280(%rsp), %rdi + movq 72(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + addq 280(%rsp), %r12 + movq 40(%rsp), %rax ## 8-byte Reload + adcq 288(%rsp), %rax + movq %rax, 40(%rsp) ## 8-byte Spill + movq 24(%rsp), %rbp ## 8-byte Reload + adcq 296(%rsp), %rbp + movq 
56(%rsp), %rax ## 8-byte Reload + adcq 304(%rsp), %rax + movq %rax, 56(%rsp) ## 8-byte Spill + movq 16(%rsp), %r13 ## 8-byte Reload + adcq 312(%rsp), %r13 + movq (%rsp), %r12 ## 8-byte Reload + adcq 320(%rsp), %r12 + movq 32(%rsp), %rbx ## 8-byte Reload + adcq 328(%rsp), %rbx + adcq 336(%rsp), %r14 + movq %r14, 8(%rsp) ## 8-byte Spill + adcq 344(%rsp), %r15 + movq %r15, 64(%rsp) ## 8-byte Spill + movq 48(%rsp), %r14 ## 8-byte Reload + adcq 352(%rsp), %r14 + movq 80(%rsp), %rax ## 8-byte Reload + movq 64(%rax), %rdx + leaq 200(%rsp), %rdi + movq 88(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + movq 272(%rsp), %rcx + movq 40(%rsp), %rax ## 8-byte Reload + addq 200(%rsp), %rax + adcq 208(%rsp), %rbp + movq %rbp, 24(%rsp) ## 8-byte Spill + movq 56(%rsp), %rbp ## 8-byte Reload + adcq 216(%rsp), %rbp + adcq 224(%rsp), %r13 + movq %r13, 16(%rsp) ## 8-byte Spill + adcq 232(%rsp), %r12 + movq %r12, (%rsp) ## 8-byte Spill + adcq 240(%rsp), %rbx + movq %rbx, 32(%rsp) ## 8-byte Spill + movq 8(%rsp), %r15 ## 8-byte Reload + adcq 248(%rsp), %r15 + movq 64(%rsp), %r12 ## 8-byte Reload + adcq 256(%rsp), %r12 + adcq 264(%rsp), %r14 + adcq $0, %rcx + movq %rcx, 40(%rsp) ## 8-byte Spill + movq 96(%rsp), %rdx ## 8-byte Reload + imulq %rax, %rdx + movq %rax, %rbx + leaq 120(%rsp), %rdi + movq 72(%rsp), %r13 ## 8-byte Reload + movq %r13, %rsi + callq l_mulPv576x64 + addq 120(%rsp), %rbx + movq 24(%rsp), %rcx ## 8-byte Reload + adcq 128(%rsp), %rcx + movq %rbp, %rdx + adcq 136(%rsp), %rdx + movq 16(%rsp), %rsi ## 8-byte Reload + adcq 144(%rsp), %rsi + movq %rsi, 16(%rsp) ## 8-byte Spill + movq (%rsp), %rdi ## 8-byte Reload + adcq 152(%rsp), %rdi + movq %rdi, (%rsp) ## 8-byte Spill + movq 32(%rsp), %rbx ## 8-byte Reload + adcq 160(%rsp), %rbx + movq %rbx, 32(%rsp) ## 8-byte Spill + movq %r15, %r8 + adcq 168(%rsp), %r8 + movq %r8, 8(%rsp) ## 8-byte Spill + movq %r12, %r15 + adcq 176(%rsp), %r15 + adcq 184(%rsp), %r14 + movq 40(%rsp), %r9 ## 8-byte Reload + adcq 192(%rsp), %r9 + movq %rcx, %rax + movq %rcx, %r11 + movq %r13, %rbp + subq (%rbp), %rax + movq %rdx, %rcx + movq %rdx, %r12 + sbbq 8(%rbp), %rcx + movq %rsi, %rdx + sbbq 16(%rbp), %rdx + movq %rdi, %rsi + sbbq 24(%rbp), %rsi + movq %rbx, %rdi + sbbq 32(%rbp), %rdi + movq %r8, %r10 + sbbq 40(%rbp), %r10 + movq %r15, %r13 + sbbq 48(%rbp), %r13 + movq %r14, %r8 + sbbq 56(%rbp), %r8 + movq %rbp, %rbx + movq %r9, %rbp + sbbq 64(%rbx), %rbp + movq %rbp, %rbx + sarq $63, %rbx + cmovsq %r11, %rax + movq 112(%rsp), %rbx ## 8-byte Reload + movq %rax, (%rbx) + cmovsq %r12, %rcx + movq %rcx, 8(%rbx) + cmovsq 16(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, 16(%rbx) + cmovsq (%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, 24(%rbx) + cmovsq 32(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, 32(%rbx) + cmovsq 8(%rsp), %r10 ## 8-byte Folded Reload + movq %r10, 40(%rbx) + cmovsq %r15, %r13 + movq %r13, 48(%rbx) + cmovsq %r14, %r8 + movq %r8, 56(%rbx) + cmovsq %r9, %rbp + movq %rbp, 64(%rbx) + addq $1560, %rsp ## imm = 0x618 + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_montRed9Lbmi2 + .p2align 4, 0x90 +_mcl_fp_montRed9Lbmi2: ## @mcl_fp_montRed9Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + subq $936, %rsp ## imm = 0x3A8 + movq %rdx, %rax + movq %rdi, 208(%rsp) ## 8-byte Spill + movq -8(%rax), %rcx + movq %rcx, 96(%rsp) ## 8-byte Spill + movq (%rsi), %r14 + movq 8(%rsi), %rdx + movq %rdx, (%rsp) ## 8-byte Spill + movq %r14, %rdx + imulq %rcx, %rdx + movq 
136(%rsi), %rcx + movq %rcx, 88(%rsp) ## 8-byte Spill + movq 128(%rsi), %rcx + movq %rcx, 56(%rsp) ## 8-byte Spill + movq 120(%rsi), %rcx + movq %rcx, 80(%rsp) ## 8-byte Spill + movq 112(%rsi), %rcx + movq %rcx, 72(%rsp) ## 8-byte Spill + movq 104(%rsi), %rcx + movq %rcx, 48(%rsp) ## 8-byte Spill + movq 96(%rsi), %rcx + movq %rcx, 16(%rsp) ## 8-byte Spill + movq 88(%rsi), %rcx + movq %rcx, 8(%rsp) ## 8-byte Spill + movq 80(%rsi), %rcx + movq %rcx, 24(%rsp) ## 8-byte Spill + movq 72(%rsi), %r12 + movq 64(%rsi), %rcx + movq %rcx, 40(%rsp) ## 8-byte Spill + movq 56(%rsi), %rcx + movq %rcx, 32(%rsp) ## 8-byte Spill + movq 48(%rsi), %rcx + movq %rcx, 64(%rsp) ## 8-byte Spill + movq 40(%rsi), %rbp + movq 32(%rsi), %rbx + movq 24(%rsi), %r13 + movq 16(%rsi), %r15 + movq %rax, %rcx + movq (%rcx), %rax + movq %rax, 144(%rsp) ## 8-byte Spill + movq 64(%rcx), %rax + movq %rax, 200(%rsp) ## 8-byte Spill + movq 56(%rcx), %rax + movq %rax, 192(%rsp) ## 8-byte Spill + movq 48(%rcx), %rax + movq %rax, 184(%rsp) ## 8-byte Spill + movq 40(%rcx), %rax + movq %rax, 176(%rsp) ## 8-byte Spill + movq 32(%rcx), %rax + movq %rax, 168(%rsp) ## 8-byte Spill + movq 24(%rcx), %rax + movq %rax, 160(%rsp) ## 8-byte Spill + movq 16(%rcx), %rax + movq %rax, 152(%rsp) ## 8-byte Spill + movq 8(%rcx), %rax + movq %rax, 136(%rsp) ## 8-byte Spill + movq %rcx, %rsi + movq %rsi, 104(%rsp) ## 8-byte Spill + leaq 856(%rsp), %rdi + callq l_mulPv576x64 + addq 856(%rsp), %r14 + movq (%rsp), %rcx ## 8-byte Reload + adcq 864(%rsp), %rcx + adcq 872(%rsp), %r15 + adcq 880(%rsp), %r13 + adcq 888(%rsp), %rbx + movq %rbx, 120(%rsp) ## 8-byte Spill + adcq 896(%rsp), %rbp + movq %rbp, 112(%rsp) ## 8-byte Spill + movq 64(%rsp), %rax ## 8-byte Reload + adcq 904(%rsp), %rax + movq %rax, 64(%rsp) ## 8-byte Spill + movq 32(%rsp), %rax ## 8-byte Reload + adcq 912(%rsp), %rax + movq %rax, 32(%rsp) ## 8-byte Spill + movq 40(%rsp), %rax ## 8-byte Reload + adcq 920(%rsp), %rax + movq %rax, 40(%rsp) ## 8-byte Spill + adcq 928(%rsp), %r12 + movq %r12, (%rsp) ## 8-byte Spill + movq 24(%rsp), %rbp ## 8-byte Reload + adcq $0, %rbp + adcq $0, 8(%rsp) ## 8-byte Folded Spill + adcq $0, 16(%rsp) ## 8-byte Folded Spill + adcq $0, 48(%rsp) ## 8-byte Folded Spill + adcq $0, 72(%rsp) ## 8-byte Folded Spill + adcq $0, 80(%rsp) ## 8-byte Folded Spill + adcq $0, 56(%rsp) ## 8-byte Folded Spill + movq 88(%rsp), %r14 ## 8-byte Reload + adcq $0, %r14 + sbbq %r12, %r12 + movq %rcx, %rdx + movq %rcx, %rbx + imulq 96(%rsp), %rdx ## 8-byte Folded Reload + leaq 776(%rsp), %rdi + movq 104(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + andl $1, %r12d + addq 776(%rsp), %rbx + adcq 784(%rsp), %r15 + adcq 792(%rsp), %r13 + movq %r13, 128(%rsp) ## 8-byte Spill + movq 120(%rsp), %rax ## 8-byte Reload + adcq 800(%rsp), %rax + movq %rax, 120(%rsp) ## 8-byte Spill + movq 112(%rsp), %rax ## 8-byte Reload + adcq 808(%rsp), %rax + movq %rax, 112(%rsp) ## 8-byte Spill + movq 64(%rsp), %rax ## 8-byte Reload + adcq 816(%rsp), %rax + movq %rax, 64(%rsp) ## 8-byte Spill + movq 32(%rsp), %rax ## 8-byte Reload + adcq 824(%rsp), %rax + movq %rax, 32(%rsp) ## 8-byte Spill + movq 40(%rsp), %rax ## 8-byte Reload + adcq 832(%rsp), %rax + movq %rax, 40(%rsp) ## 8-byte Spill + movq (%rsp), %rax ## 8-byte Reload + adcq 840(%rsp), %rax + movq %rax, (%rsp) ## 8-byte Spill + adcq 848(%rsp), %rbp + movq %rbp, 24(%rsp) ## 8-byte Spill + movq 8(%rsp), %r13 ## 8-byte Reload + adcq $0, %r13 + adcq $0, 16(%rsp) ## 8-byte Folded Spill + adcq $0, 48(%rsp) ## 8-byte Folded Spill + adcq $0, 72(%rsp) ## 8-byte 
Folded Spill + adcq $0, 80(%rsp) ## 8-byte Folded Spill + movq 56(%rsp), %rbx ## 8-byte Reload + adcq $0, %rbx + adcq $0, %r14 + movq %r14, 88(%rsp) ## 8-byte Spill + adcq $0, %r12 + movq %r15, %rdx + imulq 96(%rsp), %rdx ## 8-byte Folded Reload + leaq 696(%rsp), %rdi + movq 104(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + addq 696(%rsp), %r15 + movq 128(%rsp), %rcx ## 8-byte Reload + adcq 704(%rsp), %rcx + movq 120(%rsp), %rax ## 8-byte Reload + adcq 712(%rsp), %rax + movq %rax, 120(%rsp) ## 8-byte Spill + movq 112(%rsp), %rax ## 8-byte Reload + adcq 720(%rsp), %rax + movq %rax, 112(%rsp) ## 8-byte Spill + movq 64(%rsp), %rbp ## 8-byte Reload + adcq 728(%rsp), %rbp + movq 32(%rsp), %r14 ## 8-byte Reload + adcq 736(%rsp), %r14 + movq 40(%rsp), %r15 ## 8-byte Reload + adcq 744(%rsp), %r15 + movq (%rsp), %rax ## 8-byte Reload + adcq 752(%rsp), %rax + movq %rax, (%rsp) ## 8-byte Spill + movq 24(%rsp), %rax ## 8-byte Reload + adcq 760(%rsp), %rax + movq %rax, 24(%rsp) ## 8-byte Spill + adcq 768(%rsp), %r13 + movq %r13, 8(%rsp) ## 8-byte Spill + adcq $0, 16(%rsp) ## 8-byte Folded Spill + movq 48(%rsp), %r13 ## 8-byte Reload + adcq $0, %r13 + adcq $0, 72(%rsp) ## 8-byte Folded Spill + adcq $0, 80(%rsp) ## 8-byte Folded Spill + adcq $0, %rbx + movq %rbx, 56(%rsp) ## 8-byte Spill + adcq $0, 88(%rsp) ## 8-byte Folded Spill + adcq $0, %r12 + movq %rcx, %rbx + movq %rbx, %rdx + imulq 96(%rsp), %rdx ## 8-byte Folded Reload + leaq 616(%rsp), %rdi + movq 104(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + addq 616(%rsp), %rbx + movq 120(%rsp), %rax ## 8-byte Reload + adcq 624(%rsp), %rax + movq 112(%rsp), %rcx ## 8-byte Reload + adcq 632(%rsp), %rcx + movq %rcx, 112(%rsp) ## 8-byte Spill + adcq 640(%rsp), %rbp + movq %rbp, 64(%rsp) ## 8-byte Spill + adcq 648(%rsp), %r14 + movq %r14, 32(%rsp) ## 8-byte Spill + adcq 656(%rsp), %r15 + movq (%rsp), %r14 ## 8-byte Reload + adcq 664(%rsp), %r14 + movq 24(%rsp), %rbp ## 8-byte Reload + adcq 672(%rsp), %rbp + movq 8(%rsp), %rcx ## 8-byte Reload + adcq 680(%rsp), %rcx + movq %rcx, 8(%rsp) ## 8-byte Spill + movq 16(%rsp), %rcx ## 8-byte Reload + adcq 688(%rsp), %rcx + movq %rcx, 16(%rsp) ## 8-byte Spill + adcq $0, %r13 + movq %r13, 48(%rsp) ## 8-byte Spill + adcq $0, 72(%rsp) ## 8-byte Folded Spill + adcq $0, 80(%rsp) ## 8-byte Folded Spill + adcq $0, 56(%rsp) ## 8-byte Folded Spill + adcq $0, 88(%rsp) ## 8-byte Folded Spill + adcq $0, %r12 + movq %rax, %rbx + movq %rbx, %rdx + imulq 96(%rsp), %rdx ## 8-byte Folded Reload + leaq 536(%rsp), %rdi + movq 104(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + addq 536(%rsp), %rbx + movq 112(%rsp), %rax ## 8-byte Reload + adcq 544(%rsp), %rax + movq 64(%rsp), %rcx ## 8-byte Reload + adcq 552(%rsp), %rcx + movq %rcx, 64(%rsp) ## 8-byte Spill + movq 32(%rsp), %rcx ## 8-byte Reload + adcq 560(%rsp), %rcx + movq %rcx, 32(%rsp) ## 8-byte Spill + adcq 568(%rsp), %r15 + movq %r15, 40(%rsp) ## 8-byte Spill + adcq 576(%rsp), %r14 + movq %r14, (%rsp) ## 8-byte Spill + adcq 584(%rsp), %rbp + movq %rbp, 24(%rsp) ## 8-byte Spill + movq 8(%rsp), %r13 ## 8-byte Reload + adcq 592(%rsp), %r13 + movq 16(%rsp), %r15 ## 8-byte Reload + adcq 600(%rsp), %r15 + movq 48(%rsp), %rbp ## 8-byte Reload + adcq 608(%rsp), %rbp + movq 72(%rsp), %rbx ## 8-byte Reload + adcq $0, %rbx + adcq $0, 80(%rsp) ## 8-byte Folded Spill + adcq $0, 56(%rsp) ## 8-byte Folded Spill + adcq $0, 88(%rsp) ## 8-byte Folded Spill + adcq $0, %r12 + movq %rax, %rdx + movq %rax, %r14 + imulq 96(%rsp), %rdx ## 8-byte Folded Reload + leaq 456(%rsp), %rdi + 
movq 104(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + addq 456(%rsp), %r14 + movq 64(%rsp), %rax ## 8-byte Reload + adcq 464(%rsp), %rax + movq 32(%rsp), %rcx ## 8-byte Reload + adcq 472(%rsp), %rcx + movq %rcx, 32(%rsp) ## 8-byte Spill + movq 40(%rsp), %rcx ## 8-byte Reload + adcq 480(%rsp), %rcx + movq %rcx, 40(%rsp) ## 8-byte Spill + movq (%rsp), %rcx ## 8-byte Reload + adcq 488(%rsp), %rcx + movq %rcx, (%rsp) ## 8-byte Spill + movq 24(%rsp), %rcx ## 8-byte Reload + adcq 496(%rsp), %rcx + movq %rcx, 24(%rsp) ## 8-byte Spill + adcq 504(%rsp), %r13 + movq %r13, 8(%rsp) ## 8-byte Spill + adcq 512(%rsp), %r15 + movq %r15, 16(%rsp) ## 8-byte Spill + adcq 520(%rsp), %rbp + movq %rbp, 48(%rsp) ## 8-byte Spill + adcq 528(%rsp), %rbx + movq %rbx, 72(%rsp) ## 8-byte Spill + movq 80(%rsp), %r14 ## 8-byte Reload + adcq $0, %r14 + movq 56(%rsp), %r13 ## 8-byte Reload + adcq $0, %r13 + movq 88(%rsp), %rbx ## 8-byte Reload + adcq $0, %rbx + adcq $0, %r12 + movq %rax, %rdx + movq %rax, %r15 + imulq 96(%rsp), %rdx ## 8-byte Folded Reload + leaq 376(%rsp), %rdi + movq 104(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + addq 376(%rsp), %r15 + movq 32(%rsp), %rax ## 8-byte Reload + adcq 384(%rsp), %rax + movq 40(%rsp), %rcx ## 8-byte Reload + adcq 392(%rsp), %rcx + movq %rcx, 40(%rsp) ## 8-byte Spill + movq (%rsp), %rcx ## 8-byte Reload + adcq 400(%rsp), %rcx + movq %rcx, (%rsp) ## 8-byte Spill + movq 24(%rsp), %rbp ## 8-byte Reload + adcq 408(%rsp), %rbp + movq 8(%rsp), %rcx ## 8-byte Reload + adcq 416(%rsp), %rcx + movq %rcx, 8(%rsp) ## 8-byte Spill + movq 16(%rsp), %rcx ## 8-byte Reload + adcq 424(%rsp), %rcx + movq %rcx, 16(%rsp) ## 8-byte Spill + movq 48(%rsp), %rcx ## 8-byte Reload + adcq 432(%rsp), %rcx + movq %rcx, 48(%rsp) ## 8-byte Spill + movq 72(%rsp), %r15 ## 8-byte Reload + adcq 440(%rsp), %r15 + adcq 448(%rsp), %r14 + movq %r14, 80(%rsp) ## 8-byte Spill + adcq $0, %r13 + movq %r13, %r14 + adcq $0, %rbx + movq %rbx, 88(%rsp) ## 8-byte Spill + adcq $0, %r12 + movq %rax, %rbx + movq %rbx, %rdx + imulq 96(%rsp), %rdx ## 8-byte Folded Reload + leaq 296(%rsp), %rdi + movq 104(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + addq 296(%rsp), %rbx + movq 40(%rsp), %rax ## 8-byte Reload + adcq 304(%rsp), %rax + movq (%rsp), %r13 ## 8-byte Reload + adcq 312(%rsp), %r13 + adcq 320(%rsp), %rbp + movq 8(%rsp), %rcx ## 8-byte Reload + adcq 328(%rsp), %rcx + movq %rcx, 8(%rsp) ## 8-byte Spill + movq 16(%rsp), %rcx ## 8-byte Reload + adcq 336(%rsp), %rcx + movq %rcx, 16(%rsp) ## 8-byte Spill + movq 48(%rsp), %rcx ## 8-byte Reload + adcq 344(%rsp), %rcx + movq %rcx, 48(%rsp) ## 8-byte Spill + adcq 352(%rsp), %r15 + movq %r15, 72(%rsp) ## 8-byte Spill + movq 80(%rsp), %r15 ## 8-byte Reload + adcq 360(%rsp), %r15 + adcq 368(%rsp), %r14 + movq %r14, 56(%rsp) ## 8-byte Spill + movq 88(%rsp), %r14 ## 8-byte Reload + adcq $0, %r14 + adcq $0, %r12 + movq 96(%rsp), %rdx ## 8-byte Reload + imulq %rax, %rdx + movq %rax, %rbx + leaq 216(%rsp), %rdi + movq 104(%rsp), %rsi ## 8-byte Reload + callq l_mulPv576x64 + addq 216(%rsp), %rbx + movq %r13, %rsi + adcq 224(%rsp), %rsi + movq %rsi, (%rsp) ## 8-byte Spill + adcq 232(%rsp), %rbp + movq %rbp, 24(%rsp) ## 8-byte Spill + movq 8(%rsp), %r9 ## 8-byte Reload + adcq 240(%rsp), %r9 + movq %r9, 8(%rsp) ## 8-byte Spill + movq 16(%rsp), %r8 ## 8-byte Reload + adcq 248(%rsp), %r8 + movq %r8, 16(%rsp) ## 8-byte Spill + movq 48(%rsp), %rbx ## 8-byte Reload + adcq 256(%rsp), %rbx + movq 72(%rsp), %rax ## 8-byte Reload + adcq 264(%rsp), %rax + movq %r15, %rcx + adcq 
272(%rsp), %rcx + movq 56(%rsp), %rdx ## 8-byte Reload + adcq 280(%rsp), %rdx + movq %rdx, 56(%rsp) ## 8-byte Spill + adcq 288(%rsp), %r14 + movq %r14, %r11 + adcq $0, %r12 + subq 144(%rsp), %rsi ## 8-byte Folded Reload + movq %rbp, %rdi + sbbq 136(%rsp), %rdi ## 8-byte Folded Reload + movq %r9, %rbp + sbbq 152(%rsp), %rbp ## 8-byte Folded Reload + movq %r8, %r13 + sbbq 160(%rsp), %r13 ## 8-byte Folded Reload + movq %rbx, %r15 + sbbq 168(%rsp), %r15 ## 8-byte Folded Reload + movq %rax, %r14 + sbbq 176(%rsp), %r14 ## 8-byte Folded Reload + movq %rcx, %r10 + sbbq 184(%rsp), %r10 ## 8-byte Folded Reload + movq %rdx, %r8 + sbbq 192(%rsp), %r8 ## 8-byte Folded Reload + movq %r11, %r9 + sbbq 200(%rsp), %r9 ## 8-byte Folded Reload + sbbq $0, %r12 + andl $1, %r12d + cmovneq %r11, %r9 + testb %r12b, %r12b + cmovneq (%rsp), %rsi ## 8-byte Folded Reload + movq 208(%rsp), %rdx ## 8-byte Reload + movq %rsi, (%rdx) + cmovneq 24(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, 8(%rdx) + cmovneq 8(%rsp), %rbp ## 8-byte Folded Reload + movq %rbp, 16(%rdx) + cmovneq 16(%rsp), %r13 ## 8-byte Folded Reload + movq %r13, 24(%rdx) + cmovneq %rbx, %r15 + movq %r15, 32(%rdx) + cmovneq %rax, %r14 + movq %r14, 40(%rdx) + cmovneq %rcx, %r10 + movq %r10, 48(%rdx) + cmovneq 56(%rsp), %r8 ## 8-byte Folded Reload + movq %r8, 56(%rdx) + movq %r9, 64(%rdx) + addq $936, %rsp ## imm = 0x3A8 + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_addPre9Lbmi2 + .p2align 4, 0x90 +_mcl_fp_addPre9Lbmi2: ## @mcl_fp_addPre9Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq 64(%rdx), %r8 + movq 64(%rsi), %r15 + movq 56(%rsi), %r9 + movq 48(%rsi), %r10 + movq 40(%rsi), %r11 + movq 24(%rsi), %r12 + movq 32(%rsi), %r14 + movq (%rdx), %rbx + movq 8(%rdx), %rcx + addq (%rsi), %rbx + adcq 8(%rsi), %rcx + movq 16(%rdx), %rax + adcq 16(%rsi), %rax + adcq 24(%rdx), %r12 + movq 56(%rdx), %r13 + movq 48(%rdx), %rsi + movq 40(%rdx), %rbp + movq 32(%rdx), %rdx + movq %rbx, (%rdi) + movq %rcx, 8(%rdi) + movq %rax, 16(%rdi) + movq %r12, 24(%rdi) + adcq %r14, %rdx + movq %rdx, 32(%rdi) + adcq %r11, %rbp + movq %rbp, 40(%rdi) + adcq %r10, %rsi + movq %rsi, 48(%rdi) + adcq %r9, %r13 + movq %r13, 56(%rdi) + adcq %r8, %r15 + movq %r15, 64(%rdi) + sbbq %rax, %rax + andl $1, %eax + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_subPre9Lbmi2 + .p2align 4, 0x90 +_mcl_fp_subPre9Lbmi2: ## @mcl_fp_subPre9Lbmi2 +## BB#0: + movq 32(%rdx), %r8 + movq (%rsi), %rcx + xorl %eax, %eax + subq (%rdx), %rcx + movq %rcx, (%rdi) + movq 8(%rsi), %rcx + sbbq 8(%rdx), %rcx + movq %rcx, 8(%rdi) + movq 16(%rsi), %rcx + sbbq 16(%rdx), %rcx + movq %rcx, 16(%rdi) + movq 24(%rsi), %rcx + sbbq 24(%rdx), %rcx + movq %rcx, 24(%rdi) + movq 32(%rsi), %rcx + sbbq %r8, %rcx + movq 40(%rdx), %r8 + movq %rcx, 32(%rdi) + movq 40(%rsi), %rcx + sbbq %r8, %rcx + movq 48(%rdx), %r8 + movq %rcx, 40(%rdi) + movq 48(%rsi), %rcx + sbbq %r8, %rcx + movq 56(%rdx), %r8 + movq %rcx, 48(%rdi) + movq 56(%rsi), %rcx + sbbq %r8, %rcx + movq %rcx, 56(%rdi) + movq 64(%rdx), %rcx + movq 64(%rsi), %rdx + sbbq %rcx, %rdx + movq %rdx, 64(%rdi) + sbbq $0, %rax + andl $1, %eax + retq + + .globl _mcl_fp_shr1_9Lbmi2 + .p2align 4, 0x90 +_mcl_fp_shr1_9Lbmi2: ## @mcl_fp_shr1_9Lbmi2 +## BB#0: + pushq %rbx + movq 64(%rsi), %r8 + movq 56(%rsi), %r9 + movq 48(%rsi), %r10 + movq 40(%rsi), %r11 + movq 32(%rsi), %rcx + movq 24(%rsi), %rdx + movq 16(%rsi), %rax + movq (%rsi), %rbx + 
movq 8(%rsi), %rsi + shrdq $1, %rsi, %rbx + movq %rbx, (%rdi) + shrdq $1, %rax, %rsi + movq %rsi, 8(%rdi) + shrdq $1, %rdx, %rax + movq %rax, 16(%rdi) + shrdq $1, %rcx, %rdx + movq %rdx, 24(%rdi) + shrdq $1, %r11, %rcx + movq %rcx, 32(%rdi) + shrdq $1, %r10, %r11 + movq %r11, 40(%rdi) + shrdq $1, %r9, %r10 + movq %r10, 48(%rdi) + shrdq $1, %r8, %r9 + movq %r9, 56(%rdi) + shrq %r8 + movq %r8, 64(%rdi) + popq %rbx + retq + + .globl _mcl_fp_add9Lbmi2 + .p2align 4, 0x90 +_mcl_fp_add9Lbmi2: ## @mcl_fp_add9Lbmi2 +## BB#0: + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq 64(%rdx), %r12 + movq 64(%rsi), %r8 + movq 56(%rsi), %r13 + movq 48(%rsi), %r9 + movq 40(%rsi), %r10 + movq 24(%rsi), %r14 + movq 32(%rsi), %r11 + movq (%rdx), %rbx + movq 8(%rdx), %r15 + addq (%rsi), %rbx + adcq 8(%rsi), %r15 + movq 16(%rdx), %rax + adcq 16(%rsi), %rax + adcq 24(%rdx), %r14 + adcq 32(%rdx), %r11 + adcq 40(%rdx), %r10 + movq 56(%rdx), %rsi + adcq 48(%rdx), %r9 + movq %rbx, (%rdi) + movq %r15, 8(%rdi) + movq %rax, 16(%rdi) + movq %r14, 24(%rdi) + movq %r11, 32(%rdi) + movq %r10, 40(%rdi) + movq %r9, 48(%rdi) + adcq %r13, %rsi + movq %rsi, 56(%rdi) + adcq %r12, %r8 + movq %r8, 64(%rdi) + sbbq %rdx, %rdx + andl $1, %edx + subq (%rcx), %rbx + sbbq 8(%rcx), %r15 + sbbq 16(%rcx), %rax + sbbq 24(%rcx), %r14 + sbbq 32(%rcx), %r11 + sbbq 40(%rcx), %r10 + sbbq 48(%rcx), %r9 + sbbq 56(%rcx), %rsi + sbbq 64(%rcx), %r8 + sbbq $0, %rdx + testb $1, %dl + jne LBB136_2 +## BB#1: ## %nocarry + movq %rbx, (%rdi) + movq %r15, 8(%rdi) + movq %rax, 16(%rdi) + movq %r14, 24(%rdi) + movq %r11, 32(%rdi) + movq %r10, 40(%rdi) + movq %r9, 48(%rdi) + movq %rsi, 56(%rdi) + movq %r8, 64(%rdi) +LBB136_2: ## %carry + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + retq + + .globl _mcl_fp_addNF9Lbmi2 + .p2align 4, 0x90 +_mcl_fp_addNF9Lbmi2: ## @mcl_fp_addNF9Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rdi, %r8 + movq 64(%rdx), %r10 + movq 56(%rdx), %r11 + movq 48(%rdx), %r9 + movq 40(%rdx), %rax + movq 32(%rdx), %rdi + movq 24(%rdx), %rbp + movq 16(%rdx), %r15 + movq (%rdx), %rbx + movq 8(%rdx), %r13 + addq (%rsi), %rbx + adcq 8(%rsi), %r13 + adcq 16(%rsi), %r15 + adcq 24(%rsi), %rbp + movq %rbp, -24(%rsp) ## 8-byte Spill + adcq 32(%rsi), %rdi + movq %rdi, -40(%rsp) ## 8-byte Spill + adcq 40(%rsi), %rax + movq %rax, -32(%rsp) ## 8-byte Spill + adcq 48(%rsi), %r9 + movq %r9, %rdi + movq %rdi, -16(%rsp) ## 8-byte Spill + adcq 56(%rsi), %r11 + movq %r11, %rax + movq %rax, -8(%rsp) ## 8-byte Spill + adcq 64(%rsi), %r10 + movq %r10, %r9 + movq %rbx, %rsi + subq (%rcx), %rsi + movq %r13, %rdx + sbbq 8(%rcx), %rdx + movq %r15, %r12 + sbbq 16(%rcx), %r12 + sbbq 24(%rcx), %rbp + movq -40(%rsp), %r14 ## 8-byte Reload + sbbq 32(%rcx), %r14 + movq -32(%rsp), %r11 ## 8-byte Reload + sbbq 40(%rcx), %r11 + movq %rdi, %r10 + sbbq 48(%rcx), %r10 + movq %rax, %rdi + sbbq 56(%rcx), %rdi + movq %r9, %rax + sbbq 64(%rcx), %rax + movq %rax, %rcx + sarq $63, %rcx + cmovsq %rbx, %rsi + movq %rsi, (%r8) + cmovsq %r13, %rdx + movq %rdx, 8(%r8) + cmovsq %r15, %r12 + movq %r12, 16(%r8) + cmovsq -24(%rsp), %rbp ## 8-byte Folded Reload + movq %rbp, 24(%r8) + cmovsq -40(%rsp), %r14 ## 8-byte Folded Reload + movq %r14, 32(%r8) + cmovsq -32(%rsp), %r11 ## 8-byte Folded Reload + movq %r11, 40(%r8) + cmovsq -16(%rsp), %r10 ## 8-byte Folded Reload + movq %r10, 48(%r8) + cmovsq -8(%rsp), %rdi ## 8-byte Folded Reload + movq %rdi, 56(%r8) + cmovsq %r9, %rax + movq %rax, 64(%r8) + popq %rbx + 
popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fp_sub9Lbmi2 + .p2align 4, 0x90 +_mcl_fp_sub9Lbmi2: ## @mcl_fp_sub9Lbmi2 +## BB#0: + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq 64(%rdx), %r13 + movq (%rsi), %rax + movq 8(%rsi), %r9 + xorl %ebx, %ebx + subq (%rdx), %rax + sbbq 8(%rdx), %r9 + movq 16(%rsi), %r10 + sbbq 16(%rdx), %r10 + movq 24(%rsi), %r11 + sbbq 24(%rdx), %r11 + movq 32(%rsi), %r12 + sbbq 32(%rdx), %r12 + movq 40(%rsi), %r14 + sbbq 40(%rdx), %r14 + movq 48(%rsi), %r15 + sbbq 48(%rdx), %r15 + movq 64(%rsi), %r8 + movq 56(%rsi), %rsi + sbbq 56(%rdx), %rsi + movq %rax, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq %r12, 32(%rdi) + movq %r14, 40(%rdi) + movq %r15, 48(%rdi) + movq %rsi, 56(%rdi) + sbbq %r13, %r8 + movq %r8, 64(%rdi) + sbbq $0, %rbx + testb $1, %bl + je LBB138_2 +## BB#1: ## %carry + addq (%rcx), %rax + movq %rax, (%rdi) + movq 8(%rcx), %rax + adcq %r9, %rax + movq %rax, 8(%rdi) + movq 16(%rcx), %rax + adcq %r10, %rax + movq %rax, 16(%rdi) + movq 24(%rcx), %rax + adcq %r11, %rax + movq %rax, 24(%rdi) + movq 32(%rcx), %rax + adcq %r12, %rax + movq %rax, 32(%rdi) + movq 40(%rcx), %rax + adcq %r14, %rax + movq %rax, 40(%rdi) + movq 48(%rcx), %rax + adcq %r15, %rax + movq %rax, 48(%rdi) + movq 56(%rcx), %rax + adcq %rsi, %rax + movq %rax, 56(%rdi) + movq 64(%rcx), %rax + adcq %r8, %rax + movq %rax, 64(%rdi) +LBB138_2: ## %nocarry + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + retq + + .globl _mcl_fp_subNF9Lbmi2 + .p2align 4, 0x90 +_mcl_fp_subNF9Lbmi2: ## @mcl_fp_subNF9Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rcx, %r10 + movq %rdi, %rbx + movq 64(%rsi), %r11 + movdqu (%rdx), %xmm1 + movdqu 16(%rdx), %xmm2 + movdqu 32(%rdx), %xmm3 + movdqu 48(%rdx), %xmm4 + pshufd $78, %xmm4, %xmm0 ## xmm0 = xmm4[2,3,0,1] + movd %xmm0, %r8 + movdqu (%rsi), %xmm5 + movdqu 16(%rsi), %xmm6 + movdqu 32(%rsi), %xmm7 + movdqu 48(%rsi), %xmm8 + pshufd $78, %xmm8, %xmm0 ## xmm0 = xmm8[2,3,0,1] + movd %xmm0, %rax + movd %xmm4, %r9 + pshufd $78, %xmm3, %xmm0 ## xmm0 = xmm3[2,3,0,1] + movd %xmm0, %rdi + pshufd $78, %xmm7, %xmm0 ## xmm0 = xmm7[2,3,0,1] + movd %xmm3, %rcx + pshufd $78, %xmm2, %xmm3 ## xmm3 = xmm2[2,3,0,1] + movd %xmm3, %rbp + pshufd $78, %xmm6, %xmm3 ## xmm3 = xmm6[2,3,0,1] + movd %xmm2, %r13 + pshufd $78, %xmm1, %xmm2 ## xmm2 = xmm1[2,3,0,1] + movd %xmm2, %r12 + pshufd $78, %xmm5, %xmm2 ## xmm2 = xmm5[2,3,0,1] + movd %xmm1, %rsi + movd %xmm5, %r15 + subq %rsi, %r15 + movd %xmm2, %r14 + sbbq %r12, %r14 + movd %xmm6, %r12 + sbbq %r13, %r12 + movd %xmm3, %r13 + sbbq %rbp, %r13 + movd %xmm7, %rsi + sbbq %rcx, %rsi + movq %rsi, -16(%rsp) ## 8-byte Spill + movd %xmm0, %rcx + sbbq %rdi, %rcx + movq %rcx, -24(%rsp) ## 8-byte Spill + movd %xmm8, %rcx + sbbq %r9, %rcx + movq %rcx, -32(%rsp) ## 8-byte Spill + sbbq %r8, %rax + movq %rax, -8(%rsp) ## 8-byte Spill + sbbq 64(%rdx), %r11 + movq %r11, -40(%rsp) ## 8-byte Spill + movq %r11, %rdx + sarq $63, %rdx + movq %rdx, %rbp + shldq $1, %r11, %rbp + movq 24(%r10), %r9 + andq %rbp, %r9 + movq 8(%r10), %rdi + andq %rbp, %rdi + andq (%r10), %rbp + movq 64(%r10), %r11 + andq %rdx, %r11 + rorxq $63, %rdx, %rax + andq 56(%r10), %rdx + movq 48(%r10), %r8 + andq %rax, %r8 + movq 40(%r10), %rsi + andq %rax, %rsi + movq 32(%r10), %rcx + andq %rax, %rcx + andq 16(%r10), %rax + addq %r15, %rbp + adcq %r14, %rdi + movq %rbp, (%rbx) + adcq %r12, %rax + movq %rdi, 8(%rbx) + adcq %r13, %r9 + movq 
%rax, 16(%rbx) + movq %r9, 24(%rbx) + adcq -16(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, 32(%rbx) + adcq -24(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, 40(%rbx) + adcq -32(%rsp), %r8 ## 8-byte Folded Reload + movq %r8, 48(%rbx) + adcq -8(%rsp), %rdx ## 8-byte Folded Reload + movq %rdx, 56(%rbx) + adcq -40(%rsp), %r11 ## 8-byte Folded Reload + movq %r11, 64(%rbx) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fpDbl_add9Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_add9Lbmi2: ## @mcl_fpDbl_add9Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + movq %rcx, %r15 + movq 136(%rdx), %rax + movq %rax, -48(%rsp) ## 8-byte Spill + movq 128(%rdx), %rax + movq %rax, -40(%rsp) ## 8-byte Spill + movq 120(%rdx), %r10 + movq 112(%rdx), %r11 + movq 24(%rsi), %rcx + movq 32(%rsi), %r14 + movq 16(%rdx), %rbp + movq (%rdx), %rax + movq 8(%rdx), %rbx + addq (%rsi), %rax + adcq 8(%rsi), %rbx + adcq 16(%rsi), %rbp + adcq 24(%rdx), %rcx + adcq 32(%rdx), %r14 + movq 104(%rdx), %r9 + movq 96(%rdx), %r13 + movq %rax, (%rdi) + movq 88(%rdx), %r8 + movq %rbx, 8(%rdi) + movq 80(%rdx), %r12 + movq %rbp, 16(%rdi) + movq 40(%rdx), %rax + movq %rcx, 24(%rdi) + movq 40(%rsi), %rbp + adcq %rax, %rbp + movq 48(%rdx), %rcx + movq %r14, 32(%rdi) + movq 48(%rsi), %rax + adcq %rcx, %rax + movq 56(%rdx), %r14 + movq %rbp, 40(%rdi) + movq 56(%rsi), %rbp + adcq %r14, %rbp + movq 72(%rdx), %rcx + movq 64(%rdx), %rdx + movq %rax, 48(%rdi) + movq 64(%rsi), %rax + adcq %rdx, %rax + movq 136(%rsi), %rbx + movq %rbp, 56(%rdi) + movq 72(%rsi), %rbp + adcq %rcx, %rbp + movq 128(%rsi), %rcx + movq %rax, 64(%rdi) + movq 80(%rsi), %rdx + adcq %r12, %rdx + movq 88(%rsi), %r12 + adcq %r8, %r12 + movq 96(%rsi), %r14 + adcq %r13, %r14 + movq %r14, -8(%rsp) ## 8-byte Spill + movq 104(%rsi), %rax + adcq %r9, %rax + movq %rax, -32(%rsp) ## 8-byte Spill + movq 120(%rsi), %rax + movq 112(%rsi), %rsi + adcq %r11, %rsi + movq %rsi, -24(%rsp) ## 8-byte Spill + adcq %r10, %rax + movq %rax, -16(%rsp) ## 8-byte Spill + adcq -40(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, -40(%rsp) ## 8-byte Spill + adcq -48(%rsp), %rbx ## 8-byte Folded Reload + movq %rbx, -48(%rsp) ## 8-byte Spill + sbbq %r9, %r9 + andl $1, %r9d + movq %rbp, %r10 + subq (%r15), %r10 + movq %rdx, %r11 + sbbq 8(%r15), %r11 + movq %r12, %rbx + sbbq 16(%r15), %rbx + sbbq 24(%r15), %r14 + movq -32(%rsp), %r13 ## 8-byte Reload + sbbq 32(%r15), %r13 + movq -24(%rsp), %rsi ## 8-byte Reload + sbbq 40(%r15), %rsi + movq -16(%rsp), %rax ## 8-byte Reload + sbbq 48(%r15), %rax + sbbq 56(%r15), %rcx + movq -48(%rsp), %r8 ## 8-byte Reload + sbbq 64(%r15), %r8 + sbbq $0, %r9 + andl $1, %r9d + cmovneq %rbp, %r10 + movq %r10, 72(%rdi) + testb %r9b, %r9b + cmovneq %rdx, %r11 + movq %r11, 80(%rdi) + cmovneq %r12, %rbx + movq %rbx, 88(%rdi) + cmovneq -8(%rsp), %r14 ## 8-byte Folded Reload + movq %r14, 96(%rdi) + cmovneq -32(%rsp), %r13 ## 8-byte Folded Reload + movq %r13, 104(%rdi) + cmovneq -24(%rsp), %rsi ## 8-byte Folded Reload + movq %rsi, 112(%rdi) + cmovneq -16(%rsp), %rax ## 8-byte Folded Reload + movq %rax, 120(%rdi) + cmovneq -40(%rsp), %rcx ## 8-byte Folded Reload + movq %rcx, 128(%rdi) + cmovneq -48(%rsp), %r8 ## 8-byte Folded Reload + movq %r8, 136(%rdi) + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + retq + + .globl _mcl_fpDbl_sub9Lbmi2 + .p2align 4, 0x90 +_mcl_fpDbl_sub9Lbmi2: ## @mcl_fpDbl_sub9Lbmi2 +## BB#0: + pushq %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 
+ pushq %rbx
+ movq %rcx, %r14
+ movq 136(%rdx), %rax
+ movq %rax, -24(%rsp) ## 8-byte Spill
+ movq 128(%rdx), %rax
+ movq %rax, -32(%rsp) ## 8-byte Spill
+ movq 120(%rdx), %rax
+ movq %rax, -40(%rsp) ## 8-byte Spill
+ movq 16(%rsi), %r11
+ movq (%rsi), %r12
+ movq 8(%rsi), %r13
+ xorl %r9d, %r9d
+ subq (%rdx), %r12
+ sbbq 8(%rdx), %r13
+ sbbq 16(%rdx), %r11
+ movq 24(%rsi), %rbx
+ sbbq 24(%rdx), %rbx
+ movq 32(%rsi), %rbp
+ sbbq 32(%rdx), %rbp
+ movq 112(%rdx), %r10
+ movq 104(%rdx), %rcx
+ movq %r12, (%rdi)
+ movq 96(%rdx), %rax
+ movq %r13, 8(%rdi)
+ movq 88(%rdx), %r13
+ movq %r11, 16(%rdi)
+ movq 40(%rdx), %r11
+ movq %rbx, 24(%rdi)
+ movq 40(%rsi), %rbx
+ sbbq %r11, %rbx
+ movq 48(%rdx), %r11
+ movq %rbp, 32(%rdi)
+ movq 48(%rsi), %rbp
+ sbbq %r11, %rbp
+ movq 56(%rdx), %r11
+ movq %rbx, 40(%rdi)
+ movq 56(%rsi), %rbx
+ sbbq %r11, %rbx
+ movq 64(%rdx), %r11
+ movq %rbp, 48(%rdi)
+ movq 64(%rsi), %rbp
+ sbbq %r11, %rbp
+ movq 80(%rdx), %r8
+ movq 72(%rdx), %r11
+ movq %rbx, 56(%rdi)
+ movq 72(%rsi), %r15
+ sbbq %r11, %r15
+ movq 136(%rsi), %rdx
+ movq %rbp, 64(%rdi)
+ movq 80(%rsi), %rbp
+ sbbq %r8, %rbp
+ movq 88(%rsi), %r12
+ sbbq %r13, %r12
+ movq 96(%rsi), %r13
+ sbbq %rax, %r13
+ movq 104(%rsi), %rax
+ sbbq %rcx, %rax
+ movq %rax, -16(%rsp) ## 8-byte Spill
+ movq 112(%rsi), %rax
+ sbbq %r10, %rax
+ movq %rax, -8(%rsp) ## 8-byte Spill
+ movq 128(%rsi), %rax
+ movq 120(%rsi), %rcx
+ sbbq -40(%rsp), %rcx ## 8-byte Folded Reload
+ movq %rcx, -40(%rsp) ## 8-byte Spill
+ sbbq -32(%rsp), %rax ## 8-byte Folded Reload
+ movq %rax, -32(%rsp) ## 8-byte Spill
+ sbbq -24(%rsp), %rdx ## 8-byte Folded Reload
+ movq %rdx, -24(%rsp) ## 8-byte Spill
+ movl $0, %r8d
+ sbbq $0, %r8
+ andl $1, %r8d
+ movq (%r14), %r10
+ cmoveq %r9, %r10
+ testb %r8b, %r8b
+ movq 16(%r14), %r8
+ cmoveq %r9, %r8
+ movq 8(%r14), %rdx
+ cmoveq %r9, %rdx
+ movq 64(%r14), %rbx
+ cmoveq %r9, %rbx
+ movq 56(%r14), %r11
+ cmoveq %r9, %r11
+ movq 48(%r14), %rsi
+ cmoveq %r9, %rsi
+ movq 40(%r14), %rcx
+ cmoveq %r9, %rcx
+ movq 32(%r14), %rax
+ cmoveq %r9, %rax
+ cmovneq 24(%r14), %r9
+ addq %r15, %r10
+ adcq %rbp, %rdx
+ movq %r10, 72(%rdi)
+ adcq %r12, %r8
+ movq %rdx, 80(%rdi)
+ adcq %r13, %r9
+ movq %r8, 88(%rdi)
+ movq %r9, 96(%rdi)
+ adcq -16(%rsp), %rax ## 8-byte Folded Reload
+ movq %rax, 104(%rdi)
+ adcq -8(%rsp), %rcx ## 8-byte Folded Reload
+ movq %rcx, 112(%rdi)
+ adcq -40(%rsp), %rsi ## 8-byte Folded Reload
+ movq %rsi, 120(%rdi)
+ adcq -32(%rsp), %r11 ## 8-byte Folded Reload
+ movq %r11, 128(%rdi)
+ adcq -24(%rsp), %rbx ## 8-byte Folded Reload
+ movq %rbx, 136(%rdi)
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ popq %r15
+ popq %rbp
+ retq
+
+
+.subsections_via_symbols