19 files changed, 2608 insertions, 0 deletions
diff --git a/crypto/bn256/cloudflare/bn256.go b/crypto/bn256/cloudflare/bn256.go
new file mode 100644
index 000000000..c6ea2d07e
--- /dev/null
+++ b/crypto/bn256/cloudflare/bn256.go
@@ -0,0 +1,481 @@
+// Package bn256 implements a particular bilinear group at the 128-bit security
+// level.
+//
+// Bilinear groups are the basis of many of the new cryptographic protocols that
+// have been proposed over the past decade. They consist of a triplet of groups
+// (G₁, G₂ and GT) such that there exists a function e(g₁ˣ,g₂ʸ)=gTˣʸ (where gₓ
+// is a generator of the respective group). That function is called a pairing
+// function.
+//
+// This package specifically implements the Optimal Ate pairing over a 256-bit
+// Barreto-Naehrig curve as described in
+// http://cryptojedi.org/papers/dclxvi-20100714.pdf. Its output is compatible
+// with the implementation described in that paper.
+package bn256
+
+import (
+	"crypto/rand"
+	"errors"
+	"io"
+	"math/big"
+)
+
+// randomK reads a uniformly random scalar in [1, Order-1] from r. Zero draws
+// are discarded and retried; the first read error is returned unchanged
+// together with a nil scalar.
+func randomK(r io.Reader) (k *big.Int, err error) {
+	for {
+		k, err = rand.Int(r, Order)
+		// Look at err before touching k: rand.Int returns a nil *big.Int on
+		// failure, and calling Sign on it would dereference nil.
+		if err != nil {
+			return nil, err
+		}
+		if k.Sign() > 0 {
+			return k, nil
+		}
+	}
+}
+
+// G1 is an abstract cyclic group. The zero value is suitable for use as the
+// output of an operation, but cannot be used as an input.
+type G1 struct {
+	// p is the underlying curve point. It is nil in the zero value and is
+	// allocated by the setter methods on first use.
+	p *curvePoint
+}
+
+// RandomG1 draws a random, non-zero scalar x from r and returns it together
+// with g₁ˣ. A failure to read from r is returned unchanged.
+func RandomG1(r io.Reader) (*big.Int, *G1, error) {
+	x, err := randomK(r)
+	if err != nil {
+		return nil, nil, err
+	}
+	point := new(G1).ScalarBaseMult(x)
+	return x, point, nil
+}
+
+// String returns "bn256.G1" followed by the textual form of the underlying
+// curve point.
+func (e *G1) String() string {
+	coords := e.p.String()
+	return "bn256.G1" + coords
+}
+
+// ScalarBaseMult stores g*k in e, where g is the generator of the group, and
+// returns e.
+func (e *G1) ScalarBaseMult(k *big.Int) *G1 {
+	if e.p == nil {
+		e.p = new(curvePoint)
+	}
+	dst := e.p
+	dst.Mul(curveGen, k)
+	return e
+}
+
+// ScalarMult stores a*k in e and returns e.
+func (e *G1) ScalarMult(a *G1, k *big.Int) *G1 {
+	if e.p == nil {
+		e.p = new(curvePoint)
+	}
+	src := a.p
+	e.p.Mul(src, k)
+	return e
+}
+
+// Add stores a+b in e and returns e.
+func (e *G1) Add(a, b *G1) *G1 {
+	if e.p == nil {
+		e.p = new(curvePoint)
+	}
+	lhs, rhs := a.p, b.p
+	e.p.Add(lhs, rhs)
+	return e
+}
+
+// Neg stores -a in e and returns e.
+func (e *G1) Neg(a *G1) *G1 {
+	if e.p == nil {
+		e.p = new(curvePoint)
+	}
+	src := a.p
+	e.p.Neg(src)
+	return e
+}
+
+// Set copies a into e and returns e.
+func (e *G1) Set(a *G1) *G1 {
+	if e.p == nil {
+		e.p = new(curvePoint)
+	}
+	src := a.p
+	e.p.Set(src)
+	return e
+}
+
+// Marshal converts e to a 64-byte slice: the affine x coordinate followed by
+// the affine y coordinate, each written as a 32-byte big-endian number. The
+// point at infinity is encoded as 64 zero bytes. As a side effect e is
+// converted to affine form.
+func (e *G1) Marshal() []byte {
+	// Each value is a 256-bit number.
+	const numBytes = 256 / 8
+
+	// Accept the zero value of G1, as G2.Marshal does, instead of
+	// dereferencing a nil point. An all-zero curvePoint has z == 0 and is
+	// therefore serialised as the point at infinity.
+	if e.p == nil {
+		e.p = &curvePoint{}
+	}
+
+	e.p.MakeAffine()
+	ret := make([]byte, numBytes*2)
+	if e.p.IsInfinity() {
+		return ret
+	}
+	temp := &gfP{}
+
+	// Coordinates are held in Montgomery form; decode before writing.
+	montDecode(temp, &e.p.x)
+	temp.Marshal(ret)
+	montDecode(temp, &e.p.y)
+	temp.Marshal(ret[numBytes:])
+
+	return ret
+}
+
+// Unmarshal decodes a point from the first 64 bytes of m, in the format
+// produced by Marshal, and stores it in e. It returns the bytes of m that
+// follow the encoded point. An error is returned if m is too short, if a
+// coordinate is not below the field modulus, or if the point is not on the
+// curve. An all-zero encoding decodes to the point at infinity.
+func (e *G1) Unmarshal(m []byte) ([]byte, error) {
+	// Each value is a 256-bit number.
+	const numBytes = 256 / 8
+	if len(m) < 2*numBytes {
+		return nil, errors.New("bn256: not enough data")
+	}
+	// Unmarshal the coordinates and check that each is below the modulus.
+	if e.p == nil {
+		e.p = &curvePoint{}
+	} else {
+		// Clear any previous value before decoding into the coordinates.
+		e.p.x, e.p.y = gfP{0}, gfP{0}
+	}
+	var err error
+	if err = e.p.x.Unmarshal(m); err != nil {
+		return nil, err
+	}
+	if err = e.p.y.Unmarshal(m[numBytes:]); err != nil {
+		return nil, err
+	}
+	// Encode into Montgomery form and ensure it's on the curve
+	montEncode(&e.p.x, &e.p.x)
+	montEncode(&e.p.y, &e.p.y)
+
+	zero := gfP{0}
+	if e.p.x == zero && e.p.y == zero {
+		// This is the point at infinity.
+		e.p.y = *newGFp(1)
+		e.p.z = gfP{0}
+		e.p.t = gfP{0}
+	} else {
+		// Affine input: z = 1 and t = z² = 1.
+		e.p.z = *newGFp(1)
+		e.p.t = *newGFp(1)
+
+		if !e.p.IsOnCurve() {
+			return nil, errors.New("bn256: malformed point")
+		}
+	}
+	return m[2*numBytes:], nil
+}
+
+// G2 is an abstract cyclic group. The zero value is suitable for use as the
+// output of an operation, but cannot be used as an input.
+type G2 struct {
+	// p is the underlying twist point. It is nil in the zero value and is
+	// allocated by the setter methods on first use.
+	p *twistPoint
+}
+
+// RandomG2 draws a random, non-zero scalar x from r and returns it together
+// with g₂ˣ. A failure to read from r is returned unchanged.
+func RandomG2(r io.Reader) (*big.Int, *G2, error) {
+	x, err := randomK(r)
+	if err != nil {
+		return nil, nil, err
+	}
+	point := new(G2).ScalarBaseMult(x)
+	return x, point, nil
+}
+
+// String returns "bn256.G2" followed by the textual form of the underlying
+// twist point.
+func (e *G2) String() string {
+	coords := e.p.String()
+	return "bn256.G2" + coords
+}
+
+// ScalarBaseMult stores g*k in e, where g is the generator of the group, and
+// returns e.
+func (e *G2) ScalarBaseMult(k *big.Int) *G2 {
+	if e.p == nil {
+		e.p = new(twistPoint)
+	}
+	dst := e.p
+	dst.Mul(twistGen, k)
+	return e
+}
+
+// ScalarMult stores a*k in e and returns e.
+func (e *G2) ScalarMult(a *G2, k *big.Int) *G2 {
+	if e.p == nil {
+		e.p = new(twistPoint)
+	}
+	src := a.p
+	e.p.Mul(src, k)
+	return e
+}
+
+// Add stores a+b in e and returns e.
+func (e *G2) Add(a, b *G2) *G2 {
+	if e.p == nil {
+		e.p = new(twistPoint)
+	}
+	lhs, rhs := a.p, b.p
+	e.p.Add(lhs, rhs)
+	return e
+}
+
+// Neg stores -a in e and returns e.
+func (e *G2) Neg(a *G2) *G2 {
+	if e.p == nil {
+		e.p = new(twistPoint)
+	}
+	src := a.p
+	e.p.Neg(src)
+	return e
+}
+
+// Set copies a into e and returns e.
+func (e *G2) Set(a *G2) *G2 {
+	if e.p == nil {
+		e.p = new(twistPoint)
+	}
+	src := a.p
+	e.p.Set(src)
+	return e
+}
+
+// Marshal converts e into a 128-byte slice holding x.x, x.y, y.x and y.y of
+// the affine point, 32 bytes each. The point at infinity is encoded as all
+// zero bytes. As a side effect e is converted to affine form.
+func (e *G2) Marshal() []byte {
+	// Each value is a 256-bit number.
+	const numBytes = 256 / 8
+
+	if e.p == nil {
+		e.p = new(twistPoint)
+	}
+	e.p.MakeAffine()
+
+	out := make([]byte, numBytes*4)
+	if e.p.IsInfinity() {
+		return out
+	}
+
+	// Coordinates in serialisation order.
+	coords := [4]*gfP{&e.p.x.x, &e.p.x.y, &e.p.y.x, &e.p.y.y}
+
+	var plain gfP
+	for i, c := range coords {
+		montDecode(&plain, c)
+		plain.Marshal(out[i*numBytes:])
+	}
+	return out
+}
+
+// Unmarshal decodes a point from the first 128 bytes of m, in the format
+// produced by Marshal, and stores it in e. It returns the bytes of m that
+// follow the encoded point. An error is returned if m is too short, if a
+// coordinate is not below the field modulus, or if the point is not on the
+// curve. An all-zero encoding decodes to the point at infinity.
+func (e *G2) Unmarshal(m []byte) ([]byte, error) {
+	// Each value is a 256-bit number.
+	const numBytes = 256 / 8
+	if len(m) < 4*numBytes {
+		return nil, errors.New("bn256: not enough data")
+	}
+	// Unmarshal the coordinates and check that each is below the modulus.
+	if e.p == nil {
+		e.p = &twistPoint{}
+	} else {
+		// Clear any previous value, as G1.Unmarshal does, so that stale words
+		// of a reused element cannot leak into the decoded coordinates.
+		e.p.x.SetZero()
+		e.p.y.SetZero()
+	}
+	var err error
+	if err = e.p.x.x.Unmarshal(m); err != nil {
+		return nil, err
+	}
+	if err = e.p.x.y.Unmarshal(m[numBytes:]); err != nil {
+		return nil, err
+	}
+	if err = e.p.y.x.Unmarshal(m[2*numBytes:]); err != nil {
+		return nil, err
+	}
+	if err = e.p.y.y.Unmarshal(m[3*numBytes:]); err != nil {
+		return nil, err
+	}
+	// Encode into Montgomery form and ensure it's on the curve
+	montEncode(&e.p.x.x, &e.p.x.x)
+	montEncode(&e.p.x.y, &e.p.x.y)
+	montEncode(&e.p.y.x, &e.p.y.x)
+	montEncode(&e.p.y.y, &e.p.y.y)
+
+	if e.p.x.IsZero() && e.p.y.IsZero() {
+		// This is the point at infinity.
+		e.p.y.SetOne()
+		e.p.z.SetZero()
+		e.p.t.SetZero()
+	} else {
+		e.p.z.SetOne()
+		e.p.t.SetOne()
+
+		if !e.p.IsOnCurve() {
+			return nil, errors.New("bn256: malformed point")
+		}
+	}
+	return m[4*numBytes:], nil
+}
+
+// GT is an abstract cyclic group. The zero value is suitable for use as the
+// output of an operation, but cannot be used as an input.
+type GT struct {
+	// p is the underlying field element. It is nil in the zero value and is
+	// allocated by the setter methods on first use.
+	p *gfP12
+}
+
+// Pair computes the Optimal Ate pairing of g1 and g2 and returns the result
+// as a new GT element.
+func Pair(g1 *G1, g2 *G2) *GT {
+	result := optimalAte(g2.p, g1.p)
+	return &GT{result}
+}
+
+// PairingCheck multiplies together the Miller-loop values of every pair
+// (a[i], b[i]), applies a single final exponentiation and reports whether the
+// result is one. Pairs in which either point is the point at infinity are
+// skipped. b is indexed by the positions of a.
+func PairingCheck(a []*G1, b []*G2) bool {
+	product := new(gfP12).SetOne()
+
+	for i, g1 := range a {
+		if g1.p.IsInfinity() || b[i].p.IsInfinity() {
+			continue
+		}
+		product.Mul(product, miller(b[i].p, g1.p))
+	}
+	return finalExponentiation(product).IsOne()
+}
+
+// Miller runs Miller's algorithm, a bilinear function from the source groups
+// to F_p^12, and returns the unfinalised value. Miller(g1, g2).Finalize() is
+// equivalent to Pair(g1, g2).
+func Miller(g1 *G1, g2 *G2) *GT {
+	value := miller(g2.p, g1.p)
+	return &GT{value}
+}
+
+// String returns "bn256.GT" followed by the textual form of the underlying
+// field element.
+func (e *GT) String() string {
+	value := e.p.String()
+	return "bn256.GT" + value
+}
+
+// ScalarMult stores a*k in e and returns e. The group is written additively;
+// the operation is carried out as an exponentiation of the field element.
+func (e *GT) ScalarMult(a *GT, k *big.Int) *GT {
+	if e.p == nil {
+		e.p = new(gfP12)
+	}
+	base := a.p
+	e.p.Exp(base, k)
+	return e
+}
+
+// Add stores a+b in e and returns e. The group is written additively; the
+// operation is carried out as a multiplication of the field elements.
+func (e *GT) Add(a, b *GT) *GT {
+	if e.p == nil {
+		e.p = new(gfP12)
+	}
+	lhs, rhs := a.p, b.p
+	e.p.Mul(lhs, rhs)
+	return e
+}
+
+// Neg stores -a in e and returns e. It is computed as the conjugate of the
+// field element.
+func (e *GT) Neg(a *GT) *GT {
+	if e.p == nil {
+		e.p = new(gfP12)
+	}
+	src := a.p
+	e.p.Conjugate(src)
+	return e
+}
+
+// Set copies a into e and returns e.
+func (e *GT) Set(a *GT) *GT {
+	if e.p == nil {
+		e.p = new(gfP12)
+	}
+	src := a.p
+	e.p.Set(src)
+	return e
+}
+
+// Finalize is a linear function from F_p^12 to GT. It replaces e with the
+// final exponentiation of its current value and returns e.
+func (e *GT) Finalize() *GT {
+	e.p.Set(finalExponentiation(e.p))
+	return e
+}
+
+// Marshal converts e into a 384-byte slice: the twelve base-field
+// coordinates of the element, 32 bytes each, in the order x.x.x, x.x.y,
+// x.y.x, x.y.y, x.z.x, x.z.y, y.x.x, y.x.y, y.y.x, y.y.y, y.z.x, y.z.y.
+func (e *GT) Marshal() []byte {
+	// Each value is a 256-bit number.
+	const numBytes = 256 / 8
+
+	// Coordinates in serialisation order.
+	coords := [12]*gfP{
+		&e.p.x.x.x, &e.p.x.x.y,
+		&e.p.x.y.x, &e.p.x.y.y,
+		&e.p.x.z.x, &e.p.x.z.y,
+		&e.p.y.x.x, &e.p.y.x.y,
+		&e.p.y.y.x, &e.p.y.y.y,
+		&e.p.y.z.x, &e.p.y.z.y,
+	}
+
+	out := make([]byte, numBytes*len(coords))
+	var plain gfP
+	for i, c := range coords {
+		montDecode(&plain, c)
+		plain.Marshal(out[i*numBytes:])
+	}
+	return out
+}
+
+// Unmarshal decodes an element from the first 384 bytes of m, in the format
+// produced by Marshal, and stores it in e. It returns the bytes of m that
+// follow the encoded element, or an error if m is too short or a coordinate
+// is not below the field modulus.
+func (e *GT) Unmarshal(m []byte) ([]byte, error) {
+	// Each value is a 256-bit number.
+	const numBytes = 256 / 8
+
+	if len(m) < 12*numBytes {
+		return nil, errors.New("bn256: not enough data")
+	}
+
+	if e.p == nil {
+		e.p = &gfP12{}
+	} else {
+		// Clear any previous value so that stale words of a reused element
+		// cannot leak into the decoded coordinates.
+		e.p.SetZero()
+	}
+
+	// Coordinates in serialisation order.
+	coords := [12]*gfP{
+		&e.p.x.x.x, &e.p.x.x.y,
+		&e.p.x.y.x, &e.p.x.y.y,
+		&e.p.x.z.x, &e.p.x.z.y,
+		&e.p.y.x.x, &e.p.y.x.y,
+		&e.p.y.y.x, &e.p.y.y.y,
+		&e.p.y.z.x, &e.p.y.z.y,
+	}
+
+	// Decode and range-check every coordinate before converting any of them.
+	for i, c := range coords {
+		if err := c.Unmarshal(m[i*numBytes:]); err != nil {
+			return nil, err
+		}
+	}
+	// Encode into Montgomery form.
+	for _, c := range coords {
+		montEncode(c, c)
+	}
+
+	return m[12*numBytes:], nil
+}
diff --git a/crypto/bn256/cloudflare/bn256_test.go b/crypto/bn256/cloudflare/bn256_test.go
new file mode 100644
index 000000000..369a3edaa
--- /dev/null
+++ b/crypto/bn256/cloudflare/bn256_test.go
@@ -0,0 +1,118 @@
+// +build amd64,!appengine,!gccgo
+
+package bn256
+
+import (
+	"bytes"
+	"crypto/rand"
+	"testing"
+)
+
+// TestG1Marshal checks that a random G1 element survives a
+// Marshal/Unmarshal/Marshal round trip byte for byte.
+func TestG1Marshal(t *testing.T) {
+	_, original, err := RandomG1(rand.Reader)
+	if err != nil {
+		t.Fatal(err)
+	}
+	want := original.Marshal()
+
+	decoded := new(G1)
+	if _, err = decoded.Unmarshal(want); err != nil {
+		t.Fatal(err)
+	}
+
+	if got := decoded.Marshal(); !bytes.Equal(want, got) {
+		t.Fatal("bytes are different")
+	}
+}
+
+// TestG2Marshal checks that a random G2 element survives a
+// Marshal/Unmarshal/Marshal round trip byte for byte.
+func TestG2Marshal(t *testing.T) {
+	_, original, err := RandomG2(rand.Reader)
+	if err != nil {
+		t.Fatal(err)
+	}
+	want := original.Marshal()
+
+	decoded := new(G2)
+	if _, err = decoded.Unmarshal(want); err != nil {
+		t.Fatal(err)
+	}
+
+	if got := decoded.Marshal(); !bytes.Equal(want, got) {
+		t.Fatal("bytes are different")
+	}
+}
+
+// TestBilinearity checks that e(a·g₁, b·g₂) equals e(g₁, g₂) raised to a and
+// then to b, for random scalars a and b.
+func TestBilinearity(t *testing.T) {
+	for i := 0; i < 2; i++ {
+		a, p1, err := RandomG1(rand.Reader)
+		if err != nil {
+			t.Fatal(err)
+		}
+		b, p2, err := RandomG2(rand.Reader)
+		if err != nil {
+			t.Fatal(err)
+		}
+		e1 := Pair(p1, p2)
+
+		e2 := Pair(&G1{curveGen}, &G2{twistGen})
+		e2.ScalarMult(e2, a)
+		e2.ScalarMult(e2, b)
+
+		if *e1.p != *e2.p {
+			t.Fatalf("bad pairing result: %s", e1)
+		}
+	}
+}
+
+func TestTripartiteDiffieHellman(t *testing.T) {
+	a, _ := rand.Int(rand.Reader, Order)
+	b, _ := rand.Int(rand.Reader, Order)
+	c, _ := rand.Int(rand.Reader, Order)
+
+	pa, pb, pc := new(G1), new(G1), new(G1)
+	qa, qb, qc := new(G2), new(G2), new(G2)
+
+	pa.Unmarshal(new(G1).ScalarBaseMult(a).Marshal())
+	qa.Unmarshal(new(G2).ScalarBaseMult(a).Marshal())
+	pb.Unmarshal(new(G1).ScalarBaseMult(b).Marshal())
+	qb.Unmarshal(new(G2).ScalarBaseMult(b).Marshal())
+	pc.Unmarshal(new(G1).ScalarBaseMult(c).Marshal())
+	qc.Unmarshal(new(G2).ScalarBaseMult(c).Marshal())
+
+	k1 := Pair(pb, qc)
+	k1.ScalarMult(k1, a)
+	k1Bytes := k1.Marshal()
+
+	k2 := Pair(pc, qa)
+	k2.ScalarMult(k2, b)
+	k2Bytes := k2.Marshal()
+
+	k3 := Pair(pa, qb)
+	k3.ScalarMult(k3, c)
+	k3Bytes := k3.Marshal()
+
+	if !bytes.Equal(k1Bytes, k2Bytes) || !bytes.Equal(k2Bytes, k3Bytes) {
+		t.Errorf("keys didn't agree")
+	}
+}
+
+// BenchmarkG1 measures a base-point scalar multiplication in G1 with a fixed
+// random scalar.
+func BenchmarkG1(b *testing.B) {
+	scalar, _ := rand.Int(rand.Reader, Order)
+	b.ResetTimer()
+
+	for n := b.N; n > 0; n-- {
+		new(G1).ScalarBaseMult(scalar)
+	}
+}
+
+// BenchmarkG2 measures a base-point scalar multiplication in G2 with a fixed
+// random scalar.
+func BenchmarkG2(b *testing.B) {
+	scalar, _ := rand.Int(rand.Reader, Order)
+	b.ResetTimer()
+
+	for n := b.N; n > 0; n-- {
+		new(G2).ScalarBaseMult(scalar)
+	}
+}
+// BenchmarkPairing measures one pairing of the two group generators.
+func BenchmarkPairing(b *testing.B) {
+	for n := b.N; n > 0; n-- {
+		Pair(&G1{curveGen}, &G2{twistGen})
+	}
+}
diff --git a/crypto/bn256/cloudflare/constants.go b/crypto/bn256/cloudflare/constants.go
new file mode 100644
index 000000000..5122aae64
--- /dev/null
+++ b/crypto/bn256/cloudflare/constants.go
@@ -0,0 +1,59 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bn256
+
+import (
+	"math/big"
+)
+
+// bigFromBase10 parses s as a base-10 integer. The success flag reported by
+// SetString is discarded, so the result is nil when s is not a valid number.
+func bigFromBase10(s string) *big.Int {
+	value, _ := new(big.Int).SetString(s, 10)
+	return value
+}
+
+// u is the BN parameter that determines the prime.
+var u = bigFromBase10("4965661367192848881")
+
+// Order is the number of elements in both G₁ and G₂: 36u⁴+36u³+18u²+6u+1.
+var Order = bigFromBase10("21888242871839275222246405745257275088548364400416034343698204186575808495617")
+
+// P is a prime over which we form a basic field: 36u⁴+36u³+24u²+6u+1.
+var P = bigFromBase10("21888242871839275222246405745257275088696311157297823662689037894645226208583")
+
+// p2 is p, represented as little-endian 64-bit words.
+var p2 = [4]uint64{0x3c208c16d87cfd47, 0x97816a916871ca8d, 0xb85045b68181585d, 0x30644e72e131a029}
+
+// np is the negative inverse of p, mod 2^256.
+var np = [4]uint64{0x87d20782e4866389, 0x9ede7d651eca6ac9, 0xd8afcbd01833da80, 0xf57a22b791888c6b}
+
+// rN1 is R^-1 where R = 2^256 mod p.
+var rN1 = &gfP{0xed84884a014afa37, 0xeb2022850278edf8, 0xcf63e9cfb74492d9, 0x2e67157159e5c639}
+
+// r2 is R^2 where R = 2^256 mod p.
+var r2 = &gfP{0xf32cfc5b538afa89, 0xb5e71911d44501fb, 0x47ab1eff0a417ff6, 0x06d89f71cab8351f}
+
+// r3 is R^3 where R = 2^256 mod p.
+var r3 = &gfP{0xb1cd6dafda1530df, 0x62f210e6a7283db6, 0xef7f0b0c0ada0afb, 0x20fd6e902d592544}
+
+// xiToPMinus1Over6 is ξ^((p-1)/6) where ξ = i+9.
+var xiToPMinus1Over6 = &gfP2{gfP{0xa222ae234c492d72, 0xd00f02a4565de15b, 0xdc2ff3a253dfc926, 0x10a75716b3899551}, gfP{0xaf9ba69633144907, 0xca6b1d7387afb78a, 0x11bded5ef08a2087, 0x02f34d751a1f3a7c}}
+
+// xiToPMinus1Over3 is ξ^((p-1)/3) where ξ = i+9.
+var xiToPMinus1Over3 = &gfP2{gfP{0x6e849f1ea0aa4757, 0xaa1c7b6d89f89141, 0xb6e713cdfae0ca3a, 0x26694fbb4e82ebc3}, gfP{0xb5773b104563ab30, 0x347f91c8a9aa6454, 0x7a007127242e0991, 0x1956bcd8118214ec}}
+
+// xiToPMinus1Over2 is ξ^((p-1)/2) where ξ = i+9.
+var xiToPMinus1Over2 = &gfP2{gfP{0xa1d77ce45ffe77c7, 0x07affd117826d1db, 0x6d16bd27bb7edc6b, 0x2c87200285defecc}, gfP{0xe4bbdd0c2936b629, 0xbb30f162e133bacb, 0x31a9d1b6f9645366, 0x253570bea500f8dd}}
+
+// xiToPSquaredMinus1Over3 is ξ^((p²-1)/3) where ξ = i+9.
+var xiToPSquaredMinus1Over3 = &gfP{0x3350c88e13e80b9c, 0x7dce557cdb5e56b9, 0x6001b4b8b615564a, 0x2682e617020217e0}
+
+// xiTo2PSquaredMinus2Over3 is ξ^((2p²-2)/3) where ξ = i+9 (a cubic root of unity, mod p).
+var xiTo2PSquaredMinus2Over3 = &gfP{0x71930c11d782e155, 0xa6bb947cffbe3323, 0xaa303344d4741444, 0x2c3b3f0d26594943}
+
+// xiToPSquaredMinus1Over6 is ξ^((p²-1)/6) where ξ = i+9 (a cubic root of -1, mod p).
+var xiToPSquaredMinus1Over6 = &gfP{0xca8d800500fa1bf2, 0xf0c5d61468b39769, 0x0e201271ad0d4418, 0x04290f65bad856e6}
+
+// xiTo2PMinus2Over3 is ξ^((2p-2)/3) where ξ = i+9.
+var xiTo2PMinus2Over3 = &gfP2{gfP{0x5dddfd154bd8c949, 0x62cb29a5a4445b60, 0x37bc870a0c7dd2b9, 0x24830a9d3171f0fd}, gfP{0x7361d77f843abe92, 0xa5bb2bd3273411fb, 0x9c941f314b3e2399, 0x15df9cddbb9fd3ec}}
diff --git a/crypto/bn256/cloudflare/curve.go b/crypto/bn256/cloudflare/curve.go
new file mode 100644
index 000000000..b6aecc0a6
--- /dev/null
+++ b/crypto/bn256/cloudflare/curve.go
@@ -0,0 +1,229 @@
+package bn256
+
+import (
+	"math/big"
+)
+
+// curvePoint implements the elliptic curve y²=x³+3. Points are kept in Jacobian
+// form and t=z² when valid. G₁ is the set of points of this curve on GF(p).
+// A point with z equal to zero is the point at infinity.
+type curvePoint struct {
+	x, y, z, t gfP
+}
+
+// curveB is the constant term 3 of the curve equation y²=x³+3.
+var curveB = newGFp(3)
+
+// curveGen is the generator of G₁: the affine point (1, 2), stored with z=1
+// and t=1.
+var curveGen = &curvePoint{
+	x: *newGFp(1),
+	y: *newGFp(2),
+	z: *newGFp(1),
+	t: *newGFp(1),
+}
+
+func (c *curvePoint) String() string {
+	c.MakeAffine()
+	x, y := &gfP{}, &gfP{}
+	montDecode(x, &c.x)
+	montDecode(y, &c.y)
+	return "(" + x.String() + ", " + y.String() + ")"
+}
+
+// Set copies all four coordinates of a into c.
+func (c *curvePoint) Set(a *curvePoint) {
+	*c = *a
+}
+
+// IsOnCurve returns true iff c is on the curve.
+func (c *curvePoint) IsOnCurve() bool {
+	c.MakeAffine()
+	if c.IsInfinity() {
+		return true
+	}
+
+	y2, x3 := &gfP{}, &gfP{}
+	gfpMul(y2, &c.y, &c.y)
+	gfpMul(x3, &c.x, &c.x)
+	gfpMul(x3, x3, &c.x)
+	gfpAdd(x3, x3, curveB)
+
+	return *y2 == *x3
+}
+
+// SetInfinity sets c to the point at infinity, represented as (0, 1, 0) with
+// t=0.
+func (c *curvePoint) SetInfinity() {
+	*c = curvePoint{y: *newGFp(1)}
+}
+
+func (c *curvePoint) IsInfinity() bool {
+	return c.z == gfP{0}
+}
+
+// Add sets c to a+b in Jacobian coordinates. If either input is the point at
+// infinity the other is copied into c, and if both inputs represent the same
+// point the work is delegated to Double. The t coordinate of c is not written
+// on the general addition path.
+func (c *curvePoint) Add(a, b *curvePoint) {
+	if a.IsInfinity() {
+		c.Set(b)
+		return
+	}
+	if b.IsInfinity() {
+		c.Set(a)
+		return
+	}
+
+	// See http://hyperelliptic.org/EFD/g1p/auto-code/shortw/jacobian-0/addition/add-2007-bl.op3
+
+	// Normalize the points by replacing a = [x1:y1:z1] and b = [x2:y2:z2]
+	// by [u1:s1:z1·z2] and [u2:s2:z1·z2]
+	// where u1 = x1·z2², s1 = y1·z2³ and u2 = x2·z1², s2 = y2·z1³
+	z12, z22 := &gfP{}, &gfP{}
+	gfpMul(z12, &a.z, &a.z)
+	gfpMul(z22, &b.z, &b.z)
+
+	u1, u2 := &gfP{}, &gfP{}
+	gfpMul(u1, &a.x, z22)
+	gfpMul(u2, &b.x, z12)
+
+	t, s1 := &gfP{}, &gfP{}
+	gfpMul(t, &b.z, z22)
+	gfpMul(s1, &a.y, t)
+
+	s2 := &gfP{}
+	gfpMul(t, &a.z, z12)
+	gfpMul(s2, &b.y, t)
+
+	// Compute x = (2h)²(s²-u1-u2)
+	// where s = (s2-s1)/(u2-u1) is the slope of the line through
+	// (u1,s1) and (u2,s2). The extra factor 2h = 2(u2-u1) comes from the value of z below.
+	// This is also:
+	// 4(s2-s1)² - 4h²(u1+u2) = 4(s2-s1)² - 4h³ - 4h²(2u1)
+	//                        = r² - j - 2v
+	// with the notations below.
+	h := &gfP{}
+	gfpSub(h, u2, u1)
+	xEqual := *h == gfP{0}
+
+	gfpAdd(t, h, h)
+	// i = 4h²
+	i := &gfP{}
+	gfpMul(i, t, t)
+	// j = 4h³
+	j := &gfP{}
+	gfpMul(j, h, i)
+
+	gfpSub(t, s2, s1)
+	yEqual := *t == gfP{0}
+	// Equal normalized x and y mean a and b are the same point: double instead.
+	if xEqual && yEqual {
+		c.Double(a)
+		return
+	}
+	r := &gfP{}
+	gfpAdd(r, t, t)
+
+	v := &gfP{}
+	gfpMul(v, u1, i)
+
+	// t4 = 4(s2-s1)²
+	t4, t6 := &gfP{}, &gfP{}
+	gfpMul(t4, r, r)
+	gfpAdd(t, v, v)
+	gfpSub(t6, t4, j)
+
+	gfpSub(&c.x, t6, t)
+
+	// Set y = -(2h)³(s1 + s*(x/4h²-u1))
+	// This is also
+	// y = - 2·s1·j - (s2-s1)(2x - 2i·u1) = r(v-x) - 2·s1·j
+	gfpSub(t, v, &c.x) // t7
+	gfpMul(t4, s1, j)  // t8
+	gfpAdd(t6, t4, t4) // t9
+	gfpMul(t4, r, t)   // t10
+	gfpSub(&c.y, t4, t6)
+
+	// Set z = 2(u2-u1)·z1·z2 = 2h·z1·z2
+	gfpAdd(t, &a.z, &b.z) // t11
+	gfpMul(t4, t, t)      // t12
+	gfpSub(t, t4, z12)    // t13
+	gfpSub(t4, t, z22)    // t14
+	gfpMul(&c.z, t4, h)
+}
+
+// Double sets c to 2·a in Jacobian coordinates. c may alias a. The t
+// coordinate of c is not written.
+func (c *curvePoint) Double(a *curvePoint) {
+	// See http://hyperelliptic.org/EFD/g1p/auto-code/shortw/jacobian-0/doubling/dbl-2009-l.op3
+	A, B, C := &gfP{}, &gfP{}, &gfP{}
+	gfpMul(A, &a.x, &a.x) // A = x²
+	gfpMul(B, &a.y, &a.y) // B = y²
+	gfpMul(C, B, B)       // C = y⁴
+
+	t, t2 := &gfP{}, &gfP{}
+	gfpAdd(t, &a.x, B)
+	gfpMul(t2, t, t)
+	gfpSub(t, t2, A)
+	gfpSub(t2, t, C)
+
+	d, e, f := &gfP{}, &gfP{}, &gfP{}
+	gfpAdd(d, t2, t2) // d = 2((x+B)²-A-C)
+	gfpAdd(t, A, A)
+	gfpAdd(e, t, A) // e = 3A
+	gfpMul(f, e, e) // f = e²
+
+	gfpAdd(t, d, d)
+	gfpSub(&c.x, f, t) // x3 = f - 2d
+
+	// z3 = 2·y·z. This must be computed before c.y is written: when c
+	// aliases a, writing c.y first would feed the new y into z3.
+	gfpMul(&c.z, &a.y, &a.z)
+	gfpAdd(&c.z, &c.z, &c.z)
+
+	gfpAdd(t, C, C)
+	gfpAdd(t2, t, t)
+	gfpAdd(t, t2, t2) // t = 8C
+	gfpSub(&c.y, d, &c.x)
+	gfpMul(t2, e, &c.y)
+	gfpSub(&c.y, t2, t) // y3 = e(d-x3) - 8C
+}
+
+// Mul sets c to scalar·a using left-to-right double-and-add over the bits of
+// scalar. The loop starts at index BitLen(), one above the top set bit.
+func (c *curvePoint) Mul(a *curvePoint, scalar *big.Int) {
+	acc, doubled := new(curvePoint), new(curvePoint)
+	acc.SetInfinity()
+
+	for bit := scalar.BitLen(); bit >= 0; bit-- {
+		doubled.Double(acc)
+		if scalar.Bit(bit) == 0 {
+			acc.Set(doubled)
+			continue
+		}
+		acc.Add(doubled, a)
+	}
+	c.Set(acc)
+}
+
+func (c *curvePoint) MakeAffine() {
+	if c.z == *newGFp(1) {
+		return
+	} else if c.z == *newGFp(0) {
+		c.x = gfP{0}
+		c.y = *newGFp(1)
+		c.t = gfP{0}
+		return
+	}
+
+	zInv := &gfP{}
+	zInv.Invert(&c.z)
+
+	t, zInv2 := &gfP{}, &gfP{}
+	gfpMul(t, &c.y, zInv)
+	gfpMul(zInv2, zInv, zInv)
+
+	gfpMul(&c.x, &c.x, zInv2)
+	gfpMul(&c.y, t, zInv2)
+
+	c.z = *newGFp(1)
+	c.t = *newGFp(1)
+}
+
+// Neg sets c to -a: x and z are copied, y is negated and t is cleared.
+func (c *curvePoint) Neg(a *curvePoint) {
+	c.x, c.z = a.x, a.z
+	gfpNeg(&c.y, &a.y)
+	c.t = gfP{}
+}
diff --git a/crypto/bn256/cloudflare/example_test.go b/crypto/bn256/cloudflare/example_test.go
new file mode 100644
index 000000000..2ee545c67
--- /dev/null
+++ b/crypto/bn256/cloudflare/example_test.go
@@ -0,0 +1,45 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64,!appengine,!gccgo
+
+package bn256
+
+import (
+	"crypto/rand"
+)
+
+func ExamplePair() {
+	// Tripartite Diffie-Hellman, following "A One Round Protocol for
+	// Tripartite Diffie-Hellman", A. Joux.
+	// http://www.springerlink.com/content/cddc57yyva0hburb/fulltext.pdf
+
+	// The three parties a, b and c each pick a private scalar.
+	a, _ := rand.Int(rand.Reader, Order)
+	b, _ := rand.Int(rand.Reader, Order)
+	c, _ := rand.Int(rand.Reader, Order)
+
+	// Each party publishes g₁ and g₂ multiplied by its private scalar.
+	pa, qa := new(G1).ScalarBaseMult(a), new(G2).ScalarBaseMult(a)
+	pb, qb := new(G1).ScalarBaseMult(b), new(G2).ScalarBaseMult(b)
+	pc, qc := new(G1).ScalarBaseMult(c), new(G2).ScalarBaseMult(c)
+
+	// After exchanging public values, each party pairs the other two
+	// parties' values and applies its own private scalar.
+	k1 := Pair(pb, qc)
+	k1.ScalarMult(k1, a)
+
+	k2 := Pair(pc, qa)
+	k2.ScalarMult(k2, b)
+
+	k3 := Pair(pa, qb)
+	k3.ScalarMult(k3, c)
+
+	// k1, k2 and k3 will all be equal.
+}
diff --git a/crypto/bn256/cloudflare/gfp.go b/crypto/bn256/cloudflare/gfp.go
new file mode 100644
index 000000000..e8e84e7b3
--- /dev/null
+++ b/crypto/bn256/cloudflare/gfp.go
@@ -0,0 +1,81 @@
+package bn256
+
+import (
+	"errors"
+	"fmt"
+)
+
+// gfP is an element of the base field, stored as four 64-bit words with the
+// least significant word first.
+type gfP [4]uint64
+
+// newGFp returns a new field element representing x, passed through
+// montEncode. Negative x is built from its magnitude and then negated.
+func newGFp(x int64) (out *gfP) {
+	if x < 0 {
+		out = &gfP{uint64(-x)}
+		gfpNeg(out, out)
+	} else {
+		out = &gfP{uint64(x)}
+	}
+
+	montEncode(out, out)
+	return out
+}
+
+// String returns the four words of e as 64 hexadecimal digits, most
+// significant word first.
+func (e *gfP) String() string {
+	w3, w2, w1, w0 := e[3], e[2], e[1], e[0]
+	return fmt.Sprintf("%16.16x%16.16x%16.16x%16.16x", w3, w2, w1, w0)
+}
+
+// Set copies the four words of f into e.
+func (e *gfP) Set(f *gfP) {
+	*e = *f
+}
+
+// Invert sets e to the multiplicative inverse of f, computed by raising f to
+// the power p-2 with a right-to-left square-and-multiply loop.
+func (e *gfP) Invert(f *gfP) {
+	// bits holds the exponent p-2 as little-endian 64-bit words (p2 with 2
+	// subtracted from the lowest word).
+	bits := [4]uint64{0x3c208c16d87cfd45, 0x97816a916871ca8d, 0xb85045b68181585d, 0x30644e72e131a029}
+
+	sum, power := &gfP{}, &gfP{}
+	// NOTE(review): the accumulator starts at rN1 and is multiplied by r3 at
+	// the end; presumably this compensates for the Montgomery factors picked
+	// up by the gfpMul calls — confirm against gfpMul.
+	sum.Set(rN1)
+	power.Set(f)
+
+	for word := 0; word < 4; word++ {
+		for bit := uint(0); bit < 64; bit++ {
+			// Multiply in the current power of f when this exponent bit is set.
+			if (bits[word]>>bit)&1 == 1 {
+				gfpMul(sum, sum, power)
+			}
+			// Square for the next bit.
+			gfpMul(power, power, power)
+		}
+	}
+
+	gfpMul(sum, sum, r3)
+	e.Set(sum)
+}
+
+// Marshal writes e into the first 32 bytes of out as a big-endian number:
+// most significant word first, most significant byte of each word first.
+func (e *gfP) Marshal(out []byte) {
+	for w := 0; w < 4; w++ {
+		word := e[3-w]
+		for b := 0; b < 8; b++ {
+			out[8*w+b] = byte(word >> uint(56-8*b))
+		}
+	}
+}
+
+// Unmarshal sets e to the 256-bit big-endian number held in the first 32
+// bytes of in. It returns an error if that number is not strictly below the
+// field modulus; e is overwritten either way.
+func (e *gfP) Unmarshal(in []byte) error {
+	// Unmarshal the bytes into little endian form
+	for w := uint(0); w < 4; w++ {
+		// Start each word from zero so that the previous contents of e do not
+		// leak into the result when the receiver is reused.
+		e[3-w] = 0
+		for b := uint(0); b < 8; b++ {
+			e[3-w] += uint64(in[8*w+b]) << (56 - 8*b)
+		}
+	}
+	// Ensure the point respects the curve modulus: compare against p2 from
+	// the most significant word down.
+	for i := 3; i >= 0; i-- {
+		if e[i] < p2[i] {
+			return nil
+		}
+		if e[i] > p2[i] {
+			return errors.New("bn256: coordinate exceeds modulus")
+		}
+	}
+	return errors.New("bn256: coordinate equals modulus")
+}
+
+// montEncode sets c to gfpMul(a, r2), converting a into the package's
+// Montgomery representation.
+func montEncode(c, a *gfP) {
+	gfpMul(c, a, r2)
+}
+// montDecode sets c to gfpMul(a, 1) with the raw word value 1, converting a
+// out of the package's Montgomery representation.
+func montDecode(c, a *gfP) {
+	one := gfP{1}
+	gfpMul(c, a, &one)
+}
diff --git a/crypto/bn256/cloudflare/gfp.h b/crypto/bn256/cloudflare/gfp.h
new file mode 100644
index 000000000..66f5a4d07
--- /dev/null
+++ b/crypto/bn256/cloudflare/gfp.h
@@ -0,0 +1,32 @@
+// storeBlock writes the four 64-bit values a0..a3 to memory at offsets 0, 8,
+// 16 and 24 from r.
+#define storeBlock(a0,a1,a2,a3, r) \
+	MOVQ a0,  0+r \
+	MOVQ a1,  8+r \
+	MOVQ a2, 16+r \
+	MOVQ a3, 24+r
+
+// loadBlock reads four 64-bit values from memory at offsets 0, 8, 16 and 24
+// from r into a0..a3.
+#define loadBlock(r, a0,a1,a2,a3) \
+	MOVQ  0+r, a0 \
+	MOVQ  8+r, a1 \
+	MOVQ 16+r, a2 \
+	MOVQ 24+r, a3
+
+// gfpCarry conditionally subtracts the modulus: it computes the five-word
+// value b = a - p2 and, when that subtraction does not borrow, copies
+// b0..b3 over a0..a3; otherwise a0..a3 are left unchanged. b0..b4 are
+// clobbered.
+#define gfpCarry(a0,a1,a2,a3,a4, b0,b1,b2,b3,b4) \
+	\ // b = a-p
+	MOVQ a0, b0 \
+	MOVQ a1, b1 \
+	MOVQ a2, b2 \
+	MOVQ a3, b3 \
+	MOVQ a4, b4 \
+	\
+	SUBQ ·p2+0(SB), b0 \
+	SBBQ ·p2+8(SB), b1 \
+	SBBQ ·p2+16(SB), b2 \
+	SBBQ ·p2+24(SB), b3 \
+	SBBQ $0, b4 \
+	\
+	\ // if b is negative then return a
+	\ // else return b
+	CMOVQCC b0, a0 \
+	CMOVQCC b1, a1 \
+	CMOVQCC b2, a2 \
+	CMOVQCC b3, a3
diff --git a/crypto/bn256/cloudflare/gfp12.go b/crypto/bn256/cloudflare/gfp12.go
new file mode 100644
index 000000000..93fb368a7
--- /dev/null
+++ b/crypto/bn256/cloudflare/gfp12.go
@@ -0,0 +1,160 @@
+package bn256
+
+// For details of the algorithms used, see "Multiplication and Squaring on
+// Pairing-Friendly Fields", Devegili et al.
+// http://eprint.iacr.org/2006/471.pdf.
+
+import (
+	"math/big"
+)
+
+// gfP12 implements the field of size p¹² as a quadratic extension of gfP6
+// where ω²=τ.
+type gfP12 struct {
+	x, y gfP6 // value is xω + y
+}
+
+// String returns the two gfP6 coordinates of e formatted as "(x,y)".
+func (e *gfP12) String() string {
+	xs, ys := e.x.String(), e.y.String()
+	return "(" + xs + "," + ys + ")"
+}
+
+// Set copies a into e and returns e.
+func (e *gfP12) Set(a *gfP12) *gfP12 {
+	*e = *a
+	return e
+}
+
+// SetZero sets e to zero and returns e.
+func (e *gfP12) SetZero() *gfP12 {
+	*e = gfP12{}
+	return e
+}
+
+// SetOne sets e to one (x = 0, y = 1) and returns e.
+func (e *gfP12) SetOne() *gfP12 {
+	e.x = gfP6{}
+	e.y.SetOne()
+	return e
+}
+
+// IsZero reports whether both coordinates of e are zero.
+func (e *gfP12) IsZero() bool {
+	if !e.x.IsZero() {
+		return false
+	}
+	return e.y.IsZero()
+}
+
+// IsOne reports whether e equals one, i.e. x is zero and y is one.
+func (e *gfP12) IsOne() bool {
+	if !e.x.IsZero() {
+		return false
+	}
+	return e.y.IsOne()
+}
+
+// Conjugate sets e to the conjugate of a, -xω + y, and returns e.
+func (e *gfP12) Conjugate(a *gfP12) *gfP12 {
+	ax, ay := &a.x, &a.y
+	e.x.Neg(ax)
+	e.y.Set(ay)
+	return e
+}
+
+// Neg sets e to -a and returns e.
+func (e *gfP12) Neg(a *gfP12) *gfP12 {
+	ax, ay := &a.x, &a.y
+	e.x.Neg(ax)
+	e.y.Neg(ay)
+	return e
+}
+
+// Frobenius sets e to a^p and returns e, using
+// (xω+y)^p = x^p ω·ξ^((p-1)/6) + y^p.
+func (e *gfP12) Frobenius(a *gfP12) *gfP12 {
+	// x coordinate: Frobenius in gfP6, then the ξ^((p-1)/6) correction.
+	e.x.Frobenius(&a.x)
+	e.x.MulScalar(&e.x, xiToPMinus1Over6)
+	// y coordinate needs no correction factor.
+	e.y.Frobenius(&a.y)
+	return e
+}
+
+// FrobeniusP2 sets e to a^(p²) and returns e, using
+// (xω+y)^p² = x^p² ω·ξ^((p²-1)/6) + y^p².
+func (e *gfP12) FrobeniusP2(a *gfP12) *gfP12 {
+	ex, ey := &e.x, &e.y
+	ex.FrobeniusP2(&a.x)
+	ex.MulGFP(ex, xiToPSquaredMinus1Over6)
+	ey.FrobeniusP2(&a.y)
+	return e
+}
+
+// FrobeniusP4 sets e to the result of applying FrobeniusP4 to both gfP6
+// coordinates of a, scaling the x coordinate by xiToPSquaredMinus1Over3, and
+// returns e.
+func (e *gfP12) FrobeniusP4(a *gfP12) *gfP12 {
+	ex, ey := &e.x, &e.y
+	ex.FrobeniusP4(&a.x)
+	ex.MulGFP(ex, xiToPSquaredMinus1Over3)
+	ey.FrobeniusP4(&a.y)
+	return e
+}
+
+// Add sets e to a+b, coordinate by coordinate, and returns e.
+func (e *gfP12) Add(a, b *gfP12) *gfP12 {
+	ex, ey := &e.x, &e.y
+	ex.Add(&a.x, &b.x)
+	ey.Add(&a.y, &b.y)
+	return e
+}
+
+// Sub sets e to a-b, coordinate by coordinate, and returns e.
+func (e *gfP12) Sub(a, b *gfP12) *gfP12 {
+	ex, ey := &e.x, &e.y
+	ex.Sub(&a.x, &b.x)
+	ey.Sub(&a.y, &b.y)
+	return e
+}
+
+// Mul sets e to a·b and returns e. With ω²=τ:
+// (a.x·ω + a.y)(b.x·ω + b.y) = (a.x·b.y + b.x·a.y)ω + (a.y·b.y + a.x·b.x·τ).
+// Results are built in temporaries and stored into e last, so e may alias a
+// or b.
+func (e *gfP12) Mul(a, b *gfP12) *gfP12 {
+	// tx = a.x·b.y + b.x·a.y
+	tx := (&gfP6{}).Mul(&a.x, &b.y)
+	t := (&gfP6{}).Mul(&b.x, &a.y)
+	tx.Add(tx, t)
+
+	// ty = a.y·b.y, t = a.x·b.x·τ
+	ty := (&gfP6{}).Mul(&a.y, &b.y)
+	t.Mul(&a.x, &b.x).MulTau(t)
+
+	e.x.Set(tx)
+	e.y.Add(ty, t)
+	return e
+}
+
+// MulScalar sets e to a·b, where b is an element of gfP6 multiplied into
+// both coordinates of a, and returns e. e may alias a.
+func (e *gfP12) MulScalar(a *gfP12, b *gfP6) *gfP12 {
+	// Read from a, not e: the previous code multiplied e's own coordinates
+	// and silently ignored a unless the two aliased.
+	e.x.Mul(&a.x, b)
+	e.y.Mul(&a.y, b)
+	return e
+}
+
+// Exp sets c to a raised to power, using left-to-right square-and-multiply
+// over the bits of power, and returns c.
+func (c *gfP12) Exp(a *gfP12, power *big.Int) *gfP12 {
+	acc := new(gfP12).SetOne()
+	squared := new(gfP12)
+
+	for bit := power.BitLen() - 1; bit >= 0; bit-- {
+		squared.Square(acc)
+		if power.Bit(bit) == 0 {
+			acc.Set(squared)
+			continue
+		}
+		acc.Mul(squared, a)
+	}
+
+	return c.Set(acc)
+}
+
+// Square sets e to a² and returns e. Results are built in temporaries and
+// stored into e last, so e may alias a.
+func (e *gfP12) Square(a *gfP12) *gfP12 {
+	// Complex squaring algorithm
+	// v0 = x·y
+	v0 := (&gfP6{}).Mul(&a.x, &a.y)
+
+	// ty = (x+y)(y+τx) - v0 - τ·v0 = y² + τx²
+	t := (&gfP6{}).MulTau(&a.x)
+	t.Add(&a.y, t)
+	ty := (&gfP6{}).Add(&a.x, &a.y)
+	ty.Mul(ty, t).Sub(ty, v0)
+	t.MulTau(v0)
+	ty.Sub(ty, t)
+
+	// New x coordinate is 2xy.
+	e.x.Add(v0, v0)
+	e.y.Set(ty)
+	return e
+}
+
+// Invert sets e to the multiplicative inverse of a and returns e. It
+// multiplies the conjugate -xω + y by the gfP6 inverse of y² - τx².
+func (e *gfP12) Invert(a *gfP12) *gfP12 {
+	// See "Implementing cryptographic pairings", M. Scott, section 3.2.
+	// ftp://136.206.11.249/pub/crypto/pairings.pdf
+	t1, t2 := &gfP6{}, &gfP6{}
+
+	// t2 = 1/(y² - τx²)
+	t1.Square(&a.x)
+	t2.Square(&a.y)
+	t1.MulTau(t1).Sub(t2, t1)
+	t2.Invert(t1)
+
+	// e = conjugate(a) scaled by t2.
+	e.x.Neg(&a.x)
+	e.y.Set(&a.y)
+	e.MulScalar(e, t2)
+	return e
+}
diff --git a/crypto/bn256/cloudflare/gfp2.go b/crypto/bn256/cloudflare/gfp2.go
new file mode 100644
index 000000000..90a89e8b4
--- /dev/null
+++ b/crypto/bn256/cloudflare/gfp2.go
@@ -0,0 +1,156 @@
+package bn256
+
+// For details of the algorithms used, see "Multiplication and Squaring on
+// Pairing-Friendly Fields", Devegili et al.
+// http://eprint.iacr.org/2006/471.pdf.
+
+// gfP2 implements a field of size p² as a quadratic extension of the base field
+// where i²=-1.
+type gfP2 struct {
+	x, y gfP // value is xi+y.
+}
+
+func gfP2Decode(in *gfP2) *gfP2 {
+	out := &gfP2{}
+	montDecode(&out.x, &in.x)
+	montDecode(&out.y, &in.y)
+	return out
+}
+
+// String returns the two coordinates of e formatted as "(x, y)".
+func (e *gfP2) String() string {
+	xs, ys := e.x.String(), e.y.String()
+	return "(" + xs + ", " + ys + ")"
+}
+
+// Set copies a into e and returns e.
+func (e *gfP2) Set(a *gfP2) *gfP2 {
+	*e = *a
+	return e
+}
+
+// SetZero sets e to zero and returns e.
+func (e *gfP2) SetZero() *gfP2 {
+	*e = gfP2{}
+	return e
+}
+
+// SetOne sets e to one (x = 0, y = 1) and returns e.
+func (e *gfP2) SetOne() *gfP2 {
+	*e = gfP2{y: *newGFp(1)}
+	return e
+}
+
+// IsZero reports whether both coordinates of e are zero.
+func (e *gfP2) IsZero() bool {
+	return *e == gfP2{}
+}
+
+func (e *gfP2) IsOne() bool {
+	zero, one := gfP{0}, *newGFp(1)
+	return e.x == zero && e.y == one
+}
+
+// Conjugate sets e to the conjugate of a, -xi + y, and returns e.
+func (e *gfP2) Conjugate(a *gfP2) *gfP2 {
+	gfpNeg(&e.x, &a.x)
+	e.y = a.y
+	return e
+}
+
+// Neg sets e to -a and returns e.
+func (e *gfP2) Neg(a *gfP2) *gfP2 {
+	ex, ey := &e.x, &e.y
+	gfpNeg(ex, &a.x)
+	gfpNeg(ey, &a.y)
+	return e
+}
+
+// Add sets e to a+b, coordinate by coordinate, and returns e.
+func (e *gfP2) Add(a, b *gfP2) *gfP2 {
+	ex, ey := &e.x, &e.y
+	gfpAdd(ex, &a.x, &b.x)
+	gfpAdd(ey, &a.y, &b.y)
+	return e
+}
+
+// Sub sets e to a-b, coordinate by coordinate, and returns e.
+func (e *gfP2) Sub(a, b *gfP2) *gfP2 {
+	ex, ey := &e.x, &e.y
+	gfpSub(ex, &a.x, &b.x)
+	gfpSub(ey, &a.y, &b.y)
+	return e
+}
+
+// Mul sets e to a·b and returns e. With i²=-1:
+// (a.x·i + a.y)(b.x·i + b.y) = (a.x·b.y + b.x·a.y)i + (a.y·b.y - a.x·b.x).
+// Results are built in temporaries and stored into e last, so e may alias a
+// or b.
+//
+// See "Multiplication and Squaring in Pairing-Friendly Fields",
+// http://eprint.iacr.org/2006/471.pdf
+func (e *gfP2) Mul(a, b *gfP2) *gfP2 {
+	// tx = a.x·b.y + b.x·a.y
+	tx, t := &gfP{}, &gfP{}
+	gfpMul(tx, &a.x, &b.y)
+	gfpMul(t, &b.x, &a.y)
+	gfpAdd(tx, tx, t)
+
+	// ty = a.y·b.y - a.x·b.x
+	ty := &gfP{}
+	gfpMul(ty, &a.y, &b.y)
+	gfpMul(t, &a.x, &b.x)
+	gfpSub(ty, ty, t)
+
+	e.x.Set(tx)
+	e.y.Set(ty)
+	return e
+}
+
+// MulScalar sets e to a·b, where b is a base-field element multiplied into
+// both coordinates of a, and returns e.
+func (e *gfP2) MulScalar(a *gfP2, b *gfP) *gfP2 {
+	ex, ey := &e.x, &e.y
+	gfpMul(ex, &a.x, b)
+	gfpMul(ey, &a.y, b)
+	return e
+}
+
+// MulXi sets e=ξa where ξ=i+9 and then returns e. The multiplications by 9
+// are done with additions only (three doublings plus one addition). Results
+// are built in temporaries and stored into e last, so e may alias a.
+func (e *gfP2) MulXi(a *gfP2) *gfP2 {
+	// (xi+y)(i+9) = (9x+y)i+(9y-x)
+	// tx = 9x
+	tx := &gfP{}
+	gfpAdd(tx, &a.x, &a.x)
+	gfpAdd(tx, tx, tx)
+	gfpAdd(tx, tx, tx)
+	gfpAdd(tx, tx, &a.x)
+
+	// tx = 9x + y
+	gfpAdd(tx, tx, &a.y)
+
+	// ty = 9y
+	ty := &gfP{}
+	gfpAdd(ty, &a.y, &a.y)
+	gfpAdd(ty, ty, ty)
+	gfpAdd(ty, ty, ty)
+	gfpAdd(ty, ty, &a.y)
+
+	// ty = 9y - x
+	gfpSub(ty, ty, &a.x)
+
+	e.x.Set(tx)
+	e.y.Set(ty)
+	return e
+}
+
+// Square sets e to a² and returns e. Results are built in temporaries and
+// stored into e last, so e may alias a.
+func (e *gfP2) Square(a *gfP2) *gfP2 {
+	// Complex squaring algorithm:
+	// (xi+y)² = (x+y)(y-x) + 2*i*x*y
+	// ty = (y-x)(x+y)
+	tx, ty := &gfP{}, &gfP{}
+	gfpSub(tx, &a.y, &a.x)
+	gfpAdd(ty, &a.x, &a.y)
+	gfpMul(ty, tx, ty)
+
+	// tx = 2xy
+	gfpMul(tx, &a.x, &a.y)
+	gfpAdd(tx, tx, tx)
+
+	e.x.Set(tx)
+	e.y.Set(ty)
+	return e
+}
+
+// Invert sets e to the multiplicative inverse of a and returns e. It
+// multiplies the conjugate -xi + y by the base-field inverse of x² + y².
+func (e *gfP2) Invert(a *gfP2) *gfP2 {
+	// See "Implementing cryptographic pairings", M. Scott, section 3.2.
+	// ftp://136.206.11.249/pub/crypto/pairings.pdf
+	// t1 = x² + y²
+	t1, t2 := &gfP{}, &gfP{}
+	gfpMul(t1, &a.x, &a.x)
+	gfpMul(t2, &a.y, &a.y)
+	gfpAdd(t1, t1, t2)
+
+	inv := &gfP{}
+	inv.Invert(t1)
+
+	// t1 is reused to hold -x.
+	gfpNeg(t1, &a.x)
+
+	gfpMul(&e.x, t1, inv)
+	gfpMul(&e.y, &a.y, inv)
+	return e
+}
diff --git a/crypto/bn256/cloudflare/gfp6.go b/crypto/bn256/cloudflare/gfp6.go
new file mode 100644
index 000000000..83d61b781
--- /dev/null
+++ b/crypto/bn256/cloudflare/gfp6.go
@@ -0,0 +1,213 @@
+package bn256
+
+// For details of the algorithms used, see "Multiplication and Squaring on
+// Pairing-Friendly Fields", Devegili et al.,
+// http://eprint.iacr.org/2006/471.pdf.
+
+// gfP6 implements the field of size p⁶ as a cubic extension of gfP2 where τ³=ξ
+// and ξ=i+9 (the constant implemented by gfP2.MulXi).
+type gfP6 struct {
+	x, y, z gfP2 // value is xτ² + yτ + z
+}
+
+// String formats e as "(x, y, z)" using the String form of each gfP2
+// coefficient.
+func (e *gfP6) String() string {
+	x, y, z := e.x.String(), e.y.String(), e.z.String()
+	return "(" + x + ", " + y + ", " + z + ")"
+}
+
+// Set copies all three coefficients of a into e and returns e.
+func (e *gfP6) Set(a *gfP6) *gfP6 {
+	e.z.Set(&a.z)
+	e.y.Set(&a.y)
+	e.x.Set(&a.x)
+	return e
+}
+
+// SetZero sets every coefficient of e to zero and returns e.
+func (e *gfP6) SetZero() *gfP6 {
+	e.z.SetZero()
+	e.y.SetZero()
+	e.x.SetZero()
+	return e
+}
+
+// SetOne sets e to the multiplicative identity (constant coefficient z = 1,
+// x = y = 0) and returns e.
+func (e *gfP6) SetOne() *gfP6 {
+	e.z.SetOne()
+	e.y.SetZero()
+	e.x.SetZero()
+	return e
+}
+
+// IsZero reports whether all three coefficients of e are zero.
+func (e *gfP6) IsZero() bool {
+	if !e.x.IsZero() {
+		return false
+	}
+	if !e.y.IsZero() {
+		return false
+	}
+	return e.z.IsZero()
+}
+
+// IsOne reports whether e is the multiplicative identity, i.e. x and y are
+// zero and z is one.
+func (e *gfP6) IsOne() bool {
+	if !e.x.IsZero() {
+		return false
+	}
+	if !e.y.IsZero() {
+		return false
+	}
+	return e.z.IsOne()
+}
+
+// Neg sets e = -a by negating each coefficient and returns e.
+func (e *gfP6) Neg(a *gfP6) *gfP6 {
+	e.z.Neg(&a.z)
+	e.y.Neg(&a.y)
+	e.x.Neg(&a.x)
+	return e
+}
+
+// Frobenius sets e to the image of a under the p-power Frobenius map and
+// returns e: every gfP2 coefficient is conjugated, then the τ² coefficient is
+// scaled by xiTo2PMinus2Over3 and the τ coefficient by xiToPMinus1Over3. The
+// constant coefficient is only conjugated.
+func (e *gfP6) Frobenius(a *gfP6) *gfP6 {
+	e.x.Conjugate(&a.x).Mul(&e.x, xiTo2PMinus2Over3)
+	e.y.Conjugate(&a.y).Mul(&e.y, xiToPMinus1Over3)
+	e.z.Conjugate(&a.z)
+	return e
+}
+
+// FrobeniusP2 sets e = a^(p²) and returns e:
+//
+//	(xτ²+yτ+z)^(p²) = xτ^(2p²) + yτ^(p²) + z
+//
+// The gfP2 coefficients are unchanged by the p² power, so only the powers of
+// τ contribute a factor, and both factors lie in the base field.
+func (e *gfP6) FrobeniusP2(a *gfP6) *gfP6 {
+	e.z.Set(&a.z)
+	// τ^(p²) = τ·τ^(p²-1) = τ·ξ^((p²-1)/3)
+	e.y.MulScalar(&a.y, xiToPSquaredMinus1Over3)
+	// τ^(2p²) = τ²·τ^(2p²-2) = τ²·ξ^((2p²-2)/3)
+	e.x.MulScalar(&a.x, xiTo2PSquaredMinus2Over3)
+	return e
+}
+
+// FrobeniusP4 sets e = a^(p⁴) and returns e. It uses the same two base-field
+// constants as FrobeniusP2 with their roles swapped: the τ² coefficient is
+// scaled by xiToPSquaredMinus1Over3 and the τ coefficient by
+// xiTo2PSquaredMinus2Over3, while the constant coefficient is copied.
+func (e *gfP6) FrobeniusP4(a *gfP6) *gfP6 {
+	e.z.Set(&a.z)
+	e.y.MulScalar(&a.y, xiTo2PSquaredMinus2Over3)
+	e.x.MulScalar(&a.x, xiToPSquaredMinus1Over3)
+	return e
+}
+
+// Add sets e = a+b coefficient by coefficient and returns e.
+func (e *gfP6) Add(a, b *gfP6) *gfP6 {
+	e.z.Add(&a.z, &b.z)
+	e.y.Add(&a.y, &b.y)
+	e.x.Add(&a.x, &b.x)
+	return e
+}
+
+// Sub sets e = a-b coefficient by coefficient and returns e.
+func (e *gfP6) Sub(a, b *gfP6) *gfP6 {
+	e.z.Sub(&a.z, &b.z)
+	e.y.Sub(&a.y, &b.y)
+	e.x.Sub(&a.x, &b.x)
+	return e
+}
+
+func (e *gfP6) Mul(a, b *gfP6) *gfP6 {
+	// "Multiplication and Squaring on Pairing-Friendly Fields"
+	// Section 4, Karatsuba method.
+	// http://eprint.iacr.org/2006/471.pdf
+	v0 := (&gfP2{}).Mul(&a.z, &b.z)
+	v1 := (&gfP2{}).Mul(&a.y, &b.y)
+	v2 := (&gfP2{}).Mul(&a.x, &b.x)
+
+	t0 := (&gfP2{}).Add(&a.x, &a.y)
+	t1 := (&gfP2{}).Add(&b.x, &b.y)
+	tz := (&gfP2{}).Mul(t0, t1)
+	tz.Sub(tz, v1).Sub(tz, v2).MulXi(tz).Add(tz, v0)
+
+	t0.Add(&a.y, &a.z)
+	t1.Add(&b.y, &b.z)
+	ty := (&gfP2{}).Mul(t0, t1)
+	t0.MulXi(v2)
+	ty.Sub(ty, v0).Sub(ty, v1).Add(ty, t0)
+
+	t0.Add(&a.x, &a.z)
+	t1.Add(&b.x, &b.z)
+	tx := (&gfP2{}).Mul(t0, t1)
+	tx.Sub(tx, v0).Add(tx, v1).Sub(tx, v2)
+
+	e.x.Set(tx)
+	e.y.Set(ty)
+	e.z.Set(tz)
+	return e
+}
+
+// MulScalar sets e = a·b, where b is an element of gfP2, by multiplying each
+// coefficient of a by b, and returns e.
+//
+// The coefficients of e are written in x, y, z order while b is still being
+// read, so b must not point into e.
+func (e *gfP6) MulScalar(a *gfP6, b *gfP2) *gfP6 {
+	e.x.Mul(&a.x, b)
+	e.y.Mul(&a.y, b)
+	e.z.Mul(&a.z, b)
+	return e
+}
+
+// MulGFP sets e = a·b, where b is an element of the base field gfP, by scaling
+// each gfP2 coefficient of a with gfP2.MulScalar, and returns e.
+//
+// The coefficients of e are written in x, y, z order while b is still being
+// read, so b must not point into e.
+func (e *gfP6) MulGFP(a *gfP6, b *gfP) *gfP6 {
+	e.x.MulScalar(&a.x, b)
+	e.y.MulScalar(&a.y, b)
+	e.z.MulScalar(&a.z, b)
+	return e
+}
+
+// MulTau computes τ·(aτ²+bτ+c) = bτ²+cτ+aξ
+func (e *gfP6) MulTau(a *gfP6) *gfP6 {
+	tz := (&gfP2{}).MulXi(&a.x)
+	ty := (&gfP2{}).Set(&a.y)
+
+	e.y.Set(&a.z)
+	e.x.Set(ty)
+	e.z.Set(tz)
+	return e
+}
+
+func (e *gfP6) Square(a *gfP6) *gfP6 {
+	v0 := (&gfP2{}).Square(&a.z)
+	v1 := (&gfP2{}).Square(&a.y)
+	v2 := (&gfP2{}).Square(&a.x)
+
+	c0 := (&gfP2{}).Add(&a.x, &a.y)
+	c0.Square(c0).Sub(c0, v1).Sub(c0, v2).MulXi(c0).Add(c0, v0)
+
+	c1 := (&gfP2{}).Add(&a.y, &a.z)
+	c1.Square(c1).Sub(c1, v0).Sub(c1, v1)
+	xiV2 := (&gfP2{}).MulXi(v2)
+	c1.Add(c1, xiV2)
+
+	c2 := (&gfP2{}).Add(&a.x, &a.z)
+	c2.Square(c2).Sub(c2, v0).Add(c2, v1).Sub(c2, v2)
+
+	e.x.Set(c2)
+	e.y.Set(c1)
+	e.z.Set(c0)
+	return e
+}
+
+// Invert sets e = a⁻¹ and returns e. It computes the cofactor Cτ²+Bτ+A whose
+// product with a is the norm F ∈ GF(p²), inverts F in gfP2 and scales the
+// cofactor by F⁻¹.
+func (e *gfP6) Invert(a *gfP6) *gfP6 {
+	// See "Implementing cryptographic pairings", M. Scott, section 3.2.
+	// ftp://136.206.11.249/pub/crypto/pairings.pdf
+
+	// Here we can give a short explanation of how it works: let j be a cubic root of
+	// unity in GF(p²) so that 1+j+j²=0.
+	// Then (xτ² + yτ + z)(xj²τ² + yjτ + z)(xjτ² + yj²τ + z)
+	// = (xτ² + yτ + z)(Cτ²+Bτ+A)
+	// = (x³ξ²+y³ξ+z³-3ξxyz) = F is an element of the base field (the norm).
+	//
+	// On the other hand (xj²τ² + yjτ + z)(xjτ² + yj²τ + z)
+	// = τ²(y²-xz) + τ(ξx²-yz) + (z²-ξxy)
+	//
+	// So that's why A = (z²-ξxy), B = (ξx²-yz), C = (y²-xz)
+	t1 := (&gfP2{}).Mul(&a.x, &a.y)
+	t1.MulXi(t1)
+
+	A := (&gfP2{}).Square(&a.z)
+	A.Sub(A, t1)
+
+	B := (&gfP2{}).Square(&a.x)
+	B.MulXi(B)
+	t1.Mul(&a.y, &a.z)
+	B.Sub(B, t1)
+
+	C := (&gfP2{}).Square(&a.y)
+	t1.Mul(&a.x, &a.z)
+	C.Sub(C, t1)
+
+	// F = ξ·C·y + A·z + ξ·B·x, the constant coefficient of a·(Cτ²+Bτ+A).
+	F := (&gfP2{}).Mul(C, &a.y)
+	F.MulXi(F)
+	t1.Mul(A, &a.z)
+	F.Add(F, t1)
+	t1.Mul(B, &a.x).MulXi(t1)
+	F.Add(F, t1)
+
+	F.Invert(F)
+
+	e.x.Mul(C, F)
+	e.y.Mul(B, F)
+	e.z.Mul(A, F)
+	return e
+}
diff --git a/crypto/bn256/cloudflare/gfp_amd64.go b/crypto/bn256/cloudflare/gfp_amd64.go
new file mode 100644
index 000000000..ac4f1a9c6
--- /dev/null
+++ b/crypto/bn256/cloudflare/gfp_amd64.go
@@ -0,0 +1,15 @@
+// +build amd64,!appengine,!gccgo
+
+package bn256
+
+// gfpNeg sets c to the negation of a. It is implemented in assembly in
+// gfp_amd64.s.
+//
+// The directive below must be spelled without a space after the slashes:
+// "// go:noescape" is an ordinary comment and leaves the pragma unset, which
+// forces every argument passed to gfpNeg onto the heap.
+//
+//go:noescape
+func gfpNeg(c, a *gfP)
+
+// gfpAdd sets c to the field sum of a and b. It is implemented in assembly in
+// gfp_amd64.s.
+//
+//go:noescape
+func gfpAdd(c, a, b *gfP)
+
+// gfpSub sets c to the field difference a-b. It is implemented in assembly in
+// gfp_amd64.s.
+//
+//go:noescape
+func gfpSub(c, a, b *gfP)
+
+// gfpMul sets c to the reduced product of a and b. It is implemented in
+// assembly in gfp_amd64.s, which multiplies to a 512-bit intermediate and then
+// applies gfpReduce or gfpReduceBMI2.
+//
+//go:noescape
+func gfpMul(c, a, b *gfP)
diff --git a/crypto/bn256/cloudflare/gfp_amd64.s b/crypto/bn256/cloudflare/gfp_amd64.s
new file mode 100644
index 000000000..2d0176f2e
--- /dev/null
+++ b/crypto/bn256/cloudflare/gfp_amd64.s
@@ -0,0 +1,97 @@
+// +build amd64,!appengine,!gccgo
+
+#include "gfp.h"
+#include "mul.h"
+#include "mul_bmi2.h"
+
+// func gfpNeg(c, a *gfP)
+// Computes ·p2 − a limb by limb, passes the result through gfpCarry and
+// stores the four limbs in c.
+// NOTE(review): ·p2 is presumably the field modulus as four 64-bit limbs, and
+// gfpCarry (defined in gfp.h, not shown here) presumably performs the final
+// conditional reduction — confirm against gfp.h and the constants file.
+TEXT ·gfpNeg(SB),0,$0-16
+	// R8..R11 = ·p2
+	MOVQ ·p2+0(SB), R8
+	MOVQ ·p2+8(SB), R9
+	MOVQ ·p2+16(SB), R10
+	MOVQ ·p2+24(SB), R11
+
+	// R8..R11 -= a, propagating the borrow through the limbs.
+	MOVQ a+8(FP), DI
+	SUBQ 0(DI), R8
+	SBBQ 8(DI), R9
+	SBBQ 16(DI), R10
+	SBBQ 24(DI), R11
+
+	// AX is the (zero) fifth limb handed to gfpCarry.
+	MOVQ $0, AX
+	gfpCarry(R8,R9,R10,R11,AX, R12,R13,R14,R15,BX)
+
+	MOVQ c+0(FP), DI
+	storeBlock(R8,R9,R10,R11, 0(DI))
+	RET
+
+// func gfpAdd(c, a, b *gfP)
+// Adds the four limbs of a and b, keeping the carry-out in a fifth limb (R12),
+// passes the five-limb sum through gfpCarry and stores the low four limbs in c.
+// NOTE(review): gfpCarry is defined in gfp.h (not shown here); presumably it
+// conditionally subtracts the modulus — confirm.
+TEXT ·gfpAdd(SB),0,$0-24
+	MOVQ a+8(FP), DI
+	MOVQ b+16(FP), SI
+
+	loadBlock(0(DI), R8,R9,R10,R11)
+	MOVQ $0, R12
+
+	// R12:R11:R10:R9:R8 = a + b
+	ADDQ  0(SI), R8
+	ADCQ  8(SI), R9
+	ADCQ 16(SI), R10
+	ADCQ 24(SI), R11
+	ADCQ $0, R12
+
+	gfpCarry(R8,R9,R10,R11,R12, R13,R14,R15,AX,BX)
+
+	MOVQ c+0(FP), DI
+	storeBlock(R8,R9,R10,R11, 0(DI))
+	RET
+
+// func gfpSub(c, a, b *gfP)
+// Computes a − b limb by limb. If the subtraction borrows, ·p2 is added back;
+// otherwise zero is added. The choice is made with conditional moves rather
+// than a branch. The result is stored in c.
+TEXT ·gfpSub(SB),0,$0-24
+	MOVQ a+8(FP), DI
+	MOVQ b+16(FP), SI
+
+	loadBlock(0(DI), R8,R9,R10,R11)
+
+	// R12..R15 = ·p2, AX = 0. MOVQ does not touch the flags, but these loads
+	// are placed before the subtraction anyway.
+	MOVQ ·p2+0(SB), R12
+	MOVQ ·p2+8(SB), R13
+	MOVQ ·p2+16(SB), R14
+	MOVQ ·p2+24(SB), R15
+	MOVQ $0, AX
+
+	// R8..R11 = a − b; the carry flag is set if the subtraction borrowed.
+	SUBQ  0(SI), R8
+	SBBQ  8(SI), R9
+	SBBQ 16(SI), R10
+	SBBQ 24(SI), R11
+
+	// No borrow (carry clear): replace the correction term with zero.
+	CMOVQCC AX, R12
+	CMOVQCC AX, R13
+	CMOVQCC AX, R14
+	CMOVQCC AX, R15
+
+	// Add back ·p2 (or zero).
+	ADDQ R12, R8
+	ADCQ R13, R9
+	ADCQ R14, R10
+	ADCQ R15, R11
+
+	MOVQ c+0(FP), DI
+	storeBlock(R8,R9,R10,R11, 0(DI))
+	RET
+
+// func gfpMul(c, a, b *gfP)
+// Multiplies a and b into a 512-bit intermediate held in the 160-byte stack
+// frame, reduces it, and stores the four result limbs (R12..R15) in c.
+// Two code paths exist: one using MULX (BMI2) and a fallback using MULQ.
+TEXT ·gfpMul(SB),0,$160-24
+	MOVQ a+8(FP), DI
+	MOVQ b+16(FP), SI
+
+	// Jump to a slightly different implementation if MULX isn't supported.
+	CMPB runtime·support_bmi2(SB), $0
+	JE   nobmi2Mul
+
+	// mulBMI2 leaves the product in R8..R15; spill it to 0(SP)..56(SP), where
+	// gfpReduceBMI2 expects to find it.
+	mulBMI2(0(DI),8(DI),16(DI),24(DI), 0(SI))
+	storeBlock( R8, R9,R10,R11,  0(SP))
+	storeBlock(R12,R13,R14,R15, 32(SP))
+	gfpReduceBMI2()
+	JMP end
+
+nobmi2Mul:
+	// mul writes the product directly to 0(SP)..56(SP).
+	mul(0(DI),8(DI),16(DI),24(DI), 0(SI), 0(SP))
+	gfpReduce(0(SP))
+
+end:
+	// Both reduction macros leave the result in R12..R15.
+	MOVQ c+0(FP), DI
+	storeBlock(R12,R13,R14,R15, 0(DI))
+	RET
diff --git a/crypto/bn256/cloudflare/gfp_pure.go b/crypto/bn256/cloudflare/gfp_pure.go
new file mode 100644
index 000000000..8fa5d3053
--- /dev/null
+++ b/crypto/bn256/cloudflare/gfp_pure.go
@@ -0,0 +1,19 @@
+//  +build !amd64 appengine gccgo
+
+package bn256
+
+// gfpNeg is the stub compiled when the amd64 assembly is excluded by this
+// file's build constraint. It always panics.
+func gfpNeg(c, a *gfP) {
+	panic("unsupported architecture")
+}
+
+// gfpAdd is the stub compiled when the amd64 assembly is excluded by this
+// file's build constraint. It always panics.
+func gfpAdd(c, a, b *gfP) {
+	panic("unsupported architecture")
+}
+
+// gfpSub is the stub compiled when the amd64 assembly is excluded by this
+// file's build constraint. It always panics.
+func gfpSub(c, a, b *gfP) {
+	panic("unsupported architecture")
+}
+
+// gfpMul is the stub compiled when the amd64 assembly is excluded by this
+// file's build constraint. It always panics.
+func gfpMul(c, a, b *gfP) {
+	panic("unsupported architecture")
+}
diff --git a/crypto/bn256/cloudflare/gfp_test.go b/crypto/bn256/cloudflare/gfp_test.go
new file mode 100644
index 000000000..aff5e0531
--- /dev/null
+++ b/crypto/bn256/cloudflare/gfp_test.go
@@ -0,0 +1,62 @@
+// +build amd64,!appengine,!gccgo
+
+package bn256
+
+import (
+	"testing"
+)
+
+// Tests that negation works the same way on both assembly-optimized and pure Go
+// implementation.
+func TestGFpNeg(t *testing.T) {
+	n := &gfP{0x0123456789abcdef, 0xfedcba9876543210, 0xdeadbeefdeadbeef, 0xfeebdaedfeebdaed}
+	w := &gfP{0xfedcba9876543211, 0x0123456789abcdef, 0x2152411021524110, 0x0114251201142512}
+	h := &gfP{}
+
+	gfpNeg(h, n)
+	if *h != *w {
+		t.Errorf("negation mismatch: have %#x, want %#x", *h, *w)
+	}
+}
+
+// Tests that addition works the same way on both assembly-optimized and pure Go
+// implementation.
+func TestGFpAdd(t *testing.T) {
+	a := &gfP{0x0123456789abcdef, 0xfedcba9876543210, 0xdeadbeefdeadbeef, 0xfeebdaedfeebdaed}
+	b := &gfP{0xfedcba9876543210, 0x0123456789abcdef, 0xfeebdaedfeebdaed, 0xdeadbeefdeadbeef}
+	w := &gfP{0xc3df73e9278302b8, 0x687e956e978e3572, 0x254954275c18417f, 0xad354b6afc67f9b4}
+	h := &gfP{}
+
+	gfpAdd(h, a, b)
+	if *h != *w {
+		t.Errorf("addition mismatch: have %#x, want %#x", *h, *w)
+	}
+}
+
+// Tests that subtraction works the same way on both assembly-optimized and pure Go
+// implementation.
+func TestGFpSub(t *testing.T) {
+	a := &gfP{0x0123456789abcdef, 0xfedcba9876543210, 0xdeadbeefdeadbeef, 0xfeebdaedfeebdaed}
+	b := &gfP{0xfedcba9876543210, 0x0123456789abcdef, 0xfeebdaedfeebdaed, 0xdeadbeefdeadbeef}
+	w := &gfP{0x02468acf13579bdf, 0xfdb97530eca86420, 0xdfc1e401dfc1e402, 0x203e1bfe203e1bfd}
+	h := &gfP{}
+
+	gfpSub(h, a, b)
+	if *h != *w {
+		t.Errorf("subtraction mismatch: have %#x, want %#x", *h, *w)
+	}
+}
+
+// Tests that multiplication works the same way on both assembly-optimized and pure Go
+// implementation.
+func TestGFpMul(t *testing.T) {
+	a := &gfP{0x0123456789abcdef, 0xfedcba9876543210, 0xdeadbeefdeadbeef, 0xfeebdaedfeebdaed}
+	b := &gfP{0xfedcba9876543210, 0x0123456789abcdef, 0xfeebdaedfeebdaed, 0xdeadbeefdeadbeef}
+	w := &gfP{0xcbcbd377f7ad22d3, 0x3b89ba5d849379bf, 0x87b61627bd38b6d2, 0xc44052a2a0e654b2}
+	h := &gfP{}
+
+	gfpMul(h, a, b)
+	if *h != *w {
+		t.Errorf("multiplication mismatch: have %#x, want %#x", *h, *w)
+	}
+}
diff --git a/crypto/bn256/cloudflare/main_test.go b/crypto/bn256/cloudflare/main_test.go
new file mode 100644
index 000000000..f0d59a404
--- /dev/null
+++ b/crypto/bn256/cloudflare/main_test.go
@@ -0,0 +1,73 @@
+// +build amd64,!appengine,!gccgo
+
+package bn256
+
+import (
+	"testing"
+
+	"crypto/rand"
+)
+
+// TestRandomG2Marshal draws ten random G2 elements and logs each scalar with
+// the marshalled point. It fails only if RandomG2 returns an error.
+func TestRandomG2Marshal(t *testing.T) {
+	for i := 0; i < 10; i++ {
+		n, g2, err := RandomG2(rand.Reader)
+		if err == nil {
+			t.Logf("%d: %x\n", n, g2.Marshal())
+			continue
+		}
+		t.Error(err)
+	}
+}
+
+func TestPairings(t *testing.T) {
+	a1 := new(G1).ScalarBaseMult(bigFromBase10("1"))
+	a2 := new(G1).ScalarBaseMult(bigFromBase10("2"))
+	a37 := new(G1).ScalarBaseMult(bigFromBase10("37"))
+	an1 := new(G1).ScalarBaseMult(bigFromBase10("21888242871839275222246405745257275088548364400416034343698204186575808495616"))
+
+	b0 := new(G2).ScalarBaseMult(bigFromBase10("0"))
+	b1 := new(G2).ScalarBaseMult(bigFromBase10("1"))
+	b2 := new(G2).ScalarBaseMult(bigFromBase10("2"))
+	b27 := new(G2).ScalarBaseMult(bigFromBase10("27"))
+	b999 := new(G2).ScalarBaseMult(bigFromBase10("999"))
+	bn1 := new(G2).ScalarBaseMult(bigFromBase10("21888242871839275222246405745257275088548364400416034343698204186575808495616"))
+
+	p1 := Pair(a1, b1)
+	pn1 := Pair(a1, bn1)
+	np1 := Pair(an1, b1)
+	if pn1.String() != np1.String() {
+		t.Error("Pairing mismatch: e(a, -b) != e(-a, b)")
+	}
+	if !PairingCheck([]*G1{a1, an1}, []*G2{b1, b1}) {
+		t.Error("MultiAte check gave false negative!")
+	}
+	p0 := new(GT).Add(p1, pn1)
+	p0_2 := Pair(a1, b0)
+	if p0.String() != p0_2.String() {
+		t.Error("Pairing mismatch: e(a, b) * e(a, -b) != 1")
+	}
+	p0_3 := new(GT).ScalarMult(p1, bigFromBase10("21888242871839275222246405745257275088548364400416034343698204186575808495617"))
+	if p0.String() != p0_3.String() {
+		t.Error("Pairing mismatch: e(a, b) has wrong order")
+	}
+	p2 := Pair(a2, b1)
+	p2_2 := Pair(a1, b2)
+	p2_3 := new(GT).ScalarMult(p1, bigFromBase10("2"))
+	if p2.String() != p2_2.String() {
+		t.Error("Pairing mismatch: e(a, b * 2) != e(a * 2, b)")
+	}
+	if p2.String() != p2_3.String() {
+		t.Error("Pairing mismatch: e(a, b * 2) != e(a, b) ** 2")
+	}
+	if p2.String() == p1.String() {
+		t.Error("Pairing is degenerate!")
+	}
+	if PairingCheck([]*G1{a1, a1}, []*G2{b1, b1}) {
+		t.Error("MultiAte check gave false positive!")
+	}
+	p999 := Pair(a37, b27)
+	p999_2 := Pair(a1, b999)
+	if p999.String() != p999_2.String() {
+		t.Error("Pairing mismatch: e(a * 37, b * 27) != e(a, b * 999)")
+	}
+}
diff --git a/crypto/bn256/cloudflare/mul.h b/crypto/bn256/cloudflare/mul.h
new file mode 100644
index 000000000..bab5da831
--- /dev/null
+++ b/crypto/bn256/cloudflare/mul.h
@@ -0,0 +1,181 @@
+// mul computes the full 512-bit product of the 256-bit operand a3:a2:a1:a0
+// (four 64-bit limbs, a0 least significant) and the four limbs found at
+// 0+rb .. 24+rb, leaving the eight result limbs at 0+stack .. 56+stack.
+//
+// It works one limb of a at a time: the five-limb row product a_i * rb is
+// formed in R8..R12 with MULQ, added to the partial result already on the
+// stack at offset 8*i, and written back.
+//
+// Clobbers AX, DX, R8..R12 and the flags.
+#define mul(a0,a1,a2,a3, rb, stack) \
+	MOVQ a0, AX \
+	MULQ 0+rb \
+	MOVQ AX, R8 \
+	MOVQ DX, R9 \
+	MOVQ a0, AX \
+	MULQ 8+rb \
+	ADDQ AX, R9 \
+	ADCQ $0, DX \
+	MOVQ DX, R10 \
+	MOVQ a0, AX \
+	MULQ 16+rb \
+	ADDQ AX, R10 \
+	ADCQ $0, DX \
+	MOVQ DX, R11 \
+	MOVQ a0, AX \
+	MULQ 24+rb \
+	ADDQ AX, R11 \
+	ADCQ $0, DX \
+	MOVQ DX, R12 \
+	\
+	storeBlock(R8,R9,R10,R11, 0+stack) \
+	MOVQ R12, 32+stack \
+	\
+	MOVQ a1, AX \
+	MULQ 0+rb \
+	MOVQ AX, R8 \
+	MOVQ DX, R9 \
+	MOVQ a1, AX \
+	MULQ 8+rb \
+	ADDQ AX, R9 \
+	ADCQ $0, DX \
+	MOVQ DX, R10 \
+	MOVQ a1, AX \
+	MULQ 16+rb \
+	ADDQ AX, R10 \
+	ADCQ $0, DX \
+	MOVQ DX, R11 \
+	MOVQ a1, AX \
+	MULQ 24+rb \
+	ADDQ AX, R11 \
+	ADCQ $0, DX \
+	MOVQ DX, R12 \
+	\
+	ADDQ 8+stack, R8 \
+	ADCQ 16+stack, R9 \
+	ADCQ 24+stack, R10 \
+	ADCQ 32+stack, R11 \
+	ADCQ $0, R12 \
+	storeBlock(R8,R9,R10,R11, 8+stack) \
+	MOVQ R12, 40+stack \
+	\
+	MOVQ a2, AX \
+	MULQ 0+rb \
+	MOVQ AX, R8 \
+	MOVQ DX, R9 \
+	MOVQ a2, AX \
+	MULQ 8+rb \
+	ADDQ AX, R9 \
+	ADCQ $0, DX \
+	MOVQ DX, R10 \
+	MOVQ a2, AX \
+	MULQ 16+rb \
+	ADDQ AX, R10 \
+	ADCQ $0, DX \
+	MOVQ DX, R11 \
+	MOVQ a2, AX \
+	MULQ 24+rb \
+	ADDQ AX, R11 \
+	ADCQ $0, DX \
+	MOVQ DX, R12 \
+	\
+	ADDQ 16+stack, R8 \
+	ADCQ 24+stack, R9 \
+	ADCQ 32+stack, R10 \
+	ADCQ 40+stack, R11 \
+	ADCQ $0, R12 \
+	storeBlock(R8,R9,R10,R11, 16+stack) \
+	MOVQ R12, 48+stack \
+	\
+	MOVQ a3, AX \
+	MULQ 0+rb \
+	MOVQ AX, R8 \
+	MOVQ DX, R9 \
+	MOVQ a3, AX \
+	MULQ 8+rb \
+	ADDQ AX, R9 \
+	ADCQ $0, DX \
+	MOVQ DX, R10 \
+	MOVQ a3, AX \
+	MULQ 16+rb \
+	ADDQ AX, R10 \
+	ADCQ $0, DX \
+	MOVQ DX, R11 \
+	MOVQ a3, AX \
+	MULQ 24+rb \
+	ADDQ AX, R11 \
+	ADCQ $0, DX \
+	MOVQ DX, R12 \
+	\
+	ADDQ 24+stack, R8 \
+	ADCQ 32+stack, R9 \
+	ADCQ 40+stack, R10 \
+	ADCQ 48+stack, R11 \
+	ADCQ $0, R12 \
+	storeBlock(R8,R9,R10,R11, 24+stack) \
+	MOVQ R12, 56+stack
+
+// gfpReduce reduces the 512-bit value T stored at 0+stack .. 56+stack.
+//
+// Following the inline comments: it computes m = (T * N') mod R from the low
+// four limbs of T and the constant ·np (only the low 256 bits of the product
+// are formed), stores m at 64+stack, computes m*N with mul (N taken from ·p2)
+// into 96+stack .. 152+stack, adds T to that product, and hands the upper four
+// limbs plus the carry-out to gfpCarry. The caller reads the result from
+// R12..R15.
+//
+// NOTE(review): this looks like Montgomery reduction with R = 2^256; gfpCarry
+// is defined in gfp.h (not shown here) — confirm.
+//
+// Needs 160 bytes at stack. Clobbers AX, BX, DX, R8..R15 and the flags.
+#define gfpReduce(stack) \
+	\ // m = (T * N') mod R, store m in R8:R9:R10:R11
+	MOVQ ·np+0(SB), AX \
+	MULQ 0+stack \
+	MOVQ AX, R8 \
+	MOVQ DX, R9 \
+	MOVQ ·np+0(SB), AX \
+	MULQ 8+stack \
+	ADDQ AX, R9 \
+	ADCQ $0, DX \
+	MOVQ DX, R10 \
+	MOVQ ·np+0(SB), AX \
+	MULQ 16+stack \
+	ADDQ AX, R10 \
+	ADCQ $0, DX \
+	MOVQ DX, R11 \
+	MOVQ ·np+0(SB), AX \
+	MULQ 24+stack \
+	ADDQ AX, R11 \
+	\
+	MOVQ ·np+8(SB), AX \
+	MULQ 0+stack \
+	MOVQ AX, R12 \
+	MOVQ DX, R13 \
+	MOVQ ·np+8(SB), AX \
+	MULQ 8+stack \
+	ADDQ AX, R13 \
+	ADCQ $0, DX \
+	MOVQ DX, R14 \
+	MOVQ ·np+8(SB), AX \
+	MULQ 16+stack \
+	ADDQ AX, R14 \
+	\
+	ADDQ R12, R9 \
+	ADCQ R13, R10 \
+	ADCQ R14, R11 \
+	\
+	MOVQ ·np+16(SB), AX \
+	MULQ 0+stack \
+	MOVQ AX, R12 \
+	MOVQ DX, R13 \
+	MOVQ ·np+16(SB), AX \
+	MULQ 8+stack \
+	ADDQ AX, R13 \
+	\
+	ADDQ R12, R10 \
+	ADCQ R13, R11 \
+	\
+	MOVQ ·np+24(SB), AX \
+	MULQ 0+stack \
+	ADDQ AX, R11 \
+	\
+	storeBlock(R8,R9,R10,R11, 64+stack) \
+	\
+	\ // m * N
+	mul(·p2+0(SB),·p2+8(SB),·p2+16(SB),·p2+24(SB), 64+stack, 96+stack) \
+	\
+	\ // Add the 512-bit intermediate to m*N
+	loadBlock(96+stack, R8,R9,R10,R11) \
+	loadBlock(128+stack, R12,R13,R14,R15) \
+	\
+	MOVQ $0, AX \
+	ADDQ 0+stack, R8 \
+	ADCQ 8+stack, R9 \
+	ADCQ 16+stack, R10 \
+	ADCQ 24+stack, R11 \
+	ADCQ 32+stack, R12 \
+	ADCQ 40+stack, R13 \
+	ADCQ 48+stack, R14 \
+	ADCQ 56+stack, R15 \
+	ADCQ $0, AX \
+	\
+	gfpCarry(R12,R13,R14,R15,AX, R8,R9,R10,R11,BX)
diff --git a/crypto/bn256/cloudflare/mul_bmi2.h b/crypto/bn256/cloudflare/mul_bmi2.h
new file mode 100644
index 000000000..71ad0499a
--- /dev/null
+++ b/crypto/bn256/cloudflare/mul_bmi2.h
@@ -0,0 +1,112 @@
+// mulBMI2 computes the full 512-bit product of the 256-bit operand
+// a3:a2:a1:a0 (four 64-bit limbs, a0 least significant) and the four limbs
+// found at 0+rb .. 24+rb, using MULX. The eight result limbs are left in
+// R8..R15 (R8 least significant); nothing is written to memory.
+//
+// Each limb of a is loaded into DX (the implicit MULX operand) and its four
+// partial products are folded into the accumulator with two interleaved carry
+// chains.
+//
+// Clobbers AX, BX, DX, R8..R15 and the flags.
+#define mulBMI2(a0,a1,a2,a3, rb) \
+	MOVQ a0, DX \
+	MOVQ $0, R13 \
+	MULXQ 0+rb, R8, R9 \
+	MULXQ 8+rb, AX, R10 \
+	ADDQ AX, R9 \
+	MULXQ 16+rb, AX, R11 \
+	ADCQ AX, R10 \
+	MULXQ 24+rb, AX, R12 \
+	ADCQ AX, R11 \
+	ADCQ $0, R12 \
+	ADCQ $0, R13 \
+	\
+	MOVQ a1, DX \
+	MOVQ $0, R14 \
+	MULXQ 0+rb, AX, BX \
+	ADDQ AX, R9 \
+	ADCQ BX, R10 \
+	MULXQ 16+rb, AX, BX \
+	ADCQ AX, R11 \
+	ADCQ BX, R12 \
+	ADCQ $0, R13 \
+	MULXQ 8+rb, AX, BX \
+	ADDQ AX, R10 \
+	ADCQ BX, R11 \
+	MULXQ 24+rb, AX, BX \
+	ADCQ AX, R12 \
+	ADCQ BX, R13 \
+	ADCQ $0, R14 \
+	\
+	MOVQ a2, DX \
+	MOVQ $0, R15 \
+	MULXQ 0+rb, AX, BX \
+	ADDQ AX, R10 \
+	ADCQ BX, R11 \
+	MULXQ 16+rb, AX, BX \
+	ADCQ AX, R12 \
+	ADCQ BX, R13 \
+	ADCQ $0, R14 \
+	MULXQ 8+rb, AX, BX \
+	ADDQ AX, R11 \
+	ADCQ BX, R12 \
+	MULXQ 24+rb, AX, BX \
+	ADCQ AX, R13 \
+	ADCQ BX, R14 \
+	ADCQ $0, R15 \
+	\
+	MOVQ a3, DX \
+	MULXQ 0+rb, AX, BX \
+	ADDQ AX, R11 \
+	ADCQ BX, R12 \
+	MULXQ 16+rb, AX, BX \
+	ADCQ AX, R13 \
+	ADCQ BX, R14 \
+	ADCQ $0, R15 \
+	MULXQ 8+rb, AX, BX \
+	ADDQ AX, R12 \
+	ADCQ BX, R13 \
+	MULXQ 24+rb, AX, BX \
+	ADCQ AX, R14 \
+	ADCQ BX, R15
+
+// gfpReduceBMI2 is the MULX variant of gfpReduce. It expects the 512-bit
+// value T at 0(SP) .. 56(SP) (the offsets are hard-coded).
+//
+// Following the inline comments: it computes m = (T * N') mod R from the low
+// four limbs of T and ·np, stores m at 64(SP), computes m*N with mulBMI2 (N
+// taken from ·p2) into R8..R15, adds T to that product, and hands the upper
+// four limbs plus the carry-out to gfpCarry. The caller reads the result from
+// R12..R15.
+//
+// NOTE(review): this looks like Montgomery reduction with R = 2^256; gfpCarry
+// is defined in gfp.h (not shown here) — confirm.
+//
+// Clobbers AX, BX, DX, R8..R15 and the flags.
+#define gfpReduceBMI2() \
+	\ // m = (T * N') mod R, store m in R8:R9:R10:R11
+	MOVQ ·np+0(SB), DX \
+	MULXQ 0(SP), R8, R9 \
+	MULXQ 8(SP), AX, R10 \
+	ADDQ AX, R9 \
+	MULXQ 16(SP), AX, R11 \
+	ADCQ AX, R10 \
+	MULXQ 24(SP), AX, BX \
+	ADCQ AX, R11 \
+	\
+	MOVQ ·np+8(SB), DX \
+	MULXQ 0(SP), AX, BX \
+	ADDQ AX, R9 \
+	ADCQ BX, R10 \
+	MULXQ 16(SP), AX, BX \
+	ADCQ AX, R11 \
+	MULXQ 8(SP), AX, BX \
+	ADDQ AX, R10 \
+	ADCQ BX, R11 \
+	\
+	MOVQ ·np+16(SB), DX \
+	MULXQ 0(SP), AX, BX \
+	ADDQ AX, R10 \
+	ADCQ BX, R11 \
+	MULXQ 8(SP), AX, BX \
+	ADDQ AX, R11 \
+	\
+	MOVQ ·np+24(SB), DX \
+	MULXQ 0(SP), AX, BX \
+	ADDQ AX, R11 \
+	\
+	storeBlock(R8,R9,R10,R11, 64(SP)) \
+	\
+	\ // m * N
+	mulBMI2(·p2+0(SB),·p2+8(SB),·p2+16(SB),·p2+24(SB), 64(SP)) \
+	\
+	\ // Add the 512-bit intermediate to m*N
+	MOVQ $0, AX \
+	ADDQ 0(SP), R8 \
+	ADCQ 8(SP), R9 \
+	ADCQ 16(SP), R10 \
+	ADCQ 24(SP), R11 \
+	ADCQ 32(SP), R12 \
+	ADCQ 40(SP), R13 \
+	ADCQ 48(SP), R14 \
+	ADCQ 56(SP), R15 \
+	ADCQ $0, AX \
+	\
+	gfpCarry(R12,R13,R14,R15,AX, R8,R9,R10,R11,BX)
diff --git a/crypto/bn256/cloudflare/optate.go b/crypto/bn256/cloudflare/optate.go
new file mode 100644
index 000000000..b71e50e3a
--- /dev/null
+++ b/crypto/bn256/cloudflare/optate.go
@@ -0,0 +1,271 @@
+package bn256
+
+// lineFunctionAdd performs one addition step of the Miller loop. It adds the
+// twist point p to r, returning the sum as rOut (with rOut.t = rOut.z²),
+// together with the three coefficients a, b, c of the line function, which
+// the caller feeds to mulLine.
+//
+// Only p.x and p.y are read, so p is treated as affine. r2 must hold p.y²
+// (miller precomputes it). q supplies the base-field coordinates that scale
+// b (by q.x) and c (by q.y).
+func lineFunctionAdd(r, p *twistPoint, q *curvePoint, r2 *gfP2) (a, b, c *gfP2, rOut *twistPoint) {
+	// See the mixed addition algorithm from "Faster Computation of the
+	// Tate Pairing", http://arxiv.org/pdf/0904.0854v3.pdf
+	B := (&gfP2{}).Mul(&p.x, &r.t)
+
+	// D = ((p.y + r.z)² − p.y² − r.z²)·r.z² = 2·p.y·r.z³
+	D := (&gfP2{}).Add(&p.y, &r.z)
+	D.Square(D).Sub(D, r2).Sub(D, &r.t).Mul(D, &r.t)
+
+	H := (&gfP2{}).Sub(B, &r.x)
+	I := (&gfP2{}).Square(H)
+
+	// E = 4·I
+	E := (&gfP2{}).Add(I, I)
+	E.Add(E, E)
+
+	J := (&gfP2{}).Mul(H, E)
+
+	// L1 = D − 2·r.y
+	L1 := (&gfP2{}).Sub(D, &r.y)
+	L1.Sub(L1, &r.y)
+
+	V := (&gfP2{}).Mul(&r.x, E)
+
+	rOut = &twistPoint{}
+	rOut.x.Square(L1).Sub(&rOut.x, J).Sub(&rOut.x, V).Sub(&rOut.x, V)
+
+	rOut.z.Add(&r.z, H).Square(&rOut.z).Sub(&rOut.z, &r.t).Sub(&rOut.z, I)
+
+	t := (&gfP2{}).Sub(V, &rOut.x)
+	t.Mul(t, L1)
+	t2 := (&gfP2{}).Mul(&r.y, J)
+	t2.Add(t2, t2)
+	rOut.y.Sub(t, t2)
+
+	rOut.t.Square(&rOut.z)
+
+	// t = (p.y + rOut.z)² − p.y² − rOut.z² = 2·p.y·rOut.z
+	t.Add(&p.y, &rOut.z).Square(t).Sub(t, r2).Sub(t, &rOut.t)
+
+	// a = 2·L1·p.x − t
+	t2.Mul(L1, &p.x)
+	t2.Add(t2, t2)
+	a = (&gfP2{}).Sub(t2, t)
+
+	// c = 2·rOut.z·q.y
+	c = (&gfP2{}).MulScalar(&rOut.z, &q.y)
+	c.Add(c, c)
+
+	// b = −2·L1·q.x
+	b = (&gfP2{}).Neg(L1)
+	b.MulScalar(b, &q.x).Add(b, b)
+
+	return
+}
+
+// lineFunctionDouble performs one doubling step of the Miller loop. It doubles
+// the twist point r, returning the result as rOut (with rOut.t = rOut.z²),
+// together with the three coefficients a, b, c of the line function, which
+// the caller feeds to mulLine.
+//
+// r.t must hold r.z². q supplies the base-field coordinates that scale b (by
+// q.x) and c (by q.y).
+func lineFunctionDouble(r *twistPoint, q *curvePoint) (a, b, c *gfP2, rOut *twistPoint) {
+	// See the doubling algorithm for a=0 from "Faster Computation of the
+	// Tate Pairing", http://arxiv.org/pdf/0904.0854v3.pdf
+	A := (&gfP2{}).Square(&r.x)
+	B := (&gfP2{}).Square(&r.y)
+	C := (&gfP2{}).Square(B)
+
+	// D = 2·((r.x + B)² − A − C)
+	D := (&gfP2{}).Add(&r.x, B)
+	D.Square(D).Sub(D, A).Sub(D, C).Add(D, D)
+
+	// E = 3·A
+	E := (&gfP2{}).Add(A, A)
+	E.Add(E, A)
+
+	G := (&gfP2{}).Square(E)
+
+	rOut = &twistPoint{}
+	rOut.x.Sub(G, D).Sub(&rOut.x, D)
+
+	rOut.z.Add(&r.y, &r.z).Square(&rOut.z).Sub(&rOut.z, B).Sub(&rOut.z, &r.t)
+
+	// rOut.y = (D − rOut.x)·E − 8·C
+	rOut.y.Sub(D, &rOut.x).Mul(&rOut.y, E)
+	t := (&gfP2{}).Add(C, C)
+	t.Add(t, t).Add(t, t)
+	rOut.y.Sub(&rOut.y, t)
+
+	rOut.t.Square(&rOut.z)
+
+	// b = −2·E·r.t·q.x
+	t.Mul(E, &r.t).Add(t, t)
+	b = (&gfP2{}).Neg(t)
+	b.MulScalar(b, &q.x)
+
+	// a = (r.x + E)² − A − G − 4·B
+	a = (&gfP2{}).Add(&r.x, E)
+	a.Square(a).Sub(a, A).Sub(a, G)
+	t.Add(B, B).Add(t, t)
+	a.Sub(a, t)
+
+	// c = 2·rOut.z·r.t·q.y
+	c = (&gfP2{}).Mul(&rOut.z, &r.t)
+	c.Add(c, c).MulScalar(c, &q.y)
+
+	return
+}
+
+// mulLine multiplies ret in place by the sparse element described by the line
+// coefficients a, b, c, using three gfP6 products instead of a full gfP12
+// multiplication.
+//
+// With L = aτ+b (a gfP6 whose τ² coefficient is zero) the update is
+//
+//	ret.x ← (ret.x + ret.y)·(L + c) − L·ret.x − c·ret.y
+//	ret.y ← c·ret.y + τ·(L·ret.x)
+//
+// NOTE(review): assuming gfP12 represents xω+y with ω² = τ, this is
+// ret ← ret·(L·ω + c) — confirm against the gfP12 definition.
+func mulLine(ret *gfP12, a, b, c *gfP2) {
+	// a2 = L·ret.x
+	a2 := &gfP6{}
+	a2.y.Set(a)
+	a2.z.Set(b)
+	a2.Mul(a2, &ret.x)
+	// t3 = c·ret.y
+	t3 := (&gfP6{}).MulScalar(&ret.y, c)
+
+	// t2 = L + c
+	t := (&gfP2{}).Add(b, c)
+	t2 := &gfP6{}
+	t2.y.Set(a)
+	t2.z.Set(t)
+	ret.x.Add(&ret.x, &ret.y)
+
+	ret.y.Set(t3)
+
+	ret.x.Mul(&ret.x, t2).Sub(&ret.x, a2).Sub(&ret.x, &ret.y)
+	a2.MulTau(a2)
+	ret.y.Add(&ret.y, a2)
+}
+
+// sixuPlus2NAF is 6u+2 in non-adjacent form. miller walks it from the last
+// element down towards index 0: the last element only starts the loop, and
+// every lower digit selects an extra addition (1), subtraction (-1) or
+// nothing (0) after each doubling step.
+var sixuPlus2NAF = []int8{0, 0, 0, 1, 0, 1, 0, -1, 0, 0, 1, -1, 0, 0, 1, 0,
+	0, 1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0, 0, 1, 1,
+	1, 0, 0, -1, 0, 0, 1, 0, 0, 0, 0, 0, -1, 0, 0, 1,
+	1, 0, 0, -1, 0, 0, 0, 1, 1, 0, -1, 0, 0, 1, 0, 1, 1}
+
+// miller implements the Miller loop for calculating the Optimal Ate pairing.
+// See algorithm 1 from http://cryptojedi.org/papers/dclxvi-20100714.pdf
+//
+// q and p are not modified: both are copied and the copies are made affine.
+// The returned value still has to go through finalExponentiation.
+func miller(q *twistPoint, p *curvePoint) *gfP12 {
+	ret := (&gfP12{}).SetOne()
+
+	aAffine := &twistPoint{}
+	aAffine.Set(q)
+	aAffine.MakeAffine()
+
+	bAffine := &curvePoint{}
+	bAffine.Set(p)
+	bAffine.MakeAffine()
+
+	minusA := &twistPoint{}
+	minusA.Neg(aAffine)
+
+	r := &twistPoint{}
+	r.Set(aAffine)
+
+	// r2 = y² of the point being added. aAffine and minusA share the same y².
+	r2 := (&gfP2{}).Square(&aAffine.y)
+
+	for i := len(sixuPlus2NAF) - 1; i > 0; i-- {
+		a, b, c, newR := lineFunctionDouble(r, bAffine)
+		// ret is still one on the first iteration, so squaring is skipped.
+		if i != len(sixuPlus2NAF)-1 {
+			ret.Square(ret)
+		}
+
+		mulLine(ret, a, b, c)
+		r = newR
+
+		switch sixuPlus2NAF[i-1] {
+		case 1:
+			a, b, c, newR = lineFunctionAdd(r, aAffine, bAffine, r2)
+		case -1:
+			a, b, c, newR = lineFunctionAdd(r, minusA, bAffine, r2)
+		default:
+			continue
+		}
+
+		mulLine(ret, a, b, c)
+		r = newR
+	}
+
+	// In order to calculate Q1 we have to convert q from the sextic twist
+	// to the full GF(p^12) group, apply the Frobenius there, and convert
+	// back.
+	//
+	// The twist isomorphism is (x', y') -> (xω², yω³). If we consider just
+	// x for a moment, then after applying the Frobenius, we have x̄ω^(2p)
+	// where x̄ is the conjugate of x. If we are going to apply the inverse
+	// isomorphism we need a value with a single coefficient of ω² so we
+	// rewrite this as x̄ω^(2p-2)ω². ω⁶ = ξ and, due to the construction of
+	// p, 2p-2 is a multiple of six. Therefore we can rewrite as
+	// x̄ξ^((p-1)/3)ω² and applying the inverse isomorphism eliminates the
+	// ω².
+	//
+	// A similar argument can be made for the y value.
+
+	q1 := &twistPoint{}
+	q1.x.Conjugate(&aAffine.x).Mul(&q1.x, xiToPMinus1Over3)
+	q1.y.Conjugate(&aAffine.y).Mul(&q1.y, xiToPMinus1Over2)
+	q1.z.SetOne()
+	q1.t.SetOne()
+
+	// For Q2 we are applying the p² Frobenius. The two conjugations cancel
+	// out and we are left only with the factors from the isomorphism. In
+	// the case of x, we end up with a pure number which is why
+	// xiToPSquaredMinus1Over3 is ∈ GF(p). With y we get a factor of -1. We
+	// ignore this to end up with -Q2.
+
+	minusQ2 := &twistPoint{}
+	minusQ2.x.MulScalar(&aAffine.x, xiToPSquaredMinus1Over3)
+	minusQ2.y.Set(&aAffine.y)
+	minusQ2.z.SetOne()
+	minusQ2.t.SetOne()
+
+	// The two final addition steps use Q1 and -Q2; r2 is refreshed to the y²
+	// of each point before it is added.
+	r2.Square(&q1.y)
+	a, b, c, newR := lineFunctionAdd(r, q1, bAffine, r2)
+	mulLine(ret, a, b, c)
+	r = newR
+
+	r2.Square(&minusQ2.y)
+	a, b, c, newR = lineFunctionAdd(r, minusQ2, bAffine, r2)
+	mulLine(ret, a, b, c)
+	r = newR
+
+	return ret
+}
+
+// finalExponentiation computes the (p¹²-1)/Order-th power of an element of
+// GF(p¹²) to obtain an element of GT (steps 13-15 of algorithm 1 from
+// http://cryptojedi.org/papers/dclxvi-20100714.pdf)
+//
+// in is not modified; a freshly allocated result is returned.
+func finalExponentiation(in *gfP12) *gfP12 {
+	t1 := &gfP12{}
+
+	// This is the p^6-Frobenius
+	t1.x.Neg(&in.x)
+	t1.y.Set(&in.y)
+
+	// t1 = in^(p⁶) · in⁻¹ = in^(p⁶−1)
+	inv := &gfP12{}
+	inv.Invert(in)
+	t1.Mul(t1, inv)
+
+	// t1 = t1^(p²) · t1 = in^((p⁶−1)(p²+1))
+	t2 := (&gfP12{}).FrobeniusP2(t1)
+	t1.Mul(t1, t2)
+
+	// Frobenius powers of t1: t1^p, t1^(p²), t1^(p³).
+	fp := (&gfP12{}).Frobenius(t1)
+	fp2 := (&gfP12{}).FrobeniusP2(t1)
+	fp3 := (&gfP12{}).Frobenius(fp2)
+
+	// Powers of t1 by u, u² and u³.
+	fu := (&gfP12{}).Exp(t1, u)
+	fu2 := (&gfP12{}).Exp(fu, u)
+	fu3 := (&gfP12{}).Exp(fu2, u)
+
+	y3 := (&gfP12{}).Frobenius(fu)
+	fu2p := (&gfP12{}).Frobenius(fu2)
+	fu3p := (&gfP12{}).Frobenius(fu3)
+	y2 := (&gfP12{}).FrobeniusP2(fu2)
+
+	y0 := &gfP12{}
+	y0.Mul(fp, fp2).Mul(y0, fp3)
+
+	y1 := (&gfP12{}).Conjugate(t1)
+	y5 := (&gfP12{}).Conjugate(fu2)
+	y3.Conjugate(y3)
+	y4 := (&gfP12{}).Mul(fu, fu2p)
+	y4.Conjugate(y4)
+
+	y6 := (&gfP12{}).Mul(fu3, fu3p)
+	y6.Conjugate(y6)
+
+	// Combine y0..y6 with a fixed chain of squarings and multiplications;
+	// t1 is reused as scratch from here on.
+	t0 := (&gfP12{}).Square(y6)
+	t0.Mul(t0, y4).Mul(t0, y5)
+	t1.Mul(y3, y5).Mul(t1, t0)
+	t0.Mul(t0, y2)
+	t1.Square(t1).Mul(t1, t0).Square(t1)
+	t0.Mul(t1, y1)
+	t1.Mul(t1, y0)
+	t0.Square(t0).Mul(t0, t1)
+
+	return t0
+}
+
+// optimalAte computes the Optimal Ate pairing of the twist point a and the
+// curve point b: the Miller loop followed by the final exponentiation. If
+// either input is the point at infinity the result is forced to one.
+func optimalAte(a *twistPoint, b *curvePoint) *gfP12 {
+	ret := finalExponentiation(miller(a, b))
+	if a.IsInfinity() || b.IsInfinity() {
+		ret.SetOne()
+	}
+	return ret
+}
diff --git a/crypto/bn256/cloudflare/twist.go b/crypto/bn256/cloudflare/twist.go
new file mode 100644
index 000000000..0c2f80d4e
--- /dev/null
+++ b/crypto/bn256/cloudflare/twist.go
@@ -0,0 +1,204 @@
+package bn256
+
+import (
+	"math/big"
+)
+
+// twistPoint implements the elliptic curve y²=x³+3/ξ over GF(p²). Points are
+// kept in Jacobian form and t=z² when valid. The group G₂ is the set of
+// n-torsion points of this curve over GF(p²) (where n = Order)
+//
+// The point at infinity is represented by z = 0 (see IsInfinity).
+type twistPoint struct {
+	x, y, z, t gfP2
+}
+
+// twistB is the constant term added to x³ in IsOnCurve, i.e. the curve
+// coefficient written as 3/ξ in the twistPoint comment.
+// NOTE(review): the limbs are presumably stored in the same internal encoding
+// as every other gfP value — confirm against the gfP definition.
+var twistB = &gfP2{
+	gfP{0x38e7ecccd1dcff67, 0x65f0b37d93ce0d3e, 0xd749d0dd22ac00aa, 0x0141b9ce4a688d4d},
+	gfP{0x3bf938e377b802a8, 0x020b1b273633535d, 0x26b7edf049755260, 0x2514c6324384a86d},
+}
+
+// twistGen is the generator of group G₂.
+// NOTE(review): z and t are both built from newGFp(0) and newGFp(1); assuming
+// gfP2's fields are declared in x, y order this is the element one, i.e. the
+// generator is stored in affine form — confirm against the gfP2 definition.
+var twistGen = &twistPoint{
+	gfP2{
+		gfP{0xafb4737da84c6140, 0x6043dd5a5802d8c4, 0x09e950fc52a02f86, 0x14fef0833aea7b6b},
+		gfP{0x8e83b5d102bc2026, 0xdceb1935497b0172, 0xfbb8264797811adf, 0x19573841af96503b},
+	},
+	gfP2{
+		gfP{0x64095b56c71856ee, 0xdc57f922327d3cbb, 0x55f935be33351076, 0x0da4a0e693fd6482},
+		gfP{0x619dfa9d886be9f6, 0xfe7fd297f59e9b78, 0xff9e1a62231b7dfe, 0x28fd7eebae9e4206},
+	},
+	gfP2{*newGFp(0), *newGFp(1)},
+	gfP2{*newGFp(0), *newGFp(1)},
+}
+
+// String formats c as "(x, y)" using its decoded affine coordinates. Note that
+// it converts c to affine form in place before formatting.
+func (c *twistPoint) String() string {
+	c.MakeAffine()
+	x := gfP2Decode(&c.x)
+	y := gfP2Decode(&c.y)
+	return "(" + x.String() + ", " + y.String() + ")"
+}
+
+// Set copies all four coordinates (x, y, z, t) of a into c.
+func (c *twistPoint) Set(a *twistPoint) {
+	c.t.Set(&a.t)
+	c.z.Set(&a.z)
+	c.y.Set(&a.y)
+	c.x.Set(&a.x)
+}
+
+// IsOnCurve returns true iff c is on the curve.
+func (c *twistPoint) IsOnCurve() bool {
+	c.MakeAffine()
+	if c.IsInfinity() {
+		return true
+	}
+
+	y2, x3 := &gfP2{}, &gfP2{}
+	y2.Square(&c.y)
+	x3.Square(&c.x).Mul(x3, &c.x).Add(x3, twistB)
+
+	if *y2 != *x3 {
+		return false
+	}
+	cneg := &twistPoint{}
+	cneg.Mul(c, Order)
+	return cneg.z.IsZero()
+}
+
+// SetInfinity sets c to the point at infinity, stored as x = 0, y = 1,
+// z = 0, t = 0.
+func (c *twistPoint) SetInfinity() {
+	c.z.SetZero()
+	c.t.SetZero()
+	c.x.SetZero()
+	c.y.SetOne()
+}
+
+// IsInfinity reports whether c is the point at infinity, which is the case
+// exactly when its z coordinate is zero.
+func (c *twistPoint) IsInfinity() bool {
+	return c.z.IsZero()
+}
+
+// Add sets c = a+b. If either input is the point at infinity the other is
+// copied, and if both inputs represent the same point the call is forwarded
+// to Double. Only c.x, c.y and c.z are written on the general path; c.t is
+// left untouched.
+func (c *twistPoint) Add(a, b *twistPoint) {
+	// For additional comments, see the same function in curve.go.
+
+	if a.IsInfinity() {
+		c.Set(b)
+		return
+	}
+	if b.IsInfinity() {
+		c.Set(a)
+		return
+	}
+
+	// See http://hyperelliptic.org/EFD/g1p/auto-code/shortw/jacobian-0/addition/add-2007-bl.op3
+	z12 := (&gfP2{}).Square(&a.z)
+	z22 := (&gfP2{}).Square(&b.z)
+	u1 := (&gfP2{}).Mul(&a.x, z22)
+	u2 := (&gfP2{}).Mul(&b.x, z12)
+
+	t := (&gfP2{}).Mul(&b.z, z22)
+	s1 := (&gfP2{}).Mul(&a.y, t)
+
+	t.Mul(&a.z, z12)
+	s2 := (&gfP2{}).Mul(&b.y, t)
+
+	// h = u2 − u1 is zero when both points share the same affine x.
+	h := (&gfP2{}).Sub(u2, u1)
+	xEqual := h.IsZero()
+
+	t.Add(h, h)
+	i := (&gfP2{}).Square(t)
+	j := (&gfP2{}).Mul(h, i)
+
+	// s2 − s1 is zero when both points share the same affine y.
+	t.Sub(s2, s1)
+	yEqual := t.IsZero()
+	if xEqual && yEqual {
+		c.Double(a)
+		return
+	}
+	r := (&gfP2{}).Add(t, t)
+
+	v := (&gfP2{}).Mul(u1, i)
+
+	t4 := (&gfP2{}).Square(r)
+	t.Add(v, v)
+	t6 := (&gfP2{}).Sub(t4, j)
+	c.x.Sub(t6, t)
+
+	t.Sub(v, &c.x) // t7
+	t4.Mul(s1, j)  // t8
+	t6.Add(t4, t4) // t9
+	t4.Mul(r, t)   // t10
+	c.y.Sub(t4, t6)
+
+	t.Add(&a.z, &b.z) // t11
+	t4.Square(t)      // t12
+	t.Sub(t4, z12)    // t13
+	t4.Sub(t, z22)    // t14
+	c.z.Mul(t4, h)
+}
+
+// Double sets c = 2a. Only c.x, c.y and c.z are written; c.t is left
+// untouched. c.z is written last, after a.y and a.z have been read.
+func (c *twistPoint) Double(a *twistPoint) {
+	// See http://hyperelliptic.org/EFD/g1p/auto-code/shortw/jacobian-0/doubling/dbl-2009-l.op3
+	A := (&gfP2{}).Square(&a.x)
+	B := (&gfP2{}).Square(&a.y)
+	C := (&gfP2{}).Square(B)
+
+	// d = 2·((a.x + B)² − A − C)
+	t := (&gfP2{}).Add(&a.x, B)
+	t2 := (&gfP2{}).Square(t)
+	t.Sub(t2, A)
+	t2.Sub(t, C)
+	d := (&gfP2{}).Add(t2, t2)
+	// e = 3·A, f = e²
+	t.Add(A, A)
+	e := (&gfP2{}).Add(t, A)
+	f := (&gfP2{}).Square(e)
+
+	// c.x = f − 2·d
+	t.Add(d, d)
+	c.x.Sub(f, t)
+
+	// c.y = e·(d − c.x) − 8·C
+	t.Add(C, C)
+	t2.Add(t, t)
+	t.Add(t2, t2)
+	c.y.Sub(d, &c.x)
+	t2.Mul(e, &c.y)
+	c.y.Sub(t2, t)
+
+	// c.z = 2·a.y·a.z
+	t.Mul(&a.y, &a.z)
+	c.z.Add(t, t)
+}
+
+func (c *twistPoint) Mul(a *twistPoint, scalar *big.Int) {
+	sum, t := &twistPoint{}, &twistPoint{}
+
+	for i := scalar.BitLen(); i >= 0; i-- {
+		t.Double(sum)
+		if scalar.Bit(i) != 0 {
+			sum.Add(t, a)
+		} else {
+			sum.Set(t)
+		}
+	}
+
+	c.Set(sum)
+}
+
+func (c *twistPoint) MakeAffine() {
+	if c.z.IsOne() {
+		return
+	} else if c.z.IsZero() {
+		c.x.SetZero()
+		c.y.SetOne()
+		c.t.SetZero()
+		return
+	}
+
+	zInv := (&gfP2{}).Invert(&c.z)
+	t := (&gfP2{}).Mul(&c.y, zInv)
+	zInv2 := (&gfP2{}).Square(zInv)
+	c.y.Mul(t, zInv2)
+	t.Mul(&c.x, zInv2)
+	c.x.Set(t)
+	c.z.SetOne()
+	c.t.SetOne()
+}
+
+// Neg sets c = -a by copying x and z and negating y. The cached t coordinate
+// is cleared rather than copied.
+func (c *twistPoint) Neg(a *twistPoint) {
+	c.y.Neg(&a.y)
+	c.x.Set(&a.x)
+	c.z.Set(&a.z)
+	c.t.SetZero()
+}