diff options
Diffstat (limited to 'swarm/bmt/bmt_test.go')
-rw-r--r-- | swarm/bmt/bmt_test.go | 289 |
1 files changed, 220 insertions, 69 deletions
diff --git a/swarm/bmt/bmt_test.go b/swarm/bmt/bmt_test.go index ae40eadab..891d8cbb2 100644 --- a/swarm/bmt/bmt_test.go +++ b/swarm/bmt/bmt_test.go @@ -39,13 +39,12 @@ var counts = []int{1, 2, 3, 4, 5, 8, 9, 15, 16, 17, 32, 37, 42, 53, 63, 64, 65, // calculates the Keccak256 SHA3 hash of the data func sha3hash(data ...[]byte) []byte { h := sha3.NewKeccak256() - return doHash(h, nil, data...) + return doSum(h, nil, data...) } // TestRefHasher tests that the RefHasher computes the expected BMT hash for -// all data lengths between 0 and 256 bytes +// some small data lengths func TestRefHasher(t *testing.T) { - // the test struct is used to specify the expected BMT hash for // segment counts between from and to and lengths from 1 to datalength type test struct { @@ -129,7 +128,7 @@ func TestRefHasher(t *testing.T) { } } -// tests if hasher responds with correct hash +// tests if hasher responds with correct hash comparing the reference implementation return value func TestHasherEmptyData(t *testing.T) { hasher := sha3.NewKeccak256 var data []byte @@ -140,7 +139,7 @@ func TestHasherEmptyData(t *testing.T) { bmt := New(pool) rbmt := NewRefHasher(hasher, count) refHash := rbmt.Hash(data) - expHash := Hash(bmt, nil, data) + expHash := syncHash(bmt, nil, data) if !bytes.Equal(expHash, refHash) { t.Fatalf("hash mismatch with reference. expected %x, got %x", refHash, expHash) } @@ -148,7 +147,8 @@ func TestHasherEmptyData(t *testing.T) { } } -func TestHasherCorrectness(t *testing.T) { +// tests sequential write with entire max size written in one go +func TestSyncHasherCorrectness(t *testing.T) { data := newData(BufferSize) hasher := sha3.NewKeccak256 size := hasher().Size() @@ -157,7 +157,7 @@ func TestHasherCorrectness(t *testing.T) { for _, count := range counts { t.Run(fmt.Sprintf("segments_%v", count), func(t *testing.T) { max := count * size - incr := 1 + var incr int capacity := 1 pool := NewTreePool(hasher, count, capacity) defer pool.Drain(0) @@ -173,6 +173,44 @@ func TestHasherCorrectness(t *testing.T) { } } +// tests order-neutral concurrent writes with entire max size written in one go +func TestAsyncCorrectness(t *testing.T) { + data := newData(BufferSize) + hasher := sha3.NewKeccak256 + size := hasher().Size() + whs := []whenHash{first, last, random} + + for _, double := range []bool{false, true} { + for _, wh := range whs { + for _, count := range counts { + t.Run(fmt.Sprintf("double_%v_hash_when_%v_segments_%v", double, wh, count), func(t *testing.T) { + max := count * size + var incr int + capacity := 1 + pool := NewTreePool(hasher, count, capacity) + defer pool.Drain(0) + for n := 1; n <= max; n += incr { + incr = 1 + rand.Intn(5) + bmt := New(pool) + d := data[:n] + rbmt := NewRefHasher(hasher, count) + exp := rbmt.Hash(d) + got := syncHash(bmt, nil, d) + if !bytes.Equal(got, exp) { + t.Fatalf("wrong sync hash for datalength %v: expected %x (ref), got %x", n, exp, got) + } + sw := bmt.NewAsyncWriter(double) + got = asyncHashRandom(sw, nil, d, wh) + if !bytes.Equal(got, exp) { + t.Fatalf("wrong async hash for datalength %v: expected %x, got %x", n, exp, got) + } + } + }) + } + } + } +} + // Tests that the BMT hasher can be synchronously reused with poolsizes 1 and PoolSize func TestHasherReuse(t *testing.T) { t.Run(fmt.Sprintf("poolsize_%d", 1), func(t *testing.T) { @@ -183,6 +221,7 @@ func TestHasherReuse(t *testing.T) { }) } +// tests if bmt reuse is not corrupting result func testHasherReuse(poolsize int, t *testing.T) { hasher := sha3.NewKeccak256 pool := NewTreePool(hasher, SegmentCount, poolsize) @@ -191,7 +230,7 @@ func testHasherReuse(poolsize int, t *testing.T) { for i := 0; i < 100; i++ { data := newData(BufferSize) - n := rand.Intn(bmt.DataLength()) + n := rand.Intn(bmt.Size()) err := testHasherCorrectness(bmt, hasher, data, n, SegmentCount) if err != nil { t.Fatal(err) @@ -199,8 +238,8 @@ func testHasherReuse(poolsize int, t *testing.T) { } } -// Tests if pool can be cleanly reused even in concurrent use -func TestBMTHasherConcurrentUse(t *testing.T) { +// Tests if pool can be cleanly reused even in concurrent use by several hasher +func TestBMTConcurrentUse(t *testing.T) { hasher := sha3.NewKeccak256 pool := NewTreePool(hasher, SegmentCount, PoolSize) defer pool.Drain(0) @@ -211,7 +250,7 @@ func TestBMTHasherConcurrentUse(t *testing.T) { go func() { bmt := New(pool) data := newData(BufferSize) - n := rand.Intn(bmt.DataLength()) + n := rand.Intn(bmt.Size()) errc <- testHasherCorrectness(bmt, hasher, data, n, 128) }() } @@ -234,7 +273,7 @@ LOOP: // Tests BMT Hasher io.Writer interface is working correctly // even multiple short random write buffers -func TestBMTHasherWriterBuffers(t *testing.T) { +func TestBMTWriterBuffers(t *testing.T) { hasher := sha3.NewKeccak256 for _, count := range counts { @@ -247,7 +286,7 @@ func TestBMTHasherWriterBuffers(t *testing.T) { data := newData(n) rbmt := NewRefHasher(hasher, count) refHash := rbmt.Hash(data) - expHash := Hash(bmt, nil, data) + expHash := syncHash(bmt, nil, data) if !bytes.Equal(expHash, refHash) { t.Fatalf("hash mismatch with reference. expected %x, got %x", refHash, expHash) } @@ -308,57 +347,65 @@ func testHasherCorrectness(bmt *Hasher, hasher BaseHasherFunc, d []byte, n, coun data := d[:n] rbmt := NewRefHasher(hasher, count) exp := sha3hash(span, rbmt.Hash(data)) - got := Hash(bmt, span, data) + got := syncHash(bmt, span, data) if !bytes.Equal(got, exp) { return fmt.Errorf("wrong hash: expected %x, got %x", exp, got) } return err } -func BenchmarkSHA3_4k(t *testing.B) { benchmarkSHA3(4096, t) } -func BenchmarkSHA3_2k(t *testing.B) { benchmarkSHA3(4096/2, t) } -func BenchmarkSHA3_1k(t *testing.B) { benchmarkSHA3(4096/4, t) } -func BenchmarkSHA3_512b(t *testing.B) { benchmarkSHA3(4096/8, t) } -func BenchmarkSHA3_256b(t *testing.B) { benchmarkSHA3(4096/16, t) } -func BenchmarkSHA3_128b(t *testing.B) { benchmarkSHA3(4096/32, t) } - -func BenchmarkBMTBaseline_4k(t *testing.B) { benchmarkBMTBaseline(4096, t) } -func BenchmarkBMTBaseline_2k(t *testing.B) { benchmarkBMTBaseline(4096/2, t) } -func BenchmarkBMTBaseline_1k(t *testing.B) { benchmarkBMTBaseline(4096/4, t) } -func BenchmarkBMTBaseline_512b(t *testing.B) { benchmarkBMTBaseline(4096/8, t) } -func BenchmarkBMTBaseline_256b(t *testing.B) { benchmarkBMTBaseline(4096/16, t) } -func BenchmarkBMTBaseline_128b(t *testing.B) { benchmarkBMTBaseline(4096/32, t) } - -func BenchmarkRefHasher_4k(t *testing.B) { benchmarkRefHasher(4096, t) } -func BenchmarkRefHasher_2k(t *testing.B) { benchmarkRefHasher(4096/2, t) } -func BenchmarkRefHasher_1k(t *testing.B) { benchmarkRefHasher(4096/4, t) } -func BenchmarkRefHasher_512b(t *testing.B) { benchmarkRefHasher(4096/8, t) } -func BenchmarkRefHasher_256b(t *testing.B) { benchmarkRefHasher(4096/16, t) } -func BenchmarkRefHasher_128b(t *testing.B) { benchmarkRefHasher(4096/32, t) } - -func BenchmarkBMTHasher_4k(t *testing.B) { benchmarkBMTHasher(4096, t) } -func BenchmarkBMTHasher_2k(t *testing.B) { benchmarkBMTHasher(4096/2, t) } -func BenchmarkBMTHasher_1k(t *testing.B) { benchmarkBMTHasher(4096/4, t) } -func BenchmarkBMTHasher_512b(t *testing.B) { benchmarkBMTHasher(4096/8, t) } -func BenchmarkBMTHasher_256b(t *testing.B) { benchmarkBMTHasher(4096/16, t) } -func BenchmarkBMTHasher_128b(t *testing.B) { benchmarkBMTHasher(4096/32, t) } - -func BenchmarkBMTHasherNoPool_4k(t *testing.B) { benchmarkBMTHasherPool(1, 4096, t) } -func BenchmarkBMTHasherNoPool_2k(t *testing.B) { benchmarkBMTHasherPool(1, 4096/2, t) } -func BenchmarkBMTHasherNoPool_1k(t *testing.B) { benchmarkBMTHasherPool(1, 4096/4, t) } -func BenchmarkBMTHasherNoPool_512b(t *testing.B) { benchmarkBMTHasherPool(1, 4096/8, t) } -func BenchmarkBMTHasherNoPool_256b(t *testing.B) { benchmarkBMTHasherPool(1, 4096/16, t) } -func BenchmarkBMTHasherNoPool_128b(t *testing.B) { benchmarkBMTHasherPool(1, 4096/32, t) } - -func BenchmarkBMTHasherPool_4k(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096, t) } -func BenchmarkBMTHasherPool_2k(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096/2, t) } -func BenchmarkBMTHasherPool_1k(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096/4, t) } -func BenchmarkBMTHasherPool_512b(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096/8, t) } -func BenchmarkBMTHasherPool_256b(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096/16, t) } -func BenchmarkBMTHasherPool_128b(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096/32, t) } +// +func BenchmarkBMT(t *testing.B) { + for size := 4096; size >= 128; size /= 2 { + t.Run(fmt.Sprintf("%v_size_%v", "SHA3", size), func(t *testing.B) { + benchmarkSHA3(t, size) + }) + t.Run(fmt.Sprintf("%v_size_%v", "Baseline", size), func(t *testing.B) { + benchmarkBMTBaseline(t, size) + }) + t.Run(fmt.Sprintf("%v_size_%v", "REF", size), func(t *testing.B) { + benchmarkRefHasher(t, size) + }) + t.Run(fmt.Sprintf("%v_size_%v", "BMT", size), func(t *testing.B) { + benchmarkBMT(t, size) + }) + } +} + +type whenHash = int + +const ( + first whenHash = iota + last + random +) + +func BenchmarkBMTAsync(t *testing.B) { + whs := []whenHash{first, last, random} + for size := 4096; size >= 128; size /= 2 { + for _, wh := range whs { + for _, double := range []bool{false, true} { + t.Run(fmt.Sprintf("double_%v_hash_when_%v_size_%v", double, wh, size), func(t *testing.B) { + benchmarkBMTAsync(t, size, wh, double) + }) + } + } + } +} + +func BenchmarkPool(t *testing.B) { + caps := []int{1, PoolSize} + for size := 4096; size >= 128; size /= 2 { + for _, c := range caps { + t.Run(fmt.Sprintf("poolsize_%v_size_%v", c, size), func(t *testing.B) { + benchmarkPool(t, c, size) + }) + } + } +} // benchmarks simple sha3 hash on chunks -func benchmarkSHA3(n int, t *testing.B) { +func benchmarkSHA3(t *testing.B, n int) { data := newData(n) hasher := sha3.NewKeccak256 h := hasher() @@ -366,9 +413,7 @@ func benchmarkSHA3(n int, t *testing.B) { t.ReportAllocs() t.ResetTimer() for i := 0; i < t.N; i++ { - h.Reset() - h.Write(data) - h.Sum(nil) + doSum(h, nil, data) } } @@ -377,7 +422,7 @@ func benchmarkSHA3(n int, t *testing.B) { // doing it on n PoolSize each reusing the base hasher // the premise is that this is the minimum computation needed for a BMT // therefore this serves as a theoretical optimum for concurrent implementations -func benchmarkBMTBaseline(n int, t *testing.B) { +func benchmarkBMTBaseline(t *testing.B, n int) { hasher := sha3.NewKeccak256 hashSize := hasher().Size() data := newData(hashSize) @@ -394,9 +439,7 @@ func benchmarkBMTBaseline(n int, t *testing.B) { defer wg.Done() h := hasher() for atomic.AddInt32(&i, 1) < count { - h.Reset() - h.Write(data) - h.Sum(nil) + doSum(h, nil, data) } }() } @@ -405,21 +448,39 @@ func benchmarkBMTBaseline(n int, t *testing.B) { } // benchmarks BMT Hasher -func benchmarkBMTHasher(n int, t *testing.B) { +func benchmarkBMT(t *testing.B, n int) { data := newData(n) hasher := sha3.NewKeccak256 pool := NewTreePool(hasher, SegmentCount, PoolSize) + bmt := New(pool) t.ReportAllocs() t.ResetTimer() for i := 0; i < t.N; i++ { - bmt := New(pool) - Hash(bmt, nil, data) + syncHash(bmt, nil, data) + } +} + +// benchmarks BMT hasher with asynchronous concurrent segment/section writes +func benchmarkBMTAsync(t *testing.B, n int, wh whenHash, double bool) { + data := newData(n) + hasher := sha3.NewKeccak256 + pool := NewTreePool(hasher, SegmentCount, PoolSize) + bmt := New(pool).NewAsyncWriter(double) + idxs, segments := splitAndShuffle(bmt.SectionSize(), data) + shuffle(len(idxs), func(i int, j int) { + idxs[i], idxs[j] = idxs[j], idxs[i] + }) + + t.ReportAllocs() + t.ResetTimer() + for i := 0; i < t.N; i++ { + asyncHash(bmt, nil, n, wh, idxs, segments) } } // benchmarks 100 concurrent bmt hashes with pool capacity -func benchmarkBMTHasherPool(poolsize, n int, t *testing.B) { +func benchmarkPool(t *testing.B, poolsize, n int) { data := newData(n) hasher := sha3.NewKeccak256 pool := NewTreePool(hasher, SegmentCount, poolsize) @@ -434,7 +495,7 @@ func benchmarkBMTHasherPool(poolsize, n int, t *testing.B) { go func() { defer wg.Done() bmt := New(pool) - Hash(bmt, nil, data) + syncHash(bmt, nil, data) }() } wg.Wait() @@ -442,7 +503,7 @@ func benchmarkBMTHasherPool(poolsize, n int, t *testing.B) { } // benchmarks the reference hasher -func benchmarkRefHasher(n int, t *testing.B) { +func benchmarkRefHasher(t *testing.B, n int) { data := newData(n) hasher := sha3.NewKeccak256 rbmt := NewRefHasher(hasher, 128) @@ -462,3 +523,93 @@ func newData(bufferSize int) []byte { } return data } + +// Hash hashes the data and the span using the bmt hasher +func syncHash(h *Hasher, span, data []byte) []byte { + h.ResetWithLength(span) + h.Write(data) + return h.Sum(nil) +} + +func splitAndShuffle(secsize int, data []byte) (idxs []int, segments [][]byte) { + l := len(data) + n := l / secsize + if l%secsize > 0 { + n++ + } + for i := 0; i < n; i++ { + idxs = append(idxs, i) + end := (i + 1) * secsize + if end > l { + end = l + } + section := data[i*secsize : end] + segments = append(segments, section) + } + shuffle(n, func(i int, j int) { + idxs[i], idxs[j] = idxs[j], idxs[i] + }) + return idxs, segments +} + +// splits the input data performs a random shuffle to mock async section writes +func asyncHashRandom(bmt SectionWriter, span []byte, data []byte, wh whenHash) (s []byte) { + idxs, segments := splitAndShuffle(bmt.SectionSize(), data) + return asyncHash(bmt, span, len(data), wh, idxs, segments) +} + +// mock for async section writes for BMT SectionWriter +// requires a permutation (a random shuffle) of list of all indexes of segments +// and writes them in order to the appropriate section +// the Sum function is called according to the wh parameter (first, last, random [relative to segment writes]) +func asyncHash(bmt SectionWriter, span []byte, l int, wh whenHash, idxs []int, segments [][]byte) (s []byte) { + bmt.Reset() + if l == 0 { + return bmt.Sum(nil, l, span) + } + c := make(chan []byte, 1) + hashf := func() { + c <- bmt.Sum(nil, l, span) + } + maxsize := len(idxs) + var r int + if wh == random { + r = rand.Intn(maxsize) + } + for i, idx := range idxs { + bmt.Write(idx, segments[idx]) + if (wh == first || wh == random) && i == r { + go hashf() + } + } + if wh == last { + return bmt.Sum(nil, l, span) + } + return <-c +} + +// this is also in swarm/network_test.go +// shuffle pseudo-randomizes the order of elements. +// n is the number of elements. Shuffle panics if n < 0. +// swap swaps the elements with indexes i and j. +func shuffle(n int, swap func(i, j int)) { + if n < 0 { + panic("invalid argument to Shuffle") + } + + // Fisher-Yates shuffle: https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle + // Shuffle really ought not be called with n that doesn't fit in 32 bits. + // Not only will it take a very long time, but with 2³¹! possible permutations, + // there's no way that any PRNG can have a big enough internal state to + // generate even a minuscule percentage of the possible permutations. + // Nevertheless, the right API signature accepts an int n, so handle it as best we can. + i := n - 1 + for ; i > 1<<31-1-1; i-- { + j := int(rand.Int63n(int64(i + 1))) + swap(i, j) + } + for ; i > 0; i-- { + j := int(rand.Int31n(int32(i + 1))) + swap(i, j) + } +} |