From fd5d51c9ae3256a1f24cf974dcd02433a259677e Mon Sep 17 00:00:00 2001
From: Zsolt Felfoldi <zsfelfoldi@gmail.com>
Date: Fri, 5 May 2017 20:11:55 +0200
Subject: common/bitutil: added data compression algorithm

---
 common/bitutil/compress.go | 93 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100644 common/bitutil/compress.go

diff --git a/common/bitutil/compress.go b/common/bitutil/compress.go
new file mode 100644
index 000000000..c6c139ab9
--- /dev/null
+++ b/common/bitutil/compress.go
@@ -0,0 +1,93 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package bitutil
+
+/*
+The compression algorithm implemented by CompressBytes and DecompressBytes is
+optimized for "sparse" input data which contains a lot of zero bytes. Decompression
+requires knowledge of the decompressed data length. Compression works as follows:
+
+if data only contains zeroes,
+  CompressBytes(data) == nil
+otherwise if len(data) <= 1,
+ CompressBytes(data) == data
+otherwise:
+ CompressBytes(data) == append(CompressBytes(nonZeroBits(data)), nonZeroBytes(data)...)
+where
+ nonZeroBits(data) is a bit vector with len(data) bits (MSB first):
+  nonZeroBits(data)[i/8] && (1 << (7-i%8)) != 0  if data[i] != 0
+  len(nonZeroBits(data)) == (len(data)+7)/8
+ nonZeroBytes(data) contains the non-zero bytes of data in the same order
+*/
+
+// CompressBytes compresses the input byte slice
+func CompressBytes(data []byte) []byte {
+	if len(data) == 0 {
+		return nil
+	}
+	if len(data) == 1 {
+		if data[0] == 0 {
+			return nil
+		} else {
+			return data
+		}
+	}
+
+	bitsLen := (len(data) + 7) / 8
+	nonZeroBits := make([]byte, bitsLen)
+	nonZeroBytes := make([]byte, 0, len(data))
+	for i, b := range data {
+		if b != 0 {
+			nonZeroBytes = append(nonZeroBytes, b)
+			nonZeroBits[i/8] |= 1 << byte(7-i%8)
+		}
+	}
+	if len(nonZeroBytes) == 0 {
+		return nil
+	}
+	return append(CompressBytes(nonZeroBits), nonZeroBytes...)
+}
+
+// DecompressBytes decompresses data with a known target size.
+// In addition to the decompressed output, the function returns the length of
+// compressed input data corresponding to the output. The input slice may be longer.
+// If the input slice is too short, (nil, -1) is returned.
+func DecompressBytes(data []byte, targetLen int) ([]byte, int) {
+	decomp := make([]byte, targetLen)
+	if len(data) == 0 {
+		return decomp, 0
+	}
+	if targetLen == 1 {
+		return data[0:1], 1
+	}
+
+	bitsLen := (targetLen + 7) / 8
+	nonZeroBits, ptr := DecompressBytes(data, bitsLen)
+	if ptr < 0 {
+		return nil, -1
+	}
+	for i, _ := range decomp {
+		if nonZeroBits[i/8]&(1<<byte(7-i%8)) != 0 {
+			if ptr == len(data) {
+				return nil, -1
+			}
+			decomp[i] = data[ptr]
+			ptr++
+		}
+	}
+	return decomp, ptr
+}
-- 
cgit 


From cf19586cfbe5aa379c8fdb046dc5a8c0fa1cebbb Mon Sep 17 00:00:00 2001
From: Péter Szilágyi <peterke@gmail.com>
Date: Sat, 6 May 2017 18:35:59 +0300
Subject: common/bitutil: fix decompression corner cases; fuzz, test & bench

---
 common/bitutil/compress.go      | 137 ++++++++++++++++++++++++------------
 common/bitutil/compress_fuzz.go |  56 +++++++++++++++
 common/bitutil/compress_test.go | 152 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 302 insertions(+), 43 deletions(-)
 create mode 100644 common/bitutil/compress_fuzz.go
 create mode 100644 common/bitutil/compress_test.go

diff --git a/common/bitutil/compress.go b/common/bitutil/compress.go
index c6c139ab9..a806c0e8b 100644
--- a/common/bitutil/compress.go
+++ b/common/bitutil/compress.go
@@ -16,78 +16,129 @@
 
 package bitutil
 
-/*
-The compression algorithm implemented by CompressBytes and DecompressBytes is
-optimized for "sparse" input data which contains a lot of zero bytes. Decompression
-requires knowledge of the decompressed data length. Compression works as follows:
+import "errors"
 
-if data only contains zeroes,
-  CompressBytes(data) == nil
-otherwise if len(data) <= 1,
- CompressBytes(data) == data
-otherwise:
- CompressBytes(data) == append(CompressBytes(nonZeroBits(data)), nonZeroBytes(data)...)
-where
- nonZeroBits(data) is a bit vector with len(data) bits (MSB first):
-  nonZeroBits(data)[i/8] && (1 << (7-i%8)) != 0  if data[i] != 0
-  len(nonZeroBits(data)) == (len(data)+7)/8
- nonZeroBytes(data) contains the non-zero bytes of data in the same order
-*/
+var (
+	// ErrMissingData is returned from decompression if the byte referenced by
+	// the bitset header overflows the input data.
+	ErrMissingData = errors.New("missing bytes on input")
 
-// CompressBytes compresses the input byte slice
+	// ErrUnreferencedData is returned from decompression if not all bytes were used
+	// up from the input data after decompressing it.
+	ErrUnreferencedData = errors.New("extra bytes on input")
+
+	// ErrExceededTarget is returned from decompression if the bitset header has
+	// more bits defined than the number of target buffer space available.
+	ErrExceededTarget = errors.New("target data size exceeded")
+
+	// ErrZeroContent is returned from decompression if a data byte referenced in
+	// the bitset header is actually a zero byte.
+	ErrZeroContent = errors.New("zero byte in input content")
+)
+
+// The compression algorithm implemented by CompressBytes and DecompressBytes is
+// optimized for sparse input data which contains a lot of zero bytes. Decompression
+// requires knowledge of the decompressed data length.
+//
+// Compression works as follows:
+//
+//   if data only contains zeroes,
+//       CompressBytes(data) == nil
+//   otherwise if len(data) <= 1,
+//       CompressBytes(data) == data
+//   otherwise:
+//       CompressBytes(data) == append(CompressBytes(nonZeroBitset(data)), nonZeroBytes(data)...)
+//       where
+//         nonZeroBitset(data) is a bit vector with len(data) bits (MSB first):
+//             nonZeroBitset(data)[i/8] & (1 << (7-i%8)) != 0  if data[i] != 0
+//             len(nonZeroBitset(data)) == (len(data)+7)/8
+//         nonZeroBytes(data) contains the non-zero bytes of data in the same order
+
+// CompressBytes compresses the input byte slice according to the sparse bitset
+// representation algorithm.
 func CompressBytes(data []byte) []byte {
+	// Empty slices get compressed to nil
 	if len(data) == 0 {
 		return nil
 	}
+	// One byte slices compress to nil or retain the single byte
 	if len(data) == 1 {
 		if data[0] == 0 {
 			return nil
-		} else {
-			return data
 		}
+		return data
 	}
-
-	bitsLen := (len(data) + 7) / 8
-	nonZeroBits := make([]byte, bitsLen)
+	// Calculate the bitset of set bytes, and gather the non-zero bytes
+	nonZeroBitset := make([]byte, (len(data)+7)/8)
 	nonZeroBytes := make([]byte, 0, len(data))
+
 	for i, b := range data {
 		if b != 0 {
 			nonZeroBytes = append(nonZeroBytes, b)
-			nonZeroBits[i/8] |= 1 << byte(7-i%8)
+			nonZeroBitset[i/8] |= 1 << byte(7-i%8)
 		}
 	}
 	if len(nonZeroBytes) == 0 {
 		return nil
 	}
-	return append(CompressBytes(nonZeroBits), nonZeroBytes...)
+	return append(CompressBytes(nonZeroBitset), nonZeroBytes...)
 }
 
-// DecompressBytes decompresses data with a known target size.
-// In addition to the decompressed output, the function returns the length of
-// compressed input data corresponding to the output. The input slice may be longer.
-// If the input slice is too short, (nil, -1) is returned.
-func DecompressBytes(data []byte, targetLen int) ([]byte, int) {
-	decomp := make([]byte, targetLen)
-	if len(data) == 0 {
-		return decomp, 0
+// DecompressBytes decompresses data with a known target size. In addition to the
+// decompressed output, the function returns the length of compressed input data
+// corresponding to the output as the input slice may be longer.
+func DecompressBytes(data []byte, target int) ([]byte, error) {
+	out, size, err := decompressBytes(data, target)
+	if err != nil {
+		return nil, err
 	}
-	if targetLen == 1 {
-		return data[0:1], 1
+	if size != len(data) {
+		return nil, ErrUnreferencedData
 	}
+	return out, nil
+}
 
-	bitsLen := (targetLen + 7) / 8
-	nonZeroBits, ptr := DecompressBytes(data, bitsLen)
-	if ptr < 0 {
-		return nil, -1
+// decompressBytes decompresses data with a known target size. In addition to the
+// decompressed output, the function returns the length of compressed input data
+// corresponding to the output as the input slice may be longer.
+func decompressBytes(data []byte, target int) ([]byte, int, error) {
+	// Sanity check 0 targets to avoid infinite recursion
+	if target == 0 {
+		return nil, 0, nil
+	}
+	// Handle the zero and single byte corner cases
+	decomp := make([]byte, target)
+	if len(data) == 0 {
+		return decomp, 0, nil
+	}
+	if target == 1 {
+		decomp[0] = data[0] // copy to avoid referencing the input slice
+		if data[0] != 0 {
+			return decomp, 1, nil
+		}
+		return decomp, 0, nil
+	}
+	// Decompress the bitset of set bytes and distribute the non zero bytes
+	nonZeroBitset, ptr, err := decompressBytes(data, (target+7)/8)
+	if err != nil {
+		return nil, ptr, err
 	}
-	for i, _ := range decomp {
-		if nonZeroBits[i/8]&(1<<byte(7-i%8)) != 0 {
-			if ptr == len(data) {
-				return nil, -1
+	for i := 0; i < 8*len(nonZeroBitset); i++ {
+		if nonZeroBitset[i/8]&(1<<byte(7-i%8)) != 0 {
+			// Make sure we have enough data to push into the correct slot
+			if ptr >= len(data) {
+				return nil, 0, ErrMissingData
+			}
+			if i >= len(decomp) {
+				return nil, 0, ErrExceededTarget
+			}
+			// Make sure the data is valid and push into the slot
+			if data[ptr] == 0 {
+				return nil, 0, ErrZeroContent
 			}
 			decomp[i] = data[ptr]
 			ptr++
 		}
 	}
-	return decomp, ptr
+	return decomp, ptr, nil
 }
diff --git a/common/bitutil/compress_fuzz.go b/common/bitutil/compress_fuzz.go
new file mode 100644
index 000000000..2b7fe2977
--- /dev/null
+++ b/common/bitutil/compress_fuzz.go
@@ -0,0 +1,56 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+// +build gofuzz
+
+package bitutil
+
+import "bytes"
+
+// Fuzz implements a go-fuzz fuzzer method to test various compression method
+// invocations.
+func Fuzz(data []byte) int {
+	if len(data) == 0 {
+		return -1
+	}
+	if data[0]%2 == 0 {
+		return fuzzCompress(data[1:])
+	}
+	return fuzzDecompress(data[1:])
+}
+
+// fuzzCompress implements a go-fuzz fuzzer method to test the bit compression and
+// decompression algorithm.
+func fuzzCompress(data []byte) int {
+	proc, _ := DecompressBytes(CompressBytes(data), len(data))
+	if !bytes.Equal(data, proc) {
+		panic("content mismatch")
+	}
+	return 0
+}
+
+// fuzzDecompress implements a go-fuzz fuzzer method to test the bit decompression
+// and recompression algorithm.
+func fuzzDecompress(data []byte) int {
+	blob, err := DecompressBytes(data, 1024)
+	if err != nil {
+		return 0
+	}
+	if comp := CompressBytes(blob); !bytes.Equal(comp, data) {
+		panic("content mismatch")
+	}
+	return 0
+}
diff --git a/common/bitutil/compress_test.go b/common/bitutil/compress_test.go
new file mode 100644
index 000000000..ef38bc7b3
--- /dev/null
+++ b/common/bitutil/compress_test.go
@@ -0,0 +1,152 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package bitutil
+
+import (
+	"bytes"
+	"math/rand"
+	"testing"
+
+	"github.com/ethereum/go-ethereum/common/hexutil"
+)
+
+// Tests that data compression and decompression works correctly.
+func TestCompressCycle(t *testing.T) {
+	tests := []string{
+		// Tests generated by go-fuzz to maximize code coverage
+		"0x000000000000000000",
+		"0xef0400",
+		"0xdf7070533534333636313639343638373532313536346c1bc33339343837313070706336343035336336346c65fefb3930393233383838ac2f65fefb",
+		"0x7b64000000",
+		"0x000034000000000000",
+		"0x0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f0000000000000000000",
+		"0x4912385c0e7b64000000",
+		"0x000034000000000000000000000000000000",
+		"0x00",
+		"0x000003e834ff7f0000",
+		"0x0000",
+		"0x0000000000000000000000000000000000000000000000000000000000ff00",
+		"0x895f0c6a020f850c6a020f85f88df88d",
+		"0xdf7070533534333636313639343638373432313536346c1bc3315aac2f65fefb",
+		"0x0000000000",
+		"0xdf70706336346c65fefb",
+		"0x00006d643634000000",
+		"0xdf7070533534333636313639343638373532313536346c1bc333393438373130707063363430353639343638373532313536346c1bc333393438336336346c65fe",
+	}
+	for i, tt := range tests {
+		data := hexutil.MustDecode(tt)
+
+		proc, err := DecompressBytes(CompressBytes(data), len(data))
+		if err != nil {
+			t.Errorf("test %d: failed to decompress compressed data: %v", i, err)
+			continue
+		}
+		if !bytes.Equal(data, proc) {
+			t.Errorf("test %d: compress/decompress mismatch: have %x, want %x", i, proc, data)
+		}
+	}
+}
+
+// Tests that data decompression works
+func TestDecompress(t *testing.T) {
+	tests := []struct {
+		size  int
+		input string
+		fail  error
+	}{
+		{size: 0, input: "0x"},
+
+		// Crashers generated by go-fuzz
+		{size: 0, input: "0x0020", fail: ErrUnreferencedData},
+		{size: 0, input: "0x30", fail: ErrUnreferencedData},
+		{size: 1, input: "0x00", fail: ErrUnreferencedData},
+		{size: 2, input: "0x07", fail: ErrMissingData},
+		{size: 1024, input: "0x8000", fail: ErrZeroContent},
+
+		// Tests generated by go-fuzz to maximize code coverage
+		{size: 29490, input: "0x343137343733323134333839373334323073333930783e3078333930783e70706336346c65303e", fail: ErrMissingData},
+		{size: 59395, input: "0x00", fail: ErrUnreferencedData},
+		{size: 52574, input: "0x70706336346c65c0de", fail: ErrExceededTarget},
+		{size: 42264, input: "0x07", fail: ErrMissingData},
+		{size: 52, input: "0xa5045bad48f4", fail: ErrExceededTarget},
+		{size: 52574, input: "0xc0de", fail: ErrMissingData},
+		{size: 52574, input: "0x"},
+		{size: 29490, input: "0x34313734373332313433383937333432307333393078073034333839373334323073333930783e3078333937333432307333393078073061333930783e70706336346c65303e", fail: ErrMissingData},
+		{size: 29491, input: "0x3973333930783e30783e", fail: ErrMissingData},
+
+		{size: 1024, input: "0x808080608080"},
+		{size: 1024, input: "0x808470705e3632383337363033313434303137393130306c6580ef46806380635a80"},
+		{size: 1024, input: "0x8080808070"},
+		{size: 1024, input: "0x808070705e36346c6580ef46806380635a80"},
+		{size: 1024, input: "0x80808046802680"},
+		{size: 1024, input: "0x4040404035"},
+		{size: 1024, input: "0x4040bf3ba2b3f684402d353234373438373934409fe5b1e7ada94ebfd7d0505e27be4035"},
+		{size: 1024, input: "0x404040bf3ba2b3f6844035"},
+		{size: 1024, input: "0x40402d35323437343837393440bfd7d0505e27be4035"},
+	}
+	for i, tt := range tests {
+		data := hexutil.MustDecode(tt.input)
+
+		orig, err := DecompressBytes(data, tt.size)
+		if err != tt.fail {
+			t.Errorf("test %d: failure mismatch: have %v, want %v", i, err, tt.fail)
+		}
+		if err != nil {
+			continue
+		}
+		if comp := CompressBytes(orig); !bytes.Equal(comp, data) {
+			t.Errorf("test %d: decompress/compress mismatch: have %x, want %x", i, comp, data)
+		}
+	}
+}
+
+// Crude benchmark for compressing random slices of bytes.
+func BenchmarkCompress1KBVerySparse(b *testing.B) { benchmarkCompress(b, 1024, 0.0001) }
+func BenchmarkCompress2KBVerySparse(b *testing.B) { benchmarkCompress(b, 2048, 0.0001) }
+func BenchmarkCompress4KBVerySparse(b *testing.B) { benchmarkCompress(b, 4096, 0.0001) }
+
+func BenchmarkCompress1KBSparse(b *testing.B) { benchmarkCompress(b, 1024, 0.001) }
+func BenchmarkCompress2KBSparse(b *testing.B) { benchmarkCompress(b, 2048, 0.001) }
+func BenchmarkCompress4KBSparse(b *testing.B) { benchmarkCompress(b, 4096, 0.001) }
+
+func BenchmarkCompress1KBDense(b *testing.B) { benchmarkCompress(b, 1024, 0.1) }
+func BenchmarkCompress2KBDense(b *testing.B) { benchmarkCompress(b, 2048, 0.1) }
+func BenchmarkCompress4KBDense(b *testing.B) { benchmarkCompress(b, 4096, 0.1) }
+
+func BenchmarkCompress1KBSaturated(b *testing.B) { benchmarkCompress(b, 1024, 0.5) }
+func BenchmarkCompress2KBSaturated(b *testing.B) { benchmarkCompress(b, 2048, 0.5) }
+func BenchmarkCompress4KBSaturated(b *testing.B) { benchmarkCompress(b, 4096, 0.5) }
+
+func benchmarkCompress(b *testing.B, bytes int, fill float64) {
+	// Generate a random slice of bytes to compress
+	random := rand.NewSource(0) // reproducible and comparable
+
+	data := make([]byte, bytes)
+	bits := int(float64(bytes) * 8 * fill)
+
+	for i := 0; i < bits; i++ {
+		idx := random.Int63() % int64(len(data))
+		bit := uint(random.Int63() % 8)
+		data[idx] |= 1 << bit
+	}
+	// Reset the benchmark and measure compression/decompression
+	b.ResetTimer()
+	b.ReportAllocs()
+	for i := 0; i < b.N; i++ {
+		DecompressBytes(CompressBytes(data), len(data))
+	}
+}
-- 
cgit 


From 82defe5c5663ca0c28563f8a111d327c87726267 Mon Sep 17 00:00:00 2001
From: Péter Szilágyi <peterke@gmail.com>
Date: Mon, 8 May 2017 10:40:48 +0300
Subject: common/compress: internalize encoders, add length wrappers

---
 common/bitutil/compress.go      | 72 ++++++++++++++++++++----------
 common/bitutil/compress_fuzz.go | 24 +++++-----
 common/bitutil/compress_test.go | 99 ++++++++++++++++++++++++++---------------
 3 files changed, 125 insertions(+), 70 deletions(-)

diff --git a/common/bitutil/compress.go b/common/bitutil/compress.go
index a806c0e8b..c057cee4a 100644
--- a/common/bitutil/compress.go
+++ b/common/bitutil/compress.go
@@ -19,21 +19,21 @@ package bitutil
 import "errors"
 
 var (
-	// ErrMissingData is returned from decompression if the byte referenced by
+	// errMissingData is returned from decompression if the byte referenced by
 	// the bitset header overflows the input data.
-	ErrMissingData = errors.New("missing bytes on input")
+	errMissingData = errors.New("missing bytes on input")
 
-	// ErrUnreferencedData is returned from decompression if not all bytes were used
+	// errUnreferencedData is returned from decompression if not all bytes were used
 	// up from the input data after decompressing it.
-	ErrUnreferencedData = errors.New("extra bytes on input")
+	errUnreferencedData = errors.New("extra bytes on input")
 
-	// ErrExceededTarget is returned from decompression if the bitset header has
+	// errExceededTarget is returned from decompression if the bitset header has
 	// more bits defined than the number of target buffer space available.
-	ErrExceededTarget = errors.New("target data size exceeded")
+	errExceededTarget = errors.New("target data size exceeded")
 
-	// ErrZeroContent is returned from decompression if a data byte referenced in
+	// errZeroContent is returned from decompression if a data byte referenced in
 	// the bitset header is actually a zero byte.
-	ErrZeroContent = errors.New("zero byte in input content")
+	errZeroContent = errors.New("zero byte in input content")
 )
 
 // The compression algorithm implemented by CompressBytes and DecompressBytes is
@@ -55,8 +55,20 @@ var (
 //         nonZeroBytes(data) contains the non-zero bytes of data in the same order
 
 // CompressBytes compresses the input byte slice according to the sparse bitset
-// representation algorithm.
+// representation algorithm. If the encoding is not shorter than the input, a
+// copy of the original input is returned instead.
 func CompressBytes(data []byte) []byte {
+	if out := bitsetEncodeBytes(data); len(out) < len(data) {
+		return out
+	}
+	cpy := make([]byte, len(data))
+	copy(cpy, data)
+	return cpy
+}
+
+// bitsetEncodeBytes compresses the input byte slice according to the sparse
+// bitset representation algorithm.
+func bitsetEncodeBytes(data []byte) []byte {
 	// Empty slices get compressed to nil
 	if len(data) == 0 {
 		return nil
@@ -81,27 +93,41 @@ func CompressBytes(data []byte) []byte {
 	if len(nonZeroBytes) == 0 {
 		return nil
 	}
-	return append(CompressBytes(nonZeroBitset), nonZeroBytes...)
+	return append(bitsetEncodeBytes(nonZeroBitset), nonZeroBytes...)
 }
 
-// DecompressBytes decompresses data with a known target size. In addition to the
-// decompressed output, the function returns the length of compressed input data
-// corresponding to the output as the input slice may be longer.
+// DecompressBytes decompresses data with a known target size. If the input data
+// matches the size of the target, it means no compression was done in the first
+// place.
 func DecompressBytes(data []byte, target int) ([]byte, error) {
-	out, size, err := decompressBytes(data, target)
+	if len(data) > target {
+		return nil, errExceededTarget
+	}
+	if len(data) == target {
+		cpy := make([]byte, len(data))
+		copy(cpy, data)
+		return cpy, nil
+	}
+	return bitsetDecodeBytes(data, target)
+}
+
+// bitsetDecodeBytes decompresses data with a known target size.
+func bitsetDecodeBytes(data []byte, target int) ([]byte, error) {
+	out, size, err := bitsetDecodePartialBytes(data, target)
 	if err != nil {
 		return nil, err
 	}
 	if size != len(data) {
-		return nil, ErrUnreferencedData
+		return nil, errUnreferencedData
 	}
 	return out, nil
 }
 
-// decompressBytes decompresses data with a known target size. In addition to the
-// decompressed output, the function returns the length of compressed input data
-// corresponding to the output as the input slice may be longer.
-func decompressBytes(data []byte, target int) ([]byte, int, error) {
+// bitsetDecodePartialBytes decompresses data with a known target size, but does
+// not enforce consuming all the input bytes. In addition to the decompressed
+// output, the function returns the length of compressed input data corresponding
+// to the output as the input slice may be longer.
+func bitsetDecodePartialBytes(data []byte, target int) ([]byte, int, error) {
 	// Sanity check 0 targets to avoid infinite recursion
 	if target == 0 {
 		return nil, 0, nil
@@ -119,7 +145,7 @@ func decompressBytes(data []byte, target int) ([]byte, int, error) {
 		return decomp, 0, nil
 	}
 	// Decompress the bitset of set bytes and distribute the non zero bytes
-	nonZeroBitset, ptr, err := decompressBytes(data, (target+7)/8)
+	nonZeroBitset, ptr, err := bitsetDecodePartialBytes(data, (target+7)/8)
 	if err != nil {
 		return nil, ptr, err
 	}
@@ -127,14 +153,14 @@ func decompressBytes(data []byte, target int) ([]byte, int, error) {
 		if nonZeroBitset[i/8]&(1<<byte(7-i%8)) != 0 {
 			// Make sure we have enough data to push into the correct slot
 			if ptr >= len(data) {
-				return nil, 0, ErrMissingData
+				return nil, 0, errMissingData
 			}
 			if i >= len(decomp) {
-				return nil, 0, ErrExceededTarget
+				return nil, 0, errExceededTarget
 			}
 			// Make sure the data is valid and push into the slot
 			if data[ptr] == 0 {
-				return nil, 0, ErrZeroContent
+				return nil, 0, errZeroContent
 			}
 			decomp[i] = data[ptr]
 			ptr++
diff --git a/common/bitutil/compress_fuzz.go b/common/bitutil/compress_fuzz.go
index 2b7fe2977..1b87f50ed 100644
--- a/common/bitutil/compress_fuzz.go
+++ b/common/bitutil/compress_fuzz.go
@@ -20,36 +20,36 @@ package bitutil
 
 import "bytes"
 
-// Fuzz implements a go-fuzz fuzzer method to test various compression method
+// Fuzz implements a go-fuzz fuzzer method to test various encoding method
 // invocations.
 func Fuzz(data []byte) int {
 	if len(data) == 0 {
 		return -1
 	}
 	if data[0]%2 == 0 {
-		return fuzzCompress(data[1:])
+		return fuzzEncode(data[1:])
 	}
-	return fuzzDecompress(data[1:])
+	return fuzzDecode(data[1:])
 }
 
-// fuzzCompress implements a go-fuzz fuzzer method to test the bit compression and
-// decompression algorithm.
-func fuzzCompress(data []byte) int {
-	proc, _ := DecompressBytes(CompressBytes(data), len(data))
+// fuzzEncode implements a go-fuzz fuzzer method to test the bitset encoding and
+// decoding algorithm.
+func fuzzEncode(data []byte) int {
+	proc, _ := bitsetDecodeBytes(bitsetEncodeBytes(data), len(data))
 	if !bytes.Equal(data, proc) {
 		panic("content mismatch")
 	}
 	return 0
 }
 
-// fuzzDecompress implements a go-fuzz fuzzer method to test the bit decompression
-// and recompression algorithm.
-func fuzzDecompress(data []byte) int {
-	blob, err := DecompressBytes(data, 1024)
+// fuzzDecode implements a go-fuzz fuzzer method to test the bit decoding and
+// reencoding algorithm.
+func fuzzDecode(data []byte) int {
+	blob, err := bitsetDecodeBytes(data, 1024)
 	if err != nil {
 		return 0
 	}
-	if comp := CompressBytes(blob); !bytes.Equal(comp, data) {
+	if comp := bitsetEncodeBytes(blob); !bytes.Equal(comp, data) {
 		panic("content mismatch")
 	}
 	return 0
diff --git a/common/bitutil/compress_test.go b/common/bitutil/compress_test.go
index ef38bc7b3..805ab0369 100644
--- a/common/bitutil/compress_test.go
+++ b/common/bitutil/compress_test.go
@@ -24,8 +24,8 @@ import (
 	"github.com/ethereum/go-ethereum/common/hexutil"
 )
 
-// Tests that data compression and decompression works correctly.
-func TestCompressCycle(t *testing.T) {
+// Tests that data bitset encoding and decoding works and is bijective.
+func TestEncodingCycle(t *testing.T) {
 	tests := []string{
 		// Tests generated by go-fuzz to maximize code coverage
 		"0x000000000000000000",
@@ -50,7 +50,7 @@ func TestCompressCycle(t *testing.T) {
 	for i, tt := range tests {
 		data := hexutil.MustDecode(tt)
 
-		proc, err := DecompressBytes(CompressBytes(data), len(data))
+		proc, err := bitsetDecodeBytes(bitsetEncodeBytes(data), len(data))
 		if err != nil {
 			t.Errorf("test %d: failed to decompress compressed data: %v", i, err)
 			continue
@@ -61,8 +61,8 @@ func TestCompressCycle(t *testing.T) {
 	}
 }
 
-// Tests that data decompression works
-func TestDecompress(t *testing.T) {
+// Tests that data bitset decoding and reencoding works and is bijective.
+func TestDecodingCycle(t *testing.T) {
 	tests := []struct {
 		size  int
 		input string
@@ -71,22 +71,22 @@ func TestDecompress(t *testing.T) {
 		{size: 0, input: "0x"},
 
 		// Crashers generated by go-fuzz
-		{size: 0, input: "0x0020", fail: ErrUnreferencedData},
-		{size: 0, input: "0x30", fail: ErrUnreferencedData},
-		{size: 1, input: "0x00", fail: ErrUnreferencedData},
-		{size: 2, input: "0x07", fail: ErrMissingData},
-		{size: 1024, input: "0x8000", fail: ErrZeroContent},
+		{size: 0, input: "0x0020", fail: errUnreferencedData},
+		{size: 0, input: "0x30", fail: errUnreferencedData},
+		{size: 1, input: "0x00", fail: errUnreferencedData},
+		{size: 2, input: "0x07", fail: errMissingData},
+		{size: 1024, input: "0x8000", fail: errZeroContent},
 
 		// Tests generated by go-fuzz to maximize code coverage
-		{size: 29490, input: "0x343137343733323134333839373334323073333930783e3078333930783e70706336346c65303e", fail: ErrMissingData},
-		{size: 59395, input: "0x00", fail: ErrUnreferencedData},
-		{size: 52574, input: "0x70706336346c65c0de", fail: ErrExceededTarget},
-		{size: 42264, input: "0x07", fail: ErrMissingData},
-		{size: 52, input: "0xa5045bad48f4", fail: ErrExceededTarget},
-		{size: 52574, input: "0xc0de", fail: ErrMissingData},
+		{size: 29490, input: "0x343137343733323134333839373334323073333930783e3078333930783e70706336346c65303e", fail: errMissingData},
+		{size: 59395, input: "0x00", fail: errUnreferencedData},
+		{size: 52574, input: "0x70706336346c65c0de", fail: errExceededTarget},
+		{size: 42264, input: "0x07", fail: errMissingData},
+		{size: 52, input: "0xa5045bad48f4", fail: errExceededTarget},
+		{size: 52574, input: "0xc0de", fail: errMissingData},
 		{size: 52574, input: "0x"},
-		{size: 29490, input: "0x34313734373332313433383937333432307333393078073034333839373334323073333930783e3078333937333432307333393078073061333930783e70706336346c65303e", fail: ErrMissingData},
-		{size: 29491, input: "0x3973333930783e30783e", fail: ErrMissingData},
+		{size: 29490, input: "0x34313734373332313433383937333432307333393078073034333839373334323073333930783e3078333937333432307333393078073061333930783e70706336346c65303e", fail: errMissingData},
+		{size: 29491, input: "0x3973333930783e30783e", fail: errMissingData},
 
 		{size: 1024, input: "0x808080608080"},
 		{size: 1024, input: "0x808470705e3632383337363033313434303137393130306c6580ef46806380635a80"},
@@ -101,37 +101,66 @@ func TestDecompress(t *testing.T) {
 	for i, tt := range tests {
 		data := hexutil.MustDecode(tt.input)
 
-		orig, err := DecompressBytes(data, tt.size)
+		orig, err := bitsetDecodeBytes(data, tt.size)
 		if err != tt.fail {
 			t.Errorf("test %d: failure mismatch: have %v, want %v", i, err, tt.fail)
 		}
 		if err != nil {
 			continue
 		}
-		if comp := CompressBytes(orig); !bytes.Equal(comp, data) {
+		if comp := bitsetEncodeBytes(orig); !bytes.Equal(comp, data) {
 			t.Errorf("test %d: decompress/compress mismatch: have %x, want %x", i, comp, data)
 		}
 	}
 }
 
+// TestCompression tests that compression works by returning either the bitset
+// encoded input, or the actual input if the bitset version is not shorter.
+func TestCompression(t *testing.T) {
+	// Check that compression returns the bitset encoding if it is shorter
+	in := hexutil.MustDecode("0x4912385c0e7b64000000")
+	out := hexutil.MustDecode("0x80fe4912385c0e7b64")
+
+	if data := CompressBytes(in); !bytes.Equal(data, out) {
+		t.Errorf("encoding mismatch for sparse data: have %x, want %x", data, out)
+	}
+	if data, err := DecompressBytes(out, len(in)); err != nil || !bytes.Equal(data, in) {
+		t.Errorf("decoding mismatch for sparse data: have %x, want %x, error %v", data, in, err)
+	}
+	// Check that compression returns the input if the bitset encoding is longer
+	in = hexutil.MustDecode("0xdf7070533534333636313639343638373532313536346c1bc33339343837313070706336343035336336346c65fefb3930393233383838ac2f65fefb")
+	out = hexutil.MustDecode("0xdf7070533534333636313639343638373532313536346c1bc33339343837313070706336343035336336346c65fefb3930393233383838ac2f65fefb")
+
+	if data := CompressBytes(in); !bytes.Equal(data, out) {
+		t.Errorf("encoding mismatch for dense data: have %x, want %x", data, out)
+	}
+	if data, err := DecompressBytes(out, len(in)); err != nil || !bytes.Equal(data, in) {
+		t.Errorf("decoding mismatch for dense data: have %x, want %x, error %v", data, in, err)
+	}
+	// Check that decompressing a longer input than the target fails
+	if _, err := DecompressBytes([]byte{0xc0, 0x01, 0x01}, 2); err != errExceededTarget {
+		t.Errorf("decoding error mismatch for long data: have %v, want %v", err, errExceededTarget)
+	}
+}
+
 // Crude benchmark for compressing random slices of bytes.
-func BenchmarkCompress1KBVerySparse(b *testing.B) { benchmarkCompress(b, 1024, 0.0001) }
-func BenchmarkCompress2KBVerySparse(b *testing.B) { benchmarkCompress(b, 2048, 0.0001) }
-func BenchmarkCompress4KBVerySparse(b *testing.B) { benchmarkCompress(b, 4096, 0.0001) }
+func BenchmarkEncoding1KBVerySparse(b *testing.B) { benchmarkEncoding(b, 1024, 0.0001) }
+func BenchmarkEncoding2KBVerySparse(b *testing.B) { benchmarkEncoding(b, 2048, 0.0001) }
+func BenchmarkEncoding4KBVerySparse(b *testing.B) { benchmarkEncoding(b, 4096, 0.0001) }
 
-func BenchmarkCompress1KBSparse(b *testing.B) { benchmarkCompress(b, 1024, 0.001) }
-func BenchmarkCompress2KBSparse(b *testing.B) { benchmarkCompress(b, 2048, 0.001) }
-func BenchmarkCompress4KBSparse(b *testing.B) { benchmarkCompress(b, 4096, 0.001) }
+func BenchmarkEncoding1KBSparse(b *testing.B) { benchmarkEncoding(b, 1024, 0.001) }
+func BenchmarkEncoding2KBSparse(b *testing.B) { benchmarkEncoding(b, 2048, 0.001) }
+func BenchmarkEncoding4KBSparse(b *testing.B) { benchmarkEncoding(b, 4096, 0.001) }
 
-func BenchmarkCompress1KBDense(b *testing.B) { benchmarkCompress(b, 1024, 0.1) }
-func BenchmarkCompress2KBDense(b *testing.B) { benchmarkCompress(b, 2048, 0.1) }
-func BenchmarkCompress4KBDense(b *testing.B) { benchmarkCompress(b, 4096, 0.1) }
+func BenchmarkEncoding1KBDense(b *testing.B) { benchmarkEncoding(b, 1024, 0.1) }
+func BenchmarkEncoding2KBDense(b *testing.B) { benchmarkEncoding(b, 2048, 0.1) }
+func BenchmarkEncoding4KBDense(b *testing.B) { benchmarkEncoding(b, 4096, 0.1) }
 
-func BenchmarkCompress1KBSaturated(b *testing.B) { benchmarkCompress(b, 1024, 0.5) }
-func BenchmarkCompress2KBSaturated(b *testing.B) { benchmarkCompress(b, 2048, 0.5) }
-func BenchmarkCompress4KBSaturated(b *testing.B) { benchmarkCompress(b, 4096, 0.5) }
+func BenchmarkEncoding1KBSaturated(b *testing.B) { benchmarkEncoding(b, 1024, 0.5) }
+func BenchmarkEncoding2KBSaturated(b *testing.B) { benchmarkEncoding(b, 2048, 0.5) }
+func BenchmarkEncoding4KBSaturated(b *testing.B) { benchmarkEncoding(b, 4096, 0.5) }
 
-func benchmarkCompress(b *testing.B, bytes int, fill float64) {
+func benchmarkEncoding(b *testing.B, bytes int, fill float64) {
 	// Generate a random slice of bytes to compress
 	random := rand.NewSource(0) // reproducible and comparable
 
@@ -143,10 +172,10 @@ func benchmarkCompress(b *testing.B, bytes int, fill float64) {
 		bit := uint(random.Int63() % 8)
 		data[idx] |= 1 << bit
 	}
-	// Reset the benchmark and measure compression/decompression
+	// Reset the benchmark and measure encoding/decoding
 	b.ResetTimer()
 	b.ReportAllocs()
 	for i := 0; i < b.N; i++ {
-		DecompressBytes(CompressBytes(data), len(data))
+		bitsetDecodeBytes(bitsetEncodeBytes(data), len(data))
 	}
 }
-- 
cgit