From 702218008ee2b6d708d6b2821cdef80736bb3224 Mon Sep 17 00:00:00 2001 From: Taylor Gerring Date: Mon, 16 Feb 2015 14:28:33 +0100 Subject: Add versioned dependencies from godep --- .../code.google.com/p/snappy-go/snappy/decode.go | 124 ++++++++++ .../code.google.com/p/snappy-go/snappy/encode.go | 174 ++++++++++++++ .../code.google.com/p/snappy-go/snappy/snappy.go | 38 +++ .../p/snappy-go/snappy/snappy_test.go | 261 +++++++++++++++++++++ 4 files changed, 597 insertions(+) create mode 100644 Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/decode.go create mode 100644 Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/encode.go create mode 100644 Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/snappy.go create mode 100644 Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/snappy_test.go (limited to 'Godeps/_workspace/src/code.google.com/p/snappy-go') diff --git a/Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/decode.go b/Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/decode.go new file mode 100644 index 000000000..d93c1b9db --- /dev/null +++ b/Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/decode.go @@ -0,0 +1,124 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snappy + +import ( + "encoding/binary" + "errors" +) + +// ErrCorrupt reports that the input is invalid. +var ErrCorrupt = errors.New("snappy: corrupt input") + +// DecodedLen returns the length of the decoded block. +func DecodedLen(src []byte) (int, error) { + v, _, err := decodedLen(src) + return v, err +} + +// decodedLen returns the length of the decoded block and the number of bytes +// that the length header occupied. +func decodedLen(src []byte) (blockLen, headerLen int, err error) { + v, n := binary.Uvarint(src) + if n == 0 { + return 0, 0, ErrCorrupt + } + if uint64(int(v)) != v { + return 0, 0, errors.New("snappy: decoded block is too large") + } + return int(v), n, nil +} + +// Decode returns the decoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire decoded block. +// Otherwise, a newly allocated slice will be returned. +// It is valid to pass a nil dst. +func Decode(dst, src []byte) ([]byte, error) { + dLen, s, err := decodedLen(src) + if err != nil { + return nil, err + } + if len(dst) < dLen { + dst = make([]byte, dLen) + } + + var d, offset, length int + for s < len(src) { + switch src[s] & 0x03 { + case tagLiteral: + x := uint(src[s] >> 2) + switch { + case x < 60: + s += 1 + case x == 60: + s += 2 + if s > len(src) { + return nil, ErrCorrupt + } + x = uint(src[s-1]) + case x == 61: + s += 3 + if s > len(src) { + return nil, ErrCorrupt + } + x = uint(src[s-2]) | uint(src[s-1])<<8 + case x == 62: + s += 4 + if s > len(src) { + return nil, ErrCorrupt + } + x = uint(src[s-3]) | uint(src[s-2])<<8 | uint(src[s-1])<<16 + case x == 63: + s += 5 + if s > len(src) { + return nil, ErrCorrupt + } + x = uint(src[s-4]) | uint(src[s-3])<<8 | uint(src[s-2])<<16 | uint(src[s-1])<<24 + } + length = int(x + 1) + if length <= 0 { + return nil, errors.New("snappy: unsupported literal length") + } + if length > len(dst)-d || length > len(src)-s { + return nil, ErrCorrupt + } + copy(dst[d:], src[s:s+length]) + d += length + s += length + continue + + case tagCopy1: + s += 2 + if s > len(src) { + return nil, ErrCorrupt + } + length = 4 + int(src[s-2])>>2&0x7 + offset = int(src[s-2])&0xe0<<3 | int(src[s-1]) + + case tagCopy2: + s += 3 + if s > len(src) { + return nil, ErrCorrupt + } + length = 1 + int(src[s-3])>>2 + offset = int(src[s-2]) | int(src[s-1])<<8 + + case tagCopy4: + return nil, errors.New("snappy: unsupported COPY_4 tag") + } + + end := d + length + if offset > d || end > len(dst) { + return nil, ErrCorrupt + } + for ; d < end; d++ { + dst[d] = dst[d-offset] + } + } + if d != dLen { + return nil, ErrCorrupt + } + return dst[:d], nil +} diff --git a/Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/encode.go b/Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/encode.go new file mode 100644 index 000000000..b2371db11 --- /dev/null +++ b/Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/encode.go @@ -0,0 +1,174 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snappy + +import ( + "encoding/binary" +) + +// We limit how far copy back-references can go, the same as the C++ code. +const maxOffset = 1 << 15 + +// emitLiteral writes a literal chunk and returns the number of bytes written. +func emitLiteral(dst, lit []byte) int { + i, n := 0, uint(len(lit)-1) + switch { + case n < 60: + dst[0] = uint8(n)<<2 | tagLiteral + i = 1 + case n < 1<<8: + dst[0] = 60<<2 | tagLiteral + dst[1] = uint8(n) + i = 2 + case n < 1<<16: + dst[0] = 61<<2 | tagLiteral + dst[1] = uint8(n) + dst[2] = uint8(n >> 8) + i = 3 + case n < 1<<24: + dst[0] = 62<<2 | tagLiteral + dst[1] = uint8(n) + dst[2] = uint8(n >> 8) + dst[3] = uint8(n >> 16) + i = 4 + case int64(n) < 1<<32: + dst[0] = 63<<2 | tagLiteral + dst[1] = uint8(n) + dst[2] = uint8(n >> 8) + dst[3] = uint8(n >> 16) + dst[4] = uint8(n >> 24) + i = 5 + default: + panic("snappy: source buffer is too long") + } + if copy(dst[i:], lit) != len(lit) { + panic("snappy: destination buffer is too short") + } + return i + len(lit) +} + +// emitCopy writes a copy chunk and returns the number of bytes written. +func emitCopy(dst []byte, offset, length int) int { + i := 0 + for length > 0 { + x := length - 4 + if 0 <= x && x < 1<<3 && offset < 1<<11 { + dst[i+0] = uint8(offset>>8)&0x07<<5 | uint8(x)<<2 | tagCopy1 + dst[i+1] = uint8(offset) + i += 2 + break + } + + x = length + if x > 1<<6 { + x = 1 << 6 + } + dst[i+0] = uint8(x-1)<<2 | tagCopy2 + dst[i+1] = uint8(offset) + dst[i+2] = uint8(offset >> 8) + i += 3 + length -= x + } + return i +} + +// Encode returns the encoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire encoded block. +// Otherwise, a newly allocated slice will be returned. +// It is valid to pass a nil dst. +func Encode(dst, src []byte) ([]byte, error) { + if n := MaxEncodedLen(len(src)); len(dst) < n { + dst = make([]byte, n) + } + + // The block starts with the varint-encoded length of the decompressed bytes. + d := binary.PutUvarint(dst, uint64(len(src))) + + // Return early if src is short. + if len(src) <= 4 { + if len(src) != 0 { + d += emitLiteral(dst[d:], src) + } + return dst[:d], nil + } + + // Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive. + const maxTableSize = 1 << 14 + shift, tableSize := uint(32-8), 1<<8 + for tableSize < maxTableSize && tableSize < len(src) { + shift-- + tableSize *= 2 + } + var table [maxTableSize]int + + // Iterate over the source bytes. + var ( + s int // The iterator position. + t int // The last position with the same hash as s. + lit int // The start position of any pending literal bytes. + ) + for s+3 < len(src) { + // Update the hash table. + b0, b1, b2, b3 := src[s], src[s+1], src[s+2], src[s+3] + h := uint32(b0) | uint32(b1)<<8 | uint32(b2)<<16 | uint32(b3)<<24 + p := &table[(h*0x1e35a7bd)>>shift] + // We need to to store values in [-1, inf) in table. To save + // some initialization time, (re)use the table's zero value + // and shift the values against this zero: add 1 on writes, + // subtract 1 on reads. + t, *p = *p-1, s+1 + // If t is invalid or src[s:s+4] differs from src[t:t+4], accumulate a literal byte. + if t < 0 || s-t >= maxOffset || b0 != src[t] || b1 != src[t+1] || b2 != src[t+2] || b3 != src[t+3] { + s++ + continue + } + // Otherwise, we have a match. First, emit any pending literal bytes. + if lit != s { + d += emitLiteral(dst[d:], src[lit:s]) + } + // Extend the match to be as long as possible. + s0 := s + s, t = s+4, t+4 + for s < len(src) && src[s] == src[t] { + s++ + t++ + } + // Emit the copied bytes. + d += emitCopy(dst[d:], s-t, s-s0) + lit = s + } + + // Emit any final pending literal bytes and return. + if lit != len(src) { + d += emitLiteral(dst[d:], src[lit:]) + } + return dst[:d], nil +} + +// MaxEncodedLen returns the maximum length of a snappy block, given its +// uncompressed length. +func MaxEncodedLen(srcLen int) int { + // Compressed data can be defined as: + // compressed := item* literal* + // item := literal* copy + // + // The trailing literal sequence has a space blowup of at most 62/60 + // since a literal of length 60 needs one tag byte + one extra byte + // for length information. + // + // Item blowup is trickier to measure. Suppose the "copy" op copies + // 4 bytes of data. Because of a special check in the encoding code, + // we produce a 4-byte copy only if the offset is < 65536. Therefore + // the copy op takes 3 bytes to encode, and this type of item leads + // to at most the 62/60 blowup for representing literals. + // + // Suppose the "copy" op copies 5 bytes of data. If the offset is big + // enough, it will take 5 bytes to encode the copy op. Therefore the + // worst case here is a one-byte literal followed by a five-byte copy. + // That is, 6 bytes of input turn into 7 bytes of "compressed" data. + // + // This last factor dominates the blowup, so the final estimate is: + return 32 + srcLen + srcLen/6 +} diff --git a/Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/snappy.go b/Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/snappy.go new file mode 100644 index 000000000..2f1b790d0 --- /dev/null +++ b/Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/snappy.go @@ -0,0 +1,38 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package snappy implements the snappy block-based compression format. +// It aims for very high speeds and reasonable compression. +// +// The C++ snappy implementation is at http://code.google.com/p/snappy/ +package snappy + +/* +Each encoded block begins with the varint-encoded length of the decoded data, +followed by a sequence of chunks. Chunks begin and end on byte boundaries. The +first byte of each chunk is broken into its 2 least and 6 most significant bits +called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag. +Zero means a literal tag. All other values mean a copy tag. + +For literal tags: + - If m < 60, the next 1 + m bytes are literal bytes. + - Otherwise, let n be the little-endian unsigned integer denoted by the next + m - 59 bytes. The next 1 + n bytes after that are literal bytes. + +For copy tags, length bytes are copied from offset bytes ago, in the style of +Lempel-Ziv compression algorithms. In particular: + - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12). + The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10 + of the offset. The next byte is bits 0-7 of the offset. + - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65). + The length is 1 + m. The offset is the little-endian unsigned integer + denoted by the next 2 bytes. + - For l == 3, this tag is a legacy format that is no longer supported. +*/ +const ( + tagLiteral = 0x00 + tagCopy1 = 0x01 + tagCopy2 = 0x02 + tagCopy4 = 0x03 +) diff --git a/Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/snappy_test.go b/Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/snappy_test.go new file mode 100644 index 000000000..7ba839244 --- /dev/null +++ b/Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/snappy_test.go @@ -0,0 +1,261 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snappy + +import ( + "bytes" + "flag" + "fmt" + "io" + "io/ioutil" + "math/rand" + "net/http" + "os" + "path/filepath" + "strings" + "testing" +) + +var download = flag.Bool("download", false, "If true, download any missing files before running benchmarks") + +func roundtrip(b, ebuf, dbuf []byte) error { + e, err := Encode(ebuf, b) + if err != nil { + return fmt.Errorf("encoding error: %v", err) + } + d, err := Decode(dbuf, e) + if err != nil { + return fmt.Errorf("decoding error: %v", err) + } + if !bytes.Equal(b, d) { + return fmt.Errorf("roundtrip mismatch:\n\twant %v\n\tgot %v", b, d) + } + return nil +} + +func TestEmpty(t *testing.T) { + if err := roundtrip(nil, nil, nil); err != nil { + t.Fatal(err) + } +} + +func TestSmallCopy(t *testing.T) { + for _, ebuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} { + for _, dbuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} { + for i := 0; i < 32; i++ { + s := "aaaa" + strings.Repeat("b", i) + "aaaabbbb" + if err := roundtrip([]byte(s), ebuf, dbuf); err != nil { + t.Errorf("len(ebuf)=%d, len(dbuf)=%d, i=%d: %v", len(ebuf), len(dbuf), i, err) + } + } + } + } +} + +func TestSmallRand(t *testing.T) { + rand.Seed(27354294) + for n := 1; n < 20000; n += 23 { + b := make([]byte, n) + for i, _ := range b { + b[i] = uint8(rand.Uint32()) + } + if err := roundtrip(b, nil, nil); err != nil { + t.Fatal(err) + } + } +} + +func TestSmallRegular(t *testing.T) { + for n := 1; n < 20000; n += 23 { + b := make([]byte, n) + for i, _ := range b { + b[i] = uint8(i%10 + 'a') + } + if err := roundtrip(b, nil, nil); err != nil { + t.Fatal(err) + } + } +} + +func benchDecode(b *testing.B, src []byte) { + encoded, err := Encode(nil, src) + if err != nil { + b.Fatal(err) + } + // Bandwidth is in amount of uncompressed data. + b.SetBytes(int64(len(src))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + Decode(src, encoded) + } +} + +func benchEncode(b *testing.B, src []byte) { + // Bandwidth is in amount of uncompressed data. + b.SetBytes(int64(len(src))) + dst := make([]byte, MaxEncodedLen(len(src))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + Encode(dst, src) + } +} + +func readFile(b *testing.B, filename string) []byte { + src, err := ioutil.ReadFile(filename) + if err != nil { + b.Fatalf("failed reading %s: %s", filename, err) + } + if len(src) == 0 { + b.Fatalf("%s has zero length", filename) + } + return src +} + +// expand returns a slice of length n containing repeated copies of src. +func expand(src []byte, n int) []byte { + dst := make([]byte, n) + for x := dst; len(x) > 0; { + i := copy(x, src) + x = x[i:] + } + return dst +} + +func benchWords(b *testing.B, n int, decode bool) { + // Note: the file is OS-language dependent so the resulting values are not + // directly comparable for non-US-English OS installations. + data := expand(readFile(b, "/usr/share/dict/words"), n) + if decode { + benchDecode(b, data) + } else { + benchEncode(b, data) + } +} + +func BenchmarkWordsDecode1e3(b *testing.B) { benchWords(b, 1e3, true) } +func BenchmarkWordsDecode1e4(b *testing.B) { benchWords(b, 1e4, true) } +func BenchmarkWordsDecode1e5(b *testing.B) { benchWords(b, 1e5, true) } +func BenchmarkWordsDecode1e6(b *testing.B) { benchWords(b, 1e6, true) } +func BenchmarkWordsEncode1e3(b *testing.B) { benchWords(b, 1e3, false) } +func BenchmarkWordsEncode1e4(b *testing.B) { benchWords(b, 1e4, false) } +func BenchmarkWordsEncode1e5(b *testing.B) { benchWords(b, 1e5, false) } +func BenchmarkWordsEncode1e6(b *testing.B) { benchWords(b, 1e6, false) } + +// testFiles' values are copied directly from +// https://code.google.com/p/snappy/source/browse/trunk/snappy_unittest.cc. +// The label field is unused in snappy-go. +var testFiles = []struct { + label string + filename string +}{ + {"html", "html"}, + {"urls", "urls.10K"}, + {"jpg", "house.jpg"}, + {"pdf", "mapreduce-osdi-1.pdf"}, + {"html4", "html_x_4"}, + {"cp", "cp.html"}, + {"c", "fields.c"}, + {"lsp", "grammar.lsp"}, + {"xls", "kennedy.xls"}, + {"txt1", "alice29.txt"}, + {"txt2", "asyoulik.txt"}, + {"txt3", "lcet10.txt"}, + {"txt4", "plrabn12.txt"}, + {"bin", "ptt5"}, + {"sum", "sum"}, + {"man", "xargs.1"}, + {"pb", "geo.protodata"}, + {"gaviota", "kppkn.gtb"}, +} + +// The test data files are present at this canonical URL. +const baseURL = "https://snappy.googlecode.com/svn/trunk/testdata/" + +func downloadTestdata(basename string) (errRet error) { + filename := filepath.Join("testdata", basename) + f, err := os.Create(filename) + if err != nil { + return fmt.Errorf("failed to create %s: %s", filename, err) + } + defer f.Close() + defer func() { + if errRet != nil { + os.Remove(filename) + } + }() + resp, err := http.Get(baseURL + basename) + if err != nil { + return fmt.Errorf("failed to download %s: %s", baseURL+basename, err) + } + defer resp.Body.Close() + _, err = io.Copy(f, resp.Body) + if err != nil { + return fmt.Errorf("failed to write %s: %s", filename, err) + } + return nil +} + +func benchFile(b *testing.B, n int, decode bool) { + filename := filepath.Join("testdata", testFiles[n].filename) + if stat, err := os.Stat(filename); err != nil || stat.Size() == 0 { + if !*download { + b.Fatal("test data not found; skipping benchmark without the -download flag") + } + // Download the official snappy C++ implementation reference test data + // files for benchmarking. + if err := os.Mkdir("testdata", 0777); err != nil && !os.IsExist(err) { + b.Fatalf("failed to create testdata: %s", err) + } + for _, tf := range testFiles { + if err := downloadTestdata(tf.filename); err != nil { + b.Fatalf("failed to download testdata: %s", err) + } + } + } + data := readFile(b, filename) + if decode { + benchDecode(b, data) + } else { + benchEncode(b, data) + } +} + +// Naming convention is kept similar to what snappy's C++ implementation uses. +func Benchmark_UFlat0(b *testing.B) { benchFile(b, 0, true) } +func Benchmark_UFlat1(b *testing.B) { benchFile(b, 1, true) } +func Benchmark_UFlat2(b *testing.B) { benchFile(b, 2, true) } +func Benchmark_UFlat3(b *testing.B) { benchFile(b, 3, true) } +func Benchmark_UFlat4(b *testing.B) { benchFile(b, 4, true) } +func Benchmark_UFlat5(b *testing.B) { benchFile(b, 5, true) } +func Benchmark_UFlat6(b *testing.B) { benchFile(b, 6, true) } +func Benchmark_UFlat7(b *testing.B) { benchFile(b, 7, true) } +func Benchmark_UFlat8(b *testing.B) { benchFile(b, 8, true) } +func Benchmark_UFlat9(b *testing.B) { benchFile(b, 9, true) } +func Benchmark_UFlat10(b *testing.B) { benchFile(b, 10, true) } +func Benchmark_UFlat11(b *testing.B) { benchFile(b, 11, true) } +func Benchmark_UFlat12(b *testing.B) { benchFile(b, 12, true) } +func Benchmark_UFlat13(b *testing.B) { benchFile(b, 13, true) } +func Benchmark_UFlat14(b *testing.B) { benchFile(b, 14, true) } +func Benchmark_UFlat15(b *testing.B) { benchFile(b, 15, true) } +func Benchmark_UFlat16(b *testing.B) { benchFile(b, 16, true) } +func Benchmark_UFlat17(b *testing.B) { benchFile(b, 17, true) } +func Benchmark_ZFlat0(b *testing.B) { benchFile(b, 0, false) } +func Benchmark_ZFlat1(b *testing.B) { benchFile(b, 1, false) } +func Benchmark_ZFlat2(b *testing.B) { benchFile(b, 2, false) } +func Benchmark_ZFlat3(b *testing.B) { benchFile(b, 3, false) } +func Benchmark_ZFlat4(b *testing.B) { benchFile(b, 4, false) } +func Benchmark_ZFlat5(b *testing.B) { benchFile(b, 5, false) } +func Benchmark_ZFlat6(b *testing.B) { benchFile(b, 6, false) } +func Benchmark_ZFlat7(b *testing.B) { benchFile(b, 7, false) } +func Benchmark_ZFlat8(b *testing.B) { benchFile(b, 8, false) } +func Benchmark_ZFlat9(b *testing.B) { benchFile(b, 9, false) } +func Benchmark_ZFlat10(b *testing.B) { benchFile(b, 10, false) } +func Benchmark_ZFlat11(b *testing.B) { benchFile(b, 11, false) } +func Benchmark_ZFlat12(b *testing.B) { benchFile(b, 12, false) } +func Benchmark_ZFlat13(b *testing.B) { benchFile(b, 13, false) } +func Benchmark_ZFlat14(b *testing.B) { benchFile(b, 14, false) } +func Benchmark_ZFlat15(b *testing.B) { benchFile(b, 15, false) } +func Benchmark_ZFlat16(b *testing.B) { benchFile(b, 16, false) } +func Benchmark_ZFlat17(b *testing.B) { benchFile(b, 17, false) } -- cgit