diff options
author | Péter Szilágyi <peterke@gmail.com> | 2016-02-11 22:16:52 +0800 |
---|---|---|
committer | Péter Szilágyi <peterke@gmail.com> | 2016-02-11 22:16:52 +0800 |
commit | b019f3ee29ce55c3d515ee8bafe0f4bb14221c0a (patch) | |
tree | 26e023be6c99a10e82a5a0ebadd1e42cefe9bd3c /Godeps/_workspace/src/golang.org/x/text/encoding/charmap/maketables.go | |
parent | b05e472c076d30035233d6a8b5fb3360b236e3ff (diff) | |
download | go-tangerine-b019f3ee29ce55c3d515ee8bafe0f4bb14221c0a.tar.gz go-tangerine-b019f3ee29ce55c3d515ee8bafe0f4bb14221c0a.tar.zst go-tangerine-b019f3ee29ce55c3d515ee8bafe0f4bb14221c0a.zip |
Godeps: update all dependencies to latest code
Diffstat (limited to 'Godeps/_workspace/src/golang.org/x/text/encoding/charmap/maketables.go')
-rw-r--r-- | Godeps/_workspace/src/golang.org/x/text/encoding/charmap/maketables.go | 151 |
1 files changed, 114 insertions, 37 deletions
diff --git a/Godeps/_workspace/src/golang.org/x/text/encoding/charmap/maketables.go b/Godeps/_workspace/src/golang.org/x/text/encoding/charmap/maketables.go index fc87c2103..8e1db0202 100644 --- a/Godeps/_workspace/src/golang.org/x/text/encoding/charmap/maketables.go +++ b/Godeps/_workspace/src/golang.org/x/text/encoding/charmap/maketables.go @@ -6,9 +6,6 @@ package main -// This program generates tables.go: -// go run maketables.go | gofmt > tables.go - import ( "bufio" "fmt" @@ -19,6 +16,7 @@ import ( "unicode/utf8" "golang.org/x/text/encoding" + "golang.org/x/text/internal/gen" ) const ascii = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + @@ -41,11 +39,47 @@ var encodings = []struct { "", "CodePage437", encoding.ASCIISub, - ascii + - "ÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜ¢£¥₧ƒ" + - "áíóúñѪº¿⌐¬½¼¡«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐" + - "└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀" + - "αßΓπΣσµτΦΘΩδ∞∅∈∩≡±≥≤⌠⌡÷≈°•·√ⁿ²∎\u00a0", + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM437-2.1.2.ucm", + }, + { + "IBM Code Page 850", + "PC850Multilingual", + "", + "CodePage850", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM850-2.1.2.ucm", + }, + { + "IBM Code Page 852", + "PCp852", + "", + "CodePage852", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM852-2.1.2.ucm", + }, + { + "IBM Code Page 855", + "IBM855", + "", + "CodePage855", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM855-2.1.2.ucm", + }, + { + "Windows Code Page 858", // PC latin1 with Euro + "IBM00858", + "", + "CodePage858", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/windows-858-2000.ucm", + }, + { + "IBM Code Page 862", + "PC862LatinHebrew", + "", + "CodePage862", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM862-2.1.2.ucm", }, { "IBM Code Page 866", @@ -324,23 +358,63 @@ func getWHATWG(url string) string { return ascii + string(mapping) } +func getUCM(url string) string { + res, err := http.Get(url) + if err != nil { + log.Fatalf("%q: Get: %v", url, err) + } + defer res.Body.Close() + + mapping := make([]rune, 256) + for i := range mapping { + mapping[i] = '\ufffd' + } + + charsFound := 0 + scanner := bufio.NewScanner(res.Body) + for scanner.Scan() { + s := strings.TrimSpace(scanner.Text()) + if s == "" || s[0] == '#' { + continue + } + var c byte + var r rune + if _, err := fmt.Sscanf(s, `<U%x> \x%x |0`, &r, &c); err != nil { + continue + } + mapping[c] = r + charsFound++ + } + + if charsFound < 200 { + log.Fatalf("%q: only %d characters found (wrong page format?)", url, charsFound) + } + + return string(mapping) +} + func main() { mibs := map[string]bool{} all := []string{} - buf := make([]byte, 8) - fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n") - fmt.Printf("package charmap\n\n") - fmt.Printf("import (\n") - fmt.Printf("\t\"golang.org/x/text/encoding\"\n") - fmt.Printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n") - fmt.Printf(")\n\n") + w := gen.NewCodeWriter() + defer w.WriteGoFile("tables.go", "charmap") + + printf := func(s string, a ...interface{}) { fmt.Fprintf(w, s, a...) } + + printf("import (\n") + printf("\t\"golang.org/x/text/encoding\"\n") + printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n") + printf(")\n\n") for _, e := range encodings { varNames := strings.Split(e.varName, ",") all = append(all, varNames...) varName := varNames[0] - if strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/") { + switch { + case strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/"): e.mapping = getWHATWG(e.mapping) + case strings.HasPrefix(e.mapping, "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/"): + e.mapping = getUCM(e.mapping) } asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00 @@ -352,42 +426,40 @@ func main() { lvn = 3 } lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:] - fmt.Printf("// %s is the %s encoding.\n", varName, e.name) + printf("// %s is the %s encoding.\n", varName, e.name) if e.comment != "" { - fmt.Printf("//\n// %s\n", e.comment) + printf("//\n// %s\n", e.comment) } - fmt.Printf("var %s encoding.Encoding = &%s\n\nvar %s = charmap{\nname: %q,\n", + printf("var %s encoding.Encoding = &%s\n\nvar %s = charmap{\nname: %q,\n", varName, lowerVarName, lowerVarName, e.name) if mibs[e.mib] { log.Fatalf("MIB type %q declared multiple times.", e.mib) } - fmt.Printf("mib: identifier.%s,\n", e.mib) - fmt.Printf("asciiSuperset: %t,\n", asciiSuperset) - fmt.Printf("low: 0x%02x,\n", low) - fmt.Printf("replacement: 0x%02x,\n", e.replacement) + printf("mib: identifier.%s,\n", e.mib) + printf("asciiSuperset: %t,\n", asciiSuperset) + printf("low: 0x%02x,\n", low) + printf("replacement: 0x%02x,\n", e.replacement) - fmt.Printf("decode: [256]utf8Enc{\n") + printf("decode: [256]utf8Enc{\n") i, backMapping := 0, map[rune]byte{} for _, c := range e.mapping { - if _, ok := backMapping[c]; !ok { + if _, ok := backMapping[c]; !ok && c != utf8.RuneError { backMapping[c] = byte(i) } - for j := range buf { - buf[j] = 0 - } - n := utf8.EncodeRune(buf, c) + var buf [8]byte + n := utf8.EncodeRune(buf[:], c) if n > 3 { panic(fmt.Sprintf("rune %q (%U) is too long", c, c)) } - fmt.Printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2]) + printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2]) if i%2 == 1 { - fmt.Printf("\n") + printf("\n") } i++ } - fmt.Printf("},\n") + printf("},\n") - fmt.Printf("encode: [256]uint32{\n") + printf("encode: [256]uint32{\n") encode := make([]uint32, 0, 256) for c, i := range backMapping { encode = append(encode, uint32(i)<<24|uint32(c)) @@ -397,15 +469,20 @@ func main() { encode = append(encode, encode[len(encode)-1]) } for i, enc := range encode { - fmt.Printf("0x%08x,", enc) + printf("0x%08x,", enc) if i%8 == 7 { - fmt.Printf("\n") + printf("\n") } } - fmt.Printf("},\n}\n") + printf("},\n}\n") + + // Add an estimate of the size of a single charmap{} struct value, which + // includes two 256 elem arrays of 4 bytes and some extra fields, which + // align to 3 uint64s on 64-bit architectures. + w.Size += 2*4*256 + 3*8 } // TODO: add proper line breaking. - fmt.Printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n")) + printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n")) } type byRune []uint32 |