diff options
Diffstat (limited to 'camel/camel-charset-map.c')
-rw-r--r-- | camel/camel-charset-map.c | 254 |
1 files changed, 0 insertions, 254 deletions
diff --git a/camel/camel-charset-map.c b/camel/camel-charset-map.c deleted file mode 100644 index ddc8f0a896..0000000000 --- a/camel/camel-charset-map.c +++ /dev/null @@ -1,254 +0,0 @@ - -#include <stdio.h> - -/* - if you want to build the charset map, add the root directory of - libunicode to the include path and define BUILD_MAP, - then run it as - ./a.out > camel-charset-map-private.h - - The tables genereated work like this: - - An indirect array for each page of unicode character - Each array element has an indirect pointer to one of the bytes of - the generated bitmask. -*/ - -#ifdef BUILD_MAP -#include "iso/iso8859-2.h" -#include "iso/iso8859-3.h" -#include "iso/iso8859-4.h" -#include "iso/iso8859-5.h" -#include "iso/iso8859-6.h" -#include "iso/iso8859-7.h" -#include "iso/iso8859-8.h" -#include "iso/iso8859-9.h" -#include "iso/iso8859-10.h" -#include "iso/iso8859-14.h" -#include "iso/iso8859-15.h" -#include "iso/koi8-r.h" -#include "iso/koi8-u.h" -#include "msft/cp932.h" -#include "jis/shiftjis.h" - -static struct { - unsigned short *table; - char *name; - int type; /* type of table */ - unsigned int bit; /* assigned bit */ -} tables[] = { - { iso8859_2_table, "iso-8859-2", 0, 0} , - { iso8859_3_table, "iso-8859-3", 0, 0} , - { iso8859_4_table, "iso-8859-4", 0, 0}, - { iso8859_5_table, "iso-8859-5", 0, 0}, -/* apparently -6 has special digits? */ - { iso8859_6_table, "iso-8859-6", 0, 0}, - { iso8859_7_table, "iso-8859-7", 0, 0}, - { iso8859_8_table, "iso-8859-8", 0, 0}, - { iso8859_9_table, "iso-8859-9", 0, 0}, - { iso8859_10_table, "iso-8859-10", 0, 0}, - { iso8859_14_table, "iso-8859-14", 0, 0}, - { iso8859_15_table, "iso-8859-15", 0, 0}, - { koi8_r_table, "koi8-r", 0, 0}, - { koi8_u_table, "koi8-u", 0, 0}, - { cp932_table, "CP932", 1, 0}, - { sjis_table, "Shift-JIS", 1, 0}, - { 0, 0} -}; - -unsigned int encoding_map[256 * 256]; - -static void -add_bigmap(unsigned short **table, int bit) -{ - int i; - int j; - - for (i=0;i<256;i++) { - unsigned short *tab = table[i]; - if (tab) { - for (j=0;j<256;j++) { - if (tab[j]) - encoding_map[tab[j]] |= bit; - } - } - } -} - -main() -{ - int i, j; - unsigned short *tab; - int max, min; - int bit = 0x01; - int k; - int bytes; - -#if 0 - /* iso-latin-1 (not needed-detected in code) */ - for (i=0;i<256;i++) { - encoding_map[i] |= bit; - } - bit <<= 1; -#endif - - /* dont count the terminator */ - bytes = ((sizeof(tables)/sizeof(tables[0]))+7-1)/8; - - /* the other latin charsets */ - for (j=0;tables[j].table;j++) { - switch (tables[j].type) { - case 0: /* table from 128-256 */ - tab = tables[j].table; - for (i=0;i<128;i++) { - /* 0-127 is the common */ - encoding_map[i] |= bit; - encoding_map[tab[i]] |= bit; - } - break; - case 1: /* sparse table */ - add_bigmap(tables[j].table, bit); - break; - } - tables[j].bit = bit; - bit <<= 1; - } - - printf("/* This file is automatically generated: DO NOT EDIT */\n\n"); - - for (i=0;i<256;i++) { - /* first, do we need this block? */ - for (k=0;k<bytes;k++) { - for (j=0;j<256;j++) { - if ((encoding_map[i*256 + j] & (0xff << (k*8))) != 0) - break; - } - if (j < 256) { - /* yes, dump it */ - printf("static unsigned char m%02x%x[256] = {\n\t", i, k); - for (j=0;j<256;j++) { - printf("0x%02x, ", (encoding_map[i*256+j] >> (k*8)) & 0xff ); - if (((j+1)&7) == 0 && j<255) - printf("\n\t"); - } - printf("\n};\n\n"); - } - } - } - - printf("struct {\n"); - for (k=0;k<bytes;k++) { - printf("\tunsigned char *bits%d;\n", k); - } - printf("} camel_charmap[256] = {\n\t"); - for (i=0;i<256;i++) { - /* first, do we need this block? */ - printf("{ "); - for (k=0;k<bytes;k++) { - for (j=0;j<256;j++) { - if ((encoding_map[i*256 + j] & (0xff << (k*8))) != 0) - break; - } - if (j < 256) { - printf("m%02x%x, ", i, k); - } else { - printf("0, "); - } - } - printf("}, "); - if (((i+1)&7) == 0 && i<255) - printf("\n\t"); - } - printf("\n};\n\n"); - - printf("struct {\n\tconst char *name;\n\tunsigned int bit;\n} camel_charinfo[] = {\n"); - for (j=0;tables[j].table;j++) { - printf("\t{ \"%s\", 0x%04x },\n", tables[j].name, tables[j].bit); - } - printf("};\n\n"); - - printf("#define charset_mask(x) \\\n"); - for (k=0;k<bytes;k++) { - if (k!=0) - printf("\t| "); - else - printf("\t"); - printf("(camel_charmap[(x)>>8].bits%d?camel_charmap[(x)>>8].bits%d[(x)&0xff]<<%d:0)", k, k, k*8); - if (k<bytes-1) - printf("\t\\\n"); - } - printf("\n\n"); - -} - -#else - -#include "camel-charset-map.h" -#include "camel-charset-map-private.h" -#include <unicode.h> -#include <glib.h> - -unsigned int -camel_charset_mask(unsigned int c) -{ - if (c>0xffff) - return 0; - - return charset_mask(c); -} - -/* gets the best charset from the mask of chars in it */ -const char * -camel_charset_best_mask(unsigned int mask) -{ - int i; - - for (i=0;i<sizeof(camel_charinfo)/sizeof(camel_charinfo[0]);i++) { - if (camel_charinfo[i].bit & mask) - return camel_charinfo[i].name; - } - return "UTF-8"; -} - -/* finds the minimum charset for this string NULL means US-ASCII */ -const char * -camel_charset_best(const char *in, int len) -{ - unsigned int mask = ~0; - int level = 0; - const char *inptr = in, *inend = in+len; - - /* check what charset a given string will fit in */ - while (inptr < inend) { - unicode_char_t c; - const char *newinptr; - newinptr = unicode_get_utf8(inptr, &c); - if (newinptr == NULL) { - inptr++; - continue; - } - inptr = newinptr; - if (c<=0xffff) { - mask |= camel_charset_mask(c); - - if (c>=128 && c<256) - level = MAX(level, 1); - else if (c>=256) - level = MAX(level, 2); - } else { - mask = 0; - level = MAX(level, 2); - } - } - - if (level == 1) - return "ISO-8859-1"; - else if (level == 2) - return camel_charset_best_mask(mask); - else - return NULL; -} - - -#endif /* !BUILD_MAP */ - |