about summary refs log tree commit diff stats
path: root/camel/camel-charset-map.c
diff options
context:
space:
mode:
Diffstat (limited to 'camel/camel-charset-map.c')
-rw-r--r-- camel/camel-charset-map.c 254
1 files changed, 0 insertions, 254 deletions
diff --git a/camel/camel-charset-map.c b/camel/camel-charset-map.c
deleted file mode 100644
index ddc8f0a896..0000000000
--- a/camel/camel-charset-map.c
+++ /dev/null
@@ -1,254 +0,0 @@
-
-#include <stdio.h>
-
-/*
- if you want to build the charset map, add the root directory of
- libunicode to the include path and define BUILD_MAP,
- then run it as
- ./a.out > camel-charset-map-private.h
-
- The generated tables work like this:
-
- An indirect array for each page of unicode character
- Each array element has an indirect pointer to one of the bytes of
- the generated bitmask.
-*/
-
-#ifdef BUILD_MAP
-#include "iso/iso8859-2.h"
-#include "iso/iso8859-3.h"
-#include "iso/iso8859-4.h"
-#include "iso/iso8859-5.h"
-#include "iso/iso8859-6.h"
-#include "iso/iso8859-7.h"
-#include "iso/iso8859-8.h"
-#include "iso/iso8859-9.h"
-#include "iso/iso8859-10.h"
-#include "iso/iso8859-14.h"
-#include "iso/iso8859-15.h"
-#include "iso/koi8-r.h"
-#include "iso/koi8-u.h"
-#include "msft/cp932.h"
-#include "jis/shiftjis.h"
-
-static struct {
- unsigned short *table;
- char *name;
- int type; /* type of table */
- unsigned int bit; /* assigned bit */
-} tables[] = {
- { iso8859_2_table, "iso-8859-2", 0, 0} ,
- { iso8859_3_table, "iso-8859-3", 0, 0} ,
- { iso8859_4_table, "iso-8859-4", 0, 0},
- { iso8859_5_table, "iso-8859-5", 0, 0},
-/* apparently -6 has special digits? */
- { iso8859_6_table, "iso-8859-6", 0, 0},
- { iso8859_7_table, "iso-8859-7", 0, 0},
- { iso8859_8_table, "iso-8859-8", 0, 0},
- { iso8859_9_table, "iso-8859-9", 0, 0},
- { iso8859_10_table, "iso-8859-10", 0, 0},
- { iso8859_14_table, "iso-8859-14", 0, 0},
- { iso8859_15_table, "iso-8859-15", 0, 0},
- { koi8_r_table, "koi8-r", 0, 0},
- { koi8_u_table, "koi8-u", 0, 0},
- { cp932_table, "CP932", 1, 0},
- { sjis_table, "Shift-JIS", 1, 0},
- { 0, 0}
-};
-
-unsigned int encoding_map[256 * 256];
-
-static void
-add_bigmap(unsigned short **table, int bit)
-{
- int i;
- int j;
-
- for (i=0;i<256;i++) {
- unsigned short *tab = table[i];
- if (tab) {
- for (j=0;j<256;j++) {
- if (tab[j])
- encoding_map[tab[j]] |= bit;
- }
- }
- }
-}
-
-main()
-{
- int i, j;
- unsigned short *tab;
- int max, min;
- int bit = 0x01;
- int k;
- int bytes;
-
-#if 0
- /* iso-latin-1 (not needed-detected in code) */
- for (i=0;i<256;i++) {
- encoding_map[i] |= bit;
- }
- bit <<= 1;
-#endif
-
- /* don't count the terminator */
- bytes = ((sizeof(tables)/sizeof(tables[0]))+7-1)/8;
-
- /* the other latin charsets */
- for (j=0;tables[j].table;j++) {
- switch (tables[j].type) {
- case 0: /* table from 128-256 */
- tab = tables[j].table;
- for (i=0;i<128;i++) {
- /* 0-127 is the common */
- encoding_map[i] |= bit;
- encoding_map[tab[i]] |= bit;
- }
- break;
- case 1: /* sparse table */
- add_bigmap(tables[j].table, bit);
- break;
- }
- tables[j].bit = bit;
- bit <<= 1;
- }
-
- printf("/* This file is automatically generated: DO NOT EDIT */\n\n");
-
- for (i=0;i<256;i++) {
- /* first, do we need this block? */
- for (k=0;k<bytes;k++) {
- for (j=0;j<256;j++) {
- if ((encoding_map[i*256 + j] & (0xff << (k*8))) != 0)
- break;
- }
- if (j < 256) {
- /* yes, dump it */
- printf("static unsigned char m%02x%x[256] = {\n\t", i, k);
- for (j=0;j<256;j++) {
- printf("0x%02x, ", (encoding_map[i*256+j] >> (k*8)) & 0xff );
- if (((j+1)&7) == 0 && j<255)
- printf("\n\t");
- }
- printf("\n};\n\n");
- }
- }
- }
-
- printf("struct {\n");
- for (k=0;k<bytes;k++) {
- printf("\tunsigned char *bits%d;\n", k);
- }
- printf("} camel_charmap[256] = {\n\t");
- for (i=0;i<256;i++) {
- /* first, do we need this block? */
- printf("{ ");
- for (k=0;k<bytes;k++) {
- for (j=0;j<256;j++) {
- if ((encoding_map[i*256 + j] & (0xff << (k*8))) != 0)
- break;
- }
- if (j < 256) {
- printf("m%02x%x, ", i, k);
- } else {
- printf("0, ");
- }
- }
- printf("}, ");
- if (((i+1)&7) == 0 && i<255)
- printf("\n\t");
- }
- printf("\n};\n\n");
-
- printf("struct {\n\tconst char *name;\n\tunsigned int bit;\n} camel_charinfo[] = {\n");
- for (j=0;tables[j].table;j++) {
- printf("\t{ \"%s\", 0x%04x },\n", tables[j].name, tables[j].bit);
- }
- printf("};\n\n");
-
- printf("#define charset_mask(x) \\\n");
- for (k=0;k<bytes;k++) {
- if (k!=0)
- printf("\t| ");
- else
- printf("\t");
- printf("(camel_charmap[(x)>>8].bits%d?camel_charmap[(x)>>8].bits%d[(x)&0xff]<<%d:0)", k, k, k*8);
- if (k<bytes-1)
- printf("\t\\\n");
- }
- printf("\n\n");
-
-}
-
-#else
-
-#include "camel-charset-map.h"
-#include "camel-charset-map-private.h"
-#include <unicode.h>
-#include <glib.h>
-
-unsigned int
-camel_charset_mask(unsigned int c)
-{
- if (c>0xffff)
- return 0;
-
- return charset_mask(c);
-}
-
-/* gets the best charset from the mask of chars in it */
-const char *
-camel_charset_best_mask(unsigned int mask)
-{
- int i;
-
- for (i=0;i<sizeof(camel_charinfo)/sizeof(camel_charinfo[0]);i++) {
- if (camel_charinfo[i].bit & mask)
- return camel_charinfo[i].name;
- }
- return "UTF-8";
-}
-
-/* finds the minimum charset for this string NULL means US-ASCII */
-const char *
-camel_charset_best(const char *in, int len)
-{
- unsigned int mask = ~0;
- int level = 0;
- const char *inptr = in, *inend = in+len;
-
- /* check what charset a given string will fit in */
- while (inptr < inend) {
- unicode_char_t c;
- const char *newinptr;
- newinptr = unicode_get_utf8(inptr, &c);
- if (newinptr == NULL) {
- inptr++;
- continue;
- }
- inptr = newinptr;
- if (c<=0xffff) {
- mask |= camel_charset_mask(c);
-
- if (c>=128 && c<256)
- level = MAX(level, 1);
- else if (c>=256)
- level = MAX(level, 2);
- } else {
- mask = 0;
- level = MAX(level, 2);
- }
- }
-
- if (level == 1)
- return "ISO-8859-1";
- else if (level == 2)
- return camel_charset_best_mask(mask);
- else
- return NULL;
-}
-
-
-#endif /* !BUILD_MAP */
-