aboutsummaryrefslogtreecommitdiffstats
path: root/camel/camel-charset-map.c
diff options
context:
space:
mode:
Diffstat (limited to 'camel/camel-charset-map.c')
-rw-r--r--camel/camel-charset-map.c447
1 files changed, 0 insertions, 447 deletions
diff --git a/camel/camel-charset-map.c b/camel/camel-charset-map.c
deleted file mode 100644
index 02ea31a44c..0000000000
--- a/camel/camel-charset-map.c
+++ /dev/null
@@ -1,447 +0,0 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8; -*- */
-
-/*
- * Authors:
- * Michael Zucchi <notzed@ximian.com>
- * Dan Winship <danw@ximian.com>
- *
- * Copyright 2000, 2001 Ximian, Inc. (www.ximian.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdio.h>
-
-/*
- if you want to build the charset map, compile this with something like:
- gcc -DBUILD_MAP camel-charset-map.c `glib-config --cflags`
- (plus any -I/-L/-l flags you need for iconv), then run it as
- ./a.out > camel-charset-map-private.h
-
- Note that the big-endian variant isn't tested...
-
- The generated tables work like this:
-
- An indirect array for each page of unicode character
- Each array element has an indirect pointer to one of the bytes of
- the generated bitmask.
-*/
-
-#ifdef BUILD_MAP
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-#include <iconv.h>
-#include <glib.h>
-
-/* Charsets the generator probes.  main() fills in `bit' for each entry
- * in table order; the list ends at the entry whose name is NULL.
- */
-static struct {
-	const char *name;	/* charset name handed to iconv_open() */
-	unsigned int bit;	/* assigned bit */
-} tables[] = {
-	/* These are the 8bit character sets (other than iso-8859-1,
-	 * which is special-cased) which are supported by both other
-	 * mailers and the GNOME environment. Note that the order
-	 * they're listed in is the order they'll be tried in, so put
-	 * the more-popular ones first.
-	 */
-	{ "iso-8859-2", 0 },	/* Central/Eastern European */
-	{ "iso-8859-4", 0 },	/* Baltic */
-	{ "koi8-r", 0 },	/* Russian */
-	{ "windows-1251", 0 },	/* Russian */
-	{ "koi8-u", 0 },	/* Ukrainian */
-	{ "iso-8859-5", 0 },	/* Least-popular Russian encoding */
-	{ "iso-8859-7", 0 },	/* Greek */
-	{ "iso-8859-9", 0 },	/* Turkish */
-	{ "iso-8859-13", 0 },	/* Baltic again */
-	{ "iso-8859-15", 0 },	/* New-and-improved iso-8859-1, but most
-				 * programs that support this support UTF8
-				 */
-	{ NULL, 0 }		/* terminator */
-};
-
-/* One bitmask per code point in the range 0x0000-0xffff.  main() ORs in
- * a charset's assigned bit for every code point that charset produced
- * when converted with iconv.
- */
-unsigned int encoding_map[256 * 256];
-
-/* Name of the iconv target encoding, chosen by host byte order; main()
- * reads the converted output back as an array of guint32.
- */
-#if G_BYTE_ORDER == G_BIG_ENDIAN
-#define UCS "UCS-4BE"
-#else
-#define UCS "UCS-4LE"
-#endif
-
-/* Returns non-zero if any code point of unicode page `page' has a bit
- * set in byte `k' of its encoding_map mask, i.e. whether a table has to
- * be emitted for that page/byte combination.
- */
-static int
-page_uses_byte (int page, int k)
-{
-	int j;
-
-	for (j = 0; j < 256; j++) {
-		if ((encoding_map[page*256 + j] & (0xffU << (k*8))) != 0)
-			return 1;
-	}
-
-	return 0;
-}
-
-/* Generator entry point (BUILD_MAP only).
- *
- * Converts the upper half (bytes 0x80-0xff) of every charset listed in
- * tables[] to UCS-4 with iconv, records which code points each charset
- * covers in encoding_map[], and writes the resulting lookup tables, the
- * camel_charinfo[] array and the charset_mask() macro to stdout.
- *
- * Returns 0 on success; exits with status 1, after reporting on stderr,
- * if a charset cannot be opened or converted.
- */
-int main(void)
-{
-	int i, j, k;
-	int bit = 0x01;
-	int bytes;
-	int converted;
-	iconv_t cd;
-	char in[128];
-	guint32 out[128];
-	char *inptr, *outptr;
-	size_t inlen, outlen;
-
-	/* number of mask bytes needed; the -1 is so we don't count the terminator */
-	bytes = ((sizeof(tables)/sizeof(tables[0]))+7-1)/8;
-
-	/* the input is simply every byte value from 0x80 to 0xff */
-	for (i = 0; i < 128; i++)
-		in[i] = (char) (i + 128);
-
-	for (j = 0; tables[j].name; j++) {
-		cd = iconv_open (UCS, tables[j].name);
-		if (cd == (iconv_t) -1) {
-			/* report on stderr: stdout is the generated header */
-			fprintf (stderr, "%s: %s\n", tables[j].name, strerror (errno));
-			exit (1);
-		}
-
-		inptr = in;
-		outptr = (char *)(out);
-		inlen = sizeof (in);
-		outlen = sizeof (out);
-		while (iconv (cd, &inptr, &inlen, &outptr, &outlen) == (size_t) -1) {
-			if (errno == EILSEQ) {
-				/* byte is not assigned in this charset: skip it */
-				inptr++;
-				inlen--;
-			} else {
-				fprintf (stderr, "%s: %s\n", tables[j].name, strerror (errno));
-				exit (1);
-			}
-		}
-		iconv_close (cd);
-
-		/* The lower half (0x00-0x7f) is marked for every charset.
-		 * This must cover all 128 values regardless of how many
-		 * upper-half bytes converted, otherwise a charset with
-		 * unassigned bytes would lose the last few ASCII characters.
-		 */
-		for (i = 0; i < 128; i++)
-			encoding_map[i] |= bit;
-
-		/* mark every code point the upper half converted to */
-		converted = (int) ((sizeof (out) - outlen) / sizeof (out[0]));
-		for (i = 0; i < converted; i++) {
-			/* encoding_map only covers 0x0000-0xffff */
-			if (out[i] < 256 * 256)
-				encoding_map[out[i]] |= bit;
-		}
-
-		tables[j].bit = bit;
-		bit <<= 1;
-	}
-
-	printf("/* This file is automatically generated: DO NOT EDIT */\n\n");
-
-	/* one 256-entry table per (page, mask byte) pair that has any bits set */
-	for (i=0;i<256;i++) {
-		for (k=0;k<bytes;k++) {
-			if (!page_uses_byte (i, k))
-				continue;
-
-			printf("static unsigned char m%02x%x[256] = {\n\t", i, k);
-			for (j=0;j<256;j++) {
-				printf("0x%02x, ", (encoding_map[i*256+j] >> (k*8)) & 0xff );
-				if (((j+1)&7) == 0 && j<255)
-					printf("\n\t");
-			}
-			printf("\n};\n\n");
-		}
-	}
-
-	/* the page index: a pointer to each emitted table, or 0 where none was needed */
-	printf("struct {\n");
-	for (k=0;k<bytes;k++) {
-		printf("\tunsigned char *bits%d;\n", k);
-	}
-	printf("} camel_charmap[256] = {\n\t");
-	for (i=0;i<256;i++) {
-		printf("{ ");
-		for (k=0;k<bytes;k++) {
-			if (page_uses_byte (i, k)) {
-				printf("m%02x%x, ", i, k);
-			} else {
-				printf("0, ");
-			}
-		}
-		printf("}, ");
-		if (((i+1)&7) == 0 && i<255)
-			printf("\n\t");
-	}
-	printf("\n};\n\n");
-
-	/* charset name -> assigned bit, in the order of tables[] */
-	printf("struct {\n\tconst char *name;\n\tunsigned int bit;\n} camel_charinfo[] = {\n");
-	for (j=0;tables[j].name;j++) {
-		printf("\t{ \"%s\", 0x%04x },\n", tables[j].name, tables[j].bit);
-	}
-	printf("};\n\n");
-
-	/* charset_mask(x): reassembles the full mask of x from its per-byte tables */
-	printf("#define charset_mask(x) \\\n");
-	for (k=0;k<bytes;k++) {
-		if (k!=0)
-			printf("\t| ");
-		else
-			printf("\t");
-		printf("(camel_charmap[(x)>>8].bits%d?camel_charmap[(x)>>8].bits%d[(x)&0xff]<<%d:0)", k, k, k*8);
-		if (k<bytes-1)
-			printf("\t\\\n");
-	}
-	printf("\n\n");
-
-	return 0;
-}
-
-#else
-
-#include "camel-charset-map.h"
-#include "camel-charset-map-private.h"
-#include "hash-table-utils.h"
-#include <gal/unicode/gunicode.h>
-#include <locale.h>
-#include <string.h>
-#include <ctype.h>
-#include <glib.h>
-#ifdef ENABLE_THREADS
-#include <pthread.h>
-#endif
-
-
-/* Locking around the iconv_charsets hash table.  The macros expand to
- * nothing when threads are not enabled.
- */
-#ifdef ENABLE_THREADS
-static pthread_mutex_t iconv_charsets_lock = PTHREAD_MUTEX_INITIALIZER;
-#define ICONV_CHARSETS_LOCK() pthread_mutex_lock (&iconv_charsets_lock)
-#define ICONV_CHARSETS_UNLOCK() pthread_mutex_unlock (&iconv_charsets_lock)
-#else
-#define ICONV_CHARSETS_LOCK()
-#define ICONV_CHARSETS_UNLOCK()
-#endif /* ENABLE_THREADS */
-
-/* charset name -> iconv-friendly name; keys and values are g_strdup()ed
- * copies owned by the table.  Created by camel_charset_map_init().
- */
-static GHashTable *iconv_charsets = NULL;
-/* lower-cased codeset part of the locale name, or NULL if there is none */
-static char *locale_charset = NULL;
-
-/* Seed entries for the iconv_charsets hash table; camel_charset_map_init()
- * inserts a copy of each pair.  The list ends at the NULL/NULL entry.
- */
-struct {
-	const char *charset;
-	const char *iconv_name;
-} known_iconv_charsets[] = {
-	/* charset name, iconv-friendly charset name */
-	{ "iso-8859-1", "iso-8859-1" },
-	{ "iso8859-1", "iso-8859-1" },
-	/* the above mostly serves as an example for iso-style charsets,
-	   but we have code that will populate the iso-*'s if/when they
-	   show up in camel_charset_to_iconv() so I'm
-	   not going to bother putting them all in here... */
-	{ "windows-cp1251", "cp1251" },
-	{ "windows-1251", "cp1251" },
-	{ "cp1251", "cp1251" },
-	/* the above mostly serves as an example for windows-style
-	   charsets, but we have code that will parse and convert them
-	   to their cp#### equivalents if/when they show up in
-	   camel_charset_to_iconv() so I'm not going to bother
-	   putting them all in here... */
-	{ "ks_c_5601-1987", "euc-kr" },
-	{ NULL, NULL }
-};
-
-
-/* g_hash_table_foreach() callback: releases one key/value pair of the
- * iconv_charsets table.  The user-data argument is not used.
- */
-static void
-shutdown_foreach (gpointer key, gpointer value, gpointer data)
-{
-	/* both strings were allocated by glib when the entry was inserted */
-	g_free (value);
-	g_free (key);
-}
-
-/* Exit handler registered by camel_charset_map_init(): frees every
- * key/value string in the iconv_charsets table, the table itself and
- * the cached locale charset.  The globals are reset to NULL afterwards
- * so that nothing is left pointing at freed memory.
- */
-static void
-camel_charset_map_shutdown (void)
-{
-	if (iconv_charsets) {
-		g_hash_table_foreach (iconv_charsets, shutdown_foreach, NULL);
-		g_hash_table_destroy (iconv_charsets);
-		iconv_charsets = NULL;
-	}
-
-	g_free (locale_charset);
-	locale_charset = NULL;
-}
-
-/* Initialises the charset map: creates the iconv_charsets hash table,
- * seeds it from known_iconv_charsets[], extracts the codeset part of
- * the current locale name into locale_charset (lower-cased), and
- * registers camel_charset_map_shutdown() to run at exit.
- *
- * Calling it again once the table exists does nothing.
- */
-void
-camel_charset_map_init (void)
-{
-	char *locale;
-	int i;
-
-	if (iconv_charsets)
-		return;
-
-	iconv_charsets = g_hash_table_new (g_strcase_hash, g_strcase_equal);
-	for (i = 0; known_iconv_charsets[i].charset != NULL; i++) {
-		g_hash_table_insert (iconv_charsets, g_strdup (known_iconv_charsets[i].charset),
-				     g_strdup (known_iconv_charsets[i].iconv_name));
-	}
-
-	locale = setlocale (LC_ALL, NULL);
-
-	if (!locale || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) {
-		/* The locale "C" or "POSIX" is a portable locale; its
-		 * LC_CTYPE part corresponds to the 7-bit ASCII character
-		 * set.
-		 */
-
-		locale_charset = NULL;
-	} else {
-		/* A locale name is typically of the form language[_terri-
-		 * tory][.codeset][@modifier], where language is an ISO 639
-		 * language code, territory is an ISO 3166 country code, and
-		 * codeset is a character set or encoding identifier like
-		 * ISO-8859-1 or UTF-8.
-		 */
-		char *at, *dot;
-		int len, codeset_len;
-
-		/* len is the length of the name without any @modifier */
-		at = strchr (locale, '@');
-		len = at ? (at - locale) : strlen (locale);
-
-		/* only a '.' in front of the modifier introduces a codeset */
-		dot = strchr (locale, '.');
-		if (dot && dot - locale < len) {
-			/* everything between the '.' and the '@' (or the end) */
-			codeset_len = len - (dot - locale) - 1;
-			if (codeset_len > 0) {
-				locale_charset = g_strndup (dot + 1, codeset_len);
-				g_strdown (locale_charset);
-			}
-		}
-	}
-
-	g_atexit (camel_charset_map_shutdown);
-}
-
-/* Resets a CamelCharset accumulator: level starts at 0 and the mask
- * starts with every bit set, so camel_charset_step() can only narrow it.
- */
-void
-camel_charset_init (CamelCharset *c)
-{
-	c->level = 0;
-	c->mask = ~0;
-}
-
-/* Feeds `len' bytes of UTF-8 text at `in' into the accumulator `c'.
- *
- * For every valid character the mask is ANDed with that character's
- * charset_mask(), and the level is raised to 1 for characters in the
- * range 128-255 or to 2 for anything from 256 up.  Characters above
- * 0xffff clear the mask completely.  Bytes that do not decode to a
- * valid character are skipped one at a time.
- *
- * A multi-byte sequence that runs past the end of the buffer is not
- * decoded, so no byte beyond in + len is read.
- */
-void
-camel_charset_step (CamelCharset *c, const char *in, int len)
-{
-	register unsigned int mask;
-	register int level;
-	const char *inptr = in, *inend = in+len;
-
-	mask = c->mask;
-	level = c->level;
-
-	/* check what charset a given string will fit in */
-	while (inptr < inend) {
-		gunichar uc;
-		const char *newinptr;
-
-		newinptr = g_utf8_next_char(inptr);
-		/* truncated sequence at the end of the buffer: stop here
-		 * rather than decode bytes that are not ours to read */
-		if (newinptr != NULL && newinptr > inend)
-			break;
-
-		uc = g_utf8_get_char(inptr);
-		if (newinptr == NULL || !g_unichar_validate (uc)) {
-			inptr++;
-			continue;
-		}
-
-		inptr = newinptr;
-		if (uc<=0xffff) {
-			mask &= charset_mask(uc);
-
-			if (uc>=128 && uc<256)
-				level = MAX(level, 1);
-			else if (uc>=256)
-				level = MAX(level, 2);
-		} else {
-			/* outside the range the generated tables cover */
-			mask = 0;
-			level = MAX(level, 2);
-		}
-	}
-
-	c->mask = mask;
-	c->level = level;
-}
-
-/* Picks a charset name for a mask: the first camel_charinfo[] entry
- * whose bit is present in `mask' wins; "UTF-8" is returned when no
- * entry matches.
- */
-static const char *
-camel_charset_best_mask(unsigned int mask)
-{
-	const unsigned int count = sizeof(camel_charinfo)/sizeof(camel_charinfo[0]);
-	unsigned int idx = 0;
-
-	while (idx < count) {
-		if (mask & camel_charinfo[idx].bit)
-			return camel_charinfo[idx].name;
-		idx++;
-	}
-
-	return "UTF-8";
-}
-
-/* Maps an accumulator to a charset name: level 1 gives "ISO-8859-1",
- * level 2 gives whatever camel_charset_best_mask() picks for the mask,
- * and any other level gives NULL.
- */
-const char *
-camel_charset_best_name (CamelCharset *charset)
-{
-	switch (charset->level) {
-	case 1:
-		return "ISO-8859-1";
-	case 2:
-		return camel_charset_best_mask (charset->mask);
-	default:
-		return NULL;
-	}
-}
-
-/* One-shot helper: runs a fresh accumulator over the `len' bytes at
- * `in' and returns the resulting best charset name.  A NULL result
- * means US-ASCII.
- */
-const char *
-camel_charset_best (const char *in, int len)
-{
-	CamelCharset state;
-
-	camel_charset_init (&state);
-	camel_charset_step (&state, in, len);
-
-	return camel_charset_best_name (&state);
-}
-
-/* Returns the locale charset cached by camel_charset_map_init(), or
- * NULL if none was set.  The string is not a copy; callers must not
- * free it.
- */
-const char *
-camel_charset_locale_name (void)
-{
- return locale_charset;
-}
-
-/* Translates a charset name into one that iconv is expected to accept.
- *
- * Returns NULL for a NULL name.  "x-unknown" maps to the locale charset,
- * or "iso-8859-1" when there is none.  Otherwise the name is looked up
- * in the iconv_charsets table; on a miss a friendlier form is derived,
- * stored in the table under the original name, and returned:
- *
- *   - "isoNNNN..." (no '-' or '_' after "iso") gets a '-' inserted
- *   - "windows-NNNN" and "windows-cpNNNN" become "cpNNNN"
- *   - anything else is stored unchanged
- *
- * The returned string is owned by the table and must not be freed.
- */
-const char *
-camel_charset_to_iconv (const char *name)
-{
-	const char *charset;
-
-	if (name == NULL)
-		return NULL;
-
-	/* special-case hack... */
-	if (!g_strcasecmp (name, "x-unknown"))
-		return locale_charset ? locale_charset : "iso-8859-1";
-
-	ICONV_CHARSETS_LOCK ();
-	charset = g_hash_table_lookup (iconv_charsets, name);
-	if (!charset) {
-		/* Attempt to friendlyify the charset */
-		char *new_charset;
-
-		if (!g_strncasecmp (name, "iso", 3) && name[3] != '-' && name[3] != '_') {
-			/* Hack to convert charsets like ISO8859-1 to iconv-friendly ISO-8859-1 */
-			new_charset = g_strdup_printf ("%.3s-%s", name, name + 3);
-		} else if (!g_strncasecmp (name, "windows-", 8)) {
-			/* Convert charsets like windows-1251 and windows-cp1251 to iconv-friendly cp1251 */
-			const char *codepage = name + 8;
-			const char *p;
-
-			if (!g_strncasecmp (codepage, "cp", 2))
-				codepage += 2;
-
-			/* isdigit() is only defined for unsigned char values and EOF */
-			for (p = codepage; isdigit ((unsigned char) *p); p++)
-				;
-
-			/* require at least one digit and nothing but digits */
-			if (p > codepage && *p == '\0')
-				new_charset = g_strdup_printf ("cp%s", codepage);
-			else
-				new_charset = g_strdup (name);
-		} else {
-			/* *shrug* - add it to the hash table just the way it is? */
-			new_charset = g_strdup (name);
-		}
-
-		/* the table takes ownership of both strings */
-		g_hash_table_insert (iconv_charsets, g_strdup (name), new_charset);
-		charset = new_charset;
-	}
-	ICONV_CHARSETS_UNLOCK ();
-
-	return charset;
-}
-
-#endif /* !BUILD_MAP */
-