diff options
author | Jeffrey Stedfast <fejj@ximian.com> | 2003-02-21 05:04:19 +0800 |
---|---|---|
committer | Jeffrey Stedfast <fejj@src.gnome.org> | 2003-02-21 05:04:19 +0800 |
commit | abada7e2cd02933caa7a2643c0771b3ee7a63cfe (patch) | |
tree | def1c8730c5190323d408d2058a837da47bdc1fb /camel/camel-charset-map.c | |
parent | 08c781ab889611a7b9ca1605b36361032cc80f7d (diff) | |
download | gsoc2013-evolution-abada7e2cd02933caa7a2643c0771b3ee7a63cfe.tar.gz gsoc2013-evolution-abada7e2cd02933caa7a2643c0771b3ee7a63cfe.tar.zst gsoc2013-evolution-abada7e2cd02933caa7a2643c0771b3ee7a63cfe.zip |
Call camel_iconv_init(). (camel_shutdown): Call camel_iconv_shutdown().
2003-02-20 Jeffrey Stedfast <fejj@ximian.com>
* camel.c (camel_init): Call camel_iconv_init().
(camel_shutdown): Call camel_iconv_shutdown().
* camel-sasl-digest-md5.c (digest_response): Updated to use
camel-iconv and the new camel-charset-map functions.
* camel-mime-utils.c: Updated to use camel-iconv and the new
camel-charset-map functions.
* camel-mime-part-utils.c (check_html_charset): Use
camel_charset_canonical_name() instead of e_iconv_charset_name()
which is no longer available.
(convert_buffer): Use camel-iconv.
(simple_data_wrapper_construct_from_parser): Since
camel_charset_iso_to_windows() returns the charset in its
canonical format, no need to re-canonicalise it.
* camel-mime-part.c (process_header): Use
camel_charset_canonical_name() instead of e_iconv_charset_name()
which is no longer available.
* camel-mime-message.c (process_header): Use
camel_charset_canonical_name() instead of e_iconv_charset_name()
which is no longer available.
* camel-mime-filter-charset.c: Use camel-iconv.
* camel-folder-summary.c (message_info_new): Use
camel_charset_canonical_name() instead of e_iconv_charset_name()
which is no longer available.
(content_info_new): Use camel_charset_locale_name().
(camel_message_info_new_from_header): Same as message_info_new().
* camel-search-private.c: Use g_alloca() instead of alloca().
* camel-filter-search.c (check_header): Use
camel_charset_canonical_name() instead of e_iconv_charset_name()
which is no longer available.
* camel-charset-map.c (camel_charset_locale_name): New function,
replaces e_iconv_locale_charset().
(camel_charset_canonical_name): New function, similar to
e_iconv_charset_name() but instead of returning the iconv-friendly
name, it returns the canonical name. (g_iconv will do the
iconv-friendly name conversions for us).
svn path=/trunk/; revision=19977
Diffstat (limited to 'camel/camel-charset-map.c')
-rw-r--r-- | camel/camel-charset-map.c | 189 |
1 files changed, 181 insertions, 8 deletions
diff --git a/camel/camel-charset-map.c b/camel/camel-charset-map.c index be57d882e8..5fcd490dde 100644 --- a/camel/camel-charset-map.c +++ b/camel/camel-charset-map.c @@ -3,9 +3,10 @@ /* * Authors: * Michael Zucchi <notzed@ximian.com> + * Jeffrey Stedfast <fejj@ximian.com> * Dan Winship <danw@ximian.com> * - * Copyright 2000, 2001 Ximian, Inc. (www.ximian.com) + * Copyright 2000, 2003 Ximian, Inc. (www.ximian.com) * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -22,12 +23,15 @@ * USA */ + #ifdef HAVE_CONFIG_H #include <config.h> #endif -#include <errno.h> #include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> /* if you want to build the charset map, compile this with something like: @@ -200,16 +204,16 @@ int main (void) #include "camel-charset-map.h" #include "camel-charset-map-private.h" #include "string-utils.h" + +#include <glib.h> #include <glib/gunicode.h> #include <locale.h> -#include <string.h> #include <ctype.h> -#include <glib.h> #ifdef ENABLE_THREADS #include <pthread.h> #endif -#ifdef HAVE_ALLOCA_H -#include <alloca.h> +#ifdef HAVE_CODESET +#include <langinfo.h> #endif void @@ -295,9 +299,179 @@ camel_charset_best (const char *in, int len) } +#ifdef G_THREADS_ENABLED +static GStaticMutex lock = G_STATIC_MUTEX_INIT; +#define LOCK() g_static_mutex_lock(&lock) +#define UNLOCK() g_static_mutex_unlock(&lock) +#else +#define LOCK() +#define UNLOCK() +#endif + +static char *locale_charset = NULL; +static GHashTable *canon_charsets = NULL; + +static void +canon_charsets_init (int keep) +{ + char *locale; + + LOCK (); + + if (canon_charsets != NULL) { + if (!keep) + UNLOCK (); + return; + } + + canon_charsets = g_hash_table_new (g_str_hash, g_str_equal); + + locale = setlocale (LC_ALL, NULL); + + if (!locale || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) { + /* The locale "C" or "POSIX" is a portable locale; its + * LC_CTYPE part corresponds 
to the 7-bit ASCII character + * set. + */ + + locale_charset = NULL; + } else { +#ifdef HAVE_CODESET + locale_charset = g_strdup (nl_langinfo (CODESET)); + g_ascii_strdown (locale_charset, -1); +#else + /* A locale name is typically of the form language[_terri- + * tory][.codeset][@modifier], where language is an ISO 639 + * language code, territory is an ISO 3166 country code, and + * codeset is a character set or encoding identifier like + * ISO-8859-1 or UTF-8. + */ + char *codeset, *p; + + codeset = strchr (locale, '.'); + if (codeset) { + codeset++; + + /* ; is a hack for debian systems and / is a hack for Solaris systems */ + for (p = codeset; *p && !strchr ("@;/", *p); p++) + ; + locale_charset = g_strndup (codeset, p - codeset); + g_ascii_strdown (locale_charset, -1); + } else { + /* charset unknown */ + locale_charset = NULL; + } +#endif + } + + if (!keep) + UNLOCK (); +} + + +/** + * camel_charset_locale_name: + * + * Returns the name of the system's locale charset. + **/ +const char * +camel_charset_locale_name (void) +{ + canon_charsets_init (FALSE); + + return locale_charset; +} + + +/** + * camel_charset_canonical_name: + * @charset: charset to canonicalise + * + * Returns the charset in its canonical format. This is currently only + * needed for iso charsets but also handles canonicalisation of + * windows charsets. May need to expand this to handle canincalisation + * of more charsets in the future? 
+ **/ +const char * +camel_charset_canonical_name (const char *charset) +{ + char *name, *canon, *tmp; + + if (charset == NULL) + return NULL; + + name = g_alloca (strlen (charset)); + strcpy (name, charset); + g_ascii_strdown (name, -1); + + canon_charsets_init (TRUE); + canon = g_hash_table_lookup (canon_charsets, name); + if (canon != NULL) { + UNLOCK (); + return canon; + } + + /* Unknown, try canonicalise some basic charset types to something that should work */ + if (strncmp (name, "iso", 3) == 0) { + /* Convert iso-nnnn-n or isonnnn-n or iso_nnnn-n to iso-nnnn-n or isonnnn-n */ + int iso, codepage; + char *p; + + tmp = name + 3; + if (*tmp == '-' || *tmp == '_') + tmp++; + + iso = strtoul (tmp, &p, 10); + + if (iso == 10646) { + /* they all become iso-10646 */ + canon = g_strdup ("iso-10646"); + } else { + /* iso-8859-# */ + tmp = p; + if (*tmp == '-' || *tmp == '_') + tmp++; + + codepage = strtoul (tmp, &p, 10); + + if (p > tmp) { + /* codepage is numeric */ + canon = g_strdup_printf ("iso-%d-%d", iso, codepage); + } else { + /* codepage is a string - probably iso-2022-jp or something */ + canon = g_strdup_printf ("iso-%d-%s", iso, p); + } + } + } else if (strncmp (name, "windows-", 8) == 0) { + /* Convert windows-#### and windows-cp#### to windows-cp#### */ + tmp = name + 8; + if (!strncmp (tmp, "cp", 2)) + tmp += 2; + canon = g_strdup_printf ("windows-cp%s", tmp); + } else if (strncmp (name, "microsoft-", 10) == 0) { + /* Convert microsoft-#### or microsoft-cp#### to windows-cp#### */ + tmp = name + 10; + if (!strncmp (tmp, "cp", 2)) + tmp += 2; + canon = g_strdup_printf ("windows-cp%s", tmp); + } else if (strncmp (name, "cp125", 5) == 0) { + /* Convert cp125# to windows-cp#### */ + canon = g_strdup_printf ("windows-%s", name); + } else { + /* Just assume its ok enough as is, case and all */ + canon = g_strdup (charset); + } + + g_hash_table_insert (canon_charsets, g_strdup (name), canon); + UNLOCK (); + + return canon; +} + + /** * 
camel_charset_iso_to_windows: - * @isocharset: an ISO charset + * @isocharset: a canonicalised ISO charset * * Returns the equivalent Windows charset. **/ @@ -351,4 +525,3 @@ camel_charset_iso_to_windows (const char *isocharset) } #endif /* !BUILD_MAP */ - |