From 8a56749aa6d8544544e75a79971144fb0b29fd81 Mon Sep 17 00:00:00 2001 From: Jeffrey Stedfast Date: Thu, 19 Jul 2001 21:00:27 +0000 Subject: Remove my iso8859-1 -> iso-8859-1 hack and use 2001-07-19 Jeffrey Stedfast * camel-mime-utils.c (rfc2047_decode_word): Remove my iso8859-1 -> iso-8859-1 hack and use camel_charset_get_iconv_friendly_name() instead. (rfc2184_decode): Use camel_charset_get_iconv_friendly_name() * camel.c (camel_init): Call camel_charset_map_init(). * camel-charset-map.c (camel_charset_map_init): New function to initialize the charset-equivalent lookup table. To be called by camel_init(). (camel_charset_get_iconv_friendly_name): New function to try and convert a charset into something that iconv is more likely to accept. svn path=/trunk/; revision=11235 --- camel/ChangeLog | 16 +++++++ camel/camel-charset-map.c | 107 ++++++++++++++++++++++++++++++++++++++++++++-- camel/camel-charset-map.h | 4 ++ camel/camel-mime-utils.c | 62 +++++++++++++-------------- camel/camel.c | 3 ++ 5 files changed, 156 insertions(+), 36 deletions(-) (limited to 'camel') diff --git a/camel/ChangeLog b/camel/ChangeLog index 413c00c913..f3c3f4136a 100644 --- a/camel/ChangeLog +++ b/camel/ChangeLog @@ -1,3 +1,19 @@ +2001-07-19 Jeffrey Stedfast + + * camel-mime-utils.c (rfc2047_decode_word): Remove my iso8859-1 -> + iso-8859-1 hack and use camel_charset_get_iconv_friendly_name() + instead. + (rfc2184_decode): Use camel_charset_get_iconv_friendly_name() + + * camel.c (camel_init): Call camel_charset_map_init(). + + * camel-charset-map.c (camel_charset_map_init): New function to + initialize the charset-equivalent lookup table. To be called by + camel_init(). + (camel_charset_get_iconv_friendly_name): New function to try and + convert a charset into something that iconv is more likely to + accept. 
+ 2001-07-19 Peter Williams * Revert 7/11/2001 patch for IMAP INBOX filtering at NotZed's diff --git a/camel/camel-charset-map.c b/camel/camel-charset-map.c index 236197ef86..d609321997 100644 --- a/camel/camel-charset-map.c +++ b/camel/camel-charset-map.c @@ -197,19 +197,85 @@ void main(void) #include "camel-charset-map.h" #include "camel-charset-map-private.h" +#include "hash-table-utils.h" #include #include #include #include +#ifdef ENABLE_THREADS +#include <pthread.h> +#endif + + +#ifdef ENABLE_THREADS +static pthread_mutex_t iconv_charsets_lock = PTHREAD_MUTEX_INITIALIZER; +#define ICONV_CHARSETS_LOCK() pthread_mutex_lock (&iconv_charsets_lock) +#define ICONV_CHARSETS_UNLOCK() pthread_mutex_unlock (&iconv_charsets_lock) +#else +#define ICONV_CHARSETS_LOCK() +#define ICONV_CHARSETS_UNLOCK() +#endif /* ENABLE_THREADS */ + +static GHashTable *iconv_charsets = NULL; + +struct { + char *charset; + char *iconv_name; +} known_iconv_charsets[] = { + /* charset name, iconv-friendly charset name */ + { "iso-8859-1", "iso-8859-1" }, + { "iso8859-1", "iso-8859-1" }, + /* the above mostly serves as an example for iso-style charsets, + but we have code that will populate the iso-*'s if/when they + show up in camel_charset_get_iconv_friendly_name() so I'm + not going to bother putting them all in here... 
*/ + { "windows-cp1251", "cp1251" }, + { "windows-1251", "cp1251" }, + { "cp1251", "cp1251" }, + { NULL, NULL } +}; + + +static void +shutdown_foreach (gpointer key, gpointer value, gpointer data) +{ + g_free (key); + g_free (value); +} + +static void +camel_charset_map_shutdown (void) +{ + g_hash_table_foreach (iconv_charsets, shutdown_foreach, NULL); + g_hash_table_destroy (iconv_charsets); +} + +void +camel_charset_map_init (void) +{ + int i; + + if (iconv_charsets) + return; + + iconv_charsets = g_hash_table_new (g_strcase_hash, g_strcase_equal); + for (i = 0; known_iconv_charsets[i].charset != NULL; i++) { + g_hash_table_insert (iconv_charsets, g_strdup (known_iconv_charsets[i].charset), + g_strdup (known_iconv_charsets[i].iconv_name)); + } + + g_atexit (camel_charset_map_shutdown); +} -void camel_charset_init(CamelCharset *c) +void +camel_charset_init (CamelCharset *c) { c->mask = ~0; c->level = 0; } void -camel_charset_step(CamelCharset *c, const char *in, int len) +camel_charset_step (CamelCharset *c, const char *in, int len) { register unsigned int mask; register int level; @@ -260,7 +326,8 @@ camel_charset_best_mask(unsigned int mask) return "UTF-8"; } -const char *camel_charset_best_name(CamelCharset *charset) +const char * +camel_charset_best_name(CamelCharset *charset) { if (charset->level == 1) return "ISO-8859-1"; @@ -317,5 +384,39 @@ camel_charset_locale_name (void) return charset; } +const char * +camel_charset_get_iconv_friendly_name (const char *name) +{ + const char *charset; + + ICONV_CHARSETS_LOCK (); + charset = g_hash_table_lookup (iconv_charsets, name); + if (!charset) { + /* Attempt to friendlyify the charset */ + char *new_charset; + int len; + + /* Hack to convert charsets like ISO8859-1 to iconv-friendly ISO-8859-1 */ + if (!g_strncasecmp (name, "iso", 3) && name[3] != '-' && name[3] != '_') { + len = strlen (name); + new_charset = g_malloc (len + 2); + memcpy (new_charset, name, 3); + new_charset[3] = '-'; + memcpy (new_charset + 4, 
name + 3, len - 3); + new_charset[len + 1] = '\0'; + g_hash_table_insert (iconv_charsets, g_strdup (name), new_charset); + } else { + /* *shrug* - add it to the hash table just the way it is? */ + new_charset = g_strdup (name); + g_hash_table_insert (iconv_charsets, g_strdup (name), new_charset); + } + + charset = new_charset; + } + ICONV_CHARSETS_UNLOCK (); + + return charset; +} + #endif /* !BUILD_MAP */ diff --git a/camel/camel-charset-map.h b/camel/camel-charset-map.h index 54d62f8f1e..47b3cc0cab 100644 --- a/camel/camel-charset-map.h +++ b/camel/camel-charset-map.h @@ -28,6 +28,8 @@ struct _CamelCharset { int level; }; +void camel_charset_map_init (void); + void camel_charset_init(CamelCharset *); void camel_charset_step(CamelCharset *, const char *in, int len); const char *camel_charset_best_name(CamelCharset *); @@ -37,4 +39,6 @@ const char *camel_charset_best(const char *in, int len); char *camel_charset_locale_name (void); +const char *camel_charset_get_iconv_friendly_name (const char *name); + #endif /* ! _CAMEL_CHARSET_MAP_H */ diff --git a/camel/camel-mime-utils.c b/camel/camel-mime-utils.c index 357c183f02..ba704b19dc 100644 --- a/camel/camel-mime-utils.c +++ b/camel/camel-mime-utils.c @@ -904,6 +904,7 @@ rfc2047_decode_word(const char *in, int len) const char *inptr = in+2; const char *inend = in+len-2; const char *inbuf; + const char *charset; char *encname; int tmplen; int ret; @@ -949,19 +950,12 @@ rfc2047_decode_word(const char *in, int len) if (inlen > 0) { /* yuck, all this snot is to setup iconv! 
*/ tmplen = inptr - in - 3; - encname = alloca (tmplen + 2); - - /* Hack to convert charsets like ISO8859-1 to iconv-friendly ISO-8859-1 */ - if (!g_strncasecmp (in + 2, "iso", 3) && *(in + 5) != '-') { - memcpy (encname, in + 2, 3); - encname[3] = '-'; - memcpy (encname + 4, in + 5, tmplen - 3); - tmplen++; - } else { - memcpy (encname, in + 2, tmplen); - } + encname = alloca (tmplen + 1); + memcpy (encname, in + 2, tmplen); encname[tmplen] = '\0'; + charset = camel_charset_get_iconv_friendly_name (encname); + inbuf = decword; outlen = inlen * 6 + 16; @@ -969,27 +963,27 @@ rfc2047_decode_word(const char *in, int len) outbuf = outbase; /* TODO: Should this cache iconv converters? */ - ic = iconv_open ("UTF-8", encname); + ic = iconv_open ("UTF-8", charset); if (ic != (iconv_t)-1) { ret = iconv (ic, &inbuf, &inlen, &outbuf, &outlen); - if (ret>=0) { + if (ret >= 0) { iconv (ic, NULL, 0, &outbuf, &outlen); *outbuf = 0; decoded = g_strdup (outbase); } iconv_close (ic); } else { - w(g_warning("Cannot decode charset, header display may be corrupt: %s: %s", - encname, strerror(errno))); + w(g_warning ("Cannot decode charset, header display may be corrupt: %s: %s", + charset, g_strerror (errno))); /* TODO: Should this do this, or just leave the encoded strings? 
*/ decword[inlen] = 0; - decoded = g_strdup(decword); + decoded = g_strdup (decword); } } } - + d(printf("decoded '%s'\n", decoded)); - + return decoded; } @@ -1166,16 +1160,16 @@ rfc2047_encode_word(GString *outstring, const char *in, int len, const char *typ d(printf("Converting [%d] '%.*s' to %s\n", len, len, in, type)); /* convert utf8->encoding */ - bufflen = len*6+16; - buffer = alloca(bufflen); + bufflen = len * 6 + 16; + buffer = alloca (bufflen); inlen = len; inptr = in; - - ascii = alloca(bufflen); - - if (strcasecmp(type, "UTF-8") != 0) - ic = iconv_open(type, "UTF-8"); - + + ascii = alloca (bufflen); + + if (g_strcasecmp (type, "UTF-8") != 0) + ic = iconv_open (type, "UTF-8"); + while (inlen) { int convlen, i, proclen; @@ -1802,6 +1796,7 @@ rfc2184_decode (const char *in, int len) { const char *inptr = in; const char *inend = in + len; + const char *charset; char *decoded = NULL; char *encoding; @@ -1810,11 +1805,12 @@ rfc2184_decode (const char *in, int len) return NULL; encoding = g_strndup (in, inptr - in); + charset = camel_charset_get_iconv_friendly_name (encoding); + g_free (encoding); + inptr = memchr (inptr + 1, '\'', inend - inptr - 1); - if (!inptr) { - g_free (encoding); + if (!inptr) return NULL; - } inptr++; if (inptr < inend) { @@ -1825,14 +1821,14 @@ rfc2184_decode (const char *in, int len) inbuf = decword = hex_decode (inptr, inend - inptr); inlen = strlen (inbuf); - - ic = iconv_open ("UTF-8", encoding); + + ic = iconv_open ("UTF-8", charset); if (ic != (iconv_t) -1) { int ret; - + outlen = inlen * 6 + 16; outbuf = outbase = g_malloc (outlen); - + ret = iconv (ic, &inbuf, &inlen, &outbuf, &outlen); if (ret >= 0) { iconv (ic, NULL, 0, &outbuf, &outlen); diff --git a/camel/camel.c b/camel/camel.c index aaf31ca775..4a7c8fd7bf 100644 --- a/camel/camel.c +++ b/camel/camel.c @@ -35,6 +35,7 @@ #endif /* HAVE_NSS */ #include "camel.h" +#include "camel-charset-map.h" gboolean camel_verbose_debug = FALSE; @@ -62,6 +63,8 @@ camel_init (const char 
*configdir, gboolean nss_init) if (getenv ("CAMEL_VERBOSE_DEBUG")) camel_verbose_debug = TRUE; + camel_charset_map_init (); + #ifdef HAVE_NSS if (nss_init) { PR_Init (PR_SYSTEM_THREAD, PR_PRIORITY_NORMAL, 10); -- cgit