diff options
author | Jeffrey Stedfast <fejj@ximian.com> | 2003-07-09 02:36:40 +0800 |
---|---|---|
committer | Jeffrey Stedfast <fejj@src.gnome.org> | 2003-07-09 02:36:40 +0800 |
commit | 11b8f83aadaf2c49a7a29721fde80ac2015b2484 (patch) | |
tree | 374e7f8aa0a92e3097b20d6f4d04f945fb436464 /camel/camel-iconv.c | |
parent | a58c26d7a588f6822469072b68aa38319175f588 (diff) | |
download | gsoc2013-evolution-11b8f83aadaf2c49a7a29721fde80ac2015b2484.tar.gz gsoc2013-evolution-11b8f83aadaf2c49a7a29721fde80ac2015b2484.tar.zst gsoc2013-evolution-11b8f83aadaf2c49a7a29721fde80ac2015b2484.zip |
Updated (new copy/paste from e-iconv).
2003-07-08 Jeffrey Stedfast <fejj@ximian.com>
* camel-iconv.c: Updated (new copy/paste from e-iconv).
* camel-block-file.c (camel_block_file_get_block): Use
camel_read() rather than libc read.
* camel-tcp-stream-raw.c (stream_read): Use camel_read().
(stream_write): Use camel_write().
* camel-stream-fs.c (stream_read): Use camel_read().
(stream_write): Use camel_write().
svn path=/trunk/; revision=21759
Diffstat (limited to 'camel/camel-iconv.c')
-rw-r--r-- | camel/camel-iconv.c | 268 |
1 file changed, 180 insertions, 88 deletions
diff --git a/camel/camel-iconv.c b/camel/camel-iconv.c index 3f1708c18b..fee061a2b9 100644 --- a/camel/camel-iconv.c +++ b/camel/camel-iconv.c @@ -55,12 +55,25 @@ static GStaticMutex lock = G_STATIC_MUTEX_INIT; #endif +typedef struct _EDListNode { + struct _EDListNode *next; + struct _EDListNode *prev; +} EDListNode; + +typedef struct _EDList { + struct _EDListNode *head; + struct _EDListNode *tail; + struct _EDListNode *tailpred; +} EDList; + +#define E_DLIST_INITIALISER(l) { (EDListNode *)&l.tail, 0, (EDListNode *)&l.head } + struct _iconv_cache_node { struct _iconv_cache_node *next; struct _iconv_cache_node *prev; - + struct _iconv_cache *parent; - + int busy; iconv_t cd; }; @@ -68,13 +81,13 @@ struct _iconv_cache_node { struct _iconv_cache { struct _iconv_cache *next; struct _iconv_cache *prev; - + char *conv; - + EDList open; /* stores iconv_cache_nodes, busy ones up front */ }; -#define ICONV_CACHE_SIZE (16) +#define CAMEL_ICONV_CACHE_SIZE (16) static EDList iconv_cache_list; static GHashTable *iconv_cache; @@ -91,62 +104,136 @@ struct { } known_iconv_charsets[] = { #if 0 /* charset name, iconv-friendly charset name */ - { "iso-8859-1", "iso-8859-1" }, - { "iso8859-1", "iso-8859-1" }, + { "iso-8859-1", "iso-8859-1" }, + { "iso8859-1", "iso-8859-1" }, /* the above mostly serves as an example for iso-style charsets, but we have code that will populate the iso-*'s if/when they - show up in camel_iconv_charset_name() so I'm + show up in e_iconv_charset_name() so I'm not going to bother putting them all in here... 
*/ - { "windows-cp1251", "cp1251" }, - { "windows-1251", "cp1251" }, - { "cp1251", "cp1251" }, + { "windows-cp1251", "cp1251" }, + { "windows-1251", "cp1251" }, + { "cp1251", "cp1251" }, /* the above mostly serves as an example for windows-style charsets, but we have code that will parse and convert them to their cp#### equivalents if/when they show up in - camel_iconv_charset_name() so I'm not going to bother + e_iconv_charset_name() so I'm not going to bother putting them all in here either... */ #endif /* charset name (lowercase!), iconv-friendly name (sometimes case sensitive) */ - { "utf-8", "UTF-8" }, - { "utf8", "UTF-8" }, + { "utf-8", "UTF-8" }, /* 10646 is a special case, its usually UCS-2 big endian */ /* This might need some checking but should be ok for solaris/linux */ - { "iso-10646-1", "UCS-2BE" }, - { "iso_10646-1", "UCS-2BE" }, - { "iso10646-1", "UCS-2BE" }, - { "iso-10646", "UCS-2BE" }, - { "iso_10646", "UCS-2BE" }, - { "iso10646", "UCS-2BE" }, - - /* "ks_c_5601-1987" seems to be the most common of this lot */ - { "ks_c_5601-1987", "EUC-KR" }, - { "5601", "EUC-KR" }, - { "ksc-5601", "EUC-KR" }, - { "ksc-5601-1987", "EUC-KR" }, - { "ksc-5601_1987", "EUC-KR" }, + { "iso-10646-1", "UCS-2BE" }, + { "iso_10646-1", "UCS-2BE" }, + { "iso10646-1", "UCS-2BE" }, + { "iso-10646", "UCS-2BE" }, + { "iso_10646", "UCS-2BE" }, + { "iso10646", "UCS-2BE" }, + + { "ks_c_5601-1987", "EUC-KR" }, /* FIXME: Japanese/Korean/Chinese stuff needs checking */ - { "euckr-0", "EUC-KR" }, - { "5601", "EUC-KR" }, - { "big5-0", "BIG5" }, - { "big5.eten-0", "BIG5" }, - { "big5hkscs-0", "BIG5HKCS" }, - { "gb2312-0", "gb2312" }, - { "gb2312.1980-0", "gb2312" }, - { "euc-cn", "gb2312" }, - { "gb18030-0", "gb18030" }, - { "gbk-0", "GBK" }, - - { "eucjp-0", "eucJP" }, /* should this map to "EUC-JP" instead? 
*/ - { "ujis-0", "ujis" }, /* we might want to map this to EUC-JP */ - { "jisx0208.1983-0", "SJIS" }, - { "jisx0212.1990-0", "SJIS" }, - { "pck", "SJIS" }, - { NULL, NULL } + { "euckr-0", "EUC-KR" }, + { "5601", "EUC-KR" }, + { "big5-0", "BIG5" }, + { "big5.eten-0", "BIG5" }, + { "big5hkscs-0", "BIG5HKSCS" }, + { "gb2312-0", "gb2312" }, + { "gb2312.1980-0", "gb2312" }, + { "gb-2312", "gb2312" }, + { "gb18030-0", "gb18030" }, + { "gbk-0", "GBK" }, + + { "eucjp-0", "eucJP" }, + { "ujis-0", "ujis" }, + { "jisx0208.1983-0","SJIS" }, + { "jisx0212.1990-0","SJIS" }, + { "pck", "SJIS" }, + { NULL, NULL } }; + +/* Another copy of this trivial list implementation + Why? This stuff gets called a lot (potentially), should run fast, + and g_list's are f@@#$ed up to make this a hassle */ +static void +e_dlist_init (EDList *v) +{ + v->head = (EDListNode *) &v->tail; + v->tail = 0; + v->tailpred = (EDListNode *) &v->head; +} + +static EDListNode * +e_dlist_addhead (EDList *l, EDListNode *n) +{ + n->next = l->head; + n->prev = (EDListNode *) &l->head; + l->head->prev = n; + l->head = n; + return n; +} + +static EDListNode * +e_dlist_addtail (EDList *l, EDListNode *n) +{ + n->next = (EDListNode *) &l->tail; + n->prev = l->tailpred; + l->tailpred->next = n; + l->tailpred = n; + return n; +} + +static EDListNode * +e_dlist_remove (EDListNode *n) +{ + n->next->prev = n->prev; + n->prev->next = n->next; + return n; +} + + +static void +locale_parse_lang (const char *locale) +{ + char *codeset, *lang; + + if ((codeset = strchr (locale, '.'))) + lang = g_strndup (locale, codeset - locale); + else + lang = g_strdup (locale); + + /* validate the language */ + if (strlen (lang) >= 2) { + if (lang[2] == '-' || lang[2] == '_') { + /* canonicalise the lang */ + camel_strdown (lang); + + /* validate the country code */ + if (strlen (lang + 3) > 2) { + /* invalid country code */ + lang[2] = '\0'; + } else { + lang[2] = '-'; + e_strup (lang + 3); + } + } else if (lang[2] != '\0') { + /* invalid 
language */ + g_free (lang); + lang = NULL; + } + + locale_lang = lang; + } else { + /* invalid language */ + locale_lang = NULL; + g_free (lang); + } +} + + /** * camel_iconv_init: * @@ -156,14 +243,14 @@ struct { static void camel_iconv_init (int keep) { - char *from, *to; + char *from, *to, *locale; int i; LOCK (); if (iconv_charsets != NULL) { if (!keep) - UNLOCK(); + UNLOCK (); return; } @@ -172,7 +259,7 @@ camel_iconv_init (int keep) for (i = 0; known_iconv_charsets[i].charset != NULL; i++) { from = g_strdup (known_iconv_charsets[i].charset); to = g_strdup (known_iconv_charsets[i].iconv_name); - e_strdown (from); + camel_strdown (from); g_hash_table_insert (iconv_charsets, from, to); } @@ -230,7 +317,7 @@ camel_iconv_init (int keep) * camel_iconv_charset_name: * @charset: charset name * - * Maps charset names to the names that glib's g_iconv_open() is more + * Maps charset names to the names that iconv_open() is more * likely able to handle. * * Returns an iconv-friendly name for @charset. 
@@ -253,9 +340,9 @@ camel_iconv_charset_name (const char *charset) return iname; } - /* Unknown, try to convert some basic charset types to something that should work */ - if (!strncmp (name, "iso", 3)) { - /* Convert iso-####-# or iso####-# or iso_####-# into the canonical form: iso-####-# */ + /* Unknown, try canonicalise some basic charset types to something that should work */ + if (strncmp (name, "iso", 3) == 0) { + /* Convert iso-####-# or iso####-# or iso_####-# to iso-####-# or iso####-# */ int iso, codepage; char *p; @@ -264,9 +351,10 @@ camel_iconv_charset_name (const char *charset) tmp++; iso = strtoul (tmp, &p, 10); + if (iso == 10646) { - /* they all become iso-10646 */ - ret = g_strdup ("iso-10646"); + /* they all become ICONV_10646 */ + iname = g_strdup (ICONV_10646); } else { tmp = p; if (*tmp == '-' || *tmp == '_') @@ -276,10 +364,15 @@ camel_iconv_charset_name (const char *charset) if (p > tmp) { /* codepage is numeric */ - ret = g_strdup_printf ("iso-%d-%d", iso, codepage); +#ifdef __aix__ + if (codepage == 13) + iname = g_strdup ("IBM-921"); + else +#endif /* __aix__ */ + iname = g_strdup_printf (ICONV_ISO_D_FORMAT, iso, codepage); } else { /* codepage is a string - probably iso-2022-jp or something */ - ret = g_strdup_printf ("iso-%d-%s", iso, p); + iname = g_strdup_printf (ICONV_ISO_S_FORMAT, iso, p); } } } else if (strncmp (name, "windows-", 8) == 0) { @@ -295,7 +388,7 @@ camel_iconv_charset_name (const char *charset) tmp += 2; iname = g_strdup_printf ("CP%s", tmp); } else { - /* Just assume its ok enough as is, case and all - let g_iconv_open() handle this */ + /* Just assume its ok enough as is, case and all */ iname = g_strdup (charset); } @@ -309,23 +402,26 @@ static void flush_entry (struct _iconv_cache *ic) { struct _iconv_cache_node *in, *nn; - + in = (struct _iconv_cache_node *) ic->open.head; nn = in->next; while (nn) { if (in->cd != (iconv_t) -1) { g_hash_table_remove (iconv_cache_open, in->cd); - g_iconv_close (in->cd); + 
iconv_close (in->cd); } + g_free (in); in = nn; nn = in->next; } + g_free (ic->conv); g_free (ic); } +/* This should run pretty quick, its called a lot */ /** * camel_iconv_open: * @to: charset to convert to @@ -343,27 +439,21 @@ flush_entry (struct _iconv_cache *ic) iconv_t camel_iconv_open (const char *to, const char *from) { - struct _iconv_cache_node *in; struct _iconv_cache *ic; + struct _iconv_cache_node *in; + int errnosav; iconv_t cd; char *key; - if (from == NULL || to == NULL) { + if (to == NULL || from == NULL) { errno = EINVAL; return (iconv_t) -1; } - if (!strcasecmp (from, "x-unknown")) - from = camel_iconv_locale_charset (); - - /* Even tho g_iconv_open will find the appropriate charset - * format(s) for the to/from charset strings (hahaha, yea - * right), we still convert them to their canonical format - * here so that our key is in a standard format */ - from = camel_iconv_charset_name (from); to = camel_iconv_charset_name (to); - key = g_alloca (strlen (from) + strlen (to) + 2); - sprintf (key, "%s:%s", from, to); + from = camel_iconv_charset_name (from); + key = g_alloca (strlen (to) + strlen (from) + 2); + sprintf (key, "%s:%s", to, from); LOCK (); @@ -371,15 +461,15 @@ camel_iconv_open (const char *to, const char *from) if (ic) { e_dlist_remove ((EDListNode *) ic); } else { - struct _iconv_cache *last = (struct _iconv_cache *)iconv_cache_list.tailpred; + struct _iconv_cache *last = (struct _iconv_cache *) iconv_cache_list.tailpred; struct _iconv_cache *prev; prev = last->prev; - while (prev && iconv_cache_size > ICONV_CACHE_SIZE) { + while (prev && iconv_cache_size > CAMEL_ICONV_CACHE_SIZE) { in = (struct _iconv_cache_node *) last->open.head; if (in->next && !in->busy) { d(printf ("Flushing iconv converter '%s'\n", last->conv)); - e_dlist_remove ((EDListNode *)last); + e_dlist_remove ((EDListNode *) last); g_hash_table_remove (iconv_cache, last->conv); flush_entry (last); iconv_cache_size--; @@ -390,20 +480,19 @@ camel_iconv_open (const char 
*to, const char *from) iconv_cache_size++; - ic = g_new (struct _iconv_cache, 1); + ic = g_new (struct _iconv_cache); e_dlist_init (&ic->open); - ic->conv = g_strdup (tofrom); + ic->conv = g_strdup (key); g_hash_table_insert (iconv_cache, ic->conv, ic); - cd(printf ("Creating iconv converter '%s'\n", ic->conv)); + d(printf ("Creating iconv converter '%s'\n", ic->conv)); } - e_dlist_addhead (&iconv_cache_list, (EDListNode *) ic); /* If we have a free iconv, use it */ in = (struct _iconv_cache_node *) ic->open.tailpred; if (in->prev && !in->busy) { - cd(printf ("using existing iconv converter '%s'\n", ic->conv)); + d(printf ("using existing iconv converter '%s'\n", ic->conv)); cd = in->cd; if (cd != (iconv_t) -1) { /* work around some broken iconv implementations @@ -413,15 +502,15 @@ camel_iconv_open (const char *to, const char *from) char *buggy_iconv_buf = NULL; /* resets the converter */ - g_iconv (cd, &buggy_iconv_buf, &buggy_iconv_len, &buggy_iconv_buf, &buggy_iconv_len); + iconv (cd, &buggy_iconv_buf, &buggy_iconv_len, &buggy_iconv_buf, &buggy_iconv_len); in->busy = TRUE; e_dlist_remove ((EDListNode *) in); e_dlist_addhead (&ic->open, (EDListNode *) in); } } else { - d(printf ("creating new iconv converter '%s'\n", ic->conv)); - cd = g_iconv_open (to, from); - in = g_new (struct _iconv_cache_node, 1); + d(printf("creating new iconv converter '%s'\n", ic->conv)); + cd = iconv_open (to, from); + in = g_new (struct _iconv_cache_node); in->cd = cd; in->parent = ic; e_dlist_addhead (&ic->open, (EDListNode *) in); @@ -436,7 +525,7 @@ camel_iconv_open (const char *to, const char *from) } } - UNLOCK(); + UNLOCK (); return cd; } @@ -455,7 +544,7 @@ camel_iconv_open (const char *to, const char *from) size_t camel_iconv (iconv_t cd, const char **inbuf, size_t *inleft, char **outbuf, size_t *outleft) { - return g_iconv (cd, (char **) inbuf, inleft, outbuf, outleft); + return iconv (cd, (ICONV_CONST char **) inbuf, inleft, outbuf, outleft); } @@ -468,15 +557,16 @@ 
camel_iconv (iconv_t cd, const char **inbuf, size_t *inleft, char **outbuf, size * Returns 0 on success or -1 on fail as well as setting an * appropriate errno value. **/ -int +void camel_iconv_close (iconv_t cd) { struct _iconv_cache_node *in; - if (cd == (iconv_t)-1) + if (cd == (iconv_t) -1) return; LOCK (); + in = g_hash_table_lookup (iconv_cache_open, cd); if (in) { d(printf ("closing iconv converter '%s'\n", in->parent->conv)); @@ -485,11 +575,13 @@ camel_iconv_close (iconv_t cd) e_dlist_addtail (&in->parent->open, (EDListNode *) in); } else { g_warning ("trying to close iconv i dont know about: %p", cd); - g_iconv_close (cd); + iconv_close (cd); } + UNLOCK (); } + const char * camel_iconv_locale_charset (void) { @@ -516,7 +608,7 @@ static struct { char *lang; } cjkr_lang_map[] = { { "Big5", "zh" }, - { "BIG5HKCS", "zh" }, + { "BIG5HKSCS", "zh" }, { "gb2312", "zh" }, { "gb18030", "zh" }, { "gbk", "zh" }, |