From c0a0accfc3f5a93bf8844b5e0fa17766eceddf31 Mon Sep 17 00:00:00 2001 From: Suresh Chandrasekharan Date: Tue, 29 Jul 2003 13:52:43 +0000 Subject: Fix for #46168 'some additional locale aliases required for chinese 2003-07-11 Suresh Chandrasekharan * camel-iconv.c: Fix for #46168 'some additional locale aliases required for chinese support'. 2003-07-25 Jeffrey Stedfast * camel-mime-utils.c (header_decode_word): Revert NotZed's fix for bug #42170 - this causes even more problems than it solves. See bug #46331 for info. Basically, each address header would be converted to UTF-8 twice which means no raw 8bit address header would render correctly. (header_decode_mailbox): Perform a sanity check on the resultant addr->str to make sure that it is valid UTF-8, if not convert it to UTF-8. Fixes bug #42170. svn path=/trunk/; revision=22013 --- camel/ChangeLog | 16 ++++++++ camel/camel-iconv.c | 3 ++ camel/camel-mime-filter-tohtml.c | 8 ++++ camel/camel-mime-utils.c | 86 ++++++++++++++++++++-------------------- 4 files changed, 70 insertions(+), 43 deletions(-) diff --git a/camel/ChangeLog b/camel/ChangeLog index f33507b81f..8c1c475996 100644 --- a/camel/ChangeLog +++ b/camel/ChangeLog @@ -1,3 +1,19 @@ +2003-07-11 Suresh Chandrasekharan + + * camel-iconv.c: Fix for #46168 'some additional locale aliases + required for chinese support'. + +2003-07-25 Jeffrey Stedfast + + * camel-mime-utils.c (header_decode_word): Revert NotZed's fix for + bug #42170 - this causes even more problems than it solves. See + bug #46331 for info. Basically, each address header would be + converted to UTF-8 twice which means no raw 8bit address header + would render correctly. + (header_decode_mailbox): Perform a sanity check on the resultant + addr->str to make sure that it is valid UTF-8, if not convert it + to UTF-8. Fixes bug #42170. + 2003-07-23 Jeffrey Stedfast * camel-mime-message.c (find_best_encoding): Revert my previous diff --git a/camel/camel-iconv.c b/camel/camel-iconv.c index fee061a2b9..690ec520ad 100644 --- a/camel/camel-iconv.c +++ b/camel/camel-iconv.c @@ -136,6 +136,9 @@ struct { /* FIXME: Japanese/Korean/Chinese stuff needs checking */ { "euckr-0", "EUC-KR" }, { "5601", "EUC-KR" }, + { "zh_TW-euc", "EUC-TW" }, + { "zh_CN.euc", "gb2312" }, + { "zh_TW-big5", "BIG5" }, { "big5-0", "BIG5" }, { "big5.eten-0", "BIG5" }, { "big5hkscs-0", "BIG5HKSCS" }, diff --git a/camel/camel-mime-filter-tohtml.c b/camel/camel-mime-filter-tohtml.c index 0ffd1ed26b..45112731a0 100644 --- a/camel/camel-mime-filter-tohtml.c +++ b/camel/camel-mime-filter-tohtml.c @@ -32,6 +32,14 @@ #include "camel-mime-filter-tohtml.h" #include "camel-utf8.h" +/** + * TODO: convert common text/plain 'markup' to html. eg.: + * + * _word_ -> _word_ + * *word* -> *word* + * /word/ -> /word/ + **/ + #define d(x) #define CONVERT_WEB_URLS CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS diff --git a/camel/camel-mime-utils.c b/camel/camel-mime-utils.c index ff2c0bb5fd..5ed7cf6bea 100644 --- a/camel/camel-mime-utils.c +++ b/camel/camel-mime-utils.c @@ -1772,36 +1772,18 @@ header_decode_atom(const char **in) } static char * -header_decode_word(const char **in, const char *charset) +header_decode_word (const char **in) { - char *out; - - header_decode_lwsp(in); - if (**in == '"') - out = header_decode_quoted_string(in); - else - out = header_decode_atom(in); - - /* FIXME: temporary workaround for non-ascii-data problem, see bug #42710 */ - if (out) { - char *p; - - for (p=out;*p;p++) { - if ((*p) & 0x80) { - GString *newstr = g_string_new(""); - - if (charset == NULL || !append_8bit(newstr, out, strlen(out), charset)) - append_latin1(newstr, out, strlen(out)); - - g_free(out); - out = newstr->str; - g_string_free(newstr, FALSE); - break; - } - } + const char *inptr = *in; + + header_decode_lwsp (&inptr); + if (*inptr == '"') { + *in = inptr; + return header_decode_quoted_string (in); + } else { + *in = inptr; + return header_decode_atom (in); } - - return out; } static char * @@ -2295,7 +2277,7 @@ header_decode_addrspec(const char **in) header_decode_lwsp(&inptr); /* addr-spec */ - word = header_decode_word(&inptr, NULL); + word = header_decode_word (&inptr); if (word) { addr = g_string_append(addr, word); header_decode_lwsp(&inptr); @@ -2303,7 +2285,7 @@ header_decode_addrspec(const char **in) while (*inptr == '.' && word) { inptr++; addr = g_string_append_c(addr, '.'); - word = header_decode_word(&inptr, NULL); + word = header_decode_word (&inptr); if (word) { addr = g_string_append(addr, word); header_decode_lwsp(&inptr); @@ -2364,7 +2346,7 @@ header_decode_mailbox(const char **in, const char *charset) addr = g_string_new(""); /* for each address */ - pre = header_decode_word(&inptr, charset); + pre = header_decode_word (&inptr); header_decode_lwsp(&inptr); if (!(*inptr == '.' || *inptr == '@' || *inptr==',' || *inptr=='\0')) { /* ',' and '\0' required incase it is a simple address, no @ domain part (buggy writer) */ @@ -2378,7 +2360,7 @@ header_decode_mailbox(const char **in, const char *charset) last = pre; g_free(text); - pre = header_decode_word(&inptr, charset); + pre = header_decode_word (&inptr); if (pre) { size_t l = strlen (last); size_t p = strlen (pre); @@ -2396,7 +2378,7 @@ header_decode_mailbox(const char **in, const char *charset) while (!pre && *inptr && *inptr != '<') { w(g_warning("Working around stupid mailer bug #5: unescaped characters in names")); name = g_string_append_c(name, *inptr++); - pre = header_decode_word(&inptr, charset); + pre = header_decode_word (&inptr); } } g_free(last); @@ -2423,7 +2405,7 @@ header_decode_mailbox(const char **in, const char *charset) w(g_warning("broken route-address, missing ':': %s", *in)); } } - pre = header_decode_word(&inptr, charset); + pre = header_decode_word (&inptr); header_decode_lwsp(&inptr); } else { w(g_warning("broken address? %s", *in)); @@ -2440,7 +2422,7 @@ header_decode_mailbox(const char **in, const char *charset) while (*inptr == '.' && pre) { inptr++; g_free(pre); - pre = header_decode_word(&inptr, charset); + pre = header_decode_word (&inptr); addr = g_string_append_c(addr, '.'); if (pre) addr = g_string_append(addr, pre); @@ -2542,14 +2524,32 @@ header_decode_mailbox(const char **in, const char *charset) *in = inptr; if (addr->len > 0) { + if (!g_utf8_validate (addr->str, addr->len, NULL)) { + /* workaround for invalid addr-specs containing 8bit chars (see bug #42170 for details) */ + const char *locale_charset; + GString *out; + + locale_charset = e_iconv_locale_charset (); + + out = g_string_new (""); + + if ((charset == NULL || !append_8bit (out, addr->str, addr->len, charset)) + && (locale_charset == NULL || !append_8bit (out, addr->str, addr->len, locale_charset))) + append_latin1 (out, addr->str, addr->len); + + g_string_free (addr, TRUE); + addr = out; + } + address = header_address_new_name(name ? name->str : "", addr->str); } - + + d(printf("got mailbox: %s\n", addr->str)); + g_string_free(addr, TRUE); if (name) g_string_free(name, TRUE); - - d(printf("got mailbox: %s\n", addr->str)); + return address; } @@ -2563,7 +2563,7 @@ header_decode_address(const char **in, const char *charset) /* pre-scan, trying to work out format, discard results */ header_decode_lwsp(&inptr); - while ( (pre = header_decode_word(&inptr, charset)) ) { + while ((pre = header_decode_word (&inptr))) { group = g_string_append(group, pre); group = g_string_append(group, " "); g_free(pre); @@ -2678,7 +2678,7 @@ header_contentid_decode (const char *in) } /* Eudora has been known to use <.@> as a content-id */ - if (!(buf = header_decode_word (&inptr, NULL)) && !strchr (".@", *inptr)) + if (!(buf = header_decode_word (&inptr)) && !strchr (".@", *inptr)) return NULL; addr = g_string_new (""); @@ -2693,10 +2693,10 @@ header_contentid_decode (const char *in) if (!at) { if (*inptr == '.') { g_string_append_c (addr, *inptr++); - buf = header_decode_word (&inptr, NULL); + buf = header_decode_word (&inptr); } else if (*inptr == '@') { g_string_append_c (addr, *inptr++); - buf = header_decode_word (&inptr, NULL); + buf = header_decode_word (&inptr); at = TRUE; } } else if (strchr (".[]", *inptr)) { @@ -2769,7 +2769,7 @@ header_references_decode_single (const char **in, struct _header_references **he break; } } else { - word = header_decode_word (&inptr, NULL); + word = header_decode_word (&inptr); if (word) g_free (word); else if (*inptr != '\0') -- cgit