diff options
author | 4 <NotZed@Ximian.com> | 2001-09-25 03:31:07 +0800 |
---|---|---|
committer | Michael Zucci <zucchi@src.gnome.org> | 2001-09-25 03:31:07 +0800 |
commit | 450e955e76ca9174658c0bb94e99e4174df2fe48 (patch) | |
tree | 6d708cc2b707fc24618cdcf6fdd9ed2539a2189e | |
parent | 8ac999f87feb11e3e33f0a5ab33d92f15e57c613 (diff) | |
download | gsoc2013-evolution-450e955e76ca9174658c0bb94e99e4174df2fe48.tar.gz gsoc2013-evolution-450e955e76ca9174658c0bb94e99e4174df2fe48.tar.zst gsoc2013-evolution-450e955e76ca9174658c0bb94e99e4174df2fe48.zip |
If the type is encoded, get the fallback charset from the message
2001-09-24 <NotZed@Ximian.com>
* camel-filter-search.c (check_header): If the type is encoded,
get the fallback charset from the message content-type.
* camel-mime-part-utils.c (check_html_charset): Don't check for
charset == NULL before calling camel_charset_to_iconv.
(simple_data_wrapper_construct_from_parser): Likewise.
* camel-mime-message.c (process_header): Try to use the content-type
charset param as the fallback charset.
* camel-charset-map.c (camel_charset_to_iconv): Handle name ==
NULL, return NULL.
* camel-folder-summary.c (camel_folder_summary_format_address):
(camel_folder_summary_format_string): Made private again, removed
#warning about it. Renamed to s/camel_folder//.
(summary_format_string): Take default charset param.
(camel_message_info_new_from_header, message_info_new): Decode
content-type field to get the charset parameter to use as the
default charset for decoding strings.
* camel-search-private.c (camel_search_header_match): Pass NULL as
the charset, the locale charset is always tried.
(camel_search_header_match): Supply a default_charset parameter to
be used with TYPE_ENCODED params.
* camel-mime-utils.c
(header_param): get rid of the g_strcasecmp crap.
(header_set_param): Same here.
(header_decode_param_list): And here.
(header_decode_text): Totally rewritten. 30% of its size. If the
word is not rfc2047 encoded, always try default_charset if
supplied, if that fails, try locale charset if it exists, if that
fails then assume latin1/7 bit ascii.
(append_8bit): Changed to return FALSE if we can't convert for
whatever reason, and don't append anything.
* camel-mime-part.h (struct _CamelMimePart): Move content_type and
headers out of the 'private' section.
* camel-mime-part.c (get_headers): Don't do any conversion on the
header.
(process_header): Get the content-type charset as the fallback
charset for decode_string.
(construct_from_parser): If we have a content-type header, process
it before doing anything else, so we have access to a fallback
charset for invalid headers.
svn path=/trunk/; revision=13096
-rw-r--r-- | camel/ChangeLog | 50 | ||||
-rw-r--r-- | camel/camel-charset-map.c | 3 | ||||
-rw-r--r-- | camel/camel-filter-search.c | 10 | ||||
-rw-r--r-- | camel/camel-folder-search.c | 3 | ||||
-rw-r--r-- | camel/camel-folder-summary.c | 56 | ||||
-rw-r--r-- | camel/camel-mime-message.c | 5 | ||||
-rw-r--r-- | camel/camel-mime-part-utils.c | 6 | ||||
-rw-r--r-- | camel/camel-mime-part.c | 14 | ||||
-rw-r--r-- | camel/camel-mime-part.h | 15 | ||||
-rw-r--r-- | camel/camel-mime-utils.c | 162 | ||||
-rw-r--r-- | camel/camel-search-private.c | 4 | ||||
-rw-r--r-- | camel/camel-search-private.h | 2 |
12 files changed, 190 insertions, 140 deletions
diff --git a/camel/ChangeLog b/camel/ChangeLog index 42da8598af..c09a7c62ae 100644 --- a/camel/ChangeLog +++ b/camel/ChangeLog @@ -1,3 +1,53 @@ +2001-09-24 <NotZed@Ximian.com> + + * camel-filter-search.c (check_header): If the type is encoded, + get the fallback charset from the message content-type. + + * camel-mime-part-utils.c (check_html_charset): Dont check for + charset==null before calling charset_to_iconv. + (simple_data_wrapper_construct_from_parser): " + + * camel-mime-message.c (process_header): Try use content-type + charset param as the fallback charset. + + * camel-charset-map.c (camel_charset_to_iconv): Handle name == + NULL, return NULL. + + * camel-folder-summary.c (camel_folder_summary_format_address): + (camel_folder_summary_format_string): Made private again, removed + #warning about it. Renamed to s/camel_folder//. + (summary_format_string): Take default charset param. + (camel_message_info_new_from_header, message_info_new): Decode + content-type field to get the charset parameter to use as the + default charset for decoding strings. + + * camel-search-private.c (camel_search_header_match): Pass NULL as + the charset, the locale charset is always tried. + (camel_search_header_match): Supply a default_charset parameter to + be used with TYPE_ENCODED params. + + * camel-mime-utils.c + (header_param): get rid of the g_strcasecmp crap. + (header_set_param): Same here. + (header_decode_param_list): And here. + (header_decode_text): Totally rewritten. 30% of its size. If the + word is not rfc2047 encoded, always try default_charset if + supplied, if that fails, try locale charset if it exists, if that + fails then assume latin1/7 bit ascii. + (append_8bit): Changed to return FALSE if we can't convert for + whatever reason, and dont append anything. + + * camel-mime-part.h (struct _CamelMimePart): Move content_type and + headers out of the 'private' section. + + * camel-mime-part.c (get_headers): Dont do any conversion on the + header. 
+ (process_header): Get the content-type charset as the fallback + charset for decode_string. + (construct_from_parser): IF we have a content-type header, process + it before doing anything else, so we have access to a fallback + charset for invalid headers. + 2001-09-23 Jeffrey Stedfast <fejj@ximian.com> * camel-tcp-stream-openssl.c (ssl_verify): Don't even try to alert diff --git a/camel/camel-charset-map.c b/camel/camel-charset-map.c index 63e33c81ec..dbd956a15d 100644 --- a/camel/camel-charset-map.c +++ b/camel/camel-charset-map.c @@ -397,6 +397,9 @@ camel_charset_to_iconv (const char *name) { const char *charset; + if (name == NULL) + return NULL; + ICONV_CHARSETS_LOCK (); charset = g_hash_table_lookup (iconv_charsets, name); if (!charset) { diff --git a/camel/camel-filter-search.c b/camel/camel-filter-search.c index d6d60e2cf3..c9c3d2853e 100644 --- a/camel/camel-filter-search.c +++ b/camel/camel-filter-search.c @@ -115,6 +115,8 @@ check_header (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMess char *name = argv[0]->value.string; const char *header; camel_search_t type = CAMEL_SEARCH_TYPE_ENCODED; + CamelContentType *ct; + const char *charset = NULL; if (strcasecmp(name, "x-camel-mlist") == 0) { header = camel_message_info_mlist(fms->info); @@ -123,12 +125,18 @@ check_header (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMess header = camel_medium_get_header(CAMEL_MEDIUM(fms->message), argv[0]->value.string); if (strcasecmp("to", name) == 0 || strcasecmp("cc", name) == 0 || strcasecmp("from", name) == 0) type = CAMEL_SEARCH_TYPE_ADDRESS_ENCODED; + else { + ct = camel_mime_part_get_content_type(CAMEL_MIME_PART(fms->message)); + if (ct) + charset = camel_charset_to_iconv(header_content_type_param(ct, "charset")); + } } if (header) { for (i=1; i<argc && !matched; i++) { if (argv[i]->type == ESEXP_RES_STRING) - matched = camel_search_header_match(header, argv[i]->value.string, how, type); + matched = 
camel_search_header_match(header, argv[i]->value.string, + how, type, charset); } } } diff --git a/camel/camel-folder-search.c b/camel/camel-folder-search.c index a72a5743ad..cca04d3248 100644 --- a/camel/camel-folder-search.c +++ b/camel/camel-folder-search.c @@ -642,7 +642,8 @@ check_header(struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolder /* performs an OR of all words */ for (i=1;i<argc && !truth;i++) { if (argv[i]->type == ESEXP_RES_STRING) - truth = camel_search_header_match(header, argv[i]->value.string, how, type); + truth = camel_search_header_match(header, argv[i]->value.string, + how, type, NULL); } } } diff --git a/camel/camel-folder-summary.c b/camel/camel-folder-summary.c index f5f2131fa2..45c2fadae4 100644 --- a/camel/camel-folder-summary.c +++ b/camel/camel-folder-summary.c @@ -1382,11 +1382,8 @@ static CamelMessageContentInfo * content_info_new_from_message(CamelFolderSummar return ci; } -#ifndef NO_WARNINGS -#warning "These should be made private again, easy to fix (used in filter-driver)" -#endif -char * -camel_folder_summary_format_address(struct _header_raw *h, const char *name) +static char * +summary_format_address(struct _header_raw *h, const char *name) { struct _header_address *addr; const char *text; @@ -1404,16 +1401,15 @@ camel_folder_summary_format_address(struct _header_raw *h, const char *name) return ret; } -char * -camel_folder_summary_format_string (struct _header_raw *h, const char *name) +static char * +summary_format_string (struct _header_raw *h, const char *name, const char *charset) { - const char *charset, *text; + const char *text; text = header_raw_find (&h, name, NULL); if (text) { while (isspace ((unsigned) *text)) text++; - charset = camel_charset_locale_name (); return header_decode_string (text, charset); } else { return NULL; @@ -1485,15 +1481,28 @@ message_info_new(CamelFolderSummary *s, struct _header_raw *h) char *msgid; int count; char *subject, *from, *to, *cc, *mlist; + struct 
_header_content_type *ct = NULL; + const char *content, *charset = NULL; mi = camel_folder_summary_info_new(s); - subject = camel_folder_summary_format_string(h, "subject"); - from = camel_folder_summary_format_address(h, "from"); - to = camel_folder_summary_format_address(h, "to"); - cc = camel_folder_summary_format_address(h, "cc"); + if ((content = header_raw_find(&h, "Content-Type", NULL)) + && (ct = header_content_type_decode(content)) + && (charset = header_content_type_param(ct, "charset")) + && (strcasecmp(charset, "us-ascii") == 0)) + charset = NULL; + + charset = camel_charset_to_iconv(charset); + + subject = summary_format_string(h, "subject", charset); + from = summary_format_address(h, "from"); + to = summary_format_address(h, "to"); + cc = summary_format_address(h, "cc"); mlist = header_raw_check_mailing_list(&h); + if (ct) + header_content_type_unref(ct); + #ifdef DOEPOOLV e_poolv_set(mi->strings, CAMEL_MESSAGE_INFO_SUBJECT, subject, TRUE); e_poolv_set(mi->strings, CAMEL_MESSAGE_INFO_FROM, from, TRUE); @@ -2340,13 +2349,26 @@ camel_message_info_new_from_header (struct _header_raw *header) { CamelMessageInfo *info; char *subject, *from, *to, *cc, *mlist; + struct _header_content_type *ct = NULL; + const char *content, *charset = NULL; + + if ((content = header_raw_find(&header, "Content-Type", NULL)) + && (ct = header_content_type_decode(content)) + && (charset = header_content_type_param(ct, "charset")) + && (strcasecmp(charset, "us-ascii") == 0)) + charset = NULL; + + charset = camel_charset_to_iconv(charset); - subject = camel_folder_summary_format_string(header, "subject"); - from = camel_folder_summary_format_address(header, "from"); - to = camel_folder_summary_format_address(header, "to"); - cc = camel_folder_summary_format_address(header, "cc"); + subject = summary_format_string(header, "subject", charset); + from = summary_format_address(header, "from"); + to = summary_format_address(header, "to"); + cc = summary_format_address(header, "cc"); 
mlist = header_raw_check_mailing_list(&header); + if (ct) + header_content_type_unref(ct); + info = camel_message_info_new(); camel_message_info_set_subject(info, subject); diff --git a/camel/camel-mime-message.c b/camel/camel-mime-message.c index cbae7fb3f1..13182093b7 100644 --- a/camel/camel-mime-message.c +++ b/camel/camel-mime-message.c @@ -531,7 +531,10 @@ process_header (CamelMedium *medium, const char *header_name, const char *header break; case HEADER_SUBJECT: g_free (message->subject); - charset = camel_charset_locale_name (); + if (((CamelMimePart *)message)->content_type) + charset = camel_charset_to_iconv(header_content_type_param(((CamelMimePart *)message)->content_type, "charset")); + else + charset = NULL; message->subject = g_strstrip (header_decode_string (header_value, charset)); break; case HEADER_TO: diff --git a/camel/camel-mime-part-utils.c b/camel/camel-mime-part-utils.c index a9e971ba58..ba9bf07d5f 100644 --- a/camel/camel-mime-part-utils.c +++ b/camel/camel-mime-part-utils.c @@ -80,8 +80,7 @@ check_html_charset(char *buffer, int length) && (val = camel_html_parser_attr(hp, "content")) && (ct = header_content_type_decode(val))) { charset = header_content_type_param(ct, "charset"); - if (charset) - charset = camel_charset_to_iconv (charset); + charset = camel_charset_to_iconv (charset); header_content_type_unref(ct); } break; @@ -189,8 +188,7 @@ simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser ct = camel_mime_parser_content_type(mp); if (header_content_type_is(ct, "text", "*")) { charset = header_content_type_param(ct, "charset"); - if (charset) - charset = camel_charset_to_iconv (charset); + charset = camel_charset_to_iconv (charset); if (fdec) { d(printf("Adding CRLF conversion filter\n")); diff --git a/camel/camel-mime-part.c b/camel/camel-mime-part.c index a4d9a2eb0d..5e6d6b3b09 100644 --- a/camel/camel-mime-part.c +++ b/camel/camel-mime-part.c @@ -212,7 +212,10 @@ process_header(CamelMedium *medium, const 
char *header_name, const char *header_ switch (header_type) { case HEADER_DESCRIPTION: /* raw header->utf8 conversion */ g_free (mime_part->description); - charset = camel_charset_locale_name (); + if (mime_part->content_type) + charset = camel_charset_to_iconv(header_content_type_param(mime_part->content_type, "charset")); + else + charset = NULL; mime_part->description = g_strstrip (header_decode_string (header_value, charset)); break; case HEADER_DISPOSITION: @@ -299,7 +302,7 @@ get_headers (CamelMedium *medium) headers = g_array_new (FALSE, FALSE, sizeof (CamelMediumHeader)); for (h = part->headers; h; h = h->next) { header.name = h->name; - header.value = header_decode_string (h->value, NULL); + header.value = h->value; g_array_append_val (headers, header); } @@ -660,6 +663,7 @@ static int construct_from_parser(CamelMimePart *dw, CamelMimeParser *mp) { struct _header_raw *headers; + const char *content; char *buf; int len; @@ -675,6 +679,12 @@ construct_from_parser(CamelMimePart *dw, CamelMimeParser *mp) case HSCAN_MULTIPART: /* we have the headers, build them into 'us' */ headers = camel_mime_parser_headers_raw(mp); + + /* if content-type exists, process it first, set for fallback charset in headers */ + content = header_raw_find(&headers, "content-type", NULL); + if (content) + process_header((CamelMedium *)dw, "content-type", content); + while (headers) { camel_medium_add_header((CamelMedium *)dw, headers->name, headers->value); headers = headers->next; diff --git a/camel/camel-mime-part.h b/camel/camel-mime-part.h index bf302f507f..4bbcddaf15 100644 --- a/camel/camel-mime-part.h +++ b/camel/camel-mime-part.h @@ -60,19 +60,18 @@ typedef enum _CamelMimePartEncodingType CamelMimePartEncodingType; struct _CamelMimePart { CamelMedium parent_object; + + CamelContentType *content_type; + struct _header_raw *headers; /* mime headers */ /* All fields here are -** PRIVATE **- */ - gchar *description; + char *description; CamelMimeDisposition *disposition; - gchar 
*content_id; - gchar *content_MD5; - gchar *content_location; + char *content_id; + char *content_MD5; + char *content_location; GList *content_languages; CamelMimePartEncodingType encoding; - - CamelContentType *content_type; - - struct _header_raw *headers; /* mime headers */ }; typedef struct _CamelMimePartClass { diff --git a/camel/camel-mime-utils.c b/camel/camel-mime-utils.c index e1f483023d..3498a9fd74 100644 --- a/camel/camel-mime-utils.c +++ b/camel/camel-mime-utils.c @@ -1021,124 +1021,80 @@ append_latin1 (GString *out, const char *in, int len) return out; } -static void -append_8bit (GString *out, const char *inbuf, int inlen, const char *default_charset) +static int +append_8bit (GString *out, const char *inbuf, int inlen, const char *charset) { char *outbase, *outbuf; int outlen; iconv_t ic; - ic = iconv_open ("UTF-8", default_charset); - if (ic != (iconv_t) -1) { - int ret; - - outlen = inlen * 6 + 16; - outbuf = outbase = g_malloc (outlen); - - ret = iconv (ic, &inbuf, &inlen, &outbuf, &outlen); - if (ret >= 0) { - iconv (ic, NULL, 0, &outbuf, &outlen); - *outbuf = '\0'; - } - - iconv_close (ic); + ic = iconv_open ("UTF-8", charset); + if (ic == (iconv_t) -1) + return FALSE; + + outlen = inlen * 6 + 16; + outbuf = outbase = g_malloc(outlen); - /* FIXME: is outlen == strlen (outbuf) ?? 
*/ - g_string_append_len (out, outbase, strlen (outbase)); - } else { - /* bah, completely broken...just append as raw text */ - g_string_append_len (out, inbuf, inlen); + if (iconv(ic, &inbuf, &inlen, &outbuf, &outlen) == -1) { + w(g_warning("Conversion to '%s' failed: %s", charset, strerror(errno))); + g_free(outbase); + return FALSE; } + + *outbuf = 0; + g_string_append(out, outbase); + g_free(outbase); + iconv_close(ic); + + return TRUE; + } -/* decodes a simple text, rfc822 */ +/* decodes a simple text, rfc822 + rfc2047 */ static char * header_decode_text (const char *in, int inlen, const char *default_charset) { GString *out; - char *inptr, *inend, *start, *word_start; - char *decoded; - gboolean wasdword = FALSE; - gboolean wasspace = FALSE; - gboolean islatin1 = FALSE; - - out = g_string_new (""); - start = inptr = (char *) in; + const char *inptr, *inend, *start, *locale_charset; + char *dword = NULL; + + locale_charset = camel_charset_locale_name(); + + out = g_string_new(""); + inptr = in; inend = inptr + inlen; - - word_start = NULL; - while (inptr && inptr < inend) { - unsigned char c = *inptr++; - - if (is_lwsp (c) && !wasspace) { - char *word, *dword; - - if (word_start) - word = word_start; - else - word = start; - - dword = rfc2047_decode_word (word, inptr - word - 1); - - if (dword) { - if (!wasdword && word_start) - g_string_append_len (out, start, word_start - start); - - g_string_append (out, dword); - g_free (dword); - wasdword = TRUE; - } else if (islatin1 || !default_charset) { - /* append_latin1 is safe for 7bit ascii too */ - append_latin1 (out, start, inptr - start - 1); - wasdword = FALSE; - } else { - append_8bit (out, start, inptr - start - 1, default_charset); - wasdword = FALSE; - } - - start = inptr - 1; - word_start = NULL; - wasspace = TRUE; - } else if (!is_lwsp (c)) { - wasspace = FALSE; - if (!word_start) - word_start = inptr - 1; - - if (c & 0x80 || c <= 127) - islatin1 = TRUE; - else - islatin1 = FALSE; - } - } - - if (inptr 
- start) { - char *word, *dword; - - if (word_start) - word = word_start; - else - word = start; - - dword = rfc2047_decode_word (word, inptr - word); - + + while (inptr < inend) { + start = inptr; + while (inptr < inend && is_lwsp(*inptr)) + inptr++; + + if (inptr == inend) { + g_string_append_len(out, start, inptr-start); + break; + } else if (dword == NULL) + g_string_append_len(out, start, inptr-start); + + start = inptr; + while (inptr < inend && !is_lwsp(*inptr)) + inptr++; + + dword = rfc2047_decode_word(start, inptr-start); if (dword) { - if (!wasdword && word_start) - g_string_append_len (out, start, word_start - start); - - g_string_append (out, dword); - g_free (dword); - } else if (islatin1 || !default_charset) { - /* append_latin1 is safe for 7bit ascii too */ - append_latin1 (out, start, inptr - start); - } else { - append_8bit (out, start, inptr - start, default_charset); + g_string_append(out, dword); + g_free(dword); + } else if ((default_charset == NULL + || !append_8bit(out, start, inptr-start, default_charset)) + && (locale_charset == NULL + || !append_8bit(out, start, inptr-start, locale_charset))) { + append_latin1(out, start, inptr-start); } } - - decoded = out->str; + + dword = out->str; g_string_free (out, FALSE); - return decoded; + return dword; } char * @@ -1173,7 +1129,7 @@ rfc2047_encode_word(GString *outstring, const char *in, int len, const char *typ ascii = alloca (bufflen); - if (g_strcasecmp (type, "UTF-8") != 0) + if (strcasecmp (type, "UTF-8") != 0) ic = iconv_open (type, "UTF-8"); while (inlen) { @@ -2023,7 +1979,7 @@ header_decode_param (const char **in, char **paramp, char **valuep, int *is_rfc2 char * header_param (struct _header_param *p, const char *name) { - while (p && g_strcasecmp (p->name, name) != 0) + while (p && strcasecmp (p->name, name) != 0) p = p->next; if (p) return p->value; @@ -2037,7 +1993,7 @@ header_set_param (struct _header_param **l, const char *name, const char *value) while (p->next) { pn = p->next; - 
if (!g_strcasecmp (pn->name, name)) { + if (!strcasecmp (pn->name, name)) { g_free (pn->value); if (value) { pn->value = g_strdup (value); @@ -2716,7 +2672,7 @@ header_decode_param_list(const char **in) if (header_decode_param (&inptr, &name, &value, &is_rfc2184) != 0) break; - if (is_rfc2184 && tail && !g_strcasecmp (name, tail->name)) { + if (is_rfc2184 && tail && !strcasecmp (name, tail->name)) { /* rfc2184 allows a parameter to be broken into multiple parts * and it looks like we've found one. Append this value to the * last value. diff --git a/camel/camel-search-private.c b/camel/camel-search-private.c index be90e69d99..759ca7cf3e 100644 --- a/camel/camel-search-private.c +++ b/camel/camel-search-private.c @@ -391,7 +391,7 @@ header_match(const char *value, const char *match, camel_search_match_t how) /* searhces for match inside value, if match is mixed case, hten use case-sensitive, else insensitive */ gboolean -camel_search_header_match (const char *value, const char *match, camel_search_match_t how, camel_search_t type) +camel_search_header_match (const char *value, const char *match, camel_search_match_t how, camel_search_t type, const char *default_charset) { const char *name, *addr; int truth = FALSE; @@ -403,7 +403,7 @@ camel_search_header_match (const char *value, const char *match, camel_search_ma switch(type) { case CAMEL_SEARCH_TYPE_ENCODED: - v = header_decode_string(value, camel_charset_locale_name()); + v = header_decode_string(value, default_charset); /* FIXME: Find header charset */ truth = header_match(v, match, how); g_free(v); break; diff --git a/camel/camel-search-private.h b/camel/camel-search-private.h index 9b00f35060..ac88dd7257 100644 --- a/camel/camel-search-private.h +++ b/camel/camel-search-private.h @@ -47,7 +47,7 @@ typedef enum { int camel_search_build_match_regex(regex_t *pattern, camel_search_flags_t type, int argc, struct _ESExpResult **argv, CamelException *ex); gboolean camel_search_message_body_contains(CamelDataWrapper 
*object, regex_t *pattern); -gboolean camel_search_header_match(const char *value, const char *match, camel_search_match_t how, camel_search_t type); +gboolean camel_search_header_match(const char *value, const char *match, camel_search_match_t how, camel_search_t type, const char *default_charset); gboolean camel_search_header_soundex(const char *header, const char *match); #endif /* ! _CAMEL_SEARCH_PRIVATE_H */ |