If the type is encoded, get the fallback charset from the message

2001-09-24 <NotZed@Ximian.com> * camel-filter-search.c (check_header): If the type is encoded, get the fallback charset from the message content-type. * camel-mime-part-utils.c (check_html_charset): Don't check for charset==null before calling charset_to_iconv. (simple_data_wrapper_construct_from_parser): Likewise. * camel-mime-message.c (process_header): Try to use the content-type charset param as the fallback charset. * camel-charset-map.c (camel_charset_to_iconv): Handle name == NULL, return NULL. * camel-folder-summary.c (camel_folder_summary_format_address): (camel_folder_summary_format_string): Made private again, removed #warning about it. Renamed to s/camel_folder//. (summary_format_string): Take default charset param. (camel_message_info_new_from_header, message_info_new): Decode content-type field to get the charset parameter to use as the default charset for decoding strings. * camel-search-private.c (camel_search_header_match): Pass NULL as the charset; the locale charset is always tried. (camel_search_header_match): Supply a default_charset parameter to be used with TYPE_ENCODED params. * camel-mime-utils.c (header_param): Get rid of the g_strcasecmp crap. (header_set_param): Same here. (header_decode_param_list): And here. (header_decode_text): Totally rewritten. Now 30% of its previous size. If the word is not rfc2047 encoded, always try default_charset if supplied; if that fails, try the locale charset if it exists; if that fails, then assume latin1/7-bit ASCII. (append_8bit): Changed to return FALSE if we can't convert for whatever reason, and don't append anything. * camel-mime-part.h (struct _CamelMimePart): Move content_type and headers out of the 'private' section. * camel-mime-part.c (get_headers): Don't do any conversion on the header. (process_header): Get the content-type charset as the fallback charset for decode_string. 
(construct_from_parser): If we have a content-type header, process it before doing anything else, so we have access to a fallback charset for invalid headers. svn path=/trunk/; revision=13096
author: 4 <NotZed@Ximian.com> 2001-09-25 03:31:07 +0800
committer: Michael Zucci <zucchi@src.gnome.org> 2001-09-25 03:31:07 +0800
commit: 450e955e76ca9174658c0bb94e99e4174df2fe48 (patch)
tree: 6d708cc2b707fc24618cdcf6fdd9ed2539a2189e /camel/camel-mime-utils.c
parent: 8ac999f87feb11e3e33f0a5ab33d92f15e57c613 (diff)
download: gsoc2013-evolution-450e955e76ca9174658c0bb94e99e4174df2fe48.tar.gz
gsoc2013-evolution-450e955e76ca9174658c0bb94e99e4174df2fe48.tar.zst
gsoc2013-evolution-450e955e76ca9174658c0bb94e99e4174df2fe48.zip
1 files changed, 59 insertions, 103 deletions
diff --git a/camel/camel-mime-utils.c b/camel/camel-mime-utils.c
index e1f483023d..3498a9fd74 100644
--- a/camel/camel-mime-utils.c
+++ b/camel/camel-mime-utils.c
@@ -1021,124 +1021,80 @@ append_latin1 (GString *out, const char *in, int len)
 	return out;
 }
 
-static void
-append_8bit (GString *out, const char *inbuf, int inlen, const char *default_charset)
+static int
+append_8bit (GString *out, const char *inbuf, int inlen, const char *charset)
 {
 	char *outbase, *outbuf;
 	int outlen;
 	iconv_t ic;
 	
-	ic = iconv_open ("UTF-8", default_charset);
-	if (ic != (iconv_t) -1) {
-		int ret;
-		
-		outlen = inlen * 6 + 16;
-		outbuf = outbase = g_malloc (outlen);
-		
-		ret = iconv (ic, &inbuf, &inlen, &outbuf, &outlen);
-		if (ret >= 0) {
-			iconv (ic, NULL, 0, &outbuf, &outlen);
-			*outbuf = '\0';
-		}
-		
-		iconv_close (ic);
+	ic = iconv_open ("UTF-8", charset);
+	if (ic == (iconv_t) -1)
+		return FALSE;
+
+	outlen = inlen * 6 + 16;
+	outbuf = outbase = g_malloc(outlen);
 		
-		/* FIXME: is outlen == strlen (outbuf) ?? */
-		g_string_append_len (out, outbase, strlen (outbase));
-	} else {
-		/* bah, completely broken...just append as raw text */
-		g_string_append_len (out, inbuf, inlen);
+	if (iconv(ic, &inbuf, &inlen, &outbuf, &outlen) == -1) {
+		w(g_warning("Conversion to '%s' failed: %s", charset, strerror(errno)));
+		g_free(outbase);
+		return FALSE;
 	}
+
+	*outbuf = 0;
+	g_string_append(out, outbase);
+	g_free(outbase);
+	iconv_close(ic);
+
+	return TRUE;
+	
 }
 
-/* decodes a simple text, rfc822 */
+/* decodes a simple text, rfc822 + rfc2047 */
 static char *
 header_decode_text (const char *in, int inlen, const char *default_charset)
 {
 	GString *out;
-	char *inptr, *inend, *start, *word_start;
-	char *decoded;
-	gboolean wasdword = FALSE;
-	gboolean wasspace = FALSE;
-	gboolean islatin1 = FALSE;
-	
-	out = g_string_new ("");
-	start = inptr = (char *) in;
+	const char *inptr, *inend, *start, *locale_charset;
+	char *dword = NULL;
+
+	locale_charset = camel_charset_locale_name();
+
+	out = g_string_new("");
+	inptr = in;
 	inend = inptr + inlen;
-	
-	word_start = NULL;
-	while (inptr && inptr < inend) {
-		unsigned char c = *inptr++;
-		
-		if (is_lwsp (c) && !wasspace) {
-			char *word, *dword;
-			
-			if (word_start)
-				word = word_start;
-			else
-				word = start;
-			
-			dword = rfc2047_decode_word (word, inptr - word - 1);
-			
-			if (dword) {
-				if (!wasdword && word_start)
-					g_string_append_len (out, start, word_start - start);
-				
-				g_string_append (out, dword);
-				g_free (dword);
-				wasdword = TRUE;
-			} else if (islatin1 || !default_charset) {
-				/* append_latin1 is safe for 7bit ascii too */
-				append_latin1 (out, start, inptr - start - 1);
-				wasdword = FALSE;
-			} else {
-				append_8bit (out, start, inptr - start - 1, default_charset);
-				wasdword = FALSE;
-			}
-			
-			start = inptr - 1;
-			word_start = NULL;
-			wasspace = TRUE;
-		} else if (!is_lwsp (c)) {
-			wasspace = FALSE;
-			if (!word_start)
-				word_start = inptr - 1;
-			
-			if (c & 0x80 || c <= 127)
-				islatin1 = TRUE;
-			else
-				islatin1 = FALSE;
-		}
-	}
-	
-	if (inptr - start) {
-		char *word, *dword;
-		
-		if (word_start)
-			word = word_start;
-		else
-			word = start;
-		
-		dword = rfc2047_decode_word (word, inptr - word);
-		
+
+	while (inptr < inend) {
+		start = inptr;
+		while (inptr < inend && is_lwsp(*inptr))
+			inptr++;
+
+		if (inptr == inend) {
+			g_string_append_len(out, start, inptr-start);
+			break;
+		} else if (dword == NULL)
+			g_string_append_len(out, start, inptr-start);
+
+		start = inptr;
+		while (inptr < inend && !is_lwsp(*inptr))
+			inptr++;
+
+		dword = rfc2047_decode_word(start, inptr-start);
 		if (dword) {
-			if (!wasdword && word_start)
-				g_string_append_len (out, start, word_start - start);
-			
-			g_string_append (out, dword);
-			g_free (dword);
-		} else if (islatin1 || !default_charset) {
-			/* append_latin1 is safe for 7bit ascii too */
-			append_latin1 (out, start, inptr - start);
-		} else {
-			append_8bit (out, start, inptr - start, default_charset);
+			g_string_append(out, dword);
+			g_free(dword);
+		} else if ((default_charset == NULL
+			    || !append_8bit(out, start, inptr-start, default_charset))
+			   && (locale_charset == NULL
+			       || !append_8bit(out, start, inptr-start, locale_charset))) {
+			append_latin1(out, start, inptr-start);
 		}
 	}
-	
-	decoded = out->str;
+
+	dword = out->str;
 	g_string_free (out, FALSE);
 	
-	return decoded;
+	return dword;
 }
 
 char *
@@ -1173,7 +1129,7 @@ rfc2047_encode_word(GString *outstring, const char *in, int len, const char *typ
 	
 	ascii = alloca (bufflen);
 	
-	if (g_strcasecmp (type, "UTF-8") != 0)
+	if (strcasecmp (type, "UTF-8") != 0)
 		ic = iconv_open (type, "UTF-8");
 	
 	while (inlen) {
@@ -2023,7 +1979,7 @@ header_decode_param (const char **in, char **paramp, char **valuep, int *is_rfc2
 char *
 header_param (struct _header_param *p, const char *name)
 {
-	while (p && g_strcasecmp (p->name, name) != 0)
+	while (p && strcasecmp (p->name, name) != 0)
 		p = p->next;
 	if (p)
 		return p->value;
@@ -2037,7 +1993,7 @@ header_set_param (struct _header_param **l, const char *name, const char *value)
 
 	while (p->next) {
 		pn = p->next;
-		if (!g_strcasecmp (pn->name, name)) {
+		if (!strcasecmp (pn->name, name)) {
 			g_free (pn->value);
 			if (value) {
 				pn->value = g_strdup (value);
@@ -2716,7 +2672,7 @@ header_decode_param_list(const char **in)
 		if (header_decode_param (&inptr, &name, &value, &is_rfc2184) != 0)
 			break;
 		
-		if (is_rfc2184 && tail && !g_strcasecmp (name, tail->name)) {
+		if (is_rfc2184 && tail && !strcasecmp (name, tail->name)) {
 			/* rfc2184 allows a parameter to be broken into multiple parts
 			 * and it looks like we've found one. Append this value to the
 			 * last value.
author	4 <NotZed@Ximian.com>	2001-09-25 03:31:07 +0800
committer	Michael Zucci <zucchi@src.gnome.org>	2001-09-25 03:31:07 +0800
commit	450e955e76ca9174658c0bb94e99e4174df2fe48 (patch)
tree	6d708cc2b707fc24618cdcf6fdd9ed2539a2189e /camel/camel-mime-utils.c
parent	8ac999f87feb11e3e33f0a5ab33d92f15e57c613 (diff)
download	gsoc2013-evolution-450e955e76ca9174658c0bb94e99e4174df2fe48.tar.gz gsoc2013-evolution-450e955e76ca9174658c0bb94e99e4174df2fe48.tar.zst gsoc2013-evolution-450e955e76ca9174658c0bb94e99e4174df2fe48.zip