diff options
| author | Jeffrey Stedfast <fejj@ximian.com> | 2001-12-18 09:28:27 +0800 | 
|---|---|---|
| committer | Jeffrey Stedfast <fejj@src.gnome.org> | 2001-12-18 09:28:27 +0800 | 
| commit | f6408daa103092f18789a719a4123224b259f71f (patch) | |
| tree | 838b491516e1b3669428136d73019aa9afe5f2c3 | |
| parent | 13299ab7e073cf4d412cec019e4240a7634c1cf5 (diff) | |
| download | gsoc2013-evolution-f6408daa103092f18789a719a4123224b259f71f.tar.gz gsoc2013-evolution-f6408daa103092f18789a719a4123224b259f71f.tar.zst gsoc2013-evolution-f6408daa103092f18789a719a4123224b259f71f.zip  | |
New function to map ISO charsets to the Windows charsets.
2001-12-17  Jeffrey Stedfast  <fejj@ximian.com>
	* camel-charset-map.c (camel_charset_iso_to_windows): New function
	to map ISO charsets to the Windows charsets.
	* camel-mime-part-utils.c (broken_windows_charset): Detect Windows
	charsets.
	(simple_data_wrapper_construct_from_parser): Simplify a tad and
	also check for iso-8859-* charsets that are really Windows
	charsets. Fixes bug #12631.
svn path=/trunk/; revision=15144
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | camel/ChangeLog | 26 |
| -rw-r--r-- | camel/camel-charset-map.c | 55 |
| -rw-r--r-- | camel/camel-charset-map.h | 2 |
| -rw-r--r-- | camel/camel-mime-part-utils.c | 75 |
4 files changed, 127 insertions, 31 deletions
diff --git a/camel/ChangeLog b/camel/ChangeLog index cb8a9eaf6a..b82266ff9e 100644 --- a/camel/ChangeLog +++ b/camel/ChangeLog @@ -1,3 +1,14 @@ +2001-12-17  Jeffrey Stedfast  <fejj@ximian.com> + +	* camel-charset-map.c (camel_charset_iso_to_windows): New function +	to map ISO charsets to the Windows charsets. + +	* camel-mime-part-utils.c (broken_windows_charset): Detect Windows +	charsets. +	(simple_data_wrapper_construct_from_parser): Simplify a tad and +	also check for iso-8859-* charsets that are really Windows +	charsets. Fixes bug #12631. +  2001-12-17  Dan Winship  <danw@ximian.com>  	* Makefile.am (INCLUDES): define CAMEL_PROVIDERDIR to be the @@ -7,11 +18,16 @@  	* providers/imap/Makefile.am (camel_provider_LTLIBRARIES,  	camel_provider_DATA): renamed from provider_LTLIBRARIES, -	provider_DATA. -	* providers/local/Makefile.am: Likewise -	* providers/nntp/Makefile.am: Likewise -	* providers/pop3/Makefile.am: Likewise -	* providers/sendmail/Makefile.am: Likewise +	provider_DATA.   + +	* providers/local/Makefile.am: Likewise  + +	* providers/nntp/Makefile.am: Likewise  + +	* providers/pop3/Makefile.am: Likewise  + +	* providers/sendmail/Makefile.am: Likewise  +  	* providers/smtp/Makefile.am: Likewise  2001-12-16  Jeffrey Stedfast  <fejj@ximian.com> diff --git a/camel/camel-charset-map.c b/camel/camel-charset-map.c index 17962d74be..2416dd2504 100644 --- a/camel/camel-charset-map.c +++ b/camel/camel-charset-map.c @@ -292,5 +292,60 @@ camel_charset_best (const char *in, int len)  	return camel_charset_best_name (&charset);  } + +/** + * camel_charset_iso_to_windows: + * @isocharset: an ISO charset + * + * Returns the equivalent Windows charset. + **/ +const char * +camel_charset_iso_to_windows (const char *isocharset) +{ +	/* According to http://czyborra.com/charsets/codepages.html, +	 * the charset mapping is as follows: +	 * +	 * iso-8859-1  maps to windows-cp1252 +	 * iso-8859-2  maps to windows-cp1250 +	 * iso-8859-3  maps to windows-cp???? 
+	 * iso-8859-4  maps to windows-cp???? +	 * iso-8859-5  maps to windows-cp1251 +	 * iso-8859-6  maps to windows-cp1256 +	 * iso-8859-7  maps to windows-cp1253 +	 * iso-8859-8  maps to windows-cp1255 +	 * iso-8859-9  maps to windows-cp1254 +	 * iso-8859-10 maps to windows-cp???? +	 * iso-8859-11 maps to windows-cp???? +	 * iso-8859-12 maps to windows-cp???? +	 * iso-8859-13 maps to windows-cp1257 +	 * +	 * Assumptions: +	 *  - I'm going to assume that since iso-8859-4 and +	 *    iso-8859-13 are Baltic that it also maps to +	 *    windows-cp1257. +	 */ +	 +	if (!strcasecmp (isocharset, "iso-8859-1")) +		return "windows-cp1252"; +	else if (!strcasecmp (isocharset, "iso-8859-2")) +		return "windows-cp1250"; +	else if (!strcasecmp (isocharset, "iso-8859-4")) +		return "windows-cp1257"; +	else if (!strcasecmp (isocharset, "iso-8859-5")) +		return "windows-cp1251"; +	else if (!strcasecmp (isocharset, "iso-8859-6")) +		return "windows-cp1256"; +	else if (!strcasecmp (isocharset, "iso-8859-7")) +		return "windows-cp1253"; +	else if (!strcasecmp (isocharset, "iso-8859-8")) +		return "windows-cp1255"; +	else if (!strcasecmp (isocharset, "iso-8859-9")) +		return "windows-cp1254"; +	else if (!strcasecmp (isocharset, "iso-8859-13")) +		return "windows-cp1257"; +	 +	return isocharset; +} +  #endif /* !BUILD_MAP */ diff --git a/camel/camel-charset-map.h b/camel/camel-charset-map.h index 7c7022c0a1..0cae1916a6 100644 --- a/camel/camel-charset-map.h +++ b/camel/camel-charset-map.h @@ -37,4 +37,6 @@ const char *camel_charset_best_name(CamelCharset *);  /* helper function */  const char *camel_charset_best(const char *in, int len); +const char *camel_charset_iso_to_windows (const char *isocharset); +  #endif /* ! 
_CAMEL_CHARSET_MAP_H */ diff --git a/camel/camel-mime-part-utils.c b/camel/camel-mime-part-utils.c index 65c99c6dc8..08787df2cd 100644 --- a/camel/camel-mime-part-utils.c +++ b/camel/camel-mime-part-utils.c @@ -155,6 +155,28 @@ convert_buffer (GByteArray *in, const char *to, const char *from)  	return out;  } +/* We don't really use the charset argument except for debugging... */ +static gboolean +broken_windows_charset (GByteArray *buffer, const char *charset) +{ +	register unsigned char *inptr; +	unsigned char *inend; +	 +	inptr = buffer->data; +	inend = inptr + buffer->len; +	 +	while (inptr < inend) { +		register unsigned char c = *inptr++; +		 +		if (c >= 128 && c <= 159) { +			g_warning ("Encountered Windows charset parading as %s", charset); +			return TRUE; +		} +	} +	 +	return FALSE; +} +  static gboolean  is_7bit (GByteArray *buffer)  { @@ -172,33 +194,24 @@ static void  simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser *mp)  {  	CamelMimeFilter *fdec = NULL, *fcrlf = NULL; +	CamelMimeFilterBasicType enctype;  	int len, decid = -1, crlfid = -1;  	struct _header_content_type *ct; +	const char *charset = NULL;  	GByteArray *buffer;  	char *encoding, *buf; -	const char *charset = NULL; -	CamelMimeFilterBasicType enctype = 0;  	CamelStream *mem; - -	d(printf("constructing data-wrapper\n")); +	 +	d(printf ("simple_data_wrapper_construct_from_parser()\n"));  	/* first, work out conversion, if any, required, we dont care about what we dont know about */ -	encoding = header_content_encoding_decode(camel_mime_parser_header(mp, "content-transfer-encoding", NULL)); +	encoding = header_content_encoding_decode (camel_mime_parser_header (mp, "Content-Transfer-Encoding", NULL));  	if (encoding) { -		if (!strcasecmp(encoding, "base64")) { -			d(printf("Adding base64 decoder ...\n")); -			enctype = CAMEL_MIME_FILTER_BASIC_BASE64_DEC; -		} else if (!strcasecmp(encoding, "quoted-printable")) { -			d(printf("Adding quoted-printable decoder 
...\n")); -			enctype = CAMEL_MIME_FILTER_BASIC_QP_DEC; -		} else if (!strcasecmp (encoding, "x-uuencode")) { -			d(printf("Adding uudecoder ...\n")); -			enctype = CAMEL_MIME_FILTER_BASIC_UU_DEC; -		} +		enctype = camel_mime_part_encoding_from_string (encoding);  		g_free (encoding); -		if (enctype != 0) { -			fdec = (CamelMimeFilter *)camel_mime_filter_basic_new_type(enctype); +		if (enctype != CAMEL_MIME_PART_ENCODING_DEFAULT) { +			fdec = (CamelMimeFilter *) camel_mime_filter_basic_new_type (enctype);  			decid = camel_mime_parser_filter_add (mp, fdec);  		}  	} @@ -229,21 +242,32 @@ simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser  		charset = check_html_charset(buffer->data, buffer->len);  	/* if we need to do charset conversion, see if we can/it works/etc */ -	if (charset && !(strcasecmp(charset, "us-ascii") == 0 -			 || strcasecmp(charset, "utf-8") == 0 -			 || strncasecmp(charset, "x-", 2) == 0)) { +	if (charset && !(strcasecmp (charset, "us-ascii") == 0 +			 || strcasecmp (charset, "utf-8") == 0 +			 || strncasecmp (charset, "x-", 2) == 0)) {  		GByteArray *out; -		out = convert_buffer(buffer, "UTF-8", charset); +		/* You often see Microsoft Windows users announcing their texts +		 * as being in ISO-8859-1 even when in fact they contain funny +		 * characters from the Windows-CP1252 superset. +		 */ +		if (!strncasecmp (charset, "iso-8859", 8)) { +			/* check for Windows-specific chars... */ +			if (broken_windows_charset (buffer, charset)) { +				charset = camel_charset_iso_to_windows (charset); +				charset = e_iconv_charset_name (charset); +			} +		} +		 +		out = convert_buffer (buffer, "UTF-8", charset);  		if (out) {  			/* converted ok, use this data instead */  			g_byte_array_free(buffer, TRUE);  			buffer = out;  		} else { -			g_warning("Storing text as raw, unknown charset '%s' or invalid format", charset);  			/* else failed to convert, leave as raw? 
*/ +			g_warning("Storing text as raw, unknown charset '%s' or invalid format", charset);  			dw->rawtext = TRUE; -			/* should we change the content-type header? */  		}  	} else if (header_content_type_is (ct, "text", "*")) {  		if (charset == NULL) { @@ -258,10 +282,9 @@ simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser  			dw->rawtext = !g_utf8_validate (buffer->data, buffer->len, NULL);  		}  	} -			 - +	  	d(printf("message part kept in memory!\n")); -		 +	  	mem = camel_stream_mem_new_with_byte_array(buffer);  	camel_data_wrapper_construct_from_stream(dw, mem);  	camel_object_unref((CamelObject *)mem);  | 
