From 35bce8194110451a354c31be4cf22c5f965b48c5 Mon Sep 17 00:00:00 2001 From: Jeffrey Stedfast Date: Tue, 25 Feb 2003 20:07:35 +0000 Subject: canonicalise the charset name (if it is an iso charset) so that our 2003-02-25 Jeffrey Stedfast * camel-mime-part-utils.c (simple_data_wrapper_construct_from_parser): canonicalise the charset name (if it is an iso charset) so that our strncasecmp to see if it is an iso-8859-# charset will be guaranteed to work on all systems. (canon_charset_name): New function to return the canonical iso charset name. (simple_data_wrapper_construct_from_parser): If the charset is NULL *or* the charset == "us-ascii" then check that it is 7bit clean to decide if it is rawtext (we did not check the case where charset was "us-ascii" before). svn path=/trunk/; revision=20057 --- camel/ChangeLog | 14 +++++++++++ camel/camel-mime-part-utils.c | 58 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/camel/ChangeLog b/camel/ChangeLog index 34f36dcd53..29be0f4807 100644 --- a/camel/ChangeLog +++ b/camel/ChangeLog @@ -1,3 +1,17 @@ +2003-02-25 Jeffrey Stedfast + + * camel-mime-part-utils.c + (simple_data_wrapper_construct_from_parser): canonicalise the + charset name (if it is an iso charset) so that our strncasecmp to + see if it is an iso-8859-# charset will be guaranteed to work on + all systems. + (canon_charset_name): New function to return the canonical iso + charset name. + (simple_data_wrapper_construct_from_parser): If the charset is + NULL *or* the charset == "us-ascii" then check that it is 7bit + clean to decide if it is rawtext (we did not check the case where + charset was "us-ascii" before). + 2003-02-25 Jeffrey Stedfast * camel-mime-message.c: Same as the ones below. 
diff --git a/camel/camel-mime-part-utils.c b/camel/camel-mime-part-utils.c index f7f1d7dfe7..32f4397d64 100644 --- a/camel/camel-mime-part-utils.c +++ b/camel/camel-mime-part-utils.c @@ -222,6 +222,60 @@ is_7bit (GByteArray *buffer) return TRUE; } +static const char *iso_charsets[] = { + "us-ascii", + "iso-8859-1", + "iso-8859-2", + "iso-8859-3", + "iso-8859-4", + "iso-8859-5", + "iso-8859-6", + "iso-8859-7", + "iso-8859-8", + "iso-8859-9", + "iso-8859-10", + "iso-8859-11", + "iso-8859-12", + "iso-8859-13", + "iso-8859-14", + "iso-8859-15", + "iso-8859-16" +}; + +#define NUM_ISO_CHARSETS (sizeof (iso_charsets) / sizeof (iso_charsets[0])) + +static const char * +canon_charset_name (const char *charset) +{ + const char *ptr; + char *endptr; + int iso; + + if (strncasecmp (charset, "iso", 3) != 0) + return charset; + + ptr = charset + 3; + if (*ptr == '-' || *ptr == '_') + ptr++; + + /* if it's not an iso-8859-# charset, we don't care about it */ + if (strncmp (ptr, "8859", 4) != 0) + return charset; + + ptr += 4; + if (*ptr == '-' || *ptr == '_') + ptr++; + + iso = strtoul (ptr, &endptr, 10); + if (endptr == ptr || *endptr != '\0') + return charset; + + if (iso >= NUM_ISO_CHARSETS) + return charset; + + return iso_charsets[iso]; +} + /* simple data wrapper */ static void simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser *mp) @@ -311,7 +365,7 @@ simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser * as being in ISO-8859-1 even when in fact they contain funny * characters from the Windows-CP1252 superset. */ - /* FIXME: not all systems will use the canonical "iso-8859-#" format */ + charset = canon_charset_name (charset); if (!strncasecmp (charset, "iso-8859", 8)) { /* check for Windows-specific chars... 
*/ if (broken_windows_charset (buffer, charset)) @@ -329,7 +383,7 @@ simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser dw->rawtext = TRUE; } } else if (header_content_type_is (ct, "text", "*")) { - if (charset == NULL) { + if (charset == NULL || !strcasecmp (charset, "us-ascii")) { /* check that it's 7bit */ dw->rawtext = !is_7bit (buffer); } else if (!strncasecmp (charset, "x-", 2)) { -- cgit