aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeffrey Stedfast <fejj@ximian.com>2001-07-12 07:56:31 +0800
committerJeffrey Stedfast <fejj@src.gnome.org>2001-07-12 07:56:31 +0800
commit768ef5c60bd59daa227910c68f4b829db480d6ac (patch)
tree1f2a0d1de14e8c6f09898152438232a3b0a14336
parent188ec0d0254b5f1e6fa1f98e70f5ad0990184490 (diff)
downloadgsoc2013-evolution-768ef5c60bd59daa227910c68f4b829db480d6ac.tar.gz
gsoc2013-evolution-768ef5c60bd59daa227910c68f4b829db480d6ac.tar.zst
gsoc2013-evolution-768ef5c60bd59daa227910c68f4b829db480d6ac.zip
New function to extract a meta-tag charset value if it exists.
2001-07-11 Jeffrey Stedfast <fejj@ximian.com> * camel-mime-part-utils.c (extract_metatag_charset): New function to extract a meta-tag charset value if it exists. (simple_data_wrapper_construct_from_parser): Along the same lines as the code I previously ripped out, but this time use the mime-parser's seek ability to help us along. Currently I read up to a 2k buffer size - this is probably overkill, 1k is probably plenty. * camel-mime-utils.c (html_meta_param_list_decode): When we get to an `=', we must skip past it before trying to grab the param value. duh. svn path=/trunk/; revision=11021
-rw-r--r--camel/ChangeLog14
-rw-r--r--camel/camel-mime-filter-charset.c4
-rw-r--r--camel/camel-mime-part-utils.c123
-rw-r--r--camel/camel-mime-utils.c4
4 files changed, 119 insertions, 26 deletions
diff --git a/camel/ChangeLog b/camel/ChangeLog
index 762b025d04..b7914e1745 100644
--- a/camel/ChangeLog
+++ b/camel/ChangeLog
@@ -1,5 +1,19 @@
2001-07-11 Jeffrey Stedfast <fejj@ximian.com>
+ * camel-mime-part-utils.c (extract_metatag_charset): New function
+ to extract a meta-tag charset value if it exists.
+ (simple_data_wrapper_construct_from_parser): Along the same lines
+ as the code I previously ripped out, but this time use the
+ mime-parser's seek ability to help us along. Currently I read up
+ to a 2k buffer size - this is probably overkill, 1k is probably
+ plenty.
+
+ * camel-mime-utils.c (html_meta_param_list_decode): When we get to
+ an `=', we must skip past it before trying to grab the param
+ value. duh.
+
+2001-07-11 Jeffrey Stedfast <fejj@ximian.com>
+
* camel-mime-part-utils.c
(simple_data_wrapper_construct_from_parser): Ripped out my code
since it was never being used since the mime parser is not using a
diff --git a/camel/camel-mime-filter-charset.c b/camel/camel-mime-filter-charset.c
index 34c80c50f9..808e4064a7 100644
--- a/camel/camel-mime-filter-charset.c
+++ b/camel/camel-mime-filter-charset.c
@@ -221,14 +221,14 @@ camel_mime_filter_charset_init (CamelMimeFilterCharset *obj)
CamelMimeFilterCharset *
camel_mime_filter_charset_new (void)
{
- CamelMimeFilterCharset *new = CAMEL_MIME_FILTER_CHARSET ( camel_object_new (camel_mime_filter_charset_get_type ()));
+ CamelMimeFilterCharset *new = CAMEL_MIME_FILTER_CHARSET (camel_object_new (camel_mime_filter_charset_get_type ()));
return new;
}
CamelMimeFilterCharset *
camel_mime_filter_charset_new_convert(const char *from_charset, const char *to_charset)
{
- CamelMimeFilterCharset *new = CAMEL_MIME_FILTER_CHARSET ( camel_object_new (camel_mime_filter_charset_get_type ()));
+ CamelMimeFilterCharset *new = CAMEL_MIME_FILTER_CHARSET (camel_object_new (camel_mime_filter_charset_get_type ()));
new->ic = iconv_open(to_charset, from_charset);
if (new->ic == (iconv_t) -1) {
diff --git a/camel/camel-mime-part-utils.c b/camel/camel-mime-part-utils.c
index adf1e99dc8..3bec8bfb3c 100644
--- a/camel/camel-mime-part-utils.c
+++ b/camel/camel-mime-part-utils.c
@@ -44,6 +44,61 @@
#define d(x) /*(printf("%s(%d): ", __FILE__, __LINE__),(x))*/
+static char *
+extract_metatag_charset (GByteArray *buffer)
+{
+ /* example: <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> */
+ const char *slashhead, *data;
+ char *charset = NULL;
+
+ data = buffer->data;
+
+ slashhead = strstrcase (data, "</head");
+ if (!slashhead)
+ slashhead = data + buffer->len;
+
+ /* Yea, this is ugly */
+ while (data < slashhead) {
+ struct _header_param *params;
+ const char *meta, *metaend;
+ const char *val;
+
+ meta = strstrcase (data, "<meta");
+ if (!meta)
+ break;
+
+ metaend = strchr (meta, '>');
+ if (!metaend)
+ metaend = slashhead;
+ else
+ metaend++;
+
+ params = html_meta_param_list_decode (meta, metaend - meta);
+ if (params) {
+ val = header_param (params, "http-equiv");
+ if (val && !g_strcasecmp (val, "Content-Type")) {
+ struct _header_content_type *content_type;
+
+ val = header_param (params, "content");
+ content_type = header_content_type_decode (val);
+ charset = g_strdup (header_content_type_param (content_type, "charset"));
+
+ header_content_type_unref (content_type);
+ }
+
+ header_param_list_free (params);
+
+ /* break as soon as we find a charset */
+ if (charset)
+ break;
+ }
+
+ data = metaend;
+ }
+
+ return charset;
+}
+
/* simple data wrapper */
static void
simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser *mp)
@@ -91,6 +146,7 @@ simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser
ct = camel_mime_parser_content_type (mp);
if (header_content_type_is (ct, "text", "*")) {
const char *charset = header_content_type_param (ct, "charset");
+ char *acharset = NULL; /* to be alloca'd on demand */
if (fdec) {
d(printf("Adding CRLF conversion filter\n"));
@@ -99,6 +155,28 @@ simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser
crlfid = camel_mime_parser_filter_add (mp, fcrlf);
}
+ /* Possible Lame Mailer Alert... check the META tags for a charset */
+ if (!charset && header_content_type_is (ct, "text", "html")) {
+ GByteArray *bytes;
+ const char *buf;
+ off_t offset;
+ int len;
+
+ offset = camel_mime_parser_tell (mp);
+ /* if we can't find the charset within the first 2k, we ain't gonna find it */
+ len = camel_mime_parser_read (mp, &buf, 2048);
+ camel_mime_parser_seek (mp, offset, SEEK_SET);
+
+ /* we only do this because we need it to be null terminated */
+ bytes = g_byte_array_new ();
+ g_byte_array_append (bytes, buf, len);
+ g_byte_array_append (bytes, "", 1);
+
+ acharset = extract_metatag_charset (bytes);
+ charset = acharset;
+ g_byte_array_free (bytes, TRUE);
+ }
+
/* if the charset is not us-ascii or utf-8, then we need to convert to utf-8 */
if (charset && !(g_strcasecmp (charset, "us-ascii") == 0 || g_strcasecmp (charset, "utf-8") == 0)) {
d(printf("Adding conversion filter from %s to UTF-8\n", charset));
@@ -109,6 +187,8 @@ simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser
g_warning ("Cannot convert '%s' to 'UTF-8', message display may be corrupt", charset);
}
}
+
+ g_free (acharset);
}
buffer = g_byte_array_new ();
@@ -193,47 +273,45 @@ camel_mime_part_construct_content_from_parser (CamelMimePart *dw, CamelMimeParse
CamelDataWrapper *content = NULL;
char *buf;
int len;
-
- printf ("camel_mime_part_construct_content_from_parser()\n");
- switch (camel_mime_parser_state(mp)) {
+ switch (camel_mime_parser_state (mp)) {
case HSCAN_HEADER:
d(printf("Creating body part\n"));
- content = camel_data_wrapper_new();
- simple_data_wrapper_construct_from_parser(content, mp);
+ content = camel_data_wrapper_new ();
+ simple_data_wrapper_construct_from_parser (content, mp);
break;
case HSCAN_MESSAGE:
d(printf("Creating message part\n"));
- content = (CamelDataWrapper *)camel_mime_message_new();
- camel_mime_part_construct_from_parser((CamelMimePart *)content, mp);
+ content = (CamelDataWrapper *) camel_mime_message_new ();
+ camel_mime_part_construct_from_parser ((CamelMimePart *)content, mp);
break;
case HSCAN_MULTIPART: {
CamelDataWrapper *bodypart;
-
+
#ifndef NO_WARNINGS
#warning This should use a camel-mime-multipart
#endif
d(printf("Creating multi-part\n"));
- content = (CamelDataWrapper *)camel_multipart_new();
-
+ content = (CamelDataWrapper *)camel_multipart_new ();
+
/* FIXME: use the real boundary? */
- camel_multipart_set_boundary((CamelMultipart *)content, NULL);
- while (camel_mime_parser_step(mp, &buf, &len) != HSCAN_MULTIPART_END) {
- camel_mime_parser_unstep(mp);
- bodypart = (CamelDataWrapper *)camel_mime_part_new();
- camel_mime_part_construct_from_parser((CamelMimePart *)bodypart, mp);
- camel_multipart_add_part((CamelMultipart *)content, (CamelMimePart *)bodypart);
+ camel_multipart_set_boundary ((CamelMultipart *)content, NULL);
+ while (camel_mime_parser_step (mp, &buf, &len) != HSCAN_MULTIPART_END) {
+ camel_mime_parser_unstep (mp);
+ bodypart = (CamelDataWrapper *)camel_mime_part_new ();
+ camel_mime_part_construct_from_parser ((CamelMimePart *)bodypart, mp);
+ camel_multipart_add_part ((CamelMultipart *)content, (CamelMimePart *)bodypart);
camel_object_unref ((CamelObject *)bodypart);
}
-
+
/* these are only return valid data in the MULTIPART_END state */
- camel_multipart_set_preface((CamelMultipart *)content, camel_mime_parser_preface(mp));
- camel_multipart_set_postface((CamelMultipart *)content, camel_mime_parser_postface(mp));
-
+ camel_multipart_set_preface ((CamelMultipart *)content, camel_mime_parser_preface (mp));
+ camel_multipart_set_postface ((CamelMultipart *)content, camel_mime_parser_postface (mp));
+
d(printf("Created multi-part\n"));
break; }
default:
- g_warning("Invalid state encountered???: %d", camel_mime_parser_state(mp));
+ g_warning("Invalid state encountered???: %d", camel_mime_parser_state (mp));
}
if (content) {
#ifndef NO_WARNINGS
@@ -242,8 +320,7 @@ camel_mime_part_construct_content_from_parser (CamelMimePart *dw, CamelMimeParse
/* would you believe you have to set this BEFORE you set the content object??? oh my god !!!! */
camel_data_wrapper_set_mime_type_field (content,
camel_mime_part_get_content_type ((CamelMimePart *)dw));
- camel_medium_set_content_object((CamelMedium *)dw, content);
+ camel_medium_set_content_object ((CamelMedium *)dw, content);
camel_object_unref ((CamelObject *)content);
}
}
-
diff --git a/camel/camel-mime-utils.c b/camel/camel-mime-utils.c
index b87824e862..eb228cd748 100644
--- a/camel/camel-mime-utils.c
+++ b/camel/camel-mime-utils.c
@@ -1685,7 +1685,7 @@ header_decode_quoted_string(const char **in)
}
*outptr++ = c;
}
- *outptr = 0;
+ *outptr = '\0';
}
*in = inptr;
return out;
@@ -2723,6 +2723,7 @@ header_param_list_decode(const char *in)
struct _header_param *
html_meta_param_list_decode (const char *in, int inlen)
{
+ /* example: <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> */
struct _header_param *params = NULL, *last = NULL;
const char *inptr, *inend;
@@ -2753,6 +2754,7 @@ html_meta_param_list_decode (const char *in, int inlen)
break;
}
+ inptr++;
value = header_decode_value (&inptr);
header_decode_lwsp (&inptr);