aboutsummaryrefslogtreecommitdiffstats
path: root/camel/camel-mime-part-utils.c
diff options
context:
space:
mode:
Diffstat (limited to 'camel/camel-mime-part-utils.c')
-rw-r--r-- camel/camel-mime-part-utils.c 390
1 files changed, 0 insertions, 390 deletions
diff --git a/camel/camel-mime-part-utils.c b/camel/camel-mime-part-utils.c
deleted file mode 100644
index 987e5e77d7..0000000000
--- a/camel/camel-mime-part-utils.c
+++ /dev/null
@@ -1,390 +0,0 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8; fill-column: 160 -*- */
-/* camel-mime-part-utils : Utility for mime parsing and so on
- *
- * Authors: Bertrand Guiheneuf <bertrand@helixcode.com>
- * Michael Zucchi <notzed@ximian.com>
- * Jeffrey Stedfast <fejj@ximian.com>
- *
- * Copyright 1999, 2000 Ximian, Inc. (www.ximian.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include <errno.h>
-
-#include <gal/util/e-iconv.h>
-#include <gal/unicode/gunicode.h>
-
-#include "string-utils.h"
-#include "camel-mime-part-utils.h"
-#include "camel-mime-message.h"
-#include "camel-multipart.h"
-#include "camel-seekable-substream.h"
-#include "camel-stream-fs.h"
-#include "camel-stream-filter.h"
-#include "camel-stream-mem.h"
-#include "camel-mime-filter-basic.h"
-#include "camel-mime-filter-charset.h"
-#include "camel-mime-filter-crlf.h"
-#include "camel-mime-filter-save.h"
-#include "camel-html-parser.h"
-#include "camel-charset-map.h"
-
-#define d(x) /*(printf("%s(%d): ", __FILE__, __LINE__),(x))*/ /* debug trace hook: expands to nothing; the commented-out body would print file and line before evaluating x */
-
-/*
- * check_html_charset:
- * @buffer: HTML text to scan
- * @length: number of bytes in @buffer
- *
- * Steps through the HTML looking for an element such as
- *   <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
- * and returns the charset named there after passing it through
- * e_iconv_charset_name(). Returns NULL if the parser reaches EOF without
- * yielding a charset.
- */
-static const char *
-check_html_charset(char *buffer, int length)
-{
-	CamelHTMLParser *parser;
-	const char *found = NULL;
-	camel_html_parser_t state;
-
-	/* if we need to first base64/qp decode, do this here, sigh */
-	parser = camel_html_parser_new();
-	camel_html_parser_set_data(parser, buffer, length, TRUE);
-
-	for (;;) {
-		struct _header_content_type *ct;
-		const char *data;
-		const char *val;
-		int len;
-
-		state = camel_html_parser_step(parser, &data, &len);
-
-		/* only elements are of interest, everything else is ignored */
-		if (state == CAMEL_HTML_PARSER_ELEMENT) {
-			val = camel_html_parser_tag(parser);
-			d(printf("Got tag: %s\n", val));
-
-			if (g_strcasecmp(val, "meta") == 0) {
-				val = camel_html_parser_attr(parser, "http-equiv");
-				if (val != NULL && g_strcasecmp(val, "content-type") == 0) {
-					val = camel_html_parser_attr(parser, "content");
-					if (val != NULL && (ct = header_content_type_decode(val)) != NULL) {
-						found = header_content_type_param(ct, "charset");
-						found = e_iconv_charset_name (found);
-						header_content_type_unref(ct);
-					}
-				}
-			}
-		}
-
-		/* stop at the first charset found, or when the document runs out */
-		if (found != NULL || state == CAMEL_HTML_PARSER_EOF)
-			break;
-	}
-
-	camel_object_unref((CamelObject *)parser);
-
-	return found;
-}
-
-/*
- * convert_buffer:
- * @in: bytes to convert
- * @to: name of the target charset
- * @from: name of the charset @in is encoded in
- *
- * Converts the contents of @in from @from to @to with e_iconv(), growing the
- * output buffer whenever the converter reports E2BIG. An incomplete multibyte
- * sequence in the input (EINVAL) is ignored.
- *
- * Returns a newly allocated GByteArray holding the converted bytes (an empty
- * one when @in is empty), or NULL if the converter could not be opened or the
- * conversion failed with any other error. @in is only read, never modified.
- */
-static GByteArray *
-convert_buffer (GByteArray *in, const char *to, const char *from)
-{
-	size_t inleft, outleft, outlen, converted = 0;
-	GByteArray *out = NULL;
-	const char *inbuf;
-	char *outbuf;
-	iconv_t cd;
-	int err;
-
-	if (in->len == 0)
-		return g_byte_array_new();
-
-	d(printf("converting buffer from %s to %s: '%.*s'\n", from, to, (int)in->len, in->data));
-
-	cd = e_iconv_open(to, from);
-	if (cd == (iconv_t) -1) {
-		g_warning ("Cannot convert from '%s' to '%s': %s", from, to, g_strerror (errno));
-		return NULL;
-	}
-
-	outlen = in->len * 2 + 16;
-	out = g_byte_array_new ();
-	g_byte_array_set_size (out, outlen);
-
-	inbuf = (const char *) in->data;
-	inleft = in->len;
-
-	do {
-		outbuf = (char *) out->data + converted;
-		outleft = outlen - converted;
-
-		/* errno is only meaningful when the call fails: a successful
-		 * conversion leaves behind whatever an earlier failure stored,
-		 * so latch the error code per call instead of re-reading errno */
-		err = 0;
-		if (e_iconv (cd, &inbuf, &inleft, &outbuf, &outleft) == (size_t) -1) {
-			err = errno;
-			if (err != E2BIG && err != EINVAL)
-				goto fail;
-		}
-
-		converted = outlen - outleft;
-
-		/*
-		 * E2BIG   There is not sufficient room at *outbuf.
-		 *
-		 * We just need to grow our outbuffer and try again.
-		 */
-		if (err == E2BIG) {
-			outlen += inleft * 2 + 16;
-			g_byte_array_set_size (out, outlen);
-		}
-	} while (err == E2BIG && inleft > 0);
-
-	/*
-	 * EINVAL  An incomplete multibyte sequence has been encountered
-	 *         in the input.
-	 *
-	 * We'll just have to ignore it...
-	 */
-
-	/* the array may have been grown (and moved) after outbuf/outleft were
-	 * last written, so derive them again from the array itself */
-	outbuf = (char *) out->data + converted;
-	outleft = outlen - converted;
-
-	/* flush the iconv conversion */
-	e_iconv (cd, NULL, NULL, &outbuf, &outleft);
-
-	/* now set the true length on the GByteArray */
-	converted = outlen - outleft;
-	g_byte_array_set_size (out, converted);
-
-	e_iconv_close (cd);
-
-	return out;
-
-fail:
-	g_warning ("Cannot convert from '%s' to '%s': %s", from, to, g_strerror (err));
-
-	g_byte_array_free (out, TRUE);
-
-	e_iconv_close (cd);
-
-	return NULL;
-}
-
-/*
- * broken_windows_charset:
- * @buffer: raw text to inspect
- * @charset: declared charset; only used in the warning message
- *
- * Returns TRUE (after logging a warning) as soon as a byte in the range
- * 128..159 is seen in @buffer, FALSE if there is none.
- */
-static gboolean
-broken_windows_charset (GByteArray *buffer, const char *charset)
-{
-	unsigned int i;
-
-	for (i = 0; i < buffer->len; i++) {
-		unsigned char byte = buffer->data[i];
-
-		if (byte < 0x80 || byte > 0x9f)
-			continue;
-
-		g_warning ("Encountered Windows charset parading as %s", charset);
-		return TRUE;
-	}
-
-	return FALSE;
-}
-
-/*
- * is_7bit:
- * @buffer: data to inspect
- *
- * Returns TRUE when no byte of @buffer is greater than 127 (an empty buffer
- * therefore counts as 7bit), FALSE otherwise.
- */
-static gboolean
-is_7bit (GByteArray *buffer)
-{
-	const unsigned char *cur = buffer->data;
-	const unsigned char *end = cur + buffer->len;
-
-	while (cur < end) {
-		if (*cur++ & 0x80)
-			return FALSE;
-	}
-
-	return TRUE;
-}
-
-/* simple data wrapper
- *
- * Reads the body the parser @mp is positioned on into memory and constructs
- * @dw from a memory stream over it.
- *
- * A decoding filter is installed on the parser for the base64,
- * quoted-printable and x-uuencode Content-Transfer-Encodings; any other
- * encoding is read without a decoder. For text parts whose charset is known
- * and is not us-ascii, utf-8 or an "x-" name, the buffer is converted to
- * UTF-8, and dw->rawtext is set to TRUE if that conversion fails. For the
- * remaining text parts dw->rawtext is set from a 7bit check (no charset), to
- * TRUE (an "x-" charset), or from UTF-8 validation (utf-8).
- */
-static void
-simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser *mp)
-{
- CamelMimeFilter *fdec = NULL, *fcrlf = NULL;
- CamelMimeFilterBasicType enctype = 0;
- int len, decid = -1, crlfid = -1;
- struct _header_content_type *ct;
- const char *charset = NULL;
- GByteArray *buffer;
- char *encoding, *buf;
- CamelStream *mem;
-
- d(printf ("simple_data_wrapper_construct_from_parser()\n"));
-
- /* first, work out what conversion, if any, is required; we don't care about encodings we don't know about */
- encoding = header_content_encoding_decode (camel_mime_parser_header (mp, "Content-Transfer-Encoding", NULL));
- if (encoding) {
- if (!strcasecmp (encoding, "base64")) {
- d(printf("Adding base64 decoder ...\n"));
- enctype = CAMEL_MIME_FILTER_BASIC_BASE64_DEC;
- } else if (!strcasecmp (encoding, "quoted-printable")) {
- d(printf("Adding quoted-printable decoder ...\n"));
- enctype = CAMEL_MIME_FILTER_BASIC_QP_DEC;
- } else if (!strcasecmp (encoding, "x-uuencode")) {
- d(printf("Adding uudecoder ...\n"));
- enctype = CAMEL_MIME_FILTER_BASIC_UU_DEC;
- }
- g_free (encoding);
-
- if (enctype != 0) { /* 0 means none of the encodings above matched */
- fdec = (CamelMimeFilter *)camel_mime_filter_basic_new_type(enctype);
- decid = camel_mime_parser_filter_add (mp, fdec);
- }
- }
-
- /* If we're doing text, we also need to do CRLF->LF and may have to convert it to UTF8 as well.
-    Note that the CRLF filter is only added when a transfer decoder was installed above. */
- ct = camel_mime_parser_content_type(mp);
- if (header_content_type_is(ct, "text", "*")) {
- charset = header_content_type_param(ct, "charset");
- charset = e_iconv_charset_name(charset);
-
- if (fdec) {
- d(printf("Adding CRLF conversion filter\n"));
- fcrlf = (CamelMimeFilter *)camel_mime_filter_crlf_new(CAMEL_MIME_FILTER_CRLF_DECODE,
- CAMEL_MIME_FILTER_CRLF_MODE_CRLF_ONLY);
- crlfid = camel_mime_parser_filter_add(mp, fcrlf);
- }
- }
-
- /* read in the entire content, one parser step at a time, until the end of the body */
- buffer = g_byte_array_new();
- while (camel_mime_parser_step(mp, &buf, &len) != HSCAN_BODY_END) {
- d(printf("appending o/p data: %d: %.*s\n", len, len, buf));
- g_byte_array_append(buffer, buf, len);
- }
-
- /* Possible Lame Mailer Alert... no charset in the headers, so check the META tags of an HTML body for one */
- if (!charset && header_content_type_is (ct, "text", "html"))
- charset = check_html_charset(buffer->data, buffer->len);
-
- /* if we need to do charset conversion, see if we can/it works/etc;
-    us-ascii, utf-8 and "x-" charsets are never converted */
- if (charset && !(strcasecmp (charset, "us-ascii") == 0
- || strcasecmp (charset, "utf-8") == 0
- || strncasecmp (charset, "x-", 2) == 0)) {
- GByteArray *out;
-
- /* You often see Microsoft Windows users announcing their texts
- * as being in ISO-8859-1 even when in fact they contain funny
- * characters from the Windows-CP1252 superset.
- */
- if (!strncasecmp (charset, "iso-8859", 8)) {
- /* check for Windows-specific chars (bytes 128..159)... */
- if (broken_windows_charset (buffer, charset)) {
- charset = camel_charset_iso_to_windows (charset);
- charset = e_iconv_charset_name (charset);
- }
- }
-
- out = convert_buffer (buffer, "UTF-8", charset);
- if (out) {
- /* converted ok, use this data instead of the raw buffer */
- g_byte_array_free(buffer, TRUE);
- buffer = out;
- } else {
- /* else failed to convert, keep the unconverted buffer and flag it as raw */
- g_warning("Storing text as raw, unknown charset '%s' or invalid format", charset);
- dw->rawtext = TRUE;
- }
- } else if (header_content_type_is (ct, "text", "*")) {
- if (charset == NULL) {
- /* check that it's 7bit; anything else is flagged as raw */
- dw->rawtext = !is_7bit (buffer);
- } else if (!strncasecmp (charset, "x-", 2)) {
- /* we're not even going to bother trying to convert, so set the
- rawtext bit to TRUE and let the mailer deal with it. */
- dw->rawtext = TRUE;
- } else if (!strcasecmp (charset, "utf-8")) {
- /* check that it is valid utf8; if not, flag it as raw */
- dw->rawtext = !g_utf8_validate (buffer->data, buffer->len, NULL);
- }
- }
-
- d(printf("message part kept in memory!\n"));
-
- mem = camel_stream_mem_new_with_byte_array(buffer); /* NOTE(review): buffer is not freed here, presumably the stream takes ownership -- confirm */
- camel_data_wrapper_construct_from_stream(dw, mem);
- camel_object_unref((CamelObject *)mem);
-
- camel_mime_parser_filter_remove(mp, decid); /* decid is still -1 if no decoder was added; NOTE(review): presumably filter_remove tolerates -1 -- confirm */
- camel_mime_parser_filter_remove(mp, crlfid); /* likewise crlfid */
-
- if (fdec)
- camel_object_unref((CamelObject *)fdec);
- if (fcrlf)
- camel_object_unref((CamelObject *)fcrlf);
-}
-
-/*
- * camel_mime_part_construct_content_from_parser:
- * @dw: mime part that receives the content object
- * @mp: parser positioned on the part being read
- *
- * Builds a content object matching the current parser state -- a plain data
- * wrapper after a header, a nested mime message, or a multipart holding one
- * mime part per sub-part -- and sets it as the content object of @dw. Any
- * other parser state only logs a warning and leaves @dw without new content.
- *
- * This replaces the data wrapper repository ... and/or could be replaced by it?
- */
-void
-camel_mime_part_construct_content_from_parser (CamelMimePart *dw, CamelMimeParser *mp)
-{
-	CamelDataWrapper *content = NULL;
-
-	switch (camel_mime_parser_state (mp)) {
-	case HSCAN_HEADER:
-		d(printf("Creating body part\n"));
-		content = camel_data_wrapper_new ();
-		simple_data_wrapper_construct_from_parser (content, mp);
-		break;
-	case HSCAN_MESSAGE:
-		d(printf("Creating message part\n"));
-		content = (CamelDataWrapper *) camel_mime_message_new ();
-		camel_mime_part_construct_from_parser ((CamelMimePart *) content, mp);
-		break;
-	case HSCAN_MULTIPART: {
-		struct _header_content_type *ct;
-		CamelMultipart *multipart;
-		CamelMimePart *subpart;
-		char *data;
-		int datalen;
-
-		/* FIXME: we should use a camel-mime-multipart, not just a camel-multipart, but who cares */
-		d(printf("Creating multi-part\n"));
-
-		content = (CamelDataWrapper *) camel_multipart_new ();
-		multipart = (CamelMultipart *) content;
-
-		ct = camel_mime_parser_content_type (mp);
-		camel_multipart_set_boundary (multipart, header_content_type_param (ct, "boundary"));
-
-		for (;;) {
-			if (camel_mime_parser_step (mp, &data, &datalen) == HSCAN_MULTIPART_END)
-				break;
-
-			/* push the state back so the sub-part constructor sees it */
-			camel_mime_parser_unstep (mp);
-
-			subpart = (CamelMimePart *) camel_mime_part_new ();
-			camel_mime_part_construct_from_parser (subpart, mp);
-			camel_multipart_add_part (multipart, subpart);
-			camel_object_unref ((CamelObject *) subpart);
-		}
-
-		/* these only return valid data in the MULTIPART_END state */
-		camel_multipart_set_preface (multipart, camel_mime_parser_preface (mp));
-		camel_multipart_set_postface (multipart, camel_mime_parser_postface (mp));
-
-		d(printf("Created multi-part\n"));
-		break;
-	}
-	default:
-		g_warning("Invalid state encountered???: %d", camel_mime_parser_state (mp));
-	}
-
-	if (content == NULL)
-		return;
-
-	/* would you believe you have to set this BEFORE you set the content object??? oh my god !!!! */
-	camel_data_wrapper_set_mime_type_field (content, camel_mime_part_get_content_type ((CamelMimePart *) dw));
-	camel_medium_set_content_object ((CamelMedium *) dw, content);
-	camel_object_unref ((CamelObject *) content);
-}