diff options
Diffstat (limited to 'camel/camel-mime-parser.c')
-rw-r--r-- | camel/camel-mime-parser.c | 1930 |
1 files changed, 1930 insertions, 0 deletions
diff --git a/camel/camel-mime-parser.c b/camel/camel-mime-parser.c new file mode 100644 index 0000000000..ddf1b6b4a9 --- /dev/null +++ b/camel/camel-mime-parser.c @@ -0,0 +1,1930 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2000-2003 Ximian Inc. + * + * Authors: Michael Zucchi <notzed@ximian.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +/* What should hopefully be a fast mail parser */ + +/* Do not change this code without asking me (Michael Zucchi) first + + There is almost always a reason something was done a certain way. 
+ */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include <string.h> + +#include <stdio.h> +#include <errno.h> + +#include <regex.h> +#include <ctype.h> + +#include <glib.h> +#include "camel-mime-parser.h" +#include "camel-mime-utils.h" +#include "camel-mime-filter.h" +#include "camel-stream.h" +#include "camel-seekable-stream.h" + +#include "e-util/e-memory.h" + +#define r(x) +#define h(x) +#define c(x) +#define d(x) + +/*#define PRESERVE_HEADERS*/ + +/*#define PURIFY*/ + +#define MEMPOOL + +#ifdef PURIFY +int inend_id = -1, + inbuffer_id = -1; +#endif + +#define SCAN_BUF 4096 /* size of read buffer */ +#define SCAN_HEAD 128 /* headroom guaranteed to be before each read buffer */ + +/* a little hacky, but i couldn't be bothered renaming everything */ +#define _header_scan_state _CamelMimeParserPrivate +#define _PRIVATE(o) (((CamelMimeParser *)(o))->priv) + +struct _header_scan_state { + + /* global state */ + + enum _camel_mime_parser_state state; + + /* for building headers during scanning */ + char *outbuf; + char *outptr; + char *outend; + + int fd; /* input for a fd input */ + CamelStream *stream; /* or for a stream */ + + int ioerrno; /* io error state */ + + /* for scanning input buffers */ + char *realbuf; /* the real buffer, SCAN_HEAD*2 + SCAN_BUF bytes */ + char *inbuf; /* points to a subset of the allocated memory, the underflow */ + char *inptr; /* (upto SCAN_HEAD) is for use by filters so they dont copy all data */ + char *inend; + + int atleast; + + off_t seek; /* current offset to start of buffer */ + int unstep; /* how many states to 'unstep' (repeat the current state) */ + + unsigned int midline:1; /* are we mid-line interrupted? */ + unsigned int scan_from:1; /* do we care about From lines? */ + unsigned int scan_pre_from:1; /* do we return pre-from data? */ + unsigned int eof:1; /* reached eof? 
*/ + + off_t start_of_from; /* where from started */ + off_t start_of_boundary; /* where the last boundary started */ + off_t start_of_headers; /* where headers started from the last scan */ + + off_t header_start; /* start of last header, or -1 */ + + /* filters to apply to all content before output */ + int filterid; /* id of next filter */ + struct _header_scan_filter *filters; + + /* per message/part info */ + struct _header_scan_stack *parts; + +}; + +struct _header_scan_stack { + struct _header_scan_stack *parent; + + enum _camel_mime_parser_state savestate; /* state at invocation of this part */ + +#ifdef MEMPOOL + EMemPool *pool; /* memory pool to keep track of headers/etc at this level */ +#endif + struct _camel_header_raw *headers; /* headers for this part */ + + CamelContentType *content_type; + + /* I dont use GString's casue you can't efficiently append a buffer to them */ + GByteArray *pretext; /* for multipart types, save the pre-boundary data here */ + GByteArray *posttext; /* for multipart types, save the post-boundary data here */ + int prestage; /* used to determine if it is a pre-boundary or post-boundary data segment */ + + GByteArray *from_line; /* the from line */ + + char *boundary; /* for multipart/ * boundaries, including leading -- and trailing -- for the final part */ + int boundarylen; /* actual length of boundary, including leading -- if there is one */ + int boundarylenfinal; /* length of boundary, including trailing -- if there is one */ + int atleast; /* the biggest boundary from here to the parent */ +}; + +struct _header_scan_filter { + struct _header_scan_filter *next; + int id; + CamelMimeFilter *filter; +}; + +static void folder_scan_step(struct _header_scan_state *s, char **databuffer, size_t *datalength); +static void folder_scan_drop_step(struct _header_scan_state *s); +static int folder_scan_init_with_fd(struct _header_scan_state *s, int fd); +static int folder_scan_init_with_stream(struct _header_scan_state *s, CamelStream 
*stream); +static struct _header_scan_state *folder_scan_init(void); +static void folder_scan_close(struct _header_scan_state *s); +static struct _header_scan_stack *folder_scan_content(struct _header_scan_state *s, int *lastone, char **data, size_t *length); +static struct _header_scan_stack *folder_scan_header(struct _header_scan_state *s, int *lastone); +static int folder_scan_skip_line(struct _header_scan_state *s, GByteArray *save); +static off_t folder_seek(struct _header_scan_state *s, off_t offset, int whence); +static off_t folder_tell(struct _header_scan_state *s); +static int folder_read(struct _header_scan_state *s); +static void folder_push_part(struct _header_scan_state *s, struct _header_scan_stack *h); + +#ifdef MEMPOOL +static void header_append_mempool(struct _header_scan_state *s, struct _header_scan_stack *h, char *header, int offset); +#endif + +static void camel_mime_parser_class_init (CamelMimeParserClass *klass); +static void camel_mime_parser_init (CamelMimeParser *obj); + +#if d(!)0 +static char *states[] = { + "CAMEL_MIME_PARSER_STATE_INITIAL", + "CAMEL_MIME_PARSER_STATE_PRE_FROM", /* pre-from data */ + "CAMEL_MIME_PARSER_STATE_FROM", /* got 'From' line */ + "CAMEL_MIME_PARSER_STATE_HEADER", /* toplevel header */ + "CAMEL_MIME_PARSER_STATE_BODY", /* scanning body of message */ + "CAMEL_MIME_PARSER_STATE_MULTIPART", /* got multipart header */ + "CAMEL_MIME_PARSER_STATE_MESSAGE", /* rfc822/news message */ + + "CAMEL_MIME_PARSER_STATE_PART", /* part of a multipart */ + + "CAMEL_MIME_PARSER_STATE_EOF", /* end of file */ + "CAMEL_MIME_PARSER_STATE_PRE_FROM_END", + "CAMEL_MIME_PARSER_STATE_FROM_END", + "CAMEL_MIME_PARSER_STATE_HEAER_END", + "CAMEL_MIME_PARSER_STATE_BODY_END", + "CAMEL_MIME_PARSER_STATE_MULTIPART_END", + "CAMEL_MIME_PARSER_STATE_MESSAGE_END", +}; +#endif + +static CamelObjectClass *camel_mime_parser_parent; + +static void +camel_mime_parser_class_init (CamelMimeParserClass *klass) +{ + camel_mime_parser_parent = 
camel_type_get_global_classfuncs (camel_object_get_type ()); +} + +static void +camel_mime_parser_init (CamelMimeParser *obj) +{ + struct _header_scan_state *s; + + s = folder_scan_init(); + _PRIVATE(obj) = s; +} + +static void +camel_mime_parser_finalise(CamelObject *o) +{ + struct _header_scan_state *s = _PRIVATE(o); +#ifdef PURIFY + purify_watch_remove_all(); +#endif + folder_scan_close(s); +} + +CamelType +camel_mime_parser_get_type (void) +{ + static CamelType type = CAMEL_INVALID_TYPE; + + if (type == CAMEL_INVALID_TYPE) { + type = camel_type_register (camel_object_get_type (), "CamelMimeParser", + sizeof (CamelMimeParser), + sizeof (CamelMimeParserClass), + (CamelObjectClassInitFunc) camel_mime_parser_class_init, + NULL, + (CamelObjectInitFunc) camel_mime_parser_init, + (CamelObjectFinalizeFunc) camel_mime_parser_finalise); + } + + return type; +} + +/** + * camel_mime_parser_new: + * + * Create a new CamelMimeParser object. + * + * Return value: A new CamelMimeParser widget. + **/ +CamelMimeParser * +camel_mime_parser_new (void) +{ + CamelMimeParser *new = CAMEL_MIME_PARSER ( camel_object_new (camel_mime_parser_get_type ())); + return new; +} + + +/** + * camel_mime_parser_filter_add: + * @m: + * @mf: + * + * Add a filter that will be applied to any body content before it is passed + * to the caller. Filters may be pipelined to perform multi-pass operations + * on the content, and are applied in the order they were added. + * + * Note that filters are only applied to the body content of messages, and once + * a filter has been set, all content returned by a filter_step() with a state + * of CAMEL_MIME_PARSER_STATE_BODY will have passed through the filter. + * + * Return value: An id that may be passed to filter_remove() to remove + * the filter, or -1 if the operation failed. 
+ **/ +int +camel_mime_parser_filter_add(CamelMimeParser *m, CamelMimeFilter *mf) +{ + struct _header_scan_state *s = _PRIVATE(m); + struct _header_scan_filter *f, *new; + + new = g_malloc(sizeof(*new)); + new->filter = mf; + new->id = s->filterid++; + if (s->filterid == -1) + s->filterid++; + new->next = 0; + camel_object_ref((CamelObject *)mf); + + /* yes, this is correct, since 'next' is the first element of the struct */ + f = (struct _header_scan_filter *)&s->filters; + while (f->next) + f = f->next; + f->next = new; + return new->id; +} + +/** + * camel_mime_parser_filter_remove: + * @m: + * @id: + * + * Remove a processing filter from the pipeline. There is no + * restriction on the order the filters can be removed. + **/ +void +camel_mime_parser_filter_remove(CamelMimeParser *m, int id) +{ + struct _header_scan_state *s = _PRIVATE(m); + struct _header_scan_filter *f, *old; + + f = (struct _header_scan_filter *)&s->filters; + while (f && f->next) { + old = f->next; + if (old->id == id) { + camel_object_unref((CamelObject *)old->filter); + f->next = old->next; + g_free(old); + /* there should only be a single matching id, but + scan the whole lot anyway */ + } + f = f->next; + } +} + +/** + * camel_mime_parser_header: + * @m: + * @name: Name of header. + * @offset: Pointer that can receive the offset of the header in + * the stream from the start of parsing. + * + * Lookup a header by name. + * + * Return value: The header value, or NULL if the header is not + * defined. + **/ +const char * +camel_mime_parser_header(CamelMimeParser *m, const char *name, int *offset) +{ + struct _header_scan_state *s = _PRIVATE(m); + + if (s->parts && + s->parts->headers) { + return camel_header_raw_find(&s->parts->headers, name, offset); + } + return NULL; +} + +/** + * camel_mime_parser_headers_raw: + * @m: + * + * Get the list of the raw headers which are defined for the + * current state of the parser. 
These headers are valid + * until the next call to parser_step(), or parser_drop_step(). + * + * Return value: The raw headers, or NULL if there are no headers + * defined for the current part or state. These are READ ONLY. + **/ +struct _camel_header_raw * +camel_mime_parser_headers_raw(CamelMimeParser *m) +{ + struct _header_scan_state *s = _PRIVATE(m); + + if (s->parts) + return s->parts->headers; + return NULL; +} + +static const char * +byte_array_to_string(GByteArray *array) +{ + if (array == NULL) + return NULL; + + if (array->len == 0 || array->data[array->len-1] != '\0') + g_byte_array_append(array, "", 1); + + return array->data; +} + +/** + * camel_mime_parser_preface: + * @m: + * + * Retrieve the preface text for the current multipart. + * Can only be used when the state is CAMEL_MIME_PARSER_STATE_MULTIPART_END. + * + * Return value: The preface text, or NULL if there wasn't any. + **/ +const char * +camel_mime_parser_preface(CamelMimeParser *m) +{ + struct _header_scan_state *s = _PRIVATE(m); + + if (s->parts) + return byte_array_to_string(s->parts->pretext); + + return NULL; +} + +/** + * camel_mime_parser_postface: + * @m: + * + * Retrieve the postface text for the current multipart. + * Only returns valid data when the current state if + * CAMEL_MIME_PARSER_STATE_MULTIPART_END. + * + * Return value: The postface text, or NULL if there wasn't any. + **/ +const char * +camel_mime_parser_postface(CamelMimeParser *m) +{ + struct _header_scan_state *s = _PRIVATE(m); + + if (s->parts) + return byte_array_to_string(s->parts->posttext); + + return NULL; +} + +/** + * camel_mime_parser_from_line: + * @m: + * + * Get the last scanned "From " line, from a recently scanned from. + * This should only be called in the CAMEL_MIME_PARSER_STATE_FROM state. The + * from line will include the closing \n found (if there was one). + * + * The return value will remain valid while in the CAMEL_MIME_PARSER_STATE_FROM + * state, or any deeper state. 
+ * + * Return value: The From line, or NULL if called out of context. + **/ +const char * +camel_mime_parser_from_line(CamelMimeParser *m) +{ + struct _header_scan_state *s = _PRIVATE(m); + + if (s->parts) + return byte_array_to_string(s->parts->from_line); + + return NULL; +} + +/** + * camel_mime_parser_init_with_fd: + * @m: + * @fd: A valid file descriptor. + * + * Initialise the scanner with an fd. The scanner's offsets + * will be relative to the current file position of the file + * descriptor. As a result, seekable descritors should + * be seeked using the parser seek functions. + * + * Return value: Returns -1 on error. + **/ +int +camel_mime_parser_init_with_fd(CamelMimeParser *m, int fd) +{ + struct _header_scan_state *s = _PRIVATE(m); + + return folder_scan_init_with_fd(s, fd); +} + +/** + * camel_mime_parser_init_with_stream: + * @m: + * @stream: + * + * Initialise the scanner with a source stream. The scanner's + * offsets will be relative to the current file position of + * the stream. As a result, seekable streams should only + * be seeked using the parser seek function. + * + * Return value: -1 on error. + **/ +int +camel_mime_parser_init_with_stream(CamelMimeParser *m, CamelStream *stream) +{ + struct _header_scan_state *s = _PRIVATE(m); + + return folder_scan_init_with_stream(s, stream); +} + +/** + * camel_mime_parser_scan_from: + * @parser: MIME parser object + * @scan_from: #TRUE if the scanner should scan From lines. + * + * Tell the scanner if it should scan "^From " lines or not. + * + * If the scanner is scanning from lines, two additional + * states CAMEL_MIME_PARSER_STATE_FROM and CAMEL_MIME_PARSER_STATE_FROM_END will be returned + * to the caller during parsing. + * + * This may also be preceeded by an optional + * CAMEL_MIME_PARSER_STATE_PRE_FROM state which contains the scanned data + * found before the From line is encountered. See also + * scan_pre_from(). 
+ **/ +void +camel_mime_parser_scan_from (CamelMimeParser *parser, gboolean scan_from) +{ + struct _header_scan_state *s = _PRIVATE (parser); + + s->scan_from = scan_from; +} + +/** + * camel_mime_parser_scan_pre_from: + * @parser: MIME parser object + * @scan_pre_from: #TRUE if we want to get pre-from data. + * + * Tell the scanner whether we want to know abou the pre-from + * data during a scan. If we do, then we may get an additional + * state CAMEL_MIME_PARSER_STATE_PRE_FROM which returns the specified data. + **/ +void +camel_mime_parser_scan_pre_from (CamelMimeParser *parser, gboolean scan_pre_from) +{ + struct _header_scan_state *s = _PRIVATE (parser); + + s->scan_pre_from = scan_pre_from; +} + +/** + * camel_mime_parser_content_type: + * @parser: MIME parser object + * + * Get the content type defined in the current part. + * + * Return value: A content_type structure, or NULL if there + * is no content-type defined for this part of state of the + * parser. + **/ +CamelContentType * +camel_mime_parser_content_type (CamelMimeParser *parser) +{ + struct _header_scan_state *s = _PRIVATE (parser); + + /* FIXME: should this search up until it's found the 'right' + content-type? can it? */ + if (s->parts) + return s->parts->content_type; + + return NULL; +} + +/** + * camel_mime_parser_unstep: + * @parser: MIME parser object + * + * Cause the last step operation to repeat itself. If this is + * called repeated times, then the same step will be repeated + * that many times. + * + * Note that it is not possible to scan back using this function, + * only to have a way of peeking the next state. + **/ +void +camel_mime_parser_unstep (CamelMimeParser *parser) +{ + struct _header_scan_state *s = _PRIVATE (parser); + + s->unstep++; +} + +/** + * camel_mime_parser_drop_step: + * @parser: MIME parser object + * + * Drop the last step call. 
This should only be used
 * in conjunction with seeking of the stream as the
 * stream may be in an undefined state relative to the
 * state of the parser.
 *
 * Any pending unstep count is discarded as well.
 *
 * Use this call with care.
 **/
void
camel_mime_parser_drop_step (CamelMimeParser *parser)
{
	struct _header_scan_state *state = _PRIVATE (parser);

	/* forget any pending repeats before dropping the step itself */
	state->unstep = 0;
	folder_scan_drop_step(state);
}

/**
 * camel_mime_parser_step:
 * @parser: MIME parser object
 * @databuffer: Pointer to accept a pointer to the data
 * associated with this step (if any).  May be #NULL,
 * in which case datalength is also ignored.
 * @datalength: Pointer to accept a pointer to the data
 * length associated with this step (if any).
 *
 * Parse the next part of the MIME message.  If _unstep()
 * has been called, then continue to return the same state
 * for that many calls.
 *
 * If the step is CAMEL_MIME_PARSER_STATE_BODY then the databuffer and datalength
 * pointers will be setup to point to the internal data buffer
 * of the scanner and may be processed as required.  Any
 * filters will have already been applied to this data.
 *
 * Refer to the state diagram elsewhere for a full listing of
 * the states an application is guaranteed to get from the
 * scanner.
 *
 * Return value: The current new state of the parser
 * is returned.
+ **/ +enum _camel_mime_parser_state +camel_mime_parser_step (CamelMimeParser *parser, char **databuffer, size_t *datalength) +{ + struct _header_scan_state *s = _PRIVATE (parser); + + d(printf("OLD STATE: '%s' :\n", states[s->state])); + + if (s->unstep <= 0) { + char *dummy; + size_t dummylength; + + if (databuffer == NULL) { + databuffer = &dummy; + datalength = &dummylength; + } + + folder_scan_step(s, databuffer, datalength); + } else + s->unstep--; + + d(printf("NEW STATE: '%s' :\n", states[s->state])); + + return s->state; +} + +/** + * camel_mime_parser_read: + * @parser: MIME parser object + * @databuffer: + * @len: + * + * Read at most @len bytes from the internal mime parser buffer. + * + * Returns the address of the internal buffer in @databuffer, + * and the length of useful data. + * + * @len may be specified as INT_MAX, in which case you will + * get the full remainder of the buffer at each call. + * + * Note that no parsing of the data read through this function + * occurs, so no state changes occur, but the seek position + * is updated appropriately. + * + * Return value: The number of bytes available, or -1 on error. + **/ +int +camel_mime_parser_read (CamelMimeParser *parser, const char **databuffer, int len) +{ + struct _header_scan_state *s = _PRIVATE (parser); + int there; + + if (len == 0) + return 0; + + d(printf("parser::read() reading %d bytes\n", len)); + + there = MIN(s->inend - s->inptr, len); + d(printf("parser::read() there = %d bytes\n", there)); + if (there > 0) { + *databuffer = s->inptr; + s->inptr += there; + return there; + } + + if (folder_read(s) == -1) + return -1; + + there = MIN(s->inend - s->inptr, len); + d(printf("parser::read() had to re-read, now there = %d bytes\n", there)); + + *databuffer = s->inptr; + s->inptr += there; + + return there; +} + +/** + * camel_mime_parser_tell: + * @parser: MIME parser object + * + * Return the current scanning offset. 
The meaning of this + * value will depend on the current state of the parser. + * + * An incomplete listing of the states: + * + * CAMEL_MIME_PARSER_STATE_INITIAL, The start of the current message. + * CAMEL_MIME_PARSER_STATE_HEADER, CAMEL_MIME_PARSER_STATE_MESSAGE, CAMEL_MIME_PARSER_STATE_MULTIPART, the character + * position immediately after the end of the header. + * CAMEL_MIME_PARSER_STATE_BODY, Position within the message of the start + * of the current data block. + * CAMEL_MIME_PARSER_STATE_*_END, The position of the character starting + * the next section of the scan (the last position + 1 of + * the respective current state). + * + * Return value: See above. + **/ +off_t +camel_mime_parser_tell (CamelMimeParser *parser) +{ + struct _header_scan_state *s = _PRIVATE (parser); + + return folder_tell(s); +} + +/** + * camel_mime_parser_tell_start_headers: + * @parser: MIME parser object + * + * Find out the position within the file of where the + * headers started, this is cached by the parser + * at the time. + * + * Return value: The header start position, or -1 if + * no headers were scanned in the current state. + **/ +off_t +camel_mime_parser_tell_start_headers (CamelMimeParser *parser) +{ + struct _header_scan_state *s = _PRIVATE (parser); + + return s->start_of_headers; +} + +/** + * camel_mime_parser_tell_start_from: + * @parser: MIME parser object + * + * If the parser is scanning From lines, then this returns + * the position of the start of the From line. + * + * Return value: The start of the from line, or -1 if there + * was no From line, or From lines are not being scanned. + **/ +off_t +camel_mime_parser_tell_start_from (CamelMimeParser *parser) +{ + struct _header_scan_state *s = _PRIVATE (parser); + + return s->start_of_from; +} + +/** + * camel_mime_parser_tell_start_boundary: + * @parser: MIME parser object + * + * When parsing a multipart, this returns the start of the last + * boundary. 
+ * + * Return value: The start of the boundary, or -1 if there + * was no boundary encountered yet. + **/ +off_t +camel_mime_parser_tell_start_boundary(CamelMimeParser *parser) +{ + struct _header_scan_state *s = _PRIVATE (parser); + + return s->start_of_boundary; +} + +/** + * camel_mime_parser_seek: + * @parser: MIME parser object + * @offset: Number of bytes to offset the seek by. + * @whence: SEEK_SET, SEEK_CUR, SEEK_END + * + * Reset the source position to a known value. + * + * Note that if the source stream/descriptor was not + * positioned at 0 to begin with, and an absolute seek + * is specified (whence != SEEK_CUR), then the seek + * position may not match the desired seek position. + * + * Return value: The new seek offset, or -1 on + * an error (for example, trying to seek on a non-seekable + * stream or file descriptor). + **/ +off_t +camel_mime_parser_seek(CamelMimeParser *parser, off_t offset, int whence) +{ + struct _header_scan_state *s = _PRIVATE (parser); + + return folder_seek(s, offset, whence); +} + +/** + * camel_mime_parser_state: + * @parser: MIME parser object + * + * Get the current parser state. + * + * Return value: The current parser state. + **/ +enum _camel_mime_parser_state +camel_mime_parser_state (CamelMimeParser *parser) +{ + struct _header_scan_state *s = _PRIVATE (parser); + + return s->state; +} + +/** + * camel_mime_parser_push_state: + * @mp: MIME parser object + * @newstate: New state + * @boundary: Boundary marker for state. + * + * Pre-load a new parser state. Used to post-parse multipart content + * without headers. 
+ **/ +void +camel_mime_parser_push_state(CamelMimeParser *mp, enum _camel_mime_parser_state newstate, const char *boundary) +{ + struct _header_scan_stack *h; + struct _header_scan_state *s = _PRIVATE(mp); + + h = g_malloc0(sizeof(*h)); + h->boundarylen = strlen(boundary)+2; + h->boundarylenfinal = h->boundarylen+2; + h->boundary = g_malloc(h->boundarylen+3); + sprintf(h->boundary, "--%s--", boundary); + folder_push_part(s, h); + s->state = newstate; +} + +/** + * camel_mime_parser_stream: + * @parser: MIME parser object + * + * Get the stream, if any, the parser has been initialised + * with. May be used to setup sub-streams, but should not + * be read from directly (without saving and restoring + * the seek position in between). + * + * Return value: The stream from _init_with_stream(), or NULL + * if the parser is reading from a file descriptor or is + * uninitialised. + **/ +CamelStream * +camel_mime_parser_stream (CamelMimeParser *parser) +{ + struct _header_scan_state *s = _PRIVATE (parser); + + return s->stream; +} + +/** + * camel_mime_parser_fd: + * @parser: MIME parser object + * + * Return the file descriptor, if any, the parser has been + * initialised with. + * + * Should not be read from unless the parser it to terminate, + * or the seek offset can be reset before the next parse + * step. + * + * Return value: The file descriptor or -1 if the parser + * is reading from a stream or has not been initialised. 
+ **/ +int +camel_mime_parser_fd (CamelMimeParser *parser) +{ + struct _header_scan_state *s = _PRIVATE (parser); + + return s->fd; +} + +/* Return errno of the parser, incase any error occured during processing */ +int +camel_mime_parser_errno (CamelMimeParser *parser) +{ + struct _header_scan_state *s = _PRIVATE (parser); + + return s->ioerrno; +} + +/* ********************************************************************** */ +/* Implementation */ +/* ********************************************************************** */ + +/* read the next bit of data, ensure there is enough room 'atleast' bytes */ +static int +folder_read(struct _header_scan_state *s) +{ + int len; + int inoffset; + + if (s->inptr<s->inend-s->atleast || s->eof) + return s->inend-s->inptr; +#ifdef PURIFY + purify_watch_remove(inend_id); + purify_watch_remove(inbuffer_id); +#endif + /* check for any remaning bytes (under the atleast limit( */ + inoffset = s->inend - s->inptr; + if (inoffset>0) { + memcpy(s->inbuf, s->inptr, inoffset); + } + if (s->stream) { + len = camel_stream_read(s->stream, s->inbuf+inoffset, SCAN_BUF-inoffset); + } else { + len = read(s->fd, s->inbuf+inoffset, SCAN_BUF-inoffset); + } + r(printf("read %d bytes, offset = %d\n", len, inoffset)); + if (len>=0) { + /* add on the last read block */ + s->seek += s->inptr - s->inbuf; + s->inptr = s->inbuf; + s->inend = s->inbuf+len+inoffset; + s->eof = (len == 0); + r(printf("content = %d '%.*s'\n",s->inend - s->inptr, s->inend - s->inptr, s->inptr)); + } else { + s->ioerrno = errno?errno:EIO; + } + + g_assert(s->inptr<=s->inend); +#ifdef PURIFY + inend_id = purify_watch(&s->inend); + inbuffer_id = purify_watch_n(s->inend+1, SCAN_HEAD-1, "rw"); +#endif + r(printf("content = %d '%.*s'\n", s->inend - s->inptr, s->inend - s->inptr, s->inptr)); + /* set a sentinal, for the inner loops to check against */ + s->inend[0] = '\n'; + return s->inend-s->inptr; +} + +/* return the current absolute position of the data pointer */ +static off_t 
+folder_tell(struct _header_scan_state *s) +{ + return s->seek + (s->inptr - s->inbuf); +} + +/* + need some way to prime the parser state, so this actually works for + other than top-level messages +*/ +static off_t +folder_seek(struct _header_scan_state *s, off_t offset, int whence) +{ + off_t newoffset; + + if (s->stream) { + if (CAMEL_IS_SEEKABLE_STREAM(s->stream)) { + /* NOTE: assumes whence seekable stream == whence libc, which is probably + the case (or bloody well should've been) */ + newoffset = camel_seekable_stream_seek((CamelSeekableStream *)s->stream, offset, whence); + } else { + newoffset = -1; + errno = EINVAL; + } + } else { + newoffset = lseek(s->fd, offset, whence); + } +#ifdef PURIFY + purify_watch_remove(inend_id); + purify_watch_remove(inbuffer_id); +#endif + if (newoffset != -1) { + s->seek = newoffset; + s->inptr = s->inbuf; + s->inend = s->inbuf; + s->eof = FALSE; + } else { + s->ioerrno = errno?errno:EIO; + } +#ifdef PURIFY + inend_id = purify_watch(&s->inend); + inbuffer_id = purify_watch_n(s->inend+1, SCAN_HEAD-1, "rw"); +#endif + return newoffset; +} + +static void +folder_push_part(struct _header_scan_state *s, struct _header_scan_stack *h) +{ + if (s->parts && s->parts->atleast > h->boundarylenfinal) + h->atleast = s->parts->atleast; + else + h->atleast = MAX(h->boundarylenfinal, 1); + + h->parent = s->parts; + s->parts = h; +} + +static void +folder_pull_part(struct _header_scan_state *s) +{ + struct _header_scan_stack *h; + + h = s->parts; + if (h) { + s->parts = h->parent; + g_free(h->boundary); +#ifdef MEMPOOL + e_mempool_destroy(h->pool); +#else + camel_header_raw_clear(&h->headers); +#endif + camel_content_type_unref(h->content_type); + if (h->pretext) + g_byte_array_free(h->pretext, TRUE); + if (h->posttext) + g_byte_array_free(h->posttext, TRUE); + if (h->from_line) + g_byte_array_free(h->from_line, TRUE); + g_free(h); + } else { + g_warning("Header stack underflow!\n"); + } +} + +static int +folder_scan_skip_line(struct 
_header_scan_state *s, GByteArray *save) +{ + int atleast = s->atleast; + register char *inptr, *inend, c; + int len; + + s->atleast = 1; + + d(printf("skipping line\n")); + + while ( (len = folder_read(s)) > 0 && len > s->atleast) { /* ensure we have at least enough room here */ + inptr = s->inptr; + inend = s->inend; + + c = -1; + while (inptr<inend + && (c = *inptr++)!='\n') { + d(printf("(%2x,%c)", c, isprint(c)?c:'.')); + ; + } + + if (save) + g_byte_array_append(save, s->inptr, inptr-s->inptr); + + s->inptr = inptr; + + if (c=='\n') { + s->atleast = atleast; + return 0; + } + } + + d(printf("couldn't find end of line?\n")); + + s->atleast = atleast; + + return -1; /* not found */ +} + +/* TODO: Is there any way to make this run faster? It gets called a lot ... */ +static struct _header_scan_stack * +folder_boundary_check(struct _header_scan_state *s, const char *boundary, int *lastone) +{ + struct _header_scan_stack *part; + int len = s->inend - boundary; /* make sure we dont access past the buffer */ + + h(printf("checking boundary marker upto %d bytes\n", len)); + part = s->parts; + while (part) { + h(printf(" boundary: %s\n", part->boundary)); + h(printf(" against: '%.*s'\n", part->boundarylen, boundary)); + if (part->boundary + && part->boundarylen <= len + && memcmp(boundary, part->boundary, part->boundarylen)==0) { + h(printf("matched boundary: %s\n", part->boundary)); + /* again, make sure we're in range */ + if (part->boundarylenfinal <= len) { + int extra = part->boundarylenfinal - part->boundarylen; + + /* check the extra stuff on an final boundary, normally -- for mime parts */ + if (extra>0) { + *lastone = memcmp(&boundary[part->boundarylen], + &part->boundary[part->boundarylen], + extra) == 0; + } else { + *lastone = TRUE; + } + h(printf("checking lastone = %s\n", *lastone?"TRUE":"FALSE")); + } else { + h(printf("not enough room to check last one?\n")); + *lastone = FALSE; + } + /*printf("ok, we found it! 
: %s \n", (*lastone)?"Last one":"More to come?");*/ + return part; + } + part = part->parent; + } + return NULL; +} + +#ifdef MEMPOOL +static void +header_append_mempool(struct _header_scan_state *s, struct _header_scan_stack *h, char *header, int offset) +{ + struct _camel_header_raw *l, *n; + char *content; + + content = strchr(header, ':'); + if (content) { + register int len; + n = e_mempool_alloc(h->pool, sizeof(*n)); + n->next = NULL; + + len = content-header; + n->name = e_mempool_alloc(h->pool, len+1); + memcpy(n->name, header, len); + n->name[len] = 0; + + content++; + + len = s->outptr - content; + n->value = e_mempool_alloc(h->pool, len+1); + memcpy(n->value, content, len); + n->value[len] = 0; + + n->offset = offset; + + l = (struct _camel_header_raw *)&h->headers; + while (l->next) { + l = l->next; + } + l->next = n; + } + +} + +#define header_raw_append_parse(a, b, c) (header_append_mempool(s, h, b, c)) + +#endif + +/* Copy the string start->inptr into the header buffer (s->outbuf), + grow if necessary + remove trailing \r chars (\n's assumed already removed) + and track the start offset of the header */ +/* Basically an optimised version of g_byte_array_append() */ +#define header_append(s, start, inptr) \ +{ \ + register int headerlen = inptr-start; \ + \ + if (headerlen > 0) { \ + if (headerlen >= (s->outend - s->outptr)) { \ + register char *outnew; \ + register int len = ((s->outend - s->outbuf)+headerlen)*2+1; \ + outnew = g_realloc(s->outbuf, len); \ + s->outptr = s->outptr - s->outbuf + outnew; \ + s->outbuf = outnew; \ + s->outend = outnew + len; \ + } \ + if (start[headerlen-1] == '\r') \ + headerlen--; \ + memcpy(s->outptr, start, headerlen); \ + s->outptr += headerlen; \ + } \ + if (s->header_start == -1) \ + s->header_start = (start-s->inbuf) + s->seek; \ +} + +static struct _header_scan_stack * +folder_scan_header(struct _header_scan_state *s, int *lastone) +{ + int atleast = s->atleast, newatleast; + char *start = NULL; + int len; + 
struct _header_scan_stack *h; + char *inend; + register char *inptr; + + h(printf("scanning first bit\n")); + + h = g_malloc0(sizeof(*h)); +#ifdef MEMPOOL + h->pool = e_mempool_new(8192, 4096, E_MEMPOOL_ALIGN_STRUCT); +#endif + + if (s->parts) + newatleast = s->parts->atleast; + else + newatleast = 1; + *lastone = FALSE; + + do { + s->atleast = newatleast; + + h(printf("atleast = %d\n", s->atleast)); + + while ((len = folder_read(s))>0 && len >= s->atleast) { /* ensure we have at least enough room here */ + inptr = s->inptr; + inend = s->inend-s->atleast+1; + + while (inptr<inend) { + if (!s->midline) { + if (folder_boundary_check(s, inptr, lastone)) { + if ((s->outptr>s->outbuf)) + goto header_truncated; /* may not actually be truncated */ + + goto header_done; + } + } + + start = inptr; + + /* goto next line/sentinal */ + while ((*inptr++)!='\n') + ; + + g_assert(inptr<=s->inend+1); + + /* check for sentinal or real end of line */ + if (inptr > inend) { + h(printf("not at end of line yet, going further\n")); + /* didn't find end of line within our allowed area */ + inptr = inend; + s->midline = TRUE; + header_append(s, start, inptr); + } else { + h(printf("got line part: '%.*s'\n", inptr-1-start, start)); + /* got a line, strip and add it, process it */ + s->midline = FALSE; +#ifdef PRESERVE_HEADERS + header_append(s, start, inptr); +#else + header_append(s, start, inptr-1); +#endif + /* check for end of headers */ + if (s->outbuf == s->outptr) + goto header_done; + + /* check for continuation/compress headers, we have atleast 1 char here to work with */ + if (inptr[0] == ' ' || inptr[0] == '\t') { + h(printf("continuation\n")); +#ifndef PRESERVE_HEADERS + /* TODO: this wont catch multiple space continuation across a read boundary, but + that is assumed rare, and not fatal anyway */ + do + inptr++; + while (*inptr == ' ' || *inptr == '\t'); + inptr--; + *inptr = ' '; +#endif + } else { + /* otherwise, complete header, add it */ +#ifdef PRESERVE_HEADERS + 
s->outptr--; + if (s->outptr[-1] == '\r') + s->outptr--; +#endif + s->outptr[0] = 0; + + h(printf("header '%s' at %d\n", s->outbuf, (int)s->header_start)); + + header_raw_append_parse(&h->headers, s->outbuf, s->header_start); + s->outptr = s->outbuf; + s->header_start = -1; + } + } + } + s->inptr = inptr; + } + h(printf("end of file? read %d bytes\n", len)); + newatleast = 1; + } while (s->atleast > 1); + + if ((s->outptr > s->outbuf) || s->inend > s->inptr) { + start = s->inptr; + inptr = s->inend; + if (inptr > start) { + if (inptr[-1] == '\n') + inptr--; + } + goto header_truncated; + } + + s->atleast = atleast; + + return h; + +header_truncated: + header_append(s, start, inptr); + + s->outptr[0] = 0; + if (s->outbuf == s->outptr) + goto header_done; + + header_raw_append_parse(&h->headers, s->outbuf, s->header_start); + + s->outptr = s->outbuf; +header_done: + s->inptr = inptr; + s->atleast = atleast; + s->header_start = -1; + return h; +} + +static struct _header_scan_stack * +folder_scan_content(struct _header_scan_state *s, int *lastone, char **data, size_t *length) +{ + int atleast = s->atleast, newatleast; + register char *inptr; + char *inend; + char *start; + int len; + struct _header_scan_stack *part; + int onboundary = FALSE; + + c(printf("scanning content\n")); + + part = s->parts; + if (part) + newatleast = part->atleast; + else + newatleast = 1; + *lastone = FALSE; + + c(printf("atleast = %d\n", newatleast)); + + do { + s->atleast = newatleast; + + while ((len = folder_read(s))>0 && len >= s->atleast) { /* ensure we have at least enough room here */ + inptr = s->inptr; + if (s->eof) + inend = s->inend; + else + inend = s->inend-s->atleast+1; + start = inptr; + + c(printf("inptr = %p, inend = %p\n", inptr, inend)); + + while (inptr<inend) { + if (!s->midline + && (part = folder_boundary_check(s, inptr, lastone))) { + onboundary = TRUE; + + /* since we truncate the boundary data, we need at least 1 char here spare, + to remain in the same state */ + 
if ( (inptr-start) > 1) + goto content; + + /* otherwise, jump to the state of the boundary we actually found */ + goto normal_exit; + } + + /* goto the next line */ + while ((*inptr++)!='\n') + ; + + /* check the sentinal, if we went past the atleast limit, and reset it to there */ + if (inptr > inend) { + s->midline = TRUE; + inptr = inend; + } else { + s->midline = FALSE; + } + } + + c(printf("ran out of input, dumping what i have (%d) bytes midline = %s\n", + inptr-start, s->midline?"TRUE":"FALSE")); + goto content; + } + newatleast = 1; + } while (s->atleast > 1); + + c(printf("length read = %d\n", len)); + + if (s->inend > s->inptr) { + start = s->inptr; + inptr = s->inend; + goto content; + } + + *length = 0; + s->atleast = atleast; + return NULL; + +content: + /* treat eof as the last boundary in From mode */ + if (s->scan_from && s->eof && s->atleast <= 1) { + onboundary = TRUE; + part = NULL; + } else { + part = s->parts; + } +normal_exit: + s->atleast = atleast; + s->inptr = inptr; + + *data = start; + /* if we hit a boundary, we should not include the closing \n */ + if (onboundary && (inptr-start)>0) + *length = inptr-start-1; + else + *length = inptr-start; + + /*printf("got %scontent: '%.*s'\n", s->midline?"partial ":"", inptr-start, start);*/ + + return part; +} + + +static void +folder_scan_close(struct _header_scan_state *s) +{ + g_free(s->realbuf); + g_free(s->outbuf); + while (s->parts) + folder_pull_part(s); + if (s->fd != -1) + close(s->fd); + if (s->stream) { + camel_object_unref((CamelObject *)s->stream); + } + g_free(s); +} + + +static struct _header_scan_state * +folder_scan_init(void) +{ + struct _header_scan_state *s; + + s = g_malloc(sizeof(*s)); + + s->fd = -1; + s->stream = NULL; + s->ioerrno = 0; + + s->outbuf = g_malloc(1024); + s->outptr = s->outbuf; + s->outend = s->outbuf+1024; + + s->realbuf = g_malloc(SCAN_BUF + SCAN_HEAD*2); + s->inbuf = s->realbuf + SCAN_HEAD; + s->inptr = s->inbuf; + s->inend = s->inbuf; + s->atleast = 0; + 
+ s->seek = 0; /* current character position in file of the last read block */ + s->unstep = 0; + + s->header_start = -1; + + s->start_of_from = -1; + s->start_of_headers = -1; + s->start_of_boundary = -1; + + s->midline = FALSE; + s->scan_from = FALSE; + s->scan_pre_from = FALSE; + s->eof = FALSE; + + s->filters = NULL; + s->filterid = 1; + + s->parts = NULL; + + s->state = CAMEL_MIME_PARSER_STATE_INITIAL; + return s; +} + +static void +drop_states(struct _header_scan_state *s) +{ + while (s->parts) { + folder_scan_drop_step(s); + } + s->unstep = 0; + s->state = CAMEL_MIME_PARSER_STATE_INITIAL; +} + +static void +folder_scan_reset(struct _header_scan_state *s) +{ + drop_states(s); + s->inend = s->inbuf; + s->inptr = s->inbuf; + s->inend[0] = '\n'; + if (s->fd != -1) { + close(s->fd); + s->fd = -1; + } + if (s->stream) { + camel_object_unref((CamelObject *)s->stream); + s->stream = NULL; + } + s->ioerrno = 0; + s->eof = FALSE; +} + +static int +folder_scan_init_with_fd(struct _header_scan_state *s, int fd) +{ + folder_scan_reset(s); + s->fd = fd; + + return 0; +} + +static int +folder_scan_init_with_stream(struct _header_scan_state *s, CamelStream *stream) +{ + folder_scan_reset(s); + s->stream = stream; + camel_object_ref((CamelObject *)stream); + + return 0; +} + +#define USE_FROM + +static void +folder_scan_step(struct _header_scan_state *s, char **databuffer, size_t *datalength) +{ + struct _header_scan_stack *h, *hb; + const char *content; + const char *bound; + int type, state, seenlast; + CamelContentType *ct = NULL; + struct _header_scan_filter *f; + size_t presize; + +/* printf("\nSCAN PASS: state = %d '%s'\n", s->state, states[s->state]);*/ + +tail_recurse: + d({ + printf("\nSCAN STACK:\n"); + printf(" '%s' :\n", states[s->state]); + hb = s->parts; + while (hb) { + printf(" '%s' : %s ", states[hb->savestate], hb->boundary); + if (hb->content_type) { + printf("(%s/%s)", hb->content_type->type, hb->content_type->subtype); + } else { + printf("(default)"); + 
} + printf("\n"); + hb = hb->parent; + } + printf("\n"); + }); + + switch (s->state) { + +#ifdef USE_FROM + case CAMEL_MIME_PARSER_STATE_INITIAL: + if (s->scan_from) { + h = g_malloc0(sizeof(*h)); + h->boundary = g_strdup("From "); + h->boundarylen = strlen(h->boundary); + h->boundarylenfinal = h->boundarylen; + h->from_line = g_byte_array_new(); + folder_push_part(s, h); + s->state = CAMEL_MIME_PARSER_STATE_PRE_FROM; + } else { + s->start_of_from = -1; + goto scan_header; + } + + case CAMEL_MIME_PARSER_STATE_PRE_FROM: + + h = s->parts; + do { + hb = folder_scan_content(s, &state, databuffer, datalength); + if (s->scan_pre_from && *datalength > 0) { + d(printf("got pre-from content %d bytes\n", *datalength)); + return; + } + } while (hb==h && *datalength>0); + + if (*datalength==0 && hb==h) { + d(printf("found 'From '\n")); + s->start_of_from = folder_tell(s); + folder_scan_skip_line(s, h->from_line); + h->savestate = CAMEL_MIME_PARSER_STATE_INITIAL; + s->state = CAMEL_MIME_PARSER_STATE_FROM; + } else { + folder_pull_part(s); + s->state = CAMEL_MIME_PARSER_STATE_EOF; + } + return; +#else + case CAMEL_MIME_PARSER_STATE_INITIAL: + case CAMEL_MIME_PARSER_STATE_PRE_FROM: +#endif /* !USE_FROM */ + + scan_header: + case CAMEL_MIME_PARSER_STATE_FROM: + s->start_of_headers = folder_tell(s); + h = folder_scan_header(s, &state); +#ifdef USE_FROM + if (s->scan_from) + h->savestate = CAMEL_MIME_PARSER_STATE_FROM_END; + else +#endif + h->savestate = CAMEL_MIME_PARSER_STATE_EOF; + + /* FIXME: should this check for MIME-Version: 1.0 as well? 
*/ + + type = CAMEL_MIME_PARSER_STATE_HEADER; + if ( (content = camel_header_raw_find(&h->headers, "Content-Type", NULL)) + && (ct = camel_content_type_decode(content))) { + if (!g_ascii_strcasecmp(ct->type, "multipart")) { + if (!camel_content_type_is(ct, "multipart", "signed") + && (bound = camel_content_type_param(ct, "boundary"))) { + d(printf("multipart, boundary = %s\n", bound)); + h->boundarylen = strlen(bound)+2; + h->boundarylenfinal = h->boundarylen+2; + h->boundary = g_malloc(h->boundarylen+3); + sprintf(h->boundary, "--%s--", bound); + type = CAMEL_MIME_PARSER_STATE_MULTIPART; + } else { + /*camel_content_type_unref(ct); + ct = camel_content_type_decode("text/plain");*/ +/* We can't quite do this, as it will mess up all the offsets ... */ +/* camel_header_raw_replace(&h->headers, "Content-Type", "text/plain", offset);*/ + /*g_warning("Multipart with no boundary, treating as text/plain");*/ + } + } else if (!strcasecmp(ct->type, "message")) { + if (!strcasecmp(ct->subtype, "rfc822") + || !strcasecmp(ct->subtype, "news") + /*|| !g_ascii_strcasecmp(ct->subtype, "partial")*/) { + type = CAMEL_MIME_PARSER_STATE_MESSAGE; + } + } + } else { + /* make the default type for multipart/digest be message/rfc822 */ + if ((s->parts + && camel_content_type_is(s->parts->content_type, "multipart", "digest"))) { + ct = camel_content_type_decode("message/rfc822"); + type = CAMEL_MIME_PARSER_STATE_MESSAGE; + d(printf("parent was multipart/digest, autoupgrading to message/rfc822?\n")); + /* maybe we should do this too? 
+ header_raw_append_parse(&h->headers, "Content-Type: message/rfc822", -1);*/ + } else { + ct = camel_content_type_decode("text/plain"); + } + } + h->content_type = ct; + folder_push_part(s, h); + s->state = type; + return; + + case CAMEL_MIME_PARSER_STATE_HEADER: + s->state = CAMEL_MIME_PARSER_STATE_BODY; + + case CAMEL_MIME_PARSER_STATE_BODY: + h = s->parts; + *datalength = 0; + presize = SCAN_HEAD; + f = s->filters; + + do { + hb = folder_scan_content (s, &state, databuffer, datalength); + + d(printf ("\n\nOriginal content: '")); + d(fwrite(*databuffer, sizeof(char), *datalength, stdout)); + d(printf("'\n")); + + if (*datalength > 0) { + while (f) { + camel_mime_filter_filter(f->filter, *databuffer, *datalength, presize, + databuffer, datalength, &presize); + d(printf("Filtered content (%s): '", ((CamelObject *)f->filter)->klass->name)); + d(fwrite(*databuffer, sizeof(char), *datalength, stdout)); + d(printf("'\n")); + f = f->next; + } + return; + } + } while (hb == h && *datalength > 0); + + /* check for any filter completion data */ + while (f) { + camel_mime_filter_complete(f->filter, *databuffer, *datalength, presize, + databuffer, datalength, &presize); + f = f->next; + } + + if (*datalength > 0) + return; + + s->state = CAMEL_MIME_PARSER_STATE_BODY_END; + break; + + case CAMEL_MIME_PARSER_STATE_MULTIPART: + h = s->parts; + /* This mess looks for the next boundary on this + level. Once it finds the last one, it keeps going, + looking for post-multipart content ('postface'). + Because messages might have duplicate boundaries for + different parts, it makes sure it stops if its already + found an end boundary for this part. It handles + truncated and missing boundaries appropriately too. 
*/ + seenlast = FALSE; + do { + do { + hb = folder_scan_content(s, &state, databuffer, datalength); + if (*datalength>0) { + /* instead of a new state, we'll just store it locally and provide + an accessor function */ + d(printf("Multipart %s Content %p: '%.*s'\n", + h->prestage>0?"post":"pre", h, *datalength, *databuffer)); + if (h->prestage > 0) { + if (h->posttext == NULL) + h->posttext = g_byte_array_new(); + g_byte_array_append(h->posttext, *databuffer, *datalength); + } else { + if (h->pretext == NULL) + h->pretext = g_byte_array_new(); + g_byte_array_append(h->pretext, *databuffer, *datalength); + } + } + } while (hb==h && *datalength>0); + h->prestage++; + if (*datalength==0 && hb==h && !seenlast) { + d(printf("got boundary: %s last=%d\n", hb->boundary, state)); + s->start_of_boundary = folder_tell(s); + folder_scan_skip_line(s, NULL); + if (!state) { + s->state = CAMEL_MIME_PARSER_STATE_FROM; + folder_scan_step(s, databuffer, datalength); + s->parts->savestate = CAMEL_MIME_PARSER_STATE_MULTIPART; /* set return state for the new head part */ + return; + } else + seenlast = TRUE; + } else { + break; + } + } while (1); + + s->state = CAMEL_MIME_PARSER_STATE_MULTIPART_END; + break; + + case CAMEL_MIME_PARSER_STATE_MESSAGE: + s->state = CAMEL_MIME_PARSER_STATE_FROM; + folder_scan_step(s, databuffer, datalength); + s->parts->savestate = CAMEL_MIME_PARSER_STATE_MESSAGE_END; + break; + + case CAMEL_MIME_PARSER_STATE_FROM_END: + case CAMEL_MIME_PARSER_STATE_BODY_END: + case CAMEL_MIME_PARSER_STATE_MULTIPART_END: + case CAMEL_MIME_PARSER_STATE_MESSAGE_END: + s->state = s->parts->savestate; + folder_pull_part(s); + if (s->state & CAMEL_MIME_PARSER_STATE_END) + return; + goto tail_recurse; + + case CAMEL_MIME_PARSER_STATE_EOF: + return; + + default: + g_warning("Invalid state in camel-mime-parser: %d", s->state); + break; + } + + return; +} + +/* drops the current state back one */ +static void +folder_scan_drop_step(struct _header_scan_state *s) +{ + switch 
(s->state) { + case CAMEL_MIME_PARSER_STATE_EOF: + s->state = CAMEL_MIME_PARSER_STATE_INITIAL; + case CAMEL_MIME_PARSER_STATE_INITIAL: + return; + + case CAMEL_MIME_PARSER_STATE_FROM: + case CAMEL_MIME_PARSER_STATE_PRE_FROM: + s->state = CAMEL_MIME_PARSER_STATE_INITIAL; + folder_pull_part(s); + return; + + case CAMEL_MIME_PARSER_STATE_MESSAGE: + case CAMEL_MIME_PARSER_STATE_HEADER: + case CAMEL_MIME_PARSER_STATE_MULTIPART: + + case CAMEL_MIME_PARSER_STATE_FROM_END: + case CAMEL_MIME_PARSER_STATE_BODY_END: + case CAMEL_MIME_PARSER_STATE_MULTIPART_END: + case CAMEL_MIME_PARSER_STATE_MESSAGE_END: + + s->state = s->parts->savestate; + folder_pull_part(s); + if (s->state & CAMEL_MIME_PARSER_STATE_END) { + s->state &= ~CAMEL_MIME_PARSER_STATE_END; + } + return; + default: + /* FIXME: not sure if this is entirely right */ + break; + } +} + +#ifdef STANDALONE +int main(int argc, char **argv) +{ + int fd; + struct _header_scan_state *s; + char *data; + size_t len; + int state; + char *name = "/tmp/evmail/Inbox"; + struct _header_scan_stack *h; + int i; + int attach = 0; + + if (argc==2) + name = argv[1]; + + printf("opening: %s", name); + + for (i=1;i<argc;i++) { + const char *encoding = NULL, *charset = NULL; + char *attachname; + + name = argv[i]; + printf("opening: %s", name); + + fd = open(name, O_RDONLY); + if (fd==-1) { + perror("Cannot open mailbox"); + exit(1); + } + s = folder_scan_init(); + folder_scan_init_with_fd(s, fd); + s->scan_from = FALSE; +#if 0 + h = g_malloc0(sizeof(*h)); + h->savestate = CAMEL_MIME_PARSER_STATE_EOF; + folder_push_part(s, h); +#endif + while (s->state != CAMEL_MIME_PARSER_STATE_EOF) { + folder_scan_step(s, &data, &len); + printf("\n -- PARSER STEP RETURN -- %d '%s'\n\n", s->state, states[s->state]); + switch (s->state) { + case CAMEL_MIME_PARSER_STATE_HEADER: + if (s->parts->content_type + && (charset = camel_content_type_param(s->parts->content_type, "charset"))) { + if (g_ascii_strcasecmp(charset, "us-ascii")) { +#if 0 + 
folder_push_filter_charset(s, "UTF-8", charset); +#endif + } else { + charset = NULL; + } + } else { + charset = NULL; + } + + encoding = camel_header_raw_find(&s->parts->headers, "Content-transfer-encoding", 0); + printf("encoding = '%s'\n", encoding); + if (encoding && !strncasecmp(encoding, " base64", 7)) { + printf("adding base64 filter\n"); + attachname = g_strdup_printf("attach.%d.%d", i, attach++); +#if 0 + folder_push_filter_save(s, attachname); +#endif + g_free(attachname); +#if 0 + folder_push_filter_mime(s, 0); +#endif + } + if (encoding && !strncasecmp(encoding, " quoted-printable", 17)) { + printf("adding quoted-printable filter\n"); + attachname = g_strdup_printf("attach.%d.%d", i, attach++); +#if 0 + folder_push_filter_save(s, attachname); +#endif + g_free(attachname); +#if 0 + folder_push_filter_mime(s, 1); +#endif + } + + break; + case CAMEL_MIME_PARSER_STATE_BODY: + printf("got body %d '%.*s'\n", len, len, data); + break; + case CAMEL_MIME_PARSER_STATE_BODY_END: + printf("end body %d '%.*s'\n", len, len, data); + if (encoding && !strncasecmp(encoding, " base64", 7)) { + printf("removing filters\n"); +#if 0 + folder_filter_pull(s); + folder_filter_pull(s); +#endif + } + if (encoding && !strncasecmp(encoding, " quoted-printable", 17)) { + printf("removing filters\n"); +#if 0 + folder_filter_pull(s); + folder_filter_pull(s); +#endif + } + if (charset) { +#if 0 + folder_filter_pull(s); +#endif + charset = NULL; + } + encoding = NULL; + break; + default: + break; + } + } + folder_scan_close(s); + close(fd); + } + return 0; +} + +#endif /* STANDALONE */ + |