diff options
Diffstat (limited to 'camel/camel-mime-parser.c')
-rw-r--r-- | camel/camel-mime-parser.c | 1248 |
1 files changed, 0 insertions, 1248 deletions
diff --git a/camel/camel-mime-parser.c b/camel/camel-mime-parser.c deleted file mode 100644 index 2f1d9fd70d..0000000000 --- a/camel/camel-mime-parser.c +++ /dev/null @@ -1,1248 +0,0 @@ -/* - * Copyright (C) 2000 Helix Code Inc. - * - * Authors: Michael Zucchi <notzed@helixcode.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public License - * as published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* What should hopefully be a fast mail parser */ - -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> - -#include <string.h> - -#include <stdio.h> -#include <errno.h> - -#include <unicode.h> - -#include "camel-mime-parser.h" -#include "camel-mime-utils.h" -#include "camel-mime-filter.h" -#include "camel-stream.h" -#include "camel-seekable-stream.h" - -#define r(x) -#define h(x) -#define c(x) -#define d(x) - -#define SCAN_BUF 4096 /* size of read buffer */ -#define SCAN_HEAD 128 /* headroom guaranteed to be before each read buffer */ - -/* a little hacky, but i couldn't be bothered renaming everything */ -#define _header_scan_state _CamelMimeParserPrivate -#define _PRIVATE(o) (((CamelMimeParser *)(o))->priv) - -struct _header_scan_state { - - /* global state */ - - enum _header_state state; - - /* for building headers during scanning */ - char *outbuf; - char *outptr; - char *outend; - - int fd; /* input for a fd input */ - CamelStream *stream; /* or for a stream */ - - /* for scanning input buffers */ - char *realbuf; /* the real buffer, SCAN_HEAD*2 + SCAN_BUF bytes */ - char *inbuf; /* points to a subset of the allocated memory, the underflow */ - char *inptr; /* (upto SCAN_HEAD) is for use by filters so they dont copy all data */ - char *inend; - - int atleast; - - int seek; /* current offset to start of buffer */ - int unstep; /* how many states to 'unstep' (repeat the current state) */ - - int midline; /* are we mid-line interrupted? */ - int scan_from; /* do we care about From lines? */ - - int start_of_from; /* where from started */ - int start_of_headers; /* where headers started from the last scan */ - - int header_start; /* start of last header, or -1 */ - - struct _header_scan_stack *top_part; /* top of message header */ - int top_start; /* offset of start */ - - struct _header_scan_stack *pending; /* if we're pending part info, from the wrong part end */ - - /* filters to apply to all content before output */ - int filterid; /* id of next filter */ - struct _header_scan_filter *filters; - - /* per message/part info */ - struct _header_scan_stack *parts; - -}; - -struct _header_scan_stack { - struct _header_scan_stack *parent; - - enum _header_state savestate; /* state at invocation of this part */ - - struct _header_raw *headers; /* headers for this part */ - - struct _header_content_type *content_type; - - char *boundary; /* for multipart/ * boundaries, including leading -- and trailing -- for the final part */ - int boundarylen; /* length of boundary, including leading -- */ -}; - -struct _header_scan_filter { - struct _header_scan_filter *next; - int id; - CamelMimeFilter *filter; -}; - -static void folder_scan_step(struct _header_scan_state *s, char **databuffer, int *datalength); -static int folder_scan_init_with_fd(struct _header_scan_state *s, int fd); -static int folder_scan_init_with_stream(struct _header_scan_state *s, CamelStream *stream); -static struct _header_scan_state *folder_scan_init(void); -static void folder_scan_close(struct _header_scan_state *s); -static struct _header_scan_stack *folder_scan_content(struct _header_scan_state *s, int *lastone, char **data, int *length); -static struct _header_scan_stack *folder_scan_header(struct _header_scan_state *s, int *lastone); -static int folder_scan_skip_line(struct _header_scan_state *s); -static off_t folder_seek(struct _header_scan_state *s, off_t offset, int whence); -static off_t folder_tell(struct _header_scan_state *s); - -static void camel_mime_parser_class_init (CamelMimeParserClass *klass); -static void camel_mime_parser_init (CamelMimeParser *obj); - -static char *states[] = { - "HSCAN_INITIAL", - "HSCAN_FROM", /* got 'From' line */ - "HSCAN_HEADER", /* toplevel header */ - "HSCAN_BODY", /* scanning body of message */ - "HSCAN_MULTIPART", /* got multipart header */ - "HSCAN_MESSAGE", /* rfc822 message */ - - "HSCAN_PART", /* part of a multipart */ - "<invalid>", - - "HSCAN_EOF", /* end of file */ - "HSCAN_FROM_END", - "HSCAN_HEAER_END", - "HSCAN_BODY_END", - "HSCAN_MULTIPART_END", - "HSCAN_MESSAGE_END", -}; - -static GtkObjectClass *camel_mime_parser_parent; - -enum SIGNALS { - LAST_SIGNAL -}; - -static guint signals[LAST_SIGNAL] = { 0 }; - -guint -camel_mime_parser_get_type (void) -{ - static guint type = 0; - - if (!type) { - GtkTypeInfo type_info = { - "CamelMimeParser", - sizeof (CamelMimeParser), - sizeof (CamelMimeParserClass), - (GtkClassInitFunc) camel_mime_parser_class_init, - (GtkObjectInitFunc) camel_mime_parser_init, - (GtkArgSetFunc) NULL, - (GtkArgGetFunc) NULL - }; - - type = gtk_type_unique (gtk_object_get_type (), &type_info); - } - - return type; -} - -static void -finalise(GtkObject *o) -{ - struct _header_scan_state *s = _PRIVATE(o); - - folder_scan_close(s); - - ((GtkObjectClass *)camel_mime_parser_parent)->finalize (o); -} - -static void -camel_mime_parser_class_init (CamelMimeParserClass *klass) -{ - GtkObjectClass *object_class = (GtkObjectClass *) klass; - - camel_mime_parser_parent = gtk_type_class (gtk_object_get_type ()); - - object_class->finalize = finalise; - - gtk_object_class_add_signals (object_class, signals, LAST_SIGNAL); -} - -static void -camel_mime_parser_init (CamelMimeParser *obj) -{ - struct _header_scan_state *s; - - s = folder_scan_init(); - _PRIVATE(obj) = s; -} - -/** - * camel_mime_parser_new: - * - * Create a new CamelMimeParser object. - * - * Return value: A new CamelMimeParser widget. - **/ -CamelMimeParser * -camel_mime_parser_new (void) -{ - CamelMimeParser *new = CAMEL_MIME_PARSER ( gtk_type_new (camel_mime_parser_get_type ())); - return new; -} - - -int -camel_mime_parser_filter_add(CamelMimeParser *m, CamelMimeFilter *mf) -{ - struct _header_scan_state *s = _PRIVATE(m); - struct _header_scan_filter *f, *new; - - new = g_malloc(sizeof(*new)); - new->filter = mf; - new->id = s->filterid++; - if (s->filterid == -1) - s->filterid++; - new->next = 0; - gtk_object_ref((GtkObject *)mf); - - /* yes, this is correct, since 'next' is the first element of the struct */ - f = (struct _header_scan_filter *)&s->filters; - while (f->next) - f = f->next; - f->next = new; - return new->id; -} - -void -camel_mime_parser_filter_remove(CamelMimeParser *m, int id) -{ - struct _header_scan_state *s = _PRIVATE(m); - struct _header_scan_filter *f, *old; - - f = (struct _header_scan_filter *)&s->filters; - while (f && f->next) { - old = f->next; - if (old->id == id) { - gtk_object_unref((GtkObject *)old->filter); - f->next = old->next; - g_free(old); - /* there should only be a single matching id, but - scan the whole lot anyway */ - } - f = f->next; - } -} - -const char * -camel_mime_parser_header(CamelMimeParser *m, const char *name, int *offset) -{ - struct _header_scan_state *s = _PRIVATE(m); - - if (s->parts && - s->parts->headers) { - return header_raw_find(&s->parts->headers, name, offset); - } - return NULL; -} - -struct _header_raw * -camel_mime_parser_headers_raw(CamelMimeParser *m) -{ - struct _header_scan_state *s = _PRIVATE(m); - - if (s->parts) - return s->parts->headers; - return NULL; -} - -int -camel_mime_parser_init_with_fd(CamelMimeParser *m, int fd) -{ - struct _header_scan_state *s = _PRIVATE(m); - - return folder_scan_init_with_fd(s, fd); -} - -int -camel_mime_parser_init_with_stream(CamelMimeParser *m, CamelStream *stream) -{ - struct _header_scan_state *s = _PRIVATE(m); - - return folder_scan_init_with_stream(s, stream); -} - -void -camel_mime_parser_scan_from(CamelMimeParser *m, int scan_from) -{ - struct _header_scan_state *s = _PRIVATE(m); - s->scan_from = scan_from; -} - -struct _header_content_type * -camel_mime_parser_content_type(CamelMimeParser *m) -{ - struct _header_scan_state *s = _PRIVATE(m); - - /* FIXME: should this search up until its found the 'right' - content-type? can it? */ - if (s->parts) - return s->parts->content_type; - return NULL; -} - -void camel_mime_parser_unstep(CamelMimeParser *m) -{ - struct _header_scan_state *s = _PRIVATE(m); - - s->unstep++; -} - -enum _header_state -camel_mime_parser_step(CamelMimeParser *m, char **databuffer, int *datalength) -{ - struct _header_scan_state *s = _PRIVATE(m); - - d(printf("OLD STATE: '%s' :\n", states[s->state])); - - if (s->unstep <= 0) - folder_scan_step(s, databuffer, datalength); - else - s->unstep--; - - d(printf("NEW STATE: '%s' :\n", states[s->state])); - - return s->state; -} - -off_t camel_mime_parser_tell(CamelMimeParser *m) -{ - struct _header_scan_state *s = _PRIVATE(m); - - return folder_tell(s); -} - -off_t camel_mime_parser_tell_start_headers(CamelMimeParser *m) -{ - struct _header_scan_state *s = _PRIVATE(m); - - return s->start_of_headers; -} - -off_t camel_mime_parser_tell_start_from(CamelMimeParser *m) -{ - struct _header_scan_state *s = _PRIVATE(m); - - return s->start_of_from; -} - -off_t camel_mime_parser_seek(CamelMimeParser *m, off_t off, int whence) -{ - struct _header_scan_state *s = _PRIVATE(m); - return folder_seek(s, off, whence); -} - -enum _header_state camel_mime_parser_state(CamelMimeParser *m) -{ - struct _header_scan_state *s = _PRIVATE(m); - return s->state; -} - -CamelStream *camel_mime_parser_stream(CamelMimeParser *m) -{ - struct _header_scan_state *s = _PRIVATE(m); - return s->stream; -} - -int camel_mime_parser_fd(CamelMimeParser *m) -{ - struct _header_scan_state *s = _PRIVATE(m); - return s->fd; -} - -/* ********************************************************************** */ -/* Implementation */ -/* ********************************************************************** */ - -/* read the next bit of data, ensure there is enough room 'atleast' bytes */ -static int -folder_read(struct _header_scan_state *s) -{ - int len; - int inoffset; - - if (s->inptr<s->inend-s->atleast) - return s->inend-s->inptr; - - /* check for any remaning bytes (under the atleast limit( */ - inoffset = s->inend - s->inptr; - if (inoffset>0) { - memcpy(s->inbuf, s->inptr, inoffset); - } - if (s->stream) { - len = camel_stream_read(s->stream, s->inbuf+inoffset, SCAN_BUF-inoffset); - } else { - len = read(s->fd, s->inbuf+inoffset, SCAN_BUF-inoffset); - } - r(printf("read %d bytes, offset = %d\n", len, inoffset)); - if (len>=0) { - /* add on the last read block */ - s->seek += s->inptr - s->inbuf; - s->inptr = s->inbuf; - s->inend = s->inbuf+len+inoffset; - r(printf("content = %d '%.*s'\n",s->inend - s->inptr, s->inend - s->inptr, s->inptr)); - } - r(printf("content = %d '%.*s'\n", s->inend - s->inptr, s->inend - s->inptr, s->inptr)); - return s->inend-s->inptr; -} - -/* return the current absolute position of the data pointer */ -static off_t -folder_tell(struct _header_scan_state *s) -{ - return s->seek + (s->inptr - s->inbuf); -} - -/* - need some way to prime the parser state, so this actually works for - other than top-level messages -*/ -static off_t -folder_seek(struct _header_scan_state *s, off_t offset, int whence) -{ - off_t newoffset; - int len; - - if (s->stream) { - if (CAMEL_IS_SEEKABLE_STREAM(s->stream)) { - /* NOTE: assumes whence seekable stream == whence libc, which is probably - the case (or bloody well should've been) */ - newoffset = camel_seekable_stream_seek((CamelSeekableStream *)s->stream, offset, whence); - } else { - newoffset = -1; - errno = EINVAL; - } - } else { - newoffset = lseek(s->fd, offset, whence); - } - if (newoffset != -1) { - s->seek = newoffset; - s->inptr = s->inbuf; - s->inend = s->inbuf; - if (s->stream) - len = camel_stream_read(s->stream, s->inbuf, SCAN_BUF); - else - len = read(s->fd, s->inbuf, SCAN_BUF); - if (len>=0) - s->inend = s->inbuf+len; - else - newoffset = -1; - } - return newoffset; -} - -static void -folder_push_part(struct _header_scan_state *s, struct _header_scan_stack *h) -{ - h->parent = s->parts; - s->parts = h; -} - -static void -folder_pull_part(struct _header_scan_state *s) -{ - struct _header_scan_stack *h; - - h = s->parts; - if (h) { - s->parts = h->parent; - g_free(h->boundary); - header_raw_clear(&h->headers); - header_content_type_unref(h->content_type); - g_free(h); - } else { - g_warning("Header stack underflow!\n"); - } -} - -static int -folder_scan_skip_line(struct _header_scan_state *s) -{ - int atleast = s->atleast; - register char *inptr, *inend, c; - int len; - - s->atleast = 1; - - while ( (len = folder_read(s)) > 0 && len > s->atleast) { /* ensure we have at least enough room here */ - inptr = s->inptr; - inend = s->inend-1; - - c = -1; - while (inptr<inend - && (c = *inptr++)!='\n') - ; - - s->inptr = inptr; - - if (c=='\n') { - s->atleast = atleast; - return 0; - } - } - - s->atleast = atleast; - - return -1; /* not found */ -} - -static struct _header_scan_stack * -folder_boundary_check(struct _header_scan_state *s, const char *boundary, int *lastone) -{ - struct _header_scan_stack *part; - int len = s->atleast-2; /* make sure we dont access past the buffer */ - - h(printf("checking boundary marker upto %d bytes\n", len)); - part = s->parts; - while (part) { - h(printf(" boundary: %s\n", part->boundary)); - h(printf(" against: '%.*s'\n", len, boundary)); - if (part->boundary - && part->boundarylen <= len - && memcmp(boundary, part->boundary, part->boundarylen)==0) { - h(printf("matched boundary: %s\n", part->boundary)); - /* again, make sure we're in range */ - if (part->boundarylen <= len+2) { - h(printf("checking lastone\n")); - *lastone = (boundary[part->boundarylen]=='-' - && boundary[part->boundarylen+1]=='-'); - } else { - h(printf("not enough room to check last one?\n")); - *lastone = FALSE; - } - /*printf("ok, we found it! : %s \n", (*lastone)?"Last one":"More to come?");*/ - return part; - } - part = part->parent; - } - return NULL; -} - -static struct _header_scan_stack * -folder_scan_header(struct _header_scan_state *s, int *lastone) -{ - int atleast = s->atleast; - register char *inptr, *inend; - char *start; - int len; - struct _header_scan_stack *part, *overpart = s->parts; - struct _header_scan_stack *h; - - h(printf("scanning first bit\n")); - - h = g_malloc0(sizeof(*h)); - - /* FIXME: this info should be cached ? */ - part = s->parts; - s->atleast = 5; - while (part) { - if (part->boundary) - s->atleast = MAX(s->atleast, part->boundarylen+2); - part = part->parent; - } -#if 0 - s->atleast = MAX(s->atleast, 5); - if (s->parts) - s->atleast = MAX(s->atleast, s->parts->boundarylen+2); -#endif - - *lastone = FALSE; -retry: - - while ((len = folder_read(s))>0 && len >= s->atleast) { /* ensure we have at least enough room here */ - inptr = s->inptr; - inend = s->inend-s->atleast; - start = inptr; - - while (inptr<=inend) { - register int c=-1; - /*printf(" '%.20s'\n", inptr);*/ - - if (!s->midline - && (part = folder_boundary_check(s, inptr, lastone))) { - if ((s->outptr>s->outbuf) || (inptr-start)) - goto header_truncated; /* may not actually be truncated */ - - goto normal_exit; - } - - /* goto next line */ - while (inptr<=inend && (c = *inptr++)!='\n') - ; - - /* allocate/append - this wont get executed unless we have *huge* headers, - and then probably only once */ - { - register int headerlen = inptr-start; - register int len = (s->outend - s->outbuf); - char *outnew; - - if (headerlen >= len) { - len = (len+headerlen)*2+1; - outnew = g_realloc(s->outbuf, len); - s->outptr = s->outptr - s->outbuf + outnew; - s->outbuf = outnew; - s->outend = outnew + len; - } - memcpy(s->outptr, start, headerlen); - s->outptr += headerlen; - } - - h(printf("outbuf[0] = %02x '%c' oubuf[1] = %02x '%c'\n", - s->outbuf[0], isprint(s->outbuf[0])?s->outbuf[0]:'.', - s->outbuf[1], isprint(s->outbuf[1])?s->outbuf[1]:'.')); - - if (s->header_start == -1) - s->header_start = (start-s->inbuf) + s->seek; - - if (c!='\n') { - s->midline = TRUE; - } else { - if (!(inptr[0] == ' ' || inptr[0] == '\t')) { - if (s->outbuf[0] == '\n' - || (s->outbuf[0] == '\r' && s->outbuf[1]=='\n')) { - goto header_done; - } - - /* we always have at least _1_ char here ... */ - if (s->outptr[-1] == '\n') - s->outptr--; - s->outptr[0] = 0; - - d(printf("header %.10s at %d\n", s->outbuf, s->header_start)); - - header_raw_append_parse(&h->headers, s->outbuf, s->header_start); - if (inptr[0]=='\n' - || (inptr[0] == '\r' && inptr[1]=='\n')) { - inptr++; - goto header_done; - } - s->outptr = s->outbuf; - s->header_start = -1; - } - s->midline = FALSE; - start = inptr; - } - } - s->inptr = inptr; - } - - /* ok, we're at the end of the data, just make sure we're not missing out some small - truncated header markers */ - if (overpart) { - overpart = overpart->parent; - while (overpart) { - if (overpart->boundary && (overpart->boundarylen+2) < s->atleast) { - s->atleast = overpart->boundarylen+2; - h(printf("Retrying next smaller part ...\n")); - goto retry; - } - overpart = overpart->parent; - } - } - - if ((s->outptr > s->outbuf) || s->inend > s->inptr) { - start = s->inptr; - inptr = s->inend; - goto header_truncated; - } - - s->atleast = atleast; - - return h; - -header_truncated: - - { - register int headerlen = inptr-start; - register int len = (s->outend - s->outbuf); - char *outnew; - - if (headerlen >= len) { - len = (len+headerlen)*2+1; - outnew = g_realloc(s->outbuf, len); - s->outptr = s->outptr - s->outbuf + outnew; - s->outbuf = outnew; - s->outend = outnew + len; - } - memcpy(s->outptr, start, headerlen); - s->outptr += headerlen; - } - if (s->outptr>s->outbuf && s->outptr[-1] == '\n') - s->outptr--; - s->outptr[0] = 0; - - if (s->header_start == -1) - s->header_start = (start-s->inbuf) + s->seek; - - if (s->outbuf[0] == '\n' - || (s->outbuf[0] == '\r' && s->outbuf[1]=='\n')) { - goto header_done; - } - - header_raw_append_parse(&h->headers, s->outbuf, s->header_start); - -header_done: - part = s->parts; - - s->outptr = s->outbuf; -normal_exit: - s->inptr = inptr; - s->atleast = atleast; - s->header_start = -1; - return h; -} - -static struct _header_scan_stack * -folder_scan_content(struct _header_scan_state *s, int *lastone, char **data, int *length) -{ - int atleast = s->atleast; - register char *inptr, *inend; - char *start; - int len; - struct _header_scan_stack *part, *overpart = s->parts; - int already_packed = FALSE; - - /*printf("scanning content\n");*/ - - /* FIXME: this info should be cached ? */ - part = s->parts; - s->atleast = 5; - while (part) { - if (part->boundary) { - c(printf("boundary: %s\n", part->boundary)); - s->atleast = MAX(s->atleast, part->boundarylen+2); - } - part = part->parent; - } -/* s->atleast = MAX(s->atleast, 5);*/ -#if 0 - if (s->parts) - s->atleast = MAX(s->atleast, s->parts->boundarylen+2); -#endif - *lastone = FALSE; - -retry: - c(printf("atleast = %d\n", s->atleast)); - - while ((len = folder_read(s))>0 && len >= s->atleast) { /* ensure we have at least enough room here */ - inptr = s->inptr; - inend = s->inend-s->atleast; - start = inptr; - - c(printf("inptr = %p, inend = %p\n", inptr, inend)); - - while (inptr<=inend) { - if (!s->midline - && (part = folder_boundary_check(s, inptr, lastone))) { - if ( (inptr-start) ) - goto content; - - goto normal_exit; - } - /* goto the next line */ - while (inptr<=inend && (*inptr++)!='\n') - ; - - s->midline = FALSE; - } - - /* *sigh* so much for the beautiful simplicity of the code so far - here we - have the snot to deal with the nasty end-cases that come from the read-ahead - buffers we use */ - /* what this does, is if we are somewhere near the end of the buffer, - force it to the front, and re-read, ensuring we bunch as much together - as possible, for the final read, without copying too much of the time */ - /* make sure we dont loop forever, but also make sure we try smaller - boundaries, if there are any, so we dont miss any. */ - /* this is not needed for the header scanner, since it copies its own - data */ - c(printf("start offset = %d atleast = %d\n", start-s->inbuf, s->atleast)); - if (start > (s->inbuf + s->atleast)) { - /* force a re-scan of this data */ - s->inptr = start; - if (already_packed) - goto smaller_boundary; - c(printf("near the end, try and bunch things up a bit first\n")); - already_packed = TRUE; - } else { - c(printf("dumping what i've got ...\n")); - /* what would be nice here, is if that we're at eof, we bunch the last - little bit in the same content, but i dont think this is easy */ - goto content_mid; - } - } - - c(printf("length read = %d\n", len)); -smaller_boundary: - - /* ok, we're at the end of the data, just make sure we're not missing out some small - truncated header markers */ - if (overpart) { - overpart = overpart->parent; - while (overpart) { - if (overpart->boundary && (overpart->boundarylen+2) < s->atleast) { - s->atleast = overpart->boundarylen+2; - c(printf("Retrying next smaller part ...\n")); - goto retry; - } - overpart = overpart->parent; - } - } - - if (s->inend > s->inptr) { - start = s->inptr; - inptr = s->inend; - goto content; - } - - *length = 0; - s->atleast = atleast; - return NULL; - -content_mid: - s->midline = TRUE; -content: - part = s->parts; -normal_exit: - s->atleast = atleast; - s->inptr = inptr; - - *data = start; - *length = inptr-start; - -/* printf("got %scontent: %.*s", s->midline?"partial ":"", inptr-start, start);*/ - - return part; -} - - -static void -folder_scan_close(struct _header_scan_state *s) -{ - g_free(s->realbuf); - g_free(s->outbuf); - while (s->parts) - folder_pull_part(s); - if (s->fd != -1) - close(s->fd); - if (s->stream) - gtk_object_unref((GtkObject *)s->stream); - g_free(s); -} - - -static struct _header_scan_state * -folder_scan_init(void) -{ - struct _header_scan_state *s; - - s = g_malloc(sizeof(*s)); - - s->fd = -1; - s->stream = NULL; - - s->outbuf = g_malloc(1024); - s->outptr = s->outbuf; - s->outend = s->outbuf+1024; - - s->realbuf = g_malloc(SCAN_BUF + SCAN_HEAD*2); - s->inbuf = s->realbuf + SCAN_HEAD; - s->inptr = s->inbuf; - s->inend = s->inbuf; - s->atleast = 0; - - s->seek = 0; /* current character position in file of the last read block */ - s->unstep = 0; - - s->header_start = -1; - - s->start_of_from = -1; - s->start_of_headers = -1; - - s->midline = FALSE; - s->scan_from = FALSE; - - s->filters = NULL; - s->filterid = 1; - - s->parts = NULL; - - s->state = HSCAN_INITIAL; - return s; -} - -static int -folder_scan_init_with_fd(struct _header_scan_state *s, int fd) -{ - int len; - - len = read(fd, s->inbuf, SCAN_BUF); - if (len>=0) { - s->inend = s->inbuf+len; - if (s->fd != -1) - close(s->fd); - s->fd = fd; - if (s->stream) { - gtk_object_unref((GtkObject *)s->stream); - s->stream = NULL; - } - return 0; - } else { - return -1; - } -} - -static int -folder_scan_init_with_stream(struct _header_scan_state *s, CamelStream *stream) -{ - int len; - - len = camel_stream_read(stream, s->inbuf, SCAN_BUF); - if (len>=0) { - s->inend = s->inbuf+len; - if (s->stream) - gtk_object_unref((GtkObject *)s->stream); - s->stream = stream; - gtk_object_ref((GtkObject *)stream); - if (s->fd != -1) { - close(s->fd); - s->fd = -1; - } - return 0; - } else { - return -1; - } -} - -#define USE_FROM - -static void -folder_scan_step(struct _header_scan_state *s, char **databuffer, int *datalength) -{ - struct _header_scan_stack *h, *hb; - const char *content; - const char *bound; - int type; - int state; - struct _header_content_type *ct = NULL; - struct _header_scan_filter *f; - size_t presize; - -/* printf("\nSCAN PASS: state = %d '%s'\n", s->state, states[s->state]);*/ - -tail_recurse: - d({ - printf("\nSCAN STACK:\n"); - printf(" '%s' :\n", states[s->state]); - hb = s->parts; - while (hb) { - printf(" '%s' : %s\n", states[hb->savestate], hb->boundary); - hb = hb->parent; - } - printf("\n"); - }); - - switch (s->state) { - - case HSCAN_INITIAL: -#ifdef USE_FROM - if (s->scan_from) { - /* FIXME: it would be nice not to have to allocate this every pass */ - h = g_malloc0(sizeof(*h)); - h->boundary = g_strdup("From "); - h->boundarylen = strlen(h->boundary); - folder_push_part(s, h); - - h = s->parts; - do { - hb = folder_scan_content(s, &state, databuffer, datalength); - } while (hb==h && *datalength>0); - - if (*datalength==0 && hb==h) { - d(printf("found 'From '\n")); - s->start_of_from = folder_tell(s); - folder_scan_skip_line(s); - h->savestate = HSCAN_INITIAL; - s->state = HSCAN_FROM; - } else { - folder_pull_part(s); - s->state = HSCAN_EOF; - } - return; - } else { - s->start_of_from = -1; - } - -#endif - case HSCAN_FROM: - s->start_of_headers = folder_tell(s); - h = folder_scan_header(s, &state); -#ifdef USE_FROM - if (s->scan_from) - h->savestate = HSCAN_FROM_END; - else -#endif - h->savestate = HSCAN_EOF; - - /* FIXME: should this check for MIME-Version: 1.0 as well? */ - - type = HSCAN_HEADER; - if ( (content = header_raw_find(&h->headers, "Content-Type", NULL)) - && (ct = header_content_type_decode(content))) { - if (!strcasecmp(ct->type, "multipart")) { - bound = header_content_type_param(ct, "boundary"); - if (bound) { - d(printf("multipart, boundary = %s\n", bound)); - h->boundarylen = strlen(bound)+2; - h->boundary = g_malloc(h->boundarylen+3); - sprintf(h->boundary, "--%s--", bound); - type = HSCAN_MULTIPART; - } else { - g_warning("Multipart with no boundary, treating as text/plain"); - } - } else if (!strcasecmp(ct->type, "message")) { - if (!strcasecmp(ct->subtype, "rfc822") - /*|| !strcasecmp(ct->subtype, "partial")*/) { - type = HSCAN_MESSAGE; - } - } - } - h->content_type = ct; - folder_push_part(s, h); - s->state = type; - return; - - case HSCAN_HEADER: - s->state = HSCAN_BODY; - - case HSCAN_BODY: - h = s->parts; - *datalength = 0; - presize = SCAN_HEAD; - f = s->filters; - - do { - hb = folder_scan_content(s, &state, databuffer, datalength); - if (*datalength>0) { - d(printf("Content raw: '%.*s'\n", *datalength, *databuffer)); - - while (f) { - camel_mime_filter_filter(f->filter, *databuffer, *datalength, presize, - databuffer, datalength, &presize); - f = f->next; - } - return; - } - } while (hb==h && *datalength>0); - - /* check for any filter completion data */ - while (f) { - camel_mime_filter_filter(f->filter, *databuffer, *datalength, presize, - databuffer, datalength, &presize); - f = f->next; - } - if (*datalength > 0) - return; - - s->state = HSCAN_BODY_END; - break; - - case HSCAN_MULTIPART: - h = s->parts; - do { - do { - hb = folder_scan_content(s, &state, databuffer, datalength); - if (*datalength>0) { - /* FIXME: needs a state to return this shit??? */ - d(printf("Multipart Content: '%.*s'\n", *datalength, *databuffer)); - } - } while (hb==h && *datalength>0); - if (*datalength==0 && hb==h) { - d(printf("got boundary: %s\n", hb->boundary)); - folder_scan_skip_line(s); - if (!state) { - s->state = HSCAN_FROM; - folder_scan_step(s, databuffer, datalength); - s->parts->savestate = HSCAN_MULTIPART; /* set return state for the new head part */ - return; - } - } else { - break; - } - } while (1); - - s->state = HSCAN_MULTIPART_END; - break; - - case HSCAN_MESSAGE: - s->state = HSCAN_FROM; - folder_scan_step(s, databuffer, datalength); - s->parts->savestate = HSCAN_MESSAGE_END; - break; - - case HSCAN_FROM_END: - case HSCAN_BODY_END: - case HSCAN_MULTIPART_END: - case HSCAN_MESSAGE_END: - s->state = s->parts->savestate; - folder_pull_part(s); - if (s->state & HSCAN_END) - return; - goto tail_recurse; - - case HSCAN_EOF: - return; - - default: - g_warning("Invalid state in camel-mime-parser: %d", s->state); - break; - } - - return; -} - -#ifdef STANDALONE -int main(int argc, char **argv) -{ - int fd; - struct _header_scan_state *s; - char *data; - int len; - int state; - char *name = "/tmp/evmail/Inbox"; - struct _header_scan_stack *h; - int i; - int attach = 0; - - if (argc==2) - name = argv[1]; - - printf("opening: %s", name); - - for (i=1;i<argc;i++) { - const char *encoding = NULL, *charset = NULL; - char *attachname; - - name = argv[i]; - printf("opening: %s", name); - - fd = open(name, O_RDONLY); - if (fd==-1) { - perror("Cannot open mailbox"); - exit(1); - } - s = folder_scan_init(fd); - s->scan_from = FALSE; -#if 0 - h = g_malloc0(sizeof(*h)); - h->savestate = HSCAN_EOF; - folder_push_part(s, h); -#endif - while (s->state != HSCAN_EOF) { - folder_scan_step(s, &data, &len); - printf("\n -- PARSER STEP RETURN -- %d '%s'\n\n", s->state, states[s->state]); - switch (s->state) { - case HSCAN_HEADER: - if (s->parts->content_type - && (charset = header_content_type_param(s->parts->content_type, "charset"))) { - if (strcasecmp(charset, "us-ascii")) { - folder_push_filter_charset(s, "UTF-8", charset); - } else { - charset = NULL; - } - } else { - charset = NULL; - } - - encoding = header_raw_find(&s->parts->headers, "Content-transfer-encoding"); - printf("encoding = '%s'\n", encoding); - if (encoding && !strncasecmp(encoding, " base64", 7)) { - printf("adding base64 filter\n"); - attachname = g_strdup_printf("attach.%d.%d", i, attach++); - folder_push_filter_save(s, attachname); - g_free(attachname); - folder_push_filter_mime(s, 0); - } - if (encoding && !strncasecmp(encoding, " quoted-printable", 17)) { - printf("adding quoted-printable filter\n"); - attachname = g_strdup_printf("attach.%d.%d", i, attach++); - folder_push_filter_save(s, attachname); - g_free(attachname); - folder_push_filter_mime(s, 1); - } - - break; - case HSCAN_BODY: - break; - case HSCAN_BODY_END: - if (encoding && !strncasecmp(encoding, " base64", 7)) { - printf("removing filters\n"); - folder_filter_pull(s); - folder_filter_pull(s); - } - if (encoding && !strncasecmp(encoding, " quoted-printable", 17)) { - printf("removing filters\n"); - folder_filter_pull(s); - folder_filter_pull(s); - } - if (charset) { - folder_filter_pull(s); - charset = NULL; - } - encoding = NULL; - break; - default: - break; - } - } - folder_scan_close(s); - close(fd); - } - return 0; -} - -#endif /* STANDALONE */ |