diff options
author | bertrand <bertrand@helixcode.com> | 2000-01-12 10:09:50 +0800 |
---|---|---|
committer | Bertrand Guiheneuf <bertrand@src.gnome.org> | 2000-01-12 10:09:50 +0800 |
commit | c6ff08354321779cfa5995ee768b6f17a32847c5 (patch) | |
tree | 86a3766cb1b518fe037664c4e144ca3e3097af39 /camel/providers/mbox/camel-mbox-parser.c | |
parent | edcdb2068858eebca3c23978c91b827a077c02a1 (diff) | |
download | gsoc2013-evolution-c6ff08354321779cfa5995ee768b6f17a32847c5.tar.gz gsoc2013-evolution-c6ff08354321779cfa5995ee768b6f17a32847c5.tar.zst gsoc2013-evolution-c6ff08354321779cfa5995ee768b6f17a32847c5.zip |
Backup of the first clean and working mbox file parser. It both finds the
2000-01-11 bertrand <bertrand@helixcode.com>
Backup of the first clean and working mbox file
parser. It both finds the message and pre-parses
the message, that is, retrieves some key headers,
and the first lines of the body.
svn path=/trunk/; revision=1556
Diffstat (limited to 'camel/providers/mbox/camel-mbox-parser.c')
-rw-r--r-- | camel/providers/mbox/camel-mbox-parser.c | 449 |
1 files changed, 411 insertions, 38 deletions
diff --git a/camel/providers/mbox/camel-mbox-parser.c b/camel/providers/mbox/camel-mbox-parser.c index a13032a391..e5c18e70d8 100644 --- a/camel/providers/mbox/camel-mbox-parser.c +++ b/camel/providers/mbox/camel-mbox-parser.c @@ -26,71 +26,444 @@ #include "camel-mbox-parser.h" #include "camel-log.h" #include "camel-exception.h" - #include <sys/types.h> #include <unistd.h> +#include <errno.h> +#include <string.h> +#include <sys/stat.h> +#include <fcntl.h> + + + +#define MBOX_PARSER_BUF_SIZE 1000 + +#define MBOX_PARSER_FROM_KW "from:" +#define MBOX_PARSER_FROM_KW_SZ 5 + +#define MBOX_PARSER_DATE_KW "date:" +#define MBOX_PARSER_DATE_KW_SZ 5 + +#define MBOX_PARSER_X_EVOLUTION_KW "x-evolution:" +#define MBOX_PARSER_X_EVOLUTION_KW_SZ 12 + +/* the maximum lentgh of all the previous keywords */ +#define MBOX_PARSER_MAX_KW_SIZE 12 + + +#define MBOX_PARSER_SUMMARY_SIZE 100 -GList * -camel_mbox_find_message_positions (int fd, gint first_position, CamelException *ex) + + +typedef struct { + + int fd; /* file descriptor of the mbox file */ + guint real_position; /* real position in the file */ + + + gchar *message_delimiter; /* message delimiter string */ + guint message_delimiter_length; + + guint message_summary_size; /* how many characters from the begining of the + mail to put into the message summary */ + + GArray *preparsed_messages; /* array of MessagePreParsingInfo */ + CamelMboxParserMessageInfo current_message_info; /* used to store curent info */ + gboolean is_pending_message; /* is there some message information pending ? 
*/ + + /* buffer info */ + gchar *buffer; /* temporary buffer */ + guint left_chunk_size; /* size of the left chunk in the temp buffer */ + guint last_position; /* last position that can be compared to a keyword */ + guint current_position; /* current position in the temp buffer */ + gboolean eof; /* did we read the entire file */ + + /* other */ + GString *tmp_string; /* temporary string to fill the headers in */ + + + +} CamelMboxPreParser; + + +/* clear a preparsing info structure */ +static void +clear_message_info (CamelMboxParserMessageInfo *preparsing_info) { -#define MBOX_PARSER_BUF_SIZE 1000 + preparsing_info->message_position = 0; + preparsing_info->from = NULL; + preparsing_info->date = NULL; + preparsing_info->subject = NULL; + preparsing_info->status = NULL; + preparsing_info->priority = NULL; + preparsing_info->references = NULL; +} - off_t seek_res; - GList *message_positions = NULL; - char buffer[MBOX_PARSER_BUF_SIZE]; - ssize_t buf_nb_read; - /* set the initial position */ - seek_res = lseek (fd, first_position, SEEK_SET); - if (seek_res == (off_t)-1) goto io_error; +static CamelMboxPreParser * +new_parser (int fd, + const gchar *message_delimiter) +{ + + CamelMboxPreParser *parser; - /* populate the buffer and initialize the search proc */ - buf_nb_read = read (fd, buffer, MBOX_PARSER_BUF_SIZE); + parser = g_new0 (CamelMboxPreParser, 1); - while (buf_nb_read>0) { - current_pos = 0; + parser->fd = fd; + parser->buffer = g_new (gchar, MBOX_PARSER_BUF_SIZE); + parser->current_position = 0; + parser->message_delimiter = g_strdup (message_delimiter); + parser->message_delimiter_length = strlen (message_delimiter); + parser->real_position = 0; + parser->preparsed_messages = g_array_new (FALSE, FALSE, sizeof (CamelMboxParserMessageInfo)); + parser->message_summary_size = MBOX_PARSER_SUMMARY_SIZE; + + parser->left_chunk_size = MAX (parser->message_delimiter_length, MBOX_PARSER_MAX_KW_SIZE); + parser->eof = FALSE; + + parser->tmp_string = 
g_string_sized_new (1000); + + return parser; +} + + + +/* ** handle exceptions here */ +/* read the first chunk of data in the buffer */ +static void +initialize_buffer (CamelMboxPreParser *parser, + guint first_position) +{ + gint seek_res; + gint buf_nb_read; + + g_assert (parser); + + /* set the search start position */ + seek_res = lseek (parser->fd, first_position, SEEK_SET); + //if (seek_res == (off_t)-1) goto io_error; + + + /* the first part of the buffer is filled with newlines, + but the next time a chunk of buffer is read, it will + be filled with the last bytes of the previous chunk. + This allows simple g_strcasecmp to test for the presence of + the keyword */ + memset (parser->buffer, '\n', parser->left_chunk_size); + do { + buf_nb_read = read (parser->fd, parser->buffer + parser->left_chunk_size, + MBOX_PARSER_BUF_SIZE - parser->left_chunk_size); + } while ((buf_nb_read == -1) && (errno == EINTR)); + /* ** check for an error here */ + + parser->last_position = buf_nb_read - parser->left_chunk_size; + if (buf_nb_read < (MBOX_PARSER_BUF_SIZE - parser->left_chunk_size)) + parser->eof =TRUE; + + parser->current_position = 0; +} + + + + +/* read next data in the mbox file */ +static void +read_next_buffer_chunk (CamelMboxPreParser *parser) +{ + gint buf_nb_read; + + + g_assert (parser); + + /* read the next chunk of data in the folder file : */ + /* - first, copy the last bytes from the previous + chunk at the begining of the new one. 
*/ + memcpy (parser->buffer, + parser->buffer + MBOX_PARSER_BUF_SIZE - parser->left_chunk_size, + parser->left_chunk_size); + + /* - then read the next chunk on disk */ + do { + buf_nb_read = read (parser->fd, + parser->buffer + parser->left_chunk_size, + MBOX_PARSER_BUF_SIZE - parser->left_chunk_size); + } while ((buf_nb_read == -1) && (errno == EINTR)); + /* ** check for an error here */ + + + parser->last_position = buf_nb_read - parser->left_chunk_size; + if (buf_nb_read < (MBOX_PARSER_BUF_SIZE - parser->left_chunk_size)) + parser->eof =TRUE; + + parser->current_position = 0; + +} + + + +/* read next char in the buffer */ +static void +goto_next_char (CamelMboxPreParser *parser) +{ + if (parser->current_position < parser->last_position) + parser->current_position++; + else + read_next_buffer_chunk (parser); + + parser->real_position++; +} + + + + +static void +new_message_detected (CamelMboxPreParser *parser) +{ + /* if we were filling a message information + save it in the message information array */ + + if (parser->is_pending_message) { + g_array_append_vals (parser->preparsed_messages, (gchar *)parser + + G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info), 1); +} + + clear_message_info ( &(parser->current_message_info)); + + (parser->current_message_info).message_position = parser->real_position; + + parser->is_pending_message = TRUE; +} + + + + +/* read a header value and put it in the string pointer + to by header_content */ +static void +read_header (CamelMboxPreParser *parser, gchar **header_content) +{ + gboolean space = FALSE; + gboolean newline = FALSE; + gboolean header_end = FALSE; + gchar *buffer; + gchar c; + + + g_assert (parser); + + /* reset the header buffer string */ + parser->tmp_string = g_string_truncate (parser->tmp_string, 0); + + buffer = parser->buffer; + + while (! 
(parser->eof || header_end) ) { + /* read the current character */ + c = buffer[parser->current_position]; + if (space) { + if (c == ' ' && c == '\t') + goto next_char; + else + space = FALSE; + } + + if (newline) { + if (c == ' ' && c == '\t') { + + space = TRUE; + newline = FALSE; + goto next_char; + } else { + + header_end = TRUE; + continue; + } + } + + if (c == '\n') { + newline = TRUE; + goto next_char; + } + + /* feed the header content */ + parser->tmp_string = g_string_append_c (parser->tmp_string, c); - /* read the next chunk of data in the folder file */ - buf_nb_read = read (fd, buffer, MBOX_PARSER_BUF_SIZE); + next_char: /* read next char in the buffer */ + goto_next_char (parser); } + + /* copy the buffer in the preparsing information structure */ + *header_content = g_strndup (parser->tmp_string->str, parser->tmp_string->len); +} + + +/* read the begining of the message and put it in the message + summary field + +*/ +static void +read_message_begining (CamelMboxPreParser *parser, gchar **message_summary) +{ + guint nb_read = 0; + gchar *buffer; + g_assert (parser); - + /* reset the header buffer string */ + parser->tmp_string = g_string_truncate (parser->tmp_string, 0); + + buffer = parser->buffer; + /* the message should not be filled character by + character but there is no g_string_n_append + function, so for the moment, this is a lazy + implementation */ + while (! 
(parser->eof) && nb_read<parser->message_summary_size) { + + parser->tmp_string = g_string_append_c (parser->tmp_string, + buffer[parser->current_position]); + nb_read++; + goto_next_char (parser); + } + + *message_summary = g_strndup (parser->tmp_string->str, parser->tmp_string->len); +} + + + + + + + +GArray * +camel_mbox_parse_file (int fd, guint start_position, const gchar *message_delimiter) +{ + CamelMboxPreParser *parser; + gboolean is_parsing_a_message = FALSE; + gchar c; - /* io exception handling */ - io_error : - switch errno { - case EACCES : + + /* create the parser */ + parser = new_parser (fd, message_delimiter); + + /* initialize the temporary char buffer */ + initialize_buffer (parser, start_position); + + while (!parser->eof) { + + /* read the current character */ + c = parser->buffer[parser->current_position]; + goto_next_char (parser); - camel_exception_setv (ex, - CAMEL_EXCEPTION_FOLDER_INSUFFICIENT_PERMISSION, - "Unable to list the directory. Full Error text is : %s ", - strerror (errno)); - break; + if (c == '\n') { - case ENOENT : - case ENOTDIR : - camel_exception_setv (ex, - CAMEL_EXCEPTION_FOLDER_INVALID_PATH, - "Invalid mbox folder path. Full Error text is : %s ", - strerror (errno)); - break; + /* is the next part a message delimiter ? */ + if (g_strncasecmp (parser->buffer + parser->current_position, + parser->message_delimiter, + parser->message_delimiter_length) == 0) { + + is_parsing_a_message = TRUE; + new_message_detected (parser); + goto_next_char (parser); + continue; + } - default : - camel_exception_set (ex, - CAMEL_EXCEPTION_SYSTEM, - "Unable to delete the mbox folder."); + if (is_parsing_a_message) { + + /* is the next part a "from" header ? 
*/ + if (g_strncasecmp (parser->buffer + parser->current_position, + MBOX_PARSER_FROM_KW, + MBOX_PARSER_FROM_KW_SZ) == 0) { + + parser->current_position += MBOX_PARSER_FROM_KW_SZ; + read_header (parser, (gchar **) ((gchar *)parser + + G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info) + + G_STRUCT_OFFSET (CamelMboxParserMessageInfo, from))); + continue; + } + + /* is it an empty line ? */ + if (parser->buffer[parser->current_position] == '\n') { + + goto_next_char (parser); + read_message_begining (parser, (gchar **) ((gchar *)parser + + G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info) + + G_STRUCT_OFFSET (CamelMboxParserMessageInfo, body_summary))); + is_parsing_a_message = FALSE; + } + + } } + + } + + /* if there is a pending message information put it in the array */ + if (parser->is_pending_message) { + g_array_append_vals (parser->preparsed_messages, (gchar *)parser + + G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info), 1); + } + + /* free the parser */ + /* ** FIXME : FREE THE PARSER */ + + return parser->preparsed_messages; + +} + + + + + + + + + +#ifdef MBOX_PARSER_TEST +/* to build the test : + gcc -o test_parser -DMBOX_PARSER_TEST -I ../.. -I ../../.. 
\ + -I /usr/lib/glib/include camel-mbox-parser.c \ + -lglib ../../.libs/libcamel.a + + + */ + +int +main (int argc, char **argv) +{ + int test_file_fd; + int i; + GArray *message_positions; + CamelMboxParserMessageInfo *message_info; + + + test_file_fd = open (argv[1], O_RDONLY); + message_positions = camel_mbox_parse_file (test_file_fd, + 0, + "From "); + + printf ("Found %d messages \n", message_positions->len); + +#if 0 + for (i=0; i<message_positions->len; i++) { + //message_info = g_array_index(message_positions, CamelMboxParserMessageInfo, i); + message_info = ((CamelMboxParserMessageInfo *)(message_positions->data)) + i; + printf ("\n\n** Message %d : \n", i); + printf ("\t From: %s\n", message_info->from) ; + printf ("\t Summary: %s\n", message_info->body_summary) ; + } +#endif } + + + + +#endif /* MBOX_PARSER_TEST */ |