aboutsummaryrefslogtreecommitdiffstats
path: root/camel/providers/mbox/camel-mbox-parser.c
diff options
context:
space:
mode:
authorbertrand <bertrand@helixcode.com>2000-01-12 10:09:50 +0800
committerBertrand Guiheneuf <bertrand@src.gnome.org>2000-01-12 10:09:50 +0800
commitc6ff08354321779cfa5995ee768b6f17a32847c5 (patch)
tree86a3766cb1b518fe037664c4e144ca3e3097af39 /camel/providers/mbox/camel-mbox-parser.c
parentedcdb2068858eebca3c23978c91b827a077c02a1 (diff)
downloadgsoc2013-evolution-c6ff08354321779cfa5995ee768b6f17a32847c5.tar.gz
gsoc2013-evolution-c6ff08354321779cfa5995ee768b6f17a32847c5.tar.zst
gsoc2013-evolution-c6ff08354321779cfa5995ee768b6f17a32847c5.zip
Backup of the first clean and working mbox file parser. It both finds the
2000-01-11 bertrand <bertrand@helixcode.com> Backup of the first clean and working mbox file parser. It both finds the messages and pre-parses them, that is, it retrieves some key headers and the first lines of the body. svn path=/trunk/; revision=1556
Diffstat (limited to 'camel/providers/mbox/camel-mbox-parser.c')
-rw-r--r--camel/providers/mbox/camel-mbox-parser.c449
1 files changed, 411 insertions, 38 deletions
diff --git a/camel/providers/mbox/camel-mbox-parser.c b/camel/providers/mbox/camel-mbox-parser.c
index a13032a391..e5c18e70d8 100644
--- a/camel/providers/mbox/camel-mbox-parser.c
+++ b/camel/providers/mbox/camel-mbox-parser.c
@@ -26,71 +26,444 @@
#include "camel-mbox-parser.h"
#include "camel-log.h"
#include "camel-exception.h"
-
#include <sys/types.h>
#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+
+
+/* size, in bytes, of the temporary buffer the mbox file is read into */
+#define MBOX_PARSER_BUF_SIZE 1000
+
+/* header keywords looked for at the beginning of a line. They are written
+ in lower case; the "from:" keyword is matched with g_strncasecmp.
+ Each *_KW_SZ value is the length of the matching keyword string and
+ must be kept in sync with it by hand. */
+#define MBOX_PARSER_FROM_KW "from:"
+#define MBOX_PARSER_FROM_KW_SZ 5
+
+#define MBOX_PARSER_DATE_KW "date:"
+#define MBOX_PARSER_DATE_KW_SZ 5
+
+#define MBOX_PARSER_X_EVOLUTION_KW "x-evolution:"
+#define MBOX_PARSER_X_EVOLUTION_KW_SZ 12
+
+/* the maximum length of all the previous keywords; it is used as the
+ minimum size of the chunk carried over from one buffer load to the next */
+#define MBOX_PARSER_MAX_KW_SIZE 12
+
+
+/* default number of characters copied into a message summary */
+#define MBOX_PARSER_SUMMARY_SIZE 100
-GList *
-camel_mbox_find_message_positions (int fd, gint first_position, CamelException *ex)
+
+
+/* state of one pre-parsing pass over an mbox file */
+typedef struct {
+
+ int fd; /* file descriptor of the mbox file */
+ guint real_position; /* real position in the file; incremented once for
+ each character consumed with goto_next_char */
+
+
+ gchar *message_delimiter; /* message delimiter string (private copy) */
+ guint message_delimiter_length; /* strlen of message_delimiter */
+
+ guint message_summary_size; /* how many characters from the beginning of the
+ mail to put into the message summary */
+
+ GArray *preparsed_messages; /* array of CamelMboxParserMessageInfo */
+ CamelMboxParserMessageInfo current_message_info; /* used to store current info */
+ gboolean is_pending_message; /* is there some message information pending ? */
+
+ /* buffer info */
+ gchar *buffer; /* temporary buffer of MBOX_PARSER_BUF_SIZE bytes */
+ guint left_chunk_size; /* size of the left chunk in the temp buffer, that is,
+ the number of bytes at the beginning of the buffer
+ that come from the end of the previous load */
+ guint last_position; /* last position that can be compared to a keyword */
+ guint current_position; /* current position in the temp buffer */
+ gboolean eof; /* did we read the entire file */
+
+ /* other */
+ GString *tmp_string; /* temporary string to fill the headers in */
+
+
+
+} CamelMboxPreParser;
+
+
+/* clear a preparsing info structure
+
+ Every field filled by the parser is reset, including the body
+ summary, so that a message for which no summary is read does not
+ inherit the pointer stored for the previous message.
+
+ The strings are only forgotten, not freed: new_message_detected
+ copies the structure, and thus the pointers, into the result array
+ before calling this function. */
+static void
+clear_message_info (CamelMboxParserMessageInfo *preparsing_info)
 {
-#define MBOX_PARSER_BUF_SIZE 1000
+	preparsing_info->message_position = 0;
+	preparsing_info->from = NULL;
+	preparsing_info->date = NULL;
+	preparsing_info->subject = NULL;
+	preparsing_info->status = NULL;
+	preparsing_info->priority = NULL;
+	preparsing_info->references = NULL;
+	preparsing_info->body_summary = NULL;
+}
- off_t seek_res;
- GList *message_positions = NULL;
- char buffer[MBOX_PARSER_BUF_SIZE];
- ssize_t buf_nb_read;
- /* set the initial position */
- seek_res = lseek (fd, first_position, SEEK_SET);
- if (seek_res == (off_t)-1) goto io_error;
+/* allocate and initialize a parser for the mbox file opened on fd.
+
+ fd : file descriptor of the mbox file; it is only stored here
+ message_delimiter : string marking the beginning of a message; it is
+ copied, and must not be NULL since strlen is called on it
+
+ Returns the new parser. Nothing is read from the file here. */
+static CamelMboxPreParser *
+new_parser (int fd,
+ const gchar *message_delimiter)
+{
+
+ CamelMboxPreParser *parser;
- /* populate the buffer and initialize the search proc */
- buf_nb_read = read (fd, buffer, MBOX_PARSER_BUF_SIZE);
+ /* g_new0 zeroes the structure, so the fields that are not set
+ below (is_pending_message, last_position, current_message_info)
+ start as 0 / FALSE / NULL */
+ parser = g_new0 (CamelMboxPreParser, 1);
- while (buf_nb_read>0) {
- current_pos = 0;
+ parser->fd = fd;
+ parser->buffer = g_new (gchar, MBOX_PARSER_BUF_SIZE);
+ parser->current_position = 0;
+ parser->message_delimiter = g_strdup (message_delimiter);
+ parser->message_delimiter_length = strlen (message_delimiter);
+ parser->real_position = 0;
+ parser->preparsed_messages = g_array_new (FALSE, FALSE, sizeof (CamelMboxParserMessageInfo));
+ parser->message_summary_size = MBOX_PARSER_SUMMARY_SIZE;
+
+ /* the left chunk must be able to hold the longest string the buffer
+ is compared to: the message delimiter or one of the keywords */
+ parser->left_chunk_size = MAX (parser->message_delimiter_length, MBOX_PARSER_MAX_KW_SIZE);
+ parser->eof = FALSE;
+
+ parser->tmp_string = g_string_sized_new (1000);
+
+ return parser;
+}
+
+
+
+/* fill the part of the buffer located after the left chunk with the
+ next bytes of the mbox file, then reset the scanning state.
+
+ read is retried when interrupted (EINTR) and repeated after a short
+ read, so the zone is left partially filled only when the end of the
+ file has been reached or when an unrecoverable error occurred. Both
+ cases are handled the same way: parser->eof is set.
+
+ last_position is the index of the last character that has to be
+ consumed before the next chunk is loaded. In a full chunk, the
+ left_chunk_size characters that follow it are exactly the ones
+ read_next_buffer_chunk moves to the beginning of the buffer, so no
+ character is skipped and a comparison starting at a consumed position
+ never runs past the data.
+
+ NOTE(review): as before, eof is raised as soon as the last chunk has
+ been loaded, so the callers looping on !parser->eof stop before
+ scanning that last chunk. Fixing this requires changing the callers. */
+static void
+fill_buffer_after_left_chunk (CamelMboxPreParser *parser)
+{
+	const guint wanted = MBOX_PARSER_BUF_SIZE - parser->left_chunk_size;
+	gchar *destination = parser->buffer + parser->left_chunk_size;
+	guint filled = 0;
+
+	while (filled < wanted) {
+		ssize_t nb_read = read (parser->fd, destination + filled, wanted - filled);
+
+		if (nb_read > 0) {
+			filled += (guint) nb_read;
+			continue;
+		}
+
+		if (nb_read == -1 && errno == EINTR)
+			continue;
+
+		/* end of file, or a read error there is no way to report */
+		break;
+	}
+
+	if (filled < wanted)
+		parser->eof = TRUE;
+
+	/* never let the unsigned position wrap around when nothing was read */
+	parser->last_position = (filled > 0) ? filled - 1 : 0;
+	parser->current_position = 0;
+}
+
+
+
+/* read the first chunk of data in the buffer
+
+ parser : the parser to initialize
+ first_position : offset in the file where the search starts
+
+ If the file can not be positioned at first_position, nothing is read
+ and parser->eof is set, so that the callers do not scan anything. */
+static void
+initialize_buffer (CamelMboxPreParser *parser,
+		   guint first_position)
+{
+	g_assert (parser);
+
+	/* the first part of the buffer is filled with newlines,
+	   but the next time a chunk of buffer is read, it will
+	   be filled with the last bytes of the previous chunk.
+	   This allows simple g_strcasecmp to test for the presence of
+	   the keyword */
+	memset (parser->buffer, '\n', parser->left_chunk_size);
+
+	/* set the search start position */
+	if (lseek (parser->fd, (off_t) first_position, SEEK_SET) == (off_t) -1) {
+		parser->eof = TRUE;
+		parser->last_position = 0;
+		parser->current_position = 0;
+		return;
+	}
+
+	fill_buffer_after_left_chunk (parser);
+}
+
+
+
+
+/* read next data in the mbox file */
+static void
+read_next_buffer_chunk (CamelMboxPreParser *parser)
+{
+	g_assert (parser);
+
+	/* read the next chunk of data in the folder file : */
+	/* - first, copy the last bytes from the previous
+	   chunk at the beginning of the new one. memmove is used
+	   because the two zones overlap when the left chunk is
+	   larger than half of the buffer. */
+	memmove (parser->buffer,
+		 parser->buffer + MBOX_PARSER_BUF_SIZE - parser->left_chunk_size,
+		 parser->left_chunk_size);
+
+	/* - then read the next chunk on disk */
+	fill_buffer_after_left_chunk (parser);
+}
+
+
+
+/* move to the next character of the mbox file.
+
+ As long as the end of the usable part of the buffer has not been
+ reached, this only moves the cursor inside the buffer; otherwise the
+ next chunk is loaded. In both cases the position in the file is
+ incremented by one. */
+static void
+goto_next_char (CamelMboxPreParser *parser)
+{
+	if (parser->current_position >= parser->last_position)
+		read_next_buffer_chunk (parser);
+	else
+		parser->current_position++;
+
+	parser->real_position++;
+}
+
+
+
+
+/* called when a message delimiter has been found.
+
+ The information gathered for the previous message, if any, is
+ appended to the array of preparsed messages. The current message
+ information is then cleared and its position is set to the current
+ position in the file. */
+static void
+new_message_detected (CamelMboxPreParser *parser)
+{
+	CamelMboxParserMessageInfo *info = &parser->current_message_info;
+
+	/* the structure is copied, so the array takes over its strings */
+	if (parser->is_pending_message)
+		g_array_append_vals (parser->preparsed_messages, info, 1);
+
+	clear_message_info (info);
+	info->message_position = parser->real_position;
+
+	parser->is_pending_message = TRUE;
+}
+
+
+
+
+/* read a header value and put it in the string pointed
+ to by header_content
+
+ parser : the parser; its cursor must be on the first character
+ following the header keyword
+ header_content : where a newly allocated copy of the value is stored
+
+ The value ends at the first newline that is not followed by a space
+ or a tab. A newline followed by spaces or tabs is a folded line: the
+ newline and the whitespace run are replaced by a single space and
+ the value goes on. On return the cursor is on the first character
+ of the line following the header, unless the end of the file was
+ reached first.
+
+ The previous content of *header_content is overwritten, not freed. */
+static void
+read_header (CamelMboxPreParser *parser, gchar **header_content)
+{
+	gboolean space = FALSE;      /* skipping the whitespace of a folded line */
+	gboolean newline = FALSE;    /* the previous character was a newline */
+	gboolean header_end = FALSE;
+	gchar c;
+
+
+	g_assert (parser);
+
+	/* reset the header buffer string */
+	parser->tmp_string = g_string_truncate (parser->tmp_string, 0);
+
+	while (! (parser->eof || header_end) ) {
+		/* read the current character */
+		c = parser->buffer[parser->current_position];
+		if (space) {
+			/* a character can not be both a space and a tab :
+			   the two tests must be or'ed */
+			if (c == ' ' || c == '\t')
+				goto next_char;
+			else
+				space = FALSE;
+		}
+
+		if (newline) {
+			if (c == ' ' || c == '\t') {
+
+				/* folded line : keep one separator */
+				parser->tmp_string = g_string_append_c (parser->tmp_string, ' ');
+				space = TRUE;
+				newline = FALSE;
+				goto next_char;
+			} else {
+
+				/* c belongs to the next line, do not consume it */
+				header_end = TRUE;
+				continue;
+			}
+		}
+
+		if (c == '\n') {
+			newline = TRUE;
+			goto next_char;
+		}
+
+		/* feed the header content */
+		parser->tmp_string = g_string_append_c (parser->tmp_string, c);
- /* read the next chunk of data in the folder file */
- buf_nb_read = read (fd, buffer, MBOX_PARSER_BUF_SIZE);
+	next_char: /* read next char in the buffer */
+		goto_next_char (parser);
 }
+
+	/* copy the buffer in the preparsing information structure */
+	*header_content = g_strndup (parser->tmp_string->str, parser->tmp_string->len);
+}
+
+
+/* read the beginning of the message and put it in the message
+ summary field
+
+ At most parser->message_summary_size characters are copied, starting
+ at the current position; the copy stops earlier when the end of the
+ file is reached. *message_summary receives a newly allocated string.
+*/
+static void
+read_message_begining (CamelMboxPreParser *parser, gchar **message_summary)
+{
+	guint copied;
+
+	g_assert (parser);
-
+	/* reset the header buffer string */
+	g_string_truncate (parser->tmp_string, 0);
+
+	/* the message should not be filled character by
+	   character but there is no g_string_n_append
+	   function, so for the moment, this is a lazy
+	   implementation */
+	for (copied = 0;
+	     !parser->eof && copied < parser->message_summary_size;
+	     copied++) {
+
+		g_string_append_c (parser->tmp_string,
+				   parser->buffer[parser->current_position]);
+		goto_next_char (parser);
+	}
+
+	*message_summary = g_strndup (parser->tmp_string->str, parser->tmp_string->len);
+}
+
+
+
+
+
+
+
+/* find the messages of an mbox file and pre-parse them
+
+ fd : file descriptor of the mbox file. It is neither closed nor
+ rewound by this function.
+ start_position : offset in the file where the search starts
+ message_delimiter : string that marks, at the beginning of a line,
+ the beginning of a message. The comparison is case insensitive.
+
+ Returns an array of CamelMboxParserMessageInfo, one element for each
+ message found. The parser is freed before returning, so nothing else
+ keeps a reference to the array or to the strings stored in it. */
+GArray *
+camel_mbox_parse_file (int fd, guint start_position, const gchar *message_delimiter)
+{
+	CamelMboxPreParser *parser;
+	GArray *messages;
+	gboolean is_parsing_a_message = FALSE;
+	guint i;
+	gchar c;
- /* io exception handling */
- io_error :
- switch errno {
- case EACCES :
+
+	/* create the parser */
+	parser = new_parser (fd, message_delimiter);
+
+	/* initialize the temporary char buffer */
+	initialize_buffer (parser, start_position);
+
+	while (!parser->eof) {
+
+		/* read the current character */
+		c = parser->buffer[parser->current_position];
+		goto_next_char (parser);
- camel_exception_setv (ex,
- CAMEL_EXCEPTION_FOLDER_INSUFFICIENT_PERMISSION,
- "Unable to list the directory. Full Error text is : %s ",
- strerror (errno));
- break;
+		if (c == '\n') {
- case ENOENT :
- case ENOTDIR :
- camel_exception_setv (ex,
- CAMEL_EXCEPTION_FOLDER_INVALID_PATH,
- "Invalid mbox folder path. Full Error text is : %s ",
- strerror (errno));
- break;
+			/* is the next part a message delimiter ? */
+			if (g_strncasecmp (parser->buffer + parser->current_position,
+					   parser->message_delimiter,
+					   parser->message_delimiter_length) == 0) {
+
+				is_parsing_a_message = TRUE;
+				new_message_detected (parser);
+				goto_next_char (parser);
+				continue;
+			}
- default :
- camel_exception_set (ex,
- CAMEL_EXCEPTION_SYSTEM,
- "Unable to delete the mbox folder.");
+			if (is_parsing_a_message) {
+
+				/* is the next part a "from" header ? */
+				if (g_strncasecmp (parser->buffer + parser->current_position,
+						   MBOX_PARSER_FROM_KW,
+						   MBOX_PARSER_FROM_KW_SZ) == 0) {
+
+					/* skip the keyword one character at a time, so
+					   that the position in the file stays in sync
+					   and a buffer boundary is handled */
+					for (i = 0; i < MBOX_PARSER_FROM_KW_SZ; i++)
+						goto_next_char (parser);
+					read_header (parser, &parser->current_message_info.from);
+					continue;
+				}
+
+				/* is it an empty line ? */
+				if (parser->buffer[parser->current_position] == '\n') {
+
+					/* the headers are over : what follows is the body */
+					goto_next_char (parser);
+					read_message_begining (parser, &parser->current_message_info.body_summary);
+					is_parsing_a_message = FALSE;
+				}
+
+			}
 }
+
+	}
+
+	/* if there is a pending message information put it in the array */
+	if (parser->is_pending_message) {
+		g_array_append_vals (parser->preparsed_messages,
+				     &parser->current_message_info, 1);
+	}
+
+	/* free the parser, but keep the array that is returned. The strings
+	   of the current message information are now referenced by the
+	   array and must not be freed here. */
+	messages = parser->preparsed_messages;
+	g_free (parser->buffer);
+	g_free (parser->message_delimiter);
+	g_string_free (parser->tmp_string, TRUE);
+	g_free (parser);
+
+	return messages;
+
+}
+
+
+
+
+
+
+
+
+
+#ifdef MBOX_PARSER_TEST
+/* to build the test :
+ gcc -o test_parser -DMBOX_PARSER_TEST -I ../.. -I ../../.. \
+ -I /usr/lib/glib/include camel-mbox-parser.c \
+ -lglib ../../.libs/libcamel.a
+
+
+ */
+
+/* test program : parse the mbox file given as first argument and print
+ the number of messages found.
+
+ Returns 0 on success, 1 when no file name is given or when the file
+ can not be opened. */
+int
+main (int argc, char **argv)
+{
+	int test_file_fd;
+	int i;
+	GArray *message_positions;
+	CamelMboxParserMessageInfo *message_info;
+
+
+	if (argc < 2) {
+		printf ("Usage: test_parser <mbox file>\n");
+		return 1;
+	}
+
+	test_file_fd = open (argv[1], O_RDONLY);
+	if (test_file_fd == -1) {
+		printf ("Unable to open %s : %s\n", argv[1], strerror (errno));
+		return 1;
+	}
+
+	message_positions = camel_mbox_parse_file (test_file_fd,
+						   0,
+						   "From ");
+
+	/* the parser does not close the file */
+	close (test_file_fd);
+
+	/* len is an unsigned integer */
+	printf ("Found %u messages \n", message_positions->len);
+
+#if 0
+	for (i=0; i<message_positions->len; i++) {
+		//message_info = g_array_index(message_positions, CamelMboxParserMessageInfo, i);
+		message_info = ((CamelMboxParserMessageInfo *)(message_positions->data)) + i;
+		printf ("\n\n** Message %d : \n", i);
+		printf ("\t From: %s\n", message_info->from) ;
+		printf ("\t Summary: %s\n", message_info->body_summary) ;
+	}
+#endif
+
+	return 0;
 }
+
+
+
+
+#endif /* MBOX_PARSER_TEST */