diff options
Diffstat (limited to 'camel/providers/mbox/camel-mbox-search.c')
-rw-r--r-- | camel/providers/mbox/camel-mbox-search.c | 857 |
1 files changed, 857 insertions, 0 deletions
diff --git a/camel/providers/mbox/camel-mbox-search.c b/camel/providers/mbox/camel-mbox-search.c new file mode 100644 index 0000000000..b46c046b6d --- /dev/null +++ b/camel/providers/mbox/camel-mbox-search.c @@ -0,0 +1,857 @@ +/* + * Copyright 2000 HelixCode (http://www.helixcode.com). + * + * Author : + * Michael Zucchi <notzed@helixcode.com> + + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + */ + +#include <glib.h> +#include <stdio.h> +#include <time.h> +#include <string.h> + +#include <camel/gmime-utils.h> +#include <camel/camel-log.h> +#include "camel/camel-mime-message.h" +#include "camel/camel-mime-part.h" +#include "camel/camel-stream.h" +#include "camel/camel-stream-fs.h" +#include "camel/camel.h" +#include "camel-mbox-folder.h" + +#define HAVE_IBEX +#ifdef HAVE_IBEX +#include "ibex.h" +#endif + +#define p(x) /* parse debug */ +#define r(x) /* run debug */ +#define d(x) /* general debug */ + + +/* + + This is not yet complete. + + The following s-exp's are supported: + + list = (and list*) + perform an intersection of a number of lists, and return that. + + bool = (and bool*) + perform a boolean AND of boolean values. + + list = (or list*) + perform a union of a number of lists, returning the new list. + + bool = (or bool*) + perform a boolean OR of boolean values. + + Comparison operators: + + bool = (lt int int) + bool = (gt int int) + bool = (eq int int) + + bool = (lt string string) + bool = (gt string string) + bool = (eq string string) + Perform a comparision of 2 integers, or 2 string values. + + Matching operators: + + list = (contains string) + Returns a list of all messages containing the string in their body. + + list = (match-all bool-expr) + Returns a list of all messages for which the bool expression is true. + The bool-expr is evaluated for each message in turn. + It is more efficient not to perform body-content comparisons inside a + match-all operator. + + int = (date-sent) + Returns a time_t of the date-sent of the message. + + bool = (header-contains string1 string2) + Returns true if the current message (inside a match-all operator) + has a header 'string1', which contains 'string2' +*/ + +static GScannerConfig scanner_config = +{ + ( + " \t\r\n" + ) /* cset_skip_characters */, + ( + G_CSET_a_2_z + "_" + G_CSET_A_2_Z + ) /* cset_identifier_first */, + ( + G_CSET_a_2_z + "_0123456789-" + G_CSET_A_2_Z + G_CSET_LATINS + G_CSET_LATINC + ) /* cset_identifier_nth */, + ( "#\n" ) /* cpair_comment_single */, + + FALSE /* case_sensitive */, + + TRUE /* skip_comment_multi */, + TRUE /* skip_comment_single */, + TRUE /* scan_comment_multi */, + TRUE /* scan_identifier */, + FALSE /* scan_identifier_1char */, + FALSE /* scan_identifier_NULL */, + TRUE /* scan_symbols */, + FALSE /* scan_binary */, + TRUE /* scan_octal */, + TRUE /* scan_float */, + TRUE /* scan_hex */, + FALSE /* scan_hex_dollar */, + TRUE /* scan_string_sq */, + TRUE /* scan_string_dq */, + TRUE /* numbers_2_int */, + FALSE /* int_2_float */, + FALSE /* identifier_2_string */, + TRUE /* char_2_token */, + FALSE /* symbol_2_token */, + FALSE /* scope_0_fallback */, +}; + + +enum _searchtermtype_t { + SEARCH_AND, + SEARCH_OR, + SEARCH_LT, + SEARCH_GT, + SEARCH_EQ, + SEARCH_CONTAINS, + SEARCH_DATESENT, + SEARCH_STRING, + SEARCH_INT, + SEARCH_FUNC, +}; + +struct _searchterm { + enum _searchtermtype_t type; + union { + char *string; + int number; + struct { + struct _searchterm_symbol *sym; + struct _searchterm **terms; + int termcount; + } func; + } value; +}; + +struct _searchcontext { + int whatever; + + CamelFolder *folder; + +#ifdef HAVE_IBEX + ibex *index; /* index of content for this folder */ +#endif + + CamelFolderSummary *summary; + const GArray *message_info; + + CamelMessageInfo *message_current; /* when performing a (match operation */ +}; + +enum _searchresulttype_t { + RESULT_ARRAY_PTR=0, + RESULT_INT, + RESULT_STRING, + RESULT_BOOL, + RESULT_UNDEFINED +}; + +struct _searchresult { + enum _searchresulttype_t type; + union { + GPtrArray *ptrarray; + int number; + char *string; + int bool; + } value; +}; + +/* function callbacks */ +static struct _searchresult *search_contains(struct _searchcontext *ctx, struct _searchterm *t); +static struct _searchresult *search_matches(struct _searchcontext *ctx, struct _searchterm *t); +static struct _searchresult *search_date_sent(struct _searchcontext *ctx, struct _searchterm *t); +static struct _searchresult *header_contains(struct _searchcontext *ctx, struct _searchterm *t); + +struct _searchterm_symbol { + char *name; + int type; + int argtype; + struct _searchresult * (*func)(struct _searchcontext *ctx, struct _searchterm *t); +} symbols[] = { + { "and", SEARCH_AND, 0, NULL }, + { "or", SEARCH_OR, 0, NULL }, + { "lt", SEARCH_LT, 1, NULL }, + { "gt", SEARCH_GT, 1, NULL }, + { "eq", SEARCH_EQ, 1, NULL }, + { "contains", SEARCH_FUNC, 1, search_contains }, + { "match-all", SEARCH_FUNC, 1, search_matches }, + { "date-sent", SEARCH_FUNC, 1, search_date_sent }, + { "header-contains", SEARCH_FUNC, 1, header_contains }, +}; + +static struct _searchterm * parse_list(GScanner *gs, int gotbrace); +static struct _searchterm * parse_value(GScanner *gs); + +static struct _searchresult *term_eval(struct _searchcontext *ctx, struct _searchterm *t); +static void parse_dump_term(struct _searchterm *t, int depth); + +/* can you tell, i dont like glib? */ +struct _glib_sux_donkeys { + int count; + GPtrArray *uids; +}; + + +/* ok, store any values that are in all sets */ +static void +g_lib_sux_htand(char *key, int value, struct _glib_sux_donkeys *fuckup) +{ + if (value == fuckup->count) { + g_ptr_array_add(fuckup->uids, key); + } +} + +/* or, store all unique values */ +static void +g_lib_sux_htor(char *key, int value, struct _glib_sux_donkeys *fuckup) +{ + g_ptr_array_add(fuckup->uids, key); +} + +static struct _searchresult * +result_new(int type) +{ + struct _searchresult *r = g_malloc0(sizeof(*r)); + r->type = type; + return r; +} + +static void +result_free(struct _searchresult *t) +{ + switch(t->type) { + case RESULT_ARRAY_PTR: + g_ptr_array_free(t->value.ptrarray, TRUE); + break; + case RESULT_BOOL: + case RESULT_INT: + break; + case RESULT_STRING: + g_free(t->value.string); + break; + case RESULT_UNDEFINED: + break; + } + g_free(t); +} + +static struct _searchresult *search_contains(struct _searchcontext *ctx, struct _searchterm *t) +{ + struct _searchresult *r, *r1; + + r = result_new(RESULT_UNDEFINED); + + if (t->value.func.termcount>0) { + if (t->value.func.termcount!=1) { + printf("warning, only looking for first string in contains clause\n"); + } + r1 = term_eval(ctx, t->value.func.terms[0]); + if (r1->type == RESULT_STRING) { + if (ctx->message_current) { + int truth = FALSE; +#ifdef HAVE_IBEX + int i; + GPtrArray *array; + + if (ctx->index) { + array = ibex_find(ctx->index, r1->value.string); + + for (i=0;i<array->len;i++) { + if (!strcmp(g_ptr_array_index(array, i), ctx->message_current->uid)) { + truth = TRUE; + break; + } + } + g_ptr_array_free(array, TRUE); + } +#endif + r->type = RESULT_BOOL; + r->value.bool = truth; + } else { + r->type = RESULT_ARRAY_PTR; +#ifdef HAVE_IBEX + if (ctx->index) { + /* blah, this should probably copy the index strings? */ + r->value.ptrarray = ibex_find(ctx->index, r1->value.string); + } else { + r->value.ptrarray = g_ptr_array_new(); + } +#endif + } + } else { + printf("you can't search for a contents of a non-string, fool\n"); + } + result_free(r1); + } + return r; +} + +/* run a sub-tree of commands which match on header fields etc */ +static struct _searchresult *search_matches(struct _searchcontext *ctx, struct _searchterm *t) +{ + int i; + struct _searchresult *r, *r1; + + if (t->value.func.termcount == 1) { + r = result_new(RESULT_ARRAY_PTR); + r->value.ptrarray = g_ptr_array_new(); + + for (i=0;i<ctx->message_info->len;i++) { + ctx->message_current = &g_array_index(ctx->message_info, CamelMessageInfo, i); + r1 = term_eval(ctx, t->value.func.terms[0]); + if (r1->type == RESULT_BOOL) { + if (r1->value.bool) { + r(printf("adding message %s\n", ctx->message_current->uid)); + g_ptr_array_add(r->value.ptrarray, ctx->message_current->uid); + } + } else { + printf("invalid syntax, matches require a single bool result\n"); + } + result_free(r1); + } + ctx->message_current = NULL; + } else { + r = result_new(RESULT_UNDEFINED); + printf("invalid syntax, matches only allows a single bool arg\n"); + } + return r; +} + +/* these variable-getting things could be put into 1 function */ +static struct _searchresult *search_date_sent(struct _searchcontext *ctx, struct _searchterm *t) +{ + struct _searchresult *r; + + if (ctx->message_current) { + r = result_new(RESULT_INT); + r->value.number = time(0); + /* r->value.number = ctx->current_message->date_sent;*/ + } else { + r = result_new(RESULT_UNDEFINED); + } + return r; +} + +/* header contains - can only be used inside a match-all construct */ +/* all headers should be inside a lookup table, so this can search + all header types */ +static struct _searchresult *header_contains(struct _searchcontext *ctx, struct _searchterm *t) +{ + struct _searchresult *r; + + r(printf("executing header-contains\n")); + + /* are we inside a match-all? */ + if (ctx->message_current + && t->value.func.termcount == 2) { + char *header, *substring; + int truth = FALSE; + struct _searchresult *r1, *r2; + + r1 = term_eval(ctx, t->value.func.terms[0]); + r2 = term_eval(ctx, t->value.func.terms[1]); + + if (r1->type == RESULT_STRING + && r2->type == RESULT_STRING) { + + header = r1->value.string; + substring = r2->value.string; + if (!strcasecmp(header, "subject")) { + r(printf("comparing subject: %s\n", ctx->message_current->subject)); + if (ctx->message_current->subject) + truth = (strstr(ctx->message_current->subject, substring)) != NULL; + else + printf("Warning: no subject line in message?\n"); + } + r(printf("header-contains %s %s = %s\n", header, substring, truth?"TRUE":"FALSE")); + } + + result_free(r1); + result_free(r2); + + r = result_new(RESULT_BOOL); + r->value.number = truth; + } else { + r = result_new(RESULT_UNDEFINED); + } + return r; +} + + +static struct _searchresult * +term_eval(struct _searchcontext *ctx, struct _searchterm *t) +{ + struct _searchresult *r, *r1, *r2; + int i; + + r(printf("eval term :\n")); + r(parse_dump_term(t, 0)); + + r = g_malloc0(sizeof(*r)); + r->type = RESULT_UNDEFINED; + + switch (t->type) { + case SEARCH_AND: { + GHashTable *ht = g_hash_table_new(g_str_hash, g_str_equal); + struct _glib_sux_donkeys lambdafoo; + int type=-1; + int bool = TRUE; + + r(printf("( and\n")); + + for (i=0;bool && i<t->value.func.termcount;i++) { + r1 = term_eval(ctx, t->value.func.terms[i]); + if (type == -1) + type = r1->type; + if (type != r1->type) { + printf("invalid types in and operation, all types must be the same\n"); + } else if ( r1->type == RESULT_ARRAY_PTR ) { + char **a1; + int l1, j; + + a1 = (char **)r1->value.ptrarray->pdata; + l1 = r1->value.ptrarray->len; + for (j=0;i<l1;j++) { + int n; + n = (int)g_hash_table_lookup(ht, a1[i]); + g_hash_table_insert(ht, a1[i], (void *)n+1); + } + } else if ( r1->type == RESULT_BOOL ) { + bool &= r1->value.bool; + } + result_free(r1); + } + + if (type == RESULT_ARRAY_PTR) { + lambdafoo.count = t->value.func.termcount; + lambdafoo.uids = g_ptr_array_new(); + g_hash_table_foreach(ht, (GHFunc)g_lib_sux_htand, &lambdafoo); + r->type = RESULT_ARRAY_PTR; + r->value.ptrarray = lambdafoo.uids; + } else if (type == RESULT_BOOL) { + r->type = RESULT_BOOL; + r->value.bool = bool; + } + + g_hash_table_destroy(ht); + + break; } + case SEARCH_OR: { + GHashTable *ht = g_hash_table_new(g_str_hash, g_str_equal); + struct _glib_sux_donkeys lambdafoo; + int type = -1; + int bool = FALSE; + + r(printf("(or \n")); + + for (i=0;!bool && i<t->value.func.termcount;i++) { + r1 = term_eval(ctx, t->value.func.terms[i]); + if (type == -1) + type = r1->type; + if (r1->type != type) { + printf("wrong types in or operation\n"); + } else if (r1->type == RESULT_ARRAY_PTR) { + char **a1; + int l1, j; + + a1 = (char **)r1->value.ptrarray->pdata; + l1 = r1->value.ptrarray->len; + for (j=0;i<l1;j++) { + g_hash_table_insert(ht, a1[j], (void *)1); + } + } else if (r1->type == RESULT_BOOL) { + bool |= r1->value.bool; + } + result_free(r1); + } + + if (type == RESULT_ARRAY_PTR) { + lambdafoo.count = t->value.func.termcount; + lambdafoo.uids = g_ptr_array_new(); + g_hash_table_foreach(ht, (GHFunc)g_lib_sux_htor, &lambdafoo); + r->type = RESULT_ARRAY_PTR; + r->value.ptrarray = lambdafoo.uids; + } else if (type == RESULT_BOOL) { + r->type = RESULT_BOOL; + r->value.bool = bool; + } + g_hash_table_destroy(ht); + + break; } + case SEARCH_LT: + r(printf("(lt \n")); + if (t->value.func.termcount == 2) { + r1 = term_eval(ctx, t->value.func.terms[0]); + r2 = term_eval(ctx, t->value.func.terms[1]); + if (r1->type != r2->type) { + printf("error, invalid types in compare\n"); + } else if (r1->type == RESULT_INT) { + r->type = RESULT_BOOL; + r->value.bool = r1->value.number < r2->value.number; + } else if (r1->type == RESULT_STRING) { + r->type = RESULT_BOOL; + r->value.bool = strcmp(r1->value.string, r2->value.string) < 0; + } + } + break; + case SEARCH_GT: + r(printf("(gt \n")); + if (t->value.func.termcount == 2) { + r1 = term_eval(ctx, t->value.func.terms[0]); + r2 = term_eval(ctx, t->value.func.terms[1]); + if (r1->type != r2->type) { + printf("error, invalid types in compare\n"); + } else if (r1->type == RESULT_INT) { + r->type = RESULT_BOOL; + r->value.bool = r1->value.number > r2->value.number; + } else if (r1->type == RESULT_STRING) { + r->type = RESULT_BOOL; + r->value.bool = strcmp(r1->value.string, r2->value.string) > 0; + } + } + break; + case SEARCH_STRING: + r(printf(" (string \"%s\")\n", t->value.string)); + r->type = RESULT_STRING; + /* erk, this shoul;dn't need to strdup this ... */ + r->value.string = g_strdup(t->value.string); + break; + case SEARCH_INT: + r(printf(" (int %d)\n", t->value.number)); + r->type = RESULT_INT; + r->value.number = t->value.number; + break; + case SEARCH_FUNC: + g_free(r); /* <---- FIXME: ICK !! */ + r(printf("function '%s'\n", t->value.func.sym->name)); + return t->value.func.sym->func(ctx, t); + default: + printf("Warning: Unknown type encountered in parse tree: %d\n", t->type); + r->type = RESULT_UNDEFINED; + } + + return r; +} + + +static void +parse_dump_term(struct _searchterm *t, int depth) +{ + int dumpvals = FALSE; + int i; + + if (t==NULL) { + printf("null term??\n"); + return; + } + + for (i=0;i<depth;i++) + printf(" "); + + switch (t->type) { + case SEARCH_AND: + printf("(and \n"); + dumpvals = 1; + break; + case SEARCH_OR: + printf("(or \n"); + dumpvals = 1; + break; + case SEARCH_LT: + printf("(lt \n"); + dumpvals = 1; + break; + case SEARCH_GT: + printf("(gt \n"); + dumpvals = 1; + break; + case SEARCH_STRING: + printf(" \"%s\"", t->value.string); + break; + case SEARCH_INT: + printf(" %d", t->value.number); + break; + case SEARCH_FUNC: + printf(" (function %s", t->value.func.sym->name); + dumpvals = 1; + break; + default: + printf("unknown type: %d\n", t->type); + } + + if (dumpvals) { + /*printf(" [%d] ", t->value.func.termcount);*/ + for (i=0;i<t->value.func.termcount;i++) { + parse_dump_term(t->value.func.terms[i], depth+1); + } + for (i=0;i<depth;i++) + printf(" "); + printf(")\n"); + } + printf("\n"); +} + +/* + PARSER +*/ + +static struct _searchterm * +parse_new_term(int type) +{ + struct _searchterm *s = g_malloc0(sizeof(*s)); + s->type = type; + return s; +} + +static void +parse_term_free(struct _searchterm *t) +{ + int i; + + if (t==NULL) { + return; + } + + switch (t->type) { + case SEARCH_AND: + case SEARCH_OR: + case SEARCH_LT: + case SEARCH_GT: + case SEARCH_FUNC: + for (i=0;i<t->value.func.termcount;i++) { + parse_term_free(t->value.func.terms[i]); + } + g_free(t->value.func.terms); + break; + case SEARCH_STRING: + g_free(t->value.string); + break; + case SEARCH_INT: + break; + default: + printf("parse_term_free: unknown type: %d\n", t->type); + } + g_free(t); +} + +static struct _searchterm ** +parse_lists(GScanner *gs, int *len) +{ + int token; + struct _searchterm **terms; + int i=0; + + p(printf("parsing lists\n")); + + terms = g_malloc0(20*sizeof(*terms)); + + while ( (token = g_scanner_peek_next_token(gs)) != G_TOKEN_EOF + && token != ')') { + terms[i]=parse_list(gs, FALSE); + i++; + } + + if (len) + *len = i; + + p(printf("found %d subterms\n", i)); + + p(printf("done parsing lists, token= %d %c\n", token, token)); + return terms; +} + +static struct _searchterm ** +parse_values(GScanner *gs, int *len) +{ + int token; + struct _searchterm **terms; + int i=0; + + p(printf("parsing values\n")); + + terms = g_malloc0(20*sizeof(*terms)); + + while ( (token = g_scanner_peek_next_token(gs)) != G_TOKEN_EOF + && token != ')') { + terms[i]=parse_value(gs); + i++; + } + + p(printf("found %d subterms\n", i)); + *len = i; + + p(printf("dont parsing values\n")); + return terms; +} + +static struct _searchterm * +parse_value(GScanner *gs) +{ + int token; + struct _searchterm *t = NULL; + + p(printf("parsing value\n")); + + token = g_scanner_get_next_token(gs); + switch(token) { + case G_TOKEN_LEFT_PAREN: + p(printf("got brace, its a list!\n")); + return parse_list(gs, TRUE); + case G_TOKEN_STRING: + p(printf("got string\n")); + t = parse_new_term(SEARCH_STRING); + t->value.string = g_strdup(g_scanner_cur_value(gs).v_string); + break; + case G_TOKEN_INT: + t = parse_new_term(SEARCH_INT); + t->value.number = g_scanner_cur_value(gs).v_int; + p(printf("got int\n")); + break; + default: + printf("Innvalid token trying to parse a list of values\n"); + } + p(printf("done parsing value\n")); + return t; +} + +/* FIXME: this needs some robustification */ +static struct _searchterm * +parse_list(GScanner *gs, int gotbrace) +{ + int token; + struct _searchterm *t = NULL; + + p(printf("parsing list\n")); + if (gotbrace) + token = '('; + else + token = g_scanner_get_next_token(gs); + if (token =='(') { + token = g_scanner_get_next_token(gs); + if (token == G_TOKEN_SYMBOL) { + struct _searchterm_symbol *s; + + s = g_scanner_cur_value(gs).v_symbol; + p(printf("got funciton: %s\n", s->name)); + t = parse_new_term(s->type); + t->value.func.sym = s; + p(printf("created new list %p\n", t)); + switch(s->argtype) { + case 0: /* it MUST be a list of lists */ + t->value.func.terms = parse_lists(gs, &t->value.func.termcount); + break; + case 1: + t->value.func.terms = parse_values(gs, &t->value.func.termcount); + break; + default: + printf("Error, internal error parsing symbols\n"); + } + } else { + printf("unknown sequence encountered, type = %d\n", token); + } + token = g_scanner_get_next_token(gs); + if (token != ')') { + printf("Error, expected ')' not found\n"); + } + } else { + printf("Error, list term without opening (\n"); + } + + p(printf("returning list %p\n", t)); + return t; +} + +GList * +camel_mbox_folder_search_by_expression(CamelFolder *folder, char *expression, CamelException *ex) +{ + GScanner *gs; + int i; + struct _searchterm *t; + struct _searchcontext *ctx; + struct _searchresult *r; + GList *matches = NULL; + + gs = g_scanner_new(&scanner_config); + for(i=0;i<sizeof(symbols)/sizeof(symbols[0]);i++) + g_scanner_scope_add_symbol(gs, 0, symbols[i].name, &symbols[i]); + + g_scanner_input_text(gs, expression, strlen(expression)); + t = parse_list(gs, 0); + + if (t) { + ctx = g_malloc0(sizeof(*ctx)); + ctx->folder = folder; + ctx->summary = camel_folder_get_summary(folder, ex); + ctx->message_info = camel_folder_summary_get_message_info_list(ctx->summary); +#ifdef HAVE_IBEX + ctx->index = ibex_open(CAMEL_MBOX_FOLDER(folder)->index_file_path, FALSE); + if (!ctx->index) { + perror("Cannot open index file, body searches will be ignored\n"); + } +#endif + r = term_eval(ctx, t); + + /* now create a folder summary to return?? */ + if (r + && r->type == RESULT_ARRAY_PTR) { + d(printf("got result ...\n")); + for (i=0;i<r->value.ptrarray->len;i++) { + d(printf("adding match: %s\n", (char *)g_ptr_array_index(r->value.ptrarray, i))); + matches = g_list_prepend(matches, g_strdup(g_ptr_array_index(r->value.ptrarray, i))); + } + result_free(r); + } + + if (ctx->index) + ibex_close(ctx->index); + + gtk_object_unref((GtkObject *)ctx->summary); + g_free(ctx); + parse_term_free(t); + } else { + printf("Warning, Could not parse expression!\n %s\n", expression); + } + + g_scanner_destroy(gs); + + return matches; +} |