diff options
author | Not Zed <NotZed@Ximian.com> | 2002-04-18 10:18:55 +0800 |
---|---|---|
committer | Michael Zucci <zucchi@src.gnome.org> | 2002-04-18 10:18:55 +0800 |
commit | 6ccd0e6f59bec5f1900c49cd1868fca998570fc7 (patch) | |
tree | eac70d58c4d79bfbc73d7592ad5f303f7f8c044e /camel | |
parent | e5e67a6644e4d0ac41c270a4bcd18e5c6e2b7667 (diff) | |
download | gsoc2013-evolution-6ccd0e6f59bec5f1900c49cd1868fca998570fc7.tar.gz gsoc2013-evolution-6ccd0e6f59bec5f1900c49cd1868fca998570fc7.tar.zst gsoc2013-evolution-6ccd0e6f59bec5f1900c49cd1868fca998570fc7.zip |
When doing a contains match, split the words and perform an AND on them.
2002-04-18 Not Zed <NotZed@Ximian.com>
* camel-folder-search.c (check_header): When doing a contains
match, split the words and perform an AND on them.
(match_words_messages): If we have an index, but were forced to do
a full search, first look up a subset of messages using
the index and a simplified word set. Only do a manual search of
this subset.
2002-04-17 Not Zed <NotZed@Ximian.com>
* camel-folder-search.c (match_message_index): Changed to take a
utf8 string not a regex pattern.
(match_words_index): Matches against a camel_search_words list.
(match_words_1message): Matches a single message against a
camel_search_words list.
(match_words_message): Same, but gets the message from the folder
for you.
(match_words_messages): Matches a list of messages against a words
list.
(search_body_contains): Rewritten to handle multiple word
searches. For #23371.
* providers/imap/camel-imap-search.c (sync_match): Split words
when searching, to support multiple search words. Also, try
searching specifying charset of utf8 if we can, if that fails,
fall back to not specifying charset. TODO: It should translate
the strings into the locale default charset?
* providers/imap/camel-imap-store.c (connect_to_server): Added new
cap - utf8_search. If set, we tell the server we're searching
using utf8; otherwise we don't (incorrectly, since we always use
utf8 to search).
* camel-search-private.c (camel_ustrstrcase): Make this function public.
(camel_search_words_split): Split a word into multiple words based
on whitespace, and keep track of whether the word is simple
(indexable directly), or not.
(camel_search_words_free): Free 'em.
svn path=/trunk/; revision=16501
Diffstat (limited to 'camel')
-rw-r--r-- | camel/ChangeLog | 40 | ||||
-rw-r--r-- | camel/camel-folder-search.c | 296 | ||||
-rw-r--r-- | camel/camel-search-private.c | 158 | ||||
-rw-r--r-- | camel/camel-search-private.h | 27 | ||||
-rw-r--r-- | camel/providers/imap/camel-imap-search.c | 39 | ||||
-rw-r--r-- | camel/providers/imap/camel-imap-store.c | 3 | ||||
-rw-r--r-- | camel/providers/imap/camel-imap-store.h | 1 |
7 files changed, 469 insertions, 95 deletions
diff --git a/camel/ChangeLog b/camel/ChangeLog index bccacbea60..ad285fa52b 100644 --- a/camel/ChangeLog +++ b/camel/ChangeLog @@ -1,3 +1,43 @@ +2002-04-18 Not Zed <NotZed@Ximian.com> + + * camel-folder-search.c (check_header): When doing a contains + match, split the words and perform an and on it. + (match_words_messages): If we have an index, but were forced to do + a full search, first lookup a subset of messages using + the index and a simplified word set. Only do a manual search of + this subset. + +2002-04-17 Not Zed <NotZed@Ximian.com> + + * camel-folder-search.c (match_message_index): Changed to take a + utf8 string not a regex pattern. + (match_words_index): Matches against a camel_search_words list. + (match_words_1message): Matches a single message against a + camel_search_words list. + (match_words_message): Same, but gets the message from the folder + for you. + (match_words_messages): Matches a list of messages against a words + list. + (search_body_contains): Rewritten to handle multiple word + searches. For #23371. + + * providers/imap/camel-imap-search.c (sync_match): Split words + when searching, to support multiple search words. Also, try + searching specifying charset of utf8 if we can, if that fails, + fall back to not specifying charset. TODO: It should translate + the strings into the locale default charset? + + * providers/imap/camel-imap-store.c (connect_to_server): Added new + cap - utf8_search, if set, we tell the server we're searching + using utf8, otherwise we dont (incorrectly, since we always use + utf8 to search). + + * camel-search-private.c (camel_ustrstrcase): Make this class public. + (camel_search_words_split): Split a word into multiple words based + on whitespace, and keep track of whether the word is simple + (indexable directly), or not. + (camel_search_words_free): Free 'em. 
+ 2002-04-17 Jeffrey Stedfast <fejj@ximian.com> * camel-vee-folder.c (vee_search_by_expression): If the vee-folder diff --git a/camel/camel-folder-search.c b/camel/camel-folder-search.c index e0e5052a5f..d9702706f9 100644 --- a/camel/camel-folder-search.c +++ b/camel/camel-folder-search.c @@ -621,8 +621,9 @@ check_header(struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolder char *headername; const char *header = NULL; char strbuf[32]; - int i; + int i, j; camel_search_t type = CAMEL_SEARCH_TYPE_ASIS; + struct _camel_search_words *words; /* only a subset of headers are supported .. */ headername = argv[0]->value.string; @@ -652,9 +653,21 @@ check_header(struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolder if (header) { /* performs an OR of all words */ for (i=1;i<argc && !truth;i++) { - if (argv[i]->type == ESEXP_RES_STRING) - truth = camel_search_header_match(header, argv[i]->value.string, - how, type, NULL); + if (argv[i]->type == ESEXP_RES_STRING) { + if (argv[i]->value.string[0] == 0) { + truth = TRUE; + } else if (how == CAMEL_SEARCH_MATCH_CONTAINS) { + /* doesn't make sense to split words on anything but contains i.e. 
we can't have an ending match different words */ + words = camel_search_words_split(argv[i]->value.string); + truth = TRUE; + for (j=0;j<words->len && truth;j++) { + truth = camel_search_header_match(header, words->words[j]->word, how, type, NULL); + } + camel_search_words_free(words); + } else { + truth = camel_search_header_match(header, argv[i]->value.string, how, type, NULL); + } + } } } } @@ -723,26 +736,53 @@ g_lib_sux_htor(char *key, int value, struct _glib_sux_donkeys *fuckup) g_ptr_array_add(fuckup->uids, key); } +/* and, only store duplicates */ +static void +g_lib_sux_htand(char *key, int value, struct _glib_sux_donkeys *fuckup) +{ + if (value == fuckup->count) + g_ptr_array_add(fuckup->uids, key); +} + static int -match_message(CamelFolder *folder, const char *uid, regex_t *pattern, CamelException *ex) +match_message_index(CamelIndex *idx, const char *uid, const char *match, CamelException *ex) { - CamelMimeMessage *msg; + CamelIndexCursor *wc, *nc; + const char *word, *name; int truth = FALSE; - msg = camel_folder_get_message(folder, uid, ex); - if (!camel_exception_is_set(ex) && msg!=NULL) { - truth = camel_search_message_body_contains((CamelDataWrapper *)msg, pattern); - camel_object_unref((CamelObject *)msg); - } else { - camel_exception_clear(ex); + wc = camel_index_words(idx); + if (wc) { + while (!truth && (word = camel_index_cursor_next(wc))) { + if (camel_ustrstrcase(word,match) != NULL) { + /* perf: could have the wc cursor return the name cursor */ + nc = camel_index_find(idx, word); + if (nc) { + while (!truth && (name = camel_index_cursor_next(nc))) + truth = strcmp(name, uid) == 0; + camel_object_unref((CamelObject *)nc); + } + } + } + camel_object_unref((CamelObject *)wc); } + return truth; } -/* perform a regex match against words in an index */ -/* uids = hash table of messageinfo's by uid's */ +/* + "one two" "three" "four five" + + one and two +or + three +or + four and five +*/ + +/* returns messages which contain all words listed in 
words */ static GPtrArray * -match_messages_index(CamelIndex *idx, regex_t *pattern, GHashTable *uids, CamelException *ex) +match_words_index(CamelFolderSearch *search, struct _camel_search_words *words, CamelException *ex) { GPtrArray *result = g_ptr_array_new(); GHashTable *ht = g_hash_table_new(g_str_hash, g_str_equal); @@ -750,123 +790,207 @@ match_messages_index(CamelIndex *idx, regex_t *pattern, GHashTable *uids, CamelE CamelIndexCursor *wc, *nc; const char *word, *name; CamelMessageInfo *mi; + int i; + + /* we can have a maximum of 32 words, as we use it as the AND mask */ - wc = camel_index_words(idx); + wc = camel_index_words(search->body_index); if (wc) { while ((word = camel_index_cursor_next(wc))) { - if (regexec(pattern, word, 0, NULL, 0) == 0) { - /* perf: could have the wc cursor return the name cursor */ - nc = camel_index_find(idx, word); - if (nc) { - while ((name = camel_index_cursor_next(nc))) { - mi = g_hash_table_lookup(uids, name); - if (mi) - g_hash_table_insert(ht, (char *)camel_message_info_uid(mi), (void *)1); + for (i=0;i<words->len;i++) { + if (camel_ustrstrcase(word, words->words[i]->word) != NULL) { + /* perf: could have the wc cursor return the name cursor */ + nc = camel_index_find(search->body_index, word); + if (nc) { + while ((name = camel_index_cursor_next(nc))) { + mi = g_hash_table_lookup(search->summary_hash, name); + if (mi) { + int mask; + const char *uid = camel_message_info_uid(mi); + + mask = ((int)g_hash_table_lookup(ht, uid)) | (1<<i); + g_hash_table_insert(ht, (char *)uid, (void *)mask); + } + } + camel_object_unref((CamelObject *)nc); } - camel_object_unref((CamelObject *)nc); } } } camel_object_unref((CamelObject *)wc); lambdafoo.uids = result; - g_hash_table_foreach(ht, (GHFunc)g_lib_sux_htor, &lambdafoo); + lambdafoo.count = (1<<words->len) - 1; + g_hash_table_foreach(ht, (GHFunc)g_lib_sux_htand, &lambdafoo); g_hash_table_destroy(ht); } return result; } -/* perform a regex match against an individual uid in an 
index */ -/* this would benefit greatly in practice if there was a hashtalbe of uid's to amtch against */ -static int -match_message_index(CamelIndex *idx, const char *uid, regex_t *pattern, CamelException *ex) +static gboolean +match_words_1message (CamelDataWrapper *object, struct _camel_search_words *words, guint32 *mask) { - CamelIndexCursor *wc, *nc; - const char *word, *name; + CamelDataWrapper *containee; int truth = FALSE; - - wc = camel_index_words(idx); - if (wc) { - while (!truth && (word = camel_index_cursor_next(wc))) { - if (regexec(pattern, word, 0, NULL, 0) == 0) { - /* perf: could have the wc cursor return the name cursor */ - nc = camel_index_find(idx, word); - if (nc) { - while (!truth && (name = camel_index_cursor_next(nc))) - truth = strcmp(name, uid) == 0; - camel_object_unref((CamelObject *)nc); - } + int parts, i; + + containee = camel_medium_get_content_object (CAMEL_MEDIUM (object)); + + if (containee == NULL) + return FALSE; + + /* using the object types is more accurate than using the mime/types */ + if (CAMEL_IS_MULTIPART (containee)) { + parts = camel_multipart_get_number (CAMEL_MULTIPART (containee)); + for (i = 0; i < parts && truth == FALSE; i++) { + CamelDataWrapper *part = (CamelDataWrapper *)camel_multipart_get_part (CAMEL_MULTIPART (containee), i); + if (part) + truth = match_words_1message(part, words, mask); + } + } else if (CAMEL_IS_MIME_MESSAGE (containee)) { + /* for messages we only look at its contents */ + truth = match_words_1message((CamelDataWrapper *)containee, words, mask); + } else if (header_content_type_is(CAMEL_DATA_WRAPPER (containee)->mime_type, "text", "*")) { + /* for all other text parts, we look inside, otherwise we dont care */ + CamelStreamMem *mem = (CamelStreamMem *)camel_stream_mem_new (); + + /* FIXME: The match should be part of a stream op */ + camel_data_wrapper_write_to_stream (containee, CAMEL_STREAM (mem)); + camel_stream_write (CAMEL_STREAM (mem), "", 1); + for (i=0;i<words->len;i++) { + /* 
FIXME: This is horridly slow, and should use a real search algorithm */ + if (camel_ustrstrcase(mem->buffer->data, words->words[i]->word) != NULL) { + *mask |= (1<<i); + /* shortcut a match */ + if (*mask == (1<<(words->len))-1) + return TRUE; } } - camel_object_unref((CamelObject *)wc); + camel_object_unref (CAMEL_OBJECT (mem)); + } + + return truth; +} + +static gboolean +match_words_message(CamelFolder *folder, const char *uid, struct _camel_search_words *words, CamelException *ex) +{ + guint32 mask; + CamelMimeMessage *msg; + int truth; + + msg = camel_folder_get_message(folder, uid, ex); + if (msg) { + mask = 0; + truth = match_words_1message((CamelDataWrapper *)msg, words, &mask); + camel_object_unref((CamelObject *)msg); + } else { + camel_exception_clear(ex); + truth = FALSE; } return truth; } +static GPtrArray * +match_words_messages(CamelFolderSearch *search, struct _camel_search_words *words, CamelException *ex) +{ + int i; + GPtrArray *matches = g_ptr_array_new(); + + if (search->body_index) { + GPtrArray *indexed; + struct _camel_search_words *simple; + + simple = camel_search_words_simple(words); + indexed = match_words_index(search, simple, ex); + camel_search_words_free(simple); + + for (i=0;i<indexed->len;i++) { + const char *uid = g_ptr_array_index(indexed, i); + + if (match_words_message(search->folder, uid, words, ex)) + g_ptr_array_add(matches, (char *)uid); + } + + g_ptr_array_free(indexed, TRUE); + } else { + for (i=0;i<search->summary->len;i++) { + CamelMessageInfo *info = g_ptr_array_index(search->summary, i); + const char *uid = camel_message_info_uid(info); + + if (match_words_message(search->folder, uid, words, ex)) + g_ptr_array_add(matches, (char *)uid); + } + } + + return matches; +} + static ESExpResult * search_body_contains(struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolderSearch *search) { - ESExpResult *r; - int i; - regex_t pattern; + int i, j; CamelException *ex = search->priv->ex; + struct _camel_search_words 
*words; + ESExpResult *r; + struct _glib_sux_donkeys lambdafoo; - if (search->current) { + if (search->current) { int truth = FALSE; - if (argc == 1 && argv[0]->value.string[0] == 0 && search->folder) { + if (argc == 1 && argv[0]->value.string[0] == 0) { truth = TRUE; - } else if (search->body_index) { - if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_ICASE, argc, argv, ex) == 0) { - truth = match_message_index(search->body_index, camel_message_info_uid(search->current), &pattern, ex); - regfree(&pattern); - } - } else if (search->folder) { - /* we do a 'slow' direct search */ - if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_ICASE, argc, argv, ex) == 0) { - truth = match_message(search->folder, camel_message_info_uid(search->current), &pattern, ex); - regfree(&pattern); - } } else { - g_warning("Cannot perform indexed body query with no index or folder set"); + for (i=0;i<argc && !truth;i++) { + if (argv[i]->type == ESEXP_RES_STRING) { + words = camel_search_words_split(argv[i]->value.string); + truth = TRUE; + if ((words->type & CAMEL_SEARCH_WORD_COMPLEX) == 0 && search->body_index) { + for (j=0;j<words->len && truth;j++) + truth = match_message_index(search->body_index, camel_message_info_uid(search->current), words->words[j]->word, ex); + } else { + /* TODO: cache current message incase of multiple body search terms */ + truth = match_words_message(search->folder, camel_message_info_uid(search->current), words, ex); + } + camel_search_words_free(words); + } + } } r = e_sexp_result_new(f, ESEXP_RES_BOOL); r->value.bool = truth; } else { r = e_sexp_result_new(f, ESEXP_RES_ARRAY_PTR); + r->value.ptrarray = g_ptr_array_new(); - if (argc == 1 && argv[0]->value.string[0] == 0 && search->folder) { - /* optimise the match "" case - match everything */ - r->value.ptrarray = g_ptr_array_new(); + if (argc == 1 && argv[0]->value.string[0] == 0) { for (i=0;i<search->summary->len;i++) { CamelMessageInfo *info = 
g_ptr_array_index(search->summary, i); + g_ptr_array_add(r->value.ptrarray, (char *)camel_message_info_uid(info)); } - } else if (search->body_index) { - if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_ICASE, argc, argv, ex) == 0) { - r->value.ptrarray = match_messages_index(search->body_index, &pattern, search->summary_hash, ex); - regfree(&pattern); - } - } else if (search->folder) { - /* do a slow search */ - r->value.ptrarray = g_ptr_array_new(); - if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_ICASE, argc, argv, ex) == 0) { - if (search->summary) { - for (i=0;i<search->summary->len;i++) { - CamelMessageInfo *info = g_ptr_array_index(search->summary, i); - - if (match_message(search->folder, camel_message_info_uid(info), &pattern, ex)) - g_ptr_array_add(r->value.ptrarray, (char *)camel_message_info_uid(info)); + } else { + GHashTable *ht = g_hash_table_new(g_str_hash, g_str_equal); + GPtrArray *matches; + + for (i=0;i<argc;i++) { + if (argv[i]->type == ESEXP_RES_STRING) { + words = camel_search_words_split(argv[i]->value.string); + if ((words->type & CAMEL_SEARCH_WORD_COMPLEX) == 0 && search->body_index) { + matches = match_words_index(search, words, ex); + } else { + matches = match_words_messages(search, words, ex); } - } /* else? 
we could always get the summary from the folder, but then - we need to free it later somehow */ - regfree(&pattern); + for (j=0;j<matches->len;j++) + g_hash_table_insert(ht, matches->pdata[j], matches->pdata[j]); + g_ptr_array_free(matches, TRUE); + camel_search_words_free(words); + } } - } else { - g_warning("Cannot perform indexed body query with no index or folder set"); - r->value.ptrarray = g_ptr_array_new(); + lambdafoo.uids = r->value.ptrarray; + g_hash_table_foreach(ht, (GHFunc)g_lib_sux_htor, &lambdafoo); + g_hash_table_destroy(ht); } } diff --git a/camel/camel-search-private.c b/camel/camel-search-private.c index 6ecb64a1ae..7e8553cd35 100644 --- a/camel/camel-search-private.c +++ b/camel/camel-search-private.c @@ -194,6 +194,7 @@ header_soundex (const char *header, const char *match) return truth; } +/* FIXME: This is stupidly slow and needs to be removed */ static gunichar utf8_get (const char **inp) { @@ -209,7 +210,7 @@ utf8_get (const char **inp) return c; } -static const char * +const char * camel_ustrstrcase (const char *haystack, const char *needle) { gunichar *nuni, *puni; @@ -469,9 +470,6 @@ camel_search_message_body_contains (CamelDataWrapper *object, regex_t *pattern) if (containee == NULL) return FALSE; - /* TODO: I find it odd that get_part and get_content_object do not - add a reference, probably need fixing for multithreading */ - /* using the object types is more accurate than using the mime/types */ if (CAMEL_IS_MULTIPART (containee)) { parts = camel_multipart_get_number (CAMEL_MULTIPART (containee)); @@ -496,3 +494,155 @@ camel_search_message_body_contains (CamelDataWrapper *object, regex_t *pattern) return truth; } +static __inline__ guint32 +camel_utf8_getc(const unsigned char **ptr) +{ + register unsigned char *p = (unsigned char *)*ptr; + register unsigned char c, r; + register guint32 v=0, /* this is only required because the stupid @@@%#%# compiler thinks it can be used uninitialised */ + m; + + r = *p++; +loop: + if (r < 0x80) { 
+ *ptr = p; + v = r; + } else if (r < 0xfe) { /* valid start char? */ + v = r; + m = 0x7f80; /* used to mask out the length bits */ + do { + c = *p++; + if ((c & 0xc0) != 0x80) { + r = c; + goto loop; + } + v = (v<<6) | (c & 0x3f); + r<<=1; + m<<=5; + } while (r & 0x40); + + *ptr = p; + + v &= ~m; + } + + return v; +} + +struct _camel_search_words * +camel_search_words_split(const unsigned char *in) +{ + int type = CAMEL_SEARCH_WORD_SIMPLE, all = 0; + GString *w; + struct _camel_search_word *word; + struct _camel_search_words *words; + GPtrArray *list = g_ptr_array_new(); + guint32 c; + int utf8len; + char utf8[8]; + + words = g_malloc0(sizeof(*words)); + w = g_string_new(""); + + do { + c = camel_utf8_getc(&in); + if (c == 0 || g_unichar_isspace(c)) { + if (w->len) { + word = g_malloc0(sizeof(*word)); + word->word = g_strdup(w->str); + word->type = type; + g_ptr_array_add(list, word); + all |= type; + type = CAMEL_SEARCH_WORD_SIMPLE; + g_string_truncate(w, 0); + } + } else { + if (!g_unichar_isalnum(c)) + type = CAMEL_SEARCH_WORD_COMPLEX; + else + c = g_unichar_tolower(c); + if (c > 0x80) + type |= CAMEL_SEARCH_WORD_8BIT; + + utf8len = g_unichar_to_utf8(c, utf8); + utf8[utf8len] = 0; + g_string_append(w, utf8); + } + } while (c); + + g_string_free(w, TRUE); + words->len = list->len; + words->words = (struct _camel_search_word **)list->pdata; + words->type = all; + g_ptr_array_free(list, FALSE); + + return words; +} + +/* takes an existing 'words' list, and converts it to another consisting of + only simple words, with any punctuation etc stripped */ +struct _camel_search_words * +camel_search_words_simple(struct _camel_search_words *wordin) +{ + int i; + const unsigned char *ptr, *start, *last; + int type = CAMEL_SEARCH_WORD_SIMPLE, all = 0; + GPtrArray *list = g_ptr_array_new(); + struct _camel_search_word *word; + struct _camel_search_words *words; + guint32 c; + + words = g_malloc0(sizeof(*words)); + + for (i=0;i<wordin->len;i++) { + if ((wordin->words[i]->type 
& CAMEL_SEARCH_WORD_COMPLEX) == 0) { + word = g_malloc0(sizeof(*word)); + word->type = wordin->words[i]->type; + word->word = g_strdup(wordin->words[i]->word); + g_ptr_array_add(list, word); + } else { + ptr = wordin->words[i]->word; + start = last = ptr; + do { + c = camel_utf8_getc(&ptr); + if (c == 0 || !g_unichar_isalnum(c)) { + if (last > start) { + word = g_malloc0(sizeof(*word)); + word->word = g_strndup(start, last-start); + word->type = type; + g_ptr_array_add(list, word); + all |= type; + type = CAMEL_SEARCH_WORD_SIMPLE; + } + start = ptr; + } + if (c > 0x80) + type = CAMEL_SEARCH_WORD_8BIT; + last = ptr; + } while (c); + } + } + + words->len = list->len; + words->words = (struct _camel_search_word **)list->pdata; + words->type = all; + g_ptr_array_free(list, FALSE); + + return words; +} + +void +camel_search_words_free(struct _camel_search_words *words) +{ + int i; + + for (i=0;i<words->len;i++) { + struct _camel_search_word *word = words->words[i]; + + g_free(word->word); + g_free(word); + } + g_free(words->words); + g_free(words); +} + diff --git a/camel/camel-search-private.h b/camel/camel-search-private.h index e45d6fe321..7cc30b687f 100644 --- a/camel/camel-search-private.h +++ b/camel/camel-search-private.h @@ -21,6 +21,8 @@ #ifndef _CAMEL_SEARCH_PRIVATE_H #define _CAMEL_SEARCH_PRIVATE_H +#include <regex.h> + typedef enum { CAMEL_SEARCH_MATCH_START = 1<<0, CAMEL_SEARCH_MATCH_END = 1<<1, @@ -52,4 +54,29 @@ gboolean camel_search_message_body_contains(CamelDataWrapper *object, regex_t *p gboolean camel_search_header_match(const char *value, const char *match, camel_search_match_t how, camel_search_t type, const char *default_charset); gboolean camel_search_header_soundex(const char *header, const char *match); +/* TODO: replace with a real search function */ +const char *camel_ustrstrcase(const char *haystack, const char *needle); + +/* Some crappy utility functions for handling multiple search words */ +enum _camel_search_word_t { + 
CAMEL_SEARCH_WORD_SIMPLE = 1, + CAMEL_SEARCH_WORD_COMPLEX = 2, + CAMEL_SEARCH_WORD_8BIT = 4, +}; +struct _camel_search_word { + enum _camel_search_word_t type; + char *word; +}; + +struct _camel_search_words { + int len; + enum _camel_search_word_t type; /* OR of all word types in list */ + struct _camel_search_word **words; +}; + +struct _camel_search_words *camel_search_words_split(const unsigned char *in); +struct _camel_search_words *camel_search_words_simple(struct _camel_search_words *wordin); +void camel_search_words_free(struct _camel_search_words *); + #endif /* ! _CAMEL_SEARCH_PRIVATE_H */ + diff --git a/camel/providers/imap/camel-imap-search.c b/camel/providers/imap/camel-imap-search.c index 8463eb202b..c30fa5611e 100644 --- a/camel/providers/imap/camel-imap-search.c +++ b/camel/providers/imap/camel-imap-search.c @@ -42,6 +42,7 @@ #include "camel-mime-utils.h" /* base64 encoding */ #include "camel-seekable-stream.h" +#include "camel-search-private.h" #define d(x) x @@ -304,10 +305,13 @@ static int sync_match(CamelImapSearch *is, struct _match_record *mr) { char *p, *result, *lasts = NULL; - CamelImapResponse *response; + CamelImapResponse *response = NULL; guint32 uid; CamelFolder *folder = ((CamelFolderSearch *)is)->folder; CamelImapStore *store = (CamelImapStore *)folder->parent_store; + struct _camel_search_words *words; + GString *search; + int i; if (mr->lastuid >= is->lastuid && mr->validity == is->validity) return 0; @@ -316,9 +320,36 @@ sync_match(CamelImapSearch *is, struct _match_record *mr) /* TODO: Handle multiple search terms */ - response = camel_imap_command (store, folder, NULL, - "UID SEARCH UID %d:%d BODY \"%s\"", - mr->lastuid+1, is->lastuid, mr->terms[0]); + /* This handles multiple search words within a single term */ + words = camel_search_words_split(mr->terms[0]); + search = g_string_new(""); + g_string_sprintfa(search, "UID %d:%d", mr->lastuid+1, is->lastuid); + for (i=0;i<words->len;i++) { + char *w = words->words[i]->word, c; + 
+ g_string_sprintfa(search, " BODY \""); + while ((c = *w++)) { + if (c == '\\' || c == '"') + g_string_append_c(search, '\\'); + g_string_append_c(search, c); + } + g_string_append_c(search, '"'); + } + camel_search_words_free(words); + + /* We only try search using utf8 if its non us-ascii text? */ + if ((words->type & CAMEL_SEARCH_WORD_8BIT) && (store->capabilities & IMAP_CAPABILITY_utf8_search)) { + response = camel_imap_command(store, folder, NULL, + "UID SEARCH CHARSET UTF-8 %s", search->str); + /* We can't actually tell if we got a NO response, so assume always */ + if (response == NULL) + store->capabilities &= ~IMAP_CAPABILITY_utf8_search; + } + if (response == NULL) + response = camel_imap_command (store, folder, NULL, + "UID SEARCH %s", search->str); + g_string_free(search, TRUE); + if (!response) return -1; result = camel_imap_response_extract (store, response, "SEARCH", NULL); diff --git a/camel/providers/imap/camel-imap-store.c b/camel/providers/imap/camel-imap-store.c index b3d98db759..ce87174486 100644 --- a/camel/providers/imap/camel-imap-store.c +++ b/camel/providers/imap/camel-imap-store.c @@ -342,7 +342,8 @@ connect_to_server (CamelService *service, CamelException *ex) store->connected = TRUE; /* Find out the IMAP capabilities */ - store->capabilities = 0; + /* We assume we have utf8 capable search until a failed search tells us otherwise */ + store->capabilities = IMAP_CAPABILITY_utf8_search; store->authtypes = g_hash_table_new (g_str_hash, g_str_equal); response = camel_imap_command (store, NULL, ex, "CAPABILITY"); if (!response) diff --git a/camel/providers/imap/camel-imap-store.h b/camel/providers/imap/camel-imap-store.h index 43c9b8e1f0..9a6c6655ad 100644 --- a/camel/providers/imap/camel-imap-store.h +++ b/camel/providers/imap/camel-imap-store.h @@ -71,6 +71,7 @@ typedef enum { #define IMAP_CAPABILITY_UIDPLUS (1 << 4) #define IMAP_CAPABILITY_LITERALPLUS (1 << 5) #define IMAP_CAPABILITY_useful_lsub (1 << 6) +#define 
IMAP_CAPABILITY_utf8_search (1 << 7) #define IMAP_PARAM_OVERRIDE_NAMESPACE (1 << 0) #define IMAP_PARAM_CHECK_ALL (1 << 1) |