aboutsummaryrefslogtreecommitdiffstats
path: root/camel
diff options
context:
space:
mode:
authorNot Zed <NotZed@Ximian.com>2002-04-18 10:18:55 +0800
committerMichael Zucci <zucchi@src.gnome.org>2002-04-18 10:18:55 +0800
commit6ccd0e6f59bec5f1900c49cd1868fca998570fc7 (patch)
treeeac70d58c4d79bfbc73d7592ad5f303f7f8c044e /camel
parente5e67a6644e4d0ac41c270a4bcd18e5c6e2b7667 (diff)
downloadgsoc2013-evolution-6ccd0e6f59bec5f1900c49cd1868fca998570fc7.tar.gz
gsoc2013-evolution-6ccd0e6f59bec5f1900c49cd1868fca998570fc7.tar.zst
gsoc2013-evolution-6ccd0e6f59bec5f1900c49cd1868fca998570fc7.zip
When doing a contains match, split the words and perform an AND on them.
2002-04-18 Not Zed <NotZed@Ximian.com> * camel-folder-search.c (check_header): When doing a contains match, split the words and perform an AND on them. (match_words_messages): If we have an index, but were forced to do a full search, first look up a subset of messages using the index and a simplified word set. Only do a manual search of this subset. 2002-04-17 Not Zed <NotZed@Ximian.com> * camel-folder-search.c (match_message_index): Changed to take a utf8 string not a regex pattern. (match_words_index): Matches against a camel_search_words list. (match_words_1message): Matches a single message against a camel_search_words list. (match_words_message): Same, but gets the message from the folder for you. (match_words_messages): Matches a list of messages against a words list. (search_body_contains): Rewritten to handle multiple word searches. For #23371. * providers/imap/camel-imap-search.c (sync_match): Split words when searching, to support multiple search words. Also, try searching specifying charset of utf8 if we can, if that fails, fall back to not specifying charset. TODO: It should translate the strings into the locale default charset? * providers/imap/camel-imap-store.c (connect_to_server): Added new cap - utf8_search, if set, we tell the server we're searching using utf8, otherwise we don't (incorrectly, since we always use utf8 to search). * camel-search-private.c (camel_ustrstrcase): Make this function public. (camel_search_words_split): Split a word into multiple words based on whitespace, and keep track of whether the word is simple (indexable directly), or not. (camel_search_words_free): Free 'em. svn path=/trunk/; revision=16501
Diffstat (limited to 'camel')
-rw-r--r--camel/ChangeLog40
-rw-r--r--camel/camel-folder-search.c296
-rw-r--r--camel/camel-search-private.c158
-rw-r--r--camel/camel-search-private.h27
-rw-r--r--camel/providers/imap/camel-imap-search.c39
-rw-r--r--camel/providers/imap/camel-imap-store.c3
-rw-r--r--camel/providers/imap/camel-imap-store.h1
7 files changed, 469 insertions, 95 deletions
diff --git a/camel/ChangeLog b/camel/ChangeLog
index bccacbea60..ad285fa52b 100644
--- a/camel/ChangeLog
+++ b/camel/ChangeLog
@@ -1,3 +1,43 @@
+2002-04-18 Not Zed <NotZed@Ximian.com>
+
+ * camel-folder-search.c (check_header): When doing a contains
+ match, split the words and perform an AND on them.
+ (match_words_messages): If we have an index, but were forced to do
+ a full search, first look up a subset of messages using
+ the index and a simplified word set. Only do a manual search of
+ this subset.
+
+2002-04-17 Not Zed <NotZed@Ximian.com>
+
+ * camel-folder-search.c (match_message_index): Changed to take a
+ utf8 string not a regex pattern.
+ (match_words_index): Matches against a camel_search_words list.
+ (match_words_1message): Matches a single message against a
+ camel_search_words list.
+ (match_words_message): Same, but gets the message from the folder
+ for you.
+ (match_words_messages): Matches a list of messages against a words
+ list.
+ (search_body_contains): Rewritten to handle multiple word
+ searches. For #23371.
+
+ * providers/imap/camel-imap-search.c (sync_match): Split words
+ when searching, to support multiple search words. Also, try
+ searching specifying charset of utf8 if we can, if that fails,
+ fall back to not specifying charset. TODO: It should translate
+ the strings into the locale default charset?
+
+ * providers/imap/camel-imap-store.c (connect_to_server): Added new
+ cap - utf8_search, if set, we tell the server we're searching
+ using utf8, otherwise we don't (incorrectly, since we always use
+ utf8 to search).
+
+ * camel-search-private.c (camel_ustrstrcase): Make this function public.
+ (camel_search_words_split): Split a word into multiple words based
+ on whitespace, and keep track of whether the word is simple
+ (indexable directly), or not.
+ (camel_search_words_free): Free 'em.
+
2002-04-17 Jeffrey Stedfast <fejj@ximian.com>
* camel-vee-folder.c (vee_search_by_expression): If the vee-folder
diff --git a/camel/camel-folder-search.c b/camel/camel-folder-search.c
index e0e5052a5f..d9702706f9 100644
--- a/camel/camel-folder-search.c
+++ b/camel/camel-folder-search.c
@@ -621,8 +621,9 @@ check_header(struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolder
char *headername;
const char *header = NULL;
char strbuf[32];
- int i;
+ int i, j;
camel_search_t type = CAMEL_SEARCH_TYPE_ASIS;
+ struct _camel_search_words *words;
/* only a subset of headers are supported .. */
headername = argv[0]->value.string;
@@ -652,9 +653,21 @@ check_header(struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolder
if (header) {
/* performs an OR of all words */
for (i=1;i<argc && !truth;i++) {
- if (argv[i]->type == ESEXP_RES_STRING)
- truth = camel_search_header_match(header, argv[i]->value.string,
- how, type, NULL);
+ if (argv[i]->type == ESEXP_RES_STRING) {
+ if (argv[i]->value.string[0] == 0) {
+ truth = TRUE;
+ } else if (how == CAMEL_SEARCH_MATCH_CONTAINS) {
+ /* doesn't make sense to split words on anything but contains i.e. we can't have an ending match different words */
+ words = camel_search_words_split(argv[i]->value.string);
+ truth = TRUE;
+ for (j=0;j<words->len && truth;j++) {
+ truth = camel_search_header_match(header, words->words[j]->word, how, type, NULL);
+ }
+ camel_search_words_free(words);
+ } else {
+ truth = camel_search_header_match(header, argv[i]->value.string, how, type, NULL);
+ }
+ }
}
}
}
@@ -723,26 +736,53 @@ g_lib_sux_htor(char *key, int value, struct _glib_sux_donkeys *fuckup)
g_ptr_array_add(fuckup->uids, key);
}
+/* AND collector for g_hash_table_foreach(): a key is appended to
+   fuckup->uids only when its stored value equals fuckup->count */
+static void
+g_lib_sux_htand(char *key, int value, struct _glib_sux_donkeys *fuckup)
+{
+	/* any other value means this key does not qualify */
+	if (value != fuckup->count)
+		return;
+
+	g_ptr_array_add(fuckup->uids, key);
+}
+
static int
-match_message(CamelFolder *folder, const char *uid, regex_t *pattern, CamelException *ex)
+match_message_index(CamelIndex *idx, const char *uid, const char *match, CamelException *ex) /* TRUE when uid is listed under some index word for which camel_ustrstrcase(word, match) succeeds; ex is not used here */
{
- CamelMimeMessage *msg;
+ CamelIndexCursor *wc, *nc;
+ const char *word, *name;
int truth = FALSE;
- msg = camel_folder_get_message(folder, uid, ex);
- if (!camel_exception_is_set(ex) && msg!=NULL) {
- truth = camel_search_message_body_contains((CamelDataWrapper *)msg, pattern);
- camel_object_unref((CamelObject *)msg);
- } else {
- camel_exception_clear(ex);
+ wc = camel_index_words(idx);
+ if (wc) {
+ while (!truth && (word = camel_index_cursor_next(wc))) { /* walk the index words, stopping at the first hit */
+ if (camel_ustrstrcase(word,match) != NULL) {
+ /* perf: could have the wc cursor return the name cursor */
+ nc = camel_index_find(idx, word);
+ if (nc) {
+ while (!truth && (name = camel_index_cursor_next(nc))) /* scan the names recorded for this word for our uid */
+ truth = strcmp(name, uid) == 0;
+ camel_object_unref((CamelObject *)nc);
+ }
+ }
+ }
+ camel_object_unref((CamelObject *)wc);
}
+
return truth;
}
-/* perform a regex match against words in an index */
-/* uids = hash table of messageinfo's by uid's */
+/*
+ "one two" "three" "four five"
+
+ one and two
+or
+ three
+or
+ four and five
+*/
+
+/* returns messages which contain all words listed in words */
static GPtrArray *
-match_messages_index(CamelIndex *idx, regex_t *pattern, GHashTable *uids, CamelException *ex)
+match_words_index(CamelFolderSearch *search, struct _camel_search_words *words, CamelException *ex)
{
GPtrArray *result = g_ptr_array_new();
GHashTable *ht = g_hash_table_new(g_str_hash, g_str_equal);
@@ -750,123 +790,207 @@ match_messages_index(CamelIndex *idx, regex_t *pattern, GHashTable *uids, CamelE
CamelIndexCursor *wc, *nc;
const char *word, *name;
CamelMessageInfo *mi;
+ int i;
+
+ /* we can have a maximum of 32 words, as we use it as the AND mask */
- wc = camel_index_words(idx);
+ wc = camel_index_words(search->body_index);
if (wc) {
while ((word = camel_index_cursor_next(wc))) {
- if (regexec(pattern, word, 0, NULL, 0) == 0) {
- /* perf: could have the wc cursor return the name cursor */
- nc = camel_index_find(idx, word);
- if (nc) {
- while ((name = camel_index_cursor_next(nc))) {
- mi = g_hash_table_lookup(uids, name);
- if (mi)
- g_hash_table_insert(ht, (char *)camel_message_info_uid(mi), (void *)1);
+ for (i=0;i<words->len;i++) {
+ if (camel_ustrstrcase(word, words->words[i]->word) != NULL) {
+ /* perf: could have the wc cursor return the name cursor */
+ nc = camel_index_find(search->body_index, word);
+ if (nc) {
+ while ((name = camel_index_cursor_next(nc))) {
+ mi = g_hash_table_lookup(search->summary_hash, name);
+ if (mi) {
+ int mask;
+ const char *uid = camel_message_info_uid(mi);
+
+ mask = ((int)g_hash_table_lookup(ht, uid)) | (1<<i);
+ g_hash_table_insert(ht, (char *)uid, (void *)mask);
+ }
+ }
+ camel_object_unref((CamelObject *)nc);
}
- camel_object_unref((CamelObject *)nc);
}
}
}
camel_object_unref((CamelObject *)wc);
lambdafoo.uids = result;
- g_hash_table_foreach(ht, (GHFunc)g_lib_sux_htor, &lambdafoo);
+ lambdafoo.count = (1<<words->len) - 1;
+ g_hash_table_foreach(ht, (GHFunc)g_lib_sux_htand, &lambdafoo);
g_hash_table_destroy(ht);
}
return result;
}
-/* perform a regex match against an individual uid in an index */
-/* this would benefit greatly in practice if there was a hashtalbe of uid's to amtch against */
-static int
-match_message_index(CamelIndex *idx, const char *uid, regex_t *pattern, CamelException *ex)
+/* Recursively test the body of 'object' against 'words'.
+ * Bit i of *mask is set once words->words[i] has been found in a text part;
+ * the caller must clear *mask before the first call.
+ * Returns TRUE as soon as every word's bit is set, FALSE otherwise.
+ * The temporary memory stream is released on every path. */
+static gboolean
+match_words_1message (CamelDataWrapper *object, struct _camel_search_words *words, guint32 *mask)
{
- CamelIndexCursor *wc, *nc;
- const char *word, *name;
+ CamelDataWrapper *containee;
int truth = FALSE;
-
- wc = camel_index_words(idx);
- if (wc) {
- while (!truth && (word = camel_index_cursor_next(wc))) {
- if (regexec(pattern, word, 0, NULL, 0) == 0) {
- /* perf: could have the wc cursor return the name cursor */
- nc = camel_index_find(idx, word);
- if (nc) {
- while (!truth && (name = camel_index_cursor_next(nc)))
- truth = strcmp(name, uid) == 0;
- camel_object_unref((CamelObject *)nc);
- }
+ int parts, i;
+
+ containee = camel_medium_get_content_object (CAMEL_MEDIUM (object));
+
+ if (containee == NULL)
+ return FALSE;
+
+ /* using the object types is more accurate than using the mime/types */
+ if (CAMEL_IS_MULTIPART (containee)) {
+ parts = camel_multipart_get_number (CAMEL_MULTIPART (containee));
+ for (i = 0; i < parts && truth == FALSE; i++) {
+ CamelDataWrapper *part = (CamelDataWrapper *)camel_multipart_get_part (CAMEL_MULTIPART (containee), i);
+ if (part)
+ truth = match_words_1message(part, words, mask);
+ }
+ } else if (CAMEL_IS_MIME_MESSAGE (containee)) {
+ /* for messages we only look at its contents */
+ truth = match_words_1message((CamelDataWrapper *)containee, words, mask);
+ } else if (header_content_type_is(CAMEL_DATA_WRAPPER (containee)->mime_type, "text", "*")) {
+ /* for all other text parts, we look inside, otherwise we dont care */
+ CamelStreamMem *mem = (CamelStreamMem *)camel_stream_mem_new ();
+
+ /* FIXME: The match should be part of a stream op */
+ camel_data_wrapper_write_to_stream (containee, CAMEL_STREAM (mem));
+ /* NUL-terminate the buffer so it can be searched as a C string */
+ camel_stream_write (CAMEL_STREAM (mem), "", 1);
+ for (i=0;i<words->len;i++) {
+ /* FIXME: This is horridly slow, and should use a real search algorithm */
+ if (camel_ustrstrcase(mem->buffer->data, words->words[i]->word) != NULL) {
+ *mask |= (1<<i);
+ /* shortcut a match; leave via break (not return) so mem is still unref'd below */
+ if (*mask == (1<<(words->len))-1) {
+ truth = TRUE;
+ break;
+ }
}
}
- camel_object_unref((CamelObject *)wc);
+ camel_object_unref (CAMEL_OBJECT (mem));
+ }
+
+ return truth;
+}
+
+/* Load message 'uid' from 'folder' and test its body against 'words'.
+   A message that cannot be loaded counts as a non-match and 'ex' is cleared. */
+static gboolean
+match_words_message(CamelFolder *folder, const char *uid, struct _camel_search_words *words, CamelException *ex)
+{
+ CamelMimeMessage *message = camel_folder_get_message(folder, uid, ex);
+ guint32 seen = 0;
+ int truth = FALSE;
+
+ if (message == NULL) {
+ /* load failed: clear the exception and report no match */
+ camel_exception_clear(ex);
+ } else {
+ truth = match_words_1message((CamelDataWrapper *)message, words, &seen);
+ camel_object_unref((CamelObject *)message);
}
return truth;
}
+/*
+ * Return the uids of all messages whose body matches 'words', checked by
+ * loading each candidate message with match_words_message().
+ *
+ * With a body index, the candidates are first narrowed to the messages the
+ * index reports for the simplified word set.  If that set is empty (the query
+ * has no alphanumeric characters) the index cannot narrow anything, so every
+ * message in the summary is checked instead.  Without a summary nothing can
+ * be scanned and an empty array is returned.
+ *
+ * The uid strings in the returned array are not copied.
+ */
+static GPtrArray *
+match_words_messages(CamelFolderSearch *search, struct _camel_search_words *words, CamelException *ex)
+{
+	int i;
+	GPtrArray *matches = g_ptr_array_new();
+	GPtrArray *indexed = NULL;
+
+	if (search->body_index) {
+		struct _camel_search_words *simple;
+
+		simple = camel_search_words_simple(words);
+		/* an empty word set makes match_words_index() return no uids at all,
+		   which would discard every message; only use the index when it helps */
+		if (simple->len > 0)
+			indexed = match_words_index(search, simple, ex);
+		camel_search_words_free(simple);
+	}
+
+	if (indexed) {
+		for (i=0;i<indexed->len;i++) {
+			const char *uid = g_ptr_array_index(indexed, i);
+
+			if (match_words_message(search->folder, uid, words, ex))
+				g_ptr_array_add(matches, (char *)uid);
+		}
+
+		g_ptr_array_free(indexed, TRUE);
+	} else if (search->summary) {
+		for (i=0;i<search->summary->len;i++) {
+			CamelMessageInfo *info = g_ptr_array_index(search->summary, i);
+			const char *uid = camel_message_info_uid(info);
+
+			if (match_words_message(search->folder, uid, words, ex))
+				g_ptr_array_add(matches, (char *)uid);
+		}
+	}
+
+	return matches;
+}
+
static ESExpResult *
search_body_contains(struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolderSearch *search)
{
- ESExpResult *r;
- int i;
- regex_t pattern;
+ int i, j;
CamelException *ex = search->priv->ex;
+ struct _camel_search_words *words;
+ ESExpResult *r;
+ struct _glib_sux_donkeys lambdafoo;
- if (search->current) {
+ if (search->current) {
int truth = FALSE;
- if (argc == 1 && argv[0]->value.string[0] == 0 && search->folder) {
+ if (argc == 1 && argv[0]->value.string[0] == 0) {
truth = TRUE;
- } else if (search->body_index) {
- if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_ICASE, argc, argv, ex) == 0) {
- truth = match_message_index(search->body_index, camel_message_info_uid(search->current), &pattern, ex);
- regfree(&pattern);
- }
- } else if (search->folder) {
- /* we do a 'slow' direct search */
- if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_ICASE, argc, argv, ex) == 0) {
- truth = match_message(search->folder, camel_message_info_uid(search->current), &pattern, ex);
- regfree(&pattern);
- }
} else {
- g_warning("Cannot perform indexed body query with no index or folder set");
+ for (i=0;i<argc && !truth;i++) {
+ if (argv[i]->type == ESEXP_RES_STRING) {
+ words = camel_search_words_split(argv[i]->value.string);
+ truth = TRUE;
+ if ((words->type & CAMEL_SEARCH_WORD_COMPLEX) == 0 && search->body_index) {
+ for (j=0;j<words->len && truth;j++)
+ truth = match_message_index(search->body_index, camel_message_info_uid(search->current), words->words[j]->word, ex);
+ } else {
+ /* TODO: cache current message incase of multiple body search terms */
+ truth = match_words_message(search->folder, camel_message_info_uid(search->current), words, ex);
+ }
+ camel_search_words_free(words);
+ }
+ }
}
r = e_sexp_result_new(f, ESEXP_RES_BOOL);
r->value.bool = truth;
} else {
r = e_sexp_result_new(f, ESEXP_RES_ARRAY_PTR);
+ r->value.ptrarray = g_ptr_array_new();
- if (argc == 1 && argv[0]->value.string[0] == 0 && search->folder) {
- /* optimise the match "" case - match everything */
- r->value.ptrarray = g_ptr_array_new();
+ if (argc == 1 && argv[0]->value.string[0] == 0) {
for (i=0;i<search->summary->len;i++) {
CamelMessageInfo *info = g_ptr_array_index(search->summary, i);
+
g_ptr_array_add(r->value.ptrarray, (char *)camel_message_info_uid(info));
}
- } else if (search->body_index) {
- if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_ICASE, argc, argv, ex) == 0) {
- r->value.ptrarray = match_messages_index(search->body_index, &pattern, search->summary_hash, ex);
- regfree(&pattern);
- }
- } else if (search->folder) {
- /* do a slow search */
- r->value.ptrarray = g_ptr_array_new();
- if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_ICASE, argc, argv, ex) == 0) {
- if (search->summary) {
- for (i=0;i<search->summary->len;i++) {
- CamelMessageInfo *info = g_ptr_array_index(search->summary, i);
-
- if (match_message(search->folder, camel_message_info_uid(info), &pattern, ex))
- g_ptr_array_add(r->value.ptrarray, (char *)camel_message_info_uid(info));
+ } else {
+ GHashTable *ht = g_hash_table_new(g_str_hash, g_str_equal);
+ GPtrArray *matches;
+
+ for (i=0;i<argc;i++) {
+ if (argv[i]->type == ESEXP_RES_STRING) {
+ words = camel_search_words_split(argv[i]->value.string);
+ if ((words->type & CAMEL_SEARCH_WORD_COMPLEX) == 0 && search->body_index) {
+ matches = match_words_index(search, words, ex);
+ } else {
+ matches = match_words_messages(search, words, ex);
}
- } /* else? we could always get the summary from the folder, but then
- we need to free it later somehow */
- regfree(&pattern);
+ for (j=0;j<matches->len;j++)
+ g_hash_table_insert(ht, matches->pdata[j], matches->pdata[j]);
+ g_ptr_array_free(matches, TRUE);
+ camel_search_words_free(words);
+ }
}
- } else {
- g_warning("Cannot perform indexed body query with no index or folder set");
- r->value.ptrarray = g_ptr_array_new();
+ lambdafoo.uids = r->value.ptrarray;
+ g_hash_table_foreach(ht, (GHFunc)g_lib_sux_htor, &lambdafoo);
+ g_hash_table_destroy(ht);
}
}
diff --git a/camel/camel-search-private.c b/camel/camel-search-private.c
index 6ecb64a1ae..7e8553cd35 100644
--- a/camel/camel-search-private.c
+++ b/camel/camel-search-private.c
@@ -194,6 +194,7 @@ header_soundex (const char *header, const char *match)
return truth;
}
+/* FIXME: This is stupidly slow and needs to be removed */
static gunichar
utf8_get (const char **inp)
{
@@ -209,7 +210,7 @@ utf8_get (const char **inp)
return c;
}
-static const char *
+const char *
camel_ustrstrcase (const char *haystack, const char *needle)
{
gunichar *nuni, *puni;
@@ -469,9 +470,6 @@ camel_search_message_body_contains (CamelDataWrapper *object, regex_t *pattern)
if (containee == NULL)
return FALSE;
- /* TODO: I find it odd that get_part and get_content_object do not
- add a reference, probably need fixing for multithreading */
-
/* using the object types is more accurate than using the mime/types */
if (CAMEL_IS_MULTIPART (containee)) {
parts = camel_multipart_get_number (CAMEL_MULTIPART (containee));
@@ -496,3 +494,155 @@ camel_search_message_body_contains (CamelDataWrapper *object, regex_t *pattern)
return truth;
}
+static __inline__ guint32
+camel_utf8_getc(const unsigned char **ptr) /* decode one UTF-8 character starting at *ptr and return its value; *ptr is moved past the bytes consumed */
+{
+ register unsigned char *p = (unsigned char *)*ptr;
+ register unsigned char c, r;
+ register guint32 v=0, /* this is only required because the stupid @@@%#%# compiler thinks it can be used uninitialised */
+ m;
+
+ r = *p++;
+loop:
+ if (r < 0x80) { /* single-byte character; this is also the path taken for the terminating NUL, which is consumed and returned as 0 */
+ *ptr = p;
+ v = r;
+ } else if (r < 0xfe) { /* valid start char? */
+ v = r;
+ m = 0x7f80; /* used to mask out the length bits */
+ do {
+ c = *p++;
+ if ((c & 0xc0) != 0x80) { /* not a continuation byte: drop the partial sequence and restart with c as the first byte */
+ r = c;
+ goto loop;
+ }
+ v = (v<<6) | (c & 0x3f); /* append the low 6 bits of the continuation byte */
+ r<<=1;
+ m<<=5;
+ } while (r & 0x40); /* r is shifted left once per byte read; keep going while bit 0x40 is still set */
+
+ *ptr = p;
+
+ v &= ~m;
+ } /* bytes 0xfe and 0xff take neither branch: v stays 0 and *ptr is left unchanged */
+
+ return v;
+}
+
+/*
+ * Split the UTF-8 string 'in' into words at whitespace.
+ *
+ * Alphanumeric characters are lower-cased as they are copied.  Any other
+ * non-space character is copied unchanged and marks its word COMPLEX.  A word
+ * containing a character at or above U+0080 also carries the 8BIT flag,
+ * wherever in the word that character appears.  words->type is the OR of the
+ * flags of every word.
+ *
+ * Returns a newly allocated list (possibly with len == 0); release it with
+ * camel_search_words_free().
+ */
+struct _camel_search_words *
+camel_search_words_split(const unsigned char *in)
+{
+	int type = CAMEL_SEARCH_WORD_SIMPLE, all = 0;
+	GString *w;
+	struct _camel_search_word *word;
+	struct _camel_search_words *words;
+	GPtrArray *list = g_ptr_array_new();
+	guint32 c;
+	int utf8len;
+	char utf8[8];
+
+	words = g_malloc0(sizeof(*words));
+	w = g_string_new("");
+
+	do {
+		c = camel_utf8_getc(&in);
+		if (c == 0 || g_unichar_isspace(c)) {
+			/* word boundary or end of input: flush the pending word, if any */
+			if (w->len) {
+				word = g_malloc0(sizeof(*word));
+				word->word = g_strdup(w->str);
+				word->type = type;
+				g_ptr_array_add(list, word);
+				all |= type;
+				type = CAMEL_SEARCH_WORD_SIMPLE;
+				g_string_truncate(w, 0);
+			}
+		} else {
+			if (!g_unichar_isalnum(c)) {
+				/* swap SIMPLE for COMPLEX, but keep an 8BIT flag that an
+				   earlier character of this word may already have set */
+				type = (type & ~CAMEL_SEARCH_WORD_SIMPLE) | CAMEL_SEARCH_WORD_COMPLEX;
+			} else {
+				c = g_unichar_tolower(c);
+			}
+			/* everything from U+0080 upwards is outside us-ascii */
+			if (c >= 0x80)
+				type |= CAMEL_SEARCH_WORD_8BIT;
+
+			utf8len = g_unichar_to_utf8(c, utf8);
+			utf8[utf8len] = 0;
+			g_string_append(w, utf8);
+		}
+	} while (c);
+
+	g_string_free(w, TRUE);
+	/* hand the pointer array's storage over to the result; only the GPtrArray wrapper is freed */
+	words->len = list->len;
+	words->words = (struct _camel_search_word **)list->pdata;
+	words->type = all;
+	g_ptr_array_free(list, FALSE);
+
+	return words;
+}
+
+/*
+ * Takes an existing 'words' list, and converts it to another consisting of
+ * only simple words, with any punctuation etc stripped.
+ *
+ * Words not flagged COMPLEX are copied unchanged.  A COMPLEX word is cut at
+ * every non-alphanumeric character and each non-empty alphanumeric run
+ * becomes its own word, flagged 8BIT when the run itself contains a character
+ * at or above U+0080.  words->type is the OR of the flags of every word in
+ * the result.
+ *
+ * 'wordin' is not modified.  Returns a newly allocated list (possibly with
+ * len == 0); release it with camel_search_words_free().
+ */
+struct _camel_search_words *
+camel_search_words_simple(struct _camel_search_words *wordin)
+{
+	int i;
+	const unsigned char *ptr, *start, *last;
+	int type, all = 0;
+	GPtrArray *list = g_ptr_array_new();
+	struct _camel_search_word *word;
+	struct _camel_search_words *words;
+	guint32 c;
+
+	words = g_malloc0(sizeof(*words));
+
+	for (i=0;i<wordin->len;i++) {
+		if ((wordin->words[i]->type & CAMEL_SEARCH_WORD_COMPLEX) == 0) {
+			word = g_malloc0(sizeof(*word));
+			word->type = wordin->words[i]->type;
+			word->word = g_strdup(wordin->words[i]->word);
+			g_ptr_array_add(list, word);
+			/* copied words count towards the summary type too */
+			all |= word->type;
+		} else {
+			ptr = (const unsigned char *)wordin->words[i]->word;
+			start = last = ptr;
+			/* flags never carry over from a previous input word */
+			type = CAMEL_SEARCH_WORD_SIMPLE;
+			do {
+				c = camel_utf8_getc(&ptr);
+				if (c == 0 || !g_unichar_isalnum(c)) {
+					/* separator or end of word: emit the run collected so far */
+					if (last > start) {
+						word = g_malloc0(sizeof(*word));
+						word->word = g_strndup((const char *)start, last-start);
+						word->type = type;
+						g_ptr_array_add(list, word);
+						all |= type;
+					}
+					/* the separator itself is dropped, so it must not
+					   contribute flags to the run that follows it */
+					type = CAMEL_SEARCH_WORD_SIMPLE;
+					start = ptr;
+				} else if (c >= 0x80) {
+					type |= CAMEL_SEARCH_WORD_8BIT;
+				}
+				last = ptr;
+			} while (c);
+		}
+	}
+
+	/* hand the pointer array's storage over to the result; only the GPtrArray wrapper is freed */
+	words->len = list->len;
+	words->words = (struct _camel_search_word **)list->pdata;
+	words->type = all;
+	g_ptr_array_free(list, FALSE);
+
+	return words;
+}
+
+/* Release a word list: each word's string and record, then the pointer
+   array, then the list structure itself. */
+void
+camel_search_words_free(struct _camel_search_words *words)
+{
+	int remaining = words->len;
+
+	while (remaining-- > 0) {
+		struct _camel_search_word *entry = words->words[remaining];
+
+		g_free(entry->word);
+		g_free(entry);
+	}
+	g_free(words->words);
+	g_free(words);
+}
+
diff --git a/camel/camel-search-private.h b/camel/camel-search-private.h
index e45d6fe321..7cc30b687f 100644
--- a/camel/camel-search-private.h
+++ b/camel/camel-search-private.h
@@ -21,6 +21,8 @@
#ifndef _CAMEL_SEARCH_PRIVATE_H
#define _CAMEL_SEARCH_PRIVATE_H
+#include <regex.h>
+
typedef enum {
CAMEL_SEARCH_MATCH_START = 1<<0,
CAMEL_SEARCH_MATCH_END = 1<<1,
@@ -52,4 +54,29 @@ gboolean camel_search_message_body_contains(CamelDataWrapper *object, regex_t *p
gboolean camel_search_header_match(const char *value, const char *match, camel_search_match_t how, camel_search_t type, const char *default_charset);
gboolean camel_search_header_soundex(const char *header, const char *match);
+/* TODO: replace with a real search function */
+const char *camel_ustrstrcase(const char *haystack, const char *needle);
+
+/* Some crappy utility functions for handling multiple search words */
+enum _camel_search_word_t { /* bit flags describing a search word; values are OR'd together */
+ CAMEL_SEARCH_WORD_SIMPLE = 1, /* starting type of every word produced by camel_search_words_split() */
+ CAMEL_SEARCH_WORD_COMPLEX = 2, /* word contains a character that is not alphanumeric */
+ CAMEL_SEARCH_WORD_8BIT = 4, /* word contains a high (non-ASCII) character */
+};
+struct _camel_search_word { /* one search word */
+ enum _camel_search_word_t type; /* CAMEL_SEARCH_WORD_* flags for this word */
+ char *word; /* the word text; released with g_free() by camel_search_words_free() */
+};
+
+struct _camel_search_words { /* a list of search words; free with camel_search_words_free() */
+ int len; /* number of entries in words[] */
+ enum _camel_search_word_t type; /* OR of all word types in list */
+ struct _camel_search_word **words; /* array of len word records */
+};
+
+struct _camel_search_words *camel_search_words_split(const unsigned char *in);
+struct _camel_search_words *camel_search_words_simple(struct _camel_search_words *wordin);
+void camel_search_words_free(struct _camel_search_words *);
+
#endif /* ! _CAMEL_SEARCH_PRIVATE_H */
+
diff --git a/camel/providers/imap/camel-imap-search.c b/camel/providers/imap/camel-imap-search.c
index 8463eb202b..c30fa5611e 100644
--- a/camel/providers/imap/camel-imap-search.c
+++ b/camel/providers/imap/camel-imap-search.c
@@ -42,6 +42,7 @@
#include "camel-mime-utils.h" /* base64 encoding */
#include "camel-seekable-stream.h"
+#include "camel-search-private.h"
#define d(x) x
@@ -304,10 +305,13 @@ static int
sync_match(CamelImapSearch *is, struct _match_record *mr)
{
char *p, *result, *lasts = NULL;
- CamelImapResponse *response;
+ CamelImapResponse *response = NULL;
guint32 uid;
CamelFolder *folder = ((CamelFolderSearch *)is)->folder;
CamelImapStore *store = (CamelImapStore *)folder->parent_store;
+ struct _camel_search_words *words;
+ GString *search;
+ int i;
if (mr->lastuid >= is->lastuid && mr->validity == is->validity)
return 0;
@@ -316,9 +320,36 @@ sync_match(CamelImapSearch *is, struct _match_record *mr)
/* TODO: Handle multiple search terms */
- response = camel_imap_command (store, folder, NULL,
- "UID SEARCH UID %d:%d BODY \"%s\"",
- mr->lastuid+1, is->lastuid, mr->terms[0]);
+ /* This handles multiple search words within a single term: */
+ /* each word becomes its own quoted BODY key after the UID range */
+ words = camel_search_words_split(mr->terms[0]);
+ search = g_string_new("");
+ g_string_sprintfa(search, "UID %d:%d", mr->lastuid+1, is->lastuid);
+ for (i=0;i<words->len;i++) {
+ char *w = words->words[i]->word, c;
+
+ g_string_sprintfa(search, " BODY \"");
+ /* backslash-escape the characters that are special inside a quoted string */
+ while ((c = *w++)) {
+ if (c == '\\' || c == '"')
+ g_string_append_c(search, '\\');
+ g_string_append_c(search, c);
+ }
+ g_string_append_c(search, '"');
+ }
+
+ /* We only try search using utf8 if its non us-ascii text? */
+ if ((words->type & CAMEL_SEARCH_WORD_8BIT) && (store->capabilities & IMAP_CAPABILITY_utf8_search)) {
+ response = camel_imap_command(store, folder, NULL,
+ "UID SEARCH CHARSET UTF-8 %s", search->str);
+ /* We can't actually tell if we got a NO response, so assume always */
+ if (response == NULL)
+ store->capabilities &= ~IMAP_CAPABILITY_utf8_search;
+ }
+ /* words->type was still needed by the test above, so the list is only released here */
+ camel_search_words_free(words);
+
+ /* no charset given, or the charset attempt failed: retry without one */
+ if (response == NULL)
+ response = camel_imap_command (store, folder, NULL,
+ "UID SEARCH %s", search->str);
+ g_string_free(search, TRUE);
+ g_string_free(search, TRUE);
+
if (!response)
return -1;
result = camel_imap_response_extract (store, response, "SEARCH", NULL);
diff --git a/camel/providers/imap/camel-imap-store.c b/camel/providers/imap/camel-imap-store.c
index b3d98db759..ce87174486 100644
--- a/camel/providers/imap/camel-imap-store.c
+++ b/camel/providers/imap/camel-imap-store.c
@@ -342,7 +342,8 @@ connect_to_server (CamelService *service, CamelException *ex)
store->connected = TRUE;
/* Find out the IMAP capabilities */
- store->capabilities = 0;
+ /* We assume we have utf8 capable search until a failed search tells us otherwise */
+ store->capabilities = IMAP_CAPABILITY_utf8_search;
store->authtypes = g_hash_table_new (g_str_hash, g_str_equal);
response = camel_imap_command (store, NULL, ex, "CAPABILITY");
if (!response)
diff --git a/camel/providers/imap/camel-imap-store.h b/camel/providers/imap/camel-imap-store.h
index 43c9b8e1f0..9a6c6655ad 100644
--- a/camel/providers/imap/camel-imap-store.h
+++ b/camel/providers/imap/camel-imap-store.h
@@ -71,6 +71,7 @@ typedef enum {
#define IMAP_CAPABILITY_UIDPLUS (1 << 4)
#define IMAP_CAPABILITY_LITERALPLUS (1 << 5)
#define IMAP_CAPABILITY_useful_lsub (1 << 6)
+#define IMAP_CAPABILITY_utf8_search (1 << 7)
#define IMAP_PARAM_OVERRIDE_NAMESPACE (1 << 0)
#define IMAP_PARAM_CHECK_ALL (1 << 1)