aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Zucci <zucchi@src.gnome.org>2001-01-25 13:10:59 +0800
committerMichael Zucci <zucchi@src.gnome.org>2001-01-25 13:10:59 +0800
commit46e42dcf44573b23812ada87bfe5313cb318662f (patch)
tree9f9c822170c87d8525970b03e4b374c854a5c560
parentbf64278f45cb952f9254a364beee6757f65bea3d (diff)
downloadgsoc2013-evolution-46e42dcf44573b23812ada87bfe5313cb318662f.tar.gz
gsoc2013-evolution-46e42dcf44573b23812ada87bfe5313cb318662f.tar.zst
gsoc2013-evolution-46e42dcf44573b23812ada87bfe5313cb318662f.zip
Oops.
svn path=/trunk/; revision=7801
-rw-r--r--camel/camel-search-private.c275
-rw-r--r--camel/camel-search-private.h46
2 files changed, 321 insertions, 0 deletions
diff --git a/camel/camel-search-private.c b/camel/camel-search-private.c
new file mode 100644
index 0000000000..94c6c1a355
--- /dev/null
+++ b/camel/camel-search-private.c
@@ -0,0 +1,275 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Authors: Jeffrey Stedfast <fejj@helixcode.com>
+ * Michael Zucchi <NotZed@Ximian.com>
+ *
+ * Copyright 2000 Helix Code, Inc. (www.helixcode.com)
+ * Copyright 2001 Ximian Inc. (www.ximian.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+/* (from glibc headers:
+ POSIX says that <sys/types.h> must be included (by the caller) before <regex.h>. */
+#include <sys/types.h>
+#include <regex.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+
+#include "camel-exception.h"
+#include "camel-mime-message.h"
+#include "camel-multipart.h"
+#include "camel-stream-mem.h"
+#include "e-util/e-sexp.h"
+
+#include "camel-search-private.h"
+
+#define d(x)
+
+/* builds the regex into pattern */
+/* taken from camel-folder-search, with added isregex & exception parameter */
+/* Basically, we build a new regex, either based on subset regex's, or substrings,
+ that can be executed once over the whoel body, to match anything suitable.
+ This is more efficient than multiple searches, and probably most (naive) strstr
+ implementations, over long content.
+
+ A small issue is that case-insenstivity wont work entirely correct for utf8 strings. */
+int
+camel_search_build_match_regex(regex_t *pattern, camel_search_flags_t type, int argc, struct _ESExpResult **argv, CamelException *ex)
+{
+ GString *match = g_string_new("");
+ int c, i, count=0, err;
+ char *word;
+ int flags;
+
+ /* build a regex pattern we can use to match the words, we OR them together */
+ if (argc>1)
+ g_string_append_c(match, '(');
+ for (i=0;i<argc;i++) {
+ if (argv[i]->type == ESEXP_RES_STRING) {
+ if (count > 0)
+ g_string_append_c(match, '|');
+ /* escape any special chars (not sure if this list is complete) */
+ word = argv[i]->value.string;
+ if (type & CAMEL_SEARCH_MATCH_REGEX) {
+ g_string_append(match, word);
+ } else {
+ if (type & CAMEL_SEARCH_MATCH_START)
+ g_string_append_c(match, '^');
+ while ((c = *word++)) {
+ if (strchr("*\\.()[]^$+", c) != NULL) {
+ g_string_append_c(match, '\\');
+ }
+ g_string_append_c(match, c);
+ }
+ if (type & CAMEL_SEARCH_MATCH_END)
+ g_string_append_c(match, '^');
+ }
+ count++;
+ } else {
+ g_warning("Invalid type passed to body-contains match function");
+ }
+ }
+ if (argc>1)
+ g_string_append_c(match, ')');
+ flags = REG_EXTENDED|REG_NOSUB;
+ if (type & CAMEL_SEARCH_MATCH_ICASE)
+ flags |= REG_ICASE;
+ err = regcomp(pattern, match->str, flags);
+ if (err != 0) {
+ /* regerror gets called twice to get the full error string
+ length to do proper posix error reporting */
+ int len = regerror(err, pattern, 0, 0);
+ char *buffer = g_malloc0(len + 1);
+
+ regerror(err, pattern, buffer, len);
+ camel_exception_setv(ex, CAMEL_EXCEPTION_SYSTEM,
+ _("Regular expression compilation failed: %s: %s"),
+ match->str, buffer);
+
+ regfree(pattern);
+ }
+ d(printf("Built regex: '%s'\n", match->str));
+ g_string_free(match, TRUE);
+ return err;
+}
+
+static unsigned char soundex_table[256] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 49, 50, 51, 0, 49, 50, 0, 0, 50, 50, 52, 53, 53, 0,
+ 49, 50, 54, 50, 51, 0, 49, 0, 50, 0, 50, 0, 0, 0, 0, 0,
+ 0, 0, 49, 50, 51, 0, 49, 50, 0, 0, 50, 50, 52, 53, 53, 0,
+ 49, 50, 54, 50, 51, 0, 49, 0, 50, 0, 50, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static void
+soundexify (const gchar *sound, gchar code[5])
+{
+ guchar *c, last = '\0';
+ gint n;
+
+ for (c = (guchar *) sound; *c && !isalpha (*c); c++);
+ code[0] = toupper (*c);
+ memset (code + 1, '0', 3);
+ for (n = 1; *c && n < 5; c++) {
+ guchar ch = soundex_table[*c];
+
+ if (ch && ch != last) {
+ code[n++] = ch;
+ last = ch;
+ }
+ }
+ code[4] = '\0';
+}
+
+static gboolean
+header_soundex(const char *header, const char *match)
+{
+ char mcode[5], hcode[5];
+ const char *p;
+ char c;
+ GString *word;
+ int truth = FALSE;
+
+ soundexify(match, mcode);
+
+ /* split the header into words, and soundexify and compare each one */
+ /* FIXME: Should this convert to utf8, and split based on that, and what not?
+ soundex only makes sense for us-ascii though ... */
+
+ word = g_string_new("");
+ p = header;
+ do {
+ c = *p++;
+ if (c == 0 || isspace(c)) {
+ if (word->len > 0) {
+ soundexify(word->str, hcode);
+ if (strcmp(hcode, mcode) == 0)
+ truth = TRUE;
+ }
+ g_string_truncate(word, 0);
+ } else if (isalpha(c))
+ g_string_append_c(word, c);
+ } while (c && !truth);
+ g_string_free(word, TRUE);
+
+ return truth;
+}
+
+/* searhces for match inside value, if match is mixed case, hten use case-sensitive,
+ else insensitive */
+gboolean camel_search_header_match(const char *value, const char *match, camel_search_match_t how)
+{
+ const char *p;
+
+ if (how == CAMEL_SEARCH_MATCH_SOUNDEX)
+ return header_soundex(value, match);
+
+ while (*value && isspace(*value))
+ value++;
+
+ if (strlen(value) < strlen(match))
+ return FALSE;
+
+ /* from dan the man, if we have mixed case, perform a case-sensitive match,
+ otherwise not */
+ p = match;
+ while (*p) {
+ if (isupper(*p)) {
+ switch(how) {
+ case CAMEL_SEARCH_MATCH_EXACT:
+ return strcmp(value, match) == 0;
+ case CAMEL_SEARCH_MATCH_CONTAINS:
+ return strstr(value, match) != NULL;
+ case CAMEL_SEARCH_MATCH_STARTS:
+ return strncmp(value, match, strlen(match)) == 0;
+ case CAMEL_SEARCH_MATCH_ENDS:
+ return strcmp(value+strlen(value)-strlen(match), match) == 0;
+ default:
+ break;
+ }
+ return FALSE;
+ }
+ p++;
+ }
+ switch(how) {
+ case CAMEL_SEARCH_MATCH_EXACT:
+ return strcasecmp(value, match) == 0;
+ case CAMEL_SEARCH_MATCH_CONTAINS:
+ return e_utf8_strstrcase(value, match) != NULL;
+ case CAMEL_SEARCH_MATCH_STARTS:
+ return strncasecmp(value, match, strlen(match)) == 0;
+ case CAMEL_SEARCH_MATCH_ENDS:
+ return strcasecmp(value+strlen(value)-strlen(match), match) == 0;
+ default:
+ break;
+ }
+
+ return FALSE;
+}
+
+/* performs a 'slow' content-based match */
+/* there is also an identical copy of this in camel-filter-search.c */
+gboolean
+camel_search_message_body_contains(CamelDataWrapper *object, regex_t *pattern)
+{
+ CamelDataWrapper *containee;
+ int truth = FALSE;
+ int parts, i;
+
+ containee = camel_medium_get_content_object(CAMEL_MEDIUM(object));
+
+ if (containee == NULL)
+ return FALSE;
+
+ /* TODO: I find it odd that get_part and get_content_object do not
+ add a reference, probably need fixing for multithreading */
+
+ /* using the object types is more accurate than using the mime/types */
+ if (CAMEL_IS_MULTIPART(containee)) {
+ parts = camel_multipart_get_number(CAMEL_MULTIPART(containee));
+ for (i=0;i<parts && truth==FALSE;i++) {
+ CamelDataWrapper *part = (CamelDataWrapper *)camel_multipart_get_part(CAMEL_MULTIPART(containee), i);
+ if (part)
+ truth = camel_search_message_body_contains(part, pattern);
+ }
+ } else if (CAMEL_IS_MIME_MESSAGE(containee)) {
+ /* for messages we only look at its contents */
+ truth = camel_search_message_body_contains((CamelDataWrapper *)containee, pattern);
+ } else if (header_content_type_is(CAMEL_DATA_WRAPPER(containee)->mime_type, "text", "*")) {
+ /* for all other text parts, we look inside, otherwise we dont care */
+ CamelStreamMem *mem = (CamelStreamMem *)camel_stream_mem_new();
+
+ camel_data_wrapper_write_to_stream(containee, (CamelStream *)mem);
+ camel_stream_write((CamelStream *)mem, "", 1);
+ truth = regexec(pattern, mem->buffer->data, 0, NULL, 0) == 0;
+ camel_object_unref((CamelObject *)mem);
+ }
+ return truth;
+}
+
diff --git a/camel/camel-search-private.h b/camel/camel-search-private.h
new file mode 100644
index 0000000000..aff881f32f
--- /dev/null
+++ b/camel/camel-search-private.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2001 Ximian Inc.
+ *
+ * Authors: Michael Zucchi <notzed@helixcode.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public License
+ * as published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _CAMEL_SEARCH_PRIVATE_H
+#define _CAMEL_SEARCH_PRIVATE_H
+
+typedef enum {
+ CAMEL_SEARCH_MATCH_START = 1<<0,
+ CAMEL_SEARCH_MATCH_END = 1<<1,
+ CAMEL_SEARCH_MATCH_REGEX = 1<<2, /* disables the first 2 */
+ CAMEL_SEARCH_MATCH_ICASE = 1<<3,
+} camel_search_flags_t;
+
+typedef enum {
+ CAMEL_SEARCH_MATCH_EXACT,
+ CAMEL_SEARCH_MATCH_CONTAINS,
+ CAMEL_SEARCH_MATCH_STARTS,
+ CAMEL_SEARCH_MATCH_ENDS,
+ CAMEL_SEARCH_MATCH_SOUNDEX,
+} camel_search_match_t;
+
+/* builds a regex that represents a string search */
+int camel_search_build_match_regex(regex_t *pattern, camel_search_flags_t type, int argc, struct _ESExpResult **argv, CamelException *ex);
+gboolean camel_search_message_body_contains(CamelDataWrapper *object, regex_t *pattern);
+
+gboolean camel_search_header_match(const char *value, const char *match, camel_search_match_t how);
+gboolean camel_search_header_soundex(const char *header, const char *match);
+
+#endif /* ! _CAMEL_SEARCH_PRIVATE_H */