WebKit port - port widgets

author: Dan Vrátil <dvratil@redhat.com> 2012-03-29 00:37:35 +0800
committer: Dan Vrátil <dvratil@redhat.com> 2012-03-29 00:37:35 +0800
commit: 6bd1c6833a2c51898ac45865767dd01ba66a95c5 (patch)
tree: 51f9cc360c49e71c455f74f72f1605965e73a932 /widgets/misc/e-searching-tokenizer.c
parent: 038e0eccec595ce1cc39fe95262272e29d5a6fbf (diff)
download: gsoc2013-evolution-6bd1c6833a2c51898ac45865767dd01ba66a95c5.tar.gz
gsoc2013-evolution-6bd1c6833a2c51898ac45865767dd01ba66a95c5.tar.zst
gsoc2013-evolution-6bd1c6833a2c51898ac45865767dd01ba66a95c5.zip
1 files changed, 0 insertions, 1210 deletions
diff --git a/widgets/misc/e-searching-tokenizer.c b/widgets/misc/e-searching-tokenizer.c
deleted file mode 100644
index 878efa1654..0000000000
--- a/widgets/misc/e-searching-tokenizer.c
+++ /dev/null
@@ -1,1210 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) version 3.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with the program; if not, see <http://www.gnu.org/licenses/>
- *
- *
- * Authors:
- * Developed by Jon Trowbridge <trow@ximian.com>
- * Rewritten significantly to handle multiple strings and improve performance
- * by Michael Zucchi <notzed@ximian.com>
- *
- * Copyright (C) 1999-2008 Novell, Inc. (www.novell.com)
- *
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-
-#include "e-searching-tokenizer.h"
-
-#include "libedataserver/e-memory.h"
-
-#define d(x)
-
-#define E_SEARCHING_TOKENIZER_GET_PRIVATE(obj) \
-	(G_TYPE_INSTANCE_GET_PRIVATE \
-	((obj), E_TYPE_SEARCHING_TOKENIZER, ESearchingTokenizerPrivate))
-
-enum {
-	MATCH_SIGNAL,
-	LAST_SIGNAL
-};
-
-static guint signals[LAST_SIGNAL];
-
-G_DEFINE_TYPE (
-	ESearchingTokenizer,
-	e_searching_tokenizer,
-	HTML_TYPE_TOKENIZER)
-
-/* Utility functions */
-
-/* This is faster and safer than glib2's utf8 abomination,
- * but isn't exported from camel as yet */
-static inline guint32
-camel_utf8_getc (const guchar **ptr)
-{
-	register guchar *p = (guchar *) * ptr;
-	register guchar c, r;
-	register guint32 v, m;
-
-again:
-	r = *p++;
-loop:
-	if (r < 0x80) {
-		*ptr = p;
-		v = r;
-	} else if (r < 0xfe) { /* valid start char? */
-		v = r;
-		m = 0x7f80;	/* used to mask out the length bits */
-		do {
-			c = *p++;
-			if ((c & 0xc0) != 0x80) {
-				r = c;
-				goto loop;
-			}
-			v = (v << 6) | (c & 0x3f);
-			r<<=1;
-			m<<=5;
-		} while (r & 0x40);
-
-		*ptr = p;
-
-		v &= ~m;
-	} else {
-		goto again;
-	}
-
-	return v;
-}
-
-/* note: our tags of interest are 7 bit ascii
- * only no need to do any fancy utf8 stuff */
-/* tags should be upper case
- * if this list gets longer than 10 entries, consider binary search */
-static const gchar *ignored_tags[] = {
-	"B", "I", "FONT", "TT", "EM", /* and more? */};
-
-static gint
-ignore_tag (const gchar *tag)
-{
-	gchar *t = g_alloca (strlen (tag) + 1), c, *out;
-	const gchar *in;
-	gint i;
-
-	/* we could use a aho-corasick matcher here too ... but we wont */
-
-	/* normalise tag into 't'.
-	 * Note we use the property that the only tags we're interested in
-	 * are 7 bit ascii to shortcut and simplify case insensitivity */
-	in = tag+2;		/* skip: TAG_ESCAPE '<' */
-	if (*in == '/')
-		in++;
-	out = t;
-	while ((c = *in++)) {
-		if (c >= 'A' && c <= 'Z')
-			*out++ = c;
-		else if (c >= 'a' && c <= 'z')
-			*out++ = c & 0xdf; /* convert ASCII to upper case */
-		else
-			/* maybe should check for > or ' ' etc? */
-			break;
-	}
-	*out = 0;
-
-	for (i = 0; i < G_N_ELEMENTS (ignored_tags); i++) {
-		if (strcmp (t, ignored_tags[i]) == 0)
-			return 1;
-	}
-
-	return 0;
-}
-
-/* ********************************************************************** */
-
-/* Aho-Corasick search tree implmeentation */
-
-/* next state if we match a character */
-struct _match {
-	struct _match *next;
-	guint32 ch;
-	struct _state *match;
-};
-
-/* tree state node */
-struct _state {
-	struct _match *matches;
-	guint final;		/* max no of chars we just matched */
-	struct _state *fail;		/* where to try next if we fail */
-	struct _state *next;		/* next on this level? */
-};
-
-/* base tree structure */
-struct _trie {
-	struct _state root;
-	gint max_depth;
-
-	EMemChunk *state_chunks;
-	EMemChunk *match_chunks;
-};
-
-/* will be enabled only if debug is enabled */
-#if d(1) -1 != -1
-static void
-dump_trie (struct _state *s,
-           gint d)
-{
-	gchar *p = g_alloca (d *2 + 1);
-	struct _match *m;
-
-	memset (p, ' ', d *2);
-	p[d *2]=0;
-
-	printf("%s[state] %p: %d  fail->%p\n", p, s, s->final, s->fail);
-	m = s->matches;
-	while (m) {
-		printf(" %s'%c' -> %p\n", p, m->ch, m->match);
-		if (m->match)
-			dump_trie (m->match, d + 1);
-		m = m->next;
-	}
-}
-#endif
-
-/* This builds an Aho-Corasick search trie for a set of utf8 words */
-/* See
- *  http://www-sr.informatik.uni-tuebingen.de/~buehler/AC/AC.html
- * for a neat demo */
-
-static inline struct _match *
-g (struct _state *q,
-   guint32 c)
-{
-	struct _match *m = q->matches;
-
-	while (m && m->ch != c)
-		m = m->next;
-
-	return m;
-}
-
-static struct _trie *
-build_trie (gint nocase,
-            gint len,
-            guchar **words)
-{
-	struct _state *q, *qt, *r;
-	const guchar *word;
-	struct _match *m, *n = NULL;
-	gint i, depth;
-	guint32 c;
-	struct _trie *trie;
-	gint state_depth_max, state_depth_size;
-	struct _state **state_depth;
-
-	trie = g_malloc (sizeof (*trie));
-	trie->root.matches = NULL;
-	trie->root.final = 0;
-	trie->root.fail = NULL;
-	trie->root.next = NULL;
-
-	trie->state_chunks = e_memchunk_new (8, sizeof (struct _state));
-	trie->match_chunks = e_memchunk_new (8, sizeof (struct _match));
-
-	/* This will correspond to the length of the longest pattern */
-	state_depth_size = 0;
-	state_depth_max = 64;
-	state_depth = g_malloc (sizeof (*state_depth[0]) * 64);
-	state_depth[0] = NULL;
-
-	/* Step 1: Build trie */
-
-	/* This just builds a tree that merges all common prefixes into the same branch */
-
-	for (i = 0; i < len; i++) {
-		word = words[i];
-		q = &trie->root;
-		depth = 0;
-		while ((c = camel_utf8_getc (&word))) {
-			if (nocase)
-				c = g_unichar_tolower (c);
-			m = g (q, c);
-			if (m == NULL) {
-				m = e_memchunk_alloc (trie->match_chunks);
-				m->ch = c;
-				m->next = q->matches;
-				q->matches = m;
-				q = m->match = e_memchunk_alloc (trie->state_chunks);
-				q->matches = NULL;
-				q->fail = &trie->root;
-				q->final = 0;
-				if (state_depth_max < depth) {
-					state_depth_max += 64;
-					state_depth = g_realloc (
-						state_depth,
-						sizeof (*state_depth[0]) *
-						state_depth_max);
-				}
-				if (state_depth_size < depth) {
-					state_depth[depth] = NULL;
-					state_depth_size = depth;
-				}
-				q->next = state_depth[depth];
-				state_depth[depth] = q;
-			} else {
-				q = m->match;
-			}
-			depth++;
-		}
-		q->final = depth;
-	}
-
-	d(printf("Dumping trie:\n"));
-	d (dump_trie (&trie->root, 0));
-
-	/* Step 2: Build failure graph */
-
-	/* This searches for the longest substring which is a prefix of
-	 * another string and builds a graph of failure links so you can
-	 * find multiple substrings concurrently, using aho-corasick's
-	 * algorithm. */
-
-	for (i = 0; i < state_depth_size; i++) {
-		q = state_depth[i];
-		while (q) {
-			m = q->matches;
-			while (m) {
-				c = m->ch;
-				qt = m->match;
-				r = q->fail;
-				while (r && (n = g (r, c)) == NULL)
-					r = r->fail;
-				if (r != NULL) {
-					qt->fail = n->match;
-					if (qt->fail->final > qt->final)
-						qt->final = qt->fail->final;
-				} else {
-					if ((n = g (&trie->root, c)))
-						qt->fail = n->match;
-					else
-						qt->fail = &trie->root;
-				}
-				m = m->next;
-			}
-			q = q->next;
-		}
-	}
-
-	d (printf("After failure analysis\n"));
-	d (dump_trie (&trie->root, 0));
-
-	g_free (state_depth);
-
-	trie->max_depth = state_depth_size;
-
-	return trie;
-}
-
-static void
-free_trie (struct _trie *t)
-{
-	e_memchunk_destroy (t->match_chunks);
-	e_memchunk_destroy (t->state_chunks);
-
-	g_free (t);
-}
-
-/* ********************************************************************** */
-
-/* html token searcher */
-
-struct _token {
-	struct _token *next;
-	struct _token *prev;
-	guint offset;
-	/* we need to copy the token for memory management, so why not copy it whole */
-	gchar tok[1];
-};
-
-/* stack of submatches currently being scanned, used for merging */
-struct _submatch {
-	guint offstart, offend; /* in bytes */
-};
-
-/* flags for new func */
-#define SEARCH_CASE (1)
-#define SEARCH_BOLD (2)
-
-struct _searcher {
-	struct _trie *t;
-
-	gchar *(*next_token)();	/* callbacks for more tokens */
-	gpointer next_data;
-
-	gint words;		/* how many words */
-	gchar *tags, *tage;	/* the tag we used to highlight */
-
-	gint flags;		/* case sensitive or not */
-
-	struct _state *state;	/* state is the current trie state */
-
-	gint matchcount;
-
-	GQueue input;		/* pending 'input' tokens, processed but might match */
-	GQueue output;		/* output tokens ready for source */
-
-	struct _token *current;	/* for token output memory management */
-
-	guint32 offset;		/* current offset through searchable stream? */
-	guint32 offout;		/* last output position */
-
-	guint lastp;	/* current position in rotating last buffer */
-	guint32 *last;		/* buffer that goes back last 'n' positions */
-	guint32 last_mask;	/* bitmask for efficient rotation calculation */
-
-	guint submatchp;	/* submatch stack */
-	struct _submatch *submatches;
-};
-
-static void
-searcher_set_tokenfunc (struct _searcher *s,
-                        gchar *(*next)(),
-                        gpointer data)
-{
-	s->next_token = next;
-	s->next_data = data;
-}
-
-static struct _searcher *
-searcher_new (gint flags,
-              gint argc,
-              guchar **argv,
-              const gchar *tags,
-              const gchar *tage)
-{
-	gint i, m;
-	struct _searcher *s;
-
-	s = g_malloc (sizeof (*s));
-
-	s->t = build_trie ((flags&SEARCH_CASE) == 0, argc, argv);
-	s->words = argc;
-	s->tags = g_strdup (tags);
-	s->tage = g_strdup (tage);
-	s->flags = flags;
-	s->state = &s->t->root;
-	s->matchcount = -1;
-
-	g_queue_init (&s->input);
-	g_queue_init (&s->output);
-	s->current = NULL;
-
-	s->offset = 0;
-	s->offout = 0;
-
-	/* rotating queue of previous character positions */
-	m = s->t->max_depth + 1;
-	i = 2;
-	while (i < m)
-		i<<=2;
-	s->last = g_malloc (sizeof (s->last[0]) * i);
-	s->last_mask = i - 1;
-	s->lastp = 0;
-
-	/* a stack of possible submatches */
-	s->submatchp = 0;
-	s->submatches = g_malloc (sizeof (s->submatches[0]) * argc + 1);
-
-	return s;
-}
-
-static void
-searcher_free (struct _searcher *s)
-{
-	struct _token *t;
-
-	while ((t = g_queue_pop_head (&s->input)) != NULL)
-		g_free (t);
-	while ((t = g_queue_pop_head (&s->output)) != NULL)
-		g_free (t);
-	g_free (s->tags);
-	g_free (s->tage);
-	g_free (s->last);
-	g_free (s->submatches);
-	free_trie (s->t);
-	g_free (s);
-}
-
-static struct _token *
-append_token (GQueue *queue,
-              const gchar *tok,
-              gint len)
-{
-	struct _token *token;
-
-	if (len == -1)
-		len = strlen (tok);
-	token = g_malloc (sizeof (*token) + len + 1);
-	token->offset = 0;	/* set by caller when required */
-	memcpy (token->tok, tok, len);
-	token->tok[len] = 0;
-	g_queue_push_tail (queue, token);
-
-	return token;
-}
-
-#define free_token(x) (g_free (x))
-
-static void
-output_token (struct _searcher *s,
-              struct _token *token)
-{
-	gint offend;
-	gint left, pre;
-
-	if (token->tok[0] == TAG_ESCAPE) {
-		if (token->offset >= s->offout) {
-			g_queue_push_tail (&s->output, token);
-		} else {
-			free_token (token);
-		}
-	} else {
-		offend = token->offset + strlen (token->tok);
-		left = offend - s->offout;
-		if (left > 0) {
-			pre = s->offout - token->offset;
-			if (pre > 0)
-				memmove (token->tok, token->tok + pre, left + 1);
-			s->offout = offend;
-			g_queue_push_tail (&s->output, token);
-		} else {
-			free_token (token);
-		}
-	}
-}
-
-static struct _token *
-find_token (struct _searcher *s,
-            gint start)
-{
-	GList *link;
-
-	/* find token which is start token, from end of list back */
-	link = g_queue_peek_tail_link (&s->input);
-	while (link != NULL) {
-		struct _token *token = link->data;
-
-		if (token->offset <= start)
-			return token;
-
-		link = g_list_previous (link);
-	}
-
-	return NULL;
-}
-
-static void
-output_match (struct _searcher *s,
-              guint start,
-              guint end)
-{
-	register struct _token *token;
-	struct _token *starttoken, *endtoken;
-	gchar b[8];
-
-	d (printf("output match: %d-%d  at %d\n", start, end, s->offout));
-
-	starttoken = find_token (s, start);
-	endtoken = find_token (s, end);
-
-	if (starttoken == NULL || endtoken == NULL) {
-		d (printf("Cannot find match history for match %d-%d\n", start, end));
-		return;
-	}
-
-	/* output pending stuff that didn't match afterall */
-	while ((struct _token *) g_queue_peek_head (&s->input) != starttoken) {
-		token = g_queue_pop_head (&s->input);
-		output_token (s, token);
-	}
-
-	/* output any pre-match text */
-	if (s->offout < start) {
-		token = append_token (
-			&s->output, starttoken->tok +
-			(s->offout - starttoken->offset),
-			start - s->offout);
-		s->offout = start;
-	}
-
-	/* output highlight/bold */
-	if (s->flags & SEARCH_BOLD) {
-		sprintf(b, "%c<b>", (gchar)TAG_ESCAPE);
-		append_token (&s->output, b, -1);
-	}
-	if (s->tags)
-		append_token (&s->output, s->tags, -1);
-
-	/* output match node (s) */
-	if (starttoken != endtoken) {
-		while ((struct _token *) g_queue_peek_head (&s->input) != endtoken) {
-			token = g_queue_pop_head (&s->input);
-			output_token (s, token);
-		}
-	}
-
-	/* any remaining partial content */
-	if (s->offout < end) {
-		token = append_token (
-			&s->output, endtoken->tok +
-			(s->offout - endtoken->offset),
-			end - s->offout);
-		s->offout = end;
-	}
-
-	/* end highlight */
-	if (s->tage)
-		append_token (&s->output, s->tage, -1);
-
-	/* and close bold if we need to */
-	if (s->flags & SEARCH_BOLD) {
-		sprintf(b, "%c</b>", (gchar)TAG_ESCAPE);
-		append_token (&s->output, b, -1);
-	}
-}
-
-/* output any sub-pending blocks */
-static void
-output_subpending (struct _searcher *s)
-{
-	gint i;
-
-	for (i = s->submatchp - 1; i >= 0; i--)
-		output_match (s, s->submatches[i].offstart, s->submatches[i].offend);
-	s->submatchp = 0;
-}
-
-/* returns true if a merge took place */
-static gint
-merge_subpending (struct _searcher *s,
-                  gint offstart,
-                  gint offend)
-{
-	gint i;
-
-	/* merges overlapping or abutting match strings */
-	if (s->submatchp &&
-	    s->submatches[s->submatchp - 1].offend >= offstart) {
-
-		/* go from end, any that match 'invalidate' follow-on ones too */
-		for (i = s->submatchp - 1; i >= 0; i--) {
-			if (s->submatches[i].offend >= offstart) {
-				if (offstart < s->submatches[i].offstart)
-					s->submatches[i].offstart = offstart;
-				s->submatches[i].offend = offend;
-				if (s->submatchp > i)
-					s->submatchp = i + 1;
-			}
-		}
-		return 1;
-	}
-
-	return 0;
-}
-
-static void
-push_subpending (struct _searcher *s,
-                 gint offstart,
-                 gint offend)
-{
-	/* This is really an assertion, we just ignore the
-	 * last pending match instead of crashing though. */
-	if (s->submatchp >= s->words) {
-		d (printf("ERROR: submatch pending stack overflow\n"));
-		s->submatchp = s->words - 1;
-	}
-
-	s->submatches[s->submatchp].offstart = offstart;
-	s->submatches[s->submatchp].offend = offend;
-	s->submatchp++;
-}
-
-/* move any (partial) tokens from input to output
- * if they are beyond the current output position */
-static void
-output_pending (struct _searcher *s)
-{
-	struct _token *token;
-
-	while ((token = g_queue_pop_head (&s->input)) != NULL)
-		output_token (s, token);
-}
-
-/* flushes any nodes we cannot possibly match anymore */
-static void
-flush_extra (struct _searcher *s)
-{
-	guint start;
-	gint i;
-	struct _token *starttoken, *token;
-
-	/* find earliest gchar that can be in contention */
-	start = s->offset - s->t->max_depth;
-	for (i = 0; i < s->submatchp; i++)
-		if (s->submatches[i].offstart < start)
-			start = s->submatches[i].offstart;
-
-	/* now, flush out any tokens which are before this point */
-	starttoken = find_token (s, start);
-	if (starttoken == NULL)
-		return;
-
-	while ((struct _token *) g_queue_peek_head (&s->input) != starttoken) {
-		token = g_queue_pop_head (&s->input);
-		output_token (s, token);
-	}
-}
-
-static gchar *
-searcher_next_token (struct _searcher *s)
-{
-	struct _token *token;
-	const guchar *tok, *stok, *pre_tok;
-	struct _trie *t = s->t;
-	struct _state *q = s->state;
-	struct _match *m = NULL;
-	gint offstart, offend;
-	guint32 c;
-
-	while (g_queue_is_empty (&s->output)) {
-		/* get next token */
-		tok = (guchar *) s->next_token (s->next_data);
-		if (tok == NULL) {
-			output_subpending (s);
-			output_pending (s);
-			break;
-		}
-
-		/* we dont always have to copy each token, e.g. if we dont match anything */
-		token = append_token (&s->input, (gchar *) tok, -1);
-		token->offset = s->offset;
-		tok = (guchar *) token->tok;
-
-		/* tag test, reset state on unknown tags */
-		if (tok[0] == TAG_ESCAPE) {
-			if (!ignore_tag ((gchar *) tok)) {
-				/* force reset */
-				output_subpending (s);
-				output_pending (s);
-				q = &t->root;
-			}
-
-			continue;
-		}
-
-		/* process whole token */
-		pre_tok = stok = tok;
-		while ((c = camel_utf8_getc (&tok))) {
-			if ((s->flags & SEARCH_CASE) == 0)
-				c = g_unichar_tolower (c);
-			while (q && (m = g (q, c)) == NULL)
-				q = q->fail;
-			if (q == NULL) {
-				/* mismatch ... reset state */
-				output_subpending (s);
-				q = &t->root;
-			} else if (m != NULL) {
-				/* keep track of previous offsets of utf8 chars, rotating buffer */
-				s->last[s->lastp] = s->offset + (pre_tok - stok);
-				s->lastp = (s->lastp + 1) &s->last_mask;
-
-				q = m->match;
-				/* we have a match of q->final characters for a matching word */
-				if (q->final) {
-					s->matchcount++;
-
-					/* use the last buffer to find the real offset of this gchar */
-					offstart = s->last[(s->lastp - q->final) &s->last_mask];
-					offend = s->offset + (tok - stok);
-
-					if (q->matches == NULL) {
-						if (s->submatchp == 0) {
-							/* nothing pending, always put
-							 * something in so we can try merge */
-							push_subpending (s, offstart, offend);
-						} else if (!merge_subpending (s, offstart, offend)) {
-							/* can't merge, output what we have, and start againt */
-							output_subpending (s);
-							push_subpending (s, offstart, offend);
-							/*output_match(s, offstart, offend);*/
-						} else if (g_queue_get_length (&s->input) > 8) {
-							/* we're continuing to match and merge,
-							 * but we have a lot of stuff waiting,
-							 * so flush it out now since this is a
-							 * safe point to do it */
-							output_subpending (s);
-						}
-					} else {
-						/* merge/add subpending */
-						if (!merge_subpending (s, offstart, offend))
-							push_subpending (s, offstart, offend);
-					}
-				}
-			}
-			pre_tok = tok;
-		}
-
-		s->offset += (pre_tok - stok);
-
-		flush_extra (s);
-	}
-
-	s->state = q;
-
-	if (s->current)
-		free_token (s->current);
-
-	s->current = token = g_queue_pop_head (&s->output);
-
-	return token ? g_strdup (token->tok) : NULL;
-}
-
-static gchar *
-searcher_peek_token (struct _searcher *s)
-{
-	gchar *tok;
-
-	/* we just get it and then put it back, it's fast enuf */
-	tok = searcher_next_token (s);
-	if (tok) {
-		/* need to clear this so we dont free it while its still active */
-		g_queue_push_head (&s->output, s->current);
-		s->current = NULL;
-	}
-
-	return tok;
-}
-
-static gint
-searcher_pending (struct _searcher *s)
-{
-	return !(g_queue_is_empty (&s->input) && g_queue_is_empty (&s->output));
-}
-
-/* ********************************************************************** */
-
-struct _search_info {
-	GPtrArray *strv;
-	gchar *color;
-	guint size : 8;
-	guint flags : 8;
-};
-
-/** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** **/
-
-static struct _search_info *
-search_info_new (void)
-{
-	struct _search_info *s;
-
-	s = g_malloc0 (sizeof (struct _search_info));
-	s->strv = g_ptr_array_new ();
-
-	return s;
-}
-
-static void
-search_info_set_flags (struct _search_info *si,
-                       guint flags,
-                       guint mask)
-{
-	si->flags = (si->flags & ~mask) | (flags & mask);
-}
-
-static void
-search_info_set_color (struct _search_info *si,
-                       const gchar *color)
-{
-	g_free (si->color);
-	si->color = g_strdup (color);
-}
-
-static void
-search_info_add_string (struct _search_info *si,
-                        const gchar *s)
-{
-	const guchar *start;
-	guint32 c;
-
-	if (s && s[0]) {
-		const guchar *us = (guchar *) s;
-		/* strip leading whitespace */
-		start = us;
-		while ((c = camel_utf8_getc (&us))) {
-			if (!g_unichar_isspace (c)) {
-				break;
-			}
-			start = us;
-		}
-		/* should probably also strip trailing, but i'm lazy today */
-		if (start[0])
-			g_ptr_array_add (si->strv, g_strdup ((gchar *) start));
-	}
-}
-
-static void
-search_info_clear (struct _search_info *si)
-{
-	gint i;
-
-	for (i = 0; i < si->strv->len; i++)
-		g_free (si->strv->pdata[i]);
-
-	g_ptr_array_set_size (si->strv, 0);
-}
-
-static void
-search_info_free (struct _search_info *si)
-{
-	gint i;
-
-	for (i = 0; i < si->strv->len; i++)
-		g_free (si->strv->pdata[i]);
-
-	g_ptr_array_free (si->strv, TRUE);
-	g_free (si->color);
-	g_free (si);
-}
-
-static struct _search_info *
-search_info_clone (struct _search_info *si)
-{
-	struct _search_info *out;
-	gint i;
-
-	out = search_info_new ();
-	for (i = 0; i < si->strv->len; i++)
-		g_ptr_array_add (out->strv, g_strdup (si->strv->pdata[i]));
-	out->color = g_strdup (si->color);
-	out->flags = si->flags;
-	out->size = si->size;
-
-	return out;
-}
-
-static struct _searcher *
-search_info_to_searcher (struct _search_info *si)
-{
-	gchar *tags, *tage;
-	const gchar *col;
-
-	if (si->strv->len == 0)
-		return NULL;
-
-	if (si->color == NULL)
-		col = "red";
-	else
-		col = si->color;
-
-	tags = g_alloca (20 + strlen (col));
-	sprintf(tags, "%c<font color=\"%s\">", TAG_ESCAPE, col);
-	tage = g_alloca (20);
-	sprintf(tage, "%c</font>", TAG_ESCAPE);
-
-	return searcher_new (
-		si->flags, si->strv->len,
-		(guchar **) si->strv->pdata, tags, tage);
-}
-
-/* ********************************************************************** */
-
-struct _ESearchingTokenizerPrivate {
-	struct _search_info *primary, *secondary;
-	struct _searcher *engine;
-};
-
-/** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** **/
-
-/** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** **/
-
-/* blah blah the htmltokeniser doesn't like being asked
- * for a token if it doens't hvae any! */
-static gchar *
-get_token (HTMLTokenizer *tokenizer)
-{
-	HTMLTokenizerClass *class;
-
-	class = HTML_TOKENIZER_CLASS (e_searching_tokenizer_parent_class);
-
-	if (class->has_more (tokenizer))
-		return class->next_token (tokenizer);
-
-	return NULL;
-}
-
-/* proxy matched event, not sure what its for otherwise */
-static void
-matched (ESearchingTokenizer *tokenizer)
-{
-	/*++tokenizer->priv->match_count;*/
-	g_signal_emit (tokenizer, signals[MATCH_SIGNAL], 0);
-}
-
-/* ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** */
-
-static void
-searching_tokenizer_finalize (GObject *object)
-{
-	ESearchingTokenizerPrivate *priv;
-
-	priv = E_SEARCHING_TOKENIZER_GET_PRIVATE (object);
-
-	search_info_free (priv->primary);
-	search_info_free (priv->secondary);
-
-	if (priv->engine != NULL)
-		searcher_free (priv->engine);
-
-	/* Chain up to parent's finalize () method. */
-	G_OBJECT_CLASS (e_searching_tokenizer_parent_class)->finalize (object);
-}
-
-static void
-searching_tokenizer_begin (HTMLTokenizer *tokenizer,
-                           const gchar *content_type)
-{
-	ESearchingTokenizerPrivate *priv;
-
-	priv = E_SEARCHING_TOKENIZER_GET_PRIVATE (tokenizer);
-
-	/* reset search */
-	if (priv->engine != NULL) {
-		searcher_free (priv->engine);
-		priv->engine = NULL;
-	}
-
-	if ((priv->engine = search_info_to_searcher (priv->primary))
-	    || (priv->engine = search_info_to_searcher (priv->secondary))) {
-		searcher_set_tokenfunc (priv->engine, get_token, tokenizer);
-	}
-	/* else - no engine, no search active */
-
-	/* Chain up to parent's begin() method. */
-	HTML_TOKENIZER_CLASS (e_searching_tokenizer_parent_class)->
-		begin (tokenizer, content_type);
-}
-
-static gchar *
-searching_tokenizer_peek_token (HTMLTokenizer *tokenizer)
-{
-	ESearchingTokenizerPrivate *priv;
-
-	priv = E_SEARCHING_TOKENIZER_GET_PRIVATE (tokenizer);
-
-	if (priv->engine != NULL)
-		return searcher_peek_token (priv->engine);
-
-	/* Chain up to parent's peek_token() method. */
-	return HTML_TOKENIZER_CLASS (e_searching_tokenizer_parent_class)->
-		peek_token (tokenizer);
-}
-
-static gchar *
-searching_tokenizer_next_token (HTMLTokenizer *tokenizer)
-{
-	ESearchingTokenizerPrivate *priv;
-	gint oldmatched;
-	gchar *token;
-
-	priv = E_SEARCHING_TOKENIZER_GET_PRIVATE (tokenizer);
-
-	/* If no search is active, just use the default method. */
-	if (priv->engine == NULL)
-		return HTML_TOKENIZER_CLASS (
-			e_searching_tokenizer_parent_class)->
-			next_token (tokenizer);
-
-	oldmatched = priv->engine->matchcount;
-	if (priv->engine->matchcount == -1)
-		priv->engine->matchcount = 0;
-
-	token = searcher_next_token (priv->engine);
-
-	/* not sure if this has to be accurate or just say we had some matches */
-	if (oldmatched != priv->engine->matchcount)
-		g_signal_emit (tokenizer, signals[MATCH_SIGNAL], 0);
-
-	return token;
-}
-
-static gboolean
-searching_tokenizer_has_more (HTMLTokenizer *tokenizer)
-{
-	ESearchingTokenizerPrivate *priv;
-
-	priv = E_SEARCHING_TOKENIZER_GET_PRIVATE (tokenizer);
-
-	return (priv->engine != NULL && searcher_pending (priv->engine)) ||
-		HTML_TOKENIZER_CLASS (e_searching_tokenizer_parent_class)->
-			has_more (tokenizer);
-}
-
-static HTMLTokenizer *
-searching_tokenizer_clone (HTMLTokenizer *tokenizer)
-{
-	ESearchingTokenizer *orig_st;
-	ESearchingTokenizer *new_st;
-
-	orig_st = E_SEARCHING_TOKENIZER (tokenizer);
-	new_st = e_searching_tokenizer_new ();
-
-	search_info_free (new_st->priv->primary);
-	search_info_free (new_st->priv->secondary);
-
-	new_st->priv->primary = search_info_clone (orig_st->priv->primary);
-	new_st->priv->secondary = search_info_clone (orig_st->priv->secondary);
-
-	g_signal_connect_swapped (
-		new_st, "match", G_CALLBACK (matched), orig_st);
-
-	return HTML_TOKENIZER (new_st);
-}
-static void
-e_searching_tokenizer_class_init (ESearchingTokenizerClass *class)
-{
-	GObjectClass *object_class;
-	HTMLTokenizerClass *tokenizer_class;
-
-	g_type_class_add_private (class, sizeof (ESearchingTokenizerPrivate));
-
-	object_class = G_OBJECT_CLASS (class);
-	object_class->finalize = searching_tokenizer_finalize;
-
-	tokenizer_class = HTML_TOKENIZER_CLASS (class);
-	tokenizer_class->begin = searching_tokenizer_begin;
-	tokenizer_class->peek_token = searching_tokenizer_peek_token;
-	tokenizer_class->next_token = searching_tokenizer_next_token;
-	tokenizer_class->has_more = searching_tokenizer_has_more;
-	tokenizer_class->clone = searching_tokenizer_clone;
-
-	signals[MATCH_SIGNAL] = g_signal_new (
-		"match",
-		G_TYPE_FROM_CLASS (class),
-		G_SIGNAL_RUN_LAST,
-		G_STRUCT_OFFSET (ESearchingTokenizerClass, match),
-		NULL, NULL,
-		g_cclosure_marshal_VOID__VOID,
-		G_TYPE_NONE, 0);
-}
-
-static void
-e_searching_tokenizer_init (ESearchingTokenizer *tokenizer)
-{
-	tokenizer->priv = E_SEARCHING_TOKENIZER_GET_PRIVATE (tokenizer);
-
-	tokenizer->priv->primary = search_info_new ();
-	search_info_set_flags (
-		tokenizer->priv->primary,
-		SEARCH_BOLD, SEARCH_CASE | SEARCH_BOLD);
-	search_info_set_color (tokenizer->priv->primary, "red");
-
-	tokenizer->priv->secondary = search_info_new ();
-	search_info_set_flags (
-		tokenizer->priv->secondary,
-		SEARCH_BOLD, SEARCH_CASE | SEARCH_BOLD);
-	search_info_set_color (tokenizer->priv->secondary, "purple");
-}
-
-ESearchingTokenizer *
-e_searching_tokenizer_new (void)
-{
-	return g_object_new (E_TYPE_SEARCHING_TOKENIZER, NULL);
-}
-
-void
-e_searching_tokenizer_set_primary_search_string (ESearchingTokenizer *tokenizer,
-                                                 const gchar *primary_string)
-{
-	g_return_if_fail (E_IS_SEARCHING_TOKENIZER (tokenizer));
-
-	search_info_clear (tokenizer->priv->primary);
-	search_info_add_string (tokenizer->priv->primary, primary_string);
-}
-
-void
-e_searching_tokenizer_add_primary_search_string (ESearchingTokenizer *tokenizer,
-                                                 const gchar *primary_string)
-{
-	g_return_if_fail (E_IS_SEARCHING_TOKENIZER (tokenizer));
-
-	search_info_add_string (tokenizer->priv->primary, primary_string);
-}
-
-void
-e_searching_tokenizer_set_primary_case_sensitivity (ESearchingTokenizer *tokenizer,
-                                                    gboolean case_sensitive)
-{
-	g_return_if_fail (E_IS_SEARCHING_TOKENIZER (tokenizer));
-
-	search_info_set_flags (
-		tokenizer->priv->primary,
-		case_sensitive ? SEARCH_CASE : 0, SEARCH_CASE);
-}
-
-void
-e_searching_tokenizer_set_secondary_search_string (ESearchingTokenizer *tokenizer,
-                                                   const gchar *secondary_string)
-{
-	g_return_if_fail (E_IS_SEARCHING_TOKENIZER (tokenizer));
-
-	search_info_clear (tokenizer->priv->secondary);
-	search_info_add_string (tokenizer->priv->secondary, secondary_string);
-}
-
-void
-e_searching_tokenizer_add_secondary_search_string (ESearchingTokenizer *tokenizer,
-                                                   const gchar *secondary_string)
-{
-	g_return_if_fail (E_IS_SEARCHING_TOKENIZER (tokenizer));
-
-	search_info_add_string (tokenizer->priv->secondary, secondary_string);
-}
-
-void
-e_searching_tokenizer_set_secondary_case_sensitivity (ESearchingTokenizer *tokenizer,
-                                                      gboolean case_sensitive)
-{
-	g_return_if_fail (E_IS_SEARCHING_TOKENIZER (tokenizer));
-
-	search_info_set_flags (
-		tokenizer->priv->secondary,
-		case_sensitive ? SEARCH_CASE : 0, SEARCH_CASE);
-}
-
-/* Note: only returns the primary search string count */
-gint
-e_searching_tokenizer_match_count (ESearchingTokenizer *tokenizer)
-{
-	g_return_val_if_fail (E_IS_SEARCHING_TOKENIZER (tokenizer), -1);
-
-	if (tokenizer->priv->engine && tokenizer->priv->primary->strv->len)
-		return tokenizer->priv->engine->matchcount;
-
-	return 0;
-}
author	Dan Vrátil <dvratil@redhat.com>	2012-03-29 00:37:35 +0800
committer	Dan Vrátil <dvratil@redhat.com>	2012-03-29 00:37:35 +0800
commit	6bd1c6833a2c51898ac45865767dd01ba66a95c5 (patch)
tree	51f9cc360c49e71c455f74f72f1605965e73a932 /widgets/misc/e-searching-tokenizer.c
parent	038e0eccec595ce1cc39fe95262272e29d5a6fbf (diff)
download	gsoc2013-evolution-6bd1c6833a2c51898ac45865767dd01ba66a95c5.tar.gz gsoc2013-evolution-6bd1c6833a2c51898ac45865767dd01ba66a95c5.tar.zst gsoc2013-evolution-6bd1c6833a2c51898ac45865767dd01ba66a95c5.zip