aboutsummaryrefslogtreecommitdiffstats
path: root/e-util/e-trie.c
diff options
context:
space:
mode:
Diffstat (limited to 'e-util/e-trie.c')
-rw-r--r--e-util/e-trie.c345
1 files changed, 0 insertions, 345 deletions
diff --git a/e-util/e-trie.c b/e-util/e-trie.c
deleted file mode 100644
index 2edf94df34..0000000000
--- a/e-util/e-trie.c
+++ /dev/null
@@ -1,345 +0,0 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-/*
- * Authors: Jeffrey Stedfast <fejj@ximian.com>
- *
- * Copyright 2002 Ximian, Inc. (www.ximian.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
- *
- */
-
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdio.h>
-#include <string.h>
-
-#include "e-trie.h"
-#include "e-memory.h"
-
-#define d(x)
-
-struct _trie_state {
- struct _trie_state *next;
- struct _trie_state *fail;
- struct _trie_match *match;
- unsigned int final;
- int id;
-};
-
-struct _trie_match {
- struct _trie_match *next;
- struct _trie_state *state;
- gunichar c;
-};
-
-struct _ETrie {
- struct _trie_state root;
- GPtrArray *fail_states;
- gboolean icase;
-
- EMemChunk *match_chunks;
- EMemChunk *state_chunks;
-};
-
-
-static inline gunichar
-trie_utf8_getc (const unsigned char **in, size_t inlen)
-{
- register const unsigned char *inptr = *in;
- const unsigned char *inend = inptr + inlen;
- register unsigned char c, r;
- register gunichar u, m;
-
- if (inlen == 0)
- return 0;
-
- r = *inptr++;
- if (r < 0x80) {
- *in = inptr;
- u = r;
- } else if (r < 0xfe) { /* valid start char? */
- u = r;
- m = 0x7f80; /* used to mask out the length bits */
- do {
- if (inptr >= inend)
- return 0;
-
- c = *inptr++;
- if ((c & 0xc0) != 0x80)
- goto error;
-
- u = (u << 6) | (c & 0x3f);
- r <<= 1;
- m <<= 5;
- } while (r & 0x40);
-
- *in = inptr;
-
- u &= ~m;
- } else {
- error:
- *in = (*in)+1;
- u = 0xfffe;
- }
-
- return u;
-}
-
-
-ETrie *
-e_trie_new (gboolean icase)
-{
- ETrie *trie;
-
- trie = g_new (ETrie, 1);
- trie->root.next = NULL;
- trie->root.fail = NULL;
- trie->root.match = NULL;
- trie->root.final = 0;
-
- trie->fail_states = g_ptr_array_sized_new (8);
- trie->icase = icase;
-
- trie->match_chunks = e_memchunk_new (8, sizeof (struct _trie_match));
- trie->state_chunks = e_memchunk_new (8, sizeof (struct _trie_state));
-
- return trie;
-}
-
-void
-e_trie_free (ETrie *trie)
-{
- g_ptr_array_free (trie->fail_states, TRUE);
- e_memchunk_destroy (trie->match_chunks);
- e_memchunk_destroy (trie->state_chunks);
- g_free (trie);
-}
-
-
-
-static struct _trie_match *
-g (struct _trie_state *s, gunichar c)
-{
- struct _trie_match *m = s->match;
-
- while (m && m->c != c)
- m = m->next;
-
- return m;
-}
-
-static struct _trie_state *
-trie_insert (ETrie *trie, int depth, struct _trie_state *q, gunichar c)
-{
- struct _trie_match *m;
-
- m = e_memchunk_alloc (trie->match_chunks);
- m->next = q->match;
- m->c = c;
-
- q->match = m;
- q = m->state = e_memchunk_alloc (trie->state_chunks);
- q->match = NULL;
- q->fail = &trie->root;
- q->final = 0;
- q->id = -1;
-
- if (trie->fail_states->len < depth + 1) {
- unsigned int size = trie->fail_states->len;
-
- size = MAX (size + 64, depth + 1);
- g_ptr_array_set_size (trie->fail_states, size);
- }
-
- q->next = trie->fail_states->pdata[depth];
- trie->fail_states->pdata[depth] = q;
-
- return q;
-}
-
-
-#if 1
-static void
-dump_trie (struct _trie_state *s, int depth)
-{
- char *p = g_alloca ((depth * 2) + 1);
- struct _trie_match *m;
-
- memset (p, ' ', depth * 2);
- p[depth * 2] = '\0';
-
- fprintf (stderr, "%s[state] %p: final=%d; pattern-id=%d; fail=%p\n",
- p, s, s->final, s->id, s->fail);
- m = s->match;
- while (m) {
- fprintf (stderr, " %s'%c' -> %p\n", p, m->c, m->state);
- if (m->state)
- dump_trie (m->state, depth + 1);
-
- m = m->next;
- }
-}
-#endif
-
-
-/*
- * final = empty set
- * FOR p = 1 TO #pat
- * q = root
- * FOR j = 1 TO m[p]
- * IF g(q, pat[p][j]) == null
- * insert(q, pat[p][j])
- * ENDIF
- * q = g(q, pat[p][j])
- * ENDFOR
- * final = union(final, q)
- * ENDFOR
-*/
-
-void
-e_trie_add (ETrie *trie, const char *pattern, int pattern_id)
-{
- const unsigned char *inptr = (const unsigned char *) pattern;
- struct _trie_state *q, *q1, *r;
- struct _trie_match *m, *n;
- int i, depth = 0;
- gunichar c;
-
- /* Step 1: add the pattern to the trie */
-
- q = &trie->root;
-
- while ((c = trie_utf8_getc (&inptr, -1))) {
- if (trie->icase)
- c = g_unichar_tolower (c);
-
- m = g (q, c);
- if (m == NULL) {
- q = trie_insert (trie, depth, q, c);
- } else {
- q = m->state;
- }
-
- depth++;
- }
-
- q->final = depth;
- q->id = pattern_id;
-
- /* Step 2: compute failure graph */
-
- for (i = 0; i < trie->fail_states->len; i++) {
- q = trie->fail_states->pdata[i];
- while (q) {
- m = q->match;
- while (m) {
- c = m->c;
- q1 = m->state;
- r = q->fail;
- while (r && (n = g (r, c)) == NULL)
- r = r->fail;
-
- if (r != NULL) {
- q1->fail = n->state;
- if (q1->fail->final > q1->final)
- q1->final = q1->fail->final;
- } else {
- if ((n = g (&trie->root, c)))
- q1->fail = n->state;
- else
- q1->fail = &trie->root;
- }
-
- m = m->next;
- }
-
- q = q->next;
- }
- }
-
- d(fprintf (stderr, "\nafter adding pattern '%s' to trie %p:\n", pattern, trie));
- d(dump_trie (&trie->root, 0));
-}
-
-/*
- * Aho-Corasick
- *
- * q = root
- * FOR i = 1 TO n
- * WHILE q != fail AND g(q, text[i]) == fail
- * q = h(q)
- * ENDWHILE
- * IF q == fail
- * q = root
- * ELSE
- * q = g(q, text[i])
- * ENDIF
- * IF isElement(q, final)
- * RETURN TRUE
- * ENDIF
- * ENDFOR
- * RETURN FALSE
- */
-
-const char *
-e_trie_search (ETrie *trie, const char *buffer, size_t buflen, int *matched_id)
-{
- const unsigned char *inptr, *inend, *prev, *pat;
- register size_t inlen = buflen;
- struct _trie_state *q;
- struct _trie_match *m;
- gunichar c;
-
- inptr = (const unsigned char *) buffer;
- inend = inptr + buflen;
-
- q = &trie->root;
- pat = prev = inptr;
- while ((c = trie_utf8_getc (&inptr, inlen))) {
- inlen = (inend - inptr);
-
- if (c != 0xfffe) {
- if (trie->icase)
- c = g_unichar_tolower (c);
-
- while (q != NULL && (m = g (q, c)) == NULL)
- q = q->fail;
-
- if (q == &trie->root)
- pat = prev;
-
- if (q == NULL) {
- q = &trie->root;
- pat = inptr;
- } else if (m != NULL) {
- q = m->state;
-
- if (q->final) {
- if (matched_id)
- *matched_id = q->id;
-
- return (const char *) pat;
- }
- }
- }
-
- prev = inptr;
- }
-
- return NULL;
-}