/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ /* * Authors: Jeffrey Stedfast * * Copyright 2002 Ximian, Inc. (www.ximian.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA. * */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include "e-trie.h" #include "e-memory.h" #define d(x) struct _trie_state { struct _trie_state *next; struct _trie_state *fail; struct _trie_match *match; unsigned int final; int id; }; struct _trie_match { struct _trie_match *next; struct _trie_state *state; gunichar c; }; struct _ETrie { struct _trie_state root; GPtrArray *fail_states; gboolean icase; EMemChunk *match_chunks; EMemChunk *state_chunks; }; static inline gunichar trie_utf8_getc (const unsigned char **in, size_t inlen) { register const unsigned char *inptr = *in; const unsigned char *inend = inptr + inlen; register unsigned char c, r; register gunichar u, m; if (inlen == 0) return 0; r = *inptr++; if (r < 0x80) { *in = inptr; u = r; } else if (r < 0xfe) { /* valid start char? */ u = r; m = 0x7f80; /* used to mask out the length bits */ do { if (inptr >= inend) return 0; c = *inptr++; if ((c & 0xc0) != 0x80) goto error; u = (u << 6) | (c & 0x3f); r <<= 1; m <<= 5; } while (r & 0x40); *in = inptr; u &= ~m; } else { error: *in = (*in)+1; u = 0xfffe; } return u; } ETrie * e_trie_new (gboolean icase) { ETrie *trie; trie = g_new (ETrie, 1); trie->root.next = NULL; trie->root.fail = NULL; trie->root.match = NULL; trie->root.final = 0; trie->fail_states = g_ptr_array_sized_new (8); trie->icase = icase; trie->match_chunks = e_memchunk_new (8, sizeof (struct _trie_match)); trie->state_chunks = e_memchunk_new (8, sizeof (struct _trie_state)); return trie; } void e_trie_free (ETrie *trie) { g_ptr_array_free (trie->fail_states, TRUE); e_memchunk_destroy (trie->match_chunks); e_memchunk_destroy (trie->state_chunks); g_free (trie); } static struct _trie_match * g (struct _trie_state *s, gunichar c) { struct _trie_match *m = s->match; while (m && m->c != c) m = m->next; return m; } static struct _trie_state * trie_insert (ETrie *trie, int depth, struct _trie_state *q, gunichar c) { struct _trie_match *m; m = e_memchunk_alloc (trie->match_chunks); m->next = q->match; m->c = c; q->match = m; q = m->state = e_memchunk_alloc (trie->state_chunks); q->match = NULL; q->fail = &trie->root; q->final = 0; q->id = -1; if (trie->fail_states->len < depth + 1) { unsigned int size = trie->fail_states->len; size = MAX (size + 64, depth + 1); g_ptr_array_set_size (trie->fail_states, size); } q->next = trie->fail_states->pdata[depth]; trie->fail_states->pdata[depth] = q; return q; } #if 1 static void dump_trie (struct _trie_state *s, int depth) { char *p = g_alloca ((depth * 2) + 1); struct _trie_match *m; memset (p, ' ', depth * 2); p[depth * 2] = '\0'; fprintf (stderr, "%s[state] %p: final=%d; pattern-id=%d; fail=%p\n", p, s, s->final, s->id, s->fail); m = s->match; while (m) { fprintf (stderr, " %s'%c' -> %p\n", p, m->c, m->state); if (m->state) dump_trie (m->state, depth + 1); m = m->next; } } #endif /* * final = empty set * FOR p = 1 TO #pat * q = root * FOR j = 1 TO m[p] * IF g(q, pat[p][j]) == null * insert(q, pat[p][j]) * ENDIF * q = g(q, pat[p][j]) * ENDFOR * final = union(final, q) * ENDFOR */ void e_trie_add (ETrie *trie, const char *pattern, int pattern_id) { const unsigned char *inptr = (const unsigned char *) pattern; struct _trie_state *q, *q1, *r; struct _trie_match *m, *n; int i, depth = 0; gunichar c; /* Step 1: add the pattern to the trie */ q = &trie->root; while ((c = trie_utf8_getc (&inptr, -1))) { if (trie->icase) c = g_unichar_tolower (c); m = g (q, c); if (m == NULL) { q = trie_insert (trie, depth, q, c); } else { q = m->state; } depth++; } q->final = depth; q->id = pattern_id; /* Step 2: compute failure graph */ for (i = 0; i < trie->fail_states->len; i++) { q = trie->fail_states->pdata[i]; while (q) { m = q->match; while (m) { c = m->c; q1 = m->state; r = q->fail; while (r && (n = g (r, c)) == NULL) r = r->fail; if (r != NULL) { q1->fail = n->state; if (q1->fail->final > q1->final) q1->final = q1->fail->final; } else { if ((n = g (&trie->root, c))) q1->fail = n->state; else q1->fail = &trie->root; } m = m->next; } q = q->next; } } d(fprintf (stderr, "\nafter adding pattern '%s' to trie %p:\n", pattern, trie)); d(dump_trie (&trie->root, 0)); } /* * Aho-Corasick * * q = root * FOR i = 1 TO n * WHILE q != fail AND g(q, text[i]) == fail * q = h(q) * ENDWHILE * IF q == fail * q = root * ELSE * q = g(q, text[i]) * ENDIF * IF isElement(q, final) * RETURN TRUE * ENDIF * ENDFOR * RETURN FALSE */ const char * e_trie_search (ETrie *trie, const char *buffer, size_t buflen, int *matched_id) { const unsigned char *inptr, *inend, *prev, *pat; register size_t inlen = buflen; struct _trie_state *q; struct _trie_match *m; gunichar c; inptr = (const unsigned char *) buffer; inend = inptr + buflen; q = &trie->root; pat = prev = inptr; while ((c = trie_utf8_getc (&inptr, inlen))) { inlen = (inend - inptr); if (c != 0xfffe) { if (trie->icase) c = g_unichar_tolower (c); while (q != NULL && (m = g (q, c)) == NULL) q = q->fail; if (q == &trie->root) pat = prev; if (q == NULL) { q = &trie->root; pat = inptr; } else if (m != NULL) { q = m->state; if (q->final) { if (matched_id) *matched_id = q->id; return (const char *) pat; } } } prev = inptr; } return NULL; } t/comms
Commit message (Expand)AuthorAgeFilesLines
* - Update to 20100416pav2010-08-137-42/+51
* Remove pkg-install, which is now replaced by mgettycfg with the same code.olgeni2010-08-085-667/+40
* Silence post-patch command.olgeni2010-08-041-1/+1
* Reset long-time inactive maintainer jbq@caraldi.com due to maintainerlinimon2010-07-301-1/+1
* Update ImageMagick to 6.6.2-10mm2010-07-251-2/+2
* Update to fix an issue with Xastir/Motif and a certain level of Xorg server.xride2010-07-242-2/+21
* - Marked IGNORE, because the version current is freeze and version stable is ...sylvio2010-07-211-0/+2
* Update to version 1.28.0.bsam2010-07-123-6/+19
* - pass args to hfaxddinoex2010-07-092-1/+25
* Reset maintainer per his request to ports@.linimon2010-07-081-1/+1
* - Update to 3.1.11mm2010-07-073-12/+12
* - Update to 2.4johans2010-07-024-28/+15
* Update to 0.5.2.stefan2010-07-013-4/+14
* Present KDE SC 4.4.5 for FreeBSD.makc2010-06-301-3/+3
* - Update to version 0.0.7danfe2010-06-222-6/+6
* - Update to 1.2.11db2010-06-105-13/+41
* Bump PORTREVISION after libao update and handle API incompatibility.naddy2010-06-072-0/+11
* - Update to 0.02pgollucci2010-06-072-4/+4
* - not make jobs safepgollucci2010-06-061-0/+2
* - Fix buildmiwi2010-06-051-0/+12
* LICENSE GPLv2dinoex2010-06-041-0/+2
* - update audio/lame to 3.98.4netchild2010-06-031-0/+1
* Present KDE SC 4.4.4 for FreeBSD.makc2010-06-022-4/+3
* Bounce PORTREVISION for gettext-related ports. Have fun, ya'll.ade2010-05-3114-12/+14
* - Mass conversion of RF -> RG for MASTER_SITE for rubygem- portspgollucci2010-05-271-1/+1
* - Pass maintainership to gcooper@amdmi32010-05-251-1/+1
* Reset perky@FreeBSD.org due to maintainer-timeouts and no responselinimon2010-05-241-1/+1
* Update libical to 0.44.marcus2010-05-221-1/+1
* The xz utils and lzma library have been imported into base, so makenaddy2010-05-221-1/+1
* - Update to 3.1.8mm2010-05-214-89/+21
* - Update to 0.78dhn2010-05-213-5/+6
* . move *.pyc and *.pyo file creation to comms/gammu Makefilebsam2010-05-153-5/+12
* comms/gammu-python:bsam2010-05-152-1/+6
* This is a gammu port with python bindings.bsam2010-05-142-0/+18
* . remove period from COMMENT;bsam2010-05-141-2/+4
* Return bsd.port.pre[post].mk back for the sake of OSVERSION.bsam2010-05-141-2/+5
* Switch back to base gcc at sparc, should be fine now.bsam2010-05-141-7/+1
* Here is a more descriptive pkg-descr. :-)bsam2010-05-131-7/+20
* Update COMMENT and port description to match the wording from official site.danfe2010-05-112-6/+7
* - The FreeBSD KDE team is pleased to announce KDE SC 4.4.3 for FreeBSDfluffy2010-05-113-33/+44
* - Chase comms/gnokii shlib bumpmiwi2010-05-101-2/+2
* - Update to 0.6.29miwi2010-05-108-120/+72
* - forced commit to 1.3.0 to actually include patchdb2010-05-031-0/+11
* - Update to 1.3.0db2010-05-032-9/+16
* - Fix to build in 9-currentsylvio2010-04-221-1/+5
* Switch to use newer GMP version.ale2010-04-191-2/+2
* - Update to 1.27.93sylvio2010-04-182-4/+4
* Drop maintainership, as I do not use this port anymore.anders2010-04-141-1/+1
* - Update to 3.36sylvio2010-04-103-5/+5
* Fix WWW and quotes.olgeni2010-04-091-2/+2
* SMS::SMS77 consists of a perl interface and a script to send SMSbeat2010-04-065-0/+41
* Chase the ftp/curl shlib version bump.roam2010-04-031-1/+1
* - cleanup libpng12dinoex2010-03-301-2/+2
* - fix build for png-1.4.1dinoex2010-03-301-0/+4
* - Bump Magick++, MagickWand or MagickCore dependencymm2010-03-291-2/+2
* Take maintainership from ports@.olgeni2010-03-291-1/+1
* - update to 1.4.1dinoex2010-03-2836-34/+36
* Begin the process of deprecating sysutils/rc_subr bydougb2010-03-279-9/+9
* - Fix previous commit (redefinition of LIB_DEPENDS)gahr2010-03-241-1/+0
* - Chase x11-toolkits/fltk updategahr2010-03-243-7/+8