diff options
author | adam <adam@FreeBSD.org> | 1997-01-03 12:03:04 +0800 |
---|---|---|
committer | adam <adam@FreeBSD.org> | 1997-01-03 12:03:04 +0800 |
commit | 2dbf728c0215b09ade076db286ca21768d896f87 (patch) | |
tree | 0e9256b344905758247ff7a5bcadcbd7e3e5819c /net | |
parent | 4c848ad5ccdf61977b6af05f0e4187cf48ae41e0 (diff) | |
download | freebsd-ports-gnome-2dbf728c0215b09ade076db286ca21768d896f87.tar.gz freebsd-ports-gnome-2dbf728c0215b09ade076db286ca21768d896f87.tar.zst freebsd-ports-gnome-2dbf728c0215b09ade076db286ca21768d896f87.zip |
added kfuns regexp_compile() and regexp_match()
this is the LPC interface to GNU regexp by Robert Leslie <rob@ccs.neu.edu>
and is used by the upcoming dgd-lpmoo port
Diffstat (limited to 'net')
-rw-r--r-- | net/dgd-net/files/patch-aa | 42 | ||||
-rw-r--r-- | net/dgd-net/files/patch-ad | 533 | ||||
-rw-r--r-- | net/dgd-net/pkg-plist | 5 |
3 files changed, 568 insertions, 12 deletions
diff --git a/net/dgd-net/files/patch-aa b/net/dgd-net/files/patch-aa index f4f3fb83bb67..7d981b605960 100644 --- a/net/dgd-net/files/patch-aa +++ b/net/dgd-net/files/patch-aa @@ -1,7 +1,7 @@ -*** Makefile.orig Sun Dec 10 19:21:36 1995 ---- Makefile Wed Feb 7 22:53:33 1996 +*** Makefile.orig Thu Jan 2 23:38:50 1997 +--- Makefile Thu Jan 2 23:41:13 1997 *************** -*** 3,14 **** +*** 3,24 **** # HOST= NETBSD DEFINES=-D$(HOST) #-DDUMP_FUNCS @@ -14,7 +14,17 @@ LD= $(CC) DMAKE= make BIN= ../bin ---- 3,15 ---- + + OBJ= alloc.o error.o hash.o swap.o str.o array.o object.o data.o path.o \ +! editor.o comm.net.o call_out.o interpret.o config.o dgd.o + EDOBJ= alloc.o error.o + LEXOBJ= alloc.o hash.o + COMPOBJ=alloc.o error.o hash.o path.o str.o array.o object.o data.o \ +! interpret.o config.o + + a.out: $(OBJ) always + cd comp; $(MAKE) 'DMAKE=$(DMAKE)' 'CC=$(CC)' 'CCFLAGS=$(CCFLAGS)' dgd +--- 3,27 ---- # HOST= NETBSD DEFINES=-D$(HOST) #-DDUMP_FUNCS @@ -23,22 +33,23 @@ CCFLAGS=$(DEFINES) $(DEBUG) CFLAGS= -I. -Icomp -Ilex -Ied -Ikfun $(CCFLAGS) ! LDFLAGS=-s -! LIBS=-lcrypt +! LIBS=-lcrypt -lgnuregex ! CC= cc LD= $(CC) DMAKE= make BIN= ../bin -*************** -*** 20,25 **** ---- 21,28 ---- + + OBJ= alloc.o error.o hash.o swap.o str.o array.o object.o data.o path.o \ +! editor.o comm.net.o call_out.o interpret.o config.o dgd.o rgx.o + EDOBJ= alloc.o error.o + LEXOBJ= alloc.o hash.o COMPOBJ=alloc.o error.o hash.o path.o str.o array.o object.o data.o \ - interpret.o config.o +! interpret.o config.o rgx.o +! +! 
all: a.out comp/a.out -+ all: a.out comp/a.out -+ a.out: $(OBJ) always cd comp; $(MAKE) 'DMAKE=$(DMAKE)' 'CC=$(CC)' 'CCFLAGS=$(CCFLAGS)' dgd - cd lex; $(MAKE) 'DMAKE=$(DMAKE)' 'CC=$(CC)' 'CCFLAGS=$(CCFLAGS)' dgd *************** *** 38,44 **** -mv $(BIN)/driver $(BIN)/driver.old @@ -77,3 +88,10 @@ clean: rm -f a.out $(OBJ) comp.sub lex.sub ed.sub +*************** +*** 92,94 **** +--- 99,102 ---- + call_out.o config.o dgd.o: call_out.h + error.o comm.o call_out.o config.o dgd.o: comm.h + config.o: version.h ++ rgx.o: str.h array.h rgx.h interpret.h diff --git a/net/dgd-net/files/patch-ad b/net/dgd-net/files/patch-ad new file mode 100644 index 000000000000..451e8b2ec5c4 --- /dev/null +++ b/net/dgd-net/files/patch-ad @@ -0,0 +1,533 @@ +*** src.rgx/config.c Thu Jan 2 23:34:31 1997 +--- config.c Thu Jan 2 23:51:21 1997 +*************** +*** 19,24 **** +--- 19,25 ---- + # include "compile.h" + # include "csupport.h" + # include "table.h" ++ # include "rgx.h" + + typedef struct { + char *name; /* name of the option */ +*************** +*** 810,815 **** +--- 811,819 ---- + + /* initialize interpreter */ + i_init(conf[CREATE].u.str); ++ ++ /* initialize regular expressions */ ++ rgx_init(); + + /* initialize compiler */ + c_init(conf[AUTO_OBJECT].u.str, +*** src.rgx/kfun/extra.c Tue Sep 27 09:28:26 1994 +--- kfun/extra.c Thu Feb 2 22:25:18 1995 +*************** +*** 560,562 **** +--- 560,640 ---- + error("Not yet implemented"); + } + # endif ++ ++ ++ # ifdef FUNCDEF ++ FUNCDEF("regexp_compile", kf_regexp_compile, p_regexp_compile) ++ # else ++ char p_regexp_compile[] = { C_TYPECHECKED | C_STATIC | C_VARARGS, ++ T_STRING | (1 << REFSHIFT), 2, T_STRING, T_INT }; ++ ++ /* ++ * NAME: kfun->regexp_compile() ++ * DESCRIPTION: compile a regexp pattern ++ */ ++ int kf_regexp_compile(nargs) ++ int nargs; ++ { ++ int case_matters; ++ array *compiled; ++ ++ if (nargs < 1) ++ return -1; ++ ++ case_matters = (nargs == 2 ? ! 
(sp++)->u.number : 1); ++ ++ compiled = rgx_new(sp->u.string, case_matters); ++ ++ str_del(sp->u.string); ++ sp->type = T_ARRAY; ++ arr_ref(sp->u.array = compiled); ++ ++ return 0; ++ } ++ # endif ++ ++ ++ # ifdef FUNCDEF ++ FUNCDEF("regexp_match", kf_regexp_match, p_regexp_match) ++ # else ++ char p_regexp_match[] = { C_TYPECHECKED | C_STATIC | C_VARARGS, ++ T_INT | (1 << REFSHIFT), 3, ++ T_STRING | (1 << REFSHIFT), T_STRING, T_INT }; ++ ++ /* ++ * NAME: kfun->regexp_match() ++ * DESCRIPTION: perform regexp matching with a previously compiled pattern ++ */ ++ int kf_regexp_match(nargs) ++ int nargs; ++ { ++ int reverse; ++ string *subject; ++ array *compiled, *result; ++ ++ if (nargs < 2) ++ return -1; ++ ++ reverse = (nargs == 3 ? (sp++)->u.number : 0); ++ subject = sp->u.string; ++ compiled = sp[1].u.array; ++ ++ if (compiled->size != 3) ++ return 1; ++ ++ result = rgx_match(d_get_elts(compiled), subject, reverse); ++ ++ str_del((sp++)->u.string); ++ arr_del(sp->u.array); ++ ++ if (result == (array *) 0) ++ { ++ sp->type = T_INT; ++ sp->u.number = 0; ++ } ++ else ++ arr_ref(sp->u.array = result); ++ ++ return 0; ++ } ++ # endif +*** src.rgx/kfun/kfun.h Sun May 8 08:15:01 1994 +--- kfun/kfun.h Thu Feb 2 22:25:18 1995 +*************** +*** 5,7 **** +--- 5,8 ---- + # include "xfloat.h" + # include "interpret.h" + # include "data.h" ++ # include "rgx.h" +*** src.rgx/rgx.c Thu Jan 2 21:41:55 1997 +--- rgx.c Thu Jan 2 21:17:46 1997 +*************** +*** 0 **** +--- 1,213 ---- ++ # include "dgd.h" ++ # include "str.h" ++ # include "array.h" ++ # include "interpret.h" ++ # include <gnuregex.h> ++ # include "rgx.h" ++ # include <memory.h> ++ ++ static char trans_table[256]; ++ ++ /* ++ * NAME: regexp->init() ++ * DESCRIPTION: initialize regexp handling ++ */ ++ void rgx_init() ++ { ++ register int i; ++ ++ for (i = 0; i < 256; ++i) ++ trans_table[i] = i; ++ for (i = 'a'; i <= 'z'; ++i) ++ trans_table[i] = i + 'A' - 'a'; ++ } ++ ++ /* ++ * NAME: regexp->new() ++ * 
DESCRIPTION: create a new regexp buffer ++ */ ++ array *rgx_new(pattern, case_matters) ++ string *pattern; ++ int case_matters; ++ { ++ char *translate; ++ struct re_pattern_buffer patbuf; ++ char fastmap[256]; ++ const char *compile_error; ++ array *result; ++ register value *v; ++ string *s; ++ ++ translate = (case_matters ? (char *) 0 : trans_table); ++ ++ patbuf.buffer = 0; ++ patbuf.allocated = 0; ++ patbuf.used = 0; ++ ++ patbuf.fastmap = fastmap; ++ patbuf.translate = translate; ++ ++ patbuf.fastmap_accurate = 0; ++ ++ { ++ int i; ++ long n = 0; ++ for (i = 0; i < pattern->len; i++) { ++ switch (pattern->text[i]) { ++ case '[': ++ if (pattern->text[++i] == '^') ++ i++; ++ for (i++; i < pattern->len; i++) ++ if (pattern->text[i] == ']') ++ break; ++ break; ++ case '%': ++ pattern->text[i++] = '\\'; /* skip escaped char */ ++ break; ++ case '\\': ++ pattern->text[i] == '%'; /* mark for expansion */ ++ n++; ++ break; ++ } ++ } ++ if (n) { ++ int j; ++ ++ s = str_new(NULL, pattern->len + n); ++ for (i = j = 0; i < pattern->len; i++, j++) { ++ switch (pattern->text[i]) { ++ case '[': ++ s->text[j++] = pattern->text[i++]; ++ if (i == pattern->len) ++ goto breakout; ++ if (pattern->text[i] == '^') { ++ s->text[j++] = pattern->text[i++]; ++ if (i == pattern->len) ++ goto breakout; ++ } ++ s->text[j++] = pattern->text[i++]; ++ if (i == pattern->len) ++ goto breakout; ++ for ( ; i < pattern->len; i++, j++) { ++ if ((s->text[j] = pattern->text[i]) == ']') ++ break; ++ } ++ break; ++ case '%': /* expand */ ++ s->text[j++] = '\\'; ++ s->text[j] = '\\'; ++ break; ++ case '\\': /* skip escaped char */ ++ s->text[j++] = pattern->text[i++]; ++ if (i == pattern->len) ++ goto breakout; ++ /* fallthru */ ++ default: ++ s->text[j] = pattern->text[i]; ++ } ++ } ++ breakout: ++ } ++ } ++ compile_error = re_compile_pattern(s->text, s->len, &patbuf); ++ str_del(s); ++ if (compile_error != (char *) 0) ++ { ++ regfree(&patbuf); ++ error(compile_error); ++ } ++ ++ 
re_compile_fastmap(&patbuf); ++ ++ result = arr_new(3L); ++ v = result->elts; ++ ++ v->type = T_STRING; ++ str_ref(v->u.string = str_new((char *) &patbuf, (long) sizeof(patbuf))); ++ ++v; ++ v->type = T_STRING; ++ str_ref(v->u.string = str_new((char *) patbuf.buffer, ++ (long) patbuf.allocated)); ++ ++v; ++ v->type = T_STRING; ++ str_ref(v->u.string = str_new(fastmap, 256L)); ++ ++ /* don't let regfree() try to free these */ ++ patbuf.fastmap = 0; ++ patbuf.translate = 0; ++ ++ regfree(&patbuf); ++ ++ return result; ++ } ++ ++ /* ++ * NAME: regexp->match() ++ * DESCRIPTION: perform regexp matching, given a pattern and subject string ++ */ ++ array *rgx_match(pattern, subject, reverse) ++ value *pattern; ++ string *subject; ++ int reverse; ++ { ++ long sub_len; ++ struct re_pattern_buffer patbuf; ++ struct re_registers regs; ++ regoff_t starts[RGX_NREGS + 1], ends[RGX_NREGS + 1]; ++ array *result; ++ register value *v; ++ register int i; ++ ++ if (pattern[0].u.string->len != sizeof(struct re_pattern_buffer)) ++ error("Invalid compiled pattern"); ++ ++ memcpy((char *) &patbuf, pattern[0].u.string->text, ++ sizeof(struct re_pattern_buffer)); ++ ++ if (patbuf.allocated != (unsigned long) pattern[1].u.string->len || ++ pattern[2].u.string->len != 256) ++ error("Invalid compiled pattern"); ++ ++ patbuf.buffer = (unsigned char *) pattern[1].u.string->text; ++ patbuf.fastmap = pattern[2].u.string->text; ++ ++ regs.num_regs = RGX_NREGS; ++ regs.start = starts; ++ regs.end = ends; ++ patbuf.regs_allocated = REGS_FIXED; ++ ++ sub_len = subject->len; ++ if (re_search(&patbuf, subject->text, sub_len, reverse ? sub_len : 0, ++ reverse ? 
-(sub_len + 1) : sub_len + 1, &regs) == -1) ++ return (array *) 0; ++ ++ result = arr_new((long) RGX_NREGS * 2); ++ v = result->elts; ++ ++ v->type = T_INT; ++ v->u.number = starts[0]; ++v; ++ ++ v->type = T_INT; ++ v->u.number = ends[0] - 1; ++ ++v; ++ ++ for (i = 1; i < RGX_NREGS; ++i, v += 2) ++ { ++ v[0].type = T_INT; ++ v[1].type = T_INT; ++ ++ if (starts[i] == -1) ++ { ++ v[0].u.number = 0; ++ v[1].u.number = -1; ++ } ++ else ++ { ++ v[0].u.number = starts[i]; ++ v[1].u.number = ends[i] - 1; ++ } ++ } ++ ++ return result; ++ } +*** src.rgx/rgx.h Thu Jan 2 21:42:05 1997 +--- rgx.h Fri Feb 3 03:09:54 1995 +*************** +*** 0 **** +--- 1,5 ---- ++ # define RGX_NREGS 10 ++ ++ extern void rgx_init P((void)); ++ extern array *rgx_new P((string*, int)); ++ extern array *rgx_match P((value*, string*, int)); +*** doc.rgx/example.c Thu Jan 1 00:00:00 1970 +--- ../doc/rgx_example.c Fri Feb 3 03:30:01 1995 +*************** +*** 0 **** +--- 1,49 ---- ++ /* ++ * This file shows how an interface can be built to cache regexp patterns ++ * and ultimately provide a more streamlined interface to the regexp kfuns. ++ * ++ * Note that since regexp_match() severely depends on the return result from ++ * regexp_compile() being unaltered, it is a good idea to provide an ++ * interface like this, and also to mask the regexp_match() kfun from the ++ * auto object. ++ */ ++ ++ # define CACHE_SIZE 10 ++ ++ private mapping cache; ++ private string *list; ++ private string last_pattern; ++ ++ static ++ void create(void) ++ { ++ cache = ([ ]); ++ list = ({ }); ++ } ++ ++ int *match(string subject, string pattern) ++ { ++ string *buffer; ++ ++ if ((buffer = cache[pattern]) == 0) ++ { ++ buffer = regexp_compile(pattern); ++ ++ if (sizeof(list) >= CACHE_SIZE) ++ { ++ cache[list[0]] = 0; ++ list = list[1 ..] 
+ ({ pattern }); ++ } ++ else ++ list += ({ pattern }); ++ ++ cache[pattern] = buffer; ++ } ++ else if (pattern != last_pattern) ++ { ++ list = list - ({ pattern }) + ({ pattern }); ++ last_pattern = pattern; ++ } ++ ++ return regexp_match(buffer, subject); ++ } +diff -crN doc.rgx/kfun/regexp_compile doc/kfun/regexp_compile +*** doc.rgx/kfun/regexp_compile Thu Jan 1 00:00:00 1970 +--- ../doc/kfun/regexp_compile Tue Jul 26 00:02:34 1994 +*************** +*** 0 **** +--- 1,27 ---- ++ NAME ++ regexp_compile - compile a regular expression ++ ++ SYNOPSIS ++ varargs string *regexp_compile(string pattern, int case_insensitive) ++ ++ DESCRIPTION ++ The argument pattern is compiled as a regular expression. If the ++ argument case_insensitive is nonzero, the pattern is compiled in ++ such a way that subsequent matching will be done without case ++ sensitivity. The default is to be case-sensitive. ++ ++ An array of strings is returned; these strings contain binary ++ data and must not be altered in any way before being passed to ++ regexp_match(). ++ ++ The compiled regexp can be saved and used any number of times with ++ regexp_match(). ++ ++ ERRORS ++ If the argument pattern contains a syntactically malformed regular ++ expression, an error will result. An error can also occur if the ++ pattern is too complicated, or if there is not enough memory to ++ compile the pattern. ++ ++ SEE ALSO ++ kfun/regexp_match +*** doc.rgx/kfun/regexp_match Thu Jan 1 00:00:00 1970 +--- ../doc/kfun/regexp_match Mon Jul 25 22:19:42 1994 +*************** +*** 0 **** +--- 1,34 ---- ++ NAME ++ regexp_match - perform regular expression matching ++ ++ SYNOPSIS ++ varargs int *regexp_match(string *pattern, string subject, int reverse) ++ ++ DESCRIPTION ++ The argument subject is matched against the compiled regular ++ expression pattern. If the argument reverse is nonzero, matching ++ is performed from right-to-left; otherwise, matching is performed ++ left-to-right. 
++ ++ The pattern argument must be an array of strings exactly as it ++ was received from regexp_compile(); otherwise, the result of ++ calling this function is undefined. ++ ++ If the argument subject could not be matched with the regular ++ expression, 0 is returned. Otherwise, an array of 20 integers ++ is returned with this format: ++ ++ ({ start0, end0, start1, end1, ..., start9, end9 }) ++ ++ Each element is a character index into the subject string. The ++ first two elements, start0 and end0, indicate the part of the subject ++ that was matched by the regular expression as a whole. The following ++ elements indicate the starting and ending indices of each ++ subexpression (denoted by "%(" and "%)" pairs in the original ++ pattern) that were matched. ++ ++ If any subexpression was not matched, the corresponding start and ++ end elements will be 0 and -1, respectively. ++ ++ SEE ALSO ++ kfun/regexp_compile +*** doc.rgx/regexps Thu Jan 1 00:00:00 1970 +--- ../doc/regexps Mon Jul 25 22:58:57 1994 +*************** +*** 0 **** +--- 1,32 ---- ++ ++ Regular expressions are composed of the following operators: ++ ++ . Match any single character ++ XY Match X immediately followed by Y ++ X* Match zero-or-more of X ++ X+ Match one-or-more of X ++ X? 
Match zero-or-one of X ++ X%|Y Match either X or Y ++ [charset] Match any single character in `charset' ++ [^charset] Match any single character not in `charset' ++ %(X%) Match X, but also remember the match as a subexpression ++ %digit Match the numbered previous subexpression ++ ^X Match X anchored at the beginning of a line ++ X$ Match X anchored at the end of a line ++ %b Match the empty string at the beginning or end of a word ++ %B Match the empty string only within the middle of a word ++ %< Match the beginning of a word ++ %> Match the end of a word ++ %w Match any word-constituent character ++ %W Match any character that is not word-constituent ++ ++ Any other character in a regular expression is matched literally with itself. ++ To match any of the special operator characters .*+?%[^$ literally, precede ++ the character with `%'. ++ ++ A `charset' is formed by listing all desired characters with brackets. To ++ include a literal `^' in a charset, do not list it in the first position. To ++ include a literal `]', list it immediately after the opening `[' or `[^'. All ++ characters are non-special (and should not be escaped) within a charset, ++ except `-', which denotes a character range. To include a literal `-', list it ++ either first or last. +*** README.rgx.old Fri Jan 3 03:17:21 1997 +--- ../README.rgx Fri Jan 3 03:14:29 1997 +*************** +*** 0 **** +--- 1,18 ---- ++ dgd-rgx was written by Robert Leslie <rob@ccs.neu.edu> as an LPC interface to ++ GNU regex, adding two kfuns to DGD for regular expression matching: ++ ++ regexp_compile() ++ regexp_match() ++ ++ For a description of the regular expression language accepted by these kfuns, ++ please read doc/regexps. ++ ++ Complete details for the two kfuns can be found in the doc/kfun directory. ++ ++ Adapted by Adam David <adam@veda.is> for DGD 1.0.97 and to use the unmodified ++ GNU regexp library. ++ ++ This software is a modification of DGD, and is therefore protected by the ++ DGD Copyright. 
++ ++ There is no warranty for this software. diff --git a/net/dgd-net/pkg-plist b/net/dgd-net/pkg-plist index 1dc4ed4f6d24..8ab2e0422b86 100644 --- a/net/dgd-net/pkg-plist +++ b/net/dgd-net/pkg-plist @@ -3,6 +3,7 @@ dgd/Copyright.NET dgd/Credits dgd/README dgd/README.FreeBSD +dgd/README.rgx dgd/README.sites dgd/bin/driver.net dgd/bin/precomp.net @@ -12,6 +13,8 @@ dgd/doc/LPC.html dgd/doc/Patching dgd/doc/Platforms dgd/doc/editor +dgd/doc/regexps +dgd/doc/rgx_example.c dgd/doc/kfun/allocate dgd/doc/kfun/call_other dgd/doc/kfun/call_out @@ -49,6 +52,8 @@ dgd/doc/kfun/query_editor dgd/doc/kfun/query_ip_number dgd/doc/kfun/random dgd/doc/kfun/read_file +dgd/doc/kfun/regexp_compile +dgd/doc/kfun/regexp_match dgd/doc/kfun/remove_call_out dgd/doc/kfun/remove_dir dgd/doc/kfun/remove_file |