diff options
-rw-r--r-- | mail/spamprobe/Makefile | 12 | ||||
-rw-r--r-- | mail/spamprobe/distinfo | 2 | ||||
-rw-r--r-- | mail/spamprobe/files/Makefile | 10 | ||||
-rw-r--r-- | mail/spamprobe/files/Makefile.export0_6 | 10 | ||||
-rw-r--r-- | mail/spamprobe/files/post-install-notes | 15 | ||||
-rw-r--r-- | mail/spamprobe/files/spamprobe.1 | 68 | ||||
-rw-r--r-- | mail/spamprobe/pkg-plist | 1 |
7 files changed, 107 insertions, 11 deletions
diff --git a/mail/spamprobe/Makefile b/mail/spamprobe/Makefile index 1e2d4f632fdc..78d55d9084db 100644 --- a/mail/spamprobe/Makefile +++ b/mail/spamprobe/Makefile @@ -6,18 +6,28 @@ # PORTNAME= spamprobe -PORTVERSION= 0.6 +PORTVERSION= 0.7a CATEGORIES= mail MASTER_SITES= ${MASTER_SITE_SOURCEFORGE} MASTER_SITE_SUBDIR=${PORTNAME} +LIB_DEPENDS= db3.3:${PORTSDIR}/databases/db3 + MAINTAINER= mdodd@freebsd.org MAKEFILE= ${FILESDIR}/Makefile .include <bsd.port.pre.mk> +post-build: + @cd ${WRKSRC} && ${MAKE} -f ${FILESDIR}/Makefile.export0_6 clean + @cd ${WRKSRC} && ${MAKE} -f ${FILESDIR}/Makefile.export0_6 + post-extract: @${CP} ${FILESDIR}/spamprobe.1 ${WRKSRC}/ +post-install: + @cd ${WRKSRC} && ${MAKE} -f ${FILESDIR}/Makefile.export0_6 install + @${CAT} ${FILESDIR}/post-install-notes + .include <bsd.port.post.mk> diff --git a/mail/spamprobe/distinfo b/mail/spamprobe/distinfo index f8e6d3ffc6d4..eb8912579de7 100644 --- a/mail/spamprobe/distinfo +++ b/mail/spamprobe/distinfo @@ -1 +1 @@ -MD5 (spamprobe-0.6.tar.gz) = d277ec6ab4fc2501db99a2e1cc6cc2e8 +MD5 (spamprobe-0.7a.tar.gz) = dcd2a409391c4842adaa84442019ccc1 diff --git a/mail/spamprobe/files/Makefile b/mail/spamprobe/files/Makefile index 08eff50c9d64..17359d61f663 100644 --- a/mail/spamprobe/files/Makefile +++ b/mail/spamprobe/files/Makefile @@ -4,8 +4,12 @@ PREFIX?= /usr/local BINDIR= ${PREFIX}/bin MANDIR= ${PREFIX}/man/man PROG_CXX= spamprobe -SRCS= File.cc FrequencyDB.cc LockFile.cc Message.cc \ - MessageFactory.cc MimeHeader.cc MimeLineReader.cc \ - MimeMessageReader.cc SpamFilter.cc spamprobe.cc util.cc +CXXFLAGS= -Wall -DUSE_DB -I${PREFIX}/include/db3 +LDFLAGS= -L${PREFIX}/lib -ldb3 +SRCS= File.cc FrequencyDB.cc FrequencyDBImpl_bdb.cc \ + FrequencyDBImpl_cache.cc FrequencyDBImpl_dbm.cc LockFile.cc \ + Message.cc MessageFactory.cc MimeHeader.cc MimeLineReader.cc \ + MimeMessageReader.cc SpamFilter.cc Tokenizer.cc \ + spamprobe.cc util.cc .include <bsd.prog.mk> diff --git a/mail/spamprobe/files/Makefile.export0_6 
b/mail/spamprobe/files/Makefile.export0_6 new file mode 100644 index 000000000000..3e65e3a48633 --- /dev/null +++ b/mail/spamprobe/files/Makefile.export0_6 @@ -0,0 +1,10 @@ +# $FreeBSD: /tmp/pcvs/ports/mail/spamprobe/files/Attic/Makefile.export0_6,v 1.1 2002-09-16 19:36:22 mdodd Exp $ +# +PREFIX?= /usr/local +BINDIR= ${PREFIX}/bin +NOMAN= +PROG_CXX= spamprobe-export_0.6 +CXXFLAGS= -Wall -DUSE_DBM +SRCS= File.cc export0_6.cc + +.include <bsd.prog.mk> diff --git a/mail/spamprobe/files/post-install-notes b/mail/spamprobe/files/post-install-notes new file mode 100644 index 000000000000..c115a23a4bc5 --- /dev/null +++ b/mail/spamprobe/files/post-install-notes @@ -0,0 +1,15 @@ + + + *** WARNING *** + +If you are using a version of SpamProbe earlier than 0.7 the database +format has changed! You will need to rebuild your database. + +You may convert your database using the following command: + + spamprobe-export_0.6 | spamprobe import + + *** WARNING *** + + + diff --git a/mail/spamprobe/files/spamprobe.1 b/mail/spamprobe/files/spamprobe.1 index 775a210cdaf5..18a1884d41d7 100644 --- a/mail/spamprobe/files/spamprobe.1 +++ b/mail/spamprobe/files/spamprobe.1 @@ -19,7 +19,10 @@ .Op Fl m .Op Fl n Ar number .Op Fl r Ar number +.Op Fl s Ar number .Op Fl v +.Op Fl V +.Op Fl Y .Op Fl 7 .Op Fl 8 .Ar command Op ... @@ -37,6 +40,12 @@ .Ar spam Op filename ... .Nm .Ar remove Op filename ... +.Nm +.Ar dump +.Nm +.Ar export +.Nm +.Ar import Op filename ... .Sh DESCRIPTION Welcome to .Nm SpamProbe ! @@ -84,7 +93,9 @@ multiple emails arrive simultaneously. .It Scores only the Received, Subject, To, From, and Cc headers. All other headers are ignored to make it hard for spammers to hide non-spammy words -in X- headers to fool the filter. +in X- headers to fool the filter. The +.Fl H +command line option can be used to override this. 
.El .Ss OPTIONS .Bl -tag -width ".Fl d Ar directory" @@ -152,11 +163,29 @@ repeats reduces the number of words overall (since a single word occupies more than one slot) but allows words which occur frequently in the message to have a higher weight. Generally this is changed only for optimization purposes. +.It Fl s Ar number +.Nm +maintains an in-memory cache of the words it has seen in previous messages +to reduce disk i/o and improve performance. By default the cache is +flushed and cleared every 250 messages. This number can be changed using +the +.Fl s +option. A value of zero causes +.Nm +to use 100,000 as the limit which effectively means that the cache will +only be flushed at program exit (unless you have really enormous mailbox +files). The cache doesn't affect receive, dump, or export but has a +significant impact on the others. .It Fl v Write debugging information to stderr. This can be useful for debugging or for seeing which terms .Nm used to score each email. +.It Fl V +Prints version and copyright information and then exits. +.It Fl Y +Assume traditional Berkeley mailbox format, ignoring any Content-Length: +fields. .It Fl 7 Ignore any characters with the most significant bit set to 1 instead of mapping them to the letter 'z'. @@ -175,9 +204,11 @@ scored the message is classified as either spam or non-spam and its word counts are written to the appropriate database. The message's score is written to stdout along with a single word. For example: .Pp - SPAM 0.99 - or - GOOD 0.02 +.Dl "SPAM 0.99" +.Pp +or +.Pp +.Dl "GOOD 0.02" .It Ar score Op filename ... Similar to receive except that the databases are not modified in any way and only the score is printed to stdout. @@ -201,6 +232,28 @@ ignored. Scans each file (or stdin if no file is specified) and removes its term counts from the database. Messages which are not in the database (recognized using their message ids) are ignored. 
+.It Ar dump +Prints the contents of the word counts database one word per line in human +readable format with good count, spam count, and word in columns separated +by whitespace. Note that when using GDBM for the database the words are +printed in the order they are hashed so the results will need to be sorted +to be most useful. The standard unix sort command can do this. For +example to list all words from "most good" to "least good" use this +command: +.Pp +.Dl "spamprobe dump | sort -k 1 -n -r" +.Pp +To list all words from "most spammy" to "least spammy" use this command: +.Pp +.Dl "spamprobe dump | sort -k 2 -n -r" +.It Ar export +Similar to the dump command but prints the counts and words in a comma +separated format with the words surrounded by double quotes. This can be +more useful for importing into some databases. +.It Ar import Op filename ... +Reads the specified files which must contain export data written by the +export command. The terms and counts from this file are added to the +database. This can be used to convert a database from a prior version. .El .Sh ENVIRONMENT The @@ -244,9 +297,12 @@ Exit status is 0 on success, and 1 if .Nm encounters an invalid command. .Sh COMPATIBILITY -The +Versions of .Nm -command has no known compatibility issues. +previous to 0.7 use a different database format. To convert your existing +database to the new format use the following command. +.Pp +.Dl "spamprobe-export_0.6 | spamprobe import" .Sh SEE ALSO .Xr formail 1 , .Xr procmail 1 , diff --git a/mail/spamprobe/pkg-plist b/mail/spamprobe/pkg-plist index 39a83231119f..cd436410e120 100644 --- a/mail/spamprobe/pkg-plist +++ b/mail/spamprobe/pkg-plist @@ -1,2 +1,3 @@ bin/spamprobe +bin/spamprobe-export_0.6 man/man1/spamprobe.1.gz |