aboutsummaryrefslogtreecommitdiffstats
path: root/japanese
diff options
context:
space:
mode:
author: scheidell <scheidell@FreeBSD.org> 2012-04-08 00:34:45 +0800
committer: scheidell <scheidell@FreeBSD.org> 2012-04-08 00:34:45 +0800
commit: fd3d6a12a18acacd47af4a7edfd128b59d64c1c3 (patch)
tree: e2f9f8507039d3418d11d7f3c7129d60c5bc66c6 /japanese
parent: 6d9f79c3d5e2dd505409340833bea00e0ef30bc9 (diff)
download: freebsd-ports-gnome-fd3d6a12a18acacd47af4a7edfd128b59d64c1c3.tar.gz
freebsd-ports-gnome-fd3d6a12a18acacd47af4a7edfd128b59d64c1c3.tar.zst
freebsd-ports-gnome-fd3d6a12a18acacd47af4a7edfd128b59d64c1c3.zip
- Revamp japanese/p5-Mail-SpamAssassin to work as a slave port [1]
- Bump PORTREVISION to follow mail/p5-Mail-SpamAssassin [1]
- Tweak files/*.plist to check for and optionally save tokenizer.pre [2]

PR: ports/165765 [1]
Submitted by: Masaki TAGAWA <masaki@club.kyutech.ac.jp> (maintainer)
Reviewed by: scheidell (me) [2]
Feature safe: yes
Diffstat (limited to 'japanese')
-rw-r--r-- japanese/p5-Mail-SpamAssassin/Makefile 340
-rw-r--r-- japanese/p5-Mail-SpamAssassin/distinfo 8
-rw-r--r-- japanese/p5-Mail-SpamAssassin/files/patch-rules-local.cf 15
-rw-r--r-- japanese/p5-Mail-SpamAssassin/files/patch-sa-learn.raw 27
-rw-r--r-- japanese/p5-Mail-SpamAssassin/files/sa-spamd.sh.in 43
-rw-r--r-- japanese/p5-Mail-SpamAssassin/files/spamassassin-3.3.2-ja-1.patch 1148
-rw-r--r-- japanese/p5-Mail-SpamAssassin/files/spamassassin-3.3.2-ja-1.plist 7
-rw-r--r-- japanese/p5-Mail-SpamAssassin/files/tokenizer.pre 8
-rw-r--r-- japanese/p5-Mail-SpamAssassin/pkg-deinstall 17
-rw-r--r-- japanese/p5-Mail-SpamAssassin/pkg-descr 14
-rw-r--r-- japanese/p5-Mail-SpamAssassin/pkg-install 66
-rw-r--r-- japanese/p5-Mail-SpamAssassin/pkg-message 4
-rw-r--r-- japanese/p5-Mail-SpamAssassin/pkg-plist 161
13 files changed, 1187 insertions, 671 deletions
diff --git a/japanese/p5-Mail-SpamAssassin/Makefile b/japanese/p5-Mail-SpamAssassin/Makefile
index bad670021dac..45575daa95cb 100644
--- a/japanese/p5-Mail-SpamAssassin/Makefile
+++ b/japanese/p5-Mail-SpamAssassin/Makefile
@@ -5,339 +5,43 @@
# $FreeBSD$
#
-PORTNAME= Mail-SpamAssassin
-PORTVERSION= 3.3.2
-PORTREVISION= 1
+PORTREVISION= 2
CATEGORIES= japanese mail perl5
-MASTER_SITES= ${MASTER_SITE_APACHE:S/$/:apache/} ${MASTER_SITE_PERL_CPAN:S/$/:cpan/} \
- ${PATCH_SITES}
-MASTER_SITE_SUBDIR= spamassassin/source/:apache Mail/:cpan
PKGNAMEPREFIX= ja-p5-
-DISTFILES= ${DISTNAME}${EXTRACT_SUFX}:apache,cpan \
- ${TOKENIZER_PRE}:JA ${DOCJA}:JA
-DIST_SUBDIR= ja-spamassassin
-EXTRACT_ONLY= ${DISTNAME}${EXTRACT_SUFX}
-
-PATCH_SITES= http://spamassassin.emaillab.jp/pub/ja-patch/sa${PORTVERSION:R}/:JA
-PATCHFILES= spamassassin-${PORTVERSION}-ja-${PATCHLEVEL}.patch:JA
-PATCHLEVEL= 1
-PATCH_DIST_STRIP= -p1
MAINTAINER= masaki@club.kyutech.ac.jp
-COMMENT= SpamAssassin with Japanese tokenizer
-
-RUN_DEPENDS= p5-NetAddr-IP>=4.00.7:${PORTSDIR}/net-mgmt/p5-NetAddr-IP \
- p5-Net-DNS>=0.63:${PORTSDIR}/dns/p5-Net-DNS \
- p5-HTML-Parser>=3.46:${PORTSDIR}/www/p5-HTML-Parser \
- p5-libwww>=0:${PORTSDIR}/www/p5-libwww \
- p5-Encode-Detect>=0:${PORTSDIR}/converters/p5-Encode-Detect \
- p5-Mail-Tools>=0:${PORTSDIR}/mail/p5-Mail-Tools \
- ja-p5-MeCab>=0.98:${PORTSDIR}/japanese/p5-MeCab
-BUILD_DEPENDS= p5-NetAddr-IP>=4.00.7:${PORTSDIR}/net-mgmt/p5-NetAddr-IP \
- p5-Net-DNS>=0.63:${PORTSDIR}/dns/p5-Net-DNS \
- p5-HTML-Parser>=3.46:${PORTSDIR}/www/p5-HTML-Parser \
- p5-libwww>=0:${PORTSDIR}/www/p5-libwww \
- p5-Encode-Detect>=0:${PORTSDIR}/converters/p5-Encode-Detect \
- p5-Mail-Tools>=0:${PORTSDIR}/mail/p5-Mail-Tools
-
-CONFLICTS= p5-Mail-SpamAssassin-[0-9]*
-
-PERL_CONFIGURE= yes
-USE_PERL5_RUN= 5.8.8+
-USE_LDCONFIG= yes
-CONFIGURE_ARGS= SYSCONFDIR="${PREFIX}/etc" \
- CONTACT_ADDRESS="${CONTACT_ADDRESS}" \
- LOCALSTATEDIR="/var/db/spamassassin"
-
-USERS= spamd
-GROUPS= spamd
-
-# You can override it if you like
-CONTACT_ADDRESS?= The administrator of that system
-
-OPTIONS= AS_ROOT "Run spamd as root (recommended)" on \
- SPAMC "Build spamd/spamc (not for amavisd)" on \
- SACOMPILE "sa-compile" off \
- DKIM "DKIM/DomainKeys Identified Mail" on \
- SSL "Build with SSL support for spamd/spamc" on \
- GNUPG "Install GnuPG (for sa-update)" on \
- MYSQL "Add MySQL support" off \
- PGSQL "Add PostreSQL support" off \
- RAZOR "Add Vipul's Razor support" on \
- SPF_QUERY "Add SPF query support" off \
- RELAY_COUNTRY "Relay country support" off \
- DCC "Add DCC support (see LICENSE)" off
-
-.if !defined(WITHOUT_SSL)
-USE_OPENSSL= yes
-.endif
-
-.include <bsd.port.pre.mk>
-
-.if ${PERL_LEVEL} < 500903
-RUN_DEPENDS+= p5-IO-Compress>=2.017:${PORTSDIR}/archivers/p5-IO-Compress
-.endif
-
-.if ${PERL_LEVEL} < 501000
-RUN_DEPENDS+= p5-Archive-Tar>=1.23:${PORTSDIR}/archivers/p5-Archive-Tar \
- p5-IO-Zlib>=1.04:${PORTSDIR}/archivers/p5-IO-Zlib \
- p5-Test-Harness>=3.16:${PORTSDIR}/devel/p5-Test-Harness
-.endif
-
-.if defined (WITH_SPAMC)
-CONFIGURE_ARGS+= BUILD_SPAMC=yes
-.else
-CONFIGURE_ARGS+= BUILD_SPAMC=no
-WITH_AS_ROOT=
-WITHOUT_SSL=1
-.endif
-
-.if defined(WITH_SPF_QUERY)
-RUN_DEPENDS+= ${SITE_PERL}/Mail/SPF.pm:${PORTSDIR}/mail/p5-Mail-SPF
-.endif
-.if !defined(WITHOUT_IPV6)
-RUN_DEPENDS+= ${SITE_PERL}/IO/Socket/INET6.pm:${PORTSDIR}/net/p5-IO-Socket-INET6
-.endif
-
-.if !defined(WITHOUT_SSL)
-.include "${PORTSDIR}/Mk/bsd.openssl.mk"
-RUN_DEPENDS+= ${SITE_PERL}/IO/Socket/SSL.pm:${PORTSDIR}/security/p5-IO-Socket-SSL
-CFLAGS+= -I${OPENSSLINC}
-LDFLAGS+= -L${OPENSSLLIB}
-CONFIGURE_ARGS+= ENABLE_SSL=yes
-PLIST_SUB+= SSL=""
-.else
-CONFIGURE_ARGS+= ENABLE_SSL=no
-PLIST_SUB+= SSL="@comment "
-.endif
-
-.if !defined(WITHOUT_GNUPG)
-RUN_DEPENDS+= gnupg>=1.4.7:${PORTSDIR}/security/gnupg
-.endif
+COMMENT= SpamAssassin with paches to handle multibyte character
-.if defined(WITH_MYSQL)
-RUN_DEPENDS+= ${SITE_PERL}/${PERL_ARCH}/DBD/mysql.pm:${PORTSDIR}/databases/p5-DBD-mysql
-.endif
+LICENSE= AL2
-.if defined(WITH_PGSQL)
-RUN_DEPENDS+= ${SITE_PERL}/${PERL_ARCH}/DBD/Pg.pm:${PORTSDIR}/databases/p5-DBD-Pg
-.endif
+MASTERDIR= ${.CURDIR}/../../mail/p5-Mail-SpamAssassin
-.if defined(WITH_RAZOR)
-RUN_DEPENDS+= razor-agents>=2.84:${PORTSDIR}/mail/razor-agents
-.else
-.if ${PERL_LEVEL} < 501000
-.if !defined(WITH_DKIM)
-RUN_DEPENDS+= p5-Digest-SHA1>=2.11:${PORTSDIR}/security/p5-Digest-SHA1
-.endif
-.endif
-.endif
+RUN_DEPENDS+= ja-p5-MeCab>=0.98:${PORTSDIR}/japanese/p5-MeCab
-.if defined(WITH_DKIM)
-RUN_DEPENDS+= ${SITE_PERL}/IO/Socket/SSL.pm:${PORTSDIR}/security/p5-IO-Socket-SSL
-. if ${PERL_LEVEL} < 501000
-RUN_DEPENDS+= ${SITE_PERL}/${PERL_ARCH}/Digest/SHA.pm:${PORTSDIR}/security/p5-Digest-SHA
-. endif
-RUN_DEPENDS+= p5-Mail-DKIM>=0.37:${PORTSDIR}/mail/p5-Mail-DKIM
-RUN_DEPENDS+= p5-Crypt-OpenSSL-RSA>=0.26_1:${PORTSDIR}/security/p5-Crypt-OpenSSL-RSA
-.endif
-
-.if defined(WITH_SACOMPILE)
-RUN_DEPENDS+= re2c>=.12.0:${PORTSDIR}/devel/re2c
-.endif
-
-.if defined(WITH_RELAY_COUNTRY)
-RUN_DEPENDS+= ${SITE_PERL}/IP/Country/Fast.pm:${PORTSDIR}/net/p5-IP-Country
-.endif
-
-.if defined(WITH_DCC)
-RUN_DEPENDS+= dcc-dccd>=1.3.111:${PORTSDIR}/mail/dcc-dccd
-.endif
+CONFLICTS= p5-Mail-SpamAssassin-[0-9]*
-MAN3= Mail::SpamAssassin.3 \
- Mail::SpamAssassin::AICache.3 \
- Mail::SpamAssassin::ArchiveIterator.3 \
- Mail::SpamAssassin::AsyncLoop.3 \
- Mail::SpamAssassin::AutoWhitelist.3 \
- Mail::SpamAssassin::Bayes.3 \
- Mail::SpamAssassin::BayesStore.3 \
- Mail::SpamAssassin::BayesStore::BDB.3 \
- Mail::SpamAssassin::BayesStore::MySQL.3 \
- Mail::SpamAssassin::BayesStore::PgSQL.3 \
- Mail::SpamAssassin::BayesStore::SQL.3 \
- Mail::SpamAssassin::Client.3 \
- Mail::SpamAssassin::Conf.3 \
- Mail::SpamAssassin::Conf::LDAP.3 \
- Mail::SpamAssassin::Conf::Parser.3 \
- Mail::SpamAssassin::Conf::SQL.3 \
- Mail::SpamAssassin::DnsResolver.3 \
- Mail::SpamAssassin::Logger.3 \
- Mail::SpamAssassin::Logger::File.3 \
- Mail::SpamAssassin::Logger::Stderr.3 \
- Mail::SpamAssassin::Logger::Syslog.3 \
- Mail::SpamAssassin::Message.3 \
- Mail::SpamAssassin::Message::Metadata.3 \
- Mail::SpamAssassin::Message::Node.3 \
- Mail::SpamAssassin::PerMsgLearner.3 \
- Mail::SpamAssassin::PerMsgStatus.3 \
- Mail::SpamAssassin::PersistentAddrList.3 \
- Mail::SpamAssassin::Plugin.3 \
- Mail::SpamAssassin::Plugin::ASN.3 \
- Mail::SpamAssassin::Plugin::AWL.3 \
- Mail::SpamAssassin::Plugin::AccessDB.3 \
- Mail::SpamAssassin::Plugin::AntiVirus.3 \
- Mail::SpamAssassin::Plugin::AutoLearnThreshold.3 \
- Mail::SpamAssassin::Plugin::Bayes.3 \
- Mail::SpamAssassin::Plugin::BodyRuleBaseExtractor.3 \
- Mail::SpamAssassin::Plugin::Check.3 \
- Mail::SpamAssassin::Plugin::DCC.3 \
- Mail::SpamAssassin::Plugin::DKIM.3 \
- Mail::SpamAssassin::Plugin::Hashcash.3 \
- Mail::SpamAssassin::Plugin::MIMEHeader.3 \
- Mail::SpamAssassin::Plugin::OneLineBodyRuleType.3 \
- Mail::SpamAssassin::Plugin::PhishTag.3 \
- Mail::SpamAssassin::Plugin::Pyzor.3 \
- Mail::SpamAssassin::Plugin::Razor2.3 \
- Mail::SpamAssassin::Plugin::RelayCountry.3 \
- Mail::SpamAssassin::Plugin::ReplaceTags.3 \
- Mail::SpamAssassin::Plugin::Reuse.3 \
- Mail::SpamAssassin::Plugin::Rule2XSBody.3 \
- Mail::SpamAssassin::Plugin::SPF.3 \
- Mail::SpamAssassin::Plugin::Shortcircuit.3 \
- Mail::SpamAssassin::Plugin::SpamCop.3 \
- Mail::SpamAssassin::Plugin::Test.3 \
- Mail::SpamAssassin::Plugin::TextCat.3 \
- Mail::SpamAssassin::Plugin::Tokenizer.3 \
- Mail::SpamAssassin::Plugin::Tokenizer::MeCab.3 \
- Mail::SpamAssassin::Plugin::Tokenizer::SimpleJA.3 \
- Mail::SpamAssassin::Plugin::URIDNSBL.3 \
- Mail::SpamAssassin::Plugin::URIDetail.3 \
- Mail::SpamAssassin::Plugin::VBounce.3 \
- Mail::SpamAssassin::Plugin::WhiteListSubject.3 \
- Mail::SpamAssassin::PluginHandler.3 \
- Mail::SpamAssassin::SQLBasedAddrList.3 \
- Mail::SpamAssassin::SubProcBackChannel.3 \
- Mail::SpamAssassin::Timeout.3 \
- Mail::SpamAssassin::Util.3 \
- Mail::SpamAssassin::Util::Charset.3 \
- Mail::SpamAssassin::Util::DependencyInfo.3 \
- Mail::SpamAssassin::Util::Progress.3 \
- Mail::SpamAssassin::Util::RegistrarBoundaries.3 \
- spamassassin-run.3
+EXTRA_PATCHES= ${.CURDIR}/files/spamassassin-3.3.2-ja-1.patch
-MAN1= spamd.1 spamassassin.1 spamc.1 sa-learn.1 sa-update.1 \
- spamassassin-run.1 sa-compile.1 sa-awl.1
+PKGMESSAGE= ${.CURDIR}/pkg-message
+PLIST= ${WRKDIR}/pkg-plist
-DOCSDIR= ${PREFIX}/share/doc/${PKGNAMEPREFIX}${PORTNAME}
-DATADIR= ${PREFIX}/share/spamassassin
-DOCS= CREDITS Changes INSTALL LICENSE NOTICE PACKAGING README TRADEMARK UPGRADE USAGE procmailrc.example
-DOCSSQL= README README.awl README.bayes awl_mysql.sql awl_pg.sql bayes_mysql.sql bayes_pg.sql userpref_mysql.sql userpref_pg.sql
-DOCSLDAP= README README.testing sa_test.ldif
-PORTDOCS= ${DOCS} sql ldap ${DOCJA}
-DOCJA= ${PATCHFILES:S/.patch:JA/.txt/}
TOKENIZER_PRE= tokenizer.pre
-USE_RC_SUBR= sa-spamd.sh
-
-.if defined(WITH_MYSQL) || defined(WITH_PGSQL)
-SUB_LIST+= SQL_FLAG="-Q"
-.else
-SUB_LIST+= SQL_FLAG=""
-.endif
-.if !defined(WITH_AS_ROOT)
-SUB_LIST+= RUN_AS_USER="-u ${USERS} -H /var/spool/spamd"
-.else
-SUB_LIST+= RUN_AS_USER=""
-.endif
-
-post-patch:
- @${FIND} ${WRKSRC} -name \*.orig -delete
- @${REINPLACE_CMD} -e 's#B_CONFDIR)/local.cf#B_CONFDIR)/local.cf.sample#g' \
- -e 's#B_CONFDIR)/init.pre#B_CONFDIR)/init.pre.sample#g' \
- -e 's#B_CONFDIR)/v310.pre#B_CONFDIR)/v310.pre.sample#g' \
- -e 's#B_CONFDIR)/v312.pre#B_CONFDIR)/v312.pre.sample#g' \
- -e 's#B_CONFDIR)/v320.pre#B_CONFDIR)/v320.pre.sample#g' \
- -e 's#B_CONFDIR)/v330.pre#B_CONFDIR)/v330.pre.sample#g' \
- -e 's/require DBI/0/' \
- ${WRKSRC}/Makefile.PL
- @${REINPLACE_CMD} -e '/^CC =/d; \
- s|@SSLCFLAGS@|& $${CFLAGS}|g' ${WRKSRC}/spamc/Makefile.in
+PLIST_SUB+= TOKENIZER_PRE=${TOKENIZER_PRE}
-.if defined(WITH_RAZOR)
- ${REINPLACE_CMD} -e '/Razor2/s/^#loadplugin/loadplugin/' ${WRKSRC}/rules/v312.pre
-.endif
-.if defined(WITH_RELAY_COUNTRY)
- ${REINPLACE_CMD} -e '/RelayCountry/s/^# ?loadplugin/loadplugin/' ${WRKSRC}/rules/init.pre
-.endif
-.if !defined(WITH_DKIM)
- ${REINPLACE_CMD} -e '/DKIM/s/^loadplugin/#loadplugin/' ${WRKSRC}/rules/v312.pre
-.endif
-.if !defined(WITH_SPF_QUERY)
- ${REINPLACE_CMD} -e '/SPF/s/^loadplugin/#loadplugin/' ${WRKSRC}/rules/init.pre
-.endif
-.if defined(WITH_DCC)
- ${REINPLACE_CMD} -e '/DCC/s/^#loadplugin/loadplugin/' ${WRKSRC}/rules/v310.pre
-.endif
-.if !defined(WITH_AWL)
- ${REINPLACE_CMD} -e '/AWL/s/^loadplugin/#loadplugin/' ${WRKSRC}/rules/v310.pre
-.endif
-.if defined(WITH_SACOMPILE)
- ${REINPLACE_CMD} -e '/Rule2XSBody/s/^# loadplugin/loadplugin/' ${WRKSRC}/rules/v320.pre
-.endif
+MAN3= Mail::SpamAssassin::Util::Charset.3 \
+ Mail::SpamAssassin::Plugin::Tokenizer::MeCab.3 \
+ Mail::SpamAssassin::Plugin::Tokenizer.3 \
+ Mail::SpamAssassin::Plugin::Tokenizer::SimpleJA.3
pre-install:
- @${MKDIR} ${DATADIR}
-
-post-build:
- @(cd ${BUILD_WRKSRC}; ${SETENV} ${MAKE_ENV} ${MAKE} ${MAKE_FLAGS} ${MAKEFILE} ${MAKE_ARGS} spamc/libspamc.so)
-.if !defined(WITHOUT_SSL)
- @(cd ${BUILD_WRKSRC}; ${SETENV} ${MAKE_ENV} ${MAKE} ${MAKE_FLAGS} ${MAKEFILE} ${MAKE_ARGS} spamc/libsslspamc.so)
-.endif
-
-pre-su-install:
- @USER=${USERS} GROUP=${GROUPS} ${SH} ${PKGINSTALL} ${PKGNAME} PRE-INSTALL
- @${INSTALL_PROGRAM} ${WRKSRC}/spamc/libspamc.so ${PREFIX}/lib/libspamc.so.0
- @${LN} -sf libspamc.so.0 ${PREFIX}/lib/libspamc.so
-.if !defined(WITHOUT_SSL)
- @${INSTALL_PROGRAM} ${WRKSRC}/spamc/libsslspamc.so ${PREFIX}/lib/libsslspamc.so.0
- @${LN} -sf libsslspamc.so.0 ${PREFIX}/lib/libsslspamc.so
-.endif
- @${INSTALL_DATA} ${WRKSRC}/spamc/libspamc.h ${PREFIX}/include
-
-post-install:
-.if defined (WITH_SPAMC)
- @${STRIP_CMD} ${PREFIX}/bin/spamc
-.endif
- @[ -f ${PREFIX}/etc/mail/spamassassin/init.pre ] || \
- ${CP} ${PREFIX}/etc/mail/spamassassin/init.pre.sample \
- ${PREFIX}/etc/mail/spamassassin/init.pre
- @[ -f ${PREFIX}/etc/mail/spamassassin/v310.pre ] || \
- ${CP} ${PREFIX}/etc/mail/spamassassin/v310.pre.sample \
- ${PREFIX}/etc/mail/spamassassin/v310.pre
- @[ -f ${PREFIX}/etc/mail/spamassassin/v312.pre ] || \
- ${CP} ${PREFIX}/etc/mail/spamassassin/v312.pre.sample \
- ${PREFIX}/etc/mail/spamassassin/v312.pre
- @[ -f ${PREFIX}/etc/mail/spamassassin/v320.pre ] || \
- ${CP} ${PREFIX}/etc/mail/spamassassin/v320.pre.sample \
- ${PREFIX}/etc/mail/spamassassin/v320.pre
- @PKG_PREFIX=${PREFIX} BATCH=${BATCH} SU_CMD="${SU_CMD}" USER=${USERS} GROUP=${GROUPS} ${SH} ${PKGDIR}/pkg-install ${PKGNAME} POST-INSTALL
- @[ -f ${PREFIX}/etc/mail/spamassassin/v330.pre ] || \
- ${CP} ${PREFIX}/etc/mail/spamassassin/v330.pre.sample \
- ${PREFIX}/etc/mail/spamassassin/v330.pre
-
- @${CP} ${DISTDIR}/${DIST_SUBDIR}/${TOKENIZER_PRE} ${PREFIX}/etc/mail/spamassassin/${TOKENIZER_PRE}.sample
- @[ -f ${PREFIX}/etc/mail/spamassassin/${TOKENIZER_PRE} ] || \
- ${CP} ${PREFIX}/etc/mail/spamassassin/${TOKENIZER_PRE}.sample \
- ${PREFIX}/etc/mail/spamassassin/${TOKENIZER_PRE}
-
-.if !defined(NOPORTDOCS)
- @${MKDIR} ${DOCSDIR} ${DOCSDIR}/sql ${DOCSDIR}/ldap
- @${INSTALL_DATA} ${DOCS:S|^|${WRKSRC}/|} ${DOCSDIR}
- @${INSTALL_DATA} ${DOCSSQL:S|^|${WRKSRC}/sql/|} ${DOCSDIR}/sql
- @${INSTALL_DATA} ${DOCSLDAP:S|^|${WRKSRC}/ldap/|} ${DOCSDIR}/ldap
- @${INSTALL_DATA} ${DISTDIR}/${DIST_SUBDIR}/${DOCJA} ${DOCSDIR}
+ @${CAT} ${EXTRA_PATCHES:S/.patch/.plist/} > ${PLIST}
+ @${CAT} ${PKGDIR}/pkg-plist >> ${PLIST}
-.endif
- @${SED} -e 's#PREFIX#${PREFIX}#' ${PKGMESSAGE}
+post-install::
+ @${CP} ${.CURDIR}/files/${TOKENIZER_PRE} ${ETCDIR}/${TOKENIZER_PRE}.sample
+ @[ -f ${ETCDIR}/${TOKENIZER_PRE} ] || \
+ ${INSTALL_DATA} ${ETCDIR}/${TOKENIZER_PRE}.sample \
+ ${ETCDIR}/${TOKENIZER_PRE}
-.include <bsd.port.post.mk>
+.include "${MASTERDIR}/Makefile"
diff --git a/japanese/p5-Mail-SpamAssassin/distinfo b/japanese/p5-Mail-SpamAssassin/distinfo
deleted file mode 100644
index b9842b70795a..000000000000
--- a/japanese/p5-Mail-SpamAssassin/distinfo
+++ /dev/null
@@ -1,8 +0,0 @@
-SHA256 (ja-spamassassin/Mail-SpamAssassin-3.3.2.tar.gz) = 5323038939a0ef9fc97d5264defce3ae1d95e98b3a94c4c3b583341c927f32df
-SIZE (ja-spamassassin/Mail-SpamAssassin-3.3.2.tar.gz) = 1208182
-SHA256 (ja-spamassassin/tokenizer.pre) = 9f8e30a8449fd13d571427ea30a252b4b275f153bde5345c50427a7aee3c90e0
-SIZE (ja-spamassassin/tokenizer.pre) = 163
-SHA256 (ja-spamassassin/spamassassin-3.3.2-ja-1.txt) = 6d818b246d2655abb260de83b4735c4f433808de7c19c4f905474c78c1ccbebd
-SIZE (ja-spamassassin/spamassassin-3.3.2-ja-1.txt) = 7246
-SHA256 (ja-spamassassin/spamassassin-3.3.2-ja-1.patch) = 073e9eaebf0dc2bf9e90f894c171a7654dad8444ed269528045e547302b7136a
-SIZE (ja-spamassassin/spamassassin-3.3.2-ja-1.patch) = 33740
diff --git a/japanese/p5-Mail-SpamAssassin/files/patch-rules-local.cf b/japanese/p5-Mail-SpamAssassin/files/patch-rules-local.cf
deleted file mode 100644
index 29979a03ee9f..000000000000
--- a/japanese/p5-Mail-SpamAssassin/files/patch-rules-local.cf
+++ /dev/null
@@ -1,15 +0,0 @@
---- rules/local.cf.orig 2008-01-06 06:11:39.000000000 +0900
-+++ rules/local.cf 2008-05-27 22:07:58.000000000 +0900
-@@ -51,3 +51,12 @@
- # bayes_ignore_header X-Spam-Flag
- # bayes_ignore_header X-Spam-Status
-
-+
-+# Normalize charset to UTF-8 (default:0)
-+#
-+# normalize_charset 1
-+#
-+# score FROM_EXCESS_BASE64 0
-+# score SUBJ_ILLEGAL_CHARS 0
-+# score MIME_BASE64_TEXT 1.0
-+# score TVD_SPACE_RATIO 0
diff --git a/japanese/p5-Mail-SpamAssassin/files/patch-sa-learn.raw b/japanese/p5-Mail-SpamAssassin/files/patch-sa-learn.raw
deleted file mode 100644
index 635cc317adc9..000000000000
--- a/japanese/p5-Mail-SpamAssassin/files/patch-sa-learn.raw
+++ /dev/null
@@ -1,27 +0,0 @@
---- sa-learn.raw.orig Wed Aug 8 06:19:47 2007
-+++ sa-learn.raw Mon Aug 20 19:05:06 2007
-@@ -101,6 +101,7 @@ GetOptions(
- 'local|L' => \$opt{'local'},
- 'no-sync|nosync' => \$opt{'nosync'},
- 'showdots' => \$opt{'showdots'},
-+ 'quiet|q' => \$opt{'quiet'},
- 'progress' => \$opt{'progress'},
- 'use-ignores' => \$opt{'use-ignores'},
- 'no-rebuild|norebuild' => sub { $opt{'nosync'} = 1; warn "The --no-rebuild option has been deprecated. Please use --no-sync instead.\n" },
-@@ -433,7 +434,7 @@ eval {
- $progress->final() if ($opt{progress} && $progress);
-
- my $phrase = defined $forget ? "Forgot" : "Learned";
-- print "$phrase tokens from $learnedcount message(s) ($messagecount message(s) examined)\n";
-+ print "$phrase tokens from $learnedcount message(s) ($messagecount message(s) examined)\n" if (!$opt{quiet});
-
- # If we needed to make a tempfile, go delete it.
- if ( defined $tempfile ) {
-@@ -601,6 +602,7 @@ Options:
- (default: /etc/mail/spamassassin)
- --cf='config line' Additional line of configuration
- -D, --debug [area=n,...] Print debugging messages
-+ -q, --quiet Reduce amount of information printed out
- -V, --version Print version
- -h, --help Print usage message
-
diff --git a/japanese/p5-Mail-SpamAssassin/files/sa-spamd.sh.in b/japanese/p5-Mail-SpamAssassin/files/sa-spamd.sh.in
deleted file mode 100644
index 5398ea6fa7e7..000000000000
--- a/japanese/p5-Mail-SpamAssassin/files/sa-spamd.sh.in
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/sh
-#
-# $FreeBSD$
-#
-
-# PROVIDE: spamd
-# REQUIRE: LOGIN
-# BEFORE: mail
-# KEYWORD: shutdown
-
-#
-# Add the following lines to /etc/rc.conf to enable spamd:
-#
-#spamd_enable="YES"
-#
-# See spamd(8) for flags
-#
-
-. /etc/rc.subr
-
-name=spamd
-rcvar=spamd_enable
-
-extra_commands="reload"
-load_rc_config $name
-
-# Set defaults
-: ${spamd_enable:="NO"}
-: ${spamd_flags="-c %%SQL_FLAG%% %%RUN_AS_USER%%"}
-
-pidfile=${spamd_pidfile:-"/var/run/spamd/spamd.pid"}
-command=%%PREFIX%%/bin/spamd
-command_args="-d -r ${pidfile}"
-required_dirs=%%PREFIX%%/share/spamassassin
-
-stop_postcmd=stop_postcmd
-
-stop_postcmd()
-{
- rm -f $pidfile
-}
-
-run_rc_command "$1"
diff --git a/japanese/p5-Mail-SpamAssassin/files/spamassassin-3.3.2-ja-1.patch b/japanese/p5-Mail-SpamAssassin/files/spamassassin-3.3.2-ja-1.patch
new file mode 100644
index 000000000000..9ce06cfe2d9f
--- /dev/null
+++ b/japanese/p5-Mail-SpamAssassin/files/spamassassin-3.3.2-ja-1.patch
@@ -0,0 +1,1148 @@
+diff -uNr lib/Mail/SpamAssassin/HTML.pm lib/Mail/SpamAssassin/HTML.pm
+--- lib/Mail/SpamAssassin/HTML.pm 2011-06-07 08:59:17.000000000 +0900
++++ lib/Mail/SpamAssassin/HTML.pm 2011-07-14 22:35:46.000000000 +0900
+@@ -84,7 +84,7 @@
+ $ok_attributes{span}{$_} = 1 for qw( style );
+
+ sub new {
+- my ($class) = @_;
++ my ($class, $opts) = @_;
+ my $self = $class->SUPER::new(
+ api_version => 3,
+ handlers => [
+@@ -97,6 +97,7 @@
+ declaration => ["html_declaration", "self,text"],
+ ],
+ marked_sections => 1);
++ $self->{normalize} = $opts->{'normalize'} || 0;
+
+ $self;
+ }
+@@ -672,7 +673,14 @@
+ }
+ }
+ else {
+- $text =~ s/[ \t\n\r\f\x0b\xa0]+/ /g;
++ if ($self->{normalize}) {
++ $text =~ s/\xc2\xa0/ /g; # no-break space
++ $text =~ s/\xe3\x80\x80/ /g; # ideographicspace
++ $text =~ s/[ \t\n\r\f\x0b]+/ /g;
++ }
++ else {
++ $text =~ s/[ \t\n\r\f\x0b\xa0]+/ /g;
++ }
+ # trim leading whitespace if previous element was whitespace
+ # and current element is not invisible
+ if (@{ $self->{text} } && !$display{invisible} &&
+diff -uNr lib/Mail/SpamAssassin/Message/Node.pm lib/Mail/SpamAssassin/Message/Node.pm
+--- lib/Mail/SpamAssassin/Message/Node.pm 2011-06-07 08:59:17.000000000 +0900
++++ lib/Mail/SpamAssassin/Message/Node.pm 2011-07-14 22:35:46.000000000 +0900
+@@ -42,6 +42,7 @@
+ use Mail::SpamAssassin::Constants qw(:sa);
+ use Mail::SpamAssassin::HTML;
+ use Mail::SpamAssassin::Logger;
++use Mail::SpamAssassin::Util::Charset;
+
+ =item new()
+
+@@ -387,27 +388,10 @@
+
+ sub _normalize {
+ my ($self, $data, $charset) = @_;
+- return $data unless $self->{normalize};
++ return wantarray ? ($data, $charset) : $data unless $self->{normalize};
+
+- my $detected = Encode::Detect::Detector::detect($data);
+-
+- my $converter;
+-
+- if ($charset && $charset !~ /^us-ascii$/i &&
+- ($detected || 'none') !~ /^(?:UTF|EUC|ISO-2022|Shift_JIS|Big5|GB)/i) {
+- dbg("message: Using labeled charset $charset");
+- $converter = Encode::find_encoding($charset);
+- }
+-
+- $converter = Encode::find_encoding($detected) unless $converter || !defined($detected);
+-
+- return $data unless $converter;
+-
+- dbg("message: Converting...");
+-
+- my $rv = $converter->decode($data, 0);
+- utf8::downgrade($rv, 1);
+- return $rv
++ my ($decoded_data, $detected_charset) = normalize_charset($data, $charset);
++ return wantarray ? ($decoded_data, $detected_charset) : $decoded_data;
+ }
+
+ =item rendered()
+@@ -430,8 +414,12 @@
+ # text/x-aol is ignored here, but looks like text/html ...
+ return(undef,undef) unless ( $self->{'type'} =~ /^text\/(?:plain|html)$/i );
+
+- my $text = $self->_normalize($self->decode(), $self->{charset});
++ my ($text, $charset) = $self->_normalize($self->decode(), $self->{charset});
+ my $raw = length($text);
++ if ($self->{normalize}) {
++ $self->{charset} = $charset;
++ $self->{language} = get_language($text, $charset);
++ }
+
+ # render text/html always, or any other text|text/plain part as text/html
+ # based on a heuristic which simulates a certain common mail client
+@@ -441,7 +429,7 @@
+ {
+ $self->{rendered_type} = 'text/html';
+
+- my $html = Mail::SpamAssassin::HTML->new(); # object
++ my $html = Mail::SpamAssassin::HTML->new({normalize=>$self->{normalize}}); # object
+ $html->parse($text); # parse+render text
+ $self->{rendered} = $html->get_rendered_text();
+ $self->{visible_rendered} = $html->get_rendered_text(invisible => 0);
+diff -uNr lib/Mail/SpamAssassin/Message.pm lib/Mail/SpamAssassin/Message.pm
+--- lib/Mail/SpamAssassin/Message.pm 2011-06-07 08:59:17.000000000 +0900
++++ lib/Mail/SpamAssassin/Message.pm 2011-07-14 22:35:46.000000000 +0900
+@@ -559,6 +559,8 @@
+ delete $self->{'pristine_headers'};
+ delete $self->{'line_ending'};
+ delete $self->{'missing_head_body_separator'};
++ delete $self->{'charset'};
++ delete $self->{'language'};
+
+ my @toclean = ( $self );
+
+@@ -585,6 +587,8 @@
+ delete $part->{'invisible_rendered'};
+ delete $part->{'type'};
+ delete $part->{'rendered_type'};
++ delete $self->{'charset'};
++ delete $self->{'language'};
+
+ # if there are children nodes, add them to the queue of nodes to clean up
+ if (exists $part->{'body_parts'}) {
+@@ -1014,7 +1018,14 @@
+
+ # whitespace handling (warning: small changes have large effects!)
+ $text =~ s/\n+\s*\n+/\f/gs; # double newlines => form feed
+- $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space
++ if ($self->{normalize}) {
++ $text =~ s/\xc2\xa0/ /g; # no-break space => space
++ $text =~ s/\xe3\x80\x80/ /g; # ideographicspace => space
++ $text =~ tr/ \t\n\r\x0b/ /s; # whitespace => space
++ }
++ else {
++ $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space
++ }
+ $text =~ tr/\f/\n/; # form feeds => newline
+
+ # warn "message: $text";
+@@ -1071,7 +1082,14 @@
+
+ # whitespace handling (warning: small changes have large effects!)
+ $text =~ s/\n+\s*\n+/\f/gs; # double newlines => form feed
+- $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space
++ if ($self->{normalize}) {
++ $text =~ s/\xc2\xa0/ /g; # no-break space => space
++ $text =~ s/\xe3\x80\x80/ /g; # ideographicspace => space
++ $text =~ tr/ \t\n\r\x0b/ /s; # whitespace => space
++ }
++ else {
++ $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space
++ }
+ $text =~ tr/\f/\n/; # form feeds => newline
+
+ my @textary = split_into_array_of_short_lines ($text);
+@@ -1122,7 +1140,14 @@
+
+ # whitespace handling (warning: small changes have large effects!)
+ $text =~ s/\n+\s*\n+/\f/gs; # double newlines => form feed
+- $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space
++ if ($self->{normalize}) {
++ $text =~ s/\xc2\xa0/ /g; # no-break space => space
++ $text =~ s/\xe3\x80\x80/ /g; # ideographicspace => space
++ $text =~ tr/ \t\n\r\x0b/ /s; # whitespace => space
++ }
++ else {
++ $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space
++ }
+ $text =~ tr/\f/\n/; # form feeds => newline
+
+ my @textary = split_into_array_of_short_lines ($text);
+@@ -1198,6 +1223,28 @@
+
+ # ---------------------------------------------------------------------------
+
++sub get_language {
++ my ($self) = @_;
++
++ if (defined $self->{language}) { return $self->{language}; }
++ my @parts = $self->find_parts(qr/^(?:text|message)\b/i,1);
++ return '' unless @parts;
++
++ # Go through each part
++ my @langs;
++ for(my $pt = 0 ; $pt <= $#parts ; $pt++ ) {
++ my $p = $parts[$pt];
++ my $lang = $p->{language};
++ next unless ($lang);
++ push(@langs, $lang) unless (grep(/^$lang$/, @langs))
++ }
++ $self->{language} = scalar(@langs) ? join(' ', @langs) : '';
++ return $self->{language};
++}
++
++# ---------------------------------------------------------------------------
++
++
+ 1;
+
+ =back
+diff -uNr lib/Mail/SpamAssassin/PerMsgStatus.pm lib/Mail/SpamAssassin/PerMsgStatus.pm
+--- lib/Mail/SpamAssassin/PerMsgStatus.pm 2011-06-07 08:59:17.000000000 +0900
++++ lib/Mail/SpamAssassin/PerMsgStatus.pm 2011-07-14 22:35:46.000000000 +0900
+@@ -53,6 +53,7 @@
+ use warnings;
+ use re 'taint';
+
++use Encode;
+ use Time::HiRes qw(time);
+
+ use Mail::SpamAssassin::Constants qw(:sa);
+@@ -733,19 +734,41 @@
+
+ # the report charset
+ my $report_charset = "; charset=iso-8859-1";
+- if ($self->{conf}->{report_charset}) {
+- $report_charset = "; charset=" . $self->{conf}->{report_charset};
+- }
+
+ # the SpamAssassin report
+ my $report = $self->get_report();
++ if ($self->{conf}->{report_charset}) {
++ $report_charset = "; charset=" . $self->{conf}->{report_charset};
++ }
+
+ # If there are any wide characters, need to MIME-encode in UTF-8
+ # TODO: If $report_charset is something other than iso-8859-1/us-ascii, then
+ # we could try converting to that charset if possible
+- unless ($] < 5.008 || utf8::downgrade($report, 1)) {
++ my $is_utf8 = 0;
++ if ($self->{conf}->{normalize_charset}) {
++ $report = Encode::decode_utf8($report);
++ $is_utf8 = 1;
++ }
++ else {
++ if ($self->{msg}->{charset}) {
++ eval {
++ my $scratch = $report;
++ $report = Encode::decode($self->{msg}->{charset},$scratch,Encode::FB_CROAK);
++ $is_utf8 = 1;
++ };
++ }
++ }
++ if ($is_utf8) {
++ $is_utf8 = 1;
++ eval {
++ my $scratch = $report;
++ $report = Encode::encode($self->{conf}->{report_charset},$scratch,Encode::FB_CROAK);
++ $is_utf8 = 0;
++ };
++ if ($is_utf8) {
++ $report = Encode::encode_utf8($report);
+ $report_charset = "; charset=utf-8";
+- utf8::encode($report);
++ }
+ }
+
+ # get original headers, "pristine" if we can do it
+diff -uNr lib/Mail/SpamAssassin/Plugin/Bayes.pm lib/Mail/SpamAssassin/Plugin/Bayes.pm
+--- lib/Mail/SpamAssassin/Plugin/Bayes.pm 2011-06-07 08:59:17.000000000 +0900
++++ lib/Mail/SpamAssassin/Plugin/Bayes.pm 2011-07-14 22:35:46.000000000 +0900
+@@ -223,6 +223,15 @@
+ # will require a longer token than English ones.)
+ use constant MAX_TOKEN_LENGTH => 15;
+
++# Skip if a token is too short.
++our $SKIP_UTF8_SHORT_TOKENS_RE = qr{(?:
++ [\x00-\x7F] # 1 byte
++ | [\xC0-\xDF][\x80-\xBF] # 2 bytes
++ | [\xE0-\xEF][\x80-\xBF]{2} # 3 bytes
++ | [\xF0-\xF7][\x80-\xBF]{3} # 4 bytes
++ | (?:\xE3[\x81-\x83][\x80-\xBF]){2} # 2 characters of Hiragana and Katakana
++)}x;
++
+ ###########################################################################
+
+ sub new {
+@@ -983,9 +992,28 @@
+ $msgdata->{bayes_token_body} = $msg->{msg}->get_visible_rendered_body_text_array();
+ $msgdata->{bayes_token_inviz} = $msg->{msg}->get_invisible_rendered_body_text_array();
+ @{$msgdata->{bayes_token_uris}} = $msg->get_uri_list();
++ if ($self->{conf}->{normalize_charset}) {
++ my $tokenizer = $self->get_tokenizer($msg);
++ if (ref($tokenizer)) {
++ $msgdata->{bayes_token_body} = $tokenizer->tokenize($msgdata->{bayes_token_body});
++ $msgdata->{bayes_token_inviz} = $tokenizer->tokenize($msgdata->{bayes_token_inviz});
++ }
++ }
+ return $msgdata;
+ }
+
++sub get_tokenizer {
++ my ($self, $msg) = @_;
++
++ my $tokenizer;
++ my @languages = split(/\s+/, $msg->{msg}->get_language());
++ foreach my $lang (@languages) {
++ $tokenizer = $self->{'conf'}->{'tokenizer'}->{$lang};
++ last if (ref($tokenizer));
++ }
++ return $tokenizer;
++}
++
+ ###########################################################################
+
+ # The calling functions expect a uniq'ed array of tokens ...
+@@ -1039,7 +1067,7 @@
+ # include quotes, .'s and -'s for URIs, and [$,]'s for Nigerian-scam strings,
+ # and ISO-8859-15 alphas. Do not split on @'s; better results keeping it.
+ # Some useful tokens: "$31,000,000" "www.clock-speed.net" "f*ck" "Hits!"
+- tr/-A-Za-z0-9,\@\*\!_'"\$.\241-\377 / /cs;
++ tr/-A-Za-z0-9,\@\*\!_'"\$.\200-\377 / /cs;
+
+ # DO split on "..." or "--" or "---"; common formatting error resulting in
+ # hapaxes. Keep the separator itself as a token, though, as long ones can
+@@ -1068,6 +1096,11 @@
+ #
+ next if ( defined $magic_re && $token =~ /$magic_re/ );
+
++ # Skip short UTF-8 tokens.
++ if ($self->{conf}->{normalize_charset}) {
++ next if ($token =~ /^$SKIP_UTF8_SHORT_TOKENS_RE$/o);
++ }
++
+ # *do* keep 3-byte tokens; there's some solid signs in there
+ my $len = length($token);
+
+@@ -1096,14 +1129,16 @@
+ # the domain ".net" appeared in the To header.
+ #
+ if ($len > MAX_TOKEN_LENGTH && $token !~ /\*/) {
+- if (TOKENIZE_LONG_8BIT_SEQS_AS_TUPLES && $token =~ /[\xa0-\xff]{2}/) {
+- # Matt sez: "Could be asian? Autrijus suggested doing character ngrams,
+- # but I'm doing tuples to keep the dbs small(er)." Sounds like a plan
+- # to me! (jm)
+- while ($token =~ s/^(..?)//) {
+- push (@rettokens, "8:$1");
+- }
+- next;
++ unless ($self->{conf}->{normalize_charset}) {
++ if (TOKENIZE_LONG_8BIT_SEQS_AS_TUPLES && $token =~ /[\xa0-\xff]{2}/) {
++ # Matt sez: "Could be asian? Autrijus suggested doing character ngrams,
++ # but I'm doing tuples to keep the dbs small(er)." Sounds like a plan
++ # to me! (jm)
++ while ($token =~ s/^(..?)//) {
++ push (@rettokens, "8:$1");
++ }
++ next;
++ }
+ }
+
+ if (($region == 0 && HDRS_TOKENIZE_LONG_TOKENS_AS_SKIPS)
+diff -uNr lib/Mail/SpamAssassin/Plugin/Tokenizer/MeCab.pm lib/Mail/SpamAssassin/Plugin/Tokenizer/MeCab.pm
+--- lib/Mail/SpamAssassin/Plugin/Tokenizer/MeCab.pm 1970-01-01 09:00:00.000000000 +0900
++++ lib/Mail/SpamAssassin/Plugin/Tokenizer/MeCab.pm 2011-07-14 22:29:19.000000000 +0900
+@@ -0,0 +1,84 @@
++# <@LICENSE>
++# Copyright 2004 Apache Software Foundation
++#
++# Licensed under the Apache License, Version 2.0 (the "License");
++# you may not use this file except in compliance with the License.
++# You may obtain a copy of the License at
++#
++# http://www.apache.org/licenses/LICENSE-2.0
++#
++# Unless required by applicable law or agreed to in writing, software
++# distributed under the License is distributed on an "AS IS" BASIS,
++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
++# See the License for the specific language governing permissions and
++# limitations under the License.
++# </@LICENSE>
++
++=head1 NAME
++
++Tokenizer::MeCab - Japanese tokenizer with MeCab
++
++=head1 SYNOPSIS
++
++loadplugin Mail::SpamAssassin::Plugin::Tokenizer::MeCab
++
++=head1 DESCRIPTION
++
++This plugin tokenizes a Japanese string with MeCab, a morphological
++analysis engine.
++
++Text::MeCab 0.12 or over is required.
++
++=cut
++
++package Mail::SpamAssassin::Plugin::Tokenizer::MeCab;
++
++use strict;
++use warnings;
++use Mail::SpamAssassin::Plugin::Tokenizer;
++
++use vars qw(@ISA);
++@ISA = qw(Mail::SpamAssassin::Plugin::Tokenizer);
++
++# Have to do this so that RPM doesn't find these as required perl modules
++BEGIN { require MeCab; }
++our $language = 'ja';
++our $mecab = new MeCab::Tagger(-Ochasen);
++
++sub new { # constructor; the base-class constructor registers this object as the tokenizer for $language
++ my $class = shift;
++ my $mailsaobject = shift;
++
++ $class = ref($class) || $class; # accept either a class name or an existing object as invocant
++ my $self = $class->SUPER::new($mailsaobject, $language);
++ bless ($self, $class);
++
++ return $self;
++}
++
++sub tokenize { # takes an array ref of strings, returns a new array ref with MeCab-tokenized strings
++ my $self = shift;
++ my $text_array = shift;
++
++ my @tokenized_array;
++ foreach my $text (@$text_array) { # $text aliases the caller's element, so the s/// below also edits the input array
++ next unless ($text); # skips undef and '' (and the string "0")
++ $text =~ s/([\x80-\xFF]{3,})/&_tokenize($1)/eg; # only runs of 3+ high-bit bytes are handed to _tokenize
++ push(@tokenized_array, $text);
++ }
++ return \@tokenized_array;
++}
++
++sub _tokenize { # run MeCab over $text and return the node surfaces joined by single spaces
++ my $text = shift;
++
++ my @buf;
++ for (my $node = $mecab->parseToNode($text); $node->{next}; $node = $node->{next}) { # ends at the node with no {next}; that last node is never pushed
++ push(@buf, $node->{surface});
++ }
++ my $tokenized = join(' ', @buf) . ' '; # always ends with one space
++ return $tokenized;
++}
++
++1;
++
+diff -uNr lib/Mail/SpamAssassin/Plugin/Tokenizer/SimpleJA.pm lib/Mail/SpamAssassin/Plugin/Tokenizer/SimpleJA.pm
+--- lib/Mail/SpamAssassin/Plugin/Tokenizer/SimpleJA.pm 1970-01-01 09:00:00.000000000 +0900
++++ lib/Mail/SpamAssassin/Plugin/Tokenizer/SimpleJA.pm 2011-07-14 22:29:19.000000000 +0900
+@@ -0,0 +1,111 @@
++# <@LICENSE>
++# Copyright 2004 Apache Software Foundation
++#
++# Licensed under the Apache License, Version 2.0 (the "License");
++# you may not use this file except in compliance with the License.
++# You may obtain a copy of the License at
++#
++# http://www.apache.org/licenses/LICENSE-2.0
++#
++# Unless required by applicable law or agreed to in writing, software
++# distributed under the License is distributed on an "AS IS" BASIS,
++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
++# See the License for the specific language governing permissions and
++# limitations under the License.
++# </@LICENSE>
++
++=head1 NAME
++
++Tokenizer::SimpleJA - simple Japanese tokenizer
++
++=head1 SYNOPSIS
++
++loadplugin Mail::SpamAssassin::Plugin::Tokenizer::SimpleJA
++
++=head1 DESCRIPTION
++
++This plugin simply tokenizes a Japanese string by separating runs of hiragana,
++katakana, kanji and fullwidth characters with spaces.
++
++=cut
++
++package Mail::SpamAssassin::Plugin::Tokenizer::SimpleJA;
++
++use strict;
++use warnings;
++use Mail::SpamAssassin::Plugin::Tokenizer;
++
++use vars qw(@ISA);
++@ISA = qw(Mail::SpamAssassin::Plugin::Tokenizer);
++
++our $language = 'ja';
++
++our $RE = qr{(
++ # Hiragana
++ (?:
++ \xE3\x81[\x80-\xBF]
++ | \xE3\x82[\x80-\x9F]
++ )+
++ # Katakana
++ | (?:
++ \xE3\x82[\xA0-\xBF]
++ | \xE3\x83[\x80-\xBF]
++ )+
++ # Kanji
++ | (?:
++ \xE3[\x90-\xBF][\x80-\xBF]
++ | [\xE4-\xE9][\x80-\xBF]{2}
++ | \xEF[\xA4-\xAB][\x80-\xBF]
++ )+
++ # Fullwidth
++ | (?:
++ \xEF\xBC[\x80-\xBF]
++ | \xEF\xBD[\x80-\x9F]
++ )+
++ # Others
++ | [\xC0-\xDF][\x80-\xBF]
++ | [\xE0-\xE2][\x80-\xBF]{2}
++ | \xE3\x80[\x80-\xBF]
++ | \xE3[\x84-\x8F][\x80-\xBF]
++ | [\xEA-\xEE][\x80-\xBF]{2}
++ | \xEF[\x80-\xA3][\x80-\xBF]
++ | \xEF[\xAC-\xBB][\x80-\xBF]
++ | \xEF\xBD[\xA0-\xBF]
++ | \xEF[\xBE-\xBF][\x80-\xBF]
++ | [\xF0-\xF7][\x80-\xBF]{3}
++)}x;
++
++sub new { # constructor; the base-class constructor registers this object as the tokenizer for $language
++ my $class = shift;
++ my $mailsaobject = shift;
++
++ $class = ref($class) || $class; # accept either a class name or an existing object as invocant
++ my $self = $class->SUPER::new($mailsaobject, $language);
++ bless ($self, $class);
++
++ return $self;
++}
++
++sub tokenize { # takes an array ref of strings, returns a new array ref with space-separated tokens
++ my $self = shift;
++ my $text_array = shift;
++
++ my @tokenized_array;
++ foreach my $text (@$text_array) { # $text aliases the caller's element, so the s/// below also edits the input array
++ next unless ($text); # skips undef and '' (and the string "0")
++ $text =~ s/([\x80-\xFF]{3,})/&_tokenize($1)/eg; # only runs of 3+ high-bit bytes are handed to _tokenize
++ push(@tokenized_array, $text);
++ }
++ return \@tokenized_array;
++}
++
++sub _tokenize { # put a space after every $RE match and one space in front of the whole string
++ my $text = shift;
++
++ $text =~ s/$RE/$1 /og; # /o: $RE is interpolated into the pattern only once
++ $text = ' ' . $text;
++ return $text;
++}
++
++1;
++
+diff -uNr lib/Mail/SpamAssassin/Plugin/Tokenizer.pm lib/Mail/SpamAssassin/Plugin/Tokenizer.pm
+--- lib/Mail/SpamAssassin/Plugin/Tokenizer.pm 1970-01-01 09:00:00.000000000 +0900
++++ lib/Mail/SpamAssassin/Plugin/Tokenizer.pm 2011-07-14 22:35:46.000000000 +0900
+@@ -0,0 +1,115 @@
++# <@LICENSE>
++# Copyright 2004 Apache Software Foundation
++#
++# Licensed under the Apache License, Version 2.0 (the "License");
++# you may not use this file except in compliance with the License.
++# You may obtain a copy of the License at
++#
++# http://www.apache.org/licenses/LICENSE-2.0
++#
++# Unless required by applicable law or agreed to in writing, software
++# distributed under the License is distributed on an "AS IS" BASIS,
++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
++# See the License for the specific language governing permissions and
++# limitations under the License.
++# </@LICENSE>
++
++=head1 NAME
++
++Mail::SpamAssassin::Plugin::Tokenizer - Tokenizer plugin base class
++
++=head1 SYNOPSIS
++
++=head2 SpamAssassin configuration:
++
++ loadplugin MyTokenizerPlugin /path/to/MyTokenizerPlugin.pm
++
++=head2 Perl code:
++
++ use Mail::SpamAssassin::Plugin::Tokenizer;
++ use vars qw(@ISA);
++ @ISA = qw(Mail::SpamAssassin::Plugin::Tokenizer);
++ # language to use this plugin
++ our $language = 'ja';
++
++ # constructor: register language
++ sub new {
++ my $class = shift;
++ my $mailsaobject = shift;
++
++ # some boilerplate...
++ $class = ref($class) || $class;
++ my $self = $class->SUPER::new($mailsaobject, $language);
++ bless ($self, $class);
++
++ return $self;
++ }
++
++ # tokenize function
++ sub tokenize {
++ my $self = shift;
++ my $text_array_ref = shift;
++
++ ......
++
++ return $tokenized_array_ref;
++ }
++
++
++=head1 DESCRIPTION
++
++This plugin is the base class for tokenizer plugins.
++Subclasses must define tokenize() and $language.
++
++=head1 INTERFACE
++
++ sub tokenize {
++ my $self = shift;
++ my $text_array_ref = shift;
++
++ ......
++
++ return $tokenized_array_ref;
++ }
++
++=cut
++
++package Mail::SpamAssassin::Plugin::Tokenizer;
++
++use Mail::SpamAssassin::Plugin;
++use Mail::SpamAssassin::Logger;
++use strict;
++use warnings;
++use bytes;
++
++use vars qw(@ISA);
++@ISA = qw(Mail::SpamAssassin::Plugin);
++
++sub new { # base constructor: builds the plugin object and registers it as the tokenizer for $language
++ my $class = shift;
++ my $mailsaobject = shift;
++ my $language = shift; # key the tokenizer is registered under; the subclasses in this patch pass 'ja'
++
++ # some boilerplate...
++ $class = ref($class) || $class;
++ my $self = $class->SUPER::new($mailsaobject);
++ bless ($self, $class);
++
++ if ($language) {
++ $self->{main}->{conf}->{tokenizer}->{$language} = $self; # one slot per language: a later plugin for the same key overwrites it
++ }
++ else {
++ dbg("plugin: $self: \$language is not defined"); # only logged; the object is still returned, just not registered
++ }
++
++ return $self;
++}
++
++sub tokenize { # default implementation: returns the given reference unchanged; subclasses override it
++ my ($self, $ref) = @_;
++
++ return $ref;
++}
++
++1;
++
+diff -uNr lib/Mail/SpamAssassin/Util/Charset.pm lib/Mail/SpamAssassin/Util/Charset.pm
+--- lib/Mail/SpamAssassin/Util/Charset.pm 1970-01-01 09:00:00.000000000 +0900
++++ lib/Mail/SpamAssassin/Util/Charset.pm 2011-07-14 22:29:19.000000000 +0900
+@@ -0,0 +1,471 @@
++# <@LICENSE>
++# Copyright 2006 Apache Software Foundation
++#
++# Licensed under the Apache License, Version 2.0 (the "License");
++# you may not use this file except in compliance with the License.
++# You may obtain a copy of the License at
++#
++# http://www.apache.org/licenses/LICENSE-2.0
++#
++# Unless required by applicable law or agreed to in writing, software
++# distributed under the License is distributed on an "AS IS" BASIS,
++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
++# See the License for the specific language governing permissions and
++# limitations under the License.
++# </@LICENSE>
++
++
++=head1 NAME
++
++ Mail::SpamAssassin::Util::Charset.pm - Utility for charset and language
++
++=head1 SYNOPSIS
++
++ my ($decoded, $detected) = Mail::SpamAssassin::Util::Charset::normalize_charset($str, $charset);
++ my $language = Mail::SpamAssassin::Util::Charset::get_language($str, $charset);
++
++=head1 DESCRIPTION
++
++This module implements utility methods for charset and language.
++
++=cut
++
++package Mail::SpamAssassin::Util::Charset;
++
++use strict;
++use warnings;
++use Encode;
++use Encode::Guess;
++use Encode::Alias;
++
++use vars qw (
++ @ISA @EXPORT
++);
++
++require Exporter;
++
++@ISA = qw(Exporter);
++@EXPORT = qw(normalize_charset get_language);
++
++###########################################################################
++
++use constant HAS_ENCODE_DETECT => eval { require Encode::Detect::Detector; };
++use constant HAS_ENCODE_HANEXTRA => eval { require Encode::HanExtra; };
++use constant HAS_ENCODE_EUCJPMS => eval { require Encode::EUCJPMS; };
++
++###########################################################################
++
++our $KANA_HAN_RE = qr{
++ # Hiragana and Katakana
++ \xE3[\x81-\x83][\x80-\xBF]
++ # Han
++ | \xE3[\x90-\xBF][\x80-\xBF]
++ | [\xE4-\xE9][\x80-\xBF]{2}
++ | \xEF[\xA4-\xAB][\x80-\xBF]
++}x;
++
++our %enc2lang;
++our %lang2enc;
++our %scr2lang;
++our %cjkscr2lang;
++our @scrorder;
++
++BEGIN {
++
++ # See the following URL about this map:
++ # http://czyborra.com/charsets/iso8859.html
++ # http://czyborra.com/charsets/codepages.html
++ # http://czyborra.com/charsets/cyrillic.html
++ # http://en.wikipedia.org/wiki/ISO_8859
++ # http://www.w3.org/International/O-charset-lang.html
++ %enc2lang = (
++ # built-in Encodings and Encode::Byte
++ # N. America
++ 'ascii' => 'en',
++ 'cp437' => 'en',
++ 'cp863' => 'weurope',
++
++ # W. Europe (Latin1, Latin9)
++ # fr es ca eu pt it sq rm nl de da sv no fi fo is ga gd en af
++ 'iso-8859-1' => 'weurope',
++ 'iso-8859-15' => 'weurope',
++ 'cp850' => 'weurope',
++ 'cp860' => 'weurope',
++ 'cp1252' => 'weurope',
++ 'MacRoman' => 'weurope',
++
++ # Cntrl. Europe / Latin2 / Latin10
++ # hr cs hu pl sr sk sl
++ 'iso-8859-2' => 'ceurope',
++ 'cp852' => 'ceurope',
++ 'cp1250' => 'ceurope',
++ 'MacCentralEurRoman' => 'ceurope',
++ 'MacCroatian' => 'ceurope',
++ 'iso-8859-16' => 'ceurope',
++ 'MacRomanian' => 'ceurope',
++
++ # Latin3 (Esperanto, Maltese, and Turkish. Turkish is now on 8859-9.)
++ # eo mt
++ 'iso-8859-3' => 'seurope',
++
++ # Baltics (Latin4, Latin7)
++ # lv lt
++ 'iso-8859-4' => 'neurope',
++ 'iso-8859-13' => 'baltic',
++ 'cp1257' => 'baltic',
++
++ # Nordics (Latin6)
++ # et kl iu se
++ 'iso-8859-10' => 'nordic',
++
++ # Cyrillics
++ # bg be uk sr mk ru
++ 'iso-8859-5' => 'ru',
++ 'cp855' => 'ru',
++ 'cp1251' => 'ru',
++ 'cp866' => 'ru',
++ 'MacCyrillic' => 'ru',
++ 'koi8-r' => 'ru',
++ 'MacUkrainian' => 'uk',
++ 'koi8-u' => 'uk',
++
++ # Arabic
++ 'iso-8859-6' => 'ar',
++ 'cp864' => 'ar',
++ 'cp1256' => 'ar',
++ 'MacArabic' => 'ar',
++ 'cp1006' => 'fa',
++ 'MacFarsi' => 'fa',
++
++ # Greek
++ 'iso-8859-7' => 'el',
++ 'cp1253' => 'el',
++ 'MacGreek' => 'el',
++
++ # Hebrew
++ # he yi
++ 'iso-8859-8' => 'he',
++ 'cp862' => 'he',
++ 'cp1255' => 'he',
++ 'MacHebrew' => 'he',
++
++ # Turkish
++ 'iso-8859-9' => 'tr',
++ 'cp857' => 'tr',
++ 'cp1254' => 'tr',
++ 'MacTurkish' => 'tr',
++
++ # Thai
++ 'iso-8859-11' => 'th',
++ 'cp874' => 'th',
++
++ # Celtics (Latin8)
++ # gd cy br
++ 'iso-8859-14' => 'celtic',
++
++ # Vietnamese
++ 'viscii' => 'vi',
++ 'cp1258' => 'vi',
++
++ # Encode::CN
++ 'euc-cn' => 'zh',
++ 'cp936' => 'zh',
++ 'hz' => 'zh',
++
++ # Encode::TW
++ 'big5-eten' => 'zh',
++ 'big5-hkscs' => 'zh',
++ 'cp950' => 'zh',
++
++ # Encode::JP
++ 'euc-jp' => 'ja',
++ 'shiftjis' => 'ja',
++ '7bit-jis' => 'ja',
++ 'iso-2022-jp' => 'ja',
++ 'iso-2022-jp-1' => 'ja',
++ 'cp932' => 'ja',
++
++ # Encode::KR
++ 'euc-kr' => 'ko',
++ 'cp949' => 'ko',
++ 'johab' => 'ko',
++ 'iso-2022-kr' => 'ko',
++
++ # Encode::HanExtra
++ 'euc-tw' => 'zh',
++ 'gb18030' => 'zh',
++
++ # Encode::JIS2K
++ 'euc-jisx0213' => 'ja',
++ 'shiftjisx0213' => 'ja',
++ 'iso-2022-jp-3' => 'ja',
++
++ # Encode::EUCJPMS
++ 'eucJP-ms' => 'ja',
++ 'cp51932' => 'ja',
++ 'cp50220' => 'ja',
++ 'cp50221' => 'ja',
++
++ );
++
++ %lang2enc = (
++ # Latin1
++ 'en' => ['ascii'],
++ 'weurope' => ['cp1252'],
++
++ # Latin2
++ 'ceurope' => ['cp1250'],
++
++ # Latin3
++ 'seurope' => ['iso-8859-3'],
++
++ # Latin4
++ 'neurope' => ['iso-8859-4'],
++
++ # Latin5
++ 'tr' => ['cp1254'],
++
++ # Latin6
++ 'nordic' => ['iso-8859-10'],
++
++ # Latin7
++ 'baltic' => ['cp1257'],
++
++ # Latin8
++ 'celtic' => ['iso-8859-14'],
++
++ # Non Latin
++ 'ru' => ['koi8-r', 'cp1251'],
++ 'uk' => ['koi8-u'],
++
++ 'ar' => ['cp1256'],
++ 'el' => ['cp1253'],
++ 'he' => ['cp1255'],
++ 'th' => ['cp874'],
++ 'vi' => ['viscii', 'cp1258'],
++ 'zh' => ['euc-cn', 'cp950'],
++ 'ja' => ['euc-jp', 'cp932'],
++ 'ko' => ['euc-kr', 'cp949'],
++
++ );
++
++ %scr2lang = (
++ 'InLatin1Supplement' => ['weurope'],
++ 'InLatinExtendedA' => [
++ 'ceurope',
++ 'seurope',
++ 'tr',
++ 'vi'
++ ],
++ 'InLatinExtendedB' => [
++ 'nordic',
++ 'baltic',
++ 'celtic'
++ ],
++ 'Thai' => ['th'],
++ 'Cyrillic' => ['ru', 'uk'],
++ 'Arabic' => ['ar'],
++ 'Greek' => ['el'],
++ 'Hebrew' => ['he'],
++ );
++
++ # better detection for CJK
++ @scrorder = ('Hiragana','Katakana','Hangul','Han',keys(%scr2lang));
++ %cjkscr2lang = (
++ 'Hiragana' => ['ja'],
++ 'Katakana' => ['ja'],
++ 'Hangul' => ['ko'],
++ 'Han' => ['zh', 'ja', 'ko'],
++ );
++
++ unless (HAS_ENCODE_HANEXTRA) {
++ Encode::Alias::define_alias( qr/^gb18030$/i => ' "euc-cn"' );
++ }
++ Encode::Alias::define_alias( qr/^unicode-1-1-(.+)$/i => ' "$1"' );
++ Encode::Alias::define_alias( qr/^TIS-620$/i => ' "iso-8859-11"' );
++ Encode::Alias::define_alias( qr/^x-mac-(.+)$/i => ' "Mac$1"' );
++ Encode::Alias::define_alias( qr/^Shift_JIS$/i => ' "cp932"' );
++ if (HAS_ENCODE_EUCJPMS) {
++ Encode::Alias::define_alias( qr/^iso-2022-jp$/i => ' "cp50221"' );
++ }
++}
++
++sub get_language { # map (UTF-8 text, charset name) to a language tag; 'en' is the fallback
++ my $str = shift; # $str must be UTF-8 encoding
++ my $charset = shift;
++
++ return 'en' unless $charset;
++ if ($charset !~ /^utf/i) {
++ return $enc2lang{$charset}; # exact, case-sensitive key lookup; undef when the charset is not in the table
++ } elsif (defined($str)) {
++ $str =~ s/[\x00-\x7F]//g; # remove ASCII characters
++ return 'en' if ($str eq '');
++
++ my %handled;
++ $str = Encode::decode_utf8($str) unless (Encode::is_utf8($str));
++ foreach my $scr (@scrorder) { # @scrorder lists the CJK scripts first
++ next if ($str !~ /\p{$scr}/);
++ my $scrlangs = exists($cjkscr2lang{$scr}) ? $cjkscr2lang{$scr} : $scr2lang{$scr};
++ foreach my $lang (@$scrlangs) {
++ next if (exists($handled{$lang}));
++ foreach my $enc (@{$lang2enc{$lang}}) {
++ my $scratch = $str;
++ Encode::encode($enc, $scratch, Encode::FB_QUIET); # called for its side effect: $scratch keeps only the part that could not be encoded
++ return $lang if ($scratch eq ''); # the whole text is representable in $enc
++ }
++ $handled{$lang} = 1;
++ }
++ }
++ }
++ return 'en';
++}
++
++# TEST 1: try conversion to use the specified charset.
++# TEST 2: try conversion to use Encode::Detect.
++# TEST 3: try conversion to use Encode::Guess.
++sub normalize_charset { # returns (UTF-8 bytes, detected encoding name) in list context, the bytes alone in scalar context
++ my $str = shift;
++ my $charset = shift;
++
++ return wantarray ? ($str, 'ascii') : $str unless ($str);
++
++ my $decoded;
++ my $detected;
++
++ if ($charset) {
++ ($decoded, $detected) = _specified_encoding($str, $charset);
++ }
++ unless ($detected) {
++ ($decoded, $detected) = _encode_detect($str);
++ }
++ unless ($detected) {
++ ($decoded, $detected) = _encode_guess($str);
++ }
++ unless ($detected) {
++ return wantarray ? ($str, undef) : $str; # nothing matched: hand back the input, in scalar context too
++ }
++ $decoded =~ s/^\x{feff}//g; # drop a leading BOM
++ $decoded = Encode::encode_utf8($decoded);
++
++ # unfold hiragana, katakana and han
++ if ($detected =~ /^(?:UTF|EUC|BIG5|GB|SHIFTJIS|ISO-2022|CP936$|CP932$|CP949|CP50221$)/i) {
++ $decoded =~ s/($KANA_HAN_RE)\012($KANA_HAN_RE)/$1$2/og;
++ }
++ return wantarray ? ($decoded, $detected) : $decoded;
++}
++
++sub _specified_encoding { # decode $str with the declared charset; returns (decoded, encoding name), name undef unless all input decoded
++ my $str = shift;
++ my $encoding = shift;
++
++ my $detected;
++ my $decoded;
++
++ return (undef, undef) unless ($encoding);
++
++ # note: ISO-2022-* text is not distinguishable from US-ASCII
++ return (undef, undef) if ($str =~ /\e/ and $encoding !~ /^ISO-2022/i);
++
++ # UTF-16|32 encoding without BOM cannot be trusted.
++ return (undef, undef) if ($encoding =~ /^UTF-32$/i and $str !~ /^(?:\xFF\xFE\x00\x00|\x00\x00\xFE\xFF)/);
++ return (undef, undef) if ($encoding =~ /^UTF-16$/i and $str !~ /^(?:\xFF\xFE|\xFE\xFF)/);
++
++ #$encoding = _get_alias($encoding);
++ my $encoder = Encode::find_encoding($encoding);
++ if (ref($encoder)) {
++ $decoded = $encoder->decode($str,Encode::FB_QUIET); # FB_QUIET consumes the decodable prefix of $str in place
++ $detected = $encoder->name if ($str eq ''); # accepted only when nothing was left undecoded
++ }
++ return ($decoded, $detected);
++}
++
++sub _encode_detect { # detect the charset with Encode::Detect and decode; returns (decoded, encoding name)
++ return (undef, undef) unless HAS_ENCODE_DETECT; # same two-element failure shape as the returns below
++ my $str = shift;
++
++ # UTF-16|32 encoding without BOM cannot be trusted.
++ return (undef, undef) if ($str =~ /\x00\x00/ and $str !~ /^(?:\xFF\xFE\x00\x00|\x00\x00\xFE\xFF)/);
++ return (undef, undef) if ($str =~ /\x00/ and $str !~ /^(?:\xFF\xFE|\xFE\xFF)/);
++
++ my $decoded;
++ my $detected = Encode::Detect::Detector::detect($str);
++ if ($detected) {
++ $detected = _get_alias($detected); # map the detector's name to one Encode knows
++ my $encoder = Encode::find_encoding($detected);
++ if (ref($encoder)) {
++ $decoded = $encoder->decode($str);
++ $detected = $decoded ? $encoder->name : undef; # an empty/false decode result counts as a failed detection
++ }
++ else {
++ $detected = undef;
++ }
++ }
++ return ($decoded, $detected);
++}
++
++sub _encode_guess { # last resort: guess the charset with Encode::Guess; returns (decoded, encoding name)
++ my $str = shift;
++
++ my $detected;
++ my $decoded;
++ my $encoder;
++
++ # Step 1: Examine ISO-2022-*.
++ if ($str =~ /\e/) {
++ $Encode::Guess::NoUTFAutoGuess = 1; # package-global switch; set back to 0 right after the call
++ $encoder = Encode::Guess::guess_encoding($str,
++ qw/cp50221 7bit-jis iso-2022-kr/);
++ $Encode::Guess::NoUTFAutoGuess = 0;
++ }
++
++ # Step 2: Examine US-ASCII/UTF-(8|16|32)
++ unless (ref($encoder)) { # only a reference (an encoding object) counts as a successful guess
++ $Encode::Guess::NoUTFAutoGuess = 0;
++ $encoder = Encode::Guess::guess_encoding($str);
++ }
++
++ # Step 3: Examine other encodings
++ unless (ref($encoder)) {
++ $Encode::Guess::NoUTFAutoGuess = 1;
++ eval { # a die inside the guess is swallowed here, so the flag reset below always runs
++ if ($str =~ /[\x80-\xFF]{4}/) { # four high-bit bytes in a row: try the CJK multibyte suspects
++ $encoder = Encode::Guess::guess_encoding($str,
++ qw/euc-cn big5-eten euc-jp cp932 euc-kr cp949/);
++ }
++ else {
++ $encoder = Encode::Guess::guess_encoding($str,
++ qw/iso-8859-1 cp1252/);
++ }
++ };
++ $Encode::Guess::NoUTFAutoGuess = 0;
++ }
++ if (ref($encoder)) {
++ $detected = $encoder->name;
++ if ($detected) {
++ $decoded = $encoder->decode($str);
++ }
++ }
++ return ($decoded, $detected);
++}
++
++sub _get_alias { # rewrite a detected charset name to the encoding name used in this module; returns the name
++ my $encoding = shift;
++
++ unless (HAS_ENCODE_HANEXTRA) {
++ $encoding =~ s/^gb18030$/euc-cn/i; # without Encode::HanExtra fall back to euc-cn
++ }
++ $encoding =~ s/^unicode-1-1-(.+)$/$1/i;
++ $encoding =~ s/^TIS-620$/iso-8859-11/i;
++ $encoding =~ s/^x-mac-(.+)$/Mac$1/i; # anchored like the other rules and the define_alias for x-mac-*
++ $encoding =~ s/^Shift_JIS$/cp932/i;
++ if (HAS_ENCODE_EUCJPMS) {
++ $encoding =~ s/^iso-2022-jp$/cp50221/i;
++ $encoding =~ s/^euc-jp$/cp51932/i;
++ }
++
++ return $encoding;
++}
++
++
++1;
++
diff --git a/japanese/p5-Mail-SpamAssassin/files/spamassassin-3.3.2-ja-1.plist b/japanese/p5-Mail-SpamAssassin/files/spamassassin-3.3.2-ja-1.plist
new file mode 100644
index 000000000000..11643c87b058
--- /dev/null
+++ b/japanese/p5-Mail-SpamAssassin/files/spamassassin-3.3.2-ja-1.plist
@@ -0,0 +1,7 @@
+%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer/MeCab.pm
+%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer/SimpleJA.pm
+@dirrm %%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer
+%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer.pm
+%%SITE_PERL%%/Mail/SpamAssassin/Util/Charset.pm
+@unexec if cmp -s %%PREFIX%%/%%ETCDIR%%/%%TOKENIZER_PRE%%.sample %%PREFIX%%/%%ETCDIR%%/%%TOKENIZER_PRE%%; then rm -f %%PREFIX%%/%%ETCDIR%%/%%TOKENIZER_PRE%%; fi
+%%ETCDIR%%/%%TOKENIZER_PRE%%.sample
diff --git a/japanese/p5-Mail-SpamAssassin/files/tokenizer.pre b/japanese/p5-Mail-SpamAssassin/files/tokenizer.pre
new file mode 100644
index 000000000000..d21410bbadc9
--- /dev/null
+++ b/japanese/p5-Mail-SpamAssassin/files/tokenizer.pre
@@ -0,0 +1,8 @@
+
+# Tokenizer::SimpleJA
+#
+loadplugin Mail::SpamAssassin::Plugin::Tokenizer::SimpleJA
+
+# Tokenizer::MeCab
+#
+#loadplugin Mail::SpamAssassin::Plugin::Tokenizer::MeCab
diff --git a/japanese/p5-Mail-SpamAssassin/pkg-deinstall b/japanese/p5-Mail-SpamAssassin/pkg-deinstall
deleted file mode 100644
index fb8983183880..000000000000
--- a/japanese/p5-Mail-SpamAssassin/pkg-deinstall
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/sh
-
-if [ "$2" != "POST-DEINSTALL" ]; then
- exit 0
-fi
-
-if [ -d /var/db/spamassassin ]; then
- echo "To delete /var/db/spamassassin, use 'rm -rf /var/db/spamassassin'"
-fi
-
-USER=spamd
-
-if pw usershow "${USER}" 2>/dev/null 1>&2; then
- echo "To delete ${USER} user permanently, use 'rmuser ${USER}'"
-fi
-
-exit 0
diff --git a/japanese/p5-Mail-SpamAssassin/pkg-descr b/japanese/p5-Mail-SpamAssassin/pkg-descr
deleted file mode 100644
index e4807953b7aa..000000000000
--- a/japanese/p5-Mail-SpamAssassin/pkg-descr
+++ /dev/null
@@ -1,14 +0,0 @@
-SpamAssassin is a mail filter which attempts to identify spam using text
-analysis and several internet-based realtime blacklists.
-
-Using its rule base, it uses a wide range of heuristic tests on mail
-headers and body text to identify "spam", also known as unsolicited
-commercial email.
-
-Once identified, the mail can then be optionally tagged as spam for later
-filtering using the user's own mail user-agent application.
-
-Additional drop-in rule sets are available at
-http://wiki.apache.org/spamassassin/CustomRulesets
-
-WWW: http://spamassassin.apache.org/
diff --git a/japanese/p5-Mail-SpamAssassin/pkg-install b/japanese/p5-Mail-SpamAssassin/pkg-install
deleted file mode 100644
index 568dc5e832e4..000000000000
--- a/japanese/p5-Mail-SpamAssassin/pkg-install
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/bin/sh
-PKG_PREFIX=${PKG_PREFIX:-/usr/local}
-USER=${USER:-spamd}
-GROUP=${GROUP:-spamd}
-HOME=/var/spool/${USER}
-
-if [ "$2" = "POST-INSTALL" ];then
-ask() {
- local question default answer
-
- question=$1
- default=$2
- if [ -z "${PACKAGE_BUILDING}" -a -z "${BATCH}" ]; then
- read -t120 -p "${question} [${default}]? " answer
- fi
- echo ${answer:-${default}}
-}
-
-yesno() {
- local question default answer
-
- question=$1
- default=$2
- while :; do
- answer=$(ask "${question}" "${default}")
- case "${answer}" in
- [Yy]*) return 0;;
- [Nn]*) return 1;;
- esac
- echo "Please answer yes or no."
- done
-}
-
- # Create pid directory
- install -d -o ${USER} -g ${GROUP} /var/run/spamd
- /usr/bin/su root -c "${PKG_PREFIX}/bin/spamassassin -x -L --lint"
-
- if [ ${?} -eq 9 ];then
- echo "***********************************************"
- echo "*__ ___ ____ _ _ ___ _ _ ____ *"
- echo "*\ \ / / \ | _ \| \ | |_ _| \ | |/ ___|*"
- echo "* \ \ /\ / / _ \ | |_) | \| || || \| | | _ *"
- echo "* \ V V / ___ \| _ <| |\ || || |\ | |_| |*"
- echo "* \_/\_/_/ \_\_| \_\_| \_|___|_| \_|\____|*"
- echo "* *"
- echo "*You must install rules before starting spamd!*"
- echo "***********************************************"
- if [ -z "${PACKAGE_BUILDING}" -a -z "${BATCH}" ]; then
- if yesno "Do you wish to run sa-update to fetch new rules" "N";then
- ${PKG_PREFIX}/bin/sa-update || true
- else
- echo ""
- fi
- /usr/bin/su root -c "${PKG_PREFIX}/bin/spamassassin -x -L --lint"
- if [ ${?} -eq 0 ] && grep '^load.*Rule2XSBody' ${PKG_PREFIX}/etc/mail/spamassassin/v320.pre > /dev/null ;then
- if yesno "Do you wish to compile rules with re2c (will take a long time)" "N";then
- ${PKG_PREFIX}/bin/sa-compile || true
- fi
- fi
- fi
- fi
-
- exit 0
-fi # post-install
-
-exit 0
diff --git a/japanese/p5-Mail-SpamAssassin/pkg-message b/japanese/p5-Mail-SpamAssassin/pkg-message
index 01153919b024..2ba532e27974 100644
--- a/japanese/p5-Mail-SpamAssassin/pkg-message
+++ b/japanese/p5-Mail-SpamAssassin/pkg-message
@@ -39,8 +39,8 @@ as root. To change this, also add this to rc.conf:
spamd_flags="-u spamd -H /var/spool/spamd"
************************************************************************
-For Japanese users, see document in
-PREFIX/share/doc/ja-p5-Mail-SpamAssassin/
+For Japanese users, see documents in
+http://emaillab.jp/spamassassin/ja-patch/
Tokenizer::MeCab uses UTF-8 encoding. You may have to manually
(re)install the following ports with the build options for UTF-8:
diff --git a/japanese/p5-Mail-SpamAssassin/pkg-plist b/japanese/p5-Mail-SpamAssassin/pkg-plist
deleted file mode 100644
index 43dae0435ff3..000000000000
--- a/japanese/p5-Mail-SpamAssassin/pkg-plist
+++ /dev/null
@@ -1,161 +0,0 @@
-@stopdaemon sa-spamd
-bin/sa-awl
-bin/sa-check_spamd
-bin/sa-compile
-bin/sa-learn
-bin/sa-update
-bin/spamassassin
-bin/spamc
-bin/spamd
-@unexec rm -rf %D/etc/mail/spamassassin/sa-update-keys || true
-etc/mail/spamassassin/local.cf.sample
-@unexec if cmp -s %B/init.pre.sample %B/init.pre; then rm -f %B/init.pre; fi
-etc/mail/spamassassin/init.pre.sample
-@exec [ -f %B/init.pre ] || cp %B/%f %B/init.pre
-@unexec if cmp -s %B/tokenizer.pre.sample %B/tokenizer.pre; then rm -f %B/tokenizer.pre; fi
-etc/mail/spamassassin/tokenizer.pre.sample
-@exec [ -f %B/tokenizer.pre ] || cp %B/%f %B/tokenizer.pre
-@unexec if cmp -s %B/v310.pre.sample %B/v310.pre; then rm -f %B/v310.pre; fi
-etc/mail/spamassassin/v310.pre.sample
-@exec [ -f %B/v310.pre ] || cp %B/%f %B/v310.pre
-@unexec if cmp -s %B/v312.pre.sample %B/v312.pre; then rm -f %B/v312.pre; fi
-etc/mail/spamassassin/v312.pre.sample
-@exec [ -f %B/v312.pre ] || cp %B/%f %B/v312.pre
-@unexec if cmp -s %B/v320.pre.sample %B/v320.pre; then rm -f %B/v320.pre; fi
-etc/mail/spamassassin/v320.pre.sample
-@exec [ -f %B/v320.pre ] || cp %B/%f %B/v320.pre
-@unexec if cmp -s %B/v330.pre.sample %B/v330.pre; then rm -f %B/v330.pre;fi
-etc/mail/spamassassin/v330.pre.sample
-@exec [ -f %B/v330.pre ] || cp %B/%f %B/v330.pre
-include/libspamc.h
-lib/libspamc.so
-lib/libspamc.so.0
-%%SSL%%lib/libsslspamc.so
-%%SSL%%lib/libsslspamc.so.0
-%%SITE_PERL%%/Mail/SpamAssassin.pm
-%%SITE_PERL%%/Mail/SpamAssassin/AICache.pm
-%%SITE_PERL%%/Mail/SpamAssassin/ArchiveIterator.pm
-%%SITE_PERL%%/Mail/SpamAssassin/AsyncLoop.pm
-%%SITE_PERL%%/Mail/SpamAssassin/AutoWhitelist.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Bayes.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Bayes/CombineChi.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Bayes/CombineNaiveBayes.pm
-%%SITE_PERL%%/Mail/SpamAssassin/BayesStore.pm
-%%SITE_PERL%%/Mail/SpamAssassin/BayesStore/BDB.pm
-%%SITE_PERL%%/Mail/SpamAssassin/BayesStore/DBM.pm
-%%SITE_PERL%%/Mail/SpamAssassin/BayesStore/MySQL.pm
-%%SITE_PERL%%/Mail/SpamAssassin/BayesStore/PgSQL.pm
-%%SITE_PERL%%/Mail/SpamAssassin/BayesStore/SDBM.pm
-%%SITE_PERL%%/Mail/SpamAssassin/BayesStore/SQL.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Client.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Conf.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Conf/LDAP.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Conf/Parser.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Conf/SQL.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Constants.pm
-%%SITE_PERL%%/Mail/SpamAssassin/DBBasedAddrList.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Dns.pm
-%%SITE_PERL%%/Mail/SpamAssassin/DnsResolver.pm
-%%SITE_PERL%%/Mail/SpamAssassin/HTML.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Locales.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Locker.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Locker/Flock.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Locker/UnixNFSSafe.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Locker/Win32.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Logger.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Logger/File.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Logger/Stderr.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Logger/Syslog.pm
-%%SITE_PERL%%/Mail/SpamAssassin/MailingList.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Message.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Message/Metadata.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Message/Metadata/Received.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Message/Node.pm
-%%SITE_PERL%%/Mail/SpamAssassin/NetSet.pm
-%%SITE_PERL%%/Mail/SpamAssassin/PerMsgLearner.pm
-%%SITE_PERL%%/Mail/SpamAssassin/PerMsgStatus.pm
-%%SITE_PERL%%/Mail/SpamAssassin/PersistentAddrList.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/ASN.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/AWL.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/AccessDB.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/AntiVirus.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/AutoLearnThreshold.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Bayes.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/BodyEval.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Check.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/DCC.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/DKIM.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/DNSEval.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/FreeMail.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/HTMLEval.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/HTTPSMismatch.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Hashcash.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/HeaderEval.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/ImageInfo.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/MIMEEval.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/MIMEHeader.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/OneLineBodyRuleType.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/PhishTag.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Pyzor.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Razor2.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/RelayCountry.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/RelayEval.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/ReplaceTags.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Reuse.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Rule2XSBody.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/SPF.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Shortcircuit.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/SpamCop.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Test.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/TextCat.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer/MeCab.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer/SimpleJA.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/URIDNSBL.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/URIDetail.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/URIEval.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/VBounce.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/WLBLEval.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/WhiteListSubject.pm
-%%SITE_PERL%%/Mail/SpamAssassin/PluginHandler.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Reporter.pm
-%%SITE_PERL%%/Mail/SpamAssassin/SQLBasedAddrList.pm
-%%SITE_PERL%%/Mail/SpamAssassin/SpamdForkScaling.pm
-%%SITE_PERL%%/Mail/SpamAssassin/SubProcBackChannel.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Timeout.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Util.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Util/Charset.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Util/DependencyInfo.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Util/Progress.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Util/RegistrarBoundaries.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Util/ScopedTimer.pm
-%%SITE_PERL%%/Mail/SpamAssassin/Util/TieOneStringHash.pm
-%%SITE_PERL%%/%%PERL_ARCH%%/auto/Mail/SpamAssassin/.packlist
-%%SITE_PERL%%/spamassassin-run.pod
-%%DATADIR%%/languages
-%%DATADIR%%/sa-update-pubkey.txt
-%%DATADIR%%/user_prefs.template
-@unexec rm -rf /var/lib/spamassassin/2* || true
-@unexec rmdir /var/lib/spamassassin 2>/dev/null || true
-@unexec rmdir /var/lib 2>/dev/null || true
-@unexec rmdir /var/db/spamassassin || true
-@dirrm %%DATADIR%%
-@dirrm %%SITE_PERL%%/%%PERL_ARCH%%/auto/Mail/SpamAssassin
-@dirrmtry %%SITE_PERL%%/%%PERL_ARCH%%/auto/Mail
-@dirrm %%SITE_PERL%%/Mail/SpamAssassin/Util
-@dirrm %%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer
-@dirrmtry %%SITE_PERL%%/Mail/SpamAssassin/Plugin
-@dirrm %%SITE_PERL%%/Mail/SpamAssassin/Message/Metadata
-@dirrm %%SITE_PERL%%/Mail/SpamAssassin/Message
-@dirrm %%SITE_PERL%%/Mail/SpamAssassin/Logger
-@dirrm %%SITE_PERL%%/Mail/SpamAssassin/Locker
-@dirrm %%SITE_PERL%%/Mail/SpamAssassin/Conf
-@dirrm %%SITE_PERL%%/Mail/SpamAssassin/BayesStore
-@dirrm %%SITE_PERL%%/Mail/SpamAssassin/Bayes
-@dirrmtry %%SITE_PERL%%/Mail/SpamAssassin
-@dirrmtry %%SITE_PERL%%/Mail
-@dirrmtry etc/mail/spamassassin
-@dirrmtry etc/mail
-@unexec rm -rf /var/run/spamd || true