diff options
author miwi <miwi@FreeBSD.org> 2006-09-25 03:51:59 +0800
committer miwi <miwi@FreeBSD.org> 2006-09-25 03:51:59 +0800
commit 1baaeec35fbcf7244f5e016570bcca712365d964 (patch)
parent f61928cccbd672a1c978abeacbc2b1dc7c5a1e70 (diff)
Text::Language::Guess guesses a document's language. Its implementation
is simple: Using "Text::ExtractWords" and "Lingua::StopWords" from CPAN,
it determines how many of the known stopwords the document contains for
each language supported by "Lingua::StopWords".

Each word in the document recognized as stopword of a particular
language scores one point for this language.

The "language_guess()" function takes a document as a parameter and
returns the abbreviation of the language that it is most likely written
in.

Author: Mike Schilli <cpan@perlmeister.com>
WWW: http://search.cpan.org/~mschilli/Text-Language-Guess-0.02/

PR: ports/103571
Submitted by: Masahiro Teramoto <markun@onohara.to>
5 files changed, 61 insertions, 0 deletions
diff --git a/textproc/Makefile b/textproc/Makefile
index cfb7de117137..dbfd1dd08ff6 100644
--- a/textproc/Makefile
+++ b/textproc/Makefile
@@ -508,6 +508,7 @@
SUBDIR += p5-Text-Glob
SUBDIR += p5-Text-Graphics
SUBDIR += p5-Text-Hatena
+ SUBDIR += p5-Text-Language-Guess
SUBDIR += p5-Text-Markdown
SUBDIR += p5-Text-Metaphone
SUBDIR += p5-Text-NSP
diff --git a/textproc/p5-Text-Language-Guess/Makefile b/textproc/p5-Text-Language-Guess/Makefile
new file mode 100644
index 000000000000..82a0ab656005
--- /dev/null
+++ b/textproc/p5-Text-Language-Guess/Makefile
@@ -0,0 +1,34 @@
+# New ports collection makefile for: textproc/p5-Text-Language-Guess
+# Date created: 2006-09-25
+# Whom: Masahiro Teramoto <markun@onohara.to>
+# $FreeBSD$
+PORTNAME= Text-Language-Guess
+CATEGORIES= textproc perl5
+MAINTAINER= markun@onohara.to
+COMMENT= Trained module to guess a document's language
+# Perl modules this port depends on at build time.
+BUILD_DEPENDS= p5-Log-Log4perl>=1.0:${PORTSDIR}/devel/p5-Log-Log4perl \
+ p5-Text-ExtractWords>=0:${PORTSDIR}/textproc/p5-Text-ExtractWords \
+ p5-Lingua-StopWords>=0:${PORTSDIR}/textproc/p5-Lingua-StopWords
+# Manual pages provided by the port (sections 1 and 3).
+MAN1= language-guess.1
+MAN3= Text::Language::Guess.3
+.include <bsd.port.pre.mk>
+# Mark the port as ignored when perl is older than 5.6.
+.if ${PERL_LEVEL} < 500600
+IGNORE= requires perl 5.6.x or later. Install lang/perl5 then try again
+.endif
+.include <bsd.port.post.mk>
diff --git a/textproc/p5-Text-Language-Guess/distinfo b/textproc/p5-Text-Language-Guess/distinfo
new file mode 100644
index 000000000000..ead33bbc403b
--- /dev/null
+++ b/textproc/p5-Text-Language-Guess/distinfo
@@ -0,0 +1,3 @@
+MD5 (Text-Language-Guess-0.02.tar.gz) = 66fbb68b17c3e62febbba633111f852e
+SHA256 (Text-Language-Guess-0.02.tar.gz) = 12ef612c1de0451367d403db73723446b836e2e10adeec5e9386b7baa8ede12f
+SIZE (Text-Language-Guess-0.02.tar.gz) = 5377
diff --git a/textproc/p5-Text-Language-Guess/pkg-descr b/textproc/p5-Text-Language-Guess/pkg-descr
new file mode 100644
index 000000000000..9ab26f30870e
--- /dev/null
+++ b/textproc/p5-Text-Language-Guess/pkg-descr
@@ -0,0 +1,14 @@
+Text::Language::Guess guesses a document's language. Its implementation
+is simple: Using "Text::ExtractWords" and "Lingua::StopWords" from CPAN,
+it determines how many of the known stopwords the document contains for
+each language supported by "Lingua::StopWords".
+Each word in the document recognized as stopword of a particular
+language scores one point for this language.
+The "language_guess()" function takes a document as a parameter and
+returns the abbreviation of the language that it is most likely written in.
+Author: Mike Schilli <cpan@perlmeister.com>
+WWW: http://search.cpan.org/~mschilli/Text-Language-Guess-0.02/
diff --git a/textproc/p5-Text-Language-Guess/pkg-plist b/textproc/p5-Text-Language-Guess/pkg-plist
new file mode 100644
index 000000000000..1ac4805998da
--- /dev/null
+++ b/textproc/p5-Text-Language-Guess/pkg-plist
@@ -0,0 +1,9 @@
+@comment $FreeBSD$
+@dirrmtry %%SITE_PERL%%/%%PERL_ARCH%%/auto/Text/Language/Guess
+@dirrmtry %%SITE_PERL%%/%%PERL_ARCH%%/auto/Text/Language
+@dirrmtry %%SITE_PERL%%/%%PERL_ARCH%%/auto/Text
+@dirrmtry %%SITE_PERL%%/Text/Language
+@dirrmtry %%SITE_PERL%%/Text