aboutsummaryrefslogtreecommitdiffstats
path: root/www
diff options
context:
space:
mode:
author: kuriyama <kuriyama@FreeBSD.org> 2009-03-06 06:55:11 +0800
committer: kuriyama <kuriyama@FreeBSD.org> 2009-03-06 06:55:11 +0800
commit: feaa9dd01d751be9585946d12bc339e38f39f919 (patch)
tree: a5a14198dd1164a87d6ebc94bf99ebc98249a6b0 /www
parent: 6d5469dcace54f36e3f1b5fc85de2cf337851dd1 (diff)
download: freebsd-ports-gnome-feaa9dd01d751be9585946d12bc339e38f39f919.tar.gz
freebsd-ports-gnome-feaa9dd01d751be9585946d12bc339e38f39f919.tar.zst
freebsd-ports-gnome-feaa9dd01d751be9585946d12bc339e38f39f919.zip
Add p5-HTML-ExtractContent 0.05, a Perl extension for extracting content
from HTML with scoring heuristics.
Diffstat (limited to 'www')
-rw-r--r-- www/Makefile 1
-rw-r--r-- www/p5-HTML-ExtractContent/Makefile 27
-rw-r--r-- www/p5-HTML-ExtractContent/distinfo 3
-rw-r--r-- www/p5-HTML-ExtractContent/pkg-descr 11
-rw-r--r-- www/p5-HTML-ExtractContent/pkg-plist 5
5 files changed, 47 insertions, 0 deletions
diff --git a/www/Makefile b/www/Makefile
index 29e211ca40aa..b0d26676cef4 100644
--- a/www/Makefile
+++ b/www/Makefile
@@ -831,6 +831,7 @@
SUBDIR += p5-HTML-Element-Library
SUBDIR += p5-HTML-Embperl
SUBDIR += p5-HTML-Encoding
+ SUBDIR += p5-HTML-ExtractContent
SUBDIR += p5-HTML-FillInForm
SUBDIR += p5-HTML-FillInForm-ForceUTF8
SUBDIR += p5-HTML-FormFu
diff --git a/www/p5-HTML-ExtractContent/Makefile b/www/p5-HTML-ExtractContent/Makefile
new file mode 100644
index 000000000000..504e59fef3e5
--- /dev/null
+++ b/www/p5-HTML-ExtractContent/Makefile
@@ -0,0 +1,27 @@
+# New ports collection makefile for: HTML::ExtractContent
+# Date created: 05 Mar 2009
+# Whom: Jun Kuriyama <kuriyama@FreeBSD.org>
+#
+# $FreeBSD$
+#
+
+PORTNAME= HTML-ExtractContent
+PORTVERSION= 0.05
+CATEGORIES= www perl5
+MASTER_SITES= CPAN
+PKGNAMEPREFIX= p5-
+
+MAINTAINER= kuriyama@FreeBSD.org
+COMMENT= Perl extension for HTML content extractor with scoring heuristics
+
+RUN_DEPENDS= \
+ p5-Class-Accessor-Lvalue>0:${PORTSDIR}/devel/p5-Class-Accessor-Lvalue \
+ p5-Exporter-Lite>0:${PORTSDIR}/devel/p5-Exporter-Lite \
+ p5-HTML-Parser>0:${PORTSDIR}/www/p5-HTML-Parser
+BUILD_DEPENDS= ${RUN_DEPENDS}
+
+PERL_CONFIGURE= yes
+
+MAN3= HTML::ExtractContent.3
+
+.include <bsd.port.mk>
diff --git a/www/p5-HTML-ExtractContent/distinfo b/www/p5-HTML-ExtractContent/distinfo
new file mode 100644
index 000000000000..1c41ee285394
--- /dev/null
+++ b/www/p5-HTML-ExtractContent/distinfo
@@ -0,0 +1,3 @@
+MD5 (HTML-ExtractContent-0.05.tar.gz) = 95c0f8be7624a4e71de6b7b3a0fe362b
+SHA256 (HTML-ExtractContent-0.05.tar.gz) = 973950b6445b9644d71caa79787cb4753ed75ec296d31ee5d6df9494491ac85f
+SIZE (HTML-ExtractContent-0.05.tar.gz) = 25899
diff --git a/www/p5-HTML-ExtractContent/pkg-descr b/www/p5-HTML-ExtractContent/pkg-descr
new file mode 100644
index 000000000000..16155a386c75
--- /dev/null
+++ b/www/p5-HTML-ExtractContent/pkg-descr
@@ -0,0 +1,11 @@
+HTML::ExtractContent is a module for extracting content from HTML with
+scoring heuristics.
+
+It guesses which block of HTML looks like content according to scores
+depending on the number of punctuation marks and the lengths of non-tag
+texts.
+
+It also guesses whether content ends in the block or continues to the next
+block.
+
+WWW: http://search.cpan.org/dist/HTML-ExtractContent/
diff --git a/www/p5-HTML-ExtractContent/pkg-plist b/www/p5-HTML-ExtractContent/pkg-plist
new file mode 100644
index 000000000000..b78c74786f45
--- /dev/null
+++ b/www/p5-HTML-ExtractContent/pkg-plist
@@ -0,0 +1,5 @@
+%%SITE_PERL%%/%%PERL_ARCH%%/auto/HTML/ExtractContent/.packlist
+%%SITE_PERL%%/HTML/ExtractContent.pm
+%%SITE_PERL%%/HTML/ExtractContent/Util.pm
+@dirrm %%SITE_PERL%%/HTML/ExtractContent
+@dirrm %%SITE_PERL%%/%%PERL_ARCH%%/auto/HTML/ExtractContent