diff options
-rw-r--r-- | www/Makefile | 1 | ||||
-rw-r--r-- | www/p5-HTML-ExtractContent/Makefile | 27 | ||||
-rw-r--r-- | www/p5-HTML-ExtractContent/distinfo | 3 | ||||
-rw-r--r-- | www/p5-HTML-ExtractContent/pkg-descr | 11 | ||||
-rw-r--r-- | www/p5-HTML-ExtractContent/pkg-plist | 5 |
5 files changed, 47 insertions, 0 deletions
diff --git a/www/Makefile b/www/Makefile index 29e211ca40aa..b0d26676cef4 100644 --- a/www/Makefile +++ b/www/Makefile @@ -831,6 +831,7 @@ SUBDIR += p5-HTML-Element-Library SUBDIR += p5-HTML-Embperl SUBDIR += p5-HTML-Encoding + SUBDIR += p5-HTML-ExtractContent SUBDIR += p5-HTML-FillInForm SUBDIR += p5-HTML-FillInForm-ForceUTF8 SUBDIR += p5-HTML-FormFu diff --git a/www/p5-HTML-ExtractContent/Makefile b/www/p5-HTML-ExtractContent/Makefile new file mode 100644 index 000000000000..504e59fef3e5 --- /dev/null +++ b/www/p5-HTML-ExtractContent/Makefile @@ -0,0 +1,27 @@ +# New ports collection makefile for: HTML::ExtractContent +# Date created: 05 Mar 2009 +# Whom: Jun Kuriyama <kuriyama@FreeBSD.org> +# +# $FreeBSD$ +# + +PORTNAME= HTML-ExtractContent +PORTVERSION= 0.05 +CATEGORIES= www perl5 +MASTER_SITES= CPAN +PKGNAMEPREFIX= p5- + +MAINTAINER= kuriyama@FreeBSD.org +COMMENT= Perl extension for HTML content extractor with scoring heuristics + +RUN_DEPENDS= \ + p5-Class-Accessor-Lvalue>0:${PORTSDIR}/devel/p5-Class-Accessor-Lvalue \ + p5-Exporter-Lite>0:${PORTSDIR}/devel/p5-Exporter-Lite \ + p5-HTML-Parser>0:${PORTSDIR}/www/p5-HTML-Parser +BUILD_DEPENDS= ${RUN_DEPENDS} + +PERL_CONFIGURE= yes + +MAN3= HTML::ExtractContent.3 + +.include <bsd.port.mk> diff --git a/www/p5-HTML-ExtractContent/distinfo b/www/p5-HTML-ExtractContent/distinfo new file mode 100644 index 000000000000..1c41ee285394 --- /dev/null +++ b/www/p5-HTML-ExtractContent/distinfo @@ -0,0 +1,3 @@ +MD5 (HTML-ExtractContent-0.05.tar.gz) = 95c0f8be7624a4e71de6b7b3a0fe362b +SHA256 (HTML-ExtractContent-0.05.tar.gz) = 973950b6445b9644d71caa79787cb4753ed75ec296d31ee5d6df9494491ac85f +SIZE (HTML-ExtractContent-0.05.tar.gz) = 25899 diff --git a/www/p5-HTML-ExtractContent/pkg-descr b/www/p5-HTML-ExtractContent/pkg-descr new file mode 100644 index 000000000000..16155a386c75 --- /dev/null +++ b/www/p5-HTML-ExtractContent/pkg-descr @@ -0,0 +1,11 @@ +HTML::ExtractContent is a module for extracting content from HTML with 
+scoring heuristics. + +It guesses which block of HTML looks like content according to scores +depending on the amount of punctuation marks and the lengths of non-tag +texts. + +It also guesses whether content ends in the block or continues to the next +block. + +WWW: http://search.cpan.org/dist/HTML-ExtractContent/ diff --git a/www/p5-HTML-ExtractContent/pkg-plist b/www/p5-HTML-ExtractContent/pkg-plist new file mode 100644 index 000000000000..b78c74786f45 --- /dev/null +++ b/www/p5-HTML-ExtractContent/pkg-plist @@ -0,0 +1,5 @@ +%%SITE_PERL%%/%%PERL_ARCH%%/auto/HTML/ExtractContent/.packlist +%%SITE_PERL%%/HTML/ExtractContent.pm +%%SITE_PERL%%/HTML/ExtractContent/Util.pm +@dirrm %%SITE_PERL%%/HTML/ExtractContent +@dirrm %%SITE_PERL%%/%%PERL_ARCH%%/auto/HTML/ExtractContent |