diff options
author | miwi <miwi@FreeBSD.org> | 2008-07-20 01:02:19 +0800 |
---|---|---|
committer | miwi <miwi@FreeBSD.org> | 2008-07-20 01:02:19 +0800 |
commit | 39ade7f9f8df18801a5a8756ddd3a28dbe4e78b3 (patch) | |
tree | d7ead849ed803bc410f6f40ea2b13cccf3144282 /www/jericho-html | |
parent | 4135feb8d2dcc5102619c89be3aa43f15a81cbd0 (diff) | |
download | freebsd-ports-gnome-39ade7f9f8df18801a5a8756ddd3a28dbe4e78b3.tar.gz freebsd-ports-gnome-39ade7f9f8df18801a5a8756ddd3a28dbe4e78b3.tar.zst freebsd-ports-gnome-39ade7f9f8df18801a5a8756ddd3a28dbe4e78b3.zip |
Jericho HTML Parser is a simple but powerful Java library allowing
analysis and manipulation of parts of an HTML document, including
some common server-side tags, while reproducing verbatim any
unrecognised or invalid HTML.
It also provides high-level HTML form manipulation functions.
WWW: http://jerichohtml.sourceforge.net/doc/index.html
PR: ports/124770
Submitted by: Marcin Cieslak <saper at SYSTEM.PL>
Diffstat (limited to 'www/jericho-html')
-rw-r--r-- | www/jericho-html/Makefile | 52 | ||||
-rw-r--r-- | www/jericho-html/distinfo | 3 | ||||
-rw-r--r-- | www/jericho-html/files/patch-encoding | 15 | ||||
-rw-r--r-- | www/jericho-html/pkg-descr | 8 |
4 files changed, 78 insertions, 0 deletions
diff --git a/www/jericho-html/Makefile b/www/jericho-html/Makefile new file mode 100644 index 000000000000..4dc195625119 --- /dev/null +++ b/www/jericho-html/Makefile @@ -0,0 +1,52 @@ +# New ports collection makefile for: jerichohtml +# Date created: 2008-06-17 +# Whom: Marcin Cieslak <saper@SYSTEM.PL> +# +# $FreeBSD$ +# + +PORTNAME= jericho-html +PORTVERSION= 2.5 +CATEGORIES= www java +MASTER_SITES= SF +MASTER_SITE_SUBDIR= ${PORTNAME:S,-,,} + +MAINTAINER= saper@SYSTEM.PL +COMMENT= A java library to analyse and manipulate HTML + +USE_ZIP= yes +USE_JAVA= 1.3+ + +INTERFACES:= "compile-time-dependencies/slf4j-api-1.4.3.jar:\ + compile-time-dependencies/commons-logging-api-1.1.jar:\ + compile-time-dependencies/log4j-api-1.2.14.jar" + +PORTDOCS= api +PLIST_FILES+= %%JAVAJARDIR%%/${PORTNAME}.jar + +do-build: + (cd ${WRKSRC} && ${RM} -rf classes/* && ${JAVAC} \ + -classpath ${INTERFACES:S, ,,g} \ + -d classes src/java/au/id/jericho/lib/html/*.java \ + src/java/au/id/jericho/lib/html/nodoc/*.java) + ${JAR} -cf ${WRKSRC}/lib/${PORTNAME}.jar \ + -C ${WRKSRC}/classes . 
+.if !defined(NOPORTDOCS) + (cd ${WRKSRC} && ${RM} -rf doc/* && ${JAVADOC} -quiet \ + -windowtitle "Jericho HTML Parser ${PORTVERSION}" \ + -classpath ${INTERFACES:S, ,,g}:src/java:classes \ + -use -d ${WRKSRC}/doc/api \ + -subpackages au.id.jericho.lib.html \ + -exclude au.id.jericho.lib.html.nodoc \ + -noqualifier au.id.jericho.lib.html \ + -group "Core package" au.id.jericho.lib.html) +.endif + +do-install: + ${INSTALL_DATA} ${WRKSRC}/lib/${PORTNAME}.jar ${JAVAJARDIR} +.if !defined(NOPORTDOCS) + ${MKDIR} ${DOCSDIR} + (cd ${WRKSRC}/doc && ${FIND} api | ${CPIO} -pdmu ${DOCSDIR}) +.endif + +.include <bsd.port.mk> diff --git a/www/jericho-html/distinfo b/www/jericho-html/distinfo new file mode 100644 index 000000000000..280531763604 --- /dev/null +++ b/www/jericho-html/distinfo @@ -0,0 +1,3 @@ +MD5 (jericho-html-2.5.zip) = 64306d0eb82608e50496a680b319182d +SHA256 (jericho-html-2.5.zip) = 212b9e8b72f9787dfafd046e8716f0d04365afcd3f4d2fb293e69d5b90e456b4 +SIZE (jericho-html-2.5.zip) = 1456664 diff --git a/www/jericho-html/files/patch-encoding b/www/jericho-html/files/patch-encoding new file mode 100644 index 000000000000..3396a1440a73 --- /dev/null +++ b/www/jericho-html/files/patch-encoding @@ -0,0 +1,15 @@ +--- src/java/au/id/jericho/lib/html/StreamEncodingDetector.java.orig 2008-06-17 21:01:53.890292905 +0200 ++++ src/java/au/id/jericho/lib/html/StreamEncodingDetector.java 2008-06-17 21:02:43.940300330 +0200 +@@ -203,9 +203,9 @@ + // Assume the more likely case of four 8-bit characters <= U+00FF.
+ // Check whether it fits some common EBCDIC strings that might be found at the start of a document:
+ if (b1==0x4C) { // first character is EBCDIC '<' (ASCII 'L'), check a couple more characters before assuming EBCDIC encoding:
+- if (b2==0x6F && b3==0xA7 && b4==0x94) return setEncoding(EBCDIC,"default EBCDIC encoding (<?xml...> detected)"); // first four bytes are "<?xm" in EBCDIC ("Lo§”" in Windows-1252)
+- if (b2==0x5A && b3==0xC4 && b4==0xD6) return setEncoding(EBCDIC,"default EBCDIC encoding (<!DOCTYPE...> detected)"); // first four bytes are "<!DO" in EBCDIC ("LZÄÖ" in Windows-1252)
+- if ((b2&b3&b4&0x80)!=0) return setEncoding(EBCDIC,"default EBCDIC-compatible encoding (HTML element detected)"); // all of the 3 bytes after the '<' have the high-order bit set, indicating EBCDIC letters such as "<HTM" ("LÈãÔ" in Windows-1252), or "<htm" ("Lˆ£”" in Windows-1252)
++ if (b2==0x6F && b3==0xA7 && b4==0x94) return setEncoding(EBCDIC,"default EBCDIC encoding (<?xml...> detected)"); // first four bytes are "<?xm" in EBCDIC
++ if (b2==0x5A && b3==0xC4 && b4==0xD6) return setEncoding(EBCDIC,"default EBCDIC encoding (<!DOCTYPE...> detected)"); // first four bytes are "<!DO" in EBCDIC
++ if ((b2&b3&b4&0x80)!=0) return setEncoding(EBCDIC,"default EBCDIC-compatible encoding (HTML element detected)"); // all of the 3 bytes after the '<' have the high-order bit set, indicating EBCDIC letters such as "<HTM" or "<htm"
+ // although this is not an exhaustive check for EBCDIC, it is safer to assume a more common preliminary encoding if none of these conditions are met.
+ }
+ // Now confident that it is not EBCDIC, but some other 8-bit encoding.
diff --git a/www/jericho-html/pkg-descr b/www/jericho-html/pkg-descr new file mode 100644 index 000000000000..b38a2d771bd5 --- /dev/null +++ b/www/jericho-html/pkg-descr @@ -0,0 +1,8 @@ +Jericho HTML Parser is a simple but powerful java library allowing +analysis and manipulation of parts of an HTML document, including +some common server-side tags, while reproducing verbatim any +unrecognised or invalid HTML. + +It also provides high-level HTML form manipulation functions. + +WWW: http://jerichohtml.sourceforge.net/doc/index.html |