about summary refs log tree commit diff stats
path: root/www
diff options
context:
space:
mode:
author madpilot <madpilot@FreeBSD.org> 2017-08-01 00:22:20 +0800
committer madpilot <madpilot@FreeBSD.org> 2017-08-01 00:22:20 +0800
commit 3543dd2876d4e58da7d152f352b532b63be04ff2 (patch)
tree a0997b2a358fcb83b310406d350d45ebba87304c /www
parent a358cf8362b93b6762c36bc4eccb6ef3f6828386 (diff)
download freebsd-ports-gnome-3543dd2876d4e58da7d152f352b532b63be04ff2.tar.gz
freebsd-ports-gnome-3543dd2876d4e58da7d152f352b532b63be04ff2.tar.zst
freebsd-ports-gnome-3543dd2876d4e58da7d152f352b532b63be04ff2.zip
A fast implementation of the HTML 5 parsing spec for Python. Parsing
is done in C using a variant of the gumbo parser. The gumbo parse
tree is then transformed into an lxml tree, also in C, yielding
parse times that can be a thirtieth of the html5lib parse times.
That is a speedup of 30x. This differs, for instance, from the gumbo
python bindings, where the initial parsing is done in C but the
transformation into the final tree is done in python.

WWW: https://html5-parser.readthedocs.io/
Diffstat (limited to 'www')
-rw-r--r-- www/Makefile 1
-rw-r--r-- www/py-html5-parser/Makefile 19
-rw-r--r-- www/py-html5-parser/distinfo 3
-rw-r--r-- www/py-html5-parser/pkg-descr 9
4 files changed, 32 insertions, 0 deletions
diff --git a/www/Makefile b/www/Makefile
index fd489e29cefd..920491a62135 100644
--- a/www/Makefile
+++ b/www/Makefile
@@ -1668,6 +1668,7 @@
SUBDIR += py-horizon
SUBDIR += py-hpack
SUBDIR += py-html
+ SUBDIR += py-html5-parser
SUBDIR += py-html5lib
SUBDIR += py-http-parser
SUBDIR += py-httpie
diff --git a/www/py-html5-parser/Makefile b/www/py-html5-parser/Makefile
new file mode 100644
index 000000000000..5b545b340138
--- /dev/null
+++ b/www/py-html5-parser/Makefile
@@ -0,0 +1,19 @@
+# $FreeBSD$
+
+PORTNAME= html5-parser
+PORTVERSION= 0.4.3
+CATEGORIES= www python
+MASTER_SITES= CHEESESHOP
+PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX}
+
+MAINTAINER= madpilot@FreeBSD.org
+COMMENT= Fast implementation of the HTML 5 parsing spec for Python
+
+LICENSE= APACHE20
+
+BUILD_DEPENDS= ${PYTHON_PKGNAMEPREFIX}lxml>=3.8.0:devel/py-lxml
+
+USES= pkgconfig python
+USE_PYTHON= autoplist distutils
+
+.include <bsd.port.mk>
diff --git a/www/py-html5-parser/distinfo b/www/py-html5-parser/distinfo
new file mode 100644
index 000000000000..7dbb045eef22
--- /dev/null
+++ b/www/py-html5-parser/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1501237401
+SHA256 (html5-parser-0.4.3.tar.gz) = dd5e3647c5919439c41600172ef96b5fdbf278028bd4000476f87412c4fb7b9c
+SIZE (html5-parser-0.4.3.tar.gz) = 261906
diff --git a/www/py-html5-parser/pkg-descr b/www/py-html5-parser/pkg-descr
new file mode 100644
index 000000000000..03b7267e4e1d
--- /dev/null
+++ b/www/py-html5-parser/pkg-descr
@@ -0,0 +1,9 @@
+A fast implementation of the HTML 5 parsing spec for Python. Parsing
+is done in C using a variant of the gumbo parser. The gumbo parse
+tree is then transformed into an lxml tree, also in C, yielding
+parse times that can be a thirtieth of the html5lib parse times.
+That is a speedup of 30x. This differs, for instance, from the gumbo
+python bindings, where the initial parsing is done in C but the
+transformation into the final tree is done in python.
+
+WWW: https://html5-parser.readthedocs.io/