about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author amdmi3 <amdmi3@FreeBSD.org> 2009-06-01 04:25:31 +0800
committer amdmi3 <amdmi3@FreeBSD.org> 2009-06-01 04:25:31 +0800
commit 50a0b0d3d9e3b16e973ccc848bc2a857af93916e (patch)
tree a217d230ef51cc433f1226002d9429a5ae4dd83e
parent 415ac356641baed2215f3eee94a8265609747205 (diff)
download freebsd-ports-gnome-50a0b0d3d9e3b16e973ccc848bc2a857af93916e.tar.gz
freebsd-ports-gnome-50a0b0d3d9e3b16e973ccc848bc2a857af93916e.tar.zst
freebsd-ports-gnome-50a0b0d3d9e3b16e973ccc848bc2a857af93916e.zip
OCRopus(tm) is a state-of-the-art document analysis and OCR system,
featuring pluggable layout analysis, pluggable character recognition,
statistical natural language modeling, and multi-lingual capabilities.

WWW: http://sites.google.com/site/ocropus/

PR: 134718
Submitted by: Hiroto Kagotani <hiroto.kagotani@gmail.com>
-rw-r--r-- graphics/Makefile 1
-rw-r--r-- graphics/ocropus/Makefile 51
-rw-r--r-- graphics/ocropus/distinfo 3
-rw-r--r-- graphics/ocropus/files/patch-Makefile.am 11
-rw-r--r-- graphics/ocropus/files/patch-configure.ac 20
-rw-r--r-- graphics/ocropus/files/patch-ocr-utils__narray-io.h 10
-rw-r--r-- graphics/ocropus/files/patch-ocroscript__scripts__rec-tess-complete.lua 96
-rw-r--r-- graphics/ocropus/pkg-descr 5
-rw-r--r-- graphics/ocropus/pkg-plist 119
9 files changed, 316 insertions, 0 deletions
diff --git a/graphics/Makefile b/graphics/Makefile
index 4151450724be..7ae95ccf10b0 100644
--- a/graphics/Makefile
+++ b/graphics/Makefile
@@ -524,6 +524,7 @@
SUBDIR += ocaml-lablgl
SUBDIR += ocrad
SUBDIR += ocre
+ SUBDIR += ocropus
SUBDIR += oglext
SUBDIR += ogre3d
SUBDIR += openclipart
diff --git a/graphics/ocropus/Makefile b/graphics/ocropus/Makefile
new file mode 100644
index 000000000000..5ecb3206249d
--- /dev/null
+++ b/graphics/ocropus/Makefile
@@ -0,0 +1,51 @@
+# New ports collection makefile for: OCRopus
+# Date created: 2009-05-20
+# Whom: Hiroto Kagotani <hiroto.kagotani@gmail.com>
+#
+# $FreeBSD$
+#
+
+PORTNAME= ocropus
+PORTVERSION= 0.3.1
+CATEGORIES= graphics
+MASTER_SITES= ${MASTER_SITE_GOOGLE_CODE}
+
+MAINTAINER= hiroto.kagotani@gmail.com
+COMMENT= The OCRopus(tm) open source document analysis and OCR system
+
+BUILD_DEPENDS= ${LOCALBASE}/lib/libtesseract_full.a:${PORTSDIR}/graphics/tesseract
+LIB_DEPENDS= png.5:${PORTSDIR}/graphics/png \
+ jpeg.9:${PORTSDIR}/graphics/jpeg \
+ tiff.4:${PORTSDIR}/graphics/tiff \
+ iulib.0:${PORTSDIR}/graphics/iulib
+
+WRKSRC= ${WRKDIR}/ocropus-0.3
+
+OPTIONS= SDL "Enable SDL for graphical debugging" off \
+ LEPTONICA "Enable Leptonica image analysis" off
+
+MAKE_JOBS_UNSAFE= yes
+USE_AUTOTOOLS= aclocal:110 automake:110 autoconf:262
+USE_GMAKE= yes
+CONFIGURE_ARGS= --without-fst
+CONFIGURE_ENV= CPPFLAGS=-I${LOCALBASE}/include LDFLAGS=-L${LOCALBASE}/lib
+
+.include <bsd.port.pre.mk>
+
+.if !defined(WITH_SDL)
+CONFIGURE_ARGS+=--without-SDL
+.else
+USE_SDL= sdl
+.endif
+
+.if !defined(WITH_LEPTONICA)
+CONFIGURE_ARGS+=--without-leptonica
+PLIST_SUB+= LEPTONICA="@comment "
+.else
+LIB_DEPENDS+= lept.0:${PORTSDIR}/graphics/leptonlib
+PLIST_SUB+= LEPTONICA=""
+.endif
+
+run-autotools: run-autotools-aclocal run-autotools-automake run-autotools-autoconf
+
+.include <bsd.port.post.mk>
diff --git a/graphics/ocropus/distinfo b/graphics/ocropus/distinfo
new file mode 100644
index 000000000000..4cb3801b19eb
--- /dev/null
+++ b/graphics/ocropus/distinfo
@@ -0,0 +1,3 @@
+MD5 (ocropus-0.3.1.tar.gz) = 2a1b66419ae69ef031d5e6269db15bb5
+SHA256 (ocropus-0.3.1.tar.gz) = ee02d209a1c823090f0bceba7ec4a884029f66fc44147a2d34922f8148a699df
+SIZE (ocropus-0.3.1.tar.gz) = 12061574
diff --git a/graphics/ocropus/files/patch-Makefile.am b/graphics/ocropus/files/patch-Makefile.am
new file mode 100644
index 000000000000..37b04f6fd88a
--- /dev/null
+++ b/graphics/ocropus/files/patch-Makefile.am
@@ -0,0 +1,11 @@
+--- ./Makefile.am.orig 2008-10-16 05:40:47.000000000 +0900
++++ ./Makefile.am 2009-05-26 21:25:34.000000000 +0900
+@@ -110,7 +110,7 @@
+
+ # run check-style everytime and give a hint about make check
+ all:
+- $(srcdir)/utilities/check-style -f $(srcdir)
++# $(srcdir)/utilities/check-style -f $(srcdir)
+ @echo
+ @echo "Use 'make check' to run tests!"
+ @echo
diff --git a/graphics/ocropus/files/patch-configure.ac b/graphics/ocropus/files/patch-configure.ac
new file mode 100644
index 000000000000..1995713ab60d
--- /dev/null
+++ b/graphics/ocropus/files/patch-configure.ac
@@ -0,0 +1,20 @@
+--- ./configure.ac.orig 2008-10-16 05:40:35.000000000 +0900
++++ ./configure.ac 2009-05-26 21:22:11.000000000 +0900
+@@ -116,6 +116,8 @@
+ AC_MSG_ERROR([no TIFFOpen; please install libtiff4-dev or equivalent]))
+
+ AC_LANG_CPLUSPLUS
++# should require CXXCPP before conditional AC_CHECK_HEADER
++AC_PROG_CXXCPP
+
+ # --- iulib (required) ---
+ # NB: we can only use functions with C linkage here
+@@ -180,7 +182,7 @@
+ LDFLAGS="$LDFLAGS -L$leptheaders/../../lib"
+ AC_CHECK_LIB(lept,pixCreate,,AC_MSG_ERROR([leptonica not found! Choose --without-leptonica if you don't want to use it.]))
+ fi
+-AM_CONDITIONAL([use_leptonica], [test x$use_leptonica == xyes])
++AM_CONDITIONAL([use_leptonica], [test x$use_leptonica = xyes])
+
+
+ # --- SDL (optional for graphical debugging in ocroscript) ---
diff --git a/graphics/ocropus/files/patch-ocr-utils__narray-io.h b/graphics/ocropus/files/patch-ocr-utils__narray-io.h
new file mode 100644
index 000000000000..91adc25b6442
--- /dev/null
+++ b/graphics/ocropus/files/patch-ocr-utils__narray-io.h
@@ -0,0 +1,10 @@
+--- ./ocr-utils/narray-io.h.orig 2008-10-16 05:40:46.000000000 +0900
++++ ./ocr-utils/narray-io.h 2009-05-26 21:22:11.000000000 +0900
+@@ -31,6 +31,7 @@
+
+ #include <stdio.h>
+ #include <stdlib.h>
++#include <stdint.h>
+ #include "colib.h"
+
+ namespace ocropus {
diff --git a/graphics/ocropus/files/patch-ocroscript__scripts__rec-tess-complete.lua b/graphics/ocropus/files/patch-ocroscript__scripts__rec-tess-complete.lua
new file mode 100644
index 000000000000..d428d7d718cb
--- /dev/null
+++ b/graphics/ocropus/files/patch-ocroscript__scripts__rec-tess-complete.lua
@@ -0,0 +1,96 @@
+--- ./ocroscript/scripts/rec-tess-complete.lua.orig 2008-10-16 05:40:35.000000000 +0900
++++ ./ocroscript/scripts/rec-tess-complete.lua 2009-05-26 21:22:11.000000000 +0900
+@@ -20,11 +20,20 @@
+ -- Reviewer:
+ -- Primary Repository:
+ -- Web Sites: www.iupr.org, www.dfki.de, www.ocropus.org
++--
++-- Patch applied:
++-- http://code.google.com/p/ocropus/issues/detail?id=137
+
+
+ require 'lib.util'
+ require 'lib.headings'
+ require 'lib.paragraphs'
++require 'lib.path'
++require 'lib.hocr'
++import_all(ocr)
++import_all(graphics)
++import_all(iulib)
++import_all(nustring)
+
+ remove_hyphens = true
+
+@@ -74,7 +83,7 @@
+ -- RecognizedPage is a transport object of tesseract_recognize_blockwise().
+ -- This function will convert it to a DOM.
+ function convert_RecognizedPage_to_DOM(p, image_path, keep_char_boxes)
+- page_DOM = get_page_DOM(p, image_path)
++ page_DOM = hocr.get_page_DOM(p, image_path)
+ for i = 0, p:linesCount() - 1 do
+ local bbox = p:bbox(i)
+ local text = nustring()
+@@ -85,13 +94,12 @@
+ bboxes = narray_to_table(r)
+ end
+ p:text(text, i)
+- line_DOM = get_line_DOM(bbox, text, bboxes, p)
++ line_DOM = hocr.get_line_DOM(bbox, text, bboxes, p)
+ table.insert(page_DOM, line_DOM)
+ end
+ return page_DOM
+ end
+
+-
+ function get_images_DOM(tiseg_image, html_path, images_dir, page_image)
+ os.execute('mkdir -p "'..images_dir..'"')
+ local rects = rectarray()
+@@ -102,12 +110,11 @@
+ local dom = {{tag = 'hr', size = '0'}}
+ for i = 0, rects:length() - 1 do
+ local src = images_dir .. ('/%04d.png'):format(i + 1)
+- local img_path = util.combine_paths(html_path, src)
+ img = bytearray()
+ r = rects:at(i)
+ extract_subimage(img, page_image, r.x0, r.y0, r.x1, r.y1)
+- write_image_gray(img_path, img)
+- local props = {bbox = bbox_to_string(page_image, r)}
++ iulib.write_image_gray(src, img)
++ local props = {bbox = hocr.bbox_to_string(page_image, r)}
+ local link = {tag = 'a', href=src}
+ local width = r.x1 - r.x0
+ local height = r.y1 -r.y0
+@@ -119,7 +126,7 @@
+ height = "200px"
+ end
+ local tag = {tag = 'img', src = src, width=width, height=height,
+- class = 'ocr_image', title = hocr_properties_attribute(props)}
++ class = 'ocr_image', title = hocr.properties_attribute(props)}
+ table.insert(link, tag)
+ table.insert(dom, link)
+ table.insert(dom, '\n')
+@@ -146,8 +153,8 @@
+ get_nontext_mask(nontext_mask,tiseg_image)
+ remove_masked_region(text_image,nontext_mask,clean_image)
+ segmenter:segment(page_segmentation,text_image)
+- local p = RecognizedPage()
+- tesseract_recognize_blockwise(p, page_image, page_segmentation)
++ local p = tesseract.RecognizedPage()
++ tesseract.recognize_blockwise(p, page_image, page_segmentation)
+ page_DOM = convert_RecognizedPage_to_DOM(p, pages:getFileName(),
+ option("charboxes"))
+ page_DOM = detect_headings(page_DOM, page_image)
+@@ -157,10 +164,10 @@
+ table.insert(body_DOM, page_DOM)
+ end
+ --end
+-doc_DOM = get_html_tag()
+-table.insert(doc_DOM, get_head_tag())
++doc_DOM = hocr.get_html_tag()
++table.insert(doc_DOM, hocr.get_head_tag())
+ table.insert(doc_DOM, '\n')
+ table.insert(doc_DOM, body_DOM)
+ file = io.open(output_file, 'w')
+-dump_DOM(file, doc_DOM, html_preamble)
++hocr.dump(file, doc_DOM, html_preamble)
+ file:close()
diff --git a/graphics/ocropus/pkg-descr b/graphics/ocropus/pkg-descr
new file mode 100644
index 000000000000..70b8a316431a
--- /dev/null
+++ b/graphics/ocropus/pkg-descr
@@ -0,0 +1,5 @@
+OCRopus(tm) is a state-of-the-art document analysis and OCR system,
+featuring pluggable layout analysis, pluggable character recognition,
+statistical natural language modeling, and multi-lingual capabilities.
+
+WWW: http://sites.google.com/site/ocropus/
diff --git a/graphics/ocropus/pkg-plist b/graphics/ocropus/pkg-plist
new file mode 100644
index 000000000000..c1fa889c9bd0
--- /dev/null
+++ b/graphics/ocropus/pkg-plist
@@ -0,0 +1,119 @@
+bin/ocroscript
+lib/libocropus.a
+lib/libocroscript.a
+include/ocropus/extern.h
+include/ocropus/read_image.h
+include/ocropus/function.h
+include/ocropus/defs.h
+include/ocropus/voronoi-ocropus.h
+include/ocropus/const.h
+include/ocropus/beam-search.h
+include/ocropus/langmod-shortest-path.h
+include/ocropus/lattice.h
+include/ocropus/ocr-binarize-sauvola.h
+include/ocropus/ocr-binarize-otsu.h
+include/ocropus/grouping.h
+include/ocropus/make-garbage.h
+include/ocropus/charlib.h
+include/ocropus/feature-extractor.h
+include/ocropus/confusion-matrix.h
+include/ocropus/bpnet.h
+include/ocropus/additions.h
+include/ocropus/feature-stream.h
+include/ocropus/mnist.h
+include/ocropus/classmap.h
+include/ocropus/classify-chars.h
+include/ocropus/bpnetline.h
+include/ocropus/ocr-deskew-rast.h
+include/ocropus/ocr-noisefilter.h
+include/ocropus/ocr-doc-clean.h
+include/ocropus/ocr-doc-clean-concomp.h
+include/ocropus/ocr-pageframe-rast.h
+include/ocropus/ocr-whitespace-cover.h
+include/ocropus/ocr-char-stats.h
+include/ocropus/ocr-layout-rast.h
+include/ocropus/line-info.h
+include/ocropus/ocr-extract-gutters.h
+include/ocropus/ocr-ctextline-rast.h
+include/ocropus/ocr-ctextline-rast-extended.h
+include/ocropus/ocr-classify-zones.h
+include/ocropus/ocr-pageseg-wcuts.h
+include/ocropus/ocr-text-image-seg.h
+include/ocropus/log-reg-data.h
+include/ocropus/ocr-pageseg-xycut.h
+include/ocropus/ocr-word-segmentation.h
+include/ocropus/ocrcomponents.h
+include/ocropus/kmeans.h
+include/ocropus/glinerec.h
+include/ocropus/idmap.h
+include/ocropus/narray-io.h
+include/ocropus/ocr-segmentations.h
+include/ocropus/eigens.h
+include/ocropus/sysutil.h
+include/ocropus/logger.h
+include/ocropus/grouper.h
+include/ocropus/segmentation.h
+include/ocropus/pages.h
+include/ocropus/didegrade.h
+include/ocropus/lines.h
+include/ocropus/regionextractor.h
+include/ocropus/ocr-utils.h
+include/ocropus/resource-path.h
+include/ocropus/queue.h
+include/ocropus/grid.h
+include/ocropus/seg-eval.h
+include/ocropus/enumerator.h
+include/ocropus/editdist.h
+include/ocropus/tesseract.h
+include/ocropus/recognized-page.h
+%%LEPTONICA%%include/ocropus/ocr-text-image-seg-leptonica.h
+%%DATADIR%%/models/neural-net-file.nn
+%%DATADIR%%/words/en-us
+%%DATADIR%%/scripts/lib/align.lua
+%%DATADIR%%/scripts/lib/datasets.lua
+%%DATADIR%%/scripts/lib/editdist.lua
+%%DATADIR%%/scripts/lib/getopt.lua
+%%DATADIR%%/scripts/lib/headings.lua
+%%DATADIR%%/scripts/lib/hocr.lua
+%%DATADIR%%/scripts/lib/paragraphs.lua
+%%DATADIR%%/scripts/lib/path.lua
+%%DATADIR%%/scripts/lib/util.lua
+%%DATADIR%%/scripts/lib/xml.lua
+%%DATADIR%%/scripts/align-lines-wordwise.lua
+%%DATADIR%%/scripts/align-transcription.lua
+%%DATADIR%%/scripts/align.lua
+%%DATADIR%%/scripts/build-ngram-model.lua
+%%DATADIR%%/scripts/check-train-valid-bpnet-feature.lua
+%%DATADIR%%/scripts/degrade.lua
+%%DATADIR%%/scripts/deskew.lua
+%%DATADIR%%/scripts/editdist.lua
+%%DATADIR%%/scripts/erode3.lua
+%%DATADIR%%/scripts/eval-bpnet-on-words.lua
+%%DATADIR%%/scripts/eval-editdist-layout.lua
+%%DATADIR%%/scripts/eval-on-word-list.lua
+%%DATADIR%%/scripts/hocr-to-text.lua
+%%DATADIR%%/scripts/line-clean.lua
+%%DATADIR%%/scripts/matra-clipping.lua
+%%DATADIR%%/scripts/rec-bpnet-isolated.lua
+%%DATADIR%%/scripts/rec-bpnet.lua
+%%DATADIR%%/scripts/rec-guided.lua
+%%DATADIR%%/scripts/rec-line.lua
+%%DATADIR%%/scripts/rec-ltess.lua
+%%DATADIR%%/scripts/rec-minimal.lua
+%%DATADIR%%/scripts/rec-tess-complete.lua
+%%DATADIR%%/scripts/recognize.lua
+%%DATADIR%%/scripts/reflow.lua
+%%DATADIR%%/scripts/sauvola.lua
+%%DATADIR%%/scripts/segment-line.lua
+%%DATADIR%%/scripts/show.lua
+%%DATADIR%%/scripts/showseg.lua
+%%DATADIR%%/scripts/strict.lua
+%%DATADIR%%/scripts/text-to-hocr.lua
+%%DATADIR%%/scripts/train-bpnet-isolated.lua
+%%DATADIR%%/scripts/train-bpnet-lines.lua
+@dirrm include/ocropus
+@dirrm %%DATADIR%%/models
+@dirrm %%DATADIR%%/scripts/lib
+@dirrmtry %%DATADIR%%/scripts
+@dirrm %%DATADIR%%/words
+@dirrmtry %%DATADIR%%