diff options
author | amdmi3 <amdmi3@FreeBSD.org> | 2009-06-01 04:25:31 +0800 |
---|---|---|
committer | amdmi3 <amdmi3@FreeBSD.org> | 2009-06-01 04:25:31 +0800 |
commit | 50a0b0d3d9e3b16e973ccc848bc2a857af93916e (patch) | |
tree | a217d230ef51cc433f1226002d9429a5ae4dd83e | |
parent | 415ac356641baed2215f3eee94a8265609747205 (diff) | |
download | freebsd-ports-gnome-50a0b0d3d9e3b16e973ccc848bc2a857af93916e.tar.gz freebsd-ports-gnome-50a0b0d3d9e3b16e973ccc848bc2a857af93916e.tar.zst freebsd-ports-gnome-50a0b0d3d9e3b16e973ccc848bc2a857af93916e.zip |
OCRopus(tm) is a state-of-the-art document analysis and OCR system,
featuring pluggable layout analysis, pluggable character recognition,
statistical natural language modeling, and multi-lingual capabilities.
WWW: http://sites.google.com/site/ocropus/
PR: 134718
Submitted by: Hiroto Kagotani <hiroto.kagotani@gmail.com>
-rw-r--r-- | graphics/Makefile | 1 | ||||
-rw-r--r-- | graphics/ocropus/Makefile | 51 | ||||
-rw-r--r-- | graphics/ocropus/distinfo | 3 | ||||
-rw-r--r-- | graphics/ocropus/files/patch-Makefile.am | 11 | ||||
-rw-r--r-- | graphics/ocropus/files/patch-configure.ac | 20 | ||||
-rw-r--r-- | graphics/ocropus/files/patch-ocr-utils__narray-io.h | 10 | ||||
-rw-r--r-- | graphics/ocropus/files/patch-ocroscript__scripts__rec-tess-complete.lua | 96 | ||||
-rw-r--r-- | graphics/ocropus/pkg-descr | 5 | ||||
-rw-r--r-- | graphics/ocropus/pkg-plist | 119 |
9 files changed, 316 insertions, 0 deletions
diff --git a/graphics/Makefile b/graphics/Makefile index 4151450724be..7ae95ccf10b0 100644 --- a/graphics/Makefile +++ b/graphics/Makefile @@ -524,6 +524,7 @@ SUBDIR += ocaml-lablgl SUBDIR += ocrad SUBDIR += ocre + SUBDIR += ocropus SUBDIR += oglext SUBDIR += ogre3d SUBDIR += openclipart diff --git a/graphics/ocropus/Makefile b/graphics/ocropus/Makefile new file mode 100644 index 000000000000..5ecb3206249d --- /dev/null +++ b/graphics/ocropus/Makefile @@ -0,0 +1,51 @@ +# New ports collection makefile for: OCRopus +# Date created: 2009-05-20 +# Whom: Hiroto Kagotani <hiroto.kagotani@gmail.com> +# +# $FreeBSD$ +# + +PORTNAME= ocropus +PORTVERSION= 0.3.1 +CATEGORIES= graphics +MASTER_SITES= ${MASTER_SITE_GOOGLE_CODE} + +MAINTAINER= hiroto.kagotani@gmail.com +COMMENT= The OCRopus(tm) open source document analysis and OCR system + +BUILD_DEPENDS= ${LOCALBASE}/lib/libtesseract_full.a:${PORTSDIR}/graphics/tesseract +LIB_DEPENDS= png.5:${PORTSDIR}/graphics/png \ + jpeg.9:${PORTSDIR}/graphics/jpeg \ + tiff.4:${PORTSDIR}/graphics/tiff \ + iulib.0:${PORTSDIR}/graphics/iulib + +WRKSRC= ${WRKDIR}/ocropus-0.3 + +OPTIONS= SDL "Enable SDL for graphical debugging" off \ + LEPTONICA "Enable Leptonica image analysis" off + +MAKE_JOBS_UNSAFE= yes +USE_AUTOTOOLS= aclocal:110 automake:110 autoconf:262 +USE_GMAKE= yes +CONFIGURE_ARGS= --without-fst +CONFIGURE_ENV= CPPFLAGS=-I${LOCALBASE}/include LDFLAGS=-L${LOCALBASE}/lib + +.include <bsd.port.pre.mk> + +.if !defined(WITH_SDL) +CONFIGURE_ARGS+=--without-SDL +.else +USE_SDL= sdl +.endif + +.if !defined(WITH_LEPTONICA) +CONFIGURE_ARGS+=--without-leptonica +PLIST_SUB+= LEPTONICA="@comment " +.else +LIB_DEPENDS+= lept.0:${PORTSDIR}/graphics/leptonlib +PLIST_SUB+= LEPTONICA="" +.endif + +run-autotools: run-autotools-aclocal run-autotools-automake run-autotools-autoconf + +.include <bsd.port.post.mk> diff --git a/graphics/ocropus/distinfo b/graphics/ocropus/distinfo new file mode 100644 index 000000000000..4cb3801b19eb --- /dev/null +++ b/graphics/ocropus/distinfo @@ -0,0 +1,3 @@ +MD5 (ocropus-0.3.1.tar.gz) = 2a1b66419ae69ef031d5e6269db15bb5 +SHA256 (ocropus-0.3.1.tar.gz) = ee02d209a1c823090f0bceba7ec4a884029f66fc44147a2d34922f8148a699df +SIZE (ocropus-0.3.1.tar.gz) = 12061574 diff --git a/graphics/ocropus/files/patch-Makefile.am b/graphics/ocropus/files/patch-Makefile.am new file mode 100644 index 000000000000..37b04f6fd88a --- /dev/null +++ b/graphics/ocropus/files/patch-Makefile.am @@ -0,0 +1,11 @@ +--- ./Makefile.am.orig 2008-10-16 05:40:47.000000000 +0900 ++++ ./Makefile.am 2009-05-26 21:25:34.000000000 +0900 +@@ -110,7 +110,7 @@ + + # run check-style everytime and give a hint about make check + all: +- $(srcdir)/utilities/check-style -f $(srcdir) ++# $(srcdir)/utilities/check-style -f $(srcdir) + @echo + @echo "Use 'make check' to run tests!" + @echo diff --git a/graphics/ocropus/files/patch-configure.ac b/graphics/ocropus/files/patch-configure.ac new file mode 100644 index 000000000000..1995713ab60d --- /dev/null +++ b/graphics/ocropus/files/patch-configure.ac @@ -0,0 +1,20 @@ +--- ./configure.ac.orig 2008-10-16 05:40:35.000000000 +0900 ++++ ./configure.ac 2009-05-26 21:22:11.000000000 +0900 +@@ -116,6 +116,8 @@ + AC_MSG_ERROR([no TIFFOpen; please install libtiff4-dev or equivalent])) + + AC_LANG_CPLUSPLUS ++# should require CXXCPP before conditional AC_CHECK_HEADER ++AC_PROG_CXXCPP + + # --- iulib (required) --- + # NB: we can only use functions with C linkage here +@@ -180,7 +182,7 @@ + LDFLAGS="$LDFLAGS -L$leptheaders/../../lib" + AC_CHECK_LIB(lept,pixCreate,,AC_MSG_ERROR([leptonica not found! Choose --without-leptonica if you don't want to use it.])) + fi +-AM_CONDITIONAL([use_leptonica], [test x$use_leptonica == xyes]) ++AM_CONDITIONAL([use_leptonica], [test x$use_leptonica = xyes]) + + + # --- SDL (optional for graphical debugging in ocroscript) --- diff --git a/graphics/ocropus/files/patch-ocr-utils__narray-io.h b/graphics/ocropus/files/patch-ocr-utils__narray-io.h new file mode 100644 index 000000000000..91adc25b6442 --- /dev/null +++ b/graphics/ocropus/files/patch-ocr-utils__narray-io.h @@ -0,0 +1,10 @@ +--- ./ocr-utils/narray-io.h.orig 2008-10-16 05:40:46.000000000 +0900 ++++ ./ocr-utils/narray-io.h 2009-05-26 21:22:11.000000000 +0900 +@@ -31,6 +31,7 @@ + + #include <stdio.h> + #include <stdlib.h> ++#include <stdint.h> + #include "colib.h" + + namespace ocropus { diff --git a/graphics/ocropus/files/patch-ocroscript__scripts__rec-tess-complete.lua b/graphics/ocropus/files/patch-ocroscript__scripts__rec-tess-complete.lua new file mode 100644 index 000000000000..d428d7d718cb --- /dev/null +++ b/graphics/ocropus/files/patch-ocroscript__scripts__rec-tess-complete.lua @@ -0,0 +1,96 @@ +--- ./ocroscript/scripts/rec-tess-complete.lua.orig 2008-10-16 05:40:35.000000000 +0900 ++++ ./ocroscript/scripts/rec-tess-complete.lua 2009-05-26 21:22:11.000000000 +0900 +@@ -20,11 +20,20 @@ + -- Reviewer: + -- Primary Repository: + -- Web Sites: www.iupr.org, www.dfki.de, www.ocropus.org ++-- ++-- Patch applied: ++-- http://code.google.com/p/ocropus/issues/detail?id=137 + + + require 'lib.util' + require 'lib.headings' + require 'lib.paragraphs' ++require 'lib.path' ++require 'lib.hocr' ++import_all(ocr) ++import_all(graphics) ++import_all(iulib) ++import_all(nustring) + + remove_hyphens = true + +@@ -74,7 +83,7 @@ + -- RecognizedPage is a transport object of tesseract_recognize_blockwise(). + -- This function will convert it to a DOM. + function convert_RecognizedPage_to_DOM(p, image_path, keep_char_boxes) +- page_DOM = get_page_DOM(p, image_path) ++ page_DOM = hocr.get_page_DOM(p, image_path) + for i = 0, p:linesCount() - 1 do + local bbox = p:bbox(i) + local text = nustring() +@@ -85,13 +94,12 @@ + bboxes = narray_to_table(r) + end + p:text(text, i) +- line_DOM = get_line_DOM(bbox, text, bboxes, p) ++ line_DOM = hocr.get_line_DOM(bbox, text, bboxes, p) + table.insert(page_DOM, line_DOM) + end + return page_DOM + end + +- + function get_images_DOM(tiseg_image, html_path, images_dir, page_image) + os.execute('mkdir -p "'..images_dir..'"') + local rects = rectarray() +@@ -102,12 +110,11 @@ + local dom = {{tag = 'hr', size = '0'}} + for i = 0, rects:length() - 1 do + local src = images_dir .. ('/%04d.png'):format(i + 1) +- local img_path = util.combine_paths(html_path, src) + img = bytearray() + r = rects:at(i) + extract_subimage(img, page_image, r.x0, r.y0, r.x1, r.y1) +- write_image_gray(img_path, img) +- local props = {bbox = bbox_to_string(page_image, r)} ++ iulib.write_image_gray(src, img) ++ local props = {bbox = hocr.bbox_to_string(page_image, r)} + local link = {tag = 'a', href=src} + local width = r.x1 - r.x0 + local height = r.y1 -r.y0 +@@ -119,7 +126,7 @@ + height = "200px" + end + local tag = {tag = 'img', src = src, width=width, height=height, +- class = 'ocr_image', title = hocr_properties_attribute(props)} ++ class = 'ocr_image', title = hocr.properties_attribute(props)} + table.insert(link, tag) + table.insert(dom, link) + table.insert(dom, '\n') +@@ -146,8 +153,8 @@ + get_nontext_mask(nontext_mask,tiseg_image) + remove_masked_region(text_image,nontext_mask,clean_image) + segmenter:segment(page_segmentation,text_image) +- local p = RecognizedPage() +- tesseract_recognize_blockwise(p, page_image, page_segmentation) ++ local p = tesseract.RecognizedPage() ++ tesseract.recognize_blockwise(p, page_image, page_segmentation) + page_DOM = convert_RecognizedPage_to_DOM(p, pages:getFileName(), + option("charboxes")) + page_DOM = detect_headings(page_DOM, page_image) +@@ -157,10 +164,10 @@ + table.insert(body_DOM, page_DOM) + end + --end +-doc_DOM = get_html_tag() +-table.insert(doc_DOM, get_head_tag()) ++doc_DOM = hocr.get_html_tag() ++table.insert(doc_DOM, hocr.get_head_tag()) + table.insert(doc_DOM, '\n') + table.insert(doc_DOM, body_DOM) + file = io.open(output_file, 'w') +-dump_DOM(file, doc_DOM, html_preamble) ++hocr.dump(file, doc_DOM, html_preamble) + file:close() diff --git a/graphics/ocropus/pkg-descr b/graphics/ocropus/pkg-descr new file mode 100644 index 000000000000..70b8a316431a --- /dev/null +++ b/graphics/ocropus/pkg-descr @@ -0,0 +1,5 @@ +OCRopus(tm) is a state-of-the-art document analysis and OCR system, +featuring pluggable layout analysis, pluggable character recognition, +statistical natural language modeling, and multi-lingual capabilities. + +WWW: http://sites.google.com/site/ocropus/ diff --git a/graphics/ocropus/pkg-plist b/graphics/ocropus/pkg-plist new file mode 100644 index 000000000000..c1fa889c9bd0 --- /dev/null +++ b/graphics/ocropus/pkg-plist @@ -0,0 +1,119 @@ +bin/ocroscript +lib/libocropus.a +lib/libocroscript.a +include/ocropus/extern.h +include/ocropus/read_image.h +include/ocropus/function.h +include/ocropus/defs.h +include/ocropus/voronoi-ocropus.h +include/ocropus/const.h +include/ocropus/beam-search.h +include/ocropus/langmod-shortest-path.h +include/ocropus/lattice.h +include/ocropus/ocr-binarize-sauvola.h +include/ocropus/ocr-binarize-otsu.h +include/ocropus/grouping.h +include/ocropus/make-garbage.h +include/ocropus/charlib.h +include/ocropus/feature-extractor.h +include/ocropus/confusion-matrix.h +include/ocropus/bpnet.h +include/ocropus/additions.h +include/ocropus/feature-stream.h +include/ocropus/mnist.h +include/ocropus/classmap.h +include/ocropus/classify-chars.h +include/ocropus/bpnetline.h +include/ocropus/ocr-deskew-rast.h +include/ocropus/ocr-noisefilter.h +include/ocropus/ocr-doc-clean.h +include/ocropus/ocr-doc-clean-concomp.h +include/ocropus/ocr-pageframe-rast.h +include/ocropus/ocr-whitespace-cover.h +include/ocropus/ocr-char-stats.h +include/ocropus/ocr-layout-rast.h +include/ocropus/line-info.h +include/ocropus/ocr-extract-gutters.h +include/ocropus/ocr-ctextline-rast.h +include/ocropus/ocr-ctextline-rast-extended.h +include/ocropus/ocr-classify-zones.h +include/ocropus/ocr-pageseg-wcuts.h +include/ocropus/ocr-text-image-seg.h +include/ocropus/log-reg-data.h +include/ocropus/ocr-pageseg-xycut.h +include/ocropus/ocr-word-segmentation.h +include/ocropus/ocrcomponents.h +include/ocropus/kmeans.h +include/ocropus/glinerec.h +include/ocropus/idmap.h +include/ocropus/narray-io.h +include/ocropus/ocr-segmentations.h +include/ocropus/eigens.h +include/ocropus/sysutil.h +include/ocropus/logger.h +include/ocropus/grouper.h +include/ocropus/segmentation.h +include/ocropus/pages.h +include/ocropus/didegrade.h +include/ocropus/lines.h +include/ocropus/regionextractor.h +include/ocropus/ocr-utils.h +include/ocropus/resource-path.h +include/ocropus/queue.h +include/ocropus/grid.h +include/ocropus/seg-eval.h +include/ocropus/enumerator.h +include/ocropus/editdist.h +include/ocropus/tesseract.h +include/ocropus/recognized-page.h +%%LEPTONICA%%include/ocropus/ocr-text-image-seg-leptonica.h +%%DATADIR%%/models/neural-net-file.nn +%%DATADIR%%/words/en-us +%%DATADIR%%/scripts/lib/align.lua +%%DATADIR%%/scripts/lib/datasets.lua +%%DATADIR%%/scripts/lib/editdist.lua +%%DATADIR%%/scripts/lib/getopt.lua +%%DATADIR%%/scripts/lib/headings.lua +%%DATADIR%%/scripts/lib/hocr.lua +%%DATADIR%%/scripts/lib/paragraphs.lua +%%DATADIR%%/scripts/lib/path.lua +%%DATADIR%%/scripts/lib/util.lua +%%DATADIR%%/scripts/lib/xml.lua +%%DATADIR%%/scripts/align-lines-wordwise.lua +%%DATADIR%%/scripts/align-transcription.lua +%%DATADIR%%/scripts/align.lua +%%DATADIR%%/scripts/build-ngram-model.lua +%%DATADIR%%/scripts/check-train-valid-bpnet-feature.lua +%%DATADIR%%/scripts/degrade.lua +%%DATADIR%%/scripts/deskew.lua +%%DATADIR%%/scripts/editdist.lua +%%DATADIR%%/scripts/erode3.lua +%%DATADIR%%/scripts/eval-bpnet-on-words.lua +%%DATADIR%%/scripts/eval-editdist-layout.lua +%%DATADIR%%/scripts/eval-on-word-list.lua +%%DATADIR%%/scripts/hocr-to-text.lua +%%DATADIR%%/scripts/line-clean.lua +%%DATADIR%%/scripts/matra-clipping.lua +%%DATADIR%%/scripts/rec-bpnet-isolated.lua +%%DATADIR%%/scripts/rec-bpnet.lua +%%DATADIR%%/scripts/rec-guided.lua +%%DATADIR%%/scripts/rec-line.lua +%%DATADIR%%/scripts/rec-ltess.lua +%%DATADIR%%/scripts/rec-minimal.lua +%%DATADIR%%/scripts/rec-tess-complete.lua +%%DATADIR%%/scripts/recognize.lua +%%DATADIR%%/scripts/reflow.lua +%%DATADIR%%/scripts/sauvola.lua +%%DATADIR%%/scripts/segment-line.lua +%%DATADIR%%/scripts/show.lua +%%DATADIR%%/scripts/showseg.lua +%%DATADIR%%/scripts/strict.lua +%%DATADIR%%/scripts/text-to-hocr.lua +%%DATADIR%%/scripts/train-bpnet-isolated.lua +%%DATADIR%%/scripts/train-bpnet-lines.lua +@dirrm include/ocropus +@dirrm %%DATADIR%%/models +@dirrm %%DATADIR%%/scripts/lib +@dirrmtry %%DATADIR%%/scripts +@dirrm %%DATADIR%%/words +@dirrmtry %%DATADIR%% |