aboutsummaryrefslogtreecommitdiffstats
path: root/misc
diff options
context:
space:
mode:
author yuri <yuri@FreeBSD.org> 2017-12-19 06:43:44 +0800
committer yuri <yuri@FreeBSD.org> 2017-12-19 06:43:44 +0800
commit 1e74c9004d35587283a5bb7c07a572de211d1d59 (patch)
tree a90a2de1c61388d57aaa390465e7c250324bf77d /misc
parent 1704c23f3c8282f6d11ed491c44836f18a0e1163 (diff)
download freebsd-ports-gnome-1e74c9004d35587283a5bb7c07a572de211d1d59.tar.gz
freebsd-ports-gnome-1e74c9004d35587283a5bb7c07a572de211d1d59.tar.zst
freebsd-ports-gnome-1e74c9004d35587283a5bb7c07a572de211d1d59.zip
New port: misc/libpostal: Library for parsing/normalizing street addresses around the world
PR: 224262
Submitted by: Dmitri Goutnik <dg@syrec.org>
Approved by: adamw (mentor)
Differential Revision: https://reviews.freebsd.org/D13468
Diffstat (limited to 'misc')
-rw-r--r-- misc/Makefile 1
-rw-r--r-- misc/libpostal/Makefile 58
-rw-r--r-- misc/libpostal/distinfo 3
-rw-r--r-- misc/libpostal/files/patch-configure.ac 11
-rw-r--r-- misc/libpostal/files/patch-src_Makefile.am 74
-rw-r--r-- misc/libpostal/files/patch-src_libpostal__data 23
-rw-r--r-- misc/libpostal/files/patch-src_sparkey_Makefile.am 15
-rw-r--r-- misc/libpostal/files/patch-test_Makefile.am 20
-rw-r--r-- misc/libpostal/files/pkg-message.in 4
-rw-r--r-- misc/libpostal/pkg-descr 6
-rw-r--r-- misc/libpostal/pkg-plist 9
11 files changed, 224 insertions, 0 deletions
diff --git a/misc/Makefile b/misc/Makefile
index 43d4d2c7a4b8..6442bf747aab 100644
--- a/misc/Makefile
+++ b/misc/Makefile
@@ -250,6 +250,7 @@
SUBDIR += libisocodes
SUBDIR += libkdeedu
SUBDIR += libmetalink
+ SUBDIR += libpostal
SUBDIR += libpri
SUBDIR += libsupertone
SUBDIR += libutf
diff --git a/misc/libpostal/Makefile b/misc/libpostal/Makefile
new file mode 100644
index 000000000000..c215122be699
--- /dev/null
+++ b/misc/libpostal/Makefile
@@ -0,0 +1,58 @@
+# $FreeBSD$
+
+PORTNAME= libpostal
+DISTVERSIONPREFIX= v
+DISTVERSION= 1.0.0
+CATEGORIES= misc geography
+
+MAINTAINER= dg@syrec.org
+COMMENT= Library for parsing/normalizing street addresses around the world
+
+LICENSE= MIT
+LICENSE_FILE= ${WRKSRC}/LICENSE
+
+RUN_DEPENDS= curl:ftp/curl
+
+USES= autoreconf libtool localbase
+GNU_CONFIGURE= yes
+USE_LDCONFIG= yes
+CONFIGURE_ARGS= --datadir=${LIBPOSTAL_DATADIR} \
+ --disable-data-download
+USE_GITHUB= yes
+GH_ACCOUNT= openvenues
+TEST_TARGET= check
+
+LIBPOSTAL_DATADIR?= /var/db/${PORTNAME}
+
+SUB_FILES= pkg-message
+SUB_LIST= PORTNAME=${PORTNAME} \
+ LIBPOSTAL_DATADIR=${LIBPOSTAL_DATADIR}
+
+PORTDOCS= README.md
+
+OPTIONS_DEFINE= CBLAS DOCS SSE2 STATIC
+CBLAS_DESC= Build with CBLAS/OPENBLAS
+SSE2_DESC= Enable SSE2 optimization
+OPTIONS_SUB= yes
+
+CBLAS_BUILD_DEPENDS= ${LOCALBASE}/include/cblas.h:math/cblas
+CBLAS_LIB_DEPENDS= libopenblas.so:math/openblas
+CBLAS_CONFIGURE_WITH= cblas=${LOCALBASE}/lib/libopenblas.so
+
+SSE2_CONFIGURE_ENABLE= sse2
+
+STATIC_CONFIGURE_ENABLE= static
+
+pre-configure:
+ @cd ${WRKSRC} && ${SH} bootstrap.sh
+
+post-install:
+ @${STRIP_CMD} ${STAGEDIR}${PREFIX}/lib/libpostal.so
+ ${INSTALL_PROGRAM} ${WRKSRC}/src/address_parser ${STAGEDIR}${PREFIX}/bin
+ ${INSTALL_SCRIPT} ${WRKSRC}/src/libpostal ${STAGEDIR}${PREFIX}/bin
+
+post-install-DOCS-on:
+ @${MKDIR} ${STAGEDIR}${DOCSDIR}
+ ${INSTALL_DATA} ${PORTDOCS:S|^|${WRKSRC}/|} ${STAGEDIR}${DOCSDIR}
+
+.include <bsd.port.mk>
diff --git a/misc/libpostal/distinfo b/misc/libpostal/distinfo
new file mode 100644
index 000000000000..f48f6ffa2377
--- /dev/null
+++ b/misc/libpostal/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1513006366
+SHA256 (openvenues-libpostal-v1.0.0_GH0.tar.gz) = 3035af7e15b2894069753975d953fa15a86d968103913dbf8ce4b8aa26231644
+SIZE (openvenues-libpostal-v1.0.0_GH0.tar.gz) = 5537587
diff --git a/misc/libpostal/files/patch-configure.ac b/misc/libpostal/files/patch-configure.ac
new file mode 100644
index 000000000000..6ac8e7c9d4a7
--- /dev/null
+++ b/misc/libpostal/files/patch-configure.ac
@@ -0,0 +1,11 @@
+--- configure.ac.orig 2017-04-07 21:40:27 UTC
++++ configure.ac
+@@ -19,7 +19,7 @@ AC_CONFIG_HEADERS([config.h])
+ AC_PROG_CC_C99
+ AC_PROG_INSTALL
+
+-LDFLAGS="$LDFLAGS -L/usr/local/lib"
++#LDFLAGS="$LDFLAGS -L/usr/local/lib"
+
+ # Checks for libraries.
+ AC_SEARCH_LIBS([log],
diff --git a/misc/libpostal/files/patch-src_Makefile.am b/misc/libpostal/files/patch-src_Makefile.am
new file mode 100644
index 000000000000..09f1e0609ac4
--- /dev/null
+++ b/misc/libpostal/files/patch-src_Makefile.am
@@ -0,0 +1,74 @@
+--- src/Makefile.am.orig 2017-04-07 21:40:27 UTC
++++ src/Makefile.am
+@@ -1,11 +1,11 @@
+ # Inherited from autoconf / user-specified
+ CFLAGS_CONF = @CFLAGS@
+-CFLAGS_BASE = -Wall -Wextra -Wno-unused-function -Wformat -Werror=format-security -Winit-self -Wno-sign-compare -DLIBPOSTAL_DATA_DIR='"$(datadir)/libpostal"' -g $(CFLAGS_CONF)
++CFLAGS_BASE = -Wall -Wextra -Wno-unused-function -Wformat -Werror=format-security -Winit-self -Wno-sign-compare -DLIBPOSTAL_DATA_DIR='"$(datadir)"' $(CFLAGS_CONF)
+ CFLAGS_O0 = $(CFLAGS_BASE) -O0
+ CFLAGS_O1 = $(CFLAGS_BASE) -O1
+ CFLAGS_O2 = $(CFLAGS_BASE) -O2
+ CFLAGS_O3 = $(CFLAGS_BASE) -O3
+-DEFAULT_INCLUDES = -I.. -I/usr/local/include
++DEFAULT_INCLUDES = -I..
+
+ # Wonky but have to be able to override the user's optimization level to compile the scanner
+ # as it takes an unreasonably long time to compile with the optimizer on.
+@@ -14,7 +14,7 @@ CFLAGS =
+ lib_LTLIBRARIES = libpostal.la
+ libpostal_la_SOURCES = libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c numex.c utf8proc/utf8proc.c cmp/cmp.c normalize.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c
+ libpostal_la_LIBADD = libscanner.la $(CBLAS_LIBS)
+-libpostal_la_CFLAGS = $(CFLAGS_O2)
++libpostal_la_CFLAGS = $(CFLAGS_BASE)
+ libpostal_la_LDFLAGS = -version-info @LIBPOSTAL_SO_VERSION@
+
+ dist_bin_SCRIPTS = libpostal_data
+@@ -30,37 +30,37 @@ noinst_PROGRAMS = libpostal bench addres
+
+ libpostal_SOURCES = main.c json_encode.c
+ libpostal_LDADD = libpostal.la
+-libpostal_CFLAGS = $(CFLAGS_O3)
++libpostal_CFLAGS = $(CFLAGS_BASE)
+ bench_SOURCES = bench.c
+ bench_LDADD = libpostal.la libscanner.la $(CBLAS_LIBS)
+-bench_CFLAGS = $(CFLAGS_O3)
++bench_CFLAGS = $(CFLAGS_BASE)
+ address_parser_SOURCES = address_parser_cli.c json_encode.c linenoise/linenoise.c libpostal.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c numex.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c
+ address_parser_LDADD = libscanner.la $(CBLAS_LIBS)
+-address_parser_CFLAGS = $(CFLAGS_O3)
++address_parser_CFLAGS = $(CFLAGS_BASE)
+
+ build_address_dictionary_SOURCES = address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c
+-build_address_dictionary_CFLAGS = $(CFLAGS_O3)
++build_address_dictionary_CFLAGS = $(CFLAGS_BASE)
+ build_numex_table_SOURCES = numex_table_builder.c numex.c file_utils.c string_utils.c tokens.c trie.c trie_search.c utf8proc/utf8proc.c
+-build_numex_table_CFLAGS = $(CFLAGS_O3)
++build_numex_table_CFLAGS = $(CFLAGS_BASE)
+ build_trans_table_SOURCES = transliteration_table_builder.c transliterate.c trie.c trie_search.c file_utils.c string_utils.c utf8proc/utf8proc.c
+-build_trans_table_CFLAGS = $(CFLAGS_O3)
++build_trans_table_CFLAGS = $(CFLAGS_BASE)
+ address_parser_train_SOURCES = address_parser_train.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_trainer.c crf_trainer.c crf_trainer_averaged_perceptron.c averaged_perceptron_tagger.c address_dictionary.c normalize.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c shuffle.c utf8proc/utf8proc.c ngrams.c
+ address_parser_train_LDADD = libscanner.la $(CBLAS_LIBS)
+-address_parser_train_CFLAGS = $(CFLAGS_O3)
++address_parser_train_CFLAGS = $(CFLAGS_BASE)
+
+ address_parser_test_SOURCES = address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c
+ address_parser_test_LDADD = libscanner.la $(CBLAS_LIBS)
+-address_parser_test_CFLAGS = $(CFLAGS_O3)
++address_parser_test_CFLAGS = $(CFLAGS_BASE)
+
+ language_classifier_train_SOURCES = language_classifier_train.c language_classifier.c language_features.c language_classifier_io.c logistic_regression_trainer.c logistic_regression.c logistic.c sparse_matrix.c sparse_matrix_utils.c features.c minibatch.c float_utils.c stochastic_gradient_descent.c ftrl.c regularization.c cartesian_product.c normalize.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c shuffle.c
+ language_classifier_train_LDADD = libscanner.la $(CBLAS_LIBS)
+-language_classifier_train_CFLAGS = $(CFLAGS_O3)
++language_classifier_train_CFLAGS = $(CFLAGS_BASE)
+ language_classifier_SOURCES = language_classifier_cli.c language_classifier.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c
+ language_classifier_LDADD = libscanner.la $(CBLAS_LIBS)
+-language_classifier_CFLAGS = $(CFLAGS_O3)
++language_classifier_CFLAGS = $(CFLAGS_BASE)
+ language_classifier_test_SOURCES = language_classifier_test.c language_classifier.c language_classifier_io.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c
+ language_classifier_test_LDADD = libscanner.la $(CBLAS_LIBS)
+-language_classifier_test_CFLAGS = $(CFLAGS_O3)
++language_classifier_test_CFLAGS = $(CFLAGS_BASE)
+
+
+ pkginclude_HEADERS = libpostal.h
diff --git a/misc/libpostal/files/patch-src_libpostal__data b/misc/libpostal/files/patch-src_libpostal__data
new file mode 100644
index 000000000000..d6bf904d6763
--- /dev/null
+++ b/misc/libpostal/files/patch-src_libpostal__data
@@ -0,0 +1,23 @@
+--- src/libpostal_data.orig 2017-04-07 21:40:27 UTC
++++ src/libpostal_data
+@@ -78,9 +78,9 @@ download_multipart() {
+ else
+ max=$size;
+ fi;
+- printf "%s\0%s\0%s\0%s\0%s\0" "$i" "$offset" "$max" "$url" "$part_filename"
++ printf "%s\0%s\0%s\0%s\0%s\0%s\0" "x" "$i" "$offset" "$max" "$url" "$part_filename"
+ offset=$((offset+CHUNK_SIZE))
+- done | xargs -0 -n 5 -P $NUM_WORKERS sh -c "$DOWNLOAD_PART" --
++ done | xargs -0 -n 6 -P $NUM_WORKERS sh -c "$DOWNLOAD_PART" --
+
+ > $local_path
+
+@@ -176,6 +176,8 @@ if [ $COMMAND = "download" ]; then
+ download_file $LIBPOSTAL_LANG_CLASS_UPDATED_PATH $LIBPOSTAL_DATA_DIR $lang_class_s3_prefix $LIBPOSTAL_LANG_CLASS_FILE "language classifier data file" $LANGUAGE_CLASSIFIER_MODULE_DIR
+ fi
+
++ chown -R root:wheel "$LIBPOSTAL_DATA_DIR"
++
+ if [ "$LIBPOSTAL_DATA_DIR_VERSION" != "$LIBPOSTAL_VERSION_STRING" ]; then
+ echo $LIBPOSTAL_VERSION_STRING > $LIBPOSTAL_DATA_VERSION_FILE;
+ fi
diff --git a/misc/libpostal/files/patch-src_sparkey_Makefile.am b/misc/libpostal/files/patch-src_sparkey_Makefile.am
new file mode 100644
index 000000000000..dccff8b173da
--- /dev/null
+++ b/misc/libpostal/files/patch-src_sparkey_Makefile.am
@@ -0,0 +1,15 @@
+--- src/sparkey/Makefile.am.orig 2017-04-07 21:40:27 UTC
++++ src/sparkey/Makefile.am
+@@ -1,5 +1,5 @@
+ CFLAGS_CONF = @CFLAGS@
+-CFLAGS = -I/usr/local/include -O2 -Wall -Wextra -Wfloat-equal -Wshadow -Wpointer-arith -Werror -pedantic $(CFLAGS_CONF)
++CFLAGS = -Wall -Wextra -Wfloat-equal -Wshadow -Wpointer-arith -Werror -pedantic $(CFLAGS_CONF)
+
+ noinst_LTLIBRARIES = libsparkey.la
+ libsparkey_la_SOURCES = endiantools.h hashheader.h logheader.h \
+@@ -8,4 +8,4 @@ logreader.c returncodes.c util.c buf.h h
+ sparkey.h util.h endiantools.c \
+ hashheader.c hashreader.c logheader.c logwriter.c MurmurHash3.c \
+ sparkey-internal.h
+-libsparkey_la_LDFLAGS = -L/usr/local/lib
++#libsparkey_la_LDFLAGS = -L/usr/local/lib
diff --git a/misc/libpostal/files/patch-test_Makefile.am b/misc/libpostal/files/patch-test_Makefile.am
new file mode 100644
index 000000000000..8ff79254647b
--- /dev/null
+++ b/misc/libpostal/files/patch-test_Makefile.am
@@ -0,0 +1,20 @@
+--- test/Makefile.am.orig 2017-04-07 21:40:27 UTC
++++ test/Makefile.am
+@@ -1,9 +1,9 @@
+-CFLAGS_BASE = -Wfloat-equal -Wpointer-arith -std=gnu99 -DLIBPOSTAL_DATA_DIR='"$(datadir)/libpostal"' -g
++CFLAGS_BASE = -Wfloat-equal -Wpointer-arith -std=gnu99 -DLIBPOSTAL_DATA_DIR='"$(datadir)"'
+ CFLAGS_O0 = $(CFLAGS_BASE) -O0
+ CFLAGS_O1 = $(CFLAGS_BASE) -O1
+ CFLAGS_O2 = $(CFLAGS_BASE) -O2
+ CFLAGS_O3 = $(CFLAGS_BASE) -O3
+-DEFAULT_INCLUDES = -I.. -I/usr/local/include
++DEFAULT_INCLUDES = -I..
+
+ CFLAGS = $(CFLAGS_BASE)
+
+@@ -11,4 +11,4 @@ TESTS = test_libpostal
+ noinst_PROGRAMS = test_libpostal
+ test_libpostal_SOURCES = test.c test_expand.c test_parser.c test_transliterate.c test_numex.c test_trie.c test_string_utils.c test_crf_context.c
+ test_libpostal_LDADD = ../src/libpostal.la $(CBLAS_LIBS)
+-test_libpostal_CFLAGS = $(CFLAGS_O3)
++test_libpostal_CFLAGS = $(CFLAGS_BASE)
diff --git a/misc/libpostal/files/pkg-message.in b/misc/libpostal/files/pkg-message.in
new file mode 100644
index 000000000000..83631648f446
--- /dev/null
+++ b/misc/libpostal/files/pkg-message.in
@@ -0,0 +1,4 @@
+%%PORTNAME%% requires model data (about 1.5GB) which can be downloaded using
+the following command:
+
+# %%PREFIX%%/bin/libpostal_data download all %%LIBPOSTAL_DATADIR%%
diff --git a/misc/libpostal/pkg-descr b/misc/libpostal/pkg-descr
new file mode 100644
index 000000000000..2ab55d343202
--- /dev/null
+++ b/misc/libpostal/pkg-descr
@@ -0,0 +1,6 @@
+C library for parsing/normalizing street addresses around the world, powered
+by statistical NLP and open geo data. This library helps convert the
+free-form addresses that humans use into clean normalized forms suitable for
+machine comparison and full-text indexing.
+
+WWW: https://github.com/openvenues/libpostal
diff --git a/misc/libpostal/pkg-plist b/misc/libpostal/pkg-plist
new file mode 100644
index 000000000000..f7a31c3f25ba
--- /dev/null
+++ b/misc/libpostal/pkg-plist
@@ -0,0 +1,9 @@
+bin/address_parser
+bin/libpostal
+bin/libpostal_data
+include/libpostal/libpostal.h
+%%STATIC%%lib/libpostal.a
+lib/libpostal.so
+lib/libpostal.so.1
+lib/libpostal.so.1.0.0
+libdata/pkgconfig/libpostal.pc