diff options
author | yuri <yuri@FreeBSD.org> | 2017-12-19 06:43:44 +0800 |
---|---|---|
committer | yuri <yuri@FreeBSD.org> | 2017-12-19 06:43:44 +0800 |
commit | 1e74c9004d35587283a5bb7c07a572de211d1d59 (patch) | |
tree | a90a2de1c61388d57aaa390465e7c250324bf77d /misc | |
parent | 1704c23f3c8282f6d11ed491c44836f18a0e1163 (diff) | |
download | freebsd-ports-gnome-1e74c9004d35587283a5bb7c07a572de211d1d59.tar.gz freebsd-ports-gnome-1e74c9004d35587283a5bb7c07a572de211d1d59.tar.zst freebsd-ports-gnome-1e74c9004d35587283a5bb7c07a572de211d1d59.zip |
New port: misc/libpostal: Library for parsing/normalizing street addresses around the world
PR: 224262
Submitted by: Dmitri Goutnik <dg@syrec.org>
Approved by: adamw (mentor)
Differential Revision: https://reviews.freebsd.org/D13468
Diffstat (limited to 'misc')
-rw-r--r-- | misc/Makefile | 1 | ||||
-rw-r--r-- | misc/libpostal/Makefile | 58 | ||||
-rw-r--r-- | misc/libpostal/distinfo | 3 | ||||
-rw-r--r-- | misc/libpostal/files/patch-configure.ac | 11 | ||||
-rw-r--r-- | misc/libpostal/files/patch-src_Makefile.am | 74 | ||||
-rw-r--r-- | misc/libpostal/files/patch-src_libpostal__data | 23 | ||||
-rw-r--r-- | misc/libpostal/files/patch-src_sparkey_Makefile.am | 15 | ||||
-rw-r--r-- | misc/libpostal/files/patch-test_Makefile.am | 20 | ||||
-rw-r--r-- | misc/libpostal/files/pkg-message.in | 4 | ||||
-rw-r--r-- | misc/libpostal/pkg-descr | 6 | ||||
-rw-r--r-- | misc/libpostal/pkg-plist | 9 |
11 files changed, 224 insertions, 0 deletions
diff --git a/misc/Makefile b/misc/Makefile index 43d4d2c7a4b8..6442bf747aab 100644 --- a/misc/Makefile +++ b/misc/Makefile @@ -250,6 +250,7 @@ SUBDIR += libisocodes SUBDIR += libkdeedu SUBDIR += libmetalink + SUBDIR += libpostal SUBDIR += libpri SUBDIR += libsupertone SUBDIR += libutf diff --git a/misc/libpostal/Makefile b/misc/libpostal/Makefile new file mode 100644 index 000000000000..c215122be699 --- /dev/null +++ b/misc/libpostal/Makefile @@ -0,0 +1,58 @@ +# $FreeBSD$ + +PORTNAME= libpostal +DISTVERSIONPREFIX= v +DISTVERSION= 1.0.0 +CATEGORIES= misc geography + +MAINTAINER= dg@syrec.org +COMMENT= Library for parsing/normalizing street addresses around the world + +LICENSE= MIT +LICENSE_FILE= ${WRKSRC}/LICENSE + +RUN_DEPENDS= curl:ftp/curl + +USES= autoreconf libtool localbase +GNU_CONFIGURE= yes +USE_LDCONFIG= yes +CONFIGURE_ARGS= --datadir=${LIBPOSTAL_DATADIR} \ + --disable-data-download +USE_GITHUB= yes +GH_ACCOUNT= openvenues +TEST_TARGET= check + +LIBPOSTAL_DATADIR?= /var/db/${PORTNAME} + +SUB_FILES= pkg-message +SUB_LIST= PORTNAME=${PORTNAME} \ + LIBPOSTAL_DATADIR=${LIBPOSTAL_DATADIR} + +PORTDOCS= README.md + +OPTIONS_DEFINE= CBLAS DOCS SSE2 STATIC +CBLAS_DESC= Build with CBLAS/OPENBLAS +SSE2_DESC= Enable SSE2 optimization +OPTIONS_SUB= yes + +CBLAS_BUILD_DEPENDS= ${LOCALBASE}/include/cblas.h:math/cblas +CBLAS_LIB_DEPENDS= libopenblas.so:math/openblas +CBLAS_CONFIGURE_WITH= cblas=${LOCALBASE}/lib/libopenblas.so + +SSE2_CONFIGURE_ENABLE= sse2 + +STATIC_CONFIGURE_ENABLE= static + +pre-configure: + @cd ${WRKSRC} && ${SH} bootstrap.sh + +post-install: + @${STRIP_CMD} ${STAGEDIR}${PREFIX}/lib/libpostal.so + ${INSTALL_PROGRAM} ${WRKSRC}/src/address_parser ${STAGEDIR}${PREFIX}/bin + ${INSTALL_PROGRAM} ${WRKSRC}/src/.libs/libpostal ${STAGEDIR}${PREFIX}/bin + +post-install-DOCS-on: + @${MKDIR} ${STAGEDIR}${DOCSDIR} + ${INSTALL_DATA} ${PORTDOCS:S|^|${WRKSRC}/|} ${STAGEDIR}${DOCSDIR} + +.include <bsd.port.mk> diff --git a/misc/libpostal/distinfo b/misc/libpostal/distinfo new 
file mode 100644 index 000000000000..f48f6ffa2377 --- /dev/null +++ b/misc/libpostal/distinfo @@ -0,0 +1,3 @@ +TIMESTAMP = 1513006366 +SHA256 (openvenues-libpostal-v1.0.0_GH0.tar.gz) = 3035af7e15b2894069753975d953fa15a86d968103913dbf8ce4b8aa26231644 +SIZE (openvenues-libpostal-v1.0.0_GH0.tar.gz) = 5537587 diff --git a/misc/libpostal/files/patch-configure.ac b/misc/libpostal/files/patch-configure.ac new file mode 100644 index 000000000000..6ac8e7c9d4a7 --- /dev/null +++ b/misc/libpostal/files/patch-configure.ac @@ -0,0 +1,11 @@ +--- configure.ac.orig 2017-04-07 21:40:27 UTC ++++ configure.ac +@@ -19,7 +19,7 @@ AC_CONFIG_HEADERS([config.h]) + AC_PROG_CC_C99 + AC_PROG_INSTALL + +-LDFLAGS="$LDFLAGS -L/usr/local/lib" ++#LDFLAGS="$LDFLAGS -L/usr/local/lib" + + # Checks for libraries. + AC_SEARCH_LIBS([log], diff --git a/misc/libpostal/files/patch-src_Makefile.am b/misc/libpostal/files/patch-src_Makefile.am new file mode 100644 index 000000000000..09f1e0609ac4 --- /dev/null +++ b/misc/libpostal/files/patch-src_Makefile.am @@ -0,0 +1,74 @@ +--- src/Makefile.am.orig 2017-04-07 21:40:27 UTC ++++ src/Makefile.am +@@ -1,11 +1,11 @@ + # Inherited from autoconf / user-specified + CFLAGS_CONF = @CFLAGS@ +-CFLAGS_BASE = -Wall -Wextra -Wno-unused-function -Wformat -Werror=format-security -Winit-self -Wno-sign-compare -DLIBPOSTAL_DATA_DIR='"$(datadir)/libpostal"' -g $(CFLAGS_CONF) ++CFLAGS_BASE = -Wall -Wextra -Wno-unused-function -Wformat -Werror=format-security -Winit-self -Wno-sign-compare -DLIBPOSTAL_DATA_DIR='"$(datadir)"' $(CFLAGS_CONF) + CFLAGS_O0 = $(CFLAGS_BASE) -O0 + CFLAGS_O1 = $(CFLAGS_BASE) -O1 + CFLAGS_O2 = $(CFLAGS_BASE) -O2 + CFLAGS_O3 = $(CFLAGS_BASE) -O3 +-DEFAULT_INCLUDES = -I.. -I/usr/local/include ++DEFAULT_INCLUDES = -I.. + + # Wonky but have to be able to override the user's optimization level to compile the scanner + # as it takes an unreasonably long time to compile with the optimizer on. 
+@@ -14,7 +14,7 @@ CFLAGS = + lib_LTLIBRARIES = libpostal.la + libpostal_la_SOURCES = libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c numex.c utf8proc/utf8proc.c cmp/cmp.c normalize.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c + libpostal_la_LIBADD = libscanner.la $(CBLAS_LIBS) +-libpostal_la_CFLAGS = $(CFLAGS_O2) ++libpostal_la_CFLAGS = $(CFLAGS_BASE) + libpostal_la_LDFLAGS = -version-info @LIBPOSTAL_SO_VERSION@ + + dist_bin_SCRIPTS = libpostal_data +@@ -30,37 +30,37 @@ noinst_PROGRAMS = libpostal bench addres + + libpostal_SOURCES = main.c json_encode.c + libpostal_LDADD = libpostal.la +-libpostal_CFLAGS = $(CFLAGS_O3) ++libpostal_CFLAGS = $(CFLAGS_BASE) + bench_SOURCES = bench.c + bench_LDADD = libpostal.la libscanner.la $(CBLAS_LIBS) +-bench_CFLAGS = $(CFLAGS_O3) ++bench_CFLAGS = $(CFLAGS_BASE) + address_parser_SOURCES = address_parser_cli.c json_encode.c linenoise/linenoise.c libpostal.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c numex.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c + address_parser_LDADD = libscanner.la $(CBLAS_LIBS) +-address_parser_CFLAGS = $(CFLAGS_O3) ++address_parser_CFLAGS = $(CFLAGS_BASE) + + build_address_dictionary_SOURCES = address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c +-build_address_dictionary_CFLAGS = $(CFLAGS_O3) 
++build_address_dictionary_CFLAGS = $(CFLAGS_BASE) + build_numex_table_SOURCES = numex_table_builder.c numex.c file_utils.c string_utils.c tokens.c trie.c trie_search.c utf8proc/utf8proc.c +-build_numex_table_CFLAGS = $(CFLAGS_O3) ++build_numex_table_CFLAGS = $(CFLAGS_BASE) + build_trans_table_SOURCES = transliteration_table_builder.c transliterate.c trie.c trie_search.c file_utils.c string_utils.c utf8proc/utf8proc.c +-build_trans_table_CFLAGS = $(CFLAGS_O3) ++build_trans_table_CFLAGS = $(CFLAGS_BASE) + address_parser_train_SOURCES = address_parser_train.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_trainer.c crf_trainer.c crf_trainer_averaged_perceptron.c averaged_perceptron_tagger.c address_dictionary.c normalize.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c shuffle.c utf8proc/utf8proc.c ngrams.c + address_parser_train_LDADD = libscanner.la $(CBLAS_LIBS) +-address_parser_train_CFLAGS = $(CFLAGS_O3) ++address_parser_train_CFLAGS = $(CFLAGS_BASE) + + address_parser_test_SOURCES = address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c + address_parser_test_LDADD = libscanner.la $(CBLAS_LIBS) +-address_parser_test_CFLAGS = $(CFLAGS_O3) ++address_parser_test_CFLAGS = $(CFLAGS_BASE) + + language_classifier_train_SOURCES = language_classifier_train.c language_classifier.c language_features.c language_classifier_io.c logistic_regression_trainer.c logistic_regression.c logistic.c sparse_matrix.c sparse_matrix_utils.c features.c minibatch.c float_utils.c stochastic_gradient_descent.c ftrl.c 
regularization.c cartesian_product.c normalize.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c shuffle.c + language_classifier_train_LDADD = libscanner.la $(CBLAS_LIBS) +-language_classifier_train_CFLAGS = $(CFLAGS_O3) ++language_classifier_train_CFLAGS = $(CFLAGS_BASE) + language_classifier_SOURCES = language_classifier_cli.c language_classifier.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c + language_classifier_LDADD = libscanner.la $(CBLAS_LIBS) +-language_classifier_CFLAGS = $(CFLAGS_O3) ++language_classifier_CFLAGS = $(CFLAGS_BASE) + language_classifier_test_SOURCES = language_classifier_test.c language_classifier.c language_classifier_io.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c + language_classifier_test_LDADD = libscanner.la $(CBLAS_LIBS) +-language_classifier_test_CFLAGS = $(CFLAGS_O3) ++language_classifier_test_CFLAGS = $(CFLAGS_BASE) + + + pkginclude_HEADERS = libpostal.h diff --git a/misc/libpostal/files/patch-src_libpostal__data b/misc/libpostal/files/patch-src_libpostal__data new file mode 100644 index 000000000000..d6bf904d6763 --- /dev/null +++ b/misc/libpostal/files/patch-src_libpostal__data @@ -0,0 +1,23 @@ +--- src/libpostal_data.orig 2017-04-07 21:40:27 UTC ++++ src/libpostal_data +@@ -78,9 +78,9 @@ download_multipart() { + else + max=$size; + fi; +- printf "%s\0%s\0%s\0%s\0%s\0" "$i" "$offset" "$max" "$url" "$part_filename" ++ printf "%s\0%s\0%s\0%s\0%s\0%s\0" "x" "$i" "$offset" "$max" "$url" "$part_filename" + 
offset=$((offset+CHUNK_SIZE)) +- done | xargs -0 -n 5 -P $NUM_WORKERS sh -c "$DOWNLOAD_PART" -- ++ done | xargs -0 -n 6 -P $NUM_WORKERS sh -c "$DOWNLOAD_PART" -- + + > $local_path + +@@ -176,6 +176,8 @@ if [ $COMMAND = "download" ]; then + download_file $LIBPOSTAL_LANG_CLASS_UPDATED_PATH $LIBPOSTAL_DATA_DIR $lang_class_s3_prefix $LIBPOSTAL_LANG_CLASS_FILE "language classifier data file" $LANGUAGE_CLASSIFIER_MODULE_DIR + fi + ++ chown -R root:wheel $LIBPOSTAL_DATA_DIR ++ + if [ "$LIBPOSTAL_DATA_DIR_VERSION" != "$LIBPOSTAL_VERSION_STRING" ]; then + echo $LIBPOSTAL_VERSION_STRING > $LIBPOSTAL_DATA_VERSION_FILE; + fi diff --git a/misc/libpostal/files/patch-src_sparkey_Makefile.am b/misc/libpostal/files/patch-src_sparkey_Makefile.am new file mode 100644 index 000000000000..dccff8b173da --- /dev/null +++ b/misc/libpostal/files/patch-src_sparkey_Makefile.am @@ -0,0 +1,15 @@ +--- src/sparkey/Makefile.am.orig 2017-04-07 21:40:27 UTC ++++ src/sparkey/Makefile.am +@@ -1,5 +1,5 @@ + CFLAGS_CONF = @CFLAGS@ +-CFLAGS = -I/usr/local/include -O2 -Wall -Wextra -Wfloat-equal -Wshadow -Wpointer-arith -Werror -pedantic $(CFLAGS_CONF) ++CFLAGS = -Wall -Wextra -Wfloat-equal -Wshadow -Wpointer-arith -Werror -pedantic $(CFLAGS_CONF) + + noinst_LTLIBRARIES = libsparkey.la + libsparkey_la_SOURCES = endiantools.h hashheader.h logheader.h \ +@@ -8,4 +8,4 @@ logreader.c returncodes.c util.c buf.h h + sparkey.h util.h endiantools.c \ + hashheader.c hashreader.c logheader.c logwriter.c MurmurHash3.c \ + sparkey-internal.h +-libsparkey_la_LDFLAGS = -L/usr/local/lib ++#libsparkey_la_LDFLAGS = -L/usr/local/lib diff --git a/misc/libpostal/files/patch-test_Makefile.am b/misc/libpostal/files/patch-test_Makefile.am new file mode 100644 index 000000000000..8ff79254647b --- /dev/null +++ b/misc/libpostal/files/patch-test_Makefile.am @@ -0,0 +1,20 @@ +--- test/Makefile.am.orig 2017-04-07 21:40:27 UTC ++++ test/Makefile.am +@@ -1,9 +1,9 @@ +-CFLAGS_BASE = -Wfloat-equal -Wpointer-arith -std=gnu99 
-DLIBPOSTAL_DATA_DIR='"$(datadir)/libpostal"' -g ++CFLAGS_BASE = -Wfloat-equal -Wpointer-arith -std=gnu99 -DLIBPOSTAL_DATA_DIR='"$(datadir)"' + CFLAGS_O0 = $(CFLAGS_BASE) -O0 + CFLAGS_O1 = $(CFLAGS_BASE) -O1 + CFLAGS_O2 = $(CFLAGS_BASE) -O2 + CFLAGS_O3 = $(CFLAGS_BASE) -O3 +-DEFAULT_INCLUDES = -I.. -I/usr/local/include ++DEFAULT_INCLUDES = -I.. + + CFLAGS = $(CFLAGS_BASE) + +@@ -11,4 +11,4 @@ TESTS = test_libpostal + noinst_PROGRAMS = test_libpostal + test_libpostal_SOURCES = test.c test_expand.c test_parser.c test_transliterate.c test_numex.c test_trie.c test_string_utils.c test_crf_context.c + test_libpostal_LDADD = ../src/libpostal.la $(CBLAS_LIBS) +-test_libpostal_CFLAGS = $(CFLAGS_O3) ++test_libpostal_CFLAGS = $(CFLAGS_BASE) diff --git a/misc/libpostal/files/pkg-message.in b/misc/libpostal/files/pkg-message.in new file mode 100644 index 000000000000..83631648f446 --- /dev/null +++ b/misc/libpostal/files/pkg-message.in @@ -0,0 +1,4 @@ +%%PORTNAME%% requires model data (about 1.5GB) which can be downloaded using +the following command: + +# %%PREFIX%%/bin/libpostal_data download all %%LIBPOSTAL_DATADIR%% diff --git a/misc/libpostal/pkg-descr b/misc/libpostal/pkg-descr new file mode 100644 index 000000000000..2ab55d343202 --- /dev/null +++ b/misc/libpostal/pkg-descr @@ -0,0 +1,6 @@ +C library for parsing/normalizing street addresses around the world, powered +by statistical NLP and open geo data. This library helps convert the +free-form addresses that humans use into clean normalized forms suitable for +machine comparison and full-text indexing. 
+ +WWW: https://github.com/openvenues/libpostal diff --git a/misc/libpostal/pkg-plist b/misc/libpostal/pkg-plist new file mode 100644 index 000000000000..f7a31c3f25ba --- /dev/null +++ b/misc/libpostal/pkg-plist @@ -0,0 +1,9 @@ +bin/address_parser +bin/libpostal +bin/libpostal_data +include/libpostal/libpostal.h +%%STATIC%%lib/libpostal.a +lib/libpostal.so +lib/libpostal.so.1 +lib/libpostal.so.1.0.0 +libdata/pkgconfig/libpostal.pc |