diff options
author | bf <bf@FreeBSD.org> | 2013-08-16 07:01:27 +0800 |
---|---|---|
committer | bf <bf@FreeBSD.org> | 2013-08-16 07:01:27 +0800 |
commit | 8d118f54682ed791040aea7fa234bf25d6c0d228 (patch) | |
tree | 5f62b7491efa50ee1ac19d348dfad79cf3823bdd /math | |
parent | 7a36c2f67b1cad9bcd17b5d90b28d7e0bb713106 (diff) | |
download | freebsd-ports-gnome-8d118f54682ed791040aea7fa234bf25d6c0d228.tar.gz freebsd-ports-gnome-8d118f54682ed791040aea7fa234bf25d6c0d228.tar.zst freebsd-ports-gnome-8d118f54682ed791040aea7fa234bf25d6c0d228.zip |
Add sfft 0.1.0, optimized Sparse Fast Fourier Transform.
Diffstat (limited to 'math')
-rw-r--r-- | math/Makefile | 1 | ||||
-rw-r--r-- | math/sfft/Makefile | 123 | ||||
-rw-r--r-- | math/sfft/distinfo | 4 | ||||
-rw-r--r-- | math/sfft/files/patch-src__computefourier-1.0-2.0.cc | 55 | ||||
-rw-r--r-- | math/sfft/files/patch-src__computefourier-3.0.cc | 132 | ||||
-rw-r--r-- | math/sfft/pkg-descr | 8 |
6 files changed, 323 insertions, 0 deletions
diff --git a/math/Makefile b/math/Makefile index 6b5dcf4c3f14..fc12adaaa554 100644 --- a/math/Makefile +++ b/math/Makefile @@ -610,6 +610,7 @@ SUBDIR += sdpa-gmp SUBDIR += sdpara SUBDIR += sedumi + SUBDIR += sfft SUBDIR += simd-viterbi SUBDIR += slatec SUBDIR += slgrace diff --git a/math/sfft/Makefile b/math/sfft/Makefile new file mode 100644 index 000000000000..dd6cf86714ac --- /dev/null +++ b/math/sfft/Makefile @@ -0,0 +1,123 @@ +# Created by: bf@FreeBSD.org +# $FreeBSD$ + +PORTNAME= sfft +PORTVERSION= 0.1.0 +CATEGORIES= math +MASTER_SITES= http://spiral.net/software/sfft/ LOCAL/bf +DISTFILES= ${DISTNAME}${EXTRACT_SUFX} +EXTRACT_ONLY= ${DISTNAME}${EXTRACT_SUFX} + +MAINTAINER= bf@FreeBSD.org +COMMENT= Optimized Sparse Fast Fourier Transform + +LICENSE= GPLv2 + +LIB_DEPENDS= libfftw3.so:${PORTSDIR}/math/fftw3 + +USE_GCC= yes +USE_LDCONFIG= yes +USE_ZIP= yes + +OPTIONS_DEFINE= OPTIMIZED_CFLAGS PROFILE +OPTIONS_DEFAULT= OPTIMIZED_CFLAGS + +BUILD_WRKSRC= ${WRKSRC}/src +INSTALL_WRKSRC= ${BUILD_WRKSRC} +CFLAGS+= -fopenmp -msse2 -Iflopcount -I${LOCALBASE}/include +LDFLAGS+= -L${LOCALBASE}/lib +HEADERS= sfft.h +HDIR= include/sfft +MAKE_ENV= LDADD="-lfftw3 ${LIBM}" LIB=sfft SHLIB_MAJOR="${SHLIB_MAJOR}" \ + SRCCONF=/dev/null SRCS="${SRCS}" + +PLIST_DIRS= ${HDIR} +PLIST_FILES= ${HEADERS:S|^|${HDIR}/|} lib/libsfft.a lib/libsfft.so \ + lib/libsfft.so.${SHLIB_MAJOR} + +SHLIB_MAJOR= 1 +SRCS= common.cc computefourier-1.0-2.0.cc \ + computefourier-3.0.cc fftw.cc filters.cc parameters.cc \ + sfft.cc simulation.cc timer.cc utils.cc + +.include <bsd.port.options.mk> + +.if !${ARCH:Mamd64} && !${MACHINE_CPU:Msse2} +IGNORE= this port requires SSE2, and benefits from SSE3 -- set CPUTYPE\ +appropriately +.endif + +LIBM= -lm +.if ${OSVERSION} < 1000034 +LIB_DEPENDS+= libmissing.so:${PORTSDIR}/math/libmissing +LIBM+= -lmissing +.endif + +.if ${PORT_OPTIONS:MDOCS} || make(makesum) +DISTFILES+= sfft-doc.pdf +PORTDOCS= sfft-doc.pdf +.endif + +.if ${PORT_OPTIONS:MOPTIMIZED_CFLAGS} +CFLAGS+= 
-O3 -ffast-math +.endif + +.if ${PORT_OPTIONS:MPROFILE} +.if defined(NOPROFILE) || defined(NO_PROFILE) || defined(WITHOUT_PROFILE) +IGNORE = you have defined WITH_PROFILE, but have also defined\ +WITHOUT_PROFILE, NOPROFILE, or NO_PROFILE +.elif !exists(/usr/lib/libc_p.a) +IGNORE = you have chosen WITH_PROFILE, but have not installed the\ +base system profiling libraries +.endif +PLIST_FILES+= lib/libsfft_p.a +.else +MAKE_ENV+= NO_PROFILE=yes +.endif + +post-extract: + @${CP} /usr/include/complex.h ${BUILD_WRKSRC}/sfftcomplex.h + @${PRINTF} "LIBDIR=\t${PREFIX}/lib\n.include <bsd.lib.mk>\n" > \ + ${BUILD_WRKSRC}/Makefile + +post-patch: + @${REINPLACE_CMD} -e 's/string\.h/cstring/' \ + ${WRKSRC}/src/utils.cc +.if ${OSVERSION} < 1000034 + @${REINPLACE_CMD} -e '\|<complex.h>|{x; \ + s|^.*$$|#include "missing_complex.h"|; H; x;}' \ + ${WRKSRC}/src/fft.h +.endif + @${REINPLACE_CMD} -E -e '/<complex\.h>/ \ + {s/<complex\.h>/ "sfftcomplex.h"/; x ; \ + s|^.*$$|#endif|; G; x; \ + s|^.*$$|extern "C" {|; G; x; \ + s|^.*$$|#ifdef __cplusplus|; G; x; \ + s|^.*$$|#ifdef __cplusplus|; H; \ + s|^.*$$|}|; H; \ + s|^.*$$|#endif|; H; x;}' \ + ${WRKSRC}/src/computefourier-1.0-2.0.h \ + ${WRKSRC}/src/computefourier-3.0.h \ + ${WRKSRC}/src/fft.h + +CORELIMIT?= /usr/bin/limits -Sc 0 + +check regression-test test: build + @cd ${BUILD_WRKSRC}; \ + ${CXX} ${CXXFLAGS} -o sfft-verification verification.cc \ + ${LDFLAGS} libsfft.a -lfftw3 ${LIBM} ; \ + for _v in 1 2 3 ; do \ + for _k in 5 10 50; do \ + echo "Checking sfft version $${_v} with $${_k} frequency components:"; \ + ${CORELIMIT} ./sfft-verification -k $${_k} -r 3 -v $${_v} || ${TRUE} ; \ + done ; done + +post-install: + @${MKDIR} ${PREFIX}/${HDIR} + @cd ${BUILD_WRKSRC}; ${INSTALL_DATA} ${HEADERS} ${PREFIX}/${HDIR} +.if ${PORT_OPTIONS:MDOCS} + @${MKDIR} ${DOCSDIR} + @${INSTALL_DATA} ${_DISTDIR}/${PORTDOCS} ${DOCSDIR} +.endif + +.include <bsd.port.mk> diff --git a/math/sfft/distinfo b/math/sfft/distinfo new file mode 100644 index 
000000000000..c0af74abb746 --- /dev/null +++ b/math/sfft/distinfo @@ -0,0 +1,4 @@ +SHA256 (sfft-0.1.0.zip) = b52d53f020e82f67cc7da2ad9cacb428752ec3229ce00f435a527d6180ddd494 +SIZE (sfft-0.1.0.zip) = 466599 +SHA256 (sfft-doc.pdf) = d9416944b2ca7bd068320a1b45ed0cc4a311b0bfbac5ed43e68f46bdc3a93454 +SIZE (sfft-doc.pdf) = 122378 diff --git a/math/sfft/files/patch-src__computefourier-1.0-2.0.cc b/math/sfft/files/patch-src__computefourier-1.0-2.0.cc new file mode 100644 index 000000000000..09b1e51d5b29 --- /dev/null +++ b/math/sfft/files/patch-src__computefourier-1.0-2.0.cc @@ -0,0 +1,55 @@ +--- src/computefourier-1.0-2.0.cc.orig 2013-06-13 08:12:25.000000000 -0400 ++++ src/computefourier-1.0-2.0.cc 2013-08-09 00:26:54.000000000 -0400 +@@ -248,8 +248,13 @@ + __m128d ad_bc = _mm_mul_pd(ab, dc); + __m128d ac_mbd = _mm_mul_pd(ac_bd, signs); + ++#ifdef __SSE3__ + __m128d ab_times_cd = _mm_hadd_pd(ac_mbd, ad_bc); +- ++#else ++ __m128d ab_times_cd_lo = _mm_shuffle_pd(ac_mbd, ad_bc, 0); ++ __m128d ab_times_cd_hi = _mm_shuffle_pd(ac_mbd, ad_bc, 3); ++ __m128d ab_times_cd = _mm_add_pd(ab_times_cd_lo, ab_times_cd_hi); ++#endif + unsigned int i_mod_B_p_offset = (i & B2_m_1) + offset; + __m128d xy = _mm_load_pd(d_x_sampt + i_mod_B_p_offset); + __m128d st = _mm_add_pd(xy, ab_times_cd); +@@ -283,7 +288,13 @@ + __m128d ab_square = _mm_mul_pd(ab, ab); + __m128d cd_square = _mm_mul_pd(cd, cd); + ++#ifdef __SSE3__ + __m128d r = _mm_hadd_pd(ab_square, cd_square); ++#else ++ __m128d r_lo = _mm_shuffle_pd(ab_square, cd_square, 0); ++ __m128d r_hi = _mm_shuffle_pd(ab_square, cd_square, 3); ++ __m128d r = _mm_add_pd(r_lo, r_hi); ++#endif + + _mm_store_pd(samples + j, r); + } +@@ -390,11 +401,23 @@ + __m128d ad_bc = _mm_mul_pd(ab, dc); + __m128d mad_bc = _mm_mul_pd(ad_bc, signs); + ++#ifdef __SSE3__ + __m128d acpbd_bcmad = _mm_hadd_pd(ac_bd, mad_bc); ++#else ++ __m128d acpbd_bcmad_lo = _mm_shuffle_pd(ac_bd, mad_bc, 0); ++ __m128d acpbd_bcmad_hi = _mm_shuffle_pd(ac_bd, mad_bc, 3); ++ __m128d 
acpbd_bcmad = _mm_add_pd(acpbd_bcmad_lo, acpbd_bcmad_hi); ++#endif + + __m128d cd_squares = _mm_mul_pd(cd, cd); ++ ++#ifdef __SSE3__ + __m128d cd_squares_sum = + _mm_hadd_pd(cd_squares, cd_squares); ++#else ++ __m128d cd_squares_flip = _mm_shuffle_pd(cd_squares, cd_squares, 1); ++ __m128d cd_squares_sum = _mm_add_pd(cd_squares, cd_squares_flip); ++#endif + + __m128d r = _mm_div_pd(acpbd_bcmad, cd_squares_sum); + diff --git a/math/sfft/files/patch-src__computefourier-3.0.cc b/math/sfft/files/patch-src__computefourier-3.0.cc new file mode 100644 index 000000000000..aba106fdd70a --- /dev/null +++ b/math/sfft/files/patch-src__computefourier-3.0.cc @@ -0,0 +1,132 @@ +--- src/computefourier-3.0.cc.orig 2013-06-13 08:12:26.000000000 -0400 ++++ src/computefourier-3.0.cc 2013-08-10 17:02:52.000000000 -0400 +@@ -416,27 +416,64 @@ + + __m128d t1r = _mm_mul_pd(v1r, ab31); + __m128d t1i = _mm_mul_pd(v1i, ba31); ++ ++#ifdef __SSE3__ + __m128d remove1 = _mm_addsub_pd(t1r, t1i); ++#else ++__m128i mask_fliplo = _mm_set_epi32(0, 0, 0x80000000, 0); ++__m128d t1i_fliplo = _mm_xor_pd(t1i, _mm_castsi128_pd(mask_fliplo)); ++__m128d remove1 = _mm_add_pd(t1r, t1i_fliplo); ++#endif + + __m128d t2r = _mm_mul_pd(v1r, ab32); + __m128d t2i = _mm_mul_pd(v1i, ba32); ++ ++#ifdef __SSE3__ + __m128d remove2 = _mm_addsub_pd(t2r, t2i); ++#else ++__m128d t2i_fliplo = _mm_xor_pd(t2i, _mm_castsi128_pd(mask_fliplo)); ++__m128d remove2 = _mm_add_pd(t2r, t2i_fliplo); ++#endif + + __m128d t3r = _mm_mul_pd(v1r, ab33); + __m128d t3i = _mm_mul_pd(v1i, ba33); ++ ++#ifdef __SSE3__ + __m128d remove3 = _mm_addsub_pd(t3r, t3i); ++#else ++__m128d t3i_fliplo = _mm_xor_pd(t3i, _mm_castsi128_pd(mask_fliplo)); ++__m128d remove3 = _mm_add_pd(t3r, t3i_fliplo); ++#endif + + __m128d t4r = _mm_mul_pd(v2r, ab31); + __m128d t4i = _mm_mul_pd(v2i, ba31); ++ ++#ifdef __SSE3__ + __m128d remove4 = _mm_addsub_pd(t4r, t4i); ++#else ++__m128d t4i_fliplo = _mm_xor_pd(t4i, _mm_castsi128_pd(mask_fliplo)); ++__m128d remove4 = 
_mm_add_pd(t4r, t4i_fliplo); ++#endif + + __m128d t5r = _mm_mul_pd(v2r, ab32); + __m128d t5i = _mm_mul_pd(v2i, ba32); ++ ++#ifdef __SSE3__ + __m128d remove5 = _mm_addsub_pd(t5r, t5i); ++#else ++__m128d t5i_fliplo = _mm_xor_pd(t5i, _mm_castsi128_pd(mask_fliplo)); ++__m128d remove5 = _mm_add_pd(t5r, t5i_fliplo); ++#endif + + __m128d t6r = _mm_mul_pd(v2r, ab33); + __m128d t6i = _mm_mul_pd(v2i, ba33); ++ ++#ifdef __SSE3__ + __m128d remove6 = _mm_addsub_pd(t6r, t6i); ++#else ++__m128d t6i_fliplo = _mm_xor_pd(t6i, _mm_castsi128_pd(mask_fliplo)); ++__m128d remove6 = _mm_add_pd(t6r, t6i_fliplo); ++#endif + + FLOPCOUNT_INCREMENT(6 * (4 + 2)); + +@@ -524,11 +561,28 @@ + __m128d a3b3_sq = _mm_mul_pd(a3b3, a3b3); + FLOPCOUNT_INCREMENT(8); + ++#ifdef __SSE3__ + __m128d c0c1 = _mm_hadd_pd(a0b0_sq, a1b1_sq); + __m128d c2c3 = _mm_hadd_pd(a2b2_sq, a3b3_sq); ++#else ++ __m128d c0c1_lo = _mm_shuffle_pd(a0b0_sq, a1b1_sq, 0); ++ __m128d c0c1_hi = _mm_shuffle_pd(a0b0_sq, a1b1_sq, 3); ++ __m128d c0c1 = _mm_add_pd(c0c1_lo, c0c1_hi); ++ __m128d c2c3_lo = _mm_shuffle_pd(a2b2_sq, a3b3_sq, 0); ++ __m128d c2c3_hi = _mm_shuffle_pd(a2b2_sq, a3b3_sq, 3); ++ __m128d c2c3 = _mm_add_pd(c2c3_lo, c2c3_hi); ++#endif ++ + FLOPCOUNT_INCREMENT(4); + ++#ifdef __SSE3__ + __m128d zbc = _mm_hadd_pd(c0c1, c2c3); ++#else ++ __m128d zbc_lo = _mm_shuffle_pd(c0c1, c2c3, 0); ++ __m128d zbc_hi = _mm_shuffle_pd(c0c1, c2c3, 3); ++ __m128d zbc = _mm_add_pd(zbc_lo, zbc_hi); ++#endif ++ + FLOPCOUNT_INCREMENT(1); + + _mm_store_pd(zero_buck_check, zbc); +@@ -681,13 +735,35 @@ + __m128d a3b3_sq = _mm_mul_pd(a3b3, a3b3); + FLOPCOUNT_INCREMENT(8); + ++#ifdef __SSE3__ + __m128d c0c1 = _mm_hadd_pd(a0b0_sq, a1b1_sq); ++#else ++ __m128d c0c1_lo = _mm_shuffle_pd(a0b0_sq, a1b1_sq, 0); ++ __m128d c0c1_hi = _mm_shuffle_pd(a0b0_sq, a1b1_sq, 3); ++ __m128d c0c1 = _mm_add_pd(c0c1_lo, c0c1_hi); ++#endif ++ + __m128d c0c1_normed = _mm_mul_pd(c0c1, norm2vec); ++ ++#ifdef __SSE3__ + __m128d c2c3 = _mm_hadd_pd(a2b2_sq, a3b3_sq); ++#else ++ 
__m128d c2c3_lo = _mm_shuffle_pd(a2b2_sq, a3b3_sq, 0); ++ __m128d c2c3_hi = _mm_shuffle_pd(a2b2_sq, a3b3_sq, 3); ++ __m128d c2c3 = _mm_add_pd(c2c3_lo, c2c3_hi); ++#endif ++ + __m128d c2c3_normed = _mm_mul_pd(c2c3, norm2vec); + FLOPCOUNT_INCREMENT(8); + ++#ifdef __SSE3__ + __m128d zbc = _mm_hadd_pd(c0c1_normed, c2c3_normed); ++#else ++ __m128d zbc_lo = _mm_shuffle_pd(c0c1_normed, c2c3_normed, 0); ++ __m128d zbc_hi = _mm_shuffle_pd(c0c1_normed, c2c3_normed, 3); ++ __m128d zbc = _mm_add_pd(zbc_lo, zbc_hi); ++#endif ++ + FLOPCOUNT_INCREMENT(1); + + _mm_store_pd(zero_buck_check, zbc); diff --git a/math/sfft/pkg-descr b/math/sfft/pkg-descr new file mode 100644 index 000000000000..7eaae7086b76 --- /dev/null +++ b/math/sfft/pkg-descr @@ -0,0 +1,8 @@ +sfft is a library to compute discrete Fourier transforms of signals with +a sparse frequency domain, using an algorithm that is more efficient than +other known FFT algorithms. It was developed by Haitham Hassanieh, Piotr +Indyk, Dina Katabi, and Eric Price at the Computer Science and Artificial +Intelligence Lab at MIT. Performance optimizations were developed by J. +Schumacher at the Computer Science Department of ETH Zurich in 2013. + +WWW: http://spiral.net/software/sfft.html |