about | summary | refs | log | tree | commit | diff | stats
path: root/math
diff options
context:
space:
mode:
author bf <bf@FreeBSD.org> 2013-08-16 07:01:27 +0800
committer bf <bf@FreeBSD.org> 2013-08-16 07:01:27 +0800
commit 8d118f54682ed791040aea7fa234bf25d6c0d228 (patch)
tree 5f62b7491efa50ee1ac19d348dfad79cf3823bdd /math
parent 7a36c2f67b1cad9bcd17b5d90b28d7e0bb713106 (diff)
download freebsd-ports-gnome-8d118f54682ed791040aea7fa234bf25d6c0d228.tar.gz
freebsd-ports-gnome-8d118f54682ed791040aea7fa234bf25d6c0d228.tar.zst
freebsd-ports-gnome-8d118f54682ed791040aea7fa234bf25d6c0d228.zip
Add sfft 0.1.0, optimized Sparse Fast Fourier Transform.
Diffstat (limited to 'math')
-rw-r--r-- math/Makefile 1
-rw-r--r-- math/sfft/Makefile 123
-rw-r--r-- math/sfft/distinfo 4
-rw-r--r-- math/sfft/files/patch-src__computefourier-1.0-2.0.cc 55
-rw-r--r-- math/sfft/files/patch-src__computefourier-3.0.cc 132
-rw-r--r-- math/sfft/pkg-descr 8
6 files changed, 323 insertions, 0 deletions
diff --git a/math/Makefile b/math/Makefile
index 6b5dcf4c3f14..fc12adaaa554 100644
--- a/math/Makefile
+++ b/math/Makefile
@@ -610,6 +610,7 @@
SUBDIR += sdpa-gmp
SUBDIR += sdpara
SUBDIR += sedumi
+ SUBDIR += sfft
SUBDIR += simd-viterbi
SUBDIR += slatec
SUBDIR += slgrace
diff --git a/math/sfft/Makefile b/math/sfft/Makefile
new file mode 100644
index 000000000000..dd6cf86714ac
--- /dev/null
+++ b/math/sfft/Makefile
@@ -0,0 +1,123 @@
+# Created by: bf@FreeBSD.org
+# $FreeBSD$
+
+PORTNAME= sfft
+PORTVERSION= 0.1.0
+CATEGORIES= math
+MASTER_SITES= http://spiral.net/software/sfft/ LOCAL/bf
+DISTFILES= ${DISTNAME}${EXTRACT_SUFX}
+EXTRACT_ONLY= ${DISTNAME}${EXTRACT_SUFX}
+
+MAINTAINER= bf@FreeBSD.org
+COMMENT= Optimized Sparse Fast Fourier Transform
+
+LICENSE= GPLv2
+
+LIB_DEPENDS= libfftw3.so:${PORTSDIR}/math/fftw3
+
+USE_GCC= yes
+USE_LDCONFIG= yes
+USE_ZIP= yes
+
+OPTIONS_DEFINE= OPTIMIZED_CFLAGS PROFILE
+OPTIONS_DEFAULT= OPTIMIZED_CFLAGS
+
+BUILD_WRKSRC= ${WRKSRC}/src
+INSTALL_WRKSRC= ${BUILD_WRKSRC}
+CFLAGS+= -fopenmp -msse2 -Iflopcount -I${LOCALBASE}/include
+LDFLAGS+= -L${LOCALBASE}/lib
+HEADERS= sfft.h
+HDIR= include/sfft
+MAKE_ENV= LDADD="-lfftw3 ${LIBM}" LIB=sfft SHLIB_MAJOR="${SHLIB_MAJOR}" \
+ SRCCONF=/dev/null SRCS="${SRCS}"
+
+PLIST_DIRS= ${HDIR}
+PLIST_FILES= ${HEADERS:S|^|${HDIR}/|} lib/libsfft.a lib/libsfft.so \
+ lib/libsfft.so.${SHLIB_MAJOR}
+
+SHLIB_MAJOR= 1
+SRCS= common.cc computefourier-1.0-2.0.cc \
+ computefourier-3.0.cc fftw.cc filters.cc parameters.cc \
+ sfft.cc simulation.cc timer.cc utils.cc
+
+.include <bsd.port.options.mk>
+
+.if !${ARCH:Mamd64} && !${MACHINE_CPU:Msse2}
+IGNORE= this port requires SSE2, and benefits from SSE3 -- set CPUTYPE\
+appropriately
+.endif
+
+LIBM= -lm
+.if ${OSVERSION} < 1000034
+LIB_DEPENDS+= libmissing.so:${PORTSDIR}/math/libmissing
+LIBM+= -lmissing
+.endif
+
+.if ${PORT_OPTIONS:MDOCS} || make(makesum)
+DISTFILES+= sfft-doc.pdf
+PORTDOCS= sfft-doc.pdf
+.endif
+
+.if ${PORT_OPTIONS:MOPTIMIZED_CFLAGS}
+CFLAGS+= -O3 -ffast-math
+.endif
+
+.if ${PORT_OPTIONS:MPROFILE}
+.if defined(NOPROFILE) || defined(NO_PROFILE) || defined(WITHOUT_PROFILE)
+IGNORE = you have defined WITH_PROFILE, but have also defined\
+WITHOUT_PROFILE, NOPROFILE, or NO_PROFILE
+.elif !exists(/usr/lib/libc_p.a)
+IGNORE = you have chosen WITH_PROFILE, but have not installed the\
+base system profiling libraries
+.endif
+PLIST_FILES+= lib/libsfft_p.a
+.else
+MAKE_ENV+= NO_PROFILE=yes
+.endif
+
+post-extract:
+ @${CP} /usr/include/complex.h ${BUILD_WRKSRC}/sfftcomplex.h
+ @${PRINTF} "LIBDIR=\t${PREFIX}/lib\n.include <bsd.lib.mk>\n" > \
+ ${BUILD_WRKSRC}/Makefile
+
+post-patch:
+ @${REINPLACE_CMD} -e 's/string\.h/cstring/' \
+ ${WRKSRC}/src/utils.cc
+.if ${OSVERSION} < 1000034
+ @${REINPLACE_CMD} -e '\|<complex.h>|{x; \
+ s|^.*$$|#include "missing_complex.h"|; H; x;}' \
+ ${WRKSRC}/src/fft.h
+.endif
+ @${REINPLACE_CMD} -E -e '/<complex\.h>/ \
+ {s/<complex\.h>/ "sfftcomplex.h"/; x ; \
+ s|^.*$$|#endif|; G; x; \
+ s|^.*$$|extern "C" {|; G; x; \
+ s|^.*$$|#ifdef __cplusplus|; G; x; \
+ s|^.*$$|#ifdef __cplusplus|; H; \
+ s|^.*$$|}|; H; \
+ s|^.*$$|#endif|; H; x;}' \
+ ${WRKSRC}/src/computefourier-1.0-2.0.h \
+ ${WRKSRC}/src/computefourier-3.0.h \
+ ${WRKSRC}/src/fft.h
+
+CORELIMIT?= /usr/bin/limits -Sc 0
+
+check regression-test test: build
+ @cd ${BUILD_WRKSRC}; \
+ ${CXX} ${CXXFLAGS} -o sfft-verification verification.cc \
+ ${LDFLAGS} libsfft.a -lfftw3 ${LIBM} ; \
+ for _v in 1 2 3 ; do \
+ for _k in 5 10 50; do \
+ echo "Checking sfft version $${_v} with $${_k} frequency components:"; \
+ ${CORELIMIT} ./sfft-verification -k $${_k} -r 3 -v $${_v} || ${TRUE} ; \
+ done ; done
+
+post-install:
+ @${MKDIR} ${PREFIX}/${HDIR}
+ @cd ${BUILD_WRKSRC}; ${INSTALL_DATA} ${HEADERS} ${PREFIX}/${HDIR}
+.if ${PORT_OPTIONS:MDOCS}
+ @${MKDIR} ${DOCSDIR}
+ @${INSTALL_DATA} ${_DISTDIR}/${PORTDOCS} ${DOCSDIR}
+.endif
+
+.include <bsd.port.mk>
diff --git a/math/sfft/distinfo b/math/sfft/distinfo
new file mode 100644
index 000000000000..c0af74abb746
--- /dev/null
+++ b/math/sfft/distinfo
@@ -0,0 +1,4 @@
+SHA256 (sfft-0.1.0.zip) = b52d53f020e82f67cc7da2ad9cacb428752ec3229ce00f435a527d6180ddd494
+SIZE (sfft-0.1.0.zip) = 466599
+SHA256 (sfft-doc.pdf) = d9416944b2ca7bd068320a1b45ed0cc4a311b0bfbac5ed43e68f46bdc3a93454
+SIZE (sfft-doc.pdf) = 122378
diff --git a/math/sfft/files/patch-src__computefourier-1.0-2.0.cc b/math/sfft/files/patch-src__computefourier-1.0-2.0.cc
new file mode 100644
index 000000000000..09b1e51d5b29
--- /dev/null
+++ b/math/sfft/files/patch-src__computefourier-1.0-2.0.cc
@@ -0,0 +1,55 @@
+--- src/computefourier-1.0-2.0.cc.orig 2013-06-13 08:12:25.000000000 -0400
++++ src/computefourier-1.0-2.0.cc 2013-08-09 00:26:54.000000000 -0400
+@@ -248,8 +248,13 @@
+ __m128d ad_bc = _mm_mul_pd(ab, dc);
+ __m128d ac_mbd = _mm_mul_pd(ac_bd, signs);
+
++#ifdef __SSE3__
+ __m128d ab_times_cd = _mm_hadd_pd(ac_mbd, ad_bc);
+-
++#else
++ __m128d ab_times_cd_lo = _mm_shuffle_pd(ac_mbd, ad_bc, 0);
++ __m128d ab_times_cd_hi = _mm_shuffle_pd(ac_mbd, ad_bc, 3);
++ __m128d ab_times_cd = _mm_add_pd(ab_times_cd_lo, ab_times_cd_hi);
++#endif
+ unsigned int i_mod_B_p_offset = (i & B2_m_1) + offset;
+ __m128d xy = _mm_load_pd(d_x_sampt + i_mod_B_p_offset);
+ __m128d st = _mm_add_pd(xy, ab_times_cd);
+@@ -283,7 +288,13 @@
+ __m128d ab_square = _mm_mul_pd(ab, ab);
+ __m128d cd_square = _mm_mul_pd(cd, cd);
+
++#ifdef __SSE3__
+ __m128d r = _mm_hadd_pd(ab_square, cd_square);
++#else
++ __m128d r_lo = _mm_shuffle_pd(ab_square, cd_square, 0);
++ __m128d r_hi = _mm_shuffle_pd(ab_square, cd_square, 3);
++ __m128d r = _mm_add_pd(r_lo, r_hi);
++#endif
+
+ _mm_store_pd(samples + j, r);
+ }
+@@ -390,11 +401,23 @@
+ __m128d ad_bc = _mm_mul_pd(ab, dc);
+ __m128d mad_bc = _mm_mul_pd(ad_bc, signs);
+
++#ifdef __SSE3__
+ __m128d acpbd_bcmad = _mm_hadd_pd(ac_bd, mad_bc);
++#else
++ __m128d acpbd_bcmad_lo = _mm_shuffle_pd(ac_bd, mad_bc, 0);
++ __m128d acpbd_bcmad_hi = _mm_shuffle_pd(ac_bd, mad_bc, 3);
++ __m128d acpbd_bcmad = _mm_add_pd(acpbd_bcmad_lo, acpbd_bcmad_hi);
++#endif
+
+ __m128d cd_squares = _mm_mul_pd(cd, cd);
++
++#ifdef __SSE3__
+ __m128d cd_squares_sum =
+ _mm_hadd_pd(cd_squares, cd_squares);
++#else
++ __m128d cd_squares_flip = _mm_shuffle_pd(cd_squares, cd_squares, 1);
++ __m128d cd_squares_sum = _mm_add_pd(cd_squares, cd_squares_flip);
++#endif
+
+ __m128d r = _mm_div_pd(acpbd_bcmad, cd_squares_sum);
+
diff --git a/math/sfft/files/patch-src__computefourier-3.0.cc b/math/sfft/files/patch-src__computefourier-3.0.cc
new file mode 100644
index 000000000000..aba106fdd70a
--- /dev/null
+++ b/math/sfft/files/patch-src__computefourier-3.0.cc
@@ -0,0 +1,132 @@
+--- src/computefourier-3.0.cc.orig 2013-06-13 08:12:26.000000000 -0400
++++ src/computefourier-3.0.cc 2013-08-10 17:02:52.000000000 -0400
+@@ -416,27 +416,64 @@
+
+ __m128d t1r = _mm_mul_pd(v1r, ab31);
+ __m128d t1i = _mm_mul_pd(v1i, ba31);
++
++#ifdef __SSE3__
+ __m128d remove1 = _mm_addsub_pd(t1r, t1i);
++#else
++__m128i mask_fliplo = _mm_set_epi32(0, 0, 0x80000000, 0);
++__m128d t1i_fliplo = _mm_xor_pd(t1i, _mm_castsi128_pd(mask_fliplo));
++__m128d remove1 = _mm_add_pd(t1r, t1i_fliplo);
++#endif
+
+ __m128d t2r = _mm_mul_pd(v1r, ab32);
+ __m128d t2i = _mm_mul_pd(v1i, ba32);
++
++#ifdef __SSE3__
+ __m128d remove2 = _mm_addsub_pd(t2r, t2i);
++#else
++__m128d t2i_fliplo = _mm_xor_pd(t2i, _mm_castsi128_pd(mask_fliplo));
++__m128d remove2 = _mm_add_pd(t2r, t2i_fliplo);
++#endif
+
+ __m128d t3r = _mm_mul_pd(v1r, ab33);
+ __m128d t3i = _mm_mul_pd(v1i, ba33);
++
++#ifdef __SSE3__
+ __m128d remove3 = _mm_addsub_pd(t3r, t3i);
++#else
++__m128d t3i_fliplo = _mm_xor_pd(t3i, _mm_castsi128_pd(mask_fliplo));
++__m128d remove3 = _mm_add_pd(t3r, t3i_fliplo);
++#endif
+
+ __m128d t4r = _mm_mul_pd(v2r, ab31);
+ __m128d t4i = _mm_mul_pd(v2i, ba31);
++
++#ifdef __SSE3__
+ __m128d remove4 = _mm_addsub_pd(t4r, t4i);
++#else
++__m128d t4i_fliplo = _mm_xor_pd(t4i, _mm_castsi128_pd(mask_fliplo));
++__m128d remove4 = _mm_add_pd(t4r, t4i_fliplo);
++#endif
+
+ __m128d t5r = _mm_mul_pd(v2r, ab32);
+ __m128d t5i = _mm_mul_pd(v2i, ba32);
++
++#ifdef __SSE3__
+ __m128d remove5 = _mm_addsub_pd(t5r, t5i);
++#else
++__m128d t5i_fliplo = _mm_xor_pd(t5i, _mm_castsi128_pd(mask_fliplo));
++__m128d remove5 = _mm_add_pd(t5r, t5i_fliplo);
++#endif
+
+ __m128d t6r = _mm_mul_pd(v2r, ab33);
+ __m128d t6i = _mm_mul_pd(v2i, ba33);
++
++#ifdef __SSE3__
+ __m128d remove6 = _mm_addsub_pd(t6r, t6i);
++#else
++__m128d t6i_fliplo = _mm_xor_pd(t6i, _mm_castsi128_pd(mask_fliplo));
++__m128d remove6 = _mm_add_pd(t6r, t6i_fliplo);
++#endif
+
+ FLOPCOUNT_INCREMENT(6 * (4 + 2));
+
+@@ -524,11 +561,28 @@
+ __m128d a3b3_sq = _mm_mul_pd(a3b3, a3b3);
+ FLOPCOUNT_INCREMENT(8);
+
++#ifdef __SSE3__
+ __m128d c0c1 = _mm_hadd_pd(a0b0_sq, a1b1_sq);
+ __m128d c2c3 = _mm_hadd_pd(a2b2_sq, a3b3_sq);
++#else
++ __m128d c0c1_lo = _mm_shuffle_pd(a0b0_sq, a1b1_sq, 0);
++ __m128d c0c1_hi = _mm_shuffle_pd(a0b0_sq, a1b1_sq, 3);
++ __m128d c0c1 = _mm_add_pd(c0c1_lo, c0c1_hi);
++ __m128d c2c3_lo = _mm_shuffle_pd(a2b2_sq, a3b3_sq, 0);
++ __m128d c2c3_hi = _mm_shuffle_pd(a2b2_sq, a3b3_sq, 3);
++ __m128d c2c3 = _mm_add_pd(c2c3_lo, c2c3_hi);
++#endif
++
+ FLOPCOUNT_INCREMENT(4);
+
++#ifdef __SSE3__
+ __m128d zbc = _mm_hadd_pd(c0c1, c2c3);
++#else
++ __m128d zbc_lo = _mm_shuffle_pd(c0c1, c2c3, 0);
++ __m128d zbc_hi = _mm_shuffle_pd(c0c1, c2c3, 3);
++ __m128d zbc = _mm_add_pd(zbc_lo, zbc_hi);
++#endif
++
+ FLOPCOUNT_INCREMENT(1);
+
+ _mm_store_pd(zero_buck_check, zbc);
+@@ -681,13 +735,35 @@
+ __m128d a3b3_sq = _mm_mul_pd(a3b3, a3b3);
+ FLOPCOUNT_INCREMENT(8);
+
++#ifdef __SSE3__
+ __m128d c0c1 = _mm_hadd_pd(a0b0_sq, a1b1_sq);
++#else
++ __m128d c0c1_lo = _mm_shuffle_pd(a0b0_sq, a1b1_sq, 0);
++ __m128d c0c1_hi = _mm_shuffle_pd(a0b0_sq, a1b1_sq, 3);
++ __m128d c0c1 = _mm_add_pd(c0c1_lo, c0c1_hi);
++#endif
++
+ __m128d c0c1_normed = _mm_mul_pd(c0c1, norm2vec);
++
++#ifdef __SSE3__
+ __m128d c2c3 = _mm_hadd_pd(a2b2_sq, a3b3_sq);
++#else
++ __m128d c2c3_lo = _mm_shuffle_pd(a2b2_sq, a3b3_sq, 0);
++ __m128d c2c3_hi = _mm_shuffle_pd(a2b2_sq, a3b3_sq, 3);
++ __m128d c2c3 = _mm_add_pd(c2c3_lo, c2c3_hi);
++#endif
++
+ __m128d c2c3_normed = _mm_mul_pd(c2c3, norm2vec);
+ FLOPCOUNT_INCREMENT(8);
+
++#ifdef __SSE3__
+ __m128d zbc = _mm_hadd_pd(c0c1_normed, c2c3_normed);
++#else
++ __m128d zbc_lo = _mm_shuffle_pd(c0c1_normed, c2c3_normed, 0);
++ __m128d zbc_hi = _mm_shuffle_pd(c0c1_normed, c2c3_normed, 3);
++ __m128d zbc = _mm_add_pd(zbc_lo, zbc_hi);
++#endif
++
+ FLOPCOUNT_INCREMENT(1);
+
+ _mm_store_pd(zero_buck_check, zbc);
diff --git a/math/sfft/pkg-descr b/math/sfft/pkg-descr
new file mode 100644
index 000000000000..7eaae7086b76
--- /dev/null
+++ b/math/sfft/pkg-descr
@@ -0,0 +1,8 @@
+sfft is a library to compute discrete Fourier transforms of signals with
+a sparse frequency domain, using an algorithm that is more efficient than
+other known FFT algorithms. It was developed by Haitham Hassanieh, Piotr
+Indyk, Dina Katabi, and Eric Price at the Computer Science and Artificial
+Intelligence Lab at MIT. Performance optimizations were developed by J.
+Schumacher at the Computer Science Department of ETH Zurich in 2013.
+
+WWW: http://spiral.net/software/sfft.html