diff options
author | jwb <jwb@FreeBSD.org> | 2018-04-17 11:11:39 +0800 |
---|---|---|
committer | jwb <jwb@FreeBSD.org> | 2018-04-17 11:11:39 +0800 |
commit | 8a0dcca197ac3cd810d6be3c89889eb9a5b7e868 (patch) | |
tree | ff9bf0299d3b0bbbd98698a3caa84889111a3ea2 | |
parent | ffa227dbe686fd57d7819bf267163c23a8d57695 (diff) | |
download | freebsd-ports-gnome-8a0dcca197ac3cd810d6be3c89889eb9a5b7e868.tar.gz freebsd-ports-gnome-8a0dcca197ac3cd810d6be3c89889eb9a5b7e868.tar.zst freebsd-ports-gnome-8a0dcca197ac3cd810d6be3c89889eb9a5b7e868.zip |
biology/vcflib: C++ library and CLI tools for parsing and manipulating VCF files
Approved by: jrm (mentor)
Differential Revision: https://reviews.freebsd.org/D15109
-rw-r--r-- | biology/Makefile | 1 | ||||
-rw-r--r-- | biology/vcflib/Makefile | 47 | ||||
-rw-r--r-- | biology/vcflib/distinfo | 3 | ||||
-rw-r--r-- | biology/vcflib/files/Makefile.external-libs | 207 | ||||
-rw-r--r-- | biology/vcflib/files/Makefile.submod | 117 | ||||
-rw-r--r-- | biology/vcflib/files/patch-src_cdflib.cpp | 11 | ||||
-rw-r--r-- | biology/vcflib/pkg-descr | 21 | ||||
-rw-r--r-- | biology/vcflib/pkg-plist | 104 |
8 files changed, 511 insertions, 0 deletions
diff --git a/biology/Makefile b/biology/Makefile index 88e9aa1a1e54..c4d3565e300f 100644 --- a/biology/Makefile +++ b/biology/Makefile @@ -124,6 +124,7 @@ SUBDIR += trimmomatic SUBDIR += ugene SUBDIR += vcftools + SUBDIR += vcflib SUBDIR += velvet SUBDIR += wise SUBDIR += xmolwt diff --git a/biology/vcflib/Makefile b/biology/vcflib/Makefile new file mode 100644 index 000000000000..ffec9fe35272 --- /dev/null +++ b/biology/vcflib/Makefile @@ -0,0 +1,47 @@ +# $FreeBSD$ + +PORTNAME= vcflib +DISTVERSIONPREFIX= v +DISTVERSION= 1.0.0-rc1-130 +DISTVERSIONSUFFIX= -g7e3d806 +CATEGORIES= biology + +MAINTAINER= jwb@FreeBSD.org +COMMENT= C++ library and CLI tools for parsing and manipulating VCF files + +LICENSE= MIT + +LIB_DEPENDS= libhts.so:biology/htslib \ + libtabix.so:biology/tabixpp \ + libsw.so:biology/smithwaterman +COMMON_DEPENDS= fastahack:biology/fastahack \ + filevercmp:sysutils/filevercmp \ + fsom:science/fsom \ + multichoose>=1.0.3:math/multichoose \ + interval_tree_test:math/intervaltree +BUILD_DEPENDS= ${COMMON_DEPENDS} +RUN_DEPENDS= ${COMMON_DEPENDS} + +USES= gmake shebangfix +USE_LDCONFIG= yes +USE_GITHUB= yes +GH_ACCOUNT= ekg + +CXXFLAGS+= -I${LOCALBASE}/include/smithwaterman \ + -I${LOCALBASE}/include/multichoose \ + -I${LOCALBASE}/include/filevercmp \ + -I${LOCALBASE}/include/fastahack \ + -I${LOCALBASE}/include/intervaltree \ + -DVERSION='\"${PORTVERSION}\"' + +# Clang and GCC disable sse2 by default on i386, but it's required for vcflib +CFLAGS_i386= -msse2 + +MAKEFILE= ${FILESDIR}/Makefile.external-libs +INSTALL_TARGET= install-strip + +post-install: + ${RLN} ${STAGEDIR}${PREFIX}/lib/libvcflib.so.1 \ + ${STAGEDIR}${PREFIX}/lib/libvcflib.so + +.include <bsd.port.mk> diff --git a/biology/vcflib/distinfo b/biology/vcflib/distinfo new file mode 100644 index 000000000000..985916a8d85b --- /dev/null +++ b/biology/vcflib/distinfo @@ -0,0 +1,3 @@ +TIMESTAMP = 1523243322 +SHA256 (ekg-vcflib-v1.0.0-rc1-130-g7e3d806_GH0.tar.gz) = 
8ec24354dfc7a87c011c865ab75f3eb72646768ab9cb6eff4263e69763338478 +SIZE (ekg-vcflib-v1.0.0-rc1-130-g7e3d806_GH0.tar.gz) = 20138644 diff --git a/biology/vcflib/files/Makefile.external-libs b/biology/vcflib/files/Makefile.external-libs new file mode 100644 index 000000000000..ae030a517b44 --- /dev/null +++ b/biology/vcflib/files/Makefile.external-libs @@ -0,0 +1,207 @@ +#OBJ_DIR = ./ +HEADERS = src/Variant.h \ + src/split.h \ + src/pdflib.hpp \ + src/var.hpp \ + src/cdflib.hpp \ + src/rnglib.hpp \ + src/join.h +SOURCES = src/Variant.cpp \ + src/rnglib.cpp \ + src/var.cpp \ + src/pdflib.cpp \ + src/cdflib.cpp \ + src/split.cpp +OBJECTS= $(SOURCES:.cpp=.o) + +VCF_LIB_LOCAL:=$(shell pwd) +BIN_DIR:=bin +LIB_DIR:=lib +SRC_DIR=src +INC_DIR:=include +OBJ_DIR:=obj + +LIB = libvcflib.a +SOVERSION = 1 +SLIB = libvcflib.so.$(SOVERSION) + +# TODO +#vcfstats.cpp + +BIN_SOURCES = src/vcfecho.cpp \ + src/vcfnormalizesvs.cpp \ + src/dumpContigsFromHeader.cpp \ + src/bFst.cpp \ + src/pVst.cpp \ + src/hapLrt.cpp \ + src/popStats.cpp \ + src/wcFst.cpp \ + src/iHS.cpp \ + src/segmentFst.cpp \ + src/segmentIhs.cpp \ + src/genotypeSummary.cpp \ + src/sequenceDiversity.cpp \ + src/pFst.cpp \ + src/smoother.cpp \ + src/LD.cpp \ + src/plotHaps.cpp \ + src/abba-baba.cpp \ + src/permuteGPAT++.cpp \ + src/permuteSmooth.cpp \ + src/normalize-iHS.cpp \ + src/meltEHH.cpp \ + src/vcfaltcount.cpp \ + src/vcfhetcount.cpp \ + src/vcfhethomratio.cpp \ + src/vcffilter.cpp \ + src/vcf2tsv.cpp \ + src/vcfgenotypes.cpp \ + src/vcfannotategenotypes.cpp \ + src/vcfcommonsamples.cpp \ + src/vcfremovesamples.cpp \ + src/vcfkeepsamples.cpp \ + src/vcfsamplenames.cpp \ + src/vcfgenotypecompare.cpp \ + src/vcffixup.cpp \ + src/vcfclassify.cpp \ + src/vcfsamplediff.cpp \ + src/vcfremoveaberrantgenotypes.cpp \ + src/vcfrandom.cpp \ + src/vcfparsealts.cpp \ + src/vcfstats.cpp \ + src/vcfflatten.cpp \ + src/vcfprimers.cpp \ + src/vcfnumalt.cpp \ + src/vcfcleancomplex.cpp \ + src/vcfintersect.cpp \ + 
src/vcfannotate.cpp \ + src/vcfallelicprimitives.cpp \ + src/vcfoverlay.cpp \ + src/vcfaddinfo.cpp \ + src/vcfkeepinfo.cpp \ + src/vcfkeepgeno.cpp \ + src/vcfafpath.cpp \ + src/vcfcountalleles.cpp \ + src/vcflength.cpp \ + src/vcfdistance.cpp \ + src/vcfrandomsample.cpp \ + src/vcfentropy.cpp \ + src/vcfglxgt.cpp \ + src/vcfroc.cpp \ + src/vcfcheck.cpp \ + src/vcfstreamsort.cpp \ + src/vcfuniq.cpp \ + src/vcfuniqalleles.cpp \ + src/vcfremap.cpp \ + src/vcf2fasta.cpp \ + src/vcfsitesummarize.cpp \ + src/vcfbreakmulti.cpp \ + src/vcfcreatemulti.cpp \ + src/vcfevenregions.cpp \ + src/vcfcat.cpp \ + src/vcfgenosummarize.cpp \ + src/vcfgenosamplenames.cpp \ + src/vcfgeno2haplo.cpp \ + src/vcfleftalign.cpp \ + src/vcfcombine.cpp \ + src/vcfgeno2alleles.cpp \ + src/vcfindex.cpp \ + src/vcf2dag.cpp \ + src/vcfsample2info.cpp \ + src/vcfqual2info.cpp \ + src/vcfinfo2qual.cpp \ + src/vcfglbound.cpp \ + src/vcfunphase.cpp \ + src/vcfnull2ref.cpp \ + src/vcfinfosummarize.cpp + +# when we can figure out how to build on mac +# src/vcfsom.cpp + +#BINS = $(BIN_SOURCES:.cpp=) +BINS = $(addprefix bin/,$(notdir $(BIN_SOURCES:.cpp=))) +SHORTBINS = $(notdir $(BIN_SOURCES:.cpp=)) +# Use ?= to allow overriding from the env or command-line. + +MAKE ?= make +LOCALBASE ?= /usr/local +LIB_PATH ?= ${LOCALBASE}/lib + +CC ?= cc +CXX ?= c++ +CXXFLAGS ?= -O3 +CFLAGS += -D_FILE_OFFSET_BITS=64 -fPIC +CXXFLAGS += $(CFLAGS) --std=c++11 +#CXXFLAGS += -pedantic -Wall -Wshadow -Wpointer-arith -Wcast-qual + +DESTDIR ?= stage +PREFIX ?= /usr/local +STRIP ?= strip +INSTALL ?= install -c +MKDIR ?= mkdir -p +AR ?= ar + +SSW = src/ssw.o src/ssw_cpp.o + +INCLUDES = -I${LOCALBASE}/include \ + -I${LOCALBASE}/include/smithwaterman \ + -I${LOCALBASE}/include/multichoose \ + -I${LOCALBASE}/include/fastahack \ + -I${LOCALBASE}/include/intervaltree +LDFLAGS += -L. 
-lvcflib \ + -L$(LIB_PATH) -lsw -ltabix -lhts -lfastahack -lfilevercmp \ + -lpthread -lz -lm + +all: $(OBJECTS) $(BINS) $(LIB) $(SLIB) + +ssw.o: src/ssw.h +ssw_cpp.o: src/ssw_cpp.h + +openmp: + $(MAKE) CXXFLAGS="$(CXXFLAGS) -fopenmp -D HAS_OPENMP" + +profiling: + $(MAKE) CXXFLAGS="$(CXXFLAGS) -g" all + +gprof: + $(MAKE) CXXFLAGS="$(CXXFLAGS) -pg" all + +$(OBJECTS): $(SOURCES) $(HEADERS) + $(CXX) -c -o $@ src/$(*F).cpp $(INCLUDES) $(CXXFLAGS) + +$(SHORTBINS): + $(MAKE) $(BIN_DIR)/$@ + +$(BINS): $(BIN_SOURCES) $(LIB) $(OBJECTS) $(SSW) pre + $(CXX) src/$(notdir $@).cpp -o $@ $(INCLUDES) $(CXXFLAGS) $(LDFLAGS) + +$(LIB): $(OBJECTS) $(SSW) + ar rs $(LIB) $(OBJECTS) $(SSW) + +$(SLIB): $(OBJECTS) $(SSW) + $(CXX) -shared -Wl,-soname,$(SLIB) -o $(SLIB) $(OBJECTS) $(SSW) + +install: all + $(MKDIR) $(DESTDIR)$(PREFIX)/bin + $(MKDIR) $(DESTDIR)$(PREFIX)/include/vcflib + $(MKDIR) $(DESTDIR)$(PREFIX)/lib + $(INSTALL) bin/* $(DESTDIR)$(PREFIX)/bin + $(INSTALL) src/*.h src/*.hpp $(DESTDIR)$(PREFIX)/include/vcflib + $(INSTALL) $(LIB) $(SLIB) $(DESTDIR)$(PREFIX)/lib + +install-strip: install + $(STRIP) $(DESTDIR)$(PREFIX)/bin/* $(DESTDIR)$(PREFIX)/lib/$(SLIB) + +test: $(BINS) + @prove -Itests/lib -w tests/*.t + +clean: + rm -f $(BINS) $(OBJECTS) + rm -f ssw_cpp.o ssw.o + rm -f $(LIB) + rm -rf $(BIN_DIR) + +pre: + mkdir -p $(BIN_DIR) + +.PHONY: clean all test pre diff --git a/biology/vcflib/files/Makefile.submod b/biology/vcflib/files/Makefile.submod new file mode 100644 index 000000000000..aafad2809a9e --- /dev/null +++ b/biology/vcflib/files/Makefile.submod @@ -0,0 +1,117 @@ +#OBJ_DIR = ./ +HEADERS = src/Variant.h \ + src/split.h \ + src/join.h +SOURCES = src/Variant.cpp \ + src/split.cpp +OBJECTS= $(SOURCES:.cpp=.o) + +VCF_LIB_LOCAL:=$(shell pwd) +BIN_DIR:=bin +LIB_DIR:=lib +SRC_DIR=src +INC_DIR:=include +OBJ_DIR:=obj + +include Makefile.common + +TABIX = tabixpp/tabix.o +FASTAHACK = fastahack/Fasta.o +SMITHWATERMAN = smithwaterman/SmithWatermanGotoh.o +REPEATS = 
smithwaterman/Repeats.o +INDELALLELE = smithwaterman/IndelAllele.o +DISORDER = smithwaterman/disorder.o +LEFTALIGN = smithwaterman/LeftAlign.o +FSOM = fsom/fsom.o +FILEVERCMP = filevercmp/filevercmp.o + +INCLUDES = -Itabixpp/htslib -I$(INC_DIR) -L. -Ltabixpp/htslib +LDFLAGS = -L$(LIB_DIR) -lvcflib -lhts -lpthread -lz -lm + + +all: $(OBJECTS) $(BINS) + +CXX ?= c++ +CXXFLAGS ?= -O3 -D_FILE_OFFSET_BITS=64 +#CXXFLAGS = -O2 +#CXXFLAGS = -pedantic -Wall -Wshadow -Wpointer-arith -Wcast-qual + +SSW = src/ssw.o src/ssw_cpp.o + +ssw.o: src/ssw.h +ssw_cpp.o:src/ssw_cpp.h + +openmp: + $(MAKE) CXXFLAGS="$(CXXFLAGS) -fopenmp -D HAS_OPENMP" + +profiling: + $(MAKE) CXXFLAGS="$(CXXFLAGS) -g" all + +gprof: + $(MAKE) CXXFLAGS="$(CXXFLAGS) -pg" all + +$(OBJECTS): $(SOURCES) $(HEADERS) $(TABIX) multichoose pre $(SMITHWATERMAN) $(FILEVERCMP) + $(CXX) -c -o $@ src/$(*F).cpp $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) && cp src/*.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ + +multichoose: pre + cd multichoose && $(MAKE) && cp *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ + +intervaltree: pre + cd intervaltree && $(MAKE) && cp *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ + +$(TABIX): pre + cd tabixpp && $(MAKE) && cp *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ + +$(SMITHWATERMAN): pre + cd smithwaterman && $(MAKE) && cp *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && cp *.o $(VCF_LIB_LOCAL)/$(OBJ_DIR)/ + +$(DISORDER): $(SMITHWATERMAN) + +$(REPEATS): $(SMITHWATERMAN) + +$(LEFTALIGN): $(SMITHWATERMAN) + +$(INDELALLELE): $(SMITHWATERMAN) + +$(FASTAHACK): pre + cd fastahack && $(MAKE) && cp *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && cp Fasta.o $(VCF_LIB_LOCAL)/$(OBJ_DIR)/ + +#$(FSOM): +# cd fsom && $(CXX) $(CXXFLAGS) -c fsom.c -lm + +$(FILEVERCMP): pre + cd filevercmp && make && cp *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && cp *.o $(VCF_LIB_LOCAL)/$(INC_DIR)/ + +$(SHORTBINS): pre + $(MAKE) bin/$@ + +$(BINS): $(BIN_SOURCES) libvcflib.a $(OBJECTS) $(SMITHWATERMAN) $(FASTAHACK) $(DISORDER) $(LEFTALIGN) $(INDELALLELE) $(SSW) $(FILEVERCMP) pre intervaltree + $(CXX) 
src/$(notdir $@).cpp -o $@ $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) + +libvcflib.a: $(OBJECTS) $(SMITHWATERMAN) $(REPEATS) $(FASTAHACK) $(DISORDER) $(LEFTALIGN) $(INDELALLELE) $(SSW) $(FILEVERCMP) $(TABIX) pre + ar rs libvcflib.a $(OBJECTS) smithwaterman/sw.o $(FASTAHACK) $(SSW) $(FILEVERCMP) $(TABIX) + cp libvcflib.a $(LIB_DIR) + + +test: $(BINS) + @prove -Itests/lib -w tests/*.t + +pre: + if [ ! -d $(BIN_DIR) ]; then mkdir -p $(BIN_DIR); fi + if [ ! -d $(LIB_DIR) ]; then mkdir -p $(LIB_DIR); fi + if [ ! -d $(INC_DIR) ]; then mkdir -p $(INC_DIR); fi + if [ ! -d $(OBJ_DIR) ]; then mkdir -p $(OBJ_DIR); fi + +clean: + rm -f $(BINS) $(OBJECTS) + rm -f ssw_cpp.o ssw.o + rm -f libvcflib.a + rm -rf $(BIN_DIR) + rm -rf $(LIB_DIR) + rm -rf $(INC_DIR) + rm -rf $(OBJ_DIR) + cd tabixpp && make clean + cd smithwaterman && make clean + cd fastahack && make clean + +.PHONY: clean all test pre diff --git a/biology/vcflib/files/patch-src_cdflib.cpp b/biology/vcflib/files/patch-src_cdflib.cpp new file mode 100644 index 000000000000..84bff432c1ed --- /dev/null +++ b/biology/vcflib/files/patch-src_cdflib.cpp @@ -0,0 +1,11 @@ +--- src/cdflib.cpp.orig 2018-03-09 20:31:19 UTC ++++ src/cdflib.cpp +@@ -10040,7 +10040,7 @@ void negative_binomial_cdf_values ( int + 1, 2, 3, + 0, 1, 2 }; + +- if ( n_data < 0 ) ++ if ( *n_data < 0 ) + { + *n_data = 0; + } diff --git a/biology/vcflib/pkg-descr b/biology/vcflib/pkg-descr new file mode 100644 index 000000000000..977e86a03dae --- /dev/null +++ b/biology/vcflib/pkg-descr @@ -0,0 +1,21 @@ +The Variant Call Format (VCF) is a flat-file, tab-delimited textual format +intended to concisely describe reference-indexed variations between +individuals. VCF provides a common interchange format for the description of +variation in individuals and populations of samples, and has become the de facto +standard reporting format for a wide array of genomic variant detectors. 
+ +vcflib provides methods to manipulate and interpret sequence variation as it +can be described by VCF. It is both: + + an API for parsing and operating on records of genomic variation as it can + be described by the VCF format + + and a collection of command-line utilities for executing complex + manipulations on VCF files. + +The API itself provides a quick and extremely permissive method to read and +write VCF files. Extensions and applications of the library provided in the +included utilities (*.cpp) comprise the vast bulk of the library's utility for +most users. + +WWW: https://github.com/vcflib/vcflib diff --git a/biology/vcflib/pkg-plist b/biology/vcflib/pkg-plist new file mode 100644 index 000000000000..944045d78581 --- /dev/null +++ b/biology/vcflib/pkg-plist @@ -0,0 +1,104 @@ +bin/LD +bin/abba-baba +bin/bFst +bin/dumpContigsFromHeader +bin/genotypeSummary +bin/hapLrt +bin/iHS +bin/meltEHH +bin/normalize-iHS +bin/pFst +bin/pVst +bin/permuteGPAT++ +bin/permuteSmooth +bin/plotHaps +bin/popStats +bin/segmentFst +bin/segmentIhs +bin/sequenceDiversity +bin/smoother +bin/vcf2dag +bin/vcf2fasta +bin/vcf2tsv +bin/vcfaddinfo +bin/vcfafpath +bin/vcfallelicprimitives +bin/vcfaltcount +bin/vcfannotate +bin/vcfannotategenotypes +bin/vcfbreakmulti +bin/vcfcat +bin/vcfcheck +bin/vcfclassify +bin/vcfcleancomplex +bin/vcfcombine +bin/vcfcommonsamples +bin/vcfcountalleles +bin/vcfcreatemulti +bin/vcfdistance +bin/vcfecho +bin/vcfentropy +bin/vcfevenregions +bin/vcffilter +bin/vcffixup +bin/vcfflatten +bin/vcfgeno2alleles +bin/vcfgeno2haplo +bin/vcfgenosamplenames +bin/vcfgenosummarize +bin/vcfgenotypecompare +bin/vcfgenotypes +bin/vcfglbound +bin/vcfglxgt +bin/vcfhetcount +bin/vcfhethomratio +bin/vcfindex +bin/vcfinfo2qual +bin/vcfinfosummarize +bin/vcfintersect +bin/vcfkeepgeno +bin/vcfkeepinfo +bin/vcfkeepsamples +bin/vcfleftalign +bin/vcflength +bin/vcfnormalizesvs +bin/vcfnull2ref +bin/vcfnumalt +bin/vcfoverlay +bin/vcfparsealts +bin/vcfprimers +bin/vcfqual2info 
+bin/vcfrandom +bin/vcfrandomsample +bin/vcfremap +bin/vcfremoveaberrantgenotypes +bin/vcfremovesamples +bin/vcfroc +bin/vcfsample2info +bin/vcfsamplediff +bin/vcfsamplenames +bin/vcfsitesummarize +bin/vcfstats +bin/vcfstreamsort +bin/vcfuniq +bin/vcfuniqalleles +bin/vcfunphase +bin/wcFst +include/vcflib/BedReader.h +include/vcflib/Variant.h +include/vcflib/cdflib.hpp +include/vcflib/convert.h +include/vcflib/gpatInfo.hpp +include/vcflib/join.h +include/vcflib/mt19937ar.h +include/vcflib/pdflib.hpp +include/vcflib/rnglib.hpp +include/vcflib/split.h +include/vcflib/ssw.hpp +include/vcflib/ssw_cpp.hpp +include/vcflib/var.hpp +include/vcflib/vec128int.h +include/vcflib/veclib_types.h +lib/libvcflib.a +lib/libvcflib.so +lib/libvcflib.so.1 |