From 8fc3700b2b22a759f21bcd7cf9699a62b50d6938 Mon Sep 17 00:00:00 2001
From: amdmi3
Date: Sat, 7 Nov 2009 01:23:59 +0000
Subject: Velvet is a de novo genomic assembler specially designed for short read
 sequencing technologies, such as Solexa or 454, developed by Daniel Zerbino
 and Ewan Birney at the European Bioinformatics Institute (EMBL-EBI).

Citation:

Velvet: algorithms for de novo short read assembly using de Bruijn graphs.
D.R. Zerbino and E. Birney. Genome Research 18: 821-829 (2008)

WWW: http://www.ebi.ac.uk/~zerbino/velvet/

PR:		140147
Submitted by:	Motomichi Matsuzaki
---
Review notes (free-form text after the separator; not part of the commit
message and not part of any hunk):
 * Makefile: MAXKMERLENGTH defaults to 31, is handed to the build through
   MAKE_ENV, and post-build substitutes it for the %%MAXKMERLENGTH%%
   placeholder that patch-contrib-VelvetOptimiser-VelvetOptimiser.pl
   inserts into VelvetOptimiser.pl.
 * Makefile: the post-patch sed replacement now keeps the "#!" prefix
   (s,#!/usr/bin/perl,#!${PERL},); the previous replacement text was only
   ${PERL}, which removed the interpreter marker from every script.
 * Makefile: the final ".include" names <bsd.port.mk> again.
 * files/patch-Makefile: lets CC/CFLAGS/LDFLAGS come from the environment,
   empties Z_LIB_FILES and links with -lz, and comments out the recipe of
   the bundled "zlib" target.
 * files/patch-src-run.c: calls closedir() only when dir is non-NULL.
 * The "index" lines below are kept exactly as published.

 biology/velvet/Makefile                            | 89 +++++++++++++++++++++
 biology/velvet/distinfo                            |  3 +
 biology/velvet/files/patch-Makefile                | 38 +++++++++
 ...atch-contrib-VelvetOptimiser-VelvetOptimiser.pl | 92 ++++++++++++++++++++++
 biology/velvet/files/patch-src-run.c               | 11 +++
 biology/velvet/pkg-descr                           | 10 +++
 6 files changed, 243 insertions(+)
 create mode 100644 biology/velvet/Makefile
 create mode 100644 biology/velvet/distinfo
 create mode 100644 biology/velvet/files/patch-Makefile
 create mode 100644 biology/velvet/files/patch-contrib-VelvetOptimiser-VelvetOptimiser.pl
 create mode 100644 biology/velvet/files/patch-src-run.c
 create mode 100644 biology/velvet/pkg-descr

(limited to 'biology/velvet')

diff --git a/biology/velvet/Makefile b/biology/velvet/Makefile
new file mode 100644
index 000000000000..5fcdca3ac0a5
--- /dev/null
+++ b/biology/velvet/Makefile
@@ -0,0 +1,89 @@
+# New ports collection makefile for:	velvet
+# Date created:		27 Oct 2009
+# Whom:			Motomichi Matsuzaki
+#
+# $FreeBSD$
+#
+
+PORTNAME=	velvet
+PORTVERSION=	0.7.55
+CATEGORIES=	biology
+MASTER_SITES=	http://www.ebi.ac.uk/~zerbino/velvet/
+DISTNAME=	${PORTNAME}_${PORTVERSION}
+EXTRACT_SUFX=	.tgz
+
+MAINTAINER=	mzaki@m.u-tokyo.ac.jp
+COMMENT=	Sequence assembler for very short reads
+
+USE_GMAKE=	yes
+ALL_TARGET=	default
+
+MAXKMERLENGTH?=	31
+MAKE_ENV+=	MAXKMERLENGTH=${MAXKMERLENGTH}
+
+USE_PERL5_RUN=	yes
+
+BINARIES=	velvetg velveth
+SCRIPTS=	contrib/VelvetOptimiser/VelvetOptimiser.pl \
+		contrib/afg_handling/asmbly_splitter.pl \
+		contrib/afg_handling/snp_view.pl \
+		contrib/estimate-exp_cov/velvet-estimate-exp_cov.pl \
+		contrib/fasta2agp/fasta2agp.pl \
+		contrib/extractContigReads/extractContigReads.pl \
+		contrib/observed-insert-length.pl/observed-insert-length.pl \
+		contrib/shuffleSequences_fasta/shuffleSequences_fasta.pl
+PERLMOD_DIRS=	contrib/VelvetOptimiser/VelvetOpt
+PERLMOD_FILES=	contrib/VelvetOptimiser/VelvetOpt/Assembly.pm \
+		contrib/VelvetOptimiser/VelvetOpt/Utils.pm \
+		contrib/VelvetOptimiser/VelvetOpt/gwrap.pm \
+		contrib/VelvetOptimiser/VelvetOpt/hwrap.pm
+
+.if !defined(WITHOUT_PYTHON)
+USE_PYTHON=	yes
+SCRIPTS+=	contrib/layout/graph2.py
+.endif
+
+.for f in ${BINARIES} ${SCRIPTS}
+PLIST_FILES+=	bin/${f:T}
+.endfor
+.for d in ${PERLMOD_DIRS}
+PLIST_DIRS+=	%%SITE_PERL%%/${d:T}
+.endfor
+.for f in ${PERLMOD_FILES}
+PLIST_FILES+=	%%SITE_PERL%%/${f:H:T}/${f:T}
+.endfor
+
+PORTDOCS=	ChangeLog Manual.pdf
+
+post-patch:
+.for f in ${SCRIPTS}
+	${REINPLACE_CMD} -e s,#!/usr/bin/perl,#!${PERL}, ${WRKSRC}/${f}
+.endfor
+.if !defined(WITHOUT_PYTHON)
+	(IFS=''; ${REINPLACE_CMD} -e `${ECHO} '1i\'; ${ECHO} '#!${PYTHON_CMD}'` ${WRKSRC}/contrib/layout/graph2.py)
+.endif
+
+post-build:
+	${REINPLACE_CMD} -e s,%%MAXKMERLENGTH%%,${MAXKMERLENGTH},g ${WRKSRC}/contrib/VelvetOptimiser/VelvetOptimiser.pl
+
+do-install:
+.for f in ${BINARIES}
+	${INSTALL_PROGRAM} ${WRKSRC}/${f} ${PREFIX}/bin
+.endfor
+.for f in ${SCRIPTS}
+	${INSTALL_SCRIPT} ${WRKSRC}/${f} ${PREFIX}/bin/${f:T}
+.endfor
+.for d in ${PERLMOD_DIRS}
+	${MKDIR} ${PREFIX}/${SITE_PERL_REL}/${d:T}
+.endfor
+.for f in ${PERLMOD_FILES}
+	${INSTALL_DATA} ${WRKSRC}/${f} ${PREFIX}/${SITE_PERL_REL}/${f:H:T}/${f:T}
+.endfor
+.if !defined(NOPORTDOCS)
+	${MKDIR} ${DOCSDIR}
+.for f in ${PORTDOCS}
+	${INSTALL_MAN} ${WRKSRC}/${f} ${DOCSDIR}
+.endfor
+.endif
+
+.include <bsd.port.mk>
diff --git a/biology/velvet/distinfo b/biology/velvet/distinfo
new file mode 100644
index 000000000000..9e4392f835cd
--- /dev/null
+++ b/biology/velvet/distinfo
@@ -0,0 +1,3 @@
+MD5 (velvet_0.7.55.tgz) = ea99c0b157b4195c313f93bd9f77ecb3
+SHA256 (velvet_0.7.55.tgz) = 315331f418c2de89c6c871cea3759d947320c0adbc763fea290ec78ece69b614
+SIZE (velvet_0.7.55.tgz) = 3475289
diff --git a/biology/velvet/files/patch-Makefile b/biology/velvet/files/patch-Makefile
new file mode 100644
index 000000000000..554927aa6fd8
--- /dev/null
+++ b/biology/velvet/files/patch-Makefile
@@ -0,0 +1,38 @@
+--- Makefile.orig	2009-09-02 22:21:01.000000000 +0900
++++ Makefile	2009-10-27 22:05:49.000000000 +0900
+@@ -1,18 +1,18 @@
+-CC = gcc
+-CFLAGS = -Wall
++CC ?= gcc
++CFLAGS += -Wall
+ DEBUG = -g
+-LDFLAGS = -lm
+-OPT = -O3
+-MAXKMERLENGTH=31
+-CATEGORIES=2
++#LDFLAGS = -lm
++#OPT = -O3
++MAXKMERLENGTH?=31
++CATEGORIES?=2
+ DEF = -D MAXKMERLENGTH=$(MAXKMERLENGTH) -D CATEGORIES=$(CATEGORIES)
+ 
+ Z_LIB_DIR=third-party/zlib-1.2.3
+ Z_LIB_FILES=$(Z_LIB_DIR)/*.o
+ 
+ # Mac OS users: uncomment the following lines
+-# Z_LIB_FILES=
+-# LDFLAGS = -lm -lz
++Z_LIB_FILES=
++LDFLAGS += -lm -lz
+ # CFLAGS = -Wall -m64
+ 
+ # Sparc/Solaris users: uncomment the following line
+@@ -32,7 +32,7 @@
+ 	-rm obj/*.o obj/dbg/*.o
+ 
+ zlib :
+-	cd $(Z_LIB_DIR); ./configure; make; rm minigzip.o; rm example.o
++#	cd $(Z_LIB_DIR); ./configure; make; rm minigzip.o; rm example.o
+ 
+ velveth : $(OBJ)
+ 	$(CC) $(CFLAGS) $(OPT) $(LDFLAGS) -o velveth obj/tightString.o obj/run.o obj/recycleBin.o obj/splay.o obj/splayTable.o obj/readSet.o obj/crc.o obj/utility.o obj/kmer.o $(Z_LIB_FILES)
diff --git a/biology/velvet/files/patch-contrib-VelvetOptimiser-VelvetOptimiser.pl b/biology/velvet/files/patch-contrib-VelvetOptimiser-VelvetOptimiser.pl
new file mode 100644
index 000000000000..90bd168cc57e
--- /dev/null
+++ b/biology/velvet/files/patch-contrib-VelvetOptimiser-VelvetOptimiser.pl
@@ -0,0 +1,92 @@
+--- contrib/VelvetOptimiser/VelvetOptimiser.pl.orig	2009-09-14 23:44:19.000000000 +0900
++++ contrib/VelvetOptimiser/VelvetOptimiser.pl	2009-10-30 22:13:45.000000000 +0900
+@@ -44,9 +44,10 @@
+ my @hashvals;
+ my %assemblies;
+ my $readfile;
++my $MAXKMERLENGTH = %%MAXKMERLENGTH%%;
+ my $logfile = "logfile.txt";
+ my $hashs = 19;
+-my $hashe = 129;
++my $hashe = $MAXKMERLENGTH;
+ my $ass_num = 1;
+ my $interested = 1;
+ 
+@@ -311,26 +312,26 @@
+ 	my $ass = shift;
+ 	print STDERR "\tPlease type in the insert length for the short reads: ";
+ 	#my $len = <>;
+-	my $len = "200"; #do not ask for insert size!
++	my $len = "auto"; #do not ask for insert size!
+ 	chomp($len);
+-	while($len =~ /\D+/){
+-		print STDERR "\tThe length needs to be a number, please re-enter: ";
+-		$len = <>;
+-		chomp($len);
+-	}
++	#while($len =~ /\D+/){
++	#	print STDERR "\tThe length needs to be a number, please re-enter: ";
++	#	$len = <>;
++	#	chomp($len);
++	#}
+ 	print STDERR strftime("%b %e %H:%M:%S", localtime), " Running assembly with short insert length $len\n";
+ 	print OUT strftime("%b %e %H:%M:%S", localtime), " Running assembly with short insert length $len\n";
+ 
+ 	#re-write the pstringg with the new velvetg command..
+-	my $vg = $ass->{pstringg};
+-	if($vg =~ /ins_length /){
+-		$vg =~ s/ins_length\s+\d+/ins_length $len/;
+-	}
+-	else {
+-		$vg .= " -ins_length $len";
+-	}
++	#my $vg = $ass->{pstringg};
++	#if($vg =~ /ins_length /){
++	#	$vg =~ s/ins_length\s+\d+/ins_length $len/;
++	#}
++	#else {
++	#	$vg .= " -ins_length $len";
++	#}
+ 
+-	$ass->{pstringg} = $vg;
++	#$ass->{pstringg} = $vg;
+ 	my $worked = VelvetOpt::gwrap::objectVelvetg($ass);
+ 	if($worked){
+ 		$ass->getAssemblyDetails();
+@@ -347,8 +348,8 @@
+ my $usage = "\nVelvetOptimiser.pl: A script to run the Velvet assembler and optimise its output. Simon Gladman - CSIRO 2008, 2009.\n\n";
+ $usage .= "Usage: VelvetOptimiser.pl <-f 'velveth parameters'> [-s ] [-e ] [-a ]\n\n";
+ $usage .= "Where:\t<-f 'velveth parameters'> is the parameter line normally passed to velveth in quotes.\n";
+-$usage .= "\t-s The hash value you want velvet to start looking from. Default: 19. MUST BE ODD > 0 & <=31!\n";
+-$usage .= "\t-e The hash value you want velvet to stop looking at. Default: 31. MUST BE ODD AND > START & <= 31!\n";
++$usage .= "\t-s The hash value you want velvet to start looking from. Default: 19. MUST BE ODD > 0 & <=$MAXKMERLENGTH!\n";
++$usage .= "\t-e The hash value you want velvet to stop looking at. Default: $MAXKMERLENGTH. MUST BE ODD AND > START & <= $MAXKMERLENGTH!\n";
+ $usage .= "\t-a The final optimised assembly will include read tracking and amos file outputs (however, intermediate assemblies won't.)\n";
+ $usage .= "\nIf the optimizer requires an insert length for some paired end data, it will ask for it when it gets to the optimization step.\n";
+ 
+@@ -390,9 +391,9 @@
+ 	$hashs = $opts{'s'};
+ 	print STDERR "\n\t\t's-Param' is >$hashs<\n";
+ 	unless($hashs =~ /^\d+$/){ die "\tFatal error! Start hash not a number!\n$usage";}
+-	if($hashs > 129){
+-		print STDERR "\tStart hash value too high. New start hash value is 129.\n";
+-		$hashs = 129;
++	if($hashs > $MAXKMERLENGTH){
++		print STDERR "\tStart hash value too high. New start hash value is $MAXKMERLENGTH.\n";
++		$hashs = $MAXKMERLENGTH;
+ 	}
+ 	if(!&isOdd($hashs)){
+ 		$hashs = $hashs - 1;
+@@ -404,9 +405,9 @@
+ if($opts{'e'}){
+ 	$hashe = $opts{'e'};
+ 	unless($hashe =~ /^\d+$/){ die "\tFatal error! End hash not a number!\n$usage";}
+-	if($hashe > 129 || $hashe < 1){
+-		print STDERR "\tEnd hash value not in workable range. New end hash value is 129.\n";
+-		$hashe = 129;
++	if($hashe > $MAXKMERLENGTH || $hashe < 1){
++		print STDERR "\tEnd hash value not in workable range. New end hash value is $MAXKMERLENGTH.\n";
++		$hashe = $MAXKMERLENGTH;
+ 	}
+ 	if($hashe < $hashs){
+ 		print STDERR "\tEnd hash value lower than start hash value. New end hash value = $hashs.\n";
diff --git a/biology/velvet/files/patch-src-run.c b/biology/velvet/files/patch-src-run.c
new file mode 100644
index 000000000000..7ecd02708351
--- /dev/null
+++ b/biology/velvet/files/patch-src-run.c
@@ -0,0 +1,11 @@
+--- src/run.c.orig	2009-09-16 20:11:59.000000000 +0900
++++ src/run.c	2009-10-27 22:27:27.000000000 +0900
+@@ -152,7 +152,7 @@
+ 			      double_strand);
+ 
+ 	destroySplayTable(splayTable);
+-	closedir(dir);
++	if (dir) closedir(dir);
+ 	free(filename);
+ 	free(buf);
+ 
diff --git a/biology/velvet/pkg-descr b/biology/velvet/pkg-descr
new file mode 100644
index 000000000000..da5c56fa485a
--- /dev/null
+++ b/biology/velvet/pkg-descr
@@ -0,0 +1,10 @@
+Velvet is a de novo genomic assembler specially designed for short read
+sequencing technologies, such as Solexa or 454, developed by Daniel Zerbino
+and Ewan Birney at the European Bioinformatics Institute (EMBL-EBI).
+
+Citation:
+
+Velvet: algorithms for de novo short read assembly using de Bruijn graphs.
+D.R. Zerbino and E. Birney. Genome Research 18: 821-829 (2008)
+
+WWW: http://www.ebi.ac.uk/~zerbino/velvet/
-- 
cgit