pkgsrc-WIP-changes archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

rna-star: Spliced Transcripts Alignment to a Reference



Module Name:	pkgsrc-wip
Committed By:	Jason W. Bacon <bacon%NetBSD.org@localhost>
Pushed By:	outpaddling
Date:		Sun Jul 23 11:05:28 2023 -0500
Changeset:	610f4169fe55ea02efeda41573c698b1936aaa8e

Modified Files:
	Makefile
Added Files:
	rna-star/DESCR
	rna-star/Makefile
	rna-star/PLIST
	rna-star/distinfo
	rna-star/patches/patch-Makefile
	rna-star/patches/patch-SpliceGraph.cpp
	rna-star/patches/patch-SuffixArrayFuns.cpp
	rna-star/patches/patch-bamSortByCoordinate.cpp
	rna-star/patches/patch-opal_opal.cpp

Log Message:
rna-star: Spliced Transcripts Alignment to a Reference

STAR (Spliced Transcripts Alignment to a Reference) aims to achieve
accurate alignment of high-throughput RNA-seq data.  STAR is based on
a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays
followed by a seed clustering and stitching procedure. STAR outperforms
other aligners by a factor of >50 in mapping speed, aligning to the
human genome 550 million 2 x 76 bp paired-end reads per hour on a modest
12-core server, while at the same time improving alignment sensitivity
and precision.

To see a diff of this commit:
https://wip.pkgsrc.org/cgi-bin/gitweb.cgi?p=pkgsrc-wip.git;a=commitdiff;h=610f4169fe55ea02efeda41573c698b1936aaa8e

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

diffstat:
 Makefile                                       |  1 +
 rna-star/DESCR                                 |  9 ++++
 rna-star/Makefile                              | 65 ++++++++++++++++++++++++++
 rna-star/PLIST                                 |  1 +
 rna-star/distinfo                              | 10 ++++
 rna-star/patches/patch-Makefile                | 62 ++++++++++++++++++++++++
 rna-star/patches/patch-SpliceGraph.cpp         | 12 +++++
 rna-star/patches/patch-SuffixArrayFuns.cpp     | 52 +++++++++++++++++++++
 rna-star/patches/patch-bamSortByCoordinate.cpp | 13 ++++++
 rna-star/patches/patch-opal_opal.cpp           | 15 ++++++
 10 files changed, 240 insertions(+)

diffs:
diff --git a/Makefile b/Makefile
index ab471d7bac..b5341a312c 100644
--- a/Makefile
+++ b/Makefile
@@ -5007,6 +5007,7 @@ SUBDIR+=	rk
 SUBDIR+=	rlottie
 SUBDIR+=	rmilter
 SUBDIR+=	rna-seq
+SUBDIR+=	rna-star
 SUBDIR+=	rnc-mode
 SUBDIR+=	rng-tools
 SUBDIR+=	rnp
diff --git a/rna-star/DESCR b/rna-star/DESCR
new file mode 100644
index 0000000000..f2e372497f
--- /dev/null
+++ b/rna-star/DESCR
@@ -0,0 +1,9 @@
+STAR (Spliced Transcripts Alignment to a Reference) aims to achieve
+accurate alignment of high-throughput RNA-seq data.  STAR is based on
+a previously undescribed RNA-seq alignment algorithm that uses
+sequential maximum mappable seed search in uncompressed suffix arrays
+followed by a seed clustering and stitching procedure. STAR outperforms
+other aligners by a factor of >50 in mapping speed, aligning to the
+human genome 550 million 2 x 76 bp paired-end reads per hour on a modest
+12-core server, while at the same time improving alignment sensitivity
+and precision.
diff --git a/rna-star/Makefile b/rna-star/Makefile
new file mode 100644
index 0000000000..ce268ba9d1
--- /dev/null
+++ b/rna-star/Makefile
@@ -0,0 +1,65 @@
+# $NetBSD$
+#
+###########################################################
+#                  Generated by fbsd2pkg                  #
+#              Sun Jul 23 10:47:07 CDT 2023               #
+###########################################################
+
+###########################################################
+# Unconverted and partially converted FreeBSD port syntax:
+
+#NOT_FOR_ARCHS=		armv6 armv7 i386 mips powerpc powerpcspe
+#NOT_FOR_ARCHS_REASON=	Requires 64-bit processor
+#.if !exists(/usr/include/omp.h)
+#BROKEN=		requires OpenMP support that is missing on this architecture
+#.endif
+# Unknown tool: USE_TOOLS=	compiler:c++11-lang
+# Unknown tool: USE_TOOLS=	localbase:ldflags
+
+DISTNAME=	STAR-2.7.10b
+PKGNAME=	rna-STAR-2.7.10b
+CATEGORIES=	biology
+MASTER_SITES=	${MASTER_SITE_GITHUB:=alexdobin/}
+GITHUB_PROJECT=	STAR
+
+OWNER=		bacon%NetBSD.org@localhost
+HOMEPAGE=	https://github.com/alexdobin/STAR
+COMMENT=	Spliced Transcripts Alignment to a Reference
+LICENSE=	gnu-gpl-v3
+
+# Translated from the FreeBSD port's REINPLACE: rewrite the bundled
+# "htslib/htslib/*.h" includes to the system <htslib/*.h> headers.
+SUBST_CLASSES+=		htslib
+SUBST_STAGE.htslib=	pre-configure
+SUBST_SED.htslib+=	-e 's|"htslib/htslib/sam.h"|<htslib/sam.h>|g'
+SUBST_SED.htslib+=	-e 's|"htslib/htslib/kstring.h"|<htslib/kstring.h>|g'
+SUBST_SED.htslib+=	-e 's|"htslib/htslib/bgzf.h"|<htslib/bgzf.h>|g'
+SUBST_FILES.htslib+=	${WRKSRC}/BAMfunctions.cpp
+SUBST_FILES.htslib+=	${WRKSRC}/IncludeDefine.h
+SUBST_FILES.htslib+=	${WRKSRC}/STAR.cpp
+SUBST_FILES.htslib+=	${WRKSRC}/bamRemoveDuplicates.cpp
+SUBST_FILES.htslib+=	${WRKSRC}/bam_cat.c
+SUBST_FILES.htslib+=	${WRKSRC}/bam_cat.h
+SUBST_FILES.htslib+=	${WRKSRC}/signalFromBAM.h
+
+# Test and change if necessary.
+MAKE_JOBS_SAFE=	no
+
+# Just assuming C and C++: Adjust this!
+USE_LANGUAGES=	c c++
+USE_TOOLS+=	gmake
+
+WRKSRC=		${WRKDIR}/${DISTNAME}/source
+MAKE_FLAGS=	HTSLIB=''
+CXXFLAGS+=	-std=c++11
+
+INSTALLATION_DIRS=	bin
+
+do-install:
+	${INSTALL_PROGRAM} ${WRKSRC}/STAR ${DESTDIR}${PREFIX}/bin
+
+# Convert any _DEPENDS above that have a buildlink3.mk
+.include "../../biology/htslib/buildlink3.mk"
+# CentOS doesn't have zlib in the base, so uncomment if needed.
+# .include "../../devel/zlib/buildlink3.mk"
+.include "../../mk/bsd.pkg.mk"
diff --git a/rna-star/PLIST b/rna-star/PLIST
new file mode 100644
index 0000000000..48d96a5493
--- /dev/null
+++ b/rna-star/PLIST
@@ -0,0 +1 @@
+@comment $NetBSD$
diff --git a/rna-star/distinfo b/rna-star/distinfo
new file mode 100644
index 0000000000..de60f4a45a
--- /dev/null
+++ b/rna-star/distinfo
@@ -0,0 +1,10 @@
+$NetBSD$
+
+BLAKE2s (STAR-2.7.10b.tar.gz) = ae5b1c236c1b5975d60277dbd5634661a4598e6f3f906f3d6b582b403edb58d3
+SHA512 (STAR-2.7.10b.tar.gz) = c4e94fd19f2a8145c38d59b39d91af16b53af63da2d77275445b9aeadfd287e3d2749829352a034e45e4352c64884275fa5b7b2ed28b95bf47cac3183b0d74aa
+Size (STAR-2.7.10b.tar.gz) = 12443703 bytes
+SHA1 (patch-Makefile) = 6bf4600d222a89a95e1cc9e038226a8e419fc0e8
+SHA1 (patch-SpliceGraph.cpp) = 17f5b819985914d075cc15ef67f9b1625a1a6ca3
+SHA1 (patch-SuffixArrayFuns.cpp) = 219abac0885e2e710e634335228baadf16138429
+SHA1 (patch-bamSortByCoordinate.cpp) = 8c3db8dff990c3ff69be02fc86a4b7a8bed17bd6
+SHA1 (patch-opal_opal.cpp) = 69334ba211d2b371fcf96f86fb842d3e67acc9c0
diff --git a/rna-star/patches/patch-Makefile b/rna-star/patches/patch-Makefile
new file mode 100644
index 0000000000..a3a42d9c63
--- /dev/null
+++ b/rna-star/patches/patch-Makefile
@@ -0,0 +1,62 @@
+$NetBSD$
+
+--- Makefile.orig	2022-11-01 14:50:35 UTC
++++ Makefile
+@@ -11,6 +11,10 @@ CXXFLAGSextra ?=
+ # user may define the compiler
+ CXX ?= g++
+ 
++# user may set to '' and sed "htslib/htslib/*.h" to <htslib/*.h>
++# if they don't want the bundled htslib
++HTSLIB ?= htslib
++
+ # pre-defined flags
+ LDFLAGS_shared := -pthread -Lhtslib -Bstatic -lhts -Bdynamic -lz
+ LDFLAGS_static := -static -static-libgcc -pthread -Lhtslib -lhts -lz
+@@ -18,7 +22,8 @@ LDFLAGS_Mac :=-pthread -lz htslib/libhts.a
+ LDFLAGS_Mac_static :=-pthread -lz -static-libgcc htslib/libhts.a
+ LDFLAGS_gdb := $(LDFLAGS_shared)
+ 
+-DATE_FMT = --iso-8601=seconds
++# --iso-8601 is not portable
++DATE_FMT = -Iseconds
+ ifdef SOURCE_DATE_EPOCH
+     BUILD_DATE ?= $(shell date -u -d "@$(SOURCE_DATE_EPOCH)" "$(DATE_FMT)" 2>/dev/null || date -u -r "$(SOURCE_DATE_EPOCH)" "$(DATE_FMT)" 2>/dev/null || date -u "$(DATE_FMT)")
+ else
+@@ -41,12 +46,12 @@ GIT_BRANCH_COMMIT_DIFF := -D'GIT_BRANCH_COMMIT_DIFF="$
+ 
+ # Defaults, can be overridden by make arguments or environment
+ CXXFLAGS ?= -pipe -Wall -Wextra
+-CFLAGS ?= -pipe -Wall -Wextra -O3
++CFLAGS ?= -pipe -Wall -Wextra
+ CXXFLAGS_SIMD ?= -mavx2
+ 
+ # Unconditionally set essential flags and optimization options
+ CXXFLAGS_common := -std=c++11 -fopenmp $(COMPTIMEPLACE) $(GIT_BRANCH_COMMIT_DIFF)
+-CXXFLAGS_main := -O3 $(CXXFLAGS_common)
++CXXFLAGS_main := $(CXXFLAGS_common)
+ CXXFLAGS_gdb := -O0 -g3 $(CXXFLAGS_common)
+ 
+ ##########################################################################################################
+@@ -114,7 +119,7 @@ clean:
+ 
+ .PHONY: CLEAN
+ CLEAN: clean
+-	$(MAKE) -C htslib clean
++	$(MAKE) -C $(HTSLIB) clean
+ 
+ 
+ .PHONY: clean_solo
+@@ -131,10 +136,10 @@ ifneq ($(MAKECMDGOALS),CLEAN)
+ ifneq ($(MAKECMDGOALS),clean_solo)
+ ifneq ($(MAKECMDGOALS),STARforMac)
+ ifneq ($(MAKECMDGOALS),STARforMacGDB)
+-Depend.list: $(SOURCES) parametersDefault.xxd htslib
++Depend.list: $(SOURCES) parametersDefault.xxd $(HTSLIB)
+ 	echo $(SOURCES)
+ 	'rm' -f ./Depend.list
+-	$(CXX) $(CXXFLAGS_common) -MM $^ >> Depend.list
++	$(CXX) $(CXXFLAGS) $(CXXFLAGS_common) -MM $^ >> Depend.list
+ include Depend.list
+ endif
+ endif
diff --git a/rna-star/patches/patch-SpliceGraph.cpp b/rna-star/patches/patch-SpliceGraph.cpp
new file mode 100644
index 0000000000..374054d0c5
--- /dev/null
+++ b/rna-star/patches/patch-SpliceGraph.cpp
@@ -0,0 +1,12 @@
+$NetBSD$
+
+--- SpliceGraph.cpp.orig	2023-07-21 15:27:26 UTC
++++ SpliceGraph.cpp
+@@ -1,7 +1,6 @@
+ /*
+  * Created by Fahimeh Mirhaj on 6/10/19.
+ */
+-using namespace std;
+ 
+ #include "SpliceGraph.h"
+ #include "GTF.h"
diff --git a/rna-star/patches/patch-SuffixArrayFuns.cpp b/rna-star/patches/patch-SuffixArrayFuns.cpp
new file mode 100644
index 0000000000..0906b36c1c
--- /dev/null
+++ b/rna-star/patches/patch-SuffixArrayFuns.cpp
@@ -0,0 +1,52 @@
+$NetBSD$
+
+--- SuffixArrayFuns.cpp.orig	2023-07-21 15:26:07 UTC
++++ SuffixArrayFuns.cpp
+@@ -15,7 +15,7 @@ uint compareSeqToGenome(Genome &mapGen, char** s2, uin
+      * dirR forward or reverse direction search on read sequence
+      */
+ 
+-    register int64 ii;
++    int64 ii;
+ 
+     uint SAstr=mapGen.SA[iSA];
+     bool dirG = (SAstr>>mapGen.GstrandBit) == 0; //forward or reverse strand of the genome
+@@ -231,7 +231,7 @@ uint compareSeqToGenome1(Genome &mapGen, char** s2, ui
+ 
+     //TODO no need for complementary sequence
+ 
+-    register int64 ii;
++    int64 ii;
+ 
+     uint SAstr=mapGen.SA[iSA];
+     bool dirG = (SAstr>>mapGen.GstrandBit) == 0; //forward or reverse strand of the genome
+@@ -356,13 +356,13 @@ uint funCalcSAiFromSA(char* gSeq, PackedArray& gSA, Ge
+     bool dirG = (SAstr>>mapGen.GstrandBit) == 0; //forward or reverse strand of the genome
+     SAstr &= mapGen.GstrandMask;
+     iL4=-1;
+-    register uint saind=0;
++    uint saind=0;
+     if (dirG)
+     {
+-        register uint128 g1=*( (uint128*) (gSeq+SAstr) );
++        uint128 g1=*( (uint128*) (gSeq+SAstr) );
+         for (int ii=0; ii<L; ii++)
+         {
+-            register char g2=(char) g1;
++            char g2=(char) g1;
+             if (g2>3)
+             {
+                 iL4=ii;
+@@ -376,10 +376,10 @@ uint funCalcSAiFromSA(char* gSeq, PackedArray& gSA, Ge
+         return saind;
+     } else
+     {
+-        register uint128 g1=*( (uint128*) (gSeq+mapGen.nGenome-SAstr-16) );
++        uint128 g1=*( (uint128*) (gSeq+mapGen.nGenome-SAstr-16) );
+         for (int ii=0; ii<L; ii++)
+         {
+-            register char g2=(char) (g1>>(8*(15-ii)));
++            char g2=(char) (g1>>(8*(15-ii)));
+             if (g2>3)
+             {
+                 iL4=ii;
diff --git a/rna-star/patches/patch-bamSortByCoordinate.cpp b/rna-star/patches/patch-bamSortByCoordinate.cpp
new file mode 100644
index 0000000000..6d18a2d90e
--- /dev/null
+++ b/rna-star/patches/patch-bamSortByCoordinate.cpp
@@ -0,0 +1,13 @@
+$NetBSD$
+
+--- bamSortByCoordinate.cpp.orig	2021-11-26 16:17:37 UTC
++++ bamSortByCoordinate.cpp
+@@ -64,7 +64,7 @@ void bamSortByCoordinate (Parameters &P, ReadAlignChun
+                             boolWait=false;
+                             totalMem+=newMem;
+                         };
+-                        sleep(0.1);
++                        usleep(10000);
+                     };
+                     BAMbinSortByCoordinate(ibin,binN,binS,P.runThreadN,P.outBAMsortTmpDir, P, genome, solo);
+                     #pragma omp critical
diff --git a/rna-star/patches/patch-opal_opal.cpp b/rna-star/patches/patch-opal_opal.cpp
new file mode 100644
index 0000000000..d281b30e31
--- /dev/null
+++ b/rna-star/patches/patch-opal_opal.cpp
@@ -0,0 +1,15 @@
+$NetBSD$
+
+--- opal/opal.cpp.orig	2021-11-26 16:14:46 UTC
++++ opal/opal.cpp
+@@ -5,10 +5,8 @@
+ #include <limits>
+ #include <vector>
+ 
+-extern "C" {
+ #define SIMDE_ENABLE_NATIVE_ALIASES
+ #include <simde_avx2.h> // AVX2 and lower
+-}
+ 
+ #include "opal.h"
+ 


Home | Main Index | Thread Index | Old Index