pkgsrc-WIP-changes archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
rna-star: Spliced Transcripts Alignment to a Reference
Module Name: pkgsrc-wip
Committed By: Jason W. Bacon <bacon%NetBSD.org@localhost>
Pushed By: outpaddling
Date: Sun Jul 23 11:05:28 2023 -0500
Changeset: 610f4169fe55ea02efeda41573c698b1936aaa8e
Modified Files:
Makefile
Added Files:
rna-star/DESCR
rna-star/Makefile
rna-star/PLIST
rna-star/distinfo
rna-star/patches/patch-Makefile
rna-star/patches/patch-SpliceGraph.cpp
rna-star/patches/patch-SuffixArrayFuns.cpp
rna-star/patches/patch-bamSortByCoordinate.cpp
rna-star/patches/patch-opal_opal.cpp
Log Message:
rna-star: Spliced Transcripts Alignment to a Reference
STAR (Spliced Transcripts Alignment to a Reference) aims to achieve
accurate alignment of high-throughput RNA-seq data. STAR is based on
a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays
followed by seed clustering and stitching procedure. STAR outperforms
other aligners by a factor of >50 in mapping speed, aligning to the
human genome 550 million 2 x 76 bp paired-end reads per hour on a modest
12-core server, while at the same time improving alignment sensitivity
and precision.
To see a diff of this commit:
https://wip.pkgsrc.org/cgi-bin/gitweb.cgi?p=pkgsrc-wip.git;a=commitdiff;h=610f4169fe55ea02efeda41573c698b1936aaa8e
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
diffstat:
Makefile | 1 +
rna-star/DESCR | 9 ++++
rna-star/Makefile | 65 ++++++++++++++++++++++++++
rna-star/PLIST | 1 +
rna-star/distinfo | 10 ++++
rna-star/patches/patch-Makefile | 62 ++++++++++++++++++++++++
rna-star/patches/patch-SpliceGraph.cpp | 12 +++++
rna-star/patches/patch-SuffixArrayFuns.cpp | 52 +++++++++++++++++++++
rna-star/patches/patch-bamSortByCoordinate.cpp | 13 ++++++
rna-star/patches/patch-opal_opal.cpp | 15 ++++++
10 files changed, 240 insertions(+)
diffs:
diff --git a/Makefile b/Makefile
index ab471d7bac..b5341a312c 100644
--- a/Makefile
+++ b/Makefile
@@ -5007,6 +5007,7 @@ SUBDIR+= rk
SUBDIR+= rlottie
SUBDIR+= rmilter
SUBDIR+= rna-seq
+SUBDIR+= rna-star
SUBDIR+= rnc-mode
SUBDIR+= rng-tools
SUBDIR+= rnp
diff --git a/rna-star/DESCR b/rna-star/DESCR
new file mode 100644
index 0000000000..f2e372497f
--- /dev/null
+++ b/rna-star/DESCR
@@ -0,0 +1,9 @@
+STAR (Spliced Transcripts Alignment to a Reference) aims to achieve
+accurate alignment of high-throughput RNA-seq data. STAR is based on
+a previously undescribed RNA-seq alignment algorithm that uses
+sequential maximum mappable seed search in uncompressed suffix arrays
+followed by seed clustering and stitching procedure. STAR outperforms
+other aligners by a factor of >50 in mapping speed, aligning to the
+human genome 550 million 2 x 76 bp paired-end reads per hour on a modest
+12-core server, while at the same time improving alignment sensitivity
+and precision
diff --git a/rna-star/Makefile b/rna-star/Makefile
new file mode 100644
index 0000000000..ce268ba9d1
--- /dev/null
+++ b/rna-star/Makefile
@@ -0,0 +1,65 @@
+# $NetBSD$
+#
+###########################################################
+# Generated by fbsd2pkg #
+# Sun Jul 23 10:47:07 CDT 2023 #
+###########################################################
+
+###########################################################
+# Unconverted and partially converted FreeBSD port syntax:
+
+#NOT_FOR_ARCHS= armv6 armv7 i386 mips powerpc powerpcspe
+#NOT_FOR_ARCHS_REASON= Requires 64-bit processor
+#.if !exists(/usr/include/omp.h)
+#BROKEN= requires OpenMP support that is missing on this architecture
+#.endif
+# Unknown tool: USE_TOOLS= compiler:c++11-lang
+# Unknown tool: USE_TOOLS= localbase:ldflags
+
+DISTNAME= STAR-2.7.10b
+PKGNAME= rna-STAR-2.7.10b
+CATEGORIES= biology
+MASTER_SITES= ${MASTER_SITE_GITHUB:=alexdobin/}
+GITHUB_PROJECT= STAR
+
+OWNER= bacon%NetBSD.org@localhost
+HOMEPAGE= https://github.com/alexdobin/STAR
+COMMENT= Spliced Transcripts Alignment to a Reference
+LICENSE= gnu-gpl-v3
+
+# Build against the pkgsrc htslib rather than the bundled copy: rewrite
+# the quoted "htslib/htslib/*.h" includes to <htslib/*.h> before configure.
+SUBST_CLASSES+= htslib
+SUBST_STAGE.htslib= pre-configure
+SUBST_SED.htslib+= -e 's|"htslib/htslib/sam.h"|<htslib/sam.h>|g'
+SUBST_SED.htslib+= -e 's|"htslib/htslib/kstring.h"|<htslib/kstring.h>|g'
+SUBST_SED.htslib+= -e 's|"htslib/htslib/bgzf.h"|<htslib/bgzf.h>|g'
+SUBST_FILES.htslib+= ${WRKSRC}/BAMfunctions.cpp
+SUBST_FILES.htslib+= ${WRKSRC}/IncludeDefine.h
+SUBST_FILES.htslib+= ${WRKSRC}/STAR.cpp
+SUBST_FILES.htslib+= ${WRKSRC}/bamRemoveDuplicates.cpp
+SUBST_FILES.htslib+= ${WRKSRC}/bam_cat.c
+SUBST_FILES.htslib+= ${WRKSRC}/bam_cat.h
+SUBST_FILES.htslib+= ${WRKSRC}/signalFromBAM.h
+
+# Test and change if necessary.
+MAKE_JOBS_SAFE= no
+
+# Just assuming C and C++: Adjust this!
+USE_LANGUAGES= c c++
+USE_TOOLS+= gmake
+
+WRKSRC= ${WRKDIR}/${DISTNAME}/source
+MAKE_FLAGS= HTSLIB=''
+CXXFLAGS+= -std=c++11
+
+INSTALLATION_DIRS= bin
+
+do-install:
+ ${INSTALL_PROGRAM} ${WRKSRC}/STAR ${DESTDIR}${PREFIX}/bin
+
+# Convert any _DEPENDS above that have a buildlink3.mk
+.include "../../biology/htslib/buildlink3.mk"
+# CentOS doesn't have zlib in the base, so uncomment if needed.
+# .include "../../devel/zlib/buildlink3.mk"
+.include "../../mk/bsd.pkg.mk"
diff --git a/rna-star/PLIST b/rna-star/PLIST
new file mode 100644
index 0000000000..48d96a5493
--- /dev/null
+++ b/rna-star/PLIST
@@ -0,0 +1 @@
+@comment $NetBSD$
diff --git a/rna-star/distinfo b/rna-star/distinfo
new file mode 100644
index 0000000000..de60f4a45a
--- /dev/null
+++ b/rna-star/distinfo
@@ -0,0 +1,10 @@
+$NetBSD$
+
+BLAKE2s (STAR-2.7.10b.tar.gz) = ae5b1c236c1b5975d60277dbd5634661a4598e6f3f906f3d6b582b403edb58d3
+SHA512 (STAR-2.7.10b.tar.gz) = c4e94fd19f2a8145c38d59b39d91af16b53af63da2d77275445b9aeadfd287e3d2749829352a034e45e4352c64884275fa5b7b2ed28b95bf47cac3183b0d74aa
+Size (STAR-2.7.10b.tar.gz) = 12443703 bytes
+SHA1 (patch-Makefile) = 6bf4600d222a89a95e1cc9e038226a8e419fc0e8
+SHA1 (patch-SpliceGraph.cpp) = 17f5b819985914d075cc15ef67f9b1625a1a6ca3
+SHA1 (patch-SuffixArrayFuns.cpp) = 219abac0885e2e710e634335228baadf16138429
+SHA1 (patch-bamSortByCoordinate.cpp) = 8c3db8dff990c3ff69be02fc86a4b7a8bed17bd6
+SHA1 (patch-opal_opal.cpp) = 69334ba211d2b371fcf96f86fb842d3e67acc9c0
diff --git a/rna-star/patches/patch-Makefile b/rna-star/patches/patch-Makefile
new file mode 100644
index 0000000000..a3a42d9c63
--- /dev/null
+++ b/rna-star/patches/patch-Makefile
@@ -0,0 +1,62 @@
+$NetBSD$
+
+--- Makefile.orig 2022-11-01 14:50:35 UTC
++++ Makefile
+@@ -11,6 +11,10 @@ CXXFLAGSextra ?=
+ # user may define the compiler
+ CXX ?= g++
+
++# user may set to '' and sed "htslib/htslib/*.h" to <htslib/*.h>
++# if they don't want the bundled htslib
++HTSLIB ?= htslib
++
+ # pre-defined flags
+ LDFLAGS_shared := -pthread -Lhtslib -Bstatic -lhts -Bdynamic -lz
+ LDFLAGS_static := -static -static-libgcc -pthread -Lhtslib -lhts -lz
+@@ -18,7 +22,8 @@ LDFLAGS_Mac :=-pthread -lz htslib/libhts.a
+ LDFLAGS_Mac_static :=-pthread -lz -static-libgcc htslib/libhts.a
+ LDFLAGS_gdb := $(LDFLAGS_shared)
+
+-DATE_FMT = --iso-8601=seconds
++# --iso-8601 is not portable
++DATE_FMT = -Iseconds
+ ifdef SOURCE_DATE_EPOCH
+ BUILD_DATE ?= $(shell date -u -d "@$(SOURCE_DATE_EPOCH)" "$(DATE_FMT)" 2>/dev/null || date -u -r "$(SOURCE_DATE_EPOCH)" "$(DATE_FMT)" 2>/dev/null || date -u "$(DATE_FMT)")
+ else
+@@ -41,12 +46,12 @@ GIT_BRANCH_COMMIT_DIFF := -D'GIT_BRANCH_COMMIT_DIFF="$
+
+ # Defaults, can be overridden by make arguments or environment
+ CXXFLAGS ?= -pipe -Wall -Wextra
+-CFLAGS ?= -pipe -Wall -Wextra -O3
++CFLAGS ?= -pipe -Wall -Wextra
+ CXXFLAGS_SIMD ?= -mavx2
+
+ # Unconditionally set essential flags and optimization options
+ CXXFLAGS_common := -std=c++11 -fopenmp $(COMPTIMEPLACE) $(GIT_BRANCH_COMMIT_DIFF)
+-CXXFLAGS_main := -O3 $(CXXFLAGS_common)
++CXXFLAGS_main := $(CXXFLAGS_common)
+ CXXFLAGS_gdb := -O0 -g3 $(CXXFLAGS_common)
+
+ ##########################################################################################################
+@@ -114,7 +119,7 @@ clean:
+
+ .PHONY: CLEAN
+ CLEAN: clean
+- $(MAKE) -C htslib clean
++ $(MAKE) -C $(HTSLIB) clean
+
+
+ .PHONY: clean_solo
+@@ -131,10 +136,10 @@ ifneq ($(MAKECMDGOALS),CLEAN)
+ ifneq ($(MAKECMDGOALS),clean_solo)
+ ifneq ($(MAKECMDGOALS),STARforMac)
+ ifneq ($(MAKECMDGOALS),STARforMacGDB)
+-Depend.list: $(SOURCES) parametersDefault.xxd htslib
++Depend.list: $(SOURCES) parametersDefault.xxd $(HTSLIB)
+ echo $(SOURCES)
+ 'rm' -f ./Depend.list
+- $(CXX) $(CXXFLAGS_common) -MM $^ >> Depend.list
++ $(CXX) $(CXXFLAGS) $(CXXFLAGS_common) -MM $^ >> Depend.list
+ include Depend.list
+ endif
+ endif
diff --git a/rna-star/patches/patch-SpliceGraph.cpp b/rna-star/patches/patch-SpliceGraph.cpp
new file mode 100644
index 0000000000..374054d0c5
--- /dev/null
+++ b/rna-star/patches/patch-SpliceGraph.cpp
@@ -0,0 +1,12 @@
+$NetBSD$
+
+--- SpliceGraph.cpp.orig 2023-07-21 15:27:26 UTC
++++ SpliceGraph.cpp
+@@ -1,7 +1,6 @@
+ /*
+ * Created by Fahimeh Mirhaj on 6/10/19.
+ */
+-using namespace std;
+
+ #include "SpliceGraph.h"
+ #include "GTF.h"
diff --git a/rna-star/patches/patch-SuffixArrayFuns.cpp b/rna-star/patches/patch-SuffixArrayFuns.cpp
new file mode 100644
index 0000000000..0906b36c1c
--- /dev/null
+++ b/rna-star/patches/patch-SuffixArrayFuns.cpp
@@ -0,0 +1,52 @@
+$NetBSD$
+
+--- SuffixArrayFuns.cpp.orig 2023-07-21 15:26:07 UTC
++++ SuffixArrayFuns.cpp
+@@ -15,7 +15,7 @@ uint compareSeqToGenome(Genome &mapGen, char** s2, uin
+ * dirR forward or reverse direction search on read sequence
+ */
+
+- register int64 ii;
++ int64 ii;
+
+ uint SAstr=mapGen.SA[iSA];
+ bool dirG = (SAstr>>mapGen.GstrandBit) == 0; //forward or reverse strand of the genome
+@@ -231,7 +231,7 @@ uint compareSeqToGenome1(Genome &mapGen, char** s2, ui
+
+ //TODO no need for complementary sequence
+
+- register int64 ii;
++ int64 ii;
+
+ uint SAstr=mapGen.SA[iSA];
+ bool dirG = (SAstr>>mapGen.GstrandBit) == 0; //forward or reverse strand of the genome
+@@ -356,13 +356,13 @@ uint funCalcSAiFromSA(char* gSeq, PackedArray& gSA, Ge
+ bool dirG = (SAstr>>mapGen.GstrandBit) == 0; //forward or reverse strand of the genome
+ SAstr &= mapGen.GstrandMask;
+ iL4=-1;
+- register uint saind=0;
++ uint saind=0;
+ if (dirG)
+ {
+- register uint128 g1=*( (uint128*) (gSeq+SAstr) );
++ uint128 g1=*( (uint128*) (gSeq+SAstr) );
+ for (int ii=0; ii<L; ii++)
+ {
+- register char g2=(char) g1;
++ char g2=(char) g1;
+ if (g2>3)
+ {
+ iL4=ii;
+@@ -376,10 +376,10 @@ uint funCalcSAiFromSA(char* gSeq, PackedArray& gSA, Ge
+ return saind;
+ } else
+ {
+- register uint128 g1=*( (uint128*) (gSeq+mapGen.nGenome-SAstr-16) );
++ uint128 g1=*( (uint128*) (gSeq+mapGen.nGenome-SAstr-16) );
+ for (int ii=0; ii<L; ii++)
+ {
+- register char g2=(char) (g1>>(8*(15-ii)));
++ char g2=(char) (g1>>(8*(15-ii)));
+ if (g2>3)
+ {
+ iL4=ii;
diff --git a/rna-star/patches/patch-bamSortByCoordinate.cpp b/rna-star/patches/patch-bamSortByCoordinate.cpp
new file mode 100644
index 0000000000..6d18a2d90e
--- /dev/null
+++ b/rna-star/patches/patch-bamSortByCoordinate.cpp
@@ -0,0 +1,13 @@
+$NetBSD$
+
+--- bamSortByCoordinate.cpp.orig 2021-11-26 16:17:37 UTC
++++ bamSortByCoordinate.cpp
+@@ -64,7 +64,7 @@ void bamSortByCoordinate (Parameters &P, ReadAlignChun
+ boolWait=false;
+ totalMem+=newMem;
+ };
+- sleep(0.1);
++ usleep(10000);
+ };
+ BAMbinSortByCoordinate(ibin,binN,binS,P.runThreadN,P.outBAMsortTmpDir, P, genome, solo);
+ #pragma omp critical
diff --git a/rna-star/patches/patch-opal_opal.cpp b/rna-star/patches/patch-opal_opal.cpp
new file mode 100644
index 0000000000..d281b30e31
--- /dev/null
+++ b/rna-star/patches/patch-opal_opal.cpp
@@ -0,0 +1,15 @@
+$NetBSD$
+
+--- opal/opal.cpp.orig 2021-11-26 16:14:46 UTC
++++ opal/opal.cpp
+@@ -5,10 +5,8 @@
+ #include <limits>
+ #include <vector>
+
+-extern "C" {
+ #define SIMDE_ENABLE_NATIVE_ALIASES
+ #include <simde_avx2.h> // AVX2 and lower
+-}
+
+ #include "opal.h"
+
Home |
Main Index |
Thread Index |
Old Index