pkgsrc-WIP-changes archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

minimap2: Import from biology, add SIMDE support



Module Name:	pkgsrc-wip
Committed By:	Jason Bacon <bacon%NetBSD.org@localhost>
Pushed By:	outpaddling
Date:		Fri Feb 16 09:51:50 2024 -0600
Changeset:	39af9c22af56ef4dadd888db17ae1adeb5856046

Modified Files:
	Makefile
Added Files:
	minimap2/DESCR
	minimap2/Makefile
	minimap2/PLIST
	minimap2/distinfo
	minimap2/patches/patch-Makefile.simde
	minimap2/patches/patch-example.c

Log Message:
minimap2: Import from biology, add SIMDE support

Allows minimap2 to run on non-x86 architectures.

To see a diff of this commit:
https://wip.pkgsrc.org/cgi-bin/gitweb.cgi?p=pkgsrc-wip.git;a=commitdiff;h=39af9c22af56ef4dadd888db17ae1adeb5856046

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

diffstat:
 Makefile                              |  1 +
 minimap2/DESCR                        | 18 +++++++++
 minimap2/Makefile                     | 25 +++++++++++++
 minimap2/PLIST                        |  3 ++
 minimap2/distinfo                     |  7 ++++
 minimap2/patches/patch-Makefile.simde | 70 +++++++++++++++++++++++++++++++++++
 minimap2/patches/patch-example.c      | 15 ++++++++
 7 files changed, 139 insertions(+)

diffs:
diff --git a/Makefile b/Makefile
index bae6ccaa87..8a3ce68405 100644
--- a/Makefile
+++ b/Makefile
@@ -2226,6 +2226,7 @@ SUBDIR+=	mingw-w64-gcc
 SUBDIR+=	mingw-w64-headers
 SUBDIR+=	mingw-w64-libgcc
 SUBDIR+=	mini18n
+SUBDIR+=	minimap2
 SUBDIR+=	minio
 SUBDIR+=	minio-client
 SUBDIR+=	minizip192
diff --git a/minimap2/DESCR b/minimap2/DESCR
new file mode 100644
index 0000000000..97ba6a1796
--- /dev/null
+++ b/minimap2/DESCR
@@ -0,0 +1,18 @@
+Minimap2 is a versatile sequence alignment program that aligns DNA or
+mRNA sequences against a large reference database. Typical use cases
+include: (1) mapping PacBio or Oxford Nanopore genomic reads to the
+human genome; (2) finding overlaps between long reads with error rate
+up to ~15%; (3) splice-aware alignment of PacBio Iso-Seq or Nanopore
+cDNA or Direct RNA reads against a reference genome; (4) aligning
+Illumina single- or paired-end reads; (5) assembly-to-assembly
+alignment; (6) full-genome alignment between two closely related
+species with divergence below ~15%.
+
+For ~10kb noisy read sequences, minimap2 is tens of times faster than
+mainstream long-read mappers such as BLASR, BWA-MEM, NGMLR and
+GMAP. It is more accurate on simulated long reads and produces
+biologically meaningful alignment ready for downstream analyses. For
+>100bp Illumina short reads, minimap2 is three times as fast as
+BWA-MEM and Bowtie2, and as accurate on simulated data.  Detailed
+evaluations are available from the minimap2 paper
+(https://doi.org/10.1093/bioinformatics/bty191).
diff --git a/minimap2/Makefile b/minimap2/Makefile
new file mode 100644
index 0000000000..591b7a9f02
--- /dev/null
+++ b/minimap2/Makefile
@@ -0,0 +1,25 @@
+# $NetBSD: Makefile,v 1.4 2023/08/14 05:23:51 wiz Exp $
+
+VERSION=	2.25
+DISTNAME=	minimap2-${VERSION}
+CATEGORIES=	biology python
+MASTER_SITES=	${MASTER_SITE_GITHUB:=lh3/}
+GITHUB_TAG=	v${VERSION}
+
+MAINTAINER=	pkgsrc-users%NetBSD.org@localhost
+HOMEPAGE=	https://github.com/lh3/minimap2
+COMMENT=	Sequence alignment program for noisy, long reads
+LICENSE=	mit
+
+USE_TOOLS+=	gmake
+MAKE_FILE=	Makefile.simde
+
+INSTALLATION_DIRS+=	bin ${PKGMANDIR}/man1
+
+do-install:	# install only the minimap2 binary and its man page, matching PLIST
+	${INSTALL_PROGRAM} ${WRKSRC}/minimap2 ${DESTDIR}${PREFIX}/bin
+	${INSTALL_MAN} ${WRKSRC}/minimap2.1 ${DESTDIR}${PREFIX}/${PKGMANDIR}/man1
+
+.include "../../devel/zlib/buildlink3.mk"
+.include "../../devel/simde/buildlink3.mk"
+.include "../../mk/bsd.pkg.mk"
diff --git a/minimap2/PLIST b/minimap2/PLIST
new file mode 100644
index 0000000000..0a1e24c17f
--- /dev/null
+++ b/minimap2/PLIST
@@ -0,0 +1,3 @@
+@comment $NetBSD$
+bin/minimap2
+man/man1/minimap2.1
diff --git a/minimap2/distinfo b/minimap2/distinfo
new file mode 100644
index 0000000000..4af32f9eba
--- /dev/null
+++ b/minimap2/distinfo
@@ -0,0 +1,7 @@
+$NetBSD$
+
+BLAKE2s (minimap2-2.25.tar.gz) = b15eedecf2c98c51b2c90ebf824d0b1be307c4d5146f4d9489eee12ec8421d10
+SHA512 (minimap2-2.25.tar.gz) = 5d549885874603bb849212ab01fdbd32a2f98300d3b5f81e971e618c00e585e8dd4b0dd3638d7206a22913e7bb316aefa59b34e582de951a37f9f8b7852c55dd
+Size (minimap2-2.25.tar.gz) = 257574 bytes
+SHA1 (patch-Makefile.simde) = 850a66c8843119f99bcd4a2b261cfd4bffa38551
+SHA1 (patch-example.c) = ccf0c4addfece2e11b90f5a558a6de324f255d7c
diff --git a/minimap2/patches/patch-Makefile.simde b/minimap2/patches/patch-Makefile.simde
new file mode 100644
index 0000000000..2c7b2c5e51
--- /dev/null
+++ b/minimap2/patches/patch-Makefile.simde
@@ -0,0 +1,70 @@
+$NetBSD$
+
+# Respect standard env vars
+
+--- Makefile.simde.orig	2021-11-18 22:11:48 UTC
++++ Makefile.simde
+@@ -1,13 +1,26 @@
+-CFLAGS=		-g -Wall -O2 -Wc++-compat #-Wextra
+-CPPFLAGS=	-DHAVE_KALLOC -DUSE_SIMDE -DSIMDE_ENABLE_NATIVE_ALIASES
+-INCLUDES=	-Ilib/simde
+-OBJS=		kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o lchain.o align.o hit.o map.o format.o pe.o seed.o esterr.o splitidx.o \
+-			ksw2_extz2_simde.o ksw2_extd2_simde.o ksw2_exts2_simde.o ksw2_ll_simde.o
++
++# Makefile patches rejected by upstream
++
++CFLAGS?=	-g -Wall -O2
++CFLAGS+=	-Wc++-compat -DUSE_SIMDE -DSIMDE_ENABLE_NATIVE_ALIASES -D__SSE2__
++CPPFLAGS?=	-DHAVE_KALLOC
++OBJS=		kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o \
++		index.o lchain.o align.o hit.o map.o format.o pe.o seed.o \
++		esterr.o splitidx.o ksw2_extz2_simde.o ksw2_extd2_simde.o \
++		ksw2_exts2_simde.o ksw2_ll_simde.o
+ PROG=		minimap2
+ PROG_EXTRA=	sdust minimap2-lite
+ LIBS=		-lm -lz -lpthread
+ 
++PREFIX?=	/usr/local
++MAN1DIR?=	${PREFIX}/man/man1
++DATADIR?=	${PREFIX}/share/minimap2
+ 
++MKDIR?=		mkdir
++CP?=		cp	
++INSTALL?=	install
++STRIP?=		strip
++
+ ifneq ($(arm_neon),) # if arm_neon is defined
+ ifeq ($(aarch64),)   #if aarch64 is not defined
+ 	CFLAGS+=-D_FILE_OFFSET_BITS=64 -mfpu=neon -fsigned-char
+@@ -26,7 +39,7 @@ ifneq ($(tsan),)
+ 	LIBS+=-fsanitize=thread
+ endif
+ 
+-.PHONY:all extra clean depend
++.PHONY:all extra install install-strip clean depend
+ .SUFFIXES:.c .o
+ 
+ .c.o:
+@@ -61,6 +74,22 @@ ksw2_exts2_simde.o:ksw2_exts2_sse.c ksw2.h kalloc.h
+ 		$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) $(INCLUDES) $< -o $@
+ 
+ # other non-file targets
++
++
++install: all
++	${MKDIR} -p ${DESTDIR}${PREFIX}/bin
++	${MKDIR} -p ${DESTDIR}${MAN1DIR}
++	${MKDIR} -p ${DESTDIR}${DATADIR}
++	${INSTALL} -c minimap2 ${DESTDIR}${PREFIX}/bin
++	${INSTALL} -c minimap2-lite ${DESTDIR}${PREFIX}/bin
++	${INSTALL} -c sdust ${DESTDIR}${PREFIX}/bin
++	${INSTALL} -c minimap2.1 ${DESTDIR}${MAN1DIR}
++	${CP} -R test ${DESTDIR}${DATADIR}
++
++install-strip: install
++	${STRIP} ${DESTDIR}${PREFIX}/bin/minimap2
++	${STRIP} ${DESTDIR}${PREFIX}/bin/minimap2-lite
++	${STRIP} ${DESTDIR}${PREFIX}/bin/sdust
+ 
+ clean:
+ 		rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM build dist mappy*.so mappy.c python/mappy.c mappy.egg*
diff --git a/minimap2/patches/patch-example.c b/minimap2/patches/patch-example.c
new file mode 100644
index 0000000000..1e9ef90bc2
--- /dev/null
+++ b/minimap2/patches/patch-example.c
@@ -0,0 +1,15 @@
+$NetBSD$
+
+# Fix data type mismatch
+
+--- example.c.orig	2021-11-30 15:26:24 UTC
++++ example.c
+@@ -44,7 +44,7 @@ int main(int argc, char *argv[])
+ 			for (j = 0; j < n_reg; ++j) { // traverse hits and print them out
+ 				mm_reg1_t *r = &reg[j];
+ 				assert(r->p); // with MM_F_CIGAR, this should not be NULL
+-				printf("%s\t%d\t%d\t%d\t%c\t", ks->name.s, ks->seq.l, r->qs, r->qe, "+-"[r->rev]);
++				printf("%s\t%zu\t%d\t%d\t%c\t", ks->name.s, ks->seq.l, r->qs, r->qe, "+-"[r->rev]);
+ 				printf("%s\t%d\t%d\t%d\t%d\t%d\t%d\tcg:Z:", mi->seq[r->rid].name, mi->seq[r->rid].len, r->rs, r->re, r->mlen, r->blen, r->mapq);
+ 				for (i = 0; i < r->p->n_cigar; ++i) // IMPORTANT: this gives the CIGAR in the aligned regions. NO soft/hard clippings!
+ 					printf("%d%c", r->p->cigar[i]>>4, MM_CIGAR_STR[r->p->cigar[i]&0xf]);


Home | Main Index | Thread Index | Old Index