pkgsrc-WIP-changes archive


llama.cpp: start package



Module Name:	pkgsrc-wip
Committed By:	Thomas Klausner <wiz%NetBSD.org@localhost>
Pushed By:	wiz
Date:		Sat Jun 8 12:48:08 2024 +0200
Changeset:	6b28727dca82bc9f63d07120a86ea5a8f96ee335

Modified Files:
	Makefile
Added Files:
	llama.cpp/DESCR
	llama.cpp/Makefile
	llama.cpp/PLIST
	llama.cpp/TODO
	llama.cpp/distinfo

Log Message:
llama.cpp: start package

To see a diff of this commit:
https://wip.pkgsrc.org/cgi-bin/gitweb.cgi?p=pkgsrc-wip.git;a=commitdiff;h=6b28727dca82bc9f63d07120a86ea5a8f96ee335

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

diffstat:
 Makefile           |  1 +
 llama.cpp/DESCR    | 15 ++++++++++++++
 llama.cpp/Makefile | 24 ++++++++++++++++++++++
 llama.cpp/PLIST    | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 llama.cpp/TODO     |  9 ++++++++
 llama.cpp/distinfo |  5 +++++
 6 files changed, 114 insertions(+)

diffs:
diff --git a/Makefile b/Makefile
index 5b6241e2dd..c15d57e3b3 100644
--- a/Makefile
+++ b/Makefile
@@ -2135,6 +2135,7 @@ SUBDIR+=	lissac
 SUBDIR+=	litecoin
 SUBDIR+=	lives
 SUBDIR+=	lizardfs
+SUBDIR+=	llama.cpp
 SUBDIR+=	lld
 SUBDIR+=	lldb
 SUBDIR+=	llilc-git
diff --git a/llama.cpp/DESCR b/llama.cpp/DESCR
new file mode 100644
index 0000000000..f52f2a686f
--- /dev/null
+++ b/llama.cpp/DESCR
@@ -0,0 +1,15 @@
+The main goal of llama.cpp is to enable LLM inference with minimal
+setup and state-of-the-art performance on a wide variety of hardware
+- locally and in the cloud.
+
+* Plain C/C++ implementation without any dependencies
+* Apple silicon is a first-class citizen - optimized via ARM NEON,
+  Accelerate and Metal frameworks
+* AVX, AVX2 and AVX512 support for x86 architectures
+* 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer
+  quantization for faster inference and reduced memory use
+* Custom CUDA kernels for running LLMs on NVIDIA GPUs (support for
+  AMD GPUs via HIP)
+* Vulkan and SYCL backend support
+* CPU+GPU hybrid inference to partially accelerate models larger
+  than the total VRAM capacity
diff --git a/llama.cpp/Makefile b/llama.cpp/Makefile
new file mode 100644
index 0000000000..0fc35cac86
--- /dev/null
+++ b/llama.cpp/Makefile
@@ -0,0 +1,24 @@
+# $NetBSD$
+
+DISTNAME=	llama.cpp-b3091
+PKGNAME=	${DISTNAME:S/-b/-0.0.2./}
+CATEGORIES=	devel
+MASTER_SITES=	${MASTER_SITE_GITHUB:=ggerganov/}
+GITHUB_TAG=	b3091
+
+MAINTAINER=	pkgsrc-users%NetBSD.org@localhost
+HOMEPAGE=	https://github.com/ggerganov/llama.cpp/
+COMMENT=	LLM inference in C/C++
+LICENSE=	mit
+
+#WRKSRC=		${WRKDIR}/llama.cpp-b3091
+USE_TOOLS+=	pkg-config
+USE_LANGUAGES=	c c++
+USE_TOOLS+=	gmake
+
+PKGCONFIG_OVERRIDE+=	cmake/llama.pc.in
+REPLACE_PYTHON+=	*.py */*.py */*/*.py
+
+.include "../../devel/cmake/build.mk"
+.include "../../lang/python/application.mk"
+.include "../../mk/bsd.pkg.mk"
diff --git a/llama.cpp/PLIST b/llama.cpp/PLIST
new file mode 100644
index 0000000000..ebd9640327
--- /dev/null
+++ b/llama.cpp/PLIST
@@ -0,0 +1,60 @@
+@comment $NetBSD$
+bin/baby-llama
+bin/batched
+bin/batched-bench
+bin/benchmark
+bin/convert-hf-to-gguf.py
+bin/convert-llama2c-to-ggml
+bin/embedding
+bin/eval-callback
+bin/export-lora
+bin/finetune
+bin/gguf
+bin/gguf-split
+bin/gritlm
+bin/imatrix
+bin/infill
+bin/llama-bench
+bin/llava-cli
+bin/lookahead
+bin/lookup
+bin/lookup-create
+bin/lookup-merge
+bin/lookup-stats
+bin/main
+bin/parallel
+bin/passkey
+bin/perplexity
+bin/quantize
+bin/quantize-stats
+bin/retrieval
+bin/save-load-state
+bin/server
+bin/simple
+bin/speculative
+bin/test-autorelease
+bin/test-backend-ops
+bin/test-chat-template
+bin/test-grad0
+bin/test-grammar-integration
+bin/test-grammar-parser
+bin/test-json-schema-to-grammar
+bin/test-llama-grammar
+bin/test-model-load-cancel
+bin/test-quantize-fns
+bin/test-quantize-perf
+bin/test-rope
+bin/test-sampling
+bin/test-tokenizer-0
+bin/test-tokenizer-1-bpe
+bin/test-tokenizer-1-spm
+bin/tokenize
+bin/train-text-from-scratch
+include/ggml-alloc.h
+include/ggml-backend.h
+include/ggml.h
+include/llama.h
+lib/cmake/Llama/LlamaConfig.cmake
+lib/cmake/Llama/LlamaConfigVersion.cmake
+lib/libllama.a
+lib/pkgconfig/llama.pc
diff --git a/llama.cpp/TODO b/llama.cpp/TODO
new file mode 100644
index 0000000000..fb3090de0a
--- /dev/null
+++ b/llama.cpp/TODO
@@ -0,0 +1,9 @@
+Test
+
+Add python dependencies:
+torch~=2.1.1
+numpy~=1.24.4
+sentencepiece~=0.2.0
+transformers>=4.40.1,<5.0.0
+gguf>=0.1.0
+protobuf>=4.21.0,<5.0.0
diff --git a/llama.cpp/distinfo b/llama.cpp/distinfo
new file mode 100644
index 0000000000..f929c3a057
--- /dev/null
+++ b/llama.cpp/distinfo
@@ -0,0 +1,5 @@
+$NetBSD$
+
+BLAKE2s (llama.cpp-b3091.tar.gz) = 34af707155ae44c9417ce85b102a432ed665062d87ec6ad36b039268272031f8
+SHA512 (llama.cpp-b3091.tar.gz) = f2dac9b3663dd272c74f7ff9e50b63ddb489d5566a8a2e71102b1ad0b61454221cd0bd2dcce40eca1050f2236bc4ded76916c34371f0375649c3920126665bda
+Size (llama.cpp-b3091.tar.gz) = 20541244 bytes
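
For reference, the PKGNAME line in the new llama.cpp/Makefile maps the
upstream release tag onto a pkgsrc-style version with a :S modifier.
A worked expansion (the trailing comment is illustrative, not part of
the committed file):

DISTNAME=	llama.cpp-b3091
PKGNAME=	${DISTNAME:S/-b/-0.0.2./}	# expands to llama.cpp-0.0.2.3091

The WRKSRC line is commented out, presumably because the GitHub tarball
for tag b3091 extracts to llama.cpp-b3091, which already matches the
default ${WRKDIR}/${DISTNAME}.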

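The Python requirements listed in the TODO are what the bundled
convert-*.py scripts (e.g. bin/convert-hf-to-gguf.py in the PLIST) need
at run time. Once suitable packages are available they would become
DEPENDS entries; a sketch, assuming the main-tree py-numpy and
py-protobuf are acceptable and that torch, sentencepiece, transformers
and gguf would still need packages, likely in wip:

DEPENDS+=	${PYPKGPREFIX}-numpy>=1.24.4:../../math/py-numpy
DEPENDS+=	${PYPKGPREFIX}-protobuf>=4.21.0:../../devel/py-protobuf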
