author    CoprDistGit <infra@openeuler.org>    2024-11-06 02:42:05 +0000
committer CoprDistGit <infra@openeuler.org>    2024-11-06 02:42:05 +0000
commit    01b344e3b00539c0dacd8c9bb450a05b54a39f a9 (patch)
tree      295ef97f133c2a7f6788a5959453257912a1aa85
parent    41ce1b030e5536e9a4b7a0d54ff522833d4004f3 (diff)
automatic import of llama.cpp
-rw-r--r-- .gitignore                                                  |   1 +
-rw-r--r-- backport-CVE-2024-41130.patch                               |  41 +
-rw-r--r-- backport-CVE-2024-42477-CVE-2024-42478-CVE-2024-42479.patch | 150 +
-rw-r--r-- llama.cpp.spec                                              |  63 +
-rw-r--r-- sources                                                     |   1 +
5 files changed, 256 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
index e69de29..0a1143e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
+/b4016.tar.gz
diff --git a/backport-CVE-2024-41130.patch b/backport-CVE-2024-41130.patch
new file mode 100644
index 0000000..d1c6a23
--- /dev/null
+++ b/backport-CVE-2024-41130.patch
@@ -0,0 +1,41 @@
+From 07283b1a90e1320aae4762c7e03c879043910252 Mon Sep 17 00:00:00 2001
+From: Georgi Gerganov <ggerganov@gmail.com>
+Date: Sat, 20 Jul 2024 17:15:42 +0300
+Subject: [PATCH] gguf : handle null name during init (#8587)
+
+---
+ examples/gguf/gguf.cpp | 5 +++++
+ ggml.c | 2 +-
+ 2 files changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp
+index 57514377..7498f85e 100644
+--- a/examples/gguf/gguf.cpp
++++ b/examples/gguf/gguf.cpp
+@@ -92,6 +92,11 @@ static bool gguf_ex_read_0(const std::string & fname) {
+
+ struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
+
++ if (!ctx) {
++ fprintf(stderr, "%s: failed to load '%s'\n", __func__, fname.c_str());
++ return false;
++ }
++
+ printf("%s: version: %d\n", __func__, gguf_get_version(ctx));
+ printf("%s: alignment: %zu\n", __func__, gguf_get_alignment(ctx));
+ printf("%s: data offset: %zu\n", __func__, gguf_get_data_offset(ctx));
+diff --git a/ggml.c b/ggml.c
+index 7a39c685..dbb3a3eb 100644
+--- a/ggml.c
++++ b/ggml.c
+@@ -21015,7 +21015,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
+ gguf_tensor_info_sanitize(info);
+
+ // make sure there is no duplicated tensor names
+- for (uint64_t j = 0; j < i; ++j) {
++ for (uint64_t j = 0; j < i && ok; ++j) {
+ if (strcmp(info->name.data, ctx->infos[j].name.data) == 0) {
+ fprintf(stderr, "%s: duplicated tensor name %s\n", __func__, info->name.data);
+ ok = false;
+--
+2.20.1
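
Context for the backport above: the gguf.cpp hunk adds a defensive null check after gguf_init_from_file(), and the ggml.c hunk makes the duplicate-name scan stop as soon as an earlier check has already flagged the file as invalid. A minimal caller-side sketch of the same pattern follows; the gguf_* functions and the gguf_init_params layout are taken from the llama.cpp API patched above, while the wrapper itself is purely illustrative and not part of the project:

    #include <cstdio>
    #include <string>
    #include "ggml.h"   // declares the gguf_* API at this release (assumption: no separate gguf.h yet)

    // Return false instead of crashing when a malformed GGUF file
    // (for example one with a null tensor name) fails to load.
    static bool try_load_gguf(const std::string & fname) {
        struct gguf_init_params params = {
            /*.no_alloc =*/ false,
            /*.ctx      =*/ nullptr,
        };

        struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
        if (!ctx) {
            fprintf(stderr, "failed to load '%s'\n", fname.c_str());
            return false;
        }

        gguf_free(ctx);
        return true;
    }
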
diff --git a/backport-CVE-2024-42477-CVE-2024-42478-CVE-2024-42479.patch b/backport-CVE-2024-42477-CVE-2024-42478-CVE-2024-42479.patch
new file mode 100644
index 0000000..41c49e2
--- /dev/null
+++ b/backport-CVE-2024-42477-CVE-2024-42478-CVE-2024-42479.patch
@@ -0,0 +1,150 @@
+From 0337f608107f2ce3ba403135e832cf7237db3f1a Mon Sep 17 00:00:00 2001
+From: wangguochun <wangguochun@kylinos.cn>
+Date: Fri, 16 Aug 2024 03:33:01 +0000
+Subject: [PATCH] Merge commit from fork
+
+cherry pick: https://github.com/ggerganov/llama.cpp/commit/b72942fac998672a79a1ae3c03b340f7e629980b
+
+---
+ examples/rpc/README.md | 4 ++++
+ examples/rpc/rpc-server.cpp | 13 ++++++++++++-
+ ggml-rpc.cpp | 36 +++++++++++++++++++++++++++++++++++-
+ ggml.c | 3 ++-
+ 4 files changed, 53 insertions(+), 3 deletions(-)
+
+diff --git a/examples/rpc/README.md b/examples/rpc/README.md
+index eeec71a..9941547 100644
+--- a/examples/rpc/README.md
++++ b/examples/rpc/README.md
+@@ -1,5 +1,9 @@
+ ## Overview
+
++> [!IMPORTANT]
++> This example and the RPC backend are currently in a proof-of-concept development stage. As such, the functionality is fragile and
++> insecure. **Never run the RPC server on an open network or in a sensitive environment!**
++
+ The `rpc-server` allows running `ggml` backend on a remote host.
+ The RPC backend communicates with one or several instances of `rpc-server` and offloads computations to them.
+ This can be used for distributed LLM inference with `llama.cpp` in the following way:
+diff --git a/examples/rpc/rpc-server.cpp b/examples/rpc/rpc-server.cpp
+index 7c15d2a..6342e64 100644
+--- a/examples/rpc/rpc-server.cpp
++++ b/examples/rpc/rpc-server.cpp
+@@ -16,7 +16,7 @@
+ #include <stdio.h>
+
+ struct rpc_server_params {
+- std::string host = "0.0.0.0";
++ std::string host = "127.0.0.1";
+ int port = 50052;
+ size_t backend_mem = 0;
+ };
+@@ -114,6 +114,17 @@ int main(int argc, char * argv[]) {
+ fprintf(stderr, "Invalid parameters\n");
+ return 1;
+ }
++
++ if (params.host != "127.0.0.1") {
++ fprintf(stderr, "\n");
++ fprintf(stderr, "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
++ fprintf(stderr, "WARNING: Host ('%s') is != '127.0.0.1'\n", params.host.c_str());
++ fprintf(stderr, " Never expose the RPC server to an open network!\n");
++ fprintf(stderr, " This is an experimental feature and is not secure!\n");
++ fprintf(stderr, "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
++ fprintf(stderr, "\n");
++ }
++
+ ggml_backend_t backend = create_backend();
+ if (!backend) {
+ fprintf(stderr, "Failed to create backend\n");
+diff --git a/ggml-rpc.cpp b/ggml-rpc.cpp
+index 49a20df..dc90e17 100644
+--- a/ggml-rpc.cpp
++++ b/ggml-rpc.cpp
+@@ -193,6 +193,10 @@ static std::shared_ptr<socket_t> create_server_socket(const char * host, int por
+ fprintf(stderr, "Failed to set SO_REUSEADDR\n");
+ return nullptr;
+ }
++ if (inet_addr(host) == INADDR_NONE) {
++ fprintf(stderr, "Invalid host address: %s\n", host);
++ return nullptr;
++ }
+ struct sockaddr_in serv_addr;
+ serv_addr.sin_family = AF_INET;
+ serv_addr.sin_addr.s_addr = inet_addr(host);
+@@ -875,6 +879,14 @@ ggml_tensor * rpc_server::deserialize_tensor(struct ggml_context * ctx, const rp
+ if (result->buffer && buffers.find(result->buffer) == buffers.end()) {
+ return nullptr;
+ }
++
++ // require that the tensor data does not go beyond the buffer end
++ uint64_t tensor_size = (uint64_t) ggml_nbytes(result);
++ uint64_t buffer_start = (uint64_t) ggml_backend_buffer_get_base(result->buffer);
++ uint64_t buffer_size = (uint64_t) ggml_backend_buffer_get_size(result->buffer);
++ GGML_ASSERT(tensor->data + tensor_size >= tensor->data); // check for overflow
++ GGML_ASSERT(tensor->data >= buffer_start && tensor->data + tensor_size <= buffer_start + buffer_size);
++
+ result->op = (ggml_op) tensor->op;
+ for (uint32_t i = 0; i < GGML_MAX_OP_PARAMS / sizeof(int32_t); i++) {
+ result->op_params[i] = tensor->op_params[i];
+@@ -894,7 +906,7 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
+ const rpc_tensor * in_tensor = (const rpc_tensor *)input.data();
+ uint64_t offset;
+ memcpy(&offset, input.data() + sizeof(rpc_tensor), sizeof(offset));
+- size_t size = input.size() - sizeof(rpc_tensor) - sizeof(offset);
++ const size_t size = input.size() - sizeof(rpc_tensor) - sizeof(offset);
+
+ struct ggml_init_params params {
+ /*.mem_size =*/ ggml_tensor_overhead(),
+@@ -909,6 +921,17 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
+ return false;
+ }
+ GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu\n", __func__, (void*)tensor->buffer, tensor->data, offset, size);
++
++ // sanitize tensor->data
++ {
++ const size_t p0 = (size_t) ggml_backend_buffer_get_base(tensor->buffer);
++ const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer);
++
++ if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) {
++ GGML_ABORT("[%s] tensor->data out of bounds\n", __func__);
++ }
++ }
++
+ const void * data = input.data() + sizeof(rpc_tensor) + sizeof(offset);
+ ggml_backend_tensor_set(tensor, data, offset, size);
+ ggml_free(ctx);
+@@ -939,6 +962,17 @@ bool rpc_server::get_tensor(const std::vector<uint8_t> & input, std::vector<uint
+ return false;
+ }
+ GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %" PRIu64 "\n", __func__, (void*)tensor->buffer, tensor->data, offset, size);
++
++ // sanitize tensor->data
++ {
++ const size_t p0 = (size_t) ggml_backend_buffer_get_base(tensor->buffer);
++ const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer);
++
++ if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) {
++ GGML_ABORT("[%s] tensor->data out of bounds\n", __func__);
++ }
++ }
++
+ // output serialization format: | data (size bytes) |
+ output.resize(size, 0);
+ ggml_backend_tensor_get(tensor, output.data(), offset, size);
+diff --git a/ggml.c b/ggml.c
+index 7680363..e70d075 100644
+--- a/ggml.c
++++ b/ggml.c
+@@ -3577,7 +3577,8 @@ static struct ggml_tensor * ggml_new_tensor_impl(
+ struct ggml_tensor * view_src,
+ size_t view_offs) {
+
+- assert(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
++ GGML_ASSERT(type >= 0 && type < GGML_TYPE_COUNT);
++ GGML_ASSERT(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
+
+ // find the base tensor and absolute offset
+ if (view_src != NULL && view_src->view_src != NULL) {
+--
+2.43.0
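
The second backport hardens the RPC server: it binds to 127.0.0.1 by default, warns when another host is configured, rejects invalid host addresses, and verifies that every (offset, size) range supplied by a remote peer stays inside the backing buffer before tensor data is read or written. Below is a minimal sketch of that range check as a standalone predicate; all names are invented for illustration, whereas the real code uses ggml_backend_buffer_get_base/ggml_backend_buffer_get_size inline, as shown in the hunks above:

    #include <cstdint>

    // Returns true only if [data + offset, data + offset + size) lies fully
    // inside the buffer [base, base + buf_size) and the arithmetic does not wrap.
    static bool range_in_buffer(uint64_t data, uint64_t offset, uint64_t size,
                                uint64_t base, uint64_t buf_size) {
        const uint64_t p0    = base;
        const uint64_t p1    = base + buf_size;
        const uint64_t start = data + offset;

        if (start < data)              return false;  // offset wrapped around
        if (start < p0 || start >= p1) return false;  // start outside the buffer
        if (size > p1 - start)         return false;  // range runs past the end
        return true;
    }

In set_tensor and get_tensor the patched server aborts when the equivalent check fails, so a malicious client can no longer read or write memory outside the buffers it was given.
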
diff --git a/llama.cpp.spec b/llama.cpp.spec
new file mode 100644
index 0000000..693137c
--- /dev/null
+++ b/llama.cpp.spec
@@ -0,0 +1,63 @@
+%define debug_package %{nil}
+%global llama_commitid b4016
+
+Name: llama.cpp
+Version: 20240531
+Release: 2
+License: MIT
+Summary: Port of the LLaMA large language model implemented in C/C++
+
+URL: https://github.com/ggerganov/llama.cpp
+Source0: https://github.com/ggerganov/llama.cpp/archive/refs/tags/%{llama_commitid}.tar.gz
+
+Patch6000: backport-CVE-2024-41130.patch
+Patch6001: backport-CVE-2024-42477-CVE-2024-42478-CVE-2024-42479.patch
+
+BuildRequires: gcc,gcc-c++,cmake
+
+%description
+Port of the LLaMA large language model implemented in C/C++;
+it can be used for local model dialogue on a laptop.
+
+%prep
+%autosetup -b 0 -n %{name}-%{llama_commitid} -p1
+
+%build
+mkdir llama_builddir
+pushd llama_builddir
+cmake ..
+%make_build
+popd
+
+%install
+pushd llama_builddir
+%make_install
+mv %{buildroot}%{_prefix}/local/bin/main %{buildroot}%{_prefix}/local/bin/llama_cpp_main
+mv %{buildroot}%{_prefix}/local/bin/convert-hf-to-gguf.py %{buildroot}%{_prefix}/local/bin/llama_convert-hf-to-gguf.py
+mv %{buildroot}%{_prefix}/local/* %{buildroot}%{_prefix}
+popd
+
+%files
+%{_bindir}/*
+%{_includedir}/*
+%{_libdir}/*
+
+%changelog
+* Wed Aug 28 2024 zhoupengcheng <zhoupengcheng11@huawei.com> - 20240531-2
+- fix CVE-2024-42477, CVE-2024-42478, CVE-2024-42479, CVE-2024-41130
+
+* Fri Jun 21 2024 zhoupengcheng <zhoupengcheng11@huawei.com> - 20240531-1
+- update llama.cpp to b3051
+
+* Tue May 14 2024 wangshuo <wangshuo@kylinos.cn> - 20230815-4
+- add loongarch64 support
+
+* Wed Sep 20 2023 zhoupengcheng <zhoupengcheng11@huawei.com> - 20230815-3
+- rename /usr/bin/convert.py
+- update long-term yum.repo in dockerfile
+
+* Tue Sep 19 2023 zhoupengcheng <zhoupengcheng11@huawei.com> - 20230815-2
+- add dockerfile
+
+* Wed Aug 16 2023 zhoupengcheng <zhoupengcheng11@huawei.com> - 20230815-1
+- Init package
diff --git a/sources b/sources
new file mode 100644
index 0000000..d164cfe
--- /dev/null
+++ b/sources
@@ -0,0 +1 @@
+ec81639f66dd3fae8ad7f192f55e427b b4016.tar.gz