Compare commits

...

14 Commits

Author SHA1 Message Date
Georgi Gerganov
78fafcaf10 ggml : do not use _GNU_SOURCE gratuitously 2023-06-25 17:21:02 +03:00
Georgi Gerganov
447ccbe8c3 readme : add new roadmap + manifesto 2023-06-25 16:08:12 +03:00
Georgi Gerganov
bd34cdde38 ggml : sync latest ggml (custom operators) 2023-06-25 14:25:08 +03:00
anon998
c2a08f87b8 fix server sampling: top k sampler first (#1977)
Co-authored-by: anon <anon@example.org>
2023-06-25 10:48:36 +02:00
Georgi Gerganov
66a2555ba6 readme : add Azure CI discussion link 2023-06-25 09:07:03 +03:00
sjinzh
e65ca7e14a zig : upgrade build system support (#1981)
* upgrade zig build system support

* zig : add new line at the end of the file

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2023-06-25 08:45:44 +03:00
Robyn
5ec8dd5a3c #1869 Fix null reference errors when training from scratch with CUDA (#1907)
* #1869 Fix null reference errors when training from scratch with CUDA build

Calling ggml_compute_forward when node->src0 was null was causing train-text-from-scratch.exe to terminate unexpectedly.

* ggml : do not dereference src0 if NULL

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2023-06-24 20:10:29 +02:00
Georgi Gerganov
65bdd52a86 tests : sync test-grad0 from ggml 2023-06-24 19:40:18 +03:00
Rowan Hart
fdd1860911 flake : fix ggml-metal.metal path and run nixfmt (#1974) 2023-06-24 14:07:08 +03:00
AN Long
c943d823c1 convert : fix invalid params in write_vocab_only (#1975) 2023-06-24 14:02:06 +03:00
slaren
f2c754e1c3 ggml : improve ggml_graph_dump_dot, add ggml_format_name (#1978)
* Improve ggml_graph_dump_dot, add ggml_format_name

* add more automatic names to view ops

* fix name of copies
2023-06-24 13:57:18 +03:00
Georgi Gerganov
11da1a85cd readme : fix whitespaces 2023-06-24 13:38:18 +03:00
Alberto
235b610d65 readme : fixed termux instructions (#1973) 2023-06-24 13:32:13 +03:00
Alex Renda
b061ba9e2a llama : fix top-p sampling to match the canonical definition (#1953)
* Fix top-p sampling to match the standard definition (smallest set that has probability mass at least p, not largest set with probability mass less than p)

* top-p: correct gt to gte

* add test for correct top-p behavior
2023-06-24 13:15:01 +03:00
13 changed files with 608 additions and 161 deletions

View File

@@ -49,6 +49,14 @@ CFLAGS = -I. $(OPT) -std=c11 -fPIC
CXXFLAGS = -I. -I./examples $(OPT) -std=c++11 -fPIC
LDFLAGS =
# clock_gettime came in POSIX.1b (1993)
# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
# posix_memalign came in POSIX.1-2001 / SUSv3
# M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
# ref: https://github.com/ggerganov/whisper.cpp/pull/1027
CFLAGS += -D_XOPEN_SOURCE=600
CXXFLAGS += -D_XOPEN_SOURCE=600
ifdef LLAMA_DEBUG
CFLAGS += -O0 -g
CXXFLAGS += -O0 -g

View File

@@ -5,12 +5,15 @@
[![Actions Status](https://github.com/ggerganov/llama.cpp/workflows/CI/badge.svg)](https://github.com/ggerganov/llama.cpp/actions)
[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
[Roadmap](https://github.com/users/ggerganov/projects/7) / [Manifesto](https://github.com/ggerganov/llama.cpp/discussions/205) / [ggml](https://github.com/ggerganov/ggml)
Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++
**Hot topics:**
- New roadmap: https://github.com/users/ggerganov/projects/7
- Azure CI brainstorming: https://github.com/ggerganov/llama.cpp/discussions/1985
- p1 : LLM-based code completion engine at the edge : https://github.com/ggml-org/p1/discussions/1
- Roadmap June 2023: https://github.com/ggerganov/llama.cpp/discussions/1729
<details>
<summary>Table of Contents</summary>
@@ -680,12 +683,13 @@ Upon completion of the aforementioned steps, you will have successfully compiled
```
GGML_OPENCL_PLATFORM=0
GGML_OPENCL_DEVICE=0
export LD_LIBRARY_PATH=/system/vendor/lib64:$LD_LIBRARY_PATH
./main (...)
export LD_LIBRARY_PATH=/vendor/lib64:$LD_LIBRARY_PATH
```
For easy and swift re-execution, consider documenting this final part in a .sh script file. This will enable you to rerun the process with minimal hassle.
Place your desired model into the `/llama.cpp/models/` directory and execute the `./main (...)` script.
### Docker
#### Prerequisites

View File

@@ -1,61 +1,58 @@
const std = @import("std");
// Zig Version: 0.11.0-dev.3379+629f0d23b
pub fn build(b: *std.build.Builder) void {
const target = b.standardTargetOptions(.{});
const optimize = b.standardReleaseOptions();
const want_lto = b.option(bool, "lto", "Want -fLTO");
const lib = b.addStaticLibrary("llama", null);
lib.want_lto = want_lto;
lib.setTarget(target);
lib.setBuildMode(optimize);
const optimize = b.standardOptimizeOption(.{});
const lib = b.addStaticLibrary(.{
.name = "llama",
.target = target,
.optimize = optimize,
});
lib.linkLibC();
lib.linkLibCpp();
lib.addIncludePath(".");
lib.addIncludePath("examples");
lib.addIncludePath("./examples");
lib.addCSourceFiles(&.{
"ggml.c",
}, &.{"-std=c11"});
lib.addCSourceFiles(&.{
"llama.cpp",
}, &.{"-std=c++11"});
lib.install();
b.installArtifact(lib);
const build_args = .{ .b = b, .lib = lib, .target = target, .optimize = optimize, .want_lto = want_lto };
const examples = .{
"main",
"baby-llama",
"embedding",
// "metal",
"perplexity",
"quantize",
"quantize-stats",
"save-load-state",
// "server",
"simple",
"train-text-from-scratch",
};
const exe = build_example("main", build_args);
_ = build_example("quantize", build_args);
_ = build_example("perplexity", build_args);
_ = build_example("embedding", build_args);
// create "zig build run" command for ./main
const run_cmd = exe.run();
run_cmd.step.dependOn(b.getInstallStep());
if (b.args) |args| {
run_cmd.addArgs(args);
inline for (examples) |example_name| {
const exe = b.addExecutable(.{
.name = example_name,
.target = target,
.optimize = optimize,
});
exe.addIncludePath(".");
exe.addIncludePath("./examples");
exe.addCSourceFiles(&.{
std.fmt.comptimePrint("examples/{s}/{s}.cpp", .{example_name, example_name}),
"examples/common.cpp",
}, &.{"-std=c++11"});
exe.linkLibrary(lib);
b.installArtifact(exe);
const run_cmd = b.addRunArtifact(exe);
run_cmd.step.dependOn(b.getInstallStep());
if (b.args) |args| run_cmd.addArgs(args);
const run_step = b.step("run_" ++ example_name, "Run the app");
run_step.dependOn(&run_cmd.step);
}
const run_step = b.step("run", "Run the app");
run_step.dependOn(&run_cmd.step);
}
fn build_example(comptime name: []const u8, args: anytype) *std.build.LibExeObjStep {
const b = args.b;
const lib = args.lib;
const want_lto = args.want_lto;
const exe = b.addExecutable(name, null);
exe.want_lto = want_lto;
lib.setTarget(args.target);
lib.setBuildMode(args.optimize);
exe.addIncludePath(".");
exe.addIncludePath("examples");
exe.addCSourceFiles(&.{
std.fmt.comptimePrint("examples/{s}/{s}.cpp", .{name, name}),
"examples/common.cpp",
}, &.{"-std=c++11"});
exe.linkLibrary(lib);
exe.install();
return exe;
}

View File

@@ -998,9 +998,9 @@ class OutputFile:
def write_vocab_only(fname_out: Path, vocab: Vocab) -> None:
of = OutputFile(fname_out)
params = Params(n_vocab=vocab.vocab_size, n_embd=0, n_mult=0,
n_head=1, n_layer=0, file_type=GGMLFileType.AllF32)
n_head=1, n_layer=0)
of = OutputFile(fname_out)
of.write_file_header(params)
of.write_file_header(params, file_type=GGMLFileType.AllF32)
of.write_vocab(vocab)
of.fout.close()

View File

@@ -325,10 +325,10 @@ struct llama_server_context {
id = llama_sample_token_mirostat_v2(ctx, &candidates_p, mirostat_tau, mirostat_eta, &mirostat_mu);
} else {
// Temperature sampling
llama_sample_top_k(ctx, &candidates_p, top_k, 1);
llama_sample_tail_free(ctx, &candidates_p, tfs_z, 1);
llama_sample_typical(ctx, &candidates_p, typical_p, 1);
llama_sample_top_p(ctx, &candidates_p, top_p, 1);
llama_sample_top_k(ctx, &candidates_p, top_k, 1);
llama_sample_temperature(ctx, &candidates_p, temp);
id = llama_sample_token(ctx, &candidates_p);
}

View File

@@ -9,27 +9,33 @@
inherit (pkgs.stdenv) isAarch64 isDarwin;
inherit (pkgs.lib) optionals;
isM1 = isAarch64 && isDarwin;
osSpecific =
if isM1 then with pkgs.darwin.apple_sdk_11_0.frameworks; [ Accelerate MetalKit MetalPerformanceShaders MetalPerformanceShadersGraph ]
else if isDarwin then with pkgs.darwin.apple_sdk.frameworks; [ Accelerate CoreGraphics CoreVideo ]
else [ ];
pkgs = import nixpkgs {
inherit system;
};
llama-python = pkgs.python310.withPackages (ps: with ps; [
numpy
sentencepiece
]);
in
{
osSpecific = if isM1 then
with pkgs.darwin.apple_sdk_11_0.frameworks; [
Accelerate
MetalKit
MetalPerformanceShaders
MetalPerformanceShadersGraph
]
else if isDarwin then
with pkgs.darwin.apple_sdk.frameworks; [
Accelerate
CoreGraphics
CoreVideo
]
else
[ ];
pkgs = import nixpkgs { inherit system; };
llama-python =
pkgs.python310.withPackages (ps: with ps; [ numpy sentencepiece ]);
in {
packages.default = pkgs.stdenv.mkDerivation {
name = "llama.cpp";
src = ./.;
postPatch =
if isM1 then ''
substituteInPlace ./ggml-metal.m \
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/ggml-metal.metal\";"
'' else "";
postPatch = if isM1 then ''
substituteInPlace ./ggml-metal.m \
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
'' else
"";
nativeBuildInputs = with pkgs; [ cmake ];
buildInputs = osSpecific;
cmakeFlags = [ "-DLLAMA_BUILD_SERVER=ON" ] ++ (optionals isM1 [
@@ -62,11 +68,7 @@
};
apps.default = self.apps.${system}.llama;
devShells.default = pkgs.mkShell {
packages = with pkgs; [
cmake
llama-python
] ++ osSpecific;
packages = with pkgs; [ cmake llama-python ] ++ osSpecific;
};
}
);
});
}

View File

@@ -2635,7 +2635,7 @@ void ggml_cuda_free_scratch() {
bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor){
ggml_cuda_func_t func;
const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
|| tensor->src0->backend == GGML_BACKEND_GPU || tensor->src0->backend == GGML_BACKEND_GPU_SPLIT
|| (tensor->src0 != nullptr && (tensor->src0->backend == GGML_BACKEND_GPU || tensor->src0->backend == GGML_BACKEND_GPU_SPLIT))
|| (tensor->src1 != nullptr && tensor->src1->backend == GGML_BACKEND_GPU);
switch (tensor->op) {

508
ggml.c
View File

@@ -1,5 +1,4 @@
// Defines CLOCK_MONOTONIC on Linux
#define _GNU_SOURCE
#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
#include "ggml.h"
@@ -24,6 +23,7 @@
#include <stdio.h>
#include <float.h>
#include <limits.h>
#include <stdarg.h>
#ifdef GGML_USE_METAL
#include <unistd.h>
@@ -130,6 +130,34 @@ typedef void* thread_ret_t;
#define GGML_MEM_ALIGN 16
#endif
//
// logging
//
#if (GGML_DEBUG >= 1)
#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG(...)
#endif
#if (GGML_DEBUG >= 5)
#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_5(...)
#endif
#if (GGML_DEBUG >= 10)
#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_10(...)
#endif
#define GGML_PRINT(...) printf(__VA_ARGS__)
//
// end of logging block
//
#if defined(_MSC_VER) || defined(__MINGW32__)
#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN)
#define GGML_ALIGNED_FREE(ptr) _aligned_free(ptr)
@@ -143,6 +171,17 @@ inline static void* ggml_aligned_malloc(size_t size) {
#endif
if (result != 0) {
// Handle allocation failure
const char *error_desc = "unknown allocation error";
switch (result) {
case EINVAL:
error_desc = "invalid alignment value";
break;
case ENOMEM:
error_desc = "insufficient memory";
break;
}
GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n",
__func__, error_desc, size/(1024.0*1024.0));
return NULL;
}
return aligned_memory;
@@ -3529,30 +3568,6 @@ inline static void ggml_vec_norm_inv_f32(const int n, float * s, const float * x
*s = 1.f/(*s);
}
//
// logging
//
#if (GGML_DEBUG >= 1)
#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG(...)
#endif
#if (GGML_DEBUG >= 5)
#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_5(...)
#endif
#if (GGML_DEBUG >= 10)
#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_10(...)
#endif
#define GGML_PRINT(...) printf(__VA_ARGS__)
//
// data types
//
@@ -3712,11 +3727,15 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"MAP_UNARY",
"MAP_BINARY",
"MAP_CUSTOM1",
"MAP_CUSTOM2",
"MAP_CUSTOM3",
"CROSS_ENTROPY_LOSS",
"CROSS_ENTROPY_LOSS_BACK",
};
static_assert(GGML_OP_COUNT == 61, "GGML_OP_COUNT != 61");
static_assert(GGML_OP_COUNT == 64, "GGML_OP_COUNT != 64");
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none",
@@ -3784,11 +3803,15 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"f(x)",
"f(x,y)",
"custom(x)",
"custom(x,y)",
"custom(x,y,z)",
"cross_entropy_loss(x,y)",
"cross_entropy_loss_back(x,y)",
};
static_assert(GGML_OP_COUNT == 61, "GGML_OP_COUNT != 61");
static_assert(GGML_OP_COUNT == 64, "GGML_OP_COUNT != 64");
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
@@ -4734,10 +4757,19 @@ struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * nam
return tensor;
}
struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...) {
va_list args;
va_start(args, fmt);
vsnprintf(tensor->name, sizeof(tensor->name), fmt, args);
va_end(args);
return tensor;
}
struct ggml_tensor * ggml_view_tensor(
struct ggml_context * ctx,
const struct ggml_tensor * src) {
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, src->n_dims, src->ne, src->data);
ggml_format_name(result, "%s (view)", src->name);
result->nb[0] = src->nb[0];
result->nb[1] = src->nb[1];
@@ -5899,6 +5931,11 @@ struct ggml_tensor * ggml_cpy_impl(
// make a view of the destination
struct ggml_tensor * result = ggml_view_tensor(ctx, b);
if (strlen(b->name) > 0) {
ggml_format_name(result, "%s (copy of %s)", b->name, a->name);
} else {
ggml_format_name(result, "%s (copy)", a->name);
}
result->op = GGML_OP_CPY;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -5935,6 +5972,7 @@ struct ggml_tensor * ggml_cont_impl(
}
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
ggml_format_name(result, "%s (cont)", a->name);
result->op = GGML_OP_CONT;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -5978,6 +6016,7 @@ struct ggml_tensor * ggml_reshape(
}
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, b->n_dims, b->ne, a->data);
ggml_format_name(result, "%s (reshaped)", a->name);
result->op = GGML_OP_RESHAPE;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6002,6 +6041,7 @@ struct ggml_tensor * ggml_reshape_1d(
const int64_t ne[1] = { ne0 };
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a->data);
ggml_format_name(result, "%s (reshaped)", a->name);
result->op = GGML_OP_RESHAPE;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6027,6 +6067,7 @@ struct ggml_tensor * ggml_reshape_2d(
const int64_t ne[2] = { ne0, ne1 };
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a->data);
ggml_format_name(result, "%s (reshaped)", a->name);
result->op = GGML_OP_RESHAPE;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6053,6 +6094,7 @@ struct ggml_tensor * ggml_reshape_3d(
const int64_t ne[3] = { ne0, ne1, ne2 };
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a->data);
ggml_format_name(result, "%s (reshaped)", a->name);
result->op = GGML_OP_RESHAPE;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6081,6 +6123,7 @@ struct ggml_tensor * ggml_reshape_4d(
const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a->data);
ggml_format_name(result, "%s (reshaped)", a->name);
result->op = GGML_OP_RESHAPE;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6105,10 +6148,12 @@ struct ggml_tensor * ggml_view_1d(
}
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, &ne0, (char *) a->data + offset);
ggml_format_name(result, "%s (view)", a->name);
ggml_scratch_save(ctx);
struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
ggml_set_name(offs, "offset");
memcpy(offs->data, &offset, 2*sizeof(int32_t));
ggml_scratch_load(ctx);
@@ -6141,10 +6186,12 @@ struct ggml_tensor * ggml_view_2d(
const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, 1, 1 };
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, (char *) a->data + offset);
ggml_format_name(result, "%s (view)", a->name);
ggml_scratch_save(ctx);
struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
ggml_set_name(offs, "offset");
memcpy(offs->data, &offset, 2*sizeof(int32_t));
ggml_scratch_load(ctx);
@@ -6183,10 +6230,12 @@ struct ggml_tensor * ggml_view_3d(
const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, 1 };
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, (char *) a->data + offset);
ggml_format_name(result, "%s (view)", a->name);
ggml_scratch_save(ctx);
struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
ggml_set_name(offs, "offset");
memcpy(offs->data, &offset, 2*sizeof(int32_t));
ggml_scratch_load(ctx);
@@ -6227,10 +6276,12 @@ struct ggml_tensor * ggml_view_4d(
const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, ne3 };
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, (char *) a->data + offset);
ggml_format_name(result, "%s (view)", a->name);
ggml_scratch_save(ctx);
struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
ggml_set_name(offs, "offset");
memcpy(offs->data, &offset, 2*sizeof(int32_t));
ggml_scratch_load(ctx);
@@ -6276,6 +6327,7 @@ struct ggml_tensor * ggml_permute(
}
struct ggml_tensor * result = ggml_view_tensor(ctx, a);
ggml_format_name(result, "%s (permuted)", a->name);
int ne[GGML_MAX_DIMS];
int nb[GGML_MAX_DIMS];
@@ -6335,6 +6387,7 @@ struct ggml_tensor * ggml_transpose(
}
struct ggml_tensor * result = ggml_view_tensor(ctx, a);
ggml_format_name(result, "%s (transposed)", a->name);
result->ne[0] = a->ne[1];
result->ne[1] = a->ne[0];
@@ -7063,9 +7116,14 @@ struct ggml_tensor * ggml_map_unary_impl_f32(
is_node = true;
}
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
ggml_scratch_save(ctx);
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
ggml_scratch_load(ctx);
result->op = GGML_OP_MAP_UNARY;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -7105,9 +7163,14 @@ struct ggml_tensor * ggml_map_binary_impl_f32(
is_node = true;
}
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
ggml_scratch_save(ctx);
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
ggml_scratch_load(ctx);
result->op = GGML_OP_MAP_BINARY;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -7134,6 +7197,150 @@ struct ggml_tensor * ggml_map_binary_inplace_f32(
return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
}
// ggml_map_custom1
struct ggml_tensor * ggml_map_custom1_impl_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
const ggml_custom1_op_f32_t fun,
bool inplace) {
bool is_node = false;
if (!inplace && a->grad) {
is_node = true;
}
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
ggml_scratch_save(ctx);
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
ggml_scratch_load(ctx);
result->op = GGML_OP_MAP_CUSTOM1;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src0 = a;
result->opt[0] = addr_tensor;
return result;
}
struct ggml_tensor * ggml_map_custom1_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
const ggml_custom1_op_f32_t fun) {
return ggml_map_custom1_impl_f32(ctx, a, fun, false);
}
struct ggml_tensor * ggml_map_custom1_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
const ggml_custom1_op_f32_t fun) {
return ggml_map_custom1_impl_f32(ctx, a, fun, true);
}
// ggml_map_custom2
struct ggml_tensor * ggml_map_custom2_impl_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
const ggml_custom2_op_f32_t fun,
bool inplace) {
bool is_node = false;
if (!inplace && (a->grad || b->grad)) {
is_node = true;
}
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
ggml_scratch_save(ctx);
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
ggml_scratch_load(ctx);
result->op = GGML_OP_MAP_CUSTOM2;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src0 = a;
result->src1 = b;
result->opt[0] = addr_tensor;
return result;
}
struct ggml_tensor * ggml_map_custom2_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
const ggml_custom2_op_f32_t fun) {
return ggml_map_custom2_impl_f32(ctx, a, b, fun, false);
}
struct ggml_tensor * ggml_map_custom2_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
const ggml_custom2_op_f32_t fun) {
return ggml_map_custom2_impl_f32(ctx, a, b, fun, true);
}
// ggml_map_custom3
struct ggml_tensor * ggml_map_custom3_impl_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
const ggml_custom3_op_f32_t fun,
bool inplace) {
bool is_node = false;
if (!inplace && (a->grad || b->grad || c->grad)) {
is_node = true;
}
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
ggml_scratch_save(ctx);
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
ggml_scratch_load(ctx);
result->op = GGML_OP_MAP_CUSTOM3;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src0 = a;
result->src1 = b;
result->opt[0] = addr_tensor;
result->opt[1] = c;
return result;
}
struct ggml_tensor * ggml_map_custom3_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
const ggml_custom3_op_f32_t fun) {
return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, false);
}
struct ggml_tensor * ggml_map_custom3_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
const ggml_custom3_op_f32_t fun) {
return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true);
}
// ggml_cross_entropy_loss
struct ggml_tensor * ggml_cross_entropy_loss(
@@ -13179,8 +13386,7 @@ static void ggml_compute_forward_conv_2d_sk_p0_f16_f32(
const int nk1 = ne01;
// size of the convolution row - the kernel size unrolled across all channels
// round-up so it is more suitable for SIMD
const int ew0 = ggml_up32(nk0*nk1*ne02);
const int ew0 = nk0*nk1*ne02;
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
GGML_ASSERT(nb10 == sizeof(float));
@@ -14590,6 +14796,114 @@ static void ggml_compute_forward_map_binary(
}
}
// ggml_compute_forward_map_custom1
static void ggml_compute_forward_map_custom1_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
struct ggml_tensor * dst,
const ggml_custom1_op_f32_t fun) {
assert(params->ith == 0);
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
fun(dst, a);
}
static void ggml_compute_forward_map_custom1(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
struct ggml_tensor * dst,
const ggml_custom1_op_f32_t fun) {
switch (a->type) {
case GGML_TYPE_F32:
{
ggml_compute_forward_map_custom1_f32(params, a, dst, fun);
} break;
default:
{
GGML_ASSERT(false);
} break;
}
}
// ggml_compute_forward_map_custom2
static void ggml_compute_forward_map_custom2_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
const struct ggml_tensor * b,
struct ggml_tensor * dst,
const ggml_custom2_op_f32_t fun) {
assert(params->ith == 0);
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
fun(dst, a, b);
}
static void ggml_compute_forward_map_custom2(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
const struct ggml_tensor * b,
struct ggml_tensor * dst,
const ggml_custom2_op_f32_t fun) {
switch (a->type) {
case GGML_TYPE_F32:
{
ggml_compute_forward_map_custom2_f32(params, a, b, dst, fun);
} break;
default:
{
GGML_ASSERT(false);
} break;
}
}
// ggml_compute_forward_map_custom3
static void ggml_compute_forward_map_custom3_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
const struct ggml_tensor * b,
const struct ggml_tensor * c,
struct ggml_tensor * dst,
const ggml_custom3_op_f32_t fun) {
assert(params->ith == 0);
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
fun(dst, a, b, c);
}
static void ggml_compute_forward_map_custom3(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
const struct ggml_tensor * b,
const struct ggml_tensor * c,
struct ggml_tensor * dst,
const ggml_custom3_op_f32_t fun) {
switch (a->type) {
case GGML_TYPE_F32:
{
ggml_compute_forward_map_custom3_f32(params, a, b, c, dst, fun);
} break;
default:
{
GGML_ASSERT(false);
} break;
}
}
// ggml_compute_forward_cross_entropy_loss
static void ggml_compute_forward_cross_entropy_loss_f32(
@@ -14880,7 +15194,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
if (skip_cpu) {
return;
}
GGML_ASSERT(tensor->src0->backend == GGML_BACKEND_CPU);
GGML_ASSERT(tensor->src0 == NULL || tensor->src0->backend == GGML_BACKEND_CPU);
GGML_ASSERT(tensor->src1 == NULL || tensor->src1->backend == GGML_BACKEND_CPU);
#endif // GGML_USE_CUBLAS
@@ -15127,6 +15441,24 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
ggml_compute_forward_map_binary(params, tensor->src0, tensor->src1, tensor, fun);
}
break;
case GGML_OP_MAP_CUSTOM1:
{
const ggml_custom1_op_f32_t fun = *((ggml_custom1_op_f32_t *)tensor->opt[0]->data);
ggml_compute_forward_map_custom1(params, tensor->src0, tensor, fun);
}
break;
case GGML_OP_MAP_CUSTOM2:
{
const ggml_custom2_op_f32_t fun = *((ggml_custom2_op_f32_t *)tensor->opt[0]->data);
ggml_compute_forward_map_custom2(params, tensor->src0, tensor->src1, tensor, fun);
}
break;
case GGML_OP_MAP_CUSTOM3:
{
const ggml_custom3_op_f32_t fun = *((ggml_custom3_op_f32_t *)tensor->opt[0]->data);
ggml_compute_forward_map_custom3(params, tensor->src0, tensor->src1, tensor->opt[1], tensor, fun);
}
break;
case GGML_OP_CROSS_ENTROPY_LOSS:
{
ggml_compute_forward_cross_entropy_loss(params, tensor->src0, tensor->src1, tensor);
@@ -15933,6 +16265,9 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
case GGML_OP_WIN_UNPART:
case GGML_OP_MAP_UNARY:
case GGML_OP_MAP_BINARY:
case GGML_OP_MAP_CUSTOM1:
case GGML_OP_MAP_CUSTOM2:
case GGML_OP_MAP_CUSTOM3:
{
GGML_ASSERT(false); // not supported
} break;
@@ -16004,7 +16339,7 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
GGML_ASSERT(cgraph->n_leafs < GGML_MAX_NODES);
if (strlen(node->name) == 0) {
snprintf(node->name, sizeof(node->name), "leaf_%d", cgraph->n_leafs);
ggml_format_name(node, "leaf_%d", cgraph->n_leafs);
}
cgraph->leafs[cgraph->n_leafs] = node;
@@ -16013,7 +16348,7 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
GGML_ASSERT(cgraph->n_nodes < GGML_MAX_NODES);
if (strlen(node->name) == 0) {
snprintf(node->name, sizeof(node->name), "node_%d", cgraph->n_nodes);
ggml_format_name(node, "node_%d", cgraph->n_nodes);
}
cgraph->nodes[cgraph->n_nodes] = node;
@@ -16574,6 +16909,9 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
case GGML_OP_WIN_UNPART:
case GGML_OP_MAP_UNARY:
case GGML_OP_MAP_BINARY:
case GGML_OP_MAP_CUSTOM1:
case GGML_OP_MAP_CUSTOM2:
case GGML_OP_MAP_CUSTOM3:
{
node->n_tasks = 1;
} break;
@@ -17397,6 +17735,26 @@ static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgr
return NULL;
}
static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * gb, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label) {
struct ggml_tensor * gparent = ggml_graph_get_parent(gb, node);
struct ggml_tensor * gparent0 = ggml_graph_get_parent(gb, parent);
fprintf(fp, " \"%p\":%s -> \"%p\":%s [ arrowhead = %s; style = %s; label = \"%s\"; ]\n",
gparent0 ? (void *) gparent0 : (void *) parent,
gparent0 ? "g" : "x",
gparent ? (void *) gparent : (void *) node,
gparent ? "g" : "x",
gparent ? "empty" : "vee",
gparent ? "dashed" : "solid",
label);
}
static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label) {
fprintf(fp, " \"%p\":%s -> \"%p\":%s [ label = \"%s\"; ]\n",
(void *) parent, "x",
(void *) node, "x",
label);
}
void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename) {
char color[16];
@@ -17432,7 +17790,9 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
(void *) node, color);
if (strlen(node->name) > 0) {
fprintf(fp, "%s |", node->name);
fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
} else {
fprintf(fp, "(%s)|", ggml_type_name(node->type));
}
if (node->n_dims == 2) {
@@ -17441,7 +17801,6 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], GGML_OP_SYMBOL[node->op]);
}
if (node->grad) {
fprintf(fp, " | <g>%s\"; ]\n", GGML_OP_SYMBOL[node->grad->op]);
} else {
@@ -17460,18 +17819,29 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
(void *) node, color);
if (strlen(node->name) > 0) {
fprintf(fp, "%s | ", node->name);
fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
} else {
fprintf(fp, "(%s)|", ggml_type_name(node->type));
}
if (ggml_nelements(node) == 1) {
if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) {
fprintf(fp, "%d", ggml_get_i32_1d(node, 0));
fprintf(fp, "CONST %d [%" PRId64 ", %" PRId64 "]", i, node->ne[0], node->ne[1]);
if (ggml_nelements(node) < 5) {
fprintf(fp, " | (");
for (int j = 0; j < ggml_nelements(node); j++) {
if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) {
fprintf(fp, "%d", ggml_get_i32_1d(node, j));
}
else if (node->type == GGML_TYPE_F32 || node->type == GGML_TYPE_F16) {
fprintf(fp, "%.1e", (double)ggml_get_f32_1d(node, j));
}
else {
fprintf(fp, "#");
}
if (j < ggml_nelements(node) - 1) {
fprintf(fp, ", ");
}
}
else {
fprintf(fp, "%.1e", (double)ggml_get_f32_1d(node, 0));
}
}
else {
fprintf(fp, "CONST %d [%" PRId64 ", %" PRId64 "]", i, node->ne[0], node->ne[1]);
fprintf(fp, ")");
}
fprintf(fp, "\"; ]\n");
}
@@ -17479,30 +17849,20 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
for (int i = 0; i < gb->n_nodes; i++) {
struct ggml_tensor * node = gb->nodes[i];
struct ggml_tensor * parent = ggml_graph_get_parent(gb, node);
if (node->src0) {
struct ggml_tensor * parent0 = ggml_graph_get_parent(gb, node->src0);
fprintf(fp, " \"%p\":%s -> \"%p\":%s [ arrowhead = %s; style = %s; label = \"x\"; ]\n",
parent0 ? (void *) parent0 : (void *) node->src0,
parent0 ? "g" : "x",
parent ? (void *) parent : (void *) node,
parent ? "g" : "x",
parent ? "empty" : "vee",
parent ? "dashed" : "solid");
ggml_graph_dump_dot_node_edge(fp, gb, node, node->src0, "x");
}
if (node->src1) {
struct ggml_tensor * parent1 = ggml_graph_get_parent(gb, node->src1);
ggml_graph_dump_dot_node_edge(fp, gb, node, node->src1, "y");
}
fprintf(fp, " \"%p\":%s -> \"%p\":%s [ arrowhead = %s; style = %s; label = \"y\"; ]\n",
parent1 ? (void *) parent1 : (void *) node->src1,
parent1 ? "g" : "x",
parent ? (void *) parent : (void *) node,
parent ? "g" : "x",
parent ? "empty" : "vee",
parent ? "dashed" : "solid");
for (int j = 0; j < GGML_MAX_OPT; j++) {
if (node->opt[j]) {
char label[16];
snprintf(label, sizeof(label), "opt %d", j);
ggml_graph_dump_dot_node_edge(fp, gb, node, node->opt[j], label);
}
}
}
@@ -17510,15 +17870,19 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
struct ggml_tensor * node = gb->leafs[i];
if (node->src0) {
fprintf(fp, " \"%p\":%s -> \"%p\":%s [ label = \"x\"; ]\n",
(void *) node->src0, "x",
(void *) node, "x");
ggml_graph_dump_dot_leaf_edge(fp, node, node->src0, "x");
}
if (node->src1) {
fprintf(fp, " \"%p\":%s -> \"%p\":%s [ label = \"y\"; ]\n",
(void *) node->src1, "x",
(void *) node, "x");
ggml_graph_dump_dot_leaf_edge(fp, node, node->src1, "y");
}
for (int j = 0; j < GGML_MAX_OPT; j++) {
if (node->opt[j]) {
char label[16];
snprintf(label, sizeof(label), "opt %d", j);
ggml_graph_dump_dot_leaf_edge(fp, node, node->opt[j], label);
}
}
}

61
ggml.h
View File

@@ -345,6 +345,10 @@ extern "C" {
GGML_OP_MAP_UNARY,
GGML_OP_MAP_BINARY,
GGML_OP_MAP_CUSTOM1,
GGML_OP_MAP_CUSTOM2,
GGML_OP_MAP_CUSTOM3,
GGML_OP_CROSS_ENTROPY_LOSS,
GGML_OP_CROSS_ENTROPY_LOSS_BACK,
@@ -563,6 +567,7 @@ extern "C" {
GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);
GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...);
//
// operations on tensors with backpropagation
@@ -1166,21 +1171,73 @@ extern "C" {
int h0,
int w);
// Mapping operations
typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
// custom operators
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
GGML_API struct ggml_tensor * ggml_map_unary_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
ggml_unary_op_f32_t fun);
GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
ggml_unary_op_f32_t fun);
GGML_API struct ggml_tensor * ggml_map_binary_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
ggml_binary_op_f32_t fun);
GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
ggml_binary_op_f32_t fun);
GGML_API struct ggml_tensor * ggml_map_custom1_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
ggml_custom1_op_f32_t fun);
GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
ggml_custom1_op_f32_t fun);
GGML_API struct ggml_tensor * ggml_map_custom2_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
ggml_custom2_op_f32_t fun);
GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
ggml_custom2_op_f32_t fun);
GGML_API struct ggml_tensor * ggml_map_custom3_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
ggml_custom3_op_f32_t fun);
GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
ggml_custom3_op_f32_t fun);
// loss function
GGML_API struct ggml_tensor * ggml_cross_entropy_loss(

View File

@@ -53,6 +53,7 @@ __attribute__((format(gnu_printf, 1, 2)))
__attribute__((format(printf, 1, 2)))
#endif
#endif
static std::string format(const char * fmt, ...) {
va_list ap, ap2;
va_start(ap, fmt);

View File

@@ -1,11 +1,3 @@
// Defines fileno on msys:
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#include <cstddef>
#include <cstdint>
#include <cstdio>
#endif
#include "llama-util.h"
#include "llama.h"
@@ -2015,9 +2007,10 @@ void llama_sample_top_p(struct llama_context * ctx, llama_token_data_array * can
for (size_t i = 0; i < candidates->size; ++i) {
cum_sum += candidates->data[i].p;
// Check if the running sum is greater than p or if we have kept at least min_keep tokens
if (cum_sum > p && i >= min_keep) {
last_idx = i;
// Check if the running sum is at least p or if we have kept at least min_keep tokens
// we set the last index to i+1 to indicate that the current iterate should be included in the set
if (cum_sum >= p && i + 1 >= min_keep) {
last_idx = i + 1;
break;
}
}

View File

@@ -1,3 +1,4 @@
#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
#include "ggml.h"
#include <math.h>
@@ -5,6 +6,10 @@
#include <stdlib.h>
#include <assert.h>
#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif
#define MAX_NARGS 3
#undef MIN
@@ -197,8 +202,23 @@ bool check_gradient(
float max_error_abs,
float max_error_rel) {
static int n_threads = -1;
if (n_threads < 0) {
n_threads = GGML_DEFAULT_N_THREADS;
const char *env = getenv("GGML_N_THREADS");
if (env) {
n_threads = atoi(env);
}
printf("GGML_N_THREADS = %d\n", n_threads);
}
struct ggml_cgraph gf = ggml_build_forward (f);
gf.n_threads = n_threads;
struct ggml_cgraph gb = ggml_build_backward(ctx0, &gf, false);
gb.n_threads = n_threads;
ggml_graph_compute(ctx0, &gf);
ggml_graph_reset (&gf);

View File

@@ -181,6 +181,7 @@ int main(void) {
test_top_p({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f}, 0);
test_top_p({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f}, 0.7f);
test_top_p({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f}, 0.8f);
test_top_p({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 1);
test_tfs({0.1f, 0.15f, 0.2f, 0.25f, 0.3f}, {0.3f}, 0.25f);