Compare commits

...

11 Commits

Author SHA1 Message Date
Aldehir Rojas
d77599234e common : do not pass prompt tokens to reasoning budget sampler (#22488) 2026-04-29 14:10:58 -05:00
Max Krasnyansky
41a63be28e hexagon: make vmem and buffer-size configurable (#22487)
* hexagon: allow host to set max vmem size

We use a sane default but it's helpful to allow for an override if needed.

* hexagon: add support for measuring vmem space and move pinned mmaping management to host

* hexagon: update vmem checks to use uint64

* hexagon: bump op buffers to 16 (matches max mmaps)

* hexagon: bump default vmem to 3.2GB

* hexagon: add support for autodetecting vmem space and some logging cleanup in that area

* hexagon: fix whitespace warnings

* Update scripts/snapdragon/adb/run-cli.sh

Co-authored-by: Pascal <admin@serveurperso.com>

* hex-adb: fix run-completion script

---------

Co-authored-by: Pascal <admin@serveurperso.com>
2026-04-29 11:51:21 -07:00
Anav Prasad
098705a29e CUDA: fuse SSM_CONV + ADD(bias) + SILU (#22478) 2026-04-30 02:39:56 +08:00
Georgi Gerganov
683c5acb90 spec : disacard last drafted token with low prob (#22506) 2026-04-29 17:00:00 +03:00
Georgi Gerganov
b1d5f5b449 sync : ggml 2026-04-29 16:43:47 +03:00
Georgi Gerganov
4b221b7f1e ggml : bump version to 0.10.1 (ggml/1469) 2026-04-29 16:43:47 +03:00
Pascal
59237bfbbc webui: fix slow mic stop and WAV encode (#22480)
* webui: instant mic stop, race-free recorder restart

* webui: faster WAV PCM encode via hoisted channels and Int16Array

* chore: update webui build output

* webui: drop setTimeout(0) hack and harden cancelRecording

* chore: update webui build output
2026-04-29 12:58:35 +02:00
shalinib-ibm
1cbc846eba ggml-cpu : disable tiled matmul on AIX to fix page boundary segfault (#22293)
* ggml-cpu : disable tiled matmul on AIX to fix page boundary segfault

vec_xst operations in the tiled path crash on AIX when writing
near 4KB page boundaries due to strict memory protection. Fall
back to mnpack implementation on AIX for stable execution.

Signed-off-by: Shalini Salomi Bodapati <Shalini.Salomi.Bodapati@ibm.com>

* Update ggml/src/ggml-cpu/llamafile/sgemm.cpp

Co-authored-by: Aaron Teo <taronaeo@gmail.com>

* Update sgemm.cpp

* Update sgemm.cpp

---------

Signed-off-by: Shalini Salomi Bodapati <Shalini.Salomi.Bodapati@ibm.com>
Co-authored-by: Aaron Teo <taronaeo@gmail.com>
2026-04-29 13:32:40 +03:00
Aman Gupta
3142f1dbb9 ggml-cuda: refactor fusion code (#22468)
* ggml-cuda: refactor fusion code

* apply formatting + make env variable truthy
2026-04-29 16:19:33 +08:00
qiurui144
b5c4227dc6 ggml-cpu: cmake: append xsmtvdotii march for SpacemiT IME (#22317)
* ggml-cpu: cmake: append xsmtvdotii march for SpacemiT IME

When GGML_CPU_RISCV64_SPACEMIT=ON is set, ime1_kernels.cpp contains
inline asm for the vmadot family which requires the xsmtvdotii custom
extension.(problem can see in some blogs and make sure in K3 platform)
The current CMakeLists does not include xsmtvdotii, so any toolchain
that honours the explicit -march (tested with SpacemiT GCC 15.2) fails
at the assembler stage:

  Error: unrecognized opcode `vmadot v16,v14,v0',
         extension `xsmtvdotii' required

Append _xsmtvdotii to MARCH_STR when GGML_CPU_RISCV64_SPACEMIT is
enabled so the IME path can actually build with a capable toolchain.
No effect on builds that leave GGML_CPU_RISCV64_SPACEMIT off.

toolchain from https://www.spacemit.com/community/resources-download/Tools

* Update ggml/src/ggml-cpu/CMakeLists.txt

Co-authored-by: alex-spacemit <jinghui.huang@spacemit.com>

---------

Co-authored-by: alex-spacemit <jinghui.huang@spacemit.com>
2026-04-29 10:59:21 +03:00
Reese Levine
d6a5094004 ggml-webgpu: Fix bug in FlashAttention support check (#22492)
* Fix flashattention support check for devices that don't support subgroups

* set path to none if kv_tile doesn't fit
2026-04-29 10:59:00 +03:00
26 changed files with 896 additions and 682 deletions

View File

@@ -232,34 +232,6 @@ static struct llama_sampler * common_reasoning_budget_init_state(
);
}
struct llama_sampler * common_reasoning_budget_init(
const struct llama_vocab * vocab,
const std::vector<llama_token> & start_tokens,
const std::vector<llama_token> & end_tokens,
const std::vector<llama_token> & forced_tokens,
int32_t budget,
const std::vector<llama_token> & prefill_tokens) {
// Determine initial state from prefill: COUNTING if the prefill begins with
// the start sequence but does not also contain the end sequence after it.
common_reasoning_budget_state initial_state = REASONING_BUDGET_IDLE;
if (!prefill_tokens.empty() && !start_tokens.empty() &&
prefill_tokens.size() >= start_tokens.size() &&
std::equal(start_tokens.begin(), start_tokens.end(), prefill_tokens.begin())) {
initial_state = REASONING_BUDGET_COUNTING;
// If the end sequence also follows the start in the prefill, reasoning
// was opened and immediately closed — stay IDLE.
if (!end_tokens.empty() &&
prefill_tokens.size() >= start_tokens.size() + end_tokens.size()) {
auto end_start = prefill_tokens.end() - (ptrdiff_t) end_tokens.size();
if (end_start >= prefill_tokens.begin() + (ptrdiff_t) start_tokens.size() &&
std::equal(end_tokens.begin(), end_tokens.end(), end_start)) {
initial_state = REASONING_BUDGET_IDLE;
}
}
}
return common_reasoning_budget_init_state(vocab, start_tokens, end_tokens, forced_tokens, budget, initial_state);
}
struct llama_sampler * common_reasoning_budget_init(
const struct llama_vocab * vocab,
const std::vector<llama_token> & start_tokens,

View File

@@ -29,10 +29,7 @@ enum common_reasoning_budget_state {
// end_tokens - token sequence for natural deactivation
// forced_tokens - token sequence forced when budget expires
// budget - max tokens allowed in the reasoning block
// prefill_tokens - tokens already present in the prompt (generation prompt);
// used to determine the initial state: COUNTING if they begin
// with start_tokens (but don't also end with end_tokens),
// IDLE otherwise. COUNTING with budget <= 0 is promoted to FORCING.
// initial_state - initial state
//
struct llama_sampler * common_reasoning_budget_init(
const struct llama_vocab * vocab,
@@ -40,16 +37,6 @@ struct llama_sampler * common_reasoning_budget_init(
const std::vector<llama_token> & end_tokens,
const std::vector<llama_token> & forced_tokens,
int32_t budget,
const std::vector<llama_token> & prefill_tokens = {});
// Variant that takes an explicit initial state (used by tests and clone).
// COUNTING with budget <= 0 is promoted to FORCING.
struct llama_sampler * common_reasoning_budget_init(
const struct llama_vocab * vocab,
const std::vector<llama_token> & start_tokens,
const std::vector<llama_token> & end_tokens,
const std::vector<llama_token> & forced_tokens,
int32_t budget,
common_reasoning_budget_state initial_state);
common_reasoning_budget_state initial_state = REASONING_BUDGET_IDLE);
common_reasoning_budget_state common_reasoning_budget_get_state(const struct llama_sampler * smpl);

View File

@@ -260,32 +260,35 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
}
}
// Compute prefill tokens from the generation prompt
std::vector<llama_token> prefill_tokens;
if (!params.generation_prompt.empty()) {
GGML_ASSERT(vocab != nullptr);
auto tokens = common_tokenize(vocab, params.generation_prompt, false, true);
for (size_t i = 0; i < tokens.size(); i++) {
std::string piece = common_token_to_piece(vocab, tokens[i], true);
if (i == 0 && std::isspace(piece[0]) && !std::isspace(params.generation_prompt[0])) {
// Some tokenizers will add a space before the first special token, need to exclude
continue;
}
LOG_DBG("%s: prefill token: %d = %s\n", __func__, tokens[i], piece.c_str());
prefill_tokens.push_back(tokens[i]);
}
}
// Feed generation prompt tokens to the grammar sampler so it advances past
// tokens the template already placed in the prompt.
// Only applies to output-format and tool-call grammars; user-supplied grammars must not be prefilled.
std::vector<llama_token> prefill_tokens;
if (!params.generation_prompt.empty() && common_grammar_needs_prefill(params.grammar)) {
GGML_ASSERT(vocab != nullptr);
prefill_tokens = common_tokenize(vocab, params.generation_prompt, false, true);
if (!prefill_tokens.empty()) {
std::string first_token = common_token_to_piece(vocab, prefill_tokens[0], true);
if (std::isspace(first_token[0]) && !std::isspace(params.generation_prompt[0])) {
// Some tokenizers will add a space before the first special token, need to remove
prefill_tokens = std::vector<llama_token>(prefill_tokens.begin() + 1, prefill_tokens.end());
}
}
if (grmr && !params.grammar_lazy) {
try {
for (const auto & token : prefill_tokens) {
llama_sampler_accept(grmr, token);
LOG_DBG("%s: accepted prefill token (%d)\n", __func__, token);
}
} catch (std::exception &e) {
LOG_ERR("%s: error initializing grammar sampler for grammar:\n%s\n\nGeneration prompt:\n'%s'\n", __func__,
common_grammar_value(params.grammar).c_str(), params.generation_prompt.c_str());
throw e;
if (grmr && !params.grammar_lazy && common_grammar_needs_prefill(params.grammar)) {
try {
for (const auto & token : prefill_tokens) {
llama_sampler_accept(grmr, token);
LOG_DBG("%s: grammar accepted prefill token (%d)\n", __func__, token);
}
} catch (std::exception &e) {
LOG_ERR("%s: error initializing grammar sampler for grammar:\n%s\n\nGeneration prompt:\n'%s'\n", __func__,
common_grammar_value(params.grammar).c_str(), params.generation_prompt.c_str());
throw e;
}
}
@@ -296,8 +299,12 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
params.reasoning_budget_start,
params.reasoning_budget_end,
params.reasoning_budget_forced,
params.reasoning_budget_tokens < 0 ? INT_MAX : params.reasoning_budget_tokens,
prefill_tokens);
params.reasoning_budget_tokens < 0 ? INT_MAX : params.reasoning_budget_tokens);
for (const auto & token : prefill_tokens) {
llama_sampler_accept(rbudget, token);
LOG_DBG("%s: reasoning-budget accepted prefill token (%d)\n", __func__, token);
}
}
if (params.has_logit_bias()) {
@@ -431,7 +438,7 @@ static bool grammar_should_apply(struct common_sampler * gsmpl) {
return true;
}
void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar) {
void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool is_generated) {
if (!gsmpl) {
return;
}
@@ -439,9 +446,11 @@ void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, boo
const auto tm = gsmpl->tm();
// grammar_should_apply() checks the reasoning budget state, so calculate this before we accept
accept_grammar = accept_grammar && grammar_should_apply(gsmpl);
const auto accept_grammar = is_generated && grammar_should_apply(gsmpl);
llama_sampler_accept(gsmpl->rbudget, token);
if (gsmpl->rbudget && is_generated) {
llama_sampler_accept(gsmpl->rbudget, token);
}
if (gsmpl->grmr && accept_grammar) {
llama_sampler_accept(gsmpl->grmr, token);

View File

@@ -41,8 +41,8 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
void common_sampler_free(struct common_sampler * gsmpl);
// if accept_grammar is true, the token is accepted both by the sampling chain and the grammar
void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar);
// if is_generated is true, the token is accepted by the sampling chain, the reasoning budget sampler, and the grammar sampler
void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool is_generated);
void common_sampler_reset (struct common_sampler * gsmpl);
struct common_sampler * common_sampler_clone (struct common_sampler * gsmpl);

View File

@@ -467,7 +467,7 @@ struct common_speculative_state_draft : public common_speculative_state {
prompt_dft.push_back(id_last);
LOG_DBG("%s: draft prompt: %s\n", __func__, string_from(ctx_dft, prompt_dft).c_str());
//LOG_DBG("%s: draft prompt: %s\n", __func__, string_from(ctx_dft, prompt_dft).c_str());
int ret = llama_decode(ctx_dft, batch);
if (ret != 0 && ret != 1) {
@@ -495,14 +495,14 @@ struct common_speculative_state_draft : public common_speculative_state {
common_sampler_accept(smpl, id, true);
result.push_back(id);
if (sparams.n_max <= (int) result.size()) {
// only collect very high-confidence draft tokens
if (cur_p->data[0].p < sparams.p_min) {
break;
}
// only collect very high-confidence draft tokens
if (cur_p->data[0].p < sparams.p_min) {
result.push_back(id);
if (sparams.n_max <= (int) result.size()) {
break;
}

View File

@@ -5,7 +5,7 @@ project("ggml" C CXX ASM)
### GGML Version
set(GGML_VERSION_MAJOR 0)
set(GGML_VERSION_MINOR 10)
set(GGML_VERSION_PATCH 0)
set(GGML_VERSION_PATCH 1)
set(GGML_VERSION_BASE "${GGML_VERSION_MAJOR}.${GGML_VERSION_MINOR}.${GGML_VERSION_PATCH}")
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")

View File

@@ -485,6 +485,13 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
if (GGML_RV_ZIHINTPAUSE)
string(APPEND MARCH_STR "_zihintpause")
endif()
if (GGML_CPU_RISCV64_SPACEMIT)
# `xsmtvdotii' is only required for GCC >= 15.
if (CMAKE_C_COMPILER_ID STREQUAL "GNU" AND
CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 15)
string(APPEND MARCH_STR "_xsmtvdotii")
endif()
endif()
list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d)
else()

View File

@@ -2321,6 +2321,9 @@ class tinyBLAS_Q0_PPC {
}
void matmul(int64_t m, int64_t n) {
#if defined(_AIX) || defined(__BIG_ENDIAN__)
mnpack(0, m, 0, n);
#else
const int64_t mc = 64;
const int64_t kc = 64;
int64_t nc = 64;
@@ -2334,7 +2337,6 @@ class tinyBLAS_Q0_PPC {
} else {
n_aligned = (n / 64) * 64;
}
if (n_aligned > 0) {
if (n_aligned % 64 == 0) nc = 64;
else if (n_aligned == n) nc = n;
@@ -2352,6 +2354,7 @@ class tinyBLAS_Q0_PPC {
} else {
mnpack(0, m, 0, n);
}
#endif
}
private:
@@ -3191,12 +3194,16 @@ class tinyBLAS_PPC {
}
void matmul(int64_t m, int64_t n) {
#if defined(_AIX) || defined(__BIG_ENDIAN__)
mnpack(0, m, 0, n);
#else
int64_t mc = 256; int64_t nc = 256; int64_t kc = 256;
if (m % mc == 0 && n % nc == 0 && k % kc == 0) {
matmul_tiled(m, n, mc, nc, kc);
} else {
mnpack(0, m, 0, n);
}
#endif
}
private:

View File

@@ -3556,6 +3556,9 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph,
&& unary_ops.size() == 1 && unary_ops.begin()[0] == GGML_UNARY_OP_SILU) {
const ggml_tensor * ssm_conv = cgraph->nodes[node_idx];
const ggml_tensor * silu = cgraph->nodes[node_idx+1];
if (ggml_get_unary_op(silu) != unary_ops.begin()[0]) {
return false;
}
if (ssm_conv->type != GGML_TYPE_F32 || silu->type != GGML_TYPE_F32) {
return false;
@@ -3564,6 +3567,31 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph,
return true;
}
if (ops.size() == 3 && ops.begin()[0] == GGML_OP_SSM_CONV && ops.begin()[1] == GGML_OP_ADD
&& ops.begin()[2] == GGML_OP_UNARY && unary_ops.size() == 1 && unary_ops.begin()[0] == GGML_UNARY_OP_SILU) {
const ggml_tensor * ssm_conv = cgraph->nodes[node_idx];
const ggml_tensor * add = cgraph->nodes[node_idx+1];
const ggml_tensor * silu = cgraph->nodes[node_idx+2];
if (ggml_get_unary_op(silu) != unary_ops.begin()[0]) {
return false;
}
if (ssm_conv->type != GGML_TYPE_F32 || add->type != GGML_TYPE_F32 || silu->type != GGML_TYPE_F32) {
return false;
}
// ADD must consume ssm_conv's output and broadcast a 1-D channel-wise bias.
const ggml_tensor * bias = (add->src[0] == ssm_conv) ? add->src[1] : add->src[0];
if (bias->type != GGML_TYPE_F32 || !ggml_is_contiguous(bias)) {
return false;
}
if (ggml_nelements(bias) != ssm_conv->ne[0] || bias->ne[0] != ssm_conv->ne[0]) {
return false;
}
return true;
}
if (ops.size() == 2 && ops.begin()[0] == GGML_OP_UNARY && ops.begin()[1] == GGML_OP_MUL
&& unary_ops.size() == 1 && (unary_ops.begin()[0] == GGML_UNARY_OP_SILU || unary_ops.begin()[0] == GGML_UNARY_OP_SIGMOID || unary_ops.begin()[0] == GGML_UNARY_OP_SOFTPLUS)) {
const ggml_tensor * unary = cgraph->nodes[node_idx];
@@ -3640,6 +3668,362 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph,
return false;
}
// try and fuse nodes and return the number of nodes to skip
static int ggml_cuda_try_fuse(ggml_backend_cuda_context * cuda_ctx, ggml_cgraph * cgraph, int i) {
static bool disable_fusion = getenv("GGML_CUDA_DISABLE_FUSION") != nullptr && std::atoi(getenv("GGML_CUDA_DISABLE_FUSION"));
if (disable_fusion) {
return 0;
}
ggml_tensor * node = cgraph->nodes[i];
//topk-moe
if (cgraph->nodes[i]->op == GGML_OP_UNARY || cgraph->nodes[i]->op == GGML_OP_SOFT_MAX ||
cgraph->nodes[i]->op == GGML_OP_ARGSORT) {
ggml_cuda_topk_moe_args args;
const bool can_fuse = ggml_cuda_topk_moe_fusion(cgraph, i, args);
std::vector<ggml_op> ops;
if (can_fuse) {
const ggml_tensor * logits = node->src[0];
ggml_tensor * weights = nullptr;
ggml_tensor * ids = nullptr;
const ggml_tensor * bias = nullptr;
const ggml_tensor * clamp = nullptr;
const ggml_tensor * scale = nullptr;
if (!args.delayed_softmax) {
ggml_op gating_op = args.sigmoid ? GGML_OP_UNARY : GGML_OP_SOFT_MAX;
int out_nodes[2]; // nodes which can't be elided
if (args.prob_bias) {
bias = cgraph->nodes[i + 2]->src[1];
ops.insert(ops.end(), { gating_op, GGML_OP_RESHAPE, GGML_OP_ADD, GGML_OP_ARGSORT, GGML_OP_VIEW,
GGML_OP_GET_ROWS });
out_nodes[0] = i + 4;
ids = cgraph->nodes[i + 4];
} else {
ops.insert(ops.end(),
{ gating_op, GGML_OP_RESHAPE, GGML_OP_ARGSORT, GGML_OP_VIEW, GGML_OP_GET_ROWS });
out_nodes[0] = i + 3;
ids = cgraph->nodes[i + 3];
}
if (args.norm) {
ops.insert(ops.end(),
{ GGML_OP_RESHAPE, GGML_OP_SUM_ROWS, GGML_OP_CLAMP, GGML_OP_DIV, GGML_OP_RESHAPE });
clamp = cgraph->nodes[i + ops.size() - 3];
}
if (args.scale) {
ops.insert(ops.end(), { GGML_OP_SCALE });
scale = cgraph->nodes[i + ops.size() - 1];
}
weights = cgraph->nodes[i + ops.size() - 1];
out_nodes[1] = i + ops.size() - 1;
if (ggml_can_fuse_subgraph(cgraph, i, ops.size(), ops.data(), out_nodes, 2) &&
ggml_cuda_should_use_topk_moe(node, logits, weights, ids) &&
ggml_cuda_check_fusion_memory_ranges(cgraph, i, ops.size(), out_nodes, 2, /*is_topk_moe=*/true)) {
ggml_cuda_op_topk_moe(*cuda_ctx, logits, weights, ids, clamp, scale, bias, args);
return ops.size() - 1;
}
} else if (!args.norm && !args.prob_bias) {
//special case gpt-oss, no norm, no bias.
ops.insert(ops.end(), { GGML_OP_ARGSORT, GGML_OP_VIEW, GGML_OP_GET_ROWS, GGML_OP_RESHAPE,
GGML_OP_SOFT_MAX, GGML_OP_RESHAPE });
weights = cgraph->nodes[i + 5];
ids = cgraph->nodes[i + 1];
const ggml_tensor * softmax = cgraph->nodes[i + 4];
int out_nodes[2] = { i + 1, i + 5 };
if (ggml_can_fuse_subgraph(cgraph, i, ops.size(), ops.data(), out_nodes, 2) &&
ggml_cuda_should_use_topk_moe(softmax, logits, weights, ids) &&
ggml_cuda_check_fusion_memory_ranges(cgraph, i, ops.size(), out_nodes, 2, /*is_topk_moe=*/true)) {
ggml_cuda_op_topk_moe(*cuda_ctx, logits, weights, ids, clamp, scale, bias, args);
return ops.size() - 1;
}
}
}
}
//RoPE + view + set-rows
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_ROPE, GGML_OP_VIEW, GGML_OP_SET_ROWS }, {})) {
ggml_tensor * rope = cgraph->nodes[i];
ggml_tensor * set_rows = cgraph->nodes[i + 2];
ggml_cuda_op_rope_fused(*cuda_ctx, rope, set_rows);
return 2;
}
// multi-(add or mul)
if (node->op == GGML_OP_ADD || node->op == GGML_OP_MUL) {
int n_fuse = 0;
ggml_op ops[8];
std::fill(ops, ops + 8, node->op);
for (; n_fuse <= 6; ++n_fuse) {
if (!ggml_can_fuse(cgraph, i + n_fuse, ops + n_fuse, 2)) {
break;
}
if (cgraph->nodes[i + n_fuse] != cgraph->nodes[i + n_fuse + 1]->src[0]) {
break;
}
if (!ggml_are_same_layout(cgraph->nodes[i + n_fuse]->src[1], cgraph->nodes[i + n_fuse + 1]->src[1])) {
break;
}
}
n_fuse++;
if (n_fuse > 1) {
ggml_tensor fused_node;
memcpy(&fused_node, node, sizeof(ggml_tensor));
for (int j = 0; j < n_fuse - 1; ++j) {
fused_node.src[j + 2] = cgraph->nodes[i + j + 1]->src[1];
}
fused_node.data = cgraph->nodes[i + n_fuse - 1]->data;
if (node->op == GGML_OP_ADD) {
ggml_cuda_op_fused_add(*cuda_ctx, &fused_node, n_fuse);
} else {
ggml_cuda_op_fused_mul(*cuda_ctx, &fused_node, n_fuse);
}
return n_fuse - 1;
}
}
bool fused_mul_mat_vec = false;
int fused_node_count = 0;
// gate + glu + up
for (ggml_op op : { GGML_OP_MUL_MAT, GGML_OP_MUL_MAT_ID }) {
const ggml_op bias_op = op == GGML_OP_MUL_MAT ? GGML_OP_ADD : GGML_OP_ADD_ID;
if (ggml_cuda_can_fuse(cgraph, i, { op, bias_op, op, bias_op, GGML_OP_GLU }, {})) {
ggml_tensor * glu = cgraph->nodes[i + 4];
ggml_tensor * gate_bias_n = glu->src[0];
ggml_tensor * up_bias_n = glu->src[1];
//we don't assume the order for {gate, up}. Instead infer it from the bias tensor
ggml_tensor * gate_n = nullptr;
ggml_tensor * up_n = nullptr;
if (gate_bias_n->src[0] == cgraph->nodes[i] || gate_bias_n->src[1] == cgraph->nodes[i]) {
gate_n = cgraph->nodes[i];
up_n = cgraph->nodes[i + 2];
} else if (gate_bias_n->src[0] == cgraph->nodes[i + 2] || gate_bias_n->src[1] == cgraph->nodes[i + 2]) {
gate_n = cgraph->nodes[i + 2];
up_n = cgraph->nodes[i];
} else {
continue;
}
auto get_bias_tensor = [](const ggml_tensor * bias_node, const ggml_tensor * mul_node, ggml_op op_bias) {
if (op_bias == GGML_OP_ADD) {
if (bias_node->src[0] == mul_node) {
return bias_node->src[1];
}
if (bias_node->src[1] == mul_node) {
return bias_node->src[0];
}
return (ggml_tensor *) nullptr;
}
GGML_ASSERT(op_bias == GGML_OP_ADD_ID);
GGML_ASSERT(bias_node->src[0] == mul_node);
return bias_node->src[1];
};
ggml_tensor * up_bias_tensor = get_bias_tensor(up_bias_n, up_n, bias_op);
ggml_tensor * gate_bias_tensor = get_bias_tensor(gate_bias_n, gate_n, bias_op);
if (!up_bias_tensor || !gate_bias_tensor) {
continue;
}
// we don't support repeating adds
if (bias_op == GGML_OP_ADD && (!ggml_are_same_shape(gate_bias_n->src[0], gate_bias_n->src[1]) ||
!ggml_are_same_shape(up_bias_n->src[0], up_bias_n->src[1]))) {
continue;
}
const ggml_tensor * src0 = up_n->src[0];
const ggml_tensor * src1 = up_n->src[1];
const ggml_tensor * ids = up_n->src[2];
if (ggml_cuda_should_fuse_mul_mat_vec_f(up_n)) {
ggml_cuda_mm_fusion_args_host fusion_data{};
fusion_data.gate = gate_n->src[0];
fusion_data.x_bias = up_bias_tensor;
fusion_data.gate_bias = gate_bias_tensor;
fusion_data.glu_op = ggml_get_glu_op(glu);
ggml_cuda_mul_mat_vec_f(*cuda_ctx, src0, src1, ids, glu, &fusion_data);
fused_mul_mat_vec = true;
fused_node_count = 5;
break;
}
if (ggml_cuda_should_fuse_mul_mat_vec_q(up_n)) {
ggml_cuda_mm_fusion_args_host fusion_data{};
fusion_data.gate = gate_n->src[0];
fusion_data.x_bias = up_bias_tensor;
fusion_data.gate_bias = gate_bias_tensor;
fusion_data.glu_op = ggml_get_glu_op(glu);
ggml_cuda_mul_mat_vec_q(*cuda_ctx, src0, src1, ids, glu, &fusion_data);
fused_mul_mat_vec = true;
fused_node_count = 5;
break;
}
} else if (ggml_cuda_can_fuse(cgraph, i, { op, op, GGML_OP_GLU }, {})) {
ggml_tensor * glu = cgraph->nodes[i + 2];
ggml_tensor * gate = glu->src[0];
ggml_tensor * up = glu->src[1];
bool ok = (gate == cgraph->nodes[i] && up == cgraph->nodes[i + 1]) ||
(gate == cgraph->nodes[i + 1] && up == cgraph->nodes[i]);
if (!ok) {
continue;
}
const ggml_tensor * src0 = up->src[0];
const ggml_tensor * src1 = up->src[1];
const ggml_tensor * ids = up->src[2];
if (ggml_cuda_should_fuse_mul_mat_vec_f(up)) {
ggml_cuda_mm_fusion_args_host fusion_data{};
fusion_data.gate = gate->src[0];
fusion_data.glu_op = ggml_get_glu_op(glu);
ggml_cuda_mul_mat_vec_f(*cuda_ctx, src0, src1, ids, glu, &fusion_data);
fused_mul_mat_vec = true;
fused_node_count = 3;
break;
}
if (ggml_cuda_should_fuse_mul_mat_vec_q(up)) {
ggml_cuda_mm_fusion_args_host fusion_data{};
fusion_data.gate = gate->src[0];
fusion_data.glu_op = ggml_get_glu_op(glu);
ggml_cuda_mul_mat_vec_q(*cuda_ctx, src0, src1, ids, glu, &fusion_data);
fused_mul_mat_vec = true;
fused_node_count = 3;
break;
}
}
}
if (fused_mul_mat_vec) {
return fused_node_count - 1;
}
fused_mul_mat_vec = false;
fused_node_count = 0;
// gate + add + glu + up + add
for (ggml_op op : { GGML_OP_MUL_MAT, GGML_OP_MUL_MAT_ID }) {
const ggml_op bias_op = op == GGML_OP_MUL_MAT ? GGML_OP_ADD : GGML_OP_ADD_ID;
if (!ggml_can_fuse(cgraph, i, { op, bias_op })) {
continue;
}
ggml_tensor * mm_node = cgraph->nodes[i];
ggml_tensor * bias_node = cgraph->nodes[i + 1];
ggml_tensor * bias_tensor = nullptr;
if (bias_op == GGML_OP_ADD) {
if (bias_node->src[0] == mm_node) {
bias_tensor = bias_node->src[1];
} else if (bias_node->src[1] == mm_node) {
bias_tensor = bias_node->src[0];
} else {
continue;
}
} else {
if (bias_node->src[0] != mm_node) {
continue;
}
bias_tensor = bias_node->src[1];
}
const ggml_tensor * src0 = mm_node->src[0];
const ggml_tensor * src1 = mm_node->src[1];
const ggml_tensor * ids = mm_node->src[2];
if (bias_op == GGML_OP_ADD_ID && bias_node->src[2] != ids) {
continue;
}
if (bias_op == GGML_OP_ADD && !ggml_are_same_shape(bias_node->src[0], bias_node->src[1])) {
continue;
}
ggml_cuda_mm_fusion_args_host fusion_data{};
fusion_data.x_bias = bias_tensor;
if (ggml_cuda_should_fuse_mul_mat_vec_f(mm_node)) {
ggml_cuda_mul_mat_vec_f(*cuda_ctx, src0, src1, ids, bias_node, &fusion_data);
fused_mul_mat_vec = true;
fused_node_count = 2;
break;
}
if (ggml_cuda_should_fuse_mul_mat_vec_q(mm_node)) {
ggml_cuda_mul_mat_vec_q(*cuda_ctx, src0, src1, ids, bias_node, &fusion_data);
fused_mul_mat_vec = true;
fused_node_count = 2;
break;
}
}
if (fused_mul_mat_vec) {
return fused_node_count - 1;
}
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_RMS_NORM, GGML_OP_MUL, GGML_OP_ADD }, {})) {
ggml_cuda_op_rms_norm_fused_add(*cuda_ctx, node, cgraph->nodes[i + 1], cgraph->nodes[i + 2]);
return 2;
}
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_RMS_NORM, GGML_OP_MUL }, {})) {
ggml_cuda_op_rms_norm_fused(*cuda_ctx, node, cgraph->nodes[i + 1]);
return 1;
}
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_SSM_CONV, GGML_OP_ADD, GGML_OP_UNARY }, { GGML_UNARY_OP_SILU })) {
ggml_cuda_op_ssm_conv(*cuda_ctx, node, cgraph->nodes[i + 1], cgraph->nodes[i + 2]);
return 2;
}
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_SSM_CONV, GGML_OP_UNARY }, { GGML_UNARY_OP_SILU })) {
ggml_cuda_op_ssm_conv(*cuda_ctx, node, /*bias_add_node=*/ nullptr, cgraph->nodes[i + 1]);
return 1;
}
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_UNARY, GGML_OP_MUL }, { GGML_UNARY_OP_SILU }) ||
ggml_cuda_can_fuse(cgraph, i, { GGML_OP_UNARY, GGML_OP_MUL }, { GGML_UNARY_OP_SIGMOID }) ||
ggml_cuda_can_fuse(cgraph, i, { GGML_OP_UNARY, GGML_OP_MUL }, { GGML_UNARY_OP_SOFTPLUS })) {
ggml_cuda_op_unary_mul(*cuda_ctx, node, cgraph->nodes[i + 1]);
return 1;
}
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_UNARY, GGML_OP_SQR }, { GGML_UNARY_OP_RELU })) {
ggml_cuda_op_relu_sqr(*cuda_ctx, node, cgraph->nodes[i + 1]);
return 1;
}
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_SCALE, GGML_OP_UNARY, GGML_OP_SCALE }, { GGML_UNARY_OP_TANH })) {
ggml_cuda_op_softcap(*cuda_ctx, cgraph->nodes[i + 2], node);
return 2;
}
return 0;
}
static void ggml_cuda_graph_evaluate_and_capture(ggml_backend_cuda_context * cuda_ctx, ggml_cgraph * cgraph, const bool use_cuda_graph, const bool cuda_graph_update_required, const void * graph_key) {
bool graph_evaluated_or_captured = false;
@@ -3786,355 +4170,11 @@ static void ggml_cuda_graph_evaluate_and_capture(ggml_backend_cuda_context * cud
continue;
}
// start of fusion operations
static bool disable_fusion = (getenv("GGML_CUDA_DISABLE_FUSION") != nullptr);
if (!disable_fusion) {
ggml_cuda_topk_moe_args args;
int nodes_to_skip = ggml_cuda_try_fuse(cuda_ctx, cgraph, i);
if (cgraph->nodes[i]->op == GGML_OP_UNARY || cgraph->nodes[i]->op == GGML_OP_SOFT_MAX ||
cgraph->nodes[i]->op == GGML_OP_ARGSORT) {
const bool can_fuse = ggml_cuda_topk_moe_fusion(cgraph, i, args);
std::vector<ggml_op> ops;
if (can_fuse) {
const ggml_tensor * logits = node->src[0];
ggml_tensor * weights = nullptr;
ggml_tensor * ids = nullptr;
const ggml_tensor * bias = nullptr;
const ggml_tensor * clamp = nullptr;
const ggml_tensor * scale = nullptr;
if (!args.delayed_softmax) {
ggml_op gating_op = args.sigmoid ? GGML_OP_UNARY : GGML_OP_SOFT_MAX;
int out_nodes[2]; // nodes which can't be elided
if (args.prob_bias) {
bias = cgraph->nodes[i + 2]->src[1];
ops.insert(ops.end(), { gating_op, GGML_OP_RESHAPE, GGML_OP_ADD, GGML_OP_ARGSORT,
GGML_OP_VIEW, GGML_OP_GET_ROWS });
out_nodes[0] = i + 4;
ids = cgraph->nodes[i + 4];
} else {
ops.insert(ops.end(), { gating_op, GGML_OP_RESHAPE, GGML_OP_ARGSORT, GGML_OP_VIEW,
GGML_OP_GET_ROWS });
out_nodes[0] = i + 3;
ids = cgraph->nodes[i + 3];
}
if (args.norm) {
ops.insert(ops.end(), { GGML_OP_RESHAPE, GGML_OP_SUM_ROWS, GGML_OP_CLAMP,
GGML_OP_DIV, GGML_OP_RESHAPE });
clamp = cgraph->nodes[i + ops.size() - 3];
}
if (args.scale) {
ops.insert(ops.end(), { GGML_OP_SCALE });
scale = cgraph->nodes[i + ops.size() - 1];
}
weights = cgraph->nodes[i + ops.size() - 1];
out_nodes[1] = i + ops.size() - 1;
if (ggml_can_fuse_subgraph(cgraph, i, ops.size(), ops.data(), out_nodes, 2) &&
ggml_cuda_should_use_topk_moe(node, logits, weights, ids) &&
ggml_cuda_check_fusion_memory_ranges(cgraph, i, ops.size(), out_nodes, 2, /*is_topk_moe=*/ true)) {
ggml_cuda_op_topk_moe(*cuda_ctx, logits, weights, ids, clamp, scale, bias, args);
i += ops.size() - 1;
continue;
}
} else if (!args.norm && !args.prob_bias) {
//special case gpt-oss, no norm, no bias.
ops.insert(ops.end(), { GGML_OP_ARGSORT, GGML_OP_VIEW, GGML_OP_GET_ROWS,
GGML_OP_RESHAPE, GGML_OP_SOFT_MAX, GGML_OP_RESHAPE });
weights = cgraph->nodes[i + 5];
ids = cgraph->nodes[i + 1];
const ggml_tensor * softmax = cgraph->nodes[i + 4];
int out_nodes[2] = { i + 1, i + 5 };
if (ggml_can_fuse_subgraph(cgraph, i, ops.size(), ops.data(), out_nodes, 2) &&
ggml_cuda_should_use_topk_moe(softmax, logits, weights, ids) &&
ggml_cuda_check_fusion_memory_ranges(cgraph, i, ops.size(), out_nodes, 2, /*is_topk_moe=*/ true)) {
ggml_cuda_op_topk_moe(*cuda_ctx, logits, weights, ids, clamp, scale, bias, args);
i += ops.size() - 1;
continue;
}
}
}
}
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_ROPE, GGML_OP_VIEW, GGML_OP_SET_ROWS }, {})) {
ggml_tensor * rope = cgraph->nodes[i];
ggml_tensor * set_rows = cgraph->nodes[i + 2];
ggml_cuda_op_rope_fused(*cuda_ctx, rope, set_rows);
i += 2;
continue;
}
if (node->op == GGML_OP_ADD || node->op == GGML_OP_MUL) {
int n_fuse = 0;
ggml_op ops[8];
std::fill(ops, ops + 8, node->op);
for (; n_fuse <= 6; ++n_fuse){
if (!ggml_can_fuse(cgraph, i + n_fuse, ops + n_fuse, 2)) {
break;
}
if (cgraph->nodes[i + n_fuse] != cgraph->nodes[i + n_fuse + 1]->src[0]) {
break;
}
if (!ggml_are_same_layout(cgraph->nodes[i + n_fuse]->src[1], cgraph->nodes[i + n_fuse + 1]->src[1])) {
break;
}
}
n_fuse++;
if (n_fuse > 1) {
ggml_tensor fused_node;
memcpy(&fused_node, node, sizeof(ggml_tensor));
for (int j = 0; j < n_fuse - 1; ++j) {
fused_node.src[j + 2] = cgraph->nodes[i + j + 1]->src[1];
}
fused_node.data = cgraph->nodes[i + n_fuse - 1]->data;
if (node->op == GGML_OP_ADD) {
ggml_cuda_op_fused_add(*cuda_ctx, &fused_node, n_fuse);
} else {
ggml_cuda_op_fused_mul(*cuda_ctx, &fused_node, n_fuse);
}
i += n_fuse - 1;
continue;
}
}
bool fused_mul_mat_vec = false;
int fused_node_count = 0;
for (ggml_op op : { GGML_OP_MUL_MAT, GGML_OP_MUL_MAT_ID }) {
const ggml_op bias_op = op == GGML_OP_MUL_MAT ? GGML_OP_ADD : GGML_OP_ADD_ID;
if (ggml_cuda_can_fuse(cgraph, i, { op, bias_op, op, bias_op, GGML_OP_GLU }, {})) {
ggml_tensor * glu = cgraph->nodes[i + 4];
ggml_tensor * gate_bias_n = glu->src[0];
ggml_tensor * up_bias_n = glu->src[1];
//we don't assume the order for {gate, up}. Instead infer it from the bias tensor
ggml_tensor * gate_n = nullptr;
ggml_tensor * up_n = nullptr;
if (gate_bias_n->src[0] == cgraph->nodes[i] || gate_bias_n->src[1] == cgraph->nodes[i]) {
gate_n = cgraph->nodes[i];
up_n = cgraph->nodes[i + 2];
} else if (gate_bias_n->src[0] == cgraph->nodes[i + 2] || gate_bias_n->src[1] == cgraph->nodes[i + 2]) {
gate_n = cgraph->nodes[i + 2];
up_n = cgraph->nodes[i];
} else {
continue;
}
auto get_bias_tensor = [](const ggml_tensor * bias_node, const ggml_tensor * mul_node, ggml_op op_bias) {
if (op_bias == GGML_OP_ADD) {
if (bias_node->src[0] == mul_node) {
return bias_node->src[1];
}
if (bias_node->src[1] == mul_node) {
return bias_node->src[0];
}
return (ggml_tensor *) nullptr;
}
GGML_ASSERT(op_bias == GGML_OP_ADD_ID);
GGML_ASSERT(bias_node->src[0] == mul_node);
return bias_node->src[1];
};
ggml_tensor * up_bias_tensor = get_bias_tensor(up_bias_n, up_n, bias_op);
ggml_tensor * gate_bias_tensor = get_bias_tensor(gate_bias_n, gate_n, bias_op);
if (!up_bias_tensor || !gate_bias_tensor) {
continue;
}
// we don't support repeating adds
if (bias_op == GGML_OP_ADD &&
(!ggml_are_same_shape(gate_bias_n->src[0], gate_bias_n->src[1]) ||
!ggml_are_same_shape(up_bias_n->src[0], up_bias_n->src[1]))) {
continue;
}
const ggml_tensor * src0 = up_n->src[0];
const ggml_tensor * src1 = up_n->src[1];
const ggml_tensor * ids = up_n->src[2];
if (ggml_cuda_should_fuse_mul_mat_vec_f(up_n)) {
ggml_cuda_mm_fusion_args_host fusion_data{};
fusion_data.gate = gate_n->src[0];
fusion_data.x_bias = up_bias_tensor;
fusion_data.gate_bias = gate_bias_tensor;
fusion_data.glu_op = ggml_get_glu_op(glu);
ggml_cuda_mul_mat_vec_f(*cuda_ctx, src0, src1, ids, glu, &fusion_data);
fused_mul_mat_vec = true;
fused_node_count = 5;
break;
}
if (ggml_cuda_should_fuse_mul_mat_vec_q(up_n)) {
ggml_cuda_mm_fusion_args_host fusion_data{};
fusion_data.gate = gate_n->src[0];
fusion_data.x_bias = up_bias_tensor;
fusion_data.gate_bias = gate_bias_tensor;
fusion_data.glu_op = ggml_get_glu_op(glu);
ggml_cuda_mul_mat_vec_q(*cuda_ctx, src0, src1, ids, glu, &fusion_data);
fused_mul_mat_vec = true;
fused_node_count = 5;
break;
}
} else if (ggml_cuda_can_fuse(cgraph, i, { op, op, GGML_OP_GLU }, {})) {
ggml_tensor * glu = cgraph->nodes[i + 2];
ggml_tensor * gate = glu->src[0];
ggml_tensor * up = glu->src[1];
bool ok = (gate == cgraph->nodes[i] && up == cgraph->nodes[i + 1])
|| (gate == cgraph->nodes[i + 1] && up == cgraph->nodes[i]);
if (!ok) continue;
const ggml_tensor * src0 = up->src[0];
const ggml_tensor * src1 = up->src[1];
const ggml_tensor * ids = up->src[2];
if (ggml_cuda_should_fuse_mul_mat_vec_f(up)) {
ggml_cuda_mm_fusion_args_host fusion_data{};
fusion_data.gate = gate->src[0];
fusion_data.glu_op = ggml_get_glu_op(glu);
ggml_cuda_mul_mat_vec_f(*cuda_ctx, src0, src1, ids, glu, &fusion_data);
fused_mul_mat_vec = true;
fused_node_count = 3;
break;
}
if (ggml_cuda_should_fuse_mul_mat_vec_q(up)) {
ggml_cuda_mm_fusion_args_host fusion_data{};
fusion_data.gate = gate->src[0];
fusion_data.glu_op = ggml_get_glu_op(glu);
ggml_cuda_mul_mat_vec_q(*cuda_ctx, src0, src1, ids, glu, &fusion_data);
fused_mul_mat_vec = true;
fused_node_count = 3;
break;
}
}
}
if (fused_mul_mat_vec) {
i += fused_node_count - 1;
continue;
}
fused_mul_mat_vec = false;
fused_node_count = 0;
for (ggml_op op : { GGML_OP_MUL_MAT, GGML_OP_MUL_MAT_ID }) {
const ggml_op bias_op = op == GGML_OP_MUL_MAT ? GGML_OP_ADD : GGML_OP_ADD_ID;
if (!ggml_can_fuse(cgraph, i, { op, bias_op })) {
continue;
}
ggml_tensor * mm_node = cgraph->nodes[i];
ggml_tensor * bias_node = cgraph->nodes[i + 1];
ggml_tensor * bias_tensor = nullptr;
if (bias_op == GGML_OP_ADD) {
if (bias_node->src[0] == mm_node) {
bias_tensor = bias_node->src[1];
} else if (bias_node->src[1] == mm_node) {
bias_tensor = bias_node->src[0];
} else {
continue;
}
} else {
if (bias_node->src[0] != mm_node) {
continue;
}
bias_tensor = bias_node->src[1];
}
const ggml_tensor * src0 = mm_node->src[0];
const ggml_tensor * src1 = mm_node->src[1];
const ggml_tensor * ids = mm_node->src[2];
if (bias_op == GGML_OP_ADD_ID && bias_node->src[2] != ids) {
continue;
}
if (bias_op == GGML_OP_ADD && !ggml_are_same_shape(bias_node->src[0], bias_node->src[1])) {
continue;
}
ggml_cuda_mm_fusion_args_host fusion_data{};
fusion_data.x_bias = bias_tensor;
if (ggml_cuda_should_fuse_mul_mat_vec_f(mm_node)) {
ggml_cuda_mul_mat_vec_f(*cuda_ctx, src0, src1, ids, bias_node, &fusion_data);
fused_mul_mat_vec = true;
fused_node_count = 2;
break;
}
if (ggml_cuda_should_fuse_mul_mat_vec_q(mm_node)) {
ggml_cuda_mul_mat_vec_q(*cuda_ctx, src0, src1, ids, bias_node, &fusion_data);
fused_mul_mat_vec = true;
fused_node_count = 2;
break;
}
}
if (fused_mul_mat_vec) {
i += fused_node_count - 1;
continue;
}
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_RMS_NORM, GGML_OP_MUL, GGML_OP_ADD}, {})) {
ggml_cuda_op_rms_norm_fused_add(*cuda_ctx, node, cgraph->nodes[i+1], cgraph->nodes[i+2]);
i += 2;
continue;
}
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_RMS_NORM, GGML_OP_MUL}, {})) {
ggml_cuda_op_rms_norm_fused(*cuda_ctx, node, cgraph->nodes[i+1]);
i++;
continue;
}
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_SSM_CONV, GGML_OP_UNARY }, { GGML_UNARY_OP_SILU })) {
ggml_cuda_op_ssm_conv(*cuda_ctx, node, cgraph->nodes[i+1]);
i++;
continue;
}
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_UNARY, GGML_OP_MUL }, { GGML_UNARY_OP_SILU }) ||
ggml_cuda_can_fuse(cgraph, i, { GGML_OP_UNARY, GGML_OP_MUL }, { GGML_UNARY_OP_SIGMOID }) ||
ggml_cuda_can_fuse(cgraph, i, { GGML_OP_UNARY, GGML_OP_MUL }, { GGML_UNARY_OP_SOFTPLUS })) {
ggml_cuda_op_unary_mul(*cuda_ctx, node, cgraph->nodes[i+1]);
i++;
continue;
}
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_UNARY, GGML_OP_SQR }, { GGML_UNARY_OP_RELU })) {
ggml_cuda_op_relu_sqr(*cuda_ctx, node, cgraph->nodes[i+1]);
i++;
continue;
}
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_SCALE, GGML_OP_UNARY, GGML_OP_SCALE }, { GGML_UNARY_OP_TANH })) {
i += 2;
ggml_cuda_op_softcap(*cuda_ctx, cgraph->nodes[i], node);
continue;
}
if (nodes_to_skip != 0) {
i += nodes_to_skip;
continue;
}
#ifndef NDEBUG
assert(node->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device));

View File

@@ -3,6 +3,7 @@
template <bool apply_silu, size_t split_d_inner, size_t d_conv>
static __global__ void ssm_conv_f32(const float * __restrict__ src0, const float * __restrict__ src1,
const float * __restrict__ bias,
const int src0_nb0, const int src0_nb1, const int src0_nb2, const int src1_nb1,
float * __restrict__ dst, const int dst_nb0, const int dst_nb1, const int dst_nb2,
const int64_t n_t) {
@@ -27,6 +28,8 @@ static __global__ void ssm_conv_f32(const float * __restrict__ src0, const float
w[j] = w_block[tid * stride_w + j];
}
float b = bias != nullptr ? bias[bidy * split_d_inner + tid] : 0.0f;
for (int64_t i = 0; i < n_t; i++) {
float sumf = 0.0f;
@@ -42,12 +45,14 @@ static __global__ void ssm_conv_f32(const float * __restrict__ src0, const float
for (size_t j = 0; j < d_conv; j++) {
sumf += x[(i + j) % d_conv] * w[j];
}
sumf += b;
y_block[i * stride_y + tid] = apply_silu ? ggml_cuda_op_silu_single(sumf) : sumf;
}
}
template <bool apply_silu, size_t split_d_inner, size_t d_conv, int64_t split_n_t>
static __global__ void ssm_conv_long_token_f32(const float * __restrict__ src0, const float * __restrict__ src1,
const float * __restrict__ bias,
const int src0_nb0, const int src0_nb1, const int src0_nb2,
const int src1_nb1, float * __restrict__ dst, const int dst_nb0,
const int dst_nb1, const int dst_nb2, const int64_t n_t) {
@@ -97,6 +102,8 @@ static __global__ void ssm_conv_long_token_f32(const float * __restrict__ src0,
w[j] = w_block[tid * stride_w + j];
}
float b = bias != nullptr ? bias[bidy * split_d_inner + tid] : 0.0f;
// Compute from shared memory
for (int64_t i = 0; i < local_n_t; i++) {
float sumf = 0.0f;
@@ -104,12 +111,13 @@ static __global__ void ssm_conv_long_token_f32(const float * __restrict__ src0,
for (size_t j = 0; j < d_conv; j++) {
sumf += smem[tid * n_cols + i + j] * w[j];
}
sumf += b;
y_block[i * stride_y + tid] = apply_silu ? ggml_cuda_op_silu_single(sumf) : sumf;
}
}
template <bool apply_silu>
static void ssm_conv_f32_cuda(const float * src0, const float * src1, const int src0_nb0, const int src0_nb1,
static void ssm_conv_f32_cuda(const float * src0, const float * src1, const float * bias, const int src0_nb0, const int src0_nb1,
const int src0_nb2, const int src1_nb1, float * dst, const int dst_nb0, const int dst_nb1,
const int dst_nb2, const int64_t nc, const int64_t nr, const int64_t n_t,
const int64_t n_s, cudaStream_t stream) {
@@ -120,14 +128,14 @@ static void ssm_conv_f32_cuda(const float * src0, const float * src1, const int
constexpr int kNC = decltype(NC)::value;
if (n_t <= 32) {
const dim3 blocks(n_s, (nr + threads - 1) / threads, 1);
ssm_conv_f32<apply_silu, threads, kNC><<<blocks, threads, 0, stream>>>(src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1,
ssm_conv_f32<apply_silu, threads, kNC><<<blocks, threads, 0, stream>>>(src0, src1, bias, src0_nb0, src0_nb1, src0_nb2, src1_nb1,
dst, dst_nb0, dst_nb1, dst_nb2, n_t);
} else {
const int64_t split_n_t = 32;
dim3 blocks(n_s, (nr + threads - 1) / threads, (n_t + split_n_t - 1) / split_n_t);
const size_t smem_size = threads * (kNC - 1 + split_n_t) * sizeof(float);
ssm_conv_long_token_f32<apply_silu, threads, kNC, split_n_t><<<blocks, threads, smem_size, stream>>>(
src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1, dst, dst_nb0, dst_nb1, dst_nb2, n_t);
src0, src1, bias, src0_nb0, src0_nb1, src0_nb2, src1_nb1, dst, dst_nb0, dst_nb1, dst_nb2, n_t);
}
};
@@ -140,11 +148,18 @@ static void ssm_conv_f32_cuda(const float * src0, const float * src1, const int
}
}
void ggml_cuda_op_ssm_conv(ggml_backend_cuda_context & ctx, ggml_tensor * dst, ggml_tensor * silu_dst) {
void ggml_cuda_op_ssm_conv(ggml_backend_cuda_context & ctx, ggml_tensor * dst, ggml_tensor * bias_add_node, ggml_tensor * silu_dst) {
const struct ggml_tensor * src0 = dst->src[0]; // conv_x
const struct ggml_tensor * src1 = dst->src[1]; // conv1d.weight
const bool fuse_bias = bias_add_node != nullptr;
const bool fuse_silu = silu_dst != nullptr;
// bias always comes with silu.
GGML_ASSERT(!fuse_bias || fuse_silu);
// The bias (when fused) is the non-conv operand of the ADD node.
const struct ggml_tensor * bias = fuse_bias ? (bias_add_node->src[0] == dst ? bias_add_node->src[1] : bias_add_node->src[0]) : nullptr;
// When fusing, write to silu_dst (the node downstream references).
const struct ggml_tensor * out = fuse_silu ? silu_dst : dst;
@@ -160,16 +175,23 @@ void ggml_cuda_op_ssm_conv(ggml_backend_cuda_context & ctx, ggml_tensor * dst, g
const float * src0_d = (const float *) src0->data;
const float * src1_d = (const float *) src1->data;
const float * bias_d = fuse_bias ? (const float *) bias->data : nullptr;
float * dst_d = (float *) out->data;
cudaStream_t stream = ctx.stream();
GGML_ASSERT(src0->type == GGML_TYPE_F32);
GGML_ASSERT(out->type == GGML_TYPE_F32);
if (fuse_bias) {
GGML_ASSERT(bias->type == GGML_TYPE_F32);
GGML_ASSERT(ggml_is_contiguous(bias));
GGML_ASSERT(ggml_nelements(bias) == nr);
}
if (fuse_silu) {
ssm_conv_f32_cuda<true>(src0_d, src1_d, src0->nb[0], src0->nb[1], src0->nb[2], src1->nb[1], dst_d, out->nb[0], out->nb[1],
ssm_conv_f32_cuda<true>(src0_d, src1_d, bias_d, src0->nb[0], src0->nb[1], src0->nb[2], src1->nb[1], dst_d, out->nb[0], out->nb[1],
out->nb[2], nc, nr, n_t, n_s, stream);
} else {
ssm_conv_f32_cuda<false>(src0_d, src1_d, src0->nb[0], src0->nb[1], src0->nb[2], src1->nb[1], dst_d, out->nb[0], out->nb[1],
ssm_conv_f32_cuda<false>(src0_d, src1_d, bias_d, src0->nb[0], src0->nb[1], src0->nb[2], src1->nb[1], dst_d, out->nb[0], out->nb[1],
out->nb[2], nc, nr, n_t, n_s, stream);
}
}

View File

@@ -1,3 +1,3 @@
#include "common.cuh"
void ggml_cuda_op_ssm_conv(ggml_backend_cuda_context & ctx, ggml_tensor * dst, ggml_tensor * silu_dst = nullptr);
void ggml_cuda_op_ssm_conv(ggml_backend_cuda_context & ctx, ggml_tensor * dst, ggml_tensor * bias_add_node = nullptr, ggml_tensor * silu_dst = nullptr);

View File

@@ -48,14 +48,16 @@ using intvec = std::vector<int>;
using uintvec = std::vector<unsigned int>;
using u32vec = std::vector<uint32_t>;
static size_t opt_ndev = 1;
static size_t opt_nhvx = 0; // use all
static int opt_arch = 0; // autodetect
static int opt_etm = 0;
static int opt_verbose = 0;
static int opt_profile = 0; // profiling mode (0-disabled, 1-basic, 2-pmu)
static int opt_hostbuf = 1; // hostbuf ON by default
static int opt_use_hmx = 1; // when set, enable HMX; when 0, use HVX only
static int opt_arch = 0; // autodetect
static size_t opt_ndev = 1;
static size_t opt_nhvx = 0; // use all
static int opt_use_hmx = 1; // when set, enable HMX; when 0, use HVX only
static size_t opt_vmem = HTP_OP_MAX_VMEM_DEFAULT; // max available va space for buffer mappings
static size_t opt_mbuf = 1ul * 1024 * 1024 * 1024; // max buffer size
static int opt_etm = 0;
static int opt_verbose = 0;
static int opt_profile = 0; // profiling mode (0-disabled, 1-basic, 2-pmu)
static int opt_hostbuf = 1; // hostbuf ON by default
// Default PMU events, if profiling with PMU (mode=2) is enabled
// See https://docs.qualcomm.com/doc/80-N2040-60/topic/pmu-events.html
@@ -66,6 +68,7 @@ static u32vec opt_pmu_evt { 0x3, 0x111, 0x100, 0x105, 0x240, 0x256, 0x7D, 0x8C }
static int opt_opstage = HTP_OPSTAGE_QUEUE | HTP_OPSTAGE_COMPUTE;
static int opt_opbatch = 1024; // max number of ops in a batch
static int opt_opqueue = 16; // max number of pending batches
static std::regex* opt_opfilter = NULL; // regex of ops to not claim
#define HEX_VERBOSE(...) \
@@ -110,7 +113,7 @@ static void ggml_hexagon_dump_op_supp(const std::string &sess_name, const struct
if (!opt_verbose) return;
op_desc desc(op);
GGML_LOG_DEBUG("ggml-hex: %s supports-op %s : %s : %s : %s : %s : %s : %s\n", sess_name.c_str(),
GGML_LOG_DEBUG("ggml-hex: %s supports-op %s: %s : %s : %s : %s : %s : %s\n", sess_name.c_str(),
ggml_op_desc(op), desc.names, desc.dims, desc.types, desc.strides, desc.buffs, supp ? "yes" : "no");
}
@@ -118,8 +121,6 @@ static void ggml_hexagon_dump_op_prof(const std::string &sess_name, const ggml_t
uint32_t op_usec, uint32_t op_cycles, const uint32_t pmu[]) {
if (!opt_profile) return;
op_desc desc(op);
char pmu_str[256] = "";
if (opt_profile > 1) {
static_assert(HTP_PROF_PMU_NCNT == 8, "current implementation assumes 8 PMU counters");
@@ -127,6 +128,7 @@ static void ggml_hexagon_dump_op_prof(const std::string &sess_name, const ggml_t
pmu[0], pmu[1], pmu[2], pmu[3], pmu[4], pmu[5], pmu[6], pmu[7]);
}
op_desc desc(op);
GGML_LOG_DEBUG("ggml-hex: %s profile-op %s: %s : %s : %s : %s : usec %u cycles %u%s\n", sess_name.c_str(),
ggml_op_desc(op), desc.names, desc.dims, desc.types, desc.strides, op_usec, op_cycles, pmu_str);
}
@@ -191,33 +193,30 @@ struct ggml_hexagon_shared_buffer {
bool mapped;
bool pinned;
void mmap(bool pinned = false) {
int err = fastrpc_mmap(sess->domain_id, this->fd, (void *) this->base, 0, this->size, FASTRPC_MAP_FD_DELAYED);
void mmap() {
fastrpc_map_flags flags = this->pinned ? FASTRPC_MAP_FD : FASTRPC_MAP_FD_DELAYED;
int err = fastrpc_mmap(sess->domain_id, this->fd, (void *) this->base, 0, this->size, flags);
if (err != 0) {
GGML_LOG_ERROR("ggml-hex: %s buffer mapping failed : domain_id %d size %zu fd %d error 0x%08x\n", sess->c_name(),
sess->domain_id, this->size, this->fd, (unsigned) err);
throw std::runtime_error("ggml-hex: fastrpc_mmap failed (see log for details)");
}
if (pinned) {
err = htp_iface_mmap(sess->handle, this->fd, this->size, pinned);
if (err != 0) {
GGML_LOG_ERROR("ggml-hex: %s buffer pinning failed : domain_id %d size %zu fd %d error 0x%08x\n", sess->c_name(),
sess->domain_id, this->size, this->fd, (unsigned) err);
throw std::runtime_error("ggml-hex: htp_iface_mmap failed (see log for details)");
}
}
this->mapped = true;
this->pinned = pinned;
HEX_VERBOSE("ggml-hex: %s mapped buffer: base %p size %zu fd %d pinned %u\n",
sess->c_name(), (void *) this->base, this->size, this->fd, pinned);
this->mapped = true;
}
void unmap() {
if (!this->mapped) return;
htp_iface_munmap(sess->handle, this->fd);
if (!this->pinned) {
// HTP might still hold a reference, tell it drop it
htp_iface_munmap(sess->handle, this->fd);
}
fastrpc_munmap(sess->domain_id, this->fd, (void *) this->base, this->size);
HEX_VERBOSE("ggml-hex: %s unmapped buffer: base %p size %zu fd %d\n", sess->c_name(),
@@ -227,7 +226,7 @@ struct ggml_hexagon_shared_buffer {
this->fd = -1;
}
void alloc(size_t size, bool pinned = false) {
void alloc(size_t size) {
if (this->base) return;
this->base = (uint8_t *) rpcmem_alloc2(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, size);
@@ -245,8 +244,7 @@ struct ggml_hexagon_shared_buffer {
HEX_VERBOSE("ggml-hex: %s allocated buffer: base %p size %zu fd %d pinned %d\n", sess->c_name(),
(void *) this->base, this->size, this->fd, (int) pinned);
mmap(pinned);
mmap();
}
void free() {
@@ -262,15 +260,14 @@ struct ggml_hexagon_shared_buffer {
}
ggml_hexagon_shared_buffer(ggml_hexagon_session * sess, size_t size, bool pinned = false) {
size += 4 * 1024; // extra page for padding
this->sess = sess;
this->size = 0;
this->base = nullptr;
this->fd = -1;
this->mapped = false;
this->pinned = pinned;
alloc(size, pinned);
alloc(size);
}
~ggml_hexagon_shared_buffer() {
@@ -1475,6 +1472,7 @@ static ggml_backend_buffer_t ggml_backend_hexagon_buffer_type_alloc_buffer(
ggml_backend_buffer_type_t buffer_type, size_t size) {
auto sess = static_cast<ggml_backend_hexagon_buffer_type_context *>(buffer_type->context)->sess;
try {
size += 4 * 1024; // guard page
ggml_hexagon_shared_buffer * sbuf = new ggml_hexagon_shared_buffer(sess, size);
return ggml_backend_buffer_init(buffer_type, ggml_backend_hexagon_buffer_interface, sbuf, size);
} catch (const std::exception & exc) {
@@ -1487,6 +1485,7 @@ static ggml_backend_buffer_t ggml_backend_hexagon_repack_buffer_type_alloc_buffe
ggml_backend_buffer_type_t buffer_type, size_t size) {
auto sess = static_cast<ggml_backend_hexagon_buffer_type_context *>(buffer_type->context)->sess;
try {
size += 4 * 1024; // guard page
ggml_hexagon_shared_buffer * sbuf = new ggml_hexagon_shared_buffer(sess, size);
return ggml_backend_buffer_init(buffer_type, ggml_backend_hexagon_buffer_interface, sbuf, size);
} catch (const std::exception & exc) {
@@ -1505,7 +1504,7 @@ static size_t ggml_backend_hexagon_buffer_type_get_alloc_size(ggml_backend_buffe
}
static size_t ggml_backend_hexagon_buffer_type_get_max_size(ggml_backend_buffer_type_t buffer_type) {
return 1UL * 1024 * 1024 * 1024; // 1GB per buffer
return opt_mbuf; // typically 1GB per buffer
GGML_UNUSED(buffer_type);
}
@@ -1573,14 +1572,14 @@ struct ggml_hexagon_opbatch {
d_map.clear();
}
ggml_hexagon_opbatch(ggml_hexagon_session *sess, size_t batch_size) {
ggml_hexagon_opbatch(ggml_hexagon_session *sess, size_t batch_size, size_t max_vmem) {
this->sess = sess;
n_bufs_max = HTP_OP_MAX_BUFS;
n_ops_max = batch_size;
n_tens_max = n_ops_max + n_ops_max * HTP_OP_MAX_INPUTS;
b_vmem_max = HTP_OP_MAX_VMEM;
b_vmem_max = max_vmem;
ops.resize(n_ops_max);
@@ -1592,6 +1591,9 @@ struct ggml_hexagon_opbatch {
t_map.reserve(n_tens_max);
d_map.reserve(n_tens_max);
GGML_LOG_INFO("ggml-hex: %s op batching: n-bufs %u n-tensors %u n-ops %u vmem %zu\n",
sess->c_name(), n_bufs_max, n_tens_max, n_ops_max, b_vmem_max);
reset();
}
@@ -1925,6 +1927,8 @@ void ggml_hexagon_session::flush_batch() {
// Bump pending flag (cleared in the session::flush once we get the response)
this->op_pending++; // atomic inc
HEX_VERBOSE("ggml-hex: %s queue-opbatch: %p size %u\n", this->c_name(), dbuf.ptr, dbuf.size);
int err = dspqueue_write(this->queue, 0, 1, &dbuf, sizeof(req), (const uint8_t*) &req, DSPQUEUE_TIMEOUT);
if (err != 0) {
GGML_ABORT("ggml-hex: %s dspqueue_write failed: 0x%08x\n", this->c_name(), (unsigned) err);
@@ -1944,6 +1948,35 @@ void ggml_hexagon_session::flush(bool all) {
flush_pending(all);
}
static size_t ggml_hexagon_measure_max_vmem(ggml_hexagon_session *sess) {
// Allocate a bunch pinned buffers till failure.
// This is kind of expensive but handy for figuring out exactly how much we can mmap on a specific device.
// Typically we're going to allocate all/most of these buffers anyway for the model weights.
std::vector<ggml_hexagon_shared_buffer *> sbufs;
const size_t MiB = 1024 * 1024;
const size_t GiB = MiB * 1024;
size_t vmem = 0;
size_t step = 256u * MiB;
try {
sbufs.push_back(new ggml_hexagon_shared_buffer(sess, GiB, true)); vmem += GiB;
sbufs.push_back(new ggml_hexagon_shared_buffer(sess, GiB, true)); vmem += GiB;
sbufs.push_back(new ggml_hexagon_shared_buffer(sess, GiB, true)); vmem += GiB;
while (1) {
sbufs.push_back(new ggml_hexagon_shared_buffer(sess, step, true));
vmem += step;
}
} catch (...) { }
for (auto b : sbufs) { delete b; }
return vmem - step; // backoff to account for overhead from internal mappings
}
void ggml_hexagon_session::allocate(int dev_id) noexcept(false) {
this->valid_session = false;
this->valid_handle = false;
@@ -1957,7 +1990,7 @@ void ggml_hexagon_session::allocate(int dev_id) noexcept(false) {
this->op_pending = 0;
GGML_LOG_INFO("ggml-hex: allocating new session: %s\n", this->name.c_str());
GGML_LOG_DEBUG("ggml-hex: %s allocating new session\n", this->name.c_str());
domain * my_domain = get_domain(this->domain_id);
if (my_domain == NULL) {
@@ -2033,9 +2066,6 @@ void ggml_hexagon_session::allocate(int dev_id) noexcept(false) {
this->valid_handle = true;
GGML_LOG_INFO("ggml-hex: new session: %s : session-id %d domain-id %d uri %s handle 0x%lx\n", this->name.c_str(),
this->session_id, this->domain_id, session_uri, (unsigned long) this->handle);
// Enable FastRPC QoS mode
{
struct remote_rpc_control_latency l;
@@ -2047,6 +2077,9 @@ void ggml_hexagon_session::allocate(int dev_id) noexcept(false) {
}
}
GGML_LOG_INFO("ggml-hex: %s new session : session-id %d domain-id %d uri %s handle 0x%lx\n", this->c_name(),
this->session_id, this->domain_id, session_uri, (unsigned long) this->handle);
const size_t req_q_size = (sizeof(htp_opbatch_req) * opt_opqueue * 2) + 1024;
const size_t rsp_q_size = (sizeof(htp_opbatch_rsp) * opt_opqueue * 2) + 1024;
@@ -2091,13 +2124,19 @@ void ggml_hexagon_session::allocate(int dev_id) noexcept(false) {
}
// Allocate buffers and state for op batching
this->op_batch = new ggml_hexagon_opbatch(this, opt_opbatch);
this->op_queue = new ggml_hexagon_opqueue(this, opt_opbatch, opt_opqueue);
// Start processing op batch requests
err = htp_iface_start(this->handle, dev_id, this->queue_id, opt_nhvx, opt_use_hmx);
if (!opt_vmem) {
opt_vmem = ggml_hexagon_measure_max_vmem(this);
GGML_LOG_INFO("ggml-hex: %s measured max vmem %zu\n", this->c_name(), opt_vmem);
}
this->op_batch = new ggml_hexagon_opbatch(this, opt_opbatch, opt_vmem);
// Start dspqueue/opbatch processing
err = htp_iface_start(this->handle, dev_id, this->queue_id, opt_nhvx, opt_use_hmx, opt_vmem);
if (err != 0) {
GGML_LOG_ERROR("ggml-hex: failed to start session: 0x%08x\n", (unsigned) err);
GGML_LOG_ERROR("ggml-hex: %s failed to start session: 0x%08x\n", this->c_name(), (unsigned) err);
throw std::runtime_error("ggml-hex: iface start failed (see log for details)");
}
this->valid_iface = true;
@@ -2108,17 +2147,17 @@ void ggml_hexagon_session::release() noexcept(true) {
int err;
delete this->op_batch;
delete this->op_queue;
// Stop the DSP-side service and close the queue
if (this->valid_iface) {
// Stop dspqueue/opbatch processing
err = htp_iface_stop(this->handle);
if (err != 0) {
GGML_ABORT("ggml-hex: htp_iface_stop failed: 0x%08x\n", (unsigned) err);
}
}
delete this->op_batch;
delete this->op_queue;
if (opt_etm) {
err = htp_iface_etm(this->handle, 0);
if (err != 0) {
@@ -3380,21 +3419,6 @@ struct ggml_hexagon_registry {
ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
GGML_LOG_INFO("ggml-hex: Hexagon backend (experimental) : allocating new registry : ndev %zu\n", opt_ndev);
if (!opt_arch) {
int err = get_hex_arch_ver(CDSP_DOMAIN_ID, &opt_arch);
if (err != 0) {
GGML_LOG_ERROR("ggml-hex: failed to query HTP version (err %d) defaulting to v73\n", err);
opt_arch = 73;
}
}
#if defined(__ANDROID__)
if (opt_arch < 75) {
opt_ndev = 1;
GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v75.\n");
}
#endif
GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch);
// Create devices / sessions
@@ -3480,32 +3504,67 @@ static void ggml_hexagon_init(ggml_backend_reg * reg) {
static_assert((unsigned int) HTP_TYPE_IQ4_NL == (unsigned int) GGML_TYPE_IQ4_NL,
"please update hexagon_type to match ggml_type");
const char * str_verbose = getenv("GGML_HEXAGON_VERBOSE");
const char * str_hostbuf = getenv("GGML_HEXAGON_HOSTBUF");
const char * str_opstage = getenv("GGML_HEXAGON_OPSTAGE");
const char * str_opbatch = getenv("GGML_HEXAGON_OPBATCH");
const char * str_opqueue = getenv("GGML_HEXAGON_OPQUEUE");
const char * str_opfilter= getenv("GGML_HEXAGON_OPFILTER");
const char * str_profile = getenv("GGML_HEXAGON_PROFILE");
const char * str_etm = getenv("GGML_HEXAGON_ETM");
const char * str_nhvx = getenv("GGML_HEXAGON_NHVX");
const char * str_use_hmx = getenv("GGML_HEXAGON_USE_HMX");
const char * str_ndev = getenv("GGML_HEXAGON_NDEV");
const char * str_arch = getenv("GGML_HEXAGON_ARCH");
const char * str_verbose = getenv("GGML_HEXAGON_VERBOSE");
const char * str_hostbuf = getenv("GGML_HEXAGON_HOSTBUF");
const char * str_opstage = getenv("GGML_HEXAGON_OPSTAGE");
const char * str_opbatch = getenv("GGML_HEXAGON_OPBATCH");
const char * str_opqueue = getenv("GGML_HEXAGON_OPQUEUE");
const char * str_opfilter = getenv("GGML_HEXAGON_OPFILTER");
const char * str_profile = getenv("GGML_HEXAGON_PROFILE");
const char * str_etm = getenv("GGML_HEXAGON_ETM");
const char * str_nhvx = getenv("GGML_HEXAGON_NHVX");
const char * str_use_hmx = getenv("GGML_HEXAGON_USE_HMX");
const char * str_ndev = getenv("GGML_HEXAGON_NDEV");
const char * str_arch = getenv("GGML_HEXAGON_ARCH");
const char * str_vmem = getenv("GGML_HEXAGON_VMEM");
const char * str_mbuf = getenv("GGML_HEXAGON_MBUF");
// Init Arch first since it affects other defaults
if (!str_arch) {
int err = get_hex_arch_ver(CDSP_DOMAIN_ID, &opt_arch);
if (err != 0) {
GGML_LOG_ERROR("ggml-hex: failed to query HTP version (err %d) defaulting to v73\n", err);
opt_arch = 73;
}
} else {
if (str_arch[0] == 'v' || str_arch[0] == 'V') {
str_arch++;
}
opt_arch = strtoul(str_arch, NULL, 0);
}
size_t MiB = 1024 * 1024;
// Update vmem default
opt_vmem = opt_arch >= 75 ? HTP_OP_MAX_VMEM_DEFAULT : 3000 * MiB;
auto RE_ICASE = std::regex_constants::icase;
opt_opfilter = str_opfilter ? new std::regex(str_opfilter, RE_ICASE) : NULL;
opt_verbose = str_verbose ? atoi(str_verbose) : 0;
opt_hostbuf = str_hostbuf ? atoi(str_hostbuf) : opt_hostbuf;
opt_opstage = str_opstage ? strtoul(str_opstage, NULL, 0) : opt_opstage;
opt_opbatch = str_opbatch ? strtoul(str_opbatch, NULL, 0) : opt_opbatch;
opt_opqueue = str_opqueue ? strtoul(str_opqueue, NULL, 0) : opt_opqueue;
opt_etm = str_etm ? atoi(str_etm) : 0;
opt_nhvx = str_nhvx ? strtoul(str_nhvx, NULL, 0) : opt_nhvx;
opt_use_hmx = str_use_hmx ? atoi(str_use_hmx) : opt_use_hmx;
opt_ndev = str_ndev ? strtoul(str_ndev, NULL, 0) : opt_ndev;
opt_hostbuf = str_hostbuf ? atoi(str_hostbuf) : opt_hostbuf;
opt_opfilter = str_opfilter ? new std::regex(str_opfilter, RE_ICASE) : NULL;
opt_verbose = str_verbose ? atoi(str_verbose) : 0;
opt_hostbuf = str_hostbuf ? atoi(str_hostbuf) : opt_hostbuf;
opt_opstage = str_opstage ? strtoul(str_opstage, NULL, 0) : opt_opstage;
opt_opbatch = str_opbatch ? strtoul(str_opbatch, NULL, 0) : opt_opbatch;
opt_opqueue = str_opqueue ? strtoul(str_opqueue, NULL, 0) : opt_opqueue;
opt_profile = str_profile ? atoi(str_profile) : 0;
opt_etm = str_etm ? atoi(str_etm) : 0;
opt_nhvx = str_nhvx ? strtoul(str_nhvx, NULL, 0) : opt_nhvx;
opt_use_hmx = str_use_hmx ? atoi(str_use_hmx) : opt_use_hmx;
opt_ndev = str_ndev ? strtoul(str_ndev, NULL, 0) : opt_ndev;
opt_hostbuf = str_hostbuf ? atoi(str_hostbuf) : opt_hostbuf;
opt_mbuf = str_mbuf ? strtoul(str_mbuf, NULL, 0) * MiB : opt_mbuf;
opt_vmem = str_vmem ? strtoul(str_vmem, NULL, 0) * MiB : opt_vmem;
if (opt_ndev > GGML_HEXAGON_MAX_SESSIONS) {
opt_ndev = GGML_HEXAGON_MAX_SESSIONS;
}
#if defined(__ANDROID__)
if (opt_arch < 75) {
opt_ndev = 1;
GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v75.\n");
}
#endif
if (str_profile) {
opt_pmu_evt = [&]() -> std::vector<uint32_t> {
@@ -3520,17 +3579,6 @@ static void ggml_hexagon_init(ggml_backend_reg * reg) {
vec_to_str<uint32_t, 16>(opt_pmu_evt).c_str());
}
if (opt_ndev > GGML_HEXAGON_MAX_SESSIONS) {
opt_ndev = GGML_HEXAGON_MAX_SESSIONS;
}
if (str_arch) {
if (str_arch[0] == 'v') {
str_arch++;
}
opt_arch = strtoul(str_arch, NULL, 0);
}
reg->context = new ggml_hexagon_registry(reg);
}

View File

@@ -20,7 +20,7 @@ struct htp_mmap {
uint64_t size;
uint64_t base;
uint32_t fd;
uint32_t pinned;
uint32_t reserved;
};
// Scratchpad state
@@ -77,6 +77,8 @@ struct htp_context {
atomic_bool vtcm_valid;
atomic_bool vtcm_needs_release;
uint64_t max_vmem;
struct htp_ops_context octx;
#ifdef HTP_HAS_HMX

View File

@@ -90,15 +90,11 @@ enum htp_op_code {
#define HTP_OP_MAX_INPUTS 6 // aka GGML_MAX_SRCS
#define HTP_OP_MAX_PARAMS 16 // aka GGML_MAX_OP_PARAMS
#define HTP_OP_MAX_BUFS 8
#define HTP_OP_MAX_BUFS 16
#define HTP_OP_MAX_REQS 256
#define HTP_OP_MAX_TENSORS (HTP_OP_MAX_REQS * HTP_OP_MAX_INPUTS + HTP_OP_MAX_REQS)
#if __HVX_ARCH__ < 75
#define HTP_OP_MAX_VMEM (3167538380u)
#else
#define HTP_OP_MAX_VMEM (3221225472u)
#endif
#define HTP_OP_MAX_VMEM_DEFAULT (3355443200u)
#define HTP_MMAP_MAX_VMEM (2147483648u)

View File

@@ -11,9 +11,9 @@ struct htp_iface_pmu_conf {
};
interface htp_iface : remote_handle64 {
AEEResult start(in uint32 sess_id, in uint64 dsp_queue_id, in uint32 n_hvx, in uint32 use_hmx);
AEEResult start(in uint32 sess_id, in uint64 dsp_queue_id, in uint32 n_hvx, in uint32 use_hmx, in uint64 max_vmem);
AEEResult stop();
AEEResult mmap(in uint32 fd, in uint32 size, in uint32 pinned);
AEEResult mmap(in uint32 fd, in uint32 size);
AEEResult munmap(in uint32 fd);
AEEResult profiler(in uint32 mode, in htp_iface_pmu_conf pmu);
AEEResult etm(in uint32 enable);

View File

@@ -210,7 +210,7 @@ AEEResult htp_iface_close(remote_handle64 handle) {
return AEE_SUCCESS;
}
AEEResult htp_iface_mmap(remote_handle64 handle, uint32 fd, uint32 size, uint32 pinned) {
AEEResult htp_iface_mmap(remote_handle64 handle, uint32_t fd, uint32_t size) {
struct htp_context * ctx = (struct htp_context *) handle;
if (!ctx) {
return AEE_EBADPARM;
@@ -220,7 +220,6 @@ AEEResult htp_iface_mmap(remote_handle64 handle, uint32 fd, uint32 size, uint32
for (uint32_t i=0; i<HTP_MAX_MMAPS; i++) {
struct htp_mmap *m = &ctx->mmap[i];
if (m->fd == fd) {
m->pinned = pinned;
return AEE_SUCCESS;
}
}
@@ -229,7 +228,7 @@ AEEResult htp_iface_mmap(remote_handle64 handle, uint32 fd, uint32 size, uint32
for (uint32_t i=0; i<HTP_MAX_MMAPS; i++) {
struct htp_mmap *m = &ctx->mmap[i];
if (!m->size) {
FARF(HIGH, "mmap : fd %u size %u pinned %u", fd, size, pinned);
FARF(HIGH, "mmap : fd %u size %u", fd, size);
#if __HVX_ARCH__ > 73
void *va = HAP_mmap2(NULL, size, HAP_PROT_READ | HAP_PROT_WRITE, 0, fd, 0);
#else
@@ -248,7 +247,6 @@ AEEResult htp_iface_mmap(remote_handle64 handle, uint32 fd, uint32 size, uint32
m->base = (uint64_t) va;
m->fd = fd;
m->size = size;
m->pinned = pinned;
return AEE_SUCCESS;
}
@@ -275,7 +273,6 @@ AEEResult htp_iface_munmap(remote_handle64 handle, uint32 fd) {
m->size = 0;
m->base = NULL;
m->fd = -1;
m->pinned = 0;
}
}
@@ -358,7 +355,7 @@ static void vtcm_free(struct htp_context * ctx) {
static void htp_packet_callback(dspqueue_t queue, int error, void * context);
static void htp_error_callback(dspqueue_t queue, int error, void * context);
AEEResult htp_iface_start(remote_handle64 handle, uint32 sess_id, uint64 dsp_queue_id, uint32 n_hvx, uint32 use_hmx) {
AEEResult htp_iface_start(remote_handle64 handle, uint32 sess_id, uint64 dsp_queue_id, uint32 n_hvx, uint32 use_hmx, uint64_t max_vmem) {
struct htp_context * ctx = (struct htp_context *) handle;
if (!ctx) {
@@ -376,12 +373,12 @@ AEEResult htp_iface_start(remote_handle64 handle, uint32 sess_id, uint64 dsp_que
htp_error_callback, // Error callback; no errors expected on the DSP
(void *) ctx, // Callback context
&ctx->queue);
if (err) {
FARF(ERROR, "Queue import failed with 0x%08x", (unsigned) err);
return err;
}
ctx->max_vmem = max_vmem;
ctx->thread_id = qurt_thread_get_id();
ctx->thread_prio = qurt_thread_get_priority(ctx->thread_id);
@@ -622,8 +619,8 @@ static inline bool reuse_buf(struct htp_context *ctx, uint32_t *m_reuse, struct
}
static inline void drop_mmap(struct htp_context *ctx, struct htp_mmap *m) {
if (m->size && !m->pinned) {
FARF(HIGH, "unmap : fd %u base %p size %u pinned %u", m->fd, (void*) m->base, (uint32_t) m->size, m->pinned);
if (m->size) {
FARF(HIGH, "unmap : fd %u base %p size %u", m->fd, (void*) m->base, (uint32_t) m->size);
#if __HVX_ARCH__ > 73
HAP_munmap2((void *) m->base, m->size);
#else
@@ -660,9 +657,8 @@ static inline void mmap_buf(struct htp_context *ctx, struct htp_buf_desc *b) {
m->base = b->base = (uint64_t) va;
m->fd = b->fd;
m->size = b->size;
m->pinned = 0;
FARF(HIGH, "mmap : fd %u base %p size %u pinned %u", m->fd, (void*) m->base, (uint32_t) m->size, m->pinned);
FARF(HIGH, "mmap : fd %u base %p size %u", m->fd, (void*) m->base, (uint32_t) m->size);
return;
}
}
@@ -672,8 +668,8 @@ static void prep_op_bufs(struct htp_context *ctx, struct htp_buf_desc *bufs, uin
uint32_t m_reuse = 0; // mmap reuse mask (index from ctx->mmap array)
uint32_t b_reuse = 0; // buf reuse count
size_t m_vmem = 0; // mapped vmem
size_t e_vmem = 0; // extra vmem
uint64_t m_vmem = 0; // mapped vmem
uint64_t e_vmem = 0; // extra vmem
// See what we can reuse
for (uint32_t i=0; i < n_bufs; i++) {
@@ -687,9 +683,10 @@ static void prep_op_bufs(struct htp_context *ctx, struct htp_buf_desc *bufs, uin
// See how much vmem we have mmaped right now
for (uint32_t i=0; i<HTP_MAX_MMAPS; i++) { m_vmem += ctx->mmap[i].size; }
FARF(HIGH, "prep-bufs : pass1 mmap-vmem %zu extra-vmem %zu n-bufs %u b-reuse %u", m_vmem, e_vmem, n_bufs, b_reuse);
FARF(HIGH, "prep-bufs : pass1 mmap-vmem %zu extra-vmem %zu max-vmem %zu : n-bufs %u b-reuse %u",
(size_t) m_vmem, (size_t) e_vmem, (size_t) ctx->max_vmem, n_bufs, b_reuse);
if ((m_vmem + e_vmem) > HTP_OP_MAX_VMEM) {
if ((m_vmem + e_vmem) > ctx->max_vmem) {
// Drop unused mappings
for (uint32_t i=0; i < HTP_MAX_MMAPS; i++) {
bool used = m_reuse & (1<<i);

View File

@@ -494,9 +494,10 @@ struct ggml_webgpu_unary_pipeline_key_hash {
/** FlashAttention */
enum ggml_webgpu_flash_attn_path : uint32_t {
GGML_WEBGPU_FLASH_ATTN_PATH_SUBGROUP_MATRIX = 0u,
GGML_WEBGPU_FLASH_ATTN_PATH_TILE = 1u,
GGML_WEBGPU_FLASH_ATTN_PATH_VEC = 2u,
GGML_WEBGPU_FLASH_ATTN_PATH_NONE = 0u,
GGML_WEBGPU_FLASH_ATTN_PATH_SUBGROUP_MATRIX = 1u,
GGML_WEBGPU_FLASH_ATTN_PATH_TILE = 2u,
GGML_WEBGPU_FLASH_ATTN_PATH_VEC = 3u,
};
struct ggml_webgpu_flash_attn_pipeline_key {
@@ -534,7 +535,7 @@ struct ggml_webgpu_flash_attn_pipeline_key_hash {
};
struct ggml_webgpu_flash_attn_decisions {
uint32_t path = GGML_WEBGPU_FLASH_ATTN_PATH_SUBGROUP_MATRIX;
uint32_t path = GGML_WEBGPU_FLASH_ATTN_PATH_NONE;
uint32_t q_tile = 0;
uint32_t kv_tile = 0;
uint32_t wg_size = 0;
@@ -709,19 +710,29 @@ inline ggml_webgpu_flash_attn_decisions ggml_webgpu_flash_attn_get_decisions(
(context.src0->ne[0] % GGML_WEBGPU_FLASH_ATTN_TILE_KV_VEC_WIDTH == 0) &&
(context.src2->ne[0] % GGML_WEBGPU_FLASH_ATTN_TILE_KV_VEC_WIDTH == 0) && !use_vec;
decisions.path = use_vec ? GGML_WEBGPU_FLASH_ATTN_PATH_VEC :
use_tile ? GGML_WEBGPU_FLASH_ATTN_PATH_TILE :
GGML_WEBGPU_FLASH_ATTN_PATH_SUBGROUP_MATRIX;
decisions.path = use_vec ? GGML_WEBGPU_FLASH_ATTN_PATH_VEC :
use_tile ? GGML_WEBGPU_FLASH_ATTN_PATH_TILE :
context.supports_subgroup_matrix ? GGML_WEBGPU_FLASH_ATTN_PATH_SUBGROUP_MATRIX :
GGML_WEBGPU_FLASH_ATTN_PATH_NONE;
if (decisions.path == GGML_WEBGPU_FLASH_ATTN_PATH_NONE) {
return decisions;
}
const ggml_webgpu_flash_attn_pipeline_key key = ggml_webgpu_flash_attn_make_pipeline_key(context, decisions.path);
decisions.kv_direct = key.kv_direct;
const uint32_t max_kv_tile = ggml_webgpu_flash_attn_max_kv_tile(context, key);
// invalidate if even the smallest kv_tile doesn't fit in shared memory
if (max_kv_tile == 0) {
decisions.path = GGML_WEBGPU_FLASH_ATTN_PATH_NONE;
return decisions;
}
if (decisions.path == GGML_WEBGPU_FLASH_ATTN_PATH_VEC) {
const uint32_t min_kv_tile = ggml_webgpu_flash_attn_max_kv_tile(context, key);
decisions.q_tile = 1u;
decisions.kv_tile = std::max(8u, std::min(32u, min_kv_tile));
decisions.kv_tile = (decisions.kv_tile / 8u) * 8u;
decisions.wg_size = std::max(1u, std::min<uint32_t>(32u, context.max_subgroup_size));
decisions.q_tile = 1u;
decisions.kv_tile = std::max(8u, std::min(32u, max_kv_tile));
decisions.kv_tile = (decisions.kv_tile / 8u) * 8u;
decisions.wg_size = std::max(1u, std::min<uint32_t>(32u, context.max_subgroup_size));
if (decisions.kv_direct) {
decisions.kv_tile = std::min(decisions.kv_tile, GGML_WEBGPU_KV_SEQ_PAD);
while (GGML_WEBGPU_KV_SEQ_PAD % decisions.kv_tile != 0) {
@@ -734,9 +745,8 @@ inline ggml_webgpu_flash_attn_decisions ggml_webgpu_flash_attn_get_decisions(
decisions.q_tile =
decisions.path == GGML_WEBGPU_FLASH_ATTN_PATH_TILE ? GGML_WEBGPU_FLASH_ATTN_TILE_Q_TILE : context.sg_mat_m;
decisions.kv_tile = decisions.path == GGML_WEBGPU_FLASH_ATTN_PATH_TILE ?
std::min(64u, ggml_webgpu_flash_attn_max_kv_tile(context, key)) :
std::min(ggml_webgpu_flash_attn_max_kv_tile(context, key),
context.sg_mat_n * GGML_WEBGPU_FLASH_ATTN_PREFERRED_KV_SG_TILES);
std::min(64u, max_kv_tile) :
std::min(max_kv_tile, context.sg_mat_n * GGML_WEBGPU_FLASH_ATTN_PREFERRED_KV_SG_TILES);
decisions.wg_size = decisions.path == GGML_WEBGPU_FLASH_ATTN_PATH_TILE ?
GGML_WEBGPU_FLASH_ATTN_PREFERRED_WG_SIZE :
std::max(context.max_subgroup_size, GGML_WEBGPU_FLASH_ATTN_PREFERRED_WG_SIZE);
@@ -755,7 +765,6 @@ inline ggml_webgpu_flash_attn_decisions ggml_webgpu_flash_attn_get_decisions(
context.sg_mat_n;
}
}
return decisions;
}
@@ -1364,7 +1373,7 @@ class ggml_webgpu_shader_lib {
if (key.src_type == GGML_TYPE_Q1_0) {
defines.push_back("BLOCK_SIZE=128u");
} else if ((key.src_type >= GGML_TYPE_Q4_0 && key.src_type <= GGML_TYPE_Q8_1) ||
key.src_type == GGML_TYPE_IQ4_NL) {
key.src_type == GGML_TYPE_IQ4_NL) {
defines.push_back("BLOCK_SIZE=32u");
} else if (key.src_type >= GGML_TYPE_Q2_K) {
defines.push_back("BLOCK_SIZE=256u");
@@ -2325,6 +2334,7 @@ class ggml_webgpu_shader_lib {
size_t storage_offset_alignment) {
const ggml_webgpu_flash_attn_decisions decisions =
ggml_webgpu_flash_attn_get_decisions(context, storage_offset_alignment);
GGML_ASSERT(decisions.path != GGML_WEBGPU_FLASH_ATTN_PATH_NONE);
ggml_webgpu_flash_attn_pipeline_key key = ggml_webgpu_flash_attn_make_pipeline_key(context, decisions.path);
auto it = flash_attn_pipelines.find(key);
if (it != flash_attn_pipelines.end()) {

View File

@@ -3918,6 +3918,10 @@ static bool ggml_backend_webgpu_device_supports_op(ggml_backend_dev_t dev, const
shader_lib_ctx, ctx->webgpu_global_ctx->capabilities.limits.minStorageBufferOffsetAlignment);
const size_t limit_bytes = ctx->webgpu_global_ctx->capabilities.limits.maxComputeWorkgroupStorageSize;
const bool has_mask = op->src[3] != nullptr;
if (decisions.path == GGML_WEBGPU_FLASH_ATTN_PATH_NONE) {
supports_op = false;
break;
}
if (decisions.path == GGML_WEBGPU_FLASH_ATTN_PATH_VEC) {
const size_t min_bytes =
ggml_webgpu_flash_attn_wg_mem_bytes(decisions.q_tile, decisions.kv_tile, (uint32_t) src0->ne[0],

View File

@@ -54,13 +54,23 @@ opqueue=
opflt=
[ "$OF" != "" ] && opflt="GGML_HEXAGON_OPFILTER=$OF"
vmem=
[ "$VM" != "" ] && opflt="GGML_HEXAGON_VMEM=$VM"
mbuf=
[ "$MB" != "" ] && opflt="GGML_HEXAGON_MBUF=$MB"
vmem=
[ "$VM" != "" ] && vmem="GGML_HEXAGON_VMEM=$VM"
mbuf=
[ "$MB" != "" ] && mbuf="GGML_HEXAGON_MBUF=$MB"
set -x
adb $adbserial $adbhost shell " \
cd $basedir; ulimit -c unlimited; \
LD_LIBRARY_PATH=$basedir/$branch/lib \
ADSP_LIBRARY_PATH=$basedir/$branch/lib \
$verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt \
$verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt $vmem $mbuf \
./$branch/bin/llama-cli --no-mmap -m $basedir/../gguf/$model \
--poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
--ctx-size 8192 --ubatch-size 256 -fa on \

View File

@@ -54,13 +54,19 @@ opqueue=
opflt=
[ "$OF" != "" ] && opflt="GGML_HEXAGON_OPFILTER=$OF"
vmem=
[ "$VM" != "" ] && vmem="GGML_HEXAGON_VMEM=$VM"
mbuf=
[ "$MB" != "" ] && mbuf="GGML_HEXAGON_MBUF=$MB"
set -x
adb $adbserial $adbhost shell " \
cd $basedir; ulimit -c unlimited; \
LD_LIBRARY_PATH=$basedir/$branch/lib \
ADSP_LIBRARY_PATH=$basedir/$branch/lib \
$verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt \
$verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt $vmem $mbuf \
./$branch/bin/llama-completion --no-mmap -m $basedir/../gguf/$model \
--poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
--ctx-size 8192 --ubatch-size 256 -fa on \

View File

@@ -1 +1 @@
1c40d85a4dcfcd62176f649b8682433bb1a6caef
387fa29fbbf3149f06a631c7850b6c35c24b0232

View File

@@ -3579,6 +3579,49 @@ struct test_ssm_conv : public test_case {
}
};
// GGML_OP_SSM_CONV + GGML_OP_ADD (channel-wise bias, optional) + GGML_OP_UNARY(SILU) (fused operation)
struct test_ssm_conv_bias_silu : public test_case {
const ggml_type type;
const std::array<int64_t, 4> ne_a;
const std::array<int64_t, 4> ne_b;
const bool fuse_bias;
std::string op_desc(ggml_tensor * t) override {
GGML_UNUSED(t);
return "SSM_CONV_BIAS_SILU";
}
bool run_whole_graph() override { return true; }
std::string vars() override {
return VARS_TO_STR4(type, ne_a, ne_b, fuse_bias);
}
test_ssm_conv_bias_silu(ggml_type type, std::array<int64_t, 4> ne_a, std::array<int64_t, 4> ne_b,
bool fuse_bias)
: type(type), ne_a(ne_a), ne_b(ne_b), fuse_bias(fuse_bias) {}
ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne_b.data());
ggml_set_name(a, "a");
ggml_set_name(b, "b");
ggml_tensor * out = ggml_ssm_conv(ctx, a, b);
if (fuse_bias) {
ggml_tensor * bias = ggml_new_tensor_1d(ctx, type, out->ne[0]);
ggml_set_name(bias, "bias");
out = ggml_add(ctx, out, bias);
}
out = ggml_silu(ctx, out);
ggml_set_name(out, "out");
return out;
}
};
// GGML_OP_SSM_SCAN
struct test_ssm_scan : public test_case {
const ggml_type type;
@@ -7977,6 +8020,27 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
}
}
// fused ssm_conv + (optional) bias_add + silu. The bias-only graph (no silu) is intentionally
// not tested since there's no fusion for that pattern in ggml_cuda_can_fuse.
for (int64_t d_conv : {3, 4, 9}) {
for (int64_t d_inner : {1024, 1536, 2048}) {
for (bool fuse_bias : {false, true}) {
// short token path (n_t <= 32)
test_cases.emplace_back(new test_ssm_conv_bias_silu(
GGML_TYPE_F32, {d_conv, d_inner, 1, 1}, {d_conv, d_inner, 1, 1}, fuse_bias));
test_cases.emplace_back(new test_ssm_conv_bias_silu(
GGML_TYPE_F32, {2 * d_conv, d_inner, 1, 1}, {d_conv, d_inner, 1, 1}, fuse_bias));
test_cases.emplace_back(new test_ssm_conv_bias_silu(
GGML_TYPE_F32, {d_conv, d_inner, 4, 1}, {d_conv, d_inner, 1, 1}, fuse_bias));
// long token path (n_t > 32)
test_cases.emplace_back(new test_ssm_conv_bias_silu(
GGML_TYPE_F32, {d_conv - 1 + 64, d_inner, 1, 1}, {d_conv, d_inner, 1, 1}, fuse_bias));
test_cases.emplace_back(new test_ssm_conv_bias_silu(
GGML_TYPE_F32, {d_conv - 1 + 64, d_inner, 4, 1}, {d_conv, d_inner, 1, 1}, fuse_bias));
}
}
}
test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 16, 1, 1024, 1, 32, 4)); // Mamba-1
test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 128, 64, 16, 2, 32, 4)); // Mamba-2
test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 256, 64, 8, 2, 32, 4)); // Falcon-H1
@@ -8993,6 +9057,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
// Examples from granite-4.0-h-1b/ggml-model-Q8_0.gguf
test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {515, 3328, 1, 1}, {4, 3328, 1, 1})); // prefill
test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {4, 3328, 1, 1}, {4, 3328, 1, 1})); // generate
test_cases.emplace_back(new test_ssm_conv_bias_silu(GGML_TYPE_F32, {515, 3328, 1, 1}, {4, 3328, 1, 1}, true)); // prefill
test_cases.emplace_back(new test_ssm_conv_bias_silu(GGML_TYPE_F32, {4, 3328, 1, 1}, {4, 3328, 1, 1}, true)); // generate
test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 128, 64, 48, 1, 512, 1)); // prefill
test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 128, 64, 48, 1, 1, 1)); // generate

View File

@@ -5151,38 +5151,39 @@ nt",variants:{variant:{default:"bg-card text-card-foreground",destructive:"text-
attribute_effect(div,$0=>({"data-slot":"alert",class:$0,...restProps,role:"alert"}),[()=>cn$1(alertVariants({variant:variant()}),$$props.class)]);var node2=child(div);snippet(node2,()=>$$props.children??noop$3),reset(div),bind_this(div,$$value=>ref2($$value),()=>ref2()),append($$anchor,div),pop()}var root$1A=from_html("<div><!></div>");function Alert_description($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy",
"ref","class","children"]);var div=root$1A();attribute_effect(div,$0=>({"data-slot":"alert-description",class:$0,...restProps}),[()=>cn$1("col-start-2 grid justify-items-start gap-1 text-sm text-muted-foreground [&_p]:leading-relaxed",$$props.class)]);var node2=child(div);snippet(node2,()=>$$props.children??noop$3),reset(div),bind_this(div,$$value=>ref2($$value),()=>ref2()),append($$anchor,div),pop()}var root$1z=from_html("<div><!></div>");function Alert_title($$anchor,$$props){push$1($$props,!0);
let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class","children"]);var div=root$1z();attribute_effect(div,$0=>({"data-slot":"alert-title",class:$0,...restProps}),[()=>cn$1("col-start-2 line-clamp-1 min-h-4 font-medium tracking-tight",$$props.class)]);var node2=child(div);snippet(node2,()=>$$props.children??noop$3),reset(div),bind_this(div,$$value=>ref2($$value),()=>ref2()),append($$anchor,div),pop()}class AudioRecorder{mediaRecorder=null;audioChunks=[];stream=null;recordingState=!1;async startRecording(){
try{this.stream=await navigator.mediaDevices.getUserMedia({audio:{echoCancellation:!0,noiseSuppression:!0,autoGainControl:!0}}),this.initializeRecorder(this.stream),this.audioChunks=[],this.mediaRecorder.start(100),this.recordingState=!0}catch(error2){throw console.error("Failed to start recording:",error2),new Error("Failed to access microphone. Please check permissions.")}}async stopRecording(){return new Promise((resolve2,reject)=>{if(!this.mediaRecorder||this.mediaRecorder.state==="inactive"){
reject(new Error("No active recording to stop"));return}this.mediaRecorder.onstop=()=>{const mimeType=this.mediaRecorder?.mimeType||MimeTypeAudio.WAV,audioBlob=new Blob(this.audioChunks,{type:mimeType});this.cleanup(),resolve2(audioBlob)},this.mediaRecorder.onerror=event2=>{console.error("Recording error:",event2),this.cleanup(),reject(new Error("Recording failed"))},this.mediaRecorder.stop()})}isRecording(){return this.recordingState}cancelRecording(){this.mediaRecorder&&this.mediaRecorder.state!==
"inactive"&&this.mediaRecorder.stop(),this.cleanup()}initializeRecorder(stream){const options={};MediaRecorder.isTypeSupported(MimeTypeAudio.WAV)?options.mimeType=MimeTypeAudio.WAV:MediaRecorder.isTypeSupported(MimeTypeAudio.WEBM_OPUS)?options.mimeType=MimeTypeAudio.WEBM_OPUS:MediaRecorder.isTypeSupported(MimeTypeAudio.WEBM)?options.mimeType=MimeTypeAudio.WEBM:MediaRecorder.isTypeSupported(MimeTypeAudio.MP4)?options.mimeType=MimeTypeAudio.MP4:console.warn("No preferred audio format supported, us\
ing default"),this.mediaRecorder=new MediaRecorder(stream,options),this.mediaRecorder.ondataavailable=event2=>{event2.data.size>0&&this.audioChunks.push(event2.data)},this.mediaRecorder.onstop=()=>{this.recordingState=!1},this.mediaRecorder.onerror=event2=>{console.error("MediaRecorder error:",event2),this.recordingState=!1}}cleanup(){if(this.stream){for(const track2 of this.stream.getTracks())track2.stop();this.stream=null}this.mediaRecorder=null,this.audioChunks=[],this.recordingState=!1}}async function convertToWav(audioBlob){
try{if(audioBlob.type.includes("wav"))return audioBlob;const arrayBuffer=await audioBlob.arrayBuffer(),audioContext=new(window.AudioContext||window.webkitAudioContext),audioBuffer=await audioContext.decodeAudioData(arrayBuffer),wavBlob=audioBufferToWav(audioBuffer);return audioContext.close(),wavBlob}catch(error2){return console.error("Failed to convert audio to WAV:",error2),audioBlob}}function audioBufferToWav(buffer){const length=buffer.length,numberOfChannels=buffer.numberOfChannels,sampleRate=buffer.
sampleRate,blockAlign=numberOfChannels*2,byteRate=sampleRate*blockAlign,dataSize=length*blockAlign,bufferSize=44+dataSize,arrayBuffer=new ArrayBuffer(bufferSize),view=new DataView(arrayBuffer),writeString=(offset22,string2)=>{for(let i=0;i<string2.length;i++)view.setUint8(offset22+i,string2.charCodeAt(i))};writeString(0,"RIFF"),view.setUint32(4,bufferSize-8,!0),writeString(8,"WAVE"),writeString(12,"fmt "),view.setUint32(16,16,!0),view.setUint16(20,1,!0),view.setUint16(22,numberOfChannels,!0),view.
setUint32(24,sampleRate,!0),view.setUint32(28,byteRate,!0),view.setUint16(32,blockAlign,!0),view.setUint16(34,16,!0),writeString(36,"data"),view.setUint32(40,dataSize,!0);let offset2=44;for(let i=0;i<length;i++)for(let channel=0;channel<numberOfChannels;channel++){const sample=Math.max(-1,Math.min(1,buffer.getChannelData(channel)[i]));view.setInt16(offset2,sample*32767,!0),offset2+=2}return new Blob([arrayBuffer],{type:MimeTypeAudio.WAV})}function createAudioFile(audioBlob,filename){const timestamp=new Date().
toISOString().replace(/[:.]/g,"-"),extension2=audioBlob.type.includes("wav")?"wav":"mp3",defaultFilename=`recording-${timestamp}.${extension2}`;return new File([audioBlob],defaultFilename,{type:audioBlob.type,lastModified:Date.now()})}function isAudioRecordingSupported(){return!!(typeof navigator<"u"&&navigator.mediaDevices&&typeof navigator.mediaDevices.getUserMedia=="function"&&typeof window<"u"&&window.MediaRecorder)}const isNodeJS=typeof process=="object"&&process+""=="[object process]"&&!process.
versions.nw&&!(process.versions.electron&&process.type&&process.type!=="browser"),FONT_IDENTITY_MATRIX=[.001,0,0,.001,0,0],LINE_FACTOR=1.35,RenderingIntentFlag={ANY:1,DISPLAY:2,PRINT:4,ANNOTATIONS_FORMS:16,ANNOTATIONS_STORAGE:32,ANNOTATIONS_DISABLE:64,IS_EDITING:128,OPLIST:256},AnnotationMode={DISABLE:0,ENABLE:1,ENABLE_FORMS:2,ENABLE_STORAGE:3},AnnotationEditorPrefix="pdfjs_internal_editor_",AnnotationEditorType={DISABLE:-1,NONE:0,FREETEXT:3,HIGHLIGHT:9,STAMP:13,INK:15,SIGNATURE:101,COMMENT:102},
AnnotationEditorParamsType={RESIZE:1,CREATE:2,FREETEXT_SIZE:11,FREETEXT_COLOR:12,FREETEXT_OPACITY:13,INK_COLOR:21,INK_THICKNESS:22,INK_OPACITY:23,HIGHLIGHT_COLOR:31,HIGHLIGHT_THICKNESS:32,HIGHLIGHT_FREE:33,HIGHLIGHT_SHOW_ALL:34,DRAW_STEP:41},PermissionFlag={PRINT:4,MODIFY_CONTENTS:8,COPY:16,MODIFY_ANNOTATIONS:32,FILL_INTERACTIVE_FORMS:256,COPY_FOR_ACCESSIBILITY:512,ASSEMBLE:1024,PRINT_HIGH_QUALITY:2048},TextRenderingMode={FILL:0,STROKE:1,FILL_STROKE:2,INVISIBLE:3,FILL_STROKE_MASK:3,ADD_TO_PATH_FLAG:4},
util_ImageKind={GRAYSCALE_1BPP:1,RGB_24BPP:2,RGBA_32BPP:3},AnnotationType={TEXT:1,LINK:2,FREETEXT:3,LINE:4,SQUARE:5,CIRCLE:6,POLYGON:7,POLYLINE:8,HIGHLIGHT:9,UNDERLINE:10,SQUIGGLY:11,STRIKEOUT:12,STAMP:13,CARET:14,INK:15,POPUP:16,FILEATTACHMENT:17,SOUND:18,MOVIE:19,WIDGET:20,SCREEN:21,PRINTERMARK:22,TRAPNET:23,WATERMARK:24,THREED:25,REDACT:26},AnnotationBorderStyleType={SOLID:1,DASHED:2,BEVELED:3,INSET:4,UNDERLINE:5},VerbosityLevel={ERRORS:0,WARNINGS:1,INFOS:5},OPS={dependency:1,setLineWidth:2,setLineCap:3,
setLineJoin:4,setMiterLimit:5,setDash:6,setRenderingIntent:7,setFlatness:8,setGState:9,save:10,restore:11,transform:12,moveTo:13,lineTo:14,curveTo:15,curveTo2:16,curveTo3:17,closePath:18,rectangle:19,stroke:20,closeStroke:21,fill:22,eoFill:23,fillStroke:24,eoFillStroke:25,closeFillStroke:26,closeEOFillStroke:27,endPath:28,clip:29,eoClip:30,beginText:31,endText:32,setCharSpacing:33,setWordSpacing:34,setHScale:35,setLeading:36,setFont:37,setTextRenderingMode:38,setTextRise:39,moveText:40,setLeadingMoveText:41,
setTextMatrix:42,nextLine:43,showText:44,showSpacedText:45,nextLineShowText:46,nextLineSetSpacingShowText:47,setCharWidth:48,setCharWidthAndBounds:49,setStrokeColorSpace:50,setFillColorSpace:51,setStrokeColor:52,setStrokeColorN:53,setFillColor:54,setFillColorN:55,setStrokeGray:56,setFillGray:57,setStrokeRGBColor:58,setFillRGBColor:59,setStrokeCMYKColor:60,setFillCMYKColor:61,shadingFill:62,beginInlineImage:63,beginImageData:64,endInlineImage:65,paintXObject:66,markPoint:67,markPointProps:68,beginMarkedContent:69,
beginMarkedContentProps:70,endMarkedContent:71,beginCompat:72,endCompat:73,paintFormXObjectBegin:74,paintFormXObjectEnd:75,beginGroup:76,endGroup:77,beginAnnotation:80,endAnnotation:81,paintImageMaskXObject:83,paintImageMaskXObjectGroup:84,paintImageXObject:85,paintInlineImageXObject:86,paintInlineImageXObjectGroup:87,paintImageXObjectRepeat:88,paintImageMaskXObjectRepeat:89,paintSolidColorImageMask:90,constructPath:91,setStrokeTransparent:92,setFillTransparent:93,rawFillPath:94},DrawOPS={moveTo:0,
lineTo:1,curveTo:2,closePath:3},PasswordResponses={NEED_PASSWORD:1,INCORRECT_PASSWORD:2};let verbosity=VerbosityLevel.WARNINGS;function setVerbosityLevel(level){Number.isInteger(level)&&(verbosity=level)}function getVerbosityLevel(){return verbosity}function info(msg){verbosity>=VerbosityLevel.INFOS&&console.log(`Info: ${msg}`)}function warn(msg){verbosity>=VerbosityLevel.WARNINGS&&console.log(`Warning: ${msg}`)}function unreachable(msg){throw new Error(msg)}function assert$1(cond,msg){cond||unreachable(
msg)}function _isValidProtocol(url2){switch(url2?.protocol){case"http:":case"https:":case"ftp:":case"mailto:":case"tel:":return!0;default:return!1}}function createValidAbsoluteUrl(url2,baseUrl=null,options=null){if(!url2)return null;if(options&&typeof url2=="string"&&(options.addDefaultProtocol&&url2.startsWith("www.")&&url2.match(/\./g)?.length>=2&&(url2=`http://${url2}`),options.tryConvertEncoding))try{url2=stringToUTF8String(url2)}catch{}const absoluteUrl=baseUrl?URL.parse(url2,baseUrl):URL.parse(
url2);return _isValidProtocol(absoluteUrl)?absoluteUrl:null}function updateUrlHash(url2,hash2,allowRel=!1){const res=URL.parse(url2);return res?(res.hash=hash2,res.href):allowRel&&createValidAbsoluteUrl(url2,"http://example.com")?url2.split("#",1)[0]+`${hash2?`#${hash2}`:""}`:""}function shadow(obj,prop2,value,nonSerializable=!1){return Object.defineProperty(obj,prop2,{value,enumerable:!nonSerializable,configurable:!0,writable:!1}),value}const BaseException=(function(){function BaseException2(message,name){
this.message=message,this.name=name}return BaseException2.prototype=new Error,BaseException2.constructor=BaseException2,BaseException2})();class PasswordException extends BaseException{constructor(msg,code2){super(msg,"PasswordException"),this.code=code2}}class UnknownErrorException extends BaseException{constructor(msg,details){super(msg,"UnknownErrorException"),this.details=details}}class InvalidPDFException extends BaseException{constructor(msg){super(msg,"InvalidPDFException")}}class ResponseException extends BaseException{constructor(msg,status,missing){
super(msg,"ResponseException"),this.status=status,this.missing=missing}}class FormatError extends BaseException{constructor(msg){super(msg,"FormatError")}}class AbortException extends BaseException{constructor(msg){super(msg,"AbortException")}}function bytesToString(bytes){(typeof bytes!="object"||bytes?.length===void 0)&&unreachable("Invalid argument for bytesToString");const length=bytes.length,MAX_ARGUMENT_COUNT=8192;if(length<MAX_ARGUMENT_COUNT)return String.fromCharCode.apply(null,bytes);const strBuf=[];
for(let i=0;i<length;i+=MAX_ARGUMENT_COUNT){const chunkEnd=Math.min(i+MAX_ARGUMENT_COUNT,length),chunk=bytes.subarray(i,chunkEnd);strBuf.push(String.fromCharCode.apply(null,chunk))}return strBuf.join("")}function stringToBytes(str){typeof str!="string"&&unreachable("Invalid argument for stringToBytes");const length=str.length,bytes=new Uint8Array(length);for(let i=0;i<length;++i)bytes[i]=str.charCodeAt(i)&255;return bytes}function string32(value){return String.fromCharCode(value>>24&255,value>>16&
255,value>>8&255,value&255)}function isLittleEndian(){const buffer8=new Uint8Array(4);return buffer8[0]=1,new Uint32Array(buffer8.buffer,0,1)[0]===1}function isEvalSupported(){try{return new Function(""),!0}catch{return!1}}class util_FeatureTest{static get isLittleEndian(){return shadow(this,"isLittleEndian",isLittleEndian())}static get isEvalSupported(){return shadow(this,"isEvalSupported",isEvalSupported())}static get isOffscreenCanvasSupported(){return shadow(this,"isOffscreenCanvasSupported",
typeof OffscreenCanvas<"u")}static get isImageDecoderSupported(){return shadow(this,"isImageDecoderSupported",typeof ImageDecoder<"u")}static get platform(){const{platform:platform2,userAgent}=navigator;return shadow(this,"platform",{isAndroid:userAgent.includes("Android"),isLinux:platform2.includes("Linux"),isMac:platform2.includes("Mac"),isWindows:platform2.includes("Win"),isFirefox:userAgent.includes("Firefox")})}static get isCSSRoundSupported(){return shadow(this,"isCSSRoundSupported",globalThis.
CSS?.supports?.("width: round(1.5px, 1px)"))}}const hexNumbers=Array.from(Array(256).keys(),n=>n.toString(16).padStart(2,"0"));class Util{static makeHexColor(r2,g,b){return`#${hexNumbers[r2]}${hexNumbers[g]}${hexNumbers[b]}`}static scaleMinMax(transform2,minMax){let temp;transform2[0]?(transform2[0]<0&&(temp=minMax[0],minMax[0]=minMax[2],minMax[2]=temp),minMax[0]*=transform2[0],minMax[2]*=transform2[0],transform2[3]<0&&(temp=minMax[1],minMax[1]=minMax[3],minMax[3]=temp),minMax[1]*=transform2[3],
minMax[3]*=transform2[3]):(temp=minMax[0],minMax[0]=minMax[1],minMax[1]=temp,temp=minMax[2],minMax[2]=minMax[3],minMax[3]=temp,transform2[1]<0&&(temp=minMax[1],minMax[1]=minMax[3],minMax[3]=temp),minMax[1]*=transform2[1],minMax[3]*=transform2[1],transform2[2]<0&&(temp=minMax[0],minMax[0]=minMax[2],minMax[2]=temp),minMax[0]*=transform2[2],minMax[2]*=transform2[2]),minMax[0]+=transform2[4],minMax[1]+=transform2[5],minMax[2]+=transform2[4],minMax[3]+=transform2[5]}static transform(m1,m2){return[m1[0]*
m2[0]+m1[2]*m2[1],m1[1]*m2[0]+m1[3]*m2[1],m1[0]*m2[2]+m1[2]*m2[3],m1[1]*m2[2]+m1[3]*m2[3],m1[0]*m2[4]+m1[2]*m2[5]+m1[4],m1[1]*m2[4]+m1[3]*m2[5]+m1[5]]}static applyTransform(p2,m,pos=0){const p0=p2[pos],p1=p2[pos+1];p2[pos]=p0*m[0]+p1*m[2]+m[4],p2[pos+1]=p0*m[1]+p1*m[3]+m[5]}static applyTransformToBezier(p2,transform2,pos=0){const m0=transform2[0],m1=transform2[1],m2=transform2[2],m3=transform2[3],m4=transform2[4],m5=transform2[5];for(let i=0;i<6;i+=2){const pI=p2[pos+i],pI1=p2[pos+i+1];p2[pos+i]=
pI*m0+pI1*m2+m4,p2[pos+i+1]=pI*m1+pI1*m3+m5}}static applyInverseTransform(p2,m){const p0=p2[0],p1=p2[1],d2=m[0]*m[3]-m[1]*m[2];p2[0]=(p0*m[3]-p1*m[2]+m[2]*m[5]-m[4]*m[3])/d2,p2[1]=(-p0*m[1]+p1*m[0]+m[4]*m[1]-m[5]*m[0])/d2}static axialAlignedBoundingBox(rect,transform2,output){const m0=transform2[0],m1=transform2[1],m2=transform2[2],m3=transform2[3],m4=transform2[4],m5=transform2[5],r0=rect[0],r1=rect[1],r2=rect[2],r3=rect[3];let a0=m0*r0+m4,a2=a0,a1=m0*r2+m4,a3=a1,b0=m3*r1+m5,b2=b0,b1=m3*r3+m5,b3=b1;
if(m1!==0||m2!==0){const m1r0=m1*r0,m1r2=m1*r2,m2r1=m2*r1,m2r3=m2*r3;a0+=m2r1,a3+=m2r1,a1+=m2r3,a2+=m2r3,b0+=m1r0,b3+=m1r0,b1+=m1r2,b2+=m1r2}output[0]=Math.min(output[0],a0,a1,a2,a3),output[1]=Math.min(output[1],b0,b1,b2,b3),output[2]=Math.max(output[2],a0,a1,a2,a3),output[3]=Math.max(output[3],b0,b1,b2,b3)}static inverseTransform(m){const d2=m[0]*m[3]-m[1]*m[2];return[m[3]/d2,-m[1]/d2,-m[2]/d2,m[0]/d2,(m[2]*m[5]-m[4]*m[3])/d2,(m[4]*m[1]-m[5]*m[0])/d2]}static singularValueDecompose2dScale(matrix,output){
const m0=matrix[0],m1=matrix[1],m2=matrix[2],m3=matrix[3],a=m0**2+m1**2,b=m0*m2+m1*m3,c2=m2**2+m3**2,first=(a+c2)/2,second=Math.sqrt(first**2-(a*c2-b**2));output[0]=Math.sqrt(first+second||1),output[1]=Math.sqrt(first-second||1)}static normalizeRect(rect){const r2=rect.slice(0);return rect[0]>rect[2]&&(r2[0]=rect[2],r2[2]=rect[0]),rect[1]>rect[3]&&(r2[1]=rect[3],r2[3]=rect[1]),r2}static intersect(rect1,rect2){const xLow=Math.max(Math.min(rect1[0],rect1[2]),Math.min(rect2[0],rect2[2])),xHigh=Math.
min(Math.max(rect1[0],rect1[2]),Math.max(rect2[0],rect2[2]));if(xLow>xHigh)return null;const yLow=Math.max(Math.min(rect1[1],rect1[3]),Math.min(rect2[1],rect2[3])),yHigh=Math.min(Math.max(rect1[1],rect1[3]),Math.max(rect2[1],rect2[3]));return yLow>yHigh?null:[xLow,yLow,xHigh,yHigh]}static pointBoundingBox(x,y,minMax){minMax[0]=Math.min(minMax[0],x),minMax[1]=Math.min(minMax[1],y),minMax[2]=Math.max(minMax[2],x),minMax[3]=Math.max(minMax[3],y)}static rectBoundingBox(x0,y0,x1,y1,minMax){minMax[0]=
Math.min(minMax[0],x0,x1),minMax[1]=Math.min(minMax[1],y0,y1),minMax[2]=Math.max(minMax[2],x0,x1),minMax[3]=Math.max(minMax[3],y0,y1)}static#getExtremumOnCurve(x0,x1,x2,x3,y0,y1,y2,y3,t,minMax){if(t<=0||t>=1)return;const mt=1-t,tt=t*t,ttt=tt*t,x=mt*(mt*(mt*x0+3*t*x1)+3*tt*x2)+ttt*x3,y=mt*(mt*(mt*y0+3*t*y1)+3*tt*y2)+ttt*y3;minMax[0]=Math.min(minMax[0],x),minMax[1]=Math.min(minMax[1],y),minMax[2]=Math.max(minMax[2],x),minMax[3]=Math.max(minMax[3],y)}static#getExtremum(x0,x1,x2,x3,y0,y1,y2,y3,a,b,c2,minMax){
if(Math.abs(a)<1e-12){Math.abs(b)>=1e-12&&this.#getExtremumOnCurve(x0,x1,x2,x3,y0,y1,y2,y3,-c2/b,minMax);return}const delta=b**2-4*c2*a;if(delta<0)return;const sqrtDelta=Math.sqrt(delta),a2=2*a;this.#getExtremumOnCurve(x0,x1,x2,x3,y0,y1,y2,y3,(-b+sqrtDelta)/a2,minMax),this.#getExtremumOnCurve(x0,x1,x2,x3,y0,y1,y2,y3,(-b-sqrtDelta)/a2,minMax)}static bezierBoundingBox(x0,y0,x1,y1,x2,y2,x3,y3,minMax){minMax[0]=Math.min(minMax[0],x0,x3),minMax[1]=Math.min(minMax[1],y0,y3),minMax[2]=Math.max(minMax[2],
x0,x3),minMax[3]=Math.max(minMax[3],y0,y3),this.#getExtremum(x0,x1,x2,x3,y0,y1,y2,y3,3*(-x0+3*(x1-x2)+x3),6*(x0-2*x1+x2),3*(x1-x0),minMax),this.#getExtremum(x0,x1,x2,x3,y0,y1,y2,y3,3*(-y0+3*(y1-y2)+y3),6*(y0-2*y1+y2),3*(y1-y0),minMax)}}function stringToUTF8String(str){return decodeURIComponent(escape(str))}let NormalizeRegex=null,NormalizationMap=null;function normalizeUnicode(str){return NormalizeRegex||(NormalizeRegex=/([\u00a0\u00b5\u037e\u0eb3\u2000-\u200a\u202f\u2126\ufb00-\ufb04\ufb06\ufb20-\ufb36\ufb38-\ufb3c\ufb3e\ufb40-\ufb41\ufb43-\ufb44\ufb46-\ufba1\ufba4-\ufba9\ufbae-\ufbb1\ufbd3-\ufbdc\ufbde-\ufbe7\ufbea-\ufbf8\ufbfc-\ufbfd\ufc00-\ufc5d\ufc64-\ufcf1\ufcf5-\ufd3d\ufd88\ufdf4\ufdfa-\ufdfb\ufe71\ufe77\ufe79\ufe7b\ufe7d]+)|(\ufb05+)/gu,
try{this.stream=await navigator.mediaDevices.getUserMedia({audio:{echoCancellation:!0,noiseSuppression:!0,autoGainControl:!0}}),this.initializeRecorder(this.stream),this.audioChunks=[],this.mediaRecorder.start(100),this.recordingState=!0}catch(error2){throw console.error("Failed to start recording:",error2),new Error("Failed to access microphone. Please check permissions.")}}async stopRecording(){return new Promise((resolve2,reject)=>{const recorder=this.mediaRecorder,chunks=this.audioChunks,stream=this.
stream;if(!recorder||recorder.state==="inactive"){reject(new Error("No active recording to stop"));return}this.mediaRecorder=null,this.audioChunks=[],this.stream=null,this.recordingState=!1,recorder.onstop=()=>{const audioBlob=new Blob(chunks,{type:recorder.mimeType||MimeTypeAudio.WAV});if(stream)for(const track2 of stream.getTracks())track2.stop();resolve2(audioBlob)},recorder.onerror=event2=>{if(console.error("Recording error:",event2),stream)for(const track2 of stream.getTracks())track2.stop();
reject(new Error("Recording failed"))},recorder.stop()})}isRecording(){return this.recordingState}cancelRecording(){const recorder=this.mediaRecorder,stream=this.stream;if(this.mediaRecorder=null,this.audioChunks=[],this.stream=null,this.recordingState=!1,recorder&&recorder.state!=="inactive"&&(recorder.onstop=null,recorder.onerror=null,recorder.stop()),stream)for(const track2 of stream.getTracks())track2.stop()}initializeRecorder(stream){const options={};MediaRecorder.isTypeSupported(MimeTypeAudio.
WAV)?options.mimeType=MimeTypeAudio.WAV:MediaRecorder.isTypeSupported(MimeTypeAudio.WEBM_OPUS)?options.mimeType=MimeTypeAudio.WEBM_OPUS:MediaRecorder.isTypeSupported(MimeTypeAudio.WEBM)?options.mimeType=MimeTypeAudio.WEBM:MediaRecorder.isTypeSupported(MimeTypeAudio.MP4)?options.mimeType=MimeTypeAudio.MP4:console.warn("No preferred audio format supported, using default"),this.mediaRecorder=new MediaRecorder(stream,options),this.mediaRecorder.ondataavailable=event2=>{event2.data.size>0&&this.audioChunks.
push(event2.data)},this.mediaRecorder.onstop=()=>{this.recordingState=!1},this.mediaRecorder.onerror=event2=>{console.error("MediaRecorder error:",event2),this.recordingState=!1}}}async function convertToWav(audioBlob){try{if(audioBlob.type.includes("wav"))return audioBlob;const arrayBuffer=await audioBlob.arrayBuffer(),audioContext=new(window.AudioContext||window.webkitAudioContext);try{const audioBuffer=await audioContext.decodeAudioData(arrayBuffer);return audioBufferToWav(audioBuffer)}finally{
audioContext.close()}}catch(error2){return console.error("Failed to convert audio to WAV:",error2),audioBlob}}function audioBufferToWav(buffer){const length=buffer.length,numberOfChannels=buffer.numberOfChannels,sampleRate=buffer.sampleRate,blockAlign=numberOfChannels*2,byteRate=sampleRate*blockAlign,dataSize=length*blockAlign,bufferSize=44+dataSize,arrayBuffer=new ArrayBuffer(bufferSize),view=new DataView(arrayBuffer),writeString=(offset2,string2)=>{for(let i=0;i<string2.length;i++)view.setUint8(
offset2+i,string2.charCodeAt(i))};writeString(0,"RIFF"),view.setUint32(4,bufferSize-8,!0),writeString(8,"WAVE"),writeString(12,"fmt "),view.setUint32(16,16,!0),view.setUint16(20,1,!0),view.setUint16(22,numberOfChannels,!0),view.setUint32(24,sampleRate,!0),view.setUint32(28,byteRate,!0),view.setUint16(32,blockAlign,!0),view.setUint16(34,16,!0),writeString(36,"data"),view.setUint32(40,dataSize,!0);const channels=new Array(numberOfChannels);for(let c2=0;c2<numberOfChannels;c2++)channels[c2]=buffer.
getChannelData(c2);const pcm=new Int16Array(arrayBuffer,44,length*numberOfChannels);let p2=0;for(let i=0;i<length;i++)for(let c2=0;c2<numberOfChannels;c2++){let s2=channels[c2][i];s2>1?s2=1:s2<-1&&(s2=-1),pcm[p2++]=s2*32767}return new Blob([arrayBuffer],{type:MimeTypeAudio.WAV})}function createAudioFile(audioBlob,filename){const timestamp=new Date().toISOString().replace(/[:.]/g,"-"),extension2=audioBlob.type.includes("wav")?"wav":"mp3",defaultFilename=`recording-${timestamp}.${extension2}`;return new File(
[audioBlob],defaultFilename,{type:audioBlob.type,lastModified:Date.now()})}function isAudioRecordingSupported(){return!!(typeof navigator<"u"&&navigator.mediaDevices&&typeof navigator.mediaDevices.getUserMedia=="function"&&typeof window<"u"&&window.MediaRecorder)}const isNodeJS=typeof process=="object"&&process+""=="[object process]"&&!process.versions.nw&&!(process.versions.electron&&process.type&&process.type!=="browser"),FONT_IDENTITY_MATRIX=[.001,0,0,.001,0,0],LINE_FACTOR=1.35,RenderingIntentFlag={
ANY:1,DISPLAY:2,PRINT:4,ANNOTATIONS_FORMS:16,ANNOTATIONS_STORAGE:32,ANNOTATIONS_DISABLE:64,IS_EDITING:128,OPLIST:256},AnnotationMode={DISABLE:0,ENABLE:1,ENABLE_FORMS:2,ENABLE_STORAGE:3},AnnotationEditorPrefix="pdfjs_internal_editor_",AnnotationEditorType={DISABLE:-1,NONE:0,FREETEXT:3,HIGHLIGHT:9,STAMP:13,INK:15,SIGNATURE:101,COMMENT:102},AnnotationEditorParamsType={RESIZE:1,CREATE:2,FREETEXT_SIZE:11,FREETEXT_COLOR:12,FREETEXT_OPACITY:13,INK_COLOR:21,INK_THICKNESS:22,INK_OPACITY:23,HIGHLIGHT_COLOR:31,
HIGHLIGHT_THICKNESS:32,HIGHLIGHT_FREE:33,HIGHLIGHT_SHOW_ALL:34,DRAW_STEP:41},PermissionFlag={PRINT:4,MODIFY_CONTENTS:8,COPY:16,MODIFY_ANNOTATIONS:32,FILL_INTERACTIVE_FORMS:256,COPY_FOR_ACCESSIBILITY:512,ASSEMBLE:1024,PRINT_HIGH_QUALITY:2048},TextRenderingMode={FILL:0,STROKE:1,FILL_STROKE:2,INVISIBLE:3,FILL_STROKE_MASK:3,ADD_TO_PATH_FLAG:4},util_ImageKind={GRAYSCALE_1BPP:1,RGB_24BPP:2,RGBA_32BPP:3},AnnotationType={TEXT:1,LINK:2,FREETEXT:3,LINE:4,SQUARE:5,CIRCLE:6,POLYGON:7,POLYLINE:8,HIGHLIGHT:9,
UNDERLINE:10,SQUIGGLY:11,STRIKEOUT:12,STAMP:13,CARET:14,INK:15,POPUP:16,FILEATTACHMENT:17,SOUND:18,MOVIE:19,WIDGET:20,SCREEN:21,PRINTERMARK:22,TRAPNET:23,WATERMARK:24,THREED:25,REDACT:26},AnnotationBorderStyleType={SOLID:1,DASHED:2,BEVELED:3,INSET:4,UNDERLINE:5},VerbosityLevel={ERRORS:0,WARNINGS:1,INFOS:5},OPS={dependency:1,setLineWidth:2,setLineCap:3,setLineJoin:4,setMiterLimit:5,setDash:6,setRenderingIntent:7,setFlatness:8,setGState:9,save:10,restore:11,transform:12,moveTo:13,lineTo:14,curveTo:15,
curveTo2:16,curveTo3:17,closePath:18,rectangle:19,stroke:20,closeStroke:21,fill:22,eoFill:23,fillStroke:24,eoFillStroke:25,closeFillStroke:26,closeEOFillStroke:27,endPath:28,clip:29,eoClip:30,beginText:31,endText:32,setCharSpacing:33,setWordSpacing:34,setHScale:35,setLeading:36,setFont:37,setTextRenderingMode:38,setTextRise:39,moveText:40,setLeadingMoveText:41,setTextMatrix:42,nextLine:43,showText:44,showSpacedText:45,nextLineShowText:46,nextLineSetSpacingShowText:47,setCharWidth:48,setCharWidthAndBounds:49,
setStrokeColorSpace:50,setFillColorSpace:51,setStrokeColor:52,setStrokeColorN:53,setFillColor:54,setFillColorN:55,setStrokeGray:56,setFillGray:57,setStrokeRGBColor:58,setFillRGBColor:59,setStrokeCMYKColor:60,setFillCMYKColor:61,shadingFill:62,beginInlineImage:63,beginImageData:64,endInlineImage:65,paintXObject:66,markPoint:67,markPointProps:68,beginMarkedContent:69,beginMarkedContentProps:70,endMarkedContent:71,beginCompat:72,endCompat:73,paintFormXObjectBegin:74,paintFormXObjectEnd:75,beginGroup:76,
endGroup:77,beginAnnotation:80,endAnnotation:81,paintImageMaskXObject:83,paintImageMaskXObjectGroup:84,paintImageXObject:85,paintInlineImageXObject:86,paintInlineImageXObjectGroup:87,paintImageXObjectRepeat:88,paintImageMaskXObjectRepeat:89,paintSolidColorImageMask:90,constructPath:91,setStrokeTransparent:92,setFillTransparent:93,rawFillPath:94},DrawOPS={moveTo:0,lineTo:1,curveTo:2,closePath:3},PasswordResponses={NEED_PASSWORD:1,INCORRECT_PASSWORD:2};let verbosity=VerbosityLevel.WARNINGS;function setVerbosityLevel(level){
Number.isInteger(level)&&(verbosity=level)}function getVerbosityLevel(){return verbosity}function info(msg){verbosity>=VerbosityLevel.INFOS&&console.log(`Info: ${msg}`)}function warn(msg){verbosity>=VerbosityLevel.WARNINGS&&console.log(`Warning: ${msg}`)}function unreachable(msg){throw new Error(msg)}function assert$1(cond,msg){cond||unreachable(msg)}function _isValidProtocol(url2){switch(url2?.protocol){case"http:":case"https:":case"ftp:":case"mailto:":case"tel:":return!0;default:return!1}}function createValidAbsoluteUrl(url2,baseUrl=null,options=null){
if(!url2)return null;if(options&&typeof url2=="string"&&(options.addDefaultProtocol&&url2.startsWith("www.")&&url2.match(/\./g)?.length>=2&&(url2=`http://${url2}`),options.tryConvertEncoding))try{url2=stringToUTF8String(url2)}catch{}const absoluteUrl=baseUrl?URL.parse(url2,baseUrl):URL.parse(url2);return _isValidProtocol(absoluteUrl)?absoluteUrl:null}function updateUrlHash(url2,hash2,allowRel=!1){const res=URL.parse(url2);return res?(res.hash=hash2,res.href):allowRel&&createValidAbsoluteUrl(url2,
"http://example.com")?url2.split("#",1)[0]+`${hash2?`#${hash2}`:""}`:""}function shadow(obj,prop2,value,nonSerializable=!1){return Object.defineProperty(obj,prop2,{value,enumerable:!nonSerializable,configurable:!0,writable:!1}),value}const BaseException=(function(){function BaseException2(message,name){this.message=message,this.name=name}return BaseException2.prototype=new Error,BaseException2.constructor=BaseException2,BaseException2})();class PasswordException extends BaseException{constructor(msg,code2){
super(msg,"PasswordException"),this.code=code2}}class UnknownErrorException extends BaseException{constructor(msg,details){super(msg,"UnknownErrorException"),this.details=details}}class InvalidPDFException extends BaseException{constructor(msg){super(msg,"InvalidPDFException")}}class ResponseException extends BaseException{constructor(msg,status,missing){super(msg,"ResponseException"),this.status=status,this.missing=missing}}class FormatError extends BaseException{constructor(msg){super(msg,"For\
matError")}}class AbortException extends BaseException{constructor(msg){super(msg,"AbortException")}}function bytesToString(bytes){(typeof bytes!="object"||bytes?.length===void 0)&&unreachable("Invalid argument for bytesToString");const length=bytes.length,MAX_ARGUMENT_COUNT=8192;if(length<MAX_ARGUMENT_COUNT)return String.fromCharCode.apply(null,bytes);const strBuf=[];for(let i=0;i<length;i+=MAX_ARGUMENT_COUNT){const chunkEnd=Math.min(i+MAX_ARGUMENT_COUNT,length),chunk=bytes.subarray(i,chunkEnd);
strBuf.push(String.fromCharCode.apply(null,chunk))}return strBuf.join("")}function stringToBytes(str){typeof str!="string"&&unreachable("Invalid argument for stringToBytes");const length=str.length,bytes=new Uint8Array(length);for(let i=0;i<length;++i)bytes[i]=str.charCodeAt(i)&255;return bytes}function string32(value){return String.fromCharCode(value>>24&255,value>>16&255,value>>8&255,value&255)}function isLittleEndian(){const buffer8=new Uint8Array(4);return buffer8[0]=1,new Uint32Array(buffer8.
buffer,0,1)[0]===1}function isEvalSupported(){try{return new Function(""),!0}catch{return!1}}class util_FeatureTest{static get isLittleEndian(){return shadow(this,"isLittleEndian",isLittleEndian())}static get isEvalSupported(){return shadow(this,"isEvalSupported",isEvalSupported())}static get isOffscreenCanvasSupported(){return shadow(this,"isOffscreenCanvasSupported",typeof OffscreenCanvas<"u")}static get isImageDecoderSupported(){return shadow(this,"isImageDecoderSupported",typeof ImageDecoder<
"u")}static get platform(){const{platform:platform2,userAgent}=navigator;return shadow(this,"platform",{isAndroid:userAgent.includes("Android"),isLinux:platform2.includes("Linux"),isMac:platform2.includes("Mac"),isWindows:platform2.includes("Win"),isFirefox:userAgent.includes("Firefox")})}static get isCSSRoundSupported(){return shadow(this,"isCSSRoundSupported",globalThis.CSS?.supports?.("width: round(1.5px, 1px)"))}}const hexNumbers=Array.from(Array(256).keys(),n=>n.toString(16).padStart(2,"0"));
class Util{static makeHexColor(r2,g,b){return`#${hexNumbers[r2]}${hexNumbers[g]}${hexNumbers[b]}`}static scaleMinMax(transform2,minMax){let temp;transform2[0]?(transform2[0]<0&&(temp=minMax[0],minMax[0]=minMax[2],minMax[2]=temp),minMax[0]*=transform2[0],minMax[2]*=transform2[0],transform2[3]<0&&(temp=minMax[1],minMax[1]=minMax[3],minMax[3]=temp),minMax[1]*=transform2[3],minMax[3]*=transform2[3]):(temp=minMax[0],minMax[0]=minMax[1],minMax[1]=temp,temp=minMax[2],minMax[2]=minMax[3],minMax[3]=temp,
transform2[1]<0&&(temp=minMax[1],minMax[1]=minMax[3],minMax[3]=temp),minMax[1]*=transform2[1],minMax[3]*=transform2[1],transform2[2]<0&&(temp=minMax[0],minMax[0]=minMax[2],minMax[2]=temp),minMax[0]*=transform2[2],minMax[2]*=transform2[2]),minMax[0]+=transform2[4],minMax[1]+=transform2[5],minMax[2]+=transform2[4],minMax[3]+=transform2[5]}static transform(m1,m2){return[m1[0]*m2[0]+m1[2]*m2[1],m1[1]*m2[0]+m1[3]*m2[1],m1[0]*m2[2]+m1[2]*m2[3],m1[1]*m2[2]+m1[3]*m2[3],m1[0]*m2[4]+m1[2]*m2[5]+m1[4],m1[1]*
m2[4]+m1[3]*m2[5]+m1[5]]}static applyTransform(p2,m,pos=0){const p0=p2[pos],p1=p2[pos+1];p2[pos]=p0*m[0]+p1*m[2]+m[4],p2[pos+1]=p0*m[1]+p1*m[3]+m[5]}static applyTransformToBezier(p2,transform2,pos=0){const m0=transform2[0],m1=transform2[1],m2=transform2[2],m3=transform2[3],m4=transform2[4],m5=transform2[5];for(let i=0;i<6;i+=2){const pI=p2[pos+i],pI1=p2[pos+i+1];p2[pos+i]=pI*m0+pI1*m2+m4,p2[pos+i+1]=pI*m1+pI1*m3+m5}}static applyInverseTransform(p2,m){const p0=p2[0],p1=p2[1],d2=m[0]*m[3]-m[1]*m[2];
p2[0]=(p0*m[3]-p1*m[2]+m[2]*m[5]-m[4]*m[3])/d2,p2[1]=(-p0*m[1]+p1*m[0]+m[4]*m[1]-m[5]*m[0])/d2}static axialAlignedBoundingBox(rect,transform2,output){const m0=transform2[0],m1=transform2[1],m2=transform2[2],m3=transform2[3],m4=transform2[4],m5=transform2[5],r0=rect[0],r1=rect[1],r2=rect[2],r3=rect[3];let a0=m0*r0+m4,a2=a0,a1=m0*r2+m4,a3=a1,b0=m3*r1+m5,b2=b0,b1=m3*r3+m5,b3=b1;if(m1!==0||m2!==0){const m1r0=m1*r0,m1r2=m1*r2,m2r1=m2*r1,m2r3=m2*r3;a0+=m2r1,a3+=m2r1,a1+=m2r3,a2+=m2r3,b0+=m1r0,b3+=m1r0,
b1+=m1r2,b2+=m1r2}output[0]=Math.min(output[0],a0,a1,a2,a3),output[1]=Math.min(output[1],b0,b1,b2,b3),output[2]=Math.max(output[2],a0,a1,a2,a3),output[3]=Math.max(output[3],b0,b1,b2,b3)}static inverseTransform(m){const d2=m[0]*m[3]-m[1]*m[2];return[m[3]/d2,-m[1]/d2,-m[2]/d2,m[0]/d2,(m[2]*m[5]-m[4]*m[3])/d2,(m[4]*m[1]-m[5]*m[0])/d2]}static singularValueDecompose2dScale(matrix,output){const m0=matrix[0],m1=matrix[1],m2=matrix[2],m3=matrix[3],a=m0**2+m1**2,b=m0*m2+m1*m3,c2=m2**2+m3**2,first=(a+c2)/
2,second=Math.sqrt(first**2-(a*c2-b**2));output[0]=Math.sqrt(first+second||1),output[1]=Math.sqrt(first-second||1)}static normalizeRect(rect){const r2=rect.slice(0);return rect[0]>rect[2]&&(r2[0]=rect[2],r2[2]=rect[0]),rect[1]>rect[3]&&(r2[1]=rect[3],r2[3]=rect[1]),r2}static intersect(rect1,rect2){const xLow=Math.max(Math.min(rect1[0],rect1[2]),Math.min(rect2[0],rect2[2])),xHigh=Math.min(Math.max(rect1[0],rect1[2]),Math.max(rect2[0],rect2[2]));if(xLow>xHigh)return null;const yLow=Math.max(Math.min(
rect1[1],rect1[3]),Math.min(rect2[1],rect2[3])),yHigh=Math.min(Math.max(rect1[1],rect1[3]),Math.max(rect2[1],rect2[3]));return yLow>yHigh?null:[xLow,yLow,xHigh,yHigh]}static pointBoundingBox(x,y,minMax){minMax[0]=Math.min(minMax[0],x),minMax[1]=Math.min(minMax[1],y),minMax[2]=Math.max(minMax[2],x),minMax[3]=Math.max(minMax[3],y)}static rectBoundingBox(x0,y0,x1,y1,minMax){minMax[0]=Math.min(minMax[0],x0,x1),minMax[1]=Math.min(minMax[1],y0,y1),minMax[2]=Math.max(minMax[2],x0,x1),minMax[3]=Math.max(
minMax[3],y0,y1)}static#getExtremumOnCurve(x0,x1,x2,x3,y0,y1,y2,y3,t,minMax){if(t<=0||t>=1)return;const mt=1-t,tt=t*t,ttt=tt*t,x=mt*(mt*(mt*x0+3*t*x1)+3*tt*x2)+ttt*x3,y=mt*(mt*(mt*y0+3*t*y1)+3*tt*y2)+ttt*y3;minMax[0]=Math.min(minMax[0],x),minMax[1]=Math.min(minMax[1],y),minMax[2]=Math.max(minMax[2],x),minMax[3]=Math.max(minMax[3],y)}static#getExtremum(x0,x1,x2,x3,y0,y1,y2,y3,a,b,c2,minMax){if(Math.abs(a)<1e-12){Math.abs(b)>=1e-12&&this.#getExtremumOnCurve(x0,x1,x2,x3,y0,y1,y2,y3,-c2/b,minMax);return}
const delta=b**2-4*c2*a;if(delta<0)return;const sqrtDelta=Math.sqrt(delta),a2=2*a;this.#getExtremumOnCurve(x0,x1,x2,x3,y0,y1,y2,y3,(-b+sqrtDelta)/a2,minMax),this.#getExtremumOnCurve(x0,x1,x2,x3,y0,y1,y2,y3,(-b-sqrtDelta)/a2,minMax)}static bezierBoundingBox(x0,y0,x1,y1,x2,y2,x3,y3,minMax){minMax[0]=Math.min(minMax[0],x0,x3),minMax[1]=Math.min(minMax[1],y0,y3),minMax[2]=Math.max(minMax[2],x0,x3),minMax[3]=Math.max(minMax[3],y0,y3),this.#getExtremum(x0,x1,x2,x3,y0,y1,y2,y3,3*(-x0+3*(x1-x2)+x3),6*(x0-
2*x1+x2),3*(x1-x0),minMax),this.#getExtremum(x0,x1,x2,x3,y0,y1,y2,y3,3*(-y0+3*(y1-y2)+y3),6*(y0-2*y1+y2),3*(y1-y0),minMax)}}function stringToUTF8String(str){return decodeURIComponent(escape(str))}let NormalizeRegex=null,NormalizationMap=null;function normalizeUnicode(str){return NormalizeRegex||(NormalizeRegex=/([\u00a0\u00b5\u037e\u0eb3\u2000-\u200a\u202f\u2126\ufb00-\ufb04\ufb06\ufb20-\ufb36\ufb38-\ufb3c\ufb3e\ufb40-\ufb41\ufb43-\ufb44\ufb46-\ufba1\ufba4-\ufba9\ufbae-\ufbb1\ufbd3-\ufbdc\ufbde-\ufbe7\ufbea-\ufbf8\ufbfc-\ufbfd\ufc00-\ufc5d\ufc64-\ufcf1\ufcf5-\ufd3d\ufd88\ufdf4\ufdfa-\ufdfb\ufe71\ufe77\ufe79\ufe7b\ufe7d]+)|(\ufb05+)/gu,
NormalizationMap=new Map([["ſt","ſt"]])),str.replaceAll(NormalizeRegex,(_,p1,p2)=>p1?p1.normalize("NFKC"):NormalizationMap.get(p2))}function getUuid(){if(typeof crypto.randomUUID=="function")return crypto.randomUUID();const buf=new Uint8Array(32);return crypto.getRandomValues(buf),bytesToString(buf)}const AnnotationPrefix="pdfjs_internal_id_";function _isValidExplicitDest(validRef,validName,dest){if(!Array.isArray(dest)||dest.length<2)return!1;const[page2,zoom,...args]=dest;if(!validRef(page2)&&
!Number.isInteger(page2)||!validName(zoom))return!1;const argsLen=args.length;let allowNull=!0;switch(zoom.name){case"XYZ":if(argsLen<2||argsLen>3)return!1;break;case"Fit":case"FitB":return argsLen===0;case"FitH":case"FitBH":case"FitV":case"FitBV":if(argsLen>1)return!1;break;case"FitR":if(argsLen!==4)return!1;allowNull=!1;break;default:return!1}for(const arg of args)if(!(typeof arg=="number"||allowNull&&arg===null))return!1;return!0}function MathClamp(v,min2,max2){return Math.min(Math.max(v,min2),
max2)}function toBase64Util(arr){return Uint8Array.prototype.toBase64?arr.toBase64():btoa(bytesToString(arr))}function fromBase64Util(str){return Uint8Array.fromBase64?Uint8Array.fromBase64(str):stringToBytes(atob(str))}typeof Promise.try!="function"&&(Promise.try=function(fn,...args){return new Promise(resolve2=>{resolve2(fn(...args))})});typeof Math.sumPrecise!="function"&&(Math.sumPrecise=function(numbers){return numbers.reduce((a,b)=>a+b,0)});const SVG_NS="http://www.w3.org/2000/svg";class PixelsPerInch{static CSS=96;static PDF=72;static PDF_TO_CSS_UNITS=this.
@@ -6828,43 +6829,43 @@ preventDefault();const textFile=new File([text2],"Pasted",{type:MimeTypeText.PLA
isLoading:!0,mcpPrompt:{serverName:promptInfo.serverName,promptName:promptInfo.name,arguments:args?{...args}:void 0}};uploadedFiles([...uploadedFiles(),placeholder2]),$$props.onUploadedFilesChange?.(uploadedFiles()),get$3(textareaRef)?.focus()}function handlePromptLoadComplete(placeholderId,result){const promptText=result.messages?.map(msg=>typeof msg.content=="string"?msg.content:msg.content.type===ContentPartType.TEXT?msg.content.text:"").filter(Boolean).join(PROMPT_CONTENT_SEPARATOR);uploadedFiles(
uploadedFiles().map(f=>f.id===placeholderId?{...f,isLoading:!1,textContent:promptText,size:promptText.length,file:new File([promptText],`${f.name}${FileExtensionText.TXT}`,{type:MimeTypeText.PLAIN})}:f)),$$props.onUploadedFilesChange?.(uploadedFiles())}function handlePromptLoadError(placeholderId,error2){uploadedFiles(uploadedFiles().map(f=>f.id===placeholderId?{...f,isLoading:!1,loadError:error2}:f)),$$props.onUploadedFilesChange?.(uploadedFiles())}function handlePromptPickerClose(){set$1(isPromptPickerOpen,
!1),set$1(promptSearchQuery,""),get$3(textareaRef)?.focus()}function handleInlineResourcePickerClose(){set$1(isInlineResourcePickerOpen,!1),set$1(resourceSearchQuery,""),get$3(textareaRef)?.focus()}function handleInlineResourceSelect(){value().startsWith(RESOURCE_TRIGGER_PREFIX)&&(value(""),$$props.onValueChange?.("")),set$1(isInlineResourcePickerOpen,!1),set$1(resourceSearchQuery,""),get$3(textareaRef)?.focus()}function handleBrowseResources(){set$1(isInlineResourcePickerOpen,!1),set$1(resourceSearchQuery,
""),value().startsWith(RESOURCE_TRIGGER_PREFIX)&&(value(""),$$props.onValueChange?.("")),set$1(isResourceDialogOpen,!0)}async function handleMicClick(){if(!audioRecorder||!get$3(recordingSupported)){console.warn("Audio recording not supported");return}if(get$3(isRecording))try{const audioBlob=await audioRecorder.stopRecording(),wavBlob=await convertToWav(audioBlob),audioFile=createAudioFile(wavBlob);$$props.onFilesAdd?.([audioFile]),set$1(isRecording,!1)}catch(error2){console.error("Failed to st\
op recording:",error2),set$1(isRecording,!1)}else try{await audioRecorder.startRecording(),set$1(isRecording,!0)}catch(error2){console.error("Failed to start recording:",error2)}}var $$exports={focus:focus2,resetTextareaHeight,openModelSelector,checkModelSelected},fragment=root$15(),node2=first_child(fragment);bind_this(ChatFormFileInputInvisible(node2,{onFileSelect:handleFileSelect}),$$value=>set$1(fileInputRef,$$value,!0),()=>get$3(fileInputRef));var form=sibling(node2,2),node_1=child(form);bind_this(
ChatFormPromptPicker(node_1,{get isOpen(){return get$3(isPromptPickerOpen)},get searchQuery(){return get$3(promptSearchQuery)},onClose:handlePromptPickerClose,onPromptLoadStart:handlePromptLoadStart,onPromptLoadComplete:handlePromptLoadComplete,onPromptLoadError:handlePromptLoadError}),$$value=>set$1(promptPickerRef,$$value,!0),()=>get$3(promptPickerRef));var node_2=sibling(node_1,2);bind_this(ChatFormResourcePicker(node_2,{get isOpen(){return get$3(isInlineResourcePickerOpen)},get searchQuery(){
return get$3(resourceSearchQuery)},onClose:handleInlineResourcePickerClose,onResourceSelect:handleInlineResourceSelect,onBrowse:handleBrowseResources}),$$value=>set$1(resourcePickerRef,$$value,!0),()=>get$3(resourcePickerRef));var div=sibling(node_2,2),node_3=child(div);{let $0=user_derived(()=>get$3(activeModelId)??void 0);ChatAttachmentsList(node_3,{get attachments(){return attachments()},onFileRemove:handleFileRemove,limitToSingleRow:!0,class:"py-5",style:"scroll-padding: 1rem;",get activeModelId(){
return get$3($0)},get uploadedFiles(){return uploadedFiles()},set uploadedFiles($$value){uploadedFiles($$value)}})}var div_1=sibling(node_3,2),node_4=child(div_1);bind_this(ChatFormTextarea(node_4,{class:"px-5 py-1.5 md:pt-0",onKeydown:handleKeydown,onInput:()=>{handleInput(),$$props.onValueChange?.(value())},get disabled(){return disabled()},get placeholder(){return placeholder()},get value(){return value()},set value($$value){value($$value)}}),$$value=>set$1(textareaRef,$$value,!0),()=>get$3(textareaRef));
var node_5=sibling(node_4,2);{var consequent=$$anchor2=>{ChatAttachmentMcpResources($$anchor2,{class:"mb-3",onResourceClick:uri2=>{set$1(preSelectedResourceUri,uri2,!0),set$1(isResourceDialogOpen,!0)}})},d2=user_derived(()=>mcpHasResourceAttachments());if_block(node_5,$$render=>{get$3(d2)&&$$render(consequent)})}var node_6=sibling(node_5,2);{let $0=user_derived(()=>value().trim().length>0),$1=user_derived(()=>showMcpPromptButton()?()=>set$1(isPromptPickerOpen,!0):void 0);bind_this(ChatFormActions(
node_6,{class:"px-3",get canSend(){return get$3(canSubmit)},get hasText(){return get$3($0)},get disabled(){return disabled()},get isLoading(){return isLoading2()},get isRecording(){return get$3(isRecording)},get uploadedFiles(){return uploadedFiles()},onFileUpload:handleFileUpload,onMicClick:handleMicClick,get onStop(){return $$props.onStop},onSystemPromptClick:()=>$$props.onSystemPromptClick?.({message:value(),files:uploadedFiles()}),get onMcpPromptClick(){return get$3($1)},onMcpResourcesClick:()=>set$1(
isResourceDialogOpen,!0)}),$$value=>set$1(chatFormActionsRef,$$value,!0),()=>get$3(chatFormActionsRef))}reset(div_1),reset(div),reset(form);var node_7=sibling(form,2);return DialogMcpResources(node_7,{get preSelectedUri(){return get$3(preSelectedResourceUri)},onAttach:resource=>{mcpStore.attachResource(resource.uri)},onOpenChange:newOpen=>{newOpen||set$1(preSelectedResourceUri,void 0)},get open(){return get$3(isResourceDialogOpen)},set open($$value){set$1(isResourceDialogOpen,$$value,!0)}}),template_effect(
()=>{set_class(form,1,`relative ${className()??""}`),set_class(div,1,`${INPUT_CLASSES??""} overflow-hidden rounded-3xl backdrop-blur-md ${disabled()?"cursor-not-allowed opacity-60":""}`)}),event("submit",form,e=>{e.preventDefault(),!(!get$3(canSubmit)||disabled()||get$3(hasLoadingAttachments))&&$$props.onSubmit?.()}),event("paste",div_1,handlePaste),append($$anchor,fragment),pop($$exports)}function Dropdown_menu_content($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),sideOffset=prop(
$$props,"sideOffset",3,4),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","sideOffset","portalProps","class"]);var fragment=comment$2(),node2=first_child(fragment);component(node2,()=>Portal$2,($$anchor2,DropdownMenuPrimitive_Portal)=>{DropdownMenuPrimitive_Portal($$anchor2,spread_props(()=>$$props.portalProps,{children:($$anchor3,$$slotProps)=>{var fragment_1=comment$2(),node_1=first_child(fragment_1);{let $0=user_derived(()=>cn$1("z-50 max-h-(--bits-dropdown-menu-content-av\
ailable-height) min-w-[8rem] origin-(--bits-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border border-border bg-popover p-1.5 text-popover-foreground shadow-md outline-none data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[\
state=open]:fade-in-0 data-[state=open]:zoom-in-95 dark:border-border/20",$$props.class));component(node_1,()=>Dropdown_menu_content$1,($$anchor4,DropdownMenuPrimitive_Content)=>{DropdownMenuPrimitive_Content($$anchor4,spread_props({"data-slot":"dropdown-menu-content",get sideOffset(){return sideOffset()},get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor3,fragment_1)},$$slots:{default:!0}}))}),append($$anchor,fragment),pop()}
function Dropdown_menu_item($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),variant=prop($$props,"variant",3,"default"),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class","inset","variant"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("relative flex cursor-pointer items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-highlighted:bg-accent data-highlighted:text-accent-foreground d\
ata-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 data-[variant=destructive]:text-destructive data-[variant=destructive]:data-highlighted:bg-destructive/10 data-[variant=destructive]:data-highlighted:text-destructive dark:data-[variant=destructive]:data-highlighted:bg-destructive/20 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4 [&_svg:not([class*='text-'])]:text-muted-foreground data-[variant=destructive]:*:[svg]:!text-destructive",
$$props.class));component(node2,()=>Menu_item,($$anchor2,DropdownMenuPrimitive_Item)=>{DropdownMenuPrimitive_Item($$anchor2,spread_props({"data-slot":"dropdown-menu-item",get"data-inset"(){return $$props.inset},get"data-variant"(){return variant()},get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}function Dropdown_menu_separator($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props(
$$props,["$$slots","$$events","$$legacy","ref","class"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("-mx-1 my-1 h-px bg-border/20",$$props.class));component(node2,()=>Menu_separator,($$anchor2,DropdownMenuPrimitive_Separator)=>{DropdownMenuPrimitive_Separator($$anchor2,spread_props({"data-slot":"dropdown-menu-separator",get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}
function Dropdown_menu_trigger($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref"]);var fragment=comment$2(),node2=first_child(fragment);component(node2,()=>Menu_trigger,($$anchor2,DropdownMenuPrimitive_Trigger)=>{DropdownMenuPrimitive_Trigger($$anchor2,spread_props({"data-slot":"dropdown-menu-trigger"},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))}),append($$anchor,fragment),pop()}
function Dropdown_menu_sub_content($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("z-50 max-h-(--bits-dropdown-menu-content-available-height) min-w-[8rem] origin-(--bits-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border border-border bg-popover p-1.5 text-popover-foregr\
ound shadow-md outline-none data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95 dark:border-border/20",$$props.class));component(node2,()=>Menu_sub_content,($$anchor2,DropdownMenuPrimitive_SubContent)=>{DropdownMenuPrimitive_SubContent(
$$anchor2,spread_props({"data-slot":"dropdown-menu-sub-content",get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}var root_1$D=from_html("<!> <!>",1);function Dropdown_menu_sub_trigger($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class","inset","children"]);var fragment=comment$2(),node2=first_child(fragment);{
let $0=user_derived(()=>cn$1("flex cursor-default items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-highlighted:bg-accent data-highlighted:text-accent-foreground data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 data-[state=open]:bg-accent data-[state=open]:text-accent-foreground [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4 [&_svg:not([class*='text-'])]:text-muted-foreground",$$props.class));component(
node2,()=>Menu_sub_trigger,($$anchor2,DropdownMenuPrimitive_SubTrigger)=>{DropdownMenuPrimitive_SubTrigger($$anchor2,spread_props({"data-slot":"dropdown-menu-sub-trigger",get"data-inset"(){return $$props.inset},get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)},children:($$anchor3,$$slotProps)=>{var fragment_1=root_1$D(),node_1=first_child(fragment_1);snippet(node_1,()=>$$props.children??noop$3);var node_2=sibling(node_1,2);Chevron_right(node_2,
{class:"ml-auto size-4"}),append($$anchor3,fragment_1)},$$slots:{default:!0}}))})}append($$anchor,fragment),pop()}const Sub=Menu_sub,Root$2=Menu;function useAttachmentMenu(getFlags,getCallbacks,close2){const modalityFlags=user_derived(getFlags),callbacks=user_derived(()=>{const cbs=getCallbacks(),wrap2=fn=>()=>{close2(),fn?.()};return{[AttachmentAction.FILE_UPLOAD]:wrap2(cbs.onFileUpload),[AttachmentAction.SYSTEM_PROMPT_CLICK]:wrap2(cbs.onSystemPromptClick),[AttachmentAction.MCP_PROMPT_CLICK]:wrap2(
cbs.onMcpPromptClick),[AttachmentAction.MCP_RESOURCES_CLICK]:wrap2(cbs.onMcpResourcesClick)}});function isItemEnabled(enabledWhen){return!enabledWhen||enabledWhen==="always"?!0:!!get$3(modalityFlags)[enabledWhen]}function isItemVisible(visibleWhen){return visibleWhen?!!get$3(modalityFlags)[visibleWhen]:!0}function getSystemMessageTooltip(){return page$1.params.id?"Inject custom system message at the beginning of the conversation":"Add custom system message for a new conversation"}return{get callbacks(){
return get$3(callbacks)},isItemEnabled,isItemVisible,getSystemMessageTooltip}}var root_5$o=from_html('<span class="sr-only"> </span> <!>',1),root_6$j=from_html("<p> </p>"),root_3$u=from_html("<!> <!>",1),root_10$c=from_html("<!> <span> </span>",1),root_14$5=from_html("<!> <span> </span>",1),root_15$7=from_html("<p> </p>"),root_12$7=from_html("<!> <!>",1),root_20$4=from_html("<!> <span> </span>",1),root_21$5=from_html("<p>PDFs will be converted to text. Image-based PDFs may not work properly.</p>"),
root_17$8=from_html("<!> <!>",1),root_26=from_html("<!> <span> </span>",1),root_27$1=from_html("<p> </p>"),root_24$1=from_html("<!> <!>",1),root_30$1=from_html("<!> <span> </span>",1),root_7$j=from_html("<!> <!> <!> <!> <!> <!> <!>",1),root_1$C=from_html("<!> <!>",1),root$14=from_html("<div><!></div>");function ChatFormActionAttachmentsDropdown($$anchor,$$props){push$1($$props,!0);let className=prop($$props,"class",3,""),disabled=prop($$props,"disabled",3,!1),hasAudioModality=prop($$props,"hasAu\
dioModality",3,!1),hasVisionModality=prop($$props,"hasVisionModality",3,!1),hasMcpPromptsSupport=prop($$props,"hasMcpPromptsSupport",3,!1),hasMcpResourcesSupport=prop($$props,"hasMcpResourcesSupport",3,!1),dropdownOpen=state$1(!1);function handleMcpSettingsClick(){set$1(dropdownOpen,!1),$$props.onMcpSettingsClick?.()}const attachmentMenu=useAttachmentMenu(()=>({hasVisionModality:hasVisionModality(),hasAudioModality:hasAudioModality(),hasMcpPromptsSupport:hasMcpPromptsSupport(),hasMcpResourcesSupport:hasMcpResourcesSupport()}),
()=>({onFileUpload:$$props.onFileUpload,onSystemPromptClick:$$props.onSystemPromptClick,onMcpPromptClick:$$props.onMcpPromptClick,onMcpResourcesClick:$$props.onMcpResourcesClick}),()=>{set$1(dropdownOpen,!1)});var div=root$14(),node2=child(div);component(node2,()=>Root$2,($$anchor2,DropdownMenu_Root)=>{DropdownMenu_Root($$anchor2,{get open(){return get$3(dropdownOpen)},set open($$value){set$1(dropdownOpen,$$value,!0)},children:($$anchor3,$$slotProps)=>{var fragment=root_1$C(),node_1=first_child(
fragment);component(node_1,()=>Dropdown_menu_trigger,($$anchor4,DropdownMenu_Trigger)=>{DropdownMenu_Trigger($$anchor4,{name:"Attach files",get disabled(){return disabled()},children:($$anchor5,$$slotProps2)=>{var fragment_1=comment$2(),node_2=first_child(fragment_1);component(node_2,()=>Root$5,($$anchor6,Tooltip_Root)=>{Tooltip_Root($$anchor6,{children:($$anchor7,$$slotProps3)=>{var fragment_2=root_3$u(),node_3=first_child(fragment_2);component(node_3,()=>Tooltip_trigger,($$anchor8,Tooltip_Trigger)=>{
Tooltip_Trigger($$anchor8,{class:"w-full",children:($$anchor9,$$slotProps4)=>{Button($$anchor9,{class:"file-upload-button h-8 w-8 rounded-full p-0",get disabled(){return disabled()},variant:"secondary",type:"button",children:($$anchor10,$$slotProps5)=>{var fragment_4=root_5$o(),span=first_child(fragment_4),text2=child(span,!0);reset(span);var node_4=sibling(span,2);Plus(node_4,{class:"h-4 w-4"}),template_effect(()=>set_text(text2,ATTACHMENT_TOOLTIP_TEXT)),append($$anchor10,fragment_4)},$$slots:{
default:!0}})},$$slots:{default:!0}})});var node_5=sibling(node_3,2);component(node_5,()=>Tooltip_content,($$anchor8,Tooltip_Content)=>{Tooltip_Content($$anchor8,{children:($$anchor9,$$slotProps4)=>{var p2=root_6$j(),text_1=child(p2,!0);reset(p2),template_effect(()=>set_text(text_1,ATTACHMENT_TOOLTIP_TEXT)),append($$anchor9,p2)},$$slots:{default:!0}})}),append($$anchor7,fragment_2)},$$slots:{default:!0}})}),append($$anchor5,fragment_1)},$$slots:{default:!0}})});var node_6=sibling(node_1,2);component(
node_6,()=>Dropdown_menu_content,($$anchor4,DropdownMenu_Content)=>{DropdownMenu_Content($$anchor4,{align:"start",class:"w-48",children:($$anchor5,$$slotProps2)=>{var fragment_5=root_7$j(),node_7=first_child(fragment_5);each(node_7,17,()=>ATTACHMENT_FILE_ITEMS,item=>item.id,($$anchor6,item)=>{const enabled=user_derived(()=>attachmentMenu.isItemEnabled(get$3(item).enabledWhen));var fragment_6=comment$2(),node_8=first_child(fragment_6);{var consequent=$$anchor7=>{var fragment_7=comment$2(),node_9=first_child(
fragment_7);{let $0=user_derived(()=>get$3(item).class??"");component(node_9,()=>Dropdown_menu_item,($$anchor8,DropdownMenu_Item)=>{DropdownMenu_Item($$anchor8,{get class(){return`${get$3($0)??""} flex cursor-pointer items-center gap-2`},onclick:()=>attachmentMenu.callbacks[get$3(item).action](),children:($$anchor9,$$slotProps3)=>{var fragment_8=root_10$c(),node_10=first_child(fragment_8);component(node_10,()=>get$3(item).icon,($$anchor10,item_icon)=>{item_icon($$anchor10,{class:"h-4 w-4"})});var span_1=sibling(
node_10,2),text_2=child(span_1,!0);reset(span_1),template_effect(()=>set_text(text_2,get$3(item).label)),append($$anchor9,fragment_8)},$$slots:{default:!0}})})}append($$anchor7,fragment_7)},consequent_1=$$anchor7=>{var fragment_9=comment$2(),node_11=first_child(fragment_9);component(node_11,()=>Root$5,($$anchor8,Tooltip_Root_1)=>{Tooltip_Root_1($$anchor8,{get delayDuration(){return TOOLTIP_DELAY_DURATION},children:($$anchor9,$$slotProps3)=>{var fragment_10=root_12$7(),node_12=first_child(fragment_10);
component(node_12,()=>Tooltip_trigger,($$anchor10,Tooltip_Trigger_1)=>{Tooltip_Trigger_1($$anchor10,{class:"w-full",children:($$anchor11,$$slotProps4)=>{var fragment_11=comment$2(),node_13=first_child(fragment_11);{let $0=user_derived(()=>get$3(item).class??"");component(node_13,()=>Dropdown_menu_item,($$anchor12,DropdownMenu_Item_1)=>{DropdownMenu_Item_1($$anchor12,{get class(){return`${get$3($0)??""} flex cursor-pointer items-center gap-2`},disabled:!0,children:($$anchor13,$$slotProps5)=>{var fragment_12=root_14$5(),
node_14=first_child(fragment_12);component(node_14,()=>get$3(item).icon,($$anchor14,item_icon_1)=>{item_icon_1($$anchor14,{class:"h-4 w-4"})});var span_2=sibling(node_14,2),text_3=child(span_2,!0);reset(span_2),template_effect(()=>set_text(text_3,get$3(item).label)),append($$anchor13,fragment_12)},$$slots:{default:!0}})})}append($$anchor11,fragment_11)},$$slots:{default:!0}})});var node_15=sibling(node_12,2);component(node_15,()=>Tooltip_content,($$anchor10,Tooltip_Content_1)=>{Tooltip_Content_1(
$$anchor10,{side:"right",children:($$anchor11,$$slotProps4)=>{var p_1=root_15$7(),text_4=child(p_1,!0);reset(p_1),template_effect(()=>set_text(text_4,get$3(item).disabledTooltip)),append($$anchor11,p_1)},$$slots:{default:!0}})}),append($$anchor9,fragment_10)},$$slots:{default:!0}})}),append($$anchor7,fragment_9)};if_block(node_8,$$render=>{get$3(enabled)?$$render(consequent):get$3(item).disabledTooltip&&$$render(consequent_1,1)})}append($$anchor6,fragment_6)});var node_16=sibling(node_7,2);{var consequent_3=$$anchor6=>{
""),value().startsWith(RESOURCE_TRIGGER_PREFIX)&&(value(""),$$props.onValueChange?.("")),set$1(isResourceDialogOpen,!0)}async function handleMicClick(){if(!audioRecorder||!get$3(recordingSupported)){console.warn("Audio recording not supported");return}if(get$3(isRecording)){set$1(isRecording,!1);try{const audioBlob=await audioRecorder.stopRecording(),wavBlob=await convertToWav(audioBlob),audioFile=createAudioFile(wavBlob);$$props.onFilesAdd?.([audioFile])}catch(error2){console.error("Failed to s\
top recording:",error2)}}else try{await audioRecorder.startRecording(),set$1(isRecording,!0)}catch(error2){console.error("Failed to start recording:",error2)}}var $$exports={focus:focus2,resetTextareaHeight,openModelSelector,checkModelSelected},fragment=root$15(),node2=first_child(fragment);bind_this(ChatFormFileInputInvisible(node2,{onFileSelect:handleFileSelect}),$$value=>set$1(fileInputRef,$$value,!0),()=>get$3(fileInputRef));var form=sibling(node2,2),node_1=child(form);bind_this(ChatFormPromptPicker(
node_1,{get isOpen(){return get$3(isPromptPickerOpen)},get searchQuery(){return get$3(promptSearchQuery)},onClose:handlePromptPickerClose,onPromptLoadStart:handlePromptLoadStart,onPromptLoadComplete:handlePromptLoadComplete,onPromptLoadError:handlePromptLoadError}),$$value=>set$1(promptPickerRef,$$value,!0),()=>get$3(promptPickerRef));var node_2=sibling(node_1,2);bind_this(ChatFormResourcePicker(node_2,{get isOpen(){return get$3(isInlineResourcePickerOpen)},get searchQuery(){return get$3(resourceSearchQuery)},
onClose:handleInlineResourcePickerClose,onResourceSelect:handleInlineResourceSelect,onBrowse:handleBrowseResources}),$$value=>set$1(resourcePickerRef,$$value,!0),()=>get$3(resourcePickerRef));var div=sibling(node_2,2),node_3=child(div);{let $0=user_derived(()=>get$3(activeModelId)??void 0);ChatAttachmentsList(node_3,{get attachments(){return attachments()},onFileRemove:handleFileRemove,limitToSingleRow:!0,class:"py-5",style:"scroll-padding: 1rem;",get activeModelId(){return get$3($0)},get uploadedFiles(){
return uploadedFiles()},set uploadedFiles($$value){uploadedFiles($$value)}})}var div_1=sibling(node_3,2),node_4=child(div_1);bind_this(ChatFormTextarea(node_4,{class:"px-5 py-1.5 md:pt-0",onKeydown:handleKeydown,onInput:()=>{handleInput(),$$props.onValueChange?.(value())},get disabled(){return disabled()},get placeholder(){return placeholder()},get value(){return value()},set value($$value){value($$value)}}),$$value=>set$1(textareaRef,$$value,!0),()=>get$3(textareaRef));var node_5=sibling(node_4,
2);{var consequent=$$anchor2=>{ChatAttachmentMcpResources($$anchor2,{class:"mb-3",onResourceClick:uri2=>{set$1(preSelectedResourceUri,uri2,!0),set$1(isResourceDialogOpen,!0)}})},d2=user_derived(()=>mcpHasResourceAttachments());if_block(node_5,$$render=>{get$3(d2)&&$$render(consequent)})}var node_6=sibling(node_5,2);{let $0=user_derived(()=>value().trim().length>0),$1=user_derived(()=>showMcpPromptButton()?()=>set$1(isPromptPickerOpen,!0):void 0);bind_this(ChatFormActions(node_6,{class:"px-3",get canSend(){
return get$3(canSubmit)},get hasText(){return get$3($0)},get disabled(){return disabled()},get isLoading(){return isLoading2()},get isRecording(){return get$3(isRecording)},get uploadedFiles(){return uploadedFiles()},onFileUpload:handleFileUpload,onMicClick:handleMicClick,get onStop(){return $$props.onStop},onSystemPromptClick:()=>$$props.onSystemPromptClick?.({message:value(),files:uploadedFiles()}),get onMcpPromptClick(){return get$3($1)},onMcpResourcesClick:()=>set$1(isResourceDialogOpen,!0)}),
$$value=>set$1(chatFormActionsRef,$$value,!0),()=>get$3(chatFormActionsRef))}reset(div_1),reset(div),reset(form);var node_7=sibling(form,2);return DialogMcpResources(node_7,{get preSelectedUri(){return get$3(preSelectedResourceUri)},onAttach:resource=>{mcpStore.attachResource(resource.uri)},onOpenChange:newOpen=>{newOpen||set$1(preSelectedResourceUri,void 0)},get open(){return get$3(isResourceDialogOpen)},set open($$value){set$1(isResourceDialogOpen,$$value,!0)}}),template_effect(()=>{set_class(
form,1,`relative ${className()??""}`),set_class(div,1,`${INPUT_CLASSES??""} overflow-hidden rounded-3xl backdrop-blur-md ${disabled()?"cursor-not-allowed opacity-60":""}`)}),event("submit",form,e=>{e.preventDefault(),!(!get$3(canSubmit)||disabled()||get$3(hasLoadingAttachments))&&$$props.onSubmit?.()}),event("paste",div_1,handlePaste),append($$anchor,fragment),pop($$exports)}function Dropdown_menu_content($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),sideOffset=prop($$props,
"sideOffset",3,4),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","sideOffset","portalProps","class"]);var fragment=comment$2(),node2=first_child(fragment);component(node2,()=>Portal$2,($$anchor2,DropdownMenuPrimitive_Portal)=>{DropdownMenuPrimitive_Portal($$anchor2,spread_props(()=>$$props.portalProps,{children:($$anchor3,$$slotProps)=>{var fragment_1=comment$2(),node_1=first_child(fragment_1);{let $0=user_derived(()=>cn$1("z-50 max-h-(--bits-dropdown-menu-content-available-\
height) min-w-[8rem] origin-(--bits-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border border-border bg-popover p-1.5 text-popover-foreground shadow-md outline-none data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=op\
en]:fade-in-0 data-[state=open]:zoom-in-95 dark:border-border/20",$$props.class));component(node_1,()=>Dropdown_menu_content$1,($$anchor4,DropdownMenuPrimitive_Content)=>{DropdownMenuPrimitive_Content($$anchor4,spread_props({"data-slot":"dropdown-menu-content",get sideOffset(){return sideOffset()},get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor3,fragment_1)},$$slots:{default:!0}}))}),append($$anchor,fragment),pop()}function Dropdown_menu_item($$anchor,$$props){
push$1($$props,!0);let ref2=prop($$props,"ref",15,null),variant=prop($$props,"variant",3,"default"),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class","inset","variant"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("relative flex cursor-pointer items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-highlighted:bg-accent data-highlighted:text-accent-foreground data-[disabled]:pointer-events-none data-[disab\
led]:opacity-50 data-[inset]:pl-8 data-[variant=destructive]:text-destructive data-[variant=destructive]:data-highlighted:bg-destructive/10 data-[variant=destructive]:data-highlighted:text-destructive dark:data-[variant=destructive]:data-highlighted:bg-destructive/20 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4 [&_svg:not([class*='text-'])]:text-muted-foreground data-[variant=destructive]:*:[svg]:!text-destructive",$$props.class));component(node2,()=>Menu_item,
($$anchor2,DropdownMenuPrimitive_Item)=>{DropdownMenuPrimitive_Item($$anchor2,spread_props({"data-slot":"dropdown-menu-item",get"data-inset"(){return $$props.inset},get"data-variant"(){return variant()},get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}function Dropdown_menu_separator($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events",
"$$legacy","ref","class"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("-mx-1 my-1 h-px bg-border/20",$$props.class));component(node2,()=>Menu_separator,($$anchor2,DropdownMenuPrimitive_Separator)=>{DropdownMenuPrimitive_Separator($$anchor2,spread_props({"data-slot":"dropdown-menu-separator",get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}function Dropdown_menu_trigger($$anchor,$$props){
push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref"]);var fragment=comment$2(),node2=first_child(fragment);component(node2,()=>Menu_trigger,($$anchor2,DropdownMenuPrimitive_Trigger)=>{DropdownMenuPrimitive_Trigger($$anchor2,spread_props({"data-slot":"dropdown-menu-trigger"},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))}),append($$anchor,fragment),pop()}function Dropdown_menu_sub_content($$anchor,$$props){
push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("z-50 max-h-(--bits-dropdown-menu-content-available-height) min-w-[8rem] origin-(--bits-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border border-border bg-popover p-1.5 text-popover-foreground shadow-md outline-none data-[side=bottom]:slide-\
in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95 dark:border-border/20",$$props.class));component(node2,()=>Menu_sub_content,($$anchor2,DropdownMenuPrimitive_SubContent)=>{DropdownMenuPrimitive_SubContent($$anchor2,spread_props({"data-sl\
ot":"dropdown-menu-sub-content",get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}var root_1$D=from_html("<!> <!>",1);function Dropdown_menu_sub_trigger($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class","inset","children"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("fl\
ex cursor-default items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-highlighted:bg-accent data-highlighted:text-accent-foreground data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 data-[state=open]:bg-accent data-[state=open]:text-accent-foreground [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4 [&_svg:not([class*='text-'])]:text-muted-foreground",$$props.class));component(node2,()=>Menu_sub_trigger,
($$anchor2,DropdownMenuPrimitive_SubTrigger)=>{DropdownMenuPrimitive_SubTrigger($$anchor2,spread_props({"data-slot":"dropdown-menu-sub-trigger",get"data-inset"(){return $$props.inset},get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)},children:($$anchor3,$$slotProps)=>{var fragment_1=root_1$D(),node_1=first_child(fragment_1);snippet(node_1,()=>$$props.children??noop$3);var node_2=sibling(node_1,2);Chevron_right(node_2,{class:"ml-auto size-4"}),
append($$anchor3,fragment_1)},$$slots:{default:!0}}))})}append($$anchor,fragment),pop()}const Sub=Menu_sub,Root$2=Menu;function useAttachmentMenu(getFlags,getCallbacks,close2){const modalityFlags=user_derived(getFlags),callbacks=user_derived(()=>{const cbs=getCallbacks(),wrap2=fn=>()=>{close2(),fn?.()};return{[AttachmentAction.FILE_UPLOAD]:wrap2(cbs.onFileUpload),[AttachmentAction.SYSTEM_PROMPT_CLICK]:wrap2(cbs.onSystemPromptClick),[AttachmentAction.MCP_PROMPT_CLICK]:wrap2(cbs.onMcpPromptClick),
[AttachmentAction.MCP_RESOURCES_CLICK]:wrap2(cbs.onMcpResourcesClick)}});function isItemEnabled(enabledWhen){return!enabledWhen||enabledWhen==="always"?!0:!!get$3(modalityFlags)[enabledWhen]}function isItemVisible(visibleWhen){return visibleWhen?!!get$3(modalityFlags)[visibleWhen]:!0}function getSystemMessageTooltip(){return page$1.params.id?"Inject custom system message at the beginning of the conversation":"Add custom system message for a new conversation"}return{get callbacks(){return get$3(callbacks)},
isItemEnabled,isItemVisible,getSystemMessageTooltip}}var root_5$o=from_html('<span class="sr-only"> </span> <!>',1),root_6$j=from_html("<p> </p>"),root_3$u=from_html("<!> <!>",1),root_10$c=from_html("<!> <span> </span>",1),root_14$5=from_html("<!> <span> </span>",1),root_15$7=from_html("<p> </p>"),root_12$7=from_html("<!> <!>",1),root_20$4=from_html("<!> <span> </span>",1),root_21$5=from_html("<p>PDFs will be converted to text. Image-based PDFs may not work properly.</p>"),root_17$8=from_html("<\
!> <!>",1),root_26=from_html("<!> <span> </span>",1),root_27$1=from_html("<p> </p>"),root_24$1=from_html("<!> <!>",1),root_30$1=from_html("<!> <span> </span>",1),root_7$j=from_html("<!> <!> <!> <!> <!> <!> <!>",1),root_1$C=from_html("<!> <!>",1),root$14=from_html("<div><!></div>");function ChatFormActionAttachmentsDropdown($$anchor,$$props){push$1($$props,!0);let className=prop($$props,"class",3,""),disabled=prop($$props,"disabled",3,!1),hasAudioModality=prop($$props,"hasAudioModality",3,!1),hasVisionModality=prop(
$$props,"hasVisionModality",3,!1),hasMcpPromptsSupport=prop($$props,"hasMcpPromptsSupport",3,!1),hasMcpResourcesSupport=prop($$props,"hasMcpResourcesSupport",3,!1),dropdownOpen=state$1(!1);function handleMcpSettingsClick(){set$1(dropdownOpen,!1),$$props.onMcpSettingsClick?.()}const attachmentMenu=useAttachmentMenu(()=>({hasVisionModality:hasVisionModality(),hasAudioModality:hasAudioModality(),hasMcpPromptsSupport:hasMcpPromptsSupport(),hasMcpResourcesSupport:hasMcpResourcesSupport()}),()=>({onFileUpload:$$props.
onFileUpload,onSystemPromptClick:$$props.onSystemPromptClick,onMcpPromptClick:$$props.onMcpPromptClick,onMcpResourcesClick:$$props.onMcpResourcesClick}),()=>{set$1(dropdownOpen,!1)});var div=root$14(),node2=child(div);component(node2,()=>Root$2,($$anchor2,DropdownMenu_Root)=>{DropdownMenu_Root($$anchor2,{get open(){return get$3(dropdownOpen)},set open($$value){set$1(dropdownOpen,$$value,!0)},children:($$anchor3,$$slotProps)=>{var fragment=root_1$C(),node_1=first_child(fragment);component(node_1,
()=>Dropdown_menu_trigger,($$anchor4,DropdownMenu_Trigger)=>{DropdownMenu_Trigger($$anchor4,{name:"Attach files",get disabled(){return disabled()},children:($$anchor5,$$slotProps2)=>{var fragment_1=comment$2(),node_2=first_child(fragment_1);component(node_2,()=>Root$5,($$anchor6,Tooltip_Root)=>{Tooltip_Root($$anchor6,{children:($$anchor7,$$slotProps3)=>{var fragment_2=root_3$u(),node_3=first_child(fragment_2);component(node_3,()=>Tooltip_trigger,($$anchor8,Tooltip_Trigger)=>{Tooltip_Trigger($$anchor8,
{class:"w-full",children:($$anchor9,$$slotProps4)=>{Button($$anchor9,{class:"file-upload-button h-8 w-8 rounded-full p-0",get disabled(){return disabled()},variant:"secondary",type:"button",children:($$anchor10,$$slotProps5)=>{var fragment_4=root_5$o(),span=first_child(fragment_4),text2=child(span,!0);reset(span);var node_4=sibling(span,2);Plus(node_4,{class:"h-4 w-4"}),template_effect(()=>set_text(text2,ATTACHMENT_TOOLTIP_TEXT)),append($$anchor10,fragment_4)},$$slots:{default:!0}})},$$slots:{default:!0}})});
var node_5=sibling(node_3,2);component(node_5,()=>Tooltip_content,($$anchor8,Tooltip_Content)=>{Tooltip_Content($$anchor8,{children:($$anchor9,$$slotProps4)=>{var p2=root_6$j(),text_1=child(p2,!0);reset(p2),template_effect(()=>set_text(text_1,ATTACHMENT_TOOLTIP_TEXT)),append($$anchor9,p2)},$$slots:{default:!0}})}),append($$anchor7,fragment_2)},$$slots:{default:!0}})}),append($$anchor5,fragment_1)},$$slots:{default:!0}})});var node_6=sibling(node_1,2);component(node_6,()=>Dropdown_menu_content,($$anchor4,DropdownMenu_Content)=>{
DropdownMenu_Content($$anchor4,{align:"start",class:"w-48",children:($$anchor5,$$slotProps2)=>{var fragment_5=root_7$j(),node_7=first_child(fragment_5);each(node_7,17,()=>ATTACHMENT_FILE_ITEMS,item=>item.id,($$anchor6,item)=>{const enabled=user_derived(()=>attachmentMenu.isItemEnabled(get$3(item).enabledWhen));var fragment_6=comment$2(),node_8=first_child(fragment_6);{var consequent=$$anchor7=>{var fragment_7=comment$2(),node_9=first_child(fragment_7);{let $0=user_derived(()=>get$3(item).class??
"");component(node_9,()=>Dropdown_menu_item,($$anchor8,DropdownMenu_Item)=>{DropdownMenu_Item($$anchor8,{get class(){return`${get$3($0)??""} flex cursor-pointer items-center gap-2`},onclick:()=>attachmentMenu.callbacks[get$3(item).action](),children:($$anchor9,$$slotProps3)=>{var fragment_8=root_10$c(),node_10=first_child(fragment_8);component(node_10,()=>get$3(item).icon,($$anchor10,item_icon)=>{item_icon($$anchor10,{class:"h-4 w-4"})});var span_1=sibling(node_10,2),text_2=child(span_1,!0);reset(
span_1),template_effect(()=>set_text(text_2,get$3(item).label)),append($$anchor9,fragment_8)},$$slots:{default:!0}})})}append($$anchor7,fragment_7)},consequent_1=$$anchor7=>{var fragment_9=comment$2(),node_11=first_child(fragment_9);component(node_11,()=>Root$5,($$anchor8,Tooltip_Root_1)=>{Tooltip_Root_1($$anchor8,{get delayDuration(){return TOOLTIP_DELAY_DURATION},children:($$anchor9,$$slotProps3)=>{var fragment_10=root_12$7(),node_12=first_child(fragment_10);component(node_12,()=>Tooltip_trigger,
($$anchor10,Tooltip_Trigger_1)=>{Tooltip_Trigger_1($$anchor10,{class:"w-full",children:($$anchor11,$$slotProps4)=>{var fragment_11=comment$2(),node_13=first_child(fragment_11);{let $0=user_derived(()=>get$3(item).class??"");component(node_13,()=>Dropdown_menu_item,($$anchor12,DropdownMenu_Item_1)=>{DropdownMenu_Item_1($$anchor12,{get class(){return`${get$3($0)??""} flex cursor-pointer items-center gap-2`},disabled:!0,children:($$anchor13,$$slotProps5)=>{var fragment_12=root_14$5(),node_14=first_child(
fragment_12);component(node_14,()=>get$3(item).icon,($$anchor14,item_icon_1)=>{item_icon_1($$anchor14,{class:"h-4 w-4"})});var span_2=sibling(node_14,2),text_3=child(span_2,!0);reset(span_2),template_effect(()=>set_text(text_3,get$3(item).label)),append($$anchor13,fragment_12)},$$slots:{default:!0}})})}append($$anchor11,fragment_11)},$$slots:{default:!0}})});var node_15=sibling(node_12,2);component(node_15,()=>Tooltip_content,($$anchor10,Tooltip_Content_1)=>{Tooltip_Content_1($$anchor10,{side:"r\
ight",children:($$anchor11,$$slotProps4)=>{var p_1=root_15$7(),text_4=child(p_1,!0);reset(p_1),template_effect(()=>set_text(text_4,get$3(item).disabledTooltip)),append($$anchor11,p_1)},$$slots:{default:!0}})}),append($$anchor9,fragment_10)},$$slots:{default:!0}})}),append($$anchor7,fragment_9)};if_block(node_8,$$render=>{get$3(enabled)?$$render(consequent):get$3(item).disabledTooltip&&$$render(consequent_1,1)})}append($$anchor6,fragment_6)});var node_16=sibling(node_7,2);{var consequent_3=$$anchor6=>{
var fragment_13=comment$2(),node_17=first_child(fragment_13);component(node_17,()=>Root$5,($$anchor7,Tooltip_Root_2)=>{Tooltip_Root_2($$anchor7,{get delayDuration(){return TOOLTIP_DELAY_DURATION},children:($$anchor8,$$slotProps3)=>{var fragment_14=root_17$8(),node_18=first_child(fragment_14);component(node_18,()=>Tooltip_trigger,($$anchor9,Tooltip_Trigger_2)=>{Tooltip_Trigger_2($$anchor9,{class:"w-full",children:($$anchor10,$$slotProps4)=>{var fragment_15=comment$2(),node_19=first_child(fragment_15);
component(node_19,()=>Dropdown_menu_item,($$anchor11,DropdownMenu_Item_2)=>{DropdownMenu_Item_2($$anchor11,{class:"flex cursor-pointer items-center gap-2",get onclick(){return attachmentMenu.callbacks.onFileUpload},children:($$anchor12,$$slotProps5)=>{const pdfItem=user_derived(()=>ATTACHMENT_FILE_ITEMS.find(i=>i.id===AttachmentMenuItemId.PDF));var fragment_16=comment$2(),node_20=first_child(fragment_16);{var consequent_2=$$anchor13=>{var fragment_17=root_20$4(),node_21=first_child(fragment_17);
component(node_21,()=>get$3(pdfItem).icon,($$anchor14,pdfItem_icon)=>{pdfItem_icon($$anchor14,{class:"h-4 w-4"})});var span_3=sibling(node_21,2),text_5=child(span_3,!0);reset(span_3),template_effect(()=>set_text(text_5,get$3(pdfItem).label)),append($$anchor13,fragment_17)};if_block(node_20,$$render=>{get$3(pdfItem)&&$$render(consequent_2)})}append($$anchor12,fragment_16)},$$slots:{default:!0}})}),append($$anchor10,fragment_15)},$$slots:{default:!0}})});var node_22=sibling(node_18,2);component(node_22,

View File

@@ -354,6 +354,7 @@ struct server_slot {
// generate a new draft
spec_draft = common_speculative_draft(spec.get(), params_spec, tokens, sampled);
n_draft_total += spec_draft.size();
if (spec_draft.size() > (size_t) n_draft_max) {
SLT_WRN(*this, "draft size %d exceeds max %d, truncating\n", (int) spec_draft.size(), n_draft_max);
@@ -3019,7 +3020,6 @@ private:
// update how many tokens out of those tested were accepted
slot.n_draft_accepted += ids.size() - 1;
slot.n_draft_total += n_draft;
// add accepted tokens to the prompt
slot.prompt.tokens.keep_first(slot.prompt.n_tokens() - n_draft);

View File

@@ -527,16 +527,15 @@
}
if (isRecording) {
isRecording = false;
try {
const audioBlob = await audioRecorder.stopRecording();
const wavBlob = await convertToWav(audioBlob);
const audioFile = createAudioFile(wavBlob);
onFilesAdd?.([audioFile]);
isRecording = false;
} catch (error) {
console.error('Failed to stop recording:', error);
isRecording = false;
}
} else {
try {

View File

@@ -43,27 +43,48 @@ export class AudioRecorder {
async stopRecording(): Promise<Blob> {
return new Promise((resolve, reject) => {
if (!this.mediaRecorder || this.mediaRecorder.state === 'inactive') {
const recorder = this.mediaRecorder;
const chunks = this.audioChunks;
const stream = this.stream;
if (!recorder || recorder.state === 'inactive') {
reject(new Error('No active recording to stop'));
return;
}
this.mediaRecorder.onstop = () => {
const mimeType = this.mediaRecorder?.mimeType || MimeTypeAudio.WAV;
const audioBlob = new Blob(this.audioChunks, { type: mimeType });
// Detach instance state right away so a new startRecording can take over without race
this.mediaRecorder = null;
this.audioChunks = [];
this.stream = null;
this.recordingState = false;
this.cleanup();
recorder.onstop = () => {
const audioBlob = new Blob(chunks, {
type: recorder.mimeType || MimeTypeAudio.WAV
});
if (stream) {
for (const track of stream.getTracks()) {
track.stop();
}
}
resolve(audioBlob);
};
this.mediaRecorder.onerror = (event) => {
recorder.onerror = (event) => {
console.error('Recording error:', event);
this.cleanup();
if (stream) {
for (const track of stream.getTracks()) {
track.stop();
}
}
reject(new Error('Recording failed'));
};
this.mediaRecorder.stop();
recorder.stop();
});
}
@@ -72,10 +93,26 @@ export class AudioRecorder {
}
cancelRecording(): void {
if (this.mediaRecorder && this.mediaRecorder.state !== 'inactive') {
this.mediaRecorder.stop();
const recorder = this.mediaRecorder;
const stream = this.stream;
this.mediaRecorder = null;
this.audioChunks = [];
this.stream = null;
this.recordingState = false;
if (recorder && recorder.state !== 'inactive') {
// Drop the original handlers so the pending stop event does not touch the instance
recorder.onstop = null;
recorder.onerror = null;
recorder.stop();
}
if (stream) {
for (const track of stream.getTracks()) {
track.stop();
}
}
this.cleanup();
}
private initializeRecorder(stream: MediaStream): void {
@@ -110,19 +147,6 @@ export class AudioRecorder {
this.recordingState = false;
};
}
private cleanup(): void {
if (this.stream) {
for (const track of this.stream.getTracks()) {
track.stop();
}
this.stream = null;
}
this.mediaRecorder = null;
this.audioChunks = [];
this.recordingState = false;
}
}
export async function convertToWav(audioBlob: Blob): Promise<Blob> {
@@ -136,13 +160,12 @@ export async function convertToWav(audioBlob: Blob): Promise<Blob> {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
const wavBlob = audioBufferToWav(audioBuffer);
audioContext.close();
return wavBlob;
try {
const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
return audioBufferToWav(audioBuffer);
} finally {
audioContext.close();
}
} catch (error) {
console.error('Failed to convert audio to WAV:', error);
return audioBlob;
@@ -182,12 +205,20 @@ function audioBufferToWav(buffer: AudioBuffer): Blob {
writeString(36, 'data'); // Subchunk2ID
view.setUint32(40, dataSize, true); // Subchunk2Size
let offset = 44;
// Cache channel arrays, write PCM via Int16Array (native little-endian, matches WAV)
const channels: Float32Array[] = new Array(numberOfChannels);
for (let c = 0; c < numberOfChannels; c++) {
channels[c] = buffer.getChannelData(c);
}
const pcm = new Int16Array(arrayBuffer, 44, length * numberOfChannels);
let p = 0;
for (let i = 0; i < length; i++) {
for (let channel = 0; channel < numberOfChannels; channel++) {
const sample = Math.max(-1, Math.min(1, buffer.getChannelData(channel)[i]));
view.setInt16(offset, sample * 0x7fff, true);
offset += 2;
for (let c = 0; c < numberOfChannels; c++) {
let s = channels[c][i];
if (s > 1) s = 1;
else if (s < -1) s = -1;
pcm[p++] = s * 0x7fff;
}
}