mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-04-30 14:54:19 +02:00
Compare commits
19 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e82aaf2587 | ||
|
|
27aef3dd91 | ||
|
|
45155597aa | ||
|
|
80afa33aad | ||
|
|
b42c7fa5b8 | ||
|
|
d77599234e | ||
|
|
41a63be28e | ||
|
|
098705a29e | ||
|
|
683c5acb90 | ||
|
|
b1d5f5b449 | ||
|
|
4b221b7f1e | ||
|
|
59237bfbbc | ||
|
|
1cbc846eba | ||
|
|
3142f1dbb9 | ||
|
|
b5c4227dc6 | ||
|
|
d6a5094004 | ||
|
|
7b95ea5d11 | ||
|
|
bdc9c743a5 | ||
|
|
739393beeb |
@@ -49,7 +49,7 @@ enum common_log_col : int {
|
||||
};
|
||||
|
||||
// disable colors by default
|
||||
static std::vector<const char *> g_col = {
|
||||
static const char* g_col[] = {
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
@@ -247,7 +247,6 @@ public:
|
||||
|
||||
entries = std::move(new_entries);
|
||||
}
|
||||
|
||||
cv.notify_one();
|
||||
}
|
||||
|
||||
@@ -265,7 +264,6 @@ public:
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mtx);
|
||||
cv.wait(lock, [this]() { return head != tail; });
|
||||
|
||||
cur = entries[head];
|
||||
|
||||
head = (head + 1) % entries.size();
|
||||
@@ -301,7 +299,6 @@ public:
|
||||
|
||||
tail = (tail + 1) % entries.size();
|
||||
}
|
||||
|
||||
cv.notify_one();
|
||||
}
|
||||
|
||||
@@ -338,7 +335,7 @@ public:
|
||||
g_col[COMMON_LOG_COL_CYAN] = LOG_COL_CYAN;
|
||||
g_col[COMMON_LOG_COL_WHITE] = LOG_COL_WHITE;
|
||||
} else {
|
||||
for (size_t i = 0; i < g_col.size(); i++) {
|
||||
for (size_t i = 0; i < std::size(g_col); i++) {
|
||||
g_col[i] = "";
|
||||
}
|
||||
}
|
||||
@@ -368,14 +365,20 @@ struct common_log * common_log_init() {
|
||||
}
|
||||
|
||||
struct common_log * common_log_main() {
|
||||
static struct common_log log;
|
||||
// We intentionally leak (i.e. do not delete) the logger singleton because
|
||||
// common_log destructor called at DLL teardown phase will cause hanging on Windows.
|
||||
// OS will release resources anyway so it should not be a significant issue,
|
||||
// though this design may cause logs to be lost if not flushed before the program exits.
|
||||
// Refer to https://github.com/ggml-org/llama.cpp/issues/22142 for details.
|
||||
static struct common_log * log;
|
||||
static std::once_flag init_flag;
|
||||
std::call_once(init_flag, [&]() {
|
||||
log = new common_log;
|
||||
// Set default to auto-detect colors
|
||||
log.set_colors(tty_can_use_colors());
|
||||
log->set_colors(tty_can_use_colors());
|
||||
});
|
||||
|
||||
return &log;
|
||||
return log;
|
||||
}
|
||||
|
||||
void common_log_pause(struct common_log * log) {
|
||||
|
||||
@@ -49,7 +49,11 @@ void common_log_default_callback(enum ggml_log_level level, const char * text, v
|
||||
struct common_log;
|
||||
|
||||
struct common_log * common_log_init();
|
||||
struct common_log * common_log_main(); // singleton, automatically destroys itself on exit
|
||||
|
||||
// Singleton, intentionally leaked to avoid Windows teardown hangs.
|
||||
// Call common_log_flush() before exit if you want to ensure all logs are flushed.
|
||||
struct common_log * common_log_main();
|
||||
|
||||
void common_log_pause (struct common_log * log); // pause the worker thread, not thread-safe
|
||||
void common_log_resume(struct common_log * log); // resume the worker thread, not thread-safe
|
||||
void common_log_free (struct common_log * log);
|
||||
|
||||
@@ -232,34 +232,6 @@ static struct llama_sampler * common_reasoning_budget_init_state(
|
||||
);
|
||||
}
|
||||
|
||||
struct llama_sampler * common_reasoning_budget_init(
|
||||
const struct llama_vocab * vocab,
|
||||
const std::vector<llama_token> & start_tokens,
|
||||
const std::vector<llama_token> & end_tokens,
|
||||
const std::vector<llama_token> & forced_tokens,
|
||||
int32_t budget,
|
||||
const std::vector<llama_token> & prefill_tokens) {
|
||||
// Determine initial state from prefill: COUNTING if the prefill begins with
|
||||
// the start sequence but does not also contain the end sequence after it.
|
||||
common_reasoning_budget_state initial_state = REASONING_BUDGET_IDLE;
|
||||
if (!prefill_tokens.empty() && !start_tokens.empty() &&
|
||||
prefill_tokens.size() >= start_tokens.size() &&
|
||||
std::equal(start_tokens.begin(), start_tokens.end(), prefill_tokens.begin())) {
|
||||
initial_state = REASONING_BUDGET_COUNTING;
|
||||
// If the end sequence also follows the start in the prefill, reasoning
|
||||
// was opened and immediately closed — stay IDLE.
|
||||
if (!end_tokens.empty() &&
|
||||
prefill_tokens.size() >= start_tokens.size() + end_tokens.size()) {
|
||||
auto end_start = prefill_tokens.end() - (ptrdiff_t) end_tokens.size();
|
||||
if (end_start >= prefill_tokens.begin() + (ptrdiff_t) start_tokens.size() &&
|
||||
std::equal(end_tokens.begin(), end_tokens.end(), end_start)) {
|
||||
initial_state = REASONING_BUDGET_IDLE;
|
||||
}
|
||||
}
|
||||
}
|
||||
return common_reasoning_budget_init_state(vocab, start_tokens, end_tokens, forced_tokens, budget, initial_state);
|
||||
}
|
||||
|
||||
struct llama_sampler * common_reasoning_budget_init(
|
||||
const struct llama_vocab * vocab,
|
||||
const std::vector<llama_token> & start_tokens,
|
||||
|
||||
@@ -29,10 +29,7 @@ enum common_reasoning_budget_state {
|
||||
// end_tokens - token sequence for natural deactivation
|
||||
// forced_tokens - token sequence forced when budget expires
|
||||
// budget - max tokens allowed in the reasoning block
|
||||
// prefill_tokens - tokens already present in the prompt (generation prompt);
|
||||
// used to determine the initial state: COUNTING if they begin
|
||||
// with start_tokens (but don't also end with end_tokens),
|
||||
// IDLE otherwise. COUNTING with budget <= 0 is promoted to FORCING.
|
||||
// initial_state - initial state
|
||||
//
|
||||
struct llama_sampler * common_reasoning_budget_init(
|
||||
const struct llama_vocab * vocab,
|
||||
@@ -40,16 +37,6 @@ struct llama_sampler * common_reasoning_budget_init(
|
||||
const std::vector<llama_token> & end_tokens,
|
||||
const std::vector<llama_token> & forced_tokens,
|
||||
int32_t budget,
|
||||
const std::vector<llama_token> & prefill_tokens = {});
|
||||
|
||||
// Variant that takes an explicit initial state (used by tests and clone).
|
||||
// COUNTING with budget <= 0 is promoted to FORCING.
|
||||
struct llama_sampler * common_reasoning_budget_init(
|
||||
const struct llama_vocab * vocab,
|
||||
const std::vector<llama_token> & start_tokens,
|
||||
const std::vector<llama_token> & end_tokens,
|
||||
const std::vector<llama_token> & forced_tokens,
|
||||
int32_t budget,
|
||||
common_reasoning_budget_state initial_state);
|
||||
common_reasoning_budget_state initial_state = REASONING_BUDGET_IDLE);
|
||||
|
||||
common_reasoning_budget_state common_reasoning_budget_get_state(const struct llama_sampler * smpl);
|
||||
|
||||
@@ -260,32 +260,35 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
|
||||
}
|
||||
}
|
||||
|
||||
// Compute prefill tokens from the generation prompt
|
||||
std::vector<llama_token> prefill_tokens;
|
||||
if (!params.generation_prompt.empty()) {
|
||||
GGML_ASSERT(vocab != nullptr);
|
||||
auto tokens = common_tokenize(vocab, params.generation_prompt, false, true);
|
||||
for (size_t i = 0; i < tokens.size(); i++) {
|
||||
std::string piece = common_token_to_piece(vocab, tokens[i], true);
|
||||
if (i == 0 && std::isspace(piece[0]) && !std::isspace(params.generation_prompt[0])) {
|
||||
// Some tokenizers will add a space before the first special token, need to exclude
|
||||
continue;
|
||||
}
|
||||
LOG_DBG("%s: prefill token: %d = %s\n", __func__, tokens[i], piece.c_str());
|
||||
prefill_tokens.push_back(tokens[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Feed generation prompt tokens to the grammar sampler so it advances past
|
||||
// tokens the template already placed in the prompt.
|
||||
// Only applies to output-format and tool-call grammars; user-supplied grammars must not be prefilled.
|
||||
std::vector<llama_token> prefill_tokens;
|
||||
if (!params.generation_prompt.empty() && common_grammar_needs_prefill(params.grammar)) {
|
||||
GGML_ASSERT(vocab != nullptr);
|
||||
prefill_tokens = common_tokenize(vocab, params.generation_prompt, false, true);
|
||||
if (!prefill_tokens.empty()) {
|
||||
std::string first_token = common_token_to_piece(vocab, prefill_tokens[0], true);
|
||||
if (std::isspace(first_token[0]) && !std::isspace(params.generation_prompt[0])) {
|
||||
// Some tokenizers will add a space before the first special token, need to remove
|
||||
prefill_tokens = std::vector<llama_token>(prefill_tokens.begin() + 1, prefill_tokens.end());
|
||||
}
|
||||
}
|
||||
|
||||
if (grmr && !params.grammar_lazy) {
|
||||
try {
|
||||
for (const auto & token : prefill_tokens) {
|
||||
llama_sampler_accept(grmr, token);
|
||||
LOG_DBG("%s: accepted prefill token (%d)\n", __func__, token);
|
||||
}
|
||||
} catch (std::exception &e) {
|
||||
LOG_ERR("%s: error initializing grammar sampler for grammar:\n%s\n\nGeneration prompt:\n'%s'\n", __func__,
|
||||
common_grammar_value(params.grammar).c_str(), params.generation_prompt.c_str());
|
||||
throw e;
|
||||
if (grmr && !params.grammar_lazy && common_grammar_needs_prefill(params.grammar)) {
|
||||
try {
|
||||
for (const auto & token : prefill_tokens) {
|
||||
llama_sampler_accept(grmr, token);
|
||||
LOG_DBG("%s: grammar accepted prefill token (%d)\n", __func__, token);
|
||||
}
|
||||
} catch (std::exception &e) {
|
||||
LOG_ERR("%s: error initializing grammar sampler for grammar:\n%s\n\nGeneration prompt:\n'%s'\n", __func__,
|
||||
common_grammar_value(params.grammar).c_str(), params.generation_prompt.c_str());
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -296,8 +299,12 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
|
||||
params.reasoning_budget_start,
|
||||
params.reasoning_budget_end,
|
||||
params.reasoning_budget_forced,
|
||||
params.reasoning_budget_tokens < 0 ? INT_MAX : params.reasoning_budget_tokens,
|
||||
prefill_tokens);
|
||||
params.reasoning_budget_tokens < 0 ? INT_MAX : params.reasoning_budget_tokens);
|
||||
|
||||
for (const auto & token : prefill_tokens) {
|
||||
llama_sampler_accept(rbudget, token);
|
||||
LOG_DBG("%s: reasoning-budget accepted prefill token (%d)\n", __func__, token);
|
||||
}
|
||||
}
|
||||
|
||||
if (params.has_logit_bias()) {
|
||||
@@ -431,7 +438,7 @@ static bool grammar_should_apply(struct common_sampler * gsmpl) {
|
||||
return true;
|
||||
}
|
||||
|
||||
void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar) {
|
||||
void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool is_generated) {
|
||||
if (!gsmpl) {
|
||||
return;
|
||||
}
|
||||
@@ -439,9 +446,11 @@ void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, boo
|
||||
const auto tm = gsmpl->tm();
|
||||
|
||||
// grammar_should_apply() checks the reasoning budget state, so calculate this before we accept
|
||||
accept_grammar = accept_grammar && grammar_should_apply(gsmpl);
|
||||
const auto accept_grammar = is_generated && grammar_should_apply(gsmpl);
|
||||
|
||||
llama_sampler_accept(gsmpl->rbudget, token);
|
||||
if (gsmpl->rbudget && is_generated) {
|
||||
llama_sampler_accept(gsmpl->rbudget, token);
|
||||
}
|
||||
|
||||
if (gsmpl->grmr && accept_grammar) {
|
||||
llama_sampler_accept(gsmpl->grmr, token);
|
||||
|
||||
@@ -41,8 +41,8 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
|
||||
|
||||
void common_sampler_free(struct common_sampler * gsmpl);
|
||||
|
||||
// if accept_grammar is true, the token is accepted both by the sampling chain and the grammar
|
||||
void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar);
|
||||
// if is_generated is true, the token is accepted by the sampling chain, the reasoning budget sampler, and the grammar sampler
|
||||
void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool is_generated);
|
||||
void common_sampler_reset (struct common_sampler * gsmpl);
|
||||
struct common_sampler * common_sampler_clone (struct common_sampler * gsmpl);
|
||||
|
||||
|
||||
@@ -167,8 +167,6 @@ struct common_speculative_checkpoint {
|
||||
size_t size() const {
|
||||
return data.size();
|
||||
}
|
||||
|
||||
size_t ckpt_size = 0;
|
||||
};
|
||||
|
||||
struct common_speculative_state_draft : public common_speculative_state {
|
||||
@@ -176,7 +174,7 @@ struct common_speculative_state_draft : public common_speculative_state {
|
||||
llama_context * ctx_dft;
|
||||
|
||||
bool use_ckpt = false;
|
||||
struct common_speculative_checkpoint ckpt;
|
||||
common_speculative_checkpoint ckpt;
|
||||
|
||||
common_sampler * smpl;
|
||||
|
||||
@@ -249,26 +247,16 @@ struct common_speculative_state_draft : public common_speculative_state {
|
||||
llama_batch_free(batch);
|
||||
}
|
||||
|
||||
void begin(const llama_tokens & prompt) override {
|
||||
if (use_ckpt && ckpt.size() > 0) {
|
||||
// delete checkpoint
|
||||
LOG_DBG("%s: delete checkpoint, prompt.size=%zu, pos_min=%d, pos_max=%d, n_tokens=%" PRId64 ", size=%.3f MiB\n",
|
||||
__func__, prompt.size(), ckpt.pos_min, ckpt.pos_max, ckpt.n_tokens, (float) ckpt.data.size() / 1024 / 1024);
|
||||
ckpt.pos_min = 0;
|
||||
ckpt.pos_max = 0;
|
||||
ckpt.n_tokens = 0;
|
||||
ckpt.ckpt_size = 0;
|
||||
ckpt.data.clear();
|
||||
}
|
||||
void begin(const llama_tokens & /*prompt*/) override {
|
||||
}
|
||||
|
||||
size_t draft_create_checkpoint(int n_tokens_prompt, int n_tokens_batch) {
|
||||
size_t create_checkpoint(int n_tokens_prompt) {
|
||||
int slot_id = 0;
|
||||
const size_t checkpoint_size = llama_state_seq_get_size_ext(ctx_dft, slot_id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);
|
||||
|
||||
ckpt.pos_min = llama_memory_seq_pos_min(llama_get_memory(ctx_dft), slot_id);
|
||||
ckpt.pos_max = llama_memory_seq_pos_max(llama_get_memory(ctx_dft), slot_id);
|
||||
ckpt.n_tokens = n_tokens_prompt - n_tokens_batch;
|
||||
ckpt.n_tokens = n_tokens_prompt;
|
||||
ckpt.data.resize(checkpoint_size);
|
||||
|
||||
const size_t n = llama_state_seq_get_data_ext(ctx_dft, ckpt.data.data(), checkpoint_size, slot_id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);
|
||||
@@ -281,13 +269,13 @@ struct common_speculative_state_draft : public common_speculative_state {
|
||||
return n;
|
||||
}
|
||||
|
||||
size_t draft_restore_checkpoint(size_t ckpt_size_part_expected) {
|
||||
size_t restore_checkpoint() {
|
||||
int slot_id = 0;
|
||||
LOG_DBG("%s: pos_min = %d, pos_max = %d\n", __func__, ckpt.pos_min, ckpt.pos_max);
|
||||
const size_t n = llama_state_seq_set_data_ext(ctx_dft, ckpt.data.data(), ckpt.size(), slot_id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);
|
||||
if (n != ckpt_size_part_expected) {
|
||||
GGML_ABORT("%s: failed to restore context checkpoint (pos_min=%d, pos_max=%d, size=%zu, get_data_ext->%zu, set_data_ext->%zu",
|
||||
__func__, ckpt.pos_min, ckpt.pos_max, ckpt.size(), ckpt_size_part_expected, n);
|
||||
if (n != ckpt.size()) {
|
||||
GGML_ABORT("%s: failed to restore context checkpoint (pos_min=%d, pos_max=%d, size=%zu",
|
||||
__func__, ckpt.pos_min, ckpt.pos_max, ckpt.size());
|
||||
}
|
||||
llama_memory_seq_rm(llama_get_memory(ctx_dft), slot_id, ckpt.pos_max + 1, -1);
|
||||
|
||||
@@ -346,13 +334,18 @@ struct common_speculative_state_draft : public common_speculative_state {
|
||||
|
||||
const int i_start = std::max<int>(0, (int) prompt_cur.size() - n_ctx);
|
||||
|
||||
if (use_ckpt && i_start > 0) {
|
||||
LOG_WRN("%s: context shift is not supported with checkpoint-based contexts - skipping\n", __func__);
|
||||
return;
|
||||
}
|
||||
|
||||
// reuse as much as possible from the old draft context
|
||||
// ideally, the draft context should be as big as the target context and we will always reuse the entire prompt
|
||||
for (int i = 0; i < (int) prompt_dft.size(); ++i) {
|
||||
int cur = 0;
|
||||
while (i_start + cur < (int) prompt_cur.size() &&
|
||||
i + cur < (int) prompt_dft.size() &&
|
||||
prompt_cur[i_start + cur] == prompt_dft[i + cur]) {
|
||||
i + cur < (int) prompt_dft.size() &&
|
||||
prompt_cur[i_start + cur] == prompt_dft[i + cur]) {
|
||||
cur++;
|
||||
}
|
||||
|
||||
@@ -360,21 +353,26 @@ struct common_speculative_state_draft : public common_speculative_state {
|
||||
reuse_i = i;
|
||||
reuse_n = cur;
|
||||
}
|
||||
|
||||
if (use_ckpt) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
LOG_DBG("%s: reuse_i = %d, reuse_n = %d, #prompt_dft = %zu, #prompt_cur = %zu\n",
|
||||
__func__, reuse_i, reuse_n, prompt_dft.size(), prompt_cur.size());
|
||||
if (use_ckpt && ckpt.ckpt_size == 0 && reuse_n > 0) {
|
||||
LOG_DBG("%s: no checkpoint available, no reuse, (reuse_i=%d, reuse_n=%d) -> (0, 0)\n",
|
||||
__func__, reuse_i, reuse_n);
|
||||
if (use_ckpt && ckpt.n_tokens > reuse_n) {
|
||||
LOG_DBG("%s: checkpoint (n_tokens = %d) is outdated -> delete it\n", __func__, (int) ckpt.n_tokens);
|
||||
|
||||
reuse_i = 0;
|
||||
reuse_n = 0;
|
||||
|
||||
ckpt = {};
|
||||
}
|
||||
|
||||
result.clear();
|
||||
result.reserve(sparams.n_max);
|
||||
|
||||
bool needs_ckpt = use_ckpt && prompt_dft.size() > 0;
|
||||
if (reuse_n == 0 || (use_ckpt && reuse_i > 0)) {
|
||||
llama_memory_clear(mem_dft, false);
|
||||
prompt_dft.clear();
|
||||
@@ -393,50 +391,38 @@ struct common_speculative_state_draft : public common_speculative_state {
|
||||
return;
|
||||
}
|
||||
|
||||
bool do_restore = false;
|
||||
if (prompt_dft.size() > prompt_cur.size() && reuse_i + reuse_n < (int64_t) prompt_dft.size()) {
|
||||
// This can happen after a partial acceptance (speculative decoding with checkpoints)
|
||||
LOG_DBG("%s: #prompt_dft=%zu, #prompt_cur=%zu, shorten draft\n",
|
||||
__func__, prompt_dft.size(), prompt_cur.size());
|
||||
prompt_dft.resize(prompt_cur.size());
|
||||
do_restore = true;
|
||||
}
|
||||
|
||||
if (reuse_i > 0) {
|
||||
GGML_ASSERT(!use_ckpt);
|
||||
|
||||
bool is_removed = llama_memory_seq_rm (mem_dft, 0, 0, reuse_i);
|
||||
if (!is_removed) {
|
||||
LOG_ERR("%s: llama_memory_seq_rm failed, reuse_i=%d\n", __func__, reuse_i);
|
||||
return;
|
||||
}
|
||||
llama_memory_seq_add(mem_dft, 0, reuse_i, -1, -reuse_i);
|
||||
|
||||
prompt_dft.erase(prompt_dft.begin(), prompt_dft.begin() + reuse_i);
|
||||
}
|
||||
|
||||
if (reuse_n < (int) prompt_dft.size() || do_restore) {
|
||||
if (reuse_n < (int) prompt_dft.size()) {
|
||||
if (use_ckpt) {
|
||||
if (ckpt.n_tokens > (int64_t) prompt_dft.size()) {
|
||||
LOG_INF("%s: checkpoint is too large, prompt_tgt.size=%zu, ckpt.n_tokens=%" PRId64 ", reuse_n=%d, prompt_dft.size=%zu\n",
|
||||
__func__, prompt_tgt.size(), ckpt.n_tokens, reuse_n, prompt_dft.size());
|
||||
if (ckpt.n_tokens > 0) {
|
||||
LOG_DBG("%s: restoring checkpoint, reuse_n=%d, prompt_dft.size=%zu\n", __func__, reuse_n, prompt_dft.size());
|
||||
restore_checkpoint();
|
||||
reuse_n = ckpt.n_tokens;
|
||||
prompt_dft.resize(reuse_n);
|
||||
}
|
||||
draft_restore_checkpoint(ckpt.ckpt_size);
|
||||
reuse_n = ckpt.n_tokens;
|
||||
prompt_dft.resize(reuse_n);
|
||||
needs_ckpt = false;
|
||||
} else {
|
||||
bool is_removed = llama_memory_seq_rm (mem_dft, 0, reuse_n, -1);
|
||||
const bool is_removed = llama_memory_seq_rm(mem_dft, 0, reuse_n, -1);
|
||||
if (!is_removed) {
|
||||
LOG_ERR("%s: llama_memory_seq_rm failed, reuse_n=%d, prompt_dft.size=%zu\n",
|
||||
__func__, reuse_n, prompt_dft.size());
|
||||
LOG_ERR("%s: llama_memory_seq_rm failed, reuse_n=%d, prompt_dft.size=%zu\n", __func__, reuse_n, prompt_dft.size());
|
||||
return;
|
||||
}
|
||||
prompt_dft.erase(prompt_dft.begin() + reuse_n, prompt_dft.end());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (needs_ckpt) {
|
||||
ckpt.ckpt_size = draft_create_checkpoint(prompt_dft.size(), batch.n_tokens);
|
||||
}
|
||||
|
||||
// prepare a batch to evaluate any new tokens in the prompt
|
||||
common_batch_clear(batch);
|
||||
|
||||
@@ -450,12 +436,17 @@ struct common_speculative_state_draft : public common_speculative_state {
|
||||
// we should rarely end-up here during normal decoding
|
||||
if (batch.n_tokens > 0) {
|
||||
//LOG_DBG("%s: draft prompt batch: %s\n", __func__, string_from(ctx, batch).c_str());
|
||||
LOG_DBG("%s: draft prompt batch: %d tokens\n", __func__, batch.n_tokens);
|
||||
|
||||
int ret = llama_decode(ctx_dft, batch);
|
||||
if (ret != 0 && ret != 1) {
|
||||
LOG_WRN("%s: llama_decode returned %d, prompt_cur.size=%zu\n",
|
||||
__func__, ret, prompt_cur.size());
|
||||
}
|
||||
|
||||
if (use_ckpt) {
|
||||
create_checkpoint(prompt_dft.size());
|
||||
}
|
||||
}
|
||||
|
||||
const llama_pos n_past = prompt_dft.size();
|
||||
@@ -467,7 +458,7 @@ struct common_speculative_state_draft : public common_speculative_state {
|
||||
|
||||
prompt_dft.push_back(id_last);
|
||||
|
||||
LOG_DBG("%s: draft prompt: %s\n", __func__, string_from(ctx_dft, prompt_dft).c_str());
|
||||
//LOG_DBG("%s: draft prompt: %s\n", __func__, string_from(ctx_dft, prompt_dft).c_str());
|
||||
|
||||
int ret = llama_decode(ctx_dft, batch);
|
||||
if (ret != 0 && ret != 1) {
|
||||
@@ -495,14 +486,14 @@ struct common_speculative_state_draft : public common_speculative_state {
|
||||
|
||||
common_sampler_accept(smpl, id, true);
|
||||
|
||||
result.push_back(id);
|
||||
|
||||
if (sparams.n_max <= (int) result.size()) {
|
||||
// only collect very high-confidence draft tokens
|
||||
if (cur_p->data[0].p < sparams.p_min) {
|
||||
break;
|
||||
}
|
||||
|
||||
// only collect very high-confidence draft tokens
|
||||
if (cur_p->data[0].p < sparams.p_min) {
|
||||
result.push_back(id);
|
||||
|
||||
if (sparams.n_max <= (int) result.size()) {
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -784,17 +775,15 @@ struct common_speculative_state_ngram_mod : public common_speculative_state {
|
||||
}
|
||||
|
||||
void accept(uint16_t n_accepted) override {
|
||||
if (verbose) {
|
||||
LOG_INF("%s: accepted %d tokens from %zu drafted tokens\n", __func__, n_accepted, n_draft_last);
|
||||
}
|
||||
|
||||
// compute acceptance fraction if we have a recorded draft length
|
||||
if (n_draft_last > 0) {
|
||||
const double f_acc = (double)n_accepted / (double)n_draft_last;
|
||||
if (f_acc < 0.5) {
|
||||
n_low++;
|
||||
if (n_low >= 3) {
|
||||
LOG_WRN("%s: low acceptance streak (%d) – resetting ngram_mod\n", __func__, n_low);
|
||||
if (verbose) {
|
||||
LOG_WRN("%s: low acceptance streak (%d) – resetting ngram_mod\n", __func__, n_low);
|
||||
}
|
||||
|
||||
mod.reset();
|
||||
n_low = 0;
|
||||
|
||||
@@ -110,13 +110,21 @@ int main(int argc, char ** argv) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (
|
||||
llama_vocab_get_add_bos(vocab_tgt) != llama_vocab_get_add_bos(vocab_dft) ||
|
||||
llama_vocab_get_add_eos(vocab_tgt) != llama_vocab_get_add_eos(vocab_dft) ||
|
||||
llama_vocab_bos(vocab_tgt) != llama_vocab_bos(vocab_dft) ||
|
||||
llama_vocab_eos(vocab_tgt) != llama_vocab_eos(vocab_dft)
|
||||
) {
|
||||
LOG_ERR("%s: draft model special tokens must match target model to use speculation\n", __func__);
|
||||
if (llama_vocab_get_add_bos(vocab_tgt) != llama_vocab_get_add_bos(vocab_dft) ||
|
||||
(llama_vocab_get_add_bos(vocab_tgt) && llama_vocab_bos(vocab_tgt) != llama_vocab_bos(vocab_dft))) {
|
||||
LOG_ERR("%s: draft model bos tokens must match target model to use speculation. add: %d - %d, id: %d - %d)\n",
|
||||
__func__,
|
||||
llama_vocab_get_add_bos(vocab_tgt), llama_vocab_get_add_bos(vocab_dft),
|
||||
llama_vocab_bos(vocab_tgt), llama_vocab_bos(vocab_dft));
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (llama_vocab_get_add_eos(vocab_tgt) != llama_vocab_get_add_eos(vocab_dft) ||
|
||||
(llama_vocab_get_add_eos(vocab_tgt) && llama_vocab_eos(vocab_tgt) != llama_vocab_eos(vocab_dft))) {
|
||||
LOG_ERR("%s: draft model eos tokens must match target model to use speculation. add: %d - %d, id: %d - %d)\n",
|
||||
__func__,
|
||||
llama_vocab_get_add_eos(vocab_tgt), llama_vocab_get_add_eos(vocab_dft),
|
||||
llama_vocab_eos(vocab_tgt), llama_vocab_eos(vocab_dft));
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -137,11 +145,12 @@ int main(int argc, char ** argv) {
|
||||
for (int i = SPEC_VOCAB_CHECK_START_TOKEN_ID; i < std::min(n_vocab_tgt, n_vocab_dft); ++i) {
|
||||
const char * token_text_tgt = llama_vocab_get_text(vocab_tgt, i);
|
||||
const char * token_text_dft = llama_vocab_get_text(vocab_dft, i);
|
||||
|
||||
if (std::strcmp(token_text_tgt, token_text_dft) != 0) {
|
||||
LOG_ERR("%s: draft model vocab must match target model to use speculation but ", __func__);
|
||||
LOG_ERR("token %d content differs - target '%s', draft '%s'\n", i,
|
||||
common_token_to_piece(ctx_tgt, i).c_str(),
|
||||
common_token_to_piece(ctx_dft, i).c_str());
|
||||
common_token_to_piece(vocab_tgt, i).c_str(),
|
||||
common_token_to_piece(vocab_dft, i).c_str());
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@ project("ggml" C CXX ASM)
|
||||
### GGML Version
|
||||
set(GGML_VERSION_MAJOR 0)
|
||||
set(GGML_VERSION_MINOR 10)
|
||||
set(GGML_VERSION_PATCH 0)
|
||||
set(GGML_VERSION_PATCH 1)
|
||||
set(GGML_VERSION_BASE "${GGML_VERSION_MAJOR}.${GGML_VERSION_MINOR}.${GGML_VERSION_PATCH}")
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
|
||||
|
||||
@@ -1826,7 +1826,24 @@ static enum ggml_status ggml_backend_meta_graph_compute(ggml_backend_t backend,
|
||||
continue;
|
||||
}
|
||||
|
||||
i = get_i_delayed(i);
|
||||
const int i_delayed = get_i_delayed(i);
|
||||
|
||||
// If we can delay the AllReduce we need to consider the interaction with zero-sized tensor slices.
|
||||
// A backend with such a slice would normally have valid data after participating in the AllReduce with a node that has
|
||||
// its compute flag disabled and thus gets its data zeroed out.
|
||||
// If the AllReduce is delayed then the nodes until that point also need to have their compute flag disabled.
|
||||
if (i_delayed > i) {
|
||||
for (size_t j = 0; j < n_backends; j++) {
|
||||
auto & bcj = backend_ctx->backend_configs[j];
|
||||
if ((bcj.nodes[i]->flags & GGML_TENSOR_FLAG_COMPUTE) == 0) {
|
||||
for (int ii = i + 1; ii <= i_delayed; ii++) {
|
||||
bcj.nodes[ii]->flags &= ~GGML_TENSOR_FLAG_COMPUTE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
i = i_delayed;
|
||||
|
||||
for (size_t j = 0; j < n_backends; j++) {
|
||||
auto & bcj = backend_ctx->backend_configs[j];
|
||||
|
||||
@@ -485,6 +485,13 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
||||
if (GGML_RV_ZIHINTPAUSE)
|
||||
string(APPEND MARCH_STR "_zihintpause")
|
||||
endif()
|
||||
if (GGML_CPU_RISCV64_SPACEMIT)
|
||||
# `xsmtvdotii' is only required for GCC >= 15.
|
||||
if (CMAKE_C_COMPILER_ID STREQUAL "GNU" AND
|
||||
CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 15)
|
||||
string(APPEND MARCH_STR "_xsmtvdotii")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d)
|
||||
else()
|
||||
|
||||
@@ -5023,6 +5023,71 @@ void ggml_gemm_q8_0_4x8_q8_0(int n,
|
||||
UNUSED(ncols_interleaved);
|
||||
UNUSED(blocklen);
|
||||
|
||||
#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
|
||||
if (svcntb() * 8 == 256) {
|
||||
const block_q8_0x4 * b_ptr_base = (const block_q8_0x4 *) vx;
|
||||
|
||||
static const uint32_t idx_arr[8] = {0, 1, 4, 5, 2, 3, 6, 7};
|
||||
svuint32_t idx = svld1(svptrue_b32(), idx_arr);
|
||||
static const uint32_t idx_arr1[8] = {0, 1, 2, 3, 1, 2, 3, 0};
|
||||
svuint32_t idx_sc1 = svld1(svptrue_b32(), idx_arr1);
|
||||
static const uint32_t idx_arr2[8] = {0, 1, 2, 3, 0, 1, 2, 3};
|
||||
svuint32_t idx_sc2 = svld1(svptrue_b32(), idx_arr2);
|
||||
|
||||
for (int y = 0; y < nr; y += 4) {
|
||||
const block_q8_0x4 * a_ptr_base = (const block_q8_0x4 *) vy + (y / 4) * nb;
|
||||
|
||||
for (int x = 0; x < nc; x += ncols_interleaved) {
|
||||
const block_q8_0x4 * b_ptr = b_ptr_base + (x / 4) * nb;
|
||||
const block_q8_0x4 * a_ptr = a_ptr_base;
|
||||
|
||||
svfloat32_t acc_f32_01 = svdup_f32(0);
|
||||
svfloat32_t acc_f32_23 = svdup_f32(0);
|
||||
|
||||
for (int b = 0; b < nb; b++) {
|
||||
|
||||
svint32_t acc_01 = svdup_s32(0);
|
||||
svint32_t acc_23 = svdup_s32(0);
|
||||
|
||||
// Process 4 chunks of 8 positions each
|
||||
for (int chunk = 0; chunk < 4; chunk++) {
|
||||
svint8_t s_a01 = svld1rq_s8(svptrue_b8(), a_ptr->qs + chunk * 32);
|
||||
svint8_t s_a23 = svld1rq_s8(svptrue_b8(), a_ptr->qs + chunk * 32 + 16);
|
||||
svint8_t s_b0123 = svld1_s8(svptrue_b8(), b_ptr->qs + chunk * 32);
|
||||
|
||||
acc_01 = svmmla_s32(acc_01, s_a01, s_b0123);
|
||||
acc_23 = svmmla_s32(acc_23, s_a23, s_b0123);
|
||||
}
|
||||
|
||||
// Reorder outputs from 2×2 tiles to row-major
|
||||
// acc[01] = [r0c0, r0c1, r1c0, r1c1, r0c2, r0c3, r1c2, r1c3]
|
||||
// acc[23] = [r2c0, r2c1, r3c0, r3c1, r2c2, r2c3, r3c2, r3c3]
|
||||
|
||||
svint32_t row01 = svtbl_s32(acc_01, idx);
|
||||
svint32_t row23 = svtbl_s32(acc_23, idx);
|
||||
|
||||
svfloat16_t temp1 = svld1_f16(svptrue_pat_b16(SV_VL4), (const __fp16 *) a_ptr->d);
|
||||
svfloat16_t temp2 = svld1_f16(svptrue_pat_b16(SV_VL4), (const __fp16 *) b_ptr->d);
|
||||
svfloat32_t sv_a_d = svtbl_f32(svcvt_f32_f16_x(svptrue_b32(), svzip1_f16(temp1, temp1)), idx_sc1);
|
||||
svfloat32_t sv_b_d = svtbl_f32(svcvt_f32_f16_x(svptrue_b32(), svzip1_f16(temp2, temp2)), idx_sc2);
|
||||
|
||||
acc_f32_01 = svmla_f32_x(svptrue_b32(), acc_f32_01, svcvt_f32_s32_x(svptrue_b32(), row01), svmul_lane_f32(sv_b_d, sv_a_d, 0));
|
||||
acc_f32_23 = svmla_f32_x(svptrue_b32(), acc_f32_23, svcvt_f32_s32_x(svptrue_b32(), row23), svmul_lane_f32(sv_b_d, sv_a_d, 2));
|
||||
a_ptr++;
|
||||
b_ptr++;
|
||||
}
|
||||
|
||||
svbool_t pg4 = svptrue_pat_b32(SV_VL4);
|
||||
svst1_f32(pg4, s + (y+0) * bs + x, acc_f32_01);
|
||||
svst1_f32(pg4, s + (y+1) * bs + x, svext_f32(acc_f32_01, acc_f32_01, 4));
|
||||
svst1_f32(pg4, s + (y+2) * bs + x, acc_f32_23);
|
||||
svst1_f32(pg4, s + (y+3) * bs + x, svext_f32(acc_f32_23, acc_f32_23, 4));
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
#endif // SVE compile-time end
|
||||
|
||||
#if defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
|
||||
const block_q8_0x4 * b_ptr_base = (const block_q8_0x4 *) vx;
|
||||
|
||||
|
||||
@@ -2321,6 +2321,9 @@ class tinyBLAS_Q0_PPC {
|
||||
}
|
||||
|
||||
void matmul(int64_t m, int64_t n) {
|
||||
#if defined(_AIX) || defined(__BIG_ENDIAN__)
|
||||
mnpack(0, m, 0, n);
|
||||
#else
|
||||
const int64_t mc = 64;
|
||||
const int64_t kc = 64;
|
||||
int64_t nc = 64;
|
||||
@@ -2334,7 +2337,6 @@ class tinyBLAS_Q0_PPC {
|
||||
} else {
|
||||
n_aligned = (n / 64) * 64;
|
||||
}
|
||||
|
||||
if (n_aligned > 0) {
|
||||
if (n_aligned % 64 == 0) nc = 64;
|
||||
else if (n_aligned == n) nc = n;
|
||||
@@ -2352,6 +2354,7 @@ class tinyBLAS_Q0_PPC {
|
||||
} else {
|
||||
mnpack(0, m, 0, n);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
@@ -3191,12 +3194,16 @@ class tinyBLAS_PPC {
|
||||
}
|
||||
|
||||
void matmul(int64_t m, int64_t n) {
|
||||
#if defined(_AIX) || defined(__BIG_ENDIAN__)
|
||||
mnpack(0, m, 0, n);
|
||||
#else
|
||||
int64_t mc = 256; int64_t nc = 256; int64_t kc = 256;
|
||||
if (m % mc == 0 && n % nc == 0 && k % kc == 0) {
|
||||
matmul_tiled(m, n, mc, nc, kc);
|
||||
} else {
|
||||
mnpack(0, m, 0, n);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
@@ -68,7 +68,7 @@ static constexpr __host__ __device__ uint32_t ggml_cuda_fattn_tile_get_config_nv
|
||||
GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 16, 256, 2, 64, 64)
|
||||
GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 32, 256, 2, 64, 64)
|
||||
|
||||
GGML_CUDA_FATTN_TILE_CONFIG_CASE(320, 256, 32, 256, 2, 64, 64)
|
||||
GGML_CUDA_FATTN_TILE_CONFIG_CASE(320, 256, 16, 256, 2, 64, 64)
|
||||
|
||||
GGML_CUDA_FATTN_TILE_CONFIG_CASE(512, 512, 4, 128, 2, 64, 64)
|
||||
GGML_CUDA_FATTN_TILE_CONFIG_CASE(512, 512, 8, 256, 2, 64, 64)
|
||||
@@ -130,7 +130,7 @@ static constexpr __host__ __device__ uint32_t ggml_cuda_fattn_tile_get_config_nv
|
||||
GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 16, 256, 2, 32, 128)
|
||||
GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 32, 256, 2, 32, 64)
|
||||
|
||||
GGML_CUDA_FATTN_TILE_CONFIG_CASE(320, 256, 32, 256, 2, 32, 64)
|
||||
GGML_CUDA_FATTN_TILE_CONFIG_CASE(320, 256, 16, 256, 2, 32, 64)
|
||||
|
||||
GGML_CUDA_FATTN_TILE_CONFIG_CASE(512, 512, 4, 128, 2, 32, 64)
|
||||
GGML_CUDA_FATTN_TILE_CONFIG_CASE(512, 512, 8, 256, 2, 32, 64)
|
||||
@@ -1124,7 +1124,7 @@ static void launch_fattn_tile_switch_ncols1(ggml_backend_cuda_context & ctx, ggm
|
||||
constexpr size_t nbytes_shared = 0;
|
||||
|
||||
#ifdef GGML_USE_HIP
|
||||
if constexpr (DV <= 128) {
|
||||
if constexpr (DKQ <= 128) {
|
||||
if (Q->ne[1] > 32/ncols2) {
|
||||
constexpr int cols_per_block = 64;
|
||||
const int nwarps = ggml_cuda_fattn_tile_get_nthreads (DKQ, DV, cols_per_block, cc) / warp_size;
|
||||
@@ -1138,7 +1138,7 @@ static void launch_fattn_tile_switch_ncols1(ggml_backend_cuda_context & ctx, ggm
|
||||
#endif // GGML_USE_HIP
|
||||
|
||||
#ifndef GGML_USE_HIP
|
||||
if constexpr (DV <= 256)
|
||||
if constexpr (DKQ <= 256)
|
||||
#endif // GGML_USE_HIP
|
||||
{
|
||||
if (Q->ne[1] > 16/ncols2) {
|
||||
@@ -1220,11 +1220,22 @@ static void launch_fattn_tile_switch_ncols2(ggml_backend_cuda_context & ctx, ggm
|
||||
const int gqa_limit = nvidia && gqa_ratio <= 4 && DV <= 256 ? 16 : INT_MAX;
|
||||
const bool use_gqa_opt = mask && max_bias == 0.0f && Q->ne[1] <= gqa_limit && K->ne[1] % FATTN_KQ_STRIDE == 0;
|
||||
|
||||
if constexpr (DKQ == 320) { // Mistral Small 4
|
||||
if constexpr (DKQ == 320) {
|
||||
// This branch is only used for Mistral Small 4 which has a GQA ratio of 32.
|
||||
// On AMD, simply use that GQA ratio with 32 columns / block since we always have enough SRAM.
|
||||
// On NVIDIA however, the tile kernel is only used for GPUs that can't use the mma kernel (Pascal and older).
|
||||
// Therefore, use a GQA ratio of 16 with 16 columns / block to stay below 48 kiB of SRAM / block.
|
||||
#ifdef GGML_USE_HIP
|
||||
if (use_gqa_opt && gqa_ratio % 32 == 0) {
|
||||
launch_fattn_tile_switch_ncols1<DKQ, DV, 32, use_logit_softcap>(ctx, dst);
|
||||
return;
|
||||
}
|
||||
#else
|
||||
if (use_gqa_opt && gqa_ratio % 16 == 0) {
|
||||
launch_fattn_tile_switch_ncols1<DKQ, DV, 16, use_logit_softcap>(ctx, dst);
|
||||
return;
|
||||
}
|
||||
#endif // GGML_USE_HIP
|
||||
GGML_ABORT("flash-attn tile (320/256): expected GQA ratio multiple of 32");
|
||||
}
|
||||
|
||||
|
||||
@@ -3556,6 +3556,9 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph,
|
||||
&& unary_ops.size() == 1 && unary_ops.begin()[0] == GGML_UNARY_OP_SILU) {
|
||||
const ggml_tensor * ssm_conv = cgraph->nodes[node_idx];
|
||||
const ggml_tensor * silu = cgraph->nodes[node_idx+1];
|
||||
if (ggml_get_unary_op(silu) != unary_ops.begin()[0]) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ssm_conv->type != GGML_TYPE_F32 || silu->type != GGML_TYPE_F32) {
|
||||
return false;
|
||||
@@ -3564,6 +3567,31 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph,
|
||||
return true;
|
||||
}
|
||||
|
||||
if (ops.size() == 3 && ops.begin()[0] == GGML_OP_SSM_CONV && ops.begin()[1] == GGML_OP_ADD
|
||||
&& ops.begin()[2] == GGML_OP_UNARY && unary_ops.size() == 1 && unary_ops.begin()[0] == GGML_UNARY_OP_SILU) {
|
||||
const ggml_tensor * ssm_conv = cgraph->nodes[node_idx];
|
||||
const ggml_tensor * add = cgraph->nodes[node_idx+1];
|
||||
const ggml_tensor * silu = cgraph->nodes[node_idx+2];
|
||||
if (ggml_get_unary_op(silu) != unary_ops.begin()[0]) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ssm_conv->type != GGML_TYPE_F32 || add->type != GGML_TYPE_F32 || silu->type != GGML_TYPE_F32) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// ADD must consume ssm_conv's output and broadcast a 1-D channel-wise bias.
|
||||
const ggml_tensor * bias = (add->src[0] == ssm_conv) ? add->src[1] : add->src[0];
|
||||
if (bias->type != GGML_TYPE_F32 || !ggml_is_contiguous(bias)) {
|
||||
return false;
|
||||
}
|
||||
if (ggml_nelements(bias) != ssm_conv->ne[0] || bias->ne[0] != ssm_conv->ne[0]) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
if (ops.size() == 2 && ops.begin()[0] == GGML_OP_UNARY && ops.begin()[1] == GGML_OP_MUL
|
||||
&& unary_ops.size() == 1 && (unary_ops.begin()[0] == GGML_UNARY_OP_SILU || unary_ops.begin()[0] == GGML_UNARY_OP_SIGMOID || unary_ops.begin()[0] == GGML_UNARY_OP_SOFTPLUS)) {
|
||||
const ggml_tensor * unary = cgraph->nodes[node_idx];
|
||||
@@ -3640,6 +3668,362 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph,
|
||||
return false;
|
||||
}
|
||||
|
||||
// try and fuse nodes and return the number of nodes to skip
|
||||
static int ggml_cuda_try_fuse(ggml_backend_cuda_context * cuda_ctx, ggml_cgraph * cgraph, int i) {
|
||||
|
||||
static bool disable_fusion = getenv("GGML_CUDA_DISABLE_FUSION") != nullptr && std::atoi(getenv("GGML_CUDA_DISABLE_FUSION"));
|
||||
if (disable_fusion) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
ggml_tensor * node = cgraph->nodes[i];
|
||||
|
||||
//topk-moe
|
||||
if (cgraph->nodes[i]->op == GGML_OP_UNARY || cgraph->nodes[i]->op == GGML_OP_SOFT_MAX ||
|
||||
cgraph->nodes[i]->op == GGML_OP_ARGSORT) {
|
||||
ggml_cuda_topk_moe_args args;
|
||||
const bool can_fuse = ggml_cuda_topk_moe_fusion(cgraph, i, args);
|
||||
std::vector<ggml_op> ops;
|
||||
|
||||
if (can_fuse) {
|
||||
const ggml_tensor * logits = node->src[0];
|
||||
ggml_tensor * weights = nullptr;
|
||||
ggml_tensor * ids = nullptr;
|
||||
const ggml_tensor * bias = nullptr;
|
||||
const ggml_tensor * clamp = nullptr;
|
||||
const ggml_tensor * scale = nullptr;
|
||||
|
||||
if (!args.delayed_softmax) {
|
||||
ggml_op gating_op = args.sigmoid ? GGML_OP_UNARY : GGML_OP_SOFT_MAX;
|
||||
int out_nodes[2]; // nodes which can't be elided
|
||||
|
||||
if (args.prob_bias) {
|
||||
bias = cgraph->nodes[i + 2]->src[1];
|
||||
ops.insert(ops.end(), { gating_op, GGML_OP_RESHAPE, GGML_OP_ADD, GGML_OP_ARGSORT, GGML_OP_VIEW,
|
||||
GGML_OP_GET_ROWS });
|
||||
out_nodes[0] = i + 4;
|
||||
ids = cgraph->nodes[i + 4];
|
||||
} else {
|
||||
ops.insert(ops.end(),
|
||||
{ gating_op, GGML_OP_RESHAPE, GGML_OP_ARGSORT, GGML_OP_VIEW, GGML_OP_GET_ROWS });
|
||||
out_nodes[0] = i + 3;
|
||||
ids = cgraph->nodes[i + 3];
|
||||
}
|
||||
|
||||
if (args.norm) {
|
||||
ops.insert(ops.end(),
|
||||
{ GGML_OP_RESHAPE, GGML_OP_SUM_ROWS, GGML_OP_CLAMP, GGML_OP_DIV, GGML_OP_RESHAPE });
|
||||
clamp = cgraph->nodes[i + ops.size() - 3];
|
||||
}
|
||||
if (args.scale) {
|
||||
ops.insert(ops.end(), { GGML_OP_SCALE });
|
||||
scale = cgraph->nodes[i + ops.size() - 1];
|
||||
}
|
||||
|
||||
weights = cgraph->nodes[i + ops.size() - 1];
|
||||
out_nodes[1] = i + ops.size() - 1;
|
||||
|
||||
if (ggml_can_fuse_subgraph(cgraph, i, ops.size(), ops.data(), out_nodes, 2) &&
|
||||
ggml_cuda_should_use_topk_moe(node, logits, weights, ids) &&
|
||||
ggml_cuda_check_fusion_memory_ranges(cgraph, i, ops.size(), out_nodes, 2, /*is_topk_moe=*/true)) {
|
||||
ggml_cuda_op_topk_moe(*cuda_ctx, logits, weights, ids, clamp, scale, bias, args);
|
||||
return ops.size() - 1;
|
||||
}
|
||||
} else if (!args.norm && !args.prob_bias) {
|
||||
//special case gpt-oss, no norm, no bias.
|
||||
ops.insert(ops.end(), { GGML_OP_ARGSORT, GGML_OP_VIEW, GGML_OP_GET_ROWS, GGML_OP_RESHAPE,
|
||||
GGML_OP_SOFT_MAX, GGML_OP_RESHAPE });
|
||||
weights = cgraph->nodes[i + 5];
|
||||
ids = cgraph->nodes[i + 1];
|
||||
const ggml_tensor * softmax = cgraph->nodes[i + 4];
|
||||
|
||||
int out_nodes[2] = { i + 1, i + 5 };
|
||||
if (ggml_can_fuse_subgraph(cgraph, i, ops.size(), ops.data(), out_nodes, 2) &&
|
||||
ggml_cuda_should_use_topk_moe(softmax, logits, weights, ids) &&
|
||||
ggml_cuda_check_fusion_memory_ranges(cgraph, i, ops.size(), out_nodes, 2, /*is_topk_moe=*/true)) {
|
||||
ggml_cuda_op_topk_moe(*cuda_ctx, logits, weights, ids, clamp, scale, bias, args);
|
||||
return ops.size() - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//RoPE + view + set-rows
|
||||
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_ROPE, GGML_OP_VIEW, GGML_OP_SET_ROWS }, {})) {
|
||||
ggml_tensor * rope = cgraph->nodes[i];
|
||||
ggml_tensor * set_rows = cgraph->nodes[i + 2];
|
||||
|
||||
ggml_cuda_op_rope_fused(*cuda_ctx, rope, set_rows);
|
||||
return 2;
|
||||
}
|
||||
|
||||
// multi-(add or mul)
|
||||
if (node->op == GGML_OP_ADD || node->op == GGML_OP_MUL) {
|
||||
int n_fuse = 0;
|
||||
ggml_op ops[8];
|
||||
std::fill(ops, ops + 8, node->op);
|
||||
|
||||
for (; n_fuse <= 6; ++n_fuse) {
|
||||
if (!ggml_can_fuse(cgraph, i + n_fuse, ops + n_fuse, 2)) {
|
||||
break;
|
||||
}
|
||||
if (cgraph->nodes[i + n_fuse] != cgraph->nodes[i + n_fuse + 1]->src[0]) {
|
||||
break;
|
||||
}
|
||||
if (!ggml_are_same_layout(cgraph->nodes[i + n_fuse]->src[1], cgraph->nodes[i + n_fuse + 1]->src[1])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
n_fuse++;
|
||||
|
||||
if (n_fuse > 1) {
|
||||
ggml_tensor fused_node;
|
||||
memcpy(&fused_node, node, sizeof(ggml_tensor));
|
||||
for (int j = 0; j < n_fuse - 1; ++j) {
|
||||
fused_node.src[j + 2] = cgraph->nodes[i + j + 1]->src[1];
|
||||
}
|
||||
fused_node.data = cgraph->nodes[i + n_fuse - 1]->data;
|
||||
if (node->op == GGML_OP_ADD) {
|
||||
ggml_cuda_op_fused_add(*cuda_ctx, &fused_node, n_fuse);
|
||||
} else {
|
||||
ggml_cuda_op_fused_mul(*cuda_ctx, &fused_node, n_fuse);
|
||||
}
|
||||
return n_fuse - 1;
|
||||
}
|
||||
}
|
||||
|
||||
bool fused_mul_mat_vec = false;
|
||||
int fused_node_count = 0;
|
||||
|
||||
// gate + glu + up
|
||||
for (ggml_op op : { GGML_OP_MUL_MAT, GGML_OP_MUL_MAT_ID }) {
|
||||
const ggml_op bias_op = op == GGML_OP_MUL_MAT ? GGML_OP_ADD : GGML_OP_ADD_ID;
|
||||
|
||||
if (ggml_cuda_can_fuse(cgraph, i, { op, bias_op, op, bias_op, GGML_OP_GLU }, {})) {
|
||||
ggml_tensor * glu = cgraph->nodes[i + 4];
|
||||
ggml_tensor * gate_bias_n = glu->src[0];
|
||||
ggml_tensor * up_bias_n = glu->src[1];
|
||||
|
||||
//we don't assume the order for {gate, up}. Instead infer it from the bias tensor
|
||||
ggml_tensor * gate_n = nullptr;
|
||||
ggml_tensor * up_n = nullptr;
|
||||
|
||||
if (gate_bias_n->src[0] == cgraph->nodes[i] || gate_bias_n->src[1] == cgraph->nodes[i]) {
|
||||
gate_n = cgraph->nodes[i];
|
||||
up_n = cgraph->nodes[i + 2];
|
||||
} else if (gate_bias_n->src[0] == cgraph->nodes[i + 2] || gate_bias_n->src[1] == cgraph->nodes[i + 2]) {
|
||||
gate_n = cgraph->nodes[i + 2];
|
||||
up_n = cgraph->nodes[i];
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto get_bias_tensor = [](const ggml_tensor * bias_node, const ggml_tensor * mul_node, ggml_op op_bias) {
|
||||
if (op_bias == GGML_OP_ADD) {
|
||||
if (bias_node->src[0] == mul_node) {
|
||||
return bias_node->src[1];
|
||||
}
|
||||
if (bias_node->src[1] == mul_node) {
|
||||
return bias_node->src[0];
|
||||
}
|
||||
return (ggml_tensor *) nullptr;
|
||||
}
|
||||
GGML_ASSERT(op_bias == GGML_OP_ADD_ID);
|
||||
GGML_ASSERT(bias_node->src[0] == mul_node);
|
||||
return bias_node->src[1];
|
||||
};
|
||||
|
||||
ggml_tensor * up_bias_tensor = get_bias_tensor(up_bias_n, up_n, bias_op);
|
||||
ggml_tensor * gate_bias_tensor = get_bias_tensor(gate_bias_n, gate_n, bias_op);
|
||||
|
||||
if (!up_bias_tensor || !gate_bias_tensor) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// we don't support repeating adds
|
||||
if (bias_op == GGML_OP_ADD && (!ggml_are_same_shape(gate_bias_n->src[0], gate_bias_n->src[1]) ||
|
||||
!ggml_are_same_shape(up_bias_n->src[0], up_bias_n->src[1]))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const ggml_tensor * src0 = up_n->src[0];
|
||||
const ggml_tensor * src1 = up_n->src[1];
|
||||
const ggml_tensor * ids = up_n->src[2];
|
||||
|
||||
if (ggml_cuda_should_fuse_mul_mat_vec_f(up_n)) {
|
||||
ggml_cuda_mm_fusion_args_host fusion_data{};
|
||||
fusion_data.gate = gate_n->src[0];
|
||||
fusion_data.x_bias = up_bias_tensor;
|
||||
fusion_data.gate_bias = gate_bias_tensor;
|
||||
fusion_data.glu_op = ggml_get_glu_op(glu);
|
||||
|
||||
ggml_cuda_mul_mat_vec_f(*cuda_ctx, src0, src1, ids, glu, &fusion_data);
|
||||
fused_mul_mat_vec = true;
|
||||
fused_node_count = 5;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ggml_cuda_should_fuse_mul_mat_vec_q(up_n)) {
|
||||
ggml_cuda_mm_fusion_args_host fusion_data{};
|
||||
fusion_data.gate = gate_n->src[0];
|
||||
fusion_data.x_bias = up_bias_tensor;
|
||||
fusion_data.gate_bias = gate_bias_tensor;
|
||||
fusion_data.glu_op = ggml_get_glu_op(glu);
|
||||
|
||||
ggml_cuda_mul_mat_vec_q(*cuda_ctx, src0, src1, ids, glu, &fusion_data);
|
||||
fused_mul_mat_vec = true;
|
||||
fused_node_count = 5;
|
||||
break;
|
||||
}
|
||||
} else if (ggml_cuda_can_fuse(cgraph, i, { op, op, GGML_OP_GLU }, {})) {
|
||||
ggml_tensor * glu = cgraph->nodes[i + 2];
|
||||
ggml_tensor * gate = glu->src[0];
|
||||
ggml_tensor * up = glu->src[1];
|
||||
|
||||
bool ok = (gate == cgraph->nodes[i] && up == cgraph->nodes[i + 1]) ||
|
||||
(gate == cgraph->nodes[i + 1] && up == cgraph->nodes[i]);
|
||||
|
||||
if (!ok) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const ggml_tensor * src0 = up->src[0];
|
||||
const ggml_tensor * src1 = up->src[1];
|
||||
const ggml_tensor * ids = up->src[2];
|
||||
|
||||
if (ggml_cuda_should_fuse_mul_mat_vec_f(up)) {
|
||||
ggml_cuda_mm_fusion_args_host fusion_data{};
|
||||
fusion_data.gate = gate->src[0];
|
||||
fusion_data.glu_op = ggml_get_glu_op(glu);
|
||||
|
||||
ggml_cuda_mul_mat_vec_f(*cuda_ctx, src0, src1, ids, glu, &fusion_data);
|
||||
fused_mul_mat_vec = true;
|
||||
fused_node_count = 3;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ggml_cuda_should_fuse_mul_mat_vec_q(up)) {
|
||||
ggml_cuda_mm_fusion_args_host fusion_data{};
|
||||
fusion_data.gate = gate->src[0];
|
||||
fusion_data.glu_op = ggml_get_glu_op(glu);
|
||||
|
||||
ggml_cuda_mul_mat_vec_q(*cuda_ctx, src0, src1, ids, glu, &fusion_data);
|
||||
fused_mul_mat_vec = true;
|
||||
fused_node_count = 3;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (fused_mul_mat_vec) {
|
||||
return fused_node_count - 1;
|
||||
}
|
||||
|
||||
fused_mul_mat_vec = false;
|
||||
fused_node_count = 0;
|
||||
|
||||
// gate + add + glu + up + add
|
||||
for (ggml_op op : { GGML_OP_MUL_MAT, GGML_OP_MUL_MAT_ID }) {
|
||||
const ggml_op bias_op = op == GGML_OP_MUL_MAT ? GGML_OP_ADD : GGML_OP_ADD_ID;
|
||||
|
||||
if (!ggml_can_fuse(cgraph, i, { op, bias_op })) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ggml_tensor * mm_node = cgraph->nodes[i];
|
||||
ggml_tensor * bias_node = cgraph->nodes[i + 1];
|
||||
|
||||
ggml_tensor * bias_tensor = nullptr;
|
||||
if (bias_op == GGML_OP_ADD) {
|
||||
if (bias_node->src[0] == mm_node) {
|
||||
bias_tensor = bias_node->src[1];
|
||||
} else if (bias_node->src[1] == mm_node) {
|
||||
bias_tensor = bias_node->src[0];
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
if (bias_node->src[0] != mm_node) {
|
||||
continue;
|
||||
}
|
||||
bias_tensor = bias_node->src[1];
|
||||
}
|
||||
|
||||
const ggml_tensor * src0 = mm_node->src[0];
|
||||
const ggml_tensor * src1 = mm_node->src[1];
|
||||
const ggml_tensor * ids = mm_node->src[2];
|
||||
|
||||
if (bias_op == GGML_OP_ADD_ID && bias_node->src[2] != ids) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (bias_op == GGML_OP_ADD && !ggml_are_same_shape(bias_node->src[0], bias_node->src[1])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ggml_cuda_mm_fusion_args_host fusion_data{};
|
||||
fusion_data.x_bias = bias_tensor;
|
||||
|
||||
if (ggml_cuda_should_fuse_mul_mat_vec_f(mm_node)) {
|
||||
ggml_cuda_mul_mat_vec_f(*cuda_ctx, src0, src1, ids, bias_node, &fusion_data);
|
||||
fused_mul_mat_vec = true;
|
||||
fused_node_count = 2;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ggml_cuda_should_fuse_mul_mat_vec_q(mm_node)) {
|
||||
ggml_cuda_mul_mat_vec_q(*cuda_ctx, src0, src1, ids, bias_node, &fusion_data);
|
||||
fused_mul_mat_vec = true;
|
||||
fused_node_count = 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (fused_mul_mat_vec) {
|
||||
return fused_node_count - 1;
|
||||
}
|
||||
|
||||
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_RMS_NORM, GGML_OP_MUL, GGML_OP_ADD }, {})) {
|
||||
ggml_cuda_op_rms_norm_fused_add(*cuda_ctx, node, cgraph->nodes[i + 1], cgraph->nodes[i + 2]);
|
||||
return 2;
|
||||
}
|
||||
|
||||
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_RMS_NORM, GGML_OP_MUL }, {})) {
|
||||
ggml_cuda_op_rms_norm_fused(*cuda_ctx, node, cgraph->nodes[i + 1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_SSM_CONV, GGML_OP_ADD, GGML_OP_UNARY }, { GGML_UNARY_OP_SILU })) {
|
||||
ggml_cuda_op_ssm_conv(*cuda_ctx, node, cgraph->nodes[i + 1], cgraph->nodes[i + 2]);
|
||||
return 2;
|
||||
}
|
||||
|
||||
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_SSM_CONV, GGML_OP_UNARY }, { GGML_UNARY_OP_SILU })) {
|
||||
ggml_cuda_op_ssm_conv(*cuda_ctx, node, /*bias_add_node=*/ nullptr, cgraph->nodes[i + 1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_UNARY, GGML_OP_MUL }, { GGML_UNARY_OP_SILU }) ||
|
||||
ggml_cuda_can_fuse(cgraph, i, { GGML_OP_UNARY, GGML_OP_MUL }, { GGML_UNARY_OP_SIGMOID }) ||
|
||||
ggml_cuda_can_fuse(cgraph, i, { GGML_OP_UNARY, GGML_OP_MUL }, { GGML_UNARY_OP_SOFTPLUS })) {
|
||||
ggml_cuda_op_unary_mul(*cuda_ctx, node, cgraph->nodes[i + 1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_UNARY, GGML_OP_SQR }, { GGML_UNARY_OP_RELU })) {
|
||||
ggml_cuda_op_relu_sqr(*cuda_ctx, node, cgraph->nodes[i + 1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_SCALE, GGML_OP_UNARY, GGML_OP_SCALE }, { GGML_UNARY_OP_TANH })) {
|
||||
ggml_cuda_op_softcap(*cuda_ctx, cgraph->nodes[i + 2], node);
|
||||
return 2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ggml_cuda_graph_evaluate_and_capture(ggml_backend_cuda_context * cuda_ctx, ggml_cgraph * cgraph, const bool use_cuda_graph, const bool cuda_graph_update_required, const void * graph_key) {
|
||||
bool graph_evaluated_or_captured = false;
|
||||
|
||||
@@ -3786,355 +4170,11 @@ static void ggml_cuda_graph_evaluate_and_capture(ggml_backend_cuda_context * cud
|
||||
continue;
|
||||
}
|
||||
|
||||
// start of fusion operations
|
||||
static bool disable_fusion = (getenv("GGML_CUDA_DISABLE_FUSION") != nullptr);
|
||||
if (!disable_fusion) {
|
||||
ggml_cuda_topk_moe_args args;
|
||||
int nodes_to_skip = ggml_cuda_try_fuse(cuda_ctx, cgraph, i);
|
||||
|
||||
if (cgraph->nodes[i]->op == GGML_OP_UNARY || cgraph->nodes[i]->op == GGML_OP_SOFT_MAX ||
|
||||
cgraph->nodes[i]->op == GGML_OP_ARGSORT) {
|
||||
const bool can_fuse = ggml_cuda_topk_moe_fusion(cgraph, i, args);
|
||||
|
||||
std::vector<ggml_op> ops;
|
||||
|
||||
if (can_fuse) {
|
||||
const ggml_tensor * logits = node->src[0];
|
||||
ggml_tensor * weights = nullptr;
|
||||
ggml_tensor * ids = nullptr;
|
||||
const ggml_tensor * bias = nullptr;
|
||||
const ggml_tensor * clamp = nullptr;
|
||||
const ggml_tensor * scale = nullptr;
|
||||
|
||||
if (!args.delayed_softmax) {
|
||||
ggml_op gating_op = args.sigmoid ? GGML_OP_UNARY : GGML_OP_SOFT_MAX;
|
||||
int out_nodes[2]; // nodes which can't be elided
|
||||
|
||||
if (args.prob_bias) {
|
||||
bias = cgraph->nodes[i + 2]->src[1];
|
||||
ops.insert(ops.end(), { gating_op, GGML_OP_RESHAPE, GGML_OP_ADD, GGML_OP_ARGSORT,
|
||||
GGML_OP_VIEW, GGML_OP_GET_ROWS });
|
||||
out_nodes[0] = i + 4;
|
||||
ids = cgraph->nodes[i + 4];
|
||||
} else {
|
||||
ops.insert(ops.end(), { gating_op, GGML_OP_RESHAPE, GGML_OP_ARGSORT, GGML_OP_VIEW,
|
||||
GGML_OP_GET_ROWS });
|
||||
out_nodes[0] = i + 3;
|
||||
ids = cgraph->nodes[i + 3];
|
||||
}
|
||||
|
||||
if (args.norm) {
|
||||
ops.insert(ops.end(), { GGML_OP_RESHAPE, GGML_OP_SUM_ROWS, GGML_OP_CLAMP,
|
||||
GGML_OP_DIV, GGML_OP_RESHAPE });
|
||||
clamp = cgraph->nodes[i + ops.size() - 3];
|
||||
}
|
||||
if (args.scale) {
|
||||
ops.insert(ops.end(), { GGML_OP_SCALE });
|
||||
scale = cgraph->nodes[i + ops.size() - 1];
|
||||
}
|
||||
|
||||
weights = cgraph->nodes[i + ops.size() - 1];
|
||||
out_nodes[1] = i + ops.size() - 1;
|
||||
|
||||
if (ggml_can_fuse_subgraph(cgraph, i, ops.size(), ops.data(), out_nodes, 2) &&
|
||||
ggml_cuda_should_use_topk_moe(node, logits, weights, ids) &&
|
||||
ggml_cuda_check_fusion_memory_ranges(cgraph, i, ops.size(), out_nodes, 2, /*is_topk_moe=*/ true)) {
|
||||
ggml_cuda_op_topk_moe(*cuda_ctx, logits, weights, ids, clamp, scale, bias, args);
|
||||
i += ops.size() - 1;
|
||||
continue;
|
||||
}
|
||||
} else if (!args.norm && !args.prob_bias) {
|
||||
//special case gpt-oss, no norm, no bias.
|
||||
ops.insert(ops.end(), { GGML_OP_ARGSORT, GGML_OP_VIEW, GGML_OP_GET_ROWS,
|
||||
GGML_OP_RESHAPE, GGML_OP_SOFT_MAX, GGML_OP_RESHAPE });
|
||||
weights = cgraph->nodes[i + 5];
|
||||
ids = cgraph->nodes[i + 1];
|
||||
const ggml_tensor * softmax = cgraph->nodes[i + 4];
|
||||
|
||||
int out_nodes[2] = { i + 1, i + 5 };
|
||||
if (ggml_can_fuse_subgraph(cgraph, i, ops.size(), ops.data(), out_nodes, 2) &&
|
||||
ggml_cuda_should_use_topk_moe(softmax, logits, weights, ids) &&
|
||||
ggml_cuda_check_fusion_memory_ranges(cgraph, i, ops.size(), out_nodes, 2, /*is_topk_moe=*/ true)) {
|
||||
ggml_cuda_op_topk_moe(*cuda_ctx, logits, weights, ids, clamp, scale, bias, args);
|
||||
i += ops.size() - 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_ROPE, GGML_OP_VIEW, GGML_OP_SET_ROWS }, {})) {
|
||||
ggml_tensor * rope = cgraph->nodes[i];
|
||||
ggml_tensor * set_rows = cgraph->nodes[i + 2];
|
||||
|
||||
ggml_cuda_op_rope_fused(*cuda_ctx, rope, set_rows);
|
||||
i += 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (node->op == GGML_OP_ADD || node->op == GGML_OP_MUL) {
|
||||
int n_fuse = 0;
|
||||
ggml_op ops[8];
|
||||
std::fill(ops, ops + 8, node->op);
|
||||
|
||||
for (; n_fuse <= 6; ++n_fuse){
|
||||
if (!ggml_can_fuse(cgraph, i + n_fuse, ops + n_fuse, 2)) {
|
||||
break;
|
||||
}
|
||||
if (cgraph->nodes[i + n_fuse] != cgraph->nodes[i + n_fuse + 1]->src[0]) {
|
||||
break;
|
||||
}
|
||||
if (!ggml_are_same_layout(cgraph->nodes[i + n_fuse]->src[1], cgraph->nodes[i + n_fuse + 1]->src[1])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
n_fuse++;
|
||||
|
||||
if (n_fuse > 1) {
|
||||
ggml_tensor fused_node;
|
||||
memcpy(&fused_node, node, sizeof(ggml_tensor));
|
||||
for (int j = 0; j < n_fuse - 1; ++j) {
|
||||
fused_node.src[j + 2] = cgraph->nodes[i + j + 1]->src[1];
|
||||
}
|
||||
fused_node.data = cgraph->nodes[i + n_fuse - 1]->data;
|
||||
if (node->op == GGML_OP_ADD) {
|
||||
ggml_cuda_op_fused_add(*cuda_ctx, &fused_node, n_fuse);
|
||||
} else {
|
||||
ggml_cuda_op_fused_mul(*cuda_ctx, &fused_node, n_fuse);
|
||||
}
|
||||
i += n_fuse - 1;
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
bool fused_mul_mat_vec = false;
|
||||
int fused_node_count = 0;
|
||||
|
||||
for (ggml_op op : { GGML_OP_MUL_MAT, GGML_OP_MUL_MAT_ID }) {
|
||||
const ggml_op bias_op = op == GGML_OP_MUL_MAT ? GGML_OP_ADD : GGML_OP_ADD_ID;
|
||||
|
||||
if (ggml_cuda_can_fuse(cgraph, i, { op, bias_op, op, bias_op, GGML_OP_GLU }, {})) {
|
||||
ggml_tensor * glu = cgraph->nodes[i + 4];
|
||||
ggml_tensor * gate_bias_n = glu->src[0];
|
||||
ggml_tensor * up_bias_n = glu->src[1];
|
||||
|
||||
//we don't assume the order for {gate, up}. Instead infer it from the bias tensor
|
||||
ggml_tensor * gate_n = nullptr;
|
||||
ggml_tensor * up_n = nullptr;
|
||||
|
||||
if (gate_bias_n->src[0] == cgraph->nodes[i] || gate_bias_n->src[1] == cgraph->nodes[i]) {
|
||||
gate_n = cgraph->nodes[i];
|
||||
up_n = cgraph->nodes[i + 2];
|
||||
} else if (gate_bias_n->src[0] == cgraph->nodes[i + 2] || gate_bias_n->src[1] == cgraph->nodes[i + 2]) {
|
||||
gate_n = cgraph->nodes[i + 2];
|
||||
up_n = cgraph->nodes[i];
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto get_bias_tensor = [](const ggml_tensor * bias_node, const ggml_tensor * mul_node, ggml_op op_bias) {
|
||||
if (op_bias == GGML_OP_ADD) {
|
||||
if (bias_node->src[0] == mul_node) {
|
||||
return bias_node->src[1];
|
||||
}
|
||||
if (bias_node->src[1] == mul_node) {
|
||||
return bias_node->src[0];
|
||||
}
|
||||
return (ggml_tensor *) nullptr;
|
||||
}
|
||||
GGML_ASSERT(op_bias == GGML_OP_ADD_ID);
|
||||
GGML_ASSERT(bias_node->src[0] == mul_node);
|
||||
return bias_node->src[1];
|
||||
};
|
||||
|
||||
ggml_tensor * up_bias_tensor = get_bias_tensor(up_bias_n, up_n, bias_op);
|
||||
ggml_tensor * gate_bias_tensor = get_bias_tensor(gate_bias_n, gate_n, bias_op);
|
||||
|
||||
if (!up_bias_tensor || !gate_bias_tensor) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// we don't support repeating adds
|
||||
if (bias_op == GGML_OP_ADD &&
|
||||
(!ggml_are_same_shape(gate_bias_n->src[0], gate_bias_n->src[1]) ||
|
||||
!ggml_are_same_shape(up_bias_n->src[0], up_bias_n->src[1]))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const ggml_tensor * src0 = up_n->src[0];
|
||||
const ggml_tensor * src1 = up_n->src[1];
|
||||
const ggml_tensor * ids = up_n->src[2];
|
||||
|
||||
if (ggml_cuda_should_fuse_mul_mat_vec_f(up_n)) {
|
||||
ggml_cuda_mm_fusion_args_host fusion_data{};
|
||||
fusion_data.gate = gate_n->src[0];
|
||||
fusion_data.x_bias = up_bias_tensor;
|
||||
fusion_data.gate_bias = gate_bias_tensor;
|
||||
fusion_data.glu_op = ggml_get_glu_op(glu);
|
||||
|
||||
ggml_cuda_mul_mat_vec_f(*cuda_ctx, src0, src1, ids, glu, &fusion_data);
|
||||
fused_mul_mat_vec = true;
|
||||
fused_node_count = 5;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ggml_cuda_should_fuse_mul_mat_vec_q(up_n)) {
|
||||
ggml_cuda_mm_fusion_args_host fusion_data{};
|
||||
fusion_data.gate = gate_n->src[0];
|
||||
fusion_data.x_bias = up_bias_tensor;
|
||||
fusion_data.gate_bias = gate_bias_tensor;
|
||||
fusion_data.glu_op = ggml_get_glu_op(glu);
|
||||
|
||||
ggml_cuda_mul_mat_vec_q(*cuda_ctx, src0, src1, ids, glu, &fusion_data);
|
||||
fused_mul_mat_vec = true;
|
||||
fused_node_count = 5;
|
||||
break;
|
||||
}
|
||||
} else if (ggml_cuda_can_fuse(cgraph, i, { op, op, GGML_OP_GLU }, {})) {
|
||||
ggml_tensor * glu = cgraph->nodes[i + 2];
|
||||
ggml_tensor * gate = glu->src[0];
|
||||
ggml_tensor * up = glu->src[1];
|
||||
|
||||
bool ok = (gate == cgraph->nodes[i] && up == cgraph->nodes[i + 1])
|
||||
|| (gate == cgraph->nodes[i + 1] && up == cgraph->nodes[i]);
|
||||
|
||||
if (!ok) continue;
|
||||
|
||||
const ggml_tensor * src0 = up->src[0];
|
||||
const ggml_tensor * src1 = up->src[1];
|
||||
const ggml_tensor * ids = up->src[2];
|
||||
|
||||
if (ggml_cuda_should_fuse_mul_mat_vec_f(up)) {
|
||||
ggml_cuda_mm_fusion_args_host fusion_data{};
|
||||
fusion_data.gate = gate->src[0];
|
||||
fusion_data.glu_op = ggml_get_glu_op(glu);
|
||||
|
||||
ggml_cuda_mul_mat_vec_f(*cuda_ctx, src0, src1, ids, glu, &fusion_data);
|
||||
fused_mul_mat_vec = true;
|
||||
fused_node_count = 3;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ggml_cuda_should_fuse_mul_mat_vec_q(up)) {
|
||||
ggml_cuda_mm_fusion_args_host fusion_data{};
|
||||
fusion_data.gate = gate->src[0];
|
||||
fusion_data.glu_op = ggml_get_glu_op(glu);
|
||||
|
||||
ggml_cuda_mul_mat_vec_q(*cuda_ctx, src0, src1, ids, glu, &fusion_data);
|
||||
fused_mul_mat_vec = true;
|
||||
fused_node_count = 3;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (fused_mul_mat_vec) {
|
||||
i += fused_node_count - 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
fused_mul_mat_vec = false;
|
||||
fused_node_count = 0;
|
||||
|
||||
for (ggml_op op : { GGML_OP_MUL_MAT, GGML_OP_MUL_MAT_ID }) {
|
||||
const ggml_op bias_op = op == GGML_OP_MUL_MAT ? GGML_OP_ADD : GGML_OP_ADD_ID;
|
||||
|
||||
if (!ggml_can_fuse(cgraph, i, { op, bias_op })) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ggml_tensor * mm_node = cgraph->nodes[i];
|
||||
ggml_tensor * bias_node = cgraph->nodes[i + 1];
|
||||
|
||||
ggml_tensor * bias_tensor = nullptr;
|
||||
if (bias_op == GGML_OP_ADD) {
|
||||
if (bias_node->src[0] == mm_node) {
|
||||
bias_tensor = bias_node->src[1];
|
||||
} else if (bias_node->src[1] == mm_node) {
|
||||
bias_tensor = bias_node->src[0];
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
if (bias_node->src[0] != mm_node) {
|
||||
continue;
|
||||
}
|
||||
bias_tensor = bias_node->src[1];
|
||||
}
|
||||
|
||||
const ggml_tensor * src0 = mm_node->src[0];
|
||||
const ggml_tensor * src1 = mm_node->src[1];
|
||||
const ggml_tensor * ids = mm_node->src[2];
|
||||
|
||||
if (bias_op == GGML_OP_ADD_ID && bias_node->src[2] != ids) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (bias_op == GGML_OP_ADD && !ggml_are_same_shape(bias_node->src[0], bias_node->src[1])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ggml_cuda_mm_fusion_args_host fusion_data{};
|
||||
fusion_data.x_bias = bias_tensor;
|
||||
|
||||
if (ggml_cuda_should_fuse_mul_mat_vec_f(mm_node)) {
|
||||
ggml_cuda_mul_mat_vec_f(*cuda_ctx, src0, src1, ids, bias_node, &fusion_data);
|
||||
fused_mul_mat_vec = true;
|
||||
fused_node_count = 2;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ggml_cuda_should_fuse_mul_mat_vec_q(mm_node)) {
|
||||
ggml_cuda_mul_mat_vec_q(*cuda_ctx, src0, src1, ids, bias_node, &fusion_data);
|
||||
fused_mul_mat_vec = true;
|
||||
fused_node_count = 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (fused_mul_mat_vec) {
|
||||
i += fused_node_count - 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_RMS_NORM, GGML_OP_MUL, GGML_OP_ADD}, {})) {
|
||||
ggml_cuda_op_rms_norm_fused_add(*cuda_ctx, node, cgraph->nodes[i+1], cgraph->nodes[i+2]);
|
||||
i += 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_RMS_NORM, GGML_OP_MUL}, {})) {
|
||||
ggml_cuda_op_rms_norm_fused(*cuda_ctx, node, cgraph->nodes[i+1]);
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_SSM_CONV, GGML_OP_UNARY }, { GGML_UNARY_OP_SILU })) {
|
||||
ggml_cuda_op_ssm_conv(*cuda_ctx, node, cgraph->nodes[i+1]);
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_UNARY, GGML_OP_MUL }, { GGML_UNARY_OP_SILU }) ||
|
||||
ggml_cuda_can_fuse(cgraph, i, { GGML_OP_UNARY, GGML_OP_MUL }, { GGML_UNARY_OP_SIGMOID }) ||
|
||||
ggml_cuda_can_fuse(cgraph, i, { GGML_OP_UNARY, GGML_OP_MUL }, { GGML_UNARY_OP_SOFTPLUS })) {
|
||||
ggml_cuda_op_unary_mul(*cuda_ctx, node, cgraph->nodes[i+1]);
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_UNARY, GGML_OP_SQR }, { GGML_UNARY_OP_RELU })) {
|
||||
ggml_cuda_op_relu_sqr(*cuda_ctx, node, cgraph->nodes[i+1]);
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_SCALE, GGML_OP_UNARY, GGML_OP_SCALE }, { GGML_UNARY_OP_TANH })) {
|
||||
i += 2;
|
||||
ggml_cuda_op_softcap(*cuda_ctx, cgraph->nodes[i], node);
|
||||
continue;
|
||||
}
|
||||
if (nodes_to_skip != 0) {
|
||||
i += nodes_to_skip;
|
||||
continue;
|
||||
}
|
||||
#ifndef NDEBUG
|
||||
assert(node->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device));
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
template <bool apply_silu, size_t split_d_inner, size_t d_conv>
|
||||
static __global__ void ssm_conv_f32(const float * __restrict__ src0, const float * __restrict__ src1,
|
||||
const float * __restrict__ bias,
|
||||
const int src0_nb0, const int src0_nb1, const int src0_nb2, const int src1_nb1,
|
||||
float * __restrict__ dst, const int dst_nb0, const int dst_nb1, const int dst_nb2,
|
||||
const int64_t n_t) {
|
||||
@@ -27,6 +28,8 @@ static __global__ void ssm_conv_f32(const float * __restrict__ src0, const float
|
||||
w[j] = w_block[tid * stride_w + j];
|
||||
}
|
||||
|
||||
float b = bias != nullptr ? bias[bidy * split_d_inner + tid] : 0.0f;
|
||||
|
||||
for (int64_t i = 0; i < n_t; i++) {
|
||||
float sumf = 0.0f;
|
||||
|
||||
@@ -42,12 +45,14 @@ static __global__ void ssm_conv_f32(const float * __restrict__ src0, const float
|
||||
for (size_t j = 0; j < d_conv; j++) {
|
||||
sumf += x[(i + j) % d_conv] * w[j];
|
||||
}
|
||||
sumf += b;
|
||||
y_block[i * stride_y + tid] = apply_silu ? ggml_cuda_op_silu_single(sumf) : sumf;
|
||||
}
|
||||
}
|
||||
|
||||
template <bool apply_silu, size_t split_d_inner, size_t d_conv, int64_t split_n_t>
|
||||
static __global__ void ssm_conv_long_token_f32(const float * __restrict__ src0, const float * __restrict__ src1,
|
||||
const float * __restrict__ bias,
|
||||
const int src0_nb0, const int src0_nb1, const int src0_nb2,
|
||||
const int src1_nb1, float * __restrict__ dst, const int dst_nb0,
|
||||
const int dst_nb1, const int dst_nb2, const int64_t n_t) {
|
||||
@@ -97,6 +102,8 @@ static __global__ void ssm_conv_long_token_f32(const float * __restrict__ src0,
|
||||
w[j] = w_block[tid * stride_w + j];
|
||||
}
|
||||
|
||||
float b = bias != nullptr ? bias[bidy * split_d_inner + tid] : 0.0f;
|
||||
|
||||
// Compute from shared memory
|
||||
for (int64_t i = 0; i < local_n_t; i++) {
|
||||
float sumf = 0.0f;
|
||||
@@ -104,12 +111,13 @@ static __global__ void ssm_conv_long_token_f32(const float * __restrict__ src0,
|
||||
for (size_t j = 0; j < d_conv; j++) {
|
||||
sumf += smem[tid * n_cols + i + j] * w[j];
|
||||
}
|
||||
sumf += b;
|
||||
y_block[i * stride_y + tid] = apply_silu ? ggml_cuda_op_silu_single(sumf) : sumf;
|
||||
}
|
||||
}
|
||||
|
||||
template <bool apply_silu>
|
||||
static void ssm_conv_f32_cuda(const float * src0, const float * src1, const int src0_nb0, const int src0_nb1,
|
||||
static void ssm_conv_f32_cuda(const float * src0, const float * src1, const float * bias, const int src0_nb0, const int src0_nb1,
|
||||
const int src0_nb2, const int src1_nb1, float * dst, const int dst_nb0, const int dst_nb1,
|
||||
const int dst_nb2, const int64_t nc, const int64_t nr, const int64_t n_t,
|
||||
const int64_t n_s, cudaStream_t stream) {
|
||||
@@ -120,14 +128,14 @@ static void ssm_conv_f32_cuda(const float * src0, const float * src1, const int
|
||||
constexpr int kNC = decltype(NC)::value;
|
||||
if (n_t <= 32) {
|
||||
const dim3 blocks(n_s, (nr + threads - 1) / threads, 1);
|
||||
ssm_conv_f32<apply_silu, threads, kNC><<<blocks, threads, 0, stream>>>(src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1,
|
||||
ssm_conv_f32<apply_silu, threads, kNC><<<blocks, threads, 0, stream>>>(src0, src1, bias, src0_nb0, src0_nb1, src0_nb2, src1_nb1,
|
||||
dst, dst_nb0, dst_nb1, dst_nb2, n_t);
|
||||
} else {
|
||||
const int64_t split_n_t = 32;
|
||||
dim3 blocks(n_s, (nr + threads - 1) / threads, (n_t + split_n_t - 1) / split_n_t);
|
||||
const size_t smem_size = threads * (kNC - 1 + split_n_t) * sizeof(float);
|
||||
ssm_conv_long_token_f32<apply_silu, threads, kNC, split_n_t><<<blocks, threads, smem_size, stream>>>(
|
||||
src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1, dst, dst_nb0, dst_nb1, dst_nb2, n_t);
|
||||
src0, src1, bias, src0_nb0, src0_nb1, src0_nb2, src1_nb1, dst, dst_nb0, dst_nb1, dst_nb2, n_t);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -140,11 +148,18 @@ static void ssm_conv_f32_cuda(const float * src0, const float * src1, const int
|
||||
}
|
||||
}
|
||||
|
||||
void ggml_cuda_op_ssm_conv(ggml_backend_cuda_context & ctx, ggml_tensor * dst, ggml_tensor * silu_dst) {
|
||||
void ggml_cuda_op_ssm_conv(ggml_backend_cuda_context & ctx, ggml_tensor * dst, ggml_tensor * bias_add_node, ggml_tensor * silu_dst) {
|
||||
const struct ggml_tensor * src0 = dst->src[0]; // conv_x
|
||||
const struct ggml_tensor * src1 = dst->src[1]; // conv1d.weight
|
||||
const bool fuse_bias = bias_add_node != nullptr;
|
||||
const bool fuse_silu = silu_dst != nullptr;
|
||||
|
||||
// bias always comes with silu.
|
||||
GGML_ASSERT(!fuse_bias || fuse_silu);
|
||||
|
||||
// The bias (when fused) is the non-conv operand of the ADD node.
|
||||
const struct ggml_tensor * bias = fuse_bias ? (bias_add_node->src[0] == dst ? bias_add_node->src[1] : bias_add_node->src[0]) : nullptr;
|
||||
|
||||
// When fusing, write to silu_dst (the node downstream references).
|
||||
const struct ggml_tensor * out = fuse_silu ? silu_dst : dst;
|
||||
|
||||
@@ -160,16 +175,23 @@ void ggml_cuda_op_ssm_conv(ggml_backend_cuda_context & ctx, ggml_tensor * dst, g
|
||||
|
||||
const float * src0_d = (const float *) src0->data;
|
||||
const float * src1_d = (const float *) src1->data;
|
||||
const float * bias_d = fuse_bias ? (const float *) bias->data : nullptr;
|
||||
float * dst_d = (float *) out->data;
|
||||
cudaStream_t stream = ctx.stream();
|
||||
|
||||
GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(out->type == GGML_TYPE_F32);
|
||||
if (fuse_bias) {
|
||||
GGML_ASSERT(bias->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(ggml_is_contiguous(bias));
|
||||
GGML_ASSERT(ggml_nelements(bias) == nr);
|
||||
}
|
||||
|
||||
if (fuse_silu) {
|
||||
ssm_conv_f32_cuda<true>(src0_d, src1_d, src0->nb[0], src0->nb[1], src0->nb[2], src1->nb[1], dst_d, out->nb[0], out->nb[1],
|
||||
ssm_conv_f32_cuda<true>(src0_d, src1_d, bias_d, src0->nb[0], src0->nb[1], src0->nb[2], src1->nb[1], dst_d, out->nb[0], out->nb[1],
|
||||
out->nb[2], nc, nr, n_t, n_s, stream);
|
||||
} else {
|
||||
ssm_conv_f32_cuda<false>(src0_d, src1_d, src0->nb[0], src0->nb[1], src0->nb[2], src1->nb[1], dst_d, out->nb[0], out->nb[1],
|
||||
ssm_conv_f32_cuda<false>(src0_d, src1_d, bias_d, src0->nb[0], src0->nb[1], src0->nb[2], src1->nb[1], dst_d, out->nb[0], out->nb[1],
|
||||
out->nb[2], nc, nr, n_t, n_s, stream);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
#include "common.cuh"
|
||||
|
||||
void ggml_cuda_op_ssm_conv(ggml_backend_cuda_context & ctx, ggml_tensor * dst, ggml_tensor * silu_dst = nullptr);
|
||||
void ggml_cuda_op_ssm_conv(ggml_backend_cuda_context & ctx, ggml_tensor * dst, ggml_tensor * bias_add_node = nullptr, ggml_tensor * silu_dst = nullptr);
|
||||
|
||||
@@ -48,14 +48,16 @@ using intvec = std::vector<int>;
|
||||
using uintvec = std::vector<unsigned int>;
|
||||
using u32vec = std::vector<uint32_t>;
|
||||
|
||||
static size_t opt_ndev = 1;
|
||||
static size_t opt_nhvx = 0; // use all
|
||||
static int opt_arch = 0; // autodetect
|
||||
static int opt_etm = 0;
|
||||
static int opt_verbose = 0;
|
||||
static int opt_profile = 0; // profiling mode (0-disabled, 1-basic, 2-pmu)
|
||||
static int opt_hostbuf = 1; // hostbuf ON by default
|
||||
static int opt_use_hmx = 1; // when set, enable HMX; when 0, use HVX only
|
||||
static int opt_arch = 0; // autodetect
|
||||
static size_t opt_ndev = 1;
|
||||
static size_t opt_nhvx = 0; // use all
|
||||
static int opt_use_hmx = 1; // when set, enable HMX; when 0, use HVX only
|
||||
static size_t opt_vmem = HTP_OP_MAX_VMEM_DEFAULT; // max available va space for buffer mappings
|
||||
static size_t opt_mbuf = 1ul * 1024 * 1024 * 1024; // max buffer size
|
||||
static int opt_etm = 0;
|
||||
static int opt_verbose = 0;
|
||||
static int opt_profile = 0; // profiling mode (0-disabled, 1-basic, 2-pmu)
|
||||
static int opt_hostbuf = 1; // hostbuf ON by default
|
||||
|
||||
// Default PMU events, if profiling with PMU (mode=2) is enabled
|
||||
// See https://docs.qualcomm.com/doc/80-N2040-60/topic/pmu-events.html
|
||||
@@ -66,6 +68,7 @@ static u32vec opt_pmu_evt { 0x3, 0x111, 0x100, 0x105, 0x240, 0x256, 0x7D, 0x8C }
|
||||
static int opt_opstage = HTP_OPSTAGE_QUEUE | HTP_OPSTAGE_COMPUTE;
|
||||
static int opt_opbatch = 1024; // max number of ops in a batch
|
||||
static int opt_opqueue = 16; // max number of pending batches
|
||||
|
||||
static std::regex* opt_opfilter = NULL; // regex of ops to not claim
|
||||
|
||||
#define HEX_VERBOSE(...) \
|
||||
@@ -110,7 +113,7 @@ static void ggml_hexagon_dump_op_supp(const std::string &sess_name, const struct
|
||||
if (!opt_verbose) return;
|
||||
|
||||
op_desc desc(op);
|
||||
GGML_LOG_DEBUG("ggml-hex: %s supports-op %s : %s : %s : %s : %s : %s : %s\n", sess_name.c_str(),
|
||||
GGML_LOG_DEBUG("ggml-hex: %s supports-op %s: %s : %s : %s : %s : %s : %s\n", sess_name.c_str(),
|
||||
ggml_op_desc(op), desc.names, desc.dims, desc.types, desc.strides, desc.buffs, supp ? "yes" : "no");
|
||||
}
|
||||
|
||||
@@ -118,8 +121,6 @@ static void ggml_hexagon_dump_op_prof(const std::string &sess_name, const ggml_t
|
||||
uint32_t op_usec, uint32_t op_cycles, const uint32_t pmu[]) {
|
||||
if (!opt_profile) return;
|
||||
|
||||
op_desc desc(op);
|
||||
|
||||
char pmu_str[256] = "";
|
||||
if (opt_profile > 1) {
|
||||
static_assert(HTP_PROF_PMU_NCNT == 8, "current implementation assumes 8 PMU counters");
|
||||
@@ -127,6 +128,7 @@ static void ggml_hexagon_dump_op_prof(const std::string &sess_name, const ggml_t
|
||||
pmu[0], pmu[1], pmu[2], pmu[3], pmu[4], pmu[5], pmu[6], pmu[7]);
|
||||
}
|
||||
|
||||
op_desc desc(op);
|
||||
GGML_LOG_DEBUG("ggml-hex: %s profile-op %s: %s : %s : %s : %s : usec %u cycles %u%s\n", sess_name.c_str(),
|
||||
ggml_op_desc(op), desc.names, desc.dims, desc.types, desc.strides, op_usec, op_cycles, pmu_str);
|
||||
}
|
||||
@@ -191,33 +193,30 @@ struct ggml_hexagon_shared_buffer {
|
||||
bool mapped;
|
||||
bool pinned;
|
||||
|
||||
void mmap(bool pinned = false) {
|
||||
int err = fastrpc_mmap(sess->domain_id, this->fd, (void *) this->base, 0, this->size, FASTRPC_MAP_FD_DELAYED);
|
||||
void mmap() {
|
||||
fastrpc_map_flags flags = this->pinned ? FASTRPC_MAP_FD : FASTRPC_MAP_FD_DELAYED;
|
||||
|
||||
int err = fastrpc_mmap(sess->domain_id, this->fd, (void *) this->base, 0, this->size, flags);
|
||||
if (err != 0) {
|
||||
GGML_LOG_ERROR("ggml-hex: %s buffer mapping failed : domain_id %d size %zu fd %d error 0x%08x\n", sess->c_name(),
|
||||
sess->domain_id, this->size, this->fd, (unsigned) err);
|
||||
throw std::runtime_error("ggml-hex: fastrpc_mmap failed (see log for details)");
|
||||
}
|
||||
|
||||
if (pinned) {
|
||||
err = htp_iface_mmap(sess->handle, this->fd, this->size, pinned);
|
||||
if (err != 0) {
|
||||
GGML_LOG_ERROR("ggml-hex: %s buffer pinning failed : domain_id %d size %zu fd %d error 0x%08x\n", sess->c_name(),
|
||||
sess->domain_id, this->size, this->fd, (unsigned) err);
|
||||
throw std::runtime_error("ggml-hex: htp_iface_mmap failed (see log for details)");
|
||||
}
|
||||
}
|
||||
|
||||
this->mapped = true;
|
||||
this->pinned = pinned;
|
||||
HEX_VERBOSE("ggml-hex: %s mapped buffer: base %p size %zu fd %d pinned %u\n",
|
||||
sess->c_name(), (void *) this->base, this->size, this->fd, pinned);
|
||||
|
||||
this->mapped = true;
|
||||
}
|
||||
|
||||
void unmap() {
|
||||
if (!this->mapped) return;
|
||||
|
||||
htp_iface_munmap(sess->handle, this->fd);
|
||||
if (!this->pinned) {
|
||||
// HTP might still hold a reference, tell it drop it
|
||||
htp_iface_munmap(sess->handle, this->fd);
|
||||
}
|
||||
|
||||
fastrpc_munmap(sess->domain_id, this->fd, (void *) this->base, this->size);
|
||||
|
||||
HEX_VERBOSE("ggml-hex: %s unmapped buffer: base %p size %zu fd %d\n", sess->c_name(),
|
||||
@@ -227,7 +226,7 @@ struct ggml_hexagon_shared_buffer {
|
||||
this->fd = -1;
|
||||
}
|
||||
|
||||
void alloc(size_t size, bool pinned = false) {
|
||||
void alloc(size_t size) {
|
||||
if (this->base) return;
|
||||
|
||||
this->base = (uint8_t *) rpcmem_alloc2(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, size);
|
||||
@@ -245,8 +244,7 @@ struct ggml_hexagon_shared_buffer {
|
||||
|
||||
HEX_VERBOSE("ggml-hex: %s allocated buffer: base %p size %zu fd %d pinned %d\n", sess->c_name(),
|
||||
(void *) this->base, this->size, this->fd, (int) pinned);
|
||||
|
||||
mmap(pinned);
|
||||
mmap();
|
||||
}
|
||||
|
||||
void free() {
|
||||
@@ -262,15 +260,14 @@ struct ggml_hexagon_shared_buffer {
|
||||
}
|
||||
|
||||
ggml_hexagon_shared_buffer(ggml_hexagon_session * sess, size_t size, bool pinned = false) {
|
||||
size += 4 * 1024; // extra page for padding
|
||||
|
||||
this->sess = sess;
|
||||
this->size = 0;
|
||||
this->base = nullptr;
|
||||
this->fd = -1;
|
||||
this->mapped = false;
|
||||
this->pinned = pinned;
|
||||
|
||||
alloc(size, pinned);
|
||||
alloc(size);
|
||||
}
|
||||
|
||||
~ggml_hexagon_shared_buffer() {
|
||||
@@ -1475,6 +1472,7 @@ static ggml_backend_buffer_t ggml_backend_hexagon_buffer_type_alloc_buffer(
|
||||
ggml_backend_buffer_type_t buffer_type, size_t size) {
|
||||
auto sess = static_cast<ggml_backend_hexagon_buffer_type_context *>(buffer_type->context)->sess;
|
||||
try {
|
||||
size += 4 * 1024; // guard page
|
||||
ggml_hexagon_shared_buffer * sbuf = new ggml_hexagon_shared_buffer(sess, size);
|
||||
return ggml_backend_buffer_init(buffer_type, ggml_backend_hexagon_buffer_interface, sbuf, size);
|
||||
} catch (const std::exception & exc) {
|
||||
@@ -1487,6 +1485,7 @@ static ggml_backend_buffer_t ggml_backend_hexagon_repack_buffer_type_alloc_buffe
|
||||
ggml_backend_buffer_type_t buffer_type, size_t size) {
|
||||
auto sess = static_cast<ggml_backend_hexagon_buffer_type_context *>(buffer_type->context)->sess;
|
||||
try {
|
||||
size += 4 * 1024; // guard page
|
||||
ggml_hexagon_shared_buffer * sbuf = new ggml_hexagon_shared_buffer(sess, size);
|
||||
return ggml_backend_buffer_init(buffer_type, ggml_backend_hexagon_buffer_interface, sbuf, size);
|
||||
} catch (const std::exception & exc) {
|
||||
@@ -1505,7 +1504,7 @@ static size_t ggml_backend_hexagon_buffer_type_get_alloc_size(ggml_backend_buffe
|
||||
}
|
||||
|
||||
static size_t ggml_backend_hexagon_buffer_type_get_max_size(ggml_backend_buffer_type_t buffer_type) {
|
||||
return 1UL * 1024 * 1024 * 1024; // 1GB per buffer
|
||||
return opt_mbuf; // typically 1GB per buffer
|
||||
GGML_UNUSED(buffer_type);
|
||||
}
|
||||
|
||||
@@ -1573,14 +1572,14 @@ struct ggml_hexagon_opbatch {
|
||||
d_map.clear();
|
||||
}
|
||||
|
||||
ggml_hexagon_opbatch(ggml_hexagon_session *sess, size_t batch_size) {
|
||||
ggml_hexagon_opbatch(ggml_hexagon_session *sess, size_t batch_size, size_t max_vmem) {
|
||||
this->sess = sess;
|
||||
|
||||
n_bufs_max = HTP_OP_MAX_BUFS;
|
||||
n_ops_max = batch_size;
|
||||
n_tens_max = n_ops_max + n_ops_max * HTP_OP_MAX_INPUTS;
|
||||
|
||||
b_vmem_max = HTP_OP_MAX_VMEM;
|
||||
b_vmem_max = max_vmem;
|
||||
|
||||
ops.resize(n_ops_max);
|
||||
|
||||
@@ -1592,6 +1591,9 @@ struct ggml_hexagon_opbatch {
|
||||
t_map.reserve(n_tens_max);
|
||||
d_map.reserve(n_tens_max);
|
||||
|
||||
GGML_LOG_INFO("ggml-hex: %s op batching: n-bufs %u n-tensors %u n-ops %u vmem %zu\n",
|
||||
sess->c_name(), n_bufs_max, n_tens_max, n_ops_max, b_vmem_max);
|
||||
|
||||
reset();
|
||||
}
|
||||
|
||||
@@ -1925,6 +1927,8 @@ void ggml_hexagon_session::flush_batch() {
|
||||
// Bump pending flag (cleared in the session::flush once we get the response)
|
||||
this->op_pending++; // atomic inc
|
||||
|
||||
HEX_VERBOSE("ggml-hex: %s queue-opbatch: %p size %u\n", this->c_name(), dbuf.ptr, dbuf.size);
|
||||
|
||||
int err = dspqueue_write(this->queue, 0, 1, &dbuf, sizeof(req), (const uint8_t*) &req, DSPQUEUE_TIMEOUT);
|
||||
if (err != 0) {
|
||||
GGML_ABORT("ggml-hex: %s dspqueue_write failed: 0x%08x\n", this->c_name(), (unsigned) err);
|
||||
@@ -1944,6 +1948,35 @@ void ggml_hexagon_session::flush(bool all) {
|
||||
flush_pending(all);
|
||||
}
|
||||
|
||||
static size_t ggml_hexagon_measure_max_vmem(ggml_hexagon_session *sess) {
|
||||
// Allocate a bunch pinned buffers till failure.
|
||||
// This is kind of expensive but handy for figuring out exactly how much we can mmap on a specific device.
|
||||
// Typically we're going to allocate all/most of these buffers anyway for the model weights.
|
||||
|
||||
std::vector<ggml_hexagon_shared_buffer *> sbufs;
|
||||
|
||||
const size_t MiB = 1024 * 1024;
|
||||
const size_t GiB = MiB * 1024;
|
||||
|
||||
size_t vmem = 0;
|
||||
size_t step = 256u * MiB;
|
||||
|
||||
try {
|
||||
sbufs.push_back(new ggml_hexagon_shared_buffer(sess, GiB, true)); vmem += GiB;
|
||||
sbufs.push_back(new ggml_hexagon_shared_buffer(sess, GiB, true)); vmem += GiB;
|
||||
sbufs.push_back(new ggml_hexagon_shared_buffer(sess, GiB, true)); vmem += GiB;
|
||||
|
||||
while (1) {
|
||||
sbufs.push_back(new ggml_hexagon_shared_buffer(sess, step, true));
|
||||
vmem += step;
|
||||
}
|
||||
} catch (...) { }
|
||||
|
||||
for (auto b : sbufs) { delete b; }
|
||||
|
||||
return vmem - step; // backoff to account for overhead from internal mappings
|
||||
}
|
||||
|
||||
void ggml_hexagon_session::allocate(int dev_id) noexcept(false) {
|
||||
this->valid_session = false;
|
||||
this->valid_handle = false;
|
||||
@@ -1957,7 +1990,7 @@ void ggml_hexagon_session::allocate(int dev_id) noexcept(false) {
|
||||
|
||||
this->op_pending = 0;
|
||||
|
||||
GGML_LOG_INFO("ggml-hex: allocating new session: %s\n", this->name.c_str());
|
||||
GGML_LOG_DEBUG("ggml-hex: %s allocating new session\n", this->name.c_str());
|
||||
|
||||
domain * my_domain = get_domain(this->domain_id);
|
||||
if (my_domain == NULL) {
|
||||
@@ -2033,9 +2066,6 @@ void ggml_hexagon_session::allocate(int dev_id) noexcept(false) {
|
||||
|
||||
this->valid_handle = true;
|
||||
|
||||
GGML_LOG_INFO("ggml-hex: new session: %s : session-id %d domain-id %d uri %s handle 0x%lx\n", this->name.c_str(),
|
||||
this->session_id, this->domain_id, session_uri, (unsigned long) this->handle);
|
||||
|
||||
// Enable FastRPC QoS mode
|
||||
{
|
||||
struct remote_rpc_control_latency l;
|
||||
@@ -2047,6 +2077,9 @@ void ggml_hexagon_session::allocate(int dev_id) noexcept(false) {
|
||||
}
|
||||
}
|
||||
|
||||
GGML_LOG_INFO("ggml-hex: %s new session : session-id %d domain-id %d uri %s handle 0x%lx\n", this->c_name(),
|
||||
this->session_id, this->domain_id, session_uri, (unsigned long) this->handle);
|
||||
|
||||
const size_t req_q_size = (sizeof(htp_opbatch_req) * opt_opqueue * 2) + 1024;
|
||||
const size_t rsp_q_size = (sizeof(htp_opbatch_rsp) * opt_opqueue * 2) + 1024;
|
||||
|
||||
@@ -2091,13 +2124,19 @@ void ggml_hexagon_session::allocate(int dev_id) noexcept(false) {
|
||||
}
|
||||
|
||||
// Allocate buffers and state for op batching
|
||||
this->op_batch = new ggml_hexagon_opbatch(this, opt_opbatch);
|
||||
this->op_queue = new ggml_hexagon_opqueue(this, opt_opbatch, opt_opqueue);
|
||||
|
||||
// Start processing op batch requests
|
||||
err = htp_iface_start(this->handle, dev_id, this->queue_id, opt_nhvx, opt_use_hmx);
|
||||
if (!opt_vmem) {
|
||||
opt_vmem = ggml_hexagon_measure_max_vmem(this);
|
||||
GGML_LOG_INFO("ggml-hex: %s measured max vmem %zu\n", this->c_name(), opt_vmem);
|
||||
}
|
||||
|
||||
this->op_batch = new ggml_hexagon_opbatch(this, opt_opbatch, opt_vmem);
|
||||
|
||||
// Start dspqueue/opbatch processing
|
||||
err = htp_iface_start(this->handle, dev_id, this->queue_id, opt_nhvx, opt_use_hmx, opt_vmem);
|
||||
if (err != 0) {
|
||||
GGML_LOG_ERROR("ggml-hex: failed to start session: 0x%08x\n", (unsigned) err);
|
||||
GGML_LOG_ERROR("ggml-hex: %s failed to start session: 0x%08x\n", this->c_name(), (unsigned) err);
|
||||
throw std::runtime_error("ggml-hex: iface start failed (see log for details)");
|
||||
}
|
||||
this->valid_iface = true;
|
||||
@@ -2108,17 +2147,17 @@ void ggml_hexagon_session::release() noexcept(true) {
|
||||
|
||||
int err;
|
||||
|
||||
delete this->op_batch;
|
||||
delete this->op_queue;
|
||||
|
||||
// Stop the DSP-side service and close the queue
|
||||
if (this->valid_iface) {
|
||||
// Stop dspqueue/opbatch processing
|
||||
err = htp_iface_stop(this->handle);
|
||||
if (err != 0) {
|
||||
GGML_ABORT("ggml-hex: htp_iface_stop failed: 0x%08x\n", (unsigned) err);
|
||||
}
|
||||
}
|
||||
|
||||
delete this->op_batch;
|
||||
delete this->op_queue;
|
||||
|
||||
if (opt_etm) {
|
||||
err = htp_iface_etm(this->handle, 0);
|
||||
if (err != 0) {
|
||||
@@ -3380,21 +3419,6 @@ struct ggml_hexagon_registry {
|
||||
ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
|
||||
GGML_LOG_INFO("ggml-hex: Hexagon backend (experimental) : allocating new registry : ndev %zu\n", opt_ndev);
|
||||
|
||||
if (!opt_arch) {
|
||||
int err = get_hex_arch_ver(CDSP_DOMAIN_ID, &opt_arch);
|
||||
if (err != 0) {
|
||||
GGML_LOG_ERROR("ggml-hex: failed to query HTP version (err %d) defaulting to v73\n", err);
|
||||
opt_arch = 73;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__ANDROID__)
|
||||
if (opt_arch < 75) {
|
||||
opt_ndev = 1;
|
||||
GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v75.\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch);
|
||||
|
||||
// Create devices / sessions
|
||||
@@ -3480,32 +3504,67 @@ static void ggml_hexagon_init(ggml_backend_reg * reg) {
|
||||
static_assert((unsigned int) HTP_TYPE_IQ4_NL == (unsigned int) GGML_TYPE_IQ4_NL,
|
||||
"please update hexagon_type to match ggml_type");
|
||||
|
||||
const char * str_verbose = getenv("GGML_HEXAGON_VERBOSE");
|
||||
const char * str_hostbuf = getenv("GGML_HEXAGON_HOSTBUF");
|
||||
const char * str_opstage = getenv("GGML_HEXAGON_OPSTAGE");
|
||||
const char * str_opbatch = getenv("GGML_HEXAGON_OPBATCH");
|
||||
const char * str_opqueue = getenv("GGML_HEXAGON_OPQUEUE");
|
||||
const char * str_opfilter= getenv("GGML_HEXAGON_OPFILTER");
|
||||
const char * str_profile = getenv("GGML_HEXAGON_PROFILE");
|
||||
const char * str_etm = getenv("GGML_HEXAGON_ETM");
|
||||
const char * str_nhvx = getenv("GGML_HEXAGON_NHVX");
|
||||
const char * str_use_hmx = getenv("GGML_HEXAGON_USE_HMX");
|
||||
const char * str_ndev = getenv("GGML_HEXAGON_NDEV");
|
||||
const char * str_arch = getenv("GGML_HEXAGON_ARCH");
|
||||
const char * str_verbose = getenv("GGML_HEXAGON_VERBOSE");
|
||||
const char * str_hostbuf = getenv("GGML_HEXAGON_HOSTBUF");
|
||||
const char * str_opstage = getenv("GGML_HEXAGON_OPSTAGE");
|
||||
const char * str_opbatch = getenv("GGML_HEXAGON_OPBATCH");
|
||||
const char * str_opqueue = getenv("GGML_HEXAGON_OPQUEUE");
|
||||
const char * str_opfilter = getenv("GGML_HEXAGON_OPFILTER");
|
||||
const char * str_profile = getenv("GGML_HEXAGON_PROFILE");
|
||||
const char * str_etm = getenv("GGML_HEXAGON_ETM");
|
||||
const char * str_nhvx = getenv("GGML_HEXAGON_NHVX");
|
||||
const char * str_use_hmx = getenv("GGML_HEXAGON_USE_HMX");
|
||||
const char * str_ndev = getenv("GGML_HEXAGON_NDEV");
|
||||
const char * str_arch = getenv("GGML_HEXAGON_ARCH");
|
||||
const char * str_vmem = getenv("GGML_HEXAGON_VMEM");
|
||||
const char * str_mbuf = getenv("GGML_HEXAGON_MBUF");
|
||||
|
||||
// Init Arch first since it affects other defaults
|
||||
if (!str_arch) {
|
||||
int err = get_hex_arch_ver(CDSP_DOMAIN_ID, &opt_arch);
|
||||
if (err != 0) {
|
||||
GGML_LOG_ERROR("ggml-hex: failed to query HTP version (err %d) defaulting to v73\n", err);
|
||||
opt_arch = 73;
|
||||
}
|
||||
} else {
|
||||
if (str_arch[0] == 'v' || str_arch[0] == 'V') {
|
||||
str_arch++;
|
||||
}
|
||||
opt_arch = strtoul(str_arch, NULL, 0);
|
||||
}
|
||||
|
||||
size_t MiB = 1024 * 1024;
|
||||
|
||||
// Update vmem default
|
||||
opt_vmem = opt_arch >= 75 ? HTP_OP_MAX_VMEM_DEFAULT : 3000 * MiB;
|
||||
|
||||
auto RE_ICASE = std::regex_constants::icase;
|
||||
|
||||
opt_opfilter = str_opfilter ? new std::regex(str_opfilter, RE_ICASE) : NULL;
|
||||
opt_verbose = str_verbose ? atoi(str_verbose) : 0;
|
||||
opt_hostbuf = str_hostbuf ? atoi(str_hostbuf) : opt_hostbuf;
|
||||
opt_opstage = str_opstage ? strtoul(str_opstage, NULL, 0) : opt_opstage;
|
||||
opt_opbatch = str_opbatch ? strtoul(str_opbatch, NULL, 0) : opt_opbatch;
|
||||
opt_opqueue = str_opqueue ? strtoul(str_opqueue, NULL, 0) : opt_opqueue;
|
||||
opt_etm = str_etm ? atoi(str_etm) : 0;
|
||||
opt_nhvx = str_nhvx ? strtoul(str_nhvx, NULL, 0) : opt_nhvx;
|
||||
opt_use_hmx = str_use_hmx ? atoi(str_use_hmx) : opt_use_hmx;
|
||||
opt_ndev = str_ndev ? strtoul(str_ndev, NULL, 0) : opt_ndev;
|
||||
opt_hostbuf = str_hostbuf ? atoi(str_hostbuf) : opt_hostbuf;
|
||||
opt_opfilter = str_opfilter ? new std::regex(str_opfilter, RE_ICASE) : NULL;
|
||||
opt_verbose = str_verbose ? atoi(str_verbose) : 0;
|
||||
opt_hostbuf = str_hostbuf ? atoi(str_hostbuf) : opt_hostbuf;
|
||||
opt_opstage = str_opstage ? strtoul(str_opstage, NULL, 0) : opt_opstage;
|
||||
opt_opbatch = str_opbatch ? strtoul(str_opbatch, NULL, 0) : opt_opbatch;
|
||||
opt_opqueue = str_opqueue ? strtoul(str_opqueue, NULL, 0) : opt_opqueue;
|
||||
opt_profile = str_profile ? atoi(str_profile) : 0;
|
||||
opt_etm = str_etm ? atoi(str_etm) : 0;
|
||||
opt_nhvx = str_nhvx ? strtoul(str_nhvx, NULL, 0) : opt_nhvx;
|
||||
opt_use_hmx = str_use_hmx ? atoi(str_use_hmx) : opt_use_hmx;
|
||||
opt_ndev = str_ndev ? strtoul(str_ndev, NULL, 0) : opt_ndev;
|
||||
opt_hostbuf = str_hostbuf ? atoi(str_hostbuf) : opt_hostbuf;
|
||||
opt_mbuf = str_mbuf ? strtoul(str_mbuf, NULL, 0) * MiB : opt_mbuf;
|
||||
opt_vmem = str_vmem ? strtoul(str_vmem, NULL, 0) * MiB : opt_vmem;
|
||||
|
||||
if (opt_ndev > GGML_HEXAGON_MAX_SESSIONS) {
|
||||
opt_ndev = GGML_HEXAGON_MAX_SESSIONS;
|
||||
}
|
||||
|
||||
#if defined(__ANDROID__)
|
||||
if (opt_arch < 75) {
|
||||
opt_ndev = 1;
|
||||
GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v75.\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (str_profile) {
|
||||
opt_pmu_evt = [&]() -> std::vector<uint32_t> {
|
||||
@@ -3520,17 +3579,6 @@ static void ggml_hexagon_init(ggml_backend_reg * reg) {
|
||||
vec_to_str<uint32_t, 16>(opt_pmu_evt).c_str());
|
||||
}
|
||||
|
||||
if (opt_ndev > GGML_HEXAGON_MAX_SESSIONS) {
|
||||
opt_ndev = GGML_HEXAGON_MAX_SESSIONS;
|
||||
}
|
||||
|
||||
if (str_arch) {
|
||||
if (str_arch[0] == 'v') {
|
||||
str_arch++;
|
||||
}
|
||||
opt_arch = strtoul(str_arch, NULL, 0);
|
||||
}
|
||||
|
||||
reg->context = new ggml_hexagon_registry(reg);
|
||||
}
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ struct htp_mmap {
|
||||
uint64_t size;
|
||||
uint64_t base;
|
||||
uint32_t fd;
|
||||
uint32_t pinned;
|
||||
uint32_t reserved;
|
||||
};
|
||||
|
||||
// Scratchpad state
|
||||
@@ -77,6 +77,8 @@ struct htp_context {
|
||||
atomic_bool vtcm_valid;
|
||||
atomic_bool vtcm_needs_release;
|
||||
|
||||
uint64_t max_vmem;
|
||||
|
||||
struct htp_ops_context octx;
|
||||
|
||||
#ifdef HTP_HAS_HMX
|
||||
|
||||
@@ -90,15 +90,11 @@ enum htp_op_code {
|
||||
#define HTP_OP_MAX_INPUTS 6 // aka GGML_MAX_SRCS
|
||||
#define HTP_OP_MAX_PARAMS 16 // aka GGML_MAX_OP_PARAMS
|
||||
|
||||
#define HTP_OP_MAX_BUFS 8
|
||||
#define HTP_OP_MAX_BUFS 16
|
||||
#define HTP_OP_MAX_REQS 256
|
||||
#define HTP_OP_MAX_TENSORS (HTP_OP_MAX_REQS * HTP_OP_MAX_INPUTS + HTP_OP_MAX_REQS)
|
||||
|
||||
#if __HVX_ARCH__ < 75
|
||||
#define HTP_OP_MAX_VMEM (3167538380u)
|
||||
#else
|
||||
#define HTP_OP_MAX_VMEM (3221225472u)
|
||||
#endif
|
||||
#define HTP_OP_MAX_VMEM_DEFAULT (3355443200u)
|
||||
|
||||
#define HTP_MMAP_MAX_VMEM (2147483648u)
|
||||
|
||||
|
||||
@@ -11,9 +11,9 @@ struct htp_iface_pmu_conf {
|
||||
};
|
||||
|
||||
interface htp_iface : remote_handle64 {
|
||||
AEEResult start(in uint32 sess_id, in uint64 dsp_queue_id, in uint32 n_hvx, in uint32 use_hmx);
|
||||
AEEResult start(in uint32 sess_id, in uint64 dsp_queue_id, in uint32 n_hvx, in uint32 use_hmx, in uint64 max_vmem);
|
||||
AEEResult stop();
|
||||
AEEResult mmap(in uint32 fd, in uint32 size, in uint32 pinned);
|
||||
AEEResult mmap(in uint32 fd, in uint32 size);
|
||||
AEEResult munmap(in uint32 fd);
|
||||
AEEResult profiler(in uint32 mode, in htp_iface_pmu_conf pmu);
|
||||
AEEResult etm(in uint32 enable);
|
||||
|
||||
@@ -210,7 +210,7 @@ AEEResult htp_iface_close(remote_handle64 handle) {
|
||||
return AEE_SUCCESS;
|
||||
}
|
||||
|
||||
AEEResult htp_iface_mmap(remote_handle64 handle, uint32 fd, uint32 size, uint32 pinned) {
|
||||
AEEResult htp_iface_mmap(remote_handle64 handle, uint32_t fd, uint32_t size) {
|
||||
struct htp_context * ctx = (struct htp_context *) handle;
|
||||
if (!ctx) {
|
||||
return AEE_EBADPARM;
|
||||
@@ -220,7 +220,6 @@ AEEResult htp_iface_mmap(remote_handle64 handle, uint32 fd, uint32 size, uint32
|
||||
for (uint32_t i=0; i<HTP_MAX_MMAPS; i++) {
|
||||
struct htp_mmap *m = &ctx->mmap[i];
|
||||
if (m->fd == fd) {
|
||||
m->pinned = pinned;
|
||||
return AEE_SUCCESS;
|
||||
}
|
||||
}
|
||||
@@ -229,7 +228,7 @@ AEEResult htp_iface_mmap(remote_handle64 handle, uint32 fd, uint32 size, uint32
|
||||
for (uint32_t i=0; i<HTP_MAX_MMAPS; i++) {
|
||||
struct htp_mmap *m = &ctx->mmap[i];
|
||||
if (!m->size) {
|
||||
FARF(HIGH, "mmap : fd %u size %u pinned %u", fd, size, pinned);
|
||||
FARF(HIGH, "mmap : fd %u size %u", fd, size);
|
||||
#if __HVX_ARCH__ > 73
|
||||
void *va = HAP_mmap2(NULL, size, HAP_PROT_READ | HAP_PROT_WRITE, 0, fd, 0);
|
||||
#else
|
||||
@@ -248,7 +247,6 @@ AEEResult htp_iface_mmap(remote_handle64 handle, uint32 fd, uint32 size, uint32
|
||||
m->base = (uint64_t) va;
|
||||
m->fd = fd;
|
||||
m->size = size;
|
||||
m->pinned = pinned;
|
||||
|
||||
return AEE_SUCCESS;
|
||||
}
|
||||
@@ -275,7 +273,6 @@ AEEResult htp_iface_munmap(remote_handle64 handle, uint32 fd) {
|
||||
m->size = 0;
|
||||
m->base = NULL;
|
||||
m->fd = -1;
|
||||
m->pinned = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -358,7 +355,7 @@ static void vtcm_free(struct htp_context * ctx) {
|
||||
static void htp_packet_callback(dspqueue_t queue, int error, void * context);
|
||||
static void htp_error_callback(dspqueue_t queue, int error, void * context);
|
||||
|
||||
AEEResult htp_iface_start(remote_handle64 handle, uint32 sess_id, uint64 dsp_queue_id, uint32 n_hvx, uint32 use_hmx) {
|
||||
AEEResult htp_iface_start(remote_handle64 handle, uint32 sess_id, uint64 dsp_queue_id, uint32 n_hvx, uint32 use_hmx, uint64_t max_vmem) {
|
||||
struct htp_context * ctx = (struct htp_context *) handle;
|
||||
|
||||
if (!ctx) {
|
||||
@@ -376,12 +373,12 @@ AEEResult htp_iface_start(remote_handle64 handle, uint32 sess_id, uint64 dsp_que
|
||||
htp_error_callback, // Error callback; no errors expected on the DSP
|
||||
(void *) ctx, // Callback context
|
||||
&ctx->queue);
|
||||
|
||||
if (err) {
|
||||
FARF(ERROR, "Queue import failed with 0x%08x", (unsigned) err);
|
||||
return err;
|
||||
}
|
||||
|
||||
ctx->max_vmem = max_vmem;
|
||||
ctx->thread_id = qurt_thread_get_id();
|
||||
ctx->thread_prio = qurt_thread_get_priority(ctx->thread_id);
|
||||
|
||||
@@ -622,8 +619,8 @@ static inline bool reuse_buf(struct htp_context *ctx, uint32_t *m_reuse, struct
|
||||
}
|
||||
|
||||
static inline void drop_mmap(struct htp_context *ctx, struct htp_mmap *m) {
|
||||
if (m->size && !m->pinned) {
|
||||
FARF(HIGH, "unmap : fd %u base %p size %u pinned %u", m->fd, (void*) m->base, (uint32_t) m->size, m->pinned);
|
||||
if (m->size) {
|
||||
FARF(HIGH, "unmap : fd %u base %p size %u", m->fd, (void*) m->base, (uint32_t) m->size);
|
||||
#if __HVX_ARCH__ > 73
|
||||
HAP_munmap2((void *) m->base, m->size);
|
||||
#else
|
||||
@@ -660,9 +657,8 @@ static inline void mmap_buf(struct htp_context *ctx, struct htp_buf_desc *b) {
|
||||
m->base = b->base = (uint64_t) va;
|
||||
m->fd = b->fd;
|
||||
m->size = b->size;
|
||||
m->pinned = 0;
|
||||
|
||||
FARF(HIGH, "mmap : fd %u base %p size %u pinned %u", m->fd, (void*) m->base, (uint32_t) m->size, m->pinned);
|
||||
FARF(HIGH, "mmap : fd %u base %p size %u", m->fd, (void*) m->base, (uint32_t) m->size);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -672,8 +668,8 @@ static void prep_op_bufs(struct htp_context *ctx, struct htp_buf_desc *bufs, uin
|
||||
uint32_t m_reuse = 0; // mmap reuse mask (index from ctx->mmap array)
|
||||
uint32_t b_reuse = 0; // buf reuse count
|
||||
|
||||
size_t m_vmem = 0; // mapped vmem
|
||||
size_t e_vmem = 0; // extra vmem
|
||||
uint64_t m_vmem = 0; // mapped vmem
|
||||
uint64_t e_vmem = 0; // extra vmem
|
||||
|
||||
// See what we can reuse
|
||||
for (uint32_t i=0; i < n_bufs; i++) {
|
||||
@@ -687,9 +683,10 @@ static void prep_op_bufs(struct htp_context *ctx, struct htp_buf_desc *bufs, uin
|
||||
// See how much vmem we have mmaped right now
|
||||
for (uint32_t i=0; i<HTP_MAX_MMAPS; i++) { m_vmem += ctx->mmap[i].size; }
|
||||
|
||||
FARF(HIGH, "prep-bufs : pass1 mmap-vmem %zu extra-vmem %zu n-bufs %u b-reuse %u", m_vmem, e_vmem, n_bufs, b_reuse);
|
||||
FARF(HIGH, "prep-bufs : pass1 mmap-vmem %zu extra-vmem %zu max-vmem %zu : n-bufs %u b-reuse %u",
|
||||
(size_t) m_vmem, (size_t) e_vmem, (size_t) ctx->max_vmem, n_bufs, b_reuse);
|
||||
|
||||
if ((m_vmem + e_vmem) > HTP_OP_MAX_VMEM) {
|
||||
if ((m_vmem + e_vmem) > ctx->max_vmem) {
|
||||
// Drop unused mappings
|
||||
for (uint32_t i=0; i < HTP_MAX_MMAPS; i++) {
|
||||
bool used = m_reuse & (1<<i);
|
||||
|
||||
@@ -494,9 +494,10 @@ struct ggml_webgpu_unary_pipeline_key_hash {
|
||||
/** FlashAttention */
|
||||
|
||||
enum ggml_webgpu_flash_attn_path : uint32_t {
|
||||
GGML_WEBGPU_FLASH_ATTN_PATH_SUBGROUP_MATRIX = 0u,
|
||||
GGML_WEBGPU_FLASH_ATTN_PATH_TILE = 1u,
|
||||
GGML_WEBGPU_FLASH_ATTN_PATH_VEC = 2u,
|
||||
GGML_WEBGPU_FLASH_ATTN_PATH_NONE = 0u,
|
||||
GGML_WEBGPU_FLASH_ATTN_PATH_SUBGROUP_MATRIX = 1u,
|
||||
GGML_WEBGPU_FLASH_ATTN_PATH_TILE = 2u,
|
||||
GGML_WEBGPU_FLASH_ATTN_PATH_VEC = 3u,
|
||||
};
|
||||
|
||||
struct ggml_webgpu_flash_attn_pipeline_key {
|
||||
@@ -534,7 +535,7 @@ struct ggml_webgpu_flash_attn_pipeline_key_hash {
|
||||
};
|
||||
|
||||
struct ggml_webgpu_flash_attn_decisions {
|
||||
uint32_t path = GGML_WEBGPU_FLASH_ATTN_PATH_SUBGROUP_MATRIX;
|
||||
uint32_t path = GGML_WEBGPU_FLASH_ATTN_PATH_NONE;
|
||||
uint32_t q_tile = 0;
|
||||
uint32_t kv_tile = 0;
|
||||
uint32_t wg_size = 0;
|
||||
@@ -709,19 +710,29 @@ inline ggml_webgpu_flash_attn_decisions ggml_webgpu_flash_attn_get_decisions(
|
||||
(context.src0->ne[0] % GGML_WEBGPU_FLASH_ATTN_TILE_KV_VEC_WIDTH == 0) &&
|
||||
(context.src2->ne[0] % GGML_WEBGPU_FLASH_ATTN_TILE_KV_VEC_WIDTH == 0) && !use_vec;
|
||||
|
||||
decisions.path = use_vec ? GGML_WEBGPU_FLASH_ATTN_PATH_VEC :
|
||||
use_tile ? GGML_WEBGPU_FLASH_ATTN_PATH_TILE :
|
||||
GGML_WEBGPU_FLASH_ATTN_PATH_SUBGROUP_MATRIX;
|
||||
decisions.path = use_vec ? GGML_WEBGPU_FLASH_ATTN_PATH_VEC :
|
||||
use_tile ? GGML_WEBGPU_FLASH_ATTN_PATH_TILE :
|
||||
context.supports_subgroup_matrix ? GGML_WEBGPU_FLASH_ATTN_PATH_SUBGROUP_MATRIX :
|
||||
GGML_WEBGPU_FLASH_ATTN_PATH_NONE;
|
||||
|
||||
if (decisions.path == GGML_WEBGPU_FLASH_ATTN_PATH_NONE) {
|
||||
return decisions;
|
||||
}
|
||||
|
||||
const ggml_webgpu_flash_attn_pipeline_key key = ggml_webgpu_flash_attn_make_pipeline_key(context, decisions.path);
|
||||
decisions.kv_direct = key.kv_direct;
|
||||
const uint32_t max_kv_tile = ggml_webgpu_flash_attn_max_kv_tile(context, key);
|
||||
// invalidate if even the smallest kv_tile doesn't fit in shared memory
|
||||
if (max_kv_tile == 0) {
|
||||
decisions.path = GGML_WEBGPU_FLASH_ATTN_PATH_NONE;
|
||||
return decisions;
|
||||
}
|
||||
|
||||
if (decisions.path == GGML_WEBGPU_FLASH_ATTN_PATH_VEC) {
|
||||
const uint32_t min_kv_tile = ggml_webgpu_flash_attn_max_kv_tile(context, key);
|
||||
decisions.q_tile = 1u;
|
||||
decisions.kv_tile = std::max(8u, std::min(32u, min_kv_tile));
|
||||
decisions.kv_tile = (decisions.kv_tile / 8u) * 8u;
|
||||
decisions.wg_size = std::max(1u, std::min<uint32_t>(32u, context.max_subgroup_size));
|
||||
decisions.q_tile = 1u;
|
||||
decisions.kv_tile = std::max(8u, std::min(32u, max_kv_tile));
|
||||
decisions.kv_tile = (decisions.kv_tile / 8u) * 8u;
|
||||
decisions.wg_size = std::max(1u, std::min<uint32_t>(32u, context.max_subgroup_size));
|
||||
if (decisions.kv_direct) {
|
||||
decisions.kv_tile = std::min(decisions.kv_tile, GGML_WEBGPU_KV_SEQ_PAD);
|
||||
while (GGML_WEBGPU_KV_SEQ_PAD % decisions.kv_tile != 0) {
|
||||
@@ -734,9 +745,8 @@ inline ggml_webgpu_flash_attn_decisions ggml_webgpu_flash_attn_get_decisions(
|
||||
decisions.q_tile =
|
||||
decisions.path == GGML_WEBGPU_FLASH_ATTN_PATH_TILE ? GGML_WEBGPU_FLASH_ATTN_TILE_Q_TILE : context.sg_mat_m;
|
||||
decisions.kv_tile = decisions.path == GGML_WEBGPU_FLASH_ATTN_PATH_TILE ?
|
||||
std::min(64u, ggml_webgpu_flash_attn_max_kv_tile(context, key)) :
|
||||
std::min(ggml_webgpu_flash_attn_max_kv_tile(context, key),
|
||||
context.sg_mat_n * GGML_WEBGPU_FLASH_ATTN_PREFERRED_KV_SG_TILES);
|
||||
std::min(64u, max_kv_tile) :
|
||||
std::min(max_kv_tile, context.sg_mat_n * GGML_WEBGPU_FLASH_ATTN_PREFERRED_KV_SG_TILES);
|
||||
decisions.wg_size = decisions.path == GGML_WEBGPU_FLASH_ATTN_PATH_TILE ?
|
||||
GGML_WEBGPU_FLASH_ATTN_PREFERRED_WG_SIZE :
|
||||
std::max(context.max_subgroup_size, GGML_WEBGPU_FLASH_ATTN_PREFERRED_WG_SIZE);
|
||||
@@ -755,7 +765,6 @@ inline ggml_webgpu_flash_attn_decisions ggml_webgpu_flash_attn_get_decisions(
|
||||
context.sg_mat_n;
|
||||
}
|
||||
}
|
||||
|
||||
return decisions;
|
||||
}
|
||||
|
||||
@@ -1364,7 +1373,7 @@ class ggml_webgpu_shader_lib {
|
||||
if (key.src_type == GGML_TYPE_Q1_0) {
|
||||
defines.push_back("BLOCK_SIZE=128u");
|
||||
} else if ((key.src_type >= GGML_TYPE_Q4_0 && key.src_type <= GGML_TYPE_Q8_1) ||
|
||||
key.src_type == GGML_TYPE_IQ4_NL) {
|
||||
key.src_type == GGML_TYPE_IQ4_NL) {
|
||||
defines.push_back("BLOCK_SIZE=32u");
|
||||
} else if (key.src_type >= GGML_TYPE_Q2_K) {
|
||||
defines.push_back("BLOCK_SIZE=256u");
|
||||
@@ -1797,6 +1806,25 @@ class ggml_webgpu_shader_lib {
|
||||
defines.push_back("U32_DEQUANT_HELPERS");
|
||||
defines.push_back("SRC0_INNER_TYPE=u32");
|
||||
|
||||
switch (context.src0->type) {
|
||||
case GGML_TYPE_IQ1_S:
|
||||
case GGML_TYPE_IQ1_M:
|
||||
case GGML_TYPE_IQ4_NL:
|
||||
case GGML_TYPE_IQ4_XS:
|
||||
defines.push_back(type_upper + "_GRID");
|
||||
break;
|
||||
case GGML_TYPE_IQ2_XXS:
|
||||
case GGML_TYPE_IQ2_XS:
|
||||
case GGML_TYPE_IQ2_S:
|
||||
case GGML_TYPE_IQ3_XXS:
|
||||
case GGML_TYPE_IQ3_S:
|
||||
defines.push_back(type_upper + "_GRID");
|
||||
defines.push_back(type_upper + "_TABLES");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
variant += std::string("_") + src0_name;
|
||||
break;
|
||||
}
|
||||
@@ -2325,6 +2353,7 @@ class ggml_webgpu_shader_lib {
|
||||
size_t storage_offset_alignment) {
|
||||
const ggml_webgpu_flash_attn_decisions decisions =
|
||||
ggml_webgpu_flash_attn_get_decisions(context, storage_offset_alignment);
|
||||
GGML_ASSERT(decisions.path != GGML_WEBGPU_FLASH_ATTN_PATH_NONE);
|
||||
ggml_webgpu_flash_attn_pipeline_key key = ggml_webgpu_flash_attn_make_pipeline_key(context, decisions.path);
|
||||
auto it = flash_attn_pipelines.find(key);
|
||||
if (it != flash_attn_pipelines.end()) {
|
||||
|
||||
@@ -1422,7 +1422,7 @@ static webgpu_encoded_op ggml_webgpu_mul_mat(webgpu_context & ctx,
|
||||
case GGML_TYPE_IQ3_S:
|
||||
case GGML_TYPE_IQ4_NL:
|
||||
case GGML_TYPE_IQ4_XS:
|
||||
use_fast = is_vec;
|
||||
use_fast = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@@ -3918,6 +3918,10 @@ static bool ggml_backend_webgpu_device_supports_op(ggml_backend_dev_t dev, const
|
||||
shader_lib_ctx, ctx->webgpu_global_ctx->capabilities.limits.minStorageBufferOffsetAlignment);
|
||||
const size_t limit_bytes = ctx->webgpu_global_ctx->capabilities.limits.maxComputeWorkgroupStorageSize;
|
||||
const bool has_mask = op->src[3] != nullptr;
|
||||
if (decisions.path == GGML_WEBGPU_FLASH_ATTN_PATH_NONE) {
|
||||
supports_op = false;
|
||||
break;
|
||||
}
|
||||
if (decisions.path == GGML_WEBGPU_FLASH_ATTN_PATH_VEC) {
|
||||
const size_t min_bytes =
|
||||
ggml_webgpu_flash_attn_wg_mem_bytes(decisions.q_tile, decisions.kv_tile, (uint32_t) src0->ne[0],
|
||||
|
||||
@@ -740,3 +740,426 @@ fn init_shmem_src0(thread_id: u32, batch_offset: u32, offset_m: u32, k_outer: u3
|
||||
}
|
||||
}
|
||||
#endif // INIT_SRC0_SHMEM_Q6_K
|
||||
|
||||
#ifdef INIT_SRC0_SHMEM_IQ4_NL
|
||||
const BLOCK_SIZE = 32u;
|
||||
const BLOCK_SIZE_BYTES = 18u;
|
||||
|
||||
fn init_shmem_src0(thread_id: u32, batch_offset: u32, offset_m: u32, k_outer: u32) {
|
||||
for (var elem_idx = thread_id; elem_idx < TILE_SRC0_SHMEM; elem_idx += TOTAL_WORKGROUP_SIZE) {
|
||||
let tile_m = elem_idx / TILE_K;
|
||||
let tile_k = elem_idx % TILE_K;
|
||||
let global_m = offset_m + tile_m;
|
||||
let global_k = k_outer + tile_k;
|
||||
|
||||
if (global_m >= params.m || global_k >= params.k) {
|
||||
shmem[elem_idx] = f16(0.0);
|
||||
continue;
|
||||
}
|
||||
|
||||
let block_k = global_k / BLOCK_SIZE;
|
||||
let k_in_block = global_k % BLOCK_SIZE;
|
||||
|
||||
let src0_idx = batch_offset + global_m * params.stride_01 + block_k;
|
||||
let block_byte_base = src0_idx * BLOCK_SIZE_BYTES;
|
||||
let d = load_f16_at_src0(block_byte_base);
|
||||
|
||||
let pos = k_in_block % 16u;
|
||||
let nib_shift = (k_in_block / 16u) * 4u;
|
||||
let q_packed = load_u32_at_src0(block_byte_base + 2u + (pos / 4u) * 4u);
|
||||
let nib = (get_byte(q_packed, pos % 4u) >> nib_shift) & 0xFu;
|
||||
|
||||
shmem[elem_idx] = d * f16(kvalues_iq4nl[nib]);
|
||||
}
|
||||
}
|
||||
#endif // INIT_SRC0_SHMEM_IQ4_NL
|
||||
|
||||
#ifdef INIT_SRC0_SHMEM_IQ4_XS
|
||||
const BLOCK_SIZE = 256u;
|
||||
const BLOCK_SIZE_BYTES = 136u;
|
||||
|
||||
fn init_shmem_src0(thread_id: u32, batch_offset: u32, offset_m: u32, k_outer: u32) {
|
||||
for (var elem_idx = thread_id; elem_idx < TILE_SRC0_SHMEM; elem_idx += TOTAL_WORKGROUP_SIZE) {
|
||||
let tile_m = elem_idx / TILE_K;
|
||||
let tile_k = elem_idx % TILE_K;
|
||||
let global_m = offset_m + tile_m;
|
||||
let global_k = k_outer + tile_k;
|
||||
|
||||
if (global_m >= params.m || global_k >= params.k) {
|
||||
shmem[elem_idx] = f16(0.0);
|
||||
continue;
|
||||
}
|
||||
|
||||
let block_k = global_k / BLOCK_SIZE;
|
||||
let k_in_block = global_k % BLOCK_SIZE;
|
||||
|
||||
let src0_idx = batch_offset + global_m * params.stride_01 + block_k;
|
||||
let block_byte_base = src0_idx * BLOCK_SIZE_BYTES;
|
||||
|
||||
let d_scales_h = load_u32_at_src0(block_byte_base);
|
||||
let d = bitcast<vec2<f16>>(d_scales_h).x;
|
||||
let scales_h = d_scales_h >> 16u;
|
||||
|
||||
let ib = k_in_block / 32u;
|
||||
let pos = k_in_block % 32u;
|
||||
|
||||
let scales_l_word = load_u32_at_src0(block_byte_base + 4u);
|
||||
let ls_lo = (get_byte(scales_l_word, ib / 2u) >> ((ib & 1u) * 4u)) & 0xFu;
|
||||
let ls_hi = ((scales_h >> (2u * ib)) & 3u) << 4u;
|
||||
let dl = d * f16(i32(ls_lo | ls_hi) - 32);
|
||||
|
||||
let iqs = ib * 16u + (pos % 16u);
|
||||
let nib_shift = (pos / 16u) * 4u;
|
||||
let q_packed = load_u32_at_src0(block_byte_base + 8u + (iqs / 4u) * 4u);
|
||||
let nib = (get_byte(q_packed, iqs % 4u) >> nib_shift) & 0xFu;
|
||||
|
||||
shmem[elem_idx] = dl * f16(kvalues_iq4nl[nib]);
|
||||
}
|
||||
}
|
||||
#endif // INIT_SRC0_SHMEM_IQ4_XS
|
||||
|
||||
#ifdef INIT_SRC0_SHMEM_IQ1_S
|
||||
const BLOCK_SIZE = 256u;
|
||||
const BLOCK_SIZE_BYTES = 50u;
|
||||
|
||||
fn init_shmem_src0(thread_id: u32, batch_offset: u32, offset_m: u32, k_outer: u32) {
|
||||
for (var elem_idx = thread_id; elem_idx < TILE_SRC0_SHMEM; elem_idx += TOTAL_WORKGROUP_SIZE) {
|
||||
let tile_m = elem_idx / TILE_K;
|
||||
let tile_k = elem_idx % TILE_K;
|
||||
let global_m = offset_m + tile_m;
|
||||
let global_k = k_outer + tile_k;
|
||||
|
||||
if (global_m >= params.m || global_k >= params.k) {
|
||||
shmem[elem_idx] = f16(0.0);
|
||||
continue;
|
||||
}
|
||||
|
||||
let block_k = global_k / BLOCK_SIZE;
|
||||
let k_in_block = global_k % BLOCK_SIZE;
|
||||
|
||||
let src0_idx = batch_offset + global_m * params.stride_01 + block_k;
|
||||
let block_byte_base = src0_idx * BLOCK_SIZE_BYTES;
|
||||
let d = load_f16_as_f32_at_src0(block_byte_base);
|
||||
|
||||
let ib = k_in_block / 32u;
|
||||
let pos = k_in_block % 32u;
|
||||
let l = pos / 8u;
|
||||
let j = pos % 8u;
|
||||
|
||||
let qh = load_u32_at_src0(block_byte_base + 34u + ib * 2u) & 0xFFFFu;
|
||||
let dl = d * (2.0 * f32((qh >> 12u) & 7u) + 1.0);
|
||||
let delta = select(IQ1_DELTA, -IQ1_DELTA, (qh & 0x8000u) != 0u);
|
||||
|
||||
let qs_w = load_u32_at_src0(block_byte_base + 2u + ib * 4u);
|
||||
let ig = (get_byte(qs_w, l) | (((qh >> (3u * l)) & 7u) << 8u)) * 8u;
|
||||
|
||||
let gw = iq1_grid[(ig + j) / 16u];
|
||||
let g = (gw >> (((ig + j) % 16u) * 2u)) & 3u;
|
||||
let gs = bitcast<i32>(g << 30u) >> 30u;
|
||||
|
||||
shmem[elem_idx] = f16(dl * (f32(gs) + delta));
|
||||
}
|
||||
}
|
||||
#endif // INIT_SRC0_SHMEM_IQ1_S
|
||||
|
||||
#ifdef INIT_SRC0_SHMEM_IQ1_M
|
||||
const BLOCK_SIZE = 256u;
|
||||
const BLOCK_SIZE_BYTES = 56u;
|
||||
|
||||
fn init_shmem_src0(thread_id: u32, batch_offset: u32, offset_m: u32, k_outer: u32) {
|
||||
for (var elem_idx = thread_id; elem_idx < TILE_SRC0_SHMEM; elem_idx += TOTAL_WORKGROUP_SIZE) {
|
||||
let tile_m = elem_idx / TILE_K;
|
||||
let tile_k = elem_idx % TILE_K;
|
||||
let global_m = offset_m + tile_m;
|
||||
let global_k = k_outer + tile_k;
|
||||
|
||||
if (global_m >= params.m || global_k >= params.k) {
|
||||
shmem[elem_idx] = f16(0.0);
|
||||
continue;
|
||||
}
|
||||
|
||||
let block_k = global_k / BLOCK_SIZE;
|
||||
let k_in_block = global_k % BLOCK_SIZE;
|
||||
|
||||
let src0_idx = batch_offset + global_m * params.stride_01 + block_k;
|
||||
let block_byte_base = src0_idx * BLOCK_SIZE_BYTES;
|
||||
|
||||
let scales0 = load_u32_at_src0(block_byte_base + 48u);
|
||||
let scales1 = load_u32_at_src0(block_byte_base + 52u);
|
||||
let scale_packed = ((scales0 >> 12u) & 0xFu) |
|
||||
((scales0 >> 24u) & 0x00F0u) |
|
||||
((scales1 >> 4u) & 0x0F00u) |
|
||||
((scales1 >> 16u) & 0xF000u);
|
||||
let d = f32(bitcast<vec2<f16>>(scale_packed).x);
|
||||
|
||||
let ib = k_in_block / 32u;
|
||||
let pos = k_in_block % 32u;
|
||||
let l = pos / 8u;
|
||||
let j = pos % 8u;
|
||||
|
||||
let scales = select(scales0, scales1, ib >= 4u);
|
||||
let sw = (scales >> (16u * ((ib / 2u) % 2u))) & 0xFFFFu;
|
||||
let s_pair = (sw >> (6u * (ib % 2u) + 3u * (l / 2u))) & 0x7u;
|
||||
let dl = d * f32(2u * s_pair + 1u);
|
||||
|
||||
let qh_word = load_u32_at_src0(block_byte_base + 32u + (ib / 2u) * 4u);
|
||||
let qh = qh_word >> (16u * (ib % 2u));
|
||||
let qh_nib = (qh >> (4u * l)) & 0xFu;
|
||||
|
||||
let qs_w = load_u32_at_src0(block_byte_base + ib * 4u);
|
||||
let idx = get_byte(qs_w, l) | ((qh_nib & 7u) << 8u);
|
||||
let delta = select(IQ1_DELTA, -IQ1_DELTA, (qh_nib & 0x8u) != 0u);
|
||||
|
||||
let ig = idx * 8u;
|
||||
let gw = iq1_grid[(ig + j) / 16u];
|
||||
let g = (gw >> (((ig + j) % 16u) * 2u)) & 3u;
|
||||
let gs = bitcast<i32>(g << 30u) >> 30u;
|
||||
|
||||
shmem[elem_idx] = f16(dl * (f32(gs) + delta));
|
||||
}
|
||||
}
|
||||
#endif // INIT_SRC0_SHMEM_IQ1_M
|
||||
|
||||
#ifdef INIT_SRC0_SHMEM_IQ2_XXS
|
||||
const BLOCK_SIZE = 256u;
|
||||
const BLOCK_SIZE_BYTES = 66u;
|
||||
|
||||
fn init_shmem_src0(thread_id: u32, batch_offset: u32, offset_m: u32, k_outer: u32) {
|
||||
for (var elem_idx = thread_id; elem_idx < TILE_SRC0_SHMEM; elem_idx += TOTAL_WORKGROUP_SIZE) {
|
||||
let tile_m = elem_idx / TILE_K;
|
||||
let tile_k = elem_idx % TILE_K;
|
||||
let global_m = offset_m + tile_m;
|
||||
let global_k = k_outer + tile_k;
|
||||
|
||||
if (global_m >= params.m || global_k >= params.k) {
|
||||
shmem[elem_idx] = f16(0.0);
|
||||
continue;
|
||||
}
|
||||
|
||||
let block_k = global_k / BLOCK_SIZE;
|
||||
let k_in_block = global_k % BLOCK_SIZE;
|
||||
|
||||
let src0_idx = batch_offset + global_m * params.stride_01 + block_k;
|
||||
let block_byte_base = src0_idx * BLOCK_SIZE_BYTES;
|
||||
let d = load_f16_as_f32_at_src0(block_byte_base);
|
||||
|
||||
let entry_idx = k_in_block / 8u;
|
||||
let j = k_in_block % 8u;
|
||||
|
||||
let ib = entry_idx & ~3u;
|
||||
let l = entry_idx & 3u;
|
||||
|
||||
let aux0 = load_u32_at_src0(block_byte_base + 2u + ib * 2u);
|
||||
let aux1 = load_u32_at_src0(block_byte_base + 2u + (ib + 2u) * 2u);
|
||||
let db = d * (0.5 + f32(aux1 >> 28u)) * 0.25;
|
||||
|
||||
let ig = get_byte(aux0, l) * 8u;
|
||||
let is = (aux1 >> (7u * l)) & 127u;
|
||||
let signs = get_byte(ksigns_iq2xs[is / 4u], is % 4u);
|
||||
|
||||
let g = get_byte(iq2xxs_grid[(ig + j) / 4u], (ig + j) % 4u);
|
||||
let m = select(1.0, -1.0, (get_byte(kmask_iq2xs[j / 4u], j % 4u) & signs) != 0u);
|
||||
|
||||
shmem[elem_idx] = f16(db * f32(g) * m);
|
||||
}
|
||||
}
|
||||
#endif // INIT_SRC0_SHMEM_IQ2_XXS
|
||||
|
||||
#ifdef INIT_SRC0_SHMEM_IQ2_XS
|
||||
const BLOCK_SIZE = 256u;
|
||||
const BLOCK_SIZE_BYTES = 74u;
|
||||
|
||||
fn init_shmem_src0(thread_id: u32, batch_offset: u32, offset_m: u32, k_outer: u32) {
|
||||
for (var elem_idx = thread_id; elem_idx < TILE_SRC0_SHMEM; elem_idx += TOTAL_WORKGROUP_SIZE) {
|
||||
let tile_m = elem_idx / TILE_K;
|
||||
let tile_k = elem_idx % TILE_K;
|
||||
let global_m = offset_m + tile_m;
|
||||
let global_k = k_outer + tile_k;
|
||||
|
||||
if (global_m >= params.m || global_k >= params.k) {
|
||||
shmem[elem_idx] = f16(0.0);
|
||||
continue;
|
||||
}
|
||||
|
||||
let block_k = global_k / BLOCK_SIZE;
|
||||
let k_in_block = global_k % BLOCK_SIZE;
|
||||
|
||||
let src0_idx = batch_offset + global_m * params.stride_01 + block_k;
|
||||
let block_byte_base = src0_idx * BLOCK_SIZE_BYTES;
|
||||
let d = load_f16_as_f32_at_src0(block_byte_base);
|
||||
|
||||
let entry_idx = k_in_block / 8u;
|
||||
let j = k_in_block % 8u;
|
||||
|
||||
let ib = entry_idx & ~3u;
|
||||
let l = entry_idx & 3u;
|
||||
|
||||
let scales_word = load_u32_at_src0(block_byte_base + 66u + (ib / 16u) * 4u);
|
||||
let s = get_byte(scales_word, (ib % 16u) / 4u);
|
||||
let s_nib = select(s & 0xFu, (s >> 4u) & 0xFu, (l / 2u) != 0u);
|
||||
let dl = d * (0.5 + f32(s_nib)) * 0.25;
|
||||
|
||||
let qs_word = load_u32_at_src0(block_byte_base + 2u + (ib + l) * 2u);
|
||||
let qs_val = qs_word & 0xFFFFu;
|
||||
let ig = (qs_val & 511u) * 8u;
|
||||
let is = qs_val >> 9u;
|
||||
let signs = get_byte(ksigns_iq2xs[is / 4u], is % 4u);
|
||||
|
||||
let g = get_byte(iq2xs_grid[(ig + j) / 4u], (ig + j) % 4u);
|
||||
let m = select(1.0, -1.0, (get_byte(kmask_iq2xs[j / 4u], j % 4u) & signs) != 0u);
|
||||
|
||||
shmem[elem_idx] = f16(dl * f32(g) * m);
|
||||
}
|
||||
}
|
||||
#endif // INIT_SRC0_SHMEM_IQ2_XS
|
||||
|
||||
#ifdef INIT_SRC0_SHMEM_IQ2_S
|
||||
const BLOCK_SIZE = 256u;
|
||||
const BLOCK_SIZE_BYTES = 82u;
|
||||
|
||||
fn init_shmem_src0(thread_id: u32, batch_offset: u32, offset_m: u32, k_outer: u32) {
|
||||
for (var elem_idx = thread_id; elem_idx < TILE_SRC0_SHMEM; elem_idx += TOTAL_WORKGROUP_SIZE) {
|
||||
let tile_m = elem_idx / TILE_K;
|
||||
let tile_k = elem_idx % TILE_K;
|
||||
let global_m = offset_m + tile_m;
|
||||
let global_k = k_outer + tile_k;
|
||||
|
||||
if (global_m >= params.m || global_k >= params.k) {
|
||||
shmem[elem_idx] = f16(0.0);
|
||||
continue;
|
||||
}
|
||||
|
||||
let block_k = global_k / BLOCK_SIZE;
|
||||
let k_in_block = global_k % BLOCK_SIZE;
|
||||
|
||||
let src0_idx = batch_offset + global_m * params.stride_01 + block_k;
|
||||
let block_byte_base = src0_idx * BLOCK_SIZE_BYTES;
|
||||
let d = load_f16_as_f32_at_src0(block_byte_base);
|
||||
|
||||
let ib = k_in_block / 32u;
|
||||
let l = (k_in_block % 32u) / 8u;
|
||||
let j = k_in_block % 8u;
|
||||
|
||||
let scales_word = load_u32_at_src0(block_byte_base + 74u + (ib / 4u) * 4u);
|
||||
let s = get_byte(scales_word, ib % 4u);
|
||||
let s_nib = select(s & 0xFu, (s >> 4u) & 0xFu, (l / 2u) != 0u);
|
||||
let dl = d * (0.5 + f32(s_nib)) * 0.25;
|
||||
|
||||
let qs_word = load_u32_at_src0(block_byte_base + 2u + ib * 4u);
|
||||
let qh_word = load_u32_at_src0(block_byte_base + 66u + (ib / 4u) * 4u);
|
||||
let qh_b = (get_byte(qh_word, ib % 4u) << (8u - 2u * l)) & 0x300u;
|
||||
let ig = (get_byte(qs_word, l) | qh_b) * 8u;
|
||||
|
||||
let signs_word = load_u32_at_src0(block_byte_base + 34u + ib * 4u);
|
||||
let signs = get_byte(signs_word, l);
|
||||
|
||||
let g = get_byte(iq2s_grid[(ig + j) / 4u], (ig + j) % 4u);
|
||||
let m = select(1.0, -1.0, (get_byte(kmask_iq2xs[j / 4u], j % 4u) & signs) != 0u);
|
||||
|
||||
shmem[elem_idx] = f16(dl * f32(g) * m);
|
||||
}
|
||||
}
|
||||
#endif // INIT_SRC0_SHMEM_IQ2_S
|
||||
|
||||
#ifdef INIT_SRC0_SHMEM_IQ3_XXS
|
||||
const BLOCK_SIZE = 256u;
|
||||
const BLOCK_SIZE_BYTES = 98u;
|
||||
|
||||
fn init_shmem_src0(thread_id: u32, batch_offset: u32, offset_m: u32, k_outer: u32) {
|
||||
for (var elem_idx = thread_id; elem_idx < TILE_SRC0_SHMEM; elem_idx += TOTAL_WORKGROUP_SIZE) {
|
||||
let tile_m = elem_idx / TILE_K;
|
||||
let tile_k = elem_idx % TILE_K;
|
||||
let global_m = offset_m + tile_m;
|
||||
let global_k = k_outer + tile_k;
|
||||
|
||||
if (global_m >= params.m || global_k >= params.k) {
|
||||
shmem[elem_idx] = f16(0.0);
|
||||
continue;
|
||||
}
|
||||
|
||||
let block_k = global_k / BLOCK_SIZE;
|
||||
let k_in_block = global_k % BLOCK_SIZE;
|
||||
|
||||
let src0_idx = batch_offset + global_m * params.stride_01 + block_k;
|
||||
let block_byte_base = src0_idx * BLOCK_SIZE_BYTES;
|
||||
let d = load_f16_as_f32_at_src0(block_byte_base);
|
||||
|
||||
let ib_pair = k_in_block / 32u;
|
||||
let in_pair = k_in_block % 32u;
|
||||
let l = in_pair / 8u;
|
||||
let in_l = in_pair % 8u;
|
||||
let k2 = in_l / 4u;
|
||||
let j = in_l % 4u;
|
||||
|
||||
let ib = ib_pair * 2u;
|
||||
let sc_sign_off = block_byte_base + 2u + (ib + 32u) * 2u;
|
||||
let sc_sign = load_u32_at_src0(sc_sign_off);
|
||||
let db = d * (0.5 + f32(sc_sign >> 28u)) * 0.5;
|
||||
let is = (sc_sign >> (7u * l)) & 127u;
|
||||
let signs = get_byte(ksigns_iq2xs[is / 4u], is % 4u);
|
||||
|
||||
let ig_word = load_u32_at_src0(block_byte_base + 2u + (ib * 2u + l) * 2u) & 0xFFFFu;
|
||||
let ig_byte = get_byte(ig_word, k2);
|
||||
let g = get_byte(iq3xxs_grid[ig_byte], j);
|
||||
let m = select(1.0, -1.0, (get_byte(kmask_iq2xs[k2], j) & signs) != 0u);
|
||||
|
||||
shmem[elem_idx] = f16(db * f32(g) * m);
|
||||
}
|
||||
}
|
||||
#endif // INIT_SRC0_SHMEM_IQ3_XXS
|
||||
|
||||
#ifdef INIT_SRC0_SHMEM_IQ3_S
|
||||
const BLOCK_SIZE = 256u;
|
||||
const BLOCK_SIZE_BYTES = 110u;
|
||||
|
||||
fn init_shmem_src0(thread_id: u32, batch_offset: u32, offset_m: u32, k_outer: u32) {
|
||||
for (var elem_idx = thread_id; elem_idx < TILE_SRC0_SHMEM; elem_idx += TOTAL_WORKGROUP_SIZE) {
|
||||
let tile_m = elem_idx / TILE_K;
|
||||
let tile_k = elem_idx % TILE_K;
|
||||
let global_m = offset_m + tile_m;
|
||||
let global_k = k_outer + tile_k;
|
||||
|
||||
if (global_m >= params.m || global_k >= params.k) {
|
||||
shmem[elem_idx] = f16(0.0);
|
||||
continue;
|
||||
}
|
||||
|
||||
let block_k = global_k / BLOCK_SIZE;
|
||||
let k_in_block = global_k % BLOCK_SIZE;
|
||||
|
||||
let src0_idx = batch_offset + global_m * params.stride_01 + block_k;
|
||||
let block_byte_base = src0_idx * BLOCK_SIZE_BYTES;
|
||||
let d = load_f16_as_f32_at_src0(block_byte_base);
|
||||
|
||||
let ib = k_in_block / 64u;
|
||||
let rest = k_in_block % 64u;
|
||||
let k = rest / 32u;
|
||||
let in_k = rest % 32u;
|
||||
let l = in_k / 8u;
|
||||
let in_l = in_k % 8u;
|
||||
let k2 = in_l / 4u;
|
||||
let j = in_l % 4u;
|
||||
|
||||
let scales_word = load_u32_at_src0(block_byte_base + 106u);
|
||||
let s = get_byte(scales_word, ib);
|
||||
let s_nib = select(s & 0xFu, (s >> 4u) & 0xFu, k != 0u);
|
||||
let dl = d * (1.0 + 2.0 * f32(s_nib));
|
||||
|
||||
let qh_word = load_u32_at_src0(block_byte_base + 66u + (ib / 2u) * 4u);
|
||||
let qh_byte = get_byte(qh_word, (ib % 2u) * 2u + k);
|
||||
|
||||
let ig_word = load_u32_at_src0(block_byte_base + 2u + (ib * 8u + k * 4u + l) * 2u) & 0xFFFFu;
|
||||
let ig_lo = get_byte(ig_word, 0u) | ((qh_byte << (8u - 2u * l)) & 256u);
|
||||
let ig_hi = get_byte(ig_word, 1u) | ((qh_byte << (7u - 2u * l)) & 256u);
|
||||
let ig = select(ig_lo, ig_hi, k2 != 0u);
|
||||
|
||||
let signs_word = load_u32_at_src0(block_byte_base + 74u + (ib * 2u + k) * 4u);
|
||||
let signs = get_byte(signs_word, l);
|
||||
|
||||
let g = get_byte(iq3s_grid[ig], j);
|
||||
let m = select(1.0, -1.0, (get_byte(kmask_iq2xs[k2], j) & signs) != 0u);
|
||||
|
||||
shmem[elem_idx] = f16(dl * f32(g) * m);
|
||||
}
|
||||
}
|
||||
#endif // INIT_SRC0_SHMEM_IQ3_S
|
||||
|
||||
@@ -54,13 +54,23 @@ opqueue=
|
||||
opflt=
|
||||
[ "$OF" != "" ] && opflt="GGML_HEXAGON_OPFILTER=$OF"
|
||||
|
||||
vmem=
|
||||
[ "$VM" != "" ] && opflt="GGML_HEXAGON_VMEM=$VM"
|
||||
|
||||
mbuf=
|
||||
[ "$MB" != "" ] && opflt="GGML_HEXAGON_MBUF=$MB"
|
||||
vmem=
|
||||
[ "$VM" != "" ] && vmem="GGML_HEXAGON_VMEM=$VM"
|
||||
|
||||
mbuf=
|
||||
[ "$MB" != "" ] && mbuf="GGML_HEXAGON_MBUF=$MB"
|
||||
set -x
|
||||
|
||||
adb $adbserial $adbhost shell " \
|
||||
cd $basedir; ulimit -c unlimited; \
|
||||
LD_LIBRARY_PATH=$basedir/$branch/lib \
|
||||
ADSP_LIBRARY_PATH=$basedir/$branch/lib \
|
||||
$verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt \
|
||||
$verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt $vmem $mbuf \
|
||||
./$branch/bin/llama-cli --no-mmap -m $basedir/../gguf/$model \
|
||||
--poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
|
||||
--ctx-size 8192 --ubatch-size 256 -fa on \
|
||||
|
||||
@@ -54,13 +54,19 @@ opqueue=
|
||||
opflt=
|
||||
[ "$OF" != "" ] && opflt="GGML_HEXAGON_OPFILTER=$OF"
|
||||
|
||||
vmem=
|
||||
[ "$VM" != "" ] && vmem="GGML_HEXAGON_VMEM=$VM"
|
||||
|
||||
mbuf=
|
||||
[ "$MB" != "" ] && mbuf="GGML_HEXAGON_MBUF=$MB"
|
||||
|
||||
set -x
|
||||
|
||||
adb $adbserial $adbhost shell " \
|
||||
cd $basedir; ulimit -c unlimited; \
|
||||
LD_LIBRARY_PATH=$basedir/$branch/lib \
|
||||
ADSP_LIBRARY_PATH=$basedir/$branch/lib \
|
||||
$verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt \
|
||||
$verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt $vmem $mbuf \
|
||||
./$branch/bin/llama-completion --no-mmap -m $basedir/../gguf/$model \
|
||||
--poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
|
||||
--ctx-size 8192 --ubatch-size 256 -fa on \
|
||||
|
||||
@@ -1 +1 @@
|
||||
1c40d85a4dcfcd62176f649b8682433bb1a6caef
|
||||
387fa29fbbf3149f06a631c7850b6c35c24b0232
|
||||
|
||||
58
scripts/wc2wt.sh
Executable file
58
scripts/wc2wt.sh
Executable file
@@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# initialize a new worktree from a branch name:
|
||||
#
|
||||
# - creates a new branch from current HEAD
|
||||
# - creates a new worktree in a parent folder, suffixed with the branch name
|
||||
#
|
||||
# sample usage:
|
||||
# ./scripts/wc2wt.sh gg/new-feature-foo-bar
|
||||
# ./scripts/wc2wt.sh gg/new-feature-foo-bar opencode
|
||||
# ./scripts/wc2wt.sh gg/new-feature-foo-bar "cmake -B build && cmake --build build"
|
||||
# ./scripts/wc2wt.sh gg/new-feature-foo-bar "bash -l"
|
||||
|
||||
function usage() {
|
||||
echo "usage: $0 <branch_name> [cmd]"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# check we are in the right directory
|
||||
if [[ ! -f "scripts/wc2wt.sh" ]]; then
|
||||
echo "error: this script must be run from the root of the repository"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ $# -lt 1 || $# -gt 2 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
BRANCH=$1
|
||||
|
||||
if [[ -z "$BRANCH" ]]; then
|
||||
echo "error: branch name must not be empty"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
dir=$(basename $(pwd))
|
||||
# sanitize branch name for directory name (replace / with -)
|
||||
dir_suffix=$(echo "$BRANCH" | tr '/' '-')
|
||||
|
||||
git worktree add -b "$BRANCH" "../$dir-$dir_suffix" HEAD
|
||||
|
||||
og_path=$(pwd)
|
||||
wt_path=$(cd "../$dir-$dir_suffix" && pwd)
|
||||
|
||||
echo "git worktree created in $wt_path"
|
||||
|
||||
cd "$wt_path"
|
||||
|
||||
# pi agent setup in the worktree
|
||||
if [[ -f "$og_path/.pi/SYSTEM.md" && ! -f ".pi/SYSTEM.md" ]]; then
|
||||
mkdir -p .pi
|
||||
ln -sfn "$og_path/.pi/SYSTEM.md" .pi/SYSTEM.md
|
||||
fi
|
||||
|
||||
if [[ $# -eq 2 ]]; then
|
||||
echo "executing: $2"
|
||||
eval "$2"
|
||||
fi
|
||||
@@ -3579,6 +3579,49 @@ struct test_ssm_conv : public test_case {
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_SSM_CONV + GGML_OP_ADD (channel-wise bias, optional) + GGML_OP_UNARY(SILU) (fused operation)
|
||||
struct test_ssm_conv_bias_silu : public test_case {
|
||||
const ggml_type type;
|
||||
const std::array<int64_t, 4> ne_a;
|
||||
const std::array<int64_t, 4> ne_b;
|
||||
const bool fuse_bias;
|
||||
|
||||
std::string op_desc(ggml_tensor * t) override {
|
||||
GGML_UNUSED(t);
|
||||
return "SSM_CONV_BIAS_SILU";
|
||||
}
|
||||
|
||||
bool run_whole_graph() override { return true; }
|
||||
|
||||
std::string vars() override {
|
||||
return VARS_TO_STR4(type, ne_a, ne_b, fuse_bias);
|
||||
}
|
||||
|
||||
test_ssm_conv_bias_silu(ggml_type type, std::array<int64_t, 4> ne_a, std::array<int64_t, 4> ne_b,
|
||||
bool fuse_bias)
|
||||
: type(type), ne_a(ne_a), ne_b(ne_b), fuse_bias(fuse_bias) {}
|
||||
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
|
||||
ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne_b.data());
|
||||
ggml_set_name(a, "a");
|
||||
ggml_set_name(b, "b");
|
||||
|
||||
ggml_tensor * out = ggml_ssm_conv(ctx, a, b);
|
||||
|
||||
if (fuse_bias) {
|
||||
ggml_tensor * bias = ggml_new_tensor_1d(ctx, type, out->ne[0]);
|
||||
ggml_set_name(bias, "bias");
|
||||
out = ggml_add(ctx, out, bias);
|
||||
}
|
||||
|
||||
out = ggml_silu(ctx, out);
|
||||
|
||||
ggml_set_name(out, "out");
|
||||
return out;
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_SSM_SCAN
|
||||
struct test_ssm_scan : public test_case {
|
||||
const ggml_type type;
|
||||
@@ -7977,6 +8020,27 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
}
|
||||
}
|
||||
|
||||
// fused ssm_conv + (optional) bias_add + silu. The bias-only graph (no silu) is intentionally
|
||||
// not tested since there's no fusion for that pattern in ggml_cuda_can_fuse.
|
||||
for (int64_t d_conv : {3, 4, 9}) {
|
||||
for (int64_t d_inner : {1024, 1536, 2048}) {
|
||||
for (bool fuse_bias : {false, true}) {
|
||||
// short token path (n_t <= 32)
|
||||
test_cases.emplace_back(new test_ssm_conv_bias_silu(
|
||||
GGML_TYPE_F32, {d_conv, d_inner, 1, 1}, {d_conv, d_inner, 1, 1}, fuse_bias));
|
||||
test_cases.emplace_back(new test_ssm_conv_bias_silu(
|
||||
GGML_TYPE_F32, {2 * d_conv, d_inner, 1, 1}, {d_conv, d_inner, 1, 1}, fuse_bias));
|
||||
test_cases.emplace_back(new test_ssm_conv_bias_silu(
|
||||
GGML_TYPE_F32, {d_conv, d_inner, 4, 1}, {d_conv, d_inner, 1, 1}, fuse_bias));
|
||||
// long token path (n_t > 32)
|
||||
test_cases.emplace_back(new test_ssm_conv_bias_silu(
|
||||
GGML_TYPE_F32, {d_conv - 1 + 64, d_inner, 1, 1}, {d_conv, d_inner, 1, 1}, fuse_bias));
|
||||
test_cases.emplace_back(new test_ssm_conv_bias_silu(
|
||||
GGML_TYPE_F32, {d_conv - 1 + 64, d_inner, 4, 1}, {d_conv, d_inner, 1, 1}, fuse_bias));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 16, 1, 1024, 1, 32, 4)); // Mamba-1
|
||||
test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 128, 64, 16, 2, 32, 4)); // Mamba-2
|
||||
test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 256, 64, 8, 2, 32, 4)); // Falcon-H1
|
||||
@@ -8993,6 +9057,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
||||
// Examples from granite-4.0-h-1b/ggml-model-Q8_0.gguf
|
||||
test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {515, 3328, 1, 1}, {4, 3328, 1, 1})); // prefill
|
||||
test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {4, 3328, 1, 1}, {4, 3328, 1, 1})); // generate
|
||||
test_cases.emplace_back(new test_ssm_conv_bias_silu(GGML_TYPE_F32, {515, 3328, 1, 1}, {4, 3328, 1, 1}, true)); // prefill
|
||||
test_cases.emplace_back(new test_ssm_conv_bias_silu(GGML_TYPE_F32, {4, 3328, 1, 1}, {4, 3328, 1, 1}, true)); // generate
|
||||
test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 128, 64, 48, 1, 512, 1)); // prefill
|
||||
test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 128, 64, 48, 1, 1, 1)); // generate
|
||||
|
||||
|
||||
@@ -35,5 +35,9 @@ int main() {
|
||||
threads[i].join();
|
||||
}
|
||||
|
||||
common_log_flush(common_log_main());
|
||||
// We explicitly free the logger singleton to avoid hanging on Windows
|
||||
// related to timing issues of thread startup and DLL teardown
|
||||
common_log_free(common_log_main());
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -5151,38 +5151,39 @@ nt",variants:{variant:{default:"bg-card text-card-foreground",destructive:"text-
|
||||
attribute_effect(div,$0=>({"data-slot":"alert",class:$0,...restProps,role:"alert"}),[()=>cn$1(alertVariants({variant:variant()}),$$props.class)]);var node2=child(div);snippet(node2,()=>$$props.children??noop$3),reset(div),bind_this(div,$$value=>ref2($$value),()=>ref2()),append($$anchor,div),pop()}var root$1A=from_html("<div><!></div>");function Alert_description($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy",
|
||||
"ref","class","children"]);var div=root$1A();attribute_effect(div,$0=>({"data-slot":"alert-description",class:$0,...restProps}),[()=>cn$1("col-start-2 grid justify-items-start gap-1 text-sm text-muted-foreground [&_p]:leading-relaxed",$$props.class)]);var node2=child(div);snippet(node2,()=>$$props.children??noop$3),reset(div),bind_this(div,$$value=>ref2($$value),()=>ref2()),append($$anchor,div),pop()}var root$1z=from_html("<div><!></div>");function Alert_title($$anchor,$$props){push$1($$props,!0);
|
||||
let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class","children"]);var div=root$1z();attribute_effect(div,$0=>({"data-slot":"alert-title",class:$0,...restProps}),[()=>cn$1("col-start-2 line-clamp-1 min-h-4 font-medium tracking-tight",$$props.class)]);var node2=child(div);snippet(node2,()=>$$props.children??noop$3),reset(div),bind_this(div,$$value=>ref2($$value),()=>ref2()),append($$anchor,div),pop()}class AudioRecorder{mediaRecorder=null;audioChunks=[];stream=null;recordingState=!1;async startRecording(){
|
||||
try{this.stream=await navigator.mediaDevices.getUserMedia({audio:{echoCancellation:!0,noiseSuppression:!0,autoGainControl:!0}}),this.initializeRecorder(this.stream),this.audioChunks=[],this.mediaRecorder.start(100),this.recordingState=!0}catch(error2){throw console.error("Failed to start recording:",error2),new Error("Failed to access microphone. Please check permissions.")}}async stopRecording(){return new Promise((resolve2,reject)=>{if(!this.mediaRecorder||this.mediaRecorder.state==="inactive"){
|
||||
reject(new Error("No active recording to stop"));return}this.mediaRecorder.onstop=()=>{const mimeType=this.mediaRecorder?.mimeType||MimeTypeAudio.WAV,audioBlob=new Blob(this.audioChunks,{type:mimeType});this.cleanup(),resolve2(audioBlob)},this.mediaRecorder.onerror=event2=>{console.error("Recording error:",event2),this.cleanup(),reject(new Error("Recording failed"))},this.mediaRecorder.stop()})}isRecording(){return this.recordingState}cancelRecording(){this.mediaRecorder&&this.mediaRecorder.state!==
|
||||
"inactive"&&this.mediaRecorder.stop(),this.cleanup()}initializeRecorder(stream){const options={};MediaRecorder.isTypeSupported(MimeTypeAudio.WAV)?options.mimeType=MimeTypeAudio.WAV:MediaRecorder.isTypeSupported(MimeTypeAudio.WEBM_OPUS)?options.mimeType=MimeTypeAudio.WEBM_OPUS:MediaRecorder.isTypeSupported(MimeTypeAudio.WEBM)?options.mimeType=MimeTypeAudio.WEBM:MediaRecorder.isTypeSupported(MimeTypeAudio.MP4)?options.mimeType=MimeTypeAudio.MP4:console.warn("No preferred audio format supported, us\
|
||||
ing default"),this.mediaRecorder=new MediaRecorder(stream,options),this.mediaRecorder.ondataavailable=event2=>{event2.data.size>0&&this.audioChunks.push(event2.data)},this.mediaRecorder.onstop=()=>{this.recordingState=!1},this.mediaRecorder.onerror=event2=>{console.error("MediaRecorder error:",event2),this.recordingState=!1}}cleanup(){if(this.stream){for(const track2 of this.stream.getTracks())track2.stop();this.stream=null}this.mediaRecorder=null,this.audioChunks=[],this.recordingState=!1}}async function convertToWav(audioBlob){
|
||||
try{if(audioBlob.type.includes("wav"))return audioBlob;const arrayBuffer=await audioBlob.arrayBuffer(),audioContext=new(window.AudioContext||window.webkitAudioContext),audioBuffer=await audioContext.decodeAudioData(arrayBuffer),wavBlob=audioBufferToWav(audioBuffer);return audioContext.close(),wavBlob}catch(error2){return console.error("Failed to convert audio to WAV:",error2),audioBlob}}function audioBufferToWav(buffer){const length=buffer.length,numberOfChannels=buffer.numberOfChannels,sampleRate=buffer.
|
||||
sampleRate,blockAlign=numberOfChannels*2,byteRate=sampleRate*blockAlign,dataSize=length*blockAlign,bufferSize=44+dataSize,arrayBuffer=new ArrayBuffer(bufferSize),view=new DataView(arrayBuffer),writeString=(offset22,string2)=>{for(let i=0;i<string2.length;i++)view.setUint8(offset22+i,string2.charCodeAt(i))};writeString(0,"RIFF"),view.setUint32(4,bufferSize-8,!0),writeString(8,"WAVE"),writeString(12,"fmt "),view.setUint32(16,16,!0),view.setUint16(20,1,!0),view.setUint16(22,numberOfChannels,!0),view.
|
||||
setUint32(24,sampleRate,!0),view.setUint32(28,byteRate,!0),view.setUint16(32,blockAlign,!0),view.setUint16(34,16,!0),writeString(36,"data"),view.setUint32(40,dataSize,!0);let offset2=44;for(let i=0;i<length;i++)for(let channel=0;channel<numberOfChannels;channel++){const sample=Math.max(-1,Math.min(1,buffer.getChannelData(channel)[i]));view.setInt16(offset2,sample*32767,!0),offset2+=2}return new Blob([arrayBuffer],{type:MimeTypeAudio.WAV})}function createAudioFile(audioBlob,filename){const timestamp=new Date().
|
||||
toISOString().replace(/[:.]/g,"-"),extension2=audioBlob.type.includes("wav")?"wav":"mp3",defaultFilename=`recording-${timestamp}.${extension2}`;return new File([audioBlob],defaultFilename,{type:audioBlob.type,lastModified:Date.now()})}function isAudioRecordingSupported(){return!!(typeof navigator<"u"&&navigator.mediaDevices&&typeof navigator.mediaDevices.getUserMedia=="function"&&typeof window<"u"&&window.MediaRecorder)}const isNodeJS=typeof process=="object"&&process+""=="[object process]"&&!process.
|
||||
versions.nw&&!(process.versions.electron&&process.type&&process.type!=="browser"),FONT_IDENTITY_MATRIX=[.001,0,0,.001,0,0],LINE_FACTOR=1.35,RenderingIntentFlag={ANY:1,DISPLAY:2,PRINT:4,ANNOTATIONS_FORMS:16,ANNOTATIONS_STORAGE:32,ANNOTATIONS_DISABLE:64,IS_EDITING:128,OPLIST:256},AnnotationMode={DISABLE:0,ENABLE:1,ENABLE_FORMS:2,ENABLE_STORAGE:3},AnnotationEditorPrefix="pdfjs_internal_editor_",AnnotationEditorType={DISABLE:-1,NONE:0,FREETEXT:3,HIGHLIGHT:9,STAMP:13,INK:15,SIGNATURE:101,COMMENT:102},
|
||||
AnnotationEditorParamsType={RESIZE:1,CREATE:2,FREETEXT_SIZE:11,FREETEXT_COLOR:12,FREETEXT_OPACITY:13,INK_COLOR:21,INK_THICKNESS:22,INK_OPACITY:23,HIGHLIGHT_COLOR:31,HIGHLIGHT_THICKNESS:32,HIGHLIGHT_FREE:33,HIGHLIGHT_SHOW_ALL:34,DRAW_STEP:41},PermissionFlag={PRINT:4,MODIFY_CONTENTS:8,COPY:16,MODIFY_ANNOTATIONS:32,FILL_INTERACTIVE_FORMS:256,COPY_FOR_ACCESSIBILITY:512,ASSEMBLE:1024,PRINT_HIGH_QUALITY:2048},TextRenderingMode={FILL:0,STROKE:1,FILL_STROKE:2,INVISIBLE:3,FILL_STROKE_MASK:3,ADD_TO_PATH_FLAG:4},
|
||||
util_ImageKind={GRAYSCALE_1BPP:1,RGB_24BPP:2,RGBA_32BPP:3},AnnotationType={TEXT:1,LINK:2,FREETEXT:3,LINE:4,SQUARE:5,CIRCLE:6,POLYGON:7,POLYLINE:8,HIGHLIGHT:9,UNDERLINE:10,SQUIGGLY:11,STRIKEOUT:12,STAMP:13,CARET:14,INK:15,POPUP:16,FILEATTACHMENT:17,SOUND:18,MOVIE:19,WIDGET:20,SCREEN:21,PRINTERMARK:22,TRAPNET:23,WATERMARK:24,THREED:25,REDACT:26},AnnotationBorderStyleType={SOLID:1,DASHED:2,BEVELED:3,INSET:4,UNDERLINE:5},VerbosityLevel={ERRORS:0,WARNINGS:1,INFOS:5},OPS={dependency:1,setLineWidth:2,setLineCap:3,
|
||||
setLineJoin:4,setMiterLimit:5,setDash:6,setRenderingIntent:7,setFlatness:8,setGState:9,save:10,restore:11,transform:12,moveTo:13,lineTo:14,curveTo:15,curveTo2:16,curveTo3:17,closePath:18,rectangle:19,stroke:20,closeStroke:21,fill:22,eoFill:23,fillStroke:24,eoFillStroke:25,closeFillStroke:26,closeEOFillStroke:27,endPath:28,clip:29,eoClip:30,beginText:31,endText:32,setCharSpacing:33,setWordSpacing:34,setHScale:35,setLeading:36,setFont:37,setTextRenderingMode:38,setTextRise:39,moveText:40,setLeadingMoveText:41,
|
||||
setTextMatrix:42,nextLine:43,showText:44,showSpacedText:45,nextLineShowText:46,nextLineSetSpacingShowText:47,setCharWidth:48,setCharWidthAndBounds:49,setStrokeColorSpace:50,setFillColorSpace:51,setStrokeColor:52,setStrokeColorN:53,setFillColor:54,setFillColorN:55,setStrokeGray:56,setFillGray:57,setStrokeRGBColor:58,setFillRGBColor:59,setStrokeCMYKColor:60,setFillCMYKColor:61,shadingFill:62,beginInlineImage:63,beginImageData:64,endInlineImage:65,paintXObject:66,markPoint:67,markPointProps:68,beginMarkedContent:69,
|
||||
beginMarkedContentProps:70,endMarkedContent:71,beginCompat:72,endCompat:73,paintFormXObjectBegin:74,paintFormXObjectEnd:75,beginGroup:76,endGroup:77,beginAnnotation:80,endAnnotation:81,paintImageMaskXObject:83,paintImageMaskXObjectGroup:84,paintImageXObject:85,paintInlineImageXObject:86,paintInlineImageXObjectGroup:87,paintImageXObjectRepeat:88,paintImageMaskXObjectRepeat:89,paintSolidColorImageMask:90,constructPath:91,setStrokeTransparent:92,setFillTransparent:93,rawFillPath:94},DrawOPS={moveTo:0,
|
||||
lineTo:1,curveTo:2,closePath:3},PasswordResponses={NEED_PASSWORD:1,INCORRECT_PASSWORD:2};let verbosity=VerbosityLevel.WARNINGS;function setVerbosityLevel(level){Number.isInteger(level)&&(verbosity=level)}function getVerbosityLevel(){return verbosity}function info(msg){verbosity>=VerbosityLevel.INFOS&&console.log(`Info: ${msg}`)}function warn(msg){verbosity>=VerbosityLevel.WARNINGS&&console.log(`Warning: ${msg}`)}function unreachable(msg){throw new Error(msg)}function assert$1(cond,msg){cond||unreachable(
|
||||
msg)}function _isValidProtocol(url2){switch(url2?.protocol){case"http:":case"https:":case"ftp:":case"mailto:":case"tel:":return!0;default:return!1}}function createValidAbsoluteUrl(url2,baseUrl=null,options=null){if(!url2)return null;if(options&&typeof url2=="string"&&(options.addDefaultProtocol&&url2.startsWith("www.")&&url2.match(/\./g)?.length>=2&&(url2=`http://${url2}`),options.tryConvertEncoding))try{url2=stringToUTF8String(url2)}catch{}const absoluteUrl=baseUrl?URL.parse(url2,baseUrl):URL.parse(
|
||||
url2);return _isValidProtocol(absoluteUrl)?absoluteUrl:null}function updateUrlHash(url2,hash2,allowRel=!1){const res=URL.parse(url2);return res?(res.hash=hash2,res.href):allowRel&&createValidAbsoluteUrl(url2,"http://example.com")?url2.split("#",1)[0]+`${hash2?`#${hash2}`:""}`:""}function shadow(obj,prop2,value,nonSerializable=!1){return Object.defineProperty(obj,prop2,{value,enumerable:!nonSerializable,configurable:!0,writable:!1}),value}const BaseException=(function(){function BaseException2(message,name){
|
||||
this.message=message,this.name=name}return BaseException2.prototype=new Error,BaseException2.constructor=BaseException2,BaseException2})();class PasswordException extends BaseException{constructor(msg,code2){super(msg,"PasswordException"),this.code=code2}}class UnknownErrorException extends BaseException{constructor(msg,details){super(msg,"UnknownErrorException"),this.details=details}}class InvalidPDFException extends BaseException{constructor(msg){super(msg,"InvalidPDFException")}}class ResponseException extends BaseException{constructor(msg,status,missing){
|
||||
super(msg,"ResponseException"),this.status=status,this.missing=missing}}class FormatError extends BaseException{constructor(msg){super(msg,"FormatError")}}class AbortException extends BaseException{constructor(msg){super(msg,"AbortException")}}function bytesToString(bytes){(typeof bytes!="object"||bytes?.length===void 0)&&unreachable("Invalid argument for bytesToString");const length=bytes.length,MAX_ARGUMENT_COUNT=8192;if(length<MAX_ARGUMENT_COUNT)return String.fromCharCode.apply(null,bytes);const strBuf=[];
|
||||
for(let i=0;i<length;i+=MAX_ARGUMENT_COUNT){const chunkEnd=Math.min(i+MAX_ARGUMENT_COUNT,length),chunk=bytes.subarray(i,chunkEnd);strBuf.push(String.fromCharCode.apply(null,chunk))}return strBuf.join("")}function stringToBytes(str){typeof str!="string"&&unreachable("Invalid argument for stringToBytes");const length=str.length,bytes=new Uint8Array(length);for(let i=0;i<length;++i)bytes[i]=str.charCodeAt(i)&255;return bytes}function string32(value){return String.fromCharCode(value>>24&255,value>>16&
|
||||
255,value>>8&255,value&255)}function isLittleEndian(){const buffer8=new Uint8Array(4);return buffer8[0]=1,new Uint32Array(buffer8.buffer,0,1)[0]===1}function isEvalSupported(){try{return new Function(""),!0}catch{return!1}}class util_FeatureTest{static get isLittleEndian(){return shadow(this,"isLittleEndian",isLittleEndian())}static get isEvalSupported(){return shadow(this,"isEvalSupported",isEvalSupported())}static get isOffscreenCanvasSupported(){return shadow(this,"isOffscreenCanvasSupported",
|
||||
typeof OffscreenCanvas<"u")}static get isImageDecoderSupported(){return shadow(this,"isImageDecoderSupported",typeof ImageDecoder<"u")}static get platform(){const{platform:platform2,userAgent}=navigator;return shadow(this,"platform",{isAndroid:userAgent.includes("Android"),isLinux:platform2.includes("Linux"),isMac:platform2.includes("Mac"),isWindows:platform2.includes("Win"),isFirefox:userAgent.includes("Firefox")})}static get isCSSRoundSupported(){return shadow(this,"isCSSRoundSupported",globalThis.
|
||||
CSS?.supports?.("width: round(1.5px, 1px)"))}}const hexNumbers=Array.from(Array(256).keys(),n=>n.toString(16).padStart(2,"0"));class Util{static makeHexColor(r2,g,b){return`#${hexNumbers[r2]}${hexNumbers[g]}${hexNumbers[b]}`}static scaleMinMax(transform2,minMax){let temp;transform2[0]?(transform2[0]<0&&(temp=minMax[0],minMax[0]=minMax[2],minMax[2]=temp),minMax[0]*=transform2[0],minMax[2]*=transform2[0],transform2[3]<0&&(temp=minMax[1],minMax[1]=minMax[3],minMax[3]=temp),minMax[1]*=transform2[3],
|
||||
minMax[3]*=transform2[3]):(temp=minMax[0],minMax[0]=minMax[1],minMax[1]=temp,temp=minMax[2],minMax[2]=minMax[3],minMax[3]=temp,transform2[1]<0&&(temp=minMax[1],minMax[1]=minMax[3],minMax[3]=temp),minMax[1]*=transform2[1],minMax[3]*=transform2[1],transform2[2]<0&&(temp=minMax[0],minMax[0]=minMax[2],minMax[2]=temp),minMax[0]*=transform2[2],minMax[2]*=transform2[2]),minMax[0]+=transform2[4],minMax[1]+=transform2[5],minMax[2]+=transform2[4],minMax[3]+=transform2[5]}static transform(m1,m2){return[m1[0]*
|
||||
m2[0]+m1[2]*m2[1],m1[1]*m2[0]+m1[3]*m2[1],m1[0]*m2[2]+m1[2]*m2[3],m1[1]*m2[2]+m1[3]*m2[3],m1[0]*m2[4]+m1[2]*m2[5]+m1[4],m1[1]*m2[4]+m1[3]*m2[5]+m1[5]]}static applyTransform(p2,m,pos=0){const p0=p2[pos],p1=p2[pos+1];p2[pos]=p0*m[0]+p1*m[2]+m[4],p2[pos+1]=p0*m[1]+p1*m[3]+m[5]}static applyTransformToBezier(p2,transform2,pos=0){const m0=transform2[0],m1=transform2[1],m2=transform2[2],m3=transform2[3],m4=transform2[4],m5=transform2[5];for(let i=0;i<6;i+=2){const pI=p2[pos+i],pI1=p2[pos+i+1];p2[pos+i]=
|
||||
pI*m0+pI1*m2+m4,p2[pos+i+1]=pI*m1+pI1*m3+m5}}static applyInverseTransform(p2,m){const p0=p2[0],p1=p2[1],d2=m[0]*m[3]-m[1]*m[2];p2[0]=(p0*m[3]-p1*m[2]+m[2]*m[5]-m[4]*m[3])/d2,p2[1]=(-p0*m[1]+p1*m[0]+m[4]*m[1]-m[5]*m[0])/d2}static axialAlignedBoundingBox(rect,transform2,output){const m0=transform2[0],m1=transform2[1],m2=transform2[2],m3=transform2[3],m4=transform2[4],m5=transform2[5],r0=rect[0],r1=rect[1],r2=rect[2],r3=rect[3];let a0=m0*r0+m4,a2=a0,a1=m0*r2+m4,a3=a1,b0=m3*r1+m5,b2=b0,b1=m3*r3+m5,b3=b1;
|
||||
if(m1!==0||m2!==0){const m1r0=m1*r0,m1r2=m1*r2,m2r1=m2*r1,m2r3=m2*r3;a0+=m2r1,a3+=m2r1,a1+=m2r3,a2+=m2r3,b0+=m1r0,b3+=m1r0,b1+=m1r2,b2+=m1r2}output[0]=Math.min(output[0],a0,a1,a2,a3),output[1]=Math.min(output[1],b0,b1,b2,b3),output[2]=Math.max(output[2],a0,a1,a2,a3),output[3]=Math.max(output[3],b0,b1,b2,b3)}static inverseTransform(m){const d2=m[0]*m[3]-m[1]*m[2];return[m[3]/d2,-m[1]/d2,-m[2]/d2,m[0]/d2,(m[2]*m[5]-m[4]*m[3])/d2,(m[4]*m[1]-m[5]*m[0])/d2]}static singularValueDecompose2dScale(matrix,output){
|
||||
const m0=matrix[0],m1=matrix[1],m2=matrix[2],m3=matrix[3],a=m0**2+m1**2,b=m0*m2+m1*m3,c2=m2**2+m3**2,first=(a+c2)/2,second=Math.sqrt(first**2-(a*c2-b**2));output[0]=Math.sqrt(first+second||1),output[1]=Math.sqrt(first-second||1)}static normalizeRect(rect){const r2=rect.slice(0);return rect[0]>rect[2]&&(r2[0]=rect[2],r2[2]=rect[0]),rect[1]>rect[3]&&(r2[1]=rect[3],r2[3]=rect[1]),r2}static intersect(rect1,rect2){const xLow=Math.max(Math.min(rect1[0],rect1[2]),Math.min(rect2[0],rect2[2])),xHigh=Math.
|
||||
min(Math.max(rect1[0],rect1[2]),Math.max(rect2[0],rect2[2]));if(xLow>xHigh)return null;const yLow=Math.max(Math.min(rect1[1],rect1[3]),Math.min(rect2[1],rect2[3])),yHigh=Math.min(Math.max(rect1[1],rect1[3]),Math.max(rect2[1],rect2[3]));return yLow>yHigh?null:[xLow,yLow,xHigh,yHigh]}static pointBoundingBox(x,y,minMax){minMax[0]=Math.min(minMax[0],x),minMax[1]=Math.min(minMax[1],y),minMax[2]=Math.max(minMax[2],x),minMax[3]=Math.max(minMax[3],y)}static rectBoundingBox(x0,y0,x1,y1,minMax){minMax[0]=
|
||||
Math.min(minMax[0],x0,x1),minMax[1]=Math.min(minMax[1],y0,y1),minMax[2]=Math.max(minMax[2],x0,x1),minMax[3]=Math.max(minMax[3],y0,y1)}static#getExtremumOnCurve(x0,x1,x2,x3,y0,y1,y2,y3,t,minMax){if(t<=0||t>=1)return;const mt=1-t,tt=t*t,ttt=tt*t,x=mt*(mt*(mt*x0+3*t*x1)+3*tt*x2)+ttt*x3,y=mt*(mt*(mt*y0+3*t*y1)+3*tt*y2)+ttt*y3;minMax[0]=Math.min(minMax[0],x),minMax[1]=Math.min(minMax[1],y),minMax[2]=Math.max(minMax[2],x),minMax[3]=Math.max(minMax[3],y)}static#getExtremum(x0,x1,x2,x3,y0,y1,y2,y3,a,b,c2,minMax){
|
||||
if(Math.abs(a)<1e-12){Math.abs(b)>=1e-12&&this.#getExtremumOnCurve(x0,x1,x2,x3,y0,y1,y2,y3,-c2/b,minMax);return}const delta=b**2-4*c2*a;if(delta<0)return;const sqrtDelta=Math.sqrt(delta),a2=2*a;this.#getExtremumOnCurve(x0,x1,x2,x3,y0,y1,y2,y3,(-b+sqrtDelta)/a2,minMax),this.#getExtremumOnCurve(x0,x1,x2,x3,y0,y1,y2,y3,(-b-sqrtDelta)/a2,minMax)}static bezierBoundingBox(x0,y0,x1,y1,x2,y2,x3,y3,minMax){minMax[0]=Math.min(minMax[0],x0,x3),minMax[1]=Math.min(minMax[1],y0,y3),minMax[2]=Math.max(minMax[2],
|
||||
x0,x3),minMax[3]=Math.max(minMax[3],y0,y3),this.#getExtremum(x0,x1,x2,x3,y0,y1,y2,y3,3*(-x0+3*(x1-x2)+x3),6*(x0-2*x1+x2),3*(x1-x0),minMax),this.#getExtremum(x0,x1,x2,x3,y0,y1,y2,y3,3*(-y0+3*(y1-y2)+y3),6*(y0-2*y1+y2),3*(y1-y0),minMax)}}function stringToUTF8String(str){return decodeURIComponent(escape(str))}let NormalizeRegex=null,NormalizationMap=null;function normalizeUnicode(str){return NormalizeRegex||(NormalizeRegex=/([\u00a0\u00b5\u037e\u0eb3\u2000-\u200a\u202f\u2126\ufb00-\ufb04\ufb06\ufb20-\ufb36\ufb38-\ufb3c\ufb3e\ufb40-\ufb41\ufb43-\ufb44\ufb46-\ufba1\ufba4-\ufba9\ufbae-\ufbb1\ufbd3-\ufbdc\ufbde-\ufbe7\ufbea-\ufbf8\ufbfc-\ufbfd\ufc00-\ufc5d\ufc64-\ufcf1\ufcf5-\ufd3d\ufd88\ufdf4\ufdfa-\ufdfb\ufe71\ufe77\ufe79\ufe7b\ufe7d]+)|(\ufb05+)/gu,
|
||||
try{this.stream=await navigator.mediaDevices.getUserMedia({audio:{echoCancellation:!0,noiseSuppression:!0,autoGainControl:!0}}),this.initializeRecorder(this.stream),this.audioChunks=[],this.mediaRecorder.start(100),this.recordingState=!0}catch(error2){throw console.error("Failed to start recording:",error2),new Error("Failed to access microphone. Please check permissions.")}}async stopRecording(){return new Promise((resolve2,reject)=>{const recorder=this.mediaRecorder,chunks=this.audioChunks,stream=this.
|
||||
stream;if(!recorder||recorder.state==="inactive"){reject(new Error("No active recording to stop"));return}this.mediaRecorder=null,this.audioChunks=[],this.stream=null,this.recordingState=!1,recorder.onstop=()=>{const audioBlob=new Blob(chunks,{type:recorder.mimeType||MimeTypeAudio.WAV});if(stream)for(const track2 of stream.getTracks())track2.stop();resolve2(audioBlob)},recorder.onerror=event2=>{if(console.error("Recording error:",event2),stream)for(const track2 of stream.getTracks())track2.stop();
|
||||
reject(new Error("Recording failed"))},recorder.stop()})}isRecording(){return this.recordingState}cancelRecording(){const recorder=this.mediaRecorder,stream=this.stream;if(this.mediaRecorder=null,this.audioChunks=[],this.stream=null,this.recordingState=!1,recorder&&recorder.state!=="inactive"&&(recorder.onstop=null,recorder.onerror=null,recorder.stop()),stream)for(const track2 of stream.getTracks())track2.stop()}initializeRecorder(stream){const options={};MediaRecorder.isTypeSupported(MimeTypeAudio.
|
||||
WAV)?options.mimeType=MimeTypeAudio.WAV:MediaRecorder.isTypeSupported(MimeTypeAudio.WEBM_OPUS)?options.mimeType=MimeTypeAudio.WEBM_OPUS:MediaRecorder.isTypeSupported(MimeTypeAudio.WEBM)?options.mimeType=MimeTypeAudio.WEBM:MediaRecorder.isTypeSupported(MimeTypeAudio.MP4)?options.mimeType=MimeTypeAudio.MP4:console.warn("No preferred audio format supported, using default"),this.mediaRecorder=new MediaRecorder(stream,options),this.mediaRecorder.ondataavailable=event2=>{event2.data.size>0&&this.audioChunks.
|
||||
push(event2.data)},this.mediaRecorder.onstop=()=>{this.recordingState=!1},this.mediaRecorder.onerror=event2=>{console.error("MediaRecorder error:",event2),this.recordingState=!1}}}async function convertToWav(audioBlob){try{if(audioBlob.type.includes("wav"))return audioBlob;const arrayBuffer=await audioBlob.arrayBuffer(),audioContext=new(window.AudioContext||window.webkitAudioContext);try{const audioBuffer=await audioContext.decodeAudioData(arrayBuffer);return audioBufferToWav(audioBuffer)}finally{
|
||||
audioContext.close()}}catch(error2){return console.error("Failed to convert audio to WAV:",error2),audioBlob}}function audioBufferToWav(buffer){const length=buffer.length,numberOfChannels=buffer.numberOfChannels,sampleRate=buffer.sampleRate,blockAlign=numberOfChannels*2,byteRate=sampleRate*blockAlign,dataSize=length*blockAlign,bufferSize=44+dataSize,arrayBuffer=new ArrayBuffer(bufferSize),view=new DataView(arrayBuffer),writeString=(offset2,string2)=>{for(let i=0;i<string2.length;i++)view.setUint8(
|
||||
offset2+i,string2.charCodeAt(i))};writeString(0,"RIFF"),view.setUint32(4,bufferSize-8,!0),writeString(8,"WAVE"),writeString(12,"fmt "),view.setUint32(16,16,!0),view.setUint16(20,1,!0),view.setUint16(22,numberOfChannels,!0),view.setUint32(24,sampleRate,!0),view.setUint32(28,byteRate,!0),view.setUint16(32,blockAlign,!0),view.setUint16(34,16,!0),writeString(36,"data"),view.setUint32(40,dataSize,!0);const channels=new Array(numberOfChannels);for(let c2=0;c2<numberOfChannels;c2++)channels[c2]=buffer.
|
||||
getChannelData(c2);const pcm=new Int16Array(arrayBuffer,44,length*numberOfChannels);let p2=0;for(let i=0;i<length;i++)for(let c2=0;c2<numberOfChannels;c2++){let s2=channels[c2][i];s2>1?s2=1:s2<-1&&(s2=-1),pcm[p2++]=s2*32767}return new Blob([arrayBuffer],{type:MimeTypeAudio.WAV})}function createAudioFile(audioBlob,filename){const timestamp=new Date().toISOString().replace(/[:.]/g,"-"),extension2=audioBlob.type.includes("wav")?"wav":"mp3",defaultFilename=`recording-${timestamp}.${extension2}`;return new File(
|
||||
[audioBlob],defaultFilename,{type:audioBlob.type,lastModified:Date.now()})}function isAudioRecordingSupported(){return!!(typeof navigator<"u"&&navigator.mediaDevices&&typeof navigator.mediaDevices.getUserMedia=="function"&&typeof window<"u"&&window.MediaRecorder)}const isNodeJS=typeof process=="object"&&process+""=="[object process]"&&!process.versions.nw&&!(process.versions.electron&&process.type&&process.type!=="browser"),FONT_IDENTITY_MATRIX=[.001,0,0,.001,0,0],LINE_FACTOR=1.35,RenderingIntentFlag={
|
||||
ANY:1,DISPLAY:2,PRINT:4,ANNOTATIONS_FORMS:16,ANNOTATIONS_STORAGE:32,ANNOTATIONS_DISABLE:64,IS_EDITING:128,OPLIST:256},AnnotationMode={DISABLE:0,ENABLE:1,ENABLE_FORMS:2,ENABLE_STORAGE:3},AnnotationEditorPrefix="pdfjs_internal_editor_",AnnotationEditorType={DISABLE:-1,NONE:0,FREETEXT:3,HIGHLIGHT:9,STAMP:13,INK:15,SIGNATURE:101,COMMENT:102},AnnotationEditorParamsType={RESIZE:1,CREATE:2,FREETEXT_SIZE:11,FREETEXT_COLOR:12,FREETEXT_OPACITY:13,INK_COLOR:21,INK_THICKNESS:22,INK_OPACITY:23,HIGHLIGHT_COLOR:31,
|
||||
HIGHLIGHT_THICKNESS:32,HIGHLIGHT_FREE:33,HIGHLIGHT_SHOW_ALL:34,DRAW_STEP:41},PermissionFlag={PRINT:4,MODIFY_CONTENTS:8,COPY:16,MODIFY_ANNOTATIONS:32,FILL_INTERACTIVE_FORMS:256,COPY_FOR_ACCESSIBILITY:512,ASSEMBLE:1024,PRINT_HIGH_QUALITY:2048},TextRenderingMode={FILL:0,STROKE:1,FILL_STROKE:2,INVISIBLE:3,FILL_STROKE_MASK:3,ADD_TO_PATH_FLAG:4},util_ImageKind={GRAYSCALE_1BPP:1,RGB_24BPP:2,RGBA_32BPP:3},AnnotationType={TEXT:1,LINK:2,FREETEXT:3,LINE:4,SQUARE:5,CIRCLE:6,POLYGON:7,POLYLINE:8,HIGHLIGHT:9,
|
||||
UNDERLINE:10,SQUIGGLY:11,STRIKEOUT:12,STAMP:13,CARET:14,INK:15,POPUP:16,FILEATTACHMENT:17,SOUND:18,MOVIE:19,WIDGET:20,SCREEN:21,PRINTERMARK:22,TRAPNET:23,WATERMARK:24,THREED:25,REDACT:26},AnnotationBorderStyleType={SOLID:1,DASHED:2,BEVELED:3,INSET:4,UNDERLINE:5},VerbosityLevel={ERRORS:0,WARNINGS:1,INFOS:5},OPS={dependency:1,setLineWidth:2,setLineCap:3,setLineJoin:4,setMiterLimit:5,setDash:6,setRenderingIntent:7,setFlatness:8,setGState:9,save:10,restore:11,transform:12,moveTo:13,lineTo:14,curveTo:15,
|
||||
curveTo2:16,curveTo3:17,closePath:18,rectangle:19,stroke:20,closeStroke:21,fill:22,eoFill:23,fillStroke:24,eoFillStroke:25,closeFillStroke:26,closeEOFillStroke:27,endPath:28,clip:29,eoClip:30,beginText:31,endText:32,setCharSpacing:33,setWordSpacing:34,setHScale:35,setLeading:36,setFont:37,setTextRenderingMode:38,setTextRise:39,moveText:40,setLeadingMoveText:41,setTextMatrix:42,nextLine:43,showText:44,showSpacedText:45,nextLineShowText:46,nextLineSetSpacingShowText:47,setCharWidth:48,setCharWidthAndBounds:49,
|
||||
setStrokeColorSpace:50,setFillColorSpace:51,setStrokeColor:52,setStrokeColorN:53,setFillColor:54,setFillColorN:55,setStrokeGray:56,setFillGray:57,setStrokeRGBColor:58,setFillRGBColor:59,setStrokeCMYKColor:60,setFillCMYKColor:61,shadingFill:62,beginInlineImage:63,beginImageData:64,endInlineImage:65,paintXObject:66,markPoint:67,markPointProps:68,beginMarkedContent:69,beginMarkedContentProps:70,endMarkedContent:71,beginCompat:72,endCompat:73,paintFormXObjectBegin:74,paintFormXObjectEnd:75,beginGroup:76,
|
||||
endGroup:77,beginAnnotation:80,endAnnotation:81,paintImageMaskXObject:83,paintImageMaskXObjectGroup:84,paintImageXObject:85,paintInlineImageXObject:86,paintInlineImageXObjectGroup:87,paintImageXObjectRepeat:88,paintImageMaskXObjectRepeat:89,paintSolidColorImageMask:90,constructPath:91,setStrokeTransparent:92,setFillTransparent:93,rawFillPath:94},DrawOPS={moveTo:0,lineTo:1,curveTo:2,closePath:3},PasswordResponses={NEED_PASSWORD:1,INCORRECT_PASSWORD:2};let verbosity=VerbosityLevel.WARNINGS;function setVerbosityLevel(level){
|
||||
Number.isInteger(level)&&(verbosity=level)}function getVerbosityLevel(){return verbosity}function info(msg){verbosity>=VerbosityLevel.INFOS&&console.log(`Info: ${msg}`)}function warn(msg){verbosity>=VerbosityLevel.WARNINGS&&console.log(`Warning: ${msg}`)}function unreachable(msg){throw new Error(msg)}function assert$1(cond,msg){cond||unreachable(msg)}function _isValidProtocol(url2){switch(url2?.protocol){case"http:":case"https:":case"ftp:":case"mailto:":case"tel:":return!0;default:return!1}}function createValidAbsoluteUrl(url2,baseUrl=null,options=null){
|
||||
if(!url2)return null;if(options&&typeof url2=="string"&&(options.addDefaultProtocol&&url2.startsWith("www.")&&url2.match(/\./g)?.length>=2&&(url2=`http://${url2}`),options.tryConvertEncoding))try{url2=stringToUTF8String(url2)}catch{}const absoluteUrl=baseUrl?URL.parse(url2,baseUrl):URL.parse(url2);return _isValidProtocol(absoluteUrl)?absoluteUrl:null}function updateUrlHash(url2,hash2,allowRel=!1){const res=URL.parse(url2);return res?(res.hash=hash2,res.href):allowRel&&createValidAbsoluteUrl(url2,
|
||||
"http://example.com")?url2.split("#",1)[0]+`${hash2?`#${hash2}`:""}`:""}function shadow(obj,prop2,value,nonSerializable=!1){return Object.defineProperty(obj,prop2,{value,enumerable:!nonSerializable,configurable:!0,writable:!1}),value}const BaseException=(function(){function BaseException2(message,name){this.message=message,this.name=name}return BaseException2.prototype=new Error,BaseException2.constructor=BaseException2,BaseException2})();class PasswordException extends BaseException{constructor(msg,code2){
|
||||
super(msg,"PasswordException"),this.code=code2}}class UnknownErrorException extends BaseException{constructor(msg,details){super(msg,"UnknownErrorException"),this.details=details}}class InvalidPDFException extends BaseException{constructor(msg){super(msg,"InvalidPDFException")}}class ResponseException extends BaseException{constructor(msg,status,missing){super(msg,"ResponseException"),this.status=status,this.missing=missing}}class FormatError extends BaseException{constructor(msg){super(msg,"For\
|
||||
matError")}}class AbortException extends BaseException{constructor(msg){super(msg,"AbortException")}}function bytesToString(bytes){(typeof bytes!="object"||bytes?.length===void 0)&&unreachable("Invalid argument for bytesToString");const length=bytes.length,MAX_ARGUMENT_COUNT=8192;if(length<MAX_ARGUMENT_COUNT)return String.fromCharCode.apply(null,bytes);const strBuf=[];for(let i=0;i<length;i+=MAX_ARGUMENT_COUNT){const chunkEnd=Math.min(i+MAX_ARGUMENT_COUNT,length),chunk=bytes.subarray(i,chunkEnd);
|
||||
strBuf.push(String.fromCharCode.apply(null,chunk))}return strBuf.join("")}function stringToBytes(str){typeof str!="string"&&unreachable("Invalid argument for stringToBytes");const length=str.length,bytes=new Uint8Array(length);for(let i=0;i<length;++i)bytes[i]=str.charCodeAt(i)&255;return bytes}function string32(value){return String.fromCharCode(value>>24&255,value>>16&255,value>>8&255,value&255)}function isLittleEndian(){const buffer8=new Uint8Array(4);return buffer8[0]=1,new Uint32Array(buffer8.
|
||||
buffer,0,1)[0]===1}function isEvalSupported(){try{return new Function(""),!0}catch{return!1}}class util_FeatureTest{static get isLittleEndian(){return shadow(this,"isLittleEndian",isLittleEndian())}static get isEvalSupported(){return shadow(this,"isEvalSupported",isEvalSupported())}static get isOffscreenCanvasSupported(){return shadow(this,"isOffscreenCanvasSupported",typeof OffscreenCanvas<"u")}static get isImageDecoderSupported(){return shadow(this,"isImageDecoderSupported",typeof ImageDecoder<
|
||||
"u")}static get platform(){const{platform:platform2,userAgent}=navigator;return shadow(this,"platform",{isAndroid:userAgent.includes("Android"),isLinux:platform2.includes("Linux"),isMac:platform2.includes("Mac"),isWindows:platform2.includes("Win"),isFirefox:userAgent.includes("Firefox")})}static get isCSSRoundSupported(){return shadow(this,"isCSSRoundSupported",globalThis.CSS?.supports?.("width: round(1.5px, 1px)"))}}const hexNumbers=Array.from(Array(256).keys(),n=>n.toString(16).padStart(2,"0"));
|
||||
class Util{static makeHexColor(r2,g,b){return`#${hexNumbers[r2]}${hexNumbers[g]}${hexNumbers[b]}`}static scaleMinMax(transform2,minMax){let temp;transform2[0]?(transform2[0]<0&&(temp=minMax[0],minMax[0]=minMax[2],minMax[2]=temp),minMax[0]*=transform2[0],minMax[2]*=transform2[0],transform2[3]<0&&(temp=minMax[1],minMax[1]=minMax[3],minMax[3]=temp),minMax[1]*=transform2[3],minMax[3]*=transform2[3]):(temp=minMax[0],minMax[0]=minMax[1],minMax[1]=temp,temp=minMax[2],minMax[2]=minMax[3],minMax[3]=temp,
|
||||
transform2[1]<0&&(temp=minMax[1],minMax[1]=minMax[3],minMax[3]=temp),minMax[1]*=transform2[1],minMax[3]*=transform2[1],transform2[2]<0&&(temp=minMax[0],minMax[0]=minMax[2],minMax[2]=temp),minMax[0]*=transform2[2],minMax[2]*=transform2[2]),minMax[0]+=transform2[4],minMax[1]+=transform2[5],minMax[2]+=transform2[4],minMax[3]+=transform2[5]}static transform(m1,m2){return[m1[0]*m2[0]+m1[2]*m2[1],m1[1]*m2[0]+m1[3]*m2[1],m1[0]*m2[2]+m1[2]*m2[3],m1[1]*m2[2]+m1[3]*m2[3],m1[0]*m2[4]+m1[2]*m2[5]+m1[4],m1[1]*
|
||||
m2[4]+m1[3]*m2[5]+m1[5]]}static applyTransform(p2,m,pos=0){const p0=p2[pos],p1=p2[pos+1];p2[pos]=p0*m[0]+p1*m[2]+m[4],p2[pos+1]=p0*m[1]+p1*m[3]+m[5]}static applyTransformToBezier(p2,transform2,pos=0){const m0=transform2[0],m1=transform2[1],m2=transform2[2],m3=transform2[3],m4=transform2[4],m5=transform2[5];for(let i=0;i<6;i+=2){const pI=p2[pos+i],pI1=p2[pos+i+1];p2[pos+i]=pI*m0+pI1*m2+m4,p2[pos+i+1]=pI*m1+pI1*m3+m5}}static applyInverseTransform(p2,m){const p0=p2[0],p1=p2[1],d2=m[0]*m[3]-m[1]*m[2];
|
||||
p2[0]=(p0*m[3]-p1*m[2]+m[2]*m[5]-m[4]*m[3])/d2,p2[1]=(-p0*m[1]+p1*m[0]+m[4]*m[1]-m[5]*m[0])/d2}static axialAlignedBoundingBox(rect,transform2,output){const m0=transform2[0],m1=transform2[1],m2=transform2[2],m3=transform2[3],m4=transform2[4],m5=transform2[5],r0=rect[0],r1=rect[1],r2=rect[2],r3=rect[3];let a0=m0*r0+m4,a2=a0,a1=m0*r2+m4,a3=a1,b0=m3*r1+m5,b2=b0,b1=m3*r3+m5,b3=b1;if(m1!==0||m2!==0){const m1r0=m1*r0,m1r2=m1*r2,m2r1=m2*r1,m2r3=m2*r3;a0+=m2r1,a3+=m2r1,a1+=m2r3,a2+=m2r3,b0+=m1r0,b3+=m1r0,
|
||||
b1+=m1r2,b2+=m1r2}output[0]=Math.min(output[0],a0,a1,a2,a3),output[1]=Math.min(output[1],b0,b1,b2,b3),output[2]=Math.max(output[2],a0,a1,a2,a3),output[3]=Math.max(output[3],b0,b1,b2,b3)}static inverseTransform(m){const d2=m[0]*m[3]-m[1]*m[2];return[m[3]/d2,-m[1]/d2,-m[2]/d2,m[0]/d2,(m[2]*m[5]-m[4]*m[3])/d2,(m[4]*m[1]-m[5]*m[0])/d2]}static singularValueDecompose2dScale(matrix,output){const m0=matrix[0],m1=matrix[1],m2=matrix[2],m3=matrix[3],a=m0**2+m1**2,b=m0*m2+m1*m3,c2=m2**2+m3**2,first=(a+c2)/
|
||||
2,second=Math.sqrt(first**2-(a*c2-b**2));output[0]=Math.sqrt(first+second||1),output[1]=Math.sqrt(first-second||1)}static normalizeRect(rect){const r2=rect.slice(0);return rect[0]>rect[2]&&(r2[0]=rect[2],r2[2]=rect[0]),rect[1]>rect[3]&&(r2[1]=rect[3],r2[3]=rect[1]),r2}static intersect(rect1,rect2){const xLow=Math.max(Math.min(rect1[0],rect1[2]),Math.min(rect2[0],rect2[2])),xHigh=Math.min(Math.max(rect1[0],rect1[2]),Math.max(rect2[0],rect2[2]));if(xLow>xHigh)return null;const yLow=Math.max(Math.min(
|
||||
rect1[1],rect1[3]),Math.min(rect2[1],rect2[3])),yHigh=Math.min(Math.max(rect1[1],rect1[3]),Math.max(rect2[1],rect2[3]));return yLow>yHigh?null:[xLow,yLow,xHigh,yHigh]}static pointBoundingBox(x,y,minMax){minMax[0]=Math.min(minMax[0],x),minMax[1]=Math.min(minMax[1],y),minMax[2]=Math.max(minMax[2],x),minMax[3]=Math.max(minMax[3],y)}static rectBoundingBox(x0,y0,x1,y1,minMax){minMax[0]=Math.min(minMax[0],x0,x1),minMax[1]=Math.min(minMax[1],y0,y1),minMax[2]=Math.max(minMax[2],x0,x1),minMax[3]=Math.max(
|
||||
minMax[3],y0,y1)}static#getExtremumOnCurve(x0,x1,x2,x3,y0,y1,y2,y3,t,minMax){if(t<=0||t>=1)return;const mt=1-t,tt=t*t,ttt=tt*t,x=mt*(mt*(mt*x0+3*t*x1)+3*tt*x2)+ttt*x3,y=mt*(mt*(mt*y0+3*t*y1)+3*tt*y2)+ttt*y3;minMax[0]=Math.min(minMax[0],x),minMax[1]=Math.min(minMax[1],y),minMax[2]=Math.max(minMax[2],x),minMax[3]=Math.max(minMax[3],y)}static#getExtremum(x0,x1,x2,x3,y0,y1,y2,y3,a,b,c2,minMax){if(Math.abs(a)<1e-12){Math.abs(b)>=1e-12&&this.#getExtremumOnCurve(x0,x1,x2,x3,y0,y1,y2,y3,-c2/b,minMax);return}
|
||||
const delta=b**2-4*c2*a;if(delta<0)return;const sqrtDelta=Math.sqrt(delta),a2=2*a;this.#getExtremumOnCurve(x0,x1,x2,x3,y0,y1,y2,y3,(-b+sqrtDelta)/a2,minMax),this.#getExtremumOnCurve(x0,x1,x2,x3,y0,y1,y2,y3,(-b-sqrtDelta)/a2,minMax)}static bezierBoundingBox(x0,y0,x1,y1,x2,y2,x3,y3,minMax){minMax[0]=Math.min(minMax[0],x0,x3),minMax[1]=Math.min(minMax[1],y0,y3),minMax[2]=Math.max(minMax[2],x0,x3),minMax[3]=Math.max(minMax[3],y0,y3),this.#getExtremum(x0,x1,x2,x3,y0,y1,y2,y3,3*(-x0+3*(x1-x2)+x3),6*(x0-
|
||||
2*x1+x2),3*(x1-x0),minMax),this.#getExtremum(x0,x1,x2,x3,y0,y1,y2,y3,3*(-y0+3*(y1-y2)+y3),6*(y0-2*y1+y2),3*(y1-y0),minMax)}}function stringToUTF8String(str){return decodeURIComponent(escape(str))}let NormalizeRegex=null,NormalizationMap=null;function normalizeUnicode(str){return NormalizeRegex||(NormalizeRegex=/([\u00a0\u00b5\u037e\u0eb3\u2000-\u200a\u202f\u2126\ufb00-\ufb04\ufb06\ufb20-\ufb36\ufb38-\ufb3c\ufb3e\ufb40-\ufb41\ufb43-\ufb44\ufb46-\ufba1\ufba4-\ufba9\ufbae-\ufbb1\ufbd3-\ufbdc\ufbde-\ufbe7\ufbea-\ufbf8\ufbfc-\ufbfd\ufc00-\ufc5d\ufc64-\ufcf1\ufcf5-\ufd3d\ufd88\ufdf4\ufdfa-\ufdfb\ufe71\ufe77\ufe79\ufe7b\ufe7d]+)|(\ufb05+)/gu,
|
||||
NormalizationMap=new Map([["ſt","ſt"]])),str.replaceAll(NormalizeRegex,(_,p1,p2)=>p1?p1.normalize("NFKC"):NormalizationMap.get(p2))}function getUuid(){if(typeof crypto.randomUUID=="function")return crypto.randomUUID();const buf=new Uint8Array(32);return crypto.getRandomValues(buf),bytesToString(buf)}const AnnotationPrefix="pdfjs_internal_id_";function _isValidExplicitDest(validRef,validName,dest){if(!Array.isArray(dest)||dest.length<2)return!1;const[page2,zoom,...args]=dest;if(!validRef(page2)&&
|
||||
!Number.isInteger(page2)||!validName(zoom))return!1;const argsLen=args.length;let allowNull=!0;switch(zoom.name){case"XYZ":if(argsLen<2||argsLen>3)return!1;break;case"Fit":case"FitB":return argsLen===0;case"FitH":case"FitBH":case"FitV":case"FitBV":if(argsLen>1)return!1;break;case"FitR":if(argsLen!==4)return!1;allowNull=!1;break;default:return!1}for(const arg of args)if(!(typeof arg=="number"||allowNull&&arg===null))return!1;return!0}function MathClamp(v,min2,max2){return Math.min(Math.max(v,min2),
|
||||
max2)}function toBase64Util(arr){return Uint8Array.prototype.toBase64?arr.toBase64():btoa(bytesToString(arr))}function fromBase64Util(str){return Uint8Array.fromBase64?Uint8Array.fromBase64(str):stringToBytes(atob(str))}typeof Promise.try!="function"&&(Promise.try=function(fn,...args){return new Promise(resolve2=>{resolve2(fn(...args))})});typeof Math.sumPrecise!="function"&&(Math.sumPrecise=function(numbers){return numbers.reduce((a,b)=>a+b,0)});const SVG_NS="http://www.w3.org/2000/svg";class PixelsPerInch{static CSS=96;static PDF=72;static PDF_TO_CSS_UNITS=this.
|
||||
@@ -6828,43 +6829,43 @@ preventDefault();const textFile=new File([text2],"Pasted",{type:MimeTypeText.PLA
|
||||
isLoading:!0,mcpPrompt:{serverName:promptInfo.serverName,promptName:promptInfo.name,arguments:args?{...args}:void 0}};uploadedFiles([...uploadedFiles(),placeholder2]),$$props.onUploadedFilesChange?.(uploadedFiles()),get$3(textareaRef)?.focus()}function handlePromptLoadComplete(placeholderId,result){const promptText=result.messages?.map(msg=>typeof msg.content=="string"?msg.content:msg.content.type===ContentPartType.TEXT?msg.content.text:"").filter(Boolean).join(PROMPT_CONTENT_SEPARATOR);uploadedFiles(
|
||||
uploadedFiles().map(f=>f.id===placeholderId?{...f,isLoading:!1,textContent:promptText,size:promptText.length,file:new File([promptText],`${f.name}${FileExtensionText.TXT}`,{type:MimeTypeText.PLAIN})}:f)),$$props.onUploadedFilesChange?.(uploadedFiles())}function handlePromptLoadError(placeholderId,error2){uploadedFiles(uploadedFiles().map(f=>f.id===placeholderId?{...f,isLoading:!1,loadError:error2}:f)),$$props.onUploadedFilesChange?.(uploadedFiles())}function handlePromptPickerClose(){set$1(isPromptPickerOpen,
|
||||
!1),set$1(promptSearchQuery,""),get$3(textareaRef)?.focus()}function handleInlineResourcePickerClose(){set$1(isInlineResourcePickerOpen,!1),set$1(resourceSearchQuery,""),get$3(textareaRef)?.focus()}function handleInlineResourceSelect(){value().startsWith(RESOURCE_TRIGGER_PREFIX)&&(value(""),$$props.onValueChange?.("")),set$1(isInlineResourcePickerOpen,!1),set$1(resourceSearchQuery,""),get$3(textareaRef)?.focus()}function handleBrowseResources(){set$1(isInlineResourcePickerOpen,!1),set$1(resourceSearchQuery,
|
||||
""),value().startsWith(RESOURCE_TRIGGER_PREFIX)&&(value(""),$$props.onValueChange?.("")),set$1(isResourceDialogOpen,!0)}async function handleMicClick(){if(!audioRecorder||!get$3(recordingSupported)){console.warn("Audio recording not supported");return}if(get$3(isRecording))try{const audioBlob=await audioRecorder.stopRecording(),wavBlob=await convertToWav(audioBlob),audioFile=createAudioFile(wavBlob);$$props.onFilesAdd?.([audioFile]),set$1(isRecording,!1)}catch(error2){console.error("Failed to st\
|
||||
op recording:",error2),set$1(isRecording,!1)}else try{await audioRecorder.startRecording(),set$1(isRecording,!0)}catch(error2){console.error("Failed to start recording:",error2)}}var $$exports={focus:focus2,resetTextareaHeight,openModelSelector,checkModelSelected},fragment=root$15(),node2=first_child(fragment);bind_this(ChatFormFileInputInvisible(node2,{onFileSelect:handleFileSelect}),$$value=>set$1(fileInputRef,$$value,!0),()=>get$3(fileInputRef));var form=sibling(node2,2),node_1=child(form);bind_this(
|
||||
ChatFormPromptPicker(node_1,{get isOpen(){return get$3(isPromptPickerOpen)},get searchQuery(){return get$3(promptSearchQuery)},onClose:handlePromptPickerClose,onPromptLoadStart:handlePromptLoadStart,onPromptLoadComplete:handlePromptLoadComplete,onPromptLoadError:handlePromptLoadError}),$$value=>set$1(promptPickerRef,$$value,!0),()=>get$3(promptPickerRef));var node_2=sibling(node_1,2);bind_this(ChatFormResourcePicker(node_2,{get isOpen(){return get$3(isInlineResourcePickerOpen)},get searchQuery(){
|
||||
return get$3(resourceSearchQuery)},onClose:handleInlineResourcePickerClose,onResourceSelect:handleInlineResourceSelect,onBrowse:handleBrowseResources}),$$value=>set$1(resourcePickerRef,$$value,!0),()=>get$3(resourcePickerRef));var div=sibling(node_2,2),node_3=child(div);{let $0=user_derived(()=>get$3(activeModelId)??void 0);ChatAttachmentsList(node_3,{get attachments(){return attachments()},onFileRemove:handleFileRemove,limitToSingleRow:!0,class:"py-5",style:"scroll-padding: 1rem;",get activeModelId(){
|
||||
return get$3($0)},get uploadedFiles(){return uploadedFiles()},set uploadedFiles($$value){uploadedFiles($$value)}})}var div_1=sibling(node_3,2),node_4=child(div_1);bind_this(ChatFormTextarea(node_4,{class:"px-5 py-1.5 md:pt-0",onKeydown:handleKeydown,onInput:()=>{handleInput(),$$props.onValueChange?.(value())},get disabled(){return disabled()},get placeholder(){return placeholder()},get value(){return value()},set value($$value){value($$value)}}),$$value=>set$1(textareaRef,$$value,!0),()=>get$3(textareaRef));
|
||||
var node_5=sibling(node_4,2);{var consequent=$$anchor2=>{ChatAttachmentMcpResources($$anchor2,{class:"mb-3",onResourceClick:uri2=>{set$1(preSelectedResourceUri,uri2,!0),set$1(isResourceDialogOpen,!0)}})},d2=user_derived(()=>mcpHasResourceAttachments());if_block(node_5,$$render=>{get$3(d2)&&$$render(consequent)})}var node_6=sibling(node_5,2);{let $0=user_derived(()=>value().trim().length>0),$1=user_derived(()=>showMcpPromptButton()?()=>set$1(isPromptPickerOpen,!0):void 0);bind_this(ChatFormActions(
|
||||
node_6,{class:"px-3",get canSend(){return get$3(canSubmit)},get hasText(){return get$3($0)},get disabled(){return disabled()},get isLoading(){return isLoading2()},get isRecording(){return get$3(isRecording)},get uploadedFiles(){return uploadedFiles()},onFileUpload:handleFileUpload,onMicClick:handleMicClick,get onStop(){return $$props.onStop},onSystemPromptClick:()=>$$props.onSystemPromptClick?.({message:value(),files:uploadedFiles()}),get onMcpPromptClick(){return get$3($1)},onMcpResourcesClick:()=>set$1(
|
||||
isResourceDialogOpen,!0)}),$$value=>set$1(chatFormActionsRef,$$value,!0),()=>get$3(chatFormActionsRef))}reset(div_1),reset(div),reset(form);var node_7=sibling(form,2);return DialogMcpResources(node_7,{get preSelectedUri(){return get$3(preSelectedResourceUri)},onAttach:resource=>{mcpStore.attachResource(resource.uri)},onOpenChange:newOpen=>{newOpen||set$1(preSelectedResourceUri,void 0)},get open(){return get$3(isResourceDialogOpen)},set open($$value){set$1(isResourceDialogOpen,$$value,!0)}}),template_effect(
|
||||
()=>{set_class(form,1,`relative ${className()??""}`),set_class(div,1,`${INPUT_CLASSES??""} overflow-hidden rounded-3xl backdrop-blur-md ${disabled()?"cursor-not-allowed opacity-60":""}`)}),event("submit",form,e=>{e.preventDefault(),!(!get$3(canSubmit)||disabled()||get$3(hasLoadingAttachments))&&$$props.onSubmit?.()}),event("paste",div_1,handlePaste),append($$anchor,fragment),pop($$exports)}function Dropdown_menu_content($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),sideOffset=prop(
|
||||
$$props,"sideOffset",3,4),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","sideOffset","portalProps","class"]);var fragment=comment$2(),node2=first_child(fragment);component(node2,()=>Portal$2,($$anchor2,DropdownMenuPrimitive_Portal)=>{DropdownMenuPrimitive_Portal($$anchor2,spread_props(()=>$$props.portalProps,{children:($$anchor3,$$slotProps)=>{var fragment_1=comment$2(),node_1=first_child(fragment_1);{let $0=user_derived(()=>cn$1("z-50 max-h-(--bits-dropdown-menu-content-av\
|
||||
ailable-height) min-w-[8rem] origin-(--bits-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border border-border bg-popover p-1.5 text-popover-foreground shadow-md outline-none data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[\
|
||||
state=open]:fade-in-0 data-[state=open]:zoom-in-95 dark:border-border/20",$$props.class));component(node_1,()=>Dropdown_menu_content$1,($$anchor4,DropdownMenuPrimitive_Content)=>{DropdownMenuPrimitive_Content($$anchor4,spread_props({"data-slot":"dropdown-menu-content",get sideOffset(){return sideOffset()},get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor3,fragment_1)},$$slots:{default:!0}}))}),append($$anchor,fragment),pop()}
|
||||
function Dropdown_menu_item($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),variant=prop($$props,"variant",3,"default"),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class","inset","variant"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("relative flex cursor-pointer items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-highlighted:bg-accent data-highlighted:text-accent-foreground d\
|
||||
ata-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 data-[variant=destructive]:text-destructive data-[variant=destructive]:data-highlighted:bg-destructive/10 data-[variant=destructive]:data-highlighted:text-destructive dark:data-[variant=destructive]:data-highlighted:bg-destructive/20 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4 [&_svg:not([class*='text-'])]:text-muted-foreground data-[variant=destructive]:*:[svg]:!text-destructive",
|
||||
$$props.class));component(node2,()=>Menu_item,($$anchor2,DropdownMenuPrimitive_Item)=>{DropdownMenuPrimitive_Item($$anchor2,spread_props({"data-slot":"dropdown-menu-item",get"data-inset"(){return $$props.inset},get"data-variant"(){return variant()},get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}function Dropdown_menu_separator($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props(
|
||||
$$props,["$$slots","$$events","$$legacy","ref","class"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("-mx-1 my-1 h-px bg-border/20",$$props.class));component(node2,()=>Menu_separator,($$anchor2,DropdownMenuPrimitive_Separator)=>{DropdownMenuPrimitive_Separator($$anchor2,spread_props({"data-slot":"dropdown-menu-separator",get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}
|
||||
function Dropdown_menu_trigger($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref"]);var fragment=comment$2(),node2=first_child(fragment);component(node2,()=>Menu_trigger,($$anchor2,DropdownMenuPrimitive_Trigger)=>{DropdownMenuPrimitive_Trigger($$anchor2,spread_props({"data-slot":"dropdown-menu-trigger"},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))}),append($$anchor,fragment),pop()}
|
||||
function Dropdown_menu_sub_content($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("z-50 max-h-(--bits-dropdown-menu-content-available-height) min-w-[8rem] origin-(--bits-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border border-border bg-popover p-1.5 text-popover-foregr\
|
||||
ound shadow-md outline-none data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95 dark:border-border/20",$$props.class));component(node2,()=>Menu_sub_content,($$anchor2,DropdownMenuPrimitive_SubContent)=>{DropdownMenuPrimitive_SubContent(
|
||||
$$anchor2,spread_props({"data-slot":"dropdown-menu-sub-content",get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}var root_1$D=from_html("<!> <!>",1);function Dropdown_menu_sub_trigger($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class","inset","children"]);var fragment=comment$2(),node2=first_child(fragment);{
|
||||
let $0=user_derived(()=>cn$1("flex cursor-default items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-highlighted:bg-accent data-highlighted:text-accent-foreground data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 data-[state=open]:bg-accent data-[state=open]:text-accent-foreground [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4 [&_svg:not([class*='text-'])]:text-muted-foreground",$$props.class));component(
|
||||
node2,()=>Menu_sub_trigger,($$anchor2,DropdownMenuPrimitive_SubTrigger)=>{DropdownMenuPrimitive_SubTrigger($$anchor2,spread_props({"data-slot":"dropdown-menu-sub-trigger",get"data-inset"(){return $$props.inset},get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)},children:($$anchor3,$$slotProps)=>{var fragment_1=root_1$D(),node_1=first_child(fragment_1);snippet(node_1,()=>$$props.children??noop$3);var node_2=sibling(node_1,2);Chevron_right(node_2,
|
||||
{class:"ml-auto size-4"}),append($$anchor3,fragment_1)},$$slots:{default:!0}}))})}append($$anchor,fragment),pop()}const Sub=Menu_sub,Root$2=Menu;function useAttachmentMenu(getFlags,getCallbacks,close2){const modalityFlags=user_derived(getFlags),callbacks=user_derived(()=>{const cbs=getCallbacks(),wrap2=fn=>()=>{close2(),fn?.()};return{[AttachmentAction.FILE_UPLOAD]:wrap2(cbs.onFileUpload),[AttachmentAction.SYSTEM_PROMPT_CLICK]:wrap2(cbs.onSystemPromptClick),[AttachmentAction.MCP_PROMPT_CLICK]:wrap2(
|
||||
cbs.onMcpPromptClick),[AttachmentAction.MCP_RESOURCES_CLICK]:wrap2(cbs.onMcpResourcesClick)}});function isItemEnabled(enabledWhen){return!enabledWhen||enabledWhen==="always"?!0:!!get$3(modalityFlags)[enabledWhen]}function isItemVisible(visibleWhen){return visibleWhen?!!get$3(modalityFlags)[visibleWhen]:!0}function getSystemMessageTooltip(){return page$1.params.id?"Inject custom system message at the beginning of the conversation":"Add custom system message for a new conversation"}return{get callbacks(){
|
||||
return get$3(callbacks)},isItemEnabled,isItemVisible,getSystemMessageTooltip}}var root_5$o=from_html('<span class="sr-only"> </span> <!>',1),root_6$j=from_html("<p> </p>"),root_3$u=from_html("<!> <!>",1),root_10$c=from_html("<!> <span> </span>",1),root_14$5=from_html("<!> <span> </span>",1),root_15$7=from_html("<p> </p>"),root_12$7=from_html("<!> <!>",1),root_20$4=from_html("<!> <span> </span>",1),root_21$5=from_html("<p>PDFs will be converted to text. Image-based PDFs may not work properly.</p>"),
|
||||
root_17$8=from_html("<!> <!>",1),root_26=from_html("<!> <span> </span>",1),root_27$1=from_html("<p> </p>"),root_24$1=from_html("<!> <!>",1),root_30$1=from_html("<!> <span> </span>",1),root_7$j=from_html("<!> <!> <!> <!> <!> <!> <!>",1),root_1$C=from_html("<!> <!>",1),root$14=from_html("<div><!></div>");function ChatFormActionAttachmentsDropdown($$anchor,$$props){push$1($$props,!0);let className=prop($$props,"class",3,""),disabled=prop($$props,"disabled",3,!1),hasAudioModality=prop($$props,"hasAu\
|
||||
dioModality",3,!1),hasVisionModality=prop($$props,"hasVisionModality",3,!1),hasMcpPromptsSupport=prop($$props,"hasMcpPromptsSupport",3,!1),hasMcpResourcesSupport=prop($$props,"hasMcpResourcesSupport",3,!1),dropdownOpen=state$1(!1);function handleMcpSettingsClick(){set$1(dropdownOpen,!1),$$props.onMcpSettingsClick?.()}const attachmentMenu=useAttachmentMenu(()=>({hasVisionModality:hasVisionModality(),hasAudioModality:hasAudioModality(),hasMcpPromptsSupport:hasMcpPromptsSupport(),hasMcpResourcesSupport:hasMcpResourcesSupport()}),
|
||||
()=>({onFileUpload:$$props.onFileUpload,onSystemPromptClick:$$props.onSystemPromptClick,onMcpPromptClick:$$props.onMcpPromptClick,onMcpResourcesClick:$$props.onMcpResourcesClick}),()=>{set$1(dropdownOpen,!1)});var div=root$14(),node2=child(div);component(node2,()=>Root$2,($$anchor2,DropdownMenu_Root)=>{DropdownMenu_Root($$anchor2,{get open(){return get$3(dropdownOpen)},set open($$value){set$1(dropdownOpen,$$value,!0)},children:($$anchor3,$$slotProps)=>{var fragment=root_1$C(),node_1=first_child(
|
||||
fragment);component(node_1,()=>Dropdown_menu_trigger,($$anchor4,DropdownMenu_Trigger)=>{DropdownMenu_Trigger($$anchor4,{name:"Attach files",get disabled(){return disabled()},children:($$anchor5,$$slotProps2)=>{var fragment_1=comment$2(),node_2=first_child(fragment_1);component(node_2,()=>Root$5,($$anchor6,Tooltip_Root)=>{Tooltip_Root($$anchor6,{children:($$anchor7,$$slotProps3)=>{var fragment_2=root_3$u(),node_3=first_child(fragment_2);component(node_3,()=>Tooltip_trigger,($$anchor8,Tooltip_Trigger)=>{
|
||||
Tooltip_Trigger($$anchor8,{class:"w-full",children:($$anchor9,$$slotProps4)=>{Button($$anchor9,{class:"file-upload-button h-8 w-8 rounded-full p-0",get disabled(){return disabled()},variant:"secondary",type:"button",children:($$anchor10,$$slotProps5)=>{var fragment_4=root_5$o(),span=first_child(fragment_4),text2=child(span,!0);reset(span);var node_4=sibling(span,2);Plus(node_4,{class:"h-4 w-4"}),template_effect(()=>set_text(text2,ATTACHMENT_TOOLTIP_TEXT)),append($$anchor10,fragment_4)},$$slots:{
|
||||
default:!0}})},$$slots:{default:!0}})});var node_5=sibling(node_3,2);component(node_5,()=>Tooltip_content,($$anchor8,Tooltip_Content)=>{Tooltip_Content($$anchor8,{children:($$anchor9,$$slotProps4)=>{var p2=root_6$j(),text_1=child(p2,!0);reset(p2),template_effect(()=>set_text(text_1,ATTACHMENT_TOOLTIP_TEXT)),append($$anchor9,p2)},$$slots:{default:!0}})}),append($$anchor7,fragment_2)},$$slots:{default:!0}})}),append($$anchor5,fragment_1)},$$slots:{default:!0}})});var node_6=sibling(node_1,2);component(
|
||||
node_6,()=>Dropdown_menu_content,($$anchor4,DropdownMenu_Content)=>{DropdownMenu_Content($$anchor4,{align:"start",class:"w-48",children:($$anchor5,$$slotProps2)=>{var fragment_5=root_7$j(),node_7=first_child(fragment_5);each(node_7,17,()=>ATTACHMENT_FILE_ITEMS,item=>item.id,($$anchor6,item)=>{const enabled=user_derived(()=>attachmentMenu.isItemEnabled(get$3(item).enabledWhen));var fragment_6=comment$2(),node_8=first_child(fragment_6);{var consequent=$$anchor7=>{var fragment_7=comment$2(),node_9=first_child(
|
||||
fragment_7);{let $0=user_derived(()=>get$3(item).class??"");component(node_9,()=>Dropdown_menu_item,($$anchor8,DropdownMenu_Item)=>{DropdownMenu_Item($$anchor8,{get class(){return`${get$3($0)??""} flex cursor-pointer items-center gap-2`},onclick:()=>attachmentMenu.callbacks[get$3(item).action](),children:($$anchor9,$$slotProps3)=>{var fragment_8=root_10$c(),node_10=first_child(fragment_8);component(node_10,()=>get$3(item).icon,($$anchor10,item_icon)=>{item_icon($$anchor10,{class:"h-4 w-4"})});var span_1=sibling(
|
||||
node_10,2),text_2=child(span_1,!0);reset(span_1),template_effect(()=>set_text(text_2,get$3(item).label)),append($$anchor9,fragment_8)},$$slots:{default:!0}})})}append($$anchor7,fragment_7)},consequent_1=$$anchor7=>{var fragment_9=comment$2(),node_11=first_child(fragment_9);component(node_11,()=>Root$5,($$anchor8,Tooltip_Root_1)=>{Tooltip_Root_1($$anchor8,{get delayDuration(){return TOOLTIP_DELAY_DURATION},children:($$anchor9,$$slotProps3)=>{var fragment_10=root_12$7(),node_12=first_child(fragment_10);
|
||||
component(node_12,()=>Tooltip_trigger,($$anchor10,Tooltip_Trigger_1)=>{Tooltip_Trigger_1($$anchor10,{class:"w-full",children:($$anchor11,$$slotProps4)=>{var fragment_11=comment$2(),node_13=first_child(fragment_11);{let $0=user_derived(()=>get$3(item).class??"");component(node_13,()=>Dropdown_menu_item,($$anchor12,DropdownMenu_Item_1)=>{DropdownMenu_Item_1($$anchor12,{get class(){return`${get$3($0)??""} flex cursor-pointer items-center gap-2`},disabled:!0,children:($$anchor13,$$slotProps5)=>{var fragment_12=root_14$5(),
|
||||
node_14=first_child(fragment_12);component(node_14,()=>get$3(item).icon,($$anchor14,item_icon_1)=>{item_icon_1($$anchor14,{class:"h-4 w-4"})});var span_2=sibling(node_14,2),text_3=child(span_2,!0);reset(span_2),template_effect(()=>set_text(text_3,get$3(item).label)),append($$anchor13,fragment_12)},$$slots:{default:!0}})})}append($$anchor11,fragment_11)},$$slots:{default:!0}})});var node_15=sibling(node_12,2);component(node_15,()=>Tooltip_content,($$anchor10,Tooltip_Content_1)=>{Tooltip_Content_1(
|
||||
$$anchor10,{side:"right",children:($$anchor11,$$slotProps4)=>{var p_1=root_15$7(),text_4=child(p_1,!0);reset(p_1),template_effect(()=>set_text(text_4,get$3(item).disabledTooltip)),append($$anchor11,p_1)},$$slots:{default:!0}})}),append($$anchor9,fragment_10)},$$slots:{default:!0}})}),append($$anchor7,fragment_9)};if_block(node_8,$$render=>{get$3(enabled)?$$render(consequent):get$3(item).disabledTooltip&&$$render(consequent_1,1)})}append($$anchor6,fragment_6)});var node_16=sibling(node_7,2);{var consequent_3=$$anchor6=>{
|
||||
""),value().startsWith(RESOURCE_TRIGGER_PREFIX)&&(value(""),$$props.onValueChange?.("")),set$1(isResourceDialogOpen,!0)}async function handleMicClick(){if(!audioRecorder||!get$3(recordingSupported)){console.warn("Audio recording not supported");return}if(get$3(isRecording)){set$1(isRecording,!1);try{const audioBlob=await audioRecorder.stopRecording(),wavBlob=await convertToWav(audioBlob),audioFile=createAudioFile(wavBlob);$$props.onFilesAdd?.([audioFile])}catch(error2){console.error("Failed to s\
|
||||
top recording:",error2)}}else try{await audioRecorder.startRecording(),set$1(isRecording,!0)}catch(error2){console.error("Failed to start recording:",error2)}}var $$exports={focus:focus2,resetTextareaHeight,openModelSelector,checkModelSelected},fragment=root$15(),node2=first_child(fragment);bind_this(ChatFormFileInputInvisible(node2,{onFileSelect:handleFileSelect}),$$value=>set$1(fileInputRef,$$value,!0),()=>get$3(fileInputRef));var form=sibling(node2,2),node_1=child(form);bind_this(ChatFormPromptPicker(
|
||||
node_1,{get isOpen(){return get$3(isPromptPickerOpen)},get searchQuery(){return get$3(promptSearchQuery)},onClose:handlePromptPickerClose,onPromptLoadStart:handlePromptLoadStart,onPromptLoadComplete:handlePromptLoadComplete,onPromptLoadError:handlePromptLoadError}),$$value=>set$1(promptPickerRef,$$value,!0),()=>get$3(promptPickerRef));var node_2=sibling(node_1,2);bind_this(ChatFormResourcePicker(node_2,{get isOpen(){return get$3(isInlineResourcePickerOpen)},get searchQuery(){return get$3(resourceSearchQuery)},
|
||||
onClose:handleInlineResourcePickerClose,onResourceSelect:handleInlineResourceSelect,onBrowse:handleBrowseResources}),$$value=>set$1(resourcePickerRef,$$value,!0),()=>get$3(resourcePickerRef));var div=sibling(node_2,2),node_3=child(div);{let $0=user_derived(()=>get$3(activeModelId)??void 0);ChatAttachmentsList(node_3,{get attachments(){return attachments()},onFileRemove:handleFileRemove,limitToSingleRow:!0,class:"py-5",style:"scroll-padding: 1rem;",get activeModelId(){return get$3($0)},get uploadedFiles(){
|
||||
return uploadedFiles()},set uploadedFiles($$value){uploadedFiles($$value)}})}var div_1=sibling(node_3,2),node_4=child(div_1);bind_this(ChatFormTextarea(node_4,{class:"px-5 py-1.5 md:pt-0",onKeydown:handleKeydown,onInput:()=>{handleInput(),$$props.onValueChange?.(value())},get disabled(){return disabled()},get placeholder(){return placeholder()},get value(){return value()},set value($$value){value($$value)}}),$$value=>set$1(textareaRef,$$value,!0),()=>get$3(textareaRef));var node_5=sibling(node_4,
|
||||
2);{var consequent=$$anchor2=>{ChatAttachmentMcpResources($$anchor2,{class:"mb-3",onResourceClick:uri2=>{set$1(preSelectedResourceUri,uri2,!0),set$1(isResourceDialogOpen,!0)}})},d2=user_derived(()=>mcpHasResourceAttachments());if_block(node_5,$$render=>{get$3(d2)&&$$render(consequent)})}var node_6=sibling(node_5,2);{let $0=user_derived(()=>value().trim().length>0),$1=user_derived(()=>showMcpPromptButton()?()=>set$1(isPromptPickerOpen,!0):void 0);bind_this(ChatFormActions(node_6,{class:"px-3",get canSend(){
|
||||
return get$3(canSubmit)},get hasText(){return get$3($0)},get disabled(){return disabled()},get isLoading(){return isLoading2()},get isRecording(){return get$3(isRecording)},get uploadedFiles(){return uploadedFiles()},onFileUpload:handleFileUpload,onMicClick:handleMicClick,get onStop(){return $$props.onStop},onSystemPromptClick:()=>$$props.onSystemPromptClick?.({message:value(),files:uploadedFiles()}),get onMcpPromptClick(){return get$3($1)},onMcpResourcesClick:()=>set$1(isResourceDialogOpen,!0)}),
|
||||
$$value=>set$1(chatFormActionsRef,$$value,!0),()=>get$3(chatFormActionsRef))}reset(div_1),reset(div),reset(form);var node_7=sibling(form,2);return DialogMcpResources(node_7,{get preSelectedUri(){return get$3(preSelectedResourceUri)},onAttach:resource=>{mcpStore.attachResource(resource.uri)},onOpenChange:newOpen=>{newOpen||set$1(preSelectedResourceUri,void 0)},get open(){return get$3(isResourceDialogOpen)},set open($$value){set$1(isResourceDialogOpen,$$value,!0)}}),template_effect(()=>{set_class(
|
||||
form,1,`relative ${className()??""}`),set_class(div,1,`${INPUT_CLASSES??""} overflow-hidden rounded-3xl backdrop-blur-md ${disabled()?"cursor-not-allowed opacity-60":""}`)}),event("submit",form,e=>{e.preventDefault(),!(!get$3(canSubmit)||disabled()||get$3(hasLoadingAttachments))&&$$props.onSubmit?.()}),event("paste",div_1,handlePaste),append($$anchor,fragment),pop($$exports)}function Dropdown_menu_content($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),sideOffset=prop($$props,
|
||||
"sideOffset",3,4),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","sideOffset","portalProps","class"]);var fragment=comment$2(),node2=first_child(fragment);component(node2,()=>Portal$2,($$anchor2,DropdownMenuPrimitive_Portal)=>{DropdownMenuPrimitive_Portal($$anchor2,spread_props(()=>$$props.portalProps,{children:($$anchor3,$$slotProps)=>{var fragment_1=comment$2(),node_1=first_child(fragment_1);{let $0=user_derived(()=>cn$1("z-50 max-h-(--bits-dropdown-menu-content-available-\
|
||||
height) min-w-[8rem] origin-(--bits-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border border-border bg-popover p-1.5 text-popover-foreground shadow-md outline-none data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=op\
|
||||
en]:fade-in-0 data-[state=open]:zoom-in-95 dark:border-border/20",$$props.class));component(node_1,()=>Dropdown_menu_content$1,($$anchor4,DropdownMenuPrimitive_Content)=>{DropdownMenuPrimitive_Content($$anchor4,spread_props({"data-slot":"dropdown-menu-content",get sideOffset(){return sideOffset()},get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor3,fragment_1)},$$slots:{default:!0}}))}),append($$anchor,fragment),pop()}function Dropdown_menu_item($$anchor,$$props){
|
||||
push$1($$props,!0);let ref2=prop($$props,"ref",15,null),variant=prop($$props,"variant",3,"default"),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class","inset","variant"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("relative flex cursor-pointer items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-highlighted:bg-accent data-highlighted:text-accent-foreground data-[disabled]:pointer-events-none data-[disab\
|
||||
led]:opacity-50 data-[inset]:pl-8 data-[variant=destructive]:text-destructive data-[variant=destructive]:data-highlighted:bg-destructive/10 data-[variant=destructive]:data-highlighted:text-destructive dark:data-[variant=destructive]:data-highlighted:bg-destructive/20 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4 [&_svg:not([class*='text-'])]:text-muted-foreground data-[variant=destructive]:*:[svg]:!text-destructive",$$props.class));component(node2,()=>Menu_item,
|
||||
($$anchor2,DropdownMenuPrimitive_Item)=>{DropdownMenuPrimitive_Item($$anchor2,spread_props({"data-slot":"dropdown-menu-item",get"data-inset"(){return $$props.inset},get"data-variant"(){return variant()},get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}function Dropdown_menu_separator($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events",
|
||||
"$$legacy","ref","class"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("-mx-1 my-1 h-px bg-border/20",$$props.class));component(node2,()=>Menu_separator,($$anchor2,DropdownMenuPrimitive_Separator)=>{DropdownMenuPrimitive_Separator($$anchor2,spread_props({"data-slot":"dropdown-menu-separator",get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}function Dropdown_menu_trigger($$anchor,$$props){
|
||||
push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref"]);var fragment=comment$2(),node2=first_child(fragment);component(node2,()=>Menu_trigger,($$anchor2,DropdownMenuPrimitive_Trigger)=>{DropdownMenuPrimitive_Trigger($$anchor2,spread_props({"data-slot":"dropdown-menu-trigger"},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))}),append($$anchor,fragment),pop()}function Dropdown_menu_sub_content($$anchor,$$props){
|
||||
push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("z-50 max-h-(--bits-dropdown-menu-content-available-height) min-w-[8rem] origin-(--bits-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border border-border bg-popover p-1.5 text-popover-foreground shadow-md outline-none data-[side=bottom]:slide-\
|
||||
in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95 dark:border-border/20",$$props.class));component(node2,()=>Menu_sub_content,($$anchor2,DropdownMenuPrimitive_SubContent)=>{DropdownMenuPrimitive_SubContent($$anchor2,spread_props({"data-sl\
|
||||
ot":"dropdown-menu-sub-content",get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}var root_1$D=from_html("<!> <!>",1);function Dropdown_menu_sub_trigger($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class","inset","children"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("fl\
|
||||
ex cursor-default items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-highlighted:bg-accent data-highlighted:text-accent-foreground data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 data-[state=open]:bg-accent data-[state=open]:text-accent-foreground [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4 [&_svg:not([class*='text-'])]:text-muted-foreground",$$props.class));component(node2,()=>Menu_sub_trigger,
|
||||
($$anchor2,DropdownMenuPrimitive_SubTrigger)=>{DropdownMenuPrimitive_SubTrigger($$anchor2,spread_props({"data-slot":"dropdown-menu-sub-trigger",get"data-inset"(){return $$props.inset},get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)},children:($$anchor3,$$slotProps)=>{var fragment_1=root_1$D(),node_1=first_child(fragment_1);snippet(node_1,()=>$$props.children??noop$3);var node_2=sibling(node_1,2);Chevron_right(node_2,{class:"ml-auto size-4"}),
|
||||
append($$anchor3,fragment_1)},$$slots:{default:!0}}))})}append($$anchor,fragment),pop()}const Sub=Menu_sub,Root$2=Menu;function useAttachmentMenu(getFlags,getCallbacks,close2){const modalityFlags=user_derived(getFlags),callbacks=user_derived(()=>{const cbs=getCallbacks(),wrap2=fn=>()=>{close2(),fn?.()};return{[AttachmentAction.FILE_UPLOAD]:wrap2(cbs.onFileUpload),[AttachmentAction.SYSTEM_PROMPT_CLICK]:wrap2(cbs.onSystemPromptClick),[AttachmentAction.MCP_PROMPT_CLICK]:wrap2(cbs.onMcpPromptClick),
|
||||
[AttachmentAction.MCP_RESOURCES_CLICK]:wrap2(cbs.onMcpResourcesClick)}});function isItemEnabled(enabledWhen){return!enabledWhen||enabledWhen==="always"?!0:!!get$3(modalityFlags)[enabledWhen]}function isItemVisible(visibleWhen){return visibleWhen?!!get$3(modalityFlags)[visibleWhen]:!0}function getSystemMessageTooltip(){return page$1.params.id?"Inject custom system message at the beginning of the conversation":"Add custom system message for a new conversation"}return{get callbacks(){return get$3(callbacks)},
|
||||
isItemEnabled,isItemVisible,getSystemMessageTooltip}}var root_5$o=from_html('<span class="sr-only"> </span> <!>',1),root_6$j=from_html("<p> </p>"),root_3$u=from_html("<!> <!>",1),root_10$c=from_html("<!> <span> </span>",1),root_14$5=from_html("<!> <span> </span>",1),root_15$7=from_html("<p> </p>"),root_12$7=from_html("<!> <!>",1),root_20$4=from_html("<!> <span> </span>",1),root_21$5=from_html("<p>PDFs will be converted to text. Image-based PDFs may not work properly.</p>"),root_17$8=from_html("<\
|
||||
!> <!>",1),root_26=from_html("<!> <span> </span>",1),root_27$1=from_html("<p> </p>"),root_24$1=from_html("<!> <!>",1),root_30$1=from_html("<!> <span> </span>",1),root_7$j=from_html("<!> <!> <!> <!> <!> <!> <!>",1),root_1$C=from_html("<!> <!>",1),root$14=from_html("<div><!></div>");function ChatFormActionAttachmentsDropdown($$anchor,$$props){push$1($$props,!0);let className=prop($$props,"class",3,""),disabled=prop($$props,"disabled",3,!1),hasAudioModality=prop($$props,"hasAudioModality",3,!1),hasVisionModality=prop(
|
||||
$$props,"hasVisionModality",3,!1),hasMcpPromptsSupport=prop($$props,"hasMcpPromptsSupport",3,!1),hasMcpResourcesSupport=prop($$props,"hasMcpResourcesSupport",3,!1),dropdownOpen=state$1(!1);function handleMcpSettingsClick(){set$1(dropdownOpen,!1),$$props.onMcpSettingsClick?.()}const attachmentMenu=useAttachmentMenu(()=>({hasVisionModality:hasVisionModality(),hasAudioModality:hasAudioModality(),hasMcpPromptsSupport:hasMcpPromptsSupport(),hasMcpResourcesSupport:hasMcpResourcesSupport()}),()=>({onFileUpload:$$props.
|
||||
onFileUpload,onSystemPromptClick:$$props.onSystemPromptClick,onMcpPromptClick:$$props.onMcpPromptClick,onMcpResourcesClick:$$props.onMcpResourcesClick}),()=>{set$1(dropdownOpen,!1)});var div=root$14(),node2=child(div);component(node2,()=>Root$2,($$anchor2,DropdownMenu_Root)=>{DropdownMenu_Root($$anchor2,{get open(){return get$3(dropdownOpen)},set open($$value){set$1(dropdownOpen,$$value,!0)},children:($$anchor3,$$slotProps)=>{var fragment=root_1$C(),node_1=first_child(fragment);component(node_1,
|
||||
()=>Dropdown_menu_trigger,($$anchor4,DropdownMenu_Trigger)=>{DropdownMenu_Trigger($$anchor4,{name:"Attach files",get disabled(){return disabled()},children:($$anchor5,$$slotProps2)=>{var fragment_1=comment$2(),node_2=first_child(fragment_1);component(node_2,()=>Root$5,($$anchor6,Tooltip_Root)=>{Tooltip_Root($$anchor6,{children:($$anchor7,$$slotProps3)=>{var fragment_2=root_3$u(),node_3=first_child(fragment_2);component(node_3,()=>Tooltip_trigger,($$anchor8,Tooltip_Trigger)=>{Tooltip_Trigger($$anchor8,
|
||||
{class:"w-full",children:($$anchor9,$$slotProps4)=>{Button($$anchor9,{class:"file-upload-button h-8 w-8 rounded-full p-0",get disabled(){return disabled()},variant:"secondary",type:"button",children:($$anchor10,$$slotProps5)=>{var fragment_4=root_5$o(),span=first_child(fragment_4),text2=child(span,!0);reset(span);var node_4=sibling(span,2);Plus(node_4,{class:"h-4 w-4"}),template_effect(()=>set_text(text2,ATTACHMENT_TOOLTIP_TEXT)),append($$anchor10,fragment_4)},$$slots:{default:!0}})},$$slots:{default:!0}})});
|
||||
var node_5=sibling(node_3,2);component(node_5,()=>Tooltip_content,($$anchor8,Tooltip_Content)=>{Tooltip_Content($$anchor8,{children:($$anchor9,$$slotProps4)=>{var p2=root_6$j(),text_1=child(p2,!0);reset(p2),template_effect(()=>set_text(text_1,ATTACHMENT_TOOLTIP_TEXT)),append($$anchor9,p2)},$$slots:{default:!0}})}),append($$anchor7,fragment_2)},$$slots:{default:!0}})}),append($$anchor5,fragment_1)},$$slots:{default:!0}})});var node_6=sibling(node_1,2);component(node_6,()=>Dropdown_menu_content,($$anchor4,DropdownMenu_Content)=>{
|
||||
DropdownMenu_Content($$anchor4,{align:"start",class:"w-48",children:($$anchor5,$$slotProps2)=>{var fragment_5=root_7$j(),node_7=first_child(fragment_5);each(node_7,17,()=>ATTACHMENT_FILE_ITEMS,item=>item.id,($$anchor6,item)=>{const enabled=user_derived(()=>attachmentMenu.isItemEnabled(get$3(item).enabledWhen));var fragment_6=comment$2(),node_8=first_child(fragment_6);{var consequent=$$anchor7=>{var fragment_7=comment$2(),node_9=first_child(fragment_7);{let $0=user_derived(()=>get$3(item).class??
|
||||
"");component(node_9,()=>Dropdown_menu_item,($$anchor8,DropdownMenu_Item)=>{DropdownMenu_Item($$anchor8,{get class(){return`${get$3($0)??""} flex cursor-pointer items-center gap-2`},onclick:()=>attachmentMenu.callbacks[get$3(item).action](),children:($$anchor9,$$slotProps3)=>{var fragment_8=root_10$c(),node_10=first_child(fragment_8);component(node_10,()=>get$3(item).icon,($$anchor10,item_icon)=>{item_icon($$anchor10,{class:"h-4 w-4"})});var span_1=sibling(node_10,2),text_2=child(span_1,!0);reset(
|
||||
span_1),template_effect(()=>set_text(text_2,get$3(item).label)),append($$anchor9,fragment_8)},$$slots:{default:!0}})})}append($$anchor7,fragment_7)},consequent_1=$$anchor7=>{var fragment_9=comment$2(),node_11=first_child(fragment_9);component(node_11,()=>Root$5,($$anchor8,Tooltip_Root_1)=>{Tooltip_Root_1($$anchor8,{get delayDuration(){return TOOLTIP_DELAY_DURATION},children:($$anchor9,$$slotProps3)=>{var fragment_10=root_12$7(),node_12=first_child(fragment_10);component(node_12,()=>Tooltip_trigger,
|
||||
($$anchor10,Tooltip_Trigger_1)=>{Tooltip_Trigger_1($$anchor10,{class:"w-full",children:($$anchor11,$$slotProps4)=>{var fragment_11=comment$2(),node_13=first_child(fragment_11);{let $0=user_derived(()=>get$3(item).class??"");component(node_13,()=>Dropdown_menu_item,($$anchor12,DropdownMenu_Item_1)=>{DropdownMenu_Item_1($$anchor12,{get class(){return`${get$3($0)??""} flex cursor-pointer items-center gap-2`},disabled:!0,children:($$anchor13,$$slotProps5)=>{var fragment_12=root_14$5(),node_14=first_child(
|
||||
fragment_12);component(node_14,()=>get$3(item).icon,($$anchor14,item_icon_1)=>{item_icon_1($$anchor14,{class:"h-4 w-4"})});var span_2=sibling(node_14,2),text_3=child(span_2,!0);reset(span_2),template_effect(()=>set_text(text_3,get$3(item).label)),append($$anchor13,fragment_12)},$$slots:{default:!0}})})}append($$anchor11,fragment_11)},$$slots:{default:!0}})});var node_15=sibling(node_12,2);component(node_15,()=>Tooltip_content,($$anchor10,Tooltip_Content_1)=>{Tooltip_Content_1($$anchor10,{side:"r\
|
||||
ight",children:($$anchor11,$$slotProps4)=>{var p_1=root_15$7(),text_4=child(p_1,!0);reset(p_1),template_effect(()=>set_text(text_4,get$3(item).disabledTooltip)),append($$anchor11,p_1)},$$slots:{default:!0}})}),append($$anchor9,fragment_10)},$$slots:{default:!0}})}),append($$anchor7,fragment_9)};if_block(node_8,$$render=>{get$3(enabled)?$$render(consequent):get$3(item).disabledTooltip&&$$render(consequent_1,1)})}append($$anchor6,fragment_6)});var node_16=sibling(node_7,2);{var consequent_3=$$anchor6=>{
|
||||
var fragment_13=comment$2(),node_17=first_child(fragment_13);component(node_17,()=>Root$5,($$anchor7,Tooltip_Root_2)=>{Tooltip_Root_2($$anchor7,{get delayDuration(){return TOOLTIP_DELAY_DURATION},children:($$anchor8,$$slotProps3)=>{var fragment_14=root_17$8(),node_18=first_child(fragment_14);component(node_18,()=>Tooltip_trigger,($$anchor9,Tooltip_Trigger_2)=>{Tooltip_Trigger_2($$anchor9,{class:"w-full",children:($$anchor10,$$slotProps4)=>{var fragment_15=comment$2(),node_19=first_child(fragment_15);
|
||||
component(node_19,()=>Dropdown_menu_item,($$anchor11,DropdownMenu_Item_2)=>{DropdownMenu_Item_2($$anchor11,{class:"flex cursor-pointer items-center gap-2",get onclick(){return attachmentMenu.callbacks.onFileUpload},children:($$anchor12,$$slotProps5)=>{const pdfItem=user_derived(()=>ATTACHMENT_FILE_ITEMS.find(i=>i.id===AttachmentMenuItemId.PDF));var fragment_16=comment$2(),node_20=first_child(fragment_16);{var consequent_2=$$anchor13=>{var fragment_17=root_20$4(),node_21=first_child(fragment_17);
|
||||
component(node_21,()=>get$3(pdfItem).icon,($$anchor14,pdfItem_icon)=>{pdfItem_icon($$anchor14,{class:"h-4 w-4"})});var span_3=sibling(node_21,2),text_5=child(span_3,!0);reset(span_3),template_effect(()=>set_text(text_5,get$3(pdfItem).label)),append($$anchor13,fragment_17)};if_block(node_20,$$render=>{get$3(pdfItem)&&$$render(consequent_2)})}append($$anchor12,fragment_16)},$$slots:{default:!0}})}),append($$anchor10,fragment_15)},$$slots:{default:!0}})});var node_22=sibling(node_18,2);component(node_22,
|
||||
|
||||
@@ -354,6 +354,7 @@ struct server_slot {
|
||||
|
||||
// generate a new draft
|
||||
spec_draft = common_speculative_draft(spec.get(), params_spec, tokens, sampled);
|
||||
n_draft_total += spec_draft.size();
|
||||
|
||||
if (spec_draft.size() > (size_t) n_draft_max) {
|
||||
SLT_WRN(*this, "draft size %d exceeds max %d, truncating\n", (int) spec_draft.size(), n_draft_max);
|
||||
@@ -679,6 +680,7 @@ private:
|
||||
// slots / clients
|
||||
std::vector<server_slot> slots;
|
||||
|
||||
int trace = 0;
|
||||
int slots_debug = 0;
|
||||
int n_empty_consecutive = 0;
|
||||
|
||||
@@ -917,12 +919,21 @@ private:
|
||||
slot.reset();
|
||||
}
|
||||
|
||||
{
|
||||
const char * LLAMA_TRACE = getenv("LLAMA_TRACE");
|
||||
trace = LLAMA_TRACE ? atoi(LLAMA_TRACE) : 0;
|
||||
|
||||
if (trace) {
|
||||
SRV_WRN("LLAMA_TRACE = %d\n", trace);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
const char * LLAMA_SERVER_SLOTS_DEBUG = getenv("LLAMA_SERVER_SLOTS_DEBUG");
|
||||
slots_debug = LLAMA_SERVER_SLOTS_DEBUG ? atoi(LLAMA_SERVER_SLOTS_DEBUG) : 0;
|
||||
|
||||
if (slots_debug) {
|
||||
SRV_WRN("slots debug = %d\n", slots_debug);
|
||||
SRV_WRN("LLAMA_SERVER_SLOTS_DEBUG = %d\n", slots_debug);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2973,13 +2984,15 @@ private:
|
||||
auto accepted = common_sampler_sample_and_accept_n(slot.smpl.get(), slot.ctx, slot.spec_i_batch, slot.spec_draft);
|
||||
slot.spec_i_batch.clear();
|
||||
|
||||
SLT_DBG(slot, "%s: n_draft=%zu, accepted=%zu\n", __func__, slot.spec_draft.size(), accepted.size());
|
||||
|
||||
GGML_ASSERT(accepted.size() >= 1);
|
||||
|
||||
// check for partial draft acceptance
|
||||
if (accepted.size() < slot.spec_draft.size() + 1) {
|
||||
if (use_ckpt) {
|
||||
if (trace > 0) {
|
||||
SLT_INF(slot, "accepted %2zu/%2zu draft tokens (restore checkpoint)\n", accepted.size() - 1, slot.spec_draft.size());
|
||||
}
|
||||
|
||||
// partial acceptance is not supported by the context -> truncate the draft and restore the state
|
||||
slot.spec_draft = std::move(accepted);
|
||||
|
||||
@@ -3001,8 +3014,10 @@ private:
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
LOG_DBG("%s: partial acceptance: %zu < %zu\n", __func__, accepted.size(), slot.spec_draft.size());
|
||||
if (trace > 0) {
|
||||
SLT_INF(slot, "accepted %2zu/%2zu draft tokens\n", accepted.size() - 1, n_draft);
|
||||
}
|
||||
|
||||
common_speculative_accept(slot.spec.get(), accepted.size() - 1);
|
||||
@@ -3019,7 +3034,6 @@ private:
|
||||
|
||||
// update how many tokens out of those tested were accepted
|
||||
slot.n_draft_accepted += ids.size() - 1;
|
||||
slot.n_draft_total += n_draft;
|
||||
|
||||
// add accepted tokens to the prompt
|
||||
slot.prompt.tokens.keep_first(slot.prompt.n_tokens() - n_draft);
|
||||
|
||||
@@ -527,16 +527,15 @@
|
||||
}
|
||||
|
||||
if (isRecording) {
|
||||
isRecording = false;
|
||||
try {
|
||||
const audioBlob = await audioRecorder.stopRecording();
|
||||
const wavBlob = await convertToWav(audioBlob);
|
||||
const audioFile = createAudioFile(wavBlob);
|
||||
|
||||
onFilesAdd?.([audioFile]);
|
||||
isRecording = false;
|
||||
} catch (error) {
|
||||
console.error('Failed to stop recording:', error);
|
||||
isRecording = false;
|
||||
}
|
||||
} else {
|
||||
try {
|
||||
|
||||
@@ -43,27 +43,48 @@ export class AudioRecorder {
|
||||
|
||||
async stopRecording(): Promise<Blob> {
|
||||
return new Promise((resolve, reject) => {
|
||||
if (!this.mediaRecorder || this.mediaRecorder.state === 'inactive') {
|
||||
const recorder = this.mediaRecorder;
|
||||
const chunks = this.audioChunks;
|
||||
const stream = this.stream;
|
||||
|
||||
if (!recorder || recorder.state === 'inactive') {
|
||||
reject(new Error('No active recording to stop'));
|
||||
return;
|
||||
}
|
||||
|
||||
this.mediaRecorder.onstop = () => {
|
||||
const mimeType = this.mediaRecorder?.mimeType || MimeTypeAudio.WAV;
|
||||
const audioBlob = new Blob(this.audioChunks, { type: mimeType });
|
||||
// Detach instance state right away so a new startRecording can take over without race
|
||||
this.mediaRecorder = null;
|
||||
this.audioChunks = [];
|
||||
this.stream = null;
|
||||
this.recordingState = false;
|
||||
|
||||
this.cleanup();
|
||||
recorder.onstop = () => {
|
||||
const audioBlob = new Blob(chunks, {
|
||||
type: recorder.mimeType || MimeTypeAudio.WAV
|
||||
});
|
||||
|
||||
if (stream) {
|
||||
for (const track of stream.getTracks()) {
|
||||
track.stop();
|
||||
}
|
||||
}
|
||||
|
||||
resolve(audioBlob);
|
||||
};
|
||||
|
||||
this.mediaRecorder.onerror = (event) => {
|
||||
recorder.onerror = (event) => {
|
||||
console.error('Recording error:', event);
|
||||
this.cleanup();
|
||||
|
||||
if (stream) {
|
||||
for (const track of stream.getTracks()) {
|
||||
track.stop();
|
||||
}
|
||||
}
|
||||
|
||||
reject(new Error('Recording failed'));
|
||||
};
|
||||
|
||||
this.mediaRecorder.stop();
|
||||
recorder.stop();
|
||||
});
|
||||
}
|
||||
|
||||
@@ -72,10 +93,26 @@ export class AudioRecorder {
|
||||
}
|
||||
|
||||
cancelRecording(): void {
|
||||
if (this.mediaRecorder && this.mediaRecorder.state !== 'inactive') {
|
||||
this.mediaRecorder.stop();
|
||||
const recorder = this.mediaRecorder;
|
||||
const stream = this.stream;
|
||||
|
||||
this.mediaRecorder = null;
|
||||
this.audioChunks = [];
|
||||
this.stream = null;
|
||||
this.recordingState = false;
|
||||
|
||||
if (recorder && recorder.state !== 'inactive') {
|
||||
// Drop the original handlers so the pending stop event does not touch the instance
|
||||
recorder.onstop = null;
|
||||
recorder.onerror = null;
|
||||
recorder.stop();
|
||||
}
|
||||
|
||||
if (stream) {
|
||||
for (const track of stream.getTracks()) {
|
||||
track.stop();
|
||||
}
|
||||
}
|
||||
this.cleanup();
|
||||
}
|
||||
|
||||
private initializeRecorder(stream: MediaStream): void {
|
||||
@@ -110,19 +147,6 @@ export class AudioRecorder {
|
||||
this.recordingState = false;
|
||||
};
|
||||
}
|
||||
|
||||
private cleanup(): void {
|
||||
if (this.stream) {
|
||||
for (const track of this.stream.getTracks()) {
|
||||
track.stop();
|
||||
}
|
||||
|
||||
this.stream = null;
|
||||
}
|
||||
this.mediaRecorder = null;
|
||||
this.audioChunks = [];
|
||||
this.recordingState = false;
|
||||
}
|
||||
}
|
||||
|
||||
export async function convertToWav(audioBlob: Blob): Promise<Blob> {
|
||||
@@ -136,13 +160,12 @@ export async function convertToWav(audioBlob: Blob): Promise<Blob> {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
|
||||
|
||||
const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
|
||||
|
||||
const wavBlob = audioBufferToWav(audioBuffer);
|
||||
|
||||
audioContext.close();
|
||||
|
||||
return wavBlob;
|
||||
try {
|
||||
const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
|
||||
return audioBufferToWav(audioBuffer);
|
||||
} finally {
|
||||
audioContext.close();
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Failed to convert audio to WAV:', error);
|
||||
return audioBlob;
|
||||
@@ -182,12 +205,20 @@ function audioBufferToWav(buffer: AudioBuffer): Blob {
|
||||
writeString(36, 'data'); // Subchunk2ID
|
||||
view.setUint32(40, dataSize, true); // Subchunk2Size
|
||||
|
||||
let offset = 44;
|
||||
// Cache channel arrays, write PCM via Int16Array (native little-endian, matches WAV)
|
||||
const channels: Float32Array[] = new Array(numberOfChannels);
|
||||
for (let c = 0; c < numberOfChannels; c++) {
|
||||
channels[c] = buffer.getChannelData(c);
|
||||
}
|
||||
|
||||
const pcm = new Int16Array(arrayBuffer, 44, length * numberOfChannels);
|
||||
let p = 0;
|
||||
for (let i = 0; i < length; i++) {
|
||||
for (let channel = 0; channel < numberOfChannels; channel++) {
|
||||
const sample = Math.max(-1, Math.min(1, buffer.getChannelData(channel)[i]));
|
||||
view.setInt16(offset, sample * 0x7fff, true);
|
||||
offset += 2;
|
||||
for (let c = 0; c < numberOfChannels; c++) {
|
||||
let s = channels[c][i];
|
||||
if (s > 1) s = 1;
|
||||
else if (s < -1) s = -1;
|
||||
pcm[p++] = s * 0x7fff;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user