Compare commits

...

3 Commits
b6089 ... b6092

Author SHA1 Message Date
Juk Armstrong
c81de6e107 Fix glm4moe bug (#15088) 2025-08-05 13:56:44 +01:00
Alex Wu
22f060c9c4 webui: fix markdown table (#15081)
* webui: fix markdown table

* webui: fix table display with themes
2025-08-05 13:56:44 +02:00
compilade
ee3a9fcf88 context : fix index overflow on huge outputs (#15080)
* context : fix overflow when re-ordering huge outputs

* context : fix logits size overflow for huge batches
2025-08-05 11:27:45 +02:00
4 changed files with 26 additions and 13 deletions

View File

@@ -786,7 +786,7 @@ int llama_context::encode(const llama_batch & batch_inp) {
const auto & hparams = model.hparams;
const int64_t n_embd = hparams.n_embd;
const int32_t n_vocab = model.vocab.n_tokens();
const int64_t n_vocab = model.vocab.n_tokens();
// note: during encode, we always pass the full sequence starting from pos = 0
if (!balloc->init(batch_inp, model.vocab, nullptr, n_embd, cparams.kv_unified ? LLAMA_MAX_SEQ : cparams.n_seq_max, true)) {
@@ -959,7 +959,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
const auto & vocab = model.vocab;
const auto & hparams = model.hparams;
const int32_t n_vocab = vocab.n_tokens();
const int64_t n_vocab = vocab.n_tokens();
const int64_t n_embd = hparams.n_embd;
// when computing embeddings, all tokens are output
@@ -1328,21 +1328,21 @@ uint32_t llama_context::output_reserve(int32_t n_outputs) {
}
void llama_context::output_reorder() {
const uint32_t n_vocab = model.vocab.n_tokens();
const uint64_t n_vocab = model.vocab.n_tokens();
const uint64_t n_embd = model.hparams.n_embd;
for (uint32_t s = 0; s < output_swaps.size(); ++s) {
const uint32_t i0 = output_swaps[s].i0;
const uint32_t i1 = output_swaps[s].i1;
for (size_t s = 0; s < output_swaps.size(); ++s) {
const uint64_t i0 = output_swaps[s].i0;
const uint64_t i1 = output_swaps[s].i1;
if (logits_size > 0) {
for (uint32_t k = 0; k < n_vocab; k++) {
for (uint64_t k = 0; k < n_vocab; k++) {
std::swap(logits[i0*n_vocab + k], logits[i1*n_vocab + k]);
}
}
if (embd_size > 0) {
for (uint32_t k = 0; k < n_embd; k++) {
for (uint64_t k = 0; k < n_embd; k++) {
std::swap(embd[i0*n_embd + k], embd[i1*n_embd + k]);
}
}

View File

@@ -13800,10 +13800,6 @@ struct llm_build_glm4_moe : public llm_graph_context {
LLM_FFN_SILU, LLM_FFN_PAR, il);
cb(cur, "ffn_out", il);
} else {
// MoE layer with shared experts
const int64_t n_expert = hparams.n_expert;
const int64_t n_expert_used = hparams.n_expert_used;
// Process routed experts using existing MoE infrastructure
ggml_tensor * routed_out = build_moe_ffn(cur,
model.layers[il].ffn_gate_inp,

Binary file not shown.

View File

@@ -31,7 +31,24 @@ html {
hr {
@apply my-4 border-base-content/20 border-1;
}
/* TODO: fix markdown table */
table {
@apply w-full border-collapse text-sm font-sans my-4 text-base-content;
}
thead {
@apply bg-base-200 text-base-content;
}
th {
@apply border border-base-300 px-4 py-2 text-left font-semibold;
}
td {
@apply border border-base-300 px-4 py-2 align-top;
}
tbody tr:nth-child(even) {
@apply bg-base-100;
}
tbody tr:hover {
@apply bg-base-200;
}
}
.btn-mini {