mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-07-04 21:23:03 +02:00
Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 665892536d | |||
| ef2d770117 | |||
| 2d973636e2 | |||
| d4cff114c0 | |||
| f113e02d5a | |||
| 152d337fad | |||
| 75a48a9055 | |||
| 067de93718 |
+27
-1
@@ -2378,6 +2378,23 @@ static void func_args_not_string(json & messages) {
|
||||
}
|
||||
}
|
||||
|
||||
// Trim leading/trailing whitespace from message contents before rendering. This
|
||||
// has to run on the messages (not on the rendered JSON) because templates with
|
||||
// string-only content caps concatenate typed content parts into a single string
|
||||
// during rendering, after which the per-part whitespace can no longer be reached.
|
||||
// Both the plain string content and the text of typed content parts are trimmed.
|
||||
static void trim_all_content(std::vector<common_chat_msg> & messages) {
|
||||
for (auto & message : messages) {
|
||||
message.content = trim_whitespace(message.content);
|
||||
message.reasoning_content = trim_whitespace(message.reasoning_content);
|
||||
for (auto & part : message.content_parts) {
|
||||
if (part.type == "text") {
|
||||
part.text = trim_whitespace(part.text);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// MiniCPM5 format:
|
||||
@@ -2634,7 +2651,16 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
|
||||
params.tools.is_array() && tmpls->template_tool_use ? *tmpls->template_tool_use : *tmpls->template_default;
|
||||
const auto & src = tmpl.source();
|
||||
const auto & caps = tmpl.original_caps();
|
||||
params.messages = render_message_to_json(inputs.messages, tmpl.original_caps());
|
||||
std::vector<common_chat_msg> trimmed_messages;
|
||||
const std::vector<common_chat_msg> * messages_to_render = &inputs.messages;
|
||||
if (src.find("You have access to the following functions in JSONSchema format") != std::string::npos) {
|
||||
// StepFun: trim message contents (including typed content parts) before rendering,
|
||||
// otherwise leftover whitespace drives the model into reasoning loops (issue #24181)
|
||||
trimmed_messages = inputs.messages;
|
||||
workaround::trim_all_content(trimmed_messages);
|
||||
messages_to_render = &trimmed_messages;
|
||||
}
|
||||
params.messages = render_message_to_json(*messages_to_render, tmpl.original_caps());
|
||||
params.tool_choice = inputs.tool_choice;
|
||||
params.reasoning_format = inputs.reasoning_format;
|
||||
params.enable_thinking = inputs.enable_thinking;
|
||||
|
||||
+14
-5
@@ -955,10 +955,11 @@ struct common_speculative_impl_draft_dflash : public common_speculative_impl {
|
||||
LOG_INF("%s: - block_size=%d, mask_token_id=%d, n_extract=%u\n", __func__, block_size, mask_token_id, target_layer_ids_n);
|
||||
|
||||
// DFlash input is [id_last, <mask> * (block_size-1)], so it can draft at most block_size-1 tokens per step
|
||||
if (this->params.n_max > block_size - 1) {
|
||||
LOG_WRN("%s: requested draft size %d exceeds the trained DFlash block size %d -- clamping to %d draft tokens per step\n",
|
||||
__func__, this->params.n_max, block_size - 1, block_size - 1);
|
||||
this->params.n_max = block_size - 1;
|
||||
if (this->params.n_max > block_size - 1 || this->params.n_min > block_size - 1) {
|
||||
LOG_WRN("%s: requested draft size (n_max=%d, n_min=%d) exceeds the trained DFlash block size %d -- clamping to %d\n",
|
||||
__func__, this->params.n_max, this->params.n_min, block_size, block_size - 1);
|
||||
this->params.n_max = std::min(this->params.n_max, block_size - 1);
|
||||
this->params.n_min = std::min(this->params.n_min, block_size - 1);
|
||||
}
|
||||
|
||||
batch = llama_batch_init(llama_n_batch(ctx_dft), 0, n_seq);
|
||||
@@ -968,7 +969,7 @@ struct common_speculative_impl_draft_dflash : public common_speculative_impl {
|
||||
for (auto & s : smpls) {
|
||||
common_params_sampling sparams;
|
||||
sparams.no_perf = false;
|
||||
sparams.top_k = 1;
|
||||
sparams.top_k = 10;
|
||||
sparams.samplers = { COMMON_SAMPLER_TYPE_TOP_K };
|
||||
s.reset(common_sampler_init(model_dft, sparams));
|
||||
}
|
||||
@@ -1173,10 +1174,18 @@ struct common_speculative_impl_draft_dflash : public common_speculative_impl {
|
||||
|
||||
const llama_token id = cur_p->data[0].id;
|
||||
|
||||
if (cur_p->data[0].p < params.p_min) {
|
||||
break;
|
||||
}
|
||||
|
||||
common_sampler_accept(smpl, id, true);
|
||||
|
||||
result.push_back(id);
|
||||
}
|
||||
|
||||
if (result.size() < (size_t) params.n_min) {
|
||||
result.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1913,7 +1913,11 @@ static void ggml_compute_forward_concat_any(
|
||||
GGML_ASSERT(dim >= 0 && dim < 4);
|
||||
|
||||
int64_t o[4] = {0, 0, 0, 0};
|
||||
o[dim] = src0->ne[dim];
|
||||
if (dim == 0) {
|
||||
o[dim] = src0->ne[dim]/ggml_blck_size(src0->type);
|
||||
} else {
|
||||
o[dim] = src0->ne[dim];
|
||||
}
|
||||
|
||||
const char * x;
|
||||
|
||||
@@ -1921,8 +1925,8 @@ static void ggml_compute_forward_concat_any(
|
||||
for (int i3 = 0; i3 < ne3; i3++) {
|
||||
for (int i2 = ith; i2 < ne2; i2 += nth) {
|
||||
for (int i1 = 0; i1 < ne1; i1++) {
|
||||
for (int i0 = 0; i0 < ne0; i0++) {
|
||||
if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
|
||||
for (int i0 = 0; i0 < ne0/ggml_blck_size(dst->type); i0++) {
|
||||
if (i0 < ne00/ggml_blck_size(src0->type) && i1 < ne01 && i2 < ne02 && i3 < ne03) {
|
||||
x = (const char *)src0->data + (i0 )*nb00 + (i1 )*nb01 + (i2 )*nb02 + (i3 )*nb03;
|
||||
} else {
|
||||
x = (const char *)src1->data + (i0 - o[0])*nb10 + (i1 - o[1])*nb11 + (i2 - o[2])*nb12 + (i3 - o[3])*nb13;
|
||||
@@ -2071,6 +2075,14 @@ void ggml_compute_forward_concat(
|
||||
ggml_tensor * dst) {
|
||||
|
||||
const ggml_tensor * src0 = dst->src[0];
|
||||
const ggml_tensor * src1 = dst->src[1];
|
||||
|
||||
if (ggml_is_quantized(src0->type)) {
|
||||
GGML_ASSERT(ggml_is_contiguous(src0));
|
||||
GGML_ASSERT(ggml_is_contiguous(src1));
|
||||
GGML_ASSERT(src0->ne[0] % ggml_blck_size(src0->type) == 0);
|
||||
GGML_ASSERT(src1->ne[0] % ggml_blck_size(src1->type) == 0);
|
||||
}
|
||||
|
||||
switch (src0->type) {
|
||||
case GGML_TYPE_F16:
|
||||
|
||||
@@ -312,6 +312,10 @@ static void launch_topk_moe_cuda(ggml_backend_cuda_context & ctx,
|
||||
ggml_cuda_kernel_launch(topk_moe_cuda<256, has_bias>, launch_params,
|
||||
logits, weights, ids, bias, n_rows, n_expert_used, clamp_val, scale_val, config);
|
||||
break;
|
||||
case 288: // StepFun 3.7
|
||||
ggml_cuda_kernel_launch(topk_moe_cuda<288, has_bias>, launch_params,
|
||||
logits, weights, ids, bias, n_rows, n_expert_used, clamp_val, scale_val, config);
|
||||
break;
|
||||
case 512:
|
||||
ggml_cuda_kernel_launch(topk_moe_cuda<512, has_bias>, launch_params,
|
||||
logits, weights, ids, bias, n_rows, n_expert_used, clamp_val, scale_val, config);
|
||||
@@ -377,8 +381,10 @@ bool ggml_cuda_should_use_topk_moe(const ggml_tensor * gating_op,
|
||||
const ggml_tensor * weights,
|
||||
const ggml_tensor * logits,
|
||||
const ggml_tensor * ids) {
|
||||
// must match an instantiation of launch_topk_moe_cuda: a power of 2 up to 512,
|
||||
// or one of the non-power-of-2 expert counts of supported models
|
||||
const int n_expert = ids->nb[1] / ids->nb[0];
|
||||
if (((n_expert & (n_expert - 1)) != 0 || n_expert > 512) && n_expert != 576) {
|
||||
if (((n_expert & (n_expert - 1)) != 0 || n_expert > 512) && n_expert != 288 && n_expert != 576) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,80 +0,0 @@
|
||||
{% macro render_content(content) %}{% if content is none %}{{- '' }}{% elif content is string %}{{- content }}{% elif content is mapping %}{{- content['value'] if 'value' in content else content['text'] }}{% elif content is iterable %}{% for item in content %}{% if item.type == 'text' %}{{- item['value'] if 'value' in item else item['text'] }}{% elif item.type == 'image' %}<im_patch>{% endif %}{% endfor %}{% endif %}{% endmacro %}
|
||||
{{bos_token}}{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- render_content(messages[0].content) + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "# Tools\n\nYou have access to the following functions in JSONSchema format:\n\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson(ensure_ascii=False) }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...>\n...\n</function> block must be nested within <tool_call>\n...\n</tool_call> XML tags\n- Required parameters MUST be specified\n</IMPORTANT><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + render_content(messages[0].content) + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for message in messages[::-1] %}
|
||||
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and render_content(message.content) is string and not(render_content(message.content).startswith('<tool_response>') and render_content(message.content).endswith('</tool_response>')) %}
|
||||
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- for message in messages %}
|
||||
{%- set content = render_content(message.content) %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{%- set role_name = 'observation' if (message.role == "system" and not loop.first and message.name == 'observation') else message.role %}
|
||||
{{- '<|im_start|>' + role_name + '\n' + content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{%- if message.reasoning_content is string %}
|
||||
{%- set reasoning_content = render_content(message.reasoning_content) %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in content %}
|
||||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- else %}
|
||||
{%- set reasoning_content = '' %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if loop.index0 > ns.last_query_index %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n' + content }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if tool_call.function is defined %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
|
||||
{%- if tool_call.arguments is defined %}
|
||||
{%- set arguments = tool_call.arguments %}
|
||||
{%- for args_name, args_value in arguments|items %}
|
||||
{{- '<parameter=' + args_name + '>\n' }}
|
||||
{%- set args_value = args_value | tojson(ensure_ascii=False) | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
|
||||
{{- args_value }}
|
||||
{{- '\n</parameter>\n' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '</function>\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>tool_response\n' }}
|
||||
{%- endif %}
|
||||
{{- '<tool_response>' }}
|
||||
{{- content }}
|
||||
{{- '</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n<think>\n' }}
|
||||
{%- endif %}
|
||||
@@ -8918,6 +8918,12 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
}
|
||||
}
|
||||
|
||||
for (ggml_type type_a : { GGML_TYPE_Q4_0, GGML_TYPE_Q4_1, GGML_TYPE_Q5_0, GGML_TYPE_Q5_1, GGML_TYPE_Q8_0 }) {
|
||||
for (int dim : { 0, 1, 2, 3, }) {
|
||||
test_cases.emplace_back(new test_concat(type_a, {128, 12, 13, 14}, dim == 0 ? 256 : 7, dim, 0));
|
||||
}
|
||||
}
|
||||
|
||||
for (ggml_sort_order order : {GGML_SORT_ORDER_ASC, GGML_SORT_ORDER_DESC}) {
|
||||
for (uint32_t i = 4; i <= 1024*1024; i *= 2) {
|
||||
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {i-1, 1, 1, 1}));
|
||||
@@ -9219,6 +9225,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
test_cases.emplace_back(new test_topk_moe({128, 1, 1, 1}, 128, with_norm, bias_probs, gate, scale_w));
|
||||
test_cases.emplace_back(new test_topk_moe({129, 1, 1, 1}, 128, with_norm, bias_probs, gate, scale_w));
|
||||
test_cases.emplace_back(new test_topk_moe({160, 4, 1, 1}, 160, with_norm, bias_probs, gate, scale_w));
|
||||
test_cases.emplace_back(new test_topk_moe({288, 22, 1, 1}, 8, with_norm, bias_probs, gate, scale_w)); // Used by StepFun 3.7
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1887,7 +1887,6 @@ static void test_role_markers_all_templates(testing & t) {
|
||||
{ "Qwen-Qwen3-0.6B.jinja", "<|im_start|>user", "<|im_start|>assistant" },
|
||||
{ "Qwen-QwQ-32B.jinja", "<|im_start|>user", "<|im_start|>assistant" },
|
||||
{ "StepFun3.5-Flash.jinja", "<|im_start|>user", "<|im_start|>assistant" },
|
||||
{ "stepfun-ai-Step-3.5-Flash.jinja", "<|im_start|>user", "<|im_start|>assistant" },
|
||||
|
||||
// DeepSeek family
|
||||
{ "deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja", "<|User|>", "<|Assistant|>" },
|
||||
|
||||
@@ -3155,6 +3155,59 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// StepFun trimming regression test (see https://github.com/ggml-org/llama.cpp/pull/25238)
|
||||
auto tmpls = read_templates("models/templates/StepFun3.5-Flash.jinja");
|
||||
|
||||
common_chat_msg message_chatbot = simple_assist_msg("Let me check.\n\n", "I am thinking.\n\n");
|
||||
|
||||
{
|
||||
common_chat_templates_inputs inputs;
|
||||
inputs.messages = { message_chatbot };
|
||||
inputs.add_generation_prompt = true;
|
||||
|
||||
auto params = common_chat_templates_apply(tmpls.get(), inputs);
|
||||
|
||||
if (params.prompt.find("Let me check.\n\n") != std::string::npos) {
|
||||
throw std::runtime_error("StepFun 3.5: content not trimmed");
|
||||
}
|
||||
|
||||
if (params.prompt.find("I am thinking.\n\n") != std::string::npos) {
|
||||
throw std::runtime_error("StepFun 3.5: reasoning_content not trimmed");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// Trimming must also reach typed (text) content parts, not just string content
|
||||
// (see https://github.com/ggml-org/llama.cpp/pull/25238)
|
||||
common_chat_msg message_parts;
|
||||
message_parts.role = "user";
|
||||
message_parts.content_parts = {
|
||||
{ /* .type = */ "text", /* .text = */ "First part.\n\n" },
|
||||
{ /* .type = */ "media_marker", /* .text = */ "<__media__>" },
|
||||
{ /* .type = */ "text", /* .text = */ "Second part.\n\n" },
|
||||
};
|
||||
|
||||
common_chat_templates_inputs inputs;
|
||||
inputs.messages = { message_parts };
|
||||
inputs.add_generation_prompt = true;
|
||||
|
||||
auto params = common_chat_templates_apply(tmpls.get(), inputs);
|
||||
|
||||
if (params.prompt.find("First part.\n\n") != std::string::npos ||
|
||||
params.prompt.find("Second part.\n\n") != std::string::npos) {
|
||||
throw std::runtime_error("StepFun 3.5: text content parts not trimmed");
|
||||
}
|
||||
|
||||
// the trimmed text itself must still be present
|
||||
if (params.prompt.find("First part.") == std::string::npos ||
|
||||
params.prompt.find("Second part.") == std::string::npos) {
|
||||
throw std::runtime_error("StepFun 3.5: text content parts missing after trim");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
{
|
||||
|
||||
@@ -20,9 +20,9 @@
|
||||
agenticInjectSteeringMessage
|
||||
} from '$lib/stores/agentic.svelte';
|
||||
import {
|
||||
buildSiblingInfoMap,
|
||||
copyToClipboard,
|
||||
formatMessageForClipboard,
|
||||
getMessageSiblings,
|
||||
hasAgenticContent
|
||||
} from '$lib/utils';
|
||||
|
||||
@@ -169,6 +169,8 @@
|
||||
});
|
||||
});
|
||||
|
||||
let siblingInfoByMessageId = $derived(buildSiblingInfoMap(allConversationMessages));
|
||||
|
||||
let displayMessages = $derived.by(() => {
|
||||
if (!messages.length) {
|
||||
return [];
|
||||
@@ -223,18 +225,18 @@
|
||||
}
|
||||
}
|
||||
|
||||
const siblingInfo = getMessageSiblings(allConversationMessages, msg.id);
|
||||
const siblingInfo = siblingInfoByMessageId.get(msg.id) ?? {
|
||||
message: msg,
|
||||
siblingIds: [msg.id],
|
||||
currentIndex: 0,
|
||||
totalSiblings: 1
|
||||
};
|
||||
|
||||
result.push({
|
||||
message: msg,
|
||||
toolMessages,
|
||||
isLastAssistantMessage: false,
|
||||
siblingInfo: siblingInfo || {
|
||||
message: msg,
|
||||
siblingIds: [msg.id],
|
||||
currentIndex: 0,
|
||||
totalSiblings: 1
|
||||
}
|
||||
siblingInfo
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -37,3 +37,8 @@ export const MODEL_ACTIVATED_PARAMS_RE = /^[Aa]\d+(\.\d+)?[BbMmKkTt]$/;
|
||||
* Container format segments to exclude from tags (every model uses these).
|
||||
*/
|
||||
export const MODEL_IGNORED_SEGMENTS = new Set(['GGUF', 'GGML']);
|
||||
|
||||
/**
|
||||
* Matches a trailing weight file extension, e.g. `model.gguf` -> `model`.
|
||||
*/
|
||||
export const MODEL_WEIGHT_EXTENSION_RE = /\.(gguf|ggml)$/i;
|
||||
|
||||
@@ -69,7 +69,6 @@ export const SETTINGS_KEYS = {
|
||||
// Developer
|
||||
DISABLE_REASONING_PARSING: 'disableReasoningParsing',
|
||||
EXCLUDE_REASONING_FROM_CONTEXT: 'excludeReasoningFromContext',
|
||||
ENABLE_THINKING: 'enableThinking',
|
||||
SHOW_RAW_OUTPUT_SWITCH: 'showRawOutputSwitch',
|
||||
// PY_INTERPRETER_ENABLED: 'pyInterpreterEnabled',
|
||||
JS_SANDBOX_ENABLED: 'jsSandboxEnabled',
|
||||
|
||||
@@ -185,7 +185,11 @@ const SETTINGS_REGISTRY: Record<string, SettingsSectionEntry> = {
|
||||
defaultValue: false,
|
||||
type: SettingsFieldType.CHECKBOX,
|
||||
section: SETTINGS_SECTION_SLUGS.GENERAL,
|
||||
isExperimental: true
|
||||
isExperimental: true,
|
||||
sync: {
|
||||
serverKey: SETTINGS_KEYS.TITLE_GENERATION_USE_LLM,
|
||||
paramType: SyncableParameterType.BOOLEAN
|
||||
}
|
||||
},
|
||||
{
|
||||
key: SETTINGS_KEYS.TITLE_GENERATION_PROMPT,
|
||||
@@ -193,7 +197,11 @@ const SETTINGS_REGISTRY: Record<string, SettingsSectionEntry> = {
|
||||
help: 'Optional template for the title generation prompt. Use {{USER}} for the user message and {{ASSISTANT}} for the assistant message.',
|
||||
defaultValue: TITLE_GENERATION.DEFAULT_PROMPT,
|
||||
type: SettingsFieldType.TEXTAREA,
|
||||
section: SETTINGS_SECTION_SLUGS.GENERAL
|
||||
section: SETTINGS_SECTION_SLUGS.GENERAL,
|
||||
sync: {
|
||||
serverKey: SETTINGS_KEYS.TITLE_GENERATION_PROMPT,
|
||||
paramType: SyncableParameterType.STRING
|
||||
}
|
||||
},
|
||||
{
|
||||
key: SETTINGS_KEYS.MAX_IMAGE_RESOLUTION,
|
||||
@@ -201,7 +209,11 @@ const SETTINGS_REGISTRY: Record<string, SettingsSectionEntry> = {
|
||||
help: 'Images larger than this will be resized before sending to server. Set to 0 to disable.',
|
||||
defaultValue: 0,
|
||||
type: SettingsFieldType.INPUT,
|
||||
section: SETTINGS_SECTION_SLUGS.GENERAL
|
||||
section: SETTINGS_SECTION_SLUGS.GENERAL,
|
||||
sync: {
|
||||
serverKey: SETTINGS_KEYS.MAX_IMAGE_RESOLUTION,
|
||||
paramType: SyncableParameterType.NUMBER
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -385,7 +397,11 @@ const SETTINGS_REGISTRY: Record<string, SettingsSectionEntry> = {
|
||||
help: 'Display the current build version in the bottom-right corner of the interface.',
|
||||
defaultValue: false,
|
||||
type: SettingsFieldType.CHECKBOX,
|
||||
section: SETTINGS_SECTION_SLUGS.DISPLAY
|
||||
section: SETTINGS_SECTION_SLUGS.DISPLAY,
|
||||
sync: {
|
||||
serverKey: SETTINGS_KEYS.SHOW_BUILD_VERSION,
|
||||
paramType: SyncableParameterType.BOOLEAN
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -669,7 +685,11 @@ const SETTINGS_REGISTRY: Record<string, SettingsSectionEntry> = {
|
||||
help: 'After each response, re-submit the conversation to pre-fill the server KV cache. Makes the next turn faster since the prompt is already encoded while you read the response.',
|
||||
defaultValue: false,
|
||||
type: SettingsFieldType.CHECKBOX,
|
||||
section: SETTINGS_SECTION_SLUGS.DEVELOPER
|
||||
section: SETTINGS_SECTION_SLUGS.DEVELOPER,
|
||||
sync: {
|
||||
serverKey: SETTINGS_KEYS.PRE_ENCODE_CONVERSATION,
|
||||
paramType: SyncableParameterType.BOOLEAN
|
||||
}
|
||||
},
|
||||
{
|
||||
key: SETTINGS_KEYS.DISABLE_REASONING_PARSING,
|
||||
@@ -677,7 +697,11 @@ const SETTINGS_REGISTRY: Record<string, SettingsSectionEntry> = {
|
||||
help: 'Send reasoning_format=none so the server returns thinking tokens inline instead of extracting them into a separate field.',
|
||||
defaultValue: false,
|
||||
type: SettingsFieldType.CHECKBOX,
|
||||
section: SETTINGS_SECTION_SLUGS.DEVELOPER
|
||||
section: SETTINGS_SECTION_SLUGS.DEVELOPER,
|
||||
sync: {
|
||||
serverKey: SETTINGS_KEYS.DISABLE_REASONING_PARSING,
|
||||
paramType: SyncableParameterType.BOOLEAN
|
||||
}
|
||||
},
|
||||
{
|
||||
key: SETTINGS_KEYS.EXCLUDE_REASONING_FROM_CONTEXT,
|
||||
@@ -691,14 +715,6 @@ const SETTINGS_REGISTRY: Record<string, SettingsSectionEntry> = {
|
||||
paramType: SyncableParameterType.BOOLEAN
|
||||
}
|
||||
},
|
||||
{
|
||||
key: SETTINGS_KEYS.ENABLE_THINKING,
|
||||
label: 'Enable thinking',
|
||||
help: 'Enable model thinking/reasoning for each request. When off, the model will skip the thinking phase and go straight to the response.',
|
||||
defaultValue: false,
|
||||
type: SettingsFieldType.CHECKBOX,
|
||||
section: SETTINGS_SECTION_SLUGS.DEVELOPER
|
||||
},
|
||||
{
|
||||
key: SETTINGS_KEYS.SHOW_RAW_OUTPUT_SWITCH,
|
||||
label: 'Enable raw output toggle',
|
||||
@@ -717,7 +733,11 @@ const SETTINGS_REGISTRY: Record<string, SettingsSectionEntry> = {
|
||||
help: 'Expose a run_javascript tool to the model. Code runs in a Web Worker inside a sandboxed iframe with an opaque origin, isolated from the WebUI and its API, with a hard timeout.',
|
||||
defaultValue: false,
|
||||
type: SettingsFieldType.CHECKBOX,
|
||||
section: SETTINGS_SECTION_SLUGS.DEVELOPER
|
||||
section: SETTINGS_SECTION_SLUGS.DEVELOPER,
|
||||
sync: {
|
||||
serverKey: SETTINGS_KEYS.JS_SANDBOX_ENABLED,
|
||||
paramType: SyncableParameterType.BOOLEAN
|
||||
}
|
||||
},
|
||||
{
|
||||
key: SETTINGS_KEYS.CUSTOM_JSON,
|
||||
@@ -753,7 +773,11 @@ const SETTINGS_REGISTRY: Record<string, SettingsSectionEntry> = {
|
||||
defaultValue: DEFAULT_MCP_CONFIG.requestTimeoutSeconds,
|
||||
type: SettingsFieldType.INPUT,
|
||||
section: SETTINGS_SECTION_SLUGS.MCP,
|
||||
isPositiveInteger: true
|
||||
isPositiveInteger: true,
|
||||
sync: {
|
||||
serverKey: SETTINGS_KEYS.MCP_REQUEST_TIMEOUT_SECONDS,
|
||||
paramType: SyncableParameterType.NUMBER
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -551,13 +551,49 @@ const mcpDefaultEnabledMigration: Migration = {
|
||||
}
|
||||
};
|
||||
|
||||
const CONFIG_TYPES_MIGRATION_ID = 'config-type-normalization-v1';
|
||||
|
||||
const configTypesMigration: Migration = {
|
||||
id: CONFIG_TYPES_MIGRATION_ID,
|
||||
description: 'Coerce legacy string-encoded booleans in persisted config to real booleans',
|
||||
|
||||
async run(): Promise<void> {
|
||||
const configRaw = localStorage.getItem(CONFIG_LOCALSTORAGE_KEY);
|
||||
if (configRaw === null) return;
|
||||
|
||||
const config = JSON.parse(configRaw);
|
||||
let changed = false;
|
||||
|
||||
// Pre-schema configs persisted booleans as the strings "true"/"false", which the
|
||||
// strict server schema now rejects. Coerce those back to real booleans. No config
|
||||
// string field holds exactly "true"/"false", so the match is unambiguous.
|
||||
for (const key of Object.keys(config)) {
|
||||
if (config[key] === 'true') {
|
||||
config[key] = true;
|
||||
changed = true;
|
||||
} else if (config[key] === 'false') {
|
||||
config[key] = false;
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (changed) {
|
||||
localStorage.setItem(CONFIG_LOCALSTORAGE_KEY, JSON.stringify(config));
|
||||
}
|
||||
|
||||
if (import.meta.env.DEV && import.meta.env.VITE_DEBUG)
|
||||
console.log(`[Migration] Config types: coerced string booleans (changed=${changed})`);
|
||||
}
|
||||
};
|
||||
|
||||
const migrations: Migration[] = [
|
||||
localStorageMigration,
|
||||
idxdbMigration,
|
||||
legacyMessageMigration,
|
||||
themeMigration,
|
||||
customJsonKeyMigration,
|
||||
mcpDefaultEnabledMigration
|
||||
mcpDefaultEnabledMigration,
|
||||
configTypesMigration
|
||||
];
|
||||
|
||||
export const MigrationService = {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { ServerModelStatus } from '$lib/enums';
|
||||
import { apiFetch, apiPost } from '$lib/utils';
|
||||
import { apiFetch, apiPost, normalizeModelName } from '$lib/utils';
|
||||
import type { ParsedModelId } from '$lib/types/models';
|
||||
import {
|
||||
MODEL_QUANTIZATION_SEGMENT_RE,
|
||||
@@ -7,6 +7,7 @@ import {
|
||||
MODEL_PARAMS_RE,
|
||||
MODEL_ACTIVATED_PARAMS_RE,
|
||||
MODEL_IGNORED_SEGMENTS,
|
||||
MODEL_WEIGHT_EXTENSION_RE,
|
||||
MODEL_ID_NOT_FOUND,
|
||||
MODEL_ID_ORG_SEPARATOR,
|
||||
MODEL_ID_SEGMENT_SEPARATOR,
|
||||
@@ -139,15 +140,19 @@ export class ModelsService {
|
||||
tags: []
|
||||
};
|
||||
|
||||
// strip directory path and weight extension so a bare `-m /path/file.gguf`
|
||||
// parses like a clean repo id; the HF `org/model` form is preserved
|
||||
const source = normalizeModelName(modelId).replace(MODEL_WEIGHT_EXTENSION_RE, '');
|
||||
|
||||
// 1. Extract colon-separated quantization (e.g. `model:Q4_K_M`)
|
||||
const colonIdx = modelId.indexOf(MODEL_ID_QUANTIZATION_SEPARATOR);
|
||||
const colonIdx = source.indexOf(MODEL_ID_QUANTIZATION_SEPARATOR);
|
||||
let modelPath: string;
|
||||
|
||||
if (colonIdx !== MODEL_ID_NOT_FOUND) {
|
||||
result.quantization = modelId.slice(colonIdx + 1) || null;
|
||||
modelPath = modelId.slice(0, colonIdx);
|
||||
result.quantization = source.slice(colonIdx + 1) || null;
|
||||
modelPath = source.slice(0, colonIdx);
|
||||
} else {
|
||||
modelPath = modelId;
|
||||
modelPath = source;
|
||||
}
|
||||
|
||||
// 2. Extract org name (e.g. `org/model` -> org = "org")
|
||||
|
||||
@@ -114,14 +114,13 @@ class ConversationsStore {
|
||||
|
||||
/** Load thinking-enabled default from localStorage */
|
||||
private static loadThinkingDefaults(): boolean {
|
||||
if (typeof globalThis.localStorage === 'undefined') return false;
|
||||
if (typeof globalThis.localStorage === 'undefined') return true;
|
||||
try {
|
||||
const raw = localStorage.getItem(THINKING_ENABLED_DEFAULT_LOCALSTORAGE_KEY);
|
||||
if (!raw) return false;
|
||||
const parsed = raw === 'true';
|
||||
return typeof parsed === 'boolean' ? parsed : false;
|
||||
if (!raw) return true;
|
||||
return raw === 'true';
|
||||
} catch {
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -333,7 +332,7 @@ class ConversationsStore {
|
||||
}
|
||||
|
||||
this.pendingMcpServerOverrides = [];
|
||||
this.pendingThinkingEnabled = false;
|
||||
this.pendingThinkingEnabled = ConversationsStore.loadThinkingDefaults();
|
||||
this.activeConversation = conversation;
|
||||
|
||||
if (conversation.currNode) {
|
||||
|
||||
@@ -92,18 +92,14 @@ export function filterByLeafNodeId(
|
||||
* Finds the leaf node (message with no children) for a given message branch.
|
||||
* Traverses down the tree following the last child until reaching a leaf.
|
||||
*
|
||||
* @param messages - All messages in the conversation
|
||||
* @param nodeMap - Map of messages keyed by ID
|
||||
* @param messageId - Starting message ID to find leaf for
|
||||
* @returns The leaf node ID, or the original messageId if no children
|
||||
*/
|
||||
export function findLeafNode(messages: readonly DatabaseMessage[], messageId: string): string {
|
||||
const nodeMap = new Map<string, DatabaseMessage>();
|
||||
|
||||
// Build node map for quick lookups
|
||||
for (const msg of messages) {
|
||||
nodeMap.set(msg.id, msg);
|
||||
}
|
||||
|
||||
function findLeafNodeInMap(
|
||||
nodeMap: ReadonlyMap<string, DatabaseMessage>,
|
||||
messageId: string
|
||||
): string {
|
||||
let currentNode: DatabaseMessage | undefined = nodeMap.get(messageId);
|
||||
while (currentNode && currentNode.children.length > 0) {
|
||||
// Follow the last child (most recent branch)
|
||||
@@ -114,6 +110,22 @@ export function findLeafNode(messages: readonly DatabaseMessage[], messageId: st
|
||||
return currentNode?.id ?? messageId;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience wrapper around {@link findLeafNodeInMap} for callers that only have
|
||||
* a flat message array.
|
||||
*
|
||||
* Finds the leaf node (message with no children) for a given message branch.
|
||||
* Traverses down the tree following the last child until reaching a leaf.
|
||||
*
|
||||
* @param messages - All messages in the conversation
|
||||
* @param messageId - Starting message ID to find leaf for
|
||||
* @returns The leaf node ID, or the original messageId if no children
|
||||
*/
|
||||
export function findLeafNode(messages: readonly DatabaseMessage[], messageId: string): string {
|
||||
const nodeMap = new Map(messages.map((msg) => [msg.id, msg] as const));
|
||||
return findLeafNodeInMap(nodeMap, messageId);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds all descendant messages (children, grandchildren, etc.) of a given message.
|
||||
* This is used for cascading deletion to remove all messages in a branch.
|
||||
@@ -156,21 +168,14 @@ export function findDescendantMessages(
|
||||
* Gets sibling information for a message, including all sibling IDs and current position.
|
||||
* Siblings are messages that share the same parent.
|
||||
*
|
||||
* @param messages - All messages in the conversation
|
||||
* @param nodeMap - Map of messages keyed by ID
|
||||
* @param messageId - The message to get sibling info for
|
||||
* @returns Sibling information including leaf node IDs for navigation
|
||||
*/
|
||||
export function getMessageSiblings(
|
||||
messages: readonly DatabaseMessage[],
|
||||
nodeMap: ReadonlyMap<string, DatabaseMessage>,
|
||||
messageId: string
|
||||
): ChatMessageSiblingInfo | null {
|
||||
const nodeMap = new Map<string, DatabaseMessage>();
|
||||
|
||||
// Build node map for quick lookups
|
||||
for (const msg of messages) {
|
||||
nodeMap.set(msg.id, msg);
|
||||
}
|
||||
|
||||
const message = nodeMap.get(messageId);
|
||||
if (!message) {
|
||||
return null;
|
||||
@@ -203,7 +208,9 @@ export function getMessageSiblings(
|
||||
|
||||
// Convert sibling message IDs to their corresponding leaf node IDs
|
||||
// This allows navigation between different conversation branches
|
||||
const siblingLeafIds = siblingIds.map((siblingId: string) => findLeafNode(messages, siblingId));
|
||||
const siblingLeafIds = siblingIds.map((siblingId: string) =>
|
||||
findLeafNodeInMap(nodeMap, siblingId)
|
||||
);
|
||||
|
||||
// Find current message's position among siblings
|
||||
const currentIndex = siblingIds.indexOf(messageId);
|
||||
@@ -217,85 +224,22 @@ export function getMessageSiblings(
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a display-ready list of messages with sibling information for UI rendering.
|
||||
* This is the main function used by chat components to render conversation branches.
|
||||
* Builds sibling information for every message in a conversation.
|
||||
* A single node map is shared across all lookups for O(1) access.
|
||||
*
|
||||
* @param messages - All messages in the conversation
|
||||
* @param leafNodeId - Current leaf node being viewed
|
||||
* @returns Array of messages with sibling navigation info
|
||||
* @returns Map of message ID to its sibling information
|
||||
*/
|
||||
export function getMessageDisplayList(
|
||||
messages: readonly DatabaseMessage[],
|
||||
leafNodeId: string
|
||||
): ChatMessageSiblingInfo[] {
|
||||
// Get the current conversation path
|
||||
const currentPath = filterByLeafNodeId(messages, leafNodeId, true);
|
||||
const result: ChatMessageSiblingInfo[] = [];
|
||||
|
||||
// Add sibling info for each message in the current path
|
||||
for (const message of currentPath) {
|
||||
if (message.type === 'root') {
|
||||
continue; // Skip root messages in display
|
||||
}
|
||||
|
||||
const siblingInfo = getMessageSiblings(messages, message.id);
|
||||
if (siblingInfo) {
|
||||
result.push(siblingInfo);
|
||||
export function buildSiblingInfoMap(
|
||||
messages: readonly DatabaseMessage[]
|
||||
): Map<string, ChatMessageSiblingInfo> {
|
||||
const nodeMap = new Map(messages.map((msg) => [msg.id, msg] as const));
|
||||
const siblingMap = new Map<string, ChatMessageSiblingInfo>();
|
||||
for (const msg of messages) {
|
||||
const info = getMessageSiblings(nodeMap, msg.id);
|
||||
if (info) {
|
||||
siblingMap.set(msg.id, info);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a message has multiple siblings (indicating branching at that point).
|
||||
*
|
||||
* @param messages - All messages in the conversation
|
||||
* @param messageId - The message to check
|
||||
* @returns True if the message has siblings
|
||||
*/
|
||||
export function hasMessageSiblings(
|
||||
messages: readonly DatabaseMessage[],
|
||||
messageId: string
|
||||
): boolean {
|
||||
const siblingInfo = getMessageSiblings(messages, messageId);
|
||||
return siblingInfo ? siblingInfo.totalSiblings > 1 : false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the next sibling message ID for navigation.
|
||||
*
|
||||
* @param messages - All messages in the conversation
|
||||
* @param messageId - Current message ID
|
||||
* @returns Next sibling's leaf node ID, or null if at the end
|
||||
*/
|
||||
export function getNextSibling(
|
||||
messages: readonly DatabaseMessage[],
|
||||
messageId: string
|
||||
): string | null {
|
||||
const siblingInfo = getMessageSiblings(messages, messageId);
|
||||
if (!siblingInfo || siblingInfo.currentIndex >= siblingInfo.totalSiblings - 1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return siblingInfo.siblingIds[siblingInfo.currentIndex + 1];
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the previous sibling message ID for navigation.
|
||||
*
|
||||
* @param messages - All messages in the conversation
|
||||
* @param messageId - Current message ID
|
||||
* @returns Previous sibling's leaf node ID, or null if at the beginning
|
||||
*/
|
||||
export function getPreviousSibling(
|
||||
messages: readonly DatabaseMessage[],
|
||||
messageId: string
|
||||
): string | null {
|
||||
const siblingInfo = getMessageSiblings(messages, messageId);
|
||||
if (!siblingInfo || siblingInfo.currentIndex <= 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return siblingInfo.siblingIds[siblingInfo.currentIndex - 1];
|
||||
return siblingMap;
|
||||
}
|
||||
|
||||
@@ -26,10 +26,7 @@ export {
|
||||
findLeafNode,
|
||||
findDescendantMessages,
|
||||
getMessageSiblings,
|
||||
getMessageDisplayList,
|
||||
hasMessageSiblings,
|
||||
getNextSibling,
|
||||
getPreviousSibling
|
||||
buildSiblingInfoMap
|
||||
} from './branching';
|
||||
|
||||
// Code
|
||||
|
||||
Reference in New Issue
Block a user