jinja, chat: add --reasoning-preserve flag (#25105 )

* jinja, chat: add --reasoning-preserve flag * correct help message
Revert "ui: fix accessibility for hover-gated interactive elements assisted by claude(in debugging and tests) (#24727 )" (#25098 )
2026-06-29 02:33:03 +02:00 · 2026-06-28 23:33:51 +02:00 · 2026-06-28 21:30:03 +02:00 · 2026-06-28 21:06:43 +02:00 · 2026-06-28 20:31:48 +02:00 · 2026-06-28 16:53:32 +02:00
20 changed files with 717 additions and 108 deletions
@@ -3296,6 +3296,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
            params.sampling.reasoning_budget_message = value;
        }
    ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_THINK_BUDGET_MESSAGE"));
+    add_opt(common_arg(
+        {"--reasoning-preserve"},
+        {"--no-reasoning-preserve"},
+        "preserve reasoning trace in the full history, not just the last assistant message (default: template default)\n"
+        "compatible with certain templates having 'supports_preserve_reasoning' capability\n"
+        "example: https://docs.z.ai/guides/capabilities/thinking-mode#preserved-thinking",
+        [](common_params & params, bool value) {
+            if (value) {
+                params.default_template_kwargs["preserve_reasoning"] = "true";
+            } else {
+                params.default_template_kwargs["preserve_reasoning"] = "false";
+            }
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_REASONING_PRESERVE"));
    add_opt(common_arg(
        {"--chat-template"}, "JINJA_TEMPLATE",
        string_format(
@@ -912,6 +912,10 @@ static std::string common_chat_template_direct_apply_impl(
    if (inputs.add_generation_prompt) {
        inp["add_generation_prompt"] = true;
    }
+    if (inp.contains("preserve_reasoning") && inp["preserve_reasoning"].is_boolean()) {
+        bool enabled = inp["preserve_reasoning"].get<bool>();
+        jinja::caps_apply_preserve_reasoning(ctx, enabled);
+    }

    jinja::global_from_json(ctx, inp, inputs.mark_input);

@@ -2376,6 +2380,149 @@ static void func_args_not_string(json & messages) {

 }

+// MiniCPM5 format:
+// - Reasoning: <think>{reasoning}</think> (optional)
+// - Tool calls: <function name="foo"><param name="bar">value</param></function>
+static common_chat_params common_chat_params_init_minicpm5(const common_chat_template &          tmpl,
+                                                           const autoparser::generation_params & inputs) {
+    common_chat_params data;
+
+    data.prompt            = common_chat_template_direct_apply_impl(tmpl, inputs);
+    data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs);
+    data.format            = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.supports_thinking = true;
+    data.preserved_tokens  = {
+        "<function",
+        "<param",
+        "</function>",
+        "</param>",
+        "<think>",
+        "</think>",
+    };
+
+    data.thinking_start_tag = "<think>";
+    data.thinking_end_tag   = "</think>";
+
+    data.message_delimiters = {
+        { COMMON_CHAT_ROLE_ASSISTANT, "<|im_start|>assistant"             },
+        { COMMON_CHAT_ROLE_TOOL,      "<|im_start|>user\n<tool_response>" },
+        { COMMON_CHAT_ROLE_USER,      "<|im_start|>user"                  },
+        { COMMON_CHAT_ROLE_SYSTEM,    "<|im_start|>system"                },
+    };
+
+    auto has_tools           = inputs.tools.is_array() && !inputs.tools.empty();
+    auto has_response_format = inputs.json_schema.is_object() && !inputs.json_schema.empty();
+    auto extract_reasoning   = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+    auto include_grammar     = has_response_format || (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE);
+
+    if (inputs.has_continuation()) {
+        const auto & msg = inputs.continue_msg;
+
+        data.generation_prompt = "<|im_start|>assistant\n<think>\n" + msg.reasoning_content;
+        if (inputs.continue_final_message == COMMON_CHAT_CONTINUATION_CONTENT) {
+            data.generation_prompt += "\n</think>\n\n" + msg.render_content();
+        }
+
+        data.prompt += data.generation_prompt;
+    }
+
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        auto generation_prompt = p.literal("<|im_start|>assistant\n");
+
+        auto reasoning = p.eps();
+        if (extract_reasoning) {
+            reasoning = ("<think>" << p.reasoning(p.until("</think>")) << "</think>") + p.space();
+        }
+
+        // Response format parser
+        if (has_response_format) {
+            return generation_prompt + reasoning + p.content(p.schema(p.json(), "response-format", inputs.json_schema));
+        }
+
+        if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
+            // CDATA lets a value carry characters that would otherwise close the tag (e.g.
+            // </param>); capture the inner text only, excluding the CDATA markers.
+            auto string_value = p.choice({
+                p.literal("<![CDATA[") + p.ac(p.tool_arg_string_value(p.until("]]>")) + p.literal("]]>"), "]]>") + p.tool_arg_close(p.literal("</param>")),
+                p.negate(p.literal("<![CDATA[")) + p.ac(p.tool_arg_string_value(p.until("</param>")) + p.tool_arg_close(p.literal("</param>")), "</param>")
+            });
+
+            auto tool_choice = p.choice();
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto &      function = tool.at("function");
+                const std::string name     = function.at("name");
+                auto              params   = function.contains("parameters") ? function.at("parameters") : json::object();
+
+                auto args = p.eps();
+                if (params.contains("properties") && params.at("properties").is_object() && !params.at("properties").empty()) {
+                    auto schema_info = common_schema_info();
+                    schema_info.resolve_refs(params);
+
+                    auto arg_choice = p.choice();
+                    for (const auto & [prop_name, prop_schema] : params.at("properties").items()) {
+                        auto value_parser = p.eps();
+                        if (schema_info.resolves_to_string(prop_schema)) {
+                            value_parser = string_value;
+                        } else {
+                            value_parser = p.tool_arg_json_value(
+                                    p.schema(p.json(), "tool-" + name + "-arg-" + prop_name + "-schema", prop_schema, false)
+                                ) + p.tool_arg_close(p.literal("</param>"));
+                        }
+
+                        auto arg_rule = p.tool_arg(
+                            p.tool_arg_open(p.literal("<param name=\"") + p.tool_arg_name(p.literal(prop_name)) + p.literal("\">")) +
+                            value_parser
+                        );
+
+                        arg_choice |= arg_rule;
+                    }
+                    args = p.zero_or_more(arg_choice + p.space());
+                }
+
+                auto tool_parser = p.tool(
+                    p.tool_open(p.literal("<function name=\"") + p.tool_name(p.literal(name)) + p.literal("\">"))
+                    << p.tool_args(args)
+                    << p.tool_close(p.literal("</function>")));
+
+                tool_choice |= p.rule("tool-" + name, tool_parser);
+            });
+
+            auto max_calls  = inputs.parallel_tool_calls ? -1 : 1;
+            auto tool_calls = p.trigger_rule("tool-call", p.repeat(tool_choice + p.space(), 1, max_calls));
+
+            auto content = p.content(p.until("<function"));
+
+            return generation_prompt + reasoning + content + tool_calls + p.end();
+        }
+
+        return generation_prompt + reasoning + p.content(p.rest()) + p.end();
+    });
+
+    data.parser = parser.save();
+
+    if (include_grammar) {
+        data.grammar_lazy = !(has_response_format || (has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED));
+        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                auto         schema   = function.contains("parameters") ? function.at("parameters") : json::object();
+                builder.resolve_refs(schema);
+            });
+            if (has_response_format) {
+                auto schema = inputs.json_schema;
+                builder.resolve_refs(schema);
+            }
+            parser.build_grammar(builder, data.grammar_lazy);
+        });
+
+        data.grammar_triggers = {
+            { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function" },
+        };
+    }
+
+    return data;
+}
+
 static json common_chat_extra_context() {
    json ctx = json::object();
    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
@@ -2468,6 +2615,14 @@ std::optional<common_chat_params> common_chat_try_specialized_template(
        return common_chat_params_init_gemma4(tmpl, params);
    }

+    // MiniCPM5 - XML tool calls with <function name="..."><param name="...">...</param></function>
+    if (src.find("Tool usage guidelines:") != std::string::npos &&
+        src.find("<function name=\"") != std::string::npos &&
+        src.find("<param name=\"") != std::string::npos) {
+        LOG_DBG("Using specialized template: MiniCPM5\n");
+        return common_chat_params_init_minicpm5(tmpl, params);
+    }
+
    return std::nullopt;
 }

@@ -16,22 +16,34 @@ using json = nlohmann::ordered_json;
 namespace jinja {

 using caps_json_fn = std::function<json()>;
-using caps_analyze_fn = std::function<void(bool, value &, value &)>;
+using caps_ctx_fn = std::function<void(context &)>;
+using caps_analyze_fn = std::function<void(bool, value &, value &, const std::string &)>;
+
+void caps_apply_preserve_reasoning(jinja::context & ctx, bool enabled) {
+    ctx.set_val("preserve_thinking",         mk_val<value_bool>(enabled));
+    ctx.set_val("clear_thinking",            mk_val<value_bool>(!enabled));
+    ctx.set_val("truncate_history_thinking", mk_val<value_bool>(!enabled));
+}

 static void caps_try_execute(jinja::program & prog,
                             const caps_json_fn & messages_fn,
+                             const caps_ctx_fn & ctx_fn,
                             const caps_json_fn & tools_fn,
                             const caps_analyze_fn & analyze_fn) {
    context ctx;
    ctx.is_get_stats = true;
    jinja::global_from_json(ctx, json{
        {"messages", messages_fn()},
-        {"tools", tools_fn()},
+        {"tools", tools_fn ? tools_fn() : json::array()},
        {"bos_token", ""},
        {"eos_token", ""},
        {"add_generation_prompt", true}
    }, true);

+    if (ctx_fn) {
+        ctx_fn(ctx);
+    }
+
    auto messages = ctx.get_val("messages");
    auto tools = ctx.get_val("tools");

@@ -49,7 +61,7 @@ static void caps_try_execute(jinja::program & prog,
        // ignore exceptions during capability analysis
    }

-    analyze_fn(success, messages, tools);
+    analyze_fn(success, messages, tools, result);
 }

 // for debugging only
@@ -109,11 +121,9 @@ caps caps_get(jinja::program & prog) {
                }
            });
        },
-        [&]() {
-            // tools
-            return json{nullptr};
-        },
-        [&](bool success, value & messages, value &) {
+        nullptr, // ctx_fn
+        nullptr, // tools_fn
+        [&](bool success, value & messages, value &, const std::string &) {
            auto & content = messages->at(0)->at("content");
            caps_print_stats(content, "messages[0].content");
            if (has_op(content, "selectattr") || has_op(content, "array_access")) {
@@ -145,11 +155,9 @@ caps caps_get(jinja::program & prog) {
                },
            });
        },
-        [&]() {
-            // tools
-            return json::array();
-        },
-        [&](bool, value & messages, value &) {
+        nullptr, // ctx_fn
+        nullptr, // tools_fn
+        [&](bool, value & messages, value &, const std::string &) {
            auto & content = messages->at(0)->at("content");
            caps_print_stats(content, "messages[0].content");
            if (!content->stats.used) {
@@ -201,6 +209,7 @@ caps caps_get(jinja::program & prog) {
                },
            });
        },
+        nullptr, // ctx_fn
        [&]() {
            // tools
            return json::array({
@@ -224,7 +233,7 @@ caps caps_get(jinja::program & prog) {
                },
            });
        },
-        [&](bool success, value & messages, value & tools) {
+        [&](bool success, value & messages, value & tools, const std::string &) {
            if (!success) {
                return; // Nothing can be inferred
            }
@@ -293,6 +302,7 @@ caps caps_get(jinja::program & prog) {
                    },
                });
            },
+            nullptr, // ctx_fn
            [&]() {
                // tools
                return json::array({
@@ -316,7 +326,7 @@ caps caps_get(jinja::program & prog) {
                    },
                });
            },
-            [&](bool success, value & messages, value & tools) {
+            [&](bool success, value & messages, value & tools, const std::string &) {
                if (!success) {
                    result.supports_tool_calls = false;
                    result.supports_tools = false;
@@ -394,6 +404,7 @@ caps caps_get(jinja::program & prog) {
                },
            });
        },
+        nullptr, // ctx_fn
        [&]() {
            // tools
            return json::array({
@@ -417,7 +428,7 @@ caps caps_get(jinja::program & prog) {
                },
            });
        },
-        [&](bool success, value & messages, value & /*tools*/) {
+        [&](bool success, value & messages, value &, const std::string &) {
            if (!success) {
                result.supports_parallel_tool_calls = false;
                return;
@@ -438,11 +449,22 @@ caps caps_get(jinja::program & prog) {
    JJ_DEBUG("%s\n", ">>> Running capability check: preserve reasoning");

    // case: preserve reasoning content in chat history
+    const std::string reasoning_placeholder = "<REASONING_CONTENT_PLACEHOLDER>";
    caps_try_execute(
        prog,
        [&]() {
            // messages
            return json::array({
+                {
+                    {"role", "user"},
+                    {"content", "User message"}
+                },
+                {
+                    {"role", "assistant"},
+                    {"content", "Assistant message"},
+                    // check of reasoning_content deeper in the history, not just the last assistant message
+                    {"reasoning_content", reasoning_placeholder}
+                },
                {
                    {"role", "user"},
                    {"content", "User message"}
@@ -458,14 +480,13 @@ caps caps_get(jinja::program & prog) {
                },
            });
        },
-        [&]() {
-            // tools
-            return json::array();
+        [&](context & ctx) {
+            caps_apply_preserve_reasoning(ctx, true);
        },
-        [&](bool, value & messages, value &) {
-            auto & content = messages->at(1)->at("reasoning_content");
-            caps_print_stats(content, "messages[1].reasoning_content");
-            if (content->stats.used) {
+        nullptr, // tools_fn
+        [&](bool, value &, value &, const std::string & output) {
+            // note: we cannot use stats here because the reasoning_content may be used for "if" condition test, but not actually outputted in the final result
+            if (output.find(reasoning_placeholder) != std::string::npos) {
                result.supports_preserve_reasoning = true;
            }
        }
@@ -12,7 +12,9 @@ struct caps {
    bool supports_tool_calls = true;
    bool supports_system_role = true;
    bool supports_parallel_tool_calls = true;
-    bool supports_preserve_reasoning = false; // support assistant message with reasoning_content
+
+    // supports preserve reasoning trace in the full history, not just the last assistant message
+    bool supports_preserve_reasoning = false;

    // one of the 2 content capabilities must be true
    bool supports_string_content = true;
@@ -29,4 +31,6 @@ struct caps {

 caps caps_get(jinja::program & prog);

+void caps_apply_preserve_reasoning(jinja::context & ctx, bool enabled);
+
 } // namespace jinja
@@ -1108,6 +1108,50 @@ const func_builtins & value_array_t::get_builtins() const {
            std::reverse(arr.begin(), arr.end());
            return is_val<value_tuple>(val) ? mk_val<value_tuple>(std::move(arr)) : mk_val<value_array>(std::move(arr));
        }},
+        {"min", [](const func_args & args) -> value {
+            args.ensure_count(1, 4);
+            args.ensure_vals<value_array>();
+            value val_case    = args.get_kwarg_or_pos("case_sensitive", 1);
+            value attribute   = args.get_kwarg_or_pos("attribute",      2);
+            if (!attribute->is_undefined()) {
+                throw not_implemented_exception("min: attribute not implemented");
+            }
+            // FIXME: min is currently always case sensitive
+            (void) val_case;
+            const auto & arr = args.get_pos(0)->as_array();
+            if (arr.empty()) {
+                return mk_val<value_undefined>();
+            }
+            value result = arr[0];
+            for (size_t i = 1; i < arr.size(); ++i) {
+                if (value_compare(arr[i], result, value_compare_op::lt)) {
+                    result = arr[i];
+                }
+            }
+            return result;
+        }},
+        {"max", [](const func_args & args) -> value {
+            args.ensure_count(1, 4);
+            args.ensure_vals<value_array>();
+            value val_case    = args.get_kwarg_or_pos("case_sensitive", 1);
+            value attribute   = args.get_kwarg_or_pos("attribute",      2);
+            if (!attribute->is_undefined()) {
+                throw not_implemented_exception("max: attribute not implemented");
+            }
+            // FIXME: max is currently always case sensitive
+            (void) val_case;
+            const auto & arr = args.get_pos(0)->as_array();
+            if (arr.empty()) {
+                return mk_val<value_undefined>();
+            }
+            value result = arr[0];
+            for (size_t i = 1; i < arr.size(); ++i) {
+                if (value_compare(arr[i], result, value_compare_op::gt)) {
+                    result = arr[i];
+                }
+            }
+            return result;
+        }},
        {"unique", array_unique_not_implemented},
    };
    return builtins;
@@ -73,7 +73,7 @@ class LlamaModel(TextModel):
            target_num_layers = target_config["num_hidden_layers"]
            target_layers = [2, target_num_layers // 2, target_num_layers - 3]
            logger.info(f"EAGLE-3: target_layers = {target_layers} (target model has {target_num_layers} layers)")
-            self.gguf_writer.add_array(f"{self.gguf_writer.arch}.target_layers", target_layers)
+            self.gguf_writer.add_target_layers(target_layers)

            # target_hidden_size: prefer eagle3 config, fallback to target config
            if eagle3_raw_config.get("target_hidden_size") is not None:
@@ -83,12 +83,12 @@ class LlamaModel(TextModel):
                target_hidden_size = target_config["hidden_size"]
                src = "target model config"
            logger.info(f"EAGLE-3: target_hidden_size = {target_hidden_size} (from {src})")
-            self.gguf_writer.add_uint32(f"{self.gguf_writer.arch}.target_hidden_size", target_hidden_size)
+            self.gguf_writer.add_target_hidden_size(target_hidden_size)

            # norm_before_residual (RedHat-style eagle3 specific)
            norm_before_residual = eagle3_raw_config.get("norm_before_residual", False)
            logger.info(f"EAGLE-3: norm_before_residual = {norm_before_residual}")
-            self.gguf_writer.add_bool(f"{self.gguf_writer.arch}.norm_before_residual", norm_before_residual)
+            self.gguf_writer.add_norm_before_residual(norm_before_residual)

    def set_vocab(self):
        # eagle3: use tokenizer from target model if provided
@@ -643,21 +643,21 @@ class DFlashModel(Qwen3Model):
        super().set_vocab()
        self.dir_model = original_dir

+        mask_token_id = self.hparams.get("dflash_config", {}).get("mask_token_id")
+        if mask_token_id is not None:
+            self.gguf_writer.add_mask_token_id(mask_token_id)
+
    def set_gguf_parameters(self):
        super().set_gguf_parameters()

        block_size = self.hparams.get("block_size", 16)
-        self.gguf_writer.add_uint32(f"{self.gguf_writer.arch}.block_size", block_size)
+        self.gguf_writer.add_block_size(block_size)
        dflash_config = self.hparams.get("dflash_config", {})

        target_layer_ids = dflash_config.get("target_layer_ids", [])
        if target_layer_ids:
            extract_layer_ids = [i + 1 for i in target_layer_ids]
-            self.gguf_writer.add_array(f"{self.gguf_writer.arch}.target_layers", extract_layer_ids)
-
-        mask_token_id = dflash_config.get("mask_token_id", None)
-        if mask_token_id is not None:
-            self.gguf_writer.add_mask_token_id(mask_token_id)
+            self.gguf_writer.add_target_layers(extract_layer_ids)

        use_sliding_window = self.hparams.get("use_sliding_window", False)
        sliding_window = self.hparams.get("sliding_window")
@@ -667,13 +667,9 @@ class DFlashModel(Qwen3Model):
            self.gguf_writer.add_sliding_window(sliding_window)
            self.gguf_writer.add_sliding_window_pattern(is_swa)

-    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-        if name == "fc.weight":
-            yield (name, data_torch)
-            return
-        if name == "hidden_norm.weight":
-            yield (self.format_tensor_name(gguf.MODEL_TENSOR.ENC_OUTPUT_NORM), data_torch)
-            return
+    @classmethod
+    def filter_tensors(cls, item: tuple[str, Callable[[], Tensor]]) -> tuple[str, Callable[[], Tensor]] | None:
+        name, gen = item
        if not name.startswith("model."):
            name = "model." + name
-        yield from super().modify_tensors(data_torch, name, bid)
+        return super().filter_tensors((name, gen))
@@ -156,6 +156,7 @@ class Keys:
        DENSE_FEAT_OUT_SIZE               = "{arch}.{dense}_feat_out"
        TARGET_LAYERS                     = "{arch}.target_layers"
        TARGET_HIDDEN_SIZE                = "{arch}.target_hidden_size"
+        BLOCK_SIZE                        = "{arch}.block_size"
        NORM_BEFORE_RESIDUAL              = "{arch}.norm_before_residual"

    class Attention:
@@ -940,6 +940,18 @@ class GGUFWriter:
    def add_sliding_window(self, value: int) -> None:
        self.add_uint32(Keys.Attention.SLIDING_WINDOW.format(arch=self.arch), value)

+    def add_block_size(self, value: int) -> None:
+        self.add_uint32(Keys.LLM.BLOCK_SIZE.format(arch=self.arch), value)
+
+    def add_target_layers(self, value: Sequence[int]) -> None:
+        self.add_array(Keys.LLM.TARGET_LAYERS.format(arch=self.arch), value)
+
+    def add_target_hidden_size(self, value: int) -> None:
+        self.add_uint32(Keys.LLM.TARGET_HIDDEN_SIZE.format(arch=self.arch), value)
+
+    def add_norm_before_residual(self, value: bool) -> None:
+        self.add_bool(Keys.LLM.NORM_BEFORE_RESIDUAL.format(arch=self.arch), value)
+
    def add_attention_scale(self, value: float) -> None:
        self.add_float32(Keys.Attention.SCALE.format(arch=self.arch), value)

@@ -1283,6 +1283,11 @@ class TensorNameMap:
        MODEL_TENSOR.ENC_OUTPUT_NORM: (
            "encoder.final_layer_norm", # t5
            "layer_norm",               # neobert
+            "model.hidden_norm",        # dflash
+        ),
+
+        MODEL_TENSOR.FC: (
+            "model.fc", # dflash
        ),

        MODEL_TENSOR.CLS: (
@@ -0,0 +1,179 @@
+{{- bos_token }}{%- if tools %}
+    {%- set tool_definitions %}
+        {{- "# Tools\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+        {%- for tool in tools %}
+            {{- "\n" }}
+            {{- tool | tojson(ensure_ascii=False) }}
+        {%- endfor %}
+        {{- '\n</tools>\n\nTool usage guidelines:\n- You may call zero or more functions. If no function calls are needed, just answer normally and do not include any <function ... </function>.\n- When calling a function, return an XML object within <function ... </function> using:\n<function name="function-name"><param name="param-name">param-value</param></function>\n- param-value may be multi-line. If it contains <, & or newline characters, wrap it in a CDATA block: <param name="param-name"><![CDATA[...multi-line value...]]></param>' }}
+    {%- endset %}
+    
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0].role == 'system' %}
+        {%- if '<tool_def_sep>' in messages[0].content %}
+            {{- messages[0].content.replace('<tool_def_sep>', tool_definitions) }}
+        {%- else %}
+            {{- messages[0].content + '\n\n' + tool_definitions }}
+        {%- endif %}
+    {%- else %}
+        {{- tool_definitions.lstrip() }}
+    {%- endif %}
+    {{- '<|im_end|>\n' }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
+        {%- set ns.multi_step_tool = false %}
+        {%- set ns.last_query_index = index %}
+    {%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+    {%- if message.content is string %}
+        {%- set content = message.content %}
+    {%- else %}
+        {%- set content = '' %}
+    {%- endif %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        
+        {%- if message.tool_calls %}
+            {%- set content_parts = content.split('<tool_sep>') %}
+            {%- set processed_content = content_parts[0] %}
+            {%- set tool_calls_count = message.tool_calls|length %}
+            {%- set tool_sep_count = content_parts|length - 1 %}
+            {%- set min_count = [tool_calls_count, tool_sep_count]|min %}
+            
+            {%- for i in range(1, content_parts|length) %}
+                {%- set tool_index = i - 1 %}
+                {%- if tool_index < tool_calls_count %}
+                    {%- set tool_call = message.tool_calls[tool_index] %}
+                    {%- if tool_call.function %}
+                        {%- set tool_call = tool_call.function %}
+                    {%- endif %}
+                    {%- set single_tool_xml %}
+                        {{- '<function name="' ~ tool_call.name ~ '">' }}
+                        {%- if tool_call.arguments %}
+                            {%- set args_dict = tool_call.arguments %}
+                            {%- for param_name, param_value in args_dict.items() %}
+                                {{- '<param name="' ~ param_name ~ '">' }}
+                                {%- if param_value is string and ('<' in param_value or '&' in param_value or '\n' in param_value) %}
+                                    {{- '<![CDATA[' + param_value + ']]>' }}
+                                {%- else %}
+                                    {{- param_value }}
+                                {%- endif %}
+                                {{- '</param>' }}
+                            {%- endfor %}
+                        {%- endif %}
+                        {{- '</function>' }}
+                    {%- endset %}
+                    {%- set processed_content = processed_content + single_tool_xml + content_parts[i] %}
+                {%- else %}
+                    {%- set processed_content = processed_content + content_parts[i] %}
+                {%- endif %}
+            {%- endfor %}
+            
+            {%- if tool_calls_count > tool_sep_count %}
+                {%- for remaining_index in range(tool_sep_count, tool_calls_count) %}
+                    {%- set tool_call = message.tool_calls[remaining_index] %}
+                    {%- if tool_call.function %}
+                        {%- set tool_call = tool_call.function %}
+                    {%- endif %}
+                    {%- set remaining_tool_xml %}
+                        {{- '<function name="' ~ tool_call.name ~ '">' }}
+                        {%- if tool_call.arguments %}
+                            {%- set args_dict = tool_call.arguments %}
+                            {%- for param_name, param_value in args_dict.items() %}
+                                {{- '<param name="' ~ param_name ~ '">' }}
+                                {%- if param_value is string and ('<' in param_value or '&' in param_value or '\n' in param_value) %}
+                                    {{- '<![CDATA[' + param_value + ']]>' }}
+                                {%- else %}
+                                    {{- param_value }}
+                                {%- endif %}
+                                {{- '</param>' }}
+                            {%- endfor %}
+                        {%- endif %}
+                        {{- '</function>' }}
+                    {%- endset %}
+                    {%- set processed_content = processed_content + remaining_tool_xml %}
+                {%- endfor %}
+            {%- endif %}
+            
+            {%- set content = processed_content %}
+        {%- endif %}
+        
+        {%- if loop.index0 > ns.last_query_index %}
+            {%- if reasoning_content %}
+                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+            {%- else %}
+                {{- '<|im_start|>' + message.role + '\n' + content }}
+            {%- endif %}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        
+        {%- if message.tool_calls and not has_tool_sep %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if (loop.first and content) or (not loop.first) %}
+                    {{- '\n' }}
+                {%- endif %}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<function name="' ~ tool_call.name ~ '">' }}
+                {%- if tool_call.arguments %}
+                    {%- set args_dict = tool_call.arguments %}
+                    {%- for param_name, param_value in args_dict.items() %}
+                        {{- '<param name="' ~ param_name ~ '">' }}
+                        {%- if param_value is string and ('<' in param_value or '&' in param_value or '\n' in param_value) %}
+                            {{- '<![CDATA[' + param_value + ']]>' }}
+                        {%- else %}
+                            {{- param_value }}
+                        {%- endif %}
+                        {{- '</param>' }}
+                    {%- endfor %}
+                {%- endif %}
+                {{- '</function>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {%- if message.content is string %}
+            {{- content }}
+        {%- else %}
+            {{- message.content | tojson(ensure_ascii=False) }}
+        {%- endif %}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined %}
+        {%- if enable_thinking is false %}
+            {{- '<think>\n\n</think>\n\n' }}
+        {%- elif enable_thinking is true %}
+            {{- '<think>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endif %}
@@ -5593,6 +5593,77 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
            .expect_content("Hello, world!\nWhat's up?")
            .run();
    }
+
+    // MiniCPM5 - XML tool calls with <function name="..."><param name="...">...</param></function>
+    {
+        auto tst = peg_tester("models/templates/openbmb-MiniCPM5-1B.jinja", detailed_debug);
+
+        tst.test("Hello, world!\nWhat's up?")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist)
+            .run();
+
+        tst.test(R"(<function name="python"><param name="code">print('Hello, World!')</param></function>)")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ python_tool })
+            .expect_tool_calls({ { "python", R"#({"code": "print('Hello, World!')"})#", {} } })
+            .run();
+
+        tst.test(R"(<function name="empty_args"></function>)")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ empty_args_tool })
+            .expect(simple_assist_msg("", "", "empty_args", "{}"))
+            .run();
+
+        tst.test(R"(<function name="python"><param name="code">print('x')</param></function>)")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({ python_tool })
+            .expect_tool_calls({ { "python", R"#({"code": "print('x')"})#", {} } })
+            .run();
+
+        // CDATA lets a string value carry characters that would otherwise close the tag.
+        tst.test(R"(<function name="html"><param name="markup"><![CDATA[<a href="/x">hi</a> </param>]]></param></function>)")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ html_tool })
+            .expect_tool_calls({ { "html", R"#({"markup": "<a href=\"/x\">hi</a> </param>"})#", {} } })
+            .run();
+
+        tst.test(R"(I'm thinking</think><function name="python"><param name="code">print('hey')</param></function>)")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ python_tool })
+            .expect_reasoning("I'm thinking")
+            .expect_tool_calls({ { "python", R"#({"code": "print('hey')"})#", {} } })
+            .run();
+
+        tst.test(R"(<function name="python"><param name="code">print('x')</param></function>
+<function name="python"><param name="code">print('y')</param></function>)")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({ python_tool })
+            .expect_tool_calls({
+                { "python", R"#({"code": "print('x')"})#", {} },
+                { "python", R"#({"code": "print('y')"})#", {} },
+            })
+            .run();
+
+        tst.test(" thinking</think>Hello, world!\nWhat's up?")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .messages({ message_user, message_assist_prefill_reasoning })
+            .add_generation_prompt(false)
+            .continue_final_message(COMMON_CHAT_CONTINUATION_REASONING)
+            .expect_reasoning("I'm thinking")
+            .expect_content("Hello, world!\nWhat's up?")
+            .run();
+    }
 }

 static void test_template_generation_prompt() {
@@ -5740,6 +5811,13 @@ static void test_template_generation_prompt() {
        check(tmpls, continuation_content(),   "<｜Assistant｜><think>I'm thinking</think>Hello, ");
        check(tmpls, continuation_reasoning(), "<｜Assistant｜><think>I'm");
    }
+
+    {
+        auto tmpls = read_templates("models/templates/openbmb-MiniCPM5-1B.jinja");
+        check(tmpls, basic(),                  "<|im_start|>assistant\n<think>\n");
+        check(tmpls, continuation_content(),   "<|im_start|>assistant\n<think>\nI'm thinking\n</think>\n\nHello, ");
+        check(tmpls, continuation_reasoning(), "<|im_start|>assistant\n<think>\nI'm");
+    }
 }

 // Test the developer role to system workaround with a simple mock template
@@ -1584,6 +1584,36 @@ static void test_array_methods(testing & t) {
        "6"
    );

+    test_template(t, "array|min",
+        "{{ [tool_calls_count, tool_sep_count]|min }}",
+        {{"tool_calls_count", 2}, {"tool_sep_count", 1}},
+        "1"
+    );
+
+    test_template(t, "array|max",
+        "{{ [tool_calls_count, tool_sep_count]|max }}",
+        {{"tool_calls_count", 2}, {"tool_sep_count", 1}},
+        "2"
+    );
+
+    test_template(t, "array|min attribute",
+        "{{ items|min(attribute='x') }}",
+        {{"items", json::array({
+            json({{"x", 2}}),
+            json({{"x", 1}}),
+        })}},
+        "{'x': 1}"
+    );
+
+    test_template(t, "array|max attribute",
+        "{{ items|max(attribute='x') }}",
+        {{"items", json::array({
+            json({{"x", 2}}),
+            json({{"x", 1}}),
+        })}},
+        "{'x': 2}"
+    );
+
    // not used by any chat templates
    // test_template(t, "array.insert()",
    //     "{% set _ = arr.insert(1, 'x') %}{{ arr|join(',') }}",
@@ -1538,6 +1538,19 @@ private:
                /* media_path            */ params_base.media_path,
                /* force_pure_content    */ params_base.force_pure_content_parser
            };
+
+            {
+                auto caps = common_chat_templates_get_caps(chat_params.tmpls.get());
+                auto it = params_base.default_template_kwargs.find("preserve_reasoning");
+                bool supported = caps.at("supports_preserve_reasoning");
+                bool enabled = it != params_base.default_template_kwargs.end();
+                if (supported && !enabled) {
+                    SRV_INF("%s", "chat template supports preserving reasoning, consider enabling it via --reasoning-preserve\n");
+                }
+                if (!supported && enabled) {
+                    SRV_WRN("%s", "chat template does NOT support preserving reasoning, --reasoning-preserve has no effect\n");
+                }
+            }
        }

        return true;
@@ -2450,6 +2463,8 @@ private:

                    server_slot * slot = get_slot_by_cmpl_id(task.params.control_cmpl_id);
                    if (slot == nullptr) {
+                        SRV_WRN("control %s on unknown completion id=%s, no live slot\n",
+                                task.params.control_action.c_str(), task.params.control_cmpl_id.c_str());
                        res->success = false;
                        res->message = "no active completion for this id";
                        queue_results.send(std::move(res));
@@ -1983,7 +1983,10 @@ void server_models_routes::init_routes() {
            cli.set_read_timeout(0, STREAM_LOOKUP_TIMEOUT_MS * 1000);
            cli.set_write_timeout(0, STREAM_LOOKUP_TIMEOUT_MS * 1000);
            auto resp = cli.Delete(child_path.c_str());
-            (void) resp; // best effort, 404 and network errors are equivalent to no op
+            (void) resp; // the child logs its own miss when the session is unknown there
+        } else {
+            SRV_WRN("router stop for unknown conv_id=%s, no owning child in the conv map\n",
+                    conv_id.c_str());
        }
        // drop the tracking entry, the session is being torn down
        models.conv_models.forget(conv_id);
@@ -218,6 +218,13 @@ void stream_session_manager::evict_and_cancel(const std::string & conversation_i
        std::unique_lock<std::shared_mutex> lock(map_mu);
        auto it = sessions.find(conversation_id);
        if (it == sessions.end()) {
+            std::string live;
+            for (const auto & kv : sessions) {
+                if (!live.empty()) live += ", ";
+                live += kv.first;
+            }
+            SRV_WRN("stop on unknown stream session, conv_id=%s matched nothing, %zu live: [%s]\n",
+                    conversation_id.c_str(), sessions.size(), live.c_str());
            return;
        }
        s = it->second;
@@ -33,7 +33,7 @@

 	{#if !readonly && onRemove}
 		<div
-			class="absolute top-10 right-2 flex items-center justify-center opacity-0 transition-opacity group-focus-within:opacity-100 group-hover:opacity-100"
+			class="absolute top-10 right-2 flex items-center justify-center opacity-0 transition-opacity group-hover:opacity-100"
 		>
 			<ActionIcon icon={X} tooltip="Remove" stopPropagationOnClick onclick={() => onRemove?.()} />
 		</div>
@@ -56,7 +56,7 @@
 			<div class="relative flex h-6 items-center justify-between">
 				<div class="right-0 flex items-center gap-2 opacity-100 transition-opacity">
 					<div
-						class="pointer-events-auto inset-0 flex items-center gap-1 opacity-0 transition-all duration-150 group-focus-within:opacity-100 group-hover:opacity-100"
+						class="pointer-events-auto inset-0 flex items-center gap-1 opacity-0 transition-all duration-150 group-hover:opacity-100"
 					>
 						<ActionIcon icon={Edit} tooltip="Edit" onclick={editCtx.handleEdit} />
 						<ActionIcon icon={Trash2} tooltip="Delete" onclick={onDelete} />
@@ -39,6 +39,7 @@
 		depth = 0
 	}: Props = $props();

+	let renderActionsDropdown = $state(false);
 	let dropdownOpen = $state(false);

 	let isLoading = $derived(getAllLoadingChats().includes(conversation.id));
@@ -70,10 +71,26 @@
 		}
 	}

+	function handleMouseLeave() {
+		if (!dropdownOpen) {
+			renderActionsDropdown = false;
+		}
+	}
+
+	function handleMouseOver() {
+		renderActionsDropdown = true;
+	}
+
 	function handleSelect() {
 		onSelect?.(conversation.id);
 	}

+	$effect(() => {
+		if (!dropdownOpen) {
+			renderActionsDropdown = false;
+		}
+	});
+
 	onMount(() => {
 		document.addEventListener('edit-active-conversation', handleGlobalEditEvent as EventListener);

@@ -86,19 +103,23 @@
 	});
 </script>

-<div
-	class="conversation-item group relative flex min-h-9 w-full items-center justify-between space-x-3 rounded-lg py-1.5 transition-colors hover:bg-foreground/10 {isActive
+<!-- svelte-ignore a11y_mouse_events_have_key_events -->
+<button
+	class="group flex min-h-9 w-full cursor-pointer items-center justify-between space-x-3 rounded-lg py-1.5 text-left transition-colors hover:bg-foreground/10 {isActive
 		? 'bg-foreground/5 text-accent-foreground'
 		: ''} px-3"
+	onclick={handleSelect}
+	onmouseover={handleMouseOver}
+	onmouseleave={handleMouseLeave}
+	onfocusin={handleMouseOver}
+	onfocusout={(e) => {
+		if (!e.currentTarget.contains(e.relatedTarget as Node | null)) {
+			handleMouseLeave();
+		}
+	}}
 >
-	<button
-		class="absolute inset-0 z-0 cursor-pointer rounded-lg focus:outline-none focus-visible:ring-2 focus-visible:ring-ring"
-		onclick={handleSelect}
-		aria-label={conversation.name}
-	>
-	</button>
 	<div
-		class="pointer-events-none relative z-10 flex min-w-0 flex-1 items-center gap-2"
+		class="flex min-w-0 flex-1 items-center gap-2"
 		style:padding-left="{depth * FORK_TREE_DEPTH_PADDING}px"
 	>
 		{#if depth > 0}
@@ -109,7 +130,7 @@
 						<a
 							{...props}
 							href={RouterService.chat(conversation.forkedFromConversationId)}
-							class="pointer-events-auto flex shrink-0 items-center text-muted-foreground transition-colors hover:text-foreground"
+							class="flex shrink-0 items-center text-muted-foreground transition-colors hover:text-foreground"
 						>
 							<GitBranch class="h-3.5 w-3.5" />
 						</a>
@@ -125,15 +146,18 @@
 		{#if isLoading}
 			<Tooltip.Root>
 				<Tooltip.Trigger>
-					<button
-						class="stop-button pointer-events-auto flex h-4 w-4 shrink-0 cursor-pointer items-center justify-center rounded text-muted-foreground transition-colors hover:text-foreground"
+					<div
+						class="stop-button flex h-4 w-4 shrink-0 cursor-pointer items-center justify-center rounded text-muted-foreground transition-colors hover:text-foreground"
 						onclick={handleStop}
+						onkeydown={(e) => e.key === 'Enter' && handleStop(e)}
+						role="button"
+						tabindex="0"
 						aria-label="Stop generation"
 					>
 						<Loader2 class="loading-icon h-3.5 w-3.5 animate-spin" />

 						<Square class="stop-icon hidden h-3 w-3 fill-current text-destructive" />
-					</button>
+					</div>
 				</Tooltip.Trigger>

 				<Tooltip.Content>
@@ -145,50 +169,52 @@
 		<TruncatedText text={conversation.name} class="text-sm font-medium" showTooltip={false} />
 	</div>

-	<div class="actions pointer-events-auto relative z-20 flex items-center">
-		<DropdownMenuActions
-			triggerIcon={MoreHorizontal}
-			triggerTooltip="More actions"
-			bind:open={dropdownOpen}
-			actions={[
-				{
-					icon: conversation.pinned ? PinOff : Pin,
-					label: conversation.pinned ? 'Unpin' : 'Pin',
-					onclick: (e: Event) => {
-						e.stopPropagation();
-						handleTogglePin();
-					}
-				},
-				{
-					icon: Pencil,
-					label: 'Edit',
-					onclick: handleEdit,
-					shortcut: ['shift', 'cmd', 'e']
-				},
-				{
-					icon: Download,
-					label: 'Export',
-					onclick: (e: Event) => {
-						e.stopPropagation();
-						conversationsStore.downloadConversation(conversation.id);
+	{#if renderActionsDropdown}
+		<div class="actions flex items-center">
+			<DropdownMenuActions
+				triggerIcon={MoreHorizontal}
+				triggerTooltip="More actions"
+				bind:open={dropdownOpen}
+				actions={[
+					{
+						icon: conversation.pinned ? PinOff : Pin,
+						label: conversation.pinned ? 'Unpin' : 'Pin',
+						onclick: (e: Event) => {
+							e.stopPropagation();
+							handleTogglePin();
+						}
 					},
-					shortcut: ['shift', 'cmd', 's']
-				},
-				{
-					icon: Trash2,
-					label: 'Delete',
-					onclick: handleDelete,
-					variant: 'destructive',
-					shortcut: ['shift', 'cmd', 'd'],
-					separator: true
-				}
-			]}
-		/>
-	</div>
-</div>
+					{
+						icon: Pencil,
+						label: 'Edit',
+						onclick: handleEdit,
+						shortcut: ['shift', 'cmd', 'e']
+					},
+					{
+						icon: Download,
+						label: 'Export',
+						onclick: (e: Event) => {
+							e.stopPropagation();
+							conversationsStore.downloadConversation(conversation.id);
+						},
+						shortcut: ['shift', 'cmd', 's']
+					},
+					{
+						icon: Trash2,
+						label: 'Delete',
+						onclick: handleDelete,
+						variant: 'destructive',
+						shortcut: ['shift', 'cmd', 'd'],
+						separator: true
+					}
+				]}
+			/>
+		</div>
+	{/if}
+</button>

 <style>
-	.conversation-item {
+	button {
 		:global([data-slot='dropdown-menu-trigger']:not([data-state='open'])) {
 			opacity: 0;
 		}
@@ -213,8 +239,7 @@
 			}
 		}

-		&:is(:hover) .stop-button,
-		&:focus-within .stop-button {
+		&:is(:hover) .stop-button {
 			:global(.stop-icon) {
 				display: block;
 			}
@@ -154,7 +154,13 @@ class ChatStore {
 		});
 		if (convId === conversationsStore.activeConversation?.id) this.currentResponse = response;
 	}
-	private clearChatStreaming(convId: string): void {
+	private clearChatStreaming(convId: string, messageId?: string): void {
+		// session aware: a stale generation must not wipe a newer one's streaming state on the
+		// same conversation, that would drop the frozen stop identity and stop the wrong session
+		if (messageId !== undefined) {
+			const cur = this.chatStreamingStates.get(convId);
+			if (cur && cur.messageId !== messageId) return;
+		}
 		this.chatStreamingStates.delete(convId);
 		if (convId === conversationsStore.activeConversation?.id) this.currentResponse = '';
 	}
@@ -1055,11 +1061,14 @@ class ChatStore {
 		modelOverride?: string | null,
 		firstUserMessageContent?: string
 	): Promise<void> {
-		let effectiveModel = modelOverride;
+		// the ::model suffix in the stream identity is only for router mode, where it routes to the
+		// owning child. in single-model mode the identity stays the bare conv id so that attach, stop
+		// and reattach all agree, regardless of fresh send vs regenerate passing a resolved model
+		let effectiveModel: string | null | undefined = undefined;

-		if (isRouterMode() && !effectiveModel) {
+		if (isRouterMode()) {
 			const conversationModel = this.getConversationModel(allMessages);
-			effectiveModel = selectedModelName() || conversationModel;
+			effectiveModel = modelOverride || selectedModelName() || conversationModel;
 		}

 		if (isRouterMode() && effectiveModel) {
@@ -1074,6 +1083,9 @@ class ChatStore {
 		let resolvedModel: string | null = null;
 		let modelPersisted = false;
 		const convId = assistantMessage.convId;
+		// freeze the POST identity from t0 so a stop cancels with the exact session key,
+		// never a stale or empty model resolved later
+		this.setChatStreaming(convId, streamedContent, currentMessageId, effectiveModel);

 		const recordModel = (modelName: string | null | undefined, persistImmediately = true): void => {
 			if (!modelName) return;
@@ -1103,7 +1115,7 @@ class ChatStore {
 		};

 		const updateStreamingUI = () => {
-			this.setChatStreaming(convId, streamedContent, currentMessageId);
+			this.setChatStreaming(convId, streamedContent, currentMessageId, effectiveModel);
 			const idx = conversationsStore.findMessageIndex(currentMessageId);
 			conversationsStore.updateMessageAtIndex(idx, { content: streamedContent });
 		};
@@ -1111,7 +1123,7 @@ class ChatStore {
 		const cleanupStreamingState = () => {
 			this.setStreamingActive(false);
 			this.setChatLoading(convId, false);
-			this.clearChatStreaming(convId);
+			this.clearChatStreaming(convId, currentMessageId);
 			this.setProcessingState(convId, null);
 		};

@@ -1128,7 +1140,7 @@ class ChatStore {
 			onReasoningChunk: (chunk: string) => {
 				streamedReasoningContent += chunk;
 				// mark streaming state so a stop mid-thinking can persist the partial reasoning
-				this.setChatStreaming(convId, streamedContent, currentMessageId);
+				this.setChatStreaming(convId, streamedContent, currentMessageId, effectiveModel);
 				const idx = conversationsStore.findMessageIndex(currentMessageId);
 				conversationsStore.updateMessageAtIndex(idx, {
 					reasoningContent: streamedReasoningContent
@@ -1405,7 +1417,7 @@ class ChatStore {
 		// detached drain keeps producing tokens until eos or max_tokens. use the frozen identity
 		// captured when the session started, not the live dropdown
 		const streamStateForStop = this.chatStreamingStates.get(convId);
-		const modelForStop = streamStateForStop?.model ?? selectedModelName();
+		const modelForStop = streamStateForStop?.model;
 		void ChatService.cancelServerStream(convId, modelForStop);
 		this.abortRequest(convId);
 		this.setChatLoading(convId, false);
@@ -1846,6 +1858,14 @@ class ChatStore {
 						updateStreamingContent(originalContent + appendedContent);
 						this.setChatReasoning(msg.convId, false);
 					},
+					onCompletionId: (id: string) => {
+						if (!id) return;
+						// refresh the message id so a later skip targets the live slot after a continue
+						conversationsStore.updateMessageAtIndex(conversationsStore.findMessageIndex(msg.id), {
+							completionId: id
+						});
+						DatabaseService.updateMessage(msg.id, { completionId: id }).catch(() => {});
+					},
 					onReasoningChunk: (chunk: string) => {
 						appendedReasoning += chunk;
 						hasReceivedContent = true;
Author	SHA1	Message	Date
Xuan-Son Nguyen	b3fed31b99	jinja, chat: add --reasoning-preserve flag (#25105 ) * jinja, chat: add --reasoning-preserve flag * correct help message	2026-06-28 23:33:51 +02:00
Aleksander Grygier	dbdaece23d	Revert "ui: fix accessibility for hover-gated interactive elements assisted by claude(in debugging and tests) (#24727 )" (#25098 )	2026-06-28 21:30:03 +02:00
Pascal	7cb8576e7c	ui: fix stop and reasoning skip in single-model mode (#25084 )	2026-06-28 21:06:43 +02:00
Ruixiang Wang	fa72bc6826	dflash: refactor draft model conversion (#25110 ) * dflash: refactor draft model conversion * apply fix for eagle3 convert	2026-06-28 20:31:48 +02:00
Aldehir Rojas	c818263f2a	chat : implement minicpm5 parser (#24889 ) * Add minicpm5 tool call parser * Refactor MiniCPM5 PEG parser per review feedback * Fix jinja min/max API to match Jinja2 * modify by review * MiniCPM5: use autoparser for XML tool calls and fix grammar preserved-token triggers * MiniCPM5: fix streaming tool-arg placeholder and remove alt XML markers * skip min/max attribute tests in -py mode * test-jinja: use real expected output for min/max attribute tests * MiniCPM5: revert shared mapper and history fallbacks per review Drop streaming tool-arg placeholder workarounds from the generic PEG mapper and restore strict tool-call argument JSON parsing so MiniCPM5 support stays limited to autoparser/diff-analyzer changes. * chat : refactor minicpm5 back to dedicated parser * cont : simplify grammar * cont : refactor * cont : fixes * cont : rename template to openbmb-MiniCPM5-1B.jinja * cont : add message delimiters * cont : fix tests --------- Co-authored-by: zhangtao <zhangtao2@modelbest.cn> Co-authored-by: 张涛 <>	2026-06-28 16:53:32 +02:00