ui: Improve performance when streaming (#25225)

* ui: Improve performance when streaming * ui: build sibling info map in branching utils Moves the node map and sibling map construction from the .by block into buildSiblingInfoMap() in branching.ts. The map is built once per structural change and only read afterwards, so it does not need SvelteMap reactivity. Keeping the construction in plain TypeScript fixes the svelte/prefer-svelte-reactivity lint error and groups the branching logic where it already lives. --------- Co-authored-by: Pascal <admin@serveurperso.com>
ui: strip path and weight extension from model id in single model mode (#25137)
2026-07-03 20:53:08 +02:00 · 2026-07-03 19:03:51 +02:00 · 2026-07-03 17:32:48 +02:00 · 2026-07-03 15:40:06 +02:00 · 2026-07-03 15:36:55 +02:00 · 2026-07-03 13:14:52 +02:00
51 changed files with 1692 additions and 306 deletions
@@ -955,10 +955,11 @@ struct common_speculative_impl_draft_dflash : public common_speculative_impl {
        LOG_INF("%s: - block_size=%d, mask_token_id=%d, n_extract=%u\n", __func__, block_size, mask_token_id, target_layer_ids_n);

        // DFlash input is [id_last, <mask> * (block_size-1)], so it can draft at most block_size-1 tokens per step
-        if (this->params.n_max > block_size - 1) {
-            LOG_WRN("%s: requested draft size %d exceeds the trained DFlash block size %d -- clamping to %d draft tokens per step\n",
-                    __func__, this->params.n_max, block_size - 1, block_size - 1);
-            this->params.n_max = block_size - 1;
+        if (this->params.n_max > block_size - 1 || this->params.n_min > block_size - 1) {
+            LOG_WRN("%s: requested draft size (n_max=%d, n_min=%d) exceeds the trained DFlash block size %d -- clamping to %d\n",
+                    __func__, this->params.n_max, this->params.n_min, block_size, block_size - 1);
+            this->params.n_max = std::min(this->params.n_max, block_size - 1);
+            this->params.n_min = std::min(this->params.n_min, block_size - 1);
        }

        batch        = llama_batch_init(llama_n_batch(ctx_dft), 0,          n_seq);
@@ -968,7 +969,7 @@ struct common_speculative_impl_draft_dflash : public common_speculative_impl {
        for (auto & s : smpls) {
            common_params_sampling sparams;
            sparams.no_perf  = false;
-            sparams.top_k    = 1;
+            sparams.top_k    = 10;
            sparams.samplers = { COMMON_SAMPLER_TYPE_TOP_K };
            s.reset(common_sampler_init(model_dft, sparams));
        }
@@ -1173,10 +1174,18 @@ struct common_speculative_impl_draft_dflash : public common_speculative_impl {

                const llama_token id = cur_p->data[0].id;

+                if (cur_p->data[0].p < params.p_min) {
+                    break;
+                }
+
                common_sampler_accept(smpl, id, true);

                result.push_back(id);
            }
+
+            if (result.size() < (size_t) params.n_min) {
+                result.clear();
+            }
        }
    }

@@ -10,6 +10,7 @@ gated_delta_net_cuda(const float * q,
                                     const float * beta,
                                     const float * curr_state,
                                     float *       dst,
+                                     float *       state,
                                     int64_t       H,
                                     int64_t       n_tokens,
                                     int64_t       n_seqs,
@@ -25,6 +26,7 @@ gated_delta_net_cuda(const float * q,
                                     const uint3   neqk1_magic,
                                     const uint3   rq3_magic,
                                     float         scale,
+                                     int64_t       state_slot_stride,
                                     int           K) {
    const uint32_t h_idx    = blockIdx.x;
    const uint32_t sequence = blockIdx.y;
@@ -35,9 +37,7 @@ gated_delta_net_cuda(const float * q,
    const uint32_t iq1 = fastmodulo(h_idx, neqk1_magic);
    const uint32_t iq3 = fastdiv(sequence, rq3_magic);

-    const int64_t attn_score_elems = S_v * H * n_tokens * n_seqs;
    float *       attn_data        = dst;
-    float *       state            = dst + attn_score_elems;

    // input state holds s0 only: [S_v, S_v, H, n_seqs] — seq stride is D = H * S_v * S_v.
    // output state layout (per-slot D * n_seqs) — same per-(seq,head) offset as before.
@@ -145,10 +145,9 @@ gated_delta_net_cuda(const float * q,
        if constexpr (keep_rs_t) {
            // snapshot slot mapping: slot 0 = most recent state, slot s = s tokens back.
            // When n_tokens < K only slots 0..n_tokens-1 are written; older slots are caller-owned.
-            const int64_t state_size_per_token = S_v * S_v * H * n_seqs; // per-slot stride in output
            const int target_slot = (int) n_tokens - 1 - t;
            if (target_slot >= 0 && target_slot < K) {
-                float * curr_state = (dst + attn_score_elems) + target_slot * state_size_per_token + state_out_offset;
+                float * curr_state = state + target_slot * state_slot_stride;
 #pragma unroll
                for (int r = 0; r < rows_per_lane; r++) {
                    const int i = r * warp_size + lane;
@@ -171,13 +170,13 @@ template <bool KDA, bool keep_rs_t>
 static void launch_gated_delta_net(
        const float * q_d, const float * k_d, const float * v_d,
        const float * g_d, const float * b_d, const float * s_d,
-        float * dst_d,
+        float * dst_d, float * state_d,
        int64_t S_v,   int64_t H, int64_t n_tokens, int64_t n_seqs,
        int64_t sq1,   int64_t sq2, int64_t sq3,
        int64_t sv1,   int64_t sv2, int64_t sv3,
        int64_t sb1,   int64_t sb2, int64_t sb3,
        int64_t neqk1, int64_t rq3,
-        float scale, int K, cudaStream_t stream) {
+        float scale, int64_t state_slot_stride, int K, cudaStream_t stream) {
    //TODO: Add chunked kernel for even faster pre-fill
    const int warp_size = ggml_cuda_info().devices[ggml_cuda_get_device()].warp_size;
    const int num_warps = 4;
@@ -187,34 +186,32 @@ static void launch_gated_delta_net(
    const uint3 neqk1_magic = init_fastdiv_values(neqk1);
    const uint3 rq3_magic   = init_fastdiv_values(rq3);

-    int cc = ggml_cuda_info().devices[ggml_cuda_get_device()].cc;
-
    const ggml_cuda_kernel_launch_params launch_params = ggml_cuda_kernel_launch_params(grid_dims, block_dims, 0, stream);
    switch (S_v) {
        case 16:
            ggml_cuda_kernel_launch(gated_delta_net_cuda<16, KDA, keep_rs_t>, launch_params,
-                q_d, k_d, v_d, g_d, b_d, s_d, dst_d, H,
+                q_d, k_d, v_d, g_d, b_d, s_d, dst_d, state_d, H,
                n_tokens, n_seqs, sq1, sq2, sq3, sv1, sv2, sv3,
-                sb1, sb2, sb3, neqk1_magic, rq3_magic, scale, K);
+                sb1, sb2, sb3, neqk1_magic, rq3_magic, scale, state_slot_stride, K);
            break;
        case 32:
            ggml_cuda_kernel_launch(gated_delta_net_cuda<32, KDA, keep_rs_t>, launch_params,
-                q_d, k_d, v_d, g_d, b_d, s_d, dst_d, H,
+                q_d, k_d, v_d, g_d, b_d, s_d, dst_d, state_d, H,
                n_tokens, n_seqs, sq1, sq2, sq3, sv1, sv2, sv3,
-                sb1, sb2, sb3, neqk1_magic, rq3_magic, scale, K);
+                sb1, sb2, sb3, neqk1_magic, rq3_magic, scale, state_slot_stride, K);
            break;
        case 64: {
            ggml_cuda_kernel_launch(gated_delta_net_cuda<64, KDA, keep_rs_t>, launch_params,
-                q_d, k_d, v_d, g_d, b_d, s_d, dst_d, H,
+                q_d, k_d, v_d, g_d, b_d, s_d, dst_d, state_d, H,
                n_tokens, n_seqs, sq1, sq2, sq3, sv1, sv2, sv3,
-                sb1, sb2, sb3, neqk1_magic, rq3_magic, scale, K);
+                sb1, sb2, sb3, neqk1_magic, rq3_magic, scale, state_slot_stride, K);
            break;
        }
        case 128: {
            ggml_cuda_kernel_launch(gated_delta_net_cuda<128, KDA, keep_rs_t>, launch_params,
-                q_d, k_d, v_d, g_d, b_d, s_d, dst_d, H,
+                q_d, k_d, v_d, g_d, b_d, s_d, dst_d, state_d, H,
                n_tokens, n_seqs, sq1, sq2, sq3, sv1, sv2, sv3,
-                sb1, sb2, sb3, neqk1_magic, rq3_magic, scale, K);
+                sb1, sb2, sb3, neqk1_magic, rq3_magic, scale, state_slot_stride, K);
            break;
        }
        default:
@@ -223,7 +220,8 @@ static void launch_gated_delta_net(
    }
 }

-void ggml_cuda_op_gated_delta_net(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+static void ggml_cuda_op_gated_delta_net_impl(
+        ggml_backend_cuda_context & ctx, ggml_tensor * dst, const ggml_cuda_gated_delta_net_fused_cache * cache) {
    ggml_tensor * src_q     = dst->src[0];
    ggml_tensor * src_k     = dst->src[1];
    ggml_tensor * src_v     = dst->src[2];
@@ -288,25 +286,42 @@ void ggml_cuda_op_gated_delta_net(ggml_backend_cuda_context & ctx, ggml_tensor *
    const int K = ggml_get_op_params_i32(dst, 0);
    const bool keep_rs = K > 1;

+    // recurrent state -> gdn_out tail (after attention scores), or the cache when fusing
+    float * state_d           = dst_d + S_v * H * n_tokens * n_seqs;
+    int64_t state_slot_stride = S_v * S_v * H * n_seqs;
+    if (cache != nullptr) {
+        state_d           = cache->data;
+        state_slot_stride = cache->slot_stride;
+    }
+
    if (kda) {
        if (keep_rs) {
-            launch_gated_delta_net<true, true>(q_d, k_d, v_d, g_d, b_d, s_d, dst_d,
+            launch_gated_delta_net<true, true>(q_d, k_d, v_d, g_d, b_d, s_d, dst_d, state_d,
                S_v, H, n_tokens, n_seqs, sq1, sq2, sq3, sv1, sv2, sv3,
-                sb1, sb2, sb3, neqk1, rq3, scale, K, stream);
+                sb1, sb2, sb3, neqk1, rq3, scale, state_slot_stride, K, stream);
        } else {
-            launch_gated_delta_net<true, false>(q_d, k_d, v_d, g_d, b_d, s_d, dst_d,
+            launch_gated_delta_net<true, false>(q_d, k_d, v_d, g_d, b_d, s_d, dst_d, state_d,
                S_v, H, n_tokens, n_seqs, sq1, sq2, sq3, sv1, sv2, sv3,
-                sb1, sb2, sb3, neqk1, rq3, scale, K, stream);
+                sb1, sb2, sb3, neqk1, rq3, scale, state_slot_stride, K, stream);
        }
    } else {
        if (keep_rs) {
-            launch_gated_delta_net<false, true>(q_d, k_d, v_d, g_d, b_d, s_d, dst_d,
+            launch_gated_delta_net<false, true>(q_d, k_d, v_d, g_d, b_d, s_d, dst_d, state_d,
                S_v, H, n_tokens, n_seqs, sq1, sq2, sq3, sv1, sv2, sv3,
-                sb1, sb2, sb3, neqk1, rq3, scale, K, stream);
+                sb1, sb2, sb3, neqk1, rq3, scale, state_slot_stride, K, stream);
        } else {
-            launch_gated_delta_net<false, false>(q_d, k_d, v_d, g_d, b_d, s_d, dst_d,
+            launch_gated_delta_net<false, false>(q_d, k_d, v_d, g_d, b_d, s_d, dst_d, state_d,
                S_v, H, n_tokens, n_seqs, sq1, sq2, sq3, sv1, sv2, sv3,
-                sb1, sb2, sb3, neqk1, rq3, scale, K, stream);
+                sb1, sb2, sb3, neqk1, rq3, scale, state_slot_stride, K, stream);
        }
    }
 }
+
+void ggml_cuda_op_gated_delta_net(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { // unfused entry point for the op
+    ggml_cuda_op_gated_delta_net_impl(ctx, dst, nullptr); // nullptr cache: state snapshots go to the tail of dst, after the attention scores
+}
+
+void ggml_cuda_op_gated_delta_net_fused_cache( // fused entry point: same op, state snapshots redirected to `cache`
+        ggml_backend_cuda_context & ctx, ggml_tensor * dst, ggml_cuda_gated_delta_net_fused_cache cache) {
+    ggml_cuda_op_gated_delta_net_impl(ctx, dst, &cache); // non-null cache: impl takes state_d / state_slot_stride from cache.data / cache.slot_stride
+}
@@ -1,4 +1,14 @@
 #include "common.cuh"
 #include "ggml.h"

+// Destination for the recurrent-state snapshots when gated_delta_net is fused with the cache copy; strides are in float elements, not bytes (the per-seq stride is always D and is applied in-kernel).
+struct ggml_cuda_gated_delta_net_fused_cache {
+    float * data;        // base of rollback slot 0 (the newest snapshot); used as the state output instead of the dst tail
+    int64_t slot_stride; // distance in floats between consecutive rollback slots; 0 unless K > 1
+};
+
 void ggml_cuda_op_gated_delta_net(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+
+// Same op as ggml_cuda_op_gated_delta_net, but the state snapshot(s) are written to `cache` instead of the tail of dst; the fusion matcher (see ggml_cuda_try_gdn_cache_fusion) reports how many following nodes the caller may then skip.
+void ggml_cuda_op_gated_delta_net_fused_cache(ggml_backend_cuda_context & ctx, ggml_tensor * dst,
+                                              ggml_cuda_gated_delta_net_fused_cache cache);
@@ -3251,6 +3251,11 @@ static void ggml_backend_cuda_synchronize(ggml_backend_t backend) {
    GGML_UNUSED(backend);
 }

+static bool ggml_cuda_is_view_or_noop(const ggml_tensor * t) { // shared predicate for nodes the graph loops skip over
+    return ggml_is_empty(t) || t->op == GGML_OP_RESHAPE || t->op == GGML_OP_TRANSPOSE || // empty tensors count as no-ops too
+           t->op == GGML_OP_VIEW || t->op == GGML_OP_PERMUTE || t->op == GGML_OP_NONE;   // remaining ops matched purely by op code
+}
+
 #ifdef USE_CUDA_GRAPH
 static bool ggml_cuda_graph_check_compability(ggml_cgraph * cgraph) {

@@ -3260,7 +3265,7 @@ static bool ggml_cuda_graph_check_compability(ggml_cgraph * cgraph) {
    for (int i = 0; i < cgraph->n_nodes; i++) {
        ggml_tensor * node = cgraph->nodes[i];

-        if (ggml_is_empty(node) || node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_NONE) {
+        if (ggml_cuda_is_view_or_noop(node)) {
            continue;
        }

@@ -3403,6 +3408,70 @@ static bool ggml_cuda_should_fuse_rope_set_rows(const ggml_tensor * rope,
    return true;
 }

+// Try to fuse a gated_delta_net node with the strided cpy that scatters its state snapshots into the cache (slot i -> rollback group i, slot 0 newest), so the kernel writes the cache directly and the cpy can be skipped.
+// Returns the index distance from the gdn node to the matched cpy (> 0) and fills fused_state_cpy, or returns 0 and leaves fused_state_cpy untouched when the pattern does not match.
+static int ggml_cuda_try_gdn_cache_fusion(
+        const ggml_cgraph * cgraph, int node_idx, ggml_cuda_gated_delta_net_fused_cache & fused_state_cpy) {
+    const ggml_tensor * gdn = cgraph->nodes[node_idx];
+    // the kernel skips the snapshot tail, so the gdn output must not be a graph output. NOTE(review): other in-graph readers of the tail are not checked here -- confirm the cpy is its only consumer
+    if (gdn->op != GGML_OP_GATED_DELTA_NET || gdn->type != GGML_TYPE_F32 ||
+        (gdn->flags & GGML_TENSOR_FLAG_OUTPUT)) {
+        return 0;
+    }
+
+    const ggml_tensor * src_v     = gdn->src[2]; // dimensions below are all read from the v input
+    const int64_t       S_v       = src_v->ne[0];
+    const int64_t       H         = src_v->ne[1];
+    const int64_t       n_tokens  = src_v->ne[2];
+    const int64_t       n_seqs    = src_v->ne[3];
+    const int64_t       D         = S_v * S_v * H; // elements of one sequence's state
+    const int64_t       K         = ggml_get_op_params_i32(gdn, 0); // snapshot slot count
+    const int64_t       n_written = std::min<int64_t>(n_tokens, K); // newest n_written slots are written
+
+    // snapshot tail starts right after the attention scores (S_v * H * n_tokens * n_seqs F32 elements)
+    const size_t tail_off = ggml_row_size(GGML_TYPE_F32, S_v * H * n_tokens * n_seqs); // compared against the view's view_offs below
+
+    // snapshot cpy is the first real node after the gdn (skip views/no-ops)
+    const ggml_tensor * cpy  = nullptr;
+    int                 skip = 0;
+    for (int j = node_idx + 1; j < cgraph->n_nodes && cpy == nullptr; ++j) { // loop ends as soon as a cpy is accepted
+        const ggml_tensor * n = cgraph->nodes[j];
+        if (ggml_cuda_is_view_or_noop(n)) {
+            continue;
+        }
+        if (n->op != GGML_OP_CPY || (n->flags & GGML_TENSOR_FLAG_OUTPUT)) { // first real node is not a usable cpy -> no fusion
+            return 0;
+        }
+        cpy  = n;
+        skip = j - node_idx; // counts the intervening view/no-op nodes plus the cpy itself
+    }
+    if (cpy == nullptr) { // reached the end of the graph without finding a real node
+        return 0;
+    }
+
+    const ggml_tensor * src = cpy->src[0]; // view of the gdn snapshot tail
+    const ggml_tensor * dst = cpy->src[1]; // cache view the kernel writes to
+
+    // src must be a contiguous view of this gdn's output that starts exactly at the snapshot tail offset
+    if (src->op != GGML_OP_VIEW || src->view_src != gdn || src->view_offs != tail_off ||
+        !ggml_is_contiguous(src)) {
+        return 0;
+    }
+
+    // dst is the [D, n_seqs, n_written] cache view; require nb[1] == D (the per-seq stride the kernel
+    // assumes) and densely packed floats along dim 0. ggml_cpy pins src to the same element count.
+    const std::array<int64_t, GGML_MAX_DIMS> expected_ne = { D, n_seqs, n_written, 1 };
+    if (dst->op != GGML_OP_VIEW || dst->type != GGML_TYPE_F32 || dst->data == nullptr ||
+        !std::equal(expected_ne.begin(), expected_ne.end(), dst->ne) ||
+        dst->nb[0] != ggml_type_size(GGML_TYPE_F32) || dst->nb[1] != (size_t) ggml_row_size(GGML_TYPE_F32, D)) {
+        return 0;
+    }
+
+    fused_state_cpy.data        = (float *) dst->data; // rollback group 0 (newest)
+    fused_state_cpy.slot_stride = K > 1 ? (int64_t) (dst->nb[2] / sizeof(float)) : 0; // byte stride of dim 2 converted to floats; unused (0) with a single slot
+    return skip;
+}
+
 static bool ggml_cuda_topk_moe_fusion(const struct ggml_cgraph * cgraph, int node_idx, ggml_cuda_topk_moe_args & args) {
    args.sigmoid         = false;
    args.softmax         = false;
@@ -3844,6 +3913,20 @@ static int ggml_cuda_try_fuse(ggml_backend_cuda_context * cuda_ctx, ggml_cgraph

    ggml_tensor * node = cgraph->nodes[i];

+    // gated_delta_net -> cpy: scatter recurrent-state snapshots into the cache
+    if (node->op == GGML_OP_GATED_DELTA_NET) {
+        ggml_cuda_gated_delta_net_fused_cache fused_state_cpy;
+        const int nodes_to_skip = ggml_cuda_try_gdn_cache_fusion(cgraph, i, fused_state_cpy);
+        if (nodes_to_skip > 0) {
+#ifdef GGML_CUDA_DEBUG
+            GGML_LOG_INFO("%s: fused gated_delta_net snapshot copies for %s (skipped %d nodes)\n",
+                          __func__, node->name, nodes_to_skip);
+#endif
+            ggml_cuda_op_gated_delta_net_fused_cache(*cuda_ctx, node, fused_state_cpy);
+            return nodes_to_skip;
+        }
+    }
+
    //topk-moe
    if (cgraph->nodes[i]->op == GGML_OP_UNARY || cgraph->nodes[i]->op == GGML_OP_SOFT_MAX ||
            cgraph->nodes[i]->op == GGML_OP_ARGSORT) {
@@ -4372,7 +4455,7 @@ static void ggml_cuda_graph_evaluate_and_capture(ggml_backend_cuda_context * cud
 #endif
                prev_i = i;

-                if (ggml_is_empty(node) || node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_NONE) {
+                if (ggml_cuda_is_view_or_noop(node)) {
                    continue;
                }

@@ -312,6 +312,10 @@ static void launch_topk_moe_cuda(ggml_backend_cuda_context & ctx,
            ggml_cuda_kernel_launch(topk_moe_cuda<256, has_bias>, launch_params,
                logits, weights, ids, bias, n_rows, n_expert_used, clamp_val, scale_val, config);
            break;
+        case 288: // StepFun 3.7
+            ggml_cuda_kernel_launch(topk_moe_cuda<288, has_bias>, launch_params,
+                logits, weights, ids, bias, n_rows, n_expert_used, clamp_val, scale_val, config);
+            break;
        case 512:
            ggml_cuda_kernel_launch(topk_moe_cuda<512, has_bias>, launch_params,
                logits, weights, ids, bias, n_rows, n_expert_used, clamp_val, scale_val, config);
@@ -377,8 +381,10 @@ bool ggml_cuda_should_use_topk_moe(const ggml_tensor * gating_op,
                                   const ggml_tensor * weights,
                                   const ggml_tensor * logits,
                                   const ggml_tensor * ids) {
+    // must match an instantiation of launch_topk_moe_cuda: a power of 2 up to 512,
+    // or one of the non-power-of-2 expert counts of supported models
    const int n_expert = ids->nb[1] / ids->nb[0];
-    if (((n_expert & (n_expert - 1)) != 0 || n_expert > 512) && n_expert != 576) {
+    if (((n_expert & (n_expert - 1)) != 0 || n_expert > 512) && n_expert != 288 && n_expert != 576) {
        return false;
    }

@@ -9219,6 +9219,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
                    test_cases.emplace_back(new test_topk_moe({128, 1, 1, 1}, 128, with_norm, bias_probs, gate, scale_w));
                    test_cases.emplace_back(new test_topk_moe({129, 1, 1, 1}, 128, with_norm, bias_probs, gate, scale_w));
                    test_cases.emplace_back(new test_topk_moe({160, 4, 1, 1}, 160, with_norm, bias_probs, gate, scale_w));
+                    test_cases.emplace_back(new test_topk_moe({288, 22, 1, 1}, 8, with_norm, bias_probs, gate, scale_w)); // Used by StepFun 3.7
                }
            }
        }
@@ -521,6 +521,8 @@ These words will not be included in the completion, so make sure to add them to

 `return_progress`: Include prompt processing progress in `stream` mode. The progress will be contained inside `prompt_progress` with 4 values: `total`, `cache`, `processed`, and `time_ms`. The overall progress is `processed/total`, while the actual timed progress is `(processed-cache)/(total-cache)`. The `time_ms` field contains the elapsed time in milliseconds since prompt processing started. Default: `false`

+`sse_ping_interval`: Interval in seconds between SSE comment pings emitted while the stream stays silent, keeping the connection observable during long prompt processing. Overrides the server `--sse-ping-interval` setting for this request; `-1` disables pings. Default: the server setting
+
 `post_sampling_probs`: Returns the probabilities of top `n_probs` tokens after applying sampling chain.

 `response_fields`: A list of response fields, for example: `"response_fields": ["content", "generation_settings/n_predict"]`. If the specified field is missing, it will simply be omitted from the response without triggering an error. Note that fields with a slash will be unnested; for example, `generation_settings/n_predict` will move the field `n_predict` from the `generation_settings` object to the root of the response and give it a new name.
@@ -4089,6 +4089,8 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
    auto & rd = res->rd;
    auto & params = this->params;

+    int32_t sse_ping_interval = params.sse_ping_interval;
+
    try {
        std::vector<server_task> tasks;

@@ -4139,6 +4141,7 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
            task.params.message_spans = task.tokens.find_message_spans(delimiters);

            task.id_slot = json_value(data, "id_slot", -1);
+            sse_ping_interval = task.params.sse_ping_interval;

            // OAI-compat
            task.params.res_type          = res_type;
@@ -4228,7 +4231,7 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
        }
        res->status = 200;
        res->content_type = "text/event-stream";
-        res->next = [res_this = res.get(), res_type, &req, &params](std::string & output) -> bool {
+        res->next = [res_this = res.get(), res_type, sse_ping_interval, &req](std::string & output) -> bool {
            static auto format_error = [](task_response_type res_type, const json & res_json) {
                if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
                    return format_anthropic_sse({
@@ -4277,10 +4280,10 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
                // receive subsequent results
                bool timeout = false;
                int64_t start_time = ggml_time_ms();
-                auto result = rd.next([&timeout, &start_time, &params, &effective_should_stop]() {
+                auto result = rd.next([&timeout, &start_time, sse_ping_interval, &effective_should_stop]() {
                    if (effective_should_stop()) {
                        return true; // should_stop condition met
-                    } else if (params.sse_ping_interval > 0 && ggml_time_ms() - start_time > (int64_t)params.sse_ping_interval * 1000) {
+                    } else if (sse_ping_interval > 0 && ggml_time_ms() - start_time > (int64_t)sse_ping_interval * 1000) {
                        timeout = true;
                        return true; // timeout
                    }
@@ -37,6 +37,10 @@ std::vector<std::unique_ptr<field>> make_llama_cmpl_schema(const common_params &
    add((new field_bool("return_progress", params.return_progress))
        ->set_desc("Include prompt processing progress events in stream mode"));

+    add((new field_num("sse_ping_interval", params.sse_ping_interval))
+        ->set_hard_limits(-1, INT32_MAX)
+        ->set_desc("Interval in seconds between SSE comment pings emitted while the stream stays silent, -1 disables pings"));
+
    add((new field_num("n_predict", params.n_predict))
        ->set_hard_limits(-1, INT32_MAX)
        ->add_alias("max_completion_tokens")
@@ -504,6 +508,7 @@ task_params eval_llama_cmpl_schema(
    params.n_cache_reuse = params_base.n_cache_reuse;
    params.cache_prompt  = params_base.cache_prompt;
    params.antiprompt    = params_base.antiprompt;
+    params.sse_ping_interval = params_base.sse_ping_interval;

    // enabling this will output extra debug information in the HTTP responses from the server
    params.verbose       = params_base.verbosity > 9;
@@ -54,6 +54,8 @@ struct task_params {
    bool return_tokens   = false;
    bool return_progress = false;

+    int32_t sse_ping_interval = 30; // seconds between SSE comment pings while the stream stays silent, -1 disables
+
    int32_t n_keep    =  0; // number of tokens to keep from initial prompt
    int32_t n_discard =  0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half
    int32_t n_predict = -1; // new tokens to predict
@@ -18,7 +18,7 @@

 	let mcpSearchQuery = $state('');
 	let allMcpServers = $derived(mcpStore.getServersSorted());
-	let mcpServers = $derived(allMcpServers.filter((s) => s.enabled));
+	let mcpServers = $derived(mcpStore.visibleMcpServers);
 	let hasMcpServers = $derived(mcpServers.length > 0);
 	// let hasAnyMcpServers = $derived(allMcpServers.length > 0);
 	let filteredMcpServers = $derived.by(() => {
@@ -74,9 +74,7 @@
 	const sheetItemRowClass =
 		'flex w-full items-center justify-between gap-2 rounded-md px-3 py-2 text-left text-sm transition-colors hover:bg-accent';

-	function getEnabledMcpServers() {
-		return mcpStore.getServersSorted().filter((s) => s.enabled);
-	}
+	let visibleMcpServers = $derived(mcpStore.visibleMcpServers);
 </script>

 <div class="flex items-center gap-1 {className}">
@@ -153,13 +151,13 @@
 						<span class="flex-1">MCP Servers</span>

 						<span class="text-xs text-muted-foreground">
-							{getEnabledMcpServers().length} server{getEnabledMcpServers().length !== 1 ? 's' : ''}
+							{visibleMcpServers.length} server{visibleMcpServers.length !== 1 ? 's' : ''}
 						</span>
 					</Collapsible.Trigger>

 					<Collapsible.Content>
 						<div class="flex flex-col gap-0.5 pl-4">
-							{#each getEnabledMcpServers() as server (server.id)}
+							{#each visibleMcpServers as server (server.id)}
 								{@const healthState = mcpStore.getHealthCheckState(server.id)}
 								{@const hasError = healthState.status === HealthCheckStatus.ERROR}
 								{@const displayName = mcpStore.getServerLabel(server)}
@@ -202,7 +200,7 @@
 								</button>
 							{/each}

-							{#if getEnabledMcpServers().length === 0}
+							{#if visibleMcpServers.length === 0}
 								<div class="px-3 py-2 text-center text-sm text-muted-foreground">
 									No MCP servers configured
 								</div>
@@ -43,7 +43,7 @@
 		assistantMessages: number;
 		messageTypes: string[];
 	} | null>(null);
-	let editedContent = $state(message.content);
+	let editedContent = $derived(message.content);

 	let rawEditContent = $derived.by(() => {
 		if (message.role !== MessageRole.ASSISTANT) return undefined;
@@ -1,8 +1,9 @@
 <script lang="ts">
 	import { ChevronDown, ShieldQuestion } from '@lucide/svelte';
 	import { ChatMessageActionCard } from '$lib/components/app';
-	import { Button } from '$lib/components/ui/button';
+	import { Button, buttonVariants } from '$lib/components/ui/button';
 	import * as ButtonGroup from '$lib/components/ui/button-group';
+	import { cn } from '$lib/components/ui/utils';
 	import * as DropdownMenu from '$lib/components/ui/dropdown-menu';
 	import { ToolSource, ToolPermissionDecision } from '$lib/enums';
 	import { TOOL_SERVER_LABELS } from '$lib/constants';
@@ -19,25 +20,17 @@

 <ChatMessageActionCard icon={ShieldQuestion}>
 	{#snippet message()}
-		Allow use of
-
-		<span class="font-semibold">{toolName}</span>
-
-		{#if serverLabel}
-			from <span class="font-semibold">{serverLabel}</span>
-		{/if}
-
-		?
+		Allow use of <span class="font-semibold">{toolName}</span>{#if serverLabel}
+			from <span class="font-semibold">{serverLabel}</span>{/if}?
 	{/snippet}

 	{#snippet actions()}
 		<DropdownMenu.Root>
-			<ButtonGroup.Root
-				class="overflow-hidden rounded-md bg-foreground text-white shadow-sm dark:bg-secondary dark:text-foreground"
-			>
+			<ButtonGroup.Root class="overflow-hidden rounded-md shadow-sm">
 				<Button
-					class="rounded-none! shadow-none!"
+					variant="secondary"
 					size="sm"
+					class="!rounded-r-none !shadow-none"
 					onclick={() => onDecision(ToolPermissionDecision.ONCE)}
 				>
 					Allow once
@@ -45,10 +38,14 @@

 				<ButtonGroup.Separator />

-				<DropdownMenu.Trigger>
-					<Button size="sm" class="rounded-none! !ps-2 shadow-none!">
-						<ChevronDown class="h-3.5 w-3.5" />
-					</Button>
+				<DropdownMenu.Trigger
+					class={cn(
+						buttonVariants({ variant: 'secondary', size: 'sm' }),
+						'inline-flex cursor-pointer items-center !rounded-l-none !shadow-none !px-2'
+					)}
+					aria-label="More allow options"
+				>
+					<ChevronDown class="h-3.5 w-3.5" />
 				</DropdownMenu.Trigger>
 			</ButtonGroup.Root>

@@ -76,12 +73,7 @@
 			</DropdownMenu.Content>
 		</DropdownMenu.Root>

-		<Button
-			variant="destructive"
-			size="sm"
-			class="text-destructive hover:text-destructive"
-			onclick={() => onDecision(ToolPermissionDecision.DENY)}
-		>
+		<Button variant="destructive" size="sm" onclick={() => onDecision(ToolPermissionDecision.DENY)}>
 			Deny
 		</Button>
 	{/snippet}
@@ -20,9 +20,9 @@
 		agenticInjectSteeringMessage
 	} from '$lib/stores/agentic.svelte';
 	import {
+		buildSiblingInfoMap,
 		copyToClipboard,
 		formatMessageForClipboard,
-		getMessageSiblings,
 		hasAgenticContent
 	} from '$lib/utils';

@@ -169,6 +169,8 @@
 		});
 	});

+	let siblingInfoByMessageId = $derived(buildSiblingInfoMap(allConversationMessages));
+
 	let displayMessages = $derived.by(() => {
 		if (!messages.length) {
 			return [];
@@ -223,18 +225,18 @@
 				}
 			}

-			const siblingInfo = getMessageSiblings(allConversationMessages, msg.id);
+			const siblingInfo = siblingInfoByMessageId.get(msg.id) ?? {
+				message: msg,
+				siblingIds: [msg.id],
+				currentIndex: 0,
+				totalSiblings: 1
+			};

 			result.push({
 				message: msg,
 				toolMessages,
 				isLastAssistantMessage: false,
-				siblingInfo: siblingInfo || {
-					message: msg,
-					siblingIds: [msg.id],
-					currentIndex: 0,
-					totalSiblings: 1
-				}
+				siblingInfo
 			});
 		}

@@ -4,7 +4,7 @@
 	import { McpServerForm } from '$lib/components/app/mcp';
 	import { mcpStore } from '$lib/stores/mcp.svelte';
 	import { conversationsStore } from '$lib/stores/conversations.svelte';
-	import { uuid } from '$lib/utils';
+	import { parseHeadersToArray, uuid } from '$lib/utils';
 	import { MCP_SERVER_ID_PREFIX } from '$lib/constants';

 	interface Props {
@@ -26,6 +26,10 @@
 			return 'Invalid URL format';
 		}
 	});
+	let newServerHeaderPairsValid = $derived(
+		parseHeadersToArray(newServerHeaders).every((p) => p.key.trim() && p.value.trim())
+	);
+	let canSave = $derived(!newServerUrlError && newServerHeaderPairsValid);

 	function handleOpenChange(value: boolean) {
 		if (!value) {
@@ -37,7 +41,7 @@
 	}

 	function saveNewServer() {
-		if (newServerUrlError) return;
+		if (!canSave) return;

 		const newServerId = uuid() ?? `${MCP_SERVER_ID_PREFIX}-${Date.now()}`;

@@ -52,6 +56,11 @@

 		handleOpenChange(false);
 	}
+
+	function handleSubmit(event: SubmitEvent) { // onsubmit handler of the add-server <form>
+		event.preventDefault(); // cancel the browser's default submit action for the form
+		saveNewServer(); // returns early without saving when canSave is false
+	}
 </script>

 <Dialog.Root {open} onOpenChange={handleOpenChange}>
@@ -60,29 +69,27 @@
 			<Dialog.Title>Add New Server</Dialog.Title>
 		</Dialog.Header>

-		<div class="space-y-4 py-4">
-			<McpServerForm
-				url={newServerUrl}
-				headers={newServerHeaders}
-				onUrlChange={(v) => (newServerUrl = v)}
-				onHeadersChange={(v) => (newServerHeaders = v)}
-				urlError={newServerUrl ? newServerUrlError : null}
-				id="new-server"
-			/>
-		</div>
+		<form onsubmit={handleSubmit} class="contents">
+			<div class="space-y-4 py-4">
+				<McpServerForm
+					url={newServerUrl}
+					headers={newServerHeaders}
+					onUrlChange={(v) => (newServerUrl = v)}
+					onHeadersChange={(v) => (newServerHeaders = v)}
+					urlError={newServerUrl ? newServerUrlError : null}
+					id="new-server"
+				/>
+			</div>

-		<Dialog.Footer>
-			<Button variant="secondary" size="sm" onclick={() => handleOpenChange(false)}>Cancel</Button>
+			<Dialog.Footer>
+				<Button variant="secondary" size="sm" onclick={() => handleOpenChange(false)}>
+					Cancel
+				</Button>

-			<Button
-				variant="default"
-				size="sm"
-				onclick={saveNewServer}
-				disabled={!!newServerUrlError}
-				aria-label="Save"
-			>
-				Add
-			</Button>
-		</Dialog.Footer>
+				<Button variant="default" size="sm" type="submit" disabled={!canSave} aria-label="Save">
+					Add
+				</Button>
+			</Dialog.Footer>
+		</form>
 	</Dialog.Content>
 </Dialog.Root>
@@ -0,0 +1,196 @@
+<script lang="ts">
+	import { Button } from '$lib/components/ui/button';
+	import * as Card from '$lib/components/ui/card';
+	import * as Dialog from '$lib/components/ui/dialog';
+	import { fly } from 'svelte/transition';
+	import { McpServerCardCompact, McpServerForm } from '$lib/components/app/mcp';
+	import { RECOMMENDED_MCP_SERVERS } from '$lib/constants';
+	import { conversationsStore } from '$lib/stores/conversations.svelte';
+	import { mcpStore } from '$lib/stores/mcp.svelte';
+	import { parseHeadersToArray, uuid } from '$lib/utils';
+	import { MCP_SERVERS_ADDED_TO_CHAT_LOCALSTORAGE_KEY, MCP_SERVER_ID_PREFIX } from '$lib/constants';
+	import type { MCPServerSettingsEntry } from '$lib/types';
+	import { Plus } from '@lucide/svelte';
+
+	interface Props {
+		open: boolean;
+		onOpenChange?: (open: boolean) => void;
+	}
+
+	let { open = $bindable(), onOpenChange }: Props = $props();
+
+	// Switch state per recommended server, keyed by server id; every entry starts off.
+	let selected = $state<Record<string, boolean>>(
+		Object.fromEntries(RECOMMENDED_MCP_SERVERS.map((server) => [server.id, false]))
+	);
+
+	// Custom servers added through the inline form while this dialog is open.
+	let addedServers = $state<MCPServerSettingsEntry[]>([]);
+
+	let showAddForm = $state(false);
+	let newServerUrl = $state('');
+	let newServerHeaders = $state('');
+	let newServerUrlError = $derived.by(() => {
+		if (!newServerUrl.trim()) return 'URL is required';
+		try {
+			new URL(newServerUrl);
+
+			return null;
+		} catch {
+			return 'Invalid URL format';
+		}
+	});
+	// Header rows must be complete before saving: a pair with a blank key or a
+	// blank value blocks the Add button and makes saveNewServer a no-op.
+	let newServerHeaderPairsValid = $derived(
+		parseHeadersToArray(newServerHeaders).every((p) => p.key.trim() && p.value.trim())
+	);
+	let canSave = $derived(!newServerUrlError && newServerHeaderPairsValid);
+
+	// Mirrors the new open state to the bindable prop and the optional callback.
+	// Closing also clears the inline form, empties `addedServers` and sets the
+	// MCP_SERVERS_ADDED_TO_CHAT_LOCALSTORAGE_KEY flag in localStorage.
+	function handleOpenChange(value: boolean) {
+		if (!value) {
+			showAddForm = false;
+			newServerUrl = '';
+			newServerHeaders = '';
+			addedServers = [];
+
+			localStorage.setItem(MCP_SERVERS_ADDED_TO_CHAT_LOCALSTORAGE_KEY, 'true');
+		}
+		open = value;
+		onOpenChange?.(value);
+	}
+
+	// Hides the inline "add your own server" form and clears its inputs.
+	function resetAddForm() {
+		showAddForm = false;
+		newServerUrl = '';
+		newServerHeaders = '';
+	}
+
+	// For every checked recommended server: enable it in mcpStore (adding it when
+	// it is not registered yet) and set its override to enabled, then close.
+	function enableSelected() {
+		localStorage.setItem(MCP_SERVERS_ADDED_TO_CHAT_LOCALSTORAGE_KEY, 'true');
+
+		for (const server of RECOMMENDED_MCP_SERVERS) {
+			if (selected[server.id]) {
+				const existing = mcpStore.getServerById(server.id);
+				if (existing) {
+					mcpStore.updateServer(server.id, { enabled: true });
+				} else {
+					mcpStore.addServer({
+						id: server.id,
+						enabled: true,
+						url: server.url,
+						name: server.name
+					});
+				}
+				conversationsStore.setMcpServerOverride(server.id, true);
+			}
+		}
+		handleOpenChange(false);
+	}
+
+	// Registers the custom server from the inline form, sets its override to
+	// enabled and lists it among the cards. Does nothing while the form is invalid.
+	function saveNewServer() {
+		if (!canSave) return;
+
+		const newServerId = uuid() ?? `${MCP_SERVER_ID_PREFIX}-${Date.now()}`;
+
+		localStorage.setItem(MCP_SERVERS_ADDED_TO_CHAT_LOCALSTORAGE_KEY, 'true');
+
+		const newServer = mcpStore.addServer({
+			id: newServerId,
+			enabled: true,
+			url: newServerUrl.trim(),
+			headers: newServerHeaders.trim() || undefined
+		});
+
+		conversationsStore.setMcpServerOverride(newServerId, true);
+
+		if (newServer) {
+			addedServers = [...addedServers, newServer];
+		}
+
+		resetAddForm();
+	}
+</script>
+
+<Dialog.Root bind:open onOpenChange={handleOpenChange}>
+	<Dialog.Content class="sm:max-w-lg">
+		<Dialog.Header>
+			<Dialog.Title>Do more with MCP</Dialog.Title>
+			<Dialog.Description>
+				Power-up your experience by adding tools, resources and more capabilities provided by MCP
+				servers.
+			</Dialog.Description>
+		</Dialog.Header>
+
+		<div class="max-h-[60vh] space-y-4 overflow-y-auto py-4" in:fly={{ y: 16, duration: 300 }}>
+			<h3 class="text-sm font-semibold">Quickly get started with</h3>
+
+			{#each RECOMMENDED_MCP_SERVERS as server (server.id)}
+				<McpServerCardCompact
+					{server}
+					enabled={selected[server.id]}
+					onToggle={(enabled) => (selected[server.id] = enabled)}
+				/>
+			{/each}
+
+			{#if addedServers.length > 0}
+				{#each addedServers as server (server.id)}
+					<McpServerCardCompact {server} enabled={true} />
+				{/each}
+			{/if}
+
+			{#if showAddForm}
+				<Card.Root class="gap-3! bg-muted/30 p-4">
+					<McpServerForm
+						url={newServerUrl}
+						headers={newServerHeaders}
+						onUrlChange={(v) => (newServerUrl = v)}
+						onHeadersChange={(v) => (newServerHeaders = v)}
+						urlError={newServerUrl ? newServerUrlError : null}
+						id="recommendation-new-server"
+					/>
+
+					<div class="flex justify-end gap-2 pt-2">
+						<Button variant="secondary" size="sm" onclick={resetAddForm}>Cancel</Button>
+
+						<Button
+							variant="default"
+							size="sm"
+							onclick={saveNewServer}
+							disabled={!canSave}
+							aria-label="Save"
+						>
+							Add
+						</Button>
+					</div>
+				</Card.Root>
+			{:else}
+				<Card.Root class="gap-0 border-dashed bg-muted/30 p-0 transition-colors hover:bg-muted/50">
+					<button
+						type="button"
+						class="flex w-full items-center justify-center gap-2 rounded-lg p-6 text-sm text-muted-foreground transition-colors hover:text-foreground"
+						onclick={() => (showAddForm = true)}
+						aria-label="Add your own MCP server"
+					>
+						<Plus class="h-4 w-4" />
+						<span>Add your own server</span>
+					</button>
+				</Card.Root>
+			{/if}
+		</div>
+
+		<Dialog.Footer>
+			<Button variant="secondary" size="sm" onclick={() => handleOpenChange(false)}>Not now</Button>
+
+			<Button variant="default" size="sm" onclick={enableSelected}>Add selected</Button>
+		</Dialog.Footer>
+	</Dialog.Content>
+</Dialog.Root>
@@ -18,6 +18,15 @@
 */
 export { default as DialogMcpServerAddNew } from './DialogMcpServerAddNew.svelte';

+/**
+ * **DialogMcpServerRecommendations** - Suggested MCP servers opt-in dialog
+ *
+ * Prompts the user to enable pre-defined recommended MCP servers on first launch.
+ * Shows one switch per suggested server and persists the choice as a per-chat
+ * override so the selected servers become available in conversations.
+ */
+export { default as DialogMcpServerRecommendations } from './DialogMcpServerRecommendations.svelte';
+
 /**
 * **DialogExportSettings** - Settings export dialog with sensitive data warning
 *
@@ -1,4 +1,5 @@
 <script lang="ts">
+	import { tick } from 'svelte';
 	import { Plus, Trash2 } from '@lucide/svelte';
 	import { Input } from '$lib/components/ui/input';
 	import {
@@ -33,8 +34,18 @@
 		sectionLabelOptional = true
 	}: Props = $props();

-	function addPair() {
+	// Pre-allocate the ref array so `bind:ref={keyInputRefs[index]}` never reads `undefined`
+	// for in-range indices; the $effect below keeps it in sync when `pairs` grows.
+	// svelte-ignore state_referenced_locally
+	let keyInputRefs: (HTMLInputElement | null)[] = $state(pairs.map(() => null));
+
+	async function addPair() {
+		// Capture the target index before mutating so deletions earlier in the
+		// list can't make keyInputRefs.length drift past the newly-appended row.
+		const newIndex = pairs.length;
 		onPairsChange([...pairs, { key: '', value: '' }]);
+		await tick();
+		keyInputRefs[newIndex]?.focus();
 	}

 	function removePair(index: number) {
@@ -76,6 +87,15 @@
 		newPairs[index] = { ...newPairs[index], value: trimmed };
 		onPairsChange(newPairs);
 	}
+
+	// Keep keyInputRefs aligned with pairs length so bind:ref never sees `undefined`.
+	// $effect.pre runs during traversal in tree order, before the {#each} block re-renders,
+	// so newly-appended items always have a defined slot when their binding is set up.
+	$effect.pre(() => {
+		while (keyInputRefs.length < pairs.length) {
+			keyInputRefs.push(null);
+		}
+	});
 </script>

 <div class={className}>
@@ -103,6 +123,7 @@
 			{#each pairs as pair, index (index)}
 				<div class="flex items-start gap-2">
 					<Input
+						bind:ref={keyInputRefs[index]}
 						type="text"
 						placeholder={keyPlaceholder}
 						value={pair.key}
@@ -163,7 +163,7 @@
 			{/if}
 		</div>

-		<div class="flex justify-between gap-4">
+		<div class="mt-auto flex justify-between gap-4">
 			{#if showSkeleton}
 				<Skeleton class="h-3 w-28" />
 			{:else if protocolVersion}
@@ -0,0 +1,156 @@
+<script lang="ts">
+	import * as Card from '$lib/components/ui/card';
+	import { Badge } from '$lib/components/ui/badge';
+	import { Skeleton } from '$lib/components/ui/skeleton';
+	import { Switch } from '$lib/components/ui/switch';
+	import * as Tooltip from '$lib/components/ui/tooltip';
+	import { McpServerIdentity } from '$lib/components/app/mcp';
+	import { mcpStore } from '$lib/stores/mcp.svelte';
+	import { HealthCheckStatus } from '$lib/enums';
+	import type { MCPServerDisplayInfo, HealthCheckState, MCPServerSettingsEntry } from '$lib/types';
+	import { onMount } from 'svelte';
+	import { MCP_CARD_VISIBLE_TOOL_LIMIT, NEWLINE } from '$lib/constants';
+
+	interface Props {
+		server: MCPServerDisplayInfo & { description?: string };
+		enabled?: boolean;
+		onToggle?: (enabled: boolean) => void;
+	}
+
+	let { server, enabled = false, onToggle }: Props = $props();
+
+	// Kick off a health check on mount, but only while the stored state is still
+	// IDLE; a rejected check is deliberately ignored here.
+	onMount(() => {
+		const { status } = mcpStore.getHealthCheckState(server.id);
+		if (status !== HealthCheckStatus.IDLE) return;
+		mcpStore.runHealthCheck(server as MCPServerSettingsEntry).catch(() => {});
+	});
+
+	let healthState = $derived<HealthCheckState>(mcpStore.getHealthCheckState(server.id));
+	let displayName = $derived(mcpStore.getServerLabel(server));
+	let faviconUrl = $derived(mcpStore.getServerFavicon(server.id));
+	let isIdle = $derived(healthState.status === HealthCheckStatus.IDLE);
+	let isHealthChecking = $derived(healthState.status === HealthCheckStatus.CONNECTING);
+	let isError = $derived(healthState.status === HealthCheckStatus.ERROR);
+	let errorMessage = $derived(
+		healthState.status === HealthCheckStatus.ERROR ? healthState.message : undefined
+	);
+	let serverInfo = $derived(
+		healthState.status === HealthCheckStatus.SUCCESS ? healthState.serverInfo : undefined
+	);
+	let tools = $derived(healthState.status === HealthCheckStatus.SUCCESS ? healthState.tools : []);
+	let instructions = $derived(
+		healthState.status === HealthCheckStatus.SUCCESS ? healthState.instructions : undefined
+	);
+	let showSkeleton = $derived(isIdle || isHealthChecking);
+
+	// A curated `server.description` is clamped to two lines; otherwise the first
+	// non-blank line of the reported instructions is shown on a single line.
+	let description = $derived.by(() => {
+		if (server.description) return { text: server.description, lines: 2 };
+		if (!instructions) return null;
+
+		const candidates = instructions.split(NEWLINE).map((line: string) => line.trim());
+		const firstNonBlank = candidates.find((line: string) => line.length > 0);
+
+		return firstNonBlank ? { text: firstNonBlank, lines: 1 } : null;
+	});
+
+	let visibleTools = $derived(tools.slice(0, MCP_CARD_VISIBLE_TOOL_LIMIT));
+	let hiddenTools = $derived(tools.slice(MCP_CARD_VISIBLE_TOOL_LIMIT));
+	let hiddenToolCount = $derived(hiddenTools.length);
+
+	function handleToggle(checked: boolean) {
+		onToggle?.(checked);
+	}
+</script>
+
+<Card.Root class="!gap-3 bg-muted/30 p-4">
+	<div class="flex items-start justify-between gap-3">
+		<div class="min-w-0 flex-1">
+			{#if showSkeleton}
+				<span class="flex min-w-0 items-center gap-1.5">
+					<Skeleton class="h-5 w-5 rounded" />
+					<Skeleton class="h-4 w-32" />
+				</span>
+			{:else}
+				<McpServerIdentity
+					{displayName}
+					{faviconUrl}
+					{serverInfo}
+					iconClass="h-5 w-5"
+					iconRounded="rounded"
+					nameClass="font-medium"
+				/>
+			{/if}
+		</div>
+
+		<Switch checked={enabled} disabled={isError || showSkeleton} onCheckedChange={handleToggle} />
+	</div>
+
+	{#if isError && errorMessage}
+		<p class="text-xs text-destructive">{errorMessage}</p>
+	{/if}
+
+	{#if showSkeleton}
+		<div class="space-y-1.5">
+			<Skeleton class="h-3 w-full max-w-md" />
+		</div>
+
+		<div class="flex flex-wrap items-center gap-1.5">
+			<Skeleton class="h-5 w-16 rounded-full" />
+			<Skeleton class="h-5 w-20 rounded-full" />
+			<Skeleton class="h-5 w-24 rounded-full" />
+			<Skeleton class="h-5 w-14 rounded-full" />
+		</div>
+	{:else}
+		{#if description}
+			{#if description.lines === 2}
+				<p class="line-clamp-2 text-xs text-muted-foreground" title={description.text}>
+					{description.text}
+				</p>
+			{:else}
+				<p class="line-clamp-1 truncate text-xs text-muted-foreground" title={description.text}>
+					{description.text}
+				</p>
+			{/if}
+		{/if}
+
+		{#if tools.length > 0}
+			<div class="flex flex-wrap items-center gap-1.5">
+				{#each visibleTools as tool (tool.name)}
+					<Tooltip.Root>
+						<Tooltip.Trigger>
+							<Badge variant="secondary" class="h-5 max-w-40 px-2 text-[11px]">
+								<span class="block min-w-0 flex-1 truncate">{tool.name}</span>
+							</Badge>
+						</Tooltip.Trigger>
+
+						<Tooltip.Content>
+							<p class="max-w-xs text-xs">
+								{tool.description ?? 'No description'}
+							</p>
+						</Tooltip.Content>
+					</Tooltip.Root>
+				{/each}
+
+				{#if hiddenToolCount > 0}
+					<Tooltip.Root>
+						<Tooltip.Trigger>
+							<Badge variant="secondary" class="h-5 px-2 text-[11px] text-muted-foreground">
+								+ {hiddenToolCount} more tools
+							</Badge>
+						</Tooltip.Trigger>
+
+						<Tooltip.Content class="max-w-md">
+							<p class="text-xs">
+								{hiddenTools.map((tool) => tool.name).join(', ')}
+							</p>
+						</Tooltip.Content>
+					</Tooltip.Root>
+				{/if}
+			</div>
+		{/if}
+	{/if}
+</Card.Root>
@@ -1,6 +1,7 @@
 <script lang="ts">
 	import { Button } from '$lib/components/ui/button';
 	import { McpServerForm } from '$lib/components/app/mcp';
+	import { parseHeadersToArray } from '$lib/utils';

 	interface Props {
 		serverId: string;
@@ -26,13 +27,21 @@
 		}
 	});

-	let canSave = $derived(!urlError);
+	let headerPairsValid = $derived(
+		parseHeadersToArray(editHeaders).every((p) => p.key.trim() && p.value.trim())
+	);
+	let canSave = $derived(!urlError && headerPairsValid);

 	function handleSave() {
 		if (!canSave) return;
 		onSave(editUrl.trim(), editHeaders.trim(), editUseProxy);
 	}

+	function handleSubmit(event: SubmitEvent) {
+		event.preventDefault();
+		handleSave();
+	}
+
 	export function setInitialValues(url: string, headers: string, useProxy: boolean) {
 		editUrl = url;
 		editHeaders = headers;
@@ -40,25 +49,27 @@
 	}
 </script>

-<div class="space-y-4">
-	<p class="font-medium">Configure Server</p>
+<form onsubmit={handleSubmit} class="contents">
+	<div class="space-y-4">
+		<p class="font-medium">Configure Server</p>

-	<McpServerForm
-		url={editUrl}
-		headers={editHeaders}
-		useProxy={editUseProxy}
-		onUrlChange={(v) => (editUrl = v)}
-		onHeadersChange={(v) => (editHeaders = v)}
-		onUseProxyChange={(v) => (editUseProxy = v)}
-		urlError={editUrl ? urlError : null}
-		id={serverId}
-	/>
+		<McpServerForm
+			url={editUrl}
+			headers={editHeaders}
+			useProxy={editUseProxy}
+			onUrlChange={(v) => (editUrl = v)}
+			onHeadersChange={(v) => (editHeaders = v)}
+			onUseProxyChange={(v) => (editUseProxy = v)}
+			urlError={editUrl ? urlError : null}
+			id={serverId}
+		/>

-	<div class="flex items-center justify-end gap-2">
-		<Button variant="secondary" size="sm" onclick={onCancel}>Cancel</Button>
+		<div class="flex items-center justify-end gap-2">
+			<Button variant="secondary" size="sm" onclick={onCancel}>Cancel</Button>

-		<Button size="sm" onclick={handleSave} disabled={!canSave}>
-			{serverUrl.trim() ? 'Update' : 'Add'}
-		</Button>
+			<Button size="sm" type="submit" disabled={!canSave}>
+				{serverUrl.trim() ? 'Update' : 'Add'}
+			</Button>
+		</div>
 	</div>
-</div>
+</form>
@@ -38,14 +38,87 @@

 	let headerPairs = $derived<KeyValuePair[]>(parseHeadersToArray(headers));

+	const AUTHORIZATION_HEADER = 'Authorization';
+	const BEARER_PREFIX = 'Bearer ';
+
+	// Heuristic: this dedicated UI only owns Authorization headers that already
+	// carry a Bearer scheme. Anything else (e.g. Basic, raw tokens) stays in the
+	// KV section so the user can still edit those values verbatim.
+	const matchesAuthorizationKey = (key: string): boolean =>
+		key.trim().toLowerCase() === AUTHORIZATION_HEADER.toLowerCase();
+
+	const isBearerScheme = (value: string): boolean =>
+		value.trim().toLowerCase().startsWith(BEARER_PREFIX.toLowerCase());
+
+	const ownedByBearerUi = (p: KeyValuePair): boolean =>
+		matchesAuthorizationKey(p.key) && isBearerScheme(p.value);
+
+	let hasAuthorization = $derived(headerPairs.some(ownedByBearerUi));
+
+	let wantsAuthorization = $state(false);
+
+	let showAuthorization = $derived(hasAuthorization || wantsAuthorization);
+
+	let urlInput: HTMLInputElement | null = $state(null);
+	let bearerInput: HTMLInputElement | null = $state(null);
+
+	$effect(() => {
+		urlInput?.focus();
+	});
+
+	$effect(() => {
+		if (wantsAuthorization && bearerInput) {
+			bearerInput.focus();
+		}
+	});
+
+	let bearerToken = $derived.by(() => {
+		const auth = headerPairs.find(ownedByBearerUi);
+		if (!auth) return '';
+		return auth.value.trim().slice(BEARER_PREFIX.length).trim();
+	});
+
+	$effect(() => {
+		if (!headers.trim()) {
+			wantsAuthorization = false;
+		}
+	});
+
 	function updateHeaderPairs(newPairs: KeyValuePair[]) {
 		headerPairs = newPairs;
 		onHeadersChange(serializeHeaders(newPairs));
 	}
+
+	// Typing in the Bearer field takes over the Authorization header entirely:
+	// every existing Authorization row, whatever its scheme, is dropped and at
+	// most one { Authorization: "Bearer <token>" } row is appended in its place,
+	// so the serialized headers never carry two competing Authorization keys.
+	function updateBearerToken(token: string) {
+		const cleanToken = token.trim();
+		const withoutAuthorization = headerPairs.filter(
+			(pair) => !matchesAuthorizationKey(pair.key)
+		);
+		const bearerPair = { key: AUTHORIZATION_HEADER, value: `${BEARER_PREFIX}${cleanToken}` };
+
+		updateHeaderPairs(
+			cleanToken ? [...withoutAuthorization, bearerPair] : withoutAuthorization
+		);
+	}
+
+	// Switch handler for the Authorization toggle. Turning it off removes only
+	// the Bearer-scheme Authorization row(s) this UI owns; any other
+	// Authorization row from the custom headers section is left in place.
+	function setUseAuthorization(checked: boolean) {
+		wantsAuthorization = checked;
+		if (checked) return;
+
+		const remaining = headerPairs.filter((pair) => !ownedByBearerUi(pair));
+		updateHeaderPairs(remaining);
+	}
 </script>

-<div class="grid gap-3">
-	<div>
+<div class="grid gap-2">
+	<div class="mb-4">
 		<label for="server-url-{id}" class="mb-2 block text-xs font-medium">
 			Server URL <span class="text-destructive">*</span>
 		</label>
@@ -57,50 +130,52 @@
 			value={url}
 			oninput={(e) => onUrlChange(e.currentTarget.value)}
 			class={urlError ? 'border-destructive' : ''}
+			bind:ref={urlInput}
 		/>

 		{#if urlError}
 			<p class="mt-1.5 text-xs text-destructive">{urlError}</p>
 		{/if}
-
-		{#if !isWebSocket && onUseProxyChange}
-			<label
-				class={[
-					'mt-3 flex items-start gap-2',
-					mcpStore.isProxyAvailable && 'cursor-pointer',
-					!mcpStore.isProxyAvailable && 'opacity-80'
-				]}
-			>
-				<Switch
-					class="mt-1"
-					id="use-proxy-{id}"
-					checked={useProxy}
-					disabled={!mcpStore.isProxyAvailable}
-					onCheckedChange={(checked) => onUseProxyChange?.(checked)}
-				/>
-
-				<span>
-					<span class="text-xs text-muted-foreground">Use llama-server proxy</span>
-
-					<br />
-
-					{#if !mcpStore.isProxyAvailable}
-						<span class="inline-flex gap-0.75 text-xs text-muted-foreground/60"
-							>(Run <pre>llama-server</pre>
-							with
-							<pre>{CLI_FLAGS.MCP_PROXY}</pre>
-							flag)</span
-						>
-					{/if}
-				</span>
-			</label>
-		{/if}
 	</div>

+	<label class="flex items-center gap-2 cursor-pointer">
+		<Switch
+			id="use-authorization-{id}"
+			checked={showAuthorization}
+			onCheckedChange={setUseAuthorization}
+		/>
+
+		<span class="text-xs text-muted-foreground">Authorization</span>
+	</label>
+
+	{#if showAuthorization}
+		<div class="relative mt-2">
+			<Input
+				id="bearer-token-{id}"
+				type="password"
+				autocomplete="off"
+				placeholder="Paste token here"
+				value={bearerToken}
+				oninput={(e) => updateBearerToken(e.currentTarget.value)}
+				class="pl-16"
+				bind:ref={bearerInput}
+			/>
+
+			<span
+				class="pointer-events-none absolute inset-y-0 left-3 flex items-center text-sm font-medium text-foreground"
+			>
+				Bearer
+			</span>
+		</div>
+	{/if}
+
 	<KeyValuePairs
-		class="mt-2"
-		pairs={headerPairs}
-		onPairsChange={updateHeaderPairs}
+		class="mt-3"
+		pairs={headerPairs.filter((p) => !ownedByBearerUi(p))}
+		onPairsChange={(pairs) => {
+			const auth = headerPairs.find(ownedByBearerUi);
+			updateHeaderPairs(auth ? [...pairs, auth] : pairs);
+		}}
 		keyPlaceholder="Header name"
 		valuePlaceholder="Value"
 		addButtonLabel="Add"
@@ -108,4 +183,37 @@
 		sectionLabel="Custom Headers"
 		sectionLabelOptional
 	/>
+
+	{#if !isWebSocket && onUseProxyChange}
+		<label
+			class={[
+				'mt-3 flex items-start gap-2',
+				mcpStore.isProxyAvailable && 'cursor-pointer',
+				!mcpStore.isProxyAvailable && 'opacity-80'
+			]}
+		>
+			<Switch
+				class="mt-1"
+				id="use-proxy-{id}"
+				checked={useProxy}
+				disabled={!mcpStore.isProxyAvailable}
+				onCheckedChange={(checked) => onUseProxyChange?.(checked)}
+			/>
+
+			<span>
+				<span class="text-xs text-muted-foreground">Use llama-server proxy</span>
+
+				<br />
+
+				{#if !mcpStore.isProxyAvailable}
+					<span class="inline-flex gap-0.75 text-xs text-muted-foreground/60"
+						>(Run <pre>llama-server</pre>
+						with
+						<pre>{CLI_FLAGS.MCP_PROXY}</pre>
+						flag)</span
+					>
+				{/if}
+			</span>
+		</label>
+	{/if}
 </div>
@@ -1,6 +1,7 @@
 <script lang="ts">
 	import { ExternalLink } from '@lucide/svelte';
 	import { Badge } from '$lib/components/ui/badge';
+	import { McpLogo } from '$lib/components/app/mcp';
 	import { TruncatedText } from '$lib/components/app/misc';
 	import { sanitizeExternalUrl } from '$lib/utils';
 	import type { MCPServerInfo } from '$lib/types';
@@ -34,20 +35,15 @@

 <span class="flex min-w-0 items-center gap-1.5">
 	{#if faviconUrl}
-		<img
-			src={faviconUrl}
-			alt=""
-			class={['shrink-0', iconRounded, iconClass]}
-			onerror={(e) => {
-				(e.currentTarget as HTMLImageElement).style.display = 'none';
-			}}
-		/>
+		<img src={faviconUrl} alt="" class={['shrink-0 text-foreground', iconRounded, iconClass]} />
+	{:else}
+		<McpLogo class={['shrink-0 text-foreground', iconRounded, iconClass].join(' ')} />
 	{/if}

 	<TruncatedText text={displayName ?? ''} class={nameClass ?? ''} />

 	{#if showVersion && serverInfo?.version}
-		<Badge variant="secondary" class="h-4 min-w-0 shrink px-1 text-[10px]">
+		<Badge variant="secondary" class="h-4 max-w-24 min-w-0 shrink px-1 text-[10px]">
 			<TruncatedText text={`v${serverInfo.version}`} />
 		</Badge>
 	{/if}
@@ -180,6 +180,16 @@ export { default as McpServerCardDeleteDialog } from './McpServerCard/McpServerC
 /** Skeleton loading state for server card during health checks. */
 export { default as McpServerCardSkeleton } from './McpServerCardSkeleton.svelte';

+/**
+ * **McpServerCardCompact** - Condensed MCP server card
+ *
+ * Compact alternative to McpServerCard tailored for picker-style UIs.
+ * Shows the server identity, status, and a flex-wrapped list of available tools.
+ * Tool names are rendered as badges; hovering a badge shows its description in a tooltip.
+ * Does not show connection logs or server instructions.
+ */
+export { default as McpServerCardCompact } from './McpServerCard/McpServerCardCompact.svelte';
+
 /**
 * **McpServerIdentity** - Server identity display (icon, name, version)
 *
@@ -21,7 +21,7 @@

 	let { class: className }: Props = $props();

-	let servers = $derived(mcpStore.getServersSorted());
+	let servers = $derived(mcpStore.visibleMcpServers);

 	let initialLoadComplete = $state(false);
 	let isAddingServer = $state(false);
@@ -8,6 +8,7 @@ export * from './attachment-labels';
 export * from './database';
 export * from './reasoning-effort';
 export * from './reasoning-effort-tokens';
+export * from './recommended-mcp-servers';
 export * from './storage';
 export * from './attachment-menu';
 export * from './auto-scroll';
@@ -1,2 +1,4 @@
 export const MCP_SERVER_URL_PLACEHOLDER = 'https://mcp.example.com/sse';
 export const MIN_AUTOCOMPLETE_INPUT_LENGTH = 1;
+/** Number of tools shown on the compact MCP server card before collapsing to a "+ N more" badge */
+export const MCP_CARD_VISIBLE_TOOL_LIMIT = 4;
@@ -37,3 +37,8 @@ export const MODEL_ACTIVATED_PARAMS_RE = /^[Aa]\d+(\.\d+)?[BbMmKkTt]$/;
 * Container format segments to exclude from tags (every model uses these).
 */
 export const MODEL_IGNORED_SEGMENTS = new Set(['GGUF', 'GGML']);
+
+/**
+ * Matches a trailing weight file extension, e.g. `model.gguf` -> `model`.
+ */
+export const MODEL_WEIGHT_EXTENSION_RE = /\.(gguf|ggml)$/i;
@@ -0,0 +1,35 @@
+import { DEFAULT_MCP_CONFIG } from './mcp';
+import type { RecommendedMCPServer } from '$lib/types';
+
+/**
+ * Pre-defined recommended MCP servers.
+ *
+ * Every entry is configured with `enabled: true`, but a server is not turned
+ * on for an individual conversation until the user explicitly enables it
+ * there, so its tools stay disabled by default.
+ */
+export const RECOMMENDED_MCP_SERVERS: RecommendedMCPServer[] = [
+	{
+		id: 'exa-web-search',
+		name: 'Exa Web Search',
+		description: 'Search the web and retrieve relevant content.',
+		url: 'https://mcp.exa.ai/mcp',
+		enabled: true,
+		requestTimeoutSeconds: DEFAULT_MCP_CONFIG.requestTimeoutSeconds
+	},
+	{
+		id: 'huggingface-mcp',
+		name: 'Hugging Face',
+		description:
+			'Browse models, datasets, spaces and machine learning papers from the Hugging Face hub.',
+		url: 'https://huggingface.co/mcp',
+		enabled: true,
+		requestTimeoutSeconds: DEFAULT_MCP_CONFIG.requestTimeoutSeconds
+	}
+];
+
+export const RECOMMENDED_MCP_SERVER_IDS = new Set(
+	RECOMMENDED_MCP_SERVERS.map((server) => server.id)
+);
+
+export const RECOMMENDED_MCP_SERVERS_OPTIN_DIALOG_DELAY = 1000; // milliseconds; used as the setTimeout delay before the opt-in dialog opens
@@ -59,6 +59,7 @@ export const SETTINGS_KEYS = {
 	// MCP
 	MCP_SERVERS: 'mcpServers',
 	MCP_REQUEST_TIMEOUT_SECONDS: 'mcpRequestTimeoutSeconds',
+	MCP_DEFAULT_SERVER_OVERRIDES: 'mcpDefaultServerOverrides',
 	AGENTIC_MAX_TURNS: 'agenticMaxTurns',
 	ALWAYS_SHOW_AGENTIC_TURNS: 'alwaysShowAgenticTurns',
 	AGENTIC_MAX_TOOL_PREVIEW_LINES: 'agenticMaxToolPreviewLines',
@@ -28,6 +28,7 @@ import McpLogo from '$lib/components/app/mcp/McpLogo.svelte';
 import { SETTINGS_KEYS } from './settings-keys';
 import { ROUTES, SETTINGS_SECTION_SLUGS } from './routes';
 import { TITLE_GENERATION } from './title-generation';
+import { RECOMMENDED_MCP_SERVERS } from './recommended-mcp-servers';

 export const SETTINGS_SECTION_TITLES = {
 	GENERAL: 'General',
@@ -774,9 +775,16 @@ const NON_UI_SETTINGS: SettingsEntry[] = [
 		key: SETTINGS_KEYS.MCP_SERVERS,
 		label: 'MCP servers',
 		help: 'Configure MCP servers as a JSON list. Use the form in the MCP Client settings section to edit.',
-		defaultValue: '[]',
+		defaultValue: JSON.stringify(RECOMMENDED_MCP_SERVERS),
 		type: SettingsFieldType.INPUT,
 		sync: { serverKey: SETTINGS_KEYS.MCP_SERVERS, paramType: SyncableParameterType.STRING }
+	},
+	{
+		key: SETTINGS_KEYS.MCP_DEFAULT_SERVER_OVERRIDES,
+		label: 'MCP default server overrides',
+		help: 'Per-server enable/disable defaults inherited by new chats. JSON-serialized list of {serverId, enabled} entries.',
+		defaultValue: '[]',
+		type: SettingsFieldType.INPUT
 	}
 	// {
 	// 	key: SETTINGS_KEYS.PY_INTERPRETER_ENABLED,
@@ -21,9 +21,10 @@ export const DISABLED_TOOLS_LOCALSTORAGE_KEY = `${STORAGE_APP_NAME}.disabledTool
 /** Disabled tools keyed by stable selection identity, no migration from the name based key */
 export const DISABLED_TOOL_KEYS_LOCALSTORAGE_KEY = `${STORAGE_APP_NAME}.disabledToolKeys`;
 export const FAVORITE_MODELS_LOCALSTORAGE_KEY = `${STORAGE_APP_NAME}.favoriteModels`;
-export const MCP_DEFAULT_ENABLED_LOCALSTORAGE_KEY = `${STORAGE_APP_NAME}.mcpDefaultEnabled`;
 export const THINKING_ENABLED_DEFAULT_LOCALSTORAGE_KEY = `${STORAGE_APP_NAME}.thinkingEnabledDefault`;
 export const REASONING_EFFORT_DEFAULT_LOCALSTORAGE_KEY = `${STORAGE_APP_NAME}.reasoningEffortDefault`;
+/** Set when user has interacted with the MCP server recommendations dialog (checked servers, added custom server, or dismissed) */
+export const MCP_SERVERS_ADDED_TO_CHAT_LOCALSTORAGE_KEY = `${STORAGE_APP_NAME}.mcpServersSetupDone`;
 export const USER_OVERRIDES_LOCALSTORAGE_KEY = `${STORAGE_APP_NAME}.userOverrides`;

 /** Key prefix for per-conversation resumable stream state, conversationId is appended */
@@ -38,8 +39,6 @@ export const DEPRECATED_CONFIG_LOCALSTORAGE_KEY = `${STORAGE_APP_NAME_DEPRECATED
 export const DEPRECATED_DISABLED_TOOLS_LOCALSTORAGE_KEY = `${STORAGE_APP_NAME_DEPRECATED}.disabledTools`;
 /** @deprecated Use {@link FAVORITE_MODELS_LOCALSTORAGE_KEY} instead */
 export const DEPRECATED_FAVORITE_MODELS_LOCALSTORAGE_KEY = `${STORAGE_APP_NAME_DEPRECATED}.favoriteModels`;
-/** @deprecated Use {@link MCP_DEFAULT_ENABLED_LOCALSTORAGE_KEY} instead */
-export const DEPRECATED_MCP_DEFAULT_ENABLED_LOCALSTORAGE_KEY = `${STORAGE_APP_NAME_DEPRECATED}.mcpDefaultEnabled`;
 /** @deprecated Use {@link USER_OVERRIDES_LOCALSTORAGE_KEY} instead */
 export const DEPRECATED_USER_OVERRIDES_LOCALSTORAGE_KEY = `${STORAGE_APP_NAME_DEPRECATED}.userOverrides`;

@@ -52,6 +51,5 @@ export const NEW_TO_DEPRECATED_MAP: Record<string, string> = {
 	[CONFIG_LOCALSTORAGE_KEY]: DEPRECATED_CONFIG_LOCALSTORAGE_KEY,
 	[DISABLED_TOOLS_LOCALSTORAGE_KEY]: DEPRECATED_DISABLED_TOOLS_LOCALSTORAGE_KEY,
 	[FAVORITE_MODELS_LOCALSTORAGE_KEY]: DEPRECATED_FAVORITE_MODELS_LOCALSTORAGE_KEY,
-	[MCP_DEFAULT_ENABLED_LOCALSTORAGE_KEY]: DEPRECATED_MCP_DEFAULT_ENABLED_LOCALSTORAGE_KEY,
 	[USER_OVERRIDES_LOCALSTORAGE_KEY]: DEPRECATED_USER_OVERRIDES_LOCALSTORAGE_KEY
 };
@@ -1,3 +1,3 @@
 // grace window after a visibilitychange before we kick a reader whose socket likely died
 // while the tab was hidden. covers brief background pauses without thrashing live streams
-export const STREAM_VISIBILITY_KICK_MS = 1000;
+export const STREAM_VISIBILITY_KICK_MS = 3000;
@@ -0,0 +1,85 @@
+import { browser } from '$app/environment';
+import {
+	MCP_SERVERS_ADDED_TO_CHAT_LOCALSTORAGE_KEY,
+	RECOMMENDED_MCP_SERVER_IDS,
+	RECOMMENDED_MCP_SERVERS_OPTIN_DIALOG_DELAY
+} from '$lib/constants';
+import { mcpStore } from '$lib/stores/mcp.svelte';
+
+/**
+ * First-run opt-in dialog for the recommended MCP servers.
+ *
+ * Owns the dismissed / open / trigger-timeout state and the effect that
+ * schedules the dialog. Reads opt-in status and the configured server list
+ * from `mcpStore`, so callers don't need to recompute on their side.
+ *
+ * Behaviour, as implemented below:
+ * - `dismissed` is seeded from localStorage: it is true when
+ *   `MCP_SERVERS_ADDED_TO_CHAT_LOCALSTORAGE_KEY` holds the string `'true'`
+ *   (always false outside the browser).
+ * - The effect evaluates the trigger condition once, guarded by `checked`.
+ *   When no recommended server is opted in and at least one configured server
+ *   has an ID in `RECOMMENDED_MCP_SERVER_IDS`, it schedules `open = true`
+ *   after `RECOMMENDED_MCP_SERVERS_OPTIN_DIALOG_DELAY`.
+ * - `dismiss()` persists the flag, closes the dialog and cancels a pending
+ *   trigger; `handleOpenChange(false)` routes through `dismiss()`.
+ *
+ * NOTE(review): nothing here clears a pending timeout when the owning
+ * component is torn down; presumably acceptable because the visible caller
+ * invokes the hook once at component setup — confirm before reusing it in
+ * short-lived components.
+ *
+ * @returns An object with read-only `open` and `dismissed` getters plus the
+ * `dismiss` and `handleOpenChange` callbacks.
+ */
+export function useMcpRecommendations() {
+	let dismissed = $state(
+		browser && localStorage.getItem(MCP_SERVERS_ADDED_TO_CHAT_LOCALSTORAGE_KEY) === 'true'
+	);
+	let open = $state(false);
+	let checked = $state(false); // set once the effect has evaluated the trigger condition
+	let triggerTimeout: ReturnType<typeof setTimeout> | null = null; // pending "open the dialog" timer, if any
+
+	function dismiss() {
+		if (browser) {
+			localStorage.setItem(MCP_SERVERS_ADDED_TO_CHAT_LOCALSTORAGE_KEY, 'true');
+		}
+		dismissed = true;
+		open = false;
+		if (triggerTimeout) {
+			clearTimeout(triggerTimeout);
+			triggerTimeout = null;
+		}
+	}
+
+	function handleOpenChange(next: boolean) {
+		open = next;
+		if (!next) dismiss(); // any close request is persisted as a dismissal
+	}
+
+	$effect(() => {
+		if (!browser) return;
+
+		if (open || dismissed) {
+			if (triggerTimeout) {
+				clearTimeout(triggerTimeout);
+				triggerTimeout = null;
+			}
+			return;
+		}
+
+		// Already evaluated once this session; leave any pending trigger alone so
+		// it can still fire later. Setting `checked = true` below re-runs this
+		// effect, and we must not wipe the timeout that was just scheduled.
+		if (checked) return;
+
+		if (mcpStore.optedInRecommendationIds.size > 0) {
+			checked = true;
+			return;
+		}
+
+		const hasRecommendations = mcpStore
+			.getServers()
+			.some((server) => RECOMMENDED_MCP_SERVER_IDS.has(server.id));
+
+		if (hasRecommendations) {
+			triggerTimeout = setTimeout(() => {
+				open = true;
+			}, RECOMMENDED_MCP_SERVERS_OPTIN_DIALOG_DELAY);
+		}
+
+		checked = true;
+	});
+
+	return {
+		get open() {
+			return open;
+		},
+		get dismissed() {
+			return dismissed;
+		},
+		dismiss,
+		handleOpenChange
+	};
+}
@@ -255,6 +255,7 @@ export class ChatService {
 			}),
 			stream,
 			return_progress: stream ? true : undefined,
+			sse_ping_interval: stream ? 1 : undefined,
 			tools: tools && tools.length > 0 ? tools : undefined
 		};

@@ -20,6 +20,7 @@
 import Dexie from 'dexie';
 import {
 	STORAGE_APP_NAME,
+	STORAGE_APP_NAME_DEPRECATED,
 	DB_APP_NAME_DEPRECATED,
 	CONFIG_LOCALSTORAGE_KEY,
 	IDXDB_TABLES,
@@ -494,12 +495,105 @@ const customJsonKeyMigration: Migration = {
 	}
 };

+const MCP_DEFAULT_ENABLED_MIGRATION_ID = 'mcp-default-enabled-to-config-v1';
+
+const LEGACY_MCP_DEFAULT_ENABLED_KEY = `${STORAGE_APP_NAME}.mcpDefaultEnabled`;
+const DEPRECATED_LEGACY_MCP_DEFAULT_ENABLED_KEY = `${STORAGE_APP_NAME_DEPRECATED}.mcpDefaultEnabled`;
+
+/**
+ * Migration that copies the legacy `mcpDefaultEnabled` localStorage value into
+ * the persisted settings config under `SETTINGS_KEYS.MCP_DEFAULT_SERVER_OVERRIDES`.
+ *
+ * The value is copied verbatim (as the original JSON string) and only when it
+ * parses to an array of `{ serverId: string, enabled: boolean }` objects. An
+ * existing config entry is never overwritten, and the legacy keys are left in
+ * place. A stored config that is not valid JSON, or that does not parse to a
+ * plain object, is left untouched and the migration is skipped instead of
+ * throwing.
+ */
+const mcpDefaultEnabledMigration: Migration = {
+	id: MCP_DEFAULT_ENABLED_MIGRATION_ID,
+	description:
+		'Copy mcpDefaultEnabled localStorage key into settings config (preserves legacy keys)',
+
+	async run(): Promise<void> {
+		const raw =
+			localStorage.getItem(LEGACY_MCP_DEFAULT_ENABLED_KEY) ??
+			localStorage.getItem(DEPRECATED_LEGACY_MCP_DEFAULT_ENABLED_KEY);
+
+		// Legacy keys intentionally left in place so a downgrade keeps reading them.
+
+		if (raw === null) {
+			if (import.meta.env.DEV && import.meta.env.VITE_DEBUG)
+				console.log('[Migration] MCP default enabled: no legacy key found, skipping');
+			return;
+		}
+
+		// Read the current config defensively: corrupt JSON, `null`, primitives and
+		// arrays would otherwise make `JSON.parse` or the `in` check below throw,
+		// or make the write-back silently drop the new key.
+		let config: Record<string, unknown> = {};
+		const configRaw = localStorage.getItem(CONFIG_LOCALSTORAGE_KEY);
+		if (configRaw) {
+			let parsedConfig: unknown;
+			try {
+				parsedConfig = JSON.parse(configRaw);
+			} catch {
+				parsedConfig = undefined;
+			}
+			if (
+				typeof parsedConfig !== 'object' ||
+				parsedConfig === null ||
+				Array.isArray(parsedConfig)
+			) {
+				if (import.meta.env.DEV && import.meta.env.VITE_DEBUG)
+					console.log('[Migration] MCP default enabled: config is not a JSON object, skipping');
+				return;
+			}
+			config = parsedConfig as Record<string, unknown>;
+		}
+
+		// Don't overwrite an existing config entry — current data wins.
+		if (SETTINGS_KEYS.MCP_DEFAULT_SERVER_OVERRIDES in config) {
+			if (import.meta.env.DEV && import.meta.env.VITE_DEBUG)
+				console.log('[Migration] MCP default enabled: config already has overrides, skipping');
+			return;
+		}
+
+		// Only migrate a well-formed override list; anything else is ignored.
+		try {
+			const parsed = JSON.parse(raw);
+			if (!Array.isArray(parsed)) return;
+			const valid = parsed.every(
+				(o) =>
+					typeof o === 'object' &&
+					o !== null &&
+					typeof (o as Record<string, unknown>).serverId === 'string' &&
+					typeof (o as Record<string, unknown>).enabled === 'boolean'
+			);
+			if (!valid) return;
+		} catch {
+			return;
+		}
+
+		// The setting stores the overrides as a JSON string, so the raw value is kept as-is.
+		config[SETTINGS_KEYS.MCP_DEFAULT_SERVER_OVERRIDES] = raw;
+		localStorage.setItem(CONFIG_LOCALSTORAGE_KEY, JSON.stringify(config));
+
+		if (import.meta.env.DEV && import.meta.env.VITE_DEBUG)
+			console.log('[Migration] MCP default enabled: moved legacy key into config');
+	}
+};
+
+const CONFIG_TYPES_MIGRATION_ID = 'config-type-normalization-v1';
+
+/**
+ * Migration that rewrites top-level config values equal to the strings
+ * `"true"` / `"false"` as real booleans and persists the config when anything
+ * changed.
+ *
+ * A stored config that is not valid JSON, or that does not parse to a plain
+ * object, is left untouched and the migration is skipped instead of throwing.
+ */
+const configTypesMigration: Migration = {
+	id: CONFIG_TYPES_MIGRATION_ID,
+	description: 'Coerce legacy string-encoded booleans in persisted config to real booleans',
+
+	async run(): Promise<void> {
+		const configRaw = localStorage.getItem(CONFIG_LOCALSTORAGE_KEY);
+		if (configRaw === null) return;
+
+		// Parse defensively: corrupt JSON would throw here, and `null` would make
+		// the key iteration below throw.
+		let parsed: unknown;
+		try {
+			parsed = JSON.parse(configRaw);
+		} catch {
+			parsed = undefined;
+		}
+		if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
+			if (import.meta.env.DEV && import.meta.env.VITE_DEBUG)
+				console.log('[Migration] Config types: config is not a JSON object, skipping');
+			return;
+		}
+
+		const config = parsed as Record<string, unknown>;
+		let changed = false;
+
+		// Pre-schema configs persisted booleans as the strings "true"/"false", which the
+		// strict server schema now rejects. Coerce those back to real booleans. No config
+		// string field holds exactly "true"/"false", so the match is unambiguous.
+		for (const key of Object.keys(config)) {
+			const value = config[key];
+			if (value === 'true' || value === 'false') {
+				config[key] = value === 'true';
+				changed = true;
+			}
+		}
+
+		if (changed) {
+			localStorage.setItem(CONFIG_LOCALSTORAGE_KEY, JSON.stringify(config));
+		}
+
+		if (import.meta.env.DEV && import.meta.env.VITE_DEBUG)
+			console.log(`[Migration] Config types: coerced string booleans (changed=${changed})`);
+	}
+};
+
 const migrations: Migration[] = [
 	localStorageMigration,
 	idxdbMigration,
 	legacyMessageMigration,
 	themeMigration,
-	customJsonKeyMigration
+	customJsonKeyMigration,
+	mcpDefaultEnabledMigration,
+	configTypesMigration
 ];

 export const MigrationService = {
@@ -1,5 +1,5 @@
 import { ServerModelStatus } from '$lib/enums';
-import { apiFetch, apiPost } from '$lib/utils';
+import { apiFetch, apiPost, normalizeModelName } from '$lib/utils';
 import type { ParsedModelId } from '$lib/types/models';
 import {
 	MODEL_QUANTIZATION_SEGMENT_RE,
@@ -7,6 +7,7 @@ import {
 	MODEL_PARAMS_RE,
 	MODEL_ACTIVATED_PARAMS_RE,
 	MODEL_IGNORED_SEGMENTS,
+	MODEL_WEIGHT_EXTENSION_RE,
 	MODEL_ID_NOT_FOUND,
 	MODEL_ID_ORG_SEPARATOR,
 	MODEL_ID_SEGMENT_SEPARATOR,
@@ -139,15 +140,19 @@ export class ModelsService {
 			tags: []
 		};

+		// strip directory path and weight extension so a bare `-m /path/file.gguf`
+		// parses like a clean repo id; the HF `org/model` form is preserved
+		const source = normalizeModelName(modelId).replace(MODEL_WEIGHT_EXTENSION_RE, '');
+
 		// 1. Extract colon-separated quantization (e.g. `model:Q4_K_M`)
-		const colonIdx = modelId.indexOf(MODEL_ID_QUANTIZATION_SEPARATOR);
+		const colonIdx = source.indexOf(MODEL_ID_QUANTIZATION_SEPARATOR);
 		let modelPath: string;

 		if (colonIdx !== MODEL_ID_NOT_FOUND) {
-			result.quantization = modelId.slice(colonIdx + 1) || null;
-			modelPath = modelId.slice(0, colonIdx);
+			result.quantization = source.slice(colonIdx + 1) || null;
+			modelPath = source.slice(0, colonIdx);
 		} else {
-			modelPath = modelId;
+			modelPath = source;
 		}

 		// 2. Extract org name (e.g. `org/model` -> org = "org")
@@ -23,7 +23,7 @@ import { browser } from '$app/environment';
 import { toast } from 'svelte-sonner';
 import { DatabaseService } from '$lib/services/database.service';
 import { MigrationService } from '$lib/services/migration.service';
-import { config } from '$lib/stores/settings.svelte';
+import { config, settingsStore } from '$lib/stores/settings.svelte';
 import { filterByLeafNodeId, findLeafNode, generateConversationTitle } from '$lib/utils';
 import type { McpServerOverride } from '$lib/types/database';
 import { zipSync, unzipSync, strToU8, strFromU8 } from 'fflate';
@@ -46,7 +46,7 @@ import {
 	ISO_TIME_SEPARATOR_REPLACEMENT,
 	NON_ALPHANUMERIC_REGEX,
 	MULTIPLE_UNDERSCORE_REGEX,
-	MCP_DEFAULT_ENABLED_LOCALSTORAGE_KEY,
+	SETTINGS_KEYS,
 	THINKING_ENABLED_DEFAULT_LOCALSTORAGE_KEY,
 	REASONING_EFFORT_DEFAULT_LOCALSTORAGE_KEY
 } from '$lib/constants';
@@ -90,12 +90,10 @@ class ConversationsStore {
 	/** Global (non-conversation-specific) reasoning effort default */
 	pendingReasoningEffort = $state<ReasoningEffort>(ConversationsStore.loadReasoningEffortDefault());

-	/** Load MCP default overrides from localStorage */
 	private static loadMcpDefaults(): McpServerOverride[] {
-		if (typeof globalThis.localStorage === 'undefined') return [];
+		const raw = config()[SETTINGS_KEYS.MCP_DEFAULT_SERVER_OVERRIDES];
+		if (typeof raw !== 'string' || raw.length === 0) return [];
 		try {
-			const raw = localStorage.getItem(MCP_DEFAULT_ENABLED_LOCALSTORAGE_KEY);
-			if (!raw) return [];
 			const parsed = JSON.parse(raw);
 			if (!Array.isArray(parsed)) return [];
 			return parsed.filter(
@@ -106,30 +104,23 @@ class ConversationsStore {
 		}
 	}

-	/** Persist MCP default overrides to localStorage */
 	private saveMcpDefaults(): void {
-		if (typeof globalThis.localStorage === 'undefined') return;
 		const plain = this.pendingMcpServerOverrides.map((o) => ({
 			serverId: o.serverId,
 			enabled: o.enabled
 		}));
-		if (plain.length > 0) {
-			localStorage.setItem(MCP_DEFAULT_ENABLED_LOCALSTORAGE_KEY, JSON.stringify(plain));
-		} else {
-			localStorage.removeItem(MCP_DEFAULT_ENABLED_LOCALSTORAGE_KEY);
-		}
+		settingsStore.updateConfig(SETTINGS_KEYS.MCP_DEFAULT_SERVER_OVERRIDES, JSON.stringify(plain));
 	}

 	/** Load thinking-enabled default from localStorage */
 	private static loadThinkingDefaults(): boolean {
-		if (typeof globalThis.localStorage === 'undefined') return false;
+		if (typeof globalThis.localStorage === 'undefined') return true;
 		try {
 			const raw = localStorage.getItem(THINKING_ENABLED_DEFAULT_LOCALSTORAGE_KEY);
-			if (!raw) return false;
-			const parsed = raw === 'true';
-			return typeof parsed === 'boolean' ? parsed : false;
+			if (!raw) return true;
+			return raw === 'true';
 		} catch {
-			return false;
+			return true;
 		}
 	}

@@ -189,6 +180,10 @@ class ConversationsStore {
 		try {
 			await MigrationService.runAllMigrations();

+			// Re-read defaults after migrations: a migration may have populated
+			// the settings config (e.g. moved legacy MCP overrides into it).
+			this.pendingMcpServerOverrides = ConversationsStore.loadMcpDefaults();
+
 			await this.loadConversations();
 			this.isInitialized = true;
 		} catch (error) {
@@ -337,7 +332,7 @@ class ConversationsStore {
 			}

 			this.pendingMcpServerOverrides = [];
-			this.pendingThinkingEnabled = false;
+			this.pendingThinkingEnabled = ConversationsStore.loadThinkingDefaults();
 			this.activeConversation = conversation;

 			if (conversation.currNode) {
@@ -20,11 +20,13 @@
 */

 import { browser } from '$app/environment';
+import { SvelteSet } from 'svelte/reactivity';
 import { SETTINGS_KEYS } from '$lib/constants';
 import { MCPService } from '$lib/services/mcp.service';
 import { config, settingsStore } from '$lib/stores/settings.svelte';
 import { mcpResourceStore } from '$lib/stores/mcp-resources.svelte';
 import { serverStore } from '$lib/stores/server.svelte';
+import { conversationsStore } from '$lib/stores/conversations.svelte';
 import { mode } from 'mode-watcher';
 import {
 	parseMcpServerSettings,
@@ -48,10 +50,11 @@ import {
 	EXPECTED_THEMED_ICON_PAIR_COUNT,
 	MCP_ALLOWED_ICON_MIME_TYPES,
 	MCP_SERVER_ID_PREFIX,
-	MCP_RECONNECT_INITIAL_DELAY,
 	MCP_RECONNECT_BACKOFF_MULTIPLIER,
+	MCP_RECONNECT_INITIAL_DELAY,
 	MCP_RECONNECT_MAX_DELAY,
-	MCP_RECONNECT_ATTEMPT_TIMEOUT_MS
+	MCP_RECONNECT_ATTEMPT_TIMEOUT_MS,
+	RECOMMENDED_MCP_SERVER_IDS
 } from '$lib/constants';
 import type {
 	MCPToolCall,
@@ -70,6 +73,7 @@ import type {
 	Tool,
 	HealthCheckState,
 	MCPServerSettingsEntry,
+	MCPServerDisplayInfo,
 	MCPServerConfig,
 	MCPResourceIcon,
 	MCPResourceAttachment,
@@ -365,7 +369,7 @@ class MCPStore {
 		return this.connections;
 	}

-	getServerLabel(server: MCPServerSettingsEntry): string {
+	getServerLabel(server: MCPServerDisplayInfo): string {
 		const healthState = this.getHealthCheckState(server.id);

 		if (healthState?.status === HealthCheckStatus.SUCCESS)
@@ -527,7 +531,7 @@ class MCPStore {

 	addServer(
 		serverData: Omit<MCPServerSettingsEntry, 'id' | 'requestTimeoutSeconds'> & { id?: string }
-	): void {
+	): MCPServerSettingsEntry {
 		const servers = this.getServers();
 		const newServer: MCPServerSettingsEntry = {
 			id: serverData.id || (uuid() ?? `server-${Date.now()}`),
@@ -540,6 +544,7 @@ class MCPStore {
 			useProxy: serverData.useProxy
 		};
 		settingsStore.updateConfig(SETTINGS_KEYS.MCP_SERVERS, JSON.stringify([...servers, newServer]));
+		return newServer;
 	}

 	updateServer(id: string, updates: Partial<MCPServerSettingsEntry>): void {
@@ -576,6 +581,33 @@ class MCPStore {
 		});
 	}

+	/**
+	 * IDs of recommended MCP servers that have an enabled entry in
+	 * `conversationsStore.pendingMcpServerOverrides`.
+	 *
+	 * Acts as the shared answer to "which recommendations did the user accept"
+	 * for both the recommendations hook and the visible-servers getter. A new
+	 * set is built on every read.
+	 */
+	get optedInRecommendationIds(): ReadonlySet<string> {
+		const acceptedIds = conversationsStore.pendingMcpServerOverrides
+			.filter((entry) => RECOMMENDED_MCP_SERVER_IDS.has(entry.serverId) && entry.enabled)
+			.map((entry) => entry.serverId);
+		return new SvelteSet<string>(acceptedIds);
+	}
+
+	/**
+	 * Servers offered by chat-add UIs and the settings page.
+	 *
+	 * Takes the sorted server list and keeps an entry when it is enabled in
+	 * settings and is either not a recommended server or is a recommended
+	 * server the user has opted in to.
+	 */
+	get visibleMcpServers(): MCPServerSettingsEntry[] {
+		const acceptedRecommendations = this.optedInRecommendationIds;
+		const isSelectable = (candidate: MCPServerSettingsEntry): boolean => {
+			if (!candidate.enabled) return false;
+			// Non-recommended servers only need to be enabled.
+			if (!RECOMMENDED_MCP_SERVER_IDS.has(candidate.id)) return true;
+			return acceptedRecommendations.has(candidate.id);
+		};
+		return this.getServersSorted().filter(isSelectable);
+	}
+
 	async ensureInitialized(perChatOverrides?: McpServerOverride[]): Promise<boolean> {
 		if (!browser) {
 			return false;
@@ -265,6 +265,7 @@ export interface ApiChatCompletionRequest {
 	stream?: boolean;
 	model?: string;
 	return_progress?: boolean;
+	sse_ping_interval?: number;
 	tools?: ApiChatCompletionTool[];
 	// Reasoning parameters
 	reasoning_format?: string;
@@ -127,6 +127,8 @@ export type {
 	MCPServerConfig,
 	MCPClientConfig,
 	MCPServerSettingsEntry,
+	MCPServerDisplayInfo,
+	RecommendedMCPServer,
 	MCPToolCall,
 	OpenAIToolDefinition,
 	ServerStatus,
@@ -209,17 +209,32 @@ export type MCPToolCall = {
 	};
 };

-export type MCPServerSettingsEntry = {
+/**
+ * Minimum fields needed to display or identify an MCP server.
+ */
+export interface MCPServerDisplayInfo {
 	id: string;
-	enabled: boolean;
+	name?: string;
 	url: string;
+}
+
+export type MCPServerSettingsEntry = MCPServerDisplayInfo & {
+	enabled: boolean;
 	requestTimeoutSeconds: number;
 	headers?: string;
-	name?: string;
 	iconUrl?: string;
 	useProxy?: boolean;
 };

+/**
+ * Pre-defined recommended MCP server shown to the user in onboarding/picker UIs.
+ *
+ * Extends {@link MCPServerDisplayInfo} (`id`, optional `name`, `url`) with a
+ * required `description`, plus the `enabled` and `requestTimeoutSeconds`
+ * fields that {@link MCPServerSettingsEntry} also declares.
+ */
+export interface RecommendedMCPServer extends MCPServerDisplayInfo {
+	description: string;
+	enabled: boolean;
+	requestTimeoutSeconds: number;
+}
+
 export interface MCPHostManagerConfig {
 	servers: MCPClientConfig['servers'];
 	clientInfo?: Implementation;
@@ -92,18 +92,14 @@ export function filterByLeafNodeId(
 * Finds the leaf node (message with no children) for a given message branch.
 * Traverses down the tree following the last child until reaching a leaf.
 *
- * @param messages - All messages in the conversation
+ * @param nodeMap - Map of messages keyed by ID
 * @param messageId - Starting message ID to find leaf for
 * @returns The leaf node ID, or the original messageId if no children
 */
-export function findLeafNode(messages: readonly DatabaseMessage[], messageId: string): string {
-	const nodeMap = new Map<string, DatabaseMessage>();
-
-	// Build node map for quick lookups
-	for (const msg of messages) {
-		nodeMap.set(msg.id, msg);
-	}
-
+function findLeafNodeInMap(
+	nodeMap: ReadonlyMap<string, DatabaseMessage>,
+	messageId: string
+): string {
 	let currentNode: DatabaseMessage | undefined = nodeMap.get(messageId);
 	while (currentNode && currentNode.children.length > 0) {
 		// Follow the last child (most recent branch)
@@ -114,6 +110,22 @@ export function findLeafNode(messages: readonly DatabaseMessage[], messageId: st
 	return currentNode?.id ?? messageId;
 }

+/**
+ * Array-based entry point for leaf lookup.
+ *
+ * Indexes `messages` by ID and delegates to {@link findLeafNodeInMap}, which
+ * starts at `messageId` and keeps following the last child until it reaches a
+ * message without children.
+ *
+ * @param messages - All messages in the conversation
+ * @param messageId - ID of the message the walk starts from
+ * @returns The leaf node ID, or the original messageId if no children
+ */
+export function findLeafNode(messages: readonly DatabaseMessage[], messageId: string): string {
+	const messagesById = new Map<string, DatabaseMessage>();
+	for (const message of messages) {
+		messagesById.set(message.id, message);
+	}
+	return findLeafNodeInMap(messagesById, messageId);
+}
+
 /**
 * Finds all descendant messages (children, grandchildren, etc.) of a given message.
 * This is used for cascading deletion to remove all messages in a branch.
@@ -156,21 +168,14 @@ export function findDescendantMessages(
 * Gets sibling information for a message, including all sibling IDs and current position.
 * Siblings are messages that share the same parent.
 *
- * @param messages - All messages in the conversation
+ * @param nodeMap - Map of messages keyed by ID
 * @param messageId - The message to get sibling info for
 * @returns Sibling information including leaf node IDs for navigation
 */
 export function getMessageSiblings(
-	messages: readonly DatabaseMessage[],
+	nodeMap: ReadonlyMap<string, DatabaseMessage>,
 	messageId: string
 ): ChatMessageSiblingInfo | null {
-	const nodeMap = new Map<string, DatabaseMessage>();
-
-	// Build node map for quick lookups
-	for (const msg of messages) {
-		nodeMap.set(msg.id, msg);
-	}
-
 	const message = nodeMap.get(messageId);
 	if (!message) {
 		return null;
@@ -203,7 +208,9 @@ export function getMessageSiblings(

 	// Convert sibling message IDs to their corresponding leaf node IDs
 	// This allows navigation between different conversation branches
-	const siblingLeafIds = siblingIds.map((siblingId: string) => findLeafNode(messages, siblingId));
+	const siblingLeafIds = siblingIds.map((siblingId: string) =>
+		findLeafNodeInMap(nodeMap, siblingId)
+	);

 	// Find current message's position among siblings
 	const currentIndex = siblingIds.indexOf(messageId);
@@ -217,85 +224,22 @@ export function getMessageSiblings(
 }

 /**
- * Creates a display-ready list of messages with sibling information for UI rendering.
- * This is the main function used by chat components to render conversation branches.
+ * Builds sibling information for every message in a conversation.
+ * A single node map is shared across all lookups for O(1) access.
 *
 * @param messages - All messages in the conversation
- * @param leafNodeId - Current leaf node being viewed
- * @returns Array of messages with sibling navigation info
+ * @returns Map of message ID to its sibling information
 */
-export function getMessageDisplayList(
-	messages: readonly DatabaseMessage[],
-	leafNodeId: string
-): ChatMessageSiblingInfo[] {
-	// Get the current conversation path
-	const currentPath = filterByLeafNodeId(messages, leafNodeId, true);
-	const result: ChatMessageSiblingInfo[] = [];
-
-	// Add sibling info for each message in the current path
-	for (const message of currentPath) {
-		if (message.type === 'root') {
-			continue; // Skip root messages in display
-		}
-
-		const siblingInfo = getMessageSiblings(messages, message.id);
-		if (siblingInfo) {
-			result.push(siblingInfo);
+export function buildSiblingInfoMap(
+	messages: readonly DatabaseMessage[]
+): Map<string, ChatMessageSiblingInfo> {
+	const nodeMap = new Map(messages.map((msg) => [msg.id, msg] as const));
+	const siblingMap = new Map<string, ChatMessageSiblingInfo>();
+	for (const msg of messages) {
+		const info = getMessageSiblings(nodeMap, msg.id);
+		if (info) {
+			siblingMap.set(msg.id, info);
 		}
 	}
-
-	return result;
-}
-
-/**
- * Checks if a message has multiple siblings (indicating branching at that point).
- *
- * @param messages - All messages in the conversation
- * @param messageId - The message to check
- * @returns True if the message has siblings
- */
-export function hasMessageSiblings(
-	messages: readonly DatabaseMessage[],
-	messageId: string
-): boolean {
-	const siblingInfo = getMessageSiblings(messages, messageId);
-	return siblingInfo ? siblingInfo.totalSiblings > 1 : false;
-}
-
-/**
- * Gets the next sibling message ID for navigation.
- *
- * @param messages - All messages in the conversation
- * @param messageId - Current message ID
- * @returns Next sibling's leaf node ID, or null if at the end
- */
-export function getNextSibling(
-	messages: readonly DatabaseMessage[],
-	messageId: string
-): string | null {
-	const siblingInfo = getMessageSiblings(messages, messageId);
-	if (!siblingInfo || siblingInfo.currentIndex >= siblingInfo.totalSiblings - 1) {
-		return null;
-	}
-
-	return siblingInfo.siblingIds[siblingInfo.currentIndex + 1];
-}
-
-/**
- * Gets the previous sibling message ID for navigation.
- *
- * @param messages - All messages in the conversation
- * @param messageId - Current message ID
- * @returns Previous sibling's leaf node ID, or null if at the beginning
- */
-export function getPreviousSibling(
-	messages: readonly DatabaseMessage[],
-	messageId: string
-): string | null {
-	const siblingInfo = getMessageSiblings(messages, messageId);
-	if (!siblingInfo || siblingInfo.currentIndex <= 0) {
-		return null;
-	}
-
-	return siblingInfo.siblingIds[siblingInfo.currentIndex - 1];
+	return siblingMap;
 }
@@ -26,10 +26,7 @@ export {
 	findLeafNode,
 	findDescendantMessages,
 	getMessageSiblings,
-	getMessageDisplayList,
-	hasMessageSiblings,
-	getNextSibling,
-	getPreviousSibling
+	buildSiblingInfoMap
 } from './branching';

 // Code
@@ -8,6 +8,7 @@
 	import { onMount } from 'svelte';

 	import { SidebarNavigation, DialogConversationTitleUpdate } from '$lib/components/app';
+	import { DialogMcpServerRecommendations } from '$lib/components/app/dialogs';
 	import { PwaMetaTags, PwaRefreshAlert } from '$lib/components/pwa';
 	import { pwaAssetsHead } from 'virtual:pwa-assets/head';

@@ -26,6 +27,7 @@
 	import { FAVICON_PATHS, FAVICON_SELECTORS } from '$lib/constants/pwa';
 	import { useKeyboardShortcuts } from '$lib/hooks/use-keyboard-shortcuts.svelte';
 	import { usePwa } from '$lib/hooks/use-pwa.svelte';
+	import { useMcpRecommendations } from '$lib/hooks/use-mcp-recommendations.svelte';
 	import { conversations } from '$lib/stores/conversations.svelte';
 	import { isMobile } from '$lib/stores/viewport.svelte';
 	import { theme } from '$lib/stores/theme.svelte';
@@ -37,6 +39,8 @@
 	let innerHeight = $state<number | undefined>();
 	let innerWidth = $state(browser ? window.innerWidth : 0);

+	const mcpRecommendations = useMcpRecommendations();
+
 	let chatSidebar:
 		| {
 				activateSearchMode?: () => void;
@@ -321,6 +325,11 @@
 		onConfirm={handleTitleUpdateConfirm}
 		onCancel={handleTitleUpdateCancel}
 	/>
+
+	<DialogMcpServerRecommendations
+		open={mcpRecommendations.open}
+		onOpenChange={mcpRecommendations.handleOpenChange}
+	/>
 </Tooltip.Provider>

 <!-- PWA update prompt + version -->
@@ -0,0 +1,37 @@
+<script lang="ts">
+	import { untrack } from 'svelte';
+	import McpServerForm from '$lib/components/app/mcp/McpServerForm.svelte';
+
+	interface Props {
+		headers?: string;
+	}
+
+	let { headers = '' }: Props = $props();
+
+	let headersState = $state(untrack(() => headers));
+	let lastCapturedHeaders = $state(untrack(() => headers));
+
+	$effect(() => {
+		if (headers !== lastCapturedHeaders) {
+			headersState = headers;
+			lastCapturedHeaders = headers;
+		}
+	});
+</script>
+
+<!--
+	Drives McpServerForm with a controlled `headers` string and exposes the
+	latest captured value through `data-captured-headers` so the client test
+	can read it back without a custom binding API.
+
+	The `headers` prop only seeds the local state: the `$effect` in the script
+	copies the prop into `headersState` again only when it differs from the last
+	prop value seen, so edits reported through `onHeadersChange` are not
+	overwritten while the prop itself stays unchanged.
+-->
+<McpServerForm
+	url="https://example.test/mcp"
+	headers={headersState}
+	onUrlChange={() => {}}
+	onHeadersChange={(value) => {
+		headersState = value;
+	}}
+	id="mcp-server-form-test"
+/>
+
+<div data-testid="captured-headers" data-captured-headers={headersState} hidden></div>
@@ -0,0 +1,133 @@
+import { describe, expect, it } from 'vitest';
+import { render } from 'vitest-browser-svelte';
+import McpServerFormWrapper from './components/McpServerFormWrapper.svelte';
+
+const AUTHORIZATION_HEADER = 'Authorization';
+const BEARER_PREFIX = 'Bearer ';
+const BEARER_PLACEHOLDER = 'Paste token here';
+
+/**
+ * Client-side tests for the McpServerForm bearer UI.
+ *
+ * The dedicated UI only "owns" Authorization headers that already carry a
+ * Bearer scheme (heuristic check on the value). Other Authorization values
+ * stay in the KV section so the user can still edit them verbatim. Storage
+ * always goes through the same custom-headers slot, so a round-trip via this
+ * UI produces exactly one `Authorization: Bearer <token>` entry.
+ *
+ * Equivalent parser coverage lives in `tests/unit/headers.test.ts`.
+ *
+ * Each test renders `McpServerFormWrapper` with an initial `headers` string
+ * and, where it checks the resulting headers, reads them back from the
+ * `data-captured-headers` attribute of the element located by the
+ * `captured-headers` test id.
+ */
+describe('McpServerForm - Authorization / bearer UI', () => {
+	function bearerInput(screen: Awaited<ReturnType<typeof render>>) {
+		return screen.locator.getByPlaceholder(BEARER_PLACEHOLDER); // the token field is located by its placeholder text
+	}
+
+	function capturedHeaders(screen: Awaited<ReturnType<typeof render>>) {
+		return screen.getByTestId('captured-headers'); // element whose attribute carries the headers string under test
+	}
+
+	it('mounts with the bearer input hidden when no auth header is present', async () => {
+		const screen = await render(McpServerFormWrapper, { headers: '' });
+
+		await expect.element(screen.getByRole('textbox', { name: /server url/i })).toBeVisible();
+
+		await expect.element(bearerInput(screen)).not.toBeInTheDocument();
+	});
+
+	it('toggling Authorization shows the bearer input', async () => {
+		const screen = await render(McpServerFormWrapper, { headers: '' });
+
+		await screen.getByRole('switch', { name: /authorization/i }).click();
+
+		await expect.element(bearerInput(screen)).toBeVisible();
+	});
+
+	it('typing a token writes the Authorization row with the Bearer prefix prepended', async () => {
+		const screen = await render(McpServerFormWrapper, { headers: '' });
+
+		await screen.getByRole('switch', { name: /authorization/i }).click();
+
+		const token = 'super-secret';
+		await bearerInput(screen).fill(token);
+
+		const expected = JSON.stringify({ [AUTHORIZATION_HEADER]: `${BEARER_PREFIX}${token}` });
+		await expect
+			.element(capturedHeaders(screen))
+			.toHaveAttribute('data-captured-headers', expected);
+	});
+
+	it('pre-existing Bearer header pre-fills the bearer input with the token stripped', async () => {
+		const existing = JSON.stringify({
+			'X-Trace-Id': 'abc',
+			[AUTHORIZATION_HEADER]: `${BEARER_PREFIX}preexisting`
+		});
+
+		const screen = await render(McpServerFormWrapper, { headers: existing });
+
+		await expect.element(bearerInput(screen)).toBeVisible();
+		await expect.element(bearerInput(screen)).toHaveValue('preexisting');
+	});
+
+	it('non-Bearer Authorization is ignored by the dedicated UI and stays in the KV section', async () => {
+		const existing = JSON.stringify({ [AUTHORIZATION_HEADER]: 'Basic czNjcjpwYXNz' });
+
+		const screen = await render(McpServerFormWrapper, { headers: existing });
+
+		await expect.element(bearerInput(screen)).not.toBeInTheDocument();
+
+		const headerKeyInput = screen.getByPlaceholder('Header name');
+		await expect.element(headerKeyInput).toBeVisible();
+	});
+
+	it('engaging the token UI replaces a non-Bearer Authorization with the Bearer scheme', async () => {
+		const existing = JSON.stringify({ [AUTHORIZATION_HEADER]: 'Basic old' });
+
+		const screen = await render(McpServerFormWrapper, { headers: existing });
+
+		await screen.getByRole('switch', { name: /authorization/i }).click();
+		await bearerInput(screen).fill('new');
+
+		const expected = JSON.stringify({ [AUTHORIZATION_HEADER]: `${BEARER_PREFIX}new` });
+		await expect
+			.element(capturedHeaders(screen))
+			.toHaveAttribute('data-captured-headers', expected);
+	});
+
+	it('toggling Authorization off with no token drops the Bearer row but keeps non-Bearer schemes', async () => {
+		// NOTE(review): the fixture holds only a Bearer header, so this test asserts the
+		// Bearer row is dropped; the "keeps non-Bearer schemes" half of the title is not
+		// asserted here — confirm it is covered by the next test.
+		const existing = JSON.stringify({ [AUTHORIZATION_HEADER]: `${BEARER_PREFIX}xyz` });
+		const screen = await render(McpServerFormWrapper, { headers: existing });
+
+		await screen.getByRole('switch', { name: /authorization/i }).click();
+
+		await expect.element(capturedHeaders(screen)).toHaveAttribute('data-captured-headers', '');
+	});
+
+	it('toggling Authorization off when no Bearer row is present leaves headers untouched', async () => {
+		const existing = JSON.stringify({ [AUTHORIZATION_HEADER]: 'Basic czNjcjpwYXNz' });
+		const screen = await render(McpServerFormWrapper, { headers: existing });
+
+		await screen.getByRole('switch', { name: /authorization/i }).click();
+		await screen.getByRole('switch', { name: /authorization/i }).click();
+
+		await expect
+			.element(capturedHeaders(screen))
+			.toHaveAttribute('data-captured-headers', existing);
+	});
+
+	it('clearing the bearer input drops the Authorization row', async () => {
+		const existing = JSON.stringify({ [AUTHORIZATION_HEADER]: `${BEARER_PREFIX}xyz` });
+		const screen = await render(McpServerFormWrapper, { headers: existing });
+
+		await bearerInput(screen).fill('');
+
+		await expect.element(capturedHeaders(screen)).toHaveAttribute('data-captured-headers', '');
+	});
+
+	it('does not surface Bearer Authorization in the KV section even when pre-existing', async () => {
+		const existing = JSON.stringify({ [AUTHORIZATION_HEADER]: `${BEARER_PREFIX}xyz` });
+		const screen = await render(McpServerFormWrapper, { headers: existing });
+
+		const headerKeyInput = screen.getByPlaceholder('Header name');
+		await expect.element(headerKeyInput).not.toBeInTheDocument();
+	});
+});
@@ -0,0 +1,126 @@
+import { describe, expect, it } from 'vitest';
+import { parseHeadersToArray, serializeHeaders } from '$lib/utils/headers';
+
+/**
+ * Tests for the header serialization helpers used by the MCP server form
+ * (custom header rows) and the new Authorization/Bearer-token flow.
+ */
+describe('parseHeadersToArray', () => {
+	it('returns an empty array for empty or whitespace-only input', () => {
+		expect(parseHeadersToArray('')).toEqual([]);
+		expect(parseHeadersToArray('   ')).toEqual([]);
+		// The cast simulates an untyped caller passing undefined at runtime.
+		expect(parseHeadersToArray(undefined as unknown as string)).toEqual([]);
+	});
+
+	it('returns an empty array for invalid JSON input', () => {
+		// Malformed JSON.
+		expect(parseHeadersToArray('{not-json')).toEqual([]);
+		// Well-formed JSON whose top-level value is not a plain object.
+		expect(parseHeadersToArray('[]')).toEqual([]);
+		expect(parseHeadersToArray('"plain-string"')).toEqual([]);
+	});
+
+	it('converts an object into ordered key/value pairs', () => {
+		expect(parseHeadersToArray('{"X-Foo":"bar","Authorization":"Bearer abc"}')).toEqual([
+			{ key: 'X-Foo', value: 'bar' },
+			{ key: 'Authorization', value: 'Bearer abc' }
+		]);
+	});
+
+	it('stringifies non-string values', () => {
+		// The JSON values are a real number and a real boolean so that the
+		// string conversion is actually exercised; feeding "42"/"true" as
+		// JSON strings would pass even without any conversion.
+		expect(parseHeadersToArray('{"count":42,"flag":true}')).toEqual([
+			{ key: 'count', value: '42' },
+			{ key: 'flag', value: 'true' }
+		]);
+	});
+});
+
+describe('serializeHeaders', () => {
+	it('returns an empty string when there are no valid pairs', () => {
+		expect(serializeHeaders([])).toBe('');
+		expect(serializeHeaders([{ key: '', value: 'value' }])).toBe('');
+		expect(serializeHeaders([{ key: '   ', value: 'value' }])).toBe('');
+	});
+
+	it('returns an empty string when every pair has a blank key', () => {
+		// Empty, space-only and tab-only keys are all expected to be dropped.
+		expect(
+			serializeHeaders([
+				{ key: '', value: 'drop-me' },
+				{ key: '   ', value: 'drop-me-too' },
+				{ key: '\t', value: 'tab-key' }
+			])
+		).toBe('');
+	});
+
+	it('drops pairs with empty keys but keeps the rest', () => {
+		expect(
+			serializeHeaders([
+				{ key: '', value: 'drop-me' },
+				{ key: 'X-Keep', value: 'ok' }
+			])
+		).toBe('{"X-Keep":"ok"}');
+	});
+
+	it('trims keys before serializing', () => {
+		expect(serializeHeaders([{ key: '  X-Space  ', value: 'ok' }])).toBe('{"X-Space":"ok"}');
+	});
+
+	it('preserves the input order of surviving pairs', () => {
+		const serialized = serializeHeaders([
+			{ key: 'X-C', value: '3' },
+			{ key: 'X-A', value: '1' },
+			{ key: 'X-B', value: '2' }
+		]);
+		const reparsed = JSON.parse(serialized);
+
+		// Object key order follows insertion order in modern JS engines, so
+		// the serialized JSON writes keys in our input order.
+		expect(reparsed).toEqual({ 'X-C': '3', 'X-A': '1', 'X-B': '2' });
+
+		// toEqual ignores key order, so the ordering claim in the title is
+		// pinned separately through the key sequence of the re-parsed object.
+		expect(Object.keys(reparsed)).toEqual(['X-C', 'X-A', 'X-B']);
+	});
+});
+
+describe('parseHeadersToArray / serializeHeaders roundtrip', () => {
+	it('serializes back to an equal header object after a parse', () => {
+		const headerObject = {
+			'Content-Type': 'application/json',
+			'X-Trace-Id': 'abc-123'
+		};
+		const original = JSON.stringify(headerObject);
+
+		const asPairs = parseHeadersToArray(original);
+		const roundtrip = serializeHeaders(asPairs);
+
+		// Compared as parsed objects, so only the key/value content matters here.
+		expect(JSON.parse(roundtrip)).toEqual(JSON.parse(original));
+	});
+
+	it('drops rows whose keys are blank after trimming during serialization', () => {
+		const parsedRows = parseHeadersToArray('{"X-Keep":"ok","":"drop-me"}');
+		const expectedRows = [
+			{ key: 'X-Keep', value: 'ok' },
+			{ key: '', value: 'drop-me' }
+		];
+
+		// The parser hands back raw keys, blank ones included; removing them
+		// is the serializer's responsibility.
+		expect(parsedRows).toEqual(expectedRows);
+		expect(serializeHeaders(parsedRows)).toBe('{"X-Keep":"ok"}');
+	});
+
+	it('preserves upstream keys untouched (does not lowercase them)', () => {
+		const rows = parseHeadersToArray('{"Authorization":"Bearer xyz"}');
+
+		expect(rows).toEqual([{ key: 'Authorization', value: 'Bearer xyz' }]);
+	});
+
+	it('bearer-token write survives a re-parse when paired with regular custom headers', () => {
+		// Mirrors the headers string produced when the bearer UI of
+		// McpServerForm and the custom KV section write side by side, so that
+		// a refactor of either path cannot silently change the stored key.
+		const traceRow = { key: 'X-Trace-Id', value: 'abc-123' };
+		const bearerRow = { key: 'Authorization', value: 'Bearer super-secret' };
+		const pairs = [traceRow, bearerRow];
+
+		const serialized = serializeHeaders(pairs);
+
+		expect(serialized).toBe('{"X-Trace-Id":"abc-123","Authorization":"Bearer super-secret"}');
+		expect(parseHeadersToArray(serialized)).toEqual(pairs);
+	});
+});
@@ -0,0 +1,144 @@
+import { describe, expect, it, vi } from 'vitest';
+import { parseMcpServerSettings } from '$lib/utils/mcp';
+import { DEFAULT_MCP_CONFIG, MCP_SERVER_ID_PREFIX } from '$lib/constants/mcp';
+
+/**
+ * Tests for the mcpServers settings parser.
+ *
+ * The MCP servers setting is seeded with a default value of
+ * `JSON.stringify(RECOMMENDED_MCP_SERVERS)`, so the parser has to be
+ * resilient to anything that may live in the user's localStorage: malformed
+ * JSON, wrong shapes, missing fields, falsy-but-not-zero numbers, and entry
+ * arrays that have been mutated by the user via the settings form.
+ */
+describe('parseMcpServerSettings', () => {
+	it('returns an empty array for falsy or whitespace-only input', () => {
+		expect(parseMcpServerSettings(null)).toEqual([]);
+		expect(parseMcpServerSettings(undefined)).toEqual([]);
+		expect(parseMcpServerSettings('')).toEqual([]);
+		expect(parseMcpServerSettings('   ')).toEqual([]);
+	});
+
+	it('returns an empty array and logs a warning for invalid JSON strings', () => {
+		// Replace console.warn with a silent spy so the call can be asserted
+		// without polluting the test output.
+		const warn = vi.spyOn(console, 'warn').mockImplementation(() => {});
+
+		try {
+			expect(parseMcpServerSettings('{not-json')).toEqual([]);
+			expect(warn).toHaveBeenCalled();
+		} finally {
+			// Restore even when an assertion above throws; otherwise the
+			// silenced console.warn would leak into the following tests.
+			warn.mockRestore();
+		}
+	});
+
+	it('returns an empty array for valid JSON that is not an array', () => {
+		expect(parseMcpServerSettings('"plain-string"')).toEqual([]);
+		expect(parseMcpServerSettings('{"id":"foo"}')).toEqual([]);
+		expect(parseMcpServerSettings('42')).toEqual([]);
+		expect(parseMcpServerSettings('null')).toEqual([]);
+	});
+
+	// Entries without an id are retained (both survive) and receive a
+	// positional fallback id built from MCP_SERVER_ID_PREFIX.
+	it('keeps entries with no parseable id and substitutes a stable fallback', () => {
+		const parsed = parseMcpServerSettings(
+			JSON.stringify([{ url: 'https://a.test', enabled: true }, { url: 'https://b.test' }])
+		);
+
+		expect(parsed).toHaveLength(2);
+		expect(parsed[0]?.id).toBe(`${MCP_SERVER_ID_PREFIX}-1`);
+		expect(parsed[1]?.id).toBe(`${MCP_SERVER_ID_PREFIX}-2`);
+	});
+
+	it('reuses the first id when it is present and falls back only for missing ones', () => {
+		const parsed = parseMcpServerSettings(
+			JSON.stringify([
+				{ id: 'custom-1', url: 'https://a.test' },
+				{ url: 'https://b.test' },
+				{ id: 'custom-3', url: 'https://c.test' }
+			])
+		);
+
+		// The fallback suffix is the 1-based position of the entry, not a
+		// running count of the entries that lacked an id.
+		expect(parsed[0]?.id).toBe('custom-1');
+		expect(parsed[1]?.id).toBe(`${MCP_SERVER_ID_PREFIX}-2`);
+		expect(parsed[2]?.id).toBe('custom-3');
+	});
+
+	it('falls back to the configured default requestTimeoutSeconds only for nullish values', () => {
+		const fallback = DEFAULT_MCP_CONFIG.requestTimeoutSeconds;
+
+		// NOTE(review): JSON.stringify omits properties whose value is
+		// undefined, so entry 'b' reaches the parser without the field,
+		// exactly like entry 'a'.
+		const parsed = parseMcpServerSettings(
+			JSON.stringify([
+				{ id: 'a', url: 'https://a.test' },
+				{ id: 'b', url: 'https://b.test', requestTimeoutSeconds: undefined },
+				{ id: 'c', url: 'https://c.test', requestTimeoutSeconds: 0 },
+				{ id: 'd', url: 'https://d.test', requestTimeoutSeconds: 45 }
+			])
+		);
+
+		// The parser uses ?? for timeout fallback, which only triggers on
+		// null/undefined. Explicit 0 is preserved at face value.
+		expect(parsed[0]?.requestTimeoutSeconds).toBe(fallback);
+		expect(parsed[1]?.requestTimeoutSeconds).toBe(fallback);
+		expect(parsed[2]?.requestTimeoutSeconds).toBe(0);
+		expect(parsed[3]?.requestTimeoutSeconds).toBe(45);
+	});
+
+	it('treats whitespace-only headers strings as undefined', () => {
+		const parsed = parseMcpServerSettings(
+			JSON.stringify([
+				{ id: 'a', url: 'https://a.test', headers: '   ' },
+				{ id: 'b', url: 'https://b.test', headers: '{"X-Foo":"bar"}' }
+			])
+		);
+
+		// The parser trims headers and coerces empty/whitespace to undefined.
+		expect(parsed[0]?.headers).toBeUndefined();
+		expect(parsed[1]?.headers).toBe('{"X-Foo":"bar"}');
+	});
+
+	it('defaults coercion for booleans (undefined -> false, true -> true)', () => {
+		const parsed = parseMcpServerSettings(
+			JSON.stringify([
+				{ id: 'a', url: 'https://a.test' },
+				{ id: 'b', url: 'https://b.test', enabled: true },
+				{ id: 'c', url: 'https://c.test', enabled: false },
+				{ id: 'd', url: 'https://d.test', useProxy: true }
+			])
+		);
+
+		// Missing flags must come back as false, explicit values unchanged.
+		expect(parsed[0]?.enabled).toBe(false);
+		expect(parsed[1]?.enabled).toBe(true);
+		expect(parsed[2]?.enabled).toBe(false);
+		expect(parsed[0]?.useProxy).toBe(false);
+		expect(parsed[3]?.useProxy).toBe(true);
+	});
+
+	it('preserves input order when mapping entries', () => {
+		const source = [
+			{ id: 'gamma', url: 'https://c.test' },
+			{ id: 'alpha', url: 'https://a.test' },
+			{ id: 'beta', url: 'https://b.test' }
+		];
+
+		const parsed = parseMcpServerSettings(JSON.stringify(source));
+
+		expect(parsed.map((entry) => entry.id)).toEqual(['gamma', 'alpha', 'beta']);
+	});
+
+	it('passes non-string raw input through the JSON-equality path', () => {
+		// The argument is an already-materialized array rather than a JSON string.
+		const parsed = parseMcpServerSettings([
+			{ id: 'a', url: 'https://a.test' },
+			{ id: 'b', url: 'https://b.test', enabled: true }
+		]);
+
+		expect(parsed).toHaveLength(2);
+		expect(parsed[0]?.id).toBe('a');
+		expect(parsed[1]?.enabled).toBe(true);
+	});
+
+	it('coerces non-string url values to an empty string rather than throwing', () => {
+		const parsed = parseMcpServerSettings(
+			JSON.stringify([{ id: 'a', url: 42 }, { id: 'b' }, { id: 'c', url: 'https://c.test' }])
+		);
+
+		expect(parsed[0]?.url).toBe('');
+		expect(parsed[1]?.url).toBe('');
+		expect(parsed[2]?.url).toBe('https://c.test');
+	});
+});
@@ -0,0 +1,90 @@
+import { describe, expect, it } from 'vitest';
+import {
+	RECOMMENDED_MCP_SERVER_IDS,
+	RECOMMENDED_MCP_SERVERS
+} from '$lib/constants/recommended-mcp-servers';
+import { parseMcpServerSettings } from '$lib/utils/mcp';
+import { DEFAULT_MCP_CONFIG, MCP_SERVER_ID_PREFIX } from '$lib/constants/mcp';
+
+/**
+ * Tests for the predefined recommended MCP servers.
+ *
+ * These are surfaced to first-time users via
+ * DialogMcpServerRecommendations and used as the default value of the MCP
+ * servers setting, so a regression that breaks the round-trip through the
+ * settings parser would silently break onboarding for new users.
+ */
+describe('RECOMMENDED_MCP_SERVERS', () => {
+	it('lists at least one entry and uses stable, unique ids', () => {
+		expect(RECOMMENDED_MCP_SERVERS.length).toBeGreaterThan(0);
+
+		const ids = RECOMMENDED_MCP_SERVERS.map((server) => server.id);
+		const distinctIds = new Set(ids);
+		expect(distinctIds.size).toBe(ids.length);
+
+		// Ids are lowercase slugs and must not contain the prefix used for
+		// generated fallback ids (compared case-insensitively).
+		const reservedPrefix = MCP_SERVER_ID_PREFIX.toLowerCase();
+		ids.forEach((id) => {
+			expect(id).toMatch(/^[a-z0-9-]+$/);
+			expect(id.toLowerCase()).not.toContain(reservedPrefix);
+		});
+	});
+
+	it('requires a name, description and url for every entry', () => {
+		RECOMMENDED_MCP_SERVERS.forEach((server) => {
+			const nameLength = server.name?.trim().length ?? 0;
+			expect(nameLength).toBeGreaterThan(0);
+			expect(server.description.trim().length).toBeGreaterThan(0);
+			expect(server.url.trim().length).toBeGreaterThan(0);
+			// The URL constructor throws on strings it cannot parse.
+			expect(() => new URL(server.url)).not.toThrow();
+		});
+	});
+});
+
+describe('RECOMMENDED_MCP_SERVER_IDS', () => {
+	it('matches the ids declared in RECOMMENDED_MCP_SERVERS', () => {
+		// Equal sizes plus membership of every declared id means the id set
+		// holds nothing beyond the declared servers.
+		expect(RECOMMENDED_MCP_SERVER_IDS.size).toBe(RECOMMENDED_MCP_SERVERS.length);
+
+		RECOMMENDED_MCP_SERVERS.forEach((server) => {
+			expect(RECOMMENDED_MCP_SERVER_IDS.has(server.id)).toBe(true);
+		});
+	});
+});
+
+describe('recommended-mcp-servers default value', () => {
+	it('round-trips cleanly through parseMcpServerSettings', () => {
+		const parsed = parseMcpServerSettings(JSON.stringify(RECOMMENDED_MCP_SERVERS));
+
+		expect(parsed).toHaveLength(RECOMMENDED_MCP_SERVERS.length);
+
+		RECOMMENDED_MCP_SERVERS.forEach((source, index) => {
+			const entry = parsed[index];
+
+			// Fields declared on the recommended server must come back unchanged.
+			expect(entry).toBeDefined();
+			expect(entry?.id).toBe(source.id);
+			expect(entry?.url).toBe(source.url);
+			expect(entry?.enabled).toBe(source.enabled);
+			expect(entry?.requestTimeoutSeconds).toBe(source.requestTimeoutSeconds);
+			expect(entry?.name).toBe(source.name);
+
+			// Recommended servers declare neither headers nor useProxy, so the
+			// parser is expected to yield the inactive defaults for both.
+			expect(entry?.headers).toBeUndefined();
+			expect(entry?.useProxy).toBe(false);
+		});
+	});
+
+	it('uses the global default timeout when one is not specified on an entry', () => {
+		// Entry deliberately carries no requestTimeoutSeconds.
+		const serialized = JSON.stringify([
+			{
+				id: 'roundtrip-only',
+				name: 'Only required fields',
+				url: 'https://example.test/mcp',
+				description: 'Smoke entry for parser roundtrip with default timeout.',
+				enabled: true
+			}
+		]);
+
+		const [entry] = parseMcpServerSettings(serialized);
+
+		expect(entry?.requestTimeoutSeconds).toBe(DEFAULT_MCP_CONFIG.requestTimeoutSeconds);
+	});
+});
Author	SHA1	Message	Date
Nick Towle	d4cff114c0	ui: Improve performance when streaming (#25225 ) * ui: Improve performance when streaming * ui: build sibling info map in branching utils Moves the node map and sibling map construction from the .by block into buildSiblingInfoMap() in branching.ts. The map is built once per structural change and only read afterwards, so it does not need SvelteMap reactivity. Keeping the construction in plain TypeScript fixes the svelte/prefer-svelte-reactivity lint error and groups the branching logic where it already lives. --------- Co-authored-by: Pascal <admin@serveurperso.com>	2026-07-03 19:03:51 +02:00
Pascal	f113e02d5a	ui: strip path and weight extension from model id in single model mode (#25137 )	2026-07-03 17:32:48 +02:00
Ruixiang Wang	152d337fad	spec: support spec-draft-p-min in DFlash (#25246 ) * spec: support spec-draft-p-min in DFlash * dflash: add n_min guard * dflash: guard both n_min and n_max	2026-07-03 15:40:06 +02:00
Piotr Wilkin (ilintar)	75a48a9055	cuda: enable topk-moe fusion for 288 experts (#25267 ) * cuda: enable topk-moe fusion for 288 experts The topk-moe fusion only accepted power-of-2 expert counts (or the special-cased 576), so models with 288 experts (e.g. Step-3.7-Flash) fell back to the unfused per-layer routing chain: softmax/sigmoid, argsort, get_rows, sum_rows, div, clamp, scale. At batch size 1 that is ~330 extra tiny graph nodes per token. 288 is a multiple of the warp size, so the existing kernel already handles it; this adds the missing template instantiation and accepts 288 in the eligibility check. Measured on gfx1151 with Step-3.7-Flash IQ4_XS (llama-bench, -b 4096 -ub 4096 -fa 1 -dio 1 -ctk q8_0 -ctv q8_0; machine idle, before/after paired so pp4096 stays matched as a load control): test \| before \| after ----------------+----------------+---------------- pp4096 \| 460.99 ± 0.45 \| 462.47 ± 0.34 (unchanged) tg128 \| 19.10 ± 0.04 \| 19.56 ± 0.03 (+2.4%) tg128 @ d30000 \| 12.68 ± 0.04 \| 12.69 ± 0.03 (unchanged) Prompt processing is unaffected (the fusion only touches decode routing). The decode gain is ~+2.4% at shallow context and fades with depth: by 30k tokens each step is attention-bound over the KV cache, so removing the fixed routing overhead is no longer visible. Assisted-By: Claude Fable 5 <noreply@anthropic.com> * Update tests/test-backend-ops.cpp Co-authored-by: Oliver Simons <osimons@nvidia.com> * Add comment for case 288 in topk-moe.cu --------- Co-authored-by: Oliver Simons <osimons@nvidia.com>	2026-07-03 15:36:55 +02:00
Pascal	067de93718	ui: align persisted config with strict server schema and enable thinking by default (#25242 ) * ui: migrate legacy string-encoded booleans in persisted config * ui: enable thinking by default Fresh users and legacy conversations without a persisted thinking preference now default to enabled. The per-conversation toggle and the persisted localStorage choice keep taking precedence. Picks up the enable_thinking default from #24876.	2026-07-03 13:14:52 +02:00
Pascal	b5315e16e0	server + ui: ping silent SSE streams every 1s and kick only after 3s so slow prefill never drops healthy connections (#25241 ) * server + ui: ping silent SSE streams every 1s and kick only after 3s so slow prefill never drops healthy connections * server + ui: sse_ping_interval becomes a per-request body field Address review from ngxson: the global default returns to 30 so API clients see no behavior change, and the WebUI sends sse_ping_interval: 1 in the request body since it owns the 3s visibility-kick contract and declares the cadence it needs. Positive values keep the existing > 0 gate, -1 keeps its disabled semantics. * server: move sse_ping_interval into the request schema Address review from ngxson: the field is now a typed field_num with hard limits (-1, INT32_MAX) bound to task_params, seeded from the CLI default alongside the other inherited parameters. The raw json_value read and its redundant comment are gone, and schema evaluation brings type and range validation for free.	2026-07-03 12:47:04 +02:00
Aleksander Grygier	94875285e4	ui: Add MCP Servers Opt-In for first time visitors (#25239 ) * feat: ui: Add predefined recommended MCP servers to settings * feat: ui: Add MCP server recommendation dialog with custom server support * feat: Auto-focus input fields on mount and dynamic addition * feat: Add header validation to MCP server add and edit forms * feat: Persist recommended MCP server opt-in selections * test: Cover MCP configuration with tests * chore: Format & cleanup * feat: Centralize MCP server overrides to settings config and improve recommendation UI * fix: Capture index before mutation to prevent focus drift * refactor: Extract MCP_CARD_VISIBLE_TOOL_LIMIT to shared constants * refactor: Support arbitrary authorization header schemes * refactor: Consolidate MCP recommendations dismissal into existing storage key * fix: Use case-insensitive comparison for MCP server ID prefix check * refactor: Centralize MCP server visibility logic and extract recommendations hook * refactor: Cleanup	2026-07-03 12:16:29 +02:00
Gaurav Garg	5a460dea9f	Remove redundant CUDA copies after gated_delta_net. (#23940 ) * Remove redundant CUDA copies after gated_delta_net. Currently, GDN writes recurrent state snapshots into its output tail, then the graph immediately copies those snapshots into ssm_states_all. With MTP draft length 3, target decode uses K=4, so that becomes 4 extra ggml_cuda_cpy calls. The change detects that gated_delta_net -> view -> cpy pattern and makes the CUDA GDN kernel write the state snapshot(s) directly into the recurrent cache, skipping the intermediate tail writes and copy kernels when safe. * Address review comments	2026-07-03 14:36:29 +05:30