Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/workflows/build.yml
#	.github/workflows/release.yml
#	.gitignore
#	examples/batched/batched.cpp
#	examples/debug/debug.cpp
#	examples/eval-callback/eval-callback.cpp
#	examples/idle/idle.cpp
#	examples/lookahead/lookahead.cpp
#	examples/lookup/lookup-create.cpp
#	examples/lookup/lookup-stats.cpp
#	examples/lookup/lookup.cpp
#	examples/parallel/parallel.cpp
#	examples/passkey/passkey.cpp
#	examples/retrieval/retrieval.cpp
#	examples/save-load-state/save-load-state.cpp
#	examples/speculative-simple/speculative-simple.cpp
#	examples/speculative/speculative.cpp
#	examples/training/finetune.cpp
#	ggml/CMakeLists.txt
#	ggml/src/ggml-cann/aclnn_ops.cpp
#	ggml/src/ggml-cann/common.h
#	ggml/src/ggml-cann/ggml-cann.cpp
#	ggml/src/ggml-sycl/fattn-tile.hpp
#	ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp
#	ggml/src/ggml-webgpu/ggml-webgpu.cpp
#	ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl
#	ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py
#	ggml/src/ggml-webgpu/wgsl-shaders/rope.wgsl
#	ggml/src/ggml-webgpu/wgsl-shaders/soft_max.wgsl
#	scripts/sync-ggml.last
#	tests/export-graph-ops.cpp
#	tests/test-chat.cpp
#	tests/test-state-restore-fragmented.cpp
#	tests/test-thread-safety.cpp
#	tools/batched-bench/batched-bench.cpp
#	tools/cli/cli.cpp
#	tools/cvector-generator/cvector-generator.cpp
#	tools/export-lora/export-lora.cpp
#	tools/imatrix/imatrix.cpp
#	tools/perplexity/perplexity.cpp
#	tools/results/results.cpp
#	tools/server/CMakeLists.txt
This commit is contained in:
Concedo
2026-04-01 10:54:13 +08:00
61 changed files with 2167 additions and 1180 deletions

View File

@@ -21,14 +21,6 @@ indent_style = tab
[prompts/*.txt]
insert_final_newline = unset
[tools/server/public/*]
indent_size = 2
[tools/server/public/deps_*]
trim_trailing_whitespace = unset
indent_style = unset
indent_size = unset
[tools/server/deps_*]
trim_trailing_whitespace = unset
indent_style = unset
@@ -61,6 +53,14 @@ charset = unset
trim_trailing_whitespace = unset
insert_final_newline = unset
[tools/server/public/**]
indent_style = unset
indent_size = unset
end_of_line = unset
charset = unset
trim_trailing_whitespace = unset
insert_final_newline = unset
[benches/**]
indent_style = unset
indent_size = unset

4
.gitattributes vendored Normal file
View File

@@ -0,0 +1,4 @@
# Treat the generated single-file WebUI build as binary for diff purposes.
# Git's pack-file delta compression still works (byte-level), but this prevents
# git diff from printing the entire minified file on every change.
tools/server/public/index.html -diff

View File

@@ -232,7 +232,7 @@ using chat_template_caps = jinja::caps;
struct common_chat_templates {
bool add_bos;
bool add_eos;
bool has_explicit_template; // Model had builtin template or template overridde was specified.
bool has_explicit_template; // Model had builtin template or template overridden was specified.
std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
std::unique_ptr<common_chat_template> template_tool_use;
};
@@ -1004,6 +1004,10 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
auto analysis = p.ref("analysis");
auto preamble = p.rule("preamble", p.literal("<|channel|>commentary<|message|>") + p.content(content) + end);
auto final_msg = p.rule("final", p.literal("<|channel|>final<|message|>") + p.content(content));
// Consume any unsolicited tool calls, e.g. builtin functions
auto unsolicited = p.rule("unsolicited", p.atomic(p.optional(channel) + p.literal(" to=") + content + end));
auto any = p.rule("any", preamble | analysis);
if (has_response_format) {
@@ -1047,7 +1051,7 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
return p.zero_or_more(start + any) + start + (tool_call | final_msg);
}
return p.zero_or_more(start + any) + start + final_msg;
return p.zero_or_more(start + any) + start + (final_msg | unsolicited);
});
data.parser = parser.save();

View File

@@ -366,6 +366,11 @@ bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREAD
}
void common_init() {
#if defined(_WIN32)
SetConsoleOutputCP(CP_UTF8);
SetConsoleCP(CP_UTF8);
#endif
llama_log_set(common_log_default_callback, NULL);
#ifdef NDEBUG
@@ -374,7 +379,7 @@ void common_init() {
const char * build_type = " (debug)";
#endif
LOG_INF("build: %d (%s) with %s for %s%s\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT, LLAMA_COMPILER, LLAMA_BUILD_TARGET, build_type);
LOG_DBG("build: %d (%s) with %s for %s%s\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT, LLAMA_COMPILER, LLAMA_BUILD_TARGET, build_type);
}
std::string common_params_get_system_info(const common_params & params) {
@@ -1250,6 +1255,9 @@ llama_context * common_init_result::context() {
}
common_sampler * common_init_result::sampler(llama_seq_id seq_id) {
if (seq_id < 0 || seq_id >= (int) pimpl->samplers.size()) {
return nullptr;
}
return pimpl->samplers[seq_id].get();
}

View File

@@ -123,6 +123,9 @@ class ProgressBar {
static inline std::map<const ProgressBar *, int> lines;
static inline int max_line = 0;
std::string filename;
size_t len = 0;
static void cleanup(const ProgressBar * line) {
lines.erase(line);
if (lines.empty()) {
@@ -139,7 +142,23 @@ class ProgressBar {
}
public:
ProgressBar() = default;
ProgressBar(const std::string & url = "") : filename(url) {
if (auto pos = filename.rfind('/'); pos != std::string::npos) {
filename = filename.substr(pos + 1);
}
if (auto pos = filename.find('?'); pos != std::string::npos) {
filename = filename.substr(0, pos);
}
for (size_t i = 0; i < filename.size(); ++i) {
if ((filename[i] & 0xC0) != 0x80) {
if (len++ == 39) {
filename.resize(i);
filename += "";
break;
}
}
}
}
~ProgressBar() {
std::lock_guard<std::mutex> lock(mutex);
@@ -147,11 +166,7 @@ public:
}
void update(size_t current, size_t total) {
if (!is_output_a_tty()) {
return;
}
if (!total) {
if (!total || !is_output_a_tty()) {
return;
}
@@ -163,28 +178,27 @@ public:
}
int lines_up = max_line - lines[this];
size_t width = 50;
size_t bar = 55 - len;
size_t pct = (100 * current) / total;
size_t pos = (width * current) / total;
std::cout << "\033[s";
size_t pos = (bar * current) / total;
if (lines_up > 0) {
std::cout << "\033[" << lines_up << "A";
}
std::cout << "\033[2K\r["
<< std::string(pos, '=')
<< (pos < width ? ">" : "")
<< std::string(width - pos, ' ')
<< "] " << std::setw(3) << pct << "% ("
<< current / (1024 * 1024) << " MB / "
<< total / (1024 * 1024) << " MB) "
<< "\033[u";
std::cout << '\r' << "Downloading " << filename << " ";
std::cout.flush();
for (size_t i = 0; i < bar; ++i) {
std::cout << (i < pos ? "" : " ");
}
std::cout << std::setw(4) << pct << "%\033[K";
if (lines_up > 0) {
std::cout << "\033[" << lines_up << "B";
}
std::cout << '\r' << std::flush;
if (current == total) {
cleanup(this);
cleanup(this);
}
}
@@ -212,7 +226,7 @@ static bool common_pull_file(httplib::Client & cli,
const char * func = __func__; // avoid __func__ inside a lambda
size_t downloaded = existing_size;
size_t progress_step = 0;
ProgressBar bar;
ProgressBar bar(resolve_path);
auto res = cli.Get(resolve_path, headers,
[&](const httplib::Response &response) {
@@ -290,7 +304,7 @@ static int common_download_file_single_online(const std::string & url,
const bool file_exists = std::filesystem::exists(path);
if (file_exists && skip_etag) {
LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
LOG_DBG("%s: using cached file: %s\n", __func__, path.c_str());
return 304; // 304 Not Modified - fake cached response
}
@@ -298,7 +312,7 @@ static int common_download_file_single_online(const std::string & url,
if (file_exists) {
last_etag = read_etag(path);
} else {
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
LOG_DBG("%s: no previous model file found %s\n", __func__, path.c_str());
}
auto head = cli.Head(parts.path);
@@ -332,11 +346,11 @@ static int common_download_file_single_online(const std::string & url,
if (file_exists) {
if (etag.empty()) {
LOG_INF("%s: using cached file (no server etag): %s\n", __func__, path.c_str());
LOG_DBG("%s: using cached file (no server etag): %s\n", __func__, path.c_str());
return 304; // 304 Not Modified - fake cached response
}
if (!last_etag.empty() && last_etag == etag) {
LOG_INF("%s: using cached file (same etag): %s\n", __func__, path.c_str());
LOG_DBG("%s: using cached file (same etag): %s\n", __func__, path.c_str());
return 304; // 304 Not Modified - fake cached response
}
if (remove(path.c_str()) != 0) {
@@ -372,7 +386,7 @@ static int common_download_file_single_online(const std::string & url,
}
}
LOG_INF("%s: downloading from %s to %s (etag:%s)...\n",
LOG_DBG("%s: downloading from %s to %s (etag:%s)...\n",
__func__, common_http_show_masked_url(parts).c_str(),
path_temporary.c_str(), etag.c_str());
@@ -441,7 +455,7 @@ int common_download_file_single(const std::string & url,
return -1;
}
LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
LOG_DBG("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
return 304; // Not Modified - fake cached response
}

View File

@@ -51,7 +51,7 @@ struct common_ngram_map_value {
// statistics of a n-gram
struct common_ngram_map_key {
size_t key_idx; // index of key n-gram in token-history
size_t stat_idx; // index of last token of stastistics computation (key_num, values)
size_t stat_idx; // index of last token of statistics computation (key_num, values)
uint16_t key_num; // number of occurrences of this key n-gram in token-history
common_ngram_map_value values[COMMON_NGRAM_MAX_VALUES]; // some known values after the key

View File

@@ -383,6 +383,12 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
params.backend_sampling = false;
}
if (rbudget && params.backend_sampling) {
LOG_WRN("%s: backend sampling is not compatible with reasoning budget, disabling\n", __func__);
params.backend_sampling = false;
}
auto * result = new common_sampler {
/* .params = */ params,
/* .grmr = */ grmr,

View File

@@ -545,11 +545,12 @@ int main(int argc, char ** argv) {
common_params params;
common_init();
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_DIFFUSION)) {
return 1;
}
common_init();
llama_backend_init();
llama_model_params model_params = llama_model_default_params();

View File

@@ -99,12 +99,12 @@ int main(int argc, char ** argv) {
common_params params;
common_init();
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_EMBEDDING)) {
return 1;
}
common_init();
params.embedding = true;
// get max number of sequences per batch

View File

@@ -0,0 +1,155 @@
// Fused GLU (gated linear unit) compute shader for the ggml WebGPU backend.
// The element-wise gating op and the storage type are selected at shader
// build time via #ifdef variants (TYPE_* / OP_*); NO_SPLIT switches between
// a single interleaved source buffer and two separate source buffers.
enable f16;
// Storage/element type selection: exactly one TYPE_* macro is defined.
#ifdef TYPE_F32
#define DataType f32
#endif
#ifdef TYPE_F16
#define DataType f16
#endif
// ReGLU: ReLU(a) * b
#ifdef OP_REGLU
fn op(a: DataType, b: DataType) -> DataType {
return max(a, 0) * b;
}
#endif
// GeGLU: GELU(a) * b, using the tanh-based GELU approximation,
// with tanh(x) rewritten as 2/(1+exp(-2x)) - 1 in terms of exp.
#ifdef OP_GEGLU
const SQRT_2_OVER_PI: DataType = 0.79788456080286535587989211986876;
const GELU_COEF_A: DataType = 0.044715;
fn op(a: DataType, b: DataType) -> DataType {
let val = SQRT_2_OVER_PI * a * (1.0 + GELU_COEF_A * a * a);
return 0.5 * a * (2.0 - 2.0/ (exp(2* val) + 1)) * b;
}
#endif
// SwiGLU: SiLU(a) * b, where SiLU(x) = x * sigmoid(x)
#ifdef OP_SWIGLU
fn op(a: DataType, b: DataType) -> DataType {
return a / (1.0 + exp(-a)) * b;
}
#endif
// SwiGLU (gpt-oss variant): clamps both inputs to params.limit, applies a
// sigmoid scaled by params.alpha, and gates with (1 + g).
// Note: computed in f32 regardless of DataType.
#ifdef OP_SWIGLU_OAI
fn op(a: f32, b: f32) -> f32 {
let xi = min(a, params.limit);
let gi = max(min(b, params.limit), -params.limit);
var out_glu = xi / (1.0 + exp(-xi * params.alpha));
out_glu = out_glu * (1.0 + gi);
return out_glu;
}
#endif
// GeGLU-erf: exact-form GELU(a) * b, with erf evaluated via the
// Abramowitz & Stegun 7.1.26 polynomial approximation (a1..a5, p).
#ifdef OP_GEGLU_ERF
const p_erf: DataType = 0.3275911;
const a1_erf: DataType = 0.254829592;
const a2_erf: DataType = -0.284496736;
const a3_erf: DataType = 1.421413741;
const a4_erf: DataType = -1.453152027;
const a5_erf: DataType = 1.061405429;
const SQRT_2_INV: DataType = 0.7071067811865476;
fn op(a: DataType, b: DataType) -> DataType {
let a_div_sqr2 = a * SQRT_2_INV;
// The approximation is valid for x >= 0; use erf(-x) = -erf(x).
let sign_x = sign(a_div_sqr2);
let x = abs(a_div_sqr2);
let t = 1.0 / (1.0 + p_erf * x);
let y = 1.0 - (((((a5_erf * t + a4_erf) * t + a3_erf) * t + a2_erf) * t + a1_erf) * t * exp(-x * x));
let erf_approx = sign_x * y;
return 0.5 * a * (1.0 + erf_approx) * b;
}
#endif
// GeGLU-quick: sigmoid-based GELU approximation, x * sigmoid(1.702 * x)
#ifdef OP_GEGLU_QUICK
const GELU_QUICK_COEF: DataType = -1.702;
fn op(a: DataType, b: DataType) -> DataType {
return a * (1.0 / (1.0 + exp(GELU_QUICK_COEF * a))) * b;
}
#endif
// Push-constant style uniform block: buffer offsets, per-dimension strides,
// the destination shape, and op-specific scalars (alpha/limit for SWIGLU_OAI).
struct Params {
offset_src0: u32,
offset_src1: u32,
offset_dst: u32,
// Strides (in elements)
stride_src01: u32,
stride_src02: u32,
stride_src03: u32,
stride_src11: u32,
stride_src12: u32,
stride_src13: u32,
stride_dst1: u32,
stride_dst2: u32,
stride_dst3: u32,
// shape of dst
ne: u32,
ne0: u32,
ne1: u32,
ne2: u32,
swapped: u32,
alpha: f32,
limit: f32,
}
@group(0) @binding(0)
var<storage, read_write> src0: array<DataType>;
// NO_SPLIT: both the gate and the value live interleaved in src0; which half
// is which is chosen by params.swapped (offset by ne0 along dim 0).
#ifdef NO_SPLIT
@group(0) @binding(1)
var<storage, read_write> dst: array<DataType>;
@group(0) @binding(2)
var<uniform> params: Params;
fn a_value(base: u32) -> DataType {
let offset: u32 = select(0, params.ne0, params.swapped != 0);
return src0[base + offset];
}
fn b_value(base: u32) -> DataType {
let offset: u32 = select(params.ne0, 0, params.swapped != 0);
return src0[base + offset];
}
#else
// Split layout: gate and value come from two separate buffers (src0/src1).
@group(0) @binding(1)
var<storage, read_write> src1: array<DataType>;
@group(0) @binding(2)
var<storage, read_write> dst: array<DataType>;
@group(0) @binding(3)
var<uniform> params: Params;
fn a_value(base: u32) -> DataType {
return src0[base];
}
fn b_value(base: u32) -> DataType {
return src1[base];
}
#endif
// One invocation per destination element; gid.x is the flat element index.
@compute @workgroup_size(WG_SIZE)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
// Guard against the tail of the last workgroup.
if (gid.x >= params.ne) {
return;
}
// Decompose the flat index into 4D coordinates (i3, i2, i1, i0) of dst.
var i = gid.x;
let i3 = i / (params.ne2 * params.ne1 * params.ne0);
i = i % (params.ne2 * params.ne1 * params.ne0);
let i2 = i / (params.ne1 * params.ne0);
i = i % (params.ne1 * params.ne0);
let i1 = i / params.ne0;
let i0 = i % params.ne0;
// Map the coordinates through each tensor's strides (strides in elements).
let i_a = params.offset_src0 + i3 * params.stride_src03 + i2 * params.stride_src02 + i1 * params.stride_src01 + i0;
let i_b = params.offset_src1 + i3 * params.stride_src13 + i2 * params.stride_src12 + i1 * params.stride_src11 + i0;
let i_dst = params.offset_dst + i3 * params.stride_dst3 + i2 * params.stride_dst2 + i1 * params.stride_dst1 + i0;
dst[i_dst] = op(a_value(i_a), b_value(i_b));
}

View File

@@ -294,7 +294,7 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
}
// get extra buffer types of the CPU
// TODO: a more general solution for non-CPU extra buft should be imlpemented in the future
// TODO: a more general solution for non-CPU extra buft should be implemented in the future
// ref: https://github.com/ggml-org/llama.cpp/pull/12593#pullrequestreview-2718659948
std::vector<ggml_backend_buffer_type_t> buft_extra;
{

View File

@@ -18,7 +18,7 @@ struct llama_ubatch {
}
// typical for M-RoPE cases:
// 0 - sequantial position of the tokens/embeddings in the sequence
// 0 - sequential position of the tokens/embeddings in the sequence
// 1 - y position in the image
// 2 - x position in the image
// 3 - other

View File

@@ -595,7 +595,7 @@ void llama_context::sched_reserve() {
// reserve again with pp graph to avoid ggml-alloc reallocations during inference
{
// TODO: not sure if the following graph would be worster case for multi-stream KV caches:
// TODO: not sure if the following graph would be worst case for multi-stream KV caches:
//
// auto * gf = graph_reserve(n_tokens, 1, n_tokens, mctx.get());
//

View File

@@ -1665,7 +1665,7 @@ ggml_tensor * llm_graph_context::build_inp_attn_scale() const {
ggml_tensor * llm_graph_context::build_inp_out_ids() const {
// note: when all tokens are output, we could skip this optimization to spare the ggml_get_rows() calls,
// but this would make the graph topology depend on the number of output tokens, which can interere with
// but this would make the graph topology depend on the number of output tokens, which can interfere with
// features that require constant topology such as pipeline parallelism
// ref: https://github.com/ggml-org/llama.cpp/pull/14275#issuecomment-2987424471
//if (n_outputs < n_tokens) {

View File

@@ -333,7 +333,7 @@ public:
ggml_tensor * get_v(ggml_context * ctx, int32_t il) const;
// store k_cur and v_cur in the cache based on the provided head location
// note: the heads in k_cur and v_cur should be layed out contiguously in memory
// note: the heads in k_cur and v_cur should be laid out contiguously in memory
// - k_cur [n_embd_head_k, n_head_k, n_tokens]
// - k_idxs [n_tokens]
// - v_cur [n_embd_head_v, n_head_v, n_tokens]

View File

@@ -9,7 +9,7 @@ llm_build_gemma_embedding::llm_build_gemma_embedding(const llama_model & model,
inpL = build_inp_embd(model.tok_embd);
// important: do not normalize weights for raw embeddings input (i.e. encoded image emdeddings)
// important: do not normalize weights for raw embeddings input (i.e. encoded image embeddings)
inpL = ggml_scale(ctx0, inpL, ubatch.token ? sqrtf(n_embd) : 1.0f);
cb(inpL, "inp_scaled", -1);

View File

@@ -9,7 +9,7 @@ llm_build_gemma3<iswa>::llm_build_gemma3(const llama_model & model, const llm_gr
inpL = build_inp_embd(model.tok_embd);
// important: do not normalize weights for raw embeddings input (i.e. encoded image emdeddings)
// important: do not normalize weights for raw embeddings input (i.e. encoded image embeddings)
inpL = ggml_scale(ctx0, inpL, ubatch.token ? sqrtf(n_embd) : 1.0f);
cb(inpL, "inp_scaled", -1);

View File

@@ -12,7 +12,7 @@ llm_build_gemma3n_iswa::llm_build_gemma3n_iswa(const llama_model & model, const
inpL = build_inp_embd(model.tok_embd);
// important: do not normalize weights for raw embeddings input (i.e. encoded image emdeddings)
// important: do not normalize weights for raw embeddings input (i.e. encoded image embeddings)
inpL = ggml_scale(ctx0, inpL, ubatch.token ? sqrtf(n_embd) : 1.0f);
cb(inpL, "inp_scaled", -1);

View File

@@ -91,12 +91,12 @@ int main(int argc, char ** argv) {
common_params params;
g_params = &params;
common_init();
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMPLETION, print_usage)) {
return 1;
}
common_init();
auto & sparams = params.sampling;
// save choice to use color for later
@@ -147,19 +147,13 @@ int main(int argc, char ** argv) {
ctx = llama_init->context();
model = llama_init->model();
smpl = llama_init->sampler(0);
if (ctx == NULL) {
LOG_ERR("%s: error: unable to create context\n", __func__);
return 1;
}
if (model == NULL) {
LOG_ERR("%s: error: unable to load model\n", __func__);
return 1;
}
smpl = llama_init->sampler(0);
llama_memory_t mem = llama_get_memory(ctx);
const llama_vocab * vocab = llama_model_get_vocab(model);

View File

@@ -17,11 +17,12 @@ using namespace std::chrono_literals;
int main(int argc, char ** argv) {
common_params params;
common_init();
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) {
return 1;
}
common_init();
llama_backend_init();
llama_numa_init(params.numa);
auto mparams = common_model_params_to_llama(params);

View File

@@ -54,11 +54,12 @@ int main(int argc, char ** argv) {
common_params params;
common_init();
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_MTMD, show_additional_info)) {
return 1;
}
common_init();
mtmd_helper_log_set(common_log_default_callback, nullptr);
if (params.mmproj.path.empty()) {

View File

@@ -281,11 +281,12 @@ int main(int argc, char ** argv) {
common_params params;
common_init();
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_MTMD, show_additional_info)) {
return 1;
}
common_init();
mtmd_helper_log_set(common_log_default_callback, nullptr);
if (params.mmproj.path.empty()) {

View File

@@ -259,6 +259,6 @@ npm run test
npm run build
```
After `public/index.html.gz` has been generated, rebuild `llama-server` as described in the [build](#build) section to include the updated UI.
After `public/index.html` has been generated, rebuild `llama-server` as described in the [build](#build) section to include the updated UI.
**Note:** The Vite dev server automatically proxies API requests to `http://localhost:8080`. Make sure `llama-server` is running on that port during development.

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@@ -35,8 +35,8 @@ static server_http_res_ptr proxy_request(const server_http_req & req, std::strin
std::map<std::string, std::string> headers;
for (auto [key, value] : req.headers) {
auto new_key = key;
if (string_starts_with(new_key, "X-Proxy-Header-")) {
string_replace_all(new_key, "X-Proxy-Header-", "");
if (string_starts_with(new_key, "x-proxy-header-")) {
string_replace_all(new_key, "x-proxy-header-", "");
}
headers[new_key] = value;
}

View File

@@ -10,7 +10,9 @@
#ifdef LLAMA_BUILD_WEBUI
// auto generated files (see README.md for details)
#include "index.html.gz.hpp"
#include "index.html.hpp"
#include "bundle.js.hpp"
#include "bundle.css.hpp"
#include "loading.html.hpp"
#endif
@@ -272,16 +274,19 @@ bool server_http_context::init(const common_params & params) {
} else {
#ifdef LLAMA_BUILD_WEBUI
// using embedded static index.html
srv->Get(params.api_prefix + "/", [](const httplib::Request & req, httplib::Response & res) {
if (req.get_header_value("Accept-Encoding").find("gzip") == std::string::npos) {
res.set_content("Error: gzip is not supported by this browser", "text/plain");
} else {
res.set_header("Content-Encoding", "gzip");
// COEP and COOP headers, required by pyodide (python interpreter)
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
res.set_content(reinterpret_cast<const char*>(index_html_gz), index_html_gz_len, "text/html; charset=utf-8");
}
srv->Get(params.api_prefix + "/", [](const httplib::Request & /*req*/, httplib::Response & res) {
// COEP and COOP headers, required by pyodide (python interpreter)
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
res.set_content(reinterpret_cast<const char*>(index_html), index_html_len, "text/html; charset=utf-8");
return false;
});
srv->Get(params.api_prefix + "/bundle.js", [](const httplib::Request & /*req*/, httplib::Response & res) {
res.set_content(reinterpret_cast<const char*>(bundle_js), bundle_js_len, "application/javascript; charset=utf-8");
return false;
});
srv->Get(params.api_prefix + "/bundle.css", [](const httplib::Request & /*req*/, httplib::Response & res) {
res.set_content(reinterpret_cast<const char*>(bundle_css), bundle_css_len, "text/css; charset=utf-8");
return false;
});
#endif

View File

@@ -75,6 +75,8 @@ int main(int argc, char ** argv) {
// own arguments required by this example
common_params params;
common_init();
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_SERVER)) {
return 1;
}
@@ -100,8 +102,6 @@ int main(int argc, char ** argv) {
params.model_alias.insert(params.model.name);
}
common_init();
// struct that contains llama context and inference
server_context ctx_server;

View File

@@ -188,14 +188,14 @@ The build process:
1. **Vite Build** - Bundles all TypeScript, Svelte, and CSS
2. **Static Adapter** - Outputs to `../public` (llama-server's static file directory)
3. **Post-Build Script** - Cleans up intermediate files
4. **Custom Plugin** - Creates `index.html.gz` with:
4. **Custom Plugin** - Creates `index.html` with:
- Inlined favicon as base64
- GZIP compression (level 9)
- Deterministic output (zeroed timestamps)
```text
tools/server/webui/ → build → tools/server/public/
├── src/ ├── index.html.gz (served by llama-server)
├── src/ ├── index.html (served by llama-server)
├── static/ └── (favicon inlined)
└── ...
```
@@ -219,7 +219,7 @@ output: {
The WebUI is embedded directly into the llama-server binary:
1. `npm run build` outputs `index.html.gz` to `tools/server/public/`
1. `npm run build` outputs `index.html` to `tools/server/public/`
2. llama-server compiles this into the binary at build time
3. When accessing `/`, llama-server serves the gzipped HTML
4. All assets are inlined (CSS, JS, fonts, favicon)

View File

@@ -50,7 +50,6 @@
"eslint-config-prettier": "^10.0.1",
"eslint-plugin-storybook": "^10.2.4",
"eslint-plugin-svelte": "^3.0.0",
"fflate": "^0.8.2",
"globals": "^16.0.0",
"http-server": "^14.1.1",
"mdast": "^3.0.0",

View File

@@ -1,14 +1,12 @@
#!/bin/bash
# Script to install pre-commit and pre-push hooks for webui
# Pre-commit: formats code and runs checks
# Pre-push: builds the project, stashes unstaged changes
# Script to install pre-commit hook for webui
# Pre-commit: formats, checks, builds, and stages build output
REPO_ROOT=$(git rev-parse --show-toplevel)
PRE_COMMIT_HOOK="$REPO_ROOT/.git/hooks/pre-commit"
PRE_PUSH_HOOK="$REPO_ROOT/.git/hooks/pre-push"
echo "Installing pre-commit and pre-push hooks for webui..."
echo "Installing pre-commit hook for webui..."
# Create the pre-commit hook
cat > "$PRE_COMMIT_HOOK" << 'EOF'
@@ -16,21 +14,19 @@ cat > "$PRE_COMMIT_HOOK" << 'EOF'
# Check if there are any changes in the webui directory
if git diff --cached --name-only | grep -q "^tools/server/webui/"; then
echo "Formatting and checking webui code..."
# Change to webui directory and run format
cd tools/server/webui
# Check if npm is available and package.json exists
REPO_ROOT=$(git rev-parse --show-toplevel)
cd "$REPO_ROOT/tools/server/webui"
# Check if package.json exists
if [ ! -f "package.json" ]; then
echo "Error: package.json not found in tools/server/webui"
exit 1
fi
echo "Formatting and checking webui code..."
# Run the format command
npm run format
# Check if format command succeeded
if [ $? -ne 0 ]; then
echo "Error: npm run format failed"
exit 1
@@ -38,8 +34,6 @@ if git diff --cached --name-only | grep -q "^tools/server/webui/"; then
# Run the lint command
npm run lint
# Check if lint command succeeded
if [ $? -ne 0 ]; then
echo "Error: npm run lint failed"
exit 1
@@ -47,156 +41,42 @@ if git diff --cached --name-only | grep -q "^tools/server/webui/"; then
# Run the check command
npm run check
# Check if check command succeeded
if [ $? -ne 0 ]; then
echo "Error: npm run check failed"
exit 1
fi
# Go back to repo root
cd ../../..
echo "✅ Webui code formatted and checked successfully"
fi
exit 0
EOF
# Create the pre-push hook
cat > "$PRE_PUSH_HOOK" << 'EOF'
#!/bin/bash
# Check if there are any webui changes that need building
WEBUI_CHANGES=$(git diff --name-only @{push}..HEAD | grep "^tools/server/webui/" || true)
if [ -n "$WEBUI_CHANGES" ]; then
echo "Webui changes detected, checking if build is up-to-date..."
# Change to webui directory
cd tools/server/webui
# Check if npm is available and package.json exists
if [ ! -f "package.json" ]; then
echo "Error: package.json not found in tools/server/webui"
# Build the webui
echo "Building webui..."
npm run build
if [ $? -ne 0 ]; then
echo "❌ npm run build failed"
exit 1
fi
# Check if build output exists and is newer than source files
BUILD_FILE="../public/index.html.gz"
NEEDS_BUILD=false
if [ ! -f "$BUILD_FILE" ]; then
echo "Build output not found, building..."
NEEDS_BUILD=true
else
# Check if any source files are newer than the build output
if find src -newer "$BUILD_FILE" -type f | head -1 | grep -q .; then
echo "Source files are newer than build output, rebuilding..."
NEEDS_BUILD=true
fi
fi
if [ "$NEEDS_BUILD" = true ]; then
echo "Building webui..."
# Stash any unstaged changes to avoid conflicts during build
echo "Checking for unstaged changes..."
if ! git diff --quiet || ! git diff --cached --quiet --diff-filter=A; then
echo "Stashing unstaged changes..."
git stash push --include-untracked -m "Pre-push hook: stashed unstaged changes"
STASH_CREATED=$?
else
echo "No unstaged changes to stash"
STASH_CREATED=1
fi
# Run the build command
npm run build
# Check if build command succeeded
if [ $? -ne 0 ]; then
echo "Error: npm run build failed"
if [ $STASH_CREATED -eq 0 ]; then
echo "You can restore your unstaged changes with: git stash pop"
fi
exit 1
fi
# Go back to repo root
cd ../../..
# Check if build output was created/updated
if [ -f "tools/server/public/index.html.gz" ]; then
# Add the build output and commit it
git add tools/server/public/index.html.gz
if ! git diff --cached --quiet; then
echo "Committing updated build output..."
git commit -m "chore: update webui build output"
echo "✅ Build output committed successfully"
else
echo "Build output unchanged"
fi
else
echo "Error: Build output not found after build"
if [ $STASH_CREATED -eq 0 ]; then
echo "You can restore your unstaged changes with: git stash pop"
fi
exit 1
fi
if [ $STASH_CREATED -eq 0 ]; then
echo "✅ Build completed. Your unstaged changes have been stashed."
echo "They will be automatically restored after the push."
# Create a marker file to indicate stash was created by pre-push hook
touch .git/WEBUI_PUSH_STASH_MARKER
fi
else
echo "✅ Build output is up-to-date"
fi
echo "✅ Webui ready for push"
# Stage the build output alongside the source changes
cd "$REPO_ROOT"
git add tools/server/public/
echo "✅ Webui built and build output staged"
fi
exit 0
EOF
# Create the post-push hook (for restoring stashed changes after push)
cat > "$REPO_ROOT/.git/hooks/post-push" << 'EOF'
#!/bin/bash
# Check if we have a stash marker from the pre-push hook
if [ -f .git/WEBUI_PUSH_STASH_MARKER ]; then
echo "Restoring your unstaged changes after push..."
git stash pop
rm -f .git/WEBUI_PUSH_STASH_MARKER
echo "✅ Your unstaged changes have been restored."
fi
exit 0
EOF
# Make all hooks executable
# Make hook executable
chmod +x "$PRE_COMMIT_HOOK"
chmod +x "$PRE_PUSH_HOOK"
chmod +x "$REPO_ROOT/.git/hooks/post-push"
if [ $? -eq 0 ]; then
echo "✅ Git hooks installed successfully!"
echo "✅ Git hook installed successfully!"
echo " Pre-commit: $PRE_COMMIT_HOOK"
echo " Pre-push: $PRE_PUSH_HOOK"
echo " Post-push: $REPO_ROOT/.git/hooks/post-push"
echo ""
echo "The hooks will automatically:"
echo " • Format and check webui code before commits (pre-commit)"
echo " • Build webui code before pushes (pre-push)"
echo " • Stash unstaged changes during build process"
echo " • Restore your unstaged changes after the push"
echo ""
echo "To test the hooks:"
echo " • Make a change to a file in the webui directory and commit it (triggers format/check)"
echo " • Push your commits to trigger the build process"
echo "The hook will automatically:"
echo " • Format, lint and check webui code before commits"
echo " • Build webui and stage tools/server/public/ into the same commit"
else
echo "❌ Failed to make hooks executable"
echo "❌ Failed to make hook executable"
exit 1
fi

View File

@@ -1,3 +1,3 @@
rm -rf ../public/_app;
rm ../public/favicon.svg;
rm ../public/index.html;
rm -f ../public/index.html.gz; # deprecated, but may still be generated by older versions of the build process

View File

@@ -40,6 +40,17 @@
--code-background: oklch(0.985 0 0);
--code-foreground: oklch(0.145 0 0);
--layer-popover: 1000000;
--chat-form-area-height: 8rem;
--chat-form-area-offset: 2rem;
--max-message-height: max(24rem, min(80dvh, calc(100dvh - var(--chat-form-area-height) - 12rem)));
}
@media (min-width: 640px) {
:root {
--chat-form-area-height: 24rem;
--chat-form-area-offset: 12rem;
}
}
.dark {
@@ -116,19 +127,6 @@
--color-sidebar-ring: var(--sidebar-ring);
}
:root {
--chat-form-area-height: 8rem;
--chat-form-area-offset: 2rem;
--max-message-height: max(24rem, min(80dvh, calc(100dvh - var(--chat-form-area-height) - 12rem)));
}
@media (min-width: 640px) {
:root {
--chat-form-area-height: 24rem;
--chat-form-area-offset: 12rem;
}
}
@layer base {
* {
@apply border-border outline-ring/50;

View File

@@ -4,7 +4,7 @@
import { getChatActionsContext, setMessageEditContext } from '$lib/contexts';
import { chatStore, pendingEditMessageId } from '$lib/stores/chat.svelte';
import { conversationsStore } from '$lib/stores/conversations.svelte';
import { DatabaseService } from '$lib/services';
import { DatabaseService } from '$lib/services/database.service';
import { SYSTEM_MESSAGE_PLACEHOLDER } from '$lib/constants';
import { MessageRole, AttachmentType } from '$lib/enums';
import {
@@ -19,6 +19,7 @@
interface Props {
class?: string;
message: DatabaseMessage;
toolMessages?: DatabaseMessage[];
isLastAssistantMessage?: boolean;
siblingInfo?: ChatMessageSiblingInfo | null;
}
@@ -26,6 +27,7 @@
let {
class: className = '',
message,
toolMessages = [],
isLastAssistantMessage = false,
siblingInfo = null
}: Props = $props();
@@ -302,6 +304,7 @@
{deletionInfo}
{isLastAssistantMessage}
{message}
{toolMessages}
messageContent={message.content}
onConfirmDelete={handleConfirmDelete}
onContinue={handleContinue}

View File

@@ -6,42 +6,42 @@
SyntaxHighlightedCode
} from '$lib/components/app';
import { config } from '$lib/stores/settings.svelte';
import { Wrench, Loader2, AlertTriangle, Brain } from '@lucide/svelte';
import { AgenticSectionType, AttachmentType, FileTypeText } from '$lib/enums';
import { Wrench, Loader2, Brain } from '@lucide/svelte';
import { AgenticSectionType, FileTypeText } from '$lib/enums';
import { formatJsonPretty } from '$lib/utils';
import { ATTACHMENT_SAVED_REGEX, NEWLINE_SEPARATOR } from '$lib/constants';
import { parseAgenticContent, type AgenticSection } from '$lib/utils';
import type { DatabaseMessage, DatabaseMessageExtraImageFile } from '$lib/types/database';
import {
deriveAgenticSections,
parseToolResultWithImages,
type AgenticSection,
type ToolResultLine
} from '$lib/utils';
import type { DatabaseMessage } from '$lib/types/database';
import type { ChatMessageAgenticTimings, ChatMessageAgenticTurnStats } from '$lib/types/chat';
import { ChatMessageStatsView } from '$lib/enums';
interface Props {
message?: DatabaseMessage;
content: string;
message: DatabaseMessage;
toolMessages?: DatabaseMessage[];
isStreaming?: boolean;
highlightTurns?: boolean;
}
type ToolResultLine = {
text: string;
image?: DatabaseMessageExtraImageFile;
};
let { content, message, isStreaming = false, highlightTurns = false }: Props = $props();
let { message, toolMessages = [], isStreaming = false, highlightTurns = false }: Props = $props();
let expandedStates: Record<number, boolean> = $state({});
const sections = $derived(parseAgenticContent(content));
const showToolCallInProgress = $derived(config().showToolCallInProgress as boolean);
const showThoughtInProgress = $derived(config().showThoughtInProgress as boolean);
// Parse toolResults with images only when sections or message.extra change
const sections = $derived(deriveAgenticSections(message, toolMessages, []));
// Parse tool results with images
const sectionsParsed = $derived(
sections.map((section) => ({
...section,
parsedLines: section.toolResult
? parseToolResultWithImages(section.toolResult, message?.extra)
: []
? parseToolResultWithImages(section.toolResult, section.toolResultExtras || message?.extra)
: ([] as ToolResultLine[])
}))
);
@@ -107,26 +107,6 @@
expandedStates[index] = !currentState;
}
function parseToolResultWithImages(
toolResult: string,
extras?: DatabaseMessage['extra']
): ToolResultLine[] {
const lines = toolResult.split(NEWLINE_SEPARATOR);
return lines.map((line) => {
const match = line.match(ATTACHMENT_SAVED_REGEX);
if (!match || !extras) return { text: line };
const attachmentName = match[1];
const image = extras.find(
(e): e is DatabaseMessageExtraImageFile =>
e.type === AttachmentType.IMAGE && e.name === attachmentName
);
return { text: line, image };
});
}
function buildTurnAgenticTimings(stats: ChatMessageAgenticTurnStats): ChatMessageAgenticTimings {
return {
turns: 1,
@@ -144,9 +124,8 @@
<MarkdownContent content={section.content} attachments={message?.extra} />
</div>
{:else if section.type === AgenticSectionType.TOOL_CALL_STREAMING}
{@const streamingIcon = isStreaming ? Loader2 : AlertTriangle}
{@const streamingIconClass = isStreaming ? 'h-4 w-4 animate-spin' : 'h-4 w-4 text-yellow-500'}
{@const streamingSubtitle = isStreaming ? '' : 'incomplete'}
{@const streamingIcon = isStreaming ? Loader2 : Loader2}
{@const streamingIconClass = isStreaming ? 'h-4 w-4 animate-spin' : 'h-4 w-4'}
<CollapsibleContentBlock
open={isExpanded(index, section)}
@@ -154,7 +133,7 @@
icon={streamingIcon}
iconClass={streamingIconClass}
title={section.toolName || 'Tool call'}
subtitle={streamingSubtitle}
subtitle={isStreaming ? '' : 'incomplete'}
{isStreaming}
onToggle={() => toggleExpanded(index, section)}
>

View File

@@ -15,7 +15,7 @@
import { Check, X } from '@lucide/svelte';
import { Button } from '$lib/components/ui/button';
import { Checkbox } from '$lib/components/ui/checkbox';
import { AGENTIC_TAGS, INPUT_CLASSES, REASONING_TAGS } from '$lib/constants';
import { INPUT_CLASSES } from '$lib/constants';
import { MessageRole, KeyboardKey, ChatMessageStatsView } from '$lib/enums';
import Label from '$lib/components/ui/label/label.svelte';
import { config } from '$lib/stores/settings.svelte';
@@ -23,6 +23,8 @@
import { modelsStore } from '$lib/stores/models.svelte';
import { ServerModelStatus } from '$lib/enums';
import { hasAgenticContent } from '$lib/utils';
interface Props {
class?: string;
deletionInfo: {
@@ -33,6 +35,7 @@
} | null;
isLastAssistantMessage?: boolean;
message: DatabaseMessage;
toolMessages?: DatabaseMessage[];
messageContent: string | undefined;
onCopy: () => void;
onConfirmDelete: () => void;
@@ -53,6 +56,7 @@
deletionInfo,
isLastAssistantMessage = false,
message,
toolMessages = [],
messageContent,
onConfirmDelete,
onContinue,
@@ -84,10 +88,8 @@
}
}
const hasAgenticMarkers = $derived(
messageContent?.includes(AGENTIC_TAGS.TOOL_CALL_START) ?? false
);
const hasReasoningMarkers = $derived(messageContent?.includes(REASONING_TAGS.START) ?? false);
const isAgentic = $derived(hasAgenticContent(message, toolMessages));
const hasReasoning = $derived(!!message.reasoningContent);
const processingState = useProcessingState();
let currentConfig = $derived(config());
@@ -145,7 +147,7 @@
}
let highlightAgenticTurns = $derived(
hasAgenticMarkers &&
isAgentic &&
(currentConfig.alwaysShowAgenticTurns || activeStatsView === ChatMessageStatsView.SUMMARY)
);
@@ -160,13 +162,14 @@
message?.role === MessageRole.ASSISTANT &&
isActivelyProcessing &&
hasNoContent &&
!isAgentic &&
isLastAssistantMessage
);
let showProcessingInfoBottom = $derived(
message?.role === MessageRole.ASSISTANT &&
isActivelyProcessing &&
!hasNoContent &&
(!hasNoContent || isAgentic) &&
isLastAssistantMessage
);
@@ -252,10 +255,10 @@
<pre class="raw-output">{messageContent || ''}</pre>
{:else}
<ChatMessageAgenticContent
content={messageContent || ''}
{message}
{toolMessages}
isStreaming={isChatStreaming()}
highlightTurns={highlightAgenticTurns}
{message}
/>
{/if}
{:else}
@@ -344,9 +347,7 @@
{onCopy}
{onEdit}
{onRegenerate}
onContinue={currentConfig.enableContinueGeneration && !hasReasoningMarkers
? onContinue
: undefined}
onContinue={currentConfig.enableContinueGeneration && !hasReasoning ? onContinue : undefined}
{onForkConversation}
{onDelete}
{onConfirmDelete}

View File

@@ -6,7 +6,12 @@
import { chatStore } from '$lib/stores/chat.svelte';
import { conversationsStore, activeConversation } from '$lib/stores/conversations.svelte';
import { config } from '$lib/stores/settings.svelte';
import { copyToClipboard, formatMessageForClipboard, getMessageSiblings } from '$lib/utils';
import {
copyToClipboard,
formatMessageForClipboard,
getMessageSiblings,
hasAgenticContent
} from '$lib/utils';
interface Props {
class?: string;
@@ -119,32 +124,75 @@
? messages
: messages.filter((msg) => msg.type !== MessageRole.SYSTEM);
let lastAssistantIndex = -1;
// Build display entries, grouping agentic sessions into single entries.
// An agentic session = assistant(with tool_calls) → tool → assistant → tool → ... → assistant(final)
const result: Array<{
message: DatabaseMessage;
toolMessages: DatabaseMessage[];
isLastAssistantMessage: boolean;
siblingInfo: ChatMessageSiblingInfo;
}> = [];
for (let i = filteredMessages.length - 1; i >= 0; i--) {
if (filteredMessages[i].role === MessageRole.ASSISTANT) {
lastAssistantIndex = i;
for (let i = 0; i < filteredMessages.length; i++) {
const msg = filteredMessages[i];
// Skip tool messages - they're grouped with preceding assistant
if (msg.role === MessageRole.TOOL) continue;
const toolMessages: DatabaseMessage[] = [];
if (msg.role === MessageRole.ASSISTANT && hasAgenticContent(msg)) {
let j = i + 1;
while (j < filteredMessages.length) {
const next = filteredMessages[j];
if (next.role === MessageRole.TOOL) {
toolMessages.push(next);
j++;
} else if (next.role === MessageRole.ASSISTANT) {
toolMessages.push(next);
j++;
} else {
break;
}
}
i = j - 1;
} else if (msg.role === MessageRole.ASSISTANT) {
let j = i + 1;
while (j < filteredMessages.length && filteredMessages[j].role === MessageRole.TOOL) {
toolMessages.push(filteredMessages[j]);
j++;
}
}
const siblingInfo = getMessageSiblings(allConversationMessages, msg.id);
result.push({
message: msg,
toolMessages,
isLastAssistantMessage: false,
siblingInfo: siblingInfo || {
message: msg,
siblingIds: [msg.id],
currentIndex: 0,
totalSiblings: 1
}
});
}
// Mark the last assistant message
for (let i = result.length - 1; i >= 0; i--) {
if (result[i].message.role === MessageRole.ASSISTANT) {
result[i].isLastAssistantMessage = true;
break;
}
}
return filteredMessages.map((message, index) => {
const siblingInfo = getMessageSiblings(allConversationMessages, message.id);
const isLastAssistantMessage =
message.role === MessageRole.ASSISTANT && index === lastAssistantIndex;
return {
message,
isLastAssistantMessage,
siblingInfo: siblingInfo || {
message,
siblingIds: [message.id],
currentIndex: 0,
totalSiblings: 1
}
};
});
return result;
});
</script>
@@ -152,11 +200,12 @@
class="flex h-full flex-col space-y-10 pt-24 {className}"
style="height: auto; min-height: calc(100dvh - 14rem);"
>
{#each displayMessages as { message, isLastAssistantMessage, siblingInfo } (message.id)}
{#each displayMessages as { message, toolMessages, isLastAssistantMessage, siblingInfo } (message.id)}
<div use:fadeInView>
<ChatMessage
class="mx-auto w-full max-w-[48rem]"
{message}
{toolMessages}
{isLastAssistantMessage}
{siblingInfo}
/>

View File

@@ -425,21 +425,16 @@ export { default as ChatMessage } from './ChatMessages/ChatMessage.svelte';
/**
* **ChatMessageAgenticContent** - Agentic workflow output display
*
* Specialized renderer for assistant messages containing agentic workflow markers.
* Parses structured content and displays tool calls and reasoning blocks as
* interactive collapsible sections with real-time streaming support.
* Specialized renderer for assistant messages with tool calls and reasoning.
* Derives display sections from structured message data (toolCalls, reasoningContent,
* and child tool result messages) and renders them as interactive collapsible sections.
*
* **Architecture:**
* - Uses `parseAgenticContent()` from `$lib/utils` to parse markers
* - Uses `deriveAgenticSections()` from `$lib/utils` to build sections from structured data
* - Renders sections as CollapsibleContentBlock components
* - Handles streaming state for progressive content display
* - Falls back to MarkdownContent for plain text sections
*
* **Marker Format:**
* - Tool calls: in constants/agentic.ts (AGENTIC_TAGS)
* - Reasoning: in constants/agentic.ts (REASONING_TAGS)
* - Partial markers handled gracefully during streaming
*
* **Execution States:**
* - **Streaming**: Animated spinner, block expanded, auto-scroll enabled
* - **Pending**: Waiting indicator for queued tool calls

View File

@@ -15,8 +15,11 @@ export const DEFAULT_AGENTIC_CONFIG: AgenticConfig = {
maxToolPreviewLines: 25
} as const;
// Agentic tool call tag markers
export const AGENTIC_TAGS = {
/**
* @deprecated Legacy marker tags - only used for migration of old stored messages.
* New messages use structured fields (reasoningContent, toolCalls, toolCallId).
*/
export const LEGACY_AGENTIC_TAGS = {
TOOL_CALL_START: '<<<AGENTIC_TOOL_CALL_START>>>',
TOOL_CALL_END: '<<<AGENTIC_TOOL_CALL_END>>>',
TOOL_NAME_PREFIX: '<<<TOOL_NAME:',
@@ -25,39 +28,25 @@ export const AGENTIC_TAGS = {
TAG_SUFFIX: '>>>'
} as const;
export const REASONING_TAGS = {
/**
* @deprecated Legacy reasoning tags - only used for migration of old stored messages.
* New messages use the dedicated reasoningContent field.
*/
export const LEGACY_REASONING_TAGS = {
START: '<<<reasoning_content_start>>>',
END: '<<<reasoning_content_end>>>'
} as const;
// Regex for trimming leading/trailing newlines
export const TRIM_NEWLINES_REGEX = /^\n+|\n+$/g;
// Regex patterns for parsing agentic content
export const AGENTIC_REGEX = {
// Matches completed tool calls (with END marker)
/**
* @deprecated Legacy regex patterns - only used for migration of old stored messages.
*/
export const LEGACY_AGENTIC_REGEX = {
COMPLETED_TOOL_CALL:
/<<<AGENTIC_TOOL_CALL_START>>>\n<<<TOOL_NAME:(.+?)>>>\n<<<TOOL_ARGS_START>>>([\s\S]*?)<<<TOOL_ARGS_END>>>([\s\S]*?)<<<AGENTIC_TOOL_CALL_END>>>/g,
// Matches pending tool call (has NAME and ARGS but no END)
PENDING_TOOL_CALL:
/<<<AGENTIC_TOOL_CALL_START>>>\n<<<TOOL_NAME:(.+?)>>>\n<<<TOOL_ARGS_START>>>([\s\S]*?)<<<TOOL_ARGS_END>>>([\s\S]*)$/,
// Matches partial tool call (has START and NAME, ARGS still streaming)
PARTIAL_WITH_NAME:
/<<<AGENTIC_TOOL_CALL_START>>>\n<<<TOOL_NAME:(.+?)>>>\n<<<TOOL_ARGS_START>>>([\s\S]*)$/,
// Matches early tool call (just START marker)
EARLY_MATCH: /<<<AGENTIC_TOOL_CALL_START>>>([\s\S]*)$/,
// Matches partial marker at end of content
PARTIAL_MARKER: /<<<[A-Za-z_]*$/,
// Matches reasoning content blocks (including tags)
REASONING_BLOCK: /<<<reasoning_content_start>>>[\s\S]*?<<<reasoning_content_end>>>/g,
// Captures the reasoning text between start/end tags
REASONING_EXTRACT: /<<<reasoning_content_start>>>([\s\S]*?)<<<reasoning_content_end>>>/,
// Matches an opening reasoning tag and any remaining content (unterminated)
REASONING_OPEN: /<<<reasoning_content_start>>>[\s\S]*$/,
// Matches a complete agentic tool call display block (start to end marker)
AGENTIC_TOOL_CALL_BLOCK: /\n*<<<AGENTIC_TOOL_CALL_START>>>[\s\S]*?<<<AGENTIC_TOOL_CALL_END>>>/g,
// Matches a pending/partial agentic tool call (start marker with no matching end)
AGENTIC_TOOL_CALL_OPEN: /\n*<<<AGENTIC_TOOL_CALL_START>>>[\s\S]*$/,
// Matches tool name inside content
TOOL_NAME_EXTRACT: /<<<TOOL_NAME:([^>]+)>>>/
HAS_LEGACY_MARKERS: /<<<(?:AGENTIC_TOOL_CALL_START|reasoning_content_start)>>>/
} as const;

View File

@@ -1,6 +1,7 @@
import { getJsonHeaders, formatAttachmentText, isAbortError } from '$lib/utils';
import { getJsonHeaders } from '$lib/utils/api-headers';
import { formatAttachmentText } from '$lib/utils/formatters';
import { isAbortError } from '$lib/utils/abort';
import {
AGENTIC_REGEX,
ATTACHMENT_LABEL_PDF_FILE,
ATTACHMENT_LABEL_MCP_PROMPT,
ATTACHMENT_LABEL_MCP_RESOURCE
@@ -17,38 +18,6 @@ import type { DatabaseMessageExtraMcpPrompt, DatabaseMessageExtraMcpResource } f
import { modelsStore } from '$lib/stores/models.svelte';
export class ChatService {
private static stripReasoningContent(
content: ApiChatMessageData['content'] | null | undefined
): ApiChatMessageData['content'] | null | undefined {
if (!content) {
return content;
}
if (typeof content === 'string') {
return content
.replace(AGENTIC_REGEX.REASONING_BLOCK, '')
.replace(AGENTIC_REGEX.REASONING_OPEN, '')
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_BLOCK, '')
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN, '');
}
if (!Array.isArray(content)) {
return content;
}
return content.map((part: ApiChatMessageContentPart) => {
if (part.type !== ContentPartType.TEXT || !part.text) return part;
return {
...part,
text: part.text
.replace(AGENTIC_REGEX.REASONING_BLOCK, '')
.replace(AGENTIC_REGEX.REASONING_OPEN, '')
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_BLOCK, '')
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN, '')
};
});
}
/**
*
*
@@ -57,46 +26,6 @@ export class ChatService {
*
*/
/**
* Extracts reasoning text from content that contains internal reasoning tags.
* Returns the concatenated reasoning content or undefined if none found.
*/
private static extractReasoningFromContent(
content: ApiChatMessageData['content'] | null | undefined
): string | undefined {
if (!content) return undefined;
const extractFromString = (text: string): string => {
const parts: string[] = [];
// Use a fresh regex instance to avoid shared lastIndex state
const re = new RegExp(AGENTIC_REGEX.REASONING_EXTRACT.source);
let match = re.exec(text);
while (match) {
parts.push(match[1]);
// advance past the matched portion and retry
text = text.slice(match.index + match[0].length);
match = re.exec(text);
}
return parts.join('');
};
if (typeof content === 'string') {
const result = extractFromString(content);
return result || undefined;
}
if (!Array.isArray(content)) return undefined;
const parts: string[] = [];
for (const part of content) {
if (part.type === ContentPartType.TEXT && part.text) {
const result = extractFromString(part.text);
if (result) parts.push(result);
}
}
return parts.length > 0 ? parts.join('') : undefined;
}
/**
* Sends a chat completion request to the llama.cpp server.
* Supports both streaming and non-streaming responses with comprehensive parameter configuration.
@@ -201,20 +130,15 @@ export class ChatService {
const requestBody: ApiChatCompletionRequest = {
messages: normalizedMessages.map((msg: ApiChatMessageData) => {
// Always strip internal reasoning/agentic tags from content
const cleanedContent = ChatService.stripReasoningContent(msg.content);
const mapped: ApiChatCompletionRequest['messages'][0] = {
role: msg.role,
content: cleanedContent,
content: msg.content,
tool_calls: msg.tool_calls,
tool_call_id: msg.tool_call_id
};
// When preserving reasoning, extract it from raw content and send as separate field
if (!excludeReasoningFromContext) {
const reasoning = ChatService.extractReasoningFromContent(msg.content);
if (reasoning) {
mapped.reasoning_content = reasoning;
}
// Include reasoning_content from the dedicated field
if (!excludeReasoningFromContext && msg.reasoning_content) {
mapped.reasoning_content = msg.reasoning_content;
}
return mapped;
}),
@@ -730,6 +654,10 @@ export class ChatService {
content: message.content
};
if (message.reasoningContent) {
result.reasoning_content = message.reasoningContent;
}
if (toolCalls && toolCalls.length > 0) {
result.tool_calls = toolCalls;
}
@@ -858,6 +786,9 @@ export class ChatService {
role: message.role as MessageRole,
content: contentParts
};
if (message.reasoningContent) {
result.reasoning_content = message.reasoningContent;
}
if (toolCalls && toolCalls.length > 0) {
result.tool_calls = toolCalls;
}

View File

@@ -42,6 +42,7 @@ import type {
import {
buildProxiedUrl,
buildProxiedHeaders,
getAuthHeaders,
throwIfAborted,
isAbortError,
createBase64DataUrl
@@ -124,7 +125,14 @@ export class MCPService {
const requestInit: RequestInit = {};
if (config.headers) {
requestInit.headers = buildProxiedHeaders(config.headers);
requestInit.headers = config.useProxy ? buildProxiedHeaders(config.headers) : config.headers;
}
if (useProxy) {
requestInit.headers = {
...getAuthHeaders(),
...(requestInit.headers as Record<string, string>)
};
}
if (config.credentials) {

View File

@@ -7,6 +7,10 @@
* - Session state management
* - Turn limit enforcement
*
* Each agentic turn produces separate DB messages:
* - One assistant message per LLM turn (with tool_calls if any)
* - One tool result message per tool call execution
*
* **Architecture & Relationships:**
* - **ChatService**: Stateless API layer (sendMessage, streaming)
* - **mcpStore**: MCP connection management and tool execution
@@ -16,7 +20,6 @@
* @see mcpStore in stores/mcp.svelte.ts for MCP operations
*/
import { SvelteMap } from 'svelte/reactivity';
import { ChatService } from '$lib/services';
import { config } from '$lib/stores/settings.svelte';
import { mcpStore } from '$lib/stores/mcp.svelte';
@@ -24,7 +27,6 @@ import { modelsStore } from '$lib/stores/models.svelte';
import { isAbortError } from '$lib/utils';
import {
DEFAULT_AGENTIC_CONFIG,
AGENTIC_TAGS,
NEWLINE_SEPARATOR,
TURN_LIMIT_MESSAGE,
LLM_ERROR_BLOCK_START,
@@ -193,17 +195,6 @@ class AgenticStore {
async runAgenticFlow(params: AgenticFlowParams): Promise<AgenticFlowResult> {
const { conversationId, messages, options = {}, callbacks, signal, perChatOverrides } = params;
const {
onChunk,
onReasoningChunk,
onToolCallChunk,
onAttachments,
onModel,
onComplete,
onError,
onTimings,
onTurnComplete
} = callbacks;
const agenticConfig = this.getConfig(config(), perChatOverrides);
if (!agenticConfig.enabled) return { handled: false };
@@ -253,24 +244,14 @@ class AgenticStore {
options,
tools,
agenticConfig,
callbacks: {
onChunk,
onReasoningChunk,
onToolCallChunk,
onAttachments,
onModel,
onComplete,
onError,
onTimings,
onTurnComplete
},
callbacks,
signal
});
return { handled: true };
} catch (error) {
const normalizedError = error instanceof Error ? error : new Error(String(error));
this.updateSession(conversationId, { lastError: normalizedError });
onError?.(normalizedError);
callbacks.onError?.(normalizedError);
return { handled: true, error: normalizedError };
} finally {
this.updateSession(conversationId, { isRunning: false });
@@ -295,17 +276,20 @@ class AgenticStore {
const {
onChunk,
onReasoningChunk,
onToolCallChunk,
onToolCallsStreaming,
onAttachments,
onModel,
onComplete,
onAssistantTurnComplete,
createToolResultMessage,
createAssistantMessage,
onFlowComplete,
onTimings,
onTurnComplete
} = callbacks;
const sessionMessages: AgenticMessage[] = toAgenticMessages(messages);
const allToolCalls: ApiChatCompletionToolCall[] = [];
let capturedTimings: ChatMessageTimings | undefined;
let totalToolCallCount = 0;
const agenticTimings: ChatMessageAgenticTimings = {
turns: 0,
@@ -316,12 +300,7 @@ class AgenticStore {
llm: { predicted_n: 0, predicted_ms: 0, prompt_n: 0, prompt_ms: 0 }
};
const maxTurns = agenticConfig.maxTurns;
const maxToolPreviewLines = agenticConfig.maxToolPreviewLines;
// Resolve effective model for vision capability checks.
// In ROUTER mode, options.model is always set by the caller.
// In MODEL mode, options.model is undefined; use the single loaded model
// which carries modalities bridged from /props.
const effectiveModel = options.model || modelsStore.models[0]?.model || '';
for (let turn = 0; turn < maxTurns; turn++) {
@@ -329,23 +308,20 @@ class AgenticStore {
agenticTimings.turns = turn + 1;
if (signal?.aborted) {
onComplete?.(
'',
undefined,
this.buildFinalTimings(capturedTimings, agenticTimings),
undefined
);
onFlowComplete?.(this.buildFinalTimings(capturedTimings, agenticTimings));
return;
}
// For turns > 0, create a new assistant message via callback
if (turn > 0 && createAssistantMessage) {
await createAssistantMessage();
}
let turnContent = '';
let turnReasoningContent = '';
let turnToolCalls: ApiChatCompletionToolCall[] = [];
let lastStreamingToolCallName = '';
let lastStreamingToolCallArgsLength = 0;
const emittedToolCallStates = new SvelteMap<
number,
{ emittedOnce: boolean; lastArgs: string }
>();
let turnTimings: ChatMessageTimings | undefined;
const turnStats: ChatMessageAgenticTurnStats = {
@@ -366,30 +342,15 @@ class AgenticStore {
turnContent += chunk;
onChunk?.(chunk);
},
onReasoningChunk,
onReasoningChunk: (chunk: string) => {
turnReasoningContent += chunk;
onReasoningChunk?.(chunk);
},
onToolCallChunk: (serialized: string) => {
try {
turnToolCalls = JSON.parse(serialized) as ApiChatCompletionToolCall[];
for (let i = 0; i < turnToolCalls.length; i++) {
const toolCall = turnToolCalls[i];
const toolName = toolCall.function?.name ?? '';
const toolArgs = toolCall.function?.arguments ?? '';
const state = emittedToolCallStates.get(i) || {
emittedOnce: false,
lastArgs: ''
};
if (!state.emittedOnce) {
const output = `\n\n${AGENTIC_TAGS.TOOL_CALL_START}\n${AGENTIC_TAGS.TOOL_NAME_PREFIX}${toolName}${AGENTIC_TAGS.TAG_SUFFIX}\n${AGENTIC_TAGS.TOOL_ARGS_START}\n${toolArgs}`;
onChunk?.(output);
state.emittedOnce = true;
state.lastArgs = toolArgs;
emittedToolCallStates.set(i, state);
} else if (toolArgs.length > state.lastArgs.length) {
onChunk?.(toolArgs.slice(state.lastArgs.length));
state.lastArgs = toolArgs;
emittedToolCallStates.set(i, state);
}
}
onToolCallsStreaming?.(turnToolCalls);
if (turnToolCalls.length > 0 && turnToolCalls[0]?.function) {
const name = turnToolCalls[0].function.name || '';
const args = turnToolCalls[0].function.arguments || '';
@@ -442,77 +403,84 @@ class AgenticStore {
}
} catch (error) {
if (signal?.aborted) {
onComplete?.(
'',
undefined,
// Save whatever we have for this turn before exiting
await onAssistantTurnComplete?.(
turnContent,
turnReasoningContent || undefined,
this.buildFinalTimings(capturedTimings, agenticTimings),
undefined
);
onFlowComplete?.(this.buildFinalTimings(capturedTimings, agenticTimings));
return;
}
const normalizedError = error instanceof Error ? error : new Error('LLM stream error');
// Save error as content in the current turn
onChunk?.(`${LLM_ERROR_BLOCK_START}${normalizedError.message}${LLM_ERROR_BLOCK_END}`);
onComplete?.(
'',
undefined,
await onAssistantTurnComplete?.(
turnContent + `${LLM_ERROR_BLOCK_START}${normalizedError.message}${LLM_ERROR_BLOCK_END}`,
turnReasoningContent || undefined,
this.buildFinalTimings(capturedTimings, agenticTimings),
undefined
);
onFlowComplete?.(this.buildFinalTimings(capturedTimings, agenticTimings));
throw normalizedError;
}
// No tool calls = final turn, save and complete
if (turnToolCalls.length === 0) {
agenticTimings.perTurn!.push(turnStats);
onComplete?.(
'',
undefined,
this.buildFinalTimings(capturedTimings, agenticTimings),
const finalTimings = this.buildFinalTimings(capturedTimings, agenticTimings);
await onAssistantTurnComplete?.(
turnContent,
turnReasoningContent || undefined,
finalTimings,
undefined
);
if (finalTimings) onTurnComplete?.(finalTimings);
onFlowComplete?.(finalTimings);
return;
}
// Normalize and save assistant turn with tool calls
const normalizedCalls = this.normalizeToolCalls(turnToolCalls);
if (normalizedCalls.length === 0) {
onComplete?.(
'',
undefined,
await onAssistantTurnComplete?.(
turnContent,
turnReasoningContent || undefined,
this.buildFinalTimings(capturedTimings, agenticTimings),
undefined
);
onFlowComplete?.(this.buildFinalTimings(capturedTimings, agenticTimings));
return;
}
for (const call of normalizedCalls) {
allToolCalls.push({
id: call.id,
type: call.type,
function: call.function ? { ...call.function } : undefined
});
}
totalToolCallCount += normalizedCalls.length;
this.updateSession(conversationId, { totalToolCalls: totalToolCallCount });
this.updateSession(conversationId, { totalToolCalls: allToolCalls.length });
onToolCallChunk?.(JSON.stringify(allToolCalls));
// Save the assistant message with its tool calls
await onAssistantTurnComplete?.(
turnContent,
turnReasoningContent || undefined,
turnTimings,
normalizedCalls
);
// Add assistant message to session history
sessionMessages.push({
role: MessageRole.ASSISTANT,
content: turnContent || undefined,
tool_calls: normalizedCalls
});
// Execute each tool call and create result messages
for (const toolCall of normalizedCalls) {
if (signal?.aborted) {
onComplete?.(
'',
undefined,
this.buildFinalTimings(capturedTimings, agenticTimings),
undefined
);
onFlowComplete?.(this.buildFinalTimings(capturedTimings, agenticTimings));
return;
}
@@ -530,13 +498,7 @@ class AgenticStore {
result = executionResult.content;
} catch (error) {
if (isAbortError(error)) {
onComplete?.(
'',
undefined,
this.buildFinalTimings(capturedTimings, agenticTimings),
undefined
);
onFlowComplete?.(this.buildFinalTimings(capturedTimings, agenticTimings));
return;
}
result = `Error: ${error instanceof Error ? error.message : String(error)}`;
@@ -557,21 +519,27 @@ class AgenticStore {
turnStats.toolsMs += Math.round(toolDurationMs);
if (signal?.aborted) {
onComplete?.(
'',
undefined,
this.buildFinalTimings(capturedTimings, agenticTimings),
undefined
);
onFlowComplete?.(this.buildFinalTimings(capturedTimings, agenticTimings));
return;
}
const { cleanedResult, attachments } = this.extractBase64Attachments(result);
if (attachments.length > 0) onAttachments?.(attachments);
this.emitToolCallResult(cleanedResult, maxToolPreviewLines, onChunk);
// Create the tool result message in the DB
let toolResultMessage: DatabaseMessage | undefined;
if (createToolResultMessage) {
toolResultMessage = await createToolResultMessage(
toolCall.id,
cleanedResult,
attachments.length > 0 ? attachments : undefined
);
}
if (attachments.length > 0 && toolResultMessage) {
onAttachments?.(toolResultMessage.id, attachments);
}
// Build content parts for session history (including images for vision models)
const contentParts: ApiChatMessageContentPart[] = [
{ type: ContentPartType.TEXT, text: cleanedResult }
];
@@ -605,8 +573,15 @@ class AgenticStore {
}
}
// Turn limit reached
onChunk?.(TURN_LIMIT_MESSAGE);
onComplete?.('', undefined, this.buildFinalTimings(capturedTimings, agenticTimings), undefined);
await onAssistantTurnComplete?.(
TURN_LIMIT_MESSAGE,
undefined,
this.buildFinalTimings(capturedTimings, agenticTimings),
undefined
);
onFlowComplete?.(this.buildFinalTimings(capturedTimings, agenticTimings));
}
private buildFinalTimings(
@@ -633,23 +608,6 @@ class AgenticStore {
}));
}
private emitToolCallResult(
result: string,
maxLines: number,
emit?: (chunk: string) => void
): void {
if (!emit) {
return;
}
let output = `${NEWLINE_SEPARATOR}${AGENTIC_TAGS.TOOL_ARGS_END}`;
const lines = result.split(NEWLINE_SEPARATOR);
const trimmedLines = lines.length > maxLines ? lines.slice(-maxLines) : lines;
output += `${NEWLINE_SEPARATOR}${trimmedLines.join(NEWLINE_SEPARATOR)}${NEWLINE_SEPARATOR}${AGENTIC_TAGS.TOOL_CALL_END}${NEWLINE_SEPARATOR}`;
emit(output);
}
private extractBase64Attachments(result: string): {
cleanedResult: string;
attachments: DatabaseMessageExtra[];

View File

@@ -12,7 +12,8 @@
*/
import { SvelteMap } from 'svelte/reactivity';
import { DatabaseService, ChatService } from '$lib/services';
import { DatabaseService } from '$lib/services/database.service';
import { ChatService } from '$lib/services/chat.service';
import { conversationsStore } from '$lib/stores/conversations.svelte';
import { config } from '$lib/stores/settings.svelte';
import { agenticStore } from '$lib/stores/agentic.svelte';
@@ -34,7 +35,6 @@ import {
import {
MAX_INACTIVE_CONVERSATION_STATES,
INACTIVE_CONVERSATION_STATE_MAX_AGE_MS,
REASONING_TAGS,
SYSTEM_MESSAGE_PLACEHOLDER
} from '$lib/constants';
import type {
@@ -50,15 +50,6 @@ interface ConversationStateEntry {
lastAccessed: number;
}
const countOccurrences = (source: string, token: string): number =>
source ? source.split(token).length - 1 : 0;
const hasUnclosedReasoningTag = (content: string): boolean =>
countOccurrences(content, REASONING_TAGS.START) > countOccurrences(content, REASONING_TAGS.END);
const wrapReasoningContent = (content: string, reasoningContent?: string): string => {
if (!reasoningContent) return content;
return `${REASONING_TAGS.START}${reasoningContent}${REASONING_TAGS.END}${content}`;
};
class ChatStore {
activeProcessingState = $state<ApiProcessingState | null>(null);
currentResponse = $state('');
@@ -557,83 +548,76 @@ class ChatStore {
await modelsStore.fetchModelProps(effectiveModel);
}
let streamedContent = '',
streamedToolCallContent = '',
isReasoningOpen = false,
hasStreamedChunks = false,
resolvedModel: string | null = null,
modelPersisted = false;
let streamedExtras: DatabaseMessageExtra[] = assistantMessage.extra
? JSON.parse(JSON.stringify(assistantMessage.extra))
: [];
// Mutable state for the current message being streamed
let currentMessageId = assistantMessage.id;
let streamedContent = '';
let streamedReasoningContent = '';
let resolvedModel: string | null = null;
let modelPersisted = false;
const convId = assistantMessage.convId;
const recordModel = (modelName: string | null | undefined, persistImmediately = true): void => {
if (!modelName) return;
const n = normalizeModelName(modelName);
if (!n || n === resolvedModel) return;
resolvedModel = n;
const idx = conversationsStore.findMessageIndex(assistantMessage.id);
const idx = conversationsStore.findMessageIndex(currentMessageId);
conversationsStore.updateMessageAtIndex(idx, { model: n });
if (persistImmediately && !modelPersisted) {
modelPersisted = true;
DatabaseService.updateMessage(assistantMessage.id, { model: n }).catch(() => {
DatabaseService.updateMessage(currentMessageId, { model: n }).catch(() => {
modelPersisted = false;
resolvedModel = null;
});
}
};
const updateStreamingContent = () => {
this.setChatStreaming(assistantMessage.convId, streamedContent, assistantMessage.id);
const idx = conversationsStore.findMessageIndex(assistantMessage.id);
const updateStreamingUI = () => {
this.setChatStreaming(convId, streamedContent, currentMessageId);
const idx = conversationsStore.findMessageIndex(currentMessageId);
conversationsStore.updateMessageAtIndex(idx, { content: streamedContent });
};
const appendContentChunk = (chunk: string) => {
if (isReasoningOpen) {
streamedContent += REASONING_TAGS.END;
isReasoningOpen = false;
}
streamedContent += chunk;
hasStreamedChunks = true;
updateStreamingContent();
};
const appendReasoningChunk = (chunk: string) => {
if (!isReasoningOpen) {
streamedContent += REASONING_TAGS.START;
isReasoningOpen = true;
}
streamedContent += chunk;
hasStreamedChunks = true;
updateStreamingContent();
};
const finalizeReasoning = () => {
if (isReasoningOpen) {
streamedContent += REASONING_TAGS.END;
isReasoningOpen = false;
}
const cleanupStreamingState = () => {
this.setStreamingActive(false);
this.setChatLoading(convId, false);
this.clearChatStreaming(convId);
this.setProcessingState(convId, null);
};
this.setStreamingActive(true);
this.setActiveProcessingConversation(assistantMessage.convId);
const abortController = this.getOrCreateAbortController(assistantMessage.convId);
this.setActiveProcessingConversation(convId);
const abortController = this.getOrCreateAbortController(convId);
const streamCallbacks: ChatStreamCallbacks = {
onChunk: (chunk: string) => appendContentChunk(chunk),
onReasoningChunk: (chunk: string) => appendReasoningChunk(chunk),
onToolCallChunk: (chunk: string) => {
const c = chunk.trim();
if (!c) return;
streamedToolCallContent = c;
const idx = conversationsStore.findMessageIndex(assistantMessage.id);
conversationsStore.updateMessageAtIndex(idx, { toolCalls: streamedToolCallContent });
onChunk: (chunk: string) => {
streamedContent += chunk;
updateStreamingUI();
},
onAttachments: (extras: DatabaseMessageExtra[]) => {
onReasoningChunk: (chunk: string) => {
streamedReasoningContent += chunk;
// Update UI to show reasoning is being received
const idx = conversationsStore.findMessageIndex(currentMessageId);
conversationsStore.updateMessageAtIndex(idx, {
reasoningContent: streamedReasoningContent
});
},
onToolCallsStreaming: (toolCalls) => {
const idx = conversationsStore.findMessageIndex(currentMessageId);
conversationsStore.updateMessageAtIndex(idx, { toolCalls: JSON.stringify(toolCalls) });
},
onAttachments: (messageId: string, extras: DatabaseMessageExtra[]) => {
if (!extras.length) return;
streamedExtras = [...streamedExtras, ...extras];
const idx = conversationsStore.findMessageIndex(assistantMessage.id);
conversationsStore.updateMessageAtIndex(idx, { extra: streamedExtras });
DatabaseService.updateMessage(assistantMessage.id, { extra: streamedExtras }).catch(
console.error
);
const idx = conversationsStore.findMessageIndex(messageId);
if (idx === -1) return;
const msg = conversationsStore.activeMessages[idx];
const updatedExtras = [...(msg.extra || []), ...extras];
conversationsStore.updateMessageAtIndex(idx, { extra: updatedExtras });
DatabaseService.updateMessage(messageId, { extra: updatedExtras }).catch(console.error);
},
onModel: (modelName: string) => recordModel(modelName),
onTurnComplete: (intermediateTimings: ChatMessageTimings) => {
// Update the first assistant message with cumulative agentic timings
const idx = conversationsStore.findMessageIndex(assistantMessage.id);
conversationsStore.updateMessageAtIndex(idx, { timings: intermediateTimings });
},
@@ -651,56 +635,104 @@ class ChatStore {
cache_n: timings?.cache_n || 0,
prompt_progress: promptProgress
},
assistantMessage.convId
convId
);
},
onComplete: async (
finalContent?: string,
reasoningContent?: string,
timings?: ChatMessageTimings,
toolCallContent?: string
onAssistantTurnComplete: async (
content: string,
reasoningContent: string | undefined,
timings: ChatMessageTimings | undefined,
toolCalls: import('$lib/types/api').ApiChatCompletionToolCall[] | undefined
) => {
this.setStreamingActive(false);
finalizeReasoning();
const combinedContent = hasStreamedChunks
? streamedContent
: wrapReasoningContent(finalContent || '', reasoningContent);
const updateData: Record<string, unknown> = {
content: combinedContent,
toolCalls: toolCallContent || streamedToolCallContent,
content,
reasoningContent: reasoningContent || undefined,
toolCalls: toolCalls ? JSON.stringify(toolCalls) : '',
timings
};
if (streamedExtras.length > 0) updateData.extra = streamedExtras;
if (resolvedModel && !modelPersisted) updateData.model = resolvedModel;
await DatabaseService.updateMessage(assistantMessage.id, updateData);
const idx = conversationsStore.findMessageIndex(assistantMessage.id);
await DatabaseService.updateMessage(currentMessageId, updateData);
const idx = conversationsStore.findMessageIndex(currentMessageId);
const uiUpdate: Partial<DatabaseMessage> = {
content: combinedContent,
toolCalls: updateData.toolCalls as string
content,
reasoningContent: reasoningContent || undefined,
toolCalls: toolCalls ? JSON.stringify(toolCalls) : ''
};
if (streamedExtras.length > 0) uiUpdate.extra = streamedExtras;
if (timings) uiUpdate.timings = timings;
if (resolvedModel) uiUpdate.model = resolvedModel;
conversationsStore.updateMessageAtIndex(idx, uiUpdate);
await conversationsStore.updateCurrentNode(assistantMessage.id);
if (onComplete) await onComplete(combinedContent);
this.setChatLoading(assistantMessage.convId, false);
this.clearChatStreaming(assistantMessage.convId);
this.setProcessingState(assistantMessage.convId, null);
await conversationsStore.updateCurrentNode(currentMessageId);
},
createToolResultMessage: async (
toolCallId: string,
content: string,
extras?: DatabaseMessageExtra[]
) => {
const msg = await DatabaseService.createMessageBranch(
{
convId,
type: MessageType.TEXT,
role: MessageRole.TOOL,
content,
toolCallId,
timestamp: Date.now(),
toolCalls: '',
children: [],
extra: extras
},
currentMessageId
);
conversationsStore.addMessageToActive(msg);
await conversationsStore.updateCurrentNode(msg.id);
return msg;
},
createAssistantMessage: async () => {
// Reset streaming state for new message
streamedContent = '';
streamedReasoningContent = '';
const lastMsg =
conversationsStore.activeMessages[conversationsStore.activeMessages.length - 1];
const msg = await DatabaseService.createMessageBranch(
{
convId,
type: MessageType.TEXT,
role: MessageRole.ASSISTANT,
content: '',
timestamp: Date.now(),
toolCalls: '',
children: [],
model: resolvedModel
},
lastMsg.id
);
conversationsStore.addMessageToActive(msg);
currentMessageId = msg.id;
return msg;
},
onFlowComplete: (finalTimings?: ChatMessageTimings) => {
if (finalTimings) {
const idx = conversationsStore.findMessageIndex(assistantMessage.id);
conversationsStore.updateMessageAtIndex(idx, { timings: finalTimings });
DatabaseService.updateMessage(assistantMessage.id, { timings: finalTimings }).catch(
console.error
);
}
cleanupStreamingState();
if (onComplete) onComplete(streamedContent);
if (isRouterMode()) modelsStore.fetchRouterModels().catch(console.error);
},
onError: (error: Error) => {
this.setStreamingActive(false);
if (isAbortError(error)) {
this.setChatLoading(assistantMessage.convId, false);
this.clearChatStreaming(assistantMessage.convId);
this.setProcessingState(assistantMessage.convId, null);
cleanupStreamingState();
return;
}
console.error('Streaming error:', error);
this.setChatLoading(assistantMessage.convId, false);
this.clearChatStreaming(assistantMessage.convId);
this.setProcessingState(assistantMessage.convId, null);
cleanupStreamingState();
const idx = conversationsStore.findMessageIndex(assistantMessage.id);
if (idx !== -1) {
const failedMessage = conversationsStore.removeMessageAtIndex(idx);
@@ -717,12 +749,13 @@ class ChatStore {
if (onError) onError(error);
}
};
const perChatOverrides = conversationsStore.activeConversation?.mcpServerOverrides;
const agenticConfig = agenticStore.getConfig(config(), perChatOverrides);
if (agenticConfig.enabled) {
const agenticResult = await agenticStore.runAgenticFlow({
conversationId: assistantMessage.convId,
conversationId: convId,
messages: allMessages,
options: { ...this.getApiOptions(), ...(effectiveModel ? { model: effectiveModel } : {}) },
callbacks: streamCallbacks,
@@ -732,16 +765,50 @@ class ChatStore {
if (agenticResult.handled) return;
}
const completionOptions = {
...this.getApiOptions(),
...(effectiveModel ? { model: effectiveModel } : {}),
...streamCallbacks
};
// Non-agentic path: direct streaming into the single assistant message
await ChatService.sendMessage(
allMessages,
completionOptions,
assistantMessage.convId,
{
...this.getApiOptions(),
...(effectiveModel ? { model: effectiveModel } : {}),
stream: true,
onChunk: streamCallbacks.onChunk,
onReasoningChunk: streamCallbacks.onReasoningChunk,
onModel: streamCallbacks.onModel,
onTimings: streamCallbacks.onTimings,
onComplete: async (
finalContent?: string,
reasoningContent?: string,
timings?: ChatMessageTimings,
toolCalls?: string
) => {
const content = streamedContent || finalContent || '';
const reasoning = streamedReasoningContent || reasoningContent;
const updateData: Record<string, unknown> = {
content,
reasoningContent: reasoning || undefined,
toolCalls: toolCalls || '',
timings
};
if (resolvedModel && !modelPersisted) updateData.model = resolvedModel;
await DatabaseService.updateMessage(currentMessageId, updateData);
const idx = conversationsStore.findMessageIndex(currentMessageId);
const uiUpdate: Partial<DatabaseMessage> = {
content,
reasoningContent: reasoning || undefined,
toolCalls: toolCalls || ''
};
if (timings) uiUpdate.timings = timings;
if (resolvedModel) uiUpdate.model = resolvedModel;
conversationsStore.updateMessageAtIndex(idx, uiUpdate);
await conversationsStore.updateCurrentNode(currentMessageId);
cleanupStreamingState();
if (onComplete) await onComplete(content);
if (isRouterMode()) modelsStore.fetchRouterModels().catch(console.error);
},
onError: streamCallbacks.onError
},
convId,
abortController.signal
);
}
@@ -1033,56 +1100,40 @@ class ChatStore {
}
const originalContent = dbMessage.content;
const originalReasoning = dbMessage.reasoningContent || '';
const conversationContext = conversationsStore.activeMessages.slice(0, idx);
const contextWithContinue = [
...conversationContext,
{ role: MessageRole.ASSISTANT as const, content: originalContent }
];
let appendedContent = '',
hasReceivedContent = false,
isReasoningOpen = hasUnclosedReasoningTag(originalContent);
let appendedContent = '';
let appendedReasoning = '';
let hasReceivedContent = false;
const updateStreamingContent = (fullContent: string) => {
this.setChatStreaming(msg.convId, fullContent, msg.id);
conversationsStore.updateMessageAtIndex(idx, { content: fullContent });
};
const appendContentChunk = (chunk: string) => {
if (isReasoningOpen) {
appendedContent += REASONING_TAGS.END;
isReasoningOpen = false;
}
appendedContent += chunk;
hasReceivedContent = true;
updateStreamingContent(originalContent + appendedContent);
};
const appendReasoningChunk = (chunk: string) => {
if (!isReasoningOpen) {
appendedContent += REASONING_TAGS.START;
isReasoningOpen = true;
}
appendedContent += chunk;
hasReceivedContent = true;
updateStreamingContent(originalContent + appendedContent);
};
const finalizeReasoning = () => {
if (isReasoningOpen) {
appendedContent += REASONING_TAGS.END;
isReasoningOpen = false;
}
};
const abortController = this.getOrCreateAbortController(msg.convId);
await ChatService.sendMessage(
contextWithContinue,
{
...this.getApiOptions(),
onChunk: (chunk: string) => appendContentChunk(chunk),
onReasoningChunk: (chunk: string) => appendReasoningChunk(chunk),
onChunk: (chunk: string) => {
appendedContent += chunk;
hasReceivedContent = true;
updateStreamingContent(originalContent + appendedContent);
},
onReasoningChunk: (chunk: string) => {
appendedReasoning += chunk;
hasReceivedContent = true;
conversationsStore.updateMessageAtIndex(idx, {
reasoningContent: originalReasoning + appendedReasoning
});
},
onTimings: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
const tokensPerSecond =
timings?.predicted_ms && timings?.predicted_n
@@ -1105,21 +1156,23 @@ class ChatStore {
reasoningContent?: string,
timings?: ChatMessageTimings
) => {
finalizeReasoning();
const appendedFromCompletion = hasReceivedContent
? appendedContent
: wrapReasoningContent(finalContent || '', reasoningContent);
const fullContent = originalContent + appendedFromCompletion;
const finalAppendedContent = hasReceivedContent ? appendedContent : finalContent || '';
const finalAppendedReasoning = hasReceivedContent
? appendedReasoning
: reasoningContent || '';
const fullContent = originalContent + finalAppendedContent;
const fullReasoning = originalReasoning + finalAppendedReasoning || undefined;
await DatabaseService.updateMessage(msg.id, {
content: fullContent,
reasoningContent: fullReasoning,
timestamp: Date.now(),
timings
});
conversationsStore.updateMessageAtIndex(idx, {
content: fullContent,
reasoningContent: fullReasoning,
timestamp: Date.now(),
timings
});
@@ -1135,11 +1188,13 @@ class ChatStore {
if (hasReceivedContent && appendedContent) {
await DatabaseService.updateMessage(msg.id, {
content: originalContent + appendedContent,
reasoningContent: originalReasoning + appendedReasoning || undefined,
timestamp: Date.now()
});
conversationsStore.updateMessageAtIndex(idx, {
content: originalContent + appendedContent,
reasoningContent: originalReasoning + appendedReasoning || undefined,
timestamp: Date.now()
});
}

View File

@@ -23,7 +23,7 @@ import { browser } from '$app/environment';
import { toast } from 'svelte-sonner';
import { DatabaseService } from '$lib/services/database.service';
import { config } from '$lib/stores/settings.svelte';
import { filterByLeafNodeId, findLeafNode } from '$lib/utils';
import { filterByLeafNodeId, findLeafNode, runLegacyMigration } from '$lib/utils';
import type { McpServerOverride } from '$lib/types/database';
import { MessageRole } from '$lib/enums';
import {
@@ -128,6 +128,10 @@ class ConversationsStore {
if (this.isInitialized) return;
try {
// @deprecated Legacy migration for old marker-based messages.
// Remove once all users have migrated to the structured format.
await runLegacyMigration();
await this.loadConversations();
this.isInitialized = true;
} catch (error) {

View File

@@ -2,6 +2,7 @@ import type { MessageRole } from '$lib/enums';
import { ToolCallType } from '$lib/enums';
import type {
ApiChatCompletionRequest,
ApiChatCompletionToolCall,
ApiChatMessageContentPart,
ApiChatMessageData
} from './api';
@@ -70,22 +71,48 @@ export interface AgenticSession {
}
/**
* Callbacks for agentic flow execution
* Callbacks for agentic flow execution.
*
* The agentic loop creates separate DB messages for each turn:
* - assistant messages (one per LLM turn, with tool_calls if any)
* - tool result messages (one per tool call execution)
*
* The first assistant message is created by the caller before starting the flow.
* Subsequent messages are created via createToolResultMessage / createAssistantMessage.
*/
export interface AgenticFlowCallbacks {
/** Content chunk for the current assistant message */
onChunk?: (chunk: string) => void;
/** Reasoning content chunk for the current assistant message */
onReasoningChunk?: (chunk: string) => void;
onToolCallChunk?: (serializedToolCalls: string) => void;
onAttachments?: (extras: DatabaseMessageExtra[]) => void;
/** Tool calls being streamed (partial, accumulating) for the current turn */
onToolCallsStreaming?: (toolCalls: ApiChatCompletionToolCall[]) => void;
/** Attachments extracted from tool results */
onAttachments?: (messageId: string, extras: DatabaseMessageExtra[]) => void;
/** Model name detected from response */
onModel?: (model: string) => void;
onComplete?: (
/** Current assistant turn's streaming is complete - save to DB */
onAssistantTurnComplete?: (
content: string,
reasoningContent?: string,
timings?: ChatMessageTimings,
toolCalls?: string
) => void;
reasoningContent: string | undefined,
timings: ChatMessageTimings | undefined,
toolCalls: ApiChatCompletionToolCall[] | undefined
) => Promise<void>;
/** Create a tool result message in the DB tree */
createToolResultMessage?: (
toolCallId: string,
content: string,
extras?: DatabaseMessageExtra[]
) => Promise<DatabaseMessage>;
/** Create a new assistant message for the next agentic turn */
createAssistantMessage?: () => Promise<DatabaseMessage>;
/** Entire agentic flow is complete */
onFlowComplete?: (timings?: ChatMessageTimings) => void;
/** Error during flow */
onError?: (error: Error) => void;
/** Timing updates during streaming */
onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void;
/** An agentic turn (LLM + tool execution) completed - intermediate timing update */
onTurnComplete?: (intermediateTimings: ChatMessageTimings) => void;
}

View File

@@ -1,5 +1,6 @@
import type { ErrorDialogType } from '$lib/enums';
import type { DatabaseMessageExtra } from './database';
import type { ApiChatCompletionToolCall } from './api';
import type { DatabaseMessage, DatabaseMessageExtra } from './database';
export interface ChatUploadedFile {
id: string;
@@ -99,21 +100,28 @@ export interface ChatMessageToolCallTiming {
}
/**
* Callbacks for streaming chat responses
* Callbacks for streaming chat responses (used by both agentic and non-agentic paths)
*/
export interface ChatStreamCallbacks {
onChunk?: (chunk: string) => void;
onReasoningChunk?: (chunk: string) => void;
onToolCallChunk?: (chunk: string) => void;
onAttachments?: (extras: DatabaseMessageExtra[]) => void;
onToolCallsStreaming?: (toolCalls: ApiChatCompletionToolCall[]) => void;
onAttachments?: (messageId: string, extras: DatabaseMessageExtra[]) => void;
onModel?: (model: string) => void;
onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void;
onComplete?: (
content?: string,
reasoningContent?: string,
timings?: ChatMessageTimings,
toolCallContent?: string
) => void;
onAssistantTurnComplete?: (
content: string,
reasoningContent: string | undefined,
timings: ChatMessageTimings | undefined,
toolCalls: ApiChatCompletionToolCall[] | undefined
) => Promise<void>;
createToolResultMessage?: (
toolCallId: string,
content: string,
extras?: DatabaseMessageExtra[]
) => Promise<DatabaseMessage>;
createAssistantMessage?: () => Promise<DatabaseMessage>;
onFlowComplete?: (timings?: ChatMessageTimings) => void;
onError?: (error: Error) => void;
onTurnComplete?: (intermediateTimings: ChatMessageTimings) => void;
}

View File

@@ -92,6 +92,8 @@ export interface DatabaseMessage {
* @deprecated - left for backward compatibility
*/
thinking?: string;
/** Reasoning content produced by the model (separate from visible content) */
reasoningContent?: string;
/** Serialized JSON array of tool calls made by assistant messages */
toolCalls?: string;
/** Tool call ID for tool result messages (role: 'tool') */

View File

@@ -1,8 +1,15 @@
import { AgenticSectionType } from '$lib/enums';
import { AGENTIC_TAGS, AGENTIC_REGEX, REASONING_TAGS, TRIM_NEWLINES_REGEX } from '$lib/constants';
import { AgenticSectionType, MessageRole } from '$lib/enums';
import { ATTACHMENT_SAVED_REGEX, NEWLINE_SEPARATOR } from '$lib/constants';
import type { ApiChatCompletionToolCall } from '$lib/types/api';
import type {
DatabaseMessage,
DatabaseMessageExtra,
DatabaseMessageExtraImageFile
} from '$lib/types/database';
import { AttachmentType } from '$lib/enums';
/**
* Represents a parsed section of agentic content
* Represents a parsed section of agentic content for display
*/
export interface AgenticSection {
type: AgenticSectionType;
@@ -10,63 +17,70 @@ export interface AgenticSection {
toolName?: string;
toolArgs?: string;
toolResult?: string;
toolResultExtras?: DatabaseMessageExtra[];
}
/**
* Represents a segment of content that may contain reasoning blocks
* Represents a tool result line that may reference an image attachment
*/
type ReasoningSegment = {
type:
| AgenticSectionType.TEXT
| AgenticSectionType.REASONING
| AgenticSectionType.REASONING_PENDING;
content: string;
export type ToolResultLine = {
text: string;
image?: DatabaseMessageExtraImageFile;
};
/**
* Parses agentic content into structured sections
* Derives display sections from a single assistant message and its direct tool results.
*
* Main parsing function that processes content containing:
* - Tool calls (completed, pending, or streaming)
* - Reasoning blocks (completed or streaming)
* - Regular text content
*
* The parser handles chronological display of agentic flow output, maintaining
* the order of operations and properly identifying different states of tool calls
* and reasoning blocks during streaming.
*
* @param rawContent - The raw content string to parse
* @returns Array of structured agentic sections ready for display
*
* @example
* ```typescript
* const content = "Some text <<<AGENTIC_TOOL_CALL>>>tool_name...";
* const sections = parseAgenticContent(content);
* // Returns: [{ type: 'text', content: 'Some text' }, { type: 'tool_call_streaming', ... }]
* ```
* @param message - The assistant message
* @param toolMessages - Tool result messages for this assistant's tool_calls
* @param streamingToolCalls - Partial tool calls during streaming (not yet persisted)
*/
export function parseAgenticContent(rawContent: string): AgenticSection[] {
if (!rawContent) return [];
const segments = splitReasoningSegments(rawContent);
function deriveSingleTurnSections(
message: DatabaseMessage,
toolMessages: DatabaseMessage[] = [],
streamingToolCalls: ApiChatCompletionToolCall[] = []
): AgenticSection[] {
const sections: AgenticSection[] = [];
for (const segment of segments) {
if (segment.type === AgenticSectionType.TEXT) {
sections.push(...parseToolCallContent(segment.content));
continue;
}
if (segment.type === AgenticSectionType.REASONING) {
if (segment.content.trim()) {
sections.push({ type: AgenticSectionType.REASONING, content: segment.content });
}
continue;
}
// 1. Reasoning content (from dedicated field)
if (message.reasoningContent) {
sections.push({
type: AgenticSectionType.REASONING_PENDING,
content: segment.content
type: AgenticSectionType.REASONING,
content: message.reasoningContent
});
}
// 2. Text content
if (message.content?.trim()) {
sections.push({
type: AgenticSectionType.TEXT,
content: message.content
});
}
// 3. Persisted tool calls (from message.toolCalls field)
const toolCalls = parseToolCalls(message.toolCalls);
for (const tc of toolCalls) {
const resultMsg = toolMessages.find((m) => m.toolCallId === tc.id);
sections.push({
type: resultMsg ? AgenticSectionType.TOOL_CALL : AgenticSectionType.TOOL_CALL_PENDING,
content: resultMsg?.content || '',
toolName: tc.function?.name,
toolArgs: tc.function?.arguments,
toolResult: resultMsg?.content,
toolResultExtras: resultMsg?.extra
});
}
// 4. Streaming tool calls (not yet persisted - currently being received)
for (const tc of streamingToolCalls) {
// Skip if already in persisted tool calls
if (tc.id && toolCalls.find((t) => t.id === tc.id)) continue;
sections.push({
type: AgenticSectionType.TOOL_CALL_STREAMING,
content: '',
toolName: tc.function?.name,
toolArgs: tc.function?.arguments
});
}
@@ -74,211 +88,123 @@ export function parseAgenticContent(rawContent: string): AgenticSection[] {
}
/**
* Parses content containing tool call markers
* Derives display sections from structured message data.
*
* Identifies and extracts tool calls from content, handling:
* - Completed tool calls with name, arguments, and results
* - Pending tool calls (execution in progress)
* - Streaming tool calls (arguments being received)
* - Early-stage tool calls (just started)
* Handles both single-turn (one assistant + its tool results) and multi-turn
* agentic sessions (multiple assistant + tool messages grouped together).
*
* @param rawContent - The raw content string to parse
* @returns Array of agentic sections representing tool calls and text
* When `toolMessages` contains continuation assistant messages (from multi-turn
* agentic flows), they are processed in order to produce sections across all turns.
*
* @param message - The first/anchor assistant message
* @param toolMessages - Tool result messages and continuation assistant messages
* @param streamingToolCalls - Partial tool calls during streaming (not yet persisted)
* @param isStreaming - Whether the message is currently being streamed
*/
function parseToolCallContent(rawContent: string): AgenticSection[] {
if (!rawContent) return [];
export function deriveAgenticSections(
message: DatabaseMessage,
toolMessages: DatabaseMessage[] = [],
streamingToolCalls: ApiChatCompletionToolCall[] = []
): AgenticSection[] {
const hasAssistantContinuations = toolMessages.some((m) => m.role === MessageRole.ASSISTANT);
if (!hasAssistantContinuations) {
return deriveSingleTurnSections(message, toolMessages, streamingToolCalls);
}
const sections: AgenticSection[] = [];
const completedToolCallRegex = new RegExp(AGENTIC_REGEX.COMPLETED_TOOL_CALL.source, 'g');
const firstTurnToolMsgs = collectToolMessages(toolMessages, 0);
sections.push(...deriveSingleTurnSections(message, firstTurnToolMsgs));
let lastIndex = 0;
let match;
let i = firstTurnToolMsgs.length;
while ((match = completedToolCallRegex.exec(rawContent)) !== null) {
if (match.index > lastIndex) {
const textBefore = rawContent.slice(lastIndex, match.index).trim();
if (textBefore) {
sections.push({ type: AgenticSectionType.TEXT, content: textBefore });
}
while (i < toolMessages.length) {
const msg = toolMessages[i];
if (msg.role === MessageRole.ASSISTANT) {
const turnToolMsgs = collectToolMessages(toolMessages, i + 1);
const isLastTurn = i + 1 + turnToolMsgs.length >= toolMessages.length;
sections.push(
...deriveSingleTurnSections(msg, turnToolMsgs, isLastTurn ? streamingToolCalls : [])
);
i += 1 + turnToolMsgs.length;
} else {
i++;
}
const toolName = match[1];
const toolArgs = match[2];
const toolResult = match[3].replace(TRIM_NEWLINES_REGEX, '');
sections.push({
type: AgenticSectionType.TOOL_CALL,
content: toolResult,
toolName,
toolArgs,
toolResult
});
lastIndex = match.index + match[0].length;
}
const remainingContent = rawContent.slice(lastIndex);
const pendingMatch = remainingContent.match(AGENTIC_REGEX.PENDING_TOOL_CALL);
const partialWithNameMatch = remainingContent.match(AGENTIC_REGEX.PARTIAL_WITH_NAME);
const earlyMatch = remainingContent.match(AGENTIC_REGEX.EARLY_MATCH);
if (pendingMatch) {
const pendingIndex = remainingContent.indexOf(AGENTIC_TAGS.TOOL_CALL_START);
if (pendingIndex > 0) {
const textBefore = remainingContent.slice(0, pendingIndex).trim();
if (textBefore) {
sections.push({ type: AgenticSectionType.TEXT, content: textBefore });
}
}
const toolName = pendingMatch[1];
const toolArgs = pendingMatch[2];
const streamingResult = (pendingMatch[3] || '').replace(TRIM_NEWLINES_REGEX, '');
sections.push({
type: AgenticSectionType.TOOL_CALL_PENDING,
content: streamingResult,
toolName,
toolArgs,
toolResult: streamingResult || undefined
});
} else if (partialWithNameMatch) {
const pendingIndex = remainingContent.indexOf(AGENTIC_TAGS.TOOL_CALL_START);
if (pendingIndex > 0) {
const textBefore = remainingContent.slice(0, pendingIndex).trim();
if (textBefore) {
sections.push({ type: AgenticSectionType.TEXT, content: textBefore });
}
}
const partialArgs = partialWithNameMatch[2] || '';
sections.push({
type: AgenticSectionType.TOOL_CALL_STREAMING,
content: '',
toolName: partialWithNameMatch[1],
toolArgs: partialArgs || undefined,
toolResult: undefined
});
} else if (earlyMatch) {
const pendingIndex = remainingContent.indexOf(AGENTIC_TAGS.TOOL_CALL_START);
if (pendingIndex > 0) {
const textBefore = remainingContent.slice(0, pendingIndex).trim();
if (textBefore) {
sections.push({ type: AgenticSectionType.TEXT, content: textBefore });
}
}
const nameMatch = earlyMatch[1]?.match(AGENTIC_REGEX.TOOL_NAME_EXTRACT);
sections.push({
type: AgenticSectionType.TOOL_CALL_STREAMING,
content: '',
toolName: nameMatch?.[1],
toolArgs: undefined,
toolResult: undefined
});
} else if (lastIndex < rawContent.length) {
let remainingText = rawContent.slice(lastIndex).trim();
const partialMarkerMatch = remainingText.match(AGENTIC_REGEX.PARTIAL_MARKER);
if (partialMarkerMatch) {
remainingText = remainingText.slice(0, partialMarkerMatch.index).trim();
}
if (remainingText) {
sections.push({ type: AgenticSectionType.TEXT, content: remainingText });
}
}
if (sections.length === 0 && rawContent.trim()) {
sections.push({ type: AgenticSectionType.TEXT, content: rawContent });
}
return sections;
}
/**
* Strips partial marker from text content
*
* Removes incomplete agentic markers (e.g., "<<<", "<<<AGENTIC") that may appear
* at the end of streaming content.
*
* @param text - The text content to process
* @returns Text with partial markers removed
* Collect consecutive tool messages starting at `startIndex`.
*/
function stripPartialMarker(text: string): string {
const partialMarkerMatch = text.match(AGENTIC_REGEX.PARTIAL_MARKER);
function collectToolMessages(messages: DatabaseMessage[], startIndex: number): DatabaseMessage[] {
const result: DatabaseMessage[] = [];
if (partialMarkerMatch) {
return text.slice(0, partialMarkerMatch.index).trim();
for (let i = startIndex; i < messages.length; i++) {
if (messages[i].role === MessageRole.TOOL) {
result.push(messages[i]);
} else {
break;
}
}
return text;
return result;
}
/**
* Splits raw content into segments based on reasoning blocks
*
* Identifies and extracts reasoning content wrapped in REASONING_TAGS.START/END markers,
* separating it from regular text content. Handles both complete and incomplete
* (streaming) reasoning blocks.
*
* @param rawContent - The raw content string to parse
* @returns Array of reasoning segments with their types and content
* Parse tool result text into lines, matching image attachments by name.
*/
function splitReasoningSegments(rawContent: string): ReasoningSegment[] {
if (!rawContent) return [];
export function parseToolResultWithImages(
toolResult: string,
extras?: DatabaseMessageExtra[]
): ToolResultLine[] {
const lines = toolResult.split(NEWLINE_SEPARATOR);
return lines.map((line) => {
const match = line.match(ATTACHMENT_SAVED_REGEX);
if (!match || !extras) return { text: line };
const segments: ReasoningSegment[] = [];
let cursor = 0;
const attachmentName = match[1];
const image = extras.find(
(e): e is DatabaseMessageExtraImageFile =>
e.type === AttachmentType.IMAGE && e.name === attachmentName
);
while (cursor < rawContent.length) {
const startIndex = rawContent.indexOf(REASONING_TAGS.START, cursor);
return { text: line, image };
});
}
if (startIndex === -1) {
const remainingText = rawContent.slice(cursor);
/**
* Safely parse the toolCalls JSON string from a DatabaseMessage.
*/
function parseToolCalls(toolCallsJson?: string): ApiChatCompletionToolCall[] {
if (!toolCallsJson) return [];
if (remainingText) {
segments.push({ type: AgenticSectionType.TEXT, content: remainingText });
}
try {
const parsed = JSON.parse(toolCallsJson);
break;
}
return Array.isArray(parsed) ? parsed : [];
} catch {
return [];
}
}
if (startIndex > cursor) {
const textBefore = rawContent.slice(cursor, startIndex);
/**
* Check if a message has agentic content (tool calls or is part of an agentic flow).
*/
export function hasAgenticContent(
message: DatabaseMessage,
toolMessages: DatabaseMessage[] = []
): boolean {
if (message.toolCalls) {
const tc = parseToolCalls(message.toolCalls);
if (textBefore) {
segments.push({ type: AgenticSectionType.TEXT, content: textBefore });
}
}
const contentStart = startIndex + REASONING_TAGS.START.length;
const endIndex = rawContent.indexOf(REASONING_TAGS.END, contentStart);
if (endIndex === -1) {
const pendingContent = rawContent.slice(contentStart);
segments.push({
type: AgenticSectionType.REASONING_PENDING,
content: stripPartialMarker(pendingContent)
});
break;
}
const reasoningContent = rawContent.slice(contentStart, endIndex);
segments.push({ type: AgenticSectionType.REASONING, content: reasoningContent });
cursor = endIndex + REASONING_TAGS.END.length;
if (tc.length > 0) return true;
}
return segments;
return toolMessages.length > 0;
}

View File

@@ -28,7 +28,7 @@ export function buildProxiedHeaders(headers: Record<string, string>): Record<str
const proxiedHeaders: Record<string, string> = {};
for (const [key, value] of Object.entries(headers)) {
proxiedHeaders[`X-Proxy-Header-${key}`] = value;
proxiedHeaders[`x-proxy-header-${key}`] = value;
}
return proxiedHeaders;

View File

@@ -149,8 +149,17 @@ export { parseHeadersToArray, serializeHeaders } from './headers';
// Favicon utilities
export { getFaviconUrl } from './favicon';
// Agentic content parsing utilities
export { parseAgenticContent, type AgenticSection } from './agentic';
// Agentic content utilities (structured section derivation)
export {
deriveAgenticSections,
parseToolResultWithImages,
hasAgenticContent,
type AgenticSection,
type ToolResultLine
} from './agentic';
// Legacy migration utilities
export { runLegacyMigration, isMigrationNeeded } from './legacy-migration';
// Cache utilities
export { TTLCache, ReactiveTTLMap, type TTLCacheOptions } from './cache-ttl';

View File

@@ -0,0 +1,345 @@
/**
* @deprecated Legacy migration utility — remove at some point in the future once all users have migrated to the new structured agentic message format.
*
* Converts old marker-based agentic messages to the new structured format
* with separate messages per turn.
*
* Old format: Single assistant message with markers in content:
* <<<reasoning_content_start>>>...<<<reasoning_content_end>>>
* <<<AGENTIC_TOOL_CALL_START>>>...<<<AGENTIC_TOOL_CALL_END>>>
*
* New format: Separate messages per turn:
* - assistant (content + reasoningContent + toolCalls)
* - tool (toolCallId + content)
* - assistant (next turn)
* - ...
*/
import { LEGACY_AGENTIC_REGEX, LEGACY_REASONING_TAGS } from '$lib/constants';
import { DatabaseService } from '$lib/services/database.service';
import { MessageRole, MessageType } from '$lib/enums';
import type { DatabaseMessage } from '$lib/types/database';
const MIGRATION_DONE_KEY = 'llama-webui-migration-v2-done';
/**
* @deprecated Part of legacy migration — remove with the migration module.
* Check if migration has been performed.
*/
export function isMigrationNeeded(): boolean {
	// When localStorage is unavailable (privacy mode, SSR), report "no migration"
	// rather than throwing during app startup.
	try {
		const doneMarker = localStorage.getItem(MIGRATION_DONE_KEY);
		return doneMarker === null || doneMarker === '';
	} catch {
		return false;
	}
}
/**
* Mark migration as done.
*/
function markMigrationDone(): void {
	// Record the completion time; the value itself is unused — only its presence matters.
	const completedAt = String(Date.now());
	try {
		localStorage.setItem(MIGRATION_DONE_KEY, completedAt);
	} catch {
		// Swallow storage failures (private browsing, quota) — worst case the
		// migration check runs again on next startup.
	}
}
/**
* Check if a message has legacy markers in its content.
*/
function hasLegacyMarkers(message: DatabaseMessage): boolean {
	// Messages without content cannot carry markers.
	const text = message.content;
	if (!text) return false;
	return LEGACY_AGENTIC_REGEX.HAS_LEGACY_MARKERS.test(text);
}
/**
* Extract reasoning content from legacy marker format.
*/
function extractLegacyReasoning(content: string): { reasoning: string; cleanContent: string } {
	// Collect the inner text of every complete reasoning block.
	// A fresh 'g'-flagged regex is built so exec() iteration starts at index 0.
	const extractor = new RegExp(LEGACY_AGENTIC_REGEX.REASONING_EXTRACT.source, 'g');
	const pieces: string[] = [];
	for (let m = extractor.exec(content); m !== null; m = extractor.exec(content)) {
		pieces.push(m[1]);
	}
	// Strip complete reasoning blocks first, then any dangling open marker
	// left over from an interrupted stream.
	const cleanContent = content
		.replace(new RegExp(LEGACY_AGENTIC_REGEX.REASONING_BLOCK.source, 'g'), '')
		.replace(LEGACY_AGENTIC_REGEX.REASONING_OPEN, '');
	return { reasoning: pieces.join(''), cleanContent };
}
/**
* Parse legacy content with tool call markers into structured turns.
*/
interface ParsedTurn {
	// Assistant text emitted before this turn's tool calls.
	textBefore: string;
	// Completed tool calls of this turn (name, raw args string, result text).
	toolCalls: Array<{
		name: string;
		args: string;
		result: string;
	}>;
}
// Splits legacy marker-embedded content into assistant turns. A "turn" is any
// text plus the tool calls that follow it; text appearing AFTER a tool result
// signals that a new LLM turn began.
function parseLegacyToolCalls(content: string): ParsedTurn[] {
	const turns: ParsedTurn[] = [];
	// Fresh 'g'-flagged copy so exec() iteration starts from index 0.
	const regex = new RegExp(LEGACY_AGENTIC_REGEX.COMPLETED_TOOL_CALL.source, 'g');
	let lastIndex = 0;
	let currentTurn: ParsedTurn = { textBefore: '', toolCalls: [] };
	let match;
	while ((match = regex.exec(content)) !== null) {
		const textBefore = content.slice(lastIndex, match.index).trim();
		// If there's text between tool calls and we already have tool calls,
		// that means a new turn started (text after tool results = new LLM turn)
		if (textBefore && currentTurn.toolCalls.length > 0) {
			turns.push(currentTurn);
			currentTurn = { textBefore, toolCalls: [] };
		} else if (textBefore && currentTurn.toolCalls.length === 0) {
			currentTurn.textBefore = textBefore;
		}
		// Capture groups: [1]=tool name, [2]=arguments, [3]=result
		// (result has leading/trailing newlines trimmed).
		currentTurn.toolCalls.push({
			name: match[1],
			args: match[2],
			result: match[3].replace(/^\n+|\n+$/g, '')
		});
		lastIndex = match.index + match[0].length;
	}
	// Any remaining text after the last tool call
	const remainingText = content.slice(lastIndex).trim();
	if (currentTurn.toolCalls.length > 0) {
		turns.push(currentTurn);
	}
	// If there's text after all tool calls, it's the final assistant response
	if (remainingText) {
		// Remove any partial/open markers
		const cleanRemaining = remainingText
			.replace(LEGACY_AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN, '')
			.trim();
		if (cleanRemaining) {
			turns.push({ textBefore: cleanRemaining, toolCalls: [] });
		}
	}
	// If no tool calls found at all, return the original content as a single turn
	if (turns.length === 0) {
		turns.push({ textBefore: content.trim(), toolCalls: [] });
	}
	return turns;
}
/**
* Migrate a single conversation's messages from legacy format to new format.
*/
async function migrateConversation(convId: string): Promise<number> {
	const allMessages = await DatabaseService.getConversationMessages(convId);
	let migratedCount = 0;
	// Only assistant messages can carry legacy markers / tool calls.
	for (const message of allMessages) {
		if (message.role !== MessageRole.ASSISTANT) continue;
		if (!hasLegacyMarkers(message)) {
			// Still check for reasoning-only markers (no tool calls)
			if (message.content?.includes(LEGACY_REASONING_TAGS.START)) {
				const { reasoning, cleanContent } = extractLegacyReasoning(message.content);
				await DatabaseService.updateMessage(message.id, {
					content: cleanContent.trim(),
					reasoningContent: reasoning || undefined
				});
				migratedCount++;
			}
			continue;
		}
		// Has agentic markers - full migration needed
		// (hasLegacyMarkers() returning true guarantees message.content is non-empty here)
		const { reasoning, cleanContent } = extractLegacyReasoning(message.content);
		const turns = parseLegacyToolCalls(cleanContent);
		// Parse existing toolCalls JSON to try to match IDs
		let existingToolCalls: Array<{ id: string; function?: { name: string; arguments: string } }> =
			[];
		if (message.toolCalls) {
			try {
				existingToolCalls = JSON.parse(message.toolCalls);
			} catch {
				// Ignore
			}
		}
		// First turn uses the existing message
		const firstTurn = turns[0];
		if (!firstTurn) continue;
		// Match tool calls from the first turn to existing IDs
		// (match by function name first, fall back to positional index, then a synthetic id)
		const firstTurnToolCalls = firstTurn.toolCalls.map((tc, i) => {
			const existing =
				existingToolCalls.find((e) => e.function?.name === tc.name) || existingToolCalls[i];
			return {
				id: existing?.id || `legacy_tool_${i}`,
				type: 'function' as const,
				function: { name: tc.name, arguments: tc.args }
			};
		});
		// Update the existing message for the first turn
		await DatabaseService.updateMessage(message.id, {
			content: firstTurn.textBefore,
			reasoningContent: reasoning || undefined,
			toolCalls: firstTurnToolCalls.length > 0 ? JSON.stringify(firstTurnToolCalls) : ''
		});
		// currentParentId tracks the tail of the chain as new messages are appended.
		let currentParentId = message.id;
		let toolCallIdCounter = existingToolCalls.length;
		// Create tool result messages for the first turn
		for (let i = 0; i < firstTurn.toolCalls.length; i++) {
			const tc = firstTurn.toolCalls[i];
			const toolCallId = firstTurnToolCalls[i]?.id || `legacy_tool_${i}`;
			const toolMsg = await DatabaseService.createMessageBranch(
				{
					convId,
					type: MessageType.TEXT,
					role: MessageRole.TOOL,
					content: tc.result,
					toolCallId,
					// +i+1 keeps tool results ordered after the assistant message.
					timestamp: message.timestamp + i + 1,
					toolCalls: '',
					children: []
				},
				currentParentId
			);
			currentParentId = toolMsg.id;
		}
		// Create messages for subsequent turns
		for (let turnIdx = 1; turnIdx < turns.length; turnIdx++) {
			const turn = turns[turnIdx];
			const turnToolCalls = turn.toolCalls.map((tc, i) => {
				const idx = toolCallIdCounter + i;
				const existing = existingToolCalls[idx];
				return {
					id: existing?.id || `legacy_tool_${idx}`,
					type: 'function' as const,
					function: { name: tc.name, arguments: tc.args }
				};
			});
			toolCallIdCounter += turn.toolCalls.length;
			// Create assistant message for this turn
			const assistantMsg = await DatabaseService.createMessageBranch(
				{
					convId,
					type: MessageType.TEXT,
					role: MessageRole.ASSISTANT,
					content: turn.textBefore,
					// *100 leaves room for up to ~99 tool results per turn while
					// keeping timestamps monotonically increasing across turns.
					timestamp: message.timestamp + turnIdx * 100,
					toolCalls: turnToolCalls.length > 0 ? JSON.stringify(turnToolCalls) : '',
					children: [],
					model: message.model
				},
				currentParentId
			);
			currentParentId = assistantMsg.id;
			// Create tool result messages for this turn
			for (let i = 0; i < turn.toolCalls.length; i++) {
				const tc = turn.toolCalls[i];
				const toolCallId = turnToolCalls[i]?.id || `legacy_tool_${toolCallIdCounter + i}`;
				const toolMsg = await DatabaseService.createMessageBranch(
					{
						convId,
						type: MessageType.TEXT,
						role: MessageRole.TOOL,
						content: tc.result,
						toolCallId,
						timestamp: message.timestamp + turnIdx * 100 + i + 1,
						toolCalls: '',
						children: []
					},
					currentParentId
				);
				currentParentId = toolMsg.id;
			}
		}
		// Re-parent any children of the original message to the last created message
		// (the original message's children list was the next user message or similar)
		if (message.children.length > 0 && currentParentId !== message.id) {
			for (const childId of message.children) {
				// Skip children we just created (they were already properly parented)
				const child = allMessages.find((m) => m.id === childId);
				if (!child) continue;
				// Only re-parent non-tool messages that were original children
				if (child.role !== MessageRole.TOOL) {
					await DatabaseService.updateMessage(childId, { parent: currentParentId });
					// Add to new parent's children
					// (re-fetch so we see children added earlier in this loop)
					const newParent = await DatabaseService.getConversationMessages(convId).then((msgs) =>
						msgs.find((m) => m.id === currentParentId)
					);
					if (newParent && !newParent.children.includes(childId)) {
						await DatabaseService.updateMessage(currentParentId, {
							children: [...newParent.children, childId]
						});
					}
				}
			}
			// Clear re-parented children from the original message
			await DatabaseService.updateMessage(message.id, { children: [] });
		}
		migratedCount++;
	}
	return migratedCount;
}
/**
* @deprecated Part of legacy migration — remove with the migration module.
* Run the full migration across all conversations.
* This should be called once at app startup if migration is needed.
*/
export async function runLegacyMigration(): Promise<void> {
	// Skip entirely once the done-marker is present.
	if (!isMigrationNeeded()) return;
	console.log('[Migration] Starting legacy message format migration...');
	try {
		const conversations = await DatabaseService.getAllConversations();
		// Migrate conversations sequentially and tally how many messages changed.
		let totalMigrated = 0;
		for (const conv of conversations) {
			totalMigrated += await migrateConversation(conv.id);
		}
		if (totalMigrated > 0) {
			console.log(
				`[Migration] Migrated ${totalMigrated} messages across ${conversations.length} conversations`
			);
		} else {
			console.log('[Migration] No legacy messages found, marking as done');
		}
		markMigrationDone();
	} catch (error) {
		console.error('[Migration] Failed to migrate legacy messages:', error);
		// Still mark as done to avoid infinite retry loops
		markMigrationDone();
	}
}

View File

@@ -22,7 +22,7 @@ const config = {
strict: true
}),
output: {
bundleStrategy: 'inline'
bundleStrategy: 'single'
},
alias: {
$styles: 'src/styles'

View File

@@ -2,5 +2,5 @@ import { expect, test } from '@playwright/test';
test('home page has expected h1', async ({ page }) => {
await page.goto('/');
await expect(page.locator('h1')).toBeVisible();
await expect(page.locator('h1').first()).toBeVisible();
});

View File

@@ -0,0 +1,211 @@
import { describe, it, expect } from 'vitest';
import { deriveAgenticSections, hasAgenticContent } from '$lib/utils/agentic';
import { AgenticSectionType, MessageRole } from '$lib/enums';
import type { DatabaseMessage } from '$lib/types/database';
import type { ApiChatCompletionToolCall } from '$lib/types/api';
// Build a minimal assistant DatabaseMessage; any field can be overridden.
// The trailing spread wins, so overrides replace defaults key-for-key.
function makeAssistant(overrides: Partial<DatabaseMessage> = {}): DatabaseMessage {
	const defaults = {
		id: 'ast-1',
		convId: 'conv-1',
		type: 'text',
		timestamp: Date.now(),
		role: MessageRole.ASSISTANT,
		content: '',
		parent: null,
		children: []
	};
	return { ...defaults, ...overrides } as DatabaseMessage;
}
// Build a minimal tool-result DatabaseMessage; any field can be overridden.
function makeToolMsg(overrides: Partial<DatabaseMessage> = {}): DatabaseMessage {
	const defaults = {
		id: 'tool-1',
		convId: 'conv-1',
		type: 'text',
		timestamp: Date.now(),
		role: MessageRole.TOOL,
		content: 'tool result',
		parent: null,
		children: [],
		toolCallId: 'call_1'
	};
	return { ...defaults, ...overrides } as DatabaseMessage;
}
// Covers section derivation for single-turn, multi-turn, pending and
// streaming tool-call scenarios.
describe('deriveAgenticSections', () => {
	it('returns empty array for assistant with no content', () => {
		const msg = makeAssistant({ content: '' });
		const sections = deriveAgenticSections(msg);
		expect(sections).toEqual([]);
	});
	it('returns text section for simple assistant message', () => {
		const msg = makeAssistant({ content: 'Hello world' });
		const sections = deriveAgenticSections(msg);
		expect(sections).toHaveLength(1);
		expect(sections[0].type).toBe(AgenticSectionType.TEXT);
		expect(sections[0].content).toBe('Hello world');
	});
	it('returns reasoning + text for message with reasoning', () => {
		// Reasoning lives in the dedicated reasoningContent field, not in content.
		const msg = makeAssistant({
			content: 'Answer is 4.',
			reasoningContent: 'Let me think...'
		});
		const sections = deriveAgenticSections(msg);
		expect(sections).toHaveLength(2);
		expect(sections[0].type).toBe(AgenticSectionType.REASONING);
		expect(sections[0].content).toBe('Let me think...');
		expect(sections[1].type).toBe(AgenticSectionType.TEXT);
	});
	it('single turn: assistant with tool calls and results', () => {
		const msg = makeAssistant({
			content: 'Let me check.',
			toolCalls: JSON.stringify([
				{ id: 'call_1', type: 'function', function: { name: 'search', arguments: '{"q":"test"}' } }
			])
		});
		// Tool result is matched to the call via toolCallId.
		const toolResult = makeToolMsg({
			toolCallId: 'call_1',
			content: 'Found 3 results'
		});
		const sections = deriveAgenticSections(msg, [toolResult]);
		expect(sections).toHaveLength(2);
		expect(sections[0].type).toBe(AgenticSectionType.TEXT);
		expect(sections[1].type).toBe(AgenticSectionType.TOOL_CALL);
		expect(sections[1].toolName).toBe('search');
		expect(sections[1].toolResult).toBe('Found 3 results');
	});
	it('single turn: pending tool call without result', () => {
		// No tool message yet -> the call is shown as pending.
		const msg = makeAssistant({
			toolCalls: JSON.stringify([
				{ id: 'call_1', type: 'function', function: { name: 'bash', arguments: '{}' } }
			])
		});
		const sections = deriveAgenticSections(msg, []);
		expect(sections).toHaveLength(1);
		expect(sections[0].type).toBe(AgenticSectionType.TOOL_CALL_PENDING);
		expect(sections[0].toolName).toBe('bash');
	});
	it('multi-turn: two assistant turns grouped as one session', () => {
		const assistant1 = makeAssistant({
			id: 'ast-1',
			content: 'Turn 1 text',
			toolCalls: JSON.stringify([
				{ id: 'call_1', type: 'function', function: { name: 'search', arguments: '{"q":"foo"}' } }
			])
		});
		const tool1 = makeToolMsg({ id: 'tool-1', toolCallId: 'call_1', content: 'result 1' });
		const assistant2 = makeAssistant({
			id: 'ast-2',
			content: 'Final answer based on results.'
		});
		// toolMessages contains both tool result and continuation assistant
		const sections = deriveAgenticSections(assistant1, [tool1, assistant2]);
		expect(sections).toHaveLength(3);
		// Turn 1
		expect(sections[0].type).toBe(AgenticSectionType.TEXT);
		expect(sections[0].content).toBe('Turn 1 text');
		expect(sections[1].type).toBe(AgenticSectionType.TOOL_CALL);
		expect(sections[1].toolName).toBe('search');
		expect(sections[1].toolResult).toBe('result 1');
		// Turn 2 (final)
		expect(sections[2].type).toBe(AgenticSectionType.TEXT);
		expect(sections[2].content).toBe('Final answer based on results.');
	});
	it('multi-turn: three turns with tool calls', () => {
		const assistant1 = makeAssistant({
			id: 'ast-1',
			content: '',
			toolCalls: JSON.stringify([
				{ id: 'call_1', type: 'function', function: { name: 'list_files', arguments: '{}' } }
			])
		});
		const tool1 = makeToolMsg({ id: 'tool-1', toolCallId: 'call_1', content: 'file1 file2' });
		const assistant2 = makeAssistant({
			id: 'ast-2',
			content: 'Reading file1...',
			toolCalls: JSON.stringify([
				{
					id: 'call_2',
					type: 'function',
					function: { name: 'read_file', arguments: '{"path":"file1"}' }
				}
			])
		});
		const tool2 = makeToolMsg({ id: 'tool-2', toolCallId: 'call_2', content: 'contents of file1' });
		const assistant3 = makeAssistant({
			id: 'ast-3',
			content: 'Here is the analysis.',
			reasoningContent: 'The file contains...'
		});
		const sections = deriveAgenticSections(assistant1, [tool1, assistant2, tool2, assistant3]);
		// Turn 1: tool_call (no text since content is empty)
		// Turn 2: text + tool_call
		// Turn 3: reasoning + text
		expect(sections).toHaveLength(5);
		expect(sections[0].type).toBe(AgenticSectionType.TOOL_CALL);
		expect(sections[0].toolName).toBe('list_files');
		expect(sections[1].type).toBe(AgenticSectionType.TEXT);
		expect(sections[1].content).toBe('Reading file1...');
		expect(sections[2].type).toBe(AgenticSectionType.TOOL_CALL);
		expect(sections[2].toolName).toBe('read_file');
		expect(sections[3].type).toBe(AgenticSectionType.REASONING);
		expect(sections[4].type).toBe(AgenticSectionType.TEXT);
		expect(sections[4].content).toBe('Here is the analysis.');
	});
	it('multi-turn: streaming tool calls on last turn', () => {
		const assistant1 = makeAssistant({
			toolCalls: JSON.stringify([
				{ id: 'call_1', type: 'function', function: { name: 'search', arguments: '{}' } }
			])
		});
		const tool1 = makeToolMsg({ toolCallId: 'call_1', content: 'result' });
		const assistant2 = makeAssistant({ id: 'ast-2', content: '' });
		// Arguments are intentionally truncated JSON — mid-stream state.
		const streamingToolCalls: ApiChatCompletionToolCall[] = [
			{ id: 'call_2', type: 'function', function: { name: 'write_file', arguments: '{"pa' } }
		];
		const sections = deriveAgenticSections(assistant1, [tool1, assistant2], streamingToolCalls);
		// Turn 1: tool_call
		// Turn 2 (streaming): streaming tool call
		expect(sections.some((s) => s.type === AgenticSectionType.TOOL_CALL)).toBe(true);
		expect(sections.some((s) => s.type === AgenticSectionType.TOOL_CALL_STREAMING)).toBe(true);
	});
});
// Covers agentic-content detection via the toolCalls field and via the
// presence of follow-up tool messages.
describe('hasAgenticContent', () => {
	it('returns false for plain assistant', () => {
		const msg = makeAssistant({ content: 'Just text' });
		expect(hasAgenticContent(msg)).toBe(false);
	});
	it('returns true when message has toolCalls', () => {
		const msg = makeAssistant({
			toolCalls: JSON.stringify([
				{ id: 'call_1', type: 'function', function: { name: 'test', arguments: '{}' } }
			])
		});
		expect(hasAgenticContent(msg)).toBe(true);
	});
	it('returns true when toolMessages are provided', () => {
		const msg = makeAssistant();
		const tool = makeToolMsg();
		expect(hasAgenticContent(msg, [tool])).toBe(true);
	});
	it('returns false for empty toolCalls JSON', () => {
		// '[]' parses to an empty array, which does not count as agentic content.
		const msg = makeAssistant({ toolCalls: '[]' });
		expect(hasAgenticContent(msg)).toBe(false);
	});
});

View File

@@ -1,17 +1,22 @@
import { describe, it, expect } from 'vitest';
import { AGENTIC_REGEX } from '$lib/constants/agentic';
import { LEGACY_AGENTIC_REGEX } from '$lib/constants/agentic';
// Mirror the logic in ChatService.stripReasoningContent so we can test it in isolation.
// The real function is private static, so we replicate the strip pipeline here.
function stripContextMarkers(content: string): string {
/**
* Tests for legacy marker stripping (used in migration).
* The new system does not embed markers in content - these tests verify
* the legacy regex patterns still work for the migration code.
*/
// Mirror the legacy stripping logic used during migration
function stripLegacyContextMarkers(content: string): string {
return content
.replace(AGENTIC_REGEX.REASONING_BLOCK, '')
.replace(AGENTIC_REGEX.REASONING_OPEN, '')
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_BLOCK, '')
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN, '');
.replace(new RegExp(LEGACY_AGENTIC_REGEX.REASONING_BLOCK.source, 'g'), '')
.replace(LEGACY_AGENTIC_REGEX.REASONING_OPEN, '')
.replace(new RegExp(LEGACY_AGENTIC_REGEX.AGENTIC_TOOL_CALL_BLOCK.source, 'g'), '')
.replace(LEGACY_AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN, '');
}
// A realistic complete tool call block as stored in message.content after a turn.
// A realistic complete tool call block as stored in old message.content
const COMPLETE_BLOCK =
'\n\n<<<AGENTIC_TOOL_CALL_START>>>\n' +
'<<<TOOL_NAME:bash_tool>>>\n' +
@@ -30,11 +35,10 @@ const OPEN_BLOCK =
'<<<TOOL_ARGS_END>>>\n' +
'partial output...';
describe('agentic marker stripping for context', () => {
describe('legacy agentic marker stripping (for migration)', () => {
it('strips a complete tool call block, leaving surrounding text', () => {
const input = 'Before.' + COMPLETE_BLOCK + 'After.';
const result = stripContextMarkers(input);
// markers gone; residual newlines between fragments are fine
const result = stripLegacyContextMarkers(input);
expect(result).not.toContain('<<<');
expect(result).toContain('Before.');
expect(result).toContain('After.');
@@ -42,7 +46,7 @@ describe('agentic marker stripping for context', () => {
it('strips multiple complete tool call blocks', () => {
const input = 'A' + COMPLETE_BLOCK + 'B' + COMPLETE_BLOCK + 'C';
const result = stripContextMarkers(input);
const result = stripLegacyContextMarkers(input);
expect(result).not.toContain('<<<');
expect(result).toContain('A');
expect(result).toContain('B');
@@ -51,19 +55,19 @@ describe('agentic marker stripping for context', () => {
it('strips an open/partial tool call block (no END marker)', () => {
const input = 'Lead text.' + OPEN_BLOCK;
const result = stripContextMarkers(input);
const result = stripLegacyContextMarkers(input);
expect(result).toBe('Lead text.');
expect(result).not.toContain('<<<');
});
it('does not alter content with no markers', () => {
const input = 'Just a normal assistant response.';
expect(stripContextMarkers(input)).toBe(input);
expect(stripLegacyContextMarkers(input)).toBe(input);
});
it('strips reasoning block independently', () => {
const input = '<<<reasoning_content_start>>>think hard<<<reasoning_content_end>>>Answer.';
expect(stripContextMarkers(input)).toBe('Answer.');
expect(stripLegacyContextMarkers(input)).toBe('Answer.');
});
it('strips both reasoning and agentic blocks together', () => {
@@ -71,11 +75,21 @@ describe('agentic marker stripping for context', () => {
'<<<reasoning_content_start>>>plan<<<reasoning_content_end>>>' +
'Some text.' +
COMPLETE_BLOCK;
expect(stripContextMarkers(input)).not.toContain('<<<');
expect(stripContextMarkers(input)).toContain('Some text.');
expect(stripLegacyContextMarkers(input)).not.toContain('<<<');
expect(stripLegacyContextMarkers(input)).toContain('Some text.');
});
it('empty string survives', () => {
expect(stripContextMarkers('')).toBe('');
expect(stripLegacyContextMarkers('')).toBe('');
});
it('detects legacy markers', () => {
expect(LEGACY_AGENTIC_REGEX.HAS_LEGACY_MARKERS.test('normal text')).toBe(false);
expect(
LEGACY_AGENTIC_REGEX.HAS_LEGACY_MARKERS.test('text<<<AGENTIC_TOOL_CALL_START>>>more')
).toBe(true);
expect(LEGACY_AGENTIC_REGEX.HAS_LEGACY_MARKERS.test('<<<reasoning_content_start>>>think')).toBe(
true
);
});
});

View File

@@ -1,196 +1,89 @@
import { describe, it, expect } from 'vitest';
import { AGENTIC_REGEX, REASONING_TAGS } from '$lib/constants/agentic';
import { ContentPartType } from '$lib/enums';
import { MessageRole } from '$lib/enums';
// Replicate ChatService.extractReasoningFromContent (private static)
function extractReasoningFromContent(
content: string | Array<{ type: string; text?: string }> | null | undefined
): string | undefined {
if (!content) return undefined;
/**
* Tests for the new reasoning content handling.
* In the new architecture, reasoning content is stored in a dedicated
* `reasoningContent` field on DatabaseMessage, not embedded in content with tags.
* The API sends it as `reasoning_content` on ApiChatMessageData.
*/
const extractFromString = (text: string): string => {
const parts: string[] = [];
const re = new RegExp(AGENTIC_REGEX.REASONING_EXTRACT.source);
let match = re.exec(text);
while (match) {
parts.push(match[1]);
text = text.slice(match.index + match[0].length);
match = re.exec(text);
}
return parts.join('');
};
if (typeof content === 'string') {
const result = extractFromString(content);
return result || undefined;
}
if (!Array.isArray(content)) return undefined;
const parts: string[] = [];
for (const part of content) {
if (part.type === ContentPartType.TEXT && part.text) {
const result = extractFromString(part.text);
if (result) parts.push(result);
}
}
return parts.length > 0 ? parts.join('') : undefined;
}
// Replicate ChatService.stripReasoningContent (private static)
function stripReasoningContent(
content: string | Array<{ type: string; text?: string }> | null | undefined
): typeof content {
if (!content) return content;
if (typeof content === 'string') {
return content
.replace(AGENTIC_REGEX.REASONING_BLOCK, '')
.replace(AGENTIC_REGEX.REASONING_OPEN, '')
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_BLOCK, '')
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN, '');
}
if (!Array.isArray(content)) return content;
return content.map((part) => {
if (part.type !== ContentPartType.TEXT || !part.text) return part;
return {
...part,
text: part.text
.replace(AGENTIC_REGEX.REASONING_BLOCK, '')
.replace(AGENTIC_REGEX.REASONING_OPEN, '')
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_BLOCK, '')
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN, '')
describe('reasoning content in new structured format', () => {
it('reasoning is stored as separate field, not in content', () => {
// Simulate what the new chat store does
const message = {
content: 'The answer is 4.',
reasoningContent: 'Let me think: 2+2=4, basic arithmetic.'
};
});
}
// Simulate the message mapping logic from ChatService.sendMessage
function buildApiMessage(
content: string,
excludeReasoningFromContext: boolean
): { role: string; content: string; reasoning_content?: string } {
const cleaned = stripReasoningContent(content) as string;
const mapped: { role: string; content: string; reasoning_content?: string } = {
role: 'assistant',
content: cleaned
};
if (!excludeReasoningFromContext) {
const reasoning = extractReasoningFromContent(content);
if (reasoning) {
mapped.reasoning_content = reasoning;
// Content should be clean
expect(message.content).not.toContain('<<<');
expect(message.content).toBe('The answer is 4.');
// Reasoning in dedicated field
expect(message.reasoningContent).toBe('Let me think: 2+2=4, basic arithmetic.');
});
it('convertDbMessageToApiChatMessageData includes reasoning_content', () => {
// Simulate the conversion logic
const dbMessage = {
role: MessageRole.ASSISTANT,
content: 'The answer is 4.',
reasoningContent: 'Let me think: 2+2=4, basic arithmetic.'
};
const apiMessage: Record<string, unknown> = {
role: dbMessage.role,
content: dbMessage.content
};
if (dbMessage.reasoningContent) {
apiMessage.reasoning_content = dbMessage.reasoningContent;
}
}
return mapped;
}
// Helper: wrap reasoning the same way the chat store does during streaming
function wrapReasoning(reasoning: string, content: string): string {
return `${REASONING_TAGS.START}${reasoning}${REASONING_TAGS.END}${content}`;
}
describe('reasoning content extraction', () => {
it('extracts reasoning from tagged string content', () => {
const input = wrapReasoning('step 1, step 2', 'The answer is 42.');
const result = extractReasoningFromContent(input);
expect(result).toBe('step 1, step 2');
expect(apiMessage.content).toBe('The answer is 4.');
expect(apiMessage.reasoning_content).toBe('Let me think: 2+2=4, basic arithmetic.');
// No internal tags leak into either field
expect(apiMessage.content).not.toContain('<<<');
expect(apiMessage.reasoning_content).not.toContain('<<<');
});
it('returns undefined when no reasoning tags present', () => {
expect(extractReasoningFromContent('Just a normal response.')).toBeUndefined();
it('API message excludes reasoning when excludeReasoningFromContext is true', () => {
const dbMessage = {
role: MessageRole.ASSISTANT,
content: 'The answer is 4.',
reasoningContent: 'internal thinking'
};
const excludeReasoningFromContext = true;
const apiMessage: Record<string, unknown> = {
role: dbMessage.role,
content: dbMessage.content
};
if (!excludeReasoningFromContext && dbMessage.reasoningContent) {
apiMessage.reasoning_content = dbMessage.reasoningContent;
}
expect(apiMessage.content).toBe('The answer is 4.');
expect(apiMessage.reasoning_content).toBeUndefined();
});
it('returns undefined for null/empty input', () => {
expect(extractReasoningFromContent(null)).toBeUndefined();
expect(extractReasoningFromContent(undefined)).toBeUndefined();
expect(extractReasoningFromContent('')).toBeUndefined();
});
it('handles messages with no reasoning', () => {
const dbMessage = {
role: MessageRole.ASSISTANT,
content: 'No reasoning here.',
reasoningContent: undefined
};
it('extracts reasoning from content part arrays', () => {
const input = [
{
type: ContentPartType.TEXT,
text: wrapReasoning('thinking hard', 'result')
}
];
expect(extractReasoningFromContent(input)).toBe('thinking hard');
});
const apiMessage: Record<string, unknown> = {
role: dbMessage.role,
content: dbMessage.content
};
if (dbMessage.reasoningContent) {
apiMessage.reasoning_content = dbMessage.reasoningContent;
}
it('handles multiple reasoning blocks', () => {
const input =
REASONING_TAGS.START +
'block1' +
REASONING_TAGS.END +
'middle' +
REASONING_TAGS.START +
'block2' +
REASONING_TAGS.END +
'end';
expect(extractReasoningFromContent(input)).toBe('block1block2');
});
it('ignores non-text content parts', () => {
const input = [{ type: 'image_url', text: wrapReasoning('hidden', 'img') }];
expect(extractReasoningFromContent(input)).toBeUndefined();
});
});
describe('strip reasoning content', () => {
it('removes reasoning tags from string content', () => {
const input = wrapReasoning('internal thoughts', 'visible answer');
expect(stripReasoningContent(input)).toBe('visible answer');
});
it('removes reasoning from content part arrays', () => {
const input = [
{
type: ContentPartType.TEXT,
text: wrapReasoning('thoughts', 'answer')
}
];
const result = stripReasoningContent(input) as Array<{ type: string; text?: string }>;
expect(result[0].text).toBe('answer');
});
});
describe('API message building with reasoning preservation', () => {
  // Stored assistant message combining hidden reasoning with visible text.
  const storedContent = wrapReasoning('Let me think: 2+2=4, basic arithmetic.', 'The answer is 4.');

  it('preserves reasoning_content when excludeReasoningFromContext is false', () => {
    const msg = buildApiMessage(storedContent, false);
    expect(msg.content).toBe('The answer is 4.');
    expect(msg.reasoning_content).toBe('Let me think: 2+2=4, basic arithmetic.');
    // no internal tags leak into either field
    expect(msg.content).not.toContain('<<<');
    expect(msg.reasoning_content).not.toContain('<<<');
  });

  it('strips reasoning_content when excludeReasoningFromContext is true', () => {
    const msg = buildApiMessage(storedContent, true);
    expect(msg.content).toBe('The answer is 4.');
    expect(msg.reasoning_content).toBeUndefined();
  });

  it('handles content with no reasoning in both modes', () => {
    const plain = 'No reasoning here.';
    const msgPreserve = buildApiMessage(plain, false);
    const msgExclude = buildApiMessage(plain, true);
    expect(msgPreserve.content).toBe(plain);
    expect(msgPreserve.reasoning_content).toBeUndefined();
    expect(msgExclude.content).toBe(plain);
    expect(msgExclude.reasoning_content).toBeUndefined();
  });

  it('cleans agentic tool call blocks from content even when preserving reasoning', () => {
    // Content mixing a reasoning wrapper with an agentic tool-call marker block;
    // buildApiMessage must scrub every '<<<' marker from the visible content.
    const input =
      wrapReasoning('plan', 'text') +
      '\n\n<<<AGENTIC_TOOL_CALL_START>>>\n' +
      '<<<TOOL_NAME:bash>>>\n' +
      '<<<TOOL_ARGS_START>>>\n{}\n<<<TOOL_ARGS_END>>>\nout\n' +
      '<<<AGENTIC_TOOL_CALL_END>>>\n';
    const msg = buildApiMessage(input, false);
    expect(msg.content).not.toContain('<<<');
    expect(msg.reasoning_content).toBe('plan');
    // NOTE(review): removed two stray expectations that referenced an undefined
    // `apiMessage` variable — a merge-conflict leftover duplicating the
    // "no reasoning" test above with the wrong identifier; they would have
    // thrown a ReferenceError at runtime.
  });
});

View File

@@ -1,7 +1,6 @@
import tailwindcss from '@tailwindcss/vite';
import { sveltekit } from '@sveltejs/kit/vite';
import * as fflate from 'fflate';
import { readFileSync, writeFileSync, existsSync } from 'fs';
import { readFileSync, writeFileSync, existsSync, readdirSync, copyFileSync } from 'fs';
import { dirname, resolve } from 'path';
import { fileURLToPath } from 'url';
@@ -20,15 +19,13 @@ const GUIDE_FOR_FRONTEND = `
-->
`.trim();
const MAX_BUNDLE_SIZE = 2 * 1024 * 1024;
/**
* the maximum size of an embedded asset in bytes,
* e.g. maximum size of embedded font (see node_modules/katex/dist/fonts/*.woff2)
*/
const MAX_ASSET_SIZE = 32000;
/** public/index.html.gz minified flag */
/** public/index.html minified flag */
const ENABLE_JS_MINIFICATION = true;
function llamaCppBuildPlugin() {
@@ -40,7 +37,6 @@ function llamaCppBuildPlugin() {
setTimeout(() => {
try {
const indexPath = resolve('../public/index.html');
const gzipPath = resolve('../public/index.html.gz');
if (!existsSync(indexPath)) {
return;
@@ -61,26 +57,35 @@ function llamaCppBuildPlugin() {
content = content.replace(/\r/g, '');
content = GUIDE_FOR_FRONTEND + '\n' + content;
content = content.replace(/\/_app\/immutable\/bundle\.[^"]+\.js/g, './bundle.js');
content = content.replace(
/\/_app\/immutable\/assets\/bundle\.[^"]+\.css/g,
'./bundle.css'
);
const compressed = fflate.gzipSync(Buffer.from(content, 'utf-8'), { level: 9 });
writeFileSync(indexPath, content, 'utf-8');
console.log('✓ Updated index.html');
compressed[0x4] = 0;
compressed[0x5] = 0;
compressed[0x6] = 0;
compressed[0x7] = 0;
compressed[0x9] = 0;
if (compressed.byteLength > MAX_BUNDLE_SIZE) {
throw new Error(
`Bundle size is too large (${Math.ceil(compressed.byteLength / 1024)} KB).\n` +
`Please reduce the size of the frontend or increase MAX_BUNDLE_SIZE in vite.config.ts.\n`
);
// Copy bundle.*.js -> ../public/bundle.js
const immutableDir = resolve('../public/_app/immutable');
const bundleDir = resolve('../public/_app/immutable/assets');
if (existsSync(immutableDir)) {
const jsFiles = readdirSync(immutableDir).filter((f) => f.match(/^bundle\..+\.js$/));
if (jsFiles.length > 0) {
copyFileSync(resolve(immutableDir, jsFiles[0]), resolve('../public/bundle.js'));
console.log(`✓ Copied ${jsFiles[0]} -> bundle.js`);
}
}
// Copy bundle.*.css -> ../public/bundle.css
if (existsSync(bundleDir)) {
const cssFiles = readdirSync(bundleDir).filter((f) => f.match(/^bundle\..+\.css$/));
if (cssFiles.length > 0) {
copyFileSync(resolve(bundleDir, cssFiles[0]), resolve('../public/bundle.css'));
console.log(`✓ Copied ${cssFiles[0]} -> bundle.css`);
}
}
writeFileSync(gzipPath, compressed);
console.log('✓ Created index.html.gz');
} catch (error) {
console.error('Failed to create gzip file:', error);
console.error('Failed to update index.html:', error);
}
}, 100);
}

View File

@@ -551,6 +551,8 @@ int main(int argc, char ** argv) {
params.sampling.top_k = 4;
params.sampling.samplers = { COMMON_SAMPLER_TYPE_TOP_K, };
common_init();
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_TTS, print_usage)) {
return 1;
}
@@ -558,8 +560,6 @@ int main(int argc, char ** argv) {
const int n_parallel = params.n_parallel;
const int n_predict = params.n_predict;
common_init();
// init LLM
llama_backend_init();

View File

@@ -39,7 +39,7 @@ if (LLAMA_BUILD_BORINGSSL)
set(FIPS OFF CACHE BOOL "Enable FIPS (BoringSSL)")
set(BORINGSSL_GIT "https://boringssl.googlesource.com/boringssl" CACHE STRING "BoringSSL git repository")
set(BORINGSSL_VERSION "0.20260211.0" CACHE STRING "BoringSSL version")
set(BORINGSSL_VERSION "0.20260327.0" CACHE STRING "BoringSSL version")
message(STATUS "Fetching BoringSSL version ${BORINGSSL_VERSION}")