mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-09 10:31:45 +02:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # .github/workflows/build.yml # .github/workflows/release.yml # .gitignore # examples/batched/batched.cpp # examples/debug/debug.cpp # examples/eval-callback/eval-callback.cpp # examples/idle/idle.cpp # examples/lookahead/lookahead.cpp # examples/lookup/lookup-create.cpp # examples/lookup/lookup-stats.cpp # examples/lookup/lookup.cpp # examples/parallel/parallel.cpp # examples/passkey/passkey.cpp # examples/retrieval/retrieval.cpp # examples/save-load-state/save-load-state.cpp # examples/speculative-simple/speculative-simple.cpp # examples/speculative/speculative.cpp # examples/training/finetune.cpp # ggml/CMakeLists.txt # ggml/src/ggml-cann/aclnn_ops.cpp # ggml/src/ggml-cann/common.h # ggml/src/ggml-cann/ggml-cann.cpp # ggml/src/ggml-sycl/fattn-tile.hpp # ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp # ggml/src/ggml-webgpu/ggml-webgpu.cpp # ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl # ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py # ggml/src/ggml-webgpu/wgsl-shaders/rope.wgsl # ggml/src/ggml-webgpu/wgsl-shaders/soft_max.wgsl # scripts/sync-ggml.last # tests/export-graph-ops.cpp # tests/test-chat.cpp # tests/test-state-restore-fragmented.cpp # tests/test-thread-safety.cpp # tools/batched-bench/batched-bench.cpp # tools/cli/cli.cpp # tools/cvector-generator/cvector-generator.cpp # tools/export-lora/export-lora.cpp # tools/imatrix/imatrix.cpp # tools/perplexity/perplexity.cpp # tools/results/results.cpp # tools/server/CMakeLists.txt
This commit is contained in:
@@ -21,14 +21,6 @@ indent_style = tab
|
||||
[prompts/*.txt]
|
||||
insert_final_newline = unset
|
||||
|
||||
[tools/server/public/*]
|
||||
indent_size = 2
|
||||
|
||||
[tools/server/public/deps_*]
|
||||
trim_trailing_whitespace = unset
|
||||
indent_style = unset
|
||||
indent_size = unset
|
||||
|
||||
[tools/server/deps_*]
|
||||
trim_trailing_whitespace = unset
|
||||
indent_style = unset
|
||||
@@ -61,6 +53,14 @@ charset = unset
|
||||
trim_trailing_whitespace = unset
|
||||
insert_final_newline = unset
|
||||
|
||||
[tools/server/public/**]
|
||||
indent_style = unset
|
||||
indent_size = unset
|
||||
end_of_line = unset
|
||||
charset = unset
|
||||
trim_trailing_whitespace = unset
|
||||
insert_final_newline = unset
|
||||
|
||||
[benches/**]
|
||||
indent_style = unset
|
||||
indent_size = unset
|
||||
|
||||
4
.gitattributes
vendored
Normal file
4
.gitattributes
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
# Treat the generated single-file WebUI build as binary for diff purposes.
|
||||
# Git's pack-file delta compression still works (byte-level), but this prevents
|
||||
# git diff from printing the entire minified file on every change.
|
||||
tools/server/public/index.html -diff
|
||||
@@ -232,7 +232,7 @@ using chat_template_caps = jinja::caps;
|
||||
struct common_chat_templates {
|
||||
bool add_bos;
|
||||
bool add_eos;
|
||||
bool has_explicit_template; // Model had builtin template or template overridde was specified.
|
||||
bool has_explicit_template; // Model had builtin template or template overridden was specified.
|
||||
std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
|
||||
std::unique_ptr<common_chat_template> template_tool_use;
|
||||
};
|
||||
@@ -1004,6 +1004,10 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
|
||||
auto analysis = p.ref("analysis");
|
||||
auto preamble = p.rule("preamble", p.literal("<|channel|>commentary<|message|>") + p.content(content) + end);
|
||||
auto final_msg = p.rule("final", p.literal("<|channel|>final<|message|>") + p.content(content));
|
||||
|
||||
// Consume any unsolicited tool calls, e.g. builtin functions
|
||||
auto unsolicited = p.rule("unsolicited", p.atomic(p.optional(channel) + p.literal(" to=") + content + end));
|
||||
|
||||
auto any = p.rule("any", preamble | analysis);
|
||||
|
||||
if (has_response_format) {
|
||||
@@ -1047,7 +1051,7 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
|
||||
return p.zero_or_more(start + any) + start + (tool_call | final_msg);
|
||||
}
|
||||
|
||||
return p.zero_or_more(start + any) + start + final_msg;
|
||||
return p.zero_or_more(start + any) + start + (final_msg | unsolicited);
|
||||
});
|
||||
|
||||
data.parser = parser.save();
|
||||
|
||||
@@ -366,6 +366,11 @@ bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREAD
|
||||
}
|
||||
|
||||
void common_init() {
|
||||
#if defined(_WIN32)
|
||||
SetConsoleOutputCP(CP_UTF8);
|
||||
SetConsoleCP(CP_UTF8);
|
||||
#endif
|
||||
|
||||
llama_log_set(common_log_default_callback, NULL);
|
||||
|
||||
#ifdef NDEBUG
|
||||
@@ -374,7 +379,7 @@ void common_init() {
|
||||
const char * build_type = " (debug)";
|
||||
#endif
|
||||
|
||||
LOG_INF("build: %d (%s) with %s for %s%s\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT, LLAMA_COMPILER, LLAMA_BUILD_TARGET, build_type);
|
||||
LOG_DBG("build: %d (%s) with %s for %s%s\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT, LLAMA_COMPILER, LLAMA_BUILD_TARGET, build_type);
|
||||
}
|
||||
|
||||
std::string common_params_get_system_info(const common_params & params) {
|
||||
@@ -1250,6 +1255,9 @@ llama_context * common_init_result::context() {
|
||||
}
|
||||
|
||||
common_sampler * common_init_result::sampler(llama_seq_id seq_id) {
|
||||
if (seq_id < 0 || seq_id >= (int) pimpl->samplers.size()) {
|
||||
return nullptr;
|
||||
}
|
||||
return pimpl->samplers[seq_id].get();
|
||||
}
|
||||
|
||||
|
||||
@@ -123,6 +123,9 @@ class ProgressBar {
|
||||
static inline std::map<const ProgressBar *, int> lines;
|
||||
static inline int max_line = 0;
|
||||
|
||||
std::string filename;
|
||||
size_t len = 0;
|
||||
|
||||
static void cleanup(const ProgressBar * line) {
|
||||
lines.erase(line);
|
||||
if (lines.empty()) {
|
||||
@@ -139,7 +142,23 @@ class ProgressBar {
|
||||
}
|
||||
|
||||
public:
|
||||
ProgressBar() = default;
|
||||
ProgressBar(const std::string & url = "") : filename(url) {
|
||||
if (auto pos = filename.rfind('/'); pos != std::string::npos) {
|
||||
filename = filename.substr(pos + 1);
|
||||
}
|
||||
if (auto pos = filename.find('?'); pos != std::string::npos) {
|
||||
filename = filename.substr(0, pos);
|
||||
}
|
||||
for (size_t i = 0; i < filename.size(); ++i) {
|
||||
if ((filename[i] & 0xC0) != 0x80) {
|
||||
if (len++ == 39) {
|
||||
filename.resize(i);
|
||||
filename += "…";
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
~ProgressBar() {
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
@@ -147,11 +166,7 @@ public:
|
||||
}
|
||||
|
||||
void update(size_t current, size_t total) {
|
||||
if (!is_output_a_tty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!total) {
|
||||
if (!total || !is_output_a_tty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -163,28 +178,27 @@ public:
|
||||
}
|
||||
int lines_up = max_line - lines[this];
|
||||
|
||||
size_t width = 50;
|
||||
size_t bar = 55 - len;
|
||||
size_t pct = (100 * current) / total;
|
||||
size_t pos = (width * current) / total;
|
||||
|
||||
std::cout << "\033[s";
|
||||
size_t pos = (bar * current) / total;
|
||||
|
||||
if (lines_up > 0) {
|
||||
std::cout << "\033[" << lines_up << "A";
|
||||
}
|
||||
std::cout << "\033[2K\r["
|
||||
<< std::string(pos, '=')
|
||||
<< (pos < width ? ">" : "")
|
||||
<< std::string(width - pos, ' ')
|
||||
<< "] " << std::setw(3) << pct << "% ("
|
||||
<< current / (1024 * 1024) << " MB / "
|
||||
<< total / (1024 * 1024) << " MB) "
|
||||
<< "\033[u";
|
||||
std::cout << '\r' << "Downloading " << filename << " ";
|
||||
|
||||
std::cout.flush();
|
||||
for (size_t i = 0; i < bar; ++i) {
|
||||
std::cout << (i < pos ? "—" : " ");
|
||||
}
|
||||
std::cout << std::setw(4) << pct << "%\033[K";
|
||||
|
||||
if (lines_up > 0) {
|
||||
std::cout << "\033[" << lines_up << "B";
|
||||
}
|
||||
std::cout << '\r' << std::flush;
|
||||
|
||||
if (current == total) {
|
||||
cleanup(this);
|
||||
cleanup(this);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -212,7 +226,7 @@ static bool common_pull_file(httplib::Client & cli,
|
||||
const char * func = __func__; // avoid __func__ inside a lambda
|
||||
size_t downloaded = existing_size;
|
||||
size_t progress_step = 0;
|
||||
ProgressBar bar;
|
||||
ProgressBar bar(resolve_path);
|
||||
|
||||
auto res = cli.Get(resolve_path, headers,
|
||||
[&](const httplib::Response &response) {
|
||||
@@ -290,7 +304,7 @@ static int common_download_file_single_online(const std::string & url,
|
||||
const bool file_exists = std::filesystem::exists(path);
|
||||
|
||||
if (file_exists && skip_etag) {
|
||||
LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
|
||||
LOG_DBG("%s: using cached file: %s\n", __func__, path.c_str());
|
||||
return 304; // 304 Not Modified - fake cached response
|
||||
}
|
||||
|
||||
@@ -298,7 +312,7 @@ static int common_download_file_single_online(const std::string & url,
|
||||
if (file_exists) {
|
||||
last_etag = read_etag(path);
|
||||
} else {
|
||||
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
|
||||
LOG_DBG("%s: no previous model file found %s\n", __func__, path.c_str());
|
||||
}
|
||||
|
||||
auto head = cli.Head(parts.path);
|
||||
@@ -332,11 +346,11 @@ static int common_download_file_single_online(const std::string & url,
|
||||
|
||||
if (file_exists) {
|
||||
if (etag.empty()) {
|
||||
LOG_INF("%s: using cached file (no server etag): %s\n", __func__, path.c_str());
|
||||
LOG_DBG("%s: using cached file (no server etag): %s\n", __func__, path.c_str());
|
||||
return 304; // 304 Not Modified - fake cached response
|
||||
}
|
||||
if (!last_etag.empty() && last_etag == etag) {
|
||||
LOG_INF("%s: using cached file (same etag): %s\n", __func__, path.c_str());
|
||||
LOG_DBG("%s: using cached file (same etag): %s\n", __func__, path.c_str());
|
||||
return 304; // 304 Not Modified - fake cached response
|
||||
}
|
||||
if (remove(path.c_str()) != 0) {
|
||||
@@ -372,7 +386,7 @@ static int common_download_file_single_online(const std::string & url,
|
||||
}
|
||||
}
|
||||
|
||||
LOG_INF("%s: downloading from %s to %s (etag:%s)...\n",
|
||||
LOG_DBG("%s: downloading from %s to %s (etag:%s)...\n",
|
||||
__func__, common_http_show_masked_url(parts).c_str(),
|
||||
path_temporary.c_str(), etag.c_str());
|
||||
|
||||
@@ -441,7 +455,7 @@ int common_download_file_single(const std::string & url,
|
||||
return -1;
|
||||
}
|
||||
|
||||
LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
|
||||
LOG_DBG("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
|
||||
return 304; // Not Modified - fake cached response
|
||||
}
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ struct common_ngram_map_value {
|
||||
// statistics of a n-gram
|
||||
struct common_ngram_map_key {
|
||||
size_t key_idx; // index of key n-gram in token-history
|
||||
size_t stat_idx; // index of last token of stastistics computation (key_num, values)
|
||||
size_t stat_idx; // index of last token of statistics computation (key_num, values)
|
||||
|
||||
uint16_t key_num; // number of occurrences of this key n-gram in token-history
|
||||
common_ngram_map_value values[COMMON_NGRAM_MAX_VALUES]; // some known values after the key
|
||||
|
||||
@@ -383,6 +383,12 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
|
||||
params.backend_sampling = false;
|
||||
}
|
||||
|
||||
if (rbudget && params.backend_sampling) {
|
||||
LOG_WRN("%s: backend sampling is not compatible with reasoning budget, disabling\n", __func__);
|
||||
|
||||
params.backend_sampling = false;
|
||||
}
|
||||
|
||||
auto * result = new common_sampler {
|
||||
/* .params = */ params,
|
||||
/* .grmr = */ grmr,
|
||||
|
||||
@@ -545,11 +545,12 @@ int main(int argc, char ** argv) {
|
||||
|
||||
common_params params;
|
||||
|
||||
common_init();
|
||||
|
||||
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_DIFFUSION)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
common_init();
|
||||
llama_backend_init();
|
||||
|
||||
llama_model_params model_params = llama_model_default_params();
|
||||
|
||||
@@ -99,12 +99,12 @@ int main(int argc, char ** argv) {
|
||||
|
||||
common_params params;
|
||||
|
||||
common_init();
|
||||
|
||||
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_EMBEDDING)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
common_init();
|
||||
|
||||
params.embedding = true;
|
||||
|
||||
// get max number of sequences per batch
|
||||
|
||||
155
ggml/src/ggml-webgpu/wgsl-shaders/glu.wgsl
Normal file
155
ggml/src/ggml-webgpu/wgsl-shaders/glu.wgsl
Normal file
@@ -0,0 +1,155 @@
|
||||
enable f16;
|
||||
|
||||
#ifdef TYPE_F32
|
||||
#define DataType f32
|
||||
#endif
|
||||
#ifdef TYPE_F16
|
||||
#define DataType f16
|
||||
#endif
|
||||
|
||||
#ifdef OP_REGLU
|
||||
fn op(a: DataType, b: DataType) -> DataType {
|
||||
return max(a, 0) * b;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef OP_GEGLU
|
||||
const SQRT_2_OVER_PI: DataType = 0.79788456080286535587989211986876;
|
||||
const GELU_COEF_A: DataType = 0.044715;
|
||||
|
||||
fn op(a: DataType, b: DataType) -> DataType {
|
||||
let val = SQRT_2_OVER_PI * a * (1.0 + GELU_COEF_A * a * a);
|
||||
return 0.5 * a * (2.0 - 2.0/ (exp(2* val) + 1)) * b;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef OP_SWIGLU
|
||||
fn op(a: DataType, b: DataType) -> DataType {
|
||||
return a / (1.0 + exp(-a)) * b;
|
||||
}
|
||||
#endif
|
||||
#ifdef OP_SWIGLU_OAI
|
||||
fn op(a: f32, b: f32) -> f32 {
|
||||
let xi = min(a, params.limit);
|
||||
let gi = max(min(b, params.limit), -params.limit);
|
||||
var out_glu = xi / (1.0 + exp(-xi * params.alpha));
|
||||
out_glu = out_glu * (1.0 + gi);
|
||||
return out_glu;
|
||||
}
|
||||
#endif
|
||||
#ifdef OP_GEGLU_ERF
|
||||
const p_erf: DataType = 0.3275911;
|
||||
const a1_erf: DataType = 0.254829592;
|
||||
const a2_erf: DataType = -0.284496736;
|
||||
const a3_erf: DataType = 1.421413741;
|
||||
const a4_erf: DataType = -1.453152027;
|
||||
const a5_erf: DataType = 1.061405429;
|
||||
const SQRT_2_INV: DataType = 0.7071067811865476;
|
||||
|
||||
fn op(a: DataType, b: DataType) -> DataType {
|
||||
let a_div_sqr2 = a * SQRT_2_INV;
|
||||
let sign_x = sign(a_div_sqr2);
|
||||
let x = abs(a_div_sqr2);
|
||||
let t = 1.0 / (1.0 + p_erf * x);
|
||||
let y = 1.0 - (((((a5_erf * t + a4_erf) * t + a3_erf) * t + a2_erf) * t + a1_erf) * t * exp(-x * x));
|
||||
let erf_approx = sign_x * y;
|
||||
return 0.5 * a * (1.0 + erf_approx) * b;
|
||||
}
|
||||
#endif
|
||||
#ifdef OP_GEGLU_QUICK
|
||||
const GELU_QUICK_COEF: DataType = -1.702;
|
||||
|
||||
fn op(a: DataType, b: DataType) -> DataType {
|
||||
return a * (1.0 / (1.0 + exp(GELU_QUICK_COEF * a))) * b;
|
||||
}
|
||||
#endif
|
||||
|
||||
struct Params {
|
||||
offset_src0: u32,
|
||||
offset_src1: u32,
|
||||
offset_dst: u32,
|
||||
|
||||
// Strides (in elements)
|
||||
stride_src01: u32,
|
||||
stride_src02: u32,
|
||||
stride_src03: u32,
|
||||
|
||||
stride_src11: u32,
|
||||
stride_src12: u32,
|
||||
stride_src13: u32,
|
||||
|
||||
stride_dst1: u32,
|
||||
stride_dst2: u32,
|
||||
stride_dst3: u32,
|
||||
|
||||
// shape of dst
|
||||
ne: u32,
|
||||
ne0: u32,
|
||||
ne1: u32,
|
||||
ne2: u32,
|
||||
|
||||
swapped: u32,
|
||||
alpha: f32,
|
||||
limit: f32,
|
||||
}
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage, read_write> src0: array<DataType>;
|
||||
|
||||
#ifdef NO_SPLIT
|
||||
@group(0) @binding(1)
|
||||
var<storage, read_write> dst: array<DataType>;
|
||||
|
||||
@group(0) @binding(2)
|
||||
var<uniform> params: Params;
|
||||
|
||||
fn a_value(base: u32) -> DataType {
|
||||
let offset: u32 = select(0, params.ne0, params.swapped != 0);
|
||||
return src0[base + offset];
|
||||
}
|
||||
|
||||
fn b_value(base: u32) -> DataType {
|
||||
let offset: u32 = select(params.ne0, 0, params.swapped != 0);
|
||||
return src0[base + offset];
|
||||
}
|
||||
|
||||
#else
|
||||
@group(0) @binding(1)
|
||||
var<storage, read_write> src1: array<DataType>;
|
||||
|
||||
@group(0) @binding(2)
|
||||
var<storage, read_write> dst: array<DataType>;
|
||||
|
||||
@group(0) @binding(3)
|
||||
var<uniform> params: Params;
|
||||
|
||||
fn a_value(base: u32) -> DataType {
|
||||
return src0[base];
|
||||
}
|
||||
|
||||
fn b_value(base: u32) -> DataType {
|
||||
return src1[base];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@compute @workgroup_size(WG_SIZE)
|
||||
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
|
||||
if (gid.x >= params.ne) {
|
||||
return;
|
||||
}
|
||||
|
||||
var i = gid.x;
|
||||
let i3 = i / (params.ne2 * params.ne1 * params.ne0);
|
||||
i = i % (params.ne2 * params.ne1 * params.ne0);
|
||||
let i2 = i / (params.ne1 * params.ne0);
|
||||
i = i % (params.ne1 * params.ne0);
|
||||
let i1 = i / params.ne0;
|
||||
let i0 = i % params.ne0;
|
||||
|
||||
let i_a = params.offset_src0 + i3 * params.stride_src03 + i2 * params.stride_src02 + i1 * params.stride_src01 + i0;
|
||||
let i_b = params.offset_src1 + i3 * params.stride_src13 + i2 * params.stride_src12 + i1 * params.stride_src11 + i0;
|
||||
let i_dst = params.offset_dst + i3 * params.stride_dst3 + i2 * params.stride_dst2 + i1 * params.stride_dst1 + i0;
|
||||
|
||||
dst[i_dst] = op(a_value(i_a), b_value(i_b));
|
||||
}
|
||||
@@ -294,7 +294,7 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
|
||||
}
|
||||
|
||||
// get extra buffer types of the CPU
|
||||
// TODO: a more general solution for non-CPU extra buft should be imlpemented in the future
|
||||
// TODO: a more general solution for non-CPU extra buft should be implemented in the future
|
||||
// ref: https://github.com/ggml-org/llama.cpp/pull/12593#pullrequestreview-2718659948
|
||||
std::vector<ggml_backend_buffer_type_t> buft_extra;
|
||||
{
|
||||
|
||||
@@ -18,7 +18,7 @@ struct llama_ubatch {
|
||||
}
|
||||
|
||||
// typical for M-RoPE cases:
|
||||
// 0 - sequantial position of the tokens/embeddings in the sequence
|
||||
// 0 - sequential position of the tokens/embeddings in the sequence
|
||||
// 1 - y position in the image
|
||||
// 2 - x position in the image
|
||||
// 3 - other
|
||||
|
||||
@@ -595,7 +595,7 @@ void llama_context::sched_reserve() {
|
||||
|
||||
// reserve again with pp graph to avoid ggml-alloc reallocations during inference
|
||||
{
|
||||
// TODO: not sure if the following graph would be worster case for multi-stream KV caches:
|
||||
// TODO: not sure if the following graph would be worst case for multi-stream KV caches:
|
||||
//
|
||||
// auto * gf = graph_reserve(n_tokens, 1, n_tokens, mctx.get());
|
||||
//
|
||||
|
||||
@@ -1665,7 +1665,7 @@ ggml_tensor * llm_graph_context::build_inp_attn_scale() const {
|
||||
|
||||
ggml_tensor * llm_graph_context::build_inp_out_ids() const {
|
||||
// note: when all tokens are output, we could skip this optimization to spare the ggml_get_rows() calls,
|
||||
// but this would make the graph topology depend on the number of output tokens, which can interere with
|
||||
// but this would make the graph topology depend on the number of output tokens, which can interfere with
|
||||
// features that require constant topology such as pipeline parallelism
|
||||
// ref: https://github.com/ggml-org/llama.cpp/pull/14275#issuecomment-2987424471
|
||||
//if (n_outputs < n_tokens) {
|
||||
|
||||
@@ -333,7 +333,7 @@ public:
|
||||
ggml_tensor * get_v(ggml_context * ctx, int32_t il) const;
|
||||
|
||||
// store k_cur and v_cur in the cache based on the provided head location
|
||||
// note: the heads in k_cur and v_cur should be layed out contiguously in memory
|
||||
// note: the heads in k_cur and v_cur should be laid out contiguously in memory
|
||||
// - k_cur [n_embd_head_k, n_head_k, n_tokens]
|
||||
// - k_idxs [n_tokens]
|
||||
// - v_cur [n_embd_head_v, n_head_v, n_tokens]
|
||||
|
||||
@@ -9,7 +9,7 @@ llm_build_gemma_embedding::llm_build_gemma_embedding(const llama_model & model,
|
||||
|
||||
inpL = build_inp_embd(model.tok_embd);
|
||||
|
||||
// important: do not normalize weights for raw embeddings input (i.e. encoded image emdeddings)
|
||||
// important: do not normalize weights for raw embeddings input (i.e. encoded image embeddings)
|
||||
inpL = ggml_scale(ctx0, inpL, ubatch.token ? sqrtf(n_embd) : 1.0f);
|
||||
cb(inpL, "inp_scaled", -1);
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ llm_build_gemma3<iswa>::llm_build_gemma3(const llama_model & model, const llm_gr
|
||||
|
||||
inpL = build_inp_embd(model.tok_embd);
|
||||
|
||||
// important: do not normalize weights for raw embeddings input (i.e. encoded image emdeddings)
|
||||
// important: do not normalize weights for raw embeddings input (i.e. encoded image embeddings)
|
||||
inpL = ggml_scale(ctx0, inpL, ubatch.token ? sqrtf(n_embd) : 1.0f);
|
||||
cb(inpL, "inp_scaled", -1);
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ llm_build_gemma3n_iswa::llm_build_gemma3n_iswa(const llama_model & model, const
|
||||
|
||||
inpL = build_inp_embd(model.tok_embd);
|
||||
|
||||
// important: do not normalize weights for raw embeddings input (i.e. encoded image emdeddings)
|
||||
// important: do not normalize weights for raw embeddings input (i.e. encoded image embeddings)
|
||||
inpL = ggml_scale(ctx0, inpL, ubatch.token ? sqrtf(n_embd) : 1.0f);
|
||||
cb(inpL, "inp_scaled", -1);
|
||||
|
||||
|
||||
@@ -91,12 +91,12 @@ int main(int argc, char ** argv) {
|
||||
common_params params;
|
||||
g_params = ¶ms;
|
||||
|
||||
common_init();
|
||||
|
||||
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMPLETION, print_usage)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
common_init();
|
||||
|
||||
auto & sparams = params.sampling;
|
||||
|
||||
// save choice to use color for later
|
||||
@@ -147,19 +147,13 @@ int main(int argc, char ** argv) {
|
||||
|
||||
ctx = llama_init->context();
|
||||
model = llama_init->model();
|
||||
smpl = llama_init->sampler(0);
|
||||
|
||||
if (ctx == NULL) {
|
||||
LOG_ERR("%s: error: unable to create context\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (model == NULL) {
|
||||
LOG_ERR("%s: error: unable to load model\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
smpl = llama_init->sampler(0);
|
||||
|
||||
llama_memory_t mem = llama_get_memory(ctx);
|
||||
const llama_vocab * vocab = llama_model_get_vocab(model);
|
||||
|
||||
|
||||
@@ -17,11 +17,12 @@ using namespace std::chrono_literals;
|
||||
int main(int argc, char ** argv) {
|
||||
common_params params;
|
||||
|
||||
common_init();
|
||||
|
||||
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
common_init();
|
||||
llama_backend_init();
|
||||
llama_numa_init(params.numa);
|
||||
auto mparams = common_model_params_to_llama(params);
|
||||
|
||||
@@ -54,11 +54,12 @@ int main(int argc, char ** argv) {
|
||||
|
||||
common_params params;
|
||||
|
||||
common_init();
|
||||
|
||||
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_MTMD, show_additional_info)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
common_init();
|
||||
mtmd_helper_log_set(common_log_default_callback, nullptr);
|
||||
|
||||
if (params.mmproj.path.empty()) {
|
||||
|
||||
@@ -281,11 +281,12 @@ int main(int argc, char ** argv) {
|
||||
|
||||
common_params params;
|
||||
|
||||
common_init();
|
||||
|
||||
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_MTMD, show_additional_info)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
common_init();
|
||||
mtmd_helper_log_set(common_log_default_callback, nullptr);
|
||||
|
||||
if (params.mmproj.path.empty()) {
|
||||
|
||||
@@ -259,6 +259,6 @@ npm run test
|
||||
npm run build
|
||||
```
|
||||
|
||||
After `public/index.html.gz` has been generated, rebuild `llama-server` as described in the [build](#build) section to include the updated UI.
|
||||
After `public/index.html` has been generated, rebuild `llama-server` as described in the [build](#build) section to include the updated UI.
|
||||
|
||||
**Note:** The Vite dev server automatically proxies API requests to `http://localhost:8080`. Make sure `llama-server` is running on that port during development.
|
||||
|
||||
1
tools/server/public/bundle.css
Normal file
1
tools/server/public/bundle.css
Normal file
File diff suppressed because one or more lines are too long
469
tools/server/public/bundle.js
Normal file
469
tools/server/public/bundle.js
Normal file
File diff suppressed because one or more lines are too long
34
tools/server/public/index.html
Normal file
34
tools/server/public/index.html
Normal file
File diff suppressed because one or more lines are too long
Binary file not shown.
@@ -35,8 +35,8 @@ static server_http_res_ptr proxy_request(const server_http_req & req, std::strin
|
||||
std::map<std::string, std::string> headers;
|
||||
for (auto [key, value] : req.headers) {
|
||||
auto new_key = key;
|
||||
if (string_starts_with(new_key, "X-Proxy-Header-")) {
|
||||
string_replace_all(new_key, "X-Proxy-Header-", "");
|
||||
if (string_starts_with(new_key, "x-proxy-header-")) {
|
||||
string_replace_all(new_key, "x-proxy-header-", "");
|
||||
}
|
||||
headers[new_key] = value;
|
||||
}
|
||||
|
||||
@@ -10,7 +10,9 @@
|
||||
|
||||
#ifdef LLAMA_BUILD_WEBUI
|
||||
// auto generated files (see README.md for details)
|
||||
#include "index.html.gz.hpp"
|
||||
#include "index.html.hpp"
|
||||
#include "bundle.js.hpp"
|
||||
#include "bundle.css.hpp"
|
||||
#include "loading.html.hpp"
|
||||
#endif
|
||||
|
||||
@@ -272,16 +274,19 @@ bool server_http_context::init(const common_params & params) {
|
||||
} else {
|
||||
#ifdef LLAMA_BUILD_WEBUI
|
||||
// using embedded static index.html
|
||||
srv->Get(params.api_prefix + "/", [](const httplib::Request & req, httplib::Response & res) {
|
||||
if (req.get_header_value("Accept-Encoding").find("gzip") == std::string::npos) {
|
||||
res.set_content("Error: gzip is not supported by this browser", "text/plain");
|
||||
} else {
|
||||
res.set_header("Content-Encoding", "gzip");
|
||||
// COEP and COOP headers, required by pyodide (python interpreter)
|
||||
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
|
||||
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
|
||||
res.set_content(reinterpret_cast<const char*>(index_html_gz), index_html_gz_len, "text/html; charset=utf-8");
|
||||
}
|
||||
srv->Get(params.api_prefix + "/", [](const httplib::Request & /*req*/, httplib::Response & res) {
|
||||
// COEP and COOP headers, required by pyodide (python interpreter)
|
||||
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
|
||||
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
|
||||
res.set_content(reinterpret_cast<const char*>(index_html), index_html_len, "text/html; charset=utf-8");
|
||||
return false;
|
||||
});
|
||||
srv->Get(params.api_prefix + "/bundle.js", [](const httplib::Request & /*req*/, httplib::Response & res) {
|
||||
res.set_content(reinterpret_cast<const char*>(bundle_js), bundle_js_len, "application/javascript; charset=utf-8");
|
||||
return false;
|
||||
});
|
||||
srv->Get(params.api_prefix + "/bundle.css", [](const httplib::Request & /*req*/, httplib::Response & res) {
|
||||
res.set_content(reinterpret_cast<const char*>(bundle_css), bundle_css_len, "text/css; charset=utf-8");
|
||||
return false;
|
||||
});
|
||||
#endif
|
||||
|
||||
@@ -75,6 +75,8 @@ int main(int argc, char ** argv) {
|
||||
// own arguments required by this example
|
||||
common_params params;
|
||||
|
||||
common_init();
|
||||
|
||||
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_SERVER)) {
|
||||
return 1;
|
||||
}
|
||||
@@ -100,8 +102,6 @@ int main(int argc, char ** argv) {
|
||||
params.model_alias.insert(params.model.name);
|
||||
}
|
||||
|
||||
common_init();
|
||||
|
||||
// struct that contains llama context and inference
|
||||
server_context ctx_server;
|
||||
|
||||
|
||||
@@ -188,14 +188,14 @@ The build process:
|
||||
1. **Vite Build** - Bundles all TypeScript, Svelte, and CSS
|
||||
2. **Static Adapter** - Outputs to `../public` (llama-server's static file directory)
|
||||
3. **Post-Build Script** - Cleans up intermediate files
|
||||
4. **Custom Plugin** - Creates `index.html.gz` with:
|
||||
4. **Custom Plugin** - Creates `index.html` with:
|
||||
- Inlined favicon as base64
|
||||
- GZIP compression (level 9)
|
||||
- Deterministic output (zeroed timestamps)
|
||||
|
||||
```text
|
||||
tools/server/webui/ → build → tools/server/public/
|
||||
├── src/ ├── index.html.gz (served by llama-server)
|
||||
├── src/ ├── index.html (served by llama-server)
|
||||
├── static/ └── (favicon inlined)
|
||||
└── ...
|
||||
```
|
||||
@@ -219,7 +219,7 @@ output: {
|
||||
|
||||
The WebUI is embedded directly into the llama-server binary:
|
||||
|
||||
1. `npm run build` outputs `index.html.gz` to `tools/server/public/`
|
||||
1. `npm run build` outputs `index.html` to `tools/server/public/`
|
||||
2. llama-server compiles this into the binary at build time
|
||||
3. When accessing `/`, llama-server serves the gzipped HTML
|
||||
4. All assets are inlined (CSS, JS, fonts, favicon)
|
||||
|
||||
@@ -50,7 +50,6 @@
|
||||
"eslint-config-prettier": "^10.0.1",
|
||||
"eslint-plugin-storybook": "^10.2.4",
|
||||
"eslint-plugin-svelte": "^3.0.0",
|
||||
"fflate": "^0.8.2",
|
||||
"globals": "^16.0.0",
|
||||
"http-server": "^14.1.1",
|
||||
"mdast": "^3.0.0",
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Script to install pre-commit and pre-push hooks for webui
|
||||
# Pre-commit: formats code and runs checks
|
||||
# Pre-push: builds the project, stashes unstaged changes
|
||||
# Script to install pre-commit hook for webui
|
||||
# Pre-commit: formats, checks, builds, and stages build output
|
||||
|
||||
REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
PRE_COMMIT_HOOK="$REPO_ROOT/.git/hooks/pre-commit"
|
||||
PRE_PUSH_HOOK="$REPO_ROOT/.git/hooks/pre-push"
|
||||
|
||||
echo "Installing pre-commit and pre-push hooks for webui..."
|
||||
echo "Installing pre-commit hook for webui..."
|
||||
|
||||
# Create the pre-commit hook
|
||||
cat > "$PRE_COMMIT_HOOK" << 'EOF'
|
||||
@@ -16,21 +14,19 @@ cat > "$PRE_COMMIT_HOOK" << 'EOF'
|
||||
|
||||
# Check if there are any changes in the webui directory
|
||||
if git diff --cached --name-only | grep -q "^tools/server/webui/"; then
|
||||
echo "Formatting and checking webui code..."
|
||||
|
||||
# Change to webui directory and run format
|
||||
cd tools/server/webui
|
||||
|
||||
# Check if npm is available and package.json exists
|
||||
REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
cd "$REPO_ROOT/tools/server/webui"
|
||||
|
||||
# Check if package.json exists
|
||||
if [ ! -f "package.json" ]; then
|
||||
echo "Error: package.json not found in tools/server/webui"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
echo "Formatting and checking webui code..."
|
||||
|
||||
# Run the format command
|
||||
npm run format
|
||||
|
||||
# Check if format command succeeded
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Error: npm run format failed"
|
||||
exit 1
|
||||
@@ -38,8 +34,6 @@ if git diff --cached --name-only | grep -q "^tools/server/webui/"; then
|
||||
|
||||
# Run the lint command
|
||||
npm run lint
|
||||
|
||||
# Check if lint command succeeded
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Error: npm run lint failed"
|
||||
exit 1
|
||||
@@ -47,156 +41,42 @@ if git diff --cached --name-only | grep -q "^tools/server/webui/"; then
|
||||
|
||||
# Run the check command
|
||||
npm run check
|
||||
|
||||
# Check if check command succeeded
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Error: npm run check failed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Go back to repo root
|
||||
cd ../../..
|
||||
|
||||
echo "✅ Webui code formatted and checked successfully"
|
||||
fi
|
||||
|
||||
exit 0
|
||||
EOF
|
||||
|
||||
# Create the pre-push hook
|
||||
cat > "$PRE_PUSH_HOOK" << 'EOF'
|
||||
#!/bin/bash
|
||||
|
||||
# Check if there are any webui changes that need building
|
||||
WEBUI_CHANGES=$(git diff --name-only @{push}..HEAD | grep "^tools/server/webui/" || true)
|
||||
|
||||
if [ -n "$WEBUI_CHANGES" ]; then
|
||||
echo "Webui changes detected, checking if build is up-to-date..."
|
||||
|
||||
# Change to webui directory
|
||||
cd tools/server/webui
|
||||
|
||||
# Check if npm is available and package.json exists
|
||||
if [ ! -f "package.json" ]; then
|
||||
echo "Error: package.json not found in tools/server/webui"
|
||||
# Build the webui
|
||||
echo "Building webui..."
|
||||
npm run build
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "❌ npm run build failed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check if build output exists and is newer than source files
|
||||
BUILD_FILE="../public/index.html.gz"
|
||||
NEEDS_BUILD=false
|
||||
|
||||
if [ ! -f "$BUILD_FILE" ]; then
|
||||
echo "Build output not found, building..."
|
||||
NEEDS_BUILD=true
|
||||
else
|
||||
# Check if any source files are newer than the build output
|
||||
if find src -newer "$BUILD_FILE" -type f | head -1 | grep -q .; then
|
||||
echo "Source files are newer than build output, rebuilding..."
|
||||
NEEDS_BUILD=true
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ "$NEEDS_BUILD" = true ]; then
|
||||
echo "Building webui..."
|
||||
|
||||
# Stash any unstaged changes to avoid conflicts during build
|
||||
echo "Checking for unstaged changes..."
|
||||
if ! git diff --quiet || ! git diff --cached --quiet --diff-filter=A; then
|
||||
echo "Stashing unstaged changes..."
|
||||
git stash push --include-untracked -m "Pre-push hook: stashed unstaged changes"
|
||||
STASH_CREATED=$?
|
||||
else
|
||||
echo "No unstaged changes to stash"
|
||||
STASH_CREATED=1
|
||||
fi
|
||||
|
||||
# Run the build command
|
||||
npm run build
|
||||
|
||||
# Check if build command succeeded
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Error: npm run build failed"
|
||||
if [ $STASH_CREATED -eq 0 ]; then
|
||||
echo "You can restore your unstaged changes with: git stash pop"
|
||||
fi
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Go back to repo root
|
||||
cd ../../..
|
||||
|
||||
# Check if build output was created/updated
|
||||
if [ -f "tools/server/public/index.html.gz" ]; then
|
||||
# Add the build output and commit it
|
||||
git add tools/server/public/index.html.gz
|
||||
if ! git diff --cached --quiet; then
|
||||
echo "Committing updated build output..."
|
||||
git commit -m "chore: update webui build output"
|
||||
echo "✅ Build output committed successfully"
|
||||
else
|
||||
echo "Build output unchanged"
|
||||
fi
|
||||
else
|
||||
echo "Error: Build output not found after build"
|
||||
if [ $STASH_CREATED -eq 0 ]; then
|
||||
echo "You can restore your unstaged changes with: git stash pop"
|
||||
fi
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ $STASH_CREATED -eq 0 ]; then
|
||||
echo "✅ Build completed. Your unstaged changes have been stashed."
|
||||
echo "They will be automatically restored after the push."
|
||||
# Create a marker file to indicate stash was created by pre-push hook
|
||||
touch .git/WEBUI_PUSH_STASH_MARKER
|
||||
fi
|
||||
else
|
||||
echo "✅ Build output is up-to-date"
|
||||
fi
|
||||
|
||||
echo "✅ Webui ready for push"
|
||||
# Stage the build output alongside the source changes
|
||||
cd "$REPO_ROOT"
|
||||
git add tools/server/public/
|
||||
|
||||
echo "✅ Webui built and build output staged"
|
||||
fi
|
||||
|
||||
exit 0
|
||||
EOF
|
||||
|
||||
# Create the post-push hook (for restoring stashed changes after push)
|
||||
cat > "$REPO_ROOT/.git/hooks/post-push" << 'EOF'
|
||||
#!/bin/bash
|
||||
|
||||
# Check if we have a stash marker from the pre-push hook
|
||||
if [ -f .git/WEBUI_PUSH_STASH_MARKER ]; then
|
||||
echo "Restoring your unstaged changes after push..."
|
||||
git stash pop
|
||||
rm -f .git/WEBUI_PUSH_STASH_MARKER
|
||||
echo "✅ Your unstaged changes have been restored."
|
||||
fi
|
||||
|
||||
exit 0
|
||||
EOF
|
||||
|
||||
# Make all hooks executable
|
||||
# Make hook executable
|
||||
chmod +x "$PRE_COMMIT_HOOK"
|
||||
chmod +x "$PRE_PUSH_HOOK"
|
||||
chmod +x "$REPO_ROOT/.git/hooks/post-push"
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
echo "✅ Git hooks installed successfully!"
|
||||
echo "✅ Git hook installed successfully!"
|
||||
echo " Pre-commit: $PRE_COMMIT_HOOK"
|
||||
echo " Pre-push: $PRE_PUSH_HOOK"
|
||||
echo " Post-push: $REPO_ROOT/.git/hooks/post-push"
|
||||
echo ""
|
||||
echo "The hooks will automatically:"
|
||||
echo " • Format and check webui code before commits (pre-commit)"
|
||||
echo " • Build webui code before pushes (pre-push)"
|
||||
echo " • Stash unstaged changes during build process"
|
||||
echo " • Restore your unstaged changes after the push"
|
||||
echo ""
|
||||
echo "To test the hooks:"
|
||||
echo " • Make a change to a file in the webui directory and commit it (triggers format/check)"
|
||||
echo " • Push your commits to trigger the build process"
|
||||
echo "The hook will automatically:"
|
||||
echo " • Format, lint and check webui code before commits"
|
||||
echo " • Build webui and stage tools/server/public/ into the same commit"
|
||||
else
|
||||
echo "❌ Failed to make hooks executable"
|
||||
echo "❌ Failed to make hook executable"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
rm -rf ../public/_app;
|
||||
rm ../public/favicon.svg;
|
||||
rm ../public/index.html;
|
||||
rm -f ../public/index.html.gz; # deprecated, but may still be generated by older versions of the build process
|
||||
|
||||
@@ -40,6 +40,17 @@
|
||||
--code-background: oklch(0.985 0 0);
|
||||
--code-foreground: oklch(0.145 0 0);
|
||||
--layer-popover: 1000000;
|
||||
|
||||
--chat-form-area-height: 8rem;
|
||||
--chat-form-area-offset: 2rem;
|
||||
--max-message-height: max(24rem, min(80dvh, calc(100dvh - var(--chat-form-area-height) - 12rem)));
|
||||
}
|
||||
|
||||
@media (min-width: 640px) {
|
||||
:root {
|
||||
--chat-form-area-height: 24rem;
|
||||
--chat-form-area-offset: 12rem;
|
||||
}
|
||||
}
|
||||
|
||||
.dark {
|
||||
@@ -116,19 +127,6 @@
|
||||
--color-sidebar-ring: var(--sidebar-ring);
|
||||
}
|
||||
|
||||
:root {
|
||||
--chat-form-area-height: 8rem;
|
||||
--chat-form-area-offset: 2rem;
|
||||
--max-message-height: max(24rem, min(80dvh, calc(100dvh - var(--chat-form-area-height) - 12rem)));
|
||||
}
|
||||
|
||||
@media (min-width: 640px) {
|
||||
:root {
|
||||
--chat-form-area-height: 24rem;
|
||||
--chat-form-area-offset: 12rem;
|
||||
}
|
||||
}
|
||||
|
||||
@layer base {
|
||||
* {
|
||||
@apply border-border outline-ring/50;
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
import { getChatActionsContext, setMessageEditContext } from '$lib/contexts';
|
||||
import { chatStore, pendingEditMessageId } from '$lib/stores/chat.svelte';
|
||||
import { conversationsStore } from '$lib/stores/conversations.svelte';
|
||||
import { DatabaseService } from '$lib/services';
|
||||
import { DatabaseService } from '$lib/services/database.service';
|
||||
import { SYSTEM_MESSAGE_PLACEHOLDER } from '$lib/constants';
|
||||
import { MessageRole, AttachmentType } from '$lib/enums';
|
||||
import {
|
||||
@@ -19,6 +19,7 @@
|
||||
interface Props {
|
||||
class?: string;
|
||||
message: DatabaseMessage;
|
||||
toolMessages?: DatabaseMessage[];
|
||||
isLastAssistantMessage?: boolean;
|
||||
siblingInfo?: ChatMessageSiblingInfo | null;
|
||||
}
|
||||
@@ -26,6 +27,7 @@
|
||||
let {
|
||||
class: className = '',
|
||||
message,
|
||||
toolMessages = [],
|
||||
isLastAssistantMessage = false,
|
||||
siblingInfo = null
|
||||
}: Props = $props();
|
||||
@@ -302,6 +304,7 @@
|
||||
{deletionInfo}
|
||||
{isLastAssistantMessage}
|
||||
{message}
|
||||
{toolMessages}
|
||||
messageContent={message.content}
|
||||
onConfirmDelete={handleConfirmDelete}
|
||||
onContinue={handleContinue}
|
||||
|
||||
@@ -6,42 +6,42 @@
|
||||
SyntaxHighlightedCode
|
||||
} from '$lib/components/app';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
import { Wrench, Loader2, AlertTriangle, Brain } from '@lucide/svelte';
|
||||
import { AgenticSectionType, AttachmentType, FileTypeText } from '$lib/enums';
|
||||
import { Wrench, Loader2, Brain } from '@lucide/svelte';
|
||||
import { AgenticSectionType, FileTypeText } from '$lib/enums';
|
||||
import { formatJsonPretty } from '$lib/utils';
|
||||
import { ATTACHMENT_SAVED_REGEX, NEWLINE_SEPARATOR } from '$lib/constants';
|
||||
import { parseAgenticContent, type AgenticSection } from '$lib/utils';
|
||||
import type { DatabaseMessage, DatabaseMessageExtraImageFile } from '$lib/types/database';
|
||||
import {
|
||||
deriveAgenticSections,
|
||||
parseToolResultWithImages,
|
||||
type AgenticSection,
|
||||
type ToolResultLine
|
||||
} from '$lib/utils';
|
||||
import type { DatabaseMessage } from '$lib/types/database';
|
||||
import type { ChatMessageAgenticTimings, ChatMessageAgenticTurnStats } from '$lib/types/chat';
|
||||
import { ChatMessageStatsView } from '$lib/enums';
|
||||
|
||||
interface Props {
|
||||
message?: DatabaseMessage;
|
||||
content: string;
|
||||
message: DatabaseMessage;
|
||||
toolMessages?: DatabaseMessage[];
|
||||
isStreaming?: boolean;
|
||||
highlightTurns?: boolean;
|
||||
}
|
||||
|
||||
type ToolResultLine = {
|
||||
text: string;
|
||||
image?: DatabaseMessageExtraImageFile;
|
||||
};
|
||||
|
||||
let { content, message, isStreaming = false, highlightTurns = false }: Props = $props();
|
||||
let { message, toolMessages = [], isStreaming = false, highlightTurns = false }: Props = $props();
|
||||
|
||||
let expandedStates: Record<number, boolean> = $state({});
|
||||
|
||||
const sections = $derived(parseAgenticContent(content));
|
||||
const showToolCallInProgress = $derived(config().showToolCallInProgress as boolean);
|
||||
const showThoughtInProgress = $derived(config().showThoughtInProgress as boolean);
|
||||
|
||||
// Parse toolResults with images only when sections or message.extra change
|
||||
const sections = $derived(deriveAgenticSections(message, toolMessages, []));
|
||||
|
||||
// Parse tool results with images
|
||||
const sectionsParsed = $derived(
|
||||
sections.map((section) => ({
|
||||
...section,
|
||||
parsedLines: section.toolResult
|
||||
? parseToolResultWithImages(section.toolResult, message?.extra)
|
||||
: []
|
||||
? parseToolResultWithImages(section.toolResult, section.toolResultExtras || message?.extra)
|
||||
: ([] as ToolResultLine[])
|
||||
}))
|
||||
);
|
||||
|
||||
@@ -107,26 +107,6 @@
|
||||
expandedStates[index] = !currentState;
|
||||
}
|
||||
|
||||
function parseToolResultWithImages(
|
||||
toolResult: string,
|
||||
extras?: DatabaseMessage['extra']
|
||||
): ToolResultLine[] {
|
||||
const lines = toolResult.split(NEWLINE_SEPARATOR);
|
||||
|
||||
return lines.map((line) => {
|
||||
const match = line.match(ATTACHMENT_SAVED_REGEX);
|
||||
if (!match || !extras) return { text: line };
|
||||
|
||||
const attachmentName = match[1];
|
||||
const image = extras.find(
|
||||
(e): e is DatabaseMessageExtraImageFile =>
|
||||
e.type === AttachmentType.IMAGE && e.name === attachmentName
|
||||
);
|
||||
|
||||
return { text: line, image };
|
||||
});
|
||||
}
|
||||
|
||||
function buildTurnAgenticTimings(stats: ChatMessageAgenticTurnStats): ChatMessageAgenticTimings {
|
||||
return {
|
||||
turns: 1,
|
||||
@@ -144,9 +124,8 @@
|
||||
<MarkdownContent content={section.content} attachments={message?.extra} />
|
||||
</div>
|
||||
{:else if section.type === AgenticSectionType.TOOL_CALL_STREAMING}
|
||||
{@const streamingIcon = isStreaming ? Loader2 : AlertTriangle}
|
||||
{@const streamingIconClass = isStreaming ? 'h-4 w-4 animate-spin' : 'h-4 w-4 text-yellow-500'}
|
||||
{@const streamingSubtitle = isStreaming ? '' : 'incomplete'}
|
||||
{@const streamingIcon = isStreaming ? Loader2 : Loader2}
|
||||
{@const streamingIconClass = isStreaming ? 'h-4 w-4 animate-spin' : 'h-4 w-4'}
|
||||
|
||||
<CollapsibleContentBlock
|
||||
open={isExpanded(index, section)}
|
||||
@@ -154,7 +133,7 @@
|
||||
icon={streamingIcon}
|
||||
iconClass={streamingIconClass}
|
||||
title={section.toolName || 'Tool call'}
|
||||
subtitle={streamingSubtitle}
|
||||
subtitle={isStreaming ? '' : 'incomplete'}
|
||||
{isStreaming}
|
||||
onToggle={() => toggleExpanded(index, section)}
|
||||
>
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
import { Check, X } from '@lucide/svelte';
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import { Checkbox } from '$lib/components/ui/checkbox';
|
||||
import { AGENTIC_TAGS, INPUT_CLASSES, REASONING_TAGS } from '$lib/constants';
|
||||
import { INPUT_CLASSES } from '$lib/constants';
|
||||
import { MessageRole, KeyboardKey, ChatMessageStatsView } from '$lib/enums';
|
||||
import Label from '$lib/components/ui/label/label.svelte';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
@@ -23,6 +23,8 @@
|
||||
import { modelsStore } from '$lib/stores/models.svelte';
|
||||
import { ServerModelStatus } from '$lib/enums';
|
||||
|
||||
import { hasAgenticContent } from '$lib/utils';
|
||||
|
||||
interface Props {
|
||||
class?: string;
|
||||
deletionInfo: {
|
||||
@@ -33,6 +35,7 @@
|
||||
} | null;
|
||||
isLastAssistantMessage?: boolean;
|
||||
message: DatabaseMessage;
|
||||
toolMessages?: DatabaseMessage[];
|
||||
messageContent: string | undefined;
|
||||
onCopy: () => void;
|
||||
onConfirmDelete: () => void;
|
||||
@@ -53,6 +56,7 @@
|
||||
deletionInfo,
|
||||
isLastAssistantMessage = false,
|
||||
message,
|
||||
toolMessages = [],
|
||||
messageContent,
|
||||
onConfirmDelete,
|
||||
onContinue,
|
||||
@@ -84,10 +88,8 @@
|
||||
}
|
||||
}
|
||||
|
||||
const hasAgenticMarkers = $derived(
|
||||
messageContent?.includes(AGENTIC_TAGS.TOOL_CALL_START) ?? false
|
||||
);
|
||||
const hasReasoningMarkers = $derived(messageContent?.includes(REASONING_TAGS.START) ?? false);
|
||||
const isAgentic = $derived(hasAgenticContent(message, toolMessages));
|
||||
const hasReasoning = $derived(!!message.reasoningContent);
|
||||
const processingState = useProcessingState();
|
||||
|
||||
let currentConfig = $derived(config());
|
||||
@@ -145,7 +147,7 @@
|
||||
}
|
||||
|
||||
let highlightAgenticTurns = $derived(
|
||||
hasAgenticMarkers &&
|
||||
isAgentic &&
|
||||
(currentConfig.alwaysShowAgenticTurns || activeStatsView === ChatMessageStatsView.SUMMARY)
|
||||
);
|
||||
|
||||
@@ -160,13 +162,14 @@
|
||||
message?.role === MessageRole.ASSISTANT &&
|
||||
isActivelyProcessing &&
|
||||
hasNoContent &&
|
||||
!isAgentic &&
|
||||
isLastAssistantMessage
|
||||
);
|
||||
|
||||
let showProcessingInfoBottom = $derived(
|
||||
message?.role === MessageRole.ASSISTANT &&
|
||||
isActivelyProcessing &&
|
||||
!hasNoContent &&
|
||||
(!hasNoContent || isAgentic) &&
|
||||
isLastAssistantMessage
|
||||
);
|
||||
|
||||
@@ -252,10 +255,10 @@
|
||||
<pre class="raw-output">{messageContent || ''}</pre>
|
||||
{:else}
|
||||
<ChatMessageAgenticContent
|
||||
content={messageContent || ''}
|
||||
{message}
|
||||
{toolMessages}
|
||||
isStreaming={isChatStreaming()}
|
||||
highlightTurns={highlightAgenticTurns}
|
||||
{message}
|
||||
/>
|
||||
{/if}
|
||||
{:else}
|
||||
@@ -344,9 +347,7 @@
|
||||
{onCopy}
|
||||
{onEdit}
|
||||
{onRegenerate}
|
||||
onContinue={currentConfig.enableContinueGeneration && !hasReasoningMarkers
|
||||
? onContinue
|
||||
: undefined}
|
||||
onContinue={currentConfig.enableContinueGeneration && !hasReasoning ? onContinue : undefined}
|
||||
{onForkConversation}
|
||||
{onDelete}
|
||||
{onConfirmDelete}
|
||||
|
||||
@@ -6,7 +6,12 @@
|
||||
import { chatStore } from '$lib/stores/chat.svelte';
|
||||
import { conversationsStore, activeConversation } from '$lib/stores/conversations.svelte';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
import { copyToClipboard, formatMessageForClipboard, getMessageSiblings } from '$lib/utils';
|
||||
import {
|
||||
copyToClipboard,
|
||||
formatMessageForClipboard,
|
||||
getMessageSiblings,
|
||||
hasAgenticContent
|
||||
} from '$lib/utils';
|
||||
|
||||
interface Props {
|
||||
class?: string;
|
||||
@@ -119,32 +124,75 @@
|
||||
? messages
|
||||
: messages.filter((msg) => msg.type !== MessageRole.SYSTEM);
|
||||
|
||||
let lastAssistantIndex = -1;
|
||||
// Build display entries, grouping agentic sessions into single entries.
|
||||
// An agentic session = assistant(with tool_calls) → tool → assistant → tool → ... → assistant(final)
|
||||
const result: Array<{
|
||||
message: DatabaseMessage;
|
||||
toolMessages: DatabaseMessage[];
|
||||
isLastAssistantMessage: boolean;
|
||||
siblingInfo: ChatMessageSiblingInfo;
|
||||
}> = [];
|
||||
|
||||
for (let i = filteredMessages.length - 1; i >= 0; i--) {
|
||||
if (filteredMessages[i].role === MessageRole.ASSISTANT) {
|
||||
lastAssistantIndex = i;
|
||||
for (let i = 0; i < filteredMessages.length; i++) {
|
||||
const msg = filteredMessages[i];
|
||||
|
||||
// Skip tool messages - they're grouped with preceding assistant
|
||||
if (msg.role === MessageRole.TOOL) continue;
|
||||
|
||||
const toolMessages: DatabaseMessage[] = [];
|
||||
if (msg.role === MessageRole.ASSISTANT && hasAgenticContent(msg)) {
|
||||
let j = i + 1;
|
||||
|
||||
while (j < filteredMessages.length) {
|
||||
const next = filteredMessages[j];
|
||||
|
||||
if (next.role === MessageRole.TOOL) {
|
||||
toolMessages.push(next);
|
||||
|
||||
j++;
|
||||
} else if (next.role === MessageRole.ASSISTANT) {
|
||||
toolMessages.push(next);
|
||||
|
||||
j++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
i = j - 1;
|
||||
} else if (msg.role === MessageRole.ASSISTANT) {
|
||||
let j = i + 1;
|
||||
|
||||
while (j < filteredMessages.length && filteredMessages[j].role === MessageRole.TOOL) {
|
||||
toolMessages.push(filteredMessages[j]);
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
const siblingInfo = getMessageSiblings(allConversationMessages, msg.id);
|
||||
|
||||
result.push({
|
||||
message: msg,
|
||||
toolMessages,
|
||||
isLastAssistantMessage: false,
|
||||
siblingInfo: siblingInfo || {
|
||||
message: msg,
|
||||
siblingIds: [msg.id],
|
||||
currentIndex: 0,
|
||||
totalSiblings: 1
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Mark the last assistant message
|
||||
for (let i = result.length - 1; i >= 0; i--) {
|
||||
if (result[i].message.role === MessageRole.ASSISTANT) {
|
||||
result[i].isLastAssistantMessage = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return filteredMessages.map((message, index) => {
|
||||
const siblingInfo = getMessageSiblings(allConversationMessages, message.id);
|
||||
const isLastAssistantMessage =
|
||||
message.role === MessageRole.ASSISTANT && index === lastAssistantIndex;
|
||||
|
||||
return {
|
||||
message,
|
||||
isLastAssistantMessage,
|
||||
siblingInfo: siblingInfo || {
|
||||
message,
|
||||
siblingIds: [message.id],
|
||||
currentIndex: 0,
|
||||
totalSiblings: 1
|
||||
}
|
||||
};
|
||||
});
|
||||
return result;
|
||||
});
|
||||
</script>
|
||||
|
||||
@@ -152,11 +200,12 @@
|
||||
class="flex h-full flex-col space-y-10 pt-24 {className}"
|
||||
style="height: auto; min-height: calc(100dvh - 14rem);"
|
||||
>
|
||||
{#each displayMessages as { message, isLastAssistantMessage, siblingInfo } (message.id)}
|
||||
{#each displayMessages as { message, toolMessages, isLastAssistantMessage, siblingInfo } (message.id)}
|
||||
<div use:fadeInView>
|
||||
<ChatMessage
|
||||
class="mx-auto w-full max-w-[48rem]"
|
||||
{message}
|
||||
{toolMessages}
|
||||
{isLastAssistantMessage}
|
||||
{siblingInfo}
|
||||
/>
|
||||
|
||||
@@ -425,21 +425,16 @@ export { default as ChatMessage } from './ChatMessages/ChatMessage.svelte';
|
||||
/**
|
||||
* **ChatMessageAgenticContent** - Agentic workflow output display
|
||||
*
|
||||
* Specialized renderer for assistant messages containing agentic workflow markers.
|
||||
* Parses structured content and displays tool calls and reasoning blocks as
|
||||
* interactive collapsible sections with real-time streaming support.
|
||||
* Specialized renderer for assistant messages with tool calls and reasoning.
|
||||
* Derives display sections from structured message data (toolCalls, reasoningContent,
|
||||
* and child tool result messages) and renders them as interactive collapsible sections.
|
||||
*
|
||||
* **Architecture:**
|
||||
* - Uses `parseAgenticContent()` from `$lib/utils` to parse markers
|
||||
* - Uses `deriveAgenticSections()` from `$lib/utils` to build sections from structured data
|
||||
* - Renders sections as CollapsibleContentBlock components
|
||||
* - Handles streaming state for progressive content display
|
||||
* - Falls back to MarkdownContent for plain text sections
|
||||
*
|
||||
* **Marker Format:**
|
||||
* - Tool calls: in constants/agentic.ts (AGENTIC_TAGS)
|
||||
* - Reasoning: in constants/agentic.ts (REASONING_TAGS)
|
||||
* - Partial markers handled gracefully during streaming
|
||||
*
|
||||
* **Execution States:**
|
||||
* - **Streaming**: Animated spinner, block expanded, auto-scroll enabled
|
||||
* - **Pending**: Waiting indicator for queued tool calls
|
||||
|
||||
@@ -15,8 +15,11 @@ export const DEFAULT_AGENTIC_CONFIG: AgenticConfig = {
|
||||
maxToolPreviewLines: 25
|
||||
} as const;
|
||||
|
||||
// Agentic tool call tag markers
|
||||
export const AGENTIC_TAGS = {
|
||||
/**
|
||||
* @deprecated Legacy marker tags - only used for migration of old stored messages.
|
||||
* New messages use structured fields (reasoningContent, toolCalls, toolCallId).
|
||||
*/
|
||||
export const LEGACY_AGENTIC_TAGS = {
|
||||
TOOL_CALL_START: '<<<AGENTIC_TOOL_CALL_START>>>',
|
||||
TOOL_CALL_END: '<<<AGENTIC_TOOL_CALL_END>>>',
|
||||
TOOL_NAME_PREFIX: '<<<TOOL_NAME:',
|
||||
@@ -25,39 +28,25 @@ export const AGENTIC_TAGS = {
|
||||
TAG_SUFFIX: '>>>'
|
||||
} as const;
|
||||
|
||||
export const REASONING_TAGS = {
|
||||
/**
|
||||
* @deprecated Legacy reasoning tags - only used for migration of old stored messages.
|
||||
* New messages use the dedicated reasoningContent field.
|
||||
*/
|
||||
export const LEGACY_REASONING_TAGS = {
|
||||
START: '<<<reasoning_content_start>>>',
|
||||
END: '<<<reasoning_content_end>>>'
|
||||
} as const;
|
||||
|
||||
// Regex for trimming leading/trailing newlines
|
||||
export const TRIM_NEWLINES_REGEX = /^\n+|\n+$/g;
|
||||
|
||||
// Regex patterns for parsing agentic content
|
||||
export const AGENTIC_REGEX = {
|
||||
// Matches completed tool calls (with END marker)
|
||||
/**
|
||||
* @deprecated Legacy regex patterns - only used for migration of old stored messages.
|
||||
*/
|
||||
export const LEGACY_AGENTIC_REGEX = {
|
||||
COMPLETED_TOOL_CALL:
|
||||
/<<<AGENTIC_TOOL_CALL_START>>>\n<<<TOOL_NAME:(.+?)>>>\n<<<TOOL_ARGS_START>>>([\s\S]*?)<<<TOOL_ARGS_END>>>([\s\S]*?)<<<AGENTIC_TOOL_CALL_END>>>/g,
|
||||
// Matches pending tool call (has NAME and ARGS but no END)
|
||||
PENDING_TOOL_CALL:
|
||||
/<<<AGENTIC_TOOL_CALL_START>>>\n<<<TOOL_NAME:(.+?)>>>\n<<<TOOL_ARGS_START>>>([\s\S]*?)<<<TOOL_ARGS_END>>>([\s\S]*)$/,
|
||||
// Matches partial tool call (has START and NAME, ARGS still streaming)
|
||||
PARTIAL_WITH_NAME:
|
||||
/<<<AGENTIC_TOOL_CALL_START>>>\n<<<TOOL_NAME:(.+?)>>>\n<<<TOOL_ARGS_START>>>([\s\S]*)$/,
|
||||
// Matches early tool call (just START marker)
|
||||
EARLY_MATCH: /<<<AGENTIC_TOOL_CALL_START>>>([\s\S]*)$/,
|
||||
// Matches partial marker at end of content
|
||||
PARTIAL_MARKER: /<<<[A-Za-z_]*$/,
|
||||
// Matches reasoning content blocks (including tags)
|
||||
REASONING_BLOCK: /<<<reasoning_content_start>>>[\s\S]*?<<<reasoning_content_end>>>/g,
|
||||
// Captures the reasoning text between start/end tags
|
||||
REASONING_EXTRACT: /<<<reasoning_content_start>>>([\s\S]*?)<<<reasoning_content_end>>>/,
|
||||
// Matches an opening reasoning tag and any remaining content (unterminated)
|
||||
REASONING_OPEN: /<<<reasoning_content_start>>>[\s\S]*$/,
|
||||
// Matches a complete agentic tool call display block (start to end marker)
|
||||
AGENTIC_TOOL_CALL_BLOCK: /\n*<<<AGENTIC_TOOL_CALL_START>>>[\s\S]*?<<<AGENTIC_TOOL_CALL_END>>>/g,
|
||||
// Matches a pending/partial agentic tool call (start marker with no matching end)
|
||||
AGENTIC_TOOL_CALL_OPEN: /\n*<<<AGENTIC_TOOL_CALL_START>>>[\s\S]*$/,
|
||||
// Matches tool name inside content
|
||||
TOOL_NAME_EXTRACT: /<<<TOOL_NAME:([^>]+)>>>/
|
||||
HAS_LEGACY_MARKERS: /<<<(?:AGENTIC_TOOL_CALL_START|reasoning_content_start)>>>/
|
||||
} as const;
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { getJsonHeaders, formatAttachmentText, isAbortError } from '$lib/utils';
|
||||
import { getJsonHeaders } from '$lib/utils/api-headers';
|
||||
import { formatAttachmentText } from '$lib/utils/formatters';
|
||||
import { isAbortError } from '$lib/utils/abort';
|
||||
import {
|
||||
AGENTIC_REGEX,
|
||||
ATTACHMENT_LABEL_PDF_FILE,
|
||||
ATTACHMENT_LABEL_MCP_PROMPT,
|
||||
ATTACHMENT_LABEL_MCP_RESOURCE
|
||||
@@ -17,38 +18,6 @@ import type { DatabaseMessageExtraMcpPrompt, DatabaseMessageExtraMcpResource } f
|
||||
import { modelsStore } from '$lib/stores/models.svelte';
|
||||
|
||||
export class ChatService {
|
||||
private static stripReasoningContent(
|
||||
content: ApiChatMessageData['content'] | null | undefined
|
||||
): ApiChatMessageData['content'] | null | undefined {
|
||||
if (!content) {
|
||||
return content;
|
||||
}
|
||||
|
||||
if (typeof content === 'string') {
|
||||
return content
|
||||
.replace(AGENTIC_REGEX.REASONING_BLOCK, '')
|
||||
.replace(AGENTIC_REGEX.REASONING_OPEN, '')
|
||||
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_BLOCK, '')
|
||||
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN, '');
|
||||
}
|
||||
|
||||
if (!Array.isArray(content)) {
|
||||
return content;
|
||||
}
|
||||
|
||||
return content.map((part: ApiChatMessageContentPart) => {
|
||||
if (part.type !== ContentPartType.TEXT || !part.text) return part;
|
||||
return {
|
||||
...part,
|
||||
text: part.text
|
||||
.replace(AGENTIC_REGEX.REASONING_BLOCK, '')
|
||||
.replace(AGENTIC_REGEX.REASONING_OPEN, '')
|
||||
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_BLOCK, '')
|
||||
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN, '')
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
@@ -57,46 +26,6 @@ export class ChatService {
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* Extracts reasoning text from content that contains internal reasoning tags.
|
||||
* Returns the concatenated reasoning content or undefined if none found.
|
||||
*/
|
||||
private static extractReasoningFromContent(
|
||||
content: ApiChatMessageData['content'] | null | undefined
|
||||
): string | undefined {
|
||||
if (!content) return undefined;
|
||||
|
||||
const extractFromString = (text: string): string => {
|
||||
const parts: string[] = [];
|
||||
// Use a fresh regex instance to avoid shared lastIndex state
|
||||
const re = new RegExp(AGENTIC_REGEX.REASONING_EXTRACT.source);
|
||||
let match = re.exec(text);
|
||||
while (match) {
|
||||
parts.push(match[1]);
|
||||
// advance past the matched portion and retry
|
||||
text = text.slice(match.index + match[0].length);
|
||||
match = re.exec(text);
|
||||
}
|
||||
return parts.join('');
|
||||
};
|
||||
|
||||
if (typeof content === 'string') {
|
||||
const result = extractFromString(content);
|
||||
return result || undefined;
|
||||
}
|
||||
|
||||
if (!Array.isArray(content)) return undefined;
|
||||
|
||||
const parts: string[] = [];
|
||||
for (const part of content) {
|
||||
if (part.type === ContentPartType.TEXT && part.text) {
|
||||
const result = extractFromString(part.text);
|
||||
if (result) parts.push(result);
|
||||
}
|
||||
}
|
||||
return parts.length > 0 ? parts.join('') : undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sends a chat completion request to the llama.cpp server.
|
||||
* Supports both streaming and non-streaming responses with comprehensive parameter configuration.
|
||||
@@ -201,20 +130,15 @@ export class ChatService {
|
||||
|
||||
const requestBody: ApiChatCompletionRequest = {
|
||||
messages: normalizedMessages.map((msg: ApiChatMessageData) => {
|
||||
// Always strip internal reasoning/agentic tags from content
|
||||
const cleanedContent = ChatService.stripReasoningContent(msg.content);
|
||||
const mapped: ApiChatCompletionRequest['messages'][0] = {
|
||||
role: msg.role,
|
||||
content: cleanedContent,
|
||||
content: msg.content,
|
||||
tool_calls: msg.tool_calls,
|
||||
tool_call_id: msg.tool_call_id
|
||||
};
|
||||
// When preserving reasoning, extract it from raw content and send as separate field
|
||||
if (!excludeReasoningFromContext) {
|
||||
const reasoning = ChatService.extractReasoningFromContent(msg.content);
|
||||
if (reasoning) {
|
||||
mapped.reasoning_content = reasoning;
|
||||
}
|
||||
// Include reasoning_content from the dedicated field
|
||||
if (!excludeReasoningFromContext && msg.reasoning_content) {
|
||||
mapped.reasoning_content = msg.reasoning_content;
|
||||
}
|
||||
return mapped;
|
||||
}),
|
||||
@@ -730,6 +654,10 @@ export class ChatService {
|
||||
content: message.content
|
||||
};
|
||||
|
||||
if (message.reasoningContent) {
|
||||
result.reasoning_content = message.reasoningContent;
|
||||
}
|
||||
|
||||
if (toolCalls && toolCalls.length > 0) {
|
||||
result.tool_calls = toolCalls;
|
||||
}
|
||||
@@ -858,6 +786,9 @@ export class ChatService {
|
||||
role: message.role as MessageRole,
|
||||
content: contentParts
|
||||
};
|
||||
if (message.reasoningContent) {
|
||||
result.reasoning_content = message.reasoningContent;
|
||||
}
|
||||
if (toolCalls && toolCalls.length > 0) {
|
||||
result.tool_calls = toolCalls;
|
||||
}
|
||||
|
||||
@@ -42,6 +42,7 @@ import type {
|
||||
import {
|
||||
buildProxiedUrl,
|
||||
buildProxiedHeaders,
|
||||
getAuthHeaders,
|
||||
throwIfAborted,
|
||||
isAbortError,
|
||||
createBase64DataUrl
|
||||
@@ -124,7 +125,14 @@ export class MCPService {
|
||||
const requestInit: RequestInit = {};
|
||||
|
||||
if (config.headers) {
|
||||
requestInit.headers = buildProxiedHeaders(config.headers);
|
||||
requestInit.headers = config.useProxy ? buildProxiedHeaders(config.headers) : config.headers;
|
||||
}
|
||||
|
||||
if (useProxy) {
|
||||
requestInit.headers = {
|
||||
...getAuthHeaders(),
|
||||
...(requestInit.headers as Record<string, string>)
|
||||
};
|
||||
}
|
||||
|
||||
if (config.credentials) {
|
||||
|
||||
@@ -7,6 +7,10 @@
|
||||
* - Session state management
|
||||
* - Turn limit enforcement
|
||||
*
|
||||
* Each agentic turn produces separate DB messages:
|
||||
* - One assistant message per LLM turn (with tool_calls if any)
|
||||
* - One tool result message per tool call execution
|
||||
*
|
||||
* **Architecture & Relationships:**
|
||||
* - **ChatService**: Stateless API layer (sendMessage, streaming)
|
||||
* - **mcpStore**: MCP connection management and tool execution
|
||||
@@ -16,7 +20,6 @@
|
||||
* @see mcpStore in stores/mcp.svelte.ts for MCP operations
|
||||
*/
|
||||
|
||||
import { SvelteMap } from 'svelte/reactivity';
|
||||
import { ChatService } from '$lib/services';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
import { mcpStore } from '$lib/stores/mcp.svelte';
|
||||
@@ -24,7 +27,6 @@ import { modelsStore } from '$lib/stores/models.svelte';
|
||||
import { isAbortError } from '$lib/utils';
|
||||
import {
|
||||
DEFAULT_AGENTIC_CONFIG,
|
||||
AGENTIC_TAGS,
|
||||
NEWLINE_SEPARATOR,
|
||||
TURN_LIMIT_MESSAGE,
|
||||
LLM_ERROR_BLOCK_START,
|
||||
@@ -193,17 +195,6 @@ class AgenticStore {
|
||||
|
||||
async runAgenticFlow(params: AgenticFlowParams): Promise<AgenticFlowResult> {
|
||||
const { conversationId, messages, options = {}, callbacks, signal, perChatOverrides } = params;
|
||||
const {
|
||||
onChunk,
|
||||
onReasoningChunk,
|
||||
onToolCallChunk,
|
||||
onAttachments,
|
||||
onModel,
|
||||
onComplete,
|
||||
onError,
|
||||
onTimings,
|
||||
onTurnComplete
|
||||
} = callbacks;
|
||||
|
||||
const agenticConfig = this.getConfig(config(), perChatOverrides);
|
||||
if (!agenticConfig.enabled) return { handled: false };
|
||||
@@ -253,24 +244,14 @@ class AgenticStore {
|
||||
options,
|
||||
tools,
|
||||
agenticConfig,
|
||||
callbacks: {
|
||||
onChunk,
|
||||
onReasoningChunk,
|
||||
onToolCallChunk,
|
||||
onAttachments,
|
||||
onModel,
|
||||
onComplete,
|
||||
onError,
|
||||
onTimings,
|
||||
onTurnComplete
|
||||
},
|
||||
callbacks,
|
||||
signal
|
||||
});
|
||||
return { handled: true };
|
||||
} catch (error) {
|
||||
const normalizedError = error instanceof Error ? error : new Error(String(error));
|
||||
this.updateSession(conversationId, { lastError: normalizedError });
|
||||
onError?.(normalizedError);
|
||||
callbacks.onError?.(normalizedError);
|
||||
return { handled: true, error: normalizedError };
|
||||
} finally {
|
||||
this.updateSession(conversationId, { isRunning: false });
|
||||
@@ -295,17 +276,20 @@ class AgenticStore {
|
||||
const {
|
||||
onChunk,
|
||||
onReasoningChunk,
|
||||
onToolCallChunk,
|
||||
onToolCallsStreaming,
|
||||
onAttachments,
|
||||
onModel,
|
||||
onComplete,
|
||||
onAssistantTurnComplete,
|
||||
createToolResultMessage,
|
||||
createAssistantMessage,
|
||||
onFlowComplete,
|
||||
onTimings,
|
||||
onTurnComplete
|
||||
} = callbacks;
|
||||
|
||||
const sessionMessages: AgenticMessage[] = toAgenticMessages(messages);
|
||||
const allToolCalls: ApiChatCompletionToolCall[] = [];
|
||||
let capturedTimings: ChatMessageTimings | undefined;
|
||||
let totalToolCallCount = 0;
|
||||
|
||||
const agenticTimings: ChatMessageAgenticTimings = {
|
||||
turns: 0,
|
||||
@@ -316,12 +300,7 @@ class AgenticStore {
|
||||
llm: { predicted_n: 0, predicted_ms: 0, prompt_n: 0, prompt_ms: 0 }
|
||||
};
|
||||
const maxTurns = agenticConfig.maxTurns;
|
||||
const maxToolPreviewLines = agenticConfig.maxToolPreviewLines;
|
||||
|
||||
// Resolve effective model for vision capability checks.
|
||||
// In ROUTER mode, options.model is always set by the caller.
|
||||
// In MODEL mode, options.model is undefined; use the single loaded model
|
||||
// which carries modalities bridged from /props.
|
||||
const effectiveModel = options.model || modelsStore.models[0]?.model || '';
|
||||
|
||||
for (let turn = 0; turn < maxTurns; turn++) {
|
||||
@@ -329,23 +308,20 @@ class AgenticStore {
|
||||
agenticTimings.turns = turn + 1;
|
||||
|
||||
if (signal?.aborted) {
|
||||
onComplete?.(
|
||||
'',
|
||||
undefined,
|
||||
this.buildFinalTimings(capturedTimings, agenticTimings),
|
||||
undefined
|
||||
);
|
||||
onFlowComplete?.(this.buildFinalTimings(capturedTimings, agenticTimings));
|
||||
return;
|
||||
}
|
||||
|
||||
// For turns > 0, create a new assistant message via callback
|
||||
if (turn > 0 && createAssistantMessage) {
|
||||
await createAssistantMessage();
|
||||
}
|
||||
|
||||
let turnContent = '';
|
||||
let turnReasoningContent = '';
|
||||
let turnToolCalls: ApiChatCompletionToolCall[] = [];
|
||||
let lastStreamingToolCallName = '';
|
||||
let lastStreamingToolCallArgsLength = 0;
|
||||
const emittedToolCallStates = new SvelteMap<
|
||||
number,
|
||||
{ emittedOnce: boolean; lastArgs: string }
|
||||
>();
|
||||
let turnTimings: ChatMessageTimings | undefined;
|
||||
|
||||
const turnStats: ChatMessageAgenticTurnStats = {
|
||||
@@ -366,30 +342,15 @@ class AgenticStore {
|
||||
turnContent += chunk;
|
||||
onChunk?.(chunk);
|
||||
},
|
||||
onReasoningChunk,
|
||||
onReasoningChunk: (chunk: string) => {
|
||||
turnReasoningContent += chunk;
|
||||
onReasoningChunk?.(chunk);
|
||||
},
|
||||
onToolCallChunk: (serialized: string) => {
|
||||
try {
|
||||
turnToolCalls = JSON.parse(serialized) as ApiChatCompletionToolCall[];
|
||||
for (let i = 0; i < turnToolCalls.length; i++) {
|
||||
const toolCall = turnToolCalls[i];
|
||||
const toolName = toolCall.function?.name ?? '';
|
||||
const toolArgs = toolCall.function?.arguments ?? '';
|
||||
const state = emittedToolCallStates.get(i) || {
|
||||
emittedOnce: false,
|
||||
lastArgs: ''
|
||||
};
|
||||
if (!state.emittedOnce) {
|
||||
const output = `\n\n${AGENTIC_TAGS.TOOL_CALL_START}\n${AGENTIC_TAGS.TOOL_NAME_PREFIX}${toolName}${AGENTIC_TAGS.TAG_SUFFIX}\n${AGENTIC_TAGS.TOOL_ARGS_START}\n${toolArgs}`;
|
||||
onChunk?.(output);
|
||||
state.emittedOnce = true;
|
||||
state.lastArgs = toolArgs;
|
||||
emittedToolCallStates.set(i, state);
|
||||
} else if (toolArgs.length > state.lastArgs.length) {
|
||||
onChunk?.(toolArgs.slice(state.lastArgs.length));
|
||||
state.lastArgs = toolArgs;
|
||||
emittedToolCallStates.set(i, state);
|
||||
}
|
||||
}
|
||||
onToolCallsStreaming?.(turnToolCalls);
|
||||
|
||||
if (turnToolCalls.length > 0 && turnToolCalls[0]?.function) {
|
||||
const name = turnToolCalls[0].function.name || '';
|
||||
const args = turnToolCalls[0].function.arguments || '';
|
||||
@@ -442,77 +403,84 @@ class AgenticStore {
|
||||
}
|
||||
} catch (error) {
|
||||
if (signal?.aborted) {
|
||||
onComplete?.(
|
||||
'',
|
||||
undefined,
|
||||
// Save whatever we have for this turn before exiting
|
||||
await onAssistantTurnComplete?.(
|
||||
turnContent,
|
||||
turnReasoningContent || undefined,
|
||||
this.buildFinalTimings(capturedTimings, agenticTimings),
|
||||
undefined
|
||||
);
|
||||
|
||||
onFlowComplete?.(this.buildFinalTimings(capturedTimings, agenticTimings));
|
||||
return;
|
||||
}
|
||||
const normalizedError = error instanceof Error ? error : new Error('LLM stream error');
|
||||
// Save error as content in the current turn
|
||||
onChunk?.(`${LLM_ERROR_BLOCK_START}${normalizedError.message}${LLM_ERROR_BLOCK_END}`);
|
||||
onComplete?.(
|
||||
'',
|
||||
undefined,
|
||||
await onAssistantTurnComplete?.(
|
||||
turnContent + `${LLM_ERROR_BLOCK_START}${normalizedError.message}${LLM_ERROR_BLOCK_END}`,
|
||||
turnReasoningContent || undefined,
|
||||
this.buildFinalTimings(capturedTimings, agenticTimings),
|
||||
undefined
|
||||
);
|
||||
|
||||
onFlowComplete?.(this.buildFinalTimings(capturedTimings, agenticTimings));
|
||||
throw normalizedError;
|
||||
}
|
||||
|
||||
// No tool calls = final turn, save and complete
|
||||
if (turnToolCalls.length === 0) {
|
||||
agenticTimings.perTurn!.push(turnStats);
|
||||
|
||||
onComplete?.(
|
||||
'',
|
||||
undefined,
|
||||
this.buildFinalTimings(capturedTimings, agenticTimings),
|
||||
const finalTimings = this.buildFinalTimings(capturedTimings, agenticTimings);
|
||||
|
||||
await onAssistantTurnComplete?.(
|
||||
turnContent,
|
||||
turnReasoningContent || undefined,
|
||||
finalTimings,
|
||||
undefined
|
||||
);
|
||||
|
||||
if (finalTimings) onTurnComplete?.(finalTimings);
|
||||
|
||||
onFlowComplete?.(finalTimings);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Normalize and save assistant turn with tool calls
|
||||
const normalizedCalls = this.normalizeToolCalls(turnToolCalls);
|
||||
if (normalizedCalls.length === 0) {
|
||||
onComplete?.(
|
||||
'',
|
||||
undefined,
|
||||
await onAssistantTurnComplete?.(
|
||||
turnContent,
|
||||
turnReasoningContent || undefined,
|
||||
this.buildFinalTimings(capturedTimings, agenticTimings),
|
||||
undefined
|
||||
);
|
||||
onFlowComplete?.(this.buildFinalTimings(capturedTimings, agenticTimings));
|
||||
return;
|
||||
}
|
||||
|
||||
for (const call of normalizedCalls) {
|
||||
allToolCalls.push({
|
||||
id: call.id,
|
||||
type: call.type,
|
||||
function: call.function ? { ...call.function } : undefined
|
||||
});
|
||||
}
|
||||
totalToolCallCount += normalizedCalls.length;
|
||||
this.updateSession(conversationId, { totalToolCalls: totalToolCallCount });
|
||||
|
||||
this.updateSession(conversationId, { totalToolCalls: allToolCalls.length });
|
||||
onToolCallChunk?.(JSON.stringify(allToolCalls));
|
||||
// Save the assistant message with its tool calls
|
||||
await onAssistantTurnComplete?.(
|
||||
turnContent,
|
||||
turnReasoningContent || undefined,
|
||||
turnTimings,
|
||||
normalizedCalls
|
||||
);
|
||||
|
||||
// Add assistant message to session history
|
||||
sessionMessages.push({
|
||||
role: MessageRole.ASSISTANT,
|
||||
content: turnContent || undefined,
|
||||
tool_calls: normalizedCalls
|
||||
});
|
||||
|
||||
// Execute each tool call and create result messages
|
||||
for (const toolCall of normalizedCalls) {
|
||||
if (signal?.aborted) {
|
||||
onComplete?.(
|
||||
'',
|
||||
undefined,
|
||||
this.buildFinalTimings(capturedTimings, agenticTimings),
|
||||
undefined
|
||||
);
|
||||
|
||||
onFlowComplete?.(this.buildFinalTimings(capturedTimings, agenticTimings));
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -530,13 +498,7 @@ class AgenticStore {
|
||||
result = executionResult.content;
|
||||
} catch (error) {
|
||||
if (isAbortError(error)) {
|
||||
onComplete?.(
|
||||
'',
|
||||
undefined,
|
||||
this.buildFinalTimings(capturedTimings, agenticTimings),
|
||||
undefined
|
||||
);
|
||||
|
||||
onFlowComplete?.(this.buildFinalTimings(capturedTimings, agenticTimings));
|
||||
return;
|
||||
}
|
||||
result = `Error: ${error instanceof Error ? error.message : String(error)}`;
|
||||
@@ -557,21 +519,27 @@ class AgenticStore {
|
||||
turnStats.toolsMs += Math.round(toolDurationMs);
|
||||
|
||||
if (signal?.aborted) {
|
||||
onComplete?.(
|
||||
'',
|
||||
undefined,
|
||||
this.buildFinalTimings(capturedTimings, agenticTimings),
|
||||
undefined
|
||||
);
|
||||
|
||||
onFlowComplete?.(this.buildFinalTimings(capturedTimings, agenticTimings));
|
||||
return;
|
||||
}
|
||||
|
||||
const { cleanedResult, attachments } = this.extractBase64Attachments(result);
|
||||
if (attachments.length > 0) onAttachments?.(attachments);
|
||||
|
||||
this.emitToolCallResult(cleanedResult, maxToolPreviewLines, onChunk);
|
||||
// Create the tool result message in the DB
|
||||
let toolResultMessage: DatabaseMessage | undefined;
|
||||
if (createToolResultMessage) {
|
||||
toolResultMessage = await createToolResultMessage(
|
||||
toolCall.id,
|
||||
cleanedResult,
|
||||
attachments.length > 0 ? attachments : undefined
|
||||
);
|
||||
}
|
||||
|
||||
if (attachments.length > 0 && toolResultMessage) {
|
||||
onAttachments?.(toolResultMessage.id, attachments);
|
||||
}
|
||||
|
||||
// Build content parts for session history (including images for vision models)
|
||||
const contentParts: ApiChatMessageContentPart[] = [
|
||||
{ type: ContentPartType.TEXT, text: cleanedResult }
|
||||
];
|
||||
@@ -605,8 +573,15 @@ class AgenticStore {
|
||||
}
|
||||
}
|
||||
|
||||
// Turn limit reached
|
||||
onChunk?.(TURN_LIMIT_MESSAGE);
|
||||
onComplete?.('', undefined, this.buildFinalTimings(capturedTimings, agenticTimings), undefined);
|
||||
await onAssistantTurnComplete?.(
|
||||
TURN_LIMIT_MESSAGE,
|
||||
undefined,
|
||||
this.buildFinalTimings(capturedTimings, agenticTimings),
|
||||
undefined
|
||||
);
|
||||
onFlowComplete?.(this.buildFinalTimings(capturedTimings, agenticTimings));
|
||||
}
|
||||
|
||||
private buildFinalTimings(
|
||||
@@ -633,23 +608,6 @@ class AgenticStore {
|
||||
}));
|
||||
}
|
||||
|
||||
private emitToolCallResult(
|
||||
result: string,
|
||||
maxLines: number,
|
||||
emit?: (chunk: string) => void
|
||||
): void {
|
||||
if (!emit) {
|
||||
return;
|
||||
}
|
||||
|
||||
let output = `${NEWLINE_SEPARATOR}${AGENTIC_TAGS.TOOL_ARGS_END}`;
|
||||
const lines = result.split(NEWLINE_SEPARATOR);
|
||||
const trimmedLines = lines.length > maxLines ? lines.slice(-maxLines) : lines;
|
||||
|
||||
output += `${NEWLINE_SEPARATOR}${trimmedLines.join(NEWLINE_SEPARATOR)}${NEWLINE_SEPARATOR}${AGENTIC_TAGS.TOOL_CALL_END}${NEWLINE_SEPARATOR}`;
|
||||
emit(output);
|
||||
}
|
||||
|
||||
private extractBase64Attachments(result: string): {
|
||||
cleanedResult: string;
|
||||
attachments: DatabaseMessageExtra[];
|
||||
|
||||
@@ -12,7 +12,8 @@
|
||||
*/
|
||||
|
||||
import { SvelteMap } from 'svelte/reactivity';
|
||||
import { DatabaseService, ChatService } from '$lib/services';
|
||||
import { DatabaseService } from '$lib/services/database.service';
|
||||
import { ChatService } from '$lib/services/chat.service';
|
||||
import { conversationsStore } from '$lib/stores/conversations.svelte';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
import { agenticStore } from '$lib/stores/agentic.svelte';
|
||||
@@ -34,7 +35,6 @@ import {
|
||||
import {
|
||||
MAX_INACTIVE_CONVERSATION_STATES,
|
||||
INACTIVE_CONVERSATION_STATE_MAX_AGE_MS,
|
||||
REASONING_TAGS,
|
||||
SYSTEM_MESSAGE_PLACEHOLDER
|
||||
} from '$lib/constants';
|
||||
import type {
|
||||
@@ -50,15 +50,6 @@ interface ConversationStateEntry {
|
||||
lastAccessed: number;
|
||||
}
|
||||
|
||||
const countOccurrences = (source: string, token: string): number =>
|
||||
source ? source.split(token).length - 1 : 0;
|
||||
const hasUnclosedReasoningTag = (content: string): boolean =>
|
||||
countOccurrences(content, REASONING_TAGS.START) > countOccurrences(content, REASONING_TAGS.END);
|
||||
const wrapReasoningContent = (content: string, reasoningContent?: string): string => {
|
||||
if (!reasoningContent) return content;
|
||||
return `${REASONING_TAGS.START}${reasoningContent}${REASONING_TAGS.END}${content}`;
|
||||
};
|
||||
|
||||
class ChatStore {
|
||||
activeProcessingState = $state<ApiProcessingState | null>(null);
|
||||
currentResponse = $state('');
|
||||
@@ -557,83 +548,76 @@ class ChatStore {
|
||||
await modelsStore.fetchModelProps(effectiveModel);
|
||||
}
|
||||
|
||||
let streamedContent = '',
|
||||
streamedToolCallContent = '',
|
||||
isReasoningOpen = false,
|
||||
hasStreamedChunks = false,
|
||||
resolvedModel: string | null = null,
|
||||
modelPersisted = false;
|
||||
let streamedExtras: DatabaseMessageExtra[] = assistantMessage.extra
|
||||
? JSON.parse(JSON.stringify(assistantMessage.extra))
|
||||
: [];
|
||||
// Mutable state for the current message being streamed
|
||||
let currentMessageId = assistantMessage.id;
|
||||
let streamedContent = '';
|
||||
let streamedReasoningContent = '';
|
||||
let resolvedModel: string | null = null;
|
||||
let modelPersisted = false;
|
||||
const convId = assistantMessage.convId;
|
||||
|
||||
const recordModel = (modelName: string | null | undefined, persistImmediately = true): void => {
|
||||
if (!modelName) return;
|
||||
const n = normalizeModelName(modelName);
|
||||
if (!n || n === resolvedModel) return;
|
||||
resolvedModel = n;
|
||||
const idx = conversationsStore.findMessageIndex(assistantMessage.id);
|
||||
const idx = conversationsStore.findMessageIndex(currentMessageId);
|
||||
conversationsStore.updateMessageAtIndex(idx, { model: n });
|
||||
if (persistImmediately && !modelPersisted) {
|
||||
modelPersisted = true;
|
||||
DatabaseService.updateMessage(assistantMessage.id, { model: n }).catch(() => {
|
||||
DatabaseService.updateMessage(currentMessageId, { model: n }).catch(() => {
|
||||
modelPersisted = false;
|
||||
resolvedModel = null;
|
||||
});
|
||||
}
|
||||
};
|
||||
const updateStreamingContent = () => {
|
||||
this.setChatStreaming(assistantMessage.convId, streamedContent, assistantMessage.id);
|
||||
const idx = conversationsStore.findMessageIndex(assistantMessage.id);
|
||||
|
||||
const updateStreamingUI = () => {
|
||||
this.setChatStreaming(convId, streamedContent, currentMessageId);
|
||||
const idx = conversationsStore.findMessageIndex(currentMessageId);
|
||||
conversationsStore.updateMessageAtIndex(idx, { content: streamedContent });
|
||||
};
|
||||
const appendContentChunk = (chunk: string) => {
|
||||
if (isReasoningOpen) {
|
||||
streamedContent += REASONING_TAGS.END;
|
||||
isReasoningOpen = false;
|
||||
}
|
||||
streamedContent += chunk;
|
||||
hasStreamedChunks = true;
|
||||
updateStreamingContent();
|
||||
};
|
||||
const appendReasoningChunk = (chunk: string) => {
|
||||
if (!isReasoningOpen) {
|
||||
streamedContent += REASONING_TAGS.START;
|
||||
isReasoningOpen = true;
|
||||
}
|
||||
streamedContent += chunk;
|
||||
hasStreamedChunks = true;
|
||||
updateStreamingContent();
|
||||
};
|
||||
const finalizeReasoning = () => {
|
||||
if (isReasoningOpen) {
|
||||
streamedContent += REASONING_TAGS.END;
|
||||
isReasoningOpen = false;
|
||||
}
|
||||
|
||||
const cleanupStreamingState = () => {
|
||||
this.setStreamingActive(false);
|
||||
this.setChatLoading(convId, false);
|
||||
this.clearChatStreaming(convId);
|
||||
this.setProcessingState(convId, null);
|
||||
};
|
||||
|
||||
this.setStreamingActive(true);
|
||||
this.setActiveProcessingConversation(assistantMessage.convId);
|
||||
const abortController = this.getOrCreateAbortController(assistantMessage.convId);
|
||||
this.setActiveProcessingConversation(convId);
|
||||
const abortController = this.getOrCreateAbortController(convId);
|
||||
|
||||
const streamCallbacks: ChatStreamCallbacks = {
|
||||
onChunk: (chunk: string) => appendContentChunk(chunk),
|
||||
onReasoningChunk: (chunk: string) => appendReasoningChunk(chunk),
|
||||
onToolCallChunk: (chunk: string) => {
|
||||
const c = chunk.trim();
|
||||
if (!c) return;
|
||||
streamedToolCallContent = c;
|
||||
const idx = conversationsStore.findMessageIndex(assistantMessage.id);
|
||||
conversationsStore.updateMessageAtIndex(idx, { toolCalls: streamedToolCallContent });
|
||||
onChunk: (chunk: string) => {
|
||||
streamedContent += chunk;
|
||||
updateStreamingUI();
|
||||
},
|
||||
onAttachments: (extras: DatabaseMessageExtra[]) => {
|
||||
onReasoningChunk: (chunk: string) => {
|
||||
streamedReasoningContent += chunk;
|
||||
// Update UI to show reasoning is being received
|
||||
const idx = conversationsStore.findMessageIndex(currentMessageId);
|
||||
conversationsStore.updateMessageAtIndex(idx, {
|
||||
reasoningContent: streamedReasoningContent
|
||||
});
|
||||
},
|
||||
onToolCallsStreaming: (toolCalls) => {
|
||||
const idx = conversationsStore.findMessageIndex(currentMessageId);
|
||||
conversationsStore.updateMessageAtIndex(idx, { toolCalls: JSON.stringify(toolCalls) });
|
||||
},
|
||||
onAttachments: (messageId: string, extras: DatabaseMessageExtra[]) => {
|
||||
if (!extras.length) return;
|
||||
streamedExtras = [...streamedExtras, ...extras];
|
||||
const idx = conversationsStore.findMessageIndex(assistantMessage.id);
|
||||
conversationsStore.updateMessageAtIndex(idx, { extra: streamedExtras });
|
||||
DatabaseService.updateMessage(assistantMessage.id, { extra: streamedExtras }).catch(
|
||||
console.error
|
||||
);
|
||||
const idx = conversationsStore.findMessageIndex(messageId);
|
||||
if (idx === -1) return;
|
||||
const msg = conversationsStore.activeMessages[idx];
|
||||
const updatedExtras = [...(msg.extra || []), ...extras];
|
||||
conversationsStore.updateMessageAtIndex(idx, { extra: updatedExtras });
|
||||
DatabaseService.updateMessage(messageId, { extra: updatedExtras }).catch(console.error);
|
||||
},
|
||||
onModel: (modelName: string) => recordModel(modelName),
|
||||
onTurnComplete: (intermediateTimings: ChatMessageTimings) => {
|
||||
// Update the first assistant message with cumulative agentic timings
|
||||
const idx = conversationsStore.findMessageIndex(assistantMessage.id);
|
||||
conversationsStore.updateMessageAtIndex(idx, { timings: intermediateTimings });
|
||||
},
|
||||
@@ -651,56 +635,104 @@ class ChatStore {
|
||||
cache_n: timings?.cache_n || 0,
|
||||
prompt_progress: promptProgress
|
||||
},
|
||||
assistantMessage.convId
|
||||
convId
|
||||
);
|
||||
},
|
||||
onComplete: async (
|
||||
finalContent?: string,
|
||||
reasoningContent?: string,
|
||||
timings?: ChatMessageTimings,
|
||||
toolCallContent?: string
|
||||
onAssistantTurnComplete: async (
|
||||
content: string,
|
||||
reasoningContent: string | undefined,
|
||||
timings: ChatMessageTimings | undefined,
|
||||
toolCalls: import('$lib/types/api').ApiChatCompletionToolCall[] | undefined
|
||||
) => {
|
||||
this.setStreamingActive(false);
|
||||
finalizeReasoning();
|
||||
const combinedContent = hasStreamedChunks
|
||||
? streamedContent
|
||||
: wrapReasoningContent(finalContent || '', reasoningContent);
|
||||
const updateData: Record<string, unknown> = {
|
||||
content: combinedContent,
|
||||
toolCalls: toolCallContent || streamedToolCallContent,
|
||||
content,
|
||||
reasoningContent: reasoningContent || undefined,
|
||||
toolCalls: toolCalls ? JSON.stringify(toolCalls) : '',
|
||||
timings
|
||||
};
|
||||
if (streamedExtras.length > 0) updateData.extra = streamedExtras;
|
||||
if (resolvedModel && !modelPersisted) updateData.model = resolvedModel;
|
||||
await DatabaseService.updateMessage(assistantMessage.id, updateData);
|
||||
const idx = conversationsStore.findMessageIndex(assistantMessage.id);
|
||||
await DatabaseService.updateMessage(currentMessageId, updateData);
|
||||
const idx = conversationsStore.findMessageIndex(currentMessageId);
|
||||
const uiUpdate: Partial<DatabaseMessage> = {
|
||||
content: combinedContent,
|
||||
toolCalls: updateData.toolCalls as string
|
||||
content,
|
||||
reasoningContent: reasoningContent || undefined,
|
||||
toolCalls: toolCalls ? JSON.stringify(toolCalls) : ''
|
||||
};
|
||||
if (streamedExtras.length > 0) uiUpdate.extra = streamedExtras;
|
||||
if (timings) uiUpdate.timings = timings;
|
||||
if (resolvedModel) uiUpdate.model = resolvedModel;
|
||||
conversationsStore.updateMessageAtIndex(idx, uiUpdate);
|
||||
await conversationsStore.updateCurrentNode(assistantMessage.id);
|
||||
if (onComplete) await onComplete(combinedContent);
|
||||
this.setChatLoading(assistantMessage.convId, false);
|
||||
this.clearChatStreaming(assistantMessage.convId);
|
||||
this.setProcessingState(assistantMessage.convId, null);
|
||||
await conversationsStore.updateCurrentNode(currentMessageId);
|
||||
},
|
||||
createToolResultMessage: async (
|
||||
toolCallId: string,
|
||||
content: string,
|
||||
extras?: DatabaseMessageExtra[]
|
||||
) => {
|
||||
const msg = await DatabaseService.createMessageBranch(
|
||||
{
|
||||
convId,
|
||||
type: MessageType.TEXT,
|
||||
role: MessageRole.TOOL,
|
||||
content,
|
||||
toolCallId,
|
||||
timestamp: Date.now(),
|
||||
toolCalls: '',
|
||||
children: [],
|
||||
extra: extras
|
||||
},
|
||||
currentMessageId
|
||||
);
|
||||
conversationsStore.addMessageToActive(msg);
|
||||
await conversationsStore.updateCurrentNode(msg.id);
|
||||
return msg;
|
||||
},
|
||||
createAssistantMessage: async () => {
|
||||
// Reset streaming state for new message
|
||||
streamedContent = '';
|
||||
streamedReasoningContent = '';
|
||||
|
||||
const lastMsg =
|
||||
conversationsStore.activeMessages[conversationsStore.activeMessages.length - 1];
|
||||
const msg = await DatabaseService.createMessageBranch(
|
||||
{
|
||||
convId,
|
||||
type: MessageType.TEXT,
|
||||
role: MessageRole.ASSISTANT,
|
||||
content: '',
|
||||
timestamp: Date.now(),
|
||||
toolCalls: '',
|
||||
children: [],
|
||||
model: resolvedModel
|
||||
},
|
||||
lastMsg.id
|
||||
);
|
||||
conversationsStore.addMessageToActive(msg);
|
||||
currentMessageId = msg.id;
|
||||
return msg;
|
||||
},
|
||||
onFlowComplete: (finalTimings?: ChatMessageTimings) => {
|
||||
if (finalTimings) {
|
||||
const idx = conversationsStore.findMessageIndex(assistantMessage.id);
|
||||
|
||||
conversationsStore.updateMessageAtIndex(idx, { timings: finalTimings });
|
||||
DatabaseService.updateMessage(assistantMessage.id, { timings: finalTimings }).catch(
|
||||
console.error
|
||||
);
|
||||
}
|
||||
|
||||
cleanupStreamingState();
|
||||
|
||||
if (onComplete) onComplete(streamedContent);
|
||||
if (isRouterMode()) modelsStore.fetchRouterModels().catch(console.error);
|
||||
},
|
||||
onError: (error: Error) => {
|
||||
this.setStreamingActive(false);
|
||||
if (isAbortError(error)) {
|
||||
this.setChatLoading(assistantMessage.convId, false);
|
||||
this.clearChatStreaming(assistantMessage.convId);
|
||||
this.setProcessingState(assistantMessage.convId, null);
|
||||
cleanupStreamingState();
|
||||
return;
|
||||
}
|
||||
console.error('Streaming error:', error);
|
||||
this.setChatLoading(assistantMessage.convId, false);
|
||||
this.clearChatStreaming(assistantMessage.convId);
|
||||
this.setProcessingState(assistantMessage.convId, null);
|
||||
cleanupStreamingState();
|
||||
const idx = conversationsStore.findMessageIndex(assistantMessage.id);
|
||||
if (idx !== -1) {
|
||||
const failedMessage = conversationsStore.removeMessageAtIndex(idx);
|
||||
@@ -717,12 +749,13 @@ class ChatStore {
|
||||
if (onError) onError(error);
|
||||
}
|
||||
};
|
||||
|
||||
const perChatOverrides = conversationsStore.activeConversation?.mcpServerOverrides;
|
||||
|
||||
const agenticConfig = agenticStore.getConfig(config(), perChatOverrides);
|
||||
if (agenticConfig.enabled) {
|
||||
const agenticResult = await agenticStore.runAgenticFlow({
|
||||
conversationId: assistantMessage.convId,
|
||||
conversationId: convId,
|
||||
messages: allMessages,
|
||||
options: { ...this.getApiOptions(), ...(effectiveModel ? { model: effectiveModel } : {}) },
|
||||
callbacks: streamCallbacks,
|
||||
@@ -732,16 +765,50 @@ class ChatStore {
|
||||
if (agenticResult.handled) return;
|
||||
}
|
||||
|
||||
const completionOptions = {
|
||||
...this.getApiOptions(),
|
||||
...(effectiveModel ? { model: effectiveModel } : {}),
|
||||
...streamCallbacks
|
||||
};
|
||||
|
||||
// Non-agentic path: direct streaming into the single assistant message
|
||||
await ChatService.sendMessage(
|
||||
allMessages,
|
||||
completionOptions,
|
||||
assistantMessage.convId,
|
||||
{
|
||||
...this.getApiOptions(),
|
||||
...(effectiveModel ? { model: effectiveModel } : {}),
|
||||
stream: true,
|
||||
onChunk: streamCallbacks.onChunk,
|
||||
onReasoningChunk: streamCallbacks.onReasoningChunk,
|
||||
onModel: streamCallbacks.onModel,
|
||||
onTimings: streamCallbacks.onTimings,
|
||||
onComplete: async (
|
||||
finalContent?: string,
|
||||
reasoningContent?: string,
|
||||
timings?: ChatMessageTimings,
|
||||
toolCalls?: string
|
||||
) => {
|
||||
const content = streamedContent || finalContent || '';
|
||||
const reasoning = streamedReasoningContent || reasoningContent;
|
||||
const updateData: Record<string, unknown> = {
|
||||
content,
|
||||
reasoningContent: reasoning || undefined,
|
||||
toolCalls: toolCalls || '',
|
||||
timings
|
||||
};
|
||||
if (resolvedModel && !modelPersisted) updateData.model = resolvedModel;
|
||||
await DatabaseService.updateMessage(currentMessageId, updateData);
|
||||
const idx = conversationsStore.findMessageIndex(currentMessageId);
|
||||
const uiUpdate: Partial<DatabaseMessage> = {
|
||||
content,
|
||||
reasoningContent: reasoning || undefined,
|
||||
toolCalls: toolCalls || ''
|
||||
};
|
||||
if (timings) uiUpdate.timings = timings;
|
||||
if (resolvedModel) uiUpdate.model = resolvedModel;
|
||||
conversationsStore.updateMessageAtIndex(idx, uiUpdate);
|
||||
await conversationsStore.updateCurrentNode(currentMessageId);
|
||||
cleanupStreamingState();
|
||||
if (onComplete) await onComplete(content);
|
||||
if (isRouterMode()) modelsStore.fetchRouterModels().catch(console.error);
|
||||
},
|
||||
onError: streamCallbacks.onError
|
||||
},
|
||||
convId,
|
||||
abortController.signal
|
||||
);
|
||||
}
|
||||
@@ -1033,56 +1100,40 @@ class ChatStore {
|
||||
}
|
||||
|
||||
const originalContent = dbMessage.content;
|
||||
const originalReasoning = dbMessage.reasoningContent || '';
|
||||
const conversationContext = conversationsStore.activeMessages.slice(0, idx);
|
||||
const contextWithContinue = [
|
||||
...conversationContext,
|
||||
{ role: MessageRole.ASSISTANT as const, content: originalContent }
|
||||
];
|
||||
|
||||
let appendedContent = '',
|
||||
hasReceivedContent = false,
|
||||
isReasoningOpen = hasUnclosedReasoningTag(originalContent);
|
||||
let appendedContent = '';
|
||||
let appendedReasoning = '';
|
||||
let hasReceivedContent = false;
|
||||
|
||||
const updateStreamingContent = (fullContent: string) => {
|
||||
this.setChatStreaming(msg.convId, fullContent, msg.id);
|
||||
conversationsStore.updateMessageAtIndex(idx, { content: fullContent });
|
||||
};
|
||||
|
||||
const appendContentChunk = (chunk: string) => {
|
||||
if (isReasoningOpen) {
|
||||
appendedContent += REASONING_TAGS.END;
|
||||
isReasoningOpen = false;
|
||||
}
|
||||
appendedContent += chunk;
|
||||
hasReceivedContent = true;
|
||||
updateStreamingContent(originalContent + appendedContent);
|
||||
};
|
||||
|
||||
const appendReasoningChunk = (chunk: string) => {
|
||||
if (!isReasoningOpen) {
|
||||
appendedContent += REASONING_TAGS.START;
|
||||
isReasoningOpen = true;
|
||||
}
|
||||
appendedContent += chunk;
|
||||
hasReceivedContent = true;
|
||||
updateStreamingContent(originalContent + appendedContent);
|
||||
};
|
||||
|
||||
const finalizeReasoning = () => {
|
||||
if (isReasoningOpen) {
|
||||
appendedContent += REASONING_TAGS.END;
|
||||
isReasoningOpen = false;
|
||||
}
|
||||
};
|
||||
|
||||
const abortController = this.getOrCreateAbortController(msg.convId);
|
||||
|
||||
await ChatService.sendMessage(
|
||||
contextWithContinue,
|
||||
{
|
||||
...this.getApiOptions(),
|
||||
onChunk: (chunk: string) => appendContentChunk(chunk),
|
||||
onReasoningChunk: (chunk: string) => appendReasoningChunk(chunk),
|
||||
onChunk: (chunk: string) => {
|
||||
appendedContent += chunk;
|
||||
hasReceivedContent = true;
|
||||
updateStreamingContent(originalContent + appendedContent);
|
||||
},
|
||||
onReasoningChunk: (chunk: string) => {
|
||||
appendedReasoning += chunk;
|
||||
hasReceivedContent = true;
|
||||
conversationsStore.updateMessageAtIndex(idx, {
|
||||
reasoningContent: originalReasoning + appendedReasoning
|
||||
});
|
||||
},
|
||||
onTimings: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
|
||||
const tokensPerSecond =
|
||||
timings?.predicted_ms && timings?.predicted_n
|
||||
@@ -1105,21 +1156,23 @@ class ChatStore {
|
||||
reasoningContent?: string,
|
||||
timings?: ChatMessageTimings
|
||||
) => {
|
||||
finalizeReasoning();
|
||||
|
||||
const appendedFromCompletion = hasReceivedContent
|
||||
? appendedContent
|
||||
: wrapReasoningContent(finalContent || '', reasoningContent);
|
||||
const fullContent = originalContent + appendedFromCompletion;
|
||||
const finalAppendedContent = hasReceivedContent ? appendedContent : finalContent || '';
|
||||
const finalAppendedReasoning = hasReceivedContent
|
||||
? appendedReasoning
|
||||
: reasoningContent || '';
|
||||
const fullContent = originalContent + finalAppendedContent;
|
||||
const fullReasoning = originalReasoning + finalAppendedReasoning || undefined;
|
||||
|
||||
await DatabaseService.updateMessage(msg.id, {
|
||||
content: fullContent,
|
||||
reasoningContent: fullReasoning,
|
||||
timestamp: Date.now(),
|
||||
timings
|
||||
});
|
||||
|
||||
conversationsStore.updateMessageAtIndex(idx, {
|
||||
content: fullContent,
|
||||
reasoningContent: fullReasoning,
|
||||
timestamp: Date.now(),
|
||||
timings
|
||||
});
|
||||
@@ -1135,11 +1188,13 @@ class ChatStore {
|
||||
if (hasReceivedContent && appendedContent) {
|
||||
await DatabaseService.updateMessage(msg.id, {
|
||||
content: originalContent + appendedContent,
|
||||
reasoningContent: originalReasoning + appendedReasoning || undefined,
|
||||
timestamp: Date.now()
|
||||
});
|
||||
|
||||
conversationsStore.updateMessageAtIndex(idx, {
|
||||
content: originalContent + appendedContent,
|
||||
reasoningContent: originalReasoning + appendedReasoning || undefined,
|
||||
timestamp: Date.now()
|
||||
});
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ import { browser } from '$app/environment';
|
||||
import { toast } from 'svelte-sonner';
|
||||
import { DatabaseService } from '$lib/services/database.service';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
import { filterByLeafNodeId, findLeafNode } from '$lib/utils';
|
||||
import { filterByLeafNodeId, findLeafNode, runLegacyMigration } from '$lib/utils';
|
||||
import type { McpServerOverride } from '$lib/types/database';
|
||||
import { MessageRole } from '$lib/enums';
|
||||
import {
|
||||
@@ -128,6 +128,10 @@ class ConversationsStore {
|
||||
if (this.isInitialized) return;
|
||||
|
||||
try {
|
||||
// @deprecated Legacy migration for old marker-based messages.
|
||||
// Remove once all users have migrated to the structured format.
|
||||
await runLegacyMigration();
|
||||
|
||||
await this.loadConversations();
|
||||
this.isInitialized = true;
|
||||
} catch (error) {
|
||||
|
||||
43
tools/server/webui/src/lib/types/agentic.d.ts
vendored
43
tools/server/webui/src/lib/types/agentic.d.ts
vendored
@@ -2,6 +2,7 @@ import type { MessageRole } from '$lib/enums';
|
||||
import { ToolCallType } from '$lib/enums';
|
||||
import type {
|
||||
ApiChatCompletionRequest,
|
||||
ApiChatCompletionToolCall,
|
||||
ApiChatMessageContentPart,
|
||||
ApiChatMessageData
|
||||
} from './api';
|
||||
@@ -70,22 +71,48 @@ export interface AgenticSession {
|
||||
}
|
||||
|
||||
/**
|
||||
* Callbacks for agentic flow execution
|
||||
* Callbacks for agentic flow execution.
|
||||
*
|
||||
* The agentic loop creates separate DB messages for each turn:
|
||||
* - assistant messages (one per LLM turn, with tool_calls if any)
|
||||
* - tool result messages (one per tool call execution)
|
||||
*
|
||||
* The first assistant message is created by the caller before starting the flow.
|
||||
* Subsequent messages are created via createToolResultMessage / createAssistantMessage.
|
||||
*/
|
||||
export interface AgenticFlowCallbacks {
|
||||
/** Content chunk for the current assistant message */
|
||||
onChunk?: (chunk: string) => void;
|
||||
/** Reasoning content chunk for the current assistant message */
|
||||
onReasoningChunk?: (chunk: string) => void;
|
||||
onToolCallChunk?: (serializedToolCalls: string) => void;
|
||||
onAttachments?: (extras: DatabaseMessageExtra[]) => void;
|
||||
/** Tool calls being streamed (partial, accumulating) for the current turn */
|
||||
onToolCallsStreaming?: (toolCalls: ApiChatCompletionToolCall[]) => void;
|
||||
/** Attachments extracted from tool results */
|
||||
onAttachments?: (messageId: string, extras: DatabaseMessageExtra[]) => void;
|
||||
/** Model name detected from response */
|
||||
onModel?: (model: string) => void;
|
||||
onComplete?: (
|
||||
/** Current assistant turn's streaming is complete - save to DB */
|
||||
onAssistantTurnComplete?: (
|
||||
content: string,
|
||||
reasoningContent?: string,
|
||||
timings?: ChatMessageTimings,
|
||||
toolCalls?: string
|
||||
) => void;
|
||||
reasoningContent: string | undefined,
|
||||
timings: ChatMessageTimings | undefined,
|
||||
toolCalls: ApiChatCompletionToolCall[] | undefined
|
||||
) => Promise<void>;
|
||||
/** Create a tool result message in the DB tree */
|
||||
createToolResultMessage?: (
|
||||
toolCallId: string,
|
||||
content: string,
|
||||
extras?: DatabaseMessageExtra[]
|
||||
) => Promise<DatabaseMessage>;
|
||||
/** Create a new assistant message for the next agentic turn */
|
||||
createAssistantMessage?: () => Promise<DatabaseMessage>;
|
||||
/** Entire agentic flow is complete */
|
||||
onFlowComplete?: (timings?: ChatMessageTimings) => void;
|
||||
/** Error during flow */
|
||||
onError?: (error: Error) => void;
|
||||
/** Timing updates during streaming */
|
||||
onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void;
|
||||
/** An agentic turn (LLM + tool execution) completed - intermediate timing update */
|
||||
onTurnComplete?: (intermediateTimings: ChatMessageTimings) => void;
|
||||
}
|
||||
|
||||
|
||||
28
tools/server/webui/src/lib/types/chat.d.ts
vendored
28
tools/server/webui/src/lib/types/chat.d.ts
vendored
@@ -1,5 +1,6 @@
|
||||
import type { ErrorDialogType } from '$lib/enums';
|
||||
import type { DatabaseMessageExtra } from './database';
|
||||
import type { ApiChatCompletionToolCall } from './api';
|
||||
import type { DatabaseMessage, DatabaseMessageExtra } from './database';
|
||||
|
||||
export interface ChatUploadedFile {
|
||||
id: string;
|
||||
@@ -99,21 +100,28 @@ export interface ChatMessageToolCallTiming {
|
||||
}
|
||||
|
||||
/**
|
||||
* Callbacks for streaming chat responses
|
||||
* Callbacks for streaming chat responses (used by both agentic and non-agentic paths)
|
||||
*/
|
||||
export interface ChatStreamCallbacks {
|
||||
onChunk?: (chunk: string) => void;
|
||||
onReasoningChunk?: (chunk: string) => void;
|
||||
onToolCallChunk?: (chunk: string) => void;
|
||||
onAttachments?: (extras: DatabaseMessageExtra[]) => void;
|
||||
onToolCallsStreaming?: (toolCalls: ApiChatCompletionToolCall[]) => void;
|
||||
onAttachments?: (messageId: string, extras: DatabaseMessageExtra[]) => void;
|
||||
onModel?: (model: string) => void;
|
||||
onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void;
|
||||
onComplete?: (
|
||||
content?: string,
|
||||
reasoningContent?: string,
|
||||
timings?: ChatMessageTimings,
|
||||
toolCallContent?: string
|
||||
) => void;
|
||||
onAssistantTurnComplete?: (
|
||||
content: string,
|
||||
reasoningContent: string | undefined,
|
||||
timings: ChatMessageTimings | undefined,
|
||||
toolCalls: ApiChatCompletionToolCall[] | undefined
|
||||
) => Promise<void>;
|
||||
createToolResultMessage?: (
|
||||
toolCallId: string,
|
||||
content: string,
|
||||
extras?: DatabaseMessageExtra[]
|
||||
) => Promise<DatabaseMessage>;
|
||||
createAssistantMessage?: () => Promise<DatabaseMessage>;
|
||||
onFlowComplete?: (timings?: ChatMessageTimings) => void;
|
||||
onError?: (error: Error) => void;
|
||||
onTurnComplete?: (intermediateTimings: ChatMessageTimings) => void;
|
||||
}
|
||||
|
||||
@@ -92,6 +92,8 @@ export interface DatabaseMessage {
|
||||
* @deprecated - left for backward compatibility
|
||||
*/
|
||||
thinking?: string;
|
||||
/** Reasoning content produced by the model (separate from visible content) */
|
||||
reasoningContent?: string;
|
||||
/** Serialized JSON array of tool calls made by assistant messages */
|
||||
toolCalls?: string;
|
||||
/** Tool call ID for tool result messages (role: 'tool') */
|
||||
|
||||
@@ -1,8 +1,15 @@
|
||||
import { AgenticSectionType } from '$lib/enums';
|
||||
import { AGENTIC_TAGS, AGENTIC_REGEX, REASONING_TAGS, TRIM_NEWLINES_REGEX } from '$lib/constants';
|
||||
import { AgenticSectionType, MessageRole } from '$lib/enums';
|
||||
import { ATTACHMENT_SAVED_REGEX, NEWLINE_SEPARATOR } from '$lib/constants';
|
||||
import type { ApiChatCompletionToolCall } from '$lib/types/api';
|
||||
import type {
|
||||
DatabaseMessage,
|
||||
DatabaseMessageExtra,
|
||||
DatabaseMessageExtraImageFile
|
||||
} from '$lib/types/database';
|
||||
import { AttachmentType } from '$lib/enums';
|
||||
|
||||
/**
|
||||
* Represents a parsed section of agentic content
|
||||
* Represents a parsed section of agentic content for display
|
||||
*/
|
||||
export interface AgenticSection {
|
||||
type: AgenticSectionType;
|
||||
@@ -10,63 +17,70 @@ export interface AgenticSection {
|
||||
toolName?: string;
|
||||
toolArgs?: string;
|
||||
toolResult?: string;
|
||||
toolResultExtras?: DatabaseMessageExtra[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a segment of content that may contain reasoning blocks
|
||||
* Represents a tool result line that may reference an image attachment
|
||||
*/
|
||||
type ReasoningSegment = {
|
||||
type:
|
||||
| AgenticSectionType.TEXT
|
||||
| AgenticSectionType.REASONING
|
||||
| AgenticSectionType.REASONING_PENDING;
|
||||
content: string;
|
||||
export type ToolResultLine = {
|
||||
text: string;
|
||||
image?: DatabaseMessageExtraImageFile;
|
||||
};
|
||||
|
||||
/**
|
||||
* Parses agentic content into structured sections
|
||||
* Derives display sections from a single assistant message and its direct tool results.
|
||||
*
|
||||
* Main parsing function that processes content containing:
|
||||
* - Tool calls (completed, pending, or streaming)
|
||||
* - Reasoning blocks (completed or streaming)
|
||||
* - Regular text content
|
||||
*
|
||||
* The parser handles chronological display of agentic flow output, maintaining
|
||||
* the order of operations and properly identifying different states of tool calls
|
||||
* and reasoning blocks during streaming.
|
||||
*
|
||||
* @param rawContent - The raw content string to parse
|
||||
* @returns Array of structured agentic sections ready for display
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* const content = "Some text <<<AGENTIC_TOOL_CALL>>>tool_name...";
|
||||
* const sections = parseAgenticContent(content);
|
||||
* // Returns: [{ type: 'text', content: 'Some text' }, { type: 'tool_call_streaming', ... }]
|
||||
* ```
|
||||
* @param message - The assistant message
|
||||
* @param toolMessages - Tool result messages for this assistant's tool_calls
|
||||
* @param streamingToolCalls - Partial tool calls during streaming (not yet persisted)
|
||||
*/
|
||||
export function parseAgenticContent(rawContent: string): AgenticSection[] {
|
||||
if (!rawContent) return [];
|
||||
|
||||
const segments = splitReasoningSegments(rawContent);
|
||||
function deriveSingleTurnSections(
|
||||
message: DatabaseMessage,
|
||||
toolMessages: DatabaseMessage[] = [],
|
||||
streamingToolCalls: ApiChatCompletionToolCall[] = []
|
||||
): AgenticSection[] {
|
||||
const sections: AgenticSection[] = [];
|
||||
|
||||
for (const segment of segments) {
|
||||
if (segment.type === AgenticSectionType.TEXT) {
|
||||
sections.push(...parseToolCallContent(segment.content));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (segment.type === AgenticSectionType.REASONING) {
|
||||
if (segment.content.trim()) {
|
||||
sections.push({ type: AgenticSectionType.REASONING, content: segment.content });
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// 1. Reasoning content (from dedicated field)
|
||||
if (message.reasoningContent) {
|
||||
sections.push({
|
||||
type: AgenticSectionType.REASONING_PENDING,
|
||||
content: segment.content
|
||||
type: AgenticSectionType.REASONING,
|
||||
content: message.reasoningContent
|
||||
});
|
||||
}
|
||||
|
||||
// 2. Text content
|
||||
if (message.content?.trim()) {
|
||||
sections.push({
|
||||
type: AgenticSectionType.TEXT,
|
||||
content: message.content
|
||||
});
|
||||
}
|
||||
|
||||
// 3. Persisted tool calls (from message.toolCalls field)
|
||||
const toolCalls = parseToolCalls(message.toolCalls);
|
||||
for (const tc of toolCalls) {
|
||||
const resultMsg = toolMessages.find((m) => m.toolCallId === tc.id);
|
||||
sections.push({
|
||||
type: resultMsg ? AgenticSectionType.TOOL_CALL : AgenticSectionType.TOOL_CALL_PENDING,
|
||||
content: resultMsg?.content || '',
|
||||
toolName: tc.function?.name,
|
||||
toolArgs: tc.function?.arguments,
|
||||
toolResult: resultMsg?.content,
|
||||
toolResultExtras: resultMsg?.extra
|
||||
});
|
||||
}
|
||||
|
||||
// 4. Streaming tool calls (not yet persisted - currently being received)
|
||||
for (const tc of streamingToolCalls) {
|
||||
// Skip if already in persisted tool calls
|
||||
if (tc.id && toolCalls.find((t) => t.id === tc.id)) continue;
|
||||
sections.push({
|
||||
type: AgenticSectionType.TOOL_CALL_STREAMING,
|
||||
content: '',
|
||||
toolName: tc.function?.name,
|
||||
toolArgs: tc.function?.arguments
|
||||
});
|
||||
}
|
||||
|
||||
@@ -74,211 +88,123 @@ export function parseAgenticContent(rawContent: string): AgenticSection[] {
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses content containing tool call markers
|
||||
* Derives display sections from structured message data.
|
||||
*
|
||||
* Identifies and extracts tool calls from content, handling:
|
||||
* - Completed tool calls with name, arguments, and results
|
||||
* - Pending tool calls (execution in progress)
|
||||
* - Streaming tool calls (arguments being received)
|
||||
* - Early-stage tool calls (just started)
|
||||
* Handles both single-turn (one assistant + its tool results) and multi-turn
|
||||
* agentic sessions (multiple assistant + tool messages grouped together).
|
||||
*
|
||||
* @param rawContent - The raw content string to parse
|
||||
* @returns Array of agentic sections representing tool calls and text
|
||||
* When `toolMessages` contains continuation assistant messages (from multi-turn
|
||||
* agentic flows), they are processed in order to produce sections across all turns.
|
||||
*
|
||||
* @param message - The first/anchor assistant message
|
||||
* @param toolMessages - Tool result messages and continuation assistant messages
|
||||
* @param streamingToolCalls - Partial tool calls during streaming (not yet persisted)
|
||||
* @param isStreaming - Whether the message is currently being streamed
|
||||
*/
|
||||
function parseToolCallContent(rawContent: string): AgenticSection[] {
|
||||
if (!rawContent) return [];
|
||||
export function deriveAgenticSections(
|
||||
message: DatabaseMessage,
|
||||
toolMessages: DatabaseMessage[] = [],
|
||||
streamingToolCalls: ApiChatCompletionToolCall[] = []
|
||||
): AgenticSection[] {
|
||||
const hasAssistantContinuations = toolMessages.some((m) => m.role === MessageRole.ASSISTANT);
|
||||
|
||||
if (!hasAssistantContinuations) {
|
||||
return deriveSingleTurnSections(message, toolMessages, streamingToolCalls);
|
||||
}
|
||||
|
||||
const sections: AgenticSection[] = [];
|
||||
|
||||
const completedToolCallRegex = new RegExp(AGENTIC_REGEX.COMPLETED_TOOL_CALL.source, 'g');
|
||||
const firstTurnToolMsgs = collectToolMessages(toolMessages, 0);
|
||||
sections.push(...deriveSingleTurnSections(message, firstTurnToolMsgs));
|
||||
|
||||
let lastIndex = 0;
|
||||
let match;
|
||||
let i = firstTurnToolMsgs.length;
|
||||
|
||||
while ((match = completedToolCallRegex.exec(rawContent)) !== null) {
|
||||
if (match.index > lastIndex) {
|
||||
const textBefore = rawContent.slice(lastIndex, match.index).trim();
|
||||
if (textBefore) {
|
||||
sections.push({ type: AgenticSectionType.TEXT, content: textBefore });
|
||||
}
|
||||
while (i < toolMessages.length) {
|
||||
const msg = toolMessages[i];
|
||||
|
||||
if (msg.role === MessageRole.ASSISTANT) {
|
||||
const turnToolMsgs = collectToolMessages(toolMessages, i + 1);
|
||||
const isLastTurn = i + 1 + turnToolMsgs.length >= toolMessages.length;
|
||||
|
||||
sections.push(
|
||||
...deriveSingleTurnSections(msg, turnToolMsgs, isLastTurn ? streamingToolCalls : [])
|
||||
);
|
||||
|
||||
i += 1 + turnToolMsgs.length;
|
||||
} else {
|
||||
i++;
|
||||
}
|
||||
|
||||
const toolName = match[1];
|
||||
const toolArgs = match[2];
|
||||
const toolResult = match[3].replace(TRIM_NEWLINES_REGEX, '');
|
||||
|
||||
sections.push({
|
||||
type: AgenticSectionType.TOOL_CALL,
|
||||
content: toolResult,
|
||||
toolName,
|
||||
toolArgs,
|
||||
toolResult
|
||||
});
|
||||
|
||||
lastIndex = match.index + match[0].length;
|
||||
}
|
||||
|
||||
const remainingContent = rawContent.slice(lastIndex);
|
||||
|
||||
const pendingMatch = remainingContent.match(AGENTIC_REGEX.PENDING_TOOL_CALL);
|
||||
const partialWithNameMatch = remainingContent.match(AGENTIC_REGEX.PARTIAL_WITH_NAME);
|
||||
const earlyMatch = remainingContent.match(AGENTIC_REGEX.EARLY_MATCH);
|
||||
|
||||
if (pendingMatch) {
|
||||
const pendingIndex = remainingContent.indexOf(AGENTIC_TAGS.TOOL_CALL_START);
|
||||
|
||||
if (pendingIndex > 0) {
|
||||
const textBefore = remainingContent.slice(0, pendingIndex).trim();
|
||||
|
||||
if (textBefore) {
|
||||
sections.push({ type: AgenticSectionType.TEXT, content: textBefore });
|
||||
}
|
||||
}
|
||||
|
||||
const toolName = pendingMatch[1];
|
||||
const toolArgs = pendingMatch[2];
|
||||
const streamingResult = (pendingMatch[3] || '').replace(TRIM_NEWLINES_REGEX, '');
|
||||
|
||||
sections.push({
|
||||
type: AgenticSectionType.TOOL_CALL_PENDING,
|
||||
content: streamingResult,
|
||||
toolName,
|
||||
toolArgs,
|
||||
toolResult: streamingResult || undefined
|
||||
});
|
||||
} else if (partialWithNameMatch) {
|
||||
const pendingIndex = remainingContent.indexOf(AGENTIC_TAGS.TOOL_CALL_START);
|
||||
|
||||
if (pendingIndex > 0) {
|
||||
const textBefore = remainingContent.slice(0, pendingIndex).trim();
|
||||
if (textBefore) {
|
||||
sections.push({ type: AgenticSectionType.TEXT, content: textBefore });
|
||||
}
|
||||
}
|
||||
|
||||
const partialArgs = partialWithNameMatch[2] || '';
|
||||
|
||||
sections.push({
|
||||
type: AgenticSectionType.TOOL_CALL_STREAMING,
|
||||
content: '',
|
||||
toolName: partialWithNameMatch[1],
|
||||
toolArgs: partialArgs || undefined,
|
||||
toolResult: undefined
|
||||
});
|
||||
} else if (earlyMatch) {
|
||||
const pendingIndex = remainingContent.indexOf(AGENTIC_TAGS.TOOL_CALL_START);
|
||||
|
||||
if (pendingIndex > 0) {
|
||||
const textBefore = remainingContent.slice(0, pendingIndex).trim();
|
||||
if (textBefore) {
|
||||
sections.push({ type: AgenticSectionType.TEXT, content: textBefore });
|
||||
}
|
||||
}
|
||||
|
||||
const nameMatch = earlyMatch[1]?.match(AGENTIC_REGEX.TOOL_NAME_EXTRACT);
|
||||
|
||||
sections.push({
|
||||
type: AgenticSectionType.TOOL_CALL_STREAMING,
|
||||
content: '',
|
||||
toolName: nameMatch?.[1],
|
||||
toolArgs: undefined,
|
||||
toolResult: undefined
|
||||
});
|
||||
} else if (lastIndex < rawContent.length) {
|
||||
let remainingText = rawContent.slice(lastIndex).trim();
|
||||
|
||||
const partialMarkerMatch = remainingText.match(AGENTIC_REGEX.PARTIAL_MARKER);
|
||||
|
||||
if (partialMarkerMatch) {
|
||||
remainingText = remainingText.slice(0, partialMarkerMatch.index).trim();
|
||||
}
|
||||
|
||||
if (remainingText) {
|
||||
sections.push({ type: AgenticSectionType.TEXT, content: remainingText });
|
||||
}
|
||||
}
|
||||
|
||||
if (sections.length === 0 && rawContent.trim()) {
|
||||
sections.push({ type: AgenticSectionType.TEXT, content: rawContent });
|
||||
}
|
||||
|
||||
return sections;
|
||||
}
|
||||
|
||||
/**
|
||||
* Strips partial marker from text content
|
||||
*
|
||||
* Removes incomplete agentic markers (e.g., "<<<", "<<<AGENTIC") that may appear
|
||||
* at the end of streaming content.
|
||||
*
|
||||
* @param text - The text content to process
|
||||
* @returns Text with partial markers removed
|
||||
* Collect consecutive tool messages starting at `startIndex`.
|
||||
*/
|
||||
function stripPartialMarker(text: string): string {
|
||||
const partialMarkerMatch = text.match(AGENTIC_REGEX.PARTIAL_MARKER);
|
||||
function collectToolMessages(messages: DatabaseMessage[], startIndex: number): DatabaseMessage[] {
|
||||
const result: DatabaseMessage[] = [];
|
||||
|
||||
if (partialMarkerMatch) {
|
||||
return text.slice(0, partialMarkerMatch.index).trim();
|
||||
for (let i = startIndex; i < messages.length; i++) {
|
||||
if (messages[i].role === MessageRole.TOOL) {
|
||||
result.push(messages[i]);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return text;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits raw content into segments based on reasoning blocks
|
||||
*
|
||||
* Identifies and extracts reasoning content wrapped in REASONING_TAGS.START/END markers,
|
||||
* separating it from regular text content. Handles both complete and incomplete
|
||||
* (streaming) reasoning blocks.
|
||||
*
|
||||
* @param rawContent - The raw content string to parse
|
||||
* @returns Array of reasoning segments with their types and content
|
||||
* Parse tool result text into lines, matching image attachments by name.
|
||||
*/
|
||||
function splitReasoningSegments(rawContent: string): ReasoningSegment[] {
|
||||
if (!rawContent) return [];
|
||||
export function parseToolResultWithImages(
|
||||
toolResult: string,
|
||||
extras?: DatabaseMessageExtra[]
|
||||
): ToolResultLine[] {
|
||||
const lines = toolResult.split(NEWLINE_SEPARATOR);
|
||||
return lines.map((line) => {
|
||||
const match = line.match(ATTACHMENT_SAVED_REGEX);
|
||||
if (!match || !extras) return { text: line };
|
||||
|
||||
const segments: ReasoningSegment[] = [];
|
||||
let cursor = 0;
|
||||
const attachmentName = match[1];
|
||||
const image = extras.find(
|
||||
(e): e is DatabaseMessageExtraImageFile =>
|
||||
e.type === AttachmentType.IMAGE && e.name === attachmentName
|
||||
);
|
||||
|
||||
while (cursor < rawContent.length) {
|
||||
const startIndex = rawContent.indexOf(REASONING_TAGS.START, cursor);
|
||||
return { text: line, image };
|
||||
});
|
||||
}
|
||||
|
||||
if (startIndex === -1) {
|
||||
const remainingText = rawContent.slice(cursor);
|
||||
/**
|
||||
* Safely parse the toolCalls JSON string from a DatabaseMessage.
|
||||
*/
|
||||
function parseToolCalls(toolCallsJson?: string): ApiChatCompletionToolCall[] {
|
||||
if (!toolCallsJson) return [];
|
||||
|
||||
if (remainingText) {
|
||||
segments.push({ type: AgenticSectionType.TEXT, content: remainingText });
|
||||
}
|
||||
try {
|
||||
const parsed = JSON.parse(toolCallsJson);
|
||||
|
||||
break;
|
||||
}
|
||||
return Array.isArray(parsed) ? parsed : [];
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
if (startIndex > cursor) {
|
||||
const textBefore = rawContent.slice(cursor, startIndex);
|
||||
/**
|
||||
* Check if a message has agentic content (tool calls or is part of an agentic flow).
|
||||
*/
|
||||
export function hasAgenticContent(
|
||||
message: DatabaseMessage,
|
||||
toolMessages: DatabaseMessage[] = []
|
||||
): boolean {
|
||||
if (message.toolCalls) {
|
||||
const tc = parseToolCalls(message.toolCalls);
|
||||
|
||||
if (textBefore) {
|
||||
segments.push({ type: AgenticSectionType.TEXT, content: textBefore });
|
||||
}
|
||||
}
|
||||
|
||||
const contentStart = startIndex + REASONING_TAGS.START.length;
|
||||
const endIndex = rawContent.indexOf(REASONING_TAGS.END, contentStart);
|
||||
|
||||
if (endIndex === -1) {
|
||||
const pendingContent = rawContent.slice(contentStart);
|
||||
|
||||
segments.push({
|
||||
type: AgenticSectionType.REASONING_PENDING,
|
||||
content: stripPartialMarker(pendingContent)
|
||||
});
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
const reasoningContent = rawContent.slice(contentStart, endIndex);
|
||||
segments.push({ type: AgenticSectionType.REASONING, content: reasoningContent });
|
||||
cursor = endIndex + REASONING_TAGS.END.length;
|
||||
if (tc.length > 0) return true;
|
||||
}
|
||||
|
||||
return segments;
|
||||
return toolMessages.length > 0;
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@ export function buildProxiedHeaders(headers: Record<string, string>): Record<str
|
||||
const proxiedHeaders: Record<string, string> = {};
|
||||
|
||||
for (const [key, value] of Object.entries(headers)) {
|
||||
proxiedHeaders[`X-Proxy-Header-${key}`] = value;
|
||||
proxiedHeaders[`x-proxy-header-${key}`] = value;
|
||||
}
|
||||
|
||||
return proxiedHeaders;
|
||||
|
||||
@@ -149,8 +149,17 @@ export { parseHeadersToArray, serializeHeaders } from './headers';
|
||||
// Favicon utilities
|
||||
export { getFaviconUrl } from './favicon';
|
||||
|
||||
// Agentic content parsing utilities
|
||||
export { parseAgenticContent, type AgenticSection } from './agentic';
|
||||
// Agentic content utilities (structured section derivation)
|
||||
export {
|
||||
deriveAgenticSections,
|
||||
parseToolResultWithImages,
|
||||
hasAgenticContent,
|
||||
type AgenticSection,
|
||||
type ToolResultLine
|
||||
} from './agentic';
|
||||
|
||||
// Legacy migration utilities
|
||||
export { runLegacyMigration, isMigrationNeeded } from './legacy-migration';
|
||||
|
||||
// Cache utilities
|
||||
export { TTLCache, ReactiveTTLMap, type TTLCacheOptions } from './cache-ttl';
|
||||
|
||||
345
tools/server/webui/src/lib/utils/legacy-migration.ts
Normal file
345
tools/server/webui/src/lib/utils/legacy-migration.ts
Normal file
@@ -0,0 +1,345 @@
|
||||
/**
|
||||
* @deprecated Legacy migration utility — remove at some point in the future once all users have migrated to the new structured agentic message format.
|
||||
*
|
||||
* Converts old marker-based agentic messages to the new structured format
|
||||
* with separate messages per turn.
|
||||
*
|
||||
* Old format: Single assistant message with markers in content:
|
||||
* <<<reasoning_content_start>>>...<<<reasoning_content_end>>>
|
||||
* <<<AGENTIC_TOOL_CALL_START>>>...<<<AGENTIC_TOOL_CALL_END>>>
|
||||
*
|
||||
* New format: Separate messages per turn:
|
||||
* - assistant (content + reasoningContent + toolCalls)
|
||||
* - tool (toolCallId + content)
|
||||
* - assistant (next turn)
|
||||
* - ...
|
||||
*/
|
||||
|
||||
import { LEGACY_AGENTIC_REGEX, LEGACY_REASONING_TAGS } from '$lib/constants';
|
||||
import { DatabaseService } from '$lib/services/database.service';
|
||||
import { MessageRole, MessageType } from '$lib/enums';
|
||||
import type { DatabaseMessage } from '$lib/types/database';
|
||||
|
||||
const MIGRATION_DONE_KEY = 'llama-webui-migration-v2-done';
|
||||
|
||||
/**
|
||||
* @deprecated Part of legacy migration — remove with the migration module.
|
||||
* Check if migration has been performed.
|
||||
*/
|
||||
export function isMigrationNeeded(): boolean {
|
||||
try {
|
||||
return !localStorage.getItem(MIGRATION_DONE_KEY);
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark migration as done.
|
||||
*/
|
||||
function markMigrationDone(): void {
|
||||
try {
|
||||
localStorage.setItem(MIGRATION_DONE_KEY, String(Date.now()));
|
||||
} catch {
|
||||
// Ignore localStorage errors
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a message has legacy markers in its content.
|
||||
*/
|
||||
function hasLegacyMarkers(message: DatabaseMessage): boolean {
|
||||
if (!message.content) return false;
|
||||
return LEGACY_AGENTIC_REGEX.HAS_LEGACY_MARKERS.test(message.content);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract reasoning content from legacy marker format.
|
||||
*/
|
||||
function extractLegacyReasoning(content: string): { reasoning: string; cleanContent: string } {
|
||||
let reasoning = '';
|
||||
let cleanContent = content;
|
||||
|
||||
// Extract all reasoning blocks
|
||||
const re = new RegExp(LEGACY_AGENTIC_REGEX.REASONING_EXTRACT.source, 'g');
|
||||
let match;
|
||||
while ((match = re.exec(content)) !== null) {
|
||||
reasoning += match[1];
|
||||
}
|
||||
|
||||
// Remove reasoning tags from content
|
||||
cleanContent = cleanContent
|
||||
.replace(new RegExp(LEGACY_AGENTIC_REGEX.REASONING_BLOCK.source, 'g'), '')
|
||||
.replace(LEGACY_AGENTIC_REGEX.REASONING_OPEN, '');
|
||||
|
||||
return { reasoning, cleanContent };
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse legacy content with tool call markers into structured turns.
|
||||
*/
|
||||
interface ParsedTurn {
|
||||
textBefore: string;
|
||||
toolCalls: Array<{
|
||||
name: string;
|
||||
args: string;
|
||||
result: string;
|
||||
}>;
|
||||
}
|
||||
|
||||
function parseLegacyToolCalls(content: string): ParsedTurn[] {
|
||||
const turns: ParsedTurn[] = [];
|
||||
const regex = new RegExp(LEGACY_AGENTIC_REGEX.COMPLETED_TOOL_CALL.source, 'g');
|
||||
|
||||
let lastIndex = 0;
|
||||
let currentTurn: ParsedTurn = { textBefore: '', toolCalls: [] };
|
||||
let match;
|
||||
|
||||
while ((match = regex.exec(content)) !== null) {
|
||||
const textBefore = content.slice(lastIndex, match.index).trim();
|
||||
|
||||
// If there's text between tool calls and we already have tool calls,
|
||||
// that means a new turn started (text after tool results = new LLM turn)
|
||||
if (textBefore && currentTurn.toolCalls.length > 0) {
|
||||
turns.push(currentTurn);
|
||||
currentTurn = { textBefore, toolCalls: [] };
|
||||
} else if (textBefore && currentTurn.toolCalls.length === 0) {
|
||||
currentTurn.textBefore = textBefore;
|
||||
}
|
||||
|
||||
currentTurn.toolCalls.push({
|
||||
name: match[1],
|
||||
args: match[2],
|
||||
result: match[3].replace(/^\n+|\n+$/g, '')
|
||||
});
|
||||
|
||||
lastIndex = match.index + match[0].length;
|
||||
}
|
||||
|
||||
// Any remaining text after the last tool call
|
||||
const remainingText = content.slice(lastIndex).trim();
|
||||
|
||||
if (currentTurn.toolCalls.length > 0) {
|
||||
turns.push(currentTurn);
|
||||
}
|
||||
|
||||
// If there's text after all tool calls, it's the final assistant response
|
||||
if (remainingText) {
|
||||
// Remove any partial/open markers
|
||||
const cleanRemaining = remainingText
|
||||
.replace(LEGACY_AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN, '')
|
||||
.trim();
|
||||
if (cleanRemaining) {
|
||||
turns.push({ textBefore: cleanRemaining, toolCalls: [] });
|
||||
}
|
||||
}
|
||||
|
||||
// If no tool calls found at all, return the original content as a single turn
|
||||
if (turns.length === 0) {
|
||||
turns.push({ textBefore: content.trim(), toolCalls: [] });
|
||||
}
|
||||
|
||||
return turns;
|
||||
}
|
||||
|
||||
/**
|
||||
* Migrate a single conversation's messages from legacy format to new format.
|
||||
*/
|
||||
async function migrateConversation(convId: string): Promise<number> {
|
||||
const allMessages = await DatabaseService.getConversationMessages(convId);
|
||||
let migratedCount = 0;
|
||||
|
||||
for (const message of allMessages) {
|
||||
if (message.role !== MessageRole.ASSISTANT) continue;
|
||||
if (!hasLegacyMarkers(message)) {
|
||||
// Still check for reasoning-only markers (no tool calls)
|
||||
if (message.content?.includes(LEGACY_REASONING_TAGS.START)) {
|
||||
const { reasoning, cleanContent } = extractLegacyReasoning(message.content);
|
||||
await DatabaseService.updateMessage(message.id, {
|
||||
content: cleanContent.trim(),
|
||||
reasoningContent: reasoning || undefined
|
||||
});
|
||||
migratedCount++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Has agentic markers - full migration needed
|
||||
const { reasoning, cleanContent } = extractLegacyReasoning(message.content);
|
||||
const turns = parseLegacyToolCalls(cleanContent);
|
||||
|
||||
// Parse existing toolCalls JSON to try to match IDs
|
||||
let existingToolCalls: Array<{ id: string; function?: { name: string; arguments: string } }> =
|
||||
[];
|
||||
if (message.toolCalls) {
|
||||
try {
|
||||
existingToolCalls = JSON.parse(message.toolCalls);
|
||||
} catch {
|
||||
// Ignore
|
||||
}
|
||||
}
|
||||
|
||||
// First turn uses the existing message
|
||||
const firstTurn = turns[0];
|
||||
if (!firstTurn) continue;
|
||||
|
||||
// Match tool calls from the first turn to existing IDs
|
||||
const firstTurnToolCalls = firstTurn.toolCalls.map((tc, i) => {
|
||||
const existing =
|
||||
existingToolCalls.find((e) => e.function?.name === tc.name) || existingToolCalls[i];
|
||||
return {
|
||||
id: existing?.id || `legacy_tool_${i}`,
|
||||
type: 'function' as const,
|
||||
function: { name: tc.name, arguments: tc.args }
|
||||
};
|
||||
});
|
||||
|
||||
// Update the existing message for the first turn
|
||||
await DatabaseService.updateMessage(message.id, {
|
||||
content: firstTurn.textBefore,
|
||||
reasoningContent: reasoning || undefined,
|
||||
toolCalls: firstTurnToolCalls.length > 0 ? JSON.stringify(firstTurnToolCalls) : ''
|
||||
});
|
||||
|
||||
let currentParentId = message.id;
|
||||
let toolCallIdCounter = existingToolCalls.length;
|
||||
|
||||
// Create tool result messages for the first turn
|
||||
for (let i = 0; i < firstTurn.toolCalls.length; i++) {
|
||||
const tc = firstTurn.toolCalls[i];
|
||||
const toolCallId = firstTurnToolCalls[i]?.id || `legacy_tool_${i}`;
|
||||
|
||||
const toolMsg = await DatabaseService.createMessageBranch(
|
||||
{
|
||||
convId,
|
||||
type: MessageType.TEXT,
|
||||
role: MessageRole.TOOL,
|
||||
content: tc.result,
|
||||
toolCallId,
|
||||
timestamp: message.timestamp + i + 1,
|
||||
toolCalls: '',
|
||||
children: []
|
||||
},
|
||||
currentParentId
|
||||
);
|
||||
currentParentId = toolMsg.id;
|
||||
}
|
||||
|
||||
// Create messages for subsequent turns
|
||||
for (let turnIdx = 1; turnIdx < turns.length; turnIdx++) {
|
||||
const turn = turns[turnIdx];
|
||||
|
||||
const turnToolCalls = turn.toolCalls.map((tc, i) => {
|
||||
const idx = toolCallIdCounter + i;
|
||||
const existing = existingToolCalls[idx];
|
||||
return {
|
||||
id: existing?.id || `legacy_tool_${idx}`,
|
||||
type: 'function' as const,
|
||||
function: { name: tc.name, arguments: tc.args }
|
||||
};
|
||||
});
|
||||
toolCallIdCounter += turn.toolCalls.length;
|
||||
|
||||
// Create assistant message for this turn
|
||||
const assistantMsg = await DatabaseService.createMessageBranch(
|
||||
{
|
||||
convId,
|
||||
type: MessageType.TEXT,
|
||||
role: MessageRole.ASSISTANT,
|
||||
content: turn.textBefore,
|
||||
timestamp: message.timestamp + turnIdx * 100,
|
||||
toolCalls: turnToolCalls.length > 0 ? JSON.stringify(turnToolCalls) : '',
|
||||
children: [],
|
||||
model: message.model
|
||||
},
|
||||
currentParentId
|
||||
);
|
||||
currentParentId = assistantMsg.id;
|
||||
|
||||
// Create tool result messages for this turn
|
||||
for (let i = 0; i < turn.toolCalls.length; i++) {
|
||||
const tc = turn.toolCalls[i];
|
||||
const toolCallId = turnToolCalls[i]?.id || `legacy_tool_${toolCallIdCounter + i}`;
|
||||
|
||||
const toolMsg = await DatabaseService.createMessageBranch(
|
||||
{
|
||||
convId,
|
||||
type: MessageType.TEXT,
|
||||
role: MessageRole.TOOL,
|
||||
content: tc.result,
|
||||
toolCallId,
|
||||
timestamp: message.timestamp + turnIdx * 100 + i + 1,
|
||||
toolCalls: '',
|
||||
children: []
|
||||
},
|
||||
currentParentId
|
||||
);
|
||||
currentParentId = toolMsg.id;
|
||||
}
|
||||
}
|
||||
|
||||
// Re-parent any children of the original message to the last created message
|
||||
// (the original message's children list was the next user message or similar)
|
||||
if (message.children.length > 0 && currentParentId !== message.id) {
|
||||
for (const childId of message.children) {
|
||||
// Skip children we just created (they were already properly parented)
|
||||
const child = allMessages.find((m) => m.id === childId);
|
||||
if (!child) continue;
|
||||
// Only re-parent non-tool messages that were original children
|
||||
if (child.role !== MessageRole.TOOL) {
|
||||
await DatabaseService.updateMessage(childId, { parent: currentParentId });
|
||||
// Add to new parent's children
|
||||
const newParent = await DatabaseService.getConversationMessages(convId).then((msgs) =>
|
||||
msgs.find((m) => m.id === currentParentId)
|
||||
);
|
||||
if (newParent && !newParent.children.includes(childId)) {
|
||||
await DatabaseService.updateMessage(currentParentId, {
|
||||
children: [...newParent.children, childId]
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
// Clear re-parented children from the original message
|
||||
await DatabaseService.updateMessage(message.id, { children: [] });
|
||||
}
|
||||
|
||||
migratedCount++;
|
||||
}
|
||||
|
||||
return migratedCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated Part of legacy migration — remove with the migration module.
|
||||
* Run the full migration across all conversations.
|
||||
* This should be called once at app startup if migration is needed.
|
||||
*/
|
||||
export async function runLegacyMigration(): Promise<void> {
|
||||
if (!isMigrationNeeded()) return;
|
||||
|
||||
console.log('[Migration] Starting legacy message format migration...');
|
||||
|
||||
try {
|
||||
const conversations = await DatabaseService.getAllConversations();
|
||||
let totalMigrated = 0;
|
||||
|
||||
for (const conv of conversations) {
|
||||
const count = await migrateConversation(conv.id);
|
||||
totalMigrated += count;
|
||||
}
|
||||
|
||||
if (totalMigrated > 0) {
|
||||
console.log(
|
||||
`[Migration] Migrated ${totalMigrated} messages across ${conversations.length} conversations`
|
||||
);
|
||||
} else {
|
||||
console.log('[Migration] No legacy messages found, marking as done');
|
||||
}
|
||||
|
||||
markMigrationDone();
|
||||
} catch (error) {
|
||||
console.error('[Migration] Failed to migrate legacy messages:', error);
|
||||
// Still mark as done to avoid infinite retry loops
|
||||
markMigrationDone();
|
||||
}
|
||||
}
|
||||
@@ -22,7 +22,7 @@ const config = {
|
||||
strict: true
|
||||
}),
|
||||
output: {
|
||||
bundleStrategy: 'inline'
|
||||
bundleStrategy: 'single'
|
||||
},
|
||||
alias: {
|
||||
$styles: 'src/styles'
|
||||
|
||||
@@ -2,5 +2,5 @@ import { expect, test } from '@playwright/test';
|
||||
|
||||
test('home page has expected h1', async ({ page }) => {
|
||||
await page.goto('/');
|
||||
await expect(page.locator('h1')).toBeVisible();
|
||||
await expect(page.locator('h1').first()).toBeVisible();
|
||||
});
|
||||
|
||||
211
tools/server/webui/tests/unit/agentic-sections.test.ts
Normal file
211
tools/server/webui/tests/unit/agentic-sections.test.ts
Normal file
@@ -0,0 +1,211 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { deriveAgenticSections, hasAgenticContent } from '$lib/utils/agentic';
|
||||
import { AgenticSectionType, MessageRole } from '$lib/enums';
|
||||
import type { DatabaseMessage } from '$lib/types/database';
|
||||
import type { ApiChatCompletionToolCall } from '$lib/types/api';
|
||||
|
||||
function makeAssistant(overrides: Partial<DatabaseMessage> = {}): DatabaseMessage {
|
||||
return {
|
||||
id: overrides.id ?? 'ast-1',
|
||||
convId: 'conv-1',
|
||||
type: 'text',
|
||||
timestamp: Date.now(),
|
||||
role: MessageRole.ASSISTANT,
|
||||
content: overrides.content ?? '',
|
||||
parent: null,
|
||||
children: [],
|
||||
...overrides
|
||||
} as DatabaseMessage;
|
||||
}
|
||||
|
||||
function makeToolMsg(overrides: Partial<DatabaseMessage> = {}): DatabaseMessage {
|
||||
return {
|
||||
id: overrides.id ?? 'tool-1',
|
||||
convId: 'conv-1',
|
||||
type: 'text',
|
||||
timestamp: Date.now(),
|
||||
role: MessageRole.TOOL,
|
||||
content: overrides.content ?? 'tool result',
|
||||
parent: null,
|
||||
children: [],
|
||||
toolCallId: overrides.toolCallId ?? 'call_1',
|
||||
...overrides
|
||||
} as DatabaseMessage;
|
||||
}
|
||||
|
||||
describe('deriveAgenticSections', () => {
|
||||
it('returns empty array for assistant with no content', () => {
|
||||
const msg = makeAssistant({ content: '' });
|
||||
const sections = deriveAgenticSections(msg);
|
||||
expect(sections).toEqual([]);
|
||||
});
|
||||
|
||||
it('returns text section for simple assistant message', () => {
|
||||
const msg = makeAssistant({ content: 'Hello world' });
|
||||
const sections = deriveAgenticSections(msg);
|
||||
expect(sections).toHaveLength(1);
|
||||
expect(sections[0].type).toBe(AgenticSectionType.TEXT);
|
||||
expect(sections[0].content).toBe('Hello world');
|
||||
});
|
||||
|
||||
it('returns reasoning + text for message with reasoning', () => {
|
||||
const msg = makeAssistant({
|
||||
content: 'Answer is 4.',
|
||||
reasoningContent: 'Let me think...'
|
||||
});
|
||||
const sections = deriveAgenticSections(msg);
|
||||
expect(sections).toHaveLength(2);
|
||||
expect(sections[0].type).toBe(AgenticSectionType.REASONING);
|
||||
expect(sections[0].content).toBe('Let me think...');
|
||||
expect(sections[1].type).toBe(AgenticSectionType.TEXT);
|
||||
});
|
||||
|
||||
it('single turn: assistant with tool calls and results', () => {
|
||||
const msg = makeAssistant({
|
||||
content: 'Let me check.',
|
||||
toolCalls: JSON.stringify([
|
||||
{ id: 'call_1', type: 'function', function: { name: 'search', arguments: '{"q":"test"}' } }
|
||||
])
|
||||
});
|
||||
const toolResult = makeToolMsg({
|
||||
toolCallId: 'call_1',
|
||||
content: 'Found 3 results'
|
||||
});
|
||||
const sections = deriveAgenticSections(msg, [toolResult]);
|
||||
expect(sections).toHaveLength(2);
|
||||
expect(sections[0].type).toBe(AgenticSectionType.TEXT);
|
||||
expect(sections[1].type).toBe(AgenticSectionType.TOOL_CALL);
|
||||
expect(sections[1].toolName).toBe('search');
|
||||
expect(sections[1].toolResult).toBe('Found 3 results');
|
||||
});
|
||||
|
||||
it('single turn: pending tool call without result', () => {
|
||||
const msg = makeAssistant({
|
||||
toolCalls: JSON.stringify([
|
||||
{ id: 'call_1', type: 'function', function: { name: 'bash', arguments: '{}' } }
|
||||
])
|
||||
});
|
||||
const sections = deriveAgenticSections(msg, []);
|
||||
expect(sections).toHaveLength(1);
|
||||
expect(sections[0].type).toBe(AgenticSectionType.TOOL_CALL_PENDING);
|
||||
expect(sections[0].toolName).toBe('bash');
|
||||
});
|
||||
|
||||
it('multi-turn: two assistant turns grouped as one session', () => {
|
||||
const assistant1 = makeAssistant({
|
||||
id: 'ast-1',
|
||||
content: 'Turn 1 text',
|
||||
toolCalls: JSON.stringify([
|
||||
{ id: 'call_1', type: 'function', function: { name: 'search', arguments: '{"q":"foo"}' } }
|
||||
])
|
||||
});
|
||||
const tool1 = makeToolMsg({ id: 'tool-1', toolCallId: 'call_1', content: 'result 1' });
|
||||
const assistant2 = makeAssistant({
|
||||
id: 'ast-2',
|
||||
content: 'Final answer based on results.'
|
||||
});
|
||||
|
||||
// toolMessages contains both tool result and continuation assistant
|
||||
const sections = deriveAgenticSections(assistant1, [tool1, assistant2]);
|
||||
expect(sections).toHaveLength(3);
|
||||
// Turn 1
|
||||
expect(sections[0].type).toBe(AgenticSectionType.TEXT);
|
||||
expect(sections[0].content).toBe('Turn 1 text');
|
||||
expect(sections[1].type).toBe(AgenticSectionType.TOOL_CALL);
|
||||
expect(sections[1].toolName).toBe('search');
|
||||
expect(sections[1].toolResult).toBe('result 1');
|
||||
// Turn 2 (final)
|
||||
expect(sections[2].type).toBe(AgenticSectionType.TEXT);
|
||||
expect(sections[2].content).toBe('Final answer based on results.');
|
||||
});
|
||||
|
||||
it('multi-turn: three turns with tool calls', () => {
|
||||
const assistant1 = makeAssistant({
|
||||
id: 'ast-1',
|
||||
content: '',
|
||||
toolCalls: JSON.stringify([
|
||||
{ id: 'call_1', type: 'function', function: { name: 'list_files', arguments: '{}' } }
|
||||
])
|
||||
});
|
||||
const tool1 = makeToolMsg({ id: 'tool-1', toolCallId: 'call_1', content: 'file1 file2' });
|
||||
const assistant2 = makeAssistant({
|
||||
id: 'ast-2',
|
||||
content: 'Reading file1...',
|
||||
toolCalls: JSON.stringify([
|
||||
{
|
||||
id: 'call_2',
|
||||
type: 'function',
|
||||
function: { name: 'read_file', arguments: '{"path":"file1"}' }
|
||||
}
|
||||
])
|
||||
});
|
||||
const tool2 = makeToolMsg({ id: 'tool-2', toolCallId: 'call_2', content: 'contents of file1' });
|
||||
const assistant3 = makeAssistant({
|
||||
id: 'ast-3',
|
||||
content: 'Here is the analysis.',
|
||||
reasoningContent: 'The file contains...'
|
||||
});
|
||||
|
||||
const sections = deriveAgenticSections(assistant1, [tool1, assistant2, tool2, assistant3]);
|
||||
// Turn 1: tool_call (no text since content is empty)
|
||||
// Turn 2: text + tool_call
|
||||
// Turn 3: reasoning + text
|
||||
expect(sections).toHaveLength(5);
|
||||
expect(sections[0].type).toBe(AgenticSectionType.TOOL_CALL);
|
||||
expect(sections[0].toolName).toBe('list_files');
|
||||
expect(sections[1].type).toBe(AgenticSectionType.TEXT);
|
||||
expect(sections[1].content).toBe('Reading file1...');
|
||||
expect(sections[2].type).toBe(AgenticSectionType.TOOL_CALL);
|
||||
expect(sections[2].toolName).toBe('read_file');
|
||||
expect(sections[3].type).toBe(AgenticSectionType.REASONING);
|
||||
expect(sections[4].type).toBe(AgenticSectionType.TEXT);
|
||||
expect(sections[4].content).toBe('Here is the analysis.');
|
||||
});
|
||||
|
||||
it('multi-turn: streaming tool calls on last turn', () => {
|
||||
const assistant1 = makeAssistant({
|
||||
toolCalls: JSON.stringify([
|
||||
{ id: 'call_1', type: 'function', function: { name: 'search', arguments: '{}' } }
|
||||
])
|
||||
});
|
||||
const tool1 = makeToolMsg({ toolCallId: 'call_1', content: 'result' });
|
||||
const assistant2 = makeAssistant({ id: 'ast-2', content: '' });
|
||||
|
||||
const streamingToolCalls: ApiChatCompletionToolCall[] = [
|
||||
{ id: 'call_2', type: 'function', function: { name: 'write_file', arguments: '{"pa' } }
|
||||
];
|
||||
|
||||
const sections = deriveAgenticSections(assistant1, [tool1, assistant2], streamingToolCalls);
|
||||
// Turn 1: tool_call
|
||||
// Turn 2 (streaming): streaming tool call
|
||||
expect(sections.some((s) => s.type === AgenticSectionType.TOOL_CALL)).toBe(true);
|
||||
expect(sections.some((s) => s.type === AgenticSectionType.TOOL_CALL_STREAMING)).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('hasAgenticContent', () => {
|
||||
it('returns false for plain assistant', () => {
|
||||
const msg = makeAssistant({ content: 'Just text' });
|
||||
expect(hasAgenticContent(msg)).toBe(false);
|
||||
});
|
||||
|
||||
it('returns true when message has toolCalls', () => {
|
||||
const msg = makeAssistant({
|
||||
toolCalls: JSON.stringify([
|
||||
{ id: 'call_1', type: 'function', function: { name: 'test', arguments: '{}' } }
|
||||
])
|
||||
});
|
||||
expect(hasAgenticContent(msg)).toBe(true);
|
||||
});
|
||||
|
||||
it('returns true when toolMessages are provided', () => {
|
||||
const msg = makeAssistant();
|
||||
const tool = makeToolMsg();
|
||||
expect(hasAgenticContent(msg, [tool])).toBe(true);
|
||||
});
|
||||
|
||||
it('returns false for empty toolCalls JSON', () => {
|
||||
const msg = makeAssistant({ toolCalls: '[]' });
|
||||
expect(hasAgenticContent(msg)).toBe(false);
|
||||
});
|
||||
});
|
||||
@@ -1,17 +1,22 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { AGENTIC_REGEX } from '$lib/constants/agentic';
|
||||
import { LEGACY_AGENTIC_REGEX } from '$lib/constants/agentic';
|
||||
|
||||
// Mirror the logic in ChatService.stripReasoningContent so we can test it in isolation.
|
||||
// The real function is private static, so we replicate the strip pipeline here.
|
||||
function stripContextMarkers(content: string): string {
|
||||
/**
|
||||
* Tests for legacy marker stripping (used in migration).
|
||||
* The new system does not embed markers in content - these tests verify
|
||||
* the legacy regex patterns still work for the migration code.
|
||||
*/
|
||||
|
||||
// Mirror the legacy stripping logic used during migration
|
||||
function stripLegacyContextMarkers(content: string): string {
|
||||
return content
|
||||
.replace(AGENTIC_REGEX.REASONING_BLOCK, '')
|
||||
.replace(AGENTIC_REGEX.REASONING_OPEN, '')
|
||||
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_BLOCK, '')
|
||||
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN, '');
|
||||
.replace(new RegExp(LEGACY_AGENTIC_REGEX.REASONING_BLOCK.source, 'g'), '')
|
||||
.replace(LEGACY_AGENTIC_REGEX.REASONING_OPEN, '')
|
||||
.replace(new RegExp(LEGACY_AGENTIC_REGEX.AGENTIC_TOOL_CALL_BLOCK.source, 'g'), '')
|
||||
.replace(LEGACY_AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN, '');
|
||||
}
|
||||
|
||||
// A realistic complete tool call block as stored in message.content after a turn.
|
||||
// A realistic complete tool call block as stored in old message.content
|
||||
const COMPLETE_BLOCK =
|
||||
'\n\n<<<AGENTIC_TOOL_CALL_START>>>\n' +
|
||||
'<<<TOOL_NAME:bash_tool>>>\n' +
|
||||
@@ -30,11 +35,10 @@ const OPEN_BLOCK =
|
||||
'<<<TOOL_ARGS_END>>>\n' +
|
||||
'partial output...';
|
||||
|
||||
describe('agentic marker stripping for context', () => {
|
||||
describe('legacy agentic marker stripping (for migration)', () => {
|
||||
it('strips a complete tool call block, leaving surrounding text', () => {
|
||||
const input = 'Before.' + COMPLETE_BLOCK + 'After.';
|
||||
const result = stripContextMarkers(input);
|
||||
// markers gone; residual newlines between fragments are fine
|
||||
const result = stripLegacyContextMarkers(input);
|
||||
expect(result).not.toContain('<<<');
|
||||
expect(result).toContain('Before.');
|
||||
expect(result).toContain('After.');
|
||||
@@ -42,7 +46,7 @@ describe('agentic marker stripping for context', () => {
|
||||
|
||||
it('strips multiple complete tool call blocks', () => {
|
||||
const input = 'A' + COMPLETE_BLOCK + 'B' + COMPLETE_BLOCK + 'C';
|
||||
const result = stripContextMarkers(input);
|
||||
const result = stripLegacyContextMarkers(input);
|
||||
expect(result).not.toContain('<<<');
|
||||
expect(result).toContain('A');
|
||||
expect(result).toContain('B');
|
||||
@@ -51,19 +55,19 @@ describe('agentic marker stripping for context', () => {
|
||||
|
||||
it('strips an open/partial tool call block (no END marker)', () => {
|
||||
const input = 'Lead text.' + OPEN_BLOCK;
|
||||
const result = stripContextMarkers(input);
|
||||
const result = stripLegacyContextMarkers(input);
|
||||
expect(result).toBe('Lead text.');
|
||||
expect(result).not.toContain('<<<');
|
||||
});
|
||||
|
||||
it('does not alter content with no markers', () => {
|
||||
const input = 'Just a normal assistant response.';
|
||||
expect(stripContextMarkers(input)).toBe(input);
|
||||
expect(stripLegacyContextMarkers(input)).toBe(input);
|
||||
});
|
||||
|
||||
it('strips reasoning block independently', () => {
|
||||
const input = '<<<reasoning_content_start>>>think hard<<<reasoning_content_end>>>Answer.';
|
||||
expect(stripContextMarkers(input)).toBe('Answer.');
|
||||
expect(stripLegacyContextMarkers(input)).toBe('Answer.');
|
||||
});
|
||||
|
||||
it('strips both reasoning and agentic blocks together', () => {
|
||||
@@ -71,11 +75,21 @@ describe('agentic marker stripping for context', () => {
|
||||
'<<<reasoning_content_start>>>plan<<<reasoning_content_end>>>' +
|
||||
'Some text.' +
|
||||
COMPLETE_BLOCK;
|
||||
expect(stripContextMarkers(input)).not.toContain('<<<');
|
||||
expect(stripContextMarkers(input)).toContain('Some text.');
|
||||
expect(stripLegacyContextMarkers(input)).not.toContain('<<<');
|
||||
expect(stripLegacyContextMarkers(input)).toContain('Some text.');
|
||||
});
|
||||
|
||||
it('empty string survives', () => {
|
||||
expect(stripContextMarkers('')).toBe('');
|
||||
expect(stripLegacyContextMarkers('')).toBe('');
|
||||
});
|
||||
|
||||
it('detects legacy markers', () => {
|
||||
expect(LEGACY_AGENTIC_REGEX.HAS_LEGACY_MARKERS.test('normal text')).toBe(false);
|
||||
expect(
|
||||
LEGACY_AGENTIC_REGEX.HAS_LEGACY_MARKERS.test('text<<<AGENTIC_TOOL_CALL_START>>>more')
|
||||
).toBe(true);
|
||||
expect(LEGACY_AGENTIC_REGEX.HAS_LEGACY_MARKERS.test('<<<reasoning_content_start>>>think')).toBe(
|
||||
true
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,196 +1,89 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { AGENTIC_REGEX, REASONING_TAGS } from '$lib/constants/agentic';
|
||||
import { ContentPartType } from '$lib/enums';
|
||||
import { MessageRole } from '$lib/enums';
|
||||
|
||||
// Replicate ChatService.extractReasoningFromContent (private static)
|
||||
function extractReasoningFromContent(
|
||||
content: string | Array<{ type: string; text?: string }> | null | undefined
|
||||
): string | undefined {
|
||||
if (!content) return undefined;
|
||||
/**
|
||||
* Tests for the new reasoning content handling.
|
||||
* In the new architecture, reasoning content is stored in a dedicated
|
||||
* `reasoningContent` field on DatabaseMessage, not embedded in content with tags.
|
||||
* The API sends it as `reasoning_content` on ApiChatMessageData.
|
||||
*/
|
||||
|
||||
const extractFromString = (text: string): string => {
|
||||
const parts: string[] = [];
|
||||
const re = new RegExp(AGENTIC_REGEX.REASONING_EXTRACT.source);
|
||||
let match = re.exec(text);
|
||||
while (match) {
|
||||
parts.push(match[1]);
|
||||
text = text.slice(match.index + match[0].length);
|
||||
match = re.exec(text);
|
||||
}
|
||||
return parts.join('');
|
||||
};
|
||||
|
||||
if (typeof content === 'string') {
|
||||
const result = extractFromString(content);
|
||||
return result || undefined;
|
||||
}
|
||||
|
||||
if (!Array.isArray(content)) return undefined;
|
||||
|
||||
const parts: string[] = [];
|
||||
for (const part of content) {
|
||||
if (part.type === ContentPartType.TEXT && part.text) {
|
||||
const result = extractFromString(part.text);
|
||||
if (result) parts.push(result);
|
||||
}
|
||||
}
|
||||
return parts.length > 0 ? parts.join('') : undefined;
|
||||
}
|
||||
|
||||
// Replicate ChatService.stripReasoningContent (private static)
|
||||
function stripReasoningContent(
|
||||
content: string | Array<{ type: string; text?: string }> | null | undefined
|
||||
): typeof content {
|
||||
if (!content) return content;
|
||||
|
||||
if (typeof content === 'string') {
|
||||
return content
|
||||
.replace(AGENTIC_REGEX.REASONING_BLOCK, '')
|
||||
.replace(AGENTIC_REGEX.REASONING_OPEN, '')
|
||||
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_BLOCK, '')
|
||||
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN, '');
|
||||
}
|
||||
|
||||
if (!Array.isArray(content)) return content;
|
||||
|
||||
return content.map((part) => {
|
||||
if (part.type !== ContentPartType.TEXT || !part.text) return part;
|
||||
return {
|
||||
...part,
|
||||
text: part.text
|
||||
.replace(AGENTIC_REGEX.REASONING_BLOCK, '')
|
||||
.replace(AGENTIC_REGEX.REASONING_OPEN, '')
|
||||
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_BLOCK, '')
|
||||
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN, '')
|
||||
describe('reasoning content in new structured format', () => {
|
||||
it('reasoning is stored as separate field, not in content', () => {
|
||||
// Simulate what the new chat store does
|
||||
const message = {
|
||||
content: 'The answer is 4.',
|
||||
reasoningContent: 'Let me think: 2+2=4, basic arithmetic.'
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
// Simulate the message mapping logic from ChatService.sendMessage
|
||||
function buildApiMessage(
|
||||
content: string,
|
||||
excludeReasoningFromContext: boolean
|
||||
): { role: string; content: string; reasoning_content?: string } {
|
||||
const cleaned = stripReasoningContent(content) as string;
|
||||
const mapped: { role: string; content: string; reasoning_content?: string } = {
|
||||
role: 'assistant',
|
||||
content: cleaned
|
||||
};
|
||||
if (!excludeReasoningFromContext) {
|
||||
const reasoning = extractReasoningFromContent(content);
|
||||
if (reasoning) {
|
||||
mapped.reasoning_content = reasoning;
|
||||
// Content should be clean
|
||||
expect(message.content).not.toContain('<<<');
|
||||
expect(message.content).toBe('The answer is 4.');
|
||||
|
||||
// Reasoning in dedicated field
|
||||
expect(message.reasoningContent).toBe('Let me think: 2+2=4, basic arithmetic.');
|
||||
});
|
||||
|
||||
it('convertDbMessageToApiChatMessageData includes reasoning_content', () => {
|
||||
// Simulate the conversion logic
|
||||
const dbMessage = {
|
||||
role: MessageRole.ASSISTANT,
|
||||
content: 'The answer is 4.',
|
||||
reasoningContent: 'Let me think: 2+2=4, basic arithmetic.'
|
||||
};
|
||||
|
||||
const apiMessage: Record<string, unknown> = {
|
||||
role: dbMessage.role,
|
||||
content: dbMessage.content
|
||||
};
|
||||
if (dbMessage.reasoningContent) {
|
||||
apiMessage.reasoning_content = dbMessage.reasoningContent;
|
||||
}
|
||||
}
|
||||
return mapped;
|
||||
}
|
||||
|
||||
// Helper: wrap reasoning the same way the chat store does during streaming
|
||||
function wrapReasoning(reasoning: string, content: string): string {
|
||||
return `${REASONING_TAGS.START}${reasoning}${REASONING_TAGS.END}${content}`;
|
||||
}
|
||||
|
||||
describe('reasoning content extraction', () => {
|
||||
it('extracts reasoning from tagged string content', () => {
|
||||
const input = wrapReasoning('step 1, step 2', 'The answer is 42.');
|
||||
const result = extractReasoningFromContent(input);
|
||||
expect(result).toBe('step 1, step 2');
|
||||
expect(apiMessage.content).toBe('The answer is 4.');
|
||||
expect(apiMessage.reasoning_content).toBe('Let me think: 2+2=4, basic arithmetic.');
|
||||
// No internal tags leak into either field
|
||||
expect(apiMessage.content).not.toContain('<<<');
|
||||
expect(apiMessage.reasoning_content).not.toContain('<<<');
|
||||
});
|
||||
|
||||
it('returns undefined when no reasoning tags present', () => {
|
||||
expect(extractReasoningFromContent('Just a normal response.')).toBeUndefined();
|
||||
it('API message excludes reasoning when excludeReasoningFromContext is true', () => {
|
||||
const dbMessage = {
|
||||
role: MessageRole.ASSISTANT,
|
||||
content: 'The answer is 4.',
|
||||
reasoningContent: 'internal thinking'
|
||||
};
|
||||
|
||||
const excludeReasoningFromContext = true;
|
||||
|
||||
const apiMessage: Record<string, unknown> = {
|
||||
role: dbMessage.role,
|
||||
content: dbMessage.content
|
||||
};
|
||||
if (!excludeReasoningFromContext && dbMessage.reasoningContent) {
|
||||
apiMessage.reasoning_content = dbMessage.reasoningContent;
|
||||
}
|
||||
|
||||
expect(apiMessage.content).toBe('The answer is 4.');
|
||||
expect(apiMessage.reasoning_content).toBeUndefined();
|
||||
});
|
||||
|
||||
it('returns undefined for null/empty input', () => {
|
||||
expect(extractReasoningFromContent(null)).toBeUndefined();
|
||||
expect(extractReasoningFromContent(undefined)).toBeUndefined();
|
||||
expect(extractReasoningFromContent('')).toBeUndefined();
|
||||
});
|
||||
it('handles messages with no reasoning', () => {
|
||||
const dbMessage = {
|
||||
role: MessageRole.ASSISTANT,
|
||||
content: 'No reasoning here.',
|
||||
reasoningContent: undefined
|
||||
};
|
||||
|
||||
it('extracts reasoning from content part arrays', () => {
|
||||
const input = [
|
||||
{
|
||||
type: ContentPartType.TEXT,
|
||||
text: wrapReasoning('thinking hard', 'result')
|
||||
}
|
||||
];
|
||||
expect(extractReasoningFromContent(input)).toBe('thinking hard');
|
||||
});
|
||||
const apiMessage: Record<string, unknown> = {
|
||||
role: dbMessage.role,
|
||||
content: dbMessage.content
|
||||
};
|
||||
if (dbMessage.reasoningContent) {
|
||||
apiMessage.reasoning_content = dbMessage.reasoningContent;
|
||||
}
|
||||
|
||||
it('handles multiple reasoning blocks', () => {
|
||||
const input =
|
||||
REASONING_TAGS.START +
|
||||
'block1' +
|
||||
REASONING_TAGS.END +
|
||||
'middle' +
|
||||
REASONING_TAGS.START +
|
||||
'block2' +
|
||||
REASONING_TAGS.END +
|
||||
'end';
|
||||
expect(extractReasoningFromContent(input)).toBe('block1block2');
|
||||
});
|
||||
|
||||
it('ignores non-text content parts', () => {
|
||||
const input = [{ type: 'image_url', text: wrapReasoning('hidden', 'img') }];
|
||||
expect(extractReasoningFromContent(input)).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe('strip reasoning content', () => {
|
||||
it('removes reasoning tags from string content', () => {
|
||||
const input = wrapReasoning('internal thoughts', 'visible answer');
|
||||
expect(stripReasoningContent(input)).toBe('visible answer');
|
||||
});
|
||||
|
||||
it('removes reasoning from content part arrays', () => {
|
||||
const input = [
|
||||
{
|
||||
type: ContentPartType.TEXT,
|
||||
text: wrapReasoning('thoughts', 'answer')
|
||||
}
|
||||
];
|
||||
const result = stripReasoningContent(input) as Array<{ type: string; text?: string }>;
|
||||
expect(result[0].text).toBe('answer');
|
||||
});
|
||||
});
|
||||
|
||||
describe('API message building with reasoning preservation', () => {
|
||||
const storedContent = wrapReasoning('Let me think: 2+2=4, basic arithmetic.', 'The answer is 4.');
|
||||
|
||||
it('preserves reasoning_content when excludeReasoningFromContext is false', () => {
|
||||
const msg = buildApiMessage(storedContent, false);
|
||||
expect(msg.content).toBe('The answer is 4.');
|
||||
expect(msg.reasoning_content).toBe('Let me think: 2+2=4, basic arithmetic.');
|
||||
// no internal tags leak into either field
|
||||
expect(msg.content).not.toContain('<<<');
|
||||
expect(msg.reasoning_content).not.toContain('<<<');
|
||||
});
|
||||
|
||||
it('strips reasoning_content when excludeReasoningFromContext is true', () => {
|
||||
const msg = buildApiMessage(storedContent, true);
|
||||
expect(msg.content).toBe('The answer is 4.');
|
||||
expect(msg.reasoning_content).toBeUndefined();
|
||||
});
|
||||
|
||||
it('handles content with no reasoning in both modes', () => {
|
||||
const plain = 'No reasoning here.';
|
||||
const msgPreserve = buildApiMessage(plain, false);
|
||||
const msgExclude = buildApiMessage(plain, true);
|
||||
expect(msgPreserve.content).toBe(plain);
|
||||
expect(msgPreserve.reasoning_content).toBeUndefined();
|
||||
expect(msgExclude.content).toBe(plain);
|
||||
expect(msgExclude.reasoning_content).toBeUndefined();
|
||||
});
|
||||
|
||||
it('cleans agentic tool call blocks from content even when preserving reasoning', () => {
|
||||
const input =
|
||||
wrapReasoning('plan', 'text') +
|
||||
'\n\n<<<AGENTIC_TOOL_CALL_START>>>\n' +
|
||||
'<<<TOOL_NAME:bash>>>\n' +
|
||||
'<<<TOOL_ARGS_START>>>\n{}\n<<<TOOL_ARGS_END>>>\nout\n' +
|
||||
'<<<AGENTIC_TOOL_CALL_END>>>\n';
|
||||
const msg = buildApiMessage(input, false);
|
||||
expect(msg.content).not.toContain('<<<');
|
||||
expect(msg.reasoning_content).toBe('plan');
|
||||
expect(apiMessage.content).toBe('No reasoning here.');
|
||||
expect(apiMessage.reasoning_content).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import tailwindcss from '@tailwindcss/vite';
|
||||
import { sveltekit } from '@sveltejs/kit/vite';
|
||||
import * as fflate from 'fflate';
|
||||
import { readFileSync, writeFileSync, existsSync } from 'fs';
|
||||
import { readFileSync, writeFileSync, existsSync, readdirSync, copyFileSync } from 'fs';
|
||||
import { dirname, resolve } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
@@ -20,15 +19,13 @@ const GUIDE_FOR_FRONTEND = `
|
||||
-->
|
||||
`.trim();
|
||||
|
||||
const MAX_BUNDLE_SIZE = 2 * 1024 * 1024;
|
||||
|
||||
/**
|
||||
* the maximum size of an embedded asset in bytes,
|
||||
* e.g. maximum size of embedded font (see node_modules/katex/dist/fonts/*.woff2)
|
||||
*/
|
||||
const MAX_ASSET_SIZE = 32000;
|
||||
|
||||
/** public/index.html.gz minified flag */
|
||||
/** public/index.html minified flag */
|
||||
const ENABLE_JS_MINIFICATION = true;
|
||||
|
||||
function llamaCppBuildPlugin() {
|
||||
@@ -40,7 +37,6 @@ function llamaCppBuildPlugin() {
|
||||
setTimeout(() => {
|
||||
try {
|
||||
const indexPath = resolve('../public/index.html');
|
||||
const gzipPath = resolve('../public/index.html.gz');
|
||||
|
||||
if (!existsSync(indexPath)) {
|
||||
return;
|
||||
@@ -61,26 +57,35 @@ function llamaCppBuildPlugin() {
|
||||
|
||||
content = content.replace(/\r/g, '');
|
||||
content = GUIDE_FOR_FRONTEND + '\n' + content;
|
||||
content = content.replace(/\/_app\/immutable\/bundle\.[^"]+\.js/g, './bundle.js');
|
||||
content = content.replace(
|
||||
/\/_app\/immutable\/assets\/bundle\.[^"]+\.css/g,
|
||||
'./bundle.css'
|
||||
);
|
||||
|
||||
const compressed = fflate.gzipSync(Buffer.from(content, 'utf-8'), { level: 9 });
|
||||
writeFileSync(indexPath, content, 'utf-8');
|
||||
console.log('✓ Updated index.html');
|
||||
|
||||
compressed[0x4] = 0;
|
||||
compressed[0x5] = 0;
|
||||
compressed[0x6] = 0;
|
||||
compressed[0x7] = 0;
|
||||
compressed[0x9] = 0;
|
||||
|
||||
if (compressed.byteLength > MAX_BUNDLE_SIZE) {
|
||||
throw new Error(
|
||||
`Bundle size is too large (${Math.ceil(compressed.byteLength / 1024)} KB).\n` +
|
||||
`Please reduce the size of the frontend or increase MAX_BUNDLE_SIZE in vite.config.ts.\n`
|
||||
);
|
||||
// Copy bundle.*.js -> ../public/bundle.js
|
||||
const immutableDir = resolve('../public/_app/immutable');
|
||||
const bundleDir = resolve('../public/_app/immutable/assets');
|
||||
if (existsSync(immutableDir)) {
|
||||
const jsFiles = readdirSync(immutableDir).filter((f) => f.match(/^bundle\..+\.js$/));
|
||||
if (jsFiles.length > 0) {
|
||||
copyFileSync(resolve(immutableDir, jsFiles[0]), resolve('../public/bundle.js'));
|
||||
console.log(`✓ Copied ${jsFiles[0]} -> bundle.js`);
|
||||
}
|
||||
}
|
||||
// Copy bundle.*.css -> ../public/bundle.css
|
||||
if (existsSync(bundleDir)) {
|
||||
const cssFiles = readdirSync(bundleDir).filter((f) => f.match(/^bundle\..+\.css$/));
|
||||
if (cssFiles.length > 0) {
|
||||
copyFileSync(resolve(bundleDir, cssFiles[0]), resolve('../public/bundle.css'));
|
||||
console.log(`✓ Copied ${cssFiles[0]} -> bundle.css`);
|
||||
}
|
||||
}
|
||||
|
||||
writeFileSync(gzipPath, compressed);
|
||||
console.log('✓ Created index.html.gz');
|
||||
} catch (error) {
|
||||
console.error('Failed to create gzip file:', error);
|
||||
console.error('Failed to update index.html:', error);
|
||||
}
|
||||
}, 100);
|
||||
}
|
||||
|
||||
@@ -551,6 +551,8 @@ int main(int argc, char ** argv) {
|
||||
params.sampling.top_k = 4;
|
||||
params.sampling.samplers = { COMMON_SAMPLER_TYPE_TOP_K, };
|
||||
|
||||
common_init();
|
||||
|
||||
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_TTS, print_usage)) {
|
||||
return 1;
|
||||
}
|
||||
@@ -558,8 +560,6 @@ int main(int argc, char ** argv) {
|
||||
const int n_parallel = params.n_parallel;
|
||||
const int n_predict = params.n_predict;
|
||||
|
||||
common_init();
|
||||
|
||||
// init LLM
|
||||
|
||||
llama_backend_init();
|
||||
|
||||
2
vendor/cpp-httplib/CMakeLists.txt
vendored
2
vendor/cpp-httplib/CMakeLists.txt
vendored
@@ -39,7 +39,7 @@ if (LLAMA_BUILD_BORINGSSL)
|
||||
set(FIPS OFF CACHE BOOL "Enable FIPS (BoringSSL)")
|
||||
|
||||
set(BORINGSSL_GIT "https://boringssl.googlesource.com/boringssl" CACHE STRING "BoringSSL git repository")
|
||||
set(BORINGSSL_VERSION "0.20260211.0" CACHE STRING "BoringSSL version")
|
||||
set(BORINGSSL_VERSION "0.20260327.0" CACHE STRING "BoringSSL version")
|
||||
|
||||
message(STATUS "Fetching BoringSSL version ${BORINGSSL_VERSION}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user