Compare commits

...

5 Commits

Author SHA1 Message Date
Tarek Dakhran
7dad2f1a17 chat : fix LFM2 tool-call parsing double-escaping (#24667)
* Add escape test cases

* chat : fix LFM2 tool-call parsing double-escaping
2026-06-15 22:10:09 +02:00
Xuan-Son Nguyen
e36a602ba3 mtmd: fix miscounting n_tokens (#24656) 2026-06-15 18:07:14 +02:00
Piotr Wilkin (ilintar)
38d546330a chat: include full unparsed prompt in debug (#24650)
message on parse error
2026-06-15 17:33:54 +02:00
Julien Jerphanion
a1eb756c0b docs: Add instructions to install llama.cpp from conda-forge (#22219)
* docs: Add instructions to install `llama.cpp` from conda-forge

Signed-off-by: Julien Jerphanion <git@jjerphan.xyz>

* Rewording of instructions

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

---------

Signed-off-by: Julien Jerphanion <git@jjerphan.xyz>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2026-06-15 17:12:25 +02:00
Pascal
581e8eca8b chat: harden peg-native tool call parsing (#24329)
* chat: harden peg-native tool call parsing

accept an optional leading type: function field in
build_json_tools_flat_keys so openai style tool calls parse on
templates whose serialization opens on the name field.

return a clean error and log the unparsed fragment on a final peg
parse failure instead of throwing the raw parser position and input.

keep the raw arguments string in func_args_not_string when it is not
valid json instead of aborting the prompt render.

* chat: surface peg-native parse failures

a final peg parse failure threw the raw parser position and input. log
the unparsed fragment and raise a clearer error instead, so a model
output that does not match the expected format no longer fails silently
with an empty assistant turn.

minimal change, no behavior change on successful parses.

* chat: handle openai style tool calls in peg-native

* nits

* common: scope OpenAI wrapper grammar trigger via autoparser flag

* chat: gate type:function parsing leniency on the analysis flag

Thread accept_openai_wrapper from the generator to build_json_tools_flat_keys
so the leading "type": "function" field is accepted only when openai_wrapper_trigger is set.
2026-06-15 15:37:04 +02:00
10 changed files with 98 additions and 28 deletions

View File

@@ -37,7 +37,7 @@ LLM inference in C/C++
Getting started with llama.cpp is straightforward. Here are several ways to install it on your machine:
- Install `llama.cpp` using [brew, nix or winget](docs/install.md)
- Install `llama.cpp` using [brew, nix, winget, or conda-forge](docs/install.md)
- Run with Docker - see our [Docker documentation](docs/docker.md)
- Download pre-built binaries from the [releases page](https://github.com/ggml-org/llama.cpp/releases)
- Build from source by cloning this repository - check out [our build guide](docs/build.md)

View File

@@ -103,6 +103,10 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
data.grammar_triggers = {
{ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, trigger_marker }
};
if (autoparser.tools.format.openai_wrapper_trigger) {
// model emits the OpenAI function wrapper, trigger on it
data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "{\"type\": \"function\"," });
}
}
}
@@ -224,13 +228,13 @@ common_peg_parser analyze_tools::build_tool_parser_json_native(parser_build_cont
auto single_tool_parser = p.standard_json_tools(
format.per_call_start, format.per_call_end, inputs.tools, inputs.parallel_tool_calls,
inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED, name_field, args_field, format.tools_array_wrapped,
format.fun_name_is_key, format.id_field, format.gen_id_field, format.parameter_order);
format.fun_name_is_key, format.id_field, format.gen_id_field, format.parameter_order, format.openai_wrapper_trigger);
tools_parser = p.trigger_rule("tool-calls", p.one_or_more(single_tool_parser + p.space()));
} else {
tools_parser = p.standard_json_tools(
format.section_start, format.section_end, inputs.tools, inputs.parallel_tool_calls,
inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED, name_field, args_field, format.tools_array_wrapped,
format.fun_name_is_key, format.id_field, format.gen_id_field, format.parameter_order);
format.fun_name_is_key, format.id_field, format.gen_id_field, format.parameter_order, format.openai_wrapper_trigger);
}
// Handle content wrappers if present

View File

@@ -181,6 +181,7 @@ struct tool_format_analysis {
bool fun_name_is_key = false; // In JSON format function name is JSON key, i.e. { "<funname>": { ... arguments ... } }
bool tools_array_wrapped = false; // Tool calls wrapped in JSON array [...]
bool openai_wrapper_trigger = false; // model emits the OpenAI function wrapper, trigger on it
std::string function_field = "function";
std::string name_field = "name";

View File

@@ -165,6 +165,14 @@ static std::vector<std::function<void(const common_chat_template & tmpl, autopar
LOG_DBG(ANSI_ORANGE "[Patch: Apriel 1.6]\n" ANSI_RESET);
}
},
// template uses the JSON {name, parameters} tool instruction, emits the OpenAI function wrapper
[](const common_chat_template & tmpl, autoparser & analysis) -> void {
if (tmpl.src.find("Respond in the format {\"name\": function name") != std::string::npos &&
tmpl.src.find("Do not use variables.") != std::string::npos) {
analysis.tools.format.openai_wrapper_trigger = true;
LOG_DBG(ANSI_ORANGE "[Patch: JSON name/parameters tool instruction]\n" ANSI_RESET);
}
},
});

View File

@@ -540,10 +540,11 @@ common_peg_parser common_chat_peg_builder::python_style_tool_calls(
auto arg_name_parser = literal(prop_name);
common_peg_parser arg_value_parser = eps();
auto string_value_parser = choice({
literal("\"") + tool_arg_string_value(string_content('"')) + literal("\""),
literal("'") + tool_arg_string_value(string_content('\'')) + literal("'")
});
// Quoted literal as a value: normalize_quotes_to_json preserves escapes.
auto string_value_parser = tool_arg_value(choice({
literal("\"") + string_content('"') + literal("\""),
literal("'") + string_content('\'') + literal("'")
}));
if (is_string_type) {
arg_value_parser = string_value_parser;
@@ -745,7 +746,8 @@ common_peg_parser common_chat_peg_builder::build_json_tools_flat_keys(
const std::string & effective_args_key,
const std::string & call_id_key,
const std::string & gen_call_id_key,
const std::vector<std::string> & parameters_order) {
const std::vector<std::string> & parameters_order,
bool accept_openai_wrapper) {
auto tool_choices = choice();
auto name_key_parser = literal("\"" + effective_name_key + "\"");
@@ -807,7 +809,13 @@ common_peg_parser common_chat_peg_builder::build_json_tools_flat_keys(
return idx_a < idx_b;
});
auto ordered_body = tool_open(literal("{")) + space();
// accept an optional leading "type": "function" field when the model emits the OpenAI wrapper
common_peg_parser type_field = eps();
if (accept_openai_wrapper) {
type_field = optional(literal("\"type\"") + space() + literal(":") + space() +
literal("\"function\"") + space() + literal(",") + space());
}
auto ordered_body = tool_open(literal("{")) + space() + type_field;
for (size_t i = 0; i < parser_pairs.size(); i++) {
ordered_body = ordered_body + parser_pairs[i].first;
if (i < parser_pairs.size() - 1) {
@@ -870,7 +878,8 @@ common_peg_parser common_chat_peg_builder::standard_json_tools(
bool function_is_key,
const std::string & call_id_key,
const std::string & gen_call_id_key,
const std::vector<std::string> & parameters_order) {
const std::vector<std::string> & parameters_order,
bool accept_openai_wrapper) {
if (!tools.is_array() || tools.empty()) {
return eps();
}
@@ -888,7 +897,7 @@ common_peg_parser common_chat_peg_builder::standard_json_tools(
if (!name_spec.first.empty() || !args_spec.first.empty()) {
tool_choices = build_json_tools_nested_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key);
} else {
tool_choices = build_json_tools_flat_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key, parameters_order);
tool_choices = build_json_tools_flat_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key, parameters_order, accept_openai_wrapper);
}
}

View File

@@ -120,7 +120,8 @@ class common_chat_peg_builder : public common_peg_parser_builder {
bool function_is_key = false,
const std::string & call_id_key = "",
const std::string & gen_call_id_key = "",
const std::vector<std::string> & parameters_order = {});
const std::vector<std::string> & parameters_order = {},
bool accept_openai_wrapper = false);
// Legacy-compatible helper for building XML/tagged style tool calls
// Used by tests and manual parsers
@@ -157,7 +158,8 @@ class common_chat_peg_builder : public common_peg_parser_builder {
const std::string & effective_args_key,
const std::string & call_id_key,
const std::string & gen_call_id_key,
const std::vector<std::string> & parameters_order);
const std::vector<std::string> & parameters_order,
bool accept_openai_wrapper);
};
inline common_peg_arena build_chat_peg_parser(

View File

@@ -2678,8 +2678,9 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & src_pars
}
return msg;
}
throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end) + ": " +
effective_input.substr(result.end));
LOG_WRN("%s: unparsed %s output: %s\n", __func__, common_chat_format_name(params.format), effective_input.substr(result.end).c_str());
LOG_DBG("%s: full %s output triggering error:\n=== BEGIN ===\n%s\n=== END ===\n", __func__, common_chat_format_name(params.format), effective_input.c_str());
throw std::runtime_error(std::string("The model produced output that does not match the expected ") + common_chat_format_name(params.format) + " format");
}
common_chat_msg msg;

View File

@@ -1,12 +1,40 @@
# Install pre-built version of llama.cpp
| Install via | Windows | Mac | Linux |
|-------------|---------|-----|-------|
| Install via | Windows | Mac | Linux |
|-------------|---------|------|-------|
| conda-forge | ✅ | ✅ | ✅ |
| Winget | ✅ | | |
| Homebrew | | ✅ | ✅ |
| MacPorts | | ✅ | |
| Nix | | ✅ | ✅ |
## conda-forge (Windows, Mac and Linux)
conda-forge provides builds for:
- CUDA (Windows and Linux)
- Vulkan (Windows and Linux)
- Apple Metal (macOS)
```sh
conda install -c conda-forge llama-cpp
```
```sh
mamba install -c conda-forge llama-cpp
```
```sh
# Project-local installation
pixi add llama-cpp
# Global installation
pixi global install llama-cpp
```
This distribution is managed on [`conda-forge/llama-cpp-feedstock`](https://github.com/conda-forge/llama.cpp-feedstock/).
Shall you have any problems, please open an issue on [its issue tracker](https://github.com/conda-forge/llama.cpp-feedstock/issues).
## Winget (Windows)
```sh

View File

@@ -1882,11 +1882,29 @@ static void test_lfm2_parser(const std::string & template_path, bool detailed_de
.expect(simple_assist_msg("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org)."))
.run();
// Python tool with multiline code in string
// Python tool with multiline code in string: the \n in the literal decodes to a real
// newline, emitted as a JSON \n escape (not a doubled backslash).
tst.test("<|tool_call_start|>[python(code=\"def hello():\\n print('hey')\")]<|tool_call_end|>")
.tools({ python_tool })
.expect_tool_calls({
{ "python", R"#({"code": "def hello():\\n print('hey')"})#", "" }
{ "python", R"#({"code": "def hello():\n print('hey')"})#", "" }
})
.run();
// String escape sequences decode to their actual characters (newline + tab here),
// so a "write a two line file" style call produces real line breaks, not literal "\n".
tst.test("<|tool_call_start|>[python(code=\"First line\\nSecond line\\tindented\")]<|tool_call_end|>")
.tools({ python_tool })
.expect_tool_calls({
{ "python", R"#({"code": "First line\nSecond line\tindented"})#", "" }
})
.run();
// Escaped quotes inside a string argument survive the round-trip.
tst.test("<|tool_call_start|>[python(code=\"print(\\\"hi\\\")\")]<|tool_call_end|>")
.tools({ python_tool })
.expect_tool_calls({
{ "python", R"#({"code": "print(\"hi\")"})#", "" }
})
.run();

View File

@@ -96,16 +96,15 @@ struct mtmd_image_tokens {
// [BOI] [row0 tokens + newline] ... [row(ny-1) tokens + newline] [EOI]
return (nx + 1) * ny + 2;
}
// [QWEN_VIDEO] this logic is quite ugly, it's mostly to make qwen-vl temporal merge work, can be improved in the future
if (batch_f32.entries.size() == 1 || n_temporal_merge == 1) {
return nx * ny;
}
uint32_t nz = batch_f32.entries.size();
// TODO: simplify this by repeating the last frame until it fits the temporal merge
if (nz % n_temporal_merge != 0) {
nz = nz / n_temporal_merge + 1;
} else {
nz = nz / n_temporal_merge;
if (n_temporal_merge > 1) {
// [QWEN_VIDEO] this logic is quite ugly, it's mostly to make qwen-vl temporal merge work, can be improved in the future
// TODO: simplify this by repeating the last frame until it fits the temporal merge
if (nz % n_temporal_merge != 0) {
nz = nz / n_temporal_merge + 1;
} else {
nz = nz / n_temporal_merge;
}
}
return nx * ny * nz;
}