Compare commits

...

2 Commits

Author SHA1 Message Date
liminfei-amd a4107133a6 llama : add guard for K/V rotation input when buffer is unallocated (#25215)
llm_graph_input_attn_kv::set_input and llm_graph_input_attn_kv_iswa::set_input
call set_input_k_rot / set_input_v_rot whenever the rotation tensor pointer is
non-null, but the tensor's buffer can be unallocated (NULL) when a graph only
stores K/V without attending -- e.g. DFlash speculative decoding's KV-injection
pass. set_input_k_rot then calls ggml_backend_buffer_is_host() on a NULL buffer
and aborts with GGML_ASSERT(buffer).

Guard the four k_rot/v_rot inputs with the same "&& ->buffer" check that the
adjacent kq_mask inputs already use in these two functions. When the buffer is
unallocated there is no data to upload, so skipping is correct.

Fixes #25191

Signed-off-by: liminfei-amd <91481003+liminfei-amd@users.noreply.github.com>
2026-07-04 22:37:38 +02:00
Pascal 665892536d ui: add sync blocks so display/behavior settings can be set via --ui-config-file (#25132)
* ui: add sync blocks so display/behavior settings can be set via --ui-config-file

* ui: remove enable thinking setting
2026-07-04 16:12:27 +02:00
3 changed files with 46 additions and 23 deletions
+6 -6
View File
@@ -494,11 +494,11 @@ void llm_graph_input_attn_kv::set_input(const llama_ubatch * ubatch) {
mctx->set_input_kq_mask(self_kq_mask, ubatch, cparams.causal_attn);
}
if (self_k_rot) {
if (self_k_rot && self_k_rot->buffer) {
mctx->set_input_k_rot(self_k_rot);
}
if (self_v_rot) {
if (self_v_rot && self_v_rot->buffer) {
mctx->set_input_v_rot(self_v_rot);
}
}
@@ -592,19 +592,19 @@ void llm_graph_input_attn_kv_iswa::set_input(const llama_ubatch * ubatch) {
mctx->get_swa()->set_input_kq_mask(self_kq_mask_swa, ubatch, cparams.causal_attn);
}
if (self_k_rot) {
if (self_k_rot && self_k_rot->buffer) {
mctx->get_base()->set_input_k_rot(self_k_rot);
}
if (self_v_rot) {
if (self_v_rot && self_v_rot->buffer) {
mctx->get_base()->set_input_v_rot(self_v_rot);
}
if (self_k_rot_swa) {
if (self_k_rot_swa && self_k_rot_swa->buffer) {
mctx->get_swa()->set_input_k_rot(self_k_rot_swa);
}
if (self_v_rot_swa) {
if (self_v_rot_swa && self_v_rot_swa->buffer) {
mctx->get_swa()->set_input_v_rot(self_v_rot_swa);
}
}
@@ -69,7 +69,6 @@ export const SETTINGS_KEYS = {
// Developer
DISABLE_REASONING_PARSING: 'disableReasoningParsing',
EXCLUDE_REASONING_FROM_CONTEXT: 'excludeReasoningFromContext',
ENABLE_THINKING: 'enableThinking',
SHOW_RAW_OUTPUT_SWITCH: 'showRawOutputSwitch',
// PY_INTERPRETER_ENABLED: 'pyInterpreterEnabled',
JS_SANDBOX_ENABLED: 'jsSandboxEnabled',
+40 -16
View File
@@ -185,7 +185,11 @@ const SETTINGS_REGISTRY: Record<string, SettingsSectionEntry> = {
defaultValue: false,
type: SettingsFieldType.CHECKBOX,
section: SETTINGS_SECTION_SLUGS.GENERAL,
isExperimental: true
isExperimental: true,
sync: {
serverKey: SETTINGS_KEYS.TITLE_GENERATION_USE_LLM,
paramType: SyncableParameterType.BOOLEAN
}
},
{
key: SETTINGS_KEYS.TITLE_GENERATION_PROMPT,
@@ -193,7 +197,11 @@ const SETTINGS_REGISTRY: Record<string, SettingsSectionEntry> = {
help: 'Optional template for the title generation prompt. Use {{USER}} for the user message and {{ASSISTANT}} for the assistant message.',
defaultValue: TITLE_GENERATION.DEFAULT_PROMPT,
type: SettingsFieldType.TEXTAREA,
section: SETTINGS_SECTION_SLUGS.GENERAL
section: SETTINGS_SECTION_SLUGS.GENERAL,
sync: {
serverKey: SETTINGS_KEYS.TITLE_GENERATION_PROMPT,
paramType: SyncableParameterType.STRING
}
},
{
key: SETTINGS_KEYS.MAX_IMAGE_RESOLUTION,
@@ -201,7 +209,11 @@ const SETTINGS_REGISTRY: Record<string, SettingsSectionEntry> = {
help: 'Images larger than this will be resized before sending to server. Set to 0 to disable.',
defaultValue: 0,
type: SettingsFieldType.INPUT,
section: SETTINGS_SECTION_SLUGS.GENERAL
section: SETTINGS_SECTION_SLUGS.GENERAL,
sync: {
serverKey: SETTINGS_KEYS.MAX_IMAGE_RESOLUTION,
paramType: SyncableParameterType.NUMBER
}
}
]
},
@@ -385,7 +397,11 @@ const SETTINGS_REGISTRY: Record<string, SettingsSectionEntry> = {
help: 'Display the current build version in the bottom-right corner of the interface.',
defaultValue: false,
type: SettingsFieldType.CHECKBOX,
section: SETTINGS_SECTION_SLUGS.DISPLAY
section: SETTINGS_SECTION_SLUGS.DISPLAY,
sync: {
serverKey: SETTINGS_KEYS.SHOW_BUILD_VERSION,
paramType: SyncableParameterType.BOOLEAN
}
}
]
},
@@ -669,7 +685,11 @@ const SETTINGS_REGISTRY: Record<string, SettingsSectionEntry> = {
help: 'After each response, re-submit the conversation to pre-fill the server KV cache. Makes the next turn faster since the prompt is already encoded while you read the response.',
defaultValue: false,
type: SettingsFieldType.CHECKBOX,
section: SETTINGS_SECTION_SLUGS.DEVELOPER
section: SETTINGS_SECTION_SLUGS.DEVELOPER,
sync: {
serverKey: SETTINGS_KEYS.PRE_ENCODE_CONVERSATION,
paramType: SyncableParameterType.BOOLEAN
}
},
{
key: SETTINGS_KEYS.DISABLE_REASONING_PARSING,
@@ -677,7 +697,11 @@ const SETTINGS_REGISTRY: Record<string, SettingsSectionEntry> = {
help: 'Send reasoning_format=none so the server returns thinking tokens inline instead of extracting them into a separate field.',
defaultValue: false,
type: SettingsFieldType.CHECKBOX,
section: SETTINGS_SECTION_SLUGS.DEVELOPER
section: SETTINGS_SECTION_SLUGS.DEVELOPER,
sync: {
serverKey: SETTINGS_KEYS.DISABLE_REASONING_PARSING,
paramType: SyncableParameterType.BOOLEAN
}
},
{
key: SETTINGS_KEYS.EXCLUDE_REASONING_FROM_CONTEXT,
@@ -691,14 +715,6 @@ const SETTINGS_REGISTRY: Record<string, SettingsSectionEntry> = {
paramType: SyncableParameterType.BOOLEAN
}
},
{
key: SETTINGS_KEYS.ENABLE_THINKING,
label: 'Enable thinking',
help: 'Enable model thinking/reasoning for each request. When off, the model will skip the thinking phase and go straight to the response.',
defaultValue: false,
type: SettingsFieldType.CHECKBOX,
section: SETTINGS_SECTION_SLUGS.DEVELOPER
},
{
key: SETTINGS_KEYS.SHOW_RAW_OUTPUT_SWITCH,
label: 'Enable raw output toggle',
@@ -717,7 +733,11 @@ const SETTINGS_REGISTRY: Record<string, SettingsSectionEntry> = {
help: 'Expose a run_javascript tool to the model. Code runs in a Web Worker inside a sandboxed iframe with an opaque origin, isolated from the WebUI and its API, with a hard timeout.',
defaultValue: false,
type: SettingsFieldType.CHECKBOX,
section: SETTINGS_SECTION_SLUGS.DEVELOPER
section: SETTINGS_SECTION_SLUGS.DEVELOPER,
sync: {
serverKey: SETTINGS_KEYS.JS_SANDBOX_ENABLED,
paramType: SyncableParameterType.BOOLEAN
}
},
{
key: SETTINGS_KEYS.CUSTOM_JSON,
@@ -753,7 +773,11 @@ const SETTINGS_REGISTRY: Record<string, SettingsSectionEntry> = {
defaultValue: DEFAULT_MCP_CONFIG.requestTimeoutSeconds,
type: SettingsFieldType.INPUT,
section: SETTINGS_SECTION_SLUGS.MCP,
isPositiveInteger: true
isPositiveInteger: true,
sync: {
serverKey: SETTINGS_KEYS.MCP_REQUEST_TIMEOUT_SECONDS,
paramType: SyncableParameterType.NUMBER
}
}
]
}