vocab : remove </s> eog token if gemma4 (#21492)

2026-04-09 10:01:54 +02:00 · 2026-04-08 09:53:06 -05:00
parent 4a05e0c566
commit d9a12c82f0
1 changed file with 29 additions and 1 deletion
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -2558,7 +2558,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                    || t.first == "[EOS]" // Kimi-K2
                    || t.first == "<|end_of_text|>"
                    || t.first == "<end_of_utterance>" // smoldocling
-                    || t.first == "<turn|>" // gemma4
+                    || t.first == "<eos>"            // gemma4
+                    || t.first == "<turn|>"          // gemma4
                    || t.first == "<|tool_response>" // gemma4
                    || t.first == "<｜end▁of▁sentence｜>" // deepseek-ocr
               ) {
@@ -2645,6 +2646,33 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>', or '<|calls|>' and '<|flush|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
            }
        }
+
+        // workaround for gemma4 and paddleocr: if '<|tool_response>' is an EOG token, drop '</s>' from the EOG list and reset its attr to normal
+        {
+            bool has_tool_response = false;
+            bool has_s = false;
+
+            llama_token s_id = LLAMA_TOKEN_NULL;
+
+            for (auto tid : special_eog_ids) {
+                const auto & text = id_to_token[tid].text;
+                if (text == "<|tool_response>") {
+                    has_tool_response = true;
+                } else if (text == "</s>") {
+                    has_s = true;
+                    s_id = tid;
+                }
+            }
+
+            if (has_tool_response && has_s) {
+                special_eog_ids.erase(s_id);
+
+                auto & attr = id_to_token[s_id].attr;
+                attr = LLAMA_TOKEN_ATTR_NORMAL;
+
+                LLAMA_LOG_WARN("%s: special_eog_ids contains '<|tool_response>', removing '</s>' token from EOG list\n", __func__);
+            }
+        }
    }

    // build special tokens cache