gracefully handle bad grammar instead of crashing

This commit is contained in:
Concedo
2026-03-23 17:00:53 +08:00
parent ef854f002e
commit 993925ba96
3 changed files with 17 additions and 2 deletions

View File

@@ -195,7 +195,18 @@ extern "C"
// C-exported generation entry point. Wraps gpttype_generate so that any C++
// exception (e.g. a malformed grammar aborting sampling, which throws
// std::runtime_error) is converted into an error-flagged result instead of
// unwinding across the extern "C" boundary and crashing the host process.
// NOTE(review): the pre-change unconditional `return gpttype_generate(inputs);`
// left ahead of the try block made the exception handling unreachable; it is
// removed here so the guard actually runs.
generation_outputs generate(const generation_inputs inputs)
{
    try {
        return gpttype_generate(inputs);
    } catch (const std::exception & e) {
        printf("\nGeneration encountered an exception: %s\n", e.what());
        generation_outputs output;
        output.text = nullptr;
        output.status = 0;
        output.prompt_tokens = output.completion_tokens = 0;
        output.stopreason = stop_reason::ERROR_ENCOUNTERED;
        generation_finished = true; // unblock any caller polling for completion
        return output;
    } catch (...) {
        // Backstop: a non-std exception escaping an extern "C" function would
        // reach std::terminate — report and return the same error result.
        printf("\nGeneration encountered an unknown exception!\n");
        generation_outputs output;
        output.text = nullptr;
        output.status = 0;
        output.prompt_tokens = output.completion_tokens = 0;
        output.stopreason = stop_reason::ERROR_ENCOUNTERED;
        generation_finished = true;
        return output;
    }
}
bool sd_load_model(const sd_load_model_inputs inputs)

View File

@@ -789,6 +789,10 @@ static speculative_draft_result speculative_decoding_eval_chunk(llama_context *
// KCPP SAMPLING FUNCTIONS
void sample_softmax(llama_token_data_array * cur_p, bool do_sort=true) {
if(!(cur_p->size > 0))
{
throw std::runtime_error("No valid candidates during sampling. Current request aborted!");
}
GGML_ASSERT(cur_p->size > 0);
// Sort the logits in descending order
if (!cur_p->sorted && do_sort) {

View File

@@ -1282,7 +1282,7 @@ void llama_model_loader::done_getting_tensors() const {
throw std::runtime_error(format("%s: wrong number of tensors; expected %d, got %d", __func__, n_tensors, n_created));
}
if (n_tensors_moved > 0) {
LLAMA_LOG_DEBUG("%s: tensor '%s' (%s) (and %zu others) cannot be used with preferred buffer type %s, using %s instead\n",
LLAMA_LOG_DEBUG("%s: tensor '%s' (%s) (and %zu others) moved from %s, using %s instead\n",
__func__, first_tensor_moved_name.c_str(), first_tensor_moved_type_name.c_str(), n_tensors_moved - 1,
ggml_backend_buft_name(first_moved_from_buft), ggml_backend_buft_name(first_moved_to_buft));
}