gracefully handle bad grammar instead of crashing

This commit is contained in:
Concedo
2026-03-23 17:00:53 +08:00
parent ef854f002e
commit 993925ba96
3 changed files with 17 additions and 2 deletions

View File

@@ -195,7 +195,18 @@ extern "C"
// C-exported generation entry point. Wraps gpttype_generate so that any C++
// exception (e.g. a malformed grammar aborting sampling, which throws
// std::runtime_error) is converted into an error-flagged result instead of
// unwinding across the extern "C" boundary and crashing the host process.
// NOTE(review): the pre-change unconditional `return gpttype_generate(inputs);`
// left ahead of the try block made the exception handling unreachable; it is
// removed here so the guard actually runs.
generation_outputs generate(const generation_inputs inputs)
{
    try {
        return gpttype_generate(inputs);
    } catch (const std::exception & e) {
        printf("\nGeneration encountered an exception: %s\n", e.what());
        generation_outputs output;
        output.text = nullptr;
        output.status = 0;
        output.prompt_tokens = output.completion_tokens = 0;
        output.stopreason = stop_reason::ERROR_ENCOUNTERED;
        generation_finished = true; // unblock any caller polling for completion
        return output;
    } catch (...) {
        // Backstop: a non-std exception escaping an extern "C" function would
        // reach std::terminate — report and return the same error result.
        printf("\nGeneration encountered an unknown exception!\n");
        generation_outputs output;
        output.text = nullptr;
        output.status = 0;
        output.prompt_tokens = output.completion_tokens = 0;
        output.stopreason = stop_reason::ERROR_ENCOUNTERED;
        generation_finished = true;
        return output;
    }
}
bool sd_load_model(const sd_load_model_inputs inputs)

View File

@@ -789,6 +789,10 @@ static speculative_draft_result speculative_decoding_eval_chunk(llama_context *
// KCPP SAMPLING FUNCTIONS
void sample_softmax(llama_token_data_array * cur_p, bool do_sort=true) {
if(!(cur_p->size > 0))
{
throw std::runtime_error("No valid candidates during sampling. Current request aborted!");
}
GGML_ASSERT(cur_p->size > 0);
// Sort the logits in descending order
if (!cur_p->sorted && do_sort) {

View File

@@ -1282,7 +1282,7 @@ void llama_model_loader::done_getting_tensors() const {
throw std::runtime_error(format("%s: wrong number of tensors; expected %d, got %d", __func__, n_tensors, n_created));
}
if (n_tensors_moved > 0) {
LLAMA_LOG_DEBUG("%s: tensor '%s' (%s) (and %zu others) cannot be used with preferred buffer type %s, using %s instead\n",
LLAMA_LOG_DEBUG("%s: tensor '%s' (%s) (and %zu others) moved from %s, using %s instead\n",
__func__, first_tensor_moved_name.c_str(), first_tensor_moved_type_name.c_str(), n_tensors_moved - 1,
ggml_backend_buft_name(first_moved_from_buft), ggml_backend_buft_name(first_moved_to_buft));
}