mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-09 10:31:45 +02:00
gracefully handle bad grammar instead of crashing
This commit is contained in:
13
expose.cpp
13
expose.cpp
@@ -195,7 +195,18 @@ extern "C"
|
||||
|
||||
generation_outputs generate(const generation_inputs inputs)
|
||||
{
|
||||
return gpttype_generate(inputs);
|
||||
try {
|
||||
return gpttype_generate(inputs);
|
||||
} catch (const std::exception & e) {
|
||||
generation_outputs output;
|
||||
printf("\nGeneration encountered an exception: %s\n", e.what());
|
||||
output.text = nullptr;
|
||||
output.status = 0;
|
||||
output.prompt_tokens = output.completion_tokens = 0;
|
||||
output.stopreason = stop_reason::ERROR_ENCOUNTERED;
|
||||
generation_finished = true;
|
||||
return output;
|
||||
}
|
||||
}
|
||||
|
||||
bool sd_load_model(const sd_load_model_inputs inputs)
|
||||
|
||||
@@ -789,6 +789,10 @@ static speculative_draft_result speculative_decoding_eval_chunk(llama_context *
|
||||
|
||||
// KCPP SAMPLING FUNCTIONS
|
||||
void sample_softmax(llama_token_data_array * cur_p, bool do_sort=true) {
|
||||
if(!(cur_p->size > 0))
|
||||
{
|
||||
throw std::runtime_error("No valid candidates during sampling. Current request aborted!");
|
||||
}
|
||||
GGML_ASSERT(cur_p->size > 0);
|
||||
// Sort the logits in descending order
|
||||
if (!cur_p->sorted && do_sort) {
|
||||
|
||||
@@ -1282,7 +1282,7 @@ void llama_model_loader::done_getting_tensors() const {
|
||||
throw std::runtime_error(format("%s: wrong number of tensors; expected %d, got %d", __func__, n_tensors, n_created));
|
||||
}
|
||||
if (n_tensors_moved > 0) {
|
||||
LLAMA_LOG_DEBUG("%s: tensor '%s' (%s) (and %zu others) cannot be used with preferred buffer type %s, using %s instead\n",
|
||||
LLAMA_LOG_DEBUG("%s: tensor '%s' (%s) (and %zu others) moved from %s, using %s instead\n",
|
||||
__func__, first_tensor_moved_name.c_str(), first_tensor_moved_type_name.c_str(), n_tensors_moved - 1,
|
||||
ggml_backend_buft_name(first_moved_from_buft), ggml_backend_buft_name(first_moved_to_buft));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user