sd: report back image generation parameters and metadata (#2062)

* sd: refactor image generation result handling

* sd: report back image generation metadata
This commit is contained in:
Wagner Bruna
2026-03-27 13:49:03 -03:00
committed by GitHub
parent 0c2b679ea3
commit e3c6227d46
3 changed files with 113 additions and 69 deletions

View File

@@ -239,6 +239,7 @@ struct sd_generation_outputs
int animated = 0;
const char * data = "";
const char * data_extra = "";
const char * info = "";
};
struct sd_upscale_inputs
{

View File

@@ -376,7 +376,8 @@ class sd_generation_outputs(ctypes.Structure):
_fields_ = [("status", ctypes.c_int),
("animated", ctypes.c_int),
("data", ctypes.c_char_p),
("data_extra", ctypes.c_char_p)]
("data_extra", ctypes.c_char_p),
("info", ctypes.c_char_p)]
class sd_upscale_inputs(ctypes.Structure):
_fields_ = [("init_images", ctypes.c_char_p),
@@ -2468,12 +2469,14 @@ def sd_generate(genparams):
ret = handle.sd_generate(inputs)
data_main = ""
data_extra = ""
info = {}
animated = False
if ret.status==1:
data_main = ret.data.decode("UTF-8","ignore")
data_extra = ret.data_extra.decode("UTF-8","ignore")
info = json.loads(ret.info.decode("UTF-8","ignore"))
animated = True if ret.animated else False
return {"animated": animated, "data":data_main, "data_extra":data_extra}
return {"animated": animated, "data":data_main, "data_extra":data_extra, "info": info}
def whisper_load_model(model_filename):
@@ -5707,6 +5710,7 @@ Change Mode<br>
gendat = gen["data"]
genanim = gen["animated"]
gendatextra = gen["data_extra"]
geninfo = json.dumps(gen["info"]) # sdapi really expects a stringified JSON
genresp = None
if is_comfyui_imggen:
if gendat:
@@ -5717,7 +5721,7 @@ Change Mode<br>
elif is_oai_imggen:
genresp = (json.dumps({"created":int(time.time()),"data":[{"b64_json":gendat}],"background":"opaque","output_format":"png","size":"1024x1024","quality":"medium"}).encode())
else:
genresp = (json.dumps({"images":[gendat],"parameters":{},"info":"","animated":genanim,"extra_data":gendatextra}).encode())
genresp = (json.dumps({"images":[gendat],"parameters":{},"info":geninfo,"animated":genanim,"extra_data":gendatextra}).encode())
self.send_response(200)
self.send_header('content-length', str(len(genresp)))
self.end_headers(content_type='application/json')

View File

@@ -150,8 +150,6 @@ static SDParams * sd_params = nullptr;
static sd_ctx_t * sd_ctx = nullptr;
static upscaler_ctx_t* upscaler_ctx = nullptr;
static int sddebugmode = 0;
static std::string recent_data = "";
static std::string recent_data2 = ""; //for cases when we have 2 outputs
static uint8_t * input_image_buffer = NULL;
static uint8_t * input_mask_buffer = NULL;
static uint8_t * upscale_src_buffer = NULL;
@@ -164,12 +162,38 @@ static int cfg_tiled_vae_threshold = 0;
static int cfg_square_limit = 0;
static int cfg_side_limit = 0;
static bool sd_is_quiet = false;
static std::string sdmodelfilename = "";
static bool photomaker_enabled = false;
static bool is_vid_model = false;
static bool remove_limits = false;
static struct {
std::string data;
std::string data_extra;
std::string info;
bool animated;
void reset() {
data = "";
data_extra = "";
info = "{}";
animated = false;
}
sd_generation_outputs outputs(int status) {
sd_generation_outputs output;
output.status = status;
output.data = data.c_str();
output.data_extra = data_extra.c_str();
output.info = info.c_str();
output.animated = animated;
return output;
}
sd_generation_outputs error(const char* message) {
reset();
printf("\n%s\n", message);
return outputs(0);
}
} sd_generation;
static int get_loaded_sd_version(sd_ctx_t* ctx)
{
return ctx->sd->version;
@@ -495,9 +519,6 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
is_vid_model = true;
}
std::filesystem::path mpath(inputs.model_filename);
sdmodelfilename = mpath.filename().string();
// preload the LoRAs with the initial multipliers
std::vector<sd_lora_t> lora_specs = sd_params->lora_map.get_lora_specs(lora_dynamic&& lora_cache);
if(lora_specs.size()>0)
@@ -529,6 +550,10 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
return true;
}
static std::string friendly_model_name(std::filesystem::path model_path) {
return model_path.filename().stem().string();
}
std::string clean_input_prompt(const std::string& input) {
std::string result;
result.reserve(input.size());
@@ -553,7 +578,11 @@ static std::string get_scheduler_name(scheduler_t scheduler, bool as_sampler_suf
}
}
static std::string get_image_params(const sd_img_gen_params_t & params, const std::string& lora_meta) {
static std::string get_image_params(const sd_img_gen_params_t & params, const std::string& lora_meta, int seed_offset) {
std::string model = sd_params->model_path;
if (model.empty())
model = sd_params->diffusion_model_path;
model = friendly_model_name(model);
std::stringstream ss;
ss << std::setprecision(3)
<< "Prompt: " << params.prompt << lora_meta
@@ -561,7 +590,7 @@ static std::string get_image_params(const sd_img_gen_params_t & params, const st
<< " | Steps: " << params.sample_params.sample_steps
<< " | CFGScale: " << params.sample_params.guidance.txt_cfg
<< " | Guidance: " << params.sample_params.guidance.distilled_guidance
<< " | Seed: " << params.seed
<< " | Seed: " << (params.seed + seed_offset)
<< " | Size: " << params.width << "x" << params.height
<< " | Sampler: " << sd_sample_method_name(params.sample_params.sample_method)
<< get_scheduler_name(params.sample_params.scheduler, true);
@@ -570,8 +599,10 @@ static std::string get_image_params(const sd_img_gen_params_t & params, const st
if (params.sample_params.flow_shift > 0.f && params.sample_params.flow_shift != INFINITY)
ss << "| Flow Shift: " << params.sample_params.flow_shift;
ss << " | Clip skip: " << params.clip_skip
<< " | Model: " << sdmodelfilename
<< " | Version: KoboldCpp";
<< " | Model: " << model;
if (sd_params->vae_path != "")
ss << " | VAE: " << friendly_model_name(sd_params->vae_path);
ss << " | Version: KoboldCpp";
return ss.str();
}
@@ -954,22 +985,11 @@ static std::string raw_image_to_png_base64(const sd_image_t& img, std::string pa
return result;
}
sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
{
recent_data = "";
recent_data2 = "";
sd_generation_outputs output;
if(sd_ctx == nullptr || sd_params == nullptr)
{
printf("\nWarning: KCPP image generation not initialized!\n");
output.data = "";
output.data_extra = "";
output.animated = 0;
output.status = 0;
return output;
return sd_generation.error("Warning: KCPP image generation not initialized!");
}
sd_image_t * results = nullptr;
@@ -1239,7 +1259,6 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
params.strength = sd_params->strength;
params.vae_tiling_params.enabled = dotile;
parse_cache_options(params.cache, sd_params->cache_mode, sd_params->cache_options);
params.batch_count = 1;
LoraMap lora_map = sd_params->lora_map;
if (sd_params->lora_dynamic) {
@@ -1360,12 +1379,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
input_image_buffer = load_image_from_b64(img2img_data,nx,ny,img2imgW,img2imgH,3);
if (!input_image_buffer) {
printf("\nKCPP SD: load image from memory failed!\n");
output.data = "";
output.data_extra = "";
output.animated = 0;
output.status = 0;
return output;
return sd_generation.error("KCPP SD: load image from memory failed!");
}
if(img2img_mask!="")
@@ -1424,17 +1438,44 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
}
if (!is_passthrough && results == NULL) {
printf("\nKCPP SD generate failed!\n");
output.data = "";
output.data_extra = "";
output.animated = 0;
output.status = 0;
return output;
return sd_generation.error("KCPP SD generate failed!");
}
bool isanim = (vid_req_frames>1 && generated_num_results>1 && is_vid_model);
nlohmann::json jsoninfo = nlohmann::json::object();
if (!isanim) {
jsoninfo["prompt"] = params.prompt + lora_meta;
if (*params.negative_prompt)
jsoninfo["negative_prompt"] = params.negative_prompt;
jsoninfo["seed"] = params.seed;
jsoninfo["cfg_scale"] = params.sample_params.guidance.txt_cfg;
jsoninfo["width"] = params.width;
jsoninfo["height"] = params.height;
jsoninfo["steps"] = params.sample_params.sample_steps;
jsoninfo["sampler_name"] = sd_sample_method_name(params.sample_params.sample_method);
if (params.clip_skip > 0)
jsoninfo["clip_skip"] = params.clip_skip;
jsoninfo["extra_generation_params"] = nlohmann::json::object();
if (params.sample_params.scheduler != scheduler_t::SCHEDULER_COUNT)
jsoninfo["extra_generation_params"]["Schedule type"] = get_scheduler_name(params.sample_params.scheduler);
if (is_img2img)
jsoninfo["denoising_strength"] = params.strength;
if (sd_params->model_path.empty())
jsoninfo["sd_model_name"] = friendly_model_name(sd_params->diffusion_model_path);
else
jsoninfo["sd_model_name"] = friendly_model_name(sd_params->model_path);
if (sd_params->vae_path != "")
jsoninfo["sd_vae_name"] = friendly_model_name(sd_params->vae_path);
jsoninfo["infotexts"] = nlohmann::json::array();
jsoninfo["all_prompts"] = nlohmann::json::array();
jsoninfo["all_negative_prompts"] = nlohmann::json::array();
jsoninfo["all_seeds"] = nlohmann::json::array();
jsoninfo["version"] = "KoboldCpp";
}
sd_image_t upscaled_image;
upscaled_image.data = nullptr;
std::string gen_data;
std::string gen_data2;
if (is_passthrough)
{
@@ -1446,7 +1487,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
upscaled_image = upscale(upscaler_ctx, input_image, 2);
result_image = &upscaled_image;
}
recent_data = raw_image_to_png_base64(*result_image);
gen_data = raw_image_to_png_base64(*result_image);
}
else
{
@@ -1491,14 +1532,14 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
}
if(status==0 && out_len>0)
{
recent_data = kcpp_base64_encode(out_data, out_len);
gen_data = kcpp_base64_encode(out_data, out_len);
free(out_data);
}
if (status2 == 0 && out_len2 > 0) {
if (recent_data == "") {
recent_data = kcpp_base64_encode(out_data2, out_len2);
if (gen_data == "") {
gen_data = kcpp_base64_encode(out_data2, out_len2);
} else {
recent_data2 = kcpp_base64_encode(out_data2, out_len2);
gen_data2 = kcpp_base64_encode(out_data2, out_len2);
}
free(out_data2);
}
@@ -1512,8 +1553,12 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
upscaled_image = upscale(upscaler_ctx, results[i], 2);
result_image = &upscaled_image;
}
std::string meta_image_info = get_image_params(params, lora_meta);
recent_data = raw_image_to_png_base64(*result_image, meta_image_info);
std::string meta_image_info = get_image_params(params, lora_meta, i);
gen_data = raw_image_to_png_base64(*result_image, meta_image_info);
jsoninfo["infotexts"][i] = meta_image_info;
jsoninfo["all_seeds"][i] = params.seed + i;
jsoninfo["all_prompts"][i] = params.prompt;
jsoninfo["all_negative_prompts"][i] = params.negative_prompt;
}
free(results[i].data);
@@ -1528,37 +1573,28 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
}
free(results);
output.data = recent_data.c_str();
output.data_extra = recent_data2.c_str();
output.animated = (isanim?1:0);
output.status = 1;
total_img_gens += 1;
if(!sd_is_quiet)
{
std::string ts = get_timestamp_str();
printf("[%s] Generating Media Complete\n",ts.c_str());
}
return output;
sd_generation.data = gen_data;
sd_generation.data_extra = gen_data2;
sd_generation.animated = isanim;
sd_generation.info = jsoninfo.dump();
return sd_generation.outputs(1);
}
sd_generation_outputs sdtype_upscale(const sd_upscale_inputs inputs)
{
recent_data = "";
recent_data2 = "";
sd_generation.reset();
sd_generation_outputs output;
output.data = "";
output.data_extra = "";
output.animated = 0;
output.status = 0;
if(sd_ctx == nullptr || upscaler_ctx == nullptr || sd_params == nullptr)
{
printf("\nWarning: KCPP image upscaling not initialized!\n");
output.data = "";
output.data_extra = "";
output.animated = 0;
output.status = 0;
return output;
return sd_generation.error("Warning: KCPP image upscaling not initialized!");
}
std::string rawb64 = inputs.init_images;
@@ -1573,6 +1609,7 @@ sd_generation_outputs sdtype_upscale(const sd_upscale_inputs inputs)
sd_image_t upscaled_image;
source_img.data = nullptr;
upscaled_image.data = nullptr;
std::string result;
if(upscale_src_buffer)
{
source_img.width = nx;
@@ -1581,15 +1618,17 @@ sd_generation_outputs sdtype_upscale(const sd_upscale_inputs inputs)
source_img.data = upscale_src_buffer;
upscaled_image = upscale(upscaler_ctx, source_img, inputs.upscaling_resize);
recent_data = raw_image_to_png_base64(upscaled_image);
result = raw_image_to_png_base64(upscaled_image);
free(upscaled_image.data);
output.data = recent_data.c_str();
output.data_extra = recent_data2.c_str();
output.animated = 0;
output.status = 1;
}
return output;
if (result == "") {
return sd_generation.error("Warning: KCPP failed to upscale image");
}
sd_generation.data = result;
return sd_generation.outputs(1);
}
sd_info_outputs sdtype_get_info()