mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-09 10:31:45 +02:00
sd: report back image generation parameters and metadata (#2062)
* sd: refactor image generation result handling * sd: report back image generation metadata
This commit is contained in:
1
expose.h
1
expose.h
@@ -239,6 +239,7 @@ struct sd_generation_outputs
|
||||
int animated = 0;
|
||||
const char * data = "";
|
||||
const char * data_extra = "";
|
||||
const char * info = "";
|
||||
};
|
||||
struct sd_upscale_inputs
|
||||
{
|
||||
|
||||
10
koboldcpp.py
10
koboldcpp.py
@@ -376,7 +376,8 @@ class sd_generation_outputs(ctypes.Structure):
|
||||
_fields_ = [("status", ctypes.c_int),
|
||||
("animated", ctypes.c_int),
|
||||
("data", ctypes.c_char_p),
|
||||
("data_extra", ctypes.c_char_p)]
|
||||
("data_extra", ctypes.c_char_p),
|
||||
("info", ctypes.c_char_p)]
|
||||
|
||||
class sd_upscale_inputs(ctypes.Structure):
|
||||
_fields_ = [("init_images", ctypes.c_char_p),
|
||||
@@ -2468,12 +2469,14 @@ def sd_generate(genparams):
|
||||
ret = handle.sd_generate(inputs)
|
||||
data_main = ""
|
||||
data_extra = ""
|
||||
info = {}
|
||||
animated = False
|
||||
if ret.status==1:
|
||||
data_main = ret.data.decode("UTF-8","ignore")
|
||||
data_extra = ret.data_extra.decode("UTF-8","ignore")
|
||||
info = json.loads(ret.info.decode("UTF-8","ignore"))
|
||||
animated = True if ret.animated else False
|
||||
return {"animated": animated, "data":data_main, "data_extra":data_extra}
|
||||
return {"animated": animated, "data":data_main, "data_extra":data_extra, "info": info}
|
||||
|
||||
|
||||
def whisper_load_model(model_filename):
|
||||
@@ -5707,6 +5710,7 @@ Change Mode<br>
|
||||
gendat = gen["data"]
|
||||
genanim = gen["animated"]
|
||||
gendatextra = gen["data_extra"]
|
||||
geninfo = json.dumps(gen["info"]) # sdapi really expects a stringified JSON
|
||||
genresp = None
|
||||
if is_comfyui_imggen:
|
||||
if gendat:
|
||||
@@ -5717,7 +5721,7 @@ Change Mode<br>
|
||||
elif is_oai_imggen:
|
||||
genresp = (json.dumps({"created":int(time.time()),"data":[{"b64_json":gendat}],"background":"opaque","output_format":"png","size":"1024x1024","quality":"medium"}).encode())
|
||||
else:
|
||||
genresp = (json.dumps({"images":[gendat],"parameters":{},"info":"","animated":genanim,"extra_data":gendatextra}).encode())
|
||||
genresp = (json.dumps({"images":[gendat],"parameters":{},"info":geninfo,"animated":genanim,"extra_data":gendatextra}).encode())
|
||||
self.send_response(200)
|
||||
self.send_header('content-length', str(len(genresp)))
|
||||
self.end_headers(content_type='application/json')
|
||||
|
||||
@@ -150,8 +150,6 @@ static SDParams * sd_params = nullptr;
|
||||
static sd_ctx_t * sd_ctx = nullptr;
|
||||
static upscaler_ctx_t* upscaler_ctx = nullptr;
|
||||
static int sddebugmode = 0;
|
||||
static std::string recent_data = "";
|
||||
static std::string recent_data2 = ""; //for cases when we have 2 outputs
|
||||
static uint8_t * input_image_buffer = NULL;
|
||||
static uint8_t * input_mask_buffer = NULL;
|
||||
static uint8_t * upscale_src_buffer = NULL;
|
||||
@@ -164,12 +162,38 @@ static int cfg_tiled_vae_threshold = 0;
|
||||
static int cfg_square_limit = 0;
|
||||
static int cfg_side_limit = 0;
|
||||
static bool sd_is_quiet = false;
|
||||
static std::string sdmodelfilename = "";
|
||||
static bool photomaker_enabled = false;
|
||||
|
||||
static bool is_vid_model = false;
|
||||
static bool remove_limits = false;
|
||||
|
||||
static struct {
|
||||
std::string data;
|
||||
std::string data_extra;
|
||||
std::string info;
|
||||
bool animated;
|
||||
void reset() {
|
||||
data = "";
|
||||
data_extra = "";
|
||||
info = "{}";
|
||||
animated = false;
|
||||
}
|
||||
sd_generation_outputs outputs(int status) {
|
||||
sd_generation_outputs output;
|
||||
output.status = status;
|
||||
output.data = data.c_str();
|
||||
output.data_extra = data_extra.c_str();
|
||||
output.info = info.c_str();
|
||||
output.animated = animated;
|
||||
return output;
|
||||
}
|
||||
sd_generation_outputs error(const char* message) {
|
||||
reset();
|
||||
printf("\n%s\n", message);
|
||||
return outputs(0);
|
||||
}
|
||||
} sd_generation;
|
||||
|
||||
static int get_loaded_sd_version(sd_ctx_t* ctx)
|
||||
{
|
||||
return ctx->sd->version;
|
||||
@@ -495,9 +519,6 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
|
||||
is_vid_model = true;
|
||||
}
|
||||
|
||||
std::filesystem::path mpath(inputs.model_filename);
|
||||
sdmodelfilename = mpath.filename().string();
|
||||
|
||||
// preload the LoRAs with the initial multipliers
|
||||
std::vector<sd_lora_t> lora_specs = sd_params->lora_map.get_lora_specs(lora_dynamic&& lora_cache);
|
||||
if(lora_specs.size()>0)
|
||||
@@ -529,6 +550,10 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
|
||||
return true;
|
||||
}
|
||||
|
||||
static std::string friendly_model_name(std::filesystem::path model_path) {
|
||||
return model_path.filename().stem().string();
|
||||
}
|
||||
|
||||
std::string clean_input_prompt(const std::string& input) {
|
||||
std::string result;
|
||||
result.reserve(input.size());
|
||||
@@ -553,7 +578,11 @@ static std::string get_scheduler_name(scheduler_t scheduler, bool as_sampler_suf
|
||||
}
|
||||
}
|
||||
|
||||
static std::string get_image_params(const sd_img_gen_params_t & params, const std::string& lora_meta) {
|
||||
static std::string get_image_params(const sd_img_gen_params_t & params, const std::string& lora_meta, int seed_offset) {
|
||||
std::string model = sd_params->model_path;
|
||||
if (model.empty())
|
||||
model = sd_params->diffusion_model_path;
|
||||
model = friendly_model_name(model);
|
||||
std::stringstream ss;
|
||||
ss << std::setprecision(3)
|
||||
<< "Prompt: " << params.prompt << lora_meta
|
||||
@@ -561,7 +590,7 @@ static std::string get_image_params(const sd_img_gen_params_t & params, const st
|
||||
<< " | Steps: " << params.sample_params.sample_steps
|
||||
<< " | CFGScale: " << params.sample_params.guidance.txt_cfg
|
||||
<< " | Guidance: " << params.sample_params.guidance.distilled_guidance
|
||||
<< " | Seed: " << params.seed
|
||||
<< " | Seed: " << (params.seed + seed_offset)
|
||||
<< " | Size: " << params.width << "x" << params.height
|
||||
<< " | Sampler: " << sd_sample_method_name(params.sample_params.sample_method)
|
||||
<< get_scheduler_name(params.sample_params.scheduler, true);
|
||||
@@ -570,8 +599,10 @@ static std::string get_image_params(const sd_img_gen_params_t & params, const st
|
||||
if (params.sample_params.flow_shift > 0.f && params.sample_params.flow_shift != INFINITY)
|
||||
ss << "| Flow Shift: " << params.sample_params.flow_shift;
|
||||
ss << " | Clip skip: " << params.clip_skip
|
||||
<< " | Model: " << sdmodelfilename
|
||||
<< " | Version: KoboldCpp";
|
||||
<< " | Model: " << model;
|
||||
if (sd_params->vae_path != "")
|
||||
ss << " | VAE: " << friendly_model_name(sd_params->vae_path);
|
||||
ss << " | Version: KoboldCpp";
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
@@ -954,22 +985,11 @@ static std::string raw_image_to_png_base64(const sd_image_t& img, std::string pa
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
||||
{
|
||||
recent_data = "";
|
||||
recent_data2 = "";
|
||||
|
||||
sd_generation_outputs output;
|
||||
|
||||
if(sd_ctx == nullptr || sd_params == nullptr)
|
||||
{
|
||||
printf("\nWarning: KCPP image generation not initialized!\n");
|
||||
output.data = "";
|
||||
output.data_extra = "";
|
||||
output.animated = 0;
|
||||
output.status = 0;
|
||||
return output;
|
||||
return sd_generation.error("Warning: KCPP image generation not initialized!");
|
||||
}
|
||||
sd_image_t * results = nullptr;
|
||||
|
||||
@@ -1239,7 +1259,6 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
||||
params.strength = sd_params->strength;
|
||||
params.vae_tiling_params.enabled = dotile;
|
||||
parse_cache_options(params.cache, sd_params->cache_mode, sd_params->cache_options);
|
||||
params.batch_count = 1;
|
||||
|
||||
LoraMap lora_map = sd_params->lora_map;
|
||||
if (sd_params->lora_dynamic) {
|
||||
@@ -1360,12 +1379,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
||||
input_image_buffer = load_image_from_b64(img2img_data,nx,ny,img2imgW,img2imgH,3);
|
||||
|
||||
if (!input_image_buffer) {
|
||||
printf("\nKCPP SD: load image from memory failed!\n");
|
||||
output.data = "";
|
||||
output.data_extra = "";
|
||||
output.animated = 0;
|
||||
output.status = 0;
|
||||
return output;
|
||||
return sd_generation.error("KCPP SD: load image from memory failed!");
|
||||
}
|
||||
|
||||
if(img2img_mask!="")
|
||||
@@ -1424,17 +1438,44 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
||||
}
|
||||
|
||||
if (!is_passthrough && results == NULL) {
|
||||
printf("\nKCPP SD generate failed!\n");
|
||||
output.data = "";
|
||||
output.data_extra = "";
|
||||
output.animated = 0;
|
||||
output.status = 0;
|
||||
return output;
|
||||
return sd_generation.error("KCPP SD generate failed!");
|
||||
}
|
||||
|
||||
bool isanim = (vid_req_frames>1 && generated_num_results>1 && is_vid_model);
|
||||
nlohmann::json jsoninfo = nlohmann::json::object();
|
||||
if (!isanim) {
|
||||
jsoninfo["prompt"] = params.prompt + lora_meta;
|
||||
if (*params.negative_prompt)
|
||||
jsoninfo["negative_prompt"] = params.negative_prompt;
|
||||
jsoninfo["seed"] = params.seed;
|
||||
jsoninfo["cfg_scale"] = params.sample_params.guidance.txt_cfg;
|
||||
jsoninfo["width"] = params.width;
|
||||
jsoninfo["height"] = params.height;
|
||||
jsoninfo["steps"] = params.sample_params.sample_steps;
|
||||
jsoninfo["sampler_name"] = sd_sample_method_name(params.sample_params.sample_method);
|
||||
if (params.clip_skip > 0)
|
||||
jsoninfo["clip_skip"] = params.clip_skip;
|
||||
jsoninfo["extra_generation_params"] = nlohmann::json::object();
|
||||
if (params.sample_params.scheduler != scheduler_t::SCHEDULER_COUNT)
|
||||
jsoninfo["extra_generation_params"]["Schedule type"] = get_scheduler_name(params.sample_params.scheduler);
|
||||
if (is_img2img)
|
||||
jsoninfo["denoising_strength"] = params.strength;
|
||||
if (sd_params->model_path.empty())
|
||||
jsoninfo["sd_model_name"] = friendly_model_name(sd_params->diffusion_model_path);
|
||||
else
|
||||
jsoninfo["sd_model_name"] = friendly_model_name(sd_params->model_path);
|
||||
if (sd_params->vae_path != "")
|
||||
jsoninfo["sd_vae_name"] = friendly_model_name(sd_params->vae_path);
|
||||
jsoninfo["infotexts"] = nlohmann::json::array();
|
||||
jsoninfo["all_prompts"] = nlohmann::json::array();
|
||||
jsoninfo["all_negative_prompts"] = nlohmann::json::array();
|
||||
jsoninfo["all_seeds"] = nlohmann::json::array();
|
||||
jsoninfo["version"] = "KoboldCpp";
|
||||
}
|
||||
sd_image_t upscaled_image;
|
||||
upscaled_image.data = nullptr;
|
||||
std::string gen_data;
|
||||
std::string gen_data2;
|
||||
|
||||
if (is_passthrough)
|
||||
{
|
||||
@@ -1446,7 +1487,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
||||
upscaled_image = upscale(upscaler_ctx, input_image, 2);
|
||||
result_image = &upscaled_image;
|
||||
}
|
||||
recent_data = raw_image_to_png_base64(*result_image);
|
||||
gen_data = raw_image_to_png_base64(*result_image);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1491,14 +1532,14 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
||||
}
|
||||
if(status==0 && out_len>0)
|
||||
{
|
||||
recent_data = kcpp_base64_encode(out_data, out_len);
|
||||
gen_data = kcpp_base64_encode(out_data, out_len);
|
||||
free(out_data);
|
||||
}
|
||||
if (status2 == 0 && out_len2 > 0) {
|
||||
if (recent_data == "") {
|
||||
recent_data = kcpp_base64_encode(out_data2, out_len2);
|
||||
if (gen_data == "") {
|
||||
gen_data = kcpp_base64_encode(out_data2, out_len2);
|
||||
} else {
|
||||
recent_data2 = kcpp_base64_encode(out_data2, out_len2);
|
||||
gen_data2 = kcpp_base64_encode(out_data2, out_len2);
|
||||
}
|
||||
free(out_data2);
|
||||
}
|
||||
@@ -1512,8 +1553,12 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
||||
upscaled_image = upscale(upscaler_ctx, results[i], 2);
|
||||
result_image = &upscaled_image;
|
||||
}
|
||||
std::string meta_image_info = get_image_params(params, lora_meta);
|
||||
recent_data = raw_image_to_png_base64(*result_image, meta_image_info);
|
||||
std::string meta_image_info = get_image_params(params, lora_meta, i);
|
||||
gen_data = raw_image_to_png_base64(*result_image, meta_image_info);
|
||||
jsoninfo["infotexts"][i] = meta_image_info;
|
||||
jsoninfo["all_seeds"][i] = params.seed + i;
|
||||
jsoninfo["all_prompts"][i] = params.prompt;
|
||||
jsoninfo["all_negative_prompts"][i] = params.negative_prompt;
|
||||
}
|
||||
|
||||
free(results[i].data);
|
||||
@@ -1528,37 +1573,28 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
||||
}
|
||||
|
||||
free(results);
|
||||
output.data = recent_data.c_str();
|
||||
output.data_extra = recent_data2.c_str();
|
||||
output.animated = (isanim?1:0);
|
||||
output.status = 1;
|
||||
|
||||
total_img_gens += 1;
|
||||
if(!sd_is_quiet)
|
||||
{
|
||||
std::string ts = get_timestamp_str();
|
||||
printf("[%s] Generating Media Complete\n",ts.c_str());
|
||||
}
|
||||
return output;
|
||||
|
||||
sd_generation.data = gen_data;
|
||||
sd_generation.data_extra = gen_data2;
|
||||
sd_generation.animated = isanim;
|
||||
sd_generation.info = jsoninfo.dump();
|
||||
return sd_generation.outputs(1);
|
||||
}
|
||||
|
||||
sd_generation_outputs sdtype_upscale(const sd_upscale_inputs inputs)
|
||||
{
|
||||
recent_data = "";
|
||||
recent_data2 = "";
|
||||
sd_generation.reset();
|
||||
|
||||
sd_generation_outputs output;
|
||||
output.data = "";
|
||||
output.data_extra = "";
|
||||
output.animated = 0;
|
||||
output.status = 0;
|
||||
if(sd_ctx == nullptr || upscaler_ctx == nullptr || sd_params == nullptr)
|
||||
{
|
||||
printf("\nWarning: KCPP image upscaling not initialized!\n");
|
||||
output.data = "";
|
||||
output.data_extra = "";
|
||||
output.animated = 0;
|
||||
output.status = 0;
|
||||
return output;
|
||||
return sd_generation.error("Warning: KCPP image upscaling not initialized!");
|
||||
}
|
||||
|
||||
std::string rawb64 = inputs.init_images;
|
||||
@@ -1573,6 +1609,7 @@ sd_generation_outputs sdtype_upscale(const sd_upscale_inputs inputs)
|
||||
sd_image_t upscaled_image;
|
||||
source_img.data = nullptr;
|
||||
upscaled_image.data = nullptr;
|
||||
std::string result;
|
||||
if(upscale_src_buffer)
|
||||
{
|
||||
source_img.width = nx;
|
||||
@@ -1581,15 +1618,17 @@ sd_generation_outputs sdtype_upscale(const sd_upscale_inputs inputs)
|
||||
source_img.data = upscale_src_buffer;
|
||||
|
||||
upscaled_image = upscale(upscaler_ctx, source_img, inputs.upscaling_resize);
|
||||
recent_data = raw_image_to_png_base64(upscaled_image);
|
||||
result = raw_image_to_png_base64(upscaled_image);
|
||||
free(upscaled_image.data);
|
||||
output.data = recent_data.c_str();
|
||||
output.data_extra = recent_data2.c_str();
|
||||
output.animated = 0;
|
||||
output.status = 1;
|
||||
|
||||
}
|
||||
|
||||
return output;
|
||||
if (result == "") {
|
||||
return sd_generation.error("Warning: KCPP failed to upscale image");
|
||||
}
|
||||
|
||||
sd_generation.data = result;
|
||||
return sd_generation.outputs(1);
|
||||
}
|
||||
|
||||
sd_info_outputs sdtype_get_info()
|
||||
|
||||
Reference in New Issue
Block a user