From 15585fbf51fe91caf7a276f0dd087ff67b93801e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Fri, 9 Jan 2026 22:01:29 +0100 Subject: [PATCH 01/10] Select backend devices via arg --- examples/common/common.hpp | 63 ++++++---- stable-diffusion.cpp | 241 +++++++++++++++++++++---------------- stable-diffusion.h | 12 +- 3 files changed, 185 insertions(+), 131 deletions(-) diff --git a/examples/common/common.hpp b/examples/common/common.hpp index 82328bccb..de0333cfb 100644 --- a/examples/common/common.hpp +++ b/examples/common/common.hpp @@ -447,6 +447,13 @@ struct SDContextParams { std::string tensor_type_rules; std::string lora_model_dir; + std::string main_backend_device; + std::string diffusion_backend_device; + std::string clip_backend_device; + std::string vae_backend_device; + std::string tae_backend_device; + std::string control_net_backend_device; + std::map embedding_map; std::vector embedding_vec; @@ -454,9 +461,6 @@ struct SDContextParams { rng_type_t sampler_rng_type = RNG_TYPE_COUNT; bool offload_params_to_cpu = false; bool enable_mmap = false; - bool control_net_cpu = false; - bool clip_on_cpu = false; - bool vae_on_cpu = false; bool diffusion_flash_attn = false; bool diffusion_conv_direct = false; bool vae_conv_direct = false; @@ -561,6 +565,31 @@ struct SDContextParams { "--upscale-model", "path to esrgan model.", &esrgan_path}, + {"", + "--main-backend-device", + "default device to use for all backends (defaults to main gpu device if hardware acceleration is available, otherwise cpu)", + &main_backend_device}, + {"", + "--diffusion-backend-device", + "device to use for diffusion (defaults to main-backend-device)", + &diffusion_backend_device}, + {"", + "--clip-backend-device", + "device to use for clip (defaults to main-backend-device)", + &clip_backend_device}, + {"", + "--vae-backend-device", + "device to use for vae (defaults to main-backend-device). Also applies to tae, unless tae-backend-device is specified", + &vae_backend_device}, + {"", + "--tae-backend-device", + "device to use for tae (defaults to vae-backend-device)", + &tae_backend_device}, + {"", + "--control-net-backend-device", + "device to use for control net (defaults to main-backend-device)", + &control_net_backend_device}, + }; options.int_options = { @@ -603,18 +632,6 @@ struct SDContextParams { "--mmap", "whether to memory-map model", true, &enable_mmap}, - {"", - "--control-net-cpu", - "keep controlnet in cpu (for low vram)", - true, &control_net_cpu}, - {"", - "--clip-on-cpu", - "keep clip in cpu (for low vram)", - true, &clip_on_cpu}, - {"", - "--vae-on-cpu", - "keep vae in cpu (for low vram)", - true, &vae_on_cpu}, {"", "--diffusion-fa", "use flash attention in the diffusion model", @@ -875,6 +892,7 @@ struct SDContextParams { std::string embeddings_str = emb_ss.str(); std::ostringstream oss; + // TODO backend devices oss << "SDContextParams {\n" << " n_threads: " << n_threads << ",\n" << " model_path: \"" << model_path << "\",\n" @@ -901,9 +919,9 @@ struct SDContextParams { << " flow_shift: " << (std::isinf(flow_shift) ? "INF" : std::to_string(flow_shift)) << "\n" << " offload_params_to_cpu: " << (offload_params_to_cpu ? "true" : "false") << ",\n" << " enable_mmap: " << (enable_mmap ? "true" : "false") << ",\n" - << " control_net_cpu: " << (control_net_cpu ? "true" : "false") << ",\n" - << " clip_on_cpu: " << (clip_on_cpu ? "true" : "false") << ",\n" - << " vae_on_cpu: " << (vae_on_cpu ? "true" : "false") << ",\n" + // << " control_net_cpu: " << (control_net_cpu ? 
"true" : "false") << ",\n" + // << " clip_on_cpu: " << (clip_on_cpu ? "true" : "false") << ",\n" + // << " vae_on_cpu: " << (vae_on_cpu ? "true" : "false") << ",\n" << " diffusion_flash_attn: " << (diffusion_flash_attn ? "true" : "false") << ",\n" << " diffusion_conv_direct: " << (diffusion_conv_direct ? "true" : "false") << ",\n" << " vae_conv_direct: " << (vae_conv_direct ? "true" : "false") << ",\n" @@ -965,9 +983,6 @@ struct SDContextParams { lora_apply_mode, offload_params_to_cpu, enable_mmap, - clip_on_cpu, - control_net_cpu, - vae_on_cpu, diffusion_flash_attn, taesd_preview, diffusion_conv_direct, @@ -980,6 +995,12 @@ struct SDContextParams { chroma_t5_mask_pad, qwen_image_zero_cond_t, flow_shift, + main_backend_device.c_str(), + diffusion_backend_device.c_str(), + clip_backend_device.c_str(), + vae_backend_device.c_str(), + tae_backend_device.c_str(), + control_net_backend_device.c_str(), }; return sd_ctx_params; } diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 60bcba4d3..d7ed81665 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -96,9 +96,13 @@ void suppress_pp(int step, int steps, float time, void* data) { class StableDiffusionGGML { public: ggml_backend_t backend = nullptr; // general backend + ggml_backend_t diffusion_backend = nullptr; ggml_backend_t clip_backend = nullptr; ggml_backend_t control_net_backend = nullptr; ggml_backend_t vae_backend = nullptr; + ggml_backend_t tae_backend = nullptr; + + // TODO: clip_vision and photomaker backends SDVersion version; bool vae_decode_only = false; @@ -144,72 +148,65 @@ class StableDiffusionGGML { StableDiffusionGGML() = default; ~StableDiffusionGGML() { + if (diffusion_backend != backend) { + ggml_backend_free(diffusion_backend); + } if (clip_backend != backend) { ggml_backend_free(clip_backend); } if (control_net_backend != backend) { ggml_backend_free(control_net_backend); } + if (tae_backend != vae_backend) { + ggml_backend_free(tae_backend); + } if (vae_backend != backend) { ggml_backend_free(vae_backend); } ggml_backend_free(backend); } - void init_backend() { -#ifdef SD_USE_CUDA - LOG_DEBUG("Using CUDA backend"); - backend = ggml_backend_cuda_init(0); -#endif -#ifdef SD_USE_METAL - LOG_DEBUG("Using Metal backend"); - backend = ggml_backend_metal_init(); -#endif -#ifdef SD_USE_VULKAN - LOG_DEBUG("Using Vulkan backend"); - size_t device = 0; - const int device_count = ggml_backend_vk_get_device_count(); - if (device_count) { - const char* SD_VK_DEVICE = getenv("SD_VK_DEVICE"); - if (SD_VK_DEVICE != nullptr) { - std::string sd_vk_device_str = SD_VK_DEVICE; - try { - device = std::stoull(sd_vk_device_str); - } catch (const std::invalid_argument&) { - LOG_WARN("SD_VK_DEVICE environment variable is not a valid integer (%s). Falling back to device 0.", SD_VK_DEVICE); - device = 0; - } catch (const std::out_of_range&) { - LOG_WARN("SD_VK_DEVICE environment variable value is out of range for `unsigned long long` type (%s). Falling back to device 0.", SD_VK_DEVICE); - device = 0; - } - if (device >= device_count) { - LOG_WARN("Cannot find targeted vulkan device (%llu). 
Falling back to device 0.", device); - device = 0; - } + void list_backends() { + // TODO: expose via C API and fill a cstr + const int device_count = ggml_backend_dev_count(); + for (int i = 0; i < device_count; i++) { + LOG_INFO("%s", ggml_backend_dev_name(ggml_backend_dev_get(i))); + } + } + + bool backend_name_exists(std::string name) { + const int device_count = ggml_backend_dev_count(); + for (int i = 0; i < device_count; i++) { + if (name == ggml_backend_dev_name(ggml_backend_dev_get(i))) { + return true; } - LOG_INFO("Vulkan: Using device %llu", device); - backend = ggml_backend_vk_init(device); - } - if (!backend) { - LOG_WARN("Failed to initialize Vulkan backend"); - } -#endif -#ifdef SD_USE_OPENCL - LOG_DEBUG("Using OpenCL backend"); - // ggml_log_set(ggml_log_callback_default, nullptr); // Optional ggml logs - backend = ggml_backend_opencl_init(); - if (!backend) { - LOG_WARN("Failed to initialize OpenCL backend"); - } -#endif -#ifdef SD_USE_SYCL - LOG_DEBUG("Using SYCL backend"); - backend = ggml_backend_sycl_init(0); -#endif - - if (!backend) { - LOG_DEBUG("Using CPU backend"); - backend = ggml_backend_cpu_init(); + } + return false; + } + + std::string sanitize_backend_name(std::string name) { + if (name == "" || backend_name_exists(name)) { + return name; + } else { + LOG_WARN("Backend %s not found, using default backend", name.c_str()); + return ""; + } + } + + std::string get_default_backend_name() { + // should pick the same backend as ggml_backend_init_best + ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU); + dev = dev ? dev : ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU); + dev = dev ? dev : ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU); + return ggml_backend_dev_name(dev); + } + + ggml_backend_t init_named_backend(std::string name = "") { + LOG_DEBUG("Initializing backend: %s", name.c_str()); + if (name.empty()) { + return ggml_backend_init_best(); + } else { + return ggml_backend_init_by_name(name.c_str(), nullptr); } } @@ -240,7 +237,44 @@ class StableDiffusionGGML { ggml_log_set(ggml_log_callback_default, nullptr); - init_backend(); + list_backends(); + + std::string default_backend_name = get_default_backend_name(); + + std::string override_default_backend_name = sanitize_backend_name(SAFE_STR(sd_ctx_params->main_device)); + + if (override_default_backend_name.size() > 0) { + LOG_INFO("Setting default backend to %s", override_default_backend_name.c_str()); + default_backend_name = override_default_backend_name; + } + + std::string diffusion_backend_name = sanitize_backend_name(SAFE_STR(sd_ctx_params->diffusion_device)); + std::string clip_backend_name = sanitize_backend_name(SAFE_STR(sd_ctx_params->clip_device)); + std::string control_net_backend_name = sanitize_backend_name(SAFE_STR(sd_ctx_params->control_net_device)); + std::string vae_backend_name = sanitize_backend_name(SAFE_STR(sd_ctx_params->vae_device)); + std::string tae_backend_name = sanitize_backend_name(SAFE_STR(sd_ctx_params->tae_device)); + + bool diffusion_backend_is_default = diffusion_backend_name.empty() || diffusion_backend_name == default_backend_name; + bool clip_backend_is_default = (clip_backend_name.empty() || clip_backend_name == default_backend_name); + bool control_net_backend_is_default = (control_net_backend_name.empty() || control_net_backend_name == default_backend_name); + bool vae_backend_is_default = (vae_backend_name.empty() || vae_backend_name == default_backend_name); + // if tae_backend_name is empty, it will use the same 
backend as vae + bool tae_backend_is_default = (tae_backend_name.empty() && vae_backend_is_default) || tae_backend_name == default_backend_name; + + // if some backend is not specified or is the same as the default backend, use the default backend + bool use_default_backend = diffusion_backend_is_default || clip_backend_is_default || control_net_backend_is_default || vae_backend_is_default || tae_backend_is_default; + + if (use_default_backend) { + backend = init_named_backend(override_default_backend_name); + LOG_DEBUG("Loaded default backend %s", ggml_backend_name(backend)); + } + + if (!diffusion_backend_is_default) { + diffusion_backend = init_named_backend(diffusion_backend_name); + LOG_INFO("Using diffusion backend: %s", ggml_backend_name(diffusion_backend)); + } else { + diffusion_backend = backend; + } ModelLoader model_loader; @@ -411,21 +445,19 @@ class StableDiffusionGGML { LOG_INFO("Using circular padding for convolutions"); } - bool clip_on_cpu = sd_ctx_params->keep_clip_on_cpu; - { clip_backend = backend; - if (clip_on_cpu && !ggml_backend_is_cpu(backend)) { - LOG_INFO("CLIP: Using CPU backend"); - clip_backend = ggml_backend_cpu_init(); + if (!clip_backend_is_default) { + clip_backend = init_named_backend(clip_backend_name); + LOG_INFO("CLIP: Using %s backend", ggml_backend_name(clip_backend)); } if (sd_version_is_sd3(version)) { cond_stage_model = std::make_shared(clip_backend, offload_params_to_cpu, tensor_storage_map); - diffusion_model = std::make_shared(backend, - offload_params_to_cpu, - tensor_storage_map); + diffusion_model = std::make_shared(diffusion_backend, + offload_params_to_cpu, + tensor_storage_map); } else if (sd_version_is_flux(version)) { bool is_chroma = false; for (auto pair : tensor_storage_map) { @@ -461,7 +493,7 @@ class StableDiffusionGGML { offload_params_to_cpu, tensor_storage_map); } - diffusion_model = std::make_shared(backend, + diffusion_model = std::make_shared(diffusion_backend, offload_params_to_cpu, tensor_storage_map, version, @@ -472,11 +504,11 @@ class StableDiffusionGGML { offload_params_to_cpu, tensor_storage_map, version); - diffusion_model = std::make_shared(backend, - offload_params_to_cpu, - tensor_storage_map, - version, - sd_ctx_params->chroma_use_dit_mask); + diffusion_model = std::make_shared(diffusion_backend, + offload_params_to_cpu, + tensor_storage_map, + version, + sd_ctx_params->chroma_use_dit_mask); } else if (sd_version_is_wan(version)) { cond_stage_model = std::make_shared(clip_backend, offload_params_to_cpu, @@ -484,13 +516,13 @@ class StableDiffusionGGML { true, 1, true); - diffusion_model = std::make_shared(backend, - offload_params_to_cpu, - tensor_storage_map, - "model.diffusion_model", - version); + diffusion_model = std::make_shared(diffusion_backend, + offload_params_to_cpu, + tensor_storage_map, + "model.diffusion_model", + version); if (strlen(SAFE_STR(sd_ctx_params->high_noise_diffusion_model_path)) > 0) { - high_noise_diffusion_model = std::make_shared(backend, + high_noise_diffusion_model = std::make_shared(diffusion_backend, offload_params_to_cpu, tensor_storage_map, "model.high_noise_diffusion_model", @@ -516,22 +548,22 @@ class StableDiffusionGGML { version, "", enable_vision); - diffusion_model = std::make_shared(backend, - offload_params_to_cpu, - tensor_storage_map, - "model.diffusion_model", - version, - sd_ctx_params->qwen_image_zero_cond_t); + diffusion_model = std::make_shared(diffusion_backend, + offload_params_to_cpu, + tensor_storage_map, + "model.diffusion_model", + version, + 
sd_ctx_params->qwen_image_zero_cond_t); } else if (sd_version_is_z_image(version)) { cond_stage_model = std::make_shared(clip_backend, offload_params_to_cpu, tensor_storage_map, version); - diffusion_model = std::make_shared(backend, - offload_params_to_cpu, - tensor_storage_map, - "model.diffusion_model", - version); + diffusion_model = std::make_shared(diffusion_backend, + offload_params_to_cpu, + tensor_storage_map, + "model.diffusion_model", + version); } else { // SD1.x SD2.x SDXL std::map embbeding_map; for (uint32_t i = 0; i < sd_ctx_params->embedding_count; i++) { @@ -551,7 +583,7 @@ class StableDiffusionGGML { embbeding_map, version); } - diffusion_model = std::make_shared(backend, + diffusion_model = std::make_shared(diffusion_backend, offload_params_to_cpu, tensor_storage_map, version); @@ -584,11 +616,15 @@ class StableDiffusionGGML { high_noise_diffusion_model->get_param_tensors(tensors); } - if (sd_ctx_params->keep_vae_on_cpu && !ggml_backend_is_cpu(backend)) { - LOG_INFO("VAE Autoencoder: Using CPU backend"); - vae_backend = ggml_backend_cpu_init(); - } else { - vae_backend = backend; + vae_backend = backend; + if (!vae_backend_is_default) { + vae_backend = init_named_backend(vae_backend_name); + LOG_INFO("VAE Autoencoder: Using %s backend", ggml_backend_name(vae_backend)); + } + tae_backend = vae_backend; + if (tae_backend_name.length() > 0 && tae_backend_name != vae_backend_name) { + tae_backend = init_named_backend(tae_backend_name); + LOG_INFO("Tiny Autoencoder: Using %s backend", ggml_backend_name(tae_backend)); } if (!use_tiny_autoencoder || sd_ctx_params->tae_preview_only) { @@ -632,14 +668,14 @@ class StableDiffusionGGML { if (use_tiny_autoencoder) { if (sd_version_is_wan(version) || sd_version_is_qwen_image(version)) { - tae_first_stage = std::make_shared(vae_backend, + tae_first_stage = std::make_shared(tae_backend, offload_params_to_cpu, tensor_storage_map, "decoder", vae_decode_only, version); } else { - tae_first_stage = std::make_shared(vae_backend, + tae_first_stage = std::make_shared(tae_backend, offload_params_to_cpu, tensor_storage_map, "decoder.layers", @@ -654,9 +690,9 @@ class StableDiffusionGGML { if (strlen(SAFE_STR(sd_ctx_params->control_net_path)) > 0) { ggml_backend_t controlnet_backend = nullptr; - if (sd_ctx_params->keep_control_net_on_cpu && !ggml_backend_is_cpu(backend)) { - LOG_DEBUG("ControlNet: Using CPU backend"); - controlnet_backend = ggml_backend_cpu_init(); + if (!control_net_backend_is_default) { + control_net_backend = init_named_backend(control_net_backend_name); + LOG_INFO("ControlNet: Using %s backend", control_net_backend_name); } else { controlnet_backend = backend; } @@ -811,7 +847,7 @@ class StableDiffusionGGML { total_params_vram_size += clip_params_mem_size + pmid_params_mem_size; } - if (ggml_backend_is_cpu(backend)) { + if (ggml_backend_is_cpu(diffusion_backend)) { total_params_ram_size += unet_params_mem_size; } else { total_params_vram_size += unet_params_mem_size; @@ -2881,9 +2917,6 @@ void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) { sd_ctx_params->lora_apply_mode = LORA_APPLY_AUTO; sd_ctx_params->offload_params_to_cpu = false; sd_ctx_params->enable_mmap = false; - sd_ctx_params->keep_clip_on_cpu = false; - sd_ctx_params->keep_control_net_on_cpu = false; - sd_ctx_params->keep_vae_on_cpu = false; sd_ctx_params->diffusion_flash_attn = false; sd_ctx_params->circular_x = false; sd_ctx_params->circular_y = false; @@ -2898,7 +2931,7 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) { if (!buf) return 
nullptr; buf[0] = '\0'; - + // TODO devices snprintf(buf + strlen(buf), 4096 - strlen(buf), "model_path: %s\n" "clip_l_path: %s\n" @@ -2922,9 +2955,6 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) { "sampler_rng_type: %s\n" "prediction: %s\n" "offload_params_to_cpu: %s\n" - "keep_clip_on_cpu: %s\n" - "keep_control_net_on_cpu: %s\n" - "keep_vae_on_cpu: %s\n" "diffusion_flash_attn: %s\n" "circular_x: %s\n" "circular_y: %s\n" @@ -2953,9 +2983,6 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) { sd_rng_type_name(sd_ctx_params->sampler_rng_type), sd_prediction_name(sd_ctx_params->prediction), BOOL_STR(sd_ctx_params->offload_params_to_cpu), - BOOL_STR(sd_ctx_params->keep_clip_on_cpu), - BOOL_STR(sd_ctx_params->keep_control_net_on_cpu), - BOOL_STR(sd_ctx_params->keep_vae_on_cpu), BOOL_STR(sd_ctx_params->diffusion_flash_attn), BOOL_STR(sd_ctx_params->circular_x), BOOL_STR(sd_ctx_params->circular_y), diff --git a/stable-diffusion.h b/stable-diffusion.h index 8f040d2bd..35cf2d521 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -183,9 +183,9 @@ typedef struct { enum lora_apply_mode_t lora_apply_mode; bool offload_params_to_cpu; bool enable_mmap; - bool keep_clip_on_cpu; - bool keep_control_net_on_cpu; - bool keep_vae_on_cpu; + // bool keep_clip_on_cpu; + // bool keep_control_net_on_cpu; + // bool keep_vae_on_cpu; bool diffusion_flash_attn; bool tae_preview_only; bool diffusion_conv_direct; @@ -198,6 +198,12 @@ typedef struct { int chroma_t5_mask_pad; bool qwen_image_zero_cond_t; float flow_shift; + const char* main_device; + const char* diffusion_device; + const char* clip_device; + const char* vae_device; + const char* tae_device; + const char* control_net_device; } sd_ctx_params_t; typedef struct { From 350df04f87ae83f5d33ddb48328f46d0dbef679f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Fri, 9 Jan 2026 22:59:31 +0100 Subject: [PATCH 02/10] fix build --- stable-diffusion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index d7ed81665..4c64574cf 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -692,7 +692,7 @@ class StableDiffusionGGML { ggml_backend_t controlnet_backend = nullptr; if (!control_net_backend_is_default) { control_net_backend = init_named_backend(control_net_backend_name); - LOG_INFO("ControlNet: Using %s backend", control_net_backend_name); + LOG_INFO("ControlNet: Using %s backend", ggml_backend_name(controlnet_backend)); } else { controlnet_backend = backend; } From b1434fdf565dc914009a9ca29e13e3702e3462e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Sun, 11 Jan 2026 20:43:16 +0100 Subject: [PATCH 03/10] show backend device description --- stable-diffusion.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 4c64574cf..0f351d200 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -170,7 +170,8 @@ class StableDiffusionGGML { // TODO: expose via C API and fill a cstr const int device_count = ggml_backend_dev_count(); for (int i = 0; i < device_count; i++) { - LOG_INFO("%s", ggml_backend_dev_name(ggml_backend_dev_get(i))); + auto dev = ggml_backend_dev_get(i); + LOG_INFO("%s (%s)", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev)); } } From 2d06765130527ffe4900e7dc6fd2a625f5515a09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Sun, 11 Jan 2026 21:36:43 +0100 Subject: [PATCH 
04/10] CLI: add --list-devices arg --- examples/cli/main.cpp | 35 ++++++++++++++- examples/common/common.hpp | 14 +++++- stable-diffusion.cpp | 90 ++++++++++++++++++++++++++++++++++++-- stable-diffusion.h | 5 +++ 4 files changed, 138 insertions(+), 6 deletions(-) diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index 0a7da7aee..97fc999ac 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -46,6 +46,7 @@ struct SDCliParams { bool color = false; bool normal_exit = false; + bool skip_usage = false; ArgOptions get_options() { ArgOptions options; @@ -143,7 +144,27 @@ struct SDCliParams { auto on_help_arg = [&](int argc, const char** argv, int index) { normal_exit = true; - return -1; + return VALID_BREAK_OPT; + }; + + auto on_rpc_arg = [&](int argc, const char** argv, int index) { + if (++index >= argc) { + return -1; + } + const char* rpc_device = argv[index]; + add_rpc_device(rpc_device); + return 1; + }; + + auto on_list_devices_arg = [&](int argc, const char** argv, int index) { + size_t buff_size = backend_list_size(); + char* buff = (char*)malloc(buff_size); + list_backends_to_buffer(buff, buff_size); + printf("List of available GGML devices:\nName\tDescription\n-------------------\n%s\n", buff); + free(buff); + normal_exit = true; + skip_usage = true; + return VALID_BREAK_OPT; }; options.manual_options = { @@ -159,6 +180,14 @@ struct SDCliParams { "--help", "show this help message and exit", on_help_arg}, + {"", + "--rpc", + "add a rpc device", + on_rpc_arg}, + {"", + "--list-devices", + "list available ggml compute devices", + on_list_devices_arg}, }; return options; @@ -213,7 +242,9 @@ void parse_args(int argc, const char** argv, SDCliParams& cli_params, SDContextP std::vector options_vec = {cli_params.get_options(), ctx_params.get_options(), gen_params.get_options()}; if (!parse_options(argc, argv, options_vec)) { - print_usage(argc, argv, options_vec); + if (!cli_params.skip_usage){ + print_usage(argc, argv, options_vec); + } exit(cli_params.normal_exit ? 0 : 1); } diff --git a/examples/common/common.hpp b/examples/common/common.hpp index de0333cfb..227bccc29 100644 --- a/examples/common/common.hpp +++ b/examples/common/common.hpp @@ -34,6 +34,8 @@ namespace fs = std::filesystem; #define SAFE_STR(s) ((s) ? (s) : "") #define BOOL_STR(b) ((b) ? 
"true" : "false") +#define VALID_BREAK_OPT -42 + const char* modes_str[] = { "img_gen", "vid_gen", @@ -401,16 +403,26 @@ static bool parse_options(int argc, const char** argv, const std::vector string_split(const std::string & input, char separator) +{ + std::vector parts; + size_t begin_pos = 0; + size_t separator_pos = input.find(separator); + while (separator_pos != std::string::npos) { + std::string part = input.substr(begin_pos, separator_pos - begin_pos); + parts.emplace_back(part); + begin_pos = separator_pos + 1; + separator_pos = input.find(separator, begin_pos); + } + parts.emplace_back(input.substr(begin_pos, separator_pos - begin_pos)); + return parts; +} + +static void add_rpc_devices(const std::string & servers) { + auto rpc_servers = string_split(servers, ','); + if (rpc_servers.empty()) { + throw std::invalid_argument("no RPC servers specified"); + } + ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC"); + if (!rpc_reg) { + throw std::invalid_argument("failed to find RPC backend"); + } + typedef ggml_backend_reg_t (*ggml_backend_rpc_add_server_t)(const char * endpoint); + ggml_backend_rpc_add_server_t ggml_backend_rpc_add_server_fn = (ggml_backend_rpc_add_server_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_server"); + if (!ggml_backend_rpc_add_server_fn) { + throw std::invalid_argument("failed to find RPC add server function"); + } + for (const auto & server : rpc_servers) { + auto reg = ggml_backend_rpc_add_server_fn(server.c_str()); + ggml_backend_register(reg); + } +} + +void add_rpc_device(const char* servers_cstr){ + std::string servers(servers_cstr); + add_rpc_devices(servers); +} + +std::vector> list_backends_vector() { + std::vector> backends; + const int device_count = ggml_backend_dev_count(); + for (int i = 0; i < device_count; i++) { + auto dev = ggml_backend_dev_get(i); + backends.push_back({ggml_backend_dev_name(dev), ggml_backend_dev_description(dev)}); + } + return backends; +} + +SD_API size_t backend_list_size(){ + // for C API + size_t buffer_size = 0; + auto backends = list_backends_vector(); + for (auto& backend : backends) { + auto dev_name_size = backend.first.size(); + auto dev_desc_size = backend.second.size(); + buffer_size+=dev_name_size+dev_desc_size+2; // +2 for the separators + } + return buffer_size; +} + +// devices are separated by \n and name and description are separated by \t +SD_API void list_backends_to_buffer(char* buffer, size_t buffer_size) { + auto backends = list_backends_vector(); + size_t offset = 0; + for (auto& backend : backends) { + size_t name_size = backend.first.size(); + size_t desc_size = backend.second.size(); + if (offset + name_size + desc_size + 2 > buffer_size) { + break; // Not enough space in the buffer + } + memcpy(buffer + offset, backend.first.c_str(), name_size); + offset += name_size; + buffer[offset++] = '\t'; + memcpy(buffer + offset, backend.second.c_str(), desc_size); + offset += desc_size; + buffer[offset++] = '\n'; + } + if (offset < buffer_size) { + buffer[offset] = '\0'; // Ensure the buffer is null-terminated at the end + } +} + /*=============================================== StableDiffusionGGML ================================================*/ class StableDiffusionGGML { @@ -166,8 +250,8 @@ class StableDiffusionGGML { ggml_backend_free(backend); } - void list_backends() { - // TODO: expose via C API and fill a cstr + + void log_backends() { const int device_count = ggml_backend_dev_count(); for (int i = 0; i < device_count; i++) { auto dev = 
ggml_backend_dev_get(i); @@ -238,7 +322,7 @@ class StableDiffusionGGML { ggml_log_set(ggml_log_callback_default, nullptr); - list_backends(); + log_backends(); std::string default_backend_name = get_default_backend_name(); diff --git a/stable-diffusion.h b/stable-diffusion.h index 35cf2d521..0a1359bb7 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -409,6 +409,11 @@ SD_API bool preprocess_canny(sd_image_t image, SD_API const char* sd_commit(void); SD_API const char* sd_version(void); +SD_API size_t backend_list_size(void); +SD_API void list_backends_to_buffer(char* buffer, size_t buffer_size); + +SD_API void add_rpc_device(const char* address); + #ifdef __cplusplus } #endif From 4e56cdfcdade9370b5b08e0f5afcae0025a48fcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Fri, 16 Jan 2026 00:46:44 +0100 Subject: [PATCH 05/10] null-terminate even if buffer is too small --- stable-diffusion.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 4cc960c26..04213fde0 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -172,6 +172,9 @@ SD_API void list_backends_to_buffer(char* buffer, size_t buffer_size) { } if (offset < buffer_size) { buffer[offset] = '\0'; // Ensure the buffer is null-terminated at the end + } else { + LOG_WARN("Provided buffer size is too small to contain details of all devices."); + buffer[buffer_size - 1] = '\0'; // Ensure the buffer is null-terminated at the end } } From 9331df77a50c1aed27b927a6309f163f5729540a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Fri, 16 Jan 2026 01:03:37 +0100 Subject: [PATCH 06/10] move stuff to ggml_extend.cpp --- ggml_extend.hpp | 36 ++++++++++++++++++++++++++++++++++++ stable-diffusion.cpp | 36 ------------------------------------ 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/ggml_extend.hpp b/ggml_extend.hpp index 1ff450116..fca7a8cbe 100644 --- a/ggml_extend.hpp +++ b/ggml_extend.hpp @@ -88,6 +88,42 @@ __STATIC_INLINE__ void ggml_log_callback_default(ggml_log_level level, const cha } } +__STATIC_INLINE__ bool backend_name_exists(std::string name) { + const int device_count = ggml_backend_dev_count(); + for (int i = 0; i < device_count; i++) { + if (name == ggml_backend_dev_name(ggml_backend_dev_get(i))) { + return true; + } + } + return false; +} + +__STATIC_INLINE__ std::string sanitize_backend_name(std::string name) { + if (name == "" || backend_name_exists(name)) { + return name; + } else { + LOG_WARN("Backend %s not found, using default backend", name.c_str()); + return ""; + } +} + +__STATIC_INLINE__ std::string get_default_backend_name() { + // should pick the same backend as ggml_backend_init_best + ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU); + dev = dev ? dev : ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU); + dev = dev ? 
dev : ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU); + return ggml_backend_dev_name(dev); +} + +__STATIC_INLINE__ ggml_backend_t init_named_backend(std::string name = "") { + LOG_DEBUG("Initializing backend: %s", name.c_str()); + if (name.empty()) { + return ggml_backend_init_best(); + } else { + return ggml_backend_init_by_name(name.c_str(), nullptr); + } +} + static_assert(GGML_MAX_NAME >= 128, "GGML_MAX_NAME must be at least 128"); // n-mode tensor-matrix product diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 04213fde0..8e69a3df7 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -262,42 +262,6 @@ class StableDiffusionGGML { } } - bool backend_name_exists(std::string name) { - const int device_count = ggml_backend_dev_count(); - for (int i = 0; i < device_count; i++) { - if (name == ggml_backend_dev_name(ggml_backend_dev_get(i))) { - return true; - } - } - return false; - } - - std::string sanitize_backend_name(std::string name) { - if (name == "" || backend_name_exists(name)) { - return name; - } else { - LOG_WARN("Backend %s not found, using default backend", name.c_str()); - return ""; - } - } - - std::string get_default_backend_name() { - // should pick the same backend as ggml_backend_init_best - ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU); - dev = dev ? dev : ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU); - dev = dev ? dev : ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU); - return ggml_backend_dev_name(dev); - } - - ggml_backend_t init_named_backend(std::string name = "") { - LOG_DEBUG("Initializing backend: %s", name.c_str()); - if (name.empty()) { - return ggml_backend_init_best(); - } else { - return ggml_backend_init_by_name(name.c_str(), nullptr); - } - } - std::shared_ptr get_rng(rng_type_t rng_type) { if (rng_type == STD_DEFAULT_RNG) { return std::make_shared(); From 7c49029b705bc8d12a6fe6ef2fd73661dd6cd50f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Fri, 16 Jan 2026 01:06:53 +0100 Subject: [PATCH 07/10] --upscaler-backend-device --- examples/cli/main.cpp | 3 ++- examples/common/common.hpp | 7 +++++++ stable-diffusion.h | 3 ++- upscaler.cpp | 38 +++++++++++--------------------------- 4 files changed, 22 insertions(+), 29 deletions(-) diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index 97fc999ac..d14c899d9 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -810,7 +810,8 @@ int main(int argc, const char* argv[]) { ctx_params.offload_params_to_cpu, ctx_params.diffusion_conv_direct, ctx_params.n_threads, - gen_params.upscale_tile_size); + gen_params.upscale_tile_size, + ctx_params.upscaler_backend_device.c_str()); if (upscaler_ctx == nullptr) { LOG_ERROR("new_upscaler_ctx failed"); diff --git a/examples/common/common.hpp b/examples/common/common.hpp index 227bccc29..9804fd80e 100644 --- a/examples/common/common.hpp +++ b/examples/common/common.hpp @@ -465,6 +465,8 @@ struct SDContextParams { std::string vae_backend_device; std::string tae_backend_device; std::string control_net_backend_device; + std::string upscaler_backend_device; + std::map embedding_map; std::vector embedding_vec; @@ -601,6 +603,11 @@ struct SDContextParams { "--control-net-backend-device", "device to use for control net (defaults to main-backend-device)", &control_net_backend_device}, + {"", + "--upscaler-backend-device", + "device to use for upscaling models (defaults to main-backend-device)", + &upscaler_backend_device}, + }; diff --git 
a/stable-diffusion.h b/stable-diffusion.h index 0a1359bb7..8424f121e 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -383,7 +383,8 @@ SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path, bool offload_params_to_cpu, bool direct, int n_threads, - int tile_size); + int tile_size, + const char * device); SD_API void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx); SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx, diff --git a/upscaler.cpp b/upscaler.cpp index 29ac981e6..ea198f166 100644 --- a/upscaler.cpp +++ b/upscaler.cpp @@ -22,37 +22,20 @@ struct UpscalerGGML { bool load_from_file(const std::string& esrgan_path, bool offload_params_to_cpu, - int n_threads) { + int n_threads, + std::string device = "") { ggml_log_set(ggml_log_callback_default, nullptr); -#ifdef SD_USE_CUDA - LOG_DEBUG("Using CUDA backend"); - backend = ggml_backend_cuda_init(0); -#endif -#ifdef SD_USE_METAL - LOG_DEBUG("Using Metal backend"); - backend = ggml_backend_metal_init(); -#endif -#ifdef SD_USE_VULKAN - LOG_DEBUG("Using Vulkan backend"); - backend = ggml_backend_vk_init(0); -#endif -#ifdef SD_USE_OPENCL - LOG_DEBUG("Using OpenCL backend"); - backend = ggml_backend_opencl_init(); -#endif -#ifdef SD_USE_SYCL - LOG_DEBUG("Using SYCL backend"); - backend = ggml_backend_sycl_init(0); -#endif + device = sanitize_backend_name(device); + backend = init_named_backend(device); ModelLoader model_loader; if (!model_loader.init_from_file_and_convert_name(esrgan_path)) { LOG_ERROR("init model loader from file failed: '%s'", esrgan_path.c_str()); } model_loader.set_wtype_override(model_data_type); - if (!backend) { - LOG_DEBUG("Using CPU backend"); - backend = ggml_backend_cpu_init(); - } + // if (!backend) { + // LOG_DEBUG("Using CPU backend"); + // backend = ggml_backend_cpu_init(); + // } LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type)); esrgan_upscaler = std::make_shared(backend, offload_params_to_cpu, tile_size, model_loader.get_tensor_storage_map()); if (direct) { @@ -117,7 +100,8 @@ upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str, bool offload_params_to_cpu, bool direct, int n_threads, - int tile_size) { + int tile_size, + const char* device) { upscaler_ctx_t* upscaler_ctx = (upscaler_ctx_t*)malloc(sizeof(upscaler_ctx_t)); if (upscaler_ctx == nullptr) { return nullptr; @@ -129,7 +113,7 @@ upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str, return nullptr; } - if (!upscaler_ctx->upscaler->load_from_file(esrgan_path, offload_params_to_cpu, n_threads)) { + if (!upscaler_ctx->upscaler->load_from_file(esrgan_path, offload_params_to_cpu, n_threads, SAFE_STR(device))) { delete upscaler_ctx->upscaler; upscaler_ctx->upscaler = nullptr; free(upscaler_ctx); From a5306873931a863482637400e9fa3b7f78e60de7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Fri, 16 Jan 2026 02:00:34 +0100 Subject: [PATCH 08/10] use diffusion_backend for loading LoRAs --- stable-diffusion.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 8e69a3df7..b04b882f6 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -774,7 +774,7 @@ class StableDiffusionGGML { version); } if (strlen(SAFE_STR(sd_ctx_params->photo_maker_path)) > 0) { - pmid_lora = std::make_shared("pmid", backend, sd_ctx_params->photo_maker_path, "", version); + pmid_lora = std::make_shared("pmid", diffusion_backend, sd_ctx_params->photo_maker_path, "", version); auto lora_tensor_filter = [&](const 
std::string& tensor_name) { if (starts_with(tensor_name, "lora.model")) { return true; @@ -1128,8 +1128,11 @@ class StableDiffusionGGML { for (auto& kv : lora_state_diff) { int64_t t0 = ggml_time_ms(); - - auto lora = load_lora_model_from_file(kv.first, kv.second, backend); + // TODO: Fix that + if(diffusion_backend!=clip_backend && !ggml_backend_is_cpu(clip_backend)){ + LOG_WARN("Diffusion models and text encoders are running on different backends. This may cause issues when immediately applying LoRAs."); + } + auto lora = load_lora_model_from_file(kv.first, kv.second, diffusion_backend); if (!lora || lora->lora_tensors.empty()) { continue; } @@ -1207,7 +1210,7 @@ class StableDiffusionGGML { const std::string& lora_name = kv.first; float multiplier = kv.second; - auto lora = load_lora_model_from_file(lora_name, multiplier, backend, lora_tensor_filter); + auto lora = load_lora_model_from_file(lora_name, multiplier, diffusion_backend, lora_tensor_filter); if (lora && !lora->lora_tensors.empty()) { lora->preprocess_lora_tensors(tensors); diffusion_lora_models.push_back(lora); From b9e0f46b84a951ae33c8164ade5c4743f8f9e387 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Fri, 16 Jan 2026 02:19:45 +0100 Subject: [PATCH 09/10] --photomaker-backend-device (+fixes) --- examples/common/common.hpp | 7 ++++++- stable-diffusion.cpp | 26 +++++++++++++++++--------- stable-diffusion.h | 1 + 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/examples/common/common.hpp b/examples/common/common.hpp index 9804fd80e..057354012 100644 --- a/examples/common/common.hpp +++ b/examples/common/common.hpp @@ -466,7 +466,7 @@ struct SDContextParams { std::string tae_backend_device; std::string control_net_backend_device; std::string upscaler_backend_device; - + std::string photomaker_backend_device; std::map embedding_map; std::vector embedding_vec; @@ -607,6 +607,10 @@ struct SDContextParams { "--upscaler-backend-device", "device to use for upscaling models (defaults to main-backend-device)", &upscaler_backend_device}, + {"", + "--photomaker-backend-device", + "device to use for photomaker (defaults to main-backend-device)", + &photomaker_backend_device}, }; @@ -1020,6 +1024,7 @@ struct SDContextParams { vae_backend_device.c_str(), tae_backend_device.c_str(), control_net_backend_device.c_str(), + photomaker_backend_device.c_str(), }; return sd_ctx_params; } diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index b04b882f6..c4858943f 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -188,6 +188,7 @@ class StableDiffusionGGML { ggml_backend_t control_net_backend = nullptr; ggml_backend_t vae_backend = nullptr; ggml_backend_t tae_backend = nullptr; + ggml_backend_t pmid_backend = nullptr; // TODO: clip_vision and photomaker backends @@ -305,6 +306,7 @@ class StableDiffusionGGML { std::string control_net_backend_name = sanitize_backend_name(SAFE_STR(sd_ctx_params->control_net_device)); std::string vae_backend_name = sanitize_backend_name(SAFE_STR(sd_ctx_params->vae_device)); std::string tae_backend_name = sanitize_backend_name(SAFE_STR(sd_ctx_params->tae_device)); + std::string pmid_backend_name = sanitize_backend_name(SAFE_STR(sd_ctx_params->photomaker_device)); bool diffusion_backend_is_default = diffusion_backend_name.empty() || diffusion_backend_name == default_backend_name; bool clip_backend_is_default = (clip_backend_name.empty() || clip_backend_name == default_backend_name); @@ -312,9 +314,10 @@ class StableDiffusionGGML { bool vae_backend_is_default = 
(vae_backend_name.empty() || vae_backend_name == default_backend_name); // if tae_backend_name is empty, it will use the same backend as vae bool tae_backend_is_default = (tae_backend_name.empty() && vae_backend_is_default) || tae_backend_name == default_backend_name; + bool pmid_backend_is_default = (pmid_backend_name.empty() || pmid_backend_name == default_backend_name); // if some backend is not specified or is the same as the default backend, use the default backend - bool use_default_backend = diffusion_backend_is_default || clip_backend_is_default || control_net_backend_is_default || vae_backend_is_default || tae_backend_is_default; + bool use_default_backend = diffusion_backend_is_default || clip_backend_is_default || control_net_backend_is_default || vae_backend_is_default || tae_backend_is_default || pmid_backend_is_default; if (use_default_backend) { backend = init_named_backend(override_default_backend_name); @@ -741,14 +744,13 @@ class StableDiffusionGGML { } if (strlen(SAFE_STR(sd_ctx_params->control_net_path)) > 0) { - ggml_backend_t controlnet_backend = nullptr; if (!control_net_backend_is_default) { control_net_backend = init_named_backend(control_net_backend_name); - LOG_INFO("ControlNet: Using %s backend", ggml_backend_name(controlnet_backend)); + LOG_INFO("ControlNet: Using %s backend", ggml_backend_name(control_net_backend)); } else { - controlnet_backend = backend; + control_net_backend = backend; } - control_net = std::make_shared(controlnet_backend, + control_net = std::make_shared(control_net_backend, offload_params_to_cpu, tensor_storage_map, version); @@ -757,9 +759,15 @@ class StableDiffusionGGML { control_net->set_conv2d_direct_enabled(true); } } - + pmid_backend = backend; + if (!pmid_backend_is_default) { + pmid_backend = init_named_backend(pmid_backend_name); + LOG_INFO("PhotoMaker: Using %s backend", ggml_backend_name(pmid_backend)); + } else { + pmid_backend = backend; + } if (strstr(SAFE_STR(sd_ctx_params->photo_maker_path), "v2")) { - pmid_model = std::make_shared(backend, + pmid_model = std::make_shared(pmid_backend, offload_params_to_cpu, tensor_storage_map, "pmid", @@ -767,7 +775,7 @@ class StableDiffusionGGML { PM_VERSION_2); LOG_INFO("using PhotoMaker Version 2"); } else { - pmid_model = std::make_shared(backend, + pmid_model = std::make_shared(pmid_backend, offload_params_to_cpu, tensor_storage_map, "pmid", @@ -933,7 +941,7 @@ class StableDiffusionGGML { control_net_params_mem_size / 1024.0 / 1024.0, ggml_backend_is_cpu(control_net_backend) ? "RAM" : "VRAM", pmid_params_mem_size / 1024.0 / 1024.0, - ggml_backend_is_cpu(clip_backend) ? "RAM" : "VRAM"); + ggml_backend_is_cpu(pmid_backend) ? 
"RAM" : "VRAM"); } // init denoiser diff --git a/stable-diffusion.h b/stable-diffusion.h index 8424f121e..eaa7916f7 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -204,6 +204,7 @@ typedef struct { const char* vae_device; const char* tae_device; const char* control_net_device; + const char* photomaker_device; } sd_ctx_params_t; typedef struct { From 6fb7e18f67c8f336f79df32a6c418d0b4df2de2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Fri, 16 Jan 2026 02:28:37 +0100 Subject: [PATCH 10/10] --vision-backend-device --- examples/common/common.hpp | 7 ++++++- stable-diffusion.cpp | 10 ++++++---- stable-diffusion.h | 1 + 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/examples/common/common.hpp b/examples/common/common.hpp index 057354012..3fd2f0775 100644 --- a/examples/common/common.hpp +++ b/examples/common/common.hpp @@ -467,6 +467,7 @@ struct SDContextParams { std::string control_net_backend_device; std::string upscaler_backend_device; std::string photomaker_backend_device; + std::string vision_backend_device; std::map embedding_map; std::vector embedding_vec; @@ -611,7 +612,10 @@ struct SDContextParams { "--photomaker-backend-device", "device to use for photomaker (defaults to main-backend-device)", &photomaker_backend_device}, - + {"", + "--vision-backend-device", + "device to use for clip-vision model (defaults to clip-backend-device)", + &vision_backend_device}, }; @@ -1025,6 +1029,7 @@ struct SDContextParams { tae_backend_device.c_str(), control_net_backend_device.c_str(), photomaker_backend_device.c_str(), + vision_backend_device.c_str(), }; return sd_ctx_params; } diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index c4858943f..c6edcaa18 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -189,8 +189,7 @@ class StableDiffusionGGML { ggml_backend_t vae_backend = nullptr; ggml_backend_t tae_backend = nullptr; ggml_backend_t pmid_backend = nullptr; - - // TODO: clip_vision and photomaker backends + ggml_backend_t vision_backend = nullptr; SDVersion version; bool vae_decode_only = false; @@ -307,6 +306,7 @@ class StableDiffusionGGML { std::string vae_backend_name = sanitize_backend_name(SAFE_STR(sd_ctx_params->vae_device)); std::string tae_backend_name = sanitize_backend_name(SAFE_STR(sd_ctx_params->tae_device)); std::string pmid_backend_name = sanitize_backend_name(SAFE_STR(sd_ctx_params->photomaker_device)); + std::string vision_backend_name = sanitize_backend_name(SAFE_STR(sd_ctx_params->vision_device)); bool diffusion_backend_is_default = diffusion_backend_name.empty() || diffusion_backend_name == default_backend_name; bool clip_backend_is_default = (clip_backend_name.empty() || clip_backend_name == default_backend_name); @@ -315,9 +315,11 @@ class StableDiffusionGGML { // if tae_backend_name is empty, it will use the same backend as vae bool tae_backend_is_default = (tae_backend_name.empty() && vae_backend_is_default) || tae_backend_name == default_backend_name; bool pmid_backend_is_default = (pmid_backend_name.empty() || pmid_backend_name == default_backend_name); + // if vision_backend_name is empty, it will use the same backend as clip + bool vision_backend_is_default = (vision_backend_name.empty() && clip_backend_is_default) || vision_backend_name == default_backend_name; // if some backend is not specified or is the same as the default backend, use the default backend - bool use_default_backend = diffusion_backend_is_default || clip_backend_is_default || control_net_backend_is_default || 
vae_backend_is_default || tae_backend_is_default || pmid_backend_is_default; + bool use_default_backend = diffusion_backend_is_default || clip_backend_is_default || control_net_backend_is_default || vae_backend_is_default || tae_backend_is_default || pmid_backend_is_default || vision_backend_is_default; if (use_default_backend) { backend = init_named_backend(override_default_backend_name); @@ -586,7 +588,7 @@ class StableDiffusionGGML { if (diffusion_model->get_desc() == "Wan2.1-I2V-14B" || diffusion_model->get_desc() == "Wan2.1-FLF2V-14B" || diffusion_model->get_desc() == "Wan2.1-I2V-1.3B") { - clip_vision = std::make_shared(backend, + clip_vision = std::make_shared(vision_backend, offload_params_to_cpu, tensor_storage_map); clip_vision->alloc_params_buffer(); diff --git a/stable-diffusion.h b/stable-diffusion.h index eaa7916f7..a10dd7d60 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -205,6 +205,7 @@ typedef struct { const char* tae_device; const char* control_net_device; const char* photomaker_device; + const char* vision_device; } sd_ctx_params_t; typedef struct {
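
For reference, a minimal usage sketch (not part of the patch series) of the C API introduced above: enumerating compute devices via backend_list_size()/list_backends_to_buffer() and assigning per-component devices through the new sd_ctx_params_t fields. The device name "Vulkan0" and the RPC endpoint are assumed placeholders; use a name reported by the enumeration (or by the CLI's --list-devices), and note that tae_device falls back to vae_device and vision_device to clip_device when left empty, as documented in the hunks above.

#include <cstdio>
#include <cstdlib>
#include "stable-diffusion.h"

int main() {
    // Optionally register an RPC server before enumeration (endpoint is a placeholder).
    // add_rpc_device("192.0.2.1:50052");

    // Enumerate devices: entries are "name\tdescription" separated by '\n'.
    // +1 so the terminating '\0' fits without clipping the final newline (see patch 05).
    size_t size = backend_list_size() + 1;
    char* buffer = (char*)malloc(size);
    if (buffer == nullptr) {
        return 1;
    }
    list_backends_to_buffer(buffer, size);
    printf("Available devices:\n%s", buffer);
    free(buffer);

    sd_ctx_params_t params = {};
    sd_ctx_params_init(&params);
    params.main_device = "Vulkan0";  // assumed name; pick one from the list above
    params.clip_device = "CPU";      // e.g. keep the text encoders on the CPU device
    params.vae_device  = "CPU";      // tae_device falls back to vae_device when unset
    // ... set model paths etc., then pass &params to new_sd_ctx() ...
    return 0;
}

The CLI equivalents are --list-devices plus the --main/--diffusion/--clip/--vae/--tae/--control-net/--upscaler/--photomaker/--vision-backend-device options added across patches 01, 04, 07, 09 and 10.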