Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 35 additions & 3 deletions examples/cli/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ struct SDCliParams {
bool color = false;

bool normal_exit = false;
bool skip_usage = false;

ArgOptions get_options() {
ArgOptions options;
Expand Down Expand Up @@ -143,7 +144,27 @@ struct SDCliParams {

// --help handler: marks the run as a normal termination (exit status 0) and
// asks the option parser to stop.
auto on_help_arg = [&](int argc, const char** argv, int index) {
    normal_exit = true;
    // Return the dedicated sentinel rather than -1: parse_options flags any other
    // negative result as an invalid argument, while VALID_BREAK_OPT is a clean
    // early stop.
    return VALID_BREAK_OPT;
};

auto on_rpc_arg = [&](int argc, const char** argv, int index) {
if (++index >= argc) {
return -1;
}
const char* rpc_device = argv[index];
add_rpc_device(rpc_device);
return 1;
};

// --list-devices handler: prints the table of available GGML devices, then
// stops option parsing as a normal exit without printing the usage text.
auto on_list_devices_arg = [&](int argc, const char** argv, int index) {
    const size_t buff_size = backend_list_size();
    // Owning, zero-filled buffer with one spare byte: there is no unchecked
    // malloc() result to dereference, no manual free(), and the text handed to
    // printf("%s") is always NUL-terminated, even when the reported size is 0.
    std::vector<char> buff(buff_size + 1, '\0');
    list_backends_to_buffer(buff.data(), buff_size);
    printf("List of available GGML devices:\nName\tDescription\n-------------------\n%s\n", buff.data());
    normal_exit = true;  // exit status 0
    skip_usage  = true;  // the device list replaces the usage text
    return VALID_BREAK_OPT;
};

options.manual_options = {
Expand All @@ -159,6 +180,14 @@ struct SDCliParams {
"--help",
"show this help message and exit",
on_help_arg},
{"",
"--rpc",
"add a rpc device",
on_rpc_arg},
{"",
"--list-devices",
"list available ggml compute devices",
on_list_devices_arg},
};

return options;
Expand Down Expand Up @@ -213,7 +242,9 @@ void parse_args(int argc, const char** argv, SDCliParams& cli_params, SDContextP
std::vector<ArgOptions> options_vec = {cli_params.get_options(), ctx_params.get_options(), gen_params.get_options()};

if (!parse_options(argc, argv, options_vec)) {
print_usage(argc, argv, options_vec);
if (!cli_params.skip_usage){
print_usage(argc, argv, options_vec);
}
exit(cli_params.normal_exit ? 0 : 1);
}

Expand Down Expand Up @@ -779,7 +810,8 @@ int main(int argc, const char* argv[]) {
ctx_params.offload_params_to_cpu,
ctx_params.diffusion_conv_direct,
ctx_params.n_threads,
gen_params.upscale_tile_size);
gen_params.upscale_tile_size,
ctx_params.upscaler_backend_device.c_str());

if (upscaler_ctx == nullptr) {
LOG_ERROR("new_upscaler_ctx failed");
Expand Down
94 changes: 72 additions & 22 deletions examples/common/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ namespace fs = std::filesystem;
#define SAFE_STR(s) ((s) ? (s) : "")
#define BOOL_STR(b) ((b) ? "true" : "false")

// Sentinel a manual option callback returns to stop option parsing early
// without the argument being reported as invalid (e.g. --help).
// Parenthesized so the negative literal stays a single operand wherever the
// macro is expanded.
#define VALID_BREAK_OPT (-42)

const char* modes_str[] = {
"img_gen",
"vid_gen",
Expand Down Expand Up @@ -401,16 +403,26 @@ static bool parse_options(int argc, const char** argv, const std::vector<ArgOpti
}))
break;

bool kill_flow = false;
if (match_and_apply(options.manual_options, [&](auto& option) {
int ret = option.cb(argc, argv, i);
if (ret == VALID_BREAK_OPT) {
// not an error, but still break out of the loop (e.g. --help)
kill_flow = true;
return;
}
if (ret < 0) {
invalid_arg = true;
return;
}
i += ret;
found_arg = true;
}))
})) {
if (kill_flow) {
return false;
}
break;
}
}

if (invalid_arg) {
Expand Down Expand Up @@ -447,16 +459,23 @@ struct SDContextParams {
std::string tensor_type_rules;
std::string lora_model_dir;

std::string main_backend_device;
std::string diffusion_backend_device;
std::string clip_backend_device;
std::string vae_backend_device;
std::string tae_backend_device;
std::string control_net_backend_device;
std::string upscaler_backend_device;
std::string photomaker_backend_device;
std::string vision_backend_device;

std::map<std::string, std::string> embedding_map;
std::vector<sd_embedding_t> embedding_vec;

rng_type_t rng_type = CUDA_RNG;
rng_type_t sampler_rng_type = RNG_TYPE_COUNT;
bool offload_params_to_cpu = false;
bool enable_mmap = false;
bool control_net_cpu = false;
bool clip_on_cpu = false;
bool vae_on_cpu = false;
bool diffusion_flash_attn = false;
bool diffusion_conv_direct = false;
bool vae_conv_direct = false;
Expand Down Expand Up @@ -561,6 +580,43 @@ struct SDContextParams {
"--upscale-model",
"path to esrgan model.",
&esrgan_path},
{"",
"--main-backend-device",
"default device to use for all backends (defaults to main gpu device if hardware acceleration is available, otherwise cpu)",
&main_backend_device},
{"",
"--diffusion-backend-device",
"device to use for diffusion (defaults to main-backend-device)",
&diffusion_backend_device},
{"",
"--clip-backend-device",
"device to use for clip (defaults to main-backend-device)",
&clip_backend_device},
{"",
"--vae-backend-device",
"device to use for vae (defaults to main-backend-device). Also applies to tae, unless tae-backend-device is specified",
&vae_backend_device},
{"",
"--tae-backend-device",
"device to use for tae (defaults to vae-backend-device)",
&tae_backend_device},
{"",
"--control-net-backend-device",
"device to use for control net (defaults to main-backend-device)",
&control_net_backend_device},
{"",
"--upscaler-backend-device",
"device to use for upscaling models (defaults to main-backend-device)",
&upscaler_backend_device},
{"",
"--photomaker-backend-device",
"device to use for photomaker (defaults to main-backend-device)",
&photomaker_backend_device},
{"",
"--vision-backend-device",
"device to use for clip-vision model (defaults to clip-backend-device)",
&vision_backend_device},

};

options.int_options = {
Expand Down Expand Up @@ -603,18 +659,6 @@ struct SDContextParams {
"--mmap",
"whether to memory-map model",
true, &enable_mmap},
{"",
"--control-net-cpu",
"keep controlnet in cpu (for low vram)",
true, &control_net_cpu},
{"",
"--clip-on-cpu",
"keep clip in cpu (for low vram)",
true, &clip_on_cpu},
{"",
"--vae-on-cpu",
"keep vae in cpu (for low vram)",
true, &vae_on_cpu},
{"",
"--diffusion-fa",
"use flash attention in the diffusion model",
Expand Down Expand Up @@ -875,6 +919,7 @@ struct SDContextParams {

std::string embeddings_str = emb_ss.str();
std::ostringstream oss;
// TODO backend devices
oss << "SDContextParams {\n"
<< " n_threads: " << n_threads << ",\n"
<< " model_path: \"" << model_path << "\",\n"
Expand All @@ -901,9 +946,9 @@ struct SDContextParams {
<< " flow_shift: " << (std::isinf(flow_shift) ? "INF" : std::to_string(flow_shift)) << "\n"
<< " offload_params_to_cpu: " << (offload_params_to_cpu ? "true" : "false") << ",\n"
<< " enable_mmap: " << (enable_mmap ? "true" : "false") << ",\n"
<< " control_net_cpu: " << (control_net_cpu ? "true" : "false") << ",\n"
<< " clip_on_cpu: " << (clip_on_cpu ? "true" : "false") << ",\n"
<< " vae_on_cpu: " << (vae_on_cpu ? "true" : "false") << ",\n"
// << " control_net_cpu: " << (control_net_cpu ? "true" : "false") << ",\n"
// << " clip_on_cpu: " << (clip_on_cpu ? "true" : "false") << ",\n"
// << " vae_on_cpu: " << (vae_on_cpu ? "true" : "false") << ",\n"
<< " diffusion_flash_attn: " << (diffusion_flash_attn ? "true" : "false") << ",\n"
<< " diffusion_conv_direct: " << (diffusion_conv_direct ? "true" : "false") << ",\n"
<< " vae_conv_direct: " << (vae_conv_direct ? "true" : "false") << ",\n"
Expand Down Expand Up @@ -965,9 +1010,6 @@ struct SDContextParams {
lora_apply_mode,
offload_params_to_cpu,
enable_mmap,
clip_on_cpu,
control_net_cpu,
vae_on_cpu,
diffusion_flash_attn,
taesd_preview,
diffusion_conv_direct,
Expand All @@ -980,6 +1022,14 @@ struct SDContextParams {
chroma_t5_mask_pad,
qwen_image_zero_cond_t,
flow_shift,
main_backend_device.c_str(),
diffusion_backend_device.c_str(),
clip_backend_device.c_str(),
vae_backend_device.c_str(),
tae_backend_device.c_str(),
control_net_backend_device.c_str(),
photomaker_backend_device.c_str(),
vision_backend_device.c_str(),
};
return sd_ctx_params;
}
Expand Down
36 changes: 36 additions & 0 deletions ggml_extend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,42 @@ __STATIC_INLINE__ void ggml_log_callback_default(ggml_log_level level, const cha
}
}

// Returns true when `name` exactly matches the name of one of the devices
// enumerated through ggml_backend_dev_count() / ggml_backend_dev_get().
__STATIC_INLINE__ bool backend_name_exists(std::string name) {
    // The ggml device registry counts and indexes devices with size_t, so the
    // loop uses the same type instead of narrowing the count to int.
    const size_t device_count = ggml_backend_dev_count();
    for (size_t i = 0; i < device_count; i++) {
        if (name == ggml_backend_dev_name(ggml_backend_dev_get(i))) {
            return true;
        }
    }
    return false;
}

// Validates a requested backend name. An empty name, or one that matches a
// known device, is returned unchanged; any other name is reported with a
// warning and replaced by the empty string.
__STATIC_INLINE__ std::string sanitize_backend_name(std::string name) {
    const bool usable = name.empty() || backend_name_exists(name);
    if (!usable) {
        LOG_WARN("Backend %s not found, using default backend", name.c_str());
        return "";
    }
    return name;
}

// Returns the name of the device used by default: a GPU device if one is
// found, otherwise an integrated GPU, otherwise the CPU. Returns an empty
// string when no device of any of these types is found.
__STATIC_INLINE__ std::string get_default_backend_name() {
    // should pick the same backend as ggml_backend_init_best
    ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU);
    dev = dev ? dev : ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU);
    dev = dev ? dev : ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
    if (dev == nullptr) {
        // Nothing matched: querying the name of a null device, or building a
        // std::string from a null pointer, would be undefined behaviour.
        return "";
    }
    const char* dev_name = ggml_backend_dev_name(dev);
    return dev_name ? dev_name : "";
}

// Creates a ggml backend. With an empty name the choice is delegated to
// ggml_backend_init_best(); otherwise the backend is initialized by name
// (with no extra parameters).
__STATIC_INLINE__ ggml_backend_t init_named_backend(std::string name = "") {
    LOG_DEBUG("Initializing backend: %s", name.c_str());
    return name.empty() ? ggml_backend_init_best()
                        : ggml_backend_init_by_name(name.c_str(), nullptr);
}

static_assert(GGML_MAX_NAME >= 128, "GGML_MAX_NAME must be at least 128");

// n-mode tensor-matrix product
Expand Down
Loading
Loading