From 75c0b7f1bee1c07cac73b996365dce75064c4847 Mon Sep 17 00:00:00 2001
From: akleine <alb.kleine@gmx.de>
Date: Fri, 16 Jan 2026 09:58:04 +0100
Subject: [PATCH 1/4] feat: add support for Segmind-Vega model

---
 model.cpp            | 11 +++++++----
 model.h              |  3 ++-
 stable-diffusion.cpp |  1 +
 unet.hpp             |  7 +++++--
 4 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/model.cpp b/model.cpp
index e05d31468..b892e310d 100644
--- a/model.cpp
+++ b/model.cpp
@@ -1038,7 +1038,7 @@ SDVersion ModelLoader::get_sd_version() {
     int64_t patch_embedding_channels = 0;
     bool has_img_emb                 = false;
     bool has_middle_block_1          = false;
-    bool has_output_block_71         = false;
+    bool has_output_block_311        = false;
 
     for (auto& [name, tensor_storage] : tensor_storage_map) {
         if (!(is_xl)) {
@@ -1095,8 +1095,8 @@ SDVersion ModelLoader::get_sd_version() {
             tensor_storage.name.find("unet.mid_block.resnets.1.") != std::string::npos) {
             has_middle_block_1 = true;
         }
-        if (tensor_storage.name.find("model.diffusion_model.output_blocks.7.1") != std::string::npos) {
-            has_output_block_71 = true;
+        if (tensor_storage.name.find("model.diffusion_model.output_blocks.3.1.transformer_blocks.1") != std::string::npos) {
+            has_output_block_311 = true;
         }
         if (tensor_storage.name == "cond_stage_model.transformer.text_model.embeddings.token_embedding.weight" ||
             tensor_storage.name == "cond_stage_model.model.token_embedding.weight" ||
@@ -1133,6 +1133,9 @@ SDVersion ModelLoader::get_sd_version() {
             return VERSION_SDXL_PIX2PIX;
         }
         if (!has_middle_block_1) {
+            if (!has_output_block_311) {
+                return VERSION_SDXL_VEGA;
+            }
             return VERSION_SDXL_SSD1B;
         }
         return VERSION_SDXL;
@@ -1159,7 +1162,7 @@ SDVersion ModelLoader::get_sd_version() {
             return VERSION_SD1_PIX2PIX;
         }
         if (!has_middle_block_1) {
-            if (!has_output_block_71) {
+            if (!has_output_block_311) {
                 return VERSION_SDXS;
             }
             return VERSION_SD1_TINY_UNET;
diff --git a/model.h b/model.h
index e52766cc0..536867936 100644
--- a/model.h
+++ b/model.h
@@ -32,6 +32,7 @@ enum SDVersion {
     VERSION_SDXL,
     VERSION_SDXL_INPAINT,
     VERSION_SDXL_PIX2PIX,
+    VERSION_SDXL_VEGA,
     VERSION_SDXL_SSD1B,
     VERSION_SVD,
     VERSION_SD3,
@@ -65,7 +66,7 @@ static inline bool sd_version_is_sd2(SDVersion version) {
 }
 
 static inline bool sd_version_is_sdxl(SDVersion version) {
-    if (version == VERSION_SDXL || version == VERSION_SDXL_INPAINT || version == VERSION_SDXL_PIX2PIX || version == VERSION_SDXL_SSD1B) {
+    if (version == VERSION_SDXL || version == VERSION_SDXL_INPAINT || version == VERSION_SDXL_PIX2PIX || version == VERSION_SDXL_SSD1B || version == VERSION_SDXL_VEGA) {
         return true;
     }
     return false;
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index 060b85302..feeb6ea4e 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -35,6 +35,7 @@ const char* model_version_to_str[] = {
     "SDXL",
     "SDXL Inpaint",
     "SDXL Instruct-Pix2Pix",
+    "SDXL (Vega)",
     "SDXL (SSD1B)",
     "SVD",
     "SD3.x",
diff --git a/unet.hpp b/unet.hpp
index 9fe24e243..f92711037 100644
--- a/unet.hpp
+++ b/unet.hpp
@@ -201,6 +201,9 @@ class UnetModelBlock : public GGMLBlock {
             num_head_channels     = 64;
             num_heads             = -1;
             use_linear_projection = true;
+            if (version == VERSION_SDXL_VEGA) {
+                transformer_depth     = {1, 1, 2};
+            }
         } else if (version == VERSION_SVD) {
             in_channels           = 8;
             out_channels          = 4;
@@ -319,7 +322,7 @@ class UnetModelBlock : public GGMLBlock {
         }
         if (!tiny_unet) {
             blocks["middle_block.0"] = std::shared_ptr<GGMLBlock>(get_resblock(ch, time_embed_dim, ch));
-            if (version != VERSION_SDXL_SSD1B) {
+            if (version != VERSION_SDXL_SSD1B && version != VERSION_SDXL_VEGA) {
                 blocks["middle_block.1"] = std::shared_ptr<GGMLBlock>(get_attention_layer(ch,
                                                                                           n_head,
                                                                                           d_head,
@@ -520,7 +523,7 @@ class UnetModelBlock : public GGMLBlock {
         // middle_block
         if (!tiny_unet) {
             h = resblock_forward("middle_block.0", ctx, h, emb, num_video_frames);  // [N, 4*model_channels, h/8, w/8]
-            if (version != VERSION_SDXL_SSD1B) {
+            if (version != VERSION_SDXL_SSD1B && version != VERSION_SDXL_VEGA) {
                 h = attention_layer_forward("middle_block.1", ctx, h, context, num_video_frames);  // [N, 4*model_channels, h/8, w/8]
                 h = resblock_forward("middle_block.2", ctx, h, emb, num_video_frames);             // [N, 4*model_channels, h/8, w/8]
             }

From 97255f93b202c33ba1f81c2fd6a957104a7829cb Mon Sep 17 00:00:00 2001
From: akleine <alb.kleine@gmx.de>
Date: Fri, 16 Jan 2026 10:52:54 +0100
Subject: [PATCH 2/4] docs: update distilled_sd.md for the Vega model

---
 docs/distilled_sd.md | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/docs/distilled_sd.md b/docs/distilled_sd.md
index 232c02288..3174b18f8 100644
--- a/docs/distilled_sd.md
+++ b/docs/distilled_sd.md
@@ -1,8 +1,8 @@
-# Running distilled models: SSD1B and SDx.x with tiny U-Nets
+# Running distilled models: SSD1B, Vega and SDx.x with tiny U-Nets
 
 ## Preface 
 
-These models feature a reduced U-Net architecture. Unlike standard SDXL models, the SSD-1B U-Net contains only one middle block and fewer attention layers in its up- and down-blocks, resulting in significantly smaller file sizes. Using these models can reduce inference time by more than 33%. For more details, refer to Segmind's paper: https://arxiv.org/abs/2401.02677v1.
+These models feature a reduced U-Net architecture. Unlike standard SDXL models, the SSD-1B and Vega U-Net contains only one middle block and fewer attention layers in its up- and down-blocks, resulting in significantly smaller file sizes. Using these models can reduce inference time by more than 33%. For more details, refer to Segmind's paper: https://arxiv.org/abs/2401.02677v1.
 Similarly, SD1.x- and SD2.x-style models with a tiny U-Net consist of only 6 U-Net blocks, leading to very small files and time savings of up to 50%. For more information, see the paper: https://arxiv.org/pdf/2305.15798.pdf.
 
 ## SSD1B
@@ -17,7 +17,17 @@ Useful LoRAs are also available:
  * https://huggingface.co/seungminh/lora-swarovski-SSD-1B/resolve/main/pytorch_lora_weights.safetensors
  * https://huggingface.co/kylielee505/mylcmlorassd/resolve/main/pytorch_lora_weights.safetensors
 
-These files can be used out-of-the-box, unlike the models described in the next section.
+## Vega
+
+Segmind's Vega model is available online here:
+
+ * https://huggingface.co/segmind/Segmind-Vega/resolve/main/segmind-vega.safetensors
+ 
+VegaRT is an example for an LCM-LoRA:
+
+ * https://huggingface.co/segmind/Segmind-VegaRT/resolve/main/pytorch_lora_weights.safetensors
+
+Both files can be used out-of-the-box, unlike the models described in next sections.
 
 
 ## SD1.x, SD2.x with tiny U-Nets

From 3c56dac40a83d1b43c50d5cc141b483903adc969 Mon Sep 17 00:00:00 2001
From: akleine <alb.kleine@gmx.de>
Date: Sat, 17 Jan 2026 07:42:47 +0100
Subject: [PATCH 3/4] fix: Correction of diff between SSD-1B/Vega/SD-Tiny/SDXS

---
 model.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/model.cpp b/model.cpp
index b892e310d..12cf44c8c 100644
--- a/model.cpp
+++ b/model.cpp
@@ -1039,6 +1039,7 @@ SDVersion ModelLoader::get_sd_version() {
     bool has_img_emb                 = false;
     bool has_middle_block_1          = false;
     bool has_output_block_311        = false;
+    bool has_output_block_71         = false;
 
     for (auto& [name, tensor_storage] : tensor_storage_map) {
         if (!(is_xl)) {
@@ -1098,6 +1099,9 @@ SDVersion ModelLoader::get_sd_version() {
         if (tensor_storage.name.find("model.diffusion_model.output_blocks.3.1.transformer_blocks.1") != std::string::npos) {
             has_output_block_311 = true;
         }
+        if (tensor_storage.name.find("model.diffusion_model.output_blocks.7.1") != std::string::npos) {
+            has_output_block_71 = true;
+        }
         if (tensor_storage.name == "cond_stage_model.transformer.text_model.embeddings.token_embedding.weight" ||
             tensor_storage.name == "cond_stage_model.model.token_embedding.weight" ||
             tensor_storage.name == "text_model.embeddings.token_embedding.weight" ||
@@ -1162,7 +1166,7 @@ SDVersion ModelLoader::get_sd_version() {
             return VERSION_SD1_PIX2PIX;
         }
         if (!has_middle_block_1) {
-            if (!has_output_block_311) {
+            if (!has_output_block_71) {
                 return VERSION_SDXS;
             }
             return VERSION_SD1_TINY_UNET;

From c94c55160fa4ac5adad5ae5067cb8be3877bd9d8 Mon Sep 17 00:00:00 2001
From: leejet <leejet714@gmail.com>
Date: Mon, 19 Jan 2026 23:08:45 +0800
Subject: [PATCH 4/4] format code

---
 unet.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unet.hpp b/unet.hpp
index f92711037..6e15e1f45 100644
--- a/unet.hpp
+++ b/unet.hpp
@@ -202,7 +202,7 @@ class UnetModelBlock : public GGMLBlock {
             num_heads             = -1;
             use_linear_projection = true;
             if (version == VERSION_SDXL_VEGA) {
-                transformer_depth     = {1, 1, 2};
+                transformer_depth = {1, 1, 2};
             }
         } else if (version == VERSION_SVD) {
             in_channels           = 8;