diff --git a/.github/actions/spelling/expect/expect.txt b/.github/actions/spelling/expect/expect.txt
index 932611291ab..b2961cb7f36 100644
--- a/.github/actions/spelling/expect/expect.txt
+++ b/.github/actions/spelling/expect/expect.txt
@@ -1017,6 +1017,7 @@ minkernel
 MINMAXINFO
 minwin
 minwindef
+misprediction
 MMBB
 mmcc
 MMCPL
diff --git a/src/terminal/adapter/SixelParser.cpp b/src/terminal/adapter/SixelParser.cpp
index 15b9499d2ad..e23c23d1e60 100644
--- a/src/terminal/adapter/SixelParser.cpp
+++ b/src/terminal/adapter/SixelParser.cpp
@@ -729,6 +729,11 @@ void SixelParser::_decreaseFilledBackgroundHeight(const int decreasedHeight) noe
     }
 }
 
+#pragma warning(push)
+#pragma warning(disable : 26429) // Symbol 'imageBufferPtr' is never tested for nullness, it can be marked as not_null (f.23).
+#pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).
+#pragma warning(disable : 26490) // Don't use reinterpret_cast (type.1).
+
 void SixelParser::_writeToImageBuffer(int sixelValue, int repeatCount)
 {
     // On terminals that support the raster attributes command (which sets the
@@ -736,32 +741,88 @@ void SixelParser::_writeToImageBuffer(int sixelValue, int repeatCount)
     // is received. So if we haven't filled it yet, we need to do so now.
     _fillImageBackground();
 
+    repeatCount = std::min(repeatCount, _imageMaxWidth - _imageCursor.x);
+    if (repeatCount <= 0)
+    {
+        return;
+    }
+
+    if (sixelValue == 0)
+    {
+        _imageCursor.x += repeatCount;
+        return;
+    }
+
+    // This allows us to unsafely cast _imageBuffer to uint16_t
+    // and benefit from compiler/STL optimizations.
+    static_assert(sizeof(IndexedPixel) == sizeof(int16_t));
+    static_assert(alignof(IndexedPixel) == alignof(int16_t));
+
     // Then we need to render the 6 vertical pixels that are represented by the
     // bits in the sixel value. Although note that each of these sixel pixels
     // may cover more than one device pixel, depending on the aspect ratio.
     const auto targetOffset = _imageCursor.y * _imageMaxWidth + _imageCursor.x;
-    auto imageBufferPtr = std::next(_imageBuffer.data(), targetOffset);
-    repeatCount = std::min(repeatCount, _imageMaxWidth - _imageCursor.x);
-    for (auto i = 0; i < 6; i++)
+    const auto foreground = std::bit_cast<int16_t>(_foregroundPixel);
+    auto imageBufferPtr = reinterpret_cast<int16_t*>(_imageBuffer.data() + targetOffset);
+
+    // A aspect ratio of 1:1 is the most common and worth optimizing.
+    if (_pixelAspectRatio == 1)
     {
-        if (sixelValue & 1)
+        do
         {
-            auto repeatAspectRatio = _pixelAspectRatio;
-            do
+            // This gets unrolled by MSVC. It's written this way to use CMOV instructions.
+            // Modern CPUs have fat caches and deep pipelines. It's better to do pointless reads
+            // from memory than causing branch misprediction, as sixelValue is highly random.
+            for (int i = 0; i < 6; i++)
             {
-                std::fill_n(imageBufferPtr, repeatCount, _foregroundPixel);
-                std::advance(imageBufferPtr, _imageMaxWidth);
-            } while (--repeatAspectRatio > 0);
-        }
-        else
+                const auto test = sixelValue & (1 << i);
+                // Possibly pointless read from memory, but...
+                const auto before = imageBufferPtr[i * _imageMaxWidth];
+                // ...it allows the compiler to turn this into a CMOV (= no branch misprediction).
+                const auto after = test ? foreground : before;
+                imageBufferPtr[i * _imageMaxWidth] = after;
+            }
+
+            _imageCursor.x += 1;
+            imageBufferPtr += 1;
+            repeatCount -= 1;
+        } while (repeatCount > 0);
+    }
+    else
+    {
+        for (auto i = 0; i < 6; i++)
         {
-            std::advance(imageBufferPtr, _imageMaxWidth * _pixelAspectRatio);
+            if (sixelValue & 1)
+            {
+                auto repeatAspectRatio = _pixelAspectRatio;
+                do
+                {
+                    // If this used std::fill_n or just a primitive loop, MSVC would compile
+                    // it to a `rep stosw` instruction, which has a high startup cost.
+                    // This is not ideal when our repeatCount is almost always small.
+                    // The way this does ptr++ and len-- is also ideal for optimization.
+                    auto ptr = imageBufferPtr;
+                    auto remaining = repeatCount;
+                    do
+                    {
+                        __iso_volatile_store16(ptr++, foreground);
+                    } while (--remaining != 0);
+
+                    imageBufferPtr += _imageMaxWidth;
+                } while (--repeatAspectRatio > 0);
+            }
+            else
+            {
+                std::advance(imageBufferPtr, _imageMaxWidth * _pixelAspectRatio);
+            }
+            sixelValue >>= 1;
         }
-        sixelValue >>= 1;
+        _imageCursor.x += repeatCount;
     }
-    _imageCursor.x += repeatCount;
 }
 
+#pragma warning(pop)
+
 void SixelParser::_eraseImageBufferRows(const int rowCount, const til::CoordType rowOffset) noexcept
 {
     const auto pixelCount = rowCount * _cellSize.height;
diff --git a/src/terminal/adapter/SixelParser.hpp b/src/terminal/adapter/SixelParser.hpp
index 62a7f7eae7c..17ed654df15 100644
--- a/src/terminal/adapter/SixelParser.hpp
+++ b/src/terminal/adapter/SixelParser.hpp
@@ -42,7 +42,7 @@ namespace Microsoft::Console::VirtualTerminal
         // to retain the 16-bit size.
         static constexpr size_t MAX_COLORS = 256;
         using IndexType = uint8_t;
-        struct IndexedPixel
+        struct alignas(int16_t) IndexedPixel
         {
             uint8_t transparent = false;
             IndexType colorIndex = 0;
@@ -104,8 +104,8 @@ namespace Microsoft::Console::VirtualTerminal
         const size_t _maxColors;
         size_t _colorsUsed = 0;
         size_t _colorsAvailable = 0;
-        bool _colorTableChanged = false;
         IndexedPixel _foregroundPixel = {};
+        bool _colorTableChanged = false;
 
         void _initImageBuffer();
         void _resizeImageBuffer(const til::CoordType requiredHeight);