diff --git a/.github/actions/spelling/expect/expect.txt b/.github/actions/spelling/expect/expect.txt index 932611291ab..b2961cb7f36 100644 --- a/.github/actions/spelling/expect/expect.txt +++ b/.github/actions/spelling/expect/expect.txt @@ -1017,6 +1017,7 @@ minkernel MINMAXINFO minwin minwindef +misprediction MMBB mmcc MMCPL diff --git a/src/terminal/adapter/SixelParser.cpp b/src/terminal/adapter/SixelParser.cpp index 15b9499d2ad..e23c23d1e60 100644 --- a/src/terminal/adapter/SixelParser.cpp +++ b/src/terminal/adapter/SixelParser.cpp @@ -729,6 +729,11 @@ void SixelParser::_decreaseFilledBackgroundHeight(const int decreasedHeight) noe } } +#pragma warning(push) +#pragma warning(disable : 26429) // Symbol 'imageBufferPtr' is never tested for nullness, it can be marked as not_null (f.23). +#pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). +#pragma warning(disable : 26490) // Don't use reinterpret_cast (type.1). + void SixelParser::_writeToImageBuffer(int sixelValue, int repeatCount) { // On terminals that support the raster attributes command (which sets the @@ -736,32 +741,88 @@ void SixelParser::_writeToImageBuffer(int sixelValue, int repeatCount) // is received. So if we haven't filled it yet, we need to do so now. _fillImageBackground(); + repeatCount = std::min(repeatCount, _imageMaxWidth - _imageCursor.x); + if (repeatCount <= 0) + { + return; + } + + if (sixelValue == 0) + { + _imageCursor.x += repeatCount; + return; + } + + // This allows us to unsafely cast _imageBuffer to uint16_t + // and benefit from compiler/STL optimizations. + static_assert(sizeof(IndexedPixel) == sizeof(int16_t)); + static_assert(alignof(IndexedPixel) == alignof(int16_t)); + // Then we need to render the 6 vertical pixels that are represented by the // bits in the sixel value. Although note that each of these sixel pixels // may cover more than one device pixel, depending on the aspect ratio. const auto targetOffset = _imageCursor.y * _imageMaxWidth + _imageCursor.x; - auto imageBufferPtr = std::next(_imageBuffer.data(), targetOffset); - repeatCount = std::min(repeatCount, _imageMaxWidth - _imageCursor.x); - for (auto i = 0; i < 6; i++) + const auto foreground = std::bit_cast(_foregroundPixel); + auto imageBufferPtr = reinterpret_cast(_imageBuffer.data() + targetOffset); + + // A aspect ratio of 1:1 is the most common and worth optimizing. + if (_pixelAspectRatio == 1) { - if (sixelValue & 1) + do { - auto repeatAspectRatio = _pixelAspectRatio; - do + // This gets unrolled by MSVC. It's written this way to use CMOV instructions. + // Modern CPUs have fat caches and deep pipelines. It's better to do pointless reads + // from memory than causing branch misprediction, as sixelValue is highly random. + for (int i = 0; i < 6; i++) { - std::fill_n(imageBufferPtr, repeatCount, _foregroundPixel); - std::advance(imageBufferPtr, _imageMaxWidth); - } while (--repeatAspectRatio > 0); - } - else + const auto test = sixelValue & (1 << i); + // Possibly pointless read from memory, but... + const auto before = imageBufferPtr[i * _imageMaxWidth]; + // ...it allows the compiler to turn this into a CMOV (= no branch misprediction). + const auto after = test ? foreground : before; + imageBufferPtr[i * _imageMaxWidth] = after; + } + + _imageCursor.x += 1; + imageBufferPtr += 1; + repeatCount -= 1; + } while (repeatCount > 0); + } + else + { + for (auto i = 0; i < 6; i++) { - std::advance(imageBufferPtr, _imageMaxWidth * _pixelAspectRatio); + if (sixelValue & 1) + { + auto repeatAspectRatio = _pixelAspectRatio; + do + { + // If this used std::fill_n or just a primitive loop, MSVC would compile + // it to a `rep stosw` instruction, which has a high startup cost. + // This is not ideal when our repeatCount is almost always small. + // The way this does ptr++ and len-- is also ideal for optimization. + auto ptr = imageBufferPtr; + auto remaining = repeatCount; + do + { + __iso_volatile_store16(ptr++, foreground); + } while (--remaining != 0); + + imageBufferPtr += _imageMaxWidth; + } while (--repeatAspectRatio > 0); + } + else + { + std::advance(imageBufferPtr, _imageMaxWidth * _pixelAspectRatio); + } + sixelValue >>= 1; } - sixelValue >>= 1; + _imageCursor.x += repeatCount; } - _imageCursor.x += repeatCount; } +#pragma warning(pop) + void SixelParser::_eraseImageBufferRows(const int rowCount, const til::CoordType rowOffset) noexcept { const auto pixelCount = rowCount * _cellSize.height; diff --git a/src/terminal/adapter/SixelParser.hpp b/src/terminal/adapter/SixelParser.hpp index 62a7f7eae7c..17ed654df15 100644 --- a/src/terminal/adapter/SixelParser.hpp +++ b/src/terminal/adapter/SixelParser.hpp @@ -42,7 +42,7 @@ namespace Microsoft::Console::VirtualTerminal // to retain the 16-bit size. static constexpr size_t MAX_COLORS = 256; using IndexType = uint8_t; - struct IndexedPixel + struct alignas(int16_t) IndexedPixel { uint8_t transparent = false; IndexType colorIndex = 0; @@ -104,8 +104,8 @@ namespace Microsoft::Console::VirtualTerminal const size_t _maxColors; size_t _colorsUsed = 0; size_t _colorsAvailable = 0; - bool _colorTableChanged = false; IndexedPixel _foregroundPixel = {}; + bool _colorTableChanged = false; void _initImageBuffer(); void _resizeImageBuffer(const til::CoordType requiredHeight);