From c83fbbb89e54de86c8e90be89f1706f993e7e63c Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 10 Dec 2025 16:52:49 +0100 Subject: [PATCH 1/2] Optimize DCS performance with batch processing --- src/terminal/adapter/ITermDispatch.hpp | 2 +- src/terminal/adapter/MacroBuffer.cpp | 150 +++---- src/terminal/adapter/MacroBuffer.hpp | 2 +- src/terminal/adapter/SixelParser.cpp | 7 +- src/terminal/adapter/SixelParser.hpp | 2 +- src/terminal/adapter/adaptDispatch.cpp | 462 ++++++++++---------- src/terminal/parser/IStateMachineEngine.hpp | 2 +- src/terminal/parser/stateMachine.cpp | 298 +++++++++---- src/terminal/parser/stateMachine.hpp | 20 +- src/types/inc/utils.hpp | 1 + src/types/utils.cpp | 5 +- 11 files changed, 536 insertions(+), 415 deletions(-) diff --git a/src/terminal/adapter/ITermDispatch.hpp b/src/terminal/adapter/ITermDispatch.hpp index 5ebb2ff31c8..56a58b963c7 100644 --- a/src/terminal/adapter/ITermDispatch.hpp +++ b/src/terminal/adapter/ITermDispatch.hpp @@ -23,7 +23,7 @@ namespace Microsoft::Console::VirtualTerminal class Microsoft::Console::VirtualTerminal::ITermDispatch { public: - using StringHandler = std::function; + using StringHandler = std::function; enum class OptionalFeature { diff --git a/src/terminal/adapter/MacroBuffer.cpp b/src/terminal/adapter/MacroBuffer.cpp index 7ae1f05e5b7..ebefb699ac5 100644 --- a/src/terminal/adapter/MacroBuffer.cpp +++ b/src/terminal/adapter/MacroBuffer.cpp @@ -113,92 +113,96 @@ bool MacroBuffer::InitParser(const size_t macroId, const DispatchTypes::MacroDel return false; } -bool MacroBuffer::ParseDefinition(const wchar_t ch) +bool MacroBuffer::ParseDefinition(const std::wstring_view str) { - // Once we receive an ESC, that marks the end of the definition, but if - // an unterminated repeat is still pending, we should apply that now. - if (ch == AsciiChars::ESC) + for (const auto ch : str) { - if (_repeatPending && !_applyPendingRepeat()) + // Once we receive an ESC, that marks the end of the definition, but if + // an unterminated repeat is still pending, we should apply that now. + if (ch == AsciiChars::ESC) { - _deleteMacro(_activeMacro()); + if (_repeatPending && !_applyPendingRepeat()) + { + _deleteMacro(_activeMacro()); + } + return false; } - return false; - } - - // Any other control characters are just ignored. - if (ch < L' ') - { - return true; - } - // For "text encoded" macros, we'll always be in the ExpectingText state. - // For "hex encoded" macros, we'll typically be alternating between the - // ExpectingHexDigit and ExpectingSecondHexDigit states as we parse the two - // digits of each hex pair. But we also need to deal with repeat sequences, - // which start with `!`, followed by a numeric repeat count, and then a - // range of hex pairs between two `;` characters. When parsing the repeat - // count, we use the ExpectingRepeatCount state, but when parsing the hex - // pairs of the repeat, we just use the regular ExpectingHexDigit states. - - auto success = true; - switch (_parseState) - { - case State::ExpectingText: - success = _appendToActiveMacro(ch); - break; - case State::ExpectingHexDigit: - if (_decodeHexDigit(ch)) + // Any other control characters are just ignored. + if (ch < L' ') { - _parseState = State::ExpectingSecondHexDigit; - } - else if (ch == L'!' && !_repeatPending) - { - _parseState = State::ExpectingRepeatCount; - _repeatCount = 0; - } - else if (ch == L';' && _repeatPending) - { - success = _applyPendingRepeat(); - } - else - { - success = false; - } - break; - case State::ExpectingSecondHexDigit: - success = _decodeHexDigit(ch) && _appendToActiveMacro(_decodedChar); - _decodedChar = 0; - _parseState = State::ExpectingHexDigit; - break; - case State::ExpectingRepeatCount: - if (ch >= L'0' && ch <= L'9') - { - _repeatCount = _repeatCount * 10 + (ch - L'0'); - _repeatCount = std::min(_repeatCount, MAX_PARAMETER_VALUE); + return true; } - else if (ch == L';') + + // For "text encoded" macros, we'll always be in the ExpectingText state. + // For "hex encoded" macros, we'll typically be alternating between the + // ExpectingHexDigit and ExpectingSecondHexDigit states as we parse the two + // digits of each hex pair. But we also need to deal with repeat sequences, + // which start with `!`, followed by a numeric repeat count, and then a + // range of hex pairs between two `;` characters. When parsing the repeat + // count, we use the ExpectingRepeatCount state, but when parsing the hex + // pairs of the repeat, we just use the regular ExpectingHexDigit states. + + auto success = true; + switch (_parseState) { - _repeatPending = true; - _repeatStart = _activeMacro().length(); + case State::ExpectingText: + success = _appendToActiveMacro(ch); + break; + case State::ExpectingHexDigit: + if (_decodeHexDigit(ch)) + { + _parseState = State::ExpectingSecondHexDigit; + } + else if (ch == L'!' && !_repeatPending) + { + _parseState = State::ExpectingRepeatCount; + _repeatCount = 0; + } + else if (ch == L';' && _repeatPending) + { + success = _applyPendingRepeat(); + } + else + { + success = false; + } + break; + case State::ExpectingSecondHexDigit: + success = _decodeHexDigit(ch) && _appendToActiveMacro(_decodedChar); + _decodedChar = 0; _parseState = State::ExpectingHexDigit; - } - else - { + break; + case State::ExpectingRepeatCount: + if (ch >= L'0' && ch <= L'9') + { + _repeatCount = _repeatCount * 10 + (ch - L'0'); + _repeatCount = std::min(_repeatCount, MAX_PARAMETER_VALUE); + } + else if (ch == L';') + { + _repeatPending = true; + _repeatStart = _activeMacro().length(); + _parseState = State::ExpectingHexDigit; + } + else + { + success = false; + } + break; + default: success = false; + break; } - break; - default: - success = false; - break; - } - // If there is an error in the definition, clear everything received so far. - if (!success) - { - _deleteMacro(_activeMacro()); + // If there is an error in the definition, clear everything received so far. + if (!success) + { + _deleteMacro(_activeMacro()); + return false; + } } - return success; + return true; } bool MacroBuffer::_decodeHexDigit(const wchar_t ch) noexcept diff --git a/src/terminal/adapter/MacroBuffer.hpp b/src/terminal/adapter/MacroBuffer.hpp index 86e5dfed889..2fef11fdb33 100644 --- a/src/terminal/adapter/MacroBuffer.hpp +++ b/src/terminal/adapter/MacroBuffer.hpp @@ -44,7 +44,7 @@ namespace Microsoft::Console::VirtualTerminal void InvokeMacro(const size_t macroId, StateMachine& stateMachine); void ClearMacrosIfInUse(); bool InitParser(const size_t macroId, const DispatchTypes::MacroDeleteControl deleteControl, const DispatchTypes::MacroEncoding encoding); - bool ParseDefinition(const wchar_t ch); + bool ParseDefinition(std::wstring_view str); private: bool _decodeHexDigit(const wchar_t ch) noexcept; diff --git a/src/terminal/adapter/SixelParser.cpp b/src/terminal/adapter/SixelParser.cpp index 15b9499d2ad..762a7f8dc70 100644 --- a/src/terminal/adapter/SixelParser.cpp +++ b/src/terminal/adapter/SixelParser.cpp @@ -80,7 +80,7 @@ void SixelParser::SetDisplayMode(const bool enabled) noexcept } } -std::function SixelParser::DefineImage(const VTInt macroParameter, const DispatchTypes::SixelBackground backgroundSelect, const VTParameter backgroundColor) +std::function SixelParser::DefineImage(const VTInt macroParameter, const DispatchTypes::SixelBackground backgroundSelect, const VTParameter backgroundColor) { if (_initTextBufferBoundaries()) { @@ -89,8 +89,11 @@ std::function SixelParser::DefineImage(const VTInt macroParameter _initImageBuffer(); _state = States::Normal; _parameters.clear(); - return [&](const auto ch) { + return [&](const std::wstring_view str) { + for (const auto ch : str) + { _parseCommandChar(ch); + } return true; }; } diff --git a/src/terminal/adapter/SixelParser.hpp b/src/terminal/adapter/SixelParser.hpp index 62a7f7eae7c..6b5b641e7c2 100644 --- a/src/terminal/adapter/SixelParser.hpp +++ b/src/terminal/adapter/SixelParser.hpp @@ -34,7 +34,7 @@ namespace Microsoft::Console::VirtualTerminal SixelParser(AdaptDispatch& dispatcher, const StateMachine& stateMachine, const VTInt conformanceLevel = DefaultConformance) noexcept; void SoftReset(); void SetDisplayMode(const bool enabled) noexcept; - std::function DefineImage(const VTInt macroParameter, const DispatchTypes::SixelBackground backgroundSelect, const VTParameter backgroundColor); + std::function DefineImage(const VTInt macroParameter, const DispatchTypes::SixelBackground backgroundSelect, const VTParameter backgroundColor); private: // NB: If we want to support more than 256 colors, we'll also need to diff --git a/src/terminal/adapter/adaptDispatch.cpp b/src/terminal/adapter/adaptDispatch.cpp index 9de1509bd73..7cee4e20f9f 100644 --- a/src/terminal/adapter/adaptDispatch.cpp +++ b/src/terminal/adapter/adaptDispatch.cpp @@ -3836,34 +3836,37 @@ ITermDispatch::StringHandler AdaptDispatch::DownloadDRCS(const VTInt fontNumber, return nullptr; } - return [=](const auto ch) { - // We pass the data string straight through to the font buffer class - // until we receive an ESC, indicating the end of the string. At that - // point we can finalize the buffer, and if valid, update the renderer - // with the constructed bit pattern. - if (ch != AsciiChars::ESC) + return [=](const std::wstring_view str) { + for (const auto ch : str) { - _fontBuffer->AddSixelData(ch); - } - else if (_fontBuffer->FinalizeSixelData()) - { - // We also need to inform the character set mapper of the ID that - // will map to this font (we only support one font buffer so there - // will only ever be one active dynamic character set). - if (charsetSize == DispatchTypes::CharsetSize::Size96) - { - _termOutput.SetDrcs96Designation(_fontBuffer->GetDesignation()); - } - else + // We pass the data string straight through to the font buffer class + // until we receive an ESC, indicating the end of the string. At that + // point we can finalize the buffer, and if valid, update the renderer + // with the constructed bit pattern. + if (ch != AsciiChars::ESC) { - _termOutput.SetDrcs94Designation(_fontBuffer->GetDesignation()); + _fontBuffer->AddSixelData(ch); } - if (_renderer) + else if (_fontBuffer->FinalizeSixelData()) { - const auto bitPattern = _fontBuffer->GetBitPattern(); - const auto cellSize = _fontBuffer->GetCellSize(); - const auto centeringHint = _fontBuffer->GetTextCenteringHint(); - _renderer->UpdateSoftFont(bitPattern, cellSize, centeringHint); + // We also need to inform the character set mapper of the ID that + // will map to this font (we only support one font buffer so there + // will only ever be one active dynamic character set). + if (charsetSize == DispatchTypes::CharsetSize::Size96) + { + _termOutput.SetDrcs96Designation(_fontBuffer->GetDesignation()); + } + else + { + _termOutput.SetDrcs94Designation(_fontBuffer->GetDesignation()); + } + if (_renderer) + { + const auto bitPattern = _fontBuffer->GetBitPattern(); + const auto cellSize = _fontBuffer->GetCellSize(); + const auto centeringHint = _fontBuffer->GetTextCenteringHint(); + _renderer->UpdateSoftFont(bitPattern, cellSize, centeringHint); + } } } return true; @@ -3889,24 +3892,27 @@ void AdaptDispatch::RequestUserPreferenceCharset() // - a function to parse the character set ID ITermDispatch::StringHandler AdaptDispatch::AssignUserPreferenceCharset(const DispatchTypes::CharsetSize charsetSize) { - return [this, charsetSize, idBuilder = VTIDBuilder{}](const auto ch) mutable { - if (ch >= L'\x20' && ch <= L'\x2f') + return [this, charsetSize, idBuilder = VTIDBuilder{}](const std::wstring_view str) mutable { + for (const auto ch : str) { - idBuilder.AddIntermediate(ch); - } - else if (ch >= L'\x30' && ch <= L'\x7e') - { - const auto id = idBuilder.Finalize(ch); - switch (charsetSize) + if (ch >= L'\x20' && ch <= L'\x2f') { - case DispatchTypes::CharsetSize::Size94: - _termOutput.AssignUserPreferenceCharset(id, false); - break; - case DispatchTypes::CharsetSize::Size96: - _termOutput.AssignUserPreferenceCharset(id, true); - break; + idBuilder.AddIntermediate(ch); + } + else if (ch >= L'\x30' && ch <= L'\x7e') + { + const auto id = idBuilder.Finalize(ch); + switch (charsetSize) + { + case DispatchTypes::CharsetSize::Size94: + _termOutput.AssignUserPreferenceCharset(id, false); + break; + case DispatchTypes::CharsetSize::Size96: + _termOutput.AssignUserPreferenceCharset(id, true); + break; + } + return false; } - return false; } return true; }; @@ -3932,8 +3938,8 @@ ITermDispatch::StringHandler AdaptDispatch::DefineMacro(const VTInt macroId, if (_macroBuffer->InitParser(macroId, deleteControl, encoding)) { - return [&](const auto ch) { - return _macroBuffer->ParseDefinition(ch); + return [&](const std::wstring_view str) { + return _macroBuffer->ParseDefinition(str); }; } @@ -4053,45 +4059,48 @@ void AdaptDispatch::_ReportColorTable(const DispatchTypes::ColorModel colorModel // - a function to parse the report data. ITermDispatch::StringHandler AdaptDispatch::_RestoreColorTable() { - return [this, parameter = VTInt{}, parameters = std::vector{}](const auto ch) mutable { - if (ch >= L'0' && ch <= L'9') - { - parameter *= 10; - parameter += (ch - L'0'); - parameter = std::min(parameter, MAX_PARAMETER_VALUE); - } - else if (ch == L';') + return [this, parameter = VTInt{}, parameters = std::vector{}](const std::wstring_view str) mutable { + for (const auto ch : str) { - if (parameters.size() < 5) + if (ch >= L'0' && ch <= L'9') { - parameters.push_back(parameter); + parameter *= 10; + parameter += (ch - L'0'); + parameter = std::min(parameter, MAX_PARAMETER_VALUE); } - parameter = 0; - } - else if (ch == L'/' || ch == AsciiChars::ESC) - { - parameters.push_back(parameter); - const auto colorParameters = VTParameters{ parameters.data(), parameters.size() }; - const auto colorNumber = colorParameters.at(0).value_or(0); - if (colorNumber < TextColor::TABLE_SIZE) + else if (ch == L';') { - const auto colorModel = DispatchTypes::ColorModel{ colorParameters.at(1) }; - const auto x = colorParameters.at(2).value_or(0); - const auto y = colorParameters.at(3).value_or(0); - const auto z = colorParameters.at(4).value_or(0); - if (colorModel == DispatchTypes::ColorModel::HLS) + if (parameters.size() < 5) { - SetColorTableEntry(colorNumber, Utils::ColorFromHLS(x, y, z)); + parameters.push_back(parameter); } - else if (colorModel == DispatchTypes::ColorModel::RGB) + parameter = 0; + } + else if (ch == L'/' || ch == AsciiChars::ESC) + { + parameters.push_back(parameter); + const auto colorParameters = VTParameters{ parameters.data(), parameters.size() }; + const auto colorNumber = colorParameters.at(0).value_or(0); + if (colorNumber < TextColor::TABLE_SIZE) { - SetColorTableEntry(colorNumber, Utils::ColorFromRGB100(x, y, z)); + const auto colorModel = DispatchTypes::ColorModel{ colorParameters.at(1) }; + const auto x = colorParameters.at(2).value_or(0); + const auto y = colorParameters.at(3).value_or(0); + const auto z = colorParameters.at(4).value_or(0); + if (colorModel == DispatchTypes::ColorModel::HLS) + { + SetColorTableEntry(colorNumber, Utils::ColorFromHLS(x, y, z)); + } + else if (colorModel == DispatchTypes::ColorModel::RGB) + { + SetColorTableEntry(colorNumber, Utils::ColorFromRGB100(x, y, z)); + } } + parameters.clear(); + parameter = 0; } - parameters.clear(); - parameter = 0; } - return (ch != AsciiChars::ESC); + return true; }; } @@ -4111,42 +4120,43 @@ ITermDispatch::StringHandler AdaptDispatch::RequestSetting() // this is the opposite of what is documented in most DEC manuals, which // say that 0 is for a valid response, and 1 is for an error. The correct // interpretation is documented in the DEC STD 070 reference. - return [this, parameter = VTInt{}, idBuilder = VTIDBuilder{}](const auto ch) mutable { - const auto isFinal = ch >= L'\x40' && ch <= L'\x7e'; - if (isFinal) + return [this, parameter = VTInt{}, idBuilder = VTIDBuilder{}](const std::wstring_view str) mutable { + for (const auto ch : str) { - const auto id = idBuilder.Finalize(ch); - switch (id) + const auto isFinal = ch >= L'\x40' && ch <= L'\x7e'; + if (isFinal) { - case VTID("m"): - _ReportSGRSetting(); - break; - case VTID("r"): - _ReportDECSTBMSetting(); - break; - case VTID("s"): - _ReportDECSLRMSetting(); - break; - case VTID(" q"): - _ReportDECSCUSRSetting(); - break; - case VTID("\"q"): - _ReportDECSCASetting(); - break; - case VTID("*x"): - _ReportDECSACESetting(); - break; - case VTID(",|"): - _ReportDECACSetting(VTParameter{ parameter }); - break; - default: - _ReturnDcsResponse(L"0$r"); - break; + const auto id = idBuilder.Finalize(ch); + switch (id) + { + case VTID("m"): + _ReportSGRSetting(); + break; + case VTID("r"): + _ReportDECSTBMSetting(); + break; + case VTID("s"): + _ReportDECSLRMSetting(); + break; + case VTID(" q"): + _ReportDECSCUSRSetting(); + break; + case VTID("\"q"): + _ReportDECSCASetting(); + break; + case VTID("*x"): + _ReportDECSACESetting(); + break; + case VTID(",|"): + _ReportDECACSetting(VTParameter{ parameter }); + break; + default: + _ReturnDcsResponse(L"0$r"); + break; + } + return false; } - return false; - } - else - { + // Although we don't yet support any operations with parameter // prefixes, it's important that we still parse the prefix and // include it in the ID. Otherwise, we'll mistakenly respond to @@ -4164,8 +4174,8 @@ ITermDispatch::StringHandler AdaptDispatch::RequestSetting() parameter += (ch - L'0'); parameter = std::min(parameter, MAX_PARAMETER_VALUE); } - return true; } + return true; }; } @@ -4513,126 +4523,129 @@ ITermDispatch::StringHandler AdaptDispatch::_RestoreCursorInformation() VTParameter row{}; VTParameter column{}; }; - return [&, state = State{}](const auto ch) mutable { - if (numeric.test(state.field)) + return [&, state = State{}](const std::wstring_view str) mutable { + for (const auto ch : str) { - if (ch >= '0' && ch <= '9') - { - state.value *= 10; - state.value += (ch - L'0'); - state.value = std::min(state.value, MAX_PARAMETER_VALUE); - } - else if (ch == L';' || ch == AsciiChars::ESC) + if (numeric.test(state.field)) { - if (state.field == Field::Row) + if (ch >= '0' && ch <= '9') { - state.row = state.value; + state.value *= 10; + state.value += (ch - L'0'); + state.value = std::min(state.value, MAX_PARAMETER_VALUE); } - else if (state.field == Field::Column) + else if (ch == L';' || ch == AsciiChars::ESC) { - state.column = state.value; - } - else if (state.field == Field::Page) - { - PagePositionAbsolute(state.value); - } - else if (state.field == Field::GL && state.value <= 3) - { - LockingShift(state.value); - } - else if (state.field == Field::GR && state.value <= 3) - { - LockingShiftRight(state.value); + if (state.field == Field::Row) + { + state.row = state.value; + } + else if (state.field == Field::Column) + { + state.column = state.value; + } + else if (state.field == Field::Page) + { + PagePositionAbsolute(state.value); + } + else if (state.field == Field::GL && state.value <= 3) + { + LockingShift(state.value); + } + else if (state.field == Field::GR && state.value <= 3) + { + LockingShiftRight(state.value); + } + state.value = {}; + state.field = static_cast(state.field + 1); } - state.value = {}; - state.field = static_cast(state.field + 1); } - } - else if (flags.test(state.field)) - { - // Note that there could potentially be multiple characters in a - // flag field, so we process the flags as soon as they're received. - // But for now we're only interested in the first one, so once the - // state.value is set, we ignore everything else until the `;`. - if (ch >= L'@' && ch <= '~' && !state.value) + else if (flags.test(state.field)) { - state.value = ch; - if (state.field == Field::SGR) + // Note that there could potentially be multiple characters in a + // flag field, so we process the flags as soon as they're received. + // But for now we're only interested in the first one, so once the + // state.value is set, we ignore everything else until the `;`. + if (ch >= L'@' && ch <= '~' && !state.value) { - const auto page = _pages.ActivePage(); - auto attr = page.Attributes(); - attr.SetIntense(state.value & 1); - attr.SetUnderlineStyle(state.value & 2 ? UnderlineStyle::SinglyUnderlined : UnderlineStyle::NoUnderline); - attr.SetBlinking(state.value & 4); - attr.SetReverseVideo(state.value & 8); - attr.SetInvisible(state.value & 16); - page.SetAttributes(attr); - } - else if (state.field == Field::Attr) - { - const auto page = _pages.ActivePage(); - auto attr = page.Attributes(); - attr.SetProtected(state.value & 1); - page.SetAttributes(attr); - } - else if (state.field == Field::Sizes) - { - state.charset96.at(0) = state.value & 1; - state.charset96.at(1) = state.value & 2; - state.charset96.at(2) = state.value & 4; - state.charset96.at(3) = state.value & 8; - } - else if (state.field == Field::Flags) - { - const bool originMode = state.value & 1; - const bool ss2 = state.value & 2; - const bool ss3 = state.value & 4; - const bool delayedEOLWrap = state.value & 8; - // The cursor position is parsed at the start of the sequence, - // but we only set the position once we know the origin mode. - _modes.set(Mode::Origin, originMode); - CursorPosition(state.row, state.column); - // There can only be one single shift applied at a time, so - // we'll just apply the last one that is enabled. - _termOutput.SingleShift(ss3 ? 3 : (ss2 ? 2 : 0)); - // The EOL flag will always be reset by the cursor movement - // above, so we only need to worry about setting it. - if (delayedEOLWrap) + state.value = ch; + if (state.field == Field::SGR) + { + const auto page = _pages.ActivePage(); + auto attr = page.Attributes(); + attr.SetIntense(state.value & 1); + attr.SetUnderlineStyle(state.value & 2 ? UnderlineStyle::SinglyUnderlined : UnderlineStyle::NoUnderline); + attr.SetBlinking(state.value & 4); + attr.SetReverseVideo(state.value & 8); + attr.SetInvisible(state.value & 16); + page.SetAttributes(attr); + } + else if (state.field == Field::Attr) { const auto page = _pages.ActivePage(); - page.Cursor().DelayEOLWrap(); + auto attr = page.Attributes(); + attr.SetProtected(state.value & 1); + page.SetAttributes(attr); + } + else if (state.field == Field::Sizes) + { + state.charset96.at(0) = state.value & 1; + state.charset96.at(1) = state.value & 2; + state.charset96.at(2) = state.value & 4; + state.charset96.at(3) = state.value & 8; + } + else if (state.field == Field::Flags) + { + const bool originMode = state.value & 1; + const bool ss2 = state.value & 2; + const bool ss3 = state.value & 4; + const bool delayedEOLWrap = state.value & 8; + // The cursor position is parsed at the start of the sequence, + // but we only set the position once we know the origin mode. + _modes.set(Mode::Origin, originMode); + CursorPosition(state.row, state.column); + // There can only be one single shift applied at a time, so + // we'll just apply the last one that is enabled. + _termOutput.SingleShift(ss3 ? 3 : (ss2 ? 2 : 0)); + // The EOL flag will always be reset by the cursor movement + // above, so we only need to worry about setting it. + if (delayedEOLWrap) + { + const auto page = _pages.ActivePage(); + page.Cursor().DelayEOLWrap(); + } } } + else if (ch == L';') + { + state.value = 0; + state.field = static_cast(state.field + 1); + } } - else if (ch == L';') - { - state.value = 0; - state.field = static_cast(state.field + 1); - } - } - else if (charset.test(state.field)) - { - if (ch >= L' ' && ch <= L'/') - { - state.charsetId.AddIntermediate(ch); - } - else if (ch >= L'0' && ch <= L'~') + else if (charset.test(state.field)) { - const auto id = state.charsetId.Finalize(ch); - const auto gset = state.field - Field::G0; - if (state.charset96.at(gset)) + if (ch >= L' ' && ch <= L'/') { - Designate96Charset(gset, id); + state.charsetId.AddIntermediate(ch); } - else + else if (ch >= L'0' && ch <= L'~') { - Designate94Charset(gset, id); + const auto id = state.charsetId.Finalize(ch); + const auto gset = state.field - Field::G0; + if (state.charset96.at(gset)) + { + Designate96Charset(gset, id); + } + else + { + Designate94Charset(gset, id); + } + state.charsetId.Clear(); + state.field = static_cast(state.field + 1); } - state.charsetId.Clear(); - state.field = static_cast(state.field + 1); } } - return (ch != AsciiChars::ESC); + return true; }; } @@ -4685,30 +4698,33 @@ ITermDispatch::StringHandler AdaptDispatch::_RestoreTabStops() _ClearAllTabStops(); _InitTabStopsForWidth(width); - return [this, width, column = size_t{}](const auto ch) mutable { - if (ch >= L'0' && ch <= L'9') - { - column *= 10; - column += (ch - L'0'); - column = std::min(column, MAX_PARAMETER_VALUE); - } - else if (ch == L'/' || ch == AsciiChars::ESC) + return [this, width, column = size_t{}](const std::wstring_view str) mutable { + for (const auto ch : str) { - // Note that column 1 is always a tab stop, so there is no - // need to record an entry at that offset. - if (column > 1u && column <= static_cast(width)) + if (ch >= L'0' && ch <= L'9') { - _tabStopColumns.at(column - 1) = true; + column *= 10; + column += (ch - L'0'); + column = std::min(column, MAX_PARAMETER_VALUE); + } + else if (ch == L'/' || ch == AsciiChars::ESC) + { + // Note that column 1 is always a tab stop, so there is no + // need to record an entry at that offset. + if (column > 1u && column <= static_cast(width)) + { + _tabStopColumns.at(column - 1) = true; + } + column = 0; + } + else + { + // If we receive an unexpected character, we don't try and + // process any more of the input - we just abort. + return false; } - column = 0; - } - else - { - // If we receive an unexpected character, we don't try and - // process any more of the input - we just abort. - return false; } - return (ch != AsciiChars::ESC); + return true; }; } diff --git a/src/terminal/parser/IStateMachineEngine.hpp b/src/terminal/parser/IStateMachineEngine.hpp index b0b55dc05ed..b83e85238fa 100644 --- a/src/terminal/parser/IStateMachineEngine.hpp +++ b/src/terminal/parser/IStateMachineEngine.hpp @@ -20,7 +20,7 @@ namespace Microsoft::Console::VirtualTerminal class IStateMachineEngine { public: - using StringHandler = std::function; + using StringHandler = std::function; virtual ~IStateMachineEngine() = 0; IStateMachineEngine(const IStateMachineEngine&) = default; diff --git a/src/terminal/parser/stateMachine.cpp b/src/terminal/parser/stateMachine.cpp index c45e2fc2f43..350536461de 100644 --- a/src/terminal/parser/stateMachine.cpp +++ b/src/terminal/parser/stateMachine.cpp @@ -655,7 +655,7 @@ void StateMachine::_ActionInterrupt() if (_state == VTStates::DcsPassThrough) { // The ESC signals the end of the data string. - _dcsStringHandler(AsciiChars::ESC); + _dcsStringHandler(L"\x1b"); _dcsStringHandler = nullptr; } } @@ -1799,20 +1799,81 @@ void StateMachine::_EventDcsParam(const wchar_t wch) // - wch - Character that triggered the event // Return Value: // - -void StateMachine::_EventDcsPassThrough(const wchar_t wch) +void StateMachine::_EventDcsPassThrough(const wchar_t) { _trace.TraceOnEvent(L"DcsPassThrough"); - if (_isC0Code(wch) || _isDcsPassThroughValid(wch)) + + assert(_runEnd != 0); + + auto beg = _runEnd - 1; + auto end = beg; + + for (;;) { - if (!_dcsStringHandler(wch)) + // Find the end of a run of valid DCS characters. + for (;;) + { + const auto ch = _currentString[end]; + if (_isEscape(ch) || (!_isC0Code(ch) && !_isDcsPassThroughValid(ch))) + { + break; + } + + ++end; + if (end >= _currentString.size()) + { + break; + } + } + + // If we found a run, pass it to the handler. + if (beg != end && !_dcsStringHandler(_currentString.substr(beg, end))) { _EnterDcsIgnore(); + break; + } + + // Out of input? Done. + if (end >= _currentString.size()) + { + break; + } + + // Escape character? We treat it as the ST terminator for DCS passthrough. + // Let StateMachine::ProcessCharacter take care of that. + if (_isEscape(_currentString[end])) + { + break; + } + + // Skip all invalid characters. This mirrors the search loop above, + // including the same post-loop exit conditions. + for (;;) + { + const auto ch = _currentString[end]; + if (!_isEscape(ch) && (_isC0Code(ch) || _isDcsPassThroughValid(ch))) + { + break; + } + + ++end; + if (end >= _currentString.size()) + { + break; + } + } + if (end >= _currentString.size()) + { + break; + } + if (_isEscape(_currentString[end])) + { + break; } } - else - { - _ActionIgnore(); - } + + // Mark all of the parsed text as processed. + _runEnd = std::max(_runEnd, end); } // Routine Description: @@ -1973,125 +2034,161 @@ bool StateMachine::FlushToTerminal() // - string - Characters to operate upon // Return Value: // - +#pragma warning(suppress : 26438) // Avoid 'goto'(es .76). +#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).) void StateMachine::ProcessString(const std::wstring_view string) { - size_t i = 0; _currentString = string; - _runOffset = 0; - _runSize = 0; + _runBeg = 0; + _runEnd = 0; _injections.clear(); if (_state != VTStates::Ground) { // Jump straight to where we need to. -#pragma warning(suppress : 26438) // Avoid 'goto'(es .76). goto processStringLoopVtStart; } - while (i < string.size()) + for (;;) { + // Process as much plain text as possible. { - // Pointer arithmetic is perfectly fine for our hot path. -#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).) - const auto beg = string.data() + i; - const auto len = string.size() - i; - const auto it = Microsoft::Console::Utils::FindActionableControlCharacter(beg, len); + // We're starting a new "chunk". + // --> Reset beg(in). + _runBeg = _runEnd; - _runOffset = i; - _runSize = it - beg; + // Ran out of text. Return. + if (_runBeg >= _currentString.size()) + { + return; + } + + const auto beg = _currentString.data() + _runBeg; + const auto len = _currentString.size() - _runBeg; + const auto it = Microsoft::Console::Utils::FindActionableControlCharacter(beg, len); - if (_runSize) + if (it != beg) { + _runEnd = it - _currentString.data(); _ActionPrintString(_CurrentRun()); - - i += _runSize; - _runOffset = i; - _runSize = 0; + _runBeg = _runEnd; } } + // Next, process VT sequences (plural!). processStringLoopVtStart: - if (i >= string.size()) + // Ran out of text. Return. + if (_runBeg >= _currentString.size()) { - break; + return; } - - do - { - _runSize++; - _processingLastCharacter = i + 1 >= string.size(); - // If we're processing characters individually, send it to the state machine. - ProcessCharacter(til::at(string, i)); - ++i; - } while (i < string.size() && _state != VTStates::Ground); - } - - // If we're at the end of the string and have remaining un-printed characters, - if (_state != VTStates::Ground) - { - const auto run = _CurrentRun(); - auto cacheUnusedRun = true; - - // One of the "weird things" in VT input is the case of something like - // alt+[. In VT, that's encoded as `\x1b[`. However, that's - // also the start of a CSI, and could be the start of a longer sequence, - // there's no way to know for sure. For an alt+[ keypress, - // the parser originally would just sit in the `CsiEntry` state after - // processing it, which would pollute the following keypress (e.g. - // alt+[, A would be processed like `\x1b[A`, - // which is _wrong_). - // - // At the same time, input may be broken up arbitrarily, depending on the pipe's - // buffer size, our read-buffer size, the sender's write-buffer size, and more. - // In fact, with the current WSL, input is broken up in 16 byte chunks (Why? :(), - // which breaks up many of our longer sequences, like our Win32InputMode ones. - // - // As a heuristic, this code specifically checks for a trailing Esc or Alt+key. - // If we encountered a win32-input-mode sequence before, we know that our \x1b[?9001h - // request to enable them was successful. While a client may still send \x1b{some char} - // intentionally, it's far more likely now that we're looking at a broken up sequence. - // The most common win32-input-mode is ConPTY itself after all, and we never emit - // \x1b{some char} once it's enabled. - if (_isEngineForInput) + for (;;) { - const auto win32 = _engine->EncounteredWin32InputModeSequence(); - if (!win32 && run.size() <= 2 && run.front() == L'\x1b') + // The inner loop deals with 1 VT sequence at a time. + for (;;) { - _EnterGround(); - if (run.size() == 1) + const auto ch = til::at(_currentString, _runEnd); + ++_runEnd; + ProcessCharacter(ch); + + if (_state == VTStates::Ground) { - _ActionExecute(L'\x1b'); + break; } - else + + // We're not back in ground state, but ran out of text. + // --> Incomplete sequence. Clean up at the end. + if (_runEnd >= _currentString.size()) { - _EnterEscape(); - _ActionEscDispatch(run.back()); + _processStringIncompleteSequence(); + return; } - _EnterGround(); - // No need to cache the run, since we've dealt with it now. - cacheUnusedRun = false; + } + + // The only way we get here is if the above loop ran into the ground state. + // This means we're starting a new "chunk". + // --> Reset beg(in). + _runBeg = _runEnd; + + // Ran out of text. Return. + if (_runBeg >= _currentString.size()) + { + return; + } + + // Plain text? Go back to the start, where we process plain text. + if (!Utils::IsActionableFromGround(til::at(_currentString, _runEnd))) + { + break; } } - else if (_state == VTStates::SosPmApcString || _state == VTStates::DcsPassThrough || _state == VTStates::DcsIgnore) - { - // There is no need to cache the run if we've reached one of the - // string processing states in the output engine, since that data - // will be dealt with as soon as it is received. - cacheUnusedRun = false; - } + } +} - // If the run hasn't been dealt with in one of the cases above, we cache - // the partial sequence in case we have to flush the whole thing later. - if (cacheUnusedRun) +void Microsoft::Console::VirtualTerminal::StateMachine::_processStringIncompleteSequence() +{ + const auto run = _CurrentRun(); + auto cacheUnusedRun = true; + + // One of the "weird things" in VT input is the case of something like + // alt+[. In VT, that's encoded as `\x1b[`. However, that's + // also the start of a CSI, and could be the start of a longer sequence, + // there's no way to know for sure. For an alt+[ keypress, + // the parser originally would just sit in the `CsiEntry` state after + // processing it, which would pollute the following keypress (e.g. + // alt+[, A would be processed like `\x1b[A`, + // which is _wrong_). + // + // At the same time, input may be broken up arbitrarily, depending on the pipe's + // buffer size, our read-buffer size, the sender's write-buffer size, and more. + // In fact, with the current WSL, input is broken up in 16 byte chunks (Why? :(), + // which breaks up many of our longer sequences, like our Win32InputMode ones. + // + // As a heuristic, this code specifically checks for a trailing Esc or Alt+key. + // If we encountered a win32-input-mode sequence before, we know that our \x1b[?9001h + // request to enable them was successful. While a client may still send \x1b{some char} + // intentionally, it's far more likely now that we're looking at a broken up sequence. + // The most common win32-input-mode is ConPTY itself after all, and we never emit + // \x1b{some char} once it's enabled. + if (_isEngineForInput) + { + const auto win32 = _engine->EncounteredWin32InputModeSequence(); + if (!win32 && run.size() <= 2 && run.front() == L'\x1b') { - if (!_cachedSequence) + _EnterGround(); + if (run.size() == 1) { - _cachedSequence.emplace(std::wstring{}); + _ActionExecute(L'\x1b'); } + else + { + _EnterEscape(); + _ActionEscDispatch(run.back()); + } + _EnterGround(); + // No need to cache the run, since we've dealt with it now. + cacheUnusedRun = false; + } + } + else if (_state == VTStates::SosPmApcString || _state == VTStates::DcsPassThrough || _state == VTStates::DcsIgnore) + { + // There is no need to cache the run if we've reached one of the + // string processing states in the output engine, since that data + // will be dealt with as soon as it is received. + cacheUnusedRun = false; + } - auto& cachedSequence = *_cachedSequence; - cachedSequence.append(run); + // If the run hasn't been dealt with in one of the cases above, we cache + // the partial sequence in case we have to flush the whole thing later. + if (cacheUnusedRun) + { + if (!_cachedSequence) + { + _cachedSequence.emplace(std::wstring{}); } + + auto& cachedSequence = *_cachedSequence; + cachedSequence.append(run); } } @@ -2105,12 +2202,12 @@ void StateMachine::ProcessString(const std::wstring_view string) // - True if we're processing the last character. False if not. bool StateMachine::IsProcessingLastCharacter() const noexcept { - return _processingLastCharacter; + return _runEnd == _currentString.size(); } void StateMachine::InjectSequence(const InjectionType type) { - _injections.emplace_back(type, _runOffset + _runSize); + _injections.emplace_back(type, _runEnd); } const til::small_vector& StateMachine::GetInjections() const noexcept @@ -2191,8 +2288,8 @@ void StateMachine::_ExecuteCsiCompleteCallback() // We need to save the state of the string that we're currently // processing in case the callback injects another string. const auto savedCurrentString = _currentString; - const auto savedRunOffset = _runOffset; - const auto savedRunSize = _runSize; + const auto savedRunBeg = _runBeg; + const auto savedRunEnd = _runEnd; // We also need to take ownership of the callback function before // executing it so there's no risk of it being run more than once. const auto callback = std::move(_onCsiCompleteCallback); @@ -2200,7 +2297,16 @@ void StateMachine::_ExecuteCsiCompleteCallback() // Once the callback has returned, we can restore the original state // and continue where we left off. _currentString = savedCurrentString; - _runOffset = savedRunOffset; - _runSize = savedRunSize; + _runBeg = savedRunBeg; + _runEnd = savedRunEnd; } } + +// Construct current run. +// +// Note: We intentionally use this method to create the run lazily for better performance. +// You may find the usage of offset & size unsafe, but under heavy load it shows noticeable performance benefit. +std::wstring_view Microsoft::Console::VirtualTerminal::StateMachine::_CurrentRun() const +{ + return _currentString.substr(_runBeg, _runEnd - _runBeg); +} diff --git a/src/terminal/parser/stateMachine.hpp b/src/terminal/parser/stateMachine.hpp index 42756abd15d..6b9d06692d8 100644 --- a/src/terminal/parser/stateMachine.hpp +++ b/src/terminal/parser/stateMachine.hpp @@ -94,6 +94,8 @@ namespace Microsoft::Console::VirtualTerminal IStateMachineEngine& Engine() noexcept; private: + void _processStringIncompleteSequence(); + void _ActionExecute(const wchar_t wch); void _ActionExecuteFromEscape(const wchar_t wch); void _ActionPrint(const wchar_t wch); @@ -162,6 +164,7 @@ namespace Microsoft::Console::VirtualTerminal bool _SafeExecute(TLambda&& lambda); void _ExecuteCsiCompleteCallback(); + std::wstring_view _CurrentRun() const; enum class VTStates { @@ -197,17 +200,8 @@ namespace Microsoft::Console::VirtualTerminal til::enumset _parserMode{ Mode::Ansi }; std::wstring_view _currentString; - size_t _runOffset; - size_t _runSize; - - // Construct current run. - // - // Note: We intentionally use this method to create the run lazily for better performance. - // You may find the usage of offset & size unsafe, but under heavy load it shows noticeable performance benefit. - std::wstring_view _CurrentRun() const - { - return _currentString.substr(_runOffset, _runSize); - } + size_t _runBeg; + size_t _runEnd; VTIDBuilder _identifier; std::vector _parameters; @@ -225,10 +219,6 @@ namespace Microsoft::Console::VirtualTerminal std::optional _cachedSequence; til::small_vector _injections; - // This is tracked per state machine instance so that separate calls to Process* - // can start and finish a sequence. - bool _processingLastCharacter; - std::function _onCsiCompleteCallback; }; } diff --git a/src/types/inc/utils.hpp b/src/types/inc/utils.hpp index b06f1712064..ce204a74b6a 100644 --- a/src/types/inc/utils.hpp +++ b/src/types/inc/utils.hpp @@ -126,6 +126,7 @@ namespace Microsoft::Console::Utils // testing easier. std::wstring_view TrimPaste(std::wstring_view textView) noexcept; + bool IsActionableFromGround(const wchar_t wch) noexcept; const wchar_t* FindActionableControlCharacter(const wchar_t* beg, const size_t len) noexcept; // Same deal, but in TerminalPage::_evaluatePathForCwd diff --git a/src/types/utils.cpp b/src/types/utils.cpp index 1044b17abdf..b711cf1d506 100644 --- a/src/types/utils.cpp +++ b/src/types/utils.cpp @@ -1139,7 +1139,8 @@ std::wstring_view Utils::TrimPaste(std::wstring_view textView) noexcept #pragma warning(disable : 26490) // Don't use reinterpret_cast (type.1). // Returns true for C0 characters and C1 [single-character] CSI. -constexpr bool isActionableFromGround(const wchar_t wch) noexcept +#pragma warning(suppress : 26497) // You can attempt to make 'Microsoft::Console::Utils::IsActionableFromGround' constexpr unless it contains any undefined behavior(f.4). +bool Utils::IsActionableFromGround(const wchar_t wch) noexcept { // This is equivalent to: // return (wch <= 0x1f) || (wch >= 0x7f && wch <= 0x9f); @@ -1230,7 +1231,7 @@ const wchar_t* Utils::FindActionableControlCharacter(const wchar_t* beg, const s #endif #pragma loop(no_vector) - for (const auto end = beg + len; it < end && !isActionableFromGround(*it); ++it) + for (const auto end = beg + len; it < end && !IsActionableFromGround(*it); ++it) { } From e51769b072ee3fa99e9ff890a30c1f72f3f5b6dc Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Thu, 11 Dec 2025 15:00:07 +0100 Subject: [PATCH 2/2] Fix parsing, tests --- src/terminal/adapter/SixelParser.cpp | 2 +- src/terminal/adapter/ut_adapter/adapterTest.cpp | 14 ++++---------- src/terminal/parser/stateMachine.cpp | 12 ++++++------ 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/src/terminal/adapter/SixelParser.cpp b/src/terminal/adapter/SixelParser.cpp index 762a7f8dc70..86840c0f37c 100644 --- a/src/terminal/adapter/SixelParser.cpp +++ b/src/terminal/adapter/SixelParser.cpp @@ -92,7 +92,7 @@ std::function SixelParser::DefineImage(const VTInt macr return [&](const std::wstring_view str) { for (const auto ch : str) { - _parseCommandChar(ch); + _parseCommandChar(ch); } return true; }; diff --git a/src/terminal/adapter/ut_adapter/adapterTest.cpp b/src/terminal/adapter/ut_adapter/adapterTest.cpp index 1f42da08f66..21eafeb9b8d 100644 --- a/src/terminal/adapter/ut_adapter/adapterTest.cpp +++ b/src/terminal/adapter/ut_adapter/adapterTest.cpp @@ -1847,11 +1847,8 @@ class AdapterTest { const auto requestSetting = [=](const std::wstring_view settingId = {}) { const auto stringHandler = _pDispatch->RequestSetting(); - for (auto ch : settingId) - { - stringHandler(ch); - } - stringHandler(L'\033'); // String terminator + stringHandler(settingId); + stringHandler(L"\033"); // String terminator }; Log::Comment(L"Requesting DECSTBM margins (5 to 10)."); @@ -3562,11 +3559,8 @@ class AdapterTest { const auto assignCharset = [=](const auto charsetSize, const std::wstring_view charsetId = {}) { const auto stringHandler = _pDispatch->AssignUserPreferenceCharset(charsetSize); - for (auto ch : charsetId) - { - stringHandler(ch); - } - stringHandler(L'\033'); // String terminator + stringHandler(charsetId); + stringHandler(L"\033"); // String terminator }; auto& termOutput = _pDispatch->_termOutput; termOutput.SoftReset(); diff --git a/src/terminal/parser/stateMachine.cpp b/src/terminal/parser/stateMachine.cpp index 350536461de..4126c5fb3e5 100644 --- a/src/terminal/parser/stateMachine.cpp +++ b/src/terminal/parser/stateMachine.cpp @@ -1804,17 +1804,17 @@ void StateMachine::_EventDcsPassThrough(const wchar_t) _trace.TraceOnEvent(L"DcsPassThrough"); assert(_runEnd != 0); - - auto beg = _runEnd - 1; - auto end = beg; + auto end = _runEnd - 1; for (;;) { + const auto beg = end; + // Find the end of a run of valid DCS characters. for (;;) { const auto ch = _currentString[end]; - if (_isEscape(ch) || (!_isC0Code(ch) && !_isDcsPassThroughValid(ch))) + if (_isEscape(ch) || _isC0Code(ch) || !_isDcsPassThroughValid(ch)) { break; } @@ -1827,7 +1827,7 @@ void StateMachine::_EventDcsPassThrough(const wchar_t) } // If we found a run, pass it to the handler. - if (beg != end && !_dcsStringHandler(_currentString.substr(beg, end))) + if (beg != end && !_dcsStringHandler(_currentString.substr(beg, end - beg))) { _EnterDcsIgnore(); break; @@ -1851,7 +1851,7 @@ void StateMachine::_EventDcsPassThrough(const wchar_t) for (;;) { const auto ch = _currentString[end]; - if (!_isEscape(ch) && (_isC0Code(ch) || _isDcsPassThroughValid(ch))) + if (!_isEscape(ch) && !_isC0Code(ch) && _isDcsPassThroughValid(ch)) { break; }