From 93c545280b8893b5e944ad548b2651b8243e6509 Mon Sep 17 00:00:00 2001 From: Helgard Ferreira Date: Mon, 4 Mar 2024 08:03:04 +0200 Subject: [PATCH 1/4] fix: tree sitter parsing failures for certain TypeScript expressions --- src/language/SourceAnalyzer.ts | 12 +++++++-- src/language/helpers.ts | 48 ++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/src/language/SourceAnalyzer.ts b/src/language/SourceAnalyzer.ts index ce015de1..2aa22a0e 100644 --- a/src/language/SourceAnalyzer.ts +++ b/src/language/SourceAnalyzer.ts @@ -1,7 +1,12 @@ import { RegExps, StringOrRegExp } from '@cucumber/cucumber-expressions' import { LocationLink } from 'vscode-languageserver-types' -import { createLocationLink, makeParameterType, syntaxNode } from './helpers.js' +import { + createLocationLink, + makeParameterType, + stripBlacklistedExpressions, + syntaxNode, +} from './helpers.js' import { getLanguage } from './languages.js' import { Language, @@ -136,7 +141,10 @@ language: ${source.languageName} private parse(source: Source): TreeSitterTree { let tree: TreeSitterTree | undefined = this.treeByContent.get(source) if (!tree) { - this.treeByContent.set(source, (tree = this.parserAdapter.parser.parse(source.content))) + // This is currently necessary since the tree-sitter parser currently errors on certain expressions + const content = stripBlacklistedExpressions(source.content) + tree = this.parserAdapter.parser.parse(content) + this.treeByContent.set(source, tree) } return tree } diff --git a/src/language/helpers.ts b/src/language/helpers.ts index 69c12bf4..32bccc7f 100644 --- a/src/language/helpers.ts +++ b/src/language/helpers.ts @@ -70,3 +70,51 @@ export function filter( function flatten(node: TreeSitterSyntaxNode): TreeSitterSyntaxNode[] { return node.children.reduce((r, o) => [...r, ...flatten(o)], [node]) } + +/** + * + * This constant represents a list of regular expressions + * that should be stripped from the content of a file. 
+ */ +export const BLACKLISTED_EXPRESSIONS: RegExp[] = [ + /* + * This regular expression matches sequences of decorators applied to a class, + * potentially including type parameters and arguments. + * The regex supports matching these patterns preceding + * an optionally exported class definition. + */ + /(@(\w+)(?:<[^>]+>)?\s*(?:\([^)]*\))?\s*)*(?=\s*(export)*\s+class)/g, +] + +/** + * + * Strips blacklisted expressions from the given content. + * + * @param content The content to strip blacklisted expressions from. + * + * @returns The content with blacklisted expressions stripped. + * + * @example + * + * ```typescript + * const content = + * "@decorator\n" ++ + * "export class Foo {\n" ++ + * "@decorator\n" ++ + * "public bar() { }\n" ++ + * "}" + * + * const strippedContent = stripBlacklistedExpressions(content) + * console.log(strippedContent) + * + * // Output: + * "export class Foo {\n" ++ + * "@decorator\n" ++ + * "public bar() { }\n" ++ + * "}" + * + * ``` + */ +export function stripBlacklistedExpressions(content: string): string { + return BLACKLISTED_EXPRESSIONS.reduce((acc, regExp) => acc.replace(regExp, ''), content) +} From 73875e22f43db0d76c3e32cc95bc3629006f1abe Mon Sep 17 00:00:00 2001 From: Helgard Ferreira Date: Mon, 4 Mar 2024 08:10:50 +0200 Subject: [PATCH 2/4] refactor: only apply language specific expression stripping --- src/language/SourceAnalyzer.ts | 2 +- src/language/helpers.ts | 47 ++++++++++++++++++++++++---------- 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/src/language/SourceAnalyzer.ts b/src/language/SourceAnalyzer.ts index 2aa22a0e..076f5855 100644 --- a/src/language/SourceAnalyzer.ts +++ b/src/language/SourceAnalyzer.ts @@ -142,7 +142,7 @@ language: ${source.languageName} let tree: TreeSitterTree | undefined = this.treeByContent.get(source) if (!tree) { // This is currently necessary since the tree-sitter parser currently errors on certain expressions - const content = 
stripBlacklistedExpressions(source.content) + const content = stripBlacklistedExpressions(source.content, source.languageName) tree = this.parserAdapter.parser.parse(content) this.treeByContent.set(source, tree) } diff --git a/src/language/helpers.ts b/src/language/helpers.ts index 32bccc7f..4eda6f06 100644 --- a/src/language/helpers.ts +++ b/src/language/helpers.ts @@ -1,7 +1,13 @@ import { ParameterType, RegExps } from '@cucumber/cucumber-expressions' import { DocumentUri, LocationLink, Range } from 'vscode-languageserver-types' -import { Link, NodePredicate, TreeSitterQueryMatch, TreeSitterSyntaxNode } from './types' +import { + LanguageName, + Link, + NodePredicate, + TreeSitterQueryMatch, + TreeSitterSyntaxNode, +} from './types' export function syntaxNode(match: TreeSitterQueryMatch, name: string): TreeSitterSyntaxNode | null { const nodes = syntaxNodes(match, name) @@ -76,21 +82,33 @@ function flatten(node: TreeSitterSyntaxNode): TreeSitterSyntaxNode[] { * This constant represents a list of regular expressions * that should be stripped from the content of a file. */ -export const BLACKLISTED_EXPRESSIONS: RegExp[] = [ - /* - * This regular expression matches sequences of decorators applied to a class, - * potentially including type parameters and arguments. - * The regex supports matching these patterns preceding - * an optionally exported class definition. - */ - /(@(\w+)(?:<[^>]+>)?\s*(?:\([^)]*\))?\s*)*(?=\s*(export)*\s+class)/g, -] +export const BLACKLISTED_EXPRESSIONS: { + [key in LanguageName]: RegExp[] +} = { + tsx: [ + /* + * This regular expression matches sequences of decorators applied to a class, + * potentially including type parameters and arguments. + * The regex supports matching these patterns preceding + * an optionally exported class definition. 
+ */ + /(@(\w+)(?:<[^>]+>)?\s*(?:\([^)]*\))?\s*)*(?=\s*(export)*\s+class)/g, + ], + java: [], + c_sharp: [], + php: [], + python: [], + ruby: [], + rust: [], + javascript: [], +} /** * * Strips blacklisted expressions from the given content. * * @param content The content to strip blacklisted expressions from. + * @param languageName The name of the language to use for stripping. * * @returns The content with blacklisted expressions stripped. * @@ -104,7 +122,7 @@ export const BLACKLISTED_EXPRESSIONS: RegExp[] = [ * "public bar() { }\n" ++ * "}" * - * const strippedContent = stripBlacklistedExpressions(content) + * const strippedContent = stripBlacklistedExpressions(content, 'tsx') * console.log(strippedContent) * * // Output: @@ -115,6 +133,9 @@ export const BLACKLISTED_EXPRESSIONS: RegExp[] = [ * * ``` */ -export function stripBlacklistedExpressions(content: string): string { - return BLACKLISTED_EXPRESSIONS.reduce((acc, regExp) => acc.replace(regExp, ''), content) +export function stripBlacklistedExpressions(content: string, languageName: LanguageName): string { + return BLACKLISTED_EXPRESSIONS[languageName].reduce( + (acc, regExp) => acc.replace(regExp, ''), + content + ) } From a6fa052b1e95e93dbc465cdf7c5c9bae7d9933f4 Mon Sep 17 00:00:00 2001 From: Helgard Ferreira Date: Mon, 4 Mar 2024 08:25:25 +0200 Subject: [PATCH 3/4] refactor: update blacklist JSDoc comment --- src/language/helpers.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/language/helpers.ts b/src/language/helpers.ts index 4eda6f06..6dadfca7 100644 --- a/src/language/helpers.ts +++ b/src/language/helpers.ts @@ -79,8 +79,8 @@ function flatten(node: TreeSitterSyntaxNode): TreeSitterSyntaxNode[] { /** * - * This constant represents a list of regular expressions - * that should be stripped from the content of a file. + * This constant represents a record of language names that contain lists + * of regular expressions that should be stripped from the content of a file. 
*/ export const BLACKLISTED_EXPRESSIONS: { [key in LanguageName]: RegExp[] From 89065dd0c355b06cf4adeb9bf98fb07bc17e36bf Mon Sep 17 00:00:00 2001 From: Helgard Ferreira Date: Mon, 4 Mar 2024 08:25:49 +0200 Subject: [PATCH 4/4] chore: update change log --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 686c8555..0a615470 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - (Rust) Support for r# raw strings with step definition patterns ([#176](https://github.com/cucumber/language-service/pull/176)) - (Rust) Line continuation characters in rust step definition patterns ([#179](https://github.com/cucumber/language-service/pull/179)) - (Python) Unexpected spaces and commas in generated step definitions [#160](https://github.com/cucumber/language-service/issues/160) +- (TypeScript) Tree-sitter parser failing on class decorators ([#186](https://github.com/cucumber/language-service/pull/186)) ### Added - (Python) Support for u-strings with step definition patterns ([#173](https://github.com/cucumber/language-service/pull/173))