Skip to content

Commit 48e17ab

Browse files
authored
Merge pull request #2749 from guwirth/cpp23_line_splicing
C++23: Trimming whitespaces before line splicing
2 parents 94b0a7f + 9e98b51 commit 48e17ab

File tree

17 files changed

+470
-100
lines changed

17 files changed

+470
-100
lines changed

cxx-squid/src/main/java/org/sonar/cxx/channels/BackslashChannel.java

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,21 +25,23 @@
2525

2626
public class BackslashChannel extends Channel<Lexer> {
2727

28-
private static boolean isNewLine(char ch) {
29-
return (ch == '\n') || (ch == '\r');
30-
}
28+
private final StringBuilder sb = new StringBuilder(256);
3129

3230
@Override
3331
public boolean consume(CodeReader code, Lexer output) {
34-
var ch = (char) code.peek();
35-
36-
if ((ch == '\\') && isNewLine(code.charAt(1))) {
37-
// just throw away the backslash
38-
code.pop();
39-
return true;
32+
if (code.charAt(0) != '\\') {
33+
return false;
4034
}
4135

42-
return false;
36+
var lineSplicing = read(code, sb);
37+
sb.delete(0, sb.length());
38+
return lineSplicing != 0;
39+
}
40+
41+
public static int read(CodeReader code, StringBuilder sb) {
42+
var end = ChannelUtils.handleLineSplicing(code, 0);
43+
code.skip(end); // remove line splicing
44+
return end;
4345
}
4446

4547
}
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/*
2+
* C++ Community Plugin (cxx plugin)
3+
* Copyright (C) 2010-2023 SonarOpenCommunity
4+
* http://github.com/SonarOpenCommunity/sonar-cxx
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.cxx.channels;
21+
22+
import org.sonar.cxx.sslr.channel.CodeReader;
23+
24+
public class ChannelUtils {
25+
26+
public static final char LF = '\n';
27+
public static final char CR = '\r';
28+
public static final char EOF = (char) -1;
29+
30+
private ChannelUtils() {
31+
// empty
32+
}
33+
34+
public static boolean isNewLine(char ch) {
35+
return (ch == LF) || (ch == CR);
36+
}
37+
38+
public static boolean isWhitespace(char ch) {
39+
return (ch == ' ') || (ch == '\t');
40+
}
41+
42+
public static boolean isSuffix(char c) {
43+
return Character.isLowerCase(c) || Character.isUpperCase(c) || (c == '_');
44+
}
45+
46+
/**
47+
* Handle line splicing.
48+
* - lines terminated by a \ are spliced together with the next line
49+
* - P2178R0 making trailing whitespaces non-significant
50+
*
51+
* line endings:
52+
* - Linux/Unix, Mac from OS X a.k.a macOS: LF
53+
* - Windows/DOS: CR LF
54+
* - Classic Mac OS: CR
55+
*
56+
* @return numbers of sign to remove to splice the lines
57+
*/
58+
public static int handleLineSplicing(CodeReader code, int start) {
59+
int next = start;
60+
if (code.charAt(next) != '\\') {
61+
return 0;
62+
}
63+
64+
boolean newline = false;
65+
next++;
66+
while (true) {
67+
var charAt = code.charAt(next);
68+
if (charAt == LF) {
69+
newline = true;
70+
break;
71+
}
72+
if (charAt == CR) {
73+
if (code.charAt(next + 1) == LF) {
74+
next++;
75+
}
76+
newline = true;
77+
break;
78+
}
79+
if (!isWhitespace(charAt)) {
80+
break;
81+
}
82+
next++;
83+
}
84+
85+
return newline ? (next - start + 1) : 0;
86+
}
87+
88+
}

cxx-squid/src/main/java/org/sonar/cxx/channels/CharacterLiteralsChannel.java

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,6 @@
3030
*/
3131
public class CharacterLiteralsChannel extends Channel<Lexer> {
3232

33-
private static final char EOF = (char) -1;
34-
3533
private final StringBuilder sb = new StringBuilder(256);
3634

3735
private int index = 0;
@@ -67,7 +65,7 @@ public boolean consume(CodeReader code, Lexer output) {
6765
private boolean read(CodeReader code) {
6866
index++;
6967
while (code.charAt(index) != ch) {
70-
if (code.charAt(index) == EOF) {
68+
if (code.charAt(index) == ChannelUtils.EOF) {
7169
return false;
7270
}
7371
if (code.charAt(index) == '\\') {
@@ -95,10 +93,10 @@ private void readUdSuffix(CodeReader code) {
9593
int len = 0;
9694
for (int start_index = index;; index++) {
9795
var charAt = code.charAt(index);
98-
if (charAt == EOF) {
96+
if (charAt == ChannelUtils.EOF) {
9997
return;
10098
}
101-
if (isSuffix(charAt)) {
99+
if (ChannelUtils.isSuffix(charAt)) {
102100
len++;
103101
} else if (Character.isDigit(charAt)) {
104102
if (len > 0) {
@@ -113,8 +111,4 @@ private void readUdSuffix(CodeReader code) {
113111
}
114112
}
115113

116-
private static boolean isSuffix(char c) {
117-
return Character.isLowerCase(c) || Character.isUpperCase(c) || (c == '_');
118-
}
119-
120114
}

cxx-squid/src/main/java/org/sonar/cxx/channels/KeywordChannel.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,7 @@ public class KeywordChannel extends Channel<Lexer> {
3838
private final Matcher matcher;
3939
private final Token.Builder tokenBuilder = Token.builder();
4040

41-
public KeywordChannel(String regexp, TokenType[]
42-
... keywordSets) {
41+
public KeywordChannel(String regexp, TokenType[]... keywordSets) {
4342
for (var keywords : keywordSets) {
4443
for (var keyword : keywords) {
4544
keywordsMap.put(keyword.getValue(), keyword);
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
/*
2+
* C++ Community Plugin (cxx plugin)
3+
* Copyright (C) 2010-2023 SonarOpenCommunity
4+
* http://github.com/SonarOpenCommunity/sonar-cxx
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.cxx.channels;
21+
22+
import static com.sonar.cxx.sslr.api.GenericTokenType.COMMENT;
23+
import com.sonar.cxx.sslr.api.Token;
24+
import com.sonar.cxx.sslr.api.Trivia;
25+
import com.sonar.cxx.sslr.impl.Lexer;
26+
import org.sonar.cxx.sslr.channel.Channel;
27+
import org.sonar.cxx.sslr.channel.CodeReader;
28+
29+
public class MultiLineCommentChannel extends Channel<Lexer> {
30+
31+
private final StringBuilder sb = new StringBuilder(256);
32+
private final Token.Builder tokenBuilder = Token.builder();
33+
34+
@Override
35+
public boolean consume(CodeReader code, Lexer lexer) {
36+
// start of multi line comment?
37+
int next = isComment(code);
38+
if (next == 0) {
39+
return false;
40+
}
41+
42+
int line = code.getLinePosition();
43+
int column = code.getColumnPosition();
44+
45+
code.skip(next);
46+
sb.append('/');
47+
sb.append('*');
48+
49+
read(code, sb); // search end of multi line comment
50+
51+
var value = sb.toString();
52+
var token = tokenBuilder
53+
.setType(COMMENT)
54+
.setValueAndOriginalValue(value)
55+
.setURI(lexer.getURI())
56+
.setLine(line)
57+
.setColumn(column)
58+
.build();
59+
60+
lexer.addTrivia(Trivia.createComment(token));
61+
sb.delete(0, sb.length());
62+
return true;
63+
}
64+
65+
public static int isComment(CodeReader code) {
66+
int next = 0;
67+
68+
// start of multi line comment?
69+
if (code.charAt(next) != '/') {
70+
return 0;
71+
}
72+
next += 1;
73+
next += ChannelUtils.handleLineSplicing(code, next);
74+
75+
if (code.charAt(next) != '*') {
76+
return 0;
77+
}
78+
next += 1;
79+
return next;
80+
}
81+
82+
public static boolean read(CodeReader code, StringBuilder sb) {
83+
boolean first = false;
84+
while (true) { // search end of multi line comment: */
85+
var end = ChannelUtils.handleLineSplicing(code, 0);
86+
code.skip(end); // remove line splicing
87+
88+
var charAt = (char) code.pop();
89+
switch (charAt) {
90+
case '*':
91+
first = true;
92+
break;
93+
case '/':
94+
if (first) {
95+
sb.append('/');
96+
return true;
97+
}
98+
break;
99+
case ChannelUtils.EOF:
100+
return false;
101+
default:
102+
first = false;
103+
break;
104+
}
105+
106+
sb.append(charAt);
107+
}
108+
109+
}
110+
111+
}

cxx-squid/src/main/java/org/sonar/cxx/channels/PreprocessorChannel.java

Lines changed: 32 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@
3434
//
3535
public class PreprocessorChannel extends Channel<Lexer> {
3636

37-
private static final char EOF = (char) -1;
3837
private final StringLiteralsChannel stringLiteralsChannel = new StringLiteralsChannel();
3938
private final StringBuilder sb = new StringBuilder(256);
39+
private final StringBuilder dummy = new StringBuilder(256);
4040
private final Matcher matcher;
4141

4242
public PreprocessorChannel(TokenType[]... keywordSets) {
@@ -84,67 +84,46 @@ public boolean consume(CodeReader code, Lexer output) {
8484

8585
private void read(CodeReader code) {
8686
while (true) {
87-
var ch = code.charAt(0);
88-
if (isNewline(ch) || ch == EOF) {
87+
var charAt = code.charAt(0);
88+
if (ChannelUtils.isNewLine(charAt) || charAt == ChannelUtils.EOF) {
8989
code.pop();
9090
break;
91-
} else if (stringLiteralsChannel.read(code, sb)) {
91+
} else if (stringLiteralsChannel.read(code, sb)) { // string literal
9292
continue;
9393
}
94-
ch = (char) code.pop();
95-
if (ch == '/' && code.charAt(0) == '/') {
96-
consumeSingleLineComment(code);
97-
} else if (ch == '/' && code.charAt(0) == '*') {
98-
consumeMultiLineComment(code);
99-
} else if (ch == '\\' && isNewline((char) code.peek())) {
100-
// the newline is escaped: we have a multi-line preprocessor directive
101-
// consume both the backslash and the newline, insert a space instead
102-
consumeNewline(code);
103-
sb.append(' ');
104-
} else {
105-
sb.append(ch);
106-
}
107-
}
108-
}
10994

110-
private static void consumeNewline(CodeReader code) {
111-
if ((code.charAt(0) == '\r') && (code.charAt(1) == '\n')) {
112-
// \r\n
113-
code.pop();
114-
code.pop();
115-
} else {
116-
// \r or \n
117-
code.pop();
118-
}
119-
}
120-
121-
private static void consumeSingleLineComment(CodeReader code) {
122-
code.pop(); // initial '/'
123-
while (true) {
124-
var charAt = code.charAt(0);
125-
if (isNewline(charAt) || charAt == EOF) {
126-
break;
95+
var len = 0;
96+
switch (charAt) {
97+
case '/': // comment?
98+
len = SingleLineCommentChannel.isComment(code);
99+
if (len != 0) {
100+
// single line comment
101+
code.skip(len);
102+
SingleLineCommentChannel.read(code, dummy);
103+
dummy.delete(0, dummy.length());
104+
} else {
105+
len = MultiLineCommentChannel.isComment(code);
106+
if (len != 0) {
107+
// multi line comment
108+
code.skip(len);
109+
MultiLineCommentChannel.read(code, dummy);
110+
dummy.delete(0, dummy.length());
111+
}
112+
}
113+
break;
114+
case '\\':
115+
len = BackslashChannel.read(code, dummy);
116+
if (len != 0) {
117+
// consume backslash and the newline
118+
dummy.delete(0, dummy.length());
119+
}
120+
break;
127121
}
128-
code.pop();
129-
}
130-
}
131122

132-
private static void consumeMultiLineComment(CodeReader code) {
133-
code.pop(); // initial '*'
134-
while (true) {
135-
var ch = (char) code.pop();
136-
if (ch == EOF) {
137-
return;
138-
}
139-
if (ch == '*' && code.charAt(0) == '/') {
140-
code.pop();
141-
return;
123+
if (len == 0) {
124+
sb.append((char) code.pop());
142125
}
143126
}
144127
}
145128

146-
private static boolean isNewline(char ch) {
147-
return (ch == '\n') || (ch == '\r');
148-
}
149-
150129
}

0 commit comments

Comments
 (0)