Skip to content

Commit 889b77e

Browse files
committed
feat: add more conditional parse options
1 parent 1d74daf commit 889b77e

File tree

4 files changed

+124
-8
lines changed

4 files changed

+124
-8
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,9 @@ let json_value = parse_to_value(text, &ParseOptions {
9090
allow_comments: false,
9191
allow_loose_object_property_names: false,
9292
allow_trailing_commas: false,
93+
allow_single_quoted_strings: false,
94+
allow_hexadecimal_numbers: false,
95+
allow_unary_plus_numbers: false,
9396
})?;
9497
```
9598

src/errors.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,12 @@ pub enum ParseErrorKind {
1313
ExpectedDigitFollowingNegativeSign,
1414
ExpectedPlusMinusOrDigitInNumberLiteral,
1515
ExpectedStringObjectProperty,
16+
HexadecimalNumbersNotAllowed,
1617
MultipleRootJsonValues,
18+
SingleQuotedStringsNotAllowed,
1719
String(ParseStringErrorKind),
1820
TrailingCommasNotAllowed,
21+
UnaryPlusNumbersNotAllowed,
1922
UnexpectedCloseBrace,
2023
UnexpectedCloseBracket,
2124
UnexpectedColon,
@@ -53,13 +56,22 @@ impl std::fmt::Display for ParseErrorKind {
5356
ExpectedStringObjectProperty => {
5457
write!(f, "Expected string for object property")
5558
}
59+
HexadecimalNumbersNotAllowed => {
60+
write!(f, "Hexadecimal numbers are not allowed")
61+
}
5662
MultipleRootJsonValues => {
5763
write!(f, "Text cannot contain more than one JSON value")
5864
}
65+
SingleQuotedStringsNotAllowed => {
66+
write!(f, "Single-quoted strings are not allowed")
67+
}
5968
String(kind) => kind.fmt(f),
6069
TrailingCommasNotAllowed => {
6170
write!(f, "Trailing commas are not allowed")
6271
}
72+
UnaryPlusNumbersNotAllowed => {
73+
write!(f, "Unary plus on numbers is not allowed")
74+
}
6375
UnexpectedCloseBrace => {
6476
write!(f, "Unexpected close brace")
6577
}

src/parse_to_ast.rs

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use super::ast::*;
66
use super::common::Range;
77
use super::errors::*;
88
use super::scanner::Scanner;
9+
use super::scanner::ScannerOptions;
910
use super::tokens::Token;
1011
use super::tokens::TokenAndRange;
1112

@@ -14,11 +15,25 @@ use super::tokens::TokenAndRange;
1415
/// next token start or end of the file.
1516
pub type CommentMap<'a> = HashMap<usize, Rc<Vec<Comment<'a>>>>;
1617

18+
/// Strategy for handling comments during parsing.
19+
///
20+
/// This enum determines how comments in the JSON/JSONC input are collected
21+
/// and represented in the resulting abstract syntax tree (AST).
1722
#[derive(Default, Debug, PartialEq, Clone)]
1823
pub enum CommentCollectionStrategy {
24+
/// Comments are not collected and are effectively ignored during parsing.
1925
#[default]
2026
Off,
27+
/// Comments are collected and stored separately from the main AST structure.
28+
///
29+
/// When this strategy is used, comments are placed in a [`CommentMap`] keyed by
29+
/// position: either the end of the previous token (or the start of the file),
30+
/// or the start of the next token (or the end of the file).
2132
Separate,
33+
/// Comments are collected and emitted as tokens rather than stored in a separate map.
34+
///
35+
/// When this strategy is used, comments appear alongside other tokens in the
36+
/// token stream when `tokens: true` is set in [`CollectOptions`].
2237
AsTokens,
2338
}
2439

@@ -40,6 +55,12 @@ pub struct ParseOptions {
4055
pub allow_loose_object_property_names: bool,
4156
/// Allow trailing commas on object literal and array literal values (defaults to `true`).
4257
pub allow_trailing_commas: bool,
58+
/// Allow single-quoted strings (defaults to `true`).
59+
pub allow_single_quoted_strings: bool,
60+
/// Allow hexadecimal numbers like 0xFF (defaults to `true`).
61+
pub allow_hexadecimal_numbers: bool,
62+
/// Allow unary plus sign on numbers like +42 (defaults to `true`).
63+
pub allow_unary_plus_numbers: bool,
4364
}
4465

4566
impl Default for ParseOptions {
@@ -48,6 +69,9 @@ impl Default for ParseOptions {
4869
allow_comments: true,
4970
allow_loose_object_property_names: true,
5071
allow_trailing_commas: true,
72+
allow_single_quoted_strings: true,
73+
allow_hexadecimal_numbers: true,
74+
allow_unary_plus_numbers: true,
5175
}
5276
}
5377
}
@@ -218,7 +242,14 @@ pub fn parse_to_ast<'a>(
218242
parse_options: &ParseOptions,
219243
) -> Result<ParseResult<'a>, ParseError> {
220244
let mut context = Context {
221-
scanner: Scanner::new(text),
245+
scanner: Scanner::new(
246+
text,
247+
&ScannerOptions {
248+
allow_single_quoted_strings: parse_options.allow_single_quoted_strings,
249+
allow_hexadecimal_numbers: parse_options.allow_hexadecimal_numbers,
250+
allow_unary_plus_numbers: parse_options.allow_unary_plus_numbers,
251+
},
252+
),
222253
comments: match collect_options.comments {
223254
CommentCollectionStrategy::Separate => Some(Default::default()),
224255
CommentCollectionStrategy::Off | CommentCollectionStrategy::AsTokens => None,
@@ -503,6 +534,30 @@ mod tests {
503534
);
504535
}
505536

537+
#[test]
538+
fn strict_should_error_single_quoted_string() {
539+
assert_has_strict_error(
540+
r#"{ "key": 'value' }"#,
541+
"Single-quoted strings are not allowed on line 1 column 10",
542+
);
543+
}
544+
545+
#[test]
546+
fn strict_should_error_hexadecimal_number() {
547+
assert_has_strict_error(
548+
r#"{ "key": 0xFF }"#,
549+
"Hexadecimal numbers are not allowed on line 1 column 10",
550+
);
551+
}
552+
553+
#[test]
554+
fn strict_should_error_unary_plus_number() {
555+
assert_has_strict_error(
556+
r#"{ "key": +42 }"#,
557+
"Unary plus on numbers is not allowed on line 1 column 10",
558+
);
559+
}
560+
506561
#[track_caller]
507562
fn assert_has_strict_error(text: &str, message: &str) {
508563
let result = parse_to_ast(
@@ -512,6 +567,9 @@ mod tests {
512567
allow_comments: false,
513568
allow_loose_object_property_names: false,
514569
allow_trailing_commas: false,
570+
allow_single_quoted_strings: false,
571+
allow_hexadecimal_numbers: false,
572+
allow_unary_plus_numbers: false,
515573
},
516574
);
517575
match result {

src/scanner.rs

Lines changed: 50 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,37 @@ pub struct Scanner<'a> {
1414
char_buffer: Vec<char>,
1515
current_token: Option<Token<'a>>,
1616
file_text: &'a str,
17+
allow_single_quoted_strings: bool,
18+
allow_hexadecimal_numbers: bool,
19+
allow_unary_plus_numbers: bool,
1720
}
1821

1922
const CHAR_BUFFER_MAX_SIZE: usize = 6;
2023

24+
/// Options for the scanner.
25+
#[derive(Debug)]
26+
pub struct ScannerOptions {
27+
/// Allow single-quoted strings (defaults to `true`).
28+
pub allow_single_quoted_strings: bool,
29+
/// Allow hexadecimal numbers like 0xFF (defaults to `true`).
30+
pub allow_hexadecimal_numbers: bool,
31+
/// Allow unary plus sign on numbers like +42 (defaults to `true`).
32+
pub allow_unary_plus_numbers: bool,
33+
}
34+
35+
impl Default for ScannerOptions {
36+
fn default() -> Self {
37+
Self {
38+
allow_single_quoted_strings: true,
39+
allow_hexadecimal_numbers: true,
40+
allow_unary_plus_numbers: true,
41+
}
42+
}
43+
}
44+
2145
impl<'a> Scanner<'a> {
22-
/// Creates a new scanner based on the provided text.
23-
pub fn new(file_text: &'a str) -> Scanner<'a> {
46+
/// Creates a new scanner over the provided text, configured by the given options.
47+
pub fn new(file_text: &'a str, options: &ScannerOptions) -> Scanner<'a> {
2448
let mut char_iter = file_text.chars();
2549
let mut char_buffer = Vec::with_capacity(CHAR_BUFFER_MAX_SIZE);
2650
let current_char = char_iter.next();
@@ -35,6 +59,9 @@ impl<'a> Scanner<'a> {
3559
char_buffer,
3660
current_token: None,
3761
file_text,
62+
allow_single_quoted_strings: options.allow_single_quoted_strings,
63+
allow_hexadecimal_numbers: options.allow_hexadecimal_numbers,
64+
allow_unary_plus_numbers: options.allow_unary_plus_numbers,
3865
}
3966
}
4067

@@ -72,7 +99,14 @@ impl<'a> Scanner<'a> {
7299
self.move_next_char();
73100
Ok(Token::Colon)
74101
}
75-
'\'' | '"' => self.parse_string(),
102+
'\'' => {
103+
if self.allow_single_quoted_strings {
104+
self.parse_string()
105+
} else {
106+
Err(self.create_error_for_current_token(ParseErrorKind::SingleQuotedStringsNotAllowed))
107+
}
108+
}
109+
'"' => self.parse_string(),
76110
'/' => match self.peek_char() {
77111
Some('/') => Ok(self.parse_comment_line()),
78112
Some('*') => self.parse_comment_block(),
@@ -154,8 +188,13 @@ impl<'a> Scanner<'a> {
154188
fn parse_number(&mut self) -> Result<Token<'a>, ParseError> {
155189
let start_byte_index = self.byte_index;
156190

157-
// handle unary plus or minus
158-
if self.is_negative_sign() || self.is_positive_sign() {
191+
// handle unary plus and unary minus
192+
if self.is_positive_sign() {
193+
if !self.allow_unary_plus_numbers {
194+
return Err(self.create_error_for_current_token(ParseErrorKind::UnaryPlusNumbersNotAllowed));
195+
}
196+
self.move_next_char();
197+
} else if self.is_negative_sign() {
159198
self.move_next_char();
160199
}
161200

@@ -164,6 +203,10 @@ impl<'a> Scanner<'a> {
164203

165204
// check for hexadecimal literal (0x or 0X)
166205
if matches!(self.current_char(), Some('x') | Some('X')) {
206+
if !self.allow_hexadecimal_numbers {
207+
return Err(self.create_error_for_current_token(ParseErrorKind::HexadecimalNumbersNotAllowed));
208+
}
209+
167210
self.move_next_char();
168211

169212
// must have at least one hex digit
@@ -625,7 +668,7 @@ mod tests {
625668
}
626669

627670
fn assert_has_tokens(text: &str, tokens: Vec<Token>) {
628-
let mut scanner = Scanner::new(text);
671+
let mut scanner = Scanner::new(text, &Default::default());
629672
let mut scanned_tokens = Vec::new();
630673

631674
loop {
@@ -640,7 +683,7 @@ mod tests {
640683
}
641684

642685
fn assert_has_error(text: &str, message: &str) {
643-
let mut scanner = Scanner::new(text);
686+
let mut scanner = Scanner::new(text, &Default::default());
644687
let mut error_message = String::new();
645688

646689
loop {

0 commit comments

Comments
 (0)