Skip to content

Commit bcc4456

Browse files
committed
Add error recovery
Adds a new expression `error!{"message" e}` which reports an error and attempts to recover from it with `e`, along with the variants `error_if!{}` and `error_unless!{}`. The parser maintains and returns a list of the errors it has recovered from. This enables it to report multiple errors, which is useful in contexts like IDEs and compilers where the user may find it easier to be presented with all errors rather than just the first.
1 parent 90e9b40 commit bcc4456

File tree

11 files changed

+1366
-197
lines changed

11 files changed

+1366
-197
lines changed

README.md

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
* Helpful `rustc` error messages for errors in the grammar definition or the Rust
1414
code embedded within it
1515
* Rule-level tracing to debug grammars
16+
* Error recovery
1617

1718
## Example
1819

@@ -39,12 +40,12 @@ pub fn main() {
3940

4041
## Comparison with similar parser generators
4142

42-
| crate | parser type | action code | integration | input type | precedence climbing | parameterized rules | streaming input |
43-
|----------- |------------- |------------- |-------------------- |------------------------ |--------------------- |-------------------- |----------------- |
44-
| peg | PEG | in grammar | proc macro (block) | `&str`, `&[T]`, custom | Yes | Yes | No |
45-
| [pest] | PEG | external | proc macro (file) | `&str` | Yes | No | No |
46-
| [nom] | combinators | in source | library | `&[u8]`, custom | No | Yes | Yes |
47-
| [lalrpop] | LR(1) | in grammar | build script | `&str` | No | Yes | No |
43+
| crate | parser type | action code | integration | input type | precedence climbing | parameterized rules | streaming input | recovery |
44+
|----------- |------------- |------------- |-------------------- |------------------------ |--------------------- |-------------------- |----------------- |--------- |
45+
| peg | PEG | in grammar | proc macro (block) | `&str`, `&[T]`, custom | Yes | Yes | No | Yes |
46+
| [pest] | PEG | external | proc macro (file) | `&str` | Yes | No | No | No |
47+
| [nom] | combinators | in source | library | `&[u8]`, custom | No | Yes | Yes | No |
48+
| [lalrpop] | LR(1) | in grammar | build script | `&str` | No | Yes | No | Yes |
4849

4950
[pest]: https://github.com/pest-parser/pest
5051
[nom]: https://github.com/geal/nom
@@ -63,7 +64,8 @@ pub fn main() {
6364

6465
## Upgrade guide
6566

66-
The rule return type has changed between 0.8 to 0.9.
67+
The rule return type has changed between 0.8 and 0.9,
68+
and now supports recovery and reporting multiple errors.
6769
To upgrade, add a call to `.into_result()` to convert the new rule return type
6870
to a simple `Result`.
6971
## Development

peg-macros/analysis.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ impl<'a> LeftRecursionVisitor<'a> {
164164
nullable
165165
}
166166

167-
LiteralExpr(_) | PatternExpr(_) | MethodExpr(_, _) | FailExpr(_) | MarkerExpr(_) => false,
167+
LiteralExpr(_) | PatternExpr(_) | MethodExpr(_, _) | FailExpr(_) | ErrorIfExpr(..) | ErrorUnlessExpr(..) |MarkerExpr(_) => false,
168168

169169
PositionExpr => true,
170170
}
@@ -220,7 +220,7 @@ impl<'a> LoopNullabilityVisitor<'a> {
220220
let name = rule_ident.to_string();
221221
*self.rule_nullability.get(&name).unwrap_or(&false)
222222
}
223-
223+
224224
ActionExpr(ref elems, ..) => {
225225
let mut nullable = true;
226226
for elem in elems {
@@ -250,7 +250,7 @@ impl<'a> LoopNullabilityVisitor<'a> {
250250
if inner_nullable && sep_nullable && !bound.has_upper_bound() {
251251
self.errors.push(LoopNullabilityError { span: this_expr.span });
252252
}
253-
253+
254254
inner_nullable | !bound.has_lower_bound()
255255
}
256256

@@ -269,10 +269,10 @@ impl<'a> LoopNullabilityVisitor<'a> {
269269
}
270270
}
271271

272-
nullable
272+
nullable
273273
}
274274

275-
LiteralExpr(_) | PatternExpr(_) | MethodExpr(_, _) | FailExpr(_) | MarkerExpr(_) => false,
275+
LiteralExpr(_) | PatternExpr(_) | MethodExpr(_, _) | FailExpr(_) | ErrorIfExpr(..) | ErrorUnlessExpr(..) | MarkerExpr(_) => false,
276276
PositionExpr => true,
277277
}
278278
}

peg-macros/ast.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ pub enum Expr {
8585
PositionExpr,
8686
QuietExpr(Box<SpannedExpr>),
8787
FailExpr(Literal),
88+
ErrorIfExpr(Box<SpannedExpr>, Literal, Box<SpannedExpr>),
89+
ErrorUnlessExpr(Box<SpannedExpr>, Literal, Box<SpannedExpr>),
8890
PrecedenceExpr {
8991
levels: Vec<PrecedenceLevel>,
9092
},

peg-macros/grammar.rs

Lines changed: 821 additions & 169 deletions
Large diffs are not rendered by default.

peg-macros/grammar.rustpeg

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,9 @@ rule primary() -> SpannedExpr
125125
/ sp:sp() "position" "!" "(" ")" { PositionExpr.at(sp) }
126126
/ sp:sp() "quiet" "!" "{" e:expression() "}" { QuietExpr(Box::new(e)).at(sp) }
127127
/ sp:sp() "expected" "!" "(" s:LITERAL() ")" { FailExpr(s).at(sp) }
128+
/ sp:sp() "error" "!" "{" s:LITERAL() seq:sequence() "}" { ErrorIfExpr(Box::new(ActionExpr(vec![], None).at(sp)), s, Box::new(seq)).at(sp) }
129+
/ sp:sp() "error_if" "!" "{" seq1:sequence() "|" s:LITERAL() seq2:sequence() "}" { ErrorIfExpr(Box::new(seq1), s, Box::new(seq2)).at(sp) }
130+
/ sp:sp() "error_unless" "!" "{" seq1:sequence() "|" s:LITERAL() seq2:sequence() "}" { ErrorUnlessExpr(Box::new(seq1), s, Box::new(seq2)).at(sp) }
128131
/ &("_" / "__" / "___") sp:sp() name:IDENT() { RuleExpr(name, Vec::new()).at(sp) }
129132
/ sp:sp() name:IDENT() "(" args:(rule_arg() ** ",") ")" { RuleExpr(name, args).at(sp) }
130133
/ sp:sp() l:LITERAL() { LiteralExpr(l).at(sp) }

peg-macros/translate.rs

Lines changed: 96 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,10 @@ fn compile_rule(context: &Context, rule: &Rule) -> TokenStream {
227227
::peg::RuleResult::Failed => {
228228
println!("[PEG_TRACE] Failed to match rule `{}` at {}", #str_rule_name, loc);
229229
}
230+
::peg::RuleResult::Error(e) => {
231+
let eloc = ::peg::Parse::position_repr(__input, e.location);
232+
println!("[PEG_TRACE] Error matching rule `{}` at {}: {} at {}", #str_rule_name, loc, e.error, eloc);
233+
}
230234
}
231235

232236
__peg_result
@@ -249,6 +253,7 @@ fn compile_rule(context: &Context, rule: &Rule) -> TokenStream {
249253
match &entry {
250254
&::peg::RuleResult::Matched(..) => println!("[PEG_TRACE] Cached match of rule {} at {}", #str_rule_name, loc),
251255
&Failed => println!("[PEG_TRACE] Cached fail of rule {} at {}", #str_rule_name, loc),
256+
&::peg::RuleResult::Error(..) => println!("[PEG_TRACE] Cached error of rule {} at {}", #str_rule_name, loc),
252257
};
253258
}
254259
} else {
@@ -282,10 +287,16 @@ fn compile_rule(context: &Context, rule: &Rule) -> TokenStream {
282287
let __current_result = { #wrapped_body };
283288
match __current_result {
284289
::peg::RuleResult::Failed => break,
290+
::peg::RuleResult::Error(..) => {
291+
__state.#cache_field.insert(__pos, __current_result.clone());
292+
__last_result = __current_result;
293+
break
294+
},
285295
::peg::RuleResult::Matched(__current_endpos, _) =>
286296
match __last_result {
287297
::peg::RuleResult::Matched(__last_endpos, _) if __current_endpos <= __last_endpos => break,
288-
_ => {
298+
::peg::RuleResult::Error(..) => panic!(), // impossible; we would have broken on previous iteration
299+
::peg::RuleResult::Failed | ::peg::RuleResult::Matched(..) => {
289300
__state.#cache_field.insert(__pos, __current_result.clone());
290301
__last_result = __current_result;
291302
},
@@ -352,7 +363,10 @@ fn compile_rule_export(context: &Context, rule: &Rule) -> TokenStream {
352363
__err_state.mark_failure(__pos, "EOF");
353364
}
354365
}
355-
_ => ()
366+
::peg::RuleResult::Error(__e) => {
367+
return __err_state.into_error(__e, __input)
368+
}
369+
::peg::RuleResult::Failed => ()
356370
}
357371

358372
__state = ParseState::new();
@@ -387,6 +401,7 @@ fn ordered_choice(span: Span, mut rs: impl DoubleEndedIterator<Item = TokenStrea
387401
let __choice_res = #preferred;
388402
match __choice_res {
389403
::peg::RuleResult::Matched(__pos, __value) => ::peg::RuleResult::Matched(__pos, __value),
404+
::peg::RuleResult::Error(__e) => ::peg::RuleResult::Error(__e),
390405
::peg::RuleResult::Failed => #fallback
391406
}
392407
}}
@@ -421,6 +436,7 @@ fn compile_expr_continuation(context: &Context, e: &SpannedExpr, result_name: Op
421436
let __seq_res = #seq_res;
422437
match __seq_res {
423438
::peg::RuleResult::Matched(__pos, #result_pat) => { #continuation }
439+
::peg::RuleResult::Error(__e) => ::peg::RuleResult::Error(__e),
424440
::peg::RuleResult::Failed => ::peg::RuleResult::Failed,
425441
}
426442
}}
@@ -434,6 +450,7 @@ fn compile_literal_expr(s: &Literal, continuation: TokenStream) -> TokenStream {
434450
quote_spanned! { span =>
435451
match ::peg::ParseLiteral::parse_string_literal(__input, __pos, #s) {
436452
::peg::RuleResult::Matched(__pos, __val) => { #continuation }
453+
::peg::RuleResult::Error(__e) => { __err_state.mark_error(__e); ::peg::RuleResult::Error(__e) } // unexpected, but do something sensible
437454
::peg::RuleResult::Failed => { __err_state.mark_failure(__pos, #escaped_str); ::peg::RuleResult::Failed }
438455
}
439456
}
@@ -457,6 +474,7 @@ fn compile_pattern_expr(pattern_group: &Group, result_name: Ident, success_res:
457474
_ => #not_in_set,
458475
}
459476
::peg::RuleResult::Failed => { __err_state.mark_failure(__pos, #pat_str); ::peg::RuleResult::Failed },
477+
::peg::RuleResult::Error(__e) => { __err_state.mark_error(__e); ::peg::RuleResult::Error(__e) }, // unexpected, but do something sensible
460478
}
461479
}
462480
}
@@ -543,6 +561,7 @@ fn compile_expr(context: &Context, e: &SpannedExpr, result_used: bool) -> TokenS
543561
quote_spanned!{ span=>
544562
match #func(__input, __state, __err_state, __pos #extra_args_call #(, #rule_args_call)*){
545563
::peg::RuleResult::Matched(pos, _) => ::peg::RuleResult::Matched(pos, ()),
564+
::peg::RuleResult::Error(e) => ::peg::RuleResult::Error(e),
546565
::peg::RuleResult::Failed => ::peg::RuleResult::Failed,
547566
}
548567
}
@@ -567,13 +586,15 @@ fn compile_expr(context: &Context, e: &SpannedExpr, result_used: bool) -> TokenS
567586
match #optional_res {
568587
::peg::RuleResult::Matched(__newpos, __value) => { ::peg::RuleResult::Matched(__newpos, Some(__value)) },
569588
::peg::RuleResult::Failed => { ::peg::RuleResult::Matched(__pos, None) },
589+
::peg::RuleResult::Error(__e) => { ::peg::RuleResult::Error(__e) },
570590
}
571591
}
572592
} else {
573593
quote_spanned!{ span=>
574594
match #optional_res {
575595
::peg::RuleResult::Matched(__newpos, _) => { ::peg::RuleResult::Matched(__newpos, ()) },
576596
::peg::RuleResult::Failed => { ::peg::RuleResult::Matched(__pos, ()) },
597+
::peg::RuleResult::Error(__e) => { ::peg::RuleResult::Error(__e) },
577598
}
578599
}
579600
}
@@ -597,6 +618,7 @@ fn compile_expr(context: &Context, e: &SpannedExpr, result_used: bool) -> TokenS
597618
match __sep_res {
598619
::peg::RuleResult::Matched(__newpos, _) => { __newpos },
599620
::peg::RuleResult::Failed => break,
621+
::peg::RuleResult::Error(__e) => { __maybe_err = Some(__e); break }
600622
}
601623
};
602624
}
@@ -626,7 +648,10 @@ fn compile_expr(context: &Context, e: &SpannedExpr, result_used: bool) -> TokenS
626648

627649
let result_check = if let Some(min) = min {
628650
quote_spanned!{ span=>
629-
if __repeat_value.len() >= #min {
651+
if let Some(__e) = __maybe_err {
652+
::peg::RuleResult::Error(__e)
653+
}
654+
else if __repeat_value.len() >= #min {
630655
::peg::RuleResult::Matched(__repeat_pos, #result)
631656
} else {
632657
::peg::RuleResult::Failed
@@ -638,6 +663,7 @@ fn compile_expr(context: &Context, e: &SpannedExpr, result_used: bool) -> TokenS
638663

639664
quote_spanned!{ span=> {
640665
let mut __repeat_pos = __pos;
666+
let mut __maybe_err = None;
641667
#repeat_vec
642668

643669
loop {
@@ -655,6 +681,10 @@ fn compile_expr(context: &Context, e: &SpannedExpr, result_used: bool) -> TokenS
655681
::peg::RuleResult::Failed => {
656682
break;
657683
}
684+
::peg::RuleResult::Error(__e) => {
685+
__maybe_err = Some(__e);
686+
break;
687+
}
658688
}
659689
}
660690

@@ -671,6 +701,7 @@ fn compile_expr(context: &Context, e: &SpannedExpr, result_used: bool) -> TokenS
671701
match __assert_res {
672702
::peg::RuleResult::Matched(_, __value) => ::peg::RuleResult::Matched(__pos, __value),
673703
::peg::RuleResult::Failed => ::peg::RuleResult::Failed,
704+
::peg::RuleResult::Error(..) => ::peg::RuleResult::Failed,
674705
}
675706
}}
676707
}
@@ -683,6 +714,7 @@ fn compile_expr(context: &Context, e: &SpannedExpr, result_used: bool) -> TokenS
683714
__err_state.suppress_fail -= 1;
684715
match __assert_res {
685716
::peg::RuleResult::Failed => ::peg::RuleResult::Matched(__pos, ()),
717+
::peg::RuleResult::Error(..) => ::peg::RuleResult::Matched(__pos, ()),
686718
::peg::RuleResult::Matched(..) => __err_state.mark_failure(__pos, "mismatch"),
687719
}
688720
}}
@@ -717,6 +749,7 @@ fn compile_expr(context: &Context, e: &SpannedExpr, result_used: bool) -> TokenS
717749
match #inner {
718750
::peg::RuleResult::Matched(__newpos, _) => { ::peg::RuleResult::Matched(__newpos, ::peg::ParseSlice::parse_slice(__input, str_start, __newpos)) },
719751
::peg::RuleResult::Failed => ::peg::RuleResult::Failed,
752+
::peg::RuleResult::Error(__e) => ::peg::RuleResult::Error(__e),
720753
}
721754
}}
722755
}
@@ -735,7 +768,66 @@ fn compile_expr(context: &Context, e: &SpannedExpr, result_used: bool) -> TokenS
735768
FailExpr(ref expected) => {
736769
quote_spanned! { span => { __err_state.mark_failure(__pos, #expected); ::peg::RuleResult::Failed }}
737770
}
738-
771+
ErrorIfExpr(ref expr1, ref message, ref expr2) => {
772+
let if_res = compile_expr(context, expr1, false);
773+
let recover_res = compile_expr(context, expr2, result_used);
774+
quote_spanned! { span => {
775+
let __if_res = { #if_res };
776+
match __if_res {
777+
::peg::RuleResult::Failed => ::peg::RuleResult::Failed,
778+
::peg::RuleResult::Error(__e) => ::peg::RuleResult::Error(__e),
779+
::peg::RuleResult::Matched(__newpos, _) => {
780+
// Report error (if any) at start of `expr1`, then consume it by shadowing `__pos`.
781+
let __parse_err = ::peg::error::ParseErr { error: #message, location: __pos };
782+
let __pos = __newpos;
783+
if __err_state.suppress_fail == 0 {
784+
__err_state.suppress_fail += 1;
785+
let __recover_res = { #recover_res };
786+
__err_state.suppress_fail -= 1;
787+
788+
match __recover_res {
789+
::peg::RuleResult::Matched(__newpos, __value) => {
790+
__err_state.mark_error(__parse_err);
791+
::peg::RuleResult::Matched(__newpos, __value)
792+
},
793+
::peg::RuleResult::Failed | ::peg::RuleResult::Error(..) => ::peg::RuleResult::Error(__parse_err)
794+
}
795+
} else {
796+
::peg::RuleResult::Error(__parse_err)
797+
}
798+
},
799+
}
800+
}}
801+
}
802+
ErrorUnlessExpr(ref expr1, ref message, ref expr2) => {
803+
let unless_res = compile_expr(context, expr1, result_used);
804+
let recover_res = compile_expr(context, expr2, result_used);
805+
quote_spanned! { span => {
806+
let __unless_res = { #unless_res };
807+
match __unless_res {
808+
::peg::RuleResult::Matched(__newpos, __value) => ::peg::RuleResult::Matched(__newpos, __value),
809+
::peg::RuleResult::Error(__e) => ::peg::RuleResult::Error(__e),
810+
::peg::RuleResult::Failed => {
811+
let __parse_err = ::peg::error::ParseErr { error: #message, location: __pos };
812+
if __err_state.suppress_fail == 0 {
813+
__err_state.suppress_fail += 1;
814+
let __recover_res = { #recover_res };
815+
__err_state.suppress_fail -= 1;
816+
817+
match __recover_res {
818+
::peg::RuleResult::Matched(__newpos, __value) => {
819+
__err_state.mark_error(__parse_err);
820+
::peg::RuleResult::Matched(__newpos, __value)
821+
},
822+
::peg::RuleResult::Failed | ::peg::RuleResult::Error(..) => ::peg::RuleResult::Error(__parse_err)
823+
}
824+
} else {
825+
::peg::RuleResult::Error(__parse_err)
826+
}
827+
},
828+
}
829+
}}
830+
}
739831
PrecedenceExpr { ref levels } => {
740832
let mut pre_rules = Vec::new();
741833
let mut level_code = Vec::new();

0 commit comments

Comments
 (0)