Skip to content

Commit 50ef591

Browse files
committed
Handle empty branches during grammar parsing
1 parent 1765e4f commit 50ef591

File tree

4 files changed

+67
-37
lines changed

4 files changed

+67
-37
lines changed

custom-parser/grammar-factoring/MySQLParser-factored-versioned.json

Lines changed: 51 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10184,6 +10184,9 @@
1018410184
"bnf": [
1018510185
[
1018610186
"%xaConvert00"
10187+
],
10188+
[
10189+
"\u03b5"
1018710190
]
1018810191
]
1018910192
},
@@ -11570,6 +11573,9 @@
1157011573
"bnf": [
1157111574
[
1157211575
"%slaveConnectionOptions00"
11576+
],
11577+
[
11578+
"\u03b5"
1157311579
]
1157411580
]
1157511581
},
@@ -12435,11 +12441,47 @@
1243512441
]
1243612442
]
1243712443
},
12444+
{
12445+
"name": "createUser",
12446+
"bnf": [
12447+
[
12448+
"CREATE_SYMBOL",
12449+
"USER_SYMBOL",
12450+
"%createUser02",
12451+
"createUserList",
12452+
"defaultRoleClause",
12453+
"createUserTail"
12454+
]
12455+
]
12456+
},
12457+
{
12458+
"versions": "serverVersion >= 50706",
12459+
"name": "%createUser0200",
12460+
"bnf": [
12461+
[
12462+
"ifNotExists"
12463+
]
12464+
]
12465+
},
12466+
{
12467+
"name": "%createUser02",
12468+
"bnf": [
12469+
[
12470+
"%createUser0200"
12471+
],
12472+
[
12473+
"\u03b5"
12474+
]
12475+
]
12476+
},
1243812477
{
1243912478
"name": "createUserTail",
1244012479
"bnf": [
1244112480
[
1244212481
"%createUserTail00"
12482+
],
12483+
[
12484+
"\u03b5"
1244312485
]
1244412486
]
1244512487
},
@@ -12496,6 +12538,9 @@
1249612538
"bnf": [
1249712539
[
1249812540
"%defaultRoleClause00"
12541+
],
12542+
[
12543+
"\u03b5"
1249912544
]
1250012545
]
1250112546
},
@@ -15603,6 +15648,9 @@
1560315648
"bnf": [
1560415649
[
1560515650
"%nonBlocking00"
15651+
],
15652+
[
15653+
"\u03b5"
1560615654
]
1560715655
]
1560815656
},
@@ -24202,6 +24250,9 @@
2420224250
"bnf": [
2420324251
[
2420424252
"%createTableOption21100"
24253+
],
24254+
[
24255+
"\u03b5"
2420524256
]
2420624257
]
2420724258
},
@@ -30457,30 +30508,6 @@
3045730508
]
3045830509
]
3045930510
},
30460-
{
30461-
"name": "ifNotExists_zero_or_one",
30462-
"bnf": [
30463-
[
30464-
"ifNotExists"
30465-
],
30466-
[
30467-
"\u03b5"
30468-
]
30469-
]
30470-
},
30471-
{
30472-
"name": "createUser",
30473-
"bnf": [
30474-
[
30475-
"CREATE_SYMBOL",
30476-
"USER_SYMBOL",
30477-
"ifNotExists_zero_or_one",
30478-
"createUserList",
30479-
"defaultRoleClause",
30480-
"createUserTail"
30481-
]
30482-
]
30483-
},
3048430511
{
3048530512
"name": "castType",
3048630513
"bnf": [

custom-parser/grammar-factoring/MySQLParser.g4

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1765,10 +1765,10 @@ userFunction:
17651765
USER_SYMBOL parentheses
17661766
;
17671767

1768-
//createUser:
1769-
// CREATE_SYMBOL USER_SYMBOL ({serverVersion >= 50706}? ifNotExists | /* empty */) createUserList defaultRoleClause
1770-
// createUserTail
1771-
//;
1768+
createUser:
1769+
CREATE_SYMBOL USER_SYMBOL ({serverVersion >= 50706}? ifNotExists | /* empty */) createUserList defaultRoleClause
1770+
createUserTail
1771+
;
17721772

17731773
createUserTail:
17741774
{serverVersion >= 50706}? requireClause? connectOptions? accountLockPasswordExpireOptions*
@@ -5036,13 +5036,6 @@ roleOrLabelKeyword:
50365036
*/
50375037
alterOrderList: qualifiedIdentifier direction? (COMMA_SYMBOL qualifiedIdentifier direction?)*;
50385038

5039-
/*
5040-
* Fix CREATE USER statement.
5041-
* The original grammar contains "({serverVersion >= 50706}? ifNotExists | / * empty * /)",
5042-
* but the optionality of "| / * empty * /" part was lost during the conversion.
5043-
*/
5044-
createUser: CREATE_SYMBOL USER_SYMBOL ifNotExists? createUserList defaultRoleClause createUserTail;
5045-
50465039
/*
50475040
* Fix CAST(2024 AS YEAR).
50485041
* The original grammar was missing the YEAR_SYMBOL in the "castType" rule.

custom-parser/grammar-factoring/convert-grammar.php

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,23 @@ function process_rule(string $rule) {
2020

2121
$parens_regex = PARENS_REGEX;
2222

23+
// Match empty branches in the original grammar. They are equal to "ε", making the parent optional.
24+
// This matches a "|" not followed by any rule, e.g. (A | B |) or (A | | B), etc.
25+
$empty_branch_regex = '\|(?=\s*(?:\||\)|$))';
26+
2327
// extract rule branches (split by | not inside parentheses)
24-
preg_match_all("/((?:[^()|]|$parens_regex)+)/", $rule, $matches);
28+
preg_match_all("/((?:[^()|]|$parens_regex)+|$empty_branch_regex)/", $rule, $matches);
2529
$branches = $matches[0];
2630
$subrules = [];
2731
foreach ($branches as $branch) {
2832
$branch = trim($branch);
2933

34+
// an empty branch is equivalent to "ε"
35+
if ($branch === '|') {
36+
$subrules[] = ["ε"];
37+
continue;
38+
}
39+
3040
// extract version specifiers (like "{serverVersion >= 80000}?")
3141
$versions = null;
3242
if (preg_match('/^\{(.+?)}\?\s+(.*)$/s', $branch, $matches)) {

custom-parser/parser/grammar.php

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)