Skip to content

Commit efe1007

Browse files
committed
mlx lexer hack around [< and >] tokens
1 parent cda3b7e commit efe1007

File tree

4 files changed

+66
-9
lines changed

4 files changed

+66
-9
lines changed

mlx/lexer.mll

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -558,7 +558,6 @@ rule token = parse
558558
| "|]" { BARRBRACKET }
559559
| ">" { GREATER }
560560
| "/>" { SLASHGREATER }
561-
| ">]" { GREATERRBRACKET }
562561
| "}" { RBRACE }
563562
| ">}" { GREATERRBRACE }
564563
| "[@" { LBRACKETAT }
@@ -805,6 +804,28 @@ and skip_hash_bang = parse
805804
806805
and docstring = Docstrings.docstring
807806
807+
(* [token_with_comments lexbuf] shadows the previously defined
   [token_with_comments] to undo a "[<" match when the "<" really opens a
   JSX element inside a list literal, as in [[<div />]].

   If the character right after "[<" can start an identifier, the "<" is
   pushed back into the input and [LBRACKET] is returned, so that the next
   call lexes "<" on its own.  Otherwise [LBRACKETLESS] is returned
   unchanged.  Every other token passes through untouched. *)
let token_with_comments lexbuf =
  (* ASCII characters that can begin an identifier:
     UIDENT starts with A-Z, LIDENT starts with a-z or _ *)
  let starts_ident = function
    | 'A' .. 'Z' | 'a' .. 'z' | '_' -> true
    | _ -> false
  in
  match token_with_comments lexbuf with
  | LBRACKETLESS ->
    (* The character following "[<" may not have been read into the buffer
       yet when the lexbuf is filled in chunks (channel- or function-backed
       input).  Refill once, the same way generated lexers do, before
       peeking; otherwise the split would be skipped whenever "[<" happens
       to end at a chunk boundary.  [refill_buff] may move the buffer
       contents and updates [lex_curr_pos] itself, hence the position is
       read only afterwards. *)
    if lexbuf.Lexing.lex_curr_pos >= lexbuf.Lexing.lex_buffer_len
       && not lexbuf.Lexing.lex_eof_reached
    then lexbuf.Lexing.refill_buff lexbuf;
    let pos = lexbuf.Lexing.lex_curr_pos in
    let should_split =
      pos < lexbuf.Lexing.lex_buffer_len
      && starts_ident (Bytes.get lexbuf.Lexing.lex_buffer pos)
    in
    if should_split then begin
      (* Backtrack one character to before the "<" so it will be lexed
         separately, and keep the reported end position in sync. *)
      lexbuf.Lexing.lex_curr_pos <- pos - 1;
      let curr_p = lexbuf.Lexing.lex_curr_p in
      lexbuf.Lexing.lex_curr_p <-
        { curr_p with Lexing.pos_cnum = curr_p.Lexing.pos_cnum - 1 };
      LBRACKET
    end else
      LBRACKETLESS
  | tok -> tok
828+
808829
let token lexbuf =
809830
let post_pos = lexeme_end_p lexbuf in
810831
let attach lines docs pre_pos =

ocamlmerlin_mlx/ocaml/preprocess/lexer_raw.mll

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -590,7 +590,6 @@ rule token state = parse
590590
| "|]" { return BARRBRACKET }
591591
| ">" { return GREATER }
592592
| "/>" { return SLASHGREATER }
593-
| ">]" { return GREATERRBRACKET }
594593
| "}" { return RBRACE }
595594
| ">}" { return GREATERRBRACE }
596595
| "[@" { return LBRACKETAT }
@@ -825,6 +824,28 @@ and skip_sharp_bang state = parse
825824
(* preprocessor support not implemented, not compatible with monadic
826825
interface *)
827826
827+
(* [token state lexbuf] shadows the generated [token] rule to split a "[<"
   token when the "<" actually opens a JSX element, as in [[<div />]].

   When the character after "[<" can start an identifier, the "<" is pushed
   back into the input and [Return LBRACKET] is produced; the next call then
   lexes "<" separately.  Any other result is passed through as is.

   NOTE(review): the lookahead only inspects bytes already present in
   [lex_buffer]; presumably the whole source is buffered up front here --
   confirm against the caller that creates the lexbuf. *)
let token state lexbuf =
  (* UIDENT starts with A-Z, LIDENT starts with a-z or _ *)
  let starts_ident = function
    | 'A' .. 'Z' | 'a' .. 'z' | '_' -> true
    | _ -> false
  in
  match token state lexbuf with
  | Return LBRACKETLESS ->
    let pos = lexbuf.Lexing.lex_curr_pos in
    let jsx_follows =
      pos < lexbuf.Lexing.lex_buffer_len
      && starts_ident (Bytes.get lexbuf.Lexing.lex_buffer pos)
    in
    if not jsx_follows then
      Return LBRACKETLESS
    else begin
      (* Step back over the "<" and move the current position with it. *)
      lexbuf.Lexing.lex_curr_pos <- pos - 1;
      let here = lexbuf.Lexing.lex_curr_p in
      lexbuf.Lexing.lex_curr_p <-
        { here with Lexing.pos_cnum = here.Lexing.pos_cnum - 1 };
      Return LBRACKET
    end
  | other -> other
848+
828849
let rec token_without_comments state lexbuf =
829850
token state lexbuf >>= function
830851
| COMMENT _ ->

test/example/main.mlx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ let div ~children () = Element ("div", children)
1717
let text s = Text s
1818

1919
let () =
20+
let _ = [<div>(text "title")</div>; <div>(text "content")</div>] in
2021
print_endline (render (
2122
<div>
2223
<div>(text "title")</div>

test/mlx.t

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -116,13 +116,27 @@ Some tests for prop expressions:
116116
MERLIN
117117
let _ = element () ~children:[] ~prop:!?ref [@JSX]
118118

119-
TODO: fix the following parse error, where [< is treated as LBRACKETLESS:
119+
We have a lexer hack to parse [<element and [<Element as JSX:
120120
$ echo 'let _ = [<element />]' | ./mlx
121121
BATCH
122-
File "*stdin*", line 1, characters 8-10:
123-
Error: Syntax error
124-
122+
let _ = [ (element () ~children:[] [@JSX]) ]
125123
MERLIN
126-
File "*stdin*", line 1, characters 18-20
127-
Error: Syntax error
128-
124+
let _ = [ (element () ~children:[] [@JSX]) ]
125+
126+
$ echo 'let _ = [<M.element />]' | ./mlx
127+
BATCH
128+
let _ = [ (M.element () ~children:[] [@JSX]) ]
129+
MERLIN
130+
let _ = [ (M.element () ~children:[] [@JSX]) ]
131+
132+
$ echo 'let _ = [<element> 1 </element>]' | ./mlx
133+
BATCH
134+
let _ = [ (element () ~children:[ 1 ] [@JSX]) ]
135+
MERLIN
136+
let _ = [ (element () ~children:[ 1 ] [@JSX]) ]
137+
138+
$ echo 'let _ = [<M.element> 1 </M.element>]' | ./mlx
139+
BATCH
140+
let _ = [ (M.element () ~children:[ 1 ] [@JSX]) ]
141+
MERLIN
142+
let _ = [ (M.element () ~children:[ 1 ] [@JSX]) ]

0 commit comments

Comments
 (0)