Skip to content

Commit f89bb80

Browse files
committed
Refine doubleScan
1 parent e8ecfd9 commit f89bb80

File tree

2 files changed

+88
-62
lines changed

2 files changed

+88
-62
lines changed

src/Scanner.hs

Lines changed: 85 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
{-# LANGUAGE OverloadedStrings #-}
22
{-# LANGUAGE NoImplicitPrelude #-}
3-
{-# LANGUAGE TemplateHaskell #-}
43

54
module Scanner where
65

@@ -15,39 +14,67 @@ import RIO.Partial (read)
1514
data LoxObject = JString | JDouble
1615
deriving (Show, Eq)
1716

18-
data LoxTok =
19-
-- Single-character tokens.
20-
LEFT_PAREN| RIGHT_PAREN| LEFT_BRACE| RIGHT_BRACE|
21-
COMMA| DOT| MINUS| PLUS| SEMICOLON| SLASH| STAR|
22-
23-
-- One or two character tokens.
24-
BANG| BANG_EQUAL|
25-
EQUAL| EQUAL_EQUAL|
26-
GREATER| GREATER_EQUAL|
27-
LESS| LESS_EQUAL|
28-
29-
-- Literals.
30-
IDENTIFIER String| STRING String| NUMBER Double|
31-
32-
-- Keywords.
33-
AND| CLASS| ELSE| FALSE| FUN| FOR| IF| NIL| OR|
34-
PRINT| RETURN| SUPER| THIS| TRUE| VAR| WHILE|
35-
36-
WHITESPACE | COMMENT Text|
37-
38-
EOF
17+
data LoxTok
18+
= -- Single-character tokens.
19+
LEFT_PAREN
20+
| RIGHT_PAREN
21+
| LEFT_BRACE
22+
| RIGHT_BRACE
23+
| COMMA
24+
| DOT
25+
| MINUS
26+
| PLUS
27+
| SEMICOLON
28+
| SLASH
29+
| STAR
30+
| -- One or two character tokens.
31+
BANG
32+
| BANG_EQUAL
33+
| EQUAL
34+
| EQUAL_EQUAL
35+
| GREATER
36+
| GREATER_EQUAL
37+
| LESS
38+
| LESS_EQUAL
39+
| -- Literals.
40+
IDENTIFIER String
41+
| STRING String
42+
| NUMBER Double
43+
| COMMENT Text
44+
-- Keywords.
45+
| AND
46+
| CLASS
47+
| ELSE
48+
| FALSE
49+
| FUN
50+
| FOR
51+
| IF
52+
| NIL
53+
| OR
54+
| PRINT
55+
| RETURN
56+
| SUPER
57+
| THIS
58+
| TRUE
59+
| VAR
60+
| WHILE
61+
| WHITESPACE
62+
| EOF
3963
deriving (Show, Eq)
4064

41-
data LoxTokInfo = LoxTokInfo {
42-
tokinfo_type:: LoxTok,
43-
tokinfo_lexeme:: Maybe T.Text,
44-
tokinfo_literal:: Maybe LoxObject,
45-
tok_position:: SourcePos
65+
data LoxTokInfo = LoxTokInfo
66+
{ tokinfo_type :: LoxTok,
67+
tokinfo_lexeme :: Maybe T.Text,
68+
tokinfo_literal :: Maybe LoxObject,
69+
tok_position :: SourcePos
4670
}
4771
deriving (Show, Eq)
4872

4973

50-
type LoxScanner = Parser
74+
tokenShow :: LoxTokInfo -> String
75+
tokenShow t = "LoxTok=" ++ show (tokinfo_type t)
76+
77+
type LoxScannerResult = Either ParseError [LoxTokInfo]
5178
-- type LoxScanner = Parsec String () [LoxTok]
5279

5380
whitespace :: Parser ()
@@ -59,18 +86,6 @@ whitespace = void $ many $ oneOf " \n\t"
5986
-- return $ LoxTokInfo WHITESPACE Nothing Nothing source_pos
6087

6188
whitespaceToken :: Parser LoxTokInfo
62-
whitespaceToken = do
63-
source_pos <- getPosition
64-
_ <- many1 $ oneOf " "
65-
return $ LoxTokInfo WHITESPACE Nothing Nothing source_pos
66-
67-
scanComment :: Parser LoxTokInfo
68-
scanComment = do
69-
source_pos <- getPosition
70-
_ <- string "//"
71-
comment <- try (manyTill anyToken (try (oneOf "\n"))) <|> manyTill anyToken eof
72-
return $ LoxTokInfo (COMMENT (T.pack comment)) Nothing Nothing source_pos
73-
7489
charMapping :: [(LoxTok, Char)]
7590
charMapping =
7691
[ (LEFT_PAREN, '('),
@@ -95,9 +110,9 @@ scanSingleCharToken = do
95110
source_pos <- getPosition
96111
sel <- choice $ build <$> charMapping
97112
return $ LoxTokInfo sel Nothing Nothing source_pos
98-
where
99-
build :: (LoxTok, Char) -> Parser LoxTok
100-
build (x, y) = x <$ char y <* whitespace
113+
where
114+
build :: (LoxTok, Char) -> Parser LoxTok
115+
build (x, y) = x <$ char y <* whitespace
101116

102117
doubleCharMapping :: [(LoxTok, String)]
103118
doubleCharMapping =
@@ -118,8 +133,7 @@ scanDoubleToken = do
118133

119134
keywordMapping :: [(LoxTok, String)]
120135
keywordMapping =
121-
[
122-
(AND, "and"),
136+
[ (AND, "and"),
123137
(CLASS, "class"),
124138
(ELSE, "else"),
125139
(FALSE, "false"),
@@ -135,7 +149,7 @@ keywordMapping =
135149
(TRUE, "true"),
136150
(VAR, "var"),
137151
(WHILE, "while")
138-
]
152+
]
139153

140154
scanKeywordToken :: Parser LoxTokInfo
141155
scanKeywordToken = do
@@ -146,13 +160,18 @@ scanKeywordToken = do
146160
build :: (LoxTok, String) -> Parser LoxTok
147161
build (x, y) = x <$ string y <* whitespace
148162

163+
whitespaceToken = do
164+
source_pos <- getPosition
165+
_ <- many1 $ char ' '
166+
return $ LoxTokInfo WHITESPACE Nothing Nothing source_pos
167+
149168
scanDouble :: Parser LoxTokInfo
150169
scanDouble = do
151170
source_pos <- getPosition
152-
sel <- (do
171+
let la = lookAhead (whitespaceToken <|> scanSingleCharToken)
172+
sel <- do
153173
firstPart <- Text.Parsec.many1 digit
154-
try (secondCharacter firstPart) <|> return (NUMBER (read firstPart)))
155-
_ <- lookAhead (scanSingleCharToken <|> whitespaceToken)
174+
try (secondCharacter firstPart <* la <* whitespace) <|> NUMBER (read firstPart) <$ la <* whitespace
156175
return $ LoxTokInfo sel Nothing Nothing source_pos
157176
where
158177
secondCharacter :: String -> Parser LoxTok
@@ -199,18 +218,24 @@ checkIfIdentifier = do
199218
result xs s source_pos = do
200219
case xs of
201220
[] -> return $ LoxTokInfo (IDENTIFIER s) Nothing Nothing source_pos
202-
(x, _):_ -> return $ LoxTokInfo x Nothing Nothing source_pos
221+
(x, _) : _ -> return $ LoxTokInfo x Nothing Nothing source_pos
203222

223+
scanComment :: Parser LoxTokInfo
224+
scanComment = do
225+
source_pos <- getPosition
226+
_ <- string "//"
227+
-- TODO: Find a better way to do this, scanning this more than once is not desirable
228+
comment <- try (manyTill anyToken (try (oneOf "\n"))) <|> manyTill anyToken eof
229+
return $ LoxTokInfo (COMMENT (T.pack comment)) Nothing Nothing source_pos
204230

205231
scanToken :: Parser LoxTokInfo
206232
scanToken =
207-
try scanComment <|>
208-
try scanDoubleToken <|>
209-
try scanSingleCharToken <|>
210-
try scanQuotedString <|>
211-
try scanDouble <|>
212-
checkIfIdentifier
213-
214-
215-
scanner :: String -> Either ParseError [LoxTokInfo]
216-
scanner = parse (many scanToken <* eof) ""
233+
try scanComment
234+
<|> try scanDoubleToken
235+
<|> try scanSingleCharToken
236+
<|> try scanQuotedString
237+
<|> scanDouble
238+
<|> checkIfIdentifier
239+
240+
scanner :: String -> LoxScannerResult
241+
scanner = parse (many scanToken <* eof) ""

test/test_lexer.hs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ testSingleCharInvalidToken = testInvalidToken "testSingleCharInvalidToken" "%"
5454

5555
testDoubleCharInvalidToken = testInvalidToken "testDoubleCharInvalidToken" "%%"
5656

57-
testScanInvalidDouble = testInvalidToken "testScanInvalidDouble" ".1121"
57+
testScanInvalidDOT = testInvalidToken "testScanInvalidDoubleDOT" ".1121"
5858

5959
testScanInvalidIdentifier_1 = testInvalidToken "testScanInvalidIdentifier_1" "1and"
6060
testScanInvalidIdentifier_2 = testInvalidToken "testScanInvalidIdentifier_2" "1_and"
@@ -70,14 +70,15 @@ main = do
7070
testScanDouble_1,
7171
testScanDouble_2,
7272

73+
7374
testScanDouble_4,
7475
testScanIdentifier,
7576
-- invalid tokens
76-
testScanInvalidDouble,
7777
testSingleCharInvalidToken,
7878
testDoubleCharInvalidToken,
7979
testScanInvalidIdentifier_1,
8080
testScanInvalidIdentifier_2,
81+
testScanInvalidDOT,
8182
testComment_1,
8283
testComment_2
8384
]

0 commit comments

Comments
 (0)