11{-# LANGUAGE OverloadedStrings #-}
22{-# LANGUAGE NoImplicitPrelude #-}
3- {-# LANGUAGE TemplateHaskell #-}
43
54module Scanner where
65
@@ -15,39 +14,67 @@ import RIO.Partial (read)
1514data LoxObject = JString | JDouble
1615 deriving (Show , Eq )
1716
18- data LoxTok =
19- -- Single-character tokens.
20- LEFT_PAREN | RIGHT_PAREN | LEFT_BRACE | RIGHT_BRACE |
21- COMMA | DOT | MINUS | PLUS | SEMICOLON | SLASH | STAR |
22-
23- -- One or two character tokens.
24- BANG | BANG_EQUAL |
25- EQUAL | EQUAL_EQUAL |
26- GREATER | GREATER_EQUAL |
27- LESS | LESS_EQUAL |
28-
29- -- Literals.
30- IDENTIFIER String | STRING String | NUMBER Double |
31-
32- -- Keywords.
33- AND | CLASS | ELSE | FALSE | FUN | FOR | IF | NIL | OR |
34- PRINT | RETURN | SUPER | THIS | TRUE | VAR | WHILE |
35-
36- WHITESPACE | COMMENT Text |
37-
38- EOF
17+ data LoxTok
18+ = -- Single-character tokens.
19+ LEFT_PAREN
20+ | RIGHT_PAREN
21+ | LEFT_BRACE
22+ | RIGHT_BRACE
23+ | COMMA
24+ | DOT
25+ | MINUS
26+ | PLUS
27+ | SEMICOLON
28+ | SLASH
29+ | STAR
30+ | -- One or two character tokens.
31+ BANG
32+ | BANG_EQUAL
33+ | EQUAL
34+ | EQUAL_EQUAL
35+ | GREATER
36+ | GREATER_EQUAL
37+ | LESS
38+ | LESS_EQUAL
39+ | -- Literals.
40+ IDENTIFIER String
41+ | STRING String
42+ | NUMBER Double
43+ | COMMENT Text
44+ -- Keywords.
45+ | AND
46+ | CLASS
47+ | ELSE
48+ | FALSE
49+ | FUN
50+ | FOR
51+ | IF
52+ | NIL
53+ | OR
54+ | PRINT
55+ | RETURN
56+ | SUPER
57+ | THIS
58+ | TRUE
59+ | VAR
60+ | WHILE
61+ | WHITESPACE
62+ | EOF
3963 deriving (Show , Eq )
4064
41- data LoxTokInfo = LoxTokInfo {
42- tokinfo_type :: LoxTok ,
43- tokinfo_lexeme :: Maybe T. Text ,
44- tokinfo_literal :: Maybe LoxObject ,
45- tok_position :: SourcePos
65+ data LoxTokInfo = LoxTokInfo
66+ { tokinfo_type :: LoxTok ,
67+ tokinfo_lexeme :: Maybe T. Text ,
68+ tokinfo_literal :: Maybe LoxObject ,
69+ tok_position :: SourcePos
4670 }
4771 deriving (Show , Eq )
4872
4973
50- type LoxScanner = Parser
74+ tokenShow :: LoxTokInfo -> String
75+ tokenShow t = " LoxTok=" ++ show (tokinfo_type t)
76+
77+ type LoxScannerResult = Either ParseError [LoxTokInfo ]
5178-- type LoxScanner = Parsec String () [LoxTok]
5279
5380whitespace :: Parser ()
@@ -59,18 +86,6 @@ whitespace = void $ many $ oneOf " \n\t"
5986-- return $ LoxTokInfo WHITESPACE Nothing Nothing source_pos
6087
6188whitespaceToken :: Parser LoxTokInfo
62- whitespaceToken = do
63- source_pos <- getPosition
64- _ <- many1 $ oneOf " "
65- return $ LoxTokInfo WHITESPACE Nothing Nothing source_pos
66-
67- scanComment :: Parser LoxTokInfo
68- scanComment = do
69- source_pos <- getPosition
70- _ <- string " //"
71- comment <- try (manyTill anyToken (try (oneOf " \n " ))) <|> manyTill anyToken eof
72- return $ LoxTokInfo (COMMENT (T. pack comment)) Nothing Nothing source_pos
73-
7489charMapping :: [(LoxTok , Char )]
7590charMapping =
7691 [ (LEFT_PAREN , ' (' ),
@@ -95,9 +110,9 @@ scanSingleCharToken = do
95110 source_pos <- getPosition
96111 sel <- choice $ build <$> charMapping
97112 return $ LoxTokInfo sel Nothing Nothing source_pos
98- where
99- build :: (LoxTok , Char ) -> Parser LoxTok
100- build (x, y) = x <$ char y <* whitespace
113+ where
114+ build :: (LoxTok , Char ) -> Parser LoxTok
115+ build (x, y) = x <$ char y <* whitespace
101116
102117doubleCharMapping :: [(LoxTok , String )]
103118doubleCharMapping =
@@ -118,8 +133,7 @@ scanDoubleToken = do
118133
119134keywordMapping :: [(LoxTok , String )]
120135keywordMapping =
121- [
122- (AND , " and" ),
136+ [ (AND , " and" ),
123137 (CLASS , " class" ),
124138 (ELSE , " else" ),
125139 (FALSE , " false" ),
@@ -135,7 +149,7 @@ keywordMapping =
135149 (TRUE , " true" ),
136150 (VAR , " var" ),
137151 (WHILE , " while" )
138- ]
152+ ]
139153
140154scanKeywordToken :: Parser LoxTokInfo
141155scanKeywordToken = do
@@ -146,13 +160,18 @@ scanKeywordToken = do
146160 build :: (LoxTok , String ) -> Parser LoxTok
147161 build (x, y) = x <$ string y <* whitespace
148162
163+ whitespaceToken = do
164+ source_pos <- getPosition
165+ _ <- many1 $ char ' '
166+ return $ LoxTokInfo WHITESPACE Nothing Nothing source_pos
167+
149168scanDouble :: Parser LoxTokInfo
150169scanDouble = do
151170 source_pos <- getPosition
152- sel <- (do
171+ let la = lookAhead (whitespaceToken <|> scanSingleCharToken)
172+ sel <- do
153173 firstPart <- Text.Parsec. many1 digit
154- try (secondCharacter firstPart) <|> return (NUMBER (read firstPart)))
155- _ <- lookAhead (scanSingleCharToken <|> whitespaceToken)
174+ try (secondCharacter firstPart <* la <* whitespace) <|> NUMBER (read firstPart) <$ la <* whitespace
156175 return $ LoxTokInfo sel Nothing Nothing source_pos
157176 where
158177 secondCharacter :: String -> Parser LoxTok
@@ -199,18 +218,24 @@ checkIfIdentifier = do
199218 result xs s source_pos = do
200219 case xs of
201220 [] -> return $ LoxTokInfo (IDENTIFIER s) Nothing Nothing source_pos
202- (x, _): _ -> return $ LoxTokInfo x Nothing Nothing source_pos
221+ (x, _) : _ -> return $ LoxTokInfo x Nothing Nothing source_pos
203222
223+ scanComment :: Parser LoxTokInfo
224+ scanComment = do
225+ source_pos <- getPosition
226+ _ <- string " //"
227+ -- TODO: Find a better way to do this, scanning this more than once is not desirable
228+ comment <- try (manyTill anyToken (try (oneOf " \n " ))) <|> manyTill anyToken eof
229+ return $ LoxTokInfo (COMMENT (T. pack comment)) Nothing Nothing source_pos
204230
205231scanToken :: Parser LoxTokInfo
206232scanToken =
207- try scanComment <|>
208- try scanDoubleToken <|>
209- try scanSingleCharToken <|>
210- try scanQuotedString <|>
211- try scanDouble <|>
212- checkIfIdentifier
213-
214-
215- scanner :: String -> Either ParseError [LoxTokInfo ]
216- scanner = parse (many scanToken <* eof) " "
233+ try scanComment
234+ <|> try scanDoubleToken
235+ <|> try scanSingleCharToken
236+ <|> try scanQuotedString
237+ <|> scanDouble
238+ <|> checkIfIdentifier
239+
240+ scanner :: String -> LoxScannerResult
241+ scanner = parse (many scanToken <* eof) " "
0 commit comments