diff --git a/.gitignore b/.gitignore index 9839dd9f..9cfdf04f 100644 --- a/.gitignore +++ b/.gitignore @@ -172,3 +172,9 @@ cabal.sandbox.config /testing/regression-tests/266_define/nostl-2.9.0/ /testing/regression-tests/comments/ocaml/ + +ignore-dir/ + +.DS_Store +.vscode +.metals diff --git a/source/BNFC.cabal b/source/BNFC.cabal index 7300a8d2..b4de72df 100644 --- a/source/BNFC.cabal +++ b/source/BNFC.cabal @@ -272,6 +272,14 @@ library -- Pygments backend BNFC.Backend.Pygments + -- Scala backend + BNFC.Backend.Scala + BNFC.Backend.Scala.Utils + BNFC.Backend.Scala.CFtoScalaLex + BNFC.Backend.Scala.CFtoScalaLexToken + BNFC.Backend.Scala.CFtoScalaParser + BNFC.Backend.Scala.CFtoScalaParserAST + -- Agda backend BNFC.Backend.Agda diff --git a/source/main/Main.hs b/source/main/Main.hs index 754bf268..8e4c2765 100644 --- a/source/main/Main.hs +++ b/source/main/Main.hs @@ -26,6 +26,7 @@ import BNFC.Backend.Latex import BNFC.Backend.OCaml import BNFC.Backend.Pygments import BNFC.Backend.TreeSitter +import BNFC.Backend.Scala import BNFC.CF (CF) import BNFC.GetCF import BNFC.Options hiding (make, Backend) @@ -83,3 +84,4 @@ maketarget = \case TargetPygments -> makePygments TargetCheck -> error "impossible" TargetTreeSitter -> makeTreeSitter + TargetScala -> makeScala diff --git a/source/src/BNFC/Backend/Scala.hs b/source/src/BNFC/Backend/Scala.hs new file mode 100644 index 00000000..1498aacb --- /dev/null +++ b/source/src/BNFC/Backend/Scala.hs @@ -0,0 +1,26 @@ +module BNFC.Backend.Scala where + +import Prelude hiding ((<>)) + +import BNFC.Backend.Base (mkfile, Backend) +import BNFC.CF +import BNFC.Options hiding (Backend) +-- import BNFC.Backend.Scala.CFtoScalaAbs (cf2ScalaAbs) +import BNFC.Backend.Scala.CFtoScalaLex (cf2ScalaLex) +import BNFC.Backend.Scala.CFtoScalaLexToken (cf2ScalaLexToken) +import BNFC.Backend.Scala.CFtoScalaParser (cf2ScalaParser) +import BNFC.Backend.Scala.CFtoScalaParserAST (cf2ScalaParserAST) + +-- | Entrypoint for the Scala backend. + +makeScala :: SharedOptions -> CF -> Backend +makeScala opts cf = do + -- mkfile (name ++ ".scala") comment $ cf2ScalaAbs opts cf + mkfile (name ++ "LexToken.scala") comment $ cf2ScalaLexToken opts cf + mkfile (name ++ "Lex.scala") comment $ cf2ScalaLex opts cf + mkfile (name ++ "Parser.scala") comment $ cf2ScalaParser opts cf + mkfile (name ++ "ParserAST.scala") comment $ cf2ScalaParserAST opts cf + where name = lang opts + +comment :: String -> String +comment = ("// " ++) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs b/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs new file mode 100644 index 00000000..7a265358 --- /dev/null +++ b/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs @@ -0,0 +1,204 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} + +{- + BNF Converter: Scala Lextract syntax + Copyright (Scala) 2024 Author: Juan Pablo Poittevin, Guillermo Poladura + + Description : This module generates the Scala Lextract Syntax + tree classes. Using Scala Parser Combinator + + Author : Juan Pablo Poittevin, Guillermo Poladura + Created : 30 September, 2024 +-} + +module BNFC.Backend.Scala.CFtoScalaLex (cf2ScalaLex) where + +import Prelude hiding ((<>)) + +import BNFC.Utils (symbolToName) +import BNFC.CF +import BNFC.PrettyPrint +import BNFC.Options +import BNFC.Backend.Common (unicodeAndSymbols) +import Data.List (intercalate) +import Data.Char (toLower) +import GHC.OldList (nub) +import GHC.Unicode (toUpper) +import BNFC.Backend.Scala.Utils (scalaReserverWords, mapManualTypeMap, scalaBNFCReserverWords, applyToRepeated) +import Data.Maybe (fromMaybe) + +-- | Converts a string to lowercase +toLowerString :: String -> String +toLowerString s = map toLower s + +-- | Main function that generates the Scala lexer code +cf2ScalaLex :: SharedOptions -> CF -> Doc +cf2ScalaLex Options{ lang } cf = vcat $ + -- Generate header and imports + imports lang ++ + -- Generate error and location classes + addExtraClasses ++ + -- Start the WorkflowLexer object + initWorkflowClass ++ + -- Indent the class contents + [nest 4 $ vcat $ + -- Add apply function + getApplyFunction ++ + -- Add tokens function + getTokensFunction (keyWords ++ symbs ++ liters) ++ + -- Add parser functions for literals + map addParserFunction liters ++ + -- Add keyword parsers + map getBaseLexs (keyWords ++ symbs) + ] ++ + -- End the WorkflowLexer object + endWorkflowClass + where + keyWords = reservedWords cf + symbs = unicodeAndSymbols cf + liters = nub $ literals cf + +-- | Generate a parser function for a specific literal type +addParserFunction :: String -> Doc +addParserFunction liter + | liter == catInteger = getIntegerFunction + | liter == catDouble = getDoubleFunction + | liter == catIdent = getIdentifiersFunction + | liter == catString = getLiteralsFunction + | liter == catChar = getLiteralFunction + | otherwise = empty + +-- | Generate the imports section +imports :: String -> [Doc] +imports name = [ + text $ "package " ++ name ++ ".workflowtoken." ++ name ++ "Lex", + text "", + text "import scala.util.parsing.combinator.RegexParsers" + ] + +-- | Generate error and location classes +addExtraClasses :: [Doc] +addExtraClasses = [ + text "", + text "sealed trait WorkflowCompilationError", + text "case class WorkflowLexerError(location: Location, msg: String) extends WorkflowCompilationError", + text "case class WorkflowParserError(location: Location, msg: String) extends WorkflowCompilationError", + text "", + text "case class Location(line: Int, column: Int) {", + nest 4 $ text "override def toString = s\"$line:$column\"", + text "}" + ] + +-- | Start the WorkflowLexer object +initWorkflowClass :: [Doc] +initWorkflowClass = [ + text "", + text "object WorkflowLexer extends RegexParsers {" + ] + +-- | End the WorkflowLexer object +endWorkflowClass :: [Doc] +endWorkflowClass = [ + text "}" + ] + +-- | Generate the apply function +getApplyFunction :: [Doc] +getApplyFunction = [ + text "def apply(code: String): Either[WorkflowLexerError, List[WorkflowToken]] = {", + nest 4 $ text "parse(tokens, code) match {", + nest 8 $ text "case NoSuccess(msg, next) => Left(WorkflowLexerError(Location(next.pos.line, next.pos.column), msg))", + nest 8 $ text "case Success(result, next) => Right(result)", + nest 4 $ text "}", + text "}" + ] + +-- | Generate the function for parsing identifiers +getIdentifiersFunction :: Doc +getIdentifiersFunction = vcat [ + text "", + text "def ident: Parser[IDENT] = {", + nest 4 $ text "\"[a-zA-Z_][a-zA-Z0-9_]*\".r ^^ { str => IDENT(str) }", + text "}" + ] + +-- | Generate the function for parsing integers +getIntegerFunction :: Doc +getIntegerFunction = vcat [ + text "", + text "def integer: Parser[INTEGER] = {", + nest 4 $ text "\"[0-9]+\".r ^^ {i => INTEGER(i)}", + text "}" + ] + +-- | Generate the function for parsing double values +getDoubleFunction :: Doc +getDoubleFunction = vcat [ + text "", + text "def double: Parser[DOUBLE] = {", + nest 4 $ text "\"[0-9]+.[0-9]+\".r ^^ {i => DOUBLE(i)}", + text "}" + ] + +-- | Generate the function for parsing string literals +getLiteralsFunction :: Doc +getLiteralsFunction = vcat [ + text "", + text "def string: Parser[STRING] = {", + nest 4 $ text "\"\\\"[^\\\"]*\\\"\".r ^^ { str =>", + nest 8 $ text "val content = str.substring(1, str.length - 1)", + nest 8 $ text "STRING(content)", + nest 4 $ text "}", + text "}" + ] + +-- | Generate the function for parsing character literals +getLiteralFunction :: Doc +getLiteralFunction = vcat [ + text "", + text "def char: Parser[CHAR] = {", + nest 4 $ text "\"\\\'[^\\\']*\\\'\".r ^^ { str =>", + nest 8 $ text "val content = str.substring(1, str.length - 1)", + nest 8 $ text "CHAR(content)", + nest 4 $ text "}", + text "}" + ] + +-- | Get a symbol name from a string +getSymbFromName :: String -> String +getSymbFromName s = + case symbolToName s of + Just s -> s + _ -> map toUpper s + +-- | Generate the tokens function +getTokensFunction :: [String] -> [Doc] +getTokensFunction tokens = [ + text "", + text "def tokens: Parser[List[WorkflowToken]] = {", + nest 4 $ text $ "phrase(rep1( " ++ intercalate " | " proccesedRepetedSymbs ++ "))", + text "}" + ] + where + proccesedRepetedSymbs = reverse $ applyToRepeated ("p" ++) $ reverse $ getListTokensNames tokens + +-- | Get the lowercase symbol name +getTokenName :: String -> String +getTokenName s = fromMaybe (toLowerString $ getSymbFromName s) $ scalaReserverWords $ toLowerString $ getSymbFromName s + +-- | Get a list of symbol names +getListTokensNames :: [String] -> [String] +getListTokensNames = map getTokenName + +-- | Generate a keyword parser for a symbol +getBaseLexs :: String -> Doc +getBaseLexs symb = + text "" $+$ + text ("def " ++ defName ++ " = positioned { \"" ++ toLowerString symb ++ "\" ^^ (_ => " ++ param' ++ "()) }") + where + param = map toUpper $ getSymbFromName symb + param' = fromMaybe param $ mapManualTypeMap param + defName = fromMaybe (getTokenName symb) $ scalaBNFCReserverWords $ getTokenName symb + diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs b/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs new file mode 100644 index 00000000..576b832d --- /dev/null +++ b/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs @@ -0,0 +1,81 @@ + +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} + +{- + BNF Converter: Scala Lextract syntax + Copyright (Scala) 2024 Author: Juan Pablo Poittevin + + Description : This module generates the Scala Lextract Syntax + tree classes. It generates both a Header file + and an Implementation file + + Author : Juan Pablo Poittevin + Created : 30 September, 2024 +-} + +module BNFC.Backend.Scala.CFtoScalaLexToken (cf2ScalaLexToken) where + +import Prelude hiding ((<>)) + +import BNFC.CF +import BNFC.PrettyPrint +import BNFC.Options +import BNFC.Backend.Common (unicodeAndSymbols) +import BNFC.Utils (symbolToName) +import Data.Char (toUpper) +import Data.List (nub) +import BNFC.Backend.Scala.Utils (scalaReserverWords, mapManualTypeMap) +import Data.Maybe (fromMaybe) + +cf2ScalaLexToken + :: SharedOptions + -> CF + -> Doc +cf2ScalaLexToken Options{ lang } cf = vsep . concat $ + [ + headers lang + , [text $ concat $ map generateSymbClass (symbs)] + , [generateStringClasses liters] + , [generateKeyWordClasses (keyWords ++ ["empty"])] + -- , [text $ "Symbols: " ++ show symbs] + -- , [text $ "Literals: " ++ show liters] + -- , [text $ "Keywords: " ++ show keyWords] + ] + where + liters = nub $ literals cf + symbs = unicodeAndSymbols cf + keyWords = reservedWords cf + + +generateSymbClass :: String -> String +generateSymbClass symb = case symbolToName symb of + Just s -> "case class " ++ fromMaybe s (scalaReserverWords s) ++ "() extends WorkflowToken \n" + Nothing -> mempty + + +generateKeyWordClasses :: [String] -> Doc +generateKeyWordClasses params = text $ concat $ map generateKeyWordClass params + +generateKeyWordClass :: String -> String +generateKeyWordClass key = "case class " ++ param' ++ "() extends WorkflowToken \n" + where + param = map toUpper key + param' = fromMaybe param $ mapManualTypeMap param + + +generateStringClasses :: [String] -> Doc +generateStringClasses params = text $ concat $ map generateStringClass params + +generateStringClass :: String -> String +generateStringClass param = "case class " ++ (map toUpper param) ++ "(str: String) extends WorkflowToken \n" + +headers :: String -> [Doc] +headers name = [ + text $ "package " ++ name ++ ".workflowtoken." ++ name ++ "Lex" + , "import scala.util.parsing.input.Positional" + , "sealed trait WorkflowToken extends Positional" + ] + + diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs new file mode 100644 index 00000000..05f3b182 --- /dev/null +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -0,0 +1,332 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} + +{- + BNF Converter: Scala Parser syntax + Copyright (Scala) 2024 Author: Juan Pablo Poittevin, Guillermo Poladura + + Description : This module generates the Scala Parser Syntax + Using Scala Parser Combinator + + Author : Juan Pablo Poittevin, Guillermo Poladura + Created : 30 September, 2024 +-} + +module BNFC.Backend.Scala.CFtoScalaParser (cf2ScalaParser) where + +import qualified Data.Foldable as DF (toList) +import Prelude hiding ((<>)) + +import BNFC.Backend.Scala.Utils (safeCatName, hasTokenCat, rhsToSafeStrings, disambiguateNames, getRHSCats, isSpecialCat, isListCat, isLeft, safeHeadChar, wildCardSymbs, scalaReserverWords, mapManualTypeMap, isCoercionCategory, getTerminalFromListRules, isSymbol) +import BNFC.CF + ( allEntryPoints, + catChar, + catIdent, + catInteger, + catString, + isNilCons, + normCat, + ruleGroups, + rulesForNormalizedCat, + sameCat, + sortRulesByPrecedence, + CF, + Cat, + IsFun(isCoercion, funName), + Rul(Rule, valRCat, rhsRule), + Rule, + TokenCat, + WithPosition(wpThing), strToCat, catDouble, rulesForCat ) +import BNFC.PrettyPrint +import BNFC.Options ( SharedOptions(lang, Options) ) +import BNFC.Backend.Common.NamedVariables (firstLowerCase, fixCoercions) + +import Data.List (find, intercalate, nub) +import Data.Char (toLower) +import Data.Maybe (listToMaybe, fromMaybe, isJust) +import BNFC.Utils ((+++), symbolToName) +import BNFC.Backend.Scala.CFtoScalaParserAST (getASTNames) +import GHC.Unicode (toUpper) + +-- | Main function that generates the Scala parser code +cf2ScalaParser :: SharedOptions -> CF -> Doc +cf2ScalaParser Options{ lang } cf = vcat $ + -- Generate header and imports + imports lang ++ + -- Start the WorkflowParser object + initWorkflowClass ++ + -- Indent the class contents + [nest 4 $ vcat $ + -- Add extra classes + addExtraClasses ++ + [text ""] ++ + -- Add apply function + getApplyFunction ++ + [text ""] ++ + -- Add program function + getProgramFunction cf ++ + [text ""] ++ + -- Add parser rules + generateAllRules cf (ruleGroups cf) ++ + [text ""] + -- inspect rules, only for debugging + -- ++ inspectListRulesByCategory (ruleGroups cf) + ] ++ + -- End the WorkflowParser object + endWorkflowClass ++ + -- Add the WorkflowCompiler object + addWorkflowCompiler + +-- | Generate all parser rules from rule groups +generateAllRules :: CF -> [(Cat, [Rule])] -> [Doc] +generateAllRules cf catsAndRules = + let + -- filter categories with all the rules isNilCons + rulesToProcess = filter (not . all isNilCons . snd) catsAndRules + -- Generate regular rules + mainRules = map text $ concatMap (generateRuleGroup cf) (fixCoercions rulesToProcess) + + -- existe isUsedCat seguramente nos simplifique esto + -- existe specialCats seguramente nos simplifique esto + -- existe sigLookup wtf con esto + -- Generate special rules for integer and string if needed + integerRule = generateSpecialRule catInteger "integer" "INTEGER" "toInt" "pInteger" catsAndRules + doubleRule = generateSpecialRule catDouble "double" "DOUBLE" "toInt" "pDouble" catsAndRules + stringRule = generateSpecialRule catString "string" "STRING" "toString" "pString" catsAndRules + charRule = generateSpecialRule catChar "char" "CHAR" "toString" "pChar" catsAndRules + identRule = generateSpecialRule catIdent "ident" "IDENT" "toString" "pIdent" catsAndRules + + in mainRules ++ integerRule ++ stringRule ++ charRule ++ identRule ++ doubleRule + + +getRuleFunName :: Cat -> Rule -> String +getRuleFunName cat (Rule fnam _ _ _) = firstLowerCase $ prefixIfNeeded $ funName fnam + where + prefixIfNeeded name + | map toLower name == map toLower (safeCatName cat) || isJust (scalaReserverWords (map toLower name)) = "internal_" ++ map toLower name + | otherwise = name + +getRulesFunsName :: [Rule] -> Cat -> [String] +getRulesFunsName rules cat = nub $ map (getRuleFunName cat) rules + +-- | Generate a rule group (definition for a single category) +generateRuleGroup :: CF -> (Cat, [Rule]) -> [String] +generateRuleGroup cf (cat, rules) = + ["def " ++ catName ++ ": Parser[WorkflowAST] = positioned {"] ++ + [replicate 4 ' ' ++ intercalate " | " subFuns] ++ + ["}"] ++ + concatMap (generateSingleRuleBody cf cat) nonCoercionRules + where + subFuns = getRulesFunsName nonCoercionRules cat + catName = safeCatName cat + nonCoercionRules = reverse $ map snd $ sortRulesByPrecedence $ filter (not . isCoercion) rules + +generateSingleRuleBody :: CF -> Cat -> Rule -> [String] +generateSingleRuleBody cf cat rule = [generateRuleDefinition cf cat rule ++ generateRuleTransformation rule] + +generateRuleDefinition :: CF -> Cat -> Rule -> String +generateRuleDefinition cf cat rule = + "def " ++ getRuleFunName cat rule ++ ": Parser[WorkflowAST] =" + +++ intercalate " ~ " (generateRuleForm cf rule) + + +getBaseCatOfRecursiveRule :: Rule -> [String] +getBaseCatOfRecursiveRule rule@(Rule _ _ rhs _) = + nub $ concatMap extractBaseCat rhs + where + -- Extrae las categorías base de un elemento del RHS + extractBaseCat :: Either Cat String -> [String] + extractBaseCat (Left cat) + | isBaseCat cat = [getRuleFunName cat rule] + | otherwise = [] + extractBaseCat (Right s) = if isSymbol s then [] else [getRuleFunName (strToCat s) rule] + + isBaseCat :: Cat -> Bool + isBaseCat cat = isSpecialCat $ normCat cat + + +getBasesOfRecursiveRule :: CF -> Rule -> String +getBasesOfRecursiveRule cf rule = + let + allRulesForCat = rulesForNormalizedCat cf (normCat $ wpThing $ valRCat rule) + baseTypes = nub $ concatMap getBaseCatOfRecursiveRule allRulesForCat + in + case baseTypes of + [] -> "" + x:[] -> x + x:xs -> "(" ++ intercalate " | " (x:xs) ++ ")" + + +generateRuleForm :: CF -> Rule -> [String] +generateRuleForm cf rule@(Rule _ _ rhs _) = + if isRecursiveRule rule + then snd $ generateRecursiveRuleForm rhs False + else case rhs of + [Right s] -> [fromMaybe (paramS s) (mapManualTypeMap (paramS s)) ++ "()"] + _ -> map (addRuleForListCat cf rhs) (rhsToSafeStrings rhs) + where + generateRecursiveRuleForm :: [Either Cat String] -> Bool -> (Bool, [String]) + generateRecursiveRuleForm [] added = (added, []) + generateRecursiveRuleForm (r:rest) added = + case r of + Left cat -> + if isCoercionCategory cat && not added + then + let (_, strsRest) = generateRecursiveRuleForm rest True + in (True, getBasesOfRecursiveRule cf rule : strsRest) + else + let (addedRest, strsRest) = generateRecursiveRuleForm rest added + in (addedRest, rhsToSafeStrings [r] ++ strsRest) + Right _ -> + let (addedRest, strsRest) = generateRecursiveRuleForm rest added + in (addedRest, rhsToSafeStrings [r] ++ strsRest) + + paramS s = fromMaybe (map toUpper s) (symbolToName s) + +generateRuleTransformation :: Rule -> String +generateRuleTransformation rule = + if isRuleOnlySpecials rule + then case rhsRule rule of + [] -> "EMPTY() ^^ {" ++ generateCaseStatement rule ++ "}" + _ -> "" + else " ^^ { " ++ generateCaseStatement rule ++ " }" + +isRecursiveRule :: Rule -> Bool +isRecursiveRule (Rule _ cat rhs _) = + any (sameCat (wpThing cat)) (getRHSCats rhs) + +addRuleForListCat :: CF -> [Either Cat String] -> String -> String +addRuleForListCat cf rhs s = + case find (\cat -> isListCat cat && safeCatName cat == s) (getRHSCats rhs) of + Just c -> case (getTerminalFromListRules $ rulesForCat cf c) of + Just term -> "repsep(" ++ firstLowerCase s ++ ", " ++ term ++ "())" + _ -> "rep(" ++ firstLowerCase s ++ ")" + Nothing -> s + +isRuleOnlySpecials :: Rule -> Bool +isRuleOnlySpecials (Rule _ _ rhs _) = + all isSpecialCat (getRHSCats rhs) && all isLeft rhs + +-- -- | Generate a case statement for a rule +generateCaseStatement :: Rule -> String +generateCaseStatement rule@(Rule fun c rhs _) + | isCoercion fun = "" + | null vars = "_ => " ++ fnm ++ "()" + | otherwise = + "case (" ++ intercalate " ~ " params ++ ") => " + ++ fnm ++ "(" ++ intercalate ", " vars ++ ")" + where + getBaseType :: Cat -> String + getBaseType cat + | isListCat cat = "List[WorkflowAST]" + | otherwise = "WorkflowAST" + + fnm = fromMaybe (getRuleFunName (wpThing c) rule) $ listToMaybe $ getASTNames [rule] + + -- generate a list of with (rule, finalName) + zipped = zip rhs (disambiguateNames $ map getSymb rhs) + + getSymb (Left cat) = [toLower $ safeHeadChar $ safeCatName cat] + getSymb (Right str) = [toLower $ safeHeadChar $ fromMaybe "_" $ symbolToName str] + + params = map (wildCardSymbs.snd) zipped + + -- For Left cat we assign types, for Right str (tokens), we ignore (or use "_") + vars = [ p ++ ".asInstanceOf[" ++ getBaseType cat ++ "]" + | (Left cat, p) <- zipped + ] + +-- | Generate a special rule for tokens like Integer or String +generateSpecialRule :: TokenCat -> String -> String -> String -> String -> [(Cat, [Rule])] -> [Doc] +generateSpecialRule tokenCat ruleName tokenName conversion pTypeName catsAndRules = + case find (\(_, rules) -> any (hasTokenCat tokenCat) rules) catsAndRules of + Just (_, rules) -> case find (hasTokenCat tokenCat) rules of + Just _ -> + let + conversionPart = if null conversion then "" else "." ++ conversion + in + [ text $ "def " ++ ruleName ++ ": Parser[" ++ pTypeName ++ "] = {" + , nest 4 $ text $ "accept(\"" ++ ruleName ++ "\", { case " ++ tokenName ++ "(i) => " ++ pTypeName ++ "(i" ++ conversionPart ++ ") })" + , text "}" + ] + Nothing -> [] + Nothing -> [] + +-- | Generate the imports section +imports :: String -> [Doc] +imports name = [ + text $ "package " ++ name ++ ".workflowtoken." ++ name ++ "Parser", + text "", + text $ "import " ++ name ++ ".workflowtoken." ++ name ++ "Lex._", + text "", + text "import scala.util.parsing.combinator.Parsers", + text "", + text "import scala.util.parsing.input.{NoPosition, Position, Reader}" + ] + +-- | Generate the extra classes section +addExtraClasses :: [Doc] +addExtraClasses = [ + text "override type Elem = WorkflowToken", + text "", + text "class WorkflowTokenReader(tokens: Seq[WorkflowToken]) extends Reader[WorkflowToken] {", + nest 4 $ text "override def first: WorkflowToken = tokens.head", + nest 4 $ text "override def atEnd: Boolean = tokens.isEmpty", + nest 4 $ text "override def pos: Position = tokens.headOption.map(_.pos).getOrElse(NoPosition)", + nest 4 $ text "override def rest: Reader[WorkflowToken] = new WorkflowTokenReader(tokens.tail)", + text "}" + ] + +-- | Generate the WorkflowCompiler object +addWorkflowCompiler :: [Doc] +addWorkflowCompiler = [ + text "", + text "object WorkflowCompiler {", + nest 4 $ text "def apply(code: String): Either[WorkflowCompilationError, WorkflowAST] = {", + nest 8 $ text "for {", + nest 12 $ text "tokens <- WorkflowLexer(code).right", + nest 12 $ text "ast <- WorkflowParser(tokens).right", + nest 8 $ text "} yield ast", + nest 4 $ text "}", + text "}" + ] + +-- | Start the WorkflowParser object +initWorkflowClass :: [Doc] +initWorkflowClass = [ + text "", + text "object WorkflowParser extends Parsers {" + ] + +-- | End the WorkflowParser object +endWorkflowClass :: [Doc] +endWorkflowClass = [ + text "}" + ] + +-- | Generate the apply function +getApplyFunction :: [Doc] +getApplyFunction = [ + text "", + text "def apply(tokens: Seq[WorkflowToken]): Either[WorkflowParserError, WorkflowAST] = {", + nest 4 $ text "val reader = new WorkflowTokenReader(tokens)", + nest 4 $ text "program(reader) match {", + nest 8 $ text "case NoSuccess(msg, next) => Left(WorkflowParserError(Location(next.pos.line, next.pos.column), msg))", + nest 8 $ text "case Success(result, next) => Right(result)", + nest 4 $ text "}", + text "}" + ] + +-- | Generate the program function +getProgramFunction :: CF -> [Doc] +getProgramFunction cf = [ + text "", + text "def program: Parser[WorkflowAST] = positioned {", + nest 4 $ text $ "phrase(" ++ fromMaybe entryPoint (scalaReserverWords entryPoint) ++ ")", + text "}" + ] + where + entryPoint = case listToMaybe (map normCat $ DF.toList $ allEntryPoints cf) of + Just ep -> firstLowerCase $ show ep + Nothing -> error "No entry points found in the context-free grammar." diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs new file mode 100644 index 00000000..a3616fc6 --- /dev/null +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs @@ -0,0 +1,110 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} + +{- + BNF Converter: Scala Lextract syntax + Copyright (Scala) 2024 Author: Juan Pablo Poittevin, Guillermo Poladura + + Description : This module generates the Scala Lextract Syntax + tree classes. It generates both a Header file + and an Implementation file + + Author : Juan Pablo Poittevin, Guillermo Poladura + Created : 30 September, 2024 +-} + +module BNFC.Backend.Scala.CFtoScalaParserAST (cf2ScalaParserAST, getASTNames) where + +import Prelude hiding ((<>)) + +import BNFC.CF + ( ruleGroups, + CF, + Cat(ListCat), + IsFun(funName), + Rul(Rule), + Rule, TokenCat, literals, wpThing ) +import BNFC.PrettyPrint ( text, vcat, Doc ) +import BNFC.Options ( SharedOptions(lang, Options) ) +import BNFC.Backend.Scala.Utils (generateVarsList, isLeft, baseTypeToScalaType, wrapList, scalaReserverWords, isCoercionRule) +import Data.List (intercalate) +import BNFC.Utils ((+++)) +import Data.Maybe (fromMaybe) +import GHC.OldList (nub) + +-- | Main function that generates the AST code +cf2ScalaParserAST :: SharedOptions -> CF -> Doc +cf2ScalaParserAST Options{ lang } cf = vcat $ + -- Generate headers + headers lang ++ + -- Add an empty line after the trait definition + [text ""] ++ + -- Generate case class definitions + generateRuleDefs rules ++ + generateLiteralsDefs allLiterals + where + rules = ruleGroups cf + allLiterals = nub $ literals cf + +getASTNames :: [Rule] -> [String] +getASTNames rules = rulesNames + where + rulesNames = map (\(Rule fun _ _ _) -> fromMaybe (funName fun) $ scalaReserverWords $ funName fun) $ + filter (\(Rule _ cat _ _) -> not $ case wpThing cat of ListCat _ -> True; _ -> False) filteredRules + filteredRules = filter (not . isCoercionRule) $ rules + +-- | Generate all case class definitions +generateRuleDefs :: [(Cat, [Rule])] -> [Doc] +generateRuleDefs [] = [] +generateRuleDefs rules = concatMap processRuleGroup rules + +-- | Process a single rule group and generate case classes +processRuleGroup :: (Cat, [Rule]) -> [Doc] +processRuleGroup (_, rules) = map createCaseClass (filter (not . isCoercionRule) rules) + + +-- | Generate the class params +generateClassParams :: Rule -> String +generateClassParams (Rule _ _ rhs _) = + intercalate ", " $ zipWith (\x y -> x ++ ":" +++ y) (generateVarsList filteredRhs) (map catParams filteredRhs) + where + -- Function to format parameters based on whether they are Cat or String + catParams :: Either Cat String -> String + catParams (Left c) = formatParamType c + catParams (Right _) = "WorkflowAST" + + filteredRhs = filter isLeft rhs + +-- | Format a parameter with its type +formatParamType :: Cat -> String +formatParamType cat = wrapList cat "WorkflowAST" + +-- | Create a single case class definition +createCaseClass :: Rule -> Doc +createCaseClass rule@(Rule fun cat _ _) + | ListCat _ <- (wpThing cat) = mempty + | otherwise = text $ formatCaseClass className params + where + className = fromMaybe (funName fun) $ scalaReserverWords $ funName fun + params = generateClassParams rule + +-- | Helper function to format the case class definition +formatCaseClass :: String -> String -> String +formatCaseClass className params = "case class " ++ className ++ "(" ++ params ++ ") extends WorkflowAST" + +-- | Generate the Scala types for basic LBNF types +generateLiteralsDefs :: [TokenCat] -> [Doc] +generateLiteralsDefs tokens = map ( + \token -> text $ formatCaseClass ("p" ++ token) ("var1: " ++ fromMaybe "String" (baseTypeToScalaType token)) + ) tokens + +-- | Generate the header part of the file +headers :: String -> [Doc] +headers name = [ + text $ "package " ++ name ++ ".workflowtoken." ++ name ++ "Parser", + text "", + text "import scala.util.parsing.input.Positional", + text "", + text "sealed trait WorkflowAST extends Positional" + ] \ No newline at end of file diff --git a/source/src/BNFC/Backend/Scala/Utils.hs b/source/src/BNFC/Backend/Scala/Utils.hs new file mode 100644 index 00000000..d123f553 --- /dev/null +++ b/source/src/BNFC/Backend/Scala/Utils.hs @@ -0,0 +1,300 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} + +{- + BNF Converter: Utils + Copyright (Scala) 2024 Author: Juan Pablo Poittevin, Guillermo Poladura + + Description : This module is a helper for Scala backend + Author : Juan Pablo Poittevin, Guillermo Poladura + Created : 30 September, 2024 +-} + +module BNFC.Backend.Scala.Utils ( + generateVarsList, unwrapListCat, baseTypeToScalaType, safeTail, rhsToSafeStrings, disambiguateNames, safeCatToStrings, + wrapList, safeHeadString, scalaReserverWords, safeCatName, isLeft, getRHSCats, isSpecialCat, firstUpperCase, safeHeadChar, + getSymbFromName, catToStrings, getFunName, hasTokenCat, isListCat, disambiguateTuples, getListSeparator, + wildCardSymbs, mapManualTypeMap, scalaBNFCReserverWords, applyToRepeated, isCoercionRule, isCoercionCategory, isSymbol, + getTerminalFromListRules +) where +import BNFC.CF +import Data.Map +import BNFC.Backend.Common.NamedVariables (firstLowerCase) +import System.Directory.Internal.Prelude (fromMaybe) +import BNFC.Utils (symbolToName) +import Data.Char (toUpper) +import Data.List (isSuffixOf, find) +import Data.Maybe (isJust) + + + +generateVarsList :: [a] -> [String] +generateVarsList xs = zipWith (\_ (i :: Int) -> "var" ++ show i) xs [1..] + +unwrapListCat :: Cat -> TokenCat +unwrapListCat (TokenCat c) = c +unwrapListCat (ListCat lc) = unwrapListCat lc +unwrapListCat (CoercCat cc _) = cc +unwrapListCat (Cat s) = s + +wrapList :: Cat -> String -> [Char] +wrapList cat s = case cat of + ListCat _ -> "List[" ++ s ++ "]" + _ -> s + +-- | Safe version of tail that returns an empty list for an empty list +safeTail :: [a] -> [a] +safeTail [] = [] +safeTail (_:xs) = xs + +-- | Safe version of head that returns an empty list for an empty list +safeHeadString :: [String] -> String +safeHeadString [] = "" +safeHeadString (x:_) = x + +-- | Safe version of head that returns an empty list for an empty list +safeHeadChar :: [Char] -> Char +safeHeadChar [] = ' ' +safeHeadChar (x:_) = x + +safeCatName :: Cat -> String +safeCatName cat = fromMaybe notSafeScalaCatName (scalaReserverWords notSafeScalaCatName) + where + notSafeScalaCatName = case cat of + ListCat innerCat -> firstUpperCase (safeCatName innerCat) + _ -> firstLowerCase $ show cat + + +firstUpperCase :: String -> String +firstUpperCase [] = [] +firstUpperCase (x:xs) = toUpper x : xs + +-- | Get a symbol name from a string +getSymbFromName :: String -> String +getSymbFromName s = + case symbolToName s of + Just s -> s ++ "()" + _ -> s + +wildCardSymbs :: String -> String +wildCardSymbs s = + case symbolToName s of + Just _ -> "_" + _ -> s + +-- | Convert a category or string to its string representation +safeCatToStrings :: [Either Cat String] -> [String] +safeCatToStrings = Prelude.map (\case + Left c -> safeCatName c + Right s -> s + ) + + +-- | Convert a category or string to its string representation +catToStrings :: [Either Cat String] -> [String] +catToStrings = Prelude.map (\case + Left c -> show c + Right s -> s + ) + +-- | Gived a list of rhs, return the list vars in safe strings +-- | so for the EAdd it will return: ["exp", "PLUS()", "exp"] +rhsToSafeStrings :: [Either Cat String] -> [String] +rhsToSafeStrings = Prelude.map (\case + Left c -> safeCatName $ normCat c + Right s -> case symbolToName s of + Just s' -> s' ++ "()" + Nothing -> (Prelude.map toUpper s) ++ "()" + ) +-- | Get all the Left Cat of the rhs of a rule +getRHSCats :: [Either Cat String] -> [Cat] +getRHSCats rhs = [c | Left c <- rhs] + +getTerminalFromListRules :: [Rule] -> Maybe String +getTerminalFromListRules rules = (find (not . (== "")) terminals) + where + terminals = Prelude.concatMap (\rule -> + Prelude.map (\rhs -> case rhs of + Right s -> fromMaybe "" $ symbolToName s + _ -> "" + ) (rhsRule rule) + + ) rules + +-- | Get the function name for a rule +getFunName :: Rule -> String +getFunName (Rule fun _ _ _) = wpThing fun + +-- | Check if a rule contains a specific token category +hasTokenCat :: TokenCat -> Rule -> Bool +hasTokenCat token (Rule _ _ rhs _) = TokenCat token `elem` [c | Left c <- rhs] + +isSpecialCat :: Cat -> Bool +isSpecialCat (TokenCat cat) = cat `elem` specialCatsP +isSpecialCat _ = False + +isSymbol :: String -> Bool +isSymbol = isJust . symbolToName + + +isLeft :: Either a b -> Bool +isLeft (Left _) = True +isLeft _ = False + +isListCat :: Cat -> Bool +isListCat (ListCat _) = True +isListCat _ = False + +-- | Make variable names unique by adding numbers to duplicates +disambiguateNames :: [String] -> [String] +disambiguateNames = disamb [] + where + disamb ns1 (n:ns2) + | n == "_" = n : disamb (n:ns1) ns2 + | "()" `isSuffixOf` n = n : disamb (n:ns1) ns2 + | n `elem` (ns1 ++ ns2) = let i = length (Prelude.filter (==n) ns1) + 1 + in (n ++ show i) : disamb (n:ns1) ns2 + | otherwise = n : disamb (n:ns1) ns2 + disamb _ [] = [] + +disambiguateTuples :: [(String, String)] -> [(String, String)] +disambiguateTuples tuples = + let (names, values) = unzip tuples + newNames = disambiguateNames names + in zip newNames values + +-- | applies a function only to the first occurrence of a repetead element of the list +-- | example: applyToRepeated (++ "1") ["a", "b", "a", "c"] +-- | ["a", "b", "a1", "c"] +applyToRepeated :: Eq a => (a -> a) -> [a] -> [a] +applyToRepeated f xs = apply [] xs + where + apply _ [] = [] + apply seen (y:ys) + | y `elem` seen = f y : ys + | otherwise = y : apply (y:seen) ys + +-- | Check if a rule is a coercion rule +isCoercionRule :: Rule -> Bool +isCoercionRule (Rule fun _ _ _) = isCoercion fun + +isCoercionCategory :: Cat -> Bool +isCoercionCategory (CoercCat _ _) = True +isCoercionCategory _ = False + +getListSeparator :: CF -> Rule -> Maybe Symbol +getListSeparator cf (Rule _ cat _ _) = + case normCat (wpThing cat) of + ListCat baseCat -> + let (_, layoutKeywords, _) = layoutPragmas cf + in Prelude.lookup (show (normCat baseCat)) layoutKeywords >>= Just . listSep + _ -> Nothing + + + -- | Convert base LBNF type to Scala type +mapManualTypeMap :: String -> Maybe String +mapManualTypeMap = (`Data.Map.lookup` manualTypeMap) + +-- | Map from base LBNF Type to scala Type +manualTypeMap :: Map String String +manualTypeMap = fromList manualTypesMap + +-- | Scala types mapping +manualTypesMap :: [(String, String)] +manualTypesMap = + [ ("INTEGER" , "PINTEGER") + , ("STRING" , "PSTRING") + , ("DOUBLE" , "PDOUBLE") + , ("CHAR" , "PCHAR") + , ("IDENT" , "PIDENT") + ] + + +-- | There are some words used in the Scala parser combinator code generated by BNFC +scalaBNFCReserverWords :: String -> Maybe String +scalaBNFCReserverWords = (`Data.Map.lookup` baseScalaBNFCReserverWordsMap) + +-- | Map from base LBNF Type to scala Type +baseScalaBNFCReserverWordsMap :: Map String String +baseScalaBNFCReserverWordsMap = fromList scalaBNFCReserverWordsMap + +-- | Scala types mapping +scalaBNFCReserverWordsMap :: [(String, String)] +scalaBNFCReserverWordsMap = + [ ("string" , "pstring") + , ("integer" , "pinteger") + , ("double" , "pdouble") + , ("char" , "pchar") + , ("program" , "internal_program") + , ("apply" , "internal_apply") + ] + + +-- | Convert base LBNF type to Scala type +baseTypeToScalaType :: String -> Maybe String +baseTypeToScalaType = (`Data.Map.lookup` baseTypeMap) + +-- | Map from base LBNF Type to scala Type +baseTypeMap :: Map String String +baseTypeMap = fromList scalaTypesMap + +-- | Scala types mapping +scalaTypesMap :: [(String, String)] +scalaTypesMap = + [ ("Integer" , "Int") + , ("String" , "String") + , ("Double" , "Double") + , ("Ident" , "String") + ] + +scalaReserverWords :: String -> Maybe String +scalaReserverWords = (`Data.Map.lookup` reserverWordsMap) + +-- | Map from base LBNF Type to scala Type +reserverWordsMap :: Map String String +reserverWordsMap = fromList wordsMap + +-- | Scala reserverd words mapping +wordsMap :: [(String, String)] +wordsMap = + [ + ("def" , "pdef") + , ("val" , "pval") + , ("var" , "pvar") + , ("class", "pclass") + , ("type", "ptype") + , ("object", "pobject") + , ("trait", "ptrait") + , ("extends", "pextends") + , ("with", "pwith") + , ("case", "pcase") + , ("sealed", "psealed") + , ("abstract", "pabstract") + , ("final", "pfinal") + , ("override", "poverride") + , ("implicit", "pimplicit") + , ("lazy", "plazy") + , ("private", "pprivate") + , ("protected", "pprotected") + , ("public", "ppublic") + , ("import", "pimport") + , ("package", "ppackage") + , ("return", "preturn") + , ("if", "pif") + , ("else", "pelse") + , ("while", "pwhile") + , ("for", "pfor") + , ("do", "pdo") + , ("match", "pmatch") + , ("try", "pttry") + , ("catch", "pcatch") + , ("finally", "pfinally") + , ("throw", "pthrow") + , ("true", "ptrue") + , ("false", "pfalse") + , ("apply", "papply") + , ("Int", "pInt") + , ("String", "pString") + , ("program", "pprogram") + ] diff --git a/source/src/BNFC/Options.hs b/source/src/BNFC/Options.hs index ac5fdbf6..573e6049 100644 --- a/source/src/BNFC/Options.hs +++ b/source/src/BNFC/Options.hs @@ -64,7 +64,7 @@ data Target = TargetC | TargetCpp | TargetCppNoStl | TargetHaskell | TargetHaskellGadt | TargetLatex | TargetJava | TargetOCaml | TargetPygments | TargetTreeSitter - | TargetCheck + | TargetCheck | TargetScala deriving (Eq, Bounded, Enum, Ord) -- | List of Haskell target. @@ -83,6 +83,7 @@ instance Show Target where show TargetPygments = "Pygments" show TargetTreeSitter = "Tree-sitter" show TargetCheck = "Check LBNF file" + show TargetScala = "Scala" -- | Which version of Alex is targeted? data AlexVersion = Alex3 @@ -312,6 +313,8 @@ targetOptions = "Output OCaml code for use with ocamllex and menhir (short for --ocaml --menhir)" , Option "" ["pygments"] (NoArg (\o -> o {target = TargetPygments})) "Output a Python lexer for Pygments" + , Option "" ["scala"] (NoArg (\o -> o {target = TargetScala})) + "Output a Scala lexer" , Option "" ["tree-sitter"] (NoArg (\o -> o {target = TargetTreeSitter})) "Output grammar.js file for use with tree-sitter" , Option "" ["check"] (NoArg (\ o -> o{target = TargetCheck })) @@ -529,6 +532,7 @@ instance Maintained Target where TargetJava -> True TargetOCaml -> True TargetPygments -> True + TargetScala -> True TargetTreeSitter -> True TargetCheck -> True @@ -645,6 +649,7 @@ translateOldOptions = mapM $ \ o -> do , ("-csharp" , "--csharp") , ("-ocaml" , "--ocaml") , ("-haskell" , "--haskell") + , ("-scala" , "--scala") , ("-prof" , "--profile") , ("-gadt" , "--haskell-gadt") , ("-alex1" , "--alex1")