{-# OPTIONS -w #-}
module Lambdabot.Plugin.Haskell.Free.Parse where
import Control.Applicative
import Control.Monad
import Control.Monad.Fail (MonadFail)
import qualified Control.Monad.Fail
data Token
= QVarId String
| QConId String
| QVarSym String
| QConSym String
| OpenParen
| CloseParen
| Comma
| Semicolon
| OpenBracket
| CloseBracket
| BackQuote
| OpenBrace
| CloseBrace
| OpDotDot
| OpColon
| OpColonColon
| OpEquals
| OpBackslash
| OpPipe
| OpBackArrow
| OpArrow
| OpAt
| OpTilde
| OpImplies
| IdCase
| IdClass
| IdData
| IdDefault
| IdDeriving
| IdDo
| IdElse
| IdForall
| IdIf
| IdImport
| IdIn
| IdInfix
| IdInfixl
| IdInfixr
| IdInstance
| IdLet
| IdModule
| IdNewtype
| IdOf
| IdThen
| IdType
| IdWhere
| IdUscore
| TokError String
deriving (Show,Eq,Ord)
data ParseResult a
= ParseSuccess a [Token]
| ParseError String
deriving (Show)
newtype ParseS a = ParseS { parse :: [Token] -> ParseResult a }
instance Functor ParseS where
fmap = liftM
instance Applicative ParseS where
pure = return
(<*>) = ap
instance Monad ParseS where
return x = ParseS (\ts -> ParseSuccess x ts)
m >>= k = ParseS (\ts -> case parse m ts of
ParseSuccess x ts' -> parse (k x) ts'
ParseError s -> ParseError s)
instance MonadFail ParseS where
fail str = ParseS (\_ -> ParseError str)
instance Alternative ParseS where
empty = mzero
(<|>) = mplus
instance MonadPlus ParseS where
mzero = ParseS (\ts -> ParseError "parse error")
mplus m1 m2
= ParseS (\ts -> case parse m1 ts of
res@(ParseSuccess _ _) -> res
ParseError _ -> parse m2 ts)
peekToken :: ParseS (Maybe Token)
peekToken = ParseS (\ts -> case ts of
[] -> ParseSuccess Nothing []
(t':_) -> ParseSuccess (Just t') ts)
getToken :: ParseS (Maybe Token)
getToken = ParseS (\ts -> case ts of
[] -> ParseSuccess Nothing []
(t:ts) -> ParseSuccess (Just t) ts)
match :: Token -> ParseS ()
match m
= do
mt <- getToken
case mt of
Just t | t == m -> return ()
_ -> fail ("Expected " ++ show m)
ascSymbol = ['!','#','$','%','&','*','+','.','/','<','=','>','?','@','\\',
'^','|','-','~']
lexer :: String -> [Token]
lexer []
= []
lexer (' ':cs)
= lexer cs
lexer ('\t':cs)
= lexer cs
lexer ('\f':cs)
= lexer cs
lexer ('\r':cs)
= lexer cs
lexer ('\n':cs)
= lexer cs
lexer ('\v':cs)
= lexer cs
lexer ('-':'-':cs)
= lexerLineComment cs
where
lexerLineComment ('\r':'\n':cs) = lexer cs
lexerLineComment ('\r':cs) = lexer cs
lexerLineComment ('\n':cs) = lexer cs
lexerLineComment ('\f':cs) = lexer cs
lexerLineComment (c:cs) = lexerLineComment cs
lexerLineComment [] = []
lexer ('{':'-':cs)
= lexerComment lexer cs
where
lexerComment k ('{':'-':cs) = lexerComment (lexerComment k) cs
lexerComment k ('-':'}':cs) = k cs
lexerComment k (_:cs) = lexerComment k cs
lexerComment k [] = [TokError "Unterminated comment"]
lexer ('(':cs)
= OpenParen : lexer cs
lexer (')':cs)
= CloseParen : lexer cs
lexer (',':cs)
= Comma : lexer cs
lexer ('[':cs)
= OpenBracket : lexer cs
lexer (']':cs)
= CloseBracket : lexer cs
lexer (c@':':cs)
= lexerConSym [c] cs
where
lexerConSym con (c:cs)
| c == ':'
|| c `elem` ascSymbol
= lexerConSym (c:con) cs
lexerConSym con cs
= case reverse con of
":" -> OpColon : lexer cs
"::" -> OpColonColon : lexer cs
con -> QConSym con : lexer cs
lexer (c:cs)
| c `elem` ['A'..'Z']
= lexerConId [c] cs
| c `elem` ['a'..'z'] || c == '_'
= lexerVarId [c] cs
| c `elem` ascSymbol
= lexerVarSym [c] cs
| otherwise
= [TokError "Illegal char"]
where
lexerConId con (c:cs)
| c `elem` ['A'..'Z']
|| c `elem` ['a'..'z']
|| c `elem` ['0'..'9']
|| c == '\''
|| c == '_'
= lexerConId (c:con) cs
lexerConId con cs
= QConId (reverse con) : lexer cs
lexerVarId var (c:cs)
| c `elem` ['A'..'Z']
|| c `elem` ['a'..'z']
|| c `elem` ['0'..'9']
|| c == '\''
|| c == '_'
= lexerVarId (c:var) cs
lexerVarId var cs
= case reverse var of
"_" -> IdUscore : lexer cs
"case" -> IdCase : lexer cs
"class" -> IdClass : lexer cs
"data" -> IdData : lexer cs
"default" -> IdDefault : lexer cs
"deriving" -> IdDeriving : lexer cs
"do" -> IdDo : lexer cs
"else" -> IdElse : lexer cs
"forall" -> IdForall : lexer cs
"if" -> IdIf : lexer cs
"import" -> IdImport : lexer cs
"in" -> IdIn : lexer cs
"infix" -> IdInfix : lexer cs
"infixl" -> IdInfixl : lexer cs
"infixr" -> IdInfixr : lexer cs
"instance" -> IdInstance : lexer cs
"let" -> IdLet : lexer cs
"module" -> IdModule : lexer cs
"newtype" -> IdNewtype : lexer cs
"of" -> IdOf : lexer cs
"then" -> IdThen : lexer cs
"type" -> IdType : lexer cs
"where" -> IdWhere : lexer cs
v -> QVarId v : lexer cs
lexerVarSym var (c:cs)
| c == ':' || c `elem` ascSymbol
= lexerVarSym (c:var) cs
lexerVarSym var cs
= case reverse var of
".." -> OpDotDot : lexer cs
"=" -> OpEquals : lexer cs
"\\" -> OpBackslash : lexer cs
"|" -> OpPipe : lexer cs
"<-" -> OpBackArrow : lexer cs
"->" -> OpArrow : lexer cs
"@" -> OpAt : lexer cs
"~" -> OpTilde : lexer cs
"=>" -> OpImplies : lexer cs
var -> QVarSym var : lexer cs