module DDC.Core.Lexer
( module DDC.Core.Lexer.Tokens
, module DDC.Core.Lexer.Names
, lexModuleWithOffside
, lexExp)
where
import DDC.Core.Lexer.Offside
import DDC.Core.Lexer.Comments
import DDC.Core.Lexer.Names
import DDC.Core.Lexer.Tokens
import DDC.Data.SourcePos
import DDC.Data.Token
import Data.Char
import Data.List
lexModuleWithOffside
:: FilePath
-> Int
-> String
-> [Token (Tok String)]
lexModuleWithOffside sourceName lineStart str
=
applyOffside [] []
$ addStarts
$ dropComments
$ lexString sourceName lineStart str
lexExp :: FilePath
-> Int
-> String
-> [Token (Tok String)]
lexExp sourceName lineStart str
=
dropNewLines
$ dropComments
$ lexString sourceName lineStart str
lexString :: String -> Int -> String -> [Token (Tok String)]
lexString sourceName lineStart str
= lexWord lineStart 1 str
where
lexWord :: Int -> Int -> String -> [Token (Tok String)]
lexWord line column w
= let tok t = Token t (SourcePos sourceName line column)
tokM = tok . KM
tokA = tok . KA
tokN = tok . KN
lexUpto pat rest
= case dropWhile (not . isPrefixOf pat) (tails rest) of
(x:_) -> x
_ -> []
lexMore n rest
= lexWord line (column + n) rest
in case w of
[] -> []
' ' : w' -> lexMore 1 w'
'\t' : w' -> lexMore 8 w'
c : cs
| isDigit c
, (body, rest) <- span isLitBody cs
-> tokN (KLit (c:body)) : lexMore (length (c:body)) rest
'-' : c : cs
| isDigit c
, (body, rest) <- span isLitBody cs
-> tokN (KLit ('-':c:body)) : lexMore (length (c:body)) rest
'{' : '-' : w'
-> tokM KCommentBlockStart : lexMore 2 (lexUpto "-}" w')
'-' : '}' : w'
-> tokM KCommentBlockEnd : lexMore 2 w'
'-' : '-' : w'
-> let (_junk, w'') = span (/= '\n') w'
in tokM KCommentLineStart : lexMore 2 w''
'\n' : w' -> tokM KNewLine : lexWord (line + 1) 1 w'
'(' : c : cs
| isOpStart c
, (body, ')' : w') <- span isOpBody cs
-> tokA (KOpVar (c : body)) : lexMore (2 + length (c : body)) w'
'(' : ')' : w' -> tokA KDaConUnit : lexMore 2 w'
'[' : ':' : w' -> tokA KSquareColonBra : lexMore 2 w'
':' : ']' : w' -> tokA KSquareColonKet : lexMore 2 w'
'{' : ':' : w' -> tokA KBraceColonBra : lexMore 2 w'
':' : '}' : w' -> tokA KBraceColonKet : lexMore 2 w'
'~' : '>' : w' -> tokA KArrowTilde : lexMore 2 w'
'-' : '>' : w' -> tokA KArrowDash : lexMore 2 w'
'<' : '-' : w' -> tokA KArrowDashLeft : lexMore 2 w'
'=' : '>' : w' -> tokA KArrowEquals : lexMore 2 w'
'/' : '\\' : w' -> tokA KBigLambda : lexMore 2 w'
'^' : cs
| (ds, rest) <- span isDigit cs
, length ds >= 1
-> tokA (KIndex (read ds)) : lexMore (1 + length ds) rest
'(' : w' -> tokA KRoundBra : lexMore 1 w'
')' : w' -> tokA KRoundKet : lexMore 1 w'
'[' : w' -> tokA KSquareBra : lexMore 1 w'
']' : w' -> tokA KSquareKet : lexMore 1 w'
'{' : w' -> tokA KBraceBra : lexMore 1 w'
'}' : w' -> tokA KBraceKet : lexMore 1 w'
'.' : w' -> tokA KDot : lexMore 1 w'
',' : w' -> tokA KComma : lexMore 1 w'
';' : w' -> tokA KSemiColon : lexMore 1 w'
'_' : w' -> tokA KUnderscore : lexMore 1 w'
'\\' : w' -> tokA KBackSlash : lexMore 1 w'
c : cs
| isOpStart c
, (body, rest) <- span isOpBody cs
-> tokA (KOp (c : body)) : lexMore (length (c : body)) rest
'^' : w' -> tokA KHat : lexMore 1 w'
name
| Just w' <- stripPrefix "Pure" name
-> tokA KBotEffect : lexMore 2 w'
| Just w' <- stripPrefix "Empty" name
-> tokA KBotClosure : lexMore 2 w'
c : cs
| isConStart c
, (body, rest) <- span isConBody cs
, (body', rest') <- case rest of
'\'' : rest' -> (body ++ "'", rest')
'#' : rest' -> (body ++ "#", rest')
_ -> (body, rest)
-> let readNamedCon s
| Just socon <- readSoConBuiltin s
= tokA (KSoConBuiltin socon) : lexMore (length s) rest'
| Just kicon <- readKiConBuiltin s
= tokA (KKiConBuiltin kicon) : lexMore (length s) rest'
| Just twcon <- readTwConBuiltin s
= tokA (KTwConBuiltin twcon) : lexMore (length s) rest'
| Just tccon <- readTcConBuiltin s
= tokA (KTcConBuiltin tccon) : lexMore (length s) rest'
| Just con <- readCon s
= tokN (KCon con) : lexMore (length s) rest'
| otherwise
= [tok (KJunk [c])]
in readNamedCon (c : body')
c : cs
| isVarStart c
, (body, rest) <- span isVarBody cs
, (body', rest') <- case rest of
'#' : rest' -> (body ++ "#", rest')
_ -> (body, rest)
-> let readNamedVar s
| Just t <- lookup s keywords
= tok t : lexMore (length s) rest'
| Just wc <- readWbConBuiltin s
= tokA (KWbConBuiltin wc) : lexMore (length s) rest'
| Just v <- readVar s
= tokN (KVar v) : lexMore (length s) rest'
| otherwise
= [tok (KJunk [c])]
in readNamedVar (c : body')
c : cs -> (tok $ KJunk [c]) : lexMore 1 cs