module SMR.Source.Lexer
( lexTokens
, Located (..)
, Location(..))
where
import SMR.Source.Token
import SMR.Data.Located
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Char as Char
lexTokens :: Location -> [Char] -> ([Located Token], Location, [Char])
lexTokens lStart0 cs0
= case skipSpace lStart0 cs0 of
(lStart, [])
-> ( LL lStart lStart KEnd : []
, lStart, [])
(lStart, cs)
-> case lexToken lStart cs of
Nothing
-> ([], lStart, cs)
Just (k, cs')
| (ks, lStart', cs'') <- lexTokens (endOfLocated k) cs'
-> (k : ks, lStart', cs'')
lexToken :: Location -> [Char] -> Maybe (Located Token, [Char])
lexToken lStart xx
= case xx of
[]
-> Nothing
c : cs
| isCharPunc c
-> let lEnd = incCharOfLocation 1 lStart
tok = KPunc c
in Just (LL lStart lEnd tok, cs)
| Just (space, xx') <- takeSpace c cs
, Just (name, lEnd, csRest) <- lexName (incCharOfLocation 1 lStart) xx'
-> let tok = KName space name
in Just (LL lStart lEnd tok, csRest)
| Char.isDigit c
, Just (nat, lEnd, csRest) <- lexNat lStart (c : cs)
-> let tok = KNat nat
in Just (LL lStart lEnd tok, csRest)
| c == '\"'
, Just (tx, lEnd, csRest) <- lexText lStart cs
-> let tok = KText tx
in Just (LL lStart lEnd tok, csRest)
| otherwise
-> Nothing
lexName :: Location -> [Char] -> Maybe (Text, Location, [Char])
lexName lStart xx
= go lStart [] xx
where
go lStart' acc []
| not $ null acc
= let name = Text.pack $ reverse acc
in Just (name, lStart', [])
| otherwise
= Nothing
go lStart' acc (c : cs)
| isNameBodyChar c
= go (incCharOfLocation 1 lStart') (c : acc) cs
| otherwise
= let name = Text.pack $ reverse acc
in Just (name, lStart', c : cs)
lexNat :: Location -> [Char] -> Maybe (Integer, Location, [Char])
lexNat lStart xx
= go lStart [] xx
where
go lStart' acc []
| not $ null acc
, all Char.isDigit acc
, nat <- read $ reverse acc
= Just (nat, lStart', [])
go lStart' acc (c : cs)
| Char.isDigit c
= go (incCharOfLocation 1 lStart') (c : acc) cs
| all Char.isDigit acc
, not $ null acc
, nat <- read $ reverse acc
= Just (nat, lStart', c : cs)
go _ _ _
= Nothing
lexText :: Location -> [Char] -> Maybe (Text, Location, [Char])
lexText lStart xx
= go lStart [] xx
where
go _ _ []
= Nothing
go lStart' acc ('\"' : cs)
= Just (Text.pack $ reverse acc, lStart', cs)
go lStart' acc ('\\' : c : cs)
= let l' = incCharOfLocation 1 lStart'
in case c of
'\"' -> go l' (c : acc) cs
'\\' -> go l' (c : acc) cs
'b' -> go l' ('\b' : acc) cs
'f' -> go l' ('\f' : acc) cs
'n' -> go l' ('\n' : acc) cs
'r' -> go l' ('\r' : acc) cs
't' -> go l' ('\t' : acc) cs
_ -> Nothing
go lStart' acc (c : cs)
= let l' = incCharOfLocation 1 lStart'
in go l' (c : acc) cs
skipSpace :: Location -> [Char] -> (Location, [Char])
skipSpace lStart xx
= case xx of
[] -> (lStart, xx)
c : cs
| c == ' ' -> skipSpace (incCharOfLocation 1 lStart) cs
| c == '\n' -> skipSpace (incLineOfLocation 1 lStart) cs
| c == '\t' -> skipSpace (incCharOfLocation 8 lStart) cs
| c == '-'
, c2 : cs2 <- cs
, c2 == '-'
-> skipSpace lStart $ dropWhile (\x -> x /= '\n') cs2
| otherwise -> (lStart, xx)
takeSpace :: Char -> [Char] -> Maybe (Space, [Char])
takeSpace c cs
| Char.isLower c = Just (SVar, c : cs)
| c == '@' = Just (SMac, cs)
| c == '%' = Just (SSym, cs)
| c == '+' = Just (SSet, cs)
| c == '#'
, c' : cs' <- cs
, c' == '#'
= Just (SKey, cs')
| c == '#' = Just (SPrm, cs)
| otherwise = Nothing
isNameBodyChar :: Char -> Bool
isNameBodyChar c
= Char.isLower c
|| Char.isUpper c
|| Char.isDigit c
|| (c == '-' || c == '\'' || c == '_')
isCharPunc :: Char -> Bool
isCharPunc c
| c == '(' = True
| c == ')' = True
| c == '{' = True
| c == '}' = True
| c == '[' = True
| c == ']' = True
| c == '<' = True
| c == '>' = True
| c == '^' = True
| c == ',' = True
| c == ':' = True
| c == '\\' = True
| c == '.' = True
| c == ';' = True
| c == '=' = True
| c == '$' = True
| c == '!' = True
| c == '~' = True
| c == '?' = True
| otherwise = False