{-# LANGUAGE BangPatterns #-}
module Config.LexerUtils where
import Data.Char (GeneralCategory(..), generalCategory, digitToInt,
isAscii, isSpace, readLitChar, ord, isDigit)
import Data.Monoid ((<>))
import Data.Text (Text)
import Data.Word (Word8)
import Numeric (readInt)
import qualified Data.Text as Text
import qualified Data.Text.Lazy as LText
import Config.Tokens
type AlexInput = Located Text
alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
alexGetByte (Located p cs)
= do (c,cs') <- Text.uncons cs
let !b = byteForChar c
!inp = Located (move p c) cs'
return (b, inp)
startPos :: Position
startPos = Position { posIndex = 0, posLine = 1, posColumn = 1 }
move :: Position -> Char -> Position
move (Position ix line column) c =
case c of
'\t' -> Position (ix + 1) line (((column + 7) `div` 8) * 8 + 1)
'\n' -> Position (ix + 1) (line + 1) 1
_ -> Position (ix + 1) line (column + 1)
eofAction :: Position -> LexerMode -> [Located Token]
eofAction eofPosn st =
case st of
_ | posColumn eofPosn /= 1 -> [Located eofPosn (Error UntermFile)]
InComment posn _ -> [Located posn (Error UntermComment)]
InCommentString posn _ -> [Located posn (Error UntermCommentString)]
InString posn _ -> [Located posn (Error UntermString)]
InNormal -> [Located eofPosn{posColumn=0} EOF]
errorAction :: AlexInput -> [Located Token]
errorAction inp = [fmap (Error . NoMatch . Text.head) inp]
data LexerMode
= InNormal
| InComment !Position !LexerMode
| InCommentString !Position !LexerMode
| InString !Position !Text
type Action =
Int ->
Located Text ->
LexerMode ->
(LexerMode, [Located Token])
token :: (Text -> Token) -> Action
token f len match st = (st, [fmap (f . Text.take len) match])
token_ :: Token -> Action
token_ = token . const
nestMode :: (Position -> LexerMode -> LexerMode) -> Action
nestMode f _ match st = (f (locPosition match) st, [])
startString :: Action
startString _ (Located posn text) _ = (InString posn text, [])
endMode :: Action
endMode len (Located endPosn match) mode =
case mode of
InNormal -> (InNormal, [])
InCommentString _ st -> (st, [])
InComment _ st -> (st, [])
InString startPosn input ->
let n = posIndex endPosn - posIndex startPosn + len
badEscape = BadEscape (Text.pack "out of range")
in case reads (Text.unpack (Text.take n input)) of
[(s,"")] -> (InNormal, [Located startPosn (String (Text.pack s))])
_ -> (InNormal, [Located startPosn (Error badEscape)])
untermString :: Action
untermString _ _ = \(InString posn _) ->
(InNormal, [Located posn (Error UntermString)])
number ::
Int ->
Int ->
Text ->
Token
number prefixLen base str =
case readInt (fromIntegral base) (const True) digitToInt str2 of
[(n,"")] -> Number base (s*n)
_ -> error "number: Lexer failure"
where
str2 = drop prefixLen str1
(s,str1) = case Text.unpack str of
'-':rest -> (-1, rest)
rest -> ( 1, rest)
floating ::
Text ->
Token
floating str = Floating (s * read (x1++x2)) (x3-fromIntegral (length x2))
where
(s,str1) = case Text.unpack str of
'-':rest -> (-1, rest)
rest -> ( 1, rest)
(x1,str2) = span isDigit str1
(x2,str3) = case str2 of
'.':xs -> span isDigit xs
_ -> ("", str2)
x3 = case str3 of
[] -> 0
_e:'+':xs -> read xs
_e:xs -> read xs
section :: Text -> Token
section = Section . Text.dropWhileEnd isSpace . Text.init
byteForChar :: Char -> Word8
byteForChar c
| c <= '\6' = non_graphic
| isAscii c = fromIntegral (ord c)
| otherwise = case generalCategory c of
LowercaseLetter -> lower
OtherLetter -> lower
UppercaseLetter -> upper
TitlecaseLetter -> upper
DecimalNumber -> digit
OtherNumber -> digit
ConnectorPunctuation -> symbol
DashPunctuation -> symbol
OtherPunctuation -> symbol
MathSymbol -> symbol
CurrencySymbol -> symbol
ModifierSymbol -> symbol
OtherSymbol -> symbol
Space -> space
ModifierLetter -> other
NonSpacingMark -> other
SpacingCombiningMark -> other
EnclosingMark -> other
LetterNumber -> other
OpenPunctuation -> other
ClosePunctuation -> other
InitialQuote -> other
FinalQuote -> other
_ -> non_graphic
where
non_graphic = 0
upper = 1
lower = 2
digit = 3
symbol = 4
space = 5
other = 6