{-# LANGUAGE TemplateHaskell, CPP #-} module AlexTools ( -- * Lexer Basics initialInput, Input(..), inputFile , Lexeme(..) , SourcePos(..), startPos, beforeStartPos , SourceRange(..) , prettySourcePos, prettySourceRange , prettySourcePosLong, prettySourceRangeLong , HasRange(..) , (<->) , moveSourcePos -- * Writing Lexer Actions , Action -- ** Lexemes , lexeme , matchLength , matchRange , matchText -- ** Manipulating the lexer's state , getLexerState , setLexerState -- ** Access to the lexer's input , startInput , endInput -- * Interface with Alex , AlexInput , alexInputPrevChar , makeAlexGetByte , makeLexer , LexerConfig(..) , simpleLexer , Word8 ) where import Control.DeepSeq import Data.Word(Word8) import Data.Text(Text) import qualified Data.Text as Text import Control.Monad(liftM,ap,replicateM) import Language.Haskell.TH #if !MIN_VERSION_base(4,8,0) import Control.Applicative #endif data Lexeme t = Lexeme { lexemeText :: !Text , lexemeToken :: !t , lexemeRange :: !SourceRange } deriving (Show, Eq) instance NFData t => NFData (Lexeme t) where rnf (Lexeme x y z) = rnf (x,y,z) data SourcePos = SourcePos { sourceIndex :: !Int , sourceLine :: !Int , sourceColumn :: !Int , sourceFile :: !Text } deriving (Show, Eq) -- | Pretty print the source position without the file name. prettySourcePos :: SourcePos -> String prettySourcePos x = show (sourceLine x) ++ ":" ++ show (sourceColumn x) -- | Pretty print the source position, including the file name. prettySourcePosLong :: SourcePos -> String prettySourcePosLong x = Text.unpack (sourceFile x) ++ ":" ++ show (sourceLine x) ++ ":" ++ show (sourceColumn x) instance NFData SourcePos where rnf (SourcePos w x y z) = rnf (w,x,y,z) -- | Update a 'SourcePos' for a particular matched character moveSourcePos :: Char -> SourcePos -> SourcePos moveSourcePos c p = SourcePos { sourceIndex = sourceIndex p + 1 , sourceLine = newLine , sourceColumn = newColumn , sourceFile = sourceFile p } where line = sourceLine p column = sourceColumn p (newLine,newColumn) = case c of '\t' -> (line, ((column + 7) `div` 8) * 8 + 1) '\n' -> (line + 1, 1) _ -> (line, column + 1) -- | A range in the source code. data SourceRange = SourceRange { sourceFrom :: !SourcePos , sourceTo :: !SourcePos } deriving (Show, Eq) -- | Pretty print the range, without the file name prettySourceRange :: SourceRange -> String prettySourceRange x = prettySourcePos (sourceFrom x) ++ "--" ++ prettySourcePos (sourceTo x) -- | Pretty print the range, including the file name. prettySourceRangeLong :: SourceRange -> String prettySourceRangeLong x | sourceFile pfrom == sourceFile pto = Text.unpack (sourceFile pfrom) ++ ":" ++ prettySourcePos pfrom ++ "--" ++ prettySourcePos pto | otherwise = prettySourcePosLong pfrom ++ "--" ++ prettySourcePosLong pto where pfrom = sourceFrom x pto = sourceTo x instance NFData SourceRange where rnf (SourceRange x y) = rnf (x,y) class HasRange t where range :: t -> SourceRange instance HasRange SourcePos where range p = SourceRange { sourceFrom = p , sourceTo = p } instance HasRange SourceRange where range = id instance HasRange (Lexeme t) where range = lexemeRange instance (HasRange a, HasRange b) => HasRange (Either a b) where range (Left x) = range x range (Right x) = range x (<->) :: (HasRange a, HasRange b) => a -> b -> SourceRange x <-> y = SourceRange { sourceFrom = sourceFrom (range x) , sourceTo = sourceTo (range y) } -------------------------------------------------------------------------------- -- | An action to be taken when a regular expression matchers. newtype Action s a = A { runA :: Input -> Input -> Int -> s -> (s, a) } instance Functor (Action s) where fmap = liftM instance Applicative (Action s) where pure a = A (\_ _ _ s -> (s,a)) (<*>) = ap instance Monad (Action s) where return = pure A m >>= f = A (\i1 i2 l s -> let (s1,a) = m i1 i2 l s A m1 = f a in m1 i1 i2 l s1) -- | Acces the input just before the regular expression started matching. startInput :: Action s Input startInput = A (\i1 _ _ s -> (s,i1)) -- | Acces the input just after the regular expression that matched. endInput :: Action s Input endInput = A (\_ i2 _ s -> (s,i2)) -- | The number of characters in the matching input. matchLength :: Action s Int matchLength = A (\_ _ l s -> (s,l)) -- | Acces the curent state of the lexer. getLexerState :: Action s s getLexerState = A (\_ _ _ s -> (s,s)) -- | Change the state of the lexer. setLexerState :: s -> Action s () setLexerState s = A (\_ _ _ _ -> (s,())) -- | Get the range for the matching input. matchRange :: Action s SourceRange matchRange = do i1 <- startInput i2 <- endInput return (inputPos i1 <-> inputPrev i2) -- | Get the text associated with the matched input. matchText :: Action s Text matchText = do i1 <- startInput n <- matchLength return (Text.take n (inputText i1)) -- | Use the token and the current match to construct a lexeme. lexeme :: t -> Action s [Lexeme t] lexeme tok = do r <- matchRange txt <- matchText return [ Lexeme { lexemeRange = r , lexemeToken = tok , lexemeText = txt } ] -- | Information about the lexer's input. data Input = Input { inputPos :: {-# UNPACK #-} !SourcePos -- ^ Current input position. , inputText :: {-# UNPACK #-} !Text -- ^ The text that needs to be lexed. , inputPrev :: {-# UNPACK #-} !SourcePos -- ^ Location of the last consumed character. , inputPrevChar :: {-# UNPACK #-} !Char -- ^ The last consumed character. } -- | Prepare the text for lexing. initialInput :: Text {- ^ Where the text came from -} -> Text {- ^ The text to lex -} -> Input initialInput file str = Input { inputPos = startPos file , inputPrev = beforeStartPos file , inputPrevChar = '\n' -- end of the virtual previous line , inputText = str } startPos :: Text {- ^ Name of file/thing containing this -} -> SourcePos startPos file = SourcePos { sourceIndex = 0 , sourceLine = 1 , sourceColumn = 1 , sourceFile = file } beforeStartPos :: Text -> SourcePos beforeStartPos file = SourcePos { sourceIndex = -1 , sourceLine = 0 , sourceColumn = 0 , sourceFile = file } -- | The file/thing for the current position. inputFile :: Input -> Text inputFile = sourceFile . inputPos -------------------------------------------------------------------------------- -- | Lexer configuration. data LexerConfig s t = LexerConfig { lexerInitialState :: s -- ^ State that the lexer starts in , lexerStateMode :: s -> Int -- ^ Determine the current lexer mode from the lexer's state. , lexerEOF :: s -> [Lexeme t] -- ^ Emit some lexemes at the end of the input. } -- | A lexer that uses no lexer-modes, and does not emit anything at the -- end of the file. simpleLexer :: LexerConfig () t simpleLexer = LexerConfig { lexerInitialState = () , lexerStateMode = \_ -> 0 , lexerEOF = \_ -> [] } -- | Generate a function to use an Alex lexer. -- The expression is of type @LexerConfig s t -> Input -> [Lexeme t]@ makeLexer :: ExpQ makeLexer = do let local = do n <- newName "x" return (varP n, varE n) ([xP,yP,zP], [xE,yE,zE]) <- unzip <$> replicateM 3 local let -- Defined by Alex alexEOF = conP (mkName "AlexEOF") [ ] alexError = conP (mkName "AlexError") [ wildP ] alexSkip = conP (mkName "AlexSkip") [ xP, wildP ] alexToken = conP (mkName "AlexToken") [ xP, yP, zP ] alexScanUser = varE (mkName "alexScanUser") let p ~> e = match p (normalB e) [] body go mode inp cfg = caseE [| $alexScanUser $mode $inp (lexerStateMode $cfg $mode) |] [ alexEOF ~> [| lexerEOF $cfg $mode |] , alexError ~> [| error "internal error in lexer (AlexTools.hs)" |] , alexSkip ~> [| $go $mode $xE |] , alexToken ~> [| case runA $zE $inp $xE $yE $mode of (mode', ts) -> ts ++ $go mode' $xE |] ] [e| \cfg -> let go mode inp = $(body [|go|] [|mode|] [|inp|] [|cfg|]) in go (lexerInitialState cfg) |] type AlexInput = Input alexInputPrevChar :: AlexInput -> Char alexInputPrevChar = inputPrevChar {-# INLINE makeAlexGetByte #-} makeAlexGetByte :: (Char -> Word8) -> AlexInput -> Maybe (Word8,AlexInput) makeAlexGetByte charToByte Input { inputPos = p, inputText = text } = do (c,text') <- Text.uncons text let p' = moveSourcePos c p x = charToByte c inp = Input { inputPrev = p , inputPrevChar = c , inputPos = p' , inputText = text' } x `seq` inp `seq` return (x, inp)