{- | Module : $Header$ Description : Lexer combinators Copyright : (c) 1999 - 2004, Wolfgang Lux 2012 - 2013, Björn Peemöller 2016 , Jan Tikovsky License : BSD-3-clause Maintainer : bjp@informatik.uni-kiel.de Stability : experimental Portability : portable This module provides the basic types and combinators to implement the lexers. The combinators use continuation passing code in a monadic style. The first argument of the continuation function is the current span, and the second is the string to be parsed. The third argument is a flag which signals the lexer that it is lexing the beginning of a line and therefore has to check for layout tokens. The fourth argument is a stack of indentations that is used to handle nested layout groups. -} module Curry.Base.LexComb ( -- * Types Symbol (..), Indent, Context, P, CYM, SuccessP, FailP, Lexer -- * Monadic functions , parse, applyLexer, returnP, thenP, thenP_, failP, warnP , liftP, closeP0, closeP1 -- * Combinators for layout handling , pushContext, popContext -- * Conversion of numbers , convertSignedIntegral, convertSignedFloating , convertIntegral, convertFloating ) where import Data.Char (digitToInt) import Curry.Base.Monad (CYM, failMessageAt, warnMessageAt) import Curry.Base.Span ( Distance, Span (..), startCol, fstSpan, span2Pos , setDistance) infixl 1 `thenP`, `thenP_` -- |Type class for symbols class (Ord s, Show s) => Symbol s where -- |Does the 'Symbol' represent the end of the input? isEOF :: s -> Bool -- |Compute the distance of a 'Symbol' dist :: Int -> s -> Distance -- |Type for indentations, necessary for the layout rule type Indent = Int -- |Type of context for representing layout grouping type Context = [Indent] -- |Basic lexer function type P a = Span -- ^ Current source code span -> String -- ^ 'String' to be parsed -> Bool -- ^ Flag whether the beginning of a line should be -- parsed, which requires layout checking -> Context -- ^ context as a stack of 'Indent's -> CYM a -- |Apply a lexer on a 'String' to lex the content. The second parameter -- requires a 'FilePath' to use in the 'Span' parse :: P a -> FilePath -> String -> CYM a parse p fn s = p (fstSpan fn) s True [] -- --------------------------------------------------------------------------- -- CPS lexer -- --------------------------------------------------------------------------- -- |success continuation type SuccessP s a = Span -> s -> P a -- |failure continuation type FailP a = Span -> String -> P a -- |A CPS lexer type Lexer s a = SuccessP s a -> FailP a -> P a -- |Apply a lexer applyLexer :: Symbol s => Lexer s [(Span, s)] -> P [(Span, s)] applyLexer lexer = lexer successP failP where successP sp t | isEOF t = returnP [(sp', t)] | otherwise = ((sp', t) :) `liftP` lexer successP failP where sp' = setDistance sp (dist (startCol sp) t) -- --------------------------------------------------------------------------- -- Monadic functions for the lexer. -- --------------------------------------------------------------------------- -- |Lift a value into the lexer type returnP :: a -> P a returnP x _ _ _ _ = return x -- |Apply the first lexer and then apply the second one, based on the result -- of the first lexer. thenP :: P a -> (a -> P b) -> P b thenP lexer k sp s bol ctxt = lexer sp s bol ctxt >>= \x -> k x sp s bol ctxt -- |Apply the first lexer and then apply the second one, ignoring the first -- result. thenP_ :: P a -> P b -> P b p1 `thenP_` p2 = p1 `thenP` \_ -> p2 -- |Fail to lex on a 'Span', given an error message failP :: Span -> String -> P a failP sp msg _ _ _ _ = failMessageAt (span2Pos sp) msg -- |Warn on a 'Span', given a warning message warnP :: Span -> String -> P a -> P a warnP warnSpan msg lexer sp s bol ctxt = warnMessageAt (span2Pos warnSpan) msg >> lexer sp s bol ctxt -- |Apply a pure function to the lexers result liftP :: (a -> b) -> P a -> P b liftP f p = p `thenP` returnP . f -- |Lift a lexer into the 'P' monad, returning the lexer when evaluated. closeP0 :: P a -> P (P a) closeP0 lexer sp s bol ctxt = return (\_ _ _ _ -> lexer sp s bol ctxt) -- |Lift a lexer-generating function into the 'P' monad, returning the -- function when evaluated. closeP1 :: (a -> P b) -> P (a -> P b) closeP1 f sp s bol ctxt = return (\x _ _ _ _ -> f x sp s bol ctxt) -- --------------------------------------------------------------------------- -- Combinators for handling layout. -- --------------------------------------------------------------------------- -- |Push an 'Indent' to the context, increasing the levels of indentation pushContext :: Indent -> P a -> P a pushContext col cont sp s bol ctxt = cont sp s bol (col : ctxt) -- |Pop an 'Indent' from the context, decreasing the levels of indentation popContext :: P a -> P a popContext cont sp s bol (_ : ctxt) = cont sp s bol ctxt popContext _ sp _ _ [] = failMessageAt (span2Pos sp) $ "Parse error: popping layout from empty context stack. " ++ "Perhaps you have inserted too many '}'?" -- --------------------------------------------------------------------------- -- Conversions from 'String's into numbers. -- --------------------------------------------------------------------------- -- |Convert a String into a signed intergral using a given base convertSignedIntegral :: Num a => a -> String -> a convertSignedIntegral b ('+':s) = convertIntegral b s convertSignedIntegral b ('-':s) = - convertIntegral b s convertSignedIntegral b s = convertIntegral b s -- |Convert a String into an unsigned intergral using a given base convertIntegral :: Num a => a -> String -> a convertIntegral b = foldl op 0 where m `op` n = b * m + fromIntegral (digitToInt n) -- |Convert a mantissa, a fraction part and an exponent into a signed -- floating value convertSignedFloating :: Fractional a => String -> String -> Int -> a convertSignedFloating ('+':m) f e = convertFloating m f e convertSignedFloating ('-':m) f e = - convertFloating m f e convertSignedFloating m f e = convertFloating m f e -- |Convert a mantissa, a fraction part and an exponent into an unsigned -- floating value convertFloating :: Fractional a => String -> String -> Int -> a convertFloating m f e | e' == 0 = m' | e' > 0 = m' * 10 ^ e' | otherwise = m' / 10 ^ (- e') where m' = convertIntegral 10 (m ++ f) e' = e - length f