#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ >= 702
#endif
module Text.Parser.Token
(
whiteSpace
, charLiteral
, stringLiteral
, stringLiteral'
, natural
, integer
, double
, naturalOrDouble
, integerOrDouble
, scientific
, naturalOrScientific
, integerOrScientific
, symbol
, textSymbol
, symbolic
, parens
, braces
, angles
, brackets
, comma
, colon
, dot
, semiSep
, semiSep1
, commaSep
, commaSep1
, TokenParsing(..)
, Unspaced(..)
, Unlined(..)
, Unhighlighted(..)
, decimal
, hexadecimal
, octal
, characterChar
, integer'
, IdentifierStyle(..)
, liftIdentifierStyle
, ident
, reserve
, reserveText
, styleName
, styleStart
, styleLetter
, styleChars
, styleReserved
, styleHighlight
, styleReservedHighlight
, styleHighlights
) where
import Control.Applicative
import Control.Monad (MonadPlus(..), when)
import Control.Monad.Trans.Class
import Control.Monad.Trans.State.Lazy as Lazy
import Control.Monad.Trans.State.Strict as Strict
import Control.Monad.Trans.Writer.Lazy as Lazy
import Control.Monad.Trans.Writer.Strict as Strict
import Control.Monad.Trans.RWS.Lazy as Lazy
import Control.Monad.Trans.RWS.Strict as Strict
import Control.Monad.Trans.Reader
import Control.Monad.Trans.Identity
import Control.Monad.State.Class as Class
import Control.Monad.Reader.Class as Class
import Control.Monad.Writer.Class as Class
import Data.Char
import Data.Functor.Identity
import qualified Data.HashSet as HashSet
import Data.HashSet (HashSet)
import Data.List (foldl', transpose)
#if __GLASGOW_HASKELL__ < 710
import Data.Monoid
#endif
import Data.Scientific ( Scientific )
import qualified Data.Scientific as Sci
import Data.String
import Data.Text hiding (empty,zip,foldl,foldl',take,map,length,splitAt,null,transpose)
import Numeric (showIntAtBase)
import qualified Text.ParserCombinators.ReadP as ReadP
import qualified Text.Parsec as Parsec
import qualified Data.Attoparsec.Types as Att
import Text.Parser.Char
import Text.Parser.Combinators
import Text.Parser.Token.Highlight
whiteSpace :: TokenParsing m => m ()
whiteSpace = someSpace <|> pure ()
charLiteral :: forall m. TokenParsing m => m Char
charLiteral = token (highlight CharLiteral lit) where
lit :: m Char
lit = between (char '\'') (char '\'' <?> "end of character") characterChar
<?> "character"
stringLiteral :: forall m s. (TokenParsing m, IsString s) => m s
stringLiteral = fromString <$> token (highlight StringLiteral lit) where
lit :: m [Char]
lit = Prelude.foldr (maybe id (:)) ""
<$> between (char '"') (char '"' <?> "end of string") (many stringChar)
<?> "string"
stringChar :: m (Maybe Char)
stringChar = Just <$> stringLetter
<|> stringEscape
<?> "string character"
stringLetter :: m Char
stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026'))
stringEscape :: m (Maybe Char)
stringEscape = highlight EscapeCode $ char '\\' *> esc where
esc :: m (Maybe Char)
esc = Nothing <$ escapeGap
<|> Nothing <$ escapeEmpty
<|> Just <$> escapeCode
escapeEmpty, escapeGap :: m Char
escapeEmpty = char '&'
escapeGap = skipSome space *> (char '\\' <?> "end of string gap")
stringLiteral' :: forall m s. (TokenParsing m, IsString s) => m s
stringLiteral' = fromString <$> token (highlight StringLiteral lit) where
lit :: m [Char]
lit = Prelude.foldr (maybe id (:)) ""
<$> between (char '\'') (char '\'' <?> "end of string") (many stringChar)
<?> "string"
stringChar :: m (Maybe Char)
stringChar = Just <$> stringLetter
<|> stringEscape
<?> "string character"
stringLetter :: m Char
stringLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026'))
stringEscape :: m (Maybe Char)
stringEscape = highlight EscapeCode $ char '\\' *> esc where
esc :: m (Maybe Char)
esc = Nothing <$ escapeGap
<|> Nothing <$ escapeEmpty
<|> Just <$> escapeCode
escapeEmpty, escapeGap :: m Char
escapeEmpty = char '&'
escapeGap = skipSome space *> (char '\\' <?> "end of string gap")
natural :: TokenParsing m => m Integer
natural = token natural'
integer :: forall m. TokenParsing m => m Integer
integer = token (token (highlight Operator sgn <*> natural')) <?> "integer"
where
sgn :: m (Integer -> Integer)
sgn = negate <$ char '-'
<|> id <$ char '+'
<|> pure id
double :: TokenParsing m => m Double
double = token (highlight Number (Sci.toRealFloat <$> floating) <?> "double")
naturalOrDouble :: TokenParsing m => m (Either Integer Double)
naturalOrDouble = fmap Sci.toRealFloat <$> naturalOrScientific
integerOrDouble :: TokenParsing m => m (Either Integer Double)
integerOrDouble = fmap Sci.toRealFloat <$> integerOrScientific
scientific :: TokenParsing m => m Scientific
scientific = token (highlight Number floating <?> "scientific")
naturalOrScientific :: TokenParsing m => m (Either Integer Scientific)
naturalOrScientific = token (highlight Number natFloating <?> "number")
integerOrScientific :: forall m. TokenParsing m => m (Either Integer Scientific)
integerOrScientific = token (highlight Number ios <?> "number")
where ios :: m (Either Integer Scientific)
ios = mneg <$> optional (oneOf "+-") <*> natFloating
mneg (Just '-') nd = either (Left . negate) (Right . negate) nd
mneg _ nd = nd
symbol :: TokenParsing m => String -> m String
symbol name = token (highlight Symbol (string name))
textSymbol :: TokenParsing m => Text -> m Text
textSymbol name = token (highlight Symbol (text name))
symbolic :: TokenParsing m => Char -> m Char
symbolic name = token (highlight Symbol (char name))
parens :: TokenParsing m => m a -> m a
parens = nesting . between (symbolic '(') (symbolic ')')
braces :: TokenParsing m => m a -> m a
braces = nesting . between (symbolic '{') (symbolic '}')
angles :: TokenParsing m => m a -> m a
angles = nesting . between (symbolic '<') (symbolic '>')
brackets :: TokenParsing m => m a -> m a
brackets = nesting . between (symbolic '[') (symbolic ']')
comma :: TokenParsing m => m Char
comma = symbolic ','
colon :: TokenParsing m => m Char
colon = symbolic ':'
dot :: TokenParsing m => m Char
dot = symbolic '.'
semiSep :: TokenParsing m => m a -> m [a]
semiSep p = sepBy p semi
semiSep1 :: TokenParsing m => m a -> m [a]
semiSep1 p = sepBy1 p semi
commaSep :: TokenParsing m => m a -> m [a]
commaSep p = sepBy p comma
commaSep1 :: TokenParsing m => m a -> m [a]
commaSep1 p = sepBy1 p comma
class CharParsing m => TokenParsing m where
someSpace :: m ()
someSpace = skipSome (satisfy isSpace)
nesting :: m a -> m a
nesting = id
semi :: m Char
semi = token (satisfy (';'==) <?> ";")
highlight :: Highlight -> m a -> m a
highlight _ a = a
token :: m a -> m a
token p = p <* (someSpace <|> pure ())
instance (TokenParsing m, MonadPlus m) => TokenParsing (Lazy.StateT s m) where
nesting (Lazy.StateT m) = Lazy.StateT $ nesting . m
someSpace = lift someSpace
semi = lift semi
highlight h (Lazy.StateT m) = Lazy.StateT $ highlight h . m
instance (TokenParsing m, MonadPlus m) => TokenParsing (Strict.StateT s m) where
nesting (Strict.StateT m) = Strict.StateT $ nesting . m
someSpace = lift someSpace
semi = lift semi
highlight h (Strict.StateT m) = Strict.StateT $ highlight h . m
instance (TokenParsing m, MonadPlus m) => TokenParsing (ReaderT e m) where
nesting (ReaderT m) = ReaderT $ nesting . m
someSpace = lift someSpace
semi = lift semi
highlight h (ReaderT m) = ReaderT $ highlight h . m
instance (TokenParsing m, MonadPlus m, Monoid w) => TokenParsing (Strict.WriterT w m) where
nesting (Strict.WriterT m) = Strict.WriterT $ nesting m
someSpace = lift someSpace
semi = lift semi
highlight h (Strict.WriterT m) = Strict.WriterT $ highlight h m
instance (TokenParsing m, MonadPlus m, Monoid w) => TokenParsing (Lazy.WriterT w m) where
nesting (Lazy.WriterT m) = Lazy.WriterT $ nesting m
someSpace = lift someSpace
semi = lift semi
highlight h (Lazy.WriterT m) = Lazy.WriterT $ highlight h m
instance (TokenParsing m, MonadPlus m, Monoid w) => TokenParsing (Lazy.RWST r w s m) where
nesting (Lazy.RWST m) = Lazy.RWST $ \r s -> nesting (m r s)
someSpace = lift someSpace
semi = lift semi
highlight h (Lazy.RWST m) = Lazy.RWST $ \r s -> highlight h (m r s)
instance (TokenParsing m, MonadPlus m, Monoid w) => TokenParsing (Strict.RWST r w s m) where
nesting (Strict.RWST m) = Strict.RWST $ \r s -> nesting (m r s)
someSpace = lift someSpace
semi = lift semi
highlight h (Strict.RWST m) = Strict.RWST $ \r s -> highlight h (m r s)
instance (TokenParsing m, MonadPlus m) => TokenParsing (IdentityT m) where
nesting = IdentityT . nesting . runIdentityT
someSpace = lift someSpace
semi = lift semi
highlight h = IdentityT . highlight h . runIdentityT
data IdentifierStyle m = IdentifierStyle
{ _styleName :: String
, _styleStart :: m Char
, _styleLetter :: m Char
, _styleReserved :: HashSet String
, _styleHighlight :: Highlight
, _styleReservedHighlight :: Highlight
}
styleName :: Functor f => (String -> f String) -> IdentifierStyle m -> f (IdentifierStyle m)
styleName f is = (\n -> is { _styleName = n }) <$> f (_styleName is)
styleStart :: Functor f => (m Char -> f (m Char)) -> IdentifierStyle m -> f (IdentifierStyle m)
styleStart f is = (\n -> is { _styleStart = n }) <$> f (_styleStart is)
styleLetter :: Functor f => (m Char -> f (m Char)) -> IdentifierStyle m -> f (IdentifierStyle m)
styleLetter f is = (\n -> is { _styleLetter = n }) <$> f (_styleLetter is)
styleChars :: Applicative f => (m Char -> f (n Char)) -> IdentifierStyle m -> f (IdentifierStyle n)
styleChars f is = (\n m -> is { _styleStart = n, _styleLetter = m }) <$> f (_styleStart is) <*> f (_styleLetter is)
styleReserved :: Functor f => (HashSet String -> f (HashSet String)) -> IdentifierStyle m -> f (IdentifierStyle m)
styleReserved f is = (\n -> is { _styleReserved = n }) <$> f (_styleReserved is)
styleHighlight :: Functor f => (Highlight -> f Highlight) -> IdentifierStyle m -> f (IdentifierStyle m)
styleHighlight f is = (\n -> is { _styleHighlight = n }) <$> f (_styleHighlight is)
styleReservedHighlight :: Functor f => (Highlight -> f Highlight) -> IdentifierStyle m -> f (IdentifierStyle m)
styleReservedHighlight f is = (\n -> is { _styleReservedHighlight = n }) <$> f (_styleReservedHighlight is)
styleHighlights :: Applicative f => (Highlight -> f Highlight) -> IdentifierStyle m -> f (IdentifierStyle m)
styleHighlights f is = (\n m -> is { _styleHighlight = n, _styleReservedHighlight = m }) <$> f (_styleHighlight is) <*> f (_styleReservedHighlight is)
liftIdentifierStyle :: (MonadTrans t, Monad m) => IdentifierStyle m -> IdentifierStyle (t m)
liftIdentifierStyle = runIdentity . styleChars (Identity . lift)
reserve :: (TokenParsing m, Monad m) => IdentifierStyle m -> String -> m ()
reserve s name = token $ try $ do
_ <- highlight (_styleReservedHighlight s) $ string name
notFollowedBy (_styleLetter s) <?> "end of " ++ show name
reserveText :: (TokenParsing m, Monad m) => IdentifierStyle m -> Text -> m ()
reserveText s name = token $ try $ do
_ <- highlight (_styleReservedHighlight s) $ text name
notFollowedBy (_styleLetter s) <?> "end of " ++ show name
ident :: (TokenParsing m, Monad m, IsString s) => IdentifierStyle m -> m s
ident s = fmap fromString $ token $ try $ do
name <- highlight (_styleHighlight s)
((:) <$> _styleStart s <*> many (_styleLetter s) <?> _styleName s)
when (HashSet.member name (_styleReserved s)) $ unexpected $ "reserved " ++ _styleName s ++ " " ++ show name
return name
characterChar :: TokenParsing m => m Char
charEscape, charLetter :: TokenParsing m => m Char
characterChar = charLetter <|> charEscape <?> "literal character"
charEscape = highlight EscapeCode $ char '\\' *> escapeCode
charLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026'))
escapeCode :: forall m. TokenParsing m => m Char
escapeCode = (charEsc <|> charNum <|> charAscii <|> charControl) <?> "escape code"
where
charControl, charNum :: m Char
charControl = (\c -> toEnum (fromEnum c fromEnum '@')) <$> (char '^' *> (upper <|> char '@'))
charNum = toEnum <$> num
where
num :: m Int
num = bounded 10 maxchar
<|> (char 'o' *> bounded 8 maxchar)
<|> (char 'x' *> bounded 16 maxchar)
maxchar = fromEnum (maxBound :: Char)
bounded :: Int -> Int -> m Int
bounded base bnd = foldl' (\x d -> base * x + digitToInt d) 0
<$> bounded' (take base thedigits) (map digitToInt $ showIntAtBase base intToDigit bnd "")
where
thedigits :: [m Char]
thedigits = map char ['0'..'9'] ++ map oneOf (transpose [['A'..'F'],['a'..'f']])
toomuch :: m a
toomuch = unexpected "out-of-range numeric escape sequence"
bounded', bounded'' :: [m Char] -> [Int] -> m [Char]
bounded' dps@(zero:_) bds = skipSome zero *> ([] <$ notFollowedBy (choice dps) <|> bounded'' dps bds)
<|> bounded'' dps bds
bounded' [] _ = error "bounded called with base 0"
bounded'' dps [] = [] <$ notFollowedBy (choice dps) <|> toomuch
bounded'' dps (bd : bds) = let anyd :: m Char
anyd = choice dps
nomore :: m ()
nomore = notFollowedBy anyd <|> toomuch
(low, ex : high) = splitAt bd dps
in ((:) <$> choice low <*> atMost (length bds) anyd) <* nomore
<|> ((:) <$> ex <*> ([] <$ nomore <|> bounded'' dps bds))
<|> if not (null bds)
then (:) <$> choice high <*> atMost (length bds 1) anyd <* nomore
else empty
atMost n p | n <= 0 = pure []
| otherwise = ((:) <$> p <*> atMost (n 1) p) <|> pure []
charEsc :: m Char
charEsc = choice $ parseEsc <$> escMap
parseEsc (c,code) = code <$ char c
escMap = zip "abfnrtv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"
charAscii :: m Char
charAscii = choice $ parseAscii <$> asciiMap
parseAscii (asc,code) = try $ code <$ string asc
asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)
ascii2codes, ascii3codes :: [String]
ascii2codes = [ "BS","HT","LF","VT","FF","CR","SO"
, "SI","EM","FS","GS","RS","US","SP"]
ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK"
,"BEL","DLE","DC1","DC2","DC3","DC4","NAK"
,"SYN","ETB","CAN","SUB","ESC","DEL"]
ascii2, ascii3 :: String
ascii2 = "\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP"
ascii3 = "\NUL\SOH\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK\SYN\ETB\CAN\SUB\ESC\DEL"
natural' :: TokenParsing m => m Integer
natural' = highlight Number nat <?> "natural"
number :: TokenParsing m => Integer -> m Char -> m Integer
number base baseDigit =
foldl' (\x d -> base*x + toInteger (digitToInt d)) 0 <$> some baseDigit
integer' :: TokenParsing m => m Integer
integer' = int <?> "integer"
sign :: TokenParsing m => m (Integer -> Integer)
sign = highlight Operator
$ negate <$ char '-'
<|> id <$ char '+'
<|> pure id
int :: TokenParsing m => m Integer
int = sign <*> highlight Number nat
nat, zeroNumber :: TokenParsing m => m Integer
nat = zeroNumber <|> decimal
zeroNumber = char '0' *> (hexadecimal <|> octal <|> decimal <|> pure 0) <?> ""
floating :: TokenParsing m => m Scientific
floating = decimal <**> fractExponent
fractExponent :: forall m. TokenParsing m => m (Integer -> Scientific)
fractExponent = (\fract expo n -> (fromInteger n + fract) * expo) <$> fraction <*> option 1 exponent'
<|> (\expo n -> fromInteger n * expo) <$> exponent'
where
fraction :: m Scientific
fraction = foldl' op 0 <$> (char '.' *> (some digit <?> "fraction"))
op f d = f + Sci.scientific (fromIntegral (digitToInt d)) (Sci.base10Exponent f 1)
exponent' :: m Scientific
exponent' = ((\f e -> power (f e)) <$ oneOf "eE" <*> sign <*> (decimal <?> "exponent")) <?> "exponent"
power = Sci.scientific 1 . fromInteger
natFloating, zeroNumFloat, decimalFloat :: TokenParsing m => m (Either Integer Scientific)
natFloating
= char '0' *> zeroNumFloat
<|> decimalFloat
zeroNumFloat
= Left <$> (hexadecimal <|> octal)
<|> decimalFloat
<|> pure 0 <**> try fractFloat
<|> pure (Left 0)
decimalFloat = decimal <**> option Left (try fractFloat)
fractFloat :: TokenParsing m => m (Integer -> Either Integer Scientific)
fractFloat = (Right .) <$> fractExponent
decimal :: TokenParsing m => m Integer
decimal = number 10 digit
hexadecimal :: TokenParsing m => m Integer
hexadecimal = oneOf "xX" *> number 16 hexDigit
octal :: TokenParsing m => m Integer
octal = oneOf "oO" *> number 8 octDigit
newtype Unhighlighted m a = Unhighlighted { runUnhighlighted :: m a }
deriving (Functor,Applicative,Alternative,Monad,MonadPlus,CharParsing)
instance Parsing m => Parsing (Unhighlighted m) where
try (Unhighlighted m) = Unhighlighted $ try m
Unhighlighted m <?> l = Unhighlighted $ m <?> l
unexpected = Unhighlighted . unexpected
eof = Unhighlighted eof
notFollowedBy (Unhighlighted m) = Unhighlighted $ notFollowedBy m
instance MonadTrans Unhighlighted where
lift = Unhighlighted
instance MonadState s m => MonadState s (Unhighlighted m) where
get = lift Class.get
put = lift . Class.put
instance MonadReader e m => MonadReader e (Unhighlighted m) where
ask = lift Class.ask
local f = Unhighlighted . Class.local f . runUnhighlighted
instance MonadWriter e m => MonadWriter e (Unhighlighted m) where
tell = lift . Class.tell
listen = Unhighlighted . Class.listen . runUnhighlighted
pass = Unhighlighted . Class.pass . runUnhighlighted
instance TokenParsing m => TokenParsing (Unhighlighted m) where
nesting (Unhighlighted m) = Unhighlighted (nesting m)
someSpace = Unhighlighted someSpace
semi = Unhighlighted semi
highlight _ m = m
newtype Unspaced m a = Unspaced { runUnspaced :: m a }
deriving (Functor,Applicative,Alternative,Monad,MonadPlus,CharParsing)
instance Parsing m => Parsing (Unspaced m) where
try (Unspaced m) = Unspaced $ try m
Unspaced m <?> l = Unspaced $ m <?> l
unexpected = Unspaced . unexpected
eof = Unspaced eof
notFollowedBy (Unspaced m) = Unspaced $ notFollowedBy m
instance MonadTrans Unspaced where
lift = Unspaced
instance MonadState s m => MonadState s (Unspaced m) where
get = lift Class.get
put = lift . Class.put
instance MonadReader e m => MonadReader e (Unspaced m) where
ask = lift Class.ask
local f = Unspaced . Class.local f . runUnspaced
instance MonadWriter e m => MonadWriter e (Unspaced m) where
tell = lift . Class.tell
listen = Unspaced . Class.listen . runUnspaced
pass = Unspaced . Class.pass . runUnspaced
instance TokenParsing m => TokenParsing (Unspaced m) where
nesting (Unspaced m) = Unspaced (nesting m)
someSpace = empty
semi = Unspaced semi
highlight h (Unspaced m) = Unspaced (highlight h m)
newtype Unlined m a = Unlined { runUnlined :: m a }
deriving (Functor,Applicative,Alternative,Monad,MonadPlus,CharParsing)
instance Parsing m => Parsing (Unlined m) where
try (Unlined m) = Unlined $ try m
Unlined m <?> l = Unlined $ m <?> l
unexpected = Unlined . unexpected
eof = Unlined eof
notFollowedBy (Unlined m) = Unlined $ notFollowedBy m
instance MonadTrans Unlined where
lift = Unlined
instance MonadState s m => MonadState s (Unlined m) where
get = lift Class.get
put = lift . Class.put
instance MonadReader e m => MonadReader e (Unlined m) where
ask = lift Class.ask
local f = Unlined . Class.local f . runUnlined
instance MonadWriter e m => MonadWriter e (Unlined m) where
tell = lift . Class.tell
listen = Unlined . Class.listen . runUnlined
pass = Unlined . Class.pass . runUnlined
instance TokenParsing m => TokenParsing (Unlined m) where
nesting (Unlined m) = Unlined (nesting m)
someSpace = skipMany (satisfy $ \c -> c /= '\n' && isSpace c)
semi = Unlined semi
highlight h (Unlined m) = Unlined (highlight h m)
instance Parsec.Stream s m Char => TokenParsing (Parsec.ParsecT s u m)
instance Att.Chunk t => TokenParsing (Att.Parser t)
instance TokenParsing ReadP.ReadP