{-# OPTIONS_GHC -fno-warn-missing-methods #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeSynonymInstances #-}
module Skylighting.Tokenizer (
tokenize
, TokenizerConfig(..)
) where
import Control.Applicative
import Control.Monad
import Control.Monad.Except
import Control.Monad.Reader
import Control.Monad.State.Strict
import qualified Data.Attoparsec.ByteString.Char8 as A
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.UTF8 as UTF8
import Data.CaseInsensitive (mk)
import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isSpace, ord)
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import Data.Maybe (catMaybes)
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.Encoding (decodeUtf8', encodeUtf8)
import Debug.Trace
import Skylighting.Regex
import Skylighting.Types
import Skylighting.Parser (resolveKeywords)
import Data.List.NonEmpty (NonEmpty((:|)), (<|), toList)
#if !MIN_VERSION_base(4,11,0)
import Data.Semigroup
#endif
newtype Captures = Captures{ Captures -> IntMap ByteString
unCaptures :: IntMap.IntMap ByteString }
deriving (Int -> Captures -> ShowS
[Captures] -> ShowS
Captures -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [Captures] -> ShowS
$cshowList :: [Captures] -> ShowS
show :: Captures -> [Char]
$cshow :: Captures -> [Char]
showsPrec :: Int -> Captures -> ShowS
$cshowsPrec :: Int -> Captures -> ShowS
Show)
newtype ContextStack =
ContextStack{ ContextStack -> NonEmpty (Context, Captures)
unContextStack :: NonEmpty (Context, Captures) }
deriving (Int -> ContextStack -> ShowS
[ContextStack] -> ShowS
ContextStack -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [ContextStack] -> ShowS
$cshowList :: [ContextStack] -> ShowS
show :: ContextStack -> [Char]
$cshow :: ContextStack -> [Char]
showsPrec :: Int -> ContextStack -> ShowS
$cshowsPrec :: Int -> ContextStack -> ShowS
Show)
data TokenizerState = TokenizerState{
TokenizerState -> ByteString
input :: ByteString
, TokenizerState -> Bool
endline :: Bool
, TokenizerState -> Char
prevChar :: Char
, TokenizerState -> ContextStack
contextStack :: ContextStack
, TokenizerState -> Captures
captures :: Captures
, TokenizerState -> Int
column :: Int
, TokenizerState -> Bool
lineContinuation :: Bool
, TokenizerState -> Maybe Int
firstNonspaceColumn :: Maybe Int
}
data TokenizerConfig = TokenizerConfig{
TokenizerConfig -> SyntaxMap
syntaxMap :: SyntaxMap
, TokenizerConfig -> Bool
traceOutput :: Bool
} deriving (Int -> TokenizerConfig -> ShowS
[TokenizerConfig] -> ShowS
TokenizerConfig -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [TokenizerConfig] -> ShowS
$cshowList :: [TokenizerConfig] -> ShowS
show :: TokenizerConfig -> [Char]
$cshow :: TokenizerConfig -> [Char]
showsPrec :: Int -> TokenizerConfig -> ShowS
$cshowsPrec :: Int -> TokenizerConfig -> ShowS
Show)
data Result e a = Success a
| Failure
| Error e
deriving (forall a b. a -> Result e b -> Result e a
forall a b. (a -> b) -> Result e a -> Result e b
forall e a b. a -> Result e b -> Result e a
forall e a b. (a -> b) -> Result e a -> Result e b
forall (f :: * -> *).
(forall a b. (a -> b) -> f a -> f b)
-> (forall a b. a -> f b -> f a) -> Functor f
<$ :: forall a b. a -> Result e b -> Result e a
$c<$ :: forall e a b. a -> Result e b -> Result e a
fmap :: forall a b. (a -> b) -> Result e a -> Result e b
$cfmap :: forall e a b. (a -> b) -> Result e a -> Result e b
Functor)
deriving instance (Show a, Show e) => Show (Result e a)
newtype TokenizerM a = TM { forall a.
TokenizerM a
-> TokenizerConfig
-> TokenizerState
-> (TokenizerState, Result [Char] a)
runTokenizerM :: TokenizerConfig
-> TokenizerState
-> (TokenizerState, Result String a) }
mapsnd :: (a -> b) -> (c, a) -> (c, b)
mapsnd :: forall a b c. (a -> b) -> (c, a) -> (c, b)
mapsnd a -> b
f (c
x, a
y) = (c
x, a -> b
f a
y)
instance Functor TokenizerM where
fmap :: forall a b. (a -> b) -> TokenizerM a -> TokenizerM b
fmap a -> b
f (TM TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a)
g) = forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a))
-> TokenizerM a
TM (\TokenizerConfig
c TokenizerState
s -> forall a b c. (a -> b) -> (c, a) -> (c, b)
mapsnd (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap a -> b
f) (TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a)
g TokenizerConfig
c TokenizerState
s))
instance Applicative TokenizerM where
pure :: forall a. a -> TokenizerM a
pure a
x = forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a))
-> TokenizerM a
TM (\TokenizerConfig
_ TokenizerState
s -> (TokenizerState
s, forall e a. a -> Result e a
Success a
x))
(TM TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] (a -> b))
f) <*> :: forall a b. TokenizerM (a -> b) -> TokenizerM a -> TokenizerM b
<*> (TM TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a)
y) = forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a))
-> TokenizerM a
TM (\TokenizerConfig
c TokenizerState
s ->
case (TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] (a -> b))
f TokenizerConfig
c TokenizerState
s) of
(TokenizerState
s', Result [Char] (a -> b)
Failure ) -> (TokenizerState
s', forall e a. Result e a
Failure)
(TokenizerState
s', Error [Char]
e ) -> (TokenizerState
s', forall e a. e -> Result e a
Error [Char]
e)
(TokenizerState
s', Success a -> b
f') ->
case (TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a)
y TokenizerConfig
c TokenizerState
s') of
(TokenizerState
s'', Result [Char] a
Failure ) -> (TokenizerState
s'', forall e a. Result e a
Failure)
(TokenizerState
s'', Error [Char]
e' ) -> (TokenizerState
s'', forall e a. e -> Result e a
Error [Char]
e')
(TokenizerState
s'', Success a
y') -> (TokenizerState
s'', forall e a. a -> Result e a
Success (a -> b
f' a
y')))
instance Monad TokenizerM where
return :: forall a. a -> TokenizerM a
return = forall (f :: * -> *) a. Applicative f => a -> f a
pure
(TM TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a)
x) >>= :: forall a b. TokenizerM a -> (a -> TokenizerM b) -> TokenizerM b
>>= a -> TokenizerM b
f = forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a))
-> TokenizerM a
TM (\TokenizerConfig
c TokenizerState
s ->
case TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a)
x TokenizerConfig
c TokenizerState
s of
(TokenizerState
s', Result [Char] a
Failure ) -> (TokenizerState
s', forall e a. Result e a
Failure)
(TokenizerState
s', Error [Char]
e ) -> (TokenizerState
s', forall e a. e -> Result e a
Error [Char]
e)
(TokenizerState
s', Success a
x') -> TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] b)
g TokenizerConfig
c TokenizerState
s'
where TM TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] b)
g = a -> TokenizerM b
f a
x')
instance Alternative TokenizerM where
empty :: forall a. TokenizerM a
empty = forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a))
-> TokenizerM a
TM (\TokenizerConfig
_ TokenizerState
s -> (TokenizerState
s, forall e a. Result e a
Failure))
<|> :: forall a. TokenizerM a -> TokenizerM a -> TokenizerM a
(<|>) (TM TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a)
x) (TM TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a)
y) = forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a))
-> TokenizerM a
TM (\TokenizerConfig
c TokenizerState
s ->
case TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a)
x TokenizerConfig
c TokenizerState
s of
(TokenizerState
_, Result [Char] a
Failure ) -> TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a)
y TokenizerConfig
c TokenizerState
s
(TokenizerState
s', Error [Char]
e ) -> (TokenizerState
s', forall e a. e -> Result e a
Error [Char]
e)
(TokenizerState
s', Success a
x') -> (TokenizerState
s', forall e a. a -> Result e a
Success a
x'))
many :: forall a. TokenizerM a -> TokenizerM [a]
many (TM TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a)
x) = forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a))
-> TokenizerM a
TM (\TokenizerConfig
c TokenizerState
s ->
case TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a)
x TokenizerConfig
c TokenizerState
s of
(TokenizerState
_, Result [Char] a
Failure ) -> (TokenizerState
s, forall e a. a -> Result e a
Success [])
(TokenizerState
s', Error [Char]
e ) -> (TokenizerState
s', forall e a. e -> Result e a
Error [Char]
e)
(TokenizerState
s', Success a
x') -> forall a b c. (a -> b) -> (c, a) -> (c, b)
mapsnd (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (a
x'forall a. a -> [a] -> [a]
:)) (TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] [a])
g TokenizerConfig
c TokenizerState
s')
where TM TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] [a])
g = forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a))
-> TokenizerM a
TM TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a)
x))
some :: forall a. TokenizerM a -> TokenizerM [a]
some TokenizerM a
x = (:) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TokenizerM a
x forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Alternative f => f a -> f [a]
many TokenizerM a
x
instance MonadPlus TokenizerM where
mzero :: forall a. TokenizerM a
mzero = forall (f :: * -> *) a. Alternative f => f a
empty
mplus :: forall a. TokenizerM a -> TokenizerM a -> TokenizerM a
mplus = forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
(<|>)
instance MonadReader TokenizerConfig TokenizerM where
ask :: TokenizerM TokenizerConfig
ask = forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a))
-> TokenizerM a
TM (\TokenizerConfig
c TokenizerState
s -> (TokenizerState
s, forall e a. a -> Result e a
Success TokenizerConfig
c))
local :: forall a.
(TokenizerConfig -> TokenizerConfig)
-> TokenizerM a -> TokenizerM a
local TokenizerConfig -> TokenizerConfig
f (TM TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a)
x) = forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a))
-> TokenizerM a
TM (TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a)
x forall b c a. (b -> c) -> (a -> b) -> a -> c
. TokenizerConfig -> TokenizerConfig
f)
instance MonadState TokenizerState TokenizerM where
get :: TokenizerM TokenizerState
get = forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a))
-> TokenizerM a
TM (\TokenizerConfig
_ TokenizerState
s -> (TokenizerState
s, forall e a. a -> Result e a
Success TokenizerState
s))
put :: TokenizerState -> TokenizerM ()
put TokenizerState
x = forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a))
-> TokenizerM a
TM (\TokenizerConfig
_ TokenizerState
_ -> (TokenizerState
x, forall e a. a -> Result e a
Success ()))
instance MonadError String TokenizerM where
throwError :: forall a. [Char] -> TokenizerM a
throwError [Char]
e = forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a))
-> TokenizerM a
TM (\TokenizerConfig
_ TokenizerState
s -> (TokenizerState
s, forall e a. e -> Result e a
Error [Char]
e))
catchError :: forall a. TokenizerM a -> ([Char] -> TokenizerM a) -> TokenizerM a
catchError (TM TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a)
x) [Char] -> TokenizerM a
f = forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a))
-> TokenizerM a
TM (\TokenizerConfig
c TokenizerState
s -> case TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a)
x TokenizerConfig
c TokenizerState
s of
(TokenizerState
_, Error [Char]
e) -> let TM TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a)
y = [Char] -> TokenizerM a
f [Char]
e in TokenizerConfig
-> TokenizerState -> (TokenizerState, Result [Char] a)
y TokenizerConfig
c TokenizerState
s
(TokenizerState, Result [Char] a)
z -> (TokenizerState, Result [Char] a)
z)
tokenize :: TokenizerConfig -> Syntax -> Text -> Either String [SourceLine]
tokenize :: TokenizerConfig -> Syntax -> Text -> Either [Char] [SourceLine]
tokenize TokenizerConfig
config Syntax
syntax Text
inp =
Either [Char] ContextStack
eitherStack forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \(!ContextStack
stack) ->
case forall a.
TokenizerM a
-> TokenizerConfig
-> TokenizerState
-> (TokenizerState, Result [Char] a)
runTokenizerM TokenizerM [SourceLine]
action
TokenizerConfig
config{ syntaxMap :: SyntaxMap
syntaxMap = forall a b k. (a -> b) -> Map k a -> Map k b
Map.map (SyntaxMap -> Syntax -> Syntax
resolveKeywords (TokenizerConfig -> SyntaxMap
syntaxMap TokenizerConfig
config))
(TokenizerConfig -> SyntaxMap
syntaxMap TokenizerConfig
config) }
(ContextStack -> TokenizerState
startingState ContextStack
stack) of
(TokenizerState
_, Success [SourceLine]
ls) -> forall a b. b -> Either a b
Right [SourceLine]
ls
(TokenizerState
_, Error [Char]
e) -> forall a b. a -> Either a b
Left [Char]
e
(TokenizerState
_, Result [Char] [SourceLine]
Failure) -> forall a b. a -> Either a b
Left [Char]
"Could not tokenize code"
where
action :: TokenizerM [SourceLine]
action = forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (ByteString, Int) -> TokenizerM SourceLine
tokenizeLine (forall a b. [a] -> [b] -> [(a, b)]
zip (ByteString -> [ByteString]
BS.lines (Text -> ByteString
encodeUtf8 Text
inp)) [Int
1..])
eitherStack :: Either [Char] ContextStack
eitherStack = case Text -> Syntax -> Maybe Context
lookupContext (Syntax -> Text
sStartingContext Syntax
syntax)
(SyntaxMap -> Syntax -> Syntax
resolveKeywords (TokenizerConfig -> SyntaxMap
syntaxMap TokenizerConfig
config) Syntax
syntax) of
Just Context
c -> forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ NonEmpty (Context, Captures) -> ContextStack
ContextStack ((Context
c, IntMap ByteString -> Captures
Captures forall a. Monoid a => a
mempty) forall a. a -> [a] -> NonEmpty a
:| [])
Maybe Context
Nothing -> forall a b. a -> Either a b
Left [Char]
"No starting context specified"
startingState :: ContextStack -> TokenizerState
startingState ContextStack
stack =
TokenizerState{ input :: ByteString
input = ByteString
BS.empty
, endline :: Bool
endline = Text -> Bool
Text.null Text
inp
, prevChar :: Char
prevChar = Char
'\n'
, contextStack :: ContextStack
contextStack = ContextStack
stack
, captures :: Captures
captures = IntMap ByteString -> Captures
Captures forall a. Monoid a => a
mempty
, column :: Int
column = Int
0
, lineContinuation :: Bool
lineContinuation = Bool
False
, firstNonspaceColumn :: Maybe Int
firstNonspaceColumn = forall a. Maybe a
Nothing
}
info :: String -> TokenizerM ()
info :: [Char] -> TokenizerM ()
info [Char]
s = do
Bool
tr <- forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks TokenizerConfig -> Bool
traceOutput
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
tr forall a b. (a -> b) -> a -> b
$ forall a. [Char] -> a -> a
trace [Char]
s (forall (m :: * -> *) a. Monad m => a -> m a
return ())
infoContextStack :: TokenizerM ()
infoContextStack :: TokenizerM ()
infoContextStack = do
Bool
tr <- forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks TokenizerConfig -> Bool
traceOutput
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
tr forall a b. (a -> b) -> a -> b
$ do
ContextStack NonEmpty (Context, Captures)
stack <- forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> ContextStack
contextStack
[Char] -> TokenizerM ()
info forall a b. (a -> b) -> a -> b
$ [Char]
"CONTEXT STACK " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (forall a b. (a -> b) -> [a] -> [b]
map (Context -> Text
cName forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst) forall a b. (a -> b) -> a -> b
$ forall a. NonEmpty a -> [a]
toList NonEmpty (Context, Captures)
stack)
popContextStack :: TokenizerM ()
popContextStack :: TokenizerM ()
popContextStack = do
ContextStack NonEmpty (Context, Captures)
cs <- forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> ContextStack
contextStack
case NonEmpty (Context, Captures)
cs of
((Context, Captures)
_ :| []) -> [Char] -> TokenizerM ()
info [Char]
"WARNING: Tried to pop only element on context stack!"
((Context, Captures)
_ :| ((Context, Captures)
x:[(Context, Captures)]
xs)) -> do
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify (\TokenizerState
st -> TokenizerState
st{ contextStack :: ContextStack
contextStack = NonEmpty (Context, Captures) -> ContextStack
ContextStack ((Context, Captures)
x forall a. a -> [a] -> NonEmpty a
:| [(Context, Captures)]
xs) })
TokenizerM ()
infoContextStack
pushContextStack :: Context -> TokenizerM ()
pushContextStack :: Context -> TokenizerM ()
pushContextStack Context
cont = do
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify (\TokenizerState
st -> TokenizerState
st{ contextStack :: ContextStack
contextStack =
NonEmpty (Context, Captures) -> ContextStack
ContextStack
(((Context
cont, IntMap ByteString -> Captures
Captures forall a. Monoid a => a
mempty) forall a. a -> NonEmpty a -> NonEmpty a
<|) forall b c a. (b -> c) -> (a -> b) -> a -> c
. ContextStack -> NonEmpty (Context, Captures)
unContextStack
forall a b. (a -> b) -> a -> b
$ TokenizerState -> ContextStack
contextStack TokenizerState
st) } )
TokenizerM ()
infoContextStack
currentContext :: TokenizerM Context
currentContext :: TokenizerM Context
currentContext = do
ContextStack ((Context
c,Captures
_) :| [(Context, Captures)]
_) <- forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> ContextStack
contextStack
forall (m :: * -> *) a. Monad m => a -> m a
return Context
c
doContextSwitch :: ContextSwitch -> TokenizerM ()
doContextSwitch :: ContextSwitch -> TokenizerM ()
doContextSwitch ContextSwitch
Pop = TokenizerM ()
popContextStack
doContextSwitch (Push (!Text
syn,!Text
c)) = do
SyntaxMap
syntaxes <- forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks TokenizerConfig -> SyntaxMap
syntaxMap
case forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Text
syn SyntaxMap
syntaxes forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Text -> Syntax -> Maybe Context
lookupContext Text
c of
Just !Context
con -> Context -> TokenizerM ()
pushContextStack Context
con
Maybe Context
Nothing -> forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError forall a b. (a -> b) -> a -> b
$ [Char]
"Unknown syntax or context: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (Text
syn, Text
c)
doContextSwitches :: [ContextSwitch] -> TokenizerM ()
doContextSwitches :: [ContextSwitch] -> TokenizerM ()
doContextSwitches = forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ ContextSwitch -> TokenizerM ()
doContextSwitch
addCaptures :: TokenizerM ()
addCaptures :: TokenizerM ()
addCaptures = do
Captures
capts <- forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Captures
captures
if forall a. IntMap a -> Bool
IntMap.null (Captures -> IntMap ByteString
unCaptures Captures
capts)
then forall (m :: * -> *) a. Monad m => a -> m a
return ()
else do
ContextStack ((Context
c,Captures
_) :| [(Context, Captures)]
cs) <- forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> ContextStack
contextStack
[Char] -> TokenizerM ()
info forall a b. (a -> b) -> a -> b
$ [Char]
"Adding captures to " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> [Char]
show (Context -> Text
cName Context
c) forall a. Semigroup a => a -> a -> a
<> [Char]
": " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> [Char]
show Captures
capts
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify forall a b. (a -> b) -> a -> b
$ \TokenizerState
st -> TokenizerState
st{ contextStack :: ContextStack
contextStack = NonEmpty (Context, Captures) -> ContextStack
ContextStack ((Context
c,Captures
capts) forall a. a -> [a] -> NonEmpty a
:| [(Context, Captures)]
cs) }
getCapture :: Int -> TokenizerM Text
getCapture :: Int -> TokenizerM Text
getCapture Int
capnum = do
ContextStack ((Context
_,Captures IntMap ByteString
capts) :| [(Context, Captures)]
_) <- forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> ContextStack
contextStack
[Char] -> TokenizerM ()
info forall a b. (a -> b) -> a -> b
$ [Char]
"Retrieving capture " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> [Char]
show Int
capnum
Text
res <- case forall a. Int -> IntMap a -> Maybe a
IntMap.lookup Int
capnum IntMap ByteString
capts of
Maybe ByteString
Nothing -> do
[Char] -> TokenizerM ()
info [Char]
"Not found"
forall (m :: * -> *) a. MonadPlus m => m a
mzero
Just ByteString
x -> ByteString -> TokenizerM Text
decodeBS ByteString
x
[Char] -> TokenizerM ()
info forall a b. (a -> b) -> a -> b
$ [Char]
"Got " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> [Char]
show Text
res
forall (m :: * -> *) a. Monad m => a -> m a
return Text
res
lookupContext :: Text -> Syntax -> Maybe Context
lookupContext :: Text -> Syntax -> Maybe Context
lookupContext Text
name Syntax
syntax | Text -> Bool
Text.null Text
name =
if Text -> Bool
Text.null (Syntax -> Text
sStartingContext Syntax
syntax)
then forall a. Maybe a
Nothing
else Text -> Syntax -> Maybe Context
lookupContext (Syntax -> Text
sStartingContext Syntax
syntax) Syntax
syntax
lookupContext Text
name Syntax
syntax = forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Text
name forall a b. (a -> b) -> a -> b
$ Syntax -> Map Text Context
sContexts Syntax
syntax
tokenizeLine :: (ByteString, Int) -> TokenizerM [Token]
tokenizeLine :: (ByteString, Int) -> TokenizerM SourceLine
tokenizeLine (!ByteString
ln, !Int
linenum) = do
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify forall a b. (a -> b) -> a -> b
$ \TokenizerState
st -> TokenizerState
st{ input :: ByteString
input = ByteString
ln, endline :: Bool
endline = ByteString -> Bool
BS.null ByteString
ln, prevChar :: Char
prevChar = Char
'\n' }
Context
cur <- TokenizerM Context
currentContext
Bool
lineCont <- forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Bool
lineContinuation
if Bool
lineCont
then forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify forall a b. (a -> b) -> a -> b
$ \TokenizerState
st -> TokenizerState
st{ lineContinuation :: Bool
lineContinuation = Bool
False }
else do
let !mbFirstNonspace :: Maybe Int
mbFirstNonspace = (Char -> Bool) -> ByteString -> Maybe Int
BS.findIndex (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace) forall a b. (a -> b) -> a -> b
$! ByteString
ln
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify forall a b. (a -> b) -> a -> b
$ \TokenizerState
st -> TokenizerState
st{ column :: Int
column = Int
0
, firstNonspaceColumn :: Maybe Int
firstNonspaceColumn = Maybe Int
mbFirstNonspace }
[ContextSwitch] -> TokenizerM ()
doContextSwitches (Context -> [ContextSwitch]
cLineBeginContext Context
cur)
if ByteString -> Bool
BS.null ByteString
ln
then [ContextSwitch] -> TokenizerM ()
doContextSwitches (Context -> [ContextSwitch]
cLineEmptyContext Context
cur)
else [ContextSwitch] -> TokenizerM ()
doContextSwitches (Context -> [ContextSwitch]
cLineBeginContext Context
cur)
SourceLine
ts <- SourceLine -> SourceLine
normalizeHighlighting forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [Maybe a] -> [a]
catMaybes forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => f a -> f [a]
many TokenizerM (Maybe (TokenType, Text))
getToken
Bool
eol <- forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Bool
endline
if Bool
eol
then do
TokenizerM Context
currentContext forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Context -> TokenizerM ()
checkLineEnd
forall (m :: * -> *) a. Monad m => a -> m a
return SourceLine
ts
else do
Int
col <- forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Int
column
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError forall a b. (a -> b) -> a -> b
$ [Char]
"Could not match anything at line " forall a. [a] -> [a] -> [a]
++
forall a. Show a => a -> [Char]
show Int
linenum forall a. [a] -> [a] -> [a]
++ [Char]
" column " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Int
col
getToken :: TokenizerM (Maybe Token)
getToken :: TokenizerM (Maybe (TokenType, Text))
getToken = do
ByteString
inp <- forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> ByteString
input
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Bool
endline forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bool -> Bool
not
!Context
context <- TokenizerM Context
currentContext
forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, MonadPlus m) =>
t (m a) -> m a
msum (forall a b. (a -> b) -> [a] -> [b]
map (\Rule
r -> Rule -> ByteString -> TokenizerM (Maybe (TokenType, Text))
tryRule Rule
r ByteString
inp) (Context -> [Rule]
cRules Context
context)) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
case Context -> [ContextSwitch]
cFallthroughContext Context
context of
[] | Context -> Bool
cFallthrough Context
context -> forall a. Maybe a
Nothing forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ [ContextSwitch] -> TokenizerM ()
doContextSwitches [ContextSwitch
Pop]
| Bool
otherwise -> do
Text
t <- TokenizerM Text
normalChunk
let mbtok :: Maybe (TokenType, Text)
mbtok = forall a. a -> Maybe a
Just (Context -> TokenType
cAttribute Context
context, Text
t)
[Char] -> TokenizerM ()
info forall a b. (a -> b) -> a -> b
$ [Char]
"FALLTHROUGH " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Maybe (TokenType, Text)
mbtok
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe (TokenType, Text)
mbtok
[ContextSwitch]
cs -> forall a. Maybe a
Nothing forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ [ContextSwitch] -> TokenizerM ()
doContextSwitches [ContextSwitch]
cs
takeChars :: Int -> TokenizerM Text
takeChars :: Int -> TokenizerM Text
takeChars Int
0 = forall (m :: * -> *) a. MonadPlus m => m a
mzero
takeChars Int
numchars = do
ByteString
inp <- forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> ByteString
input
let (ByteString
bs,ByteString
rest) = Int -> ByteString -> (ByteString, ByteString)
UTF8.splitAt Int
numchars ByteString
inp
forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not (ByteString -> Bool
BS.null ByteString
bs)
!Text
t <- ByteString -> TokenizerM Text
decodeBS ByteString
bs
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify forall a b. (a -> b) -> a -> b
$ \TokenizerState
st -> TokenizerState
st{ input :: ByteString
input = ByteString
rest,
endline :: Bool
endline = ByteString -> Bool
BS.null ByteString
rest,
prevChar :: Char
prevChar = Text -> Char
Text.last Text
t,
column :: Int
column = TokenizerState -> Int
column TokenizerState
st forall a. Num a => a -> a -> a
+ Int
numchars }
forall (m :: * -> *) a. Monad m => a -> m a
return Text
t
tryRule :: Rule -> ByteString -> TokenizerM (Maybe Token)
tryRule :: Rule -> ByteString -> TokenizerM (Maybe (TokenType, Text))
tryRule Rule
_ ByteString
"" = forall (m :: * -> *) a. MonadPlus m => m a
mzero
tryRule Rule
rule ByteString
inp = do
[Char] -> TokenizerM ()
info forall a b. (a -> b) -> a -> b
$ [Char]
"Trying rule " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Rule
rule
case Rule -> Maybe Int
rColumn Rule
rule of
Maybe Int
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return ()
Just Int
n -> forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Int
column forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Eq a => a -> a -> Bool
== Int
n)
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Rule -> Bool
rFirstNonspace Rule
rule) forall a b. (a -> b) -> a -> b
$ do
!Maybe Int
firstNonspace <- forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Maybe Int
firstNonspaceColumn
!Int
col <- forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Int
column
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Maybe Int
firstNonspace forall a. Eq a => a -> a -> Bool
== forall a. a -> Maybe a
Just Int
col)
Maybe TokenizerState
oldstate <- if Rule -> Bool
rLookahead Rule
rule
then forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall s (m :: * -> *). MonadState s m => m s
get
else forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify forall a b. (a -> b) -> a -> b
$ \TokenizerState
st -> TokenizerState
st{ captures :: Captures
captures = IntMap ByteString -> Captures
Captures forall a. Monoid a => a
mempty }
let attr :: TokenType
attr = Rule -> TokenType
rAttribute Rule
rule
Maybe (TokenType, Text)
mbtok <- case Rule -> Matcher
rMatcher Rule
rule of
DetectChar Char
c -> TokenType
-> TokenizerM Text -> TokenizerM (Maybe (TokenType, Text))
withAttr TokenType
attr forall a b. (a -> b) -> a -> b
$ Bool -> Char -> ByteString -> TokenizerM Text
detectChar (Rule -> Bool
rDynamic Rule
rule) Char
c ByteString
inp
Detect2Chars Char
c Char
d -> TokenType
-> TokenizerM Text -> TokenizerM (Maybe (TokenType, Text))
withAttr TokenType
attr forall a b. (a -> b) -> a -> b
$
Bool -> Char -> Char -> ByteString -> TokenizerM Text
detect2Chars (Rule -> Bool
rDynamic Rule
rule) Char
c Char
d ByteString
inp
AnyChar Set Char
cs -> TokenType
-> TokenizerM Text -> TokenizerM (Maybe (TokenType, Text))
withAttr TokenType
attr forall a b. (a -> b) -> a -> b
$ Set Char -> ByteString -> TokenizerM Text
anyChar Set Char
cs ByteString
inp
RangeDetect Char
c Char
d -> TokenType
-> TokenizerM Text -> TokenizerM (Maybe (TokenType, Text))
withAttr TokenType
attr forall a b. (a -> b) -> a -> b
$ Char -> Char -> ByteString -> TokenizerM Text
rangeDetect Char
c Char
d ByteString
inp
RegExpr RE
re -> TokenType
-> TokenizerM Text -> TokenizerM (Maybe (TokenType, Text))
withAttr TokenType
attr forall a b. (a -> b) -> a -> b
$ Bool -> RE -> ByteString -> TokenizerM Text
regExpr (Rule -> Bool
rDynamic Rule
rule) RE
re ByteString
inp
Matcher
Int -> TokenType
-> TokenizerM Text -> TokenizerM (Maybe (TokenType, Text))
withAttr TokenType
attr forall a b. (a -> b) -> a -> b
$ ByteString -> TokenizerM Text
parseInt ByteString
inp
Matcher
HlCOct -> TokenType
-> TokenizerM Text -> TokenizerM (Maybe (TokenType, Text))
withAttr TokenType
attr forall a b. (a -> b) -> a -> b
$ ByteString -> TokenizerM Text
parseOct ByteString
inp
Matcher
HlCHex -> TokenType
-> TokenizerM Text -> TokenizerM (Maybe (TokenType, Text))
withAttr TokenType
attr forall a b. (a -> b) -> a -> b
$ ByteString -> TokenizerM Text
parseHex ByteString
inp
Matcher
HlCStringChar -> TokenType
-> TokenizerM Text -> TokenizerM (Maybe (TokenType, Text))
withAttr TokenType
attr forall a b. (a -> b) -> a -> b
$ ByteString -> TokenizerM Text
parseCStringChar ByteString
inp
Matcher
HlCChar -> TokenType
-> TokenizerM Text -> TokenizerM (Maybe (TokenType, Text))
withAttr TokenType
attr forall a b. (a -> b) -> a -> b
$ ByteString -> TokenizerM Text
parseCChar ByteString
inp
Matcher
Float -> TokenType
-> TokenizerM Text -> TokenizerM (Maybe (TokenType, Text))
withAttr TokenType
attr forall a b. (a -> b) -> a -> b
$ ByteString -> TokenizerM Text
parseFloat ByteString
inp
Keyword KeywordAttr
_kwattr (Left Text
listname) ->
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError forall a b. (a -> b) -> a -> b
$ [Char]
"Keyword with unresolved list " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> [Char]
show Text
listname
Keyword KeywordAttr
kwattr (Right WordSet Text
kws) ->
TokenType
-> TokenizerM Text -> TokenizerM (Maybe (TokenType, Text))
withAttr TokenType
attr forall a b. (a -> b) -> a -> b
$ KeywordAttr -> WordSet Text -> ByteString -> TokenizerM Text
keyword KeywordAttr
kwattr WordSet Text
kws ByteString
inp
StringDetect Text
s -> TokenType
-> TokenizerM Text -> TokenizerM (Maybe (TokenType, Text))
withAttr TokenType
attr forall a b. (a -> b) -> a -> b
$
Bool -> Bool -> Text -> ByteString -> TokenizerM Text
stringDetect (Rule -> Bool
rDynamic Rule
rule) (Rule -> Bool
rCaseSensitive Rule
rule)
Text
s ByteString
inp
WordDetect Text
s -> TokenType
-> TokenizerM Text -> TokenizerM (Maybe (TokenType, Text))
withAttr TokenType
attr forall a b. (a -> b) -> a -> b
$
Bool -> Text -> ByteString -> TokenizerM Text
wordDetect (Rule -> Bool
rCaseSensitive Rule
rule) Text
s ByteString
inp
Matcher
LineContinue -> TokenType
-> TokenizerM Text -> TokenizerM (Maybe (TokenType, Text))
withAttr TokenType
attr forall a b. (a -> b) -> a -> b
$ ByteString -> TokenizerM Text
lineContinue ByteString
inp
Matcher
DetectSpaces -> TokenType
-> TokenizerM Text -> TokenizerM (Maybe (TokenType, Text))
withAttr TokenType
attr forall a b. (a -> b) -> a -> b
$ ByteString -> TokenizerM Text
detectSpaces ByteString
inp
Matcher
DetectIdentifier -> TokenType
-> TokenizerM Text -> TokenizerM (Maybe (TokenType, Text))
withAttr TokenType
attr forall a b. (a -> b) -> a -> b
$ ByteString -> TokenizerM Text
detectIdentifier ByteString
inp
IncludeRules (Text, Text)
cname -> Maybe TokenType
-> (Text, Text)
-> ByteString
-> TokenizerM (Maybe (TokenType, Text))
includeRules
(if Rule -> Bool
rIncludeAttribute Rule
rule then forall a. a -> Maybe a
Just TokenType
attr else forall a. Maybe a
Nothing)
(Text, Text)
cname ByteString
inp
Maybe (TokenType, Text)
mbchildren <- do
ByteString
inp' <- forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> ByteString
input
forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, MonadPlus m) =>
t (m a) -> m a
msum (forall a b. (a -> b) -> [a] -> [b]
map (\Rule
r -> Rule -> ByteString -> TokenizerM (Maybe (TokenType, Text))
tryRule Rule
r ByteString
inp') (Rule -> [Rule]
rChildren Rule
rule)) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
Maybe (TokenType, Text)
mbtok' <- case Maybe (TokenType, Text)
mbtok of
Maybe (TokenType, Text)
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
Just (TokenType
tt, Text
s)
| Rule -> Bool
rLookahead Rule
rule -> do
(ByteString
oldinput, Bool
oldendline, Char
oldprevChar, Int
oldColumn) <-
case Maybe TokenizerState
oldstate of
Maybe TokenizerState
Nothing -> forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError
[Char]
"oldstate not saved with lookahead rule"
Just TokenizerState
st -> forall (m :: * -> *) a. Monad m => a -> m a
return
(TokenizerState -> ByteString
input TokenizerState
st, TokenizerState -> Bool
endline TokenizerState
st,
TokenizerState -> Char
prevChar TokenizerState
st, TokenizerState -> Int
column TokenizerState
st)
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify forall a b. (a -> b) -> a -> b
$ \TokenizerState
st -> TokenizerState
st{ input :: ByteString
input = ByteString
oldinput
, endline :: Bool
endline = Bool
oldendline
, prevChar :: Char
prevChar = Char
oldprevChar
, column :: Int
column = Int
oldColumn }
forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
| Bool
otherwise -> do
case Maybe (TokenType, Text)
mbchildren of
Maybe (TokenType, Text)
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a. a -> Maybe a
Just (TokenType
tt, Text
s)
Just (TokenType
_, Text
cresult) -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a. a -> Maybe a
Just (TokenType
tt, Text
s forall a. Semigroup a => a -> a -> a
<> Text
cresult)
[Char] -> TokenizerM ()
info forall a b. (a -> b) -> a -> b
$ forall a. (a -> Bool) -> [a] -> [a]
takeWhile (forall a. Eq a => a -> a -> Bool
/=Char
' ') (forall a. Show a => a -> [Char]
show (Rule -> Matcher
rMatcher Rule
rule)) forall a. [a] -> [a] -> [a]
++ [Char]
" MATCHED " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Maybe (TokenType, Text)
mbtok'
[ContextSwitch] -> TokenizerM ()
doContextSwitches (Rule -> [ContextSwitch]
rContextSwitch Rule
rule)
TokenizerM ()
addCaptures
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe (TokenType, Text)
mbtok'
withAttr :: TokenType -> TokenizerM Text -> TokenizerM (Maybe Token)
withAttr :: TokenType
-> TokenizerM Text -> TokenizerM (Maybe (TokenType, Text))
withAttr TokenType
tt TokenizerM Text
p = do
Text
res <- TokenizerM Text
p
if Text -> Bool
Text.null Text
res
then forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
else forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a. a -> Maybe a
Just (TokenType
tt, Text
res)
wordDetect :: Bool -> Text -> ByteString -> TokenizerM Text
wordDetect :: Bool -> Text -> ByteString -> TokenizerM Text
wordDetect Bool
caseSensitive Text
s ByteString
inp = do
ByteString -> TokenizerM ()
wordBoundary ByteString
inp
Text
t <- ByteString -> TokenizerM Text
decodeBS forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
UTF8.take (Text -> Int
Text.length Text
s) ByteString
inp
forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ if Bool
caseSensitive
then Text
s forall a. Eq a => a -> a -> Bool
== Text
t
else forall s. FoldCase s => s -> CI s
mk Text
s forall a. Eq a => a -> a -> Bool
== forall s. FoldCase s => s -> CI s
mk Text
t
forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not (Text -> Bool
Text.null Text
t)
let c :: Char
c = Text -> Char
Text.last Text
t
let rest :: ByteString
rest = Int -> ByteString -> ByteString
UTF8.drop (Text -> Int
Text.length Text
s) ByteString
inp
let d :: Char
d = case ByteString -> Maybe (Char, ByteString)
UTF8.uncons ByteString
rest of
Maybe (Char, ByteString)
Nothing -> Char
'\n'
Just (Char
x,ByteString
_) -> Char
x
forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ Char -> Char -> Bool
isWordBoundary Char
c Char
d
Int -> TokenizerM Text
takeChars (Text -> Int
Text.length Text
t)
stringDetect :: Bool -> Bool -> Text -> ByteString -> TokenizerM Text
stringDetect :: Bool -> Bool -> Text -> ByteString -> TokenizerM Text
stringDetect Bool
dynamic Bool
caseSensitive Text
s ByteString
inp = do
Text
s' <- if Bool
dynamic
then do
Text
dynStr <- Text -> TokenizerM Text
subDynamicText Text
s
[Char] -> TokenizerM ()
info forall a b. (a -> b) -> a -> b
$ [Char]
"Dynamic string: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Text
dynStr
forall (m :: * -> *) a. Monad m => a -> m a
return Text
dynStr
else forall (m :: * -> *) a. Monad m => a -> m a
return Text
s
Text
t <- ByteString -> TokenizerM Text
decodeBS forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
UTF8.take (Text -> Int
Text.length Text
s') ByteString
inp
forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ if Bool
caseSensitive
then Text
s' forall a. Eq a => a -> a -> Bool
== Text
t
else forall s. FoldCase s => s -> CI s
mk Text
s' forall a. Eq a => a -> a -> Bool
== forall s. FoldCase s => s -> CI s
mk Text
t
Int -> TokenizerM Text
takeChars (Text -> Int
Text.length Text
s')
subDynamicText :: Text -> TokenizerM Text
subDynamicText :: Text -> TokenizerM Text
subDynamicText Text
t = do
let substitute :: Text -> TokenizerM Text
substitute Text
x = case Text -> Maybe (Char, Text)
Text.uncons Text
x of
Just (Char
c, Text
rest) | Char -> Bool
isDigit Char
c -> let capNum :: Int
capNum = Char -> Int
ord Char
c forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'0'
in (forall a. Semigroup a => a -> a -> a
<> Text
rest) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> TokenizerM Text
getCapture Int
capNum
Maybe (Char, Text)
_ -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Char -> Text -> Text
Text.cons Char
'%' Text
x
case (Char -> Bool) -> Text -> [Text]
Text.split (forall a. Eq a => a -> a -> Bool
== Char
'%') Text
t of
[] -> forall (m :: * -> *) a. Monad m => a -> m a
return Text
Text.empty
Text
x:[Text]
rest -> (Text
x forall a. Semigroup a => a -> a -> a
<>) forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> Text
Text.concat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM Text -> TokenizerM Text
substitute [Text]
rest
normalChunk :: TokenizerM Text
normalChunk :: TokenizerM Text
normalChunk = do
ByteString
inp <- forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> ByteString
input
case ByteString -> Maybe (Char, ByteString)
UTF8.uncons ByteString
inp of
Maybe (Char, ByteString)
Nothing -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
Just (Char
c, ByteString
_)
| Char
c forall a. Eq a => a -> a -> Bool
== Char
' ' ->
let bs :: ByteString
bs = (Char -> Bool) -> ByteString -> ByteString
BS.takeWhile (forall a. Eq a => a -> a -> Bool
==Char
' ') ByteString
inp
in Int -> TokenizerM Text
takeChars (ByteString -> Int
BS.length ByteString
bs)
| Char -> Bool
isAscii Char
c Bool -> Bool -> Bool
&& Char -> Bool
isAlphaNum Char
c ->
let (ByteString
bs, ByteString
_) = (Char -> Bool) -> ByteString -> (ByteString, ByteString)
UTF8.span Char -> Bool
isAlphaNum ByteString
inp
in Int -> TokenizerM Text
takeChars (ByteString -> Int
UTF8.length ByteString
bs)
| Bool
otherwise -> Int -> TokenizerM Text
takeChars Int
1
includeRules :: Maybe TokenType -> ContextName -> ByteString
-> TokenizerM (Maybe Token)
includeRules :: Maybe TokenType
-> (Text, Text)
-> ByteString
-> TokenizerM (Maybe (TokenType, Text))
includeRules Maybe TokenType
mbattr (Text
syn, Text
con) ByteString
inp = do
SyntaxMap
syntaxes <- forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks TokenizerConfig -> SyntaxMap
syntaxMap
case forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Text
syn SyntaxMap
syntaxes forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Text -> Syntax -> Maybe Context
lookupContext Text
con of
Maybe Context
Nothing -> do
Context
cur <- TokenizerM Context
currentContext
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError forall a b. (a -> b) -> a -> b
$ [Char]
"IncludeRules in " forall a. [a] -> [a] -> [a]
++ Text -> [Char]
Text.unpack (Context -> Text
cSyntax Context
cur) forall a. [a] -> [a] -> [a]
++
[Char]
" requires undefined context " forall a. [a] -> [a] -> [a]
++
Text -> [Char]
Text.unpack Text
con forall a. [a] -> [a] -> [a]
++ [Char]
"##" forall a. [a] -> [a] -> [a]
++ Text -> [Char]
Text.unpack Text
syn
Just Context
c -> do
Maybe (TokenType, Text)
mbtok <- forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, MonadPlus m) =>
t (m a) -> m a
msum (forall a b. (a -> b) -> [a] -> [b]
map (\Rule
r -> Rule -> ByteString -> TokenizerM (Maybe (TokenType, Text))
tryRule Rule
r ByteString
inp) (Context -> [Rule]
cRules Context
c))
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify forall a b. (a -> b) -> a -> b
$ \TokenizerState
st -> TokenizerState
st{ captures :: Captures
captures = IntMap ByteString -> Captures
Captures forall a. Monoid a => a
mempty }
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ case (Maybe (TokenType, Text)
mbtok, Maybe TokenType
mbattr) of
(Just (TokenType
NormalTok, Text
xs), Just TokenType
attr) -> forall a. a -> Maybe a
Just (TokenType
attr, Text
xs)
(Maybe (TokenType, Text), Maybe TokenType)
_ -> Maybe (TokenType, Text)
mbtok
checkLineEnd :: Context -> TokenizerM ()
checkLineEnd :: Context -> TokenizerM ()
checkLineEnd Context
c = do
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (forall (t :: * -> *) a. Foldable t => t a -> Bool
null (Context -> [ContextSwitch]
cLineEndContext Context
c)) forall a b. (a -> b) -> a -> b
$ do
Bool
eol <- forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Bool
endline
[Char] -> TokenizerM ()
info forall a b. (a -> b) -> a -> b
$ [Char]
"checkLineEnd for " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (Context -> Text
cName Context
c) forall a. [a] -> [a] -> [a]
++ [Char]
" eol = " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Bool
eol forall a. [a] -> [a] -> [a]
++ [Char]
" cLineEndContext = " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (Context -> [ContextSwitch]
cLineEndContext Context
c)
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
eol forall a b. (a -> b) -> a -> b
$ do
Bool
lineCont' <- forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Bool
lineContinuation
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless Bool
lineCont' forall a b. (a -> b) -> a -> b
$ do
[ContextSwitch] -> TokenizerM ()
doContextSwitches (Context -> [ContextSwitch]
cLineEndContext Context
c)
Context
c' <- TokenizerM Context
currentContext
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (Context
c forall a. Eq a => a -> a -> Bool
== Context
c') forall a b. (a -> b) -> a -> b
$ Context -> TokenizerM ()
checkLineEnd Context
c'
detectChar :: Bool -> Char -> ByteString -> TokenizerM Text
detectChar :: Bool -> Char -> ByteString -> TokenizerM Text
detectChar Bool
dynamic Char
c ByteString
inp = do
Char
c' <- if Bool
dynamic Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'9'
then Char -> TokenizerM Char
getDynamicChar Char
c
else forall (m :: * -> *) a. Monad m => a -> m a
return Char
c
case ByteString -> Maybe (Char, ByteString)
UTF8.uncons ByteString
inp of
Just (Char
x,ByteString
_) | Char
x forall a. Eq a => a -> a -> Bool
== Char
c' -> Int -> TokenizerM Text
takeChars Int
1
Maybe (Char, ByteString)
_ -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
getDynamicChar :: Char -> TokenizerM Char
getDynamicChar :: Char -> TokenizerM Char
getDynamicChar Char
c = do
let capNum :: Int
capNum = Char -> Int
ord Char
c forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'0'
Text
res <- Int -> TokenizerM Text
getCapture Int
capNum
case Text -> Maybe (Char, Text)
Text.uncons Text
res of
Maybe (Char, Text)
Nothing -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
Just (Char
d,Text
_) -> forall (m :: * -> *) a. Monad m => a -> m a
return Char
d
detect2Chars :: Bool -> Char -> Char -> ByteString -> TokenizerM Text
detect2Chars :: Bool -> Char -> Char -> ByteString -> TokenizerM Text
detect2Chars Bool
dynamic Char
c Char
d ByteString
inp = do
Char
c' <- if Bool
dynamic Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'9'
then Char -> TokenizerM Char
getDynamicChar Char
c
else forall (m :: * -> *) a. Monad m => a -> m a
return Char
c
Char
d' <- if Bool
dynamic Bool -> Bool -> Bool
&& Char
d forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
d forall a. Ord a => a -> a -> Bool
<= Char
'9'
then Char -> TokenizerM Char
getDynamicChar Char
d
else forall (m :: * -> *) a. Monad m => a -> m a
return Char
d
if (Text -> ByteString
encodeUtf8 ([Char] -> Text
Text.pack [Char
c',Char
d'])) ByteString -> ByteString -> Bool
`BS.isPrefixOf` ByteString
inp
then Int -> TokenizerM Text
takeChars Int
2
else forall (m :: * -> *) a. MonadPlus m => m a
mzero
rangeDetect :: Char -> Char -> ByteString -> TokenizerM Text
rangeDetect :: Char -> Char -> ByteString -> TokenizerM Text
rangeDetect Char
c Char
d ByteString
inp = do
case ByteString -> Maybe (Char, ByteString)
UTF8.uncons ByteString
inp of
Just (Char
x, ByteString
rest)
| Char
x forall a. Eq a => a -> a -> Bool
== Char
c -> case (Char -> Bool) -> ByteString -> (ByteString, ByteString)
UTF8.span (forall a. Eq a => a -> a -> Bool
/= Char
d) ByteString
rest of
(ByteString
in_t, ByteString
out_t)
| ByteString -> Bool
BS.null ByteString
out_t -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
| Bool
otherwise -> do
Text
t <- ByteString -> TokenizerM Text
decodeBS ByteString
in_t
Int -> TokenizerM Text
takeChars (Text -> Int
Text.length Text
t forall a. Num a => a -> a -> a
+ Int
2)
Maybe (Char, ByteString)
_ -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
detectSpaces :: ByteString -> TokenizerM Text
detectSpaces :: ByteString -> TokenizerM Text
detectSpaces ByteString
inp = do
case (Char -> Bool) -> ByteString -> (ByteString, ByteString)
BS.span (\Char
c -> Char -> Bool
isSpace Char
c) ByteString
inp of
(ByteString
t, ByteString
_)
| ByteString -> Bool
BS.null ByteString
t -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
| Bool
otherwise -> Int -> TokenizerM Text
takeChars (ByteString -> Int
BS.length ByteString
t)
detectIdentifier :: ByteString -> TokenizerM Text
detectIdentifier :: ByteString -> TokenizerM Text
detectIdentifier ByteString
inp = do
case ByteString -> Maybe (Char, ByteString)
BS.uncons ByteString
inp of
Just (Char
c, ByteString
t) | (Char -> Bool
isAscii Char
c Bool -> Bool -> Bool
&& Char -> Bool
isLetter Char
c) Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
'_' ->
Int -> TokenizerM Text
takeChars forall a b. (a -> b) -> a -> b
$ Int
1 forall a. Num a => a -> a -> a
+ forall b a. b -> (a -> b) -> Maybe a -> b
maybe (ByteString -> Int
BS.length ByteString
t) forall a. a -> a
id
((Char -> Bool) -> ByteString -> Maybe Int
BS.findIndex (\Char
d -> Bool -> Bool
not (Char -> Bool
isAscii Char
d) Bool -> Bool -> Bool
||
Bool -> Bool
not (Char -> Bool
isAlphaNum Char
d Bool -> Bool -> Bool
|| Char
d forall a. Eq a => a -> a -> Bool
== Char
'_')) ByteString
t)
Maybe (Char, ByteString)
_ -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
lineContinue :: ByteString -> TokenizerM Text
lineContinue :: ByteString -> TokenizerM Text
lineContinue ByteString
inp = do
if ByteString
inp forall a. Eq a => a -> a -> Bool
== ByteString
"\\"
then do
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify forall a b. (a -> b) -> a -> b
$ \TokenizerState
st -> TokenizerState
st{ lineContinuation :: Bool
lineContinuation = Bool
True }
Int -> TokenizerM Text
takeChars Int
1
else forall (m :: * -> *) a. MonadPlus m => m a
mzero
anyChar :: Set.Set Char -> ByteString -> TokenizerM Text
anyChar :: Set Char -> ByteString -> TokenizerM Text
anyChar Set Char
cs ByteString
inp = do
case ByteString -> Maybe (Char, ByteString)
UTF8.uncons ByteString
inp of
Just (Char
x, ByteString
_) | Char
x forall a. Ord a => a -> Set a -> Bool
`Set.member` Set Char
cs -> Int -> TokenizerM Text
takeChars Int
1
Maybe (Char, ByteString)
_ -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
regExpr :: Bool -> RE -> ByteString -> TokenizerM Text
regExpr :: Bool -> RE -> ByteString -> TokenizerM Text
regExpr Bool
dynamic RE
re ByteString
inp = do
let reStr :: ByteString
reStr = RE -> ByteString
reString RE
re
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int -> ByteString -> ByteString
BS.take Int
2 ByteString
reStr forall a. Eq a => a -> a -> Bool
== ByteString
"\\b") forall a b. (a -> b) -> a -> b
$ ByteString -> TokenizerM ()
wordBoundary ByteString
inp
Regex
regex <- case RE -> Either [Char] Regex
compileRE RE
re of
Right Regex
r -> forall (m :: * -> *) a. Monad m => a -> m a
return Regex
r
Left [Char]
e -> forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError forall a b. (a -> b) -> a -> b
$
[Char]
"Error compiling regex " forall a. [a] -> [a] -> [a]
++
ByteString -> [Char]
UTF8.toString ByteString
reStr forall a. [a] -> [a] -> [a]
++ [Char]
": " forall a. [a] -> [a] -> [a]
++ [Char]
e
Regex
regex' <- if Bool
dynamic
then Regex -> TokenizerM Regex
subDynamic Regex
regex
else forall (m :: * -> *) a. Monad m => a -> m a
return Regex
regex
case Regex -> ByteString -> Maybe (ByteString, IntMap (Int, Int))
matchRegex Regex
regex' ByteString
inp of
Just (ByteString
matchedBytes, IntMap (Int, Int)
capts) -> do
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (forall (t :: * -> *) a. Foldable t => t a -> Bool
null IntMap (Int, Int)
capts) forall a b. (a -> b) -> a -> b
$
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify forall a b. (a -> b) -> a -> b
$ \TokenizerState
st -> TokenizerState
st{ captures :: Captures
captures = IntMap ByteString -> Captures
Captures forall a b. (a -> b) -> a -> b
$
forall a b. (a -> b) -> IntMap a -> IntMap b
IntMap.map (ByteString -> (Int, Int) -> ByteString
toSlice ByteString
inp) IntMap (Int, Int)
capts }
Int -> TokenizerM Text
takeChars (ByteString -> Int
UTF8.length ByteString
matchedBytes)
Maybe (ByteString, IntMap (Int, Int))
_ -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
toSlice :: ByteString -> (Int, Int) -> ByteString
toSlice :: ByteString -> (Int, Int) -> ByteString
toSlice ByteString
bs (Int
off, Int
len) = Int -> ByteString -> ByteString
BS.take Int
len forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
BS.drop Int
off ByteString
bs
wordBoundary :: ByteString -> TokenizerM ()
wordBoundary :: ByteString -> TokenizerM ()
wordBoundary ByteString
inp = do
case ByteString -> Maybe (Char, ByteString)
UTF8.uncons ByteString
inp of
Maybe (Char, ByteString)
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return ()
Just (Char
d, ByteString
_) -> do
Char
c <- forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Char
prevChar
forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ Char -> Char -> Bool
isWordBoundary Char
c Char
d
isWordBoundary :: Char -> Char -> Bool
isWordBoundary :: Char -> Char -> Bool
isWordBoundary Char
c Char
d = Char -> Bool
isWordChar Char
c forall a. Eq a => a -> a -> Bool
/= Char -> Bool
isWordChar Char
d
decodeBS :: ByteString -> TokenizerM Text
decodeBS :: ByteString -> TokenizerM Text
decodeBS ByteString
bs = case ByteString -> Either UnicodeException Text
decodeUtf8' ByteString
bs of
Left UnicodeException
_ -> forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError ([Char]
"ByteString " forall a. [a] -> [a] -> [a]
++
forall a. Show a => a -> [Char]
show ByteString
bs forall a. [a] -> [a] -> [a]
++ [Char]
"is not UTF8")
Right Text
t -> forall (m :: * -> *) a. Monad m => a -> m a
return Text
t
subDynamic :: Regex -> TokenizerM Regex
subDynamic :: Regex -> TokenizerM Regex
subDynamic (MatchDynamic Int
capNum) = do
Text
replacement <- Int -> TokenizerM Text
getCapture Int
capNum
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a. Monoid a => [a] -> a
mconcat forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map ((Char -> Bool) -> Regex
MatchChar forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Eq a => a -> a -> Bool
(==)) forall a b. (a -> b) -> a -> b
$ Text -> [Char]
Text.unpack Text
replacement
subDynamic (MatchAlt Regex
r1 Regex
r2) =
Regex -> Regex -> Regex
MatchAlt forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Regex -> TokenizerM Regex
subDynamic Regex
r1 forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Regex -> TokenizerM Regex
subDynamic Regex
r2
subDynamic (MatchConcat Regex
r1 Regex
r2) =
Regex -> Regex -> Regex
MatchConcat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Regex -> TokenizerM Regex
subDynamic Regex
r1 forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Regex -> TokenizerM Regex
subDynamic Regex
r2
subDynamic (MatchSome Regex
r) =
Regex -> Regex
MatchSome forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Regex -> TokenizerM Regex
subDynamic Regex
r
subDynamic (MatchCapture Int
i Regex
r) =
Int -> Regex -> Regex
MatchCapture Int
i forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Regex -> TokenizerM Regex
subDynamic Regex
r
subDynamic (AssertPositive Direction
dir Regex
r) =
Direction -> Regex -> Regex
AssertPositive Direction
dir forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Regex -> TokenizerM Regex
subDynamic Regex
r
subDynamic (AssertNegative Direction
dir Regex
r) =
Direction -> Regex -> Regex
AssertNegative Direction
dir forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Regex -> TokenizerM Regex
subDynamic Regex
r
subDynamic Regex
x = forall (m :: * -> *) a. Monad m => a -> m a
return Regex
x
keyword :: KeywordAttr -> WordSet Text -> ByteString -> TokenizerM Text
keyword :: KeywordAttr -> WordSet Text -> ByteString -> TokenizerM Text
keyword KeywordAttr
kwattr WordSet Text
kws ByteString
inp = do
Char
prev <- forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Char
prevChar
forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ Char
prev forall a. Ord a => a -> Set a -> Bool
`Set.member` (KeywordAttr -> Set Char
keywordDelims KeywordAttr
kwattr)
let (ByteString
w,ByteString
_) = (Char -> Bool) -> ByteString -> (ByteString, ByteString)
UTF8.break (forall a. Ord a => a -> Set a -> Bool
`Set.member` (KeywordAttr -> Set Char
keywordDelims KeywordAttr
kwattr)) ByteString
inp
forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not (ByteString -> Bool
BS.null ByteString
w)
Text
w' <- ByteString -> TokenizerM Text
decodeBS ByteString
w
let numchars :: Int
numchars = Text -> Int
Text.length Text
w'
if Text
w' forall a. (FoldCase a, Ord a) => a -> WordSet a -> Bool
`inWordSet` WordSet Text
kws
then Int -> TokenizerM Text
takeChars Int
numchars
else forall (m :: * -> *) a. MonadPlus m => m a
mzero
normalizeHighlighting :: [Token] -> [Token]
normalizeHighlighting :: SourceLine -> SourceLine
normalizeHighlighting [] = []
normalizeHighlighting ((!TokenType
t,!Text
x):SourceLine
xs)
| Text -> Bool
Text.null Text
x = SourceLine -> SourceLine
normalizeHighlighting SourceLine
xs
| Bool
otherwise =
(TokenType
t, Text
matchedText) forall a. a -> [a] -> [a]
: SourceLine -> SourceLine
normalizeHighlighting SourceLine
rest
where (SourceLine
matches, SourceLine
rest) = forall a. (a -> Bool) -> [a] -> ([a], [a])
span (\(TokenType
z,Text
_) -> TokenType
z forall a. Eq a => a -> a -> Bool
== TokenType
t) SourceLine
xs
!matchedText :: Text
matchedText = [Text] -> Text
Text.concat (Text
x forall a. a -> [a] -> [a]
: forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> b
snd SourceLine
matches)
parseCStringChar :: ByteString -> TokenizerM Text
parseCStringChar :: ByteString -> TokenizerM Text
parseCStringChar ByteString
inp = do
case forall a. Parser a -> ByteString -> Either [Char] a
A.parseOnly (forall a. Parser a -> Parser (ByteString, a)
A.match Parser ()
pCStringChar) ByteString
inp of
Left [Char]
_ -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
Right (ByteString
r,()
_) -> Int -> TokenizerM Text
takeChars (ByteString -> Int
BS.length ByteString
r)
pCStringChar :: A.Parser ()
pCStringChar :: Parser ()
pCStringChar = do
Char
_ <- Char -> Parser Char
A.char Char
'\\'
Char
next <- Parser Char
A.anyChar
case Char
next of
Char
c | Char
c forall a. Eq a => a -> a -> Bool
== Char
'x' Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
'X' -> () forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Char -> Bool) -> Parser ByteString
A.takeWhile1 ([Char] -> Char -> Bool
A.inClass [Char]
"0-9a-fA-F")
| Char
c forall a. Eq a => a -> a -> Bool
== Char
'0' -> () forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Char -> Bool) -> Parser ByteString
A.takeWhile ([Char] -> Char -> Bool
A.inClass [Char]
"0-7")
| [Char] -> Char -> Bool
A.inClass [Char]
"abefnrtv\"'?\\" Char
c -> forall (m :: * -> *) a. Monad m => a -> m a
return ()
| Bool
otherwise -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
parseCChar :: ByteString -> TokenizerM Text
parseCChar :: ByteString -> TokenizerM Text
parseCChar ByteString
inp = do
case forall a. Parser a -> ByteString -> Either [Char] a
A.parseOnly (forall a. Parser a -> Parser (ByteString, a)
A.match Parser ()
pCChar) ByteString
inp of
Left [Char]
_ -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
Right (ByteString
r,()
_) -> Int -> TokenizerM Text
takeChars (ByteString -> Int
BS.length ByteString
r)
pCChar :: A.Parser ()
pCChar :: Parser ()
pCChar = do
() forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser Char
A.char Char
'\''
Parser ()
pCStringChar forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> () forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Char -> Bool) -> Parser Char
A.satisfy (\Char
c -> Char
c forall a. Eq a => a -> a -> Bool
/= Char
'\'' Bool -> Bool -> Bool
&& Char
c forall a. Eq a => a -> a -> Bool
/= Char
'\\')
() forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser Char
A.char Char
'\''
parseInt :: ByteString -> TokenizerM Text
parseInt :: ByteString -> TokenizerM Text
parseInt ByteString
inp = do
ByteString -> TokenizerM ()
wordBoundary ByteString
inp
case forall a. Parser a -> ByteString -> Either [Char] a
A.parseOnly (forall a. Parser a -> Parser (ByteString, a)
A.match (Parser ()
pHex forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ()
pOct forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ()
pDec)) ByteString
inp of
Left [Char]
_ -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
Right (ByteString
r,()
_) -> Int -> TokenizerM Text
takeChars (ByteString -> Int
BS.length ByteString
r)
pDec :: A.Parser ()
pDec :: Parser ()
pDec = do
Parser ()
mbMinus
ByteString
_ <- (Char -> Bool) -> Parser ByteString
A.takeWhile1 ([Char] -> Char -> Bool
A.inClass [Char]
"0-9")
Parser ()
guardWordBoundary
parseOct :: ByteString -> TokenizerM Text
parseOct :: ByteString -> TokenizerM Text
parseOct ByteString
inp = do
ByteString -> TokenizerM ()
wordBoundary ByteString
inp
case forall a. Parser a -> ByteString -> Either [Char] a
A.parseOnly (forall a. Parser a -> Parser (ByteString, a)
A.match Parser ()
pHex) ByteString
inp of
Left [Char]
_ -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
Right (ByteString
r,()
_) -> Int -> TokenizerM Text
takeChars (ByteString -> Int
BS.length ByteString
r)
pOct :: A.Parser ()
pOct :: Parser ()
pOct = do
Parser ()
mbMinus
Char
_ <- Char -> Parser Char
A.char Char
'0'
Char
_ <- (Char -> Bool) -> Parser Char
A.satisfy ([Char] -> Char -> Bool
A.inClass [Char]
"Oo")
ByteString
_ <- (Char -> Bool) -> Parser ByteString
A.takeWhile1 ([Char] -> Char -> Bool
A.inClass [Char]
"0-7")
Parser ()
guardWordBoundary
parseHex :: ByteString -> TokenizerM Text
parseHex :: ByteString -> TokenizerM Text
parseHex ByteString
inp = do
ByteString -> TokenizerM ()
wordBoundary ByteString
inp
case forall a. Parser a -> ByteString -> Either [Char] a
A.parseOnly (forall a. Parser a -> Parser (ByteString, a)
A.match Parser ()
pHex) ByteString
inp of
Left [Char]
_ -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
Right (ByteString
r,()
_) -> Int -> TokenizerM Text
takeChars (ByteString -> Int
BS.length ByteString
r)
pHex :: A.Parser ()
pHex :: Parser ()
pHex = do
Parser ()
mbMinus
Char
_ <- Char -> Parser Char
A.char Char
'0'
Char
_ <- (Char -> Bool) -> Parser Char
A.satisfy ([Char] -> Char -> Bool
A.inClass [Char]
"Xx")
ByteString
_ <- (Char -> Bool) -> Parser ByteString
A.takeWhile1 ([Char] -> Char -> Bool
A.inClass [Char]
"0-9a-fA-F")
Parser ()
guardWordBoundary
guardWordBoundary :: A.Parser ()
guardWordBoundary :: Parser ()
guardWordBoundary = do
Maybe Char
mbw <- Parser (Maybe Char)
A.peekChar
case Maybe Char
mbw of
Just Char
c -> forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ Char -> Char -> Bool
isWordBoundary Char
'0' Char
c
Maybe Char
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return ()
mbMinus :: A.Parser ()
mbMinus :: Parser ()
mbMinus = (() forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser Char
A.char Char
'-') forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (m :: * -> *) a. Monad m => a -> m a
return ()
mbPlusMinus :: A.Parser ()
mbPlusMinus :: Parser ()
mbPlusMinus = () forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Char -> Bool) -> Parser Char
A.satisfy ([Char] -> Char -> Bool
A.inClass [Char]
"+-") forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (m :: * -> *) a. Monad m => a -> m a
return ()
parseFloat :: ByteString -> TokenizerM Text
parseFloat :: ByteString -> TokenizerM Text
parseFloat ByteString
inp = do
ByteString -> TokenizerM ()
wordBoundary ByteString
inp
case forall a. Parser a -> ByteString -> Either [Char] a
A.parseOnly (forall a. Parser a -> Parser (ByteString, a)
A.match Parser ()
pFloat) ByteString
inp of
Left [Char]
_ -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
Right (ByteString
r,()
_) -> Int -> TokenizerM Text
takeChars (ByteString -> Int
BS.length ByteString
r)
where pFloat :: A.Parser ()
pFloat :: Parser ()
pFloat = do
let digits :: Parser ByteString
digits = (Char -> Bool) -> Parser ByteString
A.takeWhile1 ([Char] -> Char -> Bool
A.inClass [Char]
"0-9")
Parser ()
mbPlusMinus
Bool
before <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
A.option Bool
False forall a b. (a -> b) -> a -> b
$ Bool
True forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Parser ByteString
digits
Bool
dot <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
A.option Bool
False forall a b. (a -> b) -> a -> b
$ Bool
True forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Char -> Bool) -> Parser Char
A.satisfy ([Char] -> Char -> Bool
A.inClass [Char]
".")
Bool
after <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
A.option Bool
False forall a b. (a -> b) -> a -> b
$ Bool
True forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Parser ByteString
digits
Bool
e <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
A.option Bool
False forall a b. (a -> b) -> a -> b
$ Bool
True forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ((Char -> Bool) -> Parser Char
A.satisfy ([Char] -> Char -> Bool
A.inClass [Char]
"Ee") forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
Parser ()
mbPlusMinus forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString
digits)
Maybe Char
mbnext <- Parser (Maybe Char)
A.peekChar
case Maybe Char
mbnext of
Maybe Char
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return ()
Just Char
c -> forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ [Char] -> Char -> Bool
A.inClass [Char]
"." Char
c)
forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ (Bool
before Bool -> Bool -> Bool
&& Bool -> Bool
not Bool
dot Bool -> Bool -> Bool
&& Bool
e)
Bool -> Bool -> Bool
|| (Bool
before Bool -> Bool -> Bool
&& Bool
dot Bool -> Bool -> Bool
&& (Bool
after Bool -> Bool -> Bool
|| Bool -> Bool
not Bool
e))
Bool -> Bool -> Bool
|| (Bool -> Bool
not Bool
before Bool -> Bool -> Bool
&& Bool
dot Bool -> Bool -> Bool
&& Bool
after)