{-# LANGUAGE ScopedTypeVariables, DeriveGeneric, DeriveAnyClass
, OverloadedStrings #-}
module Text.ANTLR.Lex.Tokenizer where
import Text.ANTLR.Lex.Automata
import Text.ANTLR.Lex.DFA
import qualified Text.ANTLR.Set as Set
import Text.ANTLR.Set (Hashable, member, Generic(..), Set(..))
import Text.ANTLR.Pretty
import qualified Debug.Trace as D
import Data.List (find)
import qualified Data.Text as T
-- | A token produced by the tokenizer.  @n@ is the token's name (its
-- terminal symbol) and @v@ its semantic value.  The 'Int' field is the
-- token's width: the number of input symbols consumed to produce it
-- (see 'tokenSize').  'Eq' is intentionally not derived — the
-- hand-written instance below compares 'Token's by name only.
data Token n v =
    Token n v Int   -- ^ Named token: name, value, and width in input symbols.
  | EOF             -- ^ End-of-input marker; always terminates a token stream.
  | Error T.Text    -- ^ Lexical error; carries a rendering ('show') of the
                    --   unmatched remainder of the input.
  deriving (Show, Ord, Generic, Hashable)
-- | Pretty-printing for tokens: a named token renders as just its value
-- (name and width are suppressed); 'EOF' and 'Error' render as labelled
-- markers.
instance (Prettify n, Prettify v) => Prettify (Token n v) where
  prettify (Token _ v _) = prettify v
  prettify (Error msg)   = pStr "Token Error: " >> pStr msg
  prettify EOF           = pStr "EOF"
-- | Token equality is by name only: two 'Token's with the same name are
-- equal regardless of their values or widths.  'EOF' equals only 'EOF',
-- and 'Error's compare by their message text.
instance Eq n => Eq (Token n v) where
  EOF          == EOF          = True
  Error a      == Error b      = a == b
  Token a _ _  == Token b _ _  = a == b
  _            == _            = False
-- | Name (terminal symbol) of a token.  Only 'Token' carries a name;
-- calling this on 'EOF' or 'Error' is a programmer error, so we fail
-- with an explicit message rather than an anonymous non-exhaustive
-- pattern-match crash.
tokenName :: Token n v -> n
tokenName (Token n _ _) = n
tokenName EOF           = error "Text.ANTLR.Lex.Tokenizer.tokenName: EOF has no name"
tokenName (Error _)     = error "Text.ANTLR.Lex.Tokenizer.tokenName: Error token has no name"
-- | Semantic value of a token.  Only 'Token' carries a value; calling
-- this on 'EOF' or 'Error' is a programmer error, so we fail with an
-- explicit message rather than an anonymous non-exhaustive
-- pattern-match crash.
tokenValue :: Token n v -> v
tokenValue (Token _ v _) = v
tokenValue EOF           = error "Text.ANTLR.Lex.Tokenizer.tokenValue: EOF has no value"
tokenValue (Error _)     = error "Text.ANTLR.Lex.Tokenizer.tokenValue: Error token has no value"
-- | Number of input symbols consumed to produce this token.  'EOF' and
-- 'Error' consume nothing.
--
-- The 'Error' equation was missing, making this function partial:
-- 'tokenizeInc' sums 'tokenSize' over the next token and any ignored
-- tokens, and the next token can be an 'Error', which previously
-- crashed with a pattern-match failure exactly when a lexical error
-- needed to be reported.
tokenSize :: Token n v -> Int
tokenSize (Token _ _ i) = i
tokenSize EOF           = 0
tokenSize (Error _)     = 0
-- | A lexeme is the raw sequence of input symbols matched for one token.
type Lexeme s = [s]
-- | A named DFA: a token name paired with the automaton that recognizes it.
type NDFA s i n = (n, DFA s i)
-- | Maximal-munch tokenizer: simulate every DFA in parallel over the
-- input, repeatedly emitting the longest lexeme any automaton accepts.
-- Among accepting automata for the same lexeme, the one listed first in
-- the name/DFA list wins, so list order encodes token priority.  The
-- result always ends in 'EOF'; if no automaton matches at some
-- position, a single 'Error' (showing the unmatched remainder of the
-- input) terminates the token stream instead.
tokenize ::
  forall s i n v. (Eq i, Ord s, Eq s, Show s, Show i, Show n, Show v, Hashable i, Hashable s)
  => [(n, DFA s i)]        -- ^ Token names paired with their recognizing DFAs (priority order)
  -> (Lexeme s -> n -> v)  -- ^ Computes a token's value from its lexeme and name
  -> [s]                   -- ^ Input symbol stream
  -> [Token n v]
tokenize dfaTuples fncn input0 = let
    dfas0 = map snd dfaTuples
    -- Produce the whole token stream from the current input position.
    -- dfaSims0 pairs every named DFA with a cursor state; the same
    -- initial (start-state) simulation list is reused for each token.
    allTok :: [(NDFA s i n, State i)] -> [s] -> [Token n v]
    allTok dfaSims0 currInput = let
        -- Scan a single token: advance all live simulations one symbol
        -- at a time, returning the longest accepted lexeme together
        -- with the (highest-priority) accepting DFA, or Nothing if no
        -- DFA ever accepts a prefix of the input.
        oneTok :: [(NDFA s i n, State i)] -> [s] -> Maybe (Lexeme s, NDFA s i n)
        oneTok dfaSims [] = Nothing
        oneTok [] ss = Nothing
        oneTok dfaSims (s:ss) = let
            -- Step each simulation across symbol s, keeping only those
            -- with a transition on s out of their current cursor state.
            dfaSims' =
              [ ((n, dfa), stop)
              | ((n, dfa), cursor) <- dfaSims
              , (start, es, stop) <- Set.toList $ _Δ dfa
              , start == cursor && s `edgeMember` es ]
            -- Simulations now sitting in a final state.  The list
            -- comprehension preserves input order, so the head of
            -- 'accepting' is the highest-priority accepting DFA.
            accepting = [ (n,dfa) | ((n, dfa), cursor) <- dfaSims', cursor `member` _F dfa ]
          in (case (oneTok dfaSims' ss, accepting) of
            (Nothing, []) -> Nothing             -- no match here or deeper
            (Nothing, d:ds) -> Just ([s], d)     -- accept ending at this symbol
            (Just (l,d), _) -> Just (s:l, d))    -- deeper match wins (maximal munch)
      in case (currInput, oneTok dfaSims0 currInput) of
        ([], _) -> [EOF]                             -- input exhausted
        (ss, Nothing) -> [Error $ T.pack $ show ss]  -- lexical error: stop, report remainder
        (ss, Just (l, (name,d))) ->
          -- Emit the matched token, then rescan the input past its lexeme
          -- with the simulations reset to their start states.
          Token name (fncn l name) (length l)
          : allTok dfaSims0 (drop (length l) currInput)
  in allTok (zip dfaTuples (map s0 dfas0)) input0
-- | Incremental tokenizer.  Yields a stepping function that, given the
-- set of token names currently expected by the parser and the remaining
-- input, returns the next significant token along with the input left
-- after it.  Only DFAs whose names are either expected or satisfy
-- @filterF@ are simulated; tokens whose names satisfy @filterF@
-- (e.g. whitespace/comments) are consumed and skipped over.
tokenizeInc
  :: forall s i n v. (Eq i, Ord s, Eq n, Eq s, Show s, Show i, Show n, Show v, Hashable i, Hashable s, Hashable n)
  => (n -> Bool)
  -> [(n, DFA s i)]
  -> (Lexeme s -> n -> v)
  -> (Set n -> [s] -> (Token n v, [s]))
tokenizeInc filterF dfaTuples fncn = incTok
  where
    incTok :: Set n -> [s] -> (Token n v, [s])
    incTok expected input =
      let -- Simulate only the DFAs the parser can currently use, plus
          -- those that match always-skippable tokens.
          keep (name, _) = name `Set.member` expected || filterF name
          toks = tokenize (filter keep dfaTuples) fncn input
          -- A token is skippable when it is named and its name passes
          -- the filter; EOF and Error are never skipped.
          skippable (Token name _ _) = filterF name
          skippable _                = False
          (ignored, rest) = span skippable toks
          nextTok = case rest of
            []      -> EOF
            (t : _) -> t
          -- Advance the input past the next token and everything
          -- skipped before it.
          consumed = sum (map tokenSize (nextTok : ignored))
      in (nextTok, drop consumed input)