module GLM.Tokenizer where
import qualified Text.Parsec as P
import Text.Parsec ((<|>))
import Data.String
import Data.Either
import Data.Maybe
import Data.Monoid
import Control.Lens
import Control.Applicative hiding (many, (<|>))
import Test.Framework
import Test.Framework.TH
import Test.Framework.Providers.QuickCheck2
type P a = P.Parsec String () a
type T a = P.Parsec [TokenPos] () a
type TokenPos = (Token, P.SourcePos)
data Token = TString String
| TComment String
| LBrace
| RBrace
| Semi
deriving (Eq, Show)
makePrisms ''Token
instance s ~ String => IsString (P s) where fromString = P.string
instance t ~ Token => IsString (T t) where fromString = pTParse
(?>) :: s -> Getting (First a) s a -> Bool
d ?> p = isJust $ d ^? p
parseTokens :: P [TokenPos]
parseTokens = P.sepEndBy parseTokenPos (P.many P.space)
parseTokenPos :: P TokenPos
parseTokenPos = do
pos <- P.getPosition
tok <- pToken
return (tok, pos)
pToken :: P Token
pToken = pComment <|> pLBrace <|> pRBrace <|> pSemi <|> pString <|> pWord
tests :: Test
tests = $(testGroupGenerator)
prop_tokens_1, prop_tokens_2, prop_tokens_3, prop_tokens_4, prop_tokens_5 :: Bool
prop_tokens_1 = isRight $ P.parse parseTokens "TEST" "\"he\\\"as\\ndf\\\"llo\";;;"
prop_tokens_2 = isRight $ P.parse parseTokens "TEST" "\"he\\\"as\\ndf\\\"llo\"; ;;\n//wtf alksdjhfs \n;;"
prop_tokens_3 = Right LBrace == P.parse pToken "TEST" "{"
prop_tokens_4 = Right RBrace == P.parse pToken "TEST" "}"
prop_tokens_5 = Right RBrace == P.parse pToken "TEST" "}"
pTStringE, pTCommentE, pTParse :: String -> T Token
pTParse s = tSatisfy ((== P.parse pToken "inline" s) . Right . fst)
pTStringE s = tSatisfy ((== TString s) . fst)
pTCommentE s = tSatisfy ((== TComment s) . fst)
pTLBrace, pTRBrace, pTSemi, pTComment, pTString, pTAny :: T Token
pTAny = tSatisfy (const True)
pTString = tSatisfy (?> _1 . _TString )
pTComment = tSatisfy (?> _1 . _TComment)
pTLBrace = tSatisfy (?> _1 . _LBrace )
pTRBrace = tSatisfy (?> _1 . _RBrace )
pTSemi = tSatisfy (?> _1 . _Semi )
pString, pWord, pLBrace, pRBrace, pSemi, pComment :: P Token
pString = fmap TString parseString
pWord = fmap TString parseWord
pLBrace = "{" *> return LBrace
pRBrace = "}" *> return RBrace
pSemi = ";" *> return Semi
pComment = do
start <- "#" <|> "//"
body <- P.many $ P.noneOf "\r\n"
P.optional P.endOfLine
return $ TComment $ start ++ body
parseWord :: P String
parseWord = P.many1 (P.noneOf " ;\n\t\r{}\"\\#")
escape :: P String
escape = do
d <- P.char '\\'
c <- P.oneOf "\\\"0nrvtbf"
return [d, c]
nonEscape :: P Char
nonEscape = P.noneOf "\\\"\0\n\r\v\t\b\f"
character :: P String
character = fmap return nonEscape <|> escape
parseString :: P String
parseString = fmap concat ( P.char '"' *> P.many character <* P.char '"' )
advance :: P.SourcePos -> t -> [TokenPos] -> P.SourcePos
advance _ _ ((_, pos) : _) = pos
advance pos _ [] = pos
tSatisfy :: (TokenPos -> Bool) -> T Token
tSatisfy f = P.tokenPrim show advance (\c -> if f c then Just (fst c) else Nothing)