module Hydrogen.Syntax.Parser where import Hydrogen.Prelude hiding ((<|>), many) import Hydrogen.Syntax.Types import Hydrogen.Util.Parsec hiding (token, tokens) import Text.Parsec.Char (char, hexDigit, noneOf, oneOf, string) tokenize :: String -> Parser String (Tokens Token) tokenize srcName = either mkError sanitize . runIdentity . runParserT tokens () srcName where tokens = many (liftA2 (,) getPosition token) <* eof token = choice [ TIndent <$> indent , something >>= \x -> choice [ TString x '\'' <$> chars3 "'''" , TString x '\"' <$> chars3 "\"\"\"" , TString x '`' <$> chars3 "```" , TString x '\'' <$> chars' '\'' , TString x '\"' <$> chars' '\"' , TString x '`' <$> chars' '`' , TSomething <$> return x ] , TString "" '\'' <$> chars3 "'''" , TString "" '\"' <$> chars3 "\"\"\"" , TString "" '`' <$> chars3 "```" , TString "" '\'' <$> chars '\'' , TString "" '\"' <$> chars '\"' , TString "" '`' <$> chars' '`' , const TSpaces <$> many1 (char ' ') , const TComma <$> char ',' , const TSemicolon <$> char ';' , TBraceOpen "" <$> oneOf "([{" , TBraceClose <$> oneOf ")]}" ] something = many1 (noneOf ("([{)]}\'\"`;, " ++ ['\0' .. '\x1F'])) indent = many1 (try (many space >> (nl <|> comment))) *> (length <$> many space) where space = char ' ' nl = string "\n" comment = string ";;" >> many (noneOf "\n") <* char '\n' chars' d = inBetween (char d) (many (try (char '\\' >> char d) <|> noneOf (d : "\n"))) chars d = inBetween (char d) (many (noneOf (d : "\\\n") <|> escape)) chars3 d = manyBetween (try (string d)) (try (string d)) (noneOf "\\" <|> escape) inBetween d p = d *> p <* d escape = char '\\' >> choice [ oneOf "\\\'\"?`" , special <$> oneOf (map fst escapes) , char 'x' >> (convert <$> count 2 hexDigit) , char 'u' >> (convert <$> count 4 hexDigit) , char 'U' >> (convert <$> count 8 hexDigit) -- like Python ] where special = fromJust . flip lookup escapes escapes = [ ('0', '\0'), ('a', '\a'), ('b', '\b'), ('f', '\f'), ('n', '\n'), ('r', '\r'), ('t', '\t'), ('v', '\v') ] convert str = let [(hex, _)] = readHex str in toEnum hex sanitize (t1@(p1, v1) : ts@(_t2@(p2, v2) : tss)) = case (v1, v2) of (TSomething k, TBraceOpen _ t) -> sanitize ((p1, TBraceOpen k t) : tss) (TString _ _ _, TSomething _) -> Left (p2, []) (TString _ _ _, TBraceOpen _ _) -> Left (p2, []) (TBraceClose _, TSomething _) -> Left (p2, []) (TBraceClose _, TString _ _ _) -> Left (p2, []) (TBraceClose _, TBraceOpen _ _) -> Left (p2, []) (TBraceOpen _ _, TIndent _) -> sanitize (t1 : tss) (TBraceOpen _ _, TSpaces) -> sanitize (t1 : tss) (TBraceOpen _ _, _) -> fmap ([t1, (p2, TIndent (sourceColumn p2 - 1))] ++) (sanitize ts) (TSpaces, _) -> sanitize ts (TIndent _, TBraceOpen _ '{') -> sanitize ts (TIndent _, TBraceClose _) -> sanitize ts (TIndent _, TSemicolon) -> sanitize ts (TSemicolon, TIndent _) -> sanitize (t1 : tss) _ -> fmap (t1 :) (sanitize ts) sanitize [t@(_, v)] = Right $ case v of TSpaces -> [] TIndent _ -> [] TSemicolon -> [t] _ -> [t] sanitize [] = Right [] layout :: Parser (Tokens Token) POPs layout = either mkError Right . runIdentity . runParserT document () "-" where document = many (liftA2 (,) getPosition (someToken 0)) <* eof someToken :: Monad m => Int -> ParsecT [(SourcePos, Token)] u m POP someToken i = choice [ sourceToken $ \case TIndent i' | i' <= i -> Just (Token SomethingT "" ";;") TString k '\'' v -> Just (Token AposString k v) TString k '"' v -> Just (Token QuotString k v) TString k '`' v -> Just (Token TickString k v) TComma -> Just (Token SomethingT "" ",") TSemicolon -> Just (Token SomethingT "" ";") TSomething v -> Just (Token SomethingT "" v) _ -> Nothing , do (k, c) <- sourceToken $ \case TBraceOpen k c -> curry Just k c _ -> Nothing i' <- sourceToken $ \(TIndent indent) -> Just indent ts <- many (liftA2 (,) getPosition (someToken i')) bt <- case c of '(' -> sourceToken $ \case TBraceClose ')' -> Just Grouping ; _ -> Nothing '[' -> sourceToken $ \case TBraceClose ']' -> Just Brackets ; _ -> Nothing '{' -> sourceToken $ \case TBraceClose '}' -> Just Mustache ; _ -> Nothing _ -> error "Popsicle!" return (Block bt k ts) ] <* skipMany (sourceToken lineContinuation) where lineContinuation = \case TIndent i' | i' > i -> Just () ; _ -> Nothing parse :: String -> Parser String POPs parse srcName = tokenize srcName >+> layout