{-# LANGUAGE OverloadedStrings, StrictData #-} module HaskellLex where import Data.Text.Lazy (Text) import qualified Data.Text.Lazy as T import Text (splits) -- We have two basic problems: -- 1. splitting haskell code based on the presence of {{ }} -- processing template directives -- splitting into chunks according to the need for line pragmas -- -- 2. splitting off haskell code according to user-chosen template symbols -- lexing the haskell contents in order to prevent injection problems -- We don't want to trigger comments in strings literals or comments, -- so we track them. In order to track string literals, we must also -- track character literals (since '"' does not start or end a string -- literal). data SourceState = InCode | InCodeOpenBrace | InCodeMinus | InCharStart | InCharEnd | InString | InStringEscape | InLineComment | InBlockComment Int | InBlockCommentMinus Int | InBlockCommentOpenBrace Int deriving (Eq, Show) evolveSource :: SourceState -> Char -> SourceState evolveSource InCode '{' = InCodeOpenBrace evolveSource InCode '\'' = InCharStart evolveSource InCode '"' = InString evolveSource InCode '-' = InCodeMinus evolveSource InCode _ = InCode evolveSource InCodeOpenBrace '-' = InBlockComment 0 evolveSource InCodeOpenBrace c = evolveSource InCode c evolveSource InCodeMinus '-' = InLineComment evolveSource InCodeMinus c = evolveSource InCode c evolveSource InCharStart '\\' = InCharStart evolveSource InCharStart _ = InCharEnd evolveSource InCharEnd _ = InCode evolveSource InString '"' = InCode evolveSource InString '\\' = InStringEscape evolveSource InString _ = InString evolveSource InStringEscape _ = InString evolveSource InLineComment '\n' = InCode evolveSource InLineComment _ = InLineComment evolveSource (InBlockComment d) '-' = InBlockCommentMinus d evolveSource (InBlockComment d) '{' = InBlockCommentOpenBrace d evolveSource (InBlockComment d) _ = InBlockComment d evolveSource (InBlockCommentMinus 0) '}' = InCode evolveSource (InBlockCommentMinus d) '}' = InBlockComment (d-1) evolveSource (InBlockCommentMinus d) '-' = InBlockCommentMinus d evolveSource (InBlockCommentMinus d) _ = InBlockComment d evolveSource (InBlockCommentOpenBrace d) '-' = InBlockComment (d+1) evolveSource (InBlockCommentOpenBrace d) '{' = InBlockCommentOpenBrace d evolveSource (InBlockCommentOpenBrace d) _ = InBlockComment d evolveLine :: Int -> Char -> Int evolveLine n '\n' = n+1 evolveLine n _ = n data ReadStatus = ReadStatus { char :: Char, before :: Text, after :: Text, line :: Int, sourceState :: SourceState } reading :: Int -> Text -> [ReadStatus] reading n t = scanl step start $ splits t where start = ReadStatus { char = '?', before = "", after = t, line = n, sourceState = InCode } step s (c,(t1,t2)) = ReadStatus { char = c, before = t1, after = t2, line = evolveLine (line s) c, sourceState = evolveSource (sourceState s) c } splitCodeOn :: Text -> Text -> Maybe (Text, Text) splitCodeOn pat src = f InCode steps where steps = zip3 (T.unpack src) (T.inits src) (T.tails src) f _ [] = Nothing f InCode ((c,u,v):l) = case T.stripPrefix pat v of Nothing -> f (evolveSource InCode c) l Just v' -> Just (u,v') f s ((c,_,_):l) = f (evolveSource s c) l -- TODO Adapt this code to check that Haskell code in templates is -- balanced and ends in code (to protect against injection errors) data ParenStatus = ParenStatus { parenDepth :: Int, parenBadness :: Int } deriving (Eq) instance Semigroup ParenStatus where ParenStatus d1 b1 <> ParenStatus d2 b2 = ParenStatus (d1+d2) (b1 `min` (d1+b2)) instance Monoid ParenStatus where mempty = ParenStatus 0 0 -- We assume this string begins in Haskell code: check that it then -- ends in Haskell code and has balanced parentheses balancedCode :: Text -> Bool balancedCode = f . T.foldl' g (InCode, mempty) where f = (== (InCode, mempty)) g (s,p) c = (evolveSource s c, h s p c) h InCode p '(' = p <> ParenStatus 1 0 h InCode p ')' = p <> ParenStatus (-1) (-1) h _ p _ = p