\begin{code} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE OverloadedStrings #-} module Main ( main ) where import Control.Applicative import Control.Monad import qualified Data.ByteString.Lazy.Char8 as LBS import Data.IORef import Data.Maybe import Data.Monoid import qualified Data.Text as T import qualified Shelly as SH import System.Directory import System.Environment import TestKit import Text.Printf import Text.RE.Edit import Text.RE.TDFA.ByteString.Lazy import Text.RE.TDFA.Text as T import Text.RE.Tools.Grep import Text.RE.Tools.Sed \end{code} \begin{code} main :: IO () main = do as <- getArgs case as of [] -> test ["test"] -> test ["doc",fn,fn'] | is_file fn -> doc fn fn' ["gen",fn,fn'] | is_file fn -> gen fn fn' ["all"] -> gen_all _ -> usage where is_file = not . (== "--") . take 2 doc fn fn' = docMode >>= \dm -> loop dm fn fn' gen fn fn' = genMode >>= \gm -> loop gm fn fn' usage = do pnm <- getProgName let prg = ((pnm++" ")++) putStr $ unlines [ "usage:" , " "++prg "--help" , " "++prg "[test]" , " "++prg "all" , " "++prg "doc (-|) (-|)" , " "++prg "gen (-|) (-|)" ] \end{code} The Sed Script -------------- \begin{code} -- | the MODE determines whether we are generating documentation -- or a Haskell testsuite and includes any IO-accessible state -- needed by the relevant processor data MODE = Doc DocState -- ^ document-generation state | Gen GenState -- ^ adjusting-the-program-for-testing state \end{code} The `DocState` is initialised to `Outside` and flips though the different states as it traverses a code block, so that we can wrap code blocks in special
blocks when their first line indicates that it contains a REPL calculation, which the style sheet can pick up and present accordingly. \begin{code} data DocMode = Outside -- not inside a begin{code} ... \end{code} block | Beginning -- at the start of a begin{code} ... \end{code} block | InsideRepl -- inside a REPL code block | InsideProg -- inside a non-REPL code block deriving (Eq,Show) type DocState = IORef DocMode genMode :: IO MODE genMode = Gen <$> newIORef [] \end{code} \begin{code} -- | the state is the accumulated test function identifiers for -- generating the list of them gets added to the end of the programme type GenState = IORef [String] docMode :: IO MODE docMode = Doc <$> newIORef Outside \end{code} \begin{code} loop :: MODE -> FilePath -> FilePath -> IO () loop mode = sed $ Select [ (,) [re|^%include ${file}(@{%string}) ${rex}(@{%string})$|] $ EDIT_fun TOP $ include mode , (,) [re|^%main ${arg}(top|bottom)$|] $ EDIT_gen $ main_ mode , (,) [re|^\\begin\{code\}$|] $ EDIT_gen $ begin mode , (,) [re|^${fn}(evalme@{%id}) = checkThis ${arg}(@{%string}).*$|] $ EDIT_fun TOP $ evalme mode , (,) [re|^\\end\{code\}$|] $ EDIT_fun TOP $ end mode , (,) [re|^.*$|] $ EDIT_fun TOP $ other mode ] \end{code} \begin{code} include, evalme, end, other :: MODE -> LineNo -> Match LBS.ByteString -> Location -> Capture LBS.ByteString -> IO (Maybe LBS.ByteString) main_, begin :: MODE -> LineNo -> Matches LBS.ByteString -> IO (LineEdit LBS.ByteString) include (Doc _ ) = includeDoc include (Gen _ ) = passthru main_ (Doc _ ) = mainDoc main_ (Gen gs) = mainGen gs begin (Doc ds) = beginDoc ds begin (Gen _ ) = passthru_g evalme (Doc ds) = evalmeDoc ds evalme (Gen gs) = evalmeGen gs end (Doc ds) = endDoc ds end (Gen _ ) = passthru other (Doc ds) = otherDoc ds other (Gen _ ) = passthru passthru :: LineNo -> Match LBS.ByteString -> Location -> Capture LBS.ByteString -> IO (Maybe LBS.ByteString) passthru _ _ _ _ = return Nothing passthru_g :: LineNo -> Matches LBS.ByteString -> IO (LineEdit LBS.ByteString) passthru_g _ _ = return NoEdit \end{code} Script to Generate All Tutorial Tests and Docs ---------------------------------------------- \begin{code} gen_all :: IO () gen_all = do -- prepare HTML docs for the (literate) tools pd "re-gen-modules" pd "re-include" pd "re-nginx-log-processor" pd "re-pp" pd "re-tests" pd "TestKit" pd "RE/Capture" pd "RE/Edit" pd "RE/IsRegex" pd "RE/Options" pd "RE/Replace" pd "RE/TestBench" pd "RE/Tools/Grep" pd "RE/Tools/Lex" pd "RE/Tools/Sed" pd "RE/Internal/NamedCaptures" -- render the tutorial in HTML dm <- docMode loop dm "examples/re-tutorial-master.lhs" "tmp/re-tutorial.lhs" createDirectoryIfMissing False "tmp" pandoc' "re-tutorial.lhs" "examples/re-tutorial.lhs" "tmp/re-tutorial.lhs" "docs/re-tutorial.html" -- generate the tutorial-based tests gm <- genMode loop gm "examples/re-tutorial-master.lhs" "examples/re-tutorial.lhs" putStrLn ">> examples/re-tutorial.lhs" where pd fnm = case captureTextMaybe [cp|mnm|] $ fnm T.?=~ [re|^RE/(Tools/|Internal/)?${mnm}(@{%id})|] of Just mnm -> pandoc fnm ("Text/"<>fnm<>".lhs") ("docs/"<>mnm<>".html") Nothing -> pandoc fnm ( "examples/"<>fnm<>".lhs") ("docs/"<>fnm<>".html") \end{code} Generating the Tutorial ----------------------- \begin{code} includeDoc :: LineNo -> Match LBS.ByteString -> Location -> Capture LBS.ByteString -> IO (Maybe LBS.ByteString) includeDoc _ mtch _ _ = fmap Just $ extract fp =<< compileRegex () re_s where fp = prs_s $ captureText [cp|file|] mtch re_s = prs_s $ captureText [cp|rex|] mtch prs_s = fromMaybe (error "includeDoc") . parseString \end{code} \begin{code} mainDoc :: LineNo -> Matches LBS.ByteString -> IO (LineEdit LBS.ByteString) mainDoc _ _ = return Delete \end{code} \begin{code} beginDoc :: DocState -> LineNo -> Matches LBS.ByteString -> IO (LineEdit LBS.ByteString) beginDoc ds _ _ = writeIORef ds Beginning >> return Delete \end{code} \begin{code} evalmeDoc, endDoc, otherDoc :: DocState -> LineNo -> Match LBS.ByteString -> Location -> Capture LBS.ByteString -> IO (Maybe LBS.ByteString) evalmeDoc ds lno _ _ _ = do dm <- readIORef ds when (dm/=Beginning) $ bad_state "evalme" lno dm writeIORef ds InsideRepl return $ Just $ "
\n"<>begin_code endDoc ds lno _ _ _ = do dm <- readIORef ds case dm of Outside -> bad_state "end" lno dm Beginning -> return $ Just $ begin_code <> "\n" <> end_code InsideRepl -> return $ Just $ end_code <> "\n
" InsideProg -> return Nothing otherDoc ds _ mtch _ _ = do dm <- readIORef ds case dm of Beginning -> do writeIORef ds InsideProg return $ Just $ begin_code <> "\n" <> matchSource mtch _ -> return Nothing bad_state :: String -> LineNo -> DocMode -> IO a bad_state lab lno dm = error $ printf "Bad document syntax: %s: %d: %s" lab (getLineNo lno) $ show dm \end{code} Generating the Tests -------------------- \begin{code} evalmeGen :: GenState -> LineNo -> Match LBS.ByteString -> Location -> Capture LBS.ByteString -> IO (Maybe LBS.ByteString) evalmeGen gs _ mtch0 _ _ = Just <$> replaceCapturesM replace_ ALL f mtch0 where f mtch loc cap = case _loc_capture loc of 2 -> do modifyIORef gs (ide:) return $ Just $ LBS.pack $ show ide where ide = LBS.unpack $ captureText [cp|fn|] mtch _ -> return $ Just $ capturedText cap \end{code} How are we doing? \begin{code} mainGen :: GenState -> LineNo -> Matches LBS.ByteString -> IO (LineEdit LBS.ByteString) mainGen gs _ mtchs = case allMatches mtchs of [mtch] -> case captureText [cp|arg|] $ mtch of "top" -> return $ ReplaceWith $ LBS.unlines $ [ begin_code , "module Main(main) where" , end_code , "" , "*********************************************************" , "*" , "* WARNING: this is generated from pp-tutorial-master.lhs " , "*" , "*********************************************************" ] "bottom" -> do fns <- readIORef gs return $ ReplaceWith $ LBS.unlines $ [ begin_code , "main :: IO ()" , "main = runTests" ] ++ mk_list fns ++ [ end_code ] _ -> error "mainGen (b)" _ -> error "mainGen (a)" \end{code} We cannot place these strings inline without confusing pandoc so we use these definitions instead. \begin{code} begin_code, end_code :: LBS.ByteString begin_code = "\\"<>"begin{code}" end_code = "\\"<>"end{code}" \end{code} \begin{code} mk_list :: [String] -> [LBS.ByteString] mk_list [] = ["[]"] mk_list (ide0:ides) = f "[" ide0 $ foldr (f ",") [" ]"] ides where f pfx ide t = (" "<>pfx<>" "<>LBS.pack ide) : t \end{code} Extracting a Literate Fragment from a Haskell Program Text ---------------------------------------------------------- \begin{code} extract :: FilePath -> RE -> IO LBS.ByteString extract fp rex = extr . LBS.lines <$> LBS.readFile fp where extr lns = case parse $ scan rex lns of Nothing -> oops Just (lno,n) -> LBS.unlines $ (hdr :) $ (take n $ drop i lns) ++ [ftr] where i = getZeroBasedLineNo lno oops = error $ concat [ "failed to locate fragment matching " , show $ reSource rex , " in file " , show fp ] hdr = "
" ftr = "
" \end{code} \begin{code} parse :: [Token] -> Maybe (LineNo,Int) parse [] = Nothing parse (tk:tks) = case (tk,tks) of (Bra b_ln,Hit:Ket k_ln:_) -> Just (b_ln,count_lines_incl b_ln k_ln) _ -> parse tks \end{code} \begin{code} count_lines_incl :: LineNo -> LineNo -> Int count_lines_incl b_ln k_ln = getZeroBasedLineNo k_ln + 1 - getZeroBasedLineNo b_ln \end{code} \begin{code} data Token = Bra LineNo | Hit | Ket LineNo deriving (Show) \end{code} \begin{code} scan :: RE -> [LBS.ByteString] -> [Token] scan rex = grepScript [ (,) [re|\\begin\{code\}|] $ \i -> chk $ Bra i , (,) rex $ \_ -> chk Hit , (,) [re|\\end\{code\}|] $ \i -> chk $ Ket i ] where chk x mtchs = case anyMatches mtchs of True -> Just x False -> Nothing \end{code} pandoc ------ \begin{code} pandoc :: T.Text -> T.Text -> T.Text -> IO () pandoc title in_file = pandoc' title in_file in_file pandoc' :: T.Text -> T.Text -> T.Text -> T.Text -> IO () pandoc' title repo_path in_file out_file = do writeFile "tmp/bc.html" bc fmap (const ()) $ SH.shelly $ SH.verbosely $ SH.run "pandoc" [ "-f", "markdown+lhs+grid_tables" , "-t", "html" , "-s" , "-B", "tmp/bc.html" , "-c", "lib/styles.css" , "-c", "lib/bs.css" , "-o", out_file , in_file ] where bc = concat [ "" ] repo_url = concat [ "https://github.com/iconnect/regex/blob/master/" , T.unpack repo_path ] \end{code} testing ------- \begin{code} test :: IO () test = do dm <- docMode test_pp "pp-doc" (loop dm) "data/pp-test.lhs" "data/pp-result-doc.lhs" gm <- genMode test_pp "pp-gen" (loop gm) "data/pp-test.lhs" "data/pp-result-gen.lhs" putStrLn "tests passed" \end{code}