\begin{code}
module Text.RE.TestBench
( MacroID(..)
, RegexType(..)
, MacroEnv
, WithCaptures(..)
, MacroDescriptor(..)
, TestResult(..)
, RegexSource(..)
, FunctionID(..)
, mkMacros
, testMacroEnv
, badMacros
, runTests
, runTests'
, formatMacroTable
, dumpMacroTable
, formatMacroSummary
, formatMacroSources
, formatMacroSource
, testMacroDescriptors
) where
import Data.Array
import Control.Applicative
import qualified Data.HashMap.Lazy as HML
import Data.List
import Data.Maybe
import Data.Ord
import Data.String
import Text.Printf
import Text.RE.Capture
import Text.RE.Options
import Text.RE.Replace
import qualified Text.Regex.PCRE as PCRE
import qualified Text.Regex.TDFA as TDFA
\end{code}
Types
-----
\begin{code}
data RegexType
= TDFA
| PCRE
deriving (Bounded,Enum,Eq,Ord,Show)
data WithCaptures
= InclCaptures
| ExclCaptures
deriving (Eq,Ord,Show)
type MacroEnv = HML.HashMap MacroID MacroDescriptor
data MacroDescriptor =
MacroDescriptor
{ _md_source :: !RegexSource
, _md_samples :: ![String]
, _md_counter_samples :: ![String]
, _md_test_results :: ![TestResult]
, _md_parser :: !(Maybe FunctionID)
, _md_description :: !String
}
deriving (Show)
newtype TestResult =
TestResult { _TestResult :: String }
deriving (IsString,Show)
newtype RegexSource =
RegexSource { _RegexSource :: String }
deriving (IsString,Show)
newtype FunctionID =
FunctionID { _FunctionID :: String }
deriving (IsString,Show)
data REToken =
REToken
{ _ret_prefix :: String
, _ret_fixed :: Bool
, _ret_grouping :: Bool
, _ret_capturing :: Bool
}
deriving (Show)
\end{code}
mkMacros
--------
\begin{code}
mkMacros :: (Monad m,Functor m)
=> (String->m r)
-> RegexType
-> WithCaptures
-> MacroEnv
-> m (Macros r)
mkMacros prs rty wc env =
HML.fromList <$> mapM (uncurry mk) (HML.toList env)
where
mk mid md = (,) mid <$> prs (mdRegexSource rty wc env md)
\end{code}
testMacroEnv, badMacros
-----------------------
\begin{code}
testMacroEnv :: String -> RegexType -> MacroEnv -> IO Bool
testMacroEnv lab rty m_env = case badMacros m_env of
[] -> return True
fails -> do
putStrLn $ lab' ++ " has failing tests for these macros: "
putStr $ unlines $ [ " "++_MacroID mid | mid<-fails ]
putStrLn $ "The whole table:"
putStrLn $ "========================================================"
putStr $ formatMacroTable rty m_env
putStrLn $ "========================================================"
return False
where
lab' = lab ++ " [" ++ show rty ++"]"
badMacros :: MacroEnv -> [MacroID]
badMacros m_env =
[ mid
| (mid,MacroDescriptor{..}) <- HML.toList m_env
, not $ null _md_test_results
]
runTests :: (Eq a,Show a)
=> RegexType
-> (String->Maybe a)
-> [(String,a)]
-> MacroEnv
-> MacroID
-> MacroDescriptor
-> MacroDescriptor
runTests rty parser = runTests' rty parser'
where
parser' caps = fmap capturedText (matchCapture caps) >>= parser
runTests' :: (Eq a,Show a)
=> RegexType
-> (Match String->Maybe a)
-> [(String,a)]
-> MacroEnv
-> MacroID
-> MacroDescriptor
-> MacroDescriptor
runTests' rty parser vector env mid md@MacroDescriptor{..} =
md { _md_test_results = test_results }
where
test_results = concat
[ concat $ map test vector
, concat $ map test_neg _md_counter_samples
]
test (src,x) = test' mid rty parser x $ match_ src env md
test_neg src = test_neg' mid rty parser $ match_ src env md
match_ = case rty of
TDFA -> match_tdfa
PCRE -> match_pcre
\end{code}
dumpMacroTable, formatMacroTable, formatMacroSummary, formatMacroSources, formatMacroSource
-------------------------------------------------------------------------------------------
\begin{code}
dumpMacroTable :: String -> RegexType -> MacroEnv -> IO ()
dumpMacroTable lab rty m_env = do
writeFile fp_t $ formatMacroTable rty m_env
writeFile fp_s $ formatMacroSources rty ExclCaptures m_env
where
fp_t = "tables/" ++ lab ++ "-" ++ show rty ++ ".md"
fp_s = "tables/" ++ lab ++ "-" ++ show rty ++ ".txt"
\end{code}
\begin{code}
formatMacroTable :: RegexType -> MacroEnv -> String
formatMacroTable rty env = unlines $
format_table macro_table_hdr
[ macro_table_row rty mid md
| (mid,md) <- sortBy (comparing fst) $ HML.toList env
]
\end{code}
\begin{code}
formatMacroSummary :: RegexType -> MacroEnv -> MacroID -> String
formatMacroSummary rty env mid = maybe oops prep $ HML.lookup mid env
where
prep :: MacroDescriptor -> String
prep md = unlines $ concat $ map (fmt md) [minBound..maxBound]
fmt :: MacroDescriptor -> Col -> [String]
fmt md c =
[ printf "%-15s : %s" (present_col c) ini
] ++ map (" "++) lns
where
(ini,lns) = case macro_attribute rty mid md c of
[] -> (,) "" []
[ln] -> (,) ln []
lns_ -> (,) "" lns_
oops = error $ _MacroID mid ++ ": macro not defined in this environment"
\end{code}
\begin{code}
formatMacroSources :: RegexType
-> WithCaptures
-> MacroEnv
-> String
formatMacroSources rty wc env = unlines $
[ printf "%-20s : %s" (_MacroID mid) $ formatMacroSource rty wc env mid
| mid <- sort $ HML.keys env
]
\end{code}
\begin{code}
formatMacroSource :: RegexType
-> WithCaptures
-> MacroEnv
-> MacroID
-> String
formatMacroSource rty wc env mid =
mdRegexSource rty wc env $ fromMaybe oops $ HML.lookup mid env
where
oops = error $ "formatMacroSource: not found: " ++ _MacroID mid
\end{code}
testMacroDescriptors, regexSource
---------------------------------
\begin{code}
testMacroDescriptors :: [MacroDescriptor] -> [TestResult]
testMacroDescriptors = concat . map _md_test_results
regexSource :: RegexType -> WithCaptures -> RegexSource -> String
regexSource rty wc = format_tokens rty wc . scan_re
\end{code}
Formatting helpers
------------------
\begin{code}
type TableRow = Array Col [String]
data Col
= C_name
| C_caps
| C_regex
| C_examples
| C_anti_examples
| C_fails
| C_parser
| C_comment
deriving (Ix,Bounded,Enum,Ord,Eq,Show)
present_col :: Col -> String
present_col = map tr . drop 2 . show
where
tr '_' = '-'
tr c = c
macro_table_hdr :: TableRow
macro_table_hdr = listArray (minBound,maxBound)
[ [present_col c]
| c<-[minBound..maxBound]
]
macro_table_row :: RegexType -> MacroID -> MacroDescriptor -> TableRow
macro_table_row rty mid md =
listArray (minBound,maxBound) $
map (macro_attribute rty mid md) [minBound..maxBound]
macro_attribute :: RegexType
-> MacroID
-> MacroDescriptor
-> Col
-> [String]
macro_attribute rty mid MacroDescriptor{..} c =
case c of
C_name -> [_MacroID mid]
C_caps -> [show $ min_captures rty $ scan_re _md_source]
C_regex -> [regexSource rty ExclCaptures _md_source]
C_examples -> _md_samples
C_anti_examples -> _md_counter_samples
C_fails -> map _TestResult _md_test_results
C_parser -> [maybe "-" _FunctionID _md_parser]
C_comment -> [_md_description]
format_table :: TableRow -> [TableRow] -> [String]
format_table hdr rows0 = concat
[ format_row cws hdr'
, format_row cws dsh
, concat $ map (format_row cws) rows
]
where
dsh = listArray (minBound,maxBound)
[ [replicate n '-'] | n<-elems cws ]
hdr' = hdr // [(,) C_regex $ [take n $ concat $ repeat "regex="] ]
where
n = min 29 $ cws!C_regex
cws = widths $ hdr : rows
rows = map wrap_row rows0
field_width :: Int
field_width = 40
wrap_row :: TableRow -> TableRow
wrap_row = fmap $ concat . map f
where
f, g :: String -> [String]
f cts = (ini ++ ['\\' | not (null rst)]) : g rst
where
(ini,rst) = splitAt (1+field_width) cts
g "" = []
g cts = ('\\' : ini ++ ['\\' | not (null rst)]) : g rst
where
(ini,rst) = splitAt field_width cts
widths :: [TableRow] -> Array Col Int
widths rows = listArray (minBound,maxBound)
[ maximum $ concat [ map length $ row!c | row<-rows ]
| c<-[minBound..maxBound]
]
format_row :: Array Col Int -> TableRow -> [String]
format_row cw_arr row =
[ ("|"++) $ intercalate "|"
[ field cw_arr row c i | c<-[minBound..maxBound] ]
| i <- [0..depth1]
]
where
depth = maximum [ length $ row!c | c<-[minBound..maxBound] ]
field :: Array Col Int -> TableRow -> Col -> Int -> String
field cws row c i = ljust (cws!c) $ sel i $ row!c
sel :: Int -> [String] -> String
sel i ss = case drop i ss of
[] -> ""
s:_ -> s
ljust :: Int -> String -> String
ljust w s = s ++ replicate n ' '
where
n = max 0 $ w length s
min_captures :: RegexType -> [REToken] -> Int
min_captures rty rets = length
[ ()
| REToken{..}<-rets
, _ret_fixed || (_ret_grouping && rty==TDFA)
]
\end{code}
Formatting tokens
-----------------
\begin{code}
format_tokens :: RegexType -> WithCaptures -> [REToken] -> String
format_tokens rty wc = foldr f ""
where
f REToken{..} rst = _ret_prefix ++ bra ++ xket rst
where
bra = case _ret_fixed of
True -> "("
False ->
case (,) _ret_grouping (_ret_capturing && wc==InclCaptures) of
(False,False) -> ""
(True ,False) -> if rty==PCRE then "(?:" else "("
(False,True ) -> "("
(True ,True ) -> "("
xket =
case not _ret_grouping && _ret_capturing && wc==ExclCaptures of
True -> delete_ket 0
False -> id
delete_ket :: Int -> String -> String
delete_ket _ "" = error "delete_ket: end of input"
delete_ket n (c:t) = case c of
'\\' -> case t of
"" -> error "delete_ket: end of input"
c':t' -> c : c' : delete_ket n t'
')' -> case n of
0 -> t
_ -> c : delete_ket (n1) t
'(' -> c : delete_ket (n+1) t
_ -> c : delete_ket n t
\end{code}
scan_re
-------
\begin{code}
scan_re :: RegexSource -> [REToken]
scan_re (RegexSource src0) = loop src0
where
loop "" = []
loop src =
case rst of
'\\':t -> case t of
"" -> REToken (ini++['\\']) False False False : []
c':t' -> REToken (ini++['\\',c']) False False False : loop t'
'(' :t -> case t of
c:':':t'
| c=='?' -> REToken ini False True False : loop t'
| c=='}' -> REToken ini False False True : loop t'
| c==']' -> REToken ini False True True : loop t'
_ -> REToken ini True True True : loop t
_ -> [REToken src False False False]
where
(ini,rst) = break chk src
chk '\\' = True
chk '(' = True
chk _ = False
\end{code}
scan_re
-------
\begin{code}
match_tdfa :: String -> MacroEnv -> MacroDescriptor -> Matches String
match_tdfa txt env md = txt TDFA.=~ mdRegexSource TDFA ExclCaptures env md
match_pcre :: String -> MacroEnv -> MacroDescriptor -> Matches String
match_pcre txt env md = txt PCRE.=~ mdRegexSource PCRE ExclCaptures env md
\end{code}
mdRegexSource
-------------
\begin{code}
mdRegexSource :: RegexType
-> WithCaptures
-> MacroEnv
-> MacroDescriptor
-> String
mdRegexSource rty wc env md =
expandMacros' lu $ regexSource rty wc $ _md_source md
where
lu = fmap (regexSource rty wc . _md_source) .
flip HML.lookup env
\end{code}
test', test_neg'
----------------
\begin{code}
test' :: (Eq a,Show a)
=> MacroID
-> RegexType
-> (Match String->Maybe a)
-> a
-> Matches String
-> [TestResult]
test' mid rty prs x Matches{..} = either (:[]) (const []) $ do
cs <- case allMatches of
[cs] -> return cs
_ -> oops "RE failed to parse"
mtx <- case matchCapture cs of
Nothing -> oops $ "RE parse failure: " ++ show cs
Just c -> return $ capturedText c
case mtx == matchesSource of
True -> return ()
False -> oops "RE failed to match the whole text"
x' <- case prs cs of
Nothing -> oops "matched text failed to parse"
Just x' -> return x'
case x'==x of
True -> return ()
False -> oops "parser failed to yield the expected result"
where
oops = Left . test_diagnostic mid False rty matchesSource
test_neg' :: MacroID
-> RegexType
-> (Match String->Maybe a)
-> Matches String
-> [TestResult]
test_neg' mid rty prs Matches{..} = either id (const []) $ do
case allMatches of
[] -> return ()
cz -> case ms of
[] -> return ()
_ -> Left [oops "RE parse succeeded"]
where
ms =
[ ()
| cs <- cz
, Just c <- [matchCapture cs]
, let t = capturedText c
, t == matchesSource
, isJust $ prs cs
]
where
oops = test_diagnostic mid True rty matchesSource
test_diagnostic :: MacroID
-> Bool
-> RegexType
-> String
-> String
-> TestResult
test_diagnostic mid is_neg rty tst msg =
TestResult $
printf "%-20s [%s %s] : %s (%s)" mid_s neg_s rty_s msg tst
where
mid_s = _MacroID mid
neg_s = if is_neg then "-ve" else "+ve" :: String
rty_s = show rty
\end{code}